During SRC information gathering we often end up searching target domains and related keywords through a browser. One or two keywords can be handled by hand, but thousands cannot, so I wrote this small demo. Put the keywords you want to look up (company names, for example) into target.txt in the current directory, run the script, and let it do the work for you.
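For reference, target.txt simply holds one search keyword per line, such as company or school names. The entries below are purely made-up placeholders, not real targets:

Example University
Acme Software Co., Ltd.
Sample Institute of Technology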
# -*- coding: UTF-8 -*-
import requests
import time
import random
from lxml import etree
import urllib3
import threading  # imported but not used by the script itself
banner = """
V1.0.1
Tommonkey
"""
# setting proxy
proxies = {
    'http': 'http://localhost:7890',
    'https': 'http://localhost:7890'
}
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36",
}
def input_data():
    # read the keyword list, one keyword per line
    List = []
    with open(r"./target.txt", encoding="utf-8") as f:
        for u in f.readlines():
            u = u.strip("\n")
            List.append(u)
    # print(List)
    return List
# send requests
def requestPackage(i):
    try:
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        googleUrl = "https://www.google.com/search?q="
        googleUrl = googleUrl + i
        data_Raw = requests.get(url=googleUrl, headers=headers, proxies=proxies, timeout=20, verify=False)
        data_Raw.close()
        time.sleep(random.randint(2, 5))
        print("[+] Searching for {}".format(i))
        if data_Raw.status_code == 200:
            print("[+] Response status code: {}".format(data_Raw.status_code))
            data_text = data_Raw.text
            # core string handling: extract the displayed URL of the first search result
            result = etree.HTML(data_text).xpath('//*[@class="tjvcx GvPZzd cHaqb"]/text()')
            if len(result) != 0:
                if "edu" in result[0]:
                    print("[+]" + result[0])
                    with open("result.txt", mode="a+") as fd:
                        fd.write(i + ":" + result[0] + "\n")
                else:
                    print("[-]" + result[0])
                    with open("fail.txt", mode="a+") as fd:
                        fd.write(result[0] + ":" + i + "\n")
                return 0
            else:
                return 1
    except OSError:
        pass
    return 1
def queryData(list):
    try:
        for i in list:
            status = requestPackage(i)
            if status == 0:
                pass
            else:
                # retry up to three times before giving up on a keyword
                for num in [1, 2, 3]:
                    print("[-] Request failed, retrying: attempt {}/3".format(num))
                    statusT = requestPackage(i)
                    if statusT == 1:
                        if num == 3:
                            print("[-] Capture failed, writing to NoCapture.txt")
                            with open("NoCapture.txt", mode="a+") as fd:
                                fd.write(i + "\n")
                        continue
                    else:
                        break
    except Exception as err:
        print(err)
if __name__ == "__main__":
    print(banner)
    list = input_data()
    result = queryData(list)
    print(result)
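The script imports threading but processes the keyword list sequentially. As a rough sketch (not part of the original repo), the list could be split across a few worker threads; the thread count below is an assumed default, and in practice the file writes in requestPackage would need a lock to avoid interleaved lines:

import threading

def queryData_threaded(keywords, workers=4):
    # split the keyword list into roughly equal chunks, one thread per chunk
    # (workers=4 is an assumption, not a value from the original script)
    chunk = max(1, len(keywords) // workers)
    threads = []
    for start in range(0, len(keywords), chunk):
        t = threading.Thread(target=queryData, args=(keywords[start:start + chunk],))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()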
Put the keywords you want to query into target.txt in the current directory, one per line, then run the script directly.
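After a run, keywords whose first search result URL contains "edu" are appended to result.txt, other matched results go to fail.txt, and keywords that still fail after three retries are recorded in NoCapture.txt.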
GitHub repository
https://github.com/tonmonkey/googleFirstWeb