Python and SEO Tool Source Code: A Multi-Engine Index-Check Assistant (exe)

2020-11-09 12:13:44


Plenty of people say SEO is a dark art, and fair enough: one side of it is ranking, the other is indexing, especially for a scrub like me. Both indexing and ranking need to be verified, queried, even monitored, and at any real scale you have to do that with tools of your own. A page can only compete for rankings once it has been indexed, so keeping an eye on indexing data is a must!

This SEO helper, a multi-engine index-check assistant shipped as an exe, is written in Python. It batch-imports the URLs to check and covers three search engines: Baidu, Sogou, and 360 Search. Checked URLs are exported into separate text files for not indexed (fail) and indexed (succ). It's a low-rent build running four threads, which should be about enough. How long it survives before getting blocked is unknown; give it a try and share your results!

The big names probably wouldn't look twice at it; treat it as a toy!

By the way, download with care: you need to follow this scrub's WeChat official account and reply "搜录查询" to get the authorization code the exe needs to run. If that puts you off, don't download!

Link: https://pan.baidu.com/s/1thNfN1ZVzXs01NipBel6CQ

Extraction code: s25b

Of course, the source code is included below, so you can modify and debug it yourself!

If you run into problems, feel free to add this scrub on WeChat to discuss!

Importing the URLs to check

Import the URLs to check from a txt file, one URL per line.

Code (Python):
# Import the URLs to check
class Get_urls():
    def __init__(self):
        self.search_urls = []

    def get_txt_urls(self, txt_path):
        print('>> Trying to import the text file..')
        # Try gbk first; fall back to utf-8 if decoding fails
        try:
            with open(txt_path, 'r', encoding='gbk') as f:
                txt_urls = f.readlines()
        except UnicodeDecodeError:
            print("The file is utf-8 encoded!")
            with open(txt_path, 'r', encoding='utf-8') as f:
                txt_urls = f.readlines()

        print(f'>> {len(txt_urls)} raw entries imported!')

        for line in txt_urls:
            parts = line.split()
            if not parts:  # skip blank lines
                continue
            url = parts[0]
            # "锘縣ttp" is a UTF-8 BOM plus "http" mis-decoded as gbk;
            # strip the BOM debris back out
            if "锘縣ttp" in url:
                url = url.replace("锘縣ttp", "http")
            self.search_urls.append(url)

        print(self.search_urls)

        return self.search_urls
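
For reference, a minimal usage sketch; the sample file name urls.txt and its contents are illustrative, not anything the tool mandates:

Code (Python):
# Write a tiny sample file, then parse it with Get_urls
with open('urls.txt', 'w', encoding='utf-8') as f:
    f.write('https://www.example.com/page1\nhttp://www.example.com/page2\n')

urls = Get_urls().get_txt_urls('urls.txt')
# -> ['https://www.example.com/page1', 'http://www.example.com/page2']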

Baidu index check

Baidu index check; the verification keyword is "没有找到" (the "nothing found" text on Baidu's no-results page).

Code (Python):
# Shared imports for the three engine classes below
import time
import requests
from fake_useragent import UserAgent
from multiprocessing.dummy import Pool as ThreadPool

# Baidu index check
class B_search():
    def __init__(self):
        self.ua = UserAgent()
        self.join_urls = []
        self.results = []
        self.headers = {
            "User-Agent": self.ua.random,
            # Session cookie captured by the author; long expired, swap in your own
            "Cookie": "BIDUPSID=A461AAF74635FFC7C8F38B6A59BA8BF5; PSTM=1595934808; H_WISE_SIDS=148077_150112_150830_153110_149355_149986_150076_147089_150087_151863_148867_150745_147279_152309_150647_153106_152537_151533_151015_151558_152590_148523_151032_127969_153226_146548_152505_152902_146652_151319_146732_153059_152740_150764_131423_152018_151389_149009_107319_151182_149251_153181_152284_144966_153116_152269_152514_152739_152249_149773_148868_150113_151703_110085; BAIDUID=0671E35F80D2F5FEEDB8387648940553:FG=1; MCITY=-:; BD_UPN=12314353; sug=3; ORIGIN=0; bdime=0; sugstore=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; delPer=0; BD_CK_SAM=1; PSINO=6; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross={\"distinct_id\":\"19306004:0\",\"first_id\":\"17587fc96ba21d-04c73361861a03-4e4c0f20-2073600-17587fc96bb59d\",\"props\":{\"$latest_traffic_source_type\":\"直接流量\",\"$latest_search_keyword\":\"未取到值_直接打开\",\"$latest_referrer\":\"\"},\"$device_id\":\"17587fc96ba21d-04c73361861a03-4e4c0f20-2073600-17587fc96bb59d\"}; COOKIE_SESSION=2015_0_6_6_8_12_0_2_6_3_0_0_1952_0_0_0_1604300209_0_1604302136|9#109815_15_1604053694|9; H_PS_PSSID=32820_1468_32874_32950_31660_32971_32723_7517_32117_32761; H_PS_645EC=b52c4oJuV08TA0pmH4qjMkzfvHBbPk1qT7MqNhHISWgWQcT+oTNGjSbPDbc; BA_HECTOR=0l8ga00h8k20843jt31fpvigt0o",
        }

    def bjoinurls(self, search_urls):
        # Build one Baidu search URL per target URL
        for search_url in search_urls:
            join_url = f'https://www.baidu.com/s?wd={search_url}'
            self.join_urls.append(join_url)

        print(self.join_urls)
        return self.join_urls

    # Single-threaded check
    def bdsearch(self, url):
        print(f'>> Checking whether Baidu has indexed {url} ..')
        response = requests.get(url, headers=self.headers, timeout=5)
        time.sleep(2)
        if response.status_code == 200:
            html = response.content.decode('utf-8')
            result_url = url.split("wd=")[1]
            # "没有找到" appears on Baidu's no-results page
            if "没有找到" in html:
                print(f"{result_url} - not indexed by Baidu")
                self.results.append(f"{result_url} - not indexed by Baidu")
                with open('fail_baidu.txt', 'a', encoding='utf-8') as f:
                    f.write(f"{result_url}\n")
            else:
                print(f"{result_url} - indexed by Baidu")
                self.results.append(f"{result_url} - indexed by Baidu")
                with open('succ_baidu.txt', 'a', encoding='utf-8') as f:
                    f.write(f"{result_url}\n")

    # Multi-threaded check
    def dxc_bdsearch(self, urls):
        print('>> Starting multi-threaded checks..')
        try:
            # 4 workers; with no argument ThreadPool defaults to the CPU core count
            pool = ThreadPool(4)
            pool.map(self.bdsearch, urls)
            pool.close()
            pool.join()
            print("Baidu index check finished!")
        except Exception:
            print("Error: unable to start thread")

        return self.results
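
A minimal sketch of how the two classes chain together (the variable names are mine; results also land in fail_baidu.txt / succ_baidu.txt as a side effect):

Code (Python):
urls = Get_urls().get_txt_urls('urls.txt')  # 'urls.txt' is illustrative

baidu = B_search()
results = baidu.dxc_bdsearch(baidu.bjoinurls(urls))
print(results)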

Sogou index check

The Sogou index check is much the same; the verification keyword is "0条相关" (the "0 related results" text on Sogou's no-results page).

Code (Python):
# Sogou index check
class Sg_search():
    def __init__(self):
        self.ua = UserAgent()
        self.join_urls = []
        self.results = []
        self.headers = {
            "User-Agent": self.ua.random,
            # Session cookie captured by the author; long expired, swap in your own
            "Cookie": "IPLOC=CN4403; SUID=981D0EB73220910A000000005F2131A2; SUV=00972C5AB70E1D985F2131A3FD344494; _ga=GA1.2.1265874139.1596180079; CXID=E8A7C57E2DACC58DC6FC681AFAE3E866; ssuid=7879419577; sw_uuid=7947207841; pgv_pvi=1464691712; wuid=AAFxJvHlMAAAAAqLMVQSlQQAGwY=; ABTEST=8|1601979367|v17; ad=Ylllllllll2KJv@4lllllVMzgV7lllllHZfptkllllGlllll4qxlw@@@@@@@@@@@; SNUID=E87FF74FF8FD4A166C307EB8F944606F; browerV=3; osV=1; ld=yZllllllll2KXDLVlllllVMK8YUlllllHZfpTlllll9llllllklll5@@@@@@@@@@; sst0=880",
        }

    def sgjoinurls(self, search_urls):
        # Build one Sogou search URL per target URL
        for search_url in search_urls:
            join_url = f'https://www.sogou.com/web?query={search_url}'
            self.join_urls.append(join_url)

        print(self.join_urls)
        return self.join_urls

    # Single-threaded check
    def sgsearch(self, url):
        print(f'>> Checking whether Sogou has indexed {url} ..')
        response = requests.get(url, headers=self.headers, timeout=5)
        time.sleep(2)
        if response.status_code == 200:
            html = response.content.decode('utf-8')
            result_url = url.split("query=")[1]
            # "0条相关" appears on Sogou's no-results page
            if "0条相关" in html:
                print(f"{result_url} - not indexed by Sogou")
                self.results.append(f"{result_url} - not indexed by Sogou")
                with open('fail_sougou.txt', 'a', encoding='utf-8') as f:
                    f.write(f"{result_url}\n")
            else:
                print(f"{result_url} - indexed by Sogou")
                self.results.append(f"{result_url} - indexed by Sogou")
                with open('succ_sougou.txt', 'a', encoding='utf-8') as f:
                    f.write(f"{result_url}\n")

    # Multi-threaded check
    def dxc_sgsearch(self, urls):
        print('>> Starting multi-threaded checks..')
        try:
            # 4 workers; with no argument ThreadPool defaults to the CPU core count
            pool = ThreadPool(4)
            pool.map(self.sgsearch, urls)
            pool.close()
            pool.join()
            print("Sogou index check finished!")
        except Exception:
            print("Error: unable to start thread")

        return self.results

One heads-up here: the http and https versions of the same URL give different index-check results, and I can't for the life of me figure out why!!
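
Separately from that quirk, note that the target URL is interpolated into the search URL raw in all three joinurls methods. A safer variant, my suggestion rather than part of the original tool, would percent-encode it with urllib.parse.quote so characters like & or # in a target URL cannot corrupt the query string:

Code (Python):
from urllib.parse import quote

def join_url_encoded(search_url):
    # Percent-encode the target URL before embedding it as the wd= parameter
    return f'https://www.baidu.com/s?wd={quote(search_url, safe="")}'

print(join_url_encoded('https://www.example.com/page?a=1&b=2'))
# https://www.baidu.com/s?wd=https%3A%2F%2Fwww.example.com%2Fpage%3Fa%3D1%26b%3D2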

360 index check

The 360 index check follows the same pattern; the verification keyword is "找不到" (the "can't find" text on 360's no-results page).

Code (Python):
# 360 index check
class S360_search():
    def __init__(self):
        self.ua = UserAgent()
        self.join_urls = []
        self.results = []
        self.headers = {
            "User-Agent": self.ua.random,
            # Session cookie captured by the author; long expired, swap in your own
            "Cookie": "QiHooGUID=41F80B0CCE5D43A22EEF0305A12CDE3F.1596003342506; __guid=15484592.2994995584481314300.1596003341831.5723; soid=TjzBKt3zrO-Rh1S7fXSb0S!6kmX5TlEerB2URZz9v4; __md=667cb161f9515972323507763d8fa7dd643a65bd2e88034.9; dpr=1; isafe=1; webp=1; _uc_m2=886a48052dbb9e2291f80055746e0d4f1f110f922b2f; _uc_mid=7cb161f953d8fa7dd643a65bd2e88034; __huid=11xZqhEl/fVeqclI4j+dQeQvX63Oph/+CVM5vxqYGxQI4=; _ga=GA1.2.607533084.1598082638; so_md=307cb1159868390853561f953d8fa7dd643a65bd2e88034.4; so_huid=11xZqhEl/fVeqclI4j+dQeQvX63Oph/+CVM5vxqYGxQI4=; __guid=9114931.3673932918268830000.1601026204371.0352; __mid=7cb161f953d8fa7dd643a65bd2e88034; __gid=9114931.910562614.1601026204503.1603272604905.85; Q=u=uhthb002&n=&le=AwH0ZGV5ZGR3WGDjpKRhL29g&m=ZGH5WGWOWGWOWGWOWGWOWGWOZGL0&qid=144048053&im=1_t018c25fbb66797efb2&src=360chrome&t=1; T=s=9015aaf14e7f5a2899505e57f0923273&t=1595934758&lm=0-1&lf=2&sk=d659153bc633959211cc6224482ba1d9&mt=1595934758&rc=3&v=2.0&a=1; _pp_wd=1; erules=p2-1|p1-7|ecl-2|kd-5; _S=lugbh23cn4guran76ieenpmj67; gtHuid=1; count=6",
        }

    def s360joinurls(self, search_urls):
        # Build one 360 search URL per target URL
        for search_url in search_urls:
            join_url = f'https://www.so.com/s?q={search_url}'
            self.join_urls.append(join_url)

        print(self.join_urls)
        return self.join_urls

    # Single-threaded check
    def s360search(self, url):
        print(f'>> Checking whether 360 has indexed {url} ..')
        response = requests.get(url, headers=self.headers, timeout=5)
        time.sleep(2)
        if response.status_code == 200:
            html = response.content.decode('utf-8')
            result_url = url.split("q=")[1]
            # "找不到" appears on 360's no-results page
            if "找不到" in html:
                print(f"{result_url} - not indexed by 360")
                self.results.append(f"{result_url} - not indexed by 360")
                with open('fail_360.txt', 'a', encoding='utf-8') as f:
                    f.write(f"{result_url}\n")
            else:
                print(f"{result_url} - indexed by 360")
                self.results.append(f"{result_url} - indexed by 360")
                with open('succ_360.txt', 'a', encoding='utf-8') as f:
                    f.write(f"{result_url}\n")

    # Multi-threaded check
    def dxc_s360search(self, urls):
        print('>> Starting multi-threaded checks..')
        try:
            # 4 workers; with no argument ThreadPool defaults to the CPU core count
            pool = ThreadPool(4)
            pool.map(self.s360search, urls)
            pool.close()
            pool.join()
            print("360 index check finished!")
        except Exception:
            print("Error: unable to start thread")

        return self.results
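
To wrap up, a sketch of a driver that runs all three engines over the same list; the main() wrapper and file name are mine, since how the released exe wires the classes together isn't shown here:

Code (Python):
def main():
    urls = Get_urls().get_txt_urls('urls.txt')  # 'urls.txt' is illustrative

    baidu = B_search()
    baidu.dxc_bdsearch(baidu.bjoinurls(urls))

    sogou = Sg_search()
    sogou.dxc_sgsearch(sogou.sgjoinurls(urls))

    so360 = S360_search()
    so360.dxc_s360search(so360.s360joinurls(urls))

if __name__ == '__main__':
    main()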
