python selenium 特征屏蔽抓取Octopart cookie获取数据

2021-03-22 14:41:27 浏览数 (1)

代码语言:python
# coding:utf-8
#当前的项目名:digikey
#当前编辑文件名:dgk_selenium
#当前用户的登录名:Administrator
#当前系统日期时间:2021/3/16 13:19
#用于创建文件的IDE的名称: PyCharm
import math

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC # 等待
from selenium.webdriver import ChromeOptions, ActionChains

from selenium.webdriver.common.keys import Keys # 按键模拟
import time
import random
# Module-level setup: build a Chrome driver with automation-fingerprint
# masking options, patch navigator.webdriver, and open the target page.
# The resulting `web` driver is the global that r() operates on.
# Wall-clock timestamp taken at script start.
stime=time.time()
options = ChromeOptions()
options.headless=False
# options.add_argument('-headless')
options.add_argument("--disable-extensions") # non-developer mode (extensions disabled)
options.add_experimental_option("excludeSwitches", ["enable-automation"]) # mask automation fingerprint
options.add_experimental_option('useAutomationExtension', False) # mask automation fingerprint
options.add_argument("-disable-infobars")  # hide the "Chrome is being controlled by automated test software" infobar
# NOTE(review): the positional driver path and the `chrome_options=` keyword
# look like Selenium 3-era usage -- confirm against the installed Selenium version.
web=webdriver.Chrome('./chromedriver.exe',chrome_options=options)

# Mask automation fingerprint: register a script that runs on every new
# document and makes navigator.webdriver evaluate to undefined.
web.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
  "source": """
    Object.defineProperty(navigator, 'webdriver', {
      get: () => undefined
    })
  """
})
# for x in ['https://www.digikey.com/en/products/filter/accessories/159','https://www.digikey.com/en/products/filter/alarms-buzzers-and-sirens/157']:
#     web.get(x)

# web.get('https://octopart.com/search?category_id=4261&manufacturer_id=572')
# Currently pointed at a local server; the Octopart URL above is disabled.
web.get('http://localhost:8080/')
web.maximize_window()
def r():
    """Press the on-page verification widget, then dump the session cookies.

    Works on the module-level ``web`` driver. The steps are:

    1. Move the pointer by a fixed offset (735, 275), double-click, then
       click-and-hold.
    2. Wait, refresh the page, and repeat the click-and-hold at the current
       pointer position.
    3. Write every cookie as ``name:value; `` into ``./cookie.txt`` and save
       a screenshot to ``./te.png``.
    4. Look for the ``px-captcha`` element and print it if present.

    Returns:
        None. All results are side effects (files written, console output).
    """
    ac = ActionChains(web)

    # NOTE: ActionChains only queues actions; nothing reaches the browser
    # until perform(). The sleeps below therefore delay the queueing, not the
    # gestures themselves.
    ac.move_by_offset(735, 275)
    time.sleep(0.5)
    print('暂定0.5')
    ac.double_click()
    time.sleep(1)
    print('暂定1')
    ac.click_and_hold()
    ac.perform()

    print('按动验证块')

    time.sleep(10)
    web.refresh()  # reload the page
    time.sleep(5)
    acc = ActionChains(web)
    acc.move_by_offset(0, 0).double_click().click_and_hold().perform()

    # Fetch the cookies once and reuse the list for logging and for the file.
    cookies = web.get_cookies()
    print(cookies)
    cookie = ''.join(c['name'] + ':' + c['value'] + '; ' for c in cookies)
    with open('./cookie.txt', 'w', encoding='utf-8') as f:
        f.write(cookie)
    web.save_screenshot('./te.png')
    print('cookies获取成功')

    captcha_locator = (By.XPATH, "//div[@id='px-captcha']")
    try:
        t = WebDriverWait(web, 5, 0.5).until(
            EC.presence_of_all_elements_located(captcha_locator))
    except Exception:
        # Best-effort: the wait failed (typically a timeout), so fall back to
        # a direct lookup under <section>.
        try:
            one_iframe = web.find_element(
                By.XPATH, "//section/div[@id='px-captcha']")
        except Exception as exc:
            # Still best-effort, but report the failure instead of hiding it.
            print('px-captcha element not found:', exc)
        else:
            print(one_iframe)
    else:
        if t:
            print(t)


# Run the verification/cookie-capture routine when the script is executed.
r()

0 人点赞