Selenium + Chrome Diver密码认证的HTTP代理

2019-03-25 11:18:20 浏览数 (1)

默认情况下,Chrome的–proxy-server=”http://ip:port”参数不支持设置用户名和密码认证。这样就使得”Selenium Chrome Driver”无法使用HTTP Basic Authentication的HTTP代理。一种变通的方式就是采用IP地址认证,但在国内网络环境下,大多数用户都采用ADSL形式网络接入,IP是变化的,也无法采用IP地址绑定认证。因此迫切需要找到一种让Chrome自动实现HTTP代理用户名密码认证的方案。

Stackoverflow上有人分享了一种利用Chrome插件实现自动代理用户密码认证的方案非常不错,详细地址http://stackoverflow.com/questions/9888323/how-to-override-basic-authentication-in-selenium2-with-java-using-chrome-driver。

鲲之鹏的技术人员在该思路的基础上用Python实现了自动化的Chrome插件创建过程,即根据指定的代理“username:password@ip:port”自动创建一个Chrome代理插件,然后就可以在”Selenium Chrome Driver”中通过安装该插件实现代理配置功能,具体代码如下:

代码语言:javascript复制
# -*- coding:utf-8 -*-
# 测试"Selenium   Chrome"使用带用户名密码认证的代理
import os,re,time,zipfile
from selenium import webdriver


# Chrome代理模板插件(https://github.com/RobinDev/Selenium-Chrome-HTTP-Private-Proxy)目录
CHROME_PROXY_HELPER_DIR = 'chrome-proxy-extensionsChrome-proxy-helper'
# 存储自定义Chrome代理扩展文件的目录
CUSTOM_CHROME_PROXY_EXTENSIONS_DIR = 'chrome-proxy-extensions'

def get_chrome_proxy_extension(proxy):
    """获取一个Chrome代理扩展,里面配置有指定的代理(带用户名密码认证)
    proxy - 指定的代理,格式: username:password@ip:port
    """
    m = re.compile('([^:] ):([^@] )@([d.] ):(d )').search(proxy)
    if m:
        # 提取代理的各项参数
        username = m.groups()[0]
        password = m.groups()[1]
        ip = m.groups()[2]
        port = m.groups()[3]
        # print(username,password,ip,port)
        # 创建一个定制Chrome代理扩展(zip文件)
        if not os.path.exists(CUSTOM_CHROME_PROXY_EXTENSIONS_DIR):
            os.mkdir(CUSTOM_CHROME_PROXY_EXTENSIONS_DIR)
        extension_file_path = os.path.join(CUSTOM_CHROME_PROXY_EXTENSIONS_DIR, '{}.zip'.format(proxy.replace(':', '_')))
        if not os.path.exists(extension_file_path):
            # 扩展文件不存在,创建
            zf = zipfile.ZipFile(extension_file_path, mode='w')
            if not os.path.exists(CHROME_PROXY_HELPER_DIR):
                os.mkdir(CHROME_PROXY_HELPER_DIR)
            zf.write(os.path.join(CHROME_PROXY_HELPER_DIR, 'manifest.json'), 'manifest.json')
            # 替换模板中的代理参数
            background_content = open(os.path.join(CHROME_PROXY_HELPER_DIR, 'background.js')).read()
            background_content = background_content.replace('%proxy_host', ip)
            background_content = background_content.replace('%proxy_port', port)
            background_content = background_content.replace('%username', username)
            background_content = background_content.replace('%password', password)
            zf.writestr('background.js', background_content)
            zf.close()
        # print(extension_file_path)
        return extension_file_path
    else:
        raise Exception('Invalid proxy format. Should be username:password@ip:port')


if __name__ == '__main__':
    # 测试
    options = webdriver.ChromeOptions()
    # 添加一个自定义的代理插件(配置特定的代理,含用户名密码认证)
    options.add_extension(get_chrome_proxy_extension(proxy='username:password@ip:port'))
    driver = webdriver.Chrome(chrome_options=options, executable_path='./source/chromedriver_win32_2.35/chromedriver.exe')
    # 访问一个IP回显网站,查看代理配置是否生效了
    driver.get('http://httpbin.org/ip')
    # driver.get('http://ip138.com/')
    # driver.get('http://www.baidu.com/')
    # driver.get('https://www.google.com.hk/search?q=渤海金控&safe=strict&tbs=sbd:1&tbm=nws&ei=&start=10&sa=N&biw=&bih=&dpr=1')
    # print(driver.page_source)
    time.sleep(60)
    driver.quit()

测试结果如下所示:

{ “origin”: “192.168.8.84” }

无认证代理:

代码语言:javascript复制
options = webdriver.ChromeOptions()  
options.add_argument('--proxy-server=http://ip:port')    
driver = webdriver.Chrome(executable_path="C:chromedriver.exe", chrome_options=0ptions)  
driver.get("http://ip138.com/")  
print(driver.page_source) 

代码语言:javascript复制
# -*- coding: utf-8 -*-
import time,string,zipfile,os
from selenium import webdriver

def create_proxyauth_extension(proxy_host, proxy_port,proxy_username, proxy_password,
                               scheme='http', plugin_path=None):
    """Proxy Auth Extension
    args:
        proxy_host (str): domain or ip address, ie proxy.domain.com
        proxy_port (int): port
        proxy_username (str): auth username
        proxy_password (str): auth password
    kwargs:
        scheme (str): proxy scheme, default http
        plugin_path (str): absolute path of the extension

    return str -> plugin_path
    """
    if plugin_path is None:
        file='./chrome_proxy_helper'
        if not os.path.exists(file):
            os.mkdir(file)
        plugin_path = file '/%s_%s@%s_%s.zip'%(proxy_username,proxy_password,proxy_host,proxy_port)

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """
    background_js = string.Template(
    """
    var config = {
            mode: "fixed_servers",
            rules: {
              singleProxy: {
                scheme: "${scheme}",
                host: "${host}",
                port: parseInt(${port})
              },
              bypassList: ["foobar.com"]
            }
          };

    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

    function callbackFn(details) {
        return {
            authCredentials: {
                username: "${username}",
                password: "${password}"
            }
        };
    }

    chrome.webRequest.onAuthRequired.addListener(
                callbackFn,
                {urls: ["<all_urls>"]},
                ['blocking']
    );
    """
    ).substitute(
        host=proxy_host,
        port=proxy_port,
        username=proxy_username,
        password=proxy_password,
        scheme=scheme,
    )
    with zipfile.ZipFile(plugin_path, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)

    return plugin_path


if __name__=='__main__':
    proxyauth_plugin_path = create_proxyauth_extension(
        proxy_host="139.92.6.230",
        proxy_port=3100,
        proxy_username="",
        proxy_password="",
        scheme='http'
    )
    options = webdriver.ChromeOptions()
    #浏览器最大化
    options.add_argument("--start-maximized")
    #增加扩展
    options.add_extension(proxyauth_plugin_path)
    driver = webdriver.Chrome(chrome_options=options,executable_path='../source/chromedriver_win32_2.40/chromedriver.exe')
    driver.get("http://httpbin.org/ip")
    # driver.get('http://ip138.com/')
    # driver.get('https://www.google.com.hk/search?q=渤海金控&safe=strict&tbs=sbd:1&tbm=nws&ei=&start=10&sa=N&biw=&bih=&dpr=1')
    # print(driver.page_source)
    time.sleep(10)
    driver.quit()

插件源代码 https://github.com/RobinDev/Selenium-Chrome-HTTP-Private-Proxy

参考:https://www.cnblogs.com/rookies/p/6119786.html https://www.cnblogs.com/roystime/p/6935543.html

0 人点赞