puppeteer反反爬设置汇总

2021-02-24 11:13:18 浏览数 (1)

记录使用 puppeteer 抓取页面遇到反爬检测时，可以采用的各项设置与处理方法。

  • 启动设置
  • webdriver
  • window.chrome
  • userAgent
  • plugins
  • languages
  • permissions
  • WebGL

# 启动设置

代码语言:javascript复制
// Start the browser through Puppeteer with the flags this setup relies on.
const launchOptions = {
    // Run without a visible browser window.
    headless: true,
    // Do not forward the browser process's stdout/stderr to this process.
    dumpio: false,
    args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        // Switches off the Blink "AutomationControlled" feature.
        '--disable-blink-features=AutomationControlled',
    ],
};
const browser = await puppeteer.launch(launchOptions);

# webdriver

代码语言:javascript复制
// webdriver
// Registers a script that runs in every new document and removes the
// `webdriver` property from navigator's prototype.
await page.evaluateOnNewDocument(() => {
    // Object.getPrototypeOf is used instead of the deprecated `__proto__`
    // accessor. The delete mutates the prototype object in place, so there is
    // no need to assign the prototype back onto navigator afterwards.
    const navigatorProto = Object.getPrototypeOf(navigator);
    delete navigatorProto.webdriver;
});

# window.chrome

代码语言:javascript复制
// Add a window.chrome field and fill it with some placeholder members.
await page.evaluateOnNewDocument(() => {
    const fakeChrome = {};
    window.chrome = fakeChrome;

    // Placeholder values only; they are plain strings, not real implementations.
    fakeChrome.app = {
        InstallState: 'hehe',
        RunningState: 'haha',
        getDetails: 'xixi',
        getIsInstalled: 'ohno',
    };

    // Each member gets its own distinct empty function.
    for (const member of ['csi', 'loadTimes', 'runtime']) {
        fakeChrome[member] = function () {};
    }
});

# userAgent

代码语言:javascript复制
// userAgent
// In headless mode the user agent contains the word "headless", so it has to
// be overridden. page.setUserAgent applies the override at the browser level:
// it covers the User-Agent request header as well as navigator.userAgent,
// whereas redefining the getter inside the page would only change the latter
// and would leave a detectable own property on navigator.
await page.setUserAgent(
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36',
);

# plugins

代码语言:javascript复制
// plugins
// Makes navigator.plugins report three plugin-like entries that imitate real
// plugin information.
await page.evaluateOnNewDocument(() => {
    // One mime-type-like record; enabledPlugin holds the global Plugin
    // constructor, looked up each time a record is built.
    const mimeEntry = (type, suffixes, description) => ({
        type,
        suffixes,
        description,
        enabledPlugin: Plugin,
    });

    // One plugin-like record: its mime records sit under the indices 0..n-1
    // and `length` is the number of mime records.
    const pluginEntry = (name, filename, description, mimes) => ({
        ...mimes,
        description,
        filename,
        length: mimes.length,
        name,
    });

    // Produces a fresh array on every call, so each read of
    // navigator.plugins yields new objects.
    const fakePlugins = () => [
        pluginEntry('Chrome PDF Plugin', 'internal-pdf-viewer', 'Portable Document Format', [
            mimeEntry('application/x-google-chrome-pdf', 'pdf', 'Portable Document Format'),
        ]),
        pluginEntry('Chrome PDF Viewer', 'mhjfbmdgcfjbbpaeojofohoefgiehjai', '', [
            mimeEntry('application/pdf', 'pdf', ''),
        ]),
        pluginEntry('Native Client', 'internal-nacl-plugin', '', [
            mimeEntry('application/x-nacl', '', 'Native Client Executable'),
            mimeEntry('application/x-pnacl', '', 'Portable Native Client Executable'),
        ]),
    ];

    Object.defineProperty(navigator, 'plugins', {
        get: () => fakePlugins(),
    });
});

# languages

代码语言:javascript复制
// languages
// Makes navigator.languages report a fixed language list.
await page.evaluateOnNewDocument(() => {
    const languagesDescriptor = {
        // A new array is returned on every read.
        get() {
            return ['zh-CN', 'zh', 'en'];
        },
    };
    Object.defineProperty(navigator, 'languages', languagesDescriptor);
});

# permissions

代码语言:javascript复制
// permissions
// Wraps navigator.permissions.query so that a 'notifications' query resolves
// with the current Notification.permission value; every other query is
// forwarded to the native implementation.
await page.evaluateOnNewDocument(() => {
    const permissions = window.navigator.permissions;
    // The native method must be invoked with the Permissions object as `this`;
    // calling a detached reference throws "Illegal invocation".
    const originalQuery = permissions.query.bind(permissions);
    permissions.query = (parameters) =>
        // A missing descriptor is passed through so the native method reports
        // the error itself.
        parameters && parameters.name === 'notifications'
            ? Promise.resolve({ state: Notification.permission })
            : originalQuery(parameters);
});

# WebGL

代码语言:javascript复制
// WebGL
// Overrides WebGLRenderingContext.prototype.getParameter so that the unmasked
// vendor and renderer queries return fixed strings; all other parameters are
// answered by the native implementation.
await page.evaluateOnNewDocument(() => {
    const webglProto = WebGLRenderingContext.prototype;
    // getParameter lives on the prototype, not on the constructor itself.
    const originalGetParameter = webglProto.getParameter;
    webglProto.getParameter = function (parameter) {
        // UNMASKED_VENDOR_WEBGL
        if (parameter === 37445) {
            return 'Intel Inc.';
        }
        // UNMASKED_RENDERER_WEBGL
        if (parameter === 37446) {
            return 'Intel(R) Iris(TM) Graphics 6100';
        }
        // Forward with the calling context as `this`; the native method
        // rejects calls made without a rendering context receiver.
        return originalGetParameter.call(this, parameter);
    };
});

0 人点赞