Python爬虫之Selenium库的使用方法

2021-01-29 14:58:46 浏览数 (1)

这篇文章主要介绍了Python爬虫之Selenium库的使用方法,帮助大家更好地理解和使用爬虫,感兴趣的朋友可以了解下

Selenium 是一个用于Web应用程序测试的工具。Selenium测试直接运行在浏览器中,就像真正的用户在操作一样。支持的浏览器包括IE(7, 8, 9, 10, 11),Mozilla Firefox,Safari,Google Chrome,Opera等。这个工具的主要功能包括:测试与浏览器的兼容性——测试你的应用程序看是否能够很好地工作在不同浏览器和操作系统之上。测试系统功能——创建回归测试检验软件功能和用户需求。支持自动录制动作和自动生成 .Net、Java、Perl等不同语言的测试脚本。(摘自百科)

代码语言:python
# Selenium usage examples.
#
# Each section below is a self-contained snippet: it performs its own imports
# and starts its own browser, so run the sections one at a time rather than
# executing the whole listing as a single script.
#
# The snippets use the Selenium 4 API:
#   * find_element(By.X, ...) / find_elements(By.X, ...) replace the removed
#     find_element_by_* / find_elements_by_* helpers.
#   * browser.switch_to.window(...) replaces the removed switch_to_window().
#   * browser.quit() is used for cleanup so the whole driver session ends,
#     not just the current window.

# ---------------------------------------------------------------------------
# Basic usage
# ---------------------------------------------------------------------------
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
    search_box = browser.find_element(By.ID, 'kw')
    search_box.send_keys('Python')
    search_box.send_keys(Keys.ENTER)
    # Block for up to 10 seconds until the results container is in the DOM.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    print(browser.current_url)
    print(browser.get_cookies())
    print(browser.page_source)
finally:
    browser.quit()

# ---------------------------------------------------------------------------
# Declaring a browser object (pick the one that matches the installed driver)
# ---------------------------------------------------------------------------
from selenium import webdriver

browser = webdriver.Chrome()
browser = webdriver.Firefox()
browser = webdriver.Edge()
browser = webdriver.Safari()
# NOTE: webdriver.PhantomJS() was removed in Selenium 4; run Chrome or Firefox
# in headless mode instead.

# ---------------------------------------------------------------------------
# Visiting a page
# ---------------------------------------------------------------------------
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
print(browser.page_source)
browser.quit()

# ---------------------------------------------------------------------------
# Finding elements
# ---------------------------------------------------------------------------
# Single element
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
# The three lookups below target the same element.
input_first = browser.find_element(By.ID, 'q')
input_second = browser.find_element(By.CSS_SELECTOR, '#q')
input_third = browser.find_element(By.XPATH, '//*[@id="q"]')
print(input_first)
print(input_second)
print(input_third)
browser.quit()

from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
input_first = browser.find_element(By.ID, 'q')
print(input_first)
browser.quit()

# Multiple elements
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
print(lis)
browser.quit()

from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
print(lis)
browser.quit()

# ---------------------------------------------------------------------------
# Interacting with elements
# ---------------------------------------------------------------------------
# Call interaction methods on the element that was found.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
search_box = browser.find_element(By.ID, 'q')
search_box.send_keys('笔记本电脑')
time.sleep(5)
search_box.clear()
search_box.send_keys('iPad')
button = browser.find_element(By.CLASS_NAME, 'btn-search')
# button.click()

# ---------------------------------------------------------------------------
# Action chains
# ---------------------------------------------------------------------------
# Queue actions on an ActionChains object; perform() runs them in order.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = "http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable"
browser.get(url)
# The draggable demo lives inside an iframe, so switch into it first.
browser.switch_to.frame('iframeResult')
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
target = browser.find_element(By.CSS_SELECTOR, '#droppable')
actions = ActionChains(browser)
actions.drag_and_drop(source, target)
actions.perform()

# ---------------------------------------------------------------------------
# Executing JavaScript
# ---------------------------------------------------------------------------
from selenium import webdriver

browser = webdriver.Chrome()
browser.get("https://www.zhihu.com/explore")
browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
browser.execute_script('alert("To Bottom")')

# ---------------------------------------------------------------------------
# Reading element information
# ---------------------------------------------------------------------------
# Attributes
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get("https://www.zhihu.com/explore")
logo = browser.find_element(By.ID, 'zh-top-link-logo')
print(logo)
print(logo.get_attribute('class'))

# Text content
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
element = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
print(element.text)

# ID, location, tag name and size
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
element = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
print(element.id)
print(element.location)
print(element.tag_name)
print(element.size)

# ---------------------------------------------------------------------------
# Frames
# ---------------------------------------------------------------------------
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
browser.switch_to.frame('iframeResult')
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
print(source)
# Look for the logo while still inside the iframe; a miss prints 'NO LOGO'.
try:
    logo = browser.find_element(By.CLASS_NAME, 'logo')
except NoSuchElementException:
    print('NO LOGO')
# Back in the parent frame, repeat the same lookup.
browser.switch_to.parent_frame()
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo)
print(logo.text)

# ---------------------------------------------------------------------------
# Waiting
# ---------------------------------------------------------------------------
# Implicit wait
# With an implicit wait, if WebDriver cannot find an element in the DOM it
# keeps waiting and raises a "no such element" exception once the configured
# time has elapsed. In other words, when an element is not immediately
# present, the lookup is retried for a while. The default timeout is 0.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.implicitly_wait(10)
browser.get('https://www.zhihu.com/explore')
element = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
print(element)

# Explicit wait
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

browser = webdriver.Chrome()
browser.get('https://www.taobao.com/')
wait = WebDriverWait(browser, 10)
search_box = wait.until(EC.presence_of_element_located((By.ID, 'q')))
button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
print(search_box, button)

# ---------------------------------------------------------------------------
# Back and forward navigation
# ---------------------------------------------------------------------------
import time

from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.baidu.com')
browser.get('https://www.taobao.com')
browser.get('https://www.python.org')
browser.back()
time.sleep(5)
browser.forward()
browser.quit()

# ---------------------------------------------------------------------------
# Cookies
# ---------------------------------------------------------------------------
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
print(browser.get_cookies())
browser.add_cookie({'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'})
print(browser.get_cookies())
browser.delete_all_cookies()
print(browser.get_cookies())

# ---------------------------------------------------------------------------
# Tab management
# ---------------------------------------------------------------------------
import time

from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.baidu.com')
# Open a second tab via JavaScript, then switch between the two handles.
browser.execute_script('window.open()')
print(browser.window_handles)
browser.switch_to.window(browser.window_handles[1])
browser.get('https://www.taobao.com')
time.sleep(5)
browser.switch_to.window(browser.window_handles[0])
browser.get('https://python.org')

# ---------------------------------------------------------------------------
# Exception handling
# ---------------------------------------------------------------------------
# Without handling: the lookup below has no try/except, so a missing element
# raises NoSuchElementException and stops the snippet.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.baidu.com')
browser.find_element(By.ID, 'hello')

# With handling: catch the specific Selenium exceptions and always clean up.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
except TimeoutException:
    print('Time Out')
try:
    browser.find_element(By.ID, 'hello')
except NoSuchElementException:
    print('No Element')
finally:
    browser.quit()

以上就是Python爬虫之Selenium库的使用方法的详细内容

欢迎大家点赞,留言,转发,转载,感谢大家的相伴与支持

万水千山总是情,点个【在看】行不行

*声明:本文整理自网络,版权归原作者所有,如来源信息有误或侵犯权益,请联系我们处理删除或授权事宜

0 人点赞