简单爬取网站图片

2020-04-16 15:30:54 浏览数 (1)

涉世浅,点染亦浅;历事深,机械亦深。故君子与其练达,不若朴鲁;与其曲谨,不若疏狂。

Python爬取图片

代码语言:python
# -*- coding:utf-8 -*-
import requests #调用第三方库
import re #正则
import urllib.request
#print(requests.get(url))
def rree(url):
    """Fetch a web page and return the .jpg image URLs found in its HTML.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of strings, one for each ``src="http...jpg"`` attribute in
        the page source, in document order (duplicates are kept).
    """
    # .text is the response body decoded to str by requests. The timeout
    # keeps an unresponsive server from blocking the script forever.
    wb_date = requests.get(url, timeout=10).text
    # The lazy ".+?" stops at the first 'jpg"' that follows each
    # 'src="http', so every image URL is captured separately.
    res = re.compile(r'src="(http.+?jpg)"')
    return res.findall(wb_date)

def download(reg,path):
    """Download every image URL in ``reg`` and save it under ``path``.

    Files are named ``0.jpg``, ``1.jpg``, ... in iteration order. The file
    name is appended directly to ``path``, so ``path`` must already end
    with a path separator.

    Args:
        reg: Iterable of image URLs.
        path: Destination directory prefix.
    """
    for num, i in enumerate(reg):
        print(i)
        # The timeout keeps one stalled download from blocking the rest.
        a = requests.get(i, timeout=10)
        # .content is the raw response body as bytes; the with-block closes
        # the file even if the write fails.
        with open(path + '%s.jpg' % num, 'wb') as f:
            f.write(a.content)
        # Progress message counts from 1 while file names count from 0.
        print('第%s个图片下载完毕' % (num + 1))
if __name__ == "__main__":
    # Directory prefix the images are saved under; download() appends
    # "<n>.jpg" directly to this string.
    # NOTE(review): the trailing space looks unintended - confirm.
    path = "F:/C-and-Python-Algorithn/python/interest/image/ "
    # Address of the page to scrape, typed in by the user.
    url = input("Please input the correct url: ")
    download(rree(url), path)

requests.get(url).text

代码语言:html
<!DOCTYPE html>
<script src="/js/src/click.js"></script>
<script type="text/javascript" src="//libs.baidu.com/jquery/1.8.3/jquery.min.js"></script>
<html lang="en">
<head><meta name="generator" content="Hexo 3.9.0">
    <meta charset="utf-8">
<title>AngelNI&#39;s Blog</title>
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<meta name="google-site-verification" content="JwzmnANXlpmQlhwaSORuqpEB6P3Sr-tRhw4R_XDRcEc">
<meta name="baidu-site-verification" content="ztPAF2oDcw">
    <meta name="description" content="This is a simple record of learning bits and pieces, so that each step of learning into an unforgettable memory.">    
<meta name="keywords" content="learning note">
<meta property="og:type" content="website">
<meta property="og:title" content="AngelNI&#39;s Blog">
<meta property="og:url" content="https://angelni.github.io/index.html">

    ......

re.compile(r'src="(http.+?jpg)"')

代码语言:html
<!--图片格式-->
<img class="thumbnail" src="https://s2.ax1x.com/2020/01/29/1QPiUf.jpg" alt="冬日" title="" style="">

拿我的博客举栗

0 人点赞