3.异步协程爬取下载网络小说

2024-07-08 15:44:16 浏览数 (2)

之前,我们已经通过多线程的方式实现了下载网络小说,参阅文章地址,下面将采用异步协程的方式进行下载。

代码语言:Python复制
import json
import aiofiles
import aiohttp
import asyncio
import requests

"""
1.同步操作:访问getCatalog拿到所有章节的name和cid
https://dushu.baidu.com/api/pc/getCatalog?data={"book_id":"4306063500"}
2.异步操作:访问getChapterContent下载所有的文章内容
https://dushu.baidu.com/api/pc/getChapterContent?data={"book_id":"4306063500","cid":"4306063500|1569782244","need_bookinfo":1}
"""
async def getCatalog(url):
    """Fetch the book catalog and download every chapter concurrently.

    The catalog is fetched with a single synchronous ``requests`` call, then
    one ``getChapterContent`` task is scheduled per catalog item and all of
    them are awaited together.

    Args:
        url: Catalog endpoint URL; its JSON response is expected to expose
            the chapter list under ``data.novel.items``.

    Note:
        ``book_id`` is not a parameter; it is read from module scope (the
        ``__main__`` block assigns it before calling this coroutine).
    """
    # A blocking request is tolerable here: no tasks have been scheduled yet,
    # so there is nothing for it to stall.
    with requests.get(url) as resp:
        result = resp.json()

    # Each item carries one chapter's title and cid.
    items = result['data']['novel']['items']

    # Build the complete task list. Re-creating the list inside the loop
    # would keep only the last chapter's task.
    tasks = [
        asyncio.create_task(getChapterContent(item['cid'], book_id, item['title']))
        for item in items
    ]

    # gather() accepts an empty task list (asyncio.wait raises on one), and
    # return_exceptions=True lets the remaining chapters finish when one fails.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for item, outcome in zip(items, results):
        if isinstance(outcome, Exception):
            print(f"failed to download {item['title']}: {outcome!r}")

async def getChapterContent(cid,book_id,title):
    """Download one chapter and write it to ``西游记/<title>.txt``.

    Args:
        cid: Chapter id as returned by the catalog; it is combined with
            ``book_id`` as ``"<book_id>|<cid>"`` in the request payload.
        book_id: Id of the book the chapter belongs to.
        title: Chapter title, used as the output file name.
    """
    import os  # local import: only needed to create the output directory

    data={
        "book_id": book_id,
        "cid": f"{book_id}|{cid}",
        "need_bookinfo": 1
    }
    data=json.dumps(data)
    # The endpoint takes the JSON payload in the ``data`` query parameter.
    url='https://dushu.baidu.com/api/pc/getChapterContent?data=' + data
    print(url)
    # Make sure the target directory exists before opening a file inside it.
    os.makedirs("西游记", exist_ok=True)
    async with aiohttp.ClientSession() as s:
        async with s.get(url) as response:
            result=await response.json()
            content = result['data']['novel']['content']
            async with aiofiles.open(f"西游记/{title}.txt",mode='w',encoding='utf-8') as f:
                await f.write(content)


if __name__ == '__main__':
    # Script entry point: build the catalog URL for the book and run the
    # whole download inside a fresh event loop.
    # book_id stays a module-level name because getCatalog reads it from
    # module scope.
    book_id='4306063500'
    url='https://dushu.baidu.com/api/pc/getCatalog?data={"book_id":"' + book_id + '"}'
    asyncio.run(getCatalog(url))
    print('爬取西游记小说完毕!')

0 人点赞