shua.js 笔记

2022-01-24 09:40:53 浏览数 (1)

代码语言:javascript复制
let superagent = require('superagent')
let cheerio = require('cheerio')

// First page of the author's article list; mainFun requests this URL.
// Further pages use the same path with the page number swapped in, e.g.
//   https://blog.csdn.net/github_35631540/article/list/2?t=1&
//   https://blog.csdn.net/github_35631540/article/list/3?t=1&
const baseUrl = 'https://blog.csdn.net/github_35631540/article/list/1'

// Blog entries ({ name, href }) gathered so far.
const blogHrefArr = []

// Exclusive upper bound for the zero-based page index walked by getAllBlogHreef.
const totalPage = 4

// Request headers passed to `.set()` on every superagent request in this script.
const setData = {
  // Desktop Chrome 71 user-agent string.
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
  // The HTTP request header is spelled "Referer" (one "r" in the middle);
  // "Referrer" is not a standard header name.
  'Referer': 'https://blog.csdn.net/github_35631540?t=1',
  'Content-Type': 'text/html; charset=UTF-8',
}

// Fetch the first article-list page (baseUrl), turn every post link on it into
// a { name, href } item, request each post through getBlogDetail and record the
// item in blogHrefArr. Works through callbacks and returns nothing.
let mainFun = () => {
  superagent
    .get(`${baseUrl}`)
    .set(setData)
    .end((err, res) => {
      // When the request fails `res` may be undefined, so bail out before
      // touching `res.text`.
      if (err || !res) {
        console.error(`获取博客列表失败: ${err ? err.message : '无响应'}`)
        return
      }

      const $ = cheerio.load(res.text)
      // Query each selector once instead of on every loop iteration.
      const links = $('.article-list .content a')
      const len = links.length
      if (len === 0) {
        return
      }
      const titles = $('.article-list h4')

      console.log(`获取到${len}条博客记录`)
      console.log(`开始爬取博客记录....`)
      for (let i = 0; i < len; i++) {
        const blogItem = {
          // Strip all whitespace (newlines, indentation) from the title text.
          name: titles.eq(i).text().replace(/\s+/g, ''),
          href: links.eq(i).attr('href'),
        }
        getBlogDetail(blogItem)
        blogHrefArr.push(blogItem)
      }
    })
}

// Request a single blog post page and log the outcome.
//
// blogItem is a { name, href } object: `href` is the URL to request and `name`
// is only used in the log messages. Works through callbacks and returns nothing.
let getBlogDetail = (blogItem) => {
  // An anchor without an href yields `undefined`; skip it rather than
  // requesting the literal URL "undefined".
  if (!blogItem || !blogItem.href) {
    console.error(`爬取失败:__${blogItem ? blogItem.name : ''}: 缺少链接`)
    return
  }

  superagent
    .get(`${blogItem.href}`)
    .set(setData)
    .end((err, res) => {
      // When the request fails `res` may be undefined, so check `err` before
      // reading `res.statusCode`.
      if (err || !res) {
        console.error(`爬取失败:__${blogItem.name}: ${err ? err.message : '无响应'}`)
        return
      }
      if (res.statusCode === 200) {
        console.log(`爬取成功:__${blogItem.name}`)
      }
    })
}

// Recursively walk the paginated article list and request every post linked
// on each page.
//
// n is the zero-based page index; page number n + 1 is requested. Each post
// link found is handed to getBlogDetail. The next page is requested while the
// next index is still below totalPage; the walk also stops at the first page
// that has no post links or whose request fails.
// Works through callbacks and returns nothing.
let getAllBlogHreef = (n) => {
  superagent
    .get(`https://blog.csdn.net/github_35631540/article/list/${n + 1}?t=1&`)
    .set(setData)
    .end((err, res) => {
      // When the request fails `res` may be undefined, so bail out before
      // touching `res.text`.
      if (err || !res) {
        console.error(`获取第${n + 1}页博客列表失败: ${err ? err.message : '无响应'}`)
        return
      }

      const $ = cheerio.load(res.text)
      // Query each selector once instead of on every loop iteration.
      const links = $('.article-list .content a')
      const len = links.length
      if (len === 0) {
        return
      }
      const titles = $('.article-list h4')

      console.log(`获取到${len}条博客记录`)
      console.log(`开始获取博客地址....`)
      for (let i = 0; i < len; i++) {
        getBlogDetail({
          // Strip all whitespace (newlines, indentation) from the title text.
          name: titles.eq(i).text().replace(/\s+/g, ''),
          href: links.eq(i).attr('href'),
        })
      }

      // Advance with a fresh local instead of mutating the parameter.
      const nextIndex = n + 1
      if (nextIndex < totalPage) {
        getAllBlogHreef(nextIndex)
      }
    })
}

// Start the crawl at page index 0. getAllBlogHreef reports progress through its
// own log output and returns nothing, so wrapping the call in console.log only
// printed "undefined".
getAllBlogHreef(0)
代码语言:javascript复制
cd E:\Main_Pro\FUN\upblog & node shua.js

0 人点赞