适用于服务器渲染的页面,一次加载所有的资源
const fs = require('fs')
const path = require('path')
const https = require('https')
const http = require('http')
const url = require('url')
const cheerio = require('cheerio')
const targetUrl = 'https://example.com' // URL of the website to download from
const outputDir = './downloaded_resources' // Directory where downloaded resources are stored
/**
 * Make sure a directory exists, creating it when it is missing.
 *
 * @param {string} dirPath - Path of the directory to create.
 */
function createDir(dirPath) {
  if (!fs.existsSync(dirPath)) {
    // `recursive: true` also creates missing parent directories; without it a
    // nested path such as `out/static/js` fails with ENOENT.
    fs.mkdirSync(dirPath, { recursive: true })
  }
}
/**
 * Download one URL and save the response body to a local file.
 *
 * @param {string} fileUrl - Absolute http(s) URL to fetch.
 * @param {string} filePath - Destination path of the file to write.
 * @returns {Promise<void>} Resolves once the file has been written and closed;
 *   rejects on a request/stream error or when the status code is not 200.
 */
function downloadFile(fileUrl, filePath) {
  return new Promise((resolve, reject) => {
    const protocol = new URL(fileUrl).protocol === 'https:' ? https : http
    let file = null

    // Shared failure path: drop any partially written file, then reject.
    const fail = (err) => {
      if (!file) {
        reject(err)
        return
      }
      file.destroy()
      // fs.unlink requires a callback; the unlink result itself is ignored
      // because the original error is the one worth reporting.
      fs.unlink(filePath, () => reject(err))
    }

    protocol
      .get(fileUrl, (response) => {
        if (response.statusCode !== 200) {
          // Drain the body so the socket is released.
          response.resume()
          reject(new Error(`Failed to download ${fileUrl}, status code: ${response.statusCode}`))
          return
        }
        // Open the file only after a successful status so that failed
        // requests do not leave empty files behind.
        file = fs.createWriteStream(filePath)
        response.on('error', fail)
        file.on('error', fail)
        file.on('finish', () => {
          file.close(() => resolve())
        })
        response.pipe(file)
      })
      .on('error', fail)
  })
}
/**
 * Fetch a page, find the resources it references, and download each of them
 * into `outputDir`, mirroring the URL path as the relative file path.
 *
 * Errors are logged rather than thrown: a failure on one resource does not
 * stop the remaining downloads.
 *
 * @param {URL|string} url - Address of the page to scan. Note that inside this
 *   function the parameter shadows the module-level `url` import.
 * @param {string} outputDir - Directory that receives the downloaded files.
 * @returns {Promise<void>} Resolves after every download has been attempted.
 */
async function downloadResources(url, outputDir) {
  try {
    const pageUrl = new URL(url)
    const html = await new Promise((resolve, reject) => {
      const protocol = pageUrl.protocol === 'https:' ? https : http
      protocol
        .get(pageUrl, (res) => {
          res.setEncoding('utf8')
          let rawData = ''
          res.on('data', (chunk) => {
            // Append: the body arrives in several chunks.
            rawData += chunk
          })
          res.on('end', () => {
            resolve(rawData)
          })
          res.on('error', reject)
        })
        .on('error', reject)
    })
    const $ = cheerio.load(html)

    // Collect the references synchronously first; `.each` does not wait for
    // async callbacks, so the downloads are awaited in a separate loop.
    const resourceUrls = []
    $('img, script, link[rel="stylesheet"], audio, video, source, object').each(function () {
      const resourceUrl = $(this).attr('src') || $(this).attr('href') || $(this).attr('data')
      if (resourceUrl) resourceUrls.push(resourceUrl)
    })

    for (const resourceUrl of resourceUrls) {
      try {
        const parsedUrl = new URL(resourceUrl, pageUrl)
        // Only http(s) resources can be fetched (this skips data:, blob:, ...).
        if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') continue
        const relativePath = parsedUrl.pathname.slice(1)
        const filePath = path.join(outputDir, relativePath)
        createDir(path.dirname(filePath))
        console.log(`Downloading ${resourceUrl} to ${filePath}...`)
        await downloadFile(parsedUrl.href, filePath)
      } catch (error) {
        console.error(`Failed to download ${resourceUrl}:`, error)
      }
    }
    console.log(`All resources downloaded to ${outputDir}!`)
  } catch (error) {
    console.error(error)
  }
}
// Entry point: scan the target page and download its resources into outputDir.
downloadResources(new URL(targetUrl), outputDir)
手动收集资源链接,批量下载到对应的目录
手动获取所有的资源
在浏览器控制台执行下面的代码,会自动收集当前页面的资源链接,并保存为 resource-links.txt 文件
// Run in the browser console: collects the resource URLs referenced by the
// current page and saves them as `resource-links.txt`.
;(() => {
  // Matches each CSS `url(...)` token; capture group 1 is the URL without quotes.
  const CSS_URL_PATTERN = /url\(\s*["']?([^"')]+)["']?\s*\)/g

  // blob: and data: URLs are not remote files, so a later download step cannot fetch them.
  const isDownloadable = (link) => !link.startsWith('blob:') && !link.startsWith('data:')

  // Return every URL found in a CSS value, resolved against `baseUrl`.
  const extractCssUrls = (cssValue, baseUrl) => {
    const found = []
    for (const match of cssValue.matchAll(CSS_URL_PATTERN)) {
      try {
        found.push(new URL(match[1].trim(), baseUrl).href)
      } catch (error) {
        // Not a parseable URL, so there is nothing to download.
      }
    }
    return found
  }

  // Collect the resource links of the current page (deduplicated).
  const getResourceLinks = () => {
    const links = new Set()
    const addLink = (link) => {
      if (link && isDownloadable(link)) links.add(link)
    }

    // Images
    document.querySelectorAll('img').forEach((img) => addLink(img.src))

    // Videos, then audio: the element's own src plus its <source> children
    ;['video', 'audio'].forEach((tag) => {
      document.querySelectorAll(tag).forEach((media) => {
        addLink(media.src)
        media.querySelectorAll('source').forEach((source) => addLink(source.src))
      })
    })

    // Stylesheets (CSS)
    document.querySelectorAll('link[rel="stylesheet"]').forEach((link) => addLink(link.href))

    // Fonts: CSS rules that expose a `src` value (presumably @font-face rules)
    for (const sheet of document.styleSheets) {
      try {
        const rules = sheet.cssRules || sheet.rules
        for (const rule of rules) {
          if (rule.style && rule.style.src) {
            // Relative font URLs are relative to the stylesheet, not the page.
            const baseUrl = sheet.href || document.baseURI
            extractCssUrls(rule.style.src, baseUrl).forEach(addLink)
          }
        }
      } catch (error) {
        // Ignore errors raised by cross-origin stylesheets
      }
    }

    // Scripts
    document.querySelectorAll('script').forEach((script) => addLink(script.src))

    // Background images (a computed value may list several url() layers)
    document.querySelectorAll('*').forEach((el) => {
      const bgImage = getComputedStyle(el).backgroundImage
      if (bgImage && bgImage !== 'none') {
        extractCssUrls(bgImage, document.baseURI).forEach(addLink)
      }
    })

    return Array.from(links)
  }

  // Save a string as a text file through a temporary download link.
  const downloadTextFile = (text, fileName) => {
    const blob = new Blob([text], { type: 'text/plain' })
    const a = document.createElement('a')
    a.href = URL.createObjectURL(blob)
    a.download = fileName
    document.body.appendChild(a)
    a.click()
    document.body.removeChild(a)
  }

  // Collect the links and log them
  const resources = getResourceLinks()
  console.log('资源链接:', resources)

  // Serialize the links and download them as a file
  const fileContent = JSON.stringify(resources)
  downloadTextFile(`const urls = ${fileContent}`, 'resource-links.txt')
})()
获取所有链接资源下载到对应的目录中
将下面代码中的 urls 数组替换为上一步导出的 resource-links.txt 中的内容,然后用 Node.js 执行
const https = require('https')
const fs = require('fs')
const path = require('path')
const { URL } = require('url')
/**
 * Create the parent directory of `filePath` (including missing ancestors)
 * unless it already exists.
 *
 * @param {string} filePath - Path of a file whose containing directory is needed.
 */
const ensureDirectoryExistence = (filePath) => {
  const parentDir = path.dirname(filePath)
  if (fs.existsSync(parentDir)) return
  fs.mkdirSync(parentDir, { recursive: true })
}
/**
 * Download a URL and save it to `dest`, following HTTP redirects.
 *
 * @param {string} url - Absolute http(s) URL to fetch.
 * @param {string} dest - Destination file path; its directory is created if needed.
 * @param {number} [redirectsLeft=5] - How many more redirects may be followed
 *   before giving up (guards against redirect loops).
 * @returns {Promise<string>} Resolves with `dest` once the file is written and
 *   closed; rejects on request/stream errors, a non-200 status, or too many redirects.
 */
const downloadFile = (url, dest, redirectsLeft = 5) => {
  return new Promise((resolve, reject) => {
    // https.get rejects plain http: URLs, so pick the client by protocol.
    // `http` is required here because this script only imports `https`.
    const client = new URL(url).protocol === 'http:' ? require('http') : https
    let file = null

    // Shared failure path: remove any incomplete file, then reject.
    const fail = (err) => {
      if (!file) {
        reject(err)
        return
      }
      file.destroy()
      fs.unlink(dest, () => reject(err))
    }

    const request = client.get(url, (response) => {
      // Errors thrown inside this callback would otherwise be uncaught.
      try {
        const { statusCode, headers } = response

        // Follow redirects
        if (statusCode >= 300 && statusCode < 400 && headers.location) {
          response.resume() // drain the body so the socket is released
          if (redirectsLeft <= 0) {
            reject(new Error(`Too many redirects for '${url}'`))
            return
          }
          // The Location header may be relative; resolve it against the current URL.
          const nextUrl = new URL(headers.location, url).href
          downloadFile(nextUrl, dest, redirectsLeft - 1).then(resolve, reject)
          return
        }

        // Anything other than 200 is treated as a failure
        if (statusCode !== 200) {
          response.resume()
          reject(new Error(`Failed to get '${url}' (${response.statusCode})`))
          return
        }

        // Make sure the target directory exists before opening the file
        ensureDirectoryExistence(dest)
        file = fs.createWriteStream(dest)

        response.on('error', fail)
        file.on('error', fail)
        // Resolve only after the file descriptor has been closed
        file.on('finish', () => {
          file.close(() => resolve(dest))
        })
        response.pipe(file)
      } catch (err) {
        fail(err)
      }
    })

    // Request-level errors (DNS failure, connection reset, ...)
    request.on('error', fail)
  })
}
/**
 * Download every URL in `urls` into `directory`, one after another, using the
 * URL path as the file path relative to `directory`.
 *
 * A failed download is logged and does not stop the remaining ones.
 *
 * @param {string[]} urls - Absolute URLs to download.
 * @param {string} directory - Root directory for the downloaded files.
 * @returns {Promise<void>} Resolves after every URL has been attempted.
 */
const downloadResources = async (urls, directory) => {
  for (const url of urls) {
    try {
      const { pathname } = new URL(url)
      // A path ending in "/" names no file; writing to it would target a
      // directory (or create a file in place of `directory` itself), so store
      // such resources as index.html inside that path.
      const relativePath = pathname.endsWith('/') ? `${pathname}index.html` : pathname
      const destPath = path.join(directory, relativePath)
      // Download and save the file
      await downloadFile(url, destPath)
      console.log(`Downloaded: ${url} -> ${destPath}`)
    } catch (error) {
      console.error(`Failed to download ${url}:`, error)
    }
  }
}
// Example URL list and target directory
const urls = [
'https://profile-avatar.csdnimg.cn/a543dcdeef584c9f855695e5a65600ea_github_35631540.jpg',
'https://i-blog.csdnimg.cn/direct/6d71ec4ddf7c47eca0fee08caec7bcd5.png'
]
const targetDirectory = './test'
// Start the downloads and report when the run finishes
downloadResources(urls, targetDirectory)
.then(() => {
console.log('All downloads completed.')
})
.catch((error) => {
console.error('Error downloading resources:', error)
})