前言
首先初始化两个文件
js配置文件
npm init -y
生成
{
"name": "typescript",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"dev:build": "tsc -w",
"dev:start": "nodemon node ./build/crowller.js",
"dev": "concurrently npm:dev:*"
},
"nodemonConfig": {
"ignore": [
"data/*"
]
},
"keywords": [],
"author": "",
"license": "ISC",
"devDependencies": {
"@types/cheerio": "^0.22.14",
"@types/superagent": "^4.1.4",
"concurrently": "^5.0.0",
"nodemon": "^2.0.1",
"ts-node": "^8.5.2",
"typescript": "^3.7.2"
},
"dependencies": {
"cheerio": "^1.0.0-rc.3",
"superagent": "^5.1.1"
}
}
TS配置文件
tsc --init
{
"compilerOptions": {
/* Basic Options */
// "incremental": true, /* Enable incremental compilation */
"target": "es5", /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019' or 'ESNEXT'. */
"module": "commonjs", /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', or 'ESNext'. */
// "lib": [], /* Specify library files to be included in the compilation. */
// "allowJs": true, /* Allow javascript files to be compiled. */
// "checkJs": true, /* Report errors in .js files. */
// "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', or 'react'. */
// "declaration": true, /* Generates corresponding '.d.ts' file. */
// "declarationMap": true, /* Generates a sourcemap for each corresponding '.d.ts' file. */
// "sourceMap": true, /* Generates corresponding '.map' file. */
// "outFile": "./", /* Concatenate and emit output to single file. */
"outDir": "./build", /* Redirect output structure to the directory. */
// "rootDir": "./", /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */
// "composite": true, /* Enable project compilation */
// "tsBuildInfoFile": "./", /* Specify file to store incremental compilation information */
// "removeComments": true, /* Do not emit comments to output. */
// "noEmit": true, /* Do not emit outputs. */
// "importHelpers": true, /* Import emit helpers from 'tslib'. */
// "downlevelIteration": true, /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */
// "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */
/* Strict Type-Checking Options */
"strict": true, /* Enable all strict type-checking options. */
// "noImplicitAny": true, /* Raise error on expressions and declarations with an implied 'any' type. */
// "strictNullChecks": true, /* Enable strict null checks. */
// "strictFunctionTypes": true, /* Enable strict checking of function types. */
// "strictBindCallApply": true, /* Enable strict 'bind', 'call', and 'apply' methods on functions. */
// "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */
// "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */
// "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */
/* Additional Checks */
// "noUnusedLocals": true, /* Report errors on unused locals. */
// "noUnusedParameters": true, /* Report errors on unused parameters. */
// "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */
// "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */
/* Module Resolution Options */
// "moduleResolution": "node", /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */
// "baseUrl": "./", /* Base directory to resolve non-absolute module names. */
// "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */
// "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */
// "typeRoots": [], /* List of folders to include type definitions from. */
// "types": [], /* Type declaration files to be included in compilation. */
// "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */
"esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */
// "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
/* Source Map Options */
// "sourceRoot": "", /* Specify the location where debugger should locate TypeScript files instead of source locations. */
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
// "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. */
// "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */
/* Experimental Options */
// "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */
// "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */
}
}
再安装依赖
npm install -D ts-node
npm install -D typescript
提示:以下是本篇文章正文内容,下面案例可供参考
一、自建网页爬取
页面地址:http://www.dell-lee.com/typescript/demo.html?secret=secretKey
二、爬虫代码
crowller.ts
import fs from 'fs';
import path from 'path';
import superagent from 'superagent';
import LeeAnalyzer from './leeAnalyzer';
/**
 * Strategy contract the crawler uses to turn a fetched page into the text
 * that is written to the output file.
 */
export interface Analyzer {
  /**
   * @param html - Raw HTML of the fetched page.
   * @param filePath - Path of the output file, for analyzers that need to
   *   read what is already stored there.
   * @returns The complete content to write to the output file.
   */
  analyze(html: string, filePath: string): string;
}
/**
 * Crawler that downloads one page, hands its HTML to the injected analyzer and
 * writes the analyzer's output to `../data/course.json` (resolved relative to
 * this script's directory). The crawl starts as soon as an instance is built.
 */
class Crowller {
  private filePath = path.resolve(__dirname, '../data/course.json');

  /**
   * @param url - Address of the page to download.
   * @param analyzer - Strategy that converts the page HTML into file content.
   */
  constructor(private url: string, private analyzer: Analyzer) {
    // A constructor cannot await, so the rejection is handled here; otherwise a
    // failed request or write would surface as an unhandled promise rejection.
    this.initSpiderProcess().catch((error) => {
      console.error(`Crawling ${this.url} failed:`, error);
      process.exitCode = 1;
    });
  }

  /** Downloads the page at `url` and resolves with the response body text. */
  async getRawHtml(): Promise<string> {
    const result = await superagent.get(this.url);
    return result.text;
  }

  /** Writes `content` to the output file, creating its directory if missing. */
  writeFile(content: string): void {
    // writeFileSync does not create parent directories and throws ENOENT
    // when the data directory has not been created yet.
    fs.mkdirSync(path.dirname(this.filePath), { recursive: true });
    fs.writeFileSync(this.filePath, content);
  }

  /** Runs one crawl: fetch the page, analyze it, persist the result. */
  async initSpiderProcess(): Promise<void> {
    const html = await this.getRawHtml();
    const fileContent = this.analyzer.analyze(html, this.filePath);
    this.writeFile(fileContent);
  }
}
// Entry point: crawl the demo page, passing the secret as a query parameter,
// and store whatever the chosen analyzer returns.
const secretKey = 'secretKey';
const pageUrl = 'http://www.dell-lee.com/typescript/demo.html?secret=' + secretKey;
new Crowller(pageUrl, new LeeAnalyzer());
dellAnalyzer.ts
import fs from 'fs';
import cheerio from 'cheerio';
import { Analyzer } from './crowller';
/** One course entry scraped from a `.course-item` element. */
interface Course {
  /** Text of the item's first `.course-desc` element. */
  title: string;
  /** Number parsed from the item's second `.course-desc` element. */
  count: number;
}

/** Result of a single crawl. */
interface CourseResult {
  /** Millisecond timestamp taken when the page was analyzed. */
  time: number;
  /** Courses found on the page during that crawl. */
  data: Array<Course>;
}

/** Stored file shape: crawl timestamp mapped to the courses seen at that time. */
interface Content {
  [timestamp: number]: Array<Course>;
}
/**
 * Analyzer that extracts course titles and counts from the page HTML and
 * merges them, keyed by crawl timestamp, into the JSON already stored at the
 * output path.
 */
export default class DellAnalyzer implements Analyzer {
  /**
   * Parses the integer that follows the label separator in text shaped like
   * "label:12".
   *
   * Both the ASCII colon and the full-width colon (U+FF1A) are accepted.
   * Splitting on the ASCII colon alone produces NaN for every course when the
   * page text uses the full-width form.
   * NOTE(review): the page's actual separator is not visible here — confirm.
   *
   * @param text - Text of the count element.
   * @returns The parsed integer, or NaN when no number follows a separator.
   */
  private static parseCount(text: string): number {
    const value = text.split(/[::]/)[1];
    return parseInt(value, 10);
  }

  /**
   * Collects the title and count of every `.course-item` in the page.
   *
   * @param html - Raw HTML of the fetched page.
   * @returns The courses found, stamped with the current time in milliseconds.
   */
  private getCourseInfo(html: string): CourseResult {
    const $ = cheerio.load(html);
    const courseInfos: Course[] = [];
    // `each` instead of `map`: the callback only collects into courseInfos.
    $('.course-item').each((_index, element) => {
      const descs = $(element).find('.course-desc');
      courseInfos.push({
        title: descs.eq(0).text(),
        count: DellAnalyzer.parseCount(descs.eq(1).text())
      });
    });
    return {
      time: new Date().getTime(),
      data: courseInfos
    };
  }

  /**
   * Merges one crawl result into the content already stored at `filePath`.
   *
   * @param courseInfo - Result of the current crawl.
   * @param filePath - File holding earlier results; it may not exist yet.
   * @returns Earlier results plus the new one under its timestamp key.
   * @throws SyntaxError when the existing file is non-empty but not valid JSON.
   */
  generateJsonContent(courseInfo: CourseResult, filePath: string): Content {
    let fileContent: Content = {};
    if (fs.existsSync(filePath)) {
      const raw = fs.readFileSync(filePath, 'utf-8');
      // An empty file holds no history; JSON.parse('') would throw.
      if (raw.trim() !== '') {
        fileContent = JSON.parse(raw);
      }
    }
    fileContent[courseInfo.time] = courseInfo.data;
    return fileContent;
  }

  /**
   * Analyzes the page and returns the updated file content.
   *
   * @param html - Raw HTML of the fetched page.
   * @param filePath - Output file whose existing content is merged in.
   * @returns JSON text of all stored crawl results including this one.
   */
  public analyze(html: string, filePath: string): string {
    const courseInfo = this.getCourseInfo(html);
    const fileContent = this.generateJsonContent(courseInfo, filePath);
    return JSON.stringify(fileContent);
  }
}
leeAnalyzer.ts
import { Analyzer } from './crowller';
/**
 * Pass-through analyzer: the fetched HTML is returned untouched, so the
 * crawler stores the raw page.
 */
export default class LeeAnalyzer implements Analyzer {
  /**
   * @param html - Raw HTML of the fetched page.
   * @param filePath - Not used by this analyzer; accepted to satisfy Analyzer.
   * @returns The `html` argument, unmodified.
   */
  public analyze(html: string, filePath: string): string {
    const untouched = html;
    return untouched;
  }
}
三、执行代码后得到
得到如下数据(控制台输出的对象,并非严格的 JSON;count 为 NaN 说明 parseInt 未能从人数文本中解析出数字,需检查 split 使用的分隔符是否与页面文本一致)
{
time: 1640504759317,
data: [
{ title: 'Vue2.5开发去哪儿网App 从零基础入门到实战项目', count: NaN },
{ title: 'React 16.4 开发简书项目 从零基础入门到实战', count: NaN },
{ title: 'Vue2.5开发去哪儿网App 从零基础入门到实战项目', count: NaN },
{ title: '从基础到实战t手把手带你掌握新版Webpack4.0', count: NaN },
{ title: 'Dell Lee 的微课堂,职业规划答疑解惑,精彩文章与你分享', count: NaN }
]
}