把源码中的头文件http_parser.h和源码http_parser.c直接拷贝到项目中(https://github.com/nodejs/http-parser),然后一起编译即可;
我们写一个简单的测试例子:
main.c
#include "http_parser.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <time.h>
/* Parser instance used by this demo; main() allocates it with malloc() and
 * frees it again before returning. */
static http_parser *parser;
/*
 * on_message_begin - notification callback for the start of a message.
 *
 * The parser argument is not used. Prints a banner to stdout.
 *
 * Returns 0; a non-zero return would tell the parser to stop with an error.
 */
int on_message_begin(http_parser* _) {
    (void)_;
    /* The escapes were lost in the published listing ("n" instead of "\n"). */
    printf("\n***MESSAGE BEGIN***\n\n");
    return 0;
}
/*
 * on_headers_complete - notification callback fired once the header section
 * has been parsed.
 *
 * The parser argument is not used. Prints a banner to stdout.
 *
 * Returns 0 (continue normally). Per the http_parser.h callback notes,
 * returning 1 or 2 from this particular callback changes how the parser
 * treats the body, and other non-zero values signal an error.
 */
int on_headers_complete(http_parser* _) {
    (void)_;
    /* The escapes were lost in the published listing ("n" instead of "\n"). */
    printf("\n***HEADERS COMPLETE***\n\n");
    return 0;
}
/*
 * on_message_complete - notification callback for the end of a message.
 *
 * The parser argument is not used. Prints a banner to stdout.
 *
 * Returns 0; a non-zero return would tell the parser to stop with an error.
 */
int on_message_complete(http_parser* _) {
    (void)_;
    /* The escapes were lost in the published listing ("n" instead of "\n"). */
    printf("\n***MESSAGE COMPLETE***\n\n");
    return 0;
}
/*
 * on_url - data callback receiving (a piece of) the request URL.
 *
 * _       parser that produced the data (unused)
 * at      start of the URL bytes; the code treats it as a length-delimited
 *         span, so it is printed with an explicit precision
 * length  number of bytes available at `at`
 *
 * Prints the span on its own line. Returns 0 so that parsing continues.
 */
int on_url(http_parser* _, const char* at, size_t length) {
    (void)_;
    /* "%.*s" limits the output to `length` bytes; "\n" restores the newline
     * that was lost in the published listing. */
    printf("Url: %.*s\n", (int)length, at);
    return 0;
}
/*
 * on_header_field - data callback receiving (a piece of) a header name.
 *
 * _       parser that produced the data (unused)
 * at      start of the header-name bytes (length-delimited span)
 * length  number of bytes available at `at`
 *
 * Prints the span on its own line. Returns 0 so that parsing continues.
 */
int on_header_field(http_parser* _, const char* at, size_t length) {
    (void)_;
    /* "\n" restores the newline that was lost in the published listing. */
    printf("Header field: %.*s\n", (int)length, at);
    return 0;
}
/*
 * on_header_value - data callback receiving (a piece of) a header value.
 *
 * _       parser that produced the data (unused)
 * at      start of the header-value bytes (length-delimited span)
 * length  number of bytes available at `at`
 *
 * Prints the span on its own line. Returns 0 so that parsing continues.
 */
int on_header_value(http_parser* _, const char* at, size_t length) {
    (void)_;
    /* "\n" restores the newline that was lost in the published listing. */
    printf("Header value: %.*s\n", (int)length, at);
    return 0;
}
/*
 * on_body - data callback receiving (a piece of) the message body.
 *
 * _       parser that produced the data (unused)
 * at      start of the body bytes (length-delimited span)
 * length  number of bytes available at `at`
 *
 * Prints the span on its own line. Returns 0 so that parsing continues.
 */
int on_body(http_parser* _, const char* at, size_t length) {
    (void)_;
    /* "\n" restores the newline that was lost in the published listing. */
    printf("Body: %.*s\n", (int)length, at);
    return 0;
}
/*
 * Demo entry point: parses one hard-coded HTTP request and reports every
 * parser event through the callbacks defined in this file.
 *
 * Returns EXIT_SUCCESS when the whole request was consumed, EXIT_FAILURE on
 * allocation failure or when the parser stopped early.
 */
int main(void) {
    /* The empty line (CRLF CRLF) ends the header section. Without it the
     * parser never reaches on_headers_complete / on_message_complete and the
     * end-of-input call below would find the message unfinished. */
    static const char request[] = "GET /a/b/c/d HTTP/1.1\r\n\r\n";
    const size_t request_len = sizeof request - 1;
    http_parser_settings parser_set;
    size_t parsed;
    int rc = EXIT_SUCCESS;

    /* Zero every callback slot first: on_status, on_chunk_header and
     * on_chunk_complete are not assigned below and must not be left as
     * indeterminate pointers. */
    http_parser_settings_init(&parser_set);

    /* Parser callbacks: header and body data can be picked up in these. */
    parser_set.on_message_begin = on_message_begin;
    parser_set.on_header_field = on_header_field;
    parser_set.on_header_value = on_header_value;
    parser_set.on_url = on_url;
    parser_set.on_body = on_body;
    parser_set.on_headers_complete = on_headers_complete;
    parser_set.on_message_complete = on_message_complete;

    /* Allocate the parser object and set it up for request parsing. */
    parser = malloc(sizeof *parser);
    if (parser == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }
    http_parser_init(parser, HTTP_REQUEST);

    /* Run the parser; it returns the number of bytes it consumed. */
    parsed = http_parser_execute(parser, &parser_set, request, request_len);
    if (parsed != request_len) {
        /* A short count means the parser stopped before the end of input. */
        fprintf(stderr, "parse error after %zu bytes: %s (%s)\n", parsed,
                http_errno_name(HTTP_PARSER_ERRNO(parser)),
                http_errno_description(HTTP_PARSER_ERRNO(parser)));
        rc = EXIT_FAILURE;
    } else {
        /* A zero-length call tells the parser that input has ended. */
        http_parser_execute(parser, &parser_set, request, 0);
    }

    free(parser);
    parser = NULL;
    return rc;
}
使用主要分三步:
1. 申请一块http_parser大小的内存作为当前请求的parser对象,里面包含了对这次请求的解析信息;
2.申请一块http_parser_settings大小内存作为设置对象,它包含了我们设置的各种回调函数;
3. 调用http_parser_execute解析请求串,根据返回值parsed与输入的总字节数是否相等来判断解析成功还是失败;
下面我们为源码添加一些注释,大家可以自行对照代码去分析其中状态机的状态转换过程。
http_parser.h
/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef http_parser_h
#define http_parser_h
#ifdef __cplusplus
extern "C"
{
#endif
/* Library version, split into major / minor / patch components (see
 * http_parser_version()).
 * Also update SONAME in the Makefile whenever you change these. */
#define HTTP_PARSER_VERSION_MAJOR 2
#define HTTP_PARSER_VERSION_MINOR 9
#define HTTP_PARSER_VERSION_PATCH 4
#include <stddef.h>
/* Fixed-width integer types.
 *
 * Old Microsoft toolchains (pre-VS2010, i.e. _MSC_VER < 1600) have no
 * <stdint.h>, so the types are spelled out by hand there; Solaris 9 gets
 * them from <sys/inttypes.h>; everything else uses <stdint.h>.
 *
 * The condition spans two lines, so the first line must end with a
 * backslash -- without it the #if ends in a dangling "&&" and the header
 * does not preprocess.
 */
#if defined(_WIN32) && !defined(__MINGW32__) && \
  (!defined(_MSC_VER) || _MSC_VER < 1600) && !defined(__WINE__)
#include <BaseTsd.h>
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#elif (defined(__sun) || defined(__sun__)) && defined(__SunOS_5_9)
#include <sys/inttypes.h>
#else
#include <stdint.h>
#endif
/* Compile with -DHTTP_PARSER_STRICT=0 to perform fewer checks, but run
 * faster. Defaults to 1 (strict) when not defined by the build.
 */
#ifndef HTTP_PARSER_STRICT
#define HTTP_PARSER_STRICT 1
#endif
/* Maximum header size allowed. If the macro is not defined
 * before including this header then the default (80 KiB) is used. To
 * change the maximum header size, define the macro in the build
 * environment (e.g. -DHTTP_MAX_HEADER_SIZE=<value>). To remove
 * the effective limit on the size of the header, define the macro
 * to a very large number (e.g. -DHTTP_MAX_HEADER_SIZE=0x7fffffff)
 */
#ifndef HTTP_MAX_HEADER_SIZE
#define HTTP_MAX_HEADER_SIZE (80 * 1024)
#endif
/* Short names for the two central structs, defined further down. */
typedef struct http_parser http_parser;
typedef struct http_parser_settings http_parser_settings;
/* Callbacks should return non-zero to indicate an error. The parser will
 * then halt execution.
 *
 * The one exception is on_headers_complete. In a HTTP_RESPONSE parser
 * returning '1' from on_headers_complete will tell the parser that it
 * should not expect a body. This is used when receiving a response to a
 * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding:
 * chunked' headers that indicate the presence of a body.
 *
 * Returning `2` from on_headers_complete will tell the parser that it should
 * expect neither a body nor any further responses on this connection. This
 * is useful for handling responses to a CONNECT request which may not
 * contain `Upgrade` or `Connection: upgrade` headers.
 *
 * http_data_cb does not return data chunks. It will be called arbitrarily
 * many times for each string. E.G. you might get 10 callbacks for "on_url"
 * each providing just a few characters more data.
 */
/* Data callback: receives the parser plus a span of `length` bytes starting
 * at `at`. */
typedef int (*http_data_cb)(http_parser *, const char *at, size_t length);
/* Notification callback: receives only the parser. */
typedef int (*http_cb)(http_parser *);
/* Status Codes
 *
 * X-macro table. Each entry carries three values: the numeric status code,
 * the identifier suffix used for the enum constant, and the reason phrase
 * (stringified by users of the table, so it must not contain stray spaces).
 * Every line but the last ends with a backslash so that the whole table is
 * a single macro definition.
 */
#define HTTP_STATUS_MAP(XX)                                                 \
  XX(100, CONTINUE,                        Continue)                        \
  XX(101, SWITCHING_PROTOCOLS,             Switching Protocols)             \
  XX(102, PROCESSING,                      Processing)                      \
  XX(200, OK,                              OK)                              \
  XX(201, CREATED,                         Created)                         \
  XX(202, ACCEPTED,                        Accepted)                        \
  XX(203, NON_AUTHORITATIVE_INFORMATION,   Non-Authoritative Information)   \
  XX(204, NO_CONTENT,                      No Content)                      \
  XX(205, RESET_CONTENT,                   Reset Content)                   \
  XX(206, PARTIAL_CONTENT,                 Partial Content)                 \
  XX(207, MULTI_STATUS,                    Multi-Status)                    \
  XX(208, ALREADY_REPORTED,                Already Reported)                \
  XX(226, IM_USED,                         IM Used)                         \
  XX(300, MULTIPLE_CHOICES,                Multiple Choices)                \
  XX(301, MOVED_PERMANENTLY,               Moved Permanently)               \
  XX(302, FOUND,                           Found)                           \
  XX(303, SEE_OTHER,                       See Other)                       \
  XX(304, NOT_MODIFIED,                    Not Modified)                    \
  XX(305, USE_PROXY,                       Use Proxy)                       \
  XX(307, TEMPORARY_REDIRECT,              Temporary Redirect)              \
  XX(308, PERMANENT_REDIRECT,              Permanent Redirect)              \
  XX(400, BAD_REQUEST,                     Bad Request)                     \
  XX(401, UNAUTHORIZED,                    Unauthorized)                    \
  XX(402, PAYMENT_REQUIRED,                Payment Required)                \
  XX(403, FORBIDDEN,                       Forbidden)                       \
  XX(404, NOT_FOUND,                       Not Found)                       \
  XX(405, METHOD_NOT_ALLOWED,              Method Not Allowed)              \
  XX(406, NOT_ACCEPTABLE,                  Not Acceptable)                  \
  XX(407, PROXY_AUTHENTICATION_REQUIRED,   Proxy Authentication Required)   \
  XX(408, REQUEST_TIMEOUT,                 Request Timeout)                 \
  XX(409, CONFLICT,                        Conflict)                        \
  XX(410, GONE,                            Gone)                            \
  XX(411, LENGTH_REQUIRED,                 Length Required)                 \
  XX(412, PRECONDITION_FAILED,             Precondition Failed)             \
  XX(413, PAYLOAD_TOO_LARGE,               Payload Too Large)               \
  XX(414, URI_TOO_LONG,                    URI Too Long)                    \
  XX(415, UNSUPPORTED_MEDIA_TYPE,          Unsupported Media Type)          \
  XX(416, RANGE_NOT_SATISFIABLE,           Range Not Satisfiable)           \
  XX(417, EXPECTATION_FAILED,              Expectation Failed)              \
  XX(421, MISDIRECTED_REQUEST,             Misdirected Request)             \
  XX(422, UNPROCESSABLE_ENTITY,            Unprocessable Entity)            \
  XX(423, LOCKED,                          Locked)                          \
  XX(424, FAILED_DEPENDENCY,               Failed Dependency)               \
  XX(426, UPGRADE_REQUIRED,                Upgrade Required)                \
  XX(428, PRECONDITION_REQUIRED,           Precondition Required)           \
  XX(429, TOO_MANY_REQUESTS,               Too Many Requests)               \
  XX(431, REQUEST_HEADER_FIELDS_TOO_LARGE, Request Header Fields Too Large) \
  XX(451, UNAVAILABLE_FOR_LEGAL_REASONS,   Unavailable For Legal Reasons)   \
  XX(500, INTERNAL_SERVER_ERROR,           Internal Server Error)           \
  XX(501, NOT_IMPLEMENTED,                 Not Implemented)                 \
  XX(502, BAD_GATEWAY,                     Bad Gateway)                     \
  XX(503, SERVICE_UNAVAILABLE,             Service Unavailable)             \
  XX(504, GATEWAY_TIMEOUT,                 Gateway Timeout)                 \
  XX(505, HTTP_VERSION_NOT_SUPPORTED,      HTTP Version Not Supported)      \
  XX(506, VARIANT_ALSO_NEGOTIATES,         Variant Also Negotiates)         \
  XX(507, INSUFFICIENT_STORAGE,            Insufficient Storage)            \
  XX(508, LOOP_DETECTED,                   Loop Detected)                   \
  XX(510, NOT_EXTENDED,                    Not Extended)                    \
  XX(511, NETWORK_AUTHENTICATION_REQUIRED, Network Authentication Required)

/* The enum below is produced by two rounds of macro substitution.
 *
 * Round 1 -- HTTP_STATUS_MAP(XX) expands to the list of XX(...) entries:
 *
 *   enum http_status {
 *     XX(100, CONTINUE, Continue)
 *     XX(101, SWITCHING_PROTOCOLS, Switching Protocols)
 *     XX(102, PROCESSING, Processing)
 *     XX(200, OK, OK)
 *     ...
 *   };
 *
 * Round 2 -- each XX(num, name, string) expands to HTTP_STATUS_<name> = num,
 *
 *   enum http_status {
 *     HTTP_STATUS_CONTINUE = 100,
 *     HTTP_STATUS_SWITCHING_PROTOCOLS = 101,
 *     HTTP_STATUS_PROCESSING = 102,
 *     HTTP_STATUS_OK = 200,
 *     ...
 *   };
 *
 * The result is an enumeration of every status code in the table.
 */
enum http_status
{
#define XX(num, name, string) HTTP_STATUS_##name = num,
  HTTP_STATUS_MAP(XX)
#undef XX
};
/* Request Methods
 *
 * X-macro table: numeric id, identifier suffix, and the method token as it
 * appears on the request line (stringified by users of the table, hence
 * "M-SEARCH" must be written without spaces). Every line but the last ends
 * with a backslash so that the table is a single macro definition.
 */
#define HTTP_METHOD_MAP(XX)         \
  XX(0,  DELETE,      DELETE)       \
  XX(1,  GET,         GET)          \
  XX(2,  HEAD,        HEAD)         \
  XX(3,  POST,        POST)         \
  XX(4,  PUT,         PUT)          \
  /* pathological */                \
  XX(5,  CONNECT,     CONNECT)      \
  XX(6,  OPTIONS,     OPTIONS)      \
  XX(7,  TRACE,       TRACE)        \
  /* WebDAV */                      \
  XX(8,  COPY,        COPY)         \
  XX(9,  LOCK,        LOCK)         \
  XX(10, MKCOL,       MKCOL)        \
  XX(11, MOVE,        MOVE)         \
  XX(12, PROPFIND,    PROPFIND)     \
  XX(13, PROPPATCH,   PROPPATCH)    \
  XX(14, SEARCH,      SEARCH)       \
  XX(15, UNLOCK,      UNLOCK)       \
  XX(16, BIND,        BIND)         \
  XX(17, REBIND,      REBIND)       \
  XX(18, UNBIND,      UNBIND)       \
  XX(19, ACL,         ACL)          \
  /* subversion */                  \
  XX(20, REPORT,      REPORT)       \
  XX(21, MKACTIVITY,  MKACTIVITY)   \
  XX(22, CHECKOUT,    CHECKOUT)     \
  XX(23, MERGE,       MERGE)        \
  /* upnp */                        \
  XX(24, MSEARCH,     M-SEARCH)     \
  XX(25, NOTIFY,      NOTIFY)       \
  XX(26, SUBSCRIBE,   SUBSCRIBE)    \
  XX(27, UNSUBSCRIBE, UNSUBSCRIBE)  \
  /* RFC-5789 */                    \
  XX(28, PATCH,       PATCH)        \
  XX(29, PURGE,       PURGE)        \
  /* CalDAV */                      \
  XX(30, MKCALENDAR,  MKCALENDAR)   \
  /* RFC-2068, section 19.6.1.2 */  \
  XX(31, LINK,        LINK)         \
  XX(32, UNLINK,      UNLINK)       \
  /* icecast */                     \
  XX(33, SOURCE,      SOURCE)

/* Expanding the table with XX(num, name, string) -> HTTP_<name> = num,
 * yields the enumeration of request methods:
 *
 *   enum http_method {
 *     HTTP_DELETE = 0,
 *     HTTP_GET = 1,
 *     ...
 *   };
 */
enum http_method
{
#define XX(num, name, string) HTTP_##name = num,
  HTTP_METHOD_MAP(XX)
#undef XX
};
/* What the parser is asked to parse: requests, responses, or either. */
enum http_parser_type
{
  HTTP_REQUEST = 0,
  HTTP_RESPONSE = 1,
  HTTP_BOTH = 2
};
/* Flag values for http_parser.flags field.
 *
 * State collected while parsing a message, one bit per item.
 */
enum flags
{
  F_CHUNKED               = 0x01, /* bit 0: chunked transfer */
  F_CONNECTION_KEEP_ALIVE = 0x02, /* bit 1: keep the connection alive */
  F_CONNECTION_CLOSE      = 0x04, /* bit 2: close the connection */
  F_CONNECTION_UPGRADE    = 0x08, /* bit 3: connection upgrade */
  F_TRAILING              = 0x10, /* bit 4: chunked data fully read */
  F_UPGRADE               = 0x20, /* bit 5: protocol upgrade */
  F_SKIPBODY              = 0x40, /* bit 6: skip the body */
  F_CONTENTLENGTH         = 0x80  /* bit 7: the message carries a length value */
};
/* Map for errno-related constants
 *
 * The provided argument should be a macro that takes 2 arguments: the
 * identifier suffix and a human-readable description. Every line but the
 * last ends with a backslash so that the table is a single macro definition.
 */
#define HTTP_ERRNO_MAP(XX)                                           \
  /* No error */                                                     \
  XX(OK, "success")                                                  \
                                                                     \
  /* Callback-related errors */                                      \
  XX(CB_message_begin, "the on_message_begin callback failed")       \
  XX(CB_url, "the on_url callback failed")                           \
  XX(CB_header_field, "the on_header_field callback failed")         \
  XX(CB_header_value, "the on_header_value callback failed")         \
  XX(CB_headers_complete, "the on_headers_complete callback failed") \
  XX(CB_body, "the on_body callback failed")                         \
  XX(CB_message_complete, "the on_message_complete callback failed") \
  XX(CB_status, "the on_status callback failed")                     \
  XX(CB_chunk_header, "the on_chunk_header callback failed")         \
  XX(CB_chunk_complete, "the on_chunk_complete callback failed")     \
                                                                     \
  /* Parsing-related errors */                                       \
  XX(INVALID_EOF_STATE, "stream ended at an unexpected time")        \
  XX(HEADER_OVERFLOW,                                                \
     "too many header bytes seen; overflow detected")                \
  XX(CLOSED_CONNECTION,                                              \
     "data received after completed connection: close message")      \
  XX(INVALID_VERSION, "invalid HTTP version")                        \
  XX(INVALID_STATUS, "invalid HTTP status code")                     \
  XX(INVALID_METHOD, "invalid HTTP method")                          \
  XX(INVALID_URL, "invalid URL")                                     \
  XX(INVALID_HOST, "invalid host")                                   \
  XX(INVALID_PORT, "invalid port")                                   \
  XX(INVALID_PATH, "invalid path")                                   \
  XX(INVALID_QUERY_STRING, "invalid query string")                   \
  XX(INVALID_FRAGMENT, "invalid fragment")                           \
  XX(LF_EXPECTED, "LF character expected")                           \
  XX(INVALID_HEADER_TOKEN, "invalid character in header")            \
  XX(INVALID_CONTENT_LENGTH,                                         \
     "invalid character in content-length header")                   \
  XX(UNEXPECTED_CONTENT_LENGTH,                                      \
     "unexpected content-length header")                             \
  XX(INVALID_CHUNK_SIZE,                                             \
     "invalid character in chunk size header")                       \
  XX(INVALID_CONSTANT, "invalid constant string")                    \
  XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\
  XX(STRICT, "strict mode assertion failed")                         \
  XX(PAUSED, "parser is paused")                                     \
  XX(UNKNOWN, "an unknown error occurred")                           \
  XX(INVALID_TRANSFER_ENCODING,                                      \
     "request has invalid transfer-encoding")

/* Define HPE_* values for each errno value above.
 *
 * Expanding the table with HTTP_ERRNO_GEN(n, s) -> HPE_<n>, yields
 *
 *   enum http_errno {
 *     HPE_OK,
 *     HPE_CB_message_begin,
 *     HPE_CB_url,
 *     ...
 *   };
 *
 * so the constants are numbered from 0 in table order.
 */
#define HTTP_ERRNO_GEN(n, s) HPE_##n,
enum http_errno
{
  HTTP_ERRNO_MAP(HTTP_ERRNO_GEN)
};
#undef HTTP_ERRNO_GEN

/* Get an http_errno value from an http_parser */
#define HTTP_PARSER_ERRNO(p) ((enum http_errno)(p)->http_errno)
/* Parser object: stores the parsing state and the information extracted
 * from the message being parsed. Set up with http_parser_init(). */
struct http_parser
{
/** PRIVATE **/
/* 2 bits: kind of message being parsed. */
unsigned int type : 2; /* enum http_parser_type */
/* 8 bits: one bit per value of the `flags` enum. */
unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */
/* 7 bits: current state of the message state machine. */
unsigned int state : 7; /* enum state from http_parser.c */
/* 7 bits: current state of the header-parsing state machine. */
unsigned int header_state : 7; /* enum header_state from http_parser.c */
/* 5 bits: position within the item currently being matched (method, url,
 * version, header, ...); per the original annotation it is reset to 0 for
 * each new item. */
unsigned int index : 5; /* index into current matcher */
/* 1 bit: a Transfer-Encoding header was seen. */
unsigned int uses_transfer_encoding : 1; /* Transfer-Encoding header is present */
/* 1 bit: tolerate Content-Length together with chunked encoding. */
unsigned int allow_chunked_length : 1; /* Allow headers with both
* `Content-Length` and
* `Transfer-Encoding: chunked` set */
/* 1 bit: relax the restrictions on the header character set. */
unsigned int lenient_http_headers : 1;
/* Byte counter maintained by the parser. */
uint32_t nread; /* # bytes read in various scenarios */
/* Value of the Content-Length header, when one is present. */
uint64_t content_length; /* # bytes in body. `(uint64_t) -1` (all bits one)
* if no Content-Length header.
*/
/** READ-ONLY **/
/* HTTP major version. */
unsigned short http_major;
/* HTTP minor version. */
unsigned short http_minor;
/* 16 bits: status code. */
unsigned int status_code : 16; /* responses only */
/* 8 bits: request method. */
unsigned int method : 8; /* requests only */
/* 7 bits: error state of the parser; read it through HTTP_PARSER_ERRNO().
 * Only HPE_OK means no error. */
unsigned int http_errno : 7;
/* 1 = Upgrade header was present and the parser has exited because of that.
* 0 = No upgrade header present.
* Should be checked when http_parser_execute() returns in addition to
* error checking.
*/
/* 1 bit: protocol upgrade indicator. */
unsigned int upgrade : 1;
/* Hook that ties the parser to caller-owned data. */
/** PUBLIC **/
void *data; /* A pointer to get hook to the "connection" or "socket" object */
};
/* Table of user-supplied callbacks. Notification callbacks have type
 * http_cb, data callbacks have type http_data_cb; see the callback notes
 * next to those typedefs for the meaning of their return values.
 * http_parser_settings_init() sets every member to 0. */
struct http_parser_settings
{
/* Called when parsing of a message starts. */
http_cb on_message_begin;
/* Called with request-URL data. As with every data callback, one URL may
 * be delivered in several pieces. */
http_data_cb on_url;
/* Called with the status text of a response. */
http_data_cb on_status;
/* Called with header-name (key) data. */
http_data_cb on_header_field;
/* Called with header-value data. */
http_data_cb on_header_value;
/* Called once the whole header section (0..N header lines) is parsed. */
http_cb on_headers_complete;
/* Called with message-body data. */
http_data_cb on_body;
/* Called when the whole message has been parsed. */
http_cb on_message_complete;
/* When on_chunk_header is called, the current chunk length is stored
* in parser->content_length.
*/
/* Called after a chunk-size line has been parsed. */
http_cb on_chunk_header;
/* Called after the data of that chunk has been read. */
http_cb on_chunk_complete;
};
/* Indices of the URL components reported in struct http_parser_url.
 * Enumerators are numbered consecutively from 0, so UF_MAX ends up equal
 * to the number of components (7).
 */
enum http_parser_url_fields
{
  UF_SCHEMA,   /* 0: scheme / protocol */
  UF_HOST,     /* 1: host, a domain name or an IP address */
  UF_PORT,     /* 2: port number */
  UF_PATH,     /* 3: request path */
  UF_QUERY,    /* 4: query string */
  UF_FRAGMENT, /* 5: fragment (the part after '#') */
  UF_USERINFO, /* 6: user information */
  UF_MAX       /* 7: number of fields */
};
/* Result structure for http_parser_parse_url().
*
* Callers should index into field_data[] with UF_* values iff field_set
* has the relevant (1 << UF_*) bit set. As a courtesy to clients (and
* because we probably have padding left over), we convert any port to
* a uint16_t.
*/
struct http_parser_url
{
/* Which URL components are present: bit (1 << UF_x) is set when component
 * UF_x was found. */
uint16_t field_set; /* Bitmask of (1 << UF_*) values */
/* Port number as an integer. */
uint16_t port; /* Converted UF_PORT string */
/* Offset and length of every component, indexed by UF_* value. */
struct
{
uint16_t off; /* Offset into buffer in which field starts */
uint16_t len; /* Length of run in buffer */
} field_data[UF_MAX];
};
/* Returns the library version. Bits 16-23 contain the major version number,
* bits 8-15 the minor version number and bits 0-7 the patch level.
* Usage example:
*
* unsigned long version = http_parser_version();
* unsigned major = (version >> 16) & 255;
* unsigned minor = (version >> 8) & 255;
* unsigned patch = version & 255;
* printf("http_parser v%u.%u.%u\n", major, minor, patch);
*/
unsigned long http_parser_version(void);
/* Initialize `parser` for parsing messages of the given `type`
 * (HTTP_REQUEST, HTTP_RESPONSE or HTTP_BOTH). */
void http_parser_init(http_parser *parser, enum http_parser_type type);
/* Initialize http_parser_settings members to 0
*/
void http_parser_settings_init(http_parser_settings *settings);
/* Executes the parser. Returns number of parsed bytes. Sets
* `parser->http_errno` on error.
*
* parser    parser object to run
* settings  callbacks to fire while parsing
* data      input bytes
* len       number of bytes at `data`; the demo in this article passes 0
*           once all input has been delivered
*/
size_t http_parser_execute(http_parser *parser,
const http_parser_settings *settings,
const char *data,
size_t len);
/* If http_should_keep_alive() in the on_headers_complete or
* on_message_complete callback returns 0, then this should be
* the last message on the connection.
* If you are the server, respond with the "Connection: close" header.
* If you are the client, close the connection.
*/
int http_should_keep_alive(const http_parser *parser);
/* Returns a string version of the HTTP method. */
const char *http_method_str(enum http_method m);
/* Returns a string version of the HTTP status code. */
const char *http_status_str(enum http_status s);
/* Return a string name of the given error */
const char *http_errno_name(enum http_errno err);
/* Return a string description of the given error */
const char *http_errno_description(enum http_errno err);
/* Initialize all http_parser_url members to 0 */
void http_parser_url_init(struct http_parser_url *u);
/* Parse a URL; return nonzero on failure.
*
* buf, buflen  URL text and its length in bytes
* is_connect   NOTE(review): presumably non-zero when the URL is the target
*              of a CONNECT request -- confirm against http_parser.c
* u            receives the result
*/
int http_parser_parse_url(const char *buf, size_t buflen,
int is_connect,
struct http_parser_url *u);
/* Pause or un-pause the parser; a nonzero value pauses */
void http_parser_pause(http_parser *parser, int paused);
/* Checks if this is the final chunk of the body. */
int http_body_is_final(const http_parser *parser);
/* Change the maximum header size provided at compile time. */
void http_parser_set_max_header_size(uint32_t size);
#ifdef __cplusplus
}
#endif
#endif
http_parser.c由于字符数超标,文件地址为: http_parser.c
里面包含了全部内容,大家请自行查阅哈。