C/C++ 实现URL路径拆分

2023-02-25 18:29:28 浏览数 (1)

URL路径拆分:例如传入 http://www.baidu.com/index.php,将其拆分为主机名 www.baidu.com 和路径 /index.php 两部分。

代码语言:C++
#include <Windows.h>
#include <iostream>

// Splits a URL into its host and path components.
//
// An optional leading "http://" or "https://" is skipped. Everything up to the
// first '/' (or the end of the string) is copied to szHost; the remainder,
// starting at that '/', is copied to szPath. When the URL has no path, szPath
// is set to "/".
//
// Parameters:
//   szUrl  - NUL-terminated URL to split (not modified).
//   szHost - output buffer; must hold the host plus a terminating NUL.
//   szPath - output buffer; must hold the path plus a terminating NUL.
//
// Returns 0 on success, -1 if any argument is null.
//
// Both outputs are always NUL-terminated, so callers no longer need to
// zero-fill the buffers beforehand. The caller is still responsible for
// providing buffers that are large enough; no sizes are passed in.
int ParseUrl(char szUrl[], char szHost[], char szPath[])
{
	if (szUrl == nullptr || szHost == nullptr || szPath == nullptr)
		return -1;

	// Skip the scheme prefix if one is present.
	size_t nStart = 0;
	if (strncmp(szUrl, "http://", 7) == 0)
		nStart = 7;
	else if (strncmp(szUrl, "https://", 8) == 0)
		nStart = 8;

	// The host runs until the first '/' or the end of the string.
	const char *pHost = szUrl + nStart;
	size_t nHostLen = 0;
	while (pHost[nHostLen] != '\0' && pHost[nHostLen] != '/')
	{
		++nHostLen;
	}

	memcpy(szHost, pHost, nHostLen);
	szHost[nHostLen] = '\0';

	const char *pPath = pHost + nHostLen;
	if (*pPath == '\0')
	{
		// No path in the URL: fall back to the root path.
		szPath[0] = '/';
		szPath[1] = '\0';
	}
	else
	{
		// +1 copies the terminating NUL along with the path.
		memcpy(szPath, pPath, strlen(pPath) + 1);
	}
	return 0;
}

// Demo entry point: splits a fixed sample URL with ParseUrl and prints the
// resulting host and path, then waits for a key press via "pause".
int main(int argc,char *argv [])
{
	char szUrl[] = "http://www.baidu.com/index.html";
	char szHost[1024] = { 0 };
	char szPath[2048] = { 0 };

	int ret = ParseUrl(szUrl,szHost,szPath);

	if (ret == 0)
	{
		// The newline escapes were lost in the published listing ("%s n").
		printf("主机: %s \n", szHost);
		printf("路径: %s \n", szPath);
	}

	system("pause");
	return 0;
}

HTTP 文件下载

代码语言:C++
#define _CRT_SECURE_NO_WARNINGS
#include <Windows.h>
#include <iostream>
#include <winsock.h>

#pragma comment(lib,"ws2_32.lib")

int Spide(const char *pszUrl, const char *pszFile)
{
	char szHost[256] = {0};
	char *ptr = (char *)pszUrl;

	// 判断开头是否为http://如果不是则返回-1
	if (_strnicmp(ptr, "http://", 7) != 0) { return -1; }

	ptr = ptr   7;
	int index = 0;

	while (index < 255 && *ptr && *ptr != '/')
	{
		szHost[index  ] = *ptr  ;
	}
	szHost[index] = '';

	//printf("去掉http后的域名地址: %s n", szHost);

	char *buffer = new char[1024 * 8];
	index = sprintf(buffer,
		"GET %s HTTP/1.1rn"
		"Host: %srn"
		"User-Agent: IE or Chromern"
		"Accept-Type: */*rn"
		"Connection: Closernrn",
		ptr, szHost);

		//printf("构建好的请求头:n %s n", buffer);

		// ------------------------------------------------------------

		SOCKET fd = socket(AF_INET, SOCK_STREAM, 0);

		SOCKADDR_IN addr;
		addr.sin_addr.S_un.S_addr = 0;
		addr.sin_port = htons(0);
		addr.sin_family = AF_INET;

		index = bind(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
		hostent *p = ::gethostbyname(szHost);

		if (p) {
			ULONG ai = *(ULONG*)p->h_addr_list[0];
			addr.sin_addr.S_un.S_addr = ai;
			addr.sin_port = htons(80);
			index = connect(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
			if (index == NOERROR) {
				index = send(fd, (const char*)buffer, (int)strlen(buffer), 0);
				FILE *pf = fopen(pszFile, "wb");
				do {
					index = recv(fd, buffer, 8191, 0);
					if (index <= 0) {
						break;
					}
					buffer[index] = '';
					fwrite(buffer, 1, index, pf);
					printf("%s", buffer);
				} while (TRUE);
				fclose(pf);
			}
		}
		closesocket(fd);
		delete[] buffer;
		return 0;
}


int main(int argc,char *argv[])
{
	WSADATA wsaData;
	WSAStartup(0x0202, &wsaData);

	Spide("http://cn.bing.com/","index.html");

	system("pause");
	return 0;
}

实现HTTP页面下载功能

代码语言:C++
#include <Windows.h>
#include <iostream>
#include <winsock.h>

#pragma comment(lib,"ws2_32.lib")

int Curl_Get(const char *pszUrl)
{
	char szHost[256] = { 0 };
	char *ptr = (char *)pszUrl;

	// 判断开头是否为http:// 或者 https:// 如果不是则返回-1
	if (_strnicmp(ptr, "http://", 7) == 0)
		ptr = ptr   7;
	else if (_strnicmp(ptr, "https://", 8) == 0)
		ptr = ptr   8;
	else
		return -1;

	int index = 0;
	while (index < 255 && *ptr && *ptr != '/')
		szHost[index  ] = *ptr  ;
	szHost[index] = '';

	char *buffer = new char[1024 * 8];
	index = sprintf(buffer,
		"GET %s HTTP/1.1 rn"
		"Host: %s rn"
		"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0 rn"
		"Accept-Type: */* rn"
		"Accept: text/html,application/xhtml xml,application/xml;q=0.9,image/webp,*/*;q=0.8 rn"
		"Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 rn"
		"Connection: Close rnrn",
		ptr, szHost);
	printf("%s n", buffer);

	SOCKADDR_IN addr;
	SOCKET fd = socket(AF_INET, SOCK_STREAM, 0);
	addr.sin_addr.S_un.S_addr = 0;
	addr.sin_port = htons(0);
	addr.sin_family = AF_INET;
	index = bind(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
	hostent *p = gethostbyname(szHost);

	if (p)
	{
		ULONG ai = *(ULONG*)p->h_addr_list[0];
		addr.sin_addr.S_un.S_addr = ai;
		addr.sin_port = htons(80);

		index = connect(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
		if (index == NOERROR)
		{
			index = send(fd, (const char*)buffer, (int)strlen(buffer), 0);
			do
			{
				index = recv(fd, buffer, 8191, 0);
				if (index <= 0) { break; }
				buffer[index] = '';
				printf("%s n", buffer);
			} while (TRUE);
		}
	}
	closesocket(fd);
	return 0;
}

// Demo entry point: initialises Winsock 2.2, fetches the Bing home page with
// Curl_Get (which prints the response to stdout), then releases Winsock.
int main(int argc, char *argv[])
{
	WSADATA wsaData;
	// Without a successful WSAStartup every socket call would fail.
	if (WSAStartup(0x0202, &wsaData) != 0)
	{
		fprintf(stderr, "WSAStartup failed\n");
		return 1;
	}

	if (Curl_Get("http://cn.bing.com/") != 0)
	{
		fprintf(stderr, "Curl_Get failed\n");
	}

	WSACleanup();

	system("pause");
	return 0;
}

0 人点赞