爬虫14.Cc++语言代码下载完整视频
爬虫12.Cc++语言代码下载完整视频
一些视频网站如果没有直接的下载链接,就需要抓包或者使用浏览器中的开发者工具分析html源码中的链接标签从而得到视频地址,而后通过之前写的http下载器就能下载。
抓包工具可以使用单独的,也可以使用浏览器开发者工具中自带的“网络”工具。
1.使用http/https下载器下载,例如浏览器
2.使用C语言与winsocket下载
代码实现http/https下载器:
main.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/*
 * Entry point: download one resource over HTTP or HTTPS.
 *
 * Usage: <program> <URL> [Referer]
 *
 * The URL must start with "http://" or "https://". The host part is copied
 * into a local buffer; the path is used in place from argv, so it is not
 * limited to a fixed length here. The output file name is the last path
 * segment with any query string or fragment removed, falling back to
 * "index.html" when that segment is empty.
 *
 * Returns 0 after the transfer attempt, 1 on bad arguments, an invalid URL,
 * or a failed connect().
 */
int main(int argc, char** argv) {
    if (argc < 2) {
        printf("Usage: %s <URL> [Referer]\n", argv[0]);
        return 1;
    }
    const char* url = argv[1];
    printf("URL:\n\t%s\n", url);

    const char* referer = NULL;
    if (argc >= 3) {
        referer = argv[2];
        printf("Referer:\n\t%s\n", referer);
    }

    // Pick HTTP or HTTPS from the URL scheme, and reject anything else
    // instead of continuing with an unparsed host.
    bool use_ssl = strncmp(url, "https://", strlen("https://")) == 0;
    const char* scheme = use_ssl ? "https://" : "http://";
    size_t scheme_len = strlen(scheme);
    if (strncmp(url, scheme, scheme_len) != 0) {
        printf("无效的URL:%s\n", url);
        return 1;
    }
    int port = use_ssl ? 443 : 80; // default port for the chosen scheme

    // Host is everything up to the first '/' after the scheme.
    const char* authority = url + scheme_len;
    size_t host_len = strcspn(authority, "/");
    char host[256];
    if (host_len == 0 || host_len >= sizeof(host)) {
        printf("无效的URL:%s\n", url);
        return 1;
    }
    memcpy(host, authority, host_len);
    host[host_len] = '\0';

    // Path excludes the leading '/', because send_http_request() adds it back.
    const char* path = (authority[host_len] == '/') ? authority + host_len + 1 : "";

    // Derive the output file name: drop "?query" / "#fragment", then take
    // the text after the last '/'. A path without any '/' is its own name.
    size_t resource_len = strcspn(path, "?#");
    size_t name_start = resource_len;
    while (name_start > 0 && path[name_start - 1] != '/') {
        name_start--;
    }
    char filename[256];
    snprintf(filename, sizeof(filename), "%.*s", (int)(resource_len - name_start), path + name_start);
    if (filename[0] == '\0') {
        snprintf(filename, sizeof(filename), "%s", "index.html");
    }
    printf("下载文件名:%s\n", filename);

    // Initialise Winsock and create the socket.
    initialize_winsock();
    SOCKET sockfd = create_socket();

    // Resolve the host name to a server address.
    struct sockaddr_in server_addr;
    resolve_hostname(host, port, &server_addr);

    // Connect to the server.
    if (connect(sockfd, (struct sockaddr*)&server_addr, sizeof(server_addr)) < 0) {
        printf("连接服务器失败\n");
        closesocket(sockfd);
        WSACleanup();
        return 1;
    }

    SSL_CTX* ctx = NULL;
    SSL* ssl = NULL;
    if (use_ssl) {
        // Create the SSL context and run the TLS handshake on the socket.
        ctx = create_ssl_context();
        ssl = connect_ssl(ctx, sockfd);
    }

    // Send the request and stream the response body to the output file.
    // receive_http_response() releases the connection when it is done, so
    // neither ssl nor sockfd is touched again here.
    send_http_request(ssl, sockfd, host, path, referer, use_ssl);
    receive_http_response(ssl, sockfd, filename, use_ssl);

    // Release the SSL context.
    if (use_ssl) {
        SSL_CTX_free(ctx);
        ERR_free_strings();
    }

    // Shut down Winsock.
    WSACleanup();
    return 0;
}
http_client_utils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/* Start Winsock 2.2; prints a message and exits with status 1 on failure. */
void initialize_winsock(void);

/* Create an AF_INET / SOCK_STREAM socket; on failure cleans up Winsock and exits. */
SOCKET create_socket(void);

/*
 * Resolve `hostname` for `port` (IPv4 only) and copy the first result into
 * `*server_addr`; on failure cleans up Winsock and exits.
 */
void resolve_hostname(const char* hostname, int port, struct sockaddr_in* server_addr);

/* Create an OpenSSL client context; prints the OpenSSL error queue and exits on failure. */
SSL_CTX* create_ssl_context(void);

/*
 * Create an SSL object on `ctx`, attach it to `sockfd` and perform the
 * handshake; prints the OpenSSL error queue and exits on failure.
 */
SSL* connect_ssl(SSL_CTX* ctx, SOCKET sockfd);

/*
 * Send a GET request for "/<path>" with a Host header of `host` and, when
 * `referer` is non-NULL, a Referer header. Uses `ssl` when `use_ssl` is true,
 * otherwise `sockfd`.
 */
void send_http_request(SSL* ssl, SOCKET sockfd, const char* host, const char* path, const char* referer, bool use_ssl);

/*
 * Read the HTTP response, print its header block and write the body to
 * `output_file`. Reads from `ssl` when `use_ssl` is true, otherwise from
 * `sockfd`, and releases that connection before returning.
 */
void receive_http_response(SSL* ssl, SOCKET sockfd, const char* output_file, bool use_ssl);
http_client_utils.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
/*
 * Start Winsock, requesting version 2.2.
 * On failure a message is printed and the process exits with status 1.
 */
void initialize_winsock() {
    WSADATA winsock_info;
    const int startup_status = WSAStartup(MAKEWORD(2, 2), &winsock_info);

    if (startup_status == 0) {
        return;
    }

    printf("Winsock初始化失败\n");
    exit(1);
}
/*
 * Create an IPv4 stream socket and return its handle.
 * If the socket cannot be created, a message is printed, Winsock is cleaned
 * up and the process exits with status 1.
 */
SOCKET create_socket() {
    const SOCKET handle = socket(AF_INET, SOCK_STREAM, 0);

    if (handle != INVALID_SOCKET) {
        return handle;
    }

    printf("创建套接字失败\n");
    WSACleanup();
    exit(1);
}
/*
 * Look up `hostname` with getaddrinfo(), restricted to IPv4 stream sockets,
 * and copy the first returned address into `*server_addr`.
 *
 * `port` is passed to getaddrinfo() as a decimal service string.
 * If resolution fails, a message is printed, Winsock is cleaned up and the
 * process exits with status 1.
 */
void resolve_hostname(const char* hostname, int port, struct sockaddr_in* server_addr) {
    char service[6];
    struct addrinfo criteria;
    struct addrinfo* results;

    // Decimal text of the port; snprintf keeps it within the buffer.
    snprintf(service, sizeof(service), "%d", port);

    // Everything except address family and socket type stays zero.
    memset(&criteria, 0, sizeof(criteria));
    criteria.ai_socktype = SOCK_STREAM;
    criteria.ai_family = AF_INET;

    const int lookup_status = getaddrinfo(hostname, service, &criteria, &results);
    if (lookup_status != 0) {
        printf("无法解析主机名\n");
        WSACleanup();
        exit(1);
    }

    // Only the first entry of the result list is used.
    memcpy(server_addr, results->ai_addr, sizeof(*server_addr));
    freeaddrinfo(results);
}
/*
 * Load the OpenSSL error strings and algorithms, then create a client-side
 * SSL context using TLS_client_method().
 *
 * Returns the new context. If it cannot be created, a message and the
 * OpenSSL error queue are printed and the process exits with status 1.
 */
SSL_CTX* create_ssl_context() {
    SSL_load_error_strings();
    OpenSSL_add_ssl_algorithms();

    SSL_CTX* context = SSL_CTX_new(TLS_client_method());
    if (context != NULL) {
        return context;
    }

    printf("无法创建SSL上下文\n");
    ERR_print_errors_fp(stderr);
    exit(1);
}
/*
 * Create an SSL object from `ctx`, bind it to the already-connected socket
 * `sockfd` and perform the client handshake.
 *
 * Returns the connected SSL object. If the object cannot be created or the
 * handshake fails, a message and the OpenSSL error queue are printed and the
 * process exits with status 1.
 */
SSL* connect_ssl(SSL_CTX* ctx, SOCKET sockfd) {
    SSL* session = SSL_new(ctx);
    if (session == NULL) {
        printf("无法创建SSL结构\n");
        ERR_print_errors_fp(stderr);
        exit(1);
    }

    SSL_set_fd(session, sockfd);

    const int handshake_result = SSL_connect(session);
    if (handshake_result <= 0) {
        printf("无法建立SSL连接\n");
        ERR_print_errors_fp(stderr);
        exit(1);
    }

    // Progress message for debugging.
    printf("SSL握手成功\n");
    return session;
}
/*
 * Build and send one HTTP/1.1 GET request.
 *
 * ssl      - SSL object to write to when use_ssl is true (unused otherwise)
 * sockfd   - connected socket to write to when use_ssl is false
 * host     - value of the Host header
 * path     - request target without its leading '/', which is added here
 * referer  - value of the Referer header, or NULL to omit the header
 * use_ssl  - selects SSL_write() (true) or send() (false)
 *
 * The request always carries "Connection: close" and a fixed User-Agent.
 * If the formatted request does not fit the local buffer, nothing is sent:
 * a truncated request would lack its terminating blank line. Write failures
 * are reported on stdout/stderr, and the function returns without sending
 * the rest.
 */
void send_http_request(SSL* ssl, SOCKET sockfd, const char* host, const char* path, const char* referer, bool use_ssl) {
    char request[4096];

    // One template serves both cases: the three trailing %s expand to either
    // a complete "Referer: <value>\r\n" line or to nothing.
    const int length = snprintf(request, sizeof(request),
        "GET /%s HTTP/1.1\r\n"
        "Host: %s\r\n"
        "Connection: close\r\n"
        "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3\r\n"
        "%s%s%s"
        "\r\n",
        path, host,
        referer ? "Referer: " : "",
        referer ? referer : "",
        referer ? "\r\n" : "");

    if (length < 0 || (size_t)length >= sizeof(request)) {
        printf("HTTP请求过长,无法发送\n");
        return;
    }

    // Keep writing until the whole request is out, since send() may accept
    // only part of the buffer.
    int sent = 0;
    while (sent < length) {
        int written;
        if (use_ssl) {
            written = SSL_write(ssl, request + sent, length - sent);
        } else {
            written = send(sockfd, request + sent, length - sent, 0);
        }

        if (written <= 0) {
            printf("发送HTTP请求失败\n");
            if (use_ssl) {
                ERR_print_errors_fp(stderr);
            } else {
                printf("错误代码:%d\n", WSAGetLastError());
            }
            return;
        }
        sent += written;
    }
}
/* Compare the first name_len characters of `line` and `name`, ignoring ASCII case. */
static bool header_name_matches(const char* line, const char* name, size_t name_len) {
    for (size_t i = 0; i < name_len; i++) {
        char a = line[i];
        char b = name[i];
        if (a >= 'A' && a <= 'Z') {
            a = (char)(a - 'A' + 'a');
        }
        if (b >= 'A' && b <= 'Z') {
            b = (char)(b - 'A' + 'a');
        }
        // A NUL in `line` mismatches here, so the scan never runs past its end.
        if (a != b) {
            return false;
        }
    }
    return true;
}

/*
 * Find header field `name` at the start of a line in the NUL-terminated
 * header block and return a pointer to its value (leading blanks skipped),
 * or NULL when the field is absent.
 */
static const char* find_header_value(const char* headers, const char* name) {
    const size_t name_len = strlen(name);
    const char* line = headers;

    while (line != NULL) {
        if (header_name_matches(line, name, name_len) && line[name_len] == ':') {
            const char* value = line + name_len + 1;
            while (*value == ' ' || *value == '\t') {
                value++;
            }
            return value;
        }
        line = strstr(line, "\r\n");
        if (line != NULL) {
            line += 2;
        }
    }
    return NULL;
}

/* Free the SSL object (TLS only) and close the underlying socket. */
static void release_connection(SSL* ssl, SOCKET sockfd, bool use_ssl) {
    if (use_ssl) {
        SSL_free(ssl);
    }
    closesocket(sockfd);
}

/*
 * Read an HTTP response, print its header block and write the body to
 * `output_file`.
 *
 * ssl         - SSL object to read from when use_ssl is true
 * sockfd      - connected socket; read from directly when use_ssl is false
 * output_file - path of the file to create (opened in binary mode)
 * use_ssl     - selects SSL_read() (true) or recv() (false)
 *
 * Header bytes are accumulated across reads until the blank line that ends
 * the header block is seen, so a header split over several reads is handled.
 * Content-Length, when present, is used to report whether the download is
 * complete. Chunked transfer encoding is not decoded. The connection is
 * released before returning, on every path.
 */
void receive_http_response(SSL* ssl, SOCKET sockfd, const char* output_file, bool use_ssl) {
    enum { HEADER_CAPACITY = 16384 };

    FILE* file = fopen(output_file, "wb");
    if (!file) {
        printf("打开输出文件失败\n");
        release_connection(ssl, sockfd, use_ssl);
        return;
    }

    char header[HEADER_CAPACITY + 1]; // +1 so the block can always be NUL-terminated
    char buffer[4096];
    size_t header_len = 0;
    int bytes_received = 0;
    bool header_passed = false;
    bool have_content_length = false;
    bool failed = false;
    size_t total_bytes_received = 0;
    size_t content_length = 0;

    while ((bytes_received = (use_ssl ? SSL_read(ssl, buffer, sizeof(buffer)) : recv(sockfd, buffer, sizeof(buffer), 0))) > 0) {
        const char* body = buffer;
        size_t body_len = (size_t)bytes_received;

        if (!header_passed) {
            if (header_len + body_len > (size_t)HEADER_CAPACITY) {
                printf("HTTP响应头过长\n");
                failed = true;
                break;
            }

            // Resume the search three bytes before the new data so that a
            // "\r\n\r\n" straddling two reads is still found.
            const size_t scan_from = header_len >= 3 ? header_len - 3 : 0;
            memcpy(header + header_len, buffer, body_len);
            header_len += body_len;
            header[header_len] = '\0';

            char* header_end = strstr(header + scan_from, "\r\n\r\n");
            if (!header_end) {
                // Header not complete yet; keep what we have and read more.
                continue;
            }

            // Whatever follows the blank line in this read is body data.
            body = header_end + 4;
            body_len = header_len - (size_t)(body - header);

            *header_end = '\0';
            printf("HTTP响应头:\n%s\n", header);

            const char* length_text = find_header_value(header, "Content-Length");
            if (length_text) {
                content_length = (size_t)strtoull(length_text, NULL, 10);
                have_content_length = true;
            }
            header_passed = true;
        }

        if (body_len > 0 && fwrite(body, 1, body_len, file) != body_len) {
            printf("写入输出文件失败\n");
            failed = true;
            break;
        }
        total_bytes_received += body_len;
    }

    if (bytes_received < 0) {
        printf("接收HTTP响应时出错\n");
        if (use_ssl) {
            ERR_print_errors_fp(stderr);
        } else {
            printf("错误代码:%d\n", WSAGetLastError());
        }
    }

    if (fclose(file) != 0) {
        printf("关闭输出文件失败\n");
        failed = true;
    }

    printf("总共接收到字节数:%zu\n", total_bytes_received);
    if (!header_passed) {
        // Nothing usable arrived; do not report this as a success.
        printf("未收到完整的HTTP响应头\n");
        printf("文件下载不完整:%s\n", output_file);
    } else if (!failed && have_content_length && total_bytes_received == content_length) {
        printf("文件下载成功:%s\n", output_file);
    } else if (!failed && !have_content_length && bytes_received == 0) {
        // Without Content-Length the only end marker is the server closing
        // the connection, so completeness cannot be verified.
        printf("服务器未提供Content-Length,无法校验文件完整性:%s\n", output_file);
    } else {
        printf("文件下载不完整:%s\n", output_file);
        if (have_content_length) {
            printf("预期字节数:%zu, 实际接收字节数:%zu\n", content_length, total_bytes_received);
        }
    }

    release_connection(ssl, sockfd, use_ssl);
}
编译运行crawlerc.exe。
查看文件下载成功。
3.使用c语言与win32api下载
抓包抓取链接
1
https://i1.hdslb.com/bfs/archive/ff94fe95186a555e4662683164c6a3338d1a3d2e.jpg@100w_100h_1c.webp
编写代码
1 | //crawlerc.cpp |
运行
运行后会被杀毒软件报毒。
4.使用C++与WinHTTP库下载
1.使用C++与winhttp库下载
- 编写代码
1 |
|
代码说明:
parse_url函数:解析URL,提取协议、主机、端口和路径。
WinHttpOpen:初始化WinHTTP会话。
WinHttpConnect:连接到指定的主机。
WinHttpOpenRequest:创建HTTP请求句柄。
WinHttpSendRequest:发送HTTP请求。
WinHttpReceiveResponse:接收HTTP响应。
WinHttpQueryHeaders:查询并打印响应状态码。
WinHttpQueryDataAvailable 和 WinHttpReadData:读取响应数据并写入文件。
编译后运行
1
crawlerc.exe "https://i1.hdslb.com/bfs/archive/ff94fe95186a555e4662683164c6a3338d1a3d2e.jpg@100w_100h_1c.webp" "D:\\test\\CC++\\C+Windows+Pragrma\\C+Socks+demo\\x64\\Debug\\ff94fe95186a555e4662683164c6a3338d1a3d2e.jpg"
2.添加请求头 并解析链接中的文件名
1 | //crawlerc.cpp |
编译后运行
1 | crawlerc.exe temp0.exe "https://i1.hdslb.com/bfs/archive/ff94fe95186a555e4662683164c6a3338d1a3d2e.jpg@100w_100h_1c.webp" |
All articles in this blog are licensed under CC BY-NC-SA 4.0 unless stating additionally.
