ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 201
Committed: 2003-07-15T01:01:00-07:00 (21 years, 11 months ago) by douglas
File size: 13477 byte(s)
Log Message:
Moved openssl version function to HttpHandler.h and fixed agent string.

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine HTTP Handler
46 //
47 // Douglas Thrift
48 //
49 // $Id: HttpHandler.cpp,v 1.19 2003/07/15 08:01:00 douglas Exp $
50
51 #include "HttpHandler.h"
52
53 // Lovely C Sockets!
54 #ifndef _WIN32
55 // BSD Sockets
56 #include <unistd.h>
57 #include <sys/types.h>
58 #include <sys/socket.h>
59 #include <netinet/in.h>
60 #include <netdb.h>
61
62 #define INVALID_SOCKET -1
63 #define SOCKET_ERROR -1
64
65 inline int closesocket(SOCKET s) { return close(s); }
66 #endif
67
68 HttpHandler::HttpHandler()
69 {
70 buffer = new char[BUFSIZ + 1];
71
72 #ifdef _WIN32
73 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
74 {
75 error(program + ": WSAStartup");
76 exit(1);
77 }
78 #endif // _WIN32
79
80 length = 0;
81 chunked = false;
82 #ifdef _OpenSSL_
83 tls = false;
84 #endif
85 }
86
87 HttpHandler::~HttpHandler()
88 {
89 delete [] buffer;
90
91 #ifdef _WIN32
92 WSACleanup();
93 #endif // _WIN32
94 }
95
96 bool HttpHandler::handle(URL &url, const string referer, bool head)
97 {
98 bool answer = false;
99
100 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
101 {
102 error(program + ": Socket");
103 exit(1);
104 }
105
106 sockaddr_in address;
107 hostent* host;
108
109 address.sin_family = AF_INET;
110
111 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
112 {
113 error(program + ": Host: " + url.getAddress(), true);
114 return answer;
115 }
116
117 address.sin_addr = *((in_addr*)*host->h_addr_list);
118 address.sin_port = htons(url.getPort());
119
120 if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
121 SOCKET_ERROR)
122 {
123 error(program + ": Connect");
124 return answer;
125 }
126
127 if (head)
128 {
129 putline("HEAD " + url.getPath() + " HTTP/1.1");
130 }
131 else
132 {
133 putline("GET " + url.getPath() + " HTTP/1.1");
134 }
135
136 putline("Accept: text/html; text/plain");
137 #ifndef _OpenSSL_
138 putline("User-Agent: " + agent(true) + ' ' + platform());
139 #else
140 putline("User-Agent: " + agent(true) + ' ' + platform() + ' '
141 + openssl(true));
142 #endif
143
144 if (url.getPort() == 80)
145 {
146 putline("Host: " + url.getAddress());
147 }
148 else
149 {
150 char* port = new char[1024];
151 sprintf(port, "%u", url.getPort());
152
153 putline("Host: " + url.getAddress() + ':' + port);
154
155 delete [] port;
156 }
157
158 if (referer != "")
159 {
160 putline("Referer: " + referer);
161 }
162
163 putline("Connection: close");
164 putline();
165
166 code response;
167 string line;
168
169 do
170 {
171 line = getline();
172
173 if (line.find("HTTP/") != 0)
174 {
175 return answer;
176 }
177
178 unsigned dot = line.find('.');
179 unsigned space = line.find(' ');
180
181 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 10);
182 unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(),
183 0, 10);
184
185 if (major > 1)
186 {
187 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
188 major << "." << minor << "\n";
189
190 return answer;
191 }
192
193 response = code(strtoul(line.substr(space + 1).c_str(), 0, 10));
194
195 if (response < ok) do line = getline(); while (line != "");
196 }
197 while (response < ok);
198
199 do
200 {
201 line = getline();
202
203 if (line != "")
204 {
205 unsigned colon = line.find(':');
206
207 string field = line.substr(0, colon);
208 string value = line.substr(colon + 1);
209
210 while (isspace(value[0])) value.erase(0, 1);
211
212 if (field == "Content-Type")
213 {
214 type = value;
215 }
216 else if (field == "Content-Length")
217 {
218 length = strtoul(value.c_str(), 0, 10);
219 }
220 else if (field == "Location")
221 {
222 location = value;
223 }
224 else if (field == "Transfer-Encoding")
225 {
226 chunked = value == "chunked";
227 }
228 }
229 }
230 while (line != "");
231
232 switch (response)
233 {
234 case ok:
235 if (debug) cerr << "response = " << response << "\n";
236 answer = true;
237 break;
238 case choices:
239 case moved:
240 case found:
241 if (debug) cerr << "response = " << response << "\n"
242 << "location = " << location << "\n";
243 location = getLink(location, url);
244 break;
245 case notfound:
246 case internal:
247 if (debug) cerr << "response = " << response << "\n";
248 break;
249 default:
250 if (debug) cerr << "response = " << response << "\n";
251 if (response <= 299)
252 {
253 answer = true;
254 }
255 else if (response <= 399)
256 {
257 location = getLink(location, url);
258 }
259 break;
260 }
261
262 if (!head && answer) populate();
263
264 return answer;
265 }
266
267 HttpHandler& HttpHandler::getline(string& line, char endline)
268 {
269 unsigned end = page.find(endline);
270 unsigned newline = page.find('\n');
271
272 if (newline < end || end == string::npos)
273 {
274 end = newline;
275 }
276
277 line = page.substr(0, end);
278 page.erase(0, (end == string::npos ? end : end + 1));
279
280 return *this;
281 }
282
283 void HttpHandler::clear()
284 {
285 closesocket(http);
286
287 type = "";
288 length = 0;
289 location = "";
290 page = "";
291 chunked = false;
292 #ifdef _OpenSSL_
293 tls = false;
294 #endif
295 }
296
297 void HttpHandler::populate()
298 {
299 if (!chunked)
300 {
301 unsigned left = length;
302
303 while (left > 0)
304 {
305 memset(buffer, 0, BUFSIZ + 1);
306
307 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
308 unsigned received;
309
310 while (true)
311 {
312 if ((received = recv(http, buffer, bytes, 0)) == SOCKET_ERROR)
313 {
314 error(program + ": Recv");
315 exit(1);
316 }
317 else if (received != bytes)
318 {
319 left -= received;
320 page += buffer;
321
322 memset(buffer, 0, BUFSIZ + 1);
323
324 bytes -= received;
325 }
326 else
327 {
328 break;
329 }
330 }
331
332 page += buffer;
333 left -= bytes;
334 }
335 }
336 else
337 {
338 unsigned chunk;
339
340 do
341 {
342 chunk = strtoul(getline().c_str(), 0, 16);
343
344 unsigned left = chunk;
345
346 while (left > 0)
347 {
348 memset(buffer, 0, BUFSIZ + 1);
349
350 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
351 unsigned received;
352
353 while (true)
354 {
355 if ((received = recv(http, buffer, bytes, 0)) ==
356 SOCKET_ERROR)
357 {
358 error(program + ": Recv");
359 exit(1);
360 }
361 else if (received != bytes)
362 {
363 left -= received;
364 page += buffer;
365
366 memset(buffer, 0, BUFSIZ + 1);
367
368 bytes -= received;
369 }
370 else
371 {
372 break;
373 }
374 }
375
376 page += buffer;
377 left -= bytes;
378 }
379
380 getline();
381 length += chunk;
382 }
383 while (chunk > 0);
384 }
385
386 for (unsigned index = 0; index < page.length(); index++)
387 {
388 if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
389 1] == '\n' : false)
390 {
391 page.erase(index, 1);
392 }
393 else if (page[index] == '\r')
394 {
395 page[index] = '\n';
396 }
397 }
398 }
399
400 void HttpHandler::putline(const string line)
401 {
402 sprintf(buffer, "%s\r\n", line.c_str());
403 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
404 {
405 error(program + ": Send");
406 exit(1);
407 }
408 }
409
410 string HttpHandler::getline()
411 {
412 string line;
413 char byte;
414
415 do
416 {
417 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
418 {
419 error(program + ": Recv");
420 }
421
422 if (byte != '\r' && byte != '\n')
423 {
424 line += byte;
425 }
426 }
427 while (byte != '\n');
428
429 return line;
430 }
431
432 void HttpHandler::error(const string& prefix, bool host)
433 {
434 #ifdef _WIN32
435 string error;
436
437 switch (WSAGetLastError())
438 {
439 case WSAEACCES:
440 error = "Permission denied";
441 break;
442 case WSAEADDRINUSE:
443 error = "Address already in use";
444 break;
445 case WSAEADDRNOTAVAIL:
446 error = "Cannot assign requested address";
447 break;
448 case WSAEAFNOSUPPORT:
449 error = "Address family not supported by protocol family";
450 break;
451 case WSAEALREADY:
452 error = "Operation already in progress";
453 break;
454 case WSAECONNABORTED:
455 error = "Software caused connection abort";
456 break;
457 case WSAECONNREFUSED:
458 error = "Connection refused";
459 break;
460 case WSAECONNRESET:
461 error = "Connection reset by peer";
462 break;
463 case WSAEDESTADDRREQ:
464 error = "Destination address required";
465 break;
466 case WSAEFAULT:
467 error = "Bad address";
468 break;
469 case WSAEHOSTDOWN:
470 error = "Host is down";
471 break;
472 case WSAEHOSTUNREACH:
473 error = "No route to host";
474 break;
475 case WSAEINPROGRESS:
476 error = "Operation now in progress";
477 break;
478 case WSAEINTR:
479 error = "Interrupted function call";
480 break;
481 case WSAEINVAL:
482 error = "Invalid argument";
483 break;
484 case WSAEISCONN:
485 error = "Socket is already connected";
486 break;
487 case WSAEMFILE:
488 error = "Too many open files";
489 break;
490 case WSAEMSGSIZE:
491 error = "Message too long";
492 break;
493 case WSAENETDOWN:
494 error = "Network is down";
495 break;
496 case WSAENETRESET:
497 error = "Network dropped connection on reset";
498 break;
499 case WSAENETUNREACH:
500 error = "Network is unreachable";
501 break;
502 case WSAENOBUFS:
503 error = "No buffer space available";
504 break;
505 case WSAENOPROTOOPT:
506 error = "Bad protocol option";
507 break;
508 case WSAENOTCONN:
509 error = "Socket is not connected";
510 break;
511 case WSAENOTSOCK:
512 error = "Socket operation on non-socket";
513 break;
514 case WSAEOPNOTSUPP:
515 error = "Operation not supported";
516 break;
517 case WSAEPFNOSUPPORT:
518 error = "Protocol family not supported";
519 break;
520 case WSAEPROCLIM:
521 error = "Too many processes";
522 break;
523 case WSAEPROTONOSUPPORT:
524 error = "Protocol not supported";
525 break;
526 case WSAEPROTOTYPE:
527 error = "Protocol wrong type for socket";
528 break;
529 case WSAESHUTDOWN:
530 error = "Cannot send after socket shutdown";
531 break;
532 case WSAESOCKTNOSUPPORT:
533 error = "Socket type not supported";
534 break;
535 case WSAETIMEDOUT:
536 error = "Connection timed out";
537 break;
538 case WSATYPE_NOT_FOUND:
539 error = "Class type not found";
540 break;
541 case WSAEWOULDBLOCK:
542 error = "Resource temporarily unavailable";
543 break;
544 case WSAHOST_NOT_FOUND:
545 error = "Host not found";
546 break;
547 case WSA_INVALID_HANDLE:
548 error = "Specified event object handle is invalid";
549 break;
550 case WSA_INVALID_PARAMETER:
551 error = "One or more parameters are invalid";
552 break;
553 // case WSAINVALIDPROCTABLE:
554 // error = "Invalid procedure table from service provider";
555 // break;
556 // case WSAINVALIDPROVIDER:
557 // error = "Invalid service provider version number";
558 // break;
559 case WSA_IO_INCOMPLETE:
560 error = "Overlapped I/O event object not in signaled state";
561 break;
562 case WSA_IO_PENDING:
563 error = "Overlapped operations will complete later";
564 break;
565 case WSA_NOT_ENOUGH_MEMORY:
566 error = "Insufficient memory available";
567 break;
568 case WSANOTINITIALISED:
569 error = "Successful WSAStartup not yet performed";
570 break;
571 case WSANO_DATA:
572 error = "Valid name, no data record of requested type";
573 break;
574 case WSANO_RECOVERY:
575 error = "This is a non-recoverable error";
576 break;
577 // case WSAPROVIDERFAILEDINIT:
578 // error = "Unable to initialize a service provider";
579 // break;
580 case WSASYSCALLFAILURE:
581 error = "System call failure";
582 break;
583 case WSASYSNOTREADY:
584 error = "Network subsystem is unavailable";
585 break;
586 case WSATRY_AGAIN:
587 error = "Non-authoritative host not found";
588 break;
589 case WSAVERNOTSUPPORTED:
590 error = "WINSOCK.DLL version out of range";
591 break;
592 case WSAEDISCON:
593 error = "Graceful shutdown in progress";
594 break;
595 case WSA_OPERATION_ABORTED:
596 error = "Overlapped operation aborted";
597 break;
598 default:
599 error = "Unknown error";
600 break;
601 }
602
603 cerr << prefix << ": " << error << "\n";
604 #else
605 if (host)
606 {
607 string error;
608
609 switch (h_errno)
610 {
611 case HOST_NOT_FOUND:
612 error = "Unknown host";
613 break;
614 case TRY_AGAIN:
615 error = "Host name lookup failure";
616 break;
617 case NO_RECOVERY:
618 error = "Unknown server error";
619 break;
620 case NO_DATA:
621 error = "No address associated with name";
622 break;
623 default:
624 error = "Unknown error";
625 break;
626 }
627
628 cerr << prefix << ": " << error << "\n";
629 }
630 else
631 {
632 perror(prefix.c_str());
633 }
634 #endif // _WIN32
635 }