ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 209
Committed: 2003-07-18T00:26:41-07:00 (21 years, 11 months ago) by douglas
File size: 15709 byte(s)
Log Message:
Oops, picky picky!

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine HTTP Handler
46 //
47 // Douglas Thrift
48 //
49 // $Id: HttpHandler.cpp,v 1.21 2003/07/18 07:26:41 douglas Exp $
50
51 #include "HttpHandler.h"
52
53 // Lovely C Sockets!
54 #ifndef _WIN32
55 // BSD Sockets
56 #include <unistd.h>
57 #include <sys/types.h>
58 #include <sys/socket.h>
59 #include <netinet/in.h>
60 #include <netdb.h>
61
62 #define INVALID_SOCKET -1
63 #define SOCKET_ERROR -1
64
65 inline int closesocket(SOCKET s) { return close(s); }
66 #endif
67
68 HttpHandler::HttpHandler()
69 {
70 buffer = new char[BUFSIZ + 1];
71
72 #ifdef _WIN32
73 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
74 {
75 error(program + ": WSAStartup");
76 exit(1);
77 }
78 #endif // _WIN32
79
80 binary = false;
81 length = 0;
82 chunked = false;
83 #ifdef _OpenSSL_
84 tls = false;
85 #endif
86 }
87
88 HttpHandler::~HttpHandler()
89 {
90 delete [] buffer;
91
92 #ifdef _WIN32
93 WSACleanup();
94 #endif // _WIN32
95 }
96
97 bool HttpHandler::handle(URL &url, const string referer, bool head)
98 {
99 bool answer = false;
100
101 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
102 {
103 error(program + ": Socket");
104 exit(1);
105 }
106
107 sockaddr_in address;
108 hostent* host;
109
110 address.sin_family = AF_INET;
111
112 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
113 {
114 error(program + ": Host: " + url.getAddress(), true);
115 return answer;
116 }
117
118 address.sin_addr = *((in_addr*)*host->h_addr_list);
119 address.sin_port = htons(url.getPort());
120
121 if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
122 SOCKET_ERROR)
123 {
124 error(program + ": Connect");
125 return answer;
126 }
127
128 #ifdef _OpenSSL_
129 if (url.getTls())
130 {
131 tls = true;
132
133 if (!starttls()) return answer;
134 }
135 #endif
136
137 if (head)
138 {
139 putline("HEAD " + url.getPath() + " HTTP/1.1");
140 }
141 else
142 {
143 putline("GET " + url.getPath() + " HTTP/1.1");
144 }
145
146 putline("Accept: text/html; text/plain");
147 #ifndef _OpenSSL_
148 putline("User-Agent: " + agent(true) + ' ' + platform());
149
150 if (url.getPort() == 80)
151 #else
152 putline("User-Agent: " + agent(true) + ' ' + platform() + ' '
153 + openssl(true));
154
155 if (url.getPort() == 80 && tls || url.getPort() == 443 && tls)
156 #endif
157 {
158 putline("Host: " + url.getAddress());
159 }
160 else
161 {
162 ostringstream port;
163
164 port << url.getPort();
165
166 putline("Host: " + url.getAddress() + ':' + port.str());
167 }
168
169 if (referer != "")
170 {
171 putline("Referer: " + referer);
172 }
173
174 putline("Connection: close");
175 putline();
176
177 code response;
178 string line;
179
180 do
181 {
182 line = getline();
183
184 if (line.find("HTTP/") != 0)
185 {
186 return answer;
187 }
188
189 unsigned dot = line.find('.');
190 unsigned space = line.find(' ');
191
192 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 10);
193 unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(),
194 0, 10);
195
196 if (major > 1)
197 {
198 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
199 major << "." << minor << "\n";
200
201 return answer;
202 }
203
204 response = code(strtoul(line.substr(space + 1).c_str(), 0, 10));
205
206 if (response < ok) do line = getline(); while (line != "");
207 }
208 while (response < ok);
209
210 do
211 {
212 line = getline();
213
214 if (line != "")
215 {
216 unsigned colon = line.find(':');
217
218 string field = line.substr(0, colon);
219 string value = line.substr(colon + 1);
220
221 while (isspace(value[0])) value.erase(0, 1);
222
223 if (field == "Content-Type")
224 {
225 type = value;
226 }
227 else if (field == "Content-Length")
228 {
229 length = strtoul(value.c_str(), 0, 10);
230 }
231 else if (field == "Location")
232 {
233 location = value;
234 }
235 else if (field == "Transfer-Encoding")
236 {
237 chunked = value == "chunked";
238 }
239 }
240 }
241 while (line != "");
242
243 switch (response)
244 {
245 case ok:
246 if (debug) cerr << "response = " << response << "\n";
247 answer = true;
248 break;
249 case choices:
250 case moved:
251 case found:
252 if (debug) cerr << "response = " << response << "\n"
253 << "location = " << location << "\n";
254 location = getLink(location, url);
255 break;
256 case notfound:
257 case internal:
258 if (debug) cerr << "response = " << response << "\n";
259 break;
260 default:
261 if (debug) cerr << "response = " << response << "\n";
262 if (response <= 299)
263 {
264 answer = true;
265 }
266 else if (response <= 399)
267 {
268 location = getLink(location, url);
269 }
270 break;
271 }
272
273 if (!head && answer) populate();
274
275 return answer;
276 }
277
278 void HttpHandler::clear()
279 {
280 if (tls)
281 {
282 SSL_shutdown(ssl);
283 SSL_free(ssl);
284 SSL_CTX_free(ctx);
285 }
286
287 closesocket(http);
288
289 type = "";
290 length = 0;
291 location = "";
292 page.clear();
293 page.str("");
294 chunked = false;
295 #ifdef _OpenSSL_
296 tls = false;
297 #endif
298 }
299
300 void HttpHandler::populate()
301 {
302 if (!chunked)
303 {
304 unsigned left = length;
305
306 while (left > 0)
307 {
308 memset(buffer, 0, BUFSIZ + 1);
309
310 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
311 long received;
312
313 while (true)
314 {
315 #ifndef _OpenSSL_
316 if ((received = recv(http, buffer, bytes, 0)) == SOCKET_ERROR)
317 {
318 error(program + ": Recv");
319 exit(1);
320 }
321 #else
322 if ((received = !tls ? recv(http, buffer, bytes, 0) :
323 SSL_read(ssl, buffer, bytes)) <= 0)
324 {
325 !tls ? error(program + ": Recv") : error(program +
326 ": SSL Read", int(received));
327 }
328 #endif
329 else if (received != bytes)
330 {
331 left -= received;
332 page << buffer;
333
334 memset(buffer, 0, BUFSIZ + 1);
335
336 bytes -= received;
337 }
338 else
339 {
340 break;
341 }
342 }
343
344 page << buffer;
345 left -= bytes;
346 }
347 }
348 else
349 {
350 unsigned chunk;
351
352 do
353 {
354 chunk = strtoul(getline().c_str(), 0, 16);
355
356 unsigned left = chunk;
357
358 while (left > 0)
359 {
360 memset(buffer, 0, BUFSIZ + 1);
361
362 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
363 long received;
364
365 while (true)
366 {
367 #ifndef _OpenSSL_
368 if ((received = recv(http, buffer, bytes, 0)) ==
369 SOCKET_ERROR)
370 {
371 error(program + ": Recv");
372 exit(1);
373 }
374 #else
375 if ((received = !tls ? recv(http, buffer, bytes, 0) :
376 SSL_read(ssl, buffer, bytes)) <= 0)
377 {
378 !tls ? error(program + ": Recv") : error(program +
379 ": SSL Read", int(received));
380 exit(1);
381 }
382 #endif
383 else if (received != bytes)
384 {
385 left -= received;
386 page << buffer;
387
388 memset(buffer, 0, BUFSIZ + 1);
389
390 bytes -= received;
391 }
392 else
393 {
394 break;
395 }
396 }
397
398 page << buffer;
399 left -= bytes;
400 }
401
402 getline();
403 length += chunk;
404 }
405 while (chunk > 0);
406 }
407
408 if (!binary)
409 {
410 string page = this->page.str();
411
412 for (unsigned index = 0; index < page.length(); index++)
413 {
414 if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
415 1] == '\n' : false)
416 {
417 page.erase(index, 1);
418 }
419 else if (page[index] == '\r')
420 {
421 page[index] = '\n';
422 }
423 }
424
425 this->page.str(page);
426 }
427 }
428
429 void HttpHandler::putline(const string line)
430 {
431 sprintf(buffer, "%s\r\n", line.c_str());
432
433 #ifndef _OpenSSL_
434 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
435 {
436 error(program + ": Send");
437 exit(1);
438 }
439 #else
440 if (!tls)
441 {
442 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
443 {
444 error(program + ": Send");
445 exit(1);
446 }
447 }
448 else
449 {
450 int number;
451
452 if ((number = SSL_write(ssl, buffer, strlen(buffer))) <= 0)
453 {
454 error(program + ": SSL Write", number);
455 exit(1);
456 }
457 }
458 #endif
459 }
460
461 string HttpHandler::getline()
462 {
463 string line;
464 char byte;
465
466 do
467 {
468 #ifndef _OpenSSL_
469 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
470 {
471 error(program + ": Recv");
472 }
473 #else
474 if (!tls)
475 {
476 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
477 {
478 error(program + ": Recv");
479 }
480 }
481 else
482 {
483 int number;
484
485 if ((number = SSL_read(ssl, &byte, 1)) <= 0)
486 {
487 error(program + ": SSL Read", number);
488 }
489 }
490 #endif
491
492 if (byte != '\r' && byte != '\n')
493 {
494 line += byte;
495 }
496 }
497 while (byte != '\n');
498
499 return line;
500 }
501
502 void HttpHandler::error(const string& prefix, bool host)
503 {
504 #ifdef _WIN32
505 string error;
506
507 switch (WSAGetLastError())
508 {
509 case WSAEACCES:
510 error = "Permission denied";
511 break;
512 case WSAEADDRINUSE:
513 error = "Address already in use";
514 break;
515 case WSAEADDRNOTAVAIL:
516 error = "Cannot assign requested address";
517 break;
518 case WSAEAFNOSUPPORT:
519 error = "Address family not supported by protocol family";
520 break;
521 case WSAEALREADY:
522 error = "Operation already in progress";
523 break;
524 case WSAECONNABORTED:
525 error = "Software caused connection abort";
526 break;
527 case WSAECONNREFUSED:
528 error = "Connection refused";
529 break;
530 case WSAECONNRESET:
531 error = "Connection reset by peer";
532 break;
533 case WSAEDESTADDRREQ:
534 error = "Destination address required";
535 break;
536 case WSAEFAULT:
537 error = "Bad address";
538 break;
539 case WSAEHOSTDOWN:
540 error = "Host is down";
541 break;
542 case WSAEHOSTUNREACH:
543 error = "No route to host";
544 break;
545 case WSAEINPROGRESS:
546 error = "Operation now in progress";
547 break;
548 case WSAEINTR:
549 error = "Interrupted function call";
550 break;
551 case WSAEINVAL:
552 error = "Invalid argument";
553 break;
554 case WSAEISCONN:
555 error = "Socket is already connected";
556 break;
557 case WSAEMFILE:
558 error = "Too many open files";
559 break;
560 case WSAEMSGSIZE:
561 error = "Message too long";
562 break;
563 case WSAENETDOWN:
564 error = "Network is down";
565 break;
566 case WSAENETRESET:
567 error = "Network dropped connection on reset";
568 break;
569 case WSAENETUNREACH:
570 error = "Network is unreachable";
571 break;
572 case WSAENOBUFS:
573 error = "No buffer space available";
574 break;
575 case WSAENOPROTOOPT:
576 error = "Bad protocol option";
577 break;
578 case WSAENOTCONN:
579 error = "Socket is not connected";
580 break;
581 case WSAENOTSOCK:
582 error = "Socket operation on non-socket";
583 break;
584 case WSAEOPNOTSUPP:
585 error = "Operation not supported";
586 break;
587 case WSAEPFNOSUPPORT:
588 error = "Protocol family not supported";
589 break;
590 case WSAEPROCLIM:
591 error = "Too many processes";
592 break;
593 case WSAEPROTONOSUPPORT:
594 error = "Protocol not supported";
595 break;
596 case WSAEPROTOTYPE:
597 error = "Protocol wrong type for socket";
598 break;
599 case WSAESHUTDOWN:
600 error = "Cannot send after socket shutdown";
601 break;
602 case WSAESOCKTNOSUPPORT:
603 error = "Socket type not supported";
604 break;
605 case WSAETIMEDOUT:
606 error = "Connection timed out";
607 break;
608 case WSATYPE_NOT_FOUND:
609 error = "Class type not found";
610 break;
611 case WSAEWOULDBLOCK:
612 error = "Resource temporarily unavailable";
613 break;
614 case WSAHOST_NOT_FOUND:
615 error = "Host not found";
616 break;
617 case WSA_INVALID_HANDLE:
618 error = "Specified event object handle is invalid";
619 break;
620 case WSA_INVALID_PARAMETER:
621 error = "One or more parameters are invalid";
622 break;
623 // case WSAINVALIDPROCTABLE:
624 // error = "Invalid procedure table from service provider";
625 // break;
626 // case WSAINVALIDPROVIDER:
627 // error = "Invalid service provider version number";
628 // break;
629 case WSA_IO_INCOMPLETE:
630 error = "Overlapped I/O event object not in signaled state";
631 break;
632 case WSA_IO_PENDING:
633 error = "Overlapped operations will complete later";
634 break;
635 case WSA_NOT_ENOUGH_MEMORY:
636 error = "Insufficient memory available";
637 break;
638 case WSANOTINITIALISED:
639 error = "Successful WSAStartup not yet performed";
640 break;
641 case WSANO_DATA:
642 error = "Valid name, no data record of requested type";
643 break;
644 case WSANO_RECOVERY:
645 error = "This is a non-recoverable error";
646 break;
647 // case WSAPROVIDERFAILEDINIT:
648 // error = "Unable to initialize a service provider";
649 // break;
650 case WSASYSCALLFAILURE:
651 error = "System call failure";
652 break;
653 case WSASYSNOTREADY:
654 error = "Network subsystem is unavailable";
655 break;
656 case WSATRY_AGAIN:
657 error = "Non-authoritative host not found";
658 break;
659 case WSAVERNOTSUPPORTED:
660 error = "WINSOCK.DLL version out of range";
661 break;
662 case WSAEDISCON:
663 error = "Graceful shutdown in progress";
664 break;
665 case WSA_OPERATION_ABORTED:
666 error = "Overlapped operation aborted";
667 break;
668 default:
669 error = "Unknown error";
670 break;
671 }
672
673 cerr << prefix << ": " << error << "\n";
674 #else
675 if (host)
676 {
677 string error;
678
679 switch (h_errno)
680 {
681 case HOST_NOT_FOUND:
682 error = "Unknown host";
683 break;
684 case TRY_AGAIN:
685 error = "Host name lookup failure";
686 break;
687 case NO_RECOVERY:
688 error = "Unknown server error";
689 break;
690 case NO_DATA:
691 error = "No address associated with name";
692 break;
693 default:
694 error = "Unknown error";
695 break;
696 }
697
698 cerr << prefix << ": " << error << "\n";
699 }
700 else
701 {
702 perror(prefix.c_str());
703 }
704 #endif // _WIN32
705 }
706
707 #ifdef _OpenSSL_
708 void HttpHandler::error(const string& prefix, int number)
709 {
710 string error;
711
712 switch (SSL_get_error(ssl, number))
713 {
714 case SSL_ERROR_NONE:
715 error = "The TLS/SSL I/O operation completed";
716 break;
717 case SSL_ERROR_ZERO_RETURN:
718 error = "The TLS/SSL connection has been closed";
719 break;
720 case SSL_ERROR_WANT_READ:
721 case SSL_ERROR_WANT_WRITE:
722 case SSL_ERROR_WANT_CONNECT:
723 // case SSL_ERROR_WANT_ACCEPT:
724 case SSL_ERROR_WANT_X509_LOOKUP:
725 error = "The operation did not complete";
726 break;
727 case SSL_ERROR_SYSCALL:
728 if (int err = ERR_get_error() != 0)
729 {
730 error = ERR_reason_error_string(err);
731 }
732 else
733 {
734 switch (number)
735 {
736 case 0:
737 error = "An EOF was observed that violates the protocol";
738 break;
739 case -1:
740 this->error(prefix);
741 return;
742 default:
743 error = "Unknown error";
744 break;
745 }
746 }
747 break;
748 case SSL_ERROR_SSL:
749 error = ERR_reason_error_string(ERR_get_error());
750 break;
751 default:
752 error = "Unknown error";
753 break;
754 }
755
756 cerr << prefix << ": " << error << "\n";
757 }
758
759 bool HttpHandler::starttls()
760 {
761 SSL_load_error_strings();
762 SSL_library_init();
763
764 //
765
766 return true;
767 }
768 #endif