ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 312
Committed: 2004-01-01T15:00:34-08:00 (21 years, 5 months ago) by douglas
File size: 16858 byte(s)
Log Message:
Updated copyright years.

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002-2004, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine HTTP Handler
46 //
47 // Douglas Thrift
48 //
49 // $Id: HttpHandler.cpp,v 1.27 2004/01/01 23:00:34 douglas Exp $
50
51 #include "HttpHandler.h"
52
53 // Lovely C Sockets!
54 #ifndef _WIN32
55 // BSD Sockets
56 #include <unistd.h>
57 #include <sys/types.h>
58 #include <sys/socket.h>
59 #include <netinet/in.h>
60 #include <netdb.h>
61
62 inline int closesocket(SOCKET s) { return close(s); }
63 #endif
64
65 HttpHandler::HttpHandler()
66 {
67 buffer = new char[BUFSIZ + 1];
68
69 #ifdef _WIN32
70 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
71 {
72 error(program + ": WSAStartup");
73 exit(1);
74 }
75 #endif // _WIN32
76
77 binary = false;
78 length = 0;
79 chunked = false;
80 #ifdef _OpenSSL_
81 tls = false;
82 #endif
83 }
84
85 HttpHandler::~HttpHandler()
86 {
87 delete [] buffer;
88
89 #ifdef _WIN32
90 WSACleanup();
91 #endif // _WIN32
92 }
93
94 bool HttpHandler::handle(URL &url, const string referer, bool head)
95 {
96 bool answer = false;
97
98 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
99 {
100 error(program + ": Socket");
101 exit(1);
102 }
103
104 sockaddr_in address;
105 hostent* host;
106
107 address.sin_family = AF_INET;
108
109 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
110 {
111 error(program + ": Host: " + url.getAddress(), true);
112 return answer;
113 }
114
115 address.sin_addr = *((in_addr*)*host->h_addr_list);
116 address.sin_port = htons(url.getPort());
117
118 if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
119 SOCKET_ERROR)
120 {
121 error(program + ": Connect");
122 return answer;
123 }
124
125 #ifdef _OpenSSL_
126 if (url.getTls())
127 {
128 tls = true;
129
130 if (!starttls()) return answer;
131 }
132 #endif
133
134 if (head)
135 {
136 putline("HEAD " + url.getPath() + " HTTP/1.1");
137 }
138 else
139 {
140 putline("GET " + url.getPath() + " HTTP/1.1");
141 }
142
143 putline("Accept: text/html; text/plain");
144 #ifndef _OpenSSL_
145 putline("User-Agent: " + agent(true) + ' ' + platform());
146
147 if (url.getPort() == 80)
148 #else
149 putline("User-Agent: " + agent(true) + ' ' + platform() + ' '
150 + openssl(true));
151
152 if (url.getPort() == 80 && tls || url.getPort() == 443 && tls)
153 #endif
154 {
155 putline("Host: " + url.getAddress());
156 }
157 else
158 {
159 ostringstream port;
160
161 port << url.getPort();
162
163 putline("Host: " + url.getAddress() + ':' + port.str());
164 }
165
166 if (referer != "")
167 {
168 putline("Referer: " + referer);
169 }
170
171 putline("Connection: close");
172 putline();
173
174 code response;
175 string line;
176
177 do
178 {
179 line = getline();
180
181 if (line.find("HTTP/") != 0)
182 {
183 return answer;
184 }
185
186 unsigned dot = line.find('.');
187 unsigned space = line.find(' ');
188
189 unsigned major;
190 unsigned minor;
191
192 istringstream number(line.substr(5, dot - 5) + " " + line.substr(dot
193 + 1, space - dot - 1));
194
195 number >> major;
196 number >> minor;
197
198 if (major > 1)
199 {
200 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
201 major << "." << minor << "\n";
202
203 return answer;
204 }
205
206 number.clear();
207 number.str(line.substr(space + 1, 3));
208 number >> response;
209
210 if (response < ok) do line = getline(); while (line != "");
211 }
212 while (response < ok);
213
214 do
215 {
216 line = getline();
217
218 if (line != "")
219 {
220 unsigned colon = line.find(':');
221
222 string field = line.substr(0, colon);
223 string value = line.substr(colon + 1);
224
225 while (isspace(value[0])) value.erase(0, 1);
226
227 if (field == "Content-Type")
228 {
229 type = value;
230 }
231 else if (field == "Content-Length")
232 {
233 istringstream number(value);
234
235 number >> length;
236 }
237 else if (field == "Location")
238 {
239 location = value;
240 }
241 else if (field == "Transfer-Encoding")
242 {
243 chunked = value == "chunked";
244 }
245 }
246 }
247 while (line != "");
248
249 switch (response)
250 {
251 case ok:
252 if (debug) cerr << "response = " << response << "\n";
253 answer = true;
254 break;
255 case choices:
256 case moved:
257 case found:
258 if (debug) cerr << "response = " << response << "\n"
259 << "location = " << location << "\n";
260 location = getLink(location, url);
261 break;
262 case notfound:
263 case internal:
264 if (debug) cerr << "response = " << response << "\n";
265 break;
266 default:
267 if (debug) cerr << "response = " << response << "\n";
268 if (response <= 299)
269 {
270 answer = true;
271 }
272 else if (response <= 399)
273 {
274 location = getLink(location, url);
275 }
276 break;
277 }
278
279 if (!head && answer) populate();
280
281 return answer;
282 }
283
284 void HttpHandler::clear()
285 {
286 #ifdef _OpenSSL_
287 if (tls)
288 {
289 SSL_shutdown(ssl);
290 SSL_free(ssl);
291 SSL_CTX_free(ctx);
292 }
293 #endif
294
295 closesocket(http);
296
297 type = "";
298 length = 0;
299 location = "";
300 page.clear();
301 page.str("");
302 chunked = false;
303 #ifdef _OpenSSL_
304 tls = false;
305 #endif
306 }
307
308 void HttpHandler::populate()
309 {
310 if (!chunked)
311 {
312 unsigned left = length;
313
314 while (left > 0)
315 {
316 memset(buffer, 0, BUFSIZ + 1);
317
318 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
319 long received;
320
321 while (true)
322 {
323 #ifndef _OpenSSL_
324 if ((received = recv(http, buffer, bytes, 0)) == SOCKET_ERROR)
325 {
326 error(program + ": Recv");
327 exit(1);
328 }
329 #else
330 if ((received = !tls ? recv(http, buffer, bytes, 0) :
331 SSL_read(ssl, buffer, bytes)) <= 0)
332 {
333 !tls ? error(program + ": Recv") : error(program +
334 ": SSL Read", int(received));
335 }
336 #endif
337 else if (received != bytes)
338 {
339 left -= received;
340 page << buffer;
341
342 memset(buffer, 0, BUFSIZ + 1);
343
344 bytes -= received;
345 }
346 else
347 {
348 break;
349 }
350 }
351
352 page << buffer;
353 left -= bytes;
354 }
355 }
356 else
357 {
358 unsigned chunk;
359
360 do
361 {
362 istringstream number(getline());
363
364 number.setf(ios_base::hex, ios_base::basefield);
365 number >> chunk;
366
367 unsigned left = chunk;
368
369 while (left > 0)
370 {
371 memset(buffer, 0, BUFSIZ + 1);
372
373 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
374 long received;
375
376 while (true)
377 {
378 #ifndef _OpenSSL_
379 if ((received = recv(http, buffer, bytes, 0)) ==
380 SOCKET_ERROR)
381 {
382 error(program + ": Recv");
383 exit(1);
384 }
385 #else
386 if ((received = !tls ? recv(http, buffer, bytes, 0) :
387 SSL_read(ssl, buffer, bytes)) <= 0)
388 {
389 !tls ? error(program + ": Recv") : error(program +
390 ": SSL Read", int(received));
391 exit(1);
392 }
393 #endif
394 else if (received != bytes)
395 {
396 left -= received;
397 page << buffer;
398
399 memset(buffer, 0, BUFSIZ + 1);
400
401 bytes -= received;
402 }
403 else
404 {
405 break;
406 }
407 }
408
409 page << buffer;
410 left -= bytes;
411 }
412
413 getline();
414 length += chunk;
415 }
416 while (chunk > 0);
417 }
418
419 if (!binary)
420 {
421 string page = this->page.str();
422
423 for (unsigned index = 0; index < page.length(); index++)
424 {
425 if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
426 1] == '\n' : false)
427 {
428 page.erase(index, 1);
429 }
430 else if (page[index] == '\r')
431 {
432 page[index] = '\n';
433 }
434 }
435
436 this->page.str(page);
437 }
438 }
439
440 void HttpHandler::putline(const string line)
441 {
442 sprintf(buffer, "%s\r\n", line.c_str());
443
444 #ifndef _OpenSSL_
445 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
446 {
447 error(program + ": Send");
448 exit(1);
449 }
450 #else
451 if (!tls)
452 {
453 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
454 {
455 error(program + ": Send");
456 exit(1);
457 }
458 }
459 else
460 {
461 int number;
462
463 if ((number = SSL_write(ssl, buffer, strlen(buffer))) <= 0)
464 {
465 error(program + ": SSL Write", number);
466 exit(1);
467 }
468 }
469 #endif
470 }
471
472 string HttpHandler::getline()
473 {
474 string line;
475 char byte;
476
477 do
478 {
479 #ifndef _OpenSSL_
480 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
481 {
482 error(program + ": Recv");
483 }
484 #else
485 if (!tls)
486 {
487 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
488 {
489 error(program + ": Recv");
490 }
491 }
492 else
493 {
494 int number;
495
496 if ((number = SSL_read(ssl, &byte, 1)) <= 0)
497 {
498 error(program + ": SSL Read", number);
499 }
500 }
501 #endif
502
503 if (byte != '\r' && byte != '\n')
504 {
505 line += byte;
506 }
507 }
508 while (byte != '\n');
509
510 return line;
511 }
512
513 void HttpHandler::error(const string& prefix, bool host)
514 {
515 #ifdef _WIN32
516 string error;
517
518 switch (WSAGetLastError())
519 {
520 case WSAEACCES:
521 error = "Permission denied";
522 break;
523 case WSAEADDRINUSE:
524 error = "Address already in use";
525 break;
526 case WSAEADDRNOTAVAIL:
527 error = "Cannot assign requested address";
528 break;
529 case WSAEAFNOSUPPORT:
530 error = "Address family not supported by protocol family";
531 break;
532 case WSAEALREADY:
533 error = "Operation already in progress";
534 break;
535 case WSAECONNABORTED:
536 error = "Software caused connection abort";
537 break;
538 case WSAECONNREFUSED:
539 error = "Connection refused";
540 break;
541 case WSAECONNRESET:
542 error = "Connection reset by peer";
543 break;
544 case WSAEDESTADDRREQ:
545 error = "Destination address required";
546 break;
547 case WSAEFAULT:
548 error = "Bad address";
549 break;
550 case WSAEHOSTDOWN:
551 error = "Host is down";
552 break;
553 case WSAEHOSTUNREACH:
554 error = "No route to host";
555 break;
556 case WSAEINPROGRESS:
557 error = "Operation now in progress";
558 break;
559 case WSAEINTR:
560 error = "Interrupted function call";
561 break;
562 case WSAEINVAL:
563 error = "Invalid argument";
564 break;
565 case WSAEISCONN:
566 error = "Socket is already connected";
567 break;
568 case WSAEMFILE:
569 error = "Too many open files";
570 break;
571 case WSAEMSGSIZE:
572 error = "Message too long";
573 break;
574 case WSAENETDOWN:
575 error = "Network is down";
576 break;
577 case WSAENETRESET:
578 error = "Network dropped connection on reset";
579 break;
580 case WSAENETUNREACH:
581 error = "Network is unreachable";
582 break;
583 case WSAENOBUFS:
584 error = "No buffer space available";
585 break;
586 case WSAENOPROTOOPT:
587 error = "Bad protocol option";
588 break;
589 case WSAENOTCONN:
590 error = "Socket is not connected";
591 break;
592 case WSAENOTSOCK:
593 error = "Socket operation on non-socket";
594 break;
595 case WSAEOPNOTSUPP:
596 error = "Operation not supported";
597 break;
598 case WSAEPFNOSUPPORT:
599 error = "Protocol family not supported";
600 break;
601 case WSAEPROCLIM:
602 error = "Too many processes";
603 break;
604 case WSAEPROTONOSUPPORT:
605 error = "Protocol not supported";
606 break;
607 case WSAEPROTOTYPE:
608 error = "Protocol wrong type for socket";
609 break;
610 case WSAESHUTDOWN:
611 error = "Cannot send after socket shutdown";
612 break;
613 case WSAESOCKTNOSUPPORT:
614 error = "Socket type not supported";
615 break;
616 case WSAETIMEDOUT:
617 error = "Connection timed out";
618 break;
619 case WSATYPE_NOT_FOUND:
620 error = "Class type not found";
621 break;
622 case WSAEWOULDBLOCK:
623 error = "Resource temporarily unavailable";
624 break;
625 case WSAHOST_NOT_FOUND:
626 error = "Host not found";
627 break;
628 case WSA_INVALID_HANDLE:
629 error = "Specified event object handle is invalid";
630 break;
631 case WSA_INVALID_PARAMETER:
632 error = "One or more parameters are invalid";
633 break;
634 // case WSAINVALIDPROCTABLE:
635 // error = "Invalid procedure table from service provider";
636 // break;
637 // case WSAINVALIDPROVIDER:
638 // error = "Invalid service provider version number";
639 // break;
640 case WSA_IO_INCOMPLETE:
641 error = "Overlapped I/O event object not in signaled state";
642 break;
643 case WSA_IO_PENDING:
644 error = "Overlapped operations will complete later";
645 break;
646 case WSA_NOT_ENOUGH_MEMORY:
647 error = "Insufficient memory available";
648 break;
649 case WSANOTINITIALISED:
650 error = "Successful WSAStartup not yet performed";
651 break;
652 case WSANO_DATA:
653 error = "Valid name, no data record of requested type";
654 break;
655 case WSANO_RECOVERY:
656 error = "This is a non-recoverable error";
657 break;
658 // case WSAPROVIDERFAILEDINIT:
659 // error = "Unable to initialize a service provider";
660 // break;
661 case WSASYSCALLFAILURE:
662 error = "System call failure";
663 break;
664 case WSASYSNOTREADY:
665 error = "Network subsystem is unavailable";
666 break;
667 case WSATRY_AGAIN:
668 error = "Non-authoritative host not found";
669 break;
670 case WSAVERNOTSUPPORTED:
671 error = "WINSOCK.DLL version out of range";
672 break;
673 case WSAEDISCON:
674 error = "Graceful shutdown in progress";
675 break;
676 case WSA_OPERATION_ABORTED:
677 error = "Overlapped operation aborted";
678 break;
679 default:
680 error = "Unknown error";
681 break;
682 }
683
684 cerr << prefix << ": " << error << "\n";
685 #else
686 if (host)
687 {
688 string error;
689
690 switch (h_errno)
691 {
692 case HOST_NOT_FOUND:
693 error = "Unknown host";
694 break;
695 case TRY_AGAIN:
696 error = "Host name lookup failure";
697 break;
698 case NO_RECOVERY:
699 error = "Unknown server error";
700 break;
701 case NO_DATA:
702 error = "No address associated with name";
703 break;
704 default:
705 error = "Unknown error";
706 break;
707 }
708
709 cerr << prefix << ": " << error << "\n";
710 }
711 else
712 {
713 perror(prefix.c_str());
714 }
715 #endif // _WIN32
716 }
717
718 #ifdef _OpenSSL_
719 void HttpHandler::error(const string& prefix, int number)
720 {
721 string error;
722
723 switch (SSL_get_error(ssl, number))
724 {
725 case SSL_ERROR_NONE:
726 error = "The TLS/SSL I/O operation completed";
727 break;
728 case SSL_ERROR_ZERO_RETURN:
729 error = "The TLS/SSL connection has been closed";
730 break;
731 case SSL_ERROR_WANT_READ:
732 case SSL_ERROR_WANT_WRITE:
733 case SSL_ERROR_WANT_CONNECT:
734 // case SSL_ERROR_WANT_ACCEPT:
735 case SSL_ERROR_WANT_X509_LOOKUP:
736 error = "The operation did not complete";
737 break;
738 case SSL_ERROR_SYSCALL:
739 if (int err = ERR_get_error() != 0)
740 {
741 error = ERR_reason_error_string(err);
742 }
743 else
744 {
745 switch (number)
746 {
747 case 0:
748 error = "An EOF was observed that violates the protocol";
749 break;
750 case -1:
751 this->error(prefix);
752 return;
753 default:
754 error = "Unknown error";
755 break;
756 }
757 }
758 break;
759 case SSL_ERROR_SSL:
760 error = ERR_reason_error_string(ERR_get_error());
761 break;
762 default:
763 error = "Unknown error";
764 break;
765 }
766
767 cerr << prefix << ": " << error << "\n";
768 }
769
770 bool HttpHandler::starttls()
771 {
772 SSL_load_error_strings();
773 SSL_library_init();
774
775 #ifndef _urandomdev_
776 int pid = getpid();
777 int now = time(NULL);
778
779 unsigned seed = now > pid ? now - pid : pid - now;
780
781 char* junk = new char[seed % 30 + 2];
782 junk[0] = pid;
783 junk[seed % 30 + 1] = now;
784
785 srand(seed);
786
787 for (int index = 1; index < seed % 30 + 1; index++)
788 {
789 junk[index] = rand();
790 }
791
792 if (debug)
793 {
794 cerr << "junk = {\n";
795
796 for (int index = 1; index < seed % 30 + 2; index++)
797 {
798 cerr << " [" << index << "] = " << int(junk[index]) << "\n";
799 }
800
801 cerr << "}\n";
802 }
803
804 RAND_seed(junk, seed % 30 + 2);
805
806 delete junk;
807 #else
808 if (debug) cerr << "junk = /dev/urandom\n";
809 #endif
810
811 ctx = SSL_CTX_new(TLSv1_client_method());
812
813 if (ctx == NULL)
814 {
815 cerr << program << ": SSL CTX New: "
816 << ERR_reason_error_string(ERR_get_error()) << "\n";
817 return false;
818 }
819
820 ssl = SSL_new(ctx);
821
822 if (SSL_set_fd(ssl, http) == 0)
823 {
824 cerr << program << ": SSL Set FD: "
825 << ERR_reason_error_string(ERR_get_error()) << "\n";
826 return false;
827 }
828
829 int number;
830
831 if ((number = SSL_connect(ssl)) <= 0)
832 {
833 error(program + ": SSL Connect", number);
834 return false;
835 }
836
837 return true;
838 }
839 #endif