ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 211
Committed: 2003-07-19T18:46:12-07:00 (21 years, 11 months ago) by douglas
File size: 16618 byte(s)
Log Message:
OpenSSL stuff should work, made string tolower inline function, fixed size
output in Outputer.body().

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine HTTP Handler
46 //
47 // Douglas Thrift
48 //
49 // $Id: HttpHandler.cpp,v 1.22 2003/07/20 01:46:12 douglas Exp $
50
51 #include "HttpHandler.h"
52
53 // Lovely C Sockets!
54 #ifndef _WIN32
55 // BSD Sockets
56 #include <unistd.h>
57 #include <sys/types.h>
58 #include <sys/socket.h>
59 #include <netinet/in.h>
60 #include <netdb.h>
61 #endif
62
63 HttpHandler::HttpHandler()
64 {
65 buffer = new char[BUFSIZ + 1];
66
67 #ifdef _WIN32
68 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
69 {
70 error(program + ": WSAStartup");
71 exit(1);
72 }
73 #endif // _WIN32
74
75 binary = false;
76 length = 0;
77 chunked = false;
78 #ifdef _OpenSSL_
79 tls = false;
80 #endif
81 }
82
83 HttpHandler::~HttpHandler()
84 {
85 delete [] buffer;
86
87 #ifdef _WIN32
88 WSACleanup();
89 #endif // _WIN32
90 }
91
92 bool HttpHandler::handle(URL &url, const string referer, bool head)
93 {
94 bool answer = false;
95
96 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
97 {
98 error(program + ": Socket");
99 exit(1);
100 }
101
102 sockaddr_in address;
103 hostent* host;
104
105 address.sin_family = AF_INET;
106
107 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
108 {
109 error(program + ": Host: " + url.getAddress(), true);
110 return answer;
111 }
112
113 address.sin_addr = *((in_addr*)*host->h_addr_list);
114 address.sin_port = htons(url.getPort());
115
116 if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
117 SOCKET_ERROR)
118 {
119 error(program + ": Connect");
120 return answer;
121 }
122
123 #ifdef _OpenSSL_
124 if (url.getTls())
125 {
126 tls = true;
127
128 if (!starttls()) return answer;
129 }
130 #endif
131
132 if (head)
133 {
134 putline("HEAD " + url.getPath() + " HTTP/1.1");
135 }
136 else
137 {
138 putline("GET " + url.getPath() + " HTTP/1.1");
139 }
140
141 putline("Accept: text/html; text/plain");
142 #ifndef _OpenSSL_
143 putline("User-Agent: " + agent(true) + ' ' + platform());
144
145 if (url.getPort() == 80)
146 #else
147 putline("User-Agent: " + agent(true) + ' ' + platform() + ' '
148 + openssl(true));
149
150 if (url.getPort() == 80 && tls || url.getPort() == 443 && tls)
151 #endif
152 {
153 putline("Host: " + url.getAddress());
154 }
155 else
156 {
157 ostringstream port;
158
159 port << url.getPort();
160
161 putline("Host: " + url.getAddress() + ':' + port.str());
162 }
163
164 if (referer != "")
165 {
166 putline("Referer: " + referer);
167 }
168
169 putline("Connection: close");
170 putline();
171
172 code response;
173 string line;
174
175 do
176 {
177 line = getline();
178
179 if (line.find("HTTP/") != 0)
180 {
181 return answer;
182 }
183
184 unsigned dot = line.find('.');
185 unsigned space = line.find(' ');
186
187 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 10);
188 unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(),
189 0, 10);
190
191 if (major > 1)
192 {
193 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
194 major << "." << minor << "\n";
195
196 return answer;
197 }
198
199 response = code(strtoul(line.substr(space + 1).c_str(), 0, 10));
200
201 if (response < ok) do line = getline(); while (line != "");
202 }
203 while (response < ok);
204
205 do
206 {
207 line = getline();
208
209 if (line != "")
210 {
211 unsigned colon = line.find(':');
212
213 string field = line.substr(0, colon);
214 string value = line.substr(colon + 1);
215
216 while (isspace(value[0])) value.erase(0, 1);
217
218 if (field == "Content-Type")
219 {
220 type = value;
221 }
222 else if (field == "Content-Length")
223 {
224 length = strtoul(value.c_str(), 0, 10);
225 }
226 else if (field == "Location")
227 {
228 location = value;
229 }
230 else if (field == "Transfer-Encoding")
231 {
232 chunked = value == "chunked";
233 }
234 }
235 }
236 while (line != "");
237
238 switch (response)
239 {
240 case ok:
241 if (debug) cerr << "response = " << response << "\n";
242 answer = true;
243 break;
244 case choices:
245 case moved:
246 case found:
247 if (debug) cerr << "response = " << response << "\n"
248 << "location = " << location << "\n";
249 location = getLink(location, url);
250 break;
251 case notfound:
252 case internal:
253 if (debug) cerr << "response = " << response << "\n";
254 break;
255 default:
256 if (debug) cerr << "response = " << response << "\n";
257 if (response <= 299)
258 {
259 answer = true;
260 }
261 else if (response <= 399)
262 {
263 location = getLink(location, url);
264 }
265 break;
266 }
267
268 if (!head && answer) populate();
269
270 return answer;
271 }
272
273 void HttpHandler::clear()
274 {
275 if (tls)
276 {
277 SSL_shutdown(ssl);
278 SSL_free(ssl);
279 SSL_CTX_free(ctx);
280 }
281
282 closesocket(http);
283
284 type = "";
285 length = 0;
286 location = "";
287 page.clear();
288 page.str("");
289 chunked = false;
290 #ifdef _OpenSSL_
291 tls = false;
292 #endif
293 }
294
295 void HttpHandler::populate()
296 {
297 if (!chunked)
298 {
299 unsigned left = length;
300
301 while (left > 0)
302 {
303 memset(buffer, 0, BUFSIZ + 1);
304
305 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
306 long received;
307
308 while (true)
309 {
310 #ifndef _OpenSSL_
311 if ((received = recv(http, buffer, bytes, 0)) == SOCKET_ERROR)
312 {
313 error(program + ": Recv");
314 exit(1);
315 }
316 #else
317 if ((received = !tls ? recv(http, buffer, bytes, 0) :
318 SSL_read(ssl, buffer, bytes)) <= 0)
319 {
320 !tls ? error(program + ": Recv") : error(program +
321 ": SSL Read", int(received));
322 }
323 #endif
324 else if (received != bytes)
325 {
326 left -= received;
327 page << buffer;
328
329 memset(buffer, 0, BUFSIZ + 1);
330
331 bytes -= received;
332 }
333 else
334 {
335 break;
336 }
337 }
338
339 page << buffer;
340 left -= bytes;
341 }
342 }
343 else
344 {
345 unsigned chunk;
346
347 do
348 {
349 chunk = strtoul(getline().c_str(), 0, 16);
350
351 unsigned left = chunk;
352
353 while (left > 0)
354 {
355 memset(buffer, 0, BUFSIZ + 1);
356
357 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
358 long received;
359
360 while (true)
361 {
362 #ifndef _OpenSSL_
363 if ((received = recv(http, buffer, bytes, 0)) ==
364 SOCKET_ERROR)
365 {
366 error(program + ": Recv");
367 exit(1);
368 }
369 #else
370 if ((received = !tls ? recv(http, buffer, bytes, 0) :
371 SSL_read(ssl, buffer, bytes)) <= 0)
372 {
373 !tls ? error(program + ": Recv") : error(program +
374 ": SSL Read", int(received));
375 exit(1);
376 }
377 #endif
378 else if (received != bytes)
379 {
380 left -= received;
381 page << buffer;
382
383 memset(buffer, 0, BUFSIZ + 1);
384
385 bytes -= received;
386 }
387 else
388 {
389 break;
390 }
391 }
392
393 page << buffer;
394 left -= bytes;
395 }
396
397 getline();
398 length += chunk;
399 }
400 while (chunk > 0);
401 }
402
403 if (!binary)
404 {
405 string page = this->page.str();
406
407 for (unsigned index = 0; index < page.length(); index++)
408 {
409 if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
410 1] == '\n' : false)
411 {
412 page.erase(index, 1);
413 }
414 else if (page[index] == '\r')
415 {
416 page[index] = '\n';
417 }
418 }
419
420 this->page.str(page);
421 }
422 }
423
424 void HttpHandler::putline(const string line)
425 {
426 sprintf(buffer, "%s\r\n", line.c_str());
427
428 #ifndef _OpenSSL_
429 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
430 {
431 error(program + ": Send");
432 exit(1);
433 }
434 #else
435 if (!tls)
436 {
437 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
438 {
439 error(program + ": Send");
440 exit(1);
441 }
442 }
443 else
444 {
445 int number;
446
447 if ((number = SSL_write(ssl, buffer, strlen(buffer))) <= 0)
448 {
449 error(program + ": SSL Write", number);
450 exit(1);
451 }
452 }
453 #endif
454 }
455
456 string HttpHandler::getline()
457 {
458 string line;
459 char byte;
460
461 do
462 {
463 #ifndef _OpenSSL_
464 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
465 {
466 error(program + ": Recv");
467 }
468 #else
469 if (!tls)
470 {
471 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
472 {
473 error(program + ": Recv");
474 }
475 }
476 else
477 {
478 int number;
479
480 if ((number = SSL_read(ssl, &byte, 1)) <= 0)
481 {
482 error(program + ": SSL Read", number);
483 }
484 }
485 #endif
486
487 if (byte != '\r' && byte != '\n')
488 {
489 line += byte;
490 }
491 }
492 while (byte != '\n');
493
494 return line;
495 }
496
497 void HttpHandler::error(const string& prefix, bool host)
498 {
499 #ifdef _WIN32
500 string error;
501
502 switch (WSAGetLastError())
503 {
504 case WSAEACCES:
505 error = "Permission denied";
506 break;
507 case WSAEADDRINUSE:
508 error = "Address already in use";
509 break;
510 case WSAEADDRNOTAVAIL:
511 error = "Cannot assign requested address";
512 break;
513 case WSAEAFNOSUPPORT:
514 error = "Address family not supported by protocol family";
515 break;
516 case WSAEALREADY:
517 error = "Operation already in progress";
518 break;
519 case WSAECONNABORTED:
520 error = "Software caused connection abort";
521 break;
522 case WSAECONNREFUSED:
523 error = "Connection refused";
524 break;
525 case WSAECONNRESET:
526 error = "Connection reset by peer";
527 break;
528 case WSAEDESTADDRREQ:
529 error = "Destination address required";
530 break;
531 case WSAEFAULT:
532 error = "Bad address";
533 break;
534 case WSAEHOSTDOWN:
535 error = "Host is down";
536 break;
537 case WSAEHOSTUNREACH:
538 error = "No route to host";
539 break;
540 case WSAEINPROGRESS:
541 error = "Operation now in progress";
542 break;
543 case WSAEINTR:
544 error = "Interrupted function call";
545 break;
546 case WSAEINVAL:
547 error = "Invalid argument";
548 break;
549 case WSAEISCONN:
550 error = "Socket is already connected";
551 break;
552 case WSAEMFILE:
553 error = "Too many open files";
554 break;
555 case WSAEMSGSIZE:
556 error = "Message too long";
557 break;
558 case WSAENETDOWN:
559 error = "Network is down";
560 break;
561 case WSAENETRESET:
562 error = "Network dropped connection on reset";
563 break;
564 case WSAENETUNREACH:
565 error = "Network is unreachable";
566 break;
567 case WSAENOBUFS:
568 error = "No buffer space available";
569 break;
570 case WSAENOPROTOOPT:
571 error = "Bad protocol option";
572 break;
573 case WSAENOTCONN:
574 error = "Socket is not connected";
575 break;
576 case WSAENOTSOCK:
577 error = "Socket operation on non-socket";
578 break;
579 case WSAEOPNOTSUPP:
580 error = "Operation not supported";
581 break;
582 case WSAEPFNOSUPPORT:
583 error = "Protocol family not supported";
584 break;
585 case WSAEPROCLIM:
586 error = "Too many processes";
587 break;
588 case WSAEPROTONOSUPPORT:
589 error = "Protocol not supported";
590 break;
591 case WSAEPROTOTYPE:
592 error = "Protocol wrong type for socket";
593 break;
594 case WSAESHUTDOWN:
595 error = "Cannot send after socket shutdown";
596 break;
597 case WSAESOCKTNOSUPPORT:
598 error = "Socket type not supported";
599 break;
600 case WSAETIMEDOUT:
601 error = "Connection timed out";
602 break;
603 case WSATYPE_NOT_FOUND:
604 error = "Class type not found";
605 break;
606 case WSAEWOULDBLOCK:
607 error = "Resource temporarily unavailable";
608 break;
609 case WSAHOST_NOT_FOUND:
610 error = "Host not found";
611 break;
612 case WSA_INVALID_HANDLE:
613 error = "Specified event object handle is invalid";
614 break;
615 case WSA_INVALID_PARAMETER:
616 error = "One or more parameters are invalid";
617 break;
618 // case WSAINVALIDPROCTABLE:
619 // error = "Invalid procedure table from service provider";
620 // break;
621 // case WSAINVALIDPROVIDER:
622 // error = "Invalid service provider version number";
623 // break;
624 case WSA_IO_INCOMPLETE:
625 error = "Overlapped I/O event object not in signaled state";
626 break;
627 case WSA_IO_PENDING:
628 error = "Overlapped operations will complete later";
629 break;
630 case WSA_NOT_ENOUGH_MEMORY:
631 error = "Insufficient memory available";
632 break;
633 case WSANOTINITIALISED:
634 error = "Successful WSAStartup not yet performed";
635 break;
636 case WSANO_DATA:
637 error = "Valid name, no data record of requested type";
638 break;
639 case WSANO_RECOVERY:
640 error = "This is a non-recoverable error";
641 break;
642 // case WSAPROVIDERFAILEDINIT:
643 // error = "Unable to initialize a service provider";
644 // break;
645 case WSASYSCALLFAILURE:
646 error = "System call failure";
647 break;
648 case WSASYSNOTREADY:
649 error = "Network subsystem is unavailable";
650 break;
651 case WSATRY_AGAIN:
652 error = "Non-authoritative host not found";
653 break;
654 case WSAVERNOTSUPPORTED:
655 error = "WINSOCK.DLL version out of range";
656 break;
657 case WSAEDISCON:
658 error = "Graceful shutdown in progress";
659 break;
660 case WSA_OPERATION_ABORTED:
661 error = "Overlapped operation aborted";
662 break;
663 default:
664 error = "Unknown error";
665 break;
666 }
667
668 cerr << prefix << ": " << error << "\n";
669 #else
670 if (host)
671 {
672 string error;
673
674 switch (h_errno)
675 {
676 case HOST_NOT_FOUND:
677 error = "Unknown host";
678 break;
679 case TRY_AGAIN:
680 error = "Host name lookup failure";
681 break;
682 case NO_RECOVERY:
683 error = "Unknown server error";
684 break;
685 case NO_DATA:
686 error = "No address associated with name";
687 break;
688 default:
689 error = "Unknown error";
690 break;
691 }
692
693 cerr << prefix << ": " << error << "\n";
694 }
695 else
696 {
697 perror(prefix.c_str());
698 }
699 #endif // _WIN32
700 }
701
702 #ifdef _OpenSSL_
703 void HttpHandler::error(const string& prefix, int number)
704 {
705 string error;
706
707 switch (SSL_get_error(ssl, number))
708 {
709 case SSL_ERROR_NONE:
710 error = "The TLS/SSL I/O operation completed";
711 break;
712 case SSL_ERROR_ZERO_RETURN:
713 error = "The TLS/SSL connection has been closed";
714 break;
715 case SSL_ERROR_WANT_READ:
716 case SSL_ERROR_WANT_WRITE:
717 case SSL_ERROR_WANT_CONNECT:
718 // case SSL_ERROR_WANT_ACCEPT:
719 case SSL_ERROR_WANT_X509_LOOKUP:
720 error = "The operation did not complete";
721 break;
722 case SSL_ERROR_SYSCALL:
723 if (int err = ERR_get_error() != 0)
724 {
725 error = ERR_reason_error_string(err);
726 }
727 else
728 {
729 switch (number)
730 {
731 case 0:
732 error = "An EOF was observed that violates the protocol";
733 break;
734 case -1:
735 this->error(prefix);
736 return;
737 default:
738 error = "Unknown error";
739 break;
740 }
741 }
742 break;
743 case SSL_ERROR_SSL:
744 error = ERR_reason_error_string(ERR_get_error());
745 break;
746 default:
747 error = "Unknown error";
748 break;
749 }
750
751 cerr << prefix << ": " << error << "\n";
752 }
753
754 bool HttpHandler::starttls()
755 {
756 SSL_load_error_strings();
757 SSL_library_init();
758
759 #ifndef _urandomdev_
760 int pid = getpid();
761 int now = time(NULL);
762
763 unsigned seed = now > pid ? now - pid : pid - now;
764
765 char* junk = new char[seed % 30 + 2];
766 junk[0] = pid;
767 junk[seed % 30 + 1] = now;
768
769 srand(seed);
770
771 for (int index = 1; index < seed % 30 + 1; index++)
772 {
773 junk[index] = rand();
774 }
775
776 if (debug)
777 {
778 cerr << "junk = {\n";
779
780 for (int index = 1; index < seed % 30 + 2; index++)
781 {
782 cerr << " [" << index << "] = " << int(junk[index]) << "\n";
783 }
784
785 cerr << "}\n";
786 }
787
788 RAND_seed(junk, seed % 30 + 2);
789
790 delete junk;
791 #endif
792
793 ctx = SSL_CTX_new(TLSv1_client_method());
794
795 if (ctx == NULL)
796 {
797 cerr << program << ": SSL CTX New: "
798 << ERR_reason_error_string(ERR_get_error()) << "\n";
799 return false;
800 }
801
802 ssl = SSL_new(ctx);
803
804 if (SSL_set_fd(ssl, http) == 0)
805 {
806 cerr << program << ": SSL Set FD: "
807 << ERR_reason_error_string(ERR_get_error()) << "\n";
808 return false;
809 }
810
811 int number;
812
813 if ((number = SSL_connect(ssl)) <= 0)
814 {
815 error(program + ": SSL Connect", number);
816 return false;
817 }
818
819 return true;
820 }
821 #endif