ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 212
Committed: 2003-07-19T21:00:25-07:00 (21 years, 11 months ago) by douglas
File size: 16924 byte(s)
Log Message:
Replaced some strtoul functions with istringstream stuff, and fixed Outputer
stuff.

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine HTTP Handler
46 //
47 // Douglas Thrift
48 //
49 // $Id: HttpHandler.cpp,v 1.23 2003/07/20 04:00:25 douglas Exp $
50
51 #include "HttpHandler.h"
52
53 // Lovely C Sockets!
54 #ifndef _WIN32
55 // BSD Sockets
56 #include <unistd.h>
57 #include <sys/types.h>
58 #include <sys/socket.h>
59 #include <netinet/in.h>
60 #include <netdb.h>
61
62 inline int closesocket(SOCKET s) { return close(s); }
63 #endif
64
65 HttpHandler::HttpHandler()
66 {
67 buffer = new char[BUFSIZ + 1];
68
69 #ifdef _WIN32
70 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
71 {
72 error(program + ": WSAStartup");
73 exit(1);
74 }
75 #endif // _WIN32
76
77 binary = false;
78 length = 0;
79 chunked = false;
80 #ifdef _OpenSSL_
81 tls = false;
82 #endif
83 }
84
85 HttpHandler::~HttpHandler()
86 {
87 delete [] buffer;
88
89 #ifdef _WIN32
90 WSACleanup();
91 #endif // _WIN32
92 }
93
94 bool HttpHandler::handle(URL &url, const string referer, bool head)
95 {
96 bool answer = false;
97
98 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
99 {
100 error(program + ": Socket");
101 exit(1);
102 }
103
104 sockaddr_in address;
105 hostent* host;
106
107 address.sin_family = AF_INET;
108
109 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
110 {
111 error(program + ": Host: " + url.getAddress(), true);
112 return answer;
113 }
114
115 address.sin_addr = *((in_addr*)*host->h_addr_list);
116 address.sin_port = htons(url.getPort());
117
118 if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
119 SOCKET_ERROR)
120 {
121 error(program + ": Connect");
122 return answer;
123 }
124
125 #ifdef _OpenSSL_
126 if (url.getTls())
127 {
128 tls = true;
129
130 if (!starttls()) return answer;
131 }
132 #endif
133
134 if (head)
135 {
136 putline("HEAD " + url.getPath() + " HTTP/1.1");
137 }
138 else
139 {
140 putline("GET " + url.getPath() + " HTTP/1.1");
141 }
142
143 putline("Accept: text/html; text/plain");
144 #ifndef _OpenSSL_
145 putline("User-Agent: " + agent(true) + ' ' + platform());
146
147 if (url.getPort() == 80)
148 #else
149 putline("User-Agent: " + agent(true) + ' ' + platform() + ' '
150 + openssl(true));
151
152 if (url.getPort() == 80 && tls || url.getPort() == 443 && tls)
153 #endif
154 {
155 putline("Host: " + url.getAddress());
156 }
157 else
158 {
159 ostringstream port;
160
161 port << url.getPort();
162
163 putline("Host: " + url.getAddress() + ':' + port.str());
164 }
165
166 if (referer != "")
167 {
168 putline("Referer: " + referer);
169 }
170
171 putline("Connection: close");
172 putline();
173
174 code response;
175 string line;
176
177 do
178 {
179 line = getline();
180
181 if (line.find("HTTP/") != 0)
182 {
183 return answer;
184 }
185
186 unsigned dot = line.find('.');
187 unsigned space = line.find(' ');
188
189 unsigned major;
190 unsigned minor;
191
192 istringstream number(line.substr(5, dot - 5) + " " + line.substr(dot
193 + 1, space - dot - 1));
194
195 number >> major;
196 number >> minor;
197
198 if (major > 1)
199 {
200 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
201 major << "." << minor << "\n";
202
203 return answer;
204 }
205
206 number.clear();
207 number.str(line.substr(space + 1, 3));
208 number >> response;
209
210 if (response < ok) do line = getline(); while (line != "");
211 }
212 while (response < ok);
213
214 do
215 {
216 line = getline();
217
218 if (line != "")
219 {
220 unsigned colon = line.find(':');
221
222 string field = line.substr(0, colon);
223 string value = line.substr(colon + 1);
224
225 while (isspace(value[0])) value.erase(0, 1);
226
227 if (field == "Content-Type")
228 {
229 type = value;
230 }
231 else if (field == "Content-Length")
232 {
233 istringstream number(value);
234
235 number >> length;
236 }
237 else if (field == "Location")
238 {
239 location = value;
240 }
241 else if (field == "Transfer-Encoding")
242 {
243 chunked = value == "chunked";
244 }
245 }
246 }
247 while (line != "");
248
249 switch (response)
250 {
251 case ok:
252 if (debug) cerr << "response = " << response << "\n";
253 answer = true;
254 break;
255 case choices:
256 case moved:
257 case found:
258 if (debug) cerr << "response = " << response << "\n"
259 << "location = " << location << "\n";
260 location = getLink(location, url);
261 break;
262 case notfound:
263 case internal:
264 if (debug) cerr << "response = " << response << "\n";
265 break;
266 default:
267 if (debug) cerr << "response = " << response << "\n";
268 if (response <= 299)
269 {
270 answer = true;
271 }
272 else if (response <= 399)
273 {
274 location = getLink(location, url);
275 }
276 break;
277 }
278
279 if (!head && answer) populate();
280
281 return answer;
282 }
283
284 void HttpHandler::clear()
285 {
286 if (tls)
287 {
288 SSL_shutdown(ssl);
289 SSL_free(ssl);
290 SSL_CTX_free(ctx);
291 }
292
293 closesocket(http);
294
295 type = "";
296 length = 0;
297 location = "";
298 page.clear();
299 page.str("");
300 chunked = false;
301 #ifdef _OpenSSL_
302 tls = false;
303 #endif
304 }
305
306 void HttpHandler::populate()
307 {
308 if (!chunked)
309 {
310 unsigned left = length;
311
312 while (left > 0)
313 {
314 memset(buffer, 0, BUFSIZ + 1);
315
316 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
317 long received;
318
319 while (true)
320 {
321 #ifndef _OpenSSL_
322 if ((received = recv(http, buffer, bytes, 0)) == SOCKET_ERROR)
323 {
324 error(program + ": Recv");
325 exit(1);
326 }
327 #else
328 if ((received = !tls ? recv(http, buffer, bytes, 0) :
329 SSL_read(ssl, buffer, bytes)) <= 0)
330 {
331 !tls ? error(program + ": Recv") : error(program +
332 ": SSL Read", int(received));
333 }
334 #endif
335 else if (received != bytes)
336 {
337 left -= received;
338 page << buffer;
339
340 memset(buffer, 0, BUFSIZ + 1);
341
342 bytes -= received;
343 }
344 else
345 {
346 break;
347 }
348 }
349
350 page << buffer;
351 left -= bytes;
352 }
353 }
354 else
355 {
356 unsigned chunk;
357
358 do
359 {
360 istringstream number(getline());
361
362 number.setf(ios_base::hex, ios_base::basefield);
363 number >> chunk;
364
365 unsigned left = chunk;
366
367 while (left > 0)
368 {
369 memset(buffer, 0, BUFSIZ + 1);
370
371 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
372 long received;
373
374 while (true)
375 {
376 #ifndef _OpenSSL_
377 if ((received = recv(http, buffer, bytes, 0)) ==
378 SOCKET_ERROR)
379 {
380 error(program + ": Recv");
381 exit(1);
382 }
383 #else
384 if ((received = !tls ? recv(http, buffer, bytes, 0) :
385 SSL_read(ssl, buffer, bytes)) <= 0)
386 {
387 !tls ? error(program + ": Recv") : error(program +
388 ": SSL Read", int(received));
389 exit(1);
390 }
391 #endif
392 else if (received != bytes)
393 {
394 left -= received;
395 page << buffer;
396
397 memset(buffer, 0, BUFSIZ + 1);
398
399 bytes -= received;
400 }
401 else
402 {
403 break;
404 }
405 }
406
407 page << buffer;
408 left -= bytes;
409 }
410
411 getline();
412 length += chunk;
413 }
414 while (chunk > 0);
415 }
416
417 if (!binary)
418 {
419 string page = this->page.str();
420
421 for (unsigned index = 0; index < page.length(); index++)
422 {
423 if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
424 1] == '\n' : false)
425 {
426 page.erase(index, 1);
427 }
428 else if (page[index] == '\r')
429 {
430 page[index] = '\n';
431 }
432 }
433
434 this->page.str(page);
435 }
436 }
437
438 void HttpHandler::putline(const string line)
439 {
440 sprintf(buffer, "%s\r\n", line.c_str());
441
442 #ifndef _OpenSSL_
443 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
444 {
445 error(program + ": Send");
446 exit(1);
447 }
448 #else
449 if (!tls)
450 {
451 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
452 {
453 error(program + ": Send");
454 exit(1);
455 }
456 }
457 else
458 {
459 int number;
460
461 if ((number = SSL_write(ssl, buffer, strlen(buffer))) <= 0)
462 {
463 error(program + ": SSL Write", number);
464 exit(1);
465 }
466 }
467 #endif
468 }
469
470 string HttpHandler::getline()
471 {
472 string line;
473 char byte;
474
475 do
476 {
477 #ifndef _OpenSSL_
478 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
479 {
480 error(program + ": Recv");
481 }
482 #else
483 if (!tls)
484 {
485 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
486 {
487 error(program + ": Recv");
488 }
489 }
490 else
491 {
492 int number;
493
494 if ((number = SSL_read(ssl, &byte, 1)) <= 0)
495 {
496 error(program + ": SSL Read", number);
497 }
498 }
499 #endif
500
501 if (byte != '\r' && byte != '\n')
502 {
503 line += byte;
504 }
505 }
506 while (byte != '\n');
507
508 return line;
509 }
510
511 void HttpHandler::error(const string& prefix, bool host)
512 {
513 #ifdef _WIN32
514 string error;
515
516 switch (WSAGetLastError())
517 {
518 case WSAEACCES:
519 error = "Permission denied";
520 break;
521 case WSAEADDRINUSE:
522 error = "Address already in use";
523 break;
524 case WSAEADDRNOTAVAIL:
525 error = "Cannot assign requested address";
526 break;
527 case WSAEAFNOSUPPORT:
528 error = "Address family not supported by protocol family";
529 break;
530 case WSAEALREADY:
531 error = "Operation already in progress";
532 break;
533 case WSAECONNABORTED:
534 error = "Software caused connection abort";
535 break;
536 case WSAECONNREFUSED:
537 error = "Connection refused";
538 break;
539 case WSAECONNRESET:
540 error = "Connection reset by peer";
541 break;
542 case WSAEDESTADDRREQ:
543 error = "Destination address required";
544 break;
545 case WSAEFAULT:
546 error = "Bad address";
547 break;
548 case WSAEHOSTDOWN:
549 error = "Host is down";
550 break;
551 case WSAEHOSTUNREACH:
552 error = "No route to host";
553 break;
554 case WSAEINPROGRESS:
555 error = "Operation now in progress";
556 break;
557 case WSAEINTR:
558 error = "Interrupted function call";
559 break;
560 case WSAEINVAL:
561 error = "Invalid argument";
562 break;
563 case WSAEISCONN:
564 error = "Socket is already connected";
565 break;
566 case WSAEMFILE:
567 error = "Too many open files";
568 break;
569 case WSAEMSGSIZE:
570 error = "Message too long";
571 break;
572 case WSAENETDOWN:
573 error = "Network is down";
574 break;
575 case WSAENETRESET:
576 error = "Network dropped connection on reset";
577 break;
578 case WSAENETUNREACH:
579 error = "Network is unreachable";
580 break;
581 case WSAENOBUFS:
582 error = "No buffer space available";
583 break;
584 case WSAENOPROTOOPT:
585 error = "Bad protocol option";
586 break;
587 case WSAENOTCONN:
588 error = "Socket is not connected";
589 break;
590 case WSAENOTSOCK:
591 error = "Socket operation on non-socket";
592 break;
593 case WSAEOPNOTSUPP:
594 error = "Operation not supported";
595 break;
596 case WSAEPFNOSUPPORT:
597 error = "Protocol family not supported";
598 break;
599 case WSAEPROCLIM:
600 error = "Too many processes";
601 break;
602 case WSAEPROTONOSUPPORT:
603 error = "Protocol not supported";
604 break;
605 case WSAEPROTOTYPE:
606 error = "Protocol wrong type for socket";
607 break;
608 case WSAESHUTDOWN:
609 error = "Cannot send after socket shutdown";
610 break;
611 case WSAESOCKTNOSUPPORT:
612 error = "Socket type not supported";
613 break;
614 case WSAETIMEDOUT:
615 error = "Connection timed out";
616 break;
617 case WSATYPE_NOT_FOUND:
618 error = "Class type not found";
619 break;
620 case WSAEWOULDBLOCK:
621 error = "Resource temporarily unavailable";
622 break;
623 case WSAHOST_NOT_FOUND:
624 error = "Host not found";
625 break;
626 case WSA_INVALID_HANDLE:
627 error = "Specified event object handle is invalid";
628 break;
629 case WSA_INVALID_PARAMETER:
630 error = "One or more parameters are invalid";
631 break;
632 // case WSAINVALIDPROCTABLE:
633 // error = "Invalid procedure table from service provider";
634 // break;
635 // case WSAINVALIDPROVIDER:
636 // error = "Invalid service provider version number";
637 // break;
638 case WSA_IO_INCOMPLETE:
639 error = "Overlapped I/O event object not in signaled state";
640 break;
641 case WSA_IO_PENDING:
642 error = "Overlapped operations will complete later";
643 break;
644 case WSA_NOT_ENOUGH_MEMORY:
645 error = "Insufficient memory available";
646 break;
647 case WSANOTINITIALISED:
648 error = "Successful WSAStartup not yet performed";
649 break;
650 case WSANO_DATA:
651 error = "Valid name, no data record of requested type";
652 break;
653 case WSANO_RECOVERY:
654 error = "This is a non-recoverable error";
655 break;
656 // case WSAPROVIDERFAILEDINIT:
657 // error = "Unable to initialize a service provider";
658 // break;
659 case WSASYSCALLFAILURE:
660 error = "System call failure";
661 break;
662 case WSASYSNOTREADY:
663 error = "Network subsystem is unavailable";
664 break;
665 case WSATRY_AGAIN:
666 error = "Non-authoritative host not found";
667 break;
668 case WSAVERNOTSUPPORTED:
669 error = "WINSOCK.DLL version out of range";
670 break;
671 case WSAEDISCON:
672 error = "Graceful shutdown in progress";
673 break;
674 case WSA_OPERATION_ABORTED:
675 error = "Overlapped operation aborted";
676 break;
677 default:
678 error = "Unknown error";
679 break;
680 }
681
682 cerr << prefix << ": " << error << "\n";
683 #else
684 if (host)
685 {
686 string error;
687
688 switch (h_errno)
689 {
690 case HOST_NOT_FOUND:
691 error = "Unknown host";
692 break;
693 case TRY_AGAIN:
694 error = "Host name lookup failure";
695 break;
696 case NO_RECOVERY:
697 error = "Unknown server error";
698 break;
699 case NO_DATA:
700 error = "No address associated with name";
701 break;
702 default:
703 error = "Unknown error";
704 break;
705 }
706
707 cerr << prefix << ": " << error << "\n";
708 }
709 else
710 {
711 perror(prefix.c_str());
712 }
713 #endif // _WIN32
714 }
715
716 #ifdef _OpenSSL_
717 void HttpHandler::error(const string& prefix, int number)
718 {
719 string error;
720
721 switch (SSL_get_error(ssl, number))
722 {
723 case SSL_ERROR_NONE:
724 error = "The TLS/SSL I/O operation completed";
725 break;
726 case SSL_ERROR_ZERO_RETURN:
727 error = "The TLS/SSL connection has been closed";
728 break;
729 case SSL_ERROR_WANT_READ:
730 case SSL_ERROR_WANT_WRITE:
731 case SSL_ERROR_WANT_CONNECT:
732 // case SSL_ERROR_WANT_ACCEPT:
733 case SSL_ERROR_WANT_X509_LOOKUP:
734 error = "The operation did not complete";
735 break;
736 case SSL_ERROR_SYSCALL:
737 if (int err = ERR_get_error() != 0)
738 {
739 error = ERR_reason_error_string(err);
740 }
741 else
742 {
743 switch (number)
744 {
745 case 0:
746 error = "An EOF was observed that violates the protocol";
747 break;
748 case -1:
749 this->error(prefix);
750 return;
751 default:
752 error = "Unknown error";
753 break;
754 }
755 }
756 break;
757 case SSL_ERROR_SSL:
758 error = ERR_reason_error_string(ERR_get_error());
759 break;
760 default:
761 error = "Unknown error";
762 break;
763 }
764
765 cerr << prefix << ": " << error << "\n";
766 }
767
768 bool HttpHandler::starttls()
769 {
770 SSL_load_error_strings();
771 SSL_library_init();
772
773 #ifndef _urandomdev_
774 int pid = getpid();
775 int now = time(NULL);
776
777 unsigned seed = now > pid ? now - pid : pid - now;
778
779 char* junk = new char[seed % 30 + 2];
780 junk[0] = pid;
781 junk[seed % 30 + 1] = now;
782
783 srand(seed);
784
785 for (int index = 1; index < seed % 30 + 1; index++)
786 {
787 junk[index] = rand();
788 }
789
790 if (debug)
791 {
792 cerr << "junk = {\n";
793
794 for (int index = 1; index < seed % 30 + 2; index++)
795 {
796 cerr << " [" << index << "] = " << int(junk[index]) << "\n";
797 }
798
799 cerr << "}\n";
800 }
801
802 RAND_seed(junk, seed % 30 + 2);
803
804 delete junk;
805 #endif
806
807 ctx = SSL_CTX_new(TLSv1_client_method());
808
809 if (ctx == NULL)
810 {
811 cerr << program << ": SSL CTX New: "
812 << ERR_reason_error_string(ERR_get_error()) << "\n";
813 return false;
814 }
815
816 ssl = SSL_new(ctx);
817
818 if (SSL_set_fd(ssl, http) == 0)
819 {
820 cerr << program << ": SSL Set FD: "
821 << ERR_reason_error_string(ERR_get_error()) << "\n";
822 return false;
823 }
824
825 int number;
826
827 if ((number = SSL_connect(ssl)) <= 0)
828 {
829 error(program + ": SSL Connect", number);
830 return false;
831 }
832
833 return true;
834 }
835 #endif
836
837 istream& operator>>(istream& is, HttpHandler::code& data)
838 {
839 int number;
840
841 is >> number;
842
843 data = HttpHandler::code(number);
844
845 return is;
846 }