ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/Search/trunk/HttpHandler.cpp
Revision: 201
Committed: 2003-07-15T01:01:00-07:00 (21 years, 11 months ago) by douglas
Original Path: trunk/Search/HttpHandler.cpp
File size: 13477 byte(s)
Log Message:
Moved openssl version function to HttpHandler.h and fixed agent string.

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4 douglas 28 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 douglas 1 * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine HTTP Handler
46     //
47     // Douglas Thrift
48     //
49 douglas 201 // $Id: HttpHandler.cpp,v 1.19 2003/07/15 08:01:00 douglas Exp $
50 douglas 1
51     #include "HttpHandler.h"
52    
53 douglas 179 // Lovely C Sockets!
54 douglas 178 #ifndef _WIN32
55 douglas 179 // BSD Sockets
56 douglas 178 #include <unistd.h>
57     #include <sys/types.h>
58     #include <sys/socket.h>
59     #include <netinet/in.h>
60     #include <netdb.h>
61    
62     #define INVALID_SOCKET -1
63     #define SOCKET_ERROR -1
64    
65     inline int closesocket(SOCKET s) { return close(s); }
66     #endif
67    
68 douglas 1 HttpHandler::HttpHandler()
69     {
70 douglas 14 buffer = new char[BUFSIZ + 1];
71    
72 douglas 13 #ifdef _WIN32
73 douglas 14 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
74 douglas 13 {
75 douglas 17 error(program + ": WSAStartup");
76 douglas 13 exit(1);
77     }
78     #endif // _WIN32
79 douglas 1
80 douglas 18 length = 0;
81     chunked = false;
82 douglas 201 #ifdef _OpenSSL_
83     tls = false;
84     #endif
85 douglas 1 }
86    
87     HttpHandler::~HttpHandler()
88     {
89 douglas 14 delete [] buffer;
90    
91 douglas 13 #ifdef _WIN32
92     WSACleanup();
93     #endif // _WIN32
94 douglas 1 }
95    
96 douglas 25 bool HttpHandler::handle(URL &url, const string referer, bool head)
97 douglas 1 {
98     bool answer = false;
99    
100 douglas 14 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
101     {
102     error(program + ": Socket");
103     exit(1);
104     }
105 douglas 1
106 douglas 14 sockaddr_in address;
107     hostent* host;
108 douglas 1
109 douglas 14 address.sin_family = AF_INET;
110 douglas 1
111 douglas 14 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
112     {
113     error(program + ": Host: " + url.getAddress(), true);
114     return answer;
115     }
116 douglas 1
117 douglas 14 address.sin_addr = *((in_addr*)*host->h_addr_list);
118     address.sin_port = htons(url.getPort());
119    
120     if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
121     SOCKET_ERROR)
122     {
123     error(program + ": Connect");
124     return answer;
125     }
126    
127     if (head)
128     {
129     putline("HEAD " + url.getPath() + " HTTP/1.1");
130     }
131     else
132     {
133     putline("GET " + url.getPath() + " HTTP/1.1");
134     }
135    
136     putline("Accept: text/html; text/plain");
137 douglas 201 #ifndef _OpenSSL_
138 douglas 14 putline("User-Agent: " + agent(true) + ' ' + platform());
139 douglas 201 #else
140     putline("User-Agent: " + agent(true) + ' ' + platform() + ' '
141     + openssl(true));
142     #endif
143 douglas 14
144     if (url.getPort() == 80)
145     {
146     putline("Host: " + url.getAddress());
147     }
148     else
149     {
150     char* port = new char[1024];
151     sprintf(port, "%u", url.getPort());
152    
153     putline("Host: " + url.getAddress() + ':' + port);
154    
155     delete [] port;
156     }
157    
158 douglas 25 if (referer != "")
159     {
160     putline("Referer: " + referer);
161     }
162    
163 douglas 18 putline("Connection: close");
164 douglas 14 putline();
165    
166 douglas 18 code response;
167     string line;
168 douglas 17
169 douglas 18 do
170 douglas 17 {
171 douglas 18 line = getline();
172 douglas 17
173 douglas 18 if (line.find("HTTP/") != 0)
174     {
175     return answer;
176     }
177 douglas 17
178 douglas 18 unsigned dot = line.find('.');
179     unsigned space = line.find(' ');
180 douglas 17
181 douglas 19 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 10);
182     unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(),
183     0, 10);
184 douglas 17
185 douglas 24 if (major > 1)
186 douglas 18 {
187 douglas 19 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
188     major << "." << minor << "\n";
189 douglas 18
190     return answer;
191     }
192    
193 douglas 19 response = code(strtoul(line.substr(space + 1).c_str(), 0, 10));
194 douglas 18
195     if (response < ok) do line = getline(); while (line != "");
196 douglas 17 }
197 douglas 18 while (response < ok);
198 douglas 17
199     do
200     {
201     line = getline();
202 douglas 18
203     if (line != "")
204     {
205     unsigned colon = line.find(':');
206    
207     string field = line.substr(0, colon);
208     string value = line.substr(colon + 1);
209    
210     while (isspace(value[0])) value.erase(0, 1);
211    
212 douglas 19 if (field == "Content-Type")
213     {
214     type = value;
215     }
216     else if (field == "Content-Length")
217     {
218     length = strtoul(value.c_str(), 0, 10);
219     }
220     else if (field == "Location")
221     {
222     location = value;
223     }
224     else if (field == "Transfer-Encoding")
225     {
226     chunked = value == "chunked";
227     }
228 douglas 18 }
229 douglas 17 }
230     while (line != "");
231    
232     switch (response)
233     {
234     case ok:
235 douglas 18 if (debug) cerr << "response = " << response << "\n";
236 douglas 17 answer = true;
237     break;
238 douglas 18 case choices:
239 douglas 17 case moved:
240     case found:
241 douglas 18 if (debug) cerr << "response = " << response << "\n"
242     << "location = " << location << "\n";
243     location = getLink(location, url);
244 douglas 17 break;
245     case notfound:
246     case internal:
247 douglas 18 if (debug) cerr << "response = " << response << "\n";
248 douglas 17 break;
249     default:
250 douglas 18 if (debug) cerr << "response = " << response << "\n";
251     if (response <= 299)
252     {
253     answer = true;
254     }
255     else if (response <= 399)
256     {
257     location = getLink(location, url);
258     }
259 douglas 17 break;
260     }
261    
262 douglas 19 if (!head && answer) populate();
263    
264 douglas 1 return answer;
265     }
266    
267     HttpHandler& HttpHandler::getline(string& line, char endline)
268     {
269 douglas 21 unsigned end = page.find(endline);
270     unsigned newline = page.find('\n');
271 douglas 1
272     if (newline < end || end == string::npos)
273     {
274     end = newline;
275     }
276    
277 douglas 20 line = page.substr(0, end);
278     page.erase(0, (end == string::npos ? end : end + 1));
279 douglas 1
280     return *this;
281     }
282    
283     void HttpHandler::clear()
284     {
285 douglas 18 closesocket(http);
286    
287 douglas 17 type = "";
288     length = 0;
289     location = "";
290 douglas 1 page = "";
291 douglas 18 chunked = false;
292 douglas 201 #ifdef _OpenSSL_
293     tls = false;
294     #endif
295 douglas 1 }
296    
297 douglas 19 void HttpHandler::populate()
298     {
299     if (!chunked)
300     {
301     unsigned left = length;
302    
303     while (left > 0)
304     {
305     memset(buffer, 0, BUFSIZ + 1);
306    
307     unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
308 douglas 24 unsigned received;
309 douglas 19
310 douglas 145 while (true)
311 douglas 19 {
312 douglas 145 if ((received = recv(http, buffer, bytes, 0)) == SOCKET_ERROR)
313 douglas 24 {
314     error(program + ": Recv");
315     exit(1);
316     }
317 douglas 145 else if (received != bytes)
318     {
319     left -= received;
320     page += buffer;
321    
322     memset(buffer, 0, BUFSIZ + 1);
323    
324     bytes -= received;
325     }
326     else
327     {
328     break;
329     }
330 douglas 24 }
331    
332 douglas 19 page += buffer;
333     left -= bytes;
334     }
335     }
336     else
337     {
338 douglas 20 unsigned chunk;
339    
340     do
341     {
342     chunk = strtoul(getline().c_str(), 0, 16);
343    
344     unsigned left = chunk;
345    
346     while (left > 0)
347     {
348     memset(buffer, 0, BUFSIZ + 1);
349    
350     unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
351 douglas 24 unsigned received;
352 douglas 20
353 douglas 145 while (true)
354 douglas 20 {
355 douglas 145 if ((received = recv(http, buffer, bytes, 0)) ==
356     SOCKET_ERROR)
357 douglas 24 {
358     error(program + ": Recv");
359     exit(1);
360     }
361 douglas 145 else if (received != bytes)
362     {
363     left -= received;
364     page += buffer;
365    
366     memset(buffer, 0, BUFSIZ + 1);
367    
368     bytes -= received;
369     }
370     else
371     {
372     break;
373     }
374 douglas 24 }
375    
376 douglas 20 page += buffer;
377     left -= bytes;
378     }
379    
380     getline();
381     length += chunk;
382     }
383     while (chunk > 0);
384 douglas 19 }
385    
386 douglas 20 for (unsigned index = 0; index < page.length(); index++)
387     {
388     if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
389     1] == '\n' : false)
390     {
391     page.erase(index, 1);
392     }
393     else if (page[index] == '\r')
394     {
395     page[index] = '\n';
396     }
397     }
398 douglas 19 }
399    
400 douglas 14 void HttpHandler::putline(const string line)
401     {
402     sprintf(buffer, "%s\r\n", line.c_str());
403     if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
404     {
405     error(program + ": Send");
406     exit(1);
407     }
408     }
409    
410 douglas 17 string HttpHandler::getline()
411     {
412     string line;
413     char byte;
414    
415     do
416     {
417     if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
418     {
419     error(program + ": Recv");
420     }
421    
422     if (byte != '\r' && byte != '\n')
423     {
424     line += byte;
425     }
426     }
427     while (byte != '\n');
428    
429     return line;
430     }
431    
432 douglas 18 void HttpHandler::error(const string& prefix, bool host)
433 douglas 1 {
434 douglas 13 #ifdef _WIN32
435     string error;
436 douglas 1
437 douglas 13 switch (WSAGetLastError())
438 douglas 1 {
439 douglas 13 case WSAEACCES:
440 douglas 180 error = "Permission denied";
441 douglas 13 break;
442     case WSAEADDRINUSE:
443 douglas 180 error = "Address already in use";
444 douglas 13 break;
445     case WSAEADDRNOTAVAIL:
446 douglas 180 error = "Cannot assign requested address";
447 douglas 13 break;
448     case WSAEAFNOSUPPORT:
449 douglas 180 error = "Address family not supported by protocol family";
450 douglas 13 break;
451     case WSAEALREADY:
452 douglas 180 error = "Operation already in progress";
453 douglas 13 break;
454     case WSAECONNABORTED:
455 douglas 180 error = "Software caused connection abort";
456 douglas 13 break;
457     case WSAECONNREFUSED:
458 douglas 180 error = "Connection refused";
459 douglas 13 break;
460     case WSAECONNRESET:
461 douglas 180 error = "Connection reset by peer";
462 douglas 13 break;
463     case WSAEDESTADDRREQ:
464 douglas 180 error = "Destination address required";
465 douglas 13 break;
466     case WSAEFAULT:
467 douglas 180 error = "Bad address";
468 douglas 13 break;
469     case WSAEHOSTDOWN:
470 douglas 180 error = "Host is down";
471 douglas 13 break;
472     case WSAEHOSTUNREACH:
473 douglas 180 error = "No route to host";
474 douglas 13 break;
475     case WSAEINPROGRESS:
476 douglas 180 error = "Operation now in progress";
477 douglas 13 break;
478     case WSAEINTR:
479 douglas 180 error = "Interrupted function call";
480 douglas 13 break;
481     case WSAEINVAL:
482 douglas 180 error = "Invalid argument";
483 douglas 13 break;
484     case WSAEISCONN:
485 douglas 180 error = "Socket is already connected";
486 douglas 13 break;
487     case WSAEMFILE:
488 douglas 180 error = "Too many open files";
489 douglas 13 break;
490     case WSAEMSGSIZE:
491 douglas 180 error = "Message too long";
492 douglas 13 break;
493     case WSAENETDOWN:
494 douglas 180 error = "Network is down";
495 douglas 13 break;
496     case WSAENETRESET:
497 douglas 180 error = "Network dropped connection on reset";
498 douglas 13 break;
499     case WSAENETUNREACH:
500 douglas 180 error = "Network is unreachable";
501 douglas 13 break;
502     case WSAENOBUFS:
503 douglas 180 error = "No buffer space available";
504 douglas 13 break;
505     case WSAENOPROTOOPT:
506 douglas 180 error = "Bad protocol option";
507 douglas 13 break;
508     case WSAENOTCONN:
509 douglas 180 error = "Socket is not connected";
510 douglas 13 break;
511     case WSAENOTSOCK:
512 douglas 180 error = "Socket operation on non-socket";
513 douglas 13 break;
514     case WSAEOPNOTSUPP:
515 douglas 180 error = "Operation not supported";
516 douglas 13 break;
517     case WSAEPFNOSUPPORT:
518 douglas 180 error = "Protocol family not supported";
519 douglas 13 break;
520     case WSAEPROCLIM:
521 douglas 180 error = "Too many processes";
522 douglas 13 break;
523     case WSAEPROTONOSUPPORT:
524 douglas 180 error = "Protocol not supported";
525 douglas 13 break;
526     case WSAEPROTOTYPE:
527 douglas 180 error = "Protocol wrong type for socket";
528 douglas 13 break;
529     case WSAESHUTDOWN:
530 douglas 180 error = "Cannot send after socket shutdown";
531 douglas 13 break;
532     case WSAESOCKTNOSUPPORT:
533 douglas 180 error = "Socket type not supported";
534 douglas 13 break;
535     case WSAETIMEDOUT:
536 douglas 180 error = "Connection timed out";
537 douglas 13 break;
538     case WSATYPE_NOT_FOUND:
539 douglas 180 error = "Class type not found";
540 douglas 13 break;
541     case WSAEWOULDBLOCK:
542 douglas 180 error = "Resource temporarily unavailable";
543 douglas 13 break;
544     case WSAHOST_NOT_FOUND:
545 douglas 180 error = "Host not found";
546 douglas 13 break;
547     case WSA_INVALID_HANDLE:
548 douglas 180 error = "Specified event object handle is invalid";
549 douglas 13 break;
550     case WSA_INVALID_PARAMETER:
551 douglas 180 error = "One or more parameters are invalid";
552 douglas 13 break;
553     // case WSAINVALIDPROCTABLE:
554 douglas 180 // error = "Invalid procedure table from service provider";
555 douglas 13 // break;
556     // case WSAINVALIDPROVIDER:
557 douglas 180 // error = "Invalid service provider version number";
558 douglas 13 // break;
559     case WSA_IO_INCOMPLETE:
560 douglas 180 error = "Overlapped I/O event object not in signaled state";
561 douglas 13 break;
562     case WSA_IO_PENDING:
563 douglas 180 error = "Overlapped operations will complete later";
564 douglas 13 break;
565     case WSA_NOT_ENOUGH_MEMORY:
566 douglas 180 error = "Insufficient memory available";
567 douglas 13 break;
568     case WSANOTINITIALISED:
569 douglas 180 error = "Successful WSAStartup not yet performed";
570 douglas 13 break;
571     case WSANO_DATA:
572 douglas 180 error = "Valid name, no data record of requested type";
573 douglas 13 break;
574     case WSANO_RECOVERY:
575 douglas 180 error = "This is a non-recoverable error";
576 douglas 13 break;
577     // case WSAPROVIDERFAILEDINIT:
578 douglas 180 // error = "Unable to initialize a service provider";
579 douglas 13 // break;
580     case WSASYSCALLFAILURE:
581 douglas 180 error = "System call failure";
582 douglas 13 break;
583     case WSASYSNOTREADY:
584 douglas 180 error = "Network subsystem is unavailable";
585 douglas 13 break;
586     case WSATRY_AGAIN:
587 douglas 180 error = "Non-authoritative host not found";
588 douglas 13 break;
589     case WSAVERNOTSUPPORTED:
590 douglas 180 error = "WINSOCK.DLL version out of range";
591 douglas 13 break;
592     case WSAEDISCON:
593 douglas 180 error = "Graceful shutdown in progress";
594 douglas 13 break;
595     case WSA_OPERATION_ABORTED:
596 douglas 180 error = "Overlapped operation aborted";
597 douglas 13 break;
598     default:
599 douglas 180 error = "Unknown error";
600 douglas 13 break;
601     }
602 douglas 1
603 douglas 13 cerr << prefix << ": " << error << "\n";
604     #else
605     if (host)
606     {
607 douglas 179 string error;
608    
609     switch (h_errno)
610     {
611     case HOST_NOT_FOUND:
612     error = "Unknown host";
613     break;
614     case TRY_AGAIN:
615     error = "Host name lookup failure";
616     break;
617     case NO_RECOVERY:
618     error = "Unknown server error";
619     break;
620     case NO_DATA:
621     error = "No address associated with name";
622     break;
623     default:
624     error = "Unknown error";
625     break;
626     }
627    
628     cerr << prefix << ": " << error << "\n";
629 douglas 1 }
630     else
631     {
632 douglas 13 perror(prefix.c_str());
633 douglas 1 }
634 douglas 13 #endif // _WIN32
635     }