ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 194
Committed: 2003-07-11T00:54:47-07:00 (21 years, 11 months ago) by douglas
File size: 13287 byte(s)
Log Message:
Added Id tags to a bunch of files. $Id$!

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4 douglas 28 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 douglas 1 * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine HTTP Handler
46     //
47     // Douglas Thrift
48     //
49 douglas 194 // $Id: HttpHandler.cpp,v 1.18 2003/07/11 07:54:46 douglas Exp $
50 douglas 1
51     #include "HttpHandler.h"
52    
53 douglas 179 // Lovely C Sockets!
54 douglas 178 #ifndef _WIN32
55 douglas 179 // BSD Sockets
56 douglas 178 #include <unistd.h>
57     #include <sys/types.h>
58     #include <sys/socket.h>
59     #include <netinet/in.h>
60     #include <netdb.h>
61    
62     #define INVALID_SOCKET -1
63     #define SOCKET_ERROR -1
64    
65     inline int closesocket(SOCKET s) { return close(s); }
66     #endif
67    
68 douglas 1 HttpHandler::HttpHandler()
69     {
70 douglas 14 buffer = new char[BUFSIZ + 1];
71    
72 douglas 13 #ifdef _WIN32
73 douglas 14 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
74 douglas 13 {
75 douglas 17 error(program + ": WSAStartup");
76 douglas 13 exit(1);
77     }
78     #endif // _WIN32
79 douglas 1
80 douglas 18 length = 0;
81     chunked = false;
82 douglas 1 }
83    
84     HttpHandler::~HttpHandler()
85     {
86 douglas 14 delete [] buffer;
87    
88 douglas 13 #ifdef _WIN32
89     WSACleanup();
90     #endif // _WIN32
91 douglas 1 }
92    
93 douglas 25 bool HttpHandler::handle(URL &url, const string referer, bool head)
94 douglas 1 {
95     bool answer = false;
96    
97 douglas 14 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
98     {
99     error(program + ": Socket");
100     exit(1);
101     }
102 douglas 1
103 douglas 14 sockaddr_in address;
104     hostent* host;
105 douglas 1
106 douglas 14 address.sin_family = AF_INET;
107 douglas 1
108 douglas 14 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
109     {
110     error(program + ": Host: " + url.getAddress(), true);
111     return answer;
112     }
113 douglas 1
114 douglas 14 address.sin_addr = *((in_addr*)*host->h_addr_list);
115     address.sin_port = htons(url.getPort());
116    
117     if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
118     SOCKET_ERROR)
119     {
120     error(program + ": Connect");
121     return answer;
122     }
123    
124     if (head)
125     {
126     putline("HEAD " + url.getPath() + " HTTP/1.1");
127     }
128     else
129     {
130     putline("GET " + url.getPath() + " HTTP/1.1");
131     }
132    
133     putline("Accept: text/html; text/plain");
134     putline("User-Agent: " + agent(true) + ' ' + platform());
135    
136     if (url.getPort() == 80)
137     {
138     putline("Host: " + url.getAddress());
139     }
140     else
141     {
142     char* port = new char[1024];
143     sprintf(port, "%u", url.getPort());
144    
145     putline("Host: " + url.getAddress() + ':' + port);
146    
147     delete [] port;
148     }
149    
150 douglas 25 if (referer != "")
151     {
152     putline("Referer: " + referer);
153     }
154    
155 douglas 18 putline("Connection: close");
156 douglas 14 putline();
157    
158 douglas 18 code response;
159     string line;
160 douglas 17
161 douglas 18 do
162 douglas 17 {
163 douglas 18 line = getline();
164 douglas 17
165 douglas 18 if (line.find("HTTP/") != 0)
166     {
167     return answer;
168     }
169 douglas 17
170 douglas 18 unsigned dot = line.find('.');
171     unsigned space = line.find(' ');
172 douglas 17
173 douglas 19 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 10);
174     unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(),
175     0, 10);
176 douglas 17
177 douglas 24 if (major > 1)
178 douglas 18 {
179 douglas 19 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
180     major << "." << minor << "\n";
181 douglas 18
182     return answer;
183     }
184    
185 douglas 19 response = code(strtoul(line.substr(space + 1).c_str(), 0, 10));
186 douglas 18
187     if (response < ok) do line = getline(); while (line != "");
188 douglas 17 }
189 douglas 18 while (response < ok);
190 douglas 17
191     do
192     {
193     line = getline();
194 douglas 18
195     if (line != "")
196     {
197     unsigned colon = line.find(':');
198    
199     string field = line.substr(0, colon);
200     string value = line.substr(colon + 1);
201    
202     while (isspace(value[0])) value.erase(0, 1);
203    
204 douglas 19 if (field == "Content-Type")
205     {
206     type = value;
207     }
208     else if (field == "Content-Length")
209     {
210     length = strtoul(value.c_str(), 0, 10);
211     }
212     else if (field == "Location")
213     {
214     location = value;
215     }
216     else if (field == "Transfer-Encoding")
217     {
218     chunked = value == "chunked";
219     }
220 douglas 18 }
221 douglas 17 }
222     while (line != "");
223    
224     switch (response)
225     {
226     case ok:
227 douglas 18 if (debug) cerr << "response = " << response << "\n";
228 douglas 17 answer = true;
229     break;
230 douglas 18 case choices:
231 douglas 17 case moved:
232     case found:
233 douglas 18 if (debug) cerr << "response = " << response << "\n"
234     << "location = " << location << "\n";
235     location = getLink(location, url);
236 douglas 17 break;
237     case notfound:
238     case internal:
239 douglas 18 if (debug) cerr << "response = " << response << "\n";
240 douglas 17 break;
241     default:
242 douglas 18 if (debug) cerr << "response = " << response << "\n";
243     if (response <= 299)
244     {
245     answer = true;
246     }
247     else if (response <= 399)
248     {
249     location = getLink(location, url);
250     }
251 douglas 17 break;
252     }
253    
254 douglas 19 if (!head && answer) populate();
255    
256 douglas 1 return answer;
257     }
258    
259     HttpHandler& HttpHandler::getline(string& line, char endline)
260     {
261 douglas 21 unsigned end = page.find(endline);
262     unsigned newline = page.find('\n');
263 douglas 1
264     if (newline < end || end == string::npos)
265     {
266     end = newline;
267     }
268    
269 douglas 20 line = page.substr(0, end);
270     page.erase(0, (end == string::npos ? end : end + 1));
271 douglas 1
272     return *this;
273     }
274    
275     void HttpHandler::clear()
276     {
277 douglas 18 closesocket(http);
278    
279 douglas 17 type = "";
280     length = 0;
281     location = "";
282 douglas 1 page = "";
283 douglas 18 chunked = false;
284 douglas 1 }
285    
286 douglas 19 void HttpHandler::populate()
287     {
288     if (!chunked)
289     {
290     unsigned left = length;
291    
292     while (left > 0)
293     {
294     memset(buffer, 0, BUFSIZ + 1);
295    
296     unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
297 douglas 24 unsigned received;
298 douglas 19
299 douglas 145 while (true)
300 douglas 19 {
301 douglas 145 if ((received = recv(http, buffer, bytes, 0)) == SOCKET_ERROR)
302 douglas 24 {
303     error(program + ": Recv");
304     exit(1);
305     }
306 douglas 145 else if (received != bytes)
307     {
308     left -= received;
309     page += buffer;
310    
311     memset(buffer, 0, BUFSIZ + 1);
312    
313     bytes -= received;
314     }
315     else
316     {
317     break;
318     }
319 douglas 24 }
320    
321 douglas 19 page += buffer;
322     left -= bytes;
323     }
324     }
325     else
326     {
327 douglas 20 unsigned chunk;
328    
329     do
330     {
331     chunk = strtoul(getline().c_str(), 0, 16);
332    
333     unsigned left = chunk;
334    
335     while (left > 0)
336     {
337     memset(buffer, 0, BUFSIZ + 1);
338    
339     unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
340 douglas 24 unsigned received;
341 douglas 20
342 douglas 145 while (true)
343 douglas 20 {
344 douglas 145 if ((received = recv(http, buffer, bytes, 0)) ==
345     SOCKET_ERROR)
346 douglas 24 {
347     error(program + ": Recv");
348     exit(1);
349     }
350 douglas 145 else if (received != bytes)
351     {
352     left -= received;
353     page += buffer;
354    
355     memset(buffer, 0, BUFSIZ + 1);
356    
357     bytes -= received;
358     }
359     else
360     {
361     break;
362     }
363 douglas 24 }
364    
365 douglas 20 page += buffer;
366     left -= bytes;
367     }
368    
369     getline();
370     length += chunk;
371     }
372     while (chunk > 0);
373 douglas 19 }
374    
375 douglas 20 for (unsigned index = 0; index < page.length(); index++)
376     {
377     if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
378     1] == '\n' : false)
379     {
380     page.erase(index, 1);
381     }
382     else if (page[index] == '\r')
383     {
384     page[index] = '\n';
385     }
386     }
387 douglas 19 }
388    
389 douglas 14 void HttpHandler::putline(const string line)
390     {
391     sprintf(buffer, "%s\r\n", line.c_str());
392     if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
393     {
394     error(program + ": Send");
395     exit(1);
396     }
397     }
398    
399 douglas 17 string HttpHandler::getline()
400     {
401     string line;
402     char byte;
403    
404     do
405     {
406     if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
407     {
408     error(program + ": Recv");
409     }
410    
411     if (byte != '\r' && byte != '\n')
412     {
413     line += byte;
414     }
415     }
416     while (byte != '\n');
417    
418     return line;
419     }
420    
421 douglas 18 void HttpHandler::error(const string& prefix, bool host)
422 douglas 1 {
423 douglas 13 #ifdef _WIN32
424     string error;
425 douglas 1
426 douglas 13 switch (WSAGetLastError())
427 douglas 1 {
428 douglas 13 case WSAEACCES:
429 douglas 180 error = "Permission denied";
430 douglas 13 break;
431     case WSAEADDRINUSE:
432 douglas 180 error = "Address already in use";
433 douglas 13 break;
434     case WSAEADDRNOTAVAIL:
435 douglas 180 error = "Cannot assign requested address";
436 douglas 13 break;
437     case WSAEAFNOSUPPORT:
438 douglas 180 error = "Address family not supported by protocol family";
439 douglas 13 break;
440     case WSAEALREADY:
441 douglas 180 error = "Operation already in progress";
442 douglas 13 break;
443     case WSAECONNABORTED:
444 douglas 180 error = "Software caused connection abort";
445 douglas 13 break;
446     case WSAECONNREFUSED:
447 douglas 180 error = "Connection refused";
448 douglas 13 break;
449     case WSAECONNRESET:
450 douglas 180 error = "Connection reset by peer";
451 douglas 13 break;
452     case WSAEDESTADDRREQ:
453 douglas 180 error = "Destination address required";
454 douglas 13 break;
455     case WSAEFAULT:
456 douglas 180 error = "Bad address";
457 douglas 13 break;
458     case WSAEHOSTDOWN:
459 douglas 180 error = "Host is down";
460 douglas 13 break;
461     case WSAEHOSTUNREACH:
462 douglas 180 error = "No route to host";
463 douglas 13 break;
464     case WSAEINPROGRESS:
465 douglas 180 error = "Operation now in progress";
466 douglas 13 break;
467     case WSAEINTR:
468 douglas 180 error = "Interrupted function call";
469 douglas 13 break;
470     case WSAEINVAL:
471 douglas 180 error = "Invalid argument";
472 douglas 13 break;
473     case WSAEISCONN:
474 douglas 180 error = "Socket is already connected";
475 douglas 13 break;
476     case WSAEMFILE:
477 douglas 180 error = "Too many open files";
478 douglas 13 break;
479     case WSAEMSGSIZE:
480 douglas 180 error = "Message too long";
481 douglas 13 break;
482     case WSAENETDOWN:
483 douglas 180 error = "Network is down";
484 douglas 13 break;
485     case WSAENETRESET:
486 douglas 180 error = "Network dropped connection on reset";
487 douglas 13 break;
488     case WSAENETUNREACH:
489 douglas 180 error = "Network is unreachable";
490 douglas 13 break;
491     case WSAENOBUFS:
492 douglas 180 error = "No buffer space available";
493 douglas 13 break;
494     case WSAENOPROTOOPT:
495 douglas 180 error = "Bad protocol option";
496 douglas 13 break;
497     case WSAENOTCONN:
498 douglas 180 error = "Socket is not connected";
499 douglas 13 break;
500     case WSAENOTSOCK:
501 douglas 180 error = "Socket operation on non-socket";
502 douglas 13 break;
503     case WSAEOPNOTSUPP:
504 douglas 180 error = "Operation not supported";
505 douglas 13 break;
506     case WSAEPFNOSUPPORT:
507 douglas 180 error = "Protocol family not supported";
508 douglas 13 break;
509     case WSAEPROCLIM:
510 douglas 180 error = "Too many processes";
511 douglas 13 break;
512     case WSAEPROTONOSUPPORT:
513 douglas 180 error = "Protocol not supported";
514 douglas 13 break;
515     case WSAEPROTOTYPE:
516 douglas 180 error = "Protocol wrong type for socket";
517 douglas 13 break;
518     case WSAESHUTDOWN:
519 douglas 180 error = "Cannot send after socket shutdown";
520 douglas 13 break;
521     case WSAESOCKTNOSUPPORT:
522 douglas 180 error = "Socket type not supported";
523 douglas 13 break;
524     case WSAETIMEDOUT:
525 douglas 180 error = "Connection timed out";
526 douglas 13 break;
527     case WSATYPE_NOT_FOUND:
528 douglas 180 error = "Class type not found";
529 douglas 13 break;
530     case WSAEWOULDBLOCK:
531 douglas 180 error = "Resource temporarily unavailable";
532 douglas 13 break;
533     case WSAHOST_NOT_FOUND:
534 douglas 180 error = "Host not found";
535 douglas 13 break;
536     case WSA_INVALID_HANDLE:
537 douglas 180 error = "Specified event object handle is invalid";
538 douglas 13 break;
539     case WSA_INVALID_PARAMETER:
540 douglas 180 error = "One or more parameters are invalid";
541 douglas 13 break;
542     // case WSAINVALIDPROCTABLE:
543 douglas 180 // error = "Invalid procedure table from service provider";
544 douglas 13 // break;
545     // case WSAINVALIDPROVIDER:
546 douglas 180 // error = "Invalid service provider version number";
547 douglas 13 // break;
548     case WSA_IO_INCOMPLETE:
549 douglas 180 error = "Overlapped I/O event object not in signaled state";
550 douglas 13 break;
551     case WSA_IO_PENDING:
552 douglas 180 error = "Overlapped operations will complete later";
553 douglas 13 break;
554     case WSA_NOT_ENOUGH_MEMORY:
555 douglas 180 error = "Insufficient memory available";
556 douglas 13 break;
557     case WSANOTINITIALISED:
558 douglas 180 error = "Successful WSAStartup not yet performed";
559 douglas 13 break;
560     case WSANO_DATA:
561 douglas 180 error = "Valid name, no data record of requested type";
562 douglas 13 break;
563     case WSANO_RECOVERY:
564 douglas 180 error = "This is a non-recoverable error";
565 douglas 13 break;
566     // case WSAPROVIDERFAILEDINIT:
567 douglas 180 // error = "Unable to initialize a service provider";
568 douglas 13 // break;
569     case WSASYSCALLFAILURE:
570 douglas 180 error = "System call failure";
571 douglas 13 break;
572     case WSASYSNOTREADY:
573 douglas 180 error = "Network subsystem is unavailable";
574 douglas 13 break;
575     case WSATRY_AGAIN:
576 douglas 180 error = "Non-authoritative host not found";
577 douglas 13 break;
578     case WSAVERNOTSUPPORTED:
579 douglas 180 error = "WINSOCK.DLL version out of range";
580 douglas 13 break;
581     case WSAEDISCON:
582 douglas 180 error = "Graceful shutdown in progress";
583 douglas 13 break;
584     case WSA_OPERATION_ABORTED:
585 douglas 180 error = "Overlapped operation aborted";
586 douglas 13 break;
587     default:
588 douglas 180 error = "Unknown error";
589 douglas 13 break;
590     }
591 douglas 1
592 douglas 13 cerr << prefix << ": " << error << "\n";
593     #else
594     if (host)
595     {
596 douglas 179 string error;
597    
598     switch (h_errno)
599     {
600     case HOST_NOT_FOUND:
601     error = "Unknown host";
602     break;
603     case TRY_AGAIN:
604     error = "Host name lookup failure";
605     break;
606     case NO_RECOVERY:
607     error = "Unknown server error";
608     break;
609     case NO_DATA:
610     error = "No address associated with name";
611     break;
612     default:
613     error = "Unknown error";
614     break;
615     }
616    
617     cerr << prefix << ": " << error << "\n";
618 douglas 1 }
619     else
620     {
621 douglas 13 perror(prefix.c_str());
622 douglas 1 }
623 douglas 13 #endif // _WIN32
624     }