ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 179
Committed: 2003-07-05T19:47:23-07:00 (21 years, 11 months ago) by douglas
File size: 13295 byte(s)
Log Message:
Replaced herror function with a switch.

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4 douglas 28 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 douglas 1 * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine HTTP Handler
46     //
47     // Douglas Thrift
48     //
49     // HttpHandler.cpp
50    
51     #include "HttpHandler.h"
52    
53 douglas 179 // Lovely C Sockets!
54 douglas 178 #ifndef _WIN32
55 douglas 179 // BSD Sockets
56 douglas 178 #include <unistd.h>
57     #include <sys/types.h>
58     #include <sys/socket.h>
59     #include <netinet/in.h>
60     #include <netdb.h>
61    
62     #define INVALID_SOCKET -1
63     #define SOCKET_ERROR -1
64    
65     inline int closesocket(SOCKET s) { return close(s); }
66     #endif
67    
68 douglas 1 HttpHandler::HttpHandler()
69     {
70 douglas 14 buffer = new char[BUFSIZ + 1];
71    
72 douglas 13 #ifdef _WIN32
73 douglas 14 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
74 douglas 13 {
75 douglas 17 error(program + ": WSAStartup");
76 douglas 13 exit(1);
77     }
78     #endif // _WIN32
79 douglas 1
80 douglas 18 length = 0;
81     chunked = false;
82 douglas 1 }
83    
84     HttpHandler::~HttpHandler()
85     {
86 douglas 14 delete [] buffer;
87    
88 douglas 13 #ifdef _WIN32
89     WSACleanup();
90     #endif // _WIN32
91 douglas 1 }
92    
93 douglas 25 bool HttpHandler::handle(URL &url, const string referer, bool head)
94 douglas 1 {
95     bool answer = false;
96    
97 douglas 14 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
98     {
99     error(program + ": Socket");
100     exit(1);
101     }
102 douglas 1
103 douglas 14 sockaddr_in address;
104     hostent* host;
105 douglas 1
106 douglas 14 address.sin_family = AF_INET;
107 douglas 1
108 douglas 14 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
109     {
110     error(program + ": Host: " + url.getAddress(), true);
111     return answer;
112     }
113 douglas 1
114 douglas 14 address.sin_addr = *((in_addr*)*host->h_addr_list);
115     address.sin_port = htons(url.getPort());
116    
117     if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
118     SOCKET_ERROR)
119     {
120     error(program + ": Connect");
121     return answer;
122     }
123    
124     if (head)
125     {
126     putline("HEAD " + url.getPath() + " HTTP/1.1");
127     }
128     else
129     {
130     putline("GET " + url.getPath() + " HTTP/1.1");
131     }
132    
133     putline("Accept: text/html; text/plain");
134     putline("User-Agent: " + agent(true) + ' ' + platform());
135    
136     if (url.getPort() == 80)
137     {
138     putline("Host: " + url.getAddress());
139     }
140     else
141     {
142     char* port = new char[1024];
143     sprintf(port, "%u", url.getPort());
144    
145     putline("Host: " + url.getAddress() + ':' + port);
146    
147     delete [] port;
148     }
149    
150 douglas 25 if (referer != "")
151     {
152     putline("Referer: " + referer);
153     }
154    
155 douglas 18 putline("Connection: close");
156 douglas 14 putline();
157    
158 douglas 18 code response;
159     string line;
160 douglas 17
161 douglas 18 do
162 douglas 17 {
163 douglas 18 line = getline();
164 douglas 17
165 douglas 18 if (line.find("HTTP/") != 0)
166     {
167     return answer;
168     }
169 douglas 17
170 douglas 18 unsigned dot = line.find('.');
171     unsigned space = line.find(' ');
172 douglas 17
173 douglas 19 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 10);
174     unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(),
175     0, 10);
176 douglas 17
177 douglas 24 if (major > 1)
178 douglas 18 {
179 douglas 19 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
180     major << "." << minor << "\n";
181 douglas 18
182     return answer;
183     }
184    
185 douglas 19 response = code(strtoul(line.substr(space + 1).c_str(), 0, 10));
186 douglas 18
187     if (response < ok) do line = getline(); while (line != "");
188 douglas 17 }
189 douglas 18 while (response < ok);
190 douglas 17
191     do
192     {
193     line = getline();
194 douglas 18
195     if (line != "")
196     {
197     unsigned colon = line.find(':');
198    
199     string field = line.substr(0, colon);
200     string value = line.substr(colon + 1);
201    
202     while (isspace(value[0])) value.erase(0, 1);
203    
204 douglas 19 if (field == "Content-Type")
205     {
206     type = value;
207     }
208     else if (field == "Content-Length")
209     {
210     length = strtoul(value.c_str(), 0, 10);
211     }
212     else if (field == "Location")
213     {
214     location = value;
215     }
216     else if (field == "Transfer-Encoding")
217     {
218     chunked = value == "chunked";
219     }
220 douglas 18 }
221 douglas 17 }
222     while (line != "");
223    
224     switch (response)
225     {
226     case ok:
227 douglas 18 if (debug) cerr << "response = " << response << "\n";
228 douglas 17 answer = true;
229     break;
230 douglas 18 case choices:
231 douglas 17 case moved:
232     case found:
233 douglas 18 if (debug) cerr << "response = " << response << "\n"
234     << "location = " << location << "\n";
235     location = getLink(location, url);
236 douglas 17 break;
237     case notfound:
238     case internal:
239 douglas 18 if (debug) cerr << "response = " << response << "\n";
240 douglas 17 break;
241     default:
242 douglas 18 if (debug) cerr << "response = " << response << "\n";
243     if (response <= 299)
244     {
245     answer = true;
246     }
247     else if (response <= 399)
248     {
249     location = getLink(location, url);
250     }
251 douglas 17 break;
252     }
253    
254 douglas 19 if (!head && answer) populate();
255    
256 douglas 1 return answer;
257     }
258    
259     HttpHandler& HttpHandler::getline(string& line, char endline)
260     {
261 douglas 21 unsigned end = page.find(endline);
262     unsigned newline = page.find('\n');
263 douglas 1
264     if (newline < end || end == string::npos)
265     {
266     end = newline;
267     }
268    
269 douglas 20 line = page.substr(0, end);
270     page.erase(0, (end == string::npos ? end : end + 1));
271 douglas 1
272     return *this;
273     }
274    
275     void HttpHandler::clear()
276     {
277 douglas 18 closesocket(http);
278    
279 douglas 17 type = "";
280     length = 0;
281     location = "";
282 douglas 1 page = "";
283 douglas 18 chunked = false;
284 douglas 1 }
285    
286 douglas 19 void HttpHandler::populate()
287     {
288     if (!chunked)
289     {
290     unsigned left = length;
291    
292     while (left > 0)
293     {
294     memset(buffer, 0, BUFSIZ + 1);
295    
296     unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
297 douglas 24 unsigned received;
298 douglas 19
299 douglas 145 while (true)
300 douglas 19 {
301 douglas 145 if ((received = recv(http, buffer, bytes, 0)) == SOCKET_ERROR)
302 douglas 24 {
303     error(program + ": Recv");
304     exit(1);
305     }
306 douglas 145 else if (received != bytes)
307     {
308     left -= received;
309     page += buffer;
310    
311     memset(buffer, 0, BUFSIZ + 1);
312    
313     bytes -= received;
314     }
315     else
316     {
317     break;
318     }
319 douglas 24 }
320    
321 douglas 19 page += buffer;
322     left -= bytes;
323     }
324     }
325     else
326     {
327 douglas 20 unsigned chunk;
328    
329     do
330     {
331     chunk = strtoul(getline().c_str(), 0, 16);
332    
333     unsigned left = chunk;
334    
335     while (left > 0)
336     {
337     memset(buffer, 0, BUFSIZ + 1);
338    
339     unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
340 douglas 24 unsigned received;
341 douglas 20
342 douglas 145 while (true)
343 douglas 20 {
344 douglas 145 if ((received = recv(http, buffer, bytes, 0)) ==
345     SOCKET_ERROR)
346 douglas 24 {
347     error(program + ": Recv");
348     exit(1);
349     }
350 douglas 145 else if (received != bytes)
351     {
352     left -= received;
353     page += buffer;
354    
355     memset(buffer, 0, BUFSIZ + 1);
356    
357     bytes -= received;
358     }
359     else
360     {
361     break;
362     }
363 douglas 24 }
364    
365 douglas 20 page += buffer;
366     left -= bytes;
367     }
368    
369     getline();
370     length += chunk;
371     }
372     while (chunk > 0);
373 douglas 19 }
374    
375 douglas 20 for (unsigned index = 0; index < page.length(); index++)
376     {
377     if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
378     1] == '\n' : false)
379     {
380     page.erase(index, 1);
381     }
382     else if (page[index] == '\r')
383     {
384     page[index] = '\n';
385     }
386     }
387 douglas 19 }
388    
389 douglas 14 void HttpHandler::putline(const string line)
390     {
391     sprintf(buffer, "%s\r\n", line.c_str());
392     if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
393     {
394     error(program + ": Send");
395     exit(1);
396     }
397     }
398    
399 douglas 17 string HttpHandler::getline()
400     {
401     string line;
402     char byte;
403    
404     do
405     {
406     if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
407     {
408     error(program + ": Recv");
409     }
410    
411     if (byte != '\r' && byte != '\n')
412     {
413     line += byte;
414     }
415     }
416     while (byte != '\n');
417    
418     return line;
419     }
420    
421 douglas 18 void HttpHandler::error(const string& prefix, bool host)
422 douglas 1 {
423 douglas 13 #ifdef _WIN32
424     string error;
425 douglas 1
426 douglas 13 switch (WSAGetLastError())
427 douglas 1 {
428 douglas 13 case WSAEACCES:
429     error = "Permission denied.";
430     break;
431     case WSAEADDRINUSE:
432     error = "Address already in use.";
433     break;
434     case WSAEADDRNOTAVAIL:
435     error = "Cannot assign requested address.";
436     break;
437     case WSAEAFNOSUPPORT:
438     error = "Address family not supported by protocol family.";
439     break;
440     case WSAEALREADY:
441     error = "Operation already in progress.";
442     break;
443     case WSAECONNABORTED:
444     error = "Software caused connection abort.";
445     break;
446     case WSAECONNREFUSED:
447     error = "Connection refused.";
448     break;
449     case WSAECONNRESET:
450     error = "Connection reset by peer.";
451     break;
452     case WSAEDESTADDRREQ:
453     error = "Destination address required.";
454     break;
455     case WSAEFAULT:
456     error = "Bad address.";
457     break;
458     case WSAEHOSTDOWN:
459     error = "Host is down.";
460     break;
461     case WSAEHOSTUNREACH:
462     error = "No route to host.";
463     break;
464     case WSAEINPROGRESS:
465     error = "Operation now in progress.";
466     break;
467     case WSAEINTR:
468     error = "Interrupted function call.";
469     break;
470     case WSAEINVAL:
471     error = "Invalid argument.";
472     break;
473     case WSAEISCONN:
474     error = "Socket is already connected.";
475     break;
476     case WSAEMFILE:
477     error = "Too many open files.";
478     break;
479     case WSAEMSGSIZE:
480     error = "Message too long.";
481     break;
482     case WSAENETDOWN:
483     error = "Network is down.";
484     break;
485     case WSAENETRESET:
486     error = "Network dropped connection on reset.";
487     break;
488     case WSAENETUNREACH:
489     error = "Network is unreachable.";
490     break;
491     case WSAENOBUFS:
492     error = "No buffer space available.";
493     break;
494     case WSAENOPROTOOPT:
495     error = "Bad protocol option.";
496     break;
497     case WSAENOTCONN:
498     error = "Socket is not connected.";
499     break;
500     case WSAENOTSOCK:
501     error = "Socket operation on non-socket.";
502     break;
503     case WSAEOPNOTSUPP:
504     error = "Operation not supported.";
505     break;
506     case WSAEPFNOSUPPORT:
507     error = "Protocol family not supported.";
508     break;
509     case WSAEPROCLIM:
510     error = "Too many processes.";
511     break;
512     case WSAEPROTONOSUPPORT:
513     error = "Protocol not supported.";
514     break;
515     case WSAEPROTOTYPE:
516     error = "Protocol wrong type for socket.";
517     break;
518     case WSAESHUTDOWN:
519     error = "Cannot send after socket shutdown.";
520     break;
521     case WSAESOCKTNOSUPPORT:
522     error = "Socket type not supported.";
523     break;
524     case WSAETIMEDOUT:
525     error = "Connection timed out.";
526     break;
527     case WSATYPE_NOT_FOUND:
528     error = "Class type not found.";
529     break;
530     case WSAEWOULDBLOCK:
531     error = "Resource temporarily unavailable.";
532     break;
533     case WSAHOST_NOT_FOUND:
534     error = "Host not found.";
535     break;
536     case WSA_INVALID_HANDLE:
537     error = "Specified event object handle is invalid.";
538     break;
539     case WSA_INVALID_PARAMETER:
540     error = "One or more parameters are invalid.";
541     break;
542     // case WSAINVALIDPROCTABLE:
543     // error = "Invalid procedure table from service provider.";
544     // break;
545     // case WSAINVALIDPROVIDER:
546     // error = "Invalid service provider version number.";
547     // break;
548     case WSA_IO_INCOMPLETE:
549     error = "Overlapped I/O event object not in signaled state.";
550     break;
551     case WSA_IO_PENDING:
552     error = "Overlapped operations will complete later.";
553     break;
554     case WSA_NOT_ENOUGH_MEMORY:
555     error = "Insufficient memory available.";
556     break;
557     case WSANOTINITIALISED:
558     error = "Successful WSAStartup not yet performed.";
559     break;
560     case WSANO_DATA:
561     error = "Valid name, no data record of requested type.";
562     break;
563     case WSANO_RECOVERY:
564     error = "This is a non-recoverable error.";
565     break;
566     // case WSAPROVIDERFAILEDINIT:
567     // error = "Unable to initialize a service provider.";
568     // break;
569     case WSASYSCALLFAILURE:
570     error = "System call failure.";
571     break;
572     case WSASYSNOTREADY:
573     error = "Network subsystem is unavailable.";
574     break;
575     case WSATRY_AGAIN:
576     error = "Non-authoritative host not found.";
577     break;
578     case WSAVERNOTSUPPORTED:
579     error = "WINSOCK.DLL version out of range.";
580     break;
581     case WSAEDISCON:
582     error = "Graceful shutdown in progress.";
583     break;
584     case WSA_OPERATION_ABORTED:
585     error = "Overlapped operation aborted.";
586     break;
587     default:
588     error = "Unknown error.";
589     break;
590     }
591 douglas 1
592 douglas 13 cerr << prefix << ": " << error << "\n";
593     #else
594     if (host)
595     {
596 douglas 179 string error;
597    
598     switch (h_errno)
599     {
600     case HOST_NOT_FOUND:
601     error = "Unknown host";
602     break;
603     case TRY_AGAIN:
604     error = "Host name lookup failure";
605     break;
606     case NO_RECOVERY:
607     error = "Unknown server error";
608     break;
609     case NO_DATA:
610     error = "No address associated with name";
611     break;
612     default:
613     error = "Unknown error";
614     break;
615     }
616    
617     cerr << prefix << ": " << error << "\n";
618 douglas 1 }
619     else
620     {
621 douglas 13 perror(prefix.c_str());
622 douglas 1 }
623 douglas 13 #endif // _WIN32
624     }