ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 178
Committed: 2003-07-05T19:13:12-07:00 (21 years, 11 months ago) by douglas
File size: 12884 byte(s)
Log Message:
Moved Unix socket stuff from header file to source file.

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4 douglas 28 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 douglas 1 * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine HTTP Handler
46     //
47     // Douglas Thrift
48     //
49     // HttpHandler.cpp
50    
51     #include "HttpHandler.h"
52    
53 douglas 178 #ifndef _WIN32
54     #include <unistd.h>
55     #include <sys/types.h>
56     #include <sys/socket.h>
57     #include <netinet/in.h>
58     #include <netdb.h>
59    
60     #define INVALID_SOCKET -1
61     #define SOCKET_ERROR -1
62    
63     inline int closesocket(SOCKET s) { return close(s); }
64     #endif
65    
66 douglas 1 HttpHandler::HttpHandler()
67     {
68 douglas 14 buffer = new char[BUFSIZ + 1];
69    
70 douglas 13 #ifdef _WIN32
71 douglas 14 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
72 douglas 13 {
73 douglas 17 error(program + ": WSAStartup");
74 douglas 13 exit(1);
75     }
76     #endif // _WIN32
77 douglas 1
78 douglas 18 length = 0;
79     chunked = false;
80 douglas 1 }
81    
82     HttpHandler::~HttpHandler()
83     {
84 douglas 14 delete [] buffer;
85    
86 douglas 13 #ifdef _WIN32
87     WSACleanup();
88     #endif // _WIN32
89 douglas 1 }
90    
91 douglas 25 bool HttpHandler::handle(URL &url, const string referer, bool head)
92 douglas 1 {
93     bool answer = false;
94    
95 douglas 14 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
96     {
97     error(program + ": Socket");
98     exit(1);
99     }
100 douglas 1
101 douglas 14 sockaddr_in address;
102     hostent* host;
103 douglas 1
104 douglas 14 address.sin_family = AF_INET;
105 douglas 1
106 douglas 14 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
107     {
108     error(program + ": Host: " + url.getAddress(), true);
109     return answer;
110     }
111 douglas 1
112 douglas 14 address.sin_addr = *((in_addr*)*host->h_addr_list);
113     address.sin_port = htons(url.getPort());
114    
115     if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
116     SOCKET_ERROR)
117     {
118     error(program + ": Connect");
119     return answer;
120     }
121    
122     if (head)
123     {
124     putline("HEAD " + url.getPath() + " HTTP/1.1");
125     }
126     else
127     {
128     putline("GET " + url.getPath() + " HTTP/1.1");
129     }
130    
131     putline("Accept: text/html; text/plain");
132     putline("User-Agent: " + agent(true) + ' ' + platform());
133    
134     if (url.getPort() == 80)
135     {
136     putline("Host: " + url.getAddress());
137     }
138     else
139     {
140     char* port = new char[1024];
141     sprintf(port, "%u", url.getPort());
142    
143     putline("Host: " + url.getAddress() + ':' + port);
144    
145     delete [] port;
146     }
147    
148 douglas 25 if (referer != "")
149     {
150     putline("Referer: " + referer);
151     }
152    
153 douglas 18 putline("Connection: close");
154 douglas 14 putline();
155    
156 douglas 18 code response;
157     string line;
158 douglas 17
159 douglas 18 do
160 douglas 17 {
161 douglas 18 line = getline();
162 douglas 17
163 douglas 18 if (line.find("HTTP/") != 0)
164     {
165     return answer;
166     }
167 douglas 17
168 douglas 18 unsigned dot = line.find('.');
169     unsigned space = line.find(' ');
170 douglas 17
171 douglas 19 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 10);
172     unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(),
173     0, 10);
174 douglas 17
175 douglas 24 if (major > 1)
176 douglas 18 {
177 douglas 19 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
178     major << "." << minor << "\n";
179 douglas 18
180     return answer;
181     }
182    
183 douglas 19 response = code(strtoul(line.substr(space + 1).c_str(), 0, 10));
184 douglas 18
185     if (response < ok) do line = getline(); while (line != "");
186 douglas 17 }
187 douglas 18 while (response < ok);
188 douglas 17
189     do
190     {
191     line = getline();
192 douglas 18
193     if (line != "")
194     {
195     unsigned colon = line.find(':');
196    
197     string field = line.substr(0, colon);
198     string value = line.substr(colon + 1);
199    
200     while (isspace(value[0])) value.erase(0, 1);
201    
202 douglas 19 if (field == "Content-Type")
203     {
204     type = value;
205     }
206     else if (field == "Content-Length")
207     {
208     length = strtoul(value.c_str(), 0, 10);
209     }
210     else if (field == "Location")
211     {
212     location = value;
213     }
214     else if (field == "Transfer-Encoding")
215     {
216     chunked = value == "chunked";
217     }
218 douglas 18 }
219 douglas 17 }
220     while (line != "");
221    
222     switch (response)
223     {
224     case ok:
225 douglas 18 if (debug) cerr << "response = " << response << "\n";
226 douglas 17 answer = true;
227     break;
228 douglas 18 case choices:
229 douglas 17 case moved:
230     case found:
231 douglas 18 if (debug) cerr << "response = " << response << "\n"
232     << "location = " << location << "\n";
233     location = getLink(location, url);
234 douglas 17 break;
235     case notfound:
236     case internal:
237 douglas 18 if (debug) cerr << "response = " << response << "\n";
238 douglas 17 break;
239     default:
240 douglas 18 if (debug) cerr << "response = " << response << "\n";
241     if (response <= 299)
242     {
243     answer = true;
244     }
245     else if (response <= 399)
246     {
247     location = getLink(location, url);
248     }
249 douglas 17 break;
250     }
251    
252 douglas 19 if (!head && answer) populate();
253    
254 douglas 1 return answer;
255     }
256    
257     HttpHandler& HttpHandler::getline(string& line, char endline)
258     {
259 douglas 21 unsigned end = page.find(endline);
260     unsigned newline = page.find('\n');
261 douglas 1
262     if (newline < end || end == string::npos)
263     {
264     end = newline;
265     }
266    
267 douglas 20 line = page.substr(0, end);
268     page.erase(0, (end == string::npos ? end : end + 1));
269 douglas 1
270     return *this;
271     }
272    
273     void HttpHandler::clear()
274     {
275 douglas 18 closesocket(http);
276    
277 douglas 17 type = "";
278     length = 0;
279     location = "";
280 douglas 1 page = "";
281 douglas 18 chunked = false;
282 douglas 1 }
283    
284 douglas 19 void HttpHandler::populate()
285     {
286     if (!chunked)
287     {
288     unsigned left = length;
289    
290     while (left > 0)
291     {
292     memset(buffer, 0, BUFSIZ + 1);
293    
294     unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
295 douglas 24 unsigned received;
296 douglas 19
297 douglas 145 while (true)
298 douglas 19 {
299 douglas 145 if ((received = recv(http, buffer, bytes, 0)) == SOCKET_ERROR)
300 douglas 24 {
301     error(program + ": Recv");
302     exit(1);
303     }
304 douglas 145 else if (received != bytes)
305     {
306     left -= received;
307     page += buffer;
308    
309     memset(buffer, 0, BUFSIZ + 1);
310    
311     bytes -= received;
312     }
313     else
314     {
315     break;
316     }
317 douglas 24 }
318    
319 douglas 19 page += buffer;
320     left -= bytes;
321     }
322     }
323     else
324     {
325 douglas 20 unsigned chunk;
326    
327     do
328     {
329     chunk = strtoul(getline().c_str(), 0, 16);
330    
331     unsigned left = chunk;
332    
333     while (left > 0)
334     {
335     memset(buffer, 0, BUFSIZ + 1);
336    
337     unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
338 douglas 24 unsigned received;
339 douglas 20
340 douglas 145 while (true)
341 douglas 20 {
342 douglas 145 if ((received = recv(http, buffer, bytes, 0)) ==
343     SOCKET_ERROR)
344 douglas 24 {
345     error(program + ": Recv");
346     exit(1);
347     }
348 douglas 145 else if (received != bytes)
349     {
350     left -= received;
351     page += buffer;
352    
353     memset(buffer, 0, BUFSIZ + 1);
354    
355     bytes -= received;
356     }
357     else
358     {
359     break;
360     }
361 douglas 24 }
362    
363 douglas 20 page += buffer;
364     left -= bytes;
365     }
366    
367     getline();
368     length += chunk;
369     }
370     while (chunk > 0);
371 douglas 19 }
372    
373 douglas 20 for (unsigned index = 0; index < page.length(); index++)
374     {
375     if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
376     1] == '\n' : false)
377     {
378     page.erase(index, 1);
379     }
380     else if (page[index] == '\r')
381     {
382     page[index] = '\n';
383     }
384     }
385 douglas 19 }
386    
387 douglas 14 void HttpHandler::putline(const string line)
388     {
389     sprintf(buffer, "%s\r\n", line.c_str());
390     if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
391     {
392     error(program + ": Send");
393     exit(1);
394     }
395     }
396    
397 douglas 17 string HttpHandler::getline()
398     {
399     string line;
400     char byte;
401    
402     do
403     {
404     if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
405     {
406     error(program + ": Recv");
407     }
408    
409     if (byte != '\r' && byte != '\n')
410     {
411     line += byte;
412     }
413     }
414     while (byte != '\n');
415    
416     return line;
417     }
418    
419 douglas 18 void HttpHandler::error(const string& prefix, bool host)
420 douglas 1 {
421 douglas 13 #ifdef _WIN32
422     string error;
423 douglas 1
424 douglas 13 switch (WSAGetLastError())
425 douglas 1 {
426 douglas 13 case WSAEACCES:
427     error = "Permission denied.";
428     break;
429     case WSAEADDRINUSE:
430     error = "Address already in use.";
431     break;
432     case WSAEADDRNOTAVAIL:
433     error = "Cannot assign requested address.";
434     break;
435     case WSAEAFNOSUPPORT:
436     error = "Address family not supported by protocol family.";
437     break;
438     case WSAEALREADY:
439     error = "Operation already in progress.";
440     break;
441     case WSAECONNABORTED:
442     error = "Software caused connection abort.";
443     break;
444     case WSAECONNREFUSED:
445     error = "Connection refused.";
446     break;
447     case WSAECONNRESET:
448     error = "Connection reset by peer.";
449     break;
450     case WSAEDESTADDRREQ:
451     error = "Destination address required.";
452     break;
453     case WSAEFAULT:
454     error = "Bad address.";
455     break;
456     case WSAEHOSTDOWN:
457     error = "Host is down.";
458     break;
459     case WSAEHOSTUNREACH:
460     error = "No route to host.";
461     break;
462     case WSAEINPROGRESS:
463     error = "Operation now in progress.";
464     break;
465     case WSAEINTR:
466     error = "Interrupted function call.";
467     break;
468     case WSAEINVAL:
469     error = "Invalid argument.";
470     break;
471     case WSAEISCONN:
472     error = "Socket is already connected.";
473     break;
474     case WSAEMFILE:
475     error = "Too many open files.";
476     break;
477     case WSAEMSGSIZE:
478     error = "Message too long.";
479     break;
480     case WSAENETDOWN:
481     error = "Network is down.";
482     break;
483     case WSAENETRESET:
484     error = "Network dropped connection on reset.";
485     break;
486     case WSAENETUNREACH:
487     error = "Network is unreachable.";
488     break;
489     case WSAENOBUFS:
490     error = "No buffer space available.";
491     break;
492     case WSAENOPROTOOPT:
493     error = "Bad protocol option.";
494     break;
495     case WSAENOTCONN:
496     error = "Socket is not connected.";
497     break;
498     case WSAENOTSOCK:
499     error = "Socket operation on non-socket.";
500     break;
501     case WSAEOPNOTSUPP:
502     error = "Operation not supported.";
503     break;
504     case WSAEPFNOSUPPORT:
505     error = "Protocol family not supported.";
506     break;
507     case WSAEPROCLIM:
508     error = "Too many processes.";
509     break;
510     case WSAEPROTONOSUPPORT:
511     error = "Protocol not supported.";
512     break;
513     case WSAEPROTOTYPE:
514     error = "Protocol wrong type for socket.";
515     break;
516     case WSAESHUTDOWN:
517     error = "Cannot send after socket shutdown.";
518     break;
519     case WSAESOCKTNOSUPPORT:
520     error = "Socket type not supported.";
521     break;
522     case WSAETIMEDOUT:
523     error = "Connection timed out.";
524     break;
525     case WSATYPE_NOT_FOUND:
526     error = "Class type not found.";
527     break;
528     case WSAEWOULDBLOCK:
529     error = "Resource temporarily unavailable.";
530     break;
531     case WSAHOST_NOT_FOUND:
532     error = "Host not found.";
533     break;
534     case WSA_INVALID_HANDLE:
535     error = "Specified event object handle is invalid.";
536     break;
537     case WSA_INVALID_PARAMETER:
538     error = "One or more parameters are invalid.";
539     break;
540     // case WSAINVALIDPROCTABLE:
541     // error = "Invalid procedure table from service provider.";
542     // break;
543     // case WSAINVALIDPROVIDER:
544     // error = "Invalid service provider version number.";
545     // break;
546     case WSA_IO_INCOMPLETE:
547     error = "Overlapped I/O event object not in signaled state.";
548     break;
549     case WSA_IO_PENDING:
550     error = "Overlapped operations will complete later.";
551     break;
552     case WSA_NOT_ENOUGH_MEMORY:
553     error = "Insufficient memory available.";
554     break;
555     case WSANOTINITIALISED:
556     error = "Successful WSAStartup not yet performed.";
557     break;
558     case WSANO_DATA:
559     error = "Valid name, no data record of requested type.";
560     break;
561     case WSANO_RECOVERY:
562     error = "This is a non-recoverable error.";
563     break;
564     // case WSAPROVIDERFAILEDINIT:
565     // error = "Unable to initialize a service provider.";
566     // break;
567     case WSASYSCALLFAILURE:
568     error = "System call failure.";
569     break;
570     case WSASYSNOTREADY:
571     error = "Network subsystem is unavailable.";
572     break;
573     case WSATRY_AGAIN:
574     error = "Non-authoritative host not found.";
575     break;
576     case WSAVERNOTSUPPORTED:
577     error = "WINSOCK.DLL version out of range.";
578     break;
579     case WSAEDISCON:
580     error = "Graceful shutdown in progress.";
581     break;
582     case WSA_OPERATION_ABORTED:
583     error = "Overlapped operation aborted.";
584     break;
585     default:
586     error = "Unknown error.";
587     break;
588     }
589 douglas 1
590 douglas 13 cerr << prefix << ": " << error << "\n";
591     #else
592     if (host)
593     {
594     herror(prefix.c_str());
595 douglas 1 }
596     else
597     {
598 douglas 13 perror(prefix.c_str());
599 douglas 1 }
600 douglas 13 #endif // _WIN32
601     }