ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 21
Committed: 2002-12-10T14:44:50-08:00 (22 years, 6 months ago) by douglas
File size: 12088 byte(s)
Log Message:
Found the bug in private Processor.process() and fixed it.

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4     * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine HTTP Handler
46     //
47     // Douglas Thrift
48     //
49     // HttpHandler.cpp
50    
51     #include "HttpHandler.h"
52    
53     HttpHandler::HttpHandler()
54     {
55 douglas 14 buffer = new char[BUFSIZ + 1];
56    
57 douglas 13 #ifdef _WIN32
58 douglas 14 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
59 douglas 13 {
60 douglas 17 error(program + ": WSAStartup");
61 douglas 13 exit(1);
62     }
63     #endif // _WIN32
64 douglas 1
65 douglas 18 length = 0;
66     chunked = false;
67 douglas 1 }
68    
69     HttpHandler::~HttpHandler()
70     {
71 douglas 14 delete [] buffer;
72    
73 douglas 13 #ifdef _WIN32
74     WSACleanup();
75     #endif // _WIN32
76 douglas 1 }
77    
78 douglas 14 bool HttpHandler::handle(URL &url, bool head)
79 douglas 1 {
80     bool answer = false;
81    
82 douglas 14 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
83     {
84     error(program + ": Socket");
85     exit(1);
86     }
87 douglas 1
88 douglas 14 sockaddr_in address;
89     hostent* host;
90 douglas 1
91 douglas 14 address.sin_family = AF_INET;
92 douglas 1
93 douglas 14 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
94     {
95     error(program + ": Host: " + url.getAddress(), true);
96     return answer;
97     }
98 douglas 1
99 douglas 14 address.sin_addr = *((in_addr*)*host->h_addr_list);
100     address.sin_port = htons(url.getPort());
101    
102     if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
103     SOCKET_ERROR)
104     {
105     error(program + ": Connect");
106     return answer;
107     }
108    
109     if (head)
110     {
111     putline("HEAD " + url.getPath() + " HTTP/1.1");
112     }
113     else
114     {
115     putline("GET " + url.getPath() + " HTTP/1.1");
116     }
117    
118     putline("Accept: text/html; text/plain");
119     putline("User-Agent: " + agent(true) + ' ' + platform());
120    
121     if (url.getPort() == 80)
122     {
123     putline("Host: " + url.getAddress());
124     }
125     else
126     {
127     char* port = new char[1024];
128     sprintf(port, "%u", url.getPort());
129    
130     putline("Host: " + url.getAddress() + ':' + port);
131    
132     delete [] port;
133     }
134    
135 douglas 19 // putline("Referer: " + ?referer?);
136 douglas 18 putline("Connection: close");
137 douglas 14 putline();
138    
139 douglas 18 code response;
140     string line;
141 douglas 17
142 douglas 18 do
143 douglas 17 {
144 douglas 18 line = getline();
145 douglas 17
146 douglas 18 if (line.find("HTTP/") != 0)
147     {
148     return answer;
149     }
150 douglas 17
151 douglas 18 unsigned dot = line.find('.');
152     unsigned space = line.find(' ');
153 douglas 17
154 douglas 19 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 10);
155     unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(),
156     0, 10);
157 douglas 17
158 douglas 18 if (major > 1 || minor < 1)
159     {
160 douglas 19 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
161     major << "." << minor << "\n";
162 douglas 18
163     return answer;
164     }
165    
166 douglas 19 response = code(strtoul(line.substr(space + 1).c_str(), 0, 10));
167 douglas 18
168     if (response < ok) do line = getline(); while (line != "");
169 douglas 17 }
170 douglas 18 while (response < ok);
171 douglas 17
172     do
173     {
174     line = getline();
175 douglas 18
176     if (line != "")
177     {
178     unsigned colon = line.find(':');
179    
180     string field = line.substr(0, colon);
181     string value = line.substr(colon + 1);
182    
183     while (isspace(value[0])) value.erase(0, 1);
184    
185 douglas 19 if (field == "Content-Type")
186     {
187     type = value;
188     }
189     else if (field == "Content-Length")
190     {
191     length = strtoul(value.c_str(), 0, 10);
192     }
193     else if (field == "Location")
194     {
195     location = value;
196     }
197     else if (field == "Transfer-Encoding")
198     {
199     chunked = value == "chunked";
200     }
201 douglas 18 }
202 douglas 17 }
203     while (line != "");
204    
205     switch (response)
206     {
207     case ok:
208 douglas 18 if (debug) cerr << "response = " << response << "\n";
209 douglas 17 answer = true;
210     break;
211 douglas 18 case choices:
212 douglas 17 case moved:
213     case found:
214 douglas 18 if (debug) cerr << "response = " << response << "\n"
215     << "location = " << location << "\n";
216     location = getLink(location, url);
217 douglas 17 break;
218     case notfound:
219     case internal:
220 douglas 18 if (debug) cerr << "response = " << response << "\n";
221 douglas 17 break;
222     default:
223 douglas 18 if (debug) cerr << "response = " << response << "\n";
224     if (response <= 299)
225     {
226     answer = true;
227     }
228     else if (response <= 399)
229     {
230     location = getLink(location, url);
231     }
232 douglas 17 break;
233     }
234    
235 douglas 19 if (!head && answer) populate();
236    
237 douglas 1 return answer;
238     }
239    
240     HttpHandler& HttpHandler::getline(string& line, char endline)
241     {
242 douglas 21 unsigned end = page.find(endline);
243     unsigned newline = page.find('\n');
244 douglas 1
245     if (newline < end || end == string::npos)
246     {
247     end = newline;
248     }
249    
250 douglas 20 line = page.substr(0, end);
251     page.erase(0, (end == string::npos ? end : end + 1));
252 douglas 1
253     return *this;
254     }
255    
256     void HttpHandler::clear()
257     {
258 douglas 18 closesocket(http);
259    
260 douglas 17 type = "";
261     length = 0;
262     location = "";
263 douglas 1 page = "";
264 douglas 18 chunked = false;
265 douglas 1 }
266    
267 douglas 19 void HttpHandler::populate()
268     {
269     if (!chunked)
270     {
271     unsigned left = length;
272    
273     while (left > 0)
274     {
275     memset(buffer, 0, BUFSIZ + 1);
276    
277     unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
278    
279     if (recv(http, buffer, bytes, 0) == SOCKET_ERROR)
280     {
281 douglas 20 error(program + ": Recv");
282 douglas 19 exit(1);
283     }
284    
285     page += buffer;
286     left -= bytes;
287     }
288     }
289     else
290     {
291 douglas 20 unsigned chunk;
292    
293     do
294     {
295     chunk = strtoul(getline().c_str(), 0, 16);
296    
297     unsigned left = chunk;
298    
299     while (left > 0)
300     {
301     memset(buffer, 0, BUFSIZ + 1);
302    
303     unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
304    
305     if (recv(http, buffer, bytes, 0) == SOCKET_ERROR)
306     {
307     error(program + ": Recv");
308     exit(1);
309     }
310    
311     page += buffer;
312     left -= bytes;
313     }
314    
315     getline();
316     length += chunk;
317     }
318     while (chunk > 0);
319 douglas 19 }
320    
321 douglas 20 for (unsigned index = 0; index < page.length(); index++)
322     {
323     if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
324     1] == '\n' : false)
325     {
326     page.erase(index, 1);
327     }
328     else if (page[index] == '\r')
329     {
330     page[index] = '\n';
331     }
332     }
333 douglas 19 }
334    
335 douglas 14 void HttpHandler::putline(const string line)
336     {
337     sprintf(buffer, "%s\r\n", line.c_str());
338     if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
339     {
340     error(program + ": Send");
341     exit(1);
342     }
343     }
344    
345 douglas 17 string HttpHandler::getline()
346     {
347     string line;
348     char byte;
349    
350     do
351     {
352     if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
353     {
354     error(program + ": Recv");
355     }
356    
357     if (byte != '\r' && byte != '\n')
358     {
359     line += byte;
360     }
361     }
362     while (byte != '\n');
363    
364     return line;
365     }
366    
367 douglas 18 void HttpHandler::error(const string& prefix, bool host)
368 douglas 1 {
369 douglas 13 #ifdef _WIN32
370     string error;
371 douglas 1
372 douglas 13 switch (WSAGetLastError())
373 douglas 1 {
374 douglas 13 case WSAEACCES:
375     error = "Permission denied.";
376     break;
377     case WSAEADDRINUSE:
378     error = "Address already in use.";
379     break;
380     case WSAEADDRNOTAVAIL:
381     error = "Cannot assign requested address.";
382     break;
383     case WSAEAFNOSUPPORT:
384     error = "Address family not supported by protocol family.";
385     break;
386     case WSAEALREADY:
387     error = "Operation already in progress.";
388     break;
389     case WSAECONNABORTED:
390     error = "Software caused connection abort.";
391     break;
392     case WSAECONNREFUSED:
393     error = "Connection refused.";
394     break;
395     case WSAECONNRESET:
396     error = "Connection reset by peer.";
397     break;
398     case WSAEDESTADDRREQ:
399     error = "Destination address required.";
400     break;
401     case WSAEFAULT:
402     error = "Bad address.";
403     break;
404     case WSAEHOSTDOWN:
405     error = "Host is down.";
406     break;
407     case WSAEHOSTUNREACH:
408     error = "No route to host.";
409     break;
410     case WSAEINPROGRESS:
411     error = "Operation now in progress.";
412     break;
413     case WSAEINTR:
414     error = "Interrupted function call.";
415     break;
416     case WSAEINVAL:
417     error = "Invalid argument.";
418     break;
419     case WSAEISCONN:
420     error = "Socket is already connected.";
421     break;
422     case WSAEMFILE:
423     error = "Too many open files.";
424     break;
425     case WSAEMSGSIZE:
426     error = "Message too long.";
427     break;
428     case WSAENETDOWN:
429     error = "Network is down.";
430     break;
431     case WSAENETRESET:
432     error = "Network dropped connection on reset.";
433     break;
434     case WSAENETUNREACH:
435     error = "Network is unreachable.";
436     break;
437     case WSAENOBUFS:
438     error = "No buffer space available.";
439     break;
440     case WSAENOPROTOOPT:
441     error = "Bad protocol option.";
442     break;
443     case WSAENOTCONN:
444     error = "Socket is not connected.";
445     break;
446     case WSAENOTSOCK:
447     error = "Socket operation on non-socket.";
448     break;
449     case WSAEOPNOTSUPP:
450     error = "Operation not supported.";
451     break;
452     case WSAEPFNOSUPPORT:
453     error = "Protocol family not supported.";
454     break;
455     case WSAEPROCLIM:
456     error = "Too many processes.";
457     break;
458     case WSAEPROTONOSUPPORT:
459     error = "Protocol not supported.";
460     break;
461     case WSAEPROTOTYPE:
462     error = "Protocol wrong type for socket.";
463     break;
464     case WSAESHUTDOWN:
465     error = "Cannot send after socket shutdown.";
466     break;
467     case WSAESOCKTNOSUPPORT:
468     error = "Socket type not supported.";
469     break;
470     case WSAETIMEDOUT:
471     error = "Connection timed out.";
472     break;
473     case WSATYPE_NOT_FOUND:
474     error = "Class type not found.";
475     break;
476     case WSAEWOULDBLOCK:
477     error = "Resource temporarily unavailable.";
478     break;
479     case WSAHOST_NOT_FOUND:
480     error = "Host not found.";
481     break;
482     case WSA_INVALID_HANDLE:
483     error = "Specified event object handle is invalid.";
484     break;
485     case WSA_INVALID_PARAMETER:
486     error = "One or more parameters are invalid.";
487     break;
488     // case WSAINVALIDPROCTABLE:
489     // error = "Invalid procedure table from service provider.";
490     // break;
491     // case WSAINVALIDPROVIDER:
492     // error = "Invalid service provider version number.";
493     // break;
494     case WSA_IO_INCOMPLETE:
495     error = "Overlapped I/O event object not in signaled state.";
496     break;
497     case WSA_IO_PENDING:
498     error = "Overlapped operations will complete later.";
499     break;
500     case WSA_NOT_ENOUGH_MEMORY:
501     error = "Insufficient memory available.";
502     break;
503     case WSANOTINITIALISED:
504     error = "Successful WSAStartup not yet performed.";
505     break;
506     case WSANO_DATA:
507     error = "Valid name, no data record of requested type.";
508     break;
509     case WSANO_RECOVERY:
510     error = "This is a non-recoverable error.";
511     break;
512     // case WSAPROVIDERFAILEDINIT:
513     // error = "Unable to initialize a service provider.";
514     // break;
515     case WSASYSCALLFAILURE:
516     error = "System call failure.";
517     break;
518     case WSASYSNOTREADY:
519     error = "Network subsystem is unavailable.";
520     break;
521     case WSATRY_AGAIN:
522     error = "Non-authoritative host not found.";
523     break;
524     case WSAVERNOTSUPPORTED:
525     error = "WINSOCK.DLL version out of range.";
526     break;
527     case WSAEDISCON:
528     error = "Graceful shutdown in progress.";
529     break;
530     case WSA_OPERATION_ABORTED:
531     error = "Overlapped operation aborted.";
532     break;
533     default:
534     error = "Unknown error.";
535     break;
536     }
537 douglas 1
538 douglas 13 cerr << prefix << ": " << error << "\n";
539     #else
540     if (host)
541     {
542     herror(prefix.c_str());
543 douglas 1 }
544     else
545     {
546 douglas 13 perror(prefix.c_str());
547 douglas 1 }
548 douglas 13 #endif // _WIN32
549     }