ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 25
Committed: 2002-12-22T23:32:58-08:00 (22 years, 6 months ago) by douglas
File size: 12714 byte(s)
Log Message:
Added "referer" handling to Indexer and HttpHandler.handle().

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4     * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine HTTP Handler
46     //
47     // Douglas Thrift
48     //
49     // HttpHandler.cpp
50    
51     #include "HttpHandler.h"
52    
53     HttpHandler::HttpHandler()
54     {
55 douglas 14 buffer = new char[BUFSIZ + 1];
56    
57 douglas 13 #ifdef _WIN32
58 douglas 14 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
59 douglas 13 {
60 douglas 17 error(program + ": WSAStartup");
61 douglas 13 exit(1);
62     }
63     #endif // _WIN32
64 douglas 1
65 douglas 18 length = 0;
66     chunked = false;
67 douglas 1 }
68    
69     HttpHandler::~HttpHandler()
70     {
71 douglas 14 delete [] buffer;
72    
73 douglas 13 #ifdef _WIN32
74     WSACleanup();
75     #endif // _WIN32
76 douglas 1 }
77    
78 douglas 25 bool HttpHandler::handle(URL &url, const string referer, bool head)
79 douglas 1 {
80     bool answer = false;
81    
82 douglas 14 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
83     {
84     error(program + ": Socket");
85     exit(1);
86     }
87 douglas 1
88 douglas 14 sockaddr_in address;
89     hostent* host;
90 douglas 1
91 douglas 14 address.sin_family = AF_INET;
92 douglas 1
93 douglas 14 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
94     {
95     error(program + ": Host: " + url.getAddress(), true);
96     return answer;
97     }
98 douglas 1
99 douglas 14 address.sin_addr = *((in_addr*)*host->h_addr_list);
100     address.sin_port = htons(url.getPort());
101    
102     if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
103     SOCKET_ERROR)
104     {
105     error(program + ": Connect");
106     return answer;
107     }
108    
109     if (head)
110     {
111     putline("HEAD " + url.getPath() + " HTTP/1.1");
112     }
113     else
114     {
115     putline("GET " + url.getPath() + " HTTP/1.1");
116     }
117    
118     putline("Accept: text/html; text/plain");
119     putline("User-Agent: " + agent(true) + ' ' + platform());
120    
121     if (url.getPort() == 80)
122     {
123     putline("Host: " + url.getAddress());
124     }
125     else
126     {
127     char* port = new char[1024];
128     sprintf(port, "%u", url.getPort());
129    
130     putline("Host: " + url.getAddress() + ':' + port);
131    
132     delete [] port;
133     }
134    
135 douglas 25 if (referer != "")
136     {
137     putline("Referer: " + referer);
138     }
139    
140 douglas 18 putline("Connection: close");
141 douglas 14 putline();
142    
143 douglas 18 code response;
144     string line;
145 douglas 17
146 douglas 18 do
147 douglas 17 {
148 douglas 18 line = getline();
149 douglas 17
150 douglas 18 if (line.find("HTTP/") != 0)
151     {
152     return answer;
153     }
154 douglas 17
155 douglas 18 unsigned dot = line.find('.');
156     unsigned space = line.find(' ');
157 douglas 17
158 douglas 19 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 10);
159     unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(),
160     0, 10);
161 douglas 17
162 douglas 24 if (major > 1)
163 douglas 18 {
164 douglas 19 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
165     major << "." << minor << "\n";
166 douglas 18
167     return answer;
168     }
169    
170 douglas 19 response = code(strtoul(line.substr(space + 1).c_str(), 0, 10));
171 douglas 18
172     if (response < ok) do line = getline(); while (line != "");
173 douglas 17 }
174 douglas 18 while (response < ok);
175 douglas 17
176     do
177     {
178     line = getline();
179 douglas 18
180     if (line != "")
181     {
182     unsigned colon = line.find(':');
183    
184     string field = line.substr(0, colon);
185     string value = line.substr(colon + 1);
186    
187     while (isspace(value[0])) value.erase(0, 1);
188    
189 douglas 19 if (field == "Content-Type")
190     {
191     type = value;
192     }
193     else if (field == "Content-Length")
194     {
195     length = strtoul(value.c_str(), 0, 10);
196     }
197     else if (field == "Location")
198     {
199     location = value;
200     }
201     else if (field == "Transfer-Encoding")
202     {
203     chunked = value == "chunked";
204     }
205 douglas 18 }
206 douglas 17 }
207     while (line != "");
208    
209     switch (response)
210     {
211     case ok:
212 douglas 18 if (debug) cerr << "response = " << response << "\n";
213 douglas 17 answer = true;
214     break;
215 douglas 18 case choices:
216 douglas 17 case moved:
217     case found:
218 douglas 18 if (debug) cerr << "response = " << response << "\n"
219     << "location = " << location << "\n";
220     location = getLink(location, url);
221 douglas 17 break;
222     case notfound:
223     case internal:
224 douglas 18 if (debug) cerr << "response = " << response << "\n";
225 douglas 17 break;
226     default:
227 douglas 18 if (debug) cerr << "response = " << response << "\n";
228     if (response <= 299)
229     {
230     answer = true;
231     }
232     else if (response <= 399)
233     {
234     location = getLink(location, url);
235     }
236 douglas 17 break;
237     }
238    
239 douglas 19 if (!head && answer) populate();
240    
241 douglas 1 return answer;
242     }
243    
244     HttpHandler& HttpHandler::getline(string& line, char endline)
245     {
246 douglas 21 unsigned end = page.find(endline);
247     unsigned newline = page.find('\n');
248 douglas 1
249     if (newline < end || end == string::npos)
250     {
251     end = newline;
252     }
253    
254 douglas 20 line = page.substr(0, end);
255     page.erase(0, (end == string::npos ? end : end + 1));
256 douglas 1
257     return *this;
258     }
259    
260     void HttpHandler::clear()
261     {
262 douglas 18 closesocket(http);
263    
264 douglas 17 type = "";
265     length = 0;
266     location = "";
267 douglas 1 page = "";
268 douglas 18 chunked = false;
269 douglas 1 }
270    
271 douglas 19 void HttpHandler::populate()
272     {
273     if (!chunked)
274     {
275     unsigned left = length;
276    
277     while (left > 0)
278     {
279     memset(buffer, 0, BUFSIZ + 1);
280    
281     unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
282 douglas 24 unsigned received;
283 douglas 19
284 douglas 24 if ((received = recv(http, buffer, bytes, 0)) == SOCKET_ERROR)
285 douglas 19 {
286 douglas 20 error(program + ": Recv");
287 douglas 19 exit(1);
288     }
289 douglas 24 else if (received != bytes)
290     {
291     left -= received;
292     page += buffer;
293 douglas 19
294 douglas 24 memset(buffer, 0, BUFSIZ + 1);
295    
296     bytes -= received;
297     if (recv(http, buffer, bytes, 0) == SOCKET_ERROR)
298     {
299     error(program + ": Recv");
300     exit(1);
301     }
302     }
303    
304 douglas 19 page += buffer;
305     left -= bytes;
306     }
307     }
308     else
309     {
310 douglas 20 unsigned chunk;
311    
312     do
313     {
314     chunk = strtoul(getline().c_str(), 0, 16);
315    
316     unsigned left = chunk;
317    
318     while (left > 0)
319     {
320     memset(buffer, 0, BUFSIZ + 1);
321    
322     unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
323 douglas 24 unsigned received;
324 douglas 20
325 douglas 24 if ((received = recv(http, buffer, bytes, 0)) == SOCKET_ERROR)
326 douglas 20 {
327     error(program + ": Recv");
328     exit(1);
329     }
330 douglas 24 else if (received != bytes)
331     {
332     left -= received;
333     page += buffer;
334 douglas 20
335 douglas 24 memset(buffer, 0, BUFSIZ + 1);
336    
337     bytes -= received;
338     if (recv(http, buffer, bytes, 0) == SOCKET_ERROR)
339     {
340     error(program + ": Recv");
341     exit(1);
342     }
343     }
344    
345 douglas 20 page += buffer;
346     left -= bytes;
347     }
348    
349     getline();
350     length += chunk;
351     }
352     while (chunk > 0);
353 douglas 19 }
354    
355 douglas 20 for (unsigned index = 0; index < page.length(); index++)
356     {
357     if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
358     1] == '\n' : false)
359     {
360     page.erase(index, 1);
361     }
362     else if (page[index] == '\r')
363     {
364     page[index] = '\n';
365     }
366     }
367 douglas 19 }
368    
369 douglas 14 void HttpHandler::putline(const string line)
370     {
371     sprintf(buffer, "%s\r\n", line.c_str());
372     if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
373     {
374     error(program + ": Send");
375     exit(1);
376     }
377     }
378    
379 douglas 17 string HttpHandler::getline()
380     {
381     string line;
382     char byte;
383    
384     do
385     {
386     if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
387     {
388     error(program + ": Recv");
389     }
390    
391     if (byte != '\r' && byte != '\n')
392     {
393     line += byte;
394     }
395     }
396     while (byte != '\n');
397    
398     return line;
399     }
400    
401 douglas 18 void HttpHandler::error(const string& prefix, bool host)
402 douglas 1 {
403 douglas 13 #ifdef _WIN32
404     string error;
405 douglas 1
406 douglas 13 switch (WSAGetLastError())
407 douglas 1 {
408 douglas 13 case WSAEACCES:
409     error = "Permission denied.";
410     break;
411     case WSAEADDRINUSE:
412     error = "Address already in use.";
413     break;
414     case WSAEADDRNOTAVAIL:
415     error = "Cannot assign requested address.";
416     break;
417     case WSAEAFNOSUPPORT:
418     error = "Address family not supported by protocol family.";
419     break;
420     case WSAEALREADY:
421     error = "Operation already in progress.";
422     break;
423     case WSAECONNABORTED:
424     error = "Software caused connection abort.";
425     break;
426     case WSAECONNREFUSED:
427     error = "Connection refused.";
428     break;
429     case WSAECONNRESET:
430     error = "Connection reset by peer.";
431     break;
432     case WSAEDESTADDRREQ:
433     error = "Destination address required.";
434     break;
435     case WSAEFAULT:
436     error = "Bad address.";
437     break;
438     case WSAEHOSTDOWN:
439     error = "Host is down.";
440     break;
441     case WSAEHOSTUNREACH:
442     error = "No route to host.";
443     break;
444     case WSAEINPROGRESS:
445     error = "Operation now in progress.";
446     break;
447     case WSAEINTR:
448     error = "Interrupted function call.";
449     break;
450     case WSAEINVAL:
451     error = "Invalid argument.";
452     break;
453     case WSAEISCONN:
454     error = "Socket is already connected.";
455     break;
456     case WSAEMFILE:
457     error = "Too many open files.";
458     break;
459     case WSAEMSGSIZE:
460     error = "Message too long.";
461     break;
462     case WSAENETDOWN:
463     error = "Network is down.";
464     break;
465     case WSAENETRESET:
466     error = "Network dropped connection on reset.";
467     break;
468     case WSAENETUNREACH:
469     error = "Network is unreachable.";
470     break;
471     case WSAENOBUFS:
472     error = "No buffer space available.";
473     break;
474     case WSAENOPROTOOPT:
475     error = "Bad protocol option.";
476     break;
477     case WSAENOTCONN:
478     error = "Socket is not connected.";
479     break;
480     case WSAENOTSOCK:
481     error = "Socket operation on non-socket.";
482     break;
483     case WSAEOPNOTSUPP:
484     error = "Operation not supported.";
485     break;
486     case WSAEPFNOSUPPORT:
487     error = "Protocol family not supported.";
488     break;
489     case WSAEPROCLIM:
490     error = "Too many processes.";
491     break;
492     case WSAEPROTONOSUPPORT:
493     error = "Protocol not supported.";
494     break;
495     case WSAEPROTOTYPE:
496     error = "Protocol wrong type for socket.";
497     break;
498     case WSAESHUTDOWN:
499     error = "Cannot send after socket shutdown.";
500     break;
501     case WSAESOCKTNOSUPPORT:
502     error = "Socket type not supported.";
503     break;
504     case WSAETIMEDOUT:
505     error = "Connection timed out.";
506     break;
507     case WSATYPE_NOT_FOUND:
508     error = "Class type not found.";
509     break;
510     case WSAEWOULDBLOCK:
511     error = "Resource temporarily unavailable.";
512     break;
513     case WSAHOST_NOT_FOUND:
514     error = "Host not found.";
515     break;
516     case WSA_INVALID_HANDLE:
517     error = "Specified event object handle is invalid.";
518     break;
519     case WSA_INVALID_PARAMETER:
520     error = "One or more parameters are invalid.";
521     break;
522     // case WSAINVALIDPROCTABLE:
523     // error = "Invalid procedure table from service provider.";
524     // break;
525     // case WSAINVALIDPROVIDER:
526     // error = "Invalid service provider version number.";
527     // break;
528     case WSA_IO_INCOMPLETE:
529     error = "Overlapped I/O event object not in signaled state.";
530     break;
531     case WSA_IO_PENDING:
532     error = "Overlapped operations will complete later.";
533     break;
534     case WSA_NOT_ENOUGH_MEMORY:
535     error = "Insufficient memory available.";
536     break;
537     case WSANOTINITIALISED:
538     error = "Successful WSAStartup not yet performed.";
539     break;
540     case WSANO_DATA:
541     error = "Valid name, no data record of requested type.";
542     break;
543     case WSANO_RECOVERY:
544     error = "This is a non-recoverable error.";
545     break;
546     // case WSAPROVIDERFAILEDINIT:
547     // error = "Unable to initialize a service provider.";
548     // break;
549     case WSASYSCALLFAILURE:
550     error = "System call failure.";
551     break;
552     case WSASYSNOTREADY:
553     error = "Network subsystem is unavailable.";
554     break;
555     case WSATRY_AGAIN:
556     error = "Non-authoritative host not found.";
557     break;
558     case WSAVERNOTSUPPORTED:
559     error = "WINSOCK.DLL version out of range.";
560     break;
561     case WSAEDISCON:
562     error = "Graceful shutdown in progress.";
563     break;
564     case WSA_OPERATION_ABORTED:
565     error = "Overlapped operation aborted.";
566     break;
567     default:
568     error = "Unknown error.";
569     break;
570     }
571 douglas 1
572 douglas 13 cerr << prefix << ": " << error << "\n";
573     #else
574     if (host)
575     {
576     herror(prefix.c_str());
577 douglas 1 }
578     else
579     {
580 douglas 13 perror(prefix.c_str());
581 douglas 1 }
582 douglas 13 #endif // _WIN32
583     }