ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 179
Committed: 2003-07-05T19:47:23-07:00 (21 years, 11 months ago) by douglas
File size: 13295 byte(s)
Log Message:
Replaced herror function with a switch.

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine HTTP Handler
46 //
47 // Douglas Thrift
48 //
49 // HttpHandler.cpp
50
51 #include "HttpHandler.h"
52
53 // Lovely C Sockets!
54 #ifndef _WIN32
55 // BSD Sockets
56 #include <unistd.h>
57 #include <sys/types.h>
58 #include <sys/socket.h>
59 #include <netinet/in.h>
60 #include <netdb.h>
61
62 #define INVALID_SOCKET -1
63 #define SOCKET_ERROR -1
64
65 inline int closesocket(SOCKET s) { return close(s); }
66 #endif
67
68 HttpHandler::HttpHandler()
69 {
70 buffer = new char[BUFSIZ + 1];
71
72 #ifdef _WIN32
73 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
74 {
75 error(program + ": WSAStartup");
76 exit(1);
77 }
78 #endif // _WIN32
79
80 length = 0;
81 chunked = false;
82 }
83
84 HttpHandler::~HttpHandler()
85 {
86 delete [] buffer;
87
88 #ifdef _WIN32
89 WSACleanup();
90 #endif // _WIN32
91 }
92
93 bool HttpHandler::handle(URL &url, const string referer, bool head)
94 {
95 bool answer = false;
96
97 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
98 {
99 error(program + ": Socket");
100 exit(1);
101 }
102
103 sockaddr_in address;
104 hostent* host;
105
106 address.sin_family = AF_INET;
107
108 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
109 {
110 error(program + ": Host: " + url.getAddress(), true);
111 return answer;
112 }
113
114 address.sin_addr = *((in_addr*)*host->h_addr_list);
115 address.sin_port = htons(url.getPort());
116
117 if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
118 SOCKET_ERROR)
119 {
120 error(program + ": Connect");
121 return answer;
122 }
123
124 if (head)
125 {
126 putline("HEAD " + url.getPath() + " HTTP/1.1");
127 }
128 else
129 {
130 putline("GET " + url.getPath() + " HTTP/1.1");
131 }
132
133 putline("Accept: text/html; text/plain");
134 putline("User-Agent: " + agent(true) + ' ' + platform());
135
136 if (url.getPort() == 80)
137 {
138 putline("Host: " + url.getAddress());
139 }
140 else
141 {
142 char* port = new char[1024];
143 sprintf(port, "%u", url.getPort());
144
145 putline("Host: " + url.getAddress() + ':' + port);
146
147 delete [] port;
148 }
149
150 if (referer != "")
151 {
152 putline("Referer: " + referer);
153 }
154
155 putline("Connection: close");
156 putline();
157
158 code response;
159 string line;
160
161 do
162 {
163 line = getline();
164
165 if (line.find("HTTP/") != 0)
166 {
167 return answer;
168 }
169
170 unsigned dot = line.find('.');
171 unsigned space = line.find(' ');
172
173 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 10);
174 unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(),
175 0, 10);
176
177 if (major > 1)
178 {
179 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
180 major << "." << minor << "\n";
181
182 return answer;
183 }
184
185 response = code(strtoul(line.substr(space + 1).c_str(), 0, 10));
186
187 if (response < ok) do line = getline(); while (line != "");
188 }
189 while (response < ok);
190
191 do
192 {
193 line = getline();
194
195 if (line != "")
196 {
197 unsigned colon = line.find(':');
198
199 string field = line.substr(0, colon);
200 string value = line.substr(colon + 1);
201
202 while (isspace(value[0])) value.erase(0, 1);
203
204 if (field == "Content-Type")
205 {
206 type = value;
207 }
208 else if (field == "Content-Length")
209 {
210 length = strtoul(value.c_str(), 0, 10);
211 }
212 else if (field == "Location")
213 {
214 location = value;
215 }
216 else if (field == "Transfer-Encoding")
217 {
218 chunked = value == "chunked";
219 }
220 }
221 }
222 while (line != "");
223
224 switch (response)
225 {
226 case ok:
227 if (debug) cerr << "response = " << response << "\n";
228 answer = true;
229 break;
230 case choices:
231 case moved:
232 case found:
233 if (debug) cerr << "response = " << response << "\n"
234 << "location = " << location << "\n";
235 location = getLink(location, url);
236 break;
237 case notfound:
238 case internal:
239 if (debug) cerr << "response = " << response << "\n";
240 break;
241 default:
242 if (debug) cerr << "response = " << response << "\n";
243 if (response <= 299)
244 {
245 answer = true;
246 }
247 else if (response <= 399)
248 {
249 location = getLink(location, url);
250 }
251 break;
252 }
253
254 if (!head && answer) populate();
255
256 return answer;
257 }
258
259 HttpHandler& HttpHandler::getline(string& line, char endline)
260 {
261 unsigned end = page.find(endline);
262 unsigned newline = page.find('\n');
263
264 if (newline < end || end == string::npos)
265 {
266 end = newline;
267 }
268
269 line = page.substr(0, end);
270 page.erase(0, (end == string::npos ? end : end + 1));
271
272 return *this;
273 }
274
275 void HttpHandler::clear()
276 {
277 closesocket(http);
278
279 type = "";
280 length = 0;
281 location = "";
282 page = "";
283 chunked = false;
284 }
285
286 void HttpHandler::populate()
287 {
288 if (!chunked)
289 {
290 unsigned left = length;
291
292 while (left > 0)
293 {
294 memset(buffer, 0, BUFSIZ + 1);
295
296 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
297 unsigned received;
298
299 while (true)
300 {
301 if ((received = recv(http, buffer, bytes, 0)) == SOCKET_ERROR)
302 {
303 error(program + ": Recv");
304 exit(1);
305 }
306 else if (received != bytes)
307 {
308 left -= received;
309 page += buffer;
310
311 memset(buffer, 0, BUFSIZ + 1);
312
313 bytes -= received;
314 }
315 else
316 {
317 break;
318 }
319 }
320
321 page += buffer;
322 left -= bytes;
323 }
324 }
325 else
326 {
327 unsigned chunk;
328
329 do
330 {
331 chunk = strtoul(getline().c_str(), 0, 16);
332
333 unsigned left = chunk;
334
335 while (left > 0)
336 {
337 memset(buffer, 0, BUFSIZ + 1);
338
339 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
340 unsigned received;
341
342 while (true)
343 {
344 if ((received = recv(http, buffer, bytes, 0)) ==
345 SOCKET_ERROR)
346 {
347 error(program + ": Recv");
348 exit(1);
349 }
350 else if (received != bytes)
351 {
352 left -= received;
353 page += buffer;
354
355 memset(buffer, 0, BUFSIZ + 1);
356
357 bytes -= received;
358 }
359 else
360 {
361 break;
362 }
363 }
364
365 page += buffer;
366 left -= bytes;
367 }
368
369 getline();
370 length += chunk;
371 }
372 while (chunk > 0);
373 }
374
375 for (unsigned index = 0; index < page.length(); index++)
376 {
377 if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
378 1] == '\n' : false)
379 {
380 page.erase(index, 1);
381 }
382 else if (page[index] == '\r')
383 {
384 page[index] = '\n';
385 }
386 }
387 }
388
389 void HttpHandler::putline(const string line)
390 {
391 sprintf(buffer, "%s\r\n", line.c_str());
392 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
393 {
394 error(program + ": Send");
395 exit(1);
396 }
397 }
398
399 string HttpHandler::getline()
400 {
401 string line;
402 char byte;
403
404 do
405 {
406 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
407 {
408 error(program + ": Recv");
409 }
410
411 if (byte != '\r' && byte != '\n')
412 {
413 line += byte;
414 }
415 }
416 while (byte != '\n');
417
418 return line;
419 }
420
421 void HttpHandler::error(const string& prefix, bool host)
422 {
423 #ifdef _WIN32
424 string error;
425
426 switch (WSAGetLastError())
427 {
428 case WSAEACCES:
429 error = "Permission denied.";
430 break;
431 case WSAEADDRINUSE:
432 error = "Address already in use.";
433 break;
434 case WSAEADDRNOTAVAIL:
435 error = "Cannot assign requested address.";
436 break;
437 case WSAEAFNOSUPPORT:
438 error = "Address family not supported by protocol family.";
439 break;
440 case WSAEALREADY:
441 error = "Operation already in progress.";
442 break;
443 case WSAECONNABORTED:
444 error = "Software caused connection abort.";
445 break;
446 case WSAECONNREFUSED:
447 error = "Connection refused.";
448 break;
449 case WSAECONNRESET:
450 error = "Connection reset by peer.";
451 break;
452 case WSAEDESTADDRREQ:
453 error = "Destination address required.";
454 break;
455 case WSAEFAULT:
456 error = "Bad address.";
457 break;
458 case WSAEHOSTDOWN:
459 error = "Host is down.";
460 break;
461 case WSAEHOSTUNREACH:
462 error = "No route to host.";
463 break;
464 case WSAEINPROGRESS:
465 error = "Operation now in progress.";
466 break;
467 case WSAEINTR:
468 error = "Interrupted function call.";
469 break;
470 case WSAEINVAL:
471 error = "Invalid argument.";
472 break;
473 case WSAEISCONN:
474 error = "Socket is already connected.";
475 break;
476 case WSAEMFILE:
477 error = "Too many open files.";
478 break;
479 case WSAEMSGSIZE:
480 error = "Message too long.";
481 break;
482 case WSAENETDOWN:
483 error = "Network is down.";
484 break;
485 case WSAENETRESET:
486 error = "Network dropped connection on reset.";
487 break;
488 case WSAENETUNREACH:
489 error = "Network is unreachable.";
490 break;
491 case WSAENOBUFS:
492 error = "No buffer space available.";
493 break;
494 case WSAENOPROTOOPT:
495 error = "Bad protocol option.";
496 break;
497 case WSAENOTCONN:
498 error = "Socket is not connected.";
499 break;
500 case WSAENOTSOCK:
501 error = "Socket operation on non-socket.";
502 break;
503 case WSAEOPNOTSUPP:
504 error = "Operation not supported.";
505 break;
506 case WSAEPFNOSUPPORT:
507 error = "Protocol family not supported.";
508 break;
509 case WSAEPROCLIM:
510 error = "Too many processes.";
511 break;
512 case WSAEPROTONOSUPPORT:
513 error = "Protocol not supported.";
514 break;
515 case WSAEPROTOTYPE:
516 error = "Protocol wrong type for socket.";
517 break;
518 case WSAESHUTDOWN:
519 error = "Cannot send after socket shutdown.";
520 break;
521 case WSAESOCKTNOSUPPORT:
522 error = "Socket type not supported.";
523 break;
524 case WSAETIMEDOUT:
525 error = "Connection timed out.";
526 break;
527 case WSATYPE_NOT_FOUND:
528 error = "Class type not found.";
529 break;
530 case WSAEWOULDBLOCK:
531 error = "Resource temporarily unavailable.";
532 break;
533 case WSAHOST_NOT_FOUND:
534 error = "Host not found.";
535 break;
536 case WSA_INVALID_HANDLE:
537 error = "Specified event object handle is invalid.";
538 break;
539 case WSA_INVALID_PARAMETER:
540 error = "One or more parameters are invalid.";
541 break;
542 // case WSAINVALIDPROCTABLE:
543 // error = "Invalid procedure table from service provider.";
544 // break;
545 // case WSAINVALIDPROVIDER:
546 // error = "Invalid service provider version number.";
547 // break;
548 case WSA_IO_INCOMPLETE:
549 error = "Overlapped I/O event object not in signaled state.";
550 break;
551 case WSA_IO_PENDING:
552 error = "Overlapped operations will complete later.";
553 break;
554 case WSA_NOT_ENOUGH_MEMORY:
555 error = "Insufficient memory available.";
556 break;
557 case WSANOTINITIALISED:
558 error = "Successful WSAStartup not yet performed.";
559 break;
560 case WSANO_DATA:
561 error = "Valid name, no data record of requested type.";
562 break;
563 case WSANO_RECOVERY:
564 error = "This is a non-recoverable error.";
565 break;
566 // case WSAPROVIDERFAILEDINIT:
567 // error = "Unable to initialize a service provider.";
568 // break;
569 case WSASYSCALLFAILURE:
570 error = "System call failure.";
571 break;
572 case WSASYSNOTREADY:
573 error = "Network subsystem is unavailable.";
574 break;
575 case WSATRY_AGAIN:
576 error = "Non-authoritative host not found.";
577 break;
578 case WSAVERNOTSUPPORTED:
579 error = "WINSOCK.DLL version out of range.";
580 break;
581 case WSAEDISCON:
582 error = "Graceful shutdown in progress.";
583 break;
584 case WSA_OPERATION_ABORTED:
585 error = "Overlapped operation aborted.";
586 break;
587 default:
588 error = "Unknown error.";
589 break;
590 }
591
592 cerr << prefix << ": " << error << "\n";
593 #else
594 if (host)
595 {
596 string error;
597
598 switch (h_errno)
599 {
600 case HOST_NOT_FOUND:
601 error = "Unknown host";
602 break;
603 case TRY_AGAIN:
604 error = "Host name lookup failure";
605 break;
606 case NO_RECOVERY:
607 error = "Unknown server error";
608 break;
609 case NO_DATA:
610 error = "No address associated with name";
611 break;
612 default:
613 error = "Unknown error";
614 break;
615 }
616
617 cerr << prefix << ": " << error << "\n";
618 }
619 else
620 {
621 perror(prefix.c_str());
622 }
623 #endif // _WIN32
624 }