ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 178
Committed: 2003-07-05T19:13:12-07:00 (21 years, 11 months ago) by douglas
File size: 12884 byte(s)
Log Message:
Moved Unix socket stuff from header file to source file.

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine HTTP Handler
46 //
47 // Douglas Thrift
48 //
49 // HttpHandler.cpp
50
51 #include "HttpHandler.h"
52
53 #ifndef _WIN32
54 #include <unistd.h>
55 #include <sys/types.h>
56 #include <sys/socket.h>
57 #include <netinet/in.h>
58 #include <netdb.h>
59
60 #define INVALID_SOCKET -1
61 #define SOCKET_ERROR -1
62
63 inline int closesocket(SOCKET s) { return close(s); }
64 #endif
65
66 HttpHandler::HttpHandler()
67 {
68 buffer = new char[BUFSIZ + 1];
69
70 #ifdef _WIN32
71 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
72 {
73 error(program + ": WSAStartup");
74 exit(1);
75 }
76 #endif // _WIN32
77
78 length = 0;
79 chunked = false;
80 }
81
82 HttpHandler::~HttpHandler()
83 {
84 delete [] buffer;
85
86 #ifdef _WIN32
87 WSACleanup();
88 #endif // _WIN32
89 }
90
91 bool HttpHandler::handle(URL &url, const string referer, bool head)
92 {
93 bool answer = false;
94
95 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
96 {
97 error(program + ": Socket");
98 exit(1);
99 }
100
101 sockaddr_in address;
102 hostent* host;
103
104 address.sin_family = AF_INET;
105
106 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
107 {
108 error(program + ": Host: " + url.getAddress(), true);
109 return answer;
110 }
111
112 address.sin_addr = *((in_addr*)*host->h_addr_list);
113 address.sin_port = htons(url.getPort());
114
115 if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
116 SOCKET_ERROR)
117 {
118 error(program + ": Connect");
119 return answer;
120 }
121
122 if (head)
123 {
124 putline("HEAD " + url.getPath() + " HTTP/1.1");
125 }
126 else
127 {
128 putline("GET " + url.getPath() + " HTTP/1.1");
129 }
130
131 putline("Accept: text/html; text/plain");
132 putline("User-Agent: " + agent(true) + ' ' + platform());
133
134 if (url.getPort() == 80)
135 {
136 putline("Host: " + url.getAddress());
137 }
138 else
139 {
140 char* port = new char[1024];
141 sprintf(port, "%u", url.getPort());
142
143 putline("Host: " + url.getAddress() + ':' + port);
144
145 delete [] port;
146 }
147
148 if (referer != "")
149 {
150 putline("Referer: " + referer);
151 }
152
153 putline("Connection: close");
154 putline();
155
156 code response;
157 string line;
158
159 do
160 {
161 line = getline();
162
163 if (line.find("HTTP/") != 0)
164 {
165 return answer;
166 }
167
168 unsigned dot = line.find('.');
169 unsigned space = line.find(' ');
170
171 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 10);
172 unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(),
173 0, 10);
174
175 if (major > 1)
176 {
177 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
178 major << "." << minor << "\n";
179
180 return answer;
181 }
182
183 response = code(strtoul(line.substr(space + 1).c_str(), 0, 10));
184
185 if (response < ok) do line = getline(); while (line != "");
186 }
187 while (response < ok);
188
189 do
190 {
191 line = getline();
192
193 if (line != "")
194 {
195 unsigned colon = line.find(':');
196
197 string field = line.substr(0, colon);
198 string value = line.substr(colon + 1);
199
200 while (isspace(value[0])) value.erase(0, 1);
201
202 if (field == "Content-Type")
203 {
204 type = value;
205 }
206 else if (field == "Content-Length")
207 {
208 length = strtoul(value.c_str(), 0, 10);
209 }
210 else if (field == "Location")
211 {
212 location = value;
213 }
214 else if (field == "Transfer-Encoding")
215 {
216 chunked = value == "chunked";
217 }
218 }
219 }
220 while (line != "");
221
222 switch (response)
223 {
224 case ok:
225 if (debug) cerr << "response = " << response << "\n";
226 answer = true;
227 break;
228 case choices:
229 case moved:
230 case found:
231 if (debug) cerr << "response = " << response << "\n"
232 << "location = " << location << "\n";
233 location = getLink(location, url);
234 break;
235 case notfound:
236 case internal:
237 if (debug) cerr << "response = " << response << "\n";
238 break;
239 default:
240 if (debug) cerr << "response = " << response << "\n";
241 if (response <= 299)
242 {
243 answer = true;
244 }
245 else if (response <= 399)
246 {
247 location = getLink(location, url);
248 }
249 break;
250 }
251
252 if (!head && answer) populate();
253
254 return answer;
255 }
256
257 HttpHandler& HttpHandler::getline(string& line, char endline)
258 {
259 unsigned end = page.find(endline);
260 unsigned newline = page.find('\n');
261
262 if (newline < end || end == string::npos)
263 {
264 end = newline;
265 }
266
267 line = page.substr(0, end);
268 page.erase(0, (end == string::npos ? end : end + 1));
269
270 return *this;
271 }
272
273 void HttpHandler::clear()
274 {
275 closesocket(http);
276
277 type = "";
278 length = 0;
279 location = "";
280 page = "";
281 chunked = false;
282 }
283
284 void HttpHandler::populate()
285 {
286 if (!chunked)
287 {
288 unsigned left = length;
289
290 while (left > 0)
291 {
292 memset(buffer, 0, BUFSIZ + 1);
293
294 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
295 unsigned received;
296
297 while (true)
298 {
299 if ((received = recv(http, buffer, bytes, 0)) == SOCKET_ERROR)
300 {
301 error(program + ": Recv");
302 exit(1);
303 }
304 else if (received != bytes)
305 {
306 left -= received;
307 page += buffer;
308
309 memset(buffer, 0, BUFSIZ + 1);
310
311 bytes -= received;
312 }
313 else
314 {
315 break;
316 }
317 }
318
319 page += buffer;
320 left -= bytes;
321 }
322 }
323 else
324 {
325 unsigned chunk;
326
327 do
328 {
329 chunk = strtoul(getline().c_str(), 0, 16);
330
331 unsigned left = chunk;
332
333 while (left > 0)
334 {
335 memset(buffer, 0, BUFSIZ + 1);
336
337 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
338 unsigned received;
339
340 while (true)
341 {
342 if ((received = recv(http, buffer, bytes, 0)) ==
343 SOCKET_ERROR)
344 {
345 error(program + ": Recv");
346 exit(1);
347 }
348 else if (received != bytes)
349 {
350 left -= received;
351 page += buffer;
352
353 memset(buffer, 0, BUFSIZ + 1);
354
355 bytes -= received;
356 }
357 else
358 {
359 break;
360 }
361 }
362
363 page += buffer;
364 left -= bytes;
365 }
366
367 getline();
368 length += chunk;
369 }
370 while (chunk > 0);
371 }
372
373 for (unsigned index = 0; index < page.length(); index++)
374 {
375 if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
376 1] == '\n' : false)
377 {
378 page.erase(index, 1);
379 }
380 else if (page[index] == '\r')
381 {
382 page[index] = '\n';
383 }
384 }
385 }
386
387 void HttpHandler::putline(const string line)
388 {
389 sprintf(buffer, "%s\r\n", line.c_str());
390 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
391 {
392 error(program + ": Send");
393 exit(1);
394 }
395 }
396
397 string HttpHandler::getline()
398 {
399 string line;
400 char byte;
401
402 do
403 {
404 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
405 {
406 error(program + ": Recv");
407 }
408
409 if (byte != '\r' && byte != '\n')
410 {
411 line += byte;
412 }
413 }
414 while (byte != '\n');
415
416 return line;
417 }
418
419 void HttpHandler::error(const string& prefix, bool host)
420 {
421 #ifdef _WIN32
422 string error;
423
424 switch (WSAGetLastError())
425 {
426 case WSAEACCES:
427 error = "Permission denied.";
428 break;
429 case WSAEADDRINUSE:
430 error = "Address already in use.";
431 break;
432 case WSAEADDRNOTAVAIL:
433 error = "Cannot assign requested address.";
434 break;
435 case WSAEAFNOSUPPORT:
436 error = "Address family not supported by protocol family.";
437 break;
438 case WSAEALREADY:
439 error = "Operation already in progress.";
440 break;
441 case WSAECONNABORTED:
442 error = "Software caused connection abort.";
443 break;
444 case WSAECONNREFUSED:
445 error = "Connection refused.";
446 break;
447 case WSAECONNRESET:
448 error = "Connection reset by peer.";
449 break;
450 case WSAEDESTADDRREQ:
451 error = "Destination address required.";
452 break;
453 case WSAEFAULT:
454 error = "Bad address.";
455 break;
456 case WSAEHOSTDOWN:
457 error = "Host is down.";
458 break;
459 case WSAEHOSTUNREACH:
460 error = "No route to host.";
461 break;
462 case WSAEINPROGRESS:
463 error = "Operation now in progress.";
464 break;
465 case WSAEINTR:
466 error = "Interrupted function call.";
467 break;
468 case WSAEINVAL:
469 error = "Invalid argument.";
470 break;
471 case WSAEISCONN:
472 error = "Socket is already connected.";
473 break;
474 case WSAEMFILE:
475 error = "Too many open files.";
476 break;
477 case WSAEMSGSIZE:
478 error = "Message too long.";
479 break;
480 case WSAENETDOWN:
481 error = "Network is down.";
482 break;
483 case WSAENETRESET:
484 error = "Network dropped connection on reset.";
485 break;
486 case WSAENETUNREACH:
487 error = "Network is unreachable.";
488 break;
489 case WSAENOBUFS:
490 error = "No buffer space available.";
491 break;
492 case WSAENOPROTOOPT:
493 error = "Bad protocol option.";
494 break;
495 case WSAENOTCONN:
496 error = "Socket is not connected.";
497 break;
498 case WSAENOTSOCK:
499 error = "Socket operation on non-socket.";
500 break;
501 case WSAEOPNOTSUPP:
502 error = "Operation not supported.";
503 break;
504 case WSAEPFNOSUPPORT:
505 error = "Protocol family not supported.";
506 break;
507 case WSAEPROCLIM:
508 error = "Too many processes.";
509 break;
510 case WSAEPROTONOSUPPORT:
511 error = "Protocol not supported.";
512 break;
513 case WSAEPROTOTYPE:
514 error = "Protocol wrong type for socket.";
515 break;
516 case WSAESHUTDOWN:
517 error = "Cannot send after socket shutdown.";
518 break;
519 case WSAESOCKTNOSUPPORT:
520 error = "Socket type not supported.";
521 break;
522 case WSAETIMEDOUT:
523 error = "Connection timed out.";
524 break;
525 case WSATYPE_NOT_FOUND:
526 error = "Class type not found.";
527 break;
528 case WSAEWOULDBLOCK:
529 error = "Resource temporarily unavailable.";
530 break;
531 case WSAHOST_NOT_FOUND:
532 error = "Host not found.";
533 break;
534 case WSA_INVALID_HANDLE:
535 error = "Specified event object handle is invalid.";
536 break;
537 case WSA_INVALID_PARAMETER:
538 error = "One or more parameters are invalid.";
539 break;
540 // case WSAINVALIDPROCTABLE:
541 // error = "Invalid procedure table from service provider.";
542 // break;
543 // case WSAINVALIDPROVIDER:
544 // error = "Invalid service provider version number.";
545 // break;
546 case WSA_IO_INCOMPLETE:
547 error = "Overlapped I/O event object not in signaled state.";
548 break;
549 case WSA_IO_PENDING:
550 error = "Overlapped operations will complete later.";
551 break;
552 case WSA_NOT_ENOUGH_MEMORY:
553 error = "Insufficient memory available.";
554 break;
555 case WSANOTINITIALISED:
556 error = "Successful WSAStartup not yet performed.";
557 break;
558 case WSANO_DATA:
559 error = "Valid name, no data record of requested type.";
560 break;
561 case WSANO_RECOVERY:
562 error = "This is a non-recoverable error.";
563 break;
564 // case WSAPROVIDERFAILEDINIT:
565 // error = "Unable to initialize a service provider.";
566 // break;
567 case WSASYSCALLFAILURE:
568 error = "System call failure.";
569 break;
570 case WSASYSNOTREADY:
571 error = "Network subsystem is unavailable.";
572 break;
573 case WSATRY_AGAIN:
574 error = "Non-authoritative host not found.";
575 break;
576 case WSAVERNOTSUPPORTED:
577 error = "WINSOCK.DLL version out of range.";
578 break;
579 case WSAEDISCON:
580 error = "Graceful shutdown in progress.";
581 break;
582 case WSA_OPERATION_ABORTED:
583 error = "Overlapped operation aborted.";
584 break;
585 default:
586 error = "Unknown error.";
587 break;
588 }
589
590 cerr << prefix << ": " << error << "\n";
591 #else
592 if (host)
593 {
594 herror(prefix.c_str());
595 }
596 else
597 {
598 perror(prefix.c_str());
599 }
600 #endif // _WIN32
601 }