ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 20
Committed: 2002-12-10T14:04:39-08:00 (22 years, 6 months ago) by douglas
File size: 12201 byte(s)
Log Message:
Implemented chunked encoding handling.
Rewrote HttpHandler.good() and public HttpHandler.getline() functions.
There is a bug somewhere where something isn't always checking
HttpHandler.good()!

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine HTTP Handler
46 //
47 // Douglas Thrift
48 //
49 // HttpHandler.cpp
50
51 #include "HttpHandler.h"
52
53 HttpHandler::HttpHandler()
54 {
55 buffer = new char[BUFSIZ + 1];
56
57 #ifdef _WIN32
58 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
59 {
60 error(program + ": WSAStartup");
61 exit(1);
62 }
63 #endif // _WIN32
64
65 length = 0;
66 chunked = false;
67 }
68
69 HttpHandler::~HttpHandler()
70 {
71 delete [] buffer;
72
73 #ifdef _WIN32
74 WSACleanup();
75 #endif // _WIN32
76 }
77
78 bool HttpHandler::handle(URL &url, bool head)
79 {
80 bool answer = false;
81
82 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
83 {
84 error(program + ": Socket");
85 exit(1);
86 }
87
88 sockaddr_in address;
89 hostent* host;
90
91 address.sin_family = AF_INET;
92
93 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
94 {
95 error(program + ": Host: " + url.getAddress(), true);
96 return answer;
97 }
98
99 address.sin_addr = *((in_addr*)*host->h_addr_list);
100 address.sin_port = htons(url.getPort());
101
102 if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
103 SOCKET_ERROR)
104 {
105 error(program + ": Connect");
106 return answer;
107 }
108
109 if (head)
110 {
111 putline("HEAD " + url.getPath() + " HTTP/1.1");
112 }
113 else
114 {
115 putline("GET " + url.getPath() + " HTTP/1.1");
116 }
117
118 putline("Accept: text/html; text/plain");
119 putline("User-Agent: " + agent(true) + ' ' + platform());
120
121 if (url.getPort() == 80)
122 {
123 putline("Host: " + url.getAddress());
124 }
125 else
126 {
127 char* port = new char[1024];
128 sprintf(port, "%u", url.getPort());
129
130 putline("Host: " + url.getAddress() + ':' + port);
131
132 delete [] port;
133 }
134
135 // putline("Referer: " + ?referer?);
136 putline("Connection: close");
137 putline();
138
139 code response;
140 string line;
141
142 do
143 {
144 line = getline();
145
146 if (line.find("HTTP/") != 0)
147 {
148 return answer;
149 }
150
151 unsigned dot = line.find('.');
152 unsigned space = line.find(' ');
153
154 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 10);
155 unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(),
156 0, 10);
157
158 if (major > 1 || minor < 1)
159 {
160 cerr << program << ": Potentially Incompatible Server: HTTP/" <<
161 major << "." << minor << "\n";
162
163 return answer;
164 }
165
166 response = code(strtoul(line.substr(space + 1).c_str(), 0, 10));
167
168 if (response < ok) do line = getline(); while (line != "");
169 }
170 while (response < ok);
171
172 do
173 {
174 line = getline();
175
176 if (line != "")
177 {
178 unsigned colon = line.find(':');
179
180 string field = line.substr(0, colon);
181 string value = line.substr(colon + 1);
182
183 while (isspace(value[0])) value.erase(0, 1);
184
185 if (field == "Content-Type")
186 {
187 type = value;
188 }
189 else if (field == "Content-Length")
190 {
191 length = strtoul(value.c_str(), 0, 10);
192 }
193 else if (field == "Location")
194 {
195 location = value;
196 }
197 else if (field == "Transfer-Encoding")
198 {
199 chunked = value == "chunked";
200 }
201 }
202 }
203 while (line != "");
204
205 switch (response)
206 {
207 case ok:
208 if (debug) cerr << "response = " << response << "\n";
209 answer = true;
210 break;
211 case choices:
212 case moved:
213 case found:
214 if (debug) cerr << "response = " << response << "\n"
215 << "location = " << location << "\n";
216 location = getLink(location, url);
217 break;
218 case notfound:
219 case internal:
220 if (debug) cerr << "response = " << response << "\n";
221 break;
222 default:
223 if (debug) cerr << "response = " << response << "\n";
224 if (response <= 299)
225 {
226 answer = true;
227 }
228 else if (response <= 399)
229 {
230 location = getLink(location, url);
231 }
232 break;
233 }
234
235 if (!head && answer) populate();
236
237 return answer;
238 }
239
240 HttpHandler& HttpHandler::getline(string& line, char endline)
241 {
242 int end = page.find(endline);
243 int newline = page.find('\n');
244
245 if (newline < end || end == string::npos)
246 {
247 end = newline;
248 }
249
250 line = page.substr(0, end);
251 page.erase(0, (end == string::npos ? end : end + 1));
252
253 // if (line == "") cerr << "line = [" << line << "]\npage = [" << page
254 // << "]" << (good() ? "true" : "false") << "\n";
255
256 return *this;
257 }
258
259 void HttpHandler::clear()
260 {
261 closesocket(http);
262
263 type = "";
264 length = 0;
265 location = "";
266 page = "";
267 chunked = false;
268 }
269
270 void HttpHandler::populate()
271 {
272 if (!chunked)
273 {
274 unsigned left = length;
275
276 while (left > 0)
277 {
278 memset(buffer, 0, BUFSIZ + 1);
279
280 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
281
282 if (recv(http, buffer, bytes, 0) == SOCKET_ERROR)
283 {
284 error(program + ": Recv");
285 exit(1);
286 }
287
288 page += buffer;
289 left -= bytes;
290 }
291 }
292 else
293 {
294 unsigned chunk;
295
296 do
297 {
298 chunk = strtoul(getline().c_str(), 0, 16);
299
300 unsigned left = chunk;
301
302 while (left > 0)
303 {
304 memset(buffer, 0, BUFSIZ + 1);
305
306 unsigned bytes = left > BUFSIZ ? BUFSIZ : left;
307
308 if (recv(http, buffer, bytes, 0) == SOCKET_ERROR)
309 {
310 error(program + ": Recv");
311 exit(1);
312 }
313
314 page += buffer;
315 left -= bytes;
316 }
317
318 getline();
319 length += chunk;
320 }
321 while (chunk > 0);
322 }
323
324 for (unsigned index = 0; index < page.length(); index++)
325 {
326 if (page[index] == '\r' && (index + 1 < page.length()) ? page[index +
327 1] == '\n' : false)
328 {
329 page.erase(index, 1);
330 }
331 else if (page[index] == '\r')
332 {
333 page[index] = '\n';
334 }
335 }
336 }
337
338 void HttpHandler::putline(const string line)
339 {
340 sprintf(buffer, "%s\r\n", line.c_str());
341 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
342 {
343 error(program + ": Send");
344 exit(1);
345 }
346 }
347
348 string HttpHandler::getline()
349 {
350 string line;
351 char byte;
352
353 do
354 {
355 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
356 {
357 error(program + ": Recv");
358 }
359
360 if (byte != '\r' && byte != '\n')
361 {
362 line += byte;
363 }
364 }
365 while (byte != '\n');
366
367 return line;
368 }
369
370 void HttpHandler::error(const string& prefix, bool host)
371 {
372 #ifdef _WIN32
373 string error;
374
375 switch (WSAGetLastError())
376 {
377 case WSAEACCES:
378 error = "Permission denied.";
379 break;
380 case WSAEADDRINUSE:
381 error = "Address already in use.";
382 break;
383 case WSAEADDRNOTAVAIL:
384 error = "Cannot assign requested address.";
385 break;
386 case WSAEAFNOSUPPORT:
387 error = "Address family not supported by protocol family.";
388 break;
389 case WSAEALREADY:
390 error = "Operation already in progress.";
391 break;
392 case WSAECONNABORTED:
393 error = "Software caused connection abort.";
394 break;
395 case WSAECONNREFUSED:
396 error = "Connection refused.";
397 break;
398 case WSAECONNRESET:
399 error = "Connection reset by peer.";
400 break;
401 case WSAEDESTADDRREQ:
402 error = "Destination address required.";
403 break;
404 case WSAEFAULT:
405 error = "Bad address.";
406 break;
407 case WSAEHOSTDOWN:
408 error = "Host is down.";
409 break;
410 case WSAEHOSTUNREACH:
411 error = "No route to host.";
412 break;
413 case WSAEINPROGRESS:
414 error = "Operation now in progress.";
415 break;
416 case WSAEINTR:
417 error = "Interrupted function call.";
418 break;
419 case WSAEINVAL:
420 error = "Invalid argument.";
421 break;
422 case WSAEISCONN:
423 error = "Socket is already connected.";
424 break;
425 case WSAEMFILE:
426 error = "Too many open files.";
427 break;
428 case WSAEMSGSIZE:
429 error = "Message too long.";
430 break;
431 case WSAENETDOWN:
432 error = "Network is down.";
433 break;
434 case WSAENETRESET:
435 error = "Network dropped connection on reset.";
436 break;
437 case WSAENETUNREACH:
438 error = "Network is unreachable.";
439 break;
440 case WSAENOBUFS:
441 error = "No buffer space available.";
442 break;
443 case WSAENOPROTOOPT:
444 error = "Bad protocol option.";
445 break;
446 case WSAENOTCONN:
447 error = "Socket is not connected.";
448 break;
449 case WSAENOTSOCK:
450 error = "Socket operation on non-socket.";
451 break;
452 case WSAEOPNOTSUPP:
453 error = "Operation not supported.";
454 break;
455 case WSAEPFNOSUPPORT:
456 error = "Protocol family not supported.";
457 break;
458 case WSAEPROCLIM:
459 error = "Too many processes.";
460 break;
461 case WSAEPROTONOSUPPORT:
462 error = "Protocol not supported.";
463 break;
464 case WSAEPROTOTYPE:
465 error = "Protocol wrong type for socket.";
466 break;
467 case WSAESHUTDOWN:
468 error = "Cannot send after socket shutdown.";
469 break;
470 case WSAESOCKTNOSUPPORT:
471 error = "Socket type not supported.";
472 break;
473 case WSAETIMEDOUT:
474 error = "Connection timed out.";
475 break;
476 case WSATYPE_NOT_FOUND:
477 error = "Class type not found.";
478 break;
479 case WSAEWOULDBLOCK:
480 error = "Resource temporarily unavailable.";
481 break;
482 case WSAHOST_NOT_FOUND:
483 error = "Host not found.";
484 break;
485 case WSA_INVALID_HANDLE:
486 error = "Specified event object handle is invalid.";
487 break;
488 case WSA_INVALID_PARAMETER:
489 error = "One or more parameters are invalid.";
490 break;
491 // case WSAINVALIDPROCTABLE:
492 // error = "Invalid procedure table from service provider.";
493 // break;
494 // case WSAINVALIDPROVIDER:
495 // error = "Invalid service provider version number.";
496 // break;
497 case WSA_IO_INCOMPLETE:
498 error = "Overlapped I/O event object not in signaled state.";
499 break;
500 case WSA_IO_PENDING:
501 error = "Overlapped operations will complete later.";
502 break;
503 case WSA_NOT_ENOUGH_MEMORY:
504 error = "Insufficient memory available.";
505 break;
506 case WSANOTINITIALISED:
507 error = "Successful WSAStartup not yet performed.";
508 break;
509 case WSANO_DATA:
510 error = "Valid name, no data record of requested type.";
511 break;
512 case WSANO_RECOVERY:
513 error = "This is a non-recoverable error.";
514 break;
515 // case WSAPROVIDERFAILEDINIT:
516 // error = "Unable to initialize a service provider.";
517 // break;
518 case WSASYSCALLFAILURE:
519 error = "System call failure.";
520 break;
521 case WSASYSNOTREADY:
522 error = "Network subsystem is unavailable.";
523 break;
524 case WSATRY_AGAIN:
525 error = "Non-authoritative host not found.";
526 break;
527 case WSAVERNOTSUPPORTED:
528 error = "WINSOCK.DLL version out of range.";
529 break;
530 case WSAEDISCON:
531 error = "Graceful shutdown in progress.";
532 break;
533 case WSA_OPERATION_ABORTED:
534 error = "Overlapped operation aborted.";
535 break;
536 default:
537 error = "Unknown error.";
538 break;
539 }
540
541 cerr << prefix << ": " << error << "\n";
542 #else
543 if (host)
544 {
545 herror(prefix.c_str());
546 }
547 else
548 {
549 perror(prefix.c_str());
550 }
551 #endif // _WIN32
552 }