ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 18
Committed: 2002-12-09T21:40:12-08:00 (22 years, 6 months ago) by douglas
File size: 10928 byte(s)
Log Message:
Implemented more HttpHandler stuff.
Added news: protocol to those ignored by getLink().

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine HTTP Handler
46 //
47 // Douglas Thrift
48 //
49 // HttpHandler.cpp
50
51 #include "HttpHandler.h"
52
53 HttpHandler::HttpHandler()
54 {
55 buffer = new char[BUFSIZ + 1];
56
57 #ifdef _WIN32
58 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
59 {
60 error(program + ": WSAStartup");
61 exit(1);
62 }
63 #endif // _WIN32
64
65 begin = 0;
66 length = 0;
67 chunked = false;
68 }
69
70 HttpHandler::~HttpHandler()
71 {
72 delete [] buffer;
73
74 #ifdef _WIN32
75 WSACleanup();
76 #endif // _WIN32
77 }
78
79 bool HttpHandler::handle(URL &url, bool head)
80 {
81 bool answer = false;
82
83 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
84 {
85 error(program + ": Socket");
86 exit(1);
87 }
88
89 sockaddr_in address;
90 hostent* host;
91
92 address.sin_family = AF_INET;
93
94 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
95 {
96 error(program + ": Host: " + url.getAddress(), true);
97 return answer;
98 }
99
100 address.sin_addr = *((in_addr*)*host->h_addr_list);
101 address.sin_port = htons(url.getPort());
102
103 if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
104 SOCKET_ERROR)
105 {
106 error(program + ": Connect");
107 return answer;
108 }
109
110 if (head)
111 {
112 putline("HEAD " + url.getPath() + " HTTP/1.1");
113 }
114 else
115 {
116 putline("GET " + url.getPath() + " HTTP/1.1");
117 }
118
119 putline("Accept: text/html; text/plain");
120 putline("User-Agent: " + agent(true) + ' ' + platform());
121
122 if (url.getPort() == 80)
123 {
124 putline("Host: " + url.getAddress());
125 }
126 else
127 {
128 char* port = new char[1024];
129 sprintf(port, "%u", url.getPort());
130
131 putline("Host: " + url.getAddress() + ':' + port);
132
133 delete [] port;
134 }
135
136 putline("Connection: close");
137 putline();
138
139 code response;
140 string line;
141
142 do
143 {
144 line = getline();
145
146 if (line.find("HTTP/") != 0)
147 {
148 return answer;
149 }
150
151 unsigned dot = line.find('.');
152 unsigned space = line.find(' ');
153
154 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 0);
155 unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(), 0,
156 0);
157
158 if (major > 1 || minor < 1)
159 {
160 cerr << program << ": Potentially Incompatible Server: HTTP/" << major
161 << "." << minor << "\n";
162
163 return answer;
164 }
165
166 response = code(strtoul(line.substr(space + 1).c_str(), 0, 0));
167
168 if (response < ok) do line = getline(); while (line != "");
169 }
170 while (response < ok);
171
172 do
173 {
174 line = getline();
175
176 if (line != "")
177 {
178 unsigned colon = line.find(':');
179
180 string field = line.substr(0, colon);
181 string value = line.substr(colon + 1);
182
183 while (isspace(value[0])) value.erase(0, 1);
184
185 // if (field =
186 }
187 }
188 while (line != "");
189
190 switch (response)
191 {
192 case ok:
193 if (debug) cerr << "response = " << response << "\n";
194 answer = true;
195 break;
196 case choices:
197 case moved:
198 case found:
199 if (debug) cerr << "response = " << response << "\n"
200 << "location = " << location << "\n";
201 location = getLink(location, url);
202 break;
203 case notfound:
204 case internal:
205 if (debug) cerr << "response = " << response << "\n";
206 break;
207 default:
208 if (debug) cerr << "response = " << response << "\n";
209 if (response <= 299)
210 {
211 answer = true;
212 }
213 else if (response <= 399)
214 {
215 location = getLink(location, url);
216 }
217 break;
218 }
219
220 return answer;
221 }
222
223 HttpHandler& HttpHandler::getline(string& line, char endline)
224 {
225 int end = page.find(endline, begin);
226 int newline = page.find('\n', begin);
227
228 if (newline < end || end == string::npos)
229 {
230 end = newline;
231 }
232
233 line = page.substr(begin, end - begin);
234
235 if (end == string::npos)
236 {
237 begin = end;
238 }
239 else
240 {
241 begin = end + 1;
242 }
243
244 return *this;
245 }
246
247 bool HttpHandler::good()
248 {
249 bool answer = true;
250
251 if (begin >= page.length())
252 {
253 answer = false;
254 }
255 else if (begin == string::npos)
256 {
257 answer = false;
258 }
259
260 return answer;
261 }
262
263 void HttpHandler::clear()
264 {
265 closesocket(http);
266
267 type = "";
268 length = 0;
269 location = "";
270 begin = 0;
271 page = "";
272 chunked = false;
273 }
274
275 void HttpHandler::putline(const string line)
276 {
277 sprintf(buffer, "%s\r\n", line.c_str());
278 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
279 {
280 error(program + ": Send");
281 exit(1);
282 }
283 }
284
285 string HttpHandler::getline()
286 {
287 string line;
288 char byte;
289
290 do
291 {
292 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
293 {
294 error(program + ": Recv");
295 }
296
297 if (byte != '\r' && byte != '\n')
298 {
299 line += byte;
300 }
301 }
302 while (byte != '\n');
303
304 return line;
305 }
306
307 void HttpHandler::error(const string& prefix, bool host)
308 {
309 #ifdef _WIN32
310 string error;
311
312 switch (WSAGetLastError())
313 {
314 case WSAEACCES:
315 error = "Permission denied.";
316 break;
317 case WSAEADDRINUSE:
318 error = "Address already in use.";
319 break;
320 case WSAEADDRNOTAVAIL:
321 error = "Cannot assign requested address.";
322 break;
323 case WSAEAFNOSUPPORT:
324 error = "Address family not supported by protocol family.";
325 break;
326 case WSAEALREADY:
327 error = "Operation already in progress.";
328 break;
329 case WSAECONNABORTED:
330 error = "Software caused connection abort.";
331 break;
332 case WSAECONNREFUSED:
333 error = "Connection refused.";
334 break;
335 case WSAECONNRESET:
336 error = "Connection reset by peer.";
337 break;
338 case WSAEDESTADDRREQ:
339 error = "Destination address required.";
340 break;
341 case WSAEFAULT:
342 error = "Bad address.";
343 break;
344 case WSAEHOSTDOWN:
345 error = "Host is down.";
346 break;
347 case WSAEHOSTUNREACH:
348 error = "No route to host.";
349 break;
350 case WSAEINPROGRESS:
351 error = "Operation now in progress.";
352 break;
353 case WSAEINTR:
354 error = "Interrupted function call.";
355 break;
356 case WSAEINVAL:
357 error = "Invalid argument.";
358 break;
359 case WSAEISCONN:
360 error = "Socket is already connected.";
361 break;
362 case WSAEMFILE:
363 error = "Too many open files.";
364 break;
365 case WSAEMSGSIZE:
366 error = "Message too long.";
367 break;
368 case WSAENETDOWN:
369 error = "Network is down.";
370 break;
371 case WSAENETRESET:
372 error = "Network dropped connection on reset.";
373 break;
374 case WSAENETUNREACH:
375 error = "Network is unreachable.";
376 break;
377 case WSAENOBUFS:
378 error = "No buffer space available.";
379 break;
380 case WSAENOPROTOOPT:
381 error = "Bad protocol option.";
382 break;
383 case WSAENOTCONN:
384 error = "Socket is not connected.";
385 break;
386 case WSAENOTSOCK:
387 error = "Socket operation on non-socket.";
388 break;
389 case WSAEOPNOTSUPP:
390 error = "Operation not supported.";
391 break;
392 case WSAEPFNOSUPPORT:
393 error = "Protocol family not supported.";
394 break;
395 case WSAEPROCLIM:
396 error = "Too many processes.";
397 break;
398 case WSAEPROTONOSUPPORT:
399 error = "Protocol not supported.";
400 break;
401 case WSAEPROTOTYPE:
402 error = "Protocol wrong type for socket.";
403 break;
404 case WSAESHUTDOWN:
405 error = "Cannot send after socket shutdown.";
406 break;
407 case WSAESOCKTNOSUPPORT:
408 error = "Socket type not supported.";
409 break;
410 case WSAETIMEDOUT:
411 error = "Connection timed out.";
412 break;
413 case WSATYPE_NOT_FOUND:
414 error = "Class type not found.";
415 break;
416 case WSAEWOULDBLOCK:
417 error = "Resource temporarily unavailable.";
418 break;
419 case WSAHOST_NOT_FOUND:
420 error = "Host not found.";
421 break;
422 case WSA_INVALID_HANDLE:
423 error = "Specified event object handle is invalid.";
424 break;
425 case WSA_INVALID_PARAMETER:
426 error = "One or more parameters are invalid.";
427 break;
428 // case WSAINVALIDPROCTABLE:
429 // error = "Invalid procedure table from service provider.";
430 // break;
431 // case WSAINVALIDPROVIDER:
432 // error = "Invalid service provider version number.";
433 // break;
434 case WSA_IO_INCOMPLETE:
435 error = "Overlapped I/O event object not in signaled state.";
436 break;
437 case WSA_IO_PENDING:
438 error = "Overlapped operations will complete later.";
439 break;
440 case WSA_NOT_ENOUGH_MEMORY:
441 error = "Insufficient memory available.";
442 break;
443 case WSANOTINITIALISED:
444 error = "Successful WSAStartup not yet performed.";
445 break;
446 case WSANO_DATA:
447 error = "Valid name, no data record of requested type.";
448 break;
449 case WSANO_RECOVERY:
450 error = "This is a non-recoverable error.";
451 break;
452 // case WSAPROVIDERFAILEDINIT:
453 // error = "Unable to initialize a service provider.";
454 // break;
455 case WSASYSCALLFAILURE:
456 error = "System call failure.";
457 break;
458 case WSASYSNOTREADY:
459 error = "Network subsystem is unavailable.";
460 break;
461 case WSATRY_AGAIN:
462 error = "Non-authoritative host not found.";
463 break;
464 case WSAVERNOTSUPPORTED:
465 error = "WINSOCK.DLL version out of range.";
466 break;
467 case WSAEDISCON:
468 error = "Graceful shutdown in progress.";
469 break;
470 case WSA_OPERATION_ABORTED:
471 error = "Overlapped operation aborted.";
472 break;
473 default:
474 error = "Unknown error.";
475 break;
476 }
477
478 cerr << prefix << ": " << error << "\n";
479 #else
480 if (host)
481 {
482 herror(prefix.c_str());
483 }
484 else
485 {
486 perror(prefix.c_str());
487 }
488 #endif // _WIN32
489 }