ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 17
Committed: 2002-12-09T18:31:11-08:00 (22 years, 6 months ago) by douglas
File size: 10082 byte(s)
Log Message:
Started reimplementing HttpHandler, moved Processor.getLink() to be global and
included in URL.h and implemented in URL.cpp.

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine HTTP Handler
46 //
47 // Douglas Thrift
48 //
49 // HttpHandler.cpp
50
#include "HttpHandler.h"

#ifndef _WIN32
#include <unistd.h> // close()
#endif // _WIN32
52
53 HttpHandler::HttpHandler()
54 {
55 buffer = new char[BUFSIZ + 1];
56
57 #ifdef _WIN32
58 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
59 {
60 error(program + ": WSAStartup");
61 exit(1);
62 }
63 #endif // _WIN32
64
65 begin = 0;
66 }
67
68 HttpHandler::~HttpHandler()
69 {
70 delete [] buffer;
71
72 #ifdef _WIN32
73 WSACleanup();
74 #endif // _WIN32
75 }
76
77 bool HttpHandler::handle(URL &url, bool head)
78 {
79 bool answer = false;
80
81 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
82 {
83 error(program + ": Socket");
84 exit(1);
85 }
86
87 sockaddr_in address;
88 hostent* host;
89
90 address.sin_family = AF_INET;
91
92 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
93 {
94 error(program + ": Host: " + url.getAddress(), true);
95 return answer;
96 }
97
98 address.sin_addr = *((in_addr*)*host->h_addr_list);
99 address.sin_port = htons(url.getPort());
100
101 if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
102 SOCKET_ERROR)
103 {
104 error(program + ": Connect");
105 return answer;
106 }
107
108 if (head)
109 {
110 putline("HEAD " + url.getPath() + " HTTP/1.1");
111 }
112 else
113 {
114 putline("GET " + url.getPath() + " HTTP/1.1");
115 }
116
117 putline("Accept: text/html; text/plain");
118 putline("User-Agent: " + agent(true) + ' ' + platform());
119
120 if (url.getPort() == 80)
121 {
122 putline("Host: " + url.getAddress());
123 }
124 else
125 {
126 char* port = new char[1024];
127 sprintf(port, "%u", url.getPort());
128
129 putline("Host: " + url.getAddress() + ':' + port);
130
131 delete [] port;
132 }
133
134 putline();
135
136 string line = getline();
137
138 if (line.find("HTTP/") != 0)
139 {
140 return answer;
141 }
142
143 unsigned dot = line.find('.');
144 unsigned space = line.find(' ');
145
146 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 0);
147 unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(), 0,
148 0);
149
150 if (major > 1 || minor < 1)
151 {
152 cerr << program << ": Potentially Incompatible Server: HTTP/" << major
153 << "." << minor << "\n";
154
155 return answer;
156 }
157
158 code response = code(strtoul(line.substr(space + 1).c_str(), 0, 0));
159
160 do
161 {
162 line = getline();
163 }
164 while (line != "");
165
166 switch (response)
167 {
168 case ok:
169 answer = true;
170 break;
171 case choices:
172 break;
173 case moved:
174 break;
175 case found:
176 break;
177 case notfound:
178 break;
179 case internal:
180 break;
181 default:
182 break;
183 }
184
185 return answer;
186 }
187
188 HttpHandler& HttpHandler::getline(string& line, char endline)
189 {
190 int end = page.find(endline, begin);
191 int newline = page.find('\n', begin);
192
193 if (newline < end || end == string::npos)
194 {
195 end = newline;
196 }
197
198 line = page.substr(begin, end - begin);
199
200 if (end == string::npos)
201 {
202 begin = end;
203 }
204 else
205 {
206 begin = end + 1;
207 }
208
209 return *this;
210 }
211
212 bool HttpHandler::good()
213 {
214 bool answer = true;
215
216 if (begin >= page.length())
217 {
218 answer = false;
219 }
220 else if (begin == string::npos)
221 {
222 answer = false;
223 }
224
225 return answer;
226 }
227
228 void HttpHandler::clear()
229 {
230 type = "";
231 length = 0;
232 location = "";
233 begin = 0;
234 page = "";
235 }
236
237 void HttpHandler::putline(const string line)
238 {
239 sprintf(buffer, "%s\r\n", line.c_str());
240 if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
241 {
242 error(program + ": Send");
243 exit(1);
244 }
245 }
246
247 string HttpHandler::getline()
248 {
249 string line;
250 char byte;
251
252 do
253 {
254 if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
255 {
256 error(program + ": Recv");
257 }
258
259 if (byte != '\r' && byte != '\n')
260 {
261 line += byte;
262 }
263 }
264 while (byte != '\n');
265
266 return line;
267 }
268
269 void HttpHandler::error(const string prefix, bool host)
270 {
271 #ifdef _WIN32
272 string error;
273
274 switch (WSAGetLastError())
275 {
276 case WSAEACCES:
277 error = "Permission denied.";
278 break;
279 case WSAEADDRINUSE:
280 error = "Address already in use.";
281 break;
282 case WSAEADDRNOTAVAIL:
283 error = "Cannot assign requested address.";
284 break;
285 case WSAEAFNOSUPPORT:
286 error = "Address family not supported by protocol family.";
287 break;
288 case WSAEALREADY:
289 error = "Operation already in progress.";
290 break;
291 case WSAECONNABORTED:
292 error = "Software caused connection abort.";
293 break;
294 case WSAECONNREFUSED:
295 error = "Connection refused.";
296 break;
297 case WSAECONNRESET:
298 error = "Connection reset by peer.";
299 break;
300 case WSAEDESTADDRREQ:
301 error = "Destination address required.";
302 break;
303 case WSAEFAULT:
304 error = "Bad address.";
305 break;
306 case WSAEHOSTDOWN:
307 error = "Host is down.";
308 break;
309 case WSAEHOSTUNREACH:
310 error = "No route to host.";
311 break;
312 case WSAEINPROGRESS:
313 error = "Operation now in progress.";
314 break;
315 case WSAEINTR:
316 error = "Interrupted function call.";
317 break;
318 case WSAEINVAL:
319 error = "Invalid argument.";
320 break;
321 case WSAEISCONN:
322 error = "Socket is already connected.";
323 break;
324 case WSAEMFILE:
325 error = "Too many open files.";
326 break;
327 case WSAEMSGSIZE:
328 error = "Message too long.";
329 break;
330 case WSAENETDOWN:
331 error = "Network is down.";
332 break;
333 case WSAENETRESET:
334 error = "Network dropped connection on reset.";
335 break;
336 case WSAENETUNREACH:
337 error = "Network is unreachable.";
338 break;
339 case WSAENOBUFS:
340 error = "No buffer space available.";
341 break;
342 case WSAENOPROTOOPT:
343 error = "Bad protocol option.";
344 break;
345 case WSAENOTCONN:
346 error = "Socket is not connected.";
347 break;
348 case WSAENOTSOCK:
349 error = "Socket operation on non-socket.";
350 break;
351 case WSAEOPNOTSUPP:
352 error = "Operation not supported.";
353 break;
354 case WSAEPFNOSUPPORT:
355 error = "Protocol family not supported.";
356 break;
357 case WSAEPROCLIM:
358 error = "Too many processes.";
359 break;
360 case WSAEPROTONOSUPPORT:
361 error = "Protocol not supported.";
362 break;
363 case WSAEPROTOTYPE:
364 error = "Protocol wrong type for socket.";
365 break;
366 case WSAESHUTDOWN:
367 error = "Cannot send after socket shutdown.";
368 break;
369 case WSAESOCKTNOSUPPORT:
370 error = "Socket type not supported.";
371 break;
372 case WSAETIMEDOUT:
373 error = "Connection timed out.";
374 break;
375 case WSATYPE_NOT_FOUND:
376 error = "Class type not found.";
377 break;
378 case WSAEWOULDBLOCK:
379 error = "Resource temporarily unavailable.";
380 break;
381 case WSAHOST_NOT_FOUND:
382 error = "Host not found.";
383 break;
384 case WSA_INVALID_HANDLE:
385 error = "Specified event object handle is invalid.";
386 break;
387 case WSA_INVALID_PARAMETER:
388 error = "One or more parameters are invalid.";
389 break;
390 // case WSAINVALIDPROCTABLE:
391 // error = "Invalid procedure table from service provider.";
392 // break;
393 // case WSAINVALIDPROVIDER:
394 // error = "Invalid service provider version number.";
395 // break;
396 case WSA_IO_INCOMPLETE:
397 error = "Overlapped I/O event object not in signaled state.";
398 break;
399 case WSA_IO_PENDING:
400 error = "Overlapped operations will complete later.";
401 break;
402 case WSA_NOT_ENOUGH_MEMORY:
403 error = "Insufficient memory available.";
404 break;
405 case WSANOTINITIALISED:
406 error = "Successful WSAStartup not yet performed.";
407 break;
408 case WSANO_DATA:
409 error = "Valid name, no data record of requested type.";
410 break;
411 case WSANO_RECOVERY:
412 error = "This is a non-recoverable error.";
413 break;
414 // case WSAPROVIDERFAILEDINIT:
415 // error = "Unable to initialize a service provider.";
416 // break;
417 case WSASYSCALLFAILURE:
418 error = "System call failure.";
419 break;
420 case WSASYSNOTREADY:
421 error = "Network subsystem is unavailable.";
422 break;
423 case WSATRY_AGAIN:
424 error = "Non-authoritative host not found.";
425 break;
426 case WSAVERNOTSUPPORTED:
427 error = "WINSOCK.DLL version out of range.";
428 break;
429 case WSAEDISCON:
430 error = "Graceful shutdown in progress.";
431 break;
432 case WSA_OPERATION_ABORTED:
433 error = "Overlapped operation aborted.";
434 break;
435 default:
436 error = "Unknown error.";
437 break;
438 }
439
440 cerr << prefix << ": " << error << "\n";
441 #else
442 if (host)
443 {
444 herror(prefix.c_str());
445 }
446 else
447 {
448 perror(prefix.c_str());
449 }
450 #endif // _WIN32
451 }