ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/HttpHandler.cpp
Revision: 18
Committed: 2002-12-09T21:40:12-08:00 (22 years, 6 months ago) by douglas
File size: 10928 byte(s)
Log Message:
Implemented more HttpHandler stuff.
Added news: protocol to those ignored by getLink().

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4     * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine HTTP Handler
46     //
47     // Douglas Thrift
48     //
49     // HttpHandler.cpp
50    
51     #include "HttpHandler.h"
52    
53     HttpHandler::HttpHandler()
54     {
55 douglas 14 buffer = new char[BUFSIZ + 1];
56    
57 douglas 13 #ifdef _WIN32
58 douglas 14 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
59 douglas 13 {
60 douglas 17 error(program + ": WSAStartup");
61 douglas 13 exit(1);
62     }
63     #endif // _WIN32
64 douglas 1
65     begin = 0;
66 douglas 18 length = 0;
67     chunked = false;
68 douglas 1 }
69    
70     HttpHandler::~HttpHandler()
71     {
72 douglas 14 delete [] buffer;
73    
74 douglas 13 #ifdef _WIN32
75     WSACleanup();
76     #endif // _WIN32
77 douglas 1 }
78    
79 douglas 14 bool HttpHandler::handle(URL &url, bool head)
80 douglas 1 {
81     bool answer = false;
82    
83 douglas 14 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
84     {
85     error(program + ": Socket");
86     exit(1);
87     }
88 douglas 1
89 douglas 14 sockaddr_in address;
90     hostent* host;
91 douglas 1
92 douglas 14 address.sin_family = AF_INET;
93 douglas 1
94 douglas 14 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
95     {
96     error(program + ": Host: " + url.getAddress(), true);
97     return answer;
98     }
99 douglas 1
100 douglas 14 address.sin_addr = *((in_addr*)*host->h_addr_list);
101     address.sin_port = htons(url.getPort());
102    
103     if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
104     SOCKET_ERROR)
105     {
106     error(program + ": Connect");
107     return answer;
108     }
109    
110     if (head)
111     {
112     putline("HEAD " + url.getPath() + " HTTP/1.1");
113     }
114     else
115     {
116     putline("GET " + url.getPath() + " HTTP/1.1");
117     }
118    
119     putline("Accept: text/html; text/plain");
120     putline("User-Agent: " + agent(true) + ' ' + platform());
121    
122     if (url.getPort() == 80)
123     {
124     putline("Host: " + url.getAddress());
125     }
126     else
127     {
128     char* port = new char[1024];
129     sprintf(port, "%u", url.getPort());
130    
131     putline("Host: " + url.getAddress() + ':' + port);
132    
133     delete [] port;
134     }
135    
136 douglas 18 putline("Connection: close");
137 douglas 14 putline();
138    
139 douglas 18 code response;
140     string line;
141 douglas 17
142 douglas 18 do
143 douglas 17 {
144 douglas 18 line = getline();
145 douglas 17
146 douglas 18 if (line.find("HTTP/") != 0)
147     {
148     return answer;
149     }
150 douglas 17
151 douglas 18 unsigned dot = line.find('.');
152     unsigned space = line.find(' ');
153 douglas 17
154 douglas 18 unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 0);
155     unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(), 0,
156     0);
157 douglas 17
158 douglas 18 if (major > 1 || minor < 1)
159     {
160     cerr << program << ": Potentially Incompatible Server: HTTP/" << major
161     << "." << minor << "\n";
162    
163     return answer;
164     }
165    
166     response = code(strtoul(line.substr(space + 1).c_str(), 0, 0));
167    
168     if (response < ok) do line = getline(); while (line != "");
169 douglas 17 }
170 douglas 18 while (response < ok);
171 douglas 17
172     do
173     {
174     line = getline();
175 douglas 18
176     if (line != "")
177     {
178     unsigned colon = line.find(':');
179    
180     string field = line.substr(0, colon);
181     string value = line.substr(colon + 1);
182    
183     while (isspace(value[0])) value.erase(0, 1);
184    
185     // if (field =
186     }
187 douglas 17 }
188     while (line != "");
189    
190     switch (response)
191     {
192     case ok:
193 douglas 18 if (debug) cerr << "response = " << response << "\n";
194 douglas 17 answer = true;
195     break;
196 douglas 18 case choices:
197 douglas 17 case moved:
198     case found:
199 douglas 18 if (debug) cerr << "response = " << response << "\n"
200     << "location = " << location << "\n";
201     location = getLink(location, url);
202 douglas 17 break;
203     case notfound:
204     case internal:
205 douglas 18 if (debug) cerr << "response = " << response << "\n";
206 douglas 17 break;
207     default:
208 douglas 18 if (debug) cerr << "response = " << response << "\n";
209     if (response <= 299)
210     {
211     answer = true;
212     }
213     else if (response <= 399)
214     {
215     location = getLink(location, url);
216     }
217 douglas 17 break;
218     }
219    
220 douglas 1 return answer;
221     }
222    
223     HttpHandler& HttpHandler::getline(string& line, char endline)
224     {
225     int end = page.find(endline, begin);
226     int newline = page.find('\n', begin);
227    
228     if (newline < end || end == string::npos)
229     {
230     end = newline;
231     }
232    
233     line = page.substr(begin, end - begin);
234    
235     if (end == string::npos)
236     {
237     begin = end;
238     }
239     else
240     {
241     begin = end + 1;
242     }
243    
244     return *this;
245     }
246    
247     bool HttpHandler::good()
248     {
249     bool answer = true;
250    
251     if (begin >= page.length())
252     {
253     answer = false;
254     }
255     else if (begin == string::npos)
256     {
257     answer = false;
258     }
259    
260     return answer;
261     }
262    
263     void HttpHandler::clear()
264     {
265 douglas 18 closesocket(http);
266    
267 douglas 17 type = "";
268     length = 0;
269     location = "";
270 douglas 1 begin = 0;
271     page = "";
272 douglas 18 chunked = false;
273 douglas 1 }
274    
275 douglas 14 void HttpHandler::putline(const string line)
276     {
277     sprintf(buffer, "%s\r\n", line.c_str());
278     if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
279     {
280     error(program + ": Send");
281     exit(1);
282     }
283     }
284    
285 douglas 17 string HttpHandler::getline()
286     {
287     string line;
288     char byte;
289    
290     do
291     {
292     if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
293     {
294     error(program + ": Recv");
295     }
296    
297     if (byte != '\r' && byte != '\n')
298     {
299     line += byte;
300     }
301     }
302     while (byte != '\n');
303    
304     return line;
305     }
306    
307 douglas 18 void HttpHandler::error(const string& prefix, bool host)
308 douglas 1 {
309 douglas 13 #ifdef _WIN32
310     string error;
311 douglas 1
312 douglas 13 switch (WSAGetLastError())
313 douglas 1 {
314 douglas 13 case WSAEACCES:
315     error = "Permission denied.";
316     break;
317     case WSAEADDRINUSE:
318     error = "Address already in use.";
319     break;
320     case WSAEADDRNOTAVAIL:
321     error = "Cannot assign requested address.";
322     break;
323     case WSAEAFNOSUPPORT:
324     error = "Address family not supported by protocol family.";
325     break;
326     case WSAEALREADY:
327     error = "Operation already in progress.";
328     break;
329     case WSAECONNABORTED:
330     error = "Software caused connection abort.";
331     break;
332     case WSAECONNREFUSED:
333     error = "Connection refused.";
334     break;
335     case WSAECONNRESET:
336     error = "Connection reset by peer.";
337     break;
338     case WSAEDESTADDRREQ:
339     error = "Destination address required.";
340     break;
341     case WSAEFAULT:
342     error = "Bad address.";
343     break;
344     case WSAEHOSTDOWN:
345     error = "Host is down.";
346     break;
347     case WSAEHOSTUNREACH:
348     error = "No route to host.";
349     break;
350     case WSAEINPROGRESS:
351     error = "Operation now in progress.";
352     break;
353     case WSAEINTR:
354     error = "Interrupted function call.";
355     break;
356     case WSAEINVAL:
357     error = "Invalid argument.";
358     break;
359     case WSAEISCONN:
360     error = "Socket is already connected.";
361     break;
362     case WSAEMFILE:
363     error = "Too many open files.";
364     break;
365     case WSAEMSGSIZE:
366     error = "Message too long.";
367     break;
368     case WSAENETDOWN:
369     error = "Network is down.";
370     break;
371     case WSAENETRESET:
372     error = "Network dropped connection on reset.";
373     break;
374     case WSAENETUNREACH:
375     error = "Network is unreachable.";
376     break;
377     case WSAENOBUFS:
378     error = "No buffer space available.";
379     break;
380     case WSAENOPROTOOPT:
381     error = "Bad protocol option.";
382     break;
383     case WSAENOTCONN:
384     error = "Socket is not connected.";
385     break;
386     case WSAENOTSOCK:
387     error = "Socket operation on non-socket.";
388     break;
389     case WSAEOPNOTSUPP:
390     error = "Operation not supported.";
391     break;
392     case WSAEPFNOSUPPORT:
393     error = "Protocol family not supported.";
394     break;
395     case WSAEPROCLIM:
396     error = "Too many processes.";
397     break;
398     case WSAEPROTONOSUPPORT:
399     error = "Protocol not supported.";
400     break;
401     case WSAEPROTOTYPE:
402     error = "Protocol wrong type for socket.";
403     break;
404     case WSAESHUTDOWN:
405     error = "Cannot send after socket shutdown.";
406     break;
407     case WSAESOCKTNOSUPPORT:
408     error = "Socket type not supported.";
409     break;
410     case WSAETIMEDOUT:
411     error = "Connection timed out.";
412     break;
413     case WSATYPE_NOT_FOUND:
414     error = "Class type not found.";
415     break;
416     case WSAEWOULDBLOCK:
417     error = "Resource temporarily unavailable.";
418     break;
419     case WSAHOST_NOT_FOUND:
420     error = "Host not found.";
421     break;
422     case WSA_INVALID_HANDLE:
423     error = "Specified event object handle is invalid.";
424     break;
425     case WSA_INVALID_PARAMETER:
426     error = "One or more parameters are invalid.";
427     break;
428     // case WSAINVALIDPROCTABLE:
429     // error = "Invalid procedure table from service provider.";
430     // break;
431     // case WSAINVALIDPROVIDER:
432     // error = "Invalid service provider version number.";
433     // break;
434     case WSA_IO_INCOMPLETE:
435     error = "Overlapped I/O event object not in signaled state.";
436     break;
437     case WSA_IO_PENDING:
438     error = "Overlapped operations will complete later.";
439     break;
440     case WSA_NOT_ENOUGH_MEMORY:
441     error = "Insufficient memory available.";
442     break;
443     case WSANOTINITIALISED:
444     error = "Successful WSAStartup not yet performed.";
445     break;
446     case WSANO_DATA:
447     error = "Valid name, no data record of requested type.";
448     break;
449     case WSANO_RECOVERY:
450     error = "This is a non-recoverable error.";
451     break;
452     // case WSAPROVIDERFAILEDINIT:
453     // error = "Unable to initialize a service provider.";
454     // break;
455     case WSASYSCALLFAILURE:
456     error = "System call failure.";
457     break;
458     case WSASYSNOTREADY:
459     error = "Network subsystem is unavailable.";
460     break;
461     case WSATRY_AGAIN:
462     error = "Non-authoritative host not found.";
463     break;
464     case WSAVERNOTSUPPORTED:
465     error = "WINSOCK.DLL version out of range.";
466     break;
467     case WSAEDISCON:
468     error = "Graceful shutdown in progress.";
469     break;
470     case WSA_OPERATION_ABORTED:
471     error = "Overlapped operation aborted.";
472     break;
473     default:
474     error = "Unknown error.";
475     break;
476     }
477 douglas 1
478 douglas 13 cerr << prefix << ": " << error << "\n";
479     #else
480     if (host)
481     {
482     herror(prefix.c_str());
483 douglas 1 }
484     else
485     {
486 douglas 13 perror(prefix.c_str());
487 douglas 1 }
488 douglas 13 #endif // _WIN32
489     }