ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/Search/trunk/HttpHandler.cpp
Revision: 17
Committed: 2002-12-09T18:31:11-08:00 (22 years, 6 months ago) by douglas
Original Path: trunk/Search/HttpHandler.cpp
File size: 10082 byte(s)
Log Message:
Started reimplementing HttpHandler, moved Processor.getLink() to be global and
included in URL.h and implemented in URL.cpp.

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4     * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine HTTP Handler
46     //
47     // Douglas Thrift
48     //
49     // HttpHandler.cpp
50    
#include "HttpHandler.h"

#ifndef _WIN32
#include <unistd.h> // close()
#endif // _WIN32
52    
53     HttpHandler::HttpHandler()
54     {
55 douglas 14 buffer = new char[BUFSIZ + 1];
56    
57 douglas 13 #ifdef _WIN32
58 douglas 14 if (WSAStartup(MAKEWORD(2, 0), &data) != 0)
59 douglas 13 {
60 douglas 17 error(program + ": WSAStartup");
61 douglas 13 exit(1);
62     }
63     #endif // _WIN32
64 douglas 1
65     begin = 0;
66     }
67    
68     HttpHandler::~HttpHandler()
69     {
70 douglas 14 delete [] buffer;
71    
72 douglas 13 #ifdef _WIN32
73     WSACleanup();
74     #endif // _WIN32
75 douglas 1 }
76    
77 douglas 14 bool HttpHandler::handle(URL &url, bool head)
78 douglas 1 {
79     bool answer = false;
80    
81 douglas 14 if ((http = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
82     {
83     error(program + ": Socket");
84     exit(1);
85     }
86 douglas 1
87 douglas 14 sockaddr_in address;
88     hostent* host;
89 douglas 1
90 douglas 14 address.sin_family = AF_INET;
91 douglas 1
92 douglas 14 if ((host = gethostbyname(url.getAddress().c_str())) == NULL)
93     {
94     error(program + ": Host: " + url.getAddress(), true);
95     return answer;
96     }
97 douglas 1
98 douglas 14 address.sin_addr = *((in_addr*)*host->h_addr_list);
99     address.sin_port = htons(url.getPort());
100    
101     if (connect(http, (sockaddr*)&address, sizeof(sockaddr_in)) ==
102     SOCKET_ERROR)
103     {
104     error(program + ": Connect");
105     return answer;
106     }
107    
108     if (head)
109     {
110     putline("HEAD " + url.getPath() + " HTTP/1.1");
111     }
112     else
113     {
114     putline("GET " + url.getPath() + " HTTP/1.1");
115     }
116    
117     putline("Accept: text/html; text/plain");
118     putline("User-Agent: " + agent(true) + ' ' + platform());
119    
120     if (url.getPort() == 80)
121     {
122     putline("Host: " + url.getAddress());
123     }
124     else
125     {
126     char* port = new char[1024];
127     sprintf(port, "%u", url.getPort());
128    
129     putline("Host: " + url.getAddress() + ':' + port);
130    
131     delete [] port;
132     }
133    
134     putline();
135    
136 douglas 17 string line = getline();
137    
138     if (line.find("HTTP/") != 0)
139     {
140     return answer;
141     }
142    
143     unsigned dot = line.find('.');
144     unsigned space = line.find(' ');
145    
146     unsigned major = strtoul(line.substr(5, dot - 5).c_str(), 0, 0);
147     unsigned minor = strtoul(line.substr(dot + 1, space - dot - 1).c_str(), 0,
148     0);
149    
150     if (major > 1 || minor < 1)
151     {
152     cerr << program << ": Potentially Incompatible Server: HTTP/" << major
153     << "." << minor << "\n";
154    
155     return answer;
156     }
157    
158     code response = code(strtoul(line.substr(space + 1).c_str(), 0, 0));
159    
160     do
161     {
162     line = getline();
163     }
164     while (line != "");
165    
166     switch (response)
167     {
168     case ok:
169     answer = true;
170     break;
171     case choices:
172     break;
173     case moved:
174     break;
175     case found:
176     break;
177     case notfound:
178     break;
179     case internal:
180     break;
181     default:
182     break;
183     }
184    
185 douglas 1 return answer;
186     }
187    
188     HttpHandler& HttpHandler::getline(string& line, char endline)
189     {
190     int end = page.find(endline, begin);
191     int newline = page.find('\n', begin);
192    
193     if (newline < end || end == string::npos)
194     {
195     end = newline;
196     }
197    
198     line = page.substr(begin, end - begin);
199    
200     if (end == string::npos)
201     {
202     begin = end;
203     }
204     else
205     {
206     begin = end + 1;
207     }
208    
209     return *this;
210     }
211    
212     bool HttpHandler::good()
213     {
214     bool answer = true;
215    
216     if (begin >= page.length())
217     {
218     answer = false;
219     }
220     else if (begin == string::npos)
221     {
222     answer = false;
223     }
224    
225     return answer;
226     }
227    
228     void HttpHandler::clear()
229     {
230 douglas 17 type = "";
231     length = 0;
232     location = "";
233 douglas 1 begin = 0;
234     page = "";
235     }
236    
237 douglas 14 void HttpHandler::putline(const string line)
238     {
239     sprintf(buffer, "%s\r\n", line.c_str());
240     if (send(http, buffer, strlen(buffer), 0) == SOCKET_ERROR)
241     {
242     error(program + ": Send");
243     exit(1);
244     }
245     }
246    
247 douglas 17 string HttpHandler::getline()
248     {
249     string line;
250     char byte;
251    
252     do
253     {
254     if (recv(http, &byte, 1, 0) == SOCKET_ERROR)
255     {
256     error(program + ": Recv");
257     }
258    
259     if (byte != '\r' && byte != '\n')
260     {
261     line += byte;
262     }
263     }
264     while (byte != '\n');
265    
266     return line;
267     }
268    
269 douglas 13 void HttpHandler::error(const string prefix, bool host)
270 douglas 1 {
271 douglas 13 #ifdef _WIN32
272     string error;
273 douglas 1
274 douglas 13 switch (WSAGetLastError())
275 douglas 1 {
276 douglas 13 case WSAEACCES:
277     error = "Permission denied.";
278     break;
279     case WSAEADDRINUSE:
280     error = "Address already in use.";
281     break;
282     case WSAEADDRNOTAVAIL:
283     error = "Cannot assign requested address.";
284     break;
285     case WSAEAFNOSUPPORT:
286     error = "Address family not supported by protocol family.";
287     break;
288     case WSAEALREADY:
289     error = "Operation already in progress.";
290     break;
291     case WSAECONNABORTED:
292     error = "Software caused connection abort.";
293     break;
294     case WSAECONNREFUSED:
295     error = "Connection refused.";
296     break;
297     case WSAECONNRESET:
298     error = "Connection reset by peer.";
299     break;
300     case WSAEDESTADDRREQ:
301     error = "Destination address required.";
302     break;
303     case WSAEFAULT:
304     error = "Bad address.";
305     break;
306     case WSAEHOSTDOWN:
307     error = "Host is down.";
308     break;
309     case WSAEHOSTUNREACH:
310     error = "No route to host.";
311     break;
312     case WSAEINPROGRESS:
313     error = "Operation now in progress.";
314     break;
315     case WSAEINTR:
316     error = "Interrupted function call.";
317     break;
318     case WSAEINVAL:
319     error = "Invalid argument.";
320     break;
321     case WSAEISCONN:
322     error = "Socket is already connected.";
323     break;
324     case WSAEMFILE:
325     error = "Too many open files.";
326     break;
327     case WSAEMSGSIZE:
328     error = "Message too long.";
329     break;
330     case WSAENETDOWN:
331     error = "Network is down.";
332     break;
333     case WSAENETRESET:
334     error = "Network dropped connection on reset.";
335     break;
336     case WSAENETUNREACH:
337     error = "Network is unreachable.";
338     break;
339     case WSAENOBUFS:
340     error = "No buffer space available.";
341     break;
342     case WSAENOPROTOOPT:
343     error = "Bad protocol option.";
344     break;
345     case WSAENOTCONN:
346     error = "Socket is not connected.";
347     break;
348     case WSAENOTSOCK:
349     error = "Socket operation on non-socket.";
350     break;
351     case WSAEOPNOTSUPP:
352     error = "Operation not supported.";
353     break;
354     case WSAEPFNOSUPPORT:
355     error = "Protocol family not supported.";
356     break;
357     case WSAEPROCLIM:
358     error = "Too many processes.";
359     break;
360     case WSAEPROTONOSUPPORT:
361     error = "Protocol not supported.";
362     break;
363     case WSAEPROTOTYPE:
364     error = "Protocol wrong type for socket.";
365     break;
366     case WSAESHUTDOWN:
367     error = "Cannot send after socket shutdown.";
368     break;
369     case WSAESOCKTNOSUPPORT:
370     error = "Socket type not supported.";
371     break;
372     case WSAETIMEDOUT:
373     error = "Connection timed out.";
374     break;
375     case WSATYPE_NOT_FOUND:
376     error = "Class type not found.";
377     break;
378     case WSAEWOULDBLOCK:
379     error = "Resource temporarily unavailable.";
380     break;
381     case WSAHOST_NOT_FOUND:
382     error = "Host not found.";
383     break;
384     case WSA_INVALID_HANDLE:
385     error = "Specified event object handle is invalid.";
386     break;
387     case WSA_INVALID_PARAMETER:
388     error = "One or more parameters are invalid.";
389     break;
390     // case WSAINVALIDPROCTABLE:
391     // error = "Invalid procedure table from service provider.";
392     // break;
393     // case WSAINVALIDPROVIDER:
394     // error = "Invalid service provider version number.";
395     // break;
396     case WSA_IO_INCOMPLETE:
397     error = "Overlapped I/O event object not in signaled state.";
398     break;
399     case WSA_IO_PENDING:
400     error = "Overlapped operations will complete later.";
401     break;
402     case WSA_NOT_ENOUGH_MEMORY:
403     error = "Insufficient memory available.";
404     break;
405     case WSANOTINITIALISED:
406     error = "Successful WSAStartup not yet performed.";
407     break;
408     case WSANO_DATA:
409     error = "Valid name, no data record of requested type.";
410     break;
411     case WSANO_RECOVERY:
412     error = "This is a non-recoverable error.";
413     break;
414     // case WSAPROVIDERFAILEDINIT:
415     // error = "Unable to initialize a service provider.";
416     // break;
417     case WSASYSCALLFAILURE:
418     error = "System call failure.";
419     break;
420     case WSASYSNOTREADY:
421     error = "Network subsystem is unavailable.";
422     break;
423     case WSATRY_AGAIN:
424     error = "Non-authoritative host not found.";
425     break;
426     case WSAVERNOTSUPPORTED:
427     error = "WINSOCK.DLL version out of range.";
428     break;
429     case WSAEDISCON:
430     error = "Graceful shutdown in progress.";
431     break;
432     case WSA_OPERATION_ABORTED:
433     error = "Overlapped operation aborted.";
434     break;
435     default:
436     error = "Unknown error.";
437     break;
438     }
439 douglas 1
440 douglas 13 cerr << prefix << ": " << error << "\n";
441     #else
442     if (host)
443     {
444     herror(prefix.c_str());
445 douglas 1 }
446     else
447     {
448 douglas 13 perror(prefix.c_str());
449 douglas 1 }
450 douglas 13 #endif // _WIN32
451     }