ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/URL.cpp
Revision: 18
Committed: 2002-12-09T21:40:12-08:00 (22 years, 6 months ago) by douglas
File size: 5182 byte(s)
Log Message:
Implemented more HttpHandler stuff.
Added news: protocol to those ignored by getLink().

File Contents

# Content
1 /* ============================================================================
2 * Douglas Thrift's Search Engine License
3 *
4 * Copyright (C) 2002, Douglas Thrift. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. The end-user documentation included with the redistribution, if any, must
16 * include the following acknowledgment:
17 *
18 * "This product includes software developed by Douglas Thrift
19 * (http://computers.douglasthrift.net/searchengine/)."
20 *
21 * Alternately, this acknowledgment may appear in the software itself, if
22 * and wherever such third-party acknowledgments normally appear.
23 *
24 * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25 * be used to endorse or promote products derived from this software without
26 * specific prior written permission. For written permission, please visit
27 * http://www.douglasthrift.net/contact.cgi for contact information.
28 *
29 * 5. Products derived from this software may not be called "Douglas Thrift's
30 * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31 * name, without prior written permission.
32 *
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * ============================================================================
44 */
45 // Douglas Thrift's Search Engine URL
46 //
47 // Douglas Thrift
48 //
49 // URL.cpp
50
51 #include "URL.h"
52
53 URL::URL(const string& url)
54 {
55 setURL(url);
56 }
57
58 URL::URL(const string& address, unsigned port, const string& path)
59 {
60 this->address = address;
61 this->port = port;
62 this->path = path;
63 }
64
65 string URL::getURL()
66 {
67 string url = "http://" + address;
68
69 if (port != 80)
70 {
71 char* cport = new char[1024];
72
73 sprintf(cport, "%u", port);
74
75 url += string(":") + cport;
76
77 delete [] cport;
78 }
79
80 url += path;
81
82 return url;
83 }
84
85 void URL::setURL(const URL& url)
86 {
87 this->address = url.address;
88 this->port = url.port;
89 this->path = url.path;
90 }
91
92 void URL::setURL(const string& url)
93 {
94 if (url.find("http://") || url.length() <= 7)
95 {
96 cerr << program << ": Malformed URL: " << url << "\n";
97 exit(1);
98 }
99
100 int begin = 7;
101 int colon = url.find(':', begin);
102 int end = url.find('/', begin);
103
104 if (colon != string::npos && colon < end)
105 {
106 address = url.substr(begin, colon - begin);
107 port = strtoul(url.substr(colon + 1, end - colon - 1).c_str(), 0, 0);
108 }
109 else
110 {
111 address = url.substr(begin, end - begin);
112 port = 80;
113 }
114
115 if (end == string::npos)
116 {
117 path = "/";
118 }
119 else
120 {
121 path = url.substr(end);
122 }
123 }
124
125 void URL::setAddress(const string& address)
126 {
127 this->address = address;
128 }
129
130 void URL::setPort(unsigned port)
131 {
132 this->port = port;
133 }
134
135 void URL::setPath(const string& path)
136 {
137 if (path.find('/') != 0)
138 {
139 this->path = "/" + path;
140 }
141 else
142 {
143 this->path = path;
144 }
145 }
146
147 ostream& operator<<(ostream& os, URL& data)
148 {
149 os << data.getURL();
150
151 return os;
152 }
153
154 string getLink(string link, URL& url)
155 {
156 string hyperlink = "";
157
158 if (link.find('#') != string::npos)
159 {
160 unsigned pound = link.find('#');
161 link.erase(pound);
162 }
163
164 if (link.find("://") != string::npos)
165 {
166 if (link.find("http://") == 0) hyperlink = link;
167 }
168 else if (link.find("mailto:") == 0)
169 {
170 // do nothing we are not evil spammers!
171 }
172 else if (link.find("news:") == 0)
173 {
174 // do nothing this isn't Google Groups
175 }
176 else if (link.find("//") == 0)
177 {
178 hyperlink = "http:" + link;
179 }
180 else if (link.find('/') == 0)
181 {
182 hyperlink = url.getURL();
183
184 unsigned path = hyperlink.find('/', 7);
185 hyperlink.erase(path);
186
187 hyperlink += link;
188 }
189 else if (link == "")
190 {
191 // a blank link is useless
192 }
193 else
194 {
195 hyperlink = url.getURL();
196 string path = url.getPath();
197
198 unsigned cutoff = hyperlink.rfind(path);
199 hyperlink.erase(cutoff);
200
201 unsigned dir = path.rfind('/') + 1;
202 path.erase(dir);
203
204 while (link.find("../") == 0)
205 {
206 unsigned dot = path.rfind('/') - 1;
207 unsigned up = path.rfind('/', dot) + 1;
208
209 path.erase(up);
210 link.erase(0, 3);
211 }
212 while (link.find("./") == 0)
213 {
214 link.erase(0, 2);
215 }
216
217 hyperlink += path + link;
218 }
219
220 return hyperlink;
221 }