1 |
douglas |
1 |
/* ============================================================================ |
2 |
|
|
* Douglas Thrift's Search Engine License |
3 |
|
|
* |
4 |
douglas |
312 |
* Copyright (C) 2002-2004, Douglas Thrift. All Rights Reserved. |
5 |
douglas |
1 |
* Redistribution and use in source and binary forms, with or without |
6 |
|
|
* modification, are permitted provided that the following conditions are met: |
7 |
|
|
* |
8 |
|
|
* 1. Redistributions of source code must retain the above copyright notice, |
9 |
|
|
* this list of conditions and the following disclaimer. |
10 |
|
|
* |
11 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice, |
12 |
|
|
* this list of conditions and the following disclaimer in the documentation |
13 |
|
|
* and/or other materials provided with the distribution. |
14 |
|
|
* |
15 |
|
|
* 3. The end-user documentation included with the redistribution, if any, must |
16 |
|
|
* include the following acknowledgment: |
17 |
|
|
* |
18 |
|
|
* "This product includes software developed by Douglas Thrift |
19 |
|
|
* (http://computers.douglasthrift.net/searchengine/)." |
20 |
|
|
* |
21 |
|
|
* Alternately, this acknowledgment may appear in the software itself, if |
22 |
|
|
* and wherever such third-party acknowledgments normally appear. |
23 |
|
|
* |
24 |
|
|
* 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not |
25 |
|
|
* be used to endorse or promote products derived from this software without |
26 |
|
|
* specific prior written permission. For written permission, please visit |
27 |
|
|
* http://www.douglasthrift.net/contact.cgi for contact information. |
28 |
|
|
* |
29 |
|
|
* 5. Products derived from this software may not be called "Douglas Thrift's |
30 |
|
|
* Search Engine", nor may "Douglas Thrift's Search Engine" appear in their |
31 |
|
|
* name, without prior written permission. |
32 |
|
|
* |
33 |
|
|
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
34 |
|
|
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
35 |
|
|
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
36 |
|
|
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
37 |
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
38 |
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, |
39 |
|
|
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
40 |
|
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
41 |
|
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
42 |
|
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
43 |
|
|
* ============================================================================ |
44 |
|
|
*/ |
45 |
|
|
// Douglas Thrift's Search Engine URL |
46 |
|
|
// |
47 |
|
|
// Douglas Thrift |
48 |
|
|
// |
49 |
Douglas Thrift |
331 |
// $Id$ |
50 |
douglas |
1 |
|
51 |
Douglas Thrift |
334 |
#include "URL.hpp" |
52 |
douglas |
1 |
|
53 |
|
|
string URL::getURL() |
54 |
|
|
{ |
55 |
douglas |
195 |
ostringstream url; |
56 |
Douglas Thrift |
348 |
|
57 |
douglas |
195 |
#ifndef _OpenSSL_ |
58 |
|
|
url << "http://" << address; |
59 |
douglas |
1 |
|
60 |
|
|
if (port != 80) |
61 |
douglas |
195 |
#else |
62 |
|
|
url << (tls ? "https://" : "http://") << address; |
63 |
|
|
|
64 |
|
|
if (port != 80 && !tls || port != 443 && tls) |
65 |
|
|
#endif |
66 |
douglas |
1 |
{ |
67 |
douglas |
195 |
url << ":" << port; |
68 |
douglas |
1 |
} |
69 |
|
|
|
70 |
douglas |
195 |
url << path; |
71 |
douglas |
1 |
|
72 |
douglas |
195 |
return url.str(); |
73 |
douglas |
1 |
} |
74 |
|
|
|
75 |
|
|
void URL::setURL(const URL& url) |
76 |
|
|
{ |
77 |
|
|
this->address = url.address; |
78 |
|
|
this->port = url.port; |
79 |
|
|
this->path = url.path; |
80 |
douglas |
195 |
#ifdef _OpenSSL_ |
81 |
|
|
this->tls = url.tls; |
82 |
|
|
#endif |
83 |
douglas |
1 |
} |
84 |
|
|
|
85 |
|
|
void URL::setURL(const string& url) |
86 |
|
|
{ |
87 |
douglas |
195 |
#ifndef _OpenSSL_ |
88 |
|
|
if (url.find("http://") != 0 || url.length() <= 7) |
89 |
douglas |
1 |
{ |
90 |
|
|
cerr << program << ": Malformed URL: " << url << "\n"; |
91 |
Douglas Thrift |
360 |
|
92 |
douglas |
1 |
exit(1); |
93 |
|
|
} |
94 |
|
|
|
95 |
douglas |
365 |
size_t begin(7); |
96 |
Douglas Thrift |
348 |
|
97 |
douglas |
195 |
#else |
98 |
|
|
tls = false; |
99 |
douglas |
1 |
|
100 |
douglas |
195 |
if (url.find("https://") == 0 && url.length() > 8) |
101 |
|
|
{ |
102 |
|
|
tls = true; |
103 |
|
|
} |
104 |
|
|
else if (url.find("http://") != 0 || url.length() <= 7) |
105 |
|
|
{ |
106 |
|
|
cerr << program << ": Malformed URL: " << url << "\n"; |
107 |
Douglas Thrift |
348 |
|
108 |
douglas |
195 |
exit(1); |
109 |
|
|
} |
110 |
|
|
|
111 |
douglas |
365 |
size_t begin(tls ? 8 : 7); |
112 |
douglas |
195 |
#endif |
113 |
|
|
|
114 |
douglas |
365 |
size_t colon(url.find(':', begin)), end(url.find('/', begin)); |
115 |
Douglas Thrift |
348 |
|
116 |
douglas |
1 |
if (colon != string::npos && colon < end) |
117 |
|
|
{ |
118 |
|
|
address = url.substr(begin, colon - begin); |
119 |
douglas |
212 |
|
120 |
|
|
istringstream number((url.substr(colon + 1, end - colon - 1))); |
121 |
|
|
|
122 |
|
|
number >> port; |
123 |
douglas |
1 |
} |
124 |
|
|
else |
125 |
|
|
{ |
126 |
|
|
address = url.substr(begin, end - begin); |
127 |
douglas |
195 |
#ifndef _OpenSSL_ |
128 |
douglas |
1 |
port = 80; |
129 |
douglas |
195 |
#else |
130 |
|
|
port = tls ? 443 : 80; |
131 |
|
|
#endif |
132 |
douglas |
1 |
} |
133 |
|
|
|
134 |
|
|
if (end == string::npos) |
135 |
|
|
{ |
136 |
|
|
path = "/"; |
137 |
|
|
} |
138 |
|
|
else |
139 |
|
|
{ |
140 |
|
|
path = url.substr(end); |
141 |
|
|
} |
142 |
|
|
} |
143 |
|
|
|
144 |
|
|
void URL::setPath(const string& path) |
145 |
|
|
{ |
146 |
|
|
if (path.find('/') != 0) |
147 |
|
|
{ |
148 |
|
|
this->path = "/" + path; |
149 |
|
|
} |
150 |
|
|
else |
151 |
|
|
{ |
152 |
|
|
this->path = path; |
153 |
|
|
} |
154 |
|
|
} |
155 |
|
|
|
156 |
|
|
ostream& operator<<(ostream& os, URL& data) |
157 |
|
|
{ |
158 |
|
|
os << data.getURL(); |
159 |
|
|
|
160 |
|
|
return os; |
161 |
|
|
} |
162 |
douglas |
17 |
|
163 |
|
|
string getLink(string link, URL& url) |
164 |
|
|
{ |
165 |
Douglas Thrift |
348 |
string hyperlink; |
166 |
douglas |
17 |
|
167 |
|
|
if (link.find('#') != string::npos) |
168 |
|
|
{ |
169 |
douglas |
365 |
size_t pound(link.find('#')); |
170 |
Douglas Thrift |
348 |
|
171 |
douglas |
17 |
link.erase(pound); |
172 |
|
|
} |
173 |
|
|
|
174 |
|
|
if (link.find("://") != string::npos) |
175 |
|
|
{ |
176 |
douglas |
195 |
#ifndef _OpenSSL_ |
177 |
douglas |
22 |
if (link.find("http://") == 0 && link.length() > 7) hyperlink = link; |
178 |
douglas |
195 |
#else |
179 |
Douglas Thrift |
360 |
if (link.find("http://") == 0 && link.length() > 7 |
180 |
|
|
|| link.find("https://") == 0 && link.length() > 8) |
181 |
|
|
hyperlink = link; |
182 |
douglas |
195 |
#endif |
183 |
douglas |
17 |
} |
184 |
|
|
else if (link.find("mailto:") == 0) |
185 |
|
|
{ |
186 |
|
|
// do nothing we are not evil spammers! |
187 |
|
|
} |
188 |
douglas |
18 |
else if (link.find("news:") == 0) |
189 |
|
|
{ |
190 |
|
|
// do nothing this isn't Google Groups |
191 |
|
|
} |
192 |
Douglas Thrift |
348 |
else if (link.find("aim:") == 0) |
193 |
|
|
{ |
194 |
|
|
// do nothing we don't do AIM |
195 |
|
|
} |
196 |
douglas |
17 |
else if (link.find("//") == 0) |
197 |
|
|
{ |
198 |
douglas |
195 |
#ifndef _OpenSSL_ |
199 |
douglas |
17 |
hyperlink = "http:" + link; |
200 |
douglas |
195 |
#else |
201 |
|
|
hyperlink = (url.getTls() ? "https:" : "http:") + link; |
202 |
|
|
#endif |
203 |
douglas |
17 |
} |
204 |
|
|
else if (link.find('/') == 0) |
205 |
|
|
{ |
206 |
|
|
hyperlink = url.getURL(); |
207 |
|
|
|
208 |
douglas |
195 |
#ifndef _OpenSSL_ |
209 |
douglas |
365 |
size_t path(hyperlink.find('/', 7)); |
210 |
douglas |
195 |
#else |
211 |
douglas |
365 |
size_t path(hyperlink.find('/', url.getTls() ? 8 : 7)); |
212 |
douglas |
195 |
#endif |
213 |
Douglas Thrift |
348 |
|
214 |
douglas |
17 |
hyperlink.erase(path); |
215 |
|
|
|
216 |
|
|
hyperlink += link; |
217 |
|
|
} |
218 |
Douglas Thrift |
355 |
else if (link.empty()) |
219 |
douglas |
17 |
{ |
220 |
|
|
// a blank link is useless |
221 |
|
|
} |
222 |
|
|
else |
223 |
|
|
{ |
224 |
|
|
hyperlink = url.getURL(); |
225 |
|
|
|
226 |
Douglas Thrift |
348 |
string path(url.getPath()); |
227 |
douglas |
365 |
size_t cutoff(hyperlink.rfind(path)); |
228 |
Douglas Thrift |
348 |
|
229 |
douglas |
17 |
hyperlink.erase(cutoff); |
230 |
|
|
|
231 |
douglas |
365 |
size_t dir(path.rfind('/') + 1); |
232 |
Douglas Thrift |
348 |
|
233 |
douglas |
17 |
path.erase(dir); |
234 |
|
|
|
235 |
|
|
while (link.find("../") == 0) |
236 |
|
|
{ |
237 |
douglas |
365 |
size_t dot(path.rfind('/') - 1), up(path.rfind('/', dot) + 1); |
238 |
douglas |
17 |
|
239 |
|
|
path.erase(up); |
240 |
|
|
link.erase(0, 3); |
241 |
|
|
} |
242 |
|
|
while (link.find("./") == 0) |
243 |
|
|
{ |
244 |
|
|
link.erase(0, 2); |
245 |
|
|
} |
246 |
|
|
|
247 |
|
|
hyperlink += path + link; |
248 |
|
|
} |
249 |
|
|
|
250 |
|
|
return hyperlink; |
251 |
|
|
} |