ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/proj/trunk/Search/URL.cpp
Revision: 28
Committed: 2003-01-02T19:42:33-08:00 (22 years, 5 months ago) by douglas
File size: 5208 byte(s)
Log Message:
Changed Copyright notices to state 2002-2003.

File Contents

# User Rev Content
1 douglas 1 /* ============================================================================
2     * Douglas Thrift's Search Engine License
3     *
4 douglas 28 * Copyright (C) 2002-2003, Douglas Thrift. All Rights Reserved.
5 douglas 1 * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions are met:
7     *
8     * 1. Redistributions of source code must retain the above copyright notice,
9     * this list of conditions and the following disclaimer.
10     *
11     * 2. Redistributions in binary form must reproduce the above copyright notice,
12     * this list of conditions and the following disclaimer in the documentation
13     * and/or other materials provided with the distribution.
14     *
15     * 3. The end-user documentation included with the redistribution, if any, must
16     * include the following acknowledgment:
17     *
18     * "This product includes software developed by Douglas Thrift
19     * (http://computers.douglasthrift.net/searchengine/)."
20     *
21     * Alternately, this acknowledgment may appear in the software itself, if
22     * and wherever such third-party acknowledgments normally appear.
23     *
24     * 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not
25     * be used to endorse or promote products derived from this software without
26     * specific prior written permission. For written permission, please visit
27     * http://www.douglasthrift.net/contact.cgi for contact information.
28     *
29     * 5. Products derived from this software may not be called "Douglas Thrift's
30     * Search Engine", nor may "Douglas Thrift's Search Engine" appear in their
31     * name, without prior written permission.
32     *
33     * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36     * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
38     * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
39     * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
40     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
42     * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43     * ============================================================================
44     */
45     // Douglas Thrift's Search Engine URL
46     //
47     // Douglas Thrift
48     //
49     // URL.cpp
50    
51     #include "URL.h"
52    
53     URL::URL(const string& url)
54     {
55     setURL(url);
56     }
57    
58     URL::URL(const string& address, unsigned port, const string& path)
59     {
60     this->address = address;
61     this->port = port;
62     this->path = path;
63     }
64    
65     string URL::getURL()
66     {
67     string url = "http://" + address;
68    
69     if (port != 80)
70     {
71     char* cport = new char[1024];
72    
73     sprintf(cport, "%u", port);
74    
75     url += string(":") + cport;
76    
77     delete [] cport;
78     }
79    
80     url += path;
81    
82     return url;
83     }
84    
85     void URL::setURL(const URL& url)
86     {
87     this->address = url.address;
88     this->port = url.port;
89     this->path = url.path;
90     }
91    
92     void URL::setURL(const string& url)
93     {
94     if (url.find("http://") || url.length() <= 7)
95     {
96     cerr << program << ": Malformed URL: " << url << "\n";
97     exit(1);
98     }
99    
100     int begin = 7;
101     int colon = url.find(':', begin);
102     int end = url.find('/', begin);
103    
104     if (colon != string::npos && colon < end)
105     {
106     address = url.substr(begin, colon - begin);
107     port = strtoul(url.substr(colon + 1, end - colon - 1).c_str(), 0, 0);
108     }
109     else
110     {
111     address = url.substr(begin, end - begin);
112     port = 80;
113     }
114    
115     if (end == string::npos)
116     {
117     path = "/";
118     }
119     else
120     {
121     path = url.substr(end);
122     }
123     }
124    
125     void URL::setAddress(const string& address)
126     {
127     this->address = address;
128     }
129    
130     void URL::setPort(unsigned port)
131     {
132     this->port = port;
133     }
134    
135     void URL::setPath(const string& path)
136     {
137     if (path.find('/') != 0)
138     {
139     this->path = "/" + path;
140     }
141     else
142     {
143     this->path = path;
144     }
145     }
146    
147     ostream& operator<<(ostream& os, URL& data)
148     {
149     os << data.getURL();
150    
151     return os;
152     }
153 douglas 17
154     string getLink(string link, URL& url)
155     {
156     string hyperlink = "";
157    
158     if (link.find('#') != string::npos)
159     {
160     unsigned pound = link.find('#');
161     link.erase(pound);
162     }
163    
164     if (link.find("://") != string::npos)
165     {
166 douglas 22 if (link.find("http://") == 0 && link.length() > 7) hyperlink = link;
167 douglas 17 }
168     else if (link.find("mailto:") == 0)
169     {
170     // do nothing we are not evil spammers!
171     }
172 douglas 18 else if (link.find("news:") == 0)
173     {
174     // do nothing this isn't Google Groups
175     }
176 douglas 17 else if (link.find("//") == 0)
177     {
178     hyperlink = "http:" + link;
179     }
180     else if (link.find('/') == 0)
181     {
182     hyperlink = url.getURL();
183    
184     unsigned path = hyperlink.find('/', 7);
185     hyperlink.erase(path);
186    
187     hyperlink += link;
188     }
189     else if (link == "")
190     {
191     // a blank link is useless
192     }
193     else
194     {
195     hyperlink = url.getURL();
196     string path = url.getPath();
197    
198     unsigned cutoff = hyperlink.rfind(path);
199     hyperlink.erase(cutoff);
200    
201     unsigned dir = path.rfind('/') + 1;
202     path.erase(dir);
203    
204     while (link.find("../") == 0)
205     {
206     unsigned dot = path.rfind('/') - 1;
207     unsigned up = path.rfind('/', dot) + 1;
208    
209     path.erase(up);
210     link.erase(0, 3);
211     }
212     while (link.find("./") == 0)
213     {
214     link.erase(0, 2);
215     }
216    
217     hyperlink += path + link;
218     }
219    
220     return hyperlink;
221     }