1 |
douglas |
1 |
/* ============================================================================ |
2 |
|
|
* Douglas Thrift's Search Engine License |
3 |
|
|
* |
4 |
|
|
* Copyright (C) 2002, Douglas Thrift. All Rights Reserved. |
5 |
|
|
* Redistribution and use in source and binary forms, with or without |
6 |
|
|
* modification, are permitted provided that the following conditions are met: |
7 |
|
|
* |
8 |
|
|
* 1. Redistributions of source code must retain the above copyright notice, |
9 |
|
|
* this list of conditions and the following disclaimer. |
10 |
|
|
* |
11 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice, |
12 |
|
|
* this list of conditions and the following disclaimer in the documentation |
13 |
|
|
* and/or other materials provided with the distribution. |
14 |
|
|
* |
15 |
|
|
* 3. The end-user documentation included with the redistribution, if any, must |
16 |
|
|
* include the following acknowledgment: |
17 |
|
|
* |
18 |
|
|
* "This product includes software developed by Douglas Thrift |
19 |
|
|
* (http://computers.douglasthrift.net/searchengine/)." |
20 |
|
|
* |
21 |
|
|
* Alternately, this acknowledgment may appear in the software itself, if |
22 |
|
|
* and wherever such third-party acknowledgments normally appear. |
23 |
|
|
* |
24 |
|
|
* 4. The names "Douglas Thrift" and "Douglas Thrift's Search Engine" must not |
25 |
|
|
* be used to endorse or promote products derived from this software without |
26 |
|
|
* specific prior written permission. For written permission, please visit |
27 |
|
|
* http://www.douglasthrift.net/contact.cgi for contact information. |
28 |
|
|
* |
29 |
|
|
* 5. Products derived from this software may not be called "Douglas Thrift's |
30 |
|
|
* Search Engine", nor may "Douglas Thrift's Search Engine" appear in their |
31 |
|
|
* name, without prior written permission. |
32 |
|
|
* |
33 |
|
|
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
34 |
|
|
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
35 |
|
|
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
36 |
|
|
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
37 |
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
38 |
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, |
39 |
|
|
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
40 |
|
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
41 |
|
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
42 |
|
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
43 |
|
|
* ============================================================================ |
44 |
|
|
*/ |
45 |
|
|
// Douglas Thrift's Search Engine Ranker |
46 |
|
|
// |
47 |
|
|
// Douglas Thrift |
48 |
|
|
// |
49 |
|
|
// Ranker.h |
50 |
|
|
|
51 |
|
|
#ifndef _Ranker_h_ |
52 |
|
|
#define _Ranker_h_ |
53 |
|
|
|
54 |
|
|
#include "Search.h" |
55 |
|
|
#include "Page.h" |
56 |
|
|
|
57 |
|
|
class Ranker : public Page |
58 |
|
|
{ |
59 |
|
|
private: |
60 |
|
|
enum state { all, title, url, text }; |
61 |
|
|
string sample; |
62 |
|
|
map<unsigned, unsigned> occurrencesTitle; |
63 |
|
|
map<unsigned, unsigned> occurrencesDescription; |
64 |
|
|
map<unsigned, unsigned> occurrencesText; |
65 |
|
|
unsigned value; |
66 |
|
|
vector<string> required; |
67 |
|
|
vector<string> excluded; |
68 |
|
|
vector<string> eitherOr; |
69 |
|
|
unsigned requiredValue; |
70 |
|
|
unsigned excludedValue; |
71 |
|
|
unsigned eitherOrValue; |
72 |
|
|
string site; |
73 |
|
|
state allIn; |
74 |
|
|
string lowerURL; |
75 |
|
|
string lowerAddress; |
76 |
|
|
string lowerTitle; |
77 |
|
|
string lowerText; |
78 |
|
|
void rank(); |
79 |
|
|
void checkRequired(); |
80 |
|
|
void checkExcluded(); |
81 |
|
|
void checkEitherOr(); |
82 |
|
|
unsigned find(string& word, string& where); |
83 |
|
|
unsigned find(string& word, string& where, map<unsigned, unsigned>& |
84 |
|
|
occurrences); |
85 |
|
|
unsigned phrase(string& phrase, string& where); |
86 |
|
|
unsigned phrase(string& phrase, string& where, map<unsigned, unsigned>& |
87 |
|
|
occurrences); |
88 |
|
|
unsigned phrase(vector<string>& words, unsigned word, unsigned& begin, bool |
89 |
|
|
start, string& where); |
90 |
|
|
unsigned phrase(vector<string>& words, unsigned word, unsigned& begin, bool |
91 |
|
|
start, string& where, map<unsigned, unsigned>& occurrences); |
92 |
|
|
unsigned evaluate(vector<unsigned>& ins); |
93 |
|
|
void decrap(string& crap); |
94 |
|
|
public: |
95 |
|
|
Ranker(); |
96 |
|
|
Ranker(Page& page); |
97 |
|
|
~Ranker() {} |
98 |
|
|
void rank(vector<string> query); |
99 |
|
|
void setSample(); |
100 |
|
|
string getTitle(); |
101 |
|
|
string getDescription(); |
102 |
|
|
string getSample() { return sample; } |
103 |
|
|
bool operator==(const unsigned number) const; |
104 |
|
|
bool operator==(const Ranker& ranker) const; |
105 |
|
|
bool operator!=(const unsigned number) const; |
106 |
|
|
bool operator!=(const Ranker& ranker) const; |
107 |
|
|
bool operator<(const unsigned number) const; |
108 |
|
|
bool operator<(const Ranker& ranker) const; |
109 |
|
|
bool operator>(const unsigned number) const; |
110 |
|
|
bool operator>(const Ranker& ranker) const; |
111 |
|
|
bool operator<=(const unsigned number) const |
112 |
|
|
{ |
113 |
|
|
return *this == number || *this < number; |
114 |
|
|
} |
115 |
|
|
bool operator<=(const Ranker& ranker) const |
116 |
|
|
{ |
117 |
|
|
return *this == ranker || *this < ranker; |
118 |
|
|
} |
119 |
|
|
bool operator>=(const unsigned number) const |
120 |
|
|
{ |
121 |
|
|
return *this == number || *this > number; |
122 |
|
|
} |
123 |
|
|
bool operator>=(const Ranker& ranker) const |
124 |
|
|
{ |
125 |
|
|
return *this == ranker || *this > ranker; |
126 |
|
|
} |
127 |
|
|
// friends: |
128 |
|
|
friend bool operator==(const unsigned number, const Ranker& ranker) |
129 |
|
|
{ |
130 |
|
|
return ranker == number; |
131 |
|
|
} |
132 |
|
|
friend bool operator!=(const unsigned number, const Ranker& ranker) |
133 |
|
|
{ |
134 |
|
|
return ranker != number; |
135 |
|
|
} |
136 |
|
|
friend bool operator>(const unsigned number, const Ranker& ranker) |
137 |
|
|
{ |
138 |
|
|
return ranker < number; |
139 |
|
|
} |
140 |
|
|
friend bool operator<(const unsigned number, const Ranker& ranker) |
141 |
|
|
{ |
142 |
|
|
return ranker > number; |
143 |
|
|
} |
144 |
|
|
friend bool operator>=(const unsigned number, const Ranker& ranker) |
145 |
|
|
{ |
146 |
|
|
return ranker <= number; |
147 |
|
|
} |
148 |
|
|
friend bool operator<=(const unsigned number, const Ranker& ranker) |
149 |
|
|
{ |
150 |
|
|
return ranker >= number; |
151 |
|
|
} |
152 |
|
|
}; |
153 |
|
|
|
154 |
|
|
#endif // _Ranker_h_ |