146 |
|
void Ranker::setSample() |
147 |
|
{ |
148 |
|
map<unsigned, unsigned>::iterator itor; |
149 |
– |
|
149 |
|
multimap<unsigned, map<unsigned, unsigned>::iterator> distances; |
150 |
|
|
151 |
|
for (itor = occurrencesText.begin(); itor != occurrencesText.end(); itor++) |
155 |
|
if (++itor != occurrencesText.end()) |
156 |
|
{ |
157 |
|
unsigned next = itor->first; |
159 |
– |
itor--; |
158 |
|
|
159 |
+ |
itor--; |
160 |
|
distance = next - (itor->first + itor->second); |
161 |
|
} |
162 |
|
else |
176 |
|
|
177 |
|
string portion; |
178 |
|
unsigned sampleLength = 0, begin = 0, end = string::npos; |
179 |
+ |
|
180 |
|
while (sampleLength < sampleMax && itor != occurrencesText.end()) |
181 |
|
{ |
182 |
< |
unsigned found = itor->first; |
183 |
< |
unsigned length = itor->second; |
182 |
> |
unsigned found = itor->first, length = itor->second; |
183 |
|
|
184 |
|
for (unsigned index = found; index > begin; index--) |
185 |
|
{ |
187 |
– |
if (index == begin) cerr << "Oh crap, I'm insane!\n"; |
186 |
|
if (found - index >= sampleMax - sampleLength - length) |
187 |
|
{ |
188 |
|
for (; index < found; index++) |
189 |
|
{ |
190 |
|
if (isspace(getText()[index])) break; |
191 |
|
} |
192 |
+ |
|
193 |
|
begin = index + 1; |
194 |
+ |
|
195 |
|
break; |
196 |
|
} |
197 |
|
else if ((index > begin ? (isupper(getText()[index]) && |
199 |
|
index != found) |
200 |
|
{ |
201 |
|
begin = index; |
202 |
+ |
|
203 |
|
break; |
204 |
|
} |
205 |
|
} |
215 |
|
entities(portion, '>', "&gt;"); |
216 |
|
|
217 |
|
sample += portion + "<strong>"; |
217 |
– |
|
218 |
|
portion = getText().substr(found, length); |
219 |
|
sampleLength += portion.length(); |
220 |
|
|
224 |
|
entities(portion, '>', "&gt;"); |
225 |
|
|
226 |
|
sample += portion + "</strong>"; |
227 |
– |
|
227 |
|
begin = found + length; |
228 |
|
end = begin - 1; |
229 |
|
|
240 |
|
entities(portion, '>', "&gt;"); |
241 |
|
|
242 |
|
sample += portion; |
244 |
– |
|
243 |
|
begin = itor->first; |
244 |
|
end = begin - 1; |
245 |
|
} |
292 |
|
break; |
293 |
|
} |
294 |
|
} |
295 |
+ |
|
296 |
+ |
if (sample == "") |
297 |
+ |
{ |
298 |
+ |
for (end = sampleMax; end > 0 && (end + 1 < getText().length()); end--) |
299 |
+ |
{ |
300 |
+ |
if (isspace(getText()[end])) break; |
301 |
+ |
} |
302 |
+ |
|
303 |
+ |
sample = getText().substr(0, end + 1); |
304 |
+ |
|
305 |
+ |
entities(sample, '&', "&amp;"); |
306 |
+ |
entities(sample, '\"', "&quot;"); |
307 |
+ |
entities(sample, '<', "&lt;"); |
308 |
+ |
entities(sample, '>', "&gt;"); |
309 |
+ |
|
310 |
+ |
if (end + 1 < getText().length()) |
311 |
+ |
{ |
312 |
+ |
sample += " <strong>...</strong>"; |
313 |
+ |
} |
314 |
+ |
} |
315 |
|
} |
316 |
|
|
317 |
|
string Ranker::getTitle() |