1 |
|
/* ============================================================================ |
2 |
|
* Douglas Thrift's Search Engine License |
3 |
|
* |
4 |
< |
* Copyright (C) 2002, Douglas Thrift. All Rights Reserved. |
4 |
> |
* Copyright (C) 2002-2004, 2008, Douglas Thrift. All Rights Reserved. |
5 |
|
* Redistribution and use in source and binary forms, with or without |
6 |
|
* modification, are permitted provided that the following conditions are met: |
7 |
|
* |
46 |
|
// |
47 |
|
// Douglas Thrift |
48 |
|
// |
49 |
< |
// Indexer.h |
49 |
> |
// $Id$ |
50 |
|
|
51 |
< |
#ifndef _Indexer_h_ |
52 |
< |
#define _Indexer_h_ |
51 |
> |
#ifndef _Indexer_hpp_ |
52 |
> |
#define _Indexer_hpp_ |
53 |
|
|
54 |
< |
#include "Search.h" |
55 |
< |
#include "URL.h" |
56 |
< |
#include "Page.h" |
57 |
< |
#include "HttpHandler.h" |
58 |
< |
#include "Processor.h" |
54 |
> |
#include "Search.hpp" |
55 |
> |
#include "URL.hpp" |
56 |
> |
#include "Page.hpp" |
57 |
> |
#include "HttpHandler.hpp" |
58 |
> |
#include "Processor.hpp" |
59 |
|
|
60 |
|
typedef set<string> Set; |
61 |
|
typedef set<string>::iterator SetIterator; |
63 |
|
class Indexer |
64 |
|
{ |
65 |
|
private: |
66 |
< |
enum robot { none, version, name, all }; |
66 |
> |
enum Robot { none, version, name, all }; |
67 |
|
HttpHandler http; |
68 |
|
Processor processor; |
69 |
|
Set pages; |
70 |
|
queue<URL> links; |
71 |
+ |
queue<string> referers; |
72 |
|
string indexFile; |
73 |
< |
Set domains; |
74 |
< |
Set restrictions; |
74 |
< |
Set checked; |
75 |
< |
void index(URL& url, ofstream& fout); |
73 |
> |
Set domains, restrictions, checked; |
74 |
> |
void index(URL& url, ofstream& fout, const string& referer = ""); |
75 |
|
bool restricted(URL& url); |
76 |
|
void robots(URL& url); |
77 |
|
public: |
78 |
|
Indexer(string& indexFile, set<string>& domains, |
79 |
< |
set<string>& restrictions); |
79 |
> |
set<string>& restrictions) : indexFile(indexFile), domains(domains), |
80 |
> |
restrictions(restrictions) {} |
81 |
|
~Indexer() {} |
82 |
|
void index(string& begin); |
83 |
|
}; |
84 |
|
|
85 |
< |
#endif // _Indexer_h_ |
85 |
> |
#endif // _Indexer_hpp_ |