75 |
|
fout.close(); |
76 |
|
} |
77 |
|
|
78 |
< |
void Indexer::index(URL& url, ofstream& fout) |
78 |
> |
void Indexer::index(URL& url, ofstream& fout, const string referer) |
79 |
|
{ |
80 |
|
if (domains.find(url.getAddress() += url.getPort() != 80 ? ":" + |
81 |
|
url.getPort() : "") != domains.end() && pages.find(url.getURL()) == |
89 |
|
|
90 |
|
if (!restricted(url)) |
91 |
|
{ |
92 |
< |
if (http.handle(url, true)) |
92 |
> |
if (http.handle(url, referer, true)) |
93 |
|
{ |
94 |
|
if (http.contentType().find("text/plain") == 0 || |
95 |
|
http.contentType().find("text/html") == 0) |
96 |
|
{ |
97 |
|
http.clear(); |
98 |
< |
if (!http.handle(url)) exit(1); |
98 |
> |
if (!http.handle(url, referer)) exit(1); |
99 |
|
|
100 |
|
cout << "Indexing " << url << "..." << flush; |
101 |
|
|
121 |
|
if (pages.find(*link) == pages.end()) |
122 |
|
{ |
123 |
|
links.push(URL(*link)); |
124 |
+ |
referers.push(url.getURL()); |
125 |
|
} |
126 |
|
} |
127 |
|
} |
135 |
|
if (pages.find(http.redirect()) == pages.end()) |
136 |
|
{ |
137 |
|
links.push(URL(http.redirect())); |
138 |
+ |
referers.push(url.getURL()); |
139 |
|
} |
140 |
|
} |
141 |
|
|
148 |
|
URL next = links.front(); |
149 |
|
links.pop(); |
150 |
|
|
151 |
+ |
string referer = referers.front(); |
152 |
+ |
referers.pop(); |
153 |
+ |
|
154 |
|
if (debug) cerr << "next = " << next << "\n"; |
155 |
|
|
156 |
< |
index(next, fout); |
156 |
> |
index(next, fout, referer); |
157 |
|
} |
158 |
|
} |
159 |
|
|