11 |
|
|
12 |
|
# XXX: hack to make Wells Fargo http-equiv redirects actually work.
# 'body' is appended to the head parser's ``head_elems`` tuple (presumably so
# that <meta http-equiv> tags appearing inside <body> are still picked up --
# confirm against mechanize), and the redirect handler's ``max_redirections``
# is set to 20.
mechanize._http.AbstractHeadParser.head_elems = (
    tuple(mechanize._http.AbstractHeadParser.head_elems) + ('body',)
)
mechanize._http.HTTPRedirectHandler.max_redirections = 20
15 |
|
|
16 |
|
class DuckSoup(MinimalSoup):
    """MinimalSoup variant with one extra markup-massage rule.

    The extra rule blanks out tag openers whose name is split by a quoted
    ``+`` (text such as ``<scr'+'ipt`` or ``</scr"+"ipt``) before parsing.
    """

    # Work on a copy so the rule list on MinimalSoup itself is left untouched.
    MARKUP_MASSAGE = copy.copy(MinimalSoup.MARKUP_MASSAGE)
    MARKUP_MASSAGE.append(
        (
            # "<" or "</", letters, a quote, "+", a quote, more letters.
            re.compile(r'''</?[a-z]+['"]\+['"][a-z]+'''),
            # Every match is replaced by the empty string.
            lambda _match: '',
        )
    )
20 |
|
|
21 |
+ |
class Factory(mechanize.DefaultFactory):
    """mechanize factory whose forms factory uses a caller-chosen form parser."""

    def __init__(self, form_parser):
        """Run the default factory setup, then set ``_forms_factory``.

        ``form_parser`` is handed to ``mechanize.FormsFactory`` as its
        ``form_parser_class`` argument.
        """
        mechanize.DefaultFactory.__init__(self)

        forms_factory = mechanize.FormsFactory(form_parser_class=form_parser)
        self._forms_factory = forms_factory
26 |
+ |
|
27 |
|
class Website(object): |
28 |
|
Soup = DuckSoup |
29 |
|
|
30 |
< |
def __init__(self, debug): |
31 |
< |
self.browser = mechanize.Browser() |
32 |
< |
self.browser.addheaders = [('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.4.154.25 Safari/525.19')] |
30 |
> |
def __init__(self, debug, *args, **kwargs): |
31 |
> |
self.browser = mechanize.Browser(*args, **kwargs) |
32 |
> |
self.browser.addheaders = [('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.31 Safari/530.5')] |
33 |
|
|
34 |
|
if debug: |
35 |
|
self.browser.set_debug_http(True) |
38 |
|
self.browser.set_handle_redirect(True) |
39 |
|
self.browser.set_handle_refresh(True) |
40 |
|
self.browser.set_handle_robots(False) |
41 |
+ |
|
42 |
+ |
self._count = 0 |
43 |
+ |
|
44 |
+ |
def _back(self):
    """Undo every navigation step counted since the last reset.

    Calls ``self.browser.back(n)`` where ``n`` is the number of steps
    recorded in ``self._count`` (incremented by ``_follow_link`` and
    ``_submit``), then resets the counter to zero.

    Returns whatever ``browser.back`` returns, or ``None`` when no steps
    have been recorded, in which case the browser is not touched.
    """
    # Explicit guard: with nothing to undo there is no result to return, so
    # never fall through to a ``return result`` with ``result`` unbound.
    if not self._count:
        return None

    result = self.browser.back(self._count)

    # Only reset after back() succeeded, so a failure keeps the count.
    self._count = 0

    return result
51 |
+ |
|
52 |
+ |
def _follow_link(self, *args, **kwargs):
    """Follow a link via the browser and record one navigation step.

    All arguments are forwarded unchanged to ``self.browser.follow_link``;
    its return value is passed back to the caller.
    """
    response = self.browser.follow_link(*args, **kwargs)

    # Counted only after the call returned, so a raised error adds no step.
    self._count = self._count + 1

    return response
58 |
+ |
|
59 |
+ |
def _submit(self, *args, **kwargs):
    """Submit through the browser and record one navigation step.

    All arguments are forwarded unchanged to ``self.browser.submit``; its
    return value is passed back to the caller.
    """
    response = self.browser.submit(*args, **kwargs)

    # Counted only after the call returned, so a raised error adds no step.
    self._count = self._count + 1

    return response