-
Notifications
You must be signed in to change notification settings - Fork 3
/
fix_werkzeug_robobrowser.py
372 lines (370 loc) · 12.6 KB
/
fix_werkzeug_robobrowser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
"""Patch installed copies of RoboBrowser to import from ``werkzeug.utils``.

RoboBrowser's ``browser.py`` contains ``from werkzeug import cached_property``.
Werkzeug 1.0 removed that top-level name, which now lives in
``werkzeug.utils``.  This module rewrites the import in place in every
``robobrowser/browser.py`` found under the interpreter's site-packages
directories.

The patch is applied both when the file is run as a script and when it is
imported.  Running it again on an already patched file changes nothing.
"""
import io
import os
import re
import site
import sys

# Matches the ``from werkzeug`` of a bare top-level import.  The lookahead
# requires ``import`` to follow immediately, so an already patched
# ``from werkzeug.utils import ...`` and imports from any other
# ``werkzeug.<submodule>`` are never touched.
_BARE_WERKZEUG_IMPORT = re.compile(r"\bfrom\s+werkzeug(?=\s+import\b)")

# Location of the module to patch, relative to a site-packages directory.
_TARGET = os.path.join("robobrowser", "browser.py")


def patch_line(line):
    """Return *line* with a bare ``from werkzeug import`` redirected.

    ``from werkzeug import X`` becomes ``from werkzeug.utils import X``.
    Everything else on the line (indentation, trailing comments, the line
    terminator) is preserved, and lines without such an import are returned
    unchanged.

    :param str line: One line of source text, with or without its newline.
    :return: The possibly rewritten line.
    """
    return _BARE_WERKZEUG_IMPORT.sub(
        lambda match: match.group(0) + ".utils", line, count=1
    )


def patch_file(path):
    """Rewrite the werkzeug import inside the file at *path*.

    The file is only written back when at least one line actually changed.

    :param str path: Path of the Python source file to patch.
    :return: ``True`` if the file was modified, ``False`` if it was already
        up to date.
    :raises IOError: (``OSError`` on Python 3) if the file cannot be read or
        written.
    """
    # ``newline=""`` disables newline translation in both directions, so the
    # file keeps whatever line endings it had.  ``io.open`` accepts these
    # arguments on Python 2 as well as Python 3.
    with io.open(path, "r", encoding="utf-8", newline="") as handle:
        original = handle.readlines()

    patched = [patch_line(line) for line in original]
    if patched == original:
        return False

    with io.open(path, "w", encoding="utf-8", newline="") as handle:
        handle.writelines(patched)
    return True


def find_browser_modules():
    """Locate every ``robobrowser/browser.py`` under site-packages.

    Both the global site-packages directories and the per-user one are
    searched, instead of relying on a fixed index into
    ``site.getsitepackages()``.

    :return: List of existing file paths, without duplicates, in search order.
    """
    roots = []
    # Looked up defensively: some legacy virtualenv releases ship a ``site``
    # module without these helpers.
    global_dirs = getattr(site, "getsitepackages", None)
    if global_dirs is not None:
        roots.extend(global_dirs())
    user_dir = getattr(site, "getusersitepackages", None)
    if user_dir is not None:
        roots.append(user_dir())

    found = []
    for root in roots:
        candidate = os.path.join(root, _TARGET)
        if candidate not in found and os.path.isfile(candidate):
            found.append(candidate)
    return found


def main():
    """Patch every installed copy of ``robobrowser/browser.py``.

    :return: Process exit status: ``0`` when at least one copy was found
        (patched or already up to date), ``1`` when none was found.
    """
    targets = find_browser_modules()
    if not targets:
        sys.stderr.write(
            "robobrowser/browser.py was not found in any site-packages "
            "directory; nothing to patch\n"
        )
        return 1
    for target in targets:
        patch_file(target)
    return 0


if __name__ == "__main__":
    sys.exit(main())
else:
    # The patch is applied on import as well, so this module can be imported
    # ahead of ``robobrowser`` to fix it on the fly.
    main()
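# NOTE(review): everything below is commented out and is never executed. It
# appears to be a reference copy of RoboBrowser's unpatched browser.py (note
# the `from werkzeug import cached_property` line, which is the import the
# script above rewrites) -- confirm against the installed package.
#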
# """
# Robotic browser.
# """
#
# import re
# import requests
# from bs4 import BeautifulSoup
# from werkzeug import cached_property
# from requests.packages.urllib3.util.retry import Retry
#
# from robobrowser import helpers
# from robobrowser import exceptions
# from robobrowser.compat import urlparse
# from robobrowser.forms.form import Form
# from robobrowser.cache import RoboHTTPAdapter
#
# _link_ptn = re.compile(r'^(a|button)$', re.I)
# _form_ptn = re.compile(r'^form$', re.I)
#
#
# class RoboState(object):
# """Representation of a browser state. Wraps the browser and response, and
# lazily parses the response content.
#
# """
#
# def __init__(self, browser, response):
# self.browser = browser
# self.response = response
# self.url = response.url
#
# @cached_property
# def parsed(self):
# """Lazily parse response content, using HTML parser specified by the
# browser.
# """
# return BeautifulSoup(
# self.response.content,
# features=self.browser.parser,
# )
#
#
# class RoboBrowser(object):
# """Robotic web browser. Represents HTTP requests and responses using the
# requests library and parsed HTML using BeautifulSoup.
#
# :param str parser: HTML parser; used by BeautifulSoup
# :param str user_agent: Default user-agent
# :param history: History length; infinite if True, 1 if falsy, else
# takes integer value
#
# :param int timeout: Default timeout, in seconds
# :param bool allow_redirects: Allow redirects on POST/PUT/DELETE
#
# :param bool cache: Cache responses
# :param list cache_patterns: List of URL patterns for cache
# :param timedelta max_age: Max age for cache
# :param int max_count: Max count for cache
#
# :param int tries: Number of retries
# :param Exception errors: Exception or tuple of exceptions to catch
# :param int delay: Delay between retries
# :param int multiplier: Delay multiplier between retries
#
# """
#
# def __init__(self, session=None, parser=None, user_agent=None,
# history=True, timeout=None, allow_redirects=True, cache=False,
# cache_patterns=None, max_age=None, max_count=None, tries=None,
# multiplier=None):
#
# self.session = session or requests.Session()
#
# # Add default user agent string
# if user_agent is not None:
# self.session.headers['User-Agent'] = user_agent
#
# self.parser = parser
#
# self.timeout = timeout
# self.allow_redirects = allow_redirects
#
# # Set up caching
# if cache:
# adapter = RoboHTTPAdapter(max_age=max_age, max_count=max_count)
# cache_patterns = cache_patterns or ['http://', 'https://']
# for pattern in cache_patterns:
# self.session.mount(pattern, adapter)
# elif max_age:
# raise ValueError('Parameter `max_age` is provided, '
# 'but caching is turned off')
# elif max_count:
# raise ValueError('Parameter `max_count` is provided, '
# 'but caching is turned off')
#
# # Configure history
# self.history = history
# if history is True:
# self._maxlen = None
# elif not history:
# self._maxlen = 1
# else:
# self._maxlen = history
# self._states = []
# self._cursor = -1
#
# # Set up retries
# if tries:
# retry = Retry(tries, backoff_factor=multiplier)
# for protocol in ['http://', 'https://']:
# self.session.adapters[protocol].max_retries = retry
#
# def __repr__(self):
# try:
# return '<RoboBrowser url={0}>'.format(self.url)
# except exceptions.RoboError:
# return '<RoboBrowser>'
#
# @property
# def state(self):
# if self._cursor == -1:
# raise exceptions.RoboError('No state')
# try:
# return self._states[self._cursor]
# except IndexError:
# raise exceptions.RoboError('Index out of range')
#
# @property
# def response(self):
# return self.state.response
#
# @property
# def url(self):
# return self.state.url
#
# @property
# def parsed(self):
# return self.state.parsed
#
# @property
# def find(self):
# """See ``BeautifulSoup::find``."""
# try:
# return self.parsed.find
# except AttributeError:
# raise exceptions.RoboError
#
# @property
# def find_all(self):
# """See ``BeautifulSoup::find_all``."""
# try:
# return self.parsed.find_all
# except AttributeError:
# raise exceptions.RoboError
#
# @property
# def select(self):
# """See ``BeautifulSoup::select``."""
# try:
# return self.parsed.select
# except AttributeError:
# raise exceptions.RoboError
#
# def _build_url(self, url):
# """Build absolute URL.
#
# :param url: Full or partial URL
# :return: Full URL
#
# """
# return urlparse.urljoin(
# self.url,
# url
# )
#
# @property
# def _default_send_args(self):
# """
#
# """
# return {
# 'timeout': self.timeout,
# 'allow_redirects': self.allow_redirects,
# }
#
# def _build_send_args(self, **kwargs):
# """Merge optional arguments with defaults.
#
# :param kwargs: Keyword arguments to `Session::send`
#
# """
# out = {}
# out.update(self._default_send_args)
# out.update(kwargs)
# return out
#
# def open(self, url, method='get', **kwargs):
# """Open a URL.
#
# :param str url: URL to open
# :param str method: Optional method; defaults to `'get'`
# :param kwargs: Keyword arguments to `Session::request`
#
# """
# response = self.session.request(method, url, **self._build_send_args(**kwargs))
# self._update_state(response)
#
# def _update_state(self, response):
# """Update the state of the browser. Create a new state object, and
# append to or overwrite the browser's state history.
#
# :param requests.MockResponse: New response object
#
# """
# # Clear trailing states
# self._states = self._states[:self._cursor + 1]
#
# # Append new state
# state = RoboState(self, response)
# self._states.append(state)
# self._cursor += 1
#
# # Clear leading states
# if self._maxlen:
# decrement = len(self._states) - self._maxlen
# if decrement > 0:
# self._states = self._states[decrement:]
# self._cursor -= decrement
#
# def _traverse(self, n=1):
# """Traverse state history. Used by `back` and `forward` methods.
#
# :param int n: Cursor increment. Positive values move forward in the
# browser history; negative values move backward.
#
# """
# if not self.history:
# raise exceptions.RoboError('Not tracking history')
# cursor = self._cursor + n
# if cursor >= len(self._states) or cursor < 0:
# raise exceptions.RoboError('Index out of range')
# self._cursor = cursor
#
# def back(self, n=1):
# """Go back in browser history.
#
# :param int n: Number of pages to go back
#
# """
# self._traverse(-1 * n)
#
# def forward(self, n=1):
# """Go forward in browser history.
#
# :param int n: Number of pages to go forward
#
# """
# self._traverse(n)
#
# def get_link(self, text=None, *args, **kwargs):
# """Find an anchor or button by containing text, as well as standard
# BeautifulSoup arguments.
#
# :param text: String or regex to be matched in link text
# :return: BeautifulSoup tag if found, else None
#
# """
# return helpers.find(
# self.parsed, _link_ptn, text=text, *args, **kwargs
# )
#
# def get_links(self, text=None, *args, **kwargs):
# """Find anchors or buttons by containing text, as well as standard
# BeautifulSoup arguments.
#
# :param text: String or regex to be matched in link text
# :return: List of BeautifulSoup tags
#
# """
# return helpers.find_all(
# self.parsed, _link_ptn, text=text, *args, **kwargs
# )
#
# def get_form(self, id=None, *args, **kwargs):
# """Find form by ID, as well as standard BeautifulSoup arguments.
#
# :param str id: Form ID
# :return: BeautifulSoup tag if found, else None
#
# """
# if id:
# kwargs['id'] = id
# form = self.find(_form_ptn, *args, **kwargs)
# if form is not None:
# return Form(form)
#
# def get_forms(self, *args, **kwargs):
# """Find forms by standard BeautifulSoup arguments.
# :args: Positional arguments to `BeautifulSoup::find_all`
# :args: Keyword arguments to `BeautifulSoup::find_all`
#
# :return: List of BeautifulSoup tags
#
# """
# forms = self.find_all(_form_ptn, *args, **kwargs)
# return [
# Form(form)
# for form in forms
# ]
#
# def follow_link(self, link, **kwargs):
# """Click a link.
#
# :param Tag link: Link to click
# :param kwargs: Keyword arguments to `Session::send`
#
# """
# try:
# href = link['href']
# except KeyError:
# raise exceptions.RoboError('Link element must have "href" '
# 'attribute')
# self.open(self._build_url(href), **kwargs)
#
# def submit_form(self, form, submit=None, **kwargs):
# """Submit a form.
#
# :param Form form: Filled-out form object
# :param Submit submit: Optional `Submit` to click, if form includes
# multiple submits
# :param kwargs: Keyword arguments to `Session::send`
#
# """
# # Get HTTP verb
# method = form.method.upper()
#
# # Send request
# url = self._build_url(form.action) or self.url
# payload = form.serialize(submit=submit)
# serialized = payload.to_requests(method)
# send_args = self._build_send_args(**kwargs)
# send_args.update(serialized)
# response = self.session.request(method, url, **send_args)
#
# # Update history
# self._update_state(response)