forked from regosen/gallery_get
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gallery_utils.py
97 lines (83 loc) · 2.85 KB
/
gallery_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os, time
# Python 3 imports that throw in Python 2
try:
from urllib.request import Request, urlopen, URLError, HTTPError
from urllib.parse import urlparse, unquote
except ImportError:
# This is Python 2
from urllib2 import Request, urlopen, URLError, HTTPError
from urlparse import urlparse
from urllib import unquote
# Python 2 built-in names that do not exist in Python 3.
# Looking them up raises NameError on Python 3, which selects the fallback.
try:
    str_input = raw_input
    str_type = basestring
except NameError:
    # This is Python 3
    str_input = input
    str_type = str
# Some galleries reject requests that do not appear to come from a browser;
# sending this browser-style User-Agent string is meant to get past that.
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.192 Safari/537.36'
def urlopen_safe(url, allow_redirect=True):
    """Open *url* with a browser-like User-Agent and return the response.

    A 3xx status that the opener did not follow by itself surfaces here as
    an ``HTTPError``. When ``allow_redirect`` is true, that redirect is
    followed manually, at most once (the retry disables further redirects).

    Args:
        url: Address to fetch (assumed to be a string URL).
        allow_redirect: Whether one unhandled 3xx response may be followed.

    Returns:
        The response object returned by ``urlopen``.

    Raises:
        HTTPError: For non-redirect HTTP errors, for a 3xx response when
            ``allow_redirect`` is false, or for a 3xx response that has no
            ``Location`` header to follow.
    """
    req = Request(url)
    req.add_header('User-Agent', USER_AGENT)
    try:
        return urlopen(req)
    except HTTPError as e:
        location = None
        if allow_redirect and 300 <= e.code < 400 and e.headers is not None:
            # redirect wasn't handled by Request, do it ourselves
            location = e.headers.get('Location')
        if not location:
            # Not a followable redirect: re-raise the original error with
            # its traceback intact.
            raise

    # Imported locally so this function does not depend on edits to the
    # file-level import block; works on both Python 3 and Python 2.
    try:
        from urllib.parse import urljoin
    except ImportError:
        from urlparse import urljoin

    # Location may be relative; resolve it against the URL we requested.
    return urlopen_safe(urljoin(url, location), False)
# Lazily created Selenium driver, shared by every urlopen_js call.
JS_DRIVER = None


def urlopen_js(url):
    """Load *url* in a headless Chrome browser and return the page source.

    The Selenium driver is created on first use and cached in the
    module-level ``JS_DRIVER``. After loading the page, the first matching
    "wall" button and the first matching "loadMore" button are clicked if
    they are present.

    Args:
        url: Address of the page to load.

    Returns:
        ``JS_DRIVER.page_source`` after the optional clicks.

    Raises:
        Exception: If ``selenium`` or ``chromedriver_py`` cannot be imported.
    """
    global JS_DRIVER
    if not JS_DRIVER:
        try:
            from selenium import webdriver
            from chromedriver_py import binary_path
        except ImportError:
            # Only a missing package should produce the install hint; other
            # failures propagate unchanged.
            raise Exception("Page requires JavaScript, please run 'pip install selenium chromedriver-py' and try again")
        options = webdriver.ChromeOptions()
        options.add_argument('headless')
        # NOTE(review): `executable_path=` and `find_elements_by_xpath` are
        # legacy Selenium APIs that newer Selenium 4 releases no longer
        # provide -- confirm which Selenium version this project targets.
        JS_DRIVER = webdriver.Chrome(executable_path=binary_path, options=options)

    JS_DRIVER.get(url)
    # presumably a confirmation wall shown before the content; verify
    wall_button = JS_DRIVER.find_elements_by_xpath("//div[@class='Wall-Button Button btn-wall--yes']")
    if wall_button:
        wall_button[0].click()
    more_button = JS_DRIVER.find_elements_by_xpath("//button[@class='loadMore']")
    if more_button:
        more_button[0].click()
    return JS_DRIVER.page_source
def safe_makedirs(folder):
    """Create *folder* (including missing parents) unless it already exists.

    The directory is created first and the "already exists" case is handled
    afterwards, so a concurrent creator cannot trigger a spurious failure
    between an existence check and the ``makedirs`` call.

    Args:
        folder: Path of the directory to create.

    Raises:
        OSError: If creation fails and the path still does not exist.
    """
    try:
        os.makedirs(folder)
    except OSError:
        # Anything already present at this path is accepted; only genuine
        # creation failures are re-raised.
        if not os.path.exists(folder):
            raise
# Python 2<>3 compatibility methods
def encode_safe(in_str):
    """Return *in_str* UTF-8 encoded when it is a Python 2 ``unicode`` object.

    On Python 3 the name ``unicode`` does not exist, so the value is
    returned unchanged. Any other value is also returned unchanged.

    Args:
        in_str: Value to encode; may be of any type.

    Returns:
        The UTF-8 encoded byte string for Python 2 ``unicode`` input,
        otherwise ``in_str`` itself.
    """
    try:
        if isinstance(in_str, unicode):
            in_str = in_str.encode("utf8")
    except (NameError, UnicodeError):
        # NameError: running on Python 3 (no `unicode` type).
        # UnicodeError: best effort -- keep the original value.
        pass
    return in_str
def unicode_safe(str):
    """Decode *str* to text, trying UTF-8 first and then Latin-1.

    Values that cannot be decoded (no ``decode`` method, or both decodings
    fail) are returned unchanged.

    Args:
        str: Value to decode. The name shadows the builtin but is kept so
            existing keyword callers keep working.

    Returns:
        The decoded text, or the original value when decoding is not
        possible.
    """
    for encoding in ("utf8", "latin1"):
        try:
            return str.decode(encoding)
        except (AttributeError, UnicodeError):
            # AttributeError: value has no decode() (e.g. Python 3 text).
            # UnicodeError: bytes are not valid in this encoding.
            continue
    return str
def urlopen_text(url, wait_time = 0):
    """Fetch *url* and return its body decoded to text.

    Args:
        url: Address to fetch; passed to ``urlopen_safe``.
        wait_time: Seconds to sleep after opening the URL and before
            reading the body.

    Returns:
        The response body passed through ``unicode_safe``.
    """
    response = urlopen_safe(url)
    try:
        # some galleries need time to finish loading a page
        time.sleep(wait_time)
        return unicode_safe(response.read())
    finally:
        # Always release the connection, even if sleeping or reading fails.
        response.close()
def is_str(obj):
    """Return True when the class name of *obj* is a known string type name.

    The comparison is done on class names rather than with ``isinstance``
    because isinstance doesn't always work here.
    """
    class_name = obj.__class__.__name__
    string_type_names = (str_type.__name__, 'str', 'unicode')
    return class_name in string_type_names
def is_iterable(obj):
    """Return True when *obj* has ``__iter__`` and is not a string per ``is_str``."""
    if not hasattr(obj, '__iter__'):
        return False
    return not is_str(obj)