-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbdsmdownlr.py
116 lines (92 loc) · 3.29 KB
/
bdsmdownlr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from argparse import ArgumentParser
from bs4 import BeautifulSoup
import requests
import os
def get_input():
    """Parse the command line: the blog to download plus bdsmlr credentials.

    All three options are required; returns the argparse namespace with
    .blog, .email and .password attributes.
    """
    parser = ArgumentParser()
    option_table = (
        (('-b', '--blog'), 'blog', 'The bdsmlr blog to download'),
        (('-u', '--email'), 'email', 'your bdsmlr email'),
        (('-p', '--password'), 'password', 'your bdsmlr password'),
    )
    for flags, dest, help_text in option_table:
        parser.add_argument(*flags, type=str, required=True, dest=dest, help=help_text)
    return parser.parse_args()
def download_blog(blog, email, password):
    """Crawl every listing page of *blog*, downloading each page's posts.

    Logs in once, then walks pages 1, 2, ... until download_page reports
    an empty page. Prints a progress line every 5 pages.
    """
    with requests.Session() as session:
        login(session, email, password)
        page = 1
        more_pages = True
        while more_pages:
            more_pages = download_page(session, blog, page)
            if page % 5 == 0:
                print(f' downloaded {page} pages')
            page += 1
    print(f'\nFinished downloading: {blog}\n')
def download_page(session, blog, page):
    """Fetch one listing page of *blog* and download every post on it.

    Returns True when the page held at least one post, False when it was
    empty -- the caller treats False as "reached the end of the blog".
    """
    listing_url = f'https://bdsmlr.com/blog/{blog}?latest=&page={page}'
    response = session.get(listing_url)
    document = BeautifulSoup(response.text, features="html.parser")
    post_divs = document.find_all('div', {'class': 'post_content'})
    if not post_divs:
        print(f'Page {page} does not contain any images, probably reached the end.')
        return False
    for post_div in post_divs:
        download_post(blog, post_div)
    print('\t', end='')
    return True
def download_post(blog, post):
    """Download all images of one post into the *blog* directory.

    A single-image post is saved as <postid>.<ext>; a multi-image post as
    <postid>-<n>.<ext>. Each post is best-effort: a malformed post (missing
    data-postid, missing href, network failure) is reported and skipped
    rather than aborting the whole crawl. Prints '|' per completed post.
    """
    try:
        post_id = post.find('i')['data-postid']
        image_urls = [tag['href'] for tag in post.find_all('div', {'class': 'magnify'})]
        if image_urls:
            # Hoisted out of the loop: one directory check per post,
            # not one per image. exist_ok avoids the check-then-create race.
            os.makedirs(blog, exist_ok=True)
        for index, image_url in enumerate(image_urls, start=1):
            if len(image_urls) == 1:
                stem = post_id
            else:
                stem = f'{post_id}-{index}'
            filename = os.path.join(blog, get_filename(stem, image_url))
            download_image(image_url, filename)
    except Exception as e:
        # Best-effort, but never silently: the original swallowed every
        # error (its diagnostic print was commented out), which hid bugs.
        print(f'\nSkipping a post: {e}')
        return
    print('|', end='')
def get_filename(postId, url):
    """Return a local filename for *postId* with the extension taken from *url*.

    Strips any query string / fragment before extracting the extension and
    only looks at the final path segment, so a dot in the domain (e.g.
    'cdn.example.com/file') can never leak into the extension. A URL whose
    path has no extension yields just the post id instead of raising
    IndexError (which the original rsplit did).
    """
    # Drop ?query and #fragment, then keep only the last path segment.
    path = url.split('?', 1)[0].split('#', 1)[0]
    basename = path.rsplit('/', 1)[-1]
    _, dot, suffix = basename.rpartition('.')
    if not dot:
        return str(postId)
    return f'{postId}.{suffix}'
def download_image(url, filename):
    """Fetch *url* and write it to *filename*; a no-op if the file exists.

    The exists-check makes interrupted crawls resumable. raise_for_status
    prevents an HTML error page (404/403 body) from being saved under an
    image filename, and the timeout keeps one dead host from hanging the
    crawl forever -- any raised error is absorbed by the caller's
    best-effort handling in download_post.
    """
    if os.path.exists(filename):
        return  # already downloaded on a previous run
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    with open(filename, 'wb') as file:
        file.write(response.content)
def login(session, email, password):
    """Authenticate *session* against bdsmlr.com.

    Fetches the login form to scrape the CSRF '_token' hidden input, then
    posts it together with the credentials; the session object keeps the
    resulting authentication cookies for later requests.
    """
    form_page = session.get('https://bdsmlr.com/login')
    markup = BeautifulSoup(form_page.text, features="html.parser")
    csrf_token = markup.find('input', {'name': '_token'})['value']
    credentials = {
        '_token': csrf_token,
        'email': email,
        'password': password,
    }
    session.post('https://bdsmlr.com/login', data=credentials)
    print(f'Logged in as {email}\n')
if __name__ == '__main__':
    # CLI entry point: read blog name and credentials, then crawl it.
    arguments = get_input()
    target_blog = arguments.blog
    print(f'\nDownloading {target_blog}\n')
    download_blog(target_blog, arguments.email, arguments.password)