-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathGetManga.py
152 lines (137 loc) · 5.29 KB
/
GetManga.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# __author__ = "alan.francis" #
import html
import os
import sys
from multiprocessing import Pool
from multiprocessing.pool import ThreadPool

import requests
from bs4 import BeautifulSoup
from PIL import Image
def is_folder_valid(path_name):
    """Check that every image file under *path_name* can be fully decoded.

    Walks the directory tree rooted at *path_name*, skipping hidden files
    (names starting with ``.``) and ``.txt`` files, then opens and loads
    each remaining file with PIL. Every file that fails is reported on
    stdout.

    Args:
        path_name: Root directory to scan.

    Returns:
        True when no scanned file failed to load (this includes the case
        where there was nothing to scan), False otherwise.
    """
    excluded_extn = ('.txt',)

    file_list = []
    for root, _dirs, files in os.walk(path_name):
        for eachfile in files:
            if eachfile.startswith('.'):
                continue
            if os.path.splitext(eachfile)[1] in excluded_extn:
                continue
            file_list.append(os.path.join(root, eachfile))

    is_valid = True
    for eachfile in file_list:
        try:
            # The context manager closes the underlying file even when
            # load() fails half-way through a truncated download.
            with Image.open(eachfile, 'r') as fimg:
                fimg.load()
        except OSError:
            print(eachfile, 'not downloaded properly')
            is_valid = False
        except Exception as exc:
            # Any other decoder failure still means the file is unusable;
            # keep scanning so every bad file gets reported.
            print(eachfile, 'error', exc)
            is_valid = False
    return is_valid
def download_image_from_page(page):
    """Download the image shown on one reader page into the chapter folder.

    Fetches *page*, reads the ``src`` of the element with ``id="image"``
    and saves that image inside the module-level ``directory``. A file
    that already exists and loads cleanly with PIL is left alone; a missing
    or corrupt one is downloaded (again).

    Args:
        page: URL of a single reader page.

    Returns:
        None. Progress and failures are reported on stdout; a failed image
        download also triggers the spoken error notification.

    Raises:
        requests.RequestException: If the reader page itself cannot be
            fetched.
    """
    pagecontent = requests.get(page)
    pagesoup = BeautifulSoup(pagecontent.text, 'html.parser')
    imagesection = pagesoup.find(id="image")
    if imagesection is None or not imagesection.get('src'):
        # Without an image element there is nothing to save for this page.
        print('no image found on', page)
        return
    img_url = imagesection['src']

    # File name = last path segment of the image URL, minus any query string.
    basename = img_url.split('?', 1)[0].rsplit('/', 1)[-1]
    filename = os.path.join(directory, basename)

    if os.path.exists(filename):
        print(filename, 'already downloaded')
        try:
            with Image.open(filename, 'r') as existing:
                existing.load()
        except Exception:
            # Any decode failure means the earlier download is unusable,
            # matching how is_folder_valid() judges files.
            print('but file is corrupt')
        else:
            return

    try:
        response = requests.get(img_url)
        # Do not store an HTTP error page under an image file name.
        response.raise_for_status()
        print('saving to', filename)
        # 'wb' truncates an existing (corrupt) file, so no explicit remove.
        with open(filename, 'wb') as f:
            f.write(response.content)
    except (requests.RequestException, OSError) as exc:
        print('could not download', img_url, '-', exc)
        os.system('say "Manga Download Error"')
# Script body: download every chapter listed for one manga on mangahere.co.
# Usage: python3 GetManga.py [<chapter_url> [<thread_count>]]
# Both arguments are optional; the defaults below are used when omitted.
# eg root_url: 'http://www.mangahere.co/manga/akame_ga_kiru_zero/c001/'
root_url = 'http://www.mangahere.co/manga/shokugeki_no_soma/c173.1/'
thread_count = 5
if len(sys.argv) > 1:
    root_url = sys.argv[1]
if len(sys.argv) > 2:
    thread_count = int(sys.argv[2])

# The manga slug is the path segment that follows '/manga/' in the URL.
manga_name = root_url.split('/manga/')[1].split('/')[0]
print('Manga Name:', manga_name)

# The chapter page references '/get_chapters<id>.js?...'; pull out <id>.
firstPage = requests.get(root_url)
manga_num = firstPage.text.split('/get_chapters')[1].split('.js?')[0]
print('Manga #', manga_num)

location = '/Volumes/Personal/Media/Manga/' + manga_name
# NOTE: `pathsep` and `directory` (set in the loop below) must stay
# module-level names because download_image_from_page() reads them.
pathsep = os.path.sep
progressFile = location + pathsep + manga_name + '_progress.txt'

# The chapter list is the text between 'new Array(' and ');' in the JS file,
# one entry per line.
chapters = requests.get('http://www.mangahere.co/get_chapters' + manga_num + '.js')
start = 'var chapter_list = new Array('
end = ');'
chapterListText = chapters.text.split(start)[1].split(end)[0]
chapterList = chapterListText.split(sep='\n')

finished_chapters = ['Example Chapter']
os.makedirs(location, exist_ok=True)
if os.path.exists(progressFile):
    with open(progressFile, encoding='UTF-8') as a_file:
        finished_chapters.extend(line.strip() for line in a_file)
else:
    print('Fresh start')

for chapter in chapterList:
    if not chapter or ',' not in chapter:
        continue
    values = chapter.split('","')
    if len(values) < 2:
        # Not a '"name","url"' entry; nothing to download from this line.
        continue

    # Turn the raw entry into a folder-friendly chapter name.
    name = values[0].strip()[2:]
    name = name.replace(':', '-')
    name = name.replace('"', '')
    name = name.replace('- Fixed', '')
    # Decode HTML entities left in the title (e.g. '&amp;', '&#39;').
    name = html.unescape(name)

    directory = location + pathsep + name
    already_present = name in finished_chapters
    images_valid = False
    if already_present:
        print('Chapter:', name, 'already downloaded')
        images_valid = is_folder_valid(directory)
        if not images_valid:
            print('but some images are corrupt, downloading again...')
    if already_present and images_valid:
        continue

    print('Downloading chapter:', name)
    os.makedirs(directory, exist_ok=True)
    url = values[1].replace('"+series_name+"', manga_name)
    # Drop the three trailing characters left over from the JS array entry
    # (presumably '"],' -- TODO confirm against live data).
    url = url.strip()[:-3]

    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'html.parser')
    # The page selector is the <select> wired to change_page(this); when
    # several match, the last one wins (same as before).
    box = None
    for select_box in soup.find_all('select'):
        if select_box.get('onchange') == 'change_page(this)':
            box = select_box
    if box is None:
        print('Could not find the page selector for chapter:', name)
        continue
    pages = [
        str(option['value']).strip()
        for option in box.find_all('option', value=True)
    ]

    # Downloads are I/O-bound, so threads are enough; they also share this
    # module's globals, which download_image_from_page() relies on.
    with ThreadPool(thread_count) as pool:
        pool.map(download_image_from_page, pages)

    if not already_present:
        # Record each chapter once, even if it is re-downloaded later.
        finished_chapters.append(name)
        with open(progressFile, mode='a', encoding='UTF-8') as b_file:
            b_file.write(name + '\n')

print(manga_name, 'downloaded successfully')
os.system('say "Manga Download complete"')