From 5f19e7b801e38eda1aa2a2bfde9927c7f221653d Mon Sep 17 00:00:00 2001 From: "Dhruv Kanojia (Xonshiz)" Date: Sat, 16 Apr 2022 23:44:59 +0530 Subject: [PATCH] Fix for #236 & New website support - Closes #236 - Added support for TMOFans/lectortmo.com [Fix for #231 ] - Removed some unnecessary comments. - Fixed an extra method parameter in `japscan`. --- Changelog.md | 4 +- Supported_Sites.md | 3 +- comic_dl/__version__.py | 2 +- comic_dl/honcho.py | 14 +++- comic_dl/sites/comicNaver.py | 13 ++- comic_dl/sites/foolSlide.py | 4 +- comic_dl/sites/japscan.py | 3 +- comic_dl/sites/lectortmo.py | 157 +++++++++++++++++++++++++++++++++++ comic_dl/sites/webtoons.py | 1 + 9 files changed, 187 insertions(+), 14 deletions(-) create mode 100644 comic_dl/sites/lectortmo.py diff --git a/Changelog.md b/Changelog.md index 642277b..5022998 100644 --- a/Changelog.md +++ b/Changelog.md @@ -126,4 +126,6 @@ - Checking for existing CBZ/PDF files before downloading them again [Fix for #247] [2021.09.05] - Fix for chapter download at readmanganato - Added support for webtoons.com (No audio download yet) [Fix for #284] [2021.09.05.1] -- Fix for #299 [2022.04.16] \ No newline at end of file +- Fix for #299 [2022.04.16] +- Added support for TMOFans/lectortmo.com [Fix for #231] [2022.04.17] +- Fix for #236 [2022.04.17] \ No newline at end of file diff --git a/Supported_Sites.md b/Supported_Sites.md index 4e3dcb1..05622f4 100644 --- a/Supported_Sites.md +++ b/Supported_Sites.md @@ -24,4 +24,5 @@ * [comicextra.com](http://www.comicextra.com) * [readcomics.io](https://www.readcomics.io/) * [otakusmash.com](http://www.otakusmash.com/) -* [webtoons.com](https://www.webtoons.com/) \ No newline at end of file +* [webtoons.com](https://www.webtoons.com/) +* [TMOFans/lectortmo.com](https://lectortmo.com/) \ No newline at end of file diff --git a/comic_dl/__version__.py b/comic_dl/__version__.py index 08e2e57..0ffa832 100644 --- a/comic_dl/__version__.py +++ b/comic_dl/__version__.py @@ -1,4 +1,4 @@ 
#!/usr/bin/env python # -*- coding: utf-8 -*- -__version__ = "2022.04.16" +__version__ = "2022.04.17" diff --git a/comic_dl/honcho.py b/comic_dl/honcho.py index 955527c..c309f1c 100644 --- a/comic_dl/honcho.py +++ b/comic_dl/honcho.py @@ -26,6 +26,7 @@ from .sites import japscan from .sites import manganelo from .sites import webtoons +from .sites import lectortmo class Honcho(object): @@ -93,7 +94,8 @@ def checker(self, comic_url, download_directory, chapter_range, **kwargs): chapter_range=chapter_range, conversion=kwargs.get("conversion"), keep_files=kwargs.get("keep_files")) return 0 - elif domain in ["www.readcomiconline.li", "readcomiconline.li", "www.readcomicsonline.ru", "readcomicsonline.ru"]: + elif domain in ["www.readcomiconline.li", "readcomiconline.li", "www.readcomicsonline.ru", + "readcomicsonline.ru"]: readcomicOnlineli.ReadComicOnlineLi(manga_url=comic_url, logger=logging, current_directory=current_directory, sorting_order=sorting, log_flag=log_flag, download_directory=download_directory, @@ -237,9 +239,15 @@ def checker(self, comic_url, download_directory, chapter_range, **kwargs): return 0 elif domain in ["www.webtoons.com", "webtoons.com"]: webtoons.Webtoons(manga_url=comic_url, logger=logging, current_directory=current_directory, + sorting_order=sorting, log_flag=log_flag, download_directory=download_directory, + chapter_range=chapter_range, conversion=kwargs.get("conversion"), + keep_files=kwargs.get("keep_files"), image_quality=kwargs.get("image_quality")) + return 0 + elif domain in ["www.lectortmo.com", "lectortmo.com"]: + lectortmo.LectorTmo(manga_url=comic_url, logger=logging, current_directory=current_directory, sorting_order=sorting, log_flag=log_flag, download_directory=download_directory, chapter_range=chapter_range, conversion=kwargs.get("conversion"), - keep_files=kwargs.get("keep_files"), image_quality=kwargs.get("image_quality")) - return 0 + keep_files=kwargs.get("keep_files"), + print_index=print_index) else: print("%s is not 
supported at the moment. You can request it on the Github repository." % domain) diff --git a/comic_dl/sites/comicNaver.py b/comic_dl/sites/comicNaver.py index 702d82a..b2b7120 100644 --- a/comic_dl/sites/comicNaver.py +++ b/comic_dl/sites/comicNaver.py @@ -17,11 +17,11 @@ def __init__(self, manga_url, download_directory, chapter_range, **kwargs): self.sorting = kwargs.get("sorting_order") self.comic_name = self.name_cleaner(manga_url) self.print_index = kwargs.get("print_index") - if "list.nhn" in manga_url: + if "/list?" in manga_url or "list.nhn" in manga_url: self.full_series(manga_url, self.comic_name, self.sorting, download_directory, chapter_range=chapter_range, conversion=conversion, keep_files=keep_files) - elif "detail.nhn" in manga_url: + elif "/detail?" in manga_url or "detail.nhn" in manga_url: self.single_chapter(manga_url, self.comic_name, download_directory, conversion=conversion, keep_files=keep_files) @@ -38,6 +38,10 @@ def single_chapter(self, comic_url, comic_name, download_directory, conversion, # https://image-comic.pstatic.net/webtoon/183559/399/20180810173548_ffbf217190f59dc04bd6fc538e11d64b_IMAG01_1.jpg img_regex = r'https?://(?:imgcomic\.naver\.net|image-comic\.pstatic\.net)/webtoon/\d+/\d+/.+?\.(?:jpg|png|gif|bmp|JPG|PNG|GIF|BMP)' image_list = list(re.findall(img_regex, str(source))) + if len(image_list) == 0: + all_image_tags = source.find_all("img", {"alt": "comic content"}) + for img_tag in all_image_tags: + image_list.append(img_tag['src']) logging.debug("Image List : %s" % image_list) file_directory = globalFunctions.GlobalFunctions().create_file_directory(chapter_number, comic_name) @@ -68,15 +72,16 @@ def single_chapter(self, comic_url, comic_name, download_directory, conversion, return 0 def full_series(self, comic_url, comic_name, sorting, download_directory, chapter_range, conversion, keep_files): + comic_type = re.findall(r'comic.naver.com/(.*?)/', str(comic_url))[0] source, cookies = 
globalFunctions.GlobalFunctions().page_downloader(manga_url=comic_url) # print(source) - latest_chapter = re.findall(r"no=(\d+)\&", str(source))[1] + latest_chapter = re.findall(r"no=(\d+)[\&|\"]", str(source))[1] all_links = [] for x in range(1, int(latest_chapter) + 1): - chapter_url = "http://comic.naver.com/webtoon/detail.nhn?titleId=%s&no=%s" % (comic_name, x) + chapter_url = "http://comic.naver.com/{0}/detail.nhn?titleId={1}&no={2}".format(comic_type, comic_name, x) all_links.append(chapter_url) logging.debug("All Links : %s" % all_links) diff --git a/comic_dl/sites/foolSlide.py b/comic_dl/sites/foolSlide.py index 9b5852a..327959a 100644 --- a/comic_dl/sites/foolSlide.py +++ b/comic_dl/sites/foolSlide.py @@ -42,8 +42,8 @@ def single_chapter(self, chapter_url, comic_name, download_directory, conversion if not os.path.exists(directory_path): os.makedirs(directory_path) - print("Img Links : {0}".format(img_links)) - print("LEN Img Links : {0}".format(str(len(img_links)))) + # print("Img Links : {0}".format(img_links)) + # print("LEN Img Links : {0}".format(str(len(img_links)))) links = [] file_names = [] diff --git a/comic_dl/sites/japscan.py b/comic_dl/sites/japscan.py index 3be24b9..b90e765 100644 --- a/comic_dl/sites/japscan.py +++ b/comic_dl/sites/japscan.py @@ -31,8 +31,7 @@ def __init__(self, manga_url, download_directory, chapter_range, **kwargs): if 'lecture-en-ligne' in manga_url: self.comic_id = str(str(manga_url).split("/")[-2]) chapter_path = re.sub(re.compile(r'.*japscan.to'), '', str(self.manga_url)) - self.single_chapter(chapter_path, comic_id=self.comic_id, download_directory=download_directory, - scraper=scraper) + self.single_chapter(chapter_path, comic_id=self.comic_id, download_directory=download_directory) def full_series(self, comic_id, sorting, download_directory, chapter_range, conversion, keep_files): scraper = self.scraper diff --git a/comic_dl/sites/lectortmo.py b/comic_dl/sites/lectortmo.py new file mode 100644 index 0000000..e0b5f3e 
class LectorTmo(object):
    """Downloader for TMOFans / lectortmo.com.

    Instantiating the class starts the download immediately:

    * a URL containing ``/library/`` is treated as a series listing and
      handed to :meth:`full_series`;
    * a URL containing ``/viewer/``, ``/paginated/`` or ``/view_uploads/`` is
      treated as one chapter and handed to :meth:`single_chapter`;
    * any other URL is silently ignored.

    Recognised keyword arguments: ``conversion``, ``keep_files``,
    ``log_flag``, ``sorting_order`` and ``print_index``.
    """

    # Sorting keywords understood by full_series.
    _NEWEST_FIRST = ('new', 'desc', 'descending', 'latest')
    _OLDEST_FIRST = ('old', 'asc', 'ascending', 'oldest', 'a')

    def __init__(self, manga_url, download_directory, chapter_range, **kwargs):
        conversion = kwargs.get("conversion")
        keep_files = kwargs.get("keep_files")
        self.logging = kwargs.get("log_flag")
        self.sorting = kwargs.get("sorting_order")
        # Filled in by single_chapter() from the page's ld+json metadata.
        self.comic_name = None
        self.print_index = kwargs.get("print_index")

        if "/library/" in manga_url:
            self.full_series(manga_url, self.comic_name, self.sorting, download_directory,
                             chapter_range=chapter_range, conversion=conversion, keep_files=keep_files)
        # e.g. https://lectortmo.com/view_uploads/979773
        elif "/viewer/" in manga_url or "/paginated/" in manga_url or "/view_uploads/" in manga_url:
            self.single_chapter(manga_url, self.comic_name, download_directory, conversion=conversion,
                                keep_files=keep_files)

    @staticmethod
    def _chapter_id_from_url(comic_url):
        """Return the URL segment that identifies the chapter.

        Handles the URL shapes accepted by ``__init__``:

        * ``https://lectortmo.com/viewer/<id>/paginated/<n>`` -> ``<id>``
        * ``https://lectortmo.com/viewer/<id>``               -> ``<id>``
        * ``https://lectortmo.com/view_uploads/<id>``         -> ``<id>``

        Empty segments (trailing slashes) are ignored.
        """
        segments = [segment for segment in str(comic_url).split('/') if segment]
        if not segments:
            return ''
        if "viewer" in segments:
            position = segments.index("viewer")
            if position + 1 < len(segments):
                return segments[position + 1]
        if "paginated" in segments:
            position = segments.index("paginated")
            if position > 0:
                return segments[position - 1]
        return segments[-1]

    @staticmethod
    def _headline_from_ld_json(source):
        """Return the ``headline`` of the first ld+json script, or ``None``.

        ``None`` is returned when the page has no such script, when its
        content is not valid JSON, or when the JSON has no ``headline`` key.
        """
        ld_json_blocks = source.find_all("script", {"type": "application/ld+json"})
        if not ld_json_blocks:
            return None
        raw_json = str(ld_json_blocks[0].next or '').strip().replace('\n', '')
        try:
            payload = json.loads(raw_json)
        except ValueError:
            logging.debug("Could not parse ld+json metadata: %s", raw_json)
            return None
        if isinstance(payload, dict):
            return payload.get('headline')
        return None

    @staticmethod
    def _last_page_number(source):
        """Return the value of the last ``<option>`` of the page selector.

        Returns ``0`` when the selector or its options are missing, or when
        the value is not an integer, so the caller can report one error.
        """
        page_select = source.find("select", {"id": "viewer-pages-select"})
        if page_select is None:
            return 0
        options = page_select.find_all('option')
        if not options:
            return 0
        try:
            return int(options[-1]['value'])
        except (KeyError, TypeError, ValueError):
            return 0

    @staticmethod
    def _remaining_pages(last_page_number):
        """Return the page numbers still to fetch after page 1.

        The upper bound is inclusive: the last page of the chapter must be
        downloaded too.
        """
        return range(2, last_page_number + 1)

    @staticmethod
    def _page_file_name(page_number, image_url):
        """Build ``<page_number>.<extension>`` from the image URL."""
        extension = str(image_url).rsplit('.', 1)[-1]
        return '{0}.{1}'.format(page_number, extension)

    @staticmethod
    def _select_chapter_links(all_links, chapter_range):
        """Apply ``chapter_range`` to ``all_links``.

        ``"All"`` returns the list untouched. Otherwise the range is read as
        ``"<start>-<end>"`` with a 1-based start; a non-numeric or missing
        end means "up to the last link". The selected slice is returned
        reversed. Out-of-range bounds are clamped instead of raising.
        """
        if chapter_range == "All":
            return all_links
        bounds = str(chapter_range).split("-")
        # -1 converts the 1-based chapter number into a list index.
        starting = max(int(bounds[0]) - 1, 0)
        if len(bounds) > 1 and bounds[1].isdigit():
            ending = int(bounds[1])
        else:
            ending = len(all_links)
        return all_links[starting:ending][::-1]

    @staticmethod
    def _range_needs_config_update(chapter_range):
        """Tell whether ``addOne`` must be called after a series download.

        True when the range is not ``"All"`` and either its second part is
        ``"__EnD__"`` or it has exactly three ``-`` separated parts.
        """
        if chapter_range == "All":
            return False
        bounds = str(chapter_range).split("-")
        return (len(bounds) > 1 and bounds[1] == "__EnD__") or len(bounds) == 3

    def single_chapter(self, comic_url, comic_name, download_directory, conversion, keep_files):
        """Download every page of one chapter, then run the conversion step.

        :param comic_url: chapter URL (``/viewer/...`` or ``/view_uploads/...``).
        :param comic_name: fallback name, used only when the page carries no
            ld+json headline and no name is known yet.
        :param download_directory: root directory for the download.
        :param conversion: passed through to ``GlobalFunctions.conversion``.
        :param keep_files: passed through to ``GlobalFunctions.conversion``.
        :returns: ``0`` on success.
        :raises ValueError: when a page has no viewer image.
        :raises SystemExit: when the number of pages cannot be determined.
        """
        comic_url = str(comic_url)
        chapter_number = self._chapter_id_from_url(comic_url)

        source, cookies = globalFunctions.GlobalFunctions().page_downloader(manga_url=comic_url)
        self.comic_name = self._headline_from_ld_json(source) or self.comic_name or comic_name

        first_image = self.extract_image_link_from_html(source=source)
        if not first_image:
            raise ValueError("Couldn't find the page image on {0}".format(comic_url))
        image_path_segments = str(first_image).split('/')
        if len(image_path_segments) < 2:
            raise ValueError("Unexpected image link: {0}".format(first_image))
        # The parent folder of the image is used as the id in paginated URLs.
        unique_id = image_path_segments[-2]

        links = [first_image]
        file_names = [self._page_file_name(1, first_image)]

        last_page_number = self._last_page_number(source)
        if last_page_number <= 0:
            print("Couldn't find all the pages. Exiting.")
            sys.exit(1)

        for page_number in self._remaining_pages(last_page_number):
            current_url = "https://lectortmo.com/viewer/{0}/paginated/{1}".format(unique_id, page_number)
            print("Grabbing details for: {0}".format(current_url))
            source, cookies = globalFunctions.GlobalFunctions().page_downloader(manga_url=current_url,
                                                                                cookies=cookies)
            image_url = self.extract_image_link_from_html(source=source)
            if not image_url:
                # A missing page would otherwise queue a ``None`` link.
                raise ValueError("Couldn't find the page image on {0}".format(current_url))
            links.append(image_url)
            file_names.append(self._page_file_name(page_number, image_url))
            # Random pause between page requests.
            time.sleep(random.randint(1, 6))

        file_directory = globalFunctions.GlobalFunctions().create_file_directory(chapter_number, self.comic_name)
        directory_path = os.path.realpath(str(download_directory) + "/" + str(file_directory))
        if not os.path.exists(directory_path):
            os.makedirs(directory_path)

        globalFunctions.GlobalFunctions().multithread_download(chapter_number, self.comic_name, comic_url,
                                                               directory_path, file_names, links, self.logging)
        globalFunctions.GlobalFunctions().conversion(directory_path, conversion, keep_files, self.comic_name,
                                                     chapter_number)
        return 0

    def full_series(self, comic_url, comic_name, sorting, download_directory, chapter_range, conversion, keep_files):
        """Download (or list) the chapters linked from a series page.

        Chapter links are the ``href`` of every ``a.btn.btn-default.btn-sm``
        anchor. ``chapter_range`` is applied first. If ``self.print_index``
        is set, the numbered links are printed and nothing is downloaded.
        Otherwise chapters are downloaded in list order for the "newest"
        sorting keywords and in reverse for the "oldest" ones; any other
        sorting value downloads nothing.

        A failing chapter is logged with its traceback and stops the loop.

        :returns: ``None`` after printing the index, ``0`` otherwise.
        """
        source, cookies = globalFunctions.GlobalFunctions().page_downloader(manga_url=comic_url)

        chapter_anchors = source.find_all("a", {"class": "btn btn-default btn-sm"})
        all_links = [anchor['href'] for anchor in chapter_anchors]
        logging.debug("All Links : %s" % all_links)

        # Drop the chapters outside the requested range before doing anything else.
        all_links = self._select_chapter_links(all_links, chapter_range)

        if self.print_index:
            for position, chap_link in enumerate(all_links, 1):
                print(str(position) + ": " + chap_link)
            return

        sorting_key = str(sorting).lower()
        if sorting_key in self._NEWEST_FIRST:
            ordered_links = all_links
        elif sorting_key in self._OLDEST_FIRST:
            ordered_links = all_links[::-1]
        else:
            return 0

        for chap_link in ordered_links:
            try:
                self.single_chapter(comic_url=chap_link, comic_name=comic_name,
                                    download_directory=download_directory,
                                    conversion=conversion, keep_files=keep_files)
            except Exception:
                logging.exception("Error downloading : %s", chap_link)
                break  # break to continue processing other mangas

        # If the chapter range contains "__EnD__", write the new value to config.json.
        if self._range_needs_config_update(chapter_range):
            globalFunctions.GlobalFunctions().addOne(comic_url)

        return 0

    def extract_image_link_from_html(self, source):
        """Return the ``src`` of the last ``img.viewer-image.viewer-page`` tag.

        :returns: the image URL, or ``None`` when the page has no such tag.
        """
        image_tags = source.find_all("img", {"class": "viewer-image viewer-page"})
        if not image_tags:
            return None
        return image_tags[-1]['src']