diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml new file mode 100644 index 0000000..e69de29 diff --git a/Dockerfile b/Dockerfile index 2c54270..7dc08b7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,8 @@ +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. +# ======================================================= + FROM debian:bullseye ENV DEBIAN_FRONTEND noninteractive @@ -8,9 +13,11 @@ RUN apt update # apt install RUN apt install -y \ firefox-esr \ - chrome \ python3-pip +RUN pip3 install --upgrade pip +RUN pip3 install --upgrade pip setuptools + # copy directory COPY ./ /opt/pydork WORKDIR /opt/pydork @@ -19,4 +26,4 @@ WORKDIR /opt/pydork RUN ls -la /opt/pydork # # pip install -RUN pip3 install ./ +RUN pip3 install --use-pep517 ./ diff --git a/README.md b/README.md index 13bd90a..8203566 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ PyDork ## Description -Scraping and listing text and image searches on Google, Bing, DuckDuckGo, Baidu, Yahoo japan. +Scraping and listing text and image searches on **Google**, **Bing**, **DuckDuckGo**, **Baidu**, **Yahoo japan**. ## Install diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..e69de29 diff --git a/pydork/__init__.py b/pydork/__init__.py index d4ad29b..64b797f 100755 --- a/pydork/__init__.py +++ b/pydork/__init__.py @@ -1,16 +1,21 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. # ======================================================= +from .sub_commands import run_subcommand +from .engine import ENGINES +from . 
import messages -import argparse -import copy - -from datetime import datetime from pkg_resources import get_distribution +from datetime import datetime -from .engine import ENGINES -from .subcommands import run_subcommand +import copy +import argparse + +# TODO: returnではなくyieldに切り替えて、返り値をgeneratorにすることである程度途中状態でも状況を見れるような仕組みとする # version (setup.pyから取得してくる) @@ -21,7 +26,7 @@ def main(): # parserの作成 parser = argparse.ArgumentParser( - description='各種検索エンジンから指定したクエリの結果(url)およびSuggestを取得するスクリプト') + description=messages.description) subparsers = parser.add_subparsers() # ENGINESに`all`を追加 @@ -36,28 +41,28 @@ def main(): "type": str, "nargs": "?", "default": "", - "help": "検索文字列(クエリ)", + "help": messages.help_message_query, }, { "args": ["-f", "--file"], "action": "store", "type": str, "default": "", - "help": "検索文字列(クエリ)が書かれているファイル", + "help": messages.help_message_op_file, }, { "args": ["-F", "--template_file"], "action": "store", "type": str, "default": "", - "help": "検索文字列(クエリ)が書かれているテンプレートファイル(jinja2)", + "help": messages.help_message_op_template_file, }, { "args": ["-V", "--template_variable"], "action": "store", "type": str, "default": "", - "help": "テンプレートファイル(jinja2)で使用する変数セット(json)", + "help": messages.help_message_op_template_variable, }, { "args": ["-t", "--search_type"], @@ -65,73 +70,78 @@ def main(): "choices": engines_list, "nargs": "+", "type": str, - "help": "使用する検索エンジンを指定", + "help": messages.help_message_op_search_type, }, { "args": ["-l", "--lang"], "default": "ja", "choices": ["ja", "en"], "type": str, - "help": "言語を指定", + "help": messages.help_message_op_lang, }, { "args": ["-c", "--country"], "default": "JP", "choices": ["JP", "US"], "type": str, - "help": "国を指定", + "help": messages.help_message_op_country, }, { "args": ["-P", "--proxy"], "default": "", "type": str, - "help": "プロキシサーバーを指定(例:socks5://hogehoge:8080, https://fugafuga:18080)", + "help": messages.help_message_op_proxy_server, }, { "args": ["-j", "--json"], "action": "store_true", - 
"help": "json形式で出力する", + "help": messages.help_message_op_json, }, { "args": ["-k", "--insecure"], "action": "store_true", - "help": "sslエラーを無視する", + "help": messages.help_message_op_insecure, }, { "args": ["-s", "--selenium"], "action": "store_true", - "help": "Selenium(headless browser)を使用する(排他: Splashより優先)", + "help": messages.help_message_op_selenium, }, { "args": ["-S", "--splash"], "action": "store_true", - "help": "Splash(headless browser)を使用する(排他: Seleniumの方が優先)", + "help": messages.help_message_op_splash, }, { "args": ["-b", "--browser-endpoint"], "default": "", "type": str, - "help": "Selenium/Splash等のヘッドレスブラウザのエンドポイントを指定(例: localhost:8050)", + "help": messages.help_message_op_browser_endpoint, }, { "args": ["-B", "--browser"], "default": "firefox", "choices": ["chrome", "firefox"], "type": str, - "help": "Seleniumで使用するBrowserを指定", + "help": messages.help_message_op_browser, }, { "args": ["--color"], "default": "auto", "choices": ["auto", "none", "always"], "type": str, - "help": "color出力の切り替え" + "help": messages.help_message_op_color, }, { "args": ["--cookies"], "default": "~/.pydork_cookies", "type": str, - "help": "使用するcookieファイルの格納先ディレクトリのPATH(各検索エンジンごとでcookieファイルを個別保存)" + "help": messages.help_message_op_cookies_dir, + }, + { + "args": ["--delete-cookies"], + "action": "store_true", + "help": messages.help_message_op_delete_cookies, }, ] @@ -140,38 +150,38 @@ def main(): { "args": ["-T", "--title"], "action": "store_true", - "help": "検索結果のタイトルをセットで出力する", + "help": messages.help_message_op_title, }, { "args": ["-0", "--nullchar"], "action": "store_true", - "help": "null characterを区切り文字として使用する", + "help": messages.help_message_op_null_char, }, { "args": ["-n", "--num"], "default": 300, "type": int, - "help": "検索結果の取得数を指定する", + "help": messages.help_message_op_num, }, { "args": ["--start"], "type": lambda s: datetime.strptime(s, '%Y-%m-%d'), - "help": "期間指定(開始)", + "help": messages.help_message_op_start, }, { "args": ["--end"], "type": lambda s: 
datetime.strptime(s, '%Y-%m-%d'), - "help": "期間指定(終了)", + "help": messages.help_message_op_end, }, { "args": ["--debug"], "action": "store_true", - "help": "debugモードを有効にする", + "help": messages.help_message_op_debug, }, { "args": ["--disable-headless"], "action": "store_true", - "help": "Seleniumでheadlessモードを無効化する(手動でのReCaptcha対応時に必要)", + "help": messages.help_message_op_disable_headless, }, ] search_args_map.extend(copy.deepcopy(common_args_map)) @@ -181,43 +191,43 @@ def main(): { "args": ["-T", "--title"], "action": "store_true", - "help": "検索結果のタイトルをセットで出力する", + "help": messages.help_message_op_title, }, { "args": ["-p", "--pagelink"], "action": "store_true", - "help": "画像ファイルがあるhtmlのURLも出力する", + "help": messages.help_message_op_image_pagelink, }, { "args": ["-0", "--nullchar"], "action": "store_true", - "help": "null characterを区切り文字として使用する", + "help": messages.help_message_op_null_char, }, { "args": ["-n", "--num"], "default": 300, "type": int, - "help": "検索結果の取得数を指定する", + "help": messages.help_message_op_num, }, # { # "args": ["--start"], # "type": lambda s: datetime.strptime(s, '%Y-%m-%d'), - # "help": "期間指定(開始)", + # "help": messages.help_message_op_start, # }, # { # "args": ["--end"], # "type": lambda s: datetime.strptime(s, '%Y-%m-%d'), - # "help": "期間指定(終了)", + # "help": messages.help_message_op_end, # }, { "args": ["--debug"], "action": "store_true", - "help": "debugモードを有効にする", + "help": messages.help_message_op_debug, }, { "args": ["--disable-headless"], "action": "store_true", - "help": "Seleniumでheadlessモードを無効化する(手動でのReCaptcha対応時に必要)", + "help": messages.help_message_op_disable_headless, }, ] image_args_map.extend(copy.deepcopy(common_args_map)) @@ -227,17 +237,17 @@ def main(): { "args": ["--jap"], "action": "store_true", - "help": "日本語の文字を検索キーワードに追加してサジェストを取得" + "help": messages.help_message_op_suggest_jap }, { "args": ["--alph"], "action": "store_true", - "help": "アルファベット文字を検索キーワードに追加してサジェストを取得" + "help": messages.help_message_op_suggest_alph }, { 
"args": ["--num"], "action": "store_true", - "help": "数字を検索キーワードに追加してサジェストを取得" + "help": messages.help_message_op_suggest_num }, ] suggest_args_map.extend(copy.deepcopy(common_args_map)) @@ -260,7 +270,6 @@ def main(): # image # ---------- - # TODO: image検索をサブコマンドとして追加する parser_image = subparsers.add_parser( 'image', help='search mode. see `search -h`' diff --git a/pydork/common.py b/pydork/common.py index 2c786e7..be34670 100644 --- a/pydork/common.py +++ b/pydork/common.py @@ -1,11 +1,13 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. # ======================================================= """common * 共通系や雑多な処理を詰め合わせたバルクモジュール. - """ import sys @@ -82,7 +84,7 @@ def out(self, text: str, is_bold=False, is_underline=False, is_reverse=False, is class Message: """Message - メッセージの出力を簡易化するためのClass(未完成...). + メッセージの出力を簡易化するためのClass. Examples: @@ -156,6 +158,17 @@ def replace(self, text): return result def print_line(self, *text, use_header=True, separator=' ', file=sys.stdout, header=None): + """print_line + + メッセージを出力する(行) + + Args: + text: メッセージとして出力するテキスト行 + use_header: `header`で指定しているヘッダーを行頭に表示するかどうか + separator: printする際に使用する区切り文字 + file: 出力先のファイル(デフォルトはstdout) + header: ヘッダーとして使用する文字列を指定 + """ # headerの生成 if header is None: header = self.HEADER @@ -169,6 +182,18 @@ def print_line(self, *text, use_header=True, separator=' ', file=sys.stdout, hea print(*text, sep=separator, file=file) def print_text(self, text, mode='message', use_header=True, separator=' ', file=sys.stdout, header=None): + """print_line + + メッセージを出力する(テキスト) + + Args: + text: メッセージとして出力するテキスト + mode: メッセージの出力モード(`message`, `error`, `warn`, `info`, `debug`) + use_header: `header`で指定しているヘッダーを行頭に表示するかどうか + separator: printする際に使用する区切り文字 + file: 出力先のファイル(デフォルトはstdout) + header: ヘッダーとして使用する文字列を指定 + """ # is_commandが有効のときのみ出力させる if not self.IS_COMMAND: 
return @@ -183,6 +208,7 @@ def print_text(self, text, mode='message', use_header=True, separator=' ', file= text = self.replace(text) # case + text_color: Color = Color(Color.END) if mode == 'message': # modeが `message` のとき text_color = Color(Color.WHITE) @@ -218,3 +244,26 @@ def print_text(self, text, mode='message', use_header=True, separator=' ', file= separator=separator, use_header=use_header, file=file, header=header) return + + +# 渡されたリスト内のdictに`num`を追加する関数 +def set_counter(links: list): + """set_counter + + links(list)の要素に`num`キーを追加し、連続した数値を入れていく + + Args: + links(list): リンクのリスト. ex) [{'link', 'http://...', 'title': 'hogehoge...'}, {'link': '...', 'title': '...'}, ... ] + Returns: + result(list): [{'link', 'http://...', 'title': 'hogehoge...', num: 1}, {'link': '...', 'title': '...', num: 2}, ... ] + """ + # result(list)の生成 + result = list() + + num = 1 + for d in links: + d["num"] = num + num += 1 + result.append(d) + + return result diff --git a/pydork/engine.py b/pydork/engine.py index 378d72a..2f1e68d 100644 --- a/pydork/engine.py +++ b/pydork/engine.py @@ -1,7 +1,13 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. # ======================================================= + +# TODO: json出力時にヒット番号を付与する(SEO対策が行えているかどうかのチェック用) + """engine * Module for performing searches with SearchEngine """ @@ -15,8 +21,8 @@ from string import ascii_lowercase, digits from datetime import datetime -from .common import Color -from .common import Message +from .common import Color, Message +from .common import set_counter from .engine_baidu import Baidu from .engine_bing import Bing from .engine_duckduckgo import DuckDuckGo @@ -107,7 +113,7 @@ def set_is_debug(self, is_debug: bool): debug (bool): debug flag(Enable debug with `True`). 
""" - self.ENGINE.IS_DEBUG = is_debug + self.ENGINE.IS_DEBUG = is_debug # type: ignore # commandフラグ(コマンドモードでの実行)を有効化する関数 def set_is_command(self, is_command: bool): @@ -119,7 +125,7 @@ def set_is_command(self, is_command: bool): Args: is_command (bool): command flag(Enable command mode with `True`). """ - self.ENGINE.IS_COMMAND = is_command + self.ENGINE.IS_COMMAND = is_command # type: ignore # color出力が有効か否か def set_is_color(self, is_color: bool = False): @@ -157,7 +163,7 @@ def set_disable_headless(self, disable_headless: bool): """ - self.ENGINE.IS_DISABLE_HEADLESS = disable_headless + self.ENGINE.IS_DISABLE_HEADLESS = disable_headless # type: ignore # cookieファイルを入れているディレクトリを渡して、使用するcookieファイルを取得する関数 def set_cookie_files(self, cookie_dir: str): @@ -171,8 +177,8 @@ def set_cookie_files(self, cookie_dir: str): """ # フルパスに変換 - cookie_dir = pathlib.Path(cookie_dir).expanduser() - cookie_dir = pathlib.Path(cookie_dir).resolve() + cookie_dir = pathlib.Path(cookie_dir).expanduser() # type: ignore + cookie_dir = pathlib.Path(cookie_dir).resolve() # type: ignore # 存在チェックをして、ディレクトリがない場合は新規作成 if not os.path.exists(cookie_dir): @@ -199,7 +205,20 @@ def set_cookie_files(self, cookie_dir: str): open(cookie_file, 'a').close() # ENGINEのself変数にセットする - self.ENGINE.COOKIE_FILE = cookie_file + self.ENGINE.COOKIE_FILE = cookie_file # type: ignore + + # クエリ実行ごとにCookieを削除して作り直しさせるかを指定する関数 + def set_cookie_files_delete(self, is_delete_cookie: bool): + """set_cookie_files_delete + + Function that specifies whether the cookie should be deleted and recreated each time the query is executed. + + Args: + is_delete_cookie (bool): delete flag. 
+ """ + + # ENGINEのself変数にセットする + self.ENGINE.COOKIE_FILE_DELETE = is_delete_cookie # type: ignore # 検索エンジンにわたす言語・国の設定を受け付ける def set_lang(self, lang: str = "ja", locale: str = "JP"): @@ -238,7 +257,7 @@ def set_proxy(self, proxy: str): self.ENGINE.set_proxy(proxy) # seleniumを有効にする - def set_selenium(self, uri: str = None, browser: str = None): + def set_selenium(self, uri: str = None, browser: str = None): # type: ignore """set_selenium Use Selenium (priority over Splash). @@ -263,7 +282,7 @@ def set_splash(self, splash_url: str): self.ENGINE.set_splash(splash_url) # user_agentの設定値を受け付ける - def set_user_agent(self, useragent: str = None): + def set_user_agent(self, useragent: str = None): # type: ignore """set_user_agent Specify the UserAgent. @@ -285,17 +304,17 @@ def set_ignore_ssl(self, verify: bool): Args: verify (bool): bool. """ - self.ENGINE.set_ignore_ssl = verify + self.ENGINE.set_ignore_ssl = verify # type: ignore # 検索を行う - def search(self, keyword: str, type='text', maximum=100): + def search(self, keyword: str, search_type='text', maximum=100): """search Search with a search engine. Args: keyword (str): query. - type (str, optional): search type. text or image. Defaults to 'text'. + search_type (str, optional): search type. text or image. Defaults to 'text'. maximum (int, optional): Max count of searches. Defaults to 100. 
Returns: @@ -323,7 +342,7 @@ def search(self, keyword: str, type='text', maximum=100): colored_keyword = self.ENGINE.MESSAGE.ENGINE_COLOR.out(keyword) self.ENGINE.MESSAGE.print_text( "$ENGINE: {} Search: {}".format( - type.capitalize(), colored_keyword), + search_type.capitalize(), colored_keyword), use_header=False, file=sys.stderr @@ -338,7 +357,7 @@ def search(self, keyword: str, type='text', maximum=100): self.ENGINE.create_session() # 検索処理の開始 - gen_url = self.ENGINE.gen_search_url(keyword, type) + gen_url = self.ENGINE.gen_search_url(keyword, search_type) while True: # リクエスト先のurlを取得 try: @@ -365,7 +384,8 @@ def search(self, keyword: str, type='text', maximum=100): ) # 検索結果の取得 - html = self.ENGINE.get_result(url, method=method, data=data) + html = self.ENGINE.get_result( + url, method=method, data=data) # type: ignore # debug self.ENGINE.MESSAGE.print_text( @@ -376,6 +396,9 @@ def search(self, keyword: str, type='text', maximum=100): Color.GRAY + '[DEBUG]: [Response]' + Color.END ) + # 初期値 + is_recaptcha = False + while True: # ReCaptchaページかどうかを識別 if html is not None: @@ -395,7 +418,8 @@ def search(self, keyword: str, type='text', maximum=100): # headless browserを使っている場合 if self.ENGINE.USE_SELENIUM or self.ENGINE.USE_SPLASH: # byass用の関数にわたす - html = self.ENGINE.bypass_recaptcha(url, html) + html = self.ENGINE.bypass_recaptcha( + url, html) # type: ignore if html is not None: # debug @@ -428,7 +452,8 @@ def search(self, keyword: str, type='text', maximum=100): # TODO: resultも関数に渡して重複チェックを行わせる # 検索結果をパースしてurlリストを取得する - links = self.ENGINE.get_links(html, type) + links = self.ENGINE.get_links( + url, html, search_type) # type: ignore # linksの件数に応じて処理を実施 if not len(links): @@ -442,7 +467,7 @@ def search(self, keyword: str, type='text', maximum=100): # loopを抜ける if self.ENGINE.NAME == "Google": - if self.ENGINE.SEARCH_NEXT_URL is None: + if self.ENGINE.SEARCH_NEXT_URL is None: # type: ignore break else: break @@ -465,6 +490,9 @@ def search(self, keyword: str, 
type='text', maximum=100): # 連続でアクセスすると問題があるため、3秒待機 sleep(3) + # 検索番号を指定 + result = set_counter(result) + # commandの場合の出力処理 self.ENGINE.MESSAGE.print_text( 'Finally got ' + self.ENGINE.COLOR + @@ -478,6 +506,10 @@ def search(self, keyword: str, type='text', maximum=100): if self.ENGINE.COOKIE_FILE != '': self.ENGINE.write_cookies() + # delete cookie file + if self.ENGINE.COOKIE_FILE_DELETE: + os.remove(self.ENGINE.COOKIE_FILE) + # sessionを終了 self.ENGINE.close_session() @@ -506,10 +538,10 @@ def suggest(self, keyword: str, jap=False, alph=False, num=False): chars = ['', ' '] # japフラグが有効な場合、キーワードに日本語を含めてサジェストを検索 - chars += [' ' + chr(i) for i in range(12353, 12436)] if jap else[] + chars += [' ' + chr(i) for i in range(12353, 12436)] if jap else [] # alphフラグが有効な場合、キーワードにアルファベットを含めてサジェストを検索 - chars += [' ' + char for char in ascii_lowercase] if alph else[] + chars += [' ' + char for char in ascii_lowercase] if alph else [] # numフラグが有効な場合、キーワードに数字を含めてサジェストを検索 chars += [' ' + char for char in digits] if num else [] @@ -522,7 +554,8 @@ def suggest(self, keyword: str, jap=False, alph=False, num=False): html = self.ENGINE.get_result(url) # TODO: 各エンジンでjson/textの変換処理を別途実装する必要がある - suggests = self.ENGINE.get_suggest_list(suggests, char, html) + suggests = self.ENGINE.get_suggest_list( + suggests, char, html) # type: ignore sleep(0.5) diff --git a/pydork/engine_baidu.py b/pydork/engine_baidu.py index 53ed3e8..2c388c8 100644 --- a/pydork/engine_baidu.py +++ b/pydork/engine_baidu.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. # ======================================================= @@ -136,7 +139,7 @@ def gen_suggest_url(self, keyword: str): return url - def get_links(self, html: str, type: str): + def get_links(self, url: str, html: str, type: str): """get_links 受け付けたhtmlを解析し、検索結果をlistに加工して返す関数. 
@@ -158,7 +161,7 @@ def get_links(self, html: str, type: str): self.SOUP_SELECT_TEXT = '.c-gap-top-small > span' # CommonEngineの処理を呼び出す - links = super().get_links(html, type) + links = super().get_links(url, html, type) elif type == 'image': # unicode escape diff --git a/pydork/engine_bing.py b/pydork/engine_bing.py index de17616..c9534bd 100644 --- a/pydork/engine_bing.py +++ b/pydork/engine_bing.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. # ======================================================= @@ -142,7 +145,7 @@ def gen_suggest_url(self, keyword: str): return url - def get_links(self, html: str, type: str): + def get_links(self, url: str, html: str, type: str): """get_links 受け付けたhtmlを解析し、検索結果をlistに加工して返す関数. @@ -164,7 +167,7 @@ def get_links(self, html: str, type: str): self.SOUP_SELECT_URL = '.imgpt > .iusc' # CommonEngineの処理を呼び出す - links = super().get_links(html, type) + links = super().get_links(url, html, type) return links diff --git a/pydork/engine_common.py b/pydork/engine_common.py index 43d6e2d..a8eac76 100644 --- a/pydork/engine_common.py +++ b/pydork/engine_common.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. 
# ======================================================= @@ -50,6 +53,7 @@ def __init__(self): # 初期値の作成 self.LOCK = None self.COOKIE_FILE = '' + self.COOKIE_FILE_DELETE = False self.SPLASH_URI = '' self.PROXY = '' self.USER_AGENT = '' @@ -58,7 +62,7 @@ def __init__(self): self.IS_DEBUG = False self.IS_COMMAND = False self.IS_DISABLE_HEADLESS = False - self.MESSAGE = False + self.MESSAGE: Message self.IGNORE_SSL_VERIFY = False # ReCaptcha画面かどうかの識別用(初期値(ブランク)) @@ -94,7 +98,7 @@ def set_range(self, start: datetime, end: datetime): self.RANGE_END = end # user_agentの設定値を受け付ける(引数がない場合はランダム。Seleniumの際は自動的に使用したbrowserのagentを指定) - def set_user_agent(self, user_agent: str = None, browser: str = None): + def set_user_agent(self, user_agent: str = None, browser: str = None): # type: ignore """set_user_agent user_agentの値を受け付ける. @@ -134,7 +138,7 @@ def set_user_agent(self, user_agent: str = None, browser: str = None): # - splashより優先 # - host, browserは、指定がない場合はそれぞれデフォルト設定(hostは指定なし、browserはchrome)での動作 # - browserは `chrome` or `firefox` のみ受け付ける - def set_selenium(self, uri: str = None, browser: str = None): + def set_selenium(self, uri: str = None, browser: str = None): # type: ignore """set_selenium 検索時にSelenium経由で通信を行う. @@ -196,6 +200,13 @@ def read_cookies(self): 現時点ではSeleniumでのみ動作. 
""" + # cookieファイルが存在しない場合、空ファイルで作成する + exist_cookie_file = os.path.isfile(self.COOKIE_FILE) + if not exist_cookie_file: + cookie_file = open(self.COOKIE_FILE, 'w') + cookie_file.write('') + cookie_file.close() + # cookieファイルのサイズを取得 file_size = os.path.getsize(self.COOKIE_FILE) @@ -207,7 +218,7 @@ def read_cookies(self): # seleniumを使う場合 if self.USE_SELENIUM: # 事前アクセスが必要になるため、検索対象ドメインのTOPページにアクセスしておく - self.driver.get(self.ENGINE_TOP_URL) + self.driver.get(self.ENGINE_TOP_URL) # type: ignore # cookieを設定していく for cookie in cookies: @@ -393,7 +404,7 @@ def request_selenium(self, url: str, method='GET', data=None): EC.presence_of_all_elements_located) # wait 5 seconds(wait DOM) - if self.NAME in ('Bing', 'Baidu', 'DuckDuckGo'): + if self.NAME in ('Bing', 'Baidu', 'DuckDuckGo'): # type: ignore self.driver.implicitly_wait(20) # get result @@ -407,7 +418,7 @@ def request_selenium(self, url: str, method='GET', data=None): EC.presence_of_all_elements_located) # wait 5 seconds(wait DOM) - if self.NAME in ('Bing', 'Baidu', 'DuckDuckGo'): + if self.NAME in ('Bing', 'Baidu', 'DuckDuckGo'): # type: ignore self.driver.implicitly_wait(20) # get result @@ -448,7 +459,7 @@ def request_splash(self, url: str, method='GET', data=None): # NOTE: Googleの画像検索のPOSTがSplashではレンダリングできないので、特例対応でrequestsを使用する. # TODO: Splashでもレンダリングできるようになったら書き換える. 
- elif method == 'POST' and self.NAME == 'Google' and self.IMAGE_URL in url: + elif method == 'POST' and self.NAME == 'Google' and self.IMAGE_URL in url: # type: ignore # create session session = requests.session() @@ -474,7 +485,7 @@ def request_splash(self, url: str, method='GET', data=None): elif method == 'POST': headers = {'Content-Type': 'application/json'} params['http_method'] = 'POST' - params['body'] = parse.urlencode(data) + params['body'] = parse.urlencode(data) # type: ignore result = self.session.post( splash_url, @@ -604,12 +615,13 @@ def gen_search_url(self, keyword: str, type: str): return 'GET', result, None # テキスト、画像検索の結果からlinksを取得するための集約function - def get_links(self, html: str, type: str): + def get_links(self, source_url, html: str, type: str): """get_links 受け付けたhtmlを解析し、検索結果をlistに加工して返す関数. Args: + source_url (str): 解析する検索結果のurl. html (str): 解析する検索結果のhtml. type (str): 検索タイプ([text, image]).現時点ではtextのみ対応. @@ -626,7 +638,7 @@ def get_links(self, html: str, type: str): # before processing elists self.MESSAGE.print_text( - ','.join(elinks), + ','.join(elinks), # type: ignore header=self.MESSAGE.HEADER + ': ' + Color.BLUE + '[BeforeProcessing elinks]' + Color.END, separator=" :", @@ -648,7 +660,7 @@ def get_links(self, html: str, type: str): # after processing elists self.MESSAGE.print_text( - ','.join(elinks), + ','.join(elinks), # type: ignore header=self.MESSAGE.HEADER + ': ' + Color.GREEN + '[AfterProcessing elinks]' + Color.END, separator=" :", @@ -666,7 +678,7 @@ def get_links(self, html: str, type: str): # dictに加工してリスト化する # [{'title': 'title...', 'link': 'https://hogehoge....'}, {...}] - links = self.create_text_links(elinks, etitles, etexts) + links = self.create_text_links(source_url, elinks, etitles, etexts) return links @@ -685,8 +697,9 @@ def get_text_links(self, soup: BeautifulSoup): soup (BeautifulSoup): 解析するBeautifulSoupオブジェクト. Returns: - list: linkの検索結果([xxx,xxx,xxx...) - list: titleの検索結果([xxx,xxx,xxx...) 
+ list: linkの検索結果([xxx,xxx,xxx...]) + list: titleの検索結果([xxx,xxx,xxx...]) + list: textの検索結果([xxx,xxx,xxx...]) """ # linkのurlを取得する elements = soup.select(self.SOUP_SELECT_URL) @@ -740,7 +753,7 @@ def processings_elist(self, elinks, etitles, etexts: list): return elinks, etitles, etexts # テキスト検索の1ページごとの検索結果から、links(links([{link: ..., title: ...},...]))を生成するfunction - def create_text_links(self, elinks, etitles, etext: list): + def create_text_links(self, source_url: str, elinks, etitles, etext: list): """create_text_links elinks, etitlesからlinks(get_linksのデータ)を返す関数. @@ -769,11 +782,15 @@ def create_text_links(self, elinks, etitles, etext: list): if len(etext) > n: d['text'] = etext[n] + # 検索元urlをdictに追加する + d['source_url'] = source_url + if before_link != link: links.append(d) before_link = link n += 1 + return links # サジェスト取得用のurlを生成 diff --git a/pydork/engine_duckduckgo.py b/pydork/engine_duckduckgo.py index 7a74713..9d7c57f 100644 --- a/pydork/engine_duckduckgo.py +++ b/pydork/engine_duckduckgo.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. # ======================================================= @@ -178,12 +181,13 @@ def gen_suggest_url(self, keyword: str): return url - def get_links(self, html: str, type: str): + def get_links(self, source_url: str, html: str, type: str): """get_links 受け付けたhtmlを解析し、検索結果をlistに加工して返す関数. Args: + source_url (str): 解析する検索結果のurl. html (str): 解析する検索結果のhtml. type (str): 検索タイプ([text, image]).現時点ではtextのみ対応. 
@@ -214,7 +218,8 @@ def get_links(self, html: str, type: str): "title": BeautifulSoup( r_data["t"], "lxml").text, "text": BeautifulSoup( - r_data["a"], "lxml").text + r_data["a"], "lxml").text, + "source_url": source_url, } links.append(d) diff --git a/pydork/engine_google.py b/pydork/engine_google.py index 8ebaad7..d064f17 100644 --- a/pydork/engine_google.py +++ b/pydork/engine_google.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. # ======================================================= @@ -182,17 +185,18 @@ def gen_suggest_url(self, keyword: str): return url - def get_links(self, html: str, type: str): + def get_links(self, url: str, html: str, type: str): """get_links 受け付けたhtmlを解析し、検索結果をlistに加工して返す関数. Args: + url (str): 解析する検索結果のurl. html (str): 解析する検索結果のhtml. type (str): 検索タイプ([text, image]).現時点ではtextのみ対応. Returns: - list: 検索結果(`[{'title': 'title...', 'url': 'https://hogehoge....'}, {...}]`) + list: 検索結果。変数名はlinks。(`[{'title': 'title...', 'url': 'https://hogehoge....'}, {...}]`) """ # テキスト検索の場合 @@ -207,21 +211,21 @@ def get_links(self, html: str, type: str): if self.USE_SELENIUM: self.SOUP_SELECT_URL = '.yuRUbf > a' self.SOUP_SELECT_TITLE = '.yuRUbf > a > .LC20lb' - self.SOUP_SELECT_TEXT = '.WZ8Tjf' + self.SOUP_SELECT_TEXT = '.lEBKkf' self.SOUP_SELECT_NEXT_URL = '.d6cvqb > a' # Splash経由で通信している場合 elif self.USE_SPLASH: self.SOUP_SELECT_URL = '.yuRUbf > a' self.SOUP_SELECT_TITLE = '.yuRUbf > a > .LC20lb' - self.SOUP_SELECT_TEXT = '.WZ8Tjf' + self.SOUP_SELECT_TEXT = '.lEBKkf' self.SOUP_SELECT_NEXT_URL = '.d6cvqb > a' # TODO: SEARCH_NEXT_URLを書き換える self.get_nextpage_url(html) # CommonEngineの処理を呼び出す - links = super().get_links(html, type) + links = super().get_links(url, html, type) # イメージ検索の場合 elif type == 'image': @@ -300,7 +304,7 @@ def get_suggest_list(self, suggests: list, char: str, html: 
str): sug_data = sug_root.xpath("//suggestion") data = [s.get("data") for s in sug_data] - suggests[char if char == '' else char[-1]] = data + suggests[char if char == '' else char[-1]] = data # type: ignore return suggests @@ -318,11 +322,13 @@ def get_nextpage_url(self, html: str): self.SEARCH_NEXT_URL = None elif len(elinks) == 1: - next_url = parse.urljoin(self.ENGINE_TOP_URL, elinks[0]) + next_url = parse.urljoin( + self.ENGINE_TOP_URL, elinks[0]) # type: ignore self.SEARCH_NEXT_URL = next_url elif len(elinks) > 1: - next_url = parse.urljoin(self.ENGINE_TOP_URL, elinks[1]) + next_url = parse.urljoin( + self.ENGINE_TOP_URL, elinks[1]) # type: ignore self.SEARCH_NEXT_URL = next_url def processings_elist(self, elinks, etitles, etexts: list): diff --git a/pydork/engine_yahoo.py b/pydork/engine_yahoo.py index f3f3d29..56670f6 100644 --- a/pydork/engine_yahoo.py +++ b/pydork/engine_yahoo.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. # ======================================================= @@ -156,12 +159,13 @@ def gen_suggest_url(self, keyword: str): return url - def get_links(self, html: str, type: str): + def get_links(self, url: str, html: str, type: str): """get_links 受け付けたhtmlを解析し、検索結果をlistに加工して返す関数. Args: + url (str): 解析する検索結果のurl. html (str): 解析する検索結果のhtml. type (str): 検索タイプ([text, image]).現時点ではtextのみ対応. 
@@ -184,10 +188,11 @@ def get_links(self, html: str, type: str): if self.IS_DEBUG: print(Color.PURPLE + '[JsonElement]' + Color.END, file=sys.stderr) - print(Color.PURPLE + element + Color.END, file=sys.stderr) + print(Color.PURPLE + element + Color.END, + file=sys.stderr) # type: ignore # jsonからデータを抽出  - j = json.loads(element) + j = json.loads(element) # type: ignore # debug if self.IS_DEBUG: @@ -201,7 +206,7 @@ def get_links(self, html: str, type: str): etitles = [e['title'] for e in jd] etexts = [e['description'] for e in jd] - links = self.create_text_links(elinks, etitles, etexts) + links = self.create_text_links(url, elinks, etitles, etexts) else: self.SOUP_SELECT_URL = '.sw-Card__headerSpace > .sw-Card__title > a' @@ -209,11 +214,11 @@ def get_links(self, html: str, type: str): self.SOUP_SELECT_TEXT = '.sw-Card__floatContainer > .sw-Card__summary' # CommonEngineの処理を呼び出す - links = super().get_links(html, type) + links = super().get_links(url, html, type) elif type == 'image': # CommonEngineの処理を呼び出す - links = super().get_links(html, type) + links = super().get_links(url, html, type) return links @@ -268,7 +273,7 @@ def get_suggest_list(self, suggests: list, char: str, html: str): soup = BeautifulSoup(html, features="lxml") html = soup.find("pre").text data = json.loads(html) - suggests[char if char == '' else char[-1]] = [e['key'] + suggests[char if char == '' else char[-1]] = [e['key'] # type: ignore for e in data['gossip']['results']] return suggests diff --git a/pydork/engine_yandex.py b/pydork/engine_yandex.py new file mode 100644 index 0000000..a8e1f93 --- /dev/null +++ b/pydork/engine_yandex.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. +# ======================================================= + +"""engine_yandex + * Yandex(yandex.com)用の検索用Classを持つモジュール. 
+""" + +from .common import Color +from .engine_common import CommonEngine + + +class Yandex(CommonEngine): + """Yandex + + Yandex用の検索エンジン用Class. + """ + + def __init__(self): + None diff --git a/pydork/messages.py b/pydork/messages.py new file mode 100644 index 0000000..f5cdaaa --- /dev/null +++ b/pydork/messages.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. +# ======================================================= + +"""messages + * commandでのhelp messageを英語・日本語対応させるためのテキストデータを持つモジュール. +""" + +import os + +lang = os.getenv('LANG') + +if lang == 'ja_JP.UTF-8': + description = "各種検索エンジンから指定したクエリの結果(url)およびSuggestを取得する" + + # common_args_map + help_message_query = "検索文字列(クエリ)" + help_message_op_file = "検索文字列(クエリ)が書かれているファイル" + help_message_op_template_file = "検索文字列(クエリ)が書かれているテンプレートファイル(jinja2)" + help_message_op_template_variable = "テンプレートファイル(jinja2)で使用する変数セット(json)" + help_message_op_search_type = "使用する検索エンジンを指定" + help_message_op_lang = "言語を指定" + help_message_op_country = "国を指定" + help_message_op_proxy_server = "プロキシサーバーを指定(例:socks5://hogehoge:8080, https://fugafuga:18080)" + help_message_op_json = "json形式で出力する" + help_message_op_insecure = "sslエラーを無視する" + help_message_op_selenium = "Selenium(headless browser)を使用する(排他: Splashより優先)" + help_message_op_splash = "Splash(headless browser)を使用する(排他: Seleniumの方が優先)" + help_message_op_browser_endpoint = "Selenium/Splash等のヘッドレスブラウザのエンドポイントを指定(例: localhost:8050)" + help_message_op_browser = "Seleniumで使用するBrowserを指定" + help_message_op_color = "color出力の切り替え" + help_message_op_cookies_dir = "使用するcookieファイルの格納先ディレクトリのPATH(各検索エンジンごとでcookieファイルを個別保存)" + help_message_op_delete_cookies = "検索クエリ実行ごとにCookieを削除する" + + # other_map + help_message_op_title = "検索結果のタイトルをセットで出力する" + help_message_op_null_char = "null characterを区切り文字として使用する" + help_message_op_num
= "検索結果の取得数を指定する" + help_message_op_debug = "debugモードを有効にする" + help_message_op_disable_headless = "Seleniumでheadlessモードを無効化する(手動でのReCaptcha対応時に必要)" + help_message_op_start = "期間指定(開始)" + help_message_op_end = "期間指定(終了)" + help_message_op_image_pagelink = "画像ファイルがあるhtmlのURLも出力する" + + # suggest_map + help_message_op_suggest_jap = "日本語の文字を検索キーワードに追加してサジェストを取得" + help_message_op_suggest_alph = "アルファベット文字を検索キーワードに追加してサジェストを取得" + help_message_op_suggest_num = "数字を検索キーワードに追加してサジェストを取得" + + +else: + description = "Obtain results (url) and Suggest for a specified query from various search engines" + + # common_args_map + help_message_query = "search string(query)" + help_message_op_file = "File containing search strings(queries)" + help_message_op_template_file = "Template file (jinja2) containing search strings (queries)" + help_message_op_template_variable = "Variable set (json) used in template file (jinja2)" + help_message_op_search_type = "Specify which search engine to use" + help_message_op_lang = "Specify language" + help_message_op_country = "Specify country" + help_message_op_proxy_server = "Specify proxy server(example: socks5://hogehoge:8080, https://fugafuga:18080)" + help_message_op_json = "Output in json format" + help_message_op_insecure = "ignore ssl errors" + help_message_op_selenium = "Use Selenium (headless browser). 
(exclusive: takes precedence over Splash)" + help_message_op_splash = "Use Splash (headless browser) (exclusive: Selenium is preferred)" + help_message_op_browser_endpoint = "Specify the endpoint for headless browsers such as Selenium/Splash (example: localhost:8050)" + help_message_op_browser = "Specify Browser to use with Selenium" + help_message_op_color = "Switching color output" + help_message_op_cookies_dir = "PATH of the directory where the cookie files to be used are stored (cookie files are stored separately for each search engine)" + help_message_op_delete_cookies = "Delete cookies on every search query execution" + + # other_map + help_message_op_title = "Output a set of search result titles" + help_message_op_null_char = "Use null character as delimiter" + help_message_op_num = "Specify the number of search results to retrieve" + help_message_op_debug = "Enable debug mode" + help_message_op_disable_headless = "Disable headless mode in Selenium (required for manual ReCaptcha support)" + help_message_op_start = "Search period (start)" + help_message_op_end = "Search period (end)" + help_message_op_image_pagelink = "Also output the html URL where the image files are located." + + # suggest_map + help_message_op_suggest_jap = "Add Japanese characters to search keywords to get suggestions" + help_message_op_suggest_alph = "Add alphabetic characters to search keywords to get suggestions" + help_message_op_suggest_num = "Add numbers to search keywords to get suggestions" diff --git a/pydork/recaptcha.py b/pydork/recaptcha.py index ae0f5c4..e35111f 100644 --- a/pydork/recaptcha.py +++ b/pydork/recaptcha.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. 
# ======================================================= """engine diff --git a/pydork/subcommands.py b/pydork/sub_commands.py similarity index 87% rename from pydork/subcommands.py rename to pydork/sub_commands.py index b4cd658..ef8171f 100644 --- a/pydork/subcommands.py +++ b/pydork/sub_commands.py @@ -1,8 +1,11 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. # ======================================================= -"""subcommand +"""subcommands * pydorkをコマンドとして動作させる際の処理を記載しているモジュール """ @@ -13,6 +16,8 @@ import os import pathlib +from typing import List +from argparse import Namespace from jinja2 import Template from .engine import SearchEngine, ENGINES @@ -34,31 +39,32 @@ def run_subcommand(subcommand, args): # query及びfileがともに指定なしの場合、エラーにして返す if args.query == "" and args.file == "" and args.template_file == "": - print("Error: クエリもしくはファイルを指定してください.") + print("Error: クエリもしくはファイルを指定してください.", file=sys.stderr) return # args.fileのチェック if args.file != "": if not os.path.exists(args.file): - print("Error: ファイルが存在しません.") + print("Error: ファイルが存在しません.", file=sys.stderr) return - # args.fileのチェック + # args.template_fileのチェック if args.template_file != "": if not os.path.exists(args.template_file): - print("Error: ファイルが存在しません.") + print("Error: ファイルが存在しません.", file=sys.stderr) return if args.template_variable == "": - print("Error: テンプレート変数が指定されていません.") + print("Error: テンプレート変数が指定されていません.", file=sys.stderr) return try: template_variable = json.loads(args.template_variable) except Exception: - print("Error: テンプレート変数の形式がまちがっています.") + print("Error: テンプレート変数の形式がまちがっています.", file=sys.stderr) return + # 各サブコマンドのチェック target = None search_mode = '' if subcommand == 'search': @@ -69,32 +75,18 @@ def run_subcommand(subcommand, args): file=sys.stderr ) return - target = search + target = run_search search_mode = 'text' elif subcommand 
== 'image': - target = search + target = run_search search_mode = 'image' elif subcommand == 'suggest': - target = suggest + target = run_suggest # create query_list - query_list = list() - - # append query - if args.query != "": - query_list.append(args.query) - - # append query in file - if args.file != "": - # fileのfull pathを取得 - file = pathlib.Path(args.file).expanduser() - - # ファイルを開いて1行ずつqueryに追加する - with open(file) as f: - file_querys = [s.strip() for s in f.readlines()] - query_list.extend(file_querys) + query_list = generate_query_list(args) # append query in template file if args.template_file != "": @@ -157,7 +149,7 @@ def run_subcommand(subcommand, args): # SearchEngineのオプション設定用関数 -def set_se_options(se, args): +def set_se_options(se: SearchEngine, args: Namespace): """set_se_options Args: @@ -220,11 +212,14 @@ def set_se_options(se, args): # set cookie driver(last set) se.set_cookie_files(args.cookies) + # set cookie file delete + se.set_cookie_files_delete(args.delete_cookies) + return se # 検索結果を出力する -def print_search_result(result, args, message): +def print_search_result(result, args: Namespace, message: Message): """print_search_result @@ -279,8 +274,33 @@ def print_search_result(result, args, message): message.print_line(*data, separator=sep) +# generate +def generate_query_list(args: Namespace): + """generate_query_list + + """ + # create query_list + query_list: List[str] = list() + + # append query + if args.query != "": + query_list.append(args.query) + + # append query in file + if args.file != "": + # fileのfull pathを取得 + file = pathlib.Path(args.file).expanduser() + + # ファイルを開いて1行ずつqueryに追加する + with open(file) as f: + file_querys = [s.strip() for s in f.readlines()] + query_list.extend(file_querys) + + return query_list + + # 検索 -def search(engine: str, query_list: list, args, thread_result: dict, cmd=False, lock=None, mode='text'): +def run_search(engine: str, query_list: list, args, thread_result: dict, cmd=False, lock=None, mode='text'): 
"""search Args: @@ -293,7 +313,7 @@ def search(engine: str, query_list: list, args, thread_result: dict, cmd=False, type (str, optional): 検索タイプ. `text` or `image`. """ - # start search engine class + # start SearchEngine class se = SearchEngine() # Set Engine @@ -324,7 +344,7 @@ def search(engine: str, query_list: list, args, thread_result: dict, cmd=False, for query in query_list: # 検索を実行 result = se.search( - query, type=search_type, + query, search_type=search_type, maximum=args.num ) @@ -353,7 +373,7 @@ def search(engine: str, query_list: list, args, thread_result: dict, cmd=False, # サジェスト -def suggest(engine: str, query_list: list, args, thread_result: dict, cmd=False, lock=None, mode=''): +def run_suggest(engine: str, query_list: list, args: Namespace, thread_result: dict, cmd=False, lock=None, mode=''): """suggest Args: @@ -363,7 +383,7 @@ def suggest(engine: str, query_list: list, args, thread_result: dict, cmd=False, thread_result(dict): 結果を1箇所に集約するためのresult dict. json出力するときのみ使用. cmd (bool, optional): commandで実行しているか否か. Defaults to False. lock (threading.Lock): threadingのマルチスレッドで使用するLock.現在は未使用. Defaults to None. - mode (str, optional): マルチスレッドでsearchとある程度共用で使えるようにするための引数. 利用していない. Defaults to ''. + mode (str, optional): マルチスレッドでrun_searchとある程度共用で使えるようにするための引数. 利用していない. Defaults to ''. """ # start search engine class diff --git a/pydork/test_engine.py b/pydork/test_engine.py index 73db670..02d484e 100644 --- a/pydork/test_engine.py +++ b/pydork/test_engine.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file.
# ======================================================= diff --git a/pydork/test_engine_selenium.py b/pydork/test_engine_selenium.py index 3eb002a..65632c7 100644 --- a/pydork/test_engine_selenium.py +++ b/pydork/test_engine_selenium.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. # ======================================================= diff --git a/setup.py b/setup.py index 4e0293f..d2e1d89 100755 --- a/setup.py +++ b/setup.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Copyright (c) 2023 Blacknon. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. # ======================================================= @@ -79,8 +82,8 @@ def get_completefile_install_location(shell): name = 'pydork' -version = '1.1.3' -release = '1.1.3' +version = '1.1.4' +release = '1.1.4' if __name__ == "__main__": setuptools.setup( @@ -102,7 +105,7 @@ def get_completefile_install_location(shell): 'fake_useragent', 'lxml', 'requests[socks]', - 'selenium', + 'selenium==4.7.2', 'selenium_requests', 'pickle-mixin', 'sphinx', @@ -122,6 +125,7 @@ def get_completefile_install_location(shell): 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', 'License :: OSI Approved :: MIT License', ], data_files=get_data_files(),