diff --git a/pydork/__init__.py b/pydork/__init__.py index 0b2752c..8d506a9 100755 --- a/pydork/__init__.py +++ b/pydork/__init__.py @@ -56,7 +56,7 @@ def main(): "default": "JP", "choices": ["JP", "US"], "type": str, - "help": "言語を指定", + "help": "国を指定", }, { "args": ["-P", "--proxy"], @@ -64,6 +64,11 @@ def main(): "type": str, "help": "プロキシサーバーを指定(例:socks5://hogehoge:8080, https://fugafuga:18080)", }, + { + "args": ["-j", "--json"], + "action": "store_true", + "help": "json形式で出力する", + }, { "args": ["-s", "--selenium"], "action": "store_true", diff --git a/pydork/engine.py b/pydork/engine.py index b48c04b..e7c4a67 100644 --- a/pydork/engine.py +++ b/pydork/engine.py @@ -514,6 +514,4 @@ def suggest(self, keyword: str, jap=False, alph=False, num=False): # sessionを終了 self.ENGINE.close_session() - print(suggests) - return suggests diff --git a/pydork/engine_common.py b/pydork/engine_common.py index c64458d..9a57d89 100644 --- a/pydork/engine_common.py +++ b/pydork/engine_common.py @@ -20,6 +20,7 @@ from seleniumrequests import Chrome, Firefox # selenium +from selenium import webdriver from selenium.webdriver.chrome.options import Options as ChromeOptions from selenium.webdriver.firefox.options import Options as FirefoxOptions from selenium.webdriver.support.ui import WebDriverWait @@ -299,6 +300,9 @@ def create_selenium_driver(self): self.driver = Chrome(options=options) elif self.SELENIUM_BROWSER == 'firefox': + # debug comment out. + # capabilities = webdriver.DesiredCapabilities().FIREFOX + # capabilities['acceptSslCerts'] = True geckodriver_autoinstaller.install() self.driver = Firefox(options=options) diff --git a/pydork/engine_yahoo.py b/pydork/engine_yahoo.py index f40752a..111654e 100644 --- a/pydork/engine_yahoo.py +++ b/pydork/engine_yahoo.py @@ -38,7 +38,7 @@ def __init__(self): self.SEARCH_URL = 'https://search.yahoo.co.jp/search' self.IMAGE_PRE_URL = 'https://search.yahoo.co.jp/image/search' self.IMAGE_URL = 'https://search.yahoo.co.jp/image/api/search' - self.SUGGEST_URL = 'https://n-assist-search.yahooapis.jp/SuggestSearchService/V5/webassistSearch' + self.SUGGEST_URL = 'https://ff.search.yahoo.com/gossip' def gen_search_url(self, keyword: str, type: str): """gen_search_url @@ -147,9 +147,7 @@ def gen_suggest_url(self, keyword: str): dict: サジェスト取得用url """ url_param = { - 'query': keyword, # 検索キーワード - # ↓正常に動作しなくなった場合はブラウザからアクセスして更新! (TODO:自動取得処理の追加) - 'eappid': 'fsj_i3itmbzOmFv2txHkxs_7_haRWhkb8W4Xkmdd.4bua0FTNAVc0G4hE6ThCR.KUnJnkEH49WOnqSe2mNz..qnR90CAq2jVyC.jc8qvCmgR8TLOkfsk5LKTSqtoKjjz_svDg_9GrNEhTiw9XE5e', + 'command': keyword, # 検索キーワード 'output': 'json', } @@ -263,16 +261,9 @@ def get_suggest_list(self, suggests: list, char: str, html: str): Returns: dict: サジェスト配列 """ - - if self.USE_SELENIUM: - soup = BeautifulSoup(html, "lxml") - json_data = soup.select_one('pre') - data = json.loads(json_data.text) - else: - data = json.loads(html) - - suggests[char if char == '' else char[-1]] = [e['Suggest'] - for e in data['Result']] + data = json.loads(html) + suggests[char if char == '' else char[-1]] = [e['key'] + for e in data['gossip']['results']] return suggests diff --git a/pydork/subcommands.py b/pydork/subcommands.py index f921fee..243d059 100644 --- a/pydork/subcommands.py +++ b/pydork/subcommands.py @@ -45,32 +45,45 @@ def run_subcommand(subcommand, args): elif subcommand == 'suggest': target = suggest - tasks = [] - lock = threading.Lock() - for st in args.search_type: + # create query_list + query_list = list() + query_list.append(args.query) + + # engine_listへ、選択されているsearch engineを入れていく + engine_list = [] + for search_type in args.search_type: # if all - if st == 'all': + if search_type == 'all': for engine in ENGINES: - task = threading.Thread( - target=target, args=(engine, args, True, lock, search_mode)) - tasks.append(task) - + engine_list.append(engine) continue # if in searchengine - if st in ENGINES: - task = threading.Thread( - target=target, args=(st, args, True, lock, search_mode)) - tasks.append(task) - + if search_type in ENGINES: + engine_list.append(search_type) continue + # engine_listから、重複したリストを削除 + engine_list = list(set(engine_list)) + + tasks = [] + thread_result = dict() + lock = threading.Lock() + for engine in engine_list: + task = threading.Thread( + target=target, args=(engine, query_list, args, thread_result, True, lock, search_mode)) + tasks.append(task) + for task in tasks: task.start() for task in tasks: task.join() + # json出力が有効だった場合、json形式で出力 + if args.json: + print(json.dumps(thread_result, ensure_ascii=False, indent=2)) + # SearchEngineのオプション設定用関数 def set_se_options(se, args): @@ -135,57 +148,22 @@ def set_se_options(se, args): return se -# 検索 -def search(engine, args, cmd=False, lock=None, mode='text'): - """search +# 検索結果を出力する +def print_search_result(result, args, message): + """print_search_result + Args: - engine (str): 使用する検索エンジン(.engine.ENGINES). + result : SearchEngine.searchのresult. args (Namespace): argparseで取得した引数(Namespace). - cmd (bool, optional): commandで実行しているか否か. Defaults to False. - lock (threading.Lock): threadingのマルチスレッドで使用するLock.現在は未使用. Defaults to None. - type (str, optional): 検索タイプ. `text` or `image`. + message (common.Message): 出力用Class. """ - # start search engine class - se = SearchEngine() - - # Set Engine - se.set(engine) - - # Set SearchEngine options - se = set_se_options(se, args) - - # Set lock - se.set_lock(lock) - - # Set color - if args.color == 'always' or (args.color == 'auto' and sys.stdout.isatty()): - se.set_is_color(True) - - # 検索タイプを設定(テキスト or 画像) - search_type = mode - - # 検索を実行 - result = se.search( - args.query, type=search_type, - maximum=args.num - ) - - # sep + # 区切り文字を指定 sep = ': ' if args.nullchar: sep = '\0' - # debug - se.ENGINE.MESSAGE.print_text( - json.dumps(result), - separator=sep, - header=se.ENGINE.MESSAGE.HEADER + ': ' + - Color.GRAY + '[DEBUG]: [Result]' + Color.END, - mode="debug", - ) - # title出力を行うか確認 title_mode = False if 'title' in args: @@ -196,7 +174,6 @@ def search(engine, args, cmd=False, lock=None, mode='text'): if 'pagelink' in args: pagelink_mode = args.pagelink - # 検索結果を出力 for d in result: data = [] link = d['link'] @@ -224,16 +201,91 @@ def search(engine, args, cmd=False, lock=None, mode='text'): data.insert(0, title) - se.ENGINE.MESSAGE.print_line(*data, separator=sep) + message.print_line(*data, separator=sep) + + +# 検索 +def search(engine: str, query_list: list, args, thread_result: dict, cmd=False, lock=None, mode='text'): + """search + + Args: + engine (str): 使用する検索エンジン(.engine.ENGINES). + query_list(list): 検索クエリのリスト. + args (Namespace): argparseで取得した引数(Namespace). + thread_result(dict): 結果を1箇所に集約するためのresult dict. json出力するときのみ使用. + cmd (bool, optional): commandで実行しているか否か. Defaults to False. + lock (threading.Lock): threadingのマルチスレッドで使用するLock.現在は未使用. Defaults to None. + type (str, optional): 検索タイプ. `text` or `image`. + """ + + # start search engine class + se = SearchEngine() + + # Set Engine + se.set(engine) + + # Set SearchEngine options + se = set_se_options(se, args) + + # Set lock + se.set_lock(lock) + + # Set color + if args.color == 'always' or (args.color == 'auto' and sys.stdout.isatty()): + se.set_is_color(True) + + # 検索タイプを設定(テキスト or 画像) + search_type = mode + + # 区切り文字を指定 + sep = ': ' + if args.nullchar: + sep = '\0' + + # json出力時の変数を宣言 + all_result_json = list() + + # query_listの内容を順番に処理 + for query in query_list: + # 検索を実行 + result = se.search( + args.query, type=search_type, + maximum=args.num + ) + + # debug + se.ENGINE.MESSAGE.print_text( + json.dumps(result), + separator=sep, + header=se.ENGINE.MESSAGE.HEADER + ': ' + + Color.GRAY + '[DEBUG]: [Result]' + Color.END, + mode="debug", + ) + + if args.json: + # all_result_jsonへ組み込むためのjson方式へ加工. + append_result = { + 'query': query, + 'result': result + } + all_result_json.append(append_result) + + else: + print_search_result(result, args, se.ENGINE.MESSAGE) + + if args.json: + thread_result[engine] = all_result_json # サジェスト -def suggest(engine, args, cmd=False, lock=None, mode=''): +def suggest(engine: str, query_list: list, args, thread_result: dict, cmd=False, lock=None, mode=''): """suggest Args: engine (str): 使用する検索エンジン(.engine.ENGINES). + query_list(list): 検索クエリのリスト. args (Namespace): argparseで取得した引数(Namespace). + thread_result(dict): 結果を1箇所に集約するためのresult dict. json出力するときのみ使用. cmd (bool, optional): commandで実行しているか否か. Defaults to False. lock (threading.Lock): threadingのマルチスレッドで使用するLock.現在は未使用. Defaults to None. mode (str, optional): マルチスレッドでsearchとある程度共用で使えるようにするための引数. 利用していない. Defaults to ''. @@ -268,14 +320,29 @@ def suggest(engine, args, cmd=False, lock=None, mode=''): header = sc.out(header) se.ENGINE.MESSAGE.set_header(header) + # json出力時の変数を宣言 + all_result_json = list() + # Suggestを取得 - result = se.suggest( - args.query, - jap=args.jap, - alph=args.alph, - num=args.num, - ) - - for words in result.values(): - for w in words: - se.ENGINE.MESSAGE.print_line(w, separator=": ") + for query in query_list: + result = se.suggest( + args.query, + jap=args.jap, + alph=args.alph, + num=args.num, + ) + + for words in result.values(): + if args.json: + append_result = { + 'query': query, + 'result': words + } + all_result_json.append(append_result) + + else: + for w in words: + se.ENGINE.MESSAGE.print_line(w, separator=": ") + + if args.json: + thread_result[engine] = all_result_json diff --git a/setup.py b/setup.py index 9c5cc85..5663f56 100755 --- a/setup.py +++ b/setup.py @@ -100,8 +100,8 @@ def get_completefile_install_location(shell): name = 'pydork' -version = '1.0.1' -release = '1.0.1' +version = '1.1.0' +release = '1.1.0' if __name__ == "__main__": setuptools.setup(