Skip to content

Commit

Permalink
Merge pull request #6 from blacknon/topic/issue#3_json出力オプションの追加
Browse files Browse the repository at this point in the history
Topic/issue#3 json出力オプションの追加
  • Loading branch information
blacknon authored Apr 21, 2022
2 parents 0a03c4f + 04cfc7a commit 621da4c
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 87 deletions.
7 changes: 6 additions & 1 deletion pydork/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,19 @@ def main():
"default": "JP",
"choices": ["JP", "US"],
"type": str,
"help": "言語を指定",
"help": "国を指定",
},
{
"args": ["-P", "--proxy"],
"default": "",
"type": str,
"help": "プロキシサーバーを指定(例:socks5://hogehoge:8080, https://fugafuga:18080)",
},
{
"args": ["-j", "--json"],
"action": "store_true",
"help": "json形式で出力する",
},
{
"args": ["-s", "--selenium"],
"action": "store_true",
Expand Down
2 changes: 0 additions & 2 deletions pydork/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,4 @@ def suggest(self, keyword: str, jap=False, alph=False, num=False):
# sessionを終了
self.ENGINE.close_session()

print(suggests)

return suggests
4 changes: 4 additions & 0 deletions pydork/engine_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from seleniumrequests import Chrome, Firefox

# selenium
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.support.ui import WebDriverWait
Expand Down Expand Up @@ -299,6 +300,9 @@ def create_selenium_driver(self):
self.driver = Chrome(options=options)

elif self.SELENIUM_BROWSER == 'firefox':
# debug comment out.
# capabilities = webdriver.DesiredCapabilities().FIREFOX
# capabilities['acceptSslCerts'] = True
geckodriver_autoinstaller.install()
self.driver = Firefox(options=options)

Expand Down
19 changes: 5 additions & 14 deletions pydork/engine_yahoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __init__(self):
self.SEARCH_URL = 'https://search.yahoo.co.jp/search'
self.IMAGE_PRE_URL = 'https://search.yahoo.co.jp/image/search'
self.IMAGE_URL = 'https://search.yahoo.co.jp/image/api/search'
self.SUGGEST_URL = 'https://n-assist-search.yahooapis.jp/SuggestSearchService/V5/webassistSearch'
self.SUGGEST_URL = 'https://ff.search.yahoo.com/gossip'

def gen_search_url(self, keyword: str, type: str):
"""gen_search_url
Expand Down Expand Up @@ -147,9 +147,7 @@ def gen_suggest_url(self, keyword: str):
dict: サジェスト取得用url
"""
url_param = {
'query': keyword, # 検索キーワード
# ↓正常に動作しなくなった場合はブラウザからアクセスして更新! (TODO:自動取得処理の追加)
'eappid': 'fsj_i3itmbzOmFv2txHkxs_7_haRWhkb8W4Xkmdd.4bua0FTNAVc0G4hE6ThCR.KUnJnkEH49WOnqSe2mNz..qnR90CAq2jVyC.jc8qvCmgR8TLOkfsk5LKTSqtoKjjz_svDg_9GrNEhTiw9XE5e',
'command': keyword, # 検索キーワード
'output': 'json',
}

Expand Down Expand Up @@ -263,16 +261,9 @@ def get_suggest_list(self, suggests: list, char: str, html: str):
Returns:
dict: サジェスト配列
"""

if self.USE_SELENIUM:
soup = BeautifulSoup(html, "lxml")
json_data = soup.select_one('pre')
data = json.loads(json_data.text)
else:
data = json.loads(html)

suggests[char if char == '' else char[-1]] = [e['Suggest']
for e in data['Result']]
data = json.loads(html)
suggests[char if char == '' else char[-1]] = [e['key']
for e in data['gossip']['results']]

return suggests

Expand Down
203 changes: 135 additions & 68 deletions pydork/subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,32 +45,45 @@ def run_subcommand(subcommand, args):
elif subcommand == 'suggest':
target = suggest

tasks = []
lock = threading.Lock()
for st in args.search_type:
# create query_list
query_list = list()
query_list.append(args.query)

# engine_listへ、選択されているsearch engineを入れていく
engine_list = []
for search_type in args.search_type:
# if all
if st == 'all':
if search_type == 'all':
for engine in ENGINES:
task = threading.Thread(
target=target, args=(engine, args, True, lock, search_mode))
tasks.append(task)

engine_list.append(engine)
continue

# if in searchengine
if st in ENGINES:
task = threading.Thread(
target=target, args=(st, args, True, lock, search_mode))
tasks.append(task)

if search_type in ENGINES:
engine_list.append(search_type)
continue

# engine_listから、重複したリストを削除
engine_list = list(set(engine_list))

tasks = []
thread_result = dict()
lock = threading.Lock()
for engine in engine_list:
task = threading.Thread(
target=target, args=(engine, query_list, args, thread_result, True, lock, search_mode))
tasks.append(task)

for task in tasks:
task.start()

for task in tasks:
task.join()

# json出力が有効だった場合、json形式で出力
if args.json:
print(json.dumps(thread_result, ensure_ascii=False, indent=2))


# SearchEngineのオプション設定用関数
def set_se_options(se, args):
Expand Down Expand Up @@ -135,57 +148,22 @@ def set_se_options(se, args):
return se


# 検索
def search(engine, args, cmd=False, lock=None, mode='text'):
"""search
# 検索結果を出力する
def print_search_result(result, args, message):
"""print_search_result
Args:
engine (str): 使用する検索エンジン(.engine.ENGINES).
result : SearchEngine.searchのresult.
args (Namespace): argparseで取得した引数(Namespace).
cmd (bool, optional): commandで実行しているか否か. Defaults to False.
lock (threading.Lock): threadingのマルチスレッドで使用するLock.現在は未使用. Defaults to None.
type (str, optional): 検索タイプ. `text` or `image`.
message (common.Message): 出力用Class.
"""

# start search engine class
se = SearchEngine()

# Set Engine
se.set(engine)

# Set SearchEngine options
se = set_se_options(se, args)

# Set lock
se.set_lock(lock)

# Set color
if args.color == 'always' or (args.color == 'auto' and sys.stdout.isatty()):
se.set_is_color(True)

# 検索タイプを設定(テキスト or 画像)
search_type = mode

# 検索を実行
result = se.search(
args.query, type=search_type,
maximum=args.num
)

# sep
# 区切り文字を指定
sep = ': '
if args.nullchar:
sep = '\0'

# debug
se.ENGINE.MESSAGE.print_text(
json.dumps(result),
separator=sep,
header=se.ENGINE.MESSAGE.HEADER + ': ' +
Color.GRAY + '[DEBUG]: [Result]' + Color.END,
mode="debug",
)

# title出力を行うか確認
title_mode = False
if 'title' in args:
Expand All @@ -196,7 +174,6 @@ def search(engine, args, cmd=False, lock=None, mode='text'):
if 'pagelink' in args:
pagelink_mode = args.pagelink

# 検索結果を出力
for d in result:
data = []
link = d['link']
Expand Down Expand Up @@ -224,16 +201,91 @@ def search(engine, args, cmd=False, lock=None, mode='text'):

data.insert(0, title)

se.ENGINE.MESSAGE.print_line(*data, separator=sep)
message.print_line(*data, separator=sep)


# 検索
def search(engine: str, query_list: list, args, thread_result: dict, cmd=False, lock=None, mode='text'):
    """Run every query in ``query_list`` against a single search engine.

    When ``args.json`` is set, results are not printed; they are collected
    and stored in ``thread_result[engine]`` as a list of
    ``{'query': ..., 'result': ...}`` dicts. Otherwise each result set is
    printed through ``print_search_result``.

    Args:
        engine (str): search engine to use (see .engine.ENGINES).
        query_list (list): search queries, processed in order.
        args (Namespace): arguments parsed by argparse.
        thread_result (dict): dict shared between threads that gathers the
            results per engine. Written only when ``args.json`` is set.
        cmd (bool, optional): whether this is run as a command. Not read by
            this function. Defaults to False.
        lock (threading.Lock, optional): lock handed to ``se.set_lock``.
            Defaults to None.
        mode (str, optional): search type, `text` or `image`.
            Defaults to 'text'.
    """

    # start search engine class
    se = SearchEngine()

    # Set Engine
    se.set(engine)

    # Set SearchEngine options
    se = set_se_options(se, args)

    # Set lock
    se.set_lock(lock)

    # Set color
    if args.color == 'always' or (args.color == 'auto' and sys.stdout.isatty()):
        se.set_is_color(True)

    # Search type (text or image)
    search_type = mode

    # Field separator used for printed output
    sep = ': '
    if args.nullchar:
        sep = '\0'

    # Accumulator used only for json output
    all_result_json = list()

    # Process the queries one by one
    for query in query_list:
        # Run the search for *this* query. (Previously ``args.query`` was
        # passed here, so every iteration repeated the same search while the
        # json output was labelled with ``query``.)
        result = se.search(
            query, type=search_type,
            maximum=args.num
        )

        # debug
        se.ENGINE.MESSAGE.print_text(
            json.dumps(result),
            separator=sep,
            header=se.ENGINE.MESSAGE.HEADER + ': ' +
            Color.GRAY + '[DEBUG]: [Result]' + Color.END,
            mode="debug",
        )

        if args.json:
            # Shape the result so it can be merged into all_result_json.
            append_result = {
                'query': query,
                'result': result
            }
            all_result_json.append(append_result)

        else:
            print_search_result(result, args, se.ENGINE.MESSAGE)

    if args.json:
        thread_result[engine] = all_result_json


# サジェスト
def suggest(engine, args, cmd=False, lock=None, mode=''):
def suggest(engine: str, query_list: list, args, thread_result: dict, cmd=False, lock=None, mode=''):
"""suggest
Args:
engine (str): 使用する検索エンジン(.engine.ENGINES).
query_list(list): 検索クエリのリスト.
args (Namespace): argparseで取得した引数(Namespace).
thread_result(dict): 結果を1箇所に集約するためのresult dict. json出力するときのみ使用.
cmd (bool, optional): commandで実行しているか否か. Defaults to False.
lock (threading.Lock): threadingのマルチスレッドで使用するLock.現在は未使用. Defaults to None.
mode (str, optional): マルチスレッドでsearchとある程度共用で使えるようにするための引数. 利用していない. Defaults to ''.
Expand Down Expand Up @@ -268,14 +320,29 @@ def suggest(engine, args, cmd=False, lock=None, mode=''):
header = sc.out(header)
se.ENGINE.MESSAGE.set_header(header)

# json出力時の変数を宣言
all_result_json = list()

# Suggestを取得
result = se.suggest(
args.query,
jap=args.jap,
alph=args.alph,
num=args.num,
)

for words in result.values():
for w in words:
se.ENGINE.MESSAGE.print_line(w, separator=": ")
for query in query_list:
result = se.suggest(
args.query,
jap=args.jap,
alph=args.alph,
num=args.num,
)

for words in result.values():
if args.json:
append_result = {
'query': query,
'result': words
}
all_result_json.append(append_result)

else:
for w in words:
se.ENGINE.MESSAGE.print_line(w, separator=": ")

if args.json:
thread_result[engine] = all_result_json
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ def get_completefile_install_location(shell):


name = 'pydork'
version = '1.0.1'
release = '1.0.1'
version = '1.1.0'
release = '1.1.0'

if __name__ == "__main__":
setuptools.setup(
Expand Down

0 comments on commit 621da4c

Please sign in to comment.