Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

version 0.1.4 #16

Merged
merged 13 commits into from
Jul 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
11 changes: 9 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Copyright (c) 2023 Blacknon. All rights reserved.
# Use of this source code is governed by an MIT license
# that can be found in the LICENSE file.
# =======================================================

FROM debian:bullseye

ENV DEBIAN_FRONTEND noninteractive
Expand All @@ -8,9 +13,11 @@ RUN apt update
# apt install
RUN apt install -y \
firefox-esr \
chrome \
python3-pip

RUN pip3 install --upgrade pip
RUN pip3 install --upgrade pip setuptools

# copy directory
COPY ./ /opt/pydork
WORKDIR /opt/pydork
Expand All @@ -19,4 +26,4 @@ WORKDIR /opt/pydork
RUN ls -la /opt/pydork

# # pip install
RUN pip3 install ./
RUN pip3 install --use-pep517 ./
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ PyDork

## Description

Scraping and listing text and image searches on Google, Bing, DuckDuckGo, Baidu, Yahoo japan.
Scraping and listing text and image searches on **Google**, **Bing**, **DuckDuckGo**, **Baidu**, **Yahoo Japan**.

## Install

Expand Down
Empty file added docker-compose.yml
Empty file.
93 changes: 51 additions & 42 deletions pydork/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) 2023 Blacknon. All rights reserved.
# Use of this source code is governed by an MIT license
# that can be found in the LICENSE file.
# =======================================================

from .sub_commands import run_subcommand
from .engine import ENGINES
from . import messages

import argparse
import copy

from datetime import datetime
from pkg_resources import get_distribution
from datetime import datetime

from .engine import ENGINES
from .subcommands import run_subcommand
import copy
import argparse

# TODO: switch from return to yield so that results are returned as a generator, allowing progress to be observed while a search is still in progress


# version (retrieved from setup.py)
Expand All @@ -21,7 +26,7 @@
def main():
# parserの作成
parser = argparse.ArgumentParser(
description='各種検索エンジンから指定したクエリの結果(url)およびSuggestを取得するスクリプト')
description=messages.description)
subparsers = parser.add_subparsers()

# ENGINESに`all`を追加
Expand All @@ -36,102 +41,107 @@ def main():
"type": str,
"nargs": "?",
"default": "",
"help": "検索文字列(クエリ)",
"help": messages.help_message_query,
},
{
"args": ["-f", "--file"],
"action": "store",
"type": str,
"default": "",
"help": "検索文字列(クエリ)が書かれているファイル",
"help": messages.help_message_op_file,
},
{
"args": ["-F", "--template_file"],
"action": "store",
"type": str,
"default": "",
"help": "検索文字列(クエリ)が書かれているテンプレートファイル(jinja2)",
"help": messages.help_message_op_template_file,
},
{
"args": ["-V", "--template_variable"],
"action": "store",
"type": str,
"default": "",
"help": "テンプレートファイル(jinja2)で使用する変数セット(json)",
"help": messages.help_message_op_template_variable,
},
{
"args": ["-t", "--search_type"],
"default": ["google"],
"choices": engines_list,
"nargs": "+",
"type": str,
"help": "使用する検索エンジンを指定",
"help": messages.help_message_op_search_type,
},
{
"args": ["-l", "--lang"],
"default": "ja",
"choices": ["ja", "en"],
"type": str,
"help": "言語を指定",
"help": messages.help_message_op_lang,
},
{
"args": ["-c", "--country"],
"default": "JP",
"choices": ["JP", "US"],
"type": str,
"help": "国を指定",
"help": messages.help_message_op_country,
},
{
"args": ["-P", "--proxy"],
"default": "",
"type": str,
"help": "プロキシサーバーを指定(例:socks5://hogehoge:8080, https://fugafuga:18080)",
"help": messages.help_message_op_proxy_server,
},
{
"args": ["-j", "--json"],
"action": "store_true",
"help": "json形式で出力する",
"help": messages.help_message_op_json,
},
{
"args": ["-k", "--insecure"],
"action": "store_true",
"help": "sslエラーを無視する",
"help": messages.help_message_op_insecure,
},
{
"args": ["-s", "--selenium"],
"action": "store_true",
"help": "Selenium(headless browser)を使用する(排他: Splashより優先)",
"help": messages.help_message_op_selenium,
},
{
"args": ["-S", "--splash"],
"action": "store_true",
"help": "Splash(headless browser)を使用する(排他: Seleniumの方が優先)",
"help": messages.help_message_op_splash,
},
{
"args": ["-b", "--browser-endpoint"],
"default": "",
"type": str,
"help": "Selenium/Splash等のヘッドレスブラウザのエンドポイントを指定(例: localhost:8050)",
"help": messages.help_message_op_browser_endpoint,
},
{
"args": ["-B", "--browser"],
"default": "firefox",
"choices": ["chrome", "firefox"],
"type": str,
"help": "Seleniumで使用するBrowserを指定",
"help": messages.help_message_op_browser,
},
{
"args": ["--color"],
"default": "auto",
"choices": ["auto", "none", "always"],
"type": str,
"help": "color出力の切り替え"
"help": messages.help_message_op_color,
},
{
"args": ["--cookies"],
"default": "~/.pydork_cookies",
"type": str,
"help": "使用するcookieファイルの格納先ディレクトリのPATH(各検索エンジンごとでcookieファイルを個別保存)"
"help": messages.help_message_op_cookies_dir,
},
{
"args": ["--delete-cookies"],
"action": "store_true",
"help": messages.help_message_op_delete_cookies,
},
]

Expand All @@ -140,38 +150,38 @@ def main():
{
"args": ["-T", "--title"],
"action": "store_true",
"help": "検索結果のタイトルをセットで出力する",
"help": messages.help_message_op_title,
},
{
"args": ["-0", "--nullchar"],
"action": "store_true",
"help": "null characterを区切り文字として使用する",
"help": messages.help_message_op_null_char,
},
{
"args": ["-n", "--num"],
"default": 300,
"type": int,
"help": "検索結果の取得数を指定する",
"help": messages.help_message_op_num,
},
{
"args": ["--start"],
"type": lambda s: datetime.strptime(s, '%Y-%m-%d'),
"help": "期間指定(開始)",
"help": messages.help_message_op_start,
},
{
"args": ["--end"],
"type": lambda s: datetime.strptime(s, '%Y-%m-%d'),
"help": "期間指定(終了)",
"help": messages.help_message_op_end,
},
{
"args": ["--debug"],
"action": "store_true",
"help": "debugモードを有効にする",
"help": messages.help_message_op_debug,
},
{
"args": ["--disable-headless"],
"action": "store_true",
"help": "Seleniumでheadlessモードを無効化する(手動でのReCaptcha対応時に必要)",
"help": messages.help_message_op_disable_headless,
},
]
search_args_map.extend(copy.deepcopy(common_args_map))
Expand All @@ -181,43 +191,43 @@ def main():
{
"args": ["-T", "--title"],
"action": "store_true",
"help": "検索結果のタイトルをセットで出力する",
"help": messages.help_message_op_title,
},
{
"args": ["-p", "--pagelink"],
"action": "store_true",
"help": "画像ファイルがあるhtmlのURLも出力する",
"help": messages.help_message_op_image_pagelink,
},
{
"args": ["-0", "--nullchar"],
"action": "store_true",
"help": "null characterを区切り文字として使用する",
"help": messages.help_message_op_null_char,
},
{
"args": ["-n", "--num"],
"default": 300,
"type": int,
"help": "検索結果の取得数を指定する",
"help": messages.help_message_op_num,
},
# {
# "args": ["--start"],
# "type": lambda s: datetime.strptime(s, '%Y-%m-%d'),
# "help": "期間指定(開始)",
# "help": messages.help_message_op_start,
# },
# {
# "args": ["--end"],
# "type": lambda s: datetime.strptime(s, '%Y-%m-%d'),
# "help": "期間指定(終了)",
# "help": messages.help_message_op_end,
# },
{
"args": ["--debug"],
"action": "store_true",
"help": "debugモードを有効にする",
"help": messages.help_message_op_debug,
},
{
"args": ["--disable-headless"],
"action": "store_true",
"help": "Seleniumでheadlessモードを無効化する(手動でのReCaptcha対応時に必要)",
"help": messages.help_message_op_disable_headless,
},
]
image_args_map.extend(copy.deepcopy(common_args_map))
Expand All @@ -227,17 +237,17 @@ def main():
{
"args": ["--jap"],
"action": "store_true",
"help": "日本語の文字を検索キーワードに追加してサジェストを取得"
"help": messages.help_message_op_suggest_jap
},
{
"args": ["--alph"],
"action": "store_true",
"help": "アルファベット文字を検索キーワードに追加してサジェストを取得"
"help": messages.help_message_op_suggest_alph
},
{
"args": ["--num"],
"action": "store_true",
"help": "数字を検索キーワードに追加してサジェストを取得"
"help": messages.help_message_op_suggest_num
},
]
suggest_args_map.extend(copy.deepcopy(common_args_map))
Expand All @@ -260,7 +270,6 @@ def main():

# image
# ----------
# TODO: image検索をサブコマンドとして追加する
parser_image = subparsers.add_parser(
'image',
help='search mode. see `search -h`'
Expand Down
Loading