From fcde28e7d0afcddaf3821ae8abe0aa0fa31f8e1f Mon Sep 17 00:00:00 2001 From: Dalf Date: Thu, 30 Jan 2020 18:00:54 +0100 Subject: [PATCH] - Add github action - python -m searxinstances --check to check instances.yml syntax - .pylintrc configuration - setup.py / requirements.txt / requirements-update.txt --- .github/workflows/check.yml | 18 + .gitignore | 3 +- .pylintrc | 588 ++++++++++++++++++ README.md | 1 + requirements-update.txt | 3 + requirements.txt | 1 + searxinstances/__init__.py | 3 + searxinstances/__main__.py | 168 +++++ searxinstances/__version__.py | 3 + instances.yml => searxinstances/instances.yml | 0 {searxstats/list => searxinstances}/model.py | 28 +- searxinstances/utils/__init__.py | 0 .../utils}/import_rst.py | 9 +- searxstats/list/parse.py | 77 --- setup.py | 68 ++ 15 files changed, 880 insertions(+), 90 deletions(-) create mode 100644 .github/workflows/check.yml create mode 100644 .pylintrc create mode 100644 README.md create mode 100644 requirements-update.txt create mode 100644 requirements.txt create mode 100644 searxinstances/__init__.py create mode 100644 searxinstances/__main__.py create mode 100644 searxinstances/__version__.py rename instances.yml => searxinstances/instances.yml (100%) rename {searxstats/list => searxinstances}/model.py (87%) create mode 100644 searxinstances/utils/__init__.py rename {searxstats/list => searxinstances/utils}/import_rst.py (94%) delete mode 100644 searxstats/list/parse.py create mode 100644 setup.py diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml new file mode 100644 index 0000000..53cb883 --- /dev/null +++ b/.github/workflows/check.yml @@ -0,0 +1,18 @@ +name: searx-instances + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v1 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r requirements-update.txt + - name: Check 
+ run: | + python -m searxinstances --check diff --git a/.gitignore b/.gitignore index 8537c5e..5ff3326 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,4 @@ -.coverage .vscode -.pytest_cache/ *~ __pycache__/ @@ -16,4 +14,5 @@ build/ dist/ *.egg-info/ +cache ve diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..4e0d0bf --- /dev/null +++ b/.pylintrc @@ -0,0 +1,588 @@ +[MASTER] + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-whitelist=lxml + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=searxstats/data + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Specify a configuration file. +#rcfile= + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. 
Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=print-statement, + parameter-unpacking, + unpacking-in-except, + old-raise-syntax, + backtick, + long-suffix, + old-ne-operator, + old-octal-literal, + import-star-module-level, + non-ascii-bytes-literal, + raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + apply-builtin, + basestring-builtin, + buffer-builtin, + cmp-builtin, + coerce-builtin, + execfile-builtin, + file-builtin, + long-builtin, + raw_input-builtin, + reduce-builtin, + standarderror-builtin, + unicode-builtin, + xrange-builtin, + coerce-method, + delslice-method, + getslice-method, + setslice-method, + no-absolute-import, + old-division, + dict-iter-method, + dict-view-method, + next-method-called, + metaclass-assignment, + indexing-exception, + raising-string, + reload-builtin, + oct-method, + hex-method, + nonzero-method, + cmp-method, + input-builtin, + round-builtin, + intern-builtin, + unichr-builtin, + map-builtin-not-iterating, + zip-builtin-not-iterating, + range-builtin-not-iterating, + filter-builtin-not-iterating, + using-cmp-argument, + eq-without-hash, + div-method, + idiv-method, + rdiv-method, + exception-message-attribute, 
+ invalid-str-codec, + sys-max-int, + bad-python3-import, + deprecated-string-function, + deprecated-str-translate-call, + deprecated-itertools-function, + deprecated-types-field, + next-method-defined, + dict-items-not-iterating, + dict-keys-not-iterating, + dict-values-not-iterating, + deprecated-operator-function, + deprecated-urllib-function, + xreadlines-attribute, + deprecated-sys-function, + exception-escape, + comprehension-escape, + C0114, + C0115, + C0116, + C1801, + R0903, + R1705, + E1701, + W0511 + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'error', 'warning', 'refactor', and 'convention' +# which contain the number of messages in each category, as well as 'statement' +# which is the total number of statements analyzed. This score is used by the +# global evaluation report (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +#msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=colorized + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. 
When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. 
+ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. 
+#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. +#class-attribute-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. 
+name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. +#variable-rgx= + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + + +[STRING] + +# This flag controls whether the implicit-str-concat-in-sequence should +# generate a warning on implicit string concatenation in sequences defined over +# several lines. +check-str-concat-over-line-jumps=no + + +[LOGGING] + +# Format style used to check logging format string. `old` means using % +# formatting, `new` is for `{}` formatting,and `fstr` is for f-strings. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[SIMILARITIES] + +# Ignore comments when computing similarities. 
+ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=120 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma, + dict-separator + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. 
+valid-metaclass-classmethod-first-arg=cls + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules=optparse,tkinter.tix + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled). +ext-import-graph= + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled). +import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[DESIGN] + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. 
+max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "BaseException, Exception". +overgeneral-exceptions=BaseException diff --git a/README.md b/README.md new file mode 100644 index 0000000..fb66c12 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +Searx instances list diff --git a/requirements-update.txt b/requirements-update.txt new file mode 100644 index 0000000..dd2a1ec --- /dev/null +++ b/requirements-update.txt @@ -0,0 +1,3 @@ +httpx==0.11.0 +GitPython==3.0.5 +python-editor==1.0.4 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d134d7e --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +pyyaml==5.1.2 diff --git a/searxinstances/__init__.py b/searxinstances/__init__.py new file mode 100644 index 0000000..d4116f6 --- /dev/null +++ b/searxinstances/__init__.py @@ -0,0 +1,3 @@ +from . import model + +__all__ = model.__all__ diff --git a/searxinstances/__main__.py b/searxinstances/__main__.py new file mode 100644 index 0000000..dc58e26 --- /dev/null +++ b/searxinstances/__main__.py @@ -0,0 +1,168 @@ +import argparse +import re +import sys +import tempfile +import subprocess + +import httpx +import rfc3986 +import editor + +from . 
import model + + +TITLE_RE = re.compile('(add|remove|delete|del)[ ]+(.+)', re.IGNORECASE) + + +def normalize_url(url): + if url.startswith('http://'): + return None + + if not url.startswith('https://'): + url = 'https://' + url + + try: + return rfc3986.normalize_uri(url) + except Exception: + return None + + +def load_requests(issue_number): + requests = [] + with httpx.Client() as client: + response = client.get('https://api.github.com/repos/dalf/searx-instances/issues?state=open') + rjson = response.json() + for issue in rjson: + if issue_number is not None and issue.get('number') != issue_number: + continue + if len(list(filter(lambda label: label.get('name') == 'instance', issue['labels']))): + rtitle = re.search(TITLE_RE, issue.get('title')) + issue_number = issue.get('number') + command = rtitle.group(1).lower() + url = normalize_url(rtitle.group(2)) + requests.append((issue_number, command, url, issue)) + return requests + + +def format_exception(ex): + result = "" + if ex is not None: + msg = str(ex) + for line in msg.splitlines(): + result += f"\n# {line}" + return result + + +def apply_add_request(instance_list, instance_url, issue): + tmp_instance_list = model.InstanceList() + new_instance = model.Instance(False) + tmp_instance_list[instance_url] = new_instance + + tmp_yml = model.yaml_dump(tmp_instance_list) + tmp_yml += f"#\n# {issue.get('title')}\n#" \ + + f"\n# Issue: #{issue.get('number')}\n"\ + + f"# User: @{issue.get('user').get('login')}\n" + for line in issue.get('body').splitlines(): + tmp_yml += f"# {line}\n" + edit = True + valid = False + error = "" + while edit: + tmp_yml = editor.edit(contents=tmp_yml.encode('utf-8') \ + + format_exception(error).encode('utf-8')).decode('utf-8') + + # no content: stop + if tmp_yml.strip() == '': + edit = False + error = None + continue + + try: + # parse yaml + tmp_instance_list = model.yaml_load(tmp_yml) + except Exception as ex: + error = ex + edit = True + continue + + # add new yaml + 
new_instance_list = instance_list.copy() + for url, instance in tmp_instance_list.items(): + try: + new_instance_list[url] = instance + except ValueError as ex: + error = ex + edit = True + break + if error is not None: + continue + + # done + valid = True + edit = False + + if valid: + # update + for url, instance in tmp_instance_list.items(): + instance_list[url] = instance + + # commit + + + +def apply_remove_request(instance_list, instance_url): + del instance_list[instance_url] + + +def apply_requests(instance_list, requests): + for request in requests: + print('Issue', request[0]) + if request[1] in ['add']: + apply_add_request(instance_list, request[2], request[3]) + elif request[1] in ['remove', 'delete', 'del']: + apply_remove_request(instance_list, request[2]) + + +def apply_change(issue_number): + requests = load_requests(issue_number) + instance_list = model.load() + apply_requests(instance_list, requests) + model.save(instance_list) + + +def run_instance_diff(content_after): + with tempfile.NamedTemporaryFile(delete=False) as tmpfile: + tmpfile.write(content_after) + tmpfile.flush() + subprocess.Popen(['diff', tmpfile.name, model.FILENAME]) + + +def check(): + print(f'Checking {model.FILENAME}') + with open(model.FILENAME, 'r') as input_file: + content = input_file.read() + instance_list = model.yaml_load(content) + content_after = model.yaml_dump(instance_list) + if content == content_after: + print('OK') + else: + print('ERROR: The file is not normalized') + run_instance_diff(content_after.encode('utf-8')) + sys.exit(1) + + +if __name__ == "__main__": + PARSER = argparse.ArgumentParser(description='Update the instance list according to the github issues.') + PARSER.add_argument('--check', action='store_true', + help='Check instances.yml syntax', + default=False) + PARSER.add_argument('--issue', '-i', + type=int, nargs='?', dest='issue', + help='Issue number to process, by default all', + default=None) + + ARGS = PARSER.parse_args() + if ARGS.check: + 
check() + else: + apply_change(ARGS.issue) diff --git a/searxinstances/__version__.py b/searxinstances/__version__.py new file mode 100644 index 0000000..2d03df7 --- /dev/null +++ b/searxinstances/__version__.py @@ -0,0 +1,3 @@ +__title__ = "searxinstances" +__description__ = "Searx instances." +__version__ = "0.0.1" diff --git a/instances.yml b/searxinstances/instances.yml similarity index 100% rename from instances.yml rename to searxinstances/instances.yml diff --git a/searxstats/list/model.py b/searxinstances/model.py similarity index 87% rename from searxstats/list/model.py rename to searxinstances/model.py index 60580a9..3664366 100644 --- a/searxstats/list/model.py +++ b/searxinstances/model.py @@ -1,3 +1,4 @@ +from os.path import realpath, dirname from collections import OrderedDict import json import inspect @@ -10,7 +11,7 @@ from yaml import Loader, Dumper -# https://bugs.python.org/issue19438 +# Declare NoneType (see https://bugs.python.org/issue19438) NoneType = type(None) # Model @@ -44,8 +45,12 @@ def __init__(self, safe=None, comments=None, additional_urls=None): raise ValueError('safe is not a bool') if not isinstance(comments, (list, NoneType)): raise ValueError('comments is not a list') - if not isinstance(additional_urls, AdditionalUrlList): + if not isinstance(additional_urls, (AdditionalUrlList, NoneType)): raise ValueError('additional_urls is not a AdditionalUrlList instance') + if comments is None: + comments = [] + if additional_urls is None: + additional_urls = AdditionalUrlList() # assign self.safe = safe self.comments = comments @@ -102,7 +107,7 @@ def __setitem__(self, url: str, instance: Instance): for new_url in new_urls: new_url_n = str(rfc3986.normalize_uri(new_url)) if new_url_n != new_url: - raise ValueError(f'{new_url} should be normalized to {new_url_n}') + raise ValueError(f'{new_url} should be normalized to {new_url_n}, main URL {url}') # update super().__setitem__(url, instance) @@ -174,8 +179,18 @@ def ignore_aliases(self, 
data): ILLoader.add_path_resolver('!AdditionalUrlList', [None, 'additional_urls'], yaml.MappingNode) # Storage +FILENAME = realpath(dirname(realpath(__file__))) + '/instances.yml' + + +def yaml_dump(instance_list: InstanceList) -> str: + return yaml.dump(instance_list, Dumper=ILDumper, width=240, allow_unicode=True) + + +def yaml_load(content: str) -> InstanceList: + instance_list = yaml.load(content, Loader=ILLoader) + assert isinstance(instance_list, InstanceList) + return instance_list -FILENAME = 'instances.yml' def load(filename: str = FILENAME) -> InstanceList: with open(filename, 'r') as input_file: @@ -183,10 +198,11 @@ def load(filename: str = FILENAME) -> InstanceList: assert isinstance(instance_list, InstanceList) return instance_list + def save(instance_list: InstanceList, filename: str = FILENAME): - output_content = yaml.dump(instance_list, Dumper=ILDumper, width=240, allow_unicode=True) + output_content = yaml_dump(instance_list) with open(filename, 'w') as output_file: output_file.write(output_content) -__all__ = ['InstanceList', 'Instance', 'AdditionalUrlList', 'load', 'save'] +__all__ = ['InstanceList', 'Instance', 'AdditionalUrlList', 'yaml_dump', 'yaml_load', 'load', 'save', 'FILENAME'] diff --git a/searxinstances/utils/__init__.py b/searxinstances/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/searxstats/list/import_rst.py b/searxinstances/utils/import_rst.py similarity index 94% rename from searxstats/list/import_rst.py rename to searxinstances/utils/import_rst.py index 2e9b68e..c987002 100644 --- a/searxstats/list/import_rst.py +++ b/searxinstances/utils/import_rst.py @@ -2,7 +2,7 @@ from urllib.parse import urlparse import re import httpx -from . import model +from .. 
import model SEARX_INSTANCES_URL = 'https://raw.githubusercontent.com/asciimoo/searx/master/docs/user/public_instances.rst' AFTER_ALIVE_AND_RUNNING = re.compile('Alive and running(.*)Running with an incorrect SSL certificate',\ @@ -99,12 +99,11 @@ async def import_instance(instance_list, text, section_comment): # add it aurls[url] = label # + instance = model.Instance(None, instance_comments, aurls) if main_url in instance_list: - print('duplicate found ', main_url) + print('duplicate found ', main_url, instance) else: - if len(aurls) > 0: - print(main_url, aurls) - instance_list[main_url] = model.Instance(None, instance_comments, aurls) + instance_list[main_url] = instance diff --git a/searxstats/list/parse.py b/searxstats/list/parse.py deleted file mode 100644 index 5c09bf9..0000000 --- a/searxstats/list/parse.py +++ /dev/null @@ -1,77 +0,0 @@ -import argparse -import re - -import httpx -import rfc3986 - -from . import model - - -TITLE_RE = re.compile('(add|remove|delete|del)[ ]+(.+)', re.IGNORECASE) - - -def normalize_url(url): - if url.startswith('http://'): - return None - - if not url.startswith('https://'): - url = 'https://' + url - - try: - return rfc3986.normalize_uri(url) - except Exception: - return None - - -def load_requests(issue_number): - requests = [] - with httpx.Client() as client: - response = client.get('https://api.github.com/repos/dalf/searx-instances/issues?state=open') - rjson = response.json() - for issue in rjson: - if issue_number is not None and issue.get('number') != issue_number: - continue - if len(list(filter(lambda label: label.get('name') == 'instance', issue['labels']))): - rtitle = re.search(TITLE_RE, issue.get('title')) - issue_number = issue.get('number') - command = rtitle.group(1).lower() - url = normalize_url(rtitle.group(2)) - requests.append((issue_number, command, url)) - return requests - - -def apply_add_request(instance_list, url): - new_instance = model.Instance(False, ['test']) - instance_list[url] = new_instance - 
- -def apply_remove_request(instance_list, url): - del instance_list[url] - - -def apply_requests(instance_list, requests): - for request in requests: - if request[1] in ['add']: - apply_add_request(instance_list, request[2]) - elif request[1] in ['remove', 'delete', 'del']: - apply_remove_request(instance_list, request[2]) - - -def main(issue_number): - requests = load_requests(issue_number) - instance_list = model.load() - apply_requests(instance_list, requests) - model.save(instance_list) - # print(instance_list.json_dump()) - # git commit - - -if __name__ == "__main__": - PARSER = argparse.ArgumentParser(description='Update the instance list according to the github issues.') - PARSER.add_argument('--issue', '-i', - type=str, nargs='?', dest='issue', - help='Issue number to process, by default all', - default=None) - - ARGS = PARSER.parse_args() - main(ARGS.issue) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..2e460c2 --- /dev/null +++ b/setup.py @@ -0,0 +1,68 @@ +import codecs +import os +import re + +from setuptools import find_namespace_packages, setup + +here = os.path.abspath(os.path.dirname(__file__)) + + +def read(*parts): + # intentionally *not* adding an encoding option to open, See: + # https://github.com/pypa/virtualenv/issues/201#issuecomment-3145690 + with codecs.open(os.path.join(here, *parts), 'r') as fp: + return fp.read() + + +def find_version(*file_paths): + version_file = read(*file_paths) + version_match = re.search( + r"^__version__ = ['\"]([^'\"]*)['\"]", + version_file, + re.M, + ) + if version_match: + return version_match.group(1) + + raise RuntimeError("Unable to find version string.") + + +long_description = read('README.md') +requirements = map(str.strip, open('requirements.txt').readlines()) +update_requirements = map(str.strip, open('requirements-update.txt').readlines()) + +setup( + name="searxinstances", + version=find_version("searxinstances", "__version__.py"), + description="Searx instances.", + 
long_description=long_description, + + license='GNU Affero General Public License', + classifiers=[ + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: GNU Affero General Public License v3", + "Topic :: Internet", + "Topic :: Internet :: WWW/HTTP", + "Topic :: Internet :: WWW/HTTP :: Indexing/Search", + "Programming Language :: Python", + "Programming Language :: Python :: 3.6", + ], + url='https://github.com/dalf/searx-instances', + keywords='searx', + + author='Alexandre Flament', + author_email='alex.andre@al-f.net', + + packages=find_namespace_packages(include=['searxinstances', 'searxinstances.*']), + entry_points={ + 'console_scripts': [ + 'update-searxinstances=searxinstances.__main__:main', + ], + }, + zip_safe=False, + python_requires='>=3.5.*', + install_requires=requirements, + extras_require={ + 'update': update_requirements + }, +)