Skip to content

Commit

Permalink
[enh] searxinstances.update: normalize URL, add tests
Browse files Browse the repository at this point in the history
- add a Makefile
- because of fmoo/python-editor#29, a fork of python-editor has been added to this repo.
- compliance to pylint rules
  • Loading branch information
dalf committed Feb 4, 2020
1 parent 3a1b09f commit 619d2e8
Show file tree
Hide file tree
Showing 12 changed files with 274 additions and 31 deletions.
8 changes: 5 additions & 3 deletions .github/workflows/check.yml → .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ jobs:
- uses: actions/setup-python@v1
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
make install-all
- name: Check
run: |
python -m searxinstances.check
make check
- name: QA
run: |
make qa
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
.coverage
.vscode
.pytest_cache/

*~
__pycache__/
Expand All @@ -15,4 +17,5 @@ dist/
*.egg-info/

cache
htmlcov
ve
19 changes: 19 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))

install:
python -m pip install --upgrade pip
pip install -r requirements.txt

install-update:
pip install -r requirements-update.txt

install-all: install install-update
pip install -r requirements-dev.txt

check:
python -m searxinstances.check

qa:
flake8 --max-line-length=120 searxinstances tests
pylint searxinstances tests
python -m pytest --cov-report html --cov=searxinstances tests -vv
5 changes: 5 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pylint==2.4.4
pytest==5.2.2
pytest-asyncio==0.10.0
pytest-cov==2.8.1
flake8==3.7.9
1 change: 0 additions & 1 deletion requirements-update.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
httpx==0.11.0
GitPython==3.0.5
python-editor==1.0.4
1 change: 0 additions & 1 deletion searxinstances/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
from . import model
26 changes: 21 additions & 5 deletions searxinstances/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,23 @@

# Model


def host_use_http(host):
    """Tell whether ``host`` is expected to be served over plain http.

    Only the last dot-separated label of ``host`` (its TLD) is inspected;
    the result is True for the ``onion`` and ``i2p`` TLDs and False for
    everything else.

    The comparison is case-insensitive, so a host that has not been
    lower-cased yet (for example ``EXAMPLE.ONION``) is classified the same
    way as its normalized form.

    ``host`` must be a string.
    """
    tld = host.rsplit('.', 1)[-1].lower()
    # "onion" and "i2p" can't be part of an IP address
    return tld in ('onion', 'i2p')


def url_validation(url):
nurl = rfc3986.normalize_uri(url)
if nurl != url:
return False, f'URL must be normalized to {nurl}'
purl = rfc3986.urlparse(nurl)
if purl.scheme not in ['http', 'https']:
return False, 'protocol is not https neither http'
if not(
(purl.scheme == 'https' and not host_use_http(purl.host)) or
(purl.scheme == 'http' and host_use_http(purl.host))
):
return False, 'the protocol is neither https nor http with an .onion/.i2p TLD'
if purl.query is not None:
return False, 'no query in the URL'
if purl.fragment is not None:
Expand Down Expand Up @@ -153,9 +163,10 @@ def yaml_constructor(loader: yaml.Loader, node):

# JSON serialization


class ObjectEncoder(json.JSONEncoder):

def default(self, o): # pylint: disable=E0202
def default(self, o): # pylint: disable=E0202
if hasattr(o, "to_json"):
return self.default(o.to_json())
elif hasattr(o, "__dict__"):
Expand All @@ -175,15 +186,20 @@ def default(self, o): # pylint: disable=E0202
return self.default(filtered_obj)
return o


# YAML (de)serialization

class ILLoader(Loader): # pylint: disable=too-many-ancestors
# pylint: disable=too-many-ancestors
class ILLoader(Loader):
pass

class ILDumper(Dumper): # pylint: disable=too-many-ancestors

# pylint: disable=too-many-ancestors
class ILDumper(Dumper):
def ignore_aliases(self, data):
return True


for c in [InstanceList, Instance, AdditionalUrlList]:
ILDumper.add_representer(c, c.yaml_representer)
ILLoader.add_constructor(c.yaml_tag, c.yaml_constructor)
Expand Down
64 changes: 46 additions & 18 deletions searxinstances/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,34 @@
import re
import os.path

import editor
import git
import httpx
import rfc3986
import idna

from . import model
from .utils import editor


class UserRequest:

__slots__ = ['request_id', 'request_url', 'user', 'command', 'url', 'message']
user_request_name = None

def __init__(self, request_id: str, request_url: str, user: str, url: str, message: str): # pylint: disable=too-many-arguments
# pylint: disable=too-many-arguments
def __init__(self, request_id: str, request_url: str, user: str, url: str, message: str):
self.request_id = request_id
self.request_url = request_url
self.user = user
self.url = url
self.message = message

def execute(self, instance_list: model.InstanceList, instance_list_update: model.InstanceList): # pylint: disable=no-self-use
# pylint: disable=no-self-use
def execute(self, instance_list: model.InstanceList, instance_list_update: model.InstanceList):
raise RuntimeError('Abstract method')

def get_content(self, existing_instance_list) -> str: # pylint: disable=no-self-use
# pylint: disable=no-self-use
def get_content(self, existing_instance_list) -> str:
raise RuntimeError('Not implemented')

def get_generic_content(self) -> str:
Expand Down Expand Up @@ -180,22 +184,10 @@ def __exit__(self, exc_type, exc_value, exc_traceback):


def get_git_repo():
repo_path = os.path.realpath(os.path.dirname(os.path.realpath(__file__))+ '/..')
repo_path = os.path.realpath(os.path.dirname(os.path.realpath(__file__)) + '/..')
repo = git.Repo(repo_path)
return repo

def normalize_url(url):
if url.startswith('http://'):
return None

if not url.startswith('https://'):
url = 'https://' + url

try:
return rfc3986.normalize_uri(url)
except Exception:
return None


def add_comment_prefix(message, prefix='# '):
result = ""
Expand Down Expand Up @@ -278,6 +270,42 @@ def run_user_request_list(instance_list: model.InstanceList, user_request_list):
}


def normalize_url(url):
    """Normalize a user supplied instance URL.

    The input may omit the protocol (``example.org``) or start with ``//``.
    In that case ``https`` is assumed, or ``http`` when the host has an
    .onion / .i2p TLD (see ``model.host_use_http``).

    Normalization steps:

    * the host is IDNA encoded (to avoid misleading hosts) and lower-cased,
    * the query and the fragment are removed,
    * a path made only of ``/`` is removed,
    * the result goes through ``rfc3986.normalize_uri``.

    Returns the normalized URL, or None when the URL is not acceptable:
    no usable host, a host that cannot be IDNA encoded, or a protocol other
    than https (http for .onion / .i2p hosts).
    """
    purl = rfc3986.urlparse(url)

    if purl.scheme is None and purl.host is None and purl.path is not None:
        # no protocol, no // : it is a path according to the rfc3986
        # but we know it is a host
        purl = rfc3986.urlparse('//' + url)

    if not purl.host:
        # nothing can be interpreted as a host (for example an empty string):
        # report it as "not acceptable" instead of failing on None below
        return None

    try:
        # idna encoding to avoid misleading host
        host = idna.encode(purl.host).decode('utf-8').lower()
    except idna.IDNAError:
        # the host is not a valid internationalized domain name
        return None

    # decide on the lower-cased host, so the default protocol and the final
    # check below always agree
    expected_scheme = 'http' if model.host_use_http(host) else 'https'

    if purl.scheme is None:
        # The url starts with //
        # Add https (or http for .onion or i2p TLD)
        scheme = expected_scheme
    else:
        scheme = purl.scheme.lower()

    # only https (exception: http for .onion and .i2p TLD)
    if scheme != expected_scheme:
        return None

    # first normalization: remove query, fragment and empty path
    purl = purl.copy_with(scheme=scheme,
                          host=host,
                          path='' if purl.path == '/' else purl.path,
                          query=None,
                          fragment=None)

    # normalize the URL
    return rfc3986.normalize_uri(purl.geturl())


def load_user_request_list_from_github(github_issue_list) -> list:
user_request_list = []
with httpx.Client() as client:
Expand Down Expand Up @@ -326,7 +354,7 @@ def load_user_request_list():
user_request_list += load_user_request_list_from_github(args.github_issue_list)
if len(args.add_instances) > 0:
for url in args.add_instances:
user_request_list.append(UserRequestAdd(None, None, None, url, ''))
user_request_list.append(UserRequestAdd(None, None, None, normalize_url(url), ''))
if len(args.delete_instances) > 0:
for url in args.delete_instances:
user_request_list.append(UserRequestDelete(None, None, None, url, ''))
Expand Down
147 changes: 147 additions & 0 deletions searxinstances/utils/editor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# Programmatically open an editor, capture the result.
#
# mostly copy paste from
# https://github.com/fmoo/python-editor (including PR 15 and 16)
# https://github.com/jpscaletti/texteditor
# both use an Apache 2 license.
# both use distutils.spawn.find_executable which is a problem with pytest
# see https://github.com/fmoo/python-editor/issues/29


import sys
import os.path
import subprocess
import tempfile
import shlex
from shutil import which


# Names exported by a star-import of this module.
__all__ = [
'edit',
'get_editor',
'EditorError',
]


MACOS_EDITORS = [
    # The -t flag makes macOS open the default *editor* for the file
    "open -t"
]

# Candidates tried on every platform.
# "code" is the name of the Visual Studio Code command line launcher (it is
# also the name get_editor_args() knows about); "vscode" is kept so that
# existing setups relying on that name keep working.
COMMON_EDITORS = ["subl", "code", "vscode", "atom"]

# In some linuxes `vim` and/or `emacs` come preinstalled, but we don't want
# to throw you to their unfamiliar UI unless there are other options.
# If you are using them you probably have set your $EDITOR variable anyway.
LINUX_EDITORS = COMMON_EDITORS + ["kate", "geany", "gedit", "nano", "editor"]

WINDOWS_EDITORS = COMMON_EDITORS + ["notepad++.exe", "notepad.exe"]

# Keys are prefixes of sys.platform, values the candidates for that platform.
EDITORS = {"darwin": MACOS_EDITORS, "linux": LINUX_EDITORS, "win": WINDOWS_EDITORS}


class EditorError(RuntimeError):
    """Error raised by this module, e.g. when no usable editor is found."""


def get_default_editors():
    """Return the editor candidates for the platform Python is running on.

    The keys of EDITORS are matched as prefixes of ``sys.platform``; the
    first matching entry wins. COMMON_EDITORS is returned when no key
    matches.
    """
    current = sys.platform
    matching = (
        candidates
        for prefix, candidates in EDITORS.items()
        if current.startswith(prefix)
    )
    return next(matching, COMMON_EDITORS)


def get_editor_args(editor):
    """Return the extra command line arguments to pass to ``editor``.

    ``editor`` is the bare executable name (no directory, no arguments).
    An empty list is returned for editors without a dedicated entry.
    """
    vim_args = ['-f', '-o']
    known_args = {
        'vim': vim_args,
        'gvim': vim_args,
        'vim.basic': vim_args,
        'vim.tiny': vim_args,
        'emacs': ['-nw'],
        'gedit': ['-w', '--new-window'],
        'nano': ['-R'],
        'code': ['-w', '-n'],
    }
    # the table is rebuilt on each call, so callers always get a fresh list
    return known_args.get(editor, [])


def get_editor():
    """Return the editor command to use.

    The ``VISUAL`` environment variable is preferred, then ``EDITOR``; a
    value found there is returned unchanged (it may contain arguments).
    Otherwise the platform defaults from ``get_default_editors()`` are
    looked up on the PATH and the full path of the first one found is
    returned.

    Raises EditorError when no editor can be found.
    """
    # Get the editor from the environment. Prefer VISUAL to EDITOR
    editor = os.environ.get('VISUAL') or os.environ.get('EDITOR')
    if editor:
        return editor

    # None found in the environment. Fallback to platform-specific defaults.
    # NOTE(review): each candidate is passed to which() verbatim, so an entry
    # carrying arguments (such as "open -t") is looked up as a single
    # executable name.
    for candidate in get_default_editors():
        path = which(candidate)
        if path is not None:
            return path

    # the space after the first sentence matters: both literals are
    # concatenated into one message
    raise EditorError("Unable to find a viable editor on this system. "
                      "Please consider setting your $EDITOR variable")


def get_tty_filename():
    """Return the name of the terminal device: 'CON:' on win32, else '/dev/tty'."""
    return 'CON:' if sys.platform == 'win32' else '/dev/tty'


def edit(filename=None, contents=None, use_tty=None, suffix=''):
    """Open a file in the user's editor and return its content afterwards.

    filename: file to edit. When None, a temporary file (named with
        ``suffix``) is created and removed before returning.
    contents: optional initial content written to the file before the editor
        starts; a str is encoded as UTF-8, bytes are written as is.
    use_tty: when true, the editor's stdout is connected to the terminal
        device instead of this process' stdout. When None (default) this is
        done only if stdin is a tty and stdout is not.
    suffix: suffix of the temporary file name (ignored when ``filename`` is
        given).

    Returns the content of the file as bytes once the editor process has
    exited, or None if the file does not exist at that point.

    Raises EditorError (from ``get_editor``) when no editor is available.
    """
    # editor
    editor = get_editor()

    # filename
    tmp_name = None
    if filename is None:
        # delete=False and an immediate close: the file is reopened below and
        # by the editor, and it is removed exactly once, in the finally clause
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            tmp_name = tmp.name
        filename = tmp_name

    stdout = None
    try:
        # write contents
        if contents is not None:
            if isinstance(contents, str):
                contents = contents.encode('utf-8')

            with open(filename, mode='wb') as file_stream:
                file_stream.write(contents)

        # args
        args = shlex.split(editor) +\
            get_editor_args(os.path.basename(os.path.realpath(editor))) +\
            [filename]

        # stdout: honour an explicit use_tty, auto-detect only when it is None
        if use_tty is None:
            use_tty = sys.stdin.isatty() and not sys.stdout.isatty()
        if use_tty:
            stdout = open(get_tty_filename(), 'wb')

        # call editor and wait for it to exit
        proc = subprocess.Popen(args, close_fds=True, stdout=stdout)
        proc.communicate()

        # read result
        result = None
        if os.path.isfile(filename):
            with open(filename, mode='rb') as file_stream:
                result = file_stream.read()

        # return result
        return result

    finally:
        # release the terminal device if it has been opened
        if stdout is not None:
            stdout.close()
        # delete the temporary file for security reasons
        if tmp_name is not None:
            try:
                os.remove(tmp_name)
            except OSError:
                pass
Loading

0 comments on commit 619d2e8

Please sign in to comment.