diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index bf850c975a..d4441fff0c 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -35,7 +35,7 @@ jobs: - {name: Typing, python: '3.10', os: ubuntu-latest, tox: typing} steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} cache: 'pip' @@ -46,7 +46,7 @@ jobs: pip install -U setuptools python -m pip install -U pip - name: cache mypy - uses: actions/cache@v3.0.2 + uses: actions/cache@v3.0.4 with: path: ./.mypy_cache key: mypy|${{ matrix.python }}|${{ hashFiles('setup.cfg') }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f0bb833394..0fe9e58840 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,12 +3,12 @@ ci: autoupdate_schedule: monthly repos: - repo: https://github.com/asottile/pyupgrade - rev: v2.32.0 + rev: v2.37.1 hooks: - id: pyupgrade args: ["--py37-plus"] - repo: https://github.com/asottile/reorder_python_imports - rev: v3.1.0 + rev: v3.8.1 hooks: - id: reorder-python-imports name: Reorder Python imports (src, tests) @@ -21,7 +21,7 @@ repos: args: ["--application-directories", "examples"] additional_dependencies: ["setuptools>60.9"] - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 22.6.0 hooks: - id: black - repo: https://github.com/PyCQA/flake8 @@ -36,7 +36,7 @@ repos: hooks: - id: pip-compile-multi-verify - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.2.0 + rev: v4.3.0 hooks: - id: fix-byte-order-marker - id: trailing-whitespace diff --git a/CHANGES.rst b/CHANGES.rst index 3d4de804db..6cadc40bf5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -3,9 +3,25 @@ Version 2.2.0 ------------- -Unreleased - -- Extracted utility functions from wsgi.py +- Add MarkupSafe as a dependency and use it to escape values when + rendering HTML. 
:issue:`2419` +- Added the ``werkzeug.debug.preserve_context`` mechanism for + restoring context-local data for a request when running code in the + debug console. :pr:`2439` +- Fix compatibility with Python 3.11 by ensuring that ``end_lineno`` + and ``end_col_offset`` are present on AST nodes. :issue:`2425` +- Add a new faster matching router based on a state + machine. :pr:`2433` +- Names within options headers are always converted to lowercase. This + matches :rfc:`6266`, which states that the case is not relevant. :issue:`2442` +- ``AnyConverter`` validates the value passed for it when building + URLs. :issue:`2388` +- The debugger shows enhanced error locations in tracebacks in Python + 3.11. :issue:`2407` +- Extracted get_content_length, get_query_string, get_path_info + utility functions from wsgi.py. :pr:`2415` +- Extracted is_resource_modified and parse_cookie from http.py + to sansio/http.py. :issue:`2408` Version 2.1.2 diff --git a/docs/levels.rst b/docs/levels.rst index f36f8b09d3..a07fd86d9f 100644 --- a/docs/levels.rst +++ b/docs/levels.rst @@ -19,7 +19,7 @@ user with the name entered. .. code-block:: python - from html import escape + from markupsafe import escape from werkzeug.wrappers import Request, Response @Request.application @@ -38,7 +38,7 @@ user with the name entered. Alternatively the same application could be used without request and response objects but by taking advantage of the parsing functions werkzeug provides:: - from html import escape + from markupsafe import escape from werkzeug.formparser import parse_form_data def hello_world(environ, start_response): diff --git a/docs/local.rst b/docs/local.rst index e4651a44ee..015b0e3e97 100644 --- a/docs/local.rst +++ b/docs/local.rst @@ -1,77 +1,110 @@ -============== Context Locals ============== .. module:: werkzeug.local -Sooner or later you have some things you want to have in every single view -or helper function or whatever. In PHP the way to go are global -variables. 
However, that isn't possible in WSGI applications without a -major drawback: As soon as you operate on the global namespace your -application isn't thread-safe any longer. +You may find that you have some data during each request that you want +to use across functions. Instead of passing these as arguments between +every function, you may want to access them as global data. However, +using global variables in Python web applications is not thread safe; +different workers might interfere with each other's data. + +Instead of storing common data during a request using global variables, +you must use context-local variables. A context local is +defined/imported globally, but the data it contains is specific to the +current thread, asyncio task, or greenlet. You won't accidentally get +or overwrite another worker's data. + +The current approach for storing per-context data in Python is the +:mod:`contextvars` module. Context vars store data per thread, async +task, or greenlet. This replaces the older :class:`threading.local` +which only handled threads. -The Python standard library has a concept called "thread locals" (or thread-local -data). A thread local is a global object in which you can put stuff in and get back -later in a thread-safe and thread-specific way. That means that whenever you set -or get a value on a thread local object, the thread local object checks in which -thread you are and retrieves the value corresponding to your thread (if one exists). -So, you won't accidentally get another thread's data. +Werkzeug provides wrappers around :class:`~contextvars.ContextVar` to +make it easier to work with. -This approach, however, has a few disadvantages. For example, besides threads, -there are other types of concurrency in Python. A very popular one -is greenlets. Also, whether every request gets its own thread is not -guaranteed in WSGI. 
It could be that a request is reusing a thread from -a previous request, and hence data is left over in the thread local object. -Werkzeug provides its own implementation of local data storage called `werkzeug.local`. -This approach provides a similar functionality to thread locals but also works with -greenlets. +Proxy Objects +============= -Here's a simple example of how one could use werkzeug.local:: +:class:`LocalProxy` allows treating a context var as an object directly +instead of needing to use and check +:meth:`ContextVar.get() <contextvars.ContextVar.get>`. If the context +var is set, the local proxy will look and behave like the object the var +is set to. If it's not set, a ``RuntimeError`` is raised for most +operations. - from werkzeug.local import Local, LocalManager +.. code-block:: python - local = Local() - local_manager = LocalManager([local]) + from contextvars import ContextVar + from werkzeug.local import LocalProxy - def application(environ, start_response): - local.request = request = Request(environ) + _request_var = ContextVar("request") + request = LocalProxy(_request_var) + + from werkzeug.wrappers import Request + + @Request.application + def app(r): + _request_var.set(r) + check_auth() ... - application = local_manager.make_middleware(application) + from werkzeug.exceptions import Unauthorized -This binds the request to `local.request`. Every other piece of code executed -after this assignment in the same context can safely access local.request and -will get the same request object. The `make_middleware` method on the local -manager ensures that all references to the local objects are cleared up after -the request. + def check_auth(): + if request.form["username"] != "admin": + raise Unauthorized() -The same context means the same greenlet (if you're using greenlets) in -the same thread and same process. +Accessing ``request`` will point to the specific request that each +server worker is handling. You can treat ``request`` just like an actual +``Request`` object. 
-If a request object is not yet set on the local object and you try to -access it, you will get an `AttributeError`. You can use `getattr` to avoid -that:: +``bool(proxy)`` will always return ``False`` if the var is not set. If +you need access to the object directly instead of the proxy, you can get +it with the :meth:`~LocalProxy._get_current_object` method. - def get_request(): - return getattr(local, 'request', None) +.. autoclass:: LocalProxy + :members: _get_current_object -This will try to get the request or return `None` if the request is not -(yet?) available. -Note that local objects cannot manage themselves, for that you need a local -manager. You can pass a local manager multiple locals or add additionals -later by appending them to `manager.locals` and every time the manager -cleans up it will clean up all the data left in the locals for this -context. +Stacks and Namespaces +===================== -.. autofunction:: release_local +:class:`~contextvars.ContextVar` stores one value at a time. You may +find that you need to store a stack of items, or a namespace with +multiple attributes. A list or dict can be used for these, but using +them as context var values requires some extra care. Werkzeug provides +:class:`LocalStack` which wraps a list, and :class:`Local` which wraps a +dict. -.. autoclass:: LocalManager - :members: cleanup, make_middleware, middleware +There is some amount of performance penalty associated with these +objects. Because lists and dicts are mutable, :class:`LocalStack` and +:class:`Local` need to do extra work to ensure data isn't shared between +nested contexts. If possible, design your application to use +:class:`LocalProxy` around a context var directly. .. autoclass:: LocalStack - :members: push, pop, top + :members: push, pop, top, __call__ -.. autoclass:: LocalProxy - :members: _get_current_object +.. 
autoclass:: Local + :members: __call__ + + +Releasing Data +============== + +A previous implementation of ``Local`` used internal data structures +which could not be cleaned up automatically when each context ended. +Instead, the following utilities could be used to release the data. + +.. warning:: + + This should not be needed with the modern implementation, as the + data in context vars is automatically managed by Python. It is kept + for compatibility for now, but may be removed in the future. + +.. autoclass:: LocalManager + :members: cleanup, make_middleware, middleware + +.. autofunction:: release_local diff --git a/docs/routing.rst b/docs/routing.rst index 6ad3564490..ca89a8221c 100644 --- a/docs/routing.rst +++ b/docs/routing.rst @@ -105,6 +105,10 @@ converters can be overridden or extended through :attr:`Map.converters`. .. autoclass:: UUIDConverter +If a custom converter can match a forward slash, ``/``, it should have +the attribute ``part_isolating`` set to ``False``. This will ensure +that rules using the custom converter are correctly matched. + Maps, Rules and Adapters ======================== @@ -127,6 +131,13 @@ Maps, Rules and Adapters :members: empty +Matchers +======== + +.. autoclass:: StateMachineMatcher + :members: + + Rule Factories ============== @@ -261,3 +272,34 @@ scheme and host, ``force_external=True`` is implied. url = adapter.build("comm") assert url == "ws://example.org/ws" + + +State Machine Matching +====================== + +The default matching algorithm uses a state machine that transitions +between parts of the request path to find a match. To understand how +this works consider this rule:: + + /resource/<id> + +Firstly this rule is decomposed into two ``RulePart``. The first is a +static part with a content equal to ``resource``, the second is +dynamic and requires a regex match to ``[^/]+``. + +A state machine is then created with an initial state that represents +the rule's first ``/``. 
This initial state has a single, static +transition to the next state which represents the rule's second +``/``. This second state has a single dynamic transition to the final +state which includes the rule. + +To match a path the matcher starts at the initial state and follows +transitions that work. Clearly a trial path of ``/resource/2`` has the +parts ``""``, ``resource``, and ``2`` which match the transitions and +hence a rule will match. Whereas ``/other/2`` will not match as there +is no transition for the ``other`` part from the initial state. + +The only diversion from this rule is if a ``RulePart`` is not +part-isolating i.e. it will match ``/``. In this case the ``RulePart`` +is considered final and represents a transition that must include all +the subsequent parts of the trial path. diff --git a/examples/plnt/sync.py b/examples/plnt/sync.py index b569dd9fd7..2a94cb1478 100644 --- a/examples/plnt/sync.py +++ b/examples/plnt/sync.py @@ -1,8 +1,8 @@ """Does the synchronization. 
Called by "manage-plnt.py sync".""" from datetime import datetime -from html import escape import feedparser +from markupsafe import escape from .database import Blog from .database import Entry diff --git a/requirements/dev.txt b/requirements/dev.txt index 3699907483..50e233eca4 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -8,33 +8,35 @@ -r docs.txt -r tests.txt -r typing.txt +build==0.8.0 + # via pip-tools cfgv==3.3.1 # via pre-commit -click==8.1.2 +click==8.1.3 # via # pip-compile-multi # pip-tools distlib==0.3.4 # via virtualenv -filelock==3.6.0 +filelock==3.7.1 # via # tox # virtualenv greenlet==1.1.2 ; python_version < "3.11" # via -r requirements/tests.in -identify==2.5.0 +identify==2.5.1 # via pre-commit -nodeenv==1.6.0 +nodeenv==1.7.0 # via pre-commit pep517==0.12.0 - # via pip-tools + # via build pip-compile-multi==2.4.5 # via -r requirements/dev.in -pip-tools==6.6.0 +pip-tools==6.8.0 # via pip-compile-multi platformdirs==2.5.2 # via virtualenv -pre-commit==2.18.1 +pre-commit==2.20.0 # via -r requirements/dev.in pyyaml==6.0 # via pre-commit @@ -48,9 +50,9 @@ toml==0.10.2 # tox toposort==1.7 # via pip-compile-multi -tox==3.25.0 +tox==3.25.1 # via -r requirements/dev.in -virtualenv==20.14.1 +virtualenv==20.15.1 # via # pre-commit # tox diff --git a/requirements/docs.txt b/requirements/docs.txt index 88f627909d..8238e785ff 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -7,19 +7,19 @@ # alabaster==0.7.12 # via sphinx -babel==2.10.1 +babel==2.10.3 # via sphinx -certifi==2021.10.8 +certifi==2022.6.15 # via requests -charset-normalizer==2.0.12 +charset-normalizer==2.1.0 # via requests -docutils==0.17.1 +docutils==0.18.1 # via sphinx idna==3.3 # via requests -imagesize==1.3.0 +imagesize==1.4.1 # via sphinx -jinja2==3.1.1 +jinja2==3.1.2 # via sphinx markupsafe==2.1.1 # via jinja2 @@ -31,15 +31,15 @@ pallets-sphinx-themes==2.0.2 # via -r requirements/docs.in pygments==2.12.0 # via sphinx -pyparsing==3.0.8 +pyparsing==3.0.9 # 
via packaging pytz==2022.1 # via babel -requests==2.27.1 +requests==2.28.1 # via sphinx snowballstemmer==2.2.0 # via sphinx -sphinx==4.5.0 +sphinx==5.0.2 # via # -r requirements/docs.in # pallets-sphinx-themes @@ -61,5 +61,5 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -urllib3==1.26.9 +urllib3==1.26.10 # via requests diff --git a/requirements/tests.txt b/requirements/tests.txt index c4171ff284..689d8ba0e5 100644 --- a/requirements/tests.txt +++ b/requirements/tests.txt @@ -7,9 +7,9 @@ # attrs==21.4.0 # via pytest -cffi==1.15.0 +cffi==1.15.1 # via cryptography -cryptography==37.0.1 +cryptography==37.0.4 # via -r requirements/tests.in ephemeral-port-reserve==1.1.4 # via -r requirements/tests.in @@ -21,13 +21,13 @@ packaging==21.3 # via pytest pluggy==1.0.0 # via pytest -psutil==5.9.0 +psutil==5.9.1 # via pytest-xprocess py==1.11.0 # via pytest pycparser==2.21 # via cffi -pyparsing==3.0.8 +pyparsing==3.0.9 # via packaging pytest==7.1.2 # via @@ -36,9 +36,9 @@ pytest==7.1.2 # pytest-xprocess pytest-timeout==2.1.0 # via -r requirements/tests.in -pytest-xprocess==0.18.1 +pytest-xprocess==0.19.0 # via -r requirements/tests.in tomli==2.0.1 # via pytest -watchdog==2.1.7 +watchdog==2.1.9 # via -r requirements/tests.in diff --git a/requirements/typing.txt b/requirements/typing.txt index 0a8a7c942f..1f6de2c955 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -5,17 +5,17 @@ # # pip-compile-multi # -mypy==0.950 +mypy==0.961 # via -r requirements/typing.in mypy-extensions==0.4.3 # via mypy tomli==2.0.1 # via mypy -types-contextvars==2.4.5 +types-contextvars==2.4.7 # via -r requirements/typing.in -types-dataclasses==0.6.5 +types-dataclasses==0.6.6 # via -r requirements/typing.in -types-setuptools==57.4.14 +types-setuptools==62.6.1 # via -r requirements/typing.in -typing-extensions==4.2.0 +typing-extensions==4.3.0 # via mypy diff --git a/setup.py b/setup.py index 1126fea396..413ce2a72e 100644 --- a/setup.py +++ 
b/setup.py @@ -3,5 +3,6 @@ # Metadata goes in setup.cfg. These are here for GitHub's dependency graph. setup( name="Werkzeug", + install_requires=["MarkupSafe>=2.1.1"], extras_require={"watchdog": ["watchdog"]}, ) diff --git a/src/werkzeug/__init__.py b/src/werkzeug/__init__.py index 68216e5d26..82a2ebce25 100644 --- a/src/werkzeug/__init__.py +++ b/src/werkzeug/__init__.py @@ -3,4 +3,4 @@ from .wrappers import Request as Request from .wrappers import Response as Response -__version__ = "2.2.0.dev0" +__version__ = "2.2.0a1" diff --git a/src/werkzeug/debug/__init__.py b/src/werkzeug/debug/__init__.py index 49001e0e2d..bec43734a5 100644 --- a/src/werkzeug/debug/__init__.py +++ b/src/werkzeug/debug/__init__.py @@ -8,6 +8,8 @@ import time import typing as t import uuid +from contextlib import ExitStack +from contextlib import nullcontext from io import BytesIO from itertools import chain from os.path import basename @@ -225,8 +227,15 @@ class DebuggedApplication: from myapp import app app = DebuggedApplication(app, evalex=True) - The `evalex` keyword argument allows evaluating expressions in a - traceback's frame context. + The ``evalex`` argument allows evaluating expressions in any frame + of a traceback. This works by preserving each frame with its local + state. Some state, such as :doc:`local`, cannot be restored with the + frame by default. When ``evalex`` is enabled, + ``environ["werkzeug.debug.preserve_context"]`` will be a callable + that takes a context manager, and can be called multiple times. + Each context manager will be entered before evaluating code in the + frame, then exited again, so they can perform setup and cleanup for + each call. :param app: the WSGI application to run debugged. :param evalex: enable exception evaluation feature (interactive @@ -243,6 +252,9 @@ class DebuggedApplication: to `True`. :param pin_security: can be used to disable the pin based security system. :param pin_logging: enables the logging of the pin system. + + .. 
versionchanged:: 2.2 + Added the ``werkzeug.debug.preserve_context`` environ key. """ _pin: str @@ -264,6 +276,7 @@ def __init__( self.app = app self.evalex = evalex self.frames: t.Dict[int, t.Union[DebugFrameSummary, _ConsoleFrame]] = {} + self.frame_contexts: t.Dict[int, t.List[t.ContextManager[None]]] = {} self.request_key = request_key self.console_path = console_path self.console_init_func = console_init_func @@ -306,6 +319,11 @@ def debug_application( self, environ: "WSGIEnvironment", start_response: "StartResponse" ) -> t.Iterator[bytes]: """Run the application and conserve the traceback frames.""" + contexts: t.List[t.ContextManager[t.Any]] = [] + + if self.evalex: + environ["werkzeug.debug.preserve_context"] = contexts.append + app_iter = None try: app_iter = self.app(environ, start_response) @@ -320,6 +338,7 @@ def debug_application( for frame in tb.all_frames: self.frames[id(frame)] = frame + self.frame_contexts[id(frame)] = contexts is_trusted = bool(self.check_pin_trust(environ)) html = tb.render_debugger_html( @@ -344,14 +363,20 @@ def debug_application( environ["wsgi.errors"].write("".join(tb.render_traceback_text())) - def execute_command( + def execute_command( # type: ignore[return] self, request: Request, command: str, frame: t.Union[DebugFrameSummary, _ConsoleFrame], ) -> Response: """Execute a command in a console.""" - return Response(frame.eval(command), mimetype="text/html") + contexts = self.frame_contexts.get(id(frame), []) + + with ExitStack() as exit_stack: + for cm in contexts: + exit_stack.enter_context(cm) + + return Response(frame.eval(command), mimetype="text/html") def display_console(self, request: Request) -> Response: """Display a standalone shell.""" diff --git a/src/werkzeug/debug/console.py b/src/werkzeug/debug/console.py index f4a0e26a7d..69974d1235 100644 --- a/src/werkzeug/debug/console.py +++ b/src/werkzeug/debug/console.py @@ -1,10 +1,11 @@ import code import sys import typing as t -from html import escape +from 
contextvars import ContextVar from types import CodeType -from ..local import Local +from markupsafe import escape + from .repr import debug_repr from .repr import dump from .repr import helper @@ -12,7 +13,8 @@ if t.TYPE_CHECKING: import codeop # noqa: F401 -_local = Local() +_stream: ContextVar["HTMLStringO"] = ContextVar("werkzeug.debug.console.stream") +_ipy: ContextVar = ContextVar("werkzeug.debug.console.ipy") class HTMLStringO: @@ -64,26 +66,29 @@ class ThreadedStream: def push() -> None: if not isinstance(sys.stdout, ThreadedStream): sys.stdout = t.cast(t.TextIO, ThreadedStream()) - _local.stream = HTMLStringO() + + _stream.set(HTMLStringO()) @staticmethod def fetch() -> str: try: - stream = _local.stream - except AttributeError: + stream = _stream.get() + except LookupError: return "" - return stream.reset() # type: ignore + + return stream.reset() @staticmethod def displayhook(obj: object) -> None: try: - stream = _local.stream - except AttributeError: + stream = _stream.get() + except LookupError: return _displayhook(obj) # type: ignore + # stream._write bypasses escaping as debug_repr is # already generating HTML for us. 
if obj is not None: - _local._current_ipy.locals["_"] = obj + _ipy.get().locals["_"] = obj stream._write(debug_repr(obj)) def __setattr__(self, name: str, value: t.Any) -> None: @@ -94,9 +99,10 @@ def __dir__(self) -> t.List[str]: def __getattribute__(self, name: str) -> t.Any: try: - stream = _local.stream - except AttributeError: - stream = sys.__stdout__ + stream = _stream.get() + except LookupError: + stream = sys.__stdout__ # type: ignore[assignment] + return getattr(stream, name) def __repr__(self) -> str: @@ -167,7 +173,7 @@ def runsource(self, source: str, **kwargs: t.Any) -> str: # type: ignore del self.buffer[:] finally: output = ThreadedStream.fetch() - return prompt + escape(source) + output + return f"{prompt}{escape(source)}{output}" def runcode(self, code: CodeType) -> None: try: @@ -208,7 +214,7 @@ def __init__( self._ipy = _InteractiveConsole(globals, locals) def eval(self, code: str) -> str: - _local._current_ipy = self._ipy + _ipy.set(self._ipy) old_sys_stdout = sys.stdout try: return self._ipy.runsource(code) diff --git a/src/werkzeug/debug/repr.py b/src/werkzeug/debug/repr.py index 3cc45d5d66..c0872f1808 100644 --- a/src/werkzeug/debug/repr.py +++ b/src/werkzeug/debug/repr.py @@ -9,9 +9,10 @@ import sys import typing as t from collections import deque -from html import escape from traceback import format_exception_only +from markupsafe import escape + missing = object() _paragraph_re = re.compile(r"(?:\r\n|\r|\n){2,}") RegexType = type(_paragraph_re) diff --git a/src/werkzeug/debug/tbtools.py b/src/werkzeug/debug/tbtools.py index 13f6a87a9b..ea90de9254 100644 --- a/src/werkzeug/debug/tbtools.py +++ b/src/werkzeug/debug/tbtools.py @@ -6,7 +6,8 @@ import sysconfig import traceback import typing as t -from html import escape + +from markupsafe import escape from ..utils import cached_property from .console import Console @@ -174,15 +175,19 @@ def _process_traceback( elif hide_value or hidden: continue - new_stack.append( - DebugFrameSummary( - 
filename=fs.filename, - lineno=fs.lineno, - name=fs.name, - locals=f.f_locals, - globals=f.f_globals, - ) - ) + frame_args: t.Dict[str, t.Any] = { + "filename": fs.filename, + "lineno": fs.lineno, + "name": fs.name, + "locals": f.f_locals, + "globals": f.f_globals, + } + + if hasattr(fs, "colno"): + frame_args["colno"] = fs.colno # type: ignore[attr-defined] + frame_args["end_colno"] = fs.end_colno # type: ignore[attr-defined] + + new_stack.append(DebugFrameSummary(**frame_args)) # The codeop module is used to compile code from the interactive # debugger. Hide any codeop frames from the bottom of the traceback. @@ -361,7 +366,7 @@ def info(self) -> t.Optional[str]: @cached_property def is_library(self) -> bool: return any( - self.filename.startswith(os.path.realpath(path)) + self.filename.startswith((path, os.path.realpath(path))) for path in sysconfig.get_paths().values() ) @@ -384,9 +389,21 @@ def render_line(line: str, cls: str) -> None: line = line.expandtabs().rstrip() stripped_line = line.strip() prefix = len(line) - len(stripped_line) + colno = getattr(self, "colno", 0) + end_colno = getattr(self, "end_colno", 0) + + if cls == "current" and colno and end_colno: + arrow = ( + f'\n{" " * prefix}' + f'{" " * (colno - prefix)}{"^" * (end_colno - colno)}' + ) + else: + arrow = "" + rendered_lines.append( f'
{" " * prefix}'
-                f"{escape(stripped_line) if stripped_line else ' '}
" + f"{escape(stripped_line) if stripped_line else ' '}" + f"{arrow if arrow else ''}" ) if lines: diff --git a/src/werkzeug/exceptions.py b/src/werkzeug/exceptions.py index d089942e31..013df72bd3 100644 --- a/src/werkzeug/exceptions.py +++ b/src/werkzeug/exceptions.py @@ -45,7 +45,9 @@ def application(request): """ import typing as t from datetime import datetime -from html import escape + +from markupsafe import escape +from markupsafe import Markup from ._internal import _get_environ @@ -101,7 +103,7 @@ def get_description( else: description = self.description - description = escape(description).replace("\n", "
") + description = escape(description).replace("\n", Markup("
")) return f"

{description}

" def get_body( diff --git a/src/werkzeug/http.py b/src/werkzeug/http.py index 936990013f..066cfc13bb 100644 --- a/src/werkzeug/http.py +++ b/src/werkzeug/http.py @@ -16,13 +16,12 @@ from urllib.parse import unquote_to_bytes as _unquote from urllib.request import parse_http_list as _parse_list_header -from ._internal import _cookie_parse_impl from ._internal import _cookie_quote +from ._internal import _dt_as_utc from ._internal import _make_cookie_domain from ._internal import _to_bytes from ._internal import _to_str from ._internal import _wsgi_decoding_dance -from werkzeug._internal import _dt_as_utc if t.TYPE_CHECKING: import typing_extensions as te @@ -390,6 +389,9 @@ def parse_options_header( :param value: The header value to parse. + .. versionchanged:: 2.2 + Option names are always converted to lowercase. + .. versionchanged:: 2.1 The ``multiple`` parameter is deprecated and will be removed in Werkzeug 2.2. @@ -440,7 +442,7 @@ def parse_options_header( if not encoding: encoding = continued_encoding continued_encoding = encoding - option = unquote_header_value(option) + option = unquote_header_value(option).lower() if option_value is not None: option_value = unquote_header_value(option_value, option == "filename") @@ -1047,57 +1049,17 @@ def is_resource_modified( .. versionchanged:: 1.0.0 The check is run for methods other than ``GET`` and ``HEAD``. """ - if etag is None and data is not None: - etag = generate_etag(data) - elif data is not None: - raise TypeError("both data and etag given") - - unmodified = False - if isinstance(last_modified, str): - last_modified = parse_date(last_modified) - - # HTTP doesn't use microsecond, remove it to avoid false positive - # comparisons. Mark naive datetimes as UTC. 
- if last_modified is not None: - last_modified = _dt_as_utc(last_modified.replace(microsecond=0)) - - if_range = None - if not ignore_if_range and "HTTP_RANGE" in environ: - # https://tools.ietf.org/html/rfc7233#section-3.2 - # A server MUST ignore an If-Range header field received in a request - # that does not contain a Range header field. - if_range = parse_if_range_header(environ.get("HTTP_IF_RANGE")) - - if if_range is not None and if_range.date is not None: - modified_since: t.Optional[datetime] = if_range.date - else: - modified_since = parse_date(environ.get("HTTP_IF_MODIFIED_SINCE")) - - if modified_since and last_modified and last_modified <= modified_since: - unmodified = True - - if etag: - etag, _ = unquote_etag(etag) - etag = t.cast(str, etag) - - if if_range is not None and if_range.etag is not None: - unmodified = parse_etags(if_range.etag).contains(etag) - else: - if_none_match = parse_etags(environ.get("HTTP_IF_NONE_MATCH")) - if if_none_match: - # https://tools.ietf.org/html/rfc7232#section-3.2 - # "A recipient MUST use the weak comparison function when comparing - # entity-tags for If-None-Match" - unmodified = if_none_match.contains_weak(etag) - - # https://tools.ietf.org/html/rfc7232#section-3.1 - # "Origin server MUST use the strong comparison function when - # comparing entity-tags for If-Match" - if_match = parse_etags(environ.get("HTTP_IF_MATCH")) - if if_match: - unmodified = not if_match.is_strong(etag) - - return not unmodified + return _sansio_http.is_resource_modified( + http_range=environ.get("HTTP_RANGE"), + http_if_range=environ.get("HTTP_IF_RANGE"), + http_if_modified_since=environ.get("HTTP_IF_MODIFIED_SINCE"), + http_if_none_match=environ.get("HTTP_IF_NONE_MATCH"), + http_if_match=environ.get("HTTP_IF_MATCH"), + etag=etag, + data=data, + last_modified=last_modified, + ignore_if_range=ignore_if_range, + ) def remove_entity_headers( @@ -1190,29 +1152,15 @@ def parse_cookie( The ``cls`` parameter was added. 
""" if isinstance(header, dict): - header = header.get("HTTP_COOKIE", "") + cookie = header.get("HTTP_COOKIE", "") elif header is None: - header = "" - - # PEP 3333 sends headers through the environ as latin1 decoded - # strings. Encode strings back to bytes for parsing. - if isinstance(header, str): - header = header.encode("latin1", "replace") - - if cls is None: - cls = ds.MultiDict - - def _parse_pairs() -> t.Iterator[t.Tuple[str, str]]: - for key, val in _cookie_parse_impl(header): # type: ignore - key_str = _to_str(key, charset, errors, allow_none_charset=True) - - if not key_str: - continue - - val_str = _to_str(val, charset, errors, allow_none_charset=True) - yield key_str, val_str + cookie = "" + else: + cookie = header - return cls(_parse_pairs()) + return _sansio_http.parse_cookie( + cookie=cookie, charset=charset, errors=errors, cls=cls + ) def dump_cookie( @@ -1369,3 +1317,4 @@ def is_byte_range_valid( # circular dependencies from . import datastructures as ds +from .sansio import http as _sansio_http diff --git a/src/werkzeug/local.py b/src/werkzeug/local.py index e297f38362..16e3ce0d1b 100644 --- a/src/werkzeug/local.py +++ b/src/werkzeug/local.py @@ -5,6 +5,7 @@ from contextvars import ContextVar from functools import partial from functools import update_wrapper +from operator import attrgetter from .wsgi import ClosingIterator @@ -13,26 +14,16 @@ from _typeshed.wsgi import WSGIApplication from _typeshed.wsgi import WSGIEnvironment +T = t.TypeVar("T") F = t.TypeVar("F", bound=t.Callable[..., t.Any]) def release_local(local: t.Union["Local", "LocalStack"]) -> None: - """Releases the contents of the local for the current context. - This makes it possible to use locals without a manager. + """Release the data for the current context in a :class:`Local` or + :class:`LocalStack` without using a :class:`LocalManager`. 
- Example:: - - >>> loc = Local() - >>> loc.foo = 42 - >>> release_local(loc) - >>> hasattr(loc, 'foo') - False - - With this function one can release :class:`Local` objects as well - as :class:`LocalStack` objects. However it is not possible to - release data held by proxies that way, one always has to retain - a reference to the underlying local object in order to be able - to release it. + This should not be needed for modern use cases, and may be removed + in the future. .. versionadded:: 0.6.1 """ @@ -40,154 +31,204 @@ def release_local(local: t.Union["Local", "LocalStack"]) -> None: class Local: - __slots__ = ("_storage",) + """Create a namespace of context-local data. This wraps a + :class:`ContextVar` containing a :class:`dict` value. + + This may incur a performance penalty compared to using individual + context vars, as it has to copy data to avoid mutating the dict + between nested contexts. + + :param context_var: The :class:`~contextvars.ContextVar` to use as + storage for this local. If not given, one will be created. + Context vars not created at the global scope may interfere with + garbage collection. - def __init__(self) -> None: - object.__setattr__(self, "_storage", ContextVar("local_storage")) + .. versionchanged:: 2.0 + Uses ``ContextVar`` instead of a custom storage implementation. + """ - def __iter__(self) -> t.Iterator[t.Tuple[int, t.Any]]: - return iter(self._storage.get({}).items()) + __slots__ = ("__storage",) - def __call__(self, proxy: str) -> "LocalProxy": - """Create a proxy for a name.""" - return LocalProxy(self, proxy) + def __init__( + self, context_var: t.Optional[ContextVar[t.Dict[str, t.Any]]] = None + ) -> None: + if context_var is None: + # A ContextVar not created at global scope interferes with + # Python's garbage collection. However, a local only makes + # sense defined at the global scope as well, in which case + # the GC issue doesn't seem relevant. 
+ context_var = ContextVar(f"werkzeug.Local<{id(self)}>.storage") + + object.__setattr__(self, "_Local__storage", context_var) + + def __iter__(self) -> t.Iterator[t.Tuple[str, t.Any]]: + return iter(self.__storage.get({}).items()) + + def __call__( + self, name: str, *, unbound_message: t.Optional[str] = None + ) -> "LocalProxy": + """Create a :class:`LocalProxy` that accesses an attribute on this + local namespace. + + :param name: Proxy this attribute. + :param unbound_message: The error message that the proxy will + show if the attribute isn't set. + """ + return LocalProxy(self, name, unbound_message=unbound_message) def __release_local__(self) -> None: - self._storage.set({}) + self.__storage.set({}) def __getattr__(self, name: str) -> t.Any: - values = self._storage.get({}) - try: + values = self.__storage.get({}) + + if name in values: return values[name] - except KeyError: - raise AttributeError(name) from None + + raise AttributeError(name) def __setattr__(self, name: str, value: t.Any) -> None: - values = self._storage.get({}).copy() + values = self.__storage.get({}).copy() values[name] = value - self._storage.set(values) + self.__storage.set(values) def __delattr__(self, name: str) -> None: - values = self._storage.get({}).copy() - try: + values = self.__storage.get({}) + + if name in values: + values = values.copy() del values[name] - self._storage.set(values) - except KeyError: - raise AttributeError(name) from None - - -class LocalStack: - """This class works similar to a :class:`Local` but keeps a stack - of objects instead. This is best explained with an example:: - - >>> ls = LocalStack() - >>> ls.push(42) - >>> ls.top - 42 - >>> ls.push(23) - >>> ls.top - 23 - >>> ls.pop() - 23 - >>> ls.top - 42 - - They can be force released by using a :class:`LocalManager` or with - the :func:`release_local` function but the correct way is to pop the - item from the stack after using. 
When the stack is empty it will - no longer be bound to the current context (and as such released). - - By calling the stack without arguments it returns a proxy that resolves to - the topmost item on the stack. + self.__storage.set(values) + else: + raise AttributeError(name) + + +class LocalStack(t.Generic[T]): + """Create a stack of context-local data. This wraps a + :class:`ContextVar` containing a :class:`list` value. + + This may incur a performance penalty compared to using individual + context vars, as it has to copy data to avoid mutating the list + between nested contexts. + + :param context_var: The :class:`~contextvars.ContextVar` to use as + storage for this local. If not given, one will be created. + Context vars not created at the global scope may interfere with + garbage collection. + + .. versionchanged:: 2.0 + Uses ``ContextVar`` instead of a custom storage implementation. .. versionadded:: 0.6.1 """ - def __init__(self) -> None: - self._local = Local() + __slots__ = ("_storage",) - def __release_local__(self) -> None: - self._local.__release_local__() - - def __call__(self) -> "LocalProxy": - def _lookup() -> t.Any: - rv = self.top - if rv is None: - raise RuntimeError("object unbound") - return rv - - return LocalProxy(_lookup) - - def push(self, obj: t.Any) -> t.List[t.Any]: - """Pushes a new item to the stack""" - rv = getattr(self._local, "stack", []).copy() - rv.append(obj) - self._local.stack = rv - return rv + def __init__(self, context_var: t.Optional[ContextVar[t.List[T]]] = None) -> None: + if context_var is None: + # A ContextVar not created at global scope interferes with + # Python's garbage collection. However, a local only makes + # sense defined at the global scope as well, in which case + # the GC issue doesn't seem relevant. 
+ context_var = ContextVar(f"werkzeug.LocalStack<{id(self)}>.storage") - def pop(self) -> t.Any: - """Removes the topmost item from the stack, will return the - old value or `None` if the stack was already empty. + self._storage = context_var + + def __release_local__(self) -> None: + self._storage.set([]) + + def push(self, obj: T) -> t.List[T]: + """Add a new item to the top of the stack.""" + stack = self._storage.get([]).copy() + stack.append(obj) + self._storage.set(stack) + return stack + + def pop(self) -> t.Optional[T]: + """Remove the top item from the stack and return it. If the + stack is empty, return ``None``. """ - stack = getattr(self._local, "stack", None) - if stack is None: + stack = self._storage.get([]) + + if len(stack) == 0: return None - elif len(stack) == 1: - release_local(self._local) - return stack[-1] - else: - return stack.pop() + + rv = stack[-1] + self._storage.set(stack[:-1]) + return rv @property - def top(self) -> t.Any: + def top(self) -> t.Optional[T]: """The topmost item on the stack. If the stack is empty, `None` is returned. """ - try: - return self._local.stack[-1] - except (AttributeError, IndexError): + stack = self._storage.get([]) + + if len(stack) == 0: return None + return stack[-1] + + def __call__( + self, name: t.Optional[str] = None, *, unbound_message: t.Optional[str] = None + ) -> "LocalProxy": + """Create a :class:`LocalProxy` that accesses the top of this + local stack. + + :param name: If given, the proxy accesses this attribute of the + top item, rather than the item itself. + :param unbound_message: The error message that the proxy will + show if the stack is empty. + """ + return LocalProxy(self, name, unbound_message=unbound_message) + class LocalManager: - """Local objects cannot manage themselves. For that you need a local - manager. You can pass a local manager multiple locals or add them - later by appending them to `manager.locals`. 
Every time the manager - cleans up, it will clean up all the data left in the locals for this - context. + """Manage releasing the data for the current context in one or more + :class:`Local` and :class:`LocalStack` objects. + + This should not be needed for modern use cases, and may be removed + in the future. + + :param locals: A local or list of locals to manage. .. versionchanged:: 2.0 ``ident_func`` is deprecated and will be removed in Werkzeug 2.1. + .. versionchanged:: 0.7 + The ``ident_func`` parameter was added. + .. versionchanged:: 0.6.1 The :func:`release_local` function can be used instead of a manager. - - .. versionchanged:: 0.7 - The ``ident_func`` parameter was added. """ + __slots__ = ("locals",) + def __init__( - self, locals: t.Optional[t.Iterable[t.Union[Local, LocalStack]]] = None + self, + locals: t.Optional[ + t.Union[Local, LocalStack, t.Iterable[t.Union[Local, LocalStack]]] + ] = None, ) -> None: if locals is None: self.locals = [] elif isinstance(locals, Local): self.locals = [locals] else: - self.locals = list(locals) + self.locals = list(locals) # type: ignore[arg-type] def cleanup(self) -> None: - """Manually clean up the data in the locals for this context. Call - this at the end of the request or use `make_middleware()`. + """Release the data in the locals for this context. Call this at + the end of each request or use :meth:`make_middleware`. """ for local in self.locals: release_local(local) def make_middleware(self, app: "WSGIApplication") -> "WSGIApplication": - """Wrap a WSGI application so that cleaning up happens after - request end. + """Wrap a WSGI application so that local data is released + automatically after the response has been sent for a request. """ def application( @@ -198,17 +239,14 @@ def application( return application def middleware(self, func: "WSGIApplication") -> "WSGIApplication": - """Like `make_middleware` but for decorating functions. 
+ """Like :meth:`make_middleware` but used as a decorator on the + WSGI application function. - Example usage:: + .. code-block:: python @manager.middleware def application(environ, start_response): ... - - The difference to `make_middleware` is that the function passed - will have all the arguments copied from the inner application - (name, docstring, module). """ return update_wrapper(self.make_middleware(func), func) @@ -275,7 +313,7 @@ def __get__(self, instance: "LocalProxy", owner: t.Optional[type] = None) -> t.A return self try: - obj = instance._get_current_object() + obj = instance._get_current_object() # type: ignore[misc] except RuntimeError: if self.fallback is None: raise @@ -337,29 +375,56 @@ def r_op(obj: t.Any, other: t.Any) -> t.Any: return t.cast(F, r_op) -class LocalProxy: - """A proxy to the object bound to a :class:`Local`. All operations - on the proxy are forwarded to the bound object. If no object is - bound, a :exc:`RuntimeError` is raised. +def _identity(o: T) -> T: + return o + + +class LocalProxy(t.Generic[T]): + """A proxy to the object bound to a context-local object. All + operations on the proxy are forwarded to the bound object. If no + object is bound, a ``RuntimeError`` is raised. + + :param local: The context-local object that provides the proxied + object. + :param name: Proxy this attribute from the proxied object. + :param unbound_message: The error message to show if the + context-local object is unbound. + + Proxy a :class:`~contextvars.ContextVar` to make it easier to + access. Pass a name to proxy that attribute. + + .. code-block:: python + + _request_var = ContextVar("request") + request = LocalProxy(_request_var) + session = LocalProxy(_request_var, "session") + + Proxy an attribute on a :class:`Local` namespace by calling the + local with the attribute name: .. 
code-block:: python - from werkzeug.local import Local - l = Local() + data = Local() + user = data("user") + + Proxy the top item on a :class:`LocalStack` by calling the local. + Pass a name to proxy that attribute. - # a proxy to whatever l.user is set to - user = l("user") + .. code-block:: - from werkzeug.local import LocalStack - _request_stack = LocalStack() + app_stack = LocalStack() + current_app = app_stack() + g = app_stack("g") - # a proxy to _request_stack.top - request = _request_stack() + Pass a function to proxy the return value from that function. This + was previously used to access attributes of local objects before + that was supported directly. + + .. code-block:: python - # a proxy to the session attribute of the request proxy session = LocalProxy(lambda: request.session) - ``__repr__`` and ``__class__`` are forwarded, so ``repr(x)`` and + ``__repr__`` and ``__class__`` are proxied, so ``repr(x)`` and ``isinstance(x, cls)`` will look like the proxied object. Use ``issubclass(type(x), LocalProxy)`` to check if an object is a proxy. @@ -370,10 +435,15 @@ class LocalProxy: isinstance(user, User) # True issubclass(type(user), LocalProxy) # True - :param local: The :class:`Local` or callable that provides the - proxied object. - :param name: The attribute name to look up on a :class:`Local`. Not - used if a callable is given. + .. versionchanged:: 2.2 + Can proxy a ``ContextVar`` or ``LocalStack`` directly. + + .. versionchanged:: 2.2 + The ``name`` parameter can be used with any proxied object, not + only ``Local``. + + .. versionchanged:: 2.2 + Added the ``unbound_message`` parameter. .. versionchanged:: 2.0 Updated proxied attributes and methods to reflect the current @@ -383,34 +453,74 @@ class LocalProxy: The class can be instantiated with a callable. 
""" - __slots__ = ("__local", "__name", "__wrapped__") + __slots__ = ("__wrapped__", "_get_current_object") + + _get_current_object: t.Callable[[], T] + """Return the current object this proxy is bound to. If the proxy is + unbound, this raises a ``RuntimeError``. + + This should be used if you need to pass the object to something that + doesn't understand the proxy. It can also be useful for performance + if you are accessing the object multiple times in a function, rather + than going through the proxy multiple times. + """ def __init__( self, - local: t.Union["Local", t.Callable[[], t.Any]], + local: t.Union[ContextVar[T], Local, LocalStack[T], t.Callable[[], T]], name: t.Optional[str] = None, + *, + unbound_message: t.Optional[str] = None, ) -> None: - object.__setattr__(self, "_LocalProxy__local", local) - object.__setattr__(self, "_LocalProxy__name", name) + if name is None: + get_name = _identity + else: + get_name = attrgetter(name) # type: ignore[assignment] + + if unbound_message is None: + unbound_message = "object is not bound" + + if isinstance(local, Local): + if name is None: + raise TypeError("'name' is required when proxying a 'Local' object.") + + def _get_current_object() -> T: + try: + return get_name(local) # type: ignore[return-value] + except AttributeError: + raise RuntimeError(unbound_message) from None + + elif isinstance(local, LocalStack): + + def _get_current_object() -> T: + obj = local.top # type: ignore[union-attr] + + if obj is None: + raise RuntimeError(unbound_message) + + return get_name(obj) + + elif isinstance(local, ContextVar): + + def _get_current_object() -> T: + try: + obj = local.get() # type: ignore[union-attr] + except LookupError: + raise RuntimeError(unbound_message) from None + + return get_name(obj) + + elif callable(local): + + def _get_current_object() -> T: + return get_name(local()) # type: ignore - if callable(local) and not hasattr(local, "__release_local__"): - # "local" is a callable that is not an instance 
of Local or - # LocalManager: mark it as a wrapped function. object.__setattr__(self, "__wrapped__", local) - def _get_current_object(self) -> t.Any: - """Return the current object. This is useful if you want the real - object behind the proxy at a time for performance reasons or because - you want to pass the object into a different context. - """ - if not hasattr(self.__local, "__release_local__"): # type: ignore - return self.__local() # type: ignore + else: + raise TypeError(f"Don't know how to proxy '{type(local)}'.") - try: - return getattr(self.__local, self.__name) # type: ignore - except AttributeError: - name = self.__name # type: ignore - raise RuntimeError(f"no object bound to {name}") from None + object.__setattr__(self, "_get_current_object", _get_current_object) __doc__ = _ProxyLookup( # type: ignore class_value=__doc__, fallback=lambda self: type(self).__doc__, is_attr=True diff --git a/src/werkzeug/routing.py b/src/werkzeug/routing.py deleted file mode 100644 index 1d3027b6d3..0000000000 --- a/src/werkzeug/routing.py +++ /dev/null @@ -1,2332 +0,0 @@ -"""When it comes to combining multiple controller or view functions -(however you want to call them) you need a dispatcher. A simple way -would be applying regular expression tests on the ``PATH_INFO`` and -calling registered callback functions that return the value then. - -This module implements a much more powerful system than simple regular -expression matching because it can also convert values in the URLs and -build URLs. - -Here a simple example that creates a URL map for an application with -two subdomains (www and kb) and some URL rules: - -.. 
code-block:: python - - m = Map([ - # Static URLs - Rule('/', endpoint='static/index'), - Rule('/about', endpoint='static/about'), - Rule('/help', endpoint='static/help'), - # Knowledge Base - Subdomain('kb', [ - Rule('/', endpoint='kb/index'), - Rule('/browse/', endpoint='kb/browse'), - Rule('/browse/<int:id>/', endpoint='kb/browse'), - Rule('/browse/<int:id>/<int:page>', endpoint='kb/browse') - ]) - ], default_subdomain='www') - -If the application doesn't use subdomains it's perfectly fine to not set -the default subdomain and not use the `Subdomain` rule factory. The -endpoint in the rules can be anything, for example import paths or -unique identifiers. The WSGI application can use those endpoints to get the -handler for that URL. It doesn't have to be a string at all but it's -recommended. - -Now it's possible to create a URL adapter for one of the subdomains and -build URLs: - -.. code-block:: python - - c = m.bind('example.com') - - c.build("kb/browse", dict(id=42)) - 'http://kb.example.com/browse/42/' - - c.build("kb/browse", dict()) - 'http://kb.example.com/browse/' - - c.build("kb/browse", dict(id=42, page=3)) - 'http://kb.example.com/browse/42/3' - - c.build("static/about") - '/about' - - c.build("static/index", force_external=True) - 'http://www.example.com/' - - c = m.bind('example.com', subdomain='kb') - - c.build("static/about") - 'http://www.example.com/about' - -The first argument to bind is the server name *without* the subdomain. -Per default it will assume that the script is mounted on the root, but -often that's not the case so you can provide the real mount point as -second argument: - -.. code-block:: python - - c = m.bind('example.com', '/applications/example') - -The third argument can be the subdomain, if not given the default -subdomain is used. For more details about binding have a look at the -documentation of the `MapAdapter`. - -And here is how you can match URLs: - -.. 
code-block:: python - - c = m.bind('example.com') - - c.match("/") - ('static/index', {}) - - c.match("/about") - ('static/about', {}) - - c = m.bind('example.com', '/', 'kb') - - c.match("/") - ('kb/index', {}) - - c.match("/browse/42/23") - ('kb/browse', {'id': 42, 'page': 23}) - -If matching fails you get a ``NotFound`` exception, if the rule thinks -it's a good idea to redirect (for example because the URL was defined -to have a slash at the end but the request was missing that slash) it -will raise a ``RequestRedirect`` exception. Both are subclasses of -``HTTPException`` so you can use those errors as responses in the -application. - -If matching succeeded but the URL rule was incompatible to the given -method (for example there were only rules for ``GET`` and ``HEAD`` but -routing tried to match a ``POST`` request) a ``MethodNotAllowed`` -exception is raised. -""" -import ast -import difflib -import posixpath -import re -import typing -import typing as t -import uuid -import warnings -from pprint import pformat -from string import Template -from threading import Lock -from types import CodeType - -from ._internal import _encode_idna -from ._internal import _get_environ -from ._internal import _to_bytes -from ._internal import _to_str -from ._internal import _wsgi_decoding_dance -from .datastructures import ImmutableDict -from .datastructures import MultiDict -from .exceptions import BadHost -from .exceptions import BadRequest -from .exceptions import HTTPException -from .exceptions import MethodNotAllowed -from .exceptions import NotFound -from .urls import _fast_url_quote -from .urls import url_encode -from .urls import url_join -from .urls import url_quote -from .urls import url_unquote -from .utils import cached_property -from .utils import redirect -from .wsgi import get_host - -if t.TYPE_CHECKING: - import typing_extensions as te - from _typeshed.wsgi import WSGIApplication - from _typeshed.wsgi import WSGIEnvironment - from .wrappers.request import 
Request - from .wrappers.response import Response - -_rule_re = re.compile( - r""" - (?P<static>[^<]*) # static rule data - < - (?: - (?P<converter>[a-zA-Z_][a-zA-Z0-9_]*) # converter name - (?:\((?P<args>.*?)\))? # converter arguments - \: # variable delimiter - )? - (?P<variable>[a-zA-Z_][a-zA-Z0-9_]*) # variable name - > - """, - re.VERBOSE, -) -_simple_rule_re = re.compile(r"<([^>]+)>") -_converter_args_re = re.compile( - r""" - ((?P<name>\w+)\s*=\s*)? - (?P<value> - True|False| - \d+.\d+| - \d+.| - \d+| - [\w\d_.]+| - [urUR]?(?P<stringval>"[^"]*?"|'[^']*') - )\s*, - """, - re.VERBOSE, -) - - -_PYTHON_CONSTANTS = {"None": None, "True": True, "False": False} - - -def _pythonize(value: str) -> t.Union[None, bool, int, float, str]: - if value in _PYTHON_CONSTANTS: - return _PYTHON_CONSTANTS[value] - for convert in int, float: - try: - return convert(value) # type: ignore - except ValueError: - pass - if value[:1] == value[-1:] and value[0] in "\"'": - value = value[1:-1] - return str(value) - - -def parse_converter_args(argstr: str) -> t.Tuple[t.Tuple, t.Dict[str, t.Any]]: - argstr += "," - args = [] - kwargs = {} - - for item in _converter_args_re.finditer(argstr): - value = item.group("stringval") - if value is None: - value = item.group("value") - value = _pythonize(value) - if not item.group("name"): - args.append(value) - else: - name = item.group("name") - kwargs[name] = value - - return tuple(args), kwargs - - -def parse_rule(rule: str) -> t.Iterator[t.Tuple[t.Optional[str], t.Optional[str], str]]: - """Parse a rule and return it as generator. Each iteration yields tuples - in the form ``(converter, arguments, variable)``. If the converter is - `None` it's a static url part, otherwise it's a dynamic one. 
- - :internal: - """ - pos = 0 - end = len(rule) - do_match = _rule_re.match - used_names = set() - while pos < end: - m = do_match(rule, pos) - if m is None: - break - data = m.groupdict() - if data["static"]: - yield None, None, data["static"] - variable = data["variable"] - converter = data["converter"] or "default" - if variable in used_names: - raise ValueError(f"variable name {variable!r} used twice.") - used_names.add(variable) - yield converter, data["args"] or None, variable - pos = m.end() - if pos < end: - remaining = rule[pos:] - if ">" in remaining or "<" in remaining: - raise ValueError(f"malformed url rule: {rule!r}") - yield None, None, remaining - - -class RoutingException(Exception): - """Special exceptions that require the application to redirect, notifying - about missing urls, etc. - - :internal: - """ - - -class RequestRedirect(HTTPException, RoutingException): - """Raise if the map requests a redirect. This is for example the case if - `strict_slashes` are activated and an url that requires a trailing slash. - - The attribute `new_url` contains the absolute destination url. 
- """ - - code = 308 - - def __init__(self, new_url: str) -> None: - super().__init__(new_url) - self.new_url = new_url - - def get_response( - self, - environ: t.Optional[t.Union["WSGIEnvironment", "Request"]] = None, - scope: t.Optional[dict] = None, - ) -> "Response": - return redirect(self.new_url, self.code) - - -class RequestPath(RoutingException): - """Internal exception.""" - - __slots__ = ("path_info",) - - def __init__(self, path_info: str) -> None: - super().__init__() - self.path_info = path_info - - -class RequestAliasRedirect(RoutingException): # noqa: B903 - """This rule is an alias and wants to redirect to the canonical URL.""" - - def __init__(self, matched_values: t.Mapping[str, t.Any]) -> None: - super().__init__() - self.matched_values = matched_values - - -class BuildError(RoutingException, LookupError): - """Raised if the build system cannot find a URL for an endpoint with the - values provided. - """ - - def __init__( - self, - endpoint: str, - values: t.Mapping[str, t.Any], - method: t.Optional[str], - adapter: t.Optional["MapAdapter"] = None, - ) -> None: - super().__init__(endpoint, values, method) - self.endpoint = endpoint - self.values = values - self.method = method - self.adapter = adapter - - @cached_property - def suggested(self) -> t.Optional["Rule"]: - return self.closest_rule(self.adapter) - - def closest_rule(self, adapter: t.Optional["MapAdapter"]) -> t.Optional["Rule"]: - def _score_rule(rule: "Rule") -> float: - return sum( - [ - 0.98 - * difflib.SequenceMatcher( - None, rule.endpoint, self.endpoint - ).ratio(), - 0.01 * bool(set(self.values or ()).issubset(rule.arguments)), - 0.01 * bool(rule.methods and self.method in rule.methods), - ] - ) - - if adapter and adapter.map._rules: - return max(adapter.map._rules, key=_score_rule) - - return None - - def __str__(self) -> str: - message = [f"Could not build url for endpoint {self.endpoint!r}"] - if self.method: - message.append(f" ({self.method!r})") - if self.values: - 
message.append(f" with values {sorted(self.values)!r}") - message.append(".") - if self.suggested: - if self.endpoint == self.suggested.endpoint: - if ( - self.method - and self.suggested.methods is not None - and self.method not in self.suggested.methods - ): - message.append( - " Did you mean to use methods" - f" {sorted(self.suggested.methods)!r}?" - ) - missing_values = self.suggested.arguments.union( - set(self.suggested.defaults or ()) - ) - set(self.values.keys()) - if missing_values: - message.append( - f" Did you forget to specify values {sorted(missing_values)!r}?" - ) - else: - message.append(f" Did you mean {self.suggested.endpoint!r} instead?") - return "".join(message) - - -class WebsocketMismatch(BadRequest): - """The only matched rule is either a WebSocket and the request is - HTTP, or the rule is HTTP and the request is a WebSocket. - """ - - -class ValidationError(ValueError): - """Validation error. If a rule converter raises this exception the rule - does not match the current URL and the next URL is tried. - """ - - -class RuleFactory: - """As soon as you have more complex URL setups it's a good idea to use rule - factories to avoid repetitive tasks. Some of them are builtin, others can - be added by subclassing `RuleFactory` and overriding `get_rules`. - """ - - def get_rules(self, map: "Map") -> t.Iterable["Rule"]: - """Subclasses of `RuleFactory` have to override this method and return - an iterable of rules.""" - raise NotImplementedError() - - -class Subdomain(RuleFactory): - """All URLs provided by this factory have the subdomain set to a - specific domain. 
For example if you want to use the subdomain for - the current language this can be a good setup:: - - url_map = Map([ - Rule('/', endpoint='#select_language'), - Subdomain('', [ - Rule('/', endpoint='index'), - Rule('/about', endpoint='about'), - Rule('/help', endpoint='help') - ]) - ]) - - All the rules except for the ``'#select_language'`` endpoint will now - listen on a two letter long subdomain that holds the language code - for the current request. - """ - - def __init__(self, subdomain: str, rules: t.Iterable[RuleFactory]) -> None: - self.subdomain = subdomain - self.rules = rules - - def get_rules(self, map: "Map") -> t.Iterator["Rule"]: - for rulefactory in self.rules: - for rule in rulefactory.get_rules(map): - rule = rule.empty() - rule.subdomain = self.subdomain - yield rule - - -class Submount(RuleFactory): - """Like `Subdomain` but prefixes the URL rule with a given string:: - - url_map = Map([ - Rule('/', endpoint='index'), - Submount('/blog', [ - Rule('/', endpoint='blog/index'), - Rule('/entry/', endpoint='blog/show') - ]) - ]) - - Now the rule ``'blog/show'`` matches ``/blog/entry/``. - """ - - def __init__(self, path: str, rules: t.Iterable[RuleFactory]) -> None: - self.path = path.rstrip("/") - self.rules = rules - - def get_rules(self, map: "Map") -> t.Iterator["Rule"]: - for rulefactory in self.rules: - for rule in rulefactory.get_rules(map): - rule = rule.empty() - rule.rule = self.path + rule.rule - yield rule - - -class EndpointPrefix(RuleFactory): - """Prefixes all endpoints (which must be strings for this factory) with - another string. 
This can be useful for sub applications:: - - url_map = Map([ - Rule('/', endpoint='index'), - EndpointPrefix('blog/', [Submount('/blog', [ - Rule('/', endpoint='index'), - Rule('/entry/', endpoint='show') - ])]) - ]) - """ - - def __init__(self, prefix: str, rules: t.Iterable[RuleFactory]) -> None: - self.prefix = prefix - self.rules = rules - - def get_rules(self, map: "Map") -> t.Iterator["Rule"]: - for rulefactory in self.rules: - for rule in rulefactory.get_rules(map): - rule = rule.empty() - rule.endpoint = self.prefix + rule.endpoint - yield rule - - -class RuleTemplate: - """Returns copies of the rules wrapped and expands string templates in - the endpoint, rule, defaults or subdomain sections. - - Here a small example for such a rule template:: - - from werkzeug.routing import Map, Rule, RuleTemplate - - resource = RuleTemplate([ - Rule('/$name/', endpoint='$name.list'), - Rule('/$name/', endpoint='$name.show') - ]) - - url_map = Map([resource(name='user'), resource(name='page')]) - - When a rule template is called the keyword arguments are used to - replace the placeholders in all the string parameters. - """ - - def __init__(self, rules: t.Iterable["Rule"]) -> None: - self.rules = list(rules) - - def __call__(self, *args: t.Any, **kwargs: t.Any) -> "RuleTemplateFactory": - return RuleTemplateFactory(self.rules, dict(*args, **kwargs)) - - -class RuleTemplateFactory(RuleFactory): - """A factory that fills in template variables into rules. Used by - `RuleTemplate` internally. 
- - :internal: - """ - - def __init__( - self, rules: t.Iterable[RuleFactory], context: t.Dict[str, t.Any] - ) -> None: - self.rules = rules - self.context = context - - def get_rules(self, map: "Map") -> t.Iterator["Rule"]: - for rulefactory in self.rules: - for rule in rulefactory.get_rules(map): - new_defaults = subdomain = None - if rule.defaults: - new_defaults = {} - for key, value in rule.defaults.items(): - if isinstance(value, str): - value = Template(value).substitute(self.context) - new_defaults[key] = value - if rule.subdomain is not None: - subdomain = Template(rule.subdomain).substitute(self.context) - new_endpoint = rule.endpoint - if isinstance(new_endpoint, str): - new_endpoint = Template(new_endpoint).substitute(self.context) - yield Rule( - Template(rule.rule).substitute(self.context), - new_defaults, - subdomain, - rule.methods, - rule.build_only, - new_endpoint, - rule.strict_slashes, - ) - - -def _prefix_names(src: str) -> ast.stmt: - """ast parse and prefix names with `.` to avoid collision with user vars""" - tree = ast.parse(src).body[0] - if isinstance(tree, ast.Expr): - tree = tree.value # type: ignore - for node in ast.walk(tree): - if isinstance(node, ast.Name): - node.id = f".{node.id}" - return tree - - -_CALL_CONVERTER_CODE_FMT = "self._converters[{elem!r}].to_url()" -_IF_KWARGS_URL_ENCODE_CODE = """\ -if kwargs: - params = self._encode_query_vars(kwargs) - q = "?" if params else "" -else: - q = params = "" -""" -_IF_KWARGS_URL_ENCODE_AST = _prefix_names(_IF_KWARGS_URL_ENCODE_CODE) -_URL_ENCODE_AST_NAMES = (_prefix_names("q"), _prefix_names("params")) - - -class Rule(RuleFactory): - """A Rule represents one URL pattern. There are some options for `Rule` - that change the way it behaves and are passed to the `Rule` constructor. - Note that besides the rule-string all arguments *must* be keyword arguments - in order to not break the application on Werkzeug upgrades. 
- - `string` - Rule strings basically are just normal URL paths with placeholders in - the format ``<converter(arguments):name>`` where the converter and the - arguments are optional. If no converter is defined the `default` - converter is used which means `string` in the normal configuration. - - URL rules that end with a slash are branch URLs, others are leaves. - If you have `strict_slashes` enabled (which is the default), all - branch URLs that are matched without a trailing slash will trigger a - redirect to the same URL with the missing slash appended. - - The converters are defined on the `Map`. - - `endpoint` - The endpoint for this rule. This can be anything. A reference to a - function, a string, a number etc. The preferred way is using a string - because the endpoint is used for URL generation. - - `defaults` - An optional dict with defaults for other rules with the same endpoint. - This is a bit tricky but useful if you want to have unique URLs:: - - url_map = Map([ - Rule('/all/', defaults={'page': 1}, endpoint='all_entries'), - Rule('/all/page/<int:page>', endpoint='all_entries') - ]) - - If a user now visits ``http://example.com/all/page/1`` they will be - redirected to ``http://example.com/all/``. If `redirect_defaults` is - disabled on the `Map` instance this will only affect the URL - generation. - - `subdomain` - The subdomain rule string for this rule. If not specified the rule - only matches for the `default_subdomain` of the map. If the map is - not bound to a subdomain this feature is disabled. - - Can be useful if you want to have user profiles on different subdomains - and all subdomains are forwarded to your application:: - - url_map = Map([ - Rule('/', subdomain='<username>', endpoint='user/homepage'), - Rule('/stats', subdomain='<username>', endpoint='user/stats') - ]) - - `methods` - A sequence of http methods this rule applies to. If not specified, all - methods are allowed. For example this can be useful if you want different - endpoints for `POST` and `GET`. 
If methods are defined and the path - matches but the method matched against is not in this list or in the - list of another rule for that path the error raised is of the type - `MethodNotAllowed` rather than `NotFound`. If `GET` is present in the - list of methods and `HEAD` is not, `HEAD` is added automatically. - - `strict_slashes` - Override the `Map` setting for `strict_slashes` only for this rule. If - not specified the `Map` setting is used. - - `merge_slashes` - Override :attr:`Map.merge_slashes` for this rule. - - `build_only` - Set this to True and the rule will never match but will create a URL - that can be build. This is useful if you have resources on a subdomain - or folder that are not handled by the WSGI application (like static data) - - `redirect_to` - If given this must be either a string or callable. In case of a - callable it's called with the url adapter that triggered the match and - the values of the URL as keyword arguments and has to return the target - for the redirect, otherwise it has to be a string with placeholders in - rule syntax:: - - def foo_with_slug(adapter, id): - # ask the database for the slug for the old id. this of - # course has nothing to do with werkzeug. - return f'foo/{Foo.get_slug_for_id(id)}' - - url_map = Map([ - Rule('/foo/', endpoint='foo'), - Rule('/some/old/url/', redirect_to='foo/'), - Rule('/other/old/url/', redirect_to=foo_with_slug) - ]) - - When the rule is matched the routing system will raise a - `RequestRedirect` exception with the target for the redirect. - - Keep in mind that the URL will be joined against the URL root of the - script so don't use a leading slash on the target URL unless you - really mean root of that domain. - - `alias` - If enabled this rule serves as an alias for another rule with the same - endpoint and arguments. - - `host` - If provided and the URL map has host matching enabled this can be - used to provide a match rule for the whole host. 
This also means - that the subdomain feature is disabled. - - `websocket` - If ``True``, this rule is only matches for WebSocket (``ws://``, - ``wss://``) requests. By default, rules will only match for HTTP - requests. - - .. versionchanged:: 2.1 - Percent-encoded newlines (``%0a``), which are decoded by WSGI - servers, are considered when routing instead of terminating the - match early. - - .. versionadded:: 1.0 - Added ``websocket``. - - .. versionadded:: 1.0 - Added ``merge_slashes``. - - .. versionadded:: 0.7 - Added ``alias`` and ``host``. - - .. versionchanged:: 0.6.1 - ``HEAD`` is added to ``methods`` if ``GET`` is present. - """ - - def __init__( - self, - string: str, - defaults: t.Optional[t.Mapping[str, t.Any]] = None, - subdomain: t.Optional[str] = None, - methods: t.Optional[t.Iterable[str]] = None, - build_only: bool = False, - endpoint: t.Optional[str] = None, - strict_slashes: t.Optional[bool] = None, - merge_slashes: t.Optional[bool] = None, - redirect_to: t.Optional[t.Union[str, t.Callable[..., str]]] = None, - alias: bool = False, - host: t.Optional[str] = None, - websocket: bool = False, - ) -> None: - if not string.startswith("/"): - raise ValueError("urls must start with a leading slash") - self.rule = string - self.is_leaf = not string.endswith("/") - - self.map: "Map" = None # type: ignore - self.strict_slashes = strict_slashes - self.merge_slashes = merge_slashes - self.subdomain = subdomain - self.host = host - self.defaults = defaults - self.build_only = build_only - self.alias = alias - self.websocket = websocket - - if methods is not None: - if isinstance(methods, str): - raise TypeError("'methods' should be a list of strings.") - - methods = {x.upper() for x in methods} - - if "HEAD" not in methods and "GET" in methods: - methods.add("HEAD") - - if websocket and methods - {"GET", "HEAD", "OPTIONS"}: - raise ValueError( - "WebSocket rules can only use 'GET', 'HEAD', and 'OPTIONS' methods." 
- ) - - self.methods = methods - self.endpoint: str = endpoint # type: ignore - self.redirect_to = redirect_to - - if defaults: - self.arguments = set(map(str, defaults)) - else: - self.arguments = set() - - self._trace: t.List[t.Tuple[bool, str]] = [] - - def empty(self) -> "Rule": - """ - Return an unbound copy of this rule. - - This can be useful if want to reuse an already bound URL for another - map. See ``get_empty_kwargs`` to override what keyword arguments are - provided to the new copy. - """ - return type(self)(self.rule, **self.get_empty_kwargs()) - - def get_empty_kwargs(self) -> t.Mapping[str, t.Any]: - """ - Provides kwargs for instantiating empty copy with empty() - - Use this method to provide custom keyword arguments to the subclass of - ``Rule`` when calling ``some_rule.empty()``. Helpful when the subclass - has custom keyword arguments that are needed at instantiation. - - Must return a ``dict`` that will be provided as kwargs to the new - instance of ``Rule``, following the initial ``self.rule`` value which - is always provided as the first, required positional argument. - """ - defaults = None - if self.defaults: - defaults = dict(self.defaults) - return dict( - defaults=defaults, - subdomain=self.subdomain, - methods=self.methods, - build_only=self.build_only, - endpoint=self.endpoint, - strict_slashes=self.strict_slashes, - redirect_to=self.redirect_to, - alias=self.alias, - host=self.host, - ) - - def get_rules(self, map: "Map") -> t.Iterator["Rule"]: - yield self - - def refresh(self) -> None: - """Rebinds and refreshes the URL. Call this if you modified the - rule in place. - - :internal: - """ - self.bind(self.map, rebind=True) - - def bind(self, map: "Map", rebind: bool = False) -> None: - """Bind the url to a map and create a regular expression based on - the information from the rule itself and the defaults from the map. 
- - :internal: - """ - if self.map is not None and not rebind: - raise RuntimeError(f"url rule {self!r} already bound to map {self.map!r}") - self.map = map - if self.strict_slashes is None: - self.strict_slashes = map.strict_slashes - if self.merge_slashes is None: - self.merge_slashes = map.merge_slashes - if self.subdomain is None: - self.subdomain = map.default_subdomain - self.compile() - - def get_converter( - self, - variable_name: str, - converter_name: str, - args: t.Tuple, - kwargs: t.Mapping[str, t.Any], - ) -> "BaseConverter": - """Looks up the converter for the given parameter. - - .. versionadded:: 0.9 - """ - if converter_name not in self.map.converters: - raise LookupError(f"the converter {converter_name!r} does not exist") - return self.map.converters[converter_name](self.map, *args, **kwargs) - - def _encode_query_vars(self, query_vars: t.Mapping[str, t.Any]) -> str: - return url_encode( - query_vars, - charset=self.map.charset, - sort=self.map.sort_parameters, - key=self.map.sort_key, - ) - - def compile(self) -> None: - """Compiles the regular expression and stores it.""" - assert self.map is not None, "rule not bound" - - if self.map.host_matching: - domain_rule = self.host or "" - else: - domain_rule = self.subdomain or "" - - self._trace = [] - self._converters: t.Dict[str, "BaseConverter"] = {} - self._static_weights: t.List[t.Tuple[int, int]] = [] - self._argument_weights: t.List[int] = [] - regex_parts = [] - - def _build_regex(rule: str) -> None: - index = 0 - for converter, arguments, variable in parse_rule(rule): - if converter is None: - for match in re.finditer(r"/+|[^/]+", variable): - part = match.group(0) - if part.startswith("/"): - if self.merge_slashes: - regex_parts.append(r"/+?") - self._trace.append((False, "/")) - else: - regex_parts.append(part) - self._trace.append((False, part)) - continue - self._trace.append((False, part)) - regex_parts.append(re.escape(part)) - if part: - self._static_weights.append((index, 
-len(part))) - else: - if arguments: - c_args, c_kwargs = parse_converter_args(arguments) - else: - c_args = () - c_kwargs = {} - convobj = self.get_converter(variable, converter, c_args, c_kwargs) - regex_parts.append(f"(?P<{variable}>{convobj.regex})") - self._converters[variable] = convobj - self._trace.append((True, variable)) - self._argument_weights.append(convobj.weight) - self.arguments.add(str(variable)) - index = index + 1 - - _build_regex(domain_rule) - regex_parts.append("\\|") - self._trace.append((False, "|")) - _build_regex(self.rule if self.is_leaf else self.rule.rstrip("/")) - if not self.is_leaf: - self._trace.append((False, "/")) - - self._build: t.Callable[..., t.Tuple[str, str]] - self._build = self._compile_builder(False).__get__(self, None) - self._build_unknown: t.Callable[..., t.Tuple[str, str]] - self._build_unknown = self._compile_builder(True).__get__(self, None) - - if self.build_only: - return - - if not (self.is_leaf and self.strict_slashes): - reps = "*" if self.merge_slashes else "?" - tail = f"(?/{reps})" - else: - tail = "" - - # Use \Z instead of $ to avoid matching before a %0a decoded to - # a \n by WSGI. - regex = rf"^{''.join(regex_parts)}{tail}$\Z" - self._regex = re.compile(regex) - - def match( - self, path: str, method: t.Optional[str] = None - ) -> t.Optional[t.MutableMapping[str, t.Any]]: - """Check if the rule matches a given path. Path is a string in the - form ``"subdomain|/path"`` and is assembled by the map. If - the map is doing host matching the subdomain part will be the host - instead. - - If the rule matches a dict with the converted values is returned, - otherwise the return value is `None`. - - :internal: - """ - if not self.build_only: - require_redirect = False - - m = self._regex.search(path) - if m is not None: - groups = m.groupdict() - # we have a folder like part of the url without a trailing - # slash and strict slashes enabled. 
raise an exception that - # tells the map to redirect to the same url but with a - # trailing slash - if ( - self.strict_slashes - and not self.is_leaf - and not groups.pop("__suffix__") - and ( - method is None or self.methods is None or method in self.methods - ) - ): - path += "/" - require_redirect = True - # if we are not in strict slashes mode we have to remove - # a __suffix__ - elif not self.strict_slashes: - del groups["__suffix__"] - - result = {} - for name, value in groups.items(): - try: - value = self._converters[name].to_python(value) - except ValidationError: - return None - result[str(name)] = value - if self.defaults: - result.update(self.defaults) - - if self.merge_slashes: - new_path = "|".join(self.build(result, False)) # type: ignore - if path.endswith("/") and not new_path.endswith("/"): - new_path += "/" - if new_path.count("/") < path.count("/"): - # The URL will be encoded when MapAdapter.match - # handles the RequestPath raised below. Decode - # the URL here to avoid a double encoding. 
- path = url_unquote(new_path) - require_redirect = True - - if require_redirect: - path = path.split("|", 1)[1] - raise RequestPath(path) - - if self.alias and self.map.redirect_defaults: - raise RequestAliasRedirect(result) - - return result - - return None - - @staticmethod - def _get_func_code(code: CodeType, name: str) -> t.Callable[..., t.Tuple[str, str]]: - globs: t.Dict[str, t.Any] = {} - locs: t.Dict[str, t.Any] = {} - exec(code, globs, locs) - return locs[name] # type: ignore - - def _compile_builder( - self, append_unknown: bool = True - ) -> t.Callable[..., t.Tuple[str, str]]: - defaults = self.defaults or {} - dom_ops: t.List[t.Tuple[bool, str]] = [] - url_ops: t.List[t.Tuple[bool, str]] = [] - - opl = dom_ops - for is_dynamic, data in self._trace: - if data == "|" and opl is dom_ops: - opl = url_ops - continue - # this seems like a silly case to ever come up but: - # if a default is given for a value that appears in the rule, - # resolve it to a constant ahead of time - if is_dynamic and data in defaults: - data = self._converters[data].to_url(defaults[data]) - opl.append((False, data)) - elif not is_dynamic: - opl.append( - (False, url_quote(_to_bytes(data, self.map.charset), safe="/:|+")) - ) - else: - opl.append((True, data)) - - def _convert(elem: str) -> ast.stmt: - ret = _prefix_names(_CALL_CONVERTER_CODE_FMT.format(elem=elem)) - ret.args = [ast.Name(str(elem), ast.Load())] # type: ignore # str for py2 - return ret - - def _parts(ops: t.List[t.Tuple[bool, str]]) -> t.List[ast.AST]: - parts = [ - _convert(elem) if is_dynamic else ast.Str(s=elem) - for is_dynamic, elem in ops - ] - parts = parts or [ast.Str("")] - # constant fold - ret = [parts[0]] - for p in parts[1:]: - if isinstance(p, ast.Str) and isinstance(ret[-1], ast.Str): - ret[-1] = ast.Str(ret[-1].s + p.s) - else: - ret.append(p) - return ret - - dom_parts = _parts(dom_ops) - url_parts = _parts(url_ops) - if not append_unknown: - body = [] - else: - body = [_IF_KWARGS_URL_ENCODE_AST] - 
url_parts.extend(_URL_ENCODE_AST_NAMES) - - def _join(parts: t.List[ast.AST]) -> ast.AST: - if len(parts) == 1: # shortcut - return parts[0] - return ast.JoinedStr(parts) - - body.append( - ast.Return(ast.Tuple([_join(dom_parts), _join(url_parts)], ast.Load())) - ) - - pargs = [ - elem - for is_dynamic, elem in dom_ops + url_ops - if is_dynamic and elem not in defaults - ] - kargs = [str(k) for k in defaults] - - func_ast: ast.FunctionDef = _prefix_names("def _(): pass") # type: ignore - func_ast.name = f"" - func_ast.args.args.append(ast.arg(".self", None)) - for arg in pargs + kargs: - func_ast.args.args.append(ast.arg(arg, None)) - func_ast.args.kwarg = ast.arg(".kwargs", None) - for _ in kargs: - func_ast.args.defaults.append(ast.Str("")) - func_ast.body = body - - # use `ast.parse` instead of `ast.Module` for better portability - # Python 3.8 changes the signature of `ast.Module` - module = ast.parse("") - module.body = [func_ast] - - # mark everything as on line 1, offset 0 - # less error-prone than `ast.fix_missing_locations` - # bad line numbers cause an assert to fail in debug builds - for node in ast.walk(module): - if "lineno" in node._attributes: - node.lineno = 1 - if "col_offset" in node._attributes: - node.col_offset = 0 - - code = compile(module, "", "exec") - return self._get_func_code(code, func_ast.name) - - def build( - self, values: t.Mapping[str, t.Any], append_unknown: bool = True - ) -> t.Optional[t.Tuple[str, str]]: - """Assembles the relative url for that rule and the subdomain. - If building doesn't work for some reasons `None` is returned. - - :internal: - """ - try: - if append_unknown: - return self._build_unknown(**values) - else: - return self._build(**values) - except ValidationError: - return None - - def provides_defaults_for(self, rule: "Rule") -> bool: - """Check if this rule has defaults for a given rule. 
- - :internal: - """ - return bool( - not self.build_only - and self.defaults - and self.endpoint == rule.endpoint - and self != rule - and self.arguments == rule.arguments - ) - - def suitable_for( - self, values: t.Mapping[str, t.Any], method: t.Optional[str] = None - ) -> bool: - """Check if the dict of values has enough data for url generation. - - :internal: - """ - # if a method was given explicitly and that method is not supported - # by this rule, this rule is not suitable. - if ( - method is not None - and self.methods is not None - and method not in self.methods - ): - return False - - defaults = self.defaults or () - - # all arguments required must be either in the defaults dict or - # the value dictionary otherwise it's not suitable - for key in self.arguments: - if key not in defaults and key not in values: - return False - - # in case defaults are given we ensure that either the value was - # skipped or the value is the same as the default value. - if defaults: - for key, value in defaults.items(): - if key in values and value != values[key]: - return False - - return True - - def match_compare_key( - self, - ) -> t.Tuple[bool, int, t.Iterable[t.Tuple[int, int]], int, t.Iterable[int]]: - """The match compare key for sorting. - - Current implementation: - - 1. rules without any arguments come first for performance - reasons only as we expect them to match faster and some - common ones usually don't have any arguments (index pages etc.) - 2. rules with more static parts come first so the second argument - is the negative length of the number of the static weights. - 3. we order by static weights, which is a combination of index - and length - 4. The more complex rules come first so the next argument is the - negative length of the number of argument weights. - 5. lastly we order by the actual argument weights. 
- - :internal: - """ - return ( - bool(self.arguments), - -len(self._static_weights), - self._static_weights, - -len(self._argument_weights), - self._argument_weights, - ) - - def build_compare_key(self) -> t.Tuple[int, int, int]: - """The build compare key for sorting. - - :internal: - """ - return (1 if self.alias else 0, -len(self.arguments), -len(self.defaults or ())) - - def __eq__(self, other: object) -> bool: - return isinstance(other, type(self)) and self._trace == other._trace - - __hash__ = None # type: ignore - - def __str__(self) -> str: - return self.rule - - def __repr__(self) -> str: - if self.map is None: - return f"<{type(self).__name__} (unbound)>" - parts = [] - for is_dynamic, data in self._trace: - if is_dynamic: - parts.append(f"<{data}>") - else: - parts.append(data) - parts = "".join(parts).lstrip("|") - methods = f" ({', '.join(self.methods)})" if self.methods is not None else "" - return f"<{type(self).__name__} {parts!r}{methods} -> {self.endpoint}>" - - -class BaseConverter: - """Base class for all converters.""" - - regex = "[^/]+" - weight = 100 - - def __init__(self, map: "Map", *args: t.Any, **kwargs: t.Any) -> None: - self.map = map - - def to_python(self, value: str) -> t.Any: - return value - - def to_url(self, value: t.Any) -> str: - if isinstance(value, (bytes, bytearray)): - return _fast_url_quote(value) - return _fast_url_quote(str(value).encode(self.map.charset)) - - -class UnicodeConverter(BaseConverter): - """This converter is the default converter and accepts any string but - only one path segment. Thus the string can not include a slash. - - This is the default validator. - - Example:: - - Rule('/pages/'), - Rule('/') - - :param map: the :class:`Map`. - :param minlength: the minimum length of the string. Must be greater - or equal 1. - :param maxlength: the maximum length of the string. - :param length: the exact length of the string. 
- """ - - def __init__( - self, - map: "Map", - minlength: int = 1, - maxlength: t.Optional[int] = None, - length: t.Optional[int] = None, - ) -> None: - super().__init__(map) - if length is not None: - length_regex = f"{{{int(length)}}}" - else: - if maxlength is None: - maxlength_value = "" - else: - maxlength_value = str(int(maxlength)) - length_regex = f"{{{int(minlength)},{maxlength_value}}}" - self.regex = f"[^/]{length_regex}" - - -class AnyConverter(BaseConverter): - """Matches one of the items provided. Items can either be Python - identifiers or strings:: - - Rule('/') - - :param map: the :class:`Map`. - :param items: this function accepts the possible items as positional - arguments. - """ - - def __init__(self, map: "Map", *items: str) -> None: - super().__init__(map) - self.regex = f"(?:{'|'.join([re.escape(x) for x in items])})" - - -class PathConverter(BaseConverter): - """Like the default :class:`UnicodeConverter`, but it also matches - slashes. This is useful for wikis and similar applications:: - - Rule('/') - Rule('//edit') - - :param map: the :class:`Map`. - """ - - regex = "[^/].*?" - weight = 200 - - -class NumberConverter(BaseConverter): - """Baseclass for `IntegerConverter` and `FloatConverter`. 
- - :internal: - """ - - weight = 50 - num_convert: t.Callable = int - - def __init__( - self, - map: "Map", - fixed_digits: int = 0, - min: t.Optional[int] = None, - max: t.Optional[int] = None, - signed: bool = False, - ) -> None: - if signed: - self.regex = self.signed_regex - super().__init__(map) - self.fixed_digits = fixed_digits - self.min = min - self.max = max - self.signed = signed - - def to_python(self, value: str) -> t.Any: - if self.fixed_digits and len(value) != self.fixed_digits: - raise ValidationError() - value = self.num_convert(value) - if (self.min is not None and value < self.min) or ( - self.max is not None and value > self.max - ): - raise ValidationError() - return value - - def to_url(self, value: t.Any) -> str: - value = str(self.num_convert(value)) - if self.fixed_digits: - value = value.zfill(self.fixed_digits) - return value - - @property - def signed_regex(self) -> str: - return f"-?{self.regex}" - - -class IntegerConverter(NumberConverter): - """This converter only accepts integer values:: - - Rule("/page/") - - By default it only accepts unsigned, positive values. The ``signed`` - parameter will enable signed, negative values. :: - - Rule("/page/") - - :param map: The :class:`Map`. - :param fixed_digits: The number of fixed digits in the URL. If you - set this to ``4`` for example, the rule will only match if the - URL looks like ``/0001/``. The default is variable length. - :param min: The minimal value. - :param max: The maximal value. - :param signed: Allow signed (negative) values. - - .. versionadded:: 0.15 - The ``signed`` parameter. - """ - - regex = r"\d+" - - -class FloatConverter(NumberConverter): - """This converter only accepts floating point values:: - - Rule("/probability/") - - By default it only accepts unsigned, positive values. The ``signed`` - parameter will enable signed, negative values. :: - - Rule("/offset/") - - :param map: The :class:`Map`. - :param min: The minimal value. - :param max: The maximal value. 
- :param signed: Allow signed (negative) values. - - .. versionadded:: 0.15 - The ``signed`` parameter. - """ - - regex = r"\d+\.\d+" - num_convert = float - - def __init__( - self, - map: "Map", - min: t.Optional[float] = None, - max: t.Optional[float] = None, - signed: bool = False, - ) -> None: - super().__init__(map, min=min, max=max, signed=signed) # type: ignore - - -class UUIDConverter(BaseConverter): - """This converter only accepts UUID strings:: - - Rule('/object/') - - .. versionadded:: 0.10 - - :param map: the :class:`Map`. - """ - - regex = ( - r"[A-Fa-f0-9]{8}-[A-Fa-f0-9]{4}-" - r"[A-Fa-f0-9]{4}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{12}" - ) - - def to_python(self, value: str) -> uuid.UUID: - return uuid.UUID(value) - - def to_url(self, value: uuid.UUID) -> str: - return str(value) - - -#: the default converter mapping for the map. -DEFAULT_CONVERTERS: t.Mapping[str, t.Type[BaseConverter]] = { - "default": UnicodeConverter, - "string": UnicodeConverter, - "any": AnyConverter, - "path": PathConverter, - "int": IntegerConverter, - "float": FloatConverter, - "uuid": UUIDConverter, -} - - -class Map: - """The map class stores all the URL rules and some configuration - parameters. Some of the configuration values are only stored on the - `Map` instance since those affect all rules, others are just defaults - and can be overridden for each rule. Note that you have to specify all - arguments besides the `rules` as keyword arguments! - - :param rules: sequence of url rules for this map. - :param default_subdomain: The default subdomain for rules without a - subdomain defined. - :param charset: charset of the url. defaults to ``"utf-8"`` - :param strict_slashes: If a rule ends with a slash but the matched - URL does not, redirect to the URL with a trailing slash. - :param merge_slashes: Merge consecutive slashes when matching or - building URLs. Matches will redirect to the normalized URL. - Slashes in variable parts are not merged. 
- :param redirect_defaults: This will redirect to the default rule if it - wasn't visited that way. This helps creating - unique URLs. - :param converters: A dict of converters that adds additional converters - to the list of converters. If you redefine one - converter this will override the original one. - :param sort_parameters: If set to `True` the url parameters are sorted. - See `url_encode` for more details. - :param sort_key: The sort key function for `url_encode`. - :param encoding_errors: the error method to use for decoding - :param host_matching: if set to `True` it enables the host matching - feature and disables the subdomain one. If - enabled the `host` parameter to rules is used - instead of the `subdomain` one. - - .. versionchanged:: 1.0 - If ``url_scheme`` is ``ws`` or ``wss``, only WebSocket rules - will match. - - .. versionchanged:: 1.0 - Added ``merge_slashes``. - - .. versionchanged:: 0.7 - Added ``encoding_errors`` and ``host_matching``. - - .. versionchanged:: 0.5 - Added ``sort_parameters`` and ``sort_key``. - """ - - #: A dict of default converters to be used. - default_converters = ImmutableDict(DEFAULT_CONVERTERS) - - #: The type of lock to use when updating. - #: - #: .. 
versionadded:: 1.0 - lock_class = Lock - - def __init__( - self, - rules: t.Optional[t.Iterable[RuleFactory]] = None, - default_subdomain: str = "", - charset: str = "utf-8", - strict_slashes: bool = True, - merge_slashes: bool = True, - redirect_defaults: bool = True, - converters: t.Optional[t.Mapping[str, t.Type[BaseConverter]]] = None, - sort_parameters: bool = False, - sort_key: t.Optional[t.Callable[[t.Any], t.Any]] = None, - encoding_errors: str = "replace", - host_matching: bool = False, - ) -> None: - self._rules: t.List[Rule] = [] - self._rules_by_endpoint: t.Dict[str, t.List[Rule]] = {} - self._remap = True - self._remap_lock = self.lock_class() - - self.default_subdomain = default_subdomain - self.charset = charset - self.encoding_errors = encoding_errors - self.strict_slashes = strict_slashes - self.merge_slashes = merge_slashes - self.redirect_defaults = redirect_defaults - self.host_matching = host_matching - - self.converters = self.default_converters.copy() - if converters: - self.converters.update(converters) - - self.sort_parameters = sort_parameters - self.sort_key = sort_key - - for rulefactory in rules or (): - self.add(rulefactory) - - def is_endpoint_expecting(self, endpoint: str, *arguments: str) -> bool: - """Iterate over all rules and check if the endpoint expects - the arguments provided. This is for example useful if you have - some URLs that expect a language code and others that do not and - you want to wrap the builder a bit so that the current language - code is automatically added if not provided but endpoints expect - it. - - :param endpoint: the endpoint to check. - :param arguments: this function accepts one or more arguments - as positional arguments. Each one of them is - checked. 
- """ - self.update() - arguments = set(arguments) - for rule in self._rules_by_endpoint[endpoint]: - if arguments.issubset(rule.arguments): - return True - return False - - def iter_rules(self, endpoint: t.Optional[str] = None) -> t.Iterator[Rule]: - """Iterate over all rules or the rules of an endpoint. - - :param endpoint: if provided only the rules for that endpoint - are returned. - :return: an iterator - """ - self.update() - if endpoint is not None: - return iter(self._rules_by_endpoint[endpoint]) - return iter(self._rules) - - def add(self, rulefactory: RuleFactory) -> None: - """Add a new rule or factory to the map and bind it. Requires that the - rule is not bound to another map. - - :param rulefactory: a :class:`Rule` or :class:`RuleFactory` - """ - for rule in rulefactory.get_rules(self): - rule.bind(self) - self._rules.append(rule) - self._rules_by_endpoint.setdefault(rule.endpoint, []).append(rule) - self._remap = True - - def bind( - self, - server_name: str, - script_name: t.Optional[str] = None, - subdomain: t.Optional[str] = None, - url_scheme: str = "http", - default_method: str = "GET", - path_info: t.Optional[str] = None, - query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, - ) -> "MapAdapter": - """Return a new :class:`MapAdapter` with the details specified to the - call. Note that `script_name` will default to ``'/'`` if not further - specified or `None`. The `server_name` at least is a requirement - because the HTTP RFC requires absolute URLs for redirects and so all - redirect exceptions raised by Werkzeug will contain the full canonical - URL. - - If no path_info is passed to :meth:`match` it will use the default path - info passed to bind. While this doesn't really make sense for - manual bind calls, it's useful if you bind a map to a WSGI - environment which already contains the path info. - - `subdomain` will default to the `default_subdomain` for this map if - no defined. 
If there is no `default_subdomain` you cannot use the - subdomain feature. - - .. versionchanged:: 1.0 - If ``url_scheme`` is ``ws`` or ``wss``, only WebSocket rules - will match. - - .. versionchanged:: 0.15 - ``path_info`` defaults to ``'/'`` if ``None``. - - .. versionchanged:: 0.8 - ``query_args`` can be a string. - - .. versionchanged:: 0.7 - Added ``query_args``. - """ - server_name = server_name.lower() - if self.host_matching: - if subdomain is not None: - raise RuntimeError("host matching enabled and a subdomain was provided") - elif subdomain is None: - subdomain = self.default_subdomain - if script_name is None: - script_name = "/" - if path_info is None: - path_info = "/" - - try: - server_name = _encode_idna(server_name) # type: ignore - except UnicodeError as e: - raise BadHost() from e - - return MapAdapter( - self, - server_name, - script_name, - subdomain, - url_scheme, - path_info, - default_method, - query_args, - ) - - def bind_to_environ( - self, - environ: t.Union["WSGIEnvironment", "Request"], - server_name: t.Optional[str] = None, - subdomain: t.Optional[str] = None, - ) -> "MapAdapter": - """Like :meth:`bind` but you can pass it an WSGI environment and it - will fetch the information from that dictionary. Note that because of - limitations in the protocol there is no way to get the current - subdomain and real `server_name` from the environment. If you don't - provide it, Werkzeug will use `SERVER_NAME` and `SERVER_PORT` (or - `HTTP_HOST` if provided) as used `server_name` with disabled subdomain - feature. - - If `subdomain` is `None` but an environment and a server name is - provided it will calculate the current subdomain automatically. - Example: `server_name` is ``'example.com'`` and the `SERVER_NAME` - in the wsgi `environ` is ``'staging.dev.example.com'`` the calculated - subdomain will be ``'staging.dev'``. - - If the object passed as environ has an environ attribute, the value of - this attribute is used instead. 
This allows you to pass request - objects. Additionally `PATH_INFO` added as a default of the - :class:`MapAdapter` so that you don't have to pass the path info to - the match method. - - .. versionchanged:: 1.0.0 - If the passed server name specifies port 443, it will match - if the incoming scheme is ``https`` without a port. - - .. versionchanged:: 1.0.0 - A warning is shown when the passed server name does not - match the incoming WSGI server name. - - .. versionchanged:: 0.8 - This will no longer raise a ValueError when an unexpected server - name was passed. - - .. versionchanged:: 0.5 - previously this method accepted a bogus `calculate_subdomain` - parameter that did not have any effect. It was removed because - of that. - - :param environ: a WSGI environment. - :param server_name: an optional server name hint (see above). - :param subdomain: optionally the current subdomain (see above). - """ - env = _get_environ(environ) - wsgi_server_name = get_host(env).lower() - scheme = env["wsgi.url_scheme"] - upgrade = any( - v.strip() == "upgrade" - for v in env.get("HTTP_CONNECTION", "").lower().split(",") - ) - - if upgrade and env.get("HTTP_UPGRADE", "").lower() == "websocket": - scheme = "wss" if scheme == "https" else "ws" - - if server_name is None: - server_name = wsgi_server_name - else: - server_name = server_name.lower() - - # strip standard port to match get_host() - if scheme in {"http", "ws"} and server_name.endswith(":80"): - server_name = server_name[:-3] - elif scheme in {"https", "wss"} and server_name.endswith(":443"): - server_name = server_name[:-4] - - if subdomain is None and not self.host_matching: - cur_server_name = wsgi_server_name.split(".") - real_server_name = server_name.split(".") - offset = -len(real_server_name) - - if cur_server_name[offset:] != real_server_name: - # This can happen even with valid configs if the server was - # accessed directly by IP address under some situations. 
- # Instead of raising an exception like in Werkzeug 0.7 or - # earlier we go by an invalid subdomain which will result - # in a 404 error on matching. - warnings.warn( - f"Current server name {wsgi_server_name!r} doesn't match configured" - f" server name {server_name!r}", - stacklevel=2, - ) - subdomain = "" - else: - subdomain = ".".join(filter(None, cur_server_name[:offset])) - - def _get_wsgi_string(name: str) -> t.Optional[str]: - val = env.get(name) - if val is not None: - return _wsgi_decoding_dance(val, self.charset) - return None - - script_name = _get_wsgi_string("SCRIPT_NAME") - path_info = _get_wsgi_string("PATH_INFO") - query_args = _get_wsgi_string("QUERY_STRING") - return Map.bind( - self, - server_name, - script_name, - subdomain, - scheme, - env["REQUEST_METHOD"], - path_info, - query_args=query_args, - ) - - def update(self) -> None: - """Called before matching and building to keep the compiled rules - in the correct order after things changed. - """ - if not self._remap: - return - - with self._remap_lock: - if not self._remap: - return - - self._rules.sort(key=lambda x: x.match_compare_key()) - for rules in self._rules_by_endpoint.values(): - rules.sort(key=lambda x: x.build_compare_key()) - self._remap = False - - def __repr__(self) -> str: - rules = self.iter_rules() - return f"{type(self).__name__}({pformat(list(rules))})" - - -class MapAdapter: - - """Returned by :meth:`Map.bind` or :meth:`Map.bind_to_environ` and does - the URL matching and building based on runtime information. 
- """ - - def __init__( - self, - map: Map, - server_name: str, - script_name: str, - subdomain: t.Optional[str], - url_scheme: str, - path_info: str, - default_method: str, - query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, - ): - self.map = map - self.server_name = _to_str(server_name) - script_name = _to_str(script_name) - if not script_name.endswith("/"): - script_name += "/" - self.script_name = script_name - self.subdomain = _to_str(subdomain) - self.url_scheme = _to_str(url_scheme) - self.path_info = _to_str(path_info) - self.default_method = _to_str(default_method) - self.query_args = query_args - self.websocket = self.url_scheme in {"ws", "wss"} - - def dispatch( - self, - view_func: t.Callable[[str, t.Mapping[str, t.Any]], "WSGIApplication"], - path_info: t.Optional[str] = None, - method: t.Optional[str] = None, - catch_http_exceptions: bool = False, - ) -> "WSGIApplication": - """Does the complete dispatching process. `view_func` is called with - the endpoint and a dict with the values for the view. It should - look up the view function, call it, and return a response object - or WSGI application. http exceptions are not caught by default - so that applications can display nicer error messages by just - catching them by hand. If you want to stick with the default - error messages you can pass it ``catch_http_exceptions=True`` and - it will catch the http exceptions. 
- - Here a small example for the dispatch usage:: - - from werkzeug.wrappers import Request, Response - from werkzeug.wsgi import responder - from werkzeug.routing import Map, Rule - - def on_index(request): - return Response('Hello from the index') - - url_map = Map([Rule('/', endpoint='index')]) - views = {'index': on_index} - - @responder - def application(environ, start_response): - request = Request(environ) - urls = url_map.bind_to_environ(environ) - return urls.dispatch(lambda e, v: views[e](request, **v), - catch_http_exceptions=True) - - Keep in mind that this method might return exception objects, too, so - use :class:`Response.force_type` to get a response object. - - :param view_func: a function that is called with the endpoint as - first argument and the value dict as second. Has - to dispatch to the actual view function with this - information. (see above) - :param path_info: the path info to use for matching. Overrides the - path info specified on binding. - :param method: the HTTP method used for matching. Overrides the - method specified on binding. - :param catch_http_exceptions: set to `True` to catch any of the - werkzeug :class:`HTTPException`\\s. - """ - try: - try: - endpoint, args = self.match(path_info, method) - except RequestRedirect as e: - return e - return view_func(endpoint, args) - except HTTPException as e: - if catch_http_exceptions: - return e - raise - - @typing.overload - def match( # type: ignore - self, - path_info: t.Optional[str] = None, - method: t.Optional[str] = None, - return_rule: "te.Literal[False]" = False, - query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, - websocket: t.Optional[bool] = None, - ) -> t.Tuple[str, t.Mapping[str, t.Any]]: - ... 
- - @typing.overload - def match( - self, - path_info: t.Optional[str] = None, - method: t.Optional[str] = None, - return_rule: "te.Literal[True]" = True, - query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, - websocket: t.Optional[bool] = None, - ) -> t.Tuple[Rule, t.Mapping[str, t.Any]]: - ... - - def match( - self, - path_info: t.Optional[str] = None, - method: t.Optional[str] = None, - return_rule: bool = False, - query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, - websocket: t.Optional[bool] = None, - ) -> t.Tuple[t.Union[str, Rule], t.Mapping[str, t.Any]]: - """The usage is simple: you just pass the match method the current - path info as well as the method (which defaults to `GET`). The - following things can then happen: - - - you receive a `NotFound` exception that indicates that no URL is - matching. A `NotFound` exception is also a WSGI application you - can call to get a default page not found page (happens to be the - same object as `werkzeug.exceptions.NotFound`) - - - you receive a `MethodNotAllowed` exception that indicates that there - is a match for this URL but not for the current request method. - This is useful for RESTful applications. - - - you receive a `RequestRedirect` exception with a `new_url` - attribute. This exception is used to notify you about a request - Werkzeug requests from your WSGI application. This is for example the - case if you request ``/foo`` although the correct URL is ``/foo/`` - You can use the `RequestRedirect` instance as response-like object - similar to all other subclasses of `HTTPException`. - - - you receive a ``WebsocketMismatch`` exception if the only - match is a WebSocket rule but the bind is an HTTP request, or - if the match is an HTTP rule but the bind is a WebSocket - request. 
- - - you get a tuple in the form ``(endpoint, arguments)`` if there is - a match (unless `return_rule` is True, in which case you get a tuple - in the form ``(rule, arguments)``) - - If the path info is not passed to the match method the default path - info of the map is used (defaults to the root URL if not defined - explicitly). - - All of the exceptions raised are subclasses of `HTTPException` so they - can be used as WSGI responses. They will all render generic error or - redirect pages. - - Here is a small example for matching: - - >>> m = Map([ - ... Rule('/', endpoint='index'), - ... Rule('/downloads/', endpoint='downloads/index'), - ... Rule('/downloads/<int:id>', endpoint='downloads/show') - ... ]) - >>> urls = m.bind("example.com", "/") - >>> urls.match("/", "GET") - ('index', {}) - >>> urls.match("/downloads/42") - ('downloads/show', {'id': 42}) - - And here is what happens on redirect and missing URLs: - - >>> urls.match("/downloads") - Traceback (most recent call last): - ... - RequestRedirect: http://example.com/downloads/ - >>> urls.match("/missing") - Traceback (most recent call last): - ... - NotFound: 404 Not Found - - :param path_info: the path info to use for matching. Overrides the - path info specified on binding. - :param method: the HTTP method used for matching. Overrides the - method specified on binding. - :param return_rule: return the rule that matched instead of just the - endpoint (defaults to `False`). - :param query_args: optional query arguments that are used for - automatic redirects as string or dictionary. It's - currently not possible to use the query arguments - for URL matching. - :param websocket: Match WebSocket instead of HTTP requests. A - websocket request has a ``ws`` or ``wss`` - :attr:`url_scheme`. This overrides that detection. - - .. versionadded:: 1.0 - Added ``websocket``. - - .. versionchanged:: 0.8 - ``query_args`` can be a string. - - .. versionadded:: 0.7 - Added ``query_args``. - - ..
versionadded:: 0.6 - Added ``return_rule``. - """ - self.map.update() - if path_info is None: - path_info = self.path_info - else: - path_info = _to_str(path_info, self.map.charset) - if query_args is None: - query_args = self.query_args or {} - method = (method or self.default_method).upper() - - if websocket is None: - websocket = self.websocket - - require_redirect = False - - domain_part = self.server_name if self.map.host_matching else self.subdomain - path_part = f"/{path_info.lstrip('/')}" if path_info else "" - path = f"{domain_part}|{path_part}" - - have_match_for = set() - websocket_mismatch = False - - for rule in self.map._rules: - try: - rv = rule.match(path, method) - except RequestPath as e: - raise RequestRedirect( - self.make_redirect_url( - url_quote(e.path_info, self.map.charset, safe="/:|+"), - query_args, - ) - ) from None - except RequestAliasRedirect as e: - raise RequestRedirect( - self.make_alias_redirect_url( - path, rule.endpoint, e.matched_values, method, query_args - ) - ) from None - if rv is None: - continue - if rule.methods is not None and method not in rule.methods: - have_match_for.update(rule.methods) - continue - - if rule.websocket != websocket: - websocket_mismatch = True - continue - - if self.map.redirect_defaults: - redirect_url = self.get_default_redirect(rule, method, rv, query_args) - if redirect_url is not None: - raise RequestRedirect(redirect_url) - - if rule.redirect_to is not None: - if isinstance(rule.redirect_to, str): - - def _handle_match(match: t.Match[str]) -> str: - value = rv[match.group(1)] # type: ignore - return rule._converters[match.group(1)].to_url(value) - - redirect_url = _simple_rule_re.sub(_handle_match, rule.redirect_to) - else: - redirect_url = rule.redirect_to(self, **rv) - - if self.subdomain: - netloc = f"{self.subdomain}.{self.server_name}" - else: - netloc = self.server_name - - raise RequestRedirect( - url_join( - f"{self.url_scheme or 'http'}://{netloc}{self.script_name}", - redirect_url, 
- ) - ) - - if require_redirect: - raise RequestRedirect( - self.make_redirect_url( - url_quote(path_info, self.map.charset, safe="/:|+"), query_args - ) - ) - - if return_rule: - return rule, rv - else: - return rule.endpoint, rv - - if have_match_for: - raise MethodNotAllowed(valid_methods=list(have_match_for)) - - if websocket_mismatch: - raise WebsocketMismatch() - - raise NotFound() - - def test( - self, path_info: t.Optional[str] = None, method: t.Optional[str] = None - ) -> bool: - """Test if a rule would match. Works like `match` but returns `True` - if the URL matches, or `False` if it does not exist. - - :param path_info: the path info to use for matching. Overrides the - path info specified on binding. - :param method: the HTTP method used for matching. Overrides the - method specified on binding. - """ - try: - self.match(path_info, method) - except RequestRedirect: - pass - except HTTPException: - return False - return True - - def allowed_methods(self, path_info: t.Optional[str] = None) -> t.Iterable[str]: - """Returns the valid methods that match for a given path. - - .. versionadded:: 0.7 - """ - try: - self.match(path_info, method="--") - except MethodNotAllowed as e: - return e.valid_methods # type: ignore - except HTTPException: - pass - return [] - - def get_host(self, domain_part: t.Optional[str]) -> str: - """Figures out the full host name for the given domain part. The - domain part is a subdomain in case host matching is disabled or - a full host name. 
- """ - if self.map.host_matching: - if domain_part is None: - return self.server_name - return _to_str(domain_part, "ascii") - subdomain = domain_part - if subdomain is None: - subdomain = self.subdomain - else: - subdomain = _to_str(subdomain, "ascii") - - if subdomain: - return f"{subdomain}.{self.server_name}" - else: - return self.server_name - - def get_default_redirect( - self, - rule: Rule, - method: str, - values: t.MutableMapping[str, t.Any], - query_args: t.Union[t.Mapping[str, t.Any], str], - ) -> t.Optional[str]: - """A helper that returns the URL to redirect to if it finds one. - This is used for default redirecting only. - - :internal: - """ - assert self.map.redirect_defaults - for r in self.map._rules_by_endpoint[rule.endpoint]: - # every rule that comes after this one, including ourself - # has a lower priority for the defaults. We order the ones - # with the highest priority up for building. - if r is rule: - break - if r.provides_defaults_for(rule) and r.suitable_for(values, method): - values.update(r.defaults) # type: ignore - domain_part, path = r.build(values) # type: ignore - return self.make_redirect_url(path, query_args, domain_part=domain_part) - return None - - def encode_query_args(self, query_args: t.Union[t.Mapping[str, t.Any], str]) -> str: - if not isinstance(query_args, str): - return url_encode(query_args, self.map.charset) - return query_args - - def make_redirect_url( - self, - path_info: str, - query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, - domain_part: t.Optional[str] = None, - ) -> str: - """Creates a redirect URL. 
- - :internal: - """ - if query_args: - suffix = f"?{self.encode_query_args(query_args)}" - else: - suffix = "" - - scheme = self.url_scheme or "http" - host = self.get_host(domain_part) - path = posixpath.join(self.script_name.strip("/"), path_info.lstrip("/")) - return f"{scheme}://{host}/{path}{suffix}" - - def make_alias_redirect_url( - self, - path: str, - endpoint: str, - values: t.Mapping[str, t.Any], - method: str, - query_args: t.Union[t.Mapping[str, t.Any], str], - ) -> str: - """Internally called to make an alias redirect URL.""" - url = self.build( - endpoint, values, method, append_unknown=False, force_external=True - ) - if query_args: - url += f"?{self.encode_query_args(query_args)}" - assert url != path, "detected invalid alias setting. No canonical URL found" - return url - - def _partial_build( - self, - endpoint: str, - values: t.Mapping[str, t.Any], - method: t.Optional[str], - append_unknown: bool, - ) -> t.Optional[t.Tuple[str, str, bool]]: - """Helper for :meth:`build`. Returns subdomain and path for the - rule that accepts this endpoint, values and method. - - :internal: - """ - # in case the method is none, try with the default method first - if method is None: - rv = self._partial_build( - endpoint, values, self.default_method, append_unknown - ) - if rv is not None: - return rv - - # Default method did not match or a specific method is passed. - # Check all for first match with matching host. If no matching - # host is found, go with first result. 
- first_match = None - - for rule in self.map._rules_by_endpoint.get(endpoint, ()): - if rule.suitable_for(values, method): - build_rv = rule.build(values, append_unknown) - - if build_rv is not None: - rv = (build_rv[0], build_rv[1], rule.websocket) - if self.map.host_matching: - if rv[0] == self.server_name: - return rv - elif first_match is None: - first_match = rv - else: - return rv - - return first_match - - def build( - self, - endpoint: str, - values: t.Optional[t.Mapping[str, t.Any]] = None, - method: t.Optional[str] = None, - force_external: bool = False, - append_unknown: bool = True, - url_scheme: t.Optional[str] = None, - ) -> str: - """Building URLs works pretty much the other way round. Instead of - `match` you call `build` and pass it the endpoint and a dict of - arguments for the placeholders. - - The `build` function also accepts an argument called `force_external` - which, if you set it to `True` will force external URLs. Per default - external URLs (include the server name) will only be used if the - target URL is on a different subdomain. - - >>> m = Map([ - ... Rule('/', endpoint='index'), - ... Rule('/downloads/', endpoint='downloads/index'), - ... Rule('/downloads/<int:id>', endpoint='downloads/show') - ... ]) - >>> urls = m.bind("example.com", "/") - >>> urls.build("index", {}) - '/' - >>> urls.build("downloads/show", {'id': 42}) - '/downloads/42' - >>> urls.build("downloads/show", {'id': 42}, force_external=True) - 'http://example.com/downloads/42' - - Because URLs cannot contain non ASCII data you will always get - bytes back. Non ASCII characters are urlencoded with the - charset defined on the map instance.
- - Additional values are converted to strings and appended to the URL as - URL querystring parameters: - - >>> urls.build("index", {'q': 'My Searchstring'}) - '/?q=My+Searchstring' - - When processing those additional values, lists are furthermore - interpreted as multiple values (as per - :py:class:`werkzeug.datastructures.MultiDict`): - - >>> urls.build("index", {'q': ['a', 'b', 'c']}) - '/?q=a&q=b&q=c' - - Passing a ``MultiDict`` will also add multiple values: - - >>> urls.build("index", MultiDict((('p', 'z'), ('q', 'a'), ('q', 'b')))) - '/?p=z&q=a&q=b' - - If a rule does not exist when building a `BuildError` exception is - raised. - - The build method accepts an argument called `method` which allows you - to specify the method you want to have an URL built for if you have - different methods for the same endpoint specified. - - :param endpoint: the endpoint of the URL to build. - :param values: the values for the URL to build. Unhandled values are - appended to the URL as query parameters. - :param method: the HTTP method for the rule if there are different - URLs for different methods on the same endpoint. - :param force_external: enforce full canonical external URLs. If the URL - scheme is not provided, this will generate - a protocol-relative URL. - :param append_unknown: unknown parameters are appended to the generated - URL as query string argument. Disable this - if you want the builder to ignore those. - :param url_scheme: Scheme to use in place of the bound - :attr:`url_scheme`. - - .. versionchanged:: 2.0 - Added the ``url_scheme`` parameter. - - .. versionadded:: 0.6 - Added the ``append_unknown`` parameter. 
- """ - self.map.update() - - if values: - if isinstance(values, MultiDict): - values = { - k: (v[0] if len(v) == 1 else v) - for k, v in dict.items(values) - if len(v) != 0 - } - else: # plain dict - values = {k: v for k, v in values.items() if v is not None} - else: - values = {} - - rv = self._partial_build(endpoint, values, method, append_unknown) - if rv is None: - raise BuildError(endpoint, values, method, self) - - domain_part, path, websocket = rv - host = self.get_host(domain_part) - - if url_scheme is None: - url_scheme = self.url_scheme - - # Always build WebSocket routes with the scheme (browsers - # require full URLs). If bound to a WebSocket, ensure that HTTP - # routes are built with an HTTP scheme. - secure = url_scheme in {"https", "wss"} - - if websocket: - force_external = True - url_scheme = "wss" if secure else "ws" - elif url_scheme: - url_scheme = "https" if secure else "http" - - # shortcut this. - if not force_external and ( - (self.map.host_matching and host == self.server_name) - or (not self.map.host_matching and domain_part == self.subdomain) - ): - return f"{self.script_name.rstrip('/')}/{path.lstrip('/')}" - - scheme = f"{url_scheme}:" if url_scheme else "" - return f"{scheme}//{host}{self.script_name[:-1]}/{path.lstrip('/')}" diff --git a/src/werkzeug/routing/__init__.py b/src/werkzeug/routing/__init__.py new file mode 100644 index 0000000000..7f80e8386a --- /dev/null +++ b/src/werkzeug/routing/__init__.py @@ -0,0 +1,132 @@ +"""When it comes to combining multiple controller or view functions +(however you want to call them) you need a dispatcher. A simple way +would be applying regular expression tests on the ``PATH_INFO`` and +calling registered callback functions that return the value then. + +This module implements a much more powerful system than simple regular +expression matching because it can also convert values in the URLs and +build URLs. 
+ +Here a simple example that creates a URL map for an application with +two subdomains (www and kb) and some URL rules: + +.. code-block:: python + + m = Map([ + # Static URLs + Rule('/', endpoint='static/index'), + Rule('/about', endpoint='static/about'), + Rule('/help', endpoint='static/help'), + # Knowledge Base + Subdomain('kb', [ + Rule('/', endpoint='kb/index'), + Rule('/browse/', endpoint='kb/browse'), + Rule('/browse/<int:id>/', endpoint='kb/browse'), + Rule('/browse/<int:id>/<int:page>', endpoint='kb/browse') + ]) + ], default_subdomain='www') + +If the application doesn't use subdomains it's perfectly fine to not set +the default subdomain and not use the `Subdomain` rule factory. The +endpoint in the rules can be anything, for example import paths or +unique identifiers. The WSGI application can use those endpoints to get the +handler for that URL. It doesn't have to be a string at all but it's +recommended. + +Now it's possible to create a URL adapter for one of the subdomains and +build URLs: + +.. code-block:: python + + c = m.bind('example.com') + + c.build("kb/browse", dict(id=42)) + 'http://kb.example.com/browse/42/' + + c.build("kb/browse", dict()) + 'http://kb.example.com/browse/' + + c.build("kb/browse", dict(id=42, page=3)) + 'http://kb.example.com/browse/42/3' + + c.build("static/about") + '/about' + + c.build("static/index", force_external=True) + 'http://www.example.com/' + + c = m.bind('example.com', subdomain='kb') + + c.build("static/about") + 'http://www.example.com/about' + +The first argument to bind is the server name *without* the subdomain. +Per default it will assume that the script is mounted on the root, but +often that's not the case so you can provide the real mount point as +second argument: + +.. code-block:: python + + c = m.bind('example.com', '/applications/example') + +The third argument can be the subdomain, if not given the default +subdomain is used. For more details about binding have a look at the +documentation of the `MapAdapter`.
+ +And here is how you can match URLs: + +.. code-block:: python + + c = m.bind('example.com') + + c.match("/") + ('static/index', {}) + + c.match("/about") + ('static/about', {}) + + c = m.bind('example.com', '/', 'kb') + + c.match("/") + ('kb/index', {}) + + c.match("/browse/42/23") + ('kb/browse', {'id': 42, 'page': 23}) + +If matching fails you get a ``NotFound`` exception, if the rule thinks +it's a good idea to redirect (for example because the URL was defined +to have a slash at the end but the request was missing that slash) it +will raise a ``RequestRedirect`` exception. Both are subclasses of +``HTTPException`` so you can use those errors as responses in the +application. + +If matching succeeded but the URL rule was incompatible to the given +method (for example there were only rules for ``GET`` and ``HEAD`` but +routing tried to match a ``POST`` request) a ``MethodNotAllowed`` +exception is raised. +""" +from .converters import AnyConverter +from .converters import BaseConverter +from .converters import FloatConverter +from .converters import IntegerConverter +from .converters import PathConverter +from .converters import UnicodeConverter +from .converters import UUIDConverter +from .exceptions import BuildError +from .exceptions import NoMatch +from .exceptions import RequestAliasRedirect +from .exceptions import RequestPath +from .exceptions import RequestRedirect +from .exceptions import RoutingException +from .exceptions import WebsocketMismatch +from .map import Map +from .map import MapAdapter +from .matcher import StateMachineMatcher +from .rules import EndpointPrefix +from .rules import parse_converter_args +from .rules import Rule +from .rules import RuleFactory +from .rules import RuleTemplate +from .rules import RuleTemplateFactory +from .rules import Subdomain +from .rules import Submount diff --git a/src/werkzeug/routing/converters.py b/src/werkzeug/routing/converters.py new file mode 100644 index 0000000000..bbad29d7ad --- /dev/null +++ 
b/src/werkzeug/routing/converters.py @@ -0,0 +1,257 @@ +import re +import typing as t +import uuid + +from ..urls import _fast_url_quote + +if t.TYPE_CHECKING: + from .map import Map + + +class ValidationError(ValueError): + """Validation error. If a rule converter raises this exception the rule + does not match the current URL and the next URL is tried. + """ + + +class BaseConverter: + """Base class for all converters.""" + + regex = "[^/]+" + weight = 100 + part_isolating = True + + def __init__(self, map: "Map", *args: t.Any, **kwargs: t.Any) -> None: + self.map = map + + def to_python(self, value: str) -> t.Any: + return value + + def to_url(self, value: t.Any) -> str: + if isinstance(value, (bytes, bytearray)): + return _fast_url_quote(value) + return _fast_url_quote(str(value).encode(self.map.charset)) + + +class UnicodeConverter(BaseConverter): + """This converter is the default converter and accepts any string but + only one path segment. Thus the string can not include a slash. + + This is the default validator. + + Example:: + + Rule('/pages/<page>'), + Rule('/<string(length=2):lang_code>') + + :param map: the :class:`Map`. + :param minlength: the minimum length of the string. Must be greater + or equal 1. + :param maxlength: the maximum length of the string. + :param length: the exact length of the string. + """ + + part_isolating = True + + def __init__( + self, + map: "Map", + minlength: int = 1, + maxlength: t.Optional[int] = None, + length: t.Optional[int] = None, + ) -> None: + super().__init__(map) + if length is not None: + length_regex = f"{{{int(length)}}}" + else: + if maxlength is None: + maxlength_value = "" + else: + maxlength_value = str(int(maxlength)) + length_regex = f"{{{int(minlength)},{maxlength_value}}}" + self.regex = f"[^/]{length_regex}" + + +class AnyConverter(BaseConverter): + """Matches one of the items provided. Items can either be Python + identifiers or strings:: + + Rule('/<any(about, help, imprint, class, "foo,bar"):page_name>') + + :param map: the :class:`Map`.
+ :param items: this function accepts the possible items as positional + arguments. + + .. versionchanged:: 2.2 + Value is validated when building a URL. + """ + + part_isolating = True + + def __init__(self, map: "Map", *items: str) -> None: + super().__init__(map) + self.items = set(items) + self.regex = f"(?:{'|'.join([re.escape(x) for x in items])})" + + def to_url(self, value: t.Any) -> str: + if value in self.items: + return str(value) + + valid_values = ", ".join(f"'{item}'" for item in sorted(self.items)) + raise ValueError(f"'{value}' is not one of {valid_values}") + + +class PathConverter(BaseConverter): + """Like the default :class:`UnicodeConverter`, but it also matches + slashes. This is useful for wikis and similar applications:: + + Rule('/<path:wikipage>') + Rule('/<path:wikipage>/edit') + + :param map: the :class:`Map`. + """ + + regex = "[^/].*?" + weight = 200 + part_isolating = False + + +class NumberConverter(BaseConverter): + """Baseclass for `IntegerConverter` and `FloatConverter`. + + :internal: + """ + + weight = 50 + num_convert: t.Callable = int + part_isolating = True + + def __init__( + self, + map: "Map", + fixed_digits: int = 0, + min: t.Optional[int] = None, + max: t.Optional[int] = None, + signed: bool = False, + ) -> None: + if signed: + self.regex = self.signed_regex + super().__init__(map) + self.fixed_digits = fixed_digits + self.min = min + self.max = max + self.signed = signed + + def to_python(self, value: str) -> t.Any: + if self.fixed_digits and len(value) != self.fixed_digits: + raise ValidationError() + value = self.num_convert(value) + if (self.min is not None and value < self.min) or ( + self.max is not None and value > self.max + ): + raise ValidationError() + return value + + def to_url(self, value: t.Any) -> str: + value = str(self.num_convert(value)) + if self.fixed_digits: + value = value.zfill(self.fixed_digits) + return value + + @property + def signed_regex(self) -> str: + return f"-?{self.regex}" + + +class
IntegerConverter(NumberConverter): + """This converter only accepts integer values:: + + Rule("/page/<int:page>") + + By default it only accepts unsigned, positive values. The ``signed`` + parameter will enable signed, negative values. :: + + Rule("/page/<int(signed=True):page>") + + :param map: The :class:`Map`. + :param fixed_digits: The number of fixed digits in the URL. If you + set this to ``4`` for example, the rule will only match if the + URL looks like ``/0001/``. The default is variable length. + :param min: The minimal value. + :param max: The maximal value. + :param signed: Allow signed (negative) values. + + .. versionadded:: 0.15 + The ``signed`` parameter. + """ + + regex = r"\d+" + part_isolating = True + + +class FloatConverter(NumberConverter): + """This converter only accepts floating point values:: + + Rule("/probability/<float:probability>") + + By default it only accepts unsigned, positive values. The ``signed`` + parameter will enable signed, negative values. :: + + Rule("/offset/<float(signed=True):offset>") + + :param map: The :class:`Map`. + :param min: The minimal value. + :param max: The maximal value. + :param signed: Allow signed (negative) values. + + .. versionadded:: 0.15 + The ``signed`` parameter. + """ + + regex = r"\d+\.\d+" + num_convert = float + part_isolating = True + + def __init__( + self, + map: "Map", + min: t.Optional[float] = None, + max: t.Optional[float] = None, + signed: bool = False, + ) -> None: + super().__init__(map, min=min, max=max, signed=signed) # type: ignore + + +class UUIDConverter(BaseConverter): + """This converter only accepts UUID strings:: + + Rule('/object/<uuid:identifier>') + + .. versionadded:: 0.10 + + :param map: the :class:`Map`. + """ + + regex = ( + r"[A-Fa-f0-9]{8}-[A-Fa-f0-9]{4}-" + r"[A-Fa-f0-9]{4}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{12}" + ) + part_isolating = True + + def to_python(self, value: str) -> uuid.UUID: + return uuid.UUID(value) + + def to_url(self, value: uuid.UUID) -> str: + return str(value) + + +#: the default converter mapping for the map.
+DEFAULT_CONVERTERS: t.Mapping[str, t.Type[BaseConverter]] = { + "default": UnicodeConverter, + "string": UnicodeConverter, + "any": AnyConverter, + "path": PathConverter, + "int": IntegerConverter, + "float": FloatConverter, + "uuid": UUIDConverter, +} diff --git a/src/werkzeug/routing/exceptions.py b/src/werkzeug/routing/exceptions.py new file mode 100644 index 0000000000..7cbe6e9131 --- /dev/null +++ b/src/werkzeug/routing/exceptions.py @@ -0,0 +1,146 @@ +import difflib +import typing as t + +from ..exceptions import BadRequest +from ..exceptions import HTTPException +from ..utils import cached_property +from ..utils import redirect + +if t.TYPE_CHECKING: + from _typeshed.wsgi import WSGIEnvironment + from .map import MapAdapter + from .rules import Rule # noqa: F401 + from ..wrappers.request import Request + from ..wrappers.response import Response + + +class RoutingException(Exception): + """Special exceptions that require the application to redirect, notifying + about missing urls, etc. + + :internal: + """ + + +class RequestRedirect(HTTPException, RoutingException): + """Raise if the map requests a redirect. This is for example the case if + `strict_slashes` are activated and an url that requires a trailing slash. + + The attribute `new_url` contains the absolute destination url. 
+ """ + + code = 308 + + def __init__(self, new_url: str) -> None: + super().__init__(new_url) + self.new_url = new_url + + def get_response( + self, + environ: t.Optional[t.Union["WSGIEnvironment", "Request"]] = None, + scope: t.Optional[dict] = None, + ) -> "Response": + return redirect(self.new_url, self.code) + + +class RequestPath(RoutingException): + """Internal exception.""" + + __slots__ = ("path_info",) + + def __init__(self, path_info: str) -> None: + super().__init__() + self.path_info = path_info + + +class RequestAliasRedirect(RoutingException): # noqa: B903 + """This rule is an alias and wants to redirect to the canonical URL.""" + + def __init__(self, matched_values: t.Mapping[str, t.Any], endpoint: str) -> None: + super().__init__() + self.matched_values = matched_values + self.endpoint = endpoint + + +class BuildError(RoutingException, LookupError): + """Raised if the build system cannot find a URL for an endpoint with the + values provided. + """ + + def __init__( + self, + endpoint: str, + values: t.Mapping[str, t.Any], + method: t.Optional[str], + adapter: t.Optional["MapAdapter"] = None, + ) -> None: + super().__init__(endpoint, values, method) + self.endpoint = endpoint + self.values = values + self.method = method + self.adapter = adapter + + @cached_property + def suggested(self) -> t.Optional["Rule"]: + return self.closest_rule(self.adapter) + + def closest_rule(self, adapter: t.Optional["MapAdapter"]) -> t.Optional["Rule"]: + def _score_rule(rule: "Rule") -> float: + return sum( + [ + 0.98 + * difflib.SequenceMatcher( + None, rule.endpoint, self.endpoint + ).ratio(), + 0.01 * bool(set(self.values or ()).issubset(rule.arguments)), + 0.01 * bool(rule.methods and self.method in rule.methods), + ] + ) + + if adapter and adapter.map._rules: + return max(adapter.map._rules, key=_score_rule) + + return None + + def __str__(self) -> str: + message = [f"Could not build url for endpoint {self.endpoint!r}"] + if self.method: + message.append(f" 
({self.method!r})") + if self.values: + message.append(f" with values {sorted(self.values)!r}") + message.append(".") + if self.suggested: + if self.endpoint == self.suggested.endpoint: + if ( + self.method + and self.suggested.methods is not None + and self.method not in self.suggested.methods + ): + message.append( + " Did you mean to use methods" + f" {sorted(self.suggested.methods)!r}?" + ) + missing_values = self.suggested.arguments.union( + set(self.suggested.defaults or ()) + ) - set(self.values.keys()) + if missing_values: + message.append( + f" Did you forget to specify values {sorted(missing_values)!r}?" + ) + else: + message.append(f" Did you mean {self.suggested.endpoint!r} instead?") + return "".join(message) + + +class WebsocketMismatch(BadRequest): + """The only matched rule is either a WebSocket and the request is + HTTP, or the rule is HTTP and the request is a WebSocket. + """ + + +class NoMatch(Exception): + __slots__ = ("have_match_for", "websocket_mismatch") + + def __init__(self, have_match_for: t.Set[str], websocket_mismatch: bool) -> None: + self.have_match_for = have_match_for + self.websocket_mismatch = websocket_mismatch diff --git a/src/werkzeug/routing/map.py b/src/werkzeug/routing/map.py new file mode 100644 index 0000000000..daf94b6a1c --- /dev/null +++ b/src/werkzeug/routing/map.py @@ -0,0 +1,944 @@ +import posixpath +import typing as t +import warnings +from pprint import pformat +from threading import Lock + +from .._internal import _encode_idna +from .._internal import _get_environ +from .._internal import _to_str +from .._internal import _wsgi_decoding_dance +from ..datastructures import ImmutableDict +from ..datastructures import MultiDict +from ..exceptions import BadHost +from ..exceptions import HTTPException +from ..exceptions import MethodNotAllowed +from ..exceptions import NotFound +from ..urls import url_encode +from ..urls import url_join +from ..urls import url_quote +from ..wsgi import get_host +from .converters 
import DEFAULT_CONVERTERS +from .exceptions import BuildError +from .exceptions import NoMatch +from .exceptions import RequestAliasRedirect +from .exceptions import RequestPath +from .exceptions import RequestRedirect +from .exceptions import WebsocketMismatch +from .matcher import StateMachineMatcher +from .rules import _simple_rule_re +from .rules import Rule + +if t.TYPE_CHECKING: + import typing_extensions as te + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + from .converters import BaseConverter + from .rules import RuleFactory + from ..wrappers.request import Request + + +class Map: + """The map class stores all the URL rules and some configuration + parameters. Some of the configuration values are only stored on the + `Map` instance since those affect all rules, others are just defaults + and can be overridden for each rule. Note that you have to specify all + arguments besides the `rules` as keyword arguments! + + :param rules: sequence of url rules for this map. + :param default_subdomain: The default subdomain for rules without a + subdomain defined. + :param charset: charset of the url. defaults to ``"utf-8"`` + :param strict_slashes: If a rule ends with a slash but the matched + URL does not, redirect to the URL with a trailing slash. + :param merge_slashes: Merge consecutive slashes when matching or + building URLs. Matches will redirect to the normalized URL. + Slashes in variable parts are not merged. + :param redirect_defaults: This will redirect to the default rule if it + wasn't visited that way. This helps creating + unique URLs. + :param converters: A dict of converters that adds additional converters + to the list of converters. If you redefine one + converter this will override the original one. + :param sort_parameters: If set to `True` the url parameters are sorted. + See `url_encode` for more details. + :param sort_key: The sort key function for `url_encode`. 
+ :param encoding_errors: the error method to use for decoding + :param host_matching: if set to `True` it enables the host matching + feature and disables the subdomain one. If + enabled the `host` parameter to rules is used + instead of the `subdomain` one. + + .. versionchanged:: 1.0 + If ``url_scheme`` is ``ws`` or ``wss``, only WebSocket rules + will match. + + .. versionchanged:: 1.0 + Added ``merge_slashes``. + + .. versionchanged:: 0.7 + Added ``encoding_errors`` and ``host_matching``. + + .. versionchanged:: 0.5 + Added ``sort_parameters`` and ``sort_key``. + """ + + #: A dict of default converters to be used. + default_converters = ImmutableDict(DEFAULT_CONVERTERS) + + #: The type of lock to use when updating. + #: + #: .. versionadded:: 1.0 + lock_class = Lock + + def __init__( + self, + rules: t.Optional[t.Iterable["RuleFactory"]] = None, + default_subdomain: str = "", + charset: str = "utf-8", + strict_slashes: bool = True, + merge_slashes: bool = True, + redirect_defaults: bool = True, + converters: t.Optional[t.Mapping[str, t.Type["BaseConverter"]]] = None, + sort_parameters: bool = False, + sort_key: t.Optional[t.Callable[[t.Any], t.Any]] = None, + encoding_errors: str = "replace", + host_matching: bool = False, + ) -> None: + self._matcher = StateMachineMatcher(merge_slashes) + self._rules_by_endpoint: t.Dict[str, t.List[Rule]] = {} + self._remap = True + self._remap_lock = self.lock_class() + + self.default_subdomain = default_subdomain + self.charset = charset + self.encoding_errors = encoding_errors + self.strict_slashes = strict_slashes + self.merge_slashes = merge_slashes + self.redirect_defaults = redirect_defaults + self.host_matching = host_matching + + self.converters = self.default_converters.copy() + if converters: + self.converters.update(converters) + + self.sort_parameters = sort_parameters + self.sort_key = sort_key + + for rulefactory in rules or (): + self.add(rulefactory) + + def is_endpoint_expecting(self, endpoint: str, 
*arguments: str) -> bool: + """Iterate over all rules and check if the endpoint expects + the arguments provided. This is for example useful if you have + some URLs that expect a language code and others that do not and + you want to wrap the builder a bit so that the current language + code is automatically added if not provided but endpoints expect + it. + + :param endpoint: the endpoint to check. + :param arguments: this function accepts one or more arguments + as positional arguments. Each one of them is + checked. + """ + self.update() + arguments = set(arguments) + for rule in self._rules_by_endpoint[endpoint]: + if arguments.issubset(rule.arguments): + return True + return False + + @property + def _rules(self) -> t.List[Rule]: + return [rule for rules in self._rules_by_endpoint.values() for rule in rules] + + def iter_rules(self, endpoint: t.Optional[str] = None) -> t.Iterator[Rule]: + """Iterate over all rules or the rules of an endpoint. + + :param endpoint: if provided only the rules for that endpoint + are returned. + :return: an iterator + """ + self.update() + if endpoint is not None: + return iter(self._rules_by_endpoint[endpoint]) + return iter(self._rules) + + def add(self, rulefactory: "RuleFactory") -> None: + """Add a new rule or factory to the map and bind it. Requires that the + rule is not bound to another map. 
+ + :param rulefactory: a :class:`Rule` or :class:`RuleFactory` + """ + for rule in rulefactory.get_rules(self): + rule.bind(self) + if not rule.build_only: + self._matcher.add(rule) + self._rules_by_endpoint.setdefault(rule.endpoint, []).append(rule) + self._remap = True + + def bind( + self, + server_name: str, + script_name: t.Optional[str] = None, + subdomain: t.Optional[str] = None, + url_scheme: str = "http", + default_method: str = "GET", + path_info: t.Optional[str] = None, + query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, + ) -> "MapAdapter": + """Return a new :class:`MapAdapter` with the details specified to the + call. Note that `script_name` will default to ``'/'`` if not further + specified or `None`. The `server_name` at least is a requirement + because the HTTP RFC requires absolute URLs for redirects and so all + redirect exceptions raised by Werkzeug will contain the full canonical + URL. + + If no path_info is passed to :meth:`match` it will use the default path + info passed to bind. While this doesn't really make sense for + manual bind calls, it's useful if you bind a map to a WSGI + environment which already contains the path info. + + `subdomain` will default to the `default_subdomain` for this map if + no defined. If there is no `default_subdomain` you cannot use the + subdomain feature. + + .. versionchanged:: 1.0 + If ``url_scheme`` is ``ws`` or ``wss``, only WebSocket rules + will match. + + .. versionchanged:: 0.15 + ``path_info`` defaults to ``'/'`` if ``None``. + + .. versionchanged:: 0.8 + ``query_args`` can be a string. + + .. versionchanged:: 0.7 + Added ``query_args``. 
+ """ + server_name = server_name.lower() + if self.host_matching: + if subdomain is not None: + raise RuntimeError("host matching enabled and a subdomain was provided") + elif subdomain is None: + subdomain = self.default_subdomain + if script_name is None: + script_name = "/" + if path_info is None: + path_info = "/" + + try: + server_name = _encode_idna(server_name) # type: ignore + except UnicodeError as e: + raise BadHost() from e + + return MapAdapter( + self, + server_name, + script_name, + subdomain, + url_scheme, + path_info, + default_method, + query_args, + ) + + def bind_to_environ( + self, + environ: t.Union["WSGIEnvironment", "Request"], + server_name: t.Optional[str] = None, + subdomain: t.Optional[str] = None, + ) -> "MapAdapter": + """Like :meth:`bind` but you can pass it an WSGI environment and it + will fetch the information from that dictionary. Note that because of + limitations in the protocol there is no way to get the current + subdomain and real `server_name` from the environment. If you don't + provide it, Werkzeug will use `SERVER_NAME` and `SERVER_PORT` (or + `HTTP_HOST` if provided) as used `server_name` with disabled subdomain + feature. + + If `subdomain` is `None` but an environment and a server name is + provided it will calculate the current subdomain automatically. + Example: `server_name` is ``'example.com'`` and the `SERVER_NAME` + in the wsgi `environ` is ``'staging.dev.example.com'`` the calculated + subdomain will be ``'staging.dev'``. + + If the object passed as environ has an environ attribute, the value of + this attribute is used instead. This allows you to pass request + objects. Additionally `PATH_INFO` added as a default of the + :class:`MapAdapter` so that you don't have to pass the path info to + the match method. + + .. versionchanged:: 1.0.0 + If the passed server name specifies port 443, it will match + if the incoming scheme is ``https`` without a port. + + .. 
versionchanged:: 1.0.0 + A warning is shown when the passed server name does not + match the incoming WSGI server name. + + .. versionchanged:: 0.8 + This will no longer raise a ValueError when an unexpected server + name was passed. + + .. versionchanged:: 0.5 + previously this method accepted a bogus `calculate_subdomain` + parameter that did not have any effect. It was removed because + of that. + + :param environ: a WSGI environment. + :param server_name: an optional server name hint (see above). + :param subdomain: optionally the current subdomain (see above). + """ + env = _get_environ(environ) + wsgi_server_name = get_host(env).lower() + scheme = env["wsgi.url_scheme"] + upgrade = any( + v.strip() == "upgrade" + for v in env.get("HTTP_CONNECTION", "").lower().split(",") + ) + + if upgrade and env.get("HTTP_UPGRADE", "").lower() == "websocket": + scheme = "wss" if scheme == "https" else "ws" + + if server_name is None: + server_name = wsgi_server_name + else: + server_name = server_name.lower() + + # strip standard port to match get_host() + if scheme in {"http", "ws"} and server_name.endswith(":80"): + server_name = server_name[:-3] + elif scheme in {"https", "wss"} and server_name.endswith(":443"): + server_name = server_name[:-4] + + if subdomain is None and not self.host_matching: + cur_server_name = wsgi_server_name.split(".") + real_server_name = server_name.split(".") + offset = -len(real_server_name) + + if cur_server_name[offset:] != real_server_name: + # This can happen even with valid configs if the server was + # accessed directly by IP address under some situations. + # Instead of raising an exception like in Werkzeug 0.7 or + # earlier we go by an invalid subdomain which will result + # in a 404 error on matching. 
+ warnings.warn( + f"Current server name {wsgi_server_name!r} doesn't match configured" + f" server name {server_name!r}", + stacklevel=2, + ) + subdomain = "" + else: + subdomain = ".".join(filter(None, cur_server_name[:offset])) + + def _get_wsgi_string(name: str) -> t.Optional[str]: + val = env.get(name) + if val is not None: + return _wsgi_decoding_dance(val, self.charset) + return None + + script_name = _get_wsgi_string("SCRIPT_NAME") + path_info = _get_wsgi_string("PATH_INFO") + query_args = _get_wsgi_string("QUERY_STRING") + return Map.bind( + self, + server_name, + script_name, + subdomain, + scheme, + env["REQUEST_METHOD"], + path_info, + query_args=query_args, + ) + + def update(self) -> None: + """Called before matching and building to keep the compiled rules + in the correct order after things changed. + """ + if not self._remap: + return + + with self._remap_lock: + if not self._remap: + return + + self._matcher.update() + for rules in self._rules_by_endpoint.values(): + rules.sort(key=lambda x: x.build_compare_key()) + self._remap = False + + def __repr__(self) -> str: + rules = self.iter_rules() + return f"{type(self).__name__}({pformat(list(rules))})" + + +class MapAdapter: + + """Returned by :meth:`Map.bind` or :meth:`Map.bind_to_environ` and does + the URL matching and building based on runtime information. 
+ """ + + def __init__( + self, + map: Map, + server_name: str, + script_name: str, + subdomain: t.Optional[str], + url_scheme: str, + path_info: str, + default_method: str, + query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, + ): + self.map = map + self.server_name = _to_str(server_name) + script_name = _to_str(script_name) + if not script_name.endswith("/"): + script_name += "/" + self.script_name = script_name + self.subdomain = _to_str(subdomain) + self.url_scheme = _to_str(url_scheme) + self.path_info = _to_str(path_info) + self.default_method = _to_str(default_method) + self.query_args = query_args + self.websocket = self.url_scheme in {"ws", "wss"} + + def dispatch( + self, + view_func: t.Callable[[str, t.Mapping[str, t.Any]], "WSGIApplication"], + path_info: t.Optional[str] = None, + method: t.Optional[str] = None, + catch_http_exceptions: bool = False, + ) -> "WSGIApplication": + """Does the complete dispatching process. `view_func` is called with + the endpoint and a dict with the values for the view. It should + look up the view function, call it, and return a response object + or WSGI application. http exceptions are not caught by default + so that applications can display nicer error messages by just + catching them by hand. If you want to stick with the default + error messages you can pass it ``catch_http_exceptions=True`` and + it will catch the http exceptions. 
+ + Here a small example for the dispatch usage:: + + from werkzeug.wrappers import Request, Response + from werkzeug.wsgi import responder + from werkzeug.routing import Map, Rule + + def on_index(request): + return Response('Hello from the index') + + url_map = Map([Rule('/', endpoint='index')]) + views = {'index': on_index} + + @responder + def application(environ, start_response): + request = Request(environ) + urls = url_map.bind_to_environ(environ) + return urls.dispatch(lambda e, v: views[e](request, **v), + catch_http_exceptions=True) + + Keep in mind that this method might return exception objects, too, so + use :class:`Response.force_type` to get a response object. + + :param view_func: a function that is called with the endpoint as + first argument and the value dict as second. Has + to dispatch to the actual view function with this + information. (see above) + :param path_info: the path info to use for matching. Overrides the + path info specified on binding. + :param method: the HTTP method used for matching. Overrides the + method specified on binding. + :param catch_http_exceptions: set to `True` to catch any of the + werkzeug :class:`HTTPException`\\s. + """ + try: + try: + endpoint, args = self.match(path_info, method) + except RequestRedirect as e: + return e + return view_func(endpoint, args) + except HTTPException as e: + if catch_http_exceptions: + return e + raise + + @t.overload + def match( # type: ignore + self, + path_info: t.Optional[str] = None, + method: t.Optional[str] = None, + return_rule: "te.Literal[False]" = False, + query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, + websocket: t.Optional[bool] = None, + ) -> t.Tuple[str, t.Mapping[str, t.Any]]: + ... 
+ + @t.overload + def match( + self, + path_info: t.Optional[str] = None, + method: t.Optional[str] = None, + return_rule: "te.Literal[True]" = True, + query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, + websocket: t.Optional[bool] = None, + ) -> t.Tuple[Rule, t.Mapping[str, t.Any]]: + ... + + def match( + self, + path_info: t.Optional[str] = None, + method: t.Optional[str] = None, + return_rule: bool = False, + query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, + websocket: t.Optional[bool] = None, + ) -> t.Tuple[t.Union[str, Rule], t.Mapping[str, t.Any]]: + """The usage is simple: you just pass the match method the current + path info as well as the method (which defaults to `GET`). The + following things can then happen: + + - you receive a `NotFound` exception that indicates that no URL is + matching. A `NotFound` exception is also a WSGI application you + can call to get a default page not found page (happens to be the + same object as `werkzeug.exceptions.NotFound`) + + - you receive a `MethodNotAllowed` exception that indicates that there + is a match for this URL but not for the current request method. + This is useful for RESTful applications. + + - you receive a `RequestRedirect` exception with a `new_url` + attribute. This exception is used to notify you about a request + Werkzeug requests from your WSGI application. This is for example the + case if you request ``/foo`` although the correct URL is ``/foo/`` + You can use the `RequestRedirect` instance as response-like object + similar to all other subclasses of `HTTPException`. + + - you receive a ``WebsocketMismatch`` exception if the only + match is a WebSocket rule but the bind is an HTTP request, or + if the match is an HTTP rule but the bind is a WebSocket + request. 
+ + - you get a tuple in the form ``(endpoint, arguments)`` if there is + a match (unless `return_rule` is True, in which case you get a tuple + in the form ``(rule, arguments)``) + + If the path info is not passed to the match method the default path + info of the map is used (defaults to the root URL if not defined + explicitly). + + All of the exceptions raised are subclasses of `HTTPException` so they + can be used as WSGI responses. They will all render generic error or + redirect pages. + + Here is a small example for matching: + + >>> m = Map([ + ... Rule('/', endpoint='index'), + ... Rule('/downloads/', endpoint='downloads/index'), + ... Rule('/downloads/<int:id>', endpoint='downloads/show') + ... ]) + >>> urls = m.bind("example.com", "/") + >>> urls.match("/", "GET") + ('index', {}) + >>> urls.match("/downloads/42") + ('downloads/show', {'id': 42}) + + And here is what happens on redirect and missing URLs: + + >>> urls.match("/downloads") + Traceback (most recent call last): + ... + RequestRedirect: http://example.com/downloads/ + >>> urls.match("/missing") + Traceback (most recent call last): + ... + NotFound: 404 Not Found + + :param path_info: the path info to use for matching. Overrides the + path info specified on binding. + :param method: the HTTP method used for matching. Overrides the + method specified on binding. + :param return_rule: return the rule that matched instead of just the + endpoint (defaults to `False`). + :param query_args: optional query arguments that are used for + automatic redirects as string or dictionary. It's + currently not possible to use the query arguments + for URL matching. + :param websocket: Match WebSocket instead of HTTP requests. A + websocket request has a ``ws`` or ``wss`` + :attr:`url_scheme`. This overrides that detection. + + .. versionadded:: 1.0 + Added ``websocket``. + + .. versionchanged:: 0.8 + ``query_args`` can be a string. + + .. versionadded:: 0.7 + Added ``query_args``. + + .. 
versionadded:: 0.6 + Added ``return_rule``. + """ + self.map.update() + if path_info is None: + path_info = self.path_info + else: + path_info = _to_str(path_info, self.map.charset) + if query_args is None: + query_args = self.query_args or {} + method = (method or self.default_method).upper() + + if websocket is None: + websocket = self.websocket + + domain_part = self.server_name if self.map.host_matching else self.subdomain + path_part = f"/{path_info.lstrip('/')}" if path_info else "" + + try: + result = self.map._matcher.match(domain_part, path_part, method, websocket) + except RequestPath as e: + raise RequestRedirect( + self.make_redirect_url( + url_quote(e.path_info, self.map.charset, safe="/:|+"), + query_args, + ) + ) from None + except RequestAliasRedirect as e: + raise RequestRedirect( + self.make_alias_redirect_url( + f"{domain_part}|{path_part}", + e.endpoint, + e.matched_values, + method, + query_args, + ) + ) from None + except NoMatch as e: + if e.have_match_for: + raise MethodNotAllowed(valid_methods=list(e.have_match_for)) from None + + if e.websocket_mismatch: + raise WebsocketMismatch() from None + + raise NotFound() from None + else: + rule, rv = result + + if self.map.redirect_defaults: + redirect_url = self.get_default_redirect(rule, method, rv, query_args) + if redirect_url is not None: + raise RequestRedirect(redirect_url) + + if rule.redirect_to is not None: + if isinstance(rule.redirect_to, str): + + def _handle_match(match: t.Match[str]) -> str: + value = rv[match.group(1)] + return rule._converters[match.group(1)].to_url(value) + + redirect_url = _simple_rule_re.sub(_handle_match, rule.redirect_to) + else: + redirect_url = rule.redirect_to(self, **rv) + + if self.subdomain: + netloc = f"{self.subdomain}.{self.server_name}" + else: + netloc = self.server_name + + raise RequestRedirect( + url_join( + f"{self.url_scheme or 'http'}://{netloc}{self.script_name}", + redirect_url, + ) + ) + + if return_rule: + return rule, rv + else: + return 
rule.endpoint, rv + + def test( + self, path_info: t.Optional[str] = None, method: t.Optional[str] = None + ) -> bool: + """Test if a rule would match. Works like `match` but returns `True` + if the URL matches, or `False` if it does not exist. + + :param path_info: the path info to use for matching. Overrides the + path info specified on binding. + :param method: the HTTP method used for matching. Overrides the + method specified on binding. + """ + try: + self.match(path_info, method) + except RequestRedirect: + pass + except HTTPException: + return False + return True + + def allowed_methods(self, path_info: t.Optional[str] = None) -> t.Iterable[str]: + """Returns the valid methods that match for a given path. + + .. versionadded:: 0.7 + """ + try: + self.match(path_info, method="--") + except MethodNotAllowed as e: + return e.valid_methods # type: ignore + except HTTPException: + pass + return [] + + def get_host(self, domain_part: t.Optional[str]) -> str: + """Figures out the full host name for the given domain part. The + domain part is a subdomain in case host matching is disabled or + a full host name. + """ + if self.map.host_matching: + if domain_part is None: + return self.server_name + return _to_str(domain_part, "ascii") + subdomain = domain_part + if subdomain is None: + subdomain = self.subdomain + else: + subdomain = _to_str(subdomain, "ascii") + + if subdomain: + return f"{subdomain}.{self.server_name}" + else: + return self.server_name + + def get_default_redirect( + self, + rule: Rule, + method: str, + values: t.MutableMapping[str, t.Any], + query_args: t.Union[t.Mapping[str, t.Any], str], + ) -> t.Optional[str]: + """A helper that returns the URL to redirect to if it finds one. + This is used for default redirecting only. + + :internal: + """ + assert self.map.redirect_defaults + for r in self.map._rules_by_endpoint[rule.endpoint]: + # every rule that comes after this one, including ourself + # has a lower priority for the defaults. 
We order the ones + # with the highest priority up for building. + if r is rule: + break + if r.provides_defaults_for(rule) and r.suitable_for(values, method): + values.update(r.defaults) # type: ignore + domain_part, path = r.build(values) # type: ignore + return self.make_redirect_url(path, query_args, domain_part=domain_part) + return None + + def encode_query_args(self, query_args: t.Union[t.Mapping[str, t.Any], str]) -> str: + if not isinstance(query_args, str): + return url_encode(query_args, self.map.charset) + return query_args + + def make_redirect_url( + self, + path_info: str, + query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, + domain_part: t.Optional[str] = None, + ) -> str: + """Creates a redirect URL. + + :internal: + """ + if query_args: + suffix = f"?{self.encode_query_args(query_args)}" + else: + suffix = "" + + scheme = self.url_scheme or "http" + host = self.get_host(domain_part) + path = posixpath.join(self.script_name.strip("/"), path_info.lstrip("/")) + return f"{scheme}://{host}/{path}{suffix}" + + def make_alias_redirect_url( + self, + path: str, + endpoint: str, + values: t.Mapping[str, t.Any], + method: str, + query_args: t.Union[t.Mapping[str, t.Any], str], + ) -> str: + """Internally called to make an alias redirect URL.""" + url = self.build( + endpoint, values, method, append_unknown=False, force_external=True + ) + if query_args: + url += f"?{self.encode_query_args(query_args)}" + assert url != path, "detected invalid alias setting. No canonical URL found" + return url + + def _partial_build( + self, + endpoint: str, + values: t.Mapping[str, t.Any], + method: t.Optional[str], + append_unknown: bool, + ) -> t.Optional[t.Tuple[str, str, bool]]: + """Helper for :meth:`build`. Returns subdomain and path for the + rule that accepts this endpoint, values and method. 
+ + :internal: + """ + # in case the method is none, try with the default method first + if method is None: + rv = self._partial_build( + endpoint, values, self.default_method, append_unknown + ) + if rv is not None: + return rv + + # Default method did not match or a specific method is passed. + # Check all for first match with matching host. If no matching + # host is found, go with first result. + first_match = None + + for rule in self.map._rules_by_endpoint.get(endpoint, ()): + if rule.suitable_for(values, method): + build_rv = rule.build(values, append_unknown) + + if build_rv is not None: + rv = (build_rv[0], build_rv[1], rule.websocket) + if self.map.host_matching: + if rv[0] == self.server_name: + return rv + elif first_match is None: + first_match = rv + else: + return rv + + return first_match + + def build( + self, + endpoint: str, + values: t.Optional[t.Mapping[str, t.Any]] = None, + method: t.Optional[str] = None, + force_external: bool = False, + append_unknown: bool = True, + url_scheme: t.Optional[str] = None, + ) -> str: + """Building URLs works pretty much the other way round. Instead of + `match` you call `build` and pass it the endpoint and a dict of + arguments for the placeholders. + + The `build` function also accepts an argument called `force_external` + which, if you set it to `True` will force external URLs. Per default + external URLs (include the server name) will only be used if the + target URL is on a different subdomain. + + >>> m = Map([ + ... Rule('/', endpoint='index'), + ... Rule('/downloads/', endpoint='downloads/index'), + ... Rule('/downloads/', endpoint='downloads/show') + ... ]) + >>> urls = m.bind("example.com", "/") + >>> urls.build("index", {}) + '/' + >>> urls.build("downloads/show", {'id': 42}) + '/downloads/42' + >>> urls.build("downloads/show", {'id': 42}, force_external=True) + 'http://example.com/downloads/42' + + Because URLs cannot contain non ASCII data you will always get + bytes back. 
Non-ASCII characters are urlencoded with the + charset defined on the map instance. + + Additional values are converted to strings and appended to the URL as + URL querystring parameters: + + >>> urls.build("index", {'q': 'My Searchstring'}) + '/?q=My+Searchstring' + + When processing those additional values, lists are furthermore + interpreted as multiple values (as per + :py:class:`werkzeug.datastructures.MultiDict`): + + >>> urls.build("index", {'q': ['a', 'b', 'c']}) + '/?q=a&q=b&q=c' + + Passing a ``MultiDict`` will also add multiple values: + + >>> urls.build("index", MultiDict((('p', 'z'), ('q', 'a'), ('q', 'b')))) + '/?p=z&q=a&q=b' + + If a rule does not exist when building, a `BuildError` exception is + raised. + + The build method accepts an argument called `method` which allows you + to specify the method you want to have a URL built for if you have + different methods for the same endpoint specified. + + :param endpoint: the endpoint of the URL to build. + :param values: the values for the URL to build. Unhandled values are + appended to the URL as query parameters. + :param method: the HTTP method for the rule if there are different + URLs for different methods on the same endpoint. + :param force_external: enforce full canonical external URLs. If the URL + scheme is not provided, this will generate + a protocol-relative URL. + :param append_unknown: unknown parameters are appended to the generated + URL as query string arguments. Disable this + if you want the builder to ignore those. + :param url_scheme: Scheme to use in place of the bound + :attr:`url_scheme`. + + .. versionchanged:: 2.0 + Added the ``url_scheme`` parameter. + + .. versionadded:: 0.6 + Added the ``append_unknown`` parameter.
+ """ + self.map.update() + + if values: + if isinstance(values, MultiDict): + values = { + k: (v[0] if len(v) == 1 else v) + for k, v in dict.items(values) + if len(v) != 0 + } + else: # plain dict + values = {k: v for k, v in values.items() if v is not None} + else: + values = {} + + rv = self._partial_build(endpoint, values, method, append_unknown) + if rv is None: + raise BuildError(endpoint, values, method, self) + + domain_part, path, websocket = rv + host = self.get_host(domain_part) + + if url_scheme is None: + url_scheme = self.url_scheme + + # Always build WebSocket routes with the scheme (browsers + # require full URLs). If bound to a WebSocket, ensure that HTTP + # routes are built with an HTTP scheme. + secure = url_scheme in {"https", "wss"} + + if websocket: + force_external = True + url_scheme = "wss" if secure else "ws" + elif url_scheme: + url_scheme = "https" if secure else "http" + + # shortcut this. + if not force_external and ( + (self.map.host_matching and host == self.server_name) + or (not self.map.host_matching and domain_part == self.subdomain) + ): + return f"{self.script_name.rstrip('/')}/{path.lstrip('/')}" + + scheme = f"{url_scheme}:" if url_scheme else "" + return f"{scheme}//{host}{self.script_name[:-1]}/{path.lstrip('/')}" diff --git a/src/werkzeug/routing/matcher.py b/src/werkzeug/routing/matcher.py new file mode 100644 index 0000000000..3c649c2e66 --- /dev/null +++ b/src/werkzeug/routing/matcher.py @@ -0,0 +1,168 @@ +import re +import typing as t +from dataclasses import dataclass +from dataclasses import field + +from .converters import ValidationError +from .exceptions import NoMatch +from .exceptions import RequestAliasRedirect +from .exceptions import RequestPath +from .rules import Rule +from .rules import RulePart + + +class SlashRequired(Exception): + pass + + +@dataclass +class State: + """A representation of a rule state. 
class StateMachineMatcher:
    """Match request paths to rules by walking a tree of :class:`State` nodes.

    Every rule part is a transition: static parts are looked up by
    their literal content, dynamic parts are tried by matching their
    regex. Rules are registered with :meth:`add`; :meth:`update` sorts
    the dynamic transitions by weight and should be called after rules
    have been added.

    :param merge_slashes: If a path does not match as given, retry with
        runs of consecutive slashes collapsed to one and, when that
        matches, redirect to the collapsed path.
    """

    def __init__(self, merge_slashes: bool) -> None:
        self._root = State()
        self.merge_slashes = merge_slashes

    def add(self, rule: Rule) -> None:
        """Insert *rule* into the state tree, creating states as needed."""
        state = self._root
        for part in rule._parts:
            if part.static:
                state = state.static.setdefault(part.content, State())
            else:
                # Reuse an existing equal dynamic transition, otherwise
                # append a new one.
                for test_part, new_state in state.dynamic:
                    if test_part == part:
                        state = new_state
                        break
                else:
                    new_state = State()
                    state.dynamic.append((part, new_state))
                    state = new_state
        state.rules.append(rule)

    def update(self) -> None:
        """Sort the dynamic transitions of every state by their weight."""

        def _update_state(state: State) -> None:
            state.dynamic.sort(key=lambda entry: entry[0].weight)
            for new_state in state.static.values():
                _update_state(new_state)
            for _, new_state in state.dynamic:
                _update_state(new_state)

        _update_state(self._root)

    def match(
        self, domain: str, path: str, method: str, websocket: bool
    ) -> t.Tuple[Rule, t.MutableMapping[str, t.Any]]:
        """Find the rule matching the request and convert its arguments.

        :param domain: The subdomain or host part to match.
        :param path: The request path.
        :param method: The request method.
        :param websocket: Whether this is a WebSocket request.
        :return: The matched rule and the converted values, with the
            rule's defaults applied on top.
        :raises RequestPath: If the path matches only after appending a
            trailing slash or after merging consecutive slashes.
        :raises RequestAliasRedirect: If the matched rule is an alias
            and the map has ``redirect_defaults`` enabled.
        :raises NoMatch: If nothing matches. Carries the methods of
            rules that matched the path but not the method, and whether
            a rule was rejected only for its websocket setting.
        """
        # To match to a rule we need to start at the root state and
        # try to follow the transitions until we find a match, or find
        # there is no transition to follow.
        have_match_for: t.Set[str] = set()
        websocket_mismatch = False

        def _match(
            state: State, parts: t.List[str], values: t.List[str]
        ) -> t.Optional[t.Tuple[Rule, t.List[str]]]:
            # Called recursively; attempts to match the head part to
            # the state's transitions.
            nonlocal websocket_mismatch

            # Base case: all parts have been consumed. Return the first
            # rule whose methods and websocket setting fit, along with
            # the dynamic values extracted on the way.
            if not parts:
                for rule in state.rules:
                    if rule.methods is not None and method not in rule.methods:
                        have_match_for.update(rule.methods)
                    elif rule.websocket != websocket:
                        websocket_mismatch = True
                    else:
                        return rule, values

                # Test if there is a match with this path with a
                # trailing slash, if so raise an exception to report
                # that matching is possible with an additional slash.
                if "" in state.static:
                    for rule in state.static[""].rules:
                        if (
                            rule.strict_slashes
                            and websocket == rule.websocket
                            and (rule.methods is None or method in rule.methods)
                        ):
                            raise SlashRequired()
                return None

            part = parts[0]
            # To match this part try the static transitions first.
            if part in state.static:
                rv = _match(state.static[part], parts[1:], values)
                if rv is not None:
                    return rv

            # No match via the static transitions, so try the dynamic
            # ones.
            for test_part, new_state in state.dynamic:
                target = part
                remaining = parts[1:]
                # A final part indicates a transition that always
                # consumes the remaining parts i.e. transitions to a
                # final state.
                if test_part.final:
                    target = "/".join(parts)
                    remaining = []
                found = re.match(test_part.content, target)
                if found is not None:
                    rv = _match(new_state, remaining, values + list(found.groups()))
                    if rv is not None:
                        return rv
            return None

        def _search(candidate: str) -> t.Optional[t.Tuple[Rule, t.List[str]]]:
            # Run one search from the root, translating the internal
            # "slash required" signal into a redirect request.
            try:
                return _match(self._root, [domain, *candidate.split("/")], [])
            except SlashRequired:
                raise RequestPath(f"{candidate}/") from None

        rv = _search(path)

        if rv is None:
            if self.merge_slashes:
                # Try to match again, but with slashes merged. The
                # quantifier is greedy so any run of slashes collapses
                # to exactly one; a lazy ``{2,}?`` would only halve it.
                merged = re.sub("/{2,}", "/", path)
                # If nothing was merged the second search would repeat
                # the first one, so skip it.
                if merged != path and _search(merged) is not None:
                    raise RequestPath(merged)
            raise NoMatch(have_match_for, websocket_mismatch)

        rule, values = rv

        result: t.Dict[str, t.Any] = {}
        for name, value in zip(rule._converters.keys(), values):
            try:
                value = rule._converters[name].to_python(value)
            except ValidationError:
                raise NoMatch(have_match_for, websocket_mismatch) from None
            result[str(name)] = value
        if rule.defaults:
            result.update(rule.defaults)

        if rule.alias and rule.map.redirect_defaults:
            raise RequestAliasRedirect(result, rule.endpoint)

        return rule, result
# Splits one "/"-delimited segment of a rule into leading static text
# and an optional ``<converter(arguments):variable>`` placeholder. The
# group names are the keys read from ``match.groupdict()`` by
# ``Rule._parse_rule``.
_part_re = re.compile(
    r"""
    (?P<static>[^<]*)                           # static rule data
    (?:
      <
        (?:
          (?P<converter>[a-zA-Z_][a-zA-Z0-9_]*)   # converter name
          (?:\((?P<arguments>.*?)\))?             # converter arguments
          \:                                      # variable delimiter
        )?
        (?P<variable>[a-zA-Z_][a-zA-Z0-9_]*)      # variable name
      >
    )?
    """,
    re.VERBOSE,
)

_simple_rule_re = re.compile(r"<([^>]+)>")

# Matches one comma-terminated converter argument, optionally prefixed
# with ``name=``. ``stringval`` captures a quoted string including its
# quotes; the quotes are stripped later by ``_pythonize``.
_converter_args_re = re.compile(
    r"""
    ((?P<name>\w+)\s*=\s*)?
    (?P<value>
        True|False|
        \d+.\d+|
        \d+.|
        \d+|
        [\w\d_.]+|
        [urUR]?(?P<stringval>"[^"]*?"|'[^']*')
    )\s*,
    """,
    re.VERBOSE,
)


_PYTHON_CONSTANTS = {"None": None, "True": True, "False": False}


def _find(value: str, target: str, pos: int) -> int:
    """Find the *target* in *value* after *pos*.

    Returns the *value* length if *target* isn't found.
    """
    index = value.find(target, pos)
    return len(value) if index == -1 else index


def _pythonize(value: str) -> t.Union[None, bool, int, float, str]:
    """Convert the textual form of a converter argument to a Python value.

    ``None``, ``True`` and ``False`` map to the constants, numeric text
    becomes ``int`` or ``float``, and anything else is returned as a
    string with one pair of matching surrounding quotes removed.
    """
    if value in _PYTHON_CONSTANTS:
        return _PYTHON_CONSTANTS[value]
    for convert in int, float:
        try:
            return convert(value)  # type: ignore
        except ValueError:
            pass
    # Guard against the empty string: indexing ``value[0]`` on it would
    # raise IndexError.
    if value and value[:1] == value[-1:] and value[0] in "\"'":
        value = value[1:-1]
    return str(value)


def parse_converter_args(argstr: str) -> t.Tuple[t.Tuple, t.Dict[str, t.Any]]:
    """Parse the argument string of a converter placeholder.

    :param argstr: The text between the parentheses of a converter,
        e.g. ``"2, maxlength=5"``.
    :return: A tuple of positional values and a dict of keyword values,
        each converted with :func:`_pythonize`.
    """
    # Every argument must be comma-terminated for the regex to match.
    argstr += ","
    args = []
    kwargs = {}

    for item in _converter_args_re.finditer(argstr):
        value = item.group("stringval")
        if value is None:
            value = item.group("value")
        value = _pythonize(value)
        name = item.group("name")
        if not name:
            args.append(value)
        else:
            kwargs[name] = value

    return tuple(args), kwargs
class Submount(RuleFactory):
    """Prefix the URL of every wrapped rule with a fixed path::

        url_map = Map([
            Rule('/', endpoint='index'),
            Submount('/blog', [
                Rule('/', endpoint='blog/index'),
                Rule('/entry/<entry_slug>', endpoint='blog/show')
            ])
        ])

    With this map the ``'blog/show'`` rule matches
    ``/blog/entry/<entry_slug>``.

    :param path: The prefix; trailing slashes are stripped from it.
    :param rules: Rule factories whose rules receive the prefix.
    """

    def __init__(self, path: str, rules: t.Iterable[RuleFactory]) -> None:
        self.rules = rules
        self.path = path.rstrip("/")

    def get_rules(self, map: "Map") -> t.Iterator["Rule"]:
        """Yield unbound copies of the wrapped rules with the prefix added."""
        for factory in self.rules:
            for source_rule in factory.get_rules(map):
                mounted = source_rule.empty()
                mounted.rule = self.path + mounted.rule
                yield mounted
class RuleTemplateFactory(RuleFactory):
    """Produce rules with ``$placeholder`` variables filled in from a context.

    Created by calling a `RuleTemplate`; not meant to be instantiated
    directly.

    :internal:
    """

    def __init__(
        self, rules: t.Iterable[RuleFactory], context: t.Dict[str, t.Any]
    ) -> None:
        self.context = context
        self.rules = rules

    def get_rules(self, map: "Map") -> t.Iterator["Rule"]:
        """Yield a new `Rule` per wrapped rule with templates substituted.

        String defaults, the subdomain, a string endpoint and the rule
        string are expanded with `string.Template`; the methods,
        ``build_only`` and ``strict_slashes`` are copied over unchanged.
        """

        def expand(text: str) -> str:
            return Template(text).substitute(self.context)

        for factory in self.rules:
            for rule in factory.get_rules(map):
                if rule.defaults:
                    new_defaults = {
                        key: expand(value) if isinstance(value, str) else value
                        for key, value in rule.defaults.items()
                    }
                else:
                    new_defaults = None

                if rule.subdomain is None:
                    subdomain = None
                else:
                    subdomain = expand(rule.subdomain)

                new_endpoint = rule.endpoint
                if isinstance(new_endpoint, str):
                    new_endpoint = expand(new_endpoint)

                yield Rule(
                    expand(rule.rule),
                    new_defaults,
                    subdomain,
                    rule.methods,
                    rule.build_only,
                    new_endpoint,
                    rule.strict_slashes,
                )
ast.Name): + node.id = f".{node.id}" + return tree + + +_CALL_CONVERTER_CODE_FMT = "self._converters[{elem!r}].to_url()" +_IF_KWARGS_URL_ENCODE_CODE = """\ +if kwargs: + params = self._encode_query_vars(kwargs) + q = "?" if params else "" +else: + q = params = "" +""" +_IF_KWARGS_URL_ENCODE_AST = _prefix_names(_IF_KWARGS_URL_ENCODE_CODE) +_URL_ENCODE_AST_NAMES = (_prefix_names("q"), _prefix_names("params")) + + +class Rule(RuleFactory): + """A Rule represents one URL pattern. There are some options for `Rule` + that change the way it behaves and are passed to the `Rule` constructor. + Note that besides the rule-string all arguments *must* be keyword arguments + in order to not break the application on Werkzeug upgrades. + + `string` + Rule strings basically are just normal URL paths with placeholders in + the format ```` where the converter and the + arguments are optional. If no converter is defined the `default` + converter is used which means `string` in the normal configuration. + + URL rules that end with a slash are branch URLs, others are leaves. + If you have `strict_slashes` enabled (which is the default), all + branch URLs that are matched without a trailing slash will trigger a + redirect to the same URL with the missing slash appended. + + The converters are defined on the `Map`. + + `endpoint` + The endpoint for this rule. This can be anything. A reference to a + function, a string, a number etc. The preferred way is using a string + because the endpoint is used for URL generation. + + `defaults` + An optional dict with defaults for other rules with the same endpoint. + This is a bit tricky but useful if you want to have unique URLs:: + + url_map = Map([ + Rule('/all/', defaults={'page': 1}, endpoint='all_entries'), + Rule('/all/page/', endpoint='all_entries') + ]) + + If a user now visits ``http://example.com/all/page/1`` they will be + redirected to ``http://example.com/all/``. 
If `redirect_defaults` is + disabled on the `Map` instance this will only affect the URL + generation. + + `subdomain` + The subdomain rule string for this rule. If not specified the rule + only matches for the `default_subdomain` of the map. If the map is + not bound to a subdomain this feature is disabled. + + Can be useful if you want to have user profiles on different subdomains + and all subdomains are forwarded to your application:: + + url_map = Map([ + Rule('/', subdomain='', endpoint='user/homepage'), + Rule('/stats', subdomain='', endpoint='user/stats') + ]) + + `methods` + A sequence of http methods this rule applies to. If not specified, all + methods are allowed. For example this can be useful if you want different + endpoints for `POST` and `GET`. If methods are defined and the path + matches but the method matched against is not in this list or in the + list of another rule for that path the error raised is of the type + `MethodNotAllowed` rather than `NotFound`. If `GET` is present in the + list of methods and `HEAD` is not, `HEAD` is added automatically. + + `strict_slashes` + Override the `Map` setting for `strict_slashes` only for this rule. If + not specified the `Map` setting is used. + + `merge_slashes` + Override :attr:`Map.merge_slashes` for this rule. + + `build_only` + Set this to True and the rule will never match but will create a URL + that can be build. This is useful if you have resources on a subdomain + or folder that are not handled by the WSGI application (like static data) + + `redirect_to` + If given this must be either a string or callable. In case of a + callable it's called with the url adapter that triggered the match and + the values of the URL as keyword arguments and has to return the target + for the redirect, otherwise it has to be a string with placeholders in + rule syntax:: + + def foo_with_slug(adapter, id): + # ask the database for the slug for the old id. this of + # course has nothing to do with werkzeug. 
+ return f'foo/{Foo.get_slug_for_id(id)}' + + url_map = Map([ + Rule('/foo/', endpoint='foo'), + Rule('/some/old/url/', redirect_to='foo/'), + Rule('/other/old/url/', redirect_to=foo_with_slug) + ]) + + When the rule is matched the routing system will raise a + `RequestRedirect` exception with the target for the redirect. + + Keep in mind that the URL will be joined against the URL root of the + script so don't use a leading slash on the target URL unless you + really mean root of that domain. + + `alias` + If enabled this rule serves as an alias for another rule with the same + endpoint and arguments. + + `host` + If provided and the URL map has host matching enabled this can be + used to provide a match rule for the whole host. This also means + that the subdomain feature is disabled. + + `websocket` + If ``True``, this rule is only matches for WebSocket (``ws://``, + ``wss://``) requests. By default, rules will only match for HTTP + requests. + + .. versionchanged:: 2.1 + Percent-encoded newlines (``%0a``), which are decoded by WSGI + servers, are considered when routing instead of terminating the + match early. + + .. versionadded:: 1.0 + Added ``websocket``. + + .. versionadded:: 1.0 + Added ``merge_slashes``. + + .. versionadded:: 0.7 + Added ``alias`` and ``host``. + + .. versionchanged:: 0.6.1 + ``HEAD`` is added to ``methods`` if ``GET`` is present. 
+ """ + + def __init__( + self, + string: str, + defaults: t.Optional[t.Mapping[str, t.Any]] = None, + subdomain: t.Optional[str] = None, + methods: t.Optional[t.Iterable[str]] = None, + build_only: bool = False, + endpoint: t.Optional[str] = None, + strict_slashes: t.Optional[bool] = None, + merge_slashes: t.Optional[bool] = None, + redirect_to: t.Optional[t.Union[str, t.Callable[..., str]]] = None, + alias: bool = False, + host: t.Optional[str] = None, + websocket: bool = False, + ) -> None: + if not string.startswith("/"): + raise ValueError("urls must start with a leading slash") + self.rule = string + self.is_leaf = not string.endswith("/") + self.is_branch = string.endswith("/") + + self.map: "Map" = None # type: ignore + self.strict_slashes = strict_slashes + self.merge_slashes = merge_slashes + self.subdomain = subdomain + self.host = host + self.defaults = defaults + self.build_only = build_only + self.alias = alias + self.websocket = websocket + + if methods is not None: + if isinstance(methods, str): + raise TypeError("'methods' should be a list of strings.") + + methods = {x.upper() for x in methods} + + if "HEAD" not in methods and "GET" in methods: + methods.add("HEAD") + + if websocket and methods - {"GET", "HEAD", "OPTIONS"}: + raise ValueError( + "WebSocket rules can only use 'GET', 'HEAD', and 'OPTIONS' methods." + ) + + self.methods = methods + self.endpoint: str = endpoint # type: ignore + self.redirect_to = redirect_to + + if defaults: + self.arguments = set(map(str, defaults)) + else: + self.arguments = set() + + self._converters: t.Dict[str, "BaseConverter"] = {} + self._trace: t.List[t.Tuple[bool, str]] = [] + self._parts: t.List[RulePart] = [] + + def empty(self) -> "Rule": + """ + Return an unbound copy of this rule. + + This can be useful if want to reuse an already bound URL for another + map. See ``get_empty_kwargs`` to override what keyword arguments are + provided to the new copy. 
+ """ + return type(self)(self.rule, **self.get_empty_kwargs()) + + def get_empty_kwargs(self) -> t.Mapping[str, t.Any]: + """ + Provides kwargs for instantiating empty copy with empty() + + Use this method to provide custom keyword arguments to the subclass of + ``Rule`` when calling ``some_rule.empty()``. Helpful when the subclass + has custom keyword arguments that are needed at instantiation. + + Must return a ``dict`` that will be provided as kwargs to the new + instance of ``Rule``, following the initial ``self.rule`` value which + is always provided as the first, required positional argument. + """ + defaults = None + if self.defaults: + defaults = dict(self.defaults) + return dict( + defaults=defaults, + subdomain=self.subdomain, + methods=self.methods, + build_only=self.build_only, + endpoint=self.endpoint, + strict_slashes=self.strict_slashes, + redirect_to=self.redirect_to, + alias=self.alias, + host=self.host, + ) + + def get_rules(self, map: "Map") -> t.Iterator["Rule"]: + yield self + + def refresh(self) -> None: + """Rebinds and refreshes the URL. Call this if you modified the + rule in place. + + :internal: + """ + self.bind(self.map, rebind=True) + + def bind(self, map: "Map", rebind: bool = False) -> None: + """Bind the url to a map and create a regular expression based on + the information from the rule itself and the defaults from the map. + + :internal: + """ + if self.map is not None and not rebind: + raise RuntimeError(f"url rule {self!r} already bound to map {self.map!r}") + self.map = map + if self.strict_slashes is None: + self.strict_slashes = map.strict_slashes + if self.merge_slashes is None: + self.merge_slashes = map.merge_slashes + if self.subdomain is None: + self.subdomain = map.default_subdomain + self.compile() + + def get_converter( + self, + variable_name: str, + converter_name: str, + args: t.Tuple, + kwargs: t.Mapping[str, t.Any], + ) -> "BaseConverter": + """Looks up the converter for the given parameter. + + .. 
versionadded:: 0.9 + """ + if converter_name not in self.map.converters: + raise LookupError(f"the converter {converter_name!r} does not exist") + return self.map.converters[converter_name](self.map, *args, **kwargs) + + def _encode_query_vars(self, query_vars: t.Mapping[str, t.Any]) -> str: + return url_encode( + query_vars, + charset=self.map.charset, + sort=self.map.sort_parameters, + key=self.map.sort_key, + ) + + def _parse_rule(self, rule: str) -> t.Iterable[RulePart]: + pos = 0 + endpos = _find(rule, "/", pos) + content = "" + static = True + argument_weights = [] + static_weights = [] + final = False + static_parts = 0 + + while pos <= len(rule): + match = _part_re.match(rule, pos, endpos) + assert match is not None + + data = match.groupdict() + if data["static"]: + static_weights.append((static_parts, -len(data["static"]))) + static_parts += 1 + self._trace.append((False, data["static"])) + + if data["variable"] is not None: + static = False + content += re.escape(data["static"]) + c_args, c_kwargs = parse_converter_args(data["arguments"] or "") + convobj = self.get_converter( + data["variable"], data["converter"] or "default", c_args, c_kwargs + ) + self._converters[data["variable"]] = convobj + self.arguments.add(data["variable"]) + if not convobj.part_isolating: + endpos = len(rule) + final = True + content += f"({convobj.regex})" + argument_weights.append(convobj.weight) + self._trace.append((True, data["variable"])) + else: + content += data["static"] if static else re.escape(data["static"]) + + pos = match.end() + if pos == endpos: + if pos < len(rule) and rule[pos] == "/": + self._trace.append((False, "/")) + pos += 1 + weight = Weighting( + -len(static_weights), + static_weights, + -len(argument_weights), + argument_weights, + ) + if final: + content += r"$\Z" + yield RulePart( + content=content, final=final, static=static, weight=weight + ) + content = "" + static = True + argument_weights = [] + static_weights = [] + endpos = _find(rule, "/", 
pos) + + def compile(self) -> None: + """Compiles the regular expression and stores it.""" + assert self.map is not None, "rule not bound" + + if self.map.host_matching: + domain_rule = self.host or "" + else: + domain_rule = self.subdomain or "" + self._parts = [] + self._trace = [] + self._converters = {} + self._parts.extend(self._parse_rule(domain_rule)) + self._trace.append((False, "|")) + rule = self.rule + if self.merge_slashes: + rule = re.sub("/{2,}?", "/", self.rule) + self._parts.extend(self._parse_rule(rule)) + + self._build: t.Callable[..., t.Tuple[str, str]] + self._build = self._compile_builder(False).__get__(self, None) + self._build_unknown: t.Callable[..., t.Tuple[str, str]] + self._build_unknown = self._compile_builder(True).__get__(self, None) + + @staticmethod + def _get_func_code(code: CodeType, name: str) -> t.Callable[..., t.Tuple[str, str]]: + globs: t.Dict[str, t.Any] = {} + locs: t.Dict[str, t.Any] = {} + exec(code, globs, locs) + return locs[name] # type: ignore + + def _compile_builder( + self, append_unknown: bool = True + ) -> t.Callable[..., t.Tuple[str, str]]: + defaults = self.defaults or {} + dom_ops: t.List[t.Tuple[bool, str]] = [] + url_ops: t.List[t.Tuple[bool, str]] = [] + + opl = dom_ops + for is_dynamic, data in self._trace: + if data == "|" and opl is dom_ops: + opl = url_ops + continue + # this seems like a silly case to ever come up but: + # if a default is given for a value that appears in the rule, + # resolve it to a constant ahead of time + if is_dynamic and data in defaults: + data = self._converters[data].to_url(defaults[data]) + opl.append((False, data)) + elif not is_dynamic: + opl.append( + (False, url_quote(_to_bytes(data, self.map.charset), safe="/:|+")) + ) + else: + opl.append((True, data)) + + def _convert(elem: str) -> ast.stmt: + ret = _prefix_names(_CALL_CONVERTER_CODE_FMT.format(elem=elem)) + ret.args = [ast.Name(str(elem), ast.Load())] # type: ignore # str for py2 + return ret + + def _parts(ops: 
t.List[t.Tuple[bool, str]]) -> t.List[ast.AST]: + parts = [ + _convert(elem) if is_dynamic else ast.Str(s=elem) + for is_dynamic, elem in ops + ] + parts = parts or [ast.Str("")] + # constant fold + ret = [parts[0]] + for p in parts[1:]: + if isinstance(p, ast.Str) and isinstance(ret[-1], ast.Str): + ret[-1] = ast.Str(ret[-1].s + p.s) + else: + ret.append(p) + return ret + + dom_parts = _parts(dom_ops) + url_parts = _parts(url_ops) + if not append_unknown: + body = [] + else: + body = [_IF_KWARGS_URL_ENCODE_AST] + url_parts.extend(_URL_ENCODE_AST_NAMES) + + def _join(parts: t.List[ast.AST]) -> ast.AST: + if len(parts) == 1: # shortcut + return parts[0] + return ast.JoinedStr(parts) + + body.append( + ast.Return(ast.Tuple([_join(dom_parts), _join(url_parts)], ast.Load())) + ) + + pargs = [ + elem + for is_dynamic, elem in dom_ops + url_ops + if is_dynamic and elem not in defaults + ] + kargs = [str(k) for k in defaults] + + func_ast: ast.FunctionDef = _prefix_names("def _(): pass") # type: ignore + func_ast.name = f"" + func_ast.args.args.append(ast.arg(".self", None)) + for arg in pargs + kargs: + func_ast.args.args.append(ast.arg(arg, None)) + func_ast.args.kwarg = ast.arg(".kwargs", None) + for _ in kargs: + func_ast.args.defaults.append(ast.Str("")) + func_ast.body = body + + # use `ast.parse` instead of `ast.Module` for better portability + # Python 3.8 changes the signature of `ast.Module` + module = ast.parse("") + module.body = [func_ast] + + # mark everything as on line 1, offset 0 + # less error-prone than `ast.fix_missing_locations` + # bad line numbers cause an assert to fail in debug builds + for node in ast.walk(module): + if "lineno" in node._attributes: + node.lineno = 1 + if "end_lineno" in node._attributes: + node.end_lineno = node.lineno # type: ignore[attr-defined] + if "col_offset" in node._attributes: + node.col_offset = 0 + if "end_col_offset" in node._attributes: + node.end_col_offset = node.col_offset # type: ignore[attr-defined] + + code = 
compile(module, "", "exec") + return self._get_func_code(code, func_ast.name) + + def build( + self, values: t.Mapping[str, t.Any], append_unknown: bool = True + ) -> t.Optional[t.Tuple[str, str]]: + """Assembles the relative url for that rule and the subdomain. + If building doesn't work for some reasons `None` is returned. + + :internal: + """ + try: + if append_unknown: + return self._build_unknown(**values) + else: + return self._build(**values) + except ValidationError: + return None + + def provides_defaults_for(self, rule: "Rule") -> bool: + """Check if this rule has defaults for a given rule. + + :internal: + """ + return bool( + not self.build_only + and self.defaults + and self.endpoint == rule.endpoint + and self != rule + and self.arguments == rule.arguments + ) + + def suitable_for( + self, values: t.Mapping[str, t.Any], method: t.Optional[str] = None + ) -> bool: + """Check if the dict of values has enough data for url generation. + + :internal: + """ + # if a method was given explicitly and that method is not supported + # by this rule, this rule is not suitable. + if ( + method is not None + and self.methods is not None + and method not in self.methods + ): + return False + + defaults = self.defaults or () + + # all arguments required must be either in the defaults dict or + # the value dictionary otherwise it's not suitable + for key in self.arguments: + if key not in defaults and key not in values: + return False + + # in case defaults are given we ensure that either the value was + # skipped or the value is the same as the default value. + if defaults: + for key, value in defaults.items(): + if key in values and value != values[key]: + return False + + return True + + def build_compare_key(self) -> t.Tuple[int, int, int]: + """The build compare key for sorting. 
def is_resource_modified(
    http_range: t.Optional[str] = None,
    http_if_range: t.Optional[str] = None,
    http_if_modified_since: t.Optional[str] = None,
    http_if_none_match: t.Optional[str] = None,
    http_if_match: t.Optional[str] = None,
    etag: t.Optional[str] = None,
    data: t.Optional[bytes] = None,
    last_modified: t.Optional[t.Union[datetime, str]] = None,
    ignore_if_range: bool = True,
) -> bool:
    """Convenience method for conditional requests.

    :param http_range: Range HTTP header
    :param http_if_range: If-Range HTTP header
    :param http_if_modified_since: If-Modified-Since HTTP header
    :param http_if_none_match: If-None-Match HTTP header
    :param http_if_match: If-Match HTTP header
    :param etag: the etag for the response for comparison.
    :param data: or alternatively the data of the response to automatically
                 generate an etag using :func:`generate_etag`.
    :param last_modified: an optional date of the last modification.
    :param ignore_if_range: If `False`, `If-Range` header will be taken into
                            account.
    :return: `True` if the resource was modified, otherwise `False`.
    :raises TypeError: if both ``etag`` and ``data`` are given.

    .. versionchanged:: 2.2
        Made arguments explicit to support ASGI.

    .. versionchanged:: 2.0
        SHA-1 is used to generate an etag value for the data. MD5 may
        not be available in some environments.

    .. versionchanged:: 1.0.0
        The check is run for methods other than ``GET`` and ``HEAD``.
    """
    if etag is None and data is not None:
        etag = generate_etag(data)
    elif data is not None:
        raise TypeError("both data and etag given")

    unmodified = False
    if isinstance(last_modified, str):
        last_modified = parse_date(last_modified)

    # HTTP doesn't use microsecond, remove it to avoid false positive
    # comparisons. Mark naive datetimes as UTC.
    if last_modified is not None:
        last_modified = _dt_as_utc(last_modified.replace(microsecond=0))

    if_range = None
    if not ignore_if_range and http_range is not None:
        # https://tools.ietf.org/html/rfc7233#section-3.2
        # A server MUST ignore an If-Range header field received in a request
        # that does not contain a Range header field.
        if_range = parse_if_range_header(http_if_range)

    # A date carried by If-Range takes precedence over If-Modified-Since.
    if if_range is not None and if_range.date is not None:
        modified_since: t.Optional[datetime] = if_range.date
    else:
        modified_since = parse_date(http_if_modified_since)

    if modified_since and last_modified and last_modified <= modified_since:
        unmodified = True

    # The etag checks below overwrite the result of the date check.
    if etag:
        etag, _ = unquote_etag(etag)
        etag = t.cast(str, etag)

        if if_range is not None and if_range.etag is not None:
            unmodified = parse_etags(if_range.etag).contains(etag)
        else:
            if_none_match = parse_etags(http_if_none_match)
            if if_none_match:
                # https://tools.ietf.org/html/rfc7232#section-3.2
                # "A recipient MUST use the weak comparison function when comparing
                # entity-tags for If-None-Match"
                unmodified = if_none_match.contains_weak(etag)

            # https://tools.ietf.org/html/rfc7232#section-3.1
            # "Origin server MUST use the strong comparison function when
            # comparing entity-tags for If-Match"
            if_match = parse_etags(http_if_match)
            if if_match:
                unmodified = not if_match.is_strong(etag)

    return not unmodified
versionchanged:: 0.5 + Returns a :class:`TypeConversionDict` instead of a regular dict. + The ``cls`` parameter was added. + """ + # PEP 3333 sends headers through the environ as latin1 decoded + # strings. Encode strings back to bytes for parsing. + if isinstance(cookie, str): + cookie = cookie.encode("latin1", "replace") + + if cls is None: + cls = ds.MultiDict + + def _parse_pairs() -> t.Iterator[t.Tuple[str, str]]: + for key, val in _cookie_parse_impl(cookie): # type: ignore + key_str = _to_str(key, charset, errors, allow_none_charset=True) + + if not key_str: + continue + + val_str = _to_str(val, charset, errors, allow_none_charset=True) + yield key_str, val_str + + return cls(_parse_pairs()) + + +# circular dependencies +from .. import datastructures as ds diff --git a/src/werkzeug/sansio/multipart.py b/src/werkzeug/sansio/multipart.py index 2d544224cb..d8abeb3543 100644 --- a/src/werkzeug/sansio/multipart.py +++ b/src/werkzeug/sansio/multipart.py @@ -70,6 +70,10 @@ class State(Enum): # Header values can be continued via a space or tab after the linebreak, as # per RFC2231 HEADER_CONTINUATION_RE = re.compile(b"%s[ \t]" % LINE_BREAK, re.MULTILINE) +# This must be long enough to contain any line breaks plus any +# additional boundary markers (--) such that they will be found in a +# subsequent search +SEARCH_EXTRA_LENGTH = 8 class MultipartDecoder: @@ -113,6 +117,7 @@ def __init__( % (LINE_BREAK, re.escape(boundary), LINE_BREAK, LINE_BREAK), re.MULTILINE, ) + self._search_position = 0 def last_newline(self) -> int: try: @@ -141,7 +146,7 @@ def next_event(self) -> Event: event: Event = NEED_DATA if self.state == State.PREAMBLE: - match = self.preamble_re.search(self.buffer) + match = self.preamble_re.search(self.buffer, self._search_position) if match is not None: if match.group(1).startswith(b"--"): self.state = State.EPILOGUE @@ -150,9 +155,17 @@ def next_event(self) -> Event: data = bytes(self.buffer[: match.start()]) del self.buffer[: match.end()] event = 
Preamble(data=data) + self._search_position = 0 + else: + # Update the search start position to be equal to the + # current buffer length (already searched) minus a + # safe buffer for part of the search target. + self._search_position = max( + 0, len(self.buffer) - len(self.boundary) - SEARCH_EXTRA_LENGTH + ) elif self.state == State.PART: - match = BLANK_LINE_RE.search(self.buffer) + match = BLANK_LINE_RE.search(self.buffer, self._search_position) if match is not None: headers = self._parse_headers(self.buffer[: match.start()]) del self.buffer[: match.end()] @@ -177,6 +190,12 @@ def next_event(self) -> Event: name=name, ) self.state = State.DATA + self._search_position = 0 + else: + # Update the search start position to be equal to the + # current buffer length (already searched) minus a + # safe buffer for part of the search target. + self._search_position = max(0, len(self.buffer) - SEARCH_EXTRA_LENGTH) elif self.state == State.DATA: if self.buffer.find(b"--" + self.boundary) == -1: diff --git a/src/werkzeug/sansio/request.py b/src/werkzeug/sansio/request.py index 3802403d9e..8832baafee 100644 --- a/src/werkzeug/sansio/request.py +++ b/src/werkzeug/sansio/request.py @@ -19,7 +19,6 @@ from ..http import parse_accept_header from ..http import parse_authorization_header from ..http import parse_cache_control_header -from ..http import parse_cookie from ..http import parse_date from ..http import parse_etags from ..http import parse_if_range_header @@ -31,6 +30,7 @@ from ..user_agent import UserAgent from ..utils import cached_property from ..utils import header_property +from .http import parse_cookie from .utils import get_current_url from .utils import get_host diff --git a/src/werkzeug/testapp.py b/src/werkzeug/testapp.py index 0829e337b8..0d7ffbb187 100644 --- a/src/werkzeug/testapp.py +++ b/src/werkzeug/testapp.py @@ -5,9 +5,10 @@ import os import sys import typing as t -from html import escape from textwrap import wrap +from markupsafe import escape + from . 
import __version__ as _werkzeug_version from .wrappers.request import Request from .wrappers.response import Response @@ -181,8 +182,8 @@ def render_testapp(req: Request) -> bytes: wsgi_env = [] sorted_environ = sorted(req.environ.items(), key=lambda x: repr(x[0]).lower()) for key, value in sorted_environ: - value = "".join(wrap(escape(repr(value)))) - wsgi_env.append(f"{escape(str(key))}{value}") + value = "".join(wrap(str(escape(repr(value))))) + wsgi_env.append(f"{escape(key)}{value}") sys_path = [] for item, virtual, expanded in iter_sys_path(): diff --git a/src/werkzeug/utils.py b/src/werkzeug/utils.py index f94444ee5b..672e6e5ade 100644 --- a/src/werkzeug/utils.py +++ b/src/werkzeug/utils.py @@ -10,6 +10,8 @@ from time import time from zlib import adler32 +from markupsafe import escape + from ._internal import _DictAccessorProperty from ._internal import _missing from ._internal import _TAccessorValue @@ -261,12 +263,10 @@ def redirect( response. The default is :class:`werkzeug.wrappers.Response` if unspecified. """ - import html - if Response is None: from .wrappers import Response # type: ignore - display_location = html.escape(location) + display_location = escape(location) if isinstance(location, str): # Safe conversion is necessary here as we might redirect # to a broken URI scheme (for instance itms-services). @@ -280,7 +280,7 @@ def redirect( "Redirecting...\n" "

Redirecting...

\n" "

You should be redirected automatically to the target URL: " - f'{display_location}. If' + f'{display_location}. If' " not, click the link.\n", code, mimetype="text/html", diff --git a/tests/test_datastructures.py b/tests/test_datastructures.py index d036e72fd6..7f63b6470d 100644 --- a/tests/test_datastructures.py +++ b/tests/test_datastructures.py @@ -1177,6 +1177,8 @@ def test_proxy_can_access_stream_attrs(self, stream): for name in ("fileno", "writable", "readable", "seekable"): assert hasattr(file_storage, name) + file_storage.close() + def test_save_to_pathlib_dst(self, tmp_path): src = tmp_path / "src.txt" src.write_text("test") diff --git a/tests/test_debug.py b/tests/test_debug.py index 1a1531d24f..cf171d1a5b 100644 --- a/tests/test_debug.py +++ b/tests/test_debug.py @@ -24,20 +24,20 @@ def test_basic_repr(self): ) assert debug_repr([1, "test"]) == ( '[1,' - ' 'test']' + ' 'test']' ) assert debug_repr([None]) == '[None]' def test_string_repr(self): - assert debug_repr("") == '''' - assert debug_repr("foo") == ''foo'' + assert debug_repr("") == '''' + assert debug_repr("foo") == ''foo'' assert debug_repr("s" * 80) == ( - f''{"s" * 69}' - f'{"s" * 11}'' + f''{"s" * 69}' + f'{"s" * 11}'' ) assert debug_repr("<" * 80) == ( - f''{"<" * 69}' - f'{"<" * 11}'' + f''{"<" * 69}' + f'{"<" * 11}'' ) def test_string_subclass_repr(self): @@ -46,7 +46,7 @@ class Test(str): assert debug_repr(Test("foo")) == ( 'test_debug.' 
- 'Test('foo')' + 'Test('foo')' ) def test_sequence_repr(self): @@ -67,7 +67,7 @@ def test_sequence_repr(self): def test_mapping_repr(self): assert debug_repr({}) == "{}" assert debug_repr({"foo": 42}) == ( - '{'foo'' + '{'foo'' ': 42' "}" ) @@ -105,8 +105,8 @@ def test_mapping_repr(self): "}" ) assert debug_repr((1, "zwei", "drei")) == ( - '(1, '' - 'zwei', 'drei')' + '(1, '' + 'zwei', 'drei')' ) def test_custom_repr(self): @@ -139,10 +139,10 @@ def test_regex_repr(self): def test_set_repr(self): assert ( debug_repr(frozenset("x")) - == 'frozenset(['x'])' + == 'frozenset(['x'])' ) assert debug_repr(set("x")) == ( - 'set(['x'])' + 'set(['x'])' ) def test_recursive_repr(self): diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index d101b2bb92..d8fed9629a 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -1,7 +1,8 @@ from datetime import datetime -from html import escape import pytest +from markupsafe import escape +from markupsafe import Markup from werkzeug import exceptions from werkzeug.datastructures import Headers @@ -52,6 +53,13 @@ def test_aborter_general(test): assert type(exc_info.value) is exc_type +def test_abort_description_markup(): + with pytest.raises(HTTPException) as exc_info: + exceptions.abort(400, Markup("<")) + + assert "<" in str(exc_info.value) + + def test_aborter_custom(): myabort = exceptions.Aborter({1: exceptions.NotFound}) pytest.raises(LookupError, myabort, 404) diff --git a/tests/test_formparser.py b/tests/test_formparser.py index 5fc803ef04..49010b46ce 100644 --- a/tests/test_formparser.py +++ b/tests/test_formparser.py @@ -172,19 +172,14 @@ def test_default_stream_factory(self, no_spooled, size, monkeypatch): monkeypatch.setattr("werkzeug.formparser.SpooledTemporaryFile", None) data = b"a,b,c\n" * size - req = Request.from_values( + with Request.from_values( data={"foo": (io.BytesIO(data), "test.txt")}, method="POST" - ) - file_storage = req.files["foo"] - - try: - reader = 
csv.reader(io.TextIOWrapper(file_storage)) + ) as req: + reader = csv.reader(io.TextIOWrapper(req.files["foo"])) # This fails if file_storage doesn't implement IOBase. # https://github.com/pallets/werkzeug/issues/1344 # https://github.com/python/cpython/pull/3249 assert sum(1 for _ in reader) == size - finally: - file_storage.close() def test_parse_bad_content_type(self): parser = FormDataParser() @@ -258,24 +253,25 @@ def test_basic(self): folder = join(resources, name) data = get_contents(join(folder, "request.http")) for filename, field, content_type, fsname in files: - response = client.post( + with client.post( f"/?object={field}", data=data, content_type=f'multipart/form-data; boundary="{boundary}"', content_length=len(data), - ) - lines = response.get_data().split(b"\n", 3) - assert lines[0] == repr(filename).encode("ascii") - assert lines[1] == repr(field).encode("ascii") - assert lines[2] == repr(content_type).encode("ascii") - assert lines[3] == get_contents(join(folder, fsname)) - response = client.post( + ) as response: + lines = response.get_data().split(b"\n", 3) + assert lines[0] == repr(filename).encode("ascii") + assert lines[1] == repr(field).encode("ascii") + assert lines[2] == repr(content_type).encode("ascii") + assert lines[3] == get_contents(join(folder, fsname)) + + with client.post( "/?object=text", data=data, content_type=f'multipart/form-data; boundary="{boundary}"', content_length=len(data), - ) - assert response.get_data() == repr(text).encode("utf-8") + ) as response: + assert response.get_data() == repr(text).encode("utf-8") @pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") def test_ie7_unc_path(self): @@ -283,14 +279,14 @@ def test_ie7_unc_path(self): data_file = join(dirname(__file__), "multipart", "ie7_full_path_request.http") data = get_contents(data_file) boundary = "---------------------------7da36d1b4a0164" - response = client.post( + with client.post( "/?object=cb_file_upload_multiple", data=data, 
content_type=f'multipart/form-data; boundary="{boundary}"', content_length=len(data), - ) - lines = response.get_data().split(b"\n", 3) - assert lines[0] == b"'Sellersburg Town Council Meeting 02-22-2010doc.doc'" + ) as response: + lines = response.get_data().split(b"\n", 3) + assert lines[0] == b"'Sellersburg Town Council Meeting 02-22-2010doc.doc'" def test_end_of_file(self): # This test looks innocent but it was actually timing out in @@ -301,14 +297,14 @@ def test_end_of_file(self): b"Content-Type: text/plain\r\n\r\n" b"file contents and no end" ) - data = Request.from_values( + with Request.from_values( input_stream=io.BytesIO(data), content_length=len(data), content_type="multipart/form-data; boundary=foo", method="POST", - ) - assert not data.files - assert not data.form + ) as data: + assert not data.files + assert not data.form def test_file_no_content_type(self): data = ( @@ -316,14 +312,14 @@ def test_file_no_content_type(self): b'Content-Disposition: form-data; name="test"; filename="test.txt"\r\n\r\n' b"file contents\r\n--foo--" ) - data = Request.from_values( + with Request.from_values( input_stream=io.BytesIO(data), content_length=len(data), content_type="multipart/form-data; boundary=foo", method="POST", - ) - assert data.files["test"].filename == "test.txt" - assert data.files["test"].read() == b"file contents" + ) as data: + assert data.files["test"].filename == "test.txt" + assert data.files["test"].read() == b"file contents" def test_extra_newline(self): # this test looks innocent but it was actually timing out in @@ -352,18 +348,18 @@ def test_headers(self): b"file contents, just the contents\r\n" b"--foo--" ) - req = Request.from_values( + with Request.from_values( input_stream=io.BytesIO(data), content_length=len(data), content_type="multipart/form-data; boundary=foo", method="POST", - ) - foo = req.files["foo"] - assert foo.mimetype == "text/plain" - assert foo.mimetype_params == {"charset": "utf-8"} - assert foo.headers["content-type"] == 
foo.content_type - assert foo.content_type == "text/plain; charset=utf-8" - assert foo.headers["x-custom-header"] == "blah" + ) as req: + foo = req.files["foo"] + assert foo.mimetype == "text/plain" + assert foo.mimetype_params == {"charset": "utf-8"} + assert foo.headers["content-type"] == foo.content_type + assert foo.content_type == "text/plain; charset=utf-8" + assert foo.headers["x-custom-header"] == "blah" @pytest.mark.parametrize("ending", [b"\n", b"\r", b"\r\n"]) def test_nonstandard_line_endings(self, ending: bytes): @@ -442,11 +438,11 @@ def test_file_rfc2231_filename_continuations(self): b' filename*2="e f.txt"\r\n\r\n' b"file contents\r\n--foo--" ) - request = Request.from_values( + with Request.from_values( input_stream=io.BytesIO(data), content_length=len(data), content_type="multipart/form-data; boundary=foo", method="POST", - ) - assert request.files["rfc2231"].filename == "a b c d e f.txt" - assert request.files["rfc2231"].read() == b"file contents" + ) as request: + assert request.files["rfc2231"].filename == "a b c d e f.txt" + assert request.files["rfc2231"].read() == b"file contents" diff --git a/tests/test_http.py b/tests/test_http.py index 5936bfa59b..623114cb59 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -358,6 +358,10 @@ def test_parse_options_header_broken_values(self): assert http.parse_options_header(" , a ") == ("", {}) assert http.parse_options_header(" ; a ") == ("", {}) + def test_parse_options_header_case_insensitive(self): + _, options = http.parse_options_header(r'something; fileName="File.ext"') + assert options["filename"] == "File.ext" + def test_dump_options_header(self): assert http.dump_options_header("foo", {"bar": 42}) == "foo; bar=42" assert http.dump_options_header("foo", {"bar": 42, "fizz": None}) in ( diff --git a/tests/test_local.py b/tests/test_local.py index 7402876d3c..ba66f566de 100644 --- a/tests/test_local.py +++ b/tests/test_local.py @@ -3,6 +3,7 @@ import math import operator import time +from 
contextvars import ContextVar from functools import partial from threading import Thread @@ -10,9 +11,23 @@ from werkzeug import local +# Since the tests are creating local instances, use global context vars +# to avoid accumulating anonymous context vars that can't be collected. +_cv_ns = ContextVar("werkzeug.tests.ns") +_cv_stack = ContextVar("werkzeug.tests.stack") + + +@pytest.fixture(autouse=True) +def reset_context_vars(): + ns_token = _cv_ns.set({}) + stack_token = _cv_stack.set([]) + yield + _cv_ns.reset(ns_token) + _cv_stack.reset(stack_token) + def test_basic_local(): - ns = local.Local() + ns = local.Local(_cv_ns) ns.foo = 0 values = [] @@ -40,7 +55,7 @@ def delfoo(): def test_basic_local_asyncio(): - ns = local.Local() + ns = local.Local(_cv_ns) ns.foo = 0 values = [] @@ -68,19 +83,19 @@ def delfoo(): def test_local_release(): - ns = local.Local() + ns = local.Local(_cv_ns) ns.foo = 42 local.release_local(ns) assert not hasattr(ns, "foo") - ls = local.LocalStack() + ls = local.LocalStack(_cv_stack) ls.push(42) local.release_local(ls) assert ls.top is None def test_local_stack(): - ls = local.LocalStack() + ls = local.LocalStack(_cv_stack) assert ls.top is None ls.push(42) assert ls.top == 42 @@ -104,12 +119,12 @@ def test_local_stack(): def test_local_stack_asyncio(): - ls = local.LocalStack() + ls = local.LocalStack(_cv_stack) ls.push(1) async def task(): ls.push(1) - assert len(ls._local.stack) == 2 + assert len(ls._storage.get()) == 2 async def main(): futures = [asyncio.ensure_future(task()) for _ in range(3)] @@ -119,7 +134,7 @@ async def main(): def test_proxy_local(): - ns = local.Local() + ns = local.Local(_cv_ns) ns.foo = [] p = local.LocalProxy(ns, "foo") p.append(42) @@ -160,7 +175,7 @@ def lookup_func(): partial_proxy = local.LocalProxy(partial_lookup_func) assert partial_proxy.__wrapped__ == partial_lookup_func - ns = local.Local() + ns = local.Local(_cv_ns) ns.foo = SomeClassWithWrapped() ns.bar = 42 @@ -178,7 +193,7 @@ def example(): def 
test_proxy_fallback(): - local_stack = local.LocalStack() + local_stack = local.LocalStack(_cv_stack) local_proxy = local_stack() assert repr(local_proxy) == "" @@ -195,7 +210,7 @@ def test_proxy_fallback(): def test_proxy_unbound(): - ns = local.Local() + ns = local.Local(_cv_ns) p = ns("value") assert repr(p) == "" assert not p @@ -203,7 +218,7 @@ def test_proxy_unbound(): def _make_proxy(value): - ns = local.Local() + ns = local.Local(_cv_ns) ns.value = value p = ns("value") return ns, p diff --git a/tests/test_routing.py b/tests/test_routing.py index 6544d89b2d..b1d2ee11a0 100644 --- a/tests/test_routing.py +++ b/tests/test_routing.py @@ -7,6 +7,8 @@ from werkzeug import routing as r from werkzeug.datastructures import ImmutableDict from werkzeug.datastructures import MultiDict +from werkzeug.exceptions import MethodNotAllowed +from werkzeug.exceptions import NotFound from werkzeug.test import create_environ from werkzeug.wrappers import Response @@ -26,7 +28,7 @@ def test_basic_routing(): assert adapter.match("/foo") == ("foo", {}) assert adapter.match("/bar/") == ("bar", {}) pytest.raises(r.RequestRedirect, lambda: adapter.match("/bar")) - pytest.raises(r.NotFound, lambda: adapter.match("/blub")) + pytest.raises(NotFound, lambda: adapter.match("/blub")) adapter = map.bind("example.org", "/", url_scheme="ws") assert adapter.match("/") == ("indexws", {}) @@ -171,7 +173,7 @@ def test_strict_slashes_redirect(): # Check if exceptions are correct pytest.raises(r.RequestRedirect, adapter.match, "/bar", method="GET") - pytest.raises(r.MethodNotAllowed, adapter.match, "/bar/", method="POST") + pytest.raises(MethodNotAllowed, adapter.match, "/bar/", method="POST") with pytest.raises(r.RequestRedirect) as error_info: adapter.match("/foo", method="POST") assert error_info.value.code == 308 @@ -191,7 +193,7 @@ def test_strict_slashes_redirect(): # Check if exceptions are correct pytest.raises(r.RequestRedirect, adapter.match, "/bar", method="GET") - 
pytest.raises(r.MethodNotAllowed, adapter.match, "/bar/", method="POST") + pytest.raises(MethodNotAllowed, adapter.match, "/bar/", method="POST") # Check what happens when only slash route is defined map = r.Map([r.Rule("/bar/", endpoint="get", methods=["GET"])]) @@ -202,8 +204,7 @@ def test_strict_slashes_redirect(): # Check if exceptions are correct pytest.raises(r.RequestRedirect, adapter.match, "/bar", method="GET") - pytest.raises(r.MethodNotAllowed, adapter.match, "/bar/", method="POST") - pytest.raises(r.MethodNotAllowed, adapter.match, "/bar", method="POST") + pytest.raises(MethodNotAllowed, adapter.match, "/bar/", method="POST") def test_environ_defaults(): @@ -214,7 +215,7 @@ def test_environ_defaults(): assert a.match("/foo") == ("foo", {}) assert a.match() == ("foo", {}) assert a.match("/bar") == ("bar", {}) - pytest.raises(r.NotFound, a.match, "/bars") + pytest.raises(NotFound, a.match, "/bars") def test_environ_nonascii_pathinfo(): @@ -223,7 +224,7 @@ def test_environ_nonascii_pathinfo(): a = m.bind_to_environ(environ) assert a.match("/") == ("index", {}) assert a.match("/лошадь") == ("horse", {}) - pytest.raises(r.NotFound, a.match, "/барсук") + pytest.raises(NotFound, a.match, "/барсук") def test_basic_building(): @@ -330,10 +331,10 @@ def test_negative(): assert adapter.match("/bars/-0.185") == ("bars", {"page": -0.185}) # Make sure signed values are rejected in unsigned mode - pytest.raises(r.NotFound, lambda: adapter.match("/foo/-2")) - pytest.raises(r.NotFound, lambda: adapter.match("/foo/-50")) - pytest.raises(r.NotFound, lambda: adapter.match("/bar/-0.185")) - pytest.raises(r.NotFound, lambda: adapter.match("/bar/-2.0")) + pytest.raises(NotFound, lambda: adapter.match("/foo/-2")) + pytest.raises(NotFound, lambda: adapter.match("/foo/-50")) + pytest.raises(NotFound, lambda: adapter.match("/bar/-0.185")) + pytest.raises(NotFound, lambda: adapter.match("/bar/-2.0")) def test_greedy(): @@ -435,8 +436,8 @@ def dispatch(path, quiet=False): assert 
dispatch("/").data == b"('root', {})" assert dispatch("/foo").status_code == 308 - raise_this = r.NotFound() - pytest.raises(r.NotFound, lambda: dispatch("/bar")) + raise_this = NotFound() + pytest.raises(NotFound, lambda: dispatch("/bar")) assert dispatch("/bar", True).status_code == 404 @@ -720,6 +721,8 @@ def test_converter_with_tuples(): """ class TwoValueConverter(r.BaseConverter): + part_isolating = False + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.regex = r"(\w\w+)/(\w\w+)" @@ -754,6 +757,19 @@ def test_anyconverter(): assert a.match("/a.2") == ("yes_dot", {"a": "a.2"}) +def test_any_converter_build_validates_value() -> None: + m = r.Map([r.Rule("/", endpoint="actor")]) + a = m.bind("localhost") + + assert a.build("actor", {"value": "patient"}) == "/patient" + assert a.build("actor", {"value": "provider"}) == "/provider" + + with pytest.raises(ValueError) as exc: + a.build("actor", {"value": "invalid"}) + + assert str(exc.value) == "'invalid' is not one of 'patient', 'provider'" + + @pytest.mark.parametrize( ("endpoint", "value", "expect"), [ @@ -901,7 +917,7 @@ def test_implicit_head(): ) adapter = url_map.bind("example.org") assert adapter.match("/get", method="HEAD") == ("a", {}) - pytest.raises(r.MethodNotAllowed, adapter.match, "/post", method="HEAD") + pytest.raises(MethodNotAllowed, adapter.match, "/post", method="HEAD") def test_pass_str_as_router_methods(): @@ -1101,7 +1117,7 @@ def test_server_name_casing(): with pytest.warns(UserWarning): a = m.bind_to_environ(env, server_name="example.com") - with pytest.raises(r.NotFound): + with pytest.raises(NotFound): a.match() @@ -1181,7 +1197,7 @@ def test_both_bind_and_match_path_info_are_none(): def test_map_repr(): m = r.Map([r.Rule("/wat", endpoint="enter"), r.Rule("/woop", endpoint="foobar")]) rv = repr(m) - assert rv == "Map([ foobar>, enter>])" + assert rv == "Map([ enter>, foobar>])" def test_empty_subclass_rules_with_custom_kwargs(): @@ -1352,5 +1368,5 @@ def 
test_newline_match(): m = r.Map([r.Rule("/hello", endpoint="hello")]) a = m.bind("localhost") - with pytest.raises(r.NotFound): + with pytest.raises(NotFound): a.match("/hello\n") diff --git a/tests/test_test.py b/tests/test_test.py index 7cb43e1fa8..eafd579981 100644 --- a/tests/test_test.py +++ b/tests/test_test.py @@ -140,6 +140,7 @@ def test_environ_builder_basics(): assert req.files["test"].content_type == "text/plain" assert req.files["test"].filename == "test.txt" assert req.files["test"].read() == b"test contents" + req.close() def test_environ_builder_data(): @@ -272,11 +273,13 @@ def test_environ_builder_content_type(): assert builder.content_type is None builder.form["foo"] = "bar" assert builder.content_type == "application/x-www-form-urlencoded" - builder.files.add_file("blafasel", BytesIO(b"foo"), "test.txt") + builder.files.add_file("data", BytesIO(b"foo"), "test.txt") assert builder.content_type == "multipart/form-data" req = builder.get_request() + builder.close() assert req.form["foo"] == "bar" - assert req.files["blafasel"].read() == b"foo" + assert req.files["data"].read() == b"foo" + req.close() def test_basic_auth(): @@ -335,6 +338,7 @@ def test_environ_builder_unicode_file_mix(): assert files["f"].filename == "snowman.txt" assert files["f"].read() == rb"\N{SNOWMAN}" stream.close() + files["f"].close() def test_create_environ():