Add poethepoet.scripts package with rm function for use in script tasks

Also: - add support for delegating dry-run behavior to script tasks - set POE_VERBOSITY env var
nat-n · Oct 26, 2024 · 483c5f3 · 483c5f3
1 parent 21ccd40
commit 483c5f3
Show file tree

Hide file tree

Showing 18 changed files with 476 additions and 65 deletions.
diff --git a/docs/env_vars.rst b/docs/env_vars.rst
@@ -10,7 +10,7 @@ The following environment variables are used by Poe the Poet internally, and can
 - ``POE_PWD``: the current working directory of the poe process (unless overridden programmatically).
 - ``POE_CONF_DIR``: the path to the parent directory of the config file that defines the running task or the :ref:`cwd option<Setting a working directory for included tasks>` set when including that config.
 - ``POE_ACTIVE``: identifies the active PoeExecutor, so that Poe the Poet can tell when it is running recursively.
-
+- ``POE_VERBOSITY``: reflects the current verbosity level. Normally 0 is the default, 1 means more verbose and -1 means less.
 
 Special variables
 -----------------

diff --git a/docs/tasks/task_types/script.rst b/docs/tasks/task_types/script.rst
@@ -68,3 +68,21 @@ whereas the same behaviour can can be reliably achieved like so:
 
   [tool.poe.tasks.build]
   script = "os:makedirs('build/assets', exist_ok=True)"
+
+
+Poe scripts library
+-------------------
+
+
+Poe the Poet includes the ``poethepoet.scripts`` package, which provides the following functions as convenient cross-platform implementations of common task capabilities.
+These functions can be referenced from script tasks if ``poethepoet`` is available in the project virtual environment.
+
+.. autofunction:: poethepoet.scripts.rm
+
+
+Delegating dry-run behavior to a script
+---------------------------------------
+
+Normally if the ``--dry-run`` global option is passed to the CLI then poe will go through the motions of running the given task, including logging to stdout, without actually running the task.
+
+However, it is possible to make poe delegate responsibility for respecting the dry-run flag to a script task, by referencing the special ``_dry_run`` variable. When this variable is passed as an argument to the python function called within a script task, poe will always call the task and rely on the script to make sure that no side effects occur when run in dry-run mode. For example: ``script = "poethepoet.scripts:rm('build', dry_run=_dry_run)"``.
diff --git a/poethepoet/context.py b/poethepoet/context.py
@@ -90,9 +90,18 @@ def get_executor(
         invocation: Tuple[str, ...],
         env: "EnvVarsManager",
         working_dir: Path,
+        *,
         executor_config: Optional[Mapping[str, str]] = None,
         capture_stdout: Union[str, bool] = False,
+        delegate_dry_run: bool = False,
     ) -> "PoeExecutor":
+        """
+        Get an Executor object for use with this invocation.
+
+        if delegate_dry_run is set then the task will always be executed and be
+        entrusted to not have any side effects when the dry-run flag is set.
+        """
+
         from .executor import PoeExecutor
 
         if not executor_config:
@@ -108,5 +117,5 @@ def get_executor(
             env=env,
             working_dir=working_dir,
             capture_stdout=capture_stdout,
-            dry=self.dry,
+            dry=False if delegate_dry_run else self.dry,
         )
diff --git a/poethepoet/env/manager.py b/poethepoet/env/manager.py
@@ -47,6 +47,9 @@ def __init__(  # TODO: check if we still need all these args!
             self._vars["POE_CWD"] = self.cwd
             self._vars["POE_PWD"] = self.cwd
 
+        if self._ui:
+            self._vars["POE_VERBOSITY"] = str(self._ui.verbosity)
+
         self._git_repo = GitRepo(config.project_dir)
 
     def __getitem__(self, key):

diff --git a/poethepoet/helpers/python.py b/poethepoet/helpers/python.py
@@ -6,7 +6,18 @@
 import ast
 import re
 import sys
-from typing import Any, Collection, Container, Dict, Iterator, List, Optional, Tuple
+from typing import (
+    Any,
+    Collection,
+    Container,
+    Dict,
+    Iterator,
+    List,
+    NamedTuple,
+    Optional,
+    Tuple,
+    cast,
+)
 
 from ..exceptions import ExpressionParseError
 
@@ -67,21 +78,77 @@
 Substitution = Tuple[Tuple[int, int], str]
 
 
+class FunctionCall(NamedTuple):
+    """
+    Model for a python expression consisting of a function call
+    """
+
+    # Source of the call expression; when built by ``parse`` references to
+    # arguments are prefixed with args_prefix and linebreaks are replaced
+    expression: str
+    # Dotted reference to the called function, e.g. ``a.b.c`` for ``a.b.c(x)``
+    function_ref: str
+    # Names referenced in the expression that were found in the given arguments
+    referenced_args: Tuple[str, ...] = tuple()
+    # Names referenced in the expression that are in _ALLOWED_BUILTINS or in the
+    # allowed_vars given to ``parse``
+    referenced_globals: Tuple[str, ...] = tuple()
+
+    @classmethod
+    def parse(
+        cls,
+        source: str,
+        arguments: Container[str],
+        *,
+        args_prefix: str = "__args.",
+        allowed_vars: Container[str] = tuple(),
+    ) -> "FunctionCall":
+        """
+        Validate source as a function call and collect the names it references.
+
+        References to names in ``arguments`` are substituted with their namespaced
+        counterparts (e.g. ``my_arg`` => ``__args.my_arg``). Any other name must be
+        in ``_ALLOWED_BUILTINS`` or in ``allowed_vars``, otherwise an
+        ExpressionParseError is raised.
+        """
+        # The second argument requests call-only validation for a "script" task
+        root_node = cast(ast.Call, parse_and_validate(source, True, "script"))
+        name_nodes = _validate_nodes_and_get_names(root_node, source)
+
+        substitutions: List[Substitution] = []
+        referenced_args: List[str] = []
+        referenced_globals: List[str] = []
+        for node in name_nodes:
+            if node.id in arguments:
+                # Arguments take precedence over builtins and allowed vars
+                substitutions.append(
+                    (_get_name_node_abs_range(source, node), args_prefix + node.id)
+                )
+                referenced_args.append(node.id)
+            elif node.id in _ALLOWED_BUILTINS or node.id in allowed_vars:
+                referenced_globals.append(node.id)
+            else:
+                raise ExpressionParseError(
+                    "Invalid variable reference in script: "
+                    + _get_name_source_segment(source, node)
+                )
+
+        # Prefix references to arguments with args_prefix
+        expression = _apply_substitutions(source, substitutions)
+
+        # Walk the attribute chain of the callee from the outermost attribute
+        # inwards, so the collected parts are in reverse order
+        ref_parts = []
+        func_node = root_node.func
+        while isinstance(func_node, ast.Attribute):
+            ref_parts.append(func_node.attr)
+            func_node = func_node.value
+        # NOTE(review): presumably the validation above guarantees that the callee
+        # is a plain dotted name, making this assert only a type narrowing - confirm
+        assert isinstance(func_node, ast.Name)
+        function_ref = ".".join((func_node.id, *reversed(ref_parts)))
+
+        return cls(
+            expression=_clean_linebreaks(expression),
+            function_ref=function_ref,
+            referenced_args=tuple(referenced_args),
+            referenced_globals=tuple(referenced_globals),
+        )
+
+
 def resolve_expression(
     source: str,
     arguments: Container[str],
     *,
-    call_only: bool = True,
     args_prefix: str = "__args.",
     allowed_vars: Container[str] = tuple(),
-):
+) -> str:
     """
     Validate function call and substitute references to arguments with their namespaced
     counterparts (e.g. `my_arg` => `__args.my_arg`).
     """
 
-    task_type = "script" if call_only else "expr"
-    root_node = parse_and_validate(source, call_only, task_type)
+    root_node = parse_and_validate(source, False, "expr")
     name_nodes = _validate_nodes_and_get_names(root_node, source)
 
     substitutions: List[Substitution] = []
@@ -92,12 +159,12 @@ def resolve_expression(
             )
         elif node.id not in _ALLOWED_BUILTINS and node.id not in allowed_vars:
             raise ExpressionParseError(
-                f"Invalid variable reference in {task_type}: "
+                "Invalid variable reference in expr: "
                 + _get_name_source_segment(source, node)
             )
 
     # Prefix references to arguments with args_prefix
-    return _apply_substitutions(source, substitutions)
+    return _clean_linebreaks(_apply_substitutions(source, substitutions))
 
 
 def parse_and_validate(
@@ -246,7 +313,7 @@ def _validate_nodes_and_get_names(
             )
 
 
-def _apply_substitutions(content: str, subs: List[Substitution]):
+def _apply_substitutions(content: str, subs: List[Substitution]) -> str:
     """
     Returns a copy of content with all of the substitutions applied.
     Uses a single pass for efficiency.
@@ -319,3 +386,11 @@ def _get_name_source_segment(source: str, node: ast.Name):
         partial_result = partial_result[:-1]
 
     return partial_result
+
+
+def _clean_linebreaks(expression: str) -> str:
+    """
+    Replace every linebreak in expression with a space, because new lines can be
+    problematic on windows.
+
+    A linebreak directly preceded or followed by a space is merged into that
+    space instead of adding another one.
+    """
+    # First absorb linebreaks that are adjacent to a space
+    expression = re.sub(r"((\r\n|\r|\n) | (\r\n|\r|\n))", " ", expression)
+    # Then replace whatever linebreaks remain
+    return re.sub(r"(\r\n|\r|\n)", " ", expression)
diff --git a/poethepoet/scripts/__init__.py b/poethepoet/scripts/__init__.py
@@ -0,0 +1,3 @@
+from ._rm import rm
+
+__all__ = ["rm"]
diff --git a/poethepoet/scripts/_rm.py b/poethepoet/scripts/_rm.py
@@ -0,0 +1,61 @@
+# ruff: noqa: E501
+import shutil
+from pathlib import Path
+from typing import Union
+
+
+def rm(
+    *patterns: str,
+    cwd: str = ".",
+    verbosity: Union[int, str] = 0,
+    dry_run: bool = False,
+):
+    """
+    This function is intended for use in a script task to delete files and directories
+    matching any of the given patterns, as a platform agnostic alternative to
+    ``rm -rf [patterns]``
+
+    Example usage:
+
+    .. code-block:: toml
+
+        [tool.poe.tasks.clean]
+        script = "poethepoet.scripts:rm('.mypy_cache', '.pytest_cache', './**/__pycache__')"
+
+    :param *patterns:
+        One or more paths to delete.
+        `Glob patterns <https://docs.python.org/3/library/glob.html>`_ are supported.
+    :param cwd:
+        The directory relative to which patterns are evaluated. Defaults to ``.``.
+    :param verbosity:
+        An integer for setting the function's verbosity. This can be set to
+        ``environ.get('POE_VERBOSITY')`` to match the verbosity of the poe invocation.
+    :param dry_run:
+        If true then nothing will be deleted, but output to stdout will be unaffected.
+        This can be set to ``_dry_run`` to make poe delegate dry_run control to the
+        function.
+    """
+    verbosity = int(verbosity)
+
+    for pattern in patterns:
+        matches = list(Path(cwd).glob(pattern))
+        if verbosity > 0 and not matches:
+            print(f"No files or directories to delete matching {pattern!r}")
+        elif verbosity >= 0 and len(matches) > 1:
+            print(f"Deleting paths matching {pattern!r}")
+
+        for match in matches:
+            _delete_path(match, verbosity, dry_run)
+
+
+def _delete_path(path: Path, verbosity: int, dry_run: bool):
+    if path.is_dir():
+        if verbosity > 0:
+            print(f"Deleting directory '{path}'")
+        if not dry_run:
+            shutil.rmtree(path)
+    else:
+        if verbosity > 0:
+            print(f"Deleting file '{path}'")
+        if not dry_run:
+            path.unlink()
diff --git a/poethepoet/task/base.py b/poethepoet/task/base.py
@@ -485,13 +485,20 @@ def _handle_run(
         """
         raise NotImplementedError
 
-    def _get_executor(self, context: "RunContext", env: "EnvVarsManager"):
+    def _get_executor(
+        self,
+        context: "RunContext",
+        env: "EnvVarsManager",
+        *,
+        delegate_dry_run: bool = False,
+    ):
         return context.get_executor(
             self.invocation,
             env,
             working_dir=self.get_working_dir(env),
             executor_config=self.spec.options.get("executor"),
             capture_stdout=self.capture_stdout,
+            delegate_dry_run=delegate_dry_run,
         )
 
     def get_working_dir(

diff --git a/poethepoet/task/expr.py b/poethepoet/task/expr.py
@@ -124,7 +124,6 @@ def parse_content(
         expression = resolve_expression(
             source=expression,
             arguments=set(args or tuple()),
-            call_only=False,
             allowed_vars={"sys", "__env", *imports},
         )
         # Strip out any new lines because they can be problematic on windows

diff --git a/poethepoet/task/script.py b/poethepoet/task/script.py
@@ -1,4 +1,3 @@
-import re
 import shlex
 from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
 
@@ -9,6 +8,7 @@
     from ..config import PoeConfig
     from ..context import RunContext
     from ..env.manager import EnvVarsManager
+    from ..helpers.python import FunctionCall
     from .base import TaskSpecFactory
 
 
@@ -68,7 +68,7 @@ def _handle_run(
         # TODO: do something about extra_args, error?
 
         target_module, function_call = self.parse_content(named_arg_values)
-        function_ref = function_call[: function_call.index("(")]
+        function_ref = function_call.function_ref
 
         argv = [
             self.name,
@@ -78,16 +78,20 @@ def _handle_run(
         # TODO: check whether the project really does use src layout, and don't do
         #       sys.path.append('src') if it doesn't
 
+        has_dry_run_ref = "_dry_run" in function_call.referenced_globals
+        dry_run = self.ctx.ui["dry_run"]
+
         script = [
             "import asyncio,os,sys;",
             "from inspect import iscoroutinefunction as _c;",
             "from os import environ;",
             "from importlib import import_module as _i;",
+            f"_dry_run = {'True' if dry_run else 'False'};" if has_dry_run_ref else "",
             f"sys.argv = {argv!r}; sys.path.append('src');",
             f"{format_class(named_arg_values)}",
             f"_m = _i('{target_module}');",
-            f"_r = asyncio.run(_m.{function_call}) if _c(_m.{function_ref})",
-            f" else _m.{function_call};",
+            f"_r = asyncio.run(_m.{function_call.expression}) if _c(_m.{function_ref})",
+            f" else _m.{function_call.expression};",
         ]
 
         if self.spec.options.get("print_result"):
@@ -99,19 +103,21 @@ def _handle_run(
         cmd = ("python", "-c", "".join(script))
 
         self._print_action(shlex.join(argv), context.dry)
-        return self._get_executor(context, env).execute(
-            cmd, use_exec=self.spec.options.get("use_exec", False)
-        )
+        return self._get_executor(
+            context, env, delegate_dry_run=has_dry_run_ref
+        ).execute(cmd, use_exec=self.spec.options.get("use_exec", False))
 
-    def parse_content(self, args: Optional[Dict[str, Any]]) -> Tuple[str, str]:
+    def parse_content(
+        self, args: Optional[Dict[str, Any]]
+    ) -> Tuple[str, "FunctionCall"]:
         """
         Returns the module to load, and the function call to execute.
 
         Will raise an exception if the function call contains invalid syntax or
         references variables that are not in scope.
         """
 
-        from ..helpers.python import resolve_expression
+        from ..helpers.python import FunctionCall
 
         try:
             target_module, target_ref = self.spec.content.strip().split(":", 1)
@@ -122,17 +128,14 @@ def parse_content(self, args: Optional[Dict[str, Any]]) -> Tuple[str, str]:
 
         if target_ref.isidentifier():
             if args:
-                return target_module, f"{target_ref}(**({args}))"
-            return target_module, f"{target_ref}()"
-
-        function_call = resolve_expression(
-            target_ref,
-            set(args or tuple()),
-            call_only=True,
-            allowed_vars={"sys", "os", "environ"},
-        )
-        # Strip out any new lines because they can be problematic on windows
-        function_call = re.sub(r"((\r\n|\r|\n) | (\r\n|\r|\n))", " ", function_call)
-        function_call = re.sub(r"(\r\n|\r|\n)", " ", function_call)
+                function_call = FunctionCall(f"{target_ref}(**({args}))", target_ref)
+            else:
+                function_call = FunctionCall(f"{target_ref}()", target_ref)
+        else:
+            function_call = FunctionCall.parse(
+                source=target_ref,
+                arguments=set(args or tuple()),
+                allowed_vars={"sys", "os", "environ", "_dry_run"},
+            )
 
         return target_module, function_call