From b40b6b8d203d4cb170a54155defcdf385a16c838 Mon Sep 17 00:00:00 2001
From: Feras
Date: Thu, 3 Nov 2022 17:58:26 -0500
Subject: [PATCH 1/6] Add environment vars expansion and predefined git hashes
 in the yaml file

---
 src/soopervisor/commons/docker.py | 39 ++++++++++++++++++++++++++++++-
 tests/test_commons_docker.py      |  3 ++-
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/src/soopervisor/commons/docker.py b/src/soopervisor/commons/docker.py
index 29651be..366e218 100644
--- a/src/soopervisor/commons/docker.py
+++ b/src/soopervisor/commons/docker.py
@@ -2,12 +2,18 @@
 import shlex
 import tarfile
 from pathlib import Path
+from typing import Mapping
+
+import yaml
+from ploomber.env.envdict import EnvDict
+
+from ploomber.util.default import path_to_env_from_spec
 
 from ploomber.io._commander import CommanderStop
 from ploomber_core.telemetry import telemetry
 
 from soopervisor.commons import source, dependencies
-from soopervisor.exceptions import ConfigurationError, MissingDockerfileError
+from soopervisor.exceptions import ConfigurationError, MissingDockerfileError, ConfigurationFileTypeError
 
 
 def _validate_repository(repository):
@@ -52,6 +58,35 @@ def get_dependencies():
     return dependency_files, lock_paths
 
 
+def prepare_env_file(entry_point: str):
+    """
+    Given an entrypoint pipeline.yaml file determine the env.yaml in use
+    and populate it with the default placeholders or ignore them if they already exist.
+    The env file will be created in the root of the pipeline file if one doesn't exist.
+    """
+    env_path = path_to_env_from_spec(entry_point)
+    env_default = EnvDict({})
+    if env_path is not None:
+        env_data = yaml.safe_load(Path(env_path).read_text())
+        if not isinstance(env_data, Mapping):
+            raise ConfigurationFileTypeError(env_path, env_data)
+        env_data = dict(env_data)
+        if "git" in env_default:
+            env_data.setdefault("git", env_default["git"])
+        if "git_hash" in env_default:
+            env_data.setdefault("git_hash", env_default["git_hash"])
+    else:
+        env_path = Path(entry_point).parents[0] / "env.yaml"
+        env_data = {}
+        if "git" in env_default:
+            env_data.setdefault("git", env_default["git"])
+        if "git_hash" in env_default:
+            env_data.setdefault("git_hash", env_default["git_hash"])
+    env_path.write_text(yaml.safe_dump(env_data))
+
+    return env_path
+
+
 def build_image(
     e,
     cfg,
@@ -195,6 +230,8 @@ def build(e,
     dependencies.check_lock_files_exist()
     dependency_files, lock_paths = get_dependencies()
 
+    env_file_path = prepare_env_file(entry_point)
+
     image_map = {}
 
     setup_flow = Path('setup.py').exists()
diff --git a/tests/test_commons_docker.py b/tests/test_commons_docker.py
index 05e4dff..4ef9ba0 100644
--- a/tests/test_commons_docker.py
+++ b/tests/test_commons_docker.py
@@ -180,7 +180,8 @@ def test_docker_build_caches_pkg_installation(EXPORTER, config,
                          capture_output=True)
     ls = out.stdout.decode()
 
-    expected = ('environment.yml\nfast-pipeline.tar.gz\n'
+    expected = ('env.yaml\nenvironment.yml\nfast-pipeline.tar.gz\n'
                 'fast_pipeline.py\nmy-env\npipeline.yaml\n'
                 'requirements.lock.txt\nsoopervisor.yaml\n')
     assert ls == expected
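The placeholders injected by [PATCH 1/6] flow through Ploomber's regular env resolution, so tasks can reference them as {{git}} and {{git_hash}}. A minimal sketch of where the defaults come from, assuming the project is a git repository (the `in env_default` guards above exist precisely because the keys are absent otherwise):

    from ploomber.env.envdict import EnvDict

    # EnvDict({}) starts from the default placeholders; inside a git
    # repository it exposes the current branch/tag and commit hash
    env_default = EnvDict({})

    if "git_hash" in env_default:
        print(env_default["git_hash"])  # e.g. 'b40b6b8' (hypothetical output)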
From c52639117cbc436b51877d34bc2cc57a0eb2a4b2 Mon Sep 17 00:00:00 2001
From: Feras
Date: Wed, 9 Nov 2022 13:23:07 -0500
Subject: [PATCH 2/6] Fix flake8 lint

---
 src/soopervisor/commons/docker.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/soopervisor/commons/docker.py b/src/soopervisor/commons/docker.py
index 366e218..dc14e95 100644
--- a/src/soopervisor/commons/docker.py
+++ b/src/soopervisor/commons/docker.py
@@ -13,7 +13,8 @@
 from ploomber_core.telemetry import telemetry
 
 from soopervisor.commons import source, dependencies
-from soopervisor.exceptions import ConfigurationError, MissingDockerfileError, ConfigurationFileTypeError
+from soopervisor.exceptions import ConfigurationError, \
+    MissingDockerfileError, ConfigurationFileTypeError
 
 
 def _validate_repository(repository):
@@ -61,8 +62,9 @@ def get_dependencies():
 def prepare_env_file(entry_point: str):
     """
     Given an entrypoint pipeline.yaml file determine the env.yaml in use
-    and populate it with the default placeholders or ignore them if they already exist.
-    The env file will be created in the root of the pipeline file if one doesn't exist.
+    and populate it with the default placeholders or ignore them if
+    they already exist. The env file will be created in the root of
+    the pipeline file if one doesn't exist.
     """
     env_path = path_to_env_from_spec(entry_point)
     env_default = EnvDict({})
@@ -231,7 +233,7 @@ def build(e,
     dependency_files, lock_paths = get_dependencies()
 
     env_file_path = prepare_env_file(entry_point)
-
+    e.info("using .env file from: "+env_file_path)
     image_map = {}
 
     setup_flow = Path('setup.py').exists()
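Note that the `e.info` line added above concatenates a str with the value returned by prepare_env_file, which is typically a pathlib.Path; [PATCH 3/6] below wraps it in str() for exactly this reason. A minimal illustration of the failure mode:

    from pathlib import Path

    env_file_path = Path("env.yaml")
    # "using .env file from: " + env_file_path  # TypeError: can only concatenate str
    msg = "using .env file from: " + str(env_file_path)  # works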
From 99cacb60e28666c5e22b8c58c5372dfc77d8b842 Mon Sep 17 00:00:00 2001
From: Feras
Date: Tue, 22 Nov 2022 16:00:53 -0500
Subject: [PATCH 3/6] Add build time placeholder

---
 src/soopervisor/commons/docker.py |  9 +++++++--
 tests/test_commons_docker.py      | 10 ++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/soopervisor/commons/docker.py b/src/soopervisor/commons/docker.py
index dc14e95..18e503e 100644
--- a/src/soopervisor/commons/docker.py
+++ b/src/soopervisor/commons/docker.py
@@ -77,6 +77,8 @@ def prepare_env_file(entry_point: str):
             env_data.setdefault("git", env_default["git"])
         if "git_hash" in env_default:
             env_data.setdefault("git_hash", env_default["git_hash"])
+        if "build_time" in env_default:
+            env_data.setdefault("build_time", env_default["build_time"])
     else:
         env_path = Path(entry_point).parents[0] / "env.yaml"
         env_data = {}
@@ -84,7 +86,10 @@
             env_data.setdefault("git", env_default["git"])
         if "git_hash" in env_default:
             env_data.setdefault("git_hash", env_default["git_hash"])
-    env_path.write_text(yaml.safe_dump(env_data))
+        if "build_time" in env_default:
+            env_data.setdefault("build_time", env_default["build_time"])
+
+    env_path.write_text(yaml.safe_dump(env_data))
 
     return env_path
@@ -233,7 +238,7 @@ def build(e,
     dependency_files, lock_paths = get_dependencies()
 
     env_file_path = prepare_env_file(entry_point)
-    e.info("using .env file from: "+env_file_path)
+    e.info("using .env file from: "+str(env_file_path))
     image_map = {}
 
     setup_flow = Path('setup.py').exists()
diff --git a/tests/test_commons_docker.py b/tests/test_commons_docker.py
index 4ef9ba0..e0e510f 100644
--- a/tests/test_commons_docker.py
+++ b/tests/test_commons_docker.py
@@ -12,6 +12,7 @@
 from soopervisor.aws.batch import AWSBatchExporter
 from soopervisor.argo.export import ArgoWorkflowsExporter
 from soopervisor.airflow.export import AirflowExporter
+from test_commons import git_init
 
 
 def _process_docker_output(output):
@@ -121,6 +122,7 @@ def test_docker_build_caches_pkg_installation(EXPORTER, config,
                                               tmp_fast_pipeline, capfd):
     Path('requirements.lock.txt').write_text('pkgmt==0.0.1')
+    git_init()
 
     with capfd.disabled():
         EXPORTER.new('soopervisor.yaml', env_name='my-env').add()
@@ -184,4 +186,12 @@ def test_docker_build_caches_pkg_installation(EXPORTER, config,
     expected = ('env.yaml\nenvironment.yml\nfast-pipeline.tar.gz\n'
                 'fast_pipeline.py\nmy-env\npipeline.yaml\n'
                 'requirements.lock.txt\nsoopervisor.yaml\n')
     assert ls == expected
+
+    out = subprocess.run(['docker', 'run', 'fast-pipeline', 'cat', 'env.yaml'],
+                         check=True,
+                         capture_output=True)
+
+    env_contents = out.stdout.decode()
+    assert env_contents == ""
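For a project without an env.yaml, the file that prepare_env_file writes at this point looks roughly like the snippet below. The values are hypothetical; each key is only emitted when EnvDict exposes the corresponding default, which is what the guards above check:

    import yaml

    print(yaml.safe_dump({"git": "master", "git_hash": "SOMEHASH"}))
    # git: master
    # git_hash: SOMEHASH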
""" env_path = path_to_env_from_spec(entry_point) - env_default = EnvDict({}) + if env_path is not None: + env_default = EnvDict({}, path_to_here=Path(env_path).parent) + env_data = yaml.safe_load(Path(env_path).read_text()) if not isinstance(env_data, Mapping): raise ConfigurationFileTypeError(env_path, env_data) @@ -81,7 +86,9 @@ def prepare_env_file(entry_point: str): env_data.setdefault("build_time", env_default["build_time"]) else: env_path = Path(entry_point).parents[0] / "env.yaml" + env_default = EnvDict({}, path_to_here=Path(env_path).parent) env_data = {} + if "git" in env_default: env_data.setdefault("git", env_default["git"]) if "git_hash" in env_default: @@ -89,7 +96,7 @@ def prepare_env_file(entry_point: str): if "build_time" in env_default: env_data.setdefault("build_time", env_default["build_time"]) - env_path.write_text(yaml.safe_dump(env_data)) + Path(env_path).write_text(yaml.safe_dump(env_data)) return env_path @@ -118,60 +125,66 @@ def build_image( (e.g. {pkg_name}:{version}) """ - e.cp('dist') + e.cp("dist") e.cd(env_name) if task: - suffix = '' if task == 'default' else f'-{modify_wildcard(task)}' - image_local = f'{pkg_name}{suffix}:{version}' + suffix = "" if task == "default" else f"-{modify_wildcard(task)}" + image_local = f"{pkg_name}{suffix}:{version}" - args = ['docker', 'build', '.', '--tag', image_local] + args = ["docker", "build", ".", "--tag", image_local] # NOTE: this is used in Ploomber Cloud, but it isn't documented in # soopervisor's docs - if 'DOCKER_ARGS' in os.environ: - args = args + shlex.split(os.environ['DOCKER_ARGS']) + if "DOCKER_ARGS" in os.environ: + args = args + shlex.split(os.environ["DOCKER_ARGS"]) - print(f'docker args: {args}') + print(f"docker args: {args}") # how to allow passing --no-cache? - e.run(*args, description='Building image') + e.run(*args, description="Building image") if not skip_tests: # test "ploomber status" in docker image - e.run('docker', - 'run', - image_local, - 'ploomber', - 'status', - '--entry-point', - entry_point, - description='Testing image', - error_message='Error while testing your docker image with', - hint=f'Use "docker run -it {image_local} /bin/bash" to ' - 'start an interactive session to debug your image') + e.run( + "docker", + "run", + image_local, + "ploomber", + "status", + "--entry-point", + entry_point, + description="Testing image", + error_message="Error while testing your docker image with", + hint=f'Use "docker run -it {image_local} /bin/bash" to ' + "start an interactive session to debug your image", + ) # check that the pipeline in the image has a configured File.client - test_cmd = ('from ploomber.spec import DAGSpec; ' - f'print("File" in DAGSpec("{entry_point}")' - '.to_dag().clients)') - - e.run('docker', - 'run', - image_local, - 'python', - '-c', - test_cmd, - description='Testing File client', - error_message='Missing File client', - hint=f'Run "docker run -it {image_local} /bin/bash" to ' - 'to debug your image. Ensure a File client is configured', - capture_output=True, - expected_output='True\n', - show_cmd=False) - - if until == 'build': + test_cmd = ( + "from ploomber.spec import DAGSpec; " + f'print("File" in DAGSpec("{entry_point}")' + ".to_dag().clients)" + ) + + e.run( + "docker", + "run", + image_local, + "python", + "-c", + test_cmd, + description="Testing File client", + error_message="Missing File client", + hint=f'Run "docker run -it {image_local} /bin/bash" to ' + "to debug your image. 
 def build_image(
     e,
     cfg,
@@ -118,60 +125,66 @@
     (e.g. {pkg_name}:{version})
     """
-    e.cp('dist')
+    e.cp("dist")
 
     e.cd(env_name)
 
     if task:
-        suffix = '' if task == 'default' else f'-{modify_wildcard(task)}'
-        image_local = f'{pkg_name}{suffix}:{version}'
+        suffix = "" if task == "default" else f"-{modify_wildcard(task)}"
+        image_local = f"{pkg_name}{suffix}:{version}"
 
-    args = ['docker', 'build', '.', '--tag', image_local]
+    args = ["docker", "build", ".", "--tag", image_local]
 
     # NOTE: this is used in Ploomber Cloud, but it isn't documented in
     # soopervisor's docs
-    if 'DOCKER_ARGS' in os.environ:
-        args = args + shlex.split(os.environ['DOCKER_ARGS'])
+    if "DOCKER_ARGS" in os.environ:
+        args = args + shlex.split(os.environ["DOCKER_ARGS"])
 
-    print(f'docker args: {args}')
+    print(f"docker args: {args}")
 
     # how to allow passing --no-cache?
-    e.run(*args, description='Building image')
+    e.run(*args, description="Building image")
 
     if not skip_tests:
         # test "ploomber status" in docker image
-        e.run('docker',
-              'run',
-              image_local,
-              'ploomber',
-              'status',
-              '--entry-point',
-              entry_point,
-              description='Testing image',
-              error_message='Error while testing your docker image with',
-              hint=f'Use "docker run -it {image_local} /bin/bash" to '
-              'start an interactive session to debug your image')
+        e.run(
+            "docker",
+            "run",
+            image_local,
+            "ploomber",
+            "status",
+            "--entry-point",
+            entry_point,
+            description="Testing image",
+            error_message="Error while testing your docker image with",
+            hint=f'Use "docker run -it {image_local} /bin/bash" to '
+            "start an interactive session to debug your image",
+        )
 
         # check that the pipeline in the image has a configured File.client
-        test_cmd = ('from ploomber.spec import DAGSpec; '
-                    f'print("File" in DAGSpec("{entry_point}")'
-                    '.to_dag().clients)')
-
-        e.run('docker',
-              'run',
-              image_local,
-              'python',
-              '-c',
-              test_cmd,
-              description='Testing File client',
-              error_message='Missing File client',
-              hint=f'Run "docker run -it {image_local} /bin/bash" to '
-              'to debug your image. Ensure a File client is configured',
-              capture_output=True,
-              expected_output='True\n',
-              show_cmd=False)
+        test_cmd = (
+            "from ploomber.spec import DAGSpec; "
+            f'print("File" in DAGSpec("{entry_point}")'
+            ".to_dag().clients)"
+        )
+
+        e.run(
+            "docker",
+            "run",
+            image_local,
+            "python",
+            "-c",
+            test_cmd,
+            description="Testing File client",
+            error_message="Missing File client",
+            hint=f'Run "docker run -it {image_local} /bin/bash" '
+            "to debug your image. Ensure a File client is configured",
+            capture_output=True,
+            expected_output="True\n",
+            show_cmd=False,
+        )
 
-    if until == 'build':
+    if until == "build":
         raise CommanderStop('Done. Run "docker images" to see your image.')
 
     if cfg.repository:
@@ -179,30 +192,20 @@
 
         # Adding the latest tag if not a remote repo
         if ":" not in image_target:
-            image_target = f'{image_target}{suffix}:{version}'
-
-        e.run('docker',
-              'tag',
-              image_local,
-              image_target,
-              description='Tagging')
-        e.run('docker', 'push', image_target, description='Pushing image')
+            image_target = f"{image_target}{suffix}:{version}"
+
+        e.run("docker", "tag", image_local, image_target, description="Tagging")
+        e.run("docker", "push", image_target, description="Pushing image")
     else:
         image_target = image_local
 
-    if until == 'push':
-        raise CommanderStop('Done. Image pushed to repository.')
+    if until == "push":
+        raise CommanderStop("Done. Image pushed to repository.")
 
     return image_target
 
 
-def build(e,
-          cfg,
-          env_name,
-          until,
-          entry_point,
-          skip_tests=False,
-          ignore_git=False):
+def build(e, cfg, env_name, until, entry_point, skip_tests=False, ignore_git=False):
     """Build a docker image
 
     Parameters
     ----------
     skip_tests
         Skip image testing (check dag loading and File.client configuration)
     """
-    if not Path(env_name, 'Dockerfile').is_file():
+    if not Path(env_name, "Dockerfile").is_file():
         raise MissingDockerfileError(env_name)
 
     # raise an error if the user didn't change the default value
 
     dependencies.check_lock_files_exist()
     dependency_files, lock_paths = get_dependencies()
 
     env_file_path = prepare_env_file(entry_point)
-    e.info("using .env file from: "+str(env_file_path))
+    e.info("using env file from: " + str(env_file_path))
     image_map = {}
 
-    setup_flow = Path('setup.py').exists()
+    setup_flow = Path("setup.py").exists()
 
     # Generate source distribution
     if setup_flow:
         for task_pattern in sorted(dependency_files.keys()):
 
-            if task_pattern != 'default':
+            if task_pattern != "default":
                 raise NotImplementedError(
                     "Multiple requirements.*.lock.txt or "
                     "environment.*.lock.yml files found along "
                     "with setup.py file. Please have either "
-                    "of the two in the project root.")
+                    "of the two in the project root."
+                )
             # .egg-info may cause issues if MANIFEST.in was recently updated
-            if Path('requirements.lock.txt').exists():
-                e.cp('requirements.lock.txt')
-            elif Path('environment.lock.yml').exists():
-                e.cp('environment.lock.yml')
+            if Path("requirements.lock.txt").exists():
+                e.cp("requirements.lock.txt")
+            elif Path("environment.lock.yml").exists():
+                e.cp("environment.lock.yml")
 
-            e.rm('dist', 'build', Path('src', pkg_name, f'{pkg_name}.egg-info'))
-            e.run('python', '-m', 'build', '--sdist', description='Packaging code')
+            e.rm("dist", "build", Path("src", pkg_name, f"{pkg_name}.egg-info"))
+            e.run("python", "-m", "build", "--sdist", description="Packaging code")
             default_image_key = dependencies.get_default_image_key()
 
-            image = build_image(e,
-                                cfg,
-                                env_name,
-                                until,
-                                entry_point,
-                                skip_tests,
-                                ignore_git,
-                                pkg_name,
-                                version,
-                                task=default_image_key)
+            image = build_image(
+                e,
+                cfg,
+                env_name,
+                until,
+                entry_point,
+                skip_tests,
+                ignore_git,
+                pkg_name,
+                version,
+                task=default_image_key,
+            )
 
             image_map[default_image_key] = image
     # raise error if include is not None? and suggest to use MANIFEST.in
     # instead
     else:
         # sort keys so we iterate in deterministic order and can test easily
         for task in sorted(lock_paths.keys()):
             lock_file = lock_paths[task]
 
             if Path(lock_file).exists():
                 e.cp(lock_file)
 
-            e.rm('dist')
-            target = Path('dist', pkg_name)
-            e.info('Packaging code')
+            e.rm("dist")
+            target = Path("dist", pkg_name)
+            e.info("Packaging code")
             other_lock_files = [
                 file for file in list(lock_paths.values()) if file != lock_file
             ]
             exclude = cfg.exclude
             if cfg.exclude and other_lock_files:
                 exclude = cfg.exclude + other_lock_files
             elif not cfg.exclude and other_lock_files:
                 exclude = other_lock_files
 
             rename_files = {}
-            if lock_file not in ('requirements.lock.txt',
-                                 'environment.lock.yml'):
-                rename_files = {lock_file: 'requirements.lock.txt'} \
-                    if 'requirements' in lock_file \
-                    else {lock_file: 'environment.lock.yml'}
-            source.copy(cmdr=e,
-                        src='.',
-                        dst=target,
-                        include=cfg.include,
-                        exclude=exclude,
-                        ignore_git=ignore_git,
-                        rename_files=rename_files)
-            source.compress_dir(e, target, Path('dist', f'{pkg_name}.tar.gz'))
-
-            image = build_image(e, cfg, env_name, until, entry_point,
-                                skip_tests, ignore_git, pkg_name, version,
-                                task)
+            if lock_file not in ("requirements.lock.txt", "environment.lock.yml"):
+                rename_files = (
+                    {lock_file: "requirements.lock.txt"}
+                    if "requirements" in lock_file
+                    else {lock_file: "environment.lock.yml"}
+                )
+            source.copy(
+                cmdr=e,
+                src=".",
+                dst=target,
+                include=cfg.include,
+                exclude=exclude,
+                ignore_git=ignore_git,
+                rename_files=rename_files,
+            )
+            source.compress_dir(e, target, Path("dist", f"{pkg_name}.tar.gz"))
+
+            image = build_image(
+                e,
+                cfg,
+                env_name,
+                until,
+                entry_point,
+                skip_tests,
+                ignore_git,
+                pkg_name,
+                version,
+                task,
+            )
 
             image_map[task] = image
 
-    e.rm('dist')
-    e.cd('..')
+    e.rm("dist")
+    e.cd("..")
 
     if not setup_flow:
         # We need to go back to the env folder before return
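A detail worth calling out in build_image above: a tag is only appended when the configured repository does not already carry one (the `":" not in image_target` check). A small sketch of the rule, mirroring the expectations in test_docker_build_with_repository further down (the helper name is made up for illustration):

    def target_for(repository, suffix="", version="latest"):
        # a repository with an explicit tag (e.g. repo:v2) is used as-is;
        # otherwise the local suffix and version are appended
        return repository if ":" in repository else f"{repository}{suffix}:{version}"

    assert target_for("docker.company.com/something") == "docker.company.com/something:latest"
    assert target_for("docker.company.com/something:v2") == "docker.company.com/something:v2"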
diff --git a/tests/test_commons_docker.py b/tests/test_commons_docker.py
index e0e510f..5b451a9 100644
--- a/tests/test_commons_docker.py
+++ b/tests/test_commons_docker.py
@@ -5,19 +5,22 @@
 import subprocess
 import platform
 from pathlib import Path
+from os import environ
 
+import yaml
 import pytest
-from os import environ
+from ploomber import repo
 
 from soopervisor.aws.batch import AWSBatchExporter
 from soopervisor.argo.export import ArgoWorkflowsExporter
 from soopervisor.airflow.export import AirflowExporter
+from soopervisor.commons.docker import prepare_env_file
+
 from test_commons import git_init
 
 
 def _process_docker_output(output):
-    """Processes output from "docker build"
-    """
+    """Processes output from "docker build" """
 
     # output (on my local docker) looks like this:
 
     # STEP 1
 
     sections = []
 
     # output is separated by an empty line
-    empty = [idx for idx, line in enumerate(lines) if line == '']
+    empty = [idx for idx, line in enumerate(lines) if line == ""]
 
     # split by each section
     slices = list(zip(empty, empty[1:]))
 
     for i, j in slices:
-        sections.append('\n'.join(lines[i:j]))
+        sections.append("\n".join(lines[i:j]))
 
     return sections
 
 
 def _process_docker_output_ci(output):
 
     sections = []
 
-    step = [idx for idx, line in enumerate(lines) if 'Step ' in line]
+    step = [idx for idx, line in enumerate(lines) if "Step " in line]
 
     slices = list(zip(step, step[1:]))
 
     for i, j in slices:
-        sections.append('\n'.join(lines[i:j - 1]))
+        sections.append("\n".join(lines[i : j - 1]))
 
-    sections.append('\n'.join(lines[j:]))
+    sections.append("\n".join(lines[j:]))
 
     return sections
 def test_process_docker_output_ci():
 
     expected = [
-        'Step 1/7 : FROM A',
-        'Step 2/7 : COPY B C\n ---> Using cache',
-        'Step 3/7 : RUN D\n ---> Using cache\n ---> hash',
+        "Step 1/7 : FROM A",
+        "Step 2/7 : COPY B C\n ---> Using cache",
+        "Step 3/7 : RUN D\n ---> Using cache\n ---> hash",
     ]
 
     assert _process_docker_output_ci(out) == expected
 
 
 @pytest.mark.skipif(
-    platform.system() != 'Linux' and 'CI' in environ,
+    platform.system() != "Linux" and "CI" in environ,
     reason="Docker is only installed on the linux runner (Github Actions)",
 )
-@pytest.mark.parametrize('EXPORTER, config', [
-    [AWSBatchExporter, config_aws],
-    [ArgoWorkflowsExporter, config_argo],
-    [AirflowExporter, config_airflow],
-],
-                         ids=[
-                             'aws',
-                             'argo',
-                             'airflow',
-                         ])
-def test_docker_build_caches_pkg_installation(EXPORTER, config,
-                                              tmp_fast_pipeline, capfd):
-    Path('requirements.lock.txt').write_text('pkgmt==0.0.1')
+@pytest.mark.parametrize(
+    "EXPORTER, config",
+    [
+        [AWSBatchExporter, config_aws],
+        [ArgoWorkflowsExporter, config_argo],
+        [AirflowExporter, config_airflow],
+    ],
+    ids=[
+        "aws",
+        "argo",
+        "airflow",
+    ],
+)
+def test_docker_build(EXPORTER, config, tmp_fast_pipeline, capfd, monkeypatch):
+    """
+    Unlike other tests, this one does not mock calls to Docker's CLI. We have a simple
+    Dockerfile that builds fast and check that the container has what we expect
+    """
+
+    def git_hash(*args, **kwargs):
+        return "SOMEHASH"
+
+    monkeypatch.setattr(repo, "git_hash", git_hash)
+
+    Path("requirements.lock.txt").write_text("pkgmt==0.0.1")
     git_init()
 
     with capfd.disabled():
-        EXPORTER.new('soopervisor.yaml', env_name='my-env').add()
+        EXPORTER.new("soopervisor.yaml", env_name="my-env").add()
 
-        Path('soopervisor.yaml').write_text(config)
+        Path("soopervisor.yaml").write_text(config)
 
+    # build image for the first time
     with capfd.disabled():
-        EXPORTER.load('soopervisor.yaml', env_name='my-env',
-                      lazy_import=False).export(mode='incremental',
-                                                until='build',
-                                                skip_tests=True,
-                                                skip_docker=False,
-                                                ignore_git=True,
-                                                lazy_import=False,
-                                                task_name=None)
-
-    EXPORTER.load('soopervisor.yaml', env_name='my-env',
-                  lazy_import=False).export(mode='incremental',
-                                            until='build',
-                                            skip_tests=True,
-                                            skip_docker=False,
-                                            ignore_git=True,
-                                            lazy_import=False,
-                                            task_name=None)
+        EXPORTER.load("soopervisor.yaml", env_name="my-env", lazy_import=False).export(
+            mode="incremental",
+            until="build",
+            skip_tests=True,
+            skip_docker=False,
+            ignore_git=True,
+            lazy_import=False,
+            task_name=None,
+        )
+
+    # build it a second time (to check pkg installation cache)
+    EXPORTER.load("soopervisor.yaml", env_name="my-env", lazy_import=False).export(
+        mode="incremental",
+        until="build",
+        skip_tests=True,
+        skip_docker=False,
+        ignore_git=True,
+        lazy_import=False,
+        task_name=None,
+    )
 
     # check that pip installation is cached
     captured = capfd.readouterr()
 
     # this is the output format on github actions
-    if '--->' in captured.err or '--->' in captured.out:
+    if "--->" in captured.err or "--->" in captured.out:
         sections = _process_docker_output_ci(captured.out)
-        cached = [group for group in sections if 'Using cache' in group]
+        cached = [group for group in sections if "Using cache" in group]
     # this is the output format I'm getting locally (macOS)
     # Docker version 20.10.17, build 100c701
     else:
         sections = _process_docker_output(captured.err)
-        cached = [group for group in sections if 'CACHED' in group]
+        cached = [group for group in sections if "CACHED" in group]
 
     assert len(cached) == 2
 
-    copy = 'COPY requirements.lock.txt project/requirements.lock.txt'
+    copy = "COPY requirements.lock.txt project/requirements.lock.txt"
     assert copy in cached[0]
-    install = 'RUN pip install --requirement project/requirements.lock.txt'
+    install = "RUN pip install --requirement project/requirements.lock.txt"
     assert install in cached[1]
 
     # check that the packages in the requirements file are installed
-    out = subprocess.run(['docker', 'run', 'fast-pipeline', 'pip', 'freeze'],
-                         check=True,
-                         capture_output=True)
+    out = subprocess.run(
+        ["docker", "run", "fast-pipeline", "pip", "freeze"],
+        check=True,
+        capture_output=True,
+    )
 
     pkgs = out.stdout.decode()
-    assert 'pkgmt==0.0.1' in pkgs
+    assert "pkgmt==0.0.1" in pkgs
 
     # check that the right files are copied
-    out = subprocess.run(['docker', 'run', 'fast-pipeline', 'ls'],
-                         check=True,
-                         capture_output=True)
+    out = subprocess.run(
+        ["docker", "run", "fast-pipeline", "ls"], check=True, capture_output=True
+    )
 
     ls = out.stdout.decode()
 
-    expected = ('env.yaml\nenvironment.yml\nfast-pipeline.tar.gz\n'
-                'fast_pipeline.py\nmy-env\npipeline.yaml\n'
-                'requirements.lock.txt\nsoopervisor.yaml\n')
+    expected = (
+        "env.yaml\nenvironment.yml\nfast-pipeline.tar.gz\n"
+        "fast_pipeline.py\nmy-env\npipeline.yaml\n"
+        "requirements.lock.txt\nsoopervisor.yaml\n"
+    )
     assert ls == expected
 
-    out = subprocess.run(['docker', 'run', 'fast-pipeline', 'cat', 'env.yaml'],
-                         check=True,
-                         capture_output=True)
+    out = subprocess.run(
+        ["docker", "run", "fast-pipeline", "cat", "env.yaml"],
+        check=True,
+        capture_output=True,
+    )
 
     env_contents = out.stdout.decode()
-    assert env_contents == ""
+    assert (
+        env_contents
+        == """\
+git: master
+git_hash: SOMEHASH
+"""
+    )
+
+    # FIXME: original env.yaml is overwritten (or left there if it doesn't exist)
+    # we need to revert the change once the process is done (maybe with a context
+    # manager)
+    assert not Path("env.yaml").is_file()
+
+
+@pytest.mark.parametrize(
+    "env_var, env_user, env_expected",
+    [
+        [
+            None,
+            None,
+            {"git": "master", "git_hash": "SOMEHASH"},
+        ],
+        [
+            "env.something.yaml",
+            {"some": {"nested": "value"}},
+            {"git": "master", "git_hash": "SOMEHASH", "some": {"nested": "value"}},
+        ],
+        [
+            None,
+            {"some": {"nested": "value"}},
+            {"git": "master", "git_hash": "SOMEHASH", "some": {"nested": "value"}},
+        ],
+    ],
+)
+def test_prepare_env_file(
+    tmp_fast_pipeline, monkeypatch, env_var, env_user, env_expected
+):
+    if env_var:
+        monkeypatch.setenv("PLOOMBER_ENV_FILENAME", env_var)
+
+    file_to_use = env_var or "env.yaml"
+
+    if env_user:
+        Path(file_to_use).write_text(yaml.safe_dump(env_user))
+
+    def git_hash(*args, **kwargs):
+        return "SOMEHASH"
+
+    monkeypatch.setattr(repo, "git_hash", git_hash)
+    git_init()
+
+    prepare_env_file("pipeline.yaml")
+
+    assert yaml.safe_load(Path(file_to_use).read_text()) == env_expected
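test_prepare_env_file's second case relies on Ploomber's PLOOMBER_ENV_FILENAME environment variable, which path_to_env_from_spec honors when locating the env file next to the spec. Roughly:

    import os

    os.environ["PLOOMBER_ENV_FILENAME"] = "env.something.yaml"
    # path_to_env_from_spec("pipeline.yaml") now resolves env.something.yaml
    # (next to pipeline.yaml) instead of the default env.yaml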
From a92cd1921ecd88fd2e6fefbe2a92b99c43d56135 Mon Sep 17 00:00:00 2001
From: Eduardo Blancas
Date: Wed, 28 Dec 2022 21:33:14 -0600
Subject: [PATCH 5/6] fixes

---
 src/soopervisor/commons/docker.py | 193 ++++++++++++++++--------------
 tests/test_commons_docker.py      |  20 +++-
 2 files changed, 118 insertions(+), 95 deletions(-)

diff --git a/src/soopervisor/commons/docker.py b/src/soopervisor/commons/docker.py
index a8a140a..97b28de 100644
--- a/src/soopervisor/commons/docker.py
+++ b/src/soopervisor/commons/docker.py
@@ -3,6 +3,8 @@
 import tarfile
 from pathlib import Path
 from typing import Mapping
+from contextlib import contextmanager
+import shutil
 
 import yaml
 from ploomber.env.envdict import EnvDict
@@ -62,6 +64,7 @@ def get_dependencies():
     return dependency_files, lock_paths
 
 
+@contextmanager
 def prepare_env_file(entry_point: str):
     """
     Given an entrypoint pipeline.yaml file determine the env.yaml in use
@@ -69,14 +72,15 @@
     and populate it with the default placeholders or ignore them if
     they already exist. The env file will be created in the root of
     the pipeline file if one doesn't exist.
     """
-    env_path = path_to_env_from_spec(entry_point)
+    path_to_env = path_to_env_from_spec(entry_point)
+    user_provided_env = path_to_env is not None
 
-    if env_path is not None:
-        env_default = EnvDict({}, path_to_here=Path(env_path).parent)
+    if user_provided_env:
+        env_default = EnvDict({}, path_to_here=Path(path_to_env).parent)
 
-        env_data = yaml.safe_load(Path(env_path).read_text())
+        env_data = yaml.safe_load(Path(path_to_env).read_text())
         if not isinstance(env_data, Mapping):
-            raise ConfigurationFileTypeError(env_path, env_data)
+            raise ConfigurationFileTypeError(path_to_env, env_data)
         env_data = dict(env_data)
         if "git" in env_default:
             env_data.setdefault("git", env_default["git"])
@@ -85,20 +89,30 @@
         if "build_time" in env_default:
             env_data.setdefault("build_time", env_default["build_time"])
     else:
-        env_path = Path(entry_point).parents[0] / "env.yaml"
-        env_default = EnvDict({}, path_to_here=Path(env_path).parent)
+        path_to_env = Path(entry_point).parents[0] / "env.yaml"
+        env_default = EnvDict({}, path_to_here=Path(path_to_env).parent)
         env_data = {}
 
         if "git" in env_default:
             env_data.setdefault("git", env_default["git"])
         if "git_hash" in env_default:
             env_data.setdefault("git_hash", env_default["git_hash"])
 
-    Path(env_path).write_text(yaml.safe_dump(env_data))
+    path_to_env = Path(path_to_env).resolve()
+    path_to_backup = Path("env.backup.yaml").resolve()
+
+    if user_provided_env:
+        shutil.copy(path_to_env, path_to_backup)
 
-    return env_path
+    try:
+        path_to_env.write_text(yaml.safe_dump(env_data))
+        yield
+    finally:
+        if user_provided_env:
+            shutil.copy(path_to_backup, path_to_env)
+            path_to_backup.unlink()
+        else:
+            path_to_env.unlink()
@@ -240,85 +254,31 @@ def build(e, cfg, env_name, until, entry_point, skip_tests=False, ignore_git=Fal
     dependencies.check_lock_files_exist()
     dependency_files, lock_paths = get_dependencies()
 
-    env_file_path = prepare_env_file(entry_point)
-    e.info("using env file from: " + str(env_file_path))
-    image_map = {}
+    with prepare_env_file(entry_point):
+        image_map = {}
 
-    setup_flow = Path("setup.py").exists()
+        setup_flow = Path("setup.py").exists()
 
-    # Generate source distribution
-    if setup_flow:
-        for task_pattern in sorted(dependency_files.keys()):
+        # Generate source distribution
+        if setup_flow:
+            for task_pattern in sorted(dependency_files.keys()):
 
-            if task_pattern != "default":
-                raise NotImplementedError(
-                    "Multiple requirements.*.lock.txt or "
-                    "environment.*.lock.yml files found along "
-                    "with setup.py file. Please have either "
-                    "of the two in the project root."
-                )
-            # .egg-info may cause issues if MANIFEST.in was recently updated
-            if Path("requirements.lock.txt").exists():
-                e.cp("requirements.lock.txt")
-            elif Path("environment.lock.yml").exists():
-                e.cp("environment.lock.yml")
-
-            e.rm("dist", "build", Path("src", pkg_name, f"{pkg_name}.egg-info"))
-            e.run("python", "-m", "build", "--sdist", description="Packaging code")
-            default_image_key = dependencies.get_default_image_key()
-
-            image = build_image(
-                e,
-                cfg,
-                env_name,
-                until,
-                entry_point,
-                skip_tests,
-                ignore_git,
-                pkg_name,
-                version,
-                task=default_image_key,
-            )
+                if task_pattern != "default":
+                    raise NotImplementedError(
+                        "Multiple requirements.*.lock.txt or "
+                        "environment.*.lock.yml files found along "
+                        "with setup.py file. Please have either "
+                        "of the two in the project root."
+                    )
+                # .egg-info may cause issues if MANIFEST.in was recently updated
+                if Path("requirements.lock.txt").exists():
+                    e.cp("requirements.lock.txt")
+                elif Path("environment.lock.yml").exists():
+                    e.cp("environment.lock.yml")
 
-            image_map[default_image_key] = image
-    # raise error if include is not None? and suggest to use MANIFEST.in
-    # instead
-    else:
-        # sort keys so we iterate in deterministic order and can test easily
-        for task in sorted(lock_paths.keys()):
-            lock_file = lock_paths[task]
-
-            if Path(lock_file).exists():
-                e.cp(lock_file)
-            e.rm("dist")
-            target = Path("dist", pkg_name)
-            e.info("Packaging code")
-            other_lock_files = [
-                file for file in list(lock_paths.values()) if file != lock_file
-            ]
-            exclude = cfg.exclude
-            if cfg.exclude and other_lock_files:
-                exclude = cfg.exclude + other_lock_files
-            elif not cfg.exclude and other_lock_files:
-                exclude = other_lock_files
-
-            rename_files = {}
-            if lock_file not in ("requirements.lock.txt", "environment.lock.yml"):
-                rename_files = (
-                    {lock_file: "requirements.lock.txt"}
-                    if "requirements" in lock_file
-                    else {lock_file: "environment.lock.yml"}
-                )
-            source.copy(
-                cmdr=e,
-                src=".",
-                dst=target,
-                include=cfg.include,
-                exclude=exclude,
-                ignore_git=ignore_git,
-                rename_files=rename_files,
-            )
-            source.compress_dir(e, target, Path("dist", f"{pkg_name}.tar.gz"))
+                e.rm("dist", "build", Path("src", pkg_name, f"{pkg_name}.egg-info"))
+                e.run("python", "-m", "build", "--sdist", description="Packaging code")
+                default_image_key = dependencies.get_default_image_key()
 
             image = build_image(
                 e,
                 cfg,
                 env_name,
                 until,
                 entry_point,
                 skip_tests,
                 ignore_git,
                 pkg_name,
                 version,
-                task,
+                task=default_image_key,
             )
 
-            image_map[task] = image
+            image_map[default_image_key] = image
+        # raise error if include is not None?
and suggest to use MANIFEST.in
+        # instead
+        else:
+            # sort keys so we iterate in deterministic order and can test easily
+            for task in sorted(lock_paths.keys()):
+                lock_file = lock_paths[task]
+
+                if Path(lock_file).exists():
+                    e.cp(lock_file)
+                e.rm("dist")
+                target = Path("dist", pkg_name)
+                e.info("Packaging code")
+                other_lock_files = [
+                    file for file in list(lock_paths.values()) if file != lock_file
+                ]
+                exclude = cfg.exclude
+                if cfg.exclude and other_lock_files:
+                    exclude = cfg.exclude + other_lock_files
+                elif not cfg.exclude and other_lock_files:
+                    exclude = other_lock_files
+
+                rename_files = {}
+                if lock_file not in ("requirements.lock.txt", "environment.lock.yml"):
+                    rename_files = (
+                        {lock_file: "requirements.lock.txt"}
+                        if "requirements" in lock_file
+                        else {lock_file: "environment.lock.yml"}
+                    )
+                source.copy(
+                    cmdr=e,
+                    src=".",
+                    dst=target,
+                    include=cfg.include,
+                    exclude=exclude,
+                    ignore_git=ignore_git,
+                    rename_files=rename_files,
+                )
+                source.compress_dir(e, target, Path("dist", f"{pkg_name}.tar.gz"))
+
+                image = build_image(
+                    e,
+                    cfg,
+                    env_name,
+                    until,
+                    entry_point,
+                    skip_tests,
+                    ignore_git,
+                    pkg_name,
+                    version,
+                    task,
+                )
+
+                image_map[task] = image
 
-    e.rm("dist")
-    e.cd("..")
+        e.rm("dist")
+        e.cd("..")
 
     if not setup_flow:
         # We need to go back to the env folder before return
diff --git a/tests/test_commons_docker.py b/tests/test_commons_docker.py
index 5b451a9..5bd7fba 100644
--- a/tests/test_commons_docker.py
+++ b/tests/test_commons_docker.py
@@ -227,10 +227,8 @@ def git_hash(*args, **kwargs):
 """
     )
 
-    # FIXME: original env.yaml is overwritten (or left there if it doesn't exist)
-    # we need to revert the change once the process is done (maybe with a context
-    # manager)
     assert not Path("env.yaml").is_file()
+    assert not Path("env.backup.yaml").is_file()
 
 
 @pytest.mark.parametrize(
@@ -252,6 +250,11 @@
             {"git": "master", "git_hash": "SOMEHASH", "some": {"nested": "value"}},
         ],
     ],
+    ids=[
+        "no-user-env",
+        "user-env-with-var",
+        "user-env",
+    ],
 )
 def test_prepare_env_file(
     tmp_fast_pipeline, monkeypatch, env_var, env_user, env_expected
 ):
@@ -270,6 +273,13 @@ def git_hash(*args, **kwargs):
     monkeypatch.setattr(repo, "git_hash", git_hash)
     git_init()
 
-    prepare_env_file("pipeline.yaml")
+    # env is modified
+    with prepare_env_file("pipeline.yaml"):
+        assert yaml.safe_load(Path(file_to_use).read_text()) == env_expected
 
-    assert yaml.safe_load(Path(file_to_use).read_text()) == env_expected
+    # but then restored
+    if env_user:
+        assert yaml.safe_load(Path(file_to_use).read_text()) == env_user
+    # or deleted if the user did not provide one
+    else:
+        assert not Path(file_to_use).is_file()
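With [PATCH 5/6], callers use prepare_env_file as a context manager and the try/finally guarantees cleanup. The contract, in short:

    # sketch of the usage inside build():
    with prepare_env_file("pipeline.yaml"):
        ...  # the image is built here; env.yaml holds the injected placeholders
    # on exit: a user-provided env.yaml is restored from env.backup.yaml and the
    # backup removed; a generated env.yaml is deleted outright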
From 48544a675dfe4b737f12bca0dbfea72581a98629 Mon Sep 17 00:00:00 2001
From: Eduardo Blancas
Date: Wed, 28 Dec 2022 21:45:50 -0600
Subject: [PATCH 6/6] fix

---
 tests/test_commons.py | 927 ++++++++++++++++++++++--------------------
 1 file changed, 478 insertions(+), 449 deletions(-)

diff --git a/tests/test_commons.py b/tests/test_commons.py
index 99d071e..a4b6e7c 100644
--- a/tests/test_commons.py
+++ b/tests/test_commons.py
@@ -20,10 +20,9 @@
 
 
 class ConcreteDockerConfig(AbstractDockerConfig):
-
     @classmethod
     def get_backend_value(self):
-        return 'backend-value'
+        return "backend-value"
 
 
 @pytest.fixture
 def cmdr():
 
 
 def git_init():
     # to prevent overwriting the repo's settings
-    if 'soopervisor' in str(Path('.').resolve()):
-        raise ValueError('This doesnt look like a tmp directory. '
-                         'Did you forget the tmp_empty fixture?')
+    if "soopervisor" in str(Path(".").resolve()):
+        raise ValueError(
+            "This doesn't look like a tmp directory. "
+            "Did you forget the tmp_empty fixture?"
+        )
 
-    subprocess.check_call(['git', 'init'])
-    subprocess.check_call(['git', 'config', 'commit.gpgsign', 'false'])
-    subprocess.check_call(['git', 'config', 'user.email', 'ci@ploomberio'])
-    subprocess.check_call(['git', 'config', 'user.name', 'Ploomber'])
-    subprocess.check_call(['git', 'add', '--all'])
-    subprocess.check_call(['git', 'commit', '-m', 'commit'])
+    subprocess.check_call(["git", "init"])
+    subprocess.check_call(["git", "config", "commit.gpgsign", "false"])
+    subprocess.check_call(["git", "config", "user.email", "ci@ploomberio"])
+    subprocess.check_call(["git", "config", "user.name", "Ploomber"])
+    subprocess.check_call(["git", "add", "--all"])
+    subprocess.check_call(["git", "commit", "-m", "commit"])
 
 
 def git_commit():
-    subprocess.check_call(['git', 'add', '--all'])
-    subprocess.check_call(['git', 'commit', '-m', 'commit'])
+    subprocess.check_call(["git", "add", "--all"])
+    subprocess.check_call(["git", "commit", "-m", "commit"])
 
 
 def test_glob_all_excludes_directories(tmp_empty):
-    Path('dir').mkdir()
-    Path('dir', 'a').touch()
+    Path("dir").mkdir()
+    Path("dir", "a").touch()
 
-    assert set(Path(p) for p in source.glob_all('.')) == {Path('dir', 'a')}
+    assert set(Path(p) for p in source.glob_all(".")) == {Path("dir", "a")}
 
 
 def test_global_all_excludes_from_arg(tmp_empty):
-    Path('dir').mkdir()
-    Path('dir', 'a').touch()
-    Path('excluded').mkdir()
-    Path('excluded', 'should-not-appear').touch()
+    Path("dir").mkdir()
+    Path("dir", "a").touch()
+    Path("excluded").mkdir()
+    Path("excluded", "should-not-appear").touch()
 
-    assert set(Path(p) for p in source.glob_all('.', exclude='excluded')) == {
-        Path('dir', 'a')
+    assert set(Path(p) for p in source.glob_all(".", exclude="excluded")) == {
+        Path("dir", "a")
     }
 
 
 def test_copy(cmdr, tmp_empty):
-    Path('file').touch()
-    Path('dir').mkdir()
-    Path('dir', 'another').touch()
+    Path("file").touch()
+    Path("dir").mkdir()
+    Path("dir", "another").touch()
 
     git_init()
 
-    source.copy(cmdr, '.', 'dist')
+    source.copy(cmdr, ".", "dist")
 
-    expected = set(Path(p) for p in (
-        'dist/file',
-        'dist/dir/another',
-    ))
+    expected = set(
+        Path(p)
+        for p in (
+            "dist/file",
+            "dist/dir/another",
+        )
+    )
 
-    assert set(Path(p) for p in source.glob_all('dist')) == expected
+    assert set(Path(p) for p in source.glob_all("dist")) == expected
 
 
 def test_copy_with_rename(cmdr, tmp_empty):
-    Path('file').touch()
-    Path('dir').mkdir()
-    Path('dir', 'another').touch()
-    rename_files = {'file': 'file_another'}
+    Path("file").touch()
+    Path("dir").mkdir()
+    Path("dir", "another").touch()
+    rename_files = {"file": "file_another"}
 
     git_init()
 
-    source.copy(cmdr, '.', 'dist', rename_files=rename_files)
+    source.copy(cmdr, ".", "dist", rename_files=rename_files)
 
     expected = set(
-        Path(p) for p in (
-            'dist/file_another',
-            'dist/dir/another',
-        ))
+        Path(p)
+        for p in (
+            "dist/file_another",
+            "dist/dir/another",
+        )
+    )
 
-    assert set(Path(p) for p in source.glob_all('dist')) == expected
+    assert set(Path(p) for p in source.glob_all("dist")) == expected
 
 
 def test_copy_with_gitignore(cmdr, tmp_empty):
-    Path('file').touch()
-    Path('ignoreme').touch()
+    Path("file").touch()
+    Path("ignoreme").touch()
 
-    Path('.gitignore').write_text('ignoreme')
+    Path(".gitignore").write_text("ignoreme")
     git_init()
 
-    source.copy(cmdr, '.', 'dist')
+    source.copy(cmdr, ".", "dist")
 
-    expected = set({Path('dist/file')})
-    assert set(Path(p) for p in source.glob_all('dist')) == expected
+    expected = set({Path("dist/file")})
+    assert set(Path(p) for p in source.glob_all("dist")) == expected
 
 
 def test_error_if_exclude_and_include_overlap(cmdr, tmp_empty):
     with pytest.raises(ClickException) as excinfo:
-        source.copy(cmdr, '.', 'dist', exclude=['file'], include=['file'])
+        source.copy(cmdr, ".", "dist", exclude=["file"], include=["file"])
 
-    expected = ("include and exclude must "
-                "not have overlapping elements: {'file'}")
+    expected = "include and exclude must " "not have overlapping elements: {'file'}"
     assert expected == str(excinfo.value)
 
 
 def test_override_git_with_exclude(cmdr, tmp_empty):
-    Path('file').touch()
-    Path('secrets.txt').touch()
+    Path("file").touch()
+    Path("secrets.txt").touch()
 
     # let git track everything
-    Path('.gitignore').touch()
+    Path(".gitignore").touch()
     git_init()
 
     # exclude some file
-    source.copy(cmdr, '.', 'dist', exclude=['file'])
+    source.copy(cmdr, ".", "dist", exclude=["file"])
 
-    expected = set({Path('dist/secrets.txt')})
-    assert set(Path(p) for p in source.glob_all('dist')) == expected
+    expected = set({Path("dist/secrets.txt")})
+    assert set(Path(p) for p in source.glob_all("dist")) == expected
 
 
 def test_copy_override_gitignore_with_include(cmdr, tmp_empty):
-    Path('file').touch()
-    Path('secrets.txt').touch()
+    Path("file").touch()
+    Path("secrets.txt").touch()
 
-    Path('.gitignore').write_text('secrets.txt')
+    Path(".gitignore").write_text("secrets.txt")
     git_init()
 
-    source.copy(cmdr, '.', 'dist', include=['secrets.txt'])
+    source.copy(cmdr, ".", "dist", include=["secrets.txt"])
 
-    expected = set(Path(p) for p in (
-        'dist/file',
-        'dist/secrets.txt',
-    ))
+    expected = set(
+        Path(p)
+        for p in (
+            "dist/file",
+            "dist/secrets.txt",
+        )
+    )
 
-    assert set(Path(p) for p in source.glob_all('dist')) == expected
+    assert set(Path(p) for p in source.glob_all("dist")) == expected
 
 
 def test_copy_override_gitignore_with_include_entire_folder(cmdr, tmp_empty):
-    Path('file').touch()
-    Path('dir').mkdir()
-    Path('dir', 'secrets.txt').touch()
-    Path('dir', 'more-secrets.txt').touch()
+    Path("file").touch()
+    Path("dir").mkdir()
+    Path("dir", "secrets.txt").touch()
+    Path("dir", "more-secrets.txt").touch()
 
-    Path('.gitignore').write_text('dir')
+    Path(".gitignore").write_text("dir")
     git_init()
 
-    source.copy(cmdr, '.', 'dist', include=['dir'])
+    source.copy(cmdr, ".", "dist", include=["dir"])
 
     expected = set(
-        Path(p) for p in (
-            'dist/file',
-            'dist/dir/secrets.txt',
-            'dist/dir/more-secrets.txt',
-        ))
+        Path(p)
+        for p in (
+            "dist/file",
+            "dist/dir/secrets.txt",
+            "dist/dir/more-secrets.txt",
+        )
+    )
 
-    assert set(Path(p) for p in source.glob_all('dist')) == expected
+    assert set(Path(p) for p in source.glob_all("dist")) == expected
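Taken together, the tests above pin down source.copy's selection rule: start from the git-tracked files when a repository is present, drop anything in `exclude`, then force-add anything in `include`. Conceptually (set arithmetic, not the real implementation):

    tracked = {"file"}            # from `git ls-files`; secrets.txt is gitignored
    exclude = set()
    include = {"secrets.txt"}     # forced back in via include

    selected = (tracked - exclude) | include
    assert selected == {"file", "secrets.txt"}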
Path("dir", "secrets.txt").touch() + Path("dir", "more-secrets.txt").touch() - source.copy(cmdr, '.', 'dist', exclude=['dir']) + source.copy(cmdr, ".", "dist", exclude=["dir"]) - expected = set(Path(p) for p in ('dist/file', )) - assert set(Path(p) for p in source.glob_all('dist')) == expected + expected = set(Path(p) for p in ("dist/file",)) + assert set(Path(p) for p in source.glob_all("dist")) == expected def test_ignores_pycache(cmdr, tmp_empty): - Path('file').touch() - dir_ = Path('__pycache__') + Path("file").touch() + dir_ = Path("__pycache__") dir_.mkdir() - (dir_ / 'file').touch() - (dir_ / 'another').touch() - dir_another = Path('subdir', '__pycache__') + (dir_ / "file").touch() + (dir_ / "another").touch() + dir_another = Path("subdir", "__pycache__") dir_another.mkdir(parents=True) - (dir_another / 'file').touch() - (dir_another / 'another').touch() + (dir_another / "file").touch() + (dir_another / "another").touch() - source.copy(cmdr, '.', 'dist') + source.copy(cmdr, ".", "dist") - expected = set(Path(p) for p in ('dist/file', )) - assert set(Path(p) for p in source.glob_all('dist')) == expected + expected = set(Path(p) for p in ("dist/file",)) + assert set(Path(p) for p in source.glob_all("dist")) == expected def test_git_tracked_files(tmp_empty): - Path('file').touch() - Path('dir').mkdir() - Path('dir', 'another').touch() + Path("file").touch() + Path("dir").mkdir() + Path("dir", "another").touch() git_init() - assert {'dir/another', 'file'} == set(source.git_tracked_files()[0]) + assert {"dir/another", "file"} == set(source.git_tracked_files()[0]) def test_warns_if_fails_to_get_git_tracked_files(tmp_empty, capsys): - Path('file').touch() - Path('secrets.txt').touch() + Path("file").touch() + Path("secrets.txt").touch() with Commander() as cmdr: - source.copy(cmdr, '.', 'dist') + source.copy(cmdr, ".", "dist") captured = capsys.readouterr() - assert 'Unable to get git tracked files' in captured.out + assert "Unable to get git tracked files" in captured.out def test_warns_on_dirty_git(tmp_empty, capsys): - Path('file').touch() - Path('secrets.txt').touch() + Path("file").touch() + Path("secrets.txt").touch() - Path('.gitignore').write_text('secrets.txt') + Path(".gitignore").write_text("secrets.txt") git_init() - Path('new-file').touch() + Path("new-file").touch() with Commander() as cmdr: - source.copy(cmdr, '.', 'dist') + source.copy(cmdr, ".", "dist") captured = capsys.readouterr() - assert 'Your git repository contains uncommitted' in captured.out + assert "Your git repository contains uncommitted" in captured.out def test_errors_if_no_tracked_files(tmp_empty): - Path('file').touch() + Path("file").touch() git_init() - dir_ = Path('dir') + dir_ = Path("dir") dir_.mkdir() os.chdir(dir_) - Path('another').touch() + Path("another").touch() with pytest.raises(ClickException) as excinfo: with Commander() as cmdr: - source.copy(cmdr, '.', 'dist') + source.copy(cmdr, ".", "dist") - expected = ('Running inside a git repository, but no files in ' - 'the current working directory are tracked by git. Commit the ' - 'files to include them in the Docker image or pass the ' - '--ignore-git flag to soopervisor export') + expected = ( + "Running inside a git repository, but no files in " + "the current working directory are tracked by git. 
Commit the " + "files to include them in the Docker image or pass the " + "--ignore-git flag to soopervisor export" + ) assert str(excinfo.value) == expected def test_copy_ignore_git(tmp_empty): - Path('file').touch() + Path("file").touch() git_init() - dir_ = Path('dir') + dir_ = Path("dir") dir_.mkdir() os.chdir(dir_) - Path('another').touch() + Path("another").touch() with Commander() as cmdr: - source.copy(cmdr, '.', 'dist', ignore_git=True) + source.copy(cmdr, ".", "dist", ignore_git=True) - assert Path('dist', 'another').is_file() + assert Path("dist", "another").is_file() def test_copy_warn_if_file_too_big(cmdr, tmp_empty, monkeypatch, capsys): # mock files to be 11MB - monkeypatch.setattr(source.os.path, 'getsize', - Mock(return_value=1024 * 1024 * 11.1243214124)) + monkeypatch.setattr( + source.os.path, "getsize", Mock(return_value=1024 * 1024 * 11.1243214124) + ) - Path('file').touch() - Path('dir').mkdir() - Path('dir', 'another').touch() - Path('dir', 'others').touch() + Path("file").touch() + Path("dir").mkdir() + Path("dir", "another").touch() + Path("dir", "others").touch() git_init() - source.copy(cmdr, '.', 'dist') + source.copy(cmdr, ".", "dist") expected = set( - Path(p) for p in ( - 'dist/file', - 'dist/dir/another', - 'dist/dir/others', - )) + Path(p) + for p in ( + "dist/file", + "dist/dir/another", + "dist/dir/others", + ) + ) captured = capsys.readouterr() - assert set(Path(p) for p in source.glob_all('dist')) == expected - assert 'The following files are too big. ' in captured.out - assert 'file' in captured.out + assert set(Path(p) for p in source.glob_all("dist")) == expected + assert "The following files are too big. " in captured.out + assert "file" in captured.out def test_compress_dir(tmp_empty): - dir = Path('dist', 'project-name') + dir = Path("dist", "project-name") dir.mkdir(parents=True) - (dir / 'file').touch() + (dir / "file").touch() with Commander() as cmdr: - source.compress_dir(cmdr, 'dist/project-name', - 'dist/project-name.tar.gz') + source.compress_dir(cmdr, "dist/project-name", "dist/project-name.tar.gz") - with tarfile.open('dist/project-name.tar.gz', 'r:gz') as tar: - tar.extractall('.') + with tarfile.open("dist/project-name.tar.gz", "r:gz") as tar: + tar.extractall(".") - expected = {Path('project-name/file')} - assert set(Path(p) for p in source.glob_all('project-name')) == expected + expected = {Path("project-name/file")} + assert set(Path(p) for p in source.glob_all("project-name")) == expected def test_compress_warns_if_output_too_big(tmp_empty, monkeypatch, capsys): # mock a file of 6MB - monkeypatch.setattr(source.os.path, 'getsize', - Mock(return_value=1024 * 1024 * 6)) + monkeypatch.setattr(source.os.path, "getsize", Mock(return_value=1024 * 1024 * 6)) - dir = Path('dist', 'project-name') + dir = Path("dist", "project-name") dir.mkdir(parents=True) - (dir / 'file').touch() + (dir / "file").touch() with Commander() as cmdr: - source.compress_dir(cmdr, 'dist/project-name', - 'dist/project-name.tar.gz') + source.compress_dir(cmdr, "dist/project-name", "dist/project-name.tar.gz") captured = capsys.readouterr() - expected = ("The project's source code 'dist/project-name.tar.gz' " - "is larger than 5MB") + expected = ( + "The project's source code 'dist/project-name.tar.gz' " "is larger than 5MB" + ) assert expected in captured.out -@pytest.mark.parametrize('env_yaml, expected', [ - [{ - 'dependencies': ['a', 'b', { - 'pip': ['c', 'd'] - }] - }, ['c', 'd']], - [{ - 'dependencies': [{ - 'pip': ['y', 'z'] - }, 'a', 'b'] - }, ['y', 'z']], 
-])
+@pytest.mark.parametrize(
+    "env_yaml, expected",
+    [
+        [{"dependencies": ["a", "b", {"pip": ["c", "d"]}]}, ["c", "d"]],
+        [{"dependencies": [{"pip": ["y", "z"]}, "a", "b"]}, ["y", "z"]],
+    ],
+)
 def test_extract_pip_from_env_yaml(tmp_empty, env_yaml, expected):
-    Path('environment.yml').write_text(yaml.safe_dump(env_yaml))
-    assert conda.extract_pip_from_env_yaml('environment.yml') == expected
+    Path("environment.yml").write_text(yaml.safe_dump(env_yaml))
+    assert conda.extract_pip_from_env_yaml("environment.yml") == expected
 
 
 def test_error_extract_pip_missing_dependencies_section():
-    Path('environment.yml').write_text(yaml.safe_dump({}))
+    Path("environment.yml").write_text(yaml.safe_dump({}))
 
     with pytest.raises(ClickException) as excinfo:
-        conda.extract_pip_from_env_yaml('environment.yml')
+        conda.extract_pip_from_env_yaml("environment.yml")
 
-    msg = ('Cannot extract pip dependencies from environment.lock.yml: '
-           'missing dependencies section')
+    msg = (
+        "Cannot extract pip dependencies from environment.lock.yml: "
+        "missing dependencies section"
+    )
     assert msg == str(excinfo.value)
 
 
 def test_error_extract_pip_missing_pip_dict():
-    Path('environment.yml').write_text(
-        yaml.safe_dump({'dependencies': ['a', 'b']}))
+    Path("environment.yml").write_text(yaml.safe_dump({"dependencies": ["a", "b"]}))
 
     with pytest.raises(ClickException) as excinfo:
-        conda.extract_pip_from_env_yaml('environment.yml')
+        conda.extract_pip_from_env_yaml("environment.yml")
 
-    msg = ('Cannot extract pip dependencies from environment.lock.yml: '
-           'missing dependencies.pip section')
+    msg = (
+        "Cannot extract pip dependencies from environment.lock.yml: "
+        "missing dependencies.pip section"
+    )
     assert msg == str(excinfo.value)
 
 
 def test_error_extract_pip_unexpected_pip_list():
-    Path('environment.yml').write_text(
-        yaml.safe_dump({'dependencies': ['a', 'b', {
-            'pip': 1
-        }]}))
+    Path("environment.yml").write_text(
+        yaml.safe_dump({"dependencies": ["a", "b", {"pip": 1}]})
+    )
 
     with pytest.raises(ClickException) as excinfo:
-        conda.extract_pip_from_env_yaml('environment.yml')
+        conda.extract_pip_from_env_yaml("environment.yml")
 
-    msg = ('Cannot extract pip dependencies from environment.lock.yml: '
-           'unexpected dependencies.pip value. Expected a list of '
-           'dependencies, got: 1')
+    msg = (
+        "Cannot extract pip dependencies from environment.lock.yml: "
+        "unexpected dependencies.pip value. Expected a list of "
+        "dependencies, got: 1"
+    )
 
     assert msg == str(excinfo.value)
 
 
 def dag_build():
     dag.render().build()
 
 
-@pytest.mark.parametrize('mode, tasks_expected, args_expected', [
-    ['incremental', {}, ['--entry-point', 'pipeline.yaml']],
-    [
-        'regular', {
-            'root': [],
-            'another': ['root']
-        }, ['--entry-point', 'pipeline.yaml']
-    ],
-    [
-        'force', {
-            'root': [],
-            'another': ['root']
-        }, ['--entry-point', 'pipeline.yaml', '--force']
-    ],
-])
-def test_load_tasks(cmdr, tmp_fast_pipeline, add_current_to_sys_path,
-                    dag_build, mode, tasks_expected, args_expected):
+@pytest.mark.parametrize(
+    "mode, tasks_expected, args_expected",
+    [
+        ["incremental", {}, ["--entry-point", "pipeline.yaml"]],
+        [
+            "regular",
+            {"root": [], "another": ["root"]},
+            ["--entry-point", "pipeline.yaml"],
+        ],
+        [
+            "force",
+            {"root": [], "another": ["root"]},
+            ["--entry-point", "pipeline.yaml", "--force"],
+        ],
+    ],
+)
+def test_load_tasks(
+    cmdr,
+    tmp_fast_pipeline,
+    add_current_to_sys_path,
+    dag_build,
+    mode,
+    tasks_expected,
+    args_expected,
+):
     tasks, args = commons.load_tasks(cmdr=cmdr, mode=mode)
     assert tasks == tasks_expected
     assert args == args_expected
 
 
-@pytest.mark.parametrize('mode, tasks_expected, args_expected', [
-    ['incremental', {
-        'another': []
-    }, ['--entry-point', 'pipeline.yaml']],
-    [
-        'regular', {
-            'root': [],
-            'another': ['root']
-        }, ['--entry-point', 'pipeline.yaml']
-    ],
-    [
-        'force', {
-            'root': [],
-            'another': ['root']
-        }, ['--entry-point', 'pipeline.yaml', '--force']
-    ],
-])
-def test_load_tasks_missing_remote_metadata(cmdr, tmp_fast_pipeline,
-                                            add_current_to_sys_path, dag_build,
-                                            mode, tasks_expected,
-                                            args_expected):
-    Path('remote', 'out', 'another').unlink()
+@pytest.mark.parametrize(
+    "mode, tasks_expected, args_expected",
+    [
+        ["incremental", {"another": []}, ["--entry-point", "pipeline.yaml"]],
+        [
+            "regular",
+            {"root": [], "another": ["root"]},
+            ["--entry-point", "pipeline.yaml"],
+        ],
+        [
+            "force",
+            {"root": [], "another": ["root"]},
+            ["--entry-point", "pipeline.yaml", "--force"],
+        ],
+    ],
+)
+def test_load_tasks_missing_remote_metadata(
+    cmdr,
+    tmp_fast_pipeline,
+    add_current_to_sys_path,
+    dag_build,
+    mode,
+    tasks_expected,
+    args_expected,
+):
+    Path("remote", "out", "another").unlink()
     tasks, args = commons.load_tasks(cmdr=cmdr, mode=mode)
     assert tasks == tasks_expected
     assert args == args_expected
 
 
 def test_invalid_mode(cmdr, tmp_fast_pipeline):
     with pytest.raises(ValueError) as excinfo:
-        commons.load_tasks(cmdr=cmdr, mode='unknown')
+        commons.load_tasks(cmdr=cmdr, mode="unknown")
 
     assert "'mode' must be one of" in str(excinfo.value)
 
 
 def test_loads_pipeline_with_name(cmdr, tmp_fast_pipeline):
-    os.rename('pipeline.yaml', 'pipeline.train.yaml')
+    os.rename("pipeline.yaml", "pipeline.train.yaml")
 
     # we need this to set our project root
-    Path('pipeline.yaml').touch()
+    Path("pipeline.yaml").touch()
 
-    _, args = commons.load_tasks(cmdr, name='train')
-    assert args == ['--entry-point', 'pipeline.train.yaml']
+    _, args = commons.load_tasks(cmdr, name="train")
+    assert args == ["--entry-point", "pipeline.train.yaml"]
 
 
 def test_loads_pipeline_in_package_with_name(cmdr, backup_packaged_project):
-    os.rename(Path('src', 'my_project', 'pipeline.yaml'),
-              Path('src', 'my_project', 'pipeline.train.yaml'))
-    _, args = commons.load_tasks(cmdr, name='train')
+    os.rename(
+        Path("src", "my_project", "pipeline.yaml"),
+        Path("src", "my_project", "pipeline.train.yaml"),
+    )
+    _, args = commons.load_tasks(cmdr, name="train")
"pipeline.train.yaml"), + ) + _, args = commons.load_tasks(cmdr, name="train") - assert args == [ - '--entry-point', - str(Path('src/my_project/pipeline.train.yaml')) - ] + assert args == ["--entry-point", str(Path("src/my_project/pipeline.train.yaml"))] def test_check_lock_files_exist(tmp_empty): @@ -500,128 +523,126 @@ def test_check_lock_files_exist(tmp_empty): with pytest.raises(ClickException) as excinfo: dependencies.check_lock_files_exist() - expected = ('Expected requirements.lock.txt or environment.lock.yml at ' - 'the root directory') + expected = ( + "Expected requirements.lock.txt or environment.lock.yml at " + "the root directory" + ) assert expected in str(excinfo.value) def test_check_lock_files_exist_multiple_dependency(tmp_empty): - Path('requirements.txt').touch() - Path('requirements.lock.txt').touch() - Path('requirements.fit-__.txt').touch() + Path("requirements.txt").touch() + Path("requirements.lock.txt").touch() + Path("requirements.fit-__.txt").touch() with pytest.raises(ClickException) as excinfo: dependencies.check_lock_files_exist() - expected = ('Expected requirements..lock.txt file for \ - each requirements..txt file ') + expected = "Expected requirements..lock.txt file for \ + each requirements..txt file " assert expected in str(excinfo.value) def test_error_if_missing_dockerfile(tmp_empty): with pytest.raises(MissingDockerfileError) as excinfo: - commons.docker.build(e=Mock(), - cfg=Mock(), - env_name='some_name', - until=Mock(), - entry_point=Mock()) + commons.docker.build( + e=Mock(), cfg=Mock(), env_name="some_name", until=Mock(), entry_point=Mock() + ) - assert excinfo.value.env_name == 'some_name' + assert excinfo.value.env_name == "some_name" def _list_files(path): - """Return files in a .tar.gz file, ignoring hidden files - """ + """Return files in a .tar.gz file, ignoring hidden files""" with tarfile.open(path) as tar: - return set(f for f in tar.getnames() - if not Path(f).name.startswith('.')) + return set(f for f in tar.getnames() if not Path(f).name.startswith(".")) -@pytest.mark.xfail(reason='current implementation overwrites files') +@pytest.mark.xfail(reason="current implementation overwrites files") def test_cp_ploomber_home(tmp_empty, monkeypatch): - monkeypatch.setattr(commons.docker.telemetry, 'get_home_dir', lambda: '.') + monkeypatch.setattr(commons.docker.telemetry, "get_home_dir", lambda: ".") - Path('stats').mkdir() - Path('stats', 'another').touch() - Path('dist').mkdir() - Path('file').touch() - path = Path('dist', 'some-package.tar.gz') + Path("stats").mkdir() + Path("stats", "another").touch() + Path("dist").mkdir() + Path("file").touch() + path = Path("dist", "some-package.tar.gz") - with tarfile.open(path, 'w:gz') as tar: - tar.add('file') + with tarfile.open(path, "w:gz") as tar: + tar.add("file") before = _list_files(path) - commons.docker.cp_ploomber_home('some-package') + commons.docker.cp_ploomber_home("some-package") after = _list_files(path) - assert before == {'file'} - assert after == {'ploomber/stats', 'ploomber/stats/another', 'file'} + assert before == {"file"} + assert after == {"ploomber/stats", "ploomber/stats/another", "file"} def test_get_dependencies(tmp_empty): - Path('requirements.txt').touch() - Path('requirements.lock.txt').touch() - Path('requirements.clean-__.txt').touch() - Path('requirements.clean-__.lock.txt').touch() - Path('requirements.load-__.txt').touch() - Path('requirements.load-__.lock.txt').touch() + Path("requirements.txt").touch() + Path("requirements.lock.txt").touch() + 
Path("requirements.clean-__.txt").touch() + Path("requirements.clean-__.lock.txt").touch() + Path("requirements.load-__.txt").touch() + Path("requirements.load-__.lock.txt").touch() dependency_files, lock_paths = commons.docker.get_dependencies() expected_dependency_files = { - 'load-*': { - 'dependency': 'requirements.load-__.txt', - 'lock': 'requirements.load-__.lock.txt' + "load-*": { + "dependency": "requirements.load-__.txt", + "lock": "requirements.load-__.lock.txt", }, - 'default': { - 'dependency': 'requirements.txt', - 'lock': 'requirements.lock.txt' + "default": {"dependency": "requirements.txt", "lock": "requirements.lock.txt"}, + "clean-*": { + "lock": "requirements.clean-__.lock.txt", + "dependency": "requirements.clean-__.txt", }, - 'clean-*': { - 'lock': 'requirements.clean-__.lock.txt', - 'dependency': 'requirements.clean-__.txt' - } } expected_lock_paths = { - 'load-*': 'requirements.load-__.lock.txt', - 'default': 'requirements.lock.txt', - 'clean-*': 'requirements.clean-__.lock.txt' + "load-*": "requirements.load-__.lock.txt", + "default": "requirements.lock.txt", + "clean-*": "requirements.clean-__.lock.txt", } assert dependency_files == expected_dependency_files assert lock_paths == expected_lock_paths def test_docker_build(tmp_sample_project): - Path('some-env').mkdir() - Path('some-env', 'Dockerfile').touch() + Path("some-env").mkdir() + Path("some-env", "Dockerfile").touch() - with CustomCommander(workspace='some-env') as cmdr: - commons.docker.build(cmdr, - ConcreteDockerConfig(), - 'some-env', - until=None, - entry_point='pipeline.yaml') + with CustomCommander(workspace="some-env") as cmdr: + commons.docker.build( + cmdr, + ConcreteDockerConfig(), + "some-env", + until=None, + entry_point="pipeline.yaml", + ) - existing = _list_files(Path('dist', 'sample_project.tar.gz')) + existing = _list_files(Path("dist", "sample_project.tar.gz")) expected = { - 'sample_project/env.serve.yaml', - 'sample_project', - 'sample_project/some-env/Dockerfile', - 'sample_project/clean.py', - 'sample_project/plot.py', - 'sample_project/environment.yml', - 'sample_project/env.yaml', - 'sample_project/README.md', - 'sample_project/environment.lock.yml', - 'sample_project/some-env', - 'sample_project/some-env/environment.lock.yml', - 'sample_project/raw.py', - 'sample_project/pipeline.yaml', - 'sample_project/lib/__init__.py', - 'sample_project/lib', - 'sample_project/lib/package_a.py', + "sample_project/env.backup.yaml", + "sample_project/env.serve.yaml", + "sample_project", + "sample_project/some-env/Dockerfile", + "sample_project/clean.py", + "sample_project/plot.py", + "sample_project/environment.yml", + "sample_project/env.yaml", + "sample_project/README.md", + "sample_project/environment.lock.yml", + "sample_project/some-env", + "sample_project/some-env/environment.lock.yml", + "sample_project/raw.py", + "sample_project/pipeline.yaml", + "sample_project/lib/__init__.py", + "sample_project/lib", + "sample_project/lib/package_a.py", } assert existing == expected @@ -629,197 +650,208 @@ def test_docker_build(tmp_sample_project): # check tag name cmd = cmdr.docker_cmds[0][0] name, tag = cmd[1], cmd[-1] - assert name == 'build' and tag == 'sample_project:latest' + assert name == "build" and tag == "sample_project:latest" -@pytest.mark.parametrize('repo, expected', [ - ['docker.company.com/something', 'docker.company.com/something:latest'], - ['docker.company.com/something:v2', 'docker.company.com/something:v2'], -]) +@pytest.mark.parametrize( + "repo, expected", + [ + 
["docker.company.com/something", "docker.company.com/something:latest"], + ["docker.company.com/something:v2", "docker.company.com/something:v2"], + ], +) def test_docker_build_with_repository(tmp_sample_project, repo, expected): - Path('some-env').mkdir() - Path('some-env', 'Dockerfile').touch() + Path("some-env").mkdir() + Path("some-env", "Dockerfile").touch() cfg = ConcreteDockerConfig(repository=repo) - with CustomCommander(workspace='some-env') as cmdr: - commons.docker.build(cmdr, - cfg, - 'some-env', - until=None, - entry_point='pipeline.yaml') + with CustomCommander(workspace="some-env") as cmdr: + commons.docker.build( + cmdr, cfg, "some-env", until=None, entry_point="pipeline.yaml" + ) # check tag name cmd = cmdr.docker_cmds[0][0] assert cmd == ( - 'docker', - 'build', - '.', - '--tag', - 'sample_project:latest', + "docker", + "build", + ".", + "--tag", + "sample_project:latest", ) # check tag command cmd = cmdr.docker_cmds[-2][0] assert cmd == ( - 'docker', - 'tag', - 'sample_project:latest', + "docker", + "tag", + "sample_project:latest", expected, ) -def test_docker_build_multiple_requirement( - tmp_sample_project_multiple_requirement): - Path('some-env').mkdir() - Path('some-env', 'Dockerfile').touch() +def test_docker_build_multiple_requirement(tmp_sample_project_multiple_requirement): + Path("some-env").mkdir() + Path("some-env", "Dockerfile").touch() - with CustomCommander(workspace='some-env') as cmdr: - pkg_name, image_map = \ - commons.docker.build(cmdr, - ConcreteDockerConfig(), - 'some-env', - until=None, - entry_point='pipeline.yaml') + with CustomCommander(workspace="some-env") as cmdr: + pkg_name, image_map = commons.docker.build( + cmdr, + ConcreteDockerConfig(), + "some-env", + until=None, + entry_point="pipeline.yaml", + ) # validate docker is called with the right arguments docker_args = [args[0] for args in cmdr.docker_cmds] def generate_commands(suffix): - image_name = f'multiple_requirements_project{suffix}:latest' + image_name = f"multiple_requirements_project{suffix}:latest" return [ # build image ( - 'docker', - 'build', - '.', - '--tag', + "docker", + "build", + ".", + "--tag", image_name, ), # check pipeline load ( - 'docker', - 'run', + "docker", + "run", image_name, - 'ploomber', - 'status', - '--entry-point', - 'pipeline.yaml', + "ploomber", + "status", + "--entry-point", + "pipeline.yaml", ), # check pipeline has a client ( - 'docker', - 'run', + "docker", + "run", image_name, - 'python', - '-c', - ('from ploomber.spec import DAGSpec; ' - 'print("File" in DAGSpec("pipeline.yaml").to_dag().clients)'), - ) + "python", + "-c", + ( + "from ploomber.spec import DAGSpec; " + 'print("File" in DAGSpec("pipeline.yaml").to_dag().clients)' + ), + ), ] - assert generate_commands('-clean-ploomber') == docker_args[:3] - assert generate_commands('') == docker_args[3:6] - assert generate_commands('-plot-ploomber') == docker_args[6:] + assert generate_commands("-clean-ploomber") == docker_args[:3] + assert generate_commands("") == docker_args[3:6] + assert generate_commands("-plot-ploomber") == docker_args[6:] - assert pkg_name == 'multiple_requirements_project' - assert image_map == \ - {'default': 'multiple_requirements_project:latest', - 'clean-*': 'multiple_requirements_project-clean-ploomber:latest', - 'plot-*': 'multiple_requirements_project-plot-ploomber:latest'} + assert pkg_name == "multiple_requirements_project" + assert image_map == { + "default": "multiple_requirements_project:latest", + "clean-*": "multiple_requirements_project-clean-ploomber:latest", 
+        "plot-*": "multiple_requirements_project-plot-ploomber:latest",
+    }

-    existing = _list_files(Path('dist',
-                                'multiple_requirements_project.tar.gz'))
+    existing = _list_files(Path("dist", "multiple_requirements_project.tar.gz"))

     expected = {
-        'multiple_requirements_project/env.serve.yaml',
-        'multiple_requirements_project',
-        'multiple_requirements_project/some-env/Dockerfile',
-        'multiple_requirements_project/clean_one.py',
-        'multiple_requirements_project/clean_two.py',
-        'multiple_requirements_project/plot.py',
-        'multiple_requirements_project/env.yaml',
-        'multiple_requirements_project/README.md',
-        'multiple_requirements_project/some-env',
-        'multiple_requirements_project/raw.py',
-        'multiple_requirements_project/pipeline.yaml',
-        'multiple_requirements_project/requirements.txt',
-        'multiple_requirements_project/requirements.lock.txt',
-        'multiple_requirements_project/requirements.clean-__.txt',
-        'multiple_requirements_project/requirements.plot-__.txt',
-        'multiple_requirements_project/some-env/requirements.lock.txt',
-        'multiple_requirements_project/some-env/requirements.clean-__.'
-        'lock.txt',
-        'multiple_requirements_project/some-env/requirements.plot-__.lock.txt'
+        "multiple_requirements_project/env.backup.yaml",
+        "multiple_requirements_project/env.serve.yaml",
+        "multiple_requirements_project",
+        "multiple_requirements_project/some-env/Dockerfile",
+        "multiple_requirements_project/clean_one.py",
+        "multiple_requirements_project/clean_two.py",
+        "multiple_requirements_project/plot.py",
+        "multiple_requirements_project/env.yaml",
+        "multiple_requirements_project/README.md",
+        "multiple_requirements_project/some-env",
+        "multiple_requirements_project/raw.py",
+        "multiple_requirements_project/pipeline.yaml",
+        "multiple_requirements_project/requirements.txt",
+        "multiple_requirements_project/requirements.lock.txt",
+        "multiple_requirements_project/requirements.clean-__.txt",
+        "multiple_requirements_project/requirements.plot-__.txt",
+        "multiple_requirements_project/some-env/requirements.lock.txt",
+        "multiple_requirements_project/some-env/requirements.clean-__.lock.txt",
+        "multiple_requirements_project/some-env/requirements.plot-__.lock.txt",
     }

     assert existing == expected


 def test_docker_build_multiple_requirement_with_setup(
-        tmp_sample_project_multiple_requirement):
-    Path('some-env').mkdir()
-    Path('some-env', 'Dockerfile').touch()
-    Path('setup.py').touch()
+    tmp_sample_project_multiple_requirement,
+):
+    Path("some-env").mkdir()
+    Path("some-env", "Dockerfile").touch()
+    Path("setup.py").touch()

     with pytest.raises(NotImplementedError) as excinfo:
-        commons.docker.build(CustomCommander(workspace='some-env'),
-                             ConcreteDockerConfig(),
-                             'some-env',
-                             until=None,
-                             entry_point='pipeline.yaml')
-
-    expected = ('Multiple requirements.*.lock.txt or environment.*.lock.yml '
-                'files found along with setup.py file.')
+        commons.docker.build(
+            CustomCommander(workspace="some-env"),
+            ConcreteDockerConfig(),
+            "some-env",
+            until=None,
+            entry_point="pipeline.yaml",
+        )
+
+    expected = (
+        "Multiple requirements.*.lock.txt or environment.*.lock.yml "
+        "files found along with setup.py file."
+ ) assert expected in str(excinfo.value) def test_docker_build_big_file_warns(tmp_sample_project, monkeypatch, capsys): - monkeypatch.setattr(source.os.path, 'getsize', - Mock(return_value=1024 * 1024 * 11)) + monkeypatch.setattr(source.os.path, "getsize", Mock(return_value=1024 * 1024 * 11)) - Path('some-env').mkdir() - Path('some-env', 'Dockerfile').touch() + Path("some-env").mkdir() + Path("some-env", "Dockerfile").touch() - with CustomCommander(workspace='some-env') as cmdr: - commons.docker.build(cmdr, - ConcreteDockerConfig(), - 'some-env', - until=None, - entry_point='pipeline.yaml') + with CustomCommander(workspace="some-env") as cmdr: + commons.docker.build( + cmdr, + ConcreteDockerConfig(), + "some-env", + until=None, + entry_point="pipeline.yaml", + ) - existing = _list_files(Path('dist', 'sample_project.tar.gz')) + existing = _list_files(Path("dist", "sample_project.tar.gz")) expected = { - 'sample_project/env.serve.yaml', - 'sample_project', - 'sample_project/some-env/Dockerfile', - 'sample_project/clean.py', - 'sample_project/plot.py', - 'sample_project/environment.yml', - 'sample_project/env.yaml', - 'sample_project/README.md', - 'sample_project/environment.lock.yml', - 'sample_project/some-env', - 'sample_project/some-env/environment.lock.yml', - 'sample_project/raw.py', - 'sample_project/pipeline.yaml', - 'sample_project/lib/__init__.py', - 'sample_project/lib', - 'sample_project/lib/package_a.py', + "sample_project/env.backup.yaml", + "sample_project/env.serve.yaml", + "sample_project", + "sample_project/some-env/Dockerfile", + "sample_project/clean.py", + "sample_project/plot.py", + "sample_project/environment.yml", + "sample_project/env.yaml", + "sample_project/README.md", + "sample_project/environment.lock.yml", + "sample_project/some-env", + "sample_project/some-env/environment.lock.yml", + "sample_project/raw.py", + "sample_project/pipeline.yaml", + "sample_project/lib/__init__.py", + "sample_project/lib", + "sample_project/lib/package_a.py", } captured = capsys.readouterr() assert existing == expected - assert 'The following files are too big. ' in captured.out - assert 'README.md' in captured.out - assert 'raw.py' in captured.out + assert "The following files are too big. " in captured.out + assert "README.md" in captured.out + assert "raw.py" in captured.out def test_lazily_load_dag(tmp_empty): - Path('script.py').write_text(""" + Path("script.py").write_text( + """ import some_unknown_package # %% + tags = ["parameters"] @@ -827,38 +859,35 @@ def test_lazily_load_dag(tmp_empty): # %% 1 + 1 -""") +""" + ) - Path('tasks.py').write_text(""" + Path("tasks.py").write_text( + """ import another_unknown_package def some_task(product): pass -""") +""" + ) - Path('clients.py').write_text(""" + Path("clients.py").write_text( + """ from ploomber.clients import LocalStorageClient def get_client(): return LocalStorageClient(path_to_backup_dir='backup') -""") +""" + ) spec = { - 'tasks': [ - { - 'source': 'script.py', - 'product': 'report.html' - }, - { - 'source': 'tasks.some_task', - 'product': 'out.csv' - }, + "tasks": [ + {"source": "script.py", "product": "report.html"}, + {"source": "tasks.some_task", "product": "out.csv"}, ], - 'clients': { - 'File': 'clients.get_client' - } + "clients": {"File": "clients.get_client"}, } - Path('pipeline.yaml').write_text(yaml.safe_dump(spec)) + Path("pipeline.yaml").write_text(yaml.safe_dump(spec)) - commons.load_dag(cmdr=Mock(), name='name', lazy_import=True) + commons.load_dag(cmdr=Mock(), name="name", lazy_import=True)
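
A note on the last test: lazy_import=True is what lets commons.load_dag construct the DAG even though script.py and tasks.py import packages that are not installed; sources are located statically and only imported when tasks actually run. Below is a minimal standalone sketch of that behavior using ploomber's public DAGSpec API (the file names and the missing package are illustrative, not part of this patch):

# sketch: load a pipeline lazily so a missing task dependency
# does not break DAG construction (run in an empty directory)
from pathlib import Path

import yaml
from ploomber.spec import DAGSpec

# a function task whose module imports a package that isn't installed
Path("tasks.py").write_text(
    """
import another_unknown_package


def some_task(product):
    pass
"""
)

spec = {"tasks": [{"source": "tasks.some_task", "product": "out.csv"}]}
Path("pipeline.yaml").write_text(yaml.safe_dump(spec))

# with lazy_import=True, "tasks.some_task" is resolved without importing
# tasks.py, so another_unknown_package is never touched at load time
dag = DAGSpec("pipeline.yaml", lazy_import=True).to_dag()
print("DAG constructed without importing another_unknown_package")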