diff --git a/em_workflows/dm_conversion/flow.py b/em_workflows/dm_conversion/flow.py index 486e9760..76c43bd8 100644 --- a/em_workflows/dm_conversion/flow.py +++ b/em_workflows/dm_conversion/flow.py @@ -76,8 +76,6 @@ def convert_if_int16_tiff(file_path: FilePath) -> None: shrink_factor = _calculate_shrink_factor(file_path.fp_in) cmd = [ - "env", - "IMOD_OUTPUT_FORMAT=TIF", DMConfig.newstack_loc, "-shrink", f"{shrink_factor:.3f}", @@ -92,7 +90,7 @@ def convert_if_int16_tiff(file_path: FilePath) -> None: str(tif_8_bit), ] utils.log(f"Generated cmd {cmd}") - FilePath.run(cmd, log_fp) + FilePath.run(cmd, log_fp, env={"IMOD_OUTPUT_FORMAT": "TIF"}) @@ -127,8 +125,6 @@ def convert_2d_mrc_to_tiff(file_path: FilePath) -> None: utils.log(f"{file_path.fp_in.as_posix()} is a mrc file, will convert to {out_fp}.") # work out meansd cmd = [ - "env", - "IMOD_OUTPUT_FORMAT=TIF", DMConfig.newstack_loc, "-shrink", shrink_factor_3, @@ -142,7 +138,7 @@ def convert_2d_mrc_to_tiff(file_path: FilePath) -> None: out_fp, ] utils.log(f"Generated cmd {cmd}") - FilePath.run(cmd, log_fp) + FilePath.run(cmd, log_fp, env={"IMOD_OUTPUT_FORMAT": "TIF"}) @task( diff --git a/em_workflows/file_path.py b/em_workflows/file_path.py index dd0e9224..008af51f 100644 --- a/em_workflows/file_path.py +++ b/em_workflows/file_path.py @@ -1,7 +1,7 @@ import datetime import shutil import os -from typing import List, Dict +from typing import List, Dict, Optional, AnyStr from pathlib import Path import tempfile import subprocess @@ -272,28 +272,42 @@ def rm_workdir(self): shutil.rmtree(self.working_dir, ignore_errors=True) @staticmethod - def run(cmd: List[str], log_file: str) -> int: + def run(cmd: List[str], log_file: str, env: Optional[Dict[AnyStr, AnyStr]] = None, *, copy_env: bool = True) -> int: """Runs a Unix command as a subprocess - - Captures stderr & stddout and writes them to the `log_file` input parameter. - - If final returncode is not 0, raises a FAIL signal + - If final returncode is not 0, raises a RuntimeError + + :param cmd: list of strings representing the command to run + :param log_file: path to the log file to write the stdout and stderr to + :param env: dictionary of additional environment variables to pass to the subprocess + :param copy_env: if True, the subprocess inherits the parent's environment + :return: the return code of the subprocess + + """ - log("Trying to run: " + " ".join(cmd)) - try: - sp = subprocess.run(cmd, check=False, capture_output=True) - stdout = sp.stdout.decode("utf-8") - stderr = sp.stderr.decode("utf-8") - with open(log_file, "w+") as _file: - _file.write(f"Trying to run {' '.join(cmd)}") - _file.write(f"STDOUT:{stdout}") - _file.write(f"STDERR:{stderr}") - if sp.returncode != 0: - msg = f"ERROR : {stderr} -- {stdout}" - log(msg) - raise RuntimeError(msg) - else: - msg = f"Command ok : {stderr} -- {stdout}" - log(msg) - except Exception as ex: - raise RuntimeError(str(ex)) - return sp.returncode + + if env is None: + if not copy_env: + env = {} + # Note: if env is not and copy_env is True, the subprocess inherits the parent's environment, + # by passing env=None + elif copy_env: + # merge dictionaries python 3.9+ + env = os.environ | env + + log(f"Running subprocess: {' '.join(cmd)} logfile: {log_file}") + + with (subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=env) as p, + open(log_file, 'ab') as file): + file.write(f"Running subprocess: {' '.join(cmd)}\n".encode()) + + # write the outputs line by line as they come in + for line in p.stdout: + file.write(line) + log(line.decode()) + file.flush() + + if p.wait() != 0: + raise RuntimeError(f"Failed to run command: {' '.join(cmd)}") + + return p.returncode diff --git a/test/test_file_path.py b/test/test_file_path.py index 86a30954..60a128a6 100644 --- a/test/test_file_path.py +++ b/test/test_file_path.py @@ -1,22 +1,125 @@ +import os + from em_workflows.file_path import FilePath +from pathlib import Path +import sys def test_gen_output_fp(mock_nfs_mount, tmp_path): - input_dir = tmp_path / "Projects" - input_dir.mkdir() - p = input_dir / "some_file_rec.mrc" - p.write_text("content") - assert str(p).endswith("Projects/some_file_rec.mrc") + input_dir = Path("test/input_files/brt/Projects/2013-1220-dA30_5-BSC-1_10.mrc").absolute() + + file_base_name = input_dir.stem - fp_in = FilePath(share_name="test", input_dir=input_dir, fp_in=p) + fp_in = FilePath(share_name="test", input_dir=input_dir.parent, fp_in=input_dir) zarr = fp_in.gen_output_fp(output_ext=".zarr") rec_mrc = fp_in.gen_output_fp(output_ext="_rec.mrc") base_mrc = fp_in.gen_output_fp(output_ext=".mrc", out_fname="adjusted.mrc") input_mrc = fp_in.fp_in print(f"{rec_mrc=}\n{base_mrc=}\n{input_mrc=}") - assert zarr.name == "some_file_rec.zarr" + assert zarr.name == f"{file_base_name}.zarr" assert base_mrc.name == "adjusted.mrc" - assert input_mrc.name == "some_file_rec.mrc" + assert input_mrc.name == f"{file_base_name}.mrc" # Below is simply a temporary outfile loc - assert rec_mrc.name == "some_file_rec_rec.mrc" + assert rec_mrc.name == f"{file_base_name}_rec.mrc" + + +# Test the FilePath.run function by executing the python shell, and producing putput to stderr and stdout +def test_filepath_run(mock_nfs_mount, tmp_path, request): + current_test_name = request.node.name + log_file = tmp_path/f"{current_test_name}.log" + + input_filename = "test/input_files/dm_inputs/Projects/Lab/PI/PrP - Protein.007.tif" + input_dir = Path(input_filename).absolute() + + fp_in = FilePath(share_name="test", input_dir=input_dir.parent, fp_in=input_dir) + + # Run the python shell, and produce output to stderr and stdout + cmd = [sys.executable, "-c", "import sys; print('stdout'); print('stderr', file=sys.stderr)"] + fp_in.run(cmd, log_file=str(log_file)) + + # Check if the log file was created + assert log_file.exists() + + # Check that the log contain "stdout" and "stderr" strings + with open(log_file, "r") as f: + log_content = f.read() + assert "stdout" in log_content + assert "stderr" in log_content + + +# Write a pytest for FilePath.run function by executing the python shell which print the environment variables. +# Combinations of setting the "env" parameter and "copy_env" need to be tested. +def test_filepath_run_env(mock_nfs_mount, tmp_path, request): + current_test_name = request.node.name + log_file = tmp_path/f"{current_test_name}.log" + + input_filename = "test/input_files/dm_inputs/Projects/Lab/PI/PrP - Protein.007.tif" + input_dir = Path(input_filename).absolute() + + fp_in = FilePath(share_name="test", input_dir=input_dir.parent, fp_in=input_dir) + + os.environ["PARENT_ENV"] = "parent_env_value" + + # Case 1: env is set, and copy_env is set to False + + # Run the python shell, and print all environment variables + cmd = [sys.executable, "-c", "import os; print(os.environ)"] + fp_in.run(cmd, log_file=str(log_file), env={"ENV_VAR": "env_var_value"}, copy_env=False) + + # Check if the log file was created + assert log_file.exists() + + with open(log_file, "r") as f: + log_content = f.read() + assert "env_var_value" in log_content + assert "parent_env_value" not in log_content + log_file.unlink() + + # Case 2: env is set, and copy_env is set to True + + # Run the python shell, and print all environment variables + cmd = [sys.executable, "-c", "import os; print(os.environ)"] + fp_in.run(cmd, log_file=str(log_file), env={"ENV_VAR": "env_var_value"}, copy_env=True) + + # Check if the log file was created + assert log_file.exists() + + # Check that the log contain "env_var_value" string + with open(log_file, "r") as f: + log_content = f.read() + assert "env_var_value" in log_content + assert "parent_env_value" in log_content + log_file.unlink() + + # Case 3: env is not set, and copy_env is set to False + + # Run the python shell, and print all environment variables + cmd = [sys.executable, "-c", "import os; print(os.environ)"] + fp_in.run(cmd, log_file=str(log_file), env=None, copy_env=True) + + # Check if the log file was created + assert log_file.exists() + + # Check that the log contain "env_var_value" string + with open(log_file, "r") as f: + log_content = f.read() + assert "env_var_value" not in log_content + assert "parent_env_value" in log_content + log_file.unlink() + + # Case 4: env is not set, and copy_env is set to False + + # Run the python shell, and print all environment variables + cmd = [sys.executable, "-c", "import os; print(os.environ)"] + fp_in.run(cmd, log_file=str(log_file), env=None, copy_env=False) + + # Check if the log file was created + assert log_file.exists() + + # Check that the log contain "env_var_value" string + with open(log_file, "r") as f: + log_content = f.read() + assert "env_var_value" not in log_content + assert "parent_env_value" not in log_content + log_file.unlink()