Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log and write subprocess output as produced. #485

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions em_workflows/dm_conversion/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,6 @@ def convert_if_int16_tiff(file_path: FilePath) -> None:
shrink_factor = _calculate_shrink_factor(file_path.fp_in)

cmd = [
"env",
"IMOD_OUTPUT_FORMAT=TIF",
DMConfig.newstack_loc,
"-shrink",
f"{shrink_factor:.3f}",
Expand All @@ -92,7 +90,7 @@ def convert_if_int16_tiff(file_path: FilePath) -> None:
str(tif_8_bit),
]
utils.log(f"Generated cmd {cmd}")
FilePath.run(cmd, log_fp)
FilePath.run(cmd, log_fp, env={"IMOD_OUTPUT_FORMAT": "TIF"})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, cool

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The important things added is the usage of pipes with streaming output so that the output of the process can be seen live or if the task is killed by slum the partial output would be there.




Expand Down Expand Up @@ -127,8 +125,6 @@ def convert_2d_mrc_to_tiff(file_path: FilePath) -> None:
utils.log(f"{file_path.fp_in.as_posix()} is a mrc file, will convert to {out_fp}.")
# work out meansd
cmd = [
"env",
"IMOD_OUTPUT_FORMAT=TIF",
DMConfig.newstack_loc,
"-shrink",
shrink_factor_3,
Expand All @@ -142,7 +138,7 @@ def convert_2d_mrc_to_tiff(file_path: FilePath) -> None:
out_fp,
]
utils.log(f"Generated cmd {cmd}")
FilePath.run(cmd, log_fp)
FilePath.run(cmd, log_fp, env={"IMOD_OUTPUT_FORMAT": "TIF"})


@task(
Expand Down
60 changes: 37 additions & 23 deletions em_workflows/file_path.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
import shutil
import os
from typing import List, Dict
from typing import List, Dict, Optional, AnyStr
from pathlib import Path
import tempfile
import subprocess
Expand Down Expand Up @@ -272,28 +272,42 @@ def rm_workdir(self):
shutil.rmtree(self.working_dir, ignore_errors=True)

@staticmethod
def run(cmd: List[str], log_file: str) -> int:
def run(cmd: List[str], log_file: str, env: Optional[Dict[AnyStr, AnyStr]] = None, *, copy_env: bool = True) -> int:
"""Runs a Unix command as a subprocess

- Captures stderr & stddout and writes them to the `log_file` input parameter.
- If final returncode is not 0, raises a FAIL signal
- If final returncode is not 0, raises a RuntimeError

:param cmd: list of strings representing the command to run
:param log_file: path to the log file to write the stdout and stderr to
:param env: dictionary of additional environment variables to pass to the subprocess
:param copy_env: if True, the subprocess inherits the parent's environment
:return: the return code of the subprocess


"""
log("Trying to run: " + " ".join(cmd))
try:
sp = subprocess.run(cmd, check=False, capture_output=True)
stdout = sp.stdout.decode("utf-8")
stderr = sp.stderr.decode("utf-8")
with open(log_file, "w+") as _file:
_file.write(f"Trying to run {' '.join(cmd)}")
_file.write(f"STDOUT:{stdout}")
_file.write(f"STDERR:{stderr}")
if sp.returncode != 0:
msg = f"ERROR : {stderr} -- {stdout}"
log(msg)
raise RuntimeError(msg)
else:
msg = f"Command ok : {stderr} -- {stdout}"
log(msg)
except Exception as ex:
raise RuntimeError(str(ex))
return sp.returncode

if env is None:
if not copy_env:
env = {}
# Note: if env is not and copy_env is True, the subprocess inherits the parent's environment,
# by passing env=None
elif copy_env:
# merge dictionaries python 3.9+
env = os.environ | env

log(f"Running subprocess: {' '.join(cmd)} logfile: {log_file}")

with (subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=env) as p,
open(log_file, 'ab') as file):
file.write(f"Running subprocess: {' '.join(cmd)}\n".encode())

# write the outputs line by line as they come in
for line in p.stdout:
file.write(line)
log(line.decode())
file.flush()

if p.wait() != 0:
raise RuntimeError(f"Failed to run command: {' '.join(cmd)}")

return p.returncode
121 changes: 112 additions & 9 deletions test/test_file_path.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,125 @@
import os

from em_workflows.file_path import FilePath
from pathlib import Path
import sys


def test_gen_output_fp(mock_nfs_mount, tmp_path):
input_dir = tmp_path / "Projects"
input_dir.mkdir()
p = input_dir / "some_file_rec.mrc"
p.write_text("content")
assert str(p).endswith("Projects/some_file_rec.mrc")
input_dir = Path("test/input_files/brt/Projects/2013-1220-dA30_5-BSC-1_10.mrc").absolute()

file_base_name = input_dir.stem

fp_in = FilePath(share_name="test", input_dir=input_dir, fp_in=p)
fp_in = FilePath(share_name="test", input_dir=input_dir.parent, fp_in=input_dir)

zarr = fp_in.gen_output_fp(output_ext=".zarr")
rec_mrc = fp_in.gen_output_fp(output_ext="_rec.mrc")
base_mrc = fp_in.gen_output_fp(output_ext=".mrc", out_fname="adjusted.mrc")
input_mrc = fp_in.fp_in
print(f"{rec_mrc=}\n{base_mrc=}\n{input_mrc=}")
assert zarr.name == "some_file_rec.zarr"
assert zarr.name == f"{file_base_name}.zarr"
assert base_mrc.name == "adjusted.mrc"
assert input_mrc.name == "some_file_rec.mrc"
assert input_mrc.name == f"{file_base_name}.mrc"
# Below is simply a temporary outfile loc
assert rec_mrc.name == "some_file_rec_rec.mrc"
assert rec_mrc.name == f"{file_base_name}_rec.mrc"


# Test the FilePath.run function by executing the python shell, and producing putput to stderr and stdout
def test_filepath_run(mock_nfs_mount, tmp_path, request):
current_test_name = request.node.name
log_file = tmp_path/f"{current_test_name}.log"

input_filename = "test/input_files/dm_inputs/Projects/Lab/PI/PrP - Protein.007.tif"
input_dir = Path(input_filename).absolute()

fp_in = FilePath(share_name="test", input_dir=input_dir.parent, fp_in=input_dir)

# Run the python shell, and produce output to stderr and stdout
cmd = [sys.executable, "-c", "import sys; print('stdout'); print('stderr', file=sys.stderr)"]
fp_in.run(cmd, log_file=str(log_file))

# Check if the log file was created
assert log_file.exists()

# Check that the log contain "stdout" and "stderr" strings
with open(log_file, "r") as f:
log_content = f.read()
assert "stdout" in log_content
assert "stderr" in log_content


# Write a pytest for FilePath.run function by executing the python shell which print the environment variables.
# Combinations of setting the "env" parameter and "copy_env" need to be tested.
def test_filepath_run_env(mock_nfs_mount, tmp_path, request):
current_test_name = request.node.name
log_file = tmp_path/f"{current_test_name}.log"

input_filename = "test/input_files/dm_inputs/Projects/Lab/PI/PrP - Protein.007.tif"
input_dir = Path(input_filename).absolute()

fp_in = FilePath(share_name="test", input_dir=input_dir.parent, fp_in=input_dir)

os.environ["PARENT_ENV"] = "parent_env_value"

# Case 1: env is set, and copy_env is set to False

# Run the python shell, and print all environment variables
cmd = [sys.executable, "-c", "import os; print(os.environ)"]
fp_in.run(cmd, log_file=str(log_file), env={"ENV_VAR": "env_var_value"}, copy_env=False)

# Check if the log file was created
assert log_file.exists()

with open(log_file, "r") as f:
log_content = f.read()
assert "env_var_value" in log_content
assert "parent_env_value" not in log_content
log_file.unlink()

# Case 2: env is set, and copy_env is set to True

# Run the python shell, and print all environment variables
cmd = [sys.executable, "-c", "import os; print(os.environ)"]
fp_in.run(cmd, log_file=str(log_file), env={"ENV_VAR": "env_var_value"}, copy_env=True)

# Check if the log file was created
assert log_file.exists()

# Check that the log contain "env_var_value" string
with open(log_file, "r") as f:
log_content = f.read()
assert "env_var_value" in log_content
assert "parent_env_value" in log_content
log_file.unlink()

# Case 3: env is not set, and copy_env is set to False

# Run the python shell, and print all environment variables
cmd = [sys.executable, "-c", "import os; print(os.environ)"]
fp_in.run(cmd, log_file=str(log_file), env=None, copy_env=True)

# Check if the log file was created
assert log_file.exists()

# Check that the log contain "env_var_value" string
with open(log_file, "r") as f:
log_content = f.read()
assert "env_var_value" not in log_content
assert "parent_env_value" in log_content
log_file.unlink()

# Case 4: env is not set, and copy_env is set to False

# Run the python shell, and print all environment variables
cmd = [sys.executable, "-c", "import os; print(os.environ)"]
fp_in.run(cmd, log_file=str(log_file), env=None, copy_env=False)

# Check if the log file was created
assert log_file.exists()

# Check that the log contain "env_var_value" string
with open(log_file, "r") as f:
log_content = f.read()
assert "env_var_value" not in log_content
assert "parent_env_value" not in log_content
log_file.unlink()
Loading