From 33498d88f0145a7f06d85897919935eddbd3fbc4 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Thu, 19 Dec 2024 15:44:16 +0100 Subject: [PATCH 01/35] draft script --- scripts/copy_specified_udf.py | 88 +++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 scripts/copy_specified_udf.py diff --git a/scripts/copy_specified_udf.py b/scripts/copy_specified_udf.py new file mode 100644 index 00000000..1069c992 --- /dev/null +++ b/scripts/copy_specified_udf.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python +import logging +from argparse import ArgumentParser +from datetime import datetime as dt + +from genologics.config import BASEURI, PASSWORD, USERNAME +from genologics.entities import Process +from genologics.lims import Lims + +from scilifelab_epps.utils import udf_tools +from scilifelab_epps.wrapper import epp_decorator + +TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S") + + +@epp_decorator(script_path=__file__, timestamp=TIMESTAMP) +def main(): + """This script will get the name of an artifact UDF from a master step field, + and for every sample artifact in the current step: + + - Use API calls to recursively back-trace the sample history using + input-output links until it finds an artifact with the specified UDF + - Copy the value of the specified UDF from the found artifact to the + artifact of the current step + + """ + lims = Lims(BASEURI, USERNAME, PASSWORD) + process = Process(lims, id=args.pid) + + target_udf = process.udf("step_udf") + + no_outputs = udf_tools.no_outputs(process) + + if no_outputs: + logging.info("Step has no output artifacts. Assigning to input artifact.") + + art_tuples = udf_tools.get_art_tuples(process) + for art_tuple in art_tuples: + target_artifact = art_tuple[0]["uri"] if no_outputs else art_tuple[1]["uri"] + logging.info( + f"Looking for last recorded UDF '{target_udf}' of sample '{target_artifact.name}'..." 
+ ) + udf_value, udf_history = udf_tools.fetch_last( + currentStep=process, + art_tuple=art_tuple, + target_udfs=target_udf, + use_current=False, + print_history=True, + on_fail=None, + ) + if udf_value: + logging.info(f"Traceback:\n{udf_history}") + target_artifact.udf[target_udf] = udf_value + target_artifact.put() + logging.info( + f"Updated UDF '{target_udf}' for '{art_tuple[1]['uri'].name}' to '{udf_value}'" + ) + else: + logging.warning( + f"Could not traceback UDF '{target_udf}' for '{art_tuple[1]['uri'].name}'" + ) + logging.info(f"Traceback:\n{udf_history}") + + +if __name__ == "__main__": + # Parse args + parser = ArgumentParser() + parser.add_argument( + "--pid", + required=True, + type=str, + help="Lims ID for current Process.", + ) + parser.add_argument( + "--log", + required=True, + type=str, + help="Which file slot to use for the script log.", + ) + parser.add_argument( + "--step_udf", + required=True, + type=str, + help="The name of the step UDF listing the target artifact UDF.", + ) + args = parser.parse_args() + + main(args) From 0cf2c0a705680a4ff8602704aaed41f0babaa7dc Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Thu, 19 Dec 2024 18:24:09 +0100 Subject: [PATCH 02/35] bugfix and none-case --- scripts/copy_specified_udf.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/copy_specified_udf.py b/scripts/copy_specified_udf.py index 1069c992..6f9350d8 100644 --- a/scripts/copy_specified_udf.py +++ b/scripts/copy_specified_udf.py @@ -14,7 +14,7 @@ @epp_decorator(script_path=__file__, timestamp=TIMESTAMP) -def main(): +def main(args): """This script will get the name of an artifact UDF from a master step field, and for every sample artifact in the current step: @@ -27,7 +27,9 @@ def main(): lims = Lims(BASEURI, USERNAME, PASSWORD) process = Process(lims, id=args.pid) - target_udf = process.udf("step_udf") + target_udf = process.udf.get(args.step_udf, None) + if target_udf is None or target_udf == "None": + logging.error(f"No target UDF supplied from step field '{args.step_udf}'") no_outputs = udf_tools.no_outputs(process) From 3c88aac4fe9b3b92d8eeed18f0e0c3933b7f11bd Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Thu, 19 Dec 2024 18:45:59 +0100 Subject: [PATCH 03/35] wip, add TODOs --- scripts/copy_specified_udf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/copy_specified_udf.py b/scripts/copy_specified_udf.py index 6f9350d8..fd20a021 100644 --- a/scripts/copy_specified_udf.py +++ b/scripts/copy_specified_udf.py @@ -36,7 +36,9 @@ def main(args): if no_outputs: logging.info("Step has no output artifacts. 
Assigning to input artifact.") - art_tuples = udf_tools.get_art_tuples(process) + # TODO need to tweak this script and possible the traceback function to handle both + # TODO aggregate QC and regular steps + art_tuples = udf_tools.get_art_tuples(process) # TODO this returns [] for art_tuple in art_tuples: target_artifact = art_tuple[0]["uri"] if no_outputs else art_tuple[1]["uri"] logging.info( From f3381bbd659959c6d3550bffa72add0c8b29b649 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Fri, 20 Dec 2024 12:25:30 +0100 Subject: [PATCH 04/35] rename script file and bump docstring --- scripts/{copy_specified_udf.py => fetch_last_known_field.py} | 3 +++ 1 file changed, 3 insertions(+) rename scripts/{copy_specified_udf.py => fetch_last_known_field.py} (93%) diff --git a/scripts/copy_specified_udf.py b/scripts/fetch_last_known_field.py similarity index 93% rename from scripts/copy_specified_udf.py rename to scripts/fetch_last_known_field.py index fd20a021..fc59ab49 100644 --- a/scripts/copy_specified_udf.py +++ b/scripts/fetch_last_known_field.py @@ -23,6 +23,9 @@ def main(args): - Copy the value of the specified UDF from the found artifact to the artifact of the current step + Example use-case: + - For Nanopore libraries in the Aggregate QC step of the Library Validation protocol, + fetch the last recorded artifact UDF "Size (bp)" from the library prep for all samples. """ lims = Lims(BASEURI, USERNAME, PASSWORD) process = Process(lims, id=args.pid) From ff412b0b85052b98b7149861688e5891d62d10e8 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Fri, 20 Dec 2024 12:57:58 +0100 Subject: [PATCH 05/35] live testing dev --- scilifelab_epps/utils/udf_tools.py | 43 ++++++++++++++++++++++-------- scripts/fetch_last_known_field.py | 43 +++++++++++++++++++++--------- 2 files changed, 62 insertions(+), 24 deletions(-) diff --git a/scilifelab_epps/utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py index 278b196d..22ff69e4 100644 --- a/scilifelab_epps/utils/udf_tools.py +++ b/scilifelab_epps/utils/udf_tools.py @@ -136,19 +136,36 @@ def list_udfs(art: Artifact) -> list: def fetch_last( currentStep: Process, - art_tuple: tuple, target_udfs: str | list, + art_tuple: tuple = None, + art: Artifact = None, use_current=True, print_history=False, on_fail=AssertionError, ): """Recursively look for target UDF. - Target UDF can be supplied as a string, or as a prioritized list of strings. + Arguments: + + - "art_tuple": step input-output tuple. Mutually exclusive use with "art". + + - "art": step artifact, either input or output. Mutually exclusive use with "art_tuple". + + - "target_udfs": can be supplied as a string, or as a + prioritized list of strings. - If "print_history" == True, will return both the target metric and the lookup history as a string. + - "use_current": if true, will return the target metric + if found in the current step. + + - "print_history": if true, will return both the target + metric and the lookup history as a string. """ + assert art_tuple or art, "One of function args 'art_tuple' and 'art' are required." + assert not ( + art_tuple and art + ), "Function args 'art_tuple' and 'art' are mutually exclusive." 
+ # Convert to list, to enable iteration if isinstance(target_udfs, str): target_udfs = [target_udfs] @@ -158,15 +175,19 @@ def fetch_last( while True: history.append({"Step name": currentStep.type.name, "Step ID": currentStep.id}) - # Try to grab input and output articles, if possible - try: - input_art = art_tuple[0]["uri"] - except: - input_art = None - try: - output_art = art_tuple[1]["uri"] - except: + if len(history) == 1 and not art_tuple: + # Handle the case of having an art instead of an art_tuple in the original step + input_art = art output_art = None + else: + try: + input_art = art_tuple[0]["uri"] + except: + input_art = None + try: + output_art = art_tuple[1]["uri"] + except: + output_art = None if len(history) == 1 and use_current is not True: # If we are in the original step and "use_current" is false, skip diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py index fc59ab49..dae82767 100644 --- a/scripts/fetch_last_known_field.py +++ b/scripts/fetch_last_known_field.py @@ -4,7 +4,7 @@ from datetime import datetime as dt from genologics.config import BASEURI, PASSWORD, USERNAME -from genologics.entities import Process +from genologics.entities import Artifact, Process from genologics.lims import Lims from scilifelab_epps.utils import udf_tools @@ -30,41 +30,58 @@ def main(args): lims = Lims(BASEURI, USERNAME, PASSWORD) process = Process(lims, id=args.pid) + # Get the target UDF from the step field target_udf = process.udf.get(args.step_udf, None) - if target_udf is None or target_udf == "None": - logging.error(f"No target UDF supplied from step field '{args.step_udf}'") + assert ( + target_udf is not None or target_udf != "None" + ), f"No target UDF supplied from step field '{args.step_udf}'" - no_outputs = udf_tools.no_outputs(process) + # Check whether process has output artifacts, not the case for e.g. QC steps + no_outputs: bool = udf_tools.no_outputs(process) + # Load input artifacts + arts_in: list[Artifact] = [ + art for art in process.all_inputs() if art.type == "Analyte" + ] + + # Find target output artifacts, if any if no_outputs: logging.info("Step has no output artifacts. Assigning to input artifact.") + else: + art_tuples: list[tuple[dict]] = process.input_output_maps + art_in2out: dict[Process.Artifact : Process.Artifact] = { + i["uri"]: o["uri"] + for i, o in art_tuples + if i["uri"].type == "Analyte" and o["uri"].type == "Analyte" + } - # TODO need to tweak this script and possible the traceback function to handle both - # TODO aggregate QC and regular steps - art_tuples = udf_tools.get_art_tuples(process) # TODO this returns [] - for art_tuple in art_tuples: - target_artifact = art_tuple[0]["uri"] if no_outputs else art_tuple[1]["uri"] + for art_in in arts_in: + if no_outputs: + target_artifact = art_in + else: + target_artifact = art_in2out[art_in] logging.info( - f"Looking for last recorded UDF '{target_udf}' of sample '{target_artifact.name}'..." + f"Looking for last recorded UDF '{target_udf}' of {'input' if no_outputs else 'output'} artifact '{target_artifact.name}'..." 
) udf_value, udf_history = udf_tools.fetch_last( currentStep=process, - art_tuple=art_tuple, + art=art_in, target_udfs=target_udf, use_current=False, print_history=True, on_fail=None, ) if udf_value: + logging.info(f"Found target UDF '{target_udf}' with value '{udf_value}'") logging.info(f"Traceback:\n{udf_history}") target_artifact.udf[target_udf] = udf_value target_artifact.put() logging.info( - f"Updated UDF '{target_udf}' for '{art_tuple[1]['uri'].name}' to '{udf_value}'" + f"Updated UDF '{target_udf}' for '{art_in.name}' to '{udf_value}'" ) else: logging.warning( - f"Could not traceback UDF '{target_udf}' for '{art_tuple[1]['uri'].name}'" + f"Could not traceback UDF '{target_udf}' for '{art_in.name}'" ) logging.info(f"Traceback:\n{udf_history}") From 89c7a80d9349a44b248744f4815b12cf537ee812 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Fri, 20 Dec 2024 13:00:53 +0100 Subject: [PATCH 06/35] fix --- scripts/fetch_last_known_field.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py index dae82767..a0277ae5 100644 --- a/scripts/fetch_last_known_field.py +++ b/scripts/fetch_last_known_field.py @@ -33,7 +33,7 @@ def main(args): # Get the target UDF from the step field target_udf = process.udf.get(args.step_udf, None) assert ( - target_udf is not None or target_udf != "None" + target_udf is not None and target_udf != "None" ), f"No target UDF supplied from step field '{args.step_udf}'" # Check whether process has output artifacts, not the case for e.g. QC steps From a7c65e6484517cece542596539e92cbb491c1c03 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Fri, 20 Dec 2024 13:23:38 +0100 Subject: [PATCH 07/35] var ref fixes --- scripts/fetch_last_known_field.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py index a0277ae5..13dc47aa 100644 --- a/scripts/fetch_last_known_field.py +++ b/scripts/fetch_last_known_field.py @@ -30,7 +30,7 @@ def main(args): lims = Lims(BASEURI, USERNAME, PASSWORD) process = Process(lims, id=args.pid) - # Get the target UDF from the step field + # Get the name of the target UDF from the step field target_udf = process.udf.get(args.step_udf, None) assert ( target_udf is not None and target_udf != "None" @@ -49,8 +49,8 @@ def main(args): logging.info("Step has no output artifacts. Assigning to input artifact.") else: art_tuples: list[tuple[dict]] = process.input_output_maps - art_in2out: dict[Process.Artifact : Process.Artifact] = { - i["uri"]: o["uri"] + art_in2out: dict[str:Artifact] = { + i["uri"].id: o["uri"] for i, o in art_tuples if i["uri"].type == "Analyte" and o["uri"].type == "Analyte" } @@ -59,13 +59,13 @@ def main(args): if no_outputs: target_artifact = art_in else: - target_artifact = art_in2out[art_in] + target_artifact = art_in2out[art_in.id] logging.info( f"Looking for last recorded UDF '{target_udf}' of {'input' if no_outputs else 'output'} artifact '{target_artifact.name}'..." 
) udf_value, udf_history = udf_tools.fetch_last( currentStep=process, - art=art_in, + art=target_artifact, target_udfs=target_udf, use_current=False, print_history=True, @@ -77,11 +77,11 @@ def main(args): target_artifact.udf[target_udf] = udf_value target_artifact.put() logging.info( - f"Updated UDF '{target_udf}' for '{art_in.name}' to '{udf_value}'" + f"Updated UDF '{target_udf}' for {'input' if no_outputs else 'output'} '{target_artifact.name}' to '{udf_value}'" ) else: logging.warning( - f"Could not traceback UDF '{target_udf}' for '{art_in.name}'" + f"Could not traceback UDF '{target_udf}' for {'input' if no_outputs else 'output'} artifact '{target_artifact.name}'" ) logging.info(f"Traceback:\n{udf_history}") From 6e418246e2aee38acdef1a915b61410bddfb4200 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Fri, 20 Dec 2024 13:27:18 +0100 Subject: [PATCH 08/35] bump logs --- scripts/fetch_last_known_field.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py index 13dc47aa..4f7b7c4e 100644 --- a/scripts/fetch_last_known_field.py +++ b/scripts/fetch_last_known_field.py @@ -77,7 +77,7 @@ def main(args): target_artifact.udf[target_udf] = udf_value target_artifact.put() logging.info( - f"Updated UDF '{target_udf}' for {'input' if no_outputs else 'output'} '{target_artifact.name}' to '{udf_value}'" + f"Updated UDF '{target_udf}' for {'input' if no_outputs else 'output'} artifact '{target_artifact.name}' to '{udf_value}'" ) else: logging.warning( From 3434eed6f72721295d09800178285c98edeb4279 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Fri, 20 Dec 2024 13:30:48 +0100 Subject: [PATCH 09/35] bump vlog --- VERSIONLOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index da67e885..1834d8fa 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # Scilifelab_epps Version Log +## 20241220.1 + +Introduce EPP to fetch last recorded derived sample UDF. + ## 20241211.1 No longer reserve PromethION column 3 for Clinical Genomics. From d6ade1ba10bc3ae26f29ceb18c844c1f5c991181 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Fri, 20 Dec 2024 13:40:01 +0100 Subject: [PATCH 10/35] shut up mypy --- scilifelab_epps/utils/udf_tools.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scilifelab_epps/utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py index 22ff69e4..444d7690 100644 --- a/scilifelab_epps/utils/udf_tools.py +++ b/scilifelab_epps/utils/udf_tools.py @@ -137,8 +137,8 @@ def list_udfs(art: Artifact) -> list: def fetch_last( currentStep: Process, target_udfs: str | list, - art_tuple: tuple = None, - art: Artifact = None, + art_tuple=None, + art=None, use_current=True, print_history=False, on_fail=AssertionError, @@ -147,9 +147,9 @@ def fetch_last( Arguments: - - "art_tuple": step input-output tuple. Mutually exclusive use with "art". + - "art_tuple": step input-output tuple or none. Mutually exclusive use with "art". - - "art": step artifact, either input or output. Mutually exclusive use with "art_tuple". + - "art": step artifact, either input or output or none. Mutually exclusive use with "art_tuple". - "target_udfs": can be supplied as a string, or as a prioritized list of strings. 
From 901ef861db7f781b00b4d397f7d12267ef48ceba Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Tue, 7 Jan 2025 12:51:07 +0100 Subject: [PATCH 11/35] wip --- scilifelab_epps/utils/udf_tools.py | 62 +++++++++++++++++------------- scripts/fetch_last_known_field.py | 3 ++ 2 files changed, 39 insertions(+), 26 deletions(-) diff --git a/scilifelab_epps/utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py index 444d7690..658ff640 100644 --- a/scilifelab_epps/utils/udf_tools.py +++ b/scilifelab_epps/utils/udf_tools.py @@ -172,10 +172,14 @@ def fetch_last( history = [] + # Track iterations + n = 1 + + # Start traceback while True: history.append({"Step name": currentStep.type.name, "Step ID": currentStep.id}) - if len(history) == 1 and not art_tuple: + if n == 1 and not art_tuple: # Handle the case of having an art instead of an art_tuple in the original step input_art = art output_art = None @@ -189,20 +193,20 @@ def fetch_last( except: output_art = None - if len(history) == 1 and use_current is not True: - # If we are in the original step and "use_current" is false, skip - pass - else: - # Look trough outputs - if output_art: - history[-1].update( - { - "Derived sample ID": output_art.id, - "Derived sample UDFs": dict(output_art.udf.items()), - } - ) + # Look trough outputs + if output_art: + history[-1].update( + { + "Derived sample ID": output_art.id, + "Derived sample UDFs": dict(output_art.udf.items()), + } + ) - for target_udf in target_udfs: + for target_udf in target_udfs: + # Don't search outputs of first and second iteration if use_current is False + if n in [1, 2] and use_current is False: + pass + else: if target_udf in list_udfs(output_art): if print_history is True: return output_art.udf[target_udf], json.dumps( @@ -211,22 +215,26 @@ def fetch_last( else: return output_art.udf[target_udf] - # Look through inputs - if input_art: - if input_art.parent_process: - history[-1].update( - { - "Input sample parent step name": input_art.parent_process.type.name, - "Input sample parent step ID": input_art.parent_process.id, - } - ) + # Look through inputs + if input_art: + if input_art.parent_process: history[-1].update( { - "Input sample ID": input_art.id, - "Input sample UDFs": dict(input_art.udf.items()), + "Input sample parent step name": input_art.parent_process.type.name, + "Input sample parent step ID": input_art.parent_process.id, } ) - for target_udf in target_udfs: + history[-1].update( + { + "Input sample ID": input_art.id, + "Input sample UDFs": dict(input_art.udf.items()), + } + ) + for target_udf in target_udfs: + # Don't search inputs of first iteration if use_current is False + if n == 1 and use_current is False: + pass + else: if target_udf in list_udfs(input_art): if print_history is True: return input_art.udf[target_udf], json.dumps( @@ -265,6 +273,8 @@ def fetch_last( currentStep = pp art_tuple = matching_tuples[0] + n += 1 + except AssertionError: if isinstance(on_fail, type) and issubclass(on_fail, Exception): if print_history is True: diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py index 4f7b7c4e..91fd6f78 100644 --- a/scripts/fetch_last_known_field.py +++ b/scripts/fetch_last_known_field.py @@ -44,6 +44,9 @@ def main(args): art for art in process.all_inputs() if art.type == "Analyte" ] + # TODO currently even steps with valid tuples will only use input artifacts + # No traceback provided for output artifact of current step + # Find target output artifacts, if any if no_outputs: logging.info("Step has 
no output artifacts. Assigning to input artifact.") From f07296b5226ca39e780e1476ec7ad9ed997bca29 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Tue, 7 Jan 2025 16:15:24 +0100 Subject: [PATCH 12/35] big swap, need testing --- .../calc_from_args/udf_arg_methods.py | 14 +- scilifelab_epps/utils/udf_tools.py | 234 +++++++----------- scilifelab_epps/zika/utils.py | 4 +- scripts/fetch_last_known_field.py | 13 +- scripts/log_udfs.py | 2 +- scripts/ont_calc_volumes.py | 6 +- scripts/ont_update_amount.py | 6 +- 7 files changed, 111 insertions(+), 168 deletions(-) diff --git a/scilifelab_epps/calc_from_args/udf_arg_methods.py b/scilifelab_epps/calc_from_args/udf_arg_methods.py index 1040c91d..b7a5defd 100644 --- a/scilifelab_epps/calc_from_args/udf_arg_methods.py +++ b/scilifelab_epps/calc_from_args/udf_arg_methods.py @@ -47,19 +47,13 @@ def fetch_from_arg( value = process.udf[arg_dict["udf"]] else: if arg_dict["recursive"]: - # Fetch UDF recursively, back-tracking the input-output tuple - if arg_dict["source"] == "input": - use_current = False - else: - assert arg_dict["source"] == "output" - use_current = True + # Fetch UDF recursively value, history = udf_tools.fetch_last( - currentStep=process, - art_tuple=art_tuple, + target_art=source, target_udfs=arg_dict["udf"], - use_current=use_current, - print_history=True, + log_traceback=True, + return_traceback=True, ) else: # Fetch UDF from input or output artifact diff --git a/scilifelab_epps/utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py index 658ff640..eb5fc2e9 100644 --- a/scilifelab_epps/utils/udf_tools.py +++ b/scilifelab_epps/utils/udf_tools.py @@ -1,4 +1,5 @@ import json +import logging from typing import Union from genologics.entities import Artifact, Process @@ -39,22 +40,6 @@ def is_filled(art: Artifact, target_udf: str) -> bool: return False -def no_outputs(currentStep: Process) -> bool: - """Check whether step has outputs or not""" - - art_tuples = get_art_tuples(currentStep) - - if art_tuples: - none_outputs = [t[1] is None for t in art_tuples] - - if all(none_outputs): - return True - else: - return False - else: - return True - - def get_art_tuples(currentStep: Process) -> list: """Return I/O tuples whose elements are either 1) both analytes @@ -135,156 +120,119 @@ def list_udfs(art: Artifact) -> list: def fetch_last( - currentStep: Process, + target_art: Artifact, target_udfs: str | list, - art_tuple=None, - art=None, - use_current=True, - print_history=False, - on_fail=AssertionError, -): + log_traceback=False, + return_traceback=False, + on_fail=None, +) -> (str | int | float) | tuple[str | int | float, dict]: """Recursively look for target UDF. Arguments: - - "art_tuple": step input-output tuple or none. Mutually exclusive use with "art". + target_art Artifact to traceback and assign UDF value to. - - "art": step artifact, either input or output or none. Mutually exclusive use with "art_tuple". + target_udfs Can be supplied as a string, or as a prioritized + list of strings. - - "target_udfs": can be supplied as a string, or as a - prioritized list of strings. + log_traceback If True, will log the full traceback. - - "use_current": if true, will return the target metric - if found in the current step. + return_traceback If True, will return the traceback too. - - "print_history": if true, will return both the target - metric and the lookup history as a string. + on_fail If not None, will return this value on failure. 
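+
+    Hypothetical example (variable names are illustrative):
+
+        size_bp = fetch_last(target_art=art_out, target_udfs="Size (bp)", on_fail=None)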
""" - assert art_tuple or art, "One of function args 'art_tuple' and 'art' are required." - assert not ( - art_tuple and art - ), "Function args 'art_tuple' and 'art' are mutually exclusive." - # Convert to list, to enable iteration if isinstance(target_udfs, str): target_udfs = [target_udfs] - history = [] + # Instantiate traceback + traceback = [] + steps_visited = [] - # Track iterations - n = 1 + try: + # First iteration, current artifact is the target artifact. Don't pull any UDF values. + current_art = target_art + pp = current_art.parent_process + assert pp, f"Artifact '{current_art.name}' ({current_art.id}) has no parent process linked." + steps_visited.append(f"'{pp.type.name}' ({pp.id})") + + traceback.append( + { + "Artifact": { + "Name": current_art.name, + "ID": current_art.id, + "UDFs": dict(current_art.udf.items()), + "Parent Step": { + "Name": pp.type.name if pp else None, + "ID": pp.id if pp else None, + }, + } + } + ) - # Start traceback - while True: - history.append({"Step name": currentStep.type.name, "Step ID": currentStep.id}) + # Start recursive search + while True: + pp_art_tuples = get_art_tuples(pp) + + # If parent process has valid input-output tuples, use for linkage + if pp_art_tuples != []: + for pp_tuple in pp_art_tuples: + if pp_tuple[1]["uri"].id == current_art.id: + current_art = pp_tuple[0]["uri"] + break + # If not, TODO + else: + raise NotImplementedError() - if n == 1 and not art_tuple: - # Handle the case of having an art instead of an art_tuple in the original step - input_art = art - output_art = None - else: - try: - input_art = art_tuple[0]["uri"] - except: - input_art = None - try: - output_art = art_tuple[1]["uri"] - except: - output_art = None + pp = current_art.parent_process + if pp is not None: + steps_visited.append(f"'{pp.type.name}' ({pp.id})") - # Look trough outputs - if output_art: - history[-1].update( + traceback.append( { - "Derived sample ID": output_art.id, - "Derived sample UDFs": dict(output_art.udf.items()), + "Artifact": { + "Name": current_art.name, + "ID": current_art.id, + "UDFs": dict(current_art.udf.items()), + "Parent Step": { + "Name": pp.type.name if pp else None, + "ID": pp.id if pp else None, + }, + } } ) + # Search for correct UDF for target_udf in target_udfs: - # Don't search outputs of first and second iteration if use_current is False - if n in [1, 2] and use_current is False: - pass - else: - if target_udf in list_udfs(output_art): - if print_history is True: - return output_art.udf[target_udf], json.dumps( - history, indent=2 - ) - else: - return output_art.udf[target_udf] - - # Look through inputs - if input_art: - if input_art.parent_process: - history[-1].update( - { - "Input sample parent step name": input_art.parent_process.type.name, - "Input sample parent step ID": input_art.parent_process.id, - } + if target_udf in list_udfs(current_art): + if log_traceback is True: + logging.info(f"Traceback:\n{json.dumps(traceback, indent=2)}") + logging.info( + f"Found target UDF '{target_udf}'" + + f" with value '{current_art.udf[target_udf]}'" + + f" in process {steps_visited[-1]}" + + f" {'output' if pp else 'input'}" + + f" artifact '{current_art.name}' ({current_art.id})" + ) + + if return_traceback: + return current_art.udf[target_udf], traceback + else: + return current_art.udf[target_udf] + + if pp is None: + raise AssertionError( + f"Artifact '{current_art.name}' ({current_art.id}) has no parent process linked and can't be traced back further." 
) - history[-1].update( - { - "Input sample ID": input_art.id, - "Input sample UDFs": dict(input_art.udf.items()), - } + + except AssertionError: + if on_fail is not None: + logging.warning( + f"Failed traceback for artifact '{target_art.name}' ({target_art.id}), falling back to on_fail value '{on_fail}'" + ) + return on_fail + else: + raise AssertionError( + f"Could not find matching UDF(s) [{', '.join(target_udfs)}] for artifact {target_art}" ) - for target_udf in target_udfs: - # Don't search inputs of first iteration if use_current is False - if n == 1 and use_current is False: - pass - else: - if target_udf in list_udfs(input_art): - if print_history is True: - return input_art.udf[target_udf], json.dumps( - history, indent=2 - ) - else: - return input_art.udf[target_udf] - - # Cycle to previous step, if possible - try: - pp = input_art.parent_process - assert pp is not None - - pp_tuples = get_art_tuples(pp) - matching_tuples = [] - for pp_tuple in pp_tuples: - try: - pp_input = pp_tuple[0]["uri"] - except: - pp_input = None - try: - pp_output = pp_tuple[1]["uri"] - except: - pp_output = None - - if (pp_input and pp_input.id == input_art.id) or ( - pp_output and pp_output.id == input_art.id - ): - matching_tuples.append(pp_tuple) - - assert ( - len(matching_tuples) == 1 - ), "Target artifact matches multiple inputs/outputs in previous step." - - # Back-tracking successful, re-assign variables to represent previous step - currentStep = pp - art_tuple = matching_tuples[0] - - n += 1 - - except AssertionError: - if isinstance(on_fail, type) and issubclass(on_fail, Exception): - if print_history is True: - print(json.dumps(history, indent=2)) - raise on_fail( - f"Could not find matching UDF(s) [{', '.join(target_udfs)}] for artifact tuple {art_tuple}" - ) - else: - if print_history is True: - print(json.dumps(history, indent=2)) - return on_fail, json.dumps(history, indent=2) - else: - return on_fail diff --git a/scilifelab_epps/zika/utils.py b/scilifelab_epps/zika/utils.py index 26b0b528..89055188 100644 --- a/scilifelab_epps/zika/utils.py +++ b/scilifelab_epps/zika/utils.py @@ -114,7 +114,9 @@ def fetch_sample_data(currentStep: Process, to_fetch: dict) -> pd.DataFrame: except KeyError: row[col_name] = None else: - row[col_name] = fetch_last(currentStep, art_tuple, udf_query) + row[col_name] = fetch_last( + target_art=art_tuple[1]["uri"], target_udfs=udf_query + ) rows.append(row) # Transform to dataframe diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py index 91fd6f78..b15056b1 100644 --- a/scripts/fetch_last_known_field.py +++ b/scripts/fetch_last_known_field.py @@ -44,9 +44,6 @@ def main(args): art for art in process.all_inputs() if art.type == "Analyte" ] - # TODO currently even steps with valid tuples will only use input artifacts - # No traceback provided for output artifact of current step - # Find target output artifacts, if any if no_outputs: logging.info("Step has no output artifacts. Assigning to input artifact.") @@ -67,16 +64,14 @@ def main(args): f"Looking for last recorded UDF '{target_udf}' of {'input' if no_outputs else 'output'} artifact '{target_artifact.name}'..." 
) udf_value, udf_history = udf_tools.fetch_last( - currentStep=process, - art=target_artifact, + target_art=target_artifact, target_udfs=target_udf, - use_current=False, - print_history=True, + log_traceback=True, + return_traceback=True, on_fail=None, ) + # TODO collect history for overview of which steps have been pulled from if udf_value: - logging.info(f"Found target UDF '{target_udf}' with value '{udf_value}'") - logging.info(f"Traceback:\n{udf_history}") target_artifact.udf[target_udf] = udf_value target_artifact.put() logging.info( diff --git a/scripts/log_udfs.py b/scripts/log_udfs.py index e42d6af0..a8a202ed 100644 --- a/scripts/log_udfs.py +++ b/scripts/log_udfs.py @@ -37,7 +37,7 @@ def main(lims, args): file_str = None # Parse outputs and their UDFs - if udf_tools.no_outputs(currentStep): + if udf_tools.get_art_tuples(currentStep) == []: arts = [art for art in currentStep.all_inputs() if art.type == "Analyte"] else: arts = [art for art in currentStep.all_outputs() if art.type == "Analyte"] diff --git a/scripts/ont_calc_volumes.py b/scripts/ont_calc_volumes.py index 9271745f..312dcbb1 100644 --- a/scripts/ont_calc_volumes.py +++ b/scripts/ont_calc_volumes.py @@ -45,7 +45,11 @@ def main(lims, args): # Get last known length size_bp, size_bp_history = udf_tools.fetch_last( - currentStep, art_tuple, "Size (bp)", on_fail=None, print_history=True + target_art=art_out, + target_udfs="Size (bp)", + log_traceback=True, + return_traceback=True, + on_fail=None, ) log.append(f"'Size (bp)': {size_bp}\n{size_bp_history}") diff --git a/scripts/ont_update_amount.py b/scripts/ont_update_amount.py index f3c6fbfc..37e430af 100644 --- a/scripts/ont_update_amount.py +++ b/scripts/ont_update_amount.py @@ -44,10 +44,10 @@ def main(lims, args): or "ONT Barcoding" in currentStep.type.name ): size_bp, size_bp_history = udf_tools.fetch_last( - currentStep=currentStep, - art_tuple=art_tuple, + target_art=art_out, target_udfs="Size (bp)", - print_history=True, + log_traceback=True, + return_traceback=True, on_fail=None, ) log.append(f"'Size (bp)': {size_bp}\n{size_bp_history}") From 391c2749564c32165c4d9e6f6cabe0b07104414d Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Tue, 7 Jan 2025 16:23:08 +0100 Subject: [PATCH 13/35] fix so function can return None explicitly --- scilifelab_epps/utils/udf_tools.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scilifelab_epps/utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py index eb5fc2e9..c00ffbed 100644 --- a/scilifelab_epps/utils/udf_tools.py +++ b/scilifelab_epps/utils/udf_tools.py @@ -124,7 +124,7 @@ def fetch_last( target_udfs: str | list, log_traceback=False, return_traceback=False, - on_fail=None, + on_fail=AssertionError, ) -> (str | int | float) | tuple[str | int | float, dict]: """Recursively look for target UDF. 
@@ -227,12 +227,12 @@ def fetch_last(
                 )
 
     except AssertionError:
-        if on_fail is not None:
-            logging.warning(
-                f"Failed traceback for artifact '{target_art.name}' ({target_art.id}), falling back to on_fail value '{on_fail}'"
+        if isinstance(on_fail, type) and issubclass(on_fail, Exception):
+            raise on_fail(
+                f"Could not find matching UDF(s) [{', '.join(target_udfs)}] for artifact {target_art}"
             )
-            return on_fail
         else:
-            raise AssertionError(
-                f"Could not find matching UDF(s) [{', '.join(target_udfs)}] for artifact {target_art}"
+            logging.warning(
+                f"Failed traceback for artifact '{target_art.name}' ({target_art.id}), falling back to value '{on_fail}'"
             )
+            return on_fail

From cd0ee61194acde1c654e9715f97cfa7c3f5b25f8 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 7 Jan 2025 16:27:31 +0100
Subject: [PATCH 14/35] try displaying steps used on exit 0

---
 scripts/fetch_last_known_field.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py
index b15056b1..b335a33e 100644
--- a/scripts/fetch_last_known_field.py
+++ b/scripts/fetch_last_known_field.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 import logging
+import sys
 from argparse import ArgumentParser
 from datetime import datetime as dt
 
@@ -55,6 +56,7 @@ def main(args):
             if i["uri"].type == "Analyte" and o["uri"].type == "Analyte"
         }
 
+    steps_used = []
     for art_in in arts_in:
         if no_outputs:
             target_artifact = art_in
@@ -70,8 +72,10 @@ def main(args):
             return_traceback=True,
             on_fail=None,
         )
-        # TODO collect history for overview of which steps have been pulled from
-        if udf_value:
+
+        steps_used.append(udf_history[-1]["Artifact"]["Parent Step"]["Name"])
+
+        if udf_value is not None:
             target_artifact.udf[target_udf] = udf_value
             target_artifact.put()
             logging.info(
@@ -83,6 +87,10 @@ def main(args):
             )
             logging.info(f"Traceback:\n{udf_history}")
 
+    sys.stdout(
+        f"UDF '{target_udf}' pulled from steps: {', '.join(set(steps_used))}. Please double check the values."
+    )
+

From 565431791272db832284f3899e58855e4bca671a Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 7 Jan 2025 16:30:20 +0100
Subject: [PATCH 15/35] bugfix

---
 scripts/fetch_last_known_field.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py
index b335a33e..2369e538 100644
--- a/scripts/fetch_last_known_field.py
+++ b/scripts/fetch_last_known_field.py
@@ -38,7 +38,7 @@ def main(args):
     ), f"No target UDF supplied from step field '{args.step_udf}'"
 
     # Check whether process has output artifacts, not the case for e.g. QC steps
-    no_outputs: bool = udf_tools.no_outputs(process)
+    no_outputs: bool = True if udf_tools.get_art_tuples(process) == [] else False
 
     # Load input artifacts
     arts_in: list[Artifact] = [

From e97f89691ab0fd16a230f1aad4ee91873119f131 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 7 Jan 2025 16:34:25 +0100
Subject: [PATCH 16/35] try stderr

---
 scripts/fetch_last_known_field.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py
index 2369e538..2f14d022 100644
--- a/scripts/fetch_last_known_field.py
+++ b/scripts/fetch_last_known_field.py
@@ -87,7 +87,7 @@ def main(args):
             )
             logging.info(f"Traceback:\n{udf_history}")
 
-    sys.stdout(
+    sys.stderr(
         f"UDF '{target_udf}' pulled from steps: {', '.join(set(steps_used))}. Please double check the values."
     )

From 87a3a0cdadba79811ca10119ef32458527c7a9e8 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 7 Jan 2025 16:36:32 +0100
Subject: [PATCH 17/35] try w/o

---
 scripts/fetch_last_known_field.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py
index 2f14d022..b8ec3e5f 100644
--- a/scripts/fetch_last_known_field.py
+++ b/scripts/fetch_last_known_field.py
@@ -87,9 +87,8 @@ def main(args):
             )
             logging.info(f"Traceback:\n{udf_history}")
 
-    sys.stderr(
-        f"UDF '{target_udf}' pulled from steps: {', '.join(set(steps_used))}. Please double check the values."
-    )
+    # TODO use variable
+    msg = f"UDF '{target_udf}' pulled from steps: {', '.join(set(steps_used))}. Please double check the values."


From 94750dda22485e1502af5409820a1bafa5a2aa50 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 7 Jan 2025 16:42:26 +0100
Subject: [PATCH 18/35] rename and change banner approach

---
 scripts/fetch_last_known_field.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py
index b8ec3e5f..ecb796e1 100644
--- a/scripts/fetch_last_known_field.py
+++ b/scripts/fetch_last_known_field.py
@@ -65,7 +65,7 @@ def main(args):
         logging.info(
             f"Looking for last recorded UDF '{target_udf}' of {'input' if no_outputs else 'output'} artifact '{target_artifact.name}'..."
         )
-        udf_value, udf_history = udf_tools.fetch_last(
+        udf_value, traceback = udf_tools.fetch_last(
             target_art=target_artifact,
             target_udfs=target_udf,
             log_traceback=True,
@@ -73,7 +73,10 @@ def main(args):
             on_fail=None,
         )
 
-        steps_used.append(udf_history[-1]["Artifact"]["Parent Step"]["Name"])
+        steps_used.append(
+            f"'{traceback[-1]['Artifact']['Parent Step']['Name']}'"
+            + f" ({traceback[-1]['Artifact']['Parent Step']['ID']})"
+        )
 
         if udf_value is not None:
             target_artifact.udf[target_udf] = udf_value
@@ -85,10 +88,11 @@ def main(args):
             logging.warning(
                 f"Could not traceback UDF '{target_udf}' for {'input' if no_outputs else 'output'} artifact '{target_artifact.name}'"
             )
-            logging.info(f"Traceback:\n{udf_history}")
+            logging.info(f"Traceback:\n{traceback}")
 
-        # TODO use variable
-        msg = f"UDF '{target_udf}' pulled from steps: {', '.join(set(steps_used))}. Please double check the values."
+    logging.warning(
+        f"UDF '{target_udf}' pulled from steps: {', '.join(set(steps_used))}. Please double check the values."
+    )
 
 
 if __name__ == "__main__":

From e79603468f8e9b641bb14892e1b33a89aba701ec Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 8 Jan 2025 12:43:20 +0100
Subject: [PATCH 19/35] test exit 0 message

---
 scilifelab_epps/wrapper.py        | 1 +
 scripts/fetch_last_known_field.py | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/scilifelab_epps/wrapper.py b/scilifelab_epps/wrapper.py
index 5380fbee..dade9c03 100644
--- a/scilifelab_epps/wrapper.py
+++ b/scilifelab_epps/wrapper.py
@@ -97,6 +97,7 @@ def epp_wrapper(args):
             )
             sys.exit(2)
         else:
+            print("Test print")
             sys.exit(0)
 
     return epp_wrapper
diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py
index ecb796e1..a687c6f1 100644
--- a/scripts/fetch_last_known_field.py
+++ b/scripts/fetch_last_known_field.py
@@ -90,9 +90,10 @@ def main(args):
             )
             logging.info(f"Traceback:\n{traceback}")
 
-    logging.warning(
-        f"UDF '{target_udf}' pulled from steps: {', '.join(set(steps_used))}. Please double check the values."
-    )
+    # Look into exit 0 with message
+    # - print
+    # - stderr
+    # - sys.exit("blabla")

From f778f39829af6c59b076a6431b7e966cd0e0713e Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 8 Jan 2025 12:45:25 +0100
Subject: [PATCH 20/35] prev

---
 scilifelab_epps/wrapper.py        | 1 -
 scripts/fetch_last_known_field.py | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/scilifelab_epps/wrapper.py b/scilifelab_epps/wrapper.py
index dade9c03..5380fbee 100644
--- a/scilifelab_epps/wrapper.py
+++ b/scilifelab_epps/wrapper.py
@@ -97,7 +97,6 @@ def epp_wrapper(args):
             )
             sys.exit(2)
         else:
-            print("Test print")
             sys.exit(0)
 
     return epp_wrapper
diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py
index a687c6f1..20715e51 100644
--- a/scripts/fetch_last_known_field.py
+++ b/scripts/fetch_last_known_field.py
@@ -94,6 +94,7 @@ def main(args):
     # - print
     # - stderr
     # - sys.exit("blabla")
+    print("Test print within script scope")

From 38a1ee5dbdcf215da2ba3567f1ff9865b27321ff Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 8 Jan 2025 12:47:16 +0100
Subject: [PATCH 21/35] prev

---
 scripts/fetch_last_known_field.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py
index 20715e51..a213e52f 100644
--- a/scripts/fetch_last_known_field.py
+++ b/scripts/fetch_last_known_field.py
@@ -90,11 +90,10 @@ def main(args):
             )
             logging.info(f"Traceback:\n{traceback}")
 
-    # Look into exit 0 with message
-    # - print
-    # - stderr
-    # - sys.exit("blabla")
-    print("Test print within script scope")
+    # Write to stdout for the green banner
+    print(
+        f"UDF '{target_udf}' pulled from steps: {', '.join(set(steps_used))}. Please double check the values."
+ ) if __name__ == "__main__": From 16423cb5d7157e43eeeed963055355e740cd08f2 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Wed, 8 Jan 2025 14:23:08 +0100 Subject: [PATCH 22/35] Add a very useful utility function to check for unpopulated UDFs and throw appropriate warning --- scilifelab_epps/utils/udf_tools.py | 57 ++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/scilifelab_epps/utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py index c00ffbed..2f2ce46d 100644 --- a/scilifelab_epps/utils/udf_tools.py +++ b/scilifelab_epps/utils/udf_tools.py @@ -1,7 +1,7 @@ import json import logging from typing import Union - +import xml.etree.ElementTree as ET from genologics.entities import Artifact, Process from requests.exceptions import HTTPError @@ -10,6 +10,32 @@ """ +def process_has_udfs(process: Process, target_udfs: list[str]) -> list[str]: + """Check whether any target UDFs are present in the sample fields of the process associated type. + + This function is necessary because a non-required sample UDF left blank will not be detected in the artifact object. + + Returns a list of found UDFs, or an empty list if none were found. + """ + + # Get the raw xml of the process associated type + raw_xml = process.type.xml() + + # Parse as tree object + root = ET.fromstring(raw_xml) + + # Instantiate return object + target_udfs_found = [] + + # Check whether the target UDF is present in the sample fields + for sample_field in root.iter("sample-field"): + for target_udf in target_udfs: + if sample_field.attrib["name"] == target_udf: + target_udfs_found.append(target_udf) + + return target_udfs_found + + def put(target: Artifact | Process, target_udf: str, val, on_fail=AssertionError): """Try to put UDF on artifact or process, optionally without causing fatal error. Evaluates true on success and error (default) or on_fail param on failure. @@ -130,14 +156,15 @@ def fetch_last( Arguments: - target_art Artifact to traceback and assign UDF value to. + target_art Artifact to traceback. Any target UDFs already present in this artifact will be ignored. - target_udfs Can be supplied as a string, or as a prioritized + target_udfs The UDF(s) to look for. Can be supplied as a string, or as a prioritized list of strings. log_traceback If True, will log the full traceback. - return_traceback If True, will return the traceback too. + return_traceback If False, will return only UDF value. + If True, will also return the traceback as a dict. on_fail If not None, will return this value on failure. 
""" @@ -179,15 +206,19 @@ def fetch_last( if pp_art_tuples != []: for pp_tuple in pp_art_tuples: if pp_tuple[1]["uri"].id == current_art.id: + # Dynamically reassign current artifact current_art = pp_tuple[0]["uri"] break - # If not, TODO else: - raise NotImplementedError() + raise NotImplementedError("Parent process has no valid input-output links, traceback can't continue.") + # Dynamically reassign parent process pp = current_art.parent_process + + # Keep track of visited parent processes if pp is not None: steps_visited.append(f"'{pp.type.name}' ({pp.id})") + target_udfs_in_parent_process = process_has_udfs(pp, target_udfs) traceback.append( { @@ -221,6 +252,15 @@ def fetch_last( else: return current_art.udf[target_udf] + # Address the case that no target UDFs were found on the artifact, even though they were present in the parent process + if target_udfs_in_parent_process != []: + logging.warning( + f"Parent process '{pp.type.name}' ({pp.id})" + + f" has target UDF(s) {target_udfs_in_parent_process}," + + f" but it's not filled in for artifact '{current_art}' ({current_art.id})." + + f" Please double check that you haven't missed filling it in.") + + # Stop traceback if no parent process is found if pp is None: raise AssertionError( f"Artifact '{current_art.name}' ({current_art.id}) has no parent process linked and can't be traced back further." @@ -235,4 +275,7 @@ def fetch_last( logging.warning( f"Failed traceback for artifact '{target_art.name}' ({target_art.id}), falling back to value '{on_fail}'" ) - return on_fail + if return_traceback: + return on_fail, traceback + else: + return on_fail From 58a22f8f4c4f8c0a2e099b67ea5480bc498fe928 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Wed, 8 Jan 2025 14:27:30 +0100 Subject: [PATCH 23/35] improve logs --- scilifelab_epps/utils/udf_tools.py | 2 +- scripts/fetch_last_known_field.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scilifelab_epps/utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py index 2f2ce46d..b74e666e 100644 --- a/scilifelab_epps/utils/udf_tools.py +++ b/scilifelab_epps/utils/udf_tools.py @@ -257,7 +257,7 @@ def fetch_last( logging.warning( f"Parent process '{pp.type.name}' ({pp.id})" + f" has target UDF(s) {target_udfs_in_parent_process}," - + f" but it's not filled in for artifact '{current_art}' ({current_art.id})." + + f" but it's not filled in for artifact '{current_art.name}' ({current_art.id})." + f" Please double check that you haven't missed filling it in.") # Stop traceback if no parent process is found diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py index a213e52f..db6ff54d 100644 --- a/scripts/fetch_last_known_field.py +++ b/scripts/fetch_last_known_field.py @@ -47,7 +47,7 @@ def main(args): # Find target output artifacts, if any if no_outputs: - logging.info("Step has no output artifacts. Assigning to input artifact.") + logging.info("Step has no output artifacts. 
Assigning to input artifacts.") else: art_tuples: list[tuple[dict]] = process.input_output_maps art_in2out: dict[str:Artifact] = { From 29201dfeae1264fca03f245534d39b802880368b Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Wed, 8 Jan 2025 14:28:45 +0100 Subject: [PATCH 24/35] ruff --- scilifelab_epps/utils/udf_tools.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scilifelab_epps/utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py index b74e666e..92d600f9 100644 --- a/scilifelab_epps/utils/udf_tools.py +++ b/scilifelab_epps/utils/udf_tools.py @@ -210,7 +210,9 @@ def fetch_last( current_art = pp_tuple[0]["uri"] break else: - raise NotImplementedError("Parent process has no valid input-output links, traceback can't continue.") + raise NotImplementedError( + "Parent process has no valid input-output links, traceback can't continue." + ) # Dynamically reassign parent process pp = current_art.parent_process @@ -258,7 +260,8 @@ def fetch_last( f"Parent process '{pp.type.name}' ({pp.id})" + f" has target UDF(s) {target_udfs_in_parent_process}," + f" but it's not filled in for artifact '{current_art.name}' ({current_art.id})." - + f" Please double check that you haven't missed filling it in.") + + f" Please double check that you haven't missed filling it in." + ) # Stop traceback if no parent process is found if pp is None: From 8d5b95e465dbc8993c383ec2ec3bba91c153e19a Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Wed, 8 Jan 2025 14:29:18 +0100 Subject: [PATCH 25/35] ruff check fixes --- scilifelab_epps/utils/udf_tools.py | 5 +++-- scripts/fetch_last_known_field.py | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scilifelab_epps/utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py index 92d600f9..09292a09 100644 --- a/scilifelab_epps/utils/udf_tools.py +++ b/scilifelab_epps/utils/udf_tools.py @@ -1,7 +1,8 @@ import json import logging -from typing import Union import xml.etree.ElementTree as ET +from typing import Union + from genologics.entities import Artifact, Process from requests.exceptions import HTTPError @@ -260,7 +261,7 @@ def fetch_last( f"Parent process '{pp.type.name}' ({pp.id})" + f" has target UDF(s) {target_udfs_in_parent_process}," + f" but it's not filled in for artifact '{current_art.name}' ({current_art.id})." - + f" Please double check that you haven't missed filling it in." + + " Please double check that you haven't missed filling it in." ) # Stop traceback if no parent process is found diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py index db6ff54d..40115735 100644 --- a/scripts/fetch_last_known_field.py +++ b/scripts/fetch_last_known_field.py @@ -1,6 +1,5 @@ #!/usr/bin/env python import logging -import sys from argparse import ArgumentParser from datetime import datetime as dt From a5ab60cd0915429024609971dcc209ce9b2b8056 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Wed, 8 Jan 2025 14:35:11 +0100 Subject: [PATCH 26/35] bump docstring --- scilifelab_epps/utils/udf_tools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scilifelab_epps/utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py index 09292a09..9a19c6a4 100644 --- a/scilifelab_epps/utils/udf_tools.py +++ b/scilifelab_epps/utils/udf_tools.py @@ -164,10 +164,10 @@ def fetch_last( log_traceback If True, will log the full traceback. - return_traceback If False, will return only UDF value. 
- If True, will also return the traceback as a dict. + return_traceback If True, will additionally return the traceback as a dict. - on_fail If not None, will return this value on failure. + on_fail If this is a subclass of Exception, will raise this exception on failure. + If not, will return this value on failure instead of the UDF value. """ # Convert to list, to enable iteration From 0b5913e9bbcb89a00c5555087dea025abcbd39d5 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Wed, 8 Jan 2025 14:57:18 +0100 Subject: [PATCH 27/35] fix log --- scilifelab_epps/utils/udf_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scilifelab_epps/utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py index 9a19c6a4..898d7a74 100644 --- a/scilifelab_epps/utils/udf_tools.py +++ b/scilifelab_epps/utils/udf_tools.py @@ -273,7 +273,7 @@ def fetch_last( except AssertionError: if isinstance(on_fail, type) and issubclass(on_fail, Exception): raise on_fail( - f"Could not find matching UDF(s) [{', '.join(target_udfs)}] for artifact {target_art}" + f"Could not find matching UDF(s) [{', '.join(target_udfs)}] for artifact '{target_art.name}' ({target_art.id})" ) else: logging.warning( From b8e314e9ac8849ce8e6a4d16a1a101f8d8ba9a2e Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Wed, 8 Jan 2025 15:36:58 +0100 Subject: [PATCH 28/35] fix issue with ont pooling, add argument to allow fetching from target artifact, refactor loop --- scilifelab_epps/utils/udf_tools.py | 104 +++++++++++++++-------------- scilifelab_epps/zika/utils.py | 4 +- 2 files changed, 58 insertions(+), 50 deletions(-) diff --git a/scilifelab_epps/utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py index 898d7a74..b54c22ea 100644 --- a/scilifelab_epps/utils/udf_tools.py +++ b/scilifelab_epps/utils/udf_tools.py @@ -149,6 +149,7 @@ def list_udfs(art: Artifact) -> list: def fetch_last( target_art: Artifact, target_udfs: str | list, + include_current=False, log_traceback=False, return_traceback=False, on_fail=AssertionError, @@ -157,11 +158,13 @@ def fetch_last( Arguments: - target_art Artifact to traceback. Any target UDFs already present in this artifact will be ignored. + target_art Artifact to traceback. target_udfs The UDF(s) to look for. Can be supplied as a string, or as a prioritized list of strings. + include_current If True, will pull target UDFs if found in the target artifact. + log_traceback If True, will log the full traceback. return_traceback If True, will additionally return the traceback as a dict. @@ -178,43 +181,12 @@ def fetch_last( traceback = [] steps_visited = [] + # Instantiate recursion variables + current_art = target_art + n = 1 try: - # First iteration, current artifact is the target artifact. Don't pull any UDF values. - current_art = target_art - pp = current_art.parent_process - assert pp, f"Artifact '{current_art.name}' ({current_art.id}) has no parent process linked." 
- steps_visited.append(f"'{pp.type.name}' ({pp.id})") - - traceback.append( - { - "Artifact": { - "Name": current_art.name, - "ID": current_art.id, - "UDFs": dict(current_art.udf.items()), - "Parent Step": { - "Name": pp.type.name if pp else None, - "ID": pp.id if pp else None, - }, - } - } - ) - # Start recursive search while True: - pp_art_tuples = get_art_tuples(pp) - - # If parent process has valid input-output tuples, use for linkage - if pp_art_tuples != []: - for pp_tuple in pp_art_tuples: - if pp_tuple[1]["uri"].id == current_art.id: - # Dynamically reassign current artifact - current_art = pp_tuple[0]["uri"] - break - else: - raise NotImplementedError( - "Parent process has no valid input-output links, traceback can't continue." - ) - # Dynamically reassign parent process pp = current_art.parent_process @@ -240,23 +212,30 @@ def fetch_last( # Search for correct UDF for target_udf in target_udfs: if target_udf in list_udfs(current_art): - if log_traceback is True: - logging.info(f"Traceback:\n{json.dumps(traceback, indent=2)}") - logging.info( - f"Found target UDF '{target_udf}'" - + f" with value '{current_art.udf[target_udf]}'" - + f" in process {steps_visited[-1]}" - + f" {'output' if pp else 'input'}" - + f" artifact '{current_art.name}' ({current_art.id})" - ) - - if return_traceback: - return current_art.udf[target_udf], traceback + if include_current is not True and n == 1: + logging.info( + "Target UDF was found in specified artifact, but include_current is set to False. Skipping." + ) else: - return current_art.udf[target_udf] + if log_traceback is True: + logging.info( + f"Traceback:\n{json.dumps(traceback, indent=2)}" + ) + logging.info( + f"Found target UDF '{target_udf}'" + + f" with value '{current_art.udf[target_udf]}'" + + f" in process {steps_visited[-1]}" + + f" {'output' if pp else 'input'}" + + f" artifact '{current_art.name}' ({current_art.id})" + ) + + if return_traceback: + return current_art.udf[target_udf], traceback + else: + return current_art.udf[target_udf] # Address the case that no target UDFs were found on the artifact, even though they were present in the parent process - if target_udfs_in_parent_process != []: + if pp is not None and target_udfs_in_parent_process != []: logging.warning( f"Parent process '{pp.type.name}' ({pp.id})" + f" has target UDF(s) {target_udfs_in_parent_process}," @@ -270,6 +249,33 @@ def fetch_last( f"Artifact '{current_art.name}' ({current_art.id}) has no parent process linked and can't be traced back further." ) + pp_art_tuples = get_art_tuples(pp) + + # If parent process has valid input-output tuples, use for linkage + linked_input_arts = [] + if pp_art_tuples != []: + for pp_tuple in pp_art_tuples: + if pp_tuple[1]["uri"].id == current_art.id: + linked_input_arts.append(pp_tuple[0]["uri"]) + else: + raise NotImplementedError( + "Parent process has no valid input-output links, traceback can't continue." + ) + + if len(linked_input_arts) == 1: + # Dynamically reassign current artifact + current_art = linked_input_arts[0] + elif len(linked_input_arts) > 1: + raise AssertionError( + "Parent process has multiple input artifacts linked to the same output artifact, can't traceback." + ) + else: + raise AssertionError( + "Parent process has no input artifacts linked to the output artifact, can't traceback." 
+ ) + + n += 1 + except AssertionError: if isinstance(on_fail, type) and issubclass(on_fail, Exception): raise on_fail( diff --git a/scilifelab_epps/zika/utils.py b/scilifelab_epps/zika/utils.py index 89055188..41cbfeee 100644 --- a/scilifelab_epps/zika/utils.py +++ b/scilifelab_epps/zika/utils.py @@ -115,7 +115,9 @@ def fetch_sample_data(currentStep: Process, to_fetch: dict) -> pd.DataFrame: row[col_name] = None else: row[col_name] = fetch_last( - target_art=art_tuple[1]["uri"], target_udfs=udf_query + target_art=art_tuple[0]["uri"], + target_udfs=udf_query, + include_current=True, ) rows.append(row) From b5ad5846127b092d4fae85352ad71f8fb2c91549 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Wed, 8 Jan 2025 15:52:34 +0100 Subject: [PATCH 29/35] remove superfluous handling of traceback --- .../calc_from_args/udf_arg_methods.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/scilifelab_epps/calc_from_args/udf_arg_methods.py b/scilifelab_epps/calc_from_args/udf_arg_methods.py index b7a5defd..d2bbb9f4 100644 --- a/scilifelab_epps/calc_from_args/udf_arg_methods.py +++ b/scilifelab_epps/calc_from_args/udf_arg_methods.py @@ -23,7 +23,6 @@ def fetch_from_arg( """ - history: str | None = None source: Artifact | Process source_name: str @@ -49,11 +48,9 @@ def fetch_from_arg( if arg_dict["recursive"]: # Fetch UDF recursively - value, history = udf_tools.fetch_last( + value = udf_tools.fetch_last( target_art=source, target_udfs=arg_dict["udf"], - log_traceback=True, - return_traceback=True, ) else: # Fetch UDF from input or output artifact @@ -72,17 +69,6 @@ def fetch_from_arg( else: return on_fail - # Log what has been done - log_str = f"Fetched UDF '{arg_dict['udf']}': {value} from {arg_dict['source']} '{source_name}'." 
- - if history: - history_yaml = yaml.load(history, Loader=yaml.FullLoader) - last_step_name = history_yaml[-1]["Step name"] - last_step_id = history_yaml[-1]["Step ID"] - log_str += f"\n\tUDF recusively fetched from step: '{last_step_name}' (ID: '{last_step_id}')" - - logging.info(log_str) - return value From b4165f9f0fd85c0494fe6777f1e0058ff6d4c712 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Wed, 8 Jan 2025 15:54:46 +0100 Subject: [PATCH 30/35] ruff fixes --- scilifelab_epps/calc_from_args/udf_arg_methods.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scilifelab_epps/calc_from_args/udf_arg_methods.py b/scilifelab_epps/calc_from_args/udf_arg_methods.py index d2bbb9f4..77ef5236 100644 --- a/scilifelab_epps/calc_from_args/udf_arg_methods.py +++ b/scilifelab_epps/calc_from_args/udf_arg_methods.py @@ -1,8 +1,6 @@ #!/usr/bin/env python -import logging from typing import Any -import yaml from genologics.entities import Artifact, Process from scilifelab_epps.utils import udf_tools From 5426656eb55e086644bd031cc169b397b3561b3b Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Wed, 8 Jan 2025 15:56:18 +0100 Subject: [PATCH 31/35] shut up mypy --- scilifelab_epps/utils/udf_tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scilifelab_epps/utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py index b54c22ea..fec53764 100644 --- a/scilifelab_epps/utils/udf_tools.py +++ b/scilifelab_epps/utils/udf_tools.py @@ -1,7 +1,7 @@ import json import logging import xml.etree.ElementTree as ET -from typing import Union +from typing import Any, Union from genologics.entities import Artifact, Process from requests.exceptions import HTTPError @@ -153,7 +153,7 @@ def fetch_last( log_traceback=False, return_traceback=False, on_fail=AssertionError, -) -> (str | int | float) | tuple[str | int | float, dict]: +) -> Any | tuple[Any, dict]: """Recursively look for target UDF. 
Arguments: From bf95f9a1714529b74bb807cf001de88dfadadb61 Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Mon, 20 Jan 2025 13:00:45 +0100 Subject: [PATCH 32/35] ruff --- .../calc_from_args/calculation_methods.py | 24 ++--- scilifelab_epps/epp.py | 6 +- scilifelab_epps/zika/methods.py | 98 ++++++++++--------- scripts/calculate_cell_nuclei_conc.py | 4 +- scripts/copy_field_art2samp.py | 5 +- scripts/copy_field_proc2projs.py | 4 +- scripts/copy_frag_an.py | 4 +- scripts/copy_qubit.py | 4 +- scripts/copy_reference_genome.py | 2 +- scripts/fetch_last_known_field.py | 6 +- scripts/generate_anglerfish_samplesheet.py | 6 +- scripts/generate_aviti_run_manifest.py | 20 ++-- scripts/generate_minknow_samplesheet.py | 76 +++++++------- scripts/index_distance_checker.py | 4 +- scripts/logbook.py | 6 +- scripts/logbook_service_account.py | 4 +- scripts/molar_concentration.py | 4 +- scripts/multiply_with_dilution_fold.py | 6 +- scripts/ont_calc_volumes.py | 12 +-- scripts/ont_pool.py | 12 +-- scripts/ont_send_reloading_info_to_db.py | 48 ++++----- scripts/ont_suggest_ports.py | 8 +- scripts/ont_sync_to_db.py | 6 +- scripts/parse_anglerfish_results.py | 15 +-- scripts/parse_caliper_results.py | 4 +- scripts/parse_vc100_results.py | 4 +- scripts/qc_amount_calculation.py | 2 +- scripts/quality_filter.py | 6 +- scripts/quantit_generate_driver_file.py | 4 +- scripts/quantit_set_QC.py | 13 +-- scripts/quantit_set_conc.py | 8 +- scripts/set_App_QC.py | 6 +- scripts/undemultiplexed_index.py | 7 +- scripts/zebra_barcodes.py | 11 +-- 34 files changed, 211 insertions(+), 238 deletions(-) diff --git a/scilifelab_epps/calc_from_args/calculation_methods.py b/scilifelab_epps/calc_from_args/calculation_methods.py index f048a0ea..614fb415 100644 --- a/scilifelab_epps/calc_from_args/calculation_methods.py +++ b/scilifelab_epps/calc_from_args/calculation_methods.py @@ -181,9 +181,9 @@ def summarize_pooling(process: Process, args: Namespace): cols["input_conc_units"] = str( fetch_from_arg(art_tuple, args.conc_units_in, process) ) - assert ( - cols["input_conc_units"] in ["ng/ul", "nM"] - ), f'Unsupported conc. units "{cols["input_conc_units"]}" for art {art_in.name}' + assert cols["input_conc_units"] in ["ng/ul", "nM"], ( + f'Unsupported conc. units "{cols["input_conc_units"]}" for art {art_in.name}' + ) else: # Infer concentration unit if "ng/ul" in args.conc_in["udf"]: @@ -216,9 +216,9 @@ def summarize_pooling(process: Process, args: Namespace): df_pool = pd.DataFrame(pool_data_rows) df_pool.index = [art_tuple[0]["uri"].name for art_tuple in pool_tuples] - assert ( - df_pool.output_amt_unit.unique().size == 1 - ), "Inconsistent output amount units." + assert df_pool.output_amt_unit.unique().size == 1, ( + "Inconsistent output amount units." + ) # Get a column with consistent concentration units df_pool["input_conc_nM"] = df_pool.apply( @@ -331,9 +331,9 @@ def equimolar_pooling(process: Process, args: Namespace): cols["input_conc_units"] = str( fetch_from_arg(art_tuple, args.conc_units_in, process) ) - assert ( - cols["input_conc_units"] in ["ng/ul", "nM"] - ), f'Unsupported conc. units "{cols["input_conc_units"]}" for art {art_in.name}' + assert cols["input_conc_units"] in ["ng/ul", "nM"], ( + f'Unsupported conc. 
units "{cols["input_conc_units"]}" for art {art_in.name}' + ) else: # Infer concentration unit if "ng/ul" in args.conc_in["udf"]: @@ -366,9 +366,9 @@ def equimolar_pooling(process: Process, args: Namespace): df_pool = pd.DataFrame(pool_data_rows) df_pool.index = [art_tuple[0]["uri"].name for art_tuple in pool_tuples] - assert ( - df_pool.output_amt_unit.unique().size == 1 - ), "Inconsistent output amount units." + assert df_pool.output_amt_unit.unique().size == 1, ( + "Inconsistent output amount units." + ) # Get a column with consistent concentration units df_pool["input_conc_nM"] = df_pool.apply( diff --git a/scilifelab_epps/epp.py b/scilifelab_epps/epp.py index 58014574..2cfa7d70 100644 --- a/scilifelab_epps/epp.py +++ b/scilifelab_epps/epp.py @@ -90,7 +90,7 @@ def __enter__(self): ) except DistributionNotFound as e: logging.error(e) - logging.error(f"Make sure you have the {self.PACKAGE} " "package installed") + logging.error(f"Make sure you have the {self.PACKAGE} package installed") sys.exit(-1) return self @@ -185,7 +185,7 @@ def prepend_old_log(self, external_log_file=None): f.write("=" * 80 + "\n") except HTTPError: # Probably no artifact found, skip prepending print( - ("No log file artifact found " f"for id: {log_file_name}"), + (f"No log file artifact found for id: {log_file_name}"), file=sys.stderr, ) except OSError as e: # Probably some path was wrong in copy @@ -417,7 +417,7 @@ def _log_after_change(self): } logging.info( - "Updated {d_elt_type} udf: {d_udf}, from {su} to " "{nv}.".format(**d) + "Updated {d_elt_type} udf: {d_udf}, from {su} to {nv}.".format(**d) ) def copy_udf(self, changelog_f=None): diff --git a/scilifelab_epps/zika/methods.py b/scilifelab_epps/zika/methods.py index 7c596454..64b2d384 100644 --- a/scilifelab_epps/zika/methods.py +++ b/scilifelab_epps/zika/methods.py @@ -25,7 +25,7 @@ def pool_fixed_vol( # Write log header log = [] for e in [ - f"LIMS process {currentStep.id}\n" "\n=== Volume constraints ===", + f"LIMS process {currentStep.id}\n\n=== Volume constraints ===", f"Minimum pipetting volume: {zika_min_vol} ul", f"Maximum allowed dst well volume: {well_max_vol} ul", ]: @@ -35,9 +35,9 @@ def pool_fixed_vol( fixed_vol_step_udf = "Transfer Volume for Pooling (uL)" fixed_vol = currentStep.udf[fixed_vol_step_udf] assert type(fixed_vol) in [int, float], f"'{fixed_vol_step_udf}' must be a number." - assert ( - zika_min_vol <= fixed_vol <= well_max_vol - ), f"'{fixed_vol_step_udf}' must be between {zika_min_vol} and {well_max_vol} ul." + assert zika_min_vol <= fixed_vol <= well_max_vol, ( + f"'{fixed_vol_step_udf}' must be between {zika_min_vol} and {well_max_vol} ul." 
+ ) log.append(f"Fixed transfer volume: {fixed_vol} ul") # Get pools @@ -156,7 +156,7 @@ def pool( # Write log header log = [] for e in [ - f"LIMS process {currentStep.id}\n" "\n=== Volume constraints ===", + f"LIMS process {currentStep.id}\n\n=== Volume constraints ===", f"Minimum pipetting volume: {zika_min_vol} ul", f"Applied dead volume: {well_dead_vol} ul", f"Maximum allowed dst well volume: {well_max_vol} ul", @@ -190,13 +190,13 @@ def pool( df_all = zika.utils.fetch_sample_data(currentStep, to_fetch) # All samples should have accessible volume - assert all( - df_all.vol > well_dead_vol - ), f"The minimum required source volume is {well_dead_vol} ul" + assert all(df_all.vol > well_dead_vol), ( + f"The minimum required source volume is {well_dead_vol} ul" + ) - assert all( - df_all.target_vol <= well_max_vol - ), f"All target volumes must be at or below {well_max_vol} uL" + assert all(df_all.target_vol <= well_max_vol), ( + f"All target volumes must be at or below {well_max_vol} uL" + ) # Adjust for dead volume df_all["full_vol"] = df_all.vol.copy() @@ -237,9 +237,9 @@ def pool( conc_unit = "ng/ul" else: raise AssertionError("Could not make sense of input UDFs") - assert all( - df_all.conc_units == conc_unit - ), "Samples and pools have different conc units" + assert all(df_all.conc_units == conc_unit), ( + "Samples and pools have different conc units" + ) # Append target parameters to log log.append(f"\n\nPooling {len(df_pool)} samples into {pool.name}...") @@ -314,10 +314,10 @@ def pool( f"\nERROR: Overflow in {pool.name}. Decrease number of samples or dilute highly concentrated outliers" ) log.append( - f"Highest concentrated sample: {highest_conc_sample.sample_name} at {round(highest_conc_sample.conc,2)} {conc_unit}" + f"Highest concentrated sample: {highest_conc_sample.sample_name} at {round(highest_conc_sample.conc, 2)} {conc_unit}" ) log.append( - f"Pooling cannot be normalized to less than {round(pool_min_sample_vol,1)} ul" + f"Pooling cannot be normalized to less than {round(pool_min_sample_vol, 1)} ul" ) errors = True @@ -327,13 +327,13 @@ def pool( "\nAn even pool can be created within the following parameter ranges:" ) log.append( - f" - Amount per sample {round(lowest_common_amount,2)} - {round(pool_max_sample_amt / len(df_pool),2)} {amt_unit}" + f" - Amount per sample {round(lowest_common_amount, 2)} - {round(pool_max_sample_amt / len(df_pool), 2)} {amt_unit}" ) log.append( - f" - Pool volume {round(pool_min_sample_vol,1)} - {round(well_max_vol,1)} ul" + f" - Pool volume {round(pool_min_sample_vol, 1)} - {round(well_max_vol, 1)} ul" ) log.append( - f" - Pool concentration {round(pool_min_conc,2)} - {round(pool_max_conc,2)} {conc_unit}" + f" - Pool concentration {round(pool_min_conc, 2)} - {round(pool_max_conc, 2)} {conc_unit}" ) # Nudge conc, if necessary @@ -380,7 +380,7 @@ def pool( ) for i, r in df_low.iterrows(): log.append( - f"{r.sample_name} ({round(r.conc,2)} {r.conc_units}, {round(r.vol,2)} uL accessible volume)" + f"{r.sample_name} ({round(r.conc, 2)} {r.conc_units}, {round(r.vol, 2)} uL accessible volume)" ) log.append( "The above samples will be depleted and under-represented in the final pool." @@ -402,10 +402,10 @@ def pool( f"\nERROR: Overflow in {pool.name}. 
Decrease number of samples or dilute highly concentrated outliers" ) log.append( - f"Highest concentrated sample: {highest_conc_sample.sample_name} at {round(highest_conc_sample.conc,2)} {conc_unit}" + f"Highest concentrated sample: {highest_conc_sample.sample_name} at {round(highest_conc_sample.conc, 2)} {conc_unit}" ) log.append( - f"Pooling cannot be normalized to less than {round(pool_real_min_sample_vol,1)} ul" + f"Pooling cannot be normalized to less than {round(pool_real_min_sample_vol, 1)} ul" ) errors = True @@ -415,13 +415,13 @@ def pool( "\nWill try to create a pool that is as even as possible. Accounting for sample depletion, a pool can be created with the following parameter ranges: " ) log.append( - f" - Target amount per sample {round(target_transfer_amt,2)}" + f" - Target amount per sample {round(target_transfer_amt, 2)}" ) log.append( - f" - Pool volume {round(pool_real_min_sample_vol,1)}-{round(well_max_vol,1)} ul" + f" - Pool volume {round(pool_real_min_sample_vol, 1)}-{round(well_max_vol, 1)} ul" ) log.append( - f" - Pool concentration {round(pool_real_min_conc,2)}-{round(pool_real_max_conc,2)} {conc_unit}" + f" - Pool concentration {round(pool_real_min_conc, 2)}-{round(pool_real_max_conc, 2)} {conc_unit}" ) # Nudge conc, if necessary @@ -456,11 +456,11 @@ def pool( log.append("\nAdjustments:") if round(target_pool_conc, 2) != round(pool_conc, 2): log.append( - f" - WARNING: Target pool concentration is adjusted from {round(target_pool_conc,2)} --> {round(pool_conc,2)} {conc_unit}" + f" - WARNING: Target pool concentration is adjusted from {round(target_pool_conc, 2)} --> {round(pool_conc, 2)} {conc_unit}" ) if round(target_pool_vol, 1) != round(pool_vol, 1): log.append( - f" - WARNING: Target pool volume is adjusted from {round(target_pool_vol,1)} --> {round(pool_vol,1)} ul" + f" - WARNING: Target pool volume is adjusted from {round(target_pool_vol, 1)} --> {round(pool_vol, 1)} ul" ) if round(target_pool_conc, 2) == round(pool_conc, 2) and round( target_pool_vol, 1 @@ -468,7 +468,7 @@ def pool( log.append("Pooling OK") if round(target_transfer_amt, 2) != round(target_amt_taken, 2): log.append( - f" - INFO: Amount taken per sample is adjusted from {round(target_amt_taken,2)} --> {round(target_transfer_amt,2)} {amt_unit}" + f" - INFO: Amount taken per sample is adjusted from {round(target_amt_taken, 2)} --> {round(target_transfer_amt, 2)} {amt_unit}" ) # Calculate and store pool buffer volume @@ -480,7 +480,7 @@ def pool( ) buffer_vols[pool.name] = buffer_vol log.append( - f"\nThe final pool volume is {round(pool_vol,1)} ul ({round(total_sample_vol,1)} ul sample + {round(buffer_vol,1)} ul buffer)" + f"\nThe final pool volume is {round(pool_vol, 1)} ul ({round(total_sample_vol, 1)} ul sample + {round(buffer_vol, 1)} ul buffer)" ) # === REPORT DEVIATING SAMPLES === @@ -498,7 +498,7 @@ def pool( ) log.append("Sample\tFraction") for name, frac in outlier_samples.values: - log.append(f" - {name}\t{round(frac,2)}") + log.append(f" - {name}\t{round(frac, 2)}") df_wl = pd.concat([df_wl, df_pool], axis=0) @@ -535,7 +535,7 @@ def pool( for pool in pools: if buffer_vols[pool.name] > 0: comments.append( - f"Add {round(buffer_vols[pool.name],1)} ul buffer to pool {pool.name} (well {pool.location[1]})" + f"Add {round(buffer_vols[pool.name], 1)} ul buffer to pool {pool.name} (well {pool.location[1]})" ) # Write the output files @@ -606,7 +606,7 @@ def norm( log = [] for e in [ - f"LIMS process {currentStep.id}\n" "\n=== Dilution strategy ===", + f"LIMS process {currentStep.id}\n\n=== 
Dilution strategy ===", f"Expand volume to obtain target conc: {volume_expansion}", f"Base calculations on user measurements: {use_customer_metrics}", "\n=== Volume constraints ===", @@ -623,12 +623,12 @@ def norm( # Assert required UDFs are populated in step for output_name, output in outputs.items(): - assert is_filled( - output, udfs["target_amt"] - ), f"UDF '{udfs['target_amt']}' missing for {output.name}" - assert is_filled( - output, udfs["target_vol"] - ), f"UDF '{udfs['target_vol']}' missing for {output.name}" + assert is_filled(output, udfs["target_amt"]), ( + f"UDF '{udfs['target_amt']}' missing for {output.name}" + ) + assert is_filled(output, udfs["target_vol"]), ( + f"UDF '{udfs['target_vol']}' missing for {output.name}" + ) # Fetch sample data @@ -662,13 +662,13 @@ def norm( amt_unit = "ng" if conc_unit == "ng/ul" else "fmol" # Assertions - assert all( - df.target_vol <= well_max_vol - ), f"All target volumes must be at or below {well_max_vol} uL" + assert all(df.target_vol <= well_max_vol), ( + f"All target volumes must be at or below {well_max_vol} uL" + ) - assert all( - df.vol > well_dead_vol - ), f"The minimum required source volume is {well_dead_vol} ul" + assert all(df.vol > well_dead_vol), ( + f"The minimum required source volume is {well_dead_vol} ul" + ) df["full_vol"] = df.vol.copy() df.loc[:, "vol"] = df.vol - well_dead_vol @@ -696,7 +696,7 @@ def norm( } for i, r in df.iterrows(): log.append( - f"\n{r.sample_name} (conc {round(r.conc,2)} {conc_unit}, vol {round(r.vol,1)} ul)" + f"\n{r.sample_name} (conc {round(r.conc, 2)} {conc_unit}, vol {round(r.vol, 1)} ul)" ) # Cases @@ -733,7 +733,9 @@ def norm( tot_vol = r.min_transfer_amt / r.target_conc else: tot_vol = well_max_vol - log.append(f"INFO: Expanding total volume to {round(tot_vol,1)} ul") + log.append( + f"INFO: Expanding total volume to {round(tot_vol, 1)} ul" + ) sample_vol = zika_min_vol buffer_vol = tot_vol - sample_vol @@ -746,7 +748,7 @@ def norm( # Adress cases where buffer volume is lower than the minimum transfer amount if 0 < buffer_vol < zika_min_vol: log.append( - f"WARNING: Required buffer volume ({round(buffer_vol,1)} ul) is less than minimum transfer volume {zika_min_vol} ul" + f"WARNING: Required buffer volume ({round(buffer_vol, 1)} ul) is less than minimum transfer volume {zika_min_vol} ul" ) log.append("INFO: Omitting buffer") tot_vol -= buffer_vol @@ -761,7 +763,7 @@ def norm( elif round(final_conc_frac, 2) < 1: log.append("WARNING: Final concentration is below target") log.append( - f"--> Diluting {round(sample_vol,1)} ul ({round(final_amt,2)} {amt_unit}) to {round(tot_vol,1)} ul ({round(final_conc,2)} {conc_unit}, {round(final_conc_frac*100,1)}% of target)" + f"--> Diluting {round(sample_vol, 1)} ul ({round(final_amt, 2)} {amt_unit}) to {round(tot_vol, 1)} ul ({round(final_conc, 2)} {conc_unit}, {round(final_conc_frac * 100, 1)}% of target)" ) # Append calculation results to dict diff --git a/scripts/calculate_cell_nuclei_conc.py b/scripts/calculate_cell_nuclei_conc.py index 9b57aac7..d34298ae 100644 --- a/scripts/calculate_cell_nuclei_conc.py +++ b/scripts/calculate_cell_nuclei_conc.py @@ -60,9 +60,7 @@ def main(lims, pid, epp_logger): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." 
- ), + help=("File name for standard log file, for runtime information and problems."), ) args = parser.parse_args() diff --git a/scripts/copy_field_art2samp.py b/scripts/copy_field_art2samp.py index ba426904..8b7871b7 100644 --- a/scripts/copy_field_art2samp.py +++ b/scripts/copy_field_art2samp.py @@ -92,8 +92,7 @@ def main(lims, args, epp_logger): } abstract = ( - "Updated {ua} udf(s), out of {ca} in total, " - "{warning} with incorrect udf info." + "Updated {ua} udf(s), out of {ca} in total, {warning} with incorrect udf info." ).format(**d) print(abstract, file=sys.stderr) # stderr will be logged and printed in GUI @@ -105,7 +104,7 @@ def main(lims, args, epp_logger): parser.add_argument( "--log", help=( - "File name for standard log file, " " for runtime information and problems." + "File name for standard log file, for runtime information and problems." ), ) parser.add_argument( diff --git a/scripts/copy_field_proc2projs.py b/scripts/copy_field_proc2projs.py index bdb0d466..bf896dbd 100644 --- a/scripts/copy_field_proc2projs.py +++ b/scripts/copy_field_proc2projs.py @@ -84,9 +84,7 @@ def main(lims, args, epp_logger): parser.add_argument("--pid", help="Lims id for current Process") parser.add_argument( "--log", - help=( - "File name for standard log file, " "for runtime information and problems." - ), + help=("File name for standard log file, for runtime information and problems."), ) parser.add_argument( "-s", diff --git a/scripts/copy_frag_an.py b/scripts/copy_frag_an.py index f2784393..7ba2ebc6 100644 --- a/scripts/copy_frag_an.py +++ b/scripts/copy_frag_an.py @@ -200,9 +200,7 @@ def main(lims, pid, epp_logger): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." - ), + help=("File name for standard log file, for runtime information and problems."), ) args = parser.parse_args() diff --git a/scripts/copy_qubit.py b/scripts/copy_qubit.py index 3634924d..13ac4aaa 100644 --- a/scripts/copy_qubit.py +++ b/scripts/copy_qubit.py @@ -190,9 +190,7 @@ def main(lims, pid, epp_logger): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." - ), + help=("File name for standard log file, for runtime information and problems."), ) args = parser.parse_args() diff --git a/scripts/copy_reference_genome.py b/scripts/copy_reference_genome.py index 77bb180a..3d3af520 100644 --- a/scripts/copy_reference_genome.py +++ b/scripts/copy_reference_genome.py @@ -146,7 +146,7 @@ def main(lims, args, epp_logger): parser.add_argument( "--log", help=( - "File name for standard log file, " " for runtime information and problems." + "File name for standard log file, for runtime information and problems." ), ) args = parser.parse_args() diff --git a/scripts/fetch_last_known_field.py b/scripts/fetch_last_known_field.py index 40115735..536afd8b 100644 --- a/scripts/fetch_last_known_field.py +++ b/scripts/fetch_last_known_field.py @@ -32,9 +32,9 @@ def main(args): # Get the name of the target UDF from the step field target_udf = process.udf.get(args.step_udf, None) - assert ( - target_udf is not None and target_udf != "None" - ), f"No target UDF supplied from step field '{args.step_udf}'" + assert target_udf is not None and target_udf != "None", ( + f"No target UDF supplied from step field '{args.step_udf}'" + ) # Check whether process has output artifacts, not the case for e.g. 
QC steps no_outputs: bool = True if udf_tools.get_art_tuples(process) == [] else False diff --git a/scripts/generate_anglerfish_samplesheet.py b/scripts/generate_anglerfish_samplesheet.py index 3b4cc925..76fc278f 100644 --- a/scripts/generate_anglerfish_samplesheet.py +++ b/scripts/generate_anglerfish_samplesheet.py @@ -31,9 +31,9 @@ def generate_anglerfish_samplesheet(process): """ ont_libraries = [art for art in process.all_outputs() if art.type == "Analyte"] - assert ( - len(ont_libraries) == 1 - ), "Samplesheet can only be generated for a single sequencing library." + assert len(ont_libraries) == 1, ( + "Samplesheet can only be generated for a single sequencing library." + ) ont_library = ont_libraries[0] df = get_ont_library_contents( diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 54a9ada2..3226269c 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -152,9 +152,9 @@ def get_manifests(process: Process, manifest_root_name: str) -> list[tuple[str, # Assert output analytes loaded on flowcell arts_out = [op for op in process.all_outputs() if op.type == "Analyte"] - assert ( - len(arts_out) == 1 or len(arts_out) == 2 - ), "Expected one or two output analytes." + assert len(arts_out) == 1 or len(arts_out) == 2, ( + "Expected one or two output analytes." + ) # Assert lanes lanes = [art_out.location[1].split(":")[0] for art_out in arts_out] @@ -169,17 +169,17 @@ def get_manifests(process: Process, manifest_root_name: str) -> list[tuple[str, for pool, lane in zip(arts_out, lanes): # Get sample-label linkage via database sample2label: dict[str, str] = get_pool_sample_label_mapping(pool) - assert len(set(pool.reagent_labels)) == len( - pool.reagent_labels - ), "Detected non-unique reagent labels." + assert len(set(pool.reagent_labels)) == len(pool.reagent_labels), ( + "Detected non-unique reagent labels." + ) # Record PhiX UDFs for each output artifact phix_loaded: bool = pool.udf["% phiX"] != 0 phix_set_name = pool.udf.get("Element PhiX Set", None) if phix_loaded: - assert ( - phix_set_name is not None - ), "PhiX controls loaded but no kit specified." + assert phix_set_name is not None, ( + "PhiX controls loaded but no kit specified." + ) else: assert phix_set_name is None, "PhiX controls specified but not loaded." @@ -456,7 +456,7 @@ def main(args: Namespace): # Create manifest root name flowcell_id = get_flowcell_id(process) - manifest_root_name = f"AVITI_run_manifest_{flowcell_id}_{process.id}_{TIMESTAMP}_{process.technician.name.replace(' ','')}" + manifest_root_name = f"AVITI_run_manifest_{flowcell_id}_{process.id}_{TIMESTAMP}_{process.technician.name.replace(' ', '')}" # Create manifest(s) manifests: list[tuple[str, str]] = get_manifests(process, manifest_root_name) diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py index dfb41e5e..18bb92b2 100644 --- a/scripts/generate_minknow_samplesheet.py +++ b/scripts/generate_minknow_samplesheet.py @@ -87,7 +87,9 @@ def get_ont_library_contents( udf_ont_barcode_well = fetch( ont_pooling_input, "ONT Barcode Well", on_fail=None ) - assert udf_ont_barcode_well, f"Pooling input '{ont_pooling_input.name}' consists of multiple samples, but has not been assigned an ONT barcode." + assert udf_ont_barcode_well, ( + f"Pooling input '{ont_pooling_input.name}' consists of multiple samples, but has not been assigned an ONT barcode." 
+ ) sanitized_well = udf_ont_barcode_well.upper().replace(":", "") ont_barcode = ont_barcode_well2label[sanitized_well] @@ -113,9 +115,9 @@ def get_ont_library_contents( elif len(ont_pooling_input.samples) == 1: # Remaining possibilities: # (2) ONT-barcodes only - assert ( - len(ont_pooling_input.reagent_labels) == 1 - ), f"ONT-pooling input '{ont_pooling_input.name}' lacks any reagent labels. Mixing barcoded and non-barcoded samples is not allowed." + assert len(ont_pooling_input.reagent_labels) == 1, ( + f"ONT-pooling input '{ont_pooling_input.name}' lacks any reagent labels. Mixing barcoded and non-barcoded samples is not allowed." + ) # ONT barcode-level demultiplexing for ont_sample in ont_pooling_input.samples: @@ -224,12 +226,12 @@ def get_pool_sample_label_mapping(pool: Artifact) -> dict[str, str]: cursor.execute(query.format(sample.name)) query_results = cursor.fetchall() - assert ( - len(query_results) != 0 - ), f"No reagent labels found for sample '{sample.name}'." - assert ( - len(query_results) == 1 - ), f"Multiple reagent labels found for sample '{sample.name}'." + assert len(query_results) != 0, ( + f"No reagent labels found for sample '{sample.name}'." + ) + assert len(query_results) == 1, ( + f"Multiple reagent labels found for sample '{sample.name}'." + ) label = query_results[0][0] sample2label[sample.name] = label @@ -253,7 +255,7 @@ def get_kit_string(process: Process) -> str: expansion_kit = process.udf.get("ONT expansion kit") if expansion_kit != "None": - prep_kit += f" {expansion_kit.replace('.','-')}" + prep_kit += f" {expansion_kit.replace('.', '-')}" return prep_kit @@ -359,9 +361,9 @@ def generate_MinKNOW_samplesheet(args): ) # Assert flowcell type is written in a valid format - assert ( - process.udf["ONT flow cell type"] in valid_flowcell_type_strings - ), f"Invalid flow cell type {process.udf['ONT flow cell type']}." + assert process.udf["ONT flow cell type"] in valid_flowcell_type_strings, ( + f"Invalid flow cell type {process.udf['ONT flow cell type']}." + ) # Parse flowcell product code flowcell_product_code = process.udf["ONT flow cell type"].split(" ", 1)[0] @@ -384,20 +386,22 @@ def generate_MinKNOW_samplesheet(args): # Assert position makes sense with the flowcell type if "PromethION" in row["flow_cell_type"]: - assert ( - row["position_id"] != "None" - ), "Positions must be specified for PromethION flow cells." + assert row["position_id"] != "None", ( + "Positions must be specified for PromethION flow cells." + ) else: - assert ( - row["position_id"] == "None" - ), "Positions must be unassigned for non-PromethION flow cells." + assert row["position_id"] == "None", ( + "Positions must be unassigned for non-PromethION flow cells." + ) # 1) Barcodes implied from kit selection if process.udf.get("ONT expansion kit") != "None" or process.udf.get( "ONT prep kit" ) in ["SQK-PCB114-24"]: # Assert barcodes are found within library - assert ont_barcodes, f"ONT barcodes are implied from kit selection, but no ONT barcodes were found within library {ont_library.name}" + assert ont_barcodes, ( + f"ONT barcodes are implied from kit selection, but no ONT barcodes were found within library {ont_library.name}" + ) # Append rows for each barcode alias_column_name = "illumina_pool_name" if qc else "sample_name" @@ -414,9 +418,9 @@ def generate_MinKNOW_samplesheet(args): ONT_BARCODE_LABEL_PATTERN, barcode_row_data["ont_barcode"], ) - assert ( - barcode_label_match - ), f"Could not parse barcode '{barcode_row_data['ont_barcode']}'." 
+ assert barcode_label_match, ( + f"Could not parse barcode '{barcode_row_data['ont_barcode']}'." + ) barcode_id = barcode_label_match.group(2) row["barcode"] = f"barcode{barcode_id}" @@ -428,7 +432,9 @@ def generate_MinKNOW_samplesheet(args): # 2) No barcodes implied from kit selection else: # Assert barcodes are not found within library - assert not ont_barcodes, f"Library '{ont_library.name}' appears to contain ONT barcodes, but no ONT barcodes are implied from the kit selection." + assert not ont_barcodes, ( + f"Library '{ont_library.name}' appears to contain ONT barcodes, but no ONT barcodes are implied from the kit selection." + ) # Append single row rows.append(row) @@ -450,16 +456,16 @@ def generate_MinKNOW_samplesheet(args): assert all( ["PromethION" in fc_type for fc_type in df.flow_cell_type.unique()] ), "Only PromethION flowcells can be grouped together in the same sample sheet." - assert ( - len(ont_libraries) <= 24 - ), "Only up to 24 PromethION flowcells may be started at once." + assert len(ont_libraries) <= 24, ( + "Only up to 24 PromethION flowcells may be started at once." + ) elif len(ont_libraries) == 1 and "MinION" in df.flow_cell_type[0]: - assert ( - df.position_id[0] == "None" - ), "MinION flow cells should not have a position assigned." - assert ( - len(df.flow_cell_product_code.unique()) == len(df.kit.unique()) == 1 - ), "All rows must have the same flow cell type and kits" + assert df.position_id[0] == "None", ( + "MinION flow cells should not have a position assigned." + ) + assert len(df.flow_cell_product_code.unique()) == len(df.kit.unique()) == 1, ( + "All rows must have the same flow cell type and kits" + ) assert ( len(df.position_id.unique()) == len(df.flow_cell_id.unique()) @@ -467,7 +473,7 @@ def generate_MinKNOW_samplesheet(args): ), "All rows must have different flow cell positions and IDs" # Generate samplesheet - file_name = f"MinKNOW_samplesheet_{process.id}_{TIMESTAMP}_{process.technician.name.replace(' ','')}.csv" + file_name = f"MinKNOW_samplesheet_{process.id}_{TIMESTAMP}_{process.technician.name.replace(' ', '')}.csv" write_minknow_csv(df, file_name) return file_name diff --git a/scripts/index_distance_checker.py b/scripts/index_distance_checker.py index 9572d899..9d9087d0 100644 --- a/scripts/index_distance_checker.py +++ b/scripts/index_distance_checker.py @@ -486,9 +486,7 @@ def main(lims, pid, auto): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." - ), + help=("File name for standard log file, for runtime information and problems."), ) parser.add_argument( "--auto", diff --git a/scripts/logbook.py b/scripts/logbook.py index 078bee81..5ffb935a 100644 --- a/scripts/logbook.py +++ b/scripts/logbook.py @@ -67,7 +67,7 @@ def get_credentials(): def write_record(content, dest_file): credentials = get_credentials() http = credentials.authorize(httplib2.Http()) - discoveryUrl = "https://sheets.googleapis.com/$discovery/rest?" "version=v4" + discoveryUrl = "https://sheets.googleapis.com/$discovery/rest?version=v4" service = discovery.build( "sheets", "v4", http=http, discoveryServiceUrl=discoveryUrl ) @@ -172,9 +172,7 @@ def main(lims, pid, epp_logger): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." 
- ), + help=("File name for standard log file, for runtime information and problems."), ) args = parser.parse_args() lims = Lims(BASEURI, USERNAME, PASSWORD) diff --git a/scripts/logbook_service_account.py b/scripts/logbook_service_account.py index bcfe308b..9eaeae8b 100644 --- a/scripts/logbook_service_account.py +++ b/scripts/logbook_service_account.py @@ -154,9 +154,7 @@ def main(lims, pid, epp_logger): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." - ), + help=("File name for standard log file, for runtime information and problems."), ) args = parser.parse_args() lims = Lims(BASEURI, USERNAME, PASSWORD) diff --git a/scripts/molar_concentration.py b/scripts/molar_concentration.py index 57af5238..e413b74b 100644 --- a/scripts/molar_concentration.py +++ b/scripts/molar_concentration.py @@ -118,9 +118,7 @@ def main(lims, args, epp_logger): parser.add_argument( "--log", default=sys.stdout, - help=( - "File name for standard log file, " "for runtime information and problems." - ), + help=("File name for standard log file, for runtime information and problems."), ) parser.add_argument( "--aggregate", diff --git a/scripts/multiply_with_dilution_fold.py b/scripts/multiply_with_dilution_fold.py index 725fb0ee..c593daa7 100644 --- a/scripts/multiply_with_dilution_fold.py +++ b/scripts/multiply_with_dilution_fold.py @@ -66,15 +66,13 @@ def main(lims, pid, aggregate, epp_logger): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." - ), + help=("File name for standard log file, for runtime information and problems."), ) parser.add_argument( "--aggregate", dest="aggregate", action="store_true", - help=("Use this tag if current Process is an " "aggregate QC step"), + help=("Use this tag if current Process is an aggregate QC step"), ) args = parser.parse_args() diff --git a/scripts/ont_calc_volumes.py b/scripts/ont_calc_volumes.py index 312dcbb1..7e4e069d 100644 --- a/scripts/ont_calc_volumes.py +++ b/scripts/ont_calc_volumes.py @@ -55,9 +55,9 @@ def main(lims, args): # Get current stats vol = udf_tools.fetch(art_in, "Volume (ul)") - log.append(f"'Volume (ul)': {round(vol,2)}") + log.append(f"'Volume (ul)': {round(vol, 2)}") conc = udf_tools.fetch(art_in, "Concentration") - log.append(f"'Concentration': {round(conc,2)}") + log.append(f"'Concentration': {round(conc, 2)}") conc_units = udf_tools.fetch(art_in, "Conc. Units") log.append(f"'Conc. 
Units': {conc_units}") assert conc_units.lower() in [ @@ -68,7 +68,7 @@ def main(lims, args): # Calculate volume to take, based on supplied info if udf_tools.is_filled(art_out, "ONT flow cell loading amount (fmol)"): log.append( - f"Basing calculations on 'ONT flow cell loading amount (fmol)': {round(udf_tools.fetch(art_out, 'ONT flow cell loading amount (fmol)'),2)}" + f"Basing calculations on 'ONT flow cell loading amount (fmol)': {round(udf_tools.fetch(art_out, 'ONT flow cell loading amount (fmol)'), 2)}" ) if conc_units.lower() == "nm": vol_to_take = min( @@ -88,7 +88,7 @@ def main(lims, args): ) elif udf_tools.is_filled(art_out, "Amount (fmol)"): log.append( - f"Basing calculations on 'Amount (fmol): {round(udf_tools.fetch(art_out, 'Amount (fmol)'),2)}'" + f"Basing calculations on 'Amount (fmol): {round(udf_tools.fetch(art_out, 'Amount (fmol)'), 2)}'" ) if conc_units.lower() == "nm": vol_to_take = min(udf_tools.fetch(art_out, "Amount (fmol)") / conc, vol) @@ -103,7 +103,7 @@ def main(lims, args): ) elif udf_tools.is_filled(art_out, "Amount (ng)"): log.append( - f"Basing calculations on 'Amount (ng)': {round(udf_tools.fetch(art_out, 'Amount (ng)'),2)}" + f"Basing calculations on 'Amount (ng)': {round(udf_tools.fetch(art_out, 'Amount (ng)'), 2)}" ) if conc_units.lower() == "ng/ul": vol_to_take = min(udf_tools.fetch(art_out, "Amount (ng)") / conc, vol) @@ -116,7 +116,7 @@ def main(lims, args): ) elif udf_tools.is_filled(art_out, "Volume to take (uL)"): log.append( - f"Basing calculations on 'Volume to take (uL)': {round(udf_tools.fetch(art_out, 'Volume to take (uL)'),2)}" + f"Basing calculations on 'Volume to take (uL)': {round(udf_tools.fetch(art_out, 'Volume to take (uL)'), 2)}" ) vol_to_take = min(udf_tools.fetch(art_out, "Volume to take (uL)"), vol) else: diff --git a/scripts/ont_pool.py b/scripts/ont_pool.py index 5835d31d..ed345a45 100644 --- a/scripts/ont_pool.py +++ b/scripts/ont_pool.py @@ -43,9 +43,9 @@ def main(lims, args): df = fetch_sample_data(currentStep, to_fetch) - assert all( - [i in ["ng/ul", "nM"] for i in df.conc_units] - ), "Some of the pool inputs have invalid concentration units." + assert all([i in ["ng/ul", "nM"] for i in df.conc_units]), ( + "Some of the pool inputs have invalid concentration units." 
+ ) df["conc_nM"] = df.apply( lambda x: x @@ -71,7 +71,7 @@ def main(lims, args): pool_target_amt_fmol = pool.udf["Amount (fmol)"] pool_target_vol = None - log.append(f"Target amt: {round(pool_target_amt_fmol,1)} fmol") + log.append(f"Target amt: {round(pool_target_amt_fmol, 1)} fmol") target_amt_fmol = pool_target_amt_fmol / len(df_pool) @@ -88,7 +88,7 @@ def main(lims, args): pool_target_amt_fmol = None pool_target_vol = pool.udf["Final Volume (uL)"] - log.append(f"Target vol: {round(pool_target_vol,1)} uL") + log.append(f"Target vol: {round(pool_target_vol, 1)} uL") # Apply molar proportions to target volume to get transfer amounts df_pool["transfer_vol_ul"] = minimum( @@ -127,7 +127,7 @@ def main(lims, args): df_to_print = df_to_print.round(1) log.append(tabulate(df_to_print, headers="keys")) log.append(f"\nFinal amt: {round(pool_transfer_amt, 1)} fmol") - log.append(f"Final vol: {round(pool_transfer_vol,1)} uL") + log.append(f"Final vol: {round(pool_transfer_vol, 1)} uL") log.append("\n") # Write log diff --git a/scripts/ont_send_reloading_info_to_db.py b/scripts/ont_send_reloading_info_to_db.py index de8fc6d8..efd397e8 100644 --- a/scripts/ont_send_reloading_info_to_db.py +++ b/scripts/ont_send_reloading_info_to_db.py @@ -46,16 +46,16 @@ def send_reloading_info_to_db(process: Process): rows_matching_run: list[Row] = [ row for row in view.rows - if f'{run["run_name"]}' in row.value["TACA_run_path"] + if f"{run['run_name']}" in row.value["TACA_run_path"] ] try: - assert ( - len(rows_matching_run) > 0 - ), f"The database contains no document with run name '{run['run_name']}'. If the run was recently started, wait until it appears in GenStat." - assert ( - len(rows_matching_run) == 1 - ), f"The database contains multiple documents with run name '{run['run_name']}'. Contact a database administrator." + assert len(rows_matching_run) > 0, ( + f"The database contains no document with run name '{run['run_name']}'. If the run was recently started, wait until it appears in GenStat." + ) + assert len(rows_matching_run) == 1, ( + f"The database contains multiple documents with run name '{run['run_name']}'. Contact a database administrator." 
+ ) doc_id: str = rows_matching_run[0].id doc: Document = db[doc_id] @@ -120,18 +120,20 @@ def parse_run(art: Artifact) -> dict | None: and len(fc["reload_times"]) == len(fc["reload_fmols"]) == len(fc["reload_lots"]) - ), "All reload UDFs within a row must have the same number of comma-separated values" + ), ( + "All reload UDFs within a row must have the same number of comma-separated values" + ) - assert check_csv_udf_list( - r"^\d{1,3}:\d{2}$", fc["reload_times"] - ), "Reload run times must be formatted as comma-separated h:mm" + assert check_csv_udf_list(r"^\d{1,3}:\d{2}$", fc["reload_times"]), ( + "Reload run times must be formatted as comma-separated h:mm" + ) check_times_list(fc["reload_times"]) - assert check_csv_udf_list( - r"^[0-9.]+$", fc["reload_fmols"] - ), "Invalid flow cell reload amount(s)" - assert check_csv_udf_list( - r"^[0-9a-zA-Z.-_]+$", fc["reload_lots"] - ), "Invalid Reload wash kit" + assert check_csv_udf_list(r"^[0-9.]+$", fc["reload_fmols"]), ( + "Invalid flow cell reload amount(s)" + ) + assert check_csv_udf_list(r"^[0-9a-zA-Z.-_]+$", fc["reload_lots"]), ( + "Invalid Reload wash kit" + ) return fc @@ -145,12 +147,12 @@ def check_times_list(times_list: list[str]): for time in times_list: _hours, _minutes = time.split(":") hours, minutes = int(_hours), int(_minutes) - assert hours > prev_hours or ( - hours == prev_hours and minutes > prev_minutes - ), f"Times in field {times_list} are non-sequential." - assert ( - minutes < 60 - ), f"Field {times_list} contains invalid entries (minutes >= 60)." + assert hours > prev_hours or (hours == prev_hours and minutes > prev_minutes), ( + f"Times in field {times_list} are non-sequential." + ) + assert minutes < 60, ( + f"Field {times_list} contains invalid entries (minutes >= 60)." + ) prev_hours, prev_minutes = hours, minutes diff --git a/scripts/ont_suggest_ports.py b/scripts/ont_suggest_ports.py index 509e5e55..c7f7d05b 100644 --- a/scripts/ont_suggest_ports.py +++ b/scripts/ont_suggest_ports.py @@ -49,9 +49,9 @@ def main(lims, args): for output in outputs: try: if output.udf["ONT flow cell position"] != "None": - assert ( - output.udf["ONT flow cell position"] in ports.keys() - ), f'{output.udf["ONT flow cell position"]} is not a valid position' + assert output.udf["ONT flow cell position"] in ports.keys(), ( + f"{output.udf['ONT flow cell position']} is not a valid position" + ) ports_used.append(output.udf["ONT flow cell position"]) except KeyError: continue @@ -79,7 +79,7 @@ def main(lims, args): ports_used.append(port_tuple[0]) # Print ports to stdout, starting with the least used - message = f'Listing ports, from least to most used, down-prioritizing column 3: {", ".join([port[0] for port in ports_list])}' + message = f"Listing ports, from least to most used, down-prioritizing column 3: {', '.join([port[0] for port in ports_list])}" sys.stdout.write(message) except AssertionError as e: diff --git a/scripts/ont_sync_to_db.py b/scripts/ont_sync_to_db.py index e51a0c27..29be41ab 100644 --- a/scripts/ont_sync_to_db.py +++ b/scripts/ont_sync_to_db.py @@ -179,9 +179,9 @@ def sync_runs_to_db(process: Process, args: Namespace, lims: Lims): # Assert that only one input is provided for QC runs if "QC" in process.type.name: - assert ( - len(arts) == 1 - ), "When starting QC sequencing runs, only one input is allowed." + assert len(arts) == 1, ( + "When starting QC sequencing runs, only one input is allowed." 
+ ) # Keep track of which artifacts were successfully updated arts_successful = [] diff --git a/scripts/parse_anglerfish_results.py b/scripts/parse_anglerfish_results.py index 52c4318e..12b7ecad 100644 --- a/scripts/parse_anglerfish_results.py +++ b/scripts/parse_anglerfish_results.py @@ -48,9 +48,9 @@ def find_latest_anglerfish_run(run_path: str) -> str: logging.info(f"Looking for Anglerfish runs with query {anglerfish_query}") anglerfish_glob = glob.glob(anglerfish_query, recursive=True) - assert ( - len(anglerfish_glob) != 0 - ), f"No Anglerfish runs found for query {anglerfish_query}" + assert len(anglerfish_glob) != 0, ( + f"No Anglerfish runs found for query {anglerfish_query}" + ) if len(anglerfish_glob) > 1: runs_list = "\n".join(anglerfish_glob) @@ -157,10 +157,11 @@ def fill_udfs(process: Process, df: pd.DataFrame): measurements.append(op) measurements.sort(key=lambda x: x.name) - assert ( - len(measurements) - == len(df[df["sample_name"].isin([m.name for m in measurements])]) - ), "Number of samples demultiplexed in LIMS does not correspond to number of sample rows in Anglerfish results." + assert len(measurements) == len( + df[df["sample_name"].isin([m.name for m in measurements])] + ), ( + "Number of samples demultiplexed in LIMS does not correspond to number of sample rows in Anglerfish results." + ) # Relate UDF names to dataframe column names udf2col = { diff --git a/scripts/parse_caliper_results.py b/scripts/parse_caliper_results.py index 509a5bdb..fa0db0ad 100644 --- a/scripts/parse_caliper_results.py +++ b/scripts/parse_caliper_results.py @@ -157,9 +157,7 @@ def main(lims, pid, epp_logger): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." - ), + help=("File name for standard log file, for runtime information and problems."), ) args = parser.parse_args() diff --git a/scripts/parse_vc100_results.py b/scripts/parse_vc100_results.py index cc84215e..ad0c8435 100644 --- a/scripts/parse_vc100_results.py +++ b/scripts/parse_vc100_results.py @@ -106,9 +106,7 @@ def main(lims, pid, epp_logger): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." - ), + help=("File name for standard log file, for runtime information and problems."), ) args = parser.parse_args() diff --git a/scripts/qc_amount_calculation.py b/scripts/qc_amount_calculation.py index 2e258259..ffb2b468 100644 --- a/scripts/qc_amount_calculation.py +++ b/scripts/qc_amount_calculation.py @@ -181,7 +181,7 @@ def main(lims, args, epp_logger): parser.add_argument( "--aggregate", action="store_true", - help=("Use this tag if current Process is an " "aggregate QC step"), + help=("Use this tag if current Process is an aggregate QC step"), ) args = parser.parse_args() diff --git a/scripts/quality_filter.py b/scripts/quality_filter.py index 72a205e0..57a6ef24 100644 --- a/scripts/quality_filter.py +++ b/scripts/quality_filter.py @@ -62,7 +62,7 @@ def get_and_set_yield_and_Q30(self): self._set_udfs(samp_name, target_file, lane) if self.nr_samps_updat: self.abstract_ext.append( - f"LANE {lane} with {str(len(set(self.nr_samps_updat)))} samples." "" + f"LANE {lane} with {str(len(set(self.nr_samps_updat)))} samples." ) if self.missing_samps: self.abstract_ext.append( @@ -124,9 +124,7 @@ def main(lims, pid, epp_logger): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." 
- ), + help=("File name for standard log file, for runtime information and problems."), ) args = parser.parse_args() diff --git a/scripts/quantit_generate_driver_file.py b/scripts/quantit_generate_driver_file.py index a68a2170..9d99e5dd 100644 --- a/scripts/quantit_generate_driver_file.py +++ b/scripts/quantit_generate_driver_file.py @@ -72,9 +72,7 @@ def main(lims, pid, drivf, epp_logger): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." - ), + help=("File name for standard log file, for runtime information and problems."), ) parser.add_argument( "--drivf", diff --git a/scripts/quantit_set_QC.py b/scripts/quantit_set_QC.py index abc3f09a..7140a6aa 100644 --- a/scripts/quantit_set_QC.py +++ b/scripts/quantit_set_QC.py @@ -137,7 +137,7 @@ def main(lims, pid, epp_logger): QiT.assign_QC_flag() if QiT.flour_int_missing: QiT.abstract.append( - f"Fluorescence intensity is missing for {QiT.flour_int_missing} " "samples." + f"Fluorescence intensity is missing for {QiT.flour_int_missing} samples." ) if QiT.missing_udfs: QiT.abstract.append( @@ -146,12 +146,11 @@ def main(lims, pid, epp_logger): ) else: QiT.abstract.append( - f"{QiT.no_failed} out of {len(process.result_files())} samples failed " - "QC." + f"{QiT.no_failed} out of {len(process.result_files())} samples failed QC." ) if QiT.saturated: QiT.abstract.append( - f"{QiT.saturated} samples had saturated fluorescence " "intensity." + f"{QiT.saturated} samples had saturated fluorescence intensity." ) if QiT.hig_CV_fract: QiT.abstract.append(f"{QiT.hig_CV_fract} samples had high %CV.") @@ -159,7 +158,7 @@ def main(lims, pid, epp_logger): QiT.abstract.append(f"{QiT.low_conc} samples had low concentration.") if QiT.conc_missing: QiT.abstract.append( - f"Concentration is missing for {QiT.conc_missing} " "sample(s)." + f"Concentration is missing for {QiT.conc_missing} sample(s)." ) QiT.abstract = list(set(QiT.abstract)) print(" ".join(QiT.abstract), file=sys.stderr) @@ -173,9 +172,7 @@ def main(lims, pid, epp_logger): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." - ), + help=("File name for standard log file, for runtime information and problems."), ) args = parser.parse_args() diff --git a/scripts/quantit_set_conc.py b/scripts/quantit_set_conc.py index 225e7069..c4581a78 100644 --- a/scripts/quantit_set_conc.py +++ b/scripts/quantit_set_conc.py @@ -219,13 +219,13 @@ def main(lims, pid, epp_logger): rel_fluor_int = QiT.get_and_set_fluor_int(target_file) QiT.calc_and_set_conc(target_file, rel_fluor_int) QiT.abstract.append( - f"Concentrations uploaded for {QiT.no_samps} " "samples." + f"Concentrations uploaded for {QiT.no_samps} samples." ) else: QiT.abstract.append("Upload input file(s) for samples.") else: QiT.abstract.insert( - 0, f"R2 = {R2}. Problem with standards! Redo " "measurement!" + 0, f"R2 = {R2}. Problem with standards! Redo measurement!" ) else: QiT.missing_udfs.append("Linearity of standards") @@ -251,9 +251,7 @@ def main(lims, pid, epp_logger): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." 
- ), + help=("File name for standard log file, for runtime information and problems."), ) args = parser.parse_args() diff --git a/scripts/set_App_QC.py b/scripts/set_App_QC.py index dc88e188..b31698c2 100644 --- a/scripts/set_App_QC.py +++ b/scripts/set_App_QC.py @@ -75,7 +75,7 @@ def logging(self): ) if self.missing_samps: self.abstract.append( - "The following samples are missing in " "App_QC_file: {}.".format( + "The following samples are missing in App_QC_file: {}.".format( ", ".join(self.missing_samps) ) ) @@ -99,9 +99,7 @@ def main(lims, pid, epp_logger, App_QC_file): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." - ), + help=("File name for standard log file, for runtime information and problems."), ) parser.add_argument("--file", dest="file", help=("File path to new App_QC file")) args = parser.parse_args() diff --git a/scripts/undemultiplexed_index.py b/scripts/undemultiplexed_index.py index 82898307..82509d38 100644 --- a/scripts/undemultiplexed_index.py +++ b/scripts/undemultiplexed_index.py @@ -149,8 +149,7 @@ def _get_file_path(self, cont_name): path_id = cont_name try: self.file_path = glob.glob( - f"/srv/ngi-nas-ns/{data_folder}/*{path_id}/Unaligned/" - "Basecall_Stats_*/" + f"/srv/ngi-nas-ns/{data_folder}/*{path_id}/Unaligned/Basecall_Stats_*/" )[0] except: sys.exit("Failed to get file path") @@ -587,9 +586,7 @@ def main(lims, pid, epp_logger, demuxfile, qc_log_file): parser.add_argument( "--log", dest="log", - help=( - "File name for standard log file, " "for runtime information and problems." - ), + help=("File name for standard log file, for runtime information and problems."), ) parser.add_argument( "--file", diff --git a/scripts/zebra_barcodes.py b/scripts/zebra_barcodes.py index 5ada59f5..ea8d3781 100644 --- a/scripts/zebra_barcodes.py +++ b/scripts/zebra_barcodes.py @@ -127,24 +127,23 @@ def getArgs(): "--container_id", action="store_true", help=( - "Print output container id label in both " - "barcode format and human readable." + "Print output container id label in both barcode format and human readable." 
), ) parser.add_argument( "--operator_and_date", action="store_true", - help=("Print label with both operator " "and todays date."), + help=("Print label with both operator and todays date."), ) parser.add_argument( "--container_name", action="store_true", - help=("Print label with human readable" "container name (user defined)"), + help=("Print label with human readablecontainer name (user defined)"), ) parser.add_argument( "--process_name", action="store_true", - help=("Print label with human readable" "process name"), + help=("Print label with human readableprocess name"), ) parser.add_argument( "--copies", @@ -162,7 +161,7 @@ def getArgs(): parser.add_argument( "--use_printer", action="store_true", - help=("Print file on default or " "supplied printer using lp command."), + help=("Print file on default or supplied printer using lp command."), ) parser.add_argument("--hostname", help="Hostname for lp CUPS server.") parser.add_argument("--destination", help="Name of printer.") From 542daeed27f6fda487a51fa87ad353d969da2adf Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Tue, 21 Jan 2025 11:12:31 +0100 Subject: [PATCH 33/35] improve logging --- scilifelab_epps/calc_from_args/udf_arg_methods.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scilifelab_epps/calc_from_args/udf_arg_methods.py b/scilifelab_epps/calc_from_args/udf_arg_methods.py index 77ef5236..3321f3b1 100644 --- a/scilifelab_epps/calc_from_args/udf_arg_methods.py +++ b/scilifelab_epps/calc_from_args/udf_arg_methods.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +import logging from typing import Any from genologics.entities import Artifact, Process @@ -67,6 +68,13 @@ def fetch_from_arg( else: return on_fail + log_str = ( + f"Fetched UDF '{arg_dict['udf']}': {value}" + + f"{' (recursive)' if arg_dict['recursive'] else ''}" + + f" from {arg_dict['source']} '{source_name}'." + ) + logging.info(log_str) + return value From 2d2b9121e7a51ca30a2373bb44ffe80501b9752d Mon Sep 17 00:00:00 2001 From: kedhammar <alfred.kedhammar@scilifelab.se> Date: Wed, 22 Jan 2025 13:01:30 +0100 Subject: [PATCH 34/35] init --- VERSIONLOG.md | 4 ++++ scripts/generate_aviti_run_manifest.py | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index 4d9265b5..b4433856 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # Scilifelab_epps Version Log +## 20250122.1 + +Create yearly dir for AVITI run manifests. + ## 20250116.1 Ruff 0.9.2 formatting. diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 3226269c..1bc0b968 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -485,9 +485,13 @@ def main(args: Namespace): # Move manifest(s) logging.info("Moving run manifest to ngi-nas-ns...") try: + dst = f"/srv/ngi-nas-ns/samplesheets/Aviti/{dt.now().year}" + if not os.path.exists(dst): + logging.info(f"Happy new year! 
Creating {dst}") + os.mkdir(dst) shutil.copyfile( zip_file, - f"/srv/ngi-nas-ns/samplesheets/Aviti/{dt.now().year}/{zip_file}", + f"{dst}/{zip_file}", ) os.remove(zip_file) except: From 5b145b32463173553a49e015f80722b8b3623317 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Wed, 22 Jan 2025 15:16:17 +0100 Subject: [PATCH 35/35] Fix date in vlog --- VERSIONLOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index c3464406..0243a06b 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,6 +1,6 @@ # Scilifelab_epps Version Log -## 20241222.2 +## 20250122.2 Rebuild EPP to fetch last recorded derived sample UDF.