From 19d1d928f063fa268b86798e2627fcf84954601d Mon Sep 17 00:00:00 2001 From: Nicolai-vKuegelgen Date: Mon, 19 Aug 2024 13:06:05 +0200 Subject: [PATCH 1/8] feat: presets for cubi-tk sodar ingest-fastq (#232 ) --- cubi_tk/sodar/ingest_fastq.py | 104 +++++++++++++----- tests/test_sodar_ingest_fastq.py | 180 +++++++++++++++++++++++++++++-- 2 files changed, 249 insertions(+), 35 deletions(-) diff --git a/cubi_tk/sodar/ingest_fastq.py b/cubi_tk/sodar/ingest_fastq.py index 6f213f2a..a7081d64 100644 --- a/cubi_tk/sodar/ingest_fastq.py +++ b/cubi_tk/sodar/ingest_fastq.py @@ -30,17 +30,40 @@ formatter = logzero.LogFormatter(fmt="%(message)s") output_logger = logzero.setup_logger(formatter=formatter) -DEFAULT_SRC_REGEX = ( - r"(.*/)?(?P.+?)" - r"(?:_S[0-9]+)?" - r"(?:_(?PL[0-9]+?))?" - r"(?:_(?PR[0-9]+?))?" - r"(?:_(?P[0-9]+?))?" - r"\.f(?:ast)?q\.gz" -) - -#: Default value for --dest-pattern -DEFAULT_DEST_PATTERN = r"{collection_name}/raw_data/{date}/{filename}" +SRC_REGEX_PRESETS = { + "default": ( + r"(.*/)?(?P.+?)" + r"(?:_S[0-9]+)?" + r"(?:_(?PL[0-9]+?))?" + r"(?:_(?PR[0-9]+?))?" + r"(?:_(?P[0-9]+?))?" + r"\.f(?:ast)?q\.gz" + ), + "digestiflow": ( + r"(.*/)?(?P[A-Z0-9]{9,10}?)/" + r"(?PL[0-9]{3}?)/" + r"(?P.+?)_" + r"S[0-9]+_L[0-9]{3}_R[0-9]_[0-9]{3}" + r"\.fastq\.gz" + ), + "ONT": ( + r"(.*/)?" + r"[0-9]{8}_" # Date + # Sample could be __, but this is not given and may change between projects + r"(?P[a-zA-Z0-9_-]+?)/" + # RunID is _