Merge pull request #30 from phac-nml/dev

Update to version 0.3.0

kylacochrane authored Sep 10, 2024
2 parents 50949d1 + 0c35862 commit 9bbd787

Showing 19 changed files with 226 additions and 63 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
@@ -3,6 +3,18 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.3.0] - 2024-09-10
+
+### Changed
+
+- Upgraded `profile_dist` container to version `1.0.2`
+- Upgraded `locidex/merge` to version `0.2.3` and updated `input_assure` and test data for compatibility with the new `mlst.json` allele file format.
+  - [PR28](https://github.com/phac-nml/gasclustering/pull/28)
+- Aligned container registry handling in configuration files and modules with `phac-nml/pipeline-standards`
+  - [PR28](https://github.com/phac-nml/gasclustering/pull/28)
+
+This pipeline is now compatible only with output generated by [Locidex v0.2.3+](https://github.com/phac-nml/locidex) and [Mikrokondo v0.4.0+](https://github.com/phac-nml/mikrokondo/releases/tag/v0.4.0).
+
 ## [0.2.0] - 2024-06-26
 
 ### Added
@@ -28,3 +40,4 @@ Initial release of the Genomic Address Service Clustering pipeline to be used fo
 
 [0.1.0]: https://github.com/phac-nml/gasclustering/releases/tag/0.1.0
 [0.2.0]: https://github.com/phac-nml/gasclustering/releases/tag/0.2.0
+[0.3.0]: https://github.com/phac-nml/gasclustering/releases/tag/0.3.0
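
For context, a minimal sketch of the Locidex v0.2.3+ `mlst.json` layout that this release targets. The nesting matches the updated test fixtures further down in this diff; the `parameters` block is abbreviated here, and the final `print` is only for illustration. The `data` → `profile` lookup is the one the updated `bin/input_assure.py` below now performs.

```python
import json

# Abbreviated example of the Locidex v0.2.3+ mlst.json layout
# (same nesting as the fixtures under tests/data/reports/).
example = {
    "db_info": {},
    "parameters": {"mode": "normal"},
    "data": {
        "sample_name": "sample1",
        "profile": {"sample1": {"l1": "1", "l2": "1", "l3": "1"}},
        "seq_data": {},
    },
}

# The allele calls now live under data -> profile rather than at the top level,
# which is why input_assure.py switches from json_data.keys() to this lookup:
profile = example.get("data", {}).get("profile", {})
print(json.dumps(profile, indent=4))
```
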
27 changes: 16 additions & 11 deletions bin/input_assure.py
@@ -19,38 +19,43 @@ def check_inputs(json_file, sample_id, address, output_error_file, output_json_f
     with open_file(json_file, "rt") as f:
         json_data = json.load(f)
 
+    # Extract the profile from the json_data
+    profile = json_data.get("data", {}).get("profile", {})
+    # Check for multiple keys in the JSON file and define error message
+    keys = sorted(profile.keys())
+    original_key = keys[0] if keys else None
+
     # Define a variable to store the match_status (True or False)
-    match_status = sample_id in json_data
+    match_status = sample_id in profile
 
     # Initialize the error message
     error_message = None
 
-    # Check for multiple keys in the JSON file and define error message
-    keys = list(json_data.keys())
-    original_key = keys[0] if keys else None
-
-    if len(keys) == 0:
-        error_message = f"{json_file} is completely empty!"
+    if not keys:
+        error_message = (
+            f"{json_file} is missing the 'profile' section or is completely empty!"
+        )
         print(error_message)
         sys.exit(1)
     elif len(keys) > 1:
         # Check if sample_id matches any key
         if not match_status:
             error_message = f"No key in the MLST JSON file ({json_file}) matches the specified sample ID '{sample_id}'. The first key '{original_key}' has been forcefully changed to '{sample_id}' and all other keys have been removed."
             # Retain only the specified sample ID
-            json_data = {sample_id: json_data.pop(original_key)}
+            json_data["data"]["profile"] = {sample_id: profile.pop(original_key)}
         else:
             error_message = f"MLST JSON file ({json_file}) contains multiple keys: {keys}. The MLST JSON file has been modified to retain only the '{sample_id}' entry"
-            # Remove all keys expect the one matching sample_id
-            json_data = {sample_id: json_data[sample_id]}
+            # Retain only the specified sample_id in the profile
+            json_data["data"]["profile"] = {sample_id: profile[sample_id]}
     elif not match_status:
         # Define error message based on meta.address (query or reference)
         if address == "null":
             error_message = f"Query {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
         else:
             error_message = f"Reference {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
         # Update the JSON file with the new sample ID
-        json_data[sample_id] = json_data.pop(original_key)
+        json_data["data"]["profile"] = {sample_id: profile.pop(original_key)}
+        json_data["data"]["sample_name"] = sample_id
 
     # Write file containing relevant error messages
     if error_message:
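
To make the corrected behaviour concrete, here is a small standalone sketch, not the pipeline script itself: the helper name `fix_profile_key` is illustrative, and the error-file writing and gzip handling of `check_inputs` are omitted. It mirrors the single-key mismatch branch above, renaming the profile key and keeping `sample_name` in sync.

```python
def fix_profile_key(json_data: dict, sample_id: str) -> dict:
    """Rename a single mismatched profile key to sample_id, mirroring the
    single-key branch of check_inputs() in bin/input_assure.py."""
    profile = json_data.get("data", {}).get("profile", {})
    keys = sorted(profile.keys())
    original_key = keys[0] if keys else None

    if original_key is not None and sample_id not in profile:
        # Forcefully change the key and keep sample_name consistent.
        json_data["data"]["profile"] = {sample_id: profile.pop(original_key)}
        json_data["data"]["sample_name"] = sample_id
    return json_data


doc = {"data": {"sample_name": "sampleA", "profile": {"sampleA": {"l1": "1"}}, "seq_data": {}}}
assert list(fix_profile_key(doc, "sample1")["data"]["profile"]) == ["sample1"]
```
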
2 changes: 1 addition & 1 deletion conf/test.config
@@ -20,7 +20,7 @@ params {
     max_time = '1.h'
 
     // Input data
-    input = 'https://raw.githubusercontent.com/phac-nml/gasclustering/dev/assets/samplesheet.csv'
+    input = "${projectDir}/assets/samplesheet.csv"
 }
 
 
4 changes: 2 additions & 2 deletions modules/local/arborview.nf
@@ -9,8 +9,8 @@ process ARBOR_VIEW {
    stageInMode 'copy' // Need to copy in arbor view html
 
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        "docker.io/python:3.11.6" :
-        "docker.io/python:3.11.6" }"
+        'https://depot.galaxyproject.org/singularity/python%3A3.12' :
+        'biocontainers/python:3.12' }"
 
    input:
    tuple path(tree), path(contextual_data)
2 changes: 1 addition & 1 deletion modules/local/gas/mcluster/main.nf
@@ -6,7 +6,7 @@ process GAS_MCLUSTER{
 
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/genomic_address_service%3A0.1.1--pyh7cba7a3_1' :
-        'quay.io/biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }"
+        'biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }"
 
    input:
    path(dist_matrix)
5 changes: 3 additions & 2 deletions modules/local/locidex/merge/main.nf
@@ -5,8 +5,9 @@ process LOCIDEX_MERGE {
    label 'process_medium'
 
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/locidex:0.1.1--pyhdfd78af_0' :
-        'quay.io/biocontainers/locidex:0.1.1--pyhdfd78af_0' }"
+        "docker.io/mwells14/locidex:0.2.3" :
+        task.ext.override_configured_container_registry != false ? 'docker.io/mwells14/locidex:0.2.3' :
+        'mwells14/locidex:0.2.3' }"
 
    input:
    path input_values // [file(sample1), file(sample2), file(sample3), etc...]
5 changes: 3 additions & 2 deletions modules/local/profile_dists/main.nf
@@ -3,8 +3,9 @@ process PROFILE_DISTS{
    tag "Pairwise Distance Generation"
 
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'docker.io/mwells14/gsp:arborator_1.0.0' :
-        'docker.io/mwells14/gsp:arborator_1.0.0' }"
+        'docker.io/mwells14/profile_dists:1.0.2' :
+        task.ext.override_configured_container_registry != false ? 'docker.io/mwells14/profile_dists:1.0.2' :
+        'mwells14/profile_dists:1.0.2' }"
 
    input:
    path query
5 changes: 4 additions & 1 deletion nextflow.config
@@ -178,6 +178,9 @@ docker.registry = 'quay.io'
 podman.registry = 'quay.io'
 singularity.registry = 'quay.io'
 
+// Override the default Docker registry when required
+process.ext.override_configured_container_registry = true
+
 // Nextflow plugins
 plugins {
    id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
@@ -226,7 +229,7 @@ manifest {
    description = """IRIDA Next Genomic Address Service Clustering Pipeline"""
    mainScript = 'main.nf'
    nextflowVersion = '!>=23.04.0'
-    version = '0.2.0'
+    version = '0.3.0'
    doi = ''
    defaultBranch = 'main'
 }
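
The nested ternary in LOCIDEX_MERGE and PROFILE_DISTS above, combined with the `process.ext.override_configured_container_registry = true` default set here, selects between three container strings. Below is a rough Python rendering of that precedence, illustrative only: the real selection happens in the Nextflow module files, the function and parameter names are made up for the sketch, and the locidex image is used as the example.

```python
def select_container(engine: str, override_configured_registry, pull_docker_for_singularity: bool = False) -> str:
    """Illustrative mirror of the container-selection ternary in
    modules/local/locidex/merge/main.nf and modules/local/profile_dists/main.nf."""
    fully_qualified = "docker.io/mwells14/locidex:0.2.3"  # explicit registry; docker.registry is not applied
    unqualified = "mwells14/locidex:0.2.3"                # configured registry (quay.io by default) is prepended

    if engine == "singularity" and not pull_docker_for_singularity:
        return fully_qualified
    if override_configured_registry is not False:  # matches `!= false` in the modules
        return fully_qualified
    return unqualified


# With the nextflow.config default (override flag = true) the image is pulled from docker.io:
assert select_container("docker", True) == "docker.io/mwells14/locidex:0.2.3"
# Setting the flag to false falls back to the unqualified name, so the configured quay.io registry would apply:
assert select_container("docker", False) == "mwells14/locidex:0.2.3"
```
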
22 changes: 18 additions & 4 deletions tests/data/reports/case-hamming/sample1.mlst.subtyping.json
@@ -1,7 +1,21 @@
 {
-    "sample1": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample1",
+        "profile": {
+            "sample1": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "1"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/case-hamming/sample2.mlst.subtyping.json
@@ -1,7 +1,21 @@
 {
-    "sample2": {
-        "l1": "1",
-        "l2": "2",
-        "l3": "1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample2",
+        "profile": {
+            "sample2": {
+                "l1": "1",
+                "l2": "2",
+                "l3": "1"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/case-hamming/sample3.mlst.subtyping.json
@@ -1,7 +1,21 @@
 {
-    "sample3": {
-        "l1": "2",
-        "l2": "1",
-        "l3": "2"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample3",
+        "profile": {
+            "sample3": {
+                "l1": "2",
+                "l2": "1",
+                "l3": "2"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json
@@ -1,7 +1,21 @@
 {
-    "sample1": {
-        "l1": "b026324c6904b2a9cb4b88d6d61c81d1",
-        "l2": "b026324c6904b2a9cb4b88d6d61c81d1",
-        "l3": "b026324c6904b2a9cb4b88d6d61c81d1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample1",
+        "profile": {
+            "sample1": {
+                "l1": "b026324c6904b2a9cb4b88d6d61c81d1",
+                "l2": "b026324c6904b2a9cb4b88d6d61c81d1",
+                "l3": "b026324c6904b2a9cb4b88d6d61c81d1"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json
@@ -1,7 +1,21 @@
 {
-    "sample2": {
-        "l1": "-",
-        "l2": "26ab0db90d72e28ad0ba1e22ee510510",
-        "l3": "b026324c6904b2a9cb4b88d6d61c81d1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample2",
+        "profile": {
+            "sample2": {
+                "l1": "-",
+                "l2": "26ab0db90d72e28ad0ba1e22ee510510",
+                "l3": "b026324c6904b2a9cb4b88d6d61c81d1"
+            }
+        },
+        "seq_data": {}
     }
 }
@@ -1,7 +1,21 @@
 {
-    "sample3": {
-        "l1": "-",
-        "l2": "-",
-        "l3": "26ab0db90d72e28ad0ba1e22ee510510"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample3",
+        "profile": {
+            "sample3": {
+                "l1": "-",
+                "l2": "-",
+                "l3": "26ab0db90d72e28ad0ba1e22ee510510"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/case-hash-missing/sample3.mlst.subtyping.json
@@ -1,7 +1,21 @@
 {
-    "sample3": {
-        "l1": "-",
-        "l2": "b026324c6904b2a9cb4b88d6d61c81d1",
-        "l3": "26ab0db90d72e28ad0ba1e22ee510510"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample3",
+        "profile": {
+            "sample3": {
+                "l1": "-",
+                "l2": "b026324c6904b2a9cb4b88d6d61c81d1",
+                "l3": "26ab0db90d72e28ad0ba1e22ee510510"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/sample1.mlst.json
@@ -1,7 +1,21 @@
 {
-    "sample1": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample1",
+        "profile": {
+            "sample1": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "1"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/sample2.mlst.json
@@ -1,7 +1,21 @@
 {
-    "sample2": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample2",
+        "profile": {
+            "sample2": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "1"
+            }
+        },
+        "seq_data": {}
     }
 }
