Merge pull request #30 from phac-nml/dev

Update to version 0.3.0

kylacochrane authored Sep 10, 2024
2 parents 50949d1 + 0c35862 commit 9bbd787

Showing 19 changed files with 226 additions and 63 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
@@ -3,6 +3,18 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.3.0] - 2024-09-10
+
+### Changed
+
+- Upgraded `profile_dist` container to version `1.0.2`
+- Upgraded `locidex/merge` to version `0.2.3` and updated `input_assure` and test data for compatibility with the new `mlst.json` allele file format.
+  - [PR28](https://github.com/phac-nml/gasclustering/pull/28)
+- Aligned container registry handling in configuration files and modules with `phac-nml/pipeline-standards`
+  - [PR28](https://github.com/phac-nml/gasclustering/pull/28)
+
+This pipeline is now compatible only with output generated by [Locidex v0.2.3+](https://github.com/phac-nml/locidex) and [Mikrokondo v0.4.0+](https://github.com/phac-nml/mikrokondo/releases/tag/v0.4.0).
+
 ## [0.2.0] - 2024-06-26
 
 ### Added
@@ -28,3 +40,4 @@ Initial release of the Genomic Address Service Clustering pipeline to be used fo
 
 [0.1.0]: https://github.com/phac-nml/gasclustering/releases/tag/0.1.0
 [0.2.0]: https://github.com/phac-nml/gasclustering/releases/tag/0.2.0
+[0.3.0]: https://github.com/phac-nml/gasclustering/releases/tag/0.3.0
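
For context, a minimal sketch of the Locidex v0.2.3+ `mlst.json` layout that this release targets. The nesting matches the updated test fixtures further down in this diff; the `parameters` block is abbreviated here, and the final `print` is only for illustration. The `data` → `profile` lookup is the one the updated `bin/input_assure.py` below now performs.

```python
import json

# Abbreviated example of the Locidex v0.2.3+ mlst.json layout
# (same nesting as the fixtures under tests/data/reports/).
example = {
    "db_info": {},
    "parameters": {"mode": "normal"},
    "data": {
        "sample_name": "sample1",
        "profile": {"sample1": {"l1": "1", "l2": "1", "l3": "1"}},
        "seq_data": {},
    },
}

# The allele calls now live under data -> profile rather than at the top level,
# which is why input_assure.py switches from json_data.keys() to this lookup:
profile = example.get("data", {}).get("profile", {})
print(json.dumps(profile, indent=4))
```
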
27 changes: 16 additions & 11 deletions bin/input_assure.py
@@ -19,38 +19,43 @@ def check_inputs(json_file, sample_id, address, output_error_file, output_json_f
     with open_file(json_file, "rt") as f:
         json_data = json.load(f)
 
+    # Extract the profile from the json_data
+    profile = json_data.get("data", {}).get("profile", {})
+    # Check for multiple keys in the JSON file and define error message
+    keys = sorted(profile.keys())
+    original_key = keys[0] if keys else None
+
     # Define a variable to store the match_status (True or False)
-    match_status = sample_id in json_data
+    match_status = sample_id in profile
 
     # Initialize the error message
     error_message = None
 
-    # Check for multiple keys in the JSON file and define error message
-    keys = list(json_data.keys())
-    original_key = keys[0] if keys else None
-
-    if len(keys) == 0:
-        error_message = f"{json_file} is completely empty!"
+    if not keys:
+        error_message = (
+            f"{json_file} is missing the 'profile' section or is completely empty!"
+        )
         print(error_message)
         sys.exit(1)
     elif len(keys) > 1:
         # Check if sample_id matches any key
         if not match_status:
             error_message = f"No key in the MLST JSON file ({json_file}) matches the specified sample ID '{sample_id}'. The first key '{original_key}' has been forcefully changed to '{sample_id}' and all other keys have been removed."
             # Retain only the specified sample ID
-            json_data = {sample_id: json_data.pop(original_key)}
+            json_data["data"]["profile"] = {sample_id: profile.pop(original_key)}
         else:
             error_message = f"MLST JSON file ({json_file}) contains multiple keys: {keys}. The MLST JSON file has been modified to retain only the '{sample_id}' entry"
-            # Remove all keys expect the one matching sample_id
-            json_data = {sample_id: json_data[sample_id]}
+            # Retain only the specified sample_id in the profile
+            json_data["data"]["profile"] = {sample_id: profile[sample_id]}
     elif not match_status:
         # Define error message based on meta.address (query or reference)
         if address == "null":
             error_message = f"Query {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
         else:
             error_message = f"Reference {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
         # Update the JSON file with the new sample ID
-        json_data[sample_id] = json_data.pop(original_key)
+        json_data["data"]["profile"] = {sample_id: profile.pop(original_key)}
+        json_data["data"]["sample_name"] = sample_id
 
     # Write file containing relevant error messages
     if error_message:
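
To make the corrected behaviour concrete, here is a small standalone sketch, not the pipeline script itself: the helper name `fix_profile_key` is illustrative, and the error-file writing and gzip handling of `check_inputs` are omitted. It mirrors the single-key mismatch branch above, renaming the profile key and keeping `sample_name` in sync.

```python
def fix_profile_key(json_data: dict, sample_id: str) -> dict:
    """Rename a single mismatched profile key to sample_id, mirroring the
    single-key branch of check_inputs() in bin/input_assure.py."""
    profile = json_data.get("data", {}).get("profile", {})
    keys = sorted(profile.keys())
    original_key = keys[0] if keys else None

    if original_key is not None and sample_id not in profile:
        # Forcefully change the key and keep sample_name consistent.
        json_data["data"]["profile"] = {sample_id: profile.pop(original_key)}
        json_data["data"]["sample_name"] = sample_id
    return json_data


doc = {"data": {"sample_name": "sampleA", "profile": {"sampleA": {"l1": "1"}}, "seq_data": {}}}
assert list(fix_profile_key(doc, "sample1")["data"]["profile"]) == ["sample1"]
```
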
2 changes: 1 addition & 1 deletion conf/test.config
@@ -20,7 +20,7 @@ params {
     max_time = '1.h'
 
     // Input data
-    input = 'https://raw.githubusercontent.com/phac-nml/gasclustering/dev/assets/samplesheet.csv'
+    input = "${projectDir}/assets/samplesheet.csv"
 }
 
 
4 changes: 2 additions & 2 deletions modules/local/arborview.nf
@@ -9,8 +9,8 @@ process ARBOR_VIEW {
    stageInMode 'copy' // Need to copy in arbor view html
 
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        "docker.io/python:3.11.6" :
-        "docker.io/python:3.11.6" }"
+        'https://depot.galaxyproject.org/singularity/python%3A3.12' :
+        'biocontainers/python:3.12' }"
 
    input:
    tuple path(tree), path(contextual_data)
2 changes: 1 addition & 1 deletion modules/local/gas/mcluster/main.nf
@@ -6,7 +6,7 @@ process GAS_MCLUSTER{
 
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/genomic_address_service%3A0.1.1--pyh7cba7a3_1' :
-        'quay.io/biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }"
+        'biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }"
 
    input:
    path(dist_matrix)
5 changes: 3 additions & 2 deletions modules/local/locidex/merge/main.nf
@@ -5,8 +5,9 @@ process LOCIDEX_MERGE {
    label 'process_medium'
 
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/locidex:0.1.1--pyhdfd78af_0' :
-        'quay.io/biocontainers/locidex:0.1.1--pyhdfd78af_0' }"
+        "docker.io/mwells14/locidex:0.2.3" :
+        task.ext.override_configured_container_registry != false ? 'docker.io/mwells14/locidex:0.2.3' :
+        'mwells14/locidex:0.2.3' }"
 
    input:
    path input_values // [file(sample1), file(sample2), file(sample3), etc...]
5 changes: 3 additions & 2 deletions modules/local/profile_dists/main.nf
@@ -3,8 +3,9 @@ process PROFILE_DISTS{
    tag "Pairwise Distance Generation"
 
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'docker.io/mwells14/gsp:arborator_1.0.0' :
-        'docker.io/mwells14/gsp:arborator_1.0.0' }"
+        'docker.io/mwells14/profile_dists:1.0.2' :
+        task.ext.override_configured_container_registry != false ? 'docker.io/mwells14/profile_dists:1.0.2' :
+        'mwells14/profile_dists:1.0.2' }"
 
    input:
    path query
5 changes: 4 additions & 1 deletion nextflow.config
@@ -178,6 +178,9 @@ docker.registry = 'quay.io'
 podman.registry = 'quay.io'
 singularity.registry = 'quay.io'
 
+// Override the default Docker registry when required
+process.ext.override_configured_container_registry = true
+
 // Nextflow plugins
 plugins {
    id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
@@ -226,7 +229,7 @@ manifest {
    description = """IRIDA Next Genomic Address Service Clustering Pipeline"""
    mainScript = 'main.nf'
    nextflowVersion = '!>=23.04.0'
-    version = '0.2.0'
+    version = '0.3.0'
    doi = ''
    defaultBranch = 'main'
 }
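
The nested ternary in LOCIDEX_MERGE and PROFILE_DISTS above, combined with the `process.ext.override_configured_container_registry = true` default set here, selects between three container strings. Below is a rough Python rendering of that precedence, illustrative only: the real selection happens in the Nextflow module files, the function and parameter names are made up for the sketch, and the locidex image is used as the example.

```python
def select_container(engine: str, override_configured_registry, pull_docker_for_singularity: bool = False) -> str:
    """Illustrative mirror of the container-selection ternary in
    modules/local/locidex/merge/main.nf and modules/local/profile_dists/main.nf."""
    fully_qualified = "docker.io/mwells14/locidex:0.2.3"  # explicit registry; docker.registry is not applied
    unqualified = "mwells14/locidex:0.2.3"                # configured registry (quay.io by default) is prepended

    if engine == "singularity" and not pull_docker_for_singularity:
        return fully_qualified
    if override_configured_registry is not False:  # matches `!= false` in the modules
        return fully_qualified
    return unqualified


# With the nextflow.config default (override flag = true) the image is pulled from docker.io:
assert select_container("docker", True) == "docker.io/mwells14/locidex:0.2.3"
# Setting the flag to false falls back to the unqualified name, so the configured quay.io registry would apply:
assert select_container("docker", False) == "mwells14/locidex:0.2.3"
```
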
22 changes: 18 additions & 4 deletions tests/data/reports/case-hamming/sample1.mlst.subtyping.json
@@ -1,7 +1,21 @@
 {
-    "sample1": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample1",
+        "profile": {
+            "sample1": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "1"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/case-hamming/sample2.mlst.subtyping.json
@@ -1,7 +1,21 @@
 {
-    "sample2": {
-        "l1": "1",
-        "l2": "2",
-        "l3": "1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample2",
+        "profile": {
+            "sample2": {
+                "l1": "1",
+                "l2": "2",
+                "l3": "1"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/case-hamming/sample3.mlst.subtyping.json
@@ -1,7 +1,21 @@
 {
-    "sample3": {
-        "l1": "2",
-        "l2": "1",
-        "l3": "2"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample3",
+        "profile": {
+            "sample3": {
+                "l1": "2",
+                "l2": "1",
+                "l3": "2"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json
@@ -1,7 +1,21 @@
 {
-    "sample1": {
-        "l1": "b026324c6904b2a9cb4b88d6d61c81d1",
-        "l2": "b026324c6904b2a9cb4b88d6d61c81d1",
-        "l3": "b026324c6904b2a9cb4b88d6d61c81d1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample1",
+        "profile": {
+            "sample1": {
+                "l1": "b026324c6904b2a9cb4b88d6d61c81d1",
+                "l2": "b026324c6904b2a9cb4b88d6d61c81d1",
+                "l3": "b026324c6904b2a9cb4b88d6d61c81d1"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json
@@ -1,7 +1,21 @@
 {
-    "sample2": {
-        "l1": "-",
-        "l2": "26ab0db90d72e28ad0ba1e22ee510510",
-        "l3": "b026324c6904b2a9cb4b88d6d61c81d1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample2",
+        "profile": {
+            "sample2": {
+                "l1": "-",
+                "l2": "26ab0db90d72e28ad0ba1e22ee510510",
+                "l3": "b026324c6904b2a9cb4b88d6d61c81d1"
+            }
+        },
+        "seq_data": {}
     }
 }
@@ -1,7 +1,21 @@
 {
-    "sample3": {
-        "l1": "-",
-        "l2": "-",
-        "l3": "26ab0db90d72e28ad0ba1e22ee510510"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample3",
+        "profile": {
+            "sample3": {
+                "l1": "-",
+                "l2": "-",
+                "l3": "26ab0db90d72e28ad0ba1e22ee510510"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/case-hash-missing/sample3.mlst.subtyping.json
@@ -1,7 +1,21 @@
 {
-    "sample3": {
-        "l1": "-",
-        "l2": "b026324c6904b2a9cb4b88d6d61c81d1",
-        "l3": "26ab0db90d72e28ad0ba1e22ee510510"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample3",
+        "profile": {
+            "sample3": {
+                "l1": "-",
+                "l2": "b026324c6904b2a9cb4b88d6d61c81d1",
+                "l3": "26ab0db90d72e28ad0ba1e22ee510510"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/sample1.mlst.json
@@ -1,7 +1,21 @@
 {
-    "sample1": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample1",
+        "profile": {
+            "sample1": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "1"
+            }
+        },
+        "seq_data": {}
     }
 }
22 changes: 18 additions & 4 deletions tests/data/reports/sample2.mlst.json
@@ -1,7 +1,21 @@
 {
-    "sample2": {
-        "l1": "1",
-        "l2": "1",
-        "l3": "1"
+    "db_info": {},
+    "parameters": {
+        "mode": "normal",
+        "min_match_ident": 100,
+        "min_match_cov": 100,
+        "max_ambiguous": 0,
+        "max_internal_stops": 0
+    },
+    "data": {
+        "sample_name": "sample2",
+        "profile": {
+            "sample2": {
+                "l1": "1",
+                "l2": "1",
+                "l3": "1"
+            }
+        },
+        "seq_data": {}
     }
 }
