Skip to content

Commit

Permalink
updated tests running average coverage calculations in cloud executors (
Browse files Browse the repository at this point in the history
#102)

* updated tests running average coverage calculations in cloud executors

* committing progress, difference in behaviour between nextflow versions is being observed

* fixed missing coverage value in version 24 of nextflow

* updated test ouputs

* removed docker.userEmulation as marked as deprecated

* renamed duplicated test name
  • Loading branch information
mattheww95 committed Aug 29, 2024
1 parent 010edd8 commit 3a3b982
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 17 deletions.
20 changes: 9 additions & 11 deletions modules/local/report.nf
Original file line number Diff line number Diff line change
Expand Up @@ -130,14 +130,13 @@ def generate_coverage_data(sample_data, bp_field, species){
&& species[species_data_pos].fixed_genome_size != null){

def length = species[species_data_pos].fixed_genome_size.toLong()
def cov = base_pairs / q_length
def cov = base_pairs / length
entry.value[params.coverage_calc_fields.fixed_cov] = cov.round(2)

}

}
}

return sample_data
}


Expand Down Expand Up @@ -439,9 +438,9 @@ def convert_type(type, val){
}

def recurse_keys(value, keys_rk){
// TODO add in generic return for if a value is not found
def temp = traverse_values(value, keys_rk.path)
def value_found = true

if(temp == null){
value_found = false;
}
Expand All @@ -455,21 +454,19 @@ def recurse_keys(value, keys_rk){
}

def traverse_values(value, path){

def temp = value
def value_found = true
def ret_val = null

if(path instanceof String){
temp = temp[path]
return temp
}

for(key in path){
def key_val
if(key.isNumber()){
key_val = key.toInteger()
}else{
key_val = key
def key_val = key
if(key_val.isNumber()){
key_val = key_val.toInteger()
}

if(temp.containsKey(key_val)){
Expand Down Expand Up @@ -674,7 +671,8 @@ def generate_qc_data(data, search_phrases, qc_species_tag){
for(k in data){
if(!k.value.meta.metagenomic){
def species = get_species(k.value[k.key][top_hit_tag], search_phrases, shortest_token)
generate_coverage_data(data[k.key], params.coverage_calc_fields.bp_field, species) // update coverage first so its values can be used in generating qc messages
// update coverage first so its values can be used in generating qc messages
generate_coverage_data(data[k.key], params.coverage_calc_fields.bp_field, species)
data[k.key][quality_analysis] = get_qc_data_species(k.value[k.key], species)
data[k.key][qc_species_tag] = species[species_tag_location]
}else{
Expand Down
13 changes: 9 additions & 4 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -885,9 +885,15 @@ params {
low_msg = "Base quality is poor, resequencing is recommended."
}
average_coverage {
//path = ["${params.quast.report_tag}", "0", "Avg. coverage depth"] // uncomment to use QUASTS coverage
//path = ["${params.coverage_calc_fields.auto_cov}"] // uncomment to use quasts base_pairs/genome length for coverage
path = ["${params.coverage_calc_fields.fixed_cov}"]
/*
* Example path below can be used for qaust coverage
path = [params.quast.report_tag, "0", "Avg. coverage depth"]
* Use the path below to use quasts base_pairs/genome length for coverage
path = [params.coverage_calc_fields.auto_cov]
*/

path = [params.coverage_calc_fields.fixed_cov]
coerce_type = 'Float'
compare_fields = ['min_average_coverage']
comp_type = 'ge'
Expand Down Expand Up @@ -967,7 +973,6 @@ profiles {
}
docker {
docker.enabled = true
docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
Expand Down
169 changes: 167 additions & 2 deletions tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,171 @@ nextflow_pipeline {

skip_allele_calling = true

QCReport {
fallthrough {
search = "No organism specific QC data available."
raw_average_quality = 30
min_n50 = null
max_n50 = null
min_nr_contigs = null
max_nr_contigs = null
fixed_genome_size = 1000
min_length = null
max_length = null
max_checkm_contamination = 3.0
min_average_coverage = 30
}
}


skip_bakta = true
skip_staramr = false
skip_mobrecon = false
skip_checkm = false
skip_raw_read_metrics = false
skip_polishing = false

max_memory = "2.GB"
max_cpus = 1
}
}

then {

assert workflow.success
assert path("$launchDir/results").exists()

// parse output json file
def json = path("$launchDir/results/FinalReports/Aggregated/Json/final_report.json").json

assert json.short.short.RawReadSummary.R1."total_bp".equals(118750)
assert json.short.short.RawReadSummary.R1."total_reads".equals(475)
assert json.short.short.RawReadSummary.R1."read_qual_mean".equals(40.0)
assert json.short.short.RawReadSummary.R1."mean_sequence_length".equals(250.0)

assert json.short.short.FastP.summary.sequencing.equals("paired end (250 cycles + 250 cycles)")
assert json.short.short.FastP.summary.before_filtering.total_reads.equals(950)
assert json.short.short.FastP.filtering_result.passed_filter_reads.equals(950)
assert json.short.short.FastP.filtering_result.low_quality_reads.equals(0)
assert json.short.short.FastP.insert_size.peak.equals(347)

//assert json.short.meta.metagenomic.equals(false) // Currently, this is "null".
assert json.short.meta.assembly.equals(false)
assert json.short.meta.hybrid.equals(false)
assert json.short.meta.single_end.equals(false)
assert json.short.meta.merge.equals(false)
assert json.short.meta.downsampled.equals(false)

assert json.short.short.AssemblyCompleted.equals(true)
assert json.short.short.QUAST."0"."Total length (>= 0 bp)".equals("4949")
assert json.short.short.QUAST."0"."Largest contig".equals("4949")
assert json.short.short.QUAST."0"."GC (%)".equals("52.96")
assert json.short.short.QUAST."0"."Avg. coverage depth".equals("47")

// Below two values should be empty
assert json.short.short.StarAMR."0"."Genotype".equals("None")
assert json.short.short.StarAMR."0"."Predicted Phenotype".equals("Susceptible")
assert json.short.short.StarAMR."0"."Genome Length".equals("4949")

assert json.short.short.CheckM."0"."# genomes".equals("5656")
assert json.short.short.CheckM."0"."# markers".equals("56")
assert json.short.short.CheckM."0"."# marker sets".equals("24")
assert json.short.short.CheckM."0".Contamination.equals("0.00")

assert json.short.short.SevenGeneMLSTReport[0].filename.equals("short.filtered.fasta.gz")

assert json.short.short.Abricate."0".RESISTANCE.equals("NoData") // All Abricate results for this are "NoData".

def assembly_path = "$launchDir/results/Assembly/FinalAssembly/short/short.final.filtered.assembly.fasta.gz"
assert path(assembly_path).exists()

// parse assembly file
def assembly_header = path(assembly_path).linesGzip[0]
assert assembly_header.startsWith(">NODE_1_length_4949_cov_23.917254") // _pilon_pilon_pilon gets appended

// compare IRIDA Next JSON output
def iridanext_json = path("$launchDir/results/iridanext.output.json").json
def iridanext_global = iridanext_json.files.global
def iridanext_samples = iridanext_json.files.samples
def iridanext_metadata = iridanext_json.metadata.samples

// output files
assert iridanext_global.findAll { it.path == "FinalReports/Aggregated/Json/final_report.json" }.size() == 1
assert iridanext_global.findAll { it.path == "FinalReports/Aggregated/Tables/final_report.tsv" }.size() == 1
assert iridanext_samples.short.findAll { it.path == "Assembly/FinalAssembly/short/short.final.filtered.assembly.fasta.gz" }.size() == 1
assert iridanext_samples.short.findAll { it.path == "Assembly/Quality/QUAST/short/short.transposed_short.quast.quality.tsv" }.size() == 1
assert iridanext_samples.short.findAll { it.path == "Assembly/Quality/SeqKitStats/short.seqkit.stats.summary.tsv" }.size() == 1
assert iridanext_samples.short.findAll { it.path == "Assembly/PostProcessing/Speciation/MashScreen/short.mash.screen.taxa.screen.screen" }.size() == 1
assert iridanext_samples.short.findAll { it.path == "Reads/Quality/Trimmed/MashScreen/short.mash.screen.reads.screen.screen" }.size() == 1
assert iridanext_samples.short.findAll { it.path == "Reads/Quality/Trimmed/FastP/short.fastp.summary.json" }.size() == 1
assert iridanext_samples.short.findAll { it.path == "Reads/Quality/RawReadQuality/short.read.scan.summary.json" }.size() == 1
assert iridanext_samples.short.findAll { it.path == "FinalReports/FlattenedReports/short.flat_sample.json.gz" }.size() == 1

// output metadata
assert iridanext_metadata.short."QC Status" == "PASSED"
assert iridanext_metadata.short."Checkm Status" == "PASSED"
assert iridanext_metadata.short."Checkm Value" == 0.0
assert iridanext_metadata.short."Average Coverage Status" == "PASSED"
assert iridanext_metadata.short."Average Coverage Value" == 237.5
assert iridanext_metadata.short."n50 Status" == "WARNING"
assert iridanext_metadata.short."n50 Value" == 4949
assert iridanext_metadata.short."Raw Average Quality Status" == "PASSED"
assert iridanext_metadata.short."Raw Average Quality Value" == 40.0
assert iridanext_metadata.short."Length Status" == "WARNING"
assert iridanext_metadata.short."Length Value" == 4949
assert iridanext_metadata.short."nr contigs Status" == "WARNING"
assert iridanext_metadata.short."nr contigs Value" == 1
assert iridanext_metadata.short."QC Summary" == "PASSED Species ID: No Species Identified; Passed Tests: 6/6; Organism QC Criteria: No organism specific QC data available."

assert iridanext_metadata.short."Downsampled" == false
assert iridanext_metadata.short."predicted_identification_name" == "No Species Identified"
assert iridanext_metadata.short."predicted_identification_method" == "Mash"
assert iridanext_metadata.short."GC (%)" == "52.96"
assert iridanext_metadata.short."Mean Sequence Length Forward" == 250
assert iridanext_metadata.short."BaseCount" == 237500

assert iridanext_metadata.short."StarAMR Genotype" == "None"
assert iridanext_metadata.short."StarAMR Predicted Phenotype" == "Susceptible"
}

}

test("Should assemble but fail average coverage check") {
tag "succeed_assembly_fail_coverage"

when {
params {
input = "https://raw.githubusercontent.com/phac-nml/mikrokondo/dev/tests/data/samplesheets/samplesheet-small-assembly.csv"
outdir = "results"

platform = "illumina"

mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh"
mh_min_kmer = 1

dehosting_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi"
kraken2_db = "$baseDir/tests/data/kraken2/test"

min_reads = 100

skip_allele_calling = true

QCReport {
fallthrough {
search = "No organism specific QC data available."
raw_average_quality = 30
min_n50 = null
max_n50 = null
min_nr_contigs = null
max_nr_contigs = null
fixed_genome_size = 1000000
min_length = null
max_length = null
max_checkm_contamination = 3.0
min_average_coverage = 30
}
}


skip_bakta = true
skip_staramr = false
Expand Down Expand Up @@ -208,8 +373,8 @@ nextflow_pipeline {
assert iridanext_metadata.short."QC Status" == "FAILED"
assert iridanext_metadata.short."Checkm Status" == "PASSED"
assert iridanext_metadata.short."Checkm Value" == 0.0
assert !iridanext_metadata.short.containsKey("Average Coverage Status")
assert !iridanext_metadata.short.containsKey("Average Coverage Value")
assert iridanext_metadata.short."Average Coverage Status" == "FAILED"
assert iridanext_metadata.short."Average Coverage Value" == 0.24
assert iridanext_metadata.short."n50 Status" == "WARNING"
assert iridanext_metadata.short."n50 Value" == 4949
assert iridanext_metadata.short."Raw Average Quality Status" == "PASSED"
Expand Down

0 comments on commit 3a3b982

Please sign in to comment.