phac-nml · mattheww95 · Aug 29, 2024 · Aug 23, 2024 · Aug 23, 2024 · Aug 26, 2024
diff --git a/modules/local/report.nf b/modules/local/report.nf
@@ -130,14 +130,13 @@ def generate_coverage_data(sample_data, bp_field, species){
                 && species[species_data_pos].fixed_genome_size != null){
 
                 def length = species[species_data_pos].fixed_genome_size.toLong()
-                def cov = base_pairs / q_length
+                def cov = base_pairs / length
                 entry.value[params.coverage_calc_fields.fixed_cov] = cov.round(2)
-
             }
 
         }
     }
-
+    return sample_data
 }
 
 
@@ -439,9 +438,9 @@ def convert_type(type, val){
 }
 
 def recurse_keys(value, keys_rk){
-    // TODO add in generic return for if a value is not found
     def temp = traverse_values(value, keys_rk.path)
     def value_found = true
+
     if(temp == null){
         value_found = false;
     }
@@ -455,21 +454,19 @@ def recurse_keys(value, keys_rk){
 }
 
 def traverse_values(value, path){
+
     def temp = value
     def value_found = true
-    def ret_val = null
 
     if(path instanceof String){
         temp = temp[path]
         return temp
     }
 
     for(key in path){
-        def key_val
-        if(key.isNumber()){
-            key_val = key.toInteger()
-        }else{
-            key_val = key
+        def key_val = key
+        if(key_val.isNumber()){
+            key_val = key_val.toInteger()
         }
 
         if(temp.containsKey(key_val)){
@@ -674,7 +671,8 @@ def generate_qc_data(data, search_phrases, qc_species_tag){
     for(k in data){
         if(!k.value.meta.metagenomic){
             def species = get_species(k.value[k.key][top_hit_tag], search_phrases, shortest_token)
-            generate_coverage_data(data[k.key], params.coverage_calc_fields.bp_field, species) // update coverage first so its values can be used in generating qc messages
+            // update coverage first so its values can be used in generating qc messages
+            generate_coverage_data(data[k.key], params.coverage_calc_fields.bp_field, species)
             data[k.key][quality_analysis] = get_qc_data_species(k.value[k.key], species)
             data[k.key][qc_species_tag] = species[species_tag_location]
         }else{

diff --git a/nextflow.config b/nextflow.config
@@ -885,9 +885,15 @@ params {
             low_msg = "Base quality is poor, resequencing is recommended."
         }
         average_coverage {
-            //path = ["${params.quast.report_tag}", "0", "Avg. coverage depth"] // uncomment to use QUASTS coverage
-            //path = ["${params.coverage_calc_fields.auto_cov}"] // uncomment to use quasts base_pairs/genome length for coverage
-            path = ["${params.coverage_calc_fields.fixed_cov}"]
+            /*
+                * Example path below can be used for qaust coverage
+                path = [params.quast.report_tag, "0", "Avg. coverage depth"]
+                * Use the path below to use quasts base_pairs/genome length for coverage
+                path = [params.coverage_calc_fields.auto_cov]
+
+            */
+
+            path = [params.coverage_calc_fields.fixed_cov]
             coerce_type = 'Float'
             compare_fields = ['min_average_coverage']
             comp_type = 'ge'
@@ -967,7 +973,6 @@ profiles {
     }
     docker {
         docker.enabled         = true
-        docker.userEmulation   = true
         singularity.enabled    = false
         podman.enabled         = false
         shifter.enabled        = false

diff --git a/tests/main.nf.test b/tests/main.nf.test
@@ -120,6 +120,171 @@ nextflow_pipeline {
 
                 skip_allele_calling = true
 
+                QCReport {
+                    fallthrough {
+                        search = "No organism specific QC data available."
+                        raw_average_quality = 30
+                        min_n50 = null
+                        max_n50 = null
+                        min_nr_contigs = null
+                        max_nr_contigs = null
+                        fixed_genome_size = 1000
+                        min_length = null
+                        max_length = null
+                        max_checkm_contamination = 3.0
+                        min_average_coverage = 30
+                    }
+                }
+
+
+                skip_bakta = true
+                skip_staramr = false
+                skip_mobrecon = false
+                skip_checkm = false
+                skip_raw_read_metrics = false
+                skip_polishing = false
+
+                max_memory = "2.GB"
+                max_cpus = 1
+            }
+        }
+
+        then {
+
+            assert workflow.success
+            assert path("$launchDir/results").exists()
+
+            // parse output json file
+            def json = path("$launchDir/results/FinalReports/Aggregated/Json/final_report.json").json
+
+            assert json.short.short.RawReadSummary.R1."total_bp".equals(118750)
+            assert json.short.short.RawReadSummary.R1."total_reads".equals(475)
+            assert json.short.short.RawReadSummary.R1."read_qual_mean".equals(40.0)
+            assert json.short.short.RawReadSummary.R1."mean_sequence_length".equals(250.0)
+
+            assert json.short.short.FastP.summary.sequencing.equals("paired end (250 cycles + 250 cycles)")
+            assert json.short.short.FastP.summary.before_filtering.total_reads.equals(950)
+            assert json.short.short.FastP.filtering_result.passed_filter_reads.equals(950)
+            assert json.short.short.FastP.filtering_result.low_quality_reads.equals(0)
+            assert json.short.short.FastP.insert_size.peak.equals(347)
+
+            //assert json.short.meta.metagenomic.equals(false)  // Currently, this is "null".
+            assert json.short.meta.assembly.equals(false)
+            assert json.short.meta.hybrid.equals(false)
+            assert json.short.meta.single_end.equals(false)
+            assert json.short.meta.merge.equals(false)
+            assert json.short.meta.downsampled.equals(false)
+
+            assert json.short.short.AssemblyCompleted.equals(true)
+            assert json.short.short.QUAST."0"."Total length (>= 0 bp)".equals("4949")
+            assert json.short.short.QUAST."0"."Largest contig".equals("4949")
+            assert json.short.short.QUAST."0"."GC (%)".equals("52.96")
+            assert json.short.short.QUAST."0"."Avg. coverage depth".equals("47")
+
+            // Below two values should be empty
+            assert json.short.short.StarAMR."0"."Genotype".equals("None")
+            assert json.short.short.StarAMR."0"."Predicted Phenotype".equals("Susceptible")
+            assert json.short.short.StarAMR."0"."Genome Length".equals("4949")
+
+            assert json.short.short.CheckM."0"."# genomes".equals("5656")
+            assert json.short.short.CheckM."0"."# markers".equals("56")
+            assert json.short.short.CheckM."0"."# marker sets".equals("24")
+            assert json.short.short.CheckM."0".Contamination.equals("0.00")
+
+            assert json.short.short.SevenGeneMLSTReport[0].filename.equals("short.filtered.fasta.gz")
+
+            assert json.short.short.Abricate."0".RESISTANCE.equals("NoData")  // All Abricate results for this are "NoData".
+
+            def assembly_path = "$launchDir/results/Assembly/FinalAssembly/short/short.final.filtered.assembly.fasta.gz"
+            assert path(assembly_path).exists()
+
+            // parse assembly file
+            def assembly_header = path(assembly_path).linesGzip[0]
+            assert assembly_header.startsWith(">NODE_1_length_4949_cov_23.917254")  // _pilon_pilon_pilon gets appended
+
+            // compare IRIDA Next JSON output
+            def iridanext_json = path("$launchDir/results/iridanext.output.json").json
+            def iridanext_global = iridanext_json.files.global
+            def iridanext_samples = iridanext_json.files.samples
+            def iridanext_metadata = iridanext_json.metadata.samples
+
+            // output files
+            assert iridanext_global.findAll { it.path == "FinalReports/Aggregated/Json/final_report.json" }.size() == 1
+            assert iridanext_global.findAll { it.path == "FinalReports/Aggregated/Tables/final_report.tsv" }.size() == 1
+            assert iridanext_samples.short.findAll { it.path == "Assembly/FinalAssembly/short/short.final.filtered.assembly.fasta.gz" }.size() == 1
+            assert iridanext_samples.short.findAll { it.path == "Assembly/Quality/QUAST/short/short.transposed_short.quast.quality.tsv" }.size() == 1
+            assert iridanext_samples.short.findAll { it.path == "Assembly/Quality/SeqKitStats/short.seqkit.stats.summary.tsv" }.size() == 1
+            assert iridanext_samples.short.findAll { it.path == "Assembly/PostProcessing/Speciation/MashScreen/short.mash.screen.taxa.screen.screen" }.size() == 1
+            assert iridanext_samples.short.findAll { it.path == "Reads/Quality/Trimmed/MashScreen/short.mash.screen.reads.screen.screen" }.size() == 1
+            assert iridanext_samples.short.findAll { it.path == "Reads/Quality/Trimmed/FastP/short.fastp.summary.json" }.size() == 1
+            assert iridanext_samples.short.findAll { it.path == "Reads/Quality/RawReadQuality/short.read.scan.summary.json" }.size() == 1
+            assert iridanext_samples.short.findAll { it.path == "FinalReports/FlattenedReports/short.flat_sample.json.gz" }.size() == 1
+
+            // output metadata
+            assert iridanext_metadata.short."QC Status" == "PASSED"
+            assert iridanext_metadata.short."Checkm Status" == "PASSED"
+            assert iridanext_metadata.short."Checkm Value" == 0.0
+            assert iridanext_metadata.short."Average Coverage Status" == "PASSED"
+            assert iridanext_metadata.short."Average Coverage Value" == 237.5
+            assert iridanext_metadata.short."n50 Status" == "WARNING"
+            assert iridanext_metadata.short."n50 Value" == 4949
+            assert iridanext_metadata.short."Raw Average Quality Status" == "PASSED"
+            assert iridanext_metadata.short."Raw Average Quality Value" == 40.0
+            assert iridanext_metadata.short."Length Status" == "WARNING"
+            assert iridanext_metadata.short."Length Value" == 4949
+            assert iridanext_metadata.short."nr contigs Status" == "WARNING"
+            assert iridanext_metadata.short."nr contigs Value" == 1
+            assert iridanext_metadata.short."QC Summary" == "PASSED Species ID: No Species Identified; Passed Tests: 6/6; Organism QC Criteria: No organism specific QC data available."
+
+            assert iridanext_metadata.short."Downsampled" == false
+            assert iridanext_metadata.short."predicted_identification_name" == "No Species Identified"
+            assert iridanext_metadata.short."predicted_identification_method" == "Mash"
+            assert iridanext_metadata.short."GC (%)" == "52.96"
+            assert iridanext_metadata.short."Mean Sequence Length Forward" == 250
+            assert iridanext_metadata.short."BaseCount" == 237500
+
+            assert iridanext_metadata.short."StarAMR Genotype" == "None"
+            assert iridanext_metadata.short."StarAMR Predicted Phenotype" == "Susceptible"
+        }
+
+    }
+
+    test("Should assemble but fail average coverage check") {
+        tag "succeed_assembly_fail_coverage"
+
+        when {
+            params {
+                input = "https://raw.githubusercontent.com/phac-nml/mikrokondo/dev/tests/data/samplesheets/samplesheet-small-assembly.csv"
+                outdir = "results"
+
+                platform = "illumina"
+
+                mash_sketch = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh"
+                mh_min_kmer = 1
+
+                dehosting_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi"
+                kraken2_db = "$baseDir/tests/data/kraken2/test"
+
+                min_reads = 100
+
+                skip_allele_calling = true
+
+                QCReport {
+                    fallthrough {
+                        search = "No organism specific QC data available."
+                        raw_average_quality = 30
+                        min_n50 = null
+                        max_n50 = null
+                        min_nr_contigs = null
+                        max_nr_contigs = null
+                        fixed_genome_size = 1000000
+                        min_length = null
+                        max_length = null
+                        max_checkm_contamination = 3.0
+                        min_average_coverage = 30
+                    }
+                }
+
 
                 skip_bakta = true
                 skip_staramr = false
@@ -208,8 +373,8 @@ nextflow_pipeline {
             assert iridanext_metadata.short."QC Status" == "FAILED"
             assert iridanext_metadata.short."Checkm Status" == "PASSED"
             assert iridanext_metadata.short."Checkm Value" == 0.0
-            assert !iridanext_metadata.short.containsKey("Average Coverage Status")
-            assert !iridanext_metadata.short.containsKey("Average Coverage Value")
+            assert iridanext_metadata.short."Average Coverage Status" == "FAILED"
+            assert iridanext_metadata.short."Average Coverage Value" == 0.24
             assert iridanext_metadata.short."n50 Status" == "WARNING"
             assert iridanext_metadata.short."n50 Value" == 4949
             assert iridanext_metadata.short."Raw Average Quality Status" == "PASSED"