Skip to content

Commit

Permalink
Merge pull request #86 from sanger-tol/dev_copy
Browse files Browse the repository at this point in the history
Dev copy
  • Loading branch information
DLBPointon authored Feb 12, 2025
2 parents 85c6c74 + 0d0d8e3 commit 2e18147
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 20 deletions.
18 changes: 10 additions & 8 deletions bin/generate_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ def parse_args():
parser = argparse.ArgumentParser(description="Generate a csv file for BTK")
parser.add_argument("sample_name", type=str, help="Name of sample")
parser.add_argument(
"mapped_bam_file",
"path_to_reads",
type=str,
help="Path containing the mapped BAM generated with PacBio data and the ASCC input assembly",
help="Path containing the PacBio reads",
)
parser.add_argument("-v", "--version", action="version", version="1.0.0")
parser.add_argument("-v", "--version", action="version", version="1.1.0")

return parser.parse_args()


Expand All @@ -31,12 +32,13 @@ def main():
data_list = []

data_list.append("sample,datatype,datafile\n")
if args.mapped_bam_file.endswith(".bam"):
data_list.append(f"{args.sample_name},pacbio,{args.mapped_bam_file}\n")
else:
sys.exit("I was expecting a mapped BAM file")

with open(f"{args.sample_name}_samplesheet.csv", "w") as file:
[data_list.append(f"{args.sample_name},pacbio,{args.path_to_reads}{file}\n") for file in os.listdir(args.path_to_reads) if file.endswith('.fasta.gz') or file.endswith('.fa.gz')]

if len(data_list) <= 1:
sys.exit("I was expecting at least one FASTA.GZ file")

with open("samplesheet.csv", "w") as file:
file.write("".join(data_list))


Expand Down
5 changes: 3 additions & 2 deletions modules/local/generate_samplesheet.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ process GENERATE_SAMPLESHEET {
'biocontainers/python:3.9' }"

input:
tuple val(meta), path(pacbio_path)
tuple val(meta), path(reference)
path( "input_pacbio_files/*" )
path(alarm_file)

output:
Expand All @@ -21,7 +22,7 @@ process GENERATE_SAMPLESHEET {
"""
generate_samplesheet.py \\
$prefix \\
"\$(realpath $pacbio_path)"
input_pacbio_files/
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
2 changes: 2 additions & 0 deletions modules/local/sanger_tol_btk.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ process SANGER_TOL_BTK {
path blastn
path blastx
path tax_dump
path( "input_pacbio_files/*" )
val busco_lineages_folder
val busco_lineages
val taxon
Expand Down Expand Up @@ -53,6 +54,7 @@ process SANGER_TOL_BTK {
--blastn "\$(realpath $blastn)" \\
--blastx "\$(realpath $blastx)" \\
--use_work_dir_as_temp true \\
--align \\
$args
mv ${prefix}_btk_out/pipeline_info blobtoolkit_pipeline_info
Expand Down
18 changes: 9 additions & 9 deletions workflows/ascc_genomic.nf
Original file line number Diff line number Diff line change
Expand Up @@ -323,8 +323,7 @@ workflow ASCC_GENOMIC {
) {
PACBIO_BARCODE_CHECK (
reference_tuple_from_GG,
params.reads_path, // TODO: TEAM WANT TO BE ABLE TO SPECIFY PACBIO FILES
                // MAY NEED A PROCESS TO PULL THEM INTO A SINGLE FOLDER BEFORE PROCESSING
params.reads_path,
params.reads_type,
params.pacbio_barcode_file,
params.pacbio_barcode_names
Expand Down Expand Up @@ -675,7 +674,8 @@ workflow ASCC_GENOMIC {
// USE IN THE BTK PIPELINE
//
GENERATE_SAMPLESHEET (
RUN_READ_COVERAGE.out.bam_ch,
reference_tuple_from_GG,
params.reads_path,
AUTOFILTER_AND_CHECK_ASSEMBLY.out.alarm_file
)
ch_versions = ch_versions.mix(GENERATE_SAMPLESHEET.out.versions)
Expand Down Expand Up @@ -715,6 +715,7 @@ workflow ASCC_GENOMIC {
params.nt_database_path,
params.diamond_uniprot_database_path,
params.ncbi_taxonomy_path,
params.reads_path,
params.busco_lineages_folder,
params.busco_lineages,
params.taxid,
Expand Down Expand Up @@ -836,7 +837,7 @@ workflow ASCC_GENOMIC {
.map { id, data ->
[id: id, data: data]
}
.set {number_1}
.set {ascc_merged_data}

def processes = [
'GC_COV', 'Coverage', 'TIARA',
Expand All @@ -845,26 +846,25 @@ workflow ASCC_GENOMIC {
]

def processChannels = processes.collectEntries { process ->
[(process): number_1
[(process): ascc_merged_data
.map { sample ->
def data = sample.data.find { it.meta.process == process }
data ? [sample.id, data.meta, data.file] : [sample.id, [process: process], []]
}
]
}

def combined_channel_1 = processChannels['GC_COV']
def ascc_combined_channels = processChannels['GC_COV']
processes.tail().each { process ->
combined_channel_1 = combined_channel_1
ascc_combined_channels = ascc_combined_channels
.combine(processChannels[process], by: 0)
}

combined_channel_1.view()
//
// SUBWORKFLOW: MERGES DATA THAT IS NOT USED IN THE CREATION OF THE BTK_DATASETS FOLDER
//
ASCC_MERGE_TABLES (
combined_channel_1.map { it[1..-1] } // Remove the first item in tuple (mapping key)
ascc_combined_channels.map { it[1..-1] } // Remove the first item in tuple (mapping key)
)
ch_versions = ch_versions.mix(ASCC_MERGE_TABLES.out.versions)
}
Expand Down
2 changes: 1 addition & 1 deletion workflows/ascc_organellar.nf
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ workflow ASCC_ORGANELLAR {
if ( (include_workflow_steps.contains('pacbio_barcodes') || include_workflow_steps.contains('ALL')) && !exclude_workflow_steps.contains("pacbio_barcodes") ) {
PACBIO_BARCODE_CHECK (
ESSENTIAL_JOBS.out.reference_tuple_from_GG,
params.reads_path, // TODO: COME ON MAN
params.reads_path,
params.reads_type,
params.pacbio_barcode_file,
params.pacbio_barcode_names
Expand Down

0 comments on commit 2e18147

Please sign in to comment.