Skip to content

Commit

Permalink
Merge pull request #86 from sanger-tol/dev_copy
Browse files Browse the repository at this point in the history
Dev copy
  • Loading branch information
DLBPointon authored Feb 12, 2025
2 parents 85c6c74 + 0d0d8e3 commit 2e18147
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 20 deletions.
18 changes: 10 additions & 8 deletions bin/generate_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ def parse_args():
parser = argparse.ArgumentParser(description="Generate a csv file for BTK")
parser.add_argument("sample_name", type=str, help="Name of sample")
parser.add_argument(
"mapped_bam_file",
"path_to_reads",
type=str,
help="Path containing the mapped BAM generated with PacBio data and the ASCC input assembly",
help="Path containing the PacBio reads",
)
parser.add_argument("-v", "--version", action="version", version="1.0.0")
parser.add_argument("-v", "--version", action="version", version="1.1.0")

return parser.parse_args()


Expand All @@ -31,12 +32,13 @@ def main():
data_list = []

data_list.append("sample,datatype,datafile\n")
if args.mapped_bam_file.endswith(".bam"):
data_list.append(f"{args.sample_name},pacbio,{args.mapped_bam_file}\n")
else:
sys.exit("I was expecting a mapped BAM file")

with open(f"{args.sample_name}_samplesheet.csv", "w") as file:
[data_list.append(f"{args.sample_name},pacbio,{args.path_to_reads}{file}\n") for file in os.listdir(args.path_to_reads) if file.endswith('.fasta.gz') or file.endswith('.fa.gz')]

if len(data_list) <= 1:
sys.exit("I was expecting at least one FASTA.GZ file")

with open("samplesheet.csv", "w") as file:
file.write("".join(data_list))


Expand Down
5 changes: 3 additions & 2 deletions modules/local/generate_samplesheet.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ process GENERATE_SAMPLESHEET {
'biocontainers/python:3.9' }"

input:
tuple val(meta), path(pacbio_path)
tuple val(meta), path(reference)
path( "input_pacbio_files/*" )
path(alarm_file)

output:
Expand All @@ -21,7 +22,7 @@ process GENERATE_SAMPLESHEET {
"""
generate_samplesheet.py \\
$prefix \\
"\$(realpath $pacbio_path)"
input_pacbio_files/
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
2 changes: 2 additions & 0 deletions modules/local/sanger_tol_btk.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ process SANGER_TOL_BTK {
path blastn
path blastx
path tax_dump
path( "input_pacbio_files/*" )
val busco_lineages_folder
val busco_lineages
val taxon
Expand Down Expand Up @@ -53,6 +54,7 @@ process SANGER_TOL_BTK {
--blastn "\$(realpath $blastn)" \\
--blastx "\$(realpath $blastx)" \\
--use_work_dir_as_temp true \\
--align \\
$args
mv ${prefix}_btk_out/pipeline_info blobtoolkit_pipeline_info
Expand Down
18 changes: 9 additions & 9 deletions workflows/ascc_genomic.nf
Original file line number Diff line number Diff line change
Expand Up @@ -323,8 +323,7 @@ workflow ASCC_GENOMIC {
) {
PACBIO_BARCODE_CHECK (
reference_tuple_from_GG,
params.reads_path, // TODO: TEAM WANT TO BE ABLE TO SPECIFY PACBIO FILES
                // MAY NEED A PROCESS TO PULL THEM INTO A SINGLE FOLDER BEFORE PROCESSING
params.reads_path,
params.reads_type,
params.pacbio_barcode_file,
params.pacbio_barcode_names
Expand Down Expand Up @@ -675,7 +674,8 @@ workflow ASCC_GENOMIC {
// USE IN THE BTK PIPELINE
//
GENERATE_SAMPLESHEET (
RUN_READ_COVERAGE.out.bam_ch,
reference_tuple_from_GG,
params.reads_path,
AUTOFILTER_AND_CHECK_ASSEMBLY.out.alarm_file
)
ch_versions = ch_versions.mix(GENERATE_SAMPLESHEET.out.versions)
Expand Down Expand Up @@ -715,6 +715,7 @@ workflow ASCC_GENOMIC {
params.nt_database_path,
params.diamond_uniprot_database_path,
params.ncbi_taxonomy_path,
params.reads_path,
params.busco_lineages_folder,
params.busco_lineages,
params.taxid,
Expand Down Expand Up @@ -836,7 +837,7 @@ workflow ASCC_GENOMIC {
.map { id, data ->
[id: id, data: data]
}
.set {number_1}
.set {ascc_merged_data}

def processes = [
'GC_COV', 'Coverage', 'TIARA',
Expand All @@ -845,26 +846,25 @@ workflow ASCC_GENOMIC {
]

def processChannels = processes.collectEntries { process ->
[(process): number_1
[(process): ascc_merged_data
.map { sample ->
def data = sample.data.find { it.meta.process == process }
data ? [sample.id, data.meta, data.file] : [sample.id, [process: process], []]
}
]
}

def combined_channel_1 = processChannels['GC_COV']
def ascc_combined_channels = processChannels['GC_COV']
processes.tail().each { process ->
combined_channel_1 = combined_channel_1
ascc_combined_channels = ascc_combined_channels
.combine(processChannels[process], by: 0)
}

combined_channel_1.view()
//
// SUBWORKFLOW: MERGES DATA THAT IS NOT USED IN THE CREATION OF THE BTK_DATASETS FOLDER
//
ASCC_MERGE_TABLES (
combined_channel_1.map { it[1..-1] } // Remove the first item in tuple (mapping key)
ascc_combined_channels.map { it[1..-1] } // Remove the first item in tuple (mapping key)
)
ch_versions = ch_versions.mix(ASCC_MERGE_TABLES.out.versions)
}
Expand Down
2 changes: 1 addition & 1 deletion workflows/ascc_organellar.nf
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ workflow ASCC_ORGANELLAR {
if ( (include_workflow_steps.contains('pacbio_barcodes') || include_workflow_steps.contains('ALL')) && !exclude_workflow_steps.contains("pacbio_barcodes") ) {
PACBIO_BARCODE_CHECK (
ESSENTIAL_JOBS.out.reference_tuple_from_GG,
params.reads_path, // TODO: COME ON MAN
params.reads_path,
params.reads_type,
params.pacbio_barcode_file,
params.pacbio_barcode_names
Expand Down

0 comments on commit 2e18147

Please sign in to comment.