main.nf

#!/usr/bin/env nextflow
/*
========================================================================================
                         nf-core/genebygenebact
========================================================================================
 nf-core/genebygenebact Analysis Pipeline.
 #### Homepage / Documentation
 https://github.com/nf-core/genebygenebact
----------------------------------------------------------------------------------------
*/

// Print the nf-core banner before anything else is logged.
log.info(Headers.nf_core(workflow, params.monochrome_logs))

////////////////////////////////////////////////
/*                                            */
/*                 PRINT HELP                 */
/*                                            */
////////////////////////////////////////////////


// Parameter schema shipped with the pipeline; also used further down for
// parameter validation and for the parameter summary.
def json_schema = "${projectDir}/nextflow_schema.json"
if (params.help) {
    // The last argument is the example command shown in the schema-generated help.
    log.info(
        NfcoreSchema.params_help(
            workflow,
            params,
            json_schema,
            "nextflow run nf-core/genebygenebact --input '*_R{1,2}.fastq.gz' -profile docker"
        )
    )
    exit 0
}


///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Print the hand-written usage message of the pipeline.
 *
 * Logs the pipeline header followed by a description of the command-line
 * options, grouped by pipeline stage. Nothing is returned; all output goes
 * through `log.info`.
 *
 * NOTE(review): `nfcoreHeader()` is not defined in the visible part of this
 * script - confirm it still exists before relying on this function.
 */
def helpMessage() {
    // TODO(review): the original help text carried a developer note (in Spanish) on --save_reference:
    // "save_reference is used to decompress the reference genome and obtain the fasta; is it necessary
    // to keep it in order to save the fasta?". The note was removed from the user-facing text; the
    // "Bowtie2 indices" wording of that option still needs to be confirmed.
    log.info nfcoreHeader()
    log.info"""

    Usage:

    The typical command for running the pipeline is as follows:

      nextflow run nf-core/genebygenebact --input samplesheet.csv -profile docker

    Generic arguments
      --input [file]                                Comma-separated file containing information about the samples in the experiment (see docs/usage.md)
      --schema [path]                               Directory where schema files are located
      -profile [str]                                Configuration profile to use. Can use multiple (comma separated)
                                                    Available: conda, docker, singularity, test, awsbatch, <institute> and more

    SRA download
      --save_sra_fastq [bool]                       Save FastQ files created from SRA identifiers in the results directory (Default: false)
      --skip_sra_download [bool]                    Skip steps involving the download and validation of FastQ files using SRA identifiers (Default: false)


    Schema download
      --api [str]                                   Nick name to connect to REST API. Options:
                                                        'bigsdb'
                                                        'pubMLST'
      --organism_id [int]                           Organism ID whose schema or profile is going to be downloaded.
      --schema_type [int]                           ID for schema type to download.
      --skip_get_schema [bool]                      Skip schema download (Default: true)
      --skip_get_profile [bool]                     Skip schema ST profile download (Default: true)


    References                                 If not specified in the configuration file or you wish to overwrite any of the references
      --genome [str]                                Name of genome reference key for the reference genome (Default: '')
      --fasta [file]                                Path to fasta file of the reference genome. Mandatory when --genome not supplied
      --gff [file]                                  Full path to the reference genome gff annotation file (Default: '')
      --save_reference [bool]                       If generated by the pipeline save the Bowtie2 indices in the results directory (Default: false)


    Quality control
      --skip_fastqc [bool]                          Skip FastQC (Default: false)
      --skip_multiqc [bool]                         Skip MultiQC (Default: false)


    Read trimming
      --cut_mean_quality [int]                      The mean quality requirement option shared by fastp cut_front, cut_tail or cut_sliding options. Range: 1~36 (Default: 30 (Q30))
      --qualified_quality_phred [int]               The quality value that a base is qualified. Default 30 means phred quality >=Q30 is qualified (Default: 30)
      --unqualified_percent_limit [int]             Percentage of bases that are allowed to be unqualified (0~100) (Default: 10)
      --min_trimming_length [int]                   Reads shorter than this length after trimming will be discarded (Default: 50)
      --skip_fastp_trimming [bool]                  Skip the adapter trimming step with fastp (Default: false)
      --save_trimmed [bool]                         Save the trimmed FastQ files in the results directory (Default: false)


    De novo assembly
      --skip_unicycler_assembly [bool]              Skip assembly steps in the pipeline (Default: false)
      --skip_quast [bool]                           Skip generation of QUAST aggregated report for assemblies (Default: false)


    Taranis / Typing / Gene by gene analysis using cgMLST schema
        Analyze schema
          --remove_subsets [str]                    Remove allele subsequences from the schema. Options:
                                                        'True' - Remove subsets
                                                        'False' - Do not remove subsets
                                                        (Default: 'False')
          --remove_duplicates [str]                 Remove duplicated alleles from the schema. Options:
                                                        'True' - Remove duplicates
                                                        'False' - Do not remove duplicates
                                                        (Default: 'False')
          --remove_nocds [str]                      Remove no CDS alleles from the schema. Options:
                                                        'True' - Remove no CDS alleles
                                                        'False' - Do not remove no CDS alleles
                                                        (Default: 'False')
          --new_schema [str]                        Filter a copy of the core genes schema preserving the analysis core genes schema. Options:
                                                        'True' - Create a copy of the core genes schema for filtering
                                                        'False' - Do not create a copy of the core genes schema for filtering
                                                        (Default: 'False')
          --skip_taranis_analyze_schema [bool]      Skip Taranis schema analysis (Default: true)


        Reference Allele(s)
          --evalue_ref [float]                      E-value in BLAST searches (Default: 0.001)
          --perc_identity_ref [int]                 Identity percent in BLAST searches (Default: 90)
          --reward_ref [int]                        Match reward in BLAST searches (Default: 1)
          --penalty_ref [int]                       Mismatch penalty in BLAST searches (Default: -2)
          --gap_open_ref [int]                      Gap open penalty in BLAST searches (Default: 1)
          --gap_extend_ref [int]                    Gap extension penalty in BLAST searches (Default: 1)
          --num_threads_ref [int]                   num_threads in BLAST searches (Default: 1)
          --skip_taranis_reference_alleles [bool]   Skip Taranis reference alleles analysis (Default: false)


        Allele Calling
          --reference_alleles [path]                Directory where the core gene references files are located
          --percentlength [int]                     Allowed length percentage considered the allele found as INF. Outside of this limit it is considered as ASM or ALM (Default: Standard Deviation)
          --coverage [int]                          Coverage threshold to exclude found sequences. Outside of this limit it is considered LNF (Default: 50)
          --evalue_call [float]                     E-value in BLAST searches (Default: 0.001)
          --perc_identity_ref_call [int]            Identity percentage in BLAST searches using reference alleles for each locus detection in samples (Default: 90)
          --perc_identity_loc_call [int]            Identity percentage in BLAST searches using all alleles in each locus for allele identification in samples (Default: 90)
          --reward_call [int]                       Match reward in BLAST searches (Default: 1)
          --penalty_call [int]                      Mismatch penalty in BLAST searches (Default: -2)
          --gapopen_call [int]                      Gap open penalty in BLAST searches (Default: 1)
          --gapextend_call [int]                    Gap extension penalty in BLAST searches (Default: 1)
          --max_target_seqs_call [int]              max_target_seqs in BLAST searches (Default: 10)
          --max_hsps_call [int]                     max_hsps in BLAST searches (Default: 10)
          --num_threads_call [int]                  num_threads in BLAST searches (Default: 1)
          --flanking_nts [int]                      Number of flanking nucleotides to add to each BLAST result obtained after locus detection in sample using reference allele for correct allele identification (Default: 100)
          --update_schema [str]                     Add INF alleles found for each locus to the core genes schema. Options:
                                                        'True' - Add INF alleles to the analysis core genes schema
                                                        'New' - Add INF alleles to a copy of the core genes schema preserving the analysis core genes schema
                                                        'False' - Do not update the core gene schema adding new INF alleles found
                                                        (Default: 'True')

          --st_profile [file]                       ST profile file based on core genes schema file to get ST for each sample (Default: '')
          --skip_taranis_allele_calling [bool]      Skip Taranis allele calling analysis (Default: false)


    Other options:
      --outdir [file]                               The output directory where the results will be saved
      --email [email]                               Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
      --email_on_fail [email]                       Same as --email, except only send mail if the workflow is not successful
      --max_multiqc_email_size [str]                Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB)
      -name [str]                                   Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic


    AWSBatch options:
      --awsqueue [str]                              The AWSBatch JobQueue that needs to be set when running on AWSBatch
      --awsregion [str]                             The AWS Region for your AWS Batch job to run on
      --awscli [str]                                Path to the AWS CLI tool
    """.stripIndent()
}

// Show help message
// NOTE(review): the schema-based help branch earlier in this script already calls
// `exit 0` when params.help is set, so this legacy call to helpMessage() looks
// unreachable - confirm before removing it.
if (params.help) {
    helpMessage()
    exit 0
}

///////////////////////////////////////////////////////////////////////////////////////


/////////////////////////////////////////////////////
/*                                                 */
/*                 VALIDATE INPUTS                 */
/*                                                 */
/////////////////////////////////////////////////////

// Validate the supplied parameters against the JSON schema when requested.
if (params.validate_params) {
    NfcoreSchema.validateParameters(params, json_schema, log)
}

//if (params.input) { ch_input = file(params.input, checkIfExists: true) } else { exit 1, "Input samplesheet file not specified!" }

// The samplesheet is only mandatory when at least one sample-based step is enabled.
// The skip flags are spelled as everywhere else in this script (skip_fastp_trimming,
// skip_unicycler_assembly): an undefined parameter name negates to true and would make
// the samplesheet mandatory in every run.
if (params.input) {
    ch_input = file(params.input, checkIfExists: true)
} else if (!params.skip_fastqc || !params.skip_fastp_trimming || !params.skip_unicycler_assembly || !params.skip_quast || !params.skip_taranis_allele_calling) {
    exit 1, "Cannot carry out the analysis, input samplesheet file not specified!"
} else {
    // No sample-based step requested: give downstream processes an empty source
    // instead of leaving ch_input undefined.
    ch_input = Channel.empty()
}

// A schema directory is required for any Taranis step unless the schema is downloaded.
if (params.schema) {
    ch_schema = Channel.fromPath( params.schema, type: 'dir', checkIfExists: true )
} else {
    if (params.skip_get_schema && (!params.skip_taranis_analyze_schema || !params.skip_taranis_reference_alleles || !params.skip_taranis_allele_calling)) {
        exit 1, "Schema not specified! To perform Taranis analysis (analyze schema, get reference alleles or allele calling) please provide a valid path to the schema or indicate which schema you desire to download and from which REST API!"
    }
}

// QUAST needs both a reference genome and its annotation.
if (!params.fasta && !params.genome && !params.skip_quast) {
    exit 1, "Reference genome not specified! To perform QUAST assemblies analysis please provide a reference genome!"
}

if (!params.gff && !params.genome && !params.skip_quast) {
    exit 1, "gff annotation file for reference genome not specified! To perform QUAST assemblies analysis please provide a gff annotation file!"
}

// Allele calling needs a reference genome.
if (!params.fasta && !params.genome && !params.skip_taranis_allele_calling) {
    exit 1, "Reference genome not specified! To perform Taranis allele calling analysis please provide a reference genome!"
}

// Reference alleles must be supplied when they are not generated but allele calling is run.
if (params.reference_alleles) {
    ch_reference_alleles = Channel.fromPath( params.reference_alleles, type: 'dir', checkIfExists: true )
} else {
    if (params.skip_taranis_reference_alleles && !params.skip_taranis_allele_calling) {
        exit 1, "Reference alleles not specified! To perform Taranis allele calling analysis please provide a valid path to the schema reference alleles or choose the Taranis reference alleles analysis to get them!"
    }
}

// Optional ST profile file.
if (params.st_profile) { ch_st_profile = file( params.st_profile, checkIfExists: true ) }

// The distance matrix step needs an allele calling matrix, either supplied or produced by allele calling.
if (params.alleles_matrix) {
    ch_allele_calling_matrix = Channel.fromPath( params.alleles_matrix, type: 'file', checkIfExists: true )
} else {
    if (!params.skip_taranis_distance_matrix && params.skip_taranis_allele_calling) {
        exit 1, "Allele calling matrix not specified! To perform Taranis distance matrix calculation please provide a valid path to the allele calling matrix file or choose the Taranis allele calling analysis to get it!"
    }
}


// Reject a --genome key that is not present in the configured genomes map.
def genome_key_unknown = params.genomes && params.genome && !params.genomes.containsKey(params.genome)
if (genome_key_unknown) {
   exit 1, "The provided genome '${params.genome}' is not available in the Genome file. Currently the available genomes are ${params.genomes.keySet().join(", ")}"
}

// Defaults taken from the genomes map when --genome is given.
params.fasta = params.genome ? (params.genomes[ params.genome ].fasta ?: false) : false
params.gff   = params.genome ? (params.genomes[ params.genome ].gff ?: false) : false


// A reference fasta is required from here on. Derive its file name (fasta_base)
// and the name without extension (index_base); for gzipped files the '.gz'
// suffix is removed first.
if (!params.fasta) {
    exit 1, "Reference genome fasta file not specified!"
} else {
    file(params.fasta, checkIfExists: true)

    lastPath = params.fasta.lastIndexOf(File.separator)
    lastExt  = params.fasta.lastIndexOf(".")

    def fasta_is_gzipped = params.fasta.endsWith('.gz')
    fasta_base = fasta_is_gzipped ? params.fasta.substring(lastPath+1,lastExt)
                                  : params.fasta.substring(lastPath+1)
    index_base = fasta_is_gzipped ? fasta_base.substring(0,fasta_base.lastIndexOf("."))
                                  : params.fasta.substring(lastPath+1,lastExt)
}


//////////////////////////////////////////////////
/*                                              */
/*                 CONFIG FILES                 */
/*                                              */
//////////////////////////////////////////////////

// Stage the configuration and documentation files bundled with the pipeline.
ch_multiqc_config = file("${projectDir}/assets/multiqc_config.yaml", checkIfExists: true)

// A user-supplied MultiQC config is optional; fall back to an empty channel.
if (params.multiqc_config) {
    ch_multiqc_custom_config = Channel.fromPath(params.multiqc_config, checkIfExists: true)
} else {
    ch_multiqc_custom_config = Channel.empty()
}

ch_output_docs        = file("${projectDir}/docs/output.md", checkIfExists: true)
ch_output_docs_images = file("${projectDir}/docs/images/", checkIfExists: true)


/////////////////////////////////////////
/*                                     */
/*                 AWS                 */
/*                                     */
/////////////////////////////////////////

// Sanity checks that only apply when the 'awsbatch' profile is active.
if (workflow.profile.contains('awsbatch')) {
    // Queue and region are both required.
    def aws_settings_missing = !params.awsqueue || !params.awsregion
    if (aws_settings_missing) {
        exit 1, 'Specify correct --awsqueue and --awsregion parameters on AWSBatch!'
    }

    // Results must be written to an S3 bucket.
    // related: https://github.com/nextflow-io/nextflow/issues/813
    if (!params.outdir.startsWith('s3:')) {
        exit 1, 'Outdir not on S3 - specify S3 Bucket to run on AWSBatch!'
    }

    // Trace files cannot live on S3 because S3 does not support rolling files.
    if (params.tracedir.startsWith('s3:')) {
        exit 1, 'Specify a local tracedir or run without trace! S3 cannot be used for tracefiles.'
    }
}


///////////////////////////////////////////////////////////////////////////
/*                                                                       */
/*                        PRINT PARAMETER SUMMARY                        */
/*                                                                       */
///////////////////////////////////////////////////////////////////////////

log.info NfcoreSchema.params_summary_log(workflow, params, json_schema)

// Header log info: ordered map of label -> value describing this run.
def summary = [:]
if (workflow.revision) summary['Pipeline Release'] = workflow.revision
summary['Run Name']         = workflow.runName
summary['Samplesheet']      = params.input

summary['Reference Genome'] = params.genome ?: 'Not supplied'
if (params.fasta)                    summary['Reference Genome Fasta File'] = params.fasta
if (params.gff)                      summary['Reference Genome GFF'] = params.gff
if (params.save_reference)           summary['Save Genome Indices'] = 'Yes'

// NCBI assembly download //
// Each entry reports the NCBI parameter it is guarded by (not the SRA ones).
if (params.save_ncbi_assembly_fasta)           summary['Save NCBI assembly fasta'] = params.save_ncbi_assembly_fasta
if (params.skip_ncbi_assembly_download)        summary['Skip NCBI assembly download'] = params.skip_ncbi_assembly_download

// SRA download //
if (params.save_sra_fastq)           summary['Save SRA fastq'] = params.save_sra_fastq
if (params.skip_sra_download)        summary['Skip SRA download'] = params.skip_sra_download

if (params.schema)                   summary['Schema path'] = params.schema

// Schema and profile download //
// REST API details are reported when at least one of the two downloads is enabled.
if (!params.skip_get_schema || !params.skip_get_profile)  {
    summary['Nick name to connect to REST API'] = params.api
    if (params.organism_id)                    summary['Organism ID whose schema or profile is going to be downloaded'] = params.organism_id
    if (params.schema_database)                summary['ID for schema database'] = params.schema_database
    if (params.schema_type)                    summary['ID for schema type to download'] = params.schema_type
}
// Report every skipped download, using the skip_get_* flags tested above
// (also when only one of the two downloads is skipped).
if (params.skip_get_schema)               summary['Skip Get Schema'] = 'Yes'
if (params.skip_get_profile)              summary['Skip Get Profile'] = 'Yes'

// Quality control //
for (skipped in ['Skip FastQC': 'skip_fastqc', 'Skip MultiQC': 'skip_multiqc']) {
    if (params[skipped.value]) summary[skipped.key] = 'Yes'
}

// Trimming //
// Either the skip is reported, or every fastp setting that has a truthy value.
if (params.skip_fastp_trimming) {
    summary['Skip Adapter Trimming'] = 'Yes'
} else {
    for (setting in [
        'Fastp Mean Qual'          : 'cut_mean_quality',
        'Fastp Qual Phred'         : 'qualified_quality_phred',
        'Fastp Unqual % Limit'     : 'unqualified_percent_limit',
        'Fastp Min Trimming Length': 'min_trimming_length'
    ]) {
        if (params[setting.value]) summary[setting.key] = params[setting.value]
    }
}

if (params.save_trimmed) {
    summary['Save Trimmed'] = 'Yes'
}

// Assembly //
for (skipped in ['Skip Unicycler Assembly': 'skip_unicycler_assembly', 'Skip Quast': 'skip_quast']) {
    if (params[skipped.value]) summary[skipped.key] = 'Yes'
}


// Analyze schema //
if (params.skip_taranis_analyze_schema) {
    summary['Skip Taranis Analyze Schema'] = 'Yes'
} else {
    for (setting in [
        'Remove allele subsequences from schema': 'remove_subsets',
        'Remove duplicated alleles from schema' : 'remove_duplicates',
        'Remove no CDS alleles from schema'     : 'remove_nocds',
        'Filter a schema copy'                  : 'new_schema'
    ]) {
        if (params[setting.value]) summary[setting.key] = params[setting.value]
    }
}

// Reference alleles //
// Each entry is reported only when its parameter is truthy; the value is read from the
// same parameter the guard tests (gap_open_ref / gap_extend_ref, as spelled in the help text).
if (!params.skip_taranis_reference_alleles)  {
    if (params.evalue_ref)          summary['Reference Alleles BLAST E-value'] = params.evalue_ref
    if (params.perc_identity_ref)   summary['Reference Alleles BLAST ID'] = params.perc_identity_ref
    if (params.reward_ref)          summary['Reference Alleles BLAST match reward'] = params.reward_ref
    if (params.penalty_ref)         summary['Reference Alleles BLAST mismatch penalty'] = params.penalty_ref
    if (params.gap_open_ref)        summary['Reference Alleles BLAST gap open penalty'] = params.gap_open_ref
    if (params.gap_extend_ref)      summary['Reference Alleles BLAST gap extension penalty'] = params.gap_extend_ref
    if (params.num_threads_ref)     summary['Reference Alleles BLAST num_threads'] = params.num_threads_ref
} else {
    summary['Skip Taranis Reference Alleles'] = 'Yes'
}

// Allele calling //
// Either the skip is reported, or every allele calling setting that has a truthy value,
// in the order listed below.
if (params.skip_taranis_allele_calling) {
    summary['Skip Taranis Allele Calling'] = 'Yes'
} else {
    for (setting in [
        'Core genes reference alleles path'                                    : 'reference_alleles',
        'Allowed length percentage to consider INF'                            : 'percentlength',
        'Coverage threshold to exclude results'                                : 'coverage',
        'Allele calling BLAST E-value'                                         : 'evalue_call',
        'Allele calling BLAST ID for locus detection'                          : 'perc_identity_ref_call',
        'Allele calling BLAST ID for allele identification'                    : 'perc_identity_loc_call',
        'Allele calling BLAST match reward'                                    : 'reward_call',
        'Allele calling BLAST mismatch penalty'                                : 'penalty_call',
        'Allele calling BLAST gap open penalty'                                : 'gapopen_call',
        'Allele calling BLAST gap extension penalty'                           : 'gapextend_call',
        'Allele calling BLAST max_target_seqs'                                 : 'max_target_seqs_call',
        'Allele calling BLAST max_hsps'                                        : 'max_hsps_call',
        'Allele calling BLAST num_threads'                                     : 'num_threads_call',
        'Number of flanking nucleotides added to locus detection BLAST results': 'flanking_nts',
        'Update schema adding INF alleles'                                     : 'update_schema',
        'ST profile'                                                           : 'st_profile'
    ]) {
        if (params[setting.value]) summary[setting.key] = params[setting.value]
    }
}

// Resource limits, container and run locations.
summary['Max Resources'] = "${params.max_memory} memory, ${params.max_cpus} cpus, ${params.max_time} time per job"
if (workflow.containerEngine) {
    summary['Container'] = "${workflow.containerEngine} - ${workflow.container}"
}
summary.putAll([
    'Output dir' : params.outdir,
    'Launch dir' : workflow.launchDir,
    'Working dir': workflow.workDir,
    'Script dir' : workflow.projectDir,
    'User'       : workflow.userName
])

// AWS details are only reported for the 'awsbatch' profile.
if (workflow.profile.contains('awsbatch')) {
    summary.putAll([
        'AWS Region': params.awsregion,
        'AWS Queue' : params.awsqueue,
        'AWS CLI'   : params.awscli
    ])
}

// Configuration profile information.
summary['Config Profile'] = workflow.profile
if (params.config_profile_description) { summary['Config Profile Description'] = params.config_profile_description }
if (params.config_profile_contact)     { summary['Config Profile Contact']     = params.config_profile_contact }
if (params.config_profile_url)         { summary['Config Profile URL']         = params.config_profile_url }
summary['Config Files'] = workflow.configFiles.join(', ')

// E-mail settings are reported when any notification address is set.
if (params.email || params.email_on_fail) {
    summary.putAll([
        'E-mail Address'   : params.email,
        'E-mail on failure': params.email_on_fail,
        'MultiQC maxsize'  : params.max_multiqc_email_size
    ])
}

// Check the hostnames against configured profiles
checkHostname()


///////////////////////////////////////////////////////////////////
/*                                                               */
/*                  UNZIP/UNTAR REFERENCE FILES                  */
/*                                                               */
///////////////////////////////////////////////////////////////////

/*
 * PREPROCESSING: Uncompress genome fasta file
 */

// Provide the reference fasta as `ch_fasta`: decompressed by GUNZIP_FASTA when the
// file name ends with '.gz', otherwise the file itself.
if (params.fasta) {
    file(params.fasta, checkIfExists: true)
    if (params.fasta.endsWith('.gz')) {
        process GUNZIP_FASTA {
            label 'error_retry'
            // The decompressed fasta is published only when --save_reference is set.
            if (params.save_reference) {
                publishDir "${params.outdir}/genome", mode: params.publish_dir_mode
            }

            input:
            // Resolve the parameter with file(), as the uncompressed branch does: a raw
            // string given to a `path` input is only accepted when it is an absolute path or a URI.
            path fasta from file(params.fasta)

            output:
            path "$unzip" into ch_fasta

            script:
            // Name of the decompressed file, i.e. the input name without '.gz'.
            unzip = fasta.toString() - '.gz'
            """
            pigz -f -d -p $task.cpus $fasta
            """
        }
    } else {
        ch_fasta = file(params.fasta)
    }
}


/*
 * PREPROCESSING: Uncompress gff annotation file
 */

// Provide the gff annotation as `ch_gff`: decompressed by GUNZIP_GFF when the
// file name ends with '.gz', otherwise the file itself.
if (params.gff) {
    file(params.gff, checkIfExists: true)
    if (params.gff.endsWith('.gz')) {
        process GUNZIP_GFF {
            label 'error_retry'
            // The decompressed gff is published only when --save_reference is set.
            if (params.save_reference) {
                publishDir "${params.outdir}/genome", mode: params.publish_dir_mode
            }

            input:
            // Resolve the parameter with file(), as the uncompressed branch does: a raw
            // string given to a `path` input is only accepted when it is an absolute path or a URI.
            path gff from file(params.gff)

            output:
            path "$unzip" into ch_gff

            script:
            // Name of the decompressed file, i.e. the input name without '.gz'.
            unzip = gff.toString() - '.gz'
            """
            pigz -f -d -p $task.cpus $gff
            """
        }
    } else {
        ch_gff = file(params.gff)
    }
}


/////////////////////////////////////////////////////////////////
/*                                                             */
/*                      PARSE DESIGN FILE                      */
/*                                                             */
/////////////////////////////////////////////////////////////////


/*
 * Split the input samplesheet into three groups of rows, turn each group into a
 * validated samplesheet and merge them into `samplesheet.valid.csv`:
 *   - rows with file information  -> check_samplesheet.py
 *   - rows holding only a GCA/GCF accession in the first column -> NCBI assembly rows
 *   - remaining rows holding only an identifier in the first column -> SRA run info lookup
 */
process CHECK_SAMPLESHEET {
    tag "$samplesheet"
    // '.tsv' outputs (the SRA run info) go to preprocess/sra, everything else to pipeline_info.
    publishDir "${params.outdir}/", mode: params.publish_dir_mode,
        saveAs: { filename ->
                      if (filename.endsWith(".tsv")) "preprocess/sra/$filename"
                      else "pipeline_info/$filename"
                }

    input:
    path samplesheet from ch_input

    output:
    path "samplesheet.valid.csv" into ch_samplesheet_reformat
    path "sra_run_info.tsv" optional true

    script:
    // Interpolated into the shell script as the commands `true` / `false`.
    run_sra = !params.skip_sra_download && !isOffline()
    // NOTE(review): run_ncbi is computed but not used by the script below.
    run_ncbi = !params.skip_ncbi_assembly_download && !isOffline()

    // The GCA/GCF alternatives of the NCBI filter are grouped in parentheses so that the
    // "only the first column is filled" conditions apply to both prefixes; awk gives '&&'
    // higher precedence than '||', so without the grouping every GCA row would be selected
    // whatever its other columns contain.
    //awk -F, 'NR>1 {if(\$1 != "" && \$2 == "" && \$3 == "" && \$4 == "") {print \$1}}' $samplesheet > ncbi_id_samples.csv
    """

    awk -F, '{if((\$1 != "" && \$2 != "" && \$4 == "" || \$4 == "fasta") || (\$4 != "")) {print \$0}}' $samplesheet > non_id.csv
    if [ -s non_id.csv ]
    then
        check_samplesheet.py -FILE_IN non_id.csv -FILE_OUT non_id.samplesheet.csv
    fi


    awk -F, 'NR>1 {if((\$1 ~ /^GCA/ || \$1 ~ /^GCF/) && \$1 != "" && \$2 == "" && \$3 == "" && \$4 == "") {print \$1}}' $samplesheet > ncbi_id_samples.csv

    if [ -s ncbi_id_samples.csv ]
    then
        awk -F, 'BEGIN {OFS=","; print "sample_id,single_end,is_sra,is_ftp,fastq_1,fastq_2,md5_1,md5_2,is_ncbi,is_fasta,fasta"} {\$2="0";\$3="0";\$4="0";\$5="";\$6="";\$7="0";\$8="0";\$9="1";\$10="0";\$11="";print \$0}' ncbi_id_samples.csv > ncbi_id.samplesheet.csv
        rm ncbi_id_samples.csv
    fi


    awk -F, '{if(\$1 !~ /^GCA/ && \$1 !~ /^GCF/ && \$1 != "" && \$2 == "" && \$3 == "" && \$4 == "") {print \$1}}' $samplesheet > sra_id.list

    if $run_sra && [ -s sra_id.list ]
    then
        fetch_sra_runinfo.py sra_id.list sra_run_info.tsv --platform ILLUMINA --library_layout SINGLE,PAIRED
        sra_runinfo_to_samplesheet.py sra_run_info.tsv sra_id.samplesheet.csv
    fi


    if [ -f non_id.samplesheet.csv ]
    then
        head -n 1 non_id.samplesheet.csv > samplesheet.valid.csv
    else
        if [ -f ncbi_id.samplesheet.csv ]
        then
            head -n 1 ncbi_id.samplesheet.csv > samplesheet.valid.csv
        else
            head -n 1 sra_id.samplesheet.csv > samplesheet.valid.csv
        fi
    fi
    tail -n +2 -q *id.samplesheet.csv >> samplesheet.valid.csv

    """
}


/**
 * Convert one row of the validated samplesheet into the tuple used by the input channels.
 *
 * Layout: [ sample_id, single_end, is_sra, is_ftp, reads, is_ncbi, is_fasta, fasta ]
 * - For rows that are neither SRA, NCBI nor fasta entries, `reads` holds the FastQ
 *   file(s), which must exist (one file when single_end, two otherwise).
 * - For every other row, `reads` holds the raw fastq_1/fastq_2 values and a ninth
 *   element [ md5_1, md5_2 ] is appended.
 *
 * @param sample map of column name -> value for one samplesheet row
 * @return the tuple described above, as a list
 */
def validate_input(LinkedHashMap sample) {
    // The flag columns are converted with toBoolean()
    def toFlag = { String column -> sample[column].toBoolean() }

    def single_end = toFlag('single_end')
    def is_sra     = toFlag('is_sra')
    def is_ftp     = toFlag('is_ftp')
    def is_ncbi    = toFlag('is_ncbi')
    def is_fasta   = toFlag('is_fasta')

    // Elements shared by both layouts, before and after the reads entry
    def prefix = [ sample.sample_id, single_end, is_sra, is_ftp ]
    def suffix = [ is_ncbi, is_fasta, sample.fasta ]

    // Remote or assembly-based samples: nothing is checked on disk
    if (is_sra || is_ncbi || is_fasta) {
        return prefix + [ [ sample.fastq_1, sample.fastq_2 ] ] + suffix + [ [ sample.md5_1, sample.md5_2 ] ]
    }

    // Local FastQ samples: every read file must exist
    def reads = [ file(sample.fastq_1, checkIfExists: true) ]
    if (!single_end) {
        reads << file(sample.fastq_2, checkIfExists: true)
    }
    return prefix + [ reads ] + suffix
}


/*
 * Create channels for input fastq files
 */

// Parse the validated samplesheet row by row and fan the resulting tuples out
// to the channels consumed by the downstream steps.
ch_samplesheet_reformat
    .splitCsv(header: true, sep: ',')
    .map { row -> validate_input(row) }
    .into { ch_reads_all; ch_reads_sra; ch_fasta_gunzip; ch_fasta_ncbi }


/*
 * Create channel for input assembly fasta files
 * (element 6 of each tuple is the is_fasta flag set by validate_input)
 */

ch_fasta_gunzip
    .filter { sample_tuple -> sample_tuple[6] }
    .into { ch_fasta_gunzip_filter }


// Decompress the assembly fasta files supplied in the samplesheet.
// Emits [ sample, fasta ] tuples into ch_input_assembly; a fasta whose name does not
// end with '.gz' is passed through untouched.
process GUNZIP_INPUT_ASSEMBLIES {
    label 'error_retry'
    // Assemblies are published only when params.save_fasta_assemblies is set.
    if (params.save_fasta_assemblies) {
        publishDir "${params.outdir}/fasta_assemblies", mode: params.publish_dir_mode
    }

    input:
    // Earlier input declarations, kept for reference:
    ///tuple val(sample), path(fasta) from ch_fasta_gunzip_filter
    ///tuple val(sample), val(is_id), path(fasta) from ch_fasta_gunzip_filter
    ///tuple val(sample), val(single_end), val(is_sra), val(is_ftp), val(fastq), val(is_ncbi), val(is_fasta), file(fasta), val(md5) from ch_fasta_gunzip_filter
    // Nine-element tuple as built by validate_input for fasta rows; only `fasta` is staged as a file.
    tuple val(sample), val(single_end), val(is_sra), val(is_ftp), val(fastq), val(is_ncbi), val(is_fasta), path(fasta), val(md5) from ch_fasta_gunzip_filter

    output:
    tuple val(sample), path("$unzip") into ch_input_assembly

    script:
    // Expected output name: the input name with '.gz' removed (unchanged when there is no '.gz').
    unzip = fasta.toString() - '.gz'
    """
    if [[ "$fasta" == *.gz ]];
    then
        pigz -f -d -p $task.cpus $fasta
    fi
    """
    }


//////////////////////////////////////////////////////////////////////////////////
/*                                                                              */
/*                     DOWNLOAD NCBI ASSEMBLIES FASTA FILES                     */
/*                                                                              */
//////////////////////////////////////////////////////////////////////////////////

/*
 * STEP 1: Download NCBI data
 */

// The whole NCBI branch is skipped when the user disables it or when isOffline() is true.
if (!params.skip_ncbi_assembly_download && !isOffline()) {

    // Keep only the samples whose is_ncbi flag (tuple index 5) is set
    ch_fasta_ncbi
    .filter { it[5] }
    .into { ch_fasta_ncbi_id }


    /*
     * Download the genomic FASTA of an NCBI assembly, using the sample id as
     * the assembly accession.
     *
     * Input : sample id (first element of each ch_fasta_ncbi_id tuple).
     * Output: [ sample, *.fna ] into ch_fasta_ncbi_download; the *.fna files
     *         are published only when params.save_ncbi_assembly_fasta is set.
     */
    process NCBI_ASSEMBLY_DOWNLOAD {

        tag "$sample"
        //label 'process_medium'
        label 'process_low'
        label 'error_retry'
        publishDir "${params.outdir}/assembly/ncbi", mode: params.publish_dir_mode,
            saveAs : { filename -> params.save_ncbi_assembly_fasta ? filename : null }

        input:
        //tuple val(sample), val(is_id), val(fasta) from ch_fasta_ncbi_id

        //tuple val(sample), val(single_end), val(is_sra), val(is_ftp), val(fastq), val(is_ncbi), val(is_fasta), val(fasta) from ch_fasta_ncbi_id
        // Only the sample id is needed; the rest of the tuple is dropped.
        val(sample) from ch_fasta_ncbi_id.map { it[0] }

        output:
        tuple val(sample), path("*.fna") into ch_fasta_ncbi_download
        path "*.fna"

        // Shell steps:
        //  1. the first four characters of the id select the summary field:
        //     "GCA_" -> FtpPath_GenBank, anything else -> FtpPath_RefSeq;
        //  2. esearch | esummary | xtract print that field (a URL) for the id;
        //  3. for each URL the file name is derived from the URL part starting
        //     at the id prefix plus "_genomic.fna.gz", and fetched with wget;
        //  4. every downloaded *.fna.gz is decompressed.
        script:

        """
        id=${sample}
        id_prefix=\${id:0:4}

        if [ \$id_prefix == GCA_ ]; then   database=FtpPath_GenBank; else   database=FtpPath_RefSeq; fi


        esearch -db assembly -query \$id \
            | esummary \
            | xtract -pattern DocumentSummary -element \$database \
            | while read -r url ; do
                file_name=\$(echo \$url | grep -o \$id_prefix.* | sed 's/\$/_genomic.fna.gz/') ;
                wget "\$url/\$file_name" ;
            done

        gzip -d *.fna.gz

        """
    }

    // Append the downloaded assemblies to the locally supplied ones
    ch_input_assembly
    .concat(ch_fasta_ncbi_download)
    .set{ ch_input_assembly }
}


//////////////////////////////////////////////////////////////////////
/*                                                                  */
/*                     DOWNLOAD SRA FASTQ FILES                     */
/*                                                                  */
//////////////////////////////////////////////////////////////////////

/*
 * STEP 2: Download and check SRA data
 */

// The SRA branch is skipped when the user disables it or when isOffline() is true.
if (!params.skip_sra_download && !isOffline()) {
    // Keep only the samples whose is_sra flag (tuple index 2) is set and feed
    // them to both download strategies; each process selects its own samples
    // through its `when:` guard on is_ftp.
    ch_reads_sra
        .filter { it[2] }
        .into { ch_reads_sra_ftp
                ch_reads_sra_dump }

    /*
     * Download FastQ files with curl from the URLs held in `fastq` and verify
     * each file against the matching checksum in `md5`.
     *
     * Output: [ sample, single_end, is_sra, is_ftp, *.fastq.gz ] into ch_sra_fastq_ftp.
     * The *.md5 files are always published under md5/; FastQ files only when
     * params.save_sra_fastq is set.
     */
    process SRA_FASTQ_FTP {
        tag "$sample"
        label 'process_medium'
        label 'error_retry'
        publishDir "${params.outdir}/preprocess/sra", mode: params.publish_dir_mode,
            saveAs: { filename ->
                          if (filename.endsWith(".md5")) "md5/$filename"
                          else params.save_sra_fastq ? filename : null
                    }

        when:
        is_ftp

        input:
        tuple val(sample), val(single_end), val(is_sra), val(is_ftp), val(fastq), val(is_ncbi), val(is_fasta), val(fasta), val(md5) from ch_reads_sra_ftp

        output:
        tuple val(sample), val(single_end), val(is_sra), val(is_ftp), path("*.fastq.gz") into ch_sra_fastq_ftp
        path "*.md5"

        script:
        if (single_end) {
            """
            curl -L ${fastq[0]} -o ${sample}.fastq.gz
            echo "${md5[0]}  ${sample}.fastq.gz" > ${sample}.fastq.gz.md5
            md5sum -c ${sample}.fastq.gz.md5
            """
        } else {
            """
            curl -L ${fastq[0]} -o ${sample}_1.fastq.gz
            echo "${md5[0]}  ${sample}_1.fastq.gz" > ${sample}_1.fastq.gz.md5
            md5sum -c ${sample}_1.fastq.gz.md5

            curl -L ${fastq[1]} -o ${sample}_2.fastq.gz
            echo "${md5[1]}  ${sample}_2.fastq.gz" > ${sample}_2.fastq.gz.md5
            md5sum -c ${sample}_2.fastq.gz.md5
            """
        }
    }

    /*
     * Download FastQ files with parallel-fastq-dump for the SRA samples that
     * are not flagged is_ftp.
     *
     * Output: [ sample, single_end, is_sra, is_ftp, *.fastq.gz ] into ch_sra_fastq_dump.
     * The *.log file is always published under log/; FastQ files only when
     * params.save_sra_fastq is set.
     */
    process SRA_FASTQ_DUMP {
        tag "$sample"
        label 'process_medium'
        label 'error_retry'
        publishDir "${params.outdir}/preprocess/sra", mode: params.publish_dir_mode,
            saveAs: { filename ->
                          if (filename.endsWith(".log")) "log/$filename"
                          else params.save_sra_fastq ? filename : null
                    }

        when:
        !is_ftp

        input:
        tuple val(sample), val(single_end), val(is_sra), val(is_ftp) from ch_reads_sra_dump.map { it[0..3] }

        output:
        tuple val(sample), val(single_end), val(is_sra), val(is_ftp), path("*.fastq.gz") into ch_sra_fastq_dump
        path "*.log"

        script:
        // The id passed to the tool is the sample id without its last "_"-separated token
        prefix = "${sample.split('_')[0..-2].join('_')}"
        pe = single_end ? "" : "--readids --split-e"
        // For paired-end runs remove the un-numbered "<prefix>.fastq.gz" file if
        // the tool produced one. `rm -f` is used instead of `[ -f X ] && rm X`
        // because this is the last command of the script: when the file is
        // absent the test returns 1 and that would become the task exit status.
        rm_orphan = single_end ? "" : "rm -f ${prefix}.fastq.gz"

        """

        parallel-fastq-dump \\
            --sra-id $prefix \\
            --threads $task.cpus \\
            --outdir ./ \\
            --tmpdir ./ \\
            --gzip \\
            $pe \\
            > ${prefix}.fastq_dump.log

        $rm_orphan

        """
    }

    // Replace the SRA entries of ch_reads_all by the tuples emitted by the two
    // download processes
    ch_reads_all
        .filter { !it[2] }
        .concat(ch_sra_fastq_ftp, ch_sra_fastq_dump)
        .set { ch_reads_all }
}


// Keep only FastQ-based samples (drop entries flagged is_ncbi, index 5, or
// is_fasta, index 6), strip the last "_"-separated token from the sample id and
// gather every FastQ that shares the same id and single_end flag.
ch_reads_all
    .filter { info -> !(info[5] || info[6]) }
    .map { info ->
        def base_id = info[0].split('_')[0..-2].join('_')
        [ base_id, info[1], info[4] ]
    }
    .groupTuple(by: [0, 1])
    .map { grouped -> [ grouped[0], grouped[1], grouped[2].flatten() ] }
    .set { ch_reads_all }


/////////////////////////////////////////////////////////////////////
/*                                                                 */
/*                     MERGE RESEQUENCED FASTQ                     */
/*                                                                 */
/////////////////////////////////////////////////////////////////////

/*
 * STEP 3: Merge FastQ files with the same sample identifier
 */

/*
 * Produce one merged FastQ (single-end) or one merged pair (paired-end) per
 * sample. Several input files are concatenated with `cat`; a single file (or
 * a single pair) is only symlinked under the merged name.
 * Runs only when the Unicycler assembly is not skipped.
 */
process CAT_FASTQ {
    tag "$sample"

    input:
    tuple val(sample), val(single_end), path(reads) from ch_reads_all

    output:
    tuple val(sample), val(single_end), path("*.merged.fastq.gz") into ch_cat_fastqc,
                                                                       ch_cat_fastp

    when:
    !params.skip_unicycler_assembly

    script:
    readList = reads.collect { it.toString() }
    if (single_end) {
        if (readList.size() > 1) {
            """
            cat ${readList.sort().join(' ')} > ${sample}.merged.fastq.gz
            """
        } else {
            """
            ln -s $reads ${sample}.merged.fastq.gz
            """
        }
    } else if (readList.size() > 2) {
        // Files arrive interleaved: even positions are mate 1, odd positions mate 2
        def read1 = []
        def read2 = []
        readList.eachWithIndex { name, idx -> (idx % 2 == 0 ? read1 : read2) << name }
        """
        cat ${read1.sort().join(' ')} > ${sample}_1.merged.fastq.gz
        cat ${read2.sort().join(' ')} > ${sample}_2.merged.fastq.gz
        """
    } else {
        """
        ln -s ${reads[0]} ${sample}_1.merged.fastq.gz
        ln -s ${reads[1]} ${sample}_2.merged.fastq.gz
        """
    }
}


////////////////////////////////////////////////////////////////////
/*                                                                */
/*                     FASTQ QUALITY CONTROL                      */
/*                                                                */
////////////////////////////////////////////////////////////////////

/*
 * STEP 4: FastQC on input reads after merging libraries from the same sample
 */

/*
 * Run FastQC on the merged FastQ files of a sample.
 *
 * Input : [ sample, single_end, reads ] from ch_cat_fastqc.
 * Output: FastQC *.zip / *.html reports into ch_fastqc_raw_reports_mqc.
 * Skipped when params.skip_fastqc or params.skip_unicycler_assembly is set.
 */
process FASTQC {
    tag "$sample"
    //label 'process_medium'
    label 'process_low'
    // Zip archives are published under zips/, everything else at the top level
    publishDir "${params.outdir}/preprocess/fastqc", mode: params.publish_dir_mode,
        saveAs: { filename ->
                      filename.endsWith(".zip") ? "zips/$filename" : filename
                }

    when:
    !params.skip_fastqc && !params.skip_unicycler_assembly

    input:

    tuple val(sample), val(single_end), path(reads) from ch_cat_fastqc

    output:
    path "*.{zip,html}" into ch_fastqc_raw_reports_mqc

    script:

    """

    fastqc --quiet --threads $task.cpus *.fastq.gz

    """
}


////////////////////////////////////////////////////////////////////
/*                                                                */
/*                        ADAPTER TRIMMING                        */
/*                                                                */
////////////////////////////////////////////////////////////////////

/*
 * STEP 5: Fastp adapter trimming and quality filtering
 */

// Trimming is optional: when skipped, the merged reads go straight to the
// assembler and the MultiQC inputs for fastp are replaced by empty channels.
if (!params.skip_fastp_trimming) {
    /*
     * Trim and quality-filter the merged reads with fastp, then run FastQC
     * on the trimmed reads.
     *
     * Input : [ sample, single_end, reads ] from ch_cat_fastp.
     * Output: trimmed reads into ch_fastp_unicycler, fastp JSON into
     *         ch_fastp_mqc, FastQC reports into ch_fastp_fastqc_mqc, plus the
     *         log / HTML report and the *.fail.fastq.gz files.
     * Runs only when the Unicycler assembly is not skipped.
     */
    process FASTP {
        tag "$sample"
        //label 'process_medium'
        label 'process_low'
        // JSON and fastp HTML stay at the top level, FastQC output goes to
        // fastqc/ (zips under fastqc/zips/), logs to log/; any other file
        // is published only when params.save_trimmed is set.
        publishDir "${params.outdir}/preprocess/fastp", mode: params.publish_dir_mode,
            saveAs: { filename ->
                          if (filename.endsWith(".json")) filename
                          else if (filename.endsWith(".fastp.html")) filename
                          else if (filename.endsWith("_fastqc.html")) "fastqc/$filename"
                          else if (filename.endsWith(".zip")) "fastqc/zips/$filename"
                          else if (filename.endsWith(".log")) "log/$filename"
                          else params.save_trimmed ? filename : null
                    }

        when:
        !params.skip_unicycler_assembly

        input:
        tuple val(sample), val(single_end), path(reads) from ch_cat_fastp

        output:
        tuple val(sample), val(single_end), path("*.trim.fastq.gz") into ch_fastp_unicycler
        path "*.json" into ch_fastp_mqc
        path "*_fastqc.{zip,html}" into ch_fastp_fastqc_mqc
        path "*.{log,fastp.html}"
        path "*.fail.fastq.gz"

        script:
        // Added soft-links to original fastqs for consistent naming in MultiQC
        // Adapter auto-detection for paired-end data is only requested for pairs.
        // In the shell script `$single_end` is rendered as the command `true`
        // or `false`, which drives the `if`; IN_READS / OUT_READS start with
        // the single-end arguments and are overwritten in the paired-end branch.
        autodetect = single_end ? "" : "--detect_adapter_for_pe"

        """

        IN_READS='--in1 ${sample}.fastq.gz'
        OUT_READS='--out1 ${sample}.trim.fastq.gz --failed_out ${sample}.fail.fastq.gz'
        if $single_end; then
            [ ! -f  ${sample}.fastq.gz ] && ln -s $reads ${sample}.fastq.gz
        else
            [ ! -f  ${sample}_1.fastq.gz ] && ln -s ${reads[0]} ${sample}_1.fastq.gz
            [ ! -f  ${sample}_2.fastq.gz ] && ln -s ${reads[1]} ${sample}_2.fastq.gz
            IN_READS='--in1 ${sample}_1.fastq.gz --in2 ${sample}_2.fastq.gz'
            OUT_READS='--out1 ${sample}_1.trim.fastq.gz --out2 ${sample}_2.trim.fastq.gz --unpaired1 ${sample}_1.fail.fastq.gz --unpaired2 ${sample}_2.fail.fastq.gz'
        fi

        fastp \\
            \$IN_READS \\
            \$OUT_READS \\
            $autodetect \\
            --cut_front \\
            --cut_tail \\
            --cut_mean_quality $params.cut_mean_quality \\
            --qualified_quality_phred $params.qualified_quality_phred \\
            --unqualified_percent_limit $params.unqualified_percent_limit \\
            --length_required $params.min_trimming_length \\
            --trim_poly_x \\
            --thread $task.cpus \\
            --json ${sample}.fastp.json \\
            --html ${sample}.fastp.html \\
            2> ${sample}.fastp.log

        fastqc --quiet --threads $task.cpus *.trim.fastq.gz

        """
    }

} else {
    // No trimming: forward the merged reads unchanged and give MultiQC empty fastp inputs
    ch_cat_fastp
        .into { ch_fastp_unicycler }
    ch_fastp_mqc = Channel.empty()
    ch_fastp_fastqc_mqc = Channel.empty()
}


///////////////////////////////////////////////////////////
/*                                                       */
/*                    DENOVO ASSEMBLY                    */
/*                                                       */
///////////////////////////////////////////////////////////

/*
 * STEP 6: De novo assembly with Unicycler
 */

// With assembly enabled, the Unicycler scaffolds are combined with the
// assemblies provided as input; otherwise only the provided assemblies are
// passed on to QUAST and allele calling.
if (!params.skip_unicycler_assembly ) {
    /*
     * De novo assembly of a sample's reads with Unicycler, followed by
     * Bandage renderings (PNG and SVG) of the assembly graph when the GFA
     * file is not empty.
     *
     * Input : [ sample, single_end, reads ] from ch_fastp_unicycler.
     * Output: [ sample, <sample>.scaffolds.fa ] into ch_unicycler_assembly,
     *         plus the published graph files.
     */
    process UNICYCLER {
        tag "$sample"
        //label 'process_high'
        label 'process_low'
        label 'error_retry'
        publishDir "${params.outdir}/assembly/unicycler", mode: params.publish_dir_mode

        input:
        tuple val(sample), val(single_end), path(reads) from ch_fastp_unicycler

        output:

        tuple val(sample), path("*scaffolds.fa") into ch_unicycler_assembly
        //path "*scaffolds.fa" into ch_unicycler_assembly

        path "*assembly.{gfa,png,svg}"

        script:
        // Single-end reads use -s, paired-end reads use -1 / -2
        input_reads = single_end ? "-s $reads" : "-1 ${reads[0]} -2 ${reads[1]}"

        """

        unicycler \\
            --threads $task.cpus \\
            $input_reads \\
            --out ./
        mv assembly.fasta ${sample}.scaffolds.fa
        mv assembly.gfa ${sample}.assembly.gfa

        if [ -s ${sample}.assembly.gfa ]
        then
            Bandage image ${sample}.assembly.gfa ${sample}.assembly.png --height 1000
            Bandage image ${sample}.assembly.gfa ${sample}.assembly.svg --height 1000
        fi

        """
    }
    // New assemblies first, then the ones provided as input
    ch_unicycler_assembly
        .concat(ch_input_assembly)
        .into { ch_samples_quast
                ch_samples_taranis_allele_calling}
} else {
    ch_input_assembly
        .into { ch_samples_quast
                ch_samples_taranis_allele_calling }
}


/*
 * STEP 7: Run Quast on de novo assemblies
 */

/*
 * Assembly quality assessment with QUAST against the reference FASTA.
 *
 * Input : [ sample, scaffolds ] from ch_samples_quast, the reference from
 *         ch_fasta and the annotation from ch_gff.
 * Output: the quast/ directory (published) and <sample>_report.tsv into
 *         ch_quast_mqc.
 */
process QUAST {
    tag "$sample"
    //label 'process_medium'
    label 'process_low'
    label 'error_retry'
    // Files ending in .tsv are not published (the closure yields no name for them)
    publishDir "${params.outdir}/assembly/", mode: params.publish_dir_mode,
        saveAs: { filename ->
                    if (!filename.endsWith(".tsv")) filename
                }

  //  when:
    //!params.skip_quast

    input:

    tuple val(sample), path(scaffolds) from ch_samples_quast
    //path scaffolds from ch_samples_quast

    path fasta from ch_fasta
    path gff from ch_gff

    output:
    path "quast"
    path "*_report.tsv" into ch_quast_mqc

    script:
    // The annotation is passed to QUAST only when params.gff is set
    // NOTE(review): `scaffolds.join(' ')` below presumably dates from a version
    // that received a collected list of assemblies; each task now gets the
    // file(s) of one sample - confirm it renders as intended.
    features = params.gff ? "--features $gff" : ""

    """

    quast.py \\
        --output-dir quast \\
        -r $fasta \\
        $features \\
        --threads $task.cpus \\
        ${scaffolds.join(' ')}

    mv quast/report.tsv quast/${sample}_report.tsv
    ln -s quast/${sample}_report.tsv

    """
}


/////////////////////////////////////////////////////////////////////
/*                                                                 */
/*                    DOWNLOAD SCHEMA & PROFILE                    */
/*                                                                 */
/////////////////////////////////////////////////////////////////////

/*
 * STEP 8: Download specified schema from bigsdb or pubmlst REST-API if not provided
 */

// The schema is downloaded only when the step is not skipped and no schema
// was supplied through params.schema.
if ( !params.skip_get_schema && !params.schema ) {
    /*
     * Download the schema with get_files_from_rest_api.py, using params.api,
     * params.organism_id and params.schema_type.
     *
     * Output: the api_schema directory into ch_schema (also published under
     *         <outdir>/preprocess).
     */
    process GET_SCHEMA {
        //label 'process_medium'
        label 'process_low'
        label 'error_retry'
        publishDir "${params.outdir}/preprocess", mode: params.publish_dir_mode

        output:
        path "api_schema" into ch_schema

        script:

        """

        get_files_from_rest_api.py --out api_schema/ \\
                                            no_interactive \\
                                            --api_url $params.api \\
                                            --organism_id $params.organism_id \\
                                            --schema_database 1 \\
                                            --schema_type $params.schema_type \\
                                            --file_type schema

        """
    }
}


/*
 * STEP 9: Download specified ST profile from bigsdb or pubmlst REST-API if not provided
 */

// The ST profile is downloaded unless the step is skipped. When it is skipped
// and no params.st_profile is given, ch_st_profile becomes an empty channel.
if (!params.skip_get_profile) {
    /*
     * Download the ST profile with get_files_from_rest_api.py, using
     * params.api, params.organism_id and params.schema_type.
     *
     * Output: the CSV file(s) under api_profile/ into ch_st_profile (also
     *         published under <outdir>/preprocess).
     */
    process GET_PROFILE {
        //label 'process_medium'

        label 'process_low'
        label 'error_retry'
        publishDir "${params.outdir}/preprocess", mode: params.publish_dir_mode

        output:
        path "api_profile/*.csv" into ch_st_profile

        script:

        """

        get_files_from_rest_api.py --out api_profile/ \\
                                            no_interactive \\
                                            --api_url $params.api \\
                                            --organism_id $params.organism_id \\
                                            --schema_database 1 \\
                                            --schema_type $params.schema_type \\
                                            --file_type profile

        """
    }

} else {
    // NOTE(review): when params.st_profile IS set, ch_st_profile is presumably
    // created elsewhere in the file - confirm.
    if (!params.st_profile) { ch_st_profile = Channel.empty()
    }
}


/////////////////////////////////////////////////////////////////
/*                                                             */
/*                    GENE BY GENE ANALYSIS                    */
/*                                                             */
/////////////////////////////////////////////////////////////////

/*
 * STEP 10: Analyze Schema with Taranis
 */

// When schema analysis is skipped, ch_schema feeds the reference-allele and
// allele-calling steps directly.
if (!params.skip_taranis_analyze_schema) {

    ch_schema.into{ ch_schema_taranis_analyze_schema }

    /*
     * Analyse the schema with `taranis.py analyze_schema`.
     *
     * Input : schema directory from ch_schema_taranis_analyze_schema.
     * Output: - analyze_schema/*_updated (optional) into the two "*_cop" channels;
     *         - the input schema directory itself into the two "*_orig" channels;
     *         - the analyze_schema/ directory (published).
     */
    process TARANIS_ANALYZE_SCHEMA {
        //label 'process_high'
        label 'process_low'
        label 'error_retry'

        publishDir "${params.outdir}/taranis/", mode: params.publish_dir_mode

        input:

        path schema from ch_schema_taranis_analyze_schema

        output:

        // Optional: this output may be absent without failing the task
        path "analyze_schema/*_updated" optional true into ch_schema_taranis_reference_alleles_cop,
                                                        ch_schema_taranis_allele_calling_cop

        path "${schema}" into ch_schema_taranis_reference_alleles_orig,
                                ch_schema_taranis_allele_calling_orig
        path "analyze_schema/"

        script:

        """
        taranis.py analyze_schema -inputdir $schema \\
                                    -outputdir analyze_schema/ \\
                                    -removeduplicates $params.remove_duplicates \\
                                    -removesubsets $params.remove_subsets \\
                                    -removenocds $params.remove_nocds \\
                                    -newschema $params.new_schema \\
                                    -cpus $task.cpus
        """
    }

} else {
    ch_schema
        .into { ch_schema_taranis_reference_alleles
                ch_schema_taranis_allele_calling }
}

// After the schema has been analysed, choose which schema feeds the
// reference-allele and allele-calling steps:
//   - a filtered COPY was requested -> use the "*_cop" channels
//   - the ORIGINAL schema was filtered -> use the "*_orig" channels
//
// TARANIS_ANALYZE_SCHEMA passes `params.new_schema` to `-newschema`, but this
// choice used to test `params.newschema`, so it did not follow the value
// actually given to taranis. Both spellings are honoured: a truthy
// `newschema` behaves as before, otherwise `new_schema` is read and a textual
// value is parsed case-insensitively ("true" selects the copy).
if (!params.skip_taranis_analyze_schema) {
    def use_schema_copy = params.newschema || params.new_schema.toString().toLowerCase() == 'true'
    if (use_schema_copy) {
        ch_schema_taranis_reference_alleles_cop.into{ ch_schema_taranis_reference_alleles }
        ch_schema_taranis_allele_calling_cop.into{ ch_schema_taranis_allele_calling }
    } else {
        ch_schema_taranis_reference_alleles_orig.into{ ch_schema_taranis_reference_alleles }
        ch_schema_taranis_allele_calling_orig.into{ ch_schema_taranis_allele_calling }
    }
}


/*
 * STEP 11: Get reference allele(s) for each locus in the schema with Taranis
 */

// Reference alleles are computed only when the step is not skipped and none
// were supplied through params.reference_alleles.
// NOTE(review): in the other cases ch_reference_alleles is presumably created
// elsewhere in the file - confirm.
if (!params.skip_taranis_reference_alleles && !params.reference_alleles) {

    /*
     * Run `taranis.py reference_alleles` on the schema directory.
     *
     * Input : schema directory from ch_schema_taranis_reference_alleles.
     * Output: the reference_alleles directory into ch_reference_alleles
     *         (also published under <outdir>/taranis).
     */
    process TARANIS_REFERENCE_ALLELES {
        //label 'process_high'
        label 'process_low'
        label 'error_retry'

        publishDir "${params.outdir}/taranis", mode: params.publish_dir_mode


        input:

        path schema from ch_schema_taranis_reference_alleles

        output:

        path "reference_alleles" into ch_reference_alleles

        script:

        """
        taranis.py reference_alleles -coregenedir $schema \\
                                    -outputdir reference_alleles/ \\
                                    -evalue $params.evalue_ref \\
                                    -perc_identity $params.perc_identity_ref \\
                                    -reward $params.reward_ref \\
                                    -penalty $params.penalty_ref \\
                                    -gapopen $params.gap_open_ref \\
                                    -gapextend $params.gap_extend_ref \\
                                    -num_threads $params.num_threads_ref \\
                                    -cpus $task.cpus
        """
    }
}


/*
 * STEP 12: Gene by gene analysis with Taranis
 */

if (!params.skip_taranis_allele_calling) {

    // Without an ST profile (none given and download skipped) the literal
    // string 'False' is passed as the -profile value.
    if ( !params.st_profile && params.skip_get_profile ) {
        //ch_st_profile = Channel.of( './') }
        ch_st_profile = Channel.of( 'False') }

    /*
     * Run `taranis.py allele_calling` once on all sample assemblies.
     *
     * Input : reference genome (ch_fasta), every assembly file collected from
     *         ch_samples_taranis_allele_calling, the schema, the reference
     *         alleles and the ST profile value.
     * Output: the allele_calling/ directory (published) and
     *         allele_calling/result.tsv into ch_allele_calling_matrix.
     */
    process TARANIS_ALLELE_CALLING {
        //label 'process_high'
        label 'process_low'
        label 'error_retry'

        publishDir "${params.outdir}/taranis", mode: params.publish_dir_mode

        input:

        path fasta from ch_fasta
        //tuple val(samples), path(samples_path) from ch_samples_taranis_allele_calling.collect()
        // Only the assembly path (index 1) of each [ sample, assembly ] tuple is kept
        path(samples_path) from ch_samples_taranis_allele_calling.map { it[1] }.collect()
        path schema from ch_schema_taranis_allele_calling
        path reference_alleles from ch_reference_alleles
        val profile from ch_st_profile

        output:
        path "allele_calling/"
        path "allele_calling/result.tsv" into ch_allele_calling_matrix

        // The staged assemblies are first moved into samples_dir, which is the
        // directory given to taranis as -inputdir.
        script:

        """

        mkdir samples_dir
        mv $samples_path samples_dir

        taranis.py allele_calling -coregenedir $schema \\
                                            -refalleles $reference_alleles \\
                                            -inputdir samples_dir \\
                                            -refgenome $fasta \\
                                            -outputdir allele_calling/ \\
                                            -cpus $task.cpus \\
                                            -percentlength $params.percentlength \\
                                            -coverage $params.coverage \\
                                            -evalue $params.evalue_call \\
                                            -perc_identity_ref $params.perc_identity_ref_call \\
                                            -perc_identity_loc $params.perc_identity_loc_call \\
                                            -reward $params.reward_call \\
                                            -penalty $params.penalty_call \\
                                            -gapopen $params.gapopen_call \\
                                            -gapextend $params.gapextend_call \\
                                            -max_target_seqs $params.max_target_seqs_call \\
                                            -max_hsps $params.max_hsps_call \\
                                            -num_threads $params.num_threads_call \\
                                            -flankingnts $params.flanking_nts \\
                                            -updateschema $params.update_schema \\
                                            -profile $profile

        """
    }
}


/*
 * STEP 13: Get distance matrix with Taranis
 */

// NOTE(review): ch_allele_calling_matrix is only created by
// TARANIS_ALLELE_CALLING in the code visible here; running this step with
// allele calling skipped presumably needs the channel defined elsewhere - confirm.
if (!params.skip_taranis_distance_matrix) {

    /*
     * Run `taranis.py distance_matrix` on the allele calling result matrix.
     *
     * Input : allele matrix from ch_allele_calling_matrix.
     * Output: the distance_matrix/ directory (published under <outdir>/taranis).
     */
    process TARANIS_DISTANCE_MATRIX {
        //label 'process_high'
        label 'process_low'
        label 'error_retry'

        publishDir "${params.outdir}/taranis", mode: params.publish_dir_mode

        input:
        path alleles_matrix from ch_allele_calling_matrix

        output:
        path "distance_matrix/"

        script:

        """

        taranis.py distance_matrix -alleles_matrix $alleles_matrix \\
                                            -locus_missing_threshold $params.locus_missing_threshold \\
                                            -sample_missing_threshold $params.sample_missing_threshold \\
                                            -paralog_filter $params.paralog_filter \\
                                            -lnf_filter $params.lnf_filter \\
                                            -plot_filter $params.plot_filter \\
                                            -outputdir distance_matrix

        """
    }
}


///////////////////////////////////////////////////////////////
/*                                                           */
/*                          MULTIQC                          */
/*                                                           */
///////////////////////////////////////////////////////////////

// Render the run summary map as a MultiQC custom-content YAML section: each
// entry becomes a <dt>/<dd> pair and the pairs are joined into one <dl> list.
// Empty values are shown as a grey "N/A"; that fallback opens a <span>, so it
// is closed with </span> (it used to be closed with a stray </a>).
Channel.from(summary.collect{ [it.key, it.value] })
    .map { k,v -> "<dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>" }
    .reduce { a, b -> return [a, b].join("\n            ") }
    .map { x -> """
    id: 'nf-core-genebygenebact-summary'
    description: " - this information is collected when the pipeline is started."
    section_name: 'nf-core/genebygenebact Workflow Summary'
    section_href: 'https://github.com/nf-core/genebygenebact'
    plot_type: 'html'
    data: |
        <dl class=\"dl-horizontal\">
            $x
        </dl>
    """.stripIndent() }
    .set { ch_workflow_summary }

/*
 * Parse software version numbers
 */

/*
 * Record the version of the pipeline, Nextflow and every tool into v_*.txt
 * files, then run scrape_software_versions.py, whose output is captured as
 * software_versions_mqc.yaml.
 *
 * Output: software_versions_mqc.yaml into ch_software_versions_yaml and
 *         software_versions.csv. Only file names containing ".csv" (not at
 *         position 0) are published.
 */
process get_software_versions {
    publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode,
        saveAs: { filename ->
                      if (filename.indexOf(".csv") > 0) filename
                      // alternative form, noted as the one used "en vir"
                      // (presumably another pipeline - confirm):
                      //if (filename.indexOf(".csv")) filename
                      else null
                }

    output:
    file 'software_versions_mqc.yaml' into ch_software_versions_yaml
    file "software_versions.csv"

    script:
    """
    echo $workflow.manifest.version > v_pipeline.txt
    echo $workflow.nextflow.version > v_nextflow.txt
    fastqc --version > v_fastqc.txt
    fastp --version 2> v_fastp.txt
    unicycler --version > v_unicycler.txt
    Bandage --version > v_bandage.txt
    quast --version > v_quast.txt
    taranis.py --version > v_taranis.txt
    multiqc --version > v_multiqc.txt
    scrape_software_versions.py &> software_versions_mqc.yaml
    """
    // alternative command, noted as the one used "en vir" (presumably another
    // pipeline - confirm): quast.py --version > v_quast.txt
}


/*
 * STEP 13 - MultiQC
 */

/*
 * Aggregate the QC reports with MultiQC.
 *
 * Input : MultiQC configuration file(s), the FastQC, fastp and QUAST reports
 *         (each collected, with an empty-list fallback), the software versions
 *         YAML and the workflow summary YAML.
 * Output: *multiqc_report.html into ch_multiqc_report, plus the *_data and
 *         multiqc_plots directories.
 * Skipped when params.skip_multiqc is set.
 */
process MULTIQC {
    publishDir "${params.outdir}/MultiQC", mode: params.publish_dir_mode

    when:
    !params.skip_multiqc

    input:
    path (multiqc_config) from ch_multiqc_config
    path (mqc_custom_config) from ch_multiqc_custom_config.collect().ifEmpty([])
    path ('fastqc/*') from ch_fastqc_raw_reports_mqc.collect().ifEmpty([])
    path ('fastp/log/*') from ch_fastp_mqc.collect().ifEmpty([])
    path ('fastp/fastqc/*') from ch_fastp_fastqc_mqc.collect().ifEmpty([])
    path ('quast/*') from ch_quast_mqc.collect().ifEmpty([])
    path ('software_versions/*') from ch_software_versions_yaml.collect()
    path workflow_summary from ch_workflow_summary.collectFile(name: "workflow_summary_mqc.yaml")

    output:
    path "*multiqc_report.html" into ch_multiqc_report
    path "*_data"
    path "multiqc_plots"

    script:
    rtitle = ''
    rfilename = ''
    // A title and a report file name are only set when the run name does NOT
    // look like "<lowercase>_<lowercase>"; non-word characters in the name are
    // replaced by "_" and repeated "_" collapsed.
    if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) {
        rtitle = "--title \"${workflow.runName}\""
        rfilename = "--filename " + workflow.runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report"
    }
    // The custom configuration is passed only when params.multiqc_config is set
    custom_config_file = params.multiqc_config ? "--config $mqc_custom_config" : ''
    """
    multiqc -f $rtitle $rfilename $custom_config_file .
    """
}


/*
 * STEP 14 - Output Description HTML
 */

/*
 * Convert the output documentation to HTML with markdown_to_html.py.
 *
 * Input : the documentation file (ch_output_docs) and its images
 *         (ch_output_docs_images); the images are staged but not named in
 *         the command.
 * Output: results_description.html, published under <outdir>/pipeline_info.
 */
process output_documentation {
    publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode

    input:
    file output_docs from ch_output_docs
    file images from ch_output_docs_images

    output:
    file 'results_description.html'

    script:
    """
    markdown_to_html.py $output_docs -o results_description.html
    """
}


/*
 * Completion e-mail notification
 */
/*
 * Completion handler: builds the summary e-mail (HTML and plain text), sends
 * it when an address is configured, writes both renderings to
 * <outdir>/pipeline_info and logs the final pipeline status.
 */
workflow.onComplete {

    // Set up the e-mail variables
    def subject = "[nf-core/genebygenebact] Successful: $workflow.runName"
    if (!workflow.success) {
        subject = "[nf-core/genebygenebact] FAILED: $workflow.runName"
    }
    def email_fields = [:]
    email_fields['version'] = workflow.manifest.version
    email_fields['runName'] = workflow.runName
    email_fields['success'] = workflow.success
    email_fields['dateComplete'] = workflow.complete
    email_fields['duration'] = workflow.duration
    email_fields['exitStatus'] = workflow.exitStatus
    email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
    email_fields['errorReport'] = (workflow.errorReport ?: 'None')
    email_fields['commandLine'] = workflow.commandLine
    email_fields['projectDir'] = workflow.projectDir
    email_fields['summary'] = summary
    email_fields['summary']['Date Started'] = workflow.start
    email_fields['summary']['Date Completed'] = workflow.complete
    email_fields['summary']['Pipeline script file path'] = workflow.scriptFile
    email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId
    if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository
    if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId
    if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision
    email_fields['summary']['Nextflow Version'] = workflow.nextflow.version
    email_fields['summary']['Nextflow Build'] = workflow.nextflow.build
    email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp

    // TODO nf-core: If not using MultiQC, strip out this code (including params.max_multiqc_email_size)
    // On success try attach the multiqc report
    def mqc_report = null
    try {
        if (workflow.success) {
            mqc_report = ch_multiqc_report.getVal()
            if (mqc_report.getClass() == ArrayList) {
                log.warn "[nf-core/genebygenebact] Found multiple reports from process 'multiqc', will use only one"
                mqc_report = mqc_report[0]
            }
        }
    } catch (all) {
        log.warn "[nf-core/genebygenebact] Could not attach MultiQC report to summary email"
    }

    // Check if we are only sending emails on failure
    def email_address = params.email
    if (!params.email && params.email_on_fail && !workflow.success) {
        email_address = params.email_on_fail
    }

    // Render the TXT template
    def engine = new groovy.text.GStringTemplateEngine()
    def tf = new File("$projectDir/assets/email_template.txt")
    def txt_template = engine.createTemplate(tf).make(email_fields)
    def email_txt = txt_template.toString()

    // Render the HTML template
    def hf = new File("$projectDir/assets/email_template.html")
    def html_template = engine.createTemplate(hf).make(email_fields)
    def email_html = html_template.toString()

    // Render the sendmail template
    def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ]
    def sf = new File("$projectDir/assets/sendmail_template.txt")
    def sendmail_template = engine.createTemplate(sf).make(smail_fields)
    def sendmail_html = sendmail_template.toString()

    // Send the HTML e-mail
    if (email_address) {
        try {
            // Deliberately jump to the `mail` fallback below when plain-text mail is requested
            if (params.plaintext_email) { throw new GroovyException('Send plaintext e-mail, not HTML') }
            // Try to send HTML e-mail using sendmail
            [ 'sendmail', '-t' ].execute() << sendmail_html
            log.info "[nf-core/genebygenebact] Sent summary e-mail to $email_address (sendmail)"
        } catch (all) {
            // Catch failures and try with plaintext
            def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
            // mqc_report stays null on failed runs or when no report could be
            // fetched; only attach it when present and small enough
            if ( mqc_report != null && mqc_report.size() <= params.max_multiqc_email_size.toBytes() ) {
              mail_cmd += [ '-A', mqc_report ]
            }
            mail_cmd.execute() << email_html
            log.info "[nf-core/genebygenebact] Sent summary e-mail to $email_address (mail)"
        }
    }

    // Write summary e-mail HTML to a file
    def output_d = new File("${params.outdir}/pipeline_info/")
    if (!output_d.exists()) {
        output_d.mkdirs()
    }
    def output_hf = new File(output_d, "pipeline_report.html")
    output_hf.withWriter { w -> w << email_html }
    def output_tf = new File(output_d, "pipeline_report.txt")
    output_tf.withWriter { w -> w << email_txt }

    // ANSI colour codes, blank when monochrome logs are requested
    def c_green = params.monochrome_logs ? '' : "\033[0;32m"
    def c_purple = params.monochrome_logs ? '' : "\033[0;35m"
    def c_red = params.monochrome_logs ? '' : "\033[0;31m"
    def c_reset = params.monochrome_logs ? '' : "\033[0m"

    if (workflow.stats.ignoredCount > 0 && workflow.success) {
        log.info "-${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}-"
        log.info "-${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}-"
        log.info "-${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}-"
    }

    if (workflow.success) {
        log.info "-${c_purple}[nf-core/genebygenebact]${c_green} Pipeline completed successfully${c_reset}-"
    } else {
        checkHostname()
        log.info "-${c_purple}[nf-core/genebygenebact]${c_red} Pipeline completed with errors${c_reset}-"
    }

}

// Error handler: re-runs the parameter validation against the JSON schema so
// that unexpected parameters are reported alongside the failure.
workflow.onError {
    // Print unexpected parameters - easiest is to just rerun validation
    NfcoreSchema.validateParameters(params, json_schema, log)
}

/*
 * Log an error banner when the machine hostname contains a name listed in
 * params.hostnames for some profile, while that profile is not part of the
 * active `-profile` string. Does nothing when params.hostnames is not set.
 */
def checkHostname() {
    if (!params.hostnames) {
        return
    }

    // ANSI colour codes, blank when monochrome logs are requested
    def mono = params.monochrome_logs
    def c_reset = mono ? '' : "\033[0m"
    def c_white = mono ? '' : "\033[0;37m"
    def c_red = mono ? '' : "\033[1;91m"
    def c_yellow_bold = mono ? '' : "\033[1;93m"

    def hostname = 'hostname'.execute().text.trim()
    params.hostnames.each { prof, hnames ->
        hnames.each { hname ->
            def host_matches = hostname.contains(hname)
            def profile_active = workflow.profile.contains(prof)
            if (host_matches && !profile_active) {
                def banner = "${c_red}====================================================${c_reset}\n" +
                        "  ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" +
                        "  but your machine hostname is ${c_white}'$hostname'${c_reset}\n" +
                        "  ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" +
                        "${c_red}====================================================${c_reset}\n"
                log.error banner
            }
        }
    }
}


// Function to check if running offline
/*
 * Report whether the run should be treated as offline: the NXF_OFFLINE
 * variable coerced to Boolean, or false when resolving it raises an exception.
 */
def isOffline() {
    def offline
    try {
        offline = NXF_OFFLINE as Boolean
    } catch (Exception ignored) {
        offline = false
    }
    return offline
}