diff --git a/bin/bff.R b/bin/bff.R index bc31c06..b9d0039 100755 --- a/bin/bff.R +++ b/bin/bff.R @@ -4,11 +4,21 @@ library(DropletUtils) library(Seurat) library(ggplot2) library(cowplot) +if(!require("cellhashR")){ + devtools::install_github(repo = 'bimberlab/cellhashR', ref = 'master', dependencies = TRUE, upgrade = 'always') + library("cellhashR") +} + library(cellhashR) library(here) library(dplyr) library(argparse) -library(tidyverse) + +if(!require("tidyverse")){ + install.packages("tidyverse") + library("tidyverse") +} + # Create a parser parser <- ArgumentParser("Parameters for BFF") diff --git a/bin/filter_bam_file_for_popscle_dsc_pileup.sh b/bin/filter_bam_file_for_popscle_dsc_pileup.sh new file mode 100755 index 0000000..9cd8f8a --- /dev/null +++ b/bin/filter_bam_file_for_popscle_dsc_pileup.sh @@ -0,0 +1,151 @@ +#!/bin/bash +# +# Copyright (C): 2020-2021 - Gert Hulselmans +# +# Purpose: Filter BAM file for usage with popscle dsc-pileup by keeping reads: +# - which overlap with SNPs in the VCF file +# - and which have a cell barcode (default: "CB" tag) contained in the cell barcode list +# Keeping only relevant reads for popscle dsc-pileup can speedup it up quite significantly +# (depending on the reduction of the number of reads in the filtered BAM file vs original). + + + +# Function to check if any of the programs in a pipe failed. +check_exit_codes () { + local GET_PIPESTATUS="${PIPESTATUS[@]}"; + local exit_code; + + for exit_code in ${GET_PIPESTATUS} ; do + if [ ${exit_code} -ne 0 ] ; then + return ${exit_code}; + fi + done + + return 0; +} + + + +# Check if necessary programs are installed. +check_if_programs_exists () { + local exit_code=0; + + # Check if bedtools is installed. + if ! type bedtools > /dev/null 2>&1 ; then + printf 'Error: "bedtools" could not be found in PATH.\n' > /dev/stderr; + exit_code=2; + fi + + # Check if samtools is installed. + if ! type samtools > /dev/null 2>&1 ; then + printf 'Error: "samtools" could not be found in PATH.\n' > /dev/stderr; + exit_code=2; + fi + + if [ ${exit_code} -eq 2 ] ; then + return ${exit_code}; + fi + + # Check if samtools 1.10 or higher is installed (needs to have "-D STR:FILE" or "-D, --tag-file STR:FILE" option). + if ! samtools view --help 2>&1 | grep -q -- '-D.*STR:FILE' ; then + printf 'Error: The version of "samtools" (%s) should be 1.10 or higher (%s found).\n' \ + "$(type samtools)" \ + "$(samtools --version | head -n 1)" \ + > /dev/stderr; + exit_code=2; + fi + + return ${exit_code}; +} + + + +filter_bam_file_for_popscle_dsc_pileup () { + local input_bam_filename="${1}"; + local barcodes_tsv_filename="${2}"; + local vcf_filename="${3}"; + local output_bam_filename="${4}"; + local barcode_tag="${5:-CB}"; + + local exit_code=0; + + if [ ${#@} -lt 4 ] ; then + printf 'Usage: filter_bam_file_for_popscle_dsc_pileup input_bam_filename barcodes_tsv_filename vcf_filename output_bam_filename [barcode_tag]\n\n'; + printf 'Purpose: Filter BAM file for usage with popscle dsc-pileup by keeping reads:\n'; + printf ' - which overlap with SNPs in the VCF file\n'; + printf ' - and which have a cell barcode (default: "CB" tag) contained in the cell barcode list\n'; + printf ' Keeping only relevant reads for popscle dsc-pileup can speedup it up quite significantly\n'; + printf ' (depending on the reduction of the number of reads in the filtered BAM file vs original).\n\n'; + + return 1; + fi + + if [ ! -f "${input_bam_filename}" ] ; then + printf 'Error: Input (CellRanger) BAM file "%s" could not be found.\n' "${input_bam_filename}" > /dev/stderr; + return 2; + fi + + if [ ! -f "${barcodes_tsv_filename}" ] ; then + printf 'Error: File with barcodes "%s" could not be found.\n' "${barcodes_tsv_filename}" > /dev/stderr; + return 2; + fi + + if [ ! -f "${vcf_filename}" ] ; then + printf 'Error: File with unique SNPs per sample "%s" could not be found.\n' "${vcf_filename}" > /dev/stderr; + return 2; + fi + + if [ ${#barcode_tag} -ne 2 ] ; then + printf 'Error: Barcode tag "%s" should be 2 characters.\n' "${barcode_tag}" > /dev/stderr; + return 2; + fi + + # Check if bedtools and samtools are in PATH. + if ! check_if_programs_exists ; then + return 2; + fi + + # Create much smaller BAM file for dsc-pileup of popscle: + # - Convert VCF file with unique SNPs for each sample + # to a BED file and merge adjacent SNP regions to one. + # - Only include reads that contain a SNP position + # and which contain a cell barcode of interest. + if [ "${barcodes_tsv_filename%.gz}".gz = "${barcodes_tsv_filename}" ] ; then + # Barcodes file is compressed with gzip. + bedtools merge -i "${vcf_filename}" \ + | samtools view\ + -@ 8 \ + --write-index \ + -L - \ + -D "${barcode_tag}":<(zcat "${barcodes_tsv_filename}") \ + -o "${output_bam_filename}" \ + "${input_bam_filename}"; + + # Check if any of the previous commands failed. + check_exit_codes; + + exit_code=$?; + else + # Barcodes file is uncompressed. + bedtools merge -i "${vcf_filename}" \ + | samtools view\ + -@ 8 \ + --write-index \ + -L - \ + -D "${barcode_tag}":"${barcodes_tsv_filename}" \ + -o "${output_bam_filename}" \ + "${input_bam_filename}"; + + # Check if any of the previous commands failed. + check_exit_codes; + + exit_code=$?; + fi + + + return ${exit_code}; +} + + + +filter_bam_file_for_popscle_dsc_pileup "${@}"; diff --git a/bin/sort_vcf_same_as_bam.sh b/bin/sort_vcf_same_as_bam.sh new file mode 100755 index 0000000..9638b4f --- /dev/null +++ b/bin/sort_vcf_same_as_bam.sh @@ -0,0 +1,206 @@ +#!/bin/bash +# +# Copyright (C): 2020-2021 - Gert Hulselmans +# +# Purpose: Sort VCF file in the same order as the BAM file, so it can be used with popscle. + + + +# Function to check if any of the programs in a pipe failed. +check_exit_codes () { + local GET_PIPESTATUS="${PIPESTATUS[@]}"; + local exit_code; + + for exit_code in ${GET_PIPESTATUS} ; do + if [ ${exit_code} -ne 0 ] ; then + return ${exit_code}; + fi + done + + return 0; +} + + + +# Check if necessary programs are installed. +check_if_programs_exists () { + local exit_code=0; + + # Check if awk is installed. + if ! type awk > /dev/null 2>&1 ; then + printf 'Error: "awk" could not be found in PATH.\n' > /dev/stderr; + exit_code=2; + fi + + # Check if bcftools is installed. + if ! type bcftools > /dev/null 2>&1 ; then + printf 'Error: "bcftools" could not be found in PATH.\n' > /dev/stderr; + exit_code=2; + fi + + # Check if samtools is installed. + if ! type samtools > /dev/null 2>&1 ; then + printf 'Error: "samtools" could not be found in PATH.\n' > /dev/stderr; + exit_code=2; + fi + + return ${exit_code}; +} + + + +# Get order of the contigs (chromosomes) and their length from the BAM header. +get_contig_order_from_bam () { + local bam_input_file="${1}"; + local output_type="${2}"; + + if [ ${#@} -ne 2 ] ; then + printf 'Usage: get_contig_order_from_bam BAM_file output_type\n\n'; + printf 'Arguments:\n'; + printf ' - BAM_file: BAM file from which to get the contig order and contig lengths.\n'; + printf ' - output_type:\n'; + printf ' - "names": Return contig names.\n'; + printf ' - "chrom_sizes": Return contig names and contig lengths.\n'; + printf ' - "vcf": Return VCF header section for contigs.\n\n'; + return 1; + fi + + case "${output_type}" in + 'names') + ;; + 'chrom_sizes') + ;; + 'vcf') + ;; + *) + printf 'Error: output_type "%s" is not supported.\n' "${output_type}" > /dev/stderr; + return 1; + ;; + esac + + check_if_programs_exists || return $?; + + # Get the order of the contigs from the BAM header. + samtools view -H "${bam_input_file}" \ + | awk \ + -F '\t' \ + -v output_type="${output_type}" \ + ' + { + # Only look at sequence header fields. + if ($1 == "@SQ") { + contig_idx += 1; + contig_name = ""; + contig_length = ""; + + # Extract contig (chromosome) name and contig (chromosome) length. + for (i = 2; i <= NF; i++) { + if ($i ~ /^SN:/) { + contig_name = substr($i, 4); + } + + if ($i ~ /^LN:/) { + contig_length = substr($i, 4); + } + + # Create contig order to name and contig order to length and vcf contig appings. + contig_idx_to_name[contig_idx] = contig_name; + contig_idx_to_length[contig_idx] = contig_length; + contig_idx_to_vcf_contig[contig_idx] = sprintf("##contig=", contig_name, contig_length); + } + } + } END { + if (contig_idx == 0) { + printf "Error: No \"@SQ\" header line found in BAM file.\n" > "/dev/stderr"; + exit(1); + } else if (output_type == "names") { + contig_names = ""; + + for (contig_idx = 1; contig_idx <= length(contig_idx_to_name); contig_idx++) { + contig_names = contig_names " " contig_idx_to_name[contig_idx]; + } + + # Print all contig names (without leading space). + print substr(contig_names, 2); + } else if (output_type == "chrom_sizes") { + # Print all contig names with their length in a TAB separated fashion. + for (contig_idx = 1; contig_idx <= length(contig_idx_to_name); contig_idx++) { + print contig_idx_to_name[contig_idx] "\t" contig_idx_to_length[contig_idx]; + } + } else if (output_type == "vcf") { + # Print VCF header section for contigs. + for (contig_idx = 1; contig_idx <= length(contig_idx_to_vcf_contig); contig_idx++) { + print contig_idx_to_vcf_contig[contig_idx]; + } + } + }' + + check_exit_codes; + + return $?; +} + + + +# Sort VCF file in the same order as the BAM file, so it can be used with popscle. +sort_vcf_same_as_bam () { + local bam_input_file="${1}"; + local vcf_input_file="${2}"; + local vcf_type="${3:-v}"; + + if [ ${#@} -lt 2 ] ; then + printf 'Usage: sort_vcf_same_as_bam BAM_file VCF_file [VCF_type]\n\n'; + printf 'Arguments:\n'; + printf ' - BAM_file: BAM file from which to get the contig order to sort the VCF file.\n'; + printf ' - VCF_file: VCF file to sort by contig order as defined in the BAM file.\n'; + printf ' - VCF_type: VCF ouput file type (default: same as input VCF file type):\n'; + printf ' v: uncompressed VCF, z: compressed VCF,\n'; + printf ' u: uncompressed BCF, b: compressed BCF\n\n'; + printf 'Purpose:\n'; + printf ' Sort VCF file in the same order as the BAM file, so it can be used with popscle.\n\n'; + return 1; + fi + + check_if_programs_exists || return $?; + + # If VCF type is not specified, try to guess it from the filename extension. + if [ ${#@} -eq 2 ] ; then + if [ "${vcf_input_file%.vcf.gz}" != "${vcf_input_file}" ] ; then + vcf_type='z'; + elif [ "${vcf_input_file%.bcf}" != "${vcf_input_file}" ] ; then + vcf_type='b'; + fi + fi + + # Sort VCF file by same chromosome order as BAM file. + cat <( + # Create new VCF header: + # - Get VCF header of VCF input file. + # - Remove all contig header lines and "#CHROM" line from the VCF header. + # - Append contig headers in the order they appear in the input BAM file. + # - Add "#CHROM" line as last line of the new VCF header. + bcftools view -h "${vcf_input_file}" \ + | awk \ + ' + { + if ($1 !~ /^##contig=/ && $1 !~ /^#CHROM/) { + # Remove all contig header lines and "#CHROM" line. + print $0; + } + }' \ + | cat \ + - \ + <(get_contig_order_from_bam "${bam_input_file}" 'vcf') \ + <(bcftools view -h "${vcf_input_file}" | tail -n 1) \ + ) \ + <(bcftools view -H -O v "${vcf_input_file}") \ + | bcftools sort -O "${vcf_type}"; + + check_exit_codes; + + return $?; +} + + + +sort_vcf_same_as_bam "${@}"; diff --git a/conda/bff.yml b/conda/bff.yml index 295e53b..e014dff 100644 --- a/conda/bff.yml +++ b/conda/bff.yml @@ -3,6 +3,7 @@ channels: - conda-forge - bioconda dependencies: + - conda-forge::r-tidyverse - conda-forge::r-base>=4.2 - r-seurat - bioconductor-dropletutils @@ -17,3 +18,4 @@ dependencies: - conda-forge::r-here - conda-forge::r-argparse - conda-forge::r-dplyr + diff --git a/conda/condaenv.31vp35_d.requirements.txt b/conda/condaenv.31vp35_d.requirements.txt new file mode 100644 index 0000000..b1be65e --- /dev/null +++ b/conda/condaenv.31vp35_d.requirements.txt @@ -0,0 +1,5 @@ +solo-sc +scanpy +argparse +torchmetrics==0.6.0 +matplotlib \ No newline at end of file diff --git a/conda/condaenv.ucda_93c.requirements.txt b/conda/condaenv.ucda_93c.requirements.txt new file mode 100644 index 0000000..4b3fe5b --- /dev/null +++ b/conda/condaenv.ucda_93c.requirements.txt @@ -0,0 +1,3 @@ +GMM_Demux==0.2.1.3 +scikit-learn==1.1.3 +argparse \ No newline at end of file diff --git a/main.nf b/main.nf index 1768451..77aab97 100644 --- a/main.nf +++ b/main.nf @@ -1,109 +1,15 @@ #!/usr/bin/env nextflow -nextflow.enable.dsl = 2 -include { run_multi } from './modules/multi_demultiplexing' -include { gene_demultiplexing } from './modules/single/gene_demultiplexing' -include { hash_demultiplexing } from './modules/single/hash_demultiplexing' -include { donor_match } from './modules/single/donor_match' -process summary_all { - publishDir "$projectDir/$params.outdir/$params.mode", mode: 'copy' - label 'small_mem' +nextflow.enable.dsl=2 +include { summary } from "$projectDir/modules/multi/gene_demultiplexing" +include { HADGE; SUMMARY } from "$projectDir/subworkflows/HADGE" - conda 'pandas scanpy mudata' - - input: - path gene_demulti_result - path hash_demulti_result - output: - path 'summary' - - script: - """ - summary.py --gene_demulti $gene_demulti_result --hash_demulti $hash_demulti_result - """ -} - -process generate_data { - publishDir "$projectDir/$params.outdir/$params.mode/data_output", mode: 'copy' - - conda 'pandas scanpy mudata' - - input: - path assignment - val generate_anndata - val generate_mudata - val rna_matrix - val hto_matrix - output: - path 'adata_with_donor_matching.h5ad', optional: true - path 'mudata_with_donor_matching.h5mu', optional: true - - script: - def generate_adata = '' - def generate_mdata = '' - - if (generate_anndata == 'True') { - if (rna_matrix == 'None') { - error 'Error: RNA count matrix is not given.' - } - generate_adata = "--generate_anndata --read_rna_mtx $rna_matrix" - } - if (generate_mudata == 'True') { - if (rna_matrix == 'None') { - error 'Error: RNA count matrix is not given.' - } - if (hto_matrix == 'None') { - error 'Error: HTO count matrix is not given.' - } - generate_mdata = "--generate_mudata --read_rna_mtx $rna_matrix --read_hto_mtx $hto_matrix" - } - - """ - generate_data.py --assignment $assignment $generate_adata $generate_mdata - """ -} - -workflow run_single { - if (params.mode == 'genetic') { - gene_demultiplexing() - if (params.match_donor == 'True') { - donor_match(gene_demultiplexing.out) - } - } - else if (params.mode == 'hashing') { - hash_demultiplexing(params.rna_matrix_raw, params.rna_matrix_filtered, params.hto_matrix_raw, params.hto_matrix_filtered) - if (params.match_donor == 'True') { - donor_match(hash_demultiplexing.out) - } - } - else if (params.mode == 'rescue') { - hash_demultiplexing(params.rna_matrix_raw, params.rna_matrix_filtered, params.hto_matrix_raw, params.hto_matrix_filtered) - gene_demultiplexing() - gene_summary = gene_demultiplexing.out - hash_summary = hash_demultiplexing.out - summary_all(gene_summary, hash_summary) - if (params.match_donor == 'True') { - donor_match(summary_all.out) - if (params.generate_anndata == 'True' || params.generate_mudata == 'True') { - generate_data(donor_match.out, params.generate_anndata, params.generate_mudata, - params.rna_matrix_filtered, params.hto_matrix_filtered) - } - } - } - else if (params.mode == 'donor_match') { - donor_match(params.demultiplexing_result) - if (params.generate_anndata == 'True' || params.generate_mudata == 'True') { - generate_data(donor_match.out, params.generate_anndata, params.generate_mudata, - params.rna_matrix_filtered, params.hto_matrix_filtered) - } - } +// Main entry point in the pipeline +workflow { + HADGE() } -workflow { - if (params.multi_input == null) { - run_single() - } - else { - run_multi() - } +// Enty point to only generate summary files +workflow HADGE_SUMMARY_ONLY{ + SUMMARY() } diff --git a/modules/gene_demultiplexing.nf b/modules/gene_demultiplexing.nf deleted file mode 100644 index d6d4bfb..0000000 --- a/modules/gene_demultiplexing.nf +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 -include { data_preprocess } from './gene_demulti/samtools' -include { filter_variant } from './gene_demulti/bcftools' -include { variant_cellSNP } from './gene_demulti/cellsnp' -include { variant_freebayes } from './gene_demulti/freebayes' -include { demultiplex_demuxlet } from './gene_demulti/demuxlet' -include { demultiplex_freemuxlet } from './gene_demulti/freemuxlet' -include { demultiplex_scSplit } from './gene_demulti/scsplit' -include { demultiplex_souporcell } from './gene_demulti/souporcell' -include { demultiplex_vireo } from './gene_demulti/vireo' - -process summary { - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/gene_demulti", mode: 'copy' - label 'small_mem' - - conda 'pandas scanpy mudata' - - input: - tuple val(sampleId), path(hto_matrix, stageAs: 'hto_data'), path(rna_matrix, stageAs: 'rna_data') - val demuxlet_result - val freemuxlet_result - val vireo_result - val souporcell_result - val scsplit_result - val generate_anndata - val generate_mudata - - output: - tuple val(sampleId), path('genetic_summary') - - script: - def demuxlet_files = '' - def freemuxlet_files = '' - def vireo_files = '' - def souporcell_files = '' - def scsplit_files = '' - def generate_adata = '' - def generate_mdata = '' - - if (demuxlet_result != 'no_result') { - demuxlet_res = demuxlet_result.find { it.name.contains(sampleId) } - demuxlet_files = "--demuxlet ${demuxlet_res}" - } - if (freemuxlet_result != 'no_result') { - freemuxlet_res = freemuxlet_result.find { it.name.contains(sampleId) } - freemuxlet_files = "--freemuxlet ${freemuxlet_res}" - } - if (vireo_result != 'no_result') { - vireo_res = vireo_result.find { it.name.contains(sampleId) } - vireo_files = "--vireo ${vireo_res}" - } - if (souporcell_result != 'no_result') { - souporcell_res = souporcell_result.find { it.name.contains(sampleId) } - souporcell_files = "--souporcell ${souporcell_res}" - } - if (scsplit_result != 'no_result') { - scsplit_res = scsplit_result.find { it.name.contains(sampleId) } - scsplit_files = "--scsplit ${scsplit_res}" - } - if (generate_anndata == 'True') { - if (rna_matrix.name == 'None') { - error 'Error: RNA count matrix is not given.' - } - generate_adata = '--generate_anndata --read_rna_mtx rna_data' - } - if (generate_mudata == 'True') { - if (rna_matrix.name == 'None') { - error 'Error: RNA count matrix is not given.' - } - if (hto_matrix.name == 'None') { - error 'Error: HTO count matrix is not given.' - } - generate_mdata = '--generate_mudata --read_rna_mtx rna_data --read_hto_mtx hto_data' - } - """ - summary_gene.py $demuxlet_files $vireo_files $souporcell_files $scsplit_files $freemuxlet_files $generate_adata $generate_mdata - """ -} - -def split_input(input) { - if (input =~ /;/) { - Channel.from(input).map { return it.tokenize(';') }.flatten() - } - else { - Channel.from(input) - } -} - -workflow gene_demultiplexing { - if ((params.demuxlet == 'True' & params.demuxlet_preprocess == 'True') | \ - (params.freemuxlet == 'True' & params.freemuxlet_preprocess == 'True')| \ - (params.scSplit == 'True' & params.scSplit_preprocess == 'True') | \ - (params.vireo == 'True' & params.vireo_preprocess == 'True') | \ - (params.souporcell == 'True' & params.souporcell_preprocess == 'True')) { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.bam) } - | data_preprocess - qc_bam = data_preprocess.out.map { it -> tuple(it.name.tokenize('_').last(), it + '/sorted.bam', it + '/sorted.bam.bai') } - } - - if (params.scSplit == 'True' & params.scSplit_variant == 'True') { - freebayes_region = Channel.from(1..22, 'X', 'Y').flatten() - if (params.region != 'None') { - freebayes_region = split_input(params.region) - } - if (params.scSplit_preprocess == 'True') { - variant_freebayes(qc_bam, freebayes_region) - } - else { - input_list = Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.bam, row.bam_index) } - variant_freebayes(input_list, freebayes_region) - } - filter_variant(variant_freebayes.out) - freebayes_vcf = filter_variant.out.map { it -> tuple(it[0], it[1] + '/filtered_sorted_total_chroms.vcf') } - } - - if (params.vireo == 'True' & params.vireo_variant == 'True') { - if (params.vireo_preprocess == 'True') { - input_param_cellsnp = Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.barcodes) } - qc_bam_new = qc_bam.join(input_param_cellsnp) - variant_cellSNP(qc_bam_new) - } - else { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.bam, row.bam_index, row.barcodes) } - | variant_cellSNP - } - cellsnp_vcf = variant_cellSNP.out.map { it -> tuple(it.name.tokenize('_').last(), it + '/*/cellSNP.cells.vcf') } - } - - if (params.scSplit == 'True') { - if (params.scSplit_preprocess == 'False') { - input_bam_scsplit = Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.bam, row.bam_index) } - } - else { - input_bam_scsplit = qc_bam - } - if (params.scSplit_variant == 'True') { - input_vcf_scsplit = freebayes_vcf - } - else { - input_vcf_scsplit = Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.vcf_mixed) } - } - input_param_scsplit = Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.barcodes, row.nsamples_genetic, row.vcf_donor) } - - input_list_scsplit = input_bam_scsplit.join(input_vcf_scsplit) - input_list_scsplit = input_list_scsplit.join(input_param_scsplit) - demultiplex_scSplit(input_list_scsplit) - scSplit_out = demultiplex_scSplit.out - } - else { - scSplit_out = channel.value('no_result') - } - - if (params.vireo == 'True') { - if (params.vireo_variant == 'True') { - input_vcf_vireo = cellsnp_vcf - } - else { - input_vcf_vireo = Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.celldata) } - } - input_param_vireo = Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.nsamples_genetic, row.vcf_donor) } - - input_list_vireo = input_vcf_vireo.join(input_param_vireo) - demultiplex_vireo(input_list_vireo) - vireo_out = demultiplex_vireo.out - } - else { - vireo_out = channel.value('no_result') - } - - if (params.demuxlet == 'True') { - if (params.demuxlet_preprocess == 'False') { - input_bam_demuxlet = Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.bam, row.bam_index) } - } - else { - input_bam_demuxlet = qc_bam - } - input_param_demuxlet = Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.barcodes, row.vcf_donor) } - input_list_demuxlet = input_bam_demuxlet.join(input_param_demuxlet) - demultiplex_demuxlet(input_list_demuxlet) - demuxlet_out = demultiplex_demuxlet.out - } - else { - demuxlet_out = channel.value('no_result') - } - - if (params.freemuxlet == 'True') { - if (params.freemuxlet_preprocess == 'False') { - input_bam_freemuxlet = Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.bam, row.bam_index) } - } - else { - input_bam_freemuxlet = qc_bam - } - input_param_freemuxlet = Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.barcodes, row.nsamples_genetic) } - input_list_freemuxlet = input_bam_freemuxlet.join(input_param_freemuxlet) - demultiplex_freemuxlet(input_list_freemuxlet) - freemuxlet_out = demultiplex_freemuxlet.out - } - else { - freemuxlet_out = channel.value('no_result') - } - - if (params.souporcell == 'True') { - if (params.souporcell_preprocess == 'False') { - input_bam_souporcell = Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.bam, row.bam_index) } - } - else { - input_bam_souporcell = qc_bam - } - input_param_souporcell = Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.barcodes, row.nsamples_genetic, row.vcf_donor) } - input_list_souporcell = input_bam_souporcell.join(input_param_souporcell) - demultiplex_souporcell(input_list_souporcell) - souporcell_out = demultiplex_souporcell.out - } - else { - souporcell_out = channel.value('no_result') - } - - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, file(row.hto_matrix_filtered), file(row.rna_matrix_filtered)) } - | set { input_list_summary } - summary(input_list_summary, demuxlet_out, freemuxlet_out, vireo_out, souporcell_out, scSplit_out, - params.generate_anndata, params.generate_mudata) - - emit: - summary.out -} diff --git a/modules/hash_demultiplexing.nf b/modules/hash_demultiplexing.nf deleted file mode 100644 index 542bac5..0000000 --- a/modules/hash_demultiplexing.nf +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 -include { preprocessing_hashing as preprocessing_hashing_htodemux } from './hash_demulti/preprocess' -include { preprocessing_hashing as preprocessing_hashing_multiseq } from './hash_demulti/preprocess' -include { multiseq_hashing } from './hash_demulti/multiseq' -include { htodemux_hashing } from './hash_demulti/htodemux' -include { hash_solo_hashing } from './hash_demulti/hashsolo' -include { hashedDrops_hashing } from './hash_demulti/hashedDrops' -include { demuxem_hashing } from './hash_demulti/demuxem' -include { gmm_demux_hashing } from './hash_demulti/gmm_demux' -include { bff_hashing } from './hash_demulti/bff' - -process summary { - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti", mode: 'copy' - label 'small_mem' - - conda 'pandas scanpy mudata' - - input: - tuple val(sampleId), path(hto_matrix, stageAs: 'hto_data'), path(rna_matrix, stageAs: 'rna_data') - val demuxem_result - val hashsolo_result - val htodemux_result - val multiseq_result - val hashedDrops_result - val bff_result - val gmmDemux_result - val generate_anndata - val generate_mudata - - output: - tuple val(sampleId), path('hash_summary') - - script: - def htodemux_files = '' - def hashsolo_files = '' - def multiseq_files = '' - def hashedDrops_files = '' - def gmmDemux_files = '' - def demuxem_files = '' - def bff_files = '' - def generate_adata = '' - def generate_mdata = '' - - if (demuxem_result != 'no_result') { - demuxem_res = demuxem_result.find { it.name.contains(sampleId) } - demuxem_files = "--demuxem ${demuxem_res}" - } - if (hashsolo_result != 'no_result') { - hashsolo_res = hashsolo_result.find { it.name.contains(sampleId) } - hashsolo_files = "--hashsolo ${hashsolo_res}" - } - if (htodemux_result != 'no_result') { - htodemux_res = htodemux_result.find { it.name.contains(sampleId) } - htodemux_files = "--htodemux ${htodemux_res}" - } - if (multiseq_result != 'no_result') { - multiseq_res = multiseq_result.find { it.name.contains(sampleId) } - multiseq_files = "--multiseq ${multiseq_res}" - } - if (hashedDrops_result != 'no_result') { - hashedDrops_res = hashedDrops_result.find { it.name.contains(sampleId) } - hashedDrops_files = "--hashedDrops ${hashedDrops_res}" - } - if (gmmDemux_result != 'no_result') { - gmmDemux_res = gmmDemux_result.find { it.name.contains(sampleId) } - gmmDemux_files = "--gmm_demux ${gmmDemux_res}" - } - if (bff_result != 'no_result') { - bff_res = bff_result.find { it.name.contains(sampleId) } - bff_files = "--bff ${bff_res}" - } - if (generate_anndata == 'True') { - if (rna_matrix.name == 'None') { - error 'Error: RNA count matrix is not given.' - } - generate_adata = '--generate_anndata --read_rna_mtx rna_data' - } - if (generate_mudata == 'True') { - if (rna_matrix.name == 'None') { - error 'Error: RNA count matrix is not given.' - } - if (hto_matrix.name == 'None') { - error 'Error: HTO count matrix is not given.' - } - generate_mdata = '--generate_mudata --read_rna_mtx rna_data --read_hto_mtx hto_data' - } - - """ - summary_hash.py $demuxem_files $htodemux_files $multiseq_files $hashedDrops_files $hashsolo_files $gmmDemux_files $bff_files $generate_adata $generate_mdata --sampleId $sampleId - """ -} - -workflow hash_demultiplexing { - if (params.htodemux == 'True') { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, params.hto_matrix_htodemux == 'raw' ? row.hto_matrix_raw : row.hto_matrix_filtered, - params.rna_matrix_htodemux == 'raw' ? row.rna_matrix_raw : row.rna_matrix_filtered)} - | set { input_list_preprocess_htodemux } - preprocessing_hashing_htodemux(input_list_preprocess_htodemux, params.hto_matrix_htodemux, params.rna_matrix_htodemux) - htodemux_preprocess_out = preprocessing_hashing_htodemux.out - htodemux_hashing(htodemux_preprocess_out) - htodemux_out = htodemux_hashing.out - } - else { - htodemux_out = channel.value('no_result') - } - - if (params.multiseq == 'True') { - if (params.htodemux == 'True' & params.hto_matrix_htodemux == params.hto_matrix_multiseq & - params.rna_matrix_htodemux == params.rna_matrix_multiseq) { - multiseq_preprocess_out = htodemux_preprocess_out - } - else { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, params.hto_matrix_multiseq == 'raw' ? row.hto_matrix_raw : row.hto_matrix_filtered, - params.rna_matrix_multiseq == 'raw' ? row.rna_matrix_raw : row.rna_matrix_filtered)} - | set { input_list_preprocess_multiseq } - preprocessing_hashing_multiseq(input_list_preprocess_multiseq, params.hto_matrix_multiseq, params.rna_matrix_multiseq) - multiseq_preprocess_out = preprocessing_hashing_multiseq.out - } - multiseq_hashing(multiseq_preprocess_out) - multiseq_out = multiseq_hashing.out - } - else { - multiseq_out = channel.value('no_result') - } - - if (params.hashsolo == 'True') { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, params.hto_matrix_hashsolo == 'raw' ? row.hto_matrix_raw : row.hto_matrix_filtered, - params.rna_matrix_hashsolo == 'False' ? channel.value('None') : - (params.rna_matrix_hashsolo == 'raw' ? row.rna_matrix_raw : row.rna_matrix_filtered) - )} - | hash_solo_hashing - hashsolo_out = hash_solo_hashing.out - } - else { - hashsolo_out = channel.value('no_result') - } - - if (params.demuxem == 'True') { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, params.hto_matrix_demuxem == 'raw' ? row.hto_matrix_raw : row.hto_matrix_filtered, - params.rna_matrix_demuxem == 'raw' ? row.rna_matrix_raw : row.rna_matrix_filtered)} - | demuxem_hashing - demuxem_out = demuxem_hashing.out - } - else { - demuxem_out = channel.value('no_result') - } - - if (params.hashedDrops == 'True') { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, params.hto_matrix_hashedDrops == 'raw' ? row.hto_matrix_raw : row.hto_matrix_filtered) } - | hashedDrops_hashing - hashedDrops_out = hashedDrops_hashing.out - } - else { - hashedDrops_out = channel.value('no_result') - } - - if (params.bff == 'True') { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, params.hto_matrix_bff == 'raw' ? row.hto_matrix_raw : row.hto_matrix_filtered) } - | bff_hashing - bff_out = bff_hashing.out - print('BFF path to output') - } - else { - bff_out = channel.value('no_result') - } - if (params.gmmDemux == 'True') { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - - | map { row-> tuple(row.sampleId, - params.hto_matrix_gmm_demux == "raw" ? row.hto_matrix_raw : row.hto_matrix_filtered, - row.hto_name_gmm )} - | set {input_list_gmm_demux} - | gmm_demux_hashing(input_list_gmm_demux) - gmmDemux_out = gmm_demux_hashing.out - } - else { - gmmDemux_out = channel.value('no_result') - } - - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, file(row.hto_matrix_filtered), file(row.rna_matrix_filtered)) } - | set { input_list_summary } - summary(input_list_summary, demuxem_out, hashsolo_out, htodemux_out, multiseq_out, hashedDrops_out, bff_out, gmmDemux_out, params.generate_anndata, params.generate_mudata) - - emit: - summary.out -} diff --git a/modules/donor_match.nf b/modules/multi/donor_match.nf similarity index 94% rename from modules/donor_match.nf rename to modules/multi/donor_match.nf index 723932f..2805616 100644 --- a/modules/donor_match.nf +++ b/modules/multi/donor_match.nf @@ -1,10 +1,10 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -process matchDonor { - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode", mode: 'copy' +process matchDonor{ + publishDir "$params.outdir/$sampleId/$params.mode", mode: 'copy' label 'big_mem' - + tag "${sampleId}" conda "$projectDir/conda/donor_match.yml" input: @@ -43,7 +43,7 @@ process matchDonor { if ([ "$findVariants" != "False" ]); then best_method_vireo="\$(head -n 1 \$outputdir/best_method_vireo.txt)" if ([ "$params.mode" != "donor_match" ]); then - donor_genotype="\$(find $projectDir/$params.outdir/$sampleId/$params.mode/gene_demulti/vireo/\$best_method_vireo -name GT_donors.vireo.vcf.gz | head -n 1)" + donor_genotype="\$(find $params.outdir/$sampleId/$params.mode/gene_demulti/vireo/\$best_method_vireo -name GT_donors.vireo.vcf.gz | head -n 1)" else donor_genotype="\$(find $vireo_parent_dir/\$best_method_vireo -name GT_donors.vireo.vcf.gz | head -n 1)" fi diff --git a/modules/gene_demulti/bcftools.nf b/modules/multi/gene_demulti/bcftools.nf similarity index 87% rename from modules/gene_demulti/bcftools.nf rename to modules/multi/gene_demulti/bcftools.nf index 5de6b67..a1b2663 100644 --- a/modules/gene_demulti/bcftools.nf +++ b/modules/multi/gene_demulti/bcftools.nf @@ -1,9 +1,9 @@ #!/usr/bin/env nextflow nextflow.enable.dsl=2 process bcftools{ - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/gene_demulti/bcftools", mode: 'copy' + publishDir "$params.outdir/$sampleId/$params.mode/gene_demulti/bcftools", mode: 'copy' label 'big_mem' - + tag "${sampleId}" conda "bioconda::bcftools=1.9" input: diff --git a/modules/gene_demulti/cellsnp.nf b/modules/multi/gene_demulti/cellsnp.nf similarity index 94% rename from modules/gene_demulti/cellsnp.nf rename to modules/multi/gene_demulti/cellsnp.nf index fe052c5..973da31 100644 --- a/modules/gene_demulti/cellsnp.nf +++ b/modules/multi/gene_demulti/cellsnp.nf @@ -2,9 +2,9 @@ nextflow.enable.dsl=2 process cellSNP{ - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/gene_demulti/cellSNP", mode: 'copy' + publishDir "$params.outdir/$sampleId/$params.mode/gene_demulti/cellSNP", mode: 'copy' label 'big_mem' - + tag "${sampleId}" conda "bioconda::cellsnp-lite" input: @@ -33,7 +33,8 @@ process cellSNP{ output: - path "cellsnp_${sampleId}" + path "cellsnp_${sampleId}", emit: out1 + tuple val(sampleId), path("cellsnp_${sampleId}/${cellsnp_out}"), emit: cellsnp_input script: def samFile = "--samFile ${samFile_cellSNP}" @@ -114,5 +115,6 @@ workflow variant_cellSNP{ nproc_cellSNP, refseq_cellSNP, chrom, cellTAG, UMItag, minCOUNT, minMAF, doubletGL, inclFLAG, exclFLAG, minLEN, minMAPQ, countORPHAN, cellsnp_out) emit: - cellSNP.out + out1 = cellSNP.out.out1 + cellsnp_input = cellSNP.out.cellsnp_input } diff --git a/modules/gene_demulti/demuxlet.nf b/modules/multi/gene_demulti/demuxlet.nf similarity index 84% rename from modules/gene_demulti/demuxlet.nf rename to modules/multi/gene_demulti/demuxlet.nf index 2d10b8e..0d33fcf 100755 --- a/modules/gene_demulti/demuxlet.nf +++ b/modules/multi/gene_demulti/demuxlet.nf @@ -2,14 +2,33 @@ nextflow.enable.dsl=2 + +process subset_bam_and_sort_vcf_based_on_reference{ + label 'small_mem' + conda "bioconda::samtools=1.19.2 bedtools bcftools=1.19" + tag "${sampleId}" + input: + tuple val(sampleId), path(sam), path(sam_index), path(barcodes), val(vcf) + + output: + tuple val(sampleId), path("${sampleId}_dmx__filtered_bam_file.bam"), path("${sampleId}_dmx__filtered_bam_file.bam.csi"), path(barcodes), path("${sampleId}_dmx__samples.sorted_as_in_bam.vcf"), emit: input + when: + vcf !='None' + script: + """ + filter_bam_file_for_popscle_dsc_pileup.sh ${sam} ${barcodes} ${vcf} ${sampleId}_dmx__filtered_bam_file.bam + sort_vcf_same_as_bam.sh ${sampleId}_dmx__filtered_bam_file.bam ${vcf} > ${sampleId}_dmx__samples.sorted_as_in_bam.vcf + """ +} + process demuxlet { - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/gene_demulti/demuxlet", mode: 'copy' + publishDir "$params.outdir/$sampleId/$params.mode/gene_demulti/demuxlet", mode: 'copy' label 'small_mem' - + tag "${sampleId}" conda "bioconda::popscle" input: - tuple val(sampleId), path(sam), path(sam_index), path(group_list), path(vcf_donor) + tuple val(sampleId), path(sam), path(sam_index), path(group_list), val(vcf_donor) val tag_group val tag_UMI val sm @@ -45,6 +64,9 @@ process demuxlet { output: path "demuxlet_${sampleId}" + when: + vcf_donor !='None' + script: def samfile = "--sam $sam" def taggroup = tag_group != 'None' ? "--tag-group ${tag_group}" : '' @@ -107,6 +129,8 @@ workflow demultiplex_demuxlet{ take: input_list main: + + tag_group = params.tag_group tag_UMI = params.tag_UMI sm = params.sm @@ -134,6 +158,9 @@ workflow demultiplex_demuxlet{ doublet_prior = params.doublet_prior demuxlet_out = params.demuxlet_out + + subset_bam_and_sort_vcf_based_on_reference(input_list) + input_list = subset_bam_and_sort_vcf_based_on_reference.out.input demuxlet(input_list, tag_group, tag_UMI, sm, sm_list, sam_verbose, vcf_verbose, skip_umi, cap_BQ, min_BQ, min_MQ, min_TD, excl_flag, min_total, min_uniq, min_umi, min_snp, plp, field, geno_error_offset, geno_error_coeff, r2_info, min_mac, min_callrate, alpha, doublet_prior, demuxlet_out) diff --git a/modules/gene_demulti/freebayes.nf b/modules/multi/gene_demulti/freebayes.nf similarity index 98% rename from modules/gene_demulti/freebayes.nf rename to modules/multi/gene_demulti/freebayes.nf index fc85d92..ccfb5ed 100644 --- a/modules/gene_demulti/freebayes.nf +++ b/modules/multi/gene_demulti/freebayes.nf @@ -2,9 +2,9 @@ nextflow.enable.dsl=2 process freebayes{ - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/gene_demulti/freebayes", mode: 'copy' + publishDir "$params.outdir/$sampleId/$params.mode/gene_demulti/freebayes", mode: 'copy' label 'big_mem' - + tag "${sampleId}" conda "bioconda::freebayes=1.2" input: @@ -85,7 +85,7 @@ process freebayes{ output: - tuple val(sampleId), path ("${sampleId}_${region_freebayes}_${vcf_freebayes}") + tuple val(sampleId), path ("out_${sampleId}_${region_freebayes}_${vcf_freebayes}") script: def stdin = stdin_freebayes != 'False' ? "--stdin" : '' @@ -179,7 +179,7 @@ process freebayes{ ${observation_bias} ${base_quality_cap} ${prob_contamination} ${legacy_gls} ${contamination_estimates} \ ${report_genotype_likelihood_max} ${genotyping_max_iterations} ${genotyping_max_banddepth} ${posterior_integration_limits} ${exclude_unobserved_genotypes} ${genotype_variant_threshold} \ ${use_mapping_quality} ${harmonic_indel_quality} ${read_dependence_factor} ${genotype_qualities} $debug $dd - + ln -s ${sampleId}_${region_freebayes}_${vcf_freebayes} out_${sampleId}_${region_freebayes}_${vcf_freebayes} """ } diff --git a/modules/gene_demulti/freemuxlet.nf b/modules/multi/gene_demulti/freemuxlet.nf similarity index 84% rename from modules/gene_demulti/freemuxlet.nf rename to modules/multi/gene_demulti/freemuxlet.nf index 23390ff..6d7f12e 100755 --- a/modules/gene_demulti/freemuxlet.nf +++ b/modules/multi/gene_demulti/freemuxlet.nf @@ -1,15 +1,33 @@ #!/usr/bin/env nextflow nextflow.enable.dsl=2 -process freemuxlet { - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/gene_demulti/freemuxlet", mode: 'copy' + +process subset_bam_and_sort_vcf_based_on_reference{ label 'small_mem' + conda "bioconda::samtools=1.19.2 bedtools bcftools=1.19" + tag "${sampleId}" + input: + tuple val(sampleId), path(sam), path(sam_index), path(group_list), val(nsample) + path vcf + + output: + tuple val(sampleId), path(sam), path(sam_index), path(group_list), val(nsample), path("${sampleId}__samples.sorted_as_in_bam.vcf"), emit: input + + script: + """ + + sort_vcf_same_as_bam.sh ${sam} ${vcf} > ${sampleId}__samples.sorted_as_in_bam.vcf + """ +} +process freemuxlet { + publishDir "$params.outdir/$sampleId/$params.mode/gene_demulti/freemuxlet", mode: 'copy' + label 'small_mem' + tag "${sampleId}" conda "bioconda::popscle" input: - tuple val(sampleId), path(sam), path(sam_index), path(group_list), val(nsample) - path vcf + tuple val(sampleId), path(sam), path(sam_index), path(group_list), val(nsample), path(vcf) val tag_group val tag_UMI val sm @@ -119,7 +137,11 @@ workflow demultiplex_freemuxlet{ keep_init_missing = params.keep_init_missing freemuxlet_out = params.freemuxlet_out - freemuxlet(input_list, vcf, tag_group, tag_UMI, sm, sm_list, sam_verbose, vcf_verbose, skip_umi, cap_BQ, min_BQ, min_MQ, + + + subset_bam_and_sort_vcf_based_on_reference(input_list,vcf) + input_list = subset_bam_and_sort_vcf_based_on_reference.out.input + freemuxlet(input_list, tag_group, tag_UMI, sm, sm_list, sam_verbose, vcf_verbose, skip_umi, cap_BQ, min_BQ, min_MQ, min_TD, excl_flag, min_total, min_uniq, min_umi, min_snp, init_cluster,aux_files, verbose, doublet_prior, bf_thres, frac_init_clust, iter_init, keep_init_missing, freemuxlet_out) diff --git a/modules/gene_demulti/samtools.nf b/modules/multi/gene_demulti/samtools.nf similarity index 89% rename from modules/gene_demulti/samtools.nf rename to modules/multi/gene_demulti/samtools.nf index dcf322b..af8aea3 100644 --- a/modules/gene_demulti/samtools.nf +++ b/modules/multi/gene_demulti/samtools.nf @@ -2,9 +2,9 @@ nextflow.enable.dsl=2 process samtools{ - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/gene_demulti/samtools", mode: 'copy' + publishDir "$params.outdir/$sampleId/$params.mode/gene_demulti/samtools", mode: 'copy' label 'big_mem' - + tag "${sampleId}" conda "bioconda::samtools bioconda::umi_tools" input: diff --git a/modules/gene_demulti/scsplit.nf b/modules/multi/gene_demulti/scsplit.nf similarity index 96% rename from modules/gene_demulti/scsplit.nf rename to modules/multi/gene_demulti/scsplit.nf index b496a44..de7dab9 100644 --- a/modules/gene_demulti/scsplit.nf +++ b/modules/multi/gene_demulti/scsplit.nf @@ -2,9 +2,9 @@ nextflow.enable.dsl=2 process scSplit{ - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/gene_demulti/scSplit", mode: 'copy' + publishDir "$params.outdir/$sampleId/$params.mode/gene_demulti/scSplit", mode: 'copy' label 'big_mem' - + tag "${sampleId}" conda "$projectDir/conda/scsplit.yml" input: diff --git a/modules/gene_demulti/souporcell.nf b/modules/multi/gene_demulti/souporcell.nf similarity index 82% rename from modules/gene_demulti/souporcell.nf rename to modules/multi/gene_demulti/souporcell.nf index a6f643d..19af807 100755 --- a/modules/gene_demulti/souporcell.nf +++ b/modules/multi/gene_demulti/souporcell.nf @@ -2,9 +2,9 @@ nextflow.enable.dsl=2 process souporcell{ - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/gene_demulti/souporcell", mode: 'copy' + publishDir "$params.outdir/$sampleId/$params.mode/gene_demulti/souporcell", mode: 'copy' label 'big_mem' - + tag "${sampleId}" container "shub://wheaton5/souporcell" input: @@ -37,10 +37,12 @@ process souporcell{ def minref = "--min_ref ${min_ref}" def maxloci = "--max_loci ${max_loci}" def restart = restarts != 'None' ? "--restarts $restarts" : '' - def commonvariant = (common_variants != 'None' & use_known_genotype != "True" & known_genotypes == 'None' )? "--common_variants ${common_variants}" : '' + def commonvariant = (common_variants != 'None' & use_known_genotype != "True" & known_genotypes == 'None' )? "--common_variants commonvariant.vcf" : '' + def commonvariant_unzip = (common_variants != 'None' & use_known_genotype != "True" & known_genotypes == 'None' )? "bcftools view ${common_variants} -Ov -o commonvariant.vcf" : '' def commonvariant_name = (common_variants != 'None' & use_known_genotype != "True" & known_genotypes == 'None' ) ? common_variants : 'No common variants are given.' - def knowngenotype = (known_genotypes != 'None' & use_known_genotype == "True") ? "--known_genotypes ${known_genotypes}" : '' + def genotype_unzip = (known_genotypes != 'None' & use_known_genotype == "True") ? "bcftools view ${known_genotypes} -Ov -o unzipped.vcf" : '' + def knowngenotype = (known_genotypes != 'None' & use_known_genotype == "True") ? "--known_genotypes unzipped.vcf" : '' def knowngenotype_name = (known_genotypes != 'None' & use_known_genotype == "True") ? known_genotypes : 'No known variants are given.' def knowngenotypes_sample = known_genotypes_sample_names != 'None' ? "--known_genotypes_sample_names ${known_genotypes_sample_names}" : '' @@ -51,12 +53,14 @@ process souporcell{ def out = "souporcell_${sampleId}/${souporcell_out}" """ + ${genotype_unzip} + ${commonvariant_unzip} mkdir souporcell_${sampleId} mkdir $out touch souporcell_${sampleId}/params.csv echo -e "Argument,Value \n bamfile,${bam} \n barcode,${barcodes} \n fasta,${fasta} \n threads,${threads} \n clusters,${clusters} \n ploidy,${ploidy} \n min_alt,${min_alt} \n min_ref,${min_ref} \n max_loci,${max_loci} \n restarts,${restarts} \n common_variant,${commonvariant_name} \n known_genotype,${knowngenotype_name} \n known_genotype_sample,${knowngenotype_sample_name} \n skip_remap,${skip_remap} \n ignore,${ignore} " >> souporcell_${sampleId}/params.csv - souporcell_pipeline.py $bamfile $barcode $fastafile $thread $cluster $ploi $minalt $minref $maxloci $restart \ + souporcell_pipeline.py --threads ${task.cpus} $bamfile $barcode $fastafile $thread $cluster $ploi $minalt $minref $maxloci $restart \ $commonvariant $knowngenotype $knowngenotypes_sample $skipremap $ign -o $out """ } diff --git a/modules/gene_demulti/vireo.nf b/modules/multi/gene_demulti/vireo.nf similarity index 90% rename from modules/gene_demulti/vireo.nf rename to modules/multi/gene_demulti/vireo.nf index 294b132..d840ff1 100755 --- a/modules/gene_demulti/vireo.nf +++ b/modules/multi/gene_demulti/vireo.nf @@ -2,9 +2,9 @@ nextflow.enable.dsl=2 process vireo{ - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/gene_demulti/vireo", mode: 'copy' + publishDir "$params.outdir/$sampleId/$params.mode/gene_demulti/vireo", mode: 'copy' label 'big_mem' - + tag "${sampleId}" conda "aksarkar::vireosnp" input: @@ -34,7 +34,8 @@ process vireo{ def celldata_name = celldata.baseName def n_donor = ndonor != 'None'? "-N $ndonor" : '' def n_donor_yesno = ndonor != 'None'? "$ndonor" : "Number of donors are not given" - def donor = donorfile != 'None' ? "-d $donorfile" : '' + def donor = donorfile != 'None' ? "-d no_prefix_chr.vcf" : '' + def donor_preprocess = donorfile != 'None' ? "bcftools view $donorfile | awk '{gsub(/^chr/,\"\"); print}' | awk '{gsub(/ID=chr/,\"ID=\"); print}' > no_prefix_chr.vcf" : '' def donor_data_name = donorfile != 'None' ? donorfile : 'Donor file is not given' def geno_tag = donorfile != 'None' ? "--genoTag $genoTag" : '' def no_doublet = noDoublet != 'False' ? "--noDoublet" : '' @@ -51,6 +52,7 @@ process vireo{ def n_proc = "--nproc $nproc" """ + ${donor_preprocess} mkdir vireo_${sampleId} mkdir vireo_${sampleId}/${vireo_out} touch vireo_${sampleId}/params.csv diff --git a/modules/multi/gene_demultiplexing.nf b/modules/multi/gene_demultiplexing.nf new file mode 100644 index 0000000..2567e7f --- /dev/null +++ b/modules/multi/gene_demultiplexing.nf @@ -0,0 +1,302 @@ +#!/usr/bin/env nextflow +nextflow.enable.dsl=2 + +include { data_preprocess } from './gene_demulti/samtools' +include { filter_variant } from './gene_demulti/bcftools' +include { variant_cellSNP } from './gene_demulti/cellsnp' +include { variant_freebayes } from './gene_demulti/freebayes' +include { demultiplex_demuxlet } from './gene_demulti/demuxlet' +include { demultiplex_freemuxlet } from './gene_demulti/freemuxlet' +include { demultiplex_scSplit } from './gene_demulti/scsplit' +include { demultiplex_souporcell } from './gene_demulti/souporcell' +include { demultiplex_vireo } from './gene_demulti/vireo' + +def split_input(input){ + if (input =~ /;/ ){ + Channel.from(input).map{ return it.tokenize(';')}.flatten() + } + else{ + Channel.from(input) + } +} + + +process subset_bam_to_comon_variants{ + + label 'small_mem' + conda "bioconda::samtools=1.19.2 bedtools bcftools=1.19" + tag "${sampleId}" + input: + tuple val(sampleId), path(sam), path(sam_index), path(barcodes) + path vcf + + output: + tuple val(sampleId), path("${sampleId}__filtered_bam_file.bam"), path("${sampleId}__filtered_bam_file.bam.csi"), emit: input + + script: + """ + + bcftools sort ${vcf} -Oz -o sorted.vcf.gz + filter_bam_file_for_popscle_dsc_pileup.sh ${sam} ${barcodes} sorted.vcf.gz ${sampleId}__filtered_bam_file.bam + """ + +} + +process summary{ + publishDir "$params.outdir/$sampleId/$params.mode/gene_demulti", mode: 'copy' + label 'small_mem' + tag "${sampleId}" + conda "pandas scanpy mudata" + + input: + tuple(val(sampleId), path(hto_matrix, stageAs: 'hto_data'), path(rna_matrix, stageAs: 'rna_data'), val(souporcell_result), val(scsplit_result), val(vireo_result),val(freemuxlet_result),val(demuxlet_result)) + val generate_anndata + val generate_mudata + + + output: + tuple val(sampleId), path("genetic_summary") + + script: + def demuxlet_files = "" + def freemuxlet_files = "" + def vireo_files = "" + def souporcell_files = "" + def scsplit_files = "" + def generate_adata = "" + def generate_mdata = "" + + if (demuxlet_result){ + demuxlet_files = "--demuxlet ${demuxlet_result}" + } + if (freemuxlet_result){ + freemuxlet_files = "--freemuxlet ${freemuxlet_result}" + } + if (vireo_result){ + vireo_files = "--vireo ${vireo_result}" + } + if (souporcell_result){ + souporcell_files = "--souporcell ${souporcell_result}" + } + if (scsplit_result){ + scsplit_files = "--scsplit ${scsplit_result}" + } + if (generate_anndata == "True"){ + if(rna_matrix.name == "None"){ + error "Error: RNA count matrix is not given." + } + generate_adata = "--generate_anndata --read_rna_mtx rna_data" + } + if (generate_mudata == "True"){ + if(rna_matrix.name == "None"){ + error "Error: RNA count matrix is not given." + } + if(hto_matrix.name == "None"){ + error "Error: HTO count matrix is not given." + } + generate_mdata = "--generate_mudata --read_rna_mtx rna_data --read_hto_mtx hto_data" + } + """ + summary_gene.py $demuxlet_files $vireo_files $souporcell_files $scsplit_files $freemuxlet_files $generate_adata $generate_mdata + """ +} + + + +workflow gene_demultiplexing { + take: + input_channel + main: + + if ((params.demuxlet == "True" & params.demuxlet_preprocess == "True") | \ + (params.freemuxlet == "True" & params.freemuxlet_preprocess == "True") | \ + (params.scSplit == "True" & params.scSplit_preprocess == "True") | \ + (params.vireo == "True" & params.vireo_preprocess == "True") | \ + (params.souporcell == "True" & params.souporcell_preprocess == "True")) { + + input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, row.bam)} + | data_preprocess + qc_bam = data_preprocess.out.map{ it -> tuple( it.name.tokenize( '_' ).last(), it + "/sorted.bam", it + "/sorted.bam.bai") } + }else{ + qc_bam = input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, row.bam, row.bam_index)} + + + + } + + input_param_cellsnp = input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, row.barcodes) } + qc_bam_new = qc_bam.join(input_param_cellsnp) + + + if (params.subset_bam_to_comon_variants){ + qc_bam = subset_bam_to_comon_variants(qc_bam_new,params.common_variants_freemuxlet) + } + + ////////// + //FreeBayes/ scSplit + ////////// + if (params.scSplit == "True" & params.scSplit_variant == 'True'){ + + freebayes_region = Channel.from(1..22, "X","Y").flatten() + if (params.region != "None"){ + freebayes_region = split_input(params.region) + } + + variant_freebayes(qc_bam, freebayes_region) + filter_variant(variant_freebayes.out) + freebayes_vcf = filter_variant.out.map{ it -> tuple(it[0], it[1] + "/filtered_sorted_total_chroms.vcf")} + } + + if (params.scSplit == "True"){ + + + input_bam_scsplit = qc_bam + + if (params.scSplit_variant == 'True'){ + input_vcf_scsplit = freebayes_vcf + } + else{ + + input_vcf_scsplit = input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, row.vcf_mixed)} + } + + input_param_scsplit = input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, row.barcodes, row.nsample, row.vcf_donor)} + + input_list_scsplit = input_bam_scsplit.join(input_vcf_scsplit) + input_list_scsplit = input_list_scsplit.join(input_param_scsplit) + demultiplex_scSplit(input_list_scsplit) + scSplit_out = demultiplex_scSplit.out + } + else{ + scSplit_out = channel.value("no_result") + } + + + ////////// + //CellSNP/Vireo + ////////// + if (params.vireo == "True" & params.vireo_variant == 'True'){ + + variant_cellSNP(qc_bam_new) + cellsnp_vcf = variant_cellSNP.out.out1.map{ it -> tuple( it.name.tokenize( '_' ).last(), it + "/*/cellSNP.cells.vcf") } + + } + + if (params.vireo == "True"){ + + if (params.vireo_variant == 'True'){ + input_vcf_vireo = variant_cellSNP.out.cellsnp_input + } + else{ + input_vcf_vireo = input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, row.celldata)} + } + input_param_vireo = input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, row.nsample, row.vcf_donor)} + + input_list_vireo = input_vcf_vireo.join(input_param_vireo) + demultiplex_vireo(input_list_vireo) + vireo_out = demultiplex_vireo.out + } + else{ + vireo_out = channel.value("no_result") + } + + + ////////// + // Demuxlet/Freemuxlet + // demuxlet (with genotypes) or freemuxlet (without genotypes) + ////////// + + if (params.demuxlet == "True"){ + + input_bam_demuxlet = qc_bam + + input_param_demuxlet = input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, row.barcodes, row.vcf_donor)} + input_list_demuxlet = input_bam_demuxlet.join(input_param_demuxlet) + demultiplex_demuxlet(input_list_demuxlet) + demuxlet_out = demultiplex_demuxlet.out + } + else{ + demuxlet_out = channel.value("no_result") + } + + + ////////// + //Freemuxlet + ////////// + + if (params.freemuxlet == "True"){ + + input_bam_freemuxlet = qc_bam + + input_param_freemuxlet = input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, row.barcodes, row.nsample)} + + input_list_freemuxlet = input_bam_freemuxlet.join(input_param_freemuxlet) + + demultiplex_freemuxlet(input_list_freemuxlet) + freemuxlet_out = demultiplex_freemuxlet.out + } + else{ + freemuxlet_out = channel.value("no_result") + } + + + ////////// + //Souporcell + ////////// + + if (params.souporcell == "True"){ + + input_bam_souporcell = qc_bam + + input_param_souporcell = input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, row.barcodes, row.nsample, row.vcf_donor)} + + input_list_souporcell = input_bam_souporcell.join(input_param_souporcell) + demultiplex_souporcell(input_list_souporcell) + souporcell_out = demultiplex_souporcell.out + } + else{ + souporcell_out = channel.value("no_result") + } + + ////////// + //Summary + ////////// + + input_list_summary = input_channel.splitCsv(header:true).map { row-> tuple(row.sampleId, file(row.hto_matrix_filtered), file(row.rna_matrix_filtered))} + + demuxlet_out_ch = demuxlet_out.flatten().map{r1-> tuple( "$r1".replaceAll(".*demuxlet_",""), r1 )} + freemuxlet_out_ch = freemuxlet_out.flatten().map{r1-> tuple( "$r1".replaceAll(".*freemuxlet_",""), r1 )} + vireo_out_ch = vireo_out.flatten().map{r1-> tuple( "$r1".replaceAll(".*vireo_",""), r1 )} + scSplit_out_ch = scSplit_out.flatten().map{r1-> tuple( "$r1".replaceAll(".*scsplit_",""), r1 )} + souporcell_out_ch = souporcell_out.flatten().map{r1-> tuple( "$r1".replaceAll(".*souporcell_",""), r1 )} + + summary_input = input_list_summary.join(souporcell_out_ch,by:0,remainder: true).join(scSplit_out_ch,by:0,remainder: true).join(vireo_out_ch,by:0,remainder: true).join(freemuxlet_out_ch,by:0,remainder: true).join(demuxlet_out_ch,by:0,remainder: true) + summary_input = summary_input.filter{ it[0] != 'no_result' } + + summary(summary_input, + params.generate_anndata, params.generate_mudata) + + emit: + summary.out +} + diff --git a/modules/hash_demulti/bff.nf b/modules/multi/hash_demulti/bff.nf similarity index 95% rename from modules/hash_demulti/bff.nf rename to modules/multi/hash_demulti/bff.nf index 37d6a36..6fd4566 100644 --- a/modules/hash_demulti/bff.nf +++ b/modules/multi/hash_demulti/bff.nf @@ -1,8 +1,8 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -process bff { - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti/bff", mode:'copy' +process bff{ + publishDir "$params.outdir/$sampleId/$params.mode/hash_demulti/bff", mode:'copy' label 'small_mem' conda "$projectDir/conda/bff.yml" diff --git a/modules/hash_demulti/demuxem.nf b/modules/multi/hash_demulti/demuxem.nf similarity index 91% rename from modules/hash_demulti/demuxem.nf rename to modules/multi/hash_demulti/demuxem.nf index ae9a5e3..6ae69fc 100644 --- a/modules/hash_demulti/demuxem.nf +++ b/modules/multi/hash_demulti/demuxem.nf @@ -1,12 +1,12 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -process demuxem { - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti/demuxem", mode:'copy' +process demuxem{ + publishDir "$params.outdir/$sampleId/$params.mode/hash_demulti/demuxem", mode:'copy' label 'small_mem' - conda 'bioconda::pegasuspy bioconda::scanpy bioconda::demuxEM' - + conda "bioconda::pegasuspy demuxEM scanpy" + input: tuple val(sampleId), path(raw_hto_matrix_dir, stageAs: "hto_data_${params.hto_matrix_demuxem}"), path(raw_rna_matrix_dir, stageAs: "rna_data_${params.rna_matrix_demuxem}") diff --git a/modules/hash_demulti/gmm_demux.nf b/modules/multi/hash_demulti/gmm_demux.nf similarity index 96% rename from modules/hash_demulti/gmm_demux.nf rename to modules/multi/hash_demulti/gmm_demux.nf index 5eb2d6e..93a4b65 100644 --- a/modules/hash_demulti/gmm_demux.nf +++ b/modules/multi/hash_demulti/gmm_demux.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process gmm_demux{ - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti/gmm_demux", mode:'copy' + publishDir "$params.outdir/$sampleId/$params.mode/hash_demulti/gmm_demux", mode:'copy' label 'small_mem' conda "$projectDir/conda/gmm_demux.yml" diff --git a/modules/hash_demulti/hashedDrops.nf b/modules/multi/hash_demulti/hashedDrops.nf similarity index 97% rename from modules/hash_demulti/hashedDrops.nf rename to modules/multi/hash_demulti/hashedDrops.nf index 9c5b388..dd19f4c 100755 --- a/modules/hash_demulti/hashedDrops.nf +++ b/modules/multi/hash_demulti/hashedDrops.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process hashedDrops{ - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti/hashedDrops", mode:'copy' + publishDir "$params.outdir/$sampleId/$params.mode/hash_demulti/hashedDrops", mode:'copy' label 'small_mem' conda "conda-forge::r-seurat conda-forge::r-argparse bioconda::bioconductor-dropletutils" diff --git a/modules/hash_demulti/hashsolo.nf b/modules/multi/hash_demulti/hashsolo.nf similarity index 93% rename from modules/hash_demulti/hashsolo.nf rename to modules/multi/hash_demulti/hashsolo.nf index 534a0ce..a6f91f6 100755 --- a/modules/hash_demulti/hashsolo.nf +++ b/modules/multi/hash_demulti/hashsolo.nf @@ -1,7 +1,7 @@ #!/usr/bin/env nextflow -nextflow.enable.dsl = 2 -process hash_solo { - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti/hashsolo", mode:'copy' +nextflow.enable.dsl=2 +process hash_solo{ + publishDir "$params.outdir/$sampleId/$params.mode/hash_demulti/hashsolo", mode:'copy' label 'small_mem' conda "$projectDir/conda/hashsolo_py.yml" diff --git a/modules/hash_demulti/htodemux.nf b/modules/multi/hash_demulti/htodemux.nf similarity index 96% rename from modules/hash_demulti/htodemux.nf rename to modules/multi/hash_demulti/htodemux.nf index 3170ccc..03a2540 100644 --- a/modules/hash_demulti/htodemux.nf +++ b/modules/multi/hash_demulti/htodemux.nf @@ -1,8 +1,8 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -process htodemux { - publishDir "$projectDir/$params.outdir/${seurat_object.name.tokenize( '_' )[1]}/$params.mode/hash_demulti/htodemux", mode: 'copy' +process htodemux{ + publishDir "$params.outdir/${seurat_object.name.tokenize( '_' )[1]}/$params.mode/hash_demulti/htodemux", mode: 'copy' label 'small_mem' conda 'conda-forge::r-seurat conda-forge::r-argparse' diff --git a/modules/hash_demulti/multiseq.nf b/modules/multi/hash_demulti/multiseq.nf similarity index 91% rename from modules/hash_demulti/multiseq.nf rename to modules/multi/hash_demulti/multiseq.nf index 63de4ed..8074a76 100644 --- a/modules/hash_demulti/multiseq.nf +++ b/modules/multi/hash_demulti/multiseq.nf @@ -1,8 +1,8 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -process multi_seq { - publishDir "$projectDir/$params.outdir/${seurat_object.name.tokenize( '_' )[1]}/$params.mode/hash_demulti/multiseq", mode: 'copy' +process multi_seq{ + publishDir "$params.outdir/${seurat_object.name.tokenize( '_' )[1]}/$params.mode/hash_demulti/multiseq", mode: 'copy' label 'small_mem' conda 'conda-forge::r-seurat conda-forge::r-argparse' diff --git a/modules/hash_demulti/preprocess.nf b/modules/multi/hash_demulti/preprocess.nf similarity index 93% rename from modules/hash_demulti/preprocess.nf rename to modules/multi/hash_demulti/preprocess.nf index d866d6a..a104dea 100644 --- a/modules/hash_demulti/preprocess.nf +++ b/modules/multi/hash_demulti/preprocess.nf @@ -1,8 +1,8 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -process preprocess { - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti/preprocess", mode:'copy' +process preprocess{ + publishDir "$params.outdir/$sampleId/$params.mode/hash_demulti/preprocess", mode:'copy' label 'small_mem' conda 'conda-forge::r-seurat conda-forge::r-argparse' diff --git a/modules/hash_demulti/solo.nf b/modules/multi/hash_demulti/solo.nf similarity index 96% rename from modules/hash_demulti/solo.nf rename to modules/multi/hash_demulti/solo.nf index 9fba924..4c9061a 100755 --- a/modules/hash_demulti/solo.nf +++ b/modules/multi/hash_demulti/solo.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 process solo{ - publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/solo", mode:'copy' + publishDir "$params.outdir/$params.mode/hash_demulti/solo", mode:'copy' label 'small_mem' input: diff --git a/modules/multi/hash_demultiplexing.nf b/modules/multi/hash_demultiplexing.nf new file mode 100644 index 0000000..4f204e0 --- /dev/null +++ b/modules/multi/hash_demultiplexing.nf @@ -0,0 +1,198 @@ +#!/usr/bin/env nextflow +nextflow.enable.dsl=2 +include { preprocessing_hashing as preprocessing_hashing_htodemux } from './hash_demulti/preprocess' +include { preprocessing_hashing as preprocessing_hashing_multiseq } from './hash_demulti/preprocess' +include { multiseq_hashing } from './hash_demulti/multiseq' +include { htodemux_hashing } from './hash_demulti/htodemux' +include { hash_solo_hashing } from './hash_demulti/hashsolo' +include { hashedDrops_hashing } from './hash_demulti/hashedDrops' +include { demuxem_hashing } from './hash_demulti/demuxem' +include { gmm_demux_hashing } from './hash_demulti/gmm_demux' +include { bff_hashing } from './hash_demulti/bff' + +process summary{ + publishDir "$params.outdir/$sampleId/$params.mode/hash_demulti", mode: 'copy' + label 'small_mem' + + conda "pandas scanpy mudata" + + input: + tuple val(sampleId), path(hto_matrix, stageAs: 'hto_data'), path(rna_matrix, stageAs: 'rna_data') + val demuxem_result + val hashsolo_result + val htodemux_result + val multiseq_result + val hashedDrops_result + val bff_result + val gmmDemux_result + val generate_anndata + val generate_mudata + + output: + tuple val(sampleId), path("hash_summary") + + script: + def htodemux_files = "" + def hashsolo_files = "" + def multiseq_files = "" + def hashedDrops_files = "" + def gmmDemux_files = "" + def demuxem_files = "" + def bff_files = "" + def generate_adata = "" + def generate_mdata = "" + + if (demuxem_result != "no_result"){ + demuxem_res = demuxem_result.find{it.name.contains(sampleId)} + demuxem_files = "--demuxem ${demuxem_res}" + } + if (hashsolo_result != "no_result"){ + hashsolo_res = hashsolo_result.find{it.name.contains(sampleId)} + hashsolo_files = "--hashsolo ${hashsolo_res}" + } + if (htodemux_result != "no_result"){ + htodemux_res = htodemux_result.find{it.name.contains(sampleId)} + htodemux_files = "--htodemux ${htodemux_res}" + } + if (multiseq_result != "no_result"){ + multiseq_res = multiseq_result.find{it.name.contains(sampleId)} + multiseq_files = "--multiseq ${multiseq_res}" + } + if (hashedDrops_result != "no_result"){ + hashedDrops_res = hashedDrops_result.find{it.name.contains(sampleId)} + hashedDrops_files = "--hashedDrops ${hashedDrops_res}" + } + if (gmmDemux_result != "no_result"){ + gmmDemux_res = gmmDemux_result.find{it.name.contains(sampleId)} + gmmDemux_files = "--gmm_demux ${gmmDemux_res}" + } + if (bff_result != "no_result"){ + bff_res = bff_result.find{it.name.contains(sampleId)} + bff_files = "--bff ${bff_res}" + } + if (generate_anndata == "True"){ + if(rna_matrix.name == "None"){ + error "Error: RNA count matrix is not given." + } + generate_adata = "--generate_anndata --read_rna_mtx rna_data" + } + if (generate_mudata == "True"){ + if(rna_matrix.name == "None"){ + error "Error: RNA count matrix is not given." + } + if(hto_matrix.name == "None"){ + error "Error: HTO count matrix is not given." + } + generate_mdata = "--generate_mudata --read_rna_mtx rna_data --read_hto_mtx hto_data" + } + + """ + summary_hash.py $demuxem_files $htodemux_files $multiseq_files $hashedDrops_files $hashsolo_files $gmmDemux_files $bff_files $generate_adata $generate_mdata --sampleId $sampleId + """ +} + + +workflow hash_demultiplexing{ + take: + input_channel + main: + + if (params.htodemux == "True"){ + input_channel.splitCsv(header:true).map { row-> tuple(row.sampleId, params.hto_matrix_htodemux == "raw" ? row.hto_matrix_raw : row.hto_matrix_filtered, + params.rna_matrix_htodemux == "raw" ? row.rna_matrix_raw : row.rna_matrix_filtered)}.set{input_list_preprocess_htodemux} + + preprocessing_hashing_htodemux(input_list_preprocess_htodemux, params.hto_matrix_htodemux, params.rna_matrix_htodemux) + htodemux_preprocess_out = preprocessing_hashing_htodemux.out + htodemux_hashing(htodemux_preprocess_out) + htodemux_out = htodemux_hashing.out + } + else{ + htodemux_out = channel.value("no_result") + } + + if (params.multiseq == "True"){ + if (params.htodemux == "True" & params.hto_matrix_htodemux == params.hto_matrix_multiseq & + params.rna_matrix_htodemux == params.rna_matrix_multiseq){ + multiseq_preprocess_out = htodemux_preprocess_out + } + else{ + input_channel.splitCsv(header:true).map { row-> tuple(row.sampleId, params.hto_matrix_multiseq == "raw" ? row.hto_matrix_raw : row.hto_matrix_filtered, + params.rna_matrix_multiseq == "raw" ? row.rna_matrix_raw : row.rna_matrix_filtered)}.set {input_list_preprocess_multiseq} + + preprocessing_hashing_multiseq(input_list_preprocess_multiseq, params.hto_matrix_multiseq, params.rna_matrix_multiseq) + multiseq_preprocess_out = preprocessing_hashing_multiseq.out + } + multiseq_hashing(multiseq_preprocess_out) + multiseq_out = multiseq_hashing.out + } + else{ + multiseq_out = channel.value("no_result") + } + + if (params.hashsolo == "True"){ + input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, params.hto_matrix_hashsolo == "raw" ? row.hto_matrix_raw : row.hto_matrix_filtered, + params.rna_matrix_hashsolo == "False" ? channel.value("None") : + (params.rna_matrix_hashsolo == "raw" ? row.rna_matrix_raw : row.rna_matrix_filtered) + )} + | hash_solo_hashing + hashsolo_out = hash_solo_hashing.out + } + else{ + hashsolo_out = channel.value("no_result") + } + + if (params.demuxem == "True"){ + input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, params.hto_matrix_demuxem == "raw" ? row.hto_matrix_raw : row.hto_matrix_filtered, + params.rna_matrix_demuxem == "raw" ? row.rna_matrix_raw : row.rna_matrix_filtered)} + | demuxem_hashing + demuxem_out = demuxem_hashing.out + } + else{ + demuxem_out = channel.value("no_result") + } + + if (params.hashedDrops == "True"){ + input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, params.hto_matrix_hashedDrops == "raw" ? row.hto_matrix_raw : row.hto_matrix_filtered )} + | hashedDrops_hashing + hashedDrops_out = hashedDrops_hashing.out + } + else{ + hashedDrops_out = channel.value("no_result") + } + + + if (params.bff == "True"){ + input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, params.hto_matrix_bff == "raw" ? row.hto_matrix_raw : row.hto_matrix_filtered )} + | bff_hashing + bff_out = bff_hashing.out + print("BFF path to output") + } + else{ + bff_out = channel.value("no_result") + } + if (params.gmmDemux == "True"){ + input_channel \ + | splitCsv(header:true) \ + | map { row-> tuple(row.sampleId, params.hto_matrix_gmm_demux == "raw" ? row.hto_matrix_raw : row.hto_matrix_filtered, row.hto_name_gmm )} + | gmm_demux_hashing + gmmDemux_out = gmm_demux_hashing.out + } + else{ + gmmDemux_out = channel.value("no_result") + } + + input_channel.splitCsv(header:true).map { row-> tuple(row.sampleId, file(row.hto_matrix_filtered), file(row.rna_matrix_filtered))}.set {input_list_summary} + + summary(input_list_summary, demuxem_out, hashsolo_out, htodemux_out, multiseq_out, hashedDrops_out,bff_out,gmmDemux_out, params.generate_anndata, params.generate_mudata) + + emit: + summary.out +} diff --git a/modules/multi/preprocessing/preprocessing.nf b/modules/multi/preprocessing/preprocessing.nf new file mode 100644 index 0000000..fd1ddae --- /dev/null +++ b/modules/multi/preprocessing/preprocessing.nf @@ -0,0 +1,34 @@ +process create_single_chanel_input{ + + tag "${sampleId}" + conda "$projectDir/conda/scsplit.yml" + + input: + val sample_name + val hto_matrix_raw + val hto_matrix_filtered + val rna_matrix_raw + val rna_matrix_filtered + val bam + val bai + val barcodes + val fasta + val fasta_index + val nsample + val cell_data + val vcf_mixed + val vcf_donor + val vireo_parent_dir + val demultiplexing_result + output: + path('hadge_single_input.csv'), emit: input_channel + + script: + """ + echo "sampleId,rna_matrix_raw,rna_matrix_filtered,hto_matrix_raw,hto_matrix_filtered,bam,bam_index,barcodes,nsample,cell_data,vcf_mixed,vcf_donor,vireo_parent_dir,demultiplexing_result" > hadge_single_input.csv + echo "${sample_name},${rna_matrix_raw},${rna_matrix_filtered},${hto_matrix_raw},${hto_matrix_filtered},${bam},${bai},${barcodes},${nsample},${cell_data},${vcf_mixed},${vcf_donor},${vireo_parent_dir},${demultiplexing_result}" >> hadge_single_input.csv + """ + +} + + diff --git a/modules/multi_demultiplexing.nf b/modules/multi_demultiplexing.nf index 24e1cf0..7aed27f 100644 --- a/modules/multi_demultiplexing.nf +++ b/modules/multi_demultiplexing.nf @@ -1,11 +1,13 @@ #!/usr/bin/env nextflow -nextflow.enable.dsl = 2 -include { hash_demultiplexing } from './hash_demultiplexing' -include { gene_demultiplexing } from './gene_demultiplexing' -include { donor_match } from './donor_match' +nextflow.enable.dsl=2 -process generate_data { - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/data_output", mode: 'copy' +include { hash_demultiplexing } from "$projectDir/modules/multi/hash_demultiplexing" +include { gene_demultiplexing } from "$projectDir/modules/multi/gene_demultiplexing" +include { donor_match } from "$projectDir/modules/multi/donor_match" + + +process generate_data{ + publishDir "$params.outdir/$sampleId/$params.mode/data_output", mode: 'copy' label 'small_mem' conda 'pandas scanpy mudata' @@ -44,8 +46,8 @@ process generate_data { """ } -process summary_all { - publishDir "$projectDir/$params.outdir/$sampleId/$params.mode", mode: 'copy' +process summary_all{ + publishDir "$params.outdir/$sampleId/$params.mode", mode: 'copy' label 'small_mem' conda 'pandas scanpy mudata' @@ -57,67 +59,66 @@ process summary_all { script: """ - summary.py --gene_demulti $gene_demulti_result --hash_demulti $hash_demulti_result + summary.py --gene_demulti $gene_demulti_result --hash_demulti $hash_demulti_result """ } -workflow run_multi { - if (params.mode == 'genetic') { - gene_demultiplexing() - if (params.match_donor == 'True') { - gene_demultiplexing.out.view() - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row-> tuple(row.sampleId, row.nsamples_genetic, row.barcodes, 'None', 'None') } - | join(gene_demultiplexing.out) - | donor_match +workflow run_multi{ + take: + input_channel + main: + + if (params.mode == "genetic"){ + // Performing genetic demultiplexing methodologies + gene_demultiplexing(input_channel) + //////////// + + if (params.match_donor == "True"){ + + input_channel.splitCsv(header:true).map { row-> tuple(row.sampleId, row.nsample, row.barcodes, "None", "None")}.join(gene_demultiplexing.out).set{dm_input} + } } - } - else if (params.mode == 'hashing') { - hash_demultiplexing() - if (params.match_donor == 'True') { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row -> tuple(row.sampleId, row.nsamples_genetic, row.barcodes, 'None', 'None') } - | join(hash_demultiplexing.out) - | donor_match + else if (params.mode == "hashing"){ + + // Performing hashing demultplexing + hash_demultiplexing(input_channel) + //////////// + + if (params.match_donor == "True"){ + input_channel.splitCsv(header:true).map { row -> tuple(row.sampleId, row.nsample, row.barcodes, "None", "None")}.join(hash_demultiplexing.out).set{dm_input} + } } - } - else if (params.mode == 'rescue') { - hash_demultiplexing() - gene_demultiplexing() - gene_summary = gene_demultiplexing.out - hash_summary = hash_demultiplexing.out - input_summary_all = gene_summary.join(hash_summary) - summary_all(input_summary_all) - if (params.match_donor == 'True') { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row -> tuple(row.sampleId, row.nsamples_genetic, row.barcodes, 'None', 'None') } - | join(summary_all.out) - | donor_match + else if (params.mode == "rescue"){ + + // Performing both hashing and genetic demultiplexing methods + hash_demultiplexing(input_channel) + gene_demultiplexing(input_channel) + //////////// + + gene_summary = gene_demultiplexing.out + hash_summary = hash_demultiplexing.out + input_summary_all = gene_summary.join(hash_summary) + summary_all(input_summary_all) + + if (params.match_donor == "True"){ + input_channel.splitCsv(header:true).map { row -> tuple(row.sampleId, row.nsample, row.barcodes, "None", "None")}.join(summary_all.out).set{dm_input} + } + } - if (params.generate_anndata == 'True' || params.generate_mudata == 'True') { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row -> tuple(row.sampleId, row.hto_matrix_filtered, row.rna_matrix_filtered) } - | join(donor_match.out) - | set { input_generate_data } - generate_data(input_generate_data, params.generate_anndata, params.generate_mudata) + else if (params.mode == "donor_match"){ + // Performing just donor matching + input_channel.splitCsv(header:true).map { row -> tuple(row.sampleId, row.nsample, row.barcodes, row.celldata, row.vireo_parent_dir, row.demultiplexing_result)}.set{dm_input} } - } - else if (params.mode == 'donor_match') { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row -> tuple(row.sampleId, row.nsamples_genetic, row.barcodes, row.celldata, row.vireo_parent_dir, row.demultiplexing_result) } \ - | donor_match - if (params.generate_anndata == 'True' || params.generate_mudata == 'True') { - Channel.fromPath(params.multi_input) \ - | splitCsv(header:true) \ - | map { row -> tuple(row.sampleId, row.hto_matrix_filtered, row.rna_matrix_filtered) } - | join(donor_match.out) - | set { input_generate_data } - generate_data(input_generate_data, params.generate_anndata, params.generate_mudata) + + + if (params.match_donor == "True" || params.mode == "donor_match"){ + donor_match(dm_input) + + if (params.generate_anndata == "True" || params.generate_mudata == "True" ){ + input_channel.splitCsv(header:true).map { row -> tuple(row.sampleId, row.hto_matrix_filtered, row.rna_matrix_filtered)}.join(donor_match.out).set{input_generate_data} + generate_data(input_generate_data, params.generate_anndata, params.generate_mudata) + } } - } + + } diff --git a/modules/single/donor_match.nf b/modules/single/donor_match.nf deleted file mode 100644 index c1e72c1..0000000 --- a/modules/single/donor_match.nf +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -process matchDonor { - publishDir "$projectDir/$params.outdir/$params.mode", mode: 'copy' - label 'big_mem' - - conda "$projectDir/conda/donor_match.yml" - - input: - path demultiplexing_result - val ndonor - path barcode_whitelist - val method1_name - val method2_name - val findVariants - val cell_genotype - val variant_count - val variant_pct - val vireo_parent_dir - - output: - path 'donor_match' - - script: - def two_method = (method1_name != 'None' & method2_name != 'None') ? "--method1 $method1_name --method2 $method2_name" : '' - - def cell_genotype_path = '' - if (findVariants == 'True' | findVariants == 'default') { - cell_genotype_path = cell_genotype != 'None' ? "--cell_genotype $cell_genotype" : \ - "--cell_genotype $projectDir/$params.outdir/$params.mode/gene_demulti/cellSNP/cellsnp_1/*/cellSNP.cells.vcf.gz" - } - - def vireo_parent_path = '' - if (findVariants == 'vireo' | findVariants == 'True') { - vireo_parent_path = (params.mode == 'donor_match' & vireo_parent_dir != 'None') ? "--vireo_parent_dir $vireo_parent_dir" : \ - "--vireo_parent_dir $projectDir/$params.outdir/$params.mode/gene_demulti/vireo/" - } - - def barcode_whitelist_path = (barcode_whitelist != 'None') ? "--barcode $barcode_whitelist" : '' - - """ - export R_MAX_VSIZE=100Gb - outputdir=donor_match - mkdir -p \$outputdir - donor_match.R --result_csv $demultiplexing_result $barcode_whitelist_path --findVariants $findVariants \ - $cell_genotype_path --variant_pct $variant_pct --variant_count $variant_count --ndonor $ndonor \ - $two_method --outputdir \$outputdir $vireo_parent_path - - if ([ "$findVariants" != "False" ]); then - best_method_vireo="\$(head -n 1 \$outputdir/best_method_vireo.txt)" - if ([ "$params.mode" != "donor_match" ]); then - donor_genotype="\$(find $projectDir/$params.outdir/$params.mode/gene_demulti/vireo/\$best_method_vireo -name GT_donors.vireo.vcf.gz | head -n 1)" - else - donor_genotype="\$(find $vireo_parent_dir/\$best_method_vireo -name GT_donors.vireo.vcf.gz | head -n 1)" - fi - - if ([ "$findVariants" = "True" ] || [ "$findVariants" = "default" ]); then - gunzip -c \$donor_genotype > \$outputdir/GT_donors.vireo.vcf - if ([ \$(grep "^##config" \$outputdir/GT_donors.vireo.vcf | wc -l) == 0 ]); then - bcftools view --header-only $cell_genotype | grep "^##" | grep -v "^##bcftools" > \$outputdir/donor_with_header.vcf - grep -v "^##" \$outputdir/GT_donors.vireo.vcf >> \$outputdir/donor_with_header.vcf - bcftools sort \$outputdir/donor_with_header.vcf -Oz -o \$outputdir/compressed_sorted_donor_genotype.vcf.gz - rm \$outputdir/donor_with_header.vcf - else - bcftools sort \$donor_genotype -Oz -o \$outputdir/compressed_sorted_donor_genotype.vcf.gz - fi - bcftools index \$outputdir/compressed_sorted_donor_genotype.vcf.gz - bcftools filter \$outputdir/compressed_sorted_donor_genotype.vcf.gz -R \$outputdir/donor_specific_variants.csv > \$outputdir/donor_genotype_subset_by_default.vcf - bcftools reheader --samples \$outputdir/donor_match.csv -o \$outputdir/donor_genotype_subset_by_default_matched.vcf \$outputdir/donor_genotype_subset_by_default.vcf - - rm \$outputdir/GT_donors.vireo.vcf - fi - - if ([ "$findVariants" = "True" ]); then - bcftools filter \$outputdir/compressed_sorted_donor_genotype.vcf.gz -R \$outputdir/representative_variants_vireo.csv > \$outputdir/donor_genotype_subset_by_vireo.vcf - bcftools reheader --samples \$outputdir/donor_match.csv -o \$outputdir/donor_genotype_subset_by_vireo_matched.vcf \$outputdir/donor_genotype_subset_by_vireo.vcf - fi - - if ([ "$findVariants" = "vireo" ]); then - bcftools sort \$donor_genotype -Oz -o \$outputdir/compressed_sorted_donor_genotype.vcf.gz - bcftools index \$outputdir/compressed_sorted_donor_genotype.vcf.gz - bcftools filter \$outputdir/compressed_sorted_donor_genotype.vcf.gz -R \$outputdir/representative_variants_vireo.csv > \$outputdir/donor_genotype_subset_by_vireo.vcf - bcftools reheader --samples \$outputdir/donor_match.csv -o \$outputdir/donor_genotype_subset_by_vireo_matched.vcf \$outputdir/donor_genotype_subset_by_vireo.vcf - fi - - rm \$outputdir/best_method_vireo.txt - rm \$outputdir/compressed_sorted_donor_genotype.vcf.gz - rm \$outputdir/compressed_sorted_donor_genotype.vcf.gz.csi - - fi - - """ -} - -workflow donor_match { - take: - demultiplexing_result - main: - matchDonor(demultiplexing_result, params.nsamples_genetic, params.barcodes, params.match_donor_method1, params.match_donor_method2, - params.findVariants, params.celldata, params.variant_count, params.variant_pct, params.vireo_parent_dir) - emit: - matchDonor.out -} diff --git a/modules/single/gene_demulti/bcftools.nf b/modules/single/gene_demulti/bcftools.nf deleted file mode 100644 index e499c2e..0000000 --- a/modules/single/gene_demulti/bcftools.nf +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 -process bcftools { - publishDir "$projectDir/$params.outdir/$params.mode/gene_demulti/bcftools", mode: 'copy' - label 'big_mem' - - conda 'bioconda::bcftools=1.9' - - input: - val vcf - output: - path "bcftools_${task.index}" - - script: - vcf_files = vcf.join(' ') - """ - mkdir bcftools_${task.index} - bcftools concat -o bcftools_${task.index}/total_chroms.vcf ${vcf_files} - cd bcftools_${task.index} - bcftools sort total_chroms.vcf -o sorted_total_chroms.vcf - bcftools filter -i '%QUAL > 30' sorted_total_chroms.vcf -o filtered_sorted_total_chroms.vcf - """ -} -workflow filter_variant { - take: - vcf - main: - bcftools(vcf) - emit: - bcftools.out -} - diff --git a/modules/single/gene_demulti/cellsnp.nf b/modules/single/gene_demulti/cellsnp.nf deleted file mode 100644 index 082afb9..0000000 --- a/modules/single/gene_demulti/cellsnp.nf +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -process cellSNP { - publishDir "$projectDir/$params.outdir/$params.mode/gene_demulti/cellSNP", mode: 'copy' - label 'big_mem' - - conda 'bioconda::cellsnp-lite' - - input: - path samFile_cellSNP - path indexFile_cellSNP - val regionsVCF_cellSNP - val targetsVCF_cellSNP - path barcodeFile_cellSNP - val sampleList_cellSNP - val sampleIDs_cellSNP - val genotype_cellSNP - val gzip_cellSNP - val printSkipSNPs_cellSNP - val nproc_cellSNP - val refseq_cellSNP - val chrom_cellSNP - val cellTAG_cellSNP - val UMItag_cellSNP - val minCOUNT_cellSNP - val minMAF_cellSNP - val doubletGL_cellSNP - val inclFLAG_cellSNP - val exclFLAG_cellSNP - val minLEN_cellSNP - val minMAPQ_cellSNP - val countORPHAN_cellSNP - val cellsnp_out - - output: - path "cellsnp_${task.index}" - - script: - def samFile = samFile_cellSNP.name != 'None' ? "--samFile ${samFile_cellSNP}" : '' - def regionsVCF = regionsVCF_cellSNP != 'None' ? "--regionsVCF ${regionsVCF_cellSNP}" : '' - def targetsVCF = targetsVCF_cellSNP != 'None' ? "--targetsVCF ${targetsVCF_cellSNP}" : '' - def barcodeFile = barcodeFile_cellSNP.name != 'None' ? "--barcodeFile ${barcodeFile_cellSNP}" : '' - def sampleList = sampleList_cellSNP != 'None' ? "--sampleList ${sampleList_cellSNP}" : '' - def sampleIDs = sampleIDs_cellSNP != 'None' ? "--sampleIDs ${sampleIDs_cellSNP}" : '' - def genotype = genotype_cellSNP != 'False' ? '--genotype' : '' - def gzip = gzip_cellSNP != 'False' ? '--gzip' : '' - def printSkipSNPs = printSkipSNPs_cellSNP != 'False' ? '--printSkipSNPs' : '' - def nproc = nproc_cellSNP != 'None' ? "--nproc ${nproc_cellSNP}" : '' - def refseq = refseq_cellSNP != 'None' ? "--refseq ${refseq_cellSNP}" : '' - def chrom = chrom_cellSNP != 'None' ? "--chrom ${chrom_cellSNP}" : '' - def cellTAG = "--cellTAG ${cellTAG_cellSNP}" - def UMItag = "--UMItag ${UMItag_cellSNP}" - def minCOUNT = "--minCOUNT ${minCOUNT_cellSNP}" - def minMAF = "--minMAF ${minMAF_cellSNP}" - def doubletGL = doubletGL_cellSNP != 'False' ? '--doubletGL' : '' - def inclFLAG = inclFLAG_cellSNP != 'None' ? "--inclFLAG ${inclFLAG_cellSNP}" : '' - def exclFLAG = exclFLAG_cellSNP != 'None' ? "--exclFLAG ${exclFLAG_cellSNP}" : '' - def minLEN = "--minLEN ${minLEN_cellSNP}" - def minMAPQ = "--minMAPQ ${minMAPQ_cellSNP}" - def countORPHAN = countORPHAN_cellSNP != 'False' ? '--countORPHAN' : '' - def out = "cellsnp_${task.index}/${cellsnp_out}" - - """ - mkdir cellsnp_${task.index} - mkdir $out - touch cellsnp_${task.index}/params.csv - echo -e "Argument,Value \n samfile,${samFile_cellSNP} \n regionsVCF,${regionsVCF_cellSNP} \n targetsVCF,${targetsVCF_cellSNP} \n barcodeFile,${barcodeFile_cellSNP} \n sampleList,${sampleList_cellSNP} \n sampleIDs,${sampleIDs_cellSNP} \n genotype,${genotype_cellSNP} \n gzip,${gzip_cellSNP} \n printSkipSNPs,${printSkipSNPs_cellSNP} \n nproc,${nproc_cellSNP} \n refseq,${refseq_cellSNP} \n chrom,${chrom_cellSNP} \n cellTAG,${cellTAG_cellSNP} \n UMItag,${UMItag_cellSNP} \n minCOUNT,${minCOUNT_cellSNP} \n minMAF,${minMAF_cellSNP} \n doubletGL,${doubletGL_cellSNP} \n inclFLAG,${inclFLAG_cellSNP} \n exclFLAG,${exclFLAG_cellSNP} \n minLEN,${minLEN_cellSNP} \n minMAPQ,${minMAPQ_cellSNP} \n countORPHAN,${countORPHAN_cellSNP}" >> cellsnp_${task.index}/params.csv - cellsnp-lite $samFile $regionsVCF $targetsVCF $barcodeFile $sampleList $sampleIDs \ - $genotype $gzip $printSkipSNPs $nproc $refseq $chrom $cellTAG $UMItag $minCOUNT $minMAF $doubletGL \ - $inclFLAG $exclFLAG $minLEN $minMAPQ $countORPHAN --outDir $out - cd $out - gunzip -c cellSNP.cells.vcf.gz > cellSNP.cells.vcf - """ -} - -def split_input(input) { - if (input =~ /;/) { - Channel.from(input).map { return it.tokenize(';') }.flatten() - } - else { - Channel.from(input) - } -} - -workflow variant_cellSNP { - take: - samFile - indexFile - main: - barcodeFile = channel.fromPath(params.barcodes) - regionsVCF = channel.value(params.common_variants_cellsnp) - targetsVCF = channel.value(params.targetsVCF) - sampleList = channel.value(params.sampleList) - sampleIDs = channel.value(params.sampleIDs) - genotype_cellSNP = channel.value(params.genotype_cellSNP) - gzip_cellSNP = channel.value(params.gzip_cellSNP) - printSkipSNPs = channel.value(params.printSkipSNPs) - nproc_cellSNP = channel.value(params.nproc_cellSNP) - refseq_cellSNP = channel.value(params.refseq_cellSNP) - chrom = channel.value(params.chrom) - cellTAG = channel.value(params.cellTAG) - UMItag = channel.value(params.UMItag) - minCOUNT = channel.value(params.minCOUNT) - minMAF = channel.value(params.minMAF) - doubletGL = channel.value(params.doubletGL) - inclFLAG = channel.value(params.inclFLAG) - exclFLAG = channel.value(params.exclFLAG) - minLEN = channel.value(params.minLEN) - minMAPQ = channel.value(params.minMAPQ) - countORPHAN = channel.value(params.countORPHAN) - cellsnp_out = channel.value(params.cellsnp_out) - cellSNP(samFile, indexFile, regionsVCF, targetsVCF, barcodeFile, sampleList, - sampleIDs, genotype_cellSNP, gzip_cellSNP, printSkipSNPs, nproc_cellSNP, refseq_cellSNP, - chrom, cellTAG, UMItag, minCOUNT, minMAF, doubletGL, inclFLAG, exclFLAG, minLEN, minMAPQ, countORPHAN, cellsnp_out) - emit: - cellSNP.out -} diff --git a/modules/single/gene_demulti/demuxlet.nf b/modules/single/gene_demulti/demuxlet.nf deleted file mode 100755 index 2be6ea9..0000000 --- a/modules/single/gene_demulti/demuxlet.nf +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -process demuxlet { - publishDir "$projectDir/$params.outdir/$params.mode/gene_demulti/demuxlet", mode: 'copy' - label 'small_mem' - - conda 'bioconda::popscle' - - input: - each sam - each tag_group - each tag_UMI - each sm - each sm_list - each sam_verbose - each vcf_verbose - each skip_umi - - each cap_BQ - each min_BQ - each min_MQ - each min_TD - each excl_flag - - each group_list - each min_total - each min_uniq - each min_umi - each min_snp - - each plp - each vcf_donor - each field - each geno_error_offset - each geno_error_coeff - each r2_info - each min_mac - each min_callrate - - each alpha - each doublet_prior - each demuxlet_out - - output: - path "demuxlet_${task.index}" - - script: - def samfile = "--sam $sam" - def taggroup = tag_group != 'None' ? "--tag-group ${tag_group}" : '' - def tagUMI = tag_UMI != 'None' ? "--tag-UMI ${tag_UMI}" : '' - def vcfref = plp == 'True' ? "--vcf ${vcf_donor}" : '' - def vcfref_name = plp == 'True' ? vcf_donor : 'No VCF Ref is used because plp is not performed.' - def smlist = sm != 'None' ? "--sm $sm" : '' - def sm_list_file = sm_list != 'None' ? "--sm-list ${sm_list}" : '' - def sm_list_file_name = sm_list != 'None' ? file(sm_list).baseName : 'No sm list file is given' - def samverbose = "--sam-verbose ${sam_verbose}" - def vcfverbose = "--vcf-verbose ${vcf_verbose}" - def skipumi = skip_umi != 'False' ? '--skip-umi' : '' - def capBQ = "--cap-BQ ${cap_BQ}" - def minBQ = "--min-BQ ${min_BQ}" - def minMQ = "--min-MQ ${min_MQ}" - def minTD = "--min-TD ${min_TD}" - def exclflag = "--excl-flag ${excl_flag}" - def grouplist = group_list != 'None' ? "--group-list ${group_list}" : '' - def mintotal = "--min-total ${min_total}" - def minumi = "--min-umi ${min_umi}" - def minuniq = "--min-uniq ${min_uniq}" - def minsnp = "--min-snp ${min_snp}" - def plp_name = plp == 'True' ? 'plp performed' : 'plp not performed' - def vcfdonor = "--vcf ${vcf_donor}" - def fieldinfo = "--field $field" - def genoerror_off = "--geno-error-offset ${geno_error_offset}" - def genoerror_cof = "--geno-error-coeff ${geno_error_coeff}" - def r2info = "--r2-info ${r2_info}" - def minmac = "--min-mac ${min_mac}" - def mincallrate = "--min-callrate ${min_callrate}" - def alpha_value = alpha.replaceAll(/,/, ' --alpha ') - def doubletprior = "--doublet-prior ${doublet_prior}" - - """ - mkdir demuxlet_${task.index} - touch demuxlet_${task.index}/params.csv - barcode_num=\$(wc -l < "${group_list}") - echo -e "Argument,Value \n samfile,${sam} \n tag_group,${tag_group} \n tag_UMI,${tag_UMI} \n vcf_ref,${vcfref_name} \n sm,${sm} \n sm_list_file,${sm_list_file_name} \n sam_verbose,${sam_verbose} \n vcf_verbose,${vcf_verbose} \n skip_umi,${skip_umi} \n cap_BQ,${cap_BQ} \n min_BQ,${min_BQ} \n min_MQ,${min_MQ} \n min_TD,${min_TD} \n excl_flag,${excl_flag} \n grouplist,${group_list} \n min_total,${min_total} \n min_uniq,${min_uniq} \n min_umi,${min_umi} \n min_snp,${min_snp} \n plpfile,${plp_name} \n vcf_donor,${vcf_donor} \n field,${field} \n geno_error_offset,${geno_error_offset} \n geno_error_coeff,${geno_error_coeff} \n r2_info,${r2_info} \n min_mac,${min_mac} \n min_callrate,${min_callrate} \n alpha,${alpha} \n doublet_prior,${doublet_prior}" >> demuxlet_${task.index}/params.csv - if [[ "$plp" != "True" ]] - then - popscle demuxlet $samfile $taggroup $tagUMI $vcfdonor $fieldinfo ${genoerror_off} ${genoerror_cof} $r2info $minmac \ - $mincallrate $smlist ${sm_list_file} --alpha ${alpha_value} $doubletprior $samverbose $vcfverbose $capBQ $minBQ \ - $minMQ $minTD $exclflag $grouplist $mintotal $minumi $minsnp --out demuxlet_${task.index}/${demuxlet_out} - else - mkdir demuxlet_${task.index}/plp - popscle dsc-pileup $samfile ${taggroup} ${tagUMI} $vcfref ${smlist} ${sm_list_file} ${samverbose} ${vcfverbose} \ - ${skipumi} ${capBQ} ${minBQ} ${minMQ} ${minTD} ${exclflag} ${grouplist} ${mintotal} ${minsnp} \ - --out demuxlet_${task.index}/plp/${demuxlet_out} - popscle demuxlet $taggroup $tagUMI --plp demuxlet_${task.index}/plp/${demuxlet_out} $vcfdonor $fieldinfo \ - ${genoerror_off} ${genoerror_cof} $r2info $minmac $mincallrate $smlist ${sm_list_file} --alpha ${alpha_value} \ - $doubletprior $samverbose $vcfverbose $capBQ $minBQ $minMQ $minTD $exclflag $grouplist $mintotal $minumi $minsnp \ - $minuniq --out demuxlet_${task.index}/${demuxlet_out} - - fi - """ -} - -def split_input(input) { - if (input =~ /;/) { - Channel.from(input).map { return it.tokenize(';') }.flatten() - } - else { - Channel.from(input) - } -} - -workflow demultiplex_demuxlet { - take: - sam - main: - group_list = split_input(params.barcodes) - tag_group = split_input(params.tag_group) - tag_UMI = split_input(params.tag_UMI) - sm = split_input(params.sm) - sm_list = split_input(params.sm_list) - sam_verbose = split_input(params.sam_verbose) - vcf_verbose = split_input(params.vcf_verbose) - skip_umi = split_input(params.skip_umi) - cap_BQ = split_input(params.cap_BQ) - min_BQ = split_input(params.min_BQ) - min_MQ = split_input(params.min_MQ) - min_TD = split_input(params.min_TD) - excl_flag = split_input(params.excl_flag) - min_total = split_input(params.min_total) - min_umi = split_input(params.min_umi) - min_uniq = split_input(params.min_uniq) - min_snp = split_input(params.min_snp) - plp = split_input(params.plp) - vcfdonor = split_input(params.vcf_donor) - field = split_input(params.field) - geno_error_offset = split_input(params.geno_error_offset) - geno_error_coeff = split_input(params.geno_error_coeff) - r2_info = split_input(params.r2_info) - min_mac = split_input(params.min_mac) - min_callrate = split_input(params.min_callrate) - alpha = split_input(params.alpha) - doublet_prior = split_input(params.doublet_prior) - demuxlet_out = params.demuxlet_out - - demuxlet(sam, tag_group, tag_UMI, sm, sm_list, sam_verbose, vcf_verbose, skip_umi, - cap_BQ, min_BQ, min_MQ, min_TD, excl_flag, group_list, min_total, min_uniq, min_umi, - min_snp, plp, vcfdonor, field, geno_error_offset, geno_error_coeff, r2_info, min_mac, - min_callrate, alpha, doublet_prior, demuxlet_out) - - emit: - demuxlet.out.collect() -} diff --git a/modules/single/gene_demulti/freebayes.nf b/modules/single/gene_demulti/freebayes.nf deleted file mode 100644 index 73ad0b8..0000000 --- a/modules/single/gene_demulti/freebayes.nf +++ /dev/null @@ -1,282 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -process freebayes { - publishDir "$projectDir/$params.outdir/$params.mode/gene_demulti/freebayes", mode: 'copy' - label 'big_mem' - - conda 'bioconda::freebayes=1.2' - - input: - path bam_freebayes - path bai_freebayes - val stdin_freebayes - val ref_freebayes - val ref_index_freebayes - val targets_freebayes - each region_freebayes - val samples_freebayes - val populations_freebayes - val cnv_map_freebayes - val vcf_freebayes - val gvcf_freebayes - val gvcf_chunk_freebayes - val gvcf_dont_use_chunk_freebayes - val variant_input_freebayes - val only_use_input_alleles_freebayes - val haplotype_basis_alleles_freebayes - val report_all_haplotype_alleles_freebayes - val report_monomorphic_freebayes - val pvar_freebayes - val strict_vcf_freebayes - val theta_freebayes - val ploidy_freebayes - val pooled_discrete_freebayes - val pooled_continuous_freebayes - val use_reference_allele_freebayes - val reference_quality_freebayes - val no_snps_freebayes - val no_indels_freebayes - val no_mnps_freebayes - val no_complex_freebayes - val use_best_n_alleles_freebayes - val haplotype_length_freebayes - val min_repeat_size_freebayes - val min_repeat_entropy_freebayes - val no_partial_observations_freebayes - val dont_left_align_indels_freebayes - val use_duplicate_reads_freebayes - val min_mapping_quality_freebayes - val min_base_quality_freebayes - val min_supporting_allele_qsum_freebayes - val min_supporting_mapping_qsum_freebayes - val mismatch_base_quality_threshold_freebayes - val read_mismatch_limit_freebayes - val read_max_mismatch_fraction_freebayes - val read_snp_limit_freebayes - val read_indel_limit_freebayes - val standard_filters_freebayes - val min_alternate_fraction_freebayes - val min_alternate_count_freebayes - val min_alternate_qsum_freebayes - val min_alternate_total_freebayes - val min_coverage_freebayes - val max_coverage_freebayes - val no_population_priors_freebayes - val hwe_priors_off_freebayes - val binomial_obs_priors_off_freebayes - val allele_balance_priors_off_freebayes - val observation_bias_freebayes - val base_quality_cap_freebayes - val prob_contamination_freebayes - val legacy_gls_freebayes - val contamination_estimates_freebayes - val report_genotype_likelihood_max_freebayes - val genotyping_max_iterations_freebayes - val genotyping_max_banddepth_freebayes - val posterior_integration_limits_freebayes - val exclude_unobserved_genotypes_freebayes - val genotype_variant_threshold_freebayes - val use_mapping_quality_freebayes - val harmonic_indel_quality_freebayes - val read_dependence_factor_freebayes - val genotype_qualities_freebayes - val debug_freebayes - val dd_freebayes - - output: - path '*.vcf' - - script: - def stdin = stdin_freebayes != 'False' ? '--stdin' : '' - def targets = targets_freebayes != 'None' ? "--targets ${targets_freebayes}" : '' - def region = region_freebayes != 'None' ? "--region ${region_freebayes}" : '' - def samples = samples_freebayes != 'None' ? "--samples ${samples_freebayes}" : '' - def populations = populations_freebayes != 'None' ? "--populations ${populations}" : '' - def cnv_map = cnv_map_freebayes != 'None' ? "--cnv-map ${cnv_map_freebayes}" : '' - - def gvcf = gvcf_freebayes != 'False' ? '--gvcf' : '' - def gvcf_chunk = gvcf_chunk_freebayes != 'None' ? "--gvcf-chunk ${gvcf_chunk_freebayes}" : '' - def gvcf_dont_use_chunk = gvcf_dont_use_chunk_freebayes != 'None' ? "--gvcf-dont-use-chunk ${gvcf_dont_use_chunk_freebayes}" : '' - def variant_input = variant_input_freebayes != 'None' ? "--variant-input ${variant_input_freebayes}" : '' - def only_use_input_alleles = only_use_input_alleles_freebayes != 'False' ? '--only-use-input-alleles' : '' - def haplotype_basis_alleles = haplotype_basis_alleles_freebayes != 'None' ? "--haplotype-basis-alleles ${haplotype_basis_alleles_freebayes}" : '' - def report_all_haplotype_alleles = report_all_haplotype_alleles_freebayes != 'False' ? '--report-all-haplotype-alleles' : '' - def report_monomorphic = report_monomorphic_freebayes != 'False' ? '--report-monomorphic' : '' - def pvar = "--pvar ${pvar_freebayes}" - def strict_vcf = strict_vcf_freebayes != 'False' ? '--strict-vcf' : '' - def theta = "--theta ${theta_freebayes}" - def ploidy = "--ploidy ${ploidy_freebayes}" - def pooled_discrete = pooled_discrete_freebayes != 'False' ? '--pooled-discrete' : '' - def pooled_continuous = pooled_continuous_freebayes != 'False' ? '--pooled-continuous' : '' - - def use_reference_allele = use_reference_allele_freebayes != 'False' ? '--use-reference-allele' : '' - def reference_quality = "--reference-quality ${reference_quality_freebayes}" - - def no_snps = no_snps_freebayes != 'False' ? '--no-snps' : '' - def no_indels = no_indels_freebayes != 'False' ? '--no-indels' : '' - def no_mnps = no_mnps_freebayes != 'False' ? '--no-mnps' : '' - def no_complex = no_complex_freebayes != 'False' ? '--no-complex' : '' - - def use_best_n_alleles = "--use-best-n-alleles ${use_best_n_alleles_freebayes}" - def haplotype_length = haplotype_length_freebayes = "--haplotype-length ${haplotype_length_freebayes}" - def min_repeat_size = "--min-repeat-size ${min_repeat_size_freebayes}" - def min_repeat_entropy = "--min-repeat-entropy ${min_repeat_entropy_freebayes}" - def no_partial_observations = no_partial_observations_freebayes != 'False' ? '--no-partial-observations' : '' - - def dont_left_align_indels = dont_left_align_indels_freebayes != 'False' ? '--dont-left-align-indels' : '' - - def use_duplicate_reads = use_duplicate_reads_freebayes != 'False' ? '--use-duplicate-reads' : '' - def min_mapping_quality = "--min-mapping-quality ${min_mapping_quality_freebayes}" - def min_base_quality = "--min-base-quality ${min_base_quality_freebayes}" - def min_supporting_allele_qsum = "--min-supporting-allele-qsum ${min_supporting_allele_qsum_freebayes}" - def min_supporting_mapping_qsum = "--min-supporting-mapping-qsum ${min_supporting_mapping_qsum_freebayes}" - def mismatch_base_quality_threshold = "--mismatch-base-quality-threshold ${mismatch_base_quality_threshold_freebayes}" - def read_mismatch_limit = read_mismatch_limit_freebayes != 'None' ? "-read-mismatch-limit ${read_mismatch_limit_freebayes}" : '' - def read_max_mismatch_fraction = "--read-max-mismatch-fraction ${read_max_mismatch_fraction_freebayes}" - def read_snp_limit = read_snp_limit_freebayes != 'None' ? "--read-snp-limit ${read_snp_limit_freebayes}" : '' - def read_indel_limit = read_indel_limit_freebayes != 'None' ? "--read-indel-limit ${read_indel_limit_freebayes}" : '' - def standard_filters = standard_filters_freebayes != 'False' ? '--standard-filters' : '' - def min_alternate_fraction = "--min-alternate-fraction ${min_alternate_fraction_freebayes}" - def min_alternate_count = "--min-alternate-count ${min_alternate_count_freebayes}" - def min_alternate_qsum = "--min-alternate-qsum ${min_alternate_qsum_freebayes}" - def min_alternate_total = "--min-alternate-total ${min_alternate_total_freebayes}" - def min_coverage = "--min-coverage ${min_coverage_freebayes}" - def max_coverage = max_coverage_freebayes != 'None' ? "--max-coverage ${max_coverage_freebayes}" : '' - - def no_population_priors = no_population_priors_freebayes != 'False' ? '--no-population-priors' : '' - def hwe_priors_off = hwe_priors_off_freebayes != 'False' ? '--hwe-priors-off' : '' - def binomial_obs_priors_off = binomial_obs_priors_off_freebayes != 'False' ? '--binomial-obs-priors-off' : '' - def allele_balance_priors_off = allele_balance_priors_off_freebayes != 'False' ? '--allele-balance-priors-off' : '' - def observation_bias = observation_bias_freebayes != 'None' ? "--observation-bias ${observation_bias_freebayes}" : '' - def base_quality_cap = base_quality_cap_freebayes != 'None' ? "--base-quality-cap ${base_quality_cap_freebayes}" : '' - def prob_contamination = "--prob-contamination ${prob_contamination_freebayes}" - def legacy_gls = legacy_gls_freebayes != 'False' ? '--legacy-gls' : '' - def contamination_estimates = contamination_estimates_freebayes != 'None' ? "--contamination-estimates ${contamination_estimates_freebayes}" : '' - - def report_genotype_likelihood_max = report_genotype_likelihood_max_freebayes != 'False' ? '--report-genotype-likelihood-max' : '' - def genotyping_max_iterations = "--genotyping-max-iterations ${genotyping_max_iterations_freebayes}" - def genotyping_max_banddepth = "--genotyping-max-banddepth ${genotyping_max_banddepth_freebayes}" - def posterior_integration_limits = "--posterior-integration-limits ${posterior_integration_limits_freebayes}" - def exclude_unobserved_genotypes = exclude_unobserved_genotypes_freebayes != 'False' ? '--exclude-unobserved-genotypes' : '' - def genotype_variant_threshold = genotype_variant_threshold_freebayes != 'None' ? "--genotype-variant-threshold ${genotype_variant_threshold_freebayes}" : '' - def use_mapping_quality = use_mapping_quality_freebayes != 'False' ? '--use-mapping-quality' : '' - def harmonic_indel_quality = harmonic_indel_quality_freebayes != 'False' ? '--harmonic-indel-quality' : '' - def read_dependence_factor = "--read-dependence-factor ${read_dependence_factor_freebayes}" - def genotype_qualities = genotype_qualities_freebayes != 'False' ? '--genotype-qualities' : '' - - def debug = debug_freebayes != 'False' ? '--debug' : '' - def dd = dd_freebayes != 'False' ? '-dd' : '' - - """ - freebayes ${bam_freebayes} $stdin -f ${ref_freebayes} $targets $region $samples $populations ${cnv_map} \ - -v ${region_freebayes}_${vcf_freebayes} $gvcf ${gvcf_chunk} ${gvcf_dont_use_chunk} ${variant_input} ${only_use_input_alleles} ${haplotype_basis_alleles} ${report_all_haplotype_alleles} ${report_monomorphic} $pvar ${strict_vcf} \ - $theta $ploidy ${pooled_discrete} ${pooled_continuous} \ - ${use_reference_allele} ${reference_quality} ${no_snps} ${no_indels} ${no_mnps} ${no_complex} ${use_best_n_alleles} ${haplotype_length} ${min_repeat_size} ${min_repeat_entropy} ${no_partial_observations} ${dont_left_align_indels} \ - ${use_duplicate_reads} ${min_mapping_quality} ${min_base_quality} ${min_supporting_allele_qsum} ${min_supporting_mapping_qsum} ${mismatch_base_quality_threshold} \ - ${read_mismatch_limit} ${read_max_mismatch_fraction} ${read_snp_limit} ${read_indel_limit} ${standard_filters} ${min_alternate_fraction} ${min_alternate_count} ${min_alternate_qsum} ${min_alternate_total} ${min_coverage} ${max_coverage} \ - ${no_population_priors} ${hwe_priors_off} ${binomial_obs_priors_off} ${allele_balance_priors_off} \ - ${observation_bias} ${base_quality_cap} ${prob_contamination} ${legacy_gls} ${contamination_estimates} \ - ${report_genotype_likelihood_max} ${genotyping_max_iterations} ${genotyping_max_banddepth} ${posterior_integration_limits} ${exclude_unobserved_genotypes} ${genotype_variant_threshold} \ - ${use_mapping_quality} ${harmonic_indel_quality} ${read_dependence_factor} ${genotype_qualities} $debug $dd - - """ -} - -workflow variant_freebayes { - take: - bam_freebayes - bai_freebayes - region_freebayes - main: - stdin_freebayes = Channel.value(params.stdin) - fasta_reference = Channel.value(params.fasta) - fasta_reference_index = Channel.value(params.fasta_index) - targets_freebayes = Channel.value(params.targets) - samples_freebayes = Channel.value(params.samples) - populations_freebayes = channel.value(params.populations) - cnv_map_freebayes = Channel.value(params.cnv_map) - vcf_freebayes = channel.value(params.vcf_freebayes) - gvcf_freebayes = channel.value(params.gvcf) - gvcf_chunk_freebayes = channel.value(params.gvcf_chunk) - gvcf_dont_use_chunk_freebayes = channel.value(params.gvcf_dont_use_chunk) - variant_input_freebayes = Channel.value(params.variant_input) - only_use_input_alleles_freebayes = Channel.value(params.only_use_input_alleles) - haplotype_basis_alleles_freebayes = channel.value(params.haplotype_basis_alleles) - report_all_haplotype_alleles_freebayes = channel.value(params.report_all_haplotype_alleles) - report_monomorphic_freebayes = channel.value(params.report_monomorphic) - pvar_freebayes = channel.value(params.pvar) - strict_vcf_freebayes = channel.value(params.strict_vcf) - - theta_freebayes = channel.value(params.theta) - ploidy_freebayes = channel.value(params.ploidy) - pooled_discrete_freebayes = channel.value(params.pooled_discrete) - pooled_continuous_freebayes = channel.value(params.pooled_continuous) - use_reference_allele_freebayes = channel.value(params.use_reference_allele) - reference_quality_freebayes = channel.value(params.reference_quality) - no_snps = channel.value(params.no_snps) - no_indels = channel.value(params.no_indels) - no_mnps = channel.value(params.no_mnps) - no_complex = channel.value(params.no_complex) - use_best_n_alleles_freebayes = channel.value(params.use_best_n_alleles) - haplotype_length_freebayes = channel.value(params.haplotype_length) - min_repeat_size_freebayes = channel.value(params.min_repeat_size) - min_repeat_entropy_freebayes = channel.value(params.min_repeat_entropy) - no_partial_observations_freebayes = channel.value(params.no_partial_observations) - - dont_left_align_indels_freebayes = channel.value(params.dont_left_align_indels) - use_duplicate_reads_freebayes = channel.value(params.use_duplicate_reads) - min_mapping_quality_freebayes = channel.value(params.min_mapping_quality) - min_base_quality_freebayes = channel.value(params.min_base_quality) - min_supporting_allele_qsum_freebayes = channel.value(params.min_supporting_allele_qsum) - min_supporting_mapping_qsum_freebayes = channel.value(params.min_supporting_mapping_qsum) - mismatch_base_quality_threshold_freebayes = channel.value(params.mismatch_base_quality_threshold) - read_mismatch_limit_freebayes = channel.value(params.read_mismatch_limit) - read_max_mismatch_fraction_freebayes = channel.value(params.read_max_mismatch_fraction) - read_snp_limit_freebayes = channel.value(params.read_snp_limit) - read_indel_limit_freebayes = channel.value(params.read_indel_limit) - standard_filters_freebayes = channel.value(params.standard_filters) - min_alternate_fraction_freebayes = channel.value(params.min_alternate_fraction) - min_alternate_count_freebayes = channel.value(params.min_alternate_count) - min_alternate_qsum_freebayes = channel.value(params.min_alternate_qsum) - min_alternate_total_freebayes = channel.value(params.min_alternate_total) - min_coverage_freebayes = channel.value(params.min_coverage) - max_coverage_freebayes = channel.value(params.max_coverage) - no_population_priors_freebayes = channel.value(params.no_population_priors) - - hwe_priors_off_freebayes = channel.value(params.hwe_priors_off) - binomial_obs_priors_off_freebayes = channel.value(params.binomial_obs_priors_off) - allele_balance_priors_off_freebayes = channel.value(params.allele_balance_priors_off) - observation_bias_freebayes = channel.value(params.observation_bias) - base_quality_cap_freebayes = channel.value(params.base_quality_cap) - prob_contamination_freebayes = channel.value(params.prob_contamination) - legacy_gls_freebayes = channel.value(params.legacy_gls) - contamination_estimates_freebayes = channel.value(params.contamination_estimates) - - report_genotype_likelihood_max_freebayes = channel.value(params.report_genotype_likelihood_max) - genotyping_max_iterations_freebayes = channel.value(params.genotyping_max_iterations) - genotyping_max_banddepth_freebayes = channel.value(params.genotyping_max_banddepth) - posterior_integration_limits_freebayes = channel.value(params.posterior_integration_limits) - exclude_unobserved_genotypes_freebayes = channel.value(params.exclude_unobserved_genotypes) - genotype_variant_threshold_freebayes = channel.value(params.genotype_variant_threshold) - use_mapping_quality_freebayes = channel.value(params.use_mapping_quality) - harmonic_indel_quality_freebayes = channel.value(params.harmonic_indel_quality) - read_dependence_factor_freebayes = channel.value(params.read_dependence_factor) - genotype_qualities_freebayes = channel.value(params.genotype_qualities) - debug_freebayes = channel.value(params.debug) - dd_freebayes = channel.value(params.dd) - - freebayes(bam_freebayes, bai_freebayes, stdin_freebayes, fasta_reference, fasta_reference_index, targets_freebayes, region_freebayes, samples_freebayes, populations_freebayes, cnv_map_freebayes, \ - vcf_freebayes, gvcf_freebayes, gvcf_chunk_freebayes, gvcf_dont_use_chunk_freebayes, variant_input_freebayes, only_use_input_alleles_freebayes, haplotype_basis_alleles_freebayes, report_all_haplotype_alleles_freebayes, report_monomorphic_freebayes, pvar_freebayes, strict_vcf_freebayes, \ - theta_freebayes, ploidy_freebayes, pooled_discrete_freebayes, pooled_continuous_freebayes, use_reference_allele_freebayes, reference_quality_freebayes, \ - no_snps, no_indels, no_mnps, no_complex, use_best_n_alleles_freebayes, haplotype_length_freebayes, min_repeat_size_freebayes, min_repeat_entropy_freebayes, no_partial_observations_freebayes, \ - dont_left_align_indels_freebayes, use_duplicate_reads_freebayes, min_mapping_quality_freebayes, min_base_quality_freebayes, min_supporting_allele_qsum_freebayes,\ - min_supporting_mapping_qsum_freebayes, mismatch_base_quality_threshold_freebayes, read_mismatch_limit_freebayes, read_max_mismatch_fraction_freebayes, - read_snp_limit_freebayes, read_indel_limit_freebayes, standard_filters_freebayes, min_alternate_fraction_freebayes, min_alternate_count_freebayes, min_alternate_qsum_freebayes, min_alternate_total_freebayes, min_coverage_freebayes, max_coverage_freebayes, \ - no_population_priors_freebayes, hwe_priors_off_freebayes, binomial_obs_priors_off_freebayes, allele_balance_priors_off_freebayes, observation_bias_freebayes, base_quality_cap_freebayes, prob_contamination_freebayes, legacy_gls_freebayes, contamination_estimates_freebayes, \ - report_genotype_likelihood_max_freebayes, genotyping_max_iterations_freebayes, genotyping_max_banddepth_freebayes, posterior_integration_limits_freebayes, exclude_unobserved_genotypes_freebayes, \ - genotype_variant_threshold_freebayes, use_mapping_quality_freebayes, harmonic_indel_quality_freebayes, read_dependence_factor_freebayes, genotype_qualities_freebayes, debug_freebayes, dd_freebayes) - - emit: - freebayes.out.collect() -} diff --git a/modules/single/gene_demulti/freemuxlet.nf b/modules/single/gene_demulti/freemuxlet.nf deleted file mode 100755 index 567b7dc..0000000 --- a/modules/single/gene_demulti/freemuxlet.nf +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -process freemuxlet { - publishDir "$projectDir/$params.outdir/$params.mode/gene_demulti/freemuxlet", mode: 'copy' - label 'small_mem' - - conda 'bioconda::popscle' - - input: - each sam - each vcf - each tag_group - each tag_UMI - each sm - each sm_list - each sam_verbose - each vcf_verbose - each skip_umi - each cap_BQ - each min_BQ - each min_MQ - each min_TD - each excl_flag - each group_list - each min_total - each min_uniq - each min_umi - each min_snp - each init_cluster - each nsample - each aux_files - each verbose - each doublet_prior - each bf_thres - each frac_init_clust - each iter_init - each keep_init_missing - each freemuxlet_out - - output: - path "freemuxlet_${task.index}" - - script: - def samfile = "--sam $sam" - def taggroup = tag_group != 'None' ? "--tag-group ${tag_group}" : '' - def tagUMI = tag_UMI != 'None' ? "--tag-UMI ${tag_UMI}" : '' - def vcffile = "--vcf $vcf" - def smlist = sm != 'None' ? "--sm $sm" : '' - def sm_list_file = sm_list != 'None' ? "--sm-list ${sm_list}" : '' - def sm_list_file_name = sm_list != 'None' ? file(sm_list).baseName : 'No sm list file is given' - def samverbose = "--sam-verbose ${sam_verbose}" - def vcfverbose = "--vcf-verbose ${vcf_verbose}" - def skipumi = skip_umi != 'False' ? '--skip-umi' : '' - def capBQ = "--cap-BQ ${cap_BQ}" - def minBQ = "--min-BQ ${min_BQ}" - def minMQ = "--min-MQ ${min_MQ}" - def minTD = "--min-TD ${min_TD}" - def exclflag = "--excl-flag ${excl_flag}" - def grouplist = "--group-list ${group_list}" - def mintotal = "--min-total ${min_total}" - def minuniq = "--min-uniq ${min_uniq}" - def minumi = "--min-umi ${min_umi}" - def minsnp = "--min-snp ${min_snp}" - def initcluster = init_cluster != 'None' ? "--init-cluster ${init_cluster}" : '' - def n_sample = "--nsample $nsample" - def auxfiles = aux_files != 'False' ? '--aux-files' : '' - def verbose_info = "--verbose $verbose" - def doubletprior = "--doublet-prior ${doublet_prior}" - def bfthres = "--bf-thres ${bf_thres}" - def frac_init_cluster = "--frac-init-clust ${frac_init_clust}" - def iterinit = "--iter-init ${iter_init}" - def keepinit_missing = keep_init_missing != 'False' ? '--keep-init-missing' : '' - - """ - mkdir freemuxlet_${task.index} - mkdir freemuxlet_${task.index}/plp - touch freemuxlet_${task.index}/params.csv - echo -e "Argument,Value \n samfile,${sam} \n tag_group,${tag_group} \n tag_UMI,${tag_UMI} \n vcf_file,${vcf} \n sm,${sm} \n sm_list_file,${sm_list_file_name} \n sam_verbose,${sam_verbose} \n vcf_verbose,${vcf_verbose} \n skip_umi,${skip_umi} \n cap_BQ,${cap_BQ} \n min_BQ,${min_BQ} \n min_MQ,${min_MQ} \n min_TD,${min_TD} \n excl_flag,${excl_flag} \n grouplist,${group_list} \n min_total,${min_total} \n min_uniq,${min_uniq} \n min_umi,${min_umi} \n min_snp,${min_snp} \n init_cluster,${init_cluster} \n nsample,${nsample} \n aux_files,${aux_files} \n verbose,${verbose} \n doublet_prior,${doublet_prior} \n bf_thres,${bf_thres} \n frac_init_clust,${frac_init_clust} \n iter_init,${iter_init} \n keep_init_missing,${keep_init_missing}" >> freemuxlet_${task.index}/params.csv - - popscle dsc-pileup $samfile ${taggroup} ${tagUMI} $vcffile ${smlist} ${sm_list_file} ${samverbose} \ - ${vcfverbose} ${skipumi} ${capBQ} ${minBQ} ${minMQ} ${minTD} ${exclflag} ${grouplist} ${mintotal} ${minuniq} \ - ${minsnp} --out freemuxlet_${task.index}/plp/${freemuxlet_out} - popscle freemuxlet --plp freemuxlet_${task.index}/plp/${freemuxlet_out} --out freemuxlet_${task.index}/${freemuxlet_out} \ - ${initcluster} ${n_sample} ${auxfiles} ${verbose_info} ${doubletprior} ${bfthres} ${frac_init_cluster} ${iterinit} \ - ${keepinit_missing} ${capBQ} ${minBQ} ${grouplist} ${mintotal} ${minumi} ${minsnp} - - """ -} - -def split_input(input) { - if (input =~ /;/) { - Channel.from(input).map { return it.tokenize(';') }.flatten() - } - else { - Channel.from(input) - } -} - -workflow demultiplex_freemuxlet { - take: - sam - main: - group_list = split_input(params.barcodes) - vcf = split_input(params.common_variants_freemuxlet) - tag_group = split_input(params.tag_group) - tag_UMI = split_input(params.tag_UMI) - sm = split_input(params.sm) - sm_list = split_input(params.sm_list) - sam_verbose = split_input(params.sam_verbose) - vcf_verbose = split_input(params.vcf_verbose) - skip_umi = split_input(params.skip_umi) - - cap_BQ = split_input(params.cap_BQ) - min_BQ = split_input(params.min_BQ) - min_MQ = split_input(params.min_MQ) - min_TD = split_input(params.min_TD) - excl_flag = split_input(params.excl_flag) - min_total = split_input(params.min_total) - min_uniq = split_input(params.min_uniq) - min_umi = split_input(params.min_umi) - min_snp = split_input(params.min_snp) - init_cluster = split_input(params.init_cluster) - nsample = split_input(params.nsamples_genetic) - aux_files = split_input(params.aux_files) - verbose = split_input(params.verbose) - doublet_prior = split_input(params.doublet_prior) - bf_thres = split_input(params.bf_thres) - frac_init_clust = split_input(params.frac_init_clust) - iter_init = split_input(params.iter_init) - keep_init_missing = split_input(params.keep_init_missing) - freemuxlet_out = params.freemuxlet_out - - freemuxlet(sam, vcf, tag_group, tag_UMI, sm, sm_list, sam_verbose, vcf_verbose, skip_umi, cap_BQ, - min_BQ, min_MQ, min_TD, excl_flag, group_list, min_total, min_uniq, min_umi, min_snp, init_cluster, nsample, - aux_files, verbose, doublet_prior, bf_thres, frac_init_clust, iter_init, keep_init_missing, freemuxlet_out) - - emit: - freemuxlet.out.collect() -} diff --git a/modules/single/gene_demulti/samtools.nf b/modules/single/gene_demulti/samtools.nf deleted file mode 100644 index b6c0177..0000000 --- a/modules/single/gene_demulti/samtools.nf +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl=2 - -process samtools{ - publishDir "$projectDir/$params.outdir/$params.mode/gene_demulti/samtools", mode: 'copy' - label 'big_mem' - - conda "bioconda::samtools bioconda::umi_tools" - - input: - file bam - - output: - path "samtools_${task.index}" - - - script: - """ - mkdir samtools_${task.index} - samtools view -S -b -q 10 -F 3844 $bam > samtools_${task.index}/filtered.bam - samtools index samtools_${task.index}/filtered.bam samtools_${task.index}/filtered.bam.bai - umi_tools dedup --stdin=samtools_${task.index}/filtered.bam --extract-umi-method=tag --umi-tag=UR --cell-tag=CB --log=logfile > samtools_${task.index}/no_dup.bam - samtools sort samtools_${task.index}/no_dup.bam -o samtools_${task.index}/sorted.bam - samtools index samtools_${task.index}/sorted.bam samtools_${task.index}/sorted.bam.bai - """ -} - - -workflow data_preprocess{ - take: - bam - main: - samtools(bam) - emit: - samtools.out - -} - diff --git a/modules/single/gene_demulti/scsplit.nf b/modules/single/gene_demulti/scsplit.nf deleted file mode 100644 index 03f4342..0000000 --- a/modules/single/gene_demulti/scsplit.nf +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -process scSplit { - publishDir "$projectDir/$params.outdir/$params.mode/gene_demulti/scSplit", mode: 'copy' - label 'big_mem' - - conda "$projectDir/conda/scsplit.yml" - - input: - each vcf - each bam - each bai - each barcode - each tag - each com - each ref - each alt - each num - each sub - each ems - each dbl - each vcf_known - each sample_geno - each scsplit_out - - output: - path "scsplit_${task.index}" - - script: - - def common_data = com != 'None' ? "--com $com" : '' - def common_data_name = com != 'None' ? com : 'Common variants are not given.' - def vcf_known_data = vcf_known != 'None' ? "--vcf ${vcf_known}" : '' - def vcf_known_data_name = vcf_known != 'None' ? vcf_known : 'Known variants are not given.' - def sub_yesno = num == 0 ? "$sub" : 'no_sub' - - def vcf_data = "-v $vcf" - def bam_data = "-i $bam" - def barcode_data = "-b $barcode" - def tag_data = "--tag $tag" - def num_data = "-n $num" - def sub_data = num == 0 ? "--sub $sub" : '' - def ems_data = "--ems $ems" - def dbl_data = dbl != 'None' ? "--dbl $dbl" : '' - def out = "scsplit_${task.index}/${scsplit_out}" - - """ - git clone https://github.com/jon-xu/scSplit - mkdir scsplit_${task.index} - mkdir $out - touch scsplit_${task.index}/params.csv - echo -e "Argument,Value \n vcf,$vcf \n bam,$bam \n barcode,$barcode \n common_data,${common_data_name} \n num,${num} \n sub,${sub_yesno} \n ems,${ems} \n dbl,${dbl} \n vcf_known_data,${vcf_known_data_name}" >> scsplit_${task.index}/params.csv - - python scSplit/scSplit count ${vcf_data} ${bam_data} ${barcode_data} ${common_data} -r $ref -a $alt --out $out - python scSplit/scSplit run -r $out/$ref -a $out/$alt ${num_data} ${sub_data} ${ems_data} ${dbl_data} ${vcf_known_data} --out $out - - if [[ "$sample_geno" != "False" ]] - then - python scSplit/scSplit genotype -r $out/$ref -a $out/$alt -p $out/scSplit_P_s_c.csv --out $out - fi - """ -} - -def split_input(input) { - if (input =~ /;/) { - Channel.from(input).map { return it.tokenize(';') }.flatten() - } - else { - Channel.from(input) - } -} - -workflow demultiplex_scSplit { - take: - bam_scsplit - vcf_scsplit - bai_scsplit - main: - tag_scsplit = split_input(params.tag_group) - bar_scsplit = split_input(params.barcodes) - com_scsplit = split_input(params.common_variants_scSplit) - ref_scsplit = split_input(params.refscSplit) - alt_scsplit = split_input(params.altscSplit) - num_scsplit = split_input(params.nsamples_genetic) - sub_scsplit = split_input(params.subscSplit) - ems_scsplit = split_input(params.emsscSplit) - dbl_scsplit = split_input(params.dblscSplit) - vcf_known_scsplit = split_input(params.vcf_donor) - sample_geno = split_input(params.sample_geno) - scsplit_out = params.scsplit_out - scSplit(vcf_scsplit, bam_scsplit, bai_scsplit, bar_scsplit, tag_scsplit, com_scsplit, ref_scsplit, - alt_scsplit, num_scsplit, sub_scsplit, ems_scsplit, dbl_scsplit, vcf_known_scsplit,sample_geno, scsplit_out) - emit: - scSplit.out.collect() -} diff --git a/modules/single/gene_demulti/souporcell.nf b/modules/single/gene_demulti/souporcell.nf deleted file mode 100755 index 22cffcc..0000000 --- a/modules/single/gene_demulti/souporcell.nf +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl=2 - -process souporcell{ - publishDir "$projectDir/$params.outdir/$params.mode/gene_demulti/souporcell", mode: 'copy' - label 'big_mem' - - container "shub://wheaton5/souporcell" - - input: - each bam - each barcodes - each fasta - each threads - each clusters - each ploidy - each min_alt - each min_ref - each max_loci - each restarts - each common_variants - each use_known_genotype - each known_genotypes - each known_genotypes_sample_names - each skip_remap - each ignore - each souporcell_out - - output: - path "souporcell_${task.index}" - - script: - def bamfile = "-i $bam" - def barcode = "-b $barcodes" - def fastafile = "-f $fasta" - def thread = "-t $threads" - def cluster = "-k $clusters" - def ploi = "--ploidy $ploidy" - def minalt = "--min_alt ${min_alt}" - def minref = "--min_ref ${min_ref}" - def maxloci = "--max_loci ${max_loci}" - def restart = restarts != 'None' ? "--restarts $restarts" : '' - - def commonvariant = (common_variants != 'None' & use_known_genotype != "True" & known_genotypes == 'None' )? "--common_variants ${common_variants}" : '' - def commonvariant_name = (common_variants != 'None' & use_known_genotype != "True" & known_genotypes == 'None' ) ? file(common_variants).baseName : 'no_common_variants' - - def knowngenotype = (use_known_genotype == "True" & known_genotypes != 'None') ? "--known_genotypes ${known_genotypes}" : '' - def knowngenotype_name = (use_known_genotype == "True" & known_genotypes != 'None') ? file(known_genotypes).baseName : 'no_known_genotypes' - - def knowngenotypes_sample = known_genotypes_sample_names != 'None' ? "--known_genotypes_sample_names ${known_genotypes_sample_names}" : '' - def knowngenotype_sample_name = known_genotypes_sample_names != 'None' ? file(known_genotypes_sample_names).baseName : 'no_knowngenotypes_sample_names' - - def skipremap = skip_remap != 'False' ? "--skip_remap True" : '' - def ign = ignore != 'False' ? "--ignore True" : '' - def out = "souporcell_${task.index}/${souporcell_out}" - - """ - mkdir souporcell_${task.index} - mkdir $out - touch souporcell_${task.index}/params.csv - echo -e "Argument,Value \n bamfile,${bam} \n barcode,${barcodes} \n fasta,${fasta} \n threads,${threads} \n clusters,${clusters} \n ploidy,${ploidy} \n min_alt,${min_alt} \n min_ref,${min_ref} \n max_loci,${max_loci} \n restarts,${restarts} \n common_variant,${commonvariant_name} \n known_genotype,${knowngenotype_name} \n known_genotype_sample,${knowngenotype_sample_name} \n skip_remap,${skip_remap} \n ignore,${ignore} " >> souporcell_${task.index}/params.csv - souporcell_pipeline.py $bamfile $barcode $fastafile $thread $cluster $ploi $minalt $minref $maxloci $restart $commonvariant $knowngenotype $knowngenotypes_sample $skipremap $ign -o $out - """ -} - - -def split_input(input){ - if (input =~ /;/ ){ - Channel.from(input).map{ return it.tokenize(';')}.flatten() - } - else{ - Channel.from(input) - } -} - - -workflow demultiplex_souporcell{ - take: - bam - main: - barcodes = split_input(params.barcodes) - fasta = split_input(params.fasta) - threads = split_input(params.threads) - clusters = split_input(params.nsamples_genetic) - - ploidy = split_input(params.ploidy) - min_alt = split_input(params.min_alt) - min_ref = split_input(params.min_ref) - max_loci = split_input(params.max_loci) - restarts = split_input(params.restarts) - - common_variants = split_input(params.common_variants_souporcell) - use_known_genotype = split_input(params.use_known_genotype) - known_genotypes = split_input(params.vcf_donor) - known_genotypes_sample_names = split_input(params.known_genotypes_sample_names) - skip_remap = split_input(params.skip_remap) - ignore = split_input(params.ignore) - souporcell_out = params.souporcell_out - - souporcell(bam, barcodes, fasta, threads, clusters, ploidy, min_alt, min_ref, max_loci, restarts, - common_variants, use_known_genotype, known_genotypes, known_genotypes_sample_names, skip_remap, - ignore, souporcell_out) - - emit: - souporcell.out.collect() -} diff --git a/modules/single/gene_demulti/vireo.nf b/modules/single/gene_demulti/vireo.nf deleted file mode 100755 index 1db4387..0000000 --- a/modules/single/gene_demulti/vireo.nf +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -process vireo { - publishDir "$projectDir/$params.outdir/$params.mode/gene_demulti/vireo", mode: 'copy' - label 'big_mem' - - conda 'aksarkar::vireosnp' - - input: - each celldata - each ndonor - each donorfile - each genoTag - each noDoublet - each nInit - each extraDonor - each extraDonorMode - each forceLearnGT - each ASEmode - each noPlot - each randSeed - each cellRange - each callAmbientRNAs - each nproc - each findVariant - each vireo_out - - output: - path "vireo_${task.index}" - - script: - def cell_data = "-c $celldata" - def n_donor = ndonor != 'None' ? "-N $ndonor" : '' - def n_donor_yesno = ndonor != 'None' ? "$ndonor" : 'Number of donors are not given' - def donor = donorfile != 'None' ? "-d $donorfile" : '' - def donor_data_name = donorfile != 'None' ? donorfile : 'Donor file is not given' - def geno_tag = donorfile != 'None' ? "--genoTag $genoTag" : '' - def no_doublet = noDoublet != 'False' ? '--noDoublet' : '' - def n_init = "--nInit $nInit" - def extra_donor = "--extraDonor $extraDonor" - def extradonor_mode = extraDonorMode != 'distance' ? "--extraDonorMode $extraDonorMode" : '' - def learnGT = (forceLearnGT != 'False' && donorfile != 'None') ? '--forceLearnGT' : '' - def learnGT_yesno = (forceLearnGT != 'False' && donorfile != 'None') ? "$forceLearnGT" : 'False' - def ase_mode = ASEmode != 'False' ? '--ASEmode' : '' - def no_plot = noPlot != 'False' ? '--noPlot' : '' - def random_seed = randSeed != 'None' ? "--randSeed $randSeed" : '' - def cell_range = cellRange != 'all' ? "--cellRange $cellRange" : '' - def call_ambient_rna = callAmbientRNAs != 'False' ? '--callAmbientRNAs' : '' - def n_proc = "--nproc $nproc" - - """ - mkdir vireo_${task.index} - mkdir vireo_${task.index}/${vireo_out} - touch vireo_${task.index}/params.csv - echo -e "Argument,Value \n cell_data,${celldata} \n n_donor,${n_donor_yesno} \n donor_data,${donor_data_name} \n genoTag,${genoTag} \n noDoublet,${noDoublet} \n nInit,${nInit} \n extraDonor,${extraDonor} \n extraDonorMode,${extraDonorMode} \n learnGT,${learnGT_yesno} \n ASEmode,${ASEmode} \n noPlot,${noPlot} \n randSeed,${randSeed} \n cellRange,${cellRange} \n callAmbientRNAs,${callAmbientRNAs} \n nproc,${nproc}" >> vireo_${task.index}/params.csv - vireo ${cell_data} ${n_donor} $donor ${geno_tag} ${no_doublet} ${n_init} ${extra_donor} ${extradonor_mode} \ - $learnGT ${ase_mode} ${no_plot} ${random_seed} ${cell_range} ${call_ambient_rna} ${n_proc} \ - -o vireo_${task.index}/${vireo_out} - if ([ "$donorfile" = "None" ]); then - if ([ "$findVariant" = "True" ] || [ "$findVariant" = "vireo" ]); then - GTbarcode -i vireo_${task.index}/${vireo_out}/GT_donors.vireo.vcf.gz -o vireo_${task.index}/${vireo_out}/filtered_variants.tsv ${randSeed} - fi - fi - - """ -} - -def split_input(input) { - if (input =~ /;/) { - Channel.from(input).map { return it.tokenize(';') }.flatten() - } - else { - Channel.from(input) - } -} - -workflow demultiplex_vireo { - take: - celldata - - main: - ndonor = split_input(params.nsamples_genetic) - donorfile = split_input(params.vcf_donor) - genoTag = split_input(params.genoTag) - - noDoublet = split_input(params.noDoublet) - nInit = split_input(params.nInit) - extraDonor = split_input(params.extraDonor) - extraDonorMode = split_input(params.extraDonorMode) - forceLearnGT = split_input(params.forceLearnGT) - - ASEmode = split_input(params.ASEmode) - noPlot = split_input(params.noPlot) - randSeed = split_input(params.randSeed) - cellRange = split_input(params.cellRange) - callAmbientRNAs = split_input(params.callAmbientRNAs) - nproc = split_input(params.nproc) - findVariant = split_input(params.findVariants) - vireo_out = params.vireo_out - - vireo(celldata, ndonor, donorfile, genoTag, noDoublet, nInit, extraDonor, extraDonorMode, forceLearnGT, - ASEmode, noPlot, randSeed, cellRange, callAmbientRNAs, nproc, findVariant, vireo_out) - - emit: - vireo.out.collect() -} diff --git a/modules/single/gene_demultiplexing.nf b/modules/single/gene_demultiplexing.nf deleted file mode 100644 index 0e6f317..0000000 --- a/modules/single/gene_demultiplexing.nf +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -include { data_preprocess } from './gene_demulti/samtools' -include { filter_variant } from './gene_demulti/bcftools' -include { variant_cellSNP } from './gene_demulti/cellsnp' -include { variant_freebayes } from './gene_demulti/freebayes' -include { demultiplex_demuxlet } from './gene_demulti/demuxlet' -include { demultiplex_freemuxlet } from './gene_demulti/freemuxlet' -include { demultiplex_scSplit } from './gene_demulti/scsplit' -include { demultiplex_souporcell } from './gene_demulti/souporcell' -include { demultiplex_vireo } from './gene_demulti/vireo' - -def split_input(input) { - if (input =~ /;/) { - Channel.from(input).map { return it.tokenize(';') }.flatten() - } - else { - Channel.from(input) - } -} - -process summary { - publishDir "$projectDir/$params.outdir/$params.mode/gene_demulti", mode: 'copy' - label 'small_mem' - - conda 'pandas scanpy mudata' - - input: - val demuxlet_result - val freemuxlet_result - val vireo_result - val souporcell_result - val scsplit_result - val generate_anndata - val generate_mudata - val rna_matrix - val hto_matrix - output: - path genetic_summary - - script: - def demuxlet_files = '' - def freemuxlet_files = '' - def vireo_files = '' - def souporcell_files = '' - def scsplit_files = '' - def generate_adata = '' - def generate_mdata = '' - - if (demuxlet_result != 'no_result') { - demuxlet_files = "--demuxlet ${demuxlet_result.join(':')}" - } - if (freemuxlet_result != 'no_result') { - freemuxlet_files = "--freemuxlet ${freemuxlet_result.join(':')}" - } - if (vireo_result != 'no_result') { - vireo_files = "--vireo ${vireo_result.join(':')}" - } - if (souporcell_result != 'no_result') { - souporcell_files = "--souporcell ${souporcell_result.join(':')}" - } - if (scsplit_result != 'no_result') { - scsplit_files = "--scsplit ${scsplit_result.join(':')}" - } - if (scsplit_result != 'no_result') { - scsplit_files = "--scsplit ${scsplit_result.join(':')}" - } - if (generate_anndata == 'True') { - if (rna_matrix == 'None') { - error 'Error: RNA count matrix is not given.' - } - generate_adata = "--generate_anndata --read_rna_mtx $rna_matrix" - } - if (generate_mudata == 'True') { - if (rna_matrix == 'None') { - error 'Error: RNA count matrix is not given.' - } - if (hto_matrix == 'None') { - error 'Error: HTO count matrix is not given.' - } - generate_mdata = "--generate_mudata --read_rna_mtx $rna_matrix --read_hto_mtx $hto_matrix" - } - """ - summary_gene.py $demuxlet_files $vireo_files $souporcell_files $scsplit_files $freemuxlet_files $generate_adata $generate_mdata - """ -} - -workflow gene_demultiplexing { - main: - input_bam = Channel.fromPath(params.bam) - input_bai = Channel.fromPath(params.bai) - - if ((params.demuxlet == 'True' & params.demuxlet_preprocess != 'False') | \ - (params.freemuxlet == 'True' & params.freemuxlet_preprocess != 'False')| \ - (params.scSplit == 'True' & params.scSplit_preprocess != 'False') | \ - (params.vireo == 'True' & params.vireo_preprocess != 'False') | \ - (params.souporcell == 'True' & params.souporcell_preprocess != 'False')) { - data_preprocess(input_bam) - qc_bam = data_preprocess.out.map { return it + '/sorted.bam' } - qc_bam_bai = data_preprocess.out.map { return it + '/sorted.bam.bai' } - } - - if (params.vireo == 'True' & params.vireo_variant == 'True') { - if (params.vireo_preprocess != 'False') { - variant_cellSNP(qc_bam, qc_bam_bai) - } - else { - variant_cellSNP(input_bam, input_bai) - } - cellsnp_vcf = variant_cellSNP.out.map { return it + '/*/cellSNP.cells.vcf' } - } - - if (params.scSplit == 'True' & params.scSplit_variant == 'True') { - freebayes_region = Channel.from(1..22, 'X', 'Y').flatten() - if (params.region != 'None') { - freebayes_region = split_input(params.region) - } - if (params.scSplit_preprocess != 'False') { - variant_freebayes(qc_bam, qc_bam_bai, freebayes_region) - } - else { - variant_freebayes(input_bam, input_bai, freebayes_region) - } - filter_variant(variant_freebayes.out) - freebayes_vcf = filter_variant.out.map { return it + '/filtered_sorted_total_chroms.vcf' } - } - - if (params.demuxlet == 'True') { - bam = params.demuxlet_preprocess == 'True' ? qc_bam : input_bam //qc_bam.mix(input_bam)) - demultiplex_demuxlet(bam) - demuxlet_out = demultiplex_demuxlet.out - } - else { - demuxlet_out = channel.value('no_result') - } - - if (params.freemuxlet == 'True') { - bam = params.freemuxlet_preprocess == 'True' ? qc_bam : input_bam // qc_bam.mix(input_bam)) - demultiplex_freemuxlet(bam) - freemuxlet_out = demultiplex_freemuxlet.out - } - else { - freemuxlet_out = channel.value('no_result') - } - - if (params.vireo == 'True') { - vcf = params.vireo_variant != 'True' ? Channel.fromPath(params.celldata) : - variant_cellSNP.out.map { return it + '/*/cellSNP.cells.vcf' } - //variant_cellSNP.out.map{ return it + "/*/cellSNP.cells.vcf"}.mix(Channel.fromPath(params.celldata))) - demultiplex_vireo(vcf) - vireo_out = demultiplex_vireo.out - } - else { - vireo_out = channel.value('no_result') - } - - if (params.scSplit == 'True') { - bam = params.scSplit_preprocess == 'True' ? qc_bam : input_bam // qc_bam.mix(input_bam)) - bai = params.scSplit_preprocess == 'True' ? qc_bam_bai : input_bai // qc_bam_bai.mix(input_bai)) - vcf = params.scSplit_variant != 'True' ? Channel.fromPath(params.vcf_mixed) : - filter_variant.out.map { return it + '/filtered_sorted_total_chroms.vcf' } - //freebayes_vcf.mix(Channel.fromPath(params.vcf_mixed))) - demultiplex_scSplit(bam, vcf, bai) - scSplit_out = demultiplex_scSplit.out - } - else { - scSplit_out = channel.value('no_result') - } - - if (params.souporcell == 'True') { - bam = params.souporcell_preprocess == 'True' ? qc_bam : input_bam // qc_bam.mix(input_bam)) - demultiplex_souporcell(bam) - souporcell_out = demultiplex_souporcell.out - } - else { - souporcell_out = channel.value('no_result') - } - - summary(demuxlet_out, freemuxlet_out, vireo_out, souporcell_out, scSplit_out, - params.generate_anndata, params.generate_mudata, - params.rna_matrix_filtered, params.hto_matrix_filtered) - emit: - summary.out -} diff --git a/modules/single/hash_demulti/bff.nf b/modules/single/hash_demulti/bff.nf deleted file mode 100644 index 3070299..0000000 --- a/modules/single/hash_demulti/bff.nf +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -process bff { - publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/bff", mode:'copy' - label 'small_mem' - - conda "$projectDir/conda/bff.yml" - - input: - - path hto_matrix, stageAs: 'hto_data' - each methods - each methodsForConsensus - each cellbarcodeWhitelist - each metricsFile - each doTSNE - each doHeatmap - each perCellSaturation - each majorityConsensusThreshold - each chemistry - each callerDisagreementThreshold - each assignmentOutBff - each preprocess_bff - each barcodeWhitelist - - output: - path "bff_${task.index}" - - script: - def run_preprocess = preprocess_bff != 'False' ? " --preprocess_bff" : '' - - """ - mkdir bff_${task.index} - bff.R --fileHto hto_data --methods $methods --methodsForConsensus $methodsForConsensus \ - --cellbarcodeWhitelist $cellbarcodeWhitelist --metricsFile bff_${task.index}_$metricsFile \ - --doTSNE $doTSNE --doHeatmap $doHeatmap --perCellSaturation $perCellSaturation --majorityConsensusThreshold $majorityConsensusThreshold \ - --chemistry $chemistry --callerDisagreementThreshold $callerDisagreementThreshold --outputdir bff_${task.index} --assignmentOutBff $assignmentOutBff \ - ${run_preprocess} --barcodeWhitelist $barcodeWhitelist - """ -} - -def split_input(input) { - if (input =~ /;/) { - Channel.from(input).map { return it.tokenize(';') }.flatten() - } - else { - Channel.from(input) - } -} - -workflow bff_hashing { - take: - hto_matrix - main: - methods = split_input(params.methods) - methodsForConsensus = split_input(params.methodsForConsensus) - cellbarcodeWhitelist = split_input(params.cellbarcodeWhitelist) - metricsFile = split_input(params.metricsFile) - doTSNE = split_input(params.doTSNE) - doHeatmap = split_input(params.doHeatmap) - perCellSaturation = split_input(params.perCellSaturation) - majorityConsensusThreshold = split_input(params.majorityConsensusThreshold) - chemistry = split_input(params.chemistry) - callerDisagreementThreshold = split_input(params.callerDisagreementThreshold) - assignmentOutBff = split_input(params.assignmentOutBff) - preprocess_bff = split_input(params.preprocess_bff) - barcodeWhitelist = split_input(params.barcodeWhitelist) - - bff(hto_matrix, methods, methodsForConsensus, cellbarcodeWhitelist, metricsFile, doTSNE, doHeatmap, perCellSaturation, majorityConsensusThreshold, chemistry, callerDisagreementThreshold, assignmentOutBff, preprocess_bff, barcodeWhitelist) - - emit: - bff.out.collect() -} - -workflow { - bff_hashing() -} diff --git a/modules/single/hash_demulti/demuxem.nf b/modules/single/hash_demulti/demuxem.nf deleted file mode 100644 index ba81034..0000000 --- a/modules/single/hash_demulti/demuxem.nf +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -process demuxem { - publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/demuxem", mode:'copy' - label 'small_mem' - - conda 'bioconda::pegasuspy demuxEM scanpy' - - input: - path raw_rna_matrix_dir, stageAs: "rna_data_${params.rna_matrix_demuxem}" - path raw_hto_matrix_dir, stageAs: "hto_data_${params.hto_matrix_demuxem}" - val threads - each alpha - each alpha_noise - each tol - each min_num_genes - each min_num_umis - each min_signal - each random_state - each generate_gender_plot - val objectOutDemuxem - each filter_demuxem - output: - path "demuxem_${task.index}" - - script: - def generateGenderPlot = generate_gender_plot != 'None' ? " --generateGenderPlot ${generate_gender_plot}" : '' - """ - mkdir demuxem_${task.index} - demuxem.py --rna_matrix_dir rna_data_${params.rna_matrix_demuxem} --hto_matrix_dir hto_data_${params.hto_matrix_demuxem} \ - --randomState $random_state --min_signal $min_signal --tol $tol --min_num_genes $min_num_genes --min_num_umis $min_num_umis \ - --alpha $alpha --alpha_noise $alpha_noise --n_threads $threads $generateGenderPlot --objectOutDemuxem $objectOutDemuxem \ - --outputdir demuxem_${task.index} --filter_demuxem $filter_demuxem - """ -} - -def split_input(input) { - if (input =~ /;/) { - Channel.from(input).map { return it.tokenize(';') }.flatten() - } - else { - Channel.from(input) - } -} - -workflow demuxem_hashing { - take: - hto_matrix - rna_matrix - main: - threads = params.threads_demuxem - alpha = split_input(params.alpha_demuxem) - alpha_noise = split_input(params.alpha_noise) - min_num_genes = split_input(params.min_num_genes) - min_num_umis = split_input(params.min_num_umis) - min_signal = split_input(params.min_signal) - tol = split_input(params.tol) - random_state = split_input(params.random_state) - generate_gender_plot = split_input(params.generate_gender_plot) - objectOutDemuxem = params.objectOutDemuxem - filter_demuxem = split_input(params.filter_demuxem) - - demuxem(rna_matrix, hto_matrix, threads, alpha, alpha_noise, tol, min_num_genes, min_num_umis, - min_signal, random_state, generate_gender_plot, objectOutDemuxem, filter_demuxem) - - emit: - demuxem.out.collect() -} diff --git a/modules/single/hash_demulti/gmm_demux.nf b/modules/single/hash_demulti/gmm_demux.nf deleted file mode 100644 index 9febe1b..0000000 --- a/modules/single/hash_demulti/gmm_demux.nf +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - - -process gmm_demux{ - publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/gmm_demux", mode:'copy' - - label 'small_mem' - conda "$projectDir/conda/gmm_demux.yml" - - input: - path filtered_hto_matrix_dir - //HTO names as string separated by commas - val hto_name_gmm - //mode 2 - //need estimate number of cells in the single cell assay - //obligatory - val summary - //need to be combined with summary to get a report as file - val report_gmm - //mode 4 - // write csv or tsv - type of input - val mode_GMM - //case 5 - val extract - //float between 0 and 1 - val threshold_gmm - val ambiguous - - output: - - path "gmm_demux_${task.index}" - - - script: - def extract_droplets = extract != 'None' ? " -x ${extract}" : '' - def ambiguous_droplets = extract != 'None' ? " --ambiguous ${ambiguous}" : '' - - if (mode_GMM == 'csv') { - """ - mkdir gmm_demux_${task.index} - touch gmm_demux_${task.index}_$report_gmm - - GMM-demux -c $filtered_hto_matrix_dir $hto_name_gmm -u $summary --report gmm_demux_${task.index}_$report_gmm --full gmm_demux_${task.index} $extract_droplets -t $threshold_gmm - gmm_demux_params.py --path_hto $filtered_hto_matrix_dir --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${task.index}_$report_gmm --mode $mode_GMM $extract_droplets --threshold_gmm $threshold_gmm $ambiguous_droplets --outputdir gmm_demux_${task.index} - - """ - }else { - """ - mkdir gmm_demux_${task.index} - touch gmm_demux_${task.index}_$report_gmm - - GMM-demux $filtered_hto_matrix_dir $hto_name_gmm -u $summary -r gmm_demux_${task.index}_$report_gmm --full gmm_demux_${task.index} -o gmm_demux_${task.index} $extract_droplets -t $threshold_gmm - gmm_demux_params.py --path_hto $filtered_hto_matrix_dir --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${task.index}_$report_gmm --mode $mode_GMM $extract_droplets --threshold_gmm $threshold_gmm $ambiguous_droplets --outputdir gmm_demux_${task.index} - - """ - } -} - -workflow gmm_demux_hashing { -take: - hto_matrix - main: - hto_name_gmm = params.hto_name_gmm - summary = params.summary - report_gmm = params.report_gmm - mode = params.mode_GMM - extract = params.extract - threshold_gmm = params.threshold_gmm - ambiguous = params.ambiguous - - - gmm_demux(hto_matrix,hto_name_gmm,summary,report_gmm,mode,extract,threshold_gmm,ambiguous) - - emit: - gmm_demux.out.collect() -} - -workflow { - gmm_demux_hashing() -} diff --git a/modules/single/hash_demulti/hashedDrops.nf b/modules/single/hash_demulti/hashedDrops.nf deleted file mode 100755 index ec4bd43..0000000 --- a/modules/single/hash_demulti/hashedDrops.nf +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl=2 - -process hashedDrops{ - publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/hashedDrops", mode:'copy' - label 'small_mem' - - conda "conda-forge::r-seurat conda-forge::r-argparse bioconda::bioconductor-dropletutils" - - input: - path raw_hto_matrix_dir - each lower - each niters - each testAmbient - each ignore - each alpha - each round - each byRank - each isCellFDR - val objectOutEmptyDrops - val assignmentOutEmptyDrops - each runEmptyDrops - - each ambient - each minProp - each pseudoCount - each constantAmbient - each doubletNmads - each doubletMin - each doubletMixture - each confidentNmads - each confidenMin - each combinations - val objectOutHashedDrops - val assignmentOutHashedDrops - each gene_col - output: - path "hashedDrops_${task.index}" - - script: - def testAmb = testAmbient != 'False' ? " --testAmbient" : '' - def rou = round != 'False' ? " --round" : '' - def constantAmb = constantAmbient != 'False' ? " --constantAmbient" : '' - def doubletMix = doubletMixture != 'False' ? " --doubletMixture" : '' - def ign = ignore != "None" ? " --ignore ${ignore}" : '' - def alp = alpha != "None" ? " --alpha ${alpha}" : '' - def byR = byRank != "None" ? " --by.rank ${byRank}" : '' - def amb = ambient != 'False' ? " --ambient" : '' - def run_empty = runEmptyDrops != 'False' ? " --runEmptyDrops" : '' - def comb = combinations != "None" ? " --combinations ${combinations}" : '' - - """ - mkdir hashedDrops_${task.index} - dropletUtils.R --raw_hto_matrix_dir $raw_hto_matrix_dir --lower $lower --niters $niters --isCellFDR $isCellFDR --objectOutEmptyDrops $objectOutEmptyDrops --assignmentOutEmptyDrops $assignmentOutEmptyDrops --minProp $minProp --pseudoCount $pseudoCount --doubletNmads $doubletNmads --doubletMin $doubletMin --confidentNmads $confidentNmads --confidenMin $confidenMin --objectOutHashedDrops $objectOutHashedDrops --outputdir hashedDrops_${task.index} --assignmentOutHashedDrops ${assignmentOutHashedDrops}${testAmb}${ign}${alp}${rou}${byR}${constantAmb}${doubletMix}${amb}${comb}${run_empty} --gene_col $gene_col - """ - -} - - -def split_input(input){ - if (input =~ /;/ ){ - Channel.from(input).map{ return it.tokenize(';')}.flatten() - } - else{ - Channel.from(input) - } -} - - -workflow hashedDrops_hashing{ - take: - hto_matrix - main: - lower = split_input(params.lower) - niters = split_input(params.niters) - testAmbient = split_input(params.testAmbient) - ignore = split_input(params.ignore_hashedDrops) - alpha = split_input(params.alpha_hashedDrops) - round = split_input(params.round) - byRank = split_input(params.byRank) - isCellFDR = split_input(params.isCellFDR) - objectOutEmptyDrops = params.objectOutEmptyDrops - assignmentOutEmptyDrops = params.assignmentOutEmptyDrops - runEmptyDrops = split_input(params.runEmptyDrops) - - ambient = split_input(params.ambient) - minProp = split_input(params.minProp) - pseudoCount = split_input(params.pseudoCount) - constantAmbient = split_input(params.constantAmbient) - doubletNmads = split_input(params.doubletNmads) - doubletMin = split_input(params.doubletMin) - doubletMixture = split_input(params.doubletMixture) - confidentNmads = split_input(params.confidentNmads) - confidenMin = split_input(params.confidenMin) - combinations = split_input(params.combinations) - objectOutHashedDrops = params.objectOutHashedDrops - assignmentOutHashedDrops = params.assignmentOutHashedDrops - gene_col = split_input(params.gene_col) - - hashedDrops(hto_matrix, lower, niters, testAmbient, ignore, alpha, round, byRank, isCellFDR, objectOutEmptyDrops, assignmentOutEmptyDrops,runEmptyDrops, ambient, minProp, pseudoCount, constantAmbient, doubletNmads, doubletMin, doubletMixture, confidentNmads, confidenMin, combinations, objectOutHashedDrops, assignmentOutHashedDrops, gene_col) - - emit: - hashedDrops.out.collect() -} - -workflow{ - hashedDrops_hashing() -} diff --git a/modules/single/hash_demulti/hashsolo.nf b/modules/single/hash_demulti/hashsolo.nf deleted file mode 100755 index d0ad851..0000000 --- a/modules/single/hash_demulti/hashsolo.nf +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 -process hash_solo { - publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/hashsolo", mode:'copy' - label 'small_mem' - - conda "$projectDir/conda/hashsolo_py.yml" - - input: - path hto_data, stageAs: "hto_data_${params.hto_matrix_hashsolo}" - each priors_negative - each priors_singlet - each priors_doublet - each pre_existing_clusters - path rna_data, stageAs: "rna_data_${params.rna_matrix_hashsolo}" - val use_rna_data - each number_of_noise_barcodes - val assignmentOutHashSolo - val plotOutHashSolo - - output: - path "hashsolo_${task.index}" - - script: - def noise_barcodes = number_of_noise_barcodes != 'None' ? "--number_of_noise_barcodes $number_of_noise_barcodes" : '' - def existing_clusters = pre_existing_clusters != 'None' ? "--pre_existing_clusters $pre_existing_clusters" : '' - def clustering_data = use_rna_data != 'False' ? "--clustering_data rna_data_${params.rna_matrix_hashsolo}" : '' - """ - mkdir hashsolo_${task.index} - hashsolo.py --hto_data hto_data_${params.hto_matrix_hashsolo} --priors $priors_negative $priors_singlet $priors_doublet \ - $existing_clusters $clustering_data $noise_barcodes \ - --assignmentOutHashSolo $assignmentOutHashSolo \ - --plotOutHashSolo $plotOutHashSolo --outputdir hashsolo_${task.index} - """ -} - -def split_input(input) { - if (input =~ /;/) { - Channel.from(input).map { return it.tokenize(';') }.flatten() - } - else { - Channel.from(input) - } -} - -workflow hash_solo_hashing { - take: - hto_matrix - rna_matrix - main: - use_rna_data = split_input(params.use_rna_data) - priors_negative = split_input(params.priors_negative) - priors_singlet = split_input(params.priors_singlet) - priors_doublet = split_input(params.priors_doublet) - pre_existing_clusters = split_input(params.pre_existing_clusters) - number_of_noise_barcodes = split_input(params.number_of_noise_barcodes) - assignmentOutHashSolo = params.assignmentOutHashSolo - plotOutHashSolo = params.plotOutHashSolo - - hash_solo(hto_matrix, priors_negative, priors_singlet, priors_doublet, pre_existing_clusters, rna_matrix, use_rna_data, number_of_noise_barcodes, assignmentOutHashSolo, plotOutHashSolo) - emit: - hash_solo.out.collect() -} diff --git a/modules/single/hash_demulti/htodemux.nf b/modules/single/hash_demulti/htodemux.nf deleted file mode 100644 index c211ff3..0000000 --- a/modules/single/hash_demulti/htodemux.nf +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -process htodemux { - publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/htodemux", mode: 'copy' - label 'small_mem' - - conda 'conda-forge::r-seurat conda-forge::r-argparse' - - input: - each seurat_object - val assay - each quantile - each kfunc - each nstarts - each nsamples - each seed - each init - val objectOutHTO - val assignmentOutHTO - - //Ridge plot params - each ridgePlot - each ridgeNCol - //Scatter features params - each featureScatter - each scatterFeat1 - each scatterFeat2 - //Violin plot params - each vlnplot - each vlnFeatures - each vlnLog - //tSNE - each tsne - each tsneIdents - each tsneInvert - each tsneVerbose - each tsneApprox - each tsneDimMax - each tsnePerplexity - //Heatmap - each heatmap - each heatmapNcells - - output: - path "htodemux_${task.index}" - - script: - def init_val = init != 'None' ? " --init $init" : '' - def vln_log = vlnLog != 'False' ? '--vlnLog' : '' - def invert = tsneInvert != 'False' ? '--tSNEInvert' : '' - def verbose = tsneVerbose != 'False' ? '--tSNEVerbose' : '' - def approx = tsneApprox != 'False' ? '--tSNEApprox' : '' - - """ - mkdir htodemux_${task.index} - HTODemux.R --seuratObject $seurat_object --assay $assay --quantile $quantile --kfunc $kfunc --nstarts $nstarts --nsamples $nsamples --seed $seed $init_val --objectOutHTO $objectOutHTO --assignmentOutHTO $assignmentOutHTO --outputdir htodemux_${task.index} - HTODemux-visualisation.R --hashtagPath htodemux_${task.index}/${objectOutHTO}.rds --assay $assay --ridgePlot $ridgePlot --ridgeNCol $ridgeNCol --featureScatter $featureScatter --scatterFeat1 $scatterFeat1 --scatterFeat2 $scatterFeat2 --vlnPlot $vlnplot --vlnFeatures $vlnFeatures $vln_log --tSNE $tsne --tSNEIdents $tsneIdents $invert $verbose $approx --tSNEDimMax $tsneDimMax --tSNEPerplexity $tsnePerplexity --heatMap $heatmap --heatMapNcells $heatmapNcells --outputdir htodemux_${task.index} - """ -} - -def split_input(input) { - if (input =~ /;/) { - Channel.from(input).map { return it.tokenize(';') }.flatten() - } - else { - Channel.from(input) - } -} - -workflow htodemux_hashing { - take: - seurat_object - main: - quantile = split_input(params.quantile_htodemux) - assay = params.assay - kfunc = split_input(params.kfunc) - nstarts = split_input(params.nstarts) - nsamples = split_input(params.nsamples_clustering) - seed = split_input(params.seed) - init = split_input(params.init) - objectOutHTO = params.objectOutHTO - assignmentOutHTO = params.assignmentOutHTO - - ridgePlot = split_input(params.ridgePlot) - ridgeNCol = split_input(params.ridgeNCol) - featureScatter = split_input(params.featureScatter) - scatterFeat1 = split_input(params.scatterFeat1) - scatterFeat2 = split_input(params.scatterFeat2) - vlnplot = split_input(params.vlnplot) - vlnFeatures = split_input(params.vlnFeatures) - vlnLog = split_input(params.vlnLog) - - tsne = split_input(params.tsne) - tsneIdents = split_input(params.tsneIdents) - tsneInvert = split_input(params.tsneInvert) - tsneVerbose = split_input(params.tsneVerbose) - tsneApprox = split_input(params.tsneApprox) - tsneDimMax = split_input(params.tsneDimMax) - tsnePerplexity = split_input(params.tsnePerplexity) - heatmap = split_input(params.heatmap) - heatmapNcells = split_input(params.heatmapNcells) - - htodemux(seurat_object, assay, quantile, kfunc, nstarts, nsamples, seed, init, objectOutHTO, assignmentOutHTO, ridgePlot, ridgeNCol, featureScatter, scatterFeat1, scatterFeat2, vlnplot, vlnFeatures, vlnLog, tsne, tsneIdents, tsneInvert, tsneVerbose, tsneApprox, tsneDimMax, tsnePerplexity, heatmap, heatmapNcells) - - emit: - htodemux.out.collect() -} - -workflow { - htodemux_hashing() -} diff --git a/modules/single/hash_demulti/multiseq.nf b/modules/single/hash_demulti/multiseq.nf deleted file mode 100644 index 12795f0..0000000 --- a/modules/single/hash_demulti/multiseq.nf +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -process multi_seq { - publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/multiseq", mode:'copy' - label 'small_mem' - - conda 'conda-forge::r-seurat conda-forge::r-argparse' - - input: - each rdsObject - each quantile - each autoThresh - each maxiter - each qrangeFrom - each qrangeTo - each qrangeBy - each verbose - val assay - val objectOutMulti - val assignmentOutMulti - - output: - path "multiseq_${task.index}" - - script: - def autoThr = autoThresh != 'False' ? ' --autoThresh' : '' - def verb = verbose != 'False' ? ' --verbose' : '' - - """ - mkdir multiseq_${task.index} - MultiSeq.R --seuratObjectPath $rdsObject --assay $assay --quantile $quantile $autoThr --maxiter $maxiter --qrangeFrom $qrangeFrom --qrangeTo $qrangeTo --qrangeBy $qrangeBy $verb --objectOutMulti $objectOutMulti --assignmentOutMulti $assignmentOutMulti --outputdir multiseq_${task.index} - """ -} - -def split_input(input) { - if (input =~ /;/) { - Channel.from(input).map { return it.tokenize(';') }.flatten() - } - else { - Channel.from(input) - } -} - -workflow multiseq_hashing { - take: - rdsObject - main: - quantile = split_input(params.quantile_multi) - autoThresh = split_input(params.autoThresh) - maxIter = split_input(params.maxiter) - qrangeFrom = split_input(params.qrangeFrom) - qrangeTo = split_input(params.qrangeTo) - qrangeBy = split_input(params.qrangeBy) - verbose = split_input(params.verbose_multiseq) - assay = params.assay - objectOutMulti = params.objectOutMulti - assignmentOutMulti = params.assignmentOutMulti - multi_seq(rdsObject, quantile, autoThresh, maxIter, qrangeFrom, qrangeTo, qrangeBy, verbose, assay, objectOutMulti, assignmentOutMulti) - emit: - multi_seq.out.collect() -} diff --git a/modules/single/hash_demulti/preprocess.nf b/modules/single/hash_demulti/preprocess.nf deleted file mode 100644 index ecaeed7..0000000 --- a/modules/single/hash_demulti/preprocess.nf +++ /dev/null @@ -1,63 +0,0 @@ -process preprocess { - publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/preprocess", mode:'copy' - label 'small_mem' - - conda 'conda-forge::r-seurat conda-forge::r-argparse' - - input: - path hto_matrix, stageAs: 'hto_data' - path umi_matrix, stageAs: 'rna_data' - val hto_raw_or_filtered - val rna_raw_or_filtered - val ndelim - each selection_method - each number_features - val assay - each margin - val normalisation_method - val preprocess_out - each gene_col - - output: - path "preprocess_${task.index}_hto_${hto_raw_or_filtered}_rna_${rna_raw_or_filtered}" - - script: - - """ - mkdir preprocess_${task.index}_hto_${hto_raw_or_filtered}_rna_${rna_raw_or_filtered} - pre_processing.R --fileUmi rna_data --fileHto hto_data --ndelim $ndelim \ - --selectMethod $selection_method --numberFeatures $number_features --assay $assay \ - --margin $margin --normalisationMethod $normalisation_method --OutputFile $preprocess_out \ - --outputdir preprocess_${task.index}_hto_${hto_raw_or_filtered}_rna_${rna_raw_or_filtered} --gene_col $gene_col - """ - -} - -def split_input(input) { - if (input =~ /;/) { - Channel.from(input).map { return it.tokenize(';') }.flatten() - } - else { - Channel.from(input) - } -} - -workflow preprocessing_hashing { - take: - hto_matrix - rna_matrix - hto_raw_or_filtered - rna_raw_or_filtered - main: - sel_method = split_input(params.sel_method) - ndelim = params.ndelim - n_features = split_input(params.n_features) - assay = params.assay - margin = split_input(params.margin) - norm_method = split_input(params.norm_method) - out_file = params.preprocessOut - gene_col = split_input(params.gene_col) - preprocess(hto_matrix, rna_matrix, hto_raw_or_filtered, rna_raw_or_filtered, ndelim, sel_method, n_features, assay, margin, norm_method, out_file, gene_col) - emit: - preprocess.out.collect() -} diff --git a/modules/single/hash_demultiplexing.nf b/modules/single/hash_demultiplexing.nf deleted file mode 100644 index 6a5088d..0000000 --- a/modules/single/hash_demultiplexing.nf +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 -include { preprocessing_hashing as preprocessing_hashing_htodemux } from './hash_demulti/preprocess' -include { preprocessing_hashing as preprocessing_hashing_multiseq } from './hash_demulti/preprocess' -include { multiseq_hashing } from './hash_demulti/multiseq' -include { htodemux_hashing } from './hash_demulti/htodemux' -include { hash_solo_hashing } from './hash_demulti/hashsolo' -include { hashedDrops_hashing } from './hash_demulti/hashedDrops' -include { demuxem_hashing } from './hash_demulti/demuxem' -include { gmm_demux_hashing } from './hash_demulti/gmm_demux' -include { bff_hashing } from './hash_demulti/bff' - -process summary { - publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti", mode: 'copy' - label 'small_mem' - - conda 'pandas scanpy mudata' - - input: - val demuxem_result - val hashsolo_result - val htodemux_result - val multiseq_result - val hashedDrops_result - val gmmDemux_result - val bff_result - val generate_anndata - val generate_mudata - path rna_matrix, stageAs: 'rna_data' - path hto_matrix, stageAs: 'hto_data' - - output: - path hash_summary - - script: - def demuxem_files = '' - def htodemux_files = '' - def hashsolo_files = '' - def multiseq_files = '' - def hashedDrops_files = '' - def gmmDemux_files = '' - def bff_files = '' - def generate_adata = '' - def generate_mdata = '' - - if (demuxem_result != 'no_result') { - demuxem_files = "--demuxem ${demuxem_result.join(':')}" - } - if (hashsolo_result != 'no_result') { - hashsolo_files = "--hashsolo ${hashsolo_result.join(':')}" - } - if (htodemux_result != 'no_result') { - htodemux_files = "--htodemux ${htodemux_result.join(':')}" - } - if (multiseq_result != 'no_result') { - multiseq_files = "--multiseq ${multiseq_result.join(':')}" - } - if (hashedDrops_result != 'no_result') { - hashedDrops_files = "--hashedDrops ${hashedDrops_result.join(':')}" - } - if (gmmDemux_result != 'no_result') { - gmmDemux_files = "--gmm_demux ${gmmDemux_result.join(':')}" - } - if (bff_result != 'no_result') { - bff_files = "--bff ${bff_result.join(':')}" - } - if (generate_anndata == 'True') { - if (rna_matrix.name == 'None') { - error 'Error: RNA count matrix is not given.' - } - generate_adata = '--generate_anndata --read_rna_mtx rna_data' - } - if (generate_mudata == 'True') { - if (rna_matrix.name == 'None') { - error 'Error: RNA count matrix is not given.' - } - if (hto_matrix.name == 'None') { - error 'Error: HTO count matrix is not given.' - } - generate_mdata = '--generate_mudata --read_rna_mtx rna_data --read_hto_mtx hto_data' - } - - """ - summary_hash.py $demuxem_files $htodemux_files $multiseq_files $hashedDrops_files $hashsolo_files $gmmDemux_files $bff_files $generate_adata $generate_mdata - """ -} - -workflow hash_demultiplexing { - take: - rna_matrix_raw - rna_matrix_filtered - hto_matrix_raw - hto_matrix_filtered - main: - - if (params.htodemux == 'True') { - rna_matrix = params.rna_matrix_htodemux == 'raw' ? rna_matrix_raw : rna_matrix_filtered - hto_matrix = params.hto_matrix_htodemux == 'raw' ? hto_matrix_raw : hto_matrix_filtered - preprocessing_hashing_htodemux(hto_matrix, rna_matrix, params.hto_matrix_htodemux, params.rna_matrix_htodemux) - htodemux_preprocess_out = preprocessing_hashing_htodemux.out - htodemux_hashing(htodemux_preprocess_out) - htodemux_out = htodemux_hashing.out - } - else { - htodemux_out = channel.value('no_result') - } - - if (params.multiseq == 'True') { - if (params.htodemux == 'True' & params.hto_matrix_htodemux == params.hto_matrix_multiseq & - params.rna_matrix_htodemux == params.rna_matrix_multiseq) { - multiseq_preprocess_out = htodemux_preprocess_out - } - else { - rna_matrix = params.rna_matrix_multiseq == 'raw' ? rna_matrix_raw : rna_matrix_filtered - hto_matrix = params.hto_matrix_multiseq == 'raw' ? hto_matrix_raw : hto_matrix_filtered - preprocessing_hashing_multiseq(hto_matrix, rna_matrix, params.hto_matrix_multiseq, params.rna_matrix_multiseq) - multiseq_preprocess_out = preprocessing_hashing_multiseq.out - } - multiseq_hashing(multiseq_preprocess_out) - multiseq_out = multiseq_hashing.out - } - else { - multiseq_out = channel.value('no_result') - } - - if (params.hashsolo == 'True') { - hashsolo_hto_input = params.hto_matrix_hashsolo == 'raw' ? hto_matrix_raw : hto_matrix_filtered - hashsolo_rna_input = params.rna_matrix_hashsolo == 'False' ? channel.value('None') : - (params.rna_matrix_hashsolo == 'raw' ? rna_matrix_raw : rna_matrix_filtered) - hash_solo_hashing(hashsolo_hto_input, hashsolo_rna_input) - hashsolo_out = hash_solo_hashing.out - } - else { - hashsolo_out = channel.value('no_result') - } - - if (params.demuxem == 'True') { - demuxem_hto_input = params.hto_matrix_demuxem == 'raw' ? hto_matrix_raw : hto_matrix_filtered - demuxem_rna_input = params.rna_matrix_demuxem == 'raw' ? rna_matrix_raw : rna_matrix_filtered - demuxem_hashing(demuxem_hto_input, demuxem_rna_input) - demuxem_out = demuxem_hashing.out - } - else { - demuxem_out = channel.value('no_result') - } - - if (params.hashedDrops == 'True') { - hashedDrops_hto_input = params.hto_matrix_hashedDrops == 'raw' ? hto_matrix_raw : hto_matrix_filtered - hashedDrops_hashing(hashedDrops_hto_input) - hashedDrops_out = hashedDrops_hashing.out - } - else { - hashedDrops_out = channel.value('no_result') - } - if (params.bff == 'True') { - bff_hto_input = params.hto_matrix_bff == 'raw' ? hto_matrix_raw : hto_matrix_filtered - bff_hashing(bff_hto_input) - bff_out = bff_hashing.out - } - else { - bff_out = channel.value('no_result') - } - if (params.gmmDemux == 'True') { - gmmDemux_hto_input = params.hto_matrix_gmm_demux == 'raw' ? hto_matrix_raw : hto_matrix_filtered - gmm_demux_hashing(gmmDemux_hto_input) - gmmDemux_out = gmm_demux_hashing.out - } - else { - gmmDemux_out = channel.value('no_result') - } - - summary(demuxem_out, hashsolo_out, htodemux_out, multiseq_out, hashedDrops_out, gmmDemux_out, bff_out, - params.generate_anndata, params.generate_mudata, rna_matrix_filtered, hto_matrix_filtered) - emit: - summary.out -} diff --git a/nextflow.config b/nextflow.config index 9426fbc..922a252 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,468 +1,507 @@ -params { - outdir = "result" - mode = "rescue" - generate_anndata = "True" - generate_mudata = "False" - multi_input = null // run only one sample - - // hashing-based deconvolution - // key input - hto_matrix_raw = "None" - hto_matrix_filtered = "None" - rna_matrix_raw = "None" - rna_matrix_filtered = "None" - - // preprocessing - sel_method = "mean.var.plot" - ndelim = "_" - n_features = "2000" - assay = "HTO" - margin = 2 - norm_method = "CLR" - preprocessOut = "preprocessed" - gene_col = 2 - - // htodemux - htodemux = "True" - rna_matrix_htodemux = "filtered" - hto_matrix_htodemux = "filtered" - quantile_htodemux = "0.99" - kfunc = "clara" - nstarts = 100 - nsamples_clustering = 100 - seed = 42 - init = "None" - objectOutHTO = "htodemux" - assignmentOutHTO = "htodemux" - - // htodemux_visualization - ridgePlot = "True" - ridgeNCol = 3 - featureScatter = "False" - scatterFeat1 = "None" - scatterFeat2 = "None" - vlnplot = "True" - vlnFeatures = "nCount_RNA" - vlnLog = "True" - tsne = "True" - tsneIdents = "Negative" - tsneInvert = "True" - tsneVerbose = "False" - tsneApprox = "False" - tsneDimMax = 2 - tsnePerplexity = 100 - heatmap = "True" - heatmapNcells= 500 - - // multiseq - multiseq = "True" - rna_matrix_multiseq = "filtered" - hto_matrix_multiseq = "filtered" - quantile_multi = 0.7 - autoThresh = "True" - maxiter = 5 - qrangeFrom = 0.1 - qrangeTo = 0.9 - qrangeBy = 0.05 - verbose_multiseq = "True" - objectOutMulti = "multiseq" - assignmentOutMulti = "multiseq" - - // hashsolo - hashsolo = "True" - hto_matrix_hashsolo = "raw" - rna_matrix_hashsolo = "raw" - use_rna_data = "False" - priors_negative = 1/3 - priors_singlet = 1/3 - priors_doublet = 1/3 - pre_existing_clusters = "None" - number_of_noise_barcodes = "None" - assignmentOutHashSolo = "hashsolo" - plotOutHashSolo = "hashsolo" - - // hashedDrops - hashedDrops = "True" - hto_matrix_hashedDrops = "raw" - lower = 100 - niters = 10000 - testAmbient = "True" - ignore_hashedDrops = "None" - alpha_hashedDrops = "None" - round = "True" - byRank = "None" - isCellFDR = 0.01 - objectOutEmptyDrops = "emptyDroplets" - assignmentOutEmptyDrops = "emptyDroplets" - ambient = "True" - minProp = 0.05 - pseudoCount = 5 - constantAmbient = "False" - doubletNmads = 3 - doubletMin = 2 - doubletMixture = "False" - confidentNmads = 3 - confidenMin = 2 - combinations = "None" - objectOutHashedDrops = "hashedDrops" - assignmentOutHashedDrops = "hashedDrops" - runEmptyDrops = "True" - - // demuxem - demuxem = "True" - rna_matrix_demuxem = "raw" - hto_matrix_demuxem = "raw" - threads_demuxem = 2 - alpha_demuxem = 0.0 - alpha_noise = 1.0 - min_num_genes = 100 - min_num_umis = 100 - min_signal = 10.0 - tol = 1e-6 - random_state = 0 - generate_gender_plot = "None" - objectOutDemuxem = "demuxem_res" - filter_demuxem = "True" - - // gmm-demux - gmmDemux = "True" - hto_matrix_gmm_demux = "filtered" - assignmentOutGmmDemux = "gmm_demux" - hto_name_gmm = "None" - summary = 2000 - report_gmm = "report.txt" - mode_GMM = "tsv" - extract = 'None' - threshold_gmm = 0.8 - ambiguous = 0.05 - - // demuxmix - demuxmix = "False" - rna_matrix_demuxmix = "raw" - hto_matrix_demuxmix = "raw" - assignmentOutDemuxmix = "demuxmix" - demuxmix_preprocess = "True" - correctTails = "TRUE" - rna_available = 'TRUE' - model = 'naive' - alpha_demuxmix = 0.9 - beta_demuxmix = 0.9 - tol_demuxmix = 0.00001 - maxIter_demuxmix = 1000 - k_hto = 1.5 - k_rna = 1.5 - - // bff - bff = "True" - rna_matrix_bff = "raw" - hto_matrix_bff = "raw" - assignmentOutBff = "bff" - bff_preprocess = "True" - methods = "combined_bff" - methodsForConsensus = 'NULL' - cellbarcodeWhitelist = 'NULL' - metricsFile = 'metrics_bff.csv' - doTSNE = "True" - doHeatmap = "True" - perCellSaturation = 'NULL' - majorityConsensusThreshold = 'NULL' - chemistry = "10xV3" - callerDisagreementThreshold = 'NULL' - preprocess_bff = "FALSE" - barcodeWhitelist = 'NULL' - - // genetics-based deconvolution - // key input - bam = "None" - bai = "None" - barcodes = "None" - fasta = "None" - fasta_index = "None" - nsamples_genetic = 2 - common_variants_scSplit = "None" - common_variants_souporcell = "None" - common_variants_freemuxlet = "None" - common_variants_cellsnp = "None" - vcf_mixed = "None" - vcf_donor = "None" - celldata = "None" - - // demuxlet and freemuxlet - demuxlet = "True" - demuxlet_preprocess = "False" - tag_group = "CB" - tag_UMI = "UB" - sm = "None" - sm_list = "None" - sam_verbose = 1000000 - vcf_verbose = 10000 - skip_umi = "False" - cap_BQ = 40 - min_BQ = 13 - min_MQ = 20 - min_TD = 0 - excl_flag = 3844 - min_total = 0 - min_uniq = 0 - min_snp = 0 - min_umi = 0 - plp = "False" - field = "GT" - geno_error_offset = 0.1 - geno_error_coeff = 0.0 - r2_info = "R2" - min_mac = 1 - min_callrate = 0.50 - alpha = "0.5" // must be quoted - doublet_prior = 0.5 - demuxlet_out = "demuxlet_res" - - // freemuxlet - freemuxlet = "True" - freemuxlet_preprocess = "False" - init_cluster = "None" - aux_files = "False" - verbose = 100 - bf_thres = 5.41 - frac_init_clust = 0.5 - iter_init = 10 - keep_init_missing = "False" - freemuxlet_out = "freemuxlet_out" - - // vireo - vireo = "True" - vireo_preprocess = "False" - vireo_variant = "True" - genoTag = "GT" - noDoublet = "False" - nInit = 50 - extraDonor = 0 - extraDonorMode = "distance" - forceLearnGT = "False" - ASEmode = "False" - noPlot = "False" - randSeed = "None" - cellRange = "all" - callAmbientRNAs = "False" - nproc = 2 - vireo_out = "vireo_out" - - // souporcell - souporcell = "True" - souporcell_preprocess = "False" - threads = 5 - ploidy = 2 - min_alt = 10 - min_ref = 10 - max_loci = 2048 - restarts = "None" - use_known_genotype = "False" - known_genotypes_sample_names = "None" - skip_remap = "True" - ignore = "False" - souporcell_out = "souporcell_out" - - // scSplit - scSplit = "True" - scSplit_preprocess = "False" - scSplit_variant = "True" - refscSplit = "ref_filtered.csv" - altscSplit = "alt_filtered.csv" - subscSplit = 10 - emsscSplit = 30 - dblscSplit = "None" - sample_geno = "False" - scsplit_out = "scsplit_out" - - // freebayes - stdin = "False" - targets = "None" - region = "None" - samples= "None" - populations = "None" - cnv_map = "None" - vcf_freebayes = "vcf_freebayes_output.vcf" - gvcf = "False" - gvcf_chunk = "None" - gvcf_dont_use_chunk = "None" - variant_input = "None" - only_use_input_alleles = "False" - haplotype_basis_alleles = "None" - report_all_haplotype_alleles = "False" - report_monomorphic = "False" - pvar = 0.0 - strict_vcf = "False" - - theta = 0.001 - pooled_discrete = "False" - pooled_continuous = "False" - use_reference_allele = "False" - reference_quality = "100,60" - no_snps = "False" - no_indels = "True" - no_mnps = "True" - no_complex = "True" - use_best_n_alleles = 0 - haplotype_length = 3 - min_repeat_size = 5 - min_repeat_entropy = 1 - no_partial_observations = "False" - - dont_left_align_indels = "False" - use_duplicate_reads = "False" - min_mapping_quality = 1 - min_base_quality = 1 - min_supporting_allele_qsum = 0 - min_supporting_mapping_qsum = 0 - mismatch_base_quality_threshold = 10 - read_mismatch_limit = "None" - read_max_mismatch_fraction = 1.0 - read_snp_limit = "None" - read_indel_limit = "None" - standard_filters = "False" - min_alternate_fraction = 0.05 - min_alternate_count = 2 - min_alternate_qsum = 0 - min_alternate_total = 1 - min_coverage = 0 - max_coverage = "None" - no_population_priors = "False" - hwe_priors_off = "False" - binomial_obs_priors_off = "False" - allele_balance_priors_off = "False" - observation_bias = "None" - base_quality_cap = "None" - prob_contamination = 0.000000001 - legacy_gls= "False" - contamination_estimates = "None" - report_genotype_likelihood_max = "False" - genotyping_max_iterations = 1000 - genotyping_max_banddepth = 6 - posterior_integration_limits = "1,3" - exclude_unobserved_genotypes = "False" - genotype_variant_threshold = "None" - use_mapping_quality = "False" - harmonic_indel_quality = "False" - read_dependence_factor = 0.9 - genotype_qualities= "False" - debug = "False" - dd = "False" - // cellsnp - targetsVCF = "None" - sampleList = "None" - sampleIDs = "None" - genotype_cellSNP = "True" - gzip_cellSNP = "True" - printSkipSNPs = "False" - nproc_cellSNP = 2 - refseq_cellSNP = "None" - chrom = "None" - cellTAG = "CB" - UMItag = "Auto" - minCOUNT = 20 - minMAF = 0.1 - doubletGL = "False" - inclFLAG = "None" - exclFLAG = "None" - minLEN = 30 - minMAPQ = 20 - maxDEPTH = 0 - countORPHAN = "False" - cellsnp_out = "cellSNP_out" - - // donor matching - match_donor = "True" - demultiplexing_result = "None" - match_donor_method1 = "None" - match_donor_method2 = "None" - findVariants = "False" - variant_count = 10 - variant_pct = 0.9 - vireo_parent_dir = "None" -} - -profiles { - standard { - process { - executor = 'local' - withLabel: big_mem { - cpus = 4 - memory = 10.GB - } - withLabel: small_mem { - cpus = 2 - memory = 8.GB - } - } - - } - cluster { - process { - executor = 'slurm' - maxRetries = 3 - // queue = ... - // clusterOptions = ... - withLabel: big_mem { - cpus = 32 - memory = 64.GB - } - withLabel: small_mem { - cpus = 16 - memory = 32.GB - } - } - } - conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - docker { - docker.enabled = true - docker.registry = 'quay.io' - docker.userEmulation = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' - } - singularity { - singularity.enabled = true - singularity.runOptions = "--bind $PWD" - singularity.cacheDir = "$PWD" - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - test { - includeConfig 'test.config' - } - conda_singularity { - singularity.enabled = true - singularity.runOptions = "--bind $PWD" - singularity.cacheDir = "$PWD" - conda.enabled = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } -} - -process { - echo = true - debug = true +params { + subset_bam_to_comon_variants= true + cell_data="None" + // Nf core integration + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + sample_name='hadge_process' + outdir = "${launchDir}/results" + mode = "rescue" + generate_anndata = "True" + generate_mudata = "False" + multi_input = null // run only one sample + + // hashing-based deconvolution + // key input + hto_matrix_raw = "None" + hto_matrix_filtered = "None" + rna_matrix_raw = "None" + rna_matrix_filtered = "None" + + // preprocessing + sel_method = "mean.var.plot" + ndelim = "_" + n_features = "2000" + assay = "HTO" + margin = 2 + norm_method = "CLR" + preprocessOut = "preprocessed" + gene_col = 2 + + // htodemux + htodemux = "True" + rna_matrix_htodemux = "filtered" + hto_matrix_htodemux = "filtered" + quantile_htodemux = "0.99" + kfunc = "clara" + nstarts = 100 + nsamples_clustering = 100 + seed = 42 + init = "None" + objectOutHTO = "htodemux" + assignmentOutHTO = "htodemux" + + // htodemux_visualization + ridgePlot = "True" + ridgeNCol = 3 + featureScatter = "False" + scatterFeat1 = "None" + scatterFeat2 = "None" + vlnplot = "True" + vlnFeatures = "nCount_RNA" + vlnLog = "True" + tsne = "True" + tsneIdents = "Negative" + tsneInvert = "True" + tsneVerbose = "False" + tsneApprox = "False" + tsneDimMax = 2 + tsnePerplexity = 100 + heatmap = "True" + heatmapNcells= 500 + + // multiseq + multiseq = "True" + rna_matrix_multiseq = "filtered" + hto_matrix_multiseq = "filtered" + quantile_multi = 0.7 + autoThresh = "True" + maxiter = 5 + qrangeFrom = 0.1 + qrangeTo = 0.9 + qrangeBy = 0.05 + verbose_multiseq = "True" + objectOutMulti = "multiseq" + assignmentOutMulti = "multiseq" + + // hashsolo + hashsolo = "True" + hto_matrix_hashsolo = "raw" + rna_matrix_hashsolo = "raw" + use_rna_data = "False" + priors_negative = 1/3 + priors_singlet = 1/3 + priors_doublet = 1/3 + pre_existing_clusters = "None" + number_of_noise_barcodes = "None" + assignmentOutHashSolo = "hashsolo" + plotOutHashSolo = "hashsolo" + + // hashedDrops + hashedDrops = "True" + hto_matrix_hashedDrops = "raw" + lower = 100 + niters = 10000 + testAmbient = "True" + ignore_hashedDrops = "None" + alpha_hashedDrops = "None" + round = "True" + byRank = "None" + isCellFDR = 0.01 + objectOutEmptyDrops = "emptyDroplets" + assignmentOutEmptyDrops = "emptyDroplets" + ambient = "True" + minProp = 0.05 + pseudoCount = 5 + constantAmbient = "False" + doubletNmads = 3 + doubletMin = 2 + doubletMixture = "False" + confidentNmads = 3 + confidenMin = 2 + combinations = "None" + objectOutHashedDrops = "hashedDrops" + assignmentOutHashedDrops = "hashedDrops" + runEmptyDrops = "True" + + // demuxem + demuxem = "True" + rna_matrix_demuxem = "raw" + hto_matrix_demuxem = "raw" + threads_demuxem = 2 + alpha_demuxem = 0.0 + alpha_noise = 1.0 + min_num_genes = 100 + min_num_umis = 100 + min_signal = 10.0 + tol = 1e-6 + random_state = 0 + generate_gender_plot = "None" + objectOutDemuxem = "demuxem_res" + filter_demuxem = "True" + + // gmm-demux + gmmDemux = "True" + hto_matrix_gmm_demux = "filtered" + assignmentOutGmmDemux = "gmm_demux" + hto_name_gmm = "None" + summary = 2000 + report_gmm = "report.txt" + mode_GMM = "tsv" + extract = 'None' + threshold_gmm = 0.8 + ambiguous = 0.05 + + // demuxmix + demuxmix = "False" + rna_matrix_demuxmix = "raw" + hto_matrix_demuxmix = "raw" + assignmentOutDemuxmix = "demuxmix" + demuxmix_preprocess = "True" + correctTails = "TRUE" + rna_available = 'TRUE' + model = 'naive' + alpha_demuxmix = 0.9 + beta_demuxmix = 0.9 + tol_demuxmix = 0.00001 + maxIter_demuxmix = 1000 + k_hto = 1.5 + k_rna = 1.5 + + // bff + bff = "True" + rna_matrix_bff = "raw" + hto_matrix_bff = "raw" + assignmentOutBff = "bff" + bff_preprocess = "True" + methods = "combined_bff" + methodsForConsensus = 'NULL' + cellbarcodeWhitelist = 'NULL' + metricsFile = 'metrics_bff.csv' + doTSNE = "True" + doHeatmap = "True" + perCellSaturation = 'NULL' + majorityConsensusThreshold = 'NULL' + chemistry = "10xV3" + callerDisagreementThreshold = 'NULL' + preprocess_bff = "FALSE" + barcodeWhitelist = 'NULL' + + // genetics-based deconvolution + // key input + bam = "None" + bai = "None" + barcodes = "None" + fasta = "None" + fasta_index = "None" + nsamples_genetic = 2 + common_variants_scSplit = "None" + common_variants_souporcell = "None" + common_variants_freemuxlet = "None" + common_variants_cellsnp = "None" + vcf_mixed = "None" + vcf_donor = "None" + celldata = "None" + + // demuxlet and freemuxlet + demuxlet = "True" + demuxlet_preprocess = "False" + tag_group = "CB" + tag_UMI = "UB" + sm = "None" + sm_list = "None" + sam_verbose = 1000000 + vcf_verbose = 10000 + skip_umi = "False" + cap_BQ = 40 + min_BQ = 13 + min_MQ = 20 + min_TD = 0 + excl_flag = 3844 + min_total = 0 + min_uniq = 0 + min_snp = 0 + min_umi = 0 + plp = "False" + field = "GT" + geno_error_offset = 0.1 + geno_error_coeff = 0.0 + r2_info = "R2" + min_mac = 1 + min_callrate = 0.50 + alpha = "0.5" // must be string, multiple values in a single run should be comma separated + doublet_prior = 0.5 + demuxlet_out = "demuxlet_res" + + // freemuxlet + freemuxlet = "True" + freemuxlet_preprocess = "False" + init_cluster = "None" + aux_files = "False" + verbose = 100 + bf_thres = 5.41 + frac_init_clust = 0.5 + iter_init = 10 + keep_init_missing = "False" + freemuxlet_out = "freemuxlet_out" + + // vireo + vireo = "True" + vireo_preprocess = "False" + vireo_variant = "True" + genoTag = "GT" + noDoublet = "False" + nInit = 50 + extraDonor = 0 + extraDonorMode = "distance" + forceLearnGT = "False" + ASEmode = "False" + noPlot = "False" + randSeed = "None" + cellRange = "all" + callAmbientRNAs = "False" + nproc = 2 + vireo_out = "vireo_out" + + // souporcell + souporcell = "True" + souporcell_preprocess = "False" + threads = 5 + ploidy = 2 + min_alt = 10 + min_ref = 10 + max_loci = 2048 + restarts = "None" + use_known_genotype = "False" + known_genotypes_sample_names = "None" + skip_remap = "True" + ignore = "False" + souporcell_out = "souporcell_out" + + // scSplit + scSplit = "True" + scSplit_preprocess = "False" + scSplit_variant = "True" + refscSplit = "ref_filtered.csv" + altscSplit = "alt_filtered.csv" + subscSplit = 10 + emsscSplit = 30 + dblscSplit = "None" + sample_geno = "False" + scsplit_out = "scsplit_out" + + // freebayes + stdin = "False" + targets = "None" + region = "None" + samples= "None" + populations = "None" + cnv_map = "None" + vcf_freebayes = "vcf_freebayes_output.vcf" + gvcf = "False" + gvcf_chunk = "None" + gvcf_dont_use_chunk = "None" + variant_input = "None" + only_use_input_alleles = "False" + haplotype_basis_alleles = "None" + report_all_haplotype_alleles = "False" + report_monomorphic = "False" + pvar = 0.0 + strict_vcf = "False" + + theta = 0.001 + pooled_discrete = "False" + pooled_continuous = "False" + use_reference_allele = "False" + reference_quality = "100,60" + no_snps = "False" + no_indels = "True" + no_mnps = "True" + no_complex = "True" + use_best_n_alleles = 0 + haplotype_length = 3 + min_repeat_size = 5 + min_repeat_entropy = 1 + no_partial_observations = "False" + + dont_left_align_indels = "False" + use_duplicate_reads = "False" + min_mapping_quality = 1 + min_base_quality = 1 + min_supporting_allele_qsum = 0 + min_supporting_mapping_qsum = 0 + mismatch_base_quality_threshold = 10 + read_mismatch_limit = "None" + read_max_mismatch_fraction = 1.0 + read_snp_limit = "None" + read_indel_limit = "None" + standard_filters = "False" + min_alternate_fraction = 0.05 + min_alternate_count = 2 + min_alternate_qsum = 0 + min_alternate_total = 1 + min_coverage = 0 + max_coverage = "None" + no_population_priors = "False" + hwe_priors_off = "False" + binomial_obs_priors_off = "False" + allele_balance_priors_off = "False" + observation_bias = "None" + base_quality_cap = "None" + prob_contamination = 0.000000001 + legacy_gls= "False" + contamination_estimates = "None" + report_genotype_likelihood_max = "False" + genotyping_max_iterations = 1000 + genotyping_max_banddepth = 6 + posterior_integration_limits = "1,3" + exclude_unobserved_genotypes = "False" + genotype_variant_threshold = "None" + use_mapping_quality = "False" + harmonic_indel_quality = "False" + read_dependence_factor = 0.9 + genotype_qualities= "False" + debug = "False" + dd = "False" + // cellsnp + targetsVCF = "None" + sampleList = "None" + sampleIDs = "None" + genotype_cellSNP = "True" + gzip_cellSNP = "True" + printSkipSNPs = "False" + nproc_cellSNP = 2 + refseq_cellSNP = "None" + chrom = "None" + cellTAG = "CB" + UMItag = "Auto" + minCOUNT = 20 + minMAF = 0.1 + doubletGL = "False" + inclFLAG = "None" + exclFLAG = "None" + minLEN = 30 + minMAPQ = 20 + maxDEPTH = 0 + countORPHAN = "False" + cellsnp_out = "cellSNP_out" + + // donor matching + match_donor = "True" + demultiplexing_result = "None" + match_donor_method1 = "None" + match_donor_method2 = "None" + findVariants = "False" + variant_count = 10 + variant_pct = 0.9 + vireo_parent_dir = "None" +} + +profiles { + standard { + process { + executor = 'local' + withLabel: big_mem { + cpus = 4 + memory = 150.GB + } + withLabel: small_mem { + cpus = 2 + memory = 100.GB + } + } + + } + cluster { + process { + executor = 'slurm' + maxRetries = 3 + // queue = ... + // clusterOptions = ... + withLabel: big_mem { + cpus = 16 + memory = {30.GB * task.attempt} + time =24.h + } + withLabel: small_mem { + cpus = 6 + memory = {20.GB * task.attempt} + time =24.h + } + + + withName: subset_bam_to_comon_variants{ + cpus = 2 + memory = {20.GB * task.attempt} + } + + withName: summary{ + cpus = 1 + memory = {10.GB * task.attempt} + } + + withName: matchDonor{ + cpus = {2 * task.attempt} + memory = {10.GB * task.attempt } + } + + withName:souporcell{ + time = 48.h + } + } + } + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + docker { + docker.enabled = true + docker.registry = 'quay.io' + docker.userEmulation = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } + singularity { + singularity.enabled = true + singularity.runOptions = "--bind $PWD" + singularity.cacheDir = "$PWD" + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + test { + includeConfig 'test.config' + } + conda_singularity { + singularity.enabled = true + singularity.runOptions = "--bind $PWD" + singularity.cacheDir = "$PWD" + conda.enabled = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } +} + +process { + echo = true + debug = true + maxRetries = 3 + maxErrors = '-1' + errorStrategy = 'retry' +} + + +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") + } \ No newline at end of file diff --git a/subworkflows/HADGE.nf b/subworkflows/HADGE.nf new file mode 100644 index 0000000..33046b4 --- /dev/null +++ b/subworkflows/HADGE.nf @@ -0,0 +1,76 @@ +nextflow.enable.dsl=2 + +include { run_multi } from "$projectDir/modules/multi_demultiplexing" +include { summary } from "$projectDir/modules/multi/gene_demultiplexing" +include { donor_match } from "$projectDir/modules/multi/donor_match" +include {create_single_chanel_input} from "$projectDir/modules/multi/preprocessing/preprocessing" + + +workflow HADGE { + // Here we decide if it is a single sample demultiplexing or multi input demutliplexing run. + if (params.multi_input == null){ + // Single Mode + // Instead of running in single here we want to run a multi so that the chanels, workflows and processes are not redundant. + create_single_chanel_input( + params.sample_name, + params.hto_matrix_raw, + params.hto_matrix_filtered, + params.rna_matrix_raw, + params.rna_matrix_filtered, + params.bam, + params.bai, + params.barcodes, + params.fasta, + params.fasta_index, + params.nsamples_genetic, + params.cell_data, + params.vcf_mixed, + params.vcf_donor, + params.vireo_parent_dir, + params.demultiplexing_result, + ) + run_multi(create_single_chanel_input.out.input_channel) + + } + else{ + // Multi mode + input_channel = Channel.fromPath(params.multi_input) + run_multi(input_channel) + } +} + + + +workflow SUMMARY{ + + + log.info('running summary only') + + Channel.fromPath(params.multi_input).splitCsv(header:true).map { row-> tuple(row.sampleId, file(row.hto_matrix_filtered), file(row.rna_matrix_filtered))}.set {input_list_summary} + + + demuxlet_out = Channel.fromPath("${params.outdir}/*/genetic/gene_demulti/demuxlet/demuxlet_*", type: 'dir').collect().ifEmpty('no_result') + freemuxlet_out= Channel.fromPath("${params.outdir}/*/genetic/gene_demulti/freemuxlet/freemuxlet_*", type: 'dir').collect().ifEmpty('no_result') + vireo_out= Channel.fromPath("${params.outdir}/*/genetic/gene_demulti/vireo/vireo_*", type: 'dir').collect().ifEmpty('no_result') + scSplit_out= Channel.fromPath("${params.outdir}/*/genetic/gene_demulti/scSplit/scsplit*", type: 'dir').collect().ifEmpty('no_result') + souporcell_out= Channel.fromPath("${params.outdir}/*/genetic/gene_demulti/souporcell/souporcell_*", type: 'dir').collect().ifEmpty('no_result') + + demuxlet_out_ch = demuxlet_out.flatten().map{r1-> tuple( "$r1".replaceAll(".*demuxlet_",""), r1 )} + freemuxlet_out_ch = freemuxlet_out.flatten().map{r1-> tuple( "$r1".replaceAll(".*freemuxlet_",""), r1 )} + vireo_out_ch = vireo_out.flatten().map{r1-> tuple( "$r1".replaceAll(".*vireo_",""), r1 )} + scSplit_out_ch = scSplit_out.flatten().map{r1-> tuple( "$r1".replaceAll(".*scsplit_",""), r1 )} + souporcell_out_ch = souporcell_out.flatten().map{r1-> tuple( "$r1".replaceAll(".*souporcell_",""), r1 )} + + summary_input = input_list_summary.join(souporcell_out_ch,by:0,remainder: true).join(scSplit_out_ch,by:0,remainder: true).join(vireo_out_ch,by:0,remainder: true).join(freemuxlet_out_ch,by:0,remainder: true).join(demuxlet_out_ch,by:0,remainder: true) + summary_input = summary_input.filter{ it[0] != 'no_result' } + + summary(summary_input, + params.generate_anndata, params.generate_mudata) + + // Channel.fromPath(params.multi_input) \ + // | splitCsv(header:true) \ + // | map { row-> tuple(row.sampleId, row.nsample, row.barcodes, "None", "None")} + // | join(summary.out) + // | donor_match + +} \ No newline at end of file diff --git a/test_data/download_data.sh b/test_data/download_data.sh old mode 100644 new mode 100755 diff --git a/test_data/hto/barcodes.tsv b/test_data/hto/barcodes.tsv old mode 100644 new mode 100755 diff --git a/test_data/hto/genes.tsv b/test_data/hto/genes.tsv old mode 100644 new mode 100755 diff --git a/test_data/hto/matrix.mtx b/test_data/hto/matrix.mtx old mode 100644 new mode 100755 diff --git a/test_data/multi_sample_input.csv b/test_data/multi_sample_input.csv old mode 100644 new mode 100755 diff --git a/test_data/rna/barcodes.tsv b/test_data/rna/barcodes.tsv old mode 100644 new mode 100755 diff --git a/test_data/rna/genes.tsv b/test_data/rna/genes.tsv old mode 100644 new mode 100755 diff --git a/test_data/rna/matrix.mtx b/test_data/rna/matrix.mtx old mode 100644 new mode 100755 diff --git a/test_data/simulation.R b/test_data/simulation.R old mode 100644 new mode 100755