Skip to content
This repository has been archived by the owner on Jan 27, 2020. It is now read-only.

Commit

Permalink
Merge pull request #577 from MaxUlysse/NewTestAndRefs
Browse files Browse the repository at this point in the history
Use new repo for test and reference
  • Loading branch information
Szilveszter Juhos authored Apr 30, 2018
2 parents 36ddecf + 007e56e commit 4b6d492
Show file tree
Hide file tree
Showing 65 changed files with 39 additions and 134 deletions.
6 changes: 2 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@ Annotation/
Preprocessing/
References/
Reports/
Sarek-data/
VariantCalling/
work/
.*swp
.DS_Store
.nextflow*
*.img
*.tar.gz
report.html*
timeline.html*
trace.txt*
.DS_Store
41 changes: 2 additions & 39 deletions buildReferences.nf
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,7 @@ if (params.help) exit 0, helpMessage()
if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information"
if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project <UPPMAX Project ID>"

if (!params.download && params.refDir == "" ) exit 1, "No --refDir specified"
if (params.download && params.refDir != "" ) exit 1, "No need to specify --refDir"

ch_referencesFiles = defReferencesFiles(params.genome)

if (params.download && params.genome != "smallGRCh37") exit 1, "Not possible to download ${params.genome} references files"

if (!params.download) ch_referencesFiles.each{checkFile(params.refDir + "/" + it)}
ch_referencesFiles = Channel.fromPath("${params.refDir}/*")

/*
================================================================================
Expand All @@ -73,37 +66,10 @@ if (!params.download) ch_referencesFiles.each{checkFile(params.refDir + "/" + it

startMessage()

process ProcessReference {
tag params.download ? {"Download: " + f_reference} : {"Link: " + f_reference}

input:
val(f_reference) from ch_referencesFiles

output:
file(f_reference) into ch_processedFiles

script:

if (params.download)
"""
wget https://github.com/szilvajuhos/smallRef/raw/master/${f_reference}
"""

else
"""
ln -s ${params.refDir}/${f_reference} .
"""
}


if (params.verbose) ch_processedFiles = ch_processedFiles.view {
"Files preprocessed : ${it.fileName}"
}

ch_compressedfiles = Channel.create()
ch_notCompressedfiles = Channel.create()

ch_processedFiles
ch_referencesFiles
.choice(ch_compressedfiles, ch_notCompressedfiles) {it =~ ".(gz|tar.bz2)" ? 0 : 1}

process DecompressFile {
Expand Down Expand Up @@ -294,9 +260,6 @@ def helpMessage() {
this.sarekMessage()
log.info " Usage:"
log.info " nextflow run buildReferences.nf --refDir <pathToRefDir> --genome <genome>"
log.info " nextflow run buildReferences.nf --download --genome smallGRCh37"
log.info " --download"
log.info " Download reference files. (only with --genome smallGRCh37)"
log.info " --refDir <Directoy>"
log.info " Specify a directory containing reference files."
log.info " --outDir <Directoy>"
Expand Down
Binary file removed data/tiny/dummy/normal/dummy_n_R1_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/dummy/normal/dummy_n_R2_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/dummy/tumor/dummy_t_R1_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/dummy/tumor/dummy_t_R2_xxx.fastq.gz
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed data/tiny/tiny/normal/tiny_n_L001_R1_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/normal/tiny_n_L001_R2_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/normal/tiny_n_L002_R1_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/normal/tiny_n_L002_R2_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/normal/tiny_n_L004_R1_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/normal/tiny_n_L004_R2_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/normal/tiny_n_L007_R1_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/normal/tiny_n_L007_R2_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/normal/tiny_n_L008_R1_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/normal/tiny_n_L008_R2_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/tumor/tiny_t_L001_R1_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/tumor/tiny_t_L001_R2_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/tumor/tiny_t_L002_R1_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/tumor/tiny_t_L002_R2_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/tumor/tiny_t_L003_R1_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/tumor/tiny_t_L003_R2_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/tumor/tiny_t_L005_R1_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/tumor/tiny_t_L005_R2_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/tumor/tiny_t_L006_R1_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/tumor/tiny_t_L006_R2_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/tumor/tiny_t_L007_R1_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/tiny/tumor/tiny_t_L007_R2_xxx.fastq.gz
Binary file not shown.
Binary file removed data/tiny/vcf/Strelka_1234N_variants.vcf.gz
Binary file not shown.
Binary file removed data/tiny/vcf/Strelka_1234N_variants.vcf.gz.tbi
Binary file not shown.
Binary file removed data/tiny/vcf/Strelka_9876T_variants.vcf.gz
Binary file not shown.
Binary file removed data/tiny/vcf/Strelka_9876T_variants.vcf.gz.tbi
Binary file not shown.
5 changes: 0 additions & 5 deletions data/tsv/dream-normal-s3.tsv

This file was deleted.

3 changes: 0 additions & 3 deletions data/tsv/sample.tsv

This file was deleted.

11 changes: 0 additions & 11 deletions data/tsv/tiny-manta-s3.tsv

This file was deleted.

11 changes: 0 additions & 11 deletions data/tsv/tiny-manta.tsv

This file was deleted.

13 changes: 0 additions & 13 deletions data/tsv/tiny-s3.tsv

This file was deleted.

13 changes: 0 additions & 13 deletions data/tsv/tiny.tsv

This file was deleted.

File renamed without changes.
10 changes: 3 additions & 7 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ if (params.sample) tsvPath = params.sample
// No need for tsv file for step annotate
if (!params.sample && !params.sampleDir) {
tsvPaths = [
'mapping': "${workflow.projectDir}/data/tsv/tiny.tsv",
'mapping': "${workflow.projectDir}/Sarek-data/testdata/tsv/tiny.tsv",
'realign': "${directoryMap.nonRealigned}/nonRealigned.tsv",
'recalibrate': "${directoryMap.nonRecalibrated}/nonRecalibrated.tsv"
]
Expand Down Expand Up @@ -721,12 +721,8 @@ def extractFastq(tsvFile) {
def status = returnStatus(list[2].toInteger())
def idSample = list[3]
def idRun = list[4]

// Normally path to files starts from workflow.launchDir
// But when executing workflow from Github
// Path to hosted FASTQ files starts from workflow.projectDir
def fastqFile1 = workflow.commitId && params.test ? returnFile("${workflow.projectDir}/${list[5]}") : returnFile(list[5])
def fastqFile2 = workflow.commitId && params.test ? returnFile("${workflow.projectDir}/${list[6]}") : returnFile(list[6])
def fastqFile1 = returnFile(list[5])
def fastqFile2 = returnFile(list[6])

checkFileExtension(fastqFile1,".fastq.gz")
checkFileExtension(fastqFile2,".fastq.gz")
Expand Down
2 changes: 1 addition & 1 deletion scripts/do_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ done

if [[ $GENOME = smallGRCh37 ]]
then
$GENOME = GRCh37
GENOME=GRCh37
fi

function toLower() {
Expand Down
38 changes: 21 additions & 17 deletions scripts/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ BUILD=false
KEEP=false
GENOME=smallGRCh37
PROFILE=singularity
SAMPLE=data/tsv/tiny.tsv
SAMPLE=Sarek-data/testdata/tsv/tiny.tsv
TEST=ALL
TRAVIS=${TRAVIS:-false}

Expand Down Expand Up @@ -47,26 +47,31 @@ do
esac
done

# Run a Nextflow script with the shared test settings, echoing the command first.
# Arguments: the Nextflow script followed by any script-specific options; they
# are forwarded unchanged, followed by the profile, genome and genome base.
# Reads the PROFILE and GENOME variables set at the top of this script.
function nf_test() {
  # The echoed line mirrors the command actually executed (including
  # --genome_base); "$*" joins the arguments into one string for display.
  echo "$(tput setaf 1)nextflow run $* -profile $PROFILE --genome $GENOME -resume --genome_base $PWD/References/$GENOME --verbose$(tput sgr0)"
  # "$@" and the quoted expansions keep every argument intact even when it
  # (or the current directory) contains spaces or glob characters.
  nextflow run "$@" -profile "$PROFILE" --genome "$GENOME" -resume --genome_base "$PWD/References/$GENOME" --verbose
}

# Run ./scripts/wrapper.sh with the shared test settings.
# Arguments: any wrapper.sh options (e.g. --germline --sampleDir <dir>); they
# are forwarded unchanged, followed by the profile, genome and genome base.
# Reads the PROFILE and GENOME variables set at the top of this script.
function run_wrapper() {
  # "$@" and the quoted expansions keep every argument intact even when it
  # (or the current directory) contains spaces or glob characters.
  ./scripts/wrapper.sh "$@" --profile "$PROFILE" --genome "$GENOME" --genomeBase "$PWD/References/$GENOME" --verbose
}

function clean_repo() {
if [[ $TRAVIS == false ]] && [[ $KEEP == true ]]
if [[ $TRAVIS == false ]] && [[ $KEEP == false ]]
then
echo "$(tput setaf 1)Cleaning directory$(tput sgr0)"
rm -rf work .nextflow* Preprocessing Reports Annotation VariantCalling Results
fi
}

# Build references only for smallGRCh37
if [[ $GENOME == smallGRCh37 ]] && [[ $TEST != BUILDCONTAINERS ]] && [[ BUILD ]]
then
nf_test buildReferences.nf --download --outDir References/$GENOME
if [[ ! -d Sarek-data ]]
then
echo "$(tput setaf 1)Cloning Sarek-data repository$(tput sgr0)"
git clone https://github.com/SciLifeLab/Sarek-data.git
fi
if [[ ! -d References ]]
then
echo "$(tput setaf 1)Building references$(tput sgr0)"
nextflow run buildReferences.nf --refDir Sarek-data/reference --outDir References/$GENOME -profile $PROFILE --genome $GENOME --verbose
fi
# Remove images only on TRAVIS
if [[ $PROFILE == docker ]] && [[ $TRAVIS == true ]]
then
Expand All @@ -79,21 +84,21 @@ fi

if [[ ALL,DIR =~ $TEST ]]
then
run_wrapper --germline --sampleDir data/tiny/tiny/normal
run_wrapper --germline --sampleDir Sarek-data/testdata/tiny/normal
clean_repo
fi

if [[ ALL,STEP =~ $TEST ]]
then
run_wrapper --germline --sampleDir data/tiny/tiny/normal
run_wrapper --germline --sampleDir Sarek-data/testdata/tiny/normal
run_wrapper --germline --step realign --noReports
run_wrapper --germline --step recalibrate --noReports
clean_repo
fi

if [[ ALL,GERMLINE =~ $TEST ]]
then
run_wrapper --germline --sampleDir data/tiny/tiny/normal --variantCalling --tools HaplotypeCaller
run_wrapper --germline --sampleDir Sarek-data/testdata/tiny/normal --variantCalling --tools HaplotypeCaller
clean_repo
fi

Expand All @@ -104,8 +109,8 @@ fi

if [[ ALL,MANTA =~ $TEST ]]
then
run_wrapper --somatic --sample data/tsv/tiny-manta.tsv --variantCalling --tools Manta --noReports
run_wrapper --somatic --sample data/tsv/tiny-manta.tsv --variantCalling --tools Manta,Strelka --noReports --strelkaBP
run_wrapper --somatic --sample Sarek-data/testdata/tsv/tiny-manta.tsv --variantCalling --tools Manta --noReports
run_wrapper --somatic --sample Sarek-data/testdata/tsv/tiny-manta.tsv --variantCalling --tools Manta,Strelka --noReports --strelkaBP
clean_repo
fi

Expand All @@ -131,13 +136,12 @@ then
rm -rf work/singularity/sarek-latest.img
rm -rf work/singularity/picard-latest.img
fi
run_wrapper --annotate --tools ${ANNOTATOR} --annotateVCF data/tiny/vcf/Strelka_1234N_variants.vcf.gz --noReports
run_wrapper --annotate --tools ${ANNOTATOR} --annotateVCF data/tiny/vcf/Strelka_1234N_variants.vcf.gz,data/tiny/vcf/Strelka_9876T_variants.vcf.gz
run_wrapper --annotate --tools ${ANNOTATOR} --annotateVCF Sarek-data/testdata/vcf/Strelka_1234N_variants.vcf.gz --noReports
run_wrapper --annotate --tools ${ANNOTATOR} --annotateVCF Sarek-data/testdata/vcf/Strelka_1234N_variants.vcf.gz,Sarek-data/testdata/vcf/Strelka_9876T_variants.vcf.gz
clean_repo
fi

if [[ ALL,BUILDCONTAINERS =~ $TEST ]] && [[ $PROFILE == docker ]]
then
nf_test buildContainers.nf --docker --containers gatk,igvtools,mutect1,picard,qctools,runallelecount,r-base,snpeff,sarek
clean_repo
./scripts/do_all.sh --genome $GENOME
fi
20 changes: 10 additions & 10 deletions scripts/wrapper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -98,61 +98,61 @@ function run_sarek() {

if [[ $GERMLINE == true ]] && [[ $SOMATIC == true ]]
then
echo "Germline and Somatic"
echo "$(tput setaf 1)Germline and Somatic$(tput sgr0)"
exit
fi

if [[ $GERMLINE == true ]] && [[ $ANNOTATE == true ]]
then
echo "Germline and Annotate"
echo "$(tput setaf 1)Germline and Annotate$(tput sgr0)"
exit
fi

if [[ $SOMATIC == true ]] && [[ $SAMPLEDIR != '' ]]
then
echo "Directory defined for Somatic"
echo "$(tput setaf 1)Directory defined for Somatic$(tput sgr0)"
exit
fi

if [[ $GERMLINE == true ]] && [[ $SAMPLEDIR != '' ]]
then
echo "Germline with SampleDir"
echo "$(tput setaf 1)Germline with SampleDir$(tput sgr0)"
run_sarek main.nf --step $STEP --sampleDir $SAMPLEDIR
fi

if [[ $GERMLINE == true ]] && [[ $SAMPLETSV != '' ]]
then
echo "Germline with TSV"
echo "$(tput setaf 1)Germline with TSV$(tput sgr0)"
run_sarek main.nf --step $STEP --sample $SAMPLETSV
fi

if [[ $GERMLINE == true ]] && [[ $VARIANTCALLING == true ]]
then
echo "GermlineVC"
echo "$(tput setaf 1)GermlineVC$(tput sgr0)"
run_sarek germlineVC.nf --tools $TOOLS
fi

if [[ $SOMATIC == true ]] && [[ $SAMPLETSV != '' ]]
then
echo "Somatic with TSV"
echo "$(tput setaf 1)Somatic with TSV$(tput sgr0)"
run_sarek main.nf --step $STEP --sample $SAMPLETSV
fi

if [[ $SOMATIC == true ]] && [[ $VARIANTCALLING == true ]]
then
echo "SomaticVC"
echo "$(tput setaf 1)SomaticVC$(tput sgr0)"
run_sarek germlineVC.nf --tools $TOOLS
run_sarek somaticVC.nf --tools $TOOLS
fi

if [[ $ANNOTATE == true ]]
then
echo "Annotate"
echo "$(tput setaf 1)Annotate$(tput sgr0)"
run_sarek annotate.nf --tools $TOOLS --annotateVCF $ANNOTATEVCF
fi

if [[ $REPORTS == true ]]
then
echo "Reports"
echo "$(tput setaf 1)Reports$(tput sgr0)"
run_sarek runMultiQC.nf
fi

0 comments on commit 4b6d492

Please sign in to comment.