diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index 1f2cf7e557..2fa022da3c 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -15,8 +15,12 @@ on:
         description: "Trigger somatic full test on AWS"
         type: boolean
         default: false
-      germline:
-        description: "Trigger germline full test on AWS"
+      germline_wgs:
+        description: "Trigger germline full test (WGS) on AWS"
+        type: boolean
+        default: false
+      germline_wes:
+        description: "Trigger germline full test (WES) on AWS"
         type: boolean
         default: false
 
@@ -32,8 +36,12 @@ jobs:
             enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.profiletest ) }}
           - profile: test_full
             enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.somatic ) }}
-          - profile: test_full_germline
-            enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.germline ) }}
+          # WGS run keeps the `test_full_germline` profile: nextflow.config defines no `test_full_germline_wgs`
+          - profile: test_full_germline
+            enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.germline_wgs ) }}
+          - profile: test_full_germline_wes
+            enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.germline_wes ) }}
+
     steps:
       # Launch workflow using Tower CLI tool action
       - name: Launch workflow via tower
diff --git a/.github/workflows/ncbench.yml b/.github/workflows/ncbench.yml
new file mode 100644
index 0000000000..614e2dd722
--- /dev/null
+++ b/.github/workflows/ncbench.yml
@@ -0,0 +1,243 @@
+name: NCBench Zenodo Upload
+# This workflow can be triggered manually with the GitHub actions workflow dispatch button.
+
+on:
+  # upload can only be triggered manually for now
+  workflow_dispatch:
+    inputs:
+      germline_wes:
+        description: "Trigger NCBench upload"
+        type: boolean
+        default: true
+
+jobs:
+  ncbench-upload:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - profile: germline_wes
+            enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.germline_wes ) }}
+    env:
+      # Quoted so the version is always handled as a string
+      pipeline_version: "3.2.3"
+
+    steps:
+      # Check out first: on a workspace that is not yet a git repository, actions/checkout
+      # deletes the existing contents, which would wipe VCFs downloaded by an earlier step
+      - name: Download metadata file for Zenodo
+        uses: actions/checkout@v4
+        if: ${{ matrix.enabled }}
+        with:
+          sparse-checkout: |
+            assets/zenodo_ncbench.json
+          sparse-checkout-cone-mode: false
+
+      - name: Download files from AWS
+        uses: keithweaver/aws-s3-github-action@v1.0.0
+        if: ${{ matrix.enabled }}
+        with:
+          revision: ${{ github.sha }}
+          source: s3://nf-core-awsmegatests/sarek/results-test-59026dc07633edb83aab3bfb2f65f79db38437a1/test_full/variant_calling/strelka/
+          command: sync
+          # Must be the same directory that the Zenodo upload step reads (ZENODO_UPLOAD_PATH)
+          destination: ./variant_calling_v${{ env.pipeline_version }}
+          # s3 sync includes everything by default: exclude all first, later filters take precedence
+          flags: --no-sign-request --exclude "*" --include "*.vcf.gz" --exclude "*.g.vcf.gz"
+
+      # NOTE(review): popperized/zenodo is tracked at @master; pin to a tag or commit SHA once verified
+      - name: Create new Zenodo entry
+        uses: popperized/zenodo/create@master
+        if: ${{ matrix.enabled }}
+        env:
+          ZENODO_METADATA_PATH: assets/zenodo_ncbench.json
+        with:
+          secrets: ${{ secrets.ZENODO_API_TOKEN }}
+
+      - name: Upload file to Zenodo
+        uses: popperized/zenodo/upload@master
+        if: ${{ matrix.enabled }}
+        env:
+          # TODO: get the deposit ID from the previous step
+          ZENODO_DEPOSIT_ID: ""
+          # `$name` is not expanded inside `env:`; the expression syntax is required
+          ZENODO_UPLOAD_PATH: ./variant_calling_v${{ env.pipeline_version }}
+          ZENODO_USE_SANDBOX: "true"
+        with:
+          secrets: ${{ secrets.ZENODO_API_TOKEN }}
+
+      # - name: Install jq
+      #   uses: sergeysova/jq-action@v2
+      # - name: NCBench set config
+      #   uses: actions/checkout@v4
+      #   with:
+      #     repository: FriederikeHanssen/ncbench
+      #     path: ncbench
+      #     ref: main
+      #     token: ${{ github.token }}
+      #     sparse-checkout: |
+      #       config/config.yaml
+      #     sparse-checkout-cone-mode: false
+      #   run: |
+      #     jq --help >> config.yaml
+
+
+
+
+      # with:
+      #   repository: github.com/FriederikeHanssen/ncbench
+      #   path: ncbench
+      #   ref: main
+      # TODO: how to retrieve the pipeline and tool versions
+      # TODO: how to get the zenodo id
+      #run: |
+      # jq --version
+
+
+# git checkout -b ${{ github.sha }}
+# echo " nf-core-sarek-$pipeline_version-deepvariants-agilent-200M:" >> config.yaml
+# echo " labels:" >> config.yaml
+# echo " site: nf-core" >> config.yaml
+# echo " pipeline: nf-core/sarek v$" >> config.yaml
+# echo " trimming: Fastp v0.23." >> config.yaml
+# echo " read-mapping: bwa mem v0.7." >> config.yaml
+# echo " base-quality-recalibration: gatk4 v4.4.0." >> config.yaml
+# echo " realignment: " >> config.yaml
+# echo " variant-detection: deepvariant v1.5." >> config.yaml
+# echo " genotyping: " >> config.yaml
+# echo " reads: " >> config.yaml
+# echo " subcategory: NA12878-agilent" >> config.yaml
+# echo " zenodo:" >> config.yaml
+# echo " deposition: ?" >> config.yaml
+# echo " filename: nf-core-sarek-$pipeline_version/deepvariant/NA12878_200M/NA12878_200M.deepvariant.vcf." >> config.yaml
+# echo " benchmark: giab-NA12878-agilent-" >> config.yaml
+# echo " rename-contigs: resources/rename-contigs/ucsc-to-ensembl.txt" >> config.yaml
+# git config user.name github-actions
+# git config user.email github-actions@github.com
+# git add .
+# git commit -m "Benchmarking ${{ github.sha }}" +# git push origin ${{ github.sha }} + + + +# TB-sarek311-freebayes-agilent-200M: +# labels: +# site: TB QBiC +# pipeline: nf-core/sarek v3.1.1 +# trimming: Fastp v0.23.2 +# read-mapping: bwa mem v0.7.17 +# base-quality-recalibration: gatk4 v4.3.0.0 +# realignment: none +# variant-detection: freebayes v1.3.6 +# genotyping: none +# reads: 200M +# subcategory: NA12878-agilent +# zenodo: +# deposition: 7376244 +# filename: WES_agilent_high_cov_sarek311_NA12878_freebayes.vcf.gz +# benchmark: giab-NA12878-agilent-200M +# rename-contigs: resources/rename-contigs/ucsc-to-ensemb + +# TB-sarek311-haplotypecaller-agilent-200M: +# labels: +# site: TB QBiC +# pipeline: nf-core/sarek v3.1.1 +# trimming: Fastp v0.23.2 +# read-mapping: bwa mem v0.7.17 +# base-quality-recalibration: gatk4 v4.3.0.0 +# realignment: none +# variant-detection: gatk-haplotypecaller v4.3.0.0 +# genotyping: none +# reads: 200M +# subcategory: NA12878-agilent +# zenodo: +# deposition: 7376244 +# filename: WES_agilent_high_cov_sarek311_NA12878_haplotypecaller.vcf.gz +# benchmark: giab-NA12878-agilent-200M +# rename-contigs: resources/rename-contigs/ucsc-to-ensemb +# TB-sarek311-strelka-agilent-200M: +# labels: +# site: TB QBiC +# pipeline: nf-core/sarek v3.1.1 +# trimming: Fastp v0.23.2 +# read-mapping: bwa mem v0.7.17 +# base-quality-recalibration: gatk4 v4.3.0.0 +# realignment: none +# variant-detection: strelka2 v2.9.10 +# genotyping: none +# reads: 200M +# subcategory: NA12878-agilent +# zenodo: +# deposition: 7376244 +# filename: WES_agilent_high_cov_sarek311_NA12878_strelka.vcf.gz +# benchmark: giab-NA12878-agilent-200M +# rename-contigs: resources/rename-contigs/ucsc-to-ensemb +# TB-sarek311-deepvariants-agilent-75M: +# labels: +# site: TB QBiC +# pipeline: nf-core/sarek v3.1.1 +# trimming: Fastp v0.23.2 +# read-mapping: bwa mem v0.7.17 +# base-quality-recalibration: gatk4 v4.3.0.0 +# realignment: none +# variant-detection: DeepVariant v1.3.6 +# 
genotyping: none +# reads: 75M +# subcategory: NA12878-agilent +# zenodo: +# deposition: 7376244 +# filename: WES_agilent_low_cov_sarek311_NA12878_freebayes.vcf.gz +# benchmark: giab-NA12878-agilent-75M +# rename-contigs: resources/rename-contigs/ucsc-to-ensemb +# TB-sarek311-freebayes-agilent-75M: +# labels: +# site: TB QBiC +# pipeline: nf-core/sarek v3.1.1 +# trimming: Fastp v0.23.2 +# read-mapping: bwa mem v0.7.17 +# base-quality-recalibration: gatk4 v4.3.0.0 +# realignment: none +# variant-detection: freebayes v1.3.6 +# genotyping: none +# reads: 75M +# subcategory: NA12878-agilent +# zenodo: +# deposition: 7376244 +# filename: WES_agilent_low_cov_sarek311_NA12878_freebayes.vcf.gz +# benchmark: giab-NA12878-agilent-75M +# rename-contigs: resources/rename-contigs/ucsc-to-ensemb +# nfcore-sarek-${{ github.sha }}-haplotypecaller-agilent-75M: +# labels: +# site: nf-core +# pipeline: nf-core/sarek v +# trimming: Fastp v0.23.2 +# read-mapping: bwa mem v0.7.17 +# base-quality-recalibration: gatk4 v4.3.0.0 +# realignment: none +# variant-detection: gatk-haplotypecaller v4.3.0.0 +# genotyping: none +# reads: 75M +# subcategory: NA12878-agilent +# zenodo: +# deposition: +# filename: WES_agilent_low_cov_sarek311_NA12878_haplotypecaller.vcf.gz +# benchmark: giab-NA12878-agilent-75M +# rename-contigs: resources/rename-contigs/ucsc-to-ensemb +# TB-sarek311-strelka-agilent-75M: +# labels: +# site: TB QBiC +# pipeline: nf-core/sarek v3.1.1 +# trimming: Fastp v0.23.2 +# read-mapping: bwa mem v0.7.17 +# base-quality-recalibration: gatk4 v4.3.0.0 +# realignment: none +# variant-detection: strelka2 v2.9.10 +# genotyping: none +# reads: 75M +# subcategory: NA12878-agilent +# zenodo: +# deposition: 7376244 +# filename: WES_agilent_low_cov_sarek311_NA12878_strelka.vcf.gz +# benchmark: giab-NA12878-agilent-75M +# rename-contigs: resources/rename-contigs/ucsc-to-ensemb diff --git a/assets/zenodo_ncbench.json b/assets/zenodo_ncbench.json new file mode 100644 index 
0000000000..d96fcb719b
--- /dev/null
+++ b/assets/zenodo_ncbench.json
@@ -0,0 +1,15 @@
+{
+  "metadata": {
+    "title": "nf-core/sarek v3.1.1 NCBench vcf files",
+    "upload_type": "dataset",
+    "description": "Agilent datasets for NCBench",
+    "creators": [
+      {
+        "name": "Garcia, Maxime U"
+      },
+      {
+        "name": "Hanssen, Friederike"
+      }
+    ]
+  }
+}
diff --git a/conf/test_full_germline.config b/conf/test_full_germline.config
index c477f7d6f6..c0dfd980cd 100644
--- a/conf/test_full_germline.config
+++ b/conf/test_full_germline.config
@@ -18,7 +18,7 @@ params {
     input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/NA12878_WGS_30x_full_test.csv'
 
     // Other params
-    tools = 'strelka,freebayes,haplotypecaller,deepvariant,manta,tiddit,cnvkit,vep'
+    tools = 'strelka,freebayes,haplotypecaller,deepvariant,manta,tiddit,cnvkit,vep,snpeff'
     split_fastq = 50000000
     use_annotation_cache_keys = true
 }
diff --git a/conf/test_full_germline_wes.config b/conf/test_full_germline_wes.config
new file mode 100644
index 0000000000..c1d157e36c
--- /dev/null
+++ b/conf/test_full_germline_wes.config
@@ -0,0 +1,25 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running full-size tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a full size pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/sarek -profile test_full_germline_wes,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'Full test profile for germline VC'
+    config_profile_description = 'Full test dataset to check germline VC pipeline function'
+
+    // Input data for full size test
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/NA12878_Agilent_full_test.csv'
+
+    // Other params
+    intervals = 'https://zenodo.org/record/6513789/files/Agilent_v7.bed' // TODO: upload own intervals files prepended with "chr"
+    wes = true
+    tools = 'strelka,freebayes,haplotypecaller,deepvariant'
+    use_annotation_cache_keys = true
+}
diff --git a/nextflow.config b/nextflow.config
index 123a58ae90..141ffd9c34 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -248,6 +248,7 @@ profiles {
     // Extra test profiles for full tests on AWS
     test_full { includeConfig 'conf/test_full.config' }
     test_full_germline { includeConfig 'conf/test_full_germline.config' }
+    test_full_germline_wes { includeConfig 'conf/test_full_germline_wes.config' }
     // Extra test profiles for more complete CI
     alignment_to_fastq { includeConfig 'conf/test/alignment_to_fastq.config' }
     annotation { includeConfig 'conf/test/annotation.config' }