Skip to content

Commit

Permalink
Merge pull request #1266 from FriederikeHanssen/ncbench
Browse files Browse the repository at this point in the history
Ncbench
  • Loading branch information
FriederikeHanssen authored Oct 4, 2023
2 parents df6df02 + 13c23fc commit 8eaf157
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 5 deletions.
15 changes: 11 additions & 4 deletions .github/workflows/awstest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ on:
description: "Trigger somatic full test on AWS"
type: boolean
default: false
germline:
description: "Trigger germline full test on AWS"
germline_wgs:
description: "Trigger germline full test (WGS) on AWS"
type: boolean
default: false
germline_wes:
description: "Trigger germline full test (WES) on AWS"
type: boolean
default: false

Expand All @@ -32,8 +36,11 @@ jobs:
enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.profiletest ) }}
- profile: test_full
enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.somatic ) }}
- profile: test_full_germline
enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.germline ) }}
# WGS germline full test. The Nextflow profile backing the WGS samplesheet is
# declared as `test_full_germline` in nextflow.config, so the matrix entry uses
# that name while the dispatch input keeps the explicit `germline_wgs` label.
# NOTE(review): assumes matrix.profile is forwarded as the Nextflow profile by
# the launch step — confirm.
- profile: test_full_germline
  enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.germline_wgs ) }}
- profile: test_full_germline_wes
enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.germline_wes ) }}

steps:
# Launch workflow using Tower CLI tool action
- name: Launch workflow via tower
Expand Down
233 changes: 233 additions & 0 deletions .github/workflows/ncbench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
name: NCBench Zenodo Upload
# Uploads the variant-calling VCFs of a full-size AWS test run to Zenodo so
# they can be referenced from NCBench.
# This workflow can be triggered manually with the GitHub actions workflow dispatch button.

on:
  # Upload can only be triggered manually for now. No `pull_request` trigger is
  # declared: with one, every PR against nf-core/sarek would create a new
  # Zenodo entry.
  workflow_dispatch:
    inputs:
      germline_wes:
        description: "Trigger NCBench upload"
        type: boolean
        default: true

jobs:
  ncbench-upload:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - profile: germline_wes
            enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.germline_wes ) }}
    env:
      # Quoted so the version is always handled as a string.
      pipeline_version: "3.2.3"

    steps:
      # Checkout runs first: actions/checkout clears the workspace, so running
      # it after the S3 download would delete the downloaded VCFs.
      - name: Download metadata file for Zenodo
        uses: actions/checkout@v4
        if: ${{ matrix.enabled }}
        with:
          sparse-checkout: |
            assets/zenodo_ncbench.json
          sparse-checkout-cone-mode: false

      - name: Download files from AWS
        uses: keithweaver/aws-s3-github-action@v1.0.0
        if: ${{ matrix.enabled }}
        with:
          revision: ${{ github.sha }}
          source: s3://nf-core-awsmegatests/sarek/results-test-59026dc07633edb83aab3bfb2f65f79db38437a1/test_full/variant_calling/strelka/
          command: sync
          destination: ./variant_calling
          # aws s3 filters are glob patterns applied in order, and everything is
          # included by default: exclude all, re-include VCFs, then drop gVCFs.
          flags: '--no-sign-request --exclude "*" --include "*.vcf.gz" --exclude "*.g.vcf.gz"'

      - name: Create new Zenodo entry
        uses: popperized/zenodo/create@master
        if: ${{ matrix.enabled }}
        env:
          ZENODO_METADATA_PATH: assets/zenodo_ncbench.json
        with:
          secrets: ${{ secrets.ZENODO_API_TOKEN }}

      - name: Upload file to Zenodo
        uses: popperized/zenodo/upload@master
        if: ${{ matrix.enabled }}
        env:
          ZENODO_DEPOSIT_ID: # TODO: get the deposit ID from the previous step
          # `env:` values are not shell-expanded, so the job-level variable is
          # read through the `env` context instead of `$pipeline_version`.
          # NOTE(review): the download step writes to ./variant_calling, not to
          # this versioned path — confirm which directory should be uploaded.
          ZENODO_UPLOAD_PATH: ./variant_calling_v${{ env.pipeline_version }}
          ZENODO_USE_SANDBOX: true
        with:
          secrets: ${{ secrets.ZENODO_API_TOKEN }}

# - name: Install jq
# uses: sergeysova/jq-action@v2
# - name: NCBench set config
# uses: actions/checkout@v4
# with:
# repository: FriederikeHanssen/ncbench
# path: ncbench
# ref: main
# token: ${{ github.token }}
# sparse-checkout: |
# config/config.yaml
# sparse-checkout-cone-mode: false
# run: |
# jq --help >> config.yaml




# with:
# repository: github.com/FriederikeHanssen/ncbench
# path: ncbench
# ref: main
# TODO: how to retrieve the pipeline and tool versions
# TODO: how to get the zenodo id
#run: |
# jq --version


# git checkout -b ${{ github.sha }}
# echo " nf-core-sarek-$pipeline_version-deepvariants-agilent-200M:" >> config.yaml
# echo " labels:" >> config.yaml
# echo " site: nf-core" >> config.yaml
# echo " pipeline: nf-core/sarek v$" >> config.yaml
# echo " trimming: Fastp v0.23." >> config.yaml
# echo " read-mapping: bwa mem v0.7." >> config.yaml
# echo " base-quality-recalibration: gatk4 v4.4.0." >> config.yaml
# echo " realignment: " >> config.yaml
# echo " variant-detection: deepvariant v1.5." >> config.yaml
# echo " genotyping: " >> config.yaml
# echo " reads: " >> config.yaml
# echo " subcategory: NA12878-agilent" >> config.yaml
# echo " zenodo:" >> config.yaml
# echo " deposition: ?" >> config.yaml
# echo " filename: nf-core-sarek-$pipeline_version/deepvariant/NA12878_200M/NA12878_200M.deepvariant.vcf." >> config.yaml
# echo " benchmark: giab-NA12878-agilent-" >> config.yaml
# echo " rename-contigs: resources/rename-contigs/ucsc-to-ensembl.txt" >> config.yaml
# git config user.name github-actions
# git config user.email github-actions@github.com
# git add .
# git commit -m "Benchmarking ${{ github.sha }}"
# git push origin ${{ github.sha }}



# TB-sarek311-freebayes-agilent-200M:
# labels:
# site: TB QBiC
# pipeline: nf-core/sarek v3.1.1
# trimming: Fastp v0.23.2
# read-mapping: bwa mem v0.7.17
# base-quality-recalibration: gatk4 v4.3.0.0
# realignment: none
# variant-detection: freebayes v1.3.6
# genotyping: none
# reads: 200M
# subcategory: NA12878-agilent
# zenodo:
# deposition: 7376244
# filename: WES_agilent_high_cov_sarek311_NA12878_freebayes.vcf.gz
# benchmark: giab-NA12878-agilent-200M
# rename-contigs: resources/rename-contigs/ucsc-to-ensemb

# TB-sarek311-haplotypecaller-agilent-200M:
# labels:
# site: TB QBiC
# pipeline: nf-core/sarek v3.1.1
# trimming: Fastp v0.23.2
# read-mapping: bwa mem v0.7.17
# base-quality-recalibration: gatk4 v4.3.0.0
# realignment: none
# variant-detection: gatk-haplotypecaller v4.3.0.0
# genotyping: none
# reads: 200M
# subcategory: NA12878-agilent
# zenodo:
# deposition: 7376244
# filename: WES_agilent_high_cov_sarek311_NA12878_haplotypecaller.vcf.gz
# benchmark: giab-NA12878-agilent-200M
# rename-contigs: resources/rename-contigs/ucsc-to-ensemb
# TB-sarek311-strelka-agilent-200M:
# labels:
# site: TB QBiC
# pipeline: nf-core/sarek v3.1.1
# trimming: Fastp v0.23.2
# read-mapping: bwa mem v0.7.17
# base-quality-recalibration: gatk4 v4.3.0.0
# realignment: none
# variant-detection: strelka2 v2.9.10
# genotyping: none
# reads: 200M
# subcategory: NA12878-agilent
# zenodo:
# deposition: 7376244
# filename: WES_agilent_high_cov_sarek311_NA12878_strelka.vcf.gz
# benchmark: giab-NA12878-agilent-200M
# rename-contigs: resources/rename-contigs/ucsc-to-ensemb
# TB-sarek311-deepvariants-agilent-75M:
# labels:
# site: TB QBiC
# pipeline: nf-core/sarek v3.1.1
# trimming: Fastp v0.23.2
# read-mapping: bwa mem v0.7.17
# base-quality-recalibration: gatk4 v4.3.0.0
# realignment: none
# variant-detection: DeepVariant v1.3.6
# genotyping: none
# reads: 75M
# subcategory: NA12878-agilent
# zenodo:
# deposition: 7376244
# filename: WES_agilent_low_cov_sarek311_NA12878_freebayes.vcf.gz
# benchmark: giab-NA12878-agilent-75M
# rename-contigs: resources/rename-contigs/ucsc-to-ensemb
# TB-sarek311-freebayes-agilent-75M:
# labels:
# site: TB QBiC
# pipeline: nf-core/sarek v3.1.1
# trimming: Fastp v0.23.2
# read-mapping: bwa mem v0.7.17
# base-quality-recalibration: gatk4 v4.3.0.0
# realignment: none
# variant-detection: freebayes v1.3.6
# genotyping: none
# reads: 75M
# subcategory: NA12878-agilent
# zenodo:
# deposition: 7376244
# filename: WES_agilent_low_cov_sarek311_NA12878_freebayes.vcf.gz
# benchmark: giab-NA12878-agilent-75M
# rename-contigs: resources/rename-contigs/ucsc-to-ensemb
# nfcore-sarek-${{ github.sha }}-haplotypecaller-agilent-75M:
# labels:
# site: nf-core
# pipeline: nf-core/sarek v
# trimming: Fastp v0.23.2
# read-mapping: bwa mem v0.7.17
# base-quality-recalibration: gatk4 v4.3.0.0
# realignment: none
# variant-detection: gatk-haplotypecaller v4.3.0.0
# genotyping: none
# reads: 75M
# subcategory: NA12878-agilent
# zenodo:
# deposition:
# filename: WES_agilent_low_cov_sarek311_NA12878_haplotypecaller.vcf.gz
# benchmark: giab-NA12878-agilent-75M
# rename-contigs: resources/rename-contigs/ucsc-to-ensemb
# TB-sarek311-strelka-agilent-75M:
# labels:
# site: TB QBiC
# pipeline: nf-core/sarek v3.1.1
# trimming: Fastp v0.23.2
# read-mapping: bwa mem v0.7.17
# base-quality-recalibration: gatk4 v4.3.0.0
# realignment: none
# variant-detection: strelka2 v2.9.10
# genotyping: none
# reads: 75M
# subcategory: NA12878-agilent
# zenodo:
# deposition: 7376244
# filename: WES_agilent_low_cov_sarek311_NA12878_strelka.vcf.gz
# benchmark: giab-NA12878-agilent-75M
# rename-contigs: resources/rename-contigs/ucsc-to-ensemb
13 changes: 13 additions & 0 deletions assets/zenodo_ncbench.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
  "metadata": {
    "title": "nf-core/sarek v3.1.1 NCBench vcf files",
    "upload_type": "dataset",
    "description": "Agilent datasets for NCBench",
    "creators": [
      {
        "name": "Garcia, Maxime U"
      },
      {
        "name": "Hanssen, Friederike"
      }
    ]
  }
}
2 changes: 1 addition & 1 deletion conf/test_full_germline.config
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ params {
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/NA12878_WGS_30x_full_test.csv'

// Other params
tools = 'strelka,freebayes,haplotypecaller,deepvariant,manta,tiddit,cnvkit,vep'
tools = 'strelka,freebayes,haplotypecaller,deepvariant,manta,tiddit,cnvkit,vep,snpeff'
split_fastq = 50000000
use_annotation_cache_keys = true
}
25 changes: 25 additions & 0 deletions conf/test_full_germline_wes.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running full-size tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a full size pipeline test.
Use as follows:
nextflow run nf-core/sarek -profile test_full_germline_wes,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/

// Parameters for the full-size germline test that runs on the NA12878 Agilent
// samplesheet (registered as the `test_full_germline_wes` profile).
params {
// Name and description identifying this test profile
config_profile_name = 'Full test profile for germline VC'
config_profile_description = 'Full test dataset to check germline VC pipeline function'

// Input data for full size test
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/NA12878_Agilent_full_test.csv'

// Other params
// Agilent v7 BED file fetched from Zenodo record 6513789
intervals = 'https://zenodo.org/record/6513789/files/Agilent_v7.bed' //TODO: upload own intervals files prepended with "chr"
// NOTE(review): presumably marks the data as whole-exome/targeted sequencing — confirm against the pipeline parameter docs
wes = true
// Only the four small-variant callers are run here; no annotation tools are listed
tools = 'strelka,freebayes,haplotypecaller,deepvariant'
use_annotation_cache_keys = true
}
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ profiles {
// Extra test profiles for full tests on AWS
test_full { includeConfig 'conf/test_full.config' }
test_full_germline { includeConfig 'conf/test_full_germline.config' }
test_full_germline_wes { includeConfig 'conf/test_full_germline_wes.config' }
// Extra test profiles for more complete CI
alignment_to_fastq { includeConfig 'conf/test/alignment_to_fastq.config' }
annotation { includeConfig 'conf/test/annotation.config' }
Expand Down

0 comments on commit 8eaf157

Please sign in to comment.