Merge pull request #1266 from FriederikeHanssen/ncbench

Ncbench
nf-core · Oct 4, 2023 · 8eaf157 · 8eaf157
2 parents df6df02 + 13c23fc
commit 8eaf157
Show file tree

Hide file tree

Showing 6 changed files with 284 additions and 5 deletions.
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
@@ -15,8 +15,12 @@ on:
         description: "Trigger somatic full test on AWS"
         type: boolean
         default: false
-      germline:
-        description: "Trigger germline full test on AWS"
+      germline_wgs:
+        description: "Trigger germline full test (WGS) on AWS"
+        type: boolean
+        default: false
+      germline_wes:
+        description: "Trigger germline full test (WES) on AWS"
         type: boolean
         default: false
 
@@ -32,8 +36,11 @@ jobs:
             enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.profiletest ) }}
           - profile: test_full
             enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.somatic ) }}
-          - profile: test_full_germline
-            enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.germline ) }}
+          # WGS germline full test. The profile registered in nextflow.config for the
+          # WGS dataset is `test_full_germline`; the visible profile list defines no
+          # `test_full_germline_wgs`, so the matrix must use the registered name.
+          - profile: test_full_germline
+            enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.germline_wgs ) }}
+          - profile: test_full_germline_wes
+            enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.germline_wes ) }}
+
     steps:
       # Launch workflow using Tower CLI tool action
       - name: Launch workflow via tower

diff --git a/.github/workflows/ncbench.yml b/.github/workflows/ncbench.yml
@@ -0,0 +1,233 @@
+name: NCBench Zenodo Upload
+# Downloads variant-calling VCFs from the AWS full-test results bucket and
+# uploads them to a new Zenodo deposit.
+# This workflow can be triggered manually with the GitHub Actions workflow dispatch button.
+
+on:
+  # Upload can only be triggered manually for now, so no `pull_request` trigger:
+  # it would create a Zenodo entry for every PR.
+  workflow_dispatch:
+    inputs:
+      germline_wes:
+        description: "Trigger NCBench upload"
+        type: boolean
+        default: true
+
+jobs:
+  ncbench-upload:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - profile: germline_wes
+            enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.germline_wes ) }}
+    env:
+      # Quoted so the version is always handled as a string.
+      pipeline_version: "3.2.3"
+
+    steps:
+      # Check out before downloading: actions/checkout clears a workspace that is
+      # not yet a git repository, which would delete files downloaded earlier.
+      - name: Download metadata file for Zenodo
+        uses: actions/checkout@v4
+        if: ${{ matrix.enabled }}
+        with:
+          sparse-checkout: |
+            assets/zenodo_ncbench.json
+          sparse-checkout-cone-mode: false
+
+      - name: Download files from AWS
+        uses: keithweaver/aws-s3-github-action@v1.0.0
+        if: ${{ matrix.enabled }}
+        with:
+          revision: ${{ github.sha }}
+          source: s3://nf-core-awsmegatests/sarek/results-test-59026dc07633edb83aab3bfb2f65f79db38437a1/test_full/variant_calling/strelka/
+          command: sync
+          # Same directory that the upload step reads from.
+          destination: ./variant_calling_v${{ env.pipeline_version }}
+          # S3 filters are applied in order and everything is included by default:
+          # exclude all, re-include VCFs, then drop gVCFs.
+          flags: '--no-sign-request --exclude "*" --include "*.vcf.gz" --exclude "*.g.vcf.gz"'
+
+      - name: Create new Zenodo entry
+        uses: popperized/zenodo/create@master
+        if: ${{ matrix.enabled }}
+        env:
+          ZENODO_METADATA_PATH: assets/zenodo_ncbench.json
+          # Create and upload must target the same Zenodo instance.
+          # NOTE(review): assumes the create action honours ZENODO_USE_SANDBOX
+          # like the upload step does - confirm.
+          ZENODO_USE_SANDBOX: "true"
+        with:
+          secrets: ${{ secrets.ZENODO_API_TOKEN }}
+
+      - name: Upload file to Zenodo
+        uses: popperized/zenodo/upload@master
+        if: ${{ matrix.enabled }}
+        env:
+          # TODO: get the deposit ID from the previous step
+          ZENODO_DEPOSIT_ID: ""
+          # Workflow expression instead of `$pipeline_version`: env values are not
+          # shell-expanded, so the literal text would have been passed through.
+          ZENODO_UPLOAD_PATH: ./variant_calling_v${{ env.pipeline_version }}
+          ZENODO_USE_SANDBOX: "true"
+        with:
+          secrets: ${{ secrets.ZENODO_API_TOKEN }}
+
+      # - name: Install jq
+      #   uses: sergeysova/jq-action@v2
+      # - name: NCBench set config
+      #   uses: actions/checkout@v4
+      #   with:
+      #     repository: FriederikeHanssen/ncbench
+      #     path: ncbench
+      #     ref: main
+      #     token: ${{ github.token }}
+      #     sparse-checkout: |
+      #       config/config.yaml
+      #     sparse-checkout-cone-mode: false
+      #     run: |
+      #       jq --help >> config.yaml
+
+
+
+
+        # with:
+        #   repository: github.com/FriederikeHanssen/ncbench
+        #   path: ncbench
+        #   ref: main
+        # TODO: how to retrieve the pipeline and tool versions
+        # TODO: how to get the zenodo id
+        #run: |
+        #  jq --version
+
+
+# git checkout -b ${{ github.sha }}
+# echo "    nf-core-sarek-$pipeline_version-deepvariants-agilent-200M:" >> config.yaml
+# echo "      labels:" >> config.yaml
+# echo "        site: nf-core" >> config.yaml
+# echo "        pipeline: nf-core/sarek v$" >> config.yaml
+# echo "        trimming: Fastp v0.23." >> config.yaml
+# echo "        read-mapping: bwa mem v0.7." >> config.yaml
+# echo "        base-quality-recalibration: gatk4 v4.4.0." >> config.yaml
+# echo "        realignment: " >> config.yaml
+# echo "        variant-detection: deepvariant v1.5." >> config.yaml
+# echo "        genotyping: " >> config.yaml
+# echo "        reads: " >> config.yaml
+# echo "      subcategory: NA12878-agilent" >> config.yaml
+# echo "      zenodo:" >> config.yaml
+# echo "        deposition: ?" >> config.yaml
+# echo "        filename: nf-core-sarek-$pipeline_version/deepvariant/NA12878_200M/NA12878_200M.deepvariant.vcf." >> config.yaml
+# echo "      benchmark: giab-NA12878-agilent-" >> config.yaml
+# echo "      rename-contigs: resources/rename-contigs/ucsc-to-ensembl.txt" >> config.yaml
+# git config user.name github-actions
+# git config user.email github-actions@github.com
+# git add .
+# git commit -m "Benchmarking ${{ github.sha }}"
+# git push origin ${{ github.sha }}
+
+
+
+# TB-sarek311-freebayes-agilent-200M:
+#   labels:
+#     site: TB QBiC
+#     pipeline: nf-core/sarek v3.1.1
+#     trimming: Fastp v0.23.2
+#     read-mapping: bwa mem v0.7.17
+#     base-quality-recalibration: gatk4 v4.3.0.0
+#     realignment: none
+#     variant-detection: freebayes v1.3.6
+#     genotyping: none
+#     reads: 200M
+#   subcategory: NA12878-agilent
+#   zenodo:
+#     deposition: 7376244
+#     filename: WES_agilent_high_cov_sarek311_NA12878_freebayes.vcf.gz
+#   benchmark: giab-NA12878-agilent-200M
+#   rename-contigs: resources/rename-contigs/ucsc-to-ensemb
+
+# TB-sarek311-haplotypecaller-agilent-200M:
+#   labels:
+#     site: TB QBiC
+#     pipeline: nf-core/sarek v3.1.1
+#     trimming: Fastp v0.23.2
+#     read-mapping: bwa mem v0.7.17
+#     base-quality-recalibration: gatk4 v4.3.0.0
+#     realignment: none
+#     variant-detection: gatk-haplotypecaller v4.3.0.0
+#     genotyping: none
+#     reads: 200M
+#   subcategory: NA12878-agilent
+#   zenodo:
+#     deposition: 7376244
+#     filename: WES_agilent_high_cov_sarek311_NA12878_haplotypecaller.vcf.gz
+#   benchmark: giab-NA12878-agilent-200M
+#   rename-contigs: resources/rename-contigs/ucsc-to-ensemb
+# TB-sarek311-strelka-agilent-200M:
+#   labels:
+#     site: TB QBiC
+#     pipeline: nf-core/sarek v3.1.1
+#     trimming: Fastp v0.23.2
+#     read-mapping: bwa mem v0.7.17
+#     base-quality-recalibration: gatk4 v4.3.0.0
+#     realignment: none
+#     variant-detection: strelka2 v2.9.10
+#     genotyping: none
+#     reads: 200M
+#   subcategory: NA12878-agilent
+#   zenodo:
+#     deposition: 7376244
+#     filename: WES_agilent_high_cov_sarek311_NA12878_strelka.vcf.gz
+#   benchmark: giab-NA12878-agilent-200M
+#   rename-contigs: resources/rename-contigs/ucsc-to-ensemb
+# TB-sarek311-deepvariants-agilent-75M:
+#   labels:
+#     site: TB QBiC
+#     pipeline: nf-core/sarek v3.1.1
+#     trimming: Fastp v0.23.2
+#     read-mapping: bwa mem v0.7.17
+#     base-quality-recalibration: gatk4 v4.3.0.0
+#     realignment: none
+#     variant-detection: DeepVariant v1.3.6
+#     genotyping: none
+#     reads: 75M
+#   subcategory: NA12878-agilent
+#   zenodo:
+#     deposition: 7376244
+#     filename: WES_agilent_low_cov_sarek311_NA12878_freebayes.vcf.gz
+#   benchmark: giab-NA12878-agilent-75M
+#   rename-contigs: resources/rename-contigs/ucsc-to-ensemb
+# TB-sarek311-freebayes-agilent-75M:
+#   labels:
+#     site: TB QBiC
+#     pipeline: nf-core/sarek v3.1.1
+#     trimming: Fastp v0.23.2
+#     read-mapping: bwa mem v0.7.17
+#     base-quality-recalibration: gatk4 v4.3.0.0
+#     realignment: none
+#     variant-detection: freebayes v1.3.6
+#     genotyping: none
+#     reads: 75M
+#   subcategory: NA12878-agilent
+#   zenodo:
+#     deposition: 7376244
+#     filename: WES_agilent_low_cov_sarek311_NA12878_freebayes.vcf.gz
+#   benchmark: giab-NA12878-agilent-75M
+#   rename-contigs: resources/rename-contigs/ucsc-to-ensemb
+# nfcore-sarek-${{ github.sha }}-haplotypecaller-agilent-75M:
+#   labels:
+#     site: nf-core
+#     pipeline: nf-core/sarek v
+#     trimming: Fastp v0.23.2
+#     read-mapping: bwa mem v0.7.17
+#     base-quality-recalibration: gatk4 v4.3.0.0
+#     realignment: none
+#     variant-detection: gatk-haplotypecaller v4.3.0.0
+#     genotyping: none
+#     reads: 75M
+#   subcategory: NA12878-agilent
+#   zenodo:
+#     deposition:
+#     filename: WES_agilent_low_cov_sarek311_NA12878_haplotypecaller.vcf.gz
+#   benchmark: giab-NA12878-agilent-75M
+#   rename-contigs: resources/rename-contigs/ucsc-to-ensemb
+# TB-sarek311-strelka-agilent-75M:
+#   labels:
+#     site: TB QBiC
+#     pipeline: nf-core/sarek v3.1.1
+#     trimming: Fastp v0.23.2
+#     read-mapping: bwa mem v0.7.17
+#     base-quality-recalibration: gatk4 v4.3.0.0
+#     realignment: none
+#     variant-detection: strelka2 v2.9.10
+#     genotyping: none
+#     reads: 75M
+#   subcategory: NA12878-agilent
+#   zenodo:
+#     deposition: 7376244
+#     filename: WES_agilent_low_cov_sarek311_NA12878_strelka.vcf.gz
+#   benchmark: giab-NA12878-agilent-75M
+#   rename-contigs: resources/rename-contigs/ucsc-to-ensemb
diff --git a/assets/zenodo_ncbench.json b/assets/zenodo_ncbench.json
@@ -0,0 +1,13 @@
+{
+    "metadata": {
+        "title": "nf-core/sarek v3.1.1 NCBench vcf files",
+        "upload_type": "dataset",
+        "description": "Agilent datasets for NCBench",
+        "creators": [
+            {
+                "name": "Garcia, Maxime U"
+            },
+            {
+                "name": "Hanssen, Friederike"
+            }
+        ]
+    }
+}
diff --git a/conf/test_full_germline.config b/conf/test_full_germline.config
@@ -18,7 +18,7 @@ params {
     input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/NA12878_WGS_30x_full_test.csv'
 
     // Other params
-    tools = 'strelka,freebayes,haplotypecaller,deepvariant,manta,tiddit,cnvkit,vep'
+    tools                     = 'strelka,freebayes,haplotypecaller,deepvariant,manta,tiddit,cnvkit,vep,snpeff'
     split_fastq               = 50000000
     use_annotation_cache_keys = true
 }
diff --git a/conf/test_full_germline_wes.config b/conf/test_full_germline_wes.config
@@ -0,0 +1,25 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running full-size tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a full size pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/sarek -profile test_full_germline_wes,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+// Parameters for the full-size germline test on the Agilent WES dataset.
+params {
+    // Name and description labels for this profile
+    config_profile_name        = 'Full test profile for germline VC'
+    config_profile_description = 'Full test dataset to check germline VC pipeline function'
+
+    // Input data for full size test
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/NA12878_Agilent_full_test.csv'
+
+    // Other params
+    // Agilent v7 BED file fetched from Zenodo
+    intervals                 = 'https://zenodo.org/record/6513789/files/Agilent_v7.bed' //TODO: upload own intervals files prepended with "chr"
+    // Exome mode switched on; presumably used together with `intervals` - confirm
+    wes                       = true
+    // Comma-separated tool list; shorter than the one in conf/test_full_germline.config
+    tools                     = 'strelka,freebayes,haplotypecaller,deepvariant'
+    use_annotation_cache_keys = true
+}
diff --git a/nextflow.config b/nextflow.config
@@ -251,6 +251,7 @@ profiles {
     // Extra test profiles for full tests on AWS
     test_full                  { includeConfig 'conf/test_full.config' }
     test_full_germline         { includeConfig 'conf/test_full_germline.config' }
+    test_full_germline_wes     { includeConfig 'conf/test_full_germline_wes.config' }
     // Extra test profiles for more complete CI
     alignment_to_fastq         { includeConfig 'conf/test/alignment_to_fastq.config' }
     annotation                 { includeConfig 'conf/test/annotation.config' }