Skip to content

Commit

Permalink
fix(defaults.toml): remove unnecessary options and add more comment
Browse files Browse the repository at this point in the history
  • Loading branch information
violetbrina committed Jan 8, 2025
1 parent 803069b commit 57f80ca
Showing 1 changed file with 68 additions and 98 deletions.
166 changes: 68 additions & 98 deletions src/cpg_flow/defaults.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
[workflow]
# Only print the final merged config and a list of stages to be submitted.
# Will skip any communication with Metamist, Hail Batch, and Cloud Storage, so
# the code can be run without permissions.
#dry_run = true

# Cohorts to use as inputs.
#input_cohorts = []

Expand All @@ -19,10 +24,6 @@
# Force stage rerun
#force_stages = []

# Map of stages to lists of sequencing groups, to skip for specific stages
#[workflow.skip_stages_for_sgs]
#CramQC = ['CPGaaa']

# Name of the workflow (to prefix output paths)
#name =

Expand All @@ -33,9 +34,12 @@
# By default, the hash of all input paths will be used.
#output_version = "0.1"

# Check input file existence (e.g. FASTQ files). When they are missing,
# Limit to data of this sequencing type
#sequencing_type = 'genome'

# Check input file existence. When they are missing,
# the `skip_sgs_with_missing_input` option controls whether such
# sequencing groups should be ignored, or it should cause raising an error.
# sequencing groups should be ignored, or it should raise an error.
check_inputs = true

# For the first (not-skipped) stage, if the input for a target does
Expand All @@ -53,107 +57,73 @@ check_intermediates = true
# already exist. If it exists, do not submit stage jobs.
check_expected_outputs = true

# Limit to data of this sequencing type
#sequencing_type = 'genome'

# Realign CRAM when available, instead of using FASTQ.
# The parameter value should correspond to CRAM version
# (e.g. v0 in gs://cpg-fewgenomes-main/cram/v0/CPGaaa.cram)
#realign_from_cram_version = 'v0'

# Calling intervals (defaults to whole-genome intervals)
# TODO: Remove from cpg_flow and move to cpg_utils, then remove from defaults.toml
#intervals_path =

# The GQ bands used for ReblockGVCF, specified as exclusive upper bounds for reference
# confidence GQ bands (must be in [1, 100] and specified in increasing order). Finer
# granularity bands result in more reference blocks and therefore larger GVCFs.
reblock_gq_bands = [20, 30, 40]

# Only print the final merged config and a list of stages to be submitted.
# Will skip any communication with Metamist, Hail Batch, and Cloud Storage, so
# the code can be run without permissions.
#dry_run = true
# Map of stages to lists of sequencing groups, to skip for specific stages
# [workflow.skip_stages_for_sgs]
#CramQC = ['CPGaaa']

# By default, BamToCram stage will create CRAM analysis types, this can be overridden
# bam_to_cram_analysis_type = 'pacbio_cram'
[hail]

# Map internally used validation sample external_id to truth sample names
[validation.sample_map]
HG001_NA12878 = 'na12878'
SYNDIP = 'syndip'
# This is different from the cpg_flow Workflow dry_run option.
# This will create Hail Batch jobs, but will enable Hail Batch's dry_run option.
dry_run = false

[hail]
# Delete temporary directories with intermediate files.
delete_scratch_on_exit = false

[resource_overrides]
# Override default resource requirements for unusually large seq data without
# demanding higher resources for all operations as standard. Examples below

# picard MarkDuplicates overrides for unreasonably large sequencing groups
#picard_mem_gb = 100
#picard_storage_gb = 350

# haplotype caller overrides, see production-pipelines PR#381
# defaults in code are 40 for genomes, none for exomes
#haplotypecaller_storage = 80

# Use highmem machine type for alignment step
# align_use_highmem = true

# Use additional storage in postproc_gvcf job for large gVCFs
# postproc_gvcf_storage = 50

# JointGenotyping GenomicsDBImport job overrides
# genomicsdb_import_mem_gb = 32
# genomicsdb_import_use_highmem = false

# JointGenotyping GenotypeGVCFs job overrides
# genotype_gvcfs_mem_gb = 15
# genotype_gvcfs_use_highmem = false

[mito_snv]
# Example config for broad wdl found here:
# https://raw.githubusercontent.com/broadinstitute/gatk/master/scripts/mitochondria_m2_wdl/ExampleInputsMitochondriaPipeline.json
# f_score_beta is not configured so will use tool default of 1.0
f_score_beta = 1.0
# Sarah Stenton from Broad runs this pipeline for seqr ingest and indicated they use a
# threshold of 0.01 for seqr cohorts.
vaf_filter_threshold = 0.01
# Use verifybamid in addition to haplocheck for contamination estimate
use_verifybamid = true

[stripy]
# Analysis_type can be "standard" (fast) or "extended" (marginally slower
# but also uses unmapped reads for genotyping)
analysis_type = "extended"
# See https://gitlab.com/andreassh/stripy-pipeline#list-of-loci
# Excluded by default: C9orf72, HTT
target_loci = """AFF2,AR,ARX_1,ARX_2,ATN1,ATXN1,ATXN10,ATXN2,ATXN3,ATXN7,ATXN8OS,\
BEAN1,CACNA1A,CBL,CNBP,COMP,CSTB,DAB1,DIP2B,DMD,DMPK,EIF4A3,FGF14,FMR1,FOXL2,FXN,GIPC1,\
GLS,HOXA13_1,HOXA13_2,HOXA13_3,HOXD13,JPH3,LRP12,MARCHF6,NIPA1,NOP56,NOTCH2NLC,\
NUTM2B-AS1,PABPN1,PHOX2B,PPP2R2B,PRDM12,PRNP,RAPGEF2,RFC1,RILPL1,RUNX2,SAMD12,SOX3,\
STARD7,TBP,TBX1,TCF4,TNRC6A,VWA1,XYLT1,YEATS2,ZFHX3,ZIC2,ZIC3"""
# Path to bed+ file containing extra loci to include in the analysis. Tab-delimited BED
# file containing at least the following four values: chromosome, start and end position
# of the STR locus and motif on the plus strand. Optionally, the locus name/ID can be
# specified as fifth value. Additionally, you can also specify disease name, inheritance,
# normal range and pathogenic cut-off values which are then being used to colourise
# results e.g.:
# https://gitlab.com/andreassh/stripy-pipeline/-/blob/main/examples/vntr.bed
# custom_loci_path = "gs://cpg-reference/hg38/loci/seqr/seqr_stripy_custom_loci.bed"
# Set to empty string if no custom loci are to be used.
custom_loci_path = ""
# Change the path the stripy report is saved to, useful when testing novel loci
output_prefix = "stripy"

# Add in specific multiQC report config options
# See https://multiqc.info/docs/getting_started/config for more details
# [workflow.cram_multiqc]
# send_to_slack = true
# [workflow.cram_multiqc.extra_config]
# plots_force_interactive = true

# [workflow.gvcf_multiqc]
# send_to_slack = true
# [workflow.gvcf_multiqc.extra_config]
# plots_force_interactive = true
# The value should be an int representing the memory in GB
# This is true for both memory and storage overrides
# job_mem_override = 50

# Other common overrides could be for number of cpus or GBs of storage
# job_cpu_override = 4
# job_storage_override = 100

# To use this override in the job python file do something like this:
# def my_job(
# b: hb.Batch,
# input_file: hb.ResourceFile,
# job_attrs: dict | None = None,
# output_path: Path | None = None,
# second_output_path: Path | None = None,
# fasta_reference: hb.ResourceGroup | None = None,
# overwrite: bool = False,
# ) -> Job | None:
# """
# My super awesome custom job
# """
# job = b.new_job(job_name, job_attrs)
# ...
#
# # check for a memory override for this job
# memory_override = get_config()['resource_overrides'].get('job_mem_override')
# assert isinstance(memory_override, (int, type(None)))
#
# resource = HIGHMEM.request_resources(ncpu=4, mem_gb=memory_override)
# # Any other resource logic or modifications...
#
# # This line sets the resource for the job created above
# resource.set_to_job(j)
# ....
#
# return job

# Adding custom options for workflow stages and jobs
# You are more than welcome to add your own custom options, however they should not
# live in the defaults.toml file. Instead, create a new toml file in the same directory
# That you pass to the analysis runner. Any options you create following the toml syntax
# will be available to you in the workflow and job python files.
# For example, if you create a file called custom_options.toml with the following content:

# [custom]
# my_custom_option = "my_custom_value"

# You can access this value in your workflow or job python file like this:
# get_config().get('custom', {}).get('my_custom_option')

0 comments on commit 57f80ca

Please sign in to comment.