# multiqc/config_defaults.yaml

#################################################################
# MultiQC Defaults
#################################################################
# This file contains the default configuration options
# for MultiQC. IT SHOULD NOT BE EDITED. If you want to
# change any of these config options, create a new file
# in any of the following locations:
#  1. <installation_dir>/multiqc_config.yaml (not pip or conda)
#  2. ~/.multiqc_config.yaml
#  3. <working directory>/multiqc_config.yaml
#################################################################

# Report header, branding and presentation (unset values are null)
title: null
subtitle: null
intro_text: null
report_comment: null
report_header_info: null
show_analysis_paths: true
show_analysis_time: true
custom_logo: null
custom_logo_url: null
custom_logo_title: null
custom_css_files: []
simple_output: false
template: "default"
profile_runtime: false
profile_memory: false
pandoc_template: null
# Count scaling: each multiplier is paired with a matching prefix and
# description (0.000001 with "M" / "millions", 0.001 with "K" / "thousands")
read_count_multiplier: 0.000001
read_count_prefix: "M"
read_count_desc: "millions"
long_read_count_multiplier: 0.001
long_read_count_prefix: "K"
long_read_count_desc: "thousands"
base_count_multiplier: 0.000001
base_count_prefix: "Mb"
base_count_desc: "millions"
# Output file / directory names and the data format
output_fn_name: "multiqc_report.html"
data_dir_name: "multiqc_data"
plots_dir_name: "multiqc_plots"
data_format: "tsv"
# Run-time switches (all off by default)
force: false
verbose: false
no_ansi: false
quiet: false
prepend_dirs: false
prepend_dirs_depth: 0
prepend_dirs_sep: " | "
file_list: false
require_logs: false
version_check_url: "https://api.multiqc.info/version"

# Output generation switches
make_data_dir: true
zip_data_dir: false
data_dump_file: true
# Write the `saved_raw_data` section in multiqc_data.json - useful to set to
# false for large datasets as the raw data can be prohibitively large
data_dump_file_write_raw: true
megaqc_url: false
megaqc_access_token: null
megaqc_timeout: 30
export_plots: false
make_report: true
make_pdf: false

# AI settings:
# Every boolean in this stanza defaults to false and every other option
# except ai_provider is unset (null).
ai_summary: false
ai_summary_full: false
ai_provider: "seqera"
ai_model: null
ai_custom_endpoint: null
ai_extra_query_options: null
ai_custom_context_window: null
no_ai: false
ai_anonymize_samples: false

# Development settings:
# NOTE(review): presumably the service endpoints used together with the
# "seqera" ai_provider above - confirm against the consuming code.
seqera_api_url: "https://intern.seqera.io"
seqera_website: "https://seqera.io"

# Plot settings:
plots_force_flat: false
plots_export_font_scale: 1.0 # set to 1.5 for bigger fonts
plots_force_interactive: false
plots_flat_numseries: 2000
plots_defer_loading_numseries: 100 # plot will require user to press button to render plot
num_datasets_plot_limit: 100 # DEPRECATED in favour of plots_defer_loading_numseries
lineplot_number_of_points_to_hide_markers: 50 # sum of data points in all samples
barplot_legend_on_bottom: false # place legend at the bottom of the bar plot (not recommended)
violin_downsample_after: 2000 # downsample data for violin plot starting from this number of samples
violin_min_threshold_outliers: 100 # for more than this number of samples, show only outliers
violin_min_threshold_no_points: 1000 # for more than this number of samples, show no points

# Table settings:
collapse_tables: true
max_table_rows: 500
max_configurable_table_columns: 200
table_columns_visible: {}
table_columns_placement: {}
table_columns_name: {}
# Named colours for conditional formatting, written as a list of
# single-key mappings (colour name -> hex code).
# NOTE(review): `lbue` looks like a typo for `lblue`; it is a runtime key
# that existing configs may reference, so confirm before renaming it.
table_cond_formatting_colours:
  - blue: "#337ab7"
  - lbue: "#5bc0de"
  - pass: "#5cb85c"
  - warn: "#f0ad4e"
  - fail: "#d9534f"
  - male: "#5bc0de"
  - female: "#d9534f"
# Conditional formatting rules. The first level is `all_columns` or a
# column name, the second level is one of the colour names defined above,
# and each leaf is a list of string tests (`s_eq`, `s_contains`).
table_cond_formatting_rules:
  all_columns:
    pass:
      - s_eq: "pass"
      - s_eq: "true"
      - s_eq: "yes"
      - s_eq: "ok"
    warn:
      - s_eq: "warn"
      - s_eq: "unknown"
    fail:
      - s_eq: "fail"
      - s_eq: "false"
      - s_eq: "no"
    male:
      - s_eq: "male"
      - s_eq: "M"
    female:
      - s_eq: "female"
      - s_eq: "F"
  # PURPLE module - QC Status column
  # TODO: Should update the table plotting code so that this can go in pconfig
  QCStatus:
    fail:
      - s_contains: "fail"
# Number formatting, section handling and miscellaneous switches
decimalPoint_format: null
thousandsSep_format: null
remove_sections: []
section_comments: {}
lint: false # Deprecated since v1.17
strict: false
development: false
custom_plot_config: {}
custom_table_header_config: {}
software_versions: {}

# Things to ignore: symlinks, images, directory names and paths
ignore_symlinks: false
ignore_images: true
# Directory names to ignore (inline comments name the tool they belong to)
fn_ignore_dirs:
  - "multiqc_data"
  - ".git"
  - "icarus_viewers" # quast
  - "runs_per_reference" # quast
  - "not_aligned" # quast
  - "contigs_reports" # quast

# Path patterns to ignore (`*` and `?` wildcards)
fn_ignore_paths:
  - "*/work/??/??????????????????????????????" # Nextflow work directories - always same hash lengths
  - "*/.snakemake"
  - "*/.singularity" # Singularity cache path
  - "*/__pycache__"
  - "*/site-packages/multiqc" # MultiQC installation directory
# Sample name filtering / renaming (all empty or disabled by default)
sample_names_ignore: []
sample_names_ignore_re: []
sample_names_rename_buttons: []
sample_names_replace: {}
sample_names_replace_regex: false
sample_names_replace_exact: false
sample_names_replace_complete: false
sample_names_rename: []
# Show/hide and highlight presets (all empty or disabled by default)
show_hide_buttons: []
show_hide_patterns: []
show_hide_regex: []
show_hide_mode: []
highlight_patterns: []
highlight_colors: []
highlight_regex: false
no_version_check: false
# File search limits
log_filesize_limit: 50000000
filesearch_lines_limit: 1000
filesearch_file_shared: []
report_readerrors: false
skip_generalstats: false
skip_versions_section: false
disable_version_detection: false
versions_table_group_header: "Group"
# Maps a data format name to a file extension (note: tsv maps to "txt")
data_format_extensions:
  tsv: "txt"
  csv: "csv"
  json: "json"
  yaml: "yaml"
export_plot_formats:
  - "png"
  - "svg"
  - "pdf"

# Custom Config settings
custom_content:
  order: []

# Option to disable sample name cleaning if desired (cleaning is on by default)
fn_clean_sample_names: true

# Option to use the filename as the sample name if desired
# Set to true to apply for all modules. Define a list of search pattern keys to be specific.
use_filename_as_sample_name: false

# Used for cleaning sample names. Should be strings.
# NB: These are removed in order!
# Each string should be listed only once.
fn_clean_exts:
  - ".gz"
  - ".fastq"
  - ".fq"
  - ".bam"
  - ".cram"
  - ".sam"
  - ".sra"
  - ".vcf"
  - ".dat"
  - "_tophat"
  - ".pbmarkdup.log"
  - ".log"
  - ".stderr"
  - ".out"
  - ".spp"
  - ".fa"
  - ".fasta"
  - ".png"
  - ".jpg"
  - ".jpeg"
  - ".html"
  - "Log.final"
  - "ReadsPerGene"
  - ".flagstat"
  - "_star_aligned"
  - "_fastqc"
  - ".hicup"
  - ".counts"
  - "_counts"
  - ".txt"
  - ".tsv"
  - ".csv"
  - ".aligned"
  - "Aligned"
  - ".merge"
  - ".deduplicated"
  - ".dedup"
  - ".clean"
  - ".sorted"
  - ".report"
  - "| stdin"
  - ".geneBodyCoverage"
  - ".inner_distance_freq"
  - ".junctionSaturation_plot.r"
  - ".pos.DupRate.xls"
  - ".GC.xls"
  - "_slamdunk"
  - "_bismark"
  - ".conpair"
  - ".concordance"
  - ".contamination"
  - ".BEST.results"
  - "_peaks.xls"
  - ".relatedness"
  - ".cnt"
  - ".aqhist"
  - ".bhist"
  - ".bincov"
  - ".bqhist"
  - ".covhist"
  - ".covstats"
  - ".ehist"
  - ".gchist"
  - ".idhist"
  - ".ihist"
  - ".indelhist"
  - ".lhist"
  - ".mhist"
  - ".qahist"
  - ".qchist"
  - ".qhist"
  - ".rpkm"
  - ".selfSM"
  - ".extendedFrags"
  - "_SummaryStatistics"
  - ".purple.purity"
  - ".purple.qc"
  - ".trim"
  - ".bowtie2"
  - ".mkD"
  - ".highfreq"
  - ".lowfreq"
  - ".consensus"
  - ".snpEff"
  - ".snpeff"
  - ".scaffolds"
  - ".contigs"
  - ".kraken2"
  - ".ccurve"
  - ".hisat2"
  - "_duprate"
  - ".markdup"
  - ".read_distribution"
  - ".junction_annotation"
  - ".infer_experiment"
  - ".biotype"
  - ".ivar"
  - ".mpileup"
  - ".primer_trim"
  - ".mapped"
  - ".vep"
  - "_vep"
  - "ccs"
  - "_NanoStats"
  - ".cutadapt"
  - ".qcML"
  - ".mosdepth"
  - "_gopeaks"
  - ".readCounts"
  - ".wgs_contig_mean_cov"
  - "_overall_mean_cov"
  - "_coverage_metrics"
  - ".wgs_fine_hist"
  - ".wgs_coverage_metrics"
  - ".wgs_hist"
  - ".vc_metrics"
  - ".gvcf_metrics"
  - ".ploidy_estimation_metrics"
  - ".fragment_length_hist"
  - ".mapping_metrics"
  - ".gc_metrics"
  - ".trimmer_metrics"
  - ".time_metrics"
  - ".quant_metrics"
  - ".quant.metrics"
  - ".quant.transcript_coverage"
  - ".scRNA_metrics"
  - ".scRNA.metrics"
  - ".scATAC_metrics"
  - ".scATAC.metrics"
  - ".fastqc_metrics"
  - ".labels"
  - ".bammetrics.metrics"
  - ".filter_summary"
  - ".cluster_report"
  - ".error.spl"
  - ".error.grp"
  - ".vgstats"

# Search patterns for grouping paired samples in general stats (e.g. fastq R1/R2)
# Affects all modules that support it (e.g. fastqc, cutadapt)
# Unset (null) by default. The commented-out mapping below shows the expected
# shape: a group label mapped to a list of plain strings and/or
# `type: regex` / `pattern: ...` entries.
table_sample_merge: null
#   "R1":
#     - "_R1"
#     - type: regex
#       pattern: "[_.-][rR]?1$"
#   "R2":
#     - "_R2"
#     - type: regex
#       pattern: "[_.-][rR]?2$"

# These are removed after the `fn_clean_exts` strings, only if sample names
# start or end with this string. Again, removed in order.
fn_clean_trim:
  - "."
  - ":"
  - "_"
  - "-"
  - ".r"
  - "_val"
  - ".idxstats"
  - "_trimmed"
  - ".trimmed"
  - ".csv"
  - ".yaml"
  - ".yml"
  - ".json"
  - "_mqc"
  - "short_summary_"
  - "_summary"
  - ".summary"
  - ".align"
  - ".h5"
  - "_matrix"
  - ".stats"
  - ".hist"
  - ".phased"
  - ".tar"
  - "runs_"

# Files to ignore when indexing files.
# Glob-style file name patterns (`*` and `[...]` wildcards).
# NOTE(review): assumes each pattern must match the whole file name
# (fnmatch-style), hence the leading `*` on extension patterns - confirm
# against the file search code.
fn_ignore_files:
  - ".DS_Store"
  - "*.py[cod]"
  - "*.bam"
  - "*.bai"
  - "*.sam"
  - "*.fq.gz"
  - "*.fastq.gz"
  - "*.fq"
  - "*.fastq"
  - "*.fa"
  - "*.gtf"
  - "*.bed"
  - "*.vcf"
  - "*.tbi"
  - "*.txt.gz"
  - "*.pdf"
  - "*.md5"
  - "*.parquet"
  - "*[!s][!u][!m][!_\\.m][!mva][!qer][!cpy].html" # Allow _mqc.html, _vep.html and summary.html files
  - "multiqc_data.json"
  - "*.gam"
  - "*.gamp"
  - "*.jar"

# Favourite modules that should appear at the top in preference
# This is in addition to those below. These appear above _all_ other
# modules (even those not present in the below list).
top_modules: []

# Order that modules should appear in report. Try to list in order of analysis.
# The list is split into commented groups, running from post-alignment QC
# at the top down to pre-alignment QC at the bottom.
module_order:
  # MultiQC general module for catching output from custom scripts
  - custom_content
  # Post-alignment QC
  - ccs
  - ngsderive
  - purple
  - conpair
  - isoseq
  - lima
  - peddy
  - percolator
  - haplocheck
  - somalier
  - methylqa
  - mosdepth
  - phantompeakqualtools
  - qualimap
  - bamdst
  - preseq
  - hifiasm
  - quast
  - qorts
  - rna_seqc
  - rockhopper
  - rsem
  - rseqc
  - busco
  - checkm
  - bustools
  - goleft_indexcov
  - gffcompare
  - disambiguate
  - supernova
  - deeptools
  - sargasso
  - verifybamid
  - mirtrace
  - happy
  - mirtop
  - glimpse
  # Post-alignment processing
  - gopeaks
  - homer
  - hops
  - macs2
  - theta2
  - snpeff
  - gatk
  - htseq
  - bcftools
  - featurecounts
  - fgbio
  - dragen
  - dragen_fastqc
  - dedup
  - pbmarkdup
  - damageprofiler
  - mapdamage
  - biobambam2
  - jcvi
  - mtnucratio
  - picard
  - vep
  - bakta
  - prokka
  - checkm2
  - qc3C
  - nanoq
  - nanostat
  - samblaster
  - samtools
  - bamtools
  - sambamba
  - ngsbits
  - pairtools
  - sexdeterrmine
  - seqera_cli
  - eigenstratdatabasetools
  - jellyfish
  - vcftools
  - longranger
  - stacks
  - varscan2
  - snippy
  - umicollapse
  - umitools
  - truvari
  - megahit
  - ganon
  - gtdbtk
  # Alignment tool stats
  - bbmap
  - bismark
  - biscuit
  - diamond
  - hicexplorer
  - hicup
  - hicpro
  - salmon
  - kallisto
  - slamdunk
  - star
  - hisat2
  - tophat
  - bowtie2
  - bowtie1
  - hostile
  - cellranger
  - snpsplit
  - odgi
  - vg
  - pangolin
  - nextclade
  - freyja
  # Pre-alignment QC
  - humid
  - kat
  - leehom
  - librarian
  - nonpareil
  - adapterremoval
  - bbduk
  - clipandmerge
  - cutadapt
  - flexbar
  - sourmash
  - kaiju
  - kraken
  - malt
  - motus
  - trimmomatic
  - sickle
  - skewer
  - sortmerna
  - biobloomtools
  - fastq_screen
  - afterqc
  - fastp
  - fastqc
  - sequali
  - filtlong
  - prinseqplusplus
  - pychopper
  - porechop
  - pycoqc
  - minionqc
  - anglerfish
  - multivcfanalyzer
  - clusterflow
  - checkqc
  - bcl2fastq
  - bclconvert
  - interop
  - ivar
  - flash
  - seqyclean
  - optitype
  - whatshap
  - spaceranger
  - xenome
  - xengsort
  - metaphlan
  - seqwho
  - telseq
  - ataqv
  - mgikit
  - mosaicatcher

# Preserves `saved_raw_data` section in modules - default for interactive runs
# NOTE(review): the value set here is false, so "default for interactive runs"
# presumably refers to an override applied elsewhere - confirm.
preserve_module_raw_data: false