# profile/pipeline_parameters.yaml

######################################
### Software parameters            ###
######################################

Trimmomatic:
    # Adapter clipping step, using the adapter list for the Nextera PE lib prep.
    adapter_removal_config: 'ILLUMINACLIP:files/trimmomatic_0.36_adapters_lists/NexteraPE-PE.fa:2:30:10:8:true'
    # TODO: test which settings work best for a "relaxed" mode.
    # Default: window size of 5 nucleotides, minimum average Phred score of 30.
    quality_trimming_config: 'SLIDINGWINDOW:5:30'
    # Default: remove anything shorter than 50 nucleotides.
    minimum_length_config: 'MINLEN:50'
    
HuGo_removal:
    # Selects "local" or "global" alignment.
    # Leave this at "--local" unless you know what you are doing.
    alignment_type: '--local'

SPAdes:
    # TODO: the kmersizes setting can probably be removed; this still needs to be tested.
    # Keep exactly one of the two kmersizes lines below active (uncommented).
    # Short Illumina reads (<250 nt in length): enable this line and comment out the other one.
    # kmersizes: '21,33,55,77'
    # Longer Illumina reads (>250 nt in length): enable this line and comment out the other one.
    kmersizes: '21,33,55,77,99,127'

scaffold_minLen_filter:
    # TODO: test which settings work best for a "relaxed" mode.
    # Minimum scaffold size allowed for downstream processing.
    # Advice: use a minimum length that is at least 1 nt greater than your Illumina read length.
    minlen: 500

taxonomic_classification:
    # Note: the NT database is hardcoded for now.
    # E-value threshold for saving hits.
    evalue: 0.05
    # Maximum number of target sequences to report.
    max_target_seqs: 250
    # Maximum number of hits per target sequence.
    max_hsps: 1

taxonomic_classification_LCA:
    # NOTE(review): the two booleans presumably switch the matching *_LCA
    # settings blocks on or off — confirm against the workflow.
    Krona: true
    Krona_LCA:
        # Every taxonomic hit within this bitscore distance will be used in the LCA analysis.
        bitscoreDeltaLCA: 5
    # Written as canonical lowercase `false`, consistent with `Krona: true` above.
    mgkit: false
    mgkit_LCA:
        quantile_threshold: 0.97
        bitscore_threshold: 100
        
SNP_calling:
    # Maximum coverage that will be used for SNP calling.
    # Don't change this value unless you know what you're doing.
    max_cov: 20000
    # Minimum allele frequency (AF) for which a SNP is reported. Default is 5%.
    minimum_AF: 0.05

ORF_prediction:
    # Prodigal procedure. Use "meta" for metagenomics data.
    procedure: 'meta'
    # ORF prediction output format. IGVjs (the alignment viewer program) accepts "gff".
    output_format: 'gff'

HTML_index_titles:
    # Title string for the IGVjs entry.
    # NOTE(review): presumably shown on the generated HTML index page — confirm.
    IGVjs_title: 'Interactive genome viewers for all samples in this run:'

server_info:
    # Used for IGVjs. Discuss this with your IT department; it depends on your local setup.
    # Open question: can this be made semi-flexible, i.e. automatically filled in with
    # the correct port when nginx is already running on the system?
    # That could be checked with: netstat -tulpn | grep nginx
    port: 8083

######################################
### Technical parameters           ###
######################################

# Thread counts, keyed by pipeline step name.
# NOTE(review): presumably the number of CPU threads each step may use —
# confirm against the workflow rules that read these values.
threads:
    Clean_the_data: 4
    HuGo_removal: 4
    De_novo_assembly: 12
    Taxonomic_classification_of_scaffolds: 12
    SNP_calling: 12
    Fragment_length_analysis: 4
    quantify_output: 4

# For development and debugging only.
# Setting this to 0 keeps the HuGo alignment data, which can lead to patient privacy concerns.
remove_temp: 1