diff --git a/regression/presets/analyze/rna-seq-with-umi.yaml b/regression/presets/analyze/rna-seq-with-umi.yaml new file mode 100644 index 000000000..fc54c2569 --- /dev/null +++ b/regression/presets/analyze/rna-seq-with-umi.yaml @@ -0,0 +1,389 @@ +flags: + - species + - tagPattern + - materialType +pipeline: + - align + - refineTagsAndSort + - assemblePartial + - assemblePartial + - extend + - assemble + - assembleContigs + - exportClones +align: + species: "" + libraryName: default + trimmingQualityThreshold: 10 + trimmingWindowSize: 6 + chains: ALL + replaceWildcards: true + overlapPairedReads: true + bamDropNonVDJ: false + writeFailedAlignments: false + tagPattern: null + tagUnstranded: false + tagMaxBudget: 10.0 + headerExtractors: [] + readIdAsCellTag: false + sampleTable: null + tagsValidations: + - type: MustContainTagType + tagType: Cell + - type: MustContainTagType + tagType: Molecule + splitBySample: true + limit: null + parameters: + vParameters: + geneFeatureToAlign: VTranscriptWithP + minSumScore: 60 + relativeMinScore: 0.97 + maxHits: 5 + edgeRealignmentMinScoreOverride: 35 + parameters: + type: kaligner2 + mapperNValue: 9 + mapperKValue: 1 + floatingLeftBound: false + floatingRightBound: true + mapperAbsoluteMinClusterScore: 250 + mapperExtraClusterScore: -38 + mapperMatchScore: 127 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 6 + mapperMaxClusters: 15 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 250 + mapperRelativeMinScore: 0.75 + mapperMinSeedsDistance: 6 + mapperMaxSeedsDistance: 6 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.8 + maxHits: 3 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + dParameters: + geneFeatureToAlign: DRegionWithP + relativeMinScore: 0.85 + absoluteMinScore: 25.0 + maxHits: 3 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + jParameters: + geneFeatureToAlign: JRegionWithP + minSumScore: 150 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner2 + mapperNValue: 8 + mapperKValue: 1 + floatingLeftBound: true + floatingRightBound: false + mapperAbsoluteMinClusterScore: 102 + mapperExtraClusterScore: -38 + mapperMatchScore: 95 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 6 + mapperMaxClusters: 4 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 100 + mapperRelativeMinScore: 0.8 + mapperMinSeedsDistance: 5 + mapperMaxSeedsDistance: 5 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.8 + maxHits: 3 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + cParameters: + geneFeatureToAlign: CExon1 + minSumScore: 40 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner + mapperKValue: 5 + floatingLeftBound: false + floatingRightBound: false + mapperAbsoluteMinScore: 95.0 + mapperRelativeMinScore: 0.63 + mapperMatchScore: 128.0 + mapperMismatchPenalty: -0.1 + mapperOffsetShiftPenalty: -0.3 + mapperMinSeedsDistance: 8 + mapperMaxSeedsDistance: 18 + minAlignmentLength: 15 + maxAdjacentIndels: 2 + alignmentStopPenalty: -1000 + absoluteMinScore: 40.0 + relativeMinScore: 0.87 + maxHits: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + vjAlignmentOrder: VThenJ + libraryStructure: Unknown + includeDScore: false + includeCScore: false + minSumScore: 120.0 + relativeMinVFR3CDR3Score: 0.7 + allowPartialAlignments: true + allowNoCDR3PartAlignments: true + allowChimeras: false + readsLayout: Opposite + mergerParameters: + qualityMergingAlgorithm: MaxSubtraction + partsLayout: null + minimalOverlap: 13 + minimalMatchQualitySum: 364 + maxQuality: 50 + minimalIdentity: 0.7 + identityType: MinimalQualityWeighted + fixSeed: true + alignmentBoundaryTolerance: 5 + minChimeraDetectionScore: 120 + vjOverlapWindow: 3 + saveOriginalSequence: false + saveOriginalReads: false + smartForceEdgeAlignments: true +refineTagsAndSort: + whitelists: {} + runCorrection: true + parameters: + correctionPower: 0.001 + backgroundSubstitutionRate: 0.001 + backgroundIndelRate: 1.0E-5 + minQuality: 12 + maxSubstitutions: 2 + maxIndels: 2 + maxTotalErrors: 3 + postFilter: + type: filter_groups + groupingKeys: + - allTags:Molecule + predicates: + - metrics: + - type: group_metric_sum_weight + reportHist: + log: true + binNumber: 0 + minBinWidth: 0.2 + multiplyWeightByKey: false + operator: + type: group_operator_lowest_threshold + operators: + - type: group_operator_advanced_thresholding + algo: + type: otsu + histTransformers: + - type: inflate + fraction: 0.15 + value: 1.0 + logX: true + minimalSample: 20 + fallbackThreshold: 1.0 + accept: High + - type: group_operator_cumtop + share: 0.85 + round: Down + accept: High + accept: High + expectedSorting: [] + requiredSequences: [] + expectedSorting: + - allTags:Molecule + requiredSequences: [] +assemblePartial: + overlappedOnly: false + dropPartial: false + cellLevel: false + parameters: + kValue: 12 + kOffset: -7 + minimalAssembleOverlap: 12 + minimalNOverlap: 7 + minimalNOverlapShare: 0.65 + minimalAlignmentMergeIdentity: 0.85 + mergerParameters: + qualityMergingAlgorithm: SumSubtraction + partsLayout: CollinearDirect + minimalOverlap: 20 + minimalMatchQualitySum: 0 + maxQuality: 45 + minimalIdentity: 0.95 + identityType: Unweighted + maxLeftParts: 256000 + maxLeftMatches: 6144 +extend: + vAnchor: CDR3Begin + jAnchor: FR4Begin + minimalVScore: 50 + minimalJScore: 50 +assemble: + sortBySequence: false + clnaOutput: false + cellLevel: false + consensusAssemblerParameters: + assembler: + aAssemblerParameters: + bandWidth: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -4)" + gapPenalty: -14 + minAlignmentScore: 40 + maxNormalizedAlignmentPenalty: 0.2 + trimMinimalSumQuality: 0 + trimReferenceRegion: false + maxQuality: 45 + maxIterations: 6 + minAltSeedQualityScore: 11 + minAltSeedNormalizedPenalty: 0.35 + altSeedPenaltyTolerance: 0.5 + minRecordSharePerConsensus: 0.2 + minRecordsPerConsensus: 0 + minRecursiveRecordShare: 0.4 + minQualityScore: 0 + maxConsensuses: 3 + minTagSuffixShare: 0.0 + isolateChains: false + cloneAssemblerParameters: + assemblingFeatures: + - CDR3 + minimalClonalSequenceLength: 12 + qualityAggregationType: BetaScore + cloneClusteringParameters: + searchDepth: 2 + allowedMutationsInNRegions: 1 + searchParameters: twoMismatchesOrIndels + clusteringFilter: + type: advanced + correctionPower: 0.001 + backgroundSubstitutionRate: 5.0E-4 + backgroundIndelRate: 2.0E-4 + cloneFactoryParameters: + vParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + jParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + cParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + dParameters: + relativeMinScore: null + absoluteMinScore: null + maxHits: null + scoring: null + separateByV: false + separateByJ: false + separateByC: false + maximalPreClusteringRatio: 1.0 + preClusteringScoreFilteringRatio: 2.0 + preClusteringCountFilteringRatio: 2.0 + addReadsCountOnClustering: false + badQualityThreshold: 15 + maxBadPointsPercent: 0.7 + mappingThreshold: 2of5 + minimalQuality: 20 + postFilters: null + inferMinRecordsPerConsensus: true +assembleContigs: + ignoreTags: false + parameters: + branchingMinimalQualityShare: 0.1 + branchingMinimalSumQuality: 60 + decisiveBranchingSumQualityThreshold: 120 + alignedSequenceEdgeDelta: 3 + alignmentEdgeRegionSize: 7 + minimalNonEdgePointsFraction: 0.25 + minimalMeanNormalizedQuality: 5.0 + outputMinimalQualityShare: 0.75 + outputMinimalSumQuality: 0 + subCloningRegions: null + assemblingRegions: null + postFiltering: + type: NoFiltering + trimmingParameters: + averageQualityThreshold: 10.0 + windowSize: 8 + minimalContigLength: 20 + alignedRegionsOnly: false + discardAmbiguousNucleotideCalls: false +exportAlignments: + chains: ALL + noHeader: false + fields: + - field: -readIds + - field: -tags + args: + - Molecule + - field: -targetSequences + - field: -targetQualities + - field: -vHitsWithScore + - field: -dHitsWithScore + - field: -jHitsWithScore + - field: -cHitsWithScore + - field: -vAlignments + - field: -dAlignments + - field: -jAlignments + - field: -cAlignments + - field: -allNFeaturesWithMinQuality + - field: -allAAFeatures + - field: -defaultAnchorPoints + - field: -topChains +exportClones: + splitByTagType: null + filterOutOfFrames: false + filterStops: false + chains: ALL + noHeader: false + fields: + - field: -cloneId + - field: -readCount + - field: -readFraction + - field: -uniqueTagCount + args: + - Molecule + - field: -uniqueTagFraction + args: + - Molecule + - field: -targetSequences + - field: -targetQualities + - field: -vHitsWithScore + - field: -dHitsWithScore + - field: -jHitsWithScore + - field: -cHitsWithScore + - field: -vAlignments + - field: -dAlignments + - field: -jAlignments + - field: -cAlignments + - field: -allNFeaturesWithMinQuality + - field: -allAAFeatures + - field: -defaultAnchorPoints + splitFilesBy: + - chain + groupClonesBy: [] diff --git a/regression/presets/list.txt b/regression/presets/list.txt index e2d2ae7f6..c4795e716 100644 --- a/regression/presets/list.txt +++ b/regression/presets/list.txt @@ -7,6 +7,12 @@ generic-ont (Oxford Nanopore data) Required args: --species +rna-seq-with-umi (RNA-Seq data with UMI barcodes) + Required args: + --species + --tag-pattern + (--dna|--rna) + generic-pacbio-with-umi (PacBio data with UMIs) Required args: --species