From 48415c1ae995624734485b5c2dab394c15dbdd14 Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Fri, 14 Aug 2020 09:46:07 +0200 Subject: [PATCH] feat: pass mutations difference from reference node to QC --- .../web/src/algorithms/QC/ruleDivergence.ts | 3 +- .../web/src/algorithms/QC/ruleMissingData.ts | 3 +- .../web/src/algorithms/QC/ruleMixedSites.ts | 3 +- .../web/src/algorithms/QC/ruleSnpClusters.ts | 11 ++++-- packages/web/src/algorithms/QC/runQC.ts | 26 +++++++------ .../web/src/algorithms/tree/locateInTree.ts | 37 ++++++++++++++----- packages/web/src/helpers/safeZip.ts | 13 +++++++ .../src/state/algorithm/algorithm.sagas.ts | 18 ++++++--- 8 files changed, 82 insertions(+), 32 deletions(-) create mode 100644 packages/web/src/helpers/safeZip.ts diff --git a/packages/web/src/algorithms/QC/ruleDivergence.ts b/packages/web/src/algorithms/QC/ruleDivergence.ts index 30bf2449d..e0d6bd5b2 100644 --- a/packages/web/src/algorithms/QC/ruleDivergence.ts +++ b/packages/web/src/algorithms/QC/ruleDivergence.ts @@ -1,4 +1,4 @@ -import type { AnalysisResult } from 'src/algorithms/types' +import type { AnalysisResult, NucleotideSubstitution } from 'src/algorithms/types' export interface QCRulesConfigDivergence { divergenceMean: number @@ -8,6 +8,7 @@ export interface QCRulesConfigDivergence { export function ruleDivergence( { substitutions, insertions, deletions }: AnalysisResult, + mutationsDiff: NucleotideSubstitution[], { divergenceMean, divergenceStd, nStd }: QCRulesConfigDivergence, ) { const totalNumberOfMutations = diff --git a/packages/web/src/algorithms/QC/ruleMissingData.ts b/packages/web/src/algorithms/QC/ruleMissingData.ts index 721b1c012..b736bd554 100644 --- a/packages/web/src/algorithms/QC/ruleMissingData.ts +++ b/packages/web/src/algorithms/QC/ruleMissingData.ts @@ -1,6 +1,6 @@ import { clamp } from 'lodash' -import type { AnalysisResult } from 'src/algorithms/types' +import type { AnalysisResult, NucleotideSubstitution } from 'src/algorithms/types' export 
interface QCRulesConfigMissingData { missingDataThreshold: number @@ -11,6 +11,7 @@ export interface QCRulesConfigMissingData { export function ruleMissingData( { nucleotideComposition }: AnalysisResult, + _1: NucleotideSubstitution[], { missingDataThreshold, scoreWeight, scoreBias, scoreMax }: QCRulesConfigMissingData, ) { const totalMissing = nucleotideComposition.N ?? 0 diff --git a/packages/web/src/algorithms/QC/ruleMixedSites.ts b/packages/web/src/algorithms/QC/ruleMixedSites.ts index c985f206b..b73f47a3d 100644 --- a/packages/web/src/algorithms/QC/ruleMixedSites.ts +++ b/packages/web/src/algorithms/QC/ruleMixedSites.ts @@ -1,6 +1,6 @@ import { clamp } from 'lodash' -import type { AnalysisResult } from 'src/algorithms/types' +import type { AnalysisResult, NucleotideSubstitution } from 'src/algorithms/types' export interface QCRulesConfigMixedSites { mixedSitesThreshold: number @@ -11,6 +11,7 @@ export interface QCRulesConfigMixedSites { export function ruleMixedSites( { nucleotideComposition }: AnalysisResult, + _1: NucleotideSubstitution[], { mixedSitesThreshold, scoreWeight, scoreBias, scoreMax }: QCRulesConfigMixedSites, ) { const goodBases = new Set(['A', 'C', 'G', 'T', 'N', '-']) diff --git a/packages/web/src/algorithms/QC/ruleSnpClusters.ts b/packages/web/src/algorithms/QC/ruleSnpClusters.ts index 907269538..46073b945 100644 --- a/packages/web/src/algorithms/QC/ruleSnpClusters.ts +++ b/packages/web/src/algorithms/QC/ruleSnpClusters.ts @@ -1,9 +1,10 @@ import { clamp } from 'lodash' -import type { AnalysisResult, ClusteredSNPs } from 'src/algorithms/types' +import type { AnalysisResult, ClusteredSNPs, NucleotideSubstitution } from 'src/algorithms/types' export function findSNPClusters( { substitutions }: AnalysisResult, + mutationsDiff: NucleotideSubstitution[], { knownClusters, windowSize, clusterCutOff }: QCRulesConfigSNPClusters, ) { // turn mutation keys into positions, exclude known clusters, and sort @@ -65,10 +66,14 @@ export interface 
QCRulesConfigSNPClusters { scoreMax: number } -export function ruleSnpClusters(data: AnalysisResult, config: QCRulesConfigSNPClusters) { +export function ruleSnpClusters( + data: AnalysisResult, + mutationsDiff: NucleotideSubstitution[], + config: QCRulesConfigSNPClusters, +) { const { totalSNPsThreshold, scoreWeight, scoreBias, scoreMax } = config - const snpClusters = findSNPClusters(data, config) + const snpClusters = findSNPClusters(data, mutationsDiff, config) const clusteredSNPs = processSNPClusters(snpClusters) const totalSNPs = clusteredSNPs.reduce((acc, { numberOfSNPs }) => acc + numberOfSNPs, 0) diff --git a/packages/web/src/algorithms/QC/runQC.ts b/packages/web/src/algorithms/QC/runQC.ts index bd0ea1ebf..3be6747ec 100644 --- a/packages/web/src/algorithms/QC/runQC.ts +++ b/packages/web/src/algorithms/QC/runQC.ts @@ -2,9 +2,7 @@ import { merge } from 'lodash' import { DeepPartial } from 'ts-essentials' -import { AuspiceJsonV2 } from 'auspice' - -import type { AnalysisResult } from 'src/algorithms/types' +import type { AnalysisResult, NucleotideSubstitution } from 'src/algorithms/types' import { ruleMissingData, QCRulesConfigMissingData, QCResultMissingData } from './ruleMissingData' import { ruleMixedSites, QCRulesConfigMixedSites, QCResultMixedSites } from './ruleMixedSites' @@ -67,30 +65,36 @@ export interface QCResult { mixedSites?: QCResultMixedSites } -export type Rule = (analysisResult: AnalysisResult, config: Conf) => Ret +export type Rule = ( + analysisResult: AnalysisResult, + mutationsDiff: NucleotideSubstitution[], + config: Conf, +) => Ret export function runOne, Ret>( rule: Rule, analysisResult: AnalysisResult, + mutationsDiff: NucleotideSubstitution[], config: Conf, ): Ret | undefined { - return config.enabled ? rule(analysisResult, config) : undefined + return config.enabled ? 
rule(analysisResult, mutationsDiff, config) : undefined } export interface RunQCParams { analysisResult: AnalysisResult - auspiceData: AuspiceJsonV2 + mutationsDiff: NucleotideSubstitution[] qcRulesConfig: DeepPartial } -export function runQC({ analysisResult, auspiceData, qcRulesConfig }: RunQCParams): QCResult { +export function runQC({ analysisResult, mutationsDiff, qcRulesConfig }: RunQCParams): QCResult { + // TODO: set initial state to default object in redux store instead of merging objects here every time const configs: QCRulesConfig = merge(qcRulesConfigDefault, qcRulesConfig) const result = { - divergence: runOne(ruleDivergence, analysisResult, configs.divergence), - missingData: runOne(ruleMissingData, analysisResult, configs.missingData), - snpClusters: runOne(ruleSnpClusters, analysisResult, configs.snpClusters), - mixedSites: runOne(ruleMixedSites, analysisResult, configs.mixedSites), + divergence: runOne(ruleDivergence, analysisResult, mutationsDiff, configs.divergence), + missingData: runOne(ruleMissingData, analysisResult, mutationsDiff, configs.missingData), + snpClusters: runOne(ruleSnpClusters, analysisResult, mutationsDiff, configs.snpClusters), + mixedSites: runOne(ruleMixedSites, analysisResult, mutationsDiff, configs.mixedSites), } const score = Object.values(result).reduce((acc, r) => acc + (r?.score ?? 
0), 0) diff --git a/packages/web/src/algorithms/tree/locateInTree.ts b/packages/web/src/algorithms/tree/locateInTree.ts index 7f253c1e4..8b7e98b57 100644 --- a/packages/web/src/algorithms/tree/locateInTree.ts +++ b/packages/web/src/algorithms/tree/locateInTree.ts @@ -3,17 +3,15 @@ import { cloneDeep, groupBy, set, mapValues, unset, zip } from 'lodash' import type { AuspiceJsonV2, AuspiceTreeNode } from 'auspice' +import type { Nucleotide, AnalysisResult, NucleotideSubstitution } from 'src/algorithms/types' import { formatAAMutationWithoutGene, formatMutation } from 'src/helpers/formatMutation' import { parseMutation } from 'src/helpers/parseMutation' - -import type { Nucleotide, AnalysisResult } from 'src/algorithms/types' -import { notUndefined } from 'src/helpers/notUndefined' import { formatClades } from 'src/helpers/formatClades' - -import auspiceDataRaw from 'src/assets/data/ncov_small.json' import { formatRange } from 'src/helpers/formatRange' import { UNKNOWN_VALUE } from 'src/constants' +import auspiceDataRaw from 'src/assets/data/ncov_small.json' + export type MutationMap = Map export enum NodeType { @@ -150,6 +148,12 @@ export function calculate_distance(node: AuspiceTreeNodeExtended, seq: AnalysisR return numMut + seq.substitutions.length - 2 * shared_differences - shared_sites - undetermined_sites } +/* Find substitutions that are present in the new sequence, but not present (same position and same nucleotide) in the matching reference node. `root_seq` is currently unused. */ +export function findMutDiff(node: AuspiceTreeNodeExtended, seq: AnalysisResult, root_seq: string) { + const nodeMuts: [number, Nucleotide][] = Array.from(node.mutations?.entries() ?? 
[]) + return seq.substitutions.filter((qmut) => !nodeMuts.some(([pos, nuc]) => pos === qmut.pos && nuc === qmut.queryNuc)) +} + export function get_differences(node: AuspiceTreeNodeExtended, seq: AnalysisResult, root_seq: string) { const nucMutations: string[] = [] let aminoacidMutationEntries: { gene: string; aaMut: string }[] = [] @@ -300,12 +304,13 @@ export function addColoringScale({ auspiceData, key, value, color }: AddColoring } export interface LocateInTreeParams { - analysisResults: (AnalysisResult | undefined)[] + analysisResults: AnalysisResult[] rootSeq: string } export interface LocateInTreeResults { matches: AuspiceTreeNodeExtended[] + mutationsDiffs: NucleotideSubstitution[][] auspiceData: AuspiceJsonV2 } @@ -313,8 +318,7 @@ export function locateInTree({ analysisResults: analysisResultsRaw, rootSeq, }: LocateInTreeParams): LocateInTreeResults { - const succeeded = analysisResultsRaw.filter(notUndefined) - const analysisResults = cloneDeep(succeeded) + const analysisResults = cloneDeep(analysisResultsRaw) const auspiceData = (cloneDeep(auspiceDataRaw) as unknown) as AuspiceJsonV2 // TODO: validate and sanitize const auspiceTreeVersionExpected = 'v2' @@ -330,13 +334,22 @@ export function locateInTree({ throw new Error(`Tree format not recognized: ".tree" is undefined`) } + // TODO: this can be done offline when preparing the json setNodeTypes(focal_node) const mutations = new Map() mutations_on_tree(focal_node, mutations) - const matches = analysisResults.map((seq) => closest_match(focal_node, seq).best_node) - return { matches, auspiceData } + const matchesAndDiffs = analysisResults.map((seq) => { + const match = closest_match(focal_node, seq).best_node + const diff = findMutDiff(match, seq, rootSeq) + return { match, diff } + }) + + const matches = matchesAndDiffs.map((matchAndDiff) => matchAndDiff.match) + const mutationsDiffs = matchesAndDiffs.map((matchAndDiff) => matchAndDiff.diff) + + return { matches, mutationsDiffs, auspiceData } } export 
interface FinalizeTreeParams { @@ -375,6 +388,7 @@ export function finalizeTree({ auspiceData, results, matches, rootSeq }: Finaliz auspiceData.meta = { colorings: [], display_defaults: {} } } + // TODO: this can be done offline when preparing the json auspiceData.meta.colorings.unshift({ key: 'QC Status', title: 'QC Status', @@ -385,6 +399,7 @@ export function finalizeTree({ auspiceData, results, matches, rootSeq }: Finaliz ], }) + // TODO: this can be done offline when preparing the json auspiceData.meta.colorings.unshift({ key: 'Node type', title: 'Node type', @@ -395,10 +410,12 @@ export function finalizeTree({ auspiceData, results, matches, rootSeq }: Finaliz ], }) + // TODO: this can be done offline when preparing the json addColoringScale({ auspiceData, key: 'region', value: UNKNOWN_VALUE, color: '#999999' }) addColoringScale({ auspiceData, key: 'country', value: UNKNOWN_VALUE, color: '#999999' }) addColoringScale({ auspiceData, key: 'division', value: UNKNOWN_VALUE, color: '#999999' }) + // TODO: this can be done offline when preparing the json auspiceData.meta.display_defaults = { branch_label: 'clade', color_by: 'Node type', diff --git a/packages/web/src/helpers/safeZip.ts b/packages/web/src/helpers/safeZip.ts new file mode 100644 index 000000000..805da9d4b --- /dev/null +++ b/packages/web/src/helpers/safeZip.ts @@ -0,0 +1,13 @@ +import { zip } from 'lodash' + +export function safeZip<T, U>(first: T[], second: U[]) { + const firstLen = first.length + const secondLen = second.length + if (firstLen !== secondLen) { // lodash zip() would otherwise pad the shorter array with undefined + throw new Error( + `safeZip: expected zipped arrays to be of equal length, but got arrays of lengths ${firstLen} and ${secondLen}`, + ) + } + + return zip(first, second) as [T, U][] +} diff --git a/packages/web/src/state/algorithm/algorithm.sagas.ts b/packages/web/src/state/algorithm/algorithm.sagas.ts index 682d0be0f..df23ef388 100644 --- a/packages/web/src/state/algorithm/algorithm.sagas.ts +++ 
b/packages/web/src/state/algorithm/algorithm.sagas.ts @@ -16,6 +16,7 @@ import type { TreeBuildThread } from 'src/workers/worker.treeBuild' import type { RunQcThread } from 'src/workers/worker.runQc' import type { TreeFinalizeThread } from 'src/workers/worker.treeFinalize' +import { safeZip } from 'src/helpers/safeZip' import { notUndefined } from 'src/helpers/notUndefined' import { sanitizeError } from 'src/helpers/sanitizeError' import fsaSaga from 'src/state/util/fsaSaga' @@ -74,8 +75,13 @@ export interface ScheduleQcRunParams extends RunQCParams { poolRunQc: Pool } -export async function scheduleOneQcRun({ poolRunQc, analysisResult, auspiceData, qcRulesConfig }: ScheduleQcRunParams) { - return poolRunQc.queue(async (runQc: RunQcThread) => runQc({ analysisResult, auspiceData, qcRulesConfig })) +export async function scheduleOneQcRun({ + poolRunQc, + analysisResult, + mutationsDiff, + qcRulesConfig, +}: ScheduleQcRunParams) { + return poolRunQc.queue(async (runQc: RunQcThread) => runQc({ analysisResult, mutationsDiff, qcRulesConfig })) } export function* runQcOne(params: ScheduleQcRunParams) { @@ -195,8 +201,9 @@ export function* runAlgorithm(content?: File | string) { return undefined } - const { matches, auspiceData: auspiceDataRaw } = treeBuildResult + const { matches, mutationsDiffs, auspiceData: auspiceDataRaw } = treeBuildResult + // TODO: move this to user-controlled state const qcRulesConfig: DeepPartial = { divergence: {}, missingData: {}, @@ -205,9 +212,10 @@ export function* runAlgorithm(content?: File | string) { } yield* put(setAlgorithmGlobalStatus(AlgorithmGlobalStatus.qc)) + const resultsAndDiffs = safeZip(analysisResults, mutationsDiffs) const qcResults = yield* all( - analysisResults.map((analysisResult) => - call(runQcOne, { poolRunQc, analysisResult, auspiceData: auspiceDataRaw, qcRulesConfig }), + resultsAndDiffs.map(([analysisResult, mutationsDiff]) => + call(runQcOne, { poolRunQc, analysisResult, mutationsDiff, qcRulesConfig }), ), )