Skip to content

Commit

Permalink
feat: pass mutations difference from reference node to QC
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov committed Aug 14, 2020
1 parent 37cb3b3 commit 48415c1
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 32 deletions.
3 changes: 2 additions & 1 deletion packages/web/src/algorithms/QC/ruleDivergence.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { AnalysisResult } from 'src/algorithms/types'
import type { AnalysisResult, NucleotideSubstitution } from 'src/algorithms/types'

export interface QCRulesConfigDivergence {
divergenceMean: number
Expand All @@ -8,6 +8,7 @@ export interface QCRulesConfigDivergence {

export function ruleDivergence(
{ substitutions, insertions, deletions }: AnalysisResult,
mutationsDiff: NucleotideSubstitution[],
{ divergenceMean, divergenceStd, nStd }: QCRulesConfigDivergence,
) {
const totalNumberOfMutations =
Expand Down
3 changes: 2 additions & 1 deletion packages/web/src/algorithms/QC/ruleMissingData.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { clamp } from 'lodash'

import type { AnalysisResult } from 'src/algorithms/types'
import type { AnalysisResult, NucleotideSubstitution } from 'src/algorithms/types'

export interface QCRulesConfigMissingData {
missingDataThreshold: number
Expand All @@ -11,6 +11,7 @@ export interface QCRulesConfigMissingData {

export function ruleMissingData(
{ nucleotideComposition }: AnalysisResult,
_1: NucleotideSubstitution[],
{ missingDataThreshold, scoreWeight, scoreBias, scoreMax }: QCRulesConfigMissingData,
) {
const totalMissing = nucleotideComposition.N ?? 0
Expand Down
3 changes: 2 additions & 1 deletion packages/web/src/algorithms/QC/ruleMixedSites.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { clamp } from 'lodash'

import type { AnalysisResult } from 'src/algorithms/types'
import type { AnalysisResult, NucleotideSubstitution } from 'src/algorithms/types'

export interface QCRulesConfigMixedSites {
mixedSitesThreshold: number
Expand All @@ -11,6 +11,7 @@ export interface QCRulesConfigMixedSites {

export function ruleMixedSites(
{ nucleotideComposition }: AnalysisResult,
_1: NucleotideSubstitution[],
{ mixedSitesThreshold, scoreWeight, scoreBias, scoreMax }: QCRulesConfigMixedSites,
) {
const goodBases = new Set(['A', 'C', 'G', 'T', 'N', '-'])
Expand Down
11 changes: 8 additions & 3 deletions packages/web/src/algorithms/QC/ruleSnpClusters.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { clamp } from 'lodash'

import type { AnalysisResult, ClusteredSNPs } from 'src/algorithms/types'
import type { AnalysisResult, ClusteredSNPs, NucleotideSubstitution } from 'src/algorithms/types'

export function findSNPClusters(
{ substitutions }: AnalysisResult,
mutationsDiff: NucleotideSubstitution[],
{ knownClusters, windowSize, clusterCutOff }: QCRulesConfigSNPClusters,
) {
// turn mutation keys into positions, exclude known clusters, and sort
Expand Down Expand Up @@ -65,10 +66,14 @@ export interface QCRulesConfigSNPClusters {
scoreMax: number
}

export function ruleSnpClusters(data: AnalysisResult, config: QCRulesConfigSNPClusters) {
export function ruleSnpClusters(
data: AnalysisResult,
mutationsDiff: NucleotideSubstitution[],
config: QCRulesConfigSNPClusters,
) {
const { totalSNPsThreshold, scoreWeight, scoreBias, scoreMax } = config

const snpClusters = findSNPClusters(data, config)
const snpClusters = findSNPClusters(data, mutationsDiff, config)
const clusteredSNPs = processSNPClusters(snpClusters)
const totalSNPs = clusteredSNPs.reduce((acc, { numberOfSNPs }) => acc + numberOfSNPs, 0)

Expand Down
26 changes: 15 additions & 11 deletions packages/web/src/algorithms/QC/runQC.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ import { merge } from 'lodash'

import { DeepPartial } from 'ts-essentials'

import { AuspiceJsonV2 } from 'auspice'

import type { AnalysisResult } from 'src/algorithms/types'
import type { AnalysisResult, NucleotideSubstitution } from 'src/algorithms/types'

import { ruleMissingData, QCRulesConfigMissingData, QCResultMissingData } from './ruleMissingData'
import { ruleMixedSites, QCRulesConfigMixedSites, QCResultMixedSites } from './ruleMixedSites'
Expand Down Expand Up @@ -67,30 +65,36 @@ export interface QCResult {
mixedSites?: QCResultMixedSites
}

export type Rule<Conf, Ret> = (analysisResult: AnalysisResult, config: Conf) => Ret
export type Rule<Conf, Ret> = (
analysisResult: AnalysisResult,
mutationsDiff: NucleotideSubstitution[],
config: Conf,
) => Ret

export function runOne<Conf extends Enableable<unknown>, Ret>(
rule: Rule<Conf, Ret>,
analysisResult: AnalysisResult,
mutationsDiff: NucleotideSubstitution[],
config: Conf,
): Ret | undefined {
return config.enabled ? rule(analysisResult, config) : undefined
return config.enabled ? rule(analysisResult, mutationsDiff, config) : undefined
}

export interface RunQCParams {
analysisResult: AnalysisResult
auspiceData: AuspiceJsonV2
mutationsDiff: NucleotideSubstitution[]
qcRulesConfig: DeepPartial<QCRulesConfig>
}

export function runQC({ analysisResult, auspiceData, qcRulesConfig }: RunQCParams): QCResult {
export function runQC({ analysisResult, mutationsDiff, qcRulesConfig }: RunQCParams): QCResult {
// TODO: set initial state to default object in redux store instead of merging objects here every time
const configs: QCRulesConfig = merge(qcRulesConfigDefault, qcRulesConfig)

const result = {
divergence: runOne(ruleDivergence, analysisResult, configs.divergence),
missingData: runOne(ruleMissingData, analysisResult, configs.missingData),
snpClusters: runOne(ruleSnpClusters, analysisResult, configs.snpClusters),
mixedSites: runOne(ruleMixedSites, analysisResult, configs.mixedSites),
divergence: runOne(ruleDivergence, analysisResult, mutationsDiff, configs.divergence),
missingData: runOne(ruleMissingData, analysisResult, mutationsDiff, configs.missingData),
snpClusters: runOne(ruleSnpClusters, analysisResult, mutationsDiff, configs.snpClusters),
mixedSites: runOne(ruleMixedSites, analysisResult, mutationsDiff, configs.mixedSites),
}

const score = Object.values(result).reduce((acc, r) => acc + (r?.score ?? 0), 0)
Expand Down
37 changes: 27 additions & 10 deletions packages/web/src/algorithms/tree/locateInTree.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,15 @@ import { cloneDeep, groupBy, set, mapValues, unset, zip } from 'lodash'

import type { AuspiceJsonV2, AuspiceTreeNode } from 'auspice'

import type { Nucleotide, AnalysisResult, NucleotideSubstitution } from 'src/algorithms/types'
import { formatAAMutationWithoutGene, formatMutation } from 'src/helpers/formatMutation'
import { parseMutation } from 'src/helpers/parseMutation'

import type { Nucleotide, AnalysisResult } from 'src/algorithms/types'
import { notUndefined } from 'src/helpers/notUndefined'
import { formatClades } from 'src/helpers/formatClades'

import auspiceDataRaw from 'src/assets/data/ncov_small.json'
import { formatRange } from 'src/helpers/formatRange'
import { UNKNOWN_VALUE } from 'src/constants'

import auspiceDataRaw from 'src/assets/data/ncov_small.json'

export type MutationMap = Map<number, Nucleotide>

export enum NodeType {
Expand Down Expand Up @@ -150,6 +148,12 @@ export function calculate_distance(node: AuspiceTreeNodeExtended, seq: AnalysisR
return numMut + seq.substitutions.length - 2 * shared_differences - shared_sites - undetermined_sites
}

/**
 * Finds mutations that are present in the new sequence, but not present in the
 * matching reference node sequence.
 *
 * A substitution is dropped only when the node carries the very same nucleotide
 * at the very same position; a substitution at a shared position but with a
 * different nucleotide is still reported.
 *
 * @param node - matched tree node; its `mutations` (position -> nucleotide) may be absent,
 *   in which case every substitution of `seq` is returned
 * @param seq - analysis result whose `substitutions` are filtered
 * @param root_seq - not used by this function; kept so existing call sites stay valid
 * @returns the subset of `seq.substitutions` that is not shared with `node`, in original order
 */
export function findMutDiff(node: AuspiceTreeNodeExtended, seq: AnalysisResult, root_seq: string) {
  // Index the node mutations once so that each lookup below is O(1)
  const nodeMuts = new Map<number, Nucleotide>(node.mutations?.entries() ?? [])
  return seq.substitutions.filter((qmut) => nodeMuts.get(qmut.pos) !== qmut.queryNuc)
}

export function get_differences(node: AuspiceTreeNodeExtended, seq: AnalysisResult, root_seq: string) {
const nucMutations: string[] = []
let aminoacidMutationEntries: { gene: string; aaMut: string }[] = []
Expand Down Expand Up @@ -300,21 +304,21 @@ export function addColoringScale({ auspiceData, key, value, color }: AddColoring
}

export interface LocateInTreeParams {
analysisResults: (AnalysisResult | undefined)[]
analysisResults: AnalysisResult[]
rootSeq: string
}

/** Result of `locateInTree`: per-sequence placement data plus the tree it refers to. */
export interface LocateInTreeResults {
// Best-matching tree node for each analysis result, in the same order as the input
matches: AuspiceTreeNodeExtended[]
// Output of `findMutDiff` for each analysis result against its matched node; index-aligned with `matches`
mutationsDiffs: NucleotideSubstitution[][]
// Auspice JSON the nodes were located in (a deep clone of the bundled dataset)
auspiceData: AuspiceJsonV2
}

export function locateInTree({
analysisResults: analysisResultsRaw,
rootSeq,
}: LocateInTreeParams): LocateInTreeResults {
const succeeded = analysisResultsRaw.filter(notUndefined)
const analysisResults = cloneDeep(succeeded)
const analysisResults = cloneDeep(analysisResultsRaw)
const auspiceData = (cloneDeep(auspiceDataRaw) as unknown) as AuspiceJsonV2 // TODO: validate and sanitize

const auspiceTreeVersionExpected = 'v2'
Expand All @@ -330,13 +334,22 @@ export function locateInTree({
throw new Error(`Tree format not recognized: ".tree" is undefined`)
}

// TODO: this can be done offline when preparing the json
setNodeTypes(focal_node)

const mutations = new Map()
mutations_on_tree(focal_node, mutations)

const matches = analysisResults.map((seq) => closest_match(focal_node, seq).best_node)
return { matches, auspiceData }
const matchesAndDiffs = analysisResults.map((seq) => {
const match = closest_match(focal_node, seq).best_node
const diff = findMutDiff(match, seq, rootSeq)
return { match, diff }
})

const matches = matchesAndDiffs.map((matchAndDiff) => matchAndDiff.match)
const mutationsDiffs = matchesAndDiffs.map((matchAndDiff) => matchAndDiff.diff)

return { matches, mutationsDiffs, auspiceData }
}

export interface FinalizeTreeParams {
Expand Down Expand Up @@ -375,6 +388,7 @@ export function finalizeTree({ auspiceData, results, matches, rootSeq }: Finaliz
auspiceData.meta = { colorings: [], display_defaults: {} }
}

// TODO: this can be done offline when preparing the json
auspiceData.meta.colorings.unshift({
key: 'QC Status',
title: 'QC Status',
Expand All @@ -385,6 +399,7 @@ export function finalizeTree({ auspiceData, results, matches, rootSeq }: Finaliz
],
})

// TODO: this can be done offline when preparing the json
auspiceData.meta.colorings.unshift({
key: 'Node type',
title: 'Node type',
Expand All @@ -395,10 +410,12 @@ export function finalizeTree({ auspiceData, results, matches, rootSeq }: Finaliz
],
})

// TODO: this can be done offline when preparing the json
addColoringScale({ auspiceData, key: 'region', value: UNKNOWN_VALUE, color: '#999999' })
addColoringScale({ auspiceData, key: 'country', value: UNKNOWN_VALUE, color: '#999999' })
addColoringScale({ auspiceData, key: 'division', value: UNKNOWN_VALUE, color: '#999999' })

// TODO: this can be done offline when preparing the json
auspiceData.meta.display_defaults = {
branch_label: 'clade',
color_by: 'Node type',
Expand Down
13 changes: 13 additions & 0 deletions packages/web/src/helpers/safeZip.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { zip } from 'lodash'

/**
 * Pairs up the elements of two arrays index by index, refusing to silently
 * pad when the inputs do not line up.
 *
 * @param first - elements that become the first member of every pair
 * @param second - elements that become the second member of every pair
 * @returns `[first[i], second[i]]` for every index `i`
 * @throws Error when the two arrays have different lengths
 */
export function safeZip<T, U>(first: T[], second: U[]): [T, U][] {
  const firstLen = first.length
  const secondLen = second.length
  if (firstLen !== secondLen) {
    throw new Error(
      `safeZip: expected zipped arrays to be of equal length, but got arrays of lengths ${firstLen} and ${secondLen}`,
    )
  }

  // Lengths are verified equal above, so `second[index]` is always populated
  return first.map((item, index): [T, U] => [item, second[index] as U])
}
18 changes: 13 additions & 5 deletions packages/web/src/state/algorithm/algorithm.sagas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import type { TreeBuildThread } from 'src/workers/worker.treeBuild'
import type { RunQcThread } from 'src/workers/worker.runQc'
import type { TreeFinalizeThread } from 'src/workers/worker.treeFinalize'

import { safeZip } from 'src/helpers/safeZip'
import { notUndefined } from 'src/helpers/notUndefined'
import { sanitizeError } from 'src/helpers/sanitizeError'
import fsaSaga from 'src/state/util/fsaSaga'
Expand Down Expand Up @@ -74,8 +75,13 @@ export interface ScheduleQcRunParams extends RunQCParams {
poolRunQc: Pool<RunQcThread>
}

export async function scheduleOneQcRun({ poolRunQc, analysisResult, auspiceData, qcRulesConfig }: ScheduleQcRunParams) {
return poolRunQc.queue(async (runQc: RunQcThread) => runQc({ analysisResult, auspiceData, qcRulesConfig }))
/**
 * Queues a single QC run on the worker pool.
 *
 * Only the QC inputs (`analysisResult`, `mutationsDiff`, `qcRulesConfig`) are
 * forwarded to the worker; the pool itself is not passed along.
 *
 * @returns whatever `poolRunQc.queue` returns for the queued task
 */
export async function scheduleOneQcRun(params: ScheduleQcRunParams) {
  const { poolRunQc, analysisResult, mutationsDiff, qcRulesConfig } = params
  const task = async (runQc: RunQcThread) => runQc({ analysisResult, mutationsDiff, qcRulesConfig })
  return poolRunQc.queue(task)
}

export function* runQcOne(params: ScheduleQcRunParams) {
Expand Down Expand Up @@ -195,8 +201,9 @@ export function* runAlgorithm(content?: File | string) {
return undefined
}

const { matches, auspiceData: auspiceDataRaw } = treeBuildResult
const { matches, mutationsDiffs, auspiceData: auspiceDataRaw } = treeBuildResult

// TODO: move this to user-controlled state
const qcRulesConfig: DeepPartial<QCRulesConfig> = {
divergence: {},
missingData: {},
Expand All @@ -205,9 +212,10 @@ export function* runAlgorithm(content?: File | string) {
}

yield* put(setAlgorithmGlobalStatus(AlgorithmGlobalStatus.qc))
const resultsAndDiffs = safeZip(analysisResults, mutationsDiffs)
const qcResults = yield* all(
analysisResults.map((analysisResult) =>
call(runQcOne, { poolRunQc, analysisResult, auspiceData: auspiceDataRaw, qcRulesConfig }),
resultsAndDiffs.map(([analysisResult, mutationsDiff]) =>
call(runQcOne, { poolRunQc, analysisResult, mutationsDiff, qcRulesConfig }),
),
)

Expand Down

0 comments on commit 48415c1

Please sign in to comment.