Skip to content

Commit

Permalink
fixes #55
Browse files Browse the repository at this point in the history
  • Loading branch information
brentp committed Jun 30, 2020
1 parent 89fcb12 commit 3e0b840
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 3 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ v0.2.11 (dev)
=============
+ more informative error message on bad sample name (#53)
+ allow setting SOMALIER_AB_HOM_CUTOFF to change which calls are considered hom-ref (#56)
+ adjust (fix) relatedness calculation which was off when the number of shared sites was low (#55).
many thanks to @fgvieira who found and diagnosed this problem.

v0.2.10
=======
Expand Down
12 changes: 10 additions & 2 deletions src/somalierpkg/bitset.nim
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,28 @@ proc set*(b:var bitset, i:SomeInteger) {.inline.} =
b[idx] = b[idx] or (1'u64 shl (i mod 64))

type genotypes* = tuple[hom_ref:bitset, het:bitset, hom_alt:bitset]
type IBSResult* = tuple[IBS0: int32, IBS2:int32, N:int32, shared_hets:int32, shared_hom_alts:int32]
type IBSResult* = tuple[IBS0: int32, IBS2:int32, N:int32, shared_hets:int32, shared_hom_alts:int32, het_ab:int32]

proc IBS*(a: genotypes, b: genotypes): IBSResult =
doAssert a.hom_ref.len == b.hom_ref.len
var sh_het:uint64 # shared het
var sh_ha:uint64 # shared hom-alt
var n:uint64
for k in 0..a.hom_ref.high:
result.IBS0 += countSetBits((a.hom_ref[k] and b.hom_alt[k]) or (a.hom_alt[k] and b.hom_ref[k])).int32
sh_het = a.het[k] and b.het[k]
sh_ha = a.hom_alt[k] and b.hom_alt[k]
result.IBS2 += countSetBits(sh_ha or sh_het or (a.hom_ref[k] and b.hom_ref[k])).int32
result.shared_hets += countSetBits(sh_het).int32
result.shared_hom_alts += countSetBits(sh_ha).int32
result.N += countSetBits((a.hom_ref[k] or a.het[k] or a.hom_alt[k]) and (b.hom_ref[k] or b.het[k] or b.hom_alt[k])).int32
n = (a.hom_ref[k] or a.het[k] or a.hom_alt[k]) and (b.hom_ref[k] or b.het[k] or b.hom_alt[k])
# het_ab is count of sites that are het in either sample and not unknown in
# the other sample. see equation 9 and 7 from king paper (PMID: 20926424) where they
# state:
# where NAa,Aa, NAa(i) and NAa(j) are the total numbers of SNPs at which both individuals of the pair are heterozygous, and the total number of heterozygotes for the i-th and j-th individual, respectively, excluding those SNPs with missing genotypes in either individual of the pair.
# Here, NAa(i) and NAa(j) are a.het[k] and b.het[k].
result.het_ab += (countSetBits(b.het[k] and n) + countSetBits(a.het[k] and n)).int32
result.N += countSetBits(n).int32

proc XIBS*(a: genotypes, b: genotypes): IBSResult =
doAssert a.hom_ref.len == b.hom_ref.len
Expand Down
5 changes: 4 additions & 1 deletion src/somalierpkg/relate.nim
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type relation = object
hom_alts_b: uint16
shared_hom_alts: uint16
shared_hets: uint16
het_ab: uint16
ibs0: uint16
ibs2: uint16
x_ibs0: uint16
Expand Down Expand Up @@ -85,7 +86,7 @@ proc hom_alt_concordance(r: relation): float64 {.inline.} =
return (r.shared_hom_alts.float64 - 2 * r.ibs0.float64) / max(1'u16, min(r.hom_alts_a, r.hom_alts_b)).float64

proc rel(r:relation): float64 {.inline.} =
return (r.shared_hets.float64 - 2 * r.ibs0.float64) / min(r.hets_a, r.hets_b).float64
return 2 * (r.shared_hets.float64 - 2 * r.ibs0.float64) / r.het_ab.float64

proc add*(rt:var seq[relations], rel:relation, expected_relatedness:float) =

Expand Down Expand Up @@ -247,6 +248,7 @@ proc relatedness(r: var relation_matrices, j: int, k:int): relation {.inline.} =
r.n[j * r.n_samples + k] = ir.N.uint16
r.n[k * r.n_samples + j] = ir.IBS2.uint16
r.shared_hom_alts[j * r.n_samples + k] = ir.shared_hom_alts.uint16
r.shared_hom_alts[k * r.n_samples + j] = ir.het_ab.uint16

let xir = r.x_genotypes[j].XIBS(r.x_genotypes[k])
r.x[j * r.n_samples + k] = xir.IBS0.uint16
Expand All @@ -261,6 +263,7 @@ proc relatedness(r: var relation_matrices, j: int, k:int): relation {.inline.} =
shared_hom_alts: ir.shared_hom_alts.uint16,
ibs2: ir.IBS2.uint16,
n: ir.N.uint16,
het_ab: ir.het_ab.uint16,
x_ibs0: xir.IBS0.uint16,
x_ibs2: xir.IBS2.uint16,
)
Expand Down

0 comments on commit 3e0b840

Please sign in to comment.