Skip to content

Commit

Permalink
get sample name from bam
Browse files Browse the repository at this point in the history
  • Loading branch information
brentp committed Sep 17, 2018
1 parent d88c80a commit a93cf2f
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 17 deletions.
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
v0.0.3
======
+ fix bug when annotatign multi-sample vcf (brentp/duphold#2)
+ get sample name from bam read group info. **NOTE** that this changes the command line parameters by removing the `--sample` argument.
4 changes: 2 additions & 2 deletions duphold.nimble
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# Package

version = "0.0.2"
version = "0.0.3"
author = "Brent Pedersen"
description = "find depth support for DUP/DEL/CNV calls that use PE/SR"
license = "MIT"


# Dependencies

requires "docopt#0abba63", "genoiser >= 0.2.1"
requires "docopt#0abba63", "genoiser >= 0.2.1", "hts >= 0.2.4"
srcDir = "src"

bin = @["duphold"]
Expand Down
27 changes: 12 additions & 15 deletions src/duphold.nim
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,6 @@ proc add_stats(variant:Variant, values:var seq[int32], sample_i: int, stats:Stat

var floats = newSeq[float32](variant.vcf.n_samples)

#get_or_empty(variant, "DHZ", floats)
#var z = (local_stats.m - stats.m) / sqrt(stats.S/stats.n.float64)
#floats[sample_i] = z.float32
#if variant.format.set("DHZ", floats) != Status.OK:
# quit "error setting DHZ in VCF"

get_or_empty(variant, "DHFC", floats)
var fc = local_stats.m / stats.m
floats[sample_i] = fc.float32
Expand Down Expand Up @@ -278,6 +272,15 @@ iterator duphold*(bam:Bam, vcf:VCF, fai:Fai, sample_i:int, step:int=STEP): Varia
variant.add_stats(depths.values, sample_i, stats, gc_stats, fai)
yield variant

proc sample_name(b:Bam): string =
for line in ($b.hdr).split("\n"):
if not line.startswith("@RG"): continue
var tmp = line.split("SM:")[1].split("\t")[0].strip()
if result != "" and tmp != result:
raise newException(ValueError, "found multiple samples in bam:" & result & "," & tmp)
result = tmp


proc main(argv: seq[string]) =

let doc = format("""
Expand All @@ -289,7 +292,6 @@ Options:
-b --bam <path> path to indexed BAM/CRAM
-f --fasta <path> indexed fasta reference.
-t --threads <int> number of decompression threads. [default: 4]
-s --sample <string> optional VCF sample name or index to annotate
-o --output <string> output VCF/BCF (default is VCF to stdout) [default: -]
-h --help show help
""")
Expand Down Expand Up @@ -332,20 +334,15 @@ Options:

ovcf.header = vcf.header

if $args["--sample"] != "nil":
sample_i = vcf.samples.find($args["--sample"])
if sample_i < 0:
try:
sample_i = parseInt($args["--sample"])
except:
quit "sample:" & $args["--sample"] & "not found in vcf"

open(bam, $args["--bam"], index=true, threads=parseInt($args["--threads"]), fai=($args["--fasta"]))
if bam == nil:
quit "could not open bam file"
if bam.idx == nil:
quit "could not open bam index"
discard bam.set_option(FormatOption.CRAM_OPT_REQUIRED_FIELDS, 511)
sample_i = vcf.samples.find(bam.sample_name)
if sample_i == -1:
quit "couldn't find sample:" & bam.sample_name & " in vcf which had:" & join(vcf.samples, ",")

if not ovcf.write_header():
quit "couldn't write vcf header"
Expand Down

0 comments on commit a93cf2f

Please sign in to comment.