Skip to content

Commit

Permalink
fix: A sample column value of 0|0 is not being parsed correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeremy Myslinski committed Jun 25, 2024
1 parent 1d9a36e commit 7b9dc85
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 5 deletions.
13 changes: 13 additions & 0 deletions tests/test_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,19 @@ def build_rec(calls=None, format_extras=None):
)


# Call.__init__() ------------------------------------------------------------


def test_call_init_with_gt():
    """A ``Call`` built from the ``GT`` value ``0|1`` exposes two alleles, 0 and 1."""
    gt_call = record.Call("sample", vcfpy.OrderedDict([("GT", "0|1")]))
    alleles = gt_call.gt_alleles
    assert len(alleles) == 2
    assert alleles[0] == 0
    assert alleles[1] == 1


# Call.is_phased() ------------------------------------------------------------


Expand Down
22 changes: 22 additions & 0 deletions tests/test_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,3 +348,25 @@ def test_header_has_header_line_positive_no_samples():
assert not hdr.has_header_line("INFO", "AD")
assert not hdr.has_header_line("FILTER", "PASS")
assert not hdr.has_header_line("contig", "1")


def test_header_get_format_field_info():
    """With no header lines, ``GT`` resolves to the ``RESERVED_FORMAT`` entry itself."""
    sample_infos = header.SamplesInfos(["one", "two", "three"])
    hdr = header.Header([], sample_infos)

    # Identity (not equality): the lookup must hand back the reserved object.
    assert hdr.get_format_field_info("GT") is header.RESERVED_FORMAT["GT"]


def test_header_get_info_format_field_info():
    """With no header lines, ``AA`` resolves to the ``RESERVED_INFO`` entry itself."""
    sample_infos = header.SamplesInfos(["one", "two", "three"])
    hdr = header.Header([], sample_infos)

    # Identity (not equality): the lookup must hand back the reserved object.
    aa_field_info = hdr.get_info_field_info("AA")
    assert aa_field_info is header.RESERVED_INFO["AA"]
16 changes: 16 additions & 0 deletions tests/test_parser_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@
__author__ = "Manuel Holtgrewe <manuel.holtgrewe@bihealth.de>"


# Minimal VCF header: the file format line plus the column line naming three samples.
SMALL_HEADER = (
    "##fileformat=VCFv4.3\n"
    "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA00001\tNA00002\tNA00003\n"
)


MEDIUM_HEADER = """
##fileformat=VCFv4.3
##fileDate=20090805
Expand Down Expand Up @@ -178,3 +184,13 @@ def test_missing_pass(recwarn):
RESULT = p.parse_next_record()
assert str(RESULT) == EXPECTED
assert list(recwarn) == []


def test_parse_record_with_gt_data():
    """Parsed calls keep ``GT`` as the raw string; a ``.`` sample column gives ``None``."""
    record_line = "20\t1\t.\tC\tG\t.\t.\tAA=G\tGT\t0|1\t1/1\t.\n"
    vcf_parser = parser.Parser(io.StringIO(SMALL_HEADER + record_line))
    vcf_parser.parse_header()
    parsed = vcf_parser.parse_next_record()

    for idx, expected_gt in enumerate(("0|1", "1/1")):
        assert parsed.calls[idx].data["GT"] == expected_gt
    assert parsed.calls[2].data["GT"] is None
10 changes: 5 additions & 5 deletions vcfpy/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,18 +396,18 @@ def add_line(self, header_line):

def get_info_field_info(self, key):
"""Return :py:class:`FieldInfo` for the INFO field named ``key``"""
return self._get_field_info("INFO", key)
return self._get_field_info("INFO", key, RESERVED_INFO)

def get_format_field_info(self, key):
"""Return :py:class:`FieldInfo` for the FORMAT field named ``key``"""
return self._get_field_info("FORMAT", key)
return self._get_field_info("FORMAT", key, RESERVED_FORMAT)

def _get_field_info(self, type_, key):
def _get_field_info(self, type_, key, reserved):
result = self._indices[type_].get(key)
if result:
return result
if key in RESERVED_INFO:
res = FieldInfo(RESERVED_INFO[key].type, RESERVED_INFO[key].number)
if key in reserved:
res = reserved[key]
else:
res = FieldInfo("String", HEADER_NUMBER_UNBOUNDED)
warnings.warn(
Expand Down

0 comments on commit 7b9dc85

Please sign in to comment.