Skip to content

Commit

Permalink
restore haplotype as unordered List with Serializer override
Browse files Browse the repository at this point in the history
  • Loading branch information
ahwagner committed Feb 10, 2024
1 parent 3674042 commit 9ab2026
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 19 deletions.
20 changes: 11 additions & 9 deletions src/ga4gh/vrs/_internal/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,15 +169,9 @@ def _recurse_ga4gh_serialize(obj):
elif isinstance(obj, _ValueObject):
return obj.ga4gh_serialize()
elif isinstance(obj, RootModel):
return _recurse_ga4gh_serialize(obj.model_dump())
return _recurse_ga4gh_serialize(obj.model_dump(mode='json'))
elif isinstance(obj, str):
return obj
elif isinstance(obj, set):
out = [_recurse_ga4gh_serialize(x) for x in list(obj)]
if all(isinstance(x, str) for x in out):
return sorted(out)
else:
return out
elif isinstance(obj, list):
return [_recurse_ga4gh_serialize(x) for x in obj]
else:
Expand Down Expand Up @@ -220,6 +214,9 @@ class _Ga4ghIdentifiableObject(_ValueObject):
description='A sha512t24u digest created using the VRS Computed Identifier algorithm.',
)

def __lt__(self, other):
    """Order two identifiable objects by their GA4GH digest.

    Defining ``<`` lets built-ins such as ``sorted()`` arrange these objects
    by comparing the value returned by ``get_or_create_digest()`` on each
    operand.

    Args:
        other: The right-hand operand of ``<``.

    Returns:
        ``True`` if this object's digest compares less than ``other``'s
        digest and ``False`` otherwise. ``NotImplemented`` is returned when
        ``other`` does not expose ``get_or_create_digest``, so Python can try
        the reflected comparison or raise ``TypeError``.
    """
    other_digest = getattr(other, "get_or_create_digest", None)
    if other_digest is None:
        # Not a digest-bearing object: defer to the comparison protocol
        # instead of failing with an opaque AttributeError.
        return NotImplemented
    # Evaluate self first, then other, matching the original call order.
    return self.get_or_create_digest() < other_digest()

# Static predicate that unconditionally returns True; it takes no arguments
# and reads no instance or class state.
# NOTE(review): the surrounding hunk header places this on
# _Ga4ghIdentifiableObject; presumably a non-identifiable base answers
# False — confirm against the full module.
@staticmethod
def is_ga4gh_identifiable():
return True
Expand Down Expand Up @@ -431,13 +428,18 @@ class Haplotype(_VariationBase):
"""A set of non-overlapping Allele members that co-occur on the same molecule."""

type: Literal['Haplotype'] = Field('Haplotype', description='MUST be "Haplotype"')
# TODO members temporarily typed as Set instead of List
members: Set[Union[Allele, IRI]] = Field(
members: List[Union[Allele, IRI]] = Field(
...,
description='A list of Alleles (or IRI references to `Alleles`) that comprise a Haplotype. Since each `Haplotype` member MUST be an `Allele`, and all members MUST share a common `SequenceReference`, implementations MAY use a compact representation of Haplotype that omits type and `SequenceReference` information in individual Haplotype members. Implementations MUST transform compact `Allele` representations into an `Allele` when computing GA4GH identifiers.',
min_length=2,
)

# Serializer override for Haplotype, registered with when_used='json':
# start from the dict produced by _ValueObject.ga4gh_serialize, replace its
# 'members' entry with a sorted copy, and return the dict.
# NOTE(review): presumably the sort makes the serialized form independent of
# the order in which members were supplied (members is declared as a List) —
# confirm that all serialized members are mutually orderable (e.g. all str).
@model_serializer(when_used='json')
def ga4gh_serialize(self) -> Dict:
# The base serialization is invoked explicitly on _ValueObject, not via super().
out = _ValueObject.ga4gh_serialize(self)
# sorted() builds a new list; the 'members' entry is rebound, not sorted in place.
out['members'] = sorted(out['members'])
return out

class ga4gh(_Ga4ghIdentifiableObject.ga4gh):
prefix = 'HT'
keys = [
Expand Down
19 changes: 9 additions & 10 deletions tests/test_vrs2.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@
allele_417816 = models.Allele(**allele_417816_dict)
allele_280320 = models.Allele(**allele_280320_dict)

# haplotype_431012_dict = {
# "type": "Haplotype",
# "members": [allele_383650_dict, allele_417816_dict]
# }
# haplotype_431012 = models.Haplotype(**haplotype_431012_dict)
# Haplotype fixture: a dict with two members taken from allele_383650_dict and
# allele_417816_dict (both defined outside this view), then used as keyword
# arguments to construct a models.Haplotype instance.
haplotype_431012_dict = {
"type": "Haplotype",
"members": [allele_383650_dict, allele_417816_dict]
}
haplotype_431012 = models.Haplotype(**haplotype_431012_dict)

# genotype_431013_dict = {
# "type": "Genotype",
Expand Down Expand Up @@ -188,15 +188,14 @@ def test_vr():
})


# Checks dumping and identifier computation for haplotype_431012: the model
# dump (exclude_none=True) equals the input dict, the object is a pydantic
# instance, the ga4gh_serialize bytes hash via sha512t24u to the same value
# that ga4gh_digest reports, and ga4gh_identify returns that digest with the
# 'ga4gh:HT.' prefix.
# NOTE(review): this listing carries two different sets of expected values for
# the same calls, plus a skip marker. It looks like a rendered diff whose +/-
# markers were lost, so presumably only one set belongs to the current test —
# confirm against the repository before relying on either.
@pytest.mark.skip(reason="Waiting on resolution of ga4gh/vrs#461 before addressing this test")
def test_haplotype():
assert haplotype_431012.model_dump(exclude_none=True) == haplotype_431012_dict
assert is_pydantic_instance(haplotype_431012)
haplotype_serialized = ga4gh_serialize(haplotype_431012)
assert haplotype_serialized == b'{"members":["734G5mtNwe40do8F6GKuqQP4QxyjBqVp","bU3n0M2YQaV5C5ebODJYZ0GnbyCrOIHi"],"type":"Haplotype"}'
assert sha512t24u(haplotype_serialized) == 'fFR5oRpeD8Cuq2hfs3bXd1rgJUQrQA26'
assert ga4gh_digest(haplotype_431012) == 'fFR5oRpeD8Cuq2hfs3bXd1rgJUQrQA26'
assert ga4gh_identify(haplotype_431012) == 'ga4gh:HT.fFR5oRpeD8Cuq2hfs3bXd1rgJUQrQA26'
assert haplotype_serialized == b'{"members":["SZIS2ua7AL-0YgUTAqyBsFPYK3vE8h_d","TKhpDsfclpSXpn6BjTLViB_ceqRerOd2"],"type":"Haplotype"}'
assert sha512t24u(haplotype_serialized) == 'kAFlqAFWNj5xZIv5G_ePM7xepXe5p8TK'
assert ga4gh_digest(haplotype_431012) == 'kAFlqAFWNj5xZIv5G_ePM7xepXe5p8TK'
assert ga4gh_identify(haplotype_431012) == 'ga4gh:HT.kAFlqAFWNj5xZIv5G_ePM7xepXe5p8TK'


@pytest.mark.skip(reason="Genotypes are not yet supported in 2.x")
Expand Down

0 comments on commit 9ab2026

Please sign in to comment.