diff --git a/.gitignore b/.gitignore index 4154aa1..cab60f5 100644 --- a/.gitignore +++ b/.gitignore @@ -12,7 +12,6 @@ __pycache__/ .minio.sys .mypy_cache .pytest_cache -.tox # Distribution & packaging #---------------------------------- @@ -24,7 +23,6 @@ wheels/ .eggs/ *.egg *.egg-info/ -_version.py # Editors & IDEs #---------------------------------- diff --git a/CHANGELOG.md b/CHANGELOG.md index 70c5487..055c15a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,32 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +## [Unreleased] + +No unreleased changes + +[Unreleased]: https://github.com/23andMe/yhaplo/compare/2.1.6..HEAD + + +## [2.1.6] - 2024-02-07 + +### Added +- Python 3.12 support +- `__str__` and `__repr__` methods +- SNP-based haplogroup in MRCA output + +### Changed +- Newick representation is now unrotated by default + +### Removed +- `setuptools_scm`-generated version file + +### Fixed +- When generating Newick representation, recalculate maximum depth to support pruned trees + +[2.1.6]: https://github.com/23andMe/yhaplo/compare/2.1.4..2.1.6 + + ## [2.1.4] - 2024-01-29 ### Added diff --git a/README.md b/README.md index 2579ceb..ed24f9d 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Yhaplo | Identifying Y-Chromosome Haplogroups [![python]( -https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11-blue.svg)]( +https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)]( https://docs.python.org) [![style]( https://img.shields.io/badge/style-black-blue.svg)]( diff --git a/pyproject.toml b/pyproject.toml index 6677ff1..b4e0159 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,5 @@ [build-system] -requires = [ - "setuptools>=61.0", - "setuptools_scm[toml]>=6.2", -] +requires = ["setuptools>=64", "setuptools_scm>=8"] build-backend = "setuptools.build_meta" [project] @@ -52,7 +49,6 @@ Repository = "https://github.com/23andMe/yhaplo.git" Changelog = "https://github.com/23andMe/yhaplo/blob/master/CHANGELOG.md" [tool.setuptools_scm] -write_to = "yhaplo/_version.py" [tool.isort] profile = "black" @@ -80,18 +76,3 @@ norecursedirs = [ ".*", "build", ] - -[tool.tox] -legacy_tox_ini = """ -[tox] -envlist = py39, py310, py311 - -[testenv] -commands = - pytest -sitepackages = false -deps = - pytest -extras = - vcf -""" diff --git a/scripts/validate_yhaplo.sh b/scripts/validate_yhaplo.sh index f3fc468..71601b6 100755 --- a/scripts/validate_yhaplo.sh +++ b/scripts/validate_yhaplo.sh @@ -40,7 +40,7 @@ yhaplo --example_text \ --breadth_first \ --depth_first \ --depth_first_table \ - --mrca Q J \ + --mrca Q-M3 R-V88 \ --snp_query L1335,S730,S530,foo echo -e "\n" diff --git a/yhaplo/config.py b/yhaplo/config.py index db52175..4d7f98e 100644 --- a/yhaplo/config.py +++ b/yhaplo/config.py @@ -10,7 +10,7 @@ import numpy as np from numpy.typing import NDArray -from yhaplo._version import __version__ +from yhaplo import __version__ from yhaplo.api.command_line_args import get_command_line_arg_defaults from yhaplo.utils.loaders import DataFile @@ -216,6 +216,11 @@ def __init__( if self.suppress_output: self.override_output_generating_args() + def __repr__(self) -> str: + """Return string representation.""" + + return f"<{__name__}.{self.__class__.__name__}: command_line_args={self.args}>" + def set_params_general( self, out_dir: Optional[str], diff --git a/yhaplo/node.py b/yhaplo/node.py index c36d8f2..184dc8d 100644 --- a/yhaplo/node.py +++ b/yhaplo/node.py @@ -100,9 +100,17 @@ def __init__( # String representations # ---------------------------------------------------------------------- - def __str__(self) -> str: + def __repr__(self) -> str: """Return string representation.""" + return ( + f"<{__name__}.{self.__class__.__name__}: " + f'label="{self.label}", hg_snp="{self.hg_snp}">' + ) + + def __str__(self) -> str: + """Return printable string representation.""" + return self.str_simple @property @@ -516,14 +524,32 @@ def write_newick( use_hg_snp_label: bool = False, align_tips: bool = False, platform: Optional[str] = None, + rotate: bool = False, ) -> None: - """Write Newick string for the subtree rooted at this node.""" + """Write Newick representation of the subtree rooted at this node. + + Parameters + ---------- + newick_fp : str + File path to which to write Newick representation. + use_hg_snp_label : bool, optional + Use SNP-based haplogroup labels rather than YCC haplogroup labels. + align_tips : bool, optional + When True, set branch lengths to align the tips of the tree. + platform : str | None, optional + 23andMe platform to use for computing branch lengths. + rotate : bool, optional + Rotate nodes. By default, branches will be ordered top to bottom. + Rotating nodes orders branches bottom to top, which is left to right + when an image is rotated 90 degrees to the right. + """ if not type(self).config.suppress_output: newick = self.build_newick( use_hg_snp_label=use_hg_snp_label, align_tips=align_tips, platform=platform, + rotate=rotate, ) with open(newick_fp, "w") as out_file: out_file.write(newick + "\n") @@ -550,6 +576,7 @@ def build_newick( use_hg_snp_label: bool = False, align_tips: bool = False, platform: Optional[str] = None, + rotate: bool = False, ) -> str: """Build Newick string for the subtree rooted at this node. @@ -561,6 +588,10 @@ def build_newick( When True, set branch lengths to align the tips of the tree. platform : str | None, optional 23andMe platform to use for computing branch lengths. + rotate : bool, optional + Rotate nodes. By default, branches will be ordered top to bottom. + Rotating nodes orders branches bottom to top, which is left to right + when an image is rotated 90 degrees to the right. Returns ------- @@ -568,17 +599,14 @@ def build_newick( Newick representation of the tree. """ - subtree_max_depth = ( - self.tree.max_depth - if self.is_root - else np.max([node.depth for node in self.iter_depth_first()]) - ) + subtree_max_depth = np.max([node.depth for node in self.iter_depth_first()]) newick = ( self.build_newick_recursive( use_hg_snp_label=use_hg_snp_label, align_tips=align_tips, subtree_max_depth=subtree_max_depth, platform=platform, + rotate=rotate, ) + ";" ) @@ -591,6 +619,7 @@ def build_newick_recursive( align_tips: bool = False, subtree_max_depth: Optional[int] = None, platform: Optional[str] = None, + rotate: bool = False, ) -> str: """Build Newick string recursively for the subtree rooted at this node. @@ -605,6 +634,10 @@ def build_newick_recursive( Default to maximum depth of full tree. platform : str | None, optional 23andMe platform to use for computing branch lengths. + rotate : bool, optional + Rotate nodes. By default, branches will be ordered top to bottom. + Rotating nodes orders branches bottom to top, which is left to right + when an image is rotated 90 degrees to the right. Returns ------- @@ -613,23 +646,27 @@ def build_newick_recursive( """ subtree_max_depth = subtree_max_depth or type(self).tree.max_depth - - if not self.is_leaf: - child_string_list = [] - for child in self.child_list[::-1]: - child_string = child.build_newick_recursive( - use_hg_snp_label=use_hg_snp_label, - align_tips=align_tips, - subtree_max_depth=subtree_max_depth, - platform=platform, + child_list = self.child_list if not rotate else self.child_list[::-1] + children_string = ( + ( + "(" + + ",".join( + [ + child.build_newick_recursive( + use_hg_snp_label=use_hg_snp_label, + align_tips=align_tips, + subtree_max_depth=subtree_max_depth, + platform=platform, + rotate=rotate, + ) + for child in child_list + ] ) - child_string_list.append(child_string) - - children = ",".join(child_string_list) - children_string = f"({children})" - else: - children_string = "" - + + ")" + ) + if not self.is_leaf + else "" + ) branch_label = self.hg_snp if use_hg_snp_label else self.label branch_length = self.get_branch_length( align_tips=align_tips, diff --git a/yhaplo/path.py b/yhaplo/path.py index 76c7e15..79dbf56 100644 --- a/yhaplo/path.py +++ b/yhaplo/path.py @@ -88,17 +88,24 @@ def copy_all_attributes_other_than_node(self, other: Path) -> None: self.num_anc_since_push_through = other.num_anc_since_push_through self.num_der_since_push_through = other.num_der_since_push_through - def __str__(self) -> str: + def __repr__(self) -> str: """Return string representation.""" - str_ = ( + return ( + f"<{__name__}.{self.__class__.__name__}: " + f"num_ancestral={self.num_ancestral}, num_derived={self.num_derived}, " + f'node_string="{self.node_string}", snp_string="{self.snp_string}">' + ) + + def __str__(self) -> str: + """Return printable string representation.""" + + return ( f"{self.num_ancestral} {self.num_derived}\n" f"{self.node_string}\n" f"{self.snp_string}" ) - return str_ - # Properties # ---------------------------------------------------------------------- @property diff --git a/yhaplo/sample.py b/yhaplo/sample.py index c0aaab9..1a244a9 100644 --- a/yhaplo/sample.py +++ b/yhaplo/sample.py @@ -119,14 +119,22 @@ def __init__(self, iid: IID_TYPE): type(self).sample_list.append(self) - def __str__(self) -> str: + def __repr__(self) -> str: """Return string representation.""" - sample_string = ( + return ( + f"<{__name__}.{self.__class__.__name__}: " + f'iid={str(self.iid)}, hg_snp_obs="{self.hg_snp_obs}", ' + f'hg_snp="{self.hg_snp}", haplogroup="{self.haplogroup}">' + ) + + def __str__(self) -> str: + """Return printable string representation.""" + + return ( f"{str(self.iid):8s} {self.hg_snp_obs:15s} " f"{self.hg_snp:15s} {self.haplogroup:25s}" ) - return sample_string # Haplogroup calling # ---------------------------------------------------------------------- diff --git a/yhaplo/snp.py b/yhaplo/snp.py index 2477545..086007a 100644 --- a/yhaplo/snp.py +++ b/yhaplo/snp.py @@ -108,14 +108,22 @@ def set_label(self, label: str) -> None: ) = parse_snp_label(label, Config.snp_label_letters_rank_dict) self.label_cleaned = clean_snp_label(label) + def __repr__(self) -> str: + """Return string representation.""" + + return ( + f"<{__name__}.{self.__class__.__name__}: " + f'label="{self.label}", node.label="{self.node.label}", position={self.position}, ' + f'mutation="{self.ancestral}->{self.derived}">' + ) + def __str__(self) -> str: - """Return medium-length string representation.""" + """Return printable string representation.""" - str_ = ( + return ( f"{self.label:15s} {self.node.label:25s} {self.position:8d} " f"{self.ancestral}->{self.derived}" ) - return str_ @property def str_with_all_names(self) -> str: @@ -383,6 +391,14 @@ def __init__( self.haplogroup = haplogroup self.tree = tree + def __repr__(self) -> str: + """Return string representation.""" + + return ( + f"<{__name__}.{self.__class__.__name__}: " + f'name="{self.name}", haplogroup="{self.haplogroup}">' + ) + def add_to_node(self) -> bool: """Add this dropped marker to the corresponding node, if it exists.""" diff --git a/yhaplo/tree.py b/yhaplo/tree.py index abcc378..caf1946 100644 --- a/yhaplo/tree.py +++ b/yhaplo/tree.py @@ -110,6 +110,15 @@ def __init__( self.set_search_root() self.write_optional_traversal_output() + def __repr__(self) -> str: + """Return string representation.""" + + return ( + f"<{__name__}.{self.__class__.__name__}: " + f"{len(self.depth_first_node_list)} nodes, {len(self.snp_list)} SNPs, " + f"max_depth={self.max_depth}>" + ) + # Setters # ---------------------------------------------------------------------- def set_search_root(self) -> None: @@ -212,9 +221,9 @@ def query_mrca(self, haplogroup1: str, haplogroup2: str) -> None: mrca = node1.mrca(node2) logger.info( "\nMRCA Query\n\n" - f"Haplogroup 1: {node1.haplogroup}\n" - f"Haplogroup 2: {node2.haplogroup}\n" - f"MRCA: {mrca.haplogroup}\n" + f"Haplogroup 1: {node1.haplogroup} ({node1.hg_snp})\n" + f"Haplogroup 2: {node2.haplogroup} ({node2.hg_snp})\n" + f"MRCA: {mrca.haplogroup} ({mrca.hg_snp})\n" ) def query_snp_path(self, query_snp_name: str) -> None: @@ -292,12 +301,19 @@ def identify_phylogenetic_path( the phylogenetic path leading from the root to the most terminal branch representing a Sample's haplogroup. + Parameters + ---------- + sample : Sample + Sample instance. + Returns ------- best_path : Path The best phylogenetic path. anc_snp_full_list : list[SNP] List of SNPs observed in the ancestral state. + anc_der_count_tuples : list[tuple[Node, int, int]] + List of (node, num_ancestral, num_derived) tuples. Notes ----- @@ -983,7 +999,7 @@ def verify_newick_token(observed: str, expected: str) -> None: if observed != expected: raise ValueError( - "Malformed newick file.\n" + "Malformed Newick file.\n" f"Expected this token: {expected}\n" f"Got this one: {observed}\n" )