-
-
Notifications
You must be signed in to change notification settings - Fork 46.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implemented Suffix Tree Data Structure (#11554)
* Implemented KD-Tree Data Structure * Implemented KD-Tree Data Structure. updated DIRECTORY.md. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Create __init__.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Replaced legacy `np.random.rand` call with `np.random.Generator` in kd_tree/example_usage.py * Replaced legacy `np.random.rand` call with `np.random.Generator` in kd_tree/hypercube_points.py * added typehints and docstrings * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * docstring for search() * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added tests. Updated docstrings/typehints * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated tests and used | for type annotations * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * E501 for build_kdtree.py, hypercube_points.py, nearest_neighbour_search.py * I001 for example_usage.py and test_kdtree.py * I001 for example_usage.py and test_kdtree.py * Update data_structures/kd_tree/build_kdtree.py Co-authored-by: Christian Clauss <cclauss@me.com> * Update data_structures/kd_tree/example/hypercube_points.py Co-authored-by: Christian Clauss <cclauss@me.com> * Update data_structures/kd_tree/example/hypercube_points.py Co-authored-by: Christian Clauss <cclauss@me.com> * Added new test cases requested in Review. Refactored the test_build_kdtree() to include various checks. 
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Considered ruff errors * Considered ruff errors * Apply suggestions from code review * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update kd_node.py * imported annotations from __future__ * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Implementation of the suffix tree data structure * Adding data to DIRECTORY.md * Minor file renaming * minor correction * renaming in DIRECTORY.md * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Considering ruff part-1 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Considering ruff part-2 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Considering ruff part-3 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Considering ruff part-4 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Considering ruff part-5 * Implemented Suffix Tree Data Structure. Added some comments to my files in #11532, #11554. * updating DIRECTORY.md * Implemented Suffix Tree Data Structure. Added some comments to my files in #11532, #11554. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Christian Clauss <cclauss@me.com> Co-authored-by: Ramy-Badr-Ahmed <Ramy-Badr-Ahmed@users.noreply.github.com>
- Loading branch information
1 parent
9b5641d
commit 976e385
Showing
14 changed files
with
253 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) | ||
# in Pull Request: #11554 | ||
# https://github.com/TheAlgorithms/Python/pull/11554 | ||
# | ||
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request | ||
# addressing bugs/corrections to this file. | ||
# Thank you! | ||
|
||
from data_structures.suffix_tree.suffix_tree import SuffixTree | ||
|
||
|
||
def main() -> None:
    """
    Run a small demonstration of pattern lookup with SuffixTree.

    A SuffixTree is built over the text "monkey banana"; each pattern from a
    fixed list is then searched for, and the outcome is printed on its own
    line.

    Patterns and the results they produce:
        - "ana" --> True
        - "ban" --> True
        - "na"  --> True
        - "xyz" --> False
        - "mon" --> True
    """
    suffix_tree = SuffixTree("monkey banana")

    for pattern in ("ana", "ban", "na", "xyz", "mon"):
        print(f"Pattern '{pattern}' found: {suffix_tree.search(pattern)}")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) | ||
# in Pull Request: #11554 | ||
# https://github.com/TheAlgorithms/Python/pull/11554 | ||
# | ||
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request | ||
# addressing bugs/corrections to this file. | ||
# Thank you! | ||
|
||
from data_structures.suffix_tree.suffix_tree_node import SuffixTreeNode | ||
|
||
|
||
class SuffixTree:
    """
    Tree of all suffixes of a text, stored one character per node.

    Every suffix of the text is inserted character by character starting at
    the root, so any substring of the text can be followed as a path from the
    root.
    """

    def __init__(self, text: str) -> None:
        """
        Store the text, create an empty root node and build the tree.

        Args:
            text (str): The text whose suffixes are inserted into the tree.
        """
        self.text: str = text
        self.root: SuffixTreeNode = SuffixTreeNode()
        self.build_suffix_tree()

    def build_suffix_tree(self) -> None:
        """
        Insert every suffix of ``self.text`` into the tree.

        Suffixes are added from the longest (starting at index 0) to the
        shortest (the last character).
        """
        source = self.text
        for offset in range(len(source)):
            self._add_suffix(source[offset:], offset)

    def _add_suffix(self, suffix: str, index: int) -> None:
        """
        Insert a single suffix, creating any nodes missing along its path.

        The node reached after the last character is flagged as the end of a
        string and records the span the suffix covers in the original text.

        Args:
            suffix (str): The suffix to insert.
            index (int): Position in the original text where the suffix starts.
        """
        current = self.root
        for symbol in suffix:
            child = current.children.get(symbol)
            if child is None:
                # No edge for this character yet: extend the path.
                child = SuffixTreeNode()
                current.children[symbol] = child
            current = child

        current.is_end_of_string = True
        current.start = index
        current.end = index + len(suffix) - 1

    def search(self, pattern: str) -> bool:
        """
        Report whether ``pattern`` can be followed as a path from the root.

        An empty pattern consumes no characters and therefore yields True.

        Args:
            pattern (str): The character sequence to look for.

        Returns:
            bool: True if every character of the pattern could be matched,
            False as soon as one character has no corresponding child.
        """
        current = self.root
        for symbol in pattern:
            current = current.children.get(symbol)
            if current is None:
                return False
        return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) | ||
# in Pull Request: #11554 | ||
# https://github.com/TheAlgorithms/Python/pull/11554 | ||
# | ||
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request | ||
# addressing bugs/corrections to this file. | ||
# Thank you! | ||
|
||
from __future__ import annotations | ||
|
||
|
||
class SuffixTreeNode:
    """A single node of a suffix tree, holding its outgoing edges and metadata."""

    def __init__(
        self,
        children: dict[str, SuffixTreeNode] | None = None,
        is_end_of_string: bool = False,
        start: int | None = None,
        end: int | None = None,
        suffix_link: SuffixTreeNode | None = None,
    ) -> None:
        """
        Initializes a suffix tree node.

        Parameters:
            children (dict[str, SuffixTreeNode] | None): The children of this
                node, keyed by character. When None, the node gets its own
                fresh empty dict; any dict passed in (including an empty one)
                is stored as-is, not copied.
            is_end_of_string (bool): Indicates if this node represents
                the end of a string.
            start (int | None): The start index of the suffix in the text.
            end (int | None): The end index of the suffix in the text.
            suffix_link (SuffixTreeNode | None): Link to another suffix tree node.
        """
        # Test against None explicitly: ``children or {}`` would also replace an
        # empty dict supplied by the caller (empty dicts are falsy), silently
        # detaching this node from the mapping the caller still holds.
        self.children: dict[str, SuffixTreeNode] = (
            {} if children is None else children
        )
        self.is_end_of_string: bool = is_end_of_string
        self.start: int | None = start
        self.end: int | None = end
        self.suffix_link: SuffixTreeNode | None = suffix_link
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed) | ||
# in Pull Request: #11554 | ||
# https://github.com/TheAlgorithms/Python/pull/11554 | ||
# | ||
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request | ||
# addressing bugs/corrections to this file. | ||
# Thank you! | ||
|
||
import unittest | ||
|
||
from data_structures.suffix_tree.suffix_tree import SuffixTree | ||
|
||
|
||
class TestSuffixTree(unittest.TestCase):
    """Exercises SuffixTree.search on a tree built from the text "banana"."""

    def setUp(self) -> None:
        """Build a fresh suffix tree over "banana" before each test."""
        self.text = "banana"
        self.suffix_tree = SuffixTree(self.text)

    def test_search_existing_patterns(self) -> None:
        """Patterns that occur in the text must be reported as found."""
        for pattern in ("ana", "ban", "na"):
            with self.subTest(pattern=pattern):
                hit = self.suffix_tree.search(pattern)
                assert hit, f"Pattern '{pattern}' should be found."

    def test_search_non_existing_patterns(self) -> None:
        """Patterns that do not occur in the text must be reported as absent."""
        for pattern in ("xyz", "apple", "cat"):
            with self.subTest(pattern=pattern):
                hit = self.suffix_tree.search(pattern)
                assert not hit, f"Pattern '{pattern}' should not be found."

    def test_search_empty_pattern(self) -> None:
        """The empty pattern must be reported as found."""
        hit = self.suffix_tree.search("")
        assert hit, "An empty pattern should be found."

    def test_search_full_text(self) -> None:
        """The complete text must be reported as found."""
        hit = self.suffix_tree.search(self.text)
        assert hit, "The full text should be found in the suffix tree."

    def test_search_substrings(self) -> None:
        """Assorted substrings of the text must be reported as found."""
        for substring in ("ban", "ana", "a", "na"):
            with self.subTest(substring=substring):
                hit = self.suffix_tree.search(substring)
                assert hit, f"Substring '{substring}' should be found."


if __name__ == "__main__":
    unittest.main()