Skip to content

Commit

Permalink
Updated docstrings, and some small fixes (#132)
Browse files Browse the repository at this point in the history
* Updated docstrings, and some small fixes

* Notes from @forslund

* Two more functions

* bump version
  • Loading branch information
clusterfudge authored Jun 28, 2021
1 parent 4e9f9b2 commit 8c31136
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 37 deletions.
128 changes: 92 additions & 36 deletions adapt/tools/text/trie.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,29 @@ def __init__(self, data=None, is_terminal=False):

def lookup(self, iterable, index=0, gather=False, edit_distance=0, max_edit_distance=0, match_threshold=0.0, matched_length=0):
"""
TODO: Implement trie lookup with edit distance
Args:
iterable(list?): key used to find what is requested this could
be a generator.
index(int): index of what is requested
gather(bool): of whether to gather or not
edit_distance(int): the distance -- currently not used
max_edit_distance(int): the max distance -- not currently used
iterable(hashable): a list of items used to traverse the Trie.
This represents the position of a node in the Trie, matching the
iterable used at insertion time.
For example:
trie.insert('foo', {'bar': 'baz'})
list(trie.lookup('foo')) == [TrieNode(data={'bar': 'baz'}, is_terminal=True)]
index(int): index of item for current position in traversal.
we pass the original iterable and an index to avoid
the cost of repeatedly copying the original iterable
gather(bool): whether to return intermediate results (gather
algorithm)
edit_distance(int): current edit distance in the traversal.
max_edit_distance(int): maximum edit distance
match_threshold(float): minimum confidence of match for discovery
matched_length(int): related to edit distance, for calculating
confidence of match where
confidence = (length - abs(matched_length - length)) / length
yields:
object: yields the results of the search
generator[TrieNode]: a generator that vends the results of the
lookup, of type TrieNode
"""
if self.is_terminal:
if index == len(iterable) or \
Expand All @@ -61,7 +72,8 @@ def lookup(self, iterable, index=0, gather=False, edit_distance=0, max_edit_dist
edit_distance=edit_distance, max_edit_distance=max_edit_distance, matched_length=matched_length + 1):
yield result

# if there's edit distance remaining and it's possible to match a word above the confidence threshold
# if there's edit distance remaining and it's possible to
# match a word above the confidence threshold, continue searching
potential_confidence = float(index - edit_distance + (max_edit_distance - edit_distance)) / \
(float(index) + (max_edit_distance - edit_distance)) if index + max_edit_distance - edit_distance > 0 else 0.0
if edit_distance < max_edit_distance and potential_confidence > match_threshold:
Expand All @@ -87,7 +99,12 @@ def insert(self, iterable, index=0, data=None, weight=1.0):
"""Insert new node into tree
Args:
iterable(hashable): key used to find in the future.
iterable(hashable): a list of items used to traverse the Trie.
This represents the position of a node in the Trie, matching the
iterable used at insertion time.
For example:
trie.insert('foo', {'bar': 'baz'})
list(trie.lookup('foo')) == [TrieNode(data={'bar': 'baz'}, is_terminal=True)]
data(object): data associated with the key
index(int): an index used for insertion.
weight(float): the weight given to the item added.
Expand All @@ -113,7 +130,12 @@ def remove(self, iterable, data=None, index=0):
"""Remove an element from the trie
Args:
iterable(hashable): key used to find what is to be removed
iterable(hashable): a list of items used to traverse the Trie.
This represents the position of a node in the Trie, matching the
iterable used at insertion time.
For example:
trie.insert('foo', {'bar': 'baz'})
list(trie.lookup('foo')) == [TrieNode(data={'bar': 'baz'}, is_terminal=True)]
data(object): data associated with the key
index(int): index of what is to be removed
Expand Down Expand Up @@ -141,12 +163,22 @@ def remove(self, iterable, data=None, index=0):


class Trie(object):
"""Interface for the tree
"""Recursive implementation of a prefix trie (Trie)
https://en.wikipedia.org/wiki/Trie
Additionally supports #gather, a traversal whose results include
any terminal nodes visited.
Attributes:
root(TrieNode): parent node to start the tree
max_edit_distance(int): ?
match_threshold(int): ?
max_edit_distance(int): values > 0 allow for fuzzy matching
with a maximum levenshtein edit distance
https://en.wikipedia.org/wiki/Edit_distance
match_threshold(float): only return values with a higher confidence
than this value
While most frequently used with strings, the Trie can be populated with any
iterable (arrays of ints, arrays of objects, arrays of strings) as long
as each value responds to `__hash__`.
"""

Expand All @@ -157,32 +189,46 @@ def __init__(self, max_edit_distance=0, match_threshold=0.0):
max_edit_distance and match_threshold.
Args:
max_edit_distance(int): ?
match_threshold(int): ?
Notes:
This never seems to get called with max_edit_distance or match_threshold
max_edit_distance(int): values > 0 allow for fuzzy matching
with a maximum levenshtein edit distance
https://en.wikipedia.org/wiki/Edit_distance
match_threshold(float): only return values with a higher confidence
than this value
"""
self.root = TrieNode('root')
self.max_edit_distance = max_edit_distance
self.match_threshold = match_threshold

def gather(self, iterable):
"""Calls the lookup with gather True Passing iterable and yields
the result.
"""Executes a "gather" traversal of the Trie
Result set will include any `is_terminal` nodes encountered during
the traversal
Args:
iterable(hashable): a list of items used to traverse the Trie
This represents the position of a node in the Trie, matching the
iterable used at insertion time.
For example:
trie.insert('foo', {'bar': 'baz'})
list(trie.lookup('foo')) == [TrieNode(data={'bar': 'baz'}, is_terminal=True)]
"""
for result in self.lookup(iterable, gather=True):
yield result

def lookup(self, iterable, gather=False):
"""Call the lookup on the root node with the given parameters.
Args
iterable(index or key): Used to retrive nodes from tree
gather(bool): this is passed down to the root node lookup
Args:
iterable(hashable): a list of items used to traverse the Trie
This represents the position of a node in the Trie, matching the
iterable used at insertion time.
For example:
trie.insert('foo', {'bar': 'baz'})
list(trie.lookup('foo')) == [TrieNode(data={'bar': 'baz'}, is_terminal=True)]
gather(bool): flag to indicate whether gather results
should be included
Notes:
max_edit_distance and match_threshold come from the init
"""
for result in self.root.lookup(iterable,
gather=gather,
Expand All @@ -192,28 +238,38 @@ def lookup(self, iterable, gather=False):
yield result

def insert(self, iterable, data=None, weight=1.0):
"""Used to insert into he root node
"""Used to insert into the trie
Args
iterable(hashable): index or key used to identify
data(object): data to be paired with the key
Args:
iterable(hashable): a list of items used to traverse the Trie
This represents the position of a node in the Trie, matching the
iterable used at insertion time.
For example:
trie.insert('foo', {'bar': 'baz'})
list(trie.lookup('foo')) == [TrieNode(data={'bar': 'baz'}, is_terminal=True)]
data(object): data to be stored or merged for this iterable
"""
self.root.insert(iterable, index=0, data=data, weight=1.0)
self.root.insert(iterable, index=0, data=data, weight=weight)

def remove(self, iterable, data=None):
"""Used to remove from the root node
Args:
iterable(hashable): index or key used to identify
item to remove
data: data to be paired with the key
iterable(hashable): a list of items used to traverse the Trie
This represents the position of a node in the Trie, matching the
iterable used at insertion time.
For example:
trie.insert('foo', {'bar': 'baz'})
list(trie.lookup('foo')) == [TrieNode(data={'bar': 'baz'}, is_terminal=True)]
data: data to be removed. If None, or node is empty as a result,
remove the node.
"""
return self.root.remove(iterable, data=data)

def scan(self, match_func):
"""Traverse the trie scanning for end nodes with matching data.
Arguments:
Args:
match_func (callable): function used to match data.
Returns:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def required(requirements_file):

setup(
name="adapt-parser",
version="0.4.2",
version="0.4.3",
author="Sean Fitzgerald",
author_email="sean@fitzgeralds.me",
description=("A text-to-intent parsing framework."),
Expand Down

0 comments on commit 8c31136

Please sign in to comment.