-
Notifications
You must be signed in to change notification settings - Fork 312
Implementation of the Knuth-Morris-Pratt (KMP) string matching algorithm #403
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,18 @@ | ||
__all__ = [] | ||
|
||
from . import trie | ||
from . import ( | ||
trie, | ||
string_matching_algorithms | ||
) | ||
|
||
from .trie import ( | ||
Trie | ||
) | ||
|
||
__all__.extend(trie.__all__) | ||
|
||
from .string_matching_algorithms import ( | ||
find_string | ||
) | ||
|
||
__all__.extend(string_matching_algorithms.__all__) |
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,136 @@ | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
from pydatastructs.linear_data_structures.arrays import ( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
OneDimensionalArray) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
__all__ = [ | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
'find_string' | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
] | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
def find_string(text: str, pattern: str, algorithm: str) -> bool: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
algorithm: str | |
The algorithm which should be used for | |
computing a minimum spanning tree. | |
Currently the following algorithms are | |
supported, | |
'kruskal' -> Kruskal's algorithm as given in | |
[1]. | |
'prim' -> Prim's algorithm as given in [2]. |
The full docstring of the above example is as follows:
pydatastructs/pydatastructs/graphs/algorithms.py
Lines 260 to 312 in 0dd2c03
""" | |
Computes a minimum spanning tree for the given | |
graph and algorithm. | |
Parameters | |
========== | |
graph: Graph | |
The graph whose minimum spanning tree | |
has to be computed. | |
algorithm: str | |
The algorithm which should be used for | |
computing a minimum spanning tree. | |
Currently the following algorithms are | |
supported, | |
'kruskal' -> Kruskal's algorithm as given in | |
[1]. | |
'prim' -> Prim's algorithm as given in [2]. | |
Returns | |
======= | |
mst: Graph | |
A minimum spanning tree using the implementation | |
same as the graph provided in the input. | |
Examples | |
======== | |
>>> from pydatastructs import Graph, AdjacencyListGraphNode | |
>>> from pydatastructs import minimum_spanning_tree | |
>>> u = AdjacencyListGraphNode('u') | |
>>> v = AdjacencyListGraphNode('v') | |
>>> G = Graph(u, v) | |
>>> G.add_edge(u.name, v.name, 3) | |
>>> mst = minimum_spanning_tree(G, 'kruskal') | |
>>> u_n = mst.neighbors(u.name) | |
>>> mst.get_edge(u.name, u_n[0].name).value | |
3 | |
References | |
========== | |
.. [1] https://en.wikipedia.org/wiki/Kruskal%27s_algorithm | |
.. [2] https://en.wikipedia.org/wiki/Prim%27s_algorithm | |
Note | |
==== | |
The concept of minimum spanning tree is valid only for | |
connected and undirected graphs. So, this function | |
should be used only for such graphs. Using with other | |
types of graphs may lead to unwanted results. |
Adding a note is optional in a docstring.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Avoid using `eval`. Please use a pattern similar to the one shown below:
pydatastructs/pydatastructs/graphs/algorithms.py
Lines 314 to 321 in 0dd2c03
import pydatastructs.graphs.algorithms as algorithms | |
func = "_minimum_spanning_tree_" + algorithm + "_" + graph._impl | |
if not hasattr(algorithms, func): | |
raise NotImplementedError( | |
"Currently %s algoithm for %s implementation of graphs " | |
"isn't implemented for finding minimum spanning trees." | |
%(algorithm, graph._impl)) | |
return getattr(algorithms, func)(graph) |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be better to name it `_knuth_morris_pratt`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The documentation is not needed here as it would be a non-public function.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same suggestions as for `_doMatch`.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please follow snake case instead of camel case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`_doMatch` -> `_do_match`. It would be better to define this function inside `_knuth_morris_pratt`, since for now it is called only within that function's scope.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from pydatastructs.strings.string_matching_algorithms import find_string | ||
|
||
def test_kms():
    """Run the shared string matching checks with the 'kmp' algorithm."""
    _test_common_string_matching(algorithm='kmp')
|
||
|
||
def _test_common_string_matching(algorithm):
    """
    Shared checks for ``find_string`` with the given algorithm.

    Parameters
    ==========

    algorithm: str
        Passed unchanged as the third argument of every
        ``find_string`` call made by this helper.

    Each (text, pattern) pair in the first table must make
    ``find_string`` return exactly ``True``; each pair in the second
    table must make it return exactly ``False``.
    """
    # Pairs for which the result is asserted to be True.
    matching_cases = {
        "Knuth-Morris-Pratt": "-Morris-",
        "abcabcabcabdabcabdabcabca": "abcabdabcabca",
        "aefcdfaecdaefaefcdaefeaefcdcdeae": "aefcdaefeaefcd",
        "aaaaaaaa": "aaa",
        "fullstringmatch": "fullstringmatch"
    }
    for text, pattern in matching_cases.items():
        assert find_string(text, pattern, algorithm) is True

    # Pairs for which the result is asserted to be False.
    non_matching_cases = {
        "Knuth-Morris-Pratt": "-Pratt-",
        "abcabcabcabdabcabdabcabca": "qwertyuiopzxcvbnm",
        "aefcdfaecdaefaefcdaefeaefcdcdeae": "cdaefaefe",
        "fullstringmatch": "fullstrinmatch"
    }
    for text, pattern in non_matching_cases.items():
        assert find_string(text, pattern, algorithm) is False
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please rename the file from `string_matching_algorithms.py` to `algorithms.py`. We will keep all the string-related algorithms in this file.