codezonediitj · czgdp1807 · Oct 10, 2021 · Jun 5, 2021 · Oct 10, 2021 · Oct 10, 2021
diff --git a/pydatastructs/strings/__init__.py b/pydatastructs/strings/__init__.py
@@ -1,8 +1,18 @@
 __all__ = []
 
-from . import trie
+from . import (
+    trie,
+    string_matching_algorithms
+)
+
 from .trie import (
     Trie
 )
 
 __all__.extend(trie.__all__)
+
+from .string_matching_algorithms import (
+    find_string
+)
+
+__all__.extend(string_matching_algorithms.__all__)
diff --git a/pydatastructs/strings/string_matching_algorithms.py b/pydatastructs/strings/string_matching_algorithms.py
@@ -0,0 +1,136 @@
+from pydatastructs.linear_data_structures.arrays import (
+    OneDimensionalArray)
+
+__all__ = [
+    'find_string'
+]
+
+def find_string(text: str, pattern: str, algorithm: str) -> bool:
+    """Report whether ``pattern`` occurs in ``text`` using the named algorithm.
+
+    Parameters
+    ----------
+    text: str
+        The text to search in. It may contain any characters, including
+        quotes, backslashes and blank spaces.
+    pattern: str
+        The string to search for. It may contain any characters.
+    algorithm: str
+        Name of the matching algorithm. Currently only 'kmp'
+        (Knuth-Morris-Pratt) is supported.
+
+    Returns
+    -------
+    bool
+        True if pattern occurs in the text, else False
+
+    Raises
+    ------
+    NotImplementedError
+        If ``algorithm`` does not name a supported algorithm.
+
+    Examples
+    --------
+    >>> from pydatastructs.strings.string_matching_algorithms import find_string
+    >>> find_string("aefoaefcdaefcdaed", "aefcdaed", algorithm = "kmp")
+    True
+    >>> find_string("aefoaefcdaefcdaed", "aefcdaedz", algorithm = "kmp")
+    False
+
+    """
+    # Dispatch through a fixed table rather than eval(): assembling source
+    # code from the arguments executed arbitrary caller input and failed on
+    # any text or pattern that contained a quote or a backslash.
+    supported = {'kmp': kmp}
+    if algorithm not in supported:
+        raise NotImplementedError(
+            "Currently %s algorithm for string matching "
+            "isn't implemented." % (algorithm,))
+    return supported[algorithm](text, pattern)
+
+
+def kmp(string: str, substring: str) -> bool:
+    """Search for ``substring`` inside ``string`` with the Knuth–Morris–Pratt algorithm
+
+    The search runs in two steps: a prefix/suffix table is built from
+    ``substring`` by ``_buildPattern`` and is then handed, together with
+    both strings, to ``_doMatch`` which performs the scan.
+
+    Parameters
+    ----------
+    string: str
+        The text that is scanned
+    substring: str
+        The pattern that is looked for in ``string``
+
+    Returns
+    -------
+    bool
+        The result of ``_doMatch``: whether substring exists in the string
+
+    Examples
+    --------
+    >>> from pydatastructs.strings.string_matching_algorithms import kmp
+    >>> kmp("aefoaefcdaefcdaed", "aefcdaed")
+    True
+    >>> kmp("aefoaefcdaefcdaed", "aefcdaedz")
+    False
+
+    References
+    ----------
+    .. [1] https://www.inf.hs-flensburg.de/lang/algorithmen/pattern/kmpen.htm
+    .. [2] https://towardsdatascience.com/pattern-search-with-the-knuth-morris-pratt-kmp-algorithm-8562407dba5b
+    .. [3] https://iopscience.iop.org/article/10.1088/1742-6596/1345/4/042005/pdf
+
+    """
+    # The table is built first (argument evaluation), then the scan runs.
+    return _doMatch(string, substring, _buildPattern(substring))
+
+
+def _buildPattern(substring: str) -> OneDimensionalArray:
+    """Build the prefix/suffix table used by the KMP scan for ``substring``
+
+    Parameters
+    ----------
+    substring: str
+        The pattern for which the table is built
+
+    Returns
+    -------
+    patterns: OneDimensionalArray
+        An array of ``len(substring)`` integers, initialised to -1.
+        ``patterns[i] = j`` with ``j > -1`` records that the suffix of
+        ``substring`` ending at index ``i`` is also the prefix of
+        ``substring`` ending at index ``j``. A value of -1 means no prefix
+        was recorded as a suffix for that index.
+
+    """
+    length = len(substring)
+    patterns = OneDimensionalArray(int, length)
+    patterns.fill(-1)
+    j = 0
+    i = 1
+    while i < length:
+        # Compare characters by value. ``is`` tests object identity, which
+        # only happens to agree with equality for the characters CPython
+        # caches, so other characters were never recognised as equal.
+        if substring[i] == substring[j]:
+            # A prefix that is also a suffix
+            patterns[i] = j
+            i += 1
+            j += 1
+        elif j > 0:
+            # Fall back to the previously recorded, shorter prefix
+            j = patterns[j - 1] + 1
+        else:
+            # Nothing matched at the start of the pattern; move on
+            i += 1
+    return patterns
+
+
+def _doMatch(string: str, substring: str, patterns: OneDimensionalArray) -> bool:
+    """Check if the substring exists in the string
+
+    Parameters
+    ----------
+    string: str
+        The text that is scanned
+    substring: str
+        A pattern/substring that is searched for in the string
+    patterns: OneDimensionalArray
+        The prefix/suffix table of ``substring``, as returned by
+        ``_buildPattern(substring)``. Only integer indexing is used on it
+        and every value is expected to be < len(patterns).
+
+    Returns
+    -------
+    bool
+        Whether substring exists in the string or not. An empty substring
+        is reported as found in any string.
+
+    """
+    target = len(substring)
+    if target == 0:
+        # An empty pattern occurs in every text; without this guard the
+        # loop below would index ``substring[0]`` and raise IndexError.
+        return True
+    i = 0
+    j = 0
+    while i < len(string):
+        # Characters and lengths are compared by value. ``is`` compares
+        # object identity, which is not guaranteed to agree with equality
+        # for arbitrary characters or for integers outside CPython's cache.
+        if string[i] == substring[j]:
+            i += 1
+            j += 1
+        elif j > 0:
+            # Reuse the part of the pattern already known to match
+            j = patterns[j - 1] + 1
+        else:
+            i += 1
+        if j == target:
+            return True
+    return False
diff --git a/pydatastructs/strings/tests/test_string_matching_algorithms.py b/pydatastructs/strings/tests/test_string_matching_algorithms.py
@@ -0,0 +1,26 @@
+from pydatastructs.strings.string_matching_algorithms import find_string
+
+def test_kms():
+    """Run the shared string matching test cases against the 'kmp' algorithm."""
+    # NOTE(review): 'kms' in the name is presumably a typo for 'kmp'; the
+    # name is kept as is so existing test selections are not affected.
+    _test_common_string_matching(algorithm='kmp')
+
+
+def _test_common_string_matching(algorithm):
+    """Assert ``find_string`` results for a fixed set of (text, pattern) pairs.
+
+    The first group holds patterns that occur in their text and must give
+    True; the second group holds patterns that do not occur and must give
+    False. ``algorithm`` is passed through to ``find_string`` unchanged.
+    """
+    matching_cases = (
+        ("Knuth-Morris-Pratt", "-Morris-"),
+        ("abcabcabcabdabcabdabcabca", "abcabdabcabca"),
+        ("aefcdfaecdaefaefcdaefeaefcdcdeae", "aefcdaefeaefcd"),
+        ("aaaaaaaa", "aaa"),
+        ("fullstringmatch", "fullstringmatch"),
+    )
+    for text, pattern in matching_cases:
+        assert find_string(text, pattern, algorithm) is True
+
+    non_matching_cases = (
+        ("Knuth-Morris-Pratt", "-Pratt-"),
+        ("abcabcabcabdabcabdabcabca", "qwertyuiopzxcvbnm"),
+        ("aefcdfaecdaefaefcdaefeaefcdcdeae", "cdaefaefe"),
+        ("fullstringmatch", "fullstrinmatch"),
+    )
+    for text, pattern in non_matching_cases:
+        assert find_string(text, pattern, algorithm) is False