From 58abc408dfd775c0f076fe751cb712ca36750c15 Mon Sep 17 00:00:00 2001 From: VikoTse Date: Mon, 28 Aug 2017 11:46:20 +0800 Subject: [PATCH 1/2] Support returning wordEndIndex and retrieving count of nodes in trie New feature: Support to retrieve the number of nodes added in trie. Support to return wordEndIndex of text in matching stage. Update Unit Tests for 1. returning wordEndIndex 2. returning raw string, associated data and wordEndIndex Update README's description --- AhoCorasick.Tests.cs | 198 +++++++++-------- AhoCorasick.cs | 519 +++++++++++++++++++++---------------------- README.md | 141 +++++++----- 3 files changed, 447 insertions(+), 411 deletions(-) diff --git a/AhoCorasick.Tests.cs b/AhoCorasick.Tests.cs index 3e92ad4..d443498 100644 --- a/AhoCorasick.Tests.cs +++ b/AhoCorasick.Tests.cs @@ -1,89 +1,109 @@ -// Copyright (c) 2013 Pēteris Ņikiforovs -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -using System.Linq; - -using NUnit.Framework; - -namespace AhoCorasick -{ - public class Tests - { - [Test] - public void HelloWorld() - { - string text = "hello and welcome to this beautiful world!"; - - AhoCorasick.Trie trie = new AhoCorasick.Trie(); - trie.Add("hello"); - trie.Add("world"); - trie.Build(); - - string[] matches = trie.Find(text).ToArray(); - - Assert.AreEqual(2, matches.Length); - Assert.AreEqual("hello", matches[0]); - Assert.AreEqual("world", matches[1]); - } - - [Test] - public void Contains() - { - string text = "hello and welcome to this beautiful world!"; - - AhoCorasick.Trie trie = new AhoCorasick.Trie(); - trie.Add("hello"); - trie.Add("world"); - trie.Build(); - - Assert.IsTrue(trie.Find(text).Any()); - } - - [Test] - public void LineNumbers() - { - string text = "world, i hello you!"; - string[] words = new[] { "hello", "world" }; - - AhoCorasick.Trie trie = new AhoCorasick.Trie(); - for (int i = 0; i < words.Length; i++) - trie.Add(words[i], i); - trie.Build(); - - int[] lines = trie.Find(text).ToArray(); - - Assert.AreEqual(2, lines.Length); - Assert.AreEqual(1, lines[0]); - Assert.AreEqual(0, lines[1]); - } - - [Test] - public void Words() - { - string[] text = "one two three four".Split(' '); - - AhoCorasick.Trie trie = new AhoCorasick.Trie(); - trie.Add(new[] { "three", "four" }, true); - trie.Build(); - - Assert.IsTrue(trie.Find(text).Any()); - } - } -} +// Copyright (c) 2013 Pēteris Ņikiforovs +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; +using System.Linq; + +using NUnit.Framework; + +namespace AhoCorasick +{ + public class Tests + { + [Test] + public void HelloWorld() + { + string text = "hello and welcome to this beautiful world!"; + + AhoCorasick.Trie trie = new AhoCorasick.Trie(); + trie.Add("hello"); + trie.Add("world"); + trie.Build(); + + var matches = trie.Find(text).ToArray(); + + Assert.AreEqual(2, matches.Length); + Assert.AreEqual(new Tuple("hello", 4), (Tuple)matches[0]); + Assert.AreEqual(new Tuple("world", 40), (Tuple)matches[1]); + } + + [Test] + public void Contains() + { + string text = "hello and welcome to this beautiful world!"; + + AhoCorasick.Trie trie = new AhoCorasick.Trie(); + trie.Add("hello"); + trie.Add("world"); + trie.Build(); + + Assert.IsTrue(trie.Find(text).Any()); + } + + [Test] + public void Ids() + { + string text = "hello and welcome to this beautiful world!"; + + AhoCorasick.Trie trie = new AhoCorasick.Trie(); + trie.Add("hello", 123); + trie.Add("world", 456); + + trie.Build(); + + var matches = trie.Find(text).ToArray(); + + Assert.AreEqual(2, matches.Length); + Assert.AreEqual(new Tuple(123, 4), matches[0]); + Assert.AreEqual(new Tuple(456, 40), matches[1]); + } + + [Test] + public void WordsAndIds() + { + string text = "hello and welcome to this beautiful world!"; + + AhoCorasick.Trie> trie = new AhoCorasick.Trie>(); + + trie.Add("hello", new Tuple("hello", 123)); + trie.Add("world", new 
Tuple("world", 456)); + + trie.Build(); + + var matches = trie.Find(text).ToArray(); + + Assert.AreEqual(2, matches.Length); + Assert.AreEqual(new Tuple, int>(new Tuple("hello", 123), 4), matches[0]); + Assert.AreEqual(new Tuple, int>(new Tuple("world", 456), 40), matches[1]); + } + + [Test] + public void Words() + { + string[] text = "one two three four".Split(' '); + + AhoCorasick.Trie trie = new AhoCorasick.Trie(); + trie.Add(new[] { "three", "four" }, true); + trie.Build(); + + Assert.IsTrue(trie.Find(text).Any()); + } + } +} diff --git a/AhoCorasick.cs b/AhoCorasick.cs index e2e2705..0cd20b2 100644 --- a/AhoCorasick.cs +++ b/AhoCorasick.cs @@ -1,265 +1,254 @@ -// Copyright (c) 2013 Pēteris Ņikiforovs -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -using System.Collections; -using System.Collections.Generic; - -namespace AhoCorasick -{ - /// - /// Trie that will find and return strings found in a text. 
- /// - public class Trie : Trie - { - /// - /// Adds a string. - /// - /// The string to add. - public void Add(string s) - { - Add(s, s); - } - - /// - /// Adds multiple strings. - /// - /// The strings to add. - public void Add(IEnumerable strings) - { - foreach (string s in strings) - { - Add(s); - } - } - } - - /// - /// Trie that will find strings in a text and return values of type - /// for each string found. - /// - /// Value type. - public class Trie : Trie - { - } - - /// - /// Trie that will find strings or phrases and return values of type - /// for each string or phrase found. - /// - /// - /// will typically be a char for finding strings - /// or a string for finding phrases or whole words. - /// - /// The type of a letter in a word. - /// The type of the value that will be returned when the word is found. - public class Trie - { - /// - /// Root of the trie. It has no value and no parent. - /// - private readonly Node root = new Node(); - - /// - /// Adds a word to the tree. - /// - /// - /// A word consists of letters. A node is built for each letter. - /// If the letter type is char, then the word will be a string, since it consists of letters. - /// But a letter could also be a string which means that a node will be added - /// for each word and so the word is actually a phrase. - /// - /// The word that will be searched. - /// The value that will be returned when the word is found. - public void Add(IEnumerable word, TValue value) - { - // start at the root - var node = root; - - // build a branch for the word, one letter at a time - // if a letter node doesn't exist, add it - foreach (T c in word) - { - var child = node[c]; - - if (child == null) - child = node[c] = new Node(c, node); - - node = child; - } - - // mark the end of the branch - // by adding a value that will be returned when this word is found in a text - node.Values.Add(value); - } - - - /// - /// Constructs fail or fall links. 
- /// - public void Build() - { - // construction is done using breadth-first-search - var queue = new Queue>(); - queue.Enqueue(root); - - while (queue.Count > 0) - { - var node = queue.Dequeue(); - - // visit children - foreach (var child in node) - queue.Enqueue(child); - - // fail link of root is root - if (node == root) - { - root.Fail = root; - continue; - } - - var fail = node.Parent.Fail; - - while (fail[node.Word] == null && fail != root) - fail = fail.Fail; - - node.Fail = fail[node.Word] ?? root; - if (node.Fail == node) - node.Fail = root; - } - } - - /// - /// Finds all added words in a text. - /// - /// The text to search in. - /// The values that were added for the found words. - public IEnumerable Find(IEnumerable text) - { - var node = root; - - foreach (T c in text) - { - while (node[c] == null && node != root) - node = node.Fail; - - node = node[c] ?? root; - - for (var t = node; t != root; t = t.Fail) - { - foreach (TValue value in t.Values) - yield return value; - } - } - } - - /// - /// Node in a trie. - /// - /// The same as the parent type. - /// The same as the parent value type. - private class Node : IEnumerable> - { - private readonly TNode word; - private readonly Node parent; - private readonly Dictionary> children = new Dictionary>(); - private readonly List values = new List(); - - /// - /// Constructor for the root node. - /// - public Node() - { - } - - /// - /// Constructor for a node with a word - /// - /// - /// - public Node(TNode word, Node parent) - { - this.word = word; - this.parent = parent; - } - - /// - /// Word (or letter) for this node. - /// - public TNode Word - { - get { return word; } - } - - /// - /// Parent node. - /// - public Node Parent - { - get { return parent; } - } - - /// - /// Fail or fall node. - /// - public Node Fail - { - get; - set; - } - - /// - /// Children for this node. - /// - /// Child word. - /// Child node. - public Node this[TNode c] - { - get { return children.ContainsKey(c) ? 
children[c] : null; } - set { children[c] = value; } - } - - /// - /// Values for words that end at this node. - /// - public List Values - { - get { return values; } - } - - /// - public IEnumerator> GetEnumerator() - { - return children.Values.GetEnumerator(); - } - - /// - IEnumerator IEnumerable.GetEnumerator() - { - return GetEnumerator(); - } - - /// - public override string ToString() - { - return Word.ToString(); - } - } - } -} +// Copyright (c) 2013 Pēteris Ņikiforovs +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; +using System.Collections; +using System.Collections.Generic; + +namespace AhoCorasick +{ + /// + /// Trie that will find and return strings found in a text. + /// + public class Trie : Trie + { + /// + /// Adds a string. + /// + /// The string to add. + public void Add(string s) + { + Add(s, s); + } + + /// + /// Adds multiple strings. + /// + /// The strings to add. 
+ public void Add(IEnumerable strings) + { + foreach (string s in strings) + { + Add(s); + } + } + } + + /// + /// Trie that will find strings in a text and return values of type + /// for each string found. + /// + /// Value type. + public class Trie : Trie + { + } + + /// + /// Trie that will find strings or phrases and return values of type + /// for each string or phrase found. + /// + /// + /// will typically be a char for finding strings + /// or a string for finding phrases or whole words. + /// + /// The type of a letter in a word. + /// The type of the value that will be returned when the word is found. + public class Trie + { + public int Count { get; private set; } = 0; + + /// + /// Root of the trie. It has no value and no parent. + /// + private readonly Node _root = new Node(); + + /// + /// Adds a word to the tree. + /// + /// + /// A word consists of letters. A node is built for each letter. + /// If the letter type is char, then the word will be a string, since it consists of letters. + /// But a letter could also be a string which means that a node will be added + /// for each word and so the word is actually a phrase. + /// + /// The word that will be searched. + /// The value that will be returned when the word is found. + public void Add(IEnumerable word, TValue value) + { + // start at the root + var node = _root; + + // build a branch for the word, one letter at a time + // if a letter node doesn't exist, add it + foreach (T c in word) + { + var child = node[c] ?? (node[c] = new Node(c, node)); + + node = child; + } + + // mark the end of the branch + // by adding a value that will be returned when this word is found in a text + node.Values.Add(value); + + ++Count; + } + + /// + /// Constructs fail or fall links. 
+ /// + public void Build() + { + // construction is done using breadth-first-search + var queue = new Queue>(); + queue.Enqueue(_root); + + while (queue.Count > 0) + { + var node = queue.Dequeue(); + + // visit children + foreach (var child in node) + queue.Enqueue(child); + + // fail link of root is root + if (node == _root) + { + _root.Fail = _root; + continue; + } + + var fail = node.Parent.Fail; + + while (fail[node.Word] == null && fail != _root) + fail = fail.Fail; + + node.Fail = fail[node.Word] ?? _root; + if (node.Fail == node) + node.Fail = _root; + } + } + + /// + /// Finds all added words in a text. + /// + /// The text to search in. + /// The values, endIndexs that were added for the found words. + public IEnumerable> Find(IEnumerable text) + { + var node = _root; + + int endIndex = 0; + foreach (T c in text) + { + while (node[c] == null && node != _root) + node = node.Fail; + + node = node[c] ?? _root; + + for (var t = node; t != _root; t = t.Fail) + { + foreach (TValue value in t.Values) + yield return new Tuple(value, endIndex); + } + + ++endIndex; + } + } + + /// + /// Node in a trie. + /// + /// The same as the parent type. + /// The same as the parent value type. + private class Node : IEnumerable> + { + private readonly Dictionary> _children = + new Dictionary>(); + + /// + /// Constructor for the root node. + /// + public Node() + { + } + + /// + /// Constructor for a node with a word + /// + /// + /// + public Node(TNode word, Node parent) + { + this.Word = word; + this.Parent = parent; + } + + /// + /// Word (or letter) for this node. + /// + public TNode Word { get; } + + /// + /// Parent node. + /// + public Node Parent { get; } + + /// + /// Fail or fall node. + /// + public Node Fail { get; set; } + + /// + /// Children for this node. + /// + /// Child word. + /// Child node. + public Node this[TNode c] + { + get { return _children.ContainsKey(c) ? 
_children[c] : null; } + set { _children[c] = value; } + } + + /// + /// Values for words that end at this node. + /// + public List Values { get; } = new List(); + + /// + public IEnumerator> GetEnumerator() + { + return _children.Values.GetEnumerator(); + } + + /// + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + /// + public override string ToString() + { + return Word.ToString(); + } + } + } +} diff --git a/README.md b/README.md index 1e86ca0..6c63305 100644 --- a/README.md +++ b/README.md @@ -1,58 +1,85 @@ -Aho–Corasick string matching algorithm in C# -============================================ - -The [Aho–Corasick string matching algorithm](http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm) is a string searching algorithm. It's useful in NLP when you have a dictionary with words and you need to tell if a text contains any of the words. - -```csharp -AhoCorasick.Trie trie = new AhoCorasick.Trie(); - -// add words -trie.Add("hello"); -trie.Add("world"); - -// build search tree -trie.Build(); - -string text = "hello and welcome to this beautiful world!"; - -// find words -foreach (string word in trie.Find(text)) { - Console.WriteLine(word); -} -``` - -You can associate other data with the words (like an ID or line number). - -```csharp -AhoCorasick.Trie trie = new AhoCorasick.Trie(); - -// add words -trie.Add("hello", 123); -trie.Add("world", 456); - -// build search tree -trie.Build(); - -// retrieve IDs -foreach (int id in trie.Find(text)) { - Console.WriteLine(id); -} -``` - -Use `IEnumerable.Any()` to check if the text contains a match without retrieving all of them. - -If you want to match whole words, you can use `Trie`. 
- -```csharp -string[] text = "hello world i say to you".Split(' '); - -AhoCorasick.Trie trie = new AhoCorasick.Trie(); -trie.Add("hello world".Split(' '), true); -trie.Build(); -bool containsHelloWorld = trie.Find(text).Any(); -``` - -License -------- - +Aho–Corasick string matching algorithm in C# +============================================ + +The [Aho–Corasick string matching algorithm](http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm) is a string searching algorithm. It's useful in NLP when you have a dictionary with words and you need to tell if a text contains any of the words. + +```csharp +AhoCorasick.Trie trie = new AhoCorasick.Trie(); + +// add words +trie.Add("hello"); +trie.Add("world"); + +// build search tree +trie.Build(); + +string text = "hello and welcome to this beautiful world!"; + +// find words and wordEndIndices +foreach (Tuple tuple in trie.Find(text)) { + var word = tuple.Item1; + var wordEndIndex = tuple.Item2; + Console.WriteLine("{0}, {1}", word, wordEndIndex); +} +``` + +You could associate other data with the words (like an ID or line number). 
+ +```csharp +AhoCorasick.Trie<int> trie = new AhoCorasick.Trie<int>(); + +// add words +trie.Add("hello", 123); +trie.Add("world", 456); + +// build search tree +trie.Build(); + +// retrieve IDs and wordEndIndices +foreach (Tuple<int, int> tuple in trie.Find(text)) +{ + var id = tuple.Item1; + var wordEndIndex = tuple.Item2; + Console.WriteLine("{0}, {1}", id, wordEndIndex); +} +``` + +You can also retrieve the matched strings together with their associated data (like an ID or line number). + +```csharp +AhoCorasick.Trie<Tuple<string, int>> trie = new AhoCorasick.Trie<Tuple<string, int>>(); + +// add words +trie.Add("hello", new Tuple<string, int>("hello", 123)); +trie.Add("world", new Tuple<string, int>("world", 456)); + +// build search tree +trie.Build(); + +// find words, IDs and wordEndIndices +foreach (Tuple<Tuple<string, int>, int> tuple in trie.Find(text)) +{ + var word = tuple.Item1.Item1; + var id = tuple.Item1.Item2; + var wordEndIndex = tuple.Item2; + Console.WriteLine("{0}, {1}, {2}", word, id, wordEndIndex); +} +``` + +Use `IEnumerable<T>.Any()` to check if the text contains a match without retrieving all of them. + +If you want to match whole words, you can use `Trie<string, bool>`. 
+ +```csharp +string[] text = "hello world i say to you".Split(' '); + +AhoCorasick.Trie trie = new AhoCorasick.Trie(); +trie.Add("hello world".Split(' '), true); +trie.Build(); +bool containsHelloWorld = trie.Find(text).Any(); +``` + +License +------- + MIT \ No newline at end of file From dc2708b9bff213dbd9aa1e999bac7f550d64eb76 Mon Sep 17 00:00:00 2001 From: VikoTse Date: Tue, 29 Aug 2017 13:11:30 +0800 Subject: [PATCH 2/2] Update AhoCorasick.Tests.cs --- AhoCorasick.Tests.cs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/AhoCorasick.Tests.cs b/AhoCorasick.Tests.cs index d443498..16c1ebe 100644 --- a/AhoCorasick.Tests.cs +++ b/AhoCorasick.Tests.cs @@ -1,4 +1,4 @@ -// Copyright (c) 2013 Pēteris Ņikiforovs +// Copyright (c) 2013 Pēteris Ņikiforovs // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -32,7 +32,7 @@ public void HelloWorld() { string text = "hello and welcome to this beautiful world!"; - AhoCorasick.Trie trie = new AhoCorasick.Trie(); + var trie = new AhoCorasick.Trie(); trie.Add("hello"); trie.Add("world"); trie.Build(); @@ -40,8 +40,8 @@ public void HelloWorld() var matches = trie.Find(text).ToArray(); Assert.AreEqual(2, matches.Length); - Assert.AreEqual(new Tuple("hello", 4), (Tuple)matches[0]); - Assert.AreEqual(new Tuple("world", 40), (Tuple)matches[1]); + Assert.AreEqual(Tuple.Create("hello", 4), matches[0]); + Assert.AreEqual(Tuple.Create("world", 40), matches[1]); } [Test] @@ -49,7 +49,7 @@ public void Contains() { string text = "hello and welcome to this beautiful world!"; - AhoCorasick.Trie trie = new AhoCorasick.Trie(); + var trie = new AhoCorasick.Trie(); trie.Add("hello"); trie.Add("world"); trie.Build(); @@ -62,7 +62,7 @@ public void Ids() { string text = "hello and welcome to this beautiful world!"; - AhoCorasick.Trie trie = new AhoCorasick.Trie(); + var trie = new 
AhoCorasick.Trie(); trie.Add("hello", 123); trie.Add("world", 456); @@ -71,8 +71,8 @@ public void Ids() var matches = trie.Find(text).ToArray(); Assert.AreEqual(2, matches.Length); - Assert.AreEqual(new Tuple(123, 4), matches[0]); - Assert.AreEqual(new Tuple(456, 40), matches[1]); + Assert.AreEqual(Tuple.Create(123, 4), matches[0]); + Assert.AreEqual(Tuple.Create(456, 40), matches[1]); } [Test] @@ -80,26 +80,26 @@ public void WordsAndIds() { string text = "hello and welcome to this beautiful world!"; - AhoCorasick.Trie> trie = new AhoCorasick.Trie>(); + var trie = new AhoCorasick.Trie>(); - trie.Add("hello", new Tuple("hello", 123)); - trie.Add("world", new Tuple("world", 456)); + trie.Add("hello", Tuple.Create("hello", 123)); + trie.Add("world", Tuple.Create("world", 456)); trie.Build(); var matches = trie.Find(text).ToArray(); Assert.AreEqual(2, matches.Length); - Assert.AreEqual(new Tuple, int>(new Tuple("hello", 123), 4), matches[0]); - Assert.AreEqual(new Tuple, int>(new Tuple("world", 456), 40), matches[1]); + Assert.AreEqual(Tuple.Create(Tuple.Create("hello", 123), 4), matches[0]); + Assert.AreEqual(Tuple.Create(Tuple.Create("world", 456), 40), matches[1]); } [Test] public void Words() { string[] text = "one two three four".Split(' '); - - AhoCorasick.Trie trie = new AhoCorasick.Trie(); + + var trie = new AhoCorasick.Trie(); trie.Add(new[] { "three", "four" }, true); trie.Build();