From 26999540d0e510bc9ac349ae32930e11fb822bf3 Mon Sep 17 00:00:00 2001 From: xidiancanghai <chmy2272120002@outlook.com> Date: Sun, 26 Sep 2021 16:02:07 +0800 Subject: [PATCH 1/3] fix(wildcard) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 支持通配符 --- dict/dict2.txt | 1 + examples/readme/main.go | 3 +-- examples/simple.go | 3 +-- examples/test_issue_3/main.go | 2 +- examples/test_issue_4/simple.go | 9 ++++--- filter.go | 5 ++++ trie_tree.go | 45 +++++++++++++++++++++++++++++++++ 7 files changed, 59 insertions(+), 9 deletions(-) create mode 100644 dict/dict2.txt diff --git a/dict/dict2.txt b/dict/dict2.txt new file mode 100644 index 0000000..1e2e57f --- /dev/null +++ b/dict/dict2.txt @@ -0,0 +1 @@ +刘*上*台 \ No newline at end of file diff --git a/examples/readme/main.go b/examples/readme/main.go index 667cda6..4ccf030 100644 --- a/examples/readme/main.go +++ b/examples/readme/main.go @@ -2,8 +2,7 @@ package main import ( "fmt" - - "github.com/importcjj/sensitive" + "sensitive" ) func main() { diff --git a/examples/simple.go b/examples/simple.go index bc47cd5..543005c 100644 --- a/examples/simple.go +++ b/examples/simple.go @@ -2,8 +2,7 @@ package main import ( "fmt" - - "github.com/importcjj/sensitive" + "sensitive" ) func main() { diff --git a/examples/test_issue_3/main.go b/examples/test_issue_3/main.go index f0e98e3..03c3b43 100644 --- a/examples/test_issue_3/main.go +++ b/examples/test_issue_3/main.go @@ -2,7 +2,7 @@ package main import ( "fmt" - "github.com/importcjj/sensitive" + "sensitive" ) func keywordFilterSearch(content string) (bool, string) { diff --git a/examples/test_issue_4/simple.go b/examples/test_issue_4/simple.go index 045094b..f0b2637 100644 --- a/examples/test_issue_4/simple.go +++ b/examples/test_issue_4/simple.go @@ -2,12 +2,13 @@ package main import ( "fmt" - - "github.com/importcjj/sensitive" + "sensitive" ) func main() { filter := sensitive.New() - filter.LoadWordDict("../../dict/dict.txt") - fmt.Println(filter.Replace("xC4x", '*')) + filter.LoadWordDict("../../dict/dict2.txt") + fmt.Println(filter.ValidateWithWildcard("刘一上三台啊", '*')) + fmt.Println(filter.ValidateWithWildcard("哈哈哈刘一上三", '*')) + fmt.Println(filter.ValidateWithWildcard("哈哈哈刘一上三台", '*')) } diff --git a/filter.go b/filter.go index fed8b0e..a3d57e7 100644 --- a/filter.go +++ b/filter.go @@ -111,3 +111,8 @@ func (filter *Filter) Validate(text string) (bool, string) { func (filter *Filter) RemoveNoise(text string) string { return filter.noise.ReplaceAllString(text, "") } + +func (filter *Filter) ValidateWithWildcard(text string, wildcard rune) (bool, string) { + text = filter.RemoveNoise(text) + return filter.trie.ValidateWithWildcard(text, wildcard) +} diff --git a/trie_tree.go b/trie_tree.go index d17ea66..796eb36 100644 --- a/trie_tree.go +++ b/trie_tree.go @@ -173,6 +173,51 @@ func (tree *Trie) Validate(text string) (bool, string) { return true, Empty } +func (tree *Trie) ValidateWithWildcard(text string, wildcard rune) (bool, string) { + + runes := []rune(text) + parent := tree.Root + patter := "" + + return tree.dfs(runes, parent, 0, wildcard, "", &patter), patter + +} + +func (tree *Trie) dfs(runes []rune, parent *Node, curl int, wildcard rune, str string, patter *string) bool { + if parent == nil { + return false + } + if parent.IsPathEnd() { + *patter = str + return true + } + if curl >= len(runes) { + return false + } + + if current, found := parent.Children[runes[curl]]; found { + if is1 := tree.dfs(runes, current, curl+1, wildcard, str+string(runes[curl]), patter); is1 { + return true + } + } + + // 先看有没有* + if current1, found1 := parent.Children[wildcard]; found1 { + + if is2 := tree.dfs(runes, current1, curl+1, wildcard, str+string(wildcard), patter); is2 { + return true + } + + if current2, found2 := current1.Children[runes[curl]]; found2 { + if is3 := tree.dfs(runes, current2, curl+1, wildcard, str+string(wildcard)+string(runes[curl]), patter); is3 { + return true + } + } + } + + return tree.dfs(runes, tree.Root, curl+1, wildcard, str, patter) +} + // FindIn 判断text中是否含有词库中的词 func (tree *Trie) FindIn(text string) (bool, string) { validated, first := tree.Validate(text) From a6765ad54e2abace9283d671f7d492cdf077128d Mon Sep 17 00:00:00 2001 From: xidiancanghai <chmy2272120002@outlook.com> Date: Sun, 26 Sep 2021 16:08:27 +0800 Subject: [PATCH 2/3] feat(wildcard) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 测试 --- examples/test_issue_4/simple.go | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/test_issue_4/simple.go b/examples/test_issue_4/simple.go index f0b2637..05a825a 100644 --- a/examples/test_issue_4/simple.go +++ b/examples/test_issue_4/simple.go @@ -11,4 +11,5 @@ func main() { fmt.Println(filter.ValidateWithWildcard("刘一上三台啊", '*')) fmt.Println(filter.ValidateWithWildcard("哈哈哈刘一上三", '*')) fmt.Println(filter.ValidateWithWildcard("哈哈哈刘一上三台", '*')) + fmt.Println(filter.ValidateWithWildcard("哈哈哈刘一上三台,你是个小白鼠", '*')) } From f84f1ceffe1c2ba51cd239a297fa75f8babdb6ae Mon Sep 17 00:00:00 2001 From: xidiancanghai <chmy2272120002@outlook.com> Date: Sun, 26 Sep 2021 19:32:57 +0800 Subject: [PATCH 3/3] =?UTF-8?q?feat(wildcard):=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- trie_tree.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/trie_tree.go b/trie_tree.go index 796eb36..4166c69 100644 --- a/trie_tree.go +++ b/trie_tree.go @@ -176,14 +176,20 @@ func (tree *Trie) Validate(text string) (bool, string) { func (tree *Trie) ValidateWithWildcard(text string, wildcard rune) (bool, string) { runes := []rune(text) - parent := tree.Root - patter := "" - return tree.dfs(runes, parent, 0, wildcard, "", &patter), patter + for curl := 0; curl < len(runes); curl++ { + patter := "" + parent := tree.Root + if tree.dfs(runes, parent, curl, wildcard, "", &patter) { + return false, patter + } + } + return true, "" } func (tree *Trie) dfs(runes []rune, parent *Node, curl int, wildcard rune, str string, patter *string) bool { + if parent == nil { return false } @@ -195,6 +201,7 @@ func (tree *Trie) dfs(runes []rune, parent *Node, curl int, wildcard rune, str s return false } + // 匹配到了 if current, found := parent.Children[runes[curl]]; found { if is1 := tree.dfs(runes, current, curl+1, wildcard, str+string(runes[curl]), patter); is1 { return true @@ -214,8 +221,8 @@ func (tree *Trie) dfs(runes []rune, parent *Node, curl int, wildcard rune, str s } } } + return false - return tree.dfs(runes, tree.Root, curl+1, wildcard, str, patter) } // FindIn 判断text中是否含有词库中的词