fix: CI error
new5558 committed Jan 11, 2025
Parent: 9efd6e7 · Commit: 7532488
Showing 3 changed files with 12 additions and 12 deletions.
pythainlp/tokenize/attacut.py (7 changes: 4 additions & 3 deletions)

@@ -8,7 +8,7 @@
 :See Also:
     * `GitHub repository <https://github.com/PyThaiNLP/attacut>`_
 """
-from typing import List
+from typing import Dict, List
 
 from attacut import Tokenizer
 
@@ -25,7 +25,8 @@ def __init__(self, model="attacut-sc"):
     def tokenize(self, text: str) -> List[str]:
         return self._tokenizer.tokenize(text)
 
-_tokenizers = {}
+
+_tokenizers: Dict[str, AttacutTokenizer] = {}
 
 
 def segment(text: str, model: str = "attacut-sc") -> List[str]:
@@ -41,7 +42,7 @@ def segment(text: str, model: str = "attacut-sc") -> List[str]:
     """
     if not text or not isinstance(text, str):
        return []
-
+    global _tokenizers
    if model not in _tokenizers:
        _tokenizers[model] = AttacutTokenizer(model)
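
The net effect in attacut.py is a typed, module-level cache: one AttacutTokenizer per model name, built on first use and reused by later segment() calls. A minimal usage sketch (illustrative only; the claim that repeated calls skip model re-initialization follows from the cache shown above):

    from pythainlp.tokenize.attacut import segment

    # First call with a given model name builds and caches an AttacutTokenizer.
    print(segment("ทดสอบการตัดคำ"))        # creates the "attacut-sc" entry
    # Later calls with the same model name reuse the cached instance.
    print(segment("ทดสอบอีกครั้ง"))
    # A different model name gets its own cache entry.
    print(segment("ทดสอบ", model="attacut-c"))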
pythainlp/tokenize/longest.py (9 changes: 4 additions & 5 deletions)

@@ -12,7 +12,7 @@
 """
 import re
-from typing import List, Union
+from typing import Dict, List, Union
 
 from pythainlp import thai_tonemarks
 from pythainlp.tokenize import DEFAULT_WORD_DICT_TRIE
@@ -149,11 +149,10 @@ def tokenize(self, text: str) -> List[str]:
         return tokens
 
 
-_tokenizers = {}
+_tokenizers: Dict[int, LongestMatchTokenizer] = {}
 
-def segment(
-    text: str, custom_dict: Trie = DEFAULT_WORD_DICT_TRIE
-) -> List[str]:
+
+def segment(text: str, custom_dict: Trie = DEFAULT_WORD_DICT_TRIE) -> List[str]:
     """
     Dictionary-based longest matching word segmentation.
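
In longest.py the cache is annotated Dict[int, LongestMatchTokenizer], so entries are keyed by an integer rather than by the Trie itself. The keying code falls outside this diff; a minimal sketch of the pattern, assuming the key is id(custom_dict) and that LongestMatchTokenizer takes the Trie as its constructor argument (both assumptions, not confirmed by this commit):

    from typing import Dict

    from pythainlp.tokenize.longest import LongestMatchTokenizer
    from pythainlp.util import Trie

    _tokenizers: Dict[int, LongestMatchTokenizer] = {}


    def _cached_tokenizer(custom_dict: Trie) -> LongestMatchTokenizer:
        # Hypothetical helper: use id() of the Trie as the cache key, so each
        # distinct dictionary object gets at most one tokenizer instance.
        key = id(custom_dict)
        if key not in _tokenizers:
            _tokenizers[key] = LongestMatchTokenizer(custom_dict)
        return _tokenizers[key]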
tests/core/test_tokenize.py (8 changes: 4 additions & 4 deletions)

@@ -411,16 +411,16 @@ def test_longest_custom_dict(self):
             ["ทดสอบ", " ", "ทดสอบ"],
         )
         self.assertEqual(
-            word_tokenize("ปวดเฉียบพลัน", engine="longest", custom_dict={'ปวดเฉียบพลัน'}),
+            word_tokenize("ปวดเฉียบพลัน", engine="longest", custom_dict=dict_trie(["ปวดเฉียบพลัน"])),
             ["ปวดเฉียบพลัน"],
         )
         self.assertEqual(
-            word_tokenize("ทดสอบทดสอบ", engine="longest", custom_dict={'ทดสอบท'}),
-            ['ทดสอบท', 'ดสอบ'],
+            word_tokenize("ทดสอบทดสอบ", engine="longest", custom_dict=dict_trie(["ทดสอบท"])),
+            ["ทดสอบท", "ดสอบ"],
         )
         self.assertEqual(
             word_tokenize("ทดสอบ ทดสอบ", engine="longest"),
-            ["ทดสอบ", " ", "ทดสอบ"],
+            ["ทดสอบ", " ", "ทดสอบ"],
         )
 
     def test_mm(self):
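
The substantive test change: custom_dict now receives a Trie built with dict_trie(...) instead of a bare set, matching the Trie type hint on segment() in longest.py. A short usage sketch of the fixed call:

    from pythainlp.tokenize import word_tokenize
    from pythainlp.util import dict_trie

    # Build a Trie from a plain word list, then pass it as the custom dictionary.
    trie = dict_trie(["ปวดเฉียบพลัน"])
    print(word_tokenize("ปวดเฉียบพลัน", engine="longest", custom_dict=trie))
    # Expected, per the test above: ['ปวดเฉียบพลัน']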
