fix: CI error
new5558 committed Jan 11, 2025
Parent: 9efd6e7 · Commit: 7532488
Showing 3 changed files with 12 additions and 12 deletions.
pythainlp/tokenize/attacut.py (7 changes: 4 additions & 3 deletions)

@@ -8,7 +8,7 @@
 :See Also:
     * `GitHub repository <https://github.com/PyThaiNLP/attacut>`_
 """
-from typing import List
+from typing import Dict, List
 
 from attacut import Tokenizer
 
@@ -25,7 +25,8 @@ def __init__(self, model="attacut-sc"):
     def tokenize(self, text: str) -> List[str]:
         return self._tokenizer.tokenize(text)
 
-_tokenizers = {}
+
+_tokenizers: Dict[str, AttacutTokenizer] = {}
 
 
 def segment(text: str, model: str = "attacut-sc") -> List[str]:
@@ -41,7 +42,7 @@ def segment(text: str, model: str = "attacut-sc") -> List[str]:
     """
     if not text or not isinstance(text, str):
        return []
-
+    global _tokenizers
    if model not in _tokenizers:
        _tokenizers[model] = AttacutTokenizer(model)
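
The net effect in attacut.py is a typed, module-level cache: one AttacutTokenizer per model name, built on first use and reused by later segment() calls. A minimal usage sketch (illustrative only; the claim that repeated calls skip model re-initialization follows from the cache shown above):

    from pythainlp.tokenize.attacut import segment

    # First call with a given model name builds and caches an AttacutTokenizer.
    print(segment("ทดสอบการตัดคำ"))        # creates the "attacut-sc" entry
    # Later calls with the same model name reuse the cached instance.
    print(segment("ทดสอบอีกครั้ง"))
    # A different model name gets its own cache entry.
    print(segment("ทดสอบ", model="attacut-c"))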
pythainlp/tokenize/longest.py (9 changes: 4 additions & 5 deletions)

@@ -12,7 +12,7 @@
 """
 import re
-from typing import List, Union
+from typing import Dict, List, Union
 
 from pythainlp import thai_tonemarks
 from pythainlp.tokenize import DEFAULT_WORD_DICT_TRIE
@@ -149,11 +149,10 @@ def tokenize(self, text: str) -> List[str]:
         return tokens
 
 
-_tokenizers = {}
+_tokenizers: Dict[int, LongestMatchTokenizer] = {}
 
-def segment(
-    text: str, custom_dict: Trie = DEFAULT_WORD_DICT_TRIE
-) -> List[str]:
+
+def segment(text: str, custom_dict: Trie = DEFAULT_WORD_DICT_TRIE) -> List[str]:
     """
     Dictionary-based longest matching word segmentation.
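
In longest.py the cache is annotated Dict[int, LongestMatchTokenizer], so entries are keyed by an integer rather than by the Trie itself. The keying code falls outside this diff; a minimal sketch of the pattern, assuming the key is id(custom_dict) and that LongestMatchTokenizer takes the Trie as its constructor argument (both assumptions, not confirmed by this commit):

    from typing import Dict

    from pythainlp.tokenize.longest import LongestMatchTokenizer
    from pythainlp.util import Trie

    _tokenizers: Dict[int, LongestMatchTokenizer] = {}


    def _cached_tokenizer(custom_dict: Trie) -> LongestMatchTokenizer:
        # Hypothetical helper: use id() of the Trie as the cache key, so each
        # distinct dictionary object gets at most one tokenizer instance.
        key = id(custom_dict)
        if key not in _tokenizers:
            _tokenizers[key] = LongestMatchTokenizer(custom_dict)
        return _tokenizers[key]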
tests/core/test_tokenize.py (8 changes: 4 additions & 4 deletions)

@@ -411,16 +411,16 @@ def test_longest_custom_dict(self):
             ["ทดสอบ", " ", "ทดสอบ"],
         )
         self.assertEqual(
-            word_tokenize("ปวดเฉียบพลัน", engine="longest", custom_dict={'ปวดเฉียบพลัน'}),
+            word_tokenize("ปวดเฉียบพลัน", engine="longest", custom_dict=dict_trie(["ปวดเฉียบพลัน"])),
             ["ปวดเฉียบพลัน"],
         )
         self.assertEqual(
-            word_tokenize("ทดสอบทดสอบ", engine="longest", custom_dict={'ทดสอบท'}),
-            ['ทดสอบท', 'ดสอบ'],
+            word_tokenize("ทดสอบทดสอบ", engine="longest", custom_dict=dict_trie(["ทดสอบท"])),
+            ["ทดสอบท", "ดสอบ"],
         )
         self.assertEqual(
             word_tokenize("ทดสอบ ทดสอบ", engine="longest"),
-            ["ทดสอบ", " ", "ทดสอบ"],
+            ["ทดสอบ", " ", "ทดสอบ"],
         )
 
     def test_mm(self):
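
The substantive test change: custom_dict now receives a Trie built with dict_trie(...) instead of a bare set, matching the Trie type hint on segment() in longest.py. A short usage sketch of the fixed call:

    from pythainlp.tokenize import word_tokenize
    from pythainlp.util import dict_trie

    # Build a Trie from a plain word list, then pass it as the custom dictionary.
    trie = dict_trie(["ปวดเฉียบพลัน"])
    print(word_tokenize("ปวดเฉียบพลัน", engine="longest", custom_dict=trie))
    # Expected, per the test above: ['ปวดเฉียบพลัน']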
