Skip to content

Commit

Permalink
Unicode flags are single tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
tsproisl committed Jul 2, 2019
1 parent 1ba3fc1 commit f28bf7c
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion someweta/tagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def __init__(self, beam_size=5, iterations=10, lexicon=None, mapping=None, brown
r"|".join([re.escape(_) for _ in emoticon_list]) +
r"$", re.VERBOSE)
# Unicode emoticons and other symbols
- self.unicode_flags = re.compile(r"\p{Regional_Indicator}{2}")
+ self.unicode_flags = re.compile(r"^\p{Regional_Indicator}{2}$")
# self.emoji = re.compile(r"^[\u2600-\u27BF\uFE0E\uFE0F\U0001F300-\U0001f64f\U0001F680-\U0001F6FF\U0001F900-\U0001F9FF]$")
self.emoji = re.compile(r"[\p{Extended_Pictographic}\p{Emoji_Presentation}\uFE0F]")

Expand Down

0 comments on commit f28bf7c

Please sign in to comment.