Skip to content

Commit ff8db54

Browse files
committed
Add tag for dependency_parsing
1 parent e2a3404 commit ff8db54

File tree

5 files changed

+36
-7
lines changed

5 files changed

+36
-7
lines changed

pythainlp/parse/core.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,21 @@
11
# -*- coding: utf-8 -*-
2+
from typing import List, Union
3+
4+
25
_tagger = None
36
_tagger_name = ""
47

5-
def dependency_parsing(text: str, model: str=None, engine: str="esupar")->str:
8+
def dependency_parsing(text: str, model: str=None, tag: str="str", engine: str="esupar")->Union[List[List[str]], str]:
69
"""
710
Dependency Parsing
811
912
:param str text: text to do dependency parsing
1013
:param str model: model for using with engine \
1114
(for esupar and transformers_ud)
15+
:param str tag: output type (str or list)
1216
:param str engine: the name dependency parser
13-
:return: str (conllu)
17+
:return: str (conllu) or List
18+
:rtype: Union[List[List[str]], str]
1419
1520
**Options for engine**
1621
* *esupar* (default) - Tokenizer POS-tagger and Dependency-parser \
@@ -89,4 +94,4 @@ def dependency_parsing(text: str, model: str=None, engine: str="esupar")->str:
8994
"The engine doesn't support."
9095
)
9196
_tagger_name = engine
92-
return _tagger(text)
97+
return _tagger(text, tag=tag)

pythainlp/parse/esupar_engine.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
55
GitHub: https://github.com/KoichiYasuoka/esupar
66
"""
7+
from typing import List, Union
78
import esupar
89

910

@@ -13,5 +14,12 @@ def __init__(self, model: str="th") -> None:
1314
model = "th"
1415
self.nlp=esupar.load(model)
1516

16-
def __call__(self, text):
17-
return self.nlp(text)
17+
def __call__(self, text: str, tag: str="str")->Union[List[List[str]], str]:
18+
_data = str(self.nlp(text))
19+
if tag =="list":
20+
_temp = _data.splitlines()
21+
_tag_data=[]
22+
for i in _temp:
23+
_tag_data.append(i.split())
24+
return _tag_data
25+
return _data

pythainlp/parse/spacy_thai_engine.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,16 +4,22 @@
44
55
GitHub: https://github.com/KoichiYasuoka/spacy-thai
66
"""
7+
from typing import List, Union
78
import spacy_thai
89

910

1011
class Parse:
1112
def __init__(self, model: str="th") -> None:
1213
self.nlp=spacy_thai.load()
1314

14-
def __call__(self, text:str)->str:
15+
def __call__(self, text:str, tag: str="str")->Union[List[List[str]], str]:
1516
doc = self.nlp(text)
1617
_text = []
18+
if tag == "list":
19+
_tag_data=[]
20+
for t in doc:
21+
_tag_data.append([str(t.i+1),t.orth_,t.lemma_,t.pos_,t.tag_,"_",str(0 if t.head==t else t.head.i+1),t.dep_,"_","_" if t.whitespace_ else "SpaceAfter=No"])
22+
return _tag_data
1723
for t in doc:
1824
_text.append("\t".join([str(t.i+1),t.orth_,t.lemma_,t.pos_,t.tag_,"_",str(0 if t.head==t else t.head.i+1),t.dep_,"_","_" if t.whitespace_ else "SpaceAfter=No"]))
1925
return '\n'.join(_text)

pythainlp/parse/transformers_ud.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
GitHub: https://github.com/KoichiYasuoka
1212
"""
1313
import os
14+
from typing import List, Union
1415
import numpy
1516
import torch
1617
import ufal.chu_liu_edmonds
@@ -48,7 +49,7 @@ def __init__(self, model: str="KoichiYasuoka/deberta-base-thai-ud-head") -> None
4849
tokenizer=self.tokenizer
4950
)
5051

51-
def __call__(self, text: str)->str:
52+
def __call__(self, text: str, tag: str="str")->Union[List[List[str]], str]:
5253
w=[(t["start"],t["end"],t["entity_group"]) for t in self.deprel(text)]
5354
z,n={t["start"]:t["entity"].split("|") for t in self.tagger(text)},len(w)
5455
r,m=[text[s:e] for s,e,p in w],numpy.full((n+1,n+1),numpy.nan)
@@ -73,6 +74,12 @@ def __call__(self, text: str)->str:
7374
m[0:j,0]=m[j+1:,0]=numpy.nan
7475
h=ufal.chu_liu_edmonds.chu_liu_edmonds(m)[0]
7576
u=""
77+
if tag == "list":
78+
_tag_data=[]
79+
for i,(s,e,p) in enumerate(w,1):
80+
p="root" if h[i]==0 else "dep" if p=="root" else p
81+
_tag_data.append([str(i),r[i-1],"_",z[s][0][2:],"_","|".join(z[s][1:]),str(h[i]),p,"_","_" if i<n and e<w[i][0] else "SpaceAfter=No"])
82+
return _tag_data
7683
for i,(s,e,p) in enumerate(w,1):
7784
p="root" if h[i]==0 else "dep" if p=="root" else p
7885
u+="\t".join(

tests/test_parse.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,5 +7,8 @@
77
class TestParsePackage(unittest.TestCase):
88
def test_dependency_parsing(self):
99
self.assertIsNotNone(dependency_parsing("ผมเป็นคนดี", engine="esupar"))
10+
self.assertIsNotNone(dependency_parsing("ผมเป็นคนดี", engine="esupar", tag="list"))
1011
self.assertIsNotNone(dependency_parsing("ผมเป็นคนดี", engine="transformers_ud"))
12+
self.assertIsNotNone(dependency_parsing("ผมเป็นคนดี", engine="transformers_ud", tag="list"))
1113
self.assertIsNotNone(dependency_parsing("ผมเป็นคนดี", engine="spacy_thai"))
14+
self.assertIsNotNone(dependency_parsing("ผมเป็นคนดี", engine="spacy_thai", tag="list"))

0 commit comments

Comments
 (0)