Skip to content

Commit

Permalink
Merge pull request #5 from UUDigitalHumanitieslab/bugfix/tests
Browse files Browse the repository at this point in the history
Bugfix/tests
  • Loading branch information
oktaal authored Apr 6, 2023
2 parents e5d444b + cc811f3 commit 19dc760
Show file tree
Hide file tree
Showing 17 changed files with 693 additions and 513 deletions.
73 changes: 41 additions & 32 deletions mwe_query/canonicalform.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
"""

from typing import Dict, List, Optional, Set, Tuple
from sastatypes import SynTree
from sastadev.sastatypes import SynTree
import re
import sys
from treebankfunctions import getattval as gav, terminal, getnodeyield, find1, bareindexnode, indextransform, \
from sastadev.treebankfunctions import getattval as gav, terminal, getnodeyield, find1, bareindexnode, indextransform, \
getindexednodesmap, getbasicindexednodesmap, clausebodycats

import lxml.etree as ET
import copy
from adpositions import vzazindex
from alpinoparsing import parse
from lcat import expandnonheadwords
from mwe_query.adpositions import vzazindex
from sastadev.alpinoparsing import parse
from mwe_query.lcat import expandnonheadwords

Xpathexpression = str

Expand Down Expand Up @@ -688,6 +688,16 @@ def expandsu(vc: SynTree, subject: SynTree) -> SynTree:
return newvc


def adaptvzlemma(lemma: str) -> str:
    """Map an adposition lemma to its alternate form ('met' -> 'mee', 'tot' -> 'toe').

    Any other lemma is returned unchanged. This is the counterpart of
    ``adaptvzlemma_inv``, which maps 'mee' and 'toe' back again.

    :param lemma: the adposition lemma to convert
    :return: 'mee' for 'met', 'toe' for 'tot', otherwise ``lemma`` itself
    """
    # The keys must be bare lemmas: the earlier comparison against ' tot'
    # (with a leading space) could never match the lemma 'tot', so that
    # adposition was silently left unconverted.
    alternate_forms = {'met': 'mee', 'tot': 'toe'}
    return alternate_forms.get(lemma, lemma)


def getpronadv(lemma, rel, rprons={}):
newnode = mknode()
newlemma = adaptvzlemma(lemma)
Expand Down Expand Up @@ -1329,7 +1339,7 @@ def mkpp(rel: str, vz: str, obj1node: SynTree, begin, end, index, az=None,) ->
return ppnode


def adaptvzlemma(inlemma: str) -> str:
def adaptvzlemma_inv(inlemma: str) -> str:
if inlemma == 'mee':
result = 'met'
elif inlemma == 'toe':
Expand Down Expand Up @@ -1370,7 +1380,7 @@ def relpronsubst(stree: SynTree) -> SynTree:
newstree, f'.//node[@pt="vz" and @rel="hd" and ../node[@index="{rhdindex}"]]')
if govprep is not None:
govprep.attrib['vztype'] = 'init'
govprep.attrib['lemma'] = adaptvzlemma(
govprep.attrib['lemma'] = adaptvzlemma_inv(
govprep.attrib['lemma'])
# ET.dump(newstree)

Expand Down Expand Up @@ -1454,31 +1464,30 @@ def mksuperquery(mwetrees) -> Xpathexpression:
This uses the content words. If only one content word is in the expression, all the words are used.
This way extensions for alternatives (such as the lemma "mijzelf|jezelf|zichzelf") are included.
"""
if mwetrees == []:
result = ''
else:
mwetree = mwetrees[0] # we only have to look at the first tree
wordnodes = [node for node in mwetree.iter() if 'pt' in node.attrib]
contentwordnodes = [
node for node in mwetree.iter() if iscontentwordnode(node)]
contentwordnodes = contentwordnodes if len(
contentwordnodes) > 1 else wordnodes

newmwetree = ET.Element('node', attrib={'cat': 'top'})
for contentwordnode in contentwordnodes:
cwlemma = gav(contentwordnode, 'lemma')
cwpt = gav(contentwordnode, 'pt')
newcontentwordnode = ET.Element(
'node', attrib={'lemma': cwlemma, 'pt': cwpt, 'axis': 'descendant'})
newmwetree.append(newcontentwordnode)
result = tree2xpath(newmwetree)

# lemmapts = [(gav(node, 'lemma'), gav(node, 'pt')) for node in contentwordnodes]
# lemmaptxpaths = [f'.//node[@lemma="{lemma}" and @pt="{pt}"]' for (lemma, pt) in lemmapts]
# lemmaptcondition = ' and '.join(lemmaptxpaths)
# result = f'//node[@cat="top" and {lemmaptcondition}]'
return result

if len(mwetrees) < 1:
raise RuntimeError('Cannot generate superset query for empty tree set')

mwetree = mwetrees[0] # we only have to look at the first tree
wordnodes = [node for node in mwetree.iter() if 'pt' in node.attrib]
contentwordnodes = [node for node in mwetree.iter()
if iscontentwordnode(node)]
search_for = contentwordnodes if len(contentwordnodes) > 1 else wordnodes

target_node = ET.Element('node', attrib={'cat': 'top'})
children = []
for node in search_for:
cwlemma = gav(node, 'lemma')
cwpt = gav(node, 'pt')
n = ET.Element('node', attrib=dict(lemma=cwlemma, pt=cwpt, axis='descendant'))
children.append(n)

del children[0].attrib['axis']
for child in children[1:]:
target_node.append(child)

return '//{}/ancestor::alpino_ds/{}'.format(
tree2xpath(children[0]),
tree2xpath(target_node))

def generatequeries(mwe: str, lcatexpansion=True) -> Tuple[Xpathexpression, Xpathexpression, Xpathexpression]:
"""
Expand Down
3 changes: 2 additions & 1 deletion mwe_query/indextransform.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# flake8: noqa
# TODO: implement this file
from copy import copy

indexdict = {}
Expand All @@ -8,5 +9,5 @@ def makeindexdict(stree):
indexdict[index] = stree

for i , node in indexdict.items():

pass # TODO

3 changes: 0 additions & 3 deletions mwe_query/lcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,3 @@ def getlcat(node: SynTree, prel=None) -> str: # noqa: C901
ET.dump(node)

return result

result = 'xp'
return result
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
# pip-compile
#
alpino-query==2.1.7
alpino-query==2.1.9
# via mwe-query (setup.py)
basexclient==8.4.4
# via mwe-query (setup.py)
Expand All @@ -20,5 +20,7 @@ requests==2.28.1
# via
# alpino-query
# mwe-query (setup.py)
sastadev==0.0.3
# via mwe-query (setup.py)
urllib3==1.26.11
# via requests
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
package_data={"mwe_query": ["py.typed"]},
zip_safe=True,
install_requires=[
'alpino-query>=2.1.8', 'requests', 'BaseXClient'
'alpino-query>=2.1.8', 'requests', 'BaseXClient', 'sastadev>=0.0.3'
],
entry_points={
'console_scripts': [
Expand Down
8 changes: 8 additions & 0 deletions tests/data/transform/0-0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<node id="4">
<node rel="su" id="2" index="1"/>
<node rel="obj1" cat="np" id="6" nodecount="2">
<node lemma="de" rel="det" pt="lid" lwtype="bep" id="7"/>
<node lemma="dans" rel="hd" pt="n" ntype="soort" genus="zijd" getal="ev" graad="basis" id="8"/>
</node>
<node lemma="ontspringen" rel="hd" pt="ww" id="9"/>
</node>
8 changes: 8 additions & 0 deletions tests/data/transform/1-0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<node id="4">
<node rel="su" id="2" index="1"/>
<node rel="obj1" cat="np" id="6">
<node lemma="de" rel="det" pt="lid" lwtype="bep" id="7"/>
<node lemma="dans" rel="hd" pt="n" ntype="soort" genus="zijd" getal="ev" graad="basis" id="8"/>
</node>
<node lemma="ontspringen" rel="hd" pt="ww" id="9"/>
</node>
8 changes: 8 additions & 0 deletions tests/data/transform/2-0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<node id="4">
<node rel="su" id="2" index="1"/>
<node rel="obj1" cat="np" id="6" nodecount="2">
<node lemma="de" rel="det" pt="lid" lwtype="bep" id="7"/>
<node lemma="dans" rel="hd" pt="n" ntype="soort" genus="zijd" id="8"/>
</node>
<node lemma="ontspringen" rel="hd" pt="ww" id="9"/>
</node>
7 changes: 7 additions & 0 deletions tests/data/transform/3-0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<node id="4">
<node rel="su" id="2" index="1"/>
<node rel="obj1" cat="np" id="6" maxnodecount="2">
<node lemma="dans" rel="hd" pt="n" ntype="soort" genus="zijd" getal="ev" graad="basis" id="8"/>
</node>
<node lemma="ontspringen" rel="hd" pt="ww" id="9"/>
</node>
8 changes: 8 additions & 0 deletions tests/data/transform/4-0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<node id="4">
<node rel="su" id="2" index="1"/>
<node rel="obj1" cat="np" id="6">
<node lemma="de" rel="det" pt="lid" lwtype="bep" id="7"/>
<node lemma="dans" rel="hd" pt="n" ntype="soort" genus="zijd" id="8"/>
</node>
<node lemma="ontspringen" rel="hd" pt="ww" id="9"/>
</node>
8 changes: 8 additions & 0 deletions tests/data/transform/5-0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<node id="4">
<node rel="su" id="2" index="1"/>
<node rel="obj1" cat="np" id="6">
<node lemma="de" rel="det" pt="lid" lwtype="bep" id="7"/>
<node lemma="dans" rel="hd" pt="n" ntype="soort" genus="zijd" id="8"/>
</node>
<node lemma="ontspringen" rel="hd" pt="ww" id="9"/>
</node>
8 changes: 8 additions & 0 deletions tests/data/transform/6-0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<node id="4">
<node rel="su" id="2" index="1"/>
<node rel="obj1" cat="np" id="6" nodecount="2">
<node lemma="de" rel="det" pt="lid" lwtype="bep" id="7"/>
<node word="dans" rel="hd" pt="n" ntype="soort" genus="zijd" getal="ev" graad="basis" naamval="stan" id="8"/>
</node>
<node lemma="ontspringen" rel="hd" pt="ww" id="9"/>
</node>
7 changes: 7 additions & 0 deletions tests/data/transform/mwes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
iemand zal de dans ontspringen
iemand zal de *dans ontspringen
iemand zal de +dans ontspringen
iemand zal 0de dans ontspringen
iemand zal de +*dans ontspringen
iemand zal de *+dans ontspringen
iemand zal de =dans ontspringen
58 changes: 58 additions & 0 deletions tests/data/transform/tree.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
<alpino_ds version="1.6" id="MWE2022-04-29.txt/541-1.xml:1">
<parser cats="1" skips="0"/>
<node begin="0" cat="top" end="5" id="0" rel="top" highlight="yes">
<node begin="0" cat="smain" end="5" id="1" rel="--" highlight="yes">
<node begin="0" end="1" frame="noun(de,count,sg)" gen="de" getal="ev" his="normal" his_1="normal" id="2" index="1" lcat="np" lemma="iemand" naamval="stan" num="sg" pdtype="pron" persoon="3p" pos="noun" postag="VNW(onbep,pron,stan,vol,3p,ev)" pt="vnw" rel="su" rnum="sg" root="iemand" sense="iemand" status="vol" vwtype="onbep" word="iemand" highlight="yes"/>
<node begin="1" end="2" frame="verb(hebben,modal_not_u,aux(inf))" his="normal" his_1="normal" id="3" infl="modal_not_u" lcat="smain" lemma="zullen" pos="verb" postag="WW(pv,tgw,ev)" pt="ww" pvagr="ev" pvtijd="tgw" rel="hd" root="zal" sc="aux(inf)" sense="zal" stype="declarative" tense="present" word="zal" wvorm="pv" highlight="yes"/>
<node begin="0" cat="inf" end="5" id="4" rel="vc" highlight="yes">
<node begin="0" end="1" id="5" index="1" rel="su" highlight="yes"/>
<node begin="2" cat="np" end="4" id="6" rel="obj1" highlight="yes">
<node begin="2" end="3" frame="determiner(de)" his="normal" his_1="normal" id="7" infl="de" lcat="detp" lemma="de" lwtype="bep" naamval="stan" npagr="rest" pos="det" postag="LID(bep,stan,rest)" pt="lid" rel="det" root="de" sense="de" word="de" highlight="yes"/>
<node begin="3" end="4" frame="noun(de,count,sg)" gen="de" genus="zijd" getal="ev" graad="basis" his="normal" his_1="normal" id="8" lcat="np" lemma="dans" naamval="stan" ntype="soort" num="sg" pos="noun" postag="N(soort,ev,basis,zijd,stan)" pt="n" rel="hd" rnum="sg" root="dans" sense="dans" word="dans" highlight="yes"/>
</node>
<node begin="4" buiging="zonder" end="5" frame="verb(unacc,inf,transitive)" his="normal" his_1="normal" id="9" infl="inf" lcat="inf" lemma="ontspringen" pos="verb" positie="vrij" postag="WW(inf,vrij,zonder)" pt="ww" rel="hd" root="ontspring" sc="transitive" sense="ontspring" word="ontspringen" wvorm="inf" highlight="yes"/>
</node>
</node>
</node>
<sentence sentid="0-0">iemand zal de dans ontspringen</sentence>
<metadata>
<meta type="text" name="ID" value="Stoet0407"/>
<meta type="text" name="OldID" value="407"/>
<meta type="text" name="Uitdrukking" value=" Den dans ontspringen,"/>
<meta type="text" name="Can_Form" value="de dans ontspringen"/>
<meta type="text" name="NewCanForm" value="iemand zal de dans ontspringen"/>
<meta type="text" name="newcandone" value="yes"/>
<meta type="text" name="can_done" value="yes"/>
<meta type="text" name="Known_by_me?" value="yes"/>
<meta type="text" name="Myversion" value=""/>
<meta type="text" name="Content_Words_sorted" value="dans;ontspringen"/>
<meta type="text" name="ToParse" value=""/>
<meta type="text" name="Pos" value=""/>
<meta type="text" name="binding" value="no"/>
<meta type="text" name="Related" value=""/>
<meta type="text" name="Source" value="http://www.dbnl.org/tekst/stoe002nede01_01/"/>
<meta type="text" name="head" value="v"/>
<meta type="text" name="fixed_subject" value=""/>
<meta type="text" name="npi" value=""/>
<meta type="text" name="inanimate_subject" value=""/>
<meta type="text" name="other" value=""/>
<meta type="text" name="headword" value="ontspringen"/>
<meta type="text" name="inalienable" value=""/>
<meta type="text" name="single_word" value=""/>
<meta type="text" name="Remarks" value=""/>
<meta type="text" name="obj1" value="yes"/>
<meta type="text" name="obj2:NP" value="no"/>
<meta type="text" name="pc:PP" value="no"/>
<meta type="text" name="ld:PP" value="no"/>
<meta type="text" name="obj2:PP" value="no"/>
<meta type="text" name="mod:PP" value="no"/>
<meta type="text" name="predc" value="no"/>
<meta type="text" name="su:NP" value="var"/>
<meta type="text" name="als_XP" value=""/>
<meta type="text" name="multV" value="no"/>
<meta type="text" name="status" value=""/>
<meta type="text" name="trimmed_can_form" value="=TRIM(D418)"/>
<meta type="text" name="alpino_version" value="Alpino-x86_64-linux-glibc2.5-21514-sicstus"/>
<meta type="date" name="alpino_version_date" value="2019-03-07"/>
</metadata>
</alpino_ds>
6 changes: 5 additions & 1 deletion tests/test_expand.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import unittest
import os

import xml.etree.ElementTree as ET
from mwe_query import expand_index_nodes


class TextIndexExpansion(unittest.TestCase):
def data_path(self, filename):
return os.path.join(os.path.dirname(__file__), "data", filename)

def test_no_infinite_loop(self):
with open('tests/data/expand/001.xml') as f:
with open(self.data_path('expand/001.xml')) as f:
doc = ET.parse(f)
expand_index_nodes(doc)
Loading

0 comments on commit 19dc760

Please sign in to comment.