Skip to content

Commit

Permalink
top3000 woordenlijst
Browse files Browse the repository at this point in the history
  • Loading branch information
JanOdijk committed Mar 11, 2022
1 parent 97e8a05 commit 7629b79
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 4 deletions.
4 changes: 2 additions & 2 deletions macros/sastamacros1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ JO_kijken_naar = """ parent::node[@cat="pp" and
robusttopicdrop = """(@cat="sv1" and ../node[@lemma="."])"""

Tarsp_hww = """
(@lemma="kunnen" or
(@lemma = "kunnen" or
@lemma = "moeten" or
@lemma= "hoeven" or
@lemma = "hoeven" or
@lemma = "blijven" or
@lemma = "willen" or
@lemma = "zullen" or
Expand Down
10 changes: 9 additions & 1 deletion smallclauses.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# Letters treated as long vowels.
# NOTE(review): 'é' and 'y' are included while plain 'e' is not -- confirm
# that this membership is intended.
longvowels = [
    "a",
    "é",
    "i",
    "o",
    "u",
    "y",
]

# The five plain vowel letters.
vowels = [
    "a",
    "e",
    "i",
    "o",
    "u",
]

# Lemmas that nomperspro() accepts: a personal pronoun counts as nominative
# only when its lemma is one of these forms (full forms plus reduced/clitic
# spellings such as "'k" and "ie").
uniquelynominativeperspros = [
    "ik",
    "jij",
    "hij",
    "zij",
    "wij",
    "ikke",
    "'k",
    "k",
    "ie",
    "we",
]


def makegen(lemma):
if lemma is None or len(lemma) < 2:
Expand Down Expand Up @@ -92,6 +94,11 @@ def perspro(node):
result = pt == 'vnw' and vwtype == 'pers'
return result

def nomperspro(node):
    """Tell whether *node* is a personal pronoun with a nominative-only lemma.

    The node qualifies when perspro(node) holds and the value of its
    'lemma' attribute is listed in uniquelynominativeperspros.
    """
    # The lemma is fetched before the pronoun check, as in the original order.
    nodelemma = getattval(node, 'lemma')
    ispersonal = perspro(node)
    if not ispersonal:
        # Hand back perspro's own falsy value unchanged.
        return ispersonal
    return nodelemma in uniquelynominativeperspros

def inf(node):
    """Tell whether *node* has pt == 'ww' and wvorm == 'inf'.

    NOTE(review): presumably this identifies an infinitival verb form --
    confirm against the tag set used by the treebank.
    """
    # Guard clause: 'wvorm' is only consulted once 'pt' has matched.
    if getattval(node, 'pt') != 'ww':
        return False
    return getattval(node, 'wvorm') == 'inf'
Expand Down Expand Up @@ -225,7 +232,8 @@ def smallclauses(tokensmd, tree):
inserttokens = [Token('moet' if getal(first) != 'mv' else 'moeten', fpos, subpos=5)]
resultlist = mktokenlist(tokens, fpos, inserttokens)
metadata += mkinsertmeta(inserttokens, resultlist)
elif (aanwvnw(second) or knownnoun(second) or perspro(second) or tw(second)) and predadv(first):
#elif (aanwvnw(second) or knownnoun(second) or perspro(second) or tw(second)) and predadv(first):
elif nomperspro(second) and predadv(first):
fpos = int(getattval(first, 'begin'))
inserttokens = [Token('moet' if getal(second) != 'mv' else 'moeten', fpos, subpos=5)]
resultlist = mktokenlist(tokens, fpos, inserttokens)
Expand Down
4 changes: 3 additions & 1 deletion top3000.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from xlsx import getxlsxdata
from treebankfunctions import getattval
from namepartlexicon import namepart_isa_namepart
from config import SD_DIR
import os

def ishuman(node):
lemma = getattval(node, 'lemma')
Expand Down Expand Up @@ -36,7 +38,7 @@ def intransitive(node):

semicolon = ';'

filename = r'D:\jodijk\Dropbox\jodijk\Utrecht\Projects\CLARIAH CORE\WP3\VKL\woordenlijsten\Woordenlijsten Current.xlsx'
# Location of the top-3000 word-list workbook, relative to SD_DIR.
# The directory and file name are passed as separate components so that
# os.path.join inserts the platform's own separator; a hard-coded backslash
# only resolves to the 'top3000' directory on Windows.
filename = os.path.join(SD_DIR, 'top3000', 'Woordenlijsten Current.xlsx')


lexiconheader, lexicondata = getxlsxdata(filename)
Expand Down
Binary file added top3000/Woordenlijsten Current.xlsx
Binary file not shown.

0 comments on commit 7629b79

Please sign in to comment.