diff --git a/ASTApostfunctions.py b/ASTApostfunctions.py index 9ef1e14..73ea9e1 100644 --- a/ASTApostfunctions.py +++ b/ASTApostfunctions.py @@ -1,12 +1,28 @@ from collections import Counter +from treebankfunctions import getattval, getuttid, getnodeyield +from stringfunctions import realwordstring from copy import deepcopy - -from treebankfunctions import getattval, getnodeyield, getuttid +from lexicon import getwordposinfo, getwordinfo lpad = 3 zero = '0' astamaxwordcount = 300 +excluded_lemmas = ['gevallen', 'gewinnen'] + +nounqid = 'A021' +lexqid = 'A018' +samplesizeqid = 'A045' +mluxqid = 'A029' +pvqid = 'A024' +delpvqid = 'A009' +subpvqid = 'A032' +kqid = 'A013' +mqid = 'A020' +tijdfoutpvqid = 'A041' +nounlemmaqid = 'A046' +verblemmaqid = 'A049' + def sumctr(ctr): result = sum(ctr.values()) @@ -16,8 +32,9 @@ def sumctr(ctr): def wordcountperutt(allresults): lemmas = getalllemmas(allresults) wordcounts = {uttid: sum(ctr.values()) for uttid, ctr in lemmas.items()} - ignorewordcounts = deepcopy(allresults.coreresults['A045']) if 'A045' in allresults.coreresults else Counter() # samplesize - ignorewordcounts += allresults.coreresults['A029'] if 'A029' in allresults.coreresults else Counter() # mlux + ignorewordcounts = deepcopy( + allresults.coreresults[samplesizeqid]) if samplesizeqid in allresults.coreresults else Counter() # samplesize + ignorewordcounts += allresults.coreresults[mluxqid] if mluxqid in allresults.coreresults else Counter() # mlux # ignorewordcounts += allresults.coreresults['A050'] if 'A050' in allresults.coreresults else Counter() # echolalie covered by mlux result = {} for uttid in wordcounts: @@ -29,10 +46,10 @@ def wordcountperutt(allresults): def finietheidsindex(allresults, _): - allpvs = allresults.coreresults['A024'] if 'A024' in allresults.coreresults else Counter() - subpvs = allresults.coreresults['A032'] if 'A032' in allresults.coreresults else Counter() - delpvs = allresults.coreresults['A009'] if 'A009' in allresults.coreresults else Counter() - tijdfoutpvs = allresults.coreresults['A041'] if 'A041' in allresults.coreresults else Counter() + allpvs = allresults.coreresults[pvqid] if pvqid in allresults.coreresults else Counter() + subpvs = allresults.coreresults[subpvqid] if subpvqid in allresults.coreresults else Counter() + delpvs = allresults.coreresults[delpvqid] if delpvqid in allresults.coreresults else Counter() + tijdfoutpvs = allresults.coreresults[tijdfoutpvqid] if tijdfoutpvqid in allresults.coreresults else Counter() foutepvs = subpvs + delpvs + tijdfoutpvs allpvcount = sumctr(allpvs) foutepvcount = sumctr(foutepvs) @@ -45,6 +62,7 @@ def finietheidsindex(allresults, _): def countwordsandcutoff(allresults, _): + # @@to be adapted result = (None, 0) if 'A047' in allresults.postresults: paddedlist = [] @@ -64,8 +82,8 @@ def countwordsandcutoff(allresults, _): def KMcount(allresults, _): - Kcount = sumctr(allresults.coreresults['A013']) if 'A013' in allresults.coreresults else 0 - Mcount = sumctr(allresults.coreresults['A020']) if 'A020' in allresults.coreresults else 0 + Kcount = sumctr(allresults.coreresults[kqid]) if kqid in allresults.coreresults else 0 + Mcount = sumctr(allresults.coreresults[mqid]) if mqid in allresults.coreresults else 0 result = Kcount + Mcount return result @@ -84,7 +102,17 @@ def old_old_getlemmas(allresults, _): def getlemmas(allresults, _): - result = getcondlemmas(allresults, _, lambda qid: qid in ['A021', 'A018']) + result = getcondlemmas(allresults, _, lambda qid: qid in [nounqid, lexqid]) + return result + + +def 
getnounlemmas(allresults, _):
+    result = getposlemmas(allresults, nounqid)
+    return result
+
+
+def getlexlemmas(allresults, _):
+    result = getposlemmas(allresults, lexqid)
     return result
 
 
@@ -95,10 +123,15 @@ def realword(node):
 
 def getalllemmas(allresults):
     result = {}
-    for syntree in allresults.analysedtrees:
-        uttid = getuttid(syntree)
-        lemmas = [getattval(node, 'lemma') for node in getnodeyield(syntree) if realword(node)]
-        result[uttid] = Counter(lemmas)
+    if allresults.annotationinput:
+        for uttid in allresults.allutts:
+            lemmas = [bgetlemma(w) for w in allresults.allutts[uttid] if realwordstring(w)]
+            result[uttid] = Counter(lemmas)
+    else:
+        for syntree in allresults.analysedtrees:
+            uttid = getuttid(syntree)
+            lemmas = [getattval(node, 'lemma') for node in getnodeyield(syntree) if realword(node)]
+            result[uttid] = Counter(lemmas)
     return result
 
 
@@ -115,7 +148,7 @@ def old_getlemmas(allresults, _):
     return result
 
 
-def getcondlemmas(allresults, _, cond):
+def oldgetcondlemmas(allresults, _, cond):
     allmatches = allresults.allmatches
     result = Counter()
     for el in allmatches:
@@ -126,3 +159,85 @@
             theword = getattval(amatch[0], 'lemma')
             result.update([(theword, uttid)])
     return result
+
+#not used anymore, contains an error
+def getcondlemmas(allresults, _, cond):
+    result = Counter()
+    if allresults.annotationinput:
+        for qid in allresults.exactresults:
+            if cond(qid):
+                for (uttid, position) in allresults.exactresults[qid]:
+                    word = allresults.allutts[uttid][position - 1]
+                    if qid == 'A021':
+                        pos = 'n'
+                    elif qid == 'A018':
+                        pos = 'ww'
+                    else:
+                        pos = None
+                    lemma = bgetlemma(word, pos)
+                    result.update([(lemma, qid, uttid)])
+
+    else:
+        allmatches = allresults.allmatches
+        for el in allmatches:
+            (qid, uttid) = el
+            if cond(qid):
+                for amatch in allmatches[el]:
+                    # theword = normalizedword(amatch[0])
+                    theword = getattval(amatch[0], 'lemma')
+                    result.update([(theword, uttid)])
+    return result
+
+
+def getposfromqid(qid):
+    if qid == nounqid:
+        pos = 'n'
+    elif qid == lexqid:
+        pos = 'ww'
+    else:
+        pos = None
+    return pos
+
+
+def getposlemmas(allresults, posqid):
+    result = Counter()
+    if allresults.annotationinput:
+        for (uttid, position) in allresults.exactresults[posqid]:
+            word = allresults.allutts[uttid][position - 1]
+            pos = getposfromqid(posqid)
+            lemma = bgetlemma(word, pos)
+            result.update([(lemma, uttid)])
+    else:
+        allmatches = allresults.allmatches
+        for el in allmatches:
+            (qid, uttid) = el
+            if qid == posqid:
+                for amatch in allmatches[el]:
+                    # theword = normalizedword(amatch[0])
+                    theword = getattval(amatch[0], 'lemma')
+                    result.update([(theword, uttid)])
+    return result
+
+
+def bgetlemma(word, pos=None):
+    if pos is None:
+        wordinfos = getwordinfo(word)
+        if wordinfos == []:
+            lemma = word
+        else:
+            filteredwordinfos = [wi for wi in wordinfos if wi[3] not in excluded_lemmas]
+            if filteredwordinfos == []:
+                lemma = wordinfos[0][3]
+            else:
+                lemma = filteredwordinfos[0][3]
+    else:
+        wordinfos = getwordposinfo(word, pos)
+        if wordinfos == []:
+            lemma = word
+        else:
+            filteredwordinfos = [wi for wi in wordinfos if wi[3] not in excluded_lemmas]
+            if filteredwordinfos == []:
+                lemma = wordinfos[0][3]
+            else:
+                lemma = filteredwordinfos[0][3]
+    return lemma
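The lemma lookup that backs getposlemmas and getalllemmas above (bgetlemma) falls back to the word itself when the lexicon has no entry, and otherwise prefers the first entry whose lemma is not in excluded_lemmas. A minimal, self-contained sketch of that selection rule, with a hypothetical stub standing in for lexicon.getwordinfo (the 4-tuple shape with the lemma at index 3 is an assumption read off the wi[3] accesses above):

    excluded_lemmas = ['gevallen', 'gewinnen']

    def fake_getwordinfo(word):
        # Hypothetical stand-in for lexicon.getwordinfo; assumed to return
        # 4-tuples whose last element is the lemma.
        fakelexicon = {
            'gewonnen': [('ww', '', 'vd', 'gewinnen'), ('ww', '', 'vd', 'winnen')],
        }
        return fakelexicon.get(word, [])

    def bgetlemma_sketch(word):
        wordinfos = fake_getwordinfo(word)
        if not wordinfos:
            return word  # unknown word: keep the surface form as its own lemma
        filtered = [wi for wi in wordinfos if wi[3] not in excluded_lemmas]
        # if every candidate lemma is excluded, fall back to the first candidate
        return filtered[0][3] if filtered else wordinfos[0][3]

    assert bgetlemma_sketch('gewonnen') == 'winnen'  # 'gewinnen' is filtered out
    assert bgetlemma_sketch('tafel') == 'tafel'      # not in the fake lexicon

The same rule is applied per part of speech via getwordposinfo when getposfromqid can map the query id to a pos tag.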
diff --git a/SAFreader.py b/SAFreader.py
index ed8bb25..fdd8c36 100644
--- a/SAFreader.py
+++ b/SAFreader.py
@@ -5,24 +5,25 @@
 and the function read_annotations() to obtain a score dictionary with queryid
 as keys and Counter() as values
 '''
 
-#todo
-#-additional columns unaligned treatment and generalisation
-#-code alternatives and replacemtne extensions
-#=codes written without spaces?
-
-import os
-import re
-from collections import Counter, defaultdict
+# todo
+# -additional columns unaligned treatment and generalisation
+# -code alternatives and replacement extensions
+# =codes written without spaces?
 
 import xlrd
-
+from collections import defaultdict
+from collections import Counter
+import re
+import os
 from config import SDLOGGER
-from readmethod import itemseppattern, read_method
+#import logging
+from readmethod import read_method, itemseppattern
 
 varitem = ''
 txtext = ".txt"
 comma = ","
+space = ' '
 tsvext = '.tsv'
 commaspace = ', '
 tab = '\t'
@@ -69,9 +70,9 @@ def getlabels(labelstr, patterns):
         results = []
         ms = pattern.finditer(labelstr)
         logstr = str([m.group(0) for m in ms if m.group(0) not in ' ;,-'])
-        #print('Cannot interpret {}; found items: {}'.format(labelstr,logstr), file=sys.stderr)
-        logging.warning('Cannot interpret %s; found items: %s', labelstr, logstr)
-        #exit(-1)
+        # print('Cannot interpret {}; found items: {}'.format(labelstr,logstr), file=sys.stderr)
+        SDLOGGER.warning('Cannot interpret %s; found items: %s', labelstr, logstr)
+        # exit(-1)
     return results
@@ -136,8 +137,8 @@ def oldget_annotations(infilename, patterns):
     headers = {}
     lastrow = sheet.nrows
     lastcol = sheet.ncols
-# firstwordcol = 2
-# lastwordcol = lastcol - 4
+    # firstwordcol = 2
+    # lastwordcol = lastcol - 4
     levelcol = 1
     uttidcol = 0
     stagescol = -1
@@ -182,7 +183,7 @@
                 thelabelstr = sheet.cell_value(rowctr, colctr)
                 thelevel = sheet.cell_value(rowctr, levelcol)
                 if lastwordcol + 1 <= colctr < sheet.ncols:
-                    #prefix = headers[colctr] aangepast om het simpeler te houden
+                    # prefix = headers[colctr] aangepast om het simpeler te houden
                     prefix = ""
                 else:
                     prefix = ""
@@ -194,7 +195,7 @@
                 for (cleanlevel, cleanlabel) in cleanlevelsandlabels:
                     thedata[(cleanlevel, cleanlabel)].append(uttid)
                     exactdata[(cleanlevel, cleanlabel)].append((uttid, tokenposition))
-    #wb.close() there is no way to close the workbook
+    # wb.close() there is no way to close the workbook
     for atuple in thedata:
         cdata[atuple] = Counter(thedata[atuple])
     return cdata
@@ -211,6 +212,8 @@ def get_annotations(infilename, patterns):
     thedata = defaultdict(list)
     cdata = {}
 
+    allutts = {}
+
     # To open Workbook
     wb = xlrd.open_workbook(infilename)
     sheet = wb.sheet_by_index(0)
@@ -221,8 +224,8 @@
     headers = {}
     lastrow = sheet.nrows
     lastcol = sheet.ncols
-# firstwordcol = 2
-# lastwordcol = lastcol - 4
+    # firstwordcol = 2
+    # lastwordcol = lastcol - 4
     levelcol = 1
     uttidcol = 0
     stagescol = -1
@@ -230,6 +233,8 @@
 
     uttlevel = 'utt'
 
+    uttcount = 0
+
     for rowctr in range(startrow, lastrow):
         if rowctr == headerrow:
            for colctr in range(startcol, lastcol):
@@ -254,14 +259,21 @@
             thelevel = sheet.cell_value(rowctr, levelcol)
             thelevel = clean(thelevel)
             all_levels.add(thelevel)
+            # if thelevel == uttlevel:
+            #     uttcount += 1
+            curuttwlist = []
             for colctr in range(firstwordcol, sheet.ncols):
-                if thelevel in literallevels and colctr != stagescol and colctr != commentscol:
+                if thelevel == uttlevel:
+                    curcellval = sheet.cell_value(rowctr, colctr)
+                    if curcellval != '':
+                        curuttwlist.append(curcellval)
+                elif thelevel in literallevels and colctr != stagescol and colctr != commentscol:
                     thelabel = sheet.cell_value(rowctr, colctr)
                    if 
colctr > lastwordcol: tokenposition = 0 else: tokenposition = colctr - firstwordcol + 1 - #thedata[(thelevel, thelabel)].append(uttid) + # thedata[(thelevel, thelabel)].append(uttid) cleanlevel = thelevel cleanlabel = thelabel if cleanlabel != '': @@ -270,7 +282,7 @@ def get_annotations(infilename, patterns): thelabelstr = sheet.cell_value(rowctr, colctr) thelevel = sheet.cell_value(rowctr, levelcol) if lastwordcol + 1 <= colctr < sheet.ncols: - #prefix = headers[colctr] aangepast om het simpeler te houden + # prefix = headers[colctr] aangepast om het simpeler te houden prefix = "" else: prefix = "" @@ -281,8 +293,10 @@ def get_annotations(infilename, patterns): tokenposition = colctr - firstwordcol + 1 for (cleanlevel, cleanlabel) in cleanlevelsandlabels: thedata[(cleanlevel, cleanlabel)].append((uttid, tokenposition)) - #wb.close() there is no way to close the workbook - return thedata + if curuttwlist != []: + allutts[uttid] = curuttwlist + # wb.close() there is no way to close the workbook + return allutts, thedata def update(thedict, qid, goldtuple): @@ -322,7 +336,7 @@ def mkpatterns(allcodes): adaptedcodes = [codeadapt(c) for c in sortedallcodes] basepattern = r'' + '|'.join(adaptedcodes) + '|' + itemseppattern fullpattern = r'^(' + basepattern + r')*$' - return(re.compile(basepattern), re.compile(fullpattern)) + return (re.compile(basepattern), re.compile(fullpattern)) def get_golddata(filename, mapping, altcodes, queries, includeimplies=False): @@ -333,7 +347,7 @@ def get_golddata(filename, mapping, altcodes, queries, includeimplies=False): allaltcodesitems = [item for (item, _) in altcodes] allitems = allmappingitems + allaltcodesitems patterns = mkpatterns(allitems) - basicdata = get_annotations(filename, patterns) + allutts, basicdata = get_annotations(filename, patterns) results = {} for thelevel, theitem in basicdata: thecounter = basicdata[(thelevel, theitem)] @@ -352,59 +366,70 @@ def get_golddata(filename, mapping, altcodes, queries, includeimplies=False): impliedqid = mapping[(implieditem, thelevel)] update(results, impliedqid, (altlevel, altitem, thecounter)) else: - logging.error('Implied Item ({},{}) not found in mapping'.format(implieditem, thecorrectlevel)) + SDLOGGER.error('Implied Item ({},{}) not found in mapping'.format(implieditem, thecorrectlevel)) elif (theitem, thelevel) in altcodes: (altitem, altlevel) = altcodes[(theitem, thelevel)] qid = mapping[(altitem, altlevel)] update(results, qid, (altlevel, altitem, thecounter)) - logging.info('{} of level {} invalid code replaced by {} of level {}'.format(theitem, thelevel, altitem, altlevel)) + SDLOGGER.info( + '{} of level {} invalid code replaced by {} of level {}'.format(theitem, thelevel, altitem, altlevel)) if includeimplies: for implieditem in queries[qid].implies: if (implieditem, thecorrectlevel) in mapping: impliedqid = mapping[(implieditem, thelevel)] update(results, impliedqid, (altlevel, altitem, thecounter)) else: - logging.error('Implied Item ({},{}) not found in mapping'.format(implieditem, thecorrectlevel)) + SDLOGGER.error('Implied Item ({},{}) not found in mapping'.format(implieditem, thecorrectlevel)) elif theitem in mappingitem2levelmap: thecorrectlevels = mappingitem2levelmap[theitem] if len(thecorrectlevels) == 1: thecorrectlevel = thecorrectlevels[0] qid = mapping[(theitem, thecorrectlevel)] update(results, qid, (thecorrectlevel, theitem, thecounter)) - logging.info('level {} of item {} replaced by correct level {}'.format(thelevel, theitem, thecorrectlevel)) + SDLOGGER.info( + 'level {} of 
item {} replaced by correct level {}'.format(thelevel, theitem, thecorrectlevel)) elif len(thecorrectlevels) > 1: - logging.error('Item {} of level {} not a valid coding (wrong level, multiple candidate levels: {}'.format(theitem, thelevel, str(thecorrectlevels))) + SDLOGGER.error( + 'Item {} of level {} not a valid coding (wrong level, multiple candidate levels: {}'.format(theitem, + thelevel, + str( + thecorrectlevels))) else: - logging.error('{} of level {} not a valid coding (wrong level'.format(theitem, thelevel)) + SDLOGGER.error('{} of level {} not a valid coding (wrong level'.format(theitem, thelevel)) if includeimplies: for implieditem in queries[qid].implies: if (implieditem, thecorrectlevel) in mapping: impliedqid = mapping[(implieditem, thecorrectlevel)] update(results, impliedqid, (thecorrectlevel, theitem, thecounter)) else: - logging.error('Implied Item ({},{}) not found in mapping'.format(implieditem, thecorrectlevel)) + SDLOGGER.error('Implied Item ({},{}) not found in mapping'.format(implieditem, thecorrectlevel)) elif theitem in altcodesitem2levelmap: thecorrectlevels = altcodesitem2levelmap[theitem] if len(thecorrectlevels) == 1: (thecorrectitem, thecorrectlevel) = altcodes[(theitem, thecorrectlevels[0])] qid = mapping[(thecorrectitem, thecorrectlevel)] update(results, qid, (thecorrectlevel, thecorrectitem, thecounter)) - logging.info('level {} of item {} replaced by correct level {} and item {}'.format(thelevel, theitem, thecorrectlevel, thecorrectitem)) + SDLOGGER.info('level {} of item {} replaced by correct level {} and item {}'.format(thelevel, theitem, + thecorrectlevel, + thecorrectitem)) elif len(thecorrectlevels) > 1: - logging.error('Item {} of level {} not a valid coding (item replaced by {}, wrong level, multiple candidate levels: {}'.format(theitem. 
thelevel, thecorrectitem, thecorrectlevels))
+                SDLOGGER.error(
+                    'Item {} of level {} not a valid coding (item replaced by {}, wrong level, multiple candidate levels: {}'.format(
+                        theitem, thelevel, thecorrectitem, thecorrectlevels))
             else:
-                logging.error('{} of level {} not a valid coding (alternative item, wrong level)'.format(theitem, thelevel))
+                SDLOGGER.error(
+                    '{} of level {} not a valid coding (alternative item, wrong level)'.format(theitem, thelevel))
             if includeimplies:
                 for implieditem in queries[qid].implies:
                     if (implieditem, thecorrectlevel) in mapping:
                         impliedqid = mapping[(implieditem, thecorrectlevel)]
                         update(results, impliedqid, (thecorrectlevel, theitem, thecounter))
                     else:
-                        logging.error('Implied Item ({},{}) not found in mapping'.format(implieditem, thecorrectlevel))
+                        SDLOGGER.error('Implied Item ({},{}) not found in mapping'.format(implieditem, thecorrectlevel))
     else:
-        logging.error('{} of level {} not a valid coding'.format(theitem, thelevel))
-    return results
+        SDLOGGER.error('{} of level {} not a valid coding'.format(theitem, thelevel))
+    return allutts, results
 
 
 def exact2global(thedata):
@@ -462,12 +487,12 @@ def read_annotations(methodfilename, annotationfilename, includeimplies=False):
 
 if __name__ == "__main__":
     # Give the location of the input file
-    #infilename = r"D:\jodijk\Dropbox\jodijk\Utrecht\Projects\CLARIAH CORE\WP3\Auris\AurisdataAligned Current.xlsx"
-    #infilename = r"D:\jodijk\Dropbox\jodijk\Utrecht\Projects\CLARIAH CORE\WP3\Auris\AurisdataAligned TagsCleaned Current.xlsx"
-    #infilename = r"D:\jodijk\Dropbox\jodijk\Utrecht\Projects\CLARIAH CORE\WP3\VKL\SchlichtingVoorbeeldGoldCurrent.xlsx"
+    # infilename = r"D:\jodijk\Dropbox\jodijk\Utrecht\Projects\CLARIAH CORE\WP3\Auris\AurisdataAligned Current.xlsx"
+    # infilename = r"D:\jodijk\Dropbox\jodijk\Utrecht\Projects\CLARIAH CORE\WP3\Auris\AurisdataAligned TagsCleaned Current.xlsx"
+    # infilename = r"D:\jodijk\Dropbox\jodijk\Utrecht\Projects\CLARIAH CORE\WP3\VKL\SchlichtingVoorbeeldGoldCurrent.xlsx"
     infilename = r"D:\jodijk\Dropbox\jodijk\Utrecht\Projects\CLARIAH CORE\WP3\VKL\aangeleverde data\ASTA\SASTA sample 01.xlsx"
 
-    #Give the location of the method file
+    # Give the location of the method file
     methodfilename = r'D:\jodijk\Dropbox\jodijk\Utrecht\Projects\CLARIAH CORE\WP3\VKL\ASTA\ASTA Index Current.xlsx'
 
     thedata = {}
diff --git a/allresults.py b/allresults.py
index ce3e0ab..cfe6f55 100644
--- a/allresults.py
+++ b/allresults.py
@@ -1,13 +1,15 @@
 class AllResults:
-    def __init__(self, uttcount, coreresults, postresults, allmatches, filename, analysedtrees):
+    def __init__(self, uttcount, coreresults, exactresults, postresults, allmatches, filename, analysedtrees, allutts, annotationinput=False):
         self.uttcount = uttcount
         self.coreresults = coreresults
+        self.exactresults = exactresults
         self.postresults = postresults
         self.allmatches = allmatches
         self.filename = filename
         self.analysedtrees = analysedtrees
-
+        self.allutts = allutts
+        self.annotationinput = annotationinput
 
 def scores2counts(scores):
     '''
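The widened AllResults constructor is what lets the ASTA post-functions switch on annotationinput: exactresults carries the (uttid, position) hits per query and allutts the raw token lists, so lemmas can be recovered without parse trees. A minimal sketch of building one for annotation-file input (all values here are illustrative, not taken from a real run):

    from collections import Counter

    from allresults import AllResults

    allutts = {1: ['ik', 'wil', 'appel']}    # uttid -> token list
    coreresults = {'A021': Counter({1: 1})}  # qid -> per-utterance counts
    exactresults = {'A021': [(1, 3)]}        # qid -> (uttid, 1-based position)

    allresults = AllResults(uttcount=1,
                            coreresults=coreresults,
                            exactresults=exactresults,
                            postresults={},
                            allmatches={},
                            filename='sample01.xlsx',  # illustrative name
                            analysedtrees=[],
                            allutts=allutts,
                            annotationinput=True)      # consumers skip the trees

With annotationinput=True, getposlemmas reads allutts[1][3 - 1] == 'appel' for the A021 hit instead of querying a syntax tree.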
diff --git a/alpinoparsing.py b/alpinoparsing.py
index 96b5417..34e65de 100644
--- a/alpinoparsing.py
+++ b/alpinoparsing.py
@@ -5,7 +5,8 @@
 from lxml import etree
 
 from memoize import memoize
 
-from config import SDLOGGER
+import logging
+#from config import SDLOGGER
 
 alpino_special_symbols_pattern = r'[\[\]]'
 alpino_special_symbols_re = re.compile(alpino_special_symbols_pattern)
@@ -35,10 +36,10 @@ def parse(origsent, escape=True):
     try:
         r1 = urllib.request.urlopen(fullurl)
     except urllib.request.HTTPError as e:
-        SDLOGGER.error('{}: parsing <{}> failed'.format(e, sent))
+        logging.error('{}: parsing <{}> failed'.format(e, sent))
         return None
     except urllib.error.URLError as e:
-        SDLOGGER.error('{}: parsing <{}> failed'.format(e, sent))
+        logging.error('{}: parsing <{}> failed'.format(e, sent))
         return None
     else:
         if 300 > r1.status >= 200:
@@ -47,7 +48,7 @@ def parse(origsent, escape=True):
             stree = etree.fromstring(streebytes)
             return stree
         else:
-            SDLOGGER.error('parsing failed:', r1.status, r1.reason, sent)
+            logging.error('parsing failed: %s %s %s', r1.status, r1.reason, sent)
             return None
diff --git a/astaforms.py b/astaforms.py
index 51bf4ed..5c9b057 100644
--- a/astaforms.py
+++ b/astaforms.py
@@ -4,13 +4,15 @@
 
 import xlsxwriter
 
-from ASTApostfunctions import wordcountperutt
+from ASTApostfunctions import wordcountperutt, nounlemmaqid, verblemmaqid
 from treebankfunctions import getattval
 
 green = '#00FF00'
 red = '#FF0000'
 orange = '#FFBB9A'
 grey = '#B0B0B0'
+
+
 # green = 'green'
 # green = '#006100'
 # red = '#9C0006'
@@ -80,7 +82,6 @@ def applytemplate3(sheet, colchar):
          ['Finietheidsindex', 0.99, 0.03],
          ['Aantal bijzinnen', 4.8, 2.78]]
 
-
 scores = [['', 'Score', 'SD'],
          ['Aantal zelfstandige naamwoorden', "='ZNW & WW'!B1", "=(B2-Tabel!B3)/Tabel!C3"],
          ['TTR zelfstandige naamwoorden', applytemplate3('ZNW & WW', 'B'), applytemplate1(3)],
@@ -107,9 +108,11 @@
          ['Correct', '=COUNTIF(C6:C105,"J")', '', '', '', '']
          ]
 
-sheet2header = ['Nummer', 'Zelfstandig naamwoord', 'Herhaling', 'Aantal', '', 'Lexicaal werkwoord', 'Herhaling', 'Aantal']
+sheet2header = ['Nummer', 'Zelfstandig naamwoord', 'Herhaling', 'Aantal', '', 'Lexicaal werkwoord', 'Herhaling',
+                'Aantal']
 sheet2colwidths = [10, 25, 20, 10, 5, 25, 20, 10]
-sheet3header = ['Uitingsnummer', 'Aantal woorden', 'Correct', "Goede PV's", "Foute en ontbrekende PV's", "Aantal bijzinnen", "Bijzonderheden"]
+sheet3header = ['Uitingsnummer', 'Aantal woorden', 'Correct', "Goede PV's", "Foute en ontbrekende PV's",
+                "Aantal bijzinnen", "Bijzonderheden"]
 
 
 def writetable(tabel, ws, startrow=0, startcol=0, rhformat=None, chformat=None, cellformat=None):
@@ -294,7 +297,7 @@ def resultdict2table(resultdict):
         okpvs = max(0, allpvs - foutepvs)
         bijzincount = dictget(uttid_dict, 'bijzincount')
         remarks = dictget(uttid_dict, 'remarks')
-        paddeduttid = uttid.rjust(3, '0')
+        paddeduttid = str(uttid).rjust(3, '0')
         newrow = [paddeduttid, wc, correct, okpvs, foutepvs, bijzincount, remarks]
         table.append(newrow)
     sortedtable = sorted(table, key=lambda row: row[0])
@@ -324,18 +327,22 @@ def astaform(allresults, _, in_memory=False):
     noundict = defaultdict(int)
     verbdict = defaultdict(int)
     allmatches = allresults.allmatches
-    for el in allmatches:
-        (qid, uttid) = el
-        if qid == 'A021':
-            for amatch in allmatches[el]:
-                # theword = normalizedword(amatch[0])
-                theword = getattval(amatch[0], 'lemma')
-                noundict[theword] += 1
-        if qid == 'A018':
-            for amatch in allmatches[el]:
-                # theword = normalizedword(amatch[0])
-                theword = getattval(amatch[0], 'lemma')
-                verbdict[theword] += 1
+    # for el in allmatches:
+    #     (qid, uttid) = el
+    #     if qid == 'A021':
+    #         for amatch in allmatches[el]:
+    #             # theword = normalizedword(amatch[0])
+    #             theword = getattval(amatch[0], 'lemma')
+    #             noundict[theword] += 1
+    #     if qid == 'A018':
+    #         for amatch in allmatches[el]:
+    #             # theword = normalizedword(amatch[0])
+    #             theword = getattval(amatch[0], 'lemma')
+    #             verbdict[theword] += 1
+    for (lemma, uttid) in allresults.postresults[nounlemmaqid]:
+        noundict[lemma] += 1
+    for (lemma, uttid) 
in allresults.postresults[verblemmaqid]: + verbdict[lemma] += 1 vardict = getvardict(allresults) uttlist = getuttlist(allresults) astadata = AstaFormData(noundict, verbdict, vardict, uttlist) diff --git a/basicreplacements.py b/basicreplacements.py index 77e504f..f369702 100644 --- a/basicreplacements.py +++ b/basicreplacements.py @@ -1,7 +1,6 @@ from collections import defaultdict - +from metadata import bpl_word, bpl_node from deregularise import correctinflection -from metadata import bpl_word pron = 'Pronunciation' orth = 'Orthography' @@ -21,7 +20,9 @@ zdev = 'Devoicing of /z/' wrongpron = 'Wrong Prunciation' phonrepl = '/{wrong}/ instead of /{correct}/' - +wronginfl = 'Incorrect inflection' +morph = 'Morphology' +overgen = 'Overgeneralisation' Rvzlist = ['aan', 'achter', 'achteraan', 'achterin', 'achterop', 'af', 'beneden', 'benevens', 'bij', 'binnen', 'binnenuit', 'boven', 'bovenaan', 'bovenin', 'bovenop', 'buiten', 'dichtbij', 'door', 'doorheen', 'heen', @@ -33,7 +34,6 @@ ervzvariants = [('der' + vz, 'er' + vz, pron, varpron, d_er) for vz in Rvzlist] + \ [("d'r" + vz, 'er' + vz, pron, varpron, d_er) for vz in Rvzlist] - basicreplacementlist = [('as', 'als', pron, infpron, codared), ('isse', 'is', pron, infpron, addschwa), ('ooke', 'ook', pron, infpron, addschwa), ('t', "'t", orth, spellerr, apomiss), ('effjes', 'eventjes', pron, infpron, varpron), @@ -47,7 +47,13 @@ ('da', 'dat', pron, infpron, codared), ('si', 'zit', pron, infpron, codared), # and zdev ('ieduleen', 'iedereen', pron, wrongpron, phonrepl.format(wrong='l', correct='r')), - ('allemaaw', 'allemaal', pron, wrongpron, phonrepl.format(wrong='w', correct='l')) + ('allemaaw', 'allemaal', pron, wrongpron, phonrepl.format(wrong='w', correct='l')), + ('amaal', 'allemaal', pron, infpron, varpron), + ('wiw', 'wil', pron, wrongpron, phonrepl.format(wrong='w', correct='l')), + ('annug', 'ander', pron, wrongpron, phonrepl.format(wrong='nug', correct='der')), + ('nohug', 'nodig', pron, wrongpron, phonrepl.format(wrong='hu', correct='di')), + ('magge', 'mogen', morph, wronginfl, '{} & {}'.format(overgen, infpron)), + ('maggen', 'mogen', morph, wronginfl, overgen) ] + ervzvariants # ('inne', 'in', pron, infpron, addschwa) # put off because it b;ock inne -> in de @@ -70,7 +76,6 @@ for w1, w2, c, n, v in basicexpansionlist: basicexpansions[w1].append((w2, c, n, v)) - knownreplacements = [ ('ze', "z'n", pron, infpron, fndrop, bpl_word), ('desu', 'deze', pron, infpron, zdev, bpl_word), @@ -78,7 +83,6 @@ ] - knownreplacementsdict = {(repl[0], repl[1]): repl for repl in knownreplacements} diff --git a/corrector.py b/corrector.py index 2e561a4..d5c78ba 100644 --- a/corrector.py +++ b/corrector.py @@ -25,17 +25,10 @@ getfilledpauses, getprefixwords, getrepeatedtokens, getunwantedtokens, nodesfindjaneenou) from deregularise import correctinflection -from find_ngram import findmatches, ngram1 from iedims import getjeforms -from lexicon import de, dets, getwordinfo, het, informlexicon, known_word +from lexicon import de, dets, getwordinfo, het, informlexicon, known_word, isa_namepart from macros import expandmacros -# from alternative import Alternative, Replacement, Metadata, Meta -from metadata import (Meta, bpl_indeze, bpl_node, bpl_none, bpl_word, - bpl_wordlemma, defaultbackplacement, defaultpenalty, - filled_pause, fstoken, intj, janeenou, longrep, - mkSASTAMeta, repeated, repeatedjaneenou, - repeatedseqtoken, shortrep, substringrep, unknownsymbol) -from namepartlexicon import isa_namepart +# from namepartlexicon import 
namepart_isa_namepart
 from sastatok import sasta_tokenize
 from sastatoken import Token, tokenlist2stringlist
 from stringfunctions import (chatxxxcodes, consonants, deduplicate,
@@ -44,6 +37,15 @@
 from sva import getsvacorrections
 from tokenmd import TokenListMD, TokenMD, mdlist2listmd
 from treebankfunctions import find1, getattval, getnodeyield
+from lxml import etree
+import sys
+# from alternative import Alternative, Replacement, Metadata, Meta
+from metadata import Meta, defaultbackplacement, defaultpenalty, bpl_node, bpl_none, bpl_word, bpl_indeze, \
+    bpl_wordlemma, mkSASTAMeta, janeenou, shortrep, longrep, repeatedseqtoken, intj, unknownword, unknownsymbol, \
+    filled_pause, repeatedjaneenou, repeated, substringrep, fstoken, falsestart
+from alpinoparsing import parse, escape_alpino_input
+from expandquery import expandmacros
+from find_ngram import findmatches, ngram1, ngram2, ngram7, ngram10, ngram11, ngram16, ngram17
 
 SASTA = 'SASTA'
 
@@ -138,10 +140,9 @@ def ngramreduction(reducedtokens, token2nodemap, allremovetokens, allremoveposit
 
 
 def reduce(tokens, tree):
-
     if tree is None:
         SDLOGGER.error('No tree for :{}\nNo reduction applied'.format(tokens))
-        return((tokens, []))
+        return ((tokens, []))
 
     tokennodes = tree.xpath('.//node[@pt or @pos]')
     tokennodesdict = {int(getattval(n, 'begin')): n for n in tokennodes}
@@ -197,7 +198,8 @@ def reduce(tokens, tree):
 
     # remove ja nee nou
     janeenounodes = nodesfindjaneenou(reducednodes)
-    janeenoutokens = [tok for tok in reducedtokens if keycheck(tok.pos, token2nodemap) and token2nodemap[tok.pos] in janeenounodes]
+    janeenoutokens = [tok for tok in reducedtokens if
+                      keycheck(tok.pos, token2nodemap) and token2nodemap[tok.pos] in janeenounodes]
     janeenoupositions = [token.pos for token in janeenoutokens]
     allremovetokens += janeenoutokens
     allremovepositions += janeenoupositions
@@ -206,25 +208,36 @@ def reduce(tokens, tree):
     allmetadata += metadata
 
     # short repetitions
-    def oldcond(x, y): return len(cleanwordofnort(x)) / len(cleanwordofnort(y)) < .5 and not informlexicon(cleanwordofnort(x))
-    def cond(x, y): return len(cleanwordofnort(x)) / len(cleanwordofnort(y)) < .5  # check on lexicon put off actually two variants should be tried if the word is an existin gword
+    def oldcond(x, y):
+        return len(cleanwordofnort(x)) / len(cleanwordofnort(y)) < .5 and not informlexicon(cleanwordofnort(x))
+
+    def cond(x, y):
+        return len(cleanwordofnort(x)) / len(cleanwordofnort(
+            y)) < .5  # check on lexicon put off; actually two variants should be tried if the word is an existing word
+
    shortprefixtokens = getprefixwords(reducedtokens, cond)
    shortprefixpositions = [token.pos for token in shortprefixtokens]
    repeatedtokens = getrepeatedtokens(reducedtokens, shortprefixtokens)
    allremovetokens += shortprefixtokens
    allremovepositions += shortprefixpositions
-    metadata = [mkSASTAMeta(token, repeatedtokens[token], 'ExtraGrammatical', shortrep, 'Tokenisation', subcat=repetition) for token in reducedtokens if token in repeatedtokens]
+    metadata = [
+        mkSASTAMeta(token, repeatedtokens[token], 'ExtraGrammatical', shortrep, 'Tokenisation', subcat=repetition) for
+        token in reducedtokens if token in repeatedtokens]
    allmetadata += metadata
    reducedtokens = [tok for tok in reducedtokens if tok not in shortprefixtokens]

    # long repetitions
-    def cond(x, y): return len(cleanwordofnort(x)) / len(cleanwordofnort(y)) >= .5 and not informlexicon(cleanwordofnort(x))
+    def cond(x, y):
+        return len(cleanwordofnort(x)) / len(cleanwordofnort(y)) >= .5 and not informlexicon(cleanwordofnort(x))
+
longprefixtokens = getprefixwords(reducedtokens, cond) longprefixpositions = [token.pos for token in longprefixtokens] repeatedtokens = getrepeatedtokens(reducedtokens, longprefixtokens) allremovetokens += longprefixtokens allremovepositions += longprefixpositions - metadata = [mkSASTAMeta(token, repeatedtokens[token], 'ExtraGrammatical', longrep, 'Tokenisation', subcat=repetition) for token in reducedtokens if token in repeatedtokens] + metadata = [ + mkSASTAMeta(token, repeatedtokens[token], 'ExtraGrammatical', longrep, 'Tokenisation', subcat=repetition) for + token in reducedtokens if token in repeatedtokens] allmetadata += metadata reducedtokens = [tok for tok in reducedtokens if tok not in longprefixtokens] @@ -246,7 +259,8 @@ def cond(x, y): return len(cleanwordofnort(x)) / len(cleanwordofnort(y)) >= .5 a allremovetokens += dupnodetokens allremovepositions += dupnodepositions metadata = [mkSASTAMeta(token, repeatedtokens[token], 'ExtraGrammatical', - repeated, 'Tokenisation', subcat=repetition) for token in reducedtokens if token in repeatedtokens] + repeated, 'Tokenisation', subcat=repetition) for token in reducedtokens if + token in repeatedtokens] allmetadata += metadata reducedtokens = [tok for tok in reducedtokens if tok not in dupnodetokens] @@ -284,19 +298,42 @@ def cond(x, y): return len(cleanwordofnort(x)) / len(cleanwordofnort(y)) >= .5 a # vnw pv vnw pv - def metaf(falsestarttokens, falsestartpositions, correcttokens): return \ - [Meta('Retracing', 'Retracing with Correction', annotatedposlist=falsestartpositions, - annotatedwordlist=[c.word for c in falsestarttokens], - annotationposlist=[c.pos for c in correcttokens], - annotationwordlist=[c.word for c in correcttokens], cat='Retracing', subcat=None, source=SASTA, - penalty=defaultpenalty, backplacement=bpl_none)] + \ - [mkSASTAMeta(ftoken, ctoken, 'Retracing with Correction', fstoken, 'Retracing') - for ftoken, ctoken in zip(falsestarttokens, correcttokens)] + def metaf(falsestarttokens, falsestartpositions, correcttokens): + return \ + [Meta('Retracing', 'Retracing with Correction', annotatedposlist=falsestartpositions, + annotatedwordlist=[c.word for c in falsestarttokens], + annotationposlist=[c.pos for c in correcttokens], + annotationwordlist=[c.word for c in correcttokens], cat='Retracing', subcat=None, source=SASTA, + penalty=defaultpenalty, backplacement=bpl_none)] + \ + [mkSASTAMeta(ftoken, ctoken, 'Retracing with Correction', fstoken, 'Retracing') + for ftoken, ctoken in zip(falsestarttokens, correcttokens)] vnwpvvnwpvcor = Ngramcorrection(ngram1, (0, 2), (2, 4), metaf) reducedtokens, allremovetokens, allmetadata = ngramreduction(reducedtokens, token2nodemap, allremovetokens, allremovepositions, allmetadata, vnwpvvnwpvcor) + vzdetvzdetcor = Ngramcorrection(ngram2, (0, 2), (2, 4), metaf) + reducedtokens, allremovetokens, allmetadata = ngramreduction(reducedtokens, token2nodemap, allremovetokens, + allremovepositions, allmetadata, vzdetvzdetcor) + + vgdetvgdetcor = Ngramcorrection(ngram7, (0, 2), (2, 4), metaf) + reducedtokens, allremovetokens, allmetadata = ngramreduction(reducedtokens, token2nodemap, allremovetokens, + allremovepositions, allmetadata, vgdetvgdetcor) + vnwipvjxpvjvnwi = Ngramcorrection(ngram10, (0, 2), (3, 5), metaf) + reducedtokens, allremovetokens, allmetadata = ngramreduction(reducedtokens, token2nodemap, allremovetokens, + allremovepositions, allmetadata, vnwipvjxpvjvnwi) + lemilemjlemilemj = Ngramcorrection(ngram11, (0, 2), (3, 5), metaf) + reducedtokens, allremovetokens, allmetadata = 
ngramreduction(reducedtokens, token2nodemap, allremovetokens, + allremovepositions, allmetadata, lemilemjlemilemj) + + dinjdknj = Ngramcorrection(ngram16, (0, 2), (3, 5), metaf) + reducedtokens, allremovetokens, allmetadata = ngramreduction(reducedtokens, token2nodemap, allremovetokens, + allremovepositions, allmetadata, dinjdknj) + + tevtev = Ngramcorrection(ngram17, (0, 2), (2, 4), metaf) + reducedtokens, allremovetokens, allmetadata = ngramreduction(reducedtokens, token2nodemap, allremovetokens, + allremovepositions, allmetadata, tevtev) + # reducedleaves = [token2nodemap[tok.pos] for tok in reducedtokens] # # vnwpvvnwpvmatches = findmatches(ngram1, reducedleaves) @@ -377,7 +414,6 @@ def getcorrection(utt, tree=None, interactive=False): def getcorrections(utt, method, tree=None, interactive=False): - origutt = utt allmetadata = [] rawtokens = sasta_tokenize(utt) @@ -418,7 +454,6 @@ def getcorrections(utt, method, tree=None, interactive=False): # def getalternatives(origtokensmd, method, llremovedtokens, tree, uttid): def getalternatives(origtokensmd, method, tree, uttid): - tokensmd = explanationasreplacement(origtokensmd, tree) if tokensmd is None: tokensmd = origtokensmd @@ -597,6 +632,7 @@ def lexcheck(intokensmd, allalternativemds): finalalternativemds.append(alternativemd) return finalalternativemds + # moved to metadata # def mkSASTAMeta(token, nwt, name, value, cat, subcat=None, penalty=defaultpenalty, backplacement=defaultbackplacement): # result = Meta(name, value, annotatedposlist=[token.pos], @@ -670,7 +706,8 @@ def explanationasreplacement(tokensmd, tree): if known_word(newword): newtokens = tokenreplace(newtokens, newtoken) bpl = bpl_node if known_word(oldword) else bpl_word - meta = mkSASTAMeta(oldtoken, newtoken, name='ExplanationasReplacement', value='ExplanationasReplacement', + meta = mkSASTAMeta(oldtoken, newtoken, name='ExplanationasReplacement', + value='ExplanationasReplacement', cat='Lexical Error', backplacement=bpl_node) newmetadata.append(meta) result = TokenListMD(newtokens, newmetadata) @@ -682,21 +719,20 @@ def explanationasreplacement(tokensmd, tree): def initdevoicing(token, voiceless, voiced, newtokenmds, beginmetadata): - # initial s -> z, f -> v if not known_word(token.word.lower()) or token.word.lower() in specialdevoicingwords: if token.word[0] == voiceless: newword = voiced + token.word[1:] if known_word(newword): newtokenmds = updatenewtokenmds(newtokenmds, token, [newword], beginmetadata, - name='Pronunciation Variant', value='Initial {} devoicing'.format(voiced), + name='Pronunciation Variant', + value='Initial {} devoicing'.format(voiced), cat='Pronunciation', backplacement=bpl_word) return newtokenmds def getalternativetokenmds(tokenmd, method, tokens, tokenctr, tree, uttid): - token = tokenmd.token beginmetadata = tokenmd.metadata newtokenmds = [] @@ -794,7 +830,8 @@ def getalternativetokenmds(tokenmd, method, tokens, tokenctr, tree, uttid): # zenode = find1(tree, zexpath) tokennodes = getnodeyield(tree) zenode = tokennodes[tokenctr] - nexttoken = tokens[tokenctr + 1] # do not take it from the tree because it may have been replaced by something else, e.g. avoid: ze dee -> ze deed -/-> z'n deed! + nexttoken = tokens[ + tokenctr + 1] # do not take it from the tree because it may have been replaced by something else, e.g. avoid: ze dee -> ze deed -/-> z'n deed! 
zerel = getattval(zenode, 'rel') zeparent = zenode.getparent() zeparentcat = getattval(zeparent, 'cat') @@ -809,7 +846,8 @@ def getalternativetokenmds(tokenmd, method, tokens, tokenctr, tree, uttid): # e-> e(n) enexceptions = {'inne'} - if not known_word(token.word) and token.word.lower() not in basicreplacements and token.word.lower() not in enexceptions: + if not known_word( + token.word) and token.word.lower() not in basicreplacements and token.word.lower() not in enexceptions: if endsinschwa(token.word) and not monosyllabic(token.word): newword = token.word + 'n' if known_word(newword): @@ -844,7 +882,6 @@ def getalternativetokenmds(tokenmd, method, tokens, tokenctr, tree, uttid): def getvalidalternativetokenmds(tokenmd, newtokenmds): - validnewtokenmds = [tokenmd for tokenmd in newtokenmds if known_word(tokenmd.token.word)] if validnewtokenmds == []: validnewtokenmds = [tokenmd] @@ -926,7 +963,8 @@ def correctPdit(tokensmd, tree, uttid): for token in tokens: tokennode = next(filter(lambda x: getattval(x, 'begin') == str(tokenctr), tokennodes), None) tokenlemma = getattval(tokennode, 'lemma') - if not token.skip and prevtoken is not None and not prevtoken.skip and tokenlemma in {'dit', 'dat', 'deze', 'die'}: + if not token.skip and prevtoken is not None and not prevtoken.skip and tokenlemma in {'dit', 'dat', 'deze', + 'die'}: tokenrel = getattval(tokennode, 'rel') tokenpt = getattval(tokennode, 'pt') prevtokennode = tokennodes[tokenctr - 1] if tokenctr > 0 else None @@ -935,7 +973,8 @@ def correctPdit(tokensmd, tree, uttid): prevparent = prevtokennode.getparent() prevparentrel, prevparentcat = getattval(prevparent, 'rel'), getattval(prevparent, 'cat') indezemwp = getindezemwp(prevtokennode, tokennode) - if (prevpt == 'vz' and prevparentcat != 'pp' and tokenrel not in {'obj1', 'det'} and tokenpt == 'vnw') or \ + if (prevpt == 'vz' and prevparentcat != 'pp' and tokenrel not in {'obj1', + 'det'} and tokenpt == 'vnw') or \ indezemwp: newtoken = Token('hem', tokenctr) bpl = bpl_indeze if indezemwp else bpl_node diff --git a/external_functions.py b/external_functions.py index dc09efa..23fabea 100644 --- a/external_functions.py +++ b/external_functions.py @@ -1,31 +1,30 @@ import re - -from asta_queries import asta_bijzin, asta_delpv, asta_lex, asta_noun -from astaforms import astaform -from ASTApostfunctions import (KMcount, countwordsandcutoff, finietheidsindex, - getalllemmas, getlemmas, wordcountperutt) from compounds import getcompounds -from dedup import correct, mlux, neologisme, onvolledig, samplesize -from imperatives import wond4, wond5plus, wondx, wx, wxy, wxyz, wxyz5 -from methods import allok -from queryfunctions import VzN, xneg_neg, xneg_x -from STAPpostfunctions import GL5LVU, GLVU, BB_totaal from Sziplus import sziplus6, vr5plus -from tarspform import mktarspform -from TARSPpostfunctions import (gofase, gtotaal, pf, pf2, pf3, pf4, pf5, pf6, - pf7, vutotaal) -from TARSPscreening import tarsp_screening from xenx import xenx +from imperatives import wx, wxy, wxyz, wxyz5, wondx, wond4, wond5plus +from TARSPscreening import tarsp_screening +from TARSPpostfunctions import vutotaal, gofase, gtotaal, pf2, pf3, pf4, pf5, pf6, pf7, pf +from queryfunctions import xneg_x, xneg_neg, VzN +from dedup import mlux, samplesize, neologisme, onvolledig, correct +from STAPpostfunctions import BB_totaal, GLVU, GL5LVU +from ASTApostfunctions import wordcountperutt, countwordsandcutoff, KMcount, finietheidsindex, getnounlemmas,\ + getlexlemmas, getalllemmas +from astaforms import astaform +from 
tarspform import mktarspform +from stapforms import makestapform +from asta_queries import asta_noun, asta_bijzin, asta_lex, asta_delpv +from methods import allok normalfunctionpattern = r'".format(q, invalidqueries[q], queries[q].query)) -#print the header +# print the header print(resultsheaderstring, file=outfile) outworksheet.write_row(outrowctr, outstartcol, resultsheaderrow) outrowctr += 1 -#print the platinumheader +# print the platinumheader print(platinumheaderstring, file=platinumoutfile) -#print the results +# print the results qcount = 0 invalidqcount = 0 undefinedqcount = 0 results = allresults.coreresults -#exactresults = getexactresults(allmatches) +# exactresults = getexactresults(allmatches) exact = True pcheaders = [['User1', 'User2', 'User3', 'MoreorLess', 'qid', 'cat', 'subcat', 'item', 'uttid', 'pos', 'utt']] @@ -786,7 +799,7 @@ def passfilter(rawexactresults, method): theresults = results[queryid] resultstr = counter2liststr(theresults) if queryid in goldscores: - #(goldlevel, golditem, goldcounter) = goldscores[queryid] + # (goldlevel, golditem, goldcounter) = goldscores[queryid] goldcounter = goldscores[queryid][2] goldcount = sumfreq(goldcounter) sortedgolduttstr = counter2liststr(goldcounter) @@ -804,7 +817,7 @@ def passfilter(rawexactresults, method): qex = 'no' undefinedqcount += 1 if query_exists(thequery) and queryid not in invalidqueries: - #print(queryid, file=logfile) + # print(queryid, file=logfile) if queryid in goldscores: goldcounter = goldscores[queryid][2] else: @@ -851,20 +864,25 @@ def passfilter(rawexactresults, method): queryinforow = [queryid, queries[queryid].cat, queries[queryid].subcat, queries[queryid].item] queryresultsrow = [str(sumfreq(theresults)), resultstr, str(goldcount), sortedgolduttstr, qex] queryRGscorerow = [sf(recall), sf(precision), sf(f1score), liststargoldstr, goldminusliststr, listminusgoldstr] - queryRPscorerow = [sortedplatinumliststr, sf(platinumrecall), sf(platinumprecision), sf(platinumf1score), platinumminusliststr, listminusplatinumliststr] - queryGPscorerow = [sf(gprecall), sf(gpprecision), sf(gpf1score), goldstarplatinumstr, platinumminusgoldstr, goldminusplatinumstr] + queryRPscorerow = [sortedplatinumliststr, sf(platinumrecall), sf(platinumprecision), sf(platinumf1score), + platinumminusliststr, listminusplatinumliststr] + queryGPscorerow = [sf(gprecall), sf(gpprecision), sf(gpf1score), goldstarplatinumstr, platinumminusgoldstr, + goldminusplatinumstr] fullresultrow = queryinforow + queryresultsrow + queryRGscorerow + queryRPscorerow + queryGPscorerow print(tab.join(fullresultrow), file=outfile) outworksheet.write_row(outrowctr, outstartcol, fullresultrow) outrowctr += 1 - platinumrow = [queryid, queries[queryid].cat, queries[queryid].subcat, queries[queryid].item, platinumoutresultsstring, listminusgoldstr, '', ''] + platinumrow = [queryid, queries[queryid].cat, queries[queryid].subcat, queries[queryid].item, + platinumoutresultsstring, listminusgoldstr, '', ''] print(tab.join(platinumrow), file=platinumoutfile) + # @with an annotationfile allmatches is empty so we need to redefine newrows (exactmismatches) markedutt (getmarkedutt)-done if exact: - newrows = exactmismatches(queryid, queries, exactresults, exactgoldscores, allmatches, allutts, platinumcheckfile, silverannotationsdict) + newrows = exactmismatches(queryid, queries, exactresults, exactgoldscores, allmatches, allutts, + platinumcheckfile, silverannotationsdict, annotationinput) allrows += newrows else: if theresultsminusgold != {}: @@ -873,7 +891,8 @@ 
def passfilter(rawexactresults, method):
                 if (queryid, uttid) in allmatches:
                     for (m, syntree) in allmatches[(queryid, uttid)]:
                         markedutt = getmarkedutt(m, syntree)
-                        platinumcheckrow1 = [queryid, queries[queryid].cat, queries[queryid].subcat, queries[queryid].item, uttid, markedutt]
+                        platinumcheckrow1 = [queryid, queries[queryid].cat, queries[queryid].subcat, queries[queryid].item,
+                                             uttid, markedutt]
                         print(tab.join(platinumcheckrow1), file=platinumcheckfile)
 
     if goldminustheresults != {}:
@@ -883,31 +902,31 @@ def passfilter(rawexactresults, method):
             uttstr = space.join(allutts[uttid])
         else:
             SDLOGGER.warning('uttid {} not in allutts'.format(uttid))
-        platinumcheckrow2 = [queryid, queries[queryid].cat, queries[queryid].subcat, queries[queryid].item, uttid, uttstr]
+        platinumcheckrow2 = [queryid, queries[queryid].cat, queries[queryid].subcat, queries[queryid].item, uttid,
+                             uttstr]
         print(tab.join(platinumcheckrow2), file=platinumcheckfile)
 
 platinumcheckfullname = platinumcheckfile.name
 (base, ext) = os.path.splitext(platinumcheckfullname)
-#platinumcheckxlfullname = base + '.xlsx'
+# platinumcheckxlfullname = base + '.xlsx'
 wb = mkworkbook(platinumcheckxlfullname, pcheaders, allrows, freeze_panes=(1, 9))
 wb.close()
 
-#compute the gold postresults
+# compute the gold postresults
 goldpostresults = {}
 goldcounters = {}
 allgoldmatches = {}
 for qid in goldscores:
     goldcounters[qid] = goldscores[qid][2]
-allgoldresults = AllResults(uttcount, goldcounters, goldpostresults, allgoldmatches, reffilename, [])
+allgoldresults = AllResults(uttcount, goldcounters, exactgoldscores, goldpostresults, allgoldmatches, reffilename, [],
+                            allannutts, annotationinput)
 
 dopostqueries(allgoldresults, postquerylist, queries)
 
-
 # compute the platinum postresults
 platinumpostresults = {}
 
-
-#print the postresults
+# print the postresults
 thepostresults = allresults.postresults
 for queryid in postquerylist:
     resultposval = str(getpostval(queryid, thepostresults))
@@ -927,30 +946,33 @@ def passfilter(rawexactresults, method):
     outworksheet.write_row(outrowctr, outstartcol, postrow)
     outrowctr += 1
 
-#gather overall results, 2 cases: (1)for defined original measure queries only; (2) for all original measure queries
+# gather overall results, 2 cases: (1) for defined original measure queries only; (2) for all original measure queries
 
-overallmethods = [(1, 'Overall (defined pre and core queries in the profile)', lambda x: is_preorcore(x) and query_exists(x) and query_inform(x)),
-                  (2, 'Overall (all pre and core queries in the profile)', lambda x: is_preorcore(x) and query_inform(x)),
-                  (3, 'Overall (original pre and core measures with defined queries only)', lambda x: is_preorcore(x) and query_exists(x)),
+overallmethods = [(1, 'Overall (defined pre and core queries in the profile)',
+                   lambda x: is_preorcore(x) and query_exists(x) and query_inform(x)),
+                  (2, 'Overall (all pre and core queries in the profile)',
+                   lambda x: is_preorcore(x) and query_inform(x)),
+                  (3, 'Overall (original pre and core measures with defined queries only)',
+                   lambda x: is_preorcore(x) and query_exists(x)),
                  (4, 'Overall (all original pre and core measures)', lambda x: is_preorcore(x))]
 
-logheader = ['datetime', 'treebank', 'scorenr,' 'R', 'P', 'F1', 'P-R', 'P-P', 'P-F1', 'GP-R', 'GP-P', 'GP-F1', 'ref', 'method']
+logheader = ['datetime', 'treebank', 'scorenr', 'R', 'P', 'F1', 'P-R', 'P-P', 'P-F1', 'GP-R', 'GP-P', 'GP-F1', 'ref',
+             'method']
 logname = 'sastalog.txt'
 biglogfile = open(logname, 'a', encoding='utf8')
 exactlynow = datetime.datetime.now()
now 
= exactlynow.replace(microsecond=0).isoformat() - for (ctr, message, queryfunction) in overallmethods: - #gather resultscount + # gather resultscount resultscount = 0 for queryid in results: thequery = queries[queryid] if thequery.original and queryfunction(thequery): resultscount += sum(results[queryid].values()) - #gather goldcount + # gather goldcount goldcount = 0 for queryid in goldscores: thequery = queries[queryid] @@ -958,7 +980,7 @@ def passfilter(rawexactresults, method): if thequery.original and queryfunction(thequery): goldcount += sum(goldcounter.values()) - #gather platinumcount + # gather platinumcount platinumcount = 0 for queryid in platinumresults: if queryid in queries: @@ -968,7 +990,7 @@ def passfilter(rawexactresults, method): else: SDLOGGER.warning('Query {} found in platinumresults but not in queries'.format(queryid)) - #resultsgoldintersectiocount + # resultsgoldintersectiocount resultsgoldintersectioncount = 0 for queryid in results: thequery = queries[queryid] @@ -979,9 +1001,9 @@ def passfilter(rawexactresults, method): resultsgoldintersectioncount += sum(intersection.values()) else: pass - #SDLOGGER.warning('Query {} found in results but not in goldscores'.format(queryid)) + # SDLOGGER.warning('Query {} found in results but not in goldscores'.format(queryid)) - #resultsplatinumintersectioncount + # resultsplatinumintersectioncount resultsplatinumintersectioncount = 0 for queryid in results: thequery = queries[queryid] @@ -991,9 +1013,9 @@ def passfilter(rawexactresults, method): resultsplatinumintersectioncount += sum(intersection.values()) else: pass - #SDLOGGER.warning('queryid {} not in platinumresults'.format(queryid)) + # SDLOGGER.warning('queryid {} not in platinumresults'.format(queryid)) - #goldplatinumintersectioncount + # goldplatinumintersectioncount goldplatinumintersectioncount = 0 for queryid in platinumresults: if queryid in queries: @@ -1005,12 +1027,13 @@ def passfilter(rawexactresults, method): goldplatinumintersectioncount += sum(intersection.values()) else: pass - #SDLOGGER.warning('Query {} in platinumresults but not in goldscores'.format(queryid)) + # SDLOGGER.warning('Query {} in platinumresults but not in goldscores'.format(queryid)) else: SDLOGGER.warning('Query {} in platinumresults but not in queries'.format(queryid)) (recall, precision, f1score) = getevalscores(resultscount, goldcount, resultsgoldintersectioncount) - (platinumrecall, platinumprecision, platinumf1score) = getevalscores(resultscount, platinumcount, resultsplatinumintersectioncount) + (platinumrecall, platinumprecision, platinumf1score) = getevalscores(resultscount, platinumcount, + resultsplatinumintersectioncount) (gprecall, gpprecision, gpf1score) = getevalscores(goldcount, platinumcount, goldplatinumintersectioncount) overallrow = ['', '', '', message, '', '', '', '', '', sf(recall), sf(precision), sf(f1score), @@ -1028,17 +1051,14 @@ def passfilter(rawexactresults, method): print(tab.join(logrow), file=biglogfile) - biglogfile.close() outfile.close() outworkbook.close() platinumoutfile.close() platinumcheckfile.close() - resultscounts = scores2counts(results) - countcomparison = get_comparison(resultscounts, goldcounts, queries) if countcomparison != []: countcomparisonfile = open(countcomparisonfilename, 'w', encoding='utf8') @@ -1046,10 +1066,9 @@ def passfilter(rawexactresults, method): ccheaderstr = tab.join(ccheader) print(ccheaderstr, file=countcomparisonfile) for (q, r, g) in countcomparison: - if not(r == 0 and g == 0): + if not (r == 0 and g == 0): 
print(q, r, g, r - g, sep=tab, file=countcomparisonfile) - definedqcount = qcount - undefinedqcount emptycounter = Counter() @@ -1072,7 +1091,6 @@ def passfilter(rawexactresults, method): else: percentagecompletion2str = 'N/A' - finalmessagetemplate1 = '{} measures, {} undefined, {} defined, of which {} invalid.' finalmessagetemplate2 = '{} measures defined for a non empty gold score out of {} ({}).' finalmessagetemplate3 = '{} measures defined for a non empty gold count out of {} ({}).' diff --git a/sentence_parser.py b/sentence_parser.py index 861079a..e49004f 100644 --- a/sentence_parser.py +++ b/sentence_parser.py @@ -1,3 +1,4 @@ +from functools import lru_cache import socket from contextlib import contextmanager @@ -5,6 +6,8 @@ import config +from alpinoparsing import escape_alpino_input + class AlpinoSentenceParser: ''' Assumes a Alpino server is running on provided host:port, @@ -19,6 +22,7 @@ def connection(self): raise def parse_sentence(self, sentence: str, buffer_size=8096) -> str: + sentence = escape_alpino_input(sentence) with self.connection() as s: sentence += '\n\n' # flag end of file s.sendall(sentence.encode('utf-8')) @@ -31,6 +35,7 @@ def parse_sentence(self, sentence: str, buffer_size=8096) -> str: return xml.decode('utf-8') +@lru_cache(maxsize=128) def parse(sentence): ''' Wrapper for use in sastadev''' alp = AlpinoSentenceParser() diff --git a/stapforms.py b/stapforms.py new file mode 100644 index 0000000..ba2fdf5 --- /dev/null +++ b/stapforms.py @@ -0,0 +1,120 @@ +from io import BytesIO +import os +from shutil import copyfile, copyfileobj +from collections import defaultdict + +from openpyxl import load_workbook +from allresults import AllResults +from config import SD_DIR, SDLOGGER + +scoresheetname = 'STAP 1 - 5' +maxutt = 50 +zerocount = 0 +basexl = os.path.join(SD_DIR, 'form_templates', 'STAP Excel VUmc 2018.xlsx') + +NS = 'S001' +OS = 'S002' +PV = 'S003' +SGG = 'S004' +VT = 'S005' +VD = 'S006' +N = 'S007' +BvBep = 'S008' +zelfvnw3 = 'S009' +BBp = 'S010' +BBt = 'S011' +BBo = 'S012' + +AG = 33 +Ucol = 21 +AF = 32 + +# order in the Excel sheet: NS OS PV SGG VT VD N BvBep zelf. vnw. 3 BB p BB t BB o +# i.e. 
+sorteditemlist = [NS, OS, PV, SGG, VT, VD, N, BvBep, zelfvnw3, BBp, BBt, BBo]
+
+
+def data2rowtuples(data):
+    # data is a dictionary with key item and as value a counter with (uttid, count) items
+    newdata = defaultdict(lambda: defaultdict(int))
+    for item in data:
+        for (uttid, count) in data[item].items():
+            newdata[uttid][item] += count
+
+    rowlist = []
+    uttidlist = [uttid for uttid in newdata]
+    sorteduttidlist = sorted(uttidlist)
+
+    for uttid in sorteduttidlist:
+        row = []
+        for item in sorteditemlist:
+            if item in newdata[uttid]:
+                row.append(newdata[uttid][item])
+            else:
+                row.append(zerocount)
+        rowlist.append((uttid, row))
+
+    return rowlist
+
+
+def makestapform(allresults, _, basexl=basexl, in_memory=False):
+    if not in_memory:
+        # copy the basexl to a new one with the appropriate name
+        (base, ext) = os.path.splitext(allresults.filename)
+        target = base + '_STAP-Form' + '.xlsx'
+
+        copyfile(basexl, target)
+
+        # open the workbook
+        wb = load_workbook(filename=target)
+    else:
+        target = BytesIO()
+        with open(basexl, 'rb') as source:
+            copyfileobj(fsrc=source, fdst=target)
+        wb = load_workbook(target)
+
+    # gather the results
+
+    # put the results in the right order
+    rowlist = data2rowtuples(allresults.coreresults)
+
+    ws = wb[scoresheetname]
+
+    cols = ['U', 'V', 'W', 'X', 'Y', 'Z', 'AA', 'AB', 'AC', 'AD', 'AE', 'AF']
+    # adapt the relevant sheet
+    for (uttid, row) in rowlist:
+        uttidrow = int(uttid) + 3
+        xluttctr = ws.cell(column=AG, row=uttidrow).value
+        uttidrowstr = str(uttidrow)
+        if int(uttid) == xluttctr:
+            for col, el in zip(cols, row):
+                # special proviso for PV in column W
+                if col == 'W':
+                    el = el - 1
+                cellkey = col + uttidrowstr
+                ws[cellkey] = el
+        else:
+            SDLOGGER.error('Unexpected utterance id encountered: {}'.format(uttid))
+
+    # save the workbook
+    wb.save(target)
+    wb.close()
+
+    # return the workbook - not needed
+    return target
+
+
+def test():
+    coreresults = {NS: {'1': 3}, OS: {'1': 2, '2': 6}}
+    postresults = {}
+    allmatches = {}
+    fn = 'STAP42.xml'
+    analysedtrees = {}
+    allresults = AllResults(0, coreresults, {}, postresults, allmatches, fn, analysedtrees, {})
+    fnbase, _ = os.path.splitext(fn)
+    formxl = fnbase + '_form' + '.xlsx'
+    makestapform(allresults, None)
+
+
+if __name__ == '__main__':
+    test()
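makestapform returns its target in both modes: a file path when the workbook is written next to allresults.filename, and a BytesIO buffer when in_memory=True. A short usage sketch for the in-memory route, under the assumption that an allresults for a STAP analysis is already in hand (the output filename is illustrative):

    from stapforms import makestapform

    # The STAP template is copied into a BytesIO buffer, filled in via
    # openpyxl, saved back into the buffer, and returned without touching disk.
    xlbuffer = makestapform(allresults, None, in_memory=True)
    xlbuffer.seek(0)  # rewind before streaming or writing the bytes out

    with open('STAP42_STAP-Form.xlsx', 'wb') as outfile:  # illustrative name
        outfile.write(xlbuffer.read())

This matches the in_memory flag that astaform also takes, so callers can handle both generated forms the same way.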
-    if prefix in hyphenprefixes and mwinlex:  # the word starts wit ha known prefix that uses hyphen such as ex (ex-vrouw)
+    if prefix in hyphenprefixes and mwinlex:  # the word starts with a known prefix that uses a hyphen, such as ex (ex-vrouw)
         result = []
     elif mainword.startswith(prefix) and mwinlex:  # this is the core case e.g. ver-verkoop
         result = [mainword]
-    elif pfinlex and mwinlex:  # for compounds with a hyphen: kat-oorbellen, generaal-majoor and for tennis-baan(?)
+    elif pfinlex and mwinlex:  # for compounds with a hyphen: kat-oorbellen, generaal-majoor and for tennis-baan(?)
         result = []
-    elif mainword.startswith(deduppf) and mwinlex:  # vver-verkoop
+    elif mainword.startswith(deduppf) and mwinlex:  # vver-verkoop
         result = [mainword]
     else:
         result = []
@@ -150,8 +151,8 @@ def dehyphenate(word):
     head = word[0:1]
     tail = word[1:]
     if head == hyphen:
-        #newresult = head + tail
-        #results.append(newresult)
+        # newresult = head + tail
+        # results.append(newresult)
         rightresults = dehyphenate(tail)
         for rightresult in rightresults:
             newresult = head + rightresult
@@ -232,13 +233,42 @@ def aigu(c):
     result = aiguvowels[theindex]
 
 
+def testcondition(condition, word):
+    if condition(word):
+        print('OK:{}'.format(word))
+    else:
+        print('NO:{}'.format(word))
+
+
+def test():
+    monosyllabicwords = ['baai', 'eeuw', 'mooi', 'aap', 'deed', 'Piet', 'noot', 'duut', 'rijd', 'meid', 'rauw', 'koud',
+                         'buit', 'reuk', 'boer', 'la', 'de', 'hik', 'dop', 'dut',
+                         'yell', 'ry', 'Händl', 'Pëtr', 'bït', 'Köln', 'Kür', 'Tÿd']
+    disyllabicwords = ['baaien', 'eeuwen', 'mooie', 'aapje', 'deden', 'Pietje', 'noten', 'dut', 'rijden', 'meiden',
+                       'rauwe', 'koude', 'buitje', 'reuken', 'boeren', 'laden', 'dender',
+                       'hikken', 'doppen', 'dutten', 'yellen', 'ryen', 'Händler', 'Pëtri', 'bïty', 'Kölner', 'Kürer',
+                       'Tÿding', 'naäap', 'meeëten', 'ciën', 'coöp']
+
+    for word in monosyllabicwords:
+        testcondition(monosyllabic, word.lower())
+    for word in disyllabicwords:
+        testcondition(monosyllabic, word.lower())
+
+    for word in monosyllabicwords + disyllabicwords:
+        ms = syllableheadsre.finditer(word)
+        print(word, end=' -- ')
+        for m in ms:
+            print(m.group(0), end=', ')
+        print('')
+
+
 def nono(inval):
     result = (inval is None) or (inval == 0) or (inval == []) or (inval == '')
     return result
 
 
 def nonnull(inval):
-    result = not(nono(inval))
+    result = not (nono(inval))
     return result
@@ -254,3 +284,15 @@
     core = liststr[1:-1]
     parts = core.split(comma)
     return parts
+
+
+def realwordstring(w):
+    if len(w) != 1:
+        result = True
+    else:
+        result = not unicodedata.category(w).startswith('P')
+    return result
+
+
+if __name__ == '__main__':
+    test()
diff --git a/targets.py b/targets.py
index d0116a6..50f1c7d 100644
--- a/targets.py
+++ b/targets.py
@@ -1,4 +1,3 @@
-
 target_intarget, target_xsid, target_all, target_byrole, target_bysyn = 0, 1, 2, 3, 4
 intargetxpath = '//meta[@name="intarget"]'
 xsidxpath = '//meta[@name="xsid"]'
@@ -17,9 +16,9 @@ def get_targets(treebank):
     roles = treebank.xpath(rolevalxpath)
     targetrolesfound = any(map(lambda x: x.lower() in targetroles, roles))
     synannotations = treebank.xpath(synxpath)
-    if synannotations != []:
-        result = target_bysyn
-    elif xsids != []:
+    # if synannotations != []:
+    #     result = target_bysyn
+    if xsids != []:
         result = target_xsid
     elif intargets != []:
         result = target_intarget
diff --git a/treebankfunctions.py b/treebankfunctions.py
index 6917de3..ca5e4b1 100644
--- a/treebankfunctions.py
+++ b/treebankfunctions.py
@@ -2,15 +2,16 @@
 various treebank functions
 '''
+
+import sys
 import re
+import logging
 from copy import copy, deepcopy
-
 from lxml import etree
-
-#from lexicon import informlexiconpos, isa_namepart_uc, informlexicon, isa_namepart
-import lexicon as lex
 from config import SDLOGGER
 from stringfunctions import allconsonants
+# from lexicon import informlexiconpos, isa_namepart_uc, informlexicon, isa_namepart
+import lexicon as lex
 
 
 class Metadata:
@@ -42,8 +43,7 @@ def md2XMLElement(self):
 numberpattern = r'^[\d\.,]+$'
 numberre = re.compile(numberpattern)
 
-
-#next 3 derived from the alpino dtd
+# next 3 derived from the alpino dtd
 allrels = ['hdf', 'hd', 'cmp', 'sup', 'su', 'obj1', 'pobj1', 'obj2', 'se', 'pc', 'vc', 'svp', 'predc', 'ld', 'me',
            'predm', 'obcomp', 'mod', 'body', 'det', 'app', 'whd', 'rhd', 'cnj', 'crd', 'nucl', 'sat', 'tag', 'dp',
            'top', 'mwp', 'dlink', '--']
@@ -78,10 +78,10 @@
                'evenveel', 'geen', 'ieder', 'meer', 'meerdere', 'menig', 'minder', 'minst', 'sommig',
                'teveel', 'tevéél', 'veel', 'weinig', 'één', 'keiveel'}
 
-#uttidquery = "//meta[@name='uttid']/@value"
+# uttidquery = "//meta[@name='uttid']/@value"
 sentidxpath = './/sentence/@sentid'
 
-#altquery = "//meta[@name='alt']/@value"
+# altquery = "//meta[@name='alt']/@value"
 metaquerytemplate = "//meta[@name='{}']/@value"
 
 sentencexpathquery = "//sentence/text()"
@@ -193,6 +193,21 @@ def getuttid(syntree):
     return result
 
 
+def getuttno(syntree):
+    result = getmeta(syntree, 'uttno')
+    if result is None:
+        result = '0'
+    return result
+
+def getuttidorno(syntree):
+    result = getmeta(syntree, 'xsid')
+    if result is None:
+        result = getmeta(syntree, 'uttno')
+    if result is None:
+        result = '0'
+    return result
+
+
 def getxsid(syntree):
     result = getmeta(syntree, 'xsid')
     if result is None:
@@ -550,7 +565,7 @@ def mark(str):
 
 
 def getwordpositions(matchtree, syntree):
-    #nothing special needs to be done for index nodes since they also have begin and end
+    # nothing special needs to be done for index nodes since they also have begin and end
     positions = []
     for node in matchtree.iter():
         if 'end' in node.attrib:
@@ -588,7 +603,8 @@ def addmetadata(stree, meta):
         metadatanode = etree.Element('metadata')
         stree.append(metadatanode)
     else:
-        metadatanode = metadatanodes[0]  # we append to the first metadata node if there would be multiple (which should not be the case)
+        metadatanode = metadatanodes[
+            0]  # we append to the first metadata node if there would be multiple (which should not be the case)
     metadatanode.append(meta)
     result = stree
     return result
@@ -714,8 +730,8 @@ def asta_recognised_nounnode(node):
     result = result or sasta_long(node)
     result = result or recognised_wordnodepos(node, pos)
     result = result or recognised_lemmanodepos(node, pos)
-    result = result and not(all_lower_consonantsnode(node))
-    result = result and not(short_nucl_n(node))
+    result = result and not (all_lower_consonantsnode(node))
+    result = result and not (short_nucl_n(node))
     return result
 
 
@@ -727,8 +743,8 @@ def asta_recognised_wordnode(node):
     result = result or recognised_wordnode(node)
     result = result or recognised_lemmanode(node)
     result = result or isnumber(node)
-    result = result and not(all_lower_consonantsnode(node))
-    result = result and not(short_nucl_n(node))
+    result = result and not (all_lower_consonantsnode(node))
+    result = result and not (short_nucl_n(node))
     return result
 
 
@@ -751,7 +767,8 @@ def short_nucl_n(node):
     return result
 
 
-sasta_pseudonyms = ['NAAM', 'VOORNAAM', 'ACHTERNAAM', 'ZIEKENHUIS', 'STRAAT', 'PLAATS', 'PLAATSNAAM', 'KIND', 'BEROEP', 'OPLEIDING']
+sasta_pseudonyms = ['NAAM', 'VOORNAAM', 'ACHTERNAAM', 'ZIEKENHUIS', 'STRAAT', 'PLAATS', 'PLAATSNAAM', 'KIND', 'BEROEP',
+                    'OPLEIDING']
 pseudonym_patternlist = [r'^{}\d?$'.format(el) for el in sasta_pseudonyms]
 pseudonym_pattern = vertbar.join(pseudonym_patternlist)
 pseudonymre = re.compile(pseudonym_pattern)
@@ -768,14 +785,15 @@ def recognised_wordnodepos(node, pos):
     word = getattval(node, 'word')
     lcword = word.lower()
     result = lex.informlexiconpos(word, pos) or lex.informlexiconpos(lcword, pos) or \
-        iscompound(node) or isdiminutive(node) or lex.isa_namepart_uc(word)
+             iscompound(node) or isdiminutive(node) or lex.isa_namepart_uc(word)
     return result
 
 
 def recognised_wordnode(node):
     word = getattval(node, 'word')
     lcword = word.lower()
-    result = lex.informlexicon(word) or lex.informlexicon(lcword) or iscompound(node) or isdiminutive(node) or lex.isa_namepart(word)
+    result = lex.informlexicon(word) or lex.informlexicon(lcword) or iscompound(node) or isdiminutive(
+        node) or lex.isa_namepart(word)
     return result
 
 
@@ -826,10 +844,10 @@ def simpleshow2(stree, showchildren=True):
         if index != '':
             print(nodeformat.format(rel, '', indexstr), end=' ')
     else:
-        #print('top', end=' ')
+        # print('top', end=' ')
         for child in stree:
             simpleshow2(child)
-        #print(']', end=' ')
+        # print(']', end=' ')
 
 
 def showflatxml(elem):
@@ -890,14 +908,15 @@ def nodecopy(node):
 
 
 def bareindexnode(node):
-    result = terminal(node) and 'index' in node.attrib and 'postag' not in node.attrib and 'cat' not in node.attrib and 'pt' not in node.attrib and 'pos' not in node.attrib
-    #print(props2str(get_node_props(node)), result, file=sys.stderr)
-    return(result)
+    result = terminal(
+        node) and 'index' in node.attrib and 'postag' not in node.attrib and 'cat' not in node.attrib and 'pt' not in node.attrib and 'pos' not in node.attrib
+    # print(props2str(get_node_props(node)), result, file=sys.stderr)
+    return (result)
 
 
 def terminal(node):
     result = node is not None and len(node) == 0
-    return(result)
+    return (result)
 
 
 def indextransform(stree):
@@ -922,16 +941,16 @@ def indextransform2(stree, indexednodesmap):
         therel = getattval(stree, 'rel')
         newstree = deepcopy(indexednodesmap[theindex])
         newstree.attrib['rel'] = therel
-        #simpleshow(newstree)
-        #print()
+        # simpleshow(newstree)
+        # print()
     else:
         newstree = nodecopy(stree)
-        #simpleshow(newstree)
-        #print(id(stree))
-        #print(id(newstree))
-        #print(len(newstree))
-        #print(id(newstree.getparent()))
-        #print(id(None))
+        # simpleshow(newstree)
+        # print(id(stree))
+        # print(id(newstree))
+        # print(len(newstree))
+        # print(id(newstree.getparent()))
+        # print(id(None))
     for child in stree:
         newchild = indextransform2(child, indexednodesmap)
         newstree.append(newchild)
@@ -978,6 +997,137 @@ def getstree(fullname):
     return tree
 
 
+streestrings = {}
+streestrings[1] = '''
+[the alpino_ds XML markup of this test tree was lost in extraction; only the sentence survives]
+ en uhm en uhm hij hij is nogal
+'''
+
+streestrings[2] = '''
+[the alpino_ds XML markup of this test tree was lost in extraction; only the sentence survives]
+ ik heb een ik heb een ik heb een man met wie ik wil gaan trouwen uhm
+'''
+
+strees = {}
+for el in streestrings:
+    strees[el] = etree.fromstring(streestrings[el])
+
+
+def test():
+    for el in strees:
+        stree = strees[el]
+        lmc = lastmainclauseof(stree)
+        print(getmarkedutt(lmc, stree))
+
+
 def getsentid(stree):
     sentidlist = stree.xpath(sentidxpath)
     if sentidlist == []:
@@ -988,6 +1138,15 @@
     return uttid
 
 
+def testindextransform():
+    for el in strees:
+        stree = strees[el]
+        print(el)
+        simpleshow(stree)
+        newstree = indextransform(stree)
+        simpleshow(newstree)
+
+
 def adaptsentence(stree):
     # adapt the sentence
     # find the sentence element's parent and its index
@@ -999,7 +1158,7 @@ def adaptsentence(stree):
     sentencenodeparent = sentencenode.getparent()
     sentencenodeindex = sentencenodeparent.index(sentencenode)
     sentencenodeparent.remove(sentencenode)
-    #del sentencenodeparent[sentencenodeindex]
+    # del sentencenodeparent[sentencenodeindex]
     theyield = getyield(stree)
     theyieldstr = space.join(theyield)
     newsentence = etree.Element('sentence')
@@ -1018,24 +1177,24 @@ def transplant_node(node1, node2, stree):
     :param stree: tree in which the replacement takes place
     :return: None, the stree input parameter is modified
     '''
-    #find the parent of node1
-    #determine the index of node1
+    # find the parent of node1
+    # determine the index of node1
    sentid = getsentid(stree)
     parentindex = get_parentandindex(node1, stree)
     if parentindex is None:
         result = stree
     else:
         parent, index = parentindex
-        #SDLOGGER.debug(simpleshow(parent))
+        # SDLOGGER.debug(simpleshow(parent))
         del parent[index]
-        #SDLOGGER.debug(simpleshow(parent))
+        # SDLOGGER.debug(simpleshow(parent))
         parent.insert(index, node2)
-        #SDLOGGER.debug(simpleshow(parent))
+        # SDLOGGER.debug(simpleshow(parent))
         result = stree
-        #SDLOGGER.debug(simpleshow(stree))
+        # SDLOGGER.debug(simpleshow(stree))
 
-    #adapt the sentence
-    #find the sentence element's parent and its index
+    # adapt the sentence
+    # find the sentence element's parent and its index
     sentencenode = stree.find('.//sentence')
     sentencenodeparent = sentencenode.getparent()
     sentencenodeindex = sentencenodeparent.index(sentencenode)
@@ -1081,14 +1240,14 @@ def getspan(node):
 
 def lbrother(node, tree):
     nodebegin = getattval(node, 'begin')
-    def condition(n): return getattval(n, 'end') == nodebegin
+    condition = lambda n: getattval(n, 'end') == nodebegin
     result = findfirstnode(tree, condition)
     return result
 
 
 def rbrother(node, tree):
     nodeend = getattval(node, 'end')
-    def condition(n): return getattval(n, 'begin') == nodeend
+    condition = lambda n: getattval(n, 'begin') == nodeend
     result = findfirstnode(tree, condition)
     return result
@@ -1178,7 +1337,7 @@ def getxmetatreepositions(tree, xmetaname, poslistname='annotationposlist'):
     return result
 
 
-#topendxpath = './/node[@cat="top"]/@end'
+# topendxpath = './/node[@cat="top"]/@end'
 
 wordnodemodel = './/node[(@pt or (not(@pt) and not(@cat) and @index)) and @begin="{}"]'
@@ -1211,13 +1370,13 @@ def deletewordnode(tree, begin):
     if thenode is not None:
         thenode.getparent().remove(thenode)
 
     # renumber begins and ends must be done outside this functions when all deletions have been done;
-    #updatebeginend(newtree, begin)
+    # updatebeginend(newtree, begin)
 
     # adapt the cleantokenisation
     # done outside this function
 
-    #adapt the sentence: do this after all deletions
-    #newtree = adaptsentence(newtree)
+    # adapt the sentence: do this after all deletions
+    # newtree = adaptsentence(newtree)
 
     return newtree
@@ -1234,7 +1393,7 @@ def deletewordnodes(tree, begins):
     if newtree is None:
         return newtree
     else:
-        #wordnodexpath = wordnodemodel.format(str(begin))
+        # wordnodexpath = wordnodemodel.format(str(begin))
         thenodes = []
         for begin in begins:
             thenodes += newtree.xpath(wordnodemodel.format(str(begin)))
@@ -1251,7 +1410,7 @@ def deletewordnodes(tree, begins):
     # adapt the cleantokenisation
     # done outside this function
 
-    #adapt the sentence
+    # adapt the sentence
     newtree = adaptsentence(newtree)
     return newtree
@@ -1270,7 +1429,7 @@ def update_cleantokenisation(stree, begin):
     oldcleanedtokposmeta = find1(stree, '//xmeta[@name="cleanedtokenpositions"]')
     cleanedtokposmeta = copy(oldcleanedtokposmeta)
     parent = oldcleanedtokmeta.getparent()
-    if not(cleanedtokmeta is None and cleanedtokposmeta is None):
+    if not (cleanedtokmeta is None and cleanedtokposmeta is None):
         cleanedtokstr = cleanedtokmeta.attrib['annotationwordlist']
         cleanedtok = strliststr2list(cleanedtokstr)
         newcleanedtok = cleanedtok[:intbegin] + cleanedtok[intbegin + 1:]
@@ -1381,3 +1540,8 @@ def add_metadata(intree, metalist):
     for meta in metalist:
         metadata.append(meta.toElement())
     return tree
+
+
+if __name__ == '__main__':
+    # test()
+    testindextransform()
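
Note on the sentence_parser.py change: because the memoised parse() wrapper takes a plain string and returns the Alpino parse as a string, repeated calls with an identical sentence are now answered from the lru_cache instead of opening a new socket connection. A minimal usage sketch (assuming an Alpino server is reachable at the host:port configured in config.py):

    from sentence_parser import parse

    xml1 = parse('dit is een zin')  # first call: full round-trip to the Alpino server
    xml2 = parse('dit is een zin')  # repeated call: served from the cache
    assert xml1 is xml2             # a cache hit returns the very same string object
    print(parse.cache_info())       # CacheInfo(hits=1, misses=1, maxsize=128, currsize=1)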
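
The pivot done by data2rowtuples in the new stapforms.py turns the per-measure counters of allresults.coreresults ({item: {uttid: count}}) into one row per utterance, with the columns in the fixed sorteditemlist order and zerocount padding for absent items. A small sketch of the expected behaviour (assuming stapforms.py is importable):

    from stapforms import data2rowtuples, NS, OS

    data = {NS: {'1': 3}, OS: {'1': 2, '2': 6}}
    for uttid, row in data2rowtuples(data):
        print(uttid, row)
    # 1 [3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    # 2 [0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

Every row has exactly twelve entries, matching the U..AF column range that makestapform writes into the score sheet.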
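
makestapform itself supports two output modes: by default it copies the STAP template next to the input file and returns the path of the new workbook; with in_memory=True it returns a BytesIO holding the filled-in workbook, which avoids touching the file system. A hedged sketch (the AllResults call mirrors the test() in stapforms.py, and the template workbook must exist under SD_DIR/form_templates):

    from allresults import AllResults
    from stapforms import makestapform, NS, OS

    allresults = AllResults(0, {NS: {'1': 3}, OS: {'1': 2}}, {}, {}, 'sample.xml', {})
    buffer = makestapform(allresults, None, in_memory=True)  # BytesIO; nothing written next to sample.xml
    with open('sample_STAP-Form.xlsx', 'wb') as f:
        f.write(buffer.getvalue())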
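
Finally, the new realwordstring helper in stringfunctions.py counts every token of two or more characters as a word and rejects single characters whose Unicode category is punctuation (category P*), so commas, hyphens and question marks are kept out of word counts:

    from stringfunctions import realwordstring

    print([w for w in ['huis', ',', 'ja', '-', '?'] if realwordstring(w)])
    # ['huis', 'ja']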