Skip to content

Commit

Permalink
put history calls outside of correcttreebank
Browse files Browse the repository at this point in the history
  • Loading branch information
JanOdijk committed Aug 1, 2024
1 parent 56c3491 commit 3a65bf2
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 91 deletions.
22 changes: 20 additions & 2 deletions src/sastadev/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,12 @@
loggingfolder, outtreebanksfolder, permprefix, platinumsuffix,
platinumeditedsuffix,
resultsfolder, silverfolder, silverpermfolder, silversuffix)
from sastadev.correcttreebank import (correcttreebank, corrn, errorwbheader, validcorroptions)
from sastadev.correcttreebank import (correcttreebank, corr0, corrn, errorwbheader, validcorroptions)
from sastadev.counterfunctions import counter2liststr
from sastadev.external_functions import str2functionmap
from sastadev.goldcountreader import get_goldcounts
from sastadev.history import (donefiles, donefilesfullname, gathercorrections, mergecorrections, putcorrections,
putdonefilenames, samplecorrections, samplecorrectionsfullname)
from sastadev.macros import expandmacros
from sastadev.methods import Method, supported_methods, treatmethod
from sastadev.mismatches import exactmismatches, literalmissedmatches
Expand Down Expand Up @@ -1133,7 +1135,23 @@ def main():
# add xsid to trees that should have one but do not
treebank2 = tb_addxsid(treebank1, targets)

treebank, errordict, allorandalts = correcttreebank(treebank2, targets, methodname, options.infilename, corr)
if corr != corr0:
reducedtreebankfullname = os.path.relpath(options.infilename, start=settings.DATAROOT)
if reducedtreebankfullname not in donefiles:
thissamplecorrections = gathercorrections(treebank2)
else:
thissamplecorrections = {}
# merge the corrections from this sample with the samplecorrections and update the file
mergedsamplecorrections = mergecorrections(samplecorrections, thissamplecorrections)
putcorrections(mergedsamplecorrections, samplecorrectionsfullname)
donefiles.add(reducedtreebankfullname)
putdonefilenames(donefiles, donefilesfullname)
else:
mergedsamplecorrections = samplecorrections



treebank, errordict, allorandalts = correcttreebank(treebank2, targets, methodname, mergedsamplecorrections, corr)

allresults, samplesizetuple = sastacore(
origtreebank, treebank, annotatedfileresults, scp)
Expand Down
20 changes: 4 additions & 16 deletions src/sastadev/correcttreebank.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
from sastadev.conf import settings
from sastadev.corrector import (Correction, disambiguationdict, getcorrections,
mkuttwithskips)
from sastadev.history import (donefiles, donefilesfullname, gathercorrections, mergecorrections, putcorrections,
putdonefilenames, samplecorrections, samplecorrectionsfullname)
from sastadev.lexicon import de, dets, known_word, nochildwords
from sastadev.macros import expandmacros
from sastadev.metadata import (Meta, bpl_delete, bpl_indeze, bpl_node,
Expand Down Expand Up @@ -294,8 +292,9 @@ def updateerrordict(errordict: ErrorDict, uttid: UttId, oldtree: SynTree, newtre
return errordict


def correcttreebank(treebank: Treebank, targets: Targets, method: MethodName, treebankfullname,
corr: CorrectionMode = corrn ) -> Tuple[Treebank, ErrorDict, List[Optional[OrigandAlts]]]:
def correcttreebank(treebank: Treebank, targets: Targets, method: MethodName, allsamplecorrections,
corr: CorrectionMode = corrn) -> Tuple[Treebank, ErrorDict, List[Optional[OrigandAlts]]]:

'''
The function *correcttreebank* takes as input:
Expand All @@ -320,12 +319,6 @@ def correcttreebank(treebank: Treebank, targets: Targets, method: MethodName, tr
if corr == corr0:
return treebank, errordict, allorandalts
else:
reducedtreebankfullname = os.path.relpath(treebankfullname, start=settings.DATAROOT)
if reducedtreebankfullname not in donefiles:
thissamplecorrections = gathercorrections(treebank)
else:
thissamplecorrections = {}

newtreebank: Treebank = etree.Element('treebank')
# errorlogrows = []
for stree in treebank:
Expand All @@ -335,7 +328,7 @@ def correcttreebank(treebank: Treebank, targets: Targets, method: MethodName, tr
if mustbedone:
# to implement
sentence = getsentence(stree)
newstree, orandalts = correct_stree(stree, method, corr, thissamplecorrections)
newstree, orandalts = correct_stree(stree, method, corr, allsamplecorrections)
if newstree is not None:
errordict = updateerrordict(
errordict, uttid, stree, newstree)
Expand All @@ -346,11 +339,6 @@ def correcttreebank(treebank: Treebank, targets: Targets, method: MethodName, tr
else:
newtreebank.append(stree)

# merge the corrections from this sample with the samplecorrections and update the file
mergedsamplecorrections = mergecorrections(samplecorrections, thissamplecorrections)
putcorrections(mergedsamplecorrections, samplecorrectionsfullname)
donefiles.add(reducedtreebankfullname)
putdonefilenames(donefiles, donefilesfullname)

return newtreebank, errordict, allorandalts

Expand Down
141 changes: 71 additions & 70 deletions src/sastadev/data/childescorrections/donefiles.txt
Original file line number Diff line number Diff line change
@@ -1,84 +1,85 @@
auristrain\intreebanks\TD28.xml
auristrain\intreebanks\DLD03.xml
vklstap\intreebanks\STAP_04.xml
vklstap\intreebanks\STAP_06.xml
vklstap\intreebanks\STAP_08.xml
vkltarsp\intreebanks\TARSP_08.xml
vkltarsp\intreebanks\TARSP_07.xml
Auris\intreebanks\TD18.xml
vklstap\intreebanks\STAP_10.xml
vkltarsp\intreebanks\TARSP_07.xml
vklstapfase2\intreebanks\kind1.xml
vklasta\intreebanks\ASTA_05.xml
vklstapfase2\intreebanks\STP_3.xml
auristrain\intreebanks\TD09.xml
vklstapfase2\intreebanks\K2.xml
vklasta\intreebanks\ASTA_10.xml
auristest\intreebanks\TD01.xml
vklstap\intreebanks\STAP_05.xml
auristrain\intreebanks\TD19.xml
vklstapfase2\intreebanks\SASTA_STAP_023.xml
vklstap\intreebanks\STAP_08.xml
auristrain\intreebanks\TD05.xml
vklasta\intreebanks\ASTA_08.xml
auristrain\intreebanks\TD11.xml
vklstapfase2\intreebanks\STP_Ko.xml
auristest\intreebanks\TD10.xml
auristest\intreebanks\TD30.xml
vkltarsp\intreebanks\TARSP_09.xml
auristest\intreebanks\TD20.xml
vklastafase2\intreebanks\ASTA_11.xml
auristrain\intreebanks\TD29.xml
auristrain\intreebanks\TD21.xml
auristrain\intreebanks\TD22.xml
vklastafase2\intreebanks\ASTA_13.xml
vklstap\intreebanks\STAP_04.xml
auristrain\intreebanks\TD07.xml
auristrain\intreebanks\TD26.xml
vklasta\intreebanks\ASTA_09.xml
auristrain\intreebanks\TD12.xml
auristrain\intreebanks\TD24.xml
vklasta\intreebanks\ASTA_10.xml
auristrain\intreebanks\TD02.xml
vklastafase2\intreebanks\ASTA_15.xml
auristest\intreebanks\TD15.xml
vkltarsp\intreebanks\tarsp_01.xml
vklstapfase2\intreebanks\STP_Du.xml
vklstapfase2\intreebanks\STAP_024.xml
vklastafase2\intreebanks\ASTA_16.xml
vklstapfase2\intreebanks\STP_3.xml
vkltarsp\intreebanks\TARSP_10.xml
vklasta\intreebanks\ASTA_05.xml
vklasta\intreebanks\ASTA_07.xml
auristrain\intreebanks\DLD14.xml
vklasta\intreebanks\ASTA_02.xml
auristest\intreebanks\TD25.xml
auristrain\intreebanks\TD14.xml
auristrain\intreebanks\TD23.xml
auristrain\intreebanks\TD24.xml
vkltarsp\intreebanks\Tarsp_04.xml
auristrain\intreebanks\TD08.xml
vkltarsp\intreebanks\TARSP_06.xml
vklasta\intreebanks\ASTA_01.xml
vklstapfase2\intreebanks\SASTA_STAP_022.xml
auristest\intreebanks\TD01.xml
vklstap\intreebanks\STAP_09.xml
auristrain\intreebanks\DLD16.xml
auristrain\intreebanks\TD04.xml
vkltarsp\intreebanks\Tarsp_01.xml
auristrain\intreebanks\TD05.xml
auristest\intreebanks\TD30.xml
auristest\intreebanks\TD10.xml
test_tarsp\intreebanks\test_tarsp.xml
auristrain\intreebanks\TD13.xml
vkltarsp\intreebanks\TARSP_13.xml
vkltarsp\intreebanks\Tarsp_05.xml
vkltarsp\intreebanks\Tarsp_04.xml
vkltarsp\intreebanks\Tarsp_02.xml
vklstapfase2\intreebanks\STP_Du.xml
vklasta\intreebanks\ASTA_09.xml
vkltarsp\intreebanks\TARSP_08.xml
vklstap\intreebanks\STAP_07.xml
auristrain\intreebanks\DLD14.xml
vkltarsp\intreebanks\TARSP_06.xml
vklstapfase2\intreebanks\SASTA_STAP_023.xml
VKLStapFase2\intreebanks\K2.xml
auristrain\intreebanks\TD03.xml
vklastafase2\intreebanks\ASTA_16.xml
vklstap\intreebanks\STAP_03.xml
auristrain\intreebanks\TD06.xml
vkltarsp\intreebanks\Tarsp_03.xml
auristest\intreebanks\DLD20.xml
vklstapfase2\intreebanks\STAP025.xml
vklasta\intreebanks\ASTA_04.xml
vklstapfase2\intreebanks\STP_Da.xml
vklasta\intreebanks\ASTA_06.xml
vklasta\intreebanks\ASTA_02.xml
auristest\intreebanks\TD15.xml
vkltarsp\intreebanks\tarsp_01.xml
auristrain\intreebanks\TD11.xml
auristest\intreebanks\DLD07.xml
vklasta\intreebanks\ASTA_03.xml
auristrain\intreebanks\TD16.xml
vklstap\intreebanks\STAP_09.xml
vklasta\intreebanks\ASTA_07.xml
auristrain\intreebanks\DLD11.xml
vklastafase2\intreebanks\ASTA_13.xml
vklstapfase2\intreebanks\STP_Da.xml
vklastafase2\intreebanks\ASTA_14.xml
auristrain\intreebanks\TD09.xml
vklastafase2\intreebanks\ASTA_15.xml
vklstapfase2\intreebanks\STP_KC.xml
auristrain\intreebanks\TD26.xml
auristrain\intreebanks\DLD16.xml
auristrain\intreebanks\TD23.xml
vkltarsp\intreebanks\TARSP_10.xml
auristrain\intreebanks\TD13.xml
vklstap\intreebanks\STAP_06.xml
auristest\intreebanks\TD25.xml
vklstap\intreebanks\STAP_07.xml
vkltarsp\intreebanks\Tarsp_05.xml
auristrain\intreebanks\TD06.xml
auristrain\intreebanks\TD16.xml
vklstap\intreebanks\STAP_02.xml
vklasta\intreebanks\ASTA_06.xml
vklasta\intreebanks\ASTA_08.xml
vklstapfase2\intreebanks\STP_MP_MZ.xml
vkltarsp\intreebanks\Tarsp_01.xml
vklstap\intreebanks\STAP_05.xml
vkltarsp\intreebanks\Tarsp_03.xml
vklastafase2\intreebanks\ASTA_11.xml
vklasta\intreebanks\ASTA_01.xml
vklasta\intreebanks\ASTA_04.xml
auristrain\intreebanks\TD28.xml
auristrain\intreebanks\TD21.xml
auristrain\intreebanks\TD18.xml
auristrain\intreebanks\TD07.xml
vklstapfase2\intreebanks\K2.xml
vklstap\intreebanks\STAP_02.xml
auristrain\intreebanks\TD14.xml
vklstapfase2\intreebanks\STAP_024.xml
auristrain\intreebanks\TD04.xml
auristest\intreebanks\DLD07.xml
auristest\intreebanks\DLD20.xml
vklstapfase2\intreebanks\STP_Ko.xml
vklstapfase2\intreebanks\SASTA_STAP_022.xml
auristrain\intreebanks\TD22.xml
vkltarsp\intreebanks\TARSP_09.xml
auristrain\intreebanks\TD29.xml
auristest\intreebanks\TD20.xml
vkltarsp\intreebanks\Tarsp_02.xml
auristrain\intreebanks\DLD11.xml
vklstap\intreebanks\STAP_03.xml
vklstapfase2\intreebanks\kind1.xml
vklstapfase2\intreebanks\STP_KC.xml
auristrain\intreebanks\DLD03.xml
6 changes: 3 additions & 3 deletions src/sastadev/data/childescorrections/samplecorrections.txt
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ van veel replacement 1
van bij replacement 1
van om replacement 1
arreen alleen replacement 1
het hij replacement 1
het hij replacement 2
het er replacement 1
jiggen liggen replacement 2
kruisd gekruisd replacement 1
Expand Down Expand Up @@ -631,8 +631,8 @@ ook blauwe ramen zitten er ook explanation 1
ginnen gingen replacement 2
dat het replacement 1
allegrooste allegrootste noncompletion 2
en maar replacement 3
en eng noncompletion 1
en maar replacement 2
en want replacement 2
ben wordt replacement 2
pupcakes cupcakes replacement 1
Expand Down Expand Up @@ -682,7 +682,7 @@ ziekje muziekje replacement 1
Bobdebouwerkattet Bobdebouwerkwartet replacement 1
blazen geblazen replacement 1
ik mij replacement 1
wou wilde replacement 2
wou wilde replacement 3
vashouden vasthouden noncompletion 2
tant want replacement 2
aws alles replacement 4
Expand Down

0 comments on commit 3a65bf2

Please sign in to comment.