diff --git a/astaforms.py b/astaforms.py index 5444c50..5c9b057 100644 --- a/astaforms.py +++ b/astaforms.py @@ -297,7 +297,7 @@ def resultdict2table(resultdict): okpvs = max(0, allpvs - foutepvs) bijzincount = dictget(uttid_dict, 'bijzincount') remarks = dictget(uttid_dict, 'remarks') - paddeduttid = uttid.rjust(3, '0') + paddeduttid = str(uttid).rjust(3, '0') newrow = [paddeduttid, wc, correct, okpvs, foutepvs, bijzincount, remarks] table.append(newrow) sortedtable = sorted(table, key=lambda row: row[0]) diff --git a/default_config.py b/default_config.py index 748d4e4..fcc79d7 100644 --- a/default_config.py +++ b/default_config.py @@ -1,6 +1,6 @@ import logging import os.path as op -from sentence_parser import parse +import sentence_parser # logging object @@ -15,4 +15,4 @@ # Function to parse a sentence with Alpino # Should take a string as input and return an lxml.etree -PARSE_FUNC = parse +PARSE_FUNC = sentence_parser.parse diff --git a/external_functions.py b/external_functions.py index 41d907a..23fabea 100644 --- a/external_functions.py +++ b/external_functions.py @@ -63,3 +63,10 @@ def oldgetfname(f): str2functionmap[fname] = f junk = 0 + +# Used by SASTA to find form functions +form_map = { + 'TARSP': mktarspform, + 'ASTA': astaform, + 'STAP': makestapform +} diff --git a/sentence_parser.py b/sentence_parser.py index dc0278f..e49004f 100644 --- a/sentence_parser.py +++ b/sentence_parser.py @@ -1,8 +1,13 @@ -from config import * -from contextlib import contextmanager +from functools import lru_cache import socket +from contextlib import contextmanager + from lxml import etree +import config + +from alpinoparsing import escape_alpino_input + class AlpinoSentenceParser: ''' Assumes a Alpino server is running on provided host:port, @@ -10,13 +15,14 @@ class AlpinoSentenceParser: @contextmanager def connection(self): try: - s = socket.create_connection((ALPINO_HOST, ALPINO_PORT)) + s = socket.create_connection((config.ALPINO_HOST, config.ALPINO_PORT)) yield s s.close() except socket.error: raise def parse_sentence(self, sentence: str, buffer_size=8096) -> str: + sentence = escape_alpino_input(sentence) with self.connection() as s: sentence += '\n\n' # flag end of file s.sendall(sentence.encode('utf-8')) @@ -29,6 +35,7 @@ def parse_sentence(self, sentence: str, buffer_size=8096) -> str: return xml.decode('utf-8') +@lru_cache(maxsize=128) def parse(sentence): ''' Wrapper for use in sastadev''' alp = AlpinoSentenceParser() diff --git a/stapforms.py b/stapforms.py index da7f6af..ba2fdf5 100644 --- a/stapforms.py +++ b/stapforms.py @@ -1,16 +1,16 @@ -import logging +from io import BytesIO import os -from shutil import copyfile +from shutil import copyfile, copyfileobj from collections import defaultdict from openpyxl import load_workbook from allresults import AllResults -from config import SD_DIR +from config import SD_DIR, SDLOGGER scoresheetname = 'STAP 1 - 5' maxutt = 50 zerocount = 0 -basexl = os.path.join(SD_DIR, r'form_templates\STAP Excel VUmc 2018.xlsx') +basexl = os.path.join(SD_DIR, 'form_templates', 'STAP Excel VUmc 2018.xlsx') NS = 'S001' OS = 'S002' @@ -57,21 +57,27 @@ def data2rowtuples(data): return rowlist -def makestapform(allresults, _, basexl=basexl): - # copy the basexl to a new one with the appropriate name +def makestapform(allresults, _, basexl=basexl, in_memory=False): + if not in_memory: + # copy the basexl to a new one with the appropriate name + (base, ext) = os.path.splitext(allresults.filename) + target = base + '_STAP-Form' + '.xlsx' - (base, ext) = os.path.splitext(allresults.filename) - formxl = base + '_STAP-Form' + '.xlsx' + copyfile(basexl, target) - copyfile(basexl, formxl) + # open the workbook + wb = load_workbook(filename=target) + else: + target = BytesIO() + with open(basexl, 'rb') as source: + copyfileobj(fsrc=source, fdst=target) + wb = load_workbook(target) # gather the results # put the results in the right order rowlist = data2rowtuples(allresults.coreresults) - # open the workbook - wb = load_workbook(filename=formxl) ws = wb[scoresheetname] cols = ['U', 'V', 'W', 'X', 'Y', 'Z', 'AA', 'AB', 'AC', 'AD', 'AE', 'AF'] @@ -88,13 +94,14 @@ def makestapform(allresults, _, basexl=basexl): cellkey = col + uttidrowstr ws[cellkey] = el else: - logging.error('Unexpected utterance id encountered: {}'.format(uttid)) + SDLOGGER.error('Unexpected utterance id encountered: {}'.format(uttid)) # save the workbook - wb.save(formxl) + wb.save(target) + wb.close() # return the workbook- not needed - # return wb + return target def test():