-
Notifications
You must be signed in to change notification settings - Fork 0
/
tao-item-overview.py
47 lines (43 loc) · 1.94 KB
/
tao-item-overview.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from gettext import find
import glob
import os
import re
from typing import Dict
import lxml.etree as ET
import pandas as pd
# Folder that is searched recursively for 'qti.xml' files; the generated
# workbook 'items.xlsx' is written into this same folder.
input_folder = 'C://TMP//TAO_BOEKJES'
# Prefix map handed to the XPath lookups ('d:' is the prefix used in them).
ns = {'d': 'http://www.imsglobal.org/xsd/imsqti_v2p2'}
# Column order of the generated sheet (also used when no item was found).
column_names = ["title", "identifier", "key", "text"]
# Label used for a numeric choice value; position 0 maps to an empty string.
choices = ['', 'A', 'B', 'C', 'D']
# Compiled once: matches any run of whitespace (raw string avoids the
# invalid-escape warning that a plain '\s+' literal triggers).
_WHITESPACE_RUN = re.compile(r'\s+')


def item_text(item_dom):
    """Return the concatenated text of *item_dom* with whitespace collapsed.

    Trailing newlines are stripped first, then every run of whitespace is
    replaced by a single space.
    """
    joined = ''.join(item_dom.itertext()).rstrip("\n")
    return _WHITESPACE_RUN.sub(' ', joined)


def choice_label(raw_value):
    """Translate one correct-response value into the label shown in the key.

    Trailing whitespace and the 'choice_' marker are removed.  A remaining
    decimal number that is a valid index into ``choices`` is replaced by the
    entry at that index; anything else is returned as is.  ``None`` (an
    element without text) is treated as an empty string.
    """
    value = (raw_value or '').rstrip().replace('choice_', '')
    # isdecimal() only accepts characters that int() can parse; isnumeric()
    # also accepts e.g. superscripts and fractions, on which int() raises.
    if value.isdecimal() and int(value) < len(choices):
        return choices[int(value)]
    return value


def item_key(item_dom):
    """Return the answer key of *item_dom* as a '#'-separated string.

    The 'mapKey' attributes of all namespaced elements carrying one are used
    when they yield a non-empty key.  Otherwise the key is built from the
    'correctResponse/value' elements, each converted by ``choice_label``.
    An empty string is returned when neither source provides anything.
    """
    mapped = item_dom.findall(".//d:*[@mapKey]", ns)
    key = '#'.join(element.get('mapKey') for element in mapped)
    if key != '':
        return key
    correct_responses = item_dom.findall(".//d:correctResponse/d:value", ns)
    return '#'.join(choice_label(response.text) for response in correct_responses)


def main():
    """Scan ``input_folder`` for items and write the overview workbook.

    Every 'qti.xml' below ``input_folder`` is parsed; one row per distinct
    item title is produced (the first file found for a title wins) and the
    rows are written to 'items.xlsx' inside ``input_folder``.
    """
    rows = []
    # Titles already emitted; a set keeps the duplicate check O(1) per file.
    seen_titles = set()
    for filename in glob.iglob(input_folder + '/**/qti.xml', recursive=True):
        item_dom = ET.parse(filename).getroot()
        title = item_dom.get('title')
        if title in seen_titles:
            continue
        seen_titles.add(title)
        rows.append({
            'title': title,
            'identifier': item_dom.get('identifier'),
            'key': item_key(item_dom),
            'text': item_text(item_dom),
        })
    # Passing the column names keeps the header row even when rows is empty.
    df = pd.DataFrame(rows, columns=column_names)
    df.to_excel(os.path.join(input_folder, 'items.xlsx'))


if __name__ == "__main__":
    main()