Skip to content

Commit b3669af

Browse files
committed
Be explicit about encodings in matrix.cgi
This commit does 3 things: 1. Always opens files in matrix.cgi with an explicit encoding (except tar files and urls), currently set to utf-8-sig on read and utf-8 on write. 2. Decodes the bytes from uploaded files with utf-8-sig. 3. Stores the joined path of a choices file in a variable and reuses it instead of recomputing it each time (unrelated to the current issue).
1 parent 34908a1 commit b3669af

File tree

1 file changed

+29
-16
lines changed

1 file changed

+29
-16
lines changed

matrix.cgi

+29-16
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ from gmcs.choices import ChoicesFile
2424
from gmcs.linglib.toolboximport import import_toolbox_lexicon
2525

2626

27+
# Sometimes UTF-8 files have a (gratuitous) BOM. The utf-8-sig
28+
# encoding will strip the BOM, but we want to always write files
29+
# without it, so use regular utf-8 on write.
30+
READ_ENCODING = 'utf-8-sig'
31+
WRITE_ENCODING = 'utf-8'
32+
33+
2734
cgitb.enable()
2835

2936
# Production Check
@@ -106,23 +113,30 @@ for s in sessions:
106113
# figure out the path to the current session's directory, creating it
107114
# if necessary
108115
session_path = 'sessions/' + cookie
116+
choices_path = os.path.join(session_path, 'choices')
117+
109118
if cookie and not os.path.exists(session_path):
110119
os.mkdir(session_path)
111120
# create a blank choices file
112-
open(os.path.join(session_path, 'choices'), 'w').close()
121+
with open(choices_path, 'w', encoding=WRITE_ENCODING):
122+
pass
113123

114124
# if the 'choices' field is defined, we have either the contents of an
115125
# uploaded choices file or the name of a sample choices file (which
116126
# will begin with 'sample-choices/') to replace the current choices.
117127
# TJT 2014-09-18: Get choices files from Language CoLLAGE links
118128
if 'choices' in form_data:
119-
choices = form_data['choices'].value
129+
choices_item = form_data['choices']
130+
if choices_item.file:
131+
# TODO: handle encoding problems
132+
choices = choices_item.value.decode(READ_ENCODING)
133+
else:
134+
choices = choices_item.value
120135
if choices:
121136
data = ''
122137
if choices.startswith('web/sample-choices/'):
123-
f = open(choices, 'r')
124-
data = f.read()
125-
f.close()
138+
with open(choices, 'r', encoding=READ_ENCODING) as f:
139+
data = f.read()
126140
elif choices.startswith('collage/'):
127141
# Get choices files from CoLLAGE
128142
# should be 3 or 7 letter keys... doesn't work for other length keys
@@ -150,13 +164,12 @@ if 'choices' in form_data:
150164
else: # Uploaded choices data
151165
data = choices
152166
if data or choices.endswith('/empty'):
153-
f = open(os.path.join(session_path, 'choices'), 'w')
154-
f.write(data)
155-
f.close()
167+
with open(choices_path, 'w', encoding=WRITE_ENCODING) as f:
168+
f.write(data)
156169

157170
# if the 'section' field is defined, we have submitted values to save
158171
if 'section' in form_data:
159-
matrixdef.save_choices(form_data, os.path.join(session_path, 'choices'))
172+
matrixdef.save_choices(form_data, choices_path)
160173

161174
# if we have received toolbox files, then we want to add these lexical items after saving the toolbox configuration (done above).
162175
if 'import_toolbox' in form_data:
@@ -167,8 +180,8 @@ if 'import_toolbox' in form_data:
167180
fout.write(form_data[key].value)
168181
toolbox_files.append(fout)
169182
form_data[key].value = fout.name
170-
matrixdef.save_choices(form_data, os.path.join(session_path, 'choices'))
171-
import_toolbox_lexicon(os.path.join(session_path, 'choices'))
183+
matrixdef.save_choices(form_data, choices_path)
184+
import_toolbox_lexicon(choices_path)
172185
for tbfile in toolbox_files:
173186
tbfile.close()
174187

@@ -184,10 +197,10 @@ if 'verbpred' in form_data:
184197
# no longer true, there can now be validation info messages.
185198
# nothing seems to depend on the list being empty #14 feb 2012
186199
try:
187-
vr = validate_choices(os.path.join(session_path, 'choices'))
200+
vr = validate_choices(choices_path)
188201
except:
189202
exc = sys.exc_info()
190-
matrixdef.choices_error_page(os.path.join(session_path, 'choices'), exc)
203+
matrixdef.choices_error_page(choices_path, exc)
191204
sys.exit()
192205

193206
# modified to support captcha
@@ -208,7 +221,7 @@ elif 'customize' in form_data:
208221
matrixdef.error_page(vr)
209222
else:
210223
# If the user said it's OK, archive the choices file
211-
choices = ChoicesFile(os.path.join(session_path, 'choices'))
224+
choices = ChoicesFile(choices_path)
212225
if choices.get('archive') == 'yes':
213226
# create the saved-choices directory
214227
if not os.path.exists('saved-choices'):
@@ -224,15 +237,15 @@ elif 'customize' in form_data:
224237
num = f[i + 1:]
225238
if num.isdigit():
226239
serial = max(serial, int(num) + 1)
227-
shutil.copy(os.path.join(session_path, 'choices'),
240+
shutil.copy(choices_path,
228241
'saved-choices/choices.' + str(serial))
229242

230243
# Create the customized grammar
231244
try:
232245
grammar_dir = customize_matrix(session_path, arch_type)
233246
except:
234247
exc = sys.exc_info()
235-
matrixdef.customize_error_page(os.path.join(session_path, 'choices'),
248+
matrixdef.customize_error_page(choices_path,
236249
exc)
237250
sys.exit()
238251

0 commit comments

Comments
 (0)