Skip to content

Commit

Permalink
Make responding with binary data work under PY3.
Browse files Browse the repository at this point in the history
The code as previously written would unconditionally encode parts as
though they contained text - this went unnoticed on PY2 because strings
and bytes are one and the same thing but blew up on PY3 where a string
is explicitly of type unicode while a binary file would be raw bytes.

Explicitly check the output_format and if instructed to serve a file do
so without touching the chunks of file content bytes being yielded.
  • Loading branch information
albu-diku committed Oct 2, 2024
1 parent f9859bd commit e9444d4
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 7 deletions.
9 changes: 6 additions & 3 deletions mig/wsgi-bin/migwsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def _set_os_environ(value):
return _application(None, environ, start_response, _set_environ=_set_os_environ, _wrap_wsgi_errors=wrap_wsgi_errors)


def _application(configuration, environ, start_response, _set_environ, _format_output=format_output, _retrieve_handler=_import_backend, _wrap_wsgi_errors=True, _config_file=None, _skip_log=False):
def _application(configuration, environ, start_response, _set_environ, _fieldstorage_to_dict=fieldstorage_to_dict, _format_output=format_output, _retrieve_handler=_import_backend, _wrap_wsgi_errors=True, _config_file=None, _skip_log=False):

# NOTE: pass app environ including apache and query args on to sub handlers
# through the usual 'os.environ' channel expected in functionality
Expand Down Expand Up @@ -329,7 +329,7 @@ def _application(configuration, environ, start_response, _set_environ, _format_o
# (backend, script_name))
fieldstorage = cgi.FieldStorage(fp=environ['wsgi.input'],
environ=environ)
user_arguments_dict = fieldstorage_to_dict(fieldstorage)
user_arguments_dict = _fieldstorage_to_dict(fieldstorage)
if 'output_format' in user_arguments_dict:
output_format = user_arguments_dict['output_format'][0]

Expand Down Expand Up @@ -447,7 +447,10 @@ def _application(configuration, environ, start_response, _set_environ, _format_o
# (backend, i+1, chunk_parts))
# end index may be after end of content - but no problem
part = output[i*download_block_size:(i+1)*download_block_size]
yield _ensure_encoded_string(part)
if output_format == 'file':
yield part
else:
yield _ensure_encoded_string(part)
if chunk_parts > 1:
_logger.info("WSGI %s finished yielding all %d output parts" %
(backend, chunk_parts))
Expand Down
1 change: 1 addition & 0 deletions tests/data/loading.gif
2 changes: 1 addition & 1 deletion tests/support/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

from tests.support.configsupp import FakeConfiguration
from tests.support.suppconst import MIG_BASE, TEST_BASE, TEST_FIXTURE_DIR, \
TEST_OUTPUT_DIR
TEST_OUTPUT_DIR, TEST_DATA_DIR

PY2 = (sys.version_info[0] == 2)

Expand Down
1 change: 1 addition & 0 deletions tests/support/suppconst.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
# Use abspath for __file__ on Py2
_SUPPORT_DIR = os.path.dirname(os.path.abspath(__file__))
TEST_BASE = os.path.normpath(os.path.join(_SUPPORT_DIR, ".."))
TEST_DATA_DIR = os.path.join(TEST_BASE, "data")
TEST_FIXTURE_DIR = os.path.join(TEST_BASE, "fixture")
TEST_OUTPUT_DIR = os.path.join(TEST_BASE, "output")
MIG_BASE = os.path.realpath(os.path.join(TEST_BASE, ".."))
70 changes: 67 additions & 3 deletions tests/test_mig_wsgi-bin_migwsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@
import os
import stat
import sys
import unittest

from tests.support import MIG_BASE, MigTestCase, testmain
from tests.support import MIG_BASE, TEST_BASE, TEST_DATA_DIR, MigTestCase, testmain
from mig.shared.output import format_output
import mig.shared.returnvalues as returnvalues

Expand Down Expand Up @@ -66,14 +67,35 @@ def _is_return_value(return_value):
return return_value in defined_return_values


def _trigger_and_unpack_result(application_result):
def _trigger_and_unpack_result(application_result, result_kind='textual'):
assert result_kind in ('textual', 'binary')

chunks = list(application_result)
assert len(chunks) > 0, "invocation returned no output"
complete_value = b''.join(chunks)
decoded_value = codecs.decode(complete_value, 'utf8')
if result_kind == 'binary':
decoded_value = complete_value
else:
decoded_value = codecs.decode(complete_value, 'utf8')
return decoded_value


def create_instrumented_fieldstorage_to_dict():
def _instrumented_fieldstorage_to_dict(fieldstorage):
return _instrumented_fieldstorage_to_dict._result

_instrumented_fieldstorage_to_dict._result = {
'output_format': ('html',)
}

def set_result(result):
_instrumented_fieldstorage_to_dict._result = result

_instrumented_fieldstorage_to_dict.set_result = set_result

return _instrumented_fieldstorage_to_dict


def create_instrumented_format_output():
def _instrumented_format_output(
configuration,
Expand All @@ -88,6 +110,16 @@ def _instrumented_format_output(
call_args = (configuration, backend, ret_val, ret_msg, call_args_out_obj, outputformat,)
_instrumented_format_output.calls.append({ 'args': call_args })

if _instrumented_format_output._file:
return format_output(
configuration,
backend,
ret_val,
ret_msg,
out_obj,
outputformat,
)

# FIXME: the following is a workaround for a bug that exists between the WSGI wrapper
# and the output formatter - specifically, the latter adds default header and
# title if start does not exist, but the former ensures that start always exists
Expand Down Expand Up @@ -122,11 +154,18 @@ def _instrumented_format_output(
outputformat,
)
_instrumented_format_output.calls = []
_instrumented_format_output._file = False
_instrumented_format_output.values = dict(
title_text='',
header_text='',
)


def _set_file(is_enabled):
_instrumented_format_output._file = is_enabled

setattr(_instrumented_format_output, 'set_file', _set_file)

def _program_values(**kwargs):
_instrumented_format_output.values.update(kwargs)

Expand Down Expand Up @@ -185,13 +224,15 @@ def before_each(self):
self.fake_start_response = create_wsgi_start_response()

# MiG WSGI wrapper specific setup
self.instrumented_fieldstorage_to_dict = create_instrumented_fieldstorage_to_dict()
self.instrumented_format_output = create_instrumented_format_output()
self.instrumented_retrieve_handler = create_instrumented_retrieve_handler()

self.application_args = (self.configuration, self.fake_wsgi_environ, self.fake_start_response,)
self.application_kwargs = dict(
_wrap_wsgi_errors=noop,
_format_output=self.instrumented_format_output,
_fieldstorage_to_dict=self.instrumented_fieldstorage_to_dict,
_retrieve_handler=self.instrumented_retrieve_handler,
_set_environ=noop,
)
Expand Down Expand Up @@ -236,6 +277,29 @@ def test_return_value_ok_returns_expected_title(self):
self.assertInstrumentation()
self.assertHtmlElementTextContent(output, 'title', 'TEST', trim_newlines=True)

def test_return_value_ok_serving_a_binary_file(self):
test_binary_file = os.path.join(TEST_DATA_DIR, 'loading.gif')
with open(test_binary_file, 'rb') as f:
test_binary_data = f.read()

self.instrumented_fieldstorage_to_dict.set_result({
'output_format': ('file',)
})
self.instrumented_format_output.set_file(True)

file_obj = { 'object_type': 'binary', 'data': test_binary_data }
self.instrumented_retrieve_handler.program([file_obj], returnvalues.OK)

application_result = migwsgi._application(
*self.application_args,
**self.application_kwargs
)

output = _trigger_and_unpack_result(application_result, 'binary')

self.assertInstrumentation()
self.assertEqual(output, test_binary_data)


if __name__ == '__main__':
testmain()

0 comments on commit e9444d4

Please sign in to comment.