Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-43086: Added handling for excess data in binascii.a2b_base64 #24402

Merged
merged 26 commits into from
Jul 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
52d83e9
Added handling for excess data in binascii.a2b_base64
idan22moral Jan 31, 2021
557bce8
📜🤖 Added by blurb_it.
blurb-it[bot] Jan 31, 2021
6ed5193
Added if-state guard
idan22moral Mar 13, 2021
4ee90f5
Merge branch 'master' of https://github.com/idan22moral/cpython
idan22moral Mar 30, 2021
c7b723f
Implemented the strict mode logic
idan22moral Mar 30, 2021
93f497a
Merge branch 'master' of https://github.com/python/cpython
idan22moral Mar 30, 2021
f6283d9
Trying to fix the "Check if generated files are up to date" failure
idan22moral Apr 7, 2021
cedbb85
Generated function signatures using clinic
idan22moral Apr 7, 2021
69c96d5
Handle data in the middle of the padding in strict mode
idan22moral Apr 12, 2021
3718ebd
Added a test for strict mode
idan22moral Apr 12, 2021
d0b60e2
Added a test for invalid data as the first character
idan22moral Jul 10, 2021
afb95db
Disallowed leading padding in strict mode
idan22moral Jul 10, 2021
3c5758b
Added tests for padding-only input
idan22moral Jul 10, 2021
5f8df5b
Added tests to validate the default behavior.
idan22moral Jul 10, 2021
644dbaf
Described the changed of this pull request
idan22moral Jul 10, 2021
464484c
Modified syntax of RST
idan22moral Jul 10, 2021
e1ccf8a
Updated the docs to explain the strict_mode parameter
idan22moral Jul 10, 2021
d6a5cbf
Moved declaration of state to the beginning to prevent multiple decla…
idan22moral Jul 16, 2021
5abc68f
Merge branch 'main' into master
idan22moral Jul 16, 2021
08aa26c
Corrected the RST syntax for argument in docs (italic)
idan22moral Jul 16, 2021
2f1990e
Corrected the RST syntax for argument in news (italic)
idan22moral Jul 16, 2021
fa959bd
Removed whitespace that lead to build failure
idan22moral Jul 16, 2021
a60a8c6
use self.assertEqual instead of assert ==
gpshead Jul 18, 2021
0307272
remove leadg `| ` characters in NEWS
gpshead Jul 18, 2021
d26e1eb
Simplify the error messages.
gpshead Jul 18, 2021
652e7f4
update test for error message (Leading vs Malformed)
gpshead Jul 18, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion Doc/library/binascii.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,23 @@ The :mod:`binascii` module defines the following functions:
Added the *backtick* parameter.


.. function:: a2b_base64(string)
.. function:: a2b_base64(string, strict_mode=False)

Convert a block of base64 data back to binary and return the binary data. More
than one line may be passed at a time.

If *strict_mode* is true, only valid base64 data will be converted. Invalid base64
data will raise :exc:`binascii.Error`.

Valid base64:
* Conforms to :rfc:`3548`.
* Contains only characters from the base64 alphabet.
* Contains no excess data after padding (including excess padding, newlines, etc.).
* Does not start with padding.

.. versionchanged:: 3.11
Added the *strict_mode* parameter.


.. function:: b2a_base64(data, *, newline=True)

Expand Down
41 changes: 41 additions & 0 deletions Lib/test/test_binascii.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,47 @@ def addnoise(line):
# empty strings. TBD: shouldn't it raise an exception instead ?
self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), b'')

def test_base64_strict_mode(self):
# Test base64 with strict mode on
#
# Each case is checked three ways: strict_mode=True must raise
# binascii.Error with the expected message, while strict_mode=False and
# the default call must both return the same leniently decoded bytes.
def _assertRegexTemplate(assert_regex: str, data: bytes, non_strict_mode_expected_result: bytes):
# Strict mode rejects the input with an error matching assert_regex.
with self.assertRaisesRegex(binascii.Error, assert_regex):
binascii.a2b_base64(self.type2test(data), strict_mode=True)
# Explicit non-strict mode still decodes the same input.
self.assertEqual(binascii.a2b_base64(self.type2test(data), strict_mode=False),
non_strict_mode_expected_result)
# The default (no strict_mode argument) must behave like strict_mode=False.
self.assertEqual(binascii.a2b_base64(self.type2test(data)),
non_strict_mode_expected_result)

# Expects the error raised when data follows a completed padding sequence.
def assertExcessData(data, non_strict_mode_expected_result: bytes):
_assertRegexTemplate(r'(?i)Excess data', data, non_strict_mode_expected_result)

# Expects the error raised for a byte outside the base64 alphabet.
def assertNonBase64Data(data, non_strict_mode_expected_result: bytes):
_assertRegexTemplate(r'(?i)Only base64 data', data, non_strict_mode_expected_result)

# Despite the helper's name, the expected message is "Leading padding":
# the cases below cover both input that starts with '=' and alphabet
# characters that appear after padding has started.
def assertMalformedPadding(data, non_strict_mode_expected_result: bytes):
_assertRegexTemplate(r'(?i)Leading padding', data, non_strict_mode_expected_result)

# Test excess data exceptions
# (anything after the final pad counts: letters, an extra '=', punctuation,
# or a trailing newline)
assertExcessData(b'ab==a', b'i')
assertExcessData(b'ab===', b'i')
assertExcessData(b'ab==:', b'i')
assertExcessData(b'abc=a', b'i\xb7')
assertExcessData(b'abc=:', b'i\xb7')
assertExcessData(b'ab==\n', b'i')

# Test non-base64 data exceptions
# (newlines and NUL bytes before the padding are rejected in strict mode,
# but skipped in non-strict mode)
assertNonBase64Data(b'\nab==', b'i')
assertNonBase64Data(b'ab:(){:|:&};:==', b'i')
assertNonBase64Data(b'a\nb==', b'i')
assertNonBase64Data(b'a\x00b==', b'i')

# Test malformed padding
# (padding-only input, and alphabet characters in the middle of padding)
assertMalformedPadding(b'=', b'')
assertMalformedPadding(b'==', b'')
assertMalformedPadding(b'===', b'')
assertMalformedPadding(b'ab=c=', b'i\xb7')
assertMalformedPadding(b'ab=ab==', b'i\xb6\x9b')


def test_base64errors(self):
# Test base64 with invalid padding
def assertIncorrectPadding(data):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Added a new optional :code:`strict_mode` parameter to *binascii.a2b_base64*.
When :code:`strict_mode` is set to :code:`True`, the *a2b_base64* function will accept only valid base64 content.
More details about what "valid base64 content" is can be found in the function's documentation.
52 changes: 46 additions & 6 deletions Modules/binascii.c
Original file line number Diff line number Diff line change
Expand Up @@ -433,18 +433,26 @@ binascii.a2b_base64

data: ascii_buffer
/
*
strict_mode: bool(accept={int}) = False

Decode a line of base64 data.

strict_mode
When set to True, bytes that are not part of the base64 standard are not allowed.
The same applies to excess data after padding (= / ==).
[clinic start generated code]*/

static PyObject *
binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
/*[clinic end generated code: output=5409557788d4f975 input=3a30c4e3528317c6]*/
{
assert(data->len >= 0);

const unsigned char *ascii_data = data->buf;
size_t ascii_len = data->len;
binascii_state *state = NULL;
char padding_started = 0;

/* Allocate the buffer */
Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
Expand All @@ -455,6 +463,15 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
return NULL;
unsigned char *bin_data_start = bin_data;

if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
malformed_padding:
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error, "Leading padding not allowed");
}
goto error_end;
}

int quad_pos = 0;
unsigned char leftchar = 0;
int pads = 0;
Expand All @@ -465,20 +482,42 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
** the invalid ones.
*/
if (this_ch == BASE64_PAD) {
padding_started = 1;

if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
/* A pad sequence means no more input.
** We've already interpreted the data
** from the quad at this point.
/* A pad sequence means we should not parse more input.
** We've already interpreted the data from the quad at this point.
** In strict mode, an error is raised if there's excess data after the padding.
*/
if (strict_mode && i + 1 < ascii_len) {
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error, "Excess data after padding");
}
goto error_end;
}

goto done;
}
continue;
}

this_ch = table_a2b_base64[this_ch];
if (this_ch >= 64) {
if (strict_mode) {
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error, "Only base64 data is allowed");
}
goto error_end;
}
continue;
}

// Characters that are not '=', in the middle of the padding, are not allowed
if (strict_mode && padding_started) {
goto malformed_padding;
}
pads = 0;

switch (quad_pos) {
Expand All @@ -505,7 +544,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
}

if (quad_pos != 0) {
binascii_state *state = get_binascii_state(module);
state = get_binascii_state(module);
if (state == NULL) {
/* error already set, from get_binascii_state */
} else if (quad_pos == 1) {
Expand All @@ -522,6 +561,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
} else {
PyErr_SetString(state->Error, "Incorrect padding");
}
error_end:
_PyBytesWriter_Dealloc(&writer);
return NULL;
}
Expand Down
37 changes: 29 additions & 8 deletions Modules/clinic/binascii.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.