Skip to content

Commit 4561f64

Browse files
gh-145264: Do not ignore excess Base64 data after the first padded quad (GH-145267)
Base64 decoder (see binascii.a2b_base64(), base64.b64decode(), etc.) no longer ignores excess data after the first padded quad in non-strict (default) mode. Instead, in conformance with RFC 4648, it ignores the pad character, "=", if it is present before the end of the encoded data.
1 parent ae6adc9 commit 4561f64

File tree

3 files changed

+36
-41
lines changed

3 files changed

+36
-41
lines changed

Lib/test/test_binascii.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -274,23 +274,21 @@ def assertNonBase64Data(data, expected, ignorechars):
274274

275275
def test_base64_excess_data(self):
276276
# Test excess data exceptions
277-
def assertExcessData(data, non_strict_expected,
278-
ignore_padchar_expected=None):
277+
def assertExcessData(data, expected):
279278
assert_regex = r'(?i)Excess data'
280279
data = self.type2test(data)
281280
with self.assertRaisesRegex(binascii.Error, assert_regex):
282281
binascii.a2b_base64(data, strict_mode=True)
283282
self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
284-
non_strict_expected)
285-
if ignore_padchar_expected is not None:
286-
self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
287-
ignorechars=b'='),
288-
ignore_padchar_expected)
289-
self.assertEqual(binascii.a2b_base64(data), non_strict_expected)
290-
291-
assertExcessData(b'ab==c', b'i')
292-
assertExcessData(b'ab==cd', b'i', b'i\xb7\x1d')
293-
assertExcessData(b'abc=d', b'i\xb7', b'i\xb7\x1d')
283+
expected)
284+
self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
285+
ignorechars=b'='),
286+
expected)
287+
self.assertEqual(binascii.a2b_base64(data), expected)
288+
289+
assertExcessData(b'ab==c=', b'i\xb7')
290+
assertExcessData(b'ab==cd', b'i\xb7\x1d')
291+
assertExcessData(b'abc=d', b'i\xb7\x1d')
294292

295293
def test_base64errors(self):
296294
# Test base64 with invalid padding
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Base64 decoder (see :func:`binascii.a2b_base64`, :func:`base64.b64decode`, etc.) no
2+
longer ignores excess data after the first padded quad in non-strict
3+
(default) mode. Instead, in conformance with :rfc:`4648`, section 3.3, it now ignores
4+
the pad character, "=", if it is present before the end of the encoded data.

Modules/binascii.c

Lines changed: 22 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -800,40 +800,33 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
800800
*/
801801
if (this_ch == BASE64_PAD) {
802802
pads++;
803-
804-
if (strict_mode) {
805-
if (quad_pos >= 2 && quad_pos + pads <= 4) {
806-
continue;
807-
}
808-
if (ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
809-
continue;
810-
}
811-
if (quad_pos == 1) {
812-
/* Set an error below. */
813-
break;
814-
}
815-
state = get_binascii_state(module);
816-
if (state) {
817-
PyErr_SetString(state->Error,
818-
(quad_pos == 0 && ascii_data == data->buf)
819-
? "Leading padding not allowed"
820-
: "Excess padding not allowed");
821-
}
822-
goto error_end;
803+
if (quad_pos >= 2 && quad_pos + pads <= 4) {
804+
continue;
823805
}
824-
else {
825-
if (quad_pos >= 2 && quad_pos + pads >= 4) {
826-
/* A pad sequence means we should not parse more input.
827-
** We've already interpreted the data from the quad at this point.
828-
*/
829-
goto done;
830-
}
806+
// See RFC 4648, section-3.3: "specifications MAY ignore the
807+
// pad character, "=", treating it as non-alphabet data, if
808+
// it is present before the end of the encoded data" and
809+
// "the excess pad characters MAY also be ignored."
810+
if (!strict_mode || ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
831811
continue;
832812
}
813+
if (quad_pos == 1) {
814+
/* Set an error below. */
815+
break;
816+
}
817+
state = get_binascii_state(module);
818+
if (state) {
819+
PyErr_SetString(state->Error,
820+
(quad_pos == 0 && ascii_data == data->buf)
821+
? "Leading padding not allowed"
822+
: "Excess padding not allowed");
823+
}
824+
goto error_end;
833825
}
834826

835827
unsigned char v = table_a2b[this_ch];
836828
if (v >= 64) {
829+
// See RFC 4648, section-3.3.
837830
if (strict_mode && !ignorechar(this_ch, ignorechars, ignorecache)) {
838831
state = get_binascii_state(module);
839832
if (state) {
@@ -844,7 +837,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
844837
continue;
845838
}
846839

847-
// Characters that are not '=', in the middle of the padding, are not allowed
840+
// Non-'=' characters in the middle of the padding are rejected in
841+
// strict mode unless '=' is in ignorechars. See RFC 4648, section-3.3.
848842
if (pads && strict_mode &&
849843
!ignorechar(BASE64_PAD, ignorechars, ignorecache))
850844
{
@@ -908,7 +902,6 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
908902
goto error_end;
909903
}
910904

911-
done:
912905
Py_XDECREF(table_obj);
913906
return PyBytesWriter_FinishWithPointer(writer, bin_data);
914907

0 commit comments

Comments
 (0)