gh-144001: Simplify Base64 decoding with altchars and ignorechars specified (GH-144324)

Treat "+" and "/" like other characters not in the alternative Base64
alphabet when both altchars and ignorechars are specified.
That is, discard them if they are not in altchars but are in ignorechars,
and raise an error if they are in neither altchars nor ignorechars.
Only emit warnings if ignorechars is not specified.
This commit is contained in:
Serhiy Storchaka 2026-02-02 13:38:32 +02:00 committed by GitHub
parent 15c9f2491d
commit c3b61ef73d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 41 additions and 50 deletions

View file

@ -91,7 +91,8 @@ POST request.
``False`` otherwise.
If *validate* is false, characters that are neither
in the normal base-64 alphabet nor the alternative alphabet are
in the normal base-64 alphabet nor (if *ignorechars* is not specified)
the alternative alphabet are
discarded prior to the padding check, but the ``+`` and ``/`` characters
keep their meaning if they are not in *altchars* (they will be discarded
in future Python versions).
@ -101,15 +102,14 @@ POST request.
For more information about the strict base64 check, see :func:`binascii.a2b_base64`
.. versionchanged:: next
Added the *ignorechars* parameter.
.. deprecated:: next
Accepting the ``+`` and ``/`` characters with an alternative alphabet
is now deprecated.
.. versionchanged:: next
Added the *ignorechars* parameter.
.. function:: standard_b64encode(s)
Encode :term:`bytes-like object` *s* using the standard Base64 alphabet

View file

@ -89,49 +89,37 @@ def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, *, ignorechars=_NOT_SPE
s = _bytes_from_decode_data(s)
if validate is _NOT_SPECIFIED:
validate = ignorechars is not _NOT_SPECIFIED
if ignorechars is _NOT_SPECIFIED:
ignorechars = b''
badchar = None
badchar_strict = False
if altchars is not None:
altchars = _bytes_from_decode_data(altchars)
if len(altchars) != 2:
raise ValueError(f'invalid altchars: {altchars!r}')
for b in b'+/':
if b not in altchars and b in s:
if badchar is None:
badchar = b
if not validate:
break
if not isinstance(ignorechars, (bytes, bytearray)):
ignorechars = memoryview(ignorechars).cast('B')
if b not in ignorechars:
badchar_strict = True
if ignorechars is _NOT_SPECIFIED:
for b in b'+/':
if b not in altchars and b in s:
badchar = b
break
s = s.translate(bytes.maketrans(altchars, b'+/'))
s = s.translate(bytes.maketrans(altchars, b'+/'))
else:
trans = bytes.maketrans(b'+/' + altchars, altchars + b'+/')
s = s.translate(trans)
ignorechars = ignorechars.translate(trans)
if ignorechars is _NOT_SPECIFIED:
ignorechars = b''
result = binascii.a2b_base64(s, strict_mode=validate,
ignorechars=ignorechars)
if badchar is not None:
import warnings
if badchar_strict:
if validate:
warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
f'with altchars={altchars!r} and validate=True '
f'will be an error in future Python versions',
DeprecationWarning, stacklevel=2)
else:
ignorechars = bytes(ignorechars)
if ignorechars:
warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
f'with altchars={altchars!r} '
f'and ignorechars={ignorechars!r} '
f'will be discarded in future Python versions',
FutureWarning, stacklevel=2)
else:
warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
f'with altchars={altchars!r} and validate=False '
f'will be discarded in future Python versions',
FutureWarning, stacklevel=2)
warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
f'with altchars={altchars!r} and validate=False '
f'will be discarded in future Python versions',
FutureWarning, stacklevel=2)
return result

View file

@ -291,6 +291,7 @@ def test_b64decode_altchars(self):
eq(base64.b64decode(data_str, altchars=altchars), res)
eq(base64.b64decode(data, altchars=altchars_str), res)
eq(base64.b64decode(data_str, altchars=altchars_str), res)
eq(base64.b64decode(data, altchars=altchars, ignorechars=b'\n'), res)
self.assertRaises(ValueError, base64.b64decode, b'', altchars=b'+')
self.assertRaises(ValueError, base64.b64decode, b'', altchars=b'+/-')
@ -370,26 +371,28 @@ def test_b64decode_invalid_chars(self):
self.assertEqual(r, b'\xff\xff\xff')
self.assertEqual(str(cm.warning),
error % ('/', "altchars=b'-_' and validate=True"))
with self.assertWarns(FutureWarning) as cm:
r = base64.b64decode(b'++++', altchars=b'-_', ignorechars=b'+')
self.assertEqual(r, b'\xfb\xef\xbe')
self.assertEqual(str(cm.warning),
discarded % ('+', "altchars=b'-_' and ignorechars=b'+'"))
with self.assertWarns(FutureWarning) as cm:
r = base64.b64decode(b'////', altchars=b'-_', ignorechars=b'/')
self.assertEqual(r, b'\xff\xff\xff')
self.assertEqual(str(cm.warning),
discarded % ('/', "altchars=b'-_' and ignorechars=b'/'"))
with self.assertWarns(DeprecationWarning) as cm:
r = base64.b64decode(b'++++////', altchars=b'-_', ignorechars=b'+')
r = base64.b64decode(b'++++', altchars=b'-_', ignorechars=b'+')
self.assertEqual(r, b'')
r = base64.b64decode(b'////', altchars=b'-_', ignorechars=b'/')
self.assertEqual(r, b'')
r = base64.b64decode(b'++++////', altchars=b'-_', validate=False, ignorechars=b'')
self.assertEqual(r, b'')
with self.assertRaisesRegex(binascii.Error, 'Only base64 data is allowed'):
base64.b64decode(b'////', altchars=b'-_', ignorechars=b'')
with self.assertRaisesRegex(binascii.Error, 'Only base64 data is allowed'):
base64.b64decode(b'++++', altchars=b'-_', ignorechars=b'')
r = base64.b64decode(b'++++YWJj----____', altchars=b'-_', ignorechars=b'+')
self.assertEqual(r, b'abc\xfb\xef\xbe\xff\xff\xff')
r = base64.b64decode(b'////YWJj----____', altchars=b'-_', ignorechars=b'/')
self.assertEqual(r, b'abc\xfb\xef\xbe\xff\xff\xff')
r = base64.b64decode(b'++++,,,,', altchars=b'+,', ignorechars=b'+')
self.assertEqual(r, b'\xfb\xef\xbe\xff\xff\xff')
self.assertEqual(str(cm.warning),
error % ('/', "altchars=b'-_' and validate=True"))
with self.assertWarns(DeprecationWarning) as cm:
r = base64.b64decode(b'++++////', altchars=b'-_', ignorechars=b'/')
r = base64.b64decode(b'////YWJj++++,,,,', altchars=b'+,', ignorechars=b'/')
self.assertEqual(r, b'abc\xfb\xef\xbe\xff\xff\xff')
r = base64.b64decode(b'----////', altchars=b'-/', ignorechars=b'/')
self.assertEqual(r, b'\xfb\xef\xbe\xff\xff\xff')
self.assertEqual(str(cm.warning),
error % ('+', "altchars=b'-_' and validate=True"))
r = base64.b64decode(b'++++YWJj----////', altchars=b'-/', ignorechars=b'+')
self.assertEqual(r, b'abc\xfb\xef\xbe\xff\xff\xff')
with self.assertWarns(FutureWarning) as cm:
self.assertEqual(base64.urlsafe_b64decode(b'++++'), b'\xfb\xef\xbe')