[3.13] gh-101828: Fix jisx0213 codecs removing null characters (gh-139340) (gh-140112)

* [3.13] gh-101828: Fix `jisx0213` codecs removing null characters (gh-139340)
(cherry picked from commit 87eadce3e0)

Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>

* Accidentally removed line
This commit is contained in:
Stan Ulbrych 2025-10-14 15:48:29 +01:00 committed by GitHub
parent 2f27098b42
commit c5ec267311
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 34 additions and 6 deletions

View file

@ -282,6 +282,23 @@ def test_incrementalencoder_del_segfault(self):
with self.assertRaises(AttributeError):
del e.errors
def test_null_terminator(self):
# see gh-101828
text = "フルーツ"
try:
text.encode(self.encoding)
except UnicodeEncodeError:
text = "Python is cool"
encode_w_null = (text + "\0").encode(self.encoding)
encode_plus_null = text.encode(self.encoding) + "\0".encode(self.encoding)
self.assertTrue(encode_w_null.endswith(b'\x00'))
self.assertEqual(encode_w_null, encode_plus_null)
encode_w_null_2 = (text + "\0" + text + "\0").encode(self.encoding)
encode_plus_null_2 = encode_plus_null + encode_plus_null
self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
self.assertEqual(encode_w_null_2, encode_plus_null_2)
class TestBase_Mapping(unittest.TestCase):
pass_enctest = []

View file

@ -0,0 +1,3 @@
Fix ``'shift_jisx0213'``, ``'shift_jis_2004'``, ``'euc_jisx0213'`` and
``'euc_jis_2004'`` codecs dropping null characters from their output,
because a null was wrongly consumed as the second half of a multi-character sequence.

View file

@ -802,10 +802,13 @@ jisx0213_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
return coded;
case 2: /* second character of unicode pair */
if (data[1] != 0) { /* Don't consume null char as part of pair */
coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
jisx0213_pair_encmap, JISX0213_ENCPAIRS);
if (coded != DBCINV)
if (coded != DBCINV) {
return coded;
}
}
/* fall through */
case -1: /* flush unterminated */

View file

@ -192,11 +192,14 @@ ENCODER(euc_jis_2004)
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
} else
}
else if (c2 != 0) {
/* Don't consume null char as part of pair */
insize = 2;
}
}
}
}
else if (TRYMAP_ENC(jisxcommon, code, c))
;
else if (c >= 0xff61 && c <= 0xff9f) {
@ -611,11 +614,13 @@ ENCODER(shift_jis_2004)
if (code == DBCINV)
return 1;
}
else
else if (ch2 != 0) {
/* Don't consume null char as part of pair */
insize = 2;
}
}
}
}
else if (TRYMAP_ENC(jisxcommon, code, c)) {
/* abandon JIS X 0212 codes */
if (code & 0x8000)