mirror of
https://github.com/python/cpython.git
synced 2025-10-19 07:53:46 +00:00
[3.13] gh-101828: Fix `jisx0213` codecs removing null characters (gh-139340) (gh-140112)
* [3.13] gh-101828: Fix `jisx0213` codecs removing null characters (gh-139340)
(cherry picked from commit 87eadce3e0)
Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
* Accidentally removed line
This commit is contained in:
parent
2f27098b42
commit
c5ec267311
4 changed files with 34 additions and 6 deletions
|
@ -282,6 +282,23 @@ def test_incrementalencoder_del_segfault(self):
|
|||
with self.assertRaises(AttributeError):
|
||||
del e.errors
|
||||
|
||||
def test_null_terminator(self):
|
||||
# Regression test for gh-101828: a null character in the input must
# still be present in the output of the codec under test.
|
||||
# Preferred sample text: non-ASCII (Japanese) characters.
text = "フルーツ"
|
||||
try:
|
||||
# Probe only: the result is discarded, we just check encodability.
text.encode(self.encoding)
|
||||
except UnicodeEncodeError:
|
||||
# self.encoding cannot represent the Japanese sample; use ASCII text.
text = "Python is cool"
|
||||
# Encode the text and the trailing null character in a single call ...
encode_w_null = (text + "\0").encode(self.encoding)
|
||||
# ... and, for comparison, encode the two pieces separately and join them.
encode_plus_null = text.encode(self.encoding) + "\0".encode(self.encoding)
|
||||
# The null character must come out as a trailing zero byte.
self.assertTrue(encode_w_null.endswith(b'\x00'))
|
||||
# Both ways of encoding must produce identical bytes.
self.assertEqual(encode_w_null, encode_plus_null)
|
||||
|
||||
# Same checks with a null character in the middle as well as at the end.
encode_w_null_2 = (text + "\0" + text + "\0").encode(self.encoding)
|
||||
encode_plus_null_2 = encode_plus_null + encode_plus_null
|
||||
# Exactly two zero bytes are expected in the output, one per null character.
self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
|
||||
self.assertEqual(encode_w_null_2, encode_plus_null_2)
|
||||
|
||||
|
||||
class TestBase_Mapping(unittest.TestCase):
|
||||
pass_enctest = []
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
Fix ``'shift_jisx0213'``, ``'shift_jis_2004'``, ``'euc_jisx0213'`` and
|
||||
``'euc_jis_2004'`` codecs removing null characters
|
||||
as they were treated as part of multi-character sequences.
|
|
@ -802,10 +802,13 @@ jisx0213_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
|
|||
return coded;
|
||||
|
||||
case 2: /* second character of unicode pair */
|
||||
coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
|
||||
jisx0213_pair_encmap, JISX0213_ENCPAIRS);
|
||||
if (coded != DBCINV)
|
||||
return coded;
|
||||
if (data[1] != 0) { /* Don't consume null char as part of pair */
|
||||
coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
|
||||
jisx0213_pair_encmap, JISX0213_ENCPAIRS);
|
||||
if (coded != DBCINV) {
|
||||
return coded;
|
||||
}
|
||||
}
|
||||
/* fall through */
|
||||
|
||||
case -1: /* flush unterminated */
|
||||
|
|
|
@ -192,8 +192,11 @@ ENCODER(euc_jis_2004)
|
|||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
}
|
||||
else if (c2 != 0) {
|
||||
/* Don't consume null char as part of pair */
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -611,8 +614,10 @@ ENCODER(shift_jis_2004)
|
|||
if (code == DBCINV)
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
else if (ch2 != 0) {
|
||||
/* Don't consume null char as part of pair */
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue