mirror of
https://github.com/python/cpython.git
synced 2025-10-19 16:03:42 +00:00
gh-101828: Fix jisx0213
codecs removing null characters (gh-139340)
This commit is contained in:
parent
ded59f7e8e
commit
87eadce3e0
4 changed files with 34 additions and 6 deletions
|
@ -282,6 +282,23 @@ def test_incrementalencoder_del_segfault(self):
|
||||||
with self.assertRaises(AttributeError):
|
with self.assertRaises(AttributeError):
|
||||||
del e.errors
|
del e.errors
|
||||||
|
|
||||||
|
def test_null_terminator(self):
|
||||||
|
# see gh-101828
|
||||||
|
text = "フルーツ"
|
||||||
|
try:
|
||||||
|
text.encode(self.encoding)
|
||||||
|
except UnicodeEncodeError:
|
||||||
|
text = "Python is cool"
|
||||||
|
encode_w_null = (text + "\0").encode(self.encoding)
|
||||||
|
encode_plus_null = text.encode(self.encoding) + "\0".encode(self.encoding)
|
||||||
|
self.assertTrue(encode_w_null.endswith(b'\x00'))
|
||||||
|
self.assertEqual(encode_w_null, encode_plus_null)
|
||||||
|
|
||||||
|
encode_w_null_2 = (text + "\0" + text + "\0").encode(self.encoding)
|
||||||
|
encode_plus_null_2 = encode_plus_null + encode_plus_null
|
||||||
|
self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
|
||||||
|
self.assertEqual(encode_w_null_2, encode_plus_null_2)
|
||||||
|
|
||||||
|
|
||||||
class TestBase_Mapping(unittest.TestCase):
|
class TestBase_Mapping(unittest.TestCase):
|
||||||
pass_enctest = []
|
pass_enctest = []
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
Fix ``'shift_jisx0213'``, ``'shift_jis_2004'``, ``'euc_jisx0213'`` and
|
||||||
|
``'euc_jis_2004'`` codecs dropping null characters
|
||||||
|
that were mistakenly consumed as the second character of a multi-character sequence.
|
|
@ -802,10 +802,13 @@ jisx0213_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
|
||||||
return coded;
|
return coded;
|
||||||
|
|
||||||
case 2: /* second character of unicode pair */
|
case 2: /* second character of unicode pair */
|
||||||
coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
|
if (data[1] != 0) { /* Don't consume null char as part of pair */
|
||||||
jisx0213_pair_encmap, JISX0213_ENCPAIRS);
|
coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
|
||||||
if (coded != DBCINV)
|
jisx0213_pair_encmap, JISX0213_ENCPAIRS);
|
||||||
return coded;
|
if (coded != DBCINV) {
|
||||||
|
return coded;
|
||||||
|
}
|
||||||
|
}
|
||||||
_Py_FALLTHROUGH;
|
_Py_FALLTHROUGH;
|
||||||
|
|
||||||
case -1: /* flush unterminated */
|
case -1: /* flush unterminated */
|
||||||
|
|
|
@ -192,8 +192,11 @@ ENCODER(euc_jis_2004)
|
||||||
JISX0213_ENCPAIRS);
|
JISX0213_ENCPAIRS);
|
||||||
if (code == DBCINV)
|
if (code == DBCINV)
|
||||||
return 1;
|
return 1;
|
||||||
} else
|
}
|
||||||
|
else if (c2 != 0) {
|
||||||
|
/* Don't consume null char as part of pair */
|
||||||
insize = 2;
|
insize = 2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -611,8 +614,10 @@ ENCODER(shift_jis_2004)
|
||||||
if (code == DBCINV)
|
if (code == DBCINV)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
else
|
else if (ch2 != 0) {
|
||||||
|
/* Don't consume null char as part of pair */
|
||||||
insize = 2;
|
insize = 2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue