mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 21:51:50 +00:00 
			
		
		
		
	[3.13] gh-101828: Fix jisx0213 codecs removing null characters (gh-139340) (gh-140112)
				
					
				
			* [3.13] gh-101828: Fix `jisx0213` codecs removing null characters (gh-139340)
(cherry picked from commit 87eadce3e0)
Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
* Accidentally removed line
			
			
This commit is contained in:
		
							parent
							
								
									2f27098b42
								
							
						
					
					
						commit
						c5ec267311
					
				
					 4 changed files with 34 additions and 6 deletions
				
			
		|  | @ -282,6 +282,23 @@ def test_incrementalencoder_del_segfault(self): | ||||||
|         with self.assertRaises(AttributeError): |         with self.assertRaises(AttributeError): | ||||||
|             del e.errors |             del e.errors | ||||||
| 
 | 
 | ||||||
|  |     def test_null_terminator(self): | ||||||
|  |         # see gh-101828 | ||||||
|  |         text = "フルーツ" | ||||||
|  |         try: | ||||||
|  |             text.encode(self.encoding) | ||||||
|  |         except UnicodeEncodeError: | ||||||
|  |             text = "Python is cool" | ||||||
|  |         encode_w_null = (text + "\0").encode(self.encoding) | ||||||
|  |         encode_plus_null = text.encode(self.encoding) + "\0".encode(self.encoding) | ||||||
|  |         self.assertTrue(encode_w_null.endswith(b'\x00')) | ||||||
|  |         self.assertEqual(encode_w_null, encode_plus_null) | ||||||
|  | 
 | ||||||
|  |         encode_w_null_2 = (text + "\0" + text + "\0").encode(self.encoding) | ||||||
|  |         encode_plus_null_2 = encode_plus_null + encode_plus_null | ||||||
|  |         self.assertEqual(encode_w_null_2.count(b'\x00'), 2) | ||||||
|  |         self.assertEqual(encode_w_null_2, encode_plus_null_2) | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class TestBase_Mapping(unittest.TestCase): | class TestBase_Mapping(unittest.TestCase): | ||||||
|     pass_enctest = [] |     pass_enctest = [] | ||||||
|  |  | ||||||
|  | @ -0,0 +1,3 @@ | ||||||
|  | Fix ``'shift_jisx0213'``, ``'shift_jis_2004'``, ``'euc_jisx0213'`` and | ||||||
|  | ``'euc_jis_2004'`` codecs removing null characters | ||||||
|  | as they were treated as part of multi-character sequences. | ||||||
|  | @ -802,10 +802,13 @@ jisx0213_encoder(const MultibyteCodec *codec, const Py_UCS4 *data, | ||||||
|         return coded; |         return coded; | ||||||
| 
 | 
 | ||||||
|     case 2: /* second character of unicode pair */ |     case 2: /* second character of unicode pair */ | ||||||
|  |         if (data[1] != 0) { /* Don't consume null char as part of pair */ | ||||||
|             coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1], |             coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1], | ||||||
|                                     jisx0213_pair_encmap, JISX0213_ENCPAIRS); |                                     jisx0213_pair_encmap, JISX0213_ENCPAIRS); | ||||||
|         if (coded != DBCINV) |             if (coded != DBCINV) { | ||||||
|                 return coded; |                 return coded; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|         /* fall through */ |         /* fall through */ | ||||||
| 
 | 
 | ||||||
|     case -1: /* flush unterminated */ |     case -1: /* flush unterminated */ | ||||||
|  |  | ||||||
|  | @ -192,11 +192,14 @@ ENCODER(euc_jis_2004) | ||||||
|                                 JISX0213_ENCPAIRS); |                                 JISX0213_ENCPAIRS); | ||||||
|                             if (code == DBCINV) |                             if (code == DBCINV) | ||||||
|                                 return 1; |                                 return 1; | ||||||
|                         } else |                         } | ||||||
|  |                         else if (c2 != 0) { | ||||||
|  |                             /* Don't consume null char as part of pair */ | ||||||
|                             insize = 2; |                             insize = 2; | ||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|  |             } | ||||||
|             else if (TRYMAP_ENC(jisxcommon, code, c)) |             else if (TRYMAP_ENC(jisxcommon, code, c)) | ||||||
|                 ; |                 ; | ||||||
|             else if (c >= 0xff61 && c <= 0xff9f) { |             else if (c >= 0xff61 && c <= 0xff9f) { | ||||||
|  | @ -611,11 +614,13 @@ ENCODER(shift_jis_2004) | ||||||
|                             if (code == DBCINV) |                             if (code == DBCINV) | ||||||
|                                 return 1; |                                 return 1; | ||||||
|                             } |                             } | ||||||
|                             else |                             else if (ch2 != 0) { | ||||||
|  |                                 /* Don't consume null char as part of pair */ | ||||||
|                                 insize = 2; |                                 insize = 2; | ||||||
|                             } |                             } | ||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
|  |                 } | ||||||
|                 else if (TRYMAP_ENC(jisxcommon, code, c)) { |                 else if (TRYMAP_ENC(jisxcommon, code, c)) { | ||||||
|                     /* abandon JIS X 0212 codes */ |                     /* abandon JIS X 0212 codes */ | ||||||
|                     if (code & 0x8000) |                     if (code & 0x8000) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Stan Ulbrych
						Stan Ulbrych