mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	Backport r57105 and r57145 from the py3k branch: UTF-32 codecs.
This commit is contained in:
		
							parent
							
								
									437e6a3b15
								
							
						
					
					
						commit
						6e39080649
					
				
					 12 changed files with 999 additions and 2 deletions
				
			
		|  | @ -244,6 +244,137 @@ def test_bug1098990_b(self): | |||
|         self.assertEqual(reader.readline(), s5) | ||||
|         self.assertEqual(reader.readline(), u"") | ||||
| 
 | ||||
class UTF32Test(ReadTest):
    # Tests for the BOM-aware "utf-32" codec.  Comments are used instead of
    # method docstrings so unittest's verbose test descriptions stay as-is.
    encoding = "utf-32"

    # u"spamspam" as UTF-32 with exactly one leading BOM, in little-endian
    # and big-endian byte order respectively.
    spamle = ('\xff\xfe\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00')
    spambe = ('\x00\x00\xfe\xff'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')

    def test_only_one_bom(self):
        # Two writes through the same stream writer must produce a single
        # BOM, and the result must decode back to the concatenated text.
        reader = codecs.getreader(self.encoding)
        writer = codecs.getwriter(self.encoding)
        # encode some stream
        s = StringIO.StringIO()
        f = writer(s)
        f.write(u"spam")
        f.write(u"spam")
        d = s.getvalue()
        # check whether there is exactly one BOM in it; either byte order
        # is acceptable because the codec picks one itself
        self.assertTrue(d == self.spamle or d == self.spambe)
        # try to read it back
        s = StringIO.StringIO(d)
        f = reader(s)
        self.assertEqual(f.read(), u"spamspam")

    def test_badbom(self):
        # Input made only of "\xff" bytes (one and two 4-byte units) must
        # make the stream reader raise UnicodeError on read().
        s = StringIO.StringIO(4*"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

        s = StringIO.StringIO(8*"\xff")
        f = codecs.getreader(self.encoding)(s)
        self.assertRaises(UnicodeError, f.read)

    def test_partial(self):
        # Expected decoder output after each successive step of the encoded
        # input: nothing during the BOM, then one more character every
        # fourth entry.
        self.check_partial(
            u"\x00\xff\u0100\uffff",
            [
                u"", # first byte of BOM read
                u"", # second byte of BOM read
                u"", # third byte of BOM read
                u"", # fourth byte of BOM read => byteorder known
                u"",
                u"",
                u"",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100",
                u"\x00\xff\u0100\uffff",
            ]
        )

    def test_errors(self):
        # A lone "\xff" byte decoded in strict mode with the third argument
        # (final) set to True must raise UnicodeDecodeError.
        self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
                          "\xff", "strict", True)
| 
 | ||||
class UTF32LETest(ReadTest):
    # Tests for the fixed little-endian "utf-32-le" codec.  Comments are
    # used instead of method docstrings so unittest's verbose output is
    # unchanged.
    encoding = "utf-32-le"

    def test_partial(self):
        # Expected results: three empty strings, then each proper prefix of
        # the text four times in a row, and finally the complete text.
        text = u"\x00\xff\u0100\uffff"
        expected = (
            [u""] * 3
            + [u"\x00"] * 4
            + [u"\x00\xff"] * 4
            + [u"\x00\xff\u0100"] * 4
            + [u"\x00\xff\u0100\uffff"]
        )
        self.check_partial(text, expected)

    def test_simple(self):
        # A non-BMP code point is encoded least significant byte first.
        encoded = u"\U00010203".encode(self.encoding)
        self.assertEqual(encoded, "\x03\x02\x01\x00")

    def test_errors(self):
        # A lone "\xff" byte decoded in strict mode with the third argument
        # (final) set to True must raise UnicodeDecodeError.
        decode = codecs.utf_32_le_decode
        self.assertRaises(UnicodeDecodeError, decode, "\xff", "strict", True)
| 
 | ||||
class UTF32BETest(ReadTest):
    # Tests for the fixed big-endian "utf-32-be" codec.  Comments are used
    # instead of method docstrings so unittest's verbose output is unchanged.
    encoding = "utf-32-be"

    def test_partial(self):
        # Expected results: three empty strings, then each proper prefix of
        # the text four times in a row, and finally the complete text.
        text = u"\x00\xff\u0100\uffff"
        expected = [u"", u"", u""]
        for prefix in (u"\x00", u"\x00\xff", u"\x00\xff\u0100"):
            expected.extend([prefix] * 4)
        expected.append(u"\x00\xff\u0100\uffff")
        self.check_partial(text, expected)

    def test_simple(self):
        # A non-BMP code point is encoded most significant byte first.
        encoded = u"\U00010203".encode(self.encoding)
        self.assertEqual(encoded, "\x00\x01\x02\x03")

    def test_errors(self):
        # A lone "\xff" byte decoded in strict mode with the third argument
        # (final) set to True must raise UnicodeDecodeError.
        decode = codecs.utf_32_be_decode
        self.assertRaises(UnicodeDecodeError, decode, "\xff", "strict", True)
| 
 | ||||
| class UTF16Test(ReadTest): | ||||
|     encoding = "utf-16" | ||||
| 
 | ||||
|  | @ -1278,6 +1409,9 @@ def test_streamreaderwriter(self): | |||
| 
 | ||||
| def test_main(): | ||||
|     test_support.run_unittest( | ||||
|         UTF32Test, | ||||
|         UTF32LETest, | ||||
|         UTF32BETest, | ||||
|         UTF16Test, | ||||
|         UTF16LETest, | ||||
|         UTF16BETest, | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Walter Dörwald
						Walter Dörwald