mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	gh-119614: Fix truncation of strings with embedded null characters in Tkinter (GH-120909)
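An embedded null character is now always passed to Tcl_NewStringObj() as \xc0\x80 (its Modified UTF-8 representation), so the string is no longer truncated in some internal Tcl operations.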
This commit is contained in:
		
							parent
							
								
									fc297b4ba4
								
							
						
					
					
						commit
						c38e2f64d0
					
				
					 4 changed files with 68 additions and 7 deletions
				
			
		|  | @ -73,6 +73,18 @@ def testCall(self): | ||||||
|         tcl.call('set','a','1') |         tcl.call('set','a','1') | ||||||
|         self.assertEqual(tcl.call('set','a'),'1') |         self.assertEqual(tcl.call('set','a'),'1') | ||||||
| 
 | 
 | ||||||
|  |     def test_call_passing_null(self): | ||||||
|  |         tcl = self.interp | ||||||
|  |         tcl.call('set', 'a', 'a\0b')  # ASCII-only | ||||||
|  |         self.assertEqual(tcl.getvar('a'), 'a\x00b') | ||||||
|  |         self.assertEqual(tcl.call('set', 'a'), 'a\x00b') | ||||||
|  |         self.assertEqual(tcl.eval('set a'), 'a\x00b') | ||||||
|  | 
 | ||||||
|  |         tcl.call('set', 'a', '\u20ac\0')  # non-ASCII | ||||||
|  |         self.assertEqual(tcl.getvar('a'), '\u20ac\x00') | ||||||
|  |         self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00') | ||||||
|  |         self.assertEqual(tcl.eval('set a'), '\u20ac\x00') | ||||||
|  | 
 | ||||||
|     def testCallException(self): |     def testCallException(self): | ||||||
|         tcl = self.interp |         tcl = self.interp | ||||||
|         self.assertRaises(TclError,tcl.call,'set','a') |         self.assertRaises(TclError,tcl.call,'set','a') | ||||||
|  | @ -98,6 +110,18 @@ def testSetVar(self): | ||||||
|         tcl.setvar('a','1') |         tcl.setvar('a','1') | ||||||
|         self.assertEqual(tcl.eval('set a'),'1') |         self.assertEqual(tcl.eval('set a'),'1') | ||||||
| 
 | 
 | ||||||
|  |     def test_setvar_passing_null(self): | ||||||
|  |         tcl = self.interp | ||||||
|  |         tcl.setvar('a', 'a\0b')  # ASCII-only | ||||||
|  |         self.assertEqual(tcl.getvar('a'), 'a\x00b') | ||||||
|  |         self.assertEqual(tcl.call('set', 'a'), 'a\x00b') | ||||||
|  |         self.assertEqual(tcl.eval('set a'), 'a\x00b') | ||||||
|  | 
 | ||||||
|  |         tcl.setvar('a', '\u20ac\0')  # non-ASCII | ||||||
|  |         self.assertEqual(tcl.getvar('a'), '\u20ac\x00') | ||||||
|  |         self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00') | ||||||
|  |         self.assertEqual(tcl.eval('set a'), '\u20ac\x00') | ||||||
|  | 
 | ||||||
|     def testSetVarArray(self): |     def testSetVarArray(self): | ||||||
|         tcl = self.interp |         tcl = self.interp | ||||||
|         tcl.setvar('a(1)','1') |         tcl.setvar('a(1)','1') | ||||||
|  |  | ||||||
|  | @ -476,6 +476,15 @@ def test_info_patchlevel(self): | ||||||
|             self.assertEqual(vi.micro, 0) |             self.assertEqual(vi.micro, 0) | ||||||
|         self.assertTrue(str(vi).startswith(f'{vi.major}.{vi.minor}')) |         self.assertTrue(str(vi).startswith(f'{vi.major}.{vi.minor}')) | ||||||
| 
 | 
 | ||||||
|  |     def test_embedded_null(self): | ||||||
|  |         widget = tkinter.Entry(self.root) | ||||||
|  |         widget.insert(0, 'abc\0def')  # ASCII-only | ||||||
|  |         widget.selection_range(0, 'end') | ||||||
|  |         self.assertEqual(widget.selection_get(), 'abc\x00def') | ||||||
|  |         widget.insert(0, '\u20ac\0')  # non-ASCII | ||||||
|  |         widget.selection_range(0, 'end') | ||||||
|  |         self.assertEqual(widget.selection_get(), '\u20ac\0abc\x00def') | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class WmTest(AbstractTkTest, unittest.TestCase): | class WmTest(AbstractTkTest, unittest.TestCase): | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -0,0 +1,2 @@ | ||||||
|  | Fix truncation of strings with embedded null characters in some internal | ||||||
|  | operations in :mod:`tkinter`. | ||||||
|  | @ -512,7 +512,7 @@ unicodeFromTclObj(TkappObject *tkapp, Tcl_Obj *value) | ||||||
|         else |         else | ||||||
|             Py_UNREACHABLE(); |             Py_UNREACHABLE(); | ||||||
|     } |     } | ||||||
| #endif | #endif /* USE_TCL_UNICODE */ | ||||||
|     const char *s = Tcl_GetStringFromObj(value, &len); |     const char *s = Tcl_GetStringFromObj(value, &len); | ||||||
|     return unicodeFromTclStringAndSize(s, len); |     return unicodeFromTclStringAndSize(s, len); | ||||||
| } | } | ||||||
|  | @ -1018,7 +1018,9 @@ AsObj(PyObject *value) | ||||||
|             PyErr_SetString(PyExc_OverflowError, "string is too long"); |             PyErr_SetString(PyExc_OverflowError, "string is too long"); | ||||||
|             return NULL; |             return NULL; | ||||||
|         } |         } | ||||||
|         if (PyUnicode_IS_ASCII(value)) { |         if (PyUnicode_IS_ASCII(value) && | ||||||
|  |             strlen(PyUnicode_DATA(value)) == (size_t)PyUnicode_GET_LENGTH(value)) | ||||||
|  |         { | ||||||
|             return Tcl_NewStringObj((const char *)PyUnicode_DATA(value), |             return Tcl_NewStringObj((const char *)PyUnicode_DATA(value), | ||||||
|                                     (int)size); |                                     (int)size); | ||||||
|         } |         } | ||||||
|  | @ -1033,9 +1035,6 @@ AsObj(PyObject *value) | ||||||
|                     "surrogatepass", NATIVE_BYTEORDER); |                     "surrogatepass", NATIVE_BYTEORDER); | ||||||
|         else |         else | ||||||
|             Py_UNREACHABLE(); |             Py_UNREACHABLE(); | ||||||
| #else |  | ||||||
|         encoded = _PyUnicode_AsUTF8String(value, "surrogateescape"); |  | ||||||
| #endif |  | ||||||
|         if (!encoded) { |         if (!encoded) { | ||||||
|             return NULL; |             return NULL; | ||||||
|         } |         } | ||||||
|  | @ -1045,12 +1044,39 @@ AsObj(PyObject *value) | ||||||
|             PyErr_SetString(PyExc_OverflowError, "string is too long"); |             PyErr_SetString(PyExc_OverflowError, "string is too long"); | ||||||
|             return NULL; |             return NULL; | ||||||
|         } |         } | ||||||
| #if USE_TCL_UNICODE |  | ||||||
|         result = Tcl_NewUnicodeObj((const Tcl_UniChar *)PyBytes_AS_STRING(encoded), |         result = Tcl_NewUnicodeObj((const Tcl_UniChar *)PyBytes_AS_STRING(encoded), | ||||||
|                                    (int)(size / sizeof(Tcl_UniChar))); |                                    (int)(size / sizeof(Tcl_UniChar))); | ||||||
| #else | #else | ||||||
|  |         encoded = _PyUnicode_AsUTF8String(value, "surrogateescape"); | ||||||
|  |         if (!encoded) { | ||||||
|  |             return NULL; | ||||||
|  |         } | ||||||
|  |         size = PyBytes_GET_SIZE(encoded); | ||||||
|  |         if (strlen(PyBytes_AS_STRING(encoded)) != (size_t)size) { | ||||||
|  |             /* The string contains embedded null characters.
 | ||||||
|  |              * Tcl needs a null character to be represented as \xc0\x80 in | ||||||
|  |              * the Modified UTF-8 encoding.  Otherwise the string can be | ||||||
|  |              * truncated in some internal operations. | ||||||
|  |              * | ||||||
|  |              * NOTE: stringlib_replace() could be used here, but optimizing | ||||||
|  |              * this obscure case isn't worth it unless stringlib_replace() | ||||||
|  |              * was already exposed in the C API for other reasons. */ | ||||||
|  |             Py_SETREF(encoded, | ||||||
|  |                       PyObject_CallMethod(encoded, "replace", "y#y#", | ||||||
|  |                                           "\0", (Py_ssize_t)1, | ||||||
|  |                                           "\xc0\x80", (Py_ssize_t)2)); | ||||||
|  |             if (!encoded) { | ||||||
|  |                 return NULL; | ||||||
|  |             } | ||||||
|  |             size = PyBytes_GET_SIZE(encoded); | ||||||
|  |         } | ||||||
|  |         if (size > INT_MAX) { | ||||||
|  |             Py_DECREF(encoded); | ||||||
|  |             PyErr_SetString(PyExc_OverflowError, "string is too long"); | ||||||
|  |             return NULL; | ||||||
|  |         } | ||||||
|         result = Tcl_NewStringObj(PyBytes_AS_STRING(encoded), (int)size); |         result = Tcl_NewStringObj(PyBytes_AS_STRING(encoded), (int)size); | ||||||
| #endif | #endif /* USE_TCL_UNICODE */ | ||||||
|         Py_DECREF(encoded); |         Py_DECREF(encoded); | ||||||
|         return result; |         return result; | ||||||
|     } |     } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Serhiy Storchaka
						Serhiy Storchaka