mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	gh-119614: Fix truncation of strings with embedded null characters in Tkinter (GH-120909)
Now the null character is always represented as \xc0\x80 for Tcl_NewStringObj().
This commit is contained in:
		
							parent
							
								
									fc297b4ba4
								
							
						
					
					
						commit
						c38e2f64d0
					
				
					 4 changed files with 68 additions and 7 deletions
				
			
		|  | @ -73,6 +73,18 @@ def testCall(self): | |||
|         tcl.call('set','a','1') | ||||
|         self.assertEqual(tcl.call('set','a'),'1') | ||||
| 
 | ||||
|     def test_call_passing_null(self): | ||||
|         tcl = self.interp | ||||
|         tcl.call('set', 'a', 'a\0b')  # ASCII-only | ||||
|         self.assertEqual(tcl.getvar('a'), 'a\x00b') | ||||
|         self.assertEqual(tcl.call('set', 'a'), 'a\x00b') | ||||
|         self.assertEqual(tcl.eval('set a'), 'a\x00b') | ||||
| 
 | ||||
|         tcl.call('set', 'a', '\u20ac\0')  # non-ASCII | ||||
|         self.assertEqual(tcl.getvar('a'), '\u20ac\x00') | ||||
|         self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00') | ||||
|         self.assertEqual(tcl.eval('set a'), '\u20ac\x00') | ||||
| 
 | ||||
|     def testCallException(self): | ||||
|         tcl = self.interp | ||||
|         self.assertRaises(TclError,tcl.call,'set','a') | ||||
|  | @ -98,6 +110,18 @@ def testSetVar(self): | |||
|         tcl.setvar('a','1') | ||||
|         self.assertEqual(tcl.eval('set a'),'1') | ||||
| 
 | ||||
|     def test_setvar_passing_null(self): | ||||
|         tcl = self.interp | ||||
|         tcl.setvar('a', 'a\0b')  # ASCII-only | ||||
|         self.assertEqual(tcl.getvar('a'), 'a\x00b') | ||||
|         self.assertEqual(tcl.call('set', 'a'), 'a\x00b') | ||||
|         self.assertEqual(tcl.eval('set a'), 'a\x00b') | ||||
| 
 | ||||
|         tcl.setvar('a', '\u20ac\0')  # non-ASCII | ||||
|         self.assertEqual(tcl.getvar('a'), '\u20ac\x00') | ||||
|         self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00') | ||||
|         self.assertEqual(tcl.eval('set a'), '\u20ac\x00') | ||||
| 
 | ||||
|     def testSetVarArray(self): | ||||
|         tcl = self.interp | ||||
|         tcl.setvar('a(1)','1') | ||||
|  |  | |||
|  | @ -476,6 +476,15 @@ def test_info_patchlevel(self): | |||
|             self.assertEqual(vi.micro, 0) | ||||
|         self.assertTrue(str(vi).startswith(f'{vi.major}.{vi.minor}')) | ||||
| 
 | ||||
|     def test_embedded_null(self): | ||||
|         widget = tkinter.Entry(self.root) | ||||
|         widget.insert(0, 'abc\0def')  # ASCII-only | ||||
|         widget.selection_range(0, 'end') | ||||
|         self.assertEqual(widget.selection_get(), 'abc\x00def') | ||||
|         widget.insert(0, '\u20ac\0')  # non-ASCII | ||||
|         widget.selection_range(0, 'end') | ||||
|         self.assertEqual(widget.selection_get(), '\u20ac\0abc\x00def') | ||||
| 
 | ||||
| 
 | ||||
| class WmTest(AbstractTkTest, unittest.TestCase): | ||||
| 
 | ||||
|  |  | |||
|  | @ -0,0 +1,2 @@ | |||
| Fix truncation of strings with embedded null characters in some internal | ||||
| operations in :mod:`tkinter`. | ||||
|  | @ -512,7 +512,7 @@ unicodeFromTclObj(TkappObject *tkapp, Tcl_Obj *value) | |||
|         else | ||||
|             Py_UNREACHABLE(); | ||||
|     } | ||||
| #endif | ||||
| #endif /* USE_TCL_UNICODE */ | ||||
|     const char *s = Tcl_GetStringFromObj(value, &len); | ||||
|     return unicodeFromTclStringAndSize(s, len); | ||||
| } | ||||
|  | @ -1018,7 +1018,9 @@ AsObj(PyObject *value) | |||
|             PyErr_SetString(PyExc_OverflowError, "string is too long"); | ||||
|             return NULL; | ||||
|         } | ||||
|         if (PyUnicode_IS_ASCII(value)) { | ||||
|         if (PyUnicode_IS_ASCII(value) && | ||||
|             strlen(PyUnicode_DATA(value)) == (size_t)PyUnicode_GET_LENGTH(value)) | ||||
|         { | ||||
|             return Tcl_NewStringObj((const char *)PyUnicode_DATA(value), | ||||
|                                     (int)size); | ||||
|         } | ||||
|  | @ -1033,9 +1035,6 @@ AsObj(PyObject *value) | |||
|                     "surrogatepass", NATIVE_BYTEORDER); | ||||
|         else | ||||
|             Py_UNREACHABLE(); | ||||
| #else | ||||
|         encoded = _PyUnicode_AsUTF8String(value, "surrogateescape"); | ||||
| #endif | ||||
|         if (!encoded) { | ||||
|             return NULL; | ||||
|         } | ||||
|  | @ -1045,12 +1044,39 @@ AsObj(PyObject *value) | |||
|             PyErr_SetString(PyExc_OverflowError, "string is too long"); | ||||
|             return NULL; | ||||
|         } | ||||
| #if USE_TCL_UNICODE | ||||
|         result = Tcl_NewUnicodeObj((const Tcl_UniChar *)PyBytes_AS_STRING(encoded), | ||||
|                                    (int)(size / sizeof(Tcl_UniChar))); | ||||
| #else | ||||
|         encoded = _PyUnicode_AsUTF8String(value, "surrogateescape"); | ||||
|         if (!encoded) { | ||||
|             return NULL; | ||||
|         } | ||||
|         size = PyBytes_GET_SIZE(encoded); | ||||
|         if (strlen(PyBytes_AS_STRING(encoded)) != (size_t)size) { | ||||
|             /* The string contains embedded null characters. | ||||
|              * Tcl needs a null character to be represented as \xc0\x80 in | ||||
|              * the Modified UTF-8 encoding.  Otherwise the string can be | ||||
|              * truncated in some internal operations. | ||||
|              * | ||||
|              * NOTE: stringlib_replace() could be used here, but optimizing | ||||
|              * this obscure case isn't worth it unless stringlib_replace() | ||||
|              * was already exposed in the C API for other reasons. */ | ||||
|             Py_SETREF(encoded, | ||||
|                       PyObject_CallMethod(encoded, "replace", "y#y#", | ||||
|                                           "\0", (Py_ssize_t)1, | ||||
|                                           "\xc0\x80", (Py_ssize_t)2)); | ||||
|             if (!encoded) { | ||||
|                 return NULL; | ||||
|             } | ||||
|             size = PyBytes_GET_SIZE(encoded); | ||||
|         } | ||||
|         if (size > INT_MAX) { | ||||
|             Py_DECREF(encoded); | ||||
|             PyErr_SetString(PyExc_OverflowError, "string is too long"); | ||||
|             return NULL; | ||||
|         } | ||||
|         result = Tcl_NewStringObj(PyBytes_AS_STRING(encoded), (int)size); | ||||
| #endif | ||||
| #endif /* USE_TCL_UNICODE */ | ||||
|         Py_DECREF(encoded); | ||||
|         return result; | ||||
|     } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Serhiy Storchaka
						Serhiy Storchaka