mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 21:51:50 +00:00 
			
		
		
		
	bpo-37596: Make set and frozenset marshalling deterministic (GH-27926)
				
					
				
			This commit is contained in:
		
							parent
							
								
									7ecd3425d4
								
							
						
					
					
						commit
						33d95c6fac
					
				
					 3 changed files with 59 additions and 0 deletions
				
			
		|  | @ -344,6 +344,31 @@ def test_eof(self): | ||||||
|         for i in range(len(data)): |         for i in range(len(data)): | ||||||
|             self.assertRaises(EOFError, marshal.loads, data[0: i]) |             self.assertRaises(EOFError, marshal.loads, data[0: i]) | ||||||
| 
 | 
 | ||||||
|  |     def test_deterministic_sets(self): | ||||||
|  |         # bpo-37596: To support reproducible builds, sets and frozensets need to | ||||||
|  |         # have their elements serialized in a consistent order (even when they | ||||||
|  |         # have been scrambled by hash randomization): | ||||||
|  |         for kind in ("set", "frozenset"): | ||||||
|  |             for elements in ( | ||||||
|  |                 "float('nan'), b'a', b'b', b'c', 'x', 'y', 'z'", | ||||||
|  |                 # Also test for bad interactions with backreferencing: | ||||||
|  |                 "('string', 1), ('string', 2), ('string', 3)", | ||||||
|  |             ): | ||||||
|  |                 s = f"{kind}([{elements}])" | ||||||
|  |                 with self.subTest(s): | ||||||
|  |                     # First, make sure that our test case still has different | ||||||
|  |                     # orders under hash seeds 0 and 1. If this check fails, we | ||||||
|  |                     # need to update this test with different elements: | ||||||
|  |                     args = ["-c", f"print({s})"] | ||||||
|  |                     _, repr_0, _ = assert_python_ok(*args, PYTHONHASHSEED="0") | ||||||
|  |                     _, repr_1, _ = assert_python_ok(*args, PYTHONHASHSEED="1") | ||||||
|  |                     self.assertNotEqual(repr_0, repr_1) | ||||||
|  |                     # Then, perform the actual test: | ||||||
|  |                     args = ["-c", f"import marshal; print(marshal.dumps({s}))"] | ||||||
|  |                     _, dump_0, _ = assert_python_ok(*args, PYTHONHASHSEED="0") | ||||||
|  |                     _, dump_1, _ = assert_python_ok(*args, PYTHONHASHSEED="1") | ||||||
|  |                     self.assertEqual(dump_0, dump_1) | ||||||
|  | 
 | ||||||
| LARGE_SIZE = 2**31 | LARGE_SIZE = 2**31 | ||||||
| pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4 | pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -0,0 +1,2 @@ | ||||||
|  | Ensure that :class:`set` and :class:`frozenset` objects are always | ||||||
|  | :mod:`marshalled <marshal>` reproducibly. | ||||||
|  | @ -503,9 +503,41 @@ w_complex_object(PyObject *v, char flag, WFILE *p) | ||||||
|             W_TYPE(TYPE_SET, p); |             W_TYPE(TYPE_SET, p); | ||||||
|         n = PySet_GET_SIZE(v); |         n = PySet_GET_SIZE(v); | ||||||
|         W_SIZE(n, p); |         W_SIZE(n, p); | ||||||
|  |         // bpo-37596: To support reproducible builds, sets and frozensets need | ||||||
|  |         // to have their elements serialized in a consistent order (even when | ||||||
|  |         // they have been scrambled by hash randomization). To ensure this, we | ||||||
|  |         // use an order equivalent to sorted(v, key=marshal.dumps): | ||||||
|  |         PyObject *pairs = PyList_New(0); | ||||||
|  |         if (pairs == NULL) { | ||||||
|  |             p->error = WFERR_NOMEMORY; | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|         while (_PySet_NextEntry(v, &pos, &value, &hash)) { |         while (_PySet_NextEntry(v, &pos, &value, &hash)) { | ||||||
|  |             PyObject *dump = PyMarshal_WriteObjectToString(value, p->version); | ||||||
|  |             if (dump == NULL) { | ||||||
|  |                 p->error = WFERR_UNMARSHALLABLE; | ||||||
|  |                 goto anyset_done; | ||||||
|  |             } | ||||||
|  |             PyObject *pair = PyTuple_Pack(2, dump, value); | ||||||
|  |             Py_DECREF(dump); | ||||||
|  |             if (pair == NULL || PyList_Append(pairs, pair)) { | ||||||
|  |                 p->error = WFERR_NOMEMORY; | ||||||
|  |                 Py_XDECREF(pair); | ||||||
|  |                 goto anyset_done; | ||||||
|  |             } | ||||||
|  |             Py_DECREF(pair); | ||||||
|  |         } | ||||||
|  |         if (PyList_Sort(pairs)) { | ||||||
|  |             p->error = WFERR_NOMEMORY; | ||||||
|  |             goto anyset_done; | ||||||
|  |         } | ||||||
|  |         for (Py_ssize_t i = 0; i < n; i++) { | ||||||
|  |             PyObject *pair = PyList_GET_ITEM(pairs, i); | ||||||
|  |             value = PyTuple_GET_ITEM(pair, 1); | ||||||
|             w_object(value, p); |             w_object(value, p); | ||||||
|         } |         } | ||||||
|  |     anyset_done: | ||||||
|  |         Py_DECREF(pairs); | ||||||
|     } |     } | ||||||
|     else if (PyCode_Check(v)) { |     else if (PyCode_Check(v)) { | ||||||
|         PyCodeObject *co = (PyCodeObject *)v; |         PyCodeObject *co = (PyCodeObject *)v; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Brandt Bucher
						Brandt Bucher