mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	gh-120380: fix Python implementation of pickle.Pickler for bytes and bytearray objects in protocol version 5. (GH-120422)
				
					
				
			This commit is contained in:
		
							parent
							
								
									83d3d7aace
								
							
						
					
					
						commit
						7595e6743a
					
				
					 3 changed files with 81 additions and 21 deletions
				
			
		| 
						 | 
					@ -782,14 +782,10 @@ def save_float(self, obj):
 | 
				
			||||||
            self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
 | 
					            self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
 | 
				
			||||||
    dispatch[float] = save_float
 | 
					    dispatch[float] = save_float
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def save_bytes(self, obj):
 | 
					    def _save_bytes_no_memo(self, obj):
 | 
				
			||||||
        if self.proto < 3:
 | 
					        # helper for writing bytes objects for protocol >= 3
 | 
				
			||||||
            if not obj: # bytes object is empty
 | 
					        # without memoizing them
 | 
				
			||||||
                self.save_reduce(bytes, (), obj=obj)
 | 
					        assert self.proto >= 3
 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                self.save_reduce(codecs.encode,
 | 
					 | 
				
			||||||
                                 (str(obj, 'latin1'), 'latin1'), obj=obj)
 | 
					 | 
				
			||||||
            return
 | 
					 | 
				
			||||||
        n = len(obj)
 | 
					        n = len(obj)
 | 
				
			||||||
        if n <= 0xff:
 | 
					        if n <= 0xff:
 | 
				
			||||||
            self.write(SHORT_BINBYTES + pack("<B", n) + obj)
 | 
					            self.write(SHORT_BINBYTES + pack("<B", n) + obj)
 | 
				
			||||||
| 
						 | 
					@ -799,9 +795,29 @@ def save_bytes(self, obj):
 | 
				
			||||||
            self._write_large_bytes(BINBYTES + pack("<I", n), obj)
 | 
					            self._write_large_bytes(BINBYTES + pack("<I", n), obj)
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            self.write(BINBYTES + pack("<I", n) + obj)
 | 
					            self.write(BINBYTES + pack("<I", n) + obj)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def save_bytes(self, obj):
 | 
				
			||||||
 | 
					        if self.proto < 3:
 | 
				
			||||||
 | 
					            if not obj: # bytes object is empty
 | 
				
			||||||
 | 
					                self.save_reduce(bytes, (), obj=obj)
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                self.save_reduce(codecs.encode,
 | 
				
			||||||
 | 
					                                 (str(obj, 'latin1'), 'latin1'), obj=obj)
 | 
				
			||||||
 | 
					            return
 | 
				
			||||||
 | 
					        self._save_bytes_no_memo(obj)
 | 
				
			||||||
        self.memoize(obj)
 | 
					        self.memoize(obj)
 | 
				
			||||||
    dispatch[bytes] = save_bytes
 | 
					    dispatch[bytes] = save_bytes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _save_bytearray_no_memo(self, obj):
 | 
				
			||||||
 | 
					        # helper for writing bytearray objects for protocol >= 5
 | 
				
			||||||
 | 
					        # without memoizing them
 | 
				
			||||||
 | 
					        assert self.proto >= 5
 | 
				
			||||||
 | 
					        n = len(obj)
 | 
				
			||||||
 | 
					        if n >= self.framer._FRAME_SIZE_TARGET:
 | 
				
			||||||
 | 
					            self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            self.write(BYTEARRAY8 + pack("<Q", n) + obj)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def save_bytearray(self, obj):
 | 
					    def save_bytearray(self, obj):
 | 
				
			||||||
        if self.proto < 5:
 | 
					        if self.proto < 5:
 | 
				
			||||||
            if not obj:  # bytearray is empty
 | 
					            if not obj:  # bytearray is empty
 | 
				
			||||||
| 
						 | 
					@ -809,11 +825,7 @@ def save_bytearray(self, obj):
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                self.save_reduce(bytearray, (bytes(obj),), obj=obj)
 | 
					                self.save_reduce(bytearray, (bytes(obj),), obj=obj)
 | 
				
			||||||
            return
 | 
					            return
 | 
				
			||||||
        n = len(obj)
 | 
					        self._save_bytearray_no_memo(obj)
 | 
				
			||||||
        if n >= self.framer._FRAME_SIZE_TARGET:
 | 
					 | 
				
			||||||
            self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            self.write(BYTEARRAY8 + pack("<Q", n) + obj)
 | 
					 | 
				
			||||||
        self.memoize(obj)
 | 
					        self.memoize(obj)
 | 
				
			||||||
    dispatch[bytearray] = save_bytearray
 | 
					    dispatch[bytearray] = save_bytearray
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -832,10 +844,18 @@ def save_picklebuffer(self, obj):
 | 
				
			||||||
                if in_band:
 | 
					                if in_band:
 | 
				
			||||||
                    # Write data in-band
 | 
					                    # Write data in-band
 | 
				
			||||||
                    # XXX The C implementation avoids a copy here
 | 
					                    # XXX The C implementation avoids a copy here
 | 
				
			||||||
 | 
					                    buf = m.tobytes()
 | 
				
			||||||
 | 
					                    in_memo = id(buf) in self.memo
 | 
				
			||||||
                    if m.readonly:
 | 
					                    if m.readonly:
 | 
				
			||||||
                        self.save_bytes(m.tobytes())
 | 
					                        if in_memo:
 | 
				
			||||||
 | 
					                            self._save_bytes_no_memo(buf)
 | 
				
			||||||
                        else:
 | 
					                        else:
 | 
				
			||||||
                        self.save_bytearray(m.tobytes())
 | 
					                            self.save_bytes(buf)
 | 
				
			||||||
 | 
					                    else:
 | 
				
			||||||
 | 
					                        if in_memo:
 | 
				
			||||||
 | 
					                            self._save_bytearray_no_memo(buf)
 | 
				
			||||||
 | 
					                        else:
 | 
				
			||||||
 | 
					                            self.save_bytearray(buf)
 | 
				
			||||||
                else:
 | 
					                else:
 | 
				
			||||||
                    # Write data out-of-band
 | 
					                    # Write data out-of-band
 | 
				
			||||||
                    self.write(NEXT_BUFFER)
 | 
					                    self.write(NEXT_BUFFER)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1845,6 +1845,25 @@ def test_bytes(self):
 | 
				
			||||||
                p = self.dumps(s, proto)
 | 
					                p = self.dumps(s, proto)
 | 
				
			||||||
                self.assert_is_copy(s, self.loads(p))
 | 
					                self.assert_is_copy(s, self.loads(p))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_bytes_memoization(self):
 | 
				
			||||||
 | 
					        for proto in protocols:
 | 
				
			||||||
 | 
					            for array_type in [bytes, ZeroCopyBytes]:
 | 
				
			||||||
 | 
					                for s in b'', b'xyz', b'xyz'*100:
 | 
				
			||||||
 | 
					                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
 | 
				
			||||||
 | 
					                        b = array_type(s)
 | 
				
			||||||
 | 
					                        p = self.dumps((b, b), proto)
 | 
				
			||||||
 | 
					                        x, y = self.loads(p)
 | 
				
			||||||
 | 
					                        self.assertIs(x, y)
 | 
				
			||||||
 | 
					                        self.assert_is_copy((b, b), (x, y))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
 | 
				
			||||||
 | 
					                        b1, b2 = array_type(s), array_type(s)
 | 
				
			||||||
 | 
					                        p = self.dumps((b1, b2), proto)
 | 
				
			||||||
 | 
					                        # Note that (b1, b2) = self.loads(p) might have identical
 | 
				
			||||||
 | 
					                        # components, i.e., b1 is b2, but this is not always the
 | 
				
			||||||
 | 
					                        # case if the content is large (equality still holds).
 | 
				
			||||||
 | 
					                        self.assert_is_copy((b1, b2), self.loads(p))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_bytearray(self):
 | 
					    def test_bytearray(self):
 | 
				
			||||||
        for proto in protocols:
 | 
					        for proto in protocols:
 | 
				
			||||||
            for s in b'', b'xyz', b'xyz'*100:
 | 
					            for s in b'', b'xyz', b'xyz'*100:
 | 
				
			||||||
| 
						 | 
					@ -1864,14 +1883,32 @@ def test_bytearray(self):
 | 
				
			||||||
                    self.assertNotIn(b'bytearray', p)
 | 
					                    self.assertNotIn(b'bytearray', p)
 | 
				
			||||||
                    self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p))
 | 
					                    self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_bytearray_memoization_bug(self):
 | 
					    def test_bytearray_memoization(self):
 | 
				
			||||||
        for proto in protocols:
 | 
					        for proto in protocols:
 | 
				
			||||||
 | 
					            for array_type in [bytearray, ZeroCopyBytearray]:
 | 
				
			||||||
                for s in b'', b'xyz', b'xyz'*100:
 | 
					                for s in b'', b'xyz', b'xyz'*100:
 | 
				
			||||||
                b = bytearray(s)
 | 
					                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
 | 
				
			||||||
 | 
					                        b = array_type(s)
 | 
				
			||||||
                        p = self.dumps((b, b), proto)
 | 
					                        p = self.dumps((b, b), proto)
 | 
				
			||||||
                        b1, b2 = self.loads(p)
 | 
					                        b1, b2 = self.loads(p)
 | 
				
			||||||
                        self.assertIs(b1, b2)
 | 
					                        self.assertIs(b1, b2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
 | 
				
			||||||
 | 
					                        b1a, b2a = array_type(s), array_type(s)
 | 
				
			||||||
 | 
					                        # Unlike bytes, equal but independent bytearray objects are
 | 
				
			||||||
 | 
					                        # never identical.
 | 
				
			||||||
 | 
					                        self.assertIsNot(b1a, b2a)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        p = self.dumps((b1a, b2a), proto)
 | 
				
			||||||
 | 
					                        b1b, b2b = self.loads(p)
 | 
				
			||||||
 | 
					                        self.assertIsNot(b1b, b2b)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        self.assertIsNot(b1a, b1b)
 | 
				
			||||||
 | 
					                        self.assert_is_copy(b1a, b1b)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        self.assertIsNot(b2a, b2b)
 | 
				
			||||||
 | 
					                        self.assert_is_copy(b2a, b2b)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_ints(self):
 | 
					    def test_ints(self):
 | 
				
			||||||
        for proto in protocols:
 | 
					        for proto in protocols:
 | 
				
			||||||
            n = sys.maxsize
 | 
					            n = sys.maxsize
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,3 @@
 | 
				
			||||||
 | 
					Fix Python implementation of :class:`pickle.Pickler` for :class:`bytes` and
 | 
				
			||||||
 | 
					:class:`bytearray` objects when using protocol version 5. Patch by Bénédikt
 | 
				
			||||||
 | 
					Tran.
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue