mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	gh-129005: Align FileIO.readall between _pyio and _io (#129705)
Utilize `bytearray.resize()` and `os.readinto()` to reduce copies and match the behavior of `_io.FileIO.readall()`. There is still one extra copy, which means twice the memory is required compared to `_io.FileIO`, because there is currently no zero-copy path from `bytearray` to `bytes`.

On my system, reading a 2 GB file with `./python -m test -M8g -uall test_largefile -m test.test_largefile.PyLargeFileTest.test_large_read -v` goes from ~2.7 seconds to ~2.2 seconds.

Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
		
							parent
							
								
									ae132edc29
								
							
						
					
					
						commit
						a3d5aab9a8
					
				
					 2 changed files with 25 additions and 14 deletions
				
			
		
							
								
								
									
										33
									
								
								Lib/_pyio.py
									
										
									
									
									
								
							
							
						
						
									
										33
									
								
								Lib/_pyio.py
									
										
									
									
									
								
							|  | @ -1454,6 +1454,17 @@ def write(self, b): | ||||||
|         return BufferedWriter.write(self, b) |         return BufferedWriter.write(self, b) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def _new_buffersize(bytes_read): | ||||||
|  |     # Parallels _io/fileio.c new_buffersize | ||||||
|  |     if bytes_read > 65536: | ||||||
|  |         addend = bytes_read >> 3 | ||||||
|  |     else: | ||||||
|  |         addend = 256 + bytes_read | ||||||
|  |     if addend < DEFAULT_BUFFER_SIZE: | ||||||
|  |         addend = DEFAULT_BUFFER_SIZE | ||||||
|  |     return bytes_read + addend | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| class FileIO(RawIOBase): | class FileIO(RawIOBase): | ||||||
|     _fd = -1 |     _fd = -1 | ||||||
|     _created = False |     _created = False | ||||||
|  | @ -1672,22 +1683,20 @@ def readall(self): | ||||||
|                 except OSError: |                 except OSError: | ||||||
|                     pass |                     pass | ||||||
| 
 | 
 | ||||||
|         result = bytearray() |         result = bytearray(bufsize) | ||||||
|         while True: |         bytes_read = 0 | ||||||
|             if len(result) >= bufsize: |  | ||||||
|                 bufsize = len(result) |  | ||||||
|                 bufsize += max(bufsize, DEFAULT_BUFFER_SIZE) |  | ||||||
|             n = bufsize - len(result) |  | ||||||
|         try: |         try: | ||||||
|                 chunk = os.read(self._fd, n) |             while n := os.readinto(self._fd, memoryview(result)[bytes_read:]): | ||||||
|  |                 bytes_read += n | ||||||
|  |                 if bytes_read >= len(result): | ||||||
|  |                     result.resize(_new_buffersize(bytes_read)) | ||||||
|         except BlockingIOError: |         except BlockingIOError: | ||||||
|                 if result: |             if not bytes_read: | ||||||
|                     break |  | ||||||
|                 return None |                 return None | ||||||
|             if not chunk: # reached the end of the file |  | ||||||
|                 break |  | ||||||
|             result += chunk |  | ||||||
| 
 | 
 | ||||||
|  |         assert len(result) - bytes_read >= 1, \ | ||||||
|  |             "os.readinto buffer size 0 will result in erroneous EOF / returns 0" | ||||||
|  |         result.resize(bytes_read) | ||||||
|         return bytes(result) |         return bytes(result) | ||||||
| 
 | 
 | ||||||
|     def readinto(self, buffer): |     def readinto(self, buffer): | ||||||
|  |  | ||||||
|  | @ -0,0 +1,2 @@ | ||||||
|  | ``_pyio.FileIO.readall()`` now allocates, resizes, and fills a data buffer | ||||||
|  | using the same algorithm ``_io.FileIO.readall()`` uses. | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Cody Maloney
						Cody Maloney