mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	gh-129005: Align FileIO.readall between _pyio and _io (#129705)
Utilize `bytearray.resize()` and `os.readinto()` to reduce copies and match the behavior of `_io.FileIO.readall()`. There is still one extra copy, which means twice the memory is required compared to `_io.FileIO`, because there is currently no zero-copy path from `bytearray` to `bytes`. On my system, reading a 2 GB file with `./python -m test -M8g -uall test_largefile -m test.test_largefile.PyLargeFileTest.test_large_read -v` goes from ~2.7 seconds to ~2.2 seconds. Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
		
							parent
							
								
									ae132edc29
								
							
						
					
					
						commit
						a3d5aab9a8
					
				
					 2 changed files with 25 additions and 14 deletions
				
			
		
							
								
								
									
										37
									
								
								Lib/_pyio.py
									
										
									
									
									
								
							
							
						
						
									
										37
									
								
								Lib/_pyio.py
									
										
									
									
									
								
							|  | @ -1454,6 +1454,17 @@ def write(self, b): | |||
|         return BufferedWriter.write(self, b) | ||||
| 
 | ||||
| 
 | ||||
| def _new_buffersize(bytes_read): | ||||
|     # Parallels _io/fileio.c new_buffersize | ||||
|     if bytes_read > 65536: | ||||
|         addend = bytes_read >> 3 | ||||
|     else: | ||||
|         addend = 256 + bytes_read | ||||
|     if addend < DEFAULT_BUFFER_SIZE: | ||||
|         addend = DEFAULT_BUFFER_SIZE | ||||
|     return bytes_read + addend | ||||
| 
 | ||||
| 
 | ||||
| class FileIO(RawIOBase): | ||||
|     _fd = -1 | ||||
|     _created = False | ||||
|  | @ -1672,22 +1683,20 @@ def readall(self): | |||
|                 except OSError: | ||||
|                     pass | ||||
| 
 | ||||
|         result = bytearray() | ||||
|         while True: | ||||
|             if len(result) >= bufsize: | ||||
|                 bufsize = len(result) | ||||
|                 bufsize += max(bufsize, DEFAULT_BUFFER_SIZE) | ||||
|             n = bufsize - len(result) | ||||
|             try: | ||||
|                 chunk = os.read(self._fd, n) | ||||
|             except BlockingIOError: | ||||
|                 if result: | ||||
|                     break | ||||
|         result = bytearray(bufsize) | ||||
|         bytes_read = 0 | ||||
|         try: | ||||
|             while n := os.readinto(self._fd, memoryview(result)[bytes_read:]): | ||||
|                 bytes_read += n | ||||
|                 if bytes_read >= len(result): | ||||
|                     result.resize(_new_buffersize(bytes_read)) | ||||
|         except BlockingIOError: | ||||
|             if not bytes_read: | ||||
|                 return None | ||||
|             if not chunk: # reached the end of the file | ||||
|                 break | ||||
|             result += chunk | ||||
| 
 | ||||
|         assert len(result) - bytes_read >= 1, \ | ||||
|             "os.readinto buffer size 0 will result in erroneous EOF / returns 0" | ||||
|         result.resize(bytes_read) | ||||
|         return bytes(result) | ||||
| 
 | ||||
|     def readinto(self, buffer): | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Cody Maloney
						Cody Maloney