mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			345 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			345 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import io
 | 
						|
from os import PathLike
 | 
						|
from _zstd import ZstdCompressor, ZstdDecompressor, ZSTD_DStreamOutSize
 | 
						|
from compression._common import _streams
 | 
						|
 | 
						|
__all__ = ('ZstdFile', 'open')
 | 
						|
 | 
						|
_MODE_CLOSED = 0
 | 
						|
_MODE_READ = 1
 | 
						|
_MODE_WRITE = 2
 | 
						|
 | 
						|
 | 
						|
def _nbytes(dat, /):
 | 
						|
    if isinstance(dat, (bytes, bytearray)):
 | 
						|
        return len(dat)
 | 
						|
    with memoryview(dat) as mv:
 | 
						|
        return mv.nbytes
 | 
						|
 | 
						|
 | 
						|
class ZstdFile(_streams.BaseStream):
 | 
						|
    """A file-like object providing transparent Zstandard (de)compression.
 | 
						|
 | 
						|
    A ZstdFile can act as a wrapper for an existing file object, or refer
 | 
						|
    directly to a named file on disk.
 | 
						|
 | 
						|
    ZstdFile provides a *binary* file interface. Data is read and returned as
 | 
						|
    bytes, and may only be written to objects that support the Buffer Protocol.
 | 
						|
    """
 | 
						|
 | 
						|
    FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK
 | 
						|
    FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME
 | 
						|
 | 
						|
    def __init__(self, file, /, mode='r', *,
 | 
						|
                 level=None, options=None, zstd_dict=None):
 | 
						|
        """Open a Zstandard compressed file in binary mode.
 | 
						|
 | 
						|
        *file* can be either an file-like object, or a file name to open.
 | 
						|
 | 
						|
        *mode* can be 'r' for reading (default), 'w' for (over)writing, 'x' for
 | 
						|
        creating exclusively, or 'a' for appending. These can equivalently be
 | 
						|
        given as 'rb', 'wb', 'xb' and 'ab' respectively.
 | 
						|
 | 
						|
        *level* is an optional int specifying the compression level to use,
 | 
						|
        or COMPRESSION_LEVEL_DEFAULT if not given.
 | 
						|
 | 
						|
        *options* is an optional dict for advanced compression parameters.
 | 
						|
        See CompressionParameter and DecompressionParameter for the possible
 | 
						|
        options.
 | 
						|
 | 
						|
        *zstd_dict* is an optional ZstdDict object, a pre-trained Zstandard
 | 
						|
        dictionary. See train_dict() to train ZstdDict on sample data.
 | 
						|
        """
 | 
						|
        self._fp = None
 | 
						|
        self._close_fp = False
 | 
						|
        self._mode = _MODE_CLOSED
 | 
						|
        self._buffer = None
 | 
						|
 | 
						|
        if not isinstance(mode, str):
 | 
						|
            raise ValueError('mode must be a str')
 | 
						|
        if options is not None and not isinstance(options, dict):
 | 
						|
            raise TypeError('options must be a dict or None')
 | 
						|
        mode = mode.removesuffix('b')  # handle rb, wb, xb, ab
 | 
						|
        if mode == 'r':
 | 
						|
            if level is not None:
 | 
						|
                raise TypeError('level is illegal in read mode')
 | 
						|
            self._mode = _MODE_READ
 | 
						|
        elif mode in {'w', 'a', 'x'}:
 | 
						|
            if level is not None and not isinstance(level, int):
 | 
						|
                raise TypeError('level must be int or None')
 | 
						|
            self._mode = _MODE_WRITE
 | 
						|
            self._compressor = ZstdCompressor(level=level, options=options,
 | 
						|
                                              zstd_dict=zstd_dict)
 | 
						|
            self._pos = 0
 | 
						|
        else:
 | 
						|
            raise ValueError(f'Invalid mode: {mode!r}')
 | 
						|
 | 
						|
        if isinstance(file, (str, bytes, PathLike)):
 | 
						|
            self._fp = io.open(file, f'{mode}b')
 | 
						|
            self._close_fp = True
 | 
						|
        elif ((mode == 'r' and hasattr(file, 'read'))
 | 
						|
                or (mode != 'r' and hasattr(file, 'write'))):
 | 
						|
            self._fp = file
 | 
						|
        else:
 | 
						|
            raise TypeError('file must be a file-like object '
 | 
						|
                            'or a str, bytes, or PathLike object')
 | 
						|
 | 
						|
        if self._mode == _MODE_READ:
 | 
						|
            raw = _streams.DecompressReader(
 | 
						|
                self._fp,
 | 
						|
                ZstdDecompressor,
 | 
						|
                zstd_dict=zstd_dict,
 | 
						|
                options=options,
 | 
						|
            )
 | 
						|
            self._buffer = io.BufferedReader(raw)
 | 
						|
 | 
						|
    def close(self):
 | 
						|
        """Flush and close the file.
 | 
						|
 | 
						|
        May be called multiple times. Once the file has been closed,
 | 
						|
        any other operation on it will raise ValueError.
 | 
						|
        """
 | 
						|
        if self._fp is None:
 | 
						|
            return
 | 
						|
        try:
 | 
						|
            if self._mode == _MODE_READ:
 | 
						|
                if getattr(self, '_buffer', None):
 | 
						|
                    self._buffer.close()
 | 
						|
                    self._buffer = None
 | 
						|
            elif self._mode == _MODE_WRITE:
 | 
						|
                self.flush(self.FLUSH_FRAME)
 | 
						|
                self._compressor = None
 | 
						|
        finally:
 | 
						|
            self._mode = _MODE_CLOSED
 | 
						|
            try:
 | 
						|
                if self._close_fp:
 | 
						|
                    self._fp.close()
 | 
						|
            finally:
 | 
						|
                self._fp = None
 | 
						|
                self._close_fp = False
 | 
						|
 | 
						|
    def write(self, data, /):
 | 
						|
        """Write a bytes-like object *data* to the file.
 | 
						|
 | 
						|
        Returns the number of uncompressed bytes written, which is
 | 
						|
        always the length of data in bytes. Note that due to buffering,
 | 
						|
        the file on disk may not reflect the data written until .flush()
 | 
						|
        or .close() is called.
 | 
						|
        """
 | 
						|
        self._check_can_write()
 | 
						|
 | 
						|
        length = _nbytes(data)
 | 
						|
 | 
						|
        compressed = self._compressor.compress(data)
 | 
						|
        self._fp.write(compressed)
 | 
						|
        self._pos += length
 | 
						|
        return length
 | 
						|
 | 
						|
    def flush(self, mode=FLUSH_BLOCK):
 | 
						|
        """Flush remaining data to the underlying stream.
 | 
						|
 | 
						|
        The mode argument can be FLUSH_BLOCK or FLUSH_FRAME. Abuse of this
 | 
						|
        method will reduce compression ratio, use it only when necessary.
 | 
						|
 | 
						|
        If the program is interrupted afterwards, all data can be recovered.
 | 
						|
        To ensure saving to disk, also need to use os.fsync(fd).
 | 
						|
 | 
						|
        This method does nothing in reading mode.
 | 
						|
        """
 | 
						|
        if self._mode == _MODE_READ:
 | 
						|
            return
 | 
						|
        self._check_not_closed()
 | 
						|
        if mode not in {self.FLUSH_BLOCK, self.FLUSH_FRAME}:
 | 
						|
            raise ValueError('Invalid mode argument, expected either '
 | 
						|
                             'ZstdFile.FLUSH_FRAME or '
 | 
						|
                             'ZstdFile.FLUSH_BLOCK')
 | 
						|
        if self._compressor.last_mode == mode:
 | 
						|
            return
 | 
						|
        # Flush zstd block/frame, and write.
 | 
						|
        data = self._compressor.flush(mode)
 | 
						|
        self._fp.write(data)
 | 
						|
        if hasattr(self._fp, 'flush'):
 | 
						|
            self._fp.flush()
 | 
						|
 | 
						|
    def read(self, size=-1):
 | 
						|
        """Read up to size uncompressed bytes from the file.
 | 
						|
 | 
						|
        If size is negative or omitted, read until EOF is reached.
 | 
						|
        Returns b'' if the file is already at EOF.
 | 
						|
        """
 | 
						|
        if size is None:
 | 
						|
            size = -1
 | 
						|
        self._check_can_read()
 | 
						|
        return self._buffer.read(size)
 | 
						|
 | 
						|
    def read1(self, size=-1):
 | 
						|
        """Read up to size uncompressed bytes, while trying to avoid
 | 
						|
        making multiple reads from the underlying stream. Reads up to a
 | 
						|
        buffer's worth of data if size is negative.
 | 
						|
 | 
						|
        Returns b'' if the file is at EOF.
 | 
						|
        """
 | 
						|
        self._check_can_read()
 | 
						|
        if size < 0:
 | 
						|
            # Note this should *not* be io.DEFAULT_BUFFER_SIZE.
 | 
						|
            # ZSTD_DStreamOutSize is the minimum amount to read guaranteeing
 | 
						|
            # a full block is read.
 | 
						|
            size = ZSTD_DStreamOutSize
 | 
						|
        return self._buffer.read1(size)
 | 
						|
 | 
						|
    def readinto(self, b):
 | 
						|
        """Read bytes into b.
 | 
						|
 | 
						|
        Returns the number of bytes read (0 for EOF).
 | 
						|
        """
 | 
						|
        self._check_can_read()
 | 
						|
        return self._buffer.readinto(b)
 | 
						|
 | 
						|
    def readinto1(self, b):
 | 
						|
        """Read bytes into b, while trying to avoid making multiple reads
 | 
						|
        from the underlying stream.
 | 
						|
 | 
						|
        Returns the number of bytes read (0 for EOF).
 | 
						|
        """
 | 
						|
        self._check_can_read()
 | 
						|
        return self._buffer.readinto1(b)
 | 
						|
 | 
						|
    def readline(self, size=-1):
 | 
						|
        """Read a line of uncompressed bytes from the file.
 | 
						|
 | 
						|
        The terminating newline (if present) is retained. If size is
 | 
						|
        non-negative, no more than size bytes will be read (in which
 | 
						|
        case the line may be incomplete). Returns b'' if already at EOF.
 | 
						|
        """
 | 
						|
        self._check_can_read()
 | 
						|
        return self._buffer.readline(size)
 | 
						|
 | 
						|
    def seek(self, offset, whence=io.SEEK_SET):
 | 
						|
        """Change the file position.
 | 
						|
 | 
						|
        The new position is specified by offset, relative to the
 | 
						|
        position indicated by whence. Possible values for whence are:
 | 
						|
 | 
						|
            0: start of stream (default): offset must not be negative
 | 
						|
            1: current stream position
 | 
						|
            2: end of stream; offset must not be positive
 | 
						|
 | 
						|
        Returns the new file position.
 | 
						|
 | 
						|
        Note that seeking is emulated, so depending on the arguments,
 | 
						|
        this operation may be extremely slow.
 | 
						|
        """
 | 
						|
        self._check_can_read()
 | 
						|
 | 
						|
        # BufferedReader.seek() checks seekable
 | 
						|
        return self._buffer.seek(offset, whence)
 | 
						|
 | 
						|
    def peek(self, size=-1):
 | 
						|
        """Return buffered data without advancing the file position.
 | 
						|
 | 
						|
        Always returns at least one byte of data, unless at EOF.
 | 
						|
        The exact number of bytes returned is unspecified.
 | 
						|
        """
 | 
						|
        # Relies on the undocumented fact that BufferedReader.peek() always
 | 
						|
        # returns at least one byte (except at EOF)
 | 
						|
        self._check_can_read()
 | 
						|
        return self._buffer.peek(size)
 | 
						|
 | 
						|
    def __next__(self):
 | 
						|
        if ret := self._buffer.readline():
 | 
						|
            return ret
 | 
						|
        raise StopIteration
 | 
						|
 | 
						|
    def tell(self):
 | 
						|
        """Return the current file position."""
 | 
						|
        self._check_not_closed()
 | 
						|
        if self._mode == _MODE_READ:
 | 
						|
            return self._buffer.tell()
 | 
						|
        elif self._mode == _MODE_WRITE:
 | 
						|
            return self._pos
 | 
						|
 | 
						|
    def fileno(self):
 | 
						|
        """Return the file descriptor for the underlying file."""
 | 
						|
        self._check_not_closed()
 | 
						|
        return self._fp.fileno()
 | 
						|
 | 
						|
    @property
 | 
						|
    def name(self):
 | 
						|
        self._check_not_closed()
 | 
						|
        return self._fp.name
 | 
						|
 | 
						|
    @property
 | 
						|
    def mode(self):
 | 
						|
        return 'wb' if self._mode == _MODE_WRITE else 'rb'
 | 
						|
 | 
						|
    @property
 | 
						|
    def closed(self):
 | 
						|
        """True if this file is closed."""
 | 
						|
        return self._mode == _MODE_CLOSED
 | 
						|
 | 
						|
    def seekable(self):
 | 
						|
        """Return whether the file supports seeking."""
 | 
						|
        return self.readable() and self._buffer.seekable()
 | 
						|
 | 
						|
    def readable(self):
 | 
						|
        """Return whether the file was opened for reading."""
 | 
						|
        self._check_not_closed()
 | 
						|
        return self._mode == _MODE_READ
 | 
						|
 | 
						|
    def writable(self):
 | 
						|
        """Return whether the file was opened for writing."""
 | 
						|
        self._check_not_closed()
 | 
						|
        return self._mode == _MODE_WRITE
 | 
						|
 | 
						|
 | 
						|
def open(file, /, mode='rb', *, level=None, options=None, zstd_dict=None,
 | 
						|
         encoding=None, errors=None, newline=None):
 | 
						|
    """Open a Zstandard compressed file in binary or text mode.
 | 
						|
 | 
						|
    file can be either a file name (given as a str, bytes, or PathLike object),
 | 
						|
    in which case the named file is opened, or it can be an existing file object
 | 
						|
    to read from or write to.
 | 
						|
 | 
						|
    The mode parameter can be 'r', 'rb' (default), 'w', 'wb', 'x', 'xb', 'a',
 | 
						|
    'ab' for binary mode, or 'rt', 'wt', 'xt', 'at' for text mode.
 | 
						|
 | 
						|
    The level, options, and zstd_dict parameters specify the settings the same
 | 
						|
    as ZstdFile.
 | 
						|
 | 
						|
    When using read mode (decompression), the options parameter is a dict
 | 
						|
    representing advanced decompression options. The level parameter is not
 | 
						|
    supported in this case. When using write mode (compression), only one of
 | 
						|
    level, an int representing the compression level, or options, a dict
 | 
						|
    representing advanced compression options, may be passed. In both modes,
 | 
						|
    zstd_dict is a ZstdDict instance containing a trained Zstandard dictionary.
 | 
						|
 | 
						|
    For binary mode, this function is equivalent to the ZstdFile constructor:
 | 
						|
    ZstdFile(filename, mode, ...). In this case, the encoding, errors and
 | 
						|
    newline parameters must not be provided.
 | 
						|
 | 
						|
    For text mode, an ZstdFile object is created, and wrapped in an
 | 
						|
    io.TextIOWrapper instance with the specified encoding, error handling
 | 
						|
    behavior, and line ending(s).
 | 
						|
    """
 | 
						|
 | 
						|
    text_mode = 't' in mode
 | 
						|
    mode = mode.replace('t', '')
 | 
						|
 | 
						|
    if text_mode:
 | 
						|
        if 'b' in mode:
 | 
						|
            raise ValueError(f'Invalid mode: {mode!r}')
 | 
						|
    else:
 | 
						|
        if encoding is not None:
 | 
						|
            raise ValueError('Argument "encoding" not supported in binary mode')
 | 
						|
        if errors is not None:
 | 
						|
            raise ValueError('Argument "errors" not supported in binary mode')
 | 
						|
        if newline is not None:
 | 
						|
            raise ValueError('Argument "newline" not supported in binary mode')
 | 
						|
 | 
						|
    binary_file = ZstdFile(file, mode, level=level, options=options,
 | 
						|
                           zstd_dict=zstd_dict)
 | 
						|
 | 
						|
    if text_mode:
 | 
						|
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
 | 
						|
    else:
 | 
						|
        return binary_file
 |