2025-12-25 upload

This commit is contained in:
“shengyudong”
2025-12-25 11:16:59 +08:00
commit 322ac74336
2241 changed files with 639966 additions and 0 deletions

View File

@@ -0,0 +1,217 @@
# Copyright (c) 2017-present, Gregory Szorc
# All rights reserved.
#
# This software may be modified and distributed under the terms
# of the BSD license. See the LICENSE file for details.
# ruff: noqa: F403, F405
"""Python interface to the Zstandard (zstd) compression library."""
from __future__ import absolute_import, unicode_literals
# This module serves 2 roles:
#
# 1) Export the C or CFFI "backend" through a central module.
# 2) Implement additional functionality built on top of C or CFFI backend.
import builtins
import io
import os
import platform
import sys

# ``Buffer`` (PEP 688) only exists in collections.abc on Python 3.12+;
# older versions fall back to the deprecated ``typing.ByteString`` alias
# under the same name so annotations below work everywhere.
if sys.version_info >= (3, 12):
    from collections.abc import Buffer
else:
    from typing import ByteString as Buffer
# Some Python implementations don't support C extensions. That's why we have
# a CFFI implementation in the first place. The code here import one of our
# "backends" then re-exports the symbols from this module. For convenience,
# we support falling back to the CFFI backend if the C extension can't be
# imported. But for performance reasons, we only do this on unknown Python
# implementation. Notably, for CPython we require the C extension by default.
# Because someone will inevitably want special behavior, the behavior is
# configurable via an environment variable. A potentially better way to handle
# this is to import a special ``__importpolicy__`` module or something
# defining a variable and `setup.py` could write the file with whatever
# policy was specified at build time. Until someone needs it, we go with
# the hacky but simple environment variable approach.
# Backend selection is controlled by an environment variable; "default"
# picks a backend based on the Python implementation (see comment above).
_module_policy = os.environ.get(
    "PYTHON_ZSTANDARD_IMPORT_POLICY", "default"
).strip()
if _module_policy == "default":
    # CPython: require the C extension backend (no fallback).
    if platform.python_implementation() in ("CPython",):
        from .backend_c import *  # type: ignore

        backend = "cext"
    # PyPy: use the CFFI backend.
    elif platform.python_implementation() in ("PyPy",):
        from .backend_cffi import *  # type: ignore

        backend = "cffi"
    else:
        # Unknown implementations: try the C extension, fall back to CFFI.
        try:
            from .backend_c import *

            backend = "cext"
        except ImportError:
            from .backend_cffi import *

            backend = "cffi"
elif _module_policy == "cffi_fallback":
    # Explicit opt-in to C-extension-with-CFFI-fallback on any implementation.
    try:
        from .backend_c import *

        backend = "cext"
    except ImportError:
        from .backend_cffi import *

        backend = "cffi"
elif _module_policy == "rust":
    from .backend_rust import *  # type: ignore

    backend = "rust"
elif _module_policy == "cext":
    from .backend_c import *

    backend = "cext"
elif _module_policy == "cffi":
    from .backend_cffi import *

    backend = "cffi"
else:
    # NOTE(review): the message omits the "rust" policy that is handled
    # above — presumably intentional (experimental backend); confirm.
    raise ImportError(
        "unknown module import policy: %s; use default, cffi_fallback, "
        "cext, or cffi" % _module_policy
    )
# Keep this in sync with python-zstandard.h, rust-ext/src/lib.rs, and debian/changelog.
__version__ = "0.25.0"

# Internal file-mode state constants.
# NOTE(review): not referenced by the visible code in this module —
# presumably used elsewhere or retained for compatibility; confirm.
_MODE_CLOSED = 0
_MODE_READ = 1
_MODE_WRITE = 2
def open(
    filename,
    mode="rb",
    cctx=None,
    dctx=None,
    encoding=None,
    errors=None,
    newline=None,
    closefd=None,
):
    """Open a zstd-compressed stream for reading or writing.

    Returns a :py:class:`ZstdDecompressionReader` when reading binary, a
    :py:class:`ZstdCompressionWriter` when writing binary, and an
    ``io.TextIOWrapper`` around one of those when a text mode is requested.

    :param filename:
        ``bytes``, ``str``, or ``os.PathLike`` naming a file to open, or an
        existing file object (anything with ``read()`` or ``write()``).
    :param mode:
        ``str`` open mode, as accepted by the builtin ``open()``.
    :param cctx:
        ``ZstdCompressor`` used when writing; a default instance is created
        when omitted.
    :param dctx:
        ``ZstdDecompressor`` used when reading; a default instance is
        created when omitted.
    :param encoding:
        Text encoding for text-mode access.
    :param errors:
        Text encoding error handling mode.
    :param newline:
        Newline handling for text mode.
    :param closefd:
        Whether closing the returned object also closes the underlying
        file. Only honored for file objects; files opened from a name are
        always closed.
    :raises ValueError: if *mode* is not a recognized open mode.
    :raises TypeError: if *filename* is neither path-like nor file-like.
    """
    # Strip the text flag; the compression layer always works in binary.
    binary_mode = mode.replace("t", "")

    if binary_mode in ("r", "rb"):
        dctx = dctx or ZstdDecompressor()
        direction = "r"
        raw_mode = "rb"
    elif binary_mode in ("w", "wb", "a", "ab", "x", "xb"):
        cctx = cctx or ZstdCompressor()
        direction = "w"
        # The underlying file is always opened in binary.
        raw_mode = binary_mode if binary_mode.endswith("b") else binary_mode + "b"
    else:
        raise ValueError("Invalid mode: {!r}".format(mode))

    # os.PathLike may be missing on very old interpreters.
    path_types = (
        (str, bytes, os.PathLike) if hasattr(os, "PathLike") else (str, bytes)
    )

    if isinstance(filename, path_types):  # type: ignore
        raw_fh = builtins.open(filename, raw_mode)
        # Files we opened ourselves are always closed with the wrapper.
        closefd = True
    elif hasattr(filename, "read") or hasattr(filename, "write"):
        raw_fh = filename
        closefd = bool(closefd)
    else:
        raise TypeError(
            "filename must be a str, bytes, file or PathLike object"
        )

    if direction == "r":
        stream = dctx.stream_reader(raw_fh, closefd=closefd)
    elif direction == "w":
        stream = cctx.stream_writer(raw_fh, closefd=closefd)
    else:
        raise RuntimeError("logic error in zstandard.open() handling open mode")

    if "b" in binary_mode:
        return stream

    # Text mode: layer a decoder on top of the binary stream.
    return io.TextIOWrapper(
        stream, encoding=encoding, errors=errors, newline=newline
    )
def compress(data: Buffer, level: int = 3) -> bytes:
    """Compress *data* into a single zstd frame using default settings.

    Convenience wrapper equivalent to
    ``ZstdCompressor(level=level).compress(data)``.  When compressing many
    payloads in a loop, performance is better if you construct one
    ``ZstdCompressor`` and call its ``compress()`` repeatedly.

    :param data: bytes-like payload to compress.
    :param level: zstd compression level.
    :return: compressed frame as ``bytes``.
    """
    return ZstdCompressor(level=level).compress(data)
def decompress(data: Buffer, max_output_size: int = 0) -> bytes:
    """Decompress a single zstd frame back to its original bytes.

    Convenience wrapper equivalent to
    ``ZstdDecompressor().decompress(data, max_output_size=max_output_size)``.
    When decompressing many frames in a loop, performance is better if you
    construct one ``ZstdDecompressor`` and call its ``decompress()``
    repeatedly.

    :param data: bytes-like zstd frame.
    :param max_output_size: cap on the decompressed size (0 means default).
    :return: decompressed payload as ``bytes``.
    """
    return ZstdDecompressor().decompress(data, max_output_size=max_output_size)

View File

@@ -0,0 +1,481 @@
# Copyright (c) 2016-present, Gregory Szorc
# All rights reserved.
#
# This software may be modified and distributed under the terms
# of the BSD license. See the LICENSE file for details.
import os
from typing import (
IO,
BinaryIO,
ByteString,
Generator,
Iterable,
List,
Optional,
Set,
Tuple,
Union,
)
# Module-level constants re-exported from the active backend.  Only the
# types are declared here; values are supplied at runtime.

# Flush control values for stream writers / compressobj.
FLUSH_BLOCK: int
FLUSH_FRAME: int
COMPRESSOBJ_FLUSH_FINISH: int
COMPRESSOBJ_FLUSH_BLOCK: int

# Sentinels returned for frame content-size queries.
CONTENTSIZE_UNKNOWN: int
CONTENTSIZE_ERROR: int

MAX_COMPRESSION_LEVEL: int

# Recommended buffer sizes for streaming (de)compression.
COMPRESSION_RECOMMENDED_INPUT_SIZE: int
COMPRESSION_RECOMMENDED_OUTPUT_SIZE: int
DECOMPRESSION_RECOMMENDED_INPUT_SIZE: int
DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE: int

# Bounds for the tunable compression parameters.
BLOCKSIZELOG_MAX: int
BLOCKSIZE_MAX: int
WINDOWLOG_MIN: int
WINDOWLOG_MAX: int
CHAINLOG_MIN: int
CHAINLOG_MAX: int
HASHLOG_MIN: int
HASHLOG_MAX: int
MINMATCH_MIN: int
MINMATCH_MAX: int
SEARCHLOG_MIN: int
SEARCHLOG_MAX: int
SEARCHLENGTH_MIN: int
SEARCHLENGTH_MAX: int
TARGETLENGTH_MIN: int
TARGETLENGTH_MAX: int
LDM_MINMATCH_MIN: int
LDM_MINMATCH_MAX: int
LDM_BUCKETSIZELOG_MAX: int

# Compression strategy identifiers.
STRATEGY_FAST: int
STRATEGY_DFAST: int
STRATEGY_GREEDY: int
STRATEGY_LAZY: int
STRATEGY_LAZY2: int
STRATEGY_BTLAZY2: int
STRATEGY_BTOPT: int
STRATEGY_BTULTRA: int
STRATEGY_BTULTRA2: int

# Dictionary type identifiers.
DICT_TYPE_AUTO: int
DICT_TYPE_RAWCONTENT: int
DICT_TYPE_FULLDICT: int

# Frame format identifiers.
FORMAT_ZSTD1: int
FORMAT_ZSTD1_MAGICLESS: int

# Library/frame metadata.
ZSTD_VERSION: Tuple[int, int, int]
FRAME_HEADER: bytes
MAGIC_NUMBER: int

# Name of the active backend ("cext", "cffi", or "rust") and its feature set.
backend: str
backend_features: Set[str]
__version__: str
class ZstdError(Exception):
    """Exception type raised by this library's operations."""

    ...
class BufferSegment(object):
    """Interface declaration for one segment within a segmented buffer
    (see ``BufferWithSegments``)."""

    # Byte offset of this segment within its parent buffer.
    # NOTE(review): presumed from the name — confirm against backend docs.
    offset: int

    def __len__(self) -> int: ...
    def tobytes(self) -> bytes: ...
class BufferSegments(object):
    """Interface declaration for a sized, indexable sequence of
    ``BufferSegment`` objects."""

    def __len__(self) -> int: ...
    def __getitem__(self, i: int) -> BufferSegment: ...
class BufferWithSegments(object):
    """Interface declaration for a backing buffer carrying explicit
    segment boundaries; indexing yields ``BufferSegment`` views."""

    # Total size of the buffer.
    # NOTE(review): presumed from the name — confirm against backend docs.
    size: int

    def __init__(self, data: ByteString, segments: ByteString): ...
    def __len__(self) -> int: ...
    def __getitem__(self, i: int) -> BufferSegment: ...
    def segments(self): ...
    def tobytes(self) -> bytes: ...
class BufferWithSegmentsCollection(object):
    """Interface declaration for a collection of ``BufferWithSegments``
    objects, indexable as a flat sequence of ``BufferSegment``s."""

    def __init__(self, *args): ...
    def __len__(self) -> int: ...
    def __getitem__(self, i: int) -> BufferSegment: ...
    def size(self) -> int: ...
class ZstdCompressionParameters(object):
    """Interface declaration for low-level zstd compression parameters.

    Instances are constructed directly or derived from a compression
    level via ``from_level()``.
    """

    @staticmethod
    def from_level(
        level: int, source_size: int = ..., dict_size: int = ..., **kwargs
    ) -> "ZstdCompressionParameters": ...
    def __init__(
        self,
        format: int = ...,
        compression_level: int = ...,
        window_log: int = ...,
        hash_log: int = ...,
        chain_log: int = ...,
        search_log: int = ...,
        min_match: int = ...,
        target_length: int = ...,
        strategy: int = ...,
        write_content_size: int = ...,
        write_checksum: int = ...,
        write_dict_id: int = ...,
        job_size: int = ...,
        overlap_log: int = ...,
        force_max_window: int = ...,
        enable_ldm: int = ...,
        ldm_hash_log: int = ...,
        ldm_min_match: int = ...,
        ldm_bucket_size_log: int = ...,
        ldm_hash_rate_log: int = ...,
        threads: int = ...,
    ): ...
    # Read-only accessors mirroring the constructor arguments.
    @property
    def format(self) -> int: ...
    @property
    def compression_level(self) -> int: ...
    @property
    def window_log(self) -> int: ...
    @property
    def hash_log(self) -> int: ...
    @property
    def chain_log(self) -> int: ...
    @property
    def search_log(self) -> int: ...
    @property
    def min_match(self) -> int: ...
    @property
    def target_length(self) -> int: ...
    @property
    def strategy(self) -> int: ...
    @property
    def write_content_size(self) -> int: ...
    @property
    def write_checksum(self) -> int: ...
    @property
    def write_dict_id(self) -> int: ...
    @property
    def job_size(self) -> int: ...
    @property
    def overlap_log(self) -> int: ...
    @property
    def force_max_window(self) -> int: ...
    @property
    def enable_ldm(self) -> int: ...
    @property
    def ldm_hash_log(self) -> int: ...
    @property
    def ldm_min_match(self) -> int: ...
    @property
    def ldm_bucket_size_log(self) -> int: ...
    @property
    def ldm_hash_rate_log(self) -> int: ...
    @property
    def threads(self) -> int: ...
    def estimated_compression_context_size(self) -> int: ...
# Alias of ZstdCompressionParameters — presumably retained for
# backwards compatibility with the older public name; confirm.
class CompressionParameters(ZstdCompressionParameters): ...
class ZstdCompressionDict(object):
    """Interface declaration for a zstd compression dictionary."""

    # Trainer parameters exposed on instances.
    # NOTE(review): semantics presumed from train_dictionary()'s ``k``/``d``
    # arguments — confirm against backend docs.
    k: int
    d: int

    def __init__(
        self,
        data: ByteString,
        dict_type: int = ...,
        k: int = ...,
        d: int = ...,
    ): ...
    def __len__(self) -> int: ...
    def dict_id(self) -> int: ...
    def as_bytes(self) -> bytes: ...
    def precompute_compress(
        self,
        level: int = ...,
        compression_params: ZstdCompressionParameters = ...,
    ): ...
class ZstdCompressionObj(object):
    """Interface declaration for the zlib-style compression object
    returned by ``ZstdCompressor.compressobj()``."""

    def compress(self, data: ByteString) -> bytes: ...
    def flush(self, flush_mode: int = ...) -> bytes: ...
class ZstdCompressionChunker(object):
    """Interface declaration for the chunking compressor returned by
    ``ZstdCompressor.chunker()``."""

    def compress(self, data: ByteString): ...
    def flush(self): ...
    def finish(self): ...
class ZstdCompressionReader(BinaryIO):
    """Interface declaration for the readable compression stream
    returned by ``ZstdCompressor.stream_reader()``."""

    def __enter__(self) -> "ZstdCompressionReader": ...
    def __exit__(self, exc_type, exc_value, exc_tb): ...
    def readable(self) -> bool: ...
    def writable(self) -> bool: ...
    def seekable(self) -> bool: ...
    def readline(self, limit: int = ...) -> bytes: ...
    def readlines(self, hint: int = ...) -> List[bytes]: ...
    def write(self, data: ByteString): ...
    def writelines(self, data: Iterable[bytes]): ...
    def isatty(self) -> bool: ...
    def flush(self): ...
    def close(self): ...
    @property
    def closed(self) -> bool: ...
    def tell(self) -> int: ...
    def readall(self) -> bytes: ...
    # Iterator protocol; ``next`` mirrors ``__next__``.
    def __iter__(self): ...
    def __next__(self): ...
    def next(self): ...
    def read(self, size: int = ...) -> bytes: ...
    def read1(self, size: int = ...) -> bytes: ...
    def readinto(self, b) -> int: ...
    def readinto1(self, b) -> int: ...
class ZstdCompressionWriter(BinaryIO):
    """Interface declaration for the writable compression stream
    returned by ``ZstdCompressor.stream_writer()``."""

    def __enter__(self) -> "ZstdCompressionWriter": ...
    def __exit__(self, exc_type, exc_value, exc_tb): ...
    def memory_size(self) -> int: ...
    def fileno(self) -> int: ...
    def close(self): ...
    @property
    def closed(self) -> bool: ...
    def isatty(self) -> bool: ...
    def readable(self) -> bool: ...
    def readline(self, size: int = ...) -> bytes: ...
    def readlines(self, hint: int = ...) -> List[bytes]: ...
    def seek(self, offset: int, whence: int = ...): ...
    def seekable(self) -> bool: ...
    def truncate(self, size: int = ...): ...
    def writable(self) -> bool: ...
    def writelines(self, lines: Iterable[bytes]): ...
    def read(self, size: int = ...) -> bytes: ...
    def readall(self) -> bytes: ...
    def readinto(self, b): ...
    def write(self, data: ByteString) -> int: ...
    # ``flush_mode`` presumably accepts FLUSH_BLOCK / FLUSH_FRAME — confirm.
    def flush(self, flush_mode: int = ...) -> int: ...
    def tell(self) -> int: ...
class ZstdCompressor(object):
    """Interface declaration for the backend's compressor object, the
    main entry point for compression (one-shot, streaming, chunked)."""

    def __init__(
        self,
        level: int = ...,
        dict_data: Optional[ZstdCompressionDict] = ...,
        compression_params: Optional[ZstdCompressionParameters] = ...,
        write_checksum: Optional[bool] = ...,
        write_content_size: Optional[bool] = ...,
        write_dict_id: Optional[bool] = ...,
        threads: int = ...,
    ): ...
    def memory_size(self) -> int: ...
    # One-shot compression.
    def compress(self, data: ByteString) -> bytes: ...
    # zlib-style incremental interface.
    def compressobj(self, size: int = ...) -> ZstdCompressionObj: ...
    def chunker(
        self, size: int = ..., chunk_size: int = ...
    ) -> ZstdCompressionChunker: ...
    def copy_stream(
        self,
        ifh: IO[bytes],
        ofh: IO[bytes],
        size: int = ...,
        read_size: int = ...,
        write_size: int = ...,
    ) -> Tuple[int, int]: ...
    def stream_reader(
        self,
        source: Union[IO[bytes], ByteString],
        size: int = ...,
        read_size: int = ...,
        *,
        closefd: bool = ...,
    ) -> ZstdCompressionReader: ...
    def stream_writer(
        self,
        writer: IO[bytes],
        size: int = ...,
        write_size: int = ...,
        write_return_read: bool = ...,
        *,
        closefd: bool = ...,
    ) -> ZstdCompressionWriter: ...
    def read_to_iter(
        self,
        reader: Union[IO[bytes], ByteString],
        size: int = ...,
        read_size: int = ...,
        write_size: int = ...,
    ) -> Generator[bytes, None, None]: ...
    def frame_progression(self) -> Tuple[int, int, int]: ...
    def multi_compress_to_buffer(
        self,
        data: Union[
            BufferWithSegments,
            BufferWithSegmentsCollection,
            List[ByteString],
        ],
        threads: int = ...,
    ) -> BufferWithSegmentsCollection: ...
class ZstdDecompressionObj(object):
    """Interface declaration for the zlib-style decompression object
    returned by ``ZstdDecompressor.decompressobj()``."""

    def decompress(self, data: ByteString) -> bytes: ...
    def flush(self, length: int = ...) -> bytes: ...
    @property
    def unused_data(self) -> bytes: ...
    @property
    def unconsumed_tail(self) -> bytes: ...
    @property
    def eof(self) -> bool: ...
class ZstdDecompressionReader(BinaryIO):
    """Interface declaration for the readable decompression stream
    returned by ``ZstdDecompressor.stream_reader()``."""

    def __enter__(self) -> "ZstdDecompressionReader": ...
    def __exit__(self, exc_type, exc_value, exc_tb): ...
    def readable(self) -> bool: ...
    def writable(self) -> bool: ...
    def seekable(self) -> bool: ...
    def readline(self, size: int = ...): ...
    def readlines(self, hint: int = ...): ...
    def write(self, data: ByteString): ...
    def writelines(self, lines: Iterable[bytes]): ...
    def isatty(self) -> bool: ...
    def flush(self): ...
    def close(self): ...
    @property
    def closed(self) -> bool: ...
    def tell(self) -> int: ...
    def readall(self) -> bytes: ...
    # Iterator protocol; ``next`` mirrors ``__next__``.
    def __iter__(self): ...
    def __next__(self): ...
    def next(self): ...
    def read(self, size: int = ...) -> bytes: ...
    def readinto(self, b) -> int: ...
    def read1(self, size: int = ...) -> bytes: ...
    def readinto1(self, b) -> int: ...
    def seek(self, pos: int, whence: int = ...) -> int: ...
class ZstdDecompressionWriter(BinaryIO):
    """Interface declaration for the writable decompression stream
    returned by ``ZstdDecompressor.stream_writer()``."""

    def __enter__(self) -> "ZstdDecompressionWriter": ...
    def __exit__(self, exc_type, exc_value, exc_tb): ...
    def memory_size(self) -> int: ...
    def close(self): ...
    @property
    def closed(self) -> bool: ...
    def fileno(self) -> int: ...
    def flush(self): ...
    def isatty(self) -> bool: ...
    def readable(self) -> bool: ...
    def readline(self, size: int = ...): ...
    def readlines(self, hint: int = ...): ...
    def seek(self, offset: int, whence: int = ...): ...
    def seekable(self) -> bool: ...
    def tell(self): ...
    def truncate(self, size: int = ...): ...
    def writable(self) -> bool: ...
    def writelines(self, lines: Iterable[bytes]): ...
    def read(self, size: int = ...): ...
    def readall(self): ...
    def readinto(self, b): ...
    def write(self, data: ByteString) -> int: ...
class ZstdDecompressor(object):
    """Interface declaration for the backend's decompressor object, the
    main entry point for decompression (one-shot, streaming, chunked)."""

    def __init__(
        self,
        dict_data: Optional[ZstdCompressionDict] = ...,
        max_window_size: int = ...,
        format: int = ...,
    ): ...
    def memory_size(self) -> int: ...
    # One-shot decompression.
    def decompress(
        self,
        data: ByteString,
        max_output_size: int = ...,
        read_across_frames: bool = ...,
        allow_extra_data: bool = ...,
    ) -> bytes: ...
    def stream_reader(
        self,
        source: Union[IO[bytes], ByteString],
        read_size: int = ...,
        read_across_frames: bool = ...,
        *,
        # Was an unannotated ``closefd=False``; annotated and defaulted
        # with ``...`` to match every other ``closefd`` in this stub.
        closefd: bool = ...,
    ) -> ZstdDecompressionReader: ...
    def decompressobj(
        self,
        write_size: int = ...,
        # Was ``= False``; normalized to ``...`` per this stub's convention.
        read_across_frames: bool = ...,
    ) -> ZstdDecompressionObj: ...
    def read_to_iter(
        self,
        reader: Union[IO[bytes], ByteString],
        read_size: int = ...,
        write_size: int = ...,
        skip_bytes: int = ...,
    ) -> Generator[bytes, None, None]: ...
    def stream_writer(
        self,
        writer: IO[bytes],
        write_size: int = ...,
        write_return_read: bool = ...,
        *,
        closefd: bool = ...,
    ) -> ZstdDecompressionWriter: ...
    def copy_stream(
        self,
        ifh: IO[bytes],
        ofh: IO[bytes],
        read_size: int = ...,
        write_size: int = ...,
    ) -> Tuple[int, int]: ...
    def decompress_content_dict_chain(
        # Was ``list[ByteString]``; normalized to ``List`` for consistency
        # with the typing.List usage elsewhere in this file.
        self, frames: List[ByteString]
    ) -> bytes: ...
    def multi_decompress_to_buffer(
        self,
        frames: Union[
            BufferWithSegments,
            BufferWithSegmentsCollection,
            List[ByteString],
        ],
        decompressed_sizes: ByteString = ...,
        threads: int = ...,
    ) -> BufferWithSegmentsCollection: ...
class FrameParameters(object):
    """Interface declaration for parsed zstd frame parameters, as
    returned by ``get_frame_parameters()``."""

    content_size: int
    window_size: int
    dict_id: int
    has_checksum: bool
# Estimate of the memory a decompression context requires.
def estimate_decompression_context_size() -> int: ...
# Content size recorded in a frame header; presumably may return the
# CONTENTSIZE_* sentinels declared above — confirm against backend docs.
def frame_content_size(data: ByteString) -> int: ...
# Size in bytes of the frame header at the start of ``data``.
def frame_header_size(data: ByteString) -> int: ...
# Parse a frame header into a FrameParameters instance.
def get_frame_parameters(
    data: ByteString, format: Optional[int] = None
) -> FrameParameters: ...
# Train a compression dictionary from sample payloads.
def train_dictionary(
    dict_size: int,
    # Was ``list[ByteString]``; normalized to ``List`` for consistency
    # with the typing.List usage elsewhere in this file.
    samples: List[ByteString],
    k: int = ...,
    d: int = ...,
    f: int = ...,
    split_point: float = ...,
    accel: int = ...,
    notifications: int = ...,
    dict_id: int = ...,
    level: int = ...,
    steps: int = ...,
    threads: int = ...,
) -> ZstdCompressionDict: ...
# Signature declaration for zstandard.open(); the implementation opens a
# zstd (de)compression stream over a path or file object.
def open(
    filename: Union[bytes, str, os.PathLike, BinaryIO],
    mode: str = ...,
    cctx: Optional[ZstdCompressor] = ...,
    dctx: Optional[ZstdDecompressor] = ...,
    encoding: Optional[str] = ...,
    errors: Optional[str] = ...,
    newline: Optional[str] = ...,
    closefd: bool = ...,
): ...
# One-shot compression convenience function.
def compress(data: ByteString, level: int = ...) -> bytes: ...
# One-shot decompression convenience function.
def decompress(data: ByteString, max_output_size: int = ...) -> bytes: ...

File diff suppressed because it is too large Load Diff