2025-12-25 upload

2025-12-25 11:16:59 +08:00
commit 322ac74336
2241 changed files with 639966 additions and 0 deletions
--- a/venv/Lib/site-packages/urwid/str_util.py
+++ b/venv/Lib/site-packages/urwid/str_util.py
@@ -0,0 +1,354 @@
+# Urwid unicode character processing tables
+#    Copyright (C) 2004-2011  Ian Ward
+#
+#    This library is free software; you can redistribute it and/or
+#    modify it under the terms of the GNU Lesser General Public
+#    License as published by the Free Software Foundation; either
+#    version 2.1 of the License, or (at your option) any later version.
+#
+#    This library is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#    Lesser General Public License for more details.
+#
+#    You should have received a copy of the GNU Lesser General Public
+#    License along with this library; if not, write to the Free Software
+#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# Urwid web site: https://urwid.org/
+
+
+from __future__ import annotations
+
+import re
+import typing
+import warnings
+
+import wcwidth
+
+if typing.TYPE_CHECKING:
+    from typing_extensions import Literal
+
+# Matches text consisting only of characters in the range space (0x20) through
+# tilde (0x7E), i.e. printable ASCII.  The empty string matches as well, and
+# because of how ``$`` works in ``re`` a single trailing newline is tolerated.
+SAFE_ASCII_RE = re.compile(r"^[ -~]*$")
+# Same pattern as SAFE_ASCII_RE, compiled for ``bytes`` input.
+SAFE_ASCII_BYTES_RE = re.compile(rb"^[ -~]*$")
+
+# Module-wide mode used when interpreting byte strings.  Starts as "narrow";
+# updated by set_byte_encoding() and reported by get_byte_encoding().
+_byte_encoding: Literal["utf8", "narrow", "wide"] = "narrow"
+
+
+def get_char_width(char: str) -> Literal[0, 1, 2]:
+    """Return the number of screen columns occupied by ``char``.
+
+    The width is looked up with ``wcwidth.wcwidth``; a negative result from
+    that lookup is reported as a width of 0.
+    """
+    columns = wcwidth.wcwidth(char)
+    return columns if columns >= 0 else 0
+
+
+def get_width(o: int) -> Literal[0, 1, 2]:
+    """Return the screen column width of the character with unicode ordinal ``o``."""
+    character = chr(o)
+    return get_char_width(character)
+
+
+def decode_one(text: bytes | str, pos: int) -> tuple[int, int]:
+    """
+    Return (ordinal at pos, next position) for UTF-8 encoded text.
+
+    :param text: UTF-8 encoded data; a ``str`` is accepted as well, in which
+        case the ordinal of each character is treated as one encoded byte
+    :param pos: offset of the first byte of the sequence to decode
+    :returns: ``(ordinal, pos + sequence length)`` for a well-formed sequence;
+        ``(ord("?"), pos + 1)`` when the sequence is truncated, malformed,
+        overlong, or encodes a value above U+10FFFF
+    :raises ValueError: if the bytes at ``pos`` cannot be read
+        (for example ``pos`` lies outside of ``text``)
+    """
+    lt = len(text) - pos
+
+    # Bytes that are not available stay 0, which never passes the
+    # continuation-byte test (0b10xxxxxx) below.
+    b2 = 0
+    b3 = 0
+    b4 = 0
+
+    try:
+        if isinstance(text, str):
+            b1 = ord(text[pos])
+            if lt > 1:
+                b2 = ord(text[pos + 1])
+            if lt > 2:
+                b3 = ord(text[pos + 2])
+            if lt > 3:
+                b4 = ord(text[pos + 3])
+        else:
+            b1 = text[pos]
+            if lt > 1:
+                b2 = text[pos + 1]
+            if lt > 2:
+                b3 = text[pos + 2]
+            if lt > 3:
+                b4 = text[pos + 3]
+    except Exception as e:
+        raise ValueError(f"{e}: text={text!r}, pos={pos!r}, lt={lt!r}").with_traceback(e.__traceback__) from e
+
+    # Single byte (0xxxxxxx): the byte is the ordinal.
+    if not b1 & 0x80:
+        return b1, pos + 1
+
+    # Every failure consumes exactly one byte and yields "?".
+    error = ord("?"), pos + 1
+
+    if lt < 2:
+        return error
+    if b1 & 0xE0 == 0xC0:
+        # Two byte sequence: 110xxxxx 10xxxxxx
+        if b2 & 0xC0 != 0x80:
+            return error
+        o = ((b1 & 0x1F) << 6) | (b2 & 0x3F)
+        # Values below 0x80 would be an overlong encoding.
+        if o >= 0x80:
+            return o, pos + 2
+        return error
+    if lt < 3:
+        return error
+    if b1 & 0xF0 == 0xE0:
+        # Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
+        if b2 & 0xC0 != 0x80:
+            return error
+        if b3 & 0xC0 != 0x80:
+            return error
+        o = ((b1 & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F)
+        # Values below 0x800 would be an overlong encoding.
+        if o >= 0x800:
+            return o, pos + 3
+        return error
+    if lt < 4:
+        return error
+    if b1 & 0xF8 == 0xF0:
+        # Four byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        if b2 & 0xC0 != 0x80:
+            return error
+        if b3 & 0xC0 != 0x80:
+            return error
+        if b4 & 0xC0 != 0x80:
+            return error
+        o = ((b1 & 0x07) << 18) | ((b2 & 0x3F) << 12) | ((b3 & 0x3F) << 6) | (b4 & 0x3F)
+        # Reject overlong forms and anything past the last Unicode code point:
+        # the ordinal is later handed to chr(), which raises above 0x10FFFF.
+        if 0x10000 <= o <= 0x10FFFF:
+            return o, pos + 4
+        return error
+    return error
+
+
+def decode_one_uni(text: str, i: int) -> tuple[int, int]:
+    """
+    Counterpart of decode_one for unicode strings.
+
+    Return (ordinal of the character at i, i + 1).
+    """
+    ordinal = ord(text[i])
+    following = i + 1
+    return ordinal, following
+
+
+def decode_one_right(text: bytes, pos: int) -> tuple[int, int] | None:
+    """
+    Decode the UTF-8 sequence that ends at pos, scanning right to left.
+
+    pos is assumed to be on the trailing byte of a utf-8 sequence.
+
+    :param text: UTF-8 encoded bytes
+    :param pos: offset of the last byte of the sequence to decode
+    :returns: ``(ordinal, offset just before the sequence)`` when a
+        non-continuation byte is found within the 4 bytes ending at ``pos``;
+        ``(ord("?"), pos - 1)`` when all 4 of those bytes are continuation
+        bytes; ``None`` when the start of ``text`` is reached first
+    :raises TypeError: if ``text`` is not ``bytes``
+    """
+    if not isinstance(text, bytes):
+        raise TypeError(text)
+    error = ord("?"), pos - 1
+    # A UTF-8 sequence is at most 4 bytes long (1 lead byte followed by up to
+    # 3 continuation bytes), so never look back further than pos - 3.
+    for p in range(pos, pos - 4, -1):
+        if p < 0:
+            # Ran off the start of the text without finding a lead byte.
+            return None
+        if text[p] & 0xC0 != 0x80:
+            o, _next_pos = decode_one(text, p)
+            return o, p - 1
+    return error
+
+
+def set_byte_encoding(enc: Literal["utf8", "narrow", "wide"]) -> None:
+    """Select the module-wide byte encoding mode.
+
+    :param enc: one of ``"utf8"``, ``"narrow"`` or ``"wide"``
+    :raises ValueError: if ``enc`` is not one of the accepted names
+    """
+    global _byte_encoding  # noqa: PLW0603  # pylint: disable=global-statement
+    if enc in {"utf8", "narrow", "wide"}:
+        _byte_encoding = enc
+        return
+    raise ValueError(enc)
+
+
+def get_byte_encoding() -> Literal["utf8", "narrow", "wide"]:
+    """Return the byte encoding mode currently stored in ``_byte_encoding``."""
+    return _byte_encoding
+
+
+def calc_string_text_pos(text: str, start_offs: int, end_offs: int, pref_col: int) -> tuple[int, int]:
+    """
+    Find the text position closest to screen column pref_col.
+
+    start_offs is the offset into text treated as screen column 0 and
+    end_offs is the (exclusive) end of the range that is searched.
+
+    :param text: string
+    :param start_offs: starting text position
+    :param end_offs: ending text position
+    :param pref_col: target column
+    :returns: (position, actual_col)
+    :raises ValueError: if start_offs is greater than end_offs
+
+    .. note:: this method is a simplified version of `wcwidth.wcswidth` and ideally should be in wcwidth package.
+    """
+    if start_offs > end_offs:
+        raise ValueError((start_offs, end_offs))
+
+    position = start_offs
+    column = 0
+    while position < end_offs:
+        next_column = column + get_char_width(text[position])
+        if next_column > pref_col:
+            # This character would overshoot the target column: stop before it.
+            return position, column
+        column = next_column
+        position += 1
+
+    return end_offs, column
+
+
+def calc_text_pos(text: str | bytes, start_offs: int, end_offs: int, pref_col: int) -> tuple[int, int]:
+    """
+    Find the text position closest to screen column pref_col.
+
+    start_offs is the offset into text treated as screen column 0 and
+    end_offs is the end of the range to search.
+
+    text may be unicode or a byte string in the target _byte_encoding
+
+    Returns (position, actual_col).
+
+    Raises ValueError if start_offs > end_offs and TypeError if text is
+    neither str nor bytes.
+    """
+    if start_offs > end_offs:
+        raise ValueError((start_offs, end_offs))
+
+    if isinstance(text, str):
+        return calc_string_text_pos(text, start_offs, end_offs, pref_col)
+
+    if not isinstance(text, bytes):
+        raise TypeError(text)
+
+    if _byte_encoding != "utf8":
+        # "wide" and "narrow": every byte is counted as one column
+        target = start_offs + pref_col
+        if target >= end_offs:
+            return end_offs, end_offs - start_offs
+        if _byte_encoding == "wide" and within_double_byte(text, start_offs, target) == 2:
+            # Do not land on the second half of a double-byte character.
+            target -= 1
+        return target, target - start_offs
+
+    position, column = start_offs, 0
+    while position < end_offs:
+        ordinal, following = decode_one(text, position)
+        next_column = column + get_width(ordinal)
+        if next_column > pref_col:
+            break
+        position, column = following, next_column
+    return position, column
+
+
+def calc_width(text: str | bytes, start_offs: int, end_offs: int) -> int:
+    """
+    Return the screen column width of text between start_offs and end_offs.
+
+    text may be unicode or a byte string in the target _byte_encoding
+
+    Some characters are wide (take two columns) and others affect the
+    previous character (take zero columns).  For ``str`` input, and for byte
+    input in "utf8" mode, the per-character widths are summed over
+    text[start_offs:end_offs].  For any other byte encoding mode every byte
+    counts as one column.
+
+    :param text: string or byte string to measure
+    :param start_offs: first offset of the measured range
+    :param end_offs: offset just past the measured range
+    :returns: width in screen columns
+    :raises ValueError: if start_offs is greater than end_offs
+
+    In "utf8" mode a ``UnicodeWarning`` is issued when the slice is not valid
+    UTF-8 (for example an offset falls inside a multi-byte character); the
+    width is then computed byte by byte with ``decode_one``.
+    """
+
+    if start_offs > end_offs:
+        raise ValueError((start_offs, end_offs))
+
+    if isinstance(text, str):
+        return sum(get_char_width(char) for char in text[start_offs:end_offs])
+
+    if _byte_encoding == "utf8":
+        try:
+            decoded = text[start_offs:end_offs].decode("utf-8")
+        except UnicodeDecodeError as exc:
+            warnings.warn(
+                "`calc_width` with text encoded to bytes can produce incorrect results "
+                f"due to possible offset in the middle of character: {exc}",
+                UnicodeWarning,
+                stacklevel=2,
+            )
+        else:
+            return sum(get_char_width(char) for char in decoded)
+
+        # Fallback for undecodable slices: walk the bytes one sequence at a
+        # time; decode_one substitutes "?" for anything it cannot decode.
+        i = start_offs
+        sc = 0
+        while i < end_offs:
+            o, i = decode_one(text, i)
+            sc += get_width(o)
+        return sc
+    # "wide", "narrow" or all printable ASCII, just return the character count
+    return end_offs - start_offs
+
+
+def is_wide_char(text: str | bytes, offs: int) -> bool:
+    """
+    Report whether the character starting at offs in text is two columns wide.
+
+    text may be unicode or a byte string in the target _byte_encoding
+
+    Raises TypeError if text is neither str nor bytes.
+    """
+    if isinstance(text, str):
+        width = get_char_width(text[offs])
+        return width == 2
+    if not isinstance(text, bytes):
+        raise TypeError(text)
+
+    encoding = _byte_encoding
+    if encoding == "utf8":
+        ordinal = decode_one(text, offs)[0]
+        return get_width(ordinal) == 2
+    # Only "wide" mode has double-byte characters; "narrow" is never wide.
+    return encoding == "wide" and within_double_byte(text, offs, offs) == 1
+
+
+def move_prev_char(text: str | bytes, start_offs: int, end_offs: int) -> int:
+    """
+    Return the position of the character before end_offs.
+
+    :param text: unicode string or byte string in the target _byte_encoding
+    :param start_offs: lower bound of the range; the result is never below it
+        in "utf8" mode
+    :param end_offs: offset just past the character of interest
+    :returns: offset at which the character preceding end_offs starts
+    :raises ValueError: if start_offs is not less than end_offs
+    :raises TypeError: if text is neither str nor bytes
+    """
+    if start_offs >= end_offs:
+        raise ValueError((start_offs, end_offs))
+    if isinstance(text, str):
+        return end_offs - 1
+    if not isinstance(text, bytes):
+        raise TypeError(text)
+    if _byte_encoding == "utf8":
+        o = end_offs - 1
+        # Skip back over continuation bytes (10xxxxxx), but stop at start_offs:
+        # on malformed input an unbounded scan would run past the start of the
+        # range and wrap around through negative indexes.
+        while o > start_offs and text[o] & 0xC0 == 0x80:
+            o -= 1
+        return o
+    if _byte_encoding == "wide" and within_double_byte(text, start_offs, end_offs - 1) == 2:
+        return end_offs - 2
+    return end_offs - 1
+
+
+def move_next_char(text: str | bytes, start_offs: int, end_offs: int) -> int:
+    """
+    Return the position of the character after start_offs.
+
+    Raises ValueError if start_offs is not less than end_offs and TypeError
+    if text is neither str nor bytes.
+    """
+    if start_offs >= end_offs:
+        raise ValueError((start_offs, end_offs))
+
+    if isinstance(text, str):
+        return start_offs + 1
+    if not isinstance(text, bytes):
+        raise TypeError(text)
+
+    if _byte_encoding == "utf8":
+        # The next character starts at the first byte after start_offs that is
+        # not a continuation byte (10xxxxxx); otherwise at end_offs.
+        for candidate in range(start_offs + 1, end_offs):
+            if text[candidate] & 0xC0 != 0x80:
+                return candidate
+        return end_offs
+
+    on_first_half = _byte_encoding == "wide" and within_double_byte(text, start_offs, start_offs) == 1
+    step = 2 if on_first_half else 1
+    return start_offs + step
+
+
+def within_double_byte(text: bytes, line_start: int, pos: int) -> Literal[0, 1, 2]:
+    """Return whether pos is within a double-byte encoded character.
+
+    text -- byte string in question
+    line_start -- offset of beginning of line (<= pos)
+    pos -- offset in question
+
+    Return values:
+    0 -- not within dbe char
+    1 -- pos is on the 1st half of a dbe char
+    2 -- pos is on the 2nd half of a dbe char
+
+    Raises TypeError if text is not bytes.
+    """
+    if not isinstance(text, bytes):
+        raise TypeError(text)
+    current = text[pos]
+
+    if current < 0x80:
+        # A byte in 0x40..0x7E might be the second half of a big5, uhc or gbk
+        # character, but only when the byte before it is a first half.
+        if (
+            0x40 <= current < 0x7F
+            and pos != line_start
+            and text[pos - 1] >= 0x81
+            and within_double_byte(text, line_start, pos - 1) == 1
+        ):
+            return 2
+        return 0
+
+    # Walk left over the run of high bytes (>= 0x80) that ends at pos; the
+    # parity of the run length tells which half pos falls on.
+    before_run = pos - 1
+    while before_run >= line_start and text[before_run] >= 0x80:
+        before_run -= 1
+
+    return 1 if (pos - before_run) % 2 else 2