2025-12-25 upload

This commit is contained in:
"shengyudong"
2025-12-25 11:16:59 +08:00
commit 322ac74336
2241 changed files with 639966 additions and 0 deletions

View File

@@ -0,0 +1,167 @@
import re
import sys
# Options that were turned into regular mitmproxy options: they are no longer
# command-line flags and must now be set via `--set <name>=value`.
DEPRECATED = """
--confdir
-Z
--body-size-limit
--stream
--palette
--palette-transparent
--follow
--order
--no-mouse
--reverse
--http2-priority
--no-http2-priority
--no-websocket
--websocket
--upstream-bind-address
--ciphers-client
--ciphers-server
--client-certs
--no-upstream-cert
--add-upstream-certs-to-client-chain
--upstream-trusted-confdir
--upstream-trusted-ca
--ssl-version-client
--ssl-version-server
--no-onboarding
--onboarding-host
--onboarding-port
--server-replay-use-header
--no-pop
--replay-ignore-content
--replay-ignore-payload-param
--replay-ignore-param
--replay-ignore-host
--replace-from-file
"""
# Options that were renamed to a different command-line flag (see REPLACEMENTS
# for the mapping).
REPLACED = """
-t
-u
--wfile
-a
--afile
-z
-b
--bind-address
--port
-I
--ignore
--tcp
--cert
--insecure
-c
--replace
--replacements
-i
-f
--filter
--socks
--server-replay-nopop
"""
# Maps an old flag to its replacement: the new option name (for DEPRECATED
# entries), the new flag spelling (for REPLACED entries), or a list of flags
# when a single old option was split into several new ones.
REPLACEMENTS = {
    "--stream": "stream_large_bodies",
    "--palette": "console_palette",
    "--palette-transparent": "console_palette_transparent:",
    "--follow": "console_focus_follow",
    "--order": "view_order",
    "--no-mouse": "console_mouse",
    "--reverse": "view_order_reversed",
    "--no-websocket": "websocket",
    "--no-upstream-cert": "upstream_cert",
    "--upstream-trusted-confdir": "ssl_verify_upstream_trusted_confdir",
    "--upstream-trusted-ca": "ssl_verify_upstream_trusted_ca",
    "--no-onboarding": "onboarding",
    "--no-pop": "server_replay_reuse",
    "--replay-ignore-content": "server_replay_ignore_content",
    "--replay-ignore-payload-param": "server_replay_ignore_payload_params",
    "--replay-ignore-param": "server_replay_ignore_params",
    "--replay-ignore-host": "server_replay_ignore_host",
    "--replace-from-file": "replacements (use @ to specify path)",
    "-t": "--stickycookie",
    "-u": "--stickyauth",
    "--wfile": "--save-stream-file",
    "-a": "-w Prefix path with + to append.",
    "--afile": "-w Prefix path with + to append.",
    "-z": "--anticomp",
    "-b": "--listen-host",
    "--bind-address": "--listen-host",
    "--port": "--listen-port",
    "-I": "--ignore-hosts",
    "--ignore": "--ignore-hosts",
    "--tcp": "--tcp-hosts",
    "--cert": "--certs",
    "--insecure": "--ssl-insecure",
    "-c": "-C",
    "--replace": ["--modify-body", "--modify-headers"],
    "--replacements": ["--modify-body", "--modify-headers"],
    "-i": "--intercept",
    "-f": "--view-filter",
    "--filter": "--view-filter",
    "--socks": "--mode socks5",
    "--server-replay-nopop": "--server-replay-reuse",
}
def check():
    """Scan sys.argv for removed/renamed mitmproxy flags and print a hint
    with the modern replacement for each one found."""
    args = sys.argv[1:]
    print()
    if "-U" in args:
        print("-U is deprecated, please use --mode upstream:SPEC instead")
    if "-T" in args:
        print("-T is deprecated, please use --mode transparent instead")
    for removed in ("-e", "--eventlog", "--norefresh"):
        if removed in args:
            print(f"{removed} has been removed.")
    for auth_flag in ("--nonanonymous", "--singleuser", "--htpasswd"):
        if auth_flag in args:
            print(
                "{} is deprecated.\n"
                "Please use `--proxyauth SPEC` instead.\n"
                'SPEC Format: "username:pass", "any" to accept any user/pass combination,\n'
                '"@path" to use an Apache htpasswd file, or\n'
                '"ldap[s]:url_server_ldap[:port]:dn_auth:password:dn_subtree[?search_filter_key=...]" '
                "for LDAP authentication.".format(auth_flag)
            )
    for renamed in REPLACED.splitlines():
        if renamed in args:
            replacement = REPLACEMENTS.get(renamed)
            # An option may have been split into several new flags.
            targets = replacement if isinstance(replacement, list) else [replacement]
            print(
                "{} is deprecated.\nPlease use `{}` instead.".format(
                    renamed, "` or `".join(targets)
                )
            )
    for old_flag in DEPRECATED.splitlines():
        if old_flag in args:
            # Fall back to deriving the option name from the flag itself.
            new_name = (
                REPLACEMENTS.get(old_flag, None)
                or old_flag.lstrip("-").replace("-", "_")
            )
            print(
                "{} is deprecated.\n"
                "Please use `--set {}=value` instead.\n"
                "To show all options and their default values use --options".format(
                    old_flag, new_name
                )
            )
    # Check for underscores in the options. Options always follow '--'.
    for argument in args:
        if re.search(r"[-]{2}((.*?_)(.*?(\s|$)))+", argument) is not None:
            print(
                "{} uses underscores, please use hyphens {}".format(
                    argument, argument.replace("_", "-")
                )
            )

View File

@@ -0,0 +1,100 @@
import asyncio
import os
import sys
import time
from collections.abc import Coroutine
from collections.abc import Iterator
from contextlib import contextmanager
from mitmproxy.utils import human
# Strong references to tasks that must survive until they complete.
_KEEP_ALIVE = set()


def create_task(
    coro: Coroutine,
    *,
    name: str,
    keep_ref: bool,
    client: tuple | None = None,
) -> asyncio.Task:
    """
    Wrapper around `asyncio.create_task`.
    - Use `keep_ref` to keep an internal reference.
      This ensures that the task is not garbage collected mid-execution if no other reference is kept.
    - Use `client` to pass the client address as additional debug info on the task.
    """
    task = asyncio.create_task(coro)  # noqa: TID251
    set_task_debug_info(task, name=name, client=client)
    if keep_ref and not task.done():
        # The event loop only keeps weak references to tasks, so a task that
        # isn't referenced elsewhere may be garbage collected before it's done.
        _KEEP_ALIVE.add(task)
        task.add_done_callback(_KEEP_ALIVE.discard)
    return task
def set_task_debug_info(
task: asyncio.Task,
*,
name: str,
client: tuple | None = None,
) -> None:
"""Set debug info for an externally-spawned task."""
task.created = time.time() # type: ignore
if __debug__ is True and (test := os.environ.get("PYTEST_CURRENT_TEST", None)):
name = f"{name} [created in {test}]"
task.set_name(name)
if client:
task.client = client # type: ignore
def set_current_task_debug_info(
    *,
    name: str,
    client: tuple | None = None,
) -> None:
    """Set debug info for the current task."""
    current = asyncio.current_task()
    # Only meaningful from within a task running on the event loop.
    assert current
    set_task_debug_info(current, name=name, client=client)
def task_repr(task: asyncio.Task) -> str:
    """Get a task representation with debug info."""
    created: float = getattr(task, "created", 0)
    age = f" (age: {time.time() - created:.0f}s)" if created else ""
    addr = getattr(task, "client", "")
    prefix = f"{human.format_address(addr)}: " if addr else ""
    return f"{prefix}{task.get_name()}{age}"
@contextmanager
def install_exception_handler(handler) -> Iterator[None]:
    """Temporarily install `handler` as the running loop's exception handler,
    restoring the previous handler on exit."""
    loop = asyncio.get_running_loop()
    previous = loop.get_exception_handler()
    loop.set_exception_handler(handler)
    try:
        yield
    finally:
        loop.set_exception_handler(previous)
@contextmanager
def set_eager_task_factory() -> Iterator[None]:
    """Enable eager task execution (Python 3.12+) for the duration of the
    context; a no-op on older interpreters."""
    loop = asyncio.get_running_loop()
    if sys.version_info < (3, 12):  # pragma: no cover
        yield
        return
    previous = loop.get_task_factory()
    loop.set_task_factory(asyncio.eager_task_factory)  # type: ignore
    try:
        yield
    finally:
        loop.set_task_factory(previous)

View File

@@ -0,0 +1,13 @@
def setbit(byte, offset, value):
    """
    Set a bit in a byte to 1 if value is truthy, 0 if not.
    """
    mask = 1 << offset
    return (byte | mask) if value else (byte & ~mask)
def getbit(byte, offset):
    """Return True if the bit at `offset` in `byte` is set."""
    return byte >> offset & 1 == 1

View File

@@ -0,0 +1,37 @@
import importlib
import inspect
import os.path
class Data:
    """Locates package data files relative to a named module's directory."""

    def __init__(self, name):
        self.name = name
        module = importlib.import_module(name)
        source = inspect.getsourcefile(module)
        # Only source-based modules are supported (no frozen/builtin modules).
        assert source is not None
        self.dirname = os.path.abspath(os.path.dirname(source))

    def push(self, subpath):
        """
        Change the data object to a path relative to the module.
        """
        child = Data(self.name)
        child.dirname = os.path.normpath(os.path.join(self.dirname, subpath))
        return child

    def path(self, path):
        """
        Returns a path to the package data housed at 'path' under this
        module. Path can be a path to a file, or to a directory.
        This function will raise ValueError if the path does not exist.
        """
        fullpath = os.path.normpath(os.path.join(self.dirname, path))
        if not os.path.exists(fullpath):
            raise ValueError("dataPath: %s does not exist." % fullpath)
        return fullpath
# Data instance rooted one directory above this module (the package root).
pkg_data = Data(__name__).push("..")

View File

@@ -0,0 +1,131 @@
import asyncio
import gc
import linecache
import os
import platform
import signal
import sys
import threading
import traceback
from collections import Counter
from contextlib import redirect_stdout
from OpenSSL import SSL
from mitmproxy import version
from mitmproxy.utils import asyncio_utils
def dump_system_info():
    """Return a multi-line summary of mitmproxy, Python, OpenSSL and platform
    versions, for bug reports and --version output."""
    openssl_version: str | bytes = SSL.SSLeay_version(SSL.SSLEAY_VERSION)
    # Older pyOpenSSL returns bytes here.
    if isinstance(openssl_version, bytes):
        openssl_version = openssl_version.decode()
    return "\n".join(
        [
            f"Mitmproxy: {version.get_dev_version()}",
            f"Python: {platform.python_version()}",
            f"OpenSSL: {openssl_version}",
            f"Platform: {platform.platform()}",
        ]
    )
def dump_info(signal=None, frame=None, file=sys.stdout):  # pragma: no cover
    """Print a debugging snapshot of the process to `file`: resource usage,
    open files/connections (when psutil is available), threads, object
    counts, and asyncio tasks.

    The (signal, frame) parameters match the `signal.signal` handler
    convention so this can be installed directly as a signal handler
    (see `register_info_dumpers`).
    """
    with redirect_stdout(file):
        print("****************************************************")
        print("Summary")
        print("=======")
        # psutil is an optional dependency; degrade gracefully without it.
        try:
            import psutil
        except ModuleNotFoundError:
            print("(psutil not installed, skipping some debug info)")
        else:
            p = psutil.Process()
            print("num threads: ", p.num_threads())
            # num_fds is POSIX-only.
            if hasattr(p, "num_fds"):
                print("num fds: ", p.num_fds())
            print("memory: ", p.memory_info())
            print()
            print("Files")
            print("=====")
            for i in p.open_files():
                print(i)
            print()
            print("Connections")
            print("===========")
            for i in p.connections():
                print(i)
        print()
        print("Threads")
        print("=======")
        bthreads = []
        for i in threading.enumerate():
            if hasattr(i, "_threadinfo"):
                # Threads with a _threadinfo() method describe themselves;
                # collect them and print sorted by start time below.
                bthreads.append(i)
            else:
                print(i.name)
        bthreads.sort(key=lambda x: getattr(x, "_thread_started", 0))
        for i in bthreads:
            print(i._threadinfo())
        print()
        print("Memory")
        print("======")
        gc.collect()
        objs = Counter(str(type(i)) for i in gc.get_objects())
        for cls, count in objs.most_common(20):
            print(f"{count} {cls}")
        print()
        print("Memory (mitmproxy only)")
        print("=======================")
        mitm_objs = Counter({k: v for k, v in objs.items() if "mitmproxy" in k})
        for cls, count in mitm_objs.most_common(20):
            print(f"{count} {cls}")
        # Only dump asyncio tasks if we're inside a running event loop.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            pass
        else:
            print()
            print("Tasks")
            print("=======")
            for task in asyncio.all_tasks():
                f = task.get_stack(limit=1)[0]
                line = linecache.getline(
                    f.f_code.co_filename, f.f_lineno, f.f_globals
                ).strip()
                line = f"{line} # at {os.path.basename(f.f_code.co_filename)}:{f.f_lineno}"
                print(f"{asyncio_utils.task_repr(task)}\n {line}")
        print("****************************************************")
    # Escape hatch for debugging deadlocks: dump info, then hard-exit.
    if os.getenv("MITMPROXY_DEBUG_EXIT"):  # pragma: no cover
        sys.exit(1)
def dump_stacks(signal=None, frame=None, file=sys.stdout):
    """Print the current stack of every thread to `file`.

    The (signal, frame) parameters match the `signal.signal` handler
    convention so this can be installed directly as a signal handler.
    """
    thread_names = {th.ident: th.name for th in threading.enumerate()}
    lines = []
    for thread_id, stack in sys._current_frames().items():
        lines.append(
            "\n# Thread: %s(%d)" % (thread_names.get(thread_id, ""), thread_id)
        )
        for filename, lineno, name, line in traceback.extract_stack(stack):
            lines.append('File: "%s", line %d, in %s' % (filename, lineno, name))
            if line:
                lines.append(" %s" % (line.strip()))
    print("\n".join(lines), file=file)
    # Escape hatch for debugging deadlocks: dump stacks, then hard-exit.
    if os.getenv("MITMPROXY_DEBUG_EXIT"):  # pragma: no cover
        sys.exit(1)
def register_info_dumpers():
    """Install SIGUSR1 (full debug dump) and SIGUSR2 (thread stacks) handlers.

    These signals do not exist on Windows, so this is a no-op there.
    """
    if os.name == "nt":  # pragma: windows no cover
        return
    signal.signal(signal.SIGUSR1, dump_info)  # type: ignore
    signal.signal(signal.SIGUSR2, dump_stacks)  # type: ignore

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,81 @@
"""
A standalone, minimal htpasswd parser.
This implementation currently supports bcrypt and SHA1 passwords. SHA1 is insecure.
"""
from __future__ import annotations
import base64
import hashlib
from pathlib import Path
import bcrypt
class HtpasswdFile:
    """In-memory htpasswd database supporting bcrypt and (insecure) {SHA} entries."""

    def __init__(self, content: str):
        """
        Create a HtpasswdFile from a string.
        """
        self.users: dict[str, str] = {}
        for raw in content.splitlines():
            entry = raw.strip()
            # Blank lines and comments are permitted.
            if not entry or entry.startswith("#"):
                continue
            if ":" not in entry:
                raise ValueError(f"Malformed htpasswd line: {entry!r}")
            user, pwhash = entry.split(":", 1)
            if not user:
                raise ValueError(f"Malformed htpasswd line: {entry!r}")
            supported = pwhash.startswith("{SHA}") or pwhash.startswith(
                ("$2y$", "$2b$", "$2a$")
            )
            if not supported:
                raise ValueError(f"Unsupported htpasswd format for user {user!r}")
            self.users[user] = pwhash

    @classmethod
    def from_file(cls, path: Path) -> HtpasswdFile:
        """
        Initializes and loads an htpasswd file.
        Args:
            path: The path to the htpasswd file.
        Raises:
            OSError: If the file cannot be read.
            ValueError: If the file is malformed.
        """
        try:
            content = path.read_text("utf-8")
        except FileNotFoundError:
            raise OSError(f"Htpasswd file not found: {path}") from None
        return cls(content)

    def check_password(self, username: str, password: str) -> bool:
        """
        Checks if a username and password combination is valid.
        Args:
            username: The username to check.
            password: The password to check.
        Returns:
            True if the password is valid, False otherwise.
        """
        stored = self.users.get(username)
        if stored is None:
            return False
        stored = stored.split(":", 1)[0]
        if stored.startswith("{SHA}"):
            # Apache's {SHA} is base64-encoded SHA-1.
            # https://httpd.apache.org/docs/2.4/misc/password_encryptions.html
            digest = hashlib.sha1(password.encode("utf-8")).digest()
            return stored[5:] == base64.b64encode(digest).decode("ascii")
        # Otherwise it is a bcrypt hash ($2a$/$2b$/$2y$).
        return bcrypt.checkpw(password.encode("utf-8"), stored.encode("utf-8"))

View File

@@ -0,0 +1,99 @@
import datetime
import functools
import ipaddress
import time
# Byte-size suffixes understood by parse_size, mapped to their multipliers.
SIZE_UNITS = {
    "b": 1024**0,
    "k": 1024**1,
    "m": 1024**2,
    "g": 1024**3,
    "t": 1024**4,
}
def pretty_size(size: int) -> str:
    """Convert a number of bytes into a human-readable string.
    len(return value) <= 5 always holds true.
    """
    value: float = size  # type cast for mypy
    if value < 1024:
        return f"{value}b"
    for unit in ("k", "m", "g", "t"):
        value /= 1024
        # 99.95 rounds up to 100.0, which would be 6 characters wide.
        if value < 99.95:
            return f"{value:.1f}{unit}"
        if value < 1024 or unit == "t":
            return f"{value:.0f}{unit}"
    raise AssertionError
@functools.lru_cache
def parse_size(s: str | None) -> int | None:
    """
    Parse a size with an optional k/m/... suffix.
    Invalid values raise a ValueError. For added convenience, passing `None` returns `None`.
    """
    if s is None:
        return None
    # Plain integer, no suffix.
    try:
        return int(s)
    except ValueError:
        pass
    for suffix, multiplier in SIZE_UNITS.items():
        if not s.endswith(suffix):
            continue
        try:
            return int(s[:-1]) * multiplier
        except ValueError:
            break
    raise ValueError("Invalid size specification.")
def pretty_duration(secs: float | None) -> str:
formatters = [
(100, "{:.0f}s"),
(10, "{:2.1f}s"),
(1, "{:1.2f}s"),
]
if secs is None:
return ""
for limit, formatter in formatters:
if secs >= limit:
return formatter.format(secs)
# less than 1 sec
return f"{secs * 1000:.0f}ms"
def format_timestamp(s):
s = time.localtime(s)
d = datetime.datetime.fromtimestamp(time.mktime(s))
return d.strftime("%Y-%m-%d %H:%M:%S")
def format_timestamp_with_milli(s):
d = datetime.datetime.fromtimestamp(s)
return d.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
@functools.lru_cache
def format_address(address: tuple | None) -> str:
"""
This function accepts IPv4/IPv6 tuples and
returns the formatted address string with port number
"""
if address is None:
return "<no address>"
try:
host = ipaddress.ip_address(address[0])
if host.is_unspecified:
return f"*:{address[1]}"
if isinstance(host, ipaddress.IPv4Address):
return f"{host}:{address[1]}"
# If IPv6 is mapped to IPv4
elif host.ipv4_mapped:
return f"{host.ipv4_mapped}:{address[1]}"
return f"[{host}]:{address[1]}"
except ValueError:
return f"{address[0]}:{address[1]}"

View File

@@ -0,0 +1,112 @@
import hashlib
import os
from zipfile import ZipFile
from cryptography import x509
from cryptography.hazmat.primitives import serialization
from mitmproxy import certs
from mitmproxy import ctx
from mitmproxy.options import CONF_BASENAME
# The following 3 variables are for including in the magisk module as text file
# Module metadata shown in the Magisk manager UI.
MODULE_PROP_TEXT = """id=mitmproxycert
name=MITMProxy cert
version=v1
versionCode=1
author=mitmproxy
description=Adds the mitmproxy certificate to the system store
template=3"""
# Installer config consumed by the Magisk module template (shell syntax).
CONFIG_SH_TEXT = """
MODID=mitmproxycert
AUTOMOUNT=true
PROPFILE=false
POSTFSDATA=false
LATESTARTSERVICE=false
print_modname() {
ui_print "*******************************"
ui_print " MITMProxy cert installer "
ui_print "*******************************"
}
REPLACE="
"
set_permissions() {
set_perm_recursive $MODPATH 0 0 0755 0644
}
"""
# Recovery-side install script run by Magisk; requires Magisk v20.4+.
UPDATE_BINARY_TEXT = """
#!/sbin/sh
#################
# Initialization
#################
umask 022
# echo before loading util_functions
ui_print() { echo "$1"; }
require_new_magisk() {
ui_print "*******************************"
ui_print " Please install Magisk v20.4+! "
ui_print "*******************************"
exit 1
}
OUTFD=$2
ZIPFILE=$3
mount /data 2>/dev/null
[ -f /data/adb/magisk/util_functions.sh ] || require_new_magisk
. /data/adb/magisk/util_functions.sh
[ $MAGISK_VER_CODE -lt 20400 ] && require_new_magisk
install_module
exit 0
"""
def get_ca_from_files() -> x509.Certificate:
    """Load mitmproxy's CA certificate from the configured confdir."""
    # Borrowed from tlsconfig
    passphrase = (
        ctx.options.cert_passphrase.encode("utf8")
        if ctx.options.cert_passphrase
        else None
    )
    certstore = certs.CertStore.from_store(
        path=os.path.expanduser(ctx.options.confdir),
        basename=CONF_BASENAME,
        key_size=ctx.options.key_size,
        passphrase=passphrase,
    )
    return certstore.default_ca._cert
def subject_hash_old(ca: x509.Certificate) -> str:
    """Compute the certificate subject hash as produced by OpenSSL's
    `-subject_hash_old` option, which Android uses to name system CA files.

    The hash is the first four bytes of the MD5 of the DER-encoded subject,
    interpreted as a little-endian 32-bit integer.
    """
    full_hash = hashlib.md5(ca.subject.public_bytes()).digest()
    sho = full_hash[0] | (full_hash[1] << 8) | (full_hash[2] << 16) | full_hash[3] << 24
    # openssl prints exactly 8 lowercase hex digits; hex() would drop leading
    # zeros and produce a filename Android does not recognize.
    return f"{sho:08x}"
def write_magisk_module(path: str):
    """Write a Magisk-loadable zip module at `path` that installs the
    mitmproxy CA certificate into Android's system certificate store."""
    # Makes a zip file that can be loaded by Magisk
    # Android certs are stored as DER files
    ca = get_ca_from_files()
    der_cert = ca.public_bytes(serialization.Encoding.DER)
    with ZipFile(path, "w") as zipp:
        # Main cert file, name is always the old subject hash with a '.0' added
        zipp.writestr(f"system/etc/security/cacerts/{subject_hash_old(ca)}.0", der_cert)
        zipp.writestr("module.prop", MODULE_PROP_TEXT)
        zipp.writestr("config.sh", CONFIG_SH_TEXT)
        zipp.writestr("META-INF/com/google/android/updater-script", "#MAGISK")
        zipp.writestr("META-INF/com/google/android/update-binary", UPDATE_BINARY_TEXT)
        zipp.writestr(
            "common/file_contexts_image", "/magisk(/.*)? u:object_r:system_file:s0"
        )
        # Boilerplate scripts expected by the Magisk module template.
        zipp.writestr("common/post-fs-data.sh", "MODDIR=${0%/*}")
        zipp.writestr("common/service.sh", "MODDIR=${0%/*}")
        zipp.writestr("common/system.prop", "")

View File

@@ -0,0 +1,7 @@
from pathlib import Path
# Absolute directory containing this hook file.
here = Path(__file__).parent.absolute()


def hook_dirs() -> list[str]:
    """Tell PyInstaller to look for hooks in this directory."""
    return [str(here)]

View File

@@ -0,0 +1,3 @@
# PyInstaller hook: bundle the onboarding app's static assets into the binary.
from PyInstaller.utils.hooks import collect_data_files

datas = collect_data_files("mitmproxy.addons.onboardingapp")

View File

@@ -0,0 +1 @@
# PyInstaller hook: mitmproxy.script is loaded dynamically at runtime, so it
# must be declared explicitly for PyInstaller to bundle it.
hiddenimports = ["mitmproxy.script"]

View File

@@ -0,0 +1,3 @@
# PyInstaller hook: bundle mitmweb's static web assets into the binary.
from PyInstaller.utils.hooks import collect_data_files

datas = collect_data_files("mitmproxy.tools.web")

View File

@@ -0,0 +1,137 @@
"""
This module provides signals, which are a simple dispatching system that allows any number of interested parties
to subscribe to events ("signals").
This is similar to the Blinker library (https://pypi.org/project/blinker/), with the following changes:
- provides only a small subset of Blinker's functionality
- supports type hints
- supports async receivers.
"""
from __future__ import annotations
import asyncio
import inspect
import weakref
from collections.abc import Awaitable
from collections.abc import Callable
from typing import Any
from typing import cast
from typing import Generic
from typing import ParamSpec
from typing import TypeVar
# P captures the parameter signature shared by a signal and its receivers;
# R is a generic return type.
P = ParamSpec("P")
R = TypeVar("R")
def make_weak_ref(obj: Any) -> weakref.ReferenceType:
    """
    Like weakref.ref(), but using weakref.WeakMethod for bound methods.
    """
    # A plain weakref to a bound method dies immediately; WeakMethod tracks
    # the underlying instance instead.
    if hasattr(obj, "__self__"):
        return cast(weakref.ref, weakref.WeakMethod(obj))
    return weakref.ref(obj)
# We're running into https://github.com/python/mypy/issues/6073 here,
# which is why the base class is a mixin and not a generic superclass.
class _SignalMixin:
    """Receiver bookkeeping shared by sync and async signals.

    Receivers are held only via weak references so that connecting to a
    signal never keeps a receiver object alive.
    """

    def __init__(self) -> None:
        self.receivers: list[weakref.ref[Callable]] = []

    def connect(self, receiver: Callable) -> None:
        """
        Register a signal receiver.
        The signal will only hold a weak reference to the receiver function.
        """
        self.receivers.append(make_weak_ref(receiver))

    def disconnect(self, receiver: Callable) -> None:
        self.receivers = [ref for ref in self.receivers if ref() != receiver]

    def notify(self, *args, **kwargs):
        # Generator yielding each live receiver's return value; dead weakrefs
        # are pruned afterwards in one pass.
        found_dead = False
        for ref in self.receivers:
            target = ref()
            if target is None:
                found_dead = True
            else:
                yield target(*args, **kwargs)
        if found_dead:
            self.receivers = [ref for ref in self.receivers if ref() is not None]
class _SyncSignal(Generic[P], _SignalMixin):
    """Signal whose receivers must be plain (non-coroutine) callables."""

    def connect(self, receiver: Callable[P, None]) -> None:
        # Async receivers belong on an _AsyncSignal.
        assert not inspect.iscoroutinefunction(receiver)
        super().connect(receiver)

    def disconnect(self, receiver: Callable[P, None]) -> None:
        super().disconnect(receiver)

    def send(self, *args: P.args, **kwargs: P.kwargs) -> None:
        for result in super().notify(*args, **kwargs):
            # Sync receivers must not return awaitables.
            assert result is None or not inspect.isawaitable(result)
class _AsyncSignal(Generic[P], _SignalMixin):
    """Signal that accepts both plain and coroutine receivers."""

    def connect(self, receiver: Callable[P, Awaitable[None] | None]) -> None:
        super().connect(receiver)

    def disconnect(self, receiver: Callable[P, Awaitable[None] | None]) -> None:
        super().disconnect(receiver)

    async def send(self, *args: P.args, **kwargs: P.kwargs) -> None:
        # Sync receivers run during notify(); awaitables are gathered here.
        awaitables = [
            result
            for result in super().notify(*args, **kwargs)
            if result is not None and inspect.isawaitable(result)
        ]
        await asyncio.gather(*awaitables)
# noinspection PyPep8Naming
def SyncSignal(receiver_spec: Callable[P, None]) -> _SyncSignal[P]:
    """
    Create a synchronous signal with the given function signature for receivers.

    `receiver_spec` is never called; it only pins down the signature that
    connected receivers (and `send`) must follow:

        s = SyncSignal(lambda event: None)  # receivers accept a single "event" argument.
        def receiver(event):
            print(event)
        s.connect(receiver)
        s.send("foo")        # prints foo
        s.send(event="bar")  # prints bar

        def receiver2():
            ...
        s.connect(receiver2)  # mypy complains about receiver2 not having the right signature

        s2 = SyncSignal(lambda: None)  # this signal has no arguments
        s2.send()
    """
    return cast(_SyncSignal[P], _SyncSignal())
# noinspection PyPep8Naming
def AsyncSignal(receiver_spec: Callable[P, Awaitable[None] | None]) -> _AsyncSignal[P]:
    """
    Create a signal that supports both regular and async receivers:

        s = AsyncSignal(lambda event: None)
        async def receiver(event):
            print(event)
        s.connect(receiver)
        await s.send("foo")  # prints foo
    """
    return cast(_AsyncSignal[P], _AsyncSignal())

View File

@@ -0,0 +1,32 @@
import itertools
from collections.abc import Iterable
from collections.abc import Iterator
from typing import TypeVar
T = TypeVar("T")
def window(
iterator: Iterable[T], behind: int = 0, ahead: int = 0
) -> Iterator[tuple[T | None, ...]]:
"""
Sliding window for an iterator.
Example:
>>> for prev, i, nxt in window(range(10), 1, 1):
>>> print(prev, i, nxt)
None 0 1
0 1 2
1 2 3
2 3 None
"""
# TODO: move into utils
iters: list[Iterator[T | None]] = list(itertools.tee(iterator, behind + 1 + ahead))
for i in range(behind):
iters[i] = itertools.chain((behind - i) * [None], iters[i])
for i in range(ahead):
iters[-1 - i] = itertools.islice(
itertools.chain(iters[-1 - i], (ahead - i) * [None]), (ahead - i), None
)
return zip(*iters)

View File

@@ -0,0 +1,21 @@
from mitmproxy import flowfilter
def parse_spec(option: str) -> tuple[flowfilter.TFilter, str, str]:
    """
    Parse strings in the following format:
        [/flow-filter]/subject/replacement
    """
    # The first character defines the separator; the remainder is split on it.
    sep, body = option[0], option[1:]
    parts = body.split(sep, 2)
    if len(parts) == 2:
        subject, replacement = parts
        return flowfilter.match_all, subject, replacement
    if len(parts) == 3:
        filter_pattern, subject, replacement = parts
        return flowfilter.parse(filter_pattern), subject, replacement
    raise ValueError("Invalid number of parameters (2 or 3 are expected)")

View File

@@ -0,0 +1,289 @@
import codecs
import io
import re
from collections.abc import Iterable
from typing import overload
# https://mypy.readthedocs.io/en/stable/more_types.html#function-overloading
@overload
def always_bytes(str_or_bytes: None, *encode_args) -> None: ...
@overload
def always_bytes(str_or_bytes: str | bytes, *encode_args) -> bytes: ...
def always_bytes(str_or_bytes: None | str | bytes, *encode_args) -> None | bytes:
if str_or_bytes is None or isinstance(str_or_bytes, bytes):
return str_or_bytes
elif isinstance(str_or_bytes, str):
return str_or_bytes.encode(*encode_args)
else:
raise TypeError(
f"Expected str or bytes, but got {type(str_or_bytes).__name__}."
)
@overload
def always_str(str_or_bytes: None, *encode_args) -> None: ...
@overload
def always_str(str_or_bytes: str | bytes, *encode_args) -> str: ...
def always_str(str_or_bytes: None | str | bytes, *decode_args) -> None | str:
"""
Returns,
str_or_bytes unmodified, if
"""
if str_or_bytes is None or isinstance(str_or_bytes, str):
return str_or_bytes
elif isinstance(str_or_bytes, bytes):
return str_or_bytes.decode(*decode_args)
else:
raise TypeError(
f"Expected str or bytes, but got {type(str_or_bytes).__name__}."
)
# Translate control characters to "safe" characters. This implementation
# initially replaced them with the matching control pictures
# (http://unicode.org/charts/PDF/U2400.pdf), but that turned out to render badly
# with monospace fonts. We are back to "." therefore.
_control_char_trans = {
    x: ord(".")
    for x in range(32)  # x + 0x2400 for unicode control group pictures
}
_control_char_trans[127] = ord(".")  # 0x2421
# Variant that leaves the spacing characters \r, \n and \t untouched.
_control_char_trans_newline = _control_char_trans.copy()
for x in ("\r", "\n", "\t"):
    del _control_char_trans_newline[ord(x)]
# Compile both dicts into str.translate tables.
_control_char_trans = str.maketrans(_control_char_trans)
_control_char_trans_newline = str.maketrans(_control_char_trans_newline)
def escape_control_characters(text: str, keep_spacing=True) -> str:
    """
    Replace all unicode C1 control characters from the given text with a single "."
    Args:
        keep_spacing: If True, tabs and newlines will not be replaced.
    """
    if not isinstance(text, str):
        raise ValueError(f"text type must be unicode but is {type(text).__name__}")
    if keep_spacing:
        return text.translate(_control_char_trans_newline)
    return text.translate(_control_char_trans)
def bytes_to_escaped_str(
    data: bytes, keep_spacing: bool = False, escape_single_quotes: bool = False
) -> str:
    """
    Take bytes and return a safe string that can be displayed to the user.
    Single quotes are always escaped, double quotes are never escaped:
        "'" + bytes_to_escaped_str(...) + "'"
    gives a valid Python string.
    Args:
        keep_spacing: If True, tabs and newlines will not be escaped.
    """
    if not isinstance(data, bytes):
        raise ValueError(f"data must be bytes, but is {data.__class__.__name__}")
    # Prefixing a double quote forces repr() to emit a single-quoted literal,
    # so double quotes in the data come out unescaped.
    # https://stackoverflow.com/questions/29019340/why-does-python-use-different-quotes-for-representing-strings-depending-on-their
    escaped = repr(b'"' + data).lstrip("b")[2:-1]
    if not escape_single_quotes:
        # Drop the backslash before single quotes, keeping escaped backslashes intact.
        escaped = re.sub(
            r"(?<!\\)(\\\\)*\\'", lambda m: (m.group(1) or "") + "'", escaped
        )
    if keep_spacing:
        # Convert \n/\r/\t escape sequences back into real whitespace.
        escaped = re.sub(
            r"(?<!\\)(\\\\)*\\([nrt])",
            lambda m: (m.group(1) or "") + dict(n="\n", r="\r", t="\t")[m.group(2)],
            escaped,
        )
    return escaped
def escaped_str_to_bytes(data: str) -> bytes:
    """
    Take an escaped string and return the unescaped bytes equivalent.
    Raises:
        ValueError, if the escape sequence is invalid.
    """
    if not isinstance(data, str):
        raise ValueError(f"data must be str, but is {data.__class__.__name__}")
    # codecs.escape_decode is an undocumented but stable CPython API;
    # see http://stackoverflow.com/a/23151714/934719
    return codecs.escape_decode(data)[0]  # type: ignore
def is_mostly_bin(s: bytes) -> bool:
    """Heuristically decide whether `s` looks like binary rather than text.

    Empty input counts as text; only the first ~100 bytes are inspected.
    """
    if not s:
        return False
    # Cut off at ~100 chars, but do it smartly so that if the input is UTF-8, we don't
    # chop a multibyte code point in half. Guard the probe index against inputs
    # only slightly longer than 100 bytes, which would otherwise raise IndexError.
    if len(s) > 100:
        for cut in range(100, min(104, len(s))):
            is_continuation_byte = (s[cut] >> 6) == 0b10
            if not is_continuation_byte:
                # A new character starts here, so we cut off just before that.
                s = s[:cut]
                break
        else:
            s = s[:100]
    low_bytes = sum(i < 9 or 13 < i < 32 for i in s)
    high_bytes = sum(i > 126 for i in s)
    ascii_bytes = len(s) - low_bytes - high_bytes
    # Heuristic 1: If it's mostly printable ASCII, it's not bin.
    if ascii_bytes / len(s) > 0.7:
        return False
    # Heuristic 2: If it's UTF-8 without too many ASCII control chars, it's not bin.
    # Note that b"\x00\x00\x00" would be valid UTF-8, so we don't want to accept _any_
    # UTF-8 with higher code points.
    if (ascii_bytes + high_bytes) / len(s) > 0.95:
        try:
            s.decode()
            return False
        except ValueError:
            pass
    return True
def is_xml(s: bytes) -> bool:
    """Return True if the first non-whitespace byte of `s` is '<'."""
    for byte in s:
        if byte in (9, 10, 32):  # tab, newline, space
            continue
        return byte == 60  # "<"
    return False
def clean_hanging_newline(t):
    """
    Many editors will silently add a newline to the final line of a
    document (I'm looking at you, Vim). This function fixes this common
    problem at the risk of removing a hanging newline in the rare cases
    where the user actually intends it.
    """
    return t[:-1] if t and t[-1] == "\n" else t
def hexdump(s):
    """
    Returns:
        A generator of (offset, hex, str) tuples
    """
    for start in range(0, len(s), 16):
        chunk = s[start : start + 16]
        hex_part = " ".join(f"{byte:0=2x}" for byte in chunk)
        hex_part = hex_part.ljust(47)  # 16*2 + 15
        # Non-ASCII and control bytes are rendered as ".".
        printable = always_str(
            escape_control_characters(
                chunk.decode("ascii", "replace").replace("\ufffd", "."), False
            )
        )
        yield (f"{start:0=10x}", hex_part, printable)
def _move_to_private_code_plane(matchobj):
    # Shift the matched character into the U+E000 private-use area so later
    # regex passes won't touch it (see escape_special_areas).
    return chr(ord(matchobj.group(0)) + 0xE000)
def _restore_from_private_code_plane(matchobj):
    # Inverse of _move_to_private_code_plane: shift the character back down.
    return chr(ord(matchobj.group(0)) - 0xE000)
# Regex building blocks for describing "special areas" (strings, comments, ...):
NO_ESCAPE = r"(?<!\\)(?:\\\\)*"  # an even number of preceding backslashes
MULTILINE_CONTENT = r"[\s\S]*?"  # any characters, including newlines
SINGLELINE_CONTENT = r".*?"  # any characters except newlines
MULTILINE_CONTENT_LINE_CONTINUATION = r"(?:.|(?<=\\)\n)*?"  # newlines only when escaped
def split_special_areas(
    data: str,
    area_delimiter: Iterable[str],
):
    """
    Split a string of code into a [code, special area, code, special area, ..., code] list.
    For example,
    >>> split_special_areas(
    >>>     "test /* don't modify me */ foo",
    >>>     [r"/\\*[\\s\\S]*?\\*/"])  # (regex matching comments)
    ["test ", "/* don't modify me */", " foo"]
    "".join(split_special_areas(x, ...)) == x always holds true.
    """
    # Wrapping the alternation in a capture group makes re.split keep the
    # matched areas in the result (at the odd indices).
    pattern = "({})".format("|".join(area_delimiter))
    return re.split(pattern, data, flags=re.MULTILINE)
def escape_special_areas(
    data: str,
    area_delimiter: Iterable[str],
    control_characters,
):
    """
    Escape all control characters present in special areas with UTF8 symbols
    in the private use plane (U+E000 + ord(char)).
    This is useful so that one can then use regex replacements on the resulting string without
    interfering with special areas.
    control_characters must be 0 < ord(x) < 256.
    Example:
        >>> x = "if (true) { console.log('{}'); }"
        >>> x = escape_special_areas(x, ["'" + SINGLELINE_CONTENT + "'"], "{")
        # braces inside string literals are now private-use characters,
        # invisible to the following substitution:
        >>> x = re.sub(r"\\s*{\\s*", " {\\n    ", x)
        >>> x = unescape_special_areas(x)
    """
    buf = io.StringIO()
    control_char_rex = re.compile(rf"[{control_characters}]")
    for idx, segment in enumerate(split_special_areas(data, area_delimiter)):
        # Odd indices hold the matched special areas (see split_special_areas).
        if idx % 2:
            segment = control_char_rex.sub(_move_to_private_code_plane, segment)
        buf.write(segment)
    return buf.getvalue()
def unescape_special_areas(data: str):
    """
    Invert escape_special_areas.
    x == unescape_special_areas(escape_special_areas(x)) always holds true.
    """
    # Shift every private-use character (U+E000..U+E0FF) back to its original
    # code point; only ord(x) < 256 characters are ever escaped.
    return re.sub(r"[\ue000-\ue0ff]", _restore_from_private_code_plane, data)
def cut_after_n_lines(content: str, n: int) -> str:
    """Truncate `content` after its first `n` lines, keeping the trailing
    newline; content with fewer lines is returned unchanged."""
    assert n > 0
    end = content.find("\n")
    while end >= 0 and n > 1:
        end = content.find("\n", end + 1)
        n -= 1
    if end >= 0:
        return content[: end + 1]
    return content

View File

@@ -0,0 +1,72 @@
import typing
from collections import abc
# types.UnionType (the `int | str` form) only exists on Python 3.10+; fall
# back to a unique sentinel so `origin is UnionType` is simply never true.
try:
    from types import UnionType
except ImportError:  # pragma: no cover
    UnionType = object()  # type: ignore
# Alias for option type specifications; effectively `Any`.
Type = typing.Union[
    typing.Any  # anything more elaborate really fails with mypy at the moment.
]
def check_option_type(name: str, value: typing.Any, typeinfo: Type) -> None:
"""
Check if the provided value is an instance of typeinfo and raises a
TypeError otherwise. This function supports only those types required for
options.
"""
e = TypeError(f"Expected {typeinfo} for {name}, but got {type(value)}.")
origin = typing.get_origin(typeinfo)
if origin is typing.Union or origin is UnionType:
for T in typing.get_args(typeinfo):
try:
check_option_type(name, value, T)
except TypeError:
pass
else:
return
raise e
elif origin is tuple:
types = typing.get_args(typeinfo)
if not isinstance(value, (tuple, list)):
raise e
if len(types) != len(value):
raise e
for i, (x, T) in enumerate(zip(value, types)):
check_option_type(f"{name}[{i}]", x, T)
return
elif origin is abc.Sequence:
T = typing.get_args(typeinfo)[0]
if not isinstance(value, (tuple, list)):
raise e
for v in value:
check_option_type(name, v, T)
elif origin is typing.IO or typeinfo in (typing.TextIO, typing.BinaryIO):
if hasattr(value, "read"):
return
else:
raise e
elif typeinfo is typing.Any:
return
elif not isinstance(value, typeinfo):
if typeinfo is float and isinstance(value, int):
return
raise e
def typespec_to_str(typespec: typing.Any) -> str:
    """Render an option type specification as a short human-readable string."""
    if typespec in (str, int, float, bool):
        return typespec.__name__
    if typespec == typing.Optional[str]:
        return "optional str"
    if typespec in (typing.Sequence[str], abc.Sequence[str]):
        return "sequence of str"
    if typespec == typing.Optional[int]:
        return "optional int"
    # Only the option types above are supported.
    raise NotImplementedError

View File

@@ -0,0 +1,60 @@
"""
This module provides a method to detect if a given file object supports virtual terminal escape codes.
"""
import os
import sys
from typing import IO
if os.name == "nt":
    from ctypes import byref  # type: ignore
    from ctypes import windll  # type: ignore
    from ctypes.wintypes import BOOL
    from ctypes.wintypes import DWORD
    from ctypes.wintypes import HANDLE
    from ctypes.wintypes import LPDWORD

    # Console mode flag that enables VT100 escape-code processing.
    ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004
    STD_OUTPUT_HANDLE = -11
    STD_ERROR_HANDLE = -12
    # https://docs.microsoft.com/de-de/windows/console/getstdhandle
    GetStdHandle = windll.kernel32.GetStdHandle
    GetStdHandle.argtypes = [DWORD]
    GetStdHandle.restype = HANDLE
    # https://docs.microsoft.com/de-de/windows/console/getconsolemode
    GetConsoleMode = windll.kernel32.GetConsoleMode
    GetConsoleMode.argtypes = [HANDLE, LPDWORD]
    GetConsoleMode.restype = BOOL
    # https://docs.microsoft.com/de-de/windows/console/setconsolemode
    SetConsoleMode = windll.kernel32.SetConsoleMode
    SetConsoleMode.argtypes = [HANDLE, DWORD]
    SetConsoleMode.restype = BOOL

    def ensure_supported(f: IO[str]) -> bool:
        """Return True if `f` supports VT escape codes, enabling them in the
        console mode if necessary (Windows implementation)."""
        if not f.isatty():
            return False
        # Only the process-wide stdout/stderr handles can be configured here.
        if f == sys.stdout:
            h = STD_OUTPUT_HANDLE
        elif f == sys.stderr:
            h = STD_ERROR_HANDLE
        else:
            return False
        handle = GetStdHandle(h)
        console_mode = DWORD()
        ok = GetConsoleMode(handle, byref(console_mode))
        if not ok:
            return False
        # Preserve the existing mode bits and add VT processing.
        ok = SetConsoleMode(
            handle, console_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING
        )
        return ok

else:

    def ensure_supported(f: IO[str]) -> bool:
        """Return True if `f` supports VT escape codes (POSIX: any tty does)."""
        return f.isatty()