2025-12-25 upload

This commit is contained in:
“shengyudong”
2025-12-25 11:16:59 +08:00
commit 322ac74336
2241 changed files with 639966 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
from .assemble import assemble_body
from .assemble import assemble_request
from .assemble import assemble_request_head
from .assemble import assemble_response
from .assemble import assemble_response_head
from .read import connection_close
from .read import expected_http_body_size
from .read import read_request_head
from .read import read_response_head
# Public API of this package: the names below are re-exported from the
# sibling modules ``.read`` (head parsing helpers) and ``.assemble``
# (serialization helpers) imported above.
__all__ = [
    "read_request_head",
    "read_response_head",
    "connection_close",
    "expected_http_body_size",
    "assemble_request",
    "assemble_request_head",
    "assemble_response",
    "assemble_response_head",
    "assemble_body",
]

View File

@@ -0,0 +1,99 @@
def assemble_request(request):
    """
    Serialize a request to bytes: the head followed by the body.

    Args:
        request: object whose ``data`` attribute carries ``content``,
            ``headers`` and ``trailers``.

    Returns:
        The request head concatenated with the joined body chunks.

    Raises:
        ValueError: if ``request.data.content`` is None.
    """
    data = request.data
    if data.content is None:
        raise ValueError("Cannot assemble flow with missing content")
    head = assemble_request_head(request)
    # The whole content is handed over as a single chunk.
    body_parts = assemble_body(data.headers, [data.content], data.trailers)
    return head + b"".join(body_parts)
def assemble_request_head(request):
    """
    Serialize the head of a request.

    Returns:
        The request line, CRLF, the serialized headers and a final CRLF,
        as bytes.
    """
    data = request.data
    request_line = _assemble_request_line(data)
    header_block = _assemble_request_headers(data)
    return b"%s\r\n%s\r\n" % (request_line, header_block)
def assemble_response(response):
    """
    Serialize a response to bytes: the head followed by the body.

    Args:
        response: object whose ``data`` attribute carries ``content``,
            ``headers`` and ``trailers``.

    Returns:
        The response head concatenated with the joined body chunks.

    Raises:
        ValueError: if ``response.data.content`` is None.
    """
    data = response.data
    if data.content is None:
        raise ValueError("Cannot assemble flow with missing content")
    head = assemble_response_head(response)
    # The whole content is handed over as a single chunk.
    body_parts = assemble_body(data.headers, [data.content], data.trailers)
    return head + b"".join(body_parts)
def assemble_response_head(response):
    """
    Serialize the head of a response.

    Returns:
        The status line, CRLF, the serialized headers and a final CRLF,
        as bytes.
    """
    data = response.data
    status_line = _assemble_response_line(data)
    header_block = _assemble_response_headers(data)
    return b"%s\r\n%s\r\n" % (status_line, header_block)
def assemble_body(headers, body_chunks, trailers):
    """
    Yield the wire representation of a message body, chunk by chunk.

    If the ``transfer-encoding`` header contains ``chunked`` (compared
    case-insensitively), every non-empty chunk is framed with its hexadecimal
    length and the body is finished with a zero-length chunk, followed by the
    trailers when there are any. Otherwise the chunks are yielded unchanged.

    Args:
        headers: mapping offering ``get("transfer-encoding", default)``.
        body_chunks: iterable of bytes chunks.
        trailers: trailer data appended after the final chunk, or a falsy value.

    Raises:
        ValueError: if trailers are given without chunked transfer encoding
            (raised when the generator is first advanced).
    """
    is_chunked = "chunked" in headers.get("transfer-encoding", "").lower()
    if not is_chunked:
        if trailers:
            raise ValueError(
                "Sending HTTP/1.1 trailer headers requires transfer-encoding: chunked"
            )
        for piece in body_chunks:
            yield piece
        return

    for piece in body_chunks:
        # Empty chunks are skipped: a zero-length chunk would end the body.
        if piece:
            yield b"%x\r\n%s\r\n" % (len(piece), piece)
    yield b"0\r\n%s\r\n" % trailers if trailers else b"0\r\n\r\n"
def _assemble_request_line(request_data):
"""
Args:
request_data (mitmproxy.net.http.request.RequestData)
"""
if request_data.method.upper() == b"CONNECT":
return b"%s %s %s" % (
request_data.method,
request_data.authority,
request_data.http_version,
)
elif request_data.authority:
return b"%s %s://%s%s %s" % (
request_data.method,
request_data.scheme,
request_data.authority,
request_data.path,
request_data.http_version,
)
else:
return b"%s %s %s" % (
request_data.method,
request_data.path,
request_data.http_version,
)
def _assemble_request_headers(request_data):
"""
Args:
request_data (mitmproxy.net.http.request.RequestData)
"""
return bytes(request_data.headers)
def _assemble_response_line(response_data):
return b"%s %d %s" % (
response_data.http_version,
response_data.status_code,
response_data.reason,
)
def _assemble_response_headers(response):
return bytes(response.headers)

View File

@@ -0,0 +1,303 @@
import re
import time
import typing
from collections.abc import Iterable
from mitmproxy.http import Headers
from mitmproxy.http import Request
from mitmproxy.http import Response
from mitmproxy.net.http import url
from mitmproxy.net.http import validate
def get_header_tokens(headers, key):
    """
    Return all comma-separated tokens of the header *key*.

    A number of different headers follow a pattern where each header line
    can contain comma-separated tokens, and headers can be set multiple
    times. Whitespace around each token is stripped; a header that is not
    present yields an empty list.
    """
    if key in headers:
        return [piece.strip() for piece in headers[key].split(",")]
    return []
def connection_close(http_version, headers):
    """
    Checks the message to see if the client connection should be closed.

    An explicit ``close`` or ``keep-alive`` token in the Connection header
    decides the outcome (``close`` wins if both are present). Connection
    options are case-insensitive, so tokens are lower-cased before the
    comparison; e.g. ``Connection: Keep-Alive`` counts as ``keep-alive``.

    If we don't have a decisive Connection token, HTTP/1.1 and HTTP/2.0
    connections are assumed to be persistent and every other version is
    assumed to close.

    Args:
        http_version: protocol version as ``str`` or ``bytes``,
            e.g. ``"HTTP/1.1"`` or ``b"HTTP/1.1"``.
        headers: the message's header mapping.

    Returns:
        True if the connection should be closed, False otherwise.
    """
    if "connection" in headers:
        tokens = {
            token.lower() for token in get_header_tokens(headers, "connection")
        }
        if "close" in tokens:
            return True
        if "keep-alive" in tokens:
            return False

    return http_version not in (
        "HTTP/1.1",
        b"HTTP/1.1",
        "HTTP/2.0",
        b"HTTP/2.0",
    )
def expected_http_body_size(
    request: Request, response: Response | None = None
) -> int | None:
    """
    Determine how many body bytes to expect for a request or a response.

    When ``response`` is falsy the request's headers are inspected. Otherwise
    the response's headers are inspected, after the method / status-code
    shortcuts (rules 1 and 2 below) have been applied.

    Args:
        request: The request; its method is also consulted when sizing a response.
        response: The response whose body is to be sized, or None to size the
            request body.

    Returns:
        The expected body length:
        - a non-negative integer, if the size is known in advance
        - None, if the size is unknown in advance (chunked encoding)
        - -1, if all data should be read until end of stream.

    Raises:
        ValueError, if the content-length or transfer-encoding header is invalid
    """
    # Determine response size according to http://tools.ietf.org/html/rfc7230#section-3.3, which is inlined below.
    if not response:
        headers = request.headers
    else:
        headers = response.headers

        #    1.  Any response to a HEAD request and any response with a 1xx
        #        (Informational), 204 (No Content), or 304 (Not Modified) status
        #        code is always terminated by the first empty line after the
        #        header fields, regardless of the header fields present in the
        #        message, and thus cannot contain a message body.
        if request.method.upper() == "HEAD":
            return 0
        if 100 <= response.status_code <= 199:
            return 0
        if response.status_code in (204, 304):
            return 0

        #    2.  Any 2xx (Successful) response to a CONNECT request implies that
        #        the connection will become a tunnel immediately after the empty
        #        line that concludes the header fields.  A client MUST ignore any
        #        Content-Length or Transfer-Encoding header fields received in
        #        such a message.
        if 200 <= response.status_code <= 299 and request.method.upper() == "CONNECT":
            return 0

    #    3.  If a Transfer-Encoding header field is present and the chunked
    #        transfer coding (Section 4.1) is the final encoding, the message
    #        body length is determined by reading and decoding the chunked
    #        data until the transfer coding indicates the data is complete.
    #
    #        If a Transfer-Encoding header field is present in a response and
    #        the chunked transfer coding is not the final encoding, the
    #        message body length is determined by reading the connection until
    #        it is closed by the server.  If a Transfer-Encoding header field
    #        is present in a request and the chunked transfer coding is not
    #        the final encoding, the message body length cannot be determined
    #        reliably; the server MUST respond with the 400 (Bad Request)
    #        status code and then close the connection.
    #
    #        If a message is received with both a Transfer-Encoding and a
    #        Content-Length header field, the Transfer-Encoding overrides the
    #        Content-Length.  Such a message might indicate an attempt to
    #        perform request smuggling (Section 9.5) or response splitting
    #        (Section 9.4) and ought to be handled as an error.  A sender MUST
    #        remove the received Content-Length field prior to forwarding such
    #        a message downstream.
    #
    if te_str := headers.get("transfer-encoding"):
        te = validate.parse_transfer_encoding(te_str)
        match te:
            case "chunked" | "compress,chunked" | "deflate,chunked" | "gzip,chunked":
                # chunked is the final coding: size is unknown in advance.
                return None
            case "compress" | "deflate" | "gzip" | "identity":
                if response:
                    # Response without final chunked coding: read until the stream ends.
                    return -1
                # These values are valid for responses only (not requests), which is ensured in
                # mitmproxy.net.http.validate. If users have explicitly disabled header validation,
                # we strive for maximum compatibility with weird clients.
                if te == "identity" or "content-length" in headers:
                    pass  # Content-Length or 0
                else:
                    return (
                        -1
                    )  # compress/deflate/gzip with no content-length -> read until eof
            case other:  # pragma: no cover
                typing.assert_never(other)

    #    4.  If a message is received without Transfer-Encoding and with
    #        either multiple Content-Length header fields having differing
    #        field-values or a single Content-Length header field having an
    #        invalid value, then the message framing is invalid and the
    #        recipient MUST treat it as an unrecoverable error.  If this is a
    #        request message, the server MUST respond with a 400 (Bad Request)
    #        status code and then close the connection.  If this is a response
    #        message received by a proxy, the proxy MUST close the connection
    #        to the server, discard the received response, and send a 502 (Bad
    #        Gateway) response to the client.  If this is a response message
    #        received by a user agent, the user agent MUST close the
    #        connection to the server and discard the received response.
    #
    #    5.  If a valid Content-Length header field is present without
    #        Transfer-Encoding, its decimal value defines the expected message
    #        body length in octets.  If the sender closes the connection or
    #        the recipient times out before the indicated number of octets are
    #        received, the recipient MUST consider the message to be
    #        incomplete and close the connection.
    if cl := headers.get("content-length"):
        return validate.parse_content_length(cl)

    #    6.  If this is a request message and none of the above are true, then
    #        the message body length is zero (no message body is present).
    if not response:
        return 0

    #    7.  Otherwise, this is a response message without a declared message
    #        body length, so the message body length is determined by the
    #        number of octets received prior to the server closing the
    #        connection.
    return -1
def raise_if_http_version_unknown(http_version: bytes) -> None:
    """
    Validate that *http_version* has the exact form ``HTTP/<digit>.<digit>``.

    ``re.fullmatch`` is used rather than ``re.match`` with a ``$`` anchor,
    because ``$`` also matches right before a trailing newline and would
    therefore accept a version string that ends in a line feed.

    Args:
        http_version: the version token, e.g. ``b"HTTP/1.1"``.

    Raises:
        ValueError: if the token does not have the expected form.
    """
    if not re.fullmatch(rb"HTTP/\d\.\d", http_version):
        raise ValueError(f"Unknown HTTP version: {http_version!r}")
def _read_request_line(
    line: bytes,
) -> tuple[str, int, bytes, bytes, bytes, bytes, bytes]:
    """
    Split a request line into its components.

    The line must consist of exactly three whitespace-separated parts:
    method, request target and HTTP version. Three target shapes are handled:
    a target that is ``*`` or starts with ``/``, a ``CONNECT`` target
    (authority only), and a ``scheme://authority/path`` target.

    Args:
        line: The raw request line.

    Returns:
        A tuple ``(host, port, method, scheme, authority, path, http_version)``.

    Raises:
        ValueError: if the line is malformed; the underlying error is chained
            as the cause.
    """
    try:
        # Unpacking fails with ValueError unless there are exactly three parts.
        method, target, http_version = line.split()
        port: int | None

        if target == b"*" or target.startswith(b"/"):
            # No authority in the target: host and port are left empty / 0.
            scheme, authority, path = b"", b"", target
            host, port = "", 0
        elif method == b"CONNECT":
            # The whole target is the authority; a port is mandatory here.
            scheme, authority, path = b"", target, b""
            host, port = url.parse_authority(authority, check=True)
            if not port:
                raise ValueError
        else:
            # Unpacking fails with ValueError if the target contains no "://".
            scheme, rest = target.split(b"://", maxsplit=1)
            authority, _, path_ = rest.partition(b"/")
            # partition() consumed the first slash; put it back in front of the path.
            path = b"/" + path_
            host, port = url.parse_authority(authority, check=True)
            # Fall back to the scheme's default port when none was given.
            port = port or url.default_port(scheme)
            if not port:
                raise ValueError
            # TODO: we can probably get rid of this check?
            url.parse(target)

        raise_if_http_version_unknown(http_version)
    except ValueError as e:
        raise ValueError(f"Bad HTTP request line: {line!r}") from e

    return host, port, method, scheme, authority, path, http_version
def _read_response_line(line: bytes) -> tuple[bytes, int, bytes]:
try:
parts = line.split(None, 2)
if len(parts) == 2: # handle missing message gracefully
parts.append(b"")
http_version, status_code_str, reason = parts
status_code = int(status_code_str)
raise_if_http_version_unknown(http_version)
except ValueError as e:
raise ValueError(f"Bad HTTP response line: {line!r}") from e
return http_version, status_code, reason
def _read_headers(lines: Iterable[bytes]) -> Headers:
"""
Read a set of headers.
Stop once a blank line is reached.
Returns:
A headers object
Raises:
exceptions.HttpSyntaxException
"""
ret: list[tuple[bytes, bytes]] = []
for line in lines:
if line[0] in b" \t":
if not ret:
raise ValueError("Invalid headers")
# continued header
ret[-1] = (ret[-1][0], ret[-1][1] + b"\r\n " + line.strip())
else:
try:
name, value = line.split(b":", 1)
value = value.strip()
if not name:
raise ValueError()
ret.append((name, value))
except ValueError:
raise ValueError(f"Invalid header line: {line!r}")
return Headers(ret)
def read_request_head(lines: list[bytes]) -> Request:
    """
    Parse an HTTP request head (request line + headers) from a list of lines

    The first line is parsed as the request line, all remaining lines as
    headers. The returned request has no content or trailers, and its start
    timestamp is set to the current time.

    Args:
        lines: The input lines

    Returns:
        The HTTP request object (without body)

    Raises:
        ValueError: The input is malformed.
    """
    if not lines:
        # Without this guard lines[0] would raise IndexError, which is not the
        # documented error type for malformed input.
        raise ValueError("Bad HTTP request head: no lines")
    host, port, method, scheme, authority, path, http_version = _read_request_line(
        lines[0]
    )
    headers = _read_headers(lines[1:])

    return Request(
        host=host,
        port=port,
        method=method,
        scheme=scheme,
        authority=authority,
        path=path,
        http_version=http_version,
        headers=headers,
        content=None,
        trailers=None,
        timestamp_start=time.time(),
        timestamp_end=None,
    )
def read_response_head(lines: list[bytes]) -> Response:
    """
    Parse an HTTP response head (response line + headers) from a list of lines

    The first line is parsed as the status line, all remaining lines as
    headers. The returned response has no content or trailers, and its start
    timestamp is set to the current time.

    Args:
        lines: The input lines

    Returns:
        The HTTP response object (without body)

    Raises:
        ValueError: The input is malformed.
    """
    if not lines:
        # Without this guard lines[0] would raise IndexError, which is not the
        # documented error type for malformed input.
        raise ValueError("Bad HTTP response head: no lines")
    http_version, status_code, reason = _read_response_line(lines[0])
    headers = _read_headers(lines[1:])

    return Response(
        http_version=http_version,
        status_code=status_code,
        reason=reason,
        headers=headers,
        content=None,
        trailers=None,
        timestamp_start=time.time(),
        timestamp_end=None,
    )