Files
baijiahao_data_crawl/venv/Lib/site-packages/mitmproxy/http.py
“shengyudong” 322ac74336 2025-12-25 upload
2025-12-25 11:16:59 +08:00

1296 lines
43 KiB
Python

import binascii
import json
import os
import time
import urllib.parse
import warnings
from collections.abc import Callable
from collections.abc import Iterable
from collections.abc import Iterator
from collections.abc import Mapping
from collections.abc import Sequence
from dataclasses import dataclass
from dataclasses import fields
from email.utils import formatdate
from email.utils import mktime_tz
from email.utils import parsedate_tz
from typing import Any
from typing import cast
from mitmproxy import flow
from mitmproxy.coretypes import multidict
from mitmproxy.coretypes import serializable
from mitmproxy.net import encoding
from mitmproxy.net.http import cookies
from mitmproxy.net.http import multipart
from mitmproxy.net.http import status_codes
from mitmproxy.net.http import url
from mitmproxy.net.http.headers import assemble_content_type
from mitmproxy.net.http.headers import infer_content_encoding
from mitmproxy.net.http.headers import parse_content_type
from mitmproxy.utils import human
from mitmproxy.utils import strutils
from mitmproxy.utils import typecheck
from mitmproxy.utils.strutils import always_bytes
from mitmproxy.utils.strutils import always_str
from mitmproxy.websocket import WebSocketData
# While headers _should_ be ASCII, it's not uncommon for certain headers to be utf-8 encoded.
def _native(x: bytes) -> str:
return x.decode("utf-8", "surrogateescape")
def _always_bytes(x: str | bytes) -> bytes:
    """
    Coerce a header name or value to bytes.

    Delegates to `mitmproxy.utils.strutils.always_bytes` with the same
    UTF-8 / surrogateescape settings that `_native` uses for decoding.
    """
    return always_bytes(x, "utf-8", "surrogateescape")
# This cannot be easily typed with mypy yet, so we just specify MultiDict without concrete types.
class Headers(multidict.MultiDict):  # type: ignore
    """
    HTTP header collection offering both a convenient dictionary interface and
    direct access to the raw underlying header fields.

    Headers can be created from keyword arguments:

    >>> h = Headers(host="example.com", content_type="application/xml")

    For the most part they behave like a regular dict:

    >>> h["Host"]
    "example.com"

    Lookups are case-insensitive:

    >>> h["host"]
    "example.com"

    They can also be built from a list of raw (header_name, header_value) byte tuples:

    >>> h = Headers([
        (b"Host",b"example.com"),
        (b"Accept",b"text/html"),
        (b"accept",b"application/xml")
    ])

    Repeated headers are folded into one value as per RFC 7230:

    >>> h["Accept"]
    "text/html, application/xml"

    Assigning a header replaces every existing header of that name:

    >>> h["Accept"] = "application/text"
    >>> h["Accept"]
    "application/text"

    `bytes(h)` yields an HTTP/1 header block:

    >>> print(bytes(h))
    Host: example.com
    Accept: application/text

    The raw header fields remain available for full control:

    >>> h.fields

    Caveats:

     - For the "Set-Cookie" and "Cookie" headers, either use `Response.cookies` or see `Headers.get_all`.
    """

    def __init__(self, fields: Iterable[tuple[bytes, bytes]] = (), **headers):
        """
        *Args:*
         - *fields:* (optional) list of ``(name, value)`` header byte tuples,
           e.g. ``[(b"Host", b"example.com")]``. Every name and value must be bytes.
         - *\\*\\*headers:* Extra headers to set; these overwrite values coming from `fields`.
           As a convenience, underscores in the keyword names are turned into dashes -
           no other method applies this transformation.
           If ``**headers`` holds several keys with identical ``.lower()`` forms,
           the behavior is undefined.

        *Raises:*
         - `TypeError` if any name or value in `fields` is not bytes.
        """
        super().__init__(fields)

        if any(
            not (isinstance(name, bytes) and isinstance(value, bytes))
            for name, value in self.fields
        ):
            raise TypeError("Header fields must be bytes.")

        # content_type -> content-type
        extra = {}
        for name, value in headers.items():
            header_name = _always_bytes(name).replace(b"_", b"-")
            extra[header_name] = _always_bytes(value)
        self.update(extra)

    fields: tuple[tuple[bytes, bytes], ...]

    @staticmethod
    def _reduce_values(values) -> str:
        # Repeated headers are folded into a single comma-separated value.
        separator = ", "
        return separator.join(values)

    @staticmethod
    def _kconv(key) -> str:
        # Header names compare case-insensitively, so normalise to lower case.
        return key.lower()

    def __bytes__(self) -> bytes:
        """Serialise the headers as an HTTP/1 header block (empty bytes if there are no fields)."""
        if not self.fields:
            return b""
        lines = [b": ".join(field) for field in self.fields]
        return b"\r\n".join(lines) + b"\r\n"

    def __delitem__(self, key: str | bytes) -> None:
        """Delete the header(s) named `key`; `str` keys are converted to bytes first."""
        super().__delitem__(_always_bytes(key))

    def __iter__(self) -> Iterator[str]:
        """Iterate over the header names as native strings."""
        yield from map(_native, super().__iter__())

    def get_all(self, name: str | bytes) -> list[str]:
        """
        Like `Headers.get`, except that multiple headers are not folded into one.
        Useful for Set-Cookie and Cookie headers, which do not support folding.

        *See also:*
         - <https://tools.ietf.org/html/rfc7230#section-3.2.2>
         - <https://datatracker.ietf.org/doc/html/rfc6265#section-5.4>
         - <https://datatracker.ietf.org/doc/html/rfc7540#section-8.1.2.5>
        """
        raw_values = super().get_all(_always_bytes(name))
        return [_native(raw) for raw in raw_values]

    def set_all(self, name: str | bytes, values: Iterable[str | bytes]):
        """
        Explicitly set several headers for the given key.
        See `Headers.get_all`.
        """
        name_bytes = _always_bytes(name)
        value_bytes = [_always_bytes(item) for item in values]
        return super().set_all(name_bytes, value_bytes)

    def insert(self, index: int, key: str | bytes, value: str | bytes):
        """Insert a header at position `index`, converting `key` and `value` to bytes."""
        super().insert(index, _always_bytes(key), _always_bytes(value))

    def items(self, multi=False):
        """
        Return the header items.

        With `multi=True`, every raw field is yielded separately as a
        ``(name, value)`` pair of native strings; otherwise the parent
        class's `items()` is returned.
        """
        if not multi:
            return super().items()
        return ((_native(k), _native(v)) for k, v in self.fields)
@dataclass
class MessageData(serializable.Serializable):
    """
    Plain-data record holding the fields common to HTTP requests and responses.

    `Message` keeps an instance of this class (or a subclass) as `Message.data`
    and exposes the fields through properties.
    """

    http_version: bytes
    headers: Headers
    content: bytes | None
    trailers: Headers | None
    timestamp_start: float
    timestamp_end: float | None

    # noinspection PyUnreachableCode
    if __debug__:

        def __post_init__(self):
            # Only defined when assertions are enabled: validate every field
            # against its declared type right after construction.
            for field in fields(self):
                val = getattr(self, field.name)
                typecheck.check_option_type(field.name, val, field.type)

    def set_state(self, state):
        """
        Update this object in place from `state`.

        "headers" and "trailers" entries that are not `None` are rebuilt via
        `Headers.from_state`; every other entry is assigned as-is.
        """
        for k, v in state.items():
            if k in ("headers", "trailers") and v is not None:
                v = Headers.from_state(v)
            setattr(self, k, v)

    def get_state(self):
        """Return a copy of the instance attributes with headers/trailers replaced by their state."""
        state = vars(self).copy()
        state["headers"] = state["headers"].get_state()
        if state["trailers"] is not None:
            state["trailers"] = state["trailers"].get_state()
        return state

    @classmethod
    def from_state(cls, state):
        """
        Build a new instance from `state`.

        The "headers" entry (and "trailers", unless it is `None`) is converted
        with `Headers.from_state`. The conversion happens on a shallow copy so
        that the caller's mapping is left untouched.
        """
        state = dict(state)
        state["headers"] = Headers.from_state(state["headers"])
        if state["trailers"] is not None:
            state["trailers"] = Headers.from_state(state["trailers"])
        return cls(**state)
@dataclass
class RequestData(MessageData):
    # Plain-data record behind `Request`: the `MessageData` fields plus the
    # request-specific ones below. `Request.__init__` stores an instance of
    # this class as `self.data`, and the `Request` properties read/write it.

    # Target host as text; returned unchanged by `Request.host`.
    host: str
    # Target port; returned unchanged by `Request.port`.
    port: int
    # Raw request method; `Request.method` decodes it as UTF-8 and upper-cases it.
    method: bytes
    # Raw scheme; `Request.scheme` decodes it as UTF-8.
    scheme: bytes
    # Raw authority; `Request.authority` decodes it as IDNA, falling back to UTF-8.
    authority: bytes
    # Raw path (including the query part); `Request.path` decodes it as UTF-8.
    path: bytes
@dataclass
class ResponseData(MessageData):
    # Plain-data record behind `Response`: the `MessageData` fields plus the
    # status line fields below. `Response.__init__` stores an instance of this
    # class as `self.data`.

    # Numeric status code; returned unchanged by `Response.status_code`.
    status_code: int
    # Raw reason phrase; `Response.reason` decodes it as ISO-8859-1.
    reason: bytes
class Message(serializable.Serializable):
    """Common base of `Request` and `Response`."""

    data: MessageData
    stream: Callable[[bytes], Iterable[bytes] | bytes] | bool = False
    """
    Controls whether the message body is streamed.

    With `False`, mitmproxy buffers the whole body before forwarding it to the destination,
    which makes string replacements across the entire body possible.

    With `True`, the body is not buffered on the proxy but forwarded immediately.

    Alternatively, a transformation function may be given; it is called for each chunk of data.
    Note that packet boundaries should generally not be relied upon.

    This attribute has to be set in the `requestheaders` or `responseheaders` hook.
    In `request` or `response` it is already too late, as mitmproxy has buffered the body by then.
    """

    @classmethod
    def from_state(cls, state):
        return cls(**state)

    def get_state(self):
        return self.data.get_state()

    def set_state(self, state):
        self.data.set_state(state)

    @property
    def http_version(self) -> str:
        """
        The HTTP version string, e.g. `HTTP/1.1`.
        """
        return _native(self.data.http_version)

    @http_version.setter
    def http_version(self, http_version: str | bytes) -> None:
        self.data.http_version = _always_bytes(http_version)

    @property
    def is_http10(self) -> bool:
        return self.data.http_version == b"HTTP/1.0"

    @property
    def is_http11(self) -> bool:
        return self.data.http_version == b"HTTP/1.1"

    @property
    def is_http2(self) -> bool:
        return self.data.http_version == b"HTTP/2.0"

    @property
    def is_http3(self) -> bool:
        return self.data.http_version == b"HTTP/3"

    @property
    def headers(self) -> Headers:
        """
        The HTTP headers.
        """
        return self.data.headers

    @headers.setter
    def headers(self, h: Headers) -> None:
        self.data.headers = h

    @property
    def trailers(self) -> Headers | None:
        """
        The [HTTP trailers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Trailer).
        """
        return self.data.trailers

    @trailers.setter
    def trailers(self, h: Headers | None) -> None:
        self.data.trailers = h

    @property
    def raw_content(self) -> bytes | None:
        """
        The raw (possibly compressed) HTTP message body.

        Unlike `Message.content` and `Message.text`, reading this property never raises.

        `raw_content` is `None` when the content is missing, for instance because of body
        streaming (see `Message.stream`). `b""`, in contrast, means a present but empty body.

        *See also:* `Message.content`, `Message.text`
        """
        return self.data.content

    @raw_content.setter
    def raw_content(self, content: bytes | None) -> None:
        self.data.content = content

    @property
    def content(self) -> bytes | None:
        """
        The uncompressed HTTP message body as bytes.

        Reading this attribute may raise a `ValueError` if the HTTP content-encoding is invalid.

        *See also:* `Message.raw_content`, `Message.text`
        """
        return self.get_content()

    @content.setter
    def content(self, value: bytes | None) -> None:
        self.set_content(value)

    @property
    def text(self) -> str | None:
        """
        The uncompressed and decoded HTTP message body as text.

        Reading this attribute may raise a `ValueError` if either the content-encoding
        or the charset is invalid.

        *See also:* `Message.raw_content`, `Message.content`
        """
        return self.get_text()

    @text.setter
    def text(self, value: str | None) -> None:
        self.set_text(value)

    def set_content(self, value: bytes | None) -> None:
        """
        Store `value` as the message body, compressing it according to the current
        content-encoding header.

        `None` clears the raw body and leaves the headers alone. If the
        content-encoding header names an unusable encoding, the header is removed
        and `value` is stored unencoded. Unless a transfer-encoding header is
        present, content-length is updated to the stored body's length.

        *Raises:*
         - `TypeError` if `value` is neither `None` nor bytes.
        """
        if value is None:
            self.raw_content = None
            return
        if not isinstance(value, bytes):
            raise TypeError(
                f"Message content must be bytes, not {type(value).__name__}. "
                "Please use .text if you want to assign a str."
            )

        content_encoding = self.headers.get("content-encoding")
        try:
            body = encoding.encode(value, content_encoding or "identity")
        except ValueError:
            # The content-encoding header is invalid, so drop it and store the body verbatim.
            del self.headers["content-encoding"]
            body = value
        self.raw_content = body

        # https://httpwg.org/specs/rfc7230.html#header.content-length
        # content-length must not be set when a transfer-encoding is provided.
        if "transfer-encoding" not in self.headers:
            self.headers["content-length"] = str(len(self.raw_content))

    def get_content(self, strict: bool = True) -> bytes | None:
        """
        Like `Message.content`, but does not raise when `strict` is `False`.
        In that case the compressed message body is returned unchanged.
        """
        raw = self.raw_content
        if raw is None:
            return None

        content_encoding = self.headers.get("content-encoding")
        if not content_encoding:
            return raw

        try:
            decoded = encoding.decode(raw, content_encoding)
            # A client may illegally specify a byte -> str encoding here (e.g. utf8)
            if isinstance(decoded, str):
                raise ValueError(f"Invalid Content-Encoding: {content_encoding}")
            return decoded
        except ValueError:
            if strict:
                raise
            return raw

    def set_text(self, text: str | None) -> None:
        """
        Encode `text` with the charset inferred from the content-type header and
        store it as the message content.

        If encoding fails, the content-type header's charset is switched to
        UTF-8 and the text is stored as surrogate-escaped UTF-8. `None` clears
        the content.
        """
        if text is None:
            self.content = None
            return

        charset = infer_content_encoding(self.headers.get("content-type", ""))
        try:
            self.content = cast(bytes, encoding.encode(text, charset))
        except ValueError:
            # Fall back to UTF-8 and update the content-type header.
            main_type, sub_type, params = parse_content_type(
                self.headers.get("content-type", "")
            ) or ("text", "plain", {})
            params["charset"] = "utf-8"
            self.headers["content-type"] = assemble_content_type(
                main_type, sub_type, params
            )
            self.content = text.encode("utf8", "surrogateescape")

    def get_text(self, strict: bool = True) -> str | None:
        """
        Like `Message.text`, but does not raise when `strict` is `False`.
        In that case the message body is returned as surrogate-escaped UTF-8.
        """
        body = self.get_content(strict)
        if body is None:
            return None

        charset = infer_content_encoding(self.headers.get("content-type", ""), body)
        try:
            return cast(str, encoding.decode(body, charset))
        except ValueError:
            if strict:
                raise
            return body.decode("utf8", "surrogateescape")

    @property
    def timestamp_start(self) -> float:
        """
        *Timestamp:* Headers received.
        """
        return self.data.timestamp_start

    @timestamp_start.setter
    def timestamp_start(self, timestamp_start: float) -> None:
        self.data.timestamp_start = timestamp_start

    @property
    def timestamp_end(self) -> float | None:
        """
        *Timestamp:* Last byte received.
        """
        return self.data.timestamp_end

    @timestamp_end.setter
    def timestamp_end(self, timestamp_end: float | None):
        self.data.timestamp_end = timestamp_end

    def decode(self, strict: bool = True) -> None:
        """
        Decode the body according to the current Content-Encoding header and
        then remove that header.
        Nothing happens if the message body is missing or empty.

        *Raises:*
         - `ValueError`, when the content-encoding is invalid and strict is True.
        """
        if not self.raw_content:
            # The body is missing (for example, because of body streaming or because it's a response
            # to a HEAD request), so we can't correctly update content-length.
            return
        plain = self.get_content(strict)
        self.headers.pop("content-encoding", None)
        self.content = plain

    def encode(self, encoding: str) -> None:
        """
        Encode the body with the given `encoding`, which is one of "gzip", "deflate",
        "identity", "br", or "zstd".
        An existing content-encoding is overwritten; the content is not decoded beforehand.

        *Raises:*
         - `ValueError`, when the specified content-encoding is invalid.
        """
        self.headers["content-encoding"] = encoding
        # Re-assigning the content runs `set_content`, which drops the header
        # again if the encoding turns out to be unusable.
        self.content = self.raw_content
        if "content-encoding" not in self.headers:
            raise ValueError(f"Invalid content encoding {encoding!r}")

    def json(self, **kwargs: Any) -> Any:
        """
        Return the message content parsed as JSON.

        `**kwargs` are optional arguments that are passed on to `json.loads()`.

        *Raises:*
         - `json.decoder.JSONDecodeError` if content is not valid JSON.
         - `TypeError` if the content is not available, for example because the message
           has been streamed.
        """
        body = self.get_content(strict=False)
        if body is None:
            raise TypeError("Message content is not available.")
        return json.loads(body, **kwargs)
class Request(Message):
    """
    An HTTP request.
    """

    data: RequestData

    def __init__(
        self,
        host: str,
        port: int,
        method: bytes,
        scheme: bytes,
        authority: bytes,
        path: bytes,
        http_version: bytes,
        headers: Headers | tuple[tuple[bytes, bytes], ...],
        content: bytes | None,
        trailers: Headers | tuple[tuple[bytes, bytes], ...] | None,
        timestamp_start: float,
        timestamp_end: float | None,
    ):
        """
        Create a request from its raw parts and store them in a `RequestData`.

        For compatibility with older code, a bytes `host` is decoded (IDNA) and
        `str` values for `method`, `scheme`, `authority`, `path` and
        `http_version` are encoded as ASCII. Raw header/trailer tuples are
        wrapped in `Headers`.

        *Raises:*
         - `ValueError` if `content` is a `str`.
        """
        # auto-convert invalid types to retain compatibility with older code.
        if isinstance(host, bytes):
            host = host.decode("idna", "strict")
        if isinstance(method, str):
            method = method.encode("ascii", "strict")
        if isinstance(scheme, str):
            scheme = scheme.encode("ascii", "strict")
        if isinstance(authority, str):
            authority = authority.encode("ascii", "strict")
        if isinstance(path, str):
            path = path.encode("ascii", "strict")
        if isinstance(http_version, str):
            http_version = http_version.encode("ascii", "strict")

        if isinstance(content, str):
            raise ValueError(f"Content must be bytes, not {type(content).__name__}")
        if not isinstance(headers, Headers):
            headers = Headers(headers)
        if trailers is not None and not isinstance(trailers, Headers):
            trailers = Headers(trailers)

        self.data = RequestData(
            host=host,
            port=port,
            method=method,
            scheme=scheme,
            authority=authority,
            path=path,
            http_version=http_version,
            headers=headers,
            content=content,
            trailers=trailers,
            timestamp_start=timestamp_start,
            timestamp_end=timestamp_end,
        )

    def __repr__(self) -> str:
        if self.host and self.port:
            hostport = f"{self.host}:{self.port}"
        else:
            hostport = ""
        path = self.path or ""
        return f"Request({self.method} {hostport}{path})"

    @classmethod
    def make(
        cls,
        method: str,
        url: str,
        content: bytes | str = "",
        headers: (
            Headers | Mapping[str | bytes, str | bytes] | Iterable[tuple[bytes, bytes]]
        ) = (),
    ) -> "Request":
        """
        Simplified API for creating request objects.

        *Args:*
         - *method:* the request method, e.g. ``"GET"``.
         - *url:* the full URL; scheme, host, port and path are derived from it.
         - *content:* the body. Bytes are assigned to `content`, a `str` to `text`;
           either way the content-length header is updated.
         - *headers:* a `Headers` object, a mapping of names to values, or an
           iterable of ``(name, value)`` byte tuples.

        *Raises:*
         - `TypeError` if `headers` or `content` has an unsupported type.
        """
        # Headers can be list or dict, we differentiate here.
        if isinstance(headers, Headers):
            pass
        elif isinstance(headers, Mapping):
            # Any mapping (not only `dict`) is accepted; iterating a mapping
            # directly would only yield its keys and produce broken fields.
            headers = Headers(
                (
                    always_bytes(k, "utf-8", "surrogateescape"),
                    always_bytes(v, "utf-8", "surrogateescape"),
                )
                for k, v in headers.items()
            )
        elif isinstance(headers, Iterable):
            headers = Headers(headers)  # type: ignore
        else:
            raise TypeError(
                "Expected headers to be an iterable or dict, but is {}.".format(
                    type(headers).__name__
                )
            )

        req = cls(
            "",
            0,
            method.encode("utf-8", "surrogateescape"),
            b"",
            b"",
            b"",
            b"HTTP/1.1",
            headers,
            b"",
            None,
            time.time(),
            time.time(),
        )

        req.url = url
        # Assign this manually to update the content-length header.
        if isinstance(content, bytes):
            req.content = content
        elif isinstance(content, str):
            req.text = content
        else:
            raise TypeError(
                f"Expected content to be str or bytes, but is {type(content).__name__}."
            )

        return req

    @property
    def first_line_format(self) -> str:
        """
        *Read-only:* HTTP request form as defined in [RFC 7230](https://tools.ietf.org/html/rfc7230#section-5.3).

        origin-form and asterisk-form are subsumed as "relative".
        """
        if self.method == "CONNECT":
            return "authority"
        elif self.authority:
            return "absolute"
        else:
            return "relative"

    @property
    def method(self) -> str:
        """
        HTTP request method, e.g. "GET".
        """
        return self.data.method.decode("utf-8", "surrogateescape").upper()

    @method.setter
    def method(self, val: str | bytes) -> None:
        self.data.method = always_bytes(val, "utf-8", "surrogateescape")

    @property
    def scheme(self) -> str:
        """
        HTTP request scheme, which should be "http" or "https".
        """
        return self.data.scheme.decode("utf-8", "surrogateescape")

    @scheme.setter
    def scheme(self, val: str | bytes) -> None:
        self.data.scheme = always_bytes(val, "utf-8", "surrogateescape")

    @property
    def authority(self) -> str:
        """
        HTTP request authority.

        For HTTP/1, this is the authority portion of the request target
        (in either absolute-form or authority-form).
        For origin-form and asterisk-form requests, this property is set to an empty string.

        For HTTP/2, this is the :authority pseudo header.

        *See also:* `Request.host`, `Request.host_header`, `Request.pretty_host`
        """
        try:
            return self.data.authority.decode("idna")
        except UnicodeError:
            return self.data.authority.decode("utf8", "surrogateescape")

    @authority.setter
    def authority(self, val: str | bytes) -> None:
        if isinstance(val, str):
            try:
                val = val.encode("idna", "strict")
            except UnicodeError:
                val = val.encode("utf8", "surrogateescape")  # type: ignore
        self.data.authority = val

    @property
    def host(self) -> str:
        """
        Target server for this request. This may be parsed from the raw request
        (e.g. from a ``GET http://example.com/ HTTP/1.1`` request line)
        or inferred from the proxy mode (e.g. an IP in transparent mode).

        Setting the host attribute also updates the host header and authority information, if present.

        *See also:* `Request.authority`, `Request.host_header`, `Request.pretty_host`
        """
        return self.data.host

    @host.setter
    def host(self, val: str | bytes) -> None:
        self.data.host = always_str(val, "idna", "strict")
        self._update_host_and_authority()

    @property
    def host_header(self) -> str | None:
        """
        The request's host/authority header.

        This property maps to either ``request.headers["Host"]`` or
        ``request.authority``, depending on whether it's HTTP/1.x or HTTP/2.0.

        *See also:* `Request.authority`,`Request.host`, `Request.pretty_host`
        """
        if self.is_http2 or self.is_http3:
            return self.authority or self.data.headers.get("Host", None)
        else:
            return self.data.headers.get("Host", None)

    @host_header.setter
    def host_header(self, val: None | str | bytes) -> None:
        if val is None:
            if self.is_http2 or self.is_http3:
                self.data.authority = b""
            self.headers.pop("Host", None)
        else:
            if self.is_http2 or self.is_http3:
                self.authority = val  # type: ignore
            if not (self.is_http2 or self.is_http3) or "Host" in self.headers:
                # For h2, we only overwrite, but not create, as :authority is the h2 host header.
                self.headers["Host"] = val

    @property
    def port(self) -> int:
        """
        Target port.
        """
        return self.data.port

    @port.setter
    def port(self, port: int) -> None:
        if not isinstance(port, int):
            raise ValueError(f"Port must be an integer, not {port!r}.")

        self.data.port = port
        self._update_host_and_authority()

    def _update_host_and_authority(self) -> None:
        """Propagate the current scheme/host/port to the Host header and authority, where already set."""
        val = url.hostport(self.scheme, self.host, self.port)

        # Update host header
        if "Host" in self.data.headers:
            self.data.headers["Host"] = val
        # Update authority
        if self.data.authority:
            self.authority = val

    @property
    def path(self) -> str:
        """
        HTTP request path, e.g. "/index.html" or "/index.html?a=b".
        Usually starts with a slash, except for OPTIONS requests, which may just be "*".

        This attribute includes both path and query parts of the target URI
        (see Sections 3.3 and 3.4 of [RFC3986](https://datatracker.ietf.org/doc/html/rfc3986)).
        """
        return self.data.path.decode("utf-8", "surrogateescape")

    @path.setter
    def path(self, val: str | bytes) -> None:
        self.data.path = always_bytes(val, "utf-8", "surrogateescape")

    @property
    def url(self) -> str:
        """
        The full URL string, constructed from `Request.scheme`, `Request.host`, `Request.port` and `Request.path`.

        Setting this property updates these attributes as well.
        """
        if self.first_line_format == "authority":
            return f"{self.host}:{self.port}"
        path = self.path if self.path != "*" else ""
        return url.unparse(self.scheme, self.host, self.port, path)

    @url.setter
    def url(self, val: str | bytes) -> None:
        val = always_str(val, "utf-8", "surrogateescape")
        self.scheme, self.host, self.port, self.path = url.parse(val)  # type: ignore

    @property
    def pretty_host(self) -> str:
        """
        *Read-only:* Like `Request.host`, but using `Request.host_header` header as an additional (preferred) data source.
        This is useful in transparent mode where `Request.host` is only an IP address.

        *Warning:* When working in adversarial environments, this may not reflect the actual destination
        as the Host header could be spoofed.
        """
        authority = self.host_header
        if authority:
            return url.parse_authority(authority, check=False)[0]
        else:
            return self.host

    @property
    def pretty_url(self) -> str:
        """
        *Read-only:* Like `Request.url`, but using `Request.pretty_host` instead of `Request.host`.
        """
        if self.first_line_format == "authority":
            return self.authority

        host_header = self.host_header
        if not host_header:
            return self.url

        pretty_host, pretty_port = url.parse_authority(host_header, check=False)
        pretty_port = pretty_port or url.default_port(self.scheme) or 443
        path = self.path if self.path != "*" else ""

        return url.unparse(self.scheme, pretty_host, pretty_port, path)

    def _get_query(self):
        query = urllib.parse.urlparse(self.url).query
        return tuple(url.decode(query))

    def _set_query(self, query_data):
        query = url.encode(query_data)
        _, _, path, params, _, fragment = urllib.parse.urlparse(self.url)
        self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment])

    @property
    def query(self) -> multidict.MultiDictView[str, str]:
        """
        The request query as a mutable mapping view on the request's path.
        For the most part, this behaves like a dictionary.
        Modifications to the MultiDictView update `Request.path`, and vice versa.
        """
        return multidict.MultiDictView(self._get_query, self._set_query)

    @query.setter
    def query(self, value):
        self._set_query(value)

    def _get_cookies(self):
        h = self.headers.get_all("Cookie")
        return tuple(cookies.parse_cookie_headers(h))

    def _set_cookies(self, value):
        self.headers["cookie"] = cookies.format_cookie_header(value)

    @property
    def cookies(self) -> multidict.MultiDictView[str, str]:
        """
        The request cookies.
        For the most part, this behaves like a dictionary.
        Modifications to the MultiDictView update `Request.headers`, and vice versa.
        """
        return multidict.MultiDictView(self._get_cookies, self._set_cookies)

    @cookies.setter
    def cookies(self, value):
        self._set_cookies(value)

    @property
    def path_components(self) -> tuple[str, ...]:
        """
        The URL's path components as a tuple of strings.
        Components are unquoted.
        """
        path = urllib.parse.urlparse(self.url).path
        # This needs to be a tuple so that it's immutable.
        # Otherwise, this would fail silently:
        #   request.path_components.append("foo")
        return tuple(url.unquote(i) for i in path.split("/") if i)

    @path_components.setter
    def path_components(self, components: Iterable[str]):
        quoted = (url.quote(component, safe="") for component in components)
        path = "/" + "/".join(quoted)
        _, _, _, params, query, fragment = urllib.parse.urlparse(self.url)
        self.path = urllib.parse.urlunparse(["", "", path, params, query, fragment])

    def anticache(self) -> None:
        """
        Modifies this request to remove headers that might produce a cached response.
        """
        delheaders = (
            "if-modified-since",
            "if-none-match",
        )
        for i in delheaders:
            self.headers.pop(i, None)

    def anticomp(self) -> None:
        """
        Modify the Accept-Encoding header to only accept uncompressed responses.
        """
        self.headers["accept-encoding"] = "identity"

    def constrain_encoding(self) -> None:
        """
        Limits the permissible Accept-Encoding values, based on what we can decode appropriately.

        The header is only rewritten when it is present and non-empty. Encodings
        are emitted in a fixed order so the resulting header is deterministic.
        """
        # A tuple rather than a set: set iteration order of strings varies
        # between interpreter runs, which made the rewritten header unstable.
        supported = ("gzip", "identity", "deflate", "br", "zstd")
        accept_encoding = self.headers.get("accept-encoding")
        if accept_encoding:
            self.headers["accept-encoding"] = ", ".join(
                e for e in supported if e in accept_encoding
            )

    def _get_urlencoded_form(self):
        is_valid_content_type = (
            "application/x-www-form-urlencoded"
            in self.headers.get("content-type", "").lower()
        )
        if not is_valid_content_type:
            return ()
        text = self.get_text(strict=False)
        if text is None:
            # No body available (e.g. it was streamed): report an empty form
            # explicitly instead of handing None to the URL decoder.
            return ()
        return tuple(url.decode(text))

    def _set_urlencoded_form(self, form_data: Sequence[tuple[str, str]]) -> None:
        """
        Sets the body to the URL-encoded form data, and adds the appropriate content-type header.
        This will overwrite the existing content if there is one.
        """
        self.headers["content-type"] = "application/x-www-form-urlencoded"
        self.content = url.encode(form_data, self.get_text(strict=False)).encode()

    @property
    def urlencoded_form(self) -> multidict.MultiDictView[str, str]:
        """
        The URL-encoded form data.

        If the content-type indicates non-form data or the form could not be parsed, this is set to
        an empty `MultiDictView`.

        Modifications to the MultiDictView update `Request.content`, and vice versa.
        """
        return multidict.MultiDictView(
            self._get_urlencoded_form, self._set_urlencoded_form
        )

    @urlencoded_form.setter
    def urlencoded_form(self, value):
        self._set_urlencoded_form(value)

    def _get_multipart_form(self) -> list[tuple[bytes, bytes]]:
        is_valid_content_type = (
            "multipart/form-data" in self.headers.get("content-type", "").lower()
        )
        if is_valid_content_type and self.content is not None:
            try:
                return multipart.decode_multipart(
                    self.headers.get("content-type"), self.content
                )
            except ValueError:
                # Unparseable form data is reported as an empty form.
                pass
        return []

    def _set_multipart_form(self, value: list[tuple[bytes, bytes]]) -> None:
        ct = self.headers.get("content-type", "")
        is_valid_content_type = ct.lower().startswith("multipart/form-data")
        if not is_valid_content_type:
            # Generate a random boundary here.
            #
            # See <https://datatracker.ietf.org/doc/html/rfc2046#section-5.1.1> for specifications
            # on generating the boundary.
            boundary = "-" * 20 + binascii.hexlify(os.urandom(16)).decode()
            self.headers["content-type"] = ct = f"multipart/form-data; {boundary=!s}"
        self.content = multipart.encode_multipart(ct, value)

    @property
    def multipart_form(self) -> multidict.MultiDictView[bytes, bytes]:
        """
        The multipart form data.

        If the content-type indicates non-form data or the form could not be parsed, this is set to
        an empty `MultiDictView`.

        Modifications to the MultiDictView update `Request.content`, and vice versa.
        """
        return multidict.MultiDictView(
            self._get_multipart_form, self._set_multipart_form
        )

    @multipart_form.setter
    def multipart_form(self, value: list[tuple[bytes, bytes]]) -> None:
        self._set_multipart_form(value)
class Response(Message):
    """
    An HTTP response.
    """

    data: ResponseData

    def __init__(
        self,
        http_version: bytes,
        status_code: int,
        reason: bytes,
        headers: Headers | tuple[tuple[bytes, bytes], ...],
        content: bytes | None,
        trailers: None | Headers | tuple[tuple[bytes, bytes], ...],
        timestamp_start: float,
        timestamp_end: float | None,
    ):
        """
        Create a response from its raw parts and store them in a `ResponseData`.

        For compatibility with older code, `str` values for `http_version` and
        `reason` are encoded as ASCII, and raw header/trailer tuples are wrapped
        in `Headers`.

        *Raises:*
         - `ValueError` if `content` is a `str`.
        """
        # auto-convert invalid types to retain compatibility with older code.
        if isinstance(http_version, str):
            http_version = http_version.encode("ascii", "strict")
        if isinstance(reason, str):
            reason = reason.encode("ascii", "strict")

        if isinstance(content, str):
            raise ValueError(f"Content must be bytes, not {type(content).__name__}")
        if not isinstance(headers, Headers):
            headers = Headers(headers)
        if trailers is not None and not isinstance(trailers, Headers):
            trailers = Headers(trailers)

        self.data = ResponseData(
            http_version=http_version,
            status_code=status_code,
            reason=reason,
            headers=headers,
            content=content,
            trailers=trailers,
            timestamp_start=timestamp_start,
            timestamp_end=timestamp_end,
        )

    def __repr__(self) -> str:
        if self.raw_content:
            ct = self.headers.get("content-type", "unknown content type")
            size = human.pretty_size(len(self.raw_content))
            details = f"{ct}, {size}"
        else:
            details = "no content"
        return f"Response({self.status_code}, {details})"

    @classmethod
    def make(
        cls,
        status_code: int = 200,
        content: bytes | str = b"",
        headers: (
            Headers | Mapping[str, str | bytes] | Iterable[tuple[bytes, bytes]]
        ) = (),
    ) -> "Response":
        """
        Simplified API for creating response objects.

        *Args:*
         - *status_code:* the HTTP status code; the reason phrase is looked up from it.
         - *content:* the body. Bytes are assigned to `content`, a `str` to `text`;
           either way the content-length header is updated.
         - *headers:* a `Headers` object, a mapping of names to values, or an
           iterable of ``(name, value)`` byte tuples.

        *Raises:*
         - `TypeError` if `headers` or `content` has an unsupported type.
        """
        if isinstance(headers, Headers):
            pass
        elif isinstance(headers, Mapping):
            # Matches the annotation: any mapping is accepted, not only `dict`.
            # Iterating a mapping directly would only yield its keys.
            headers = Headers(
                (
                    always_bytes(k, "utf-8", "surrogateescape"),  # type: ignore
                    always_bytes(v, "utf-8", "surrogateescape"),
                )
                for k, v in headers.items()
            )
        elif isinstance(headers, Iterable):
            headers = Headers(headers)  # type: ignore
        else:
            raise TypeError(
                "Expected headers to be an iterable or dict, but is {}.".format(
                    type(headers).__name__
                )
            )

        resp = cls(
            b"HTTP/1.1",
            status_code,
            status_codes.RESPONSES.get(status_code, "").encode(),
            headers,
            None,
            None,
            time.time(),
            time.time(),
        )

        # Assign this manually to update the content-length header.
        if isinstance(content, bytes):
            resp.content = content
        elif isinstance(content, str):
            resp.text = content
        else:
            raise TypeError(
                f"Expected content to be str or bytes, but is {type(content).__name__}."
            )

        return resp

    @property
    def status_code(self) -> int:
        """
        HTTP Status Code, e.g. ``200``.
        """
        return self.data.status_code

    @status_code.setter
    def status_code(self, status_code: int) -> None:
        self.data.status_code = status_code

    @property
    def reason(self) -> str:
        """
        HTTP reason phrase, for example "Not Found".

        HTTP/2 responses do not contain a reason phrase, an empty string will be returned instead.
        """
        # Encoding: http://stackoverflow.com/a/16674906/934719
        return self.data.reason.decode("ISO-8859-1")

    @reason.setter
    def reason(self, reason: str | bytes) -> None:
        self.data.reason = strutils.always_bytes(reason, "ISO-8859-1")

    def _get_cookies(self):
        h = self.headers.get_all("set-cookie")
        all_cookies = cookies.parse_set_cookie_headers(h)
        return tuple((name, (value, attrs)) for name, value, attrs in all_cookies)

    def _set_cookies(self, value):
        cookie_headers = [
            cookies.format_set_cookie_header([(k, v[0], v[1])]) for k, v in value
        ]
        self.headers.set_all("set-cookie", cookie_headers)

    @property
    def cookies(
        self,
    ) -> multidict.MultiDictView[str, tuple[str, multidict.MultiDict[str, str | None]]]:
        """
        The response cookies. A possibly empty `MultiDictView`, where the keys are cookie
        name strings, and values are `(cookie value, attributes)` tuples. Within
        attributes, unary attributes (e.g. `HTTPOnly`) are indicated by a `None` value.
        Modifications to the MultiDictView update `Response.headers`, and vice versa.

        *Warning:* Changes to `attributes` will not be picked up unless you also reassign
        the `(cookie value, attributes)` tuple directly in the `MultiDictView`.
        """
        return multidict.MultiDictView(self._get_cookies, self._set_cookies)

    @cookies.setter
    def cookies(self, value):
        self._set_cookies(value)

    def refresh(self, now=None):
        """
        This fairly complex and heuristic function refreshes a server
        response for replay.

         - It adjusts date, expires, and last-modified headers.
         - It adjusts cookie expiration.

        *Args:*
         - *now:* the reference timestamp in seconds; defaults to the current
           time when `None`. All adjusted values are shifted by
           ``now - timestamp_start``.
        """
        # Compare against None so that an explicit timestamp of 0 is honoured
        # instead of being silently replaced by the current time.
        if now is None:
            now = time.time()
        delta = now - self.timestamp_start
        refresh_headers = [
            "date",
            "expires",
            "last-modified",
        ]
        for i in refresh_headers:
            if i in self.headers:
                d = parsedate_tz(self.headers[i])
                if d:
                    new = mktime_tz(d) + delta
                    try:
                        self.headers[i] = formatdate(new, usegmt=True)
                    except OSError:  # pragma: no cover
                        pass  # value out of bounds on Windows only (which is why we exclude it from coverage).
        c = []
        for set_cookie_header in self.headers.get_all("set-cookie"):
            try:
                refreshed = cookies.refresh_set_cookie_header(set_cookie_header, delta)
            except ValueError:
                # Keep cookies that cannot be refreshed exactly as they were.
                refreshed = set_cookie_header
            c.append(refreshed)
        if c:
            self.headers.set_all("set-cookie", c)
class HTTPFlow(flow.Flow):
    """
    A collection of objects that together represent a single HTTP transaction.
    """

    request: Request
    """The client's HTTP request."""
    response: Response | None = None
    """The server's HTTP response."""
    error: flow.Error | None = None
    """
    A connection or protocol error affecting this flow.

    A Flow may carry both a response and an error object. This can happen,
    for instance, when a response was received from the server but sending
    it back to the client failed.
    """

    websocket: WebSocketData | None = None
    """
    If this HTTP flow initiated a WebSocket connection, this attribute contains all associated WebSocket data.
    """

    def get_state(self) -> serializable.State:
        """Return the parent flow state extended with the request, response and websocket states."""
        state = dict(super().get_state())
        state["request"] = self.request.get_state()
        state["response"] = self.response.get_state() if self.response else None
        state["websocket"] = self.websocket.get_state() if self.websocket else None
        return state

    def set_state(self, state: serializable.State) -> None:
        """
        Restore this flow from `state`.

        The "request", "response" and "websocket" entries are removed from
        `state` and rebuilt; whatever remains is handed to the parent class.
        """
        self.request = Request.from_state(state.pop("request"))

        response_state = state.pop("response")
        self.response = Response.from_state(response_state) if response_state else None

        websocket_state = state.pop("websocket")
        self.websocket = (
            WebSocketData.from_state(websocket_state) if websocket_state else None
        )

        super().set_state(state)

    def __repr__(self):
        candidates = (
            "request",
            "response",
            "websocket",
            "error",
            "client_conn",
            "server_conn",
        )
        # Only attributes that exist and are truthy get a line in the repr.
        shown = [name for name in candidates if getattr(self, name, False)]
        template = (
            "<HTTPFlow"
            + "".join(f"\r\n {name} = {{flow.{name}}}" for name in shown)
            + ">"
        )
        return template.format(flow=self)

    @property
    def timestamp_start(self) -> float:
        """*Read-only:* An alias for `Request.timestamp_start`."""
        return self.request.timestamp_start

    @property
    def mode(self) -> str:  # pragma: no cover
        warnings.warn("HTTPFlow.mode is deprecated.", DeprecationWarning, stacklevel=2)
        return getattr(self, "_mode", "regular")

    @mode.setter
    def mode(self, val: str) -> None:  # pragma: no cover
        warnings.warn("HTTPFlow.mode is deprecated.", DeprecationWarning, stacklevel=2)
        self._mode = val

    def copy(self):
        """Copy the flow via the parent class, then replace request and response with copies of their own."""
        duplicate = super().copy()
        if self.request:
            duplicate.request = self.request.copy()
        if self.response:
            duplicate.response = self.response.copy()
        return duplicate
# Public names of this module: the only ones exported by a star-import.
# Helpers such as `_native`/`_always_bytes` and the `*Data` records are
# deliberately not listed here.
__all__ = [
"HTTPFlow",
"Message",
"Request",
"Response",
"Headers",
]