2025-12-25 upload
This commit is contained in:
387
venv/Lib/site-packages/mitmproxy/net/http/cookies.py
Normal file
387
venv/Lib/site-packages/mitmproxy/net/http/cookies.py
Normal file
@@ -0,0 +1,387 @@
|
||||
import email.utils
|
||||
import re
|
||||
import time
|
||||
from collections.abc import Iterable
|
||||
|
||||
from mitmproxy.coretypes import multidict
|
||||
|
||||
"""
|
||||
A flexible module for cookie parsing and manipulation.
|
||||
|
||||
This module differs from usual standards-compliant cookie modules in a number
|
||||
of ways. We try to be as permissive as possible, and to retain even mal-formed
|
||||
information. Duplicate cookies are preserved in parsing, and can be set in
|
||||
formatting. We do attempt to escape and quote values where needed, but will not
|
||||
reject data that violate the specs.
|
||||
|
||||
Parsing accepts the formats in RFC6265 and partially RFC2109 and RFC2965. We
|
||||
also parse the comma-separated variant of Set-Cookie that allows multiple
|
||||
cookies to be set in a single header. Serialization follows RFC6265.
|
||||
|
||||
http://tools.ietf.org/html/rfc6265
|
||||
http://tools.ietf.org/html/rfc2109
|
||||
http://tools.ietf.org/html/rfc2965
|
||||
"""
|
||||
|
||||
# Lower-cased attribute names that are treated as cookie metadata (rather
# than a new name/value pair) when grouping flat pair lists into cookies.
# NOTE(review): "domain" is absent from this set — verify against upstream,
# since a Domain attribute would then start a new cookie in group_cookies().
_cookie_params = {
    "expires",
    "path",
    "comment",
    "max-age",
    "secure",
    "httponly",
    "version",
}

# Characters that must be backslash-escaped inside a quoted cookie value:
# double quotes and backslashes.
ESCAPE = re.compile(r"([\"\\])")
|
||||
|
||||
|
||||
class CookieAttrs(multidict.MultiDict):
    """
    A multidict for cookie attributes with case-insensitive keys.
    When the same attribute occurs multiple times, the last value wins.
    """

    @staticmethod
    def _kconv(key):
        # Case-insensitive lookups: "Max-Age" and "max-age" are the same key.
        return key.lower()

    @staticmethod
    def _reduce_values(values):
        # See the StickyCookieTest for a weird cookie that only makes sense
        # if we take the last part.
        return values[-1]
|
||||
|
||||
|
||||
# A single parsed Set-Cookie: (name, value, attributes).
TSetCookie = tuple[str, str | None, CookieAttrs]
# Raw (name, value) pairs; value is None for attributes given without "=".
TPairs = list[tuple[str, str | None]]
|
||||
|
||||
|
||||
def _read_until(s, start, term):
|
||||
"""
|
||||
Read until one of the characters in term is reached.
|
||||
"""
|
||||
if start == len(s):
|
||||
return "", start + 1
|
||||
for i in range(start, len(s)):
|
||||
if s[i] in term:
|
||||
return s[start:i], i
|
||||
return s[start : i + 1], i + 1
|
||||
|
||||
|
||||
def _read_quoted_string(s, start):
    """
    start: offset to the first quote of the string to be read

    A sort of loose super-set of the various quoted string specifications.

    RFC6265 disallows backslashes or double quotes within quoted strings.
    Prior RFCs use backslashes to escape. This leaves us free to apply
    backslash escaping by default and be compatible with everything.

    Returns (unescaped_string, offset_one_past_the_closing_quote).
    """
    escaping = False
    ret = []
    # Skip the first quote
    i = start  # initialize in case the loop doesn't run.
    for i in range(start + 1, len(s)):
        if escaping:
            # Previous character was a backslash: take this one literally.
            ret.append(s[i])
            escaping = False
        elif s[i] == '"':
            # Unescaped closing quote ends the string.
            break
        elif s[i] == "\\":
            escaping = True
        else:
            ret.append(s[i])
    return "".join(ret), i + 1
|
||||
|
||||
|
||||
def _read_key(s, start, delims=";="):
    """
    Read a key - the LHS of a token/value pair in a cookie.

    Returns (key_text, offset) as produced by _read_until.
    """
    key, off = _read_until(s, start, delims)
    return key, off
|
||||
|
||||
|
||||
def _read_value(s, start, delims):
    """
    Reads a value - the RHS of a token/value pair in a cookie.

    Quoted values are unescaped; unquoted ones are read up to a delimiter.
    """
    if start >= len(s):
        # Nothing left to read.
        return "", start
    if s[start] == '"':
        # Quoted value: parse with backslash-unescaping.
        return _read_quoted_string(s, start)
    return _read_until(s, start, delims)
|
||||
|
||||
|
||||
def _read_cookie_pairs(s, off=0):
    """
    Read pairs of lhs=rhs values from Cookie headers.

    off: start offset

    Returns ([ [name, value], ... ], final_offset).
    """
    pairs = []

    while True:
        lhs, off = _read_key(s, off)
        lhs = lhs.lstrip()

        rhs = ""
        if off < len(s) and s[off] == "=":
            # Only consume a value when an "=" follows the key.
            rhs, off = _read_value(s, off + 1, ";")
        if rhs or lhs:
            # Skip completely empty pairs (e.g. produced by ";;").
            pairs.append([lhs, rhs])

        # Step over the ";" separator.
        off += 1

        if not off < len(s):
            break

    return pairs, off
|
||||
|
||||
|
||||
def _read_set_cookie_pairs(s: str, off=0) -> tuple[list[TPairs], int]:
    """
    Read pairs of lhs=rhs values from SetCookie headers while handling multiple cookies.

    off: start offset
    specials: attributes that are treated specially

    Returns (list_of_cookies, final_offset), where each cookie is a list of
    (name, value) pairs and value is None for attributes without "=".
    """
    cookies: list[TPairs] = []
    pairs: TPairs = []

    while True:
        lhs, off = _read_key(s, off, ";=,")
        lhs = lhs.lstrip()

        rhs = ""
        if off < len(s) and s[off] == "=":
            rhs, off = _read_value(s, off + 1, ";,")

            # Special handling of attributes
            if lhs.lower() == "expires":
                # 'expires' values can contain commas in them so they need to
                # be handled separately.

                # We actually bank on the fact that the expires value WILL
                # contain a comma. Things will fail, if they don't.

                # '3' is just a heuristic we use to determine whether we've
                # only read a part of the expires value and we should read more.
                if len(rhs) <= 3:
                    trail, off = _read_value(s, off + 1, ";,")
                    rhs = rhs + "," + trail

            # as long as there's a "=", we consider it a pair
            pairs.append((lhs, rhs))

        elif lhs:
            pairs.append((lhs, None))

        # comma marks the beginning of a new cookie
        if off < len(s) and s[off] == ",":
            cookies.append(pairs)
            pairs = []

        off += 1

        if not off < len(s):
            break

    if pairs or not cookies:
        cookies.append(pairs)

    return cookies, off
|
||||
|
||||
|
||||
def _has_special(s: str) -> bool:
|
||||
for i in s:
|
||||
if i in '",;\\':
|
||||
return True
|
||||
o = ord(i)
|
||||
if o < 0x21 or o > 0x7E:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _format_pairs(pairs, specials=(), sep="; "):
|
||||
"""
|
||||
specials: A lower-cased list of keys that will not be quoted.
|
||||
"""
|
||||
vals = []
|
||||
for k, v in pairs:
|
||||
if v is None:
|
||||
val = k
|
||||
elif k.lower() not in specials and _has_special(v):
|
||||
v = ESCAPE.sub(r"\\\1", v)
|
||||
v = '"%s"' % v
|
||||
val = f"{k}={v}"
|
||||
else:
|
||||
val = f"{k}={v}"
|
||||
vals.append(val)
|
||||
return sep.join(vals)
|
||||
|
||||
|
||||
def _format_set_cookie_pairs(lst):
    # "expires" and "path" values commonly contain spaces/commas and are
    # conventionally written unquoted, so exempt them from quoting.
    return _format_pairs(lst, specials=("expires", "path"))
|
||||
|
||||
|
||||
def parse_cookie_header(line):
    """
    Parse a Cookie header value.
    Returns a list of (lhs, rhs) tuples.
    """
    # The final offset is irrelevant for Cookie headers.
    return _read_cookie_pairs(line)[0]
|
||||
|
||||
|
||||
def parse_cookie_headers(cookie_headers):
    """Parse every Cookie header into one flat list of (name, value) pairs."""
    return [
        pair
        for header in cookie_headers
        for pair in parse_cookie_header(header)
    ]
|
||||
|
||||
|
||||
def format_cookie_header(lst):
    """
    Formats a Cookie header value.

    lst: a list of (name, value) pairs, as produced by parse_cookie_header.
    """
    return _format_pairs(lst)
|
||||
|
||||
|
||||
def parse_set_cookie_header(line: str) -> list[TSetCookie]:
    """
    Parse a Set-Cookie header value

    Returns:
        A list of (name, value, attrs) tuples, where attrs is a
        CookieAttrs dict of attributes. No attempt is made to parse attribute
        values - they are treated purely as strings.
    """
    grouped, _ = _read_set_cookie_pairs(line)
    # First pair of each group is the cookie itself; the rest are attributes.
    return [
        (group[0][0], group[0][1], CookieAttrs(group[1:]))
        for group in grouped
        if group
    ]
|
||||
|
||||
|
||||
def parse_set_cookie_headers(headers: Iterable[str]) -> list[TSetCookie]:
    """Parse every Set-Cookie header into one flat list of cookies."""
    result = []
    for header in headers:
        result.extend(parse_set_cookie_header(header))
    return result
|
||||
|
||||
|
||||
def format_set_cookie_header(set_cookies: list[TSetCookie]) -> str:
    """
    Formats a Set-Cookie header value.

    Multiple cookies are joined with ", " into a single header value.
    """

    rv = []

    for name, value, attrs in set_cookies:
        pairs = [(name, value)]
        # NOTE(review): upstream mitmproxy uses attrs.fields directly; the
        # hasattr fallback here additionally accepts a plain iterable of
        # (key, value) pairs in place of a CookieAttrs — confirm intent.
        pairs.extend(attrs.fields if hasattr(attrs, "fields") else attrs)

        rv.append(_format_set_cookie_pairs(pairs))

    return ", ".join(rv)
|
||||
|
||||
|
||||
def refresh_set_cookie_header(c: str, delta: int) -> str:
    """
    Shift the "expires" attribute of every cookie in a Set-Cookie value.

    Args:
        c: A Set-Cookie string
        delta: Time delta in seconds
    Returns:
        A refreshed Set-Cookie string
    Raises:
        ValueError, if the cookie is invalid.
    """
    cookies = parse_set_cookie_header(c)
    for cookie in cookies:
        name, value, attrs = cookie
        if not name or not value:
            raise ValueError("Invalid Cookie")

        if "expires" in attrs:
            e = email.utils.parsedate_tz(attrs["expires"])
            if e:
                # Shift the absolute expiry time by delta seconds and
                # re-serialize it as an RFC 2822 GMT date.
                f = email.utils.mktime_tz(e) + delta
                attrs.set_all("expires", [email.utils.formatdate(f, usegmt=True)])
            else:
                # This can happen when the expires tag is invalid.
                # reddit.com sends a an expires tag like this: "Thu, 31 Dec
                # 2037 23:59:59 GMT", which is valid RFC 1123, but not
                # strictly correct according to the cookie spec. Browsers
                # appear to parse this tolerantly - maybe we should too.
                # For now, we just ignore this.
                del attrs["expires"]
    return format_set_cookie_header(cookies)
|
||||
|
||||
|
||||
def get_expiration_ts(cookie_attrs):
|
||||
"""
|
||||
Determines the time when the cookie will be expired.
|
||||
|
||||
Considering both 'expires' and 'max-age' parameters.
|
||||
|
||||
Returns: timestamp of when the cookie will expire.
|
||||
None, if no expiration time is set.
|
||||
"""
|
||||
if "expires" in cookie_attrs:
|
||||
e = email.utils.parsedate_tz(cookie_attrs["expires"])
|
||||
if e:
|
||||
return email.utils.mktime_tz(e)
|
||||
|
||||
elif "max-age" in cookie_attrs:
|
||||
try:
|
||||
max_age = int(cookie_attrs["Max-Age"])
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
now_ts = time.time()
|
||||
return now_ts + max_age
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def is_expired(cookie_attrs):
    """
    Determines whether a cookie has expired.

    Returns: boolean
    """
    exp_ts = get_expiration_ts(cookie_attrs)
    # Cookies without any expiration information are session cookies and
    # therefore never considered expired here.
    return exp_ts is not None and exp_ts <= time.time()
|
||||
|
||||
|
||||
def group_cookies(pairs):
    """
    Converts a list of pairs to a (name, value, attrs) for each cookie.
    """

    if not pairs:
        return []

    cookie_list = []

    # First pair is always a new cookie
    name, value = pairs[0]
    attrs = []

    for k, v in pairs[1:]:
        if k.lower() in _cookie_params:
            # Known attribute name: belongs to the current cookie.
            attrs.append((k, v))
        else:
            # Unknown key: the previous cookie is complete, a new one starts.
            cookie_list.append((name, value, CookieAttrs(attrs)))
            name, value, attrs = k, v, []

    # Flush the final cookie.
    cookie_list.append((name, value, CookieAttrs(attrs)))
    return cookie_list
|
||||
113
venv/Lib/site-packages/mitmproxy/net/http/headers.py
Normal file
113
venv/Lib/site-packages/mitmproxy/net/http/headers.py
Normal file
@@ -0,0 +1,113 @@
|
||||
import collections
|
||||
import re
|
||||
|
||||
|
||||
def parse_content_type(c: str) -> tuple[str, str, dict[str, str]] | None:
|
||||
"""
|
||||
A simple parser for content-type values. Returns a (type, subtype,
|
||||
parameters) tuple, where type and subtype are strings, and parameters
|
||||
is a dict. If the string could not be parsed, return None.
|
||||
|
||||
E.g. the following string:
|
||||
|
||||
text/html; charset=UTF-8
|
||||
|
||||
Returns:
|
||||
|
||||
("text", "html", {"charset": "UTF-8"})
|
||||
"""
|
||||
parts = c.split(";", 1)
|
||||
ts = parts[0].split("/", 1)
|
||||
if len(ts) != 2:
|
||||
return None
|
||||
d = collections.OrderedDict()
|
||||
if len(parts) == 2:
|
||||
for i in parts[1].split(";"):
|
||||
clause = i.split("=", 1)
|
||||
if len(clause) == 2:
|
||||
d[clause[0].strip()] = clause[1].strip()
|
||||
return ts[0].lower(), ts[1].lower(), d
|
||||
|
||||
|
||||
def assemble_content_type(type, subtype, parameters):
    """Reassemble a content-type string from type, subtype and a params mapping."""
    base = f"{type}/{subtype}"
    if not parameters:
        return base
    suffix = "; ".join(f"{k}={v}" for k, v in parameters.items())
    return f"{base}; {suffix}"
|
||||
|
||||
|
||||
def infer_content_encoding(content_type: str, content: bytes = b"") -> str:
    """
    Infer the encoding of content from the content-type header.

    Precedence: byte-order mark in the content, then the charset parameter
    of the content-type header, then per-media-type fallbacks, and finally
    latin-1.
    """
    enc = None

    # BOM has the highest priority
    if content.startswith(b"\x00\x00\xfe\xff"):
        enc = "utf-32be"
    elif content.startswith(b"\xff\xfe\x00\x00"):
        enc = "utf-32le"
    elif content.startswith(b"\xfe\xff"):
        enc = "utf-16be"
    elif content.startswith(b"\xff\xfe"):
        enc = "utf-16le"
    elif content.startswith(b"\xef\xbb\xbf"):
        # 'utf-8-sig' will strip the BOM on decode
        enc = "utf-8-sig"
    elif parsed_content_type := parse_content_type(content_type):
        # Use the charset from the header if possible
        enc = parsed_content_type[2].get("charset")

    # Otherwise, infer the encoding
    if not enc and "json" in content_type:
        enc = "utf8"

    if not enc and "html" in content_type:
        # Look for a <meta charset=...> declaration in the document itself.
        meta_charset = re.search(
            rb"""<meta[^>]+charset=['"]?([^'">]+)""", content, re.IGNORECASE
        )
        if meta_charset:
            enc = meta_charset.group(1).decode("ascii", "ignore")
        else:
            # Fallback to utf8 for html
            # Ref: https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding
            # > 9. [snip] the comprehensive UTF-8 encoding is suggested.
            enc = "utf8"

    if not enc and "xml" in content_type:
        # Honor the encoding attribute of the XML declaration, if any.
        if xml_encoding := re.search(
            rb"""<\?xml[^\?>]+encoding=['"]([^'"\?>]+)""", content, re.IGNORECASE
        ):
            enc = xml_encoding.group(1).decode("ascii", "ignore")
        else:
            # Fallback to utf8 for xml
            # Ref: https://datatracker.ietf.org/doc/html/rfc7303#section-8.5
            # > the XML processor [snip] to determine an encoding of UTF-8.
            enc = "utf8"

    if not enc and ("javascript" in content_type or "ecmascript" in content_type):
        # Fallback to utf8 for javascript
        # Ref: https://datatracker.ietf.org/doc/html/rfc9239#section-4.2
        # > 3. Else, the character encoding scheme is assumed to be UTF-8
        enc = "utf8"

    if not enc and "text/css" in content_type:
        # @charset rule must be the very first thing.
        css_charset = re.match(rb"""@charset "([^"]+)";""", content, re.IGNORECASE)
        if css_charset:
            enc = css_charset.group(1).decode("ascii", "ignore")
        else:
            # Fallback to utf8 for css
            # Ref: https://drafts.csswg.org/css-syntax/#determine-the-fallback-encoding
            # > 4. Otherwise, return utf-8
            enc = "utf8"

    # Fallback to latin-1
    if not enc:
        enc = "latin-1"

    # Use GB 18030 as the superset of GB2312 and GBK to fix common encoding problems on Chinese websites.
    if enc.lower() in ("gb2312", "gbk"):
        enc = "gb18030"

    return enc
|
||||
21
venv/Lib/site-packages/mitmproxy/net/http/http1/__init__.py
Normal file
21
venv/Lib/site-packages/mitmproxy/net/http/http1/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from .assemble import assemble_body
|
||||
from .assemble import assemble_request
|
||||
from .assemble import assemble_request_head
|
||||
from .assemble import assemble_response
|
||||
from .assemble import assemble_response_head
|
||||
from .read import connection_close
|
||||
from .read import expected_http_body_size
|
||||
from .read import read_request_head
|
||||
from .read import read_response_head
|
||||
|
||||
# Public API of the HTTP/1 reading/assembly package.
__all__ = [
    "read_request_head",
    "read_response_head",
    "connection_close",
    "expected_http_body_size",
    "assemble_request",
    "assemble_request_head",
    "assemble_response",
    "assemble_response_head",
    "assemble_body",
]
|
||||
99
venv/Lib/site-packages/mitmproxy/net/http/http1/assemble.py
Normal file
99
venv/Lib/site-packages/mitmproxy/net/http/http1/assemble.py
Normal file
@@ -0,0 +1,99 @@
|
||||
def assemble_request(request):
    """
    Serialize a complete HTTP/1 request (head plus body) to bytes.

    Raises ValueError if the request content is missing.
    """
    if request.data.content is None:
        raise ValueError("Cannot assemble flow with missing content")
    head = assemble_request_head(request)
    chunks = assemble_body(
        request.data.headers, [request.data.content], request.data.trailers
    )
    return head + b"".join(chunks)
|
||||
|
||||
|
||||
def assemble_request_head(request):
    """Serialize only the request line and headers, ending with a blank line."""
    request_line = _assemble_request_line(request.data)
    header_block = _assemble_request_headers(request.data)
    return request_line + b"\r\n" + header_block + b"\r\n"
|
||||
|
||||
|
||||
def assemble_response(response):
    """
    Serialize a complete HTTP/1 response (head plus body) to bytes.

    Raises ValueError if the response content is missing.
    """
    if response.data.content is None:
        raise ValueError("Cannot assemble flow with missing content")
    head = assemble_response_head(response)
    chunks = assemble_body(
        response.data.headers, [response.data.content], response.data.trailers
    )
    return head + b"".join(chunks)
|
||||
|
||||
|
||||
def assemble_response_head(response):
    """Serialize only the status line and headers, ending with a blank line."""
    status_line = _assemble_response_line(response.data)
    header_block = _assemble_response_headers(response.data)
    return status_line + b"\r\n" + header_block + b"\r\n"
|
||||
|
||||
|
||||
def assemble_body(headers, body_chunks, trailers):
    """
    Yield the on-wire encoding of a message body.

    With chunked transfer-encoding, each non-empty chunk is framed with its
    hex length, and trailers (if any) are sent after the terminating
    zero-length chunk. Without chunked encoding, chunks pass through
    unchanged and trailers raise ValueError, since HTTP/1.1 only allows
    trailers with chunked encoding.
    """
    chunked = "chunked" in headers.get("transfer-encoding", "").lower()
    if not chunked:
        if trailers:
            raise ValueError(
                "Sending HTTP/1.1 trailer headers requires transfer-encoding: chunked"
            )
        yield from body_chunks
        return
    for chunk in body_chunks:
        if not chunk:
            # Empty chunks would terminate the stream early; skip them.
            continue
        yield b"%x\r\n%s\r\n" % (len(chunk), chunk)
    if trailers:
        yield b"0\r\n%s\r\n" % trailers
    else:
        yield b"0\r\n\r\n"
|
||||
|
||||
|
||||
def _assemble_request_line(request_data):
|
||||
"""
|
||||
Args:
|
||||
request_data (mitmproxy.net.http.request.RequestData)
|
||||
"""
|
||||
if request_data.method.upper() == b"CONNECT":
|
||||
return b"%s %s %s" % (
|
||||
request_data.method,
|
||||
request_data.authority,
|
||||
request_data.http_version,
|
||||
)
|
||||
elif request_data.authority:
|
||||
return b"%s %s://%s%s %s" % (
|
||||
request_data.method,
|
||||
request_data.scheme,
|
||||
request_data.authority,
|
||||
request_data.path,
|
||||
request_data.http_version,
|
||||
)
|
||||
else:
|
||||
return b"%s %s %s" % (
|
||||
request_data.method,
|
||||
request_data.path,
|
||||
request_data.http_version,
|
||||
)
|
||||
|
||||
|
||||
def _assemble_request_headers(request_data):
    """
    Args:
        request_data (mitmproxy.net.http.request.RequestData)

    Serializes the header block; the exact wire format is defined by the
    headers object's __bytes__ implementation.
    """
    return bytes(request_data.headers)
|
||||
|
||||
|
||||
def _assemble_response_line(response_data):
|
||||
return b"%s %d %s" % (
|
||||
response_data.http_version,
|
||||
response_data.status_code,
|
||||
response_data.reason,
|
||||
)
|
||||
|
||||
|
||||
def _assemble_response_headers(response):
    # Serializes the header block; the wire format is defined by the
    # headers object's __bytes__ implementation.
    return bytes(response.headers)
|
||||
303
venv/Lib/site-packages/mitmproxy/net/http/http1/read.py
Normal file
303
venv/Lib/site-packages/mitmproxy/net/http/http1/read.py
Normal file
@@ -0,0 +1,303 @@
|
||||
import re
|
||||
import time
|
||||
import typing
|
||||
from collections.abc import Iterable
|
||||
|
||||
from mitmproxy.http import Headers
|
||||
from mitmproxy.http import Request
|
||||
from mitmproxy.http import Response
|
||||
from mitmproxy.net.http import url
|
||||
from mitmproxy.net.http import validate
|
||||
|
||||
|
||||
def get_header_tokens(headers, key):
    """
    Retrieve all tokens for a header key. A number of different headers
    follow a pattern where each header line can contain comma-separated
    tokens, and headers can be set multiple times.
    """
    if key not in headers:
        return []
    return [token.strip() for token in headers[key].split(",")]
|
||||
|
||||
|
||||
def connection_close(http_version, headers):
    """
    Checks the message to see if the client connection should be closed
    according to RFC 2616 Section 8.1.
    If we don't have a Connection header, HTTP 1.1 connections are assumed
    to be persistent.
    """
    if "connection" in headers:
        # Inline of get_header_tokens: split into comma-separated tokens.
        tokens = [t.strip() for t in headers["connection"].split(",")]
        if "close" in tokens:
            return True
        if "keep-alive" in tokens:
            return False
    # Without an explicit directive, only HTTP/1.1 and HTTP/2.0 default
    # to persistent connections.
    return http_version not in ("HTTP/1.1", b"HTTP/1.1", "HTTP/2.0", b"HTTP/2.0")
|
||||
|
||||
|
||||
def expected_http_body_size(
|
||||
request: Request, response: Response | None = None
|
||||
) -> int | None:
|
||||
"""
|
||||
Returns:
|
||||
The expected body length:
|
||||
- a positive integer, if the size is known in advance
|
||||
- None, if the size in unknown in advance (chunked encoding)
|
||||
- -1, if all data should be read until end of stream.
|
||||
|
||||
Raises:
|
||||
ValueError, if the content-length or transfer-encoding header is invalid
|
||||
"""
|
||||
# Determine response size according to http://tools.ietf.org/html/rfc7230#section-3.3, which is inlined below.
|
||||
if not response:
|
||||
headers = request.headers
|
||||
else:
|
||||
headers = response.headers
|
||||
|
||||
# 1. Any response to a HEAD request and any response with a 1xx
|
||||
# (Informational), 204 (No Content), or 304 (Not Modified) status
|
||||
# code is always terminated by the first empty line after the
|
||||
# header fields, regardless of the header fields present in the
|
||||
# message, and thus cannot contain a message body.
|
||||
if request.method.upper() == "HEAD":
|
||||
return 0
|
||||
if 100 <= response.status_code <= 199:
|
||||
return 0
|
||||
if response.status_code in (204, 304):
|
||||
return 0
|
||||
|
||||
# 2. Any 2xx (Successful) response to a CONNECT request implies that
|
||||
# the connection will become a tunnel immediately after the empty
|
||||
# line that concludes the header fields. A client MUST ignore any
|
||||
# Content-Length or Transfer-Encoding header fields received in
|
||||
# such a message.
|
||||
if 200 <= response.status_code <= 299 and request.method.upper() == "CONNECT":
|
||||
return 0
|
||||
|
||||
# 3. If a Transfer-Encoding header field is present and the chunked
|
||||
# transfer coding (Section 4.1) is the final encoding, the message
|
||||
# body length is determined by reading and decoding the chunked
|
||||
# data until the transfer coding indicates the data is complete.
|
||||
#
|
||||
# If a Transfer-Encoding header field is present in a response and
|
||||
# the chunked transfer coding is not the final encoding, the
|
||||
# message body length is determined by reading the connection until
|
||||
# it is closed by the server. If a Transfer-Encoding header field
|
||||
# is present in a request and the chunked transfer coding is not
|
||||
# the final encoding, the message body length cannot be determined
|
||||
# reliably; the server MUST respond with the 400 (Bad Request)
|
||||
# status code and then close the connection.
|
||||
#
|
||||
# If a message is received with both a Transfer-Encoding and a
|
||||
# Content-Length header field, the Transfer-Encoding overrides the
|
||||
# Content-Length. Such a message might indicate an attempt to
|
||||
# perform request smuggling (Section 9.5) or response splitting
|
||||
# (Section 9.4) and ought to be handled as an error. A sender MUST
|
||||
# remove the received Content-Length field prior to forwarding such
|
||||
# a message downstream.
|
||||
#
|
||||
if te_str := headers.get("transfer-encoding"):
|
||||
te = validate.parse_transfer_encoding(te_str)
|
||||
match te:
|
||||
case "chunked" | "compress,chunked" | "deflate,chunked" | "gzip,chunked":
|
||||
return None
|
||||
case "compress" | "deflate" | "gzip" | "identity":
|
||||
if response:
|
||||
return -1
|
||||
# These values are valid for responses only (not requests), which is ensured in
|
||||
# mitmproxy.net.http.validate. If users have explicitly disabled header validation,
|
||||
# we strive for maximum compatibility with weird clients.
|
||||
if te == "identity" or "content-length" in headers:
|
||||
pass # Content-Length or 0
|
||||
else:
|
||||
return (
|
||||
-1
|
||||
) # compress/deflate/gzip with no content-length -> read until eof
|
||||
case other: # pragma: no cover
|
||||
typing.assert_never(other)
|
||||
|
||||
# 4. If a message is received without Transfer-Encoding and with
|
||||
# either multiple Content-Length header fields having differing
|
||||
# field-values or a single Content-Length header field having an
|
||||
# invalid value, then the message framing is invalid and the
|
||||
# recipient MUST treat it as an unrecoverable error. If this is a
|
||||
# request message, the server MUST respond with a 400 (Bad Request)
|
||||
# status code and then close the connection. If this is a response
|
||||
# message received by a proxy, the proxy MUST close the connection
|
||||
# to the server, discard the received response, and send a 502 (Bad
|
||||
# Gateway) response to the client. If this is a response message
|
||||
# received by a user agent, the user agent MUST close the
|
||||
# connection to the server and discard the received response.
|
||||
#
|
||||
# 5. If a valid Content-Length header field is present without
|
||||
# Transfer-Encoding, its decimal value defines the expected message
|
||||
# body length in octets. If the sender closes the connection or
|
||||
# the recipient times out before the indicated number of octets are
|
||||
# received, the recipient MUST consider the message to be
|
||||
# incomplete and close the connection.
|
||||
if cl := headers.get("content-length"):
|
||||
return validate.parse_content_length(cl)
|
||||
# 6. If this is a request message and none of the above are true, then
|
||||
# the message body length is zero (no message body is present).
|
||||
if not response:
|
||||
return 0
|
||||
|
||||
# 7. Otherwise, this is a response message without a declared message
|
||||
# body length, so the message body length is determined by the
|
||||
# number of octets received prior to the server closing the
|
||||
# connection.
|
||||
return -1
|
||||
|
||||
|
||||
def raise_if_http_version_unknown(http_version: bytes) -> None:
    """Reject anything that does not look like b"HTTP/<digit>.<digit>"."""
    m = re.match(rb"^HTTP/\d\.\d$", http_version)
    if m is None:
        raise ValueError(f"Unknown HTTP version: {http_version!r}")
|
||||
|
||||
|
||||
def _read_request_line(
    line: bytes,
) -> tuple[str, int, bytes, bytes, bytes, bytes, bytes]:
    """
    Parse an HTTP/1 request line into
    (host, port, method, scheme, authority, path, http_version).

    Raises:
        ValueError, if the request line is malformed.
    """
    try:
        method, target, http_version = line.split()
        port: int | None

        if target == b"*" or target.startswith(b"/"):
            # Origin form (or the "*" form): no authority information.
            scheme, authority, path = b"", b"", target
            host, port = "", 0
        elif method == b"CONNECT":
            # Authority form: the target is "host:port".
            scheme, authority, path = b"", target, b""
            host, port = url.parse_authority(authority, check=True)
            if not port:
                raise ValueError
        else:
            # Absolute form: "scheme://authority/path".
            scheme, rest = target.split(b"://", maxsplit=1)
            authority, _, path_ = rest.partition(b"/")
            path = b"/" + path_
            host, port = url.parse_authority(authority, check=True)
            port = port or url.default_port(scheme)
            if not port:
                raise ValueError
            # TODO: we can probably get rid of this check?
            url.parse(target)

        raise_if_http_version_unknown(http_version)
    except ValueError as e:
        raise ValueError(f"Bad HTTP request line: {line!r}") from e

    return host, port, method, scheme, authority, path, http_version
|
||||
|
||||
|
||||
def _read_response_line(line: bytes) -> tuple[bytes, int, bytes]:
    """
    Split a status line like b"HTTP/1.1 200 OK" into
    (http_version, status_code, reason).

    A missing reason phrase is tolerated and becomes b"".
    Raises ValueError for malformed lines or unknown HTTP versions.
    """
    try:
        fields = line.split(None, 2)
        if len(fields) == 2:
            # Reason phrase may be absent; normalize to an empty one.
            fields.append(b"")
        http_version, code_bytes, reason = fields
        status_code = int(code_bytes)
        raise_if_http_version_unknown(http_version)
    except ValueError as e:
        raise ValueError(f"Bad HTTP response line: {line!r}") from e
    return http_version, status_code, reason
|
||||
|
||||
|
||||
def _read_headers(lines: Iterable[bytes]) -> Headers:
    """
    Read a set of headers.
    Stop once a blank line is reached.

    Returns:
        A headers object

    Raises:
        exceptions.HttpSyntaxException
    """
    # NOTE(review): despite the docstring, this loop does not stop at a blank
    # line — an empty line would raise IndexError at line[0] — so callers
    # apparently must strip the blank terminator first. Confirm upstream.
    ret: list[tuple[bytes, bytes]] = []
    for line in lines:
        if line[0] in b" \t":
            # Leading whitespace marks an obsolete line folding continuation.
            if not ret:
                raise ValueError("Invalid headers")
            # continued header
            ret[-1] = (ret[-1][0], ret[-1][1] + b"\r\n " + line.strip())
        else:
            try:
                name, value = line.split(b":", 1)
                value = value.strip()
                if not name:
                    raise ValueError()
                ret.append((name, value))
            except ValueError:
                raise ValueError(f"Invalid header line: {line!r}")
    return Headers(ret)
|
||||
|
||||
|
||||
def read_request_head(lines: list[bytes]) -> Request:
    """
    Parse an HTTP request head (request line + headers) from an iterable of lines

    Args:
        lines: The input lines

    Returns:
        The HTTP request object (without body)

    Raises:
        ValueError: The input is malformed.
    """
    # First line is the request line; all remaining lines are headers.
    host, port, method, scheme, authority, path, http_version = _read_request_line(
        lines[0]
    )
    headers = _read_headers(lines[1:])

    return Request(
        host=host,
        port=port,
        method=method,
        scheme=scheme,
        authority=authority,
        path=path,
        http_version=http_version,
        headers=headers,
        content=None,  # body and trailers are read separately
        trailers=None,
        timestamp_start=time.time(),
        timestamp_end=None,
    )
|
||||
|
||||
|
||||
def read_response_head(lines: list[bytes]) -> Response:
    """
    Parse an HTTP response head (response line + headers) from an iterable of lines

    Args:
        lines: The input lines

    Returns:
        The HTTP response object (without body)

    Raises:
        ValueError: The input is malformed.
    """
    # First line is the status line; all remaining lines are headers.
    http_version, status_code, reason = _read_response_line(lines[0])
    headers = _read_headers(lines[1:])

    return Response(
        http_version=http_version,
        status_code=status_code,
        reason=reason,
        headers=headers,
        content=None,  # body and trailers are read separately
        trailers=None,
        timestamp_start=time.time(),
        timestamp_end=None,
    )
|
||||
95
venv/Lib/site-packages/mitmproxy/net/http/multipart.py
Normal file
95
venv/Lib/site-packages/mitmproxy/net/http/multipart.py
Normal file
@@ -0,0 +1,95 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import mimetypes
|
||||
import re
|
||||
import warnings
|
||||
from urllib.parse import quote
|
||||
|
||||
from mitmproxy.net.http import headers
|
||||
|
||||
|
||||
def encode_multipart(content_type: str, parts: list[tuple[bytes, bytes]]) -> bytes:
    """
    Serialize `parts` (a list of (name, value) byte tuples) into a
    multipart/form-data body, using the boundary carried in `content_type`.

    Returns b"" if `content_type` is falsy, unparseable, or has no usable
    ascii "boundary" parameter.

    Raises:
        ValueError: a part's value contains the boundary delimiter itself.
    """
    # Guard clauses: bail out early on anything we cannot derive a boundary from.
    if not content_type:
        return b""
    ct = headers.parse_content_type(content_type)
    if ct is None:
        return b""
    try:
        raw_boundary = ct[2]["boundary"].encode("ascii")
        boundary = quote(raw_boundary)
    except (KeyError, UnicodeError):
        return b""
    # Hoist the loop-invariant boundary encoding.
    boundary_bytes = boundary.encode("utf-8")

    hdrs = []
    for key, value in parts:
        # guess_type keys off the (file)name; fall back to plain text.
        file_type = mimetypes.guess_type(str(key))[0] or "text/plain; charset=utf-8"

        if key:
            hdrs.append(b"--%b" % boundary_bytes)
            disposition = b'form-data; name="%b"' % key
            hdrs.append(b"Content-Disposition: %b" % disposition)
            hdrs.append(b"Content-Type: %b" % file_type.encode("utf-8"))
            hdrs.append(b"")
            hdrs.append(value)
            hdrs.append(b"")

        if value is not None:
            # If boundary is found in value then raise ValueError.
            # NOTE(review): without re.MULTILINE, ^...$ only matches a value
            # that is exactly "--boundary"; confirm whether boundary lines
            # inside multi-line values should be rejected as well.
            if re.search(rb"^--%b$" % re.escape(boundary_bytes), value):
                # Fixed: previously raised ValueError(b"...") with a bytes
                # message, which renders as "b'...'" to users.
                raise ValueError("boundary found in encoded string")

    hdrs.append(b"--%b--\r\n" % boundary_bytes)
    return b"\r\n".join(hdrs)
|
||||
|
||||
|
||||
def decode_multipart(
    content_type: str | None, content: bytes
) -> list[tuple[bytes, bytes]]:
    """
    Takes a multipart boundary encoded string and returns list of (key, value) tuples.

    Returns [] when the content type is missing/unparseable or carries no
    usable ascii "boundary" parameter.
    """
    if content_type:
        ct = headers.parse_content_type(content_type)
        if not ct:
            return []
        try:
            boundary = ct[2]["boundary"].encode("ascii")
        except (KeyError, UnicodeError):
            return []

        # Extracts the field name from a part's Content-Disposition line.
        rx = re.compile(rb'\bname="([^"]+)"')
        r = []
        if content is not None:
            for i in content.split(b"--" + boundary):
                parts = i.splitlines()
                # Skip empty fragments and the trailing terminator chunk
                # (which begins with "--").
                if len(parts) > 1 and parts[0][0:2] != b"--":
                    match = rx.search(parts[1])
                    if match:
                        key = match.group(1)
                        # Body = everything after the first blank line that
                        # follows the part headers. NOTE(review): b"".join
                        # drops the original line breaks in multi-line values.
                        value = b"".join(parts[3 + parts[2:].index(b"") :])
                        r.append((key, value))
        return r
    return []
|
||||
|
||||
|
||||
def encode(ct, parts):  # pragma: no cover
    """Deprecated alias for `encode_multipart` (since 2023-02)."""
    warnings.warn(
        "multipart.encode is deprecated, use multipart.encode_multipart instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    return encode_multipart(ct, parts)
|
||||
|
||||
|
||||
def decode(ct, content):  # pragma: no cover
    """Deprecated alias for `decode_multipart` (since 2023-02)."""
    warnings.warn(
        "multipart.decode is deprecated, use multipart.decode_multipart instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    # BUG FIX: this wrapper previously delegated to encode_multipart,
    # which encodes instead of decoding.
    return decode_multipart(ct, content)
|
||||
146
venv/Lib/site-packages/mitmproxy/net/http/status_codes.py
Normal file
146
venv/Lib/site-packages/mitmproxy/net/http/status_codes.py
Normal file
@@ -0,0 +1,146 @@
|
||||
# Covered status codes:
|
||||
# - official HTTP status codes: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status
|
||||
# - custom codes:
|
||||
# - 444 No Response
|
||||
# - 499 Client Closed Request
|
||||
# Informational responses (1xx).
CONTINUE = 100
SWITCHING = 101
PROCESSING = 102
EARLY_HINTS = 103

# Successful responses (2xx).
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
ALREADY_REPORTED = 208
IM_USED = 226

# Redirection messages (3xx).
MULTIPLE_CHOICE = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307
PERMANENT_REDIRECT = 308

# Client error responses (4xx).
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTH_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
PAYLOAD_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
IM_A_TEAPOT = 418
MISDIRECTED_REQUEST = 421
UNPROCESSABLE_CONTENT = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
TOO_EARLY = 425
UPGRADE_REQUIRED = 426
PRECONDITION_REQUIRED = 428
TOO_MANY_REQUESTS = 429
REQUEST_HEADER_FIELDS_TOO_LARGE = 431
UNAVAILABLE_FOR_LEGAL_REASONS = 451
# Non-standard codes (see the module header comment).
NO_RESPONSE = 444
CLIENT_CLOSED_REQUEST = 499

# Server error responses (5xx).
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
VARIANT_ALSO_NEGOTIATES = 506
INSUFFICIENT_STORAGE_SPACE = 507
LOOP_DETECTED = 508
NOT_EXTENDED = 510
NETWORK_AUTHENTICATION_REQUIRED = 511

# Human-readable reason phrases, keyed by status code.
RESPONSES = {
    # 1xx
    CONTINUE: "Continue",
    SWITCHING: "Switching Protocols",
    PROCESSING: "Processing",
    EARLY_HINTS: "Early Hints",
    # 2xx
    OK: "OK",
    CREATED: "Created",
    ACCEPTED: "Accepted",
    NON_AUTHORITATIVE_INFORMATION: "Non-Authoritative Information",
    NO_CONTENT: "No Content",
    RESET_CONTENT: "Reset Content",
    PARTIAL_CONTENT: "Partial Content",
    MULTI_STATUS: "Multi-Status",
    ALREADY_REPORTED: "Already Reported",
    IM_USED: "IM Used",
    # 3xx
    MULTIPLE_CHOICE: "Multiple Choices",
    MOVED_PERMANENTLY: "Moved Permanently",
    FOUND: "Found",
    SEE_OTHER: "See Other",
    NOT_MODIFIED: "Not Modified",
    USE_PROXY: "Use Proxy",
    # 306 is reserved and intentionally has no entry here.
    TEMPORARY_REDIRECT: "Temporary Redirect",
    PERMANENT_REDIRECT: "Permanent Redirect",
    # 4xx
    BAD_REQUEST: "Bad Request",
    UNAUTHORIZED: "Unauthorized",
    PAYMENT_REQUIRED: "Payment Required",
    FORBIDDEN: "Forbidden",
    NOT_FOUND: "Not Found",
    NOT_ALLOWED: "Method Not Allowed",
    NOT_ACCEPTABLE: "Not Acceptable",
    PROXY_AUTH_REQUIRED: "Proxy Authentication Required",
    REQUEST_TIMEOUT: "Request Time-out",
    CONFLICT: "Conflict",
    GONE: "Gone",
    LENGTH_REQUIRED: "Length Required",
    PRECONDITION_FAILED: "Precondition Failed",
    PAYLOAD_TOO_LARGE: "Payload Too Large",
    REQUEST_URI_TOO_LONG: "Request-URI Too Long",
    UNSUPPORTED_MEDIA_TYPE: "Unsupported Media Type",
    REQUESTED_RANGE_NOT_SATISFIABLE: "Requested Range not satisfiable",
    EXPECTATION_FAILED: "Expectation Failed",
    IM_A_TEAPOT: "I'm a teapot",
    MISDIRECTED_REQUEST: "Misdirected Request",
    UNPROCESSABLE_CONTENT: "Unprocessable Content",
    LOCKED: "Locked",
    FAILED_DEPENDENCY: "Failed Dependency",
    TOO_EARLY: "Too Early",
    UPGRADE_REQUIRED: "Upgrade Required",
    PRECONDITION_REQUIRED: "Precondition Required",
    TOO_MANY_REQUESTS: "Too Many Requests",
    REQUEST_HEADER_FIELDS_TOO_LARGE: "Request Header Fields Too Large",
    UNAVAILABLE_FOR_LEGAL_REASONS: "Unavailable For Legal Reasons",
    NO_RESPONSE: "No Response",
    CLIENT_CLOSED_REQUEST: "Client Closed Request",
    # 5xx
    INTERNAL_SERVER_ERROR: "Internal Server Error",
    NOT_IMPLEMENTED: "Not Implemented",
    BAD_GATEWAY: "Bad Gateway",
    SERVICE_UNAVAILABLE: "Service Unavailable",
    GATEWAY_TIMEOUT: "Gateway Time-out",
    HTTP_VERSION_NOT_SUPPORTED: "HTTP Version not supported",
    VARIANT_ALSO_NEGOTIATES: "Variant Also Negotiates",
    INSUFFICIENT_STORAGE_SPACE: "Insufficient Storage Space",
    LOOP_DETECTED: "Loop Detected",
    NOT_EXTENDED: "Not Extended",
    NETWORK_AUTHENTICATION_REQUIRED: "Network Authentication Required",
}
|
||||
200
venv/Lib/site-packages/mitmproxy/net/http/url.py
Normal file
200
venv/Lib/site-packages/mitmproxy/net/http/url.py
Normal file
@@ -0,0 +1,200 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import urllib.parse
|
||||
from collections.abc import Sequence
|
||||
from typing import AnyStr
|
||||
from typing import overload
|
||||
|
||||
from mitmproxy.net import check
|
||||
from mitmproxy.net.check import is_valid_host
|
||||
from mitmproxy.net.check import is_valid_port
|
||||
from mitmproxy.utils.strutils import always_str
|
||||
|
||||
# This regex extracts & splits the host header into host and port.
|
||||
# Handles the edge case of IPv6 addresses containing colons.
|
||||
# https://bugzilla.mozilla.org/show_bug.cgi?id=45891
|
||||
|
||||
# "host" is either a bracketed IPv6 literal or everything before the first
# colon; "port" is an optional trailing run of digits.
_authority_re = re.compile(r"^(?P<host>[^:]+|\[.+\])(?::(?P<port>\d+))?$")
|
||||
|
||||
|
||||
def parse(url: str | bytes) -> tuple[bytes, bytes, int, bytes]:
    """
    URL-parsing function that checks that
    - port is an integer 0-65535
    - host is a valid IDNA-encoded hostname with no null-bytes
    - path is valid ASCII

    Args:
        A URL (as bytes or as unicode)

    Returns:
        A (scheme, host, port, path) tuple

    Raises:
        ValueError, if the URL is not properly formatted.
    """
    # FIXME: We shouldn't rely on urllib here.

    # Size of Ascii character after encoding is 1 byte which is same as its size
    # But non-Ascii character's size after encoding will be more than its size
    def ascii_check(x):
        if len(x) == len(str(x).encode()):
            return True
        return False

    if isinstance(url, bytes):
        # NOTE(review): decodes with the default utf-8 codec — assumes raw
        # URLs are utf-8; confirm against callers.
        url = url.decode()
    if not ascii_check(url):
        # Percent-encode the query component (urlsplit index 3) so the
        # urlparse call below only sees ascii there.
        # NOTE(review): other components (e.g. path) are not quoted here.
        url = urllib.parse.urlsplit(url)  # type: ignore
        url = list(url)  # type: ignore
        url[3] = urllib.parse.quote(url[3])  # type: ignore
        url = urllib.parse.urlunsplit(url)  # type: ignore

    parsed: urllib.parse.ParseResult = urllib.parse.urlparse(url)
    if not parsed.hostname:
        raise ValueError("No hostname given")
    else:
        # IDNA-encode the hostname; this also rejects many malformed names.
        host = parsed.hostname.encode("idna")

    parsed_b: urllib.parse.ParseResultBytes = parsed.encode("ascii")  # type: ignore

    port = parsed_b.port
    if not port:
        # Fall back to the scheme default: https -> 443, everything else -> 80.
        port = 443 if parsed_b.scheme == b"https" else 80

    # Re-assemble everything after the authority: path + params + query + fragment.
    full_path: bytes = urllib.parse.urlunparse(
        (b"", b"", parsed_b.path, parsed_b.params, parsed_b.query, parsed_b.fragment)  # type: ignore
    )
    if not full_path.startswith(b"/"):
        full_path = b"/" + full_path  # type: ignore

    if not check.is_valid_host(host):
        raise ValueError("Invalid Host")

    return parsed_b.scheme, host, port, full_path
|
||||
|
||||
|
||||
@overload
def unparse(scheme: str, host: str, port: int, path) -> str: ...


@overload
def unparse(scheme: bytes, host: bytes, port: int, path) -> bytes: ...


def unparse(scheme, host, port, path):
    """
    Returns a URL string, constructed from the specified components.

    str components yield a str URL, bytes components a bytes URL.
    """
    netloc = hostport(scheme, host, port)
    if isinstance(scheme, bytes):
        return b"%s://%s%s" % (scheme, netloc, path)
    return f"{scheme}://{netloc}{path}"
|
||||
|
||||
|
||||
def encode(s: Sequence[tuple[str, str]], similar_to: str | None = None) -> str:
|
||||
"""
|
||||
Takes a list of (key, value) tuples and returns a urlencoded string.
|
||||
If similar_to is passed, the output is formatted similar to the provided urlencoded string.
|
||||
"""
|
||||
|
||||
remove_trailing_equal = False
|
||||
if similar_to:
|
||||
remove_trailing_equal = any("=" not in param for param in similar_to.split("&"))
|
||||
|
||||
encoded = urllib.parse.urlencode(s, False, errors="surrogateescape")
|
||||
|
||||
if encoded and remove_trailing_equal:
|
||||
encoded = encoded.replace("=&", "&")
|
||||
if encoded[-1] == "=":
|
||||
encoded = encoded[:-1]
|
||||
|
||||
return encoded
|
||||
|
||||
|
||||
def decode(s):
    """
    Takes a urlencoded string and returns a list of surrogate-escaped
    (key, value) tuples.
    """
    pairs = urllib.parse.parse_qsl(s, keep_blank_values=True, errors="surrogateescape")
    return pairs
|
||||
|
||||
|
||||
def quote(b: str, safe: str = "/") -> str:
    """
    Percent-encode *b*, leaving characters in *safe* untouched.

    Returns:
        An ascii-encodable str.
    """
    encoded: str = urllib.parse.quote(b, safe=safe, errors="surrogateescape")
    return encoded
|
||||
|
||||
|
||||
def unquote(s: str) -> str:
    """
    Percent-decode *s*.

    Args:
        s: A surrogate-escaped str
    Returns:
        A surrogate-escaped str
    """
    decoded: str = urllib.parse.unquote(s, errors="surrogateescape")
    return decoded
|
||||
|
||||
|
||||
def hostport(scheme: AnyStr, host: AnyStr, port: int) -> AnyStr:
    """
    Return the host, suffixed with ":port" unless it is the scheme's
    default port.
    """
    if port == default_port(scheme):
        return host
    if isinstance(host, bytes):
        return b"%s:%d" % (host, port)
    return "%s:%d" % (host, port)
|
||||
|
||||
|
||||
def default_port(scheme: AnyStr) -> int | None:
|
||||
return {
|
||||
"http": 80,
|
||||
b"http": 80,
|
||||
"https": 443,
|
||||
b"https": 443,
|
||||
}.get(scheme, None)
|
||||
|
||||
|
||||
def parse_authority(authority: AnyStr, check: bool) -> tuple[str, int | None]:
    """Extract the host and port from host header/authority information

    Args:
        authority: The authority (str or bytes), e.g. "example.com:8080".
        check: If True, malformed input raises; if False it is returned
            best-effort as (str, None).

    Raises:
        ValueError, if check is True and the authority information is malformed.
    """
    # NOTE: the `check` parameter shadows the `mitmproxy.net.check` module
    # inside this function body.
    try:
        if isinstance(authority, bytes):
            m = _authority_re.match(authority.decode("utf-8"))
            if not m:
                raise ValueError
            # bytes input: IDNA-decode the host into a readable str.
            host = m["host"].encode("utf-8").decode("idna")
        else:
            m = _authority_re.match(authority)
            if not m:
                raise ValueError
            host = m.group("host")

        # Strip the brackets from IPv6 literals like "[::1]".
        if host.startswith("[") and host.endswith("]"):
            host = host[1:-1]
        if not is_valid_host(host):
            raise ValueError

        if m.group("port"):
            port = int(m.group("port"))
            if not is_valid_port(port):
                raise ValueError
            return host, port
        else:
            return host, None

    except ValueError:
        if check:
            raise
        else:
            # Best-effort fallback: return the raw authority without a port.
            return always_str(authority, "utf-8", "surrogateescape"), None
|
||||
60
venv/Lib/site-packages/mitmproxy/net/http/user_agents.py
Normal file
60
venv/Lib/site-packages/mitmproxy/net/http/user_agents.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""
|
||||
A small collection of useful user-agent header strings. These should be
|
||||
kept reasonably current to reflect common usage.
|
||||
"""
|
||||
# pylint: disable=line-too-long
|
||||
# A collection of (name, shortcut, string) tuples.
|
||||
|
||||
# (name, shortcut, user-agent string) tuples; looked up by shortcut via
# get_by_shortcut().
UASTRINGS = [
    (
        "android",
        "a",
        "Mozilla/5.0 (Linux; U; Android 4.1.1; en-gb; Nexus 7 Build/JRO03D) AFL/01.04.02",
    ),
    (
        "blackberry",
        "l",
        "Mozilla/5.0 (BlackBerry; U; BlackBerry 9900; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.1.0.346 Mobile Safari/534.11+",
    ),
    (
        "bingbot",
        "b",
        "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
    ),
    (
        "chrome",
        "c",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    ),
    (
        "firefox",
        "f",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:14.0) Gecko/20120405 Firefox/14.0a1",
    ),
    ("googlebot", "g", "Googlebot/2.1 (+http://www.googlebot.com/bot.html)"),
    ("ie9", "i", "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US)"),
    (
        "ipad",
        "p",
        "Mozilla/5.0 (iPad; CPU OS 5_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B176 Safari/7534.48.3",
    ),
    (
        "iphone",
        "h",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 4_2_1 like Mac OS X) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148a Safari/6533.18.5",  # noqa
    ),
    (
        "safari",
        "s",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10",
    ),
]
|
||||
|
||||
|
||||
def get_by_shortcut(s):
    """
    Retrieve a user agent (name, shortcut, string) tuple by its shortcut,
    or None if no entry matches.
    """
    matches = (entry for entry in UASTRINGS if entry[1] == s)
    return next(matches, None)
|
||||
141
venv/Lib/site-packages/mitmproxy/net/http/validate.py
Normal file
141
venv/Lib/site-packages/mitmproxy/net/http/validate.py
Normal file
@@ -0,0 +1,141 @@
|
||||
import logging
|
||||
import re
|
||||
import typing
|
||||
|
||||
from mitmproxy.http import Message
|
||||
from mitmproxy.http import Request
|
||||
from mitmproxy.http import Response
|
||||
|
||||
logger = logging.getLogger(__name__)

# https://datatracker.ietf.org/doc/html/rfc7230#section-3.2: Header fields are tokens.
# "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
_valid_header_name = re.compile(rb"^[!#$%&'*+\-.^_`|~0-9a-zA-Z]+$")

# Strict decimal content-length: no sign and no leading zeros (except "0"
# itself). bytes and str variants of the same pattern.
_valid_content_length = re.compile(rb"^(?:0|[1-9][0-9]*)$")
_valid_content_length_str = re.compile(r"^(?:0|[1-9][0-9]*)$")

# https://datatracker.ietf.org/doc/html/rfc9112#section-6.1:
# > A sender MUST NOT apply the chunked transfer coding more than once to a message body (i.e., chunking an already
# > chunked message is not allowed). If any transfer coding other than chunked is applied to a request's content, the
# > sender MUST apply chunked as the final transfer coding to ensure that the message is properly framed. If any
# > transfer coding other than chunked is applied to a response's content, the sender MUST either apply chunked as the
# > final transfer coding or terminate the message by closing the connection.
#
# The RFC technically still allows for fun encodings, we are a bit stricter and only accept a known subset by default.
TransferEncoding = typing.Literal[
    "chunked",
    "compress,chunked",
    "deflate,chunked",
    "gzip,chunked",
    "compress",
    "deflate",
    "gzip",
    "identity",
]
# Frozen set of the accepted (lowercased, whitespace-normalized) encodings
# above, used for membership checks in parse_transfer_encoding().
_HTTP_1_1_TRANSFER_ENCODINGS = frozenset(typing.get_args(TransferEncoding))
|
||||
|
||||
|
||||
def parse_content_length(value: str | bytes) -> int:
    """Parse a content-length header value, or raise a ValueError if it is invalid."""
    # Select the str or bytes flavour of the strict decimal pattern.
    pattern = (
        _valid_content_length_str if isinstance(value, str) else _valid_content_length
    )
    if pattern.match(value) is None:
        raise ValueError(f"invalid content-length header: {value!r}")
    return int(value)
|
||||
|
||||
|
||||
def parse_transfer_encoding(value: str | bytes) -> TransferEncoding:
    """Parse a transfer-encoding header value, or raise a ValueError if it is invalid or unknown."""
    # .lower() could fold non-ascii characters into the ascii range,
    # so reject non-ascii input up front.
    if not value.isascii():
        raise ValueError(f"invalid transfer-encoding header: {value!r}")
    normalized = value if isinstance(value, str) else value.decode()
    # Lowercase and collapse optional whitespace around the list commas.
    normalized = re.sub(r"[\t ]*,[\t ]*", ",", normalized.lower())
    if normalized not in _HTTP_1_1_TRANSFER_ENCODINGS:
        raise ValueError(f"unknown transfer-encoding header: {value!r}")
    return typing.cast(TransferEncoding, normalized)
|
||||
|
||||
|
||||
def validate_headers(message: Message) -> None:
    """
    Validate HTTP message headers to avoid request smuggling attacks.

    Checks that all header names are RFC 7230 tokens and that
    Transfer-Encoding / Content-Length framing headers are coherent.

    Raises a ValueError if they are malformed.
    """

    # Collect every transfer-encoding and content-length value while
    # validating all header names.
    te = []
    cl = []

    for name, value in message.headers.fields:
        if not _valid_header_name.match(name):
            raise ValueError(f"invalid header name: {name!r}")
        match name.lower():
            case b"transfer-encoding":
                te.append(value)
            case b"content-length":
                cl.append(value)

    if te and cl:
        # > A server MAY reject a request that contains both Content-Length and Transfer-Encoding or process such a
        # > request in accordance with the Transfer-Encoding alone.

        # > A sender MUST NOT send a Content-Length header field in any message that contains a Transfer-Encoding header
        # > field.
        raise ValueError(
            "message with both transfer-encoding and content-length headers"
        )
    elif te:
        if len(te) > 1:
            raise ValueError(f"multiple transfer-encoding headers: {te!r}")
        # > Transfer-Encoding was added in HTTP/1.1. It is generally assumed that implementations advertising only
        # > HTTP/1.0 support will not understand how to process transfer-encoded content, and that an HTTP/1.0 message
        # > received with a Transfer-Encoding is likely to have been forwarded without proper handling of the chunked
        # > transfer coding in transit.
        #
        # > A client MUST NOT send a request containing Transfer-Encoding unless it knows the server will handle
        # > HTTP/1.1 requests (or later minor revisions); such knowledge might be in the form of specific user
        # > configuration or by remembering the version of a prior received response. A server MUST NOT send a response
        # > containing Transfer-Encoding unless the corresponding request indicates HTTP/1.1 (or later minor revisions).
        if not message.is_http11:
            raise ValueError(
                f"unexpected HTTP transfer-encoding {te[0]!r} for {message.http_version}"
            )
        # > A server MUST NOT send a Transfer-Encoding header field in any response with a status code of 1xx
        # > (Informational) or 204 (No Content).
        if isinstance(message, Response) and (
            100 <= message.status_code <= 199 or message.status_code == 204
        ):
            raise ValueError(
                f"unexpected HTTP transfer-encoding {te[0]!r} for response with status code {message.status_code}"
            )
        # > If a Transfer-Encoding header field is present in a request and the chunked transfer coding is not the final
        # > encoding, the message body length cannot be determined reliably; the server MUST respond with the 400 (Bad
        # > Request) status code and then close the connection.
        te_parsed = parse_transfer_encoding(te[0])
        match te_parsed:
            case "chunked" | "compress,chunked" | "deflate,chunked" | "gzip,chunked":
                # chunked is the final coding: the request is properly framed.
                pass
            case "compress" | "deflate" | "gzip" | "identity":
                # No chunked framing: only acceptable for responses
                # (terminated by connection close), never for requests.
                if isinstance(message, Request):
                    raise ValueError(
                        f"unexpected HTTP transfer-encoding {te_parsed!r} for request"
                    )
            case other:  # pragma: no cover
                typing.assert_never(other)
    elif cl:
        # > If a message is received without Transfer-Encoding and with an invalid Content-Length header field, then the
        # > message framing is invalid and the recipient MUST treat it as an unrecoverable error, unless the field value
        # > can be successfully parsed as a comma-separated list (Section 5.6.1 of [HTTP]), all values in the list are
        # > valid, and all values in the list are the same (in which case, the message is processed with that single
        # > value used as the Content-Length field value).
        # We are stricter here and reject comma-separated lists.
        if len(cl) > 1:
            raise ValueError(f"multiple content-length headers: {cl!r}")
        parse_content_length(cl[0])
|
||||
Reference in New Issue
Block a user