Files

529 lines
16 KiB
Python
Raw Permalink Normal View History

2025-12-25 11:16:59 +08:00
"""
This module handles the import of mitmproxy flows generated by old versions.
The flow file version has been decoupled from the mitmproxy release cycle and
version numbering since v3.0.0dev. Every change or migration gets a new flow
file version number; this prevents issues with developer builds and snapshots.
"""
import copy
import uuid
from typing import Any
from mitmproxy import version
from mitmproxy.utils import strutils
def convert_011_012(data):
    """Migrate a flow state dict from format version (0, 11) to (0, 12).

    Only the version marker changes. ``data`` is modified in place and returned.
    """
    data.update({b"version": (0, 12)})
    return data
def convert_012_013(data):
    """Migrate a flow state dict from format version (0, 12) to (0, 13).

    Only the version marker changes. ``data`` is modified in place and returned.
    """
    data.update({b"version": (0, 13)})
    return data
def convert_013_014(data):
    """Migrate a flow state dict from format version (0, 13) to (0, 14).

    Renames several request/response fields, turns the ``httpversion``
    sequences into ``b"HTTP/x.y"`` byte strings, drops the server connection's
    ``state`` and adds an empty ``via``. ``data`` is modified in place and
    returned.
    """

    def pop_http_version(message):
        # e.g. (1, 1) -> b"HTTP/1.1"
        parts = [str(number) for number in message.pop(b"httpversion")]
        return b"HTTP/" + ".".join(parts).encode()

    request = data[b"request"]
    request[b"first_line_format"] = request.pop(b"form_in")
    request[b"http_version"] = pop_http_version(request)

    response = data[b"response"]
    response[b"http_version"] = pop_http_version(response)
    response[b"status_code"] = response.pop(b"code")
    response[b"body"] = response.pop(b"content")

    server_conn = data[b"server_conn"]
    server_conn.pop(b"state")
    server_conn[b"via"] = None

    data[b"version"] = (0, 14)
    return data
def convert_014_015(data):
    """Migrate a flow state dict from format version (0, 14) to (0, 15).

    Only the version marker changes. ``data`` is modified in place and returned.
    """
    data.update({b"version": (0, 15)})
    return data
def convert_015_016(data):
    """Migrate a flow state dict from format version (0, 15) to (0, 16).

    Renames ``body`` to ``content`` on both messages, renames the response's
    ``msg`` to ``reason`` and drops the request's ``form_out``. ``data`` is
    modified in place and returned.
    """
    for key in (b"request", b"response"):
        message = data[key]
        if b"body" not in message:
            continue
        message[b"content"] = message.pop(b"body")

    response = data[b"response"]
    if b"msg" in response:
        response[b"reason"] = response.pop(b"msg")

    data[b"request"].pop(b"form_out", None)
    data[b"version"] = (0, 16)
    return data
def convert_016_017(data):
    """Migrate a flow state dict from format version (0, 16) to (0, 17).

    Adds an empty ``peer_address`` to the server connection. ``data`` is
    modified in place and returned.
    """
    server_conn = data[b"server_conn"]
    server_conn.update({b"peer_address": None})
    data.update({b"version": (0, 17)})
    return data
def convert_017_018(data):
    """Migrate a flow state dict from format version (0, 17) to (0, 18).

    Normalizes keys/values via ``convert_unicode``, renames the server
    connection's ``peer_address`` to ``ip_address`` (``None`` if absent) and
    adds ``marked = False``. Returns the converted dict.
    """
    # convert_unicode needs to be called for every dual release and the first py3-only release
    data = convert_unicode(data)
    server_conn = data["server_conn"]
    server_conn["ip_address"] = server_conn.pop("peer_address", None)
    data.update(marked=False, version=(0, 18))
    return data
def convert_018_019(data):
    """Migrate a flow state dict from format version (0, 18) to (0, 19).

    Normalizes keys/values via ``convert_unicode``, drops the request's
    ``stickyauth``/``stickycookie`` entries, adds empty TLS-related fields to
    the connections, and adds ``mode`` and ``metadata``. Returns the converted
    dict.
    """
    # convert_unicode needs to be called for every dual release and the first py3-only release
    data = convert_unicode(data)

    request = data["request"]
    for obsolete in ("stickyauth", "stickycookie"):
        request.pop(obsolete, None)

    client_conn = data["client_conn"]
    for field in ("sni", "alpn_proto_negotiated", "cipher_name", "tls_version"):
        client_conn[field] = None

    server_conn = data["server_conn"]
    server_conn["alpn_proto_negotiated"] = None
    via = server_conn["via"]
    if via:
        via["alpn_proto_negotiated"] = None

    data.update(mode="regular", metadata=dict(), version=(0, 19))
    return data
def convert_019_100(data):
    """Migrate a flow state dict from format version (0, 19) to (1, 0, 0).

    Normalizes keys/values via ``convert_unicode`` and bumps the version.
    Returns the converted dict.
    """
    # convert_unicode needs to be called for every dual release and the first py3-only release
    data = convert_unicode(data)
    data.update(version=(1, 0, 0))
    return data
def convert_100_200(data):
    """Migrate a flow state dict from format version (1, 0, 0) to (2, 0, 0).

    Every address field that used to be a dict is replaced by that dict's
    ``"address"`` entry. ``data`` is modified in place and returned.
    """

    def flatten_server_addresses(conn):
        # ``address`` and ``source_address`` are always unwrapped;
        # ``ip_address`` only when it is set.
        for field in ("address", "source_address"):
            conn[field] = conn[field]["address"]
        if conn["ip_address"]:
            conn["ip_address"] = conn["ip_address"]["address"]

    data["version"] = (2, 0, 0)

    client_conn = data["client_conn"]
    client_conn["address"] = client_conn["address"]["address"]

    server_conn = data["server_conn"]
    flatten_server_addresses(server_conn)
    via = server_conn["via"]
    if via:
        flatten_server_addresses(via)
    return data
def convert_200_300(data):
    """Migrate a flow state dict from format version (2, 0, 0) to (3, 0, 0).

    Adds an empty ``mitmcert`` to the client connection and an empty
    ``tls_version`` to the server connection (and to its ``via``, if set).
    ``data`` is modified in place and returned.
    """
    data["version"] = (3, 0, 0)
    data["client_conn"]["mitmcert"] = None

    server_conn = data["server_conn"]
    server_conn["tls_version"] = None
    via = server_conn["via"]
    if via:
        via["tls_version"] = None
    return data
def convert_300_4(data):
    """Migrate a flow state dict from format version (3, 0, 0) to 4.

    This is an empty migration to transition to the new versioning scheme:
    only the version marker changes, from a tuple to a plain integer.
    """
    data.update(version=4)
    return data
# Connection ids handed out by convert_4_5 so far, keyed by
# (timestamp_start, *address parts). They are module-level so that flows
# describing the same connection receive the same id.
client_connections: dict[tuple[str, ...], str] = {}
server_connections: dict[tuple[str, ...], str] = {}


def convert_4_5(data):
    """Migrate a flow state dict from format version 4 to 5.

    Gives every connection an ``id``. The id is looked up in (or added to) the
    module-level registries, so connections with the same start timestamp and
    address share one id across calls. ``data`` is modified in place and
    returned.
    """

    def connection_key(conn, address_field):
        return (conn["timestamp_start"], *conn[address_field])

    data["version"] = 5
    client_conn = data["client_conn"]
    client_key = connection_key(client_conn, "address")
    server_conn = data["server_conn"]
    server_key = connection_key(server_conn, "source_address")

    client_conn["id"] = client_connections.setdefault(client_key, str(uuid.uuid4()))
    server_conn["id"] = server_connections.setdefault(server_key, str(uuid.uuid4()))

    via = server_conn["via"]
    if via:
        via_key = connection_key(via, "source_address")
        via["id"] = server_connections.setdefault(via_key, str(uuid.uuid4()))
    return data
def convert_5_6(data):
    """Migrate a flow state dict from format version 5 to 6.

    Renames ``ssl_established`` to ``tls_established`` and
    ``timestamp_ssl_setup`` to ``timestamp_tls_setup`` on the client
    connection, the server connection and (if set) the server's ``via``.
    ``data`` is modified in place and returned.
    """

    def rename_ssl_fields(conn):
        conn["tls_established"] = conn.pop("ssl_established")
        conn["timestamp_tls_setup"] = conn.pop("timestamp_ssl_setup")

    data["version"] = 6
    rename_ssl_fields(data["client_conn"])

    server_conn = data["server_conn"]
    rename_ssl_fields(server_conn)
    via = server_conn["via"]
    if via:
        rename_ssl_fields(via)
    return data
def convert_6_7(data):
    """Migrate a flow state dict from format version 6 to 7.

    Adds an empty ``tls_extensions`` to the client connection. ``data`` is
    modified in place and returned.
    """
    data.update(version=7)
    data["client_conn"].update(tls_extensions=None)
    return data
def convert_7_8(data):
    """Migrate a flow state dict from format version 7 to 8.

    Adds an empty ``trailers`` to the request and to the response, skipping a
    message that is absent or ``None``. ``data`` is modified in place and
    returned.
    """
    data["version"] = 8
    for kind in ("request", "response"):
        message = data.get(kind)
        if message is not None:
            message["trailers"] = None
    return data
def convert_8_9(data):
    """Migrate a flow state dict from format version 8 to 9.

    Drops the request's ``first_line_format``, adds an empty ``authority``,
    and moves the per-message ``is_replay`` flags into one flow-level
    ``is_replay`` that is ``"request"``, ``"response"`` or ``None`` (the
    request flag wins). ``data`` is modified in place and returned.
    """
    data["version"] = 9
    replay_source = None

    if "request" in data:
        request = data["request"]
        request.pop("first_line_format")
        request["authority"] = b""
        if request.pop("is_replay", False):  # pragma: no cover
            replay_source = "request"

    response = data.get("response")
    if response is not None:
        # The flag is always removed from the response, even if the request
        # flag already decided the outcome.
        response_replayed = response.pop("is_replay", False)
        if response_replayed and replay_source is None:  # pragma: no cover
            replay_source = "response"

    data["is_replay"] = replay_source
    return data
def convert_9_10(data):
    """Migrate a flow state dict from format version 9 to 10.

    Adds ``state``, ``error``, ``tls``, ``alpn_offers`` and ``cipher_list`` to
    every connection and replaces the single certificate fields
    (``clientcert`` / ``cert``) with a ``certificate_list``. ``data`` is
    modified in place and returned.
    """

    def as_list(value, empty):
        # Wrap a truthy value in a one-element list, else use ``empty``.
        return [value] if value else empty

    def upgrade_common(conn):
        conn["state"] = 0
        conn["error"] = None
        conn["tls"] = conn["tls_established"]
        conn["alpn_offers"] = as_list(conn["alpn_proto_negotiated"], None)
        conn["cipher_list"] = as_list(conn["cipher_name"], None)

    def upgrade_client(conn):
        conn["sockname"] = ("", 0)
        conn["certificate_list"] = as_list(conn.pop("clientcert", None), [])
        upgrade_common(conn)

    def upgrade_server(conn):
        conn["certificate_list"] = as_list(conn.pop("cert", None), [])
        # Set before upgrade_common, which reads "cipher_name".
        conn["cipher_name"] = None
        conn["via2"] = None
        upgrade_common(conn)

    data["version"] = 10
    upgrade_client(data["client_conn"])
    server_conn = data["server_conn"]
    upgrade_server(server_conn)
    via = server_conn["via"]
    if via:
        upgrade_server(via)
    return data
def convert_10_11(data):
    """Migrate a flow state dict from format version 10 to 11.

    For every connection: passes ``sni`` through ``strutils.always_str``,
    renames ``alpn_proto_negotiated`` to ``alpn`` and replaces falsy
    ``alpn_offers`` / ``cipher_list`` values with empty lists. ``data`` is
    modified in place and returned.
    """

    def normalize(conn):
        conn["sni"] = strutils.always_str(conn["sni"], "ascii", "backslashreplace")
        conn["alpn"] = conn.pop("alpn_proto_negotiated")
        for list_field in ("alpn_offers", "cipher_list"):
            conn[list_field] = conn[list_field] or []

    data["version"] = 11
    normalize(data["client_conn"])
    server_conn = data["server_conn"]
    normalize(server_conn)
    via = server_conn["via"]
    if via:
        normalize(via)
    return data
# Copies of flows whose metadata has a "websocket" entry, keyed by flow id.
# An entry is consumed when the matching WebSocket flow is migrated.
_websocket_handshakes = {}


def convert_11_12(data):
    """Migrate a flow state dict from format version 11 to 12.

    Separate WebSocket flows are folded into their handshake flow:

    * A flow with ``"websocket"`` in its metadata is remembered (deep-copied)
      under its id.
    * A flow with ``"websocket_handshake"`` in its metadata is replaced by the
      remembered copy, or by a made-up dummy HTTP flow if none is stored, and
      its messages and close information are attached under ``"websocket"``.
    * Any other flow just gets ``websocket = None``.

    Returns the resulting flow dict, which may be a different object than the
    one passed in.
    """
    data["version"] = 12
    metadata = data["metadata"]

    if "websocket" in metadata:
        _websocket_handshakes[data["id"]] = copy.deepcopy(data)

    if "websocket_handshake" not in metadata:
        data["websocket"] = None
        return data

    ws_flow = data
    handshake_id = metadata["websocket_handshake"]
    if handshake_id in _websocket_handshakes:
        data = _websocket_handshakes.pop(handshake_id)
    else:
        # The handshake flow is missing, which should never really happen. We make up a dummy.
        data = {
            "client_conn": ws_flow["client_conn"],
            "error": ws_flow["error"],
            "id": ws_flow["id"],
            "intercepted": ws_flow["intercepted"],
            "is_replay": ws_flow["is_replay"],
            "marked": ws_flow["marked"],
            "metadata": {},
            "mode": "transparent",
            "request": {
                "authority": b"",
                "content": None,
                "headers": [],
                "host": b"unknown",
                "http_version": b"HTTP/1.1",
                "method": b"GET",
                "path": b"/",
                "port": 80,
                "scheme": b"http",
                "timestamp_end": 0,
                "timestamp_start": 0,
                "trailers": None,
            },
            "response": None,
            "server_conn": ws_flow["server_conn"],
            "type": "http",
            "version": 12,
        }

    data["metadata"]["duplicated"] = (
        "This WebSocket flow has been migrated from an old file format version "
        "and may appear duplicated."
    )
    server_conn = data.get("server_conn", {})
    data["websocket"] = {
        "messages": ws_flow["messages"],
        "closed_by_client": ws_flow["close_sender"] == "client",
        "close_code": ws_flow["close_code"],
        "close_reason": ws_flow["close_reason"],
        "timestamp_end": server_conn.get("timestamp_end", None),
    }
    return data
def convert_12_13(data):
    """Migrate a flow state dict from format version 12 to 13.

    Turns ``marked`` into a string: ``":default:"`` if it was truthy,
    otherwise ``""``. ``data`` is modified in place and returned.
    """
    data["version"] = 13
    data["marked"] = ":default:" if data["marked"] else ""
    return data
def convert_13_14(data):
    """Migrate a flow state dict from format version 13 to 14.

    Adds an empty ``comment`` and repairs responses that lack a start
    timestamp. ``data`` is modified in place and returned.
    """
    data["version"] = 14
    data["comment"] = ""

    # bugfix for https://github.com/mitmproxy/mitmproxy/issues/4576
    response = data.get("response", None)
    if response and response["timestamp_start"] is None:
        request_end = data["request"]["timestamp_end"]
        response["timestamp_start"] = request_end
        response["timestamp_end"] = request_end + 1
    return data
def convert_14_15(data):
    """Migrate a flow state dict from format version 14 to 15.

    Appends ``False`` (the new "injected" attribute) to every WebSocket
    message, if the flow has WebSocket data. ``data`` is modified in place
    and returned.
    """
    data["version"] = 15
    websocket = data.get("websocket", None)
    if websocket:
        # Add "injected" attribute.
        upgraded = []
        for message in websocket["messages"]:
            upgraded.append(message + [False])
        websocket["messages"] = upgraded
    return data
def convert_15_16(data):
    """Migrate a flow state dict from format version 15 to 16.

    Adds ``timestamp_created``, copied from the request's ``timestamp_start``
    or, when the flow has no ``request`` key, from the client connection's.
    ``data`` is modified in place and returned.
    """
    data["version"] = 16
    fallback = data["client_conn"]
    source = data.get("request", fallback)
    data["timestamp_created"] = source["timestamp_start"]
    return data
def convert_16_17(data):
    """Migrate a flow state dict from format version 16 to 17.

    Removes the flow-level ``mode`` entry if present. ``data`` is modified in
    place and returned.
    """
    data.update(version=17)
    if "mode" in data:
        del data["mode"]
    return data
def convert_17_18(data):
    """Migrate a flow state dict from format version 17 to 18.

    Sets the client connection's ``proxy_mode`` to ``"regular"``. ``data`` is
    modified in place and returned.
    """
    data.update(version=18)
    data["client_conn"].update(proxy_mode="regular")
    return data
def convert_18_19(data):
    """Migrate a flow state dict from format version 18 to 19.

    Renames the connections' address fields (``address`` / ``ip_address`` to
    ``peername``, ``source_address`` to ``sockname``, ``via2`` to ``via``),
    drops ``tls_extensions`` and ``tls_established``, renames ``cipher_name``
    to ``cipher``, defaults ``transport_protocol`` to ``"tcp"`` and decodes
    ``bytes`` hosts in address fields to ``str``. ``data`` is modified in
    place and returned.
    """
    data["version"] = 19

    client_conn = data["client_conn"]
    client_conn["peername"] = client_conn.pop("address", None)
    if client_conn.get("timestamp_start") is None:
        client_conn["timestamp_start"] = 0.0
    client_conn.pop("tls_extensions")

    server_conn = data["server_conn"]
    server_conn["peername"] = server_conn.pop("ip_address", None)
    server_conn["sockname"] = server_conn.pop("source_address", None)
    server_conn["via"] = server_conn.pop("via2", None)

    for conn in (client_conn, server_conn):
        conn.pop("tls_established")
        conn["cipher"] = conn.pop("cipher_name", None)
        conn.setdefault("transport_protocol", "tcp")
        for name in ("peername", "sockname", "address"):
            endpoint = conn.get(name)
            # Only the first element of a non-empty address is decoded.
            if endpoint and isinstance(endpoint[0], bytes):
                endpoint[0] = endpoint[0].decode(errors="backslashreplace")

    # An sni of exactly ``True`` is replaced by the (already decoded) first
    # element of the server address.
    if server_conn["sni"] is True:
        server_conn["sni"] = server_conn["address"][0]
    return data
def convert_19_20(data):
    """Migrate a flow state dict from format version 19 to 20.

    Removes ``state`` from the client and server connections if present.
    ``data`` is modified in place and returned.
    """
    data["version"] = 20
    for side in ("client_conn", "server_conn"):
        data[side].pop("state", None)
    return data
def convert_20_21(data):
    """Migrate a flow state dict from format version 20 to 21.

    Rewrites a ``tls_version`` of ``"QUIC"`` to ``"QUICv1"`` on the client and
    server connections. ``data`` is modified in place and returned.
    """
    data["version"] = 21
    for side in ("client_conn", "server_conn"):
        conn = data[side]
        if conn["tls_version"] == "QUIC":
            conn["tls_version"] = "QUICv1"
    return data
def _convert_dict_keys(o: Any) -> Any:
    """Return a copy of ``o`` with all dict keys passed through ``strutils.always_str``.

    Nested dicts are converted recursively. Anything that is not a dict
    (including lists and their contents) is returned unchanged.
    """
    if not isinstance(o, dict):
        return o
    converted = {}
    for key, value in o.items():
        str_key = strutils.always_str(key)
        converted[str_key] = _convert_dict_keys(value)
    return converted
def _convert_dict_vals(o: dict, values_to_convert: dict) -> dict:
    """Pass selected values of ``o`` through ``strutils.always_str``, in place.

    ``values_to_convert`` mirrors the structure of ``o``: a value of ``True``
    marks the entry to convert, and a nested dict describes what to convert
    one level down. Keys missing from ``o`` are skipped, as is everything when
    ``o`` is empty or ``None``. Returns ``o``.
    """
    for key, spec in values_to_convert.items():
        if o and key in o:
            if spec is True:
                o[key] = strutils.always_str(o[key])
            else:
                _convert_dict_vals(o[key], spec)
    return o
def convert_unicode(data: dict) -> dict:
    """
    This method converts between Python 3 and Python 2 dumpfiles.

    All dict keys are converted with ``_convert_dict_keys``; the values listed
    below are then converted with ``_convert_dict_vals``.
    """
    string_valued_fields = {
        "type": True,
        "id": True,
        "request": {"first_line_format": True},
        "error": {"msg": True},
    }
    with_str_keys = _convert_dict_keys(data)
    return _convert_dict_vals(with_str_keys, string_valued_fields)
# Maps a flow format version to the function that migrates a flow from that
# version to the next one. migrate_flow looks up the flow's current version
# here repeatedly until the flow reaches version.FLOW_FORMAT_VERSION.
converters = {
    # Legacy versions, keyed by the first two components of the version tuple.
    (0, 11): convert_011_012,
    (0, 12): convert_012_013,
    (0, 13): convert_013_014,
    (0, 14): convert_014_015,
    (0, 15): convert_015_016,
    (0, 16): convert_016_017,
    (0, 17): convert_017_018,
    (0, 18): convert_018_019,
    (0, 19): convert_019_100,
    (1, 0): convert_100_200,
    (2, 0): convert_200_300,
    (3, 0): convert_300_4,
    # Integer flow format versions.
    4: convert_4_5,
    5: convert_5_6,
    6: convert_6_7,
    7: convert_7_8,
    8: convert_8_9,
    9: convert_9_10,
    10: convert_10_11,
    11: convert_11_12,
    12: convert_12_13,
    13: convert_13_14,
    14: convert_14_15,
    15: convert_15_16,
    16: convert_16_17,
    17: convert_17_18,
    18: convert_18_19,
    19: convert_19_20,
    20: convert_20_21,
}
def migrate_flow(flow_data: dict[bytes | str, Any]) -> dict[bytes | str, Any]:
    """Upgrade a serialized flow to the current flow format version.

    The flow's version is read from the ``b"version"`` key, falling back to
    ``"version"``. The matching function from ``converters`` is applied
    repeatedly until the version equals ``version.FLOW_FORMAT_VERSION``.

    Args:
        flow_data: The flow state dict as loaded from a flow file.

    Returns:
        The migrated flow state dict. Converters may modify the input in place
        or return a new dict.

    Raises:
        ValueError: If the version is missing or malformed, or if no converter
            exists for it (for example, the file was written by a newer
            mitmproxy).
    """
    while True:
        flow_version = flow_data.get(b"version", flow_data.get("version"))

        # Historically, we used the mitmproxy minor version tuple as the flow format version.
        if not isinstance(flow_version, int):
            try:
                flow_version = tuple(flow_version)[:2]  # type: ignore
            except TypeError:
                # A missing (None) or non-iterable version used to surface as
                # an opaque TypeError; report it like any other unsupported
                # version instead.
                raise ValueError(
                    f"{version.MITMPROXY} cannot read files with a missing or "
                    f"malformed flow format version ({flow_version!r})."
                ) from None

        if flow_version == version.FLOW_FORMAT_VERSION:
            break
        elif flow_version in converters:
            flow_data = converters[flow_version](flow_data)
        else:
            should_upgrade = (
                isinstance(flow_version, int)
                and flow_version > version.FLOW_FORMAT_VERSION
            )
            upgrade_hint = ", please update mitmproxy" if should_upgrade else ""
            raise ValueError(
                f"{version.MITMPROXY} cannot read files with flow format version "
                f"{flow_version}{upgrade_hint}."
            )
    return flow_data