529 lines
16 KiB
Python
529 lines
16 KiB
Python
|
|
"""
|
||
|
|
This module handles the import of mitmproxy flows generated by old versions.

The flow file version is decoupled from the mitmproxy release cycle and
versioning (since v3.0.0dev). Every change or migration gets a new flow file
version number; this prevents issues with developer builds and snapshots.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import copy
|
||
|
|
import uuid
|
||
|
|
from typing import Any
|
||
|
|
|
||
|
|
from mitmproxy import version
|
||
|
|
from mitmproxy.utils import strutils
|
||
|
|
|
||
|
|
|
||
|
|
def convert_011_012(data):
    """Migrate a flow state dict from format (0, 11) to (0, 12).

    Only the version marker changes. The dict is modified in place and the
    same object is returned.
    """
    target_version = (0, 12)
    data[b"version"] = target_version
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_012_013(data):
    """Migrate a flow state dict from format (0, 12) to (0, 13).

    Only the version marker changes. The dict is modified in place and the
    same object is returned.
    """
    target_version = (0, 13)
    data[b"version"] = target_version
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def _httpversion_to_bytes_013_014(message):
    """Replace the ``httpversion`` sequence of *message* with ``http_version`` bytes."""
    parts = message.pop(b"httpversion")
    message[b"http_version"] = b"HTTP/" + ".".join(str(x) for x in parts).encode()


def convert_013_014(data):
    """Migrate a flow state dict from format (0, 13) to (0, 14).

    Changes, all applied in place:

    * request: ``form_in`` becomes ``first_line_format``; the ``httpversion``
      sequence becomes an ``http_version`` bytes value such as ``b"HTTP/1.1"``.
    * response (when present): same ``httpversion`` conversion; ``code``
      becomes ``status_code`` and ``content`` becomes ``body``.
    * server_conn: ``state`` is dropped and ``via`` is initialised to None.

    Returns the same dict object that was passed in.
    """
    request = data[b"request"]
    request[b"first_line_format"] = request.pop(b"form_in")
    _httpversion_to_bytes_013_014(request)

    # Later migrations in this file explicitly guard against a None response,
    # so do the same here instead of failing on such a flow.
    response = data[b"response"]
    if response is not None:
        _httpversion_to_bytes_013_014(response)
        response[b"status_code"] = response.pop(b"code")
        response[b"body"] = response.pop(b"content")

    server_conn = data[b"server_conn"]
    server_conn.pop(b"state")
    server_conn[b"via"] = None

    data[b"version"] = (0, 14)
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_014_015(data):
    """Migrate a flow state dict from format (0, 14) to (0, 15).

    Only the version marker changes. The dict is modified in place and the
    same object is returned.
    """
    target_version = (0, 15)
    data[b"version"] = target_version
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_015_016(data):
    """Migrate a flow state dict from format (0, 15) to (0, 16).

    Changes, all applied in place:

    * request and response: ``body`` (if present) is renamed to ``content``.
    * response: ``msg`` (if present) is renamed to ``reason``.
    * request: ``form_out`` is dropped if present.

    A None response is left untouched rather than raising a TypeError on the
    membership tests. Returns the same dict object that was passed in.
    """
    for key in (b"request", b"response"):
        message = data[key]
        if message is not None and b"body" in message:
            message[b"content"] = message.pop(b"body")

    response = data[b"response"]
    if response is not None and b"msg" in response:
        response[b"reason"] = response.pop(b"msg")

    data[b"request"].pop(b"form_out", None)
    data[b"version"] = (0, 16)
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_016_017(data):
    """Migrate a flow state dict from format (0, 16) to (0, 17).

    Adds a ``peer_address`` entry (set to None) to the server connection and
    bumps the version. The dict is modified in place and returned.
    """
    server_conn = data[b"server_conn"]
    server_conn[b"peer_address"] = None
    data[b"version"] = (0, 17)
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_017_018(data):
    """Migrate a flow state dict from format (0, 17) to (0, 18).

    After normalising keys via ``convert_unicode``, the server connection's
    ``peer_address`` is renamed to ``ip_address`` (None if it was absent) and
    a ``marked`` flag is added, initialised to False.

    Returns the converted dict, which is the object produced by
    ``convert_unicode`` rather than the one passed in.
    """
    # convert_unicode needs to be called for every dual release and the first py3-only release
    data = convert_unicode(data)

    server_conn = data["server_conn"]
    server_conn["ip_address"] = server_conn.pop("peer_address", None)
    data["marked"] = False
    data["version"] = (0, 18)
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_018_019(data):
    """Migrate a flow state dict from format (0, 18) to (0, 19).

    After normalising keys via ``convert_unicode``:

    * ``stickyauth`` and ``stickycookie`` are dropped from the request.
    * the client connection gains ``sni``, ``alpn_proto_negotiated``,
      ``cipher_name`` and ``tls_version``, all None.
    * the server connection (and its ``via`` connection, if any) gains
      ``alpn_proto_negotiated`` set to None.
    * ``mode`` is set to ``"regular"`` and ``metadata`` to an empty dict.
    """
    # convert_unicode needs to be called for every dual release and the first py3-only release
    data = convert_unicode(data)

    request = data["request"]
    for obsolete in ("stickyauth", "stickycookie"):
        request.pop(obsolete, None)

    client_conn = data["client_conn"]
    for field in ("sni", "alpn_proto_negotiated", "cipher_name", "tls_version"):
        client_conn[field] = None

    server_conn = data["server_conn"]
    server_conn["alpn_proto_negotiated"] = None
    upstream = server_conn["via"]
    if upstream:
        upstream["alpn_proto_negotiated"] = None

    data["mode"] = "regular"
    data["metadata"] = {}
    data["version"] = (0, 19)
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_019_100(data):
    """Migrate a flow state dict from format (0, 19) to (1, 0, 0).

    Apart from the key/value normalisation done by ``convert_unicode``, only
    the version marker changes.
    """
    # convert_unicode needs to be called for every dual release and the first py3-only release
    converted = convert_unicode(data)
    converted["version"] = (1, 0, 0)
    return converted
|
||
|
|
|
||
|
|
|
||
|
|
def convert_100_200(data):
    """Migrate a flow state dict from format (1, 0, 0) to (2, 0, 0).

    Address entries are unwrapped: each one is replaced by the value stored
    under its ``"address"`` key. This applies to the client connection's
    ``address``, the server connection's ``address`` and ``source_address``,
    its ``ip_address`` when truthy, and the same three fields on the server
    connection's ``via`` entry when one is present.

    The dict is modified in place and returned.
    """
    data["version"] = (2, 0, 0)

    def unwrap(conn, field):
        # Replace the wrapper dict with the plain address it carries.
        conn[field] = conn[field]["address"]

    unwrap(data["client_conn"], "address")

    server_conn = data["server_conn"]
    unwrap(server_conn, "address")
    unwrap(server_conn, "source_address")
    if server_conn["ip_address"]:
        unwrap(server_conn, "ip_address")

    upstream = server_conn["via"]
    if upstream:
        unwrap(upstream, "address")
        unwrap(upstream, "source_address")
        if upstream["ip_address"]:
            unwrap(upstream, "ip_address")

    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_200_300(data):
    """Migrate a flow state dict from format (2, 0, 0) to (3, 0, 0).

    Adds ``mitmcert`` (None) to the client connection and ``tls_version``
    (None) to the server connection and, when present, to its ``via`` entry.
    The dict is modified in place and returned.
    """
    data["version"] = (3, 0, 0)
    data["client_conn"]["mitmcert"] = None

    server_conn = data["server_conn"]
    server_conn["tls_version"] = None
    upstream = server_conn["via"]
    if upstream:
        upstream["tls_version"] = None
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_300_4(data):
    """Migrate a flow state dict from format (3, 0, 0) to integer version 4.

    This is an empty migration that only switches to the new versioning
    scheme. The dict is modified in place and returned.
    """
    new_scheme_version = 4
    data["version"] = new_scheme_version
    return data
|
||
|
|
|
||
|
|
|
||
|
|
# Module-level caches mapping a connection key to the id assigned to it, so
# that flows sharing a connection key receive the same id.
client_connections: dict[tuple[str, ...], str] = {}
server_connections: dict[tuple[str, ...], str] = {}


def convert_4_5(data):
    """Migrate a flow state dict from format 4 to 5 by adding connection ids.

    Each connection gets an ``id``. The id is looked up in (or added to) the
    module-level caches under a key made of the connection's
    ``timestamp_start`` followed by the elements of its address
    (``address`` for the client, ``source_address`` for the server and for
    the server's ``via`` connection).

    The dict is modified in place and returned.
    """
    data["version"] = 5

    client_conn = data["client_conn"]
    server_conn = data["server_conn"]

    client_key = (client_conn["timestamp_start"], *client_conn["address"])
    server_key = (server_conn["timestamp_start"], *server_conn["source_address"])

    # setdefault keeps the id of a previously seen connection and only stores
    # the freshly generated one for a new key.
    client_conn["id"] = client_connections.setdefault(client_key, str(uuid.uuid4()))
    server_conn["id"] = server_connections.setdefault(server_key, str(uuid.uuid4()))

    upstream = server_conn["via"]
    if upstream:
        upstream_key = (upstream["timestamp_start"], *upstream["source_address"])
        upstream["id"] = server_connections.setdefault(
            upstream_key, str(uuid.uuid4())
        )

    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_5_6(data):
    """Migrate a flow state dict from format 5 to 6 (``ssl_*`` to ``tls_*``).

    On the client connection, the server connection and (if present) the
    server's ``via`` connection, ``ssl_established`` is renamed to
    ``tls_established`` and ``timestamp_ssl_setup`` to ``timestamp_tls_setup``.

    The dict is modified in place and returned.
    """
    data["version"] = 6

    renames = (
        ("ssl_established", "tls_established"),
        ("timestamp_ssl_setup", "timestamp_tls_setup"),
    )

    def rename_tls_fields(conn):
        for old_key, new_key in renames:
            conn[new_key] = conn.pop(old_key)

    rename_tls_fields(data["client_conn"])
    rename_tls_fields(data["server_conn"])

    upstream = data["server_conn"]["via"]
    if upstream:
        rename_tls_fields(upstream)
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_6_7(data):
    """Migrate a flow state dict from format 6 to 7.

    Adds ``tls_extensions`` (None) to the client connection. The dict is
    modified in place and returned.
    """
    data["version"] = 7
    client_conn = data["client_conn"]
    client_conn["tls_extensions"] = None
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_7_8(data):
    """Migrate a flow state dict from format 7 to 8.

    Adds ``trailers`` (None) to the request and to the response, skipping
    either one when its key is missing or its value is None. The dict is
    modified in place and returned.
    """
    data["version"] = 8
    for part in ("request", "response"):
        message = data.get(part)
        if message is not None:
            message["trailers"] = None
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_8_9(data):
    """Migrate a flow state dict from format 8 to 9.

    * request (if the key exists): ``first_line_format`` is removed,
      ``authority`` is added as ``b""`` and ``is_replay`` is removed.
    * response (if present and not None): ``is_replay`` is removed.
    * a flow-level ``is_replay`` is added: ``"request"`` if the request was
      marked as replayed, otherwise ``"response"`` if the response was,
      otherwise None.

    The dict is modified in place and returned.
    """
    data["version"] = 9

    request_replayed = False
    if "request" in data:
        request = data["request"]
        request.pop("first_line_format")
        request["authority"] = b""
        request_replayed = request.pop("is_replay", False)

    response_replayed = False
    response = data.get("response")
    if response is not None:
        response_replayed = response.pop("is_replay", False)

    # The request marker takes precedence over the response marker.
    if request_replayed:  # pragma: no cover
        replay_origin = "request"
    elif response_replayed:  # pragma: no cover
        replay_origin = "response"
    else:
        replay_origin = None
    data["is_replay"] = replay_origin
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_9_10(data):
    """Migrate a flow state dict from format 9 to 10.

    Client connection: adds ``sockname`` as ``("", 0)`` and replaces
    ``clientcert`` with ``certificate_list`` (a one-element list, or empty).

    Server connection, and its ``via`` connection when truthy: replaces
    ``cert`` with ``certificate_list``, sets ``cipher_name`` and ``via2`` to
    None.

    All of them additionally get ``state`` (0), ``error`` (None), ``tls``
    (copied from ``tls_established``), and ``alpn_offers`` / ``cipher_list``
    (a one-element list built from ``alpn_proto_negotiated`` /
    ``cipher_name``, or None when that value is falsy).

    The dict is modified in place and returned.
    """
    data["version"] = 10

    def as_list_or_none(value):
        return [value] if value else None

    def add_common_fields(conn):
        conn["state"] = 0
        conn["error"] = None
        conn["tls"] = conn["tls_established"]
        conn["alpn_offers"] = as_list_or_none(conn["alpn_proto_negotiated"])
        conn["cipher_list"] = as_list_or_none(conn["cipher_name"])

    def upgrade_client(conn):
        conn["sockname"] = ("", 0)
        client_cert = conn.pop("clientcert", None)
        conn["certificate_list"] = [client_cert] if client_cert else []
        add_common_fields(conn)

    def upgrade_server(conn):
        server_cert = conn.pop("cert", None)
        conn["certificate_list"] = [server_cert] if server_cert else []
        # cipher_name is reset before the common step, so cipher_list ends up
        # as None for server-side connections.
        conn["cipher_name"] = None
        conn["via2"] = None
        add_common_fields(conn)

    upgrade_client(data["client_conn"])
    upgrade_server(data["server_conn"])

    upstream = data["server_conn"]["via"]
    if upstream:
        upgrade_server(upstream)

    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_10_11(data):
    """Migrate a flow state dict from format 10 to 11.

    For the client connection, the server connection and (when truthy) the
    server's ``via`` connection:

    * ``sni`` is passed through ``strutils.always_str`` with the ``"ascii"``
      codec and ``"backslashreplace"`` error handling.
    * ``alpn_proto_negotiated`` is renamed to ``alpn``.
    * falsy ``alpn_offers`` / ``cipher_list`` values become empty lists.

    The dict is modified in place and returned.
    """
    data["version"] = 11

    def modernize(conn):
        conn["sni"] = strutils.always_str(conn["sni"], "ascii", "backslashreplace")
        conn["alpn"] = conn.pop("alpn_proto_negotiated")
        for list_field in ("alpn_offers", "cipher_list"):
            conn[list_field] = conn[list_field] or []

    modernize(data["client_conn"])
    modernize(data["server_conn"])

    upstream = data["server_conn"]["via"]
    if upstream:
        modernize(upstream)

    return data
|
||
|
|
|
||
|
|
|
||
|
|
# Copies of handshake flows seen so far, keyed by flow id, waiting to be
# merged with the WebSocket flow that refers to them.
_websocket_handshakes = {}


def convert_11_12(data):
    """Migrate a flow state dict from format 11 to 12.

    Flows whose metadata contains ``"websocket"`` are remembered (as a deep
    copy) under their id. A flow whose metadata contains
    ``"websocket_handshake"`` is replaced by the remembered handshake flow
    with that id, or by a placeholder HTTP flow if none was remembered, and
    the WebSocket details are attached to it under ``"websocket"``. Every
    other flow simply gets ``"websocket"`` set to None.

    Returns the migrated flow dict, which for WebSocket flows is a different
    object than the one passed in.
    """
    data["version"] = 12
    metadata = data["metadata"]

    if "websocket" in metadata:
        _websocket_handshakes[data["id"]] = copy.deepcopy(data)

    if "websocket_handshake" not in metadata:
        data["websocket"] = None
        return data

    ws_flow = data
    # Stored values are always flow dicts, so None reliably means "not found".
    merged = _websocket_handshakes.pop(metadata["websocket_handshake"], None)
    if merged is None:
        # The handshake flow is missing, which should never really happen. We make up a dummy.
        merged = {
            "client_conn": ws_flow["client_conn"],
            "error": ws_flow["error"],
            "id": ws_flow["id"],
            "intercepted": ws_flow["intercepted"],
            "is_replay": ws_flow["is_replay"],
            "marked": ws_flow["marked"],
            "metadata": {},
            "mode": "transparent",
            "request": {
                "authority": b"",
                "content": None,
                "headers": [],
                "host": b"unknown",
                "http_version": b"HTTP/1.1",
                "method": b"GET",
                "path": b"/",
                "port": 80,
                "scheme": b"http",
                "timestamp_end": 0,
                "timestamp_start": 0,
                "trailers": None,
            },
            "response": None,
            "server_conn": ws_flow["server_conn"],
            "type": "http",
            "version": 12,
        }

    merged["metadata"]["duplicated"] = (
        "This WebSocket flow has been migrated from an old file format version "
        "and may appear duplicated."
    )
    server_conn = merged.get("server_conn", {})
    merged["websocket"] = {
        "messages": ws_flow["messages"],
        "closed_by_client": ws_flow["close_sender"] == "client",
        "close_code": ws_flow["close_code"],
        "close_reason": ws_flow["close_reason"],
        "timestamp_end": server_conn.get("timestamp_end", None),
    }
    return merged
|
||
|
|
|
||
|
|
|
||
|
|
def convert_12_13(data):
    """Migrate a flow state dict from format 12 to 13.

    The ``marked`` value becomes a string: ``":default:"`` when it was truthy,
    the empty string otherwise. The dict is modified in place and returned.
    """
    data["version"] = 13
    data["marked"] = ":default:" if data["marked"] else ""
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_13_14(data):
    """Migrate a flow state dict from format 13 to 14.

    Adds an empty ``comment``. If the flow has a truthy response whose
    ``timestamp_start`` is None, the response timestamps are filled in from
    the request's ``timestamp_end`` (start equal to it, end one later).

    The dict is modified in place and returned.
    """
    data["version"] = 14
    data["comment"] = ""

    # bugfix for https://github.com/mitmproxy/mitmproxy/issues/4576
    response = data.get("response", None)
    if response and response["timestamp_start"] is None:
        request_end = data["request"]["timestamp_end"]
        response["timestamp_start"] = request_end
        response["timestamp_end"] = request_end + 1
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_14_15(data):
    """Migrate a flow state dict from format 14 to 15.

    When the flow carries truthy ``websocket`` data, every message gets an
    additional trailing ``False`` element (the "injected" attribute). The
    message list is rebuilt; the flow dict itself is modified in place and
    returned.
    """
    data["version"] = 15
    websocket = data.get("websocket", None)
    if websocket:
        # Add "injected" attribute.
        extended = []
        for message in websocket["messages"]:
            extended.append(message + [False])
        websocket["messages"] = extended
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_15_16(data):
    """Migrate a flow state dict from format 15 to 16.

    Adds ``timestamp_created``, copied from the ``timestamp_start`` of the
    request or, for flows without a ``request`` key, of the client
    connection. The dict is modified in place and returned.
    """
    data["version"] = 16
    fallback = data["client_conn"]
    origin = data.get("request", fallback)
    data["timestamp_created"] = origin["timestamp_start"]
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_16_17(data):
    """Migrate a flow state dict from format 16 to 17.

    Drops the flow-level ``mode`` entry if it exists. The dict is modified in
    place and returned.
    """
    data["version"] = 17
    if "mode" in data:
        del data["mode"]
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_17_18(data):
    """Migrate a flow state dict from format 17 to 18.

    Sets ``proxy_mode`` to ``"regular"`` on the client connection. The dict
    is modified in place and returned.
    """
    data["version"] = 18
    client_conn = data["client_conn"]
    client_conn["proxy_mode"] = "regular"
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_18_19(data):
    """Migrate a flow state dict from format 18 to 19.

    Client connection: ``address`` becomes ``peername``, a missing or None
    ``timestamp_start`` becomes 0.0, and ``tls_extensions`` is removed.

    Server connection: ``ip_address`` becomes ``peername``,
    ``source_address`` becomes ``sockname`` and ``via2`` replaces ``via``.

    Both connections: ``tls_established`` is removed, ``cipher_name`` becomes
    ``cipher``, ``transport_protocol`` defaults to ``"tcp"``, and a bytes
    first element of ``peername`` / ``sockname`` / ``address`` is decoded to
    str. Finally a server ``sni`` of exactly True is replaced by the first
    element of the server address.

    The dict is modified in place and returned.
    """
    data["version"] = 19

    client_conn = data["client_conn"]
    client_conn["peername"] = client_conn.pop("address", None)
    if client_conn.get("timestamp_start") is None:
        client_conn["timestamp_start"] = 0.0
    client_conn.pop("tls_extensions")

    server_conn = data["server_conn"]
    server_conn["peername"] = server_conn.pop("ip_address", None)
    server_conn["sockname"] = server_conn.pop("source_address", None)
    server_conn["via"] = server_conn.pop("via2", None)

    for conn in (client_conn, server_conn):
        conn.pop("tls_established")

        conn["cipher"] = conn.pop("cipher_name", None)
        conn.setdefault("transport_protocol", "tcp")

        for name in ("peername", "sockname", "address"):
            endpoint = conn.get(name)
            if endpoint and isinstance(endpoint[0], bytes):
                # Decoded in place, so the endpoint has to be a mutable sequence.
                endpoint[0] = endpoint[0].decode(errors="backslashreplace")

    # Runs after the decoding above, so the sni is taken from the str host.
    if server_conn["sni"] is True:
        server_conn["sni"] = server_conn["address"][0]

    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_19_20(data):
    """Migrate a flow state dict from format 19 to 20.

    Drops ``state`` from the client and server connections if present. The
    dict is modified in place and returned.
    """
    data["version"] = 20
    for side in ("client_conn", "server_conn"):
        data[side].pop("state", None)
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def convert_20_21(data):
    """Migrate a flow state dict from format 20 to 21.

    A ``tls_version`` of ``"QUIC"`` on the client or server connection is
    renamed to ``"QUICv1"``. The dict is modified in place and returned.
    """
    data["version"] = 21
    for side in ("client_conn", "server_conn"):
        conn = data[side]
        if conn["tls_version"] == "QUIC":
            conn["tls_version"] = "QUICv1"
    return data
|
||
|
|
|
||
|
|
|
||
|
|
def _convert_dict_keys(o: Any) -> Any:
    """Return a copy of *o* with every dict key passed through ``strutils.always_str``.

    Recurses into dict values only; any non-dict value (including lists) is
    returned unchanged.
    """
    if not isinstance(o, dict):
        return o
    return {
        strutils.always_str(key): _convert_dict_keys(value)
        for key, value in o.items()
    }
|
||
|
|
|
||
|
|
|
||
|
|
def _convert_dict_vals(o: dict, values_to_convert: dict) -> dict:
    """Convert selected values of *o* with ``strutils.always_str``, in place.

    *values_to_convert* mirrors the structure of *o*: a value of exactly True
    marks the entry to convert, any other value is treated as a nested spec
    and applied to the corresponding nested value. Keys missing from *o* are
    skipped, as is everything when *o* is falsy.

    Returns *o*.
    """
    for key, spec in values_to_convert.items():
        if not o or key not in o:
            continue  # pragma: no cover
        if spec is True:
            o[key] = strutils.always_str(o[key])
            continue
        _convert_dict_vals(o[key], spec)
    return o
|
||
|
|
|
||
|
|
|
||
|
|
def convert_unicode(data: dict) -> dict:
    """
    Convert between Python 3 and Python 2 dumpfiles.

    All dict keys are normalised with ``_convert_dict_keys``; afterwards the
    ``type``, ``id``, ``request.first_line_format`` and ``error.msg`` values
    are converted with ``_convert_dict_vals``.
    """
    fields_to_convert = {
        "type": True,
        "id": True,
        "request": {"first_line_format": True},
        "error": {"msg": True},
    }
    with_str_keys = _convert_dict_keys(data)
    return _convert_dict_vals(with_str_keys, fields_to_convert)
|
||
|
|
|
||
|
|
|
||
|
|
# Maps the flow format version a flow currently has to the function that
# migrates it one step further.
#
# Older versions are keyed by a two-element tuple; migrate_flow truncates a
# non-integer stored version to its first two elements before the lookup.
converters = {
    (0, 11): convert_011_012,
    (0, 12): convert_012_013,
    (0, 13): convert_013_014,
    (0, 14): convert_014_015,
    (0, 15): convert_015_016,
    (0, 16): convert_016_017,
    (0, 17): convert_017_018,
    (0, 18): convert_018_019,
    (0, 19): convert_019_100,
    (1, 0): convert_100_200,
    (2, 0): convert_200_300,
    (3, 0): convert_300_4,
}
# From version 4 onwards the flow format version is a plain integer.
converters.update(
    {
        4: convert_4_5,
        5: convert_5_6,
        6: convert_6_7,
        7: convert_7_8,
        8: convert_8_9,
        9: convert_9_10,
        10: convert_10_11,
        11: convert_11_12,
        12: convert_12_13,
        13: convert_13_14,
        14: convert_14_15,
        15: convert_15_16,
        16: convert_16_17,
        17: convert_17_18,
        18: convert_18_19,
        19: convert_19_20,
        20: convert_20_21,
    }
)
|
||
|
|
|
||
|
|
|
||
|
|
def migrate_flow(flow_data: dict[bytes | str, Any]) -> dict[bytes | str, Any]:
    """Upgrade *flow_data* step by step to ``version.FLOW_FORMAT_VERSION``.

    The flow's version is read from the ``b"version"`` key, falling back to
    ``"version"``, and the matching entry in ``converters`` is applied
    repeatedly until the current format version is reached.

    Returns the migrated flow dict.

    Raises:
        ValueError: if no converter exists for the flow's version. When the
            version is an integer newer than the supported one, the message
            asks the user to update mitmproxy.
    """
    while True:
        flow_version = flow_data.get(b"version", flow_data.get("version"))

        # Historically, we used the mitmproxy minor version tuple as the flow format version.
        if not isinstance(flow_version, int):
            flow_version = tuple(flow_version)[:2]  # type: ignore

        if flow_version == version.FLOW_FORMAT_VERSION:
            return flow_data

        # Registered converters are functions, so None reliably means "unknown version".
        converter = converters.get(flow_version)
        if converter is not None:
            flow_data = converter(flow_data)
            continue

        should_upgrade = (
            isinstance(flow_version, int)
            and flow_version > version.FLOW_FORMAT_VERSION
        )
        hint = ", please update mitmproxy" if should_upgrade else ""
        raise ValueError(
            "{} cannot read files with flow format version {}{}.".format(
                version.MITMPROXY,
                flow_version,
                hint,
            )
        )
|