""" This module handles the import of mitmproxy flows generated by old versions. The flow file version is decoupled from the mitmproxy release cycle (since v3.0.0dev) and versioning. Every change or migration gets a new flow file version number, this prevents issues with developer builds and snapshots. """ import copy import uuid from typing import Any from mitmproxy import version from mitmproxy.utils import strutils def convert_011_012(data): data[b"version"] = (0, 12) return data def convert_012_013(data): data[b"version"] = (0, 13) return data def convert_013_014(data): data[b"request"][b"first_line_format"] = data[b"request"].pop(b"form_in") data[b"request"][b"http_version"] = ( b"HTTP/" + ".".join(str(x) for x in data[b"request"].pop(b"httpversion")).encode() ) data[b"response"][b"http_version"] = ( b"HTTP/" + ".".join(str(x) for x in data[b"response"].pop(b"httpversion")).encode() ) data[b"response"][b"status_code"] = data[b"response"].pop(b"code") data[b"response"][b"body"] = data[b"response"].pop(b"content") data[b"server_conn"].pop(b"state") data[b"server_conn"][b"via"] = None data[b"version"] = (0, 14) return data def convert_014_015(data): data[b"version"] = (0, 15) return data def convert_015_016(data): for m in (b"request", b"response"): if b"body" in data[m]: data[m][b"content"] = data[m].pop(b"body") if b"msg" in data[b"response"]: data[b"response"][b"reason"] = data[b"response"].pop(b"msg") data[b"request"].pop(b"form_out", None) data[b"version"] = (0, 16) return data def convert_016_017(data): data[b"server_conn"][b"peer_address"] = None data[b"version"] = (0, 17) return data def convert_017_018(data): # convert_unicode needs to be called for every dual release and the first py3-only release data = convert_unicode(data) data["server_conn"]["ip_address"] = data["server_conn"].pop("peer_address", None) data["marked"] = False data["version"] = (0, 18) return data def convert_018_019(data): # convert_unicode needs to be called for every dual release and the first py3-only release data = convert_unicode(data) data["request"].pop("stickyauth", None) data["request"].pop("stickycookie", None) data["client_conn"]["sni"] = None data["client_conn"]["alpn_proto_negotiated"] = None data["client_conn"]["cipher_name"] = None data["client_conn"]["tls_version"] = None data["server_conn"]["alpn_proto_negotiated"] = None if data["server_conn"]["via"]: data["server_conn"]["via"]["alpn_proto_negotiated"] = None data["mode"] = "regular" data["metadata"] = dict() data["version"] = (0, 19) return data def convert_019_100(data): # convert_unicode needs to be called for every dual release and the first py3-only release data = convert_unicode(data) data["version"] = (1, 0, 0) return data def convert_100_200(data): data["version"] = (2, 0, 0) data["client_conn"]["address"] = data["client_conn"]["address"]["address"] data["server_conn"]["address"] = data["server_conn"]["address"]["address"] data["server_conn"]["source_address"] = data["server_conn"]["source_address"][ "address" ] if data["server_conn"]["ip_address"]: data["server_conn"]["ip_address"] = data["server_conn"]["ip_address"]["address"] if data["server_conn"]["via"]: data["server_conn"]["via"]["address"] = data["server_conn"]["via"]["address"][ "address" ] data["server_conn"]["via"]["source_address"] = data["server_conn"]["via"][ "source_address" ]["address"] if data["server_conn"]["via"]["ip_address"]: data["server_conn"]["via"]["ip_address"] = data["server_conn"]["via"][ "ip_address" ]["address"] return data def convert_200_300(data): data["version"] = (3, 0, 0) data["client_conn"]["mitmcert"] = None data["server_conn"]["tls_version"] = None if data["server_conn"]["via"]: data["server_conn"]["via"]["tls_version"] = None return data def convert_300_4(data): data["version"] = 4 # This is an empty migration to transition to the new versioning scheme. return data client_connections: dict[tuple[str, ...], str] = {} server_connections: dict[tuple[str, ...], str] = {} def convert_4_5(data): data["version"] = 5 client_conn_key = ( data["client_conn"]["timestamp_start"], *data["client_conn"]["address"], ) server_conn_key = ( data["server_conn"]["timestamp_start"], *data["server_conn"]["source_address"], ) data["client_conn"]["id"] = client_connections.setdefault( client_conn_key, str(uuid.uuid4()) ) data["server_conn"]["id"] = server_connections.setdefault( server_conn_key, str(uuid.uuid4()) ) if data["server_conn"]["via"]: server_conn_key = ( data["server_conn"]["via"]["timestamp_start"], *data["server_conn"]["via"]["source_address"], ) data["server_conn"]["via"]["id"] = server_connections.setdefault( server_conn_key, str(uuid.uuid4()) ) return data def convert_5_6(data): data["version"] = 6 data["client_conn"]["tls_established"] = data["client_conn"].pop("ssl_established") data["client_conn"]["timestamp_tls_setup"] = data["client_conn"].pop( "timestamp_ssl_setup" ) data["server_conn"]["tls_established"] = data["server_conn"].pop("ssl_established") data["server_conn"]["timestamp_tls_setup"] = data["server_conn"].pop( "timestamp_ssl_setup" ) if data["server_conn"]["via"]: data["server_conn"]["via"]["tls_established"] = data["server_conn"]["via"].pop( "ssl_established" ) data["server_conn"]["via"]["timestamp_tls_setup"] = data["server_conn"][ "via" ].pop("timestamp_ssl_setup") return data def convert_6_7(data): data["version"] = 7 data["client_conn"]["tls_extensions"] = None return data def convert_7_8(data): data["version"] = 8 if "request" in data and data["request"] is not None: data["request"]["trailers"] = None if "response" in data and data["response"] is not None: data["response"]["trailers"] = None return data def convert_8_9(data): data["version"] = 9 is_request_replay = False if "request" in data: data["request"].pop("first_line_format") data["request"]["authority"] = b"" is_request_replay = data["request"].pop("is_replay", False) is_response_replay = False if "response" in data and data["response"] is not None: is_response_replay = data["response"].pop("is_replay", False) if is_request_replay: # pragma: no cover data["is_replay"] = "request" elif is_response_replay: # pragma: no cover data["is_replay"] = "response" else: data["is_replay"] = None return data def convert_9_10(data): data["version"] = 10 def conv_conn(conn): conn["state"] = 0 conn["error"] = None conn["tls"] = conn["tls_established"] alpn = conn["alpn_proto_negotiated"] conn["alpn_offers"] = [alpn] if alpn else None cipher = conn["cipher_name"] conn["cipher_list"] = [cipher] if cipher else None def conv_cconn(conn): conn["sockname"] = ("", 0) cc = conn.pop("clientcert", None) conn["certificate_list"] = [cc] if cc else [] conv_conn(conn) def conv_sconn(conn): crt = conn.pop("cert", None) conn["certificate_list"] = [crt] if crt else [] conn["cipher_name"] = None conn["via2"] = None conv_conn(conn) conv_cconn(data["client_conn"]) conv_sconn(data["server_conn"]) if data["server_conn"]["via"]: conv_sconn(data["server_conn"]["via"]) return data def convert_10_11(data): data["version"] = 11 def conv_conn(conn): conn["sni"] = strutils.always_str(conn["sni"], "ascii", "backslashreplace") conn["alpn"] = conn.pop("alpn_proto_negotiated") conn["alpn_offers"] = conn["alpn_offers"] or [] conn["cipher_list"] = conn["cipher_list"] or [] conv_conn(data["client_conn"]) conv_conn(data["server_conn"]) if data["server_conn"]["via"]: conv_conn(data["server_conn"]["via"]) return data _websocket_handshakes = {} def convert_11_12(data): data["version"] = 12 if "websocket" in data["metadata"]: _websocket_handshakes[data["id"]] = copy.deepcopy(data) if "websocket_handshake" in data["metadata"]: ws_flow = data try: data = _websocket_handshakes.pop(data["metadata"]["websocket_handshake"]) except KeyError: # The handshake flow is missing, which should never really happen. We make up a dummy. data = { "client_conn": data["client_conn"], "error": data["error"], "id": data["id"], "intercepted": data["intercepted"], "is_replay": data["is_replay"], "marked": data["marked"], "metadata": {}, "mode": "transparent", "request": { "authority": b"", "content": None, "headers": [], "host": b"unknown", "http_version": b"HTTP/1.1", "method": b"GET", "path": b"/", "port": 80, "scheme": b"http", "timestamp_end": 0, "timestamp_start": 0, "trailers": None, }, "response": None, "server_conn": data["server_conn"], "type": "http", "version": 12, } data["metadata"]["duplicated"] = ( "This WebSocket flow has been migrated from an old file format version " "and may appear duplicated." ) data["websocket"] = { "messages": ws_flow["messages"], "closed_by_client": ws_flow["close_sender"] == "client", "close_code": ws_flow["close_code"], "close_reason": ws_flow["close_reason"], "timestamp_end": data.get("server_conn", {}).get("timestamp_end", None), } else: data["websocket"] = None return data def convert_12_13(data): data["version"] = 13 if data["marked"]: data["marked"] = ":default:" else: data["marked"] = "" return data def convert_13_14(data): data["version"] = 14 data["comment"] = "" # bugfix for https://github.com/mitmproxy/mitmproxy/issues/4576 if data.get("response", None) and data["response"]["timestamp_start"] is None: data["response"]["timestamp_start"] = data["request"]["timestamp_end"] data["response"]["timestamp_end"] = data["request"]["timestamp_end"] + 1 return data def convert_14_15(data): data["version"] = 15 if data.get("websocket", None): # Add "injected" attribute. data["websocket"]["messages"] = [ msg + [False] for msg in data["websocket"]["messages"] ] return data def convert_15_16(data): data["version"] = 16 data["timestamp_created"] = data.get("request", data["client_conn"])[ "timestamp_start" ] return data def convert_16_17(data): data["version"] = 17 data.pop("mode", None) return data def convert_17_18(data): data["version"] = 18 data["client_conn"]["proxy_mode"] = "regular" return data def convert_18_19(data): data["version"] = 19 data["client_conn"]["peername"] = data["client_conn"].pop("address", None) if data["client_conn"].get("timestamp_start") is None: data["client_conn"]["timestamp_start"] = 0.0 data["client_conn"].pop("tls_extensions") data["server_conn"]["peername"] = data["server_conn"].pop("ip_address", None) data["server_conn"]["sockname"] = data["server_conn"].pop("source_address", None) data["server_conn"]["via"] = data["server_conn"].pop("via2", None) for conn in ["client_conn", "server_conn"]: data[conn].pop("tls_established") data[conn]["cipher"] = data[conn].pop("cipher_name", None) data[conn].setdefault("transport_protocol", "tcp") for name in ["peername", "sockname", "address"]: if data[conn].get(name) and isinstance(data[conn][name][0], bytes): data[conn][name][0] = data[conn][name][0].decode( errors="backslashreplace" ) if data["server_conn"]["sni"] is True: data["server_conn"]["sni"] = data["server_conn"]["address"][0] return data def convert_19_20(data): data["version"] = 20 data["client_conn"].pop("state", None) data["server_conn"].pop("state", None) return data def convert_20_21(data): data["version"] = 21 if data["client_conn"]["tls_version"] == "QUIC": data["client_conn"]["tls_version"] = "QUICv1" if data["server_conn"]["tls_version"] == "QUIC": data["server_conn"]["tls_version"] = "QUICv1" return data def _convert_dict_keys(o: Any) -> Any: if isinstance(o, dict): return {strutils.always_str(k): _convert_dict_keys(v) for k, v in o.items()} else: return o def _convert_dict_vals(o: dict, values_to_convert: dict) -> dict: for k, v in values_to_convert.items(): if not o or k not in o: continue # pragma: no cover if v is True: o[k] = strutils.always_str(o[k]) else: _convert_dict_vals(o[k], v) return o def convert_unicode(data: dict) -> dict: """ This method converts between Python 3 and Python 2 dumpfiles. """ data = _convert_dict_keys(data) data = _convert_dict_vals( data, { "type": True, "id": True, "request": {"first_line_format": True}, "error": {"msg": True}, }, ) return data converters = { (0, 11): convert_011_012, (0, 12): convert_012_013, (0, 13): convert_013_014, (0, 14): convert_014_015, (0, 15): convert_015_016, (0, 16): convert_016_017, (0, 17): convert_017_018, (0, 18): convert_018_019, (0, 19): convert_019_100, (1, 0): convert_100_200, (2, 0): convert_200_300, (3, 0): convert_300_4, 4: convert_4_5, 5: convert_5_6, 6: convert_6_7, 7: convert_7_8, 8: convert_8_9, 9: convert_9_10, 10: convert_10_11, 11: convert_11_12, 12: convert_12_13, 13: convert_13_14, 14: convert_14_15, 15: convert_15_16, 16: convert_16_17, 17: convert_17_18, 18: convert_18_19, 19: convert_19_20, 20: convert_20_21, } def migrate_flow(flow_data: dict[bytes | str, Any]) -> dict[bytes | str, Any]: while True: flow_version = flow_data.get(b"version", flow_data.get("version")) # Historically, we used the mitmproxy minor version tuple as the flow format version. if not isinstance(flow_version, int): flow_version = tuple(flow_version)[:2] # type: ignore if flow_version == version.FLOW_FORMAT_VERSION: break elif flow_version in converters: flow_data = converters[flow_version](flow_data) else: should_upgrade = ( isinstance(flow_version, int) and flow_version > version.FLOW_FORMAT_VERSION ) raise ValueError( "{} cannot read files with flow format version {}{}.".format( version.MITMPROXY, flow_version, ", please update mitmproxy" if should_upgrade else "", ) ) return flow_data