"""Write flow objects to a HAR file"""
|
|
|
|
import base64
|
|
import json
|
|
import logging
|
|
import zlib
|
|
from collections.abc import Sequence
|
|
from datetime import datetime
|
|
from datetime import timezone
|
|
from typing import Any
|
|
|
|
from mitmproxy import command
|
|
from mitmproxy import ctx
|
|
from mitmproxy import exceptions
|
|
from mitmproxy import flow
|
|
from mitmproxy import flowfilter
|
|
from mitmproxy import http
|
|
from mitmproxy import types
|
|
from mitmproxy import version
|
|
from mitmproxy.addonmanager import Loader
|
|
from mitmproxy.connection import Server
|
|
from mitmproxy.coretypes.multidict import _MultiDict
|
|
from mitmproxy.log import ALERT
|
|
from mitmproxy.utils import human
|
|
from mitmproxy.utils import strutils
|
|
|
|
logger = logging.getLogger(__name__)
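
# Usage sketch (illustrative, not executed): with this addon loaded, a HAR
# dump on exit is enabled via the `hardump` option defined in `load()` below:
#
#   mitmdump --set hardump=flows.har
#
# If this file were run as a standalone script (`mitmdump -s savehar.py`),
# mitmproxy would additionally expect a module-level `addons = [SaveHar()]`.
# (`savehar.py` is a hypothetical filename.)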


class SaveHar:
    def __init__(self) -> None:
        self.flows: list[flow.Flow] = []
        self.filt: flowfilter.TFilter | None = None

    @command.command("save.har")
    def export_har(self, flows: Sequence[flow.Flow], path: types.Path) -> None:
        """Export flows to a HAR (HTTP Archive) file."""

        har = json.dumps(self.make_har(flows), indent=4).encode()

        # A ".zhar" suffix produces a zlib-compressed HAR dump.
        if path.endswith(".zhar"):
            har = zlib.compress(har, 9)

        with open(path, "wb") as f:
            f.write(har)

        logger.log(ALERT, f"HAR file saved ({human.pretty_size(len(har))}).")
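
    # Example (sketch): from the mitmproxy console, the command registered
    # above can be invoked with a flow spec and a path:
    #
    #   :save.har @all /tmp/flows.har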

    def make_har(self, flows: Sequence[flow.Flow]) -> dict:
        entries = []
        skipped = 0
        # A set of servers seen so far is maintained so that we can avoid
        # reporting 'connect' time for entries that reuse an existing connection.
        servers_seen: set[Server] = set()

        for f in flows:
            if isinstance(f, http.HTTPFlow):
                entries.append(self.flow_entry(f, servers_seen))
            else:
                skipped += 1

        if skipped > 0:
            logger.info(f"Skipped {skipped} flows that weren't HTTP flows.")

        return {
            "log": {
                "version": "1.2",
                "creator": {
                    "name": "mitmproxy",
                    "version": version.VERSION,
                    "comment": "",
                },
                "pages": [],
                "entries": entries,
            }
        }
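
    # The returned structure follows HAR 1.2
    # (http://www.softwareishard.com/blog/har-12-spec/): one top-level "log"
    # object holding "creator", "pages", and "entries". For instance,
    # make_har([]) produces the skeleton above with "entries": [] and logs
    # nothing, since there are no non-HTTP flows to skip.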

    def load(self, loader: Loader):
        loader.add_option(
            "hardump",
            str,
            "",
            """
            Save a HAR file with all flows on exit.
            You may select particular flows by setting save_stream_filter.
            For mitmdump, enabling this option will mean that flows are kept in memory.
            """,
        )

    def configure(self, updated):
        if "save_stream_filter" in updated:
            if ctx.options.save_stream_filter:
                try:
                    self.filt = flowfilter.parse(ctx.options.save_stream_filter)
                except ValueError as e:
                    raise exceptions.OptionsError(str(e)) from e
            else:
                self.filt = None

        if "hardump" in updated:
            if not ctx.options.hardump:
                self.flows = []
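
    # Example (sketch): combining both options on the command line to dump
    # only flows for one domain (standard mitmproxy filter syntax):
    #
    #   mitmdump --set hardump=flows.har --set save_stream_filter="~d example.com"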

    def response(self, flow: http.HTTPFlow) -> None:
        # WebSocket flows also receive a websocket_end event later on,
        # so we don't want to persist them here already.
        if flow.websocket is None:
            self._save_flow(flow)

    def error(self, flow: http.HTTPFlow) -> None:
        self.response(flow)

    def websocket_end(self, flow: http.HTTPFlow) -> None:
        self._save_flow(flow)

    def _save_flow(self, flow: http.HTTPFlow) -> None:
        if ctx.options.hardump:
            flow_matches = self.filt is None or self.filt(flow)
            if flow_matches:
                self.flows.append(flow)

    def done(self):
        if ctx.options.hardump:
            # A path of "-" writes the HAR to stdout instead of a file.
            if ctx.options.hardump == "-":
                har = self.make_har(self.flows)
                print(json.dumps(har, indent=4))
            else:
                self.export_har(self.flows, ctx.options.hardump)

    def flow_entry(self, flow: http.HTTPFlow, servers_seen: set[Server]) -> dict:
        """Creates a HAR entry from a flow."""

        # Per the HAR spec, -1 means a timing does not apply (e.g. a reused
        # connection has no connect/ssl phase).
        if flow.server_conn in servers_seen:
            connect_time = -1.0
            ssl_time = -1.0
        elif flow.server_conn.timestamp_tcp_setup:
            assert flow.server_conn.timestamp_start
            connect_time = 1000 * (
                flow.server_conn.timestamp_tcp_setup - flow.server_conn.timestamp_start
            )

            if flow.server_conn.timestamp_tls_setup:
                ssl_time = 1000 * (
                    flow.server_conn.timestamp_tls_setup
                    - flow.server_conn.timestamp_tcp_setup
                )
            else:
                ssl_time = -1.0
            servers_seen.add(flow.server_conn)
        else:
            connect_time = -1.0
            ssl_time = -1.0

        if flow.request.timestamp_end:
            send = 1000 * (flow.request.timestamp_end - flow.request.timestamp_start)
        else:
            send = 0

        if flow.response and flow.request.timestamp_end:
            wait = 1000 * (flow.response.timestamp_start - flow.request.timestamp_end)
        else:
            wait = 0

        if flow.response and flow.response.timestamp_end:
            receive = 1000 * (
                flow.response.timestamp_end - flow.response.timestamp_start
            )
        else:
            receive = 0

        # All timings are in milliseconds.
        timings: dict[str, float | None] = {
            "connect": connect_time,
            "ssl": ssl_time,
            "send": send,
            "receive": receive,
            "wait": wait,
        }
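
        # Worked example (illustrative): for a reused connection with
        # connect=-1, ssl=-1, send=2.0, wait=30.0, receive=5.0, the entry's
        # total "time" computed below sums only the non-negative values:
        # 2.0 + 30.0 + 5.0 = 37.0 ms.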

        if flow.response:
            try:
                content = flow.response.content
            except ValueError:
                # Decoding failed; fall back to the raw (encoded) body.
                content = flow.response.raw_content
            response_body_size = (
                len(flow.response.raw_content) if flow.response.raw_content else 0
            )
            response_body_decoded_size = len(content) if content else 0
            response_body_compression = response_body_decoded_size - response_body_size
            response = {
                "status": flow.response.status_code,
                "statusText": flow.response.reason,
                "httpVersion": flow.response.http_version,
                "cookies": self.format_response_cookies(flow.response),
                "headers": self.format_multidict(flow.response.headers),
                "content": {
                    "size": response_body_size,
                    "compression": response_body_compression,
                    "mimeType": flow.response.headers.get("Content-Type", ""),
                },
                "redirectURL": flow.response.headers.get("Location", ""),
                "headersSize": len(str(flow.response.headers)),
                "bodySize": response_body_size,
            }
            # Binary bodies are base64-encoded; text bodies are embedded as-is.
            if content and strutils.is_mostly_bin(content):
                response["content"]["text"] = base64.b64encode(content).decode()
                response["content"]["encoding"] = "base64"
            else:
                response["content"]["text"] = flow.response.get_text(strict=False) or ""
        else:
            response = {
                "status": 0,
                "statusText": "",
                "httpVersion": "",
                "headers": [],
                "cookies": [],
                "content": {},
                "redirectURL": "",
                "headersSize": -1,
                "bodySize": -1,
                "_transferSize": 0,
                "_error": None,
            }
            if flow.error:
                response["_error"] = flow.error.msg

        # CONNECT requests use authority-form targets; normalize to an https URL.
        if flow.request.method == "CONNECT":
            url = f"https://{flow.request.pretty_url}/"
        else:
            url = flow.request.pretty_url

        entry: dict[str, Any] = {
            "startedDateTime": datetime.fromtimestamp(
                flow.request.timestamp_start, timezone.utc
            ).isoformat(),
            # Total elapsed time: the sum of all applicable (non-negative) timings.
            "time": sum(v for v in timings.values() if v is not None and v >= 0),
            "request": {
                "method": flow.request.method,
                "url": url,
                "httpVersion": flow.request.http_version,
                "cookies": self.format_multidict(flow.request.cookies),
                "headers": self.format_multidict(flow.request.headers),
                "queryString": self.format_multidict(flow.request.query),
                "headersSize": len(str(flow.request.headers)),
                "bodySize": (
                    len(flow.request.raw_content) if flow.request.raw_content else 0
                ),
            },
            "response": response,
            "cache": {},
            "timings": timings,
        }

        # Methods that may carry a body also get a postData record.
        if flow.request.method in ["POST", "PUT", "PATCH"]:
            params = self.format_multidict(flow.request.urlencoded_form)
            entry["request"]["postData"] = {
                "mimeType": flow.request.headers.get("Content-Type", ""),
                "text": flow.request.get_text(strict=False),
                "params": params,
            }

        if flow.server_conn.peername:
            entry["serverIPAddress"] = str(flow.server_conn.peername[0])

        websocket_messages = []
        if flow.websocket:
            for message in flow.websocket.messages:
                if message.is_text:
                    data = message.text
                else:
                    data = base64.b64encode(message.content).decode()
                websocket_message = {
                    "type": "send" if message.from_client else "receive",
                    "time": message.timestamp,
                    "opcode": message.type.value,
                    "data": data,
                }
                websocket_messages.append(websocket_message)

            # Chrome-style extension fields for WebSocket traffic.
            entry["_resourceType"] = "websocket"
            entry["_webSocketMessages"] = websocket_messages
        return entry

    def format_response_cookies(self, response: http.Response) -> list[dict]:
        """Formats the response's cookie headers into a list of HAR cookies."""
        cookie_list = response.cookies.items(multi=True)
        rv = []
        for name, (value, attrs) in cookie_list:
            cookie = {
                "name": name,
                "value": value,
                "path": attrs.get("path", "/"),
                "domain": attrs.get("domain", ""),
                "httpOnly": "httpOnly" in attrs,
                "secure": "secure" in attrs,
            }
            # TODO: handle the expires attribute here.
            # This is not quite trivial because we'd need to parse random date formats.
            # For now, we just ignore the attribute.

            if "sameSite" in attrs:
                cookie["sameSite"] = attrs["sameSite"]

            rv.append(cookie)
        return rv
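
    # Example (sketch, assuming mitmproxy's case-insensitive cookie attribute
    # parsing): a header like
    #   Set-Cookie: session=abc123; Path=/; Secure; HttpOnly
    # yields roughly
    #   {"name": "session", "value": "abc123", "path": "/", "domain": "",
    #    "httpOnly": True, "secure": True}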

    def format_multidict(self, obj: _MultiDict[str, str]) -> list[dict]:
        """Converts a multidict into a list of HAR name/value pairs."""
        return [{"name": k, "value": v} for k, v in obj.items(multi=True)]
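

# Minimal test sketch (assumptions: mitmproxy's test helpers `taddons` and
# `tflow` behave as below; this is illustrative, not part of the addon):
#
#   from mitmproxy.test import taddons, tflow
#
#   def test_har_skeleton():
#       sh = SaveHar()
#       with taddons.context(sh):
#           har = sh.make_har([tflow.tflow(resp=True)])
#           assert har["log"]["version"] == "1.2"
#           assert len(har["log"]["entries"]) == 1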