"""Utilities for parsing curl command text and replaying the requests.

Responsibilities:
- Split text that contains several curl commands into one block per command.
- Extract the URL and the request headers (including cookies) from each block.
- Issue a GET request from a parsed block and try to return the body as JSON.
"""
import json
import re
from urllib.request import Request, urlopen

# Start of a curl command: the word `curl` followed by whitespace.
_CURL_START_RE = re.compile(r"\bcurl\s")

# Each pattern captures the opening quote in group 1 and closes on that SAME
# quote (backreference \1), so an argument such as
#   -H 'sec-ch-ua: "Chromium";v="120"'
# is not cut short at the first embedded quote of the other kind.
_URL_RE = re.compile(r"curl\s+(['\"])(.*?)\1", re.S)
# The lookbehind keeps `-H` / `-b` from matching in the middle of another
# token (for example the tail of a longer option name).
_HEADER_RE = re.compile(r"(?<![\w-])(?:-H|--header)\s+(['\"])(.*?)\1")
_COOKIE_RE = re.compile(r"(?<![\w-])(?:-b|--cookie)\s+(['\"])(.*?)\1", re.S)


def _split_curl_blocks(text: str) -> list:
    """Split *text* into one chunk per curl command.

    Every occurrence of the word ``curl`` followed by whitespace starts a new
    chunk, which runs up to the next occurrence (or the end of the text).
    Anything before the first ``curl`` is discarded.

    Returns:
        A list of command strings; empty when no ``curl`` keyword is found.
    """
    starts = [m.start() for m in _CURL_START_RE.finditer(text)]
    # Each chunk ends where the next one starts; the last ends at EOF.
    ends = starts[1:] + [len(text)]
    return [text[begin:end] for begin, end in zip(starts, ends)]


def _parse_block(block: str):
    """Extract the URL and headers from a single curl command block.

    The URL must be the quoted argument immediately following ``curl``.
    Headers come from ``-H`` / ``--header`` arguments and are stored under
    lower-cased names; a ``-b`` / ``--cookie`` argument is stored under the
    ``cookie`` key and overrides any ``cookie`` header given with ``-H``.

    Returns:
        ``{'url': str, 'headers': dict}``, or ``None`` when no URL is found.
    """
    url_match = _URL_RE.search(block)
    if not url_match:
        return None

    headers = {}
    for header_match in _HEADER_RE.finditer(block):
        # Split on the FIRST colon only: header values may contain colons.
        name, colon, value = header_match.group(2).partition(":")
        name = name.strip()
        if not colon or not name:
            # Not a "Name: value" pair (no colon, or an empty name as in
            # HTTP/2 pseudo-headers such as ":authority: ...") - skip it.
            continue
        headers[name.lower()] = value.strip()

    cookie_match = _COOKIE_RE.search(block)
    if cookie_match:
        headers['cookie'] = cookie_match.group(2)

    return {'url': url_match.group(2), 'headers': headers}


def parse_curl_file(file_path):
    """Read a text file of curl commands and parse it into request descriptions.

    Args:
        file_path: Path of a UTF-8 text file containing one or more curl
            commands.

    Returns:
        A list of ``{'url': str, 'headers': dict}`` dicts, one per curl
        command whose URL could be parsed, in file order.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        text = f.read()
    candidates = (_parse_block(chunk) for chunk in _split_curl_blocks(text))
    return [parsed for parsed in candidates if parsed]


def fetch_from_curl(file_path, index=0, timeout=30):
    """Pick one parsed curl command by position and send it as a GET request.

    Args:
        file_path: Path of the curl text file (see ``parse_curl_file``).
        index: Zero-based position of the curl command to replay.
        timeout: Request timeout in seconds, passed to ``urlopen``.

    Returns:
        The decoded JSON document when the response body parses as JSON,
        otherwise the raw response ``bytes``. ``None`` when *index* does not
        refer to a parsed command (including when nothing could be parsed).

    Network and HTTP errors raised by ``urlopen`` are not caught here.
    """
    reqs = parse_curl_file(file_path)
    if not 0 <= index < len(reqs):
        return None

    item = reqs[index]
    req = Request(item['url'], headers=item['headers'], method='GET')
    with urlopen(req, timeout=timeout) as resp:
        data = resp.read()

    try:
        # Undecodable bytes are dropped, so decoding itself never raises.
        return json.loads(data.decode('utf-8', errors='ignore'))
    except (ValueError, RecursionError):
        # Not JSON (JSONDecodeError is a ValueError) or nested too deeply:
        # hand the caller the raw payload instead.
        return data