""" 测试Cookie格式处理修复 验证scheduler.py中的_format_cookies方法能正确处理各种Cookie格式 """ import json from typing import List, Dict def _format_cookies(cookies) -> List[Dict]: """ 格式化Cookie,只处理非标准格式的Cookie 对于Playwright原生格式的Cookie,直接返回,不做任何修改 这是scheduler.py中_format_cookies方法的副本,用于独立测试 Args: cookies: Cookie数据,支持list[dict]或dict格式 Returns: 格式化后的Cookie列表 """ # 如果是字典格式(键值对),转换为列表格式 if isinstance(cookies, dict): cookies = [ { "name": name, "value": str(value) if not isinstance(value, str) else value, "domain": ".xiaohongshu.com", "path": "/" } for name, value in cookies.items() ] # 验证是否为列表 if not isinstance(cookies, list): raise ValueError(f"Cookie必须是列表或字典格式,当前类型: {type(cookies).__name__}") # 检查是否是Playwright原生格式(包含name和value字段) if cookies and isinstance(cookies[0], dict) and 'name' in cookies[0] and 'value' in cookies[0]: # 已经是Playwright格式,直接返回,不做任何修改 return cookies # 其他格式,进行基础验证 formatted_cookies = [] for cookie in cookies: if not isinstance(cookie, dict): raise ValueError(f"Cookie元素必须是字典格式,当前类型: {type(cookie).__name__}") # 确保有基本字段 if 'domain' not in cookie and 'url' not in cookie: cookie = cookie.copy() cookie['domain'] = '.xiaohongshu.com' if 'path' not in cookie and 'url' not in cookie: if 'domain' in cookie or 'url' not in cookie: cookie = cookie.copy() if cookie is cookies[cookies.index(cookie)] else cookie cookie['path'] = '/' formatted_cookies.append(cookie) return formatted_cookies def test_format_cookies(): """测试_format_cookies方法""" print("="*60) print("测试 Cookie 格式处理") print("="*60) # 测试1: 字典格式(键值对) print("\n测试 1: 字典格式(键值对)") cookies_dict = { "a1": "xxx", "webId": "yyy", "web_session": "zzz" } try: result = _format_cookies(cookies_dict) print(f"✅ 成功处理字典格式") print(f" 输入: {type(cookies_dict).__name__} with {len(cookies_dict)} items") print(f" 输出: {type(result).__name__} with {len(result)} items") print(f" 第一个Cookie: {result[0]}") assert isinstance(result, list) assert len(result) == 3 assert all('name' in c and 'value' in c and 'domain' in c for c in result) except Exception as e: print(f"❌ 失败: {str(e)}") # 测试2: 列表格式(完整格式,已有domain和path) print("\n测试 2: 列表格式(完整格式)") cookies_list_full = [ { "name": "a1", "value": "xxx", "domain": ".xiaohongshu.com", "path": "/", "expires": -1, "httpOnly": False, "secure": False, "sameSite": "Lax" } ] try: result = _format_cookies(cookies_list_full) print(f"✅ 成功处理完整列表格式") print(f" 输入: {type(cookies_list_full).__name__} with {len(cookies_list_full)} items") print(f" 输出: {type(result).__name__} with {len(result)} items") # 验证Playwright原生格式被完整保留 print(f" 保留的字段: {list(result[0].keys())}") assert result == cookies_list_full, "Playwright原生格式应该被完整保留,不做任何修改" assert 'expires' in result[0], "expires字段应该被保留" assert result[0]['expires'] == -1, "expires=-1应该被保留" assert isinstance(result, list) assert len(result) == 1 except Exception as e: print(f"❌ 失败: {str(e)}") # 测试3: 非Playwright格式(缺少name字段,需要补充domain和path) print("\n测试 3: 非Playwright格式(缺少字段,需要补充)") cookies_list_partial = [ { "cookie_name": "a1", # 没有name字段,不是Playwright格式 "cookie_value": "xxx" } ] try: result = _format_cookies(cookies_list_partial) print(f"✅ 成功处理非Playwright格式") print(f" 输入: {type(cookies_list_partial).__name__} with {len(cookies_list_partial)} items") print(f" 输出: {type(result).__name__} with {len(result)} items") print(f" 自动添加的字段: domain={result[0].get('domain')}, path={result[0].get('path')}") assert isinstance(result, list) # 应该自动添加domain和path assert result[0]['domain'] == '.xiaohongshu.com' assert result[0]['path'] == '/' except Exception as e: print(f"❌ 失败: {str(e)}") # 测试4: 双重JSON编码(模拟数据库存储场景) print("\n测试 4: 双重JSON编码字符串") cookies_dict = {"a1": "xxx", "webId": "yyy"} # 第一次JSON编码 cookies_json_1 = json.dumps(cookies_dict) # 第二次JSON编码 cookies_json_2 = json.dumps(cookies_json_1) print(f" 原始字典: {cookies_dict}") print(f" 第一次编码: {cookies_json_1}") print(f" 第二次编码: {cookies_json_2}") # 模拟从数据库读取并解析 try: # 第一次解析 cookies_parsed_1 = json.loads(cookies_json_2) print(f" 第一次解析后类型: {type(cookies_parsed_1).__name__}") # 处理双重编码 if isinstance(cookies_parsed_1, str): cookies_parsed_2 = json.loads(cookies_parsed_1) print(f" 第二次解析后类型: {type(cookies_parsed_2).__name__}") cookies = cookies_parsed_2 else: cookies = cookies_parsed_1 # 格式化 result = _format_cookies(cookies) print(f"✅ 成功处理双重JSON编码") print(f" 最终输出: {type(result).__name__} with {len(result)} items") assert isinstance(result, list) except Exception as e: print(f"❌ 失败: {str(e)}") # 测试5: 错误格式 - 字符串(不是JSON) print("\n测试 5: 错误格式 - 普通字符串") try: result = _format_cookies("invalid_string") print(f"❌ 应该抛出异常但没有") except ValueError as e: print(f"✅ 正确抛出ValueError异常") print(f" 错误信息: {str(e)}") except Exception as e: print(f"❌ 抛出了非预期的异常: {str(e)}") # 测试6: 错误格式 - 列表中包含非字典元素 print("\n测试 6: 错误格式 - 列表中包含非字典元素") try: result = _format_cookies(["string_item", 123]) print(f"❌ 应该抛出异常但没有") except ValueError as e: print(f"✅ 正确抛出ValueError异常") print(f" 错误信息: {str(e)}") except Exception as e: print(f"❌ 抛出了非预期的异常: {str(e)}") # 测试7: Playwright原生格式中value为对象(保持原样) print("\n测试 7: Playwright原生格式中value为对象(应保持原样)") cookies_with_object_value = [ { "name": "test_cookie", "value": {"nested": "object"}, # value是对象 "domain": ".xiaohongshu.com", "path": "/" } ] try: result = _format_cookies(cookies_with_object_value) print(f"✅ Playwright原生格式被完整保留") print(f" 输入value类型: {type(cookies_with_object_value[0]['value']).__name__}") print(f" 输出value类型: {type(result[0]['value']).__name__}") print(f" 输出value内容: {result[0]['value']}") # Playwright原生格式不做任何修改,包括uvalue assert result == cookies_with_object_value, "Playwright原生格式应完整保留" except Exception as e: print(f"❌ 失败: {str(e)}") # 测试8: 字典格式中value为数字 print("\n测试 8: 字典格式中value为数字(应自动转换为字符串)") cookies_dict_with_number = { "a1": "xxx", "user_id": 12345, # value是数字 "is_login": True # value是布尔值 } try: result = _format_cookies(cookies_dict_with_number) print(f"✅ 成功处理数字/布尔value") print(f" 输入: {cookies_dict_with_number}") print(f" user_id value类型: {type(result[1]['value']).__name__}, 值: {result[1]['value']}") print(f" is_login value类型: {type(result[2]['value']).__name__}, 值: {result[2]['value']}") # 验证不再包含expires等字段 print(f" 字段: {list(result[0].keys())}") assert all(isinstance(c['value'], str) for c in result), "所有value应该都是字符串类型" assert 'expires' not in result[0], "不应该包含expires字段" except Exception as e: print(f"❌ 失败: {str(e)}") # 测试9: Playwright原生格式中expires=-1(应被保留) print("\n测试 9: Playwright原生格式中expires=-1(应被保留)") cookies_with_invalid_expires = [ { "name": "test_cookie", "value": "test_value", "domain": ".xiaohongshu.com", "path": "/", "expires": -1 # Playwright原生格式 } ] try: result = _format_cookies(cookies_with_invalid_expires) print(f"✅ Playwright原生格式被完整保留") print(f" 原始字段: {list(cookies_with_invalid_expires[0].keys())}") print(f" 处理后字段: {list(result[0].keys())}") assert result == cookies_with_invalid_expires, "Playwright原生格式应被完整保留" assert 'expires' in result[0] and result[0]['expires'] == -1, "expires=-1应该被保留" except Exception as e: print(f"❌ 失败: {str(e)}") # 测试10: Playwright原生格式中expires为浮点数(应被保留) print("\n测试 10: Playwright原生格式中expires为浮点数(应被保留)") cookies_with_float_expires = [ { "name": "test_cookie", "value": "test_value", "domain": ".xiaohongshu.com", "path": "/", "expires": 1797066497.112584 # Playwright原生格式常常有浮点数 } ] try: result = _format_cookies(cookies_with_float_expires) print(f"✅ Playwright原生格式被完整保留") print(f" 原始expires: {cookies_with_float_expires[0]['expires']} (类型: {type(cookies_with_float_expires[0]['expires']).__name__})") print(f" 处理后expires: {result[0]['expires']} (类型: {type(result[0]['expires']).__name__})") assert result == cookies_with_float_expires, "Playwright原生格式应被完整保留" assert isinstance(result[0]['expires'], float), "expires浮点数应该被保留" except Exception as e: print(f"❌ 失败: {str(e)}") # 测试11: Playwright原生格式中sameSite大小写(应被保留) print("\n测试 11: Playwright原生格式中sameSite(应被完整保留)") cookies_with_samesite = [ { "name": "test_cookie1", "value": "test_value1", "domain": ".xiaohongshu.com", "path": "/", "sameSite": "Lax" # Playwright原生格式 }, { "name": "test_cookie2", "value": "test_value2", "domain": ".xiaohongshu.com", "path": "/", "sameSite": "Strict" } ] try: result = _format_cookies(cookies_with_samesite) print(f"✅ Playwright原生格式被完整保留") print(f" cookie1 sameSite: {result[0]['sameSite']}") print(f" cookie2 sameSite: {result[1]['sameSite']}") assert result == cookies_with_samesite, "Playwright原生格式应被完整保留" assert result[0]['sameSite'] == 'Lax' assert result[1]['sameSite'] == 'Strict' except Exception as e: print(f"❌ 失败: {str(e)}") print("\n" + "="*60) print("测试完成") print("="*60) if __name__ == "__main__": test_format_cookies()