Files
ai_wht_wechat/backend/test_cookie_format_fix.py
2026-01-06 19:36:42 +08:00

314 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
测试Cookie格式处理修复
验证scheduler.py中的_format_cookies方法能正确处理各种Cookie格式
"""
import json
from typing import List, Dict
def _format_cookies(cookies) -> List[Dict]:
"""
格式化Cookie只处理非标准格式的Cookie
对于Playwright原生格式的Cookie直接返回不做任何修改
这是scheduler.py中_format_cookies方法的副本用于独立测试
Args:
cookies: Cookie数据支持list[dict]或dict格式
Returns:
格式化后的Cookie列表
"""
# 如果是字典格式(键值对),转换为列表格式
if isinstance(cookies, dict):
cookies = [
{
"name": name,
"value": str(value) if not isinstance(value, str) else value,
"domain": ".xiaohongshu.com",
"path": "/"
}
for name, value in cookies.items()
]
# 验证是否为列表
if not isinstance(cookies, list):
raise ValueError(f"Cookie必须是列表或字典格式当前类型: {type(cookies).__name__}")
# 检查是否是Playwright原生格式包含name和value字段
if cookies and isinstance(cookies[0], dict) and 'name' in cookies[0] and 'value' in cookies[0]:
# 已经是Playwright格式直接返回不做任何修改
return cookies
# 其他格式,进行基础验证
formatted_cookies = []
for cookie in cookies:
if not isinstance(cookie, dict):
raise ValueError(f"Cookie元素必须是字典格式当前类型: {type(cookie).__name__}")
# 确保有基本字段
if 'domain' not in cookie and 'url' not in cookie:
cookie = cookie.copy()
cookie['domain'] = '.xiaohongshu.com'
if 'path' not in cookie and 'url' not in cookie:
if 'domain' in cookie or 'url' not in cookie:
cookie = cookie.copy() if cookie is cookies[cookies.index(cookie)] else cookie
cookie['path'] = '/'
formatted_cookies.append(cookie)
return formatted_cookies
def test_format_cookies():
    """Run all ``_format_cookies`` scenarios and report the outcome.

    Each scenario prints its progress and a pass/fail marker. A failing
    scenario does not stop the run: its label is recorded and the remaining
    scenarios still execute. After the final banner is printed, a single
    ``AssertionError`` listing the failed scenarios is raised, so that a
    test runner (or the script's exit status) actually reflects failures
    instead of only printing them.

    Raises:
        AssertionError: If at least one scenario failed.
    """
    # Labels of the scenarios that did not pass; checked once at the end.
    failures = []

    print("="*60)
    print("测试 Cookie 格式处理")
    print("="*60)

    # Case 1: dict input (name -> value pairs).
    print("\n测试 1: 字典格式(键值对)")
    cookies_dict = {
        "a1": "xxx",
        "webId": "yyy",
        "web_session": "zzz"
    }
    try:
        result = _format_cookies(cookies_dict)
        print("✅ 成功处理字典格式")
        print(f" 输入: {type(cookies_dict).__name__} with {len(cookies_dict)} items")
        print(f" 输出: {type(result).__name__} with {len(result)} items")
        print(f" 第一个Cookie: {result[0]}")
        assert isinstance(result, list)
        assert len(result) == 3
        assert all('name' in c and 'value' in c and 'domain' in c for c in result)
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 1")

    # Case 2: list input in full format (domain and path already present).
    print("\n测试 2: 列表格式(完整格式)")
    cookies_list_full = [
        {
            "name": "a1",
            "value": "xxx",
            "domain": ".xiaohongshu.com",
            "path": "/",
            "expires": -1,
            "httpOnly": False,
            "secure": False,
            "sameSite": "Lax"
        }
    ]
    try:
        result = _format_cookies(cookies_list_full)
        print("✅ 成功处理完整列表格式")
        print(f" 输入: {type(cookies_list_full).__name__} with {len(cookies_list_full)} items")
        print(f" 输出: {type(result).__name__} with {len(result)} items")
        # Playwright-native cookies must come back completely unchanged.
        print(f" 保留的字段: {list(result[0].keys())}")
        assert result == cookies_list_full, "Playwright原生格式应该被完整保留不做任何修改"
        assert 'expires' in result[0], "expires字段应该被保留"
        assert result[0]['expires'] == -1, "expires=-1应该被保留"
        assert isinstance(result, list)
        assert len(result) == 1
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 2")

    # Case 3: non-Playwright format (no 'name' key); domain and path must
    # be filled in automatically.
    print("\n测试 3: 非Playwright格式缺少字段需要补充")
    cookies_list_partial = [
        {
            "cookie_name": "a1",  # no 'name' key, so not Playwright format
            "cookie_value": "xxx"
        }
    ]
    try:
        result = _format_cookies(cookies_list_partial)
        print("✅ 成功处理非Playwright格式")
        print(f" 输入: {type(cookies_list_partial).__name__} with {len(cookies_list_partial)} items")
        print(f" 输出: {type(result).__name__} with {len(result)} items")
        print(f" 自动添加的字段: domain={result[0].get('domain')}, path={result[0].get('path')}")
        assert isinstance(result, list)
        # domain and path should have been added automatically.
        assert result[0]['domain'] == '.xiaohongshu.com'
        assert result[0]['path'] == '/'
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 3")

    # Case 4: doubly JSON-encoded string (simulates the database storage case).
    print("\n测试 4: 双重JSON编码字符串")
    cookies_dict = {"a1": "xxx", "webId": "yyy"}
    # First JSON encoding.
    cookies_json_1 = json.dumps(cookies_dict)
    # Second JSON encoding.
    cookies_json_2 = json.dumps(cookies_json_1)
    print(f" 原始字典: {cookies_dict}")
    print(f" 第一次编码: {cookies_json_1}")
    print(f" 第二次编码: {cookies_json_2}")
    # Simulate reading the value back from the database and parsing it.
    try:
        # First decode.
        cookies_parsed_1 = json.loads(cookies_json_2)
        print(f" 第一次解析后类型: {type(cookies_parsed_1).__name__}")
        # A string result means the payload was encoded twice: decode again.
        if isinstance(cookies_parsed_1, str):
            cookies_parsed_2 = json.loads(cookies_parsed_1)
            print(f" 第二次解析后类型: {type(cookies_parsed_2).__name__}")
            cookies = cookies_parsed_2
        else:
            cookies = cookies_parsed_1
        # Format the decoded cookies.
        result = _format_cookies(cookies)
        print("✅ 成功处理双重JSON编码")
        print(f" 最终输出: {type(result).__name__} with {len(result)} items")
        assert isinstance(result, list)
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 4")

    # Case 5: invalid input - a plain (non-JSON) string must be rejected.
    print("\n测试 5: 错误格式 - 普通字符串")
    try:
        _format_cookies("invalid_string")
    except ValueError as e:
        print("✅ 正确抛出ValueError异常")
        print(f" 错误信息: {e}")
    except Exception as e:
        print(f"❌ 抛出了非预期的异常: {e}")
        failures.append("测试 5")
    else:
        print("❌ 应该抛出异常但没有")
        failures.append("测试 5")

    # Case 6: invalid input - list containing non-dict elements.
    print("\n测试 6: 错误格式 - 列表中包含非字典元素")
    try:
        _format_cookies(["string_item", 123])
    except ValueError as e:
        print("✅ 正确抛出ValueError异常")
        print(f" 错误信息: {e}")
    except Exception as e:
        print(f"❌ 抛出了非预期的异常: {e}")
        failures.append("测试 6")
    else:
        print("❌ 应该抛出异常但没有")
        failures.append("测试 6")

    # Case 7: Playwright-native cookie whose value is an object stays as-is.
    print("\n测试 7: Playwright原生格式中value为对象应保持原样")
    cookies_with_object_value = [
        {
            "name": "test_cookie",
            "value": {"nested": "object"},  # value is an object
            "domain": ".xiaohongshu.com",
            "path": "/"
        }
    ]
    try:
        result = _format_cookies(cookies_with_object_value)
        print("✅ Playwright原生格式被完整保留")
        print(f" 输入value类型: {type(cookies_with_object_value[0]['value']).__name__}")
        print(f" 输出value类型: {type(result[0]['value']).__name__}")
        print(f" 输出value内容: {result[0]['value']}")
        # Playwright-native cookies are not modified at all, value included.
        assert result == cookies_with_object_value, "Playwright原生格式应完整保留"
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 7")

    # Case 8: dict input with numeric / boolean values is stringified.
    print("\n测试 8: 字典格式中value为数字应自动转换为字符串")
    cookies_dict_with_number = {
        "a1": "xxx",
        "user_id": 12345,  # numeric value
        "is_login": True   # boolean value
    }
    try:
        result = _format_cookies(cookies_dict_with_number)
        print("✅ 成功处理数字/布尔value")
        print(f" 输入: {cookies_dict_with_number}")
        print(f" user_id value类型: {type(result[1]['value']).__name__}, 值: {result[1]['value']}")
        print(f" is_login value类型: {type(result[2]['value']).__name__}, 值: {result[2]['value']}")
        # Converted cookies must not carry fields such as 'expires'.
        print(f" 字段: {list(result[0].keys())}")
        assert all(isinstance(c['value'], str) for c in result), "所有value应该都是字符串类型"
        assert 'expires' not in result[0], "不应该包含expires字段"
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 8")

    # Case 9: Playwright-native cookie keeps expires=-1.
    print("\n测试 9: Playwright原生格式中expires=-1应被保留")
    cookies_with_invalid_expires = [
        {
            "name": "test_cookie",
            "value": "test_value",
            "domain": ".xiaohongshu.com",
            "path": "/",
            "expires": -1  # Playwright-native format
        }
    ]
    try:
        result = _format_cookies(cookies_with_invalid_expires)
        print("✅ Playwright原生格式被完整保留")
        print(f" 原始字段: {list(cookies_with_invalid_expires[0].keys())}")
        print(f" 处理后字段: {list(result[0].keys())}")
        assert result == cookies_with_invalid_expires, "Playwright原生格式应被完整保留"
        assert 'expires' in result[0] and result[0]['expires'] == -1, "expires=-1应该被保留"
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 9")

    # Case 10: Playwright-native cookie keeps a float 'expires'.
    print("\n测试 10: Playwright原生格式中expires为浮点数应被保留")
    cookies_with_float_expires = [
        {
            "name": "test_cookie",
            "value": "test_value",
            "domain": ".xiaohongshu.com",
            "path": "/",
            "expires": 1797066497.112584  # native exports often use floats
        }
    ]
    try:
        result = _format_cookies(cookies_with_float_expires)
        print("✅ Playwright原生格式被完整保留")
        print(f" 原始expires: {cookies_with_float_expires[0]['expires']} (类型: {type(cookies_with_float_expires[0]['expires']).__name__})")
        print(f" 处理后expires: {result[0]['expires']} (类型: {type(result[0]['expires']).__name__})")
        assert result == cookies_with_float_expires, "Playwright原生格式应被完整保留"
        assert isinstance(result[0]['expires'], float), "expires浮点数应该被保留"
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 10")

    # Case 11: Playwright-native cookie keeps the sameSite value and casing.
    print("\n测试 11: Playwright原生格式中sameSite应被完整保留")
    cookies_with_samesite = [
        {
            "name": "test_cookie1",
            "value": "test_value1",
            "domain": ".xiaohongshu.com",
            "path": "/",
            "sameSite": "Lax"  # Playwright-native format
        },
        {
            "name": "test_cookie2",
            "value": "test_value2",
            "domain": ".xiaohongshu.com",
            "path": "/",
            "sameSite": "Strict"
        }
    ]
    try:
        result = _format_cookies(cookies_with_samesite)
        print("✅ Playwright原生格式被完整保留")
        print(f" cookie1 sameSite: {result[0]['sameSite']}")
        print(f" cookie2 sameSite: {result[1]['sameSite']}")
        assert result == cookies_with_samesite, "Playwright原生格式应被完整保留"
        assert result[0]['sameSite'] == 'Lax'
        assert result[1]['sameSite'] == 'Strict'
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 11")

    print("\n" + "="*60)
    print("测试完成")
    print("="*60)

    # Surface recorded failures only after every scenario has run, so the
    # full report is always printed but the run still ends in an error.
    if failures:
        raise AssertionError(f"未通过的测试: {', '.join(failures)}")
# Run the cookie-format checks when this file is executed directly as a script.
if __name__ == "__main__":
    test_format_cookies()