commit
This commit is contained in:
313
backend/test_cookie_format_fix.py
Normal file
313
backend/test_cookie_format_fix.py
Normal file
@@ -0,0 +1,313 @@
|
||||
"""
|
||||
测试Cookie格式处理修复
|
||||
验证scheduler.py中的_format_cookies方法能正确处理各种Cookie格式
|
||||
"""
|
||||
import json
|
||||
from typing import List, Dict
|
||||
|
||||
|
||||
def _format_cookies(cookies) -> List[Dict]:
|
||||
"""
|
||||
格式化Cookie,只处理非标准格式的Cookie
|
||||
对于Playwright原生格式的Cookie,直接返回,不做任何修改
|
||||
|
||||
这是scheduler.py中_format_cookies方法的副本,用于独立测试
|
||||
|
||||
Args:
|
||||
cookies: Cookie数据,支持list[dict]或dict格式
|
||||
|
||||
Returns:
|
||||
格式化后的Cookie列表
|
||||
"""
|
||||
# 如果是字典格式(键值对),转换为列表格式
|
||||
if isinstance(cookies, dict):
|
||||
cookies = [
|
||||
{
|
||||
"name": name,
|
||||
"value": str(value) if not isinstance(value, str) else value,
|
||||
"domain": ".xiaohongshu.com",
|
||||
"path": "/"
|
||||
}
|
||||
for name, value in cookies.items()
|
||||
]
|
||||
|
||||
# 验证是否为列表
|
||||
if not isinstance(cookies, list):
|
||||
raise ValueError(f"Cookie必须是列表或字典格式,当前类型: {type(cookies).__name__}")
|
||||
|
||||
# 检查是否是Playwright原生格式(包含name和value字段)
|
||||
if cookies and isinstance(cookies[0], dict) and 'name' in cookies[0] and 'value' in cookies[0]:
|
||||
# 已经是Playwright格式,直接返回,不做任何修改
|
||||
return cookies
|
||||
|
||||
# 其他格式,进行基础验证
|
||||
formatted_cookies = []
|
||||
for cookie in cookies:
|
||||
if not isinstance(cookie, dict):
|
||||
raise ValueError(f"Cookie元素必须是字典格式,当前类型: {type(cookie).__name__}")
|
||||
|
||||
# 确保有基本字段
|
||||
if 'domain' not in cookie and 'url' not in cookie:
|
||||
cookie = cookie.copy()
|
||||
cookie['domain'] = '.xiaohongshu.com'
|
||||
if 'path' not in cookie and 'url' not in cookie:
|
||||
if 'domain' in cookie or 'url' not in cookie:
|
||||
cookie = cookie.copy() if cookie is cookies[cookies.index(cookie)] else cookie
|
||||
cookie['path'] = '/'
|
||||
|
||||
formatted_cookies.append(cookie)
|
||||
|
||||
return formatted_cookies
|
||||
|
||||
|
||||
def test_format_cookies():
    """Exercise ``_format_cookies`` with accepted and rejected input shapes.

    Every case prints its outcome, so the file stays useful as a script, and
    all cases always run. Cases that fail are collected and reported through
    a single ``AssertionError`` at the end; previously assertion failures
    were caught and only printed, so a test runner could never see a failure.

    Raises:
        AssertionError: If at least one case failed.
    """
    failures = []

    print("="*60)
    print("测试 Cookie 格式处理")
    print("="*60)

    # Case 1: plain name/value dict.
    print("\n测试 1: 字典格式(键值对)")
    cookies_dict = {
        "a1": "xxx",
        "webId": "yyy",
        "web_session": "zzz"
    }
    try:
        result = _format_cookies(cookies_dict)
        print("✅ 成功处理字典格式")
        print(f" 输入: {type(cookies_dict).__name__} with {len(cookies_dict)} items")
        print(f" 输出: {type(result).__name__} with {len(result)} items")
        print(f" 第一个Cookie: {result[0]}")
        assert isinstance(result, list)
        assert len(result) == 3
        assert all('name' in c and 'value' in c and 'domain' in c for c in result)
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 1")

    # Case 2: list in the complete format (domain and path already present).
    print("\n测试 2: 列表格式(完整格式)")
    cookies_list_full = [
        {
            "name": "a1",
            "value": "xxx",
            "domain": ".xiaohongshu.com",
            "path": "/",
            "expires": -1,
            "httpOnly": False,
            "secure": False,
            "sameSite": "Lax"
        }
    ]
    try:
        result = _format_cookies(cookies_list_full)
        print("✅ 成功处理完整列表格式")
        print(f" 输入: {type(cookies_list_full).__name__} with {len(cookies_list_full)} items")
        print(f" 输出: {type(result).__name__} with {len(result)} items")
        # The Playwright-native format must be preserved in full.
        print(f" 保留的字段: {list(result[0].keys())}")
        assert result == cookies_list_full, "Playwright原生格式应该被完整保留,不做任何修改"
        assert 'expires' in result[0], "expires字段应该被保留"
        assert result[0]['expires'] == -1, "expires=-1应该被保留"
        assert isinstance(result, list)
        assert len(result) == 1
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 2")

    # Case 3: non-Playwright format (no "name" key); domain and path must be
    # filled in.
    print("\n测试 3: 非Playwright格式(缺少字段,需要补充)")
    cookies_list_partial = [
        {
            "cookie_name": "a1",  # no "name" key, so not Playwright format
            "cookie_value": "xxx"
        }
    ]
    try:
        result = _format_cookies(cookies_list_partial)
        print("✅ 成功处理非Playwright格式")
        print(f" 输入: {type(cookies_list_partial).__name__} with {len(cookies_list_partial)} items")
        print(f" 输出: {type(result).__name__} with {len(result)} items")
        print(f" 自动添加的字段: domain={result[0].get('domain')}, path={result[0].get('path')}")
        assert isinstance(result, list)
        # domain and path should have been added automatically.
        assert result[0]['domain'] == '.xiaohongshu.com'
        assert result[0]['path'] == '/'
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 3")

    # Case 4: doubly JSON-encoded string (simulates the database storage
    # scenario).
    print("\n测试 4: 双重JSON编码字符串")
    cookies_dict = {"a1": "xxx", "webId": "yyy"}
    # First JSON encoding.
    cookies_json_1 = json.dumps(cookies_dict)
    # Second JSON encoding.
    cookies_json_2 = json.dumps(cookies_json_1)

    print(f" 原始字典: {cookies_dict}")
    print(f" 第一次编码: {cookies_json_1}")
    print(f" 第二次编码: {cookies_json_2}")

    # Simulate reading the value back from the database and parsing it.
    try:
        # First decode.
        cookies_parsed_1 = json.loads(cookies_json_2)
        print(f" 第一次解析后类型: {type(cookies_parsed_1).__name__}")

        # A string result means the payload was encoded twice.
        if isinstance(cookies_parsed_1, str):
            cookies_parsed_2 = json.loads(cookies_parsed_1)
            print(f" 第二次解析后类型: {type(cookies_parsed_2).__name__}")
            cookies = cookies_parsed_2
        else:
            cookies = cookies_parsed_1

        # Format the decoded value.
        result = _format_cookies(cookies)
        print("✅ 成功处理双重JSON编码")
        print(f" 最终输出: {type(result).__name__} with {len(result)} items")
        assert isinstance(result, list)
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 4")

    # Case 5: invalid input - a plain (non-JSON) string.
    print("\n测试 5: 错误格式 - 普通字符串")
    try:
        _format_cookies("invalid_string")
    except ValueError as e:
        print("✅ 正确抛出ValueError异常")
        print(f" 错误信息: {e}")
    except Exception as e:
        print(f"❌ 抛出了非预期的异常: {e}")
        failures.append("测试 5")
    else:
        print("❌ 应该抛出异常但没有")
        failures.append("测试 5")

    # Case 6: invalid input - list containing non-dict elements.
    print("\n测试 6: 错误格式 - 列表中包含非字典元素")
    try:
        _format_cookies(["string_item", 123])
    except ValueError as e:
        print("✅ 正确抛出ValueError异常")
        print(f" 错误信息: {e}")
    except Exception as e:
        print(f"❌ 抛出了非预期的异常: {e}")
        failures.append("测试 6")
    else:
        print("❌ 应该抛出异常但没有")
        failures.append("测试 6")

    # Case 7: Playwright-native format whose value is an object (kept as is).
    print("\n测试 7: Playwright原生格式中value为对象(应保持原样)")
    cookies_with_object_value = [
        {
            "name": "test_cookie",
            "value": {"nested": "object"},  # value is an object
            "domain": ".xiaohongshu.com",
            "path": "/"
        }
    ]
    try:
        result = _format_cookies(cookies_with_object_value)
        print("✅ Playwright原生格式被完整保留")
        print(f" 输入value类型: {type(cookies_with_object_value[0]['value']).__name__}")
        print(f" 输出value类型: {type(result[0]['value']).__name__}")
        print(f" 输出value内容: {result[0]['value']}")
        # Playwright-native cookies are not modified at all, value included.
        assert result == cookies_with_object_value, "Playwright原生格式应完整保留"
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 7")

    # Case 8: dict format with non-string values.
    print("\n测试 8: 字典格式中value为数字(应自动转换为字符串)")
    cookies_dict_with_number = {
        "a1": "xxx",
        "user_id": 12345,  # value is a number
        "is_login": True  # value is a boolean
    }
    try:
        result = _format_cookies(cookies_dict_with_number)
        print("✅ 成功处理数字/布尔value")
        print(f" 输入: {cookies_dict_with_number}")
        print(f" user_id value类型: {type(result[1]['value']).__name__}, 值: {result[1]['value']}")
        print(f" is_login value类型: {type(result[2]['value']).__name__}, 值: {result[2]['value']}")
        # Fields such as expires must no longer be added.
        print(f" 字段: {list(result[0].keys())}")
        assert all(isinstance(c['value'], str) for c in result), "所有value应该都是字符串类型"
        assert 'expires' not in result[0], "不应该包含expires字段"
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 8")

    # Case 9: Playwright-native format with expires=-1 (must be preserved).
    print("\n测试 9: Playwright原生格式中expires=-1(应被保留)")
    cookies_with_invalid_expires = [
        {
            "name": "test_cookie",
            "value": "test_value",
            "domain": ".xiaohongshu.com",
            "path": "/",
            "expires": -1  # Playwright-native format
        }
    ]
    try:
        result = _format_cookies(cookies_with_invalid_expires)
        print("✅ Playwright原生格式被完整保留")
        print(f" 原始字段: {list(cookies_with_invalid_expires[0].keys())}")
        print(f" 处理后字段: {list(result[0].keys())}")
        assert result == cookies_with_invalid_expires, "Playwright原生格式应被完整保留"
        assert 'expires' in result[0] and result[0]['expires'] == -1, "expires=-1应该被保留"
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 9")

    # Case 10: Playwright-native format with a float expires (must be
    # preserved).
    print("\n测试 10: Playwright原生格式中expires为浮点数(应被保留)")
    cookies_with_float_expires = [
        {
            "name": "test_cookie",
            "value": "test_value",
            "domain": ".xiaohongshu.com",
            "path": "/",
            "expires": 1797066497.112584  # native cookies often carry floats
        }
    ]
    try:
        result = _format_cookies(cookies_with_float_expires)
        print("✅ Playwright原生格式被完整保留")
        print(f" 原始expires: {cookies_with_float_expires[0]['expires']} (类型: {type(cookies_with_float_expires[0]['expires']).__name__})")
        print(f" 处理后expires: {result[0]['expires']} (类型: {type(result[0]['expires']).__name__})")
        assert result == cookies_with_float_expires, "Playwright原生格式应被完整保留"
        assert isinstance(result[0]['expires'], float), "expires浮点数应该被保留"
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 10")

    # Case 11: Playwright-native format with sameSite (casing must be
    # preserved).
    print("\n测试 11: Playwright原生格式中sameSite(应被完整保留)")
    cookies_with_samesite = [
        {
            "name": "test_cookie1",
            "value": "test_value1",
            "domain": ".xiaohongshu.com",
            "path": "/",
            "sameSite": "Lax"  # Playwright-native format
        },
        {
            "name": "test_cookie2",
            "value": "test_value2",
            "domain": ".xiaohongshu.com",
            "path": "/",
            "sameSite": "Strict"
        }
    ]
    try:
        result = _format_cookies(cookies_with_samesite)
        print("✅ Playwright原生格式被完整保留")
        print(f" cookie1 sameSite: {result[0]['sameSite']}")
        print(f" cookie2 sameSite: {result[1]['sameSite']}")
        assert result == cookies_with_samesite, "Playwright原生格式应被完整保留"
        assert result[0]['sameSite'] == 'Lax'
        assert result[1]['sameSite'] == 'Strict'
    except Exception as e:
        print(f"❌ 失败: {e}")
        failures.append("测试 11")

    print("\n" + "="*60)
    print("测试完成")
    print("="*60)

    # Surface collected failures so a test runner reports them; printing
    # alone would leave the test green no matter what happened above.
    if failures:
        raise AssertionError(f"以下测试未通过: {', '.join(failures)}")
|
||||
|
||||
|
||||
# Run the checks when this file is executed directly as a script.
if __name__ == "__main__":
    test_format_cookies()
|
||||
Reference in New Issue
Block a user