Files
ai_wht_wechat/backend/test_headless_comparison.py

282 lines
10 KiB
Python
Raw Normal View History

2026-01-06 19:36:42 +08:00
"""
对比测试有头模式和无头模式的页面获取情况
"""
import asyncio
from playwright.async_api import async_playwright
import sys
async def test_headless_comparison(proxy_index: int = 0):
    """Run the login-page check once headless and once headed.

    Fetches the proxy entry at ``proxy_index`` from
    ``damai_proxy_config.get_proxy_config``, turns it into a proxy settings
    dict (``server`` / ``username`` / ``password``) and passes that dict to
    ``test_single_mode`` twice: first with ``headless=True``, then with
    ``headless=False``.

    Args:
        proxy_index: Index handed to ``get_proxy_config``.
    """
    print(f"\n{'='*60}")
    print(f"🔍 对比测试有头模式 vs 无头模式")
    print(f"{'='*60}")

    # Load the proxy entry selected by the caller.
    from damai_proxy_config import get_proxy_config
    proxy_config = get_proxy_config(proxy_index)
    print(f"✅ 使用代理: 代理{proxy_index + 1}")
    print(f" 代理服务器: {proxy_config['server']}")

    # Build the proxy settings straight from the config fields. The previous
    # code formatted "http://user:pass@host" and split it again on '@' / ':',
    # which raised or mis-parsed when a credential contained either character.
    server_host = (
        proxy_config['server'].replace('http://', '').replace('https://', '')
    )
    proxy_config_obj = {
        "server": f"http://{server_host}",
        # str() keeps the value types the string round-trip used to produce.
        "username": str(proxy_config['username']),
        "password": str(proxy_config['password']),
    }

    # Never echo the real password to the console; show a masked copy.
    printable_proxy = {**proxy_config_obj, "password": "***"}
    print(f" 配置的代理对象: {printable_proxy}")

    # Headless run first.
    print(f"\n🧪 测试 1/2: 无头模式 (headless=True)")
    await test_single_mode(True, proxy_config_obj)

    # Then the headed run with the same proxy settings.
    print(f"\n🧪 测试 2/2: 有头模式 (headless=False)")
    await test_single_mode(False, proxy_config_obj)

    print(f"\n{'='*60}")
    print("✅ 对比测试完成!")
    print("="*60)
async def test_single_mode(headless: bool, proxy_config_obj: dict):
    """Load the Xiaohongshu creator login page in one browser mode and report.

    Launches Chromium through Playwright with the given proxy settings,
    navigates to ``https://creator.xiaohongshu.com/login`` and prints the
    page title, URL, content length, whether the phone-number input exists,
    the number of ``input`` elements, and a coarse verdict on the content.
    All exceptions are caught and printed; nothing is returned or raised.

    Args:
        headless: Passed to ``chromium.launch``; also selects the mode label
            used in the printed messages.
        proxy_config_obj: Proxy settings dict passed to ``chromium.launch``.
    """
    mode_name = "无头模式" if headless else "有头模式"
    print(f" 正在启动浏览器 ({mode_name})...")
    try:
        async with async_playwright() as p:
            # Launch the browser.
            browser = await p.chromium.launch(
                headless=headless,
                proxy=proxy_config_obj,
                # Extra launch flags kept from the original configuration.
                args=[
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-blink-features=AutomationControlled',
                ]
            )
            # try/finally guarantees the browser is closed even when context
            # or page creation fails, or the task is cancelled mid-run.
            try:
                # Create the browser context.
                context = await browser.new_context(
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                    viewport={'width': 1280, 'height': 720}
                )
                # Create the page.
                page = await context.new_page()

                # Visit the Xiaohongshu login page.
                print(f" 访问小红书登录页...")
                try:
                    await page.goto('https://creator.xiaohongshu.com/login',
                                    wait_until='domcontentloaded',
                                    timeout=15000)
                    # Give the page a moment to finish loading its content.
                    await asyncio.sleep(3)

                    # Collect basic page information.
                    title = await page.title()
                    url = page.url
                    content = await page.content()
                    content_len = len(content)
                    print(f"{mode_name} - 访问成功")
                    print(f" 标题: {title}")
                    print(f" URL: {url}")
                    print(f" 内容长度: {content_len} 字符")

                    # Check for the key element: the phone-number input.
                    phone_input = await page.query_selector('input[placeholder="手机号"]')
                    if phone_input:
                        print(f" ✅ 找到手机号输入框")
                    else:
                        print(f" ❌ 未找到手机号输入框")

                    # Count every input element on the page.
                    inputs = await page.query_selector_all('input')
                    print(f" 找到 {len(inputs)} 个input元素")

                    # Coarse classification of the returned HTML.
                    if content_len == 0:
                        print(f" ⚠️ 页面内容为空")
                    elif "验证" in content or "captcha" in content.lower() or "安全" in content:
                        print(f" ⚠️ 检测到验证或安全提示")
                    else:
                        print(f" ✅ 页面内容正常")
                except Exception as e:
                    print(f"{mode_name} - 访问失败: {str(e)}")
            finally:
                await browser.close()
                print(f" 🔄 {mode_name} 浏览器已关闭")
    except Exception as e:
        print(f"{mode_name} - 测试异常: {str(e)}")
async def test_with_different_wait_strategies(proxy_index: int = 0):
    """Load the login page once per ``wait_until`` strategy and report.

    Fetches the proxy entry at ``proxy_index`` from
    ``damai_proxy_config.get_proxy_config`` and, for each of
    ``domcontentloaded``, ``load``, ``networkidle`` and ``commit``, launches
    a fresh headless Chromium, navigates to
    ``https://creator.xiaohongshu.com/login`` and prints the content length
    and whether the phone-number input was found. Exceptions are caught and
    printed per strategy, so one failure does not stop the remaining runs.

    Args:
        proxy_index: Index handed to ``get_proxy_config``.
    """
    print(f"\n{'='*60}")
    print(f"🔍 测试不同页面等待策略")
    print(f"{'='*60}")

    from damai_proxy_config import get_proxy_config
    proxy_config = get_proxy_config(proxy_index)

    # Build the proxy settings straight from the config fields. The previous
    # code formatted "http://user:pass@host" and split it again on '@' / ':',
    # which raised or mis-parsed when a credential contained either character.
    server_host = (
        proxy_config['server'].replace('http://', '').replace('https://', '')
    )
    proxy_config_obj = {
        "server": f"http://{server_host}",
        # str() keeps the value types the string round-trip used to produce.
        "username": str(proxy_config['username']),
        "password": str(proxy_config['password']),
    }

    wait_strategies = [
        ('domcontentloaded', 'DOM内容加载完成'),
        ('load', '页面完全加载'),
        ('networkidle', '网络空闲'),
        ('commit', '导航提交')
    ]
    for wait_strategy, description in wait_strategies:
        print(f"\n🧪 测试等待策略: {description} ({wait_strategy})")
        try:
            async with async_playwright() as p:
                browser = await p.chromium.launch(
                    headless=True,  # every strategy is exercised headless
                    proxy=proxy_config_obj
                )
                # try/finally guarantees the browser is closed even when
                # context or page creation fails, or the task is cancelled.
                try:
                    context = await browser.new_context(
                        user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
                    )
                    page = await context.new_page()
                    try:
                        print(f" 访问小红书登录页 (wait_until='{wait_strategy}')...")
                        await page.goto('https://creator.xiaohongshu.com/login',
                                        wait_until=wait_strategy,
                                        timeout=15000)
                        # Extra settle time after navigation returns.
                        await asyncio.sleep(2)
                        content = await page.content()
                        content_len = len(content)
                        print(f" ✅ 访问成功")
                        print(f" 内容长度: {content_len} 字符")

                        # Check for the phone-number input.
                        phone_input = await page.query_selector('input[placeholder="手机号"]')
                        if phone_input:
                            print(f" ✅ 找到手机号输入框")
                        else:
                            print(f" ❌ 未找到手机号输入框")
                    except Exception as e:
                        print(f" ❌ 访问失败: {str(e)}")
                finally:
                    await browser.close()
        except Exception as e:
            print(f" ❌ 测试异常: {str(e)}")
def explain_page_loading_factors():
    """Print a fixed overview of factors that affect page loading.

    The output is a banner followed by four numbered sections (browser mode,
    wait strategy, anti-detection measures, network), each with its bullet
    points. Takes no arguments and returns ``None``.
    """
    rule = "=" * 60
    # (section heading, bullet texts) in display order.
    sections = (
        ("1. 浏览器模式差异:", (
            "有头模式: 浏览器界面可见,渲染更完整",
            "无头模式: 后台运行,可能加载策略略有不同",
        )),
        ("2. 页面等待策略:", (
            "domcontentloaded: DOM构建完成推荐",
            "load: 所有资源加载完成",
            "networkidle: 网络空闲(可能等待较长时间)",
        )),
        ("3. 反检测措施:", (
            "浏览器指纹混淆",
            "User-Agent设置",
            "禁用webdriver属性",
        )),
        ("4. 网络因素:", (
            "代理IP质量",
            "网络延迟",
            "目标网站反爬虫机制",
        )),
    )

    print(rule)
    print("💡 影响页面加载的因素")
    print(rule)
    for heading, bullets in sections:
        # Each section is preceded by a blank line.
        print(f"\n{heading}")
        for bullet in bullets:
            print(f" • {bullet}")
async def main():
    """Interactive entry point: show the overview, prompt, run one test.

    Prints the page-loading overview and a two-item menu, then reads a test
    mode and a proxy index from stdin. Any mode other than ``1`` or ``2``
    falls back to ``1``; any proxy answer other than ``0`` or ``1`` falls
    back to ``0``. Ctrl-C is reported as a user interruption; any other
    exception is printed together with its traceback.
    """
    explain_page_loading_factors()

    banner = "=" * 60
    print(f"\n{banner}")
    print("🎯 选择测试模式")
    print(banner)
    print("\n1. 有头模式 vs 无头模式对比测试")
    print("2. 不同页面等待策略测试")

    try:
        # Menu entry -> coroutine function that runs that test.
        runners = {
            '1': test_headless_comparison,
            '2': test_with_different_wait_strategies,
        }
        mode = input("\n请选择测试模式 (1-2, 默认为1): ").strip()
        # Unknown answers select the first test.
        runner = runners.get(mode, test_headless_comparison)

        proxy_answer = input("请选择代理 (0 或 1, 默认为0): ").strip()
        # Unknown answers select proxy 0.
        proxy_idx = int(proxy_answer) if proxy_answer in ('0', '1') else 0

        await runner(proxy_idx)

        print(f"\n{banner}")
        print("✅ 测试完成!")
        print(banner)
    except KeyboardInterrupt:
        print("\n\n⚠️ 测试被用户中断")
    except Exception as exc:
        print(f"\n❌ 测试过程中出现错误: {exc}")
        import traceback
        traceback.print_exc()
if __name__ == "__main__":
    # On Windows, install the proactor event loop policy before starting.
    running_on_windows = sys.platform == 'win32'
    if running_on_windows:
        proactor_policy = asyncio.WindowsProactorEventLoopPolicy()
        asyncio.set_event_loop_policy(proactor_policy)
    # Run the interactive test entry point.
    asyncio.run(main())