Files
ai_wht_wechat/backend/test_headless_comparison.py
2026-01-06 19:36:42 +08:00

282 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
对比测试有头模式和无头模式的页面获取情况
"""
import asyncio
from playwright.async_api import async_playwright
import sys
async def test_headless_comparison(proxy_index: int = 0):
    """Probe the login page in headless mode, then again in headed mode.

    Both runs use the same proxy settings so their console output can be
    compared side by side.

    Args:
        proxy_index: Index passed to ``get_proxy_config`` to select the proxy.
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print("🔍 对比测试有头模式 vs 无头模式")
    print(banner)

    # Fetch the proxy entry from the project's proxy configuration module.
    from damai_proxy_config import get_proxy_config
    proxy_config = get_proxy_config(proxy_index)

    # Build the proxy settings directly from the config fields. Formatting the
    # credentials into a URL and splitting it again on '@' / ':' breaks as soon
    # as the username or password contains one of those characters.
    # As before, any http:// or https:// prefix is dropped and http:// is used.
    proxy_host = proxy_config['server'].replace('http://', '').replace('https://', '')
    proxy_config_obj = {
        "server": f"http://{proxy_host}",
        "username": proxy_config['username'],
        "password": proxy_config['password'],
    }

    print(f"✅ 使用代理: 代理{proxy_index + 1}")
    print(f" 代理服务器: {proxy_config['server']}")
    # Mask the password so the credentials never end up in console logs.
    printable_proxy = {**proxy_config_obj, "password": "***"}
    print(f" 配置的代理对象: {printable_proxy}")

    # Run 1 of 2: headless.
    print("\n🧪 测试 1/2: 无头模式 (headless=True)")
    await test_single_mode(True, proxy_config_obj)

    # Run 2 of 2: headed.
    print("\n🧪 测试 2/2: 有头模式 (headless=False)")
    await test_single_mode(False, proxy_config_obj)

    print(f"\n{banner}")
    print("✅ 对比测试完成!")
    print(banner)
async def test_single_mode(headless: bool, proxy_config_obj: dict):
    """Open the login page once in the given browser mode and report findings.

    Prints the page title, URL, content length, whether the phone-number input
    is present, and a rough classification of the page content. All errors are
    caught and printed; nothing is raised to the caller.

    Args:
        headless: ``True`` to launch Chromium headless, ``False`` for headed.
        proxy_config_obj: Proxy settings passed as ``proxy=`` to
            ``chromium.launch``.
    """
    mode_name = "无头模式" if headless else "有头模式"
    print(f" 正在启动浏览器 ({mode_name})...")
    try:
        async with async_playwright() as p:
            # Launch the browser; the extra flags are meant to improve stability.
            browser = await p.chromium.launch(
                headless=headless,
                proxy=proxy_config_obj,
                args=[
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-blink-features=AutomationControlled',
                ]
            )
            # try/finally so the browser is closed even when creating the
            # context or page fails (previously close() was skipped then).
            try:
                context = await browser.new_context(
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                    viewport={'width': 1280, 'height': 720}
                )
                page = await context.new_page()

                # Visit the Xiaohongshu creator login page.
                print(" 访问小红书登录页...")
                try:
                    await page.goto('https://creator.xiaohongshu.com/login',
                                    wait_until='domcontentloaded',
                                    timeout=15000)
                    # Give client-side rendering a moment to fill in the page.
                    await asyncio.sleep(3)

                    # Collect basic page information.
                    title = await page.title()
                    url = page.url
                    content = await page.content()
                    content_len = len(content)
                    print(f"{mode_name} - 访问成功")
                    print(f" 标题: {title}")
                    print(f" URL: {url}")
                    print(f" 内容长度: {content_len} 字符")

                    # Look for the key element: the phone-number input.
                    phone_input = await page.query_selector('input[placeholder="手机号"]')
                    if phone_input:
                        print(" ✅ 找到手机号输入框")
                    else:
                        print(" ❌ 未找到手机号输入框")
                        # Report how many inputs exist at all, to help diagnose.
                        inputs = await page.query_selector_all('input')
                        print(f" 找到 {len(inputs)} 个input元素")

                    # Keyword heuristic for verification / security pages.
                    if content_len == 0:
                        print(" ⚠️ 页面内容为空")
                    elif "验证" in content or "captcha" in content.lower() or "安全" in content:
                        print(" ⚠️ 检测到验证或安全提示")
                    else:
                        print(" ✅ 页面内容正常")
                except Exception as e:
                    # Best-effort diagnostic: report the failure and carry on.
                    print(f"{mode_name} - 访问失败: {str(e)}")
            finally:
                await browser.close()
                print(f" 🔄 {mode_name} 浏览器已关闭")
    except Exception as e:
        print(f"{mode_name} - 测试异常: {str(e)}")
async def test_with_different_wait_strategies(proxy_index: int = 0):
    """Load the login page once per ``wait_until`` strategy and report results.

    Each strategy gets a fresh headless Chromium instance. For every run the
    content length and the presence of the phone-number input are printed.
    Errors are caught and printed per strategy; nothing is raised.

    Args:
        proxy_index: Index passed to ``get_proxy_config`` to select the proxy.
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print("🔍 测试不同页面等待策略")
    print(banner)

    from damai_proxy_config import get_proxy_config
    proxy_config = get_proxy_config(proxy_index)

    # Build the proxy settings directly from the config fields. Formatting the
    # credentials into a URL and splitting it again on '@' / ':' breaks as soon
    # as the username or password contains one of those characters.
    # As before, any http:// or https:// prefix is dropped and http:// is used.
    proxy_host = proxy_config['server'].replace('http://', '').replace('https://', '')
    proxy_config_obj = {
        "server": f"http://{proxy_host}",
        "username": proxy_config['username'],
        "password": proxy_config['password'],
    }

    wait_strategies = [
        ('domcontentloaded', 'DOM内容加载完成'),
        ('load', '页面完全加载'),
        ('networkidle', '网络空闲'),
        ('commit', '导航提交'),
    ]
    for wait_strategy, description in wait_strategies:
        print(f"\n🧪 测试等待策略: {description} ({wait_strategy})")
        try:
            async with async_playwright() as p:
                browser = await p.chromium.launch(
                    headless=True,  # these runs always use headless mode
                    proxy=proxy_config_obj
                )
                # try/finally so the browser is closed even when creating the
                # context or page fails (previously close() was skipped then).
                try:
                    context = await browser.new_context(
                        user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
                    )
                    page = await context.new_page()
                    try:
                        print(f" 访问小红书登录页 (wait_until='{wait_strategy}')...")
                        await page.goto('https://creator.xiaohongshu.com/login',
                                        wait_until=wait_strategy,
                                        timeout=15000)
                        # Extra settle time after the navigation resolves.
                        await asyncio.sleep(2)
                        content = await page.content()
                        content_len = len(content)
                        print(" ✅ 访问成功")
                        print(f" 内容长度: {content_len} 字符")

                        # Check for the phone-number input.
                        phone_input = await page.query_selector('input[placeholder="手机号"]')
                        if phone_input:
                            print(" ✅ 找到手机号输入框")
                        else:
                            print(" ❌ 未找到手机号输入框")
                    except Exception as e:
                        # Best-effort diagnostic: report and try the next strategy.
                        print(f" ❌ 访问失败: {str(e)}")
                finally:
                    await browser.close()
        except Exception as e:
            print(f" ❌ 测试异常: {str(e)}")
def explain_page_loading_factors():
    """Print a short primer on the factors that influence page loading."""
    rule = "=" * 60
    # Each topic is a heading followed by its bullet points.
    topics = (
        ("1. 浏览器模式差异:", (
            "有头模式: 浏览器界面可见,渲染更完整",
            "无头模式: 后台运行,可能加载策略略有不同",
        )),
        ("2. 页面等待策略:", (
            "domcontentloaded: DOM构建完成推荐",
            "load: 所有资源加载完成",
            "networkidle: 网络空闲(可能等待较长时间)",
        )),
        ("3. 反检测措施:", (
            "浏览器指纹混淆",
            "User-Agent设置",
            "禁用webdriver属性",
        )),
        ("4. 网络因素:", (
            "代理IP质量",
            "网络延迟",
            "目标网站反爬虫机制",
        )),
    )

    report = [rule, "💡 影响页面加载的因素", rule]
    for heading, bullets in topics:
        # A blank line separates each topic from the previous one.
        report.append(f"\n{heading}")
        report.extend(f" • {bullet}" for bullet in bullets)
    print("\n".join(report))
async def main():
    """Interactive entry point: show the primer, ask what to run, run it.

    Unrecognised answers fall back to test mode 1 and proxy 0. Ctrl-C and any
    other exception are reported on the console instead of propagating.
    """
    explain_page_loading_factors()

    rule = "=" * 60
    print(f"\n{rule}")
    print("🎯 选择测试模式")
    print(rule)
    print("\n1. 有头模式 vs 无头模式对比测试")
    print("2. 不同页面等待策略测试")

    try:
        mode = input("\n请选择测试模式 (1-2, 默认为1): ").strip()
        if mode not in ('1', '2'):
            mode = '1'

        proxy_answer = input("请选择代理 (0 或 1, 默认为0): ").strip()
        proxy_idx = int(proxy_answer) if proxy_answer in ('0', '1') else 0

        # Mode has been normalised to '1' or '2', so one of the two always runs.
        if mode == '2':
            runner = test_with_different_wait_strategies
        else:
            runner = test_headless_comparison
        await runner(proxy_idx)

        print(f"\n{rule}")
        print("✅ 测试完成!")
        print(rule)
    except KeyboardInterrupt:
        print("\n\n⚠️ 测试被用户中断")
    except Exception as err:
        print(f"\n❌ 测试过程中出现错误: {str(err)}")
        import traceback
        traceback.print_exc()
if __name__ == "__main__":
    # On Windows, explicitly select the proactor event loop policy before
    # starting the event loop.
    running_on_windows = sys.platform == 'win32'
    if running_on_windows:
        proactor_policy = asyncio.WindowsProactorEventLoopPolicy()
        asyncio.set_event_loop_policy(proactor_policy)

    # Run the interactive test driver.
    asyncio.run(main())