"""
Compare how the login page is retrieved in headed versus headless browser mode.
"""
|
||
import asyncio
|
||
from playwright.async_api import async_playwright
|
||
import sys
|
||
|
||
|
||
async def test_headless_comparison(proxy_index: int = 0):
    """Probe the login page in headless mode, then in headed mode, via one proxy.

    The proxy entry is obtained from ``damai_proxy_config.get_proxy_config``
    and converted into the ``server`` / ``username`` / ``password`` mapping
    that is handed to ``test_single_mode``.

    Args:
        proxy_index: Index passed to ``get_proxy_config`` to pick the proxy
            entry. Defaults to 0.

    Raises:
        KeyError: If the proxy entry lacks ``server``, ``username`` or
            ``password``.
    """
    print(f"\n{'='*60}")
    print("🔍 对比测试有头模式 vs 无头模式")
    print(f"{'='*60}")

    # Load the proxy entry (imported here, as in the rest of this script).
    from damai_proxy_config import get_proxy_config
    proxy_config = get_proxy_config(proxy_index)

    # Build the proxy mapping straight from the config fields. Assembling a
    # "user:pass@host" URL and splitting it again breaks as soon as the
    # password contains ':' or '@'.
    server_part = proxy_config['server']
    for scheme in ('http://', 'https://'):
        if server_part.startswith(scheme):
            server_part = server_part[len(scheme):]
            break

    proxy_config_obj = {
        # The server is always passed with an explicit http:// scheme.
        "server": f"http://{server_part}",
        "username": str(proxy_config['username']),
        "password": str(proxy_config['password']),
    }

    print(f"✅ 使用代理: 代理{proxy_index + 1}")
    print(f" 代理服务器: {proxy_config['server']}")

    # Show the effective settings, but never echo the password to the console.
    printable_proxy = {**proxy_config_obj, "password": "***"}
    print(f" 配置的代理对象: {printable_proxy}")

    # Run 1: headless browser.
    print("\n🧪 测试 1/2: 无头模式 (headless=True)")
    await test_single_mode(True, proxy_config_obj)

    # Run 2: headed browser.
    print("\n🧪 测试 2/2: 有头模式 (headless=False)")
    await test_single_mode(False, proxy_config_obj)

    print(f"\n{'='*60}")
    print("✅ 对比测试完成!")
    print("="*60)
|
||
|
||
|
||
async def test_single_mode(headless: bool, proxy_config_obj: dict):
    """Open the Xiaohongshu creator login page once and print what was loaded.

    Launches Chromium through the given proxy, navigates to the login page and
    prints the title, final URL, HTML length, whether the phone-number input
    exists, how many ``input`` elements were found, and a keyword-based hint
    about verification/captcha content. Errors are reported on stdout and not
    re-raised.

    Args:
        headless: ``True`` to launch the browser headless, ``False`` for headed.
        proxy_config_obj: Proxy mapping forwarded unchanged to
            ``chromium.launch(proxy=...)``.
    """
    mode_name = "无头模式" if headless else "有头模式"
    print(f" 正在启动浏览器 ({mode_name})...")

    try:
        async with async_playwright() as p:
            # Launch the browser.
            browser = await p.chromium.launch(
                headless=headless,
                proxy=proxy_config_obj,
                # Extra Chromium flags intended to improve stability.
                args=[
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-blink-features=AutomationControlled',
                ]
            )

            # From here on the browser must be closed on every exit path,
            # including failures in new_context()/new_page() and cancellation.
            try:
                # Create the browser context.
                context = await browser.new_context(
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                    viewport={'width': 1280, 'height': 720}
                )

                # Create the page.
                page = await context.new_page()

                # Visit the Xiaohongshu login page.
                print(" 访问小红书登录页...")
                try:
                    await page.goto('https://creator.xiaohongshu.com/login',
                                    wait_until='domcontentloaded',
                                    timeout=15000)

                    # Give the page a moment to finish loading its content.
                    await asyncio.sleep(3)

                    # Collect basic page information.
                    title = await page.title()
                    url = page.url
                    content = await page.content()
                    content_len = len(content)

                    print(f" ✅ {mode_name} - 访问成功")
                    print(f" 标题: {title}")
                    print(f" URL: {url}")
                    print(f" 内容长度: {content_len} 字符")

                    # Check for the key element: the phone-number input.
                    phone_input = await page.query_selector('input[placeholder="手机号"]')
                    if phone_input:
                        print(" ✅ 找到手机号输入框")
                    else:
                        print(" ❌ 未找到手机号输入框")

                        # Fall back to counting every input element.
                        inputs = await page.query_selector_all('input')
                        print(f" 找到 {len(inputs)} 个input元素")

                        if content_len == 0:
                            print(" ⚠️ 页面内容为空")
                        elif "验证" in content or "captcha" in content.lower() or "安全" in content:
                            print(" ⚠️ 检测到验证或安全提示")
                        else:
                            print(" ✅ 页面内容正常")

                except Exception as e:
                    # Navigation/inspection failures are reported, not raised.
                    print(f" ❌ {mode_name} - 访问失败: {e}")
            finally:
                await browser.close()
                print(f" 🔄 {mode_name} 浏览器已关闭")

    except Exception as e:
        print(f" ❌ {mode_name} - 测试异常: {e}")
|
||
|
||
|
||
async def test_with_different_wait_strategies(proxy_index: int = 0):
    """Load the login page once per ``wait_until`` strategy and print the result.

    For each of ``domcontentloaded``, ``load``, ``networkidle`` and ``commit``
    a fresh headless Chromium is launched through the selected proxy, the
    login page is opened, and the HTML length plus the presence of the
    phone-number input are printed. Errors are reported on stdout and the
    loop continues with the next strategy.

    Args:
        proxy_index: Index passed to ``damai_proxy_config.get_proxy_config``
            to pick the proxy entry. Defaults to 0.

    Raises:
        KeyError: If the proxy entry lacks ``server``, ``username`` or
            ``password``.
    """
    print(f"\n{'='*60}")
    print("🔍 测试不同页面等待策略")
    print(f"{'='*60}")

    from damai_proxy_config import get_proxy_config
    proxy_config = get_proxy_config(proxy_index)

    # Build the proxy mapping straight from the config fields. Assembling a
    # "user:pass@host" URL and splitting it again breaks as soon as the
    # password contains ':' or '@'.
    server_part = proxy_config['server']
    for scheme in ('http://', 'https://'):
        if server_part.startswith(scheme):
            server_part = server_part[len(scheme):]
            break

    proxy_config_obj = {
        # The server is always passed with an explicit http:// scheme.
        "server": f"http://{server_part}",
        "username": str(proxy_config['username']),
        "password": str(proxy_config['password']),
    }

    # (wait_until value, label shown in the console output)
    wait_strategies = [
        ('domcontentloaded', 'DOM内容加载完成'),
        ('load', '页面完全加载'),
        ('networkidle', '网络空闲'),
        ('commit', '导航提交'),
    ]

    for wait_strategy, description in wait_strategies:
        print(f"\n🧪 测试等待策略: {description} ({wait_strategy})")

        try:
            async with async_playwright() as p:
                browser = await p.chromium.launch(
                    headless=True,  # these runs always use headless mode
                    proxy=proxy_config_obj
                )

                # Close the browser on every exit path, including failures in
                # new_context()/new_page() and cancellation.
                try:
                    context = await browser.new_context(
                        user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
                    )

                    page = await context.new_page()

                    try:
                        print(f" 访问小红书登录页 (wait_until='{wait_strategy}')...")
                        await page.goto('https://creator.xiaohongshu.com/login',
                                        wait_until=wait_strategy,
                                        timeout=15000)

                        # Extra settling time after navigation returns.
                        await asyncio.sleep(2)

                        content = await page.content()
                        content_len = len(content)

                        print(" ✅ 访问成功")
                        print(f" 内容长度: {content_len} 字符")

                        # Check for the phone-number input.
                        phone_input = await page.query_selector('input[placeholder="手机号"]')
                        if phone_input:
                            print(" ✅ 找到手机号输入框")
                        else:
                            print(" ❌ 未找到手机号输入框")

                    except Exception as e:
                        # Navigation/inspection failures are reported, not raised.
                        print(f" ❌ 访问失败: {e}")
                finally:
                    await browser.close()

        except Exception as e:
            print(f" ❌ 测试异常: {e}")
|
||
|
||
|
||
def explain_page_loading_factors():
    """Print a fixed overview of the factors that influence page loading.

    The text covers four topics: browser mode, wait strategy, anti-detection
    measures and network conditions. Nothing is returned.
    """
    rule = "="*60

    # Header banner followed by the four numbered sections, emitted in order.
    # Section headings carry their own leading newline to separate sections.
    output_lines = (
        rule,
        "💡 影响页面加载的因素",
        rule,
        "\n1. 浏览器模式差异:",
        " • 有头模式: 浏览器界面可见,渲染更完整",
        " • 无头模式: 后台运行,可能加载策略略有不同",
        "\n2. 页面等待策略:",
        " • domcontentloaded: DOM构建完成(推荐)",
        " • load: 所有资源加载完成",
        " • networkidle: 网络空闲(可能等待较长时间)",
        "\n3. 反检测措施:",
        " • 浏览器指纹混淆",
        " • User-Agent设置",
        " • 禁用webdriver属性",
        "\n4. 网络因素:",
        " • 代理IP质量",
        " • 网络延迟",
        " • 目标网站反爬虫机制",
    )

    for text in output_lines:
        print(text)
|
||
|
||
|
||
async def main():
    """Interactive entry point: print the overview, ask what to run, run it.

    Two prompts are read from stdin: the test mode ("1" or "2", anything else
    means "1") and the proxy index ("0" or "1", anything else means "0").
    Ctrl-C is reported with a short notice; any other exception is printed
    together with its traceback instead of propagating.
    """
    explain_page_loading_factors()

    print(f"\n{'='*60}")
    print("🎯 选择测试模式")
    print("="*60)

    print("\n1. 有头模式 vs 无头模式对比测试")
    print("2. 不同页面等待策略测试")

    try:
        # Test mode: fall back to "1" for anything that is not a valid choice.
        mode = input("\n请选择测试模式 (1-2, 默认为1): ").strip()
        mode = mode if mode in ('1', '2') else '1'

        # Proxy index: fall back to 0 for anything that is not "0" or "1".
        raw_proxy = input("请选择代理 (0 或 1, 默认为0): ").strip()
        proxy_idx = int(raw_proxy) if raw_proxy in ('0', '1') else 0

        # Pick the coroutine function for the chosen mode and run it.
        runner = (
            test_with_different_wait_strategies
            if mode == '2'
            else test_headless_comparison
        )
        await runner(proxy_idx)

        print(f"\n{'='*60}")
        print("✅ 测试完成!")
        print("="*60)

    except KeyboardInterrupt:
        print("\n\n⚠️ 测试被用户中断")
    except Exception as e:
        print(f"\n❌ 测试过程中出现错误: {e}")
        import traceback
        traceback.print_exc()
|
||
|
||
|
||
if __name__ == "__main__":
    # On Windows, install the Proactor event loop policy before the loop is created.
    if sys.platform == 'win32':
        proactor_policy = asyncio.WindowsProactorEventLoopPolicy()
        asyncio.set_event_loop_policy(proactor_policy)

    # Run the interactive test menu.
    asyncio.run(main())