""" 对比测试有头模式和无头模式的页面获取情况 """ import asyncio from playwright.async_api import async_playwright import sys async def test_headless_comparison(proxy_index: int = 0): """对比测试有头模式和无头模式""" print(f"\n{'='*60}") print(f"🔍 对比测试有头模式 vs 无头模式") print(f"{'='*60}") # 从代理配置获取代理信息 from damai_proxy_config import get_proxy_config proxy_config = get_proxy_config(proxy_index) proxy_server = proxy_config['server'].replace('http://', '') proxy_url = f"http://{proxy_config['username']}:{proxy_config['password']}@{proxy_server}" print(f"✅ 使用代理: 代理{proxy_index + 1}") print(f" 代理服务器: {proxy_config['server']}") # 配置代理对象 proxy_parts = proxy_url.replace('http://', '').replace('https://', '').split('@') if len(proxy_parts) == 2: auth_part = proxy_parts[0] server_part = proxy_parts[1] username, password = auth_part.split(':') proxy_config_obj = { "server": f"http://{server_part}", "username": username, "password": password } else: proxy_config_obj = {"server": proxy_url} print(f" 配置的代理对象: {proxy_config_obj}") # 测试无头模式 print(f"\n🧪 测试 1/2: 无头模式 (headless=True)") await test_single_mode(True, proxy_config_obj) print(f"\n🧪 测试 2/2: 有头模式 (headless=False)") await test_single_mode(False, proxy_config_obj) print(f"\n{'='*60}") print("✅ 对比测试完成!") print("="*60) async def test_single_mode(headless: bool, proxy_config_obj: dict): """测试单个模式""" mode_name = "无头模式" if headless else "有头模式" print(f" 正在启动浏览器 ({mode_name})...") try: async with async_playwright() as p: # 启动浏览器 browser = await p.chromium.launch( headless=headless, proxy=proxy_config_obj, # 添加一些额外参数以提高稳定性 args=[ '--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-blink-features=AutomationControlled', ] ) # 创建上下文 context = await browser.new_context( user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', viewport={'width': 1280, 'height': 720} ) # 创建页面 page = await context.new_page() # 访问小红书登录页面 print(f" 访问小红书登录页...") try: # 使用不同的wait_until策略 await page.goto('https://creator.xiaohongshu.com/login', wait_until='domcontentloaded', timeout=15000) # 等待一段时间让页面内容加载 await asyncio.sleep(3) # 获取页面信息 title = await page.title() url = page.url content = await page.content() content_len = len(content) print(f" ✅ {mode_name} - 访问成功") print(f" 标题: {title}") print(f" URL: {url}") print(f" 内容长度: {content_len} 字符") # 检查关键元素 phone_input = await page.query_selector('input[placeholder="手机号"]') if phone_input: print(f" ✅ 找到手机号输入框") else: print(f" ❌ 未找到手机号输入框") # 查找所有input元素 inputs = await page.query_selector_all('input') print(f" 找到 {len(inputs)} 个input元素") if content_len == 0: print(f" ⚠️ 页面内容为空") elif "验证" in content or "captcha" in content.lower() or "安全" in content: print(f" ⚠️ 检测到验证或安全提示") else: print(f" ✅ 页面内容正常") except Exception as e: print(f" ❌ {mode_name} - 访问失败: {str(e)}") await browser.close() print(f" 🔄 {mode_name} 浏览器已关闭") except Exception as e: print(f" ❌ {mode_name} - 测试异常: {str(e)}") async def test_with_different_wait_strategies(proxy_index: int = 0): """测试不同的页面等待策略""" print(f"\n{'='*60}") print(f"🔍 测试不同页面等待策略") print(f"{'='*60}") from damai_proxy_config import get_proxy_config proxy_config = get_proxy_config(proxy_index) proxy_server = proxy_config['server'].replace('http://', '') proxy_url = f"http://{proxy_config['username']}:{proxy_config['password']}@{proxy_server}" proxy_parts = proxy_url.replace('http://', '').replace('https://', '').split('@') if len(proxy_parts) == 2: auth_part = proxy_parts[0] server_part = proxy_parts[1] username, password = auth_part.split(':') proxy_config_obj = { "server": f"http://{server_part}", "username": username, "password": password } else: proxy_config_obj = {"server": proxy_url} wait_strategies = [ ('domcontentloaded', 'DOM内容加载完成'), ('load', '页面完全加载'), ('networkidle', '网络空闲'), ('commit', '导航提交') ] for wait_strategy, description in wait_strategies: print(f"\n🧪 测试等待策略: {description} ({wait_strategy})") try: async with async_playwright() as p: browser = await p.chromium.launch( headless=True, # 使用无头模式进行测试 proxy=proxy_config_obj ) context = await browser.new_context( user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' ) page = await context.new_page() try: print(f" 访问小红书登录页 (wait_until='{wait_strategy}')...") await page.goto('https://creator.xiaohongshu.com/login', wait_until=wait_strategy, timeout=15000) # 额外等待时间 await asyncio.sleep(2) content = await page.content() content_len = len(content) print(f" ✅ 访问成功") print(f" 内容长度: {content_len} 字符") # 检查手机号输入框 phone_input = await page.query_selector('input[placeholder="手机号"]') if phone_input: print(f" ✅ 找到手机号输入框") else: print(f" ❌ 未找到手机号输入框") except Exception as e: print(f" ❌ 访问失败: {str(e)}") await browser.close() except Exception as e: print(f" ❌ 测试异常: {str(e)}") def explain_page_loading_factors(): """解释影响页面加载的因素""" print("="*60) print("💡 影响页面加载的因素") print("="*60) print("\n1. 浏览器模式差异:") print(" • 有头模式: 浏览器界面可见,渲染更完整") print(" • 无头模式: 后台运行,可能加载策略略有不同") print("\n2. 页面等待策略:") print(" • domcontentloaded: DOM构建完成(推荐)") print(" • load: 所有资源加载完成") print(" • networkidle: 网络空闲(可能等待较长时间)") print("\n3. 反检测措施:") print(" • 浏览器指纹混淆") print(" • User-Agent设置") print(" • 禁用webdriver属性") print("\n4. 网络因素:") print(" • 代理IP质量") print(" • 网络延迟") print(" • 目标网站反爬虫机制") async def main(): """主函数""" explain_page_loading_factors() print(f"\n{'='*60}") print("🎯 选择测试模式") print("="*60) print("\n1. 有头模式 vs 无头模式对比测试") print("2. 不同页面等待策略测试") try: choice = input("\n请选择测试模式 (1-2, 默认为1): ").strip() if choice not in ['1', '2']: choice = '1' proxy_choice = input("请选择代理 (0 或 1, 默认为0): ").strip() if proxy_choice not in ['0', '1']: proxy_choice = '0' proxy_idx = int(proxy_choice) if choice == '1': await test_headless_comparison(proxy_idx) elif choice == '2': await test_with_different_wait_strategies(proxy_idx) print(f"\n{'='*60}") print("✅ 测试完成!") print("="*60) except KeyboardInterrupt: print("\n\n⚠️ 测试被用户中断") except Exception as e: print(f"\n❌ 测试过程中出现错误: {str(e)}") import traceback traceback.print_exc() if __name__ == "__main__": # Windows环境下设置事件循环策略 if sys.platform == 'win32': asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy()) # 运行测试 asyncio.run(main())