"""Optimized proxy browser configuration.

Diagnostic script that launches Chromium through an authenticated proxy with
anti-detection settings, to investigate Xiaohongshu's restrictions on proxy
IPs. It first loads Baidu as a connectivity check, then loads the Xiaohongshu
creator login page and reports what it finds.
"""

import asyncio
import sys
import traceback

from playwright.async_api import async_playwright

# Chromium command-line switches passed to ``chromium.launch``.
# NOTE(review): ``--disable-features`` is repeated many times below. Chromium
# may honor only one occurrence of a repeated switch -- TODO confirm, and if
# so merge the values into a single comma-separated ``--disable-features=``.
# NOTE(review): ``--disable-web-security`` turns off the same-origin policy;
# acceptable for a throwaway diagnostic session, not for general browsing.
_CHROMIUM_ARGS = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-blink-features=AutomationControlled',
    '--disable-background-timer-throttling',
    '--disable-renderer-backgrounding',
    '--disable-background-networking',
    '--enable-features=NetworkService,NetworkServiceInProcess',
    '--disable-ipc-flooding-protection',
    '--disable-web-security',
    '--disable-features=IsolateOrigins,site-per-process',
    '--disable-site-isolation-trials',
    '--disable-extensions',
    '--disable-breakpad',
    '--disable-component-extensions-with-background-pages',
    '--disable-hang-monitor',
    '--disable-prompt-on-repost',
    '--disable-domain-reliability',
    '--disable-component-update',
    '--hide-scrollbars',
    '--mute-audio',
    '--no-first-run',
    '--no-default-browser-check',
    '--metrics-recording-only',
    '--force-color-profile=srgb',
    '--disable-default-apps',
    '--disable-features=TranslateUI',
    '--disable-features=Translate',
    '--disable-features=OptimizationHints',
    '--disable-features=InterestCohortAPI',
    '--disable-features=BlinkGenPropertyTrees',
    '--disable-features=ImprovedCookieControls',
    '--disable-features=SameSiteDefaultChecksMethodRigorously',
    '--disable-features=CookieSameSiteByDefaultWhenReportingEnabled',
    '--disable-features=AutofillServerCommunication',
    '--disable-features=AutofillUseOptimizedLocalStorage',
    '--disable-features=CalculateNativeWinOcclusion',
    '--disable-features=VizDisplayCompositor',
    '--disable-features=VizHitTestQuery',
]

_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
)

# JavaScript injected before any page script runs. It overrides the
# navigator properties commonly inspected by automation detectors and removes
# the ``cdc_`` marker globals. The ``//`` comment must stay on its own line,
# otherwise it would comment out the ``delete`` statements that follow it.
_STEALTH_INIT_SCRIPT = """
Object.defineProperty(navigator, 'webdriver', {
    get: () => undefined,
});
Object.defineProperty(navigator, 'plugins', {
    get: () => [1, 2, 3, 4, 5],
});
Object.defineProperty(navigator, 'languages', {
    get: () => ['zh-CN', 'zh', 'en'],
});
// 隐瞒代理检测
delete window.cdc_adoQpoasnfa76pfcZLmcfl_Array;
delete window.cdc_adoQpoasnfa76pfcZLmcfl_Promise;
delete window.cdc_adoQpoasnfa76pfcZLmcfl_Symbol;
"""


def _strip_scheme(server: str) -> str:
    """Return *server* without a leading ``http://`` or ``https://``."""
    for scheme in ('http://', 'https://'):
        if server.startswith(scheme):
            return server[len(scheme):]
    return server


def _build_proxy_settings(proxy_config: dict) -> dict:
    """Build the ``proxy`` dict for ``chromium.launch`` from a config entry.

    The fields are taken directly from *proxy_config* rather than being
    round-tripped through a ``user:pass@host`` URL, so credentials containing
    ``:`` or ``@`` are handled correctly. The server is always given an
    ``http://`` scheme, as the previous implementation did.
    """
    host = _strip_scheme(proxy_config['server'])
    return {
        "server": f"http://{host}",
        "username": proxy_config['username'],
        "password": proxy_config['password'],
    }


async def _wait_for_enter() -> None:
    """Wait for the user to press Enter without blocking the event loop."""
    loop = asyncio.get_running_loop()
    try:
        await loop.run_in_executor(None, input)
    except EOFError:
        # stdin is closed / non-interactive: nothing to wait for.
        pass


async def _probe_selector(page, selector: str, label: str,
                          timeout: int = 5000) -> bool:
    """Wait for *selector* on *page* and report whether it was found.

    Prints a found / not-found line using *label* and returns the outcome.
    """
    try:
        handle = await page.wait_for_selector(selector, timeout=timeout)
    except Exception:
        # Best-effort probe: a timeout or any page error counts as
        # "not found". ``Exception`` (not a bare ``except``) lets
        # KeyboardInterrupt and task cancellation propagate.
        handle = None

    if handle:
        print(f" ✅ 找到{label}")
        return True
    print(f" ❌ 未找到{label}")
    return False


async def _check_baidu(page) -> None:
    """Load Baidu as a basic connectivity check through the proxy."""
    print("\n🌐 访问百度测试代理连接...")
    try:
        await page.goto('https://www.baidu.com',
                        wait_until='domcontentloaded', timeout=15000)
        await asyncio.sleep(2)
        title = await page.title()
        url = page.url
        print(" ✅ 百度访问成功")
        print(f" 标题: {title}")
        print(f" URL: {url}")
    except Exception as e:
        print(f" ❌ 百度访问失败: {str(e)}")


async def _check_xiaohongshu_login(page) -> None:
    """Load the Xiaohongshu creator login page and report its state."""
    print("\n🌐 访问小红书创作者平台...")
    try:
        await page.goto('https://creator.xiaohongshu.com/login',
                        wait_until='domcontentloaded', timeout=30000)
        await asyncio.sleep(3)  # give client-side rendering extra time

        title = await page.title()
        url = page.url
        content = await page.content()
        content_len = len(content)

        print(" 访问结果:")
        print(f" 标题: {title}")
        print(f" URL: {url}")
        print(f" 内容长度: {content_len} 字符")

        if content_len == 0:
            print(" ⚠️ 页面内容为空")
        elif "验证" in content or "captcha" in content.lower() or "安全" in content:
            print(" ⚠️ 检测到验证或安全提示")
        else:
            print(" ✅ 页面加载成功")

        # The probes below run whatever the classification above says: the
        # keyword heuristic also matches the login page's own "发送验证码"
        # label, so it cannot be relied on to gate them.
        print("\n🔍 查找手机号输入框...")
        if not await _probe_selector(page, 'input[placeholder="手机号"]',
                                     "手机号输入框"):
            # Fallback diagnostic: how many <input> elements exist at all.
            inputs = await page.query_selector_all('input')
            print(f" 找到 {len(inputs)} 个input元素")

        print("\n🔍 查找发送验证码按钮...")
        await _probe_selector(page, 'text="发送验证码"', "发送验证码按钮")
    except Exception as e:
        print(f" ❌ 小红书访问失败: {str(e)}")


async def test_optimized_proxy_browser(proxy_index: int = 0):
    """Run the proxy diagnostic in a headed Chromium instance.

    Args:
        proxy_index: Index passed to ``damai_proxy_config.get_proxy_config``
            to select the proxy entry.

    Errors raised after the proxy configuration has been loaded are caught,
    printed with a traceback, and not re-raised. Errors from loading the
    proxy configuration propagate to the caller.
    """
    print(f"\n{'='*60}")
    print("🚀 测试优化的代理浏览器配置")
    print(f"{'='*60}")

    # Imported lazily so that importing this module does not require the
    # project-local proxy configuration module.
    from damai_proxy_config import get_proxy_config

    proxy_config = get_proxy_config(proxy_index)
    proxy_config_obj = _build_proxy_settings(proxy_config)

    print(f"✅ 使用代理: 代理{proxy_index + 1}")
    print(f" 代理服务器: {proxy_config['server']}")

    try:
        async with async_playwright() as p:
            # Never echo the proxy password to the console.
            masked = {**proxy_config_obj, "password": "***"}
            print(f" 配置的代理对象: {masked}")

            browser = await p.chromium.launch(
                headless=False,  # headed mode so the page can be observed
                proxy=proxy_config_obj,
                args=_CHROMIUM_ARGS,
            )
            try:
                context = await browser.new_context(
                    user_agent=_USER_AGENT,
                    viewport={'width': 1280, 'height': 720},
                    bypass_csp=True,
                    java_script_enabled=True,
                )
                page = await context.new_page()
                await page.add_init_script(_STEALTH_INIT_SCRIPT)

                await _check_baidu(page)
                await _check_xiaohongshu_login(page)

                print("\n⏸️ 浏览器保持打开状态,您可以观察页面")
                print(" 按 Enter 键关闭浏览器...")
                await _wait_for_enter()
            finally:
                # Close the browser even when a step above fails.
                await browser.close()
                print("✅ 浏览器已关闭")
    except Exception as e:
        print(f"❌ 测试过程异常: {str(e)}")
        traceback.print_exc()


def explain_optimizations():
    """Print a summary of the optimizations this script applies."""
    print("="*60)
    print("🔧 优化措施说明")
    print("="*60)

    print("\n1. 浏览器启动参数优化:")
    print(" • 添加更多反检测参数")
    print(" • 禁用可能导致检测的功能")

    print("\n2. 浏览器指纹混淆:")
    print(" • 隐瞒webdriver特征")
    print(" • 伪造插件列表")
    print(" • 设置真实语言")

    print("\n3. 页面加载策略:")
    print(" • 使用domcontentloaded而非networkidle")
    print(" • 增加超时时间")


async def main():
    """Prompt for a proxy index (0 or 1, default 0) and run the test."""
    explain_optimizations()

    print(f"\n{'='*60}")
    print("🎯 选择代理进行测试")
    print("="*60)

    # A plain blocking input() is fine here: nothing else is running on the
    # event loop yet.
    try:
        proxy_choice = input("\n请选择代理 (0 或 1, 默认为0): ").strip()
    except EOFError:
        # Non-interactive stdin: fall back to the default proxy.
        proxy_choice = '0'

    proxy_idx = int(proxy_choice) if proxy_choice in ('0', '1') else 0

    await test_optimized_proxy_browser(proxy_idx)

    print(f"\n{'='*60}")
    print("✅ 测试完成!")
    print("="*60)


if __name__ == "__main__":
    # Select the Proactor event loop policy on Windows.
    if sys.platform == 'win32':
        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

    asyncio.run(main())