Files
ai_wht_wechat/backend/test_optimized_browser.py
2026-01-06 19:36:42 +08:00

246 lines
9.8 KiB
Python

"""
优化的代理浏览器配置
解决小红书对代理IP的限制问题
"""
import asyncio
from playwright.async_api import async_playwright
import sys
async def test_optimized_proxy_browser(proxy_index: int = 0) -> None:
    """Open a proxied, headed Chromium and probe Baidu and the Xiaohongshu login page.

    The proxy entry is looked up with ``get_proxy_config(proxy_index)``. A
    Chromium window is launched through that proxy, an init script that
    overrides a few ``navigator`` properties is installed, and two pages are
    loaded in turn: https://www.baidu.com (connectivity check) and
    https://creator.xiaohongshu.com/login. Findings are printed to stdout and
    the browser stays open until the operator presses Enter.

    Args:
        proxy_index: Index handed to ``damai_proxy_config.get_proxy_config``.

    Returns:
        None. Exceptions raised while Playwright is running are printed
        together with a traceback instead of being propagated.

    Raises:
        KeyError: If the proxy entry lacks ``server``, ``username`` or
            ``password`` (raised before the browser is started).
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print("🚀 测试优化的代理浏览器配置")
    print(banner)

    # Project-local module; imported here exactly as the original did.
    from damai_proxy_config import get_proxy_config
    proxy_config = get_proxy_config(proxy_index)
    proxy_settings = _build_proxy_settings(proxy_config)
    print(f"✅ 使用代理: 代理{proxy_index + 1}")
    print(f" 代理服务器: {proxy_config['server']}")

    try:
        async with async_playwright() as p:
            # Never echo the real password to the console / logs.
            redacted = {**proxy_settings, "password": "***"}
            print(f" 配置的代理对象: {redacted}")

            browser = await p.chromium.launch(
                headless=False,  # headed mode so the operator can watch the page
                proxy=proxy_settings,
                args=_build_launch_args(),
            )
            try:
                context = await browser.new_context(
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                    viewport={'width': 1280, 'height': 720},
                    bypass_csp=True,
                    java_script_enabled=True,
                )
                page = await context.new_page()
                await page.add_init_script(_STEALTH_INIT_SCRIPT)

                await _probe_baidu(page)
                await _probe_xiaohongshu_login(page)

                print("\n⏸️ 浏览器保持打开状态,您可以观察页面")
                print(" 按 Enter 键关闭浏览器...")
                # Read stdin on a worker thread so the event loop is not
                # blocked while the operator inspects the browser.
                await asyncio.get_running_loop().run_in_executor(None, input)
            finally:
                # Close the browser even when a step above raised.
                await browser.close()
                print("✅ 浏览器已关闭")
    except Exception as e:
        print(f"❌ 测试过程异常: {e}")
        import traceback
        traceback.print_exc()


# Chromium features the script asks the browser to turn off. They are joined
# into ONE --disable-features switch: when a switch is repeated on the
# Chromium command line only its last value is kept, so listing them as
# separate switches would disable just the final entry.
_DISABLED_CHROMIUM_FEATURES = (
    'IsolateOrigins',
    'site-per-process',
    'TranslateUI',
    'Translate',
    'OptimizationHints',
    'InterestCohortAPI',
    'BlinkGenPropertyTrees',
    'ImprovedCookieControls',
    'SameSiteDefaultChecksMethodRigorously',
    'CookieSameSiteByDefaultWhenReportingEnabled',
    'AutofillServerCommunication',
    'AutofillUseOptimizedLocalStorage',
    'CalculateNativeWinOcclusion',
    'VizDisplayCompositor',
    'VizHitTestQuery',
)

# Script injected into every page before its own scripts run. The text is
# passed to the browser verbatim, so it is kept exactly as it was.
_STEALTH_INIT_SCRIPT = """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined,
});
Object.defineProperty(navigator, 'plugins', {
get: () => [1, 2, 3, 4, 5],
});
Object.defineProperty(navigator, 'languages', {
get: () => ['zh-CN', 'zh', 'en'],
});
// 隐瞒代理检测
delete window.cdc_adoQpoasnfa76pfcZLmcfl_Array;
delete window.cdc_adoQpoasnfa76pfcZLmcfl_Promise;
delete window.cdc_adoQpoasnfa76pfcZLmcfl_Symbol;
"""


def _build_proxy_settings(proxy_config) -> dict:
    """Turn a proxy config entry into the dict Playwright's ``proxy=`` option takes.

    Credentials are read straight from the entry instead of being packed into
    a URL and split again, so passwords containing ``:`` or ``@`` survive.
    The server is always emitted with an ``http://`` scheme.
    """
    host = proxy_config['server']
    for scheme in ('http://', 'https://'):
        host = host.replace(scheme, '')
    return {
        "server": f"http://{host}",
        "username": str(proxy_config['username']),
        "password": str(proxy_config['password']),
    }


def _build_launch_args() -> list:
    """Return the Chromium command-line switches used for the test browser."""
    return [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--disable-background-timer-throttling',
        '--disable-renderer-backgrounding',
        '--disable-background-networking',
        '--enable-features=NetworkService,NetworkServiceInProcess',
        '--disable-ipc-flooding-protection',
        '--disable-web-security',
        '--disable-features=' + ','.join(_DISABLED_CHROMIUM_FEATURES),
        '--disable-site-isolation-trials',
        '--disable-extensions',
        '--disable-breakpad',
        '--disable-component-extensions-with-background-pages',
        '--disable-hang-monitor',
        '--disable-prompt-on-repost',
        '--disable-domain-reliability',
        '--disable-component-update',
        '--hide-scrollbars',
        '--mute-audio',
        '--no-first-run',
        '--no-default-browser-check',
        '--metrics-recording-only',
        '--force-color-profile=srgb',
        '--disable-default-apps',
    ]


async def _probe_baidu(page) -> None:
    """Load Baidu through the proxy and print the resulting title and URL."""
    print("\n🌐 访问百度测试代理连接...")
    try:
        await page.goto('https://www.baidu.com', wait_until='domcontentloaded', timeout=15000)
        await asyncio.sleep(2)
        title = await page.title()
        print(" ✅ 百度访问成功")
        print(f" 标题: {title}")
        print(f" URL: {page.url}")
    except Exception as e:
        # A failed connectivity check is reported but does not stop the run.
        print(f" ❌ 百度访问失败: {e}")


async def _probe_xiaohongshu_login(page) -> None:
    """Load the Xiaohongshu creator login page and report what rendered."""
    print("\n🌐 访问小红书创作者平台...")
    try:
        await page.goto('https://creator.xiaohongshu.com/login', wait_until='domcontentloaded', timeout=30000)
        await asyncio.sleep(3)  # give the page extra time to settle
        title = await page.title()
        content = await page.content()
        print(" 访问结果:")
        print(f" 标题: {title}")
        print(f" URL: {page.url}")
        print(f" 内容长度: {len(content)} 字符")

        if not content:
            print(" ⚠️ 页面内容为空")
            return
        # NOTE(review): "验证" is also a substring of "验证码", the text of the
        # button searched for below - confirm this keyword check does not
        # flag the normal login page.
        if "验证" in content or "captcha" in content.lower() or "安全" in content:
            print(" ⚠️ 检测到验证或安全提示")
            return

        print(" ✅ 页面加载成功")

        print("\n🔍 查找手机号输入框...")
        if not await _report_selector(page, 'input[placeholder="手机号"]', "手机号输入框"):
            # Fall back to counting every <input> to help diagnose the page.
            inputs = await page.query_selector_all('input')
            print(f" 找到 {len(inputs)} 个input元素")

        print("\n🔍 查找发送验证码按钮...")
        await _report_selector(page, 'text="发送验证码"', "发送验证码按钮")
    except Exception as e:
        print(f" ❌ 小红书访问失败: {e}")


async def _report_selector(page, selector: str, label: str) -> bool:
    """Wait up to 5 s for ``selector``; print and return whether it appeared."""
    try:
        handle = await page.wait_for_selector(selector, timeout=5000)
    except Exception:
        # Not a bare ``except``: Ctrl+C and task cancellation must propagate.
        handle = None
    if handle:
        print(f" ✅ 找到{label}")
        return True
    print(f" ❌ 未找到{label}")
    return False
def explain_optimizations():
    """Print a numbered overview of the anti-detection measures used by this script."""
    rule = "=" * 60
    # (section heading, bullet points) in display order.
    sections = (
        ("浏览器启动参数优化", ("添加更多反检测参数", "禁用可能导致检测的功能")),
        ("浏览器指纹混淆", ("隐瞒webdriver特征", "伪造插件列表", "设置真实语言")),
        ("页面加载策略", ("使用domcontentloaded而非networkidle", "增加超时时间")),
    )

    print(rule)
    print("🔧 优化措施说明")
    print(rule)
    for number, (heading, bullets) in enumerate(sections, start=1):
        # Each section is preceded by a blank line.
        print(f"\n{number}. {heading}:")
        for bullet in bullets:
            print(f" • {bullet}")
async def main():
    """Show the optimisation summary, ask which proxy to use, and run the browser test."""
    rule = "=" * 60

    explain_optimizations()

    print(f"\n{rule}")
    print("🎯 选择代理进行测试")
    print(rule)

    # Anything other than an exact "0" or "1" falls back to proxy 0.
    answer = input("\n请选择代理 (0 或 1, 默认为0): ").strip()
    proxy_idx = int(answer) if answer in ('0', '1') else 0

    await test_optimized_proxy_browser(proxy_idx)

    print(f"\n{rule}")
    print("✅ 测试完成!")
    print(rule)
if __name__ == "__main__":
    # On Windows, select the Proactor event loop policy before starting asyncio.
    # NOTE(review): presumably needed for Playwright's subprocess use - confirm.
    on_windows = sys.platform == 'win32'
    if on_windows:
        policy = asyncio.WindowsProactorEventLoopPolicy()
        asyncio.set_event_loop_policy(policy)

    # Run the interactive test.
    asyncio.run(main())