Files
ai_mip/fingerprint_browser.py
2026-01-13 18:59:26 +08:00

650 lines
24 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
指纹浏览器管理模块
支持 AdsPower 指纹浏览器 + Playwright CDP 连接
用于绕过小红书风控检测
"""
import requests
import time
import random
import logging
import asyncio
import os
from typing import Dict, Any, Optional, Tuple
from playwright.async_api import async_playwright, Browser, Page, BrowserContext
# Build a Session that bypasses proxies; used for the local AdsPower API
def get_no_proxy_session():
    """Return a new requests Session that ignores proxies taken from the environment."""
    direct_session = requests.Session()
    # Disable environment-derived settings so proxy variables are not applied.
    direct_session.trust_env = False
    return direct_session
# Module-wide proxy-free Session, created lazily on first use
_no_proxy_session = None


def get_local_session():
    """Return the shared proxy-free Session for local API calls, creating it on first call."""
    global _no_proxy_session
    if _no_proxy_session is not None:
        return _no_proxy_session
    _no_proxy_session = get_no_proxy_session()
    return _no_proxy_session
from log_config import setup_logger
from damai_proxy_config import get_proxy_ip
# Initialise logging - this module writes to the shared xhs_server log
logger = setup_logger(
    name='xhs_server',
    level=logging.INFO,
    console_output=True,
    # Main log file and how many rotated copies to keep
    log_file='logs/xhs_server.log',
    backup_count=30,
    # Separate error log file and its rotated-copy count
    error_log_file='logs/xhs_server_error.log',
    error_backup_count=90,
)
# AdsPower local API settings.
# The base URL and API key can be overridden with the ADSPOWER_API_BASE and
# ADSPOWER_API_KEY environment variables so credentials need not live in source.
ADSPOWER_CONFIG = {
    # Address of the AdsPower local API
    'api_base': os.environ.get('ADSPOWER_API_BASE', 'http://127.0.0.1:50325'),
    # Whether the fingerprint browser is enabled
    'enabled': True,
    # Default group ID used when creating profiles
    'default_group_id': '0',
    # AdsPower API key.
    # SECURITY: the literal fallback is kept only for backward compatibility;
    # prefer setting ADSPOWER_API_KEY and rotating this committed value.
    'api_key': os.environ.get('ADSPOWER_API_KEY', 'e5afd5a4cead5589247febbeabc39bcb'),
}
class FingerprintBrowserManager:
    """
    Manager for AdsPower fingerprint browsers.

    Wraps the AdsPower local HTTP API (profile listing/creation, proxy
    updates, browser start/stop) and attaches Playwright to a started
    browser over CDP. HTTP calls are made with the blocking ``requests``
    session but are executed in the event loop's default executor so the
    coroutines do not stall the loop.
    """

    def __init__(self):
        """Read connection settings from ADSPOWER_CONFIG; nothing is contacted yet."""
        self.api_base = ADSPOWER_CONFIG['api_base']
        self.api_key = ADSPOWER_CONFIG.get('api_key', '')
        self.enabled = ADSPOWER_CONFIG['enabled']
        # Playwright objects of the active CDP connection (set by connect_browser)
        self.current_browser = None
        self.current_context = None
        self.current_page = None
        # Profile most recently started through start_browser()
        self.current_profile_id = None
        # Playwright driver handle; stopped again in disconnect()
        self.playwright = None

    def _get_headers(self):
        """Return the API request headers (Bearer token auth when an API key is set)."""
        headers = {'Content-Type': 'application/json'}
        if self.api_key:
            headers['Authorization'] = f'Bearer {self.api_key}'
        return headers

    def _add_api_key(self, params: dict) -> dict:
        """Fallback hook for passing the API key via request parameters.

        Authentication currently goes through the Authorization header, so
        the parameters are returned unchanged.
        """
        return params

    async def _request(self, method: str, path: str, *, timeout: float,
                       params: Optional[dict] = None,
                       json_body: Optional[dict] = None,
                       with_auth: bool = True):
        """Send one request to the local API without blocking the event loop.

        Args:
            method: HTTP method name ('GET' or 'POST').
            path: Path appended to ``self.api_base``.
            timeout: requests timeout in seconds.
            params: Optional query parameters.
            json_body: Optional JSON payload.
            with_auth: Whether to send the headers from ``_get_headers()``.
        Returns:
            The ``requests`` response object.
        Raises:
            Whatever ``requests`` raises (connection errors, timeouts).
        """
        session = get_local_session()
        headers = self._get_headers() if with_auth else None
        url = f"{self.api_base}{path}"
        loop = asyncio.get_running_loop()
        # requests is synchronous; run it in a worker thread so a slow API
        # call (timeouts here go up to 60s) cannot freeze other coroutines.
        return await loop.run_in_executor(
            None,
            lambda: session.request(
                method, url,
                params=params, json=json_body, headers=headers, timeout=timeout,
            ),
        )

    @staticmethod
    def _success_data(response) -> Optional[dict]:
        """Return the response's ``data`` object when the call succeeded.

        Success means HTTP 200 and ``code == 0`` in the JSON body. A missing
        or null ``data`` member yields an empty dict; any failure yields None,
        so callers must compare against None rather than test truthiness.
        """
        if response.status_code != 200:
            return None
        body = response.json()
        if body.get('code') != 0:
            return None
        return body.get('data') or {}

    @staticmethod
    def _parse_proxy_server(server: str) -> Optional[Tuple[str, int]]:
        """Split ``[http(s)://]host:port`` into ``(host, port)``; None when malformed."""
        address = (server or '').strip()
        for scheme in ('http://', 'https://'):
            if address.startswith(scheme):
                address = address[len(scheme):]
                break
        address = address.rstrip('/')
        host, separator, port = address.rpartition(':')
        if not separator or not host or ':' in host or not port.isdigit():
            return None
        return host, int(port)

    @staticmethod
    def _redacted(proxy_settings: dict) -> dict:
        """Return a copy of proxy settings that is safe to log (password masked)."""
        if isinstance(proxy_settings, dict) and proxy_settings.get('proxy_password'):
            return {**proxy_settings, 'proxy_password': '***'}
        return proxy_settings

    async def check_adspower_status(self) -> bool:
        """
        Check whether AdsPower is running.
        Returns:
            bool: True when the local API's /status endpoint reports code 0
        """
        try:
            # The original status probe sent no auth headers; keep it that way.
            response = await self._request('GET', '/status', timeout=5, with_auth=False)
            if self._success_data(response) is not None:
                logger.info("[指纹浏览器] AdsPower 状态正常")
                return True
            logger.warning(f"[指纹浏览器] AdsPower 状态异常: {response.text}")
            return False
        except requests.exceptions.ConnectionError:
            logger.warning("[指纹浏览器] AdsPower 未运行,请先启动 AdsPower")
            return False
        except Exception as e:
            logger.error(f"[指纹浏览器] 检查 AdsPower 状态失败: {e}")
            return False

    async def get_browser_profiles(self) -> list:
        """
        Fetch the list of browser profiles.

        Only one page of up to 100 entries is requested.
        Returns:
            list: profile dicts as returned by the API; empty on any failure
        """
        try:
            response = await self._request(
                'GET', '/api/v1/user/list',
                params={'page_size': 100},
                timeout=10,
            )
            data = self._success_data(response)
            if data is not None:
                profiles = data.get('list') or []
                logger.info(f"[指纹浏览器] 获取到 {len(profiles)} 个浏览器配置")
                return profiles
            logger.warning(f"[指纹浏览器] 获取配置列表失败: {response.text}")
            return []
        except Exception as e:
            logger.error(f"[指纹浏览器] 获取配置列表异常: {e}")
            return []

    async def query_profile_proxy(self, profile_id: str) -> dict:
        """
        Look up the proxy settings stored for one profile.
        Args:
            profile_id: profile ID
        Returns:
            dict: the profile's ``user_proxy_config``; empty when the profile
            is not found, has no proxy section, or the request fails
        """
        try:
            response = await self._request(
                'GET', '/api/v1/user/list',
                params={'user_id': profile_id},
                timeout=10,
            )
            data = self._success_data(response)
            if data is not None:
                profiles = data.get('list') or []
                if profiles:
                    # Guard against an explicit null so callers can always .get()
                    proxy_config = profiles[0].get('user_proxy_config') or {}
                    # Mask the password: this line goes to the shared server log
                    logger.info(f"[指纹浏览器] 查询到配置 {profile_id} 的代理: {self._redacted(proxy_config)}")
                    return proxy_config
            return {}
        except Exception as e:
            logger.error(f"[指纹浏览器] 查询配置代理异常: {e}")
            return {}

    async def create_browser_profile(self, name: str = None, proxy_config: dict = None) -> Optional[str]:
        """
        Create a new browser profile.
        Args:
            name: profile name; defaults to ``xhs_profile_<unix time>``
            proxy_config: proxy settings {'server': 'http://ip:port', 'username': '...', 'password': '...'}
        Returns:
            str: the new profile ID, or None on failure
        """
        try:
            if not name:
                name = f"xhs_profile_{int(time.time())}"

            # Creation payload
            create_params = {
                'name': name,
                'group_id': ADSPOWER_CONFIG['default_group_id'],
                'fingerprint_config': {
                    'automatic_timezone': '1',
                    'language': ['zh-CN', 'zh'],
                    'ua': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
                }
            }

            endpoint = None
            if proxy_config:
                server = proxy_config.get('server', '')
                endpoint = self._parse_proxy_server(server)
                if endpoint is None:
                    # Previously this case was silent; the profile is still
                    # created, but without the requested proxy.
                    logger.warning(f"[指纹浏览器] 代理地址格式错误: {server}")

            if endpoint is not None:
                proxy_host, proxy_port = endpoint
                create_params['user_proxy_config'] = {
                    'proxy_soft': 'other',
                    'proxy_type': 'http',
                    'proxy_host': proxy_host,
                    # The create payload has always sent the port as a string
                    'proxy_port': str(proxy_port),
                    'proxy_user': proxy_config.get('username', ''),
                    'proxy_password': proxy_config.get('password', '')
                }
                logger.info(f"[指纹浏览器] 配置代理: {proxy_host}:{proxy_port}")
            else:
                # NOTE(review): AdsPower's create endpoint is documented as
                # requiring user_proxy_config (or proxyid); send an explicit
                # "no proxy" section - confirm against the API docs.
                create_params['user_proxy_config'] = {'proxy_soft': 'no_proxy'}

            response = await self._request(
                'POST', '/api/v1/user/create',
                json_body=create_params,
                timeout=30,
            )
            data = self._success_data(response)
            if data is not None:
                profile_id = data.get('id')
                logger.info(f"[指纹浏览器] 创建配置成功: {profile_id}")
                return profile_id
            logger.warning(f"[指纹浏览器] 创建配置失败: {response.text}")
            return None
        except Exception as e:
            logger.error(f"[指纹浏览器] 创建配置异常: {e}")
            return None

    async def update_browser_proxy(self, profile_id: str, proxy_config: dict) -> bool:
        """
        Replace the proxy of an existing profile through the AdsPower API
        and verify the change by reading the profile back.
        Args:
            profile_id: profile ID
            proxy_config: proxy settings {'server': 'http://ip:port', 'username': '...', 'password': '...'}
        Returns:
            bool: True only when the API accepted the update and the stored
            host/port match what was sent
        """
        try:
            if not proxy_config:
                logger.warning("[指纹浏览器] 没有代理配置,跳过更新")
                return False

            server = proxy_config.get('server', '')
            endpoint = self._parse_proxy_server(server)
            if endpoint is None:
                logger.warning(f"[指纹浏览器] 代理地址格式错误: {server}")
                return False
            # The port is sent as an integer in the update payload
            proxy_host, proxy_port = endpoint
            logger.info(f"[指纹浏览器] 更新代理配置: {proxy_host}:{proxy_port}")

            # The old proxy settings are overwritten directly rather than
            # cleared first, because clear-then-set could leave the profile
            # in an inconsistent state.
            proxy_user = proxy_config.get('username', '')
            proxy_password = proxy_config.get('password', '')
            user_proxy_config = {
                'proxy_soft': 'other',
                'proxy_type': 'http',
                'proxy_host': proxy_host,
                'proxy_port': proxy_port
            }
            # Credentials are only sent when both are present (whitelist-mode
            # proxies need no authentication)
            if proxy_user and proxy_password:
                user_proxy_config['proxy_user'] = proxy_user
                user_proxy_config['proxy_password'] = proxy_password
                logger.info(f"[指纹浏览器] 使用认证代理: {proxy_host}:{proxy_port}")
            else:
                logger.info(f"[指纹浏览器] 使用白名单代理(无认证): {proxy_host}:{proxy_port}")

            update_params = {
                'user_id': profile_id,
                'user_proxy_config': user_proxy_config
            }

            # Log the request for debugging, with the password masked
            import json as json_module
            loggable_params = {**update_params, 'user_proxy_config': self._redacted(user_proxy_config)}
            logger.info(f"[指纹浏览器] 发送更新请求: {json_module.dumps(loggable_params, ensure_ascii=False)}")

            response = await self._request(
                'POST', '/api/v1/user/update',
                json_body=update_params,
                timeout=30,
            )
            # Log the full response for debugging
            logger.info(f"[指纹浏览器] API响应: {response.text}")

            if response.status_code != 200:
                logger.warning(f"[指纹浏览器] 更新代理请求失败: {response.text}")
                return False

            data = response.json()
            if data.get('code') != 0:
                logger.warning(f"[指纹浏览器] 更新代理失败: {data.get('msg', '未知错误')}")
                return False

            logger.info(f"[指纹浏览器] 代理配置API返回成功: {profile_id}")
            # Read the profile back to confirm the proxy was actually stored
            await asyncio.sleep(0.5)  # give the setting time to take effect
            verify_config = await self.query_profile_proxy(profile_id)
            actual_host = verify_config.get('proxy_host', '')
            actual_port = verify_config.get('proxy_port', '')
            if actual_host == proxy_host and str(actual_port) == str(proxy_port):
                logger.info(f"[指纹浏览器] ✅ 代理配置验证通过: {actual_host}:{actual_port}")
                return True
            logger.warning(f"[指纹浏览器] ❌ 代理配置验证失败! 期望: {proxy_host}:{proxy_port}, 实际: {actual_host}:{actual_port}")
            logger.warning(f"[指纹浏览器] 完整配置: {self._redacted(verify_config)}")
            return False
        except Exception as e:
            logger.error(f"[指纹浏览器] 更新代理异常: {e}")
            return False

    async def start_browser(self, profile_id: str, proxy_config: dict = None) -> Optional[str]:
        """
        Start the browser of a profile and return its CDP debugging address.
        Args:
            profile_id: profile ID
            proxy_config: optional proxy settings applied before the start
        Returns:
            str: CDP WebSocket URL, or None on failure
        """
        try:
            # Stop a possibly running instance first to avoid stale state
            logger.info(f"[指纹浏览器] 先停止可能存在的旧浏览器: {profile_id}")
            await self.stop_browser(profile_id)
            await asyncio.sleep(1)  # wait for the browser to close completely

            # The proxy must be updated after the stop and before the start
            if proxy_config:
                logger.info(f"[指纹浏览器] 启动前更新代理配置...")
                proxy_applied = await self.update_browser_proxy(profile_id, proxy_config)
                if not proxy_applied:
                    # Keep going for backward compatibility, but make it
                    # visible that the browser may run with the previous proxy.
                    logger.warning(f"[指纹浏览器] proxy update not confirmed before start; profile {profile_id} may use its previous proxy")
                await asyncio.sleep(0.5)  # give the setting time to take effect

            logger.info(f"[指纹浏览器] 正在启动浏览器: {profile_id}")
            response = await self._request(
                'GET', '/api/v1/browser/start',
                params={'user_id': profile_id},
                timeout=60,
            )
            data = self._success_data(response)
            if data is not None:
                ws_url = (data.get('ws') or {}).get('puppeteer')
                if ws_url:
                    logger.info(f"[指纹浏览器] 浏览器启动成功CDP地址: {ws_url}")
                    self.current_profile_id = profile_id
                    return ws_url
            logger.warning(f"[指纹浏览器] 启动浏览器失败: {response.text}")
            return None
        except Exception as e:
            logger.error(f"[指纹浏览器] 启动浏览器异常: {e}")
            return None

    async def stop_browser(self, profile_id: str = None) -> bool:
        """
        Stop the browser of a profile.
        Args:
            profile_id: profile ID; defaults to the currently started profile
        Returns:
            bool: True when the API reported success
        """
        try:
            pid = profile_id or self.current_profile_id
            if not pid:
                logger.warning("[指纹浏览器] 没有需要停止的浏览器")
                return False

            response = await self._request(
                'GET', '/api/v1/browser/stop',
                params={'user_id': pid},
                timeout=10,
            )
            if self._success_data(response) is not None:
                logger.info(f"[指纹浏览器] 浏览器已停止: {pid}")
                if pid == self.current_profile_id:
                    self.current_profile_id = None
                return True
            logger.warning(f"[指纹浏览器] 停止浏览器失败: {response.text}")
            return False
        except Exception as e:
            logger.error(f"[指纹浏览器] 停止浏览器异常: {e}")
            return False

    async def connect_browser(self, cdp_url: str) -> Tuple[Optional[Browser], Optional[BrowserContext], Optional[Page]]:
        """
        Attach Playwright to a running fingerprint browser over CDP.
        Args:
            cdp_url: CDP WebSocket URL
        Returns:
            Tuple[Browser, BrowserContext, Page]: browser, context and page;
            ``(None, None, None)`` when the connection fails
        """
        started_here = False
        try:
            logger.info(f"[指纹浏览器] 正在连接 CDP: {cdp_url}")
            # Reuse a driver left from an earlier connect instead of leaking it
            if self.playwright is None:
                self.playwright = await async_playwright().start()
                started_here = True

            browser = await self.playwright.chromium.connect_over_cdp(cdp_url)

            # Use the existing context when there is one (a fingerprint
            # browser usually exposes a single context)
            contexts = browser.contexts
            context = contexts[0] if contexts else await browser.new_context()

            # Use the first open page, or open one
            pages = context.pages
            page = pages[0] if pages else await context.new_page()

            self.current_browser = browser
            self.current_context = context
            self.current_page = page
            logger.info("[指纹浏览器] CDP 连接成功")
            return browser, context, page
        except Exception as e:
            logger.error(f"[指纹浏览器] CDP 连接失败: {e}")
            # Do not leave a driver process behind when this call started it
            if started_here and self.playwright is not None:
                driver, self.playwright = self.playwright, None
                try:
                    await driver.stop()
                except Exception as stop_error:
                    logger.error(f"[指纹浏览器] 断开连接异常: {stop_error}")
            return None, None, None

    async def disconnect(self):
        """Drop the CDP connection and stop Playwright (the browser itself is stopped via stop_browser)."""
        browser, self.current_browser = self.current_browser, None
        self.current_context = None
        self.current_page = None
        try:
            if browser:
                # NOTE(review): for a browser obtained via connect_over_cdp,
                # Playwright documents close() as disconnecting from the
                # browser server - confirm against the installed version.
                await browser.close()
                logger.info("[指纹浏览器] 已断开连接")
        except Exception as e:
            logger.error(f"[指纹浏览器] 断开连接异常: {e}")
        finally:
            # Always stop the driver, even when close() failed above
            driver, self.playwright = self.playwright, None
            if driver:
                try:
                    await driver.stop()
                except Exception as e:
                    logger.error(f"[指纹浏览器] 断开连接异常: {e}")

    async def get_all_profiles(self) -> list:
        """
        Return every usable profile in random order.
        Returns:
            list: ``{'id': ..., 'name': ...}`` dicts for profiles that have a user_id
        """
        profiles = await self.get_browser_profiles()
        profile_ids = [
            {'id': profile.get('user_id', ''), 'name': profile.get('name', '')}
            for profile in profiles
            if profile.get('user_id', '')
        ]
        # Shuffle so repeated calls rotate across the available profiles
        random.shuffle(profile_ids)
        logger.info(f"[指纹浏览器] 获取到 {len(profile_ids)} 个配置,已随机排序")
        return profile_ids

    async def get_or_create_profile(self, proxy_config: dict = None) -> Optional[str]:
        """
        Return an existing profile ID, creating a profile when none exists.
        Args:
            proxy_config: proxy settings, used only when a new profile is created
        Returns:
            str: profile ID, or None when creation fails
        """
        # Profiles arrive already shuffled, so the first one is a random pick
        profiles = await self.get_all_profiles()
        if profiles:
            profile = profiles[0]
            logger.info(f"[指纹浏览器] 随机选择配置: {profile['id']} ({profile['name']})")
            return profile['id']

        logger.info("[指纹浏览器] 没有可用配置,创建新配置...")
        # Forward the caller's proxy; it used to be discarded here
        return await self.create_browser_profile(proxy_config=proxy_config)
async def human_type(page: Page, selector: str, text: str, clear_first: bool = True):
    """
    Type text into an input one character at a time with random pauses.
    Args:
        page: Playwright Page object
        selector: selector of the input element
        text: text to enter
        clear_first: whether to empty the input before typing
    Raises:
        Exception: any error from the page operations is logged and re-raised
    """
    try:
        await page.focus(selector)

        if clear_first:
            # Empty the field, then pause briefly before typing starts
            await page.fill(selector, '')
            await asyncio.sleep(random.uniform(0.1, 0.3))

        for character in text:
            await page.keyboard.type(character)
            # Random 50ms - 150ms gap between keystrokes
            pause = random.uniform(0.05, 0.15)
            await asyncio.sleep(pause)

        logger.info(f"[人类输入] 已输入 {len(text)} 个字符")
    except Exception as error:
        logger.error(f"[人类输入] 输入失败: {error}")
        raise
async def human_click(page: Page, selector: str, wait_after: float = 0.5):
    """
    Click an element the way a person would: move the mouse to a random
    point inside it, pause, then click.
    Args:
        page: Playwright Page object
        selector: selector of the element to click
        wait_after: seconds to wait after the click
    Raises:
        Exception: any error from the page operations is logged and re-raised
    """
    try:
        element = await page.query_selector(selector)
        box = None
        if element:
            # bounding_box() is relative to the viewport, and mouse
            # coordinates are too: without scrolling first, an off-screen
            # element would receive a click at the wrong place.
            await element.scroll_into_view_if_needed()
            box = await element.bounding_box()

        if box:
            # Pick a random point in the central 40% of the element
            x = box['x'] + random.uniform(box['width'] * 0.3, box['width'] * 0.7)
            y = box['y'] + random.uniform(box['height'] * 0.3, box['height'] * 0.7)
            await page.mouse.move(x, y)
            await asyncio.sleep(random.uniform(0.1, 0.3))
            await page.mouse.click(x, y)
            logger.info(f"[人类点击] 点击位置: ({x:.0f}, {y:.0f})")
        else:
            # Element missing or without a box: fall back to a selector click
            await page.click(selector)

        await asyncio.sleep(wait_after)
    except Exception as e:
        logger.error(f"[人类点击] 点击失败: {e}")
        raise
# Module-level singleton instance
_fingerprint_manager = None


def get_fingerprint_manager() -> FingerprintBrowserManager:
    """Return the shared FingerprintBrowserManager, creating it on first call."""
    global _fingerprint_manager
    if _fingerprint_manager is not None:
        return _fingerprint_manager
    _fingerprint_manager = FingerprintBrowserManager()
    return _fingerprint_manager
if __name__ == "__main__":
    # Manual smoke test: start a profile, open a page over CDP, then clean up.
    async def test():
        manager = get_fingerprint_manager()

        # Check that AdsPower is reachable
        if not await manager.check_adspower_status():
            print("AdsPower 未运行")
            return
        print("AdsPower 运行正常")

        # Fetch a proxy IP
        proxy = get_proxy_ip()
        print(f"代理IP: {proxy}")

        # Pick an existing profile or create one
        profile_id = await manager.get_or_create_profile(proxy_config=proxy)
        if not profile_id:
            return
        print(f"配置ID: {profile_id}")

        try:
            # Start the browser
            cdp_url = await manager.start_browser(profile_id)
            if not cdp_url:
                return
            print(f"CDP URL: {cdp_url}")

            try:
                # Attach over CDP and load a test page
                _browser, _context, page = await manager.connect_browser(cdp_url)
                if page:
                    await page.goto("https://httpbin.org/ip")
                    content = await page.content()
                    print(f"页面内容: {content[:200]}")
            finally:
                # Always drop the connection, even if navigation failed
                await manager.disconnect()
        finally:
            # Always stop the browser so a failed run does not leave it open
            await manager.stop_browser(profile_id)

    asyncio.run(test())