This commit is contained in:
sjk
2026-01-07 22:55:12 +08:00
parent cb267e8d5e
commit 4720ab2a15
76 changed files with 3110 additions and 7168 deletions

View File

@@ -12,13 +12,13 @@ import sys
class BrowserPool:
"""浏览器池管理器(单例模式)"""
def __init__(self, idle_timeout: int = 1800, max_instances: int = 5, headless: bool = True):
def __init__(self, idle_timeout: int = 1800, max_instances: int = 20, headless: bool = True):
"""
初始化浏览器池
Args:
idle_timeout: 空闲超时时间默认30分钟已禁用保持常驻
max_instances: 最大浏览器实例数,默认5个
max_instances: 最大浏览器实例数,默认20个支持更多并发
headless: 是否使用无头模式False为有头模式方便调试
"""
self.playwright = None
@@ -37,20 +37,29 @@ class BrowserPool:
self.temp_browsers: Dict[str, Dict] = {} # {session_id: {browser, context, page, created_at}}
self.temp_lock = asyncio.Lock()
# 请求队列当超过max_instances时排队等待
self.waiting_queue: asyncio.Queue = asyncio.Queue()
self.queue_processing = False
# 扫码登录专用页面隔离池共享浏览器和context但每个用户独立page
self.qrcode_pages: Dict[str, Dict] = {} # {session_id: {page, created_at}}
self.qrcode_lock = asyncio.Lock()
print(f"[浏览器池] 已创建,常驻模式(不自动清理),最大实例数: {max_instances}", file=sys.stderr)
async def get_browser(self, cookies: Optional[list] = None, proxy: Optional[str] = None,
async def get_browser(self, cookies: Optional[list] = None, proxy: Optional[dict] = None,
user_agent: Optional[str] = None, session_id: Optional[str] = None,
headless: Optional[bool] = None) -> tuple[Browser, BrowserContext, Page]:
headless: Optional[bool] = None, force_new: bool = False) -> tuple[Browser, BrowserContext, Page]:
"""
获取浏览器实例(复用或新建)
Args:
cookies: 可选的Cookie列表
proxy: 可选的代理地址
proxy: 可选的代理配置,格式: {"server": "...", "username": "...", "password": "..."}
user_agent: 可选的自定义User-Agent
session_id: 会话 ID用于区分不同的并发请求
headless: 可选的headless模式为None时使用默认配置
force_new: 是否强制创建全新浏览器即使session_id已存在
Returns:
(browser, context, page) 三元组
@@ -83,16 +92,37 @@ class BrowserPool:
else:
async with self.temp_lock:
# 首先检查是否已存在该session_id的临时浏览器
if session_id in self.temp_browsers:
if session_id in self.temp_browsers and not force_new:
print(f"[浏览器池] 复用会话 {session_id} 的临时浏览器", file=sys.stderr)
browser_info = self.temp_browsers[session_id]
return browser_info["browser"], browser_info["context"], browser_info["page"]
# 强制创建全新浏览器:先释放旧的
if force_new and session_id in self.temp_browsers:
print(f"[浏览器池] force_new=True释放旧的会话 {session_id}", file=sys.stderr)
old_browser_info = self.temp_browsers[session_id]
try:
await old_browser_info["page"].close()
await old_browser_info["context"].close()
await old_browser_info["browser"].close()
except Exception as e:
print(f"[浏览器池] 释放旧浏览器失败: {str(e)}", file=sys.stderr)
finally:
del self.temp_browsers[session_id]
# 检查是否超过最大实例数
if len(self.temp_browsers) >= self.max_instances - 1: # -1 留给主浏览器
print(f"[浏览器池] ⚠️ 已达最大实例数 ({self.max_instances}),等待释放...", file=sys.stderr)
# TODO: 可以实现等待队列,这里直接报错
raise Exception(f"浏览器实例数已满,请稍后再试")
# 等待最多30秒每秒1秒检查一次
for i in range(30):
await asyncio.sleep(1)
if len(self.temp_browsers) < self.max_instances - 1:
print(f"[浏览器池] 检测到空闲实例,继续创建", file=sys.stderr)
break
else:
# 超时30秒仍满返回错误
raise Exception(f"浏览器实例数已满,请稍后再试")
print(f"[浏览器池] 为会话 {session_id} 创建临时浏览器 ({len(self.temp_browsers)+1}/{self.max_instances-1})", file=sys.stderr)
@@ -131,9 +161,9 @@ class BrowserPool:
await self.close()
return False
async def _init_browser(self, cookies: Optional[list] = None, proxy: Optional[str] = None,
async def _init_browser(self, cookies: Optional[list] = None, proxy: Optional[dict] = None,
user_agent: Optional[str] = None):
"""初始化新浏览器实例"""
"""初始化新浏览器实例。proxy为dict格式: {"server": "...", "username": "...", "password": "..."}"""
try:
# 启动Playwright
if not self.playwright:
@@ -202,7 +232,7 @@ class BrowserPool:
],
}
if proxy:
launch_kwargs["proxy"] = {"server": proxy}
launch_kwargs["proxy"] = proxy # proxy已经是dict格式直接使用
self.browser = await self.playwright.chromium.launch(**launch_kwargs)
print("[浏览器池] Chromium浏览器启动成功", file=sys.stderr)
@@ -215,9 +245,9 @@ class BrowserPool:
await self.close()
raise
async def _create_new_context(self, cookies: Optional[list] = None, proxy: Optional[str] = None,
async def _create_new_context(self, cookies: Optional[list] = None, proxy: Optional[dict] = None,
user_agent: Optional[str] = None):
"""创建新的浏览器上下文"""
"""创建新的浏览器上下文。proxy为dict格式: {"server": "...", "username": "...", "password": "..."}"""
try:
# 关闭旧上下文
if self.context:
@@ -231,6 +261,62 @@ class BrowserPool:
}
self.context = await self.browser.new_context(**context_kwargs)
# 注入反检测脚本(关键)
await self.context.add_init_script("""
// 移除webdriver标记
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
});
// 隐藏chrome自动化特征
window.chrome = {
runtime: {}
};
// 模拟plugins
Object.defineProperty(navigator, 'plugins', {
get: () => [
{
0: {type: "application/x-google-chrome-pdf", suffixes: "pdf", description: "Portable Document Format"},
description: "Portable Document Format",
filename: "internal-pdf-viewer",
length: 1,
name: "Chrome PDF Plugin"
},
{
0: {type: "application/pdf", suffixes: "pdf", description: ""},
description: "",
filename: "mhjfbmdgcfjbbpaeojofohoefgiehjai",
length: 1,
name: "Chrome PDF Viewer"
}
],
});
// 模拟permissions API
const originalQuery = window.navigator.permissions.query;
window.navigator.permissions.query = (parameters) => (
parameters.name === 'notifications' ?
Promise.resolve({ state: Notification.permission }) :
originalQuery(parameters)
);
// 阻止检测自动化的网络请求
const originalFetch = window.fetch;
window.fetch = function(...args) {
const url = args[0];
if (typeof url === 'string' && (
url.includes('127.0.0.1:9222') ||
url.includes('localhost:9222') ||
url.includes('chrome-extension://invalid')
)) {
return Promise.reject(new Error('blocked'));
}
return originalFetch.apply(this, args);
};
""")
print("[浏览器池] 已注入反检测脚本", file=sys.stderr)
# 注入Cookie
if cookies:
await self.context.add_cookies(cookies)
@@ -402,13 +488,13 @@ class BrowserPool:
except:
pass
async def _create_temp_browser(self, cookies: Optional[list] = None, proxy: Optional[str] = None,
async def _create_temp_browser(self, cookies: Optional[list] = None, proxy: Optional[dict] = None,
user_agent: Optional[str] = None, headless: bool = True) -> tuple[Browser, BrowserContext, Page]:
"""创建临时浏览器实例(用于并发请求)
Args:
cookies: Cookie列表
proxy: 代理地址
proxy: 代理配置,格式: {"server": "...", "username": "...", "password": "..."}
user_agent: 自定义User-Agent
headless: 是否使用无头模式
"""
@@ -425,14 +511,14 @@ class BrowserPool:
# 启动浏览器(临时实例,性能优先配置)
launch_kwargs = {
"headless": headless, # 使用传入的headless参数
"headless": headless,
"args": [
'--disable-blink-features=AutomationControlled',
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
# 性能优化
# 性能优化 - 减少资源占用
'--disable-web-security',
'--disable-features=IsolateOrigins,site-per-process',
'--disable-site-isolation-trials',
@@ -442,20 +528,19 @@ class BrowserPool:
'--disable-renderer-backgrounding',
'--disable-background-networking',
# 缓存优化
'--disk-cache-size=268435456',
'--media-cache-size=134217728',
# 缓存优化 - 减小缓存以节省内存
'--disk-cache-size=67108864', # 64MB原256MB
'--media-cache-size=33554432', # 32MB原128MB
# 渲染优化
'--enable-gpu-rasterization',
'--enable-zero-copy',
'--ignore-gpu-blocklist',
'--enable-accelerated-2d-canvas',
# 渲染优化 - 禁用GPU以减少资源占用
'--disable-gpu',
'--disable-accelerated-2d-canvas',
'--disable-accelerated-video-decode',
# 网络优化
'--enable-quic',
'--enable-tcp-fast-open',
'--max-connections-per-host=10',
'--max-connections-per-host=6', # 减少连接数原10
# 减少不必要的功能
'--disable-extensions',
@@ -466,6 +551,9 @@ class BrowserPool:
'--disable-prompt-on-repost',
'--disable-domain-reliability',
'--disable-component-update',
'--disable-plugins',
'--disable-sync',
'--disable-translate',
# 界面优化
'--hide-scrollbars',
@@ -473,21 +561,82 @@ class BrowserPool:
'--no-first-run',
'--no-default-browser-check',
'--metrics-recording-only',
'--force-color-profile=srgb',
# 内存优化
'--js-flags=--max-old-space-size=512', # 限制JS堆内存
],
}
if proxy:
launch_kwargs["proxy"] = {"server": proxy}
launch_kwargs["proxy"] = proxy # proxy已经是dict格式直接使用
browser = await self.playwright.chromium.launch(**launch_kwargs)
# 创建上下文
# 创建上下文(使用隐身模式,确保无痕迹)
context_kwargs = {
"viewport": {'width': 1280, 'height': 720},
"user_agent": user_agent or 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
"no_viewport": False,
"ignore_https_errors": True,
# 不使用storage_state确保完全干净
}
context = await browser.new_context(**context_kwargs)
# 注入反检测脚本(关键)
await context.add_init_script("""
// 移除webdriver标记
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
});
// 隐藏chrome自动化特征
window.chrome = {
runtime: {}
};
// 模拟plugins
Object.defineProperty(navigator, 'plugins', {
get: () => [
{
0: {type: "application/x-google-chrome-pdf", suffixes: "pdf", description: "Portable Document Format"},
description: "Portable Document Format",
filename: "internal-pdf-viewer",
length: 1,
name: "Chrome PDF Plugin"
},
{
0: {type: "application/pdf", suffixes: "pdf", description: ""},
description: "",
filename: "mhjfbmdgcfjbbpaeojofohoefgiehjai",
length: 1,
name: "Chrome PDF Viewer"
}
],
});
// 模拟permissions API
const originalQuery = window.navigator.permissions.query;
window.navigator.permissions.query = (parameters) => (
parameters.name === 'notifications' ?
Promise.resolve({ state: Notification.permission }) :
originalQuery(parameters)
);
// 阻止检测自动化的网络请求
const originalFetch = window.fetch;
window.fetch = function(...args) {
const url = args[0];
if (typeof url === 'string' && (
url.includes('127.0.0.1:9222') ||
url.includes('localhost:9222') ||
url.includes('chrome-extension://invalid')
)) {
return Promise.reject(new Error('blocked'));
}
return originalFetch.apply(this, args);
};
""")
print("[临时浏览器] 已注入反检测脚本", file=sys.stderr)
# 注入Cookie
if cookies:
await context.add_cookies(cookies)
@@ -516,6 +665,54 @@ class BrowserPool:
finally:
del self.temp_browsers[session_id]
async def get_qrcode_page(self, session_id: str) -> Page:
    """
    Return the page dedicated to a QR-code login session (page-isolation mode).

    Sessions share the pool's main browser context (``self.context``), but each
    session_id is given its own page, tracked in ``self.qrcode_pages``.

    A cached page is only reused while it is still open. If the cached page
    has been closed, the stale entry is dropped and a fresh page is created,
    so callers never receive a dead page.

    Args:
        session_id: Session ID used as the key in ``self.qrcode_pages``.

    Returns:
        The open Page object associated with ``session_id``.
    """
    async with self.qrcode_lock:
        # Reuse the existing page only if it is still usable.
        cached = self.qrcode_pages.get(session_id)
        if cached is not None:
            if not cached["page"].is_closed():
                print(f"[扫码页面池] 复用会话 {session_id} 的页面", file=sys.stderr)
                return cached["page"]
            # The page was closed behind our back; forget it and recreate below.
            print(f"[扫码页面池] 会话 {session_id} 的页面已关闭,重新创建", file=sys.stderr)
            del self.qrcode_pages[session_id]

        # Make sure the main browser is initialized before touching self.context.
        async with self.init_lock:
            if not await self._is_browser_alive():
                print("[扫码页面池] 主浏览器未初始化,创建中...", file=sys.stderr)
                await self._init_browser()

        # Create a new page from the main context and register it.
        print(f"[扫码页面池] 为会话 {session_id} 创建新页面 ({len(self.qrcode_pages)+1} 个活跃页面)", file=sys.stderr)
        page = await self.context.new_page()
        self.qrcode_pages[session_id] = {
            "page": page,
            "created_at": time.time()
        }
        return page
async def release_qrcode_page(self, session_id: str):
    """
    Close and forget the QR-code login page registered for ``session_id``.

    Does nothing when no page is registered for the session. A failure while
    closing the page is logged to stderr and not propagated; the registry
    entry is removed either way.

    Args:
        session_id: Session ID whose page should be released.
    """
    async with self.qrcode_lock:
        if session_id not in self.qrcode_pages:
            return

        entry = self.qrcode_pages[session_id]
        try:
            await entry["page"].close()
        except Exception as exc:
            print(f"[扫码页面池] 释放页面异常: {exc!s}", file=sys.stderr)
        else:
            print(f"[扫码页面池] 已释放会话 {session_id} 的页面", file=sys.stderr)
        finally:
            # Drop the entry whether or not the close succeeded.
            del self.qrcode_pages[session_id]
def get_stats(self) -> Dict[str, Any]:
"""获取浏览器池统计信息"""
return {
@@ -524,6 +721,7 @@ class BrowserPool:
"page_alive": self.page is not None,
"is_preheated": self.is_preheated,
"temp_browsers_count": len(self.temp_browsers),
"qrcode_pages_count": len(self.qrcode_pages),
"max_instances": self.max_instances,
"last_used_time": self.last_used_time,
"idle_seconds": int(time.time() - self.last_used_time) if self.last_used_time > 0 else 0,