commit
This commit is contained in:
@@ -12,13 +12,13 @@ import sys
|
||||
class BrowserPool:
|
||||
"""浏览器池管理器(单例模式)"""
|
||||
|
||||
def __init__(self, idle_timeout: int = 1800, max_instances: int = 5, headless: bool = True):
|
||||
def __init__(self, idle_timeout: int = 1800, max_instances: int = 20, headless: bool = True):
|
||||
"""
|
||||
初始化浏览器池
|
||||
|
||||
Args:
|
||||
idle_timeout: 空闲超时时间(秒),默认30分钟(已禁用,保持常驻)
|
||||
max_instances: 最大浏览器实例数,默认5个
|
||||
max_instances: 最大浏览器实例数,默认20个(支持更多并发)
|
||||
headless: 是否使用无头模式,False为有头模式(方便调试)
|
||||
"""
|
||||
self.playwright = None
|
||||
@@ -37,20 +37,29 @@ class BrowserPool:
|
||||
self.temp_browsers: Dict[str, Dict] = {} # {session_id: {browser, context, page, created_at}}
|
||||
self.temp_lock = asyncio.Lock()
|
||||
|
||||
# 请求队列:当超过max_instances时排队等待
|
||||
self.waiting_queue: asyncio.Queue = asyncio.Queue()
|
||||
self.queue_processing = False
|
||||
|
||||
# 扫码登录专用:页面隔离池(共享浏览器和context,但每个用户独立page)
|
||||
self.qrcode_pages: Dict[str, Dict] = {} # {session_id: {page, created_at}}
|
||||
self.qrcode_lock = asyncio.Lock()
|
||||
|
||||
print(f"[浏览器池] 已创建,常驻模式(不自动清理),最大实例数: {max_instances}", file=sys.stderr)
|
||||
|
||||
async def get_browser(self, cookies: Optional[list] = None, proxy: Optional[str] = None,
|
||||
async def get_browser(self, cookies: Optional[list] = None, proxy: Optional[dict] = None,
|
||||
user_agent: Optional[str] = None, session_id: Optional[str] = None,
|
||||
headless: Optional[bool] = None) -> tuple[Browser, BrowserContext, Page]:
|
||||
headless: Optional[bool] = None, force_new: bool = False) -> tuple[Browser, BrowserContext, Page]:
|
||||
"""
|
||||
获取浏览器实例(复用或新建)
|
||||
|
||||
Args:
|
||||
cookies: 可选的Cookie列表
|
||||
proxy: 可选的代理地址
|
||||
proxy: 可选的代理配置,格式: {"server": "...", "username": "...", "password": "..."}
|
||||
user_agent: 可选的自定义User-Agent
|
||||
session_id: 会话 ID,用于区分不同的并发请求
|
||||
headless: 可选的headless模式,为None时使用默认配置
|
||||
force_new: 是否强制创建全新浏览器(即使session_id已存在)
|
||||
|
||||
Returns:
|
||||
(browser, context, page) 三元组
|
||||
@@ -83,16 +92,37 @@ class BrowserPool:
|
||||
else:
|
||||
async with self.temp_lock:
|
||||
# 首先检查是否已存在该session_id的临时浏览器
|
||||
if session_id in self.temp_browsers:
|
||||
if session_id in self.temp_browsers and not force_new:
|
||||
print(f"[浏览器池] 复用会话 {session_id} 的临时浏览器", file=sys.stderr)
|
||||
browser_info = self.temp_browsers[session_id]
|
||||
return browser_info["browser"], browser_info["context"], browser_info["page"]
|
||||
|
||||
# 强制创建全新浏览器:先释放旧的
|
||||
if force_new and session_id in self.temp_browsers:
|
||||
print(f"[浏览器池] force_new=True,释放旧的会话 {session_id}", file=sys.stderr)
|
||||
old_browser_info = self.temp_browsers[session_id]
|
||||
try:
|
||||
await old_browser_info["page"].close()
|
||||
await old_browser_info["context"].close()
|
||||
await old_browser_info["browser"].close()
|
||||
except Exception as e:
|
||||
print(f"[浏览器池] 释放旧浏览器失败: {str(e)}", file=sys.stderr)
|
||||
finally:
|
||||
del self.temp_browsers[session_id]
|
||||
|
||||
# 检查是否超过最大实例数
|
||||
if len(self.temp_browsers) >= self.max_instances - 1: # -1 留给主浏览器
|
||||
print(f"[浏览器池] ⚠️ 已达最大实例数 ({self.max_instances}),等待释放...", file=sys.stderr)
|
||||
# TODO: 可以实现等待队列,这里直接报错
|
||||
raise Exception(f"浏览器实例数已满,请稍后再试")
|
||||
|
||||
# 等待最多30秒,每隔1秒检查一次
|
||||
for i in range(30):
|
||||
await asyncio.sleep(1)
|
||||
if len(self.temp_browsers) < self.max_instances - 1:
|
||||
print(f"[浏览器池] 检测到空闲实例,继续创建", file=sys.stderr)
|
||||
break
|
||||
else:
|
||||
# 超时30秒仍满,返回错误
|
||||
raise Exception(f"浏览器实例数已满,请稍后再试")
|
||||
|
||||
print(f"[浏览器池] 为会话 {session_id} 创建临时浏览器 ({len(self.temp_browsers)+1}/{self.max_instances-1})", file=sys.stderr)
|
||||
|
||||
@@ -131,9 +161,9 @@ class BrowserPool:
|
||||
await self.close()
|
||||
return False
|
||||
|
||||
async def _init_browser(self, cookies: Optional[list] = None, proxy: Optional[str] = None,
|
||||
async def _init_browser(self, cookies: Optional[list] = None, proxy: Optional[dict] = None,
|
||||
user_agent: Optional[str] = None):
|
||||
"""初始化新浏览器实例"""
|
||||
"""初始化新浏览器实例。proxy为dict格式: {"server": "...", "username": "...", "password": "..."}"""
|
||||
try:
|
||||
# 启动Playwright
|
||||
if not self.playwright:
|
||||
@@ -202,7 +232,7 @@ class BrowserPool:
|
||||
],
|
||||
}
|
||||
if proxy:
|
||||
launch_kwargs["proxy"] = {"server": proxy}
|
||||
launch_kwargs["proxy"] = proxy # proxy已经是dict格式,直接使用
|
||||
|
||||
self.browser = await self.playwright.chromium.launch(**launch_kwargs)
|
||||
print("[浏览器池] Chromium浏览器启动成功", file=sys.stderr)
|
||||
@@ -215,9 +245,9 @@ class BrowserPool:
|
||||
await self.close()
|
||||
raise
|
||||
|
||||
async def _create_new_context(self, cookies: Optional[list] = None, proxy: Optional[str] = None,
|
||||
async def _create_new_context(self, cookies: Optional[list] = None, proxy: Optional[dict] = None,
|
||||
user_agent: Optional[str] = None):
|
||||
"""创建新的浏览器上下文"""
|
||||
"""创建新的浏览器上下文。proxy为dict格式: {"server": "...", "username": "...", "password": "..."}"""
|
||||
try:
|
||||
# 关闭旧上下文
|
||||
if self.context:
|
||||
@@ -231,6 +261,62 @@ class BrowserPool:
|
||||
}
|
||||
self.context = await self.browser.new_context(**context_kwargs)
|
||||
|
||||
# 注入反检测脚本(关键)
|
||||
await self.context.add_init_script("""
|
||||
// 移除webdriver标记
|
||||
Object.defineProperty(navigator, 'webdriver', {
|
||||
get: () => undefined
|
||||
});
|
||||
|
||||
// 隐藏chrome自动化特征
|
||||
window.chrome = {
|
||||
runtime: {}
|
||||
};
|
||||
|
||||
// 模拟plugins
|
||||
Object.defineProperty(navigator, 'plugins', {
|
||||
get: () => [
|
||||
{
|
||||
0: {type: "application/x-google-chrome-pdf", suffixes: "pdf", description: "Portable Document Format"},
|
||||
description: "Portable Document Format",
|
||||
filename: "internal-pdf-viewer",
|
||||
length: 1,
|
||||
name: "Chrome PDF Plugin"
|
||||
},
|
||||
{
|
||||
0: {type: "application/pdf", suffixes: "pdf", description: ""},
|
||||
description: "",
|
||||
filename: "mhjfbmdgcfjbbpaeojofohoefgiehjai",
|
||||
length: 1,
|
||||
name: "Chrome PDF Viewer"
|
||||
}
|
||||
],
|
||||
});
|
||||
|
||||
// 模拟permissions API
|
||||
const originalQuery = window.navigator.permissions.query;
|
||||
window.navigator.permissions.query = (parameters) => (
|
||||
parameters.name === 'notifications' ?
|
||||
Promise.resolve({ state: Notification.permission }) :
|
||||
originalQuery(parameters)
|
||||
);
|
||||
|
||||
// 阻止检测自动化的网络请求
|
||||
const originalFetch = window.fetch;
|
||||
window.fetch = function(...args) {
|
||||
const url = args[0];
|
||||
if (typeof url === 'string' && (
|
||||
url.includes('127.0.0.1:9222') ||
|
||||
url.includes('localhost:9222') ||
|
||||
url.includes('chrome-extension://invalid')
|
||||
)) {
|
||||
return Promise.reject(new Error('blocked'));
|
||||
}
|
||||
return originalFetch.apply(this, args);
|
||||
};
|
||||
""")
|
||||
print("[浏览器池] 已注入反检测脚本", file=sys.stderr)
|
||||
|
||||
# 注入Cookie
|
||||
if cookies:
|
||||
await self.context.add_cookies(cookies)
|
||||
@@ -402,13 +488,13 @@ class BrowserPool:
|
||||
except:
|
||||
pass
|
||||
|
||||
async def _create_temp_browser(self, cookies: Optional[list] = None, proxy: Optional[str] = None,
|
||||
async def _create_temp_browser(self, cookies: Optional[list] = None, proxy: Optional[dict] = None,
|
||||
user_agent: Optional[str] = None, headless: bool = True) -> tuple[Browser, BrowserContext, Page]:
|
||||
"""创建临时浏览器实例(用于并发请求)
|
||||
|
||||
Args:
|
||||
cookies: Cookie列表
|
||||
proxy: 代理地址
|
||||
proxy: 代理配置,格式: {"server": "...", "username": "...", "password": "..."}
|
||||
user_agent: 自定义User-Agent
|
||||
headless: 是否使用无头模式
|
||||
"""
|
||||
@@ -425,14 +511,14 @@ class BrowserPool:
|
||||
|
||||
# 启动浏览器(临时实例,性能优先配置)
|
||||
launch_kwargs = {
|
||||
"headless": headless, # 使用传入的headless参数
|
||||
"headless": headless,
|
||||
"args": [
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
|
||||
# 性能优化
|
||||
# 性能优化 - 减少资源占用
|
||||
'--disable-web-security',
|
||||
'--disable-features=IsolateOrigins,site-per-process',
|
||||
'--disable-site-isolation-trials',
|
||||
@@ -442,20 +528,19 @@ class BrowserPool:
|
||||
'--disable-renderer-backgrounding',
|
||||
'--disable-background-networking',
|
||||
|
||||
# 缓存优化
|
||||
'--disk-cache-size=268435456',
|
||||
'--media-cache-size=134217728',
|
||||
# 缓存优化 - 减小缓存以节省内存
|
||||
'--disk-cache-size=67108864', # 64MB(原256MB)
|
||||
'--media-cache-size=33554432', # 32MB(原128MB)
|
||||
|
||||
# 渲染优化
|
||||
'--enable-gpu-rasterization',
|
||||
'--enable-zero-copy',
|
||||
'--ignore-gpu-blocklist',
|
||||
'--enable-accelerated-2d-canvas',
|
||||
# 渲染优化 - 禁用GPU以减少资源占用
|
||||
'--disable-gpu',
|
||||
'--disable-accelerated-2d-canvas',
|
||||
'--disable-accelerated-video-decode',
|
||||
|
||||
# 网络优化
|
||||
'--enable-quic',
|
||||
'--enable-tcp-fast-open',
|
||||
'--max-connections-per-host=10',
|
||||
'--max-connections-per-host=6', # 减少连接数(原10)
|
||||
|
||||
# 减少不必要的功能
|
||||
'--disable-extensions',
|
||||
@@ -466,6 +551,9 @@ class BrowserPool:
|
||||
'--disable-prompt-on-repost',
|
||||
'--disable-domain-reliability',
|
||||
'--disable-component-update',
|
||||
'--disable-plugins',
|
||||
'--disable-sync',
|
||||
'--disable-translate',
|
||||
|
||||
# 界面优化
|
||||
'--hide-scrollbars',
|
||||
@@ -473,21 +561,82 @@ class BrowserPool:
|
||||
'--no-first-run',
|
||||
'--no-default-browser-check',
|
||||
'--metrics-recording-only',
|
||||
'--force-color-profile=srgb',
|
||||
|
||||
# 内存优化
|
||||
'--js-flags=--max-old-space-size=512', # 限制JS堆内存
|
||||
],
|
||||
}
|
||||
if proxy:
|
||||
launch_kwargs["proxy"] = {"server": proxy}
|
||||
launch_kwargs["proxy"] = proxy # proxy已经是dict格式,直接使用
|
||||
|
||||
browser = await self.playwright.chromium.launch(**launch_kwargs)
|
||||
|
||||
# 创建上下文
|
||||
# 创建上下文(使用隐身模式,确保无痕迹)
|
||||
context_kwargs = {
|
||||
"viewport": {'width': 1280, 'height': 720},
|
||||
"user_agent": user_agent or 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||
"no_viewport": False,
|
||||
"ignore_https_errors": True,
|
||||
# 不使用storage_state,确保完全干净
|
||||
}
|
||||
context = await browser.new_context(**context_kwargs)
|
||||
|
||||
# 注入反检测脚本(关键)
|
||||
await context.add_init_script("""
|
||||
// 移除webdriver标记
|
||||
Object.defineProperty(navigator, 'webdriver', {
|
||||
get: () => undefined
|
||||
});
|
||||
|
||||
// 隐藏chrome自动化特征
|
||||
window.chrome = {
|
||||
runtime: {}
|
||||
};
|
||||
|
||||
// 模拟plugins
|
||||
Object.defineProperty(navigator, 'plugins', {
|
||||
get: () => [
|
||||
{
|
||||
0: {type: "application/x-google-chrome-pdf", suffixes: "pdf", description: "Portable Document Format"},
|
||||
description: "Portable Document Format",
|
||||
filename: "internal-pdf-viewer",
|
||||
length: 1,
|
||||
name: "Chrome PDF Plugin"
|
||||
},
|
||||
{
|
||||
0: {type: "application/pdf", suffixes: "pdf", description: ""},
|
||||
description: "",
|
||||
filename: "mhjfbmdgcfjbbpaeojofohoefgiehjai",
|
||||
length: 1,
|
||||
name: "Chrome PDF Viewer"
|
||||
}
|
||||
],
|
||||
});
|
||||
|
||||
// 模拟permissions API
|
||||
const originalQuery = window.navigator.permissions.query;
|
||||
window.navigator.permissions.query = (parameters) => (
|
||||
parameters.name === 'notifications' ?
|
||||
Promise.resolve({ state: Notification.permission }) :
|
||||
originalQuery(parameters)
|
||||
);
|
||||
|
||||
// 阻止检测自动化的网络请求
|
||||
const originalFetch = window.fetch;
|
||||
window.fetch = function(...args) {
|
||||
const url = args[0];
|
||||
if (typeof url === 'string' && (
|
||||
url.includes('127.0.0.1:9222') ||
|
||||
url.includes('localhost:9222') ||
|
||||
url.includes('chrome-extension://invalid')
|
||||
)) {
|
||||
return Promise.reject(new Error('blocked'));
|
||||
}
|
||||
return originalFetch.apply(this, args);
|
||||
};
|
||||
""")
|
||||
print("[临时浏览器] 已注入反检测脚本", file=sys.stderr)
|
||||
|
||||
# 注入Cookie
|
||||
if cookies:
|
||||
await context.add_cookies(cookies)
|
||||
@@ -516,6 +665,54 @@ class BrowserPool:
|
||||
finally:
|
||||
del self.temp_browsers[session_id]
|
||||
|
||||
async def get_qrcode_page(self, session_id: str) -> Page:
    """Return the QR-code login page for ``session_id``, creating it if needed.

    Page-isolation mode: sessions share the main browser and its context,
    but each session gets its own page (the original author notes this is
    meant to reduce the risk of browser crashes compared with one browser
    per user).

    Args:
        session_id: Session ID; used as the key in ``self.qrcode_pages``.

    Returns:
        The page already registered for the session when it is still open,
        otherwise a newly created page from ``self.context``.
    """
    async with self.qrcode_lock:
        cached = self.qrcode_pages.get(session_id)
        if cached is not None:
            if not cached["page"].is_closed():
                print(f"[扫码页面池] 复用会话 {session_id} 的页面", file=sys.stderr)
                return cached["page"]
            # The registered page was closed outside of release_qrcode_page
            # (e.g. its browser went away); handing it out again would make
            # every later call on it fail, so drop it and build a new one.
            print(f"[扫码页面池] 会话 {session_id} 的页面已关闭,重新创建", file=sys.stderr)
            del self.qrcode_pages[session_id]

        # Make sure the main browser exists before opening a page on it.
        async with self.init_lock:
            if not await self._is_browser_alive():
                print("[扫码页面池] 主浏览器未初始化,创建中...", file=sys.stderr)
                await self._init_browser()

        # Open a new page on the shared main context.
        print(f"[扫码页面池] 为会话 {session_id} 创建新页面 ({len(self.qrcode_pages)+1} 个活跃页面)", file=sys.stderr)
        page = await self.context.new_page()

        self.qrcode_pages[session_id] = {
            "page": page,
            "created_at": time.time(),
        }

        return page
|
||||
|
||||
async def release_qrcode_page(self, session_id: str):
    """Close the QR-code login page of ``session_id`` and unregister it.

    Does nothing when the session has no entry in ``self.qrcode_pages``.
    An error raised while closing the page is logged to stderr and not
    propagated; the entry is removed from the registry in either case.

    Args:
        session_id: Session ID whose page should be released.
    """
    async with self.qrcode_lock:
        if session_id not in self.qrcode_pages:
            return

        entry = self.qrcode_pages[session_id]
        try:
            await entry["page"].close()
            print(f"[扫码页面池] 已释放会话 {session_id} 的页面", file=sys.stderr)
        except Exception as err:
            print(f"[扫码页面池] 释放页面异常: {str(err)}", file=sys.stderr)
        finally:
            # Always forget the entry, even if closing the page failed.
            del self.qrcode_pages[session_id]
|
||||
|
||||
def get_stats(self) -> Dict[str, Any]:
|
||||
"""获取浏览器池统计信息"""
|
||||
return {
|
||||
@@ -524,6 +721,7 @@ class BrowserPool:
|
||||
"page_alive": self.page is not None,
|
||||
"is_preheated": self.is_preheated,
|
||||
"temp_browsers_count": len(self.temp_browsers),
|
||||
"qrcode_pages_count": len(self.qrcode_pages),
|
||||
"max_instances": self.max_instances,
|
||||
"last_used_time": self.last_used_time,
|
||||
"idle_seconds": int(time.time() - self.last_used_time) if self.last_used_time > 0 else 0,
|
||||
|
||||
Reference in New Issue
Block a user