This commit is contained in:
sjk
2026-01-23 16:27:47 +08:00
parent 213229953b
commit e8e6d913df
26 changed files with 4294 additions and 431 deletions

View File

@@ -32,7 +32,10 @@ class XHSScheduler:
enable_random_ua: bool = True,
min_publish_interval: int = 30,
max_publish_interval: int = 120,
headless: bool = True):
headless: bool = True,
use_adspower: bool = True,
proxy_username: Optional[str] = None, # 新增:代理用户名
proxy_password: Optional[str] = None): # 新增:代理密码
"""
初始化调度器
@@ -48,6 +51,9 @@ class XHSScheduler:
min_publish_interval: 最小发布间隔(秒)
max_publish_interval: 最大发布间隔(秒)
headless: 是否使用无头模式(False为有头模式,方便调试)
use_adspower: 是否使用AdsPower浏览器管理
proxy_username: 代理用户名(可选,白名单模式可留空)
proxy_password: 代理密码(可选,白名单模式可留空)
"""
self.db_config = db_config
self.max_concurrent = max_concurrent
@@ -58,16 +64,25 @@ class XHSScheduler:
self.max_hourly_articles_per_user = max_hourly_articles_per_user
self.proxy_pool_enabled = proxy_pool_enabled
self.proxy_pool_api_url = proxy_pool_api_url or ""
self.proxy_username = proxy_username or "" # 保存代理用户名
self.proxy_password = proxy_password or "" # 保存代理密码
self.enable_random_ua = enable_random_ua
self.min_publish_interval = min_publish_interval
self.max_publish_interval = max_publish_interval
self.headless = headless
self.use_adspower = use_adspower
self.scheduler = AsyncIOScheduler()
self.login_service = XHSLoginService(use_pool=True, headless=headless)
# 使用AdsPower时禁用浏览器池避免资源冲突
self.login_service = XHSLoginService(
use_pool=False, # 使用AdsPower不需要浏览器池
headless=headless,
use_adspower=use_adspower
)
self.semaphore = asyncio.Semaphore(max_concurrent)
print(f"[调度器] 已创建,最大并发: {max_concurrent}", file=sys.stderr)
mode_text = "AdsPower" if use_adspower else "浏览器池" if not use_adspower else "传统"
print(f"[调度器] 已创建,最大并发: {max_concurrent},浏览器模式: {mode_text}", file=sys.stderr)
def start(self, cron_expr: str = "*/5 * * * * *"):
"""
@@ -122,8 +137,13 @@ class XHSScheduler:
cursorclass=pymysql.cursors.DictCursor
)
async def _fetch_proxy_from_pool(self) -> Optional[str]:
"""从代理池接口获取一个代理地址(http://ip:port)"""
async def _fetch_proxy_from_pool(self) -> Optional[dict]:
"""从代理池接口获取一个代理地址,并附加认证信息
Returns:
dict: 代理配置字典 {'server': 'http://ip:port', 'username': '...', 'password': '...'}
或 None 如果未启用或获取失败
"""
if not self.proxy_pool_enabled or not self.proxy_pool_api_url:
return None
@@ -145,9 +165,24 @@ class XHSScheduler:
print("[调度器] 代理池首行内容为空", file=sys.stderr)
return None
if line.startswith("http://") or line.startswith("https://"):
return line
return "http://" + line
# 构建代理URL
proxy_server = line if line.startswith(("http://", "https://")) else "http://" + line
# 构建完整的代理配置字典
proxy_config = {
'server': proxy_server
}
# 如果配置了认证信息,添加到配置中
if self.proxy_username and self.proxy_password:
proxy_config['username'] = self.proxy_username
proxy_config['password'] = self.proxy_password
print(f"[调度器] 获取代理成功: {proxy_server} (认证代理, 用户名: {self.proxy_username})", file=sys.stderr)
else:
print(f"[调度器] 获取代理成功: {proxy_server} (白名单模式)", file=sys.stderr)
return proxy_config
except Exception as e:
print(f"[调度器] 请求代理池接口失败: {str(e)}", file=sys.stderr)
return None