Files
ai_wht_wechat/backend/main.py
2026-01-09 23:27:52 +08:00

1567 lines
63 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Windows兼容性必须在任何异步操作之前设置事件循环策略
import sys
import asyncio
import aiohttp
import json
if sys.platform == 'win32':
# Windows下使用ProactorEventLoopPolicy来支持Playwright的子进程
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
print("[系统] Windows环境已设置ProactorEventLoopPolicy", file=sys.stderr)
# 加载配置
from config import init_config, get_config
from dotenv import load_dotenv
load_dotenv() # 从 .env 文件加载环境变量(可选,用于覆盖配置文件)
from fastapi import FastAPI, HTTPException, File, UploadFile, Form, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, Dict, Any, List
from datetime import datetime
import os
import shutil
from pathlib import Path
from xhs_login import XHSLoginService
from browser_pool import get_browser_pool
from scheduler import XHSScheduler
from error_screenshot import cleanup_old_screenshots
from ali_sms_service import AliSmsService
app = FastAPI(title="小红书登录API")
# CORS配置
app.add_middleware(
CORSMiddleware,
allow_origins=["*"], # 生产环境应该限制具体域名
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# 全局登录服务实例延迟初始化避免在startup前创建浏览器池
login_service = None
# 全局浏览器池实例在startup时初始化
browser_pool = None
# 全局调度器实例
scheduler = None
# 全局阿里云短信服务实例
sms_service = None
# WebSocket连接管理器
class ConnectionManager:
def __init__(self):
# session_id -> WebSocket连接
self.active_connections: Dict[str, WebSocket] = {}
# session_id -> 消息队列(用于缓存连接建立前的消息)
self.pending_messages: Dict[str, list] = {}
async def connect(self, session_id: str, websocket: WebSocket):
await websocket.accept()
self.active_connections[session_id] = websocket
print(f"[WebSocket] 新连接: {session_id}", file=sys.stderr)
print(f"[WebSocket] 当前活跃连接数: {len(self.active_connections)}", file=sys.stderr)
# 立即检查缓存消息(不等待)
if session_id in self.pending_messages:
pending_count = len(self.pending_messages[session_id])
print(f"[WebSocket] 发现缓存消息: {pending_count}", file=sys.stderr)
print(f"[WebSocket] 缓存消息内容: {self.pending_messages[session_id]}", file=sys.stderr)
# 等待100ms让前端监听器就绪
await asyncio.sleep(0.1)
for idx, message in enumerate(self.pending_messages[session_id]):
try:
print(f"[WebSocket] 准备发送第{idx+1}条消息...", file=sys.stderr)
await websocket.send_json(message)
print(f"[WebSocket] 已发送缓存消息 [{idx+1}/{pending_count}]: {message.get('type')}", file=sys.stderr)
# 每条消息间隔100ms
await asyncio.sleep(0.1)
except Exception as e:
print(f"[WebSocket] 发送缓存消息失败: {str(e)}", file=sys.stderr)
import traceback
traceback.print_exc()
del self.pending_messages[session_id]
print(f"[WebSocket] 缓存消息已清空: {session_id}", file=sys.stderr)
else:
print(f"[WebSocket] 没有缓存消息: {session_id}", file=sys.stderr)
def disconnect(self, session_id: str):
if session_id in self.active_connections:
del self.active_connections[session_id]
print(f"[WebSocket] 断开连接: {session_id}", file=sys.stderr)
# 清理缓存消息
if session_id in self.pending_messages:
del self.pending_messages[session_id]
async def send_message(self, session_id: str, message: dict):
if session_id in self.active_connections:
try:
await self.active_connections[session_id].send_json(message)
print(f"[WebSocket] 发送消息到 {session_id}: {message.get('type')}", file=sys.stderr)
except Exception as e:
print(f"[WebSocket] 发送消息失败 {session_id}: {str(e)}", file=sys.stderr)
self.disconnect(session_id)
else:
# WebSocket还未连接缓存消息
print(f"[WebSocket] 连接尚未建立,缓存消息: {session_id}", file=sys.stderr)
if session_id not in self.pending_messages:
self.pending_messages[session_id] = []
self.pending_messages[session_id].append(message)
# 最多缓存10条消息
if len(self.pending_messages[session_id]) > 10:
self.pending_messages[session_id].pop(0)
# 全局WebSocket管理器
ws_manager = ConnectionManager()
async def fetch_proxy_from_pool() -> Optional[str]:
"""从代理池接口获取一个代理地址http://ip:port获取失败返回None"""
config = get_config()
if not config.get_bool('proxy_pool.enabled', False):
return None
api_url = config.get_str('proxy_pool.api_url', '')
if not api_url:
return None
try:
timeout = aiohttp.ClientTimeout(total=10)
async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.get(api_url) as resp:
if resp.status != 200:
print(f"[代理池] 接口返回非200状态码: {resp.status}", file=sys.stderr)
return None
text = (await resp.text()).strip()
if not text:
print("[代理池] 返回内容为空", file=sys.stderr)
return None
line = text.splitlines()[0].strip()
if not line:
print("[代理池] 首行内容为空", file=sys.stderr)
return None
if line.startswith("http://") or line.startswith("https://"):
return line
return "http://" + line
except Exception as e:
print(f"[代理池] 请求失败: {str(e)}", file=sys.stderr)
return None
# 临时文件存储目录
TEMP_DIR = Path("temp_uploads")
TEMP_DIR.mkdir(exist_ok=True)
# 请求模型
class SendCodeRequest(BaseModel):
phone: str
country_code: str = "+86"
login_page: Optional[str] = None # 登录页面creator 或 home为None时使用配置文件默认值
session_id: Optional[str] = None # 可选前端生成的session_id用于WebSocket通知
class VerifyCodeRequest(BaseModel):
phone: str
code: str
country_code: str = "+86"
class LoginRequest(BaseModel):
phone: str
code: str
country_code: str = "+86"
login_page: Optional[str] = None # 登录页面creator 或 home为None时使用配置文件默认值
session_id: Optional[str] = None # 可选复用send-code接口的session_id
class PublishNoteRequest(BaseModel):
title: str
content: str
images: Optional[list] = None
topics: Optional[list] = None
class PublishWithCookiesRequest(BaseModel):
cookies: Optional[list] = None # 兼容旧版仅传Cookies
login_state: Optional[dict] = None # 新版传完整的login_state
storage_state_path: Optional[str] = None # 新增storage_state文件路径最优先
phone: Optional[str] = None # 新增手机号用于查找storage_state文件
title: str
content: str
images: Optional[list] = None
topics: Optional[list] = None
class InjectCookiesRequest(BaseModel):
cookies: Optional[list] = None # 兼容旧版仅传Cookies
login_state: Optional[dict] = None # 新版传完整的login_state
target_page: Optional[str] = "creator" # 目标页面creator 或 home
# 响应模型
class BaseResponse(BaseModel):
code: int
message: str
data: Optional[Dict[str, Any]] = None
@app.on_event("startup")
async def startup_event():
"""启动时启动后台清理任务和定时发布任务(已禁用预热)"""
# 初始化配置从ENV环境变量读取默认dev
config = init_config()
print("[服务启动] FastAPI服务启动浏览器池已就绪")
# 清理旧的错误截图保留最近7天
try:
cleanup_old_screenshots(days=7)
except Exception as e:
print(f"[启动] 清理旧截图失败: {str(e)}")
# 从配置文件读取headless参数
headless = config.get_bool('scheduler.headless', True) # 定时发布的headless配置
login_headless = config.get_bool('login.headless', False) # 登录/绑定的headless配置默认为有头模式
login_page = config.get_str('login.page', 'creator') # 登录页面类型,默认为创作者中心
# 根据配置自动调整预热URL
if login_page == "home":
preheat_url = "https://www.xiaohongshu.com"
else:
preheat_url = "https://creator.xiaohongshu.com/login"
# 初始化全局浏览器池使用配置的headless参数
global browser_pool, login_service, sms_service
browser_pool = get_browser_pool(idle_timeout=1800, headless=headless)
print(f"[服务启动] 浏览器池模式: {'headless(无头模式)' if headless else 'headed(有头模式)'}")
# 初始化登录服务使用独立的login.headless配置
login_service = XHSLoginService(use_pool=True, headless=login_headless)
print(f"[服务启动] 登录服务模式: {'headless(无头模式)' if login_headless else 'headed(有头模式)'}")
# 初始化阿里云短信服务
sms_dict = config.get_dict('ali_sms')
sms_service = AliSmsService(
access_key_id=sms_dict.get('access_key_id', ''),
access_key_secret=sms_dict.get('access_key_secret', ''),
sign_name=sms_dict.get('sign_name', ''),
template_code=sms_dict.get('template_code', '')
)
print("[服务启动] 阿里云短信服务已初始化")
# 启动浏览器池清理任务
asyncio.create_task(browser_cleanup_task())
# 已禁用预热功能,避免干扰正常业务流程
# asyncio.create_task(browser_preheat_task())
print("[服务启动] 浏览器预热功能已禁用")
# 启动定时发布任务
global scheduler
# 从配置文件读取数据库配置
db_dict = config.get_dict('database')
db_config = {
'host': db_dict.get('host', 'localhost'),
'port': db_dict.get('port', 3306),
'user': db_dict.get('username', 'root'),
'password': db_dict.get('password', ''),
'database': db_dict.get('dbname', 'ai_wht')
}
# 从配置文件读取调度器配置
scheduler_enabled = config.get_bool('scheduler.enabled', False)
proxy_pool_enabled = config.get_bool('proxy_pool.enabled', False)
proxy_pool_api_url = config.get_str('proxy_pool.api_url', '')
enable_random_ua = config.get_bool('scheduler.enable_random_ua', True)
min_publish_interval = config.get_int('scheduler.min_publish_interval', 30)
max_publish_interval = config.get_int('scheduler.max_publish_interval', 120)
# headless已经在上面读取了
if scheduler_enabled:
scheduler = XHSScheduler(
db_config=db_config,
max_concurrent=config.get_int('scheduler.max_concurrent', 2),
publish_timeout=config.get_int('scheduler.publish_timeout', 300),
max_articles_per_user_per_run=config.get_int('scheduler.max_articles_per_user_per_run', 2),
max_failures_per_user_per_run=config.get_int('scheduler.max_failures_per_user_per_run', 3),
max_daily_articles_per_user=config.get_int('scheduler.max_daily_articles_per_user', 6),
max_hourly_articles_per_user=config.get_int('scheduler.max_hourly_articles_per_user', 2),
proxy_pool_enabled=proxy_pool_enabled,
proxy_pool_api_url=proxy_pool_api_url,
enable_random_ua=enable_random_ua,
min_publish_interval=min_publish_interval,
max_publish_interval=max_publish_interval,
headless=headless, # 新增: 传递headless参数
)
cron_expr = config.get_str('scheduler.cron', '*/5 * * * * *')
scheduler.start(cron_expr)
print(f"[服务启动] 定时发布任务已启动Cron: {cron_expr}")
else:
print("[服务启动] 定时发布任务未启用")
async def browser_cleanup_task():
"""后台任务:定期清理空闲浏览器"""
while True:
await asyncio.sleep(300) # 每5分钟检查一次
try:
await browser_pool.cleanup_if_idle()
except Exception as e:
print(f"[清理任务] 浏览器清理异常: {str(e)}")
async def browser_preheat_task():
"""后台任务:预热浏览器"""
try:
# 延迟3秒启动避免影响服务启动速度
await asyncio.sleep(3)
print("[预热任务] 开始预热浏览器...")
await browser_pool.preheat("https://creator.xiaohongshu.com/login")
except Exception as e:
print(f"[预热任务] 预热失败: {str(e)}")
async def repreheat_browser_after_use():
"""后台任务:使用后补充预热浏览器(仅用于登录流程)"""
try:
# 延迟5秒确保
# 1. 响应已经返回给用户
# 2. Cookie已经完全获取并保存
# 3. 登录流程完全结束
await asyncio.sleep(5)
print("[补充预热任务] 开始补充预热浏览器...")
await browser_pool.repreheat("https://creator.xiaohongshu.com/login")
except Exception as e:
print(f"[补充预热任务] 补充预热失败: {str(e)}")
@app.on_event("shutdown")
async def shutdown_event():
"""关闭时清理浏览器池和停止调度器"""
print("[服务关闭] 正在关闭服务...")
# 停止调度器
global scheduler
if scheduler:
scheduler.stop()
print("[服务关闭] 调度器已停止")
# 关闭浏览器池
await browser_pool.close()
print("[服务关闭] 浏览器池已关闭")
@app.post("/api/xhs/send-code", response_model=BaseResponse)
async def send_code(request: SendCodeRequest):
"""
发送验证码
通过playwright访问小红书官网输入手机号并触发验证码发送
支持选择从创作者中心或小红书首页登录
并发支持:为每个请求分配独立的浏览器实例
"""
# 使用前端传递的session_id如果没有则生成新的
if request.session_id:
session_id = request.session_id
print(f"[发送验证码] 使用前端传递的session_id={session_id}, phone={request.phone}", file=sys.stderr)
else:
import uuid
session_id = f"xhs_login_{uuid.uuid4().hex}"
print(f"[发送验证码] 前端未传session_id生成新的session_id={session_id}, phone={request.phone}", file=sys.stderr)
# 获取配置中的默认login_page如果API传入了则优先使用API参数
config = get_config()
default_login_page = config.get_str('login.page', 'creator')
login_page = request.login_page if request.login_page else default_login_page
print(f"[发送验证码] 使用登录页面: {login_page} (配置默认={default_login_page}, API参数={request.login_page})", file=sys.stderr)
try:
# 为此请求创建独立的登录服务实例使用session_id实现并发隔离
request_login_service = XHSLoginService(
use_pool=True,
headless=login_service.headless, # 使用配置文件中的login.headless配置
session_id=session_id # 关键传递session_id
)
# 调用登录服务发送验证码
result = await request_login_service.send_verification_code(
phone=request.phone,
country_code=request.country_code,
login_page=login_page, # 传递登录页面参数
session_id=session_id # 传递session_id用于WebSocket通知
)
# 检查是否需要验证(发送验证码时触发风控)
if result.get("need_captcha"):
print(f"[发送验证码] 检测到需要扫码验证保持session {session_id} 的浏览器继续运行", file=sys.stderr)
return BaseResponse(
code=0, # 成功返回二维码
message=result.get("message", "需要扫码验证"),
data={
"need_captcha": True,
"captcha_type": result.get("captcha_type"),
"qrcode_image": result.get("qrcode_image"),
"session_id": session_id
}
)
if result["success"]:
# 验证浏览器是否已保存到池中
if browser_pool and session_id in browser_pool.temp_browsers:
print(f"[发送验证码] ✅ 浏览器实例已保存到池中: {session_id}", file=sys.stderr)
print(f"[发送验证码] 当前池中共有 {len(browser_pool.temp_browsers)} 个临时浏览器", file=sys.stderr)
else:
print(f"[发送验证码] ⚠️ 浏览器实例未保存到池中: {session_id}", file=sys.stderr)
print(f"[发送验证码] 池中的session列表: {list(browser_pool.temp_browsers.keys()) if browser_pool else 'None'}", file=sys.stderr)
return BaseResponse(
code=0,
message="验证码已发送请在小红书APP中查看",
data={
"sent_at": datetime.now().isoformat(),
"session_id": session_id # 返回session_id供前端使用
}
)
else:
# 发送失败,释放临时浏览器
if session_id and browser_pool:
try:
await browser_pool.release_temp_browser(session_id)
print(f"[发送验证码] 已释放临时浏览器: {session_id}", file=sys.stderr)
except Exception as e:
print(f"[发送验证码] 释放临时浏览器失败: {str(e)}", file=sys.stderr)
return BaseResponse(
code=1,
message=result.get("error", "发送验证码失败"),
data=None
)
except Exception as e:
print(f"发送验证码异常: {str(e)}", file=sys.stderr)
# 异常情况,释放临时浏览器
if session_id and browser_pool:
try:
await browser_pool.release_temp_browser(session_id)
print(f"[发送验证码] 已释放临时浏览器: {session_id}", file=sys.stderr)
except Exception as release_error:
print(f"[发送验证码] 释放临时浏览器失败: {str(release_error)}", file=sys.stderr)
return BaseResponse(
code=1,
message=f"发送验证码失败: {str(e)}",
data=None
)
@app.post("/api/xhs/phone/send-code", response_model=BaseResponse)
async def send_phone_code(request: SendCodeRequest):
"""
发送手机短信验证码(使用阿里云短信服务)
用于小红书手机号验证码登录
"""
try:
# 调用阿里云短信服务发送验证码
result = await sms_service.send_verification_code(request.phone)
if result["success"]:
return BaseResponse(
code=0,
message=result.get("message", "验证码已发送"),
data={
"sent_at": datetime.now().isoformat(),
# 开发环境返回验证码,生产环境应移除
"code": result.get("code") if get_config().get_bool('server.debug', False) else None
}
)
else:
return BaseResponse(
code=1,
message=result.get("error", "发送验证码失败"),
data=None
)
except Exception as e:
print(f"发送短信验证码异常: {str(e)}")
return BaseResponse(
code=1,
message=f"发送验证码失败: {str(e)}",
data=None
)
@app.post("/api/xhs/phone/verify-code", response_model=BaseResponse)
async def verify_phone_code(request: VerifyCodeRequest):
"""
验证手机短信验证码
用于小红书手机号验证码登录
"""
try:
# 调用阿里云短信服务验证验证码
result = sms_service.verify_code(request.phone, request.code)
if result["success"]:
return BaseResponse(
code=0,
message="验证码验证成功",
data={"verified_at": datetime.now().isoformat()}
)
else:
return BaseResponse(
code=1,
message=result.get("error", "验证码验证失败"),
data=None
)
except Exception as e:
print(f"验证验证码异常: {str(e)}")
return BaseResponse(
code=1,
message=f"验证失败: {str(e)}",
data=None
)
@app.post("/api/xhs/qrcode/start", response_model=BaseResponse)
async def start_qrcode_login():
"""
启动小红书扫码登录,返回二维码图片和状态
每个用户必须使用独立的浏览器实例不能共享Context
"""
try:
print("[扫码登录] 启动扫码登录流程", file=sys.stderr)
# 使用随机UUID创建临时的登录服务实例完全不复用
import uuid
session_id = f"qrcode_login_{uuid.uuid4().hex}"
print(f"[扫码登录] 创建全新浏览器实例 session_id={session_id}", file=sys.stderr)
qrcode_service = XHSLoginService(
use_pool=True,
headless=login_service.headless,
session_id=session_id,
use_page_isolation=False # 小红书不支持页面隔离,必须独立浏览器
)
# 初始化浏览器
await qrcode_service.init_browser()
# 启动扫码登录
result = await qrcode_service.start_qrcode_login()
if result["success"]:
return BaseResponse(
code=0,
message="二维码获取成功",
data={
"session_id": session_id,
"qrcode_image": result["qrcode_image"],
"status_text": result.get("status_text", ""),
"status_desc": result.get("status_desc", ""),
"is_expired": result.get("is_expired", False),
# 添加二维码创建信息
"qr_url": result.get("qr_url", ""),
"qr_id": result.get("qr_id", ""),
"qr_code": result.get("qr_code", ""),
"multi_flag": result.get("multi_flag", 0)
}
)
else:
# 失败后释放临时浏览器
if browser_pool and session_id:
try:
await browser_pool.release_temp_browser(session_id)
print(f"[扫码登录] 已释放失败的session: {session_id}", file=sys.stderr)
except Exception as release_error:
print(f"[扫码登录] 释放浏览器失败: {str(release_error)}", file=sys.stderr)
return BaseResponse(
code=1,
message=result.get("error", "获取二维码失败"),
data=None
)
except Exception as e:
print(f"[扫码登录] 异常: {str(e)}", file=sys.stderr)
# 异常后释放临时浏览器
if browser_pool and 'session_id' in locals():
try:
await browser_pool.release_temp_browser(session_id)
print(f"[扫码登录] 已释放异常的session: {session_id}", file=sys.stderr)
except Exception as release_error:
print(f"[扫码登录] 释放浏览器失败: {str(release_error)}", file=sys.stderr)
return BaseResponse(
code=1,
message=f"启动扫码登录失败: {str(e)}",
data=None
)
@app.post("/api/xhs/qrcode/status")
async def get_qrcode_status(request: dict):
"""
轮询获取扫码状态和最新的二维码图片
"""
try:
session_id = request.get('session_id')
if not session_id:
return BaseResponse(
code=1,
message="session_id不能为空",
data=None
)
# 检查session是否存在于浏览器池中
if browser_pool and session_id not in browser_pool.temp_browsers:
print(f"[扫码状态] session_id={session_id} 已失效,要求重新创建二维码", file=sys.stderr)
return BaseResponse(
code=2, # 特殊错误码表示session失效
message="会话已失效,请刷新二维码重新开始",
data={
"session_expired": True
}
)
# 使用session_id获取浏览器实例
qrcode_service = XHSLoginService(
use_pool=True,
headless=login_service.headless,
session_id=session_id
)
# 初始化浏览器(会复用已有的)
await qrcode_service.init_browser()
# 提取当前二维码状态
result = await qrcode_service.extract_qrcode_with_status()
if result["success"]:
# 如果登录成功,返回登录信息
if result.get("login_success"):
return BaseResponse(
code=0,
message="扫码登录成功",
data={
"login_success": True,
"user_info": result.get("user_info"),
"cookies": result.get("cookies"),
"cookies_full": result.get("cookies_full"),
"login_state": result.get("login_state")
}
)
else:
# 还未登录,返回二维码状态
return BaseResponse(
code=0,
message="获取状态成功",
data={
"login_success": False,
"qrcode_image": result["qrcode_image"],
"status_text": result.get("status_text", ""),
"status_desc": result.get("status_desc", ""),
"is_expired": result.get("is_expired", False)
}
)
else:
return BaseResponse(
code=1,
message=result.get("error", "获取状态失败"),
data=None
)
except Exception as e:
print(f"[扫码状态] 异常: {str(e)}", file=sys.stderr)
import traceback
traceback.print_exc()
return BaseResponse(
code=1,
message=f"获取状态失败: {str(e)}",
data=None
)
@app.post("/api/xhs/qrcode/refresh")
async def refresh_qrcode(request: dict):
"""
刷新过期的二维码
"""
try:
session_id = request.get('session_id')
if not session_id:
return BaseResponse(
code=1,
message="session_id不能为空",
data=None
)
# 使用session_id获取浏览器实例
qrcode_service = XHSLoginService(
use_pool=True,
headless=login_service.headless,
session_id=session_id
)
# 初始化浏览器
await qrcode_service.init_browser()
# 刷新二维码
result = await qrcode_service.refresh_qrcode()
if result["success"]:
return BaseResponse(
code=0,
message="二维码刷新成功",
data={
"qrcode_image": result["qrcode_image"],
"status_text": result.get("status_text", ""),
"status_desc": result.get("status_desc", ""),
"is_expired": result.get("is_expired", False),
# 添加二维码创建信息
"qr_url": result.get("qr_url", ""),
"qr_id": result.get("qr_id", ""),
"qr_code": result.get("qr_code", ""),
"multi_flag": result.get("multi_flag", 0)
}
)
else:
# 检查是否需要重启
if result.get("need_restart"):
return BaseResponse(
code=3, # 特殊错误码,表示需要重启
message="页面已失效,请重新启动扫码登录",
data={
"need_restart": True
}
)
return BaseResponse(
code=1,
message=result.get("error", "刷新失败"),
data=None
)
except Exception as e:
print(f"[刷新二维码] 异常: {str(e)}", file=sys.stderr)
return BaseResponse(
code=1,
message=f"刷新二维码失败: {str(e)}",
data=None
)
@app.post("/api/xhs/save-bind-info")
async def save_bind_info(request: dict):
"""
保存扫码登录的绑定信息到Go后端
与验证码登录不同扫码登录直接返回了完整数据需要由Python转发给Go后端保存
"""
try:
employee_id = request.get('employee_id')
cookies_full = request.get('cookies_full', [])
user_info = request.get('user_info', {})
login_state = request.get('login_state', {})
if not employee_id:
return BaseResponse(
code=1,
message="employee_id不能为空",
data=None
)
# 调用Go后端API保存
config = get_config()
go_backend_url = config.get_str('go_backend.url', 'http://localhost:8080')
# 构造请求数据模仏bind-xhs接口的返回格式
# Go后端期望接收的是验证码登录的结果
save_data = {
"employee_id": employee_id,
"cookies_full": cookies_full,
"user_info": user_info,
"login_state": login_state
}
import aiohttp
async with aiohttp.ClientSession() as session:
# 获取小程序传来的token
auth_header = request.get('Authorization', '')
async with session.post(
f"{go_backend_url}/api/xhs/save-qrcode-login",
json=save_data,
headers={'Authorization': auth_header} if auth_header else {}
) as resp:
result = await resp.json()
if resp.status == 200 and result.get('code') == 200:
return BaseResponse(
code=0,
message="保存成功",
data=result.get('data')
)
else:
return BaseResponse(
code=1,
message=result.get('message', '保存失败'),
data=None
)
except Exception as e:
print(f"[保存绑定信息] 异常: {str(e)}", file=sys.stderr)
import traceback
traceback.print_exc()
return BaseResponse(
code=1,
message=f"保存失败: {str(e)}",
data=None
)
@app.post("/api/xhs/qrcode/cancel")
async def cancel_qrcode_login(request: dict):
"""
取消扫码登录,释放浏览器资源
用于用户切换登录方式或关闭页面时
"""
try:
session_id = request.get('session_id')
if not session_id:
return BaseResponse(
code=1,
message="session_id不能为空",
data=None
)
# 释放临时浏览器
if browser_pool:
try:
await browser_pool.release_temp_browser(session_id)
print(f"[取消扫码] 已释放 session: {session_id}", file=sys.stderr)
return BaseResponse(
code=0,
message="已取消扫码登录",
data=None
)
except Exception as e:
print(f"[取消扫码] 释放浏览器失败: {str(e)}", file=sys.stderr)
# 即使失败也返回成功,不影响用户体验
return BaseResponse(
code=0,
message="已取消扫码登录",
data=None
)
else:
return BaseResponse(
code=0,
message="浏览器池未初始化",
data=None
)
except Exception as e:
print(f"[取消扫码] 异常: {str(e)}", file=sys.stderr)
return BaseResponse(
code=0, # 即使异常也返回成功
message="已取消扫码登录",
data=None
)
@app.post("/api/xhs/login", response_model=BaseResponse)
async def login(request: LoginRequest):
"""
登录验证
用户填写验证码后,完成登录并获取小红书返回的数据
支持选择从创作者中心或小红书首页登录
并发支持可复用send-code接口的session_id
"""
# 必须使用前端传递的session_id来复用浏览器
# 如果前端没有传session_id说明前端实现有问题
if not request.session_id:
return BaseResponse(
code=1,
message="缺少session_id参数无法复用浏览器实例请重新发送验证码",
data=None
)
session_id = request.session_id
print(f"[登录验证] session_id={session_id}, phone={request.phone}", file=sys.stderr)
# 获取配置中的默认login_page如果API传入了则优先使用API参数
config = get_config()
default_login_page = config.get_str('login.page', 'creator')
login_page = request.login_page if request.login_page else default_login_page
print(f"[登录验证] 使用登录页面: {login_page} (配置默认={default_login_page}, API参数={request.login_page})", file=sys.stderr)
try:
# 如果有session_id复用send-code的浏览器否则创建新的
if session_id:
print(f"[登录验证] 尝试复用send-code的浏览器: {session_id}", file=sys.stderr)
# 先检查浏览器池中是否存在该session
if browser_pool and session_id in browser_pool.temp_browsers:
print(f"[登录验证] ✅ 在浏览器池中找到session: {session_id}", file=sys.stderr)
else:
print(f"[登录验证] ⚠️ 浏览器池中未找到session: {session_id}", file=sys.stderr)
print(f"[登录验证] 当前池中的session列表: {list(browser_pool.temp_browsers.keys()) if browser_pool else 'None'}", file=sys.stderr)
request_login_service = XHSLoginService(
use_pool=True,
headless=login_service.headless, # 使用配置文件中的login.headless配置
session_id=session_id
)
# 初始化浏览器,以便从浏览器池获取临时浏览器
await request_login_service.init_browser()
# 再次验证浏览器是否正常初始化
if request_login_service.page:
print(f"[登录验证] ✅ 浏览器初始化成功当前URL: {request_login_service.page.url}", file=sys.stderr)
else:
print(f"[登录验证] ❌ 浏览器初始化失败page为None", file=sys.stderr)
else:
# 旧逻辑不传session_id使用全局登录服务
print(f"[登录验证] 使用全局登录服务(旧逻辑)", file=sys.stderr)
request_login_service = login_service
# 调用登录服务进行登录
result = await request_login_service.login(
phone=request.phone,
code=request.code,
country_code=request.country_code,
login_page=login_page # 传递登录页面参数
)
# 检查是否需要扫码验证
if result.get("need_captcha"):
# 需要扫码验证不释放浏览器保持session_id对应的浏览器继续运行
print(f"[登录验证] 检测到需要扫码验证保持session {session_id} 的浏览器继续运行", file=sys.stderr)
return BaseResponse(
code=0, # 成功返回二维码
message=result.get("message", "需要扫码验证"),
data={
"need_captcha": True,
"captcha_type": result.get("captcha_type"),
"qrcode_image": result.get("qrcode_image"),
"session_id": session_id
}
)
# 释放临时浏览器(仅在登录成功或失败时释放)
if session_id and browser_pool:
try:
await browser_pool.release_temp_browser(session_id)
print(f"[登录验证] 已释放临时浏览器: {session_id}", file=sys.stderr)
except Exception as e:
print(f"[登录验证] 释放临时浏览器失败: {str(e)}", file=sys.stderr)
if result["success"]:
# 登录成功,不再触发预热(已禁用预热功能)
# asyncio.create_task(repreheat_browser_after_use())
return BaseResponse(
code=0,
message="登录成功",
data={
"user_info": result.get("user_info"),
"cookies": result.get("cookies"), # 键值对格式(前端展示)
"cookies_full": result.get("cookies_full"), # Playwright完整格式数据库存储/脚本使用)
"login_state": result.get("login_state"), # 完整登录状态包含cookies + localStorage + sessionStorage
"localStorage": result.get("localStorage"), # localStorage数据
"sessionStorage": result.get("sessionStorage"), # sessionStorage数据
"url": result.get("url"), # 当前URL
"storage_state_path": result.get("storage_state_path"), # storage_state文件路径
"login_time": datetime.now().isoformat()
}
)
else:
# 登录失败
return BaseResponse(
code=1,
message=result.get("error", "登录失败"),
data=None
)
except Exception as e:
print(f"登录异常: {str(e)}", file=sys.stderr)
# 异常情况,释放临时浏览器
if session_id and browser_pool:
try:
await browser_pool.release_temp_browser(session_id)
print(f"[登录验证] 已释放临时浏览器: {session_id}", file=sys.stderr)
except Exception as release_error:
print(f"[登录验证] 释放临时浏览器失败: {str(release_error)}", file=sys.stderr)
return BaseResponse(
code=1,
message=f"登录失败: {str(e)}",
data=None
)
@app.get("/")
async def root():
"""健康检查"""
if browser_pool:
stats = browser_pool.get_stats()
return {
"status": "ok",
"message": "小红书登录服务运行中(浏览器池模式)",
"browser_pool": stats
}
return {"status": "ok", "message": "服务初始化中..."}
@app.get("/api/health")
async def health_check():
"""健康检查接口(详细)"""
if browser_pool:
stats = browser_pool.get_stats()
return {
"status": "healthy",
"service": "xhs-login-service",
"mode": "browser-pool",
"browser_pool_stats": stats,
"timestamp": datetime.now().isoformat()
}
return {
"status": "initializing",
"service": "xhs-login-service",
"timestamp": datetime.now().isoformat()
}
@app.post("/api/xhs/inject-cookies", response_model=BaseResponse)
async def inject_cookies(request: InjectCookiesRequest):
"""
注入Cookies或完整登录状态并验证
支持两种模式:
1. 仅注入Cookies兼容旧版
2. 注入完整login_state包含Cookies + localStorage + sessionStorage
支持选择跳转到创作者中心或小红书首页
重要:为了避免检测,不使用浏览器池,每次创建全新的浏览器实例
"""
try:
# 关闭旧的浏览器(如果有)
if login_service.browser:
await login_service.close_browser()
# 创建一个独立的登录服务实例,不使用浏览器池
print("✅ 为注入Cookie创建全新的浏览器实例不使用浏览器池", file=sys.stderr)
inject_service = XHSLoginService(use_pool=False, headless=False) # 不使用浏览器池,使用有头模式方便调试
# 优先使用login_state其次使用cookies
if request.login_state:
# 新版使用完整的login_state
print("✅ 检测到login_state将恢复完整登录状态", file=sys.stderr)
# 保存login_state到文件供 init_browser 加载
with open('login_state.json', 'w', encoding='utf-8') as f:
json.dump(request.login_state, f, ensure_ascii=False, indent=2)
# 使用restore_state=True恢复完整状态
await inject_service.init_browser(restore_state=True)
elif request.cookies:
# 兼容旧版仅使用Cookies
print("⚠️ 检测到仅有Cookies建议使用login_state获取更好的兼容性", file=sys.stderr)
await inject_service.init_browser(cookies=request.cookies)
else:
return BaseResponse(
code=1,
message="请提供 cookies 或 login_state",
data=None
)
# 根据target_page参数确定验证URL
target_page = request.target_page or "creator"
if target_page == "home":
verify_url = "https://www.xiaohongshu.com"
page_name = "小红书首页"
else:
verify_url = "https://creator.xiaohongshu.com"
page_name = "创作者中心"
# 访问目标页面并验证登录状态
result = await inject_service.verify_login_status(url=verify_url)
# 关闭独立的浏览器实例(注:因为不是池模式,会真正关闭)
# await inject_service.close_browser() # 先不关闭,让用户看到结果
if result.get("logged_in"):
return BaseResponse(
code=0,
message=f"{'login_state' if request.login_state else 'Cookie'}注入成功,已跳转到{page_name}",
data={
"logged_in": True,
"target_page": page_name,
"user_info": result.get("user_info"),
"cookies": result.get("cookies"), # 键值对格式
"cookies_full": result.get("cookies_full"), # Playwright完整格式
"url": result.get("url")
}
)
else:
# 失败时关闭浏览器
await inject_service.close_browser()
return BaseResponse(
code=1,
message=result.get("message", "{'login_state' if request.login_state else 'Cookie'}已失效,请重新登录"),
data={
"logged_in": False
}
)
except Exception as e:
print(f"注入失败: {str(e)}", file=sys.stderr)
import traceback
traceback.print_exc()
return BaseResponse(
code=1,
message=f"注入失败: {str(e)}",
data=None
)
@app.post("/api/xhs/publish-with-cookies", response_model=BaseResponse)
async def publish_note_with_cookies(request: PublishWithCookiesRequest):
"""
使用Cookies或完整login_state或storage_state发布笔记供Go后端定时任务调用
支持三种模式(按优先级):
1. 使用storage_state_path推荐最完整的登录状态
2. 传入完整login_state次选包含cookies + localStorage + sessionStorage
3. 仅传入Cookies兼容旧版
重要:为了避免检测,不使用浏览器池,每次创建全新的浏览器实例
"""
try:
# 获取代理(如果启用)
proxy = await fetch_proxy_from_pool()
if proxy:
print(f"[发布接口] 使用代理: {proxy}", file=sys.stderr)
# 创建一个独立的登录服务实例,不使用浏览器池,应用所有反检测措施
print("✅ 为发布任务创建全新的浏览器实例,不使用浏览器池", file=sys.stderr)
# 从配置读取headless参数
config = get_config()
headless = config.get_bool('scheduler.headless', True)
publish_service = XHSLoginService(use_pool=False, headless=headless) # 不使用浏览器池
# 优先级判断storage_state_path > login_state > cookies
if request.storage_state_path or request.phone:
# 模式1使用storage_state最优先
storage_state_file = None
if request.storage_state_path:
# 直接指定了storage_state路径
storage_state_file = request.storage_state_path
elif request.phone:
# 根据手机号查找
storage_state_dir = 'storage_states'
storage_state_file = os.path.join(storage_state_dir, f"xhs_{request.phone}.json")
if storage_state_file and os.path.exists(storage_state_file):
print(f"✅ 检测到storage_state文件: {storage_state_file}将使用Playwright原生恢复", file=sys.stderr)
# 使用Playwright原生API恢复登录状态
await publish_service.init_browser_with_storage_state(
storage_state_path=storage_state_file,
proxy=proxy
)
else:
print(f"⚠️ storage_state文件不存在: {storage_state_file}回退到login_state或cookies模式", file=sys.stderr)
# 回退到旧模式
if request.login_state:
await _init_with_login_state(publish_service, request.login_state, proxy)
elif request.cookies:
await publish_service.init_browser(cookies=request.cookies, proxy=proxy)
else:
return BaseResponse(
code=1,
message="storage_state文件不存在且未提供 login_state 或 cookies",
data=None
)
elif request.login_state:
# 模式2使用login_state
print("✅ 检测到login_state将恢复完整登录状态", file=sys.stderr)
await _init_with_login_state(publish_service, request.login_state, proxy)
elif request.cookies:
# 模式3仅使用Cookies兼容旧版
print("⚠️ 检测到仅有Cookies建议使用storage_state或login_state获取更好的兼容性", file=sys.stderr)
await publish_service.init_browser(cookies=request.cookies, proxy=proxy)
else:
return BaseResponse(
code=1,
message="请提供 storage_state_path、phone、login_state 或 cookies",
data=None
)
# 调用发布方法使用已经初始化好的publish_service
result = await publish_service.publish_note(
title=request.title,
content=request.content,
images=request.images,
topics=request.topics,
cookies=None, # 已经注入,不需要再传
proxy=None, # 已经设置,不需要再传
)
# 关闭独立的浏览器实例
await publish_service.close_browser()
if result["success"]:
return BaseResponse(
code=0,
message="笔记发布成功",
data={
"url": result.get("url"),
"publish_time": datetime.now().isoformat()
}
)
else:
return BaseResponse(
code=1,
message=result.get("error", "发布失败"),
data=None
)
except Exception as e:
print(f"发布笔记异常: {str(e)}", file=sys.stderr)
import traceback
traceback.print_exc()
return BaseResponse(
code=1,
message=f"发布失败: {str(e)}",
data=None
)
async def _init_with_login_state(publish_service, login_state, proxy):
"""使用login_state初始化浏览器"""
# 保存login_state到临时文件
import tempfile
import uuid
temp_file = os.path.join(tempfile.gettempdir(), f"login_state_{uuid.uuid4()}.json")
with open(temp_file, 'w', encoding='utf-8') as f:
json.dump(login_state, f, ensure_ascii=False, indent=2)
# 使用restore_state=True恢复完整状态
await publish_service.init_browser(
cookies=login_state.get('cookies'),
proxy=proxy,
user_agent=login_state.get('user_agent')
)
# 恢夏localStorage和sessionStorage
try:
if login_state.get('localStorage') or login_state.get('sessionStorage'):
target_url = login_state.get('url', 'https://creator.xiaohongshu.com')
await publish_service.page.goto(target_url, wait_until='domcontentloaded', timeout=15000)
if login_state.get('localStorage'):
for key, value in login_state['localStorage'].items():
await publish_service.page.evaluate(f'localStorage.setItem("{key}", {json.dumps(value)})')
if login_state.get('sessionStorage'):
for key, value in login_state['sessionStorage'].items():
await publish_service.page.evaluate(f'sessionStorage.setItem("{key}", {json.dumps(value)})')
print("✅ 已恢夏localStorage和sessionStorage", file=sys.stderr)
except Exception as e:
print(f"⚠️ 恢夏storage失败: {str(e)}", file=sys.stderr)
# 清理临时文件
try:
os.remove(temp_file)
except:
pass
@app.post("/api/xhs/upload-images")
async def upload_images(files: List[UploadFile] = File(...)):
"""
上传图片到服务器临时目录
返回图片的服务器路径
"""
try:
uploaded_paths = []
for file in files:
# 检查文件类型
if not file.content_type.startswith('image/'):
return {
"code": 1,
"message": f"文件 {file.filename} 不是图片类型",
"data": None
}
# 生成唯一文件名
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
file_ext = os.path.splitext(file.filename)[1]
safe_filename = f"{timestamp}{file_ext}"
file_path = TEMP_DIR / safe_filename
# 保存文件
with open(file_path, "wb") as buffer:
shutil.copyfileobj(file.file, buffer)
# 使用绝对路径
abs_path = str(file_path.absolute())
uploaded_paths.append(abs_path)
print(f"已上传图片: {abs_path}")
return {
"code": 0,
"message": f"成功上传 {len(uploaded_paths)} 张图片",
"data": {
"paths": uploaded_paths,
"count": len(uploaded_paths)
}
}
except Exception as e:
print(f"上传图片异常: {str(e)}")
return {
"code": 1,
"message": f"上传失败: {str(e)}",
"data": None
}
async def handle_send_code_ws(session_id: str, phone: str, country_code: str, login_page: str, websocket: WebSocket):
"""
异步处理WebSocket发送验证码请求
"""
try:
print(f"[WebSocket-SendCode] 开始处理: session={session_id}, phone={phone}", file=sys.stderr)
# 创建登录服务实例
request_login_service = XHSLoginService(
use_pool=True,
headless=login_service.headless,
session_id=session_id
)
# 调用登录服务发送验证码
result = await request_login_service.send_verification_code(
phone=phone,
country_code=country_code,
login_page=login_page,
session_id=session_id
)
# 检查是否需要验证(发送验证码时触发风控)
if result.get("need_captcha"):
print(f"[WebSocket-SendCode] 检测到风控,需要扫码", file=sys.stderr)
await websocket.send_json({
"type": "need_captcha",
"captcha_type": result.get("captcha_type"),
"qrcode_image": result.get("qrcode_image"),
"message": result.get("message", "需要扫码验证")
})
print(f"[WebSocket-SendCode] 已推送风控信息", file=sys.stderr)
return
if result["success"]:
print(f"[WebSocket-SendCode] 验证码发送成功", file=sys.stderr)
await websocket.send_json({
"type": "code_sent",
"success": True,
"message": "验证码已发送请在小红书APP中查看"
})
else:
print(f"[WebSocket-SendCode] 发送失败: {result.get('error')}", file=sys.stderr)
await websocket.send_json({
"type": "code_sent",
"success": False,
"message": result.get("error", "发送验证码失败")
})
except Exception as e:
print(f"[WebSocket-SendCode] 异常: {str(e)}", file=sys.stderr)
import traceback
traceback.print_exc()
try:
await websocket.send_json({
"type": "code_sent",
"success": False,
"message": f"发送验证码失败: {str(e)}"
})
except:
pass
async def handle_verify_code_ws(session_id: str, phone: str, code: str, country_code: str, login_page: str, websocket: WebSocket):
"""
异步处理WebSocket验证码验证请求
"""
try:
print(f"[WebSocket-VerifyCode] 开始验证: session={session_id}, phone={phone}, code={code}", file=sys.stderr)
# 从浏览器池中获取之前的浏览器实例
if session_id not in browser_pool.temp_browsers:
print(f"[WebSocket-VerifyCode] 未找到session: {session_id}", file=sys.stderr)
await websocket.send_json({
"type": "login_result",
"success": False,
"message": "会话已过期,请重新发送验证码"
})
return
# 获取浏览器实例
browser_data = browser_pool.temp_browsers[session_id]
request_login_service = browser_data['service']
# 调用登录服务验证登录
result = await request_login_service.login_with_code(
phone=phone,
code=code,
country_code=country_code,
login_page=login_page
)
# 检查是否需要验证(登录时触发风控)
if result.get("need_captcha"):
print(f"[WebSocket-VerifyCode] 登录时检测到风控", file=sys.stderr)
await websocket.send_json({
"type": "need_captcha",
"captcha_type": result.get("captcha_type"),
"qrcode_image": result.get("qrcode_image"),
"message": result.get("message", "需要扫码验证")
})
return
if result["success"]:
print(f"[WebSocket-VerifyCode] 登录成功", file=sys.stderr)
# 获取storage_state
storage_state = result.get("storage_state")
# 保存storage_state到文件
storage_state_path = None
if storage_state:
import os
os.makedirs('storage_states', exist_ok=True)
storage_state_path = f"storage_states/{phone}_state.json"
import json
with open(storage_state_path, 'w', encoding='utf-8') as f:
json.dump(storage_state, f, ensure_ascii=False, indent=2)
print(f"[WebSocket-VerifyCode] 已保存storage_state: {storage_state_path}", file=sys.stderr)
# 推送登录成功消息
await websocket.send_json({
"type": "login_success",
"success": True,
"storage_state": storage_state,
"storage_state_path": storage_state_path,
"message": "登录成功"
})
# 释放浏览器
try:
await browser_pool.release_temp_browser(session_id)
print(f"[WebSocket-VerifyCode] 已释放浏览器: {session_id}", file=sys.stderr)
except Exception as e:
print(f"[WebSocket-VerifyCode] 释放浏览器失败: {str(e)}", file=sys.stderr)
else:
print(f"[WebSocket-VerifyCode] 登录失败: {result.get('error')}", file=sys.stderr)
await websocket.send_json({
"type": "login_result",
"success": False,
"message": result.get("error", "登录失败")
})
except Exception as e:
print(f"[WebSocket-VerifyCode] 异常: {str(e)}", file=sys.stderr)
import traceback
traceback.print_exc()
try:
await websocket.send_json({
"type": "login_result",
"success": False,
"message": f"登录失败: {str(e)}"
})
except:
pass
@app.websocket("/ws/login/{session_id}")
async def websocket_login(websocket: WebSocket, session_id: str):
"""
WebSocket端点实时监听登录状态
用于扫码验证后的实时通知
"""
await ws_manager.connect(session_id, websocket)
try:
# 保持连接,等待消息或断开
while True:
# 接收客户端消息ping/pong保持连接
data = await websocket.receive_text()
print(f"[WebSocket] 收到客户端消息 {session_id}: {data}", file=sys.stderr)
# 处理ping消息
if data == "ping":
await websocket.send_text("pong")
else:
# 尝试解析JSON消息
try:
import json
msg = json.loads(data)
msg_type = msg.get('type', 'unknown')
print(f"[WebSocket] 解析消息类型: {msg_type}", file=sys.stderr)
# 处理测试消息
if msg_type == 'test':
print(f"[WebSocket] 收到测试消息: {msg.get('message')}", file=sys.stderr)
# 回复测试消息
await websocket.send_json({
"type": "test_response",
"message": "Test message received by backend successfully!",
"timestamp": data
})
print(f"[WebSocket] 已回复测试消息", file=sys.stderr)
# 处理发送验证码消息
elif msg_type == 'send_code':
phone = msg.get('phone')
country_code = msg.get('country_code', '+86')
login_page = msg.get('login_page', 'creator')
print(f"[WebSocket] 收到发送验证码请求: phone={phone}", file=sys.stderr)
# 启动异步任务处理发送验证码
asyncio.create_task(handle_send_code_ws(session_id, phone, country_code, login_page, websocket))
# 处理验证码验证消息
elif msg_type == 'verify_code':
phone = msg.get('phone')
code = msg.get('code')
country_code = msg.get('country_code', '+86')
login_page = msg.get('login_page', 'creator')
print(f"[WebSocket] 收到验证码验证请求: phone={phone}, code={code}", file=sys.stderr)
# 启动异步任务处理验证码验证
asyncio.create_task(handle_verify_code_ws(session_id, phone, code, country_code, login_page, websocket))
except json.JSONDecodeError:
print(f"[WebSocket] 无法解析为JSON: {data}", file=sys.stderr)
except WebSocketDisconnect:
ws_manager.disconnect(session_id)
print(f"[WebSocket] 客户端断开: {session_id}", file=sys.stderr)
except Exception as e:
ws_manager.disconnect(session_id)
print(f"[WebSocket] 连接异常 {session_id}: {str(e)}", file=sys.stderr)
if __name__ == "__main__":
import uvicorn
# 从配置文件读取服务器配置
config = get_config()
host = config.get_str('server.host', '0.0.0.0')
port = config.get_int('server.port', 8000)
debug = config.get_bool('server.debug', False)
reload = config.get_bool('server.reload', False)
print(f"[启动服务] 主机: {host}, 端口: {port}, 调试: {debug}, 热重载: {reload}")
print(f"[WebSocket] WebSocket服务地址: ws://{host}:{port}/ws/login/{{session_id}}")
print(f"[WebSocket] 示例: ws://{host}:{port}/ws/login/xhs_login_xxxxx")
uvicorn.run(
app,
host=host,
port=port,
reload=reload,
log_level="debug" if debug else "info"
)