This commit is contained in:
sjk
2026-01-09 23:27:52 +08:00
parent 8446c004e7
commit 3b66018271
18 changed files with 2006 additions and 508 deletions

View File

@@ -56,7 +56,7 @@ PROXY_POOL = [
"server": "http://210.51.27.194:50001",
"username": "hb6su3",
"password": "acv2ciow",
"enabled": False
"enabled": True
}
]

Binary file not shown.

Before

Width:  |  Height:  |  Size: 352 KiB

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 166 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 245 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 MiB

View File

@@ -13,7 +13,7 @@ from config import init_config, get_config
from dotenv import load_dotenv
load_dotenv() # 从 .env 文件加载环境变量(可选,用于覆盖配置文件)
from fastapi import FastAPI, HTTPException, File, UploadFile, Form
from fastapi import FastAPI, HTTPException, File, UploadFile, Form, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, Dict, Any, List
@@ -51,6 +51,75 @@ scheduler = None
# 全局阿里云短信服务实例
sms_service = None
# WebSocket connection manager
class ConnectionManager:
    """Track one WebSocket per login session and buffer early messages.

    A message sent to a session whose socket has not connected yet is cached
    (at most ``MAX_PENDING_MESSAGES`` per session, oldest dropped first) and
    flushed to the socket as soon as it connects.
    """

    # Upper bound on messages cached per session before its socket connects.
    MAX_PENDING_MESSAGES = 10
    # Pause (seconds) before the flush and between flushed messages, giving
    # the front-end listener time to become ready.
    FLUSH_DELAY_SECONDS = 0.1

    def __init__(self):
        # session_id -> WebSocket connection
        self.active_connections: Dict[str, WebSocket] = {}
        # session_id -> messages queued before the connection was established
        self.pending_messages: Dict[str, list] = {}

    async def connect(self, session_id: str, websocket: WebSocket):
        """Accept *websocket*, register it and flush any cached messages.

        Args:
            session_id: Key under which the connection is registered.
            websocket: The socket to accept and register.
        """
        await websocket.accept()
        self.active_connections[session_id] = websocket
        print(f"[WebSocket] 新连接: {session_id}", file=sys.stderr)
        print(f"[WebSocket] 当前活跃连接数: {len(self.active_connections)}", file=sys.stderr)

        # Detach the backlog *before* the first await. The flush below yields
        # to the event loop, and a concurrent disconnect() removes the same
        # entry; deleting it afterwards would raise KeyError.
        pending = self.pending_messages.pop(session_id, None)
        if not pending:
            print(f"[WebSocket] 没有缓存消息: {session_id}", file=sys.stderr)
            return

        pending_count = len(pending)
        print(f"[WebSocket] 发现缓存消息: {pending_count}", file=sys.stderr)
        print(f"[WebSocket] 缓存消息内容: {pending}", file=sys.stderr)

        await asyncio.sleep(self.FLUSH_DELAY_SECONDS)
        for idx, message in enumerate(pending, start=1):
            try:
                print(f"[WebSocket] 准备发送第{idx}条消息...", file=sys.stderr)
                await websocket.send_json(message)
                print(f"[WebSocket] 已发送缓存消息 [{idx}/{pending_count}]: {message.get('type')}", file=sys.stderr)
                await asyncio.sleep(self.FLUSH_DELAY_SECONDS)
            except Exception as e:
                # Best effort: keep trying the remaining messages.
                print(f"[WebSocket] 发送缓存消息失败: {str(e)}", file=sys.stderr)
                import traceback
                traceback.print_exc()
        print(f"[WebSocket] 缓存消息已清空: {session_id}", file=sys.stderr)

    def disconnect(self, session_id: str):
        """Forget the connection and any cached messages for *session_id*.

        Safe to call for an unknown or already-removed session.
        """
        if self.active_connections.pop(session_id, None) is not None:
            print(f"[WebSocket] 断开连接: {session_id}", file=sys.stderr)
        # Drop cached messages as well.
        self.pending_messages.pop(session_id, None)

    async def send_message(self, session_id: str, message: dict):
        """Send *message* to the session's socket, or cache it if not connected.

        A failed send unregisters the session (which also discards its cache).
        """
        connection = self.active_connections.get(session_id)
        if connection is not None:
            try:
                await connection.send_json(message)
                print(f"[WebSocket] 发送消息到 {session_id}: {message.get('type')}", file=sys.stderr)
            except Exception as e:
                print(f"[WebSocket] 发送消息失败 {session_id}: {str(e)}", file=sys.stderr)
                self.disconnect(session_id)
            return

        # The WebSocket is not connected yet: cache the message.
        print(f"[WebSocket] 连接尚未建立,缓存消息: {session_id}", file=sys.stderr)
        queue = self.pending_messages.setdefault(session_id, [])
        queue.append(message)
        # Keep only the newest MAX_PENDING_MESSAGES entries.
        if len(queue) > self.MAX_PENDING_MESSAGES:
            del queue[0]


# Global WebSocket manager
ws_manager = ConnectionManager()
async def fetch_proxy_from_pool() -> Optional[str]:
"""从代理池接口获取一个代理地址http://ip:port获取失败返回None"""
@@ -97,6 +166,7 @@ class SendCodeRequest(BaseModel):
phone: str
country_code: str = "+86"
login_page: Optional[str] = None # 登录页面creator 或 home为None时使用配置文件默认值
session_id: Optional[str] = None # 可选前端生成的session_id用于WebSocket通知
class VerifyCodeRequest(BaseModel):
phone: str
@@ -288,10 +358,14 @@ async def send_code(request: SendCodeRequest):
支持选择从创作者中心或小红书首页登录
并发支持:为每个请求分配独立的浏览器实例
"""
# 使用随机UUID作为session_id确保每次都创建全新浏览器,完全不复用
import uuid
session_id = f"xhs_login_{uuid.uuid4().hex}"
print(f"[发送验证码] 创建全新浏览器实例 session_id={session_id}, phone={request.phone}", file=sys.stderr)
# 使用前端传递的session_id如果没有则生成新的
if request.session_id:
session_id = request.session_id
print(f"[发送验证码] 使用前端传递的session_id={session_id}, phone={request.phone}", file=sys.stderr)
else:
import uuid
session_id = f"xhs_login_{uuid.uuid4().hex}"
print(f"[发送验证码] 前端未传session_id生成新的session_id={session_id}, phone={request.phone}", file=sys.stderr)
# 获取配置中的默认login_page如果API传入了则优先使用API参数
config = get_config()
@@ -312,9 +386,24 @@ async def send_code(request: SendCodeRequest):
result = await request_login_service.send_verification_code(
phone=request.phone,
country_code=request.country_code,
login_page=login_page # 传递登录页面参数
login_page=login_page, # 传递登录页面参数
session_id=session_id # 传递session_id用于WebSocket通知
)
# 检查是否需要验证(发送验证码时触发风控)
if result.get("need_captcha"):
print(f"[发送验证码] 检测到需要扫码验证保持session {session_id} 的浏览器继续运行", file=sys.stderr)
return BaseResponse(
code=0, # 成功返回二维码
message=result.get("message", "需要扫码验证"),
data={
"need_captcha": True,
"captcha_type": result.get("captcha_type"),
"qrcode_image": result.get("qrcode_image"),
"session_id": session_id
}
)
if result["success"]:
# 验证浏览器是否已保存到池中
if browser_pool and session_id in browser_pool.temp_browsers:
@@ -835,7 +924,22 @@ async def login(request: LoginRequest):
login_page=login_page # 传递登录页面参数
)
# 释放临时浏览器(无论成功还是失败)
# 检查是否需要扫码验证
if result.get("need_captcha"):
# 需要扫码验证不释放浏览器保持session_id对应的浏览器继续运行
print(f"[登录验证] 检测到需要扫码验证保持session {session_id} 的浏览器继续运行", file=sys.stderr)
return BaseResponse(
code=0, # 成功返回二维码
message=result.get("message", "需要扫码验证"),
data={
"need_captcha": True,
"captcha_type": result.get("captcha_type"),
"qrcode_image": result.get("qrcode_image"),
"session_id": session_id
}
)
# 释放临时浏览器(仅在登录成功或失败时释放)
if session_id and browser_pool:
try:
await browser_pool.release_temp_browser(session_id)
@@ -1217,6 +1321,228 @@ async def upload_images(files: List[UploadFile] = File(...)):
"data": None
}
async def handle_send_code_ws(session_id: str, phone: str, country_code: str, login_page: str, websocket: WebSocket):
    """Handle a WebSocket "send verification code" request.

    Creates a pooled ``XHSLoginService`` for *session_id*, asks it to send the
    verification code and pushes exactly one outcome message to *websocket*:
    ``need_captcha`` when the service reports that a captcha is required,
    otherwise ``code_sent`` with ``success`` True/False.

    Args:
        session_id: Session whose browser instance is used.
        phone: Phone number to send the code to.
        country_code: Country calling code passed through to the service.
        login_page: Login page selector passed through to the service.
        websocket: Socket the outcome is pushed to.
    """
    try:
        print(f"[WebSocket-SendCode] 开始处理: session={session_id}, phone={phone}", file=sys.stderr)

        # Create a login service instance bound to this session.
        request_login_service = XHSLoginService(
            use_pool=True,
            headless=login_service.headless,
            session_id=session_id
        )

        # Ask the login service to send the verification code.
        result = await request_login_service.send_verification_code(
            phone=phone,
            country_code=country_code,
            login_page=login_page,
            session_id=session_id
        )

        # Risk control was triggered while sending the code.
        if result.get("need_captcha"):
            print(f"[WebSocket-SendCode] 检测到风控,需要扫码", file=sys.stderr)
            await websocket.send_json({
                "type": "need_captcha",
                "captcha_type": result.get("captcha_type"),
                "qrcode_image": result.get("qrcode_image"),
                "message": result.get("message", "需要扫码验证")
            })
            print(f"[WebSocket-SendCode] 已推送风控信息", file=sys.stderr)
            return

        # .get(): a result without "success" is treated as a failure instead
        # of surfacing a bare KeyError to the client.
        if result.get("success"):
            print(f"[WebSocket-SendCode] 验证码发送成功", file=sys.stderr)
            await websocket.send_json({
                "type": "code_sent",
                "success": True,
                "message": "验证码已发送请在小红书APP中查看"
            })
        else:
            print(f"[WebSocket-SendCode] 发送失败: {result.get('error')}", file=sys.stderr)
            await websocket.send_json({
                "type": "code_sent",
                "success": False,
                "message": result.get("error", "发送验证码失败")
            })
    except Exception as e:
        print(f"[WebSocket-SendCode] 异常: {str(e)}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        try:
            await websocket.send_json({
                "type": "code_sent",
                "success": False,
                "message": f"发送验证码失败: {str(e)}"
            })
        except Exception as send_err:
            # Still best effort (the socket may already be closed), but leave
            # a trace instead of swallowing the failure silently.
            print(f"[WebSocket-SendCode] 推送失败结果时出错: {str(send_err)}", file=sys.stderr)
async def handle_verify_code_ws(session_id: str, phone: str, code: str, country_code: str, login_page: str, websocket: WebSocket):
    """Handle a WebSocket "verify code" (login) request.

    Looks up the browser kept for *session_id* in ``browser_pool``, performs
    the login with *code* and pushes one outcome message to *websocket*:
    ``need_captcha``, ``login_success`` (with the storage state, which is also
    written under ``storage_states/``) or ``login_result`` with
    ``success: False``. The temporary browser is released only after a
    successful login.

    Args:
        session_id: Session whose temporary browser is reused.
        phone: Phone number being logged in; also used for the state file name.
        code: Verification code entered by the user.
        country_code: Country calling code passed through to the service.
        login_page: Login page selector passed through to the service.
        websocket: Socket the outcome is pushed to.
    """
    import json
    import os

    try:
        # The verification code is a credential: do not write it to the log.
        print(f"[WebSocket-VerifyCode] 开始验证: session={session_id}, phone={phone}, code=***", file=sys.stderr)

        # Fetch the browser instance created when the code was sent.
        # browser_pool may be unset (other handlers in this file guard for it).
        if not browser_pool or session_id not in browser_pool.temp_browsers:
            print(f"[WebSocket-VerifyCode] 未找到session: {session_id}", file=sys.stderr)
            await websocket.send_json({
                "type": "login_result",
                "success": False,
                "message": "会话已过期,请重新发送验证码"
            })
            return

        browser_data = browser_pool.temp_browsers[session_id]
        request_login_service = browser_data['service']

        # Ask the login service to verify the code and log in.
        result = await request_login_service.login_with_code(
            phone=phone,
            code=code,
            country_code=country_code,
            login_page=login_page
        )

        # Risk control was triggered during login.
        if result.get("need_captcha"):
            print(f"[WebSocket-VerifyCode] 登录时检测到风控", file=sys.stderr)
            await websocket.send_json({
                "type": "need_captcha",
                "captcha_type": result.get("captcha_type"),
                "qrcode_image": result.get("qrcode_image"),
                "message": result.get("message", "需要扫码验证")
            })
            return

        if not result.get("success"):
            print(f"[WebSocket-VerifyCode] 登录失败: {result.get('error')}", file=sys.stderr)
            await websocket.send_json({
                "type": "login_result",
                "success": False,
                "message": result.get("error", "登录失败")
            })
            return

        print(f"[WebSocket-VerifyCode] 登录成功", file=sys.stderr)
        storage_state = result.get("storage_state")

        # Persist storage_state to a file.
        storage_state_path = None
        if storage_state:
            os.makedirs('storage_states', exist_ok=True)
            # `phone` comes from the client: keep only characters that cannot
            # form a path (no separators or dots) so the file always stays
            # inside storage_states/. Plain digit strings are unchanged.
            safe_phone = "".join(
                ch for ch in str(phone) if ch.isalnum() or ch in "+-_"
            ) or "unknown"
            storage_state_path = f"storage_states/{safe_phone}_state.json"
            with open(storage_state_path, 'w', encoding='utf-8') as f:
                json.dump(storage_state, f, ensure_ascii=False, indent=2)
            print(f"[WebSocket-VerifyCode] 已保存storage_state: {storage_state_path}", file=sys.stderr)

        # Push the login-success message.
        await websocket.send_json({
            "type": "login_success",
            "success": True,
            "storage_state": storage_state,
            "storage_state_path": storage_state_path,
            "message": "登录成功"
        })

        # Release the temporary browser.
        try:
            await browser_pool.release_temp_browser(session_id)
            print(f"[WebSocket-VerifyCode] 已释放浏览器: {session_id}", file=sys.stderr)
        except Exception as e:
            print(f"[WebSocket-VerifyCode] 释放浏览器失败: {str(e)}", file=sys.stderr)
    except Exception as e:
        print(f"[WebSocket-VerifyCode] 异常: {str(e)}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        try:
            await websocket.send_json({
                "type": "login_result",
                "success": False,
                "message": f"登录失败: {str(e)}"
            })
        except Exception as send_err:
            # Still best effort (the socket may already be closed), but leave
            # a trace instead of swallowing the failure silently.
            print(f"[WebSocket-VerifyCode] 推送失败结果时出错: {str(send_err)}", file=sys.stderr)
# Strong references to in-flight login tasks. The event loop keeps only weak
# references to tasks, so a task nobody else references can be
# garbage-collected before it finishes.
_ws_login_tasks = set()


def _spawn_ws_login_task(coro):
    """Schedule *coro* as a task and hold a reference until it completes."""
    task = asyncio.create_task(coro)
    _ws_login_tasks.add(task)
    task.add_done_callback(_ws_login_tasks.discard)
    return task


@app.websocket("/ws/login/{session_id}")
async def websocket_login(websocket: WebSocket, session_id: str):
    """WebSocket endpoint for the login flow of one session.

    Registers the socket with ``ws_manager`` and then serves client messages
    until the socket closes:

    * the text ``ping`` is answered with ``pong``;
    * JSON ``{"type": "test"}`` is answered with a ``test_response``;
    * JSON ``{"type": "send_code"}`` / ``{"type": "verify_code"}`` start the
      matching handler as a background task, so the receive loop keeps
      running while the browser work is in progress;
    * anything that is not a JSON object is logged and ignored.
    """
    import json

    await ws_manager.connect(session_id, websocket)
    try:
        # Keep the connection open, waiting for messages or a disconnect.
        while True:
            data = await websocket.receive_text()
            print(f"[WebSocket] 收到客户端消息 {session_id}: {data}", file=sys.stderr)

            # Keep-alive.
            if data == "ping":
                await websocket.send_text("pong")
                continue

            try:
                msg = json.loads(data)
            except json.JSONDecodeError:
                print(f"[WebSocket] 无法解析为JSON: {data}", file=sys.stderr)
                continue

            # Valid JSON that is not an object (e.g. `[]` or `1`) has no
            # .get(); ignore it instead of tearing the connection down.
            if not isinstance(msg, dict):
                print(f"[WebSocket] 忽略非对象JSON消息: {data}", file=sys.stderr)
                continue

            msg_type = msg.get('type', 'unknown')
            print(f"[WebSocket] 解析消息类型: {msg_type}", file=sys.stderr)

            if msg_type == 'test':
                print(f"[WebSocket] 收到测试消息: {msg.get('message')}", file=sys.stderr)
                # Reply to the test message (the raw payload is echoed back).
                await websocket.send_json({
                    "type": "test_response",
                    "message": "Test message received by backend successfully!",
                    "timestamp": data
                })
                print(f"[WebSocket] 已回复测试消息", file=sys.stderr)

            elif msg_type == 'send_code':
                phone = msg.get('phone')
                country_code = msg.get('country_code', '+86')
                login_page = msg.get('login_page', 'creator')
                print(f"[WebSocket] 收到发送验证码请求: phone={phone}", file=sys.stderr)
                # Send the code in the background.
                _spawn_ws_login_task(
                    handle_send_code_ws(session_id, phone, country_code, login_page, websocket)
                )

            elif msg_type == 'verify_code':
                phone = msg.get('phone')
                code = msg.get('code')
                country_code = msg.get('country_code', '+86')
                login_page = msg.get('login_page', 'creator')
                print(f"[WebSocket] 收到验证码验证请求: phone={phone}, code={code}", file=sys.stderr)
                # Verify the code in the background.
                _spawn_ws_login_task(
                    handle_verify_code_ws(session_id, phone, code, country_code, login_page, websocket)
                )
    except WebSocketDisconnect:
        print(f"[WebSocket] 客户端断开: {session_id}", file=sys.stderr)
    except Exception as e:
        print(f"[WebSocket] 连接异常 {session_id}: {str(e)}", file=sys.stderr)
    finally:
        # Always unregister, including when the handler task is cancelled.
        ws_manager.disconnect(session_id)
if __name__ == "__main__":
import uvicorn
@@ -1227,7 +1553,9 @@ if __name__ == "__main__":
debug = config.get_bool('server.debug', False)
reload = config.get_bool('server.reload', False)
print(f"[\u542f\u52a8\u670d\u52a1] \u4e3b\u673a: {host}, \u7aef\u53e3: {port}, \u8c03\u8bd5: {debug}, \u70ed\u91cd\u8f7d: {reload}")
print(f"[启动服务] 主机: {host}, 端口: {port}, 调试: {debug}, 热重载: {reload}")
print(f"[WebSocket] WebSocket服务地址: ws://{host}:{port}/ws/login/{{session_id}}")
print(f"[WebSocket] 示例: ws://{host}:{port}/ws/login/xhs_login_xxxxx")
uvicorn.run(
app,

View File

@@ -0,0 +1,65 @@
"""
批量替换 xhs_login.py 中的 print 为 logger
"""
import re
def replace_print_to_logger(content):
    """Rewrite ``print(...)`` calls in *content* as ``logger.<level>(...)``.

    The level is chosen from keywords found in the call's argument text,
    checked in this order: error, warning, success, debug; ``info`` is the
    fallback. Both ``print(x, file=sys.stderr)`` and plain ``print(x)`` are
    handled; the ``file=sys.stderr`` argument is dropped.

    Only calls written on a single line are matched. ``print()`` with no
    arguments is left unchanged, and names that merely end in ``print``
    (``pprint(...)``, ``obj.print(...)``) are not touched.

    Args:
        content: Python source text.

    Returns:
        The source text with the matching print calls replaced.
    """
    # (level, keywords) in priority order. Keywords must be non-empty:
    # `'' in text` is always true, so an empty keyword would classify every
    # message as the first level in this table.
    level_keywords = (
        ('error', ('失败', '错误', '异常', 'error', 'Error', 'failed', 'Failed')),
        ('warning', ('警告', '⚠️', 'warning', 'Warning', '未找到', '检测到')),
        ('success', ('成功', 'success', 'Success', '完成')),
        ('debug', ('调试', 'debug', 'Debug', '查找', '正在', '开始')),
    )

    def determine_log_level_and_replace(match):
        text = match.group(1)
        # A bare print() has nothing to log, and logger.info() without a
        # message would not be a valid replacement: keep it as is.
        if not text.strip():
            return match.group(0)
        for level, keywords in level_keywords:
            if any(keyword in text for keyword in keywords):
                return f'logger.{level}({text})'
        # Default level.
        return f'logger.info({text})'

    # The lookbehind keeps `pprint(` and `obj.print(` from being rewritten.
    # Match print(xxx, file=sys.stderr)
    pattern1 = r'(?<![\w.])print\((.*?),\s*file=sys\.stderr\)'
    content = re.sub(pattern1, determine_log_level_and_replace, content)

    # Match plain print(xxx)
    pattern2 = r'(?<![\w.])print\((.*?)\)(?!\s*#.*logger)'
    content = re.sub(pattern2, determine_log_level_and_replace, content)

    return content
def main(path='xhs_login.py'):
    """Convert the print calls in *path* to logger calls, keeping a backup.

    The original text is saved to ``<path>.bak`` before *path* is rewritten.
    When the conversion changes nothing, neither file is written, so running
    the script a second time does not replace the pristine backup with the
    already-converted source.

    Args:
        path: File to convert in place. Defaults to ``xhs_login.py``.
    """
    backup_path = f'{path}.bak'

    # Read the file.
    with open(path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Replace.
    new_content = replace_print_to_logger(content)
    if new_content == content:
        print("没有需要替换的 print 语句,文件未修改")
        return

    # Back up the original before overwriting it.
    with open(backup_path, 'w', encoding='utf-8') as f:
        f.write(content)

    # Write the converted file.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print("✅ 替换完成!")
    print(f"原文件已备份到 {backup_path}")


if __name__ == '__main__':
    main()

View File

@@ -14,3 +14,4 @@ alibabacloud_credentials==0.3.4
alibabacloud_tea_openapi==0.3.9
alibabacloud_tea_util==0.3.13
loguru==0.7.2
websockets==12.0

File diff suppressed because it is too large Load Diff