This commit is contained in:
sjk
2026-01-23 16:27:47 +08:00
parent 213229953b
commit e8e6d913df
26 changed files with 4294 additions and 431 deletions

506
backend/xhs_login_helper.py Normal file
View File

@@ -0,0 +1,506 @@
"""
小红书登录辅助模块
借鉴 ai_mip 项目的优秀实践,提供增强的验证码登录功能
"""
import asyncio
import random
import time
from typing import Optional, List, Dict, Any
from playwright.async_api import Page, ElementHandle
from loguru import logger
class XHSLoginHelper:
    """Helper for driving the Xiaohongshu (XHS) login page through Playwright.

    Bundles human-like typing and clicking, multi-selector element lookup,
    button state checks and debug dumps around a single Playwright ``Page``.
    The approach is borrowed from the ai_mip project.
    """

    def __init__(self, page: Page):
        """Remember the page that every helper method operates on.

        Args:
            page: Playwright ``Page`` object.
        """
        self.page = page

    async def human_type(self, selector: str, text: str, clear_first: bool = True) -> bool:
        """Type ``text`` into an input one character at a time with random pauses.

        Args:
            selector: Selector of the input element.
            text: Text to type.
            clear_first: Whether to empty the input before typing.

        Returns:
            True when typing completed; False when the element was not found
            or any step raised.
        """
        try:
            element = await self._find_element_smart(selector)
            if not element:
                logger.error(f"[人类输入] 未找到元素: {selector}")
                return False

            # Bring the input into view, then focus it, pausing like a person would.
            await element.scroll_into_view_if_needed()
            await asyncio.sleep(random.uniform(0.1, 0.3))
            await element.focus()
            await asyncio.sleep(random.uniform(0.1, 0.2))

            if clear_first:
                await element.fill('')
                await asyncio.sleep(random.uniform(0.1, 0.3))

            # Keystrokes go to the focused element; 50-150 ms between characters.
            for char in text:
                await self.page.keyboard.type(char)
                await asyncio.sleep(random.uniform(0.05, 0.15))

            # Fire input/change explicitly so framework listeners see the final value.
            await element.evaluate('el => el.dispatchEvent(new Event("input", { bubbles: true }))')
            await element.evaluate('el => el.dispatchEvent(new Event("change", { bubbles: true }))')

            # Only the length is logged: the text is typically a phone number or
            # an SMS verification code and must not end up in log files.
            logger.success(f"[人类输入] 已输入 {len(text)} 个字符")
            return True
        except Exception as e:
            logger.error(f"[人类输入] 输入失败: {e}")
            return False

    async def human_click(self, selector: str, wait_after: float = 0.5) -> bool:
        """Click an element at a random point inside it, moving the mouse first.

        Args:
            selector: Selector of the element to click.
            wait_after: Seconds to sleep after the click.

        Returns:
            True when the click was issued; False when the element was not
            found or any step raised.
        """
        try:
            element = await self._find_element_smart(selector)
            if not element:
                logger.error(f"[人类点击] 未找到元素: {selector}")
                return False

            await element.scroll_into_view_if_needed()
            await asyncio.sleep(random.uniform(0.1, 0.3))

            box = await element.bounding_box()
            if box:
                # Pick a point in the central 40% of the element so the click
                # does not always land on the exact centre.
                x = box['x'] + random.uniform(box['width'] * 0.3, box['width'] * 0.7)
                y = box['y'] + random.uniform(box['height'] * 0.3, box['height'] * 0.7)
                await self.page.mouse.move(x, y)
                await asyncio.sleep(random.uniform(0.1, 0.3))
                await self.page.mouse.click(x, y)
                logger.success(f"[人类点击] 点击位置: ({x:.0f}, {y:.0f})")
            else:
                # No bounding box available: fall back to a plain element click.
                await element.click()
                logger.success("[人类点击] 直接点击元素")

            await asyncio.sleep(wait_after)
            return True
        except Exception as e:
            logger.error(f"[人类点击] 点击失败: {e}")
            return False

    async def _find_element_smart(self, selector: str, timeout: int = 5000) -> Optional[ElementHandle]:
        """Wait for a visible element matching ``selector``.

        Args:
            selector: Any selector accepted by ``page.wait_for_selector``.
            timeout: Maximum wait in milliseconds.

        Returns:
            The element, or None when the wait raised (e.g. timed out).
        """
        try:
            return await self.page.wait_for_selector(selector, timeout=timeout, state='visible')
        except Exception:
            return None

    async def _first_visible(self, selectors: List[str], label: str):
        """Return ``(element, selector)`` for the first visible match, else None.

        Selectors are tried in order; a selector that raises is logged at
        debug level and skipped. ``label`` is the word used for the selector
        kind in the debug log lines.
        """
        for selector in selectors:
            try:
                elements = await self.page.query_selector_all(selector)
                logger.debug(f"[智能查找] {label} '{selector}' 找到 {len(elements)} 个元素")
                for elem in elements:
                    if await elem.is_visible():
                        return elem, selector
            except Exception as e:
                logger.debug(f"[智能查找] {label} '{selector}' 失败: {str(e)}")
        return None

    async def find_input_with_fallback(
        self,
        primary_selectors: List[str],
        fallback_selectors: Optional[List[str]] = None,
    ) -> Optional[ElementHandle]:
        """Find a visible input, trying primary selectors before fallback ones.

        Args:
            primary_selectors: Selectors tried first, in order.
            fallback_selectors: Selectors tried only when no primary selector
                yields a visible element. May be None or empty.

        Returns:
            The first visible matching element, or None when nothing matched
            or the lookup raised.
        """
        try:
            logger.info("[智能查找] 开始查找输入框...")

            hit = await self._first_visible(primary_selectors, "选择器")
            if hit:
                elem, selector = hit
                logger.success(f"[智能查找] 找到可见输入框: {selector}")
                return elem

            if fallback_selectors:
                logger.warning("[智能查找] 主要选择器未找到,尝试降级选择器...")
                hit = await self._first_visible(fallback_selectors, "降级选择器")
                if hit:
                    elem, selector = hit
                    logger.warning(f"[智能查找] 使用降级选择器找到: {selector}")
                    return elem

            logger.error("[智能查找] 所有选择器均未找到可见输入框")
            return None
        except Exception as e:
            logger.error(f"[智能查找] 查找异常: {str(e)}")
            return None

    async def find_button_with_fallback(
        self,
        primary_selectors: List[str],
        expected_texts: Optional[List[str]] = None,
    ) -> Optional[ElementHandle]:
        """Find a visible button, optionally requiring its text to match.

        Args:
            primary_selectors: Selectors tried in order.
            expected_texts: When given, a candidate is accepted only if its
                stripped inner text contains at least one of these strings.

        Returns:
            The first acceptable visible element, or None when nothing
            matched or the lookup raised.
        """
        try:
            logger.info("[智能查找] 开始查找按钮...")
            for selector in primary_selectors:
                try:
                    elements = await self.page.query_selector_all(selector)
                    logger.debug(f"[智能查找] 选择器 '{selector}' 找到 {len(elements)} 个按钮")
                    for elem in elements:
                        if not await elem.is_visible():
                            continue

                        if not expected_texts:
                            logger.success(f"[智能查找] 找到可见按钮: {selector}")
                            return elem

                        try:
                            raw_text = await elem.inner_text()
                        except Exception:
                            # Text cannot be read: accept the visible element
                            # without verification (deliberate best effort).
                            logger.success(f"[智能查找] 找到可见按钮: {selector}")
                            return elem

                        btn_text = raw_text.strip() if raw_text else ""
                        if any(expected in btn_text for expected in expected_texts):
                            logger.success(f"[智能查找] 找到匹配按钮: {selector}, 文本: '{btn_text}'")
                            return elem
                        logger.debug(f"[智能查找] 按钮文本不匹配: '{btn_text}', 期望: {expected_texts}")
                except Exception as e:
                    logger.debug(f"[智能查找] 选择器 '{selector}' 失败: {str(e)}")
                    continue

            logger.error("[智能查找] 所有选择器均未找到可见按钮")
            return None
        except Exception as e:
            logger.error(f"[智能查找] 查找按钮异常: {str(e)}")
            return None

    @staticmethod
    def _class_marks_active(class_name: str) -> bool:
        """Tell whether a ``class`` attribute value marks the button as active.

        A class token counts when it contains ``active`` or ``enabled``.
        Tokens containing ``inactive`` are skipped, because a plain substring
        test for ``active`` would otherwise treat them as active.
        """
        for token in class_name.split():
            if 'inactive' in token:
                continue
            if 'active' in token or 'enabled' in token:
                return True
        return False

    async def wait_for_button_active(self, element: ElementHandle, timeout: int = 5) -> bool:
        """Poll a button's ``class`` attribute until it looks active.

        Args:
            element: Button element to watch.
            timeout: Maximum wait in seconds.

        Returns:
            True as soon as the class marks the button active; False on
            timeout or unexpected error.
        """
        try:
            logger.info("[按钮激活] 等待按钮激活...")
            # Monotonic clock: the deadline is immune to system clock changes.
            deadline = time.monotonic() + timeout
            while time.monotonic() < deadline:
                try:
                    class_name = await element.get_attribute('class') or ""
                except Exception:
                    # Attribute read failed this round; poll again.
                    class_name = ""
                if self._class_marks_active(class_name):
                    logger.success(f"[按钮激活] 按钮已激活: class={class_name}")
                    return True
                await asyncio.sleep(0.2)

            logger.warning(f"[按钮激活] 等待超时({timeout}秒)")
            return False
        except Exception as e:
            logger.error(f"[按钮激活] 检查失败: {e}")
            return False

    @staticmethod
    def _looks_like_countdown(text: str) -> bool:
        """Tell whether stripped button text looks like a resend countdown.

        Matches text that ends in ``s``, contains ``秒`` (seconds), or is all
        digits. Empty text is never a countdown.
        """
        if not text:
            return False
        # NOTE(review): the original compared against an empty string literal,
        # which is contained in every string and made this check always true.
        # '秒' is assumed to be the intended marker - confirm against the page.
        return text.endswith('s') or '秒' in text or text.isdigit()

    async def check_button_countdown(self, element: ElementHandle) -> Optional[str]:
        """Report whether a button is currently showing a countdown.

        Args:
            element: Button element to inspect.

        Returns:
            The stripped button text when it looks like a countdown, otherwise
            None (also None when reading the text raised).
        """
        try:
            raw_text = await element.inner_text()
            btn_text = raw_text.strip() if raw_text else ""
            if self._looks_like_countdown(btn_text):
                logger.warning(f"[倒计时检测] 按钮处于倒计时: {btn_text}")
                return btn_text
            return None
        except Exception as e:
            logger.error(f"[倒计时检测] 检查失败: {e}")
            return None

    async def random_delay(self, min_seconds: float = 0.5, max_seconds: float = 1.5) -> None:
        """Sleep for a random duration to imitate a human pause.

        Args:
            min_seconds: Lower bound of the delay in seconds.
            max_seconds: Upper bound of the delay in seconds.
        """
        await asyncio.sleep(random.uniform(min_seconds, max_seconds))

    async def scroll_to_element(self, element: ElementHandle) -> None:
        """Scroll in several small steps until the element is vertically centred.

        Falls back to ``scroll_into_view_if_needed`` when the element has no
        bounding box or when the stepped scroll raises.

        Args:
            element: Target element.
        """
        try:
            box = await element.bounding_box()
            if not box:
                await element.scroll_into_view_if_needed()
                logger.success("[平滑滚动] 使用降级方案滚动")
                return

            viewport = self.page.viewport_size
            if viewport:
                viewport_height = viewport['height']
            else:
                # viewport_size is None when no fixed viewport is configured.
                viewport_height = await self.page.evaluate('window.innerHeight')

            # bounding_box() is relative to the viewport, so the gap between the
            # element centre and the viewport centre is the distance still to
            # scroll from the current offset.
            distance = box['y'] + box['height'] / 2 - viewport_height / 2
            current_scroll = await self.page.evaluate('window.pageYOffset')

            # Roughly one step per 100 px, never fewer than three.
            steps = max(3, int(abs(distance) / 100))
            for step in range(1, steps + 1):
                scroll_y = current_scroll + distance * step / steps
                await self.page.evaluate(f'window.scrollTo(0, {scroll_y})')
                await asyncio.sleep(random.uniform(0.05, 0.1))

            logger.success("[平滑滚动] 已滚动到元素位置")
        except Exception as e:
            logger.error(f"[平滑滚动] 滚动失败: {e}")
            # Last resort; a failure here is intentionally ignored.
            try:
                await element.scroll_into_view_if_needed()
            except Exception:
                pass

    async def debug_print_inputs(self) -> None:
        """Log attributes of the first ten ``input`` elements on the page."""
        try:
            logger.info("=" * 50)
            logger.info("[调试] 打印页面所有输入框...")
            logger.info("=" * 50)

            inputs = await self.page.query_selector_all('input')
            logger.info(f"[调试] 页面上找到 {len(inputs)} 个input元素")

            for i, inp in enumerate(inputs[:10]):  # cap the dump at ten entries
                try:
                    placeholder = await inp.get_attribute('placeholder')
                    input_type = await inp.get_attribute('type')
                    name = await inp.get_attribute('name')
                    class_name = await inp.get_attribute('class')
                    is_visible = await inp.is_visible()
                    logger.info(f"[调试] Input {i+1}:")
                    logger.info(f" - type: {input_type}")
                    logger.info(f" - placeholder: {placeholder}")
                    logger.info(f" - name: {name}")
                    logger.info(f" - class: {class_name}")
                    logger.info(f" - visible: {is_visible}")
                except Exception as e:
                    logger.debug(f"[调试] 获取Input {i+1}信息失败: {e}")

            logger.info("=" * 50)
        except Exception as e:
            logger.error(f"[调试] 打印输入框信息失败: {e}")

    async def debug_print_buttons(self) -> None:
        """Log text, class and visibility of the first ten button-like elements."""
        try:
            logger.info("=" * 50)
            logger.info("[调试] 打印页面所有按钮...")
            logger.info("=" * 50)

            buttons = await self.page.query_selector_all('button, div[role="button"], span[role="button"]')
            logger.info(f"[调试] 页面上找到 {len(buttons)} 个按钮元素")

            for i, btn in enumerate(buttons[:10]):  # cap the dump at ten entries
                try:
                    text = await btn.inner_text()
                    class_name = await btn.get_attribute('class')
                    is_visible = await btn.is_visible()
                    logger.info(f"[调试] Button {i+1}:")
                    logger.info(f" - text: {text}")
                    logger.info(f" - class: {class_name}")
                    logger.info(f" - visible: {is_visible}")
                except Exception as e:
                    logger.debug(f"[调试] 获取Button {i+1}信息失败: {e}")

            logger.info("=" * 50)
        except Exception as e:
            logger.error(f"[调试] 打印按钮信息失败: {e}")
# Commonly used selectors, kept together in one structured table
# (an organisation borrowed from the ai_mip project).
class XHSSelectors:
    """Selector lists for the Xiaohongshu login pages.

    Each attribute is an ordered list of candidate selectors for one element.
    """

    # Phone number input (creator center)
    PHONE_INPUT_CREATOR: List[str] = [
        'input[placeholder="手机号"]',
        'input.css-nt440g',
        'input[placeholder*="手机号"]',
        'input[type="tel"]',
    ]

    # Phone number input (XHS home page)
    PHONE_INPUT_HOME: List[str] = [
        'input[placeholder="输入手机号"]',
        'label.phone input',
        'input[name="blur"]',
    ]

    # Phone number input, fallback selectors
    PHONE_INPUT_FALLBACK: List[str] = [
        'input[type="text"]',
        'input',
    ]

    # Verification code input (creator center)
    CODE_INPUT_CREATOR: List[str] = [
        'input[placeholder="验证码"]',
        'input.css-1ge5flv',
        'input[placeholder*="验证码"]',
        'input[type="text"]:not([placeholder*="手机"])',
    ]

    # Verification code input (XHS home page)
    CODE_INPUT_HOME: List[str] = [
        'input[placeholder="输入验证码"]',
        'label.auth-code input',
        'input[type="number"]',
        'input[placeholder*="验证码"]',
    ]

    # "Send verification code" button (creator center)
    SEND_CODE_BTN_CREATOR: List[str] = [
        'div.css-uyobdj',
        'text="发送验证码"',
        'div:has-text("发送验证码")',
        'text="重新发送"',
        'text="获取验证码"',
    ]

    # "Send verification code" button (XHS home page)
    SEND_CODE_BTN_HOME: List[str] = [
        'span.code-button',
        '.code-button',
        'text="获取验证码"',
        'span:has-text("获取验证码")',
    ]

    # Login button
    LOGIN_BTN: List[str] = [
        'button:has-text("登录")',
        'text="登录"',
        'div:has-text("登录")',
        '.login-button',
        'button.login',
    ]

    # Agreement checkbox (XHS home page)
    AGREEMENT_CHECKBOX: List[str] = [
        '.agree-icon',
        '.agreements .icon-wrapper',
        'span.agree-icon',
        '.icon-wrapper',
    ]
# Convenience factory exported for callers.
def get_login_helper(page: Page) -> XHSLoginHelper:
    """Create a login helper bound to the given page.

    Args:
        page: Playwright ``Page`` object.

    Returns:
        A new ``XHSLoginHelper`` instance wrapping ``page``.
    """
    helper = XHSLoginHelper(page)
    return helper