# Files
# ai_wht_wechat/backend/xhs_login_helper.py
# 2026-01-23 16:27:47 +08:00
#
# 507 lines
# 19 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
小红书登录辅助模块
借鉴 ai_mip 项目的优秀实践,提供增强的验证码登录功能
"""
import asyncio
import random
import time
from typing import Optional, List, Dict, Any
from playwright.async_api import Page, ElementHandle
from loguru import logger
class XHSLoginHelper:
    """Helper that drives a Xiaohongshu (XHS) login page with human-like input.

    Every method operates on the Playwright ``Page`` passed to the constructor.
    The public methods catch exceptions, log them through ``logger`` and report
    failure through their return value (``False`` / ``None``) instead of raising.
    """

    def __init__(self, page: Page):
        """Store the page used by all helper methods.

        Args:
            page: Playwright ``Page`` object.
        """
        self.page = page

    async def human_type(self, selector: str, text: str, clear_first: bool = True) -> bool:
        """Type ``text`` into an input one character at a time with random pauses.

        Args:
            selector: Selector of the input element.
            text: Text to type.
            clear_first: When true, empty the input before typing.

        Returns:
            True when typing completed; False when the element was not found
            or any step raised.
        """
        try:
            element = await self._find_element_smart(selector)
            if not element:
                logger.error(f"[人类输入] 未找到元素: {selector}")
                return False

            # Bring the input into view, then pause briefly.
            await element.scroll_into_view_if_needed()
            await asyncio.sleep(random.uniform(0.1, 0.3))

            # Focus first: the keystrokes below are sent through the page
            # keyboard, i.e. to whichever element currently has focus.
            await element.focus()
            await asyncio.sleep(random.uniform(0.1, 0.2))

            if clear_first:
                await element.fill('')
                await asyncio.sleep(random.uniform(0.1, 0.3))

            # One key at a time with a random 50-150 ms gap.
            for char in text:
                await self.page.keyboard.type(char)
                await asyncio.sleep(random.uniform(0.05, 0.15))

            # Explicitly fire input/change events on the element after typing.
            await element.evaluate('el => el.dispatchEvent(new Event("input", { bubbles: true }))')
            await element.evaluate('el => el.dispatchEvent(new Event("change", { bubbles: true }))')

            # Only the length is logged. This helper serves a login flow, so
            # the text may be a phone number or verification code and must not
            # be written to the log in clear.
            logger.success(f"[人类输入] 已输入 {len(text)} 个字符")
            return True
        except Exception as e:
            logger.error(f"[人类输入] 输入失败: {e}")
            return False

    async def human_click(self, selector: str, wait_after: float = 0.5) -> bool:
        """Click an element at a random point inside it, moving the mouse first.

        Args:
            selector: Selector of the element to click.
            wait_after: Seconds to sleep after the click.

        Returns:
            True when the click was issued; False when the element was not
            found or any step raised.
        """
        try:
            element = await self._find_element_smart(selector)
            if not element:
                logger.error(f"[人类点击] 未找到元素: {selector}")
                return False

            await element.scroll_into_view_if_needed()
            await asyncio.sleep(random.uniform(0.1, 0.3))

            box = await element.bounding_box()
            if box:
                # Pick a point in the central 30%-70% region of the element so
                # the click does not always land on the exact centre.
                x = box['x'] + random.uniform(box['width'] * 0.3, box['width'] * 0.7)
                y = box['y'] + random.uniform(box['height'] * 0.3, box['height'] * 0.7)

                await self.page.mouse.move(x, y)
                await asyncio.sleep(random.uniform(0.1, 0.3))

                await self.page.mouse.click(x, y)
                logger.success(f"[人类点击] 点击位置: ({x:.0f}, {y:.0f})")
            else:
                # No bounding box available: fall back to a plain element click.
                await element.click()
                logger.success("[人类点击] 直接点击元素")

            await asyncio.sleep(wait_after)
            return True
        except Exception as e:
            logger.error(f"[人类点击] 点击失败: {e}")
            return False

    async def _find_element_smart(self, selector: str, timeout: int = 5000) -> Optional[ElementHandle]:
        """Wait for a visible element matching ``selector``.

        Args:
            selector: Selector string passed to ``page.wait_for_selector``.
            timeout: Maximum wait in milliseconds.

        Returns:
            The element, or None when waiting raised (for example on timeout).
        """
        try:
            return await self.page.wait_for_selector(selector, timeout=timeout, state='visible')
        except Exception:
            return None

    async def _first_visible(self, selectors: List[str], *, is_fallback: bool) -> Optional[ElementHandle]:
        """Return the first visible element matched by any selector, in order.

        A selector that raises is logged at debug level and skipped.

        Args:
            selectors: Selectors to try, in priority order.
            is_fallback: Selects the wording and level of the log messages
                (fallback pass vs. primary pass).

        Returns:
            The first visible element, or None when no selector produced one.
        """
        label = "降级选择器" if is_fallback else "选择器"
        for selector in selectors:
            try:
                elements = await self.page.query_selector_all(selector)
                logger.debug(f"[智能查找] {label} '{selector}' 找到 {len(elements)} 个元素")
                for elem in elements:
                    if await elem.is_visible():
                        if is_fallback:
                            logger.warning(f"[智能查找] 使用降级选择器找到: {selector}")
                        else:
                            logger.success(f"[智能查找] 找到可见输入框: {selector}")
                        return elem
            except Exception as e:
                logger.debug(f"[智能查找] {label} '{selector}' 失败: {str(e)}")
        return None

    async def find_input_with_fallback(
        self,
        primary_selectors: List[str],
        fallback_selectors: Optional[List[str]] = None,
    ) -> Optional[ElementHandle]:
        """Find a visible input, trying primary selectors before fallback ones.

        Args:
            primary_selectors: Selectors tried first, in order.
            fallback_selectors: Selectors tried only when no primary selector
                yields a visible element. May be None or empty.

        Returns:
            The first visible element found, or None.
        """
        try:
            logger.info("[智能查找] 开始查找输入框...")

            # First pass: primary selectors.
            found = await self._first_visible(primary_selectors, is_fallback=False)
            if found is not None:
                return found

            # Second pass: fallback selectors, if any were supplied.
            if fallback_selectors:
                logger.warning("[智能查找] 主要选择器未找到,尝试降级选择器...")
                found = await self._first_visible(fallback_selectors, is_fallback=True)
                if found is not None:
                    return found

            logger.error("[智能查找] 所有选择器均未找到可见输入框")
            return None
        except Exception as e:
            logger.error(f"[智能查找] 查找异常: {str(e)}")
            return None

    async def find_button_with_fallback(
        self,
        primary_selectors: List[str],
        expected_texts: Optional[List[str]] = None,
    ) -> Optional[ElementHandle]:
        """Find a visible button, optionally requiring its text to match.

        Args:
            primary_selectors: Selectors tried in order.
            expected_texts: When given, a button is accepted only if its inner
                text contains at least one of these strings. If the text
                cannot be read, the button is accepted without verification.

        Returns:
            The first acceptable visible element, or None.
        """
        try:
            logger.info("[智能查找] 开始查找按钮...")

            for selector in primary_selectors:
                try:
                    elements = await self.page.query_selector_all(selector)
                    logger.debug(f"[智能查找] 选择器 '{selector}' 找到 {len(elements)} 个按钮")

                    for elem in elements:
                        if not await elem.is_visible():
                            continue

                        if not expected_texts:
                            logger.success(f"[智能查找] 找到可见按钮: {selector}")
                            return elem

                        try:
                            btn_text = await elem.inner_text()
                        except Exception:
                            # Text unavailable: accept the visible button
                            # without verifying its label.
                            logger.success(f"[智能查找] 找到可见按钮: {selector}")
                            return elem

                        btn_text = btn_text.strip() if btn_text else ""
                        if not any(expected in btn_text for expected in expected_texts):
                            logger.debug(f"[智能查找] 按钮文本不匹配: '{btn_text}', 期望: {expected_texts}")
                            continue

                        logger.success(f"[智能查找] 找到匹配按钮: {selector}, 文本: '{btn_text}'")
                        return elem
                except Exception as e:
                    logger.debug(f"[智能查找] 选择器 '{selector}' 失败: {str(e)}")
                    continue

            logger.error("[智能查找] 所有选择器均未找到可见按钮")
            return None
        except Exception as e:
            logger.error(f"[智能查找] 查找按钮异常: {str(e)}")
            return None

    @staticmethod
    def _has_active_class(class_name: str) -> bool:
        """Tell whether a ``class`` attribute value marks the element as active.

        A class token counts when it contains ``active`` or ``enabled``.
        Tokens containing ``inactive`` are ignored, because a plain substring
        test for ``active`` would otherwise match them too.
        """
        for token in class_name.split():
            if 'inactive' in token:
                continue
            if 'active' in token or 'enabled' in token:
                return True
        return False

    async def wait_for_button_active(self, element: ElementHandle, timeout: int = 5) -> bool:
        """Poll a button's ``class`` attribute until it looks active.

        Args:
            element: Button element to watch.
            timeout: Maximum wait in seconds.

        Returns:
            True as soon as an active/enabled class is seen, False on timeout
            or unexpected error.
        """
        try:
            logger.info("[按钮激活] 等待按钮激活...")
            # Monotonic clock: the deadline is unaffected by wall-clock changes.
            deadline = time.monotonic() + timeout

            while time.monotonic() < deadline:
                try:
                    class_name = await element.get_attribute('class') or ""
                    if self._has_active_class(class_name):
                        logger.success(f"[按钮激活] 按钮已激活: class={class_name}")
                        return True
                except Exception as e:
                    # Best effort: a failed read is retried on the next poll.
                    logger.debug(f"[按钮激活] 读取class失败: {e}")

                await asyncio.sleep(0.2)

            logger.warning(f"[按钮激活] 等待超时({timeout}秒)")
            return False
        except Exception as e:
            logger.error(f"[按钮激活] 检查失败: {e}")
            return False

    async def check_button_countdown(self, element: ElementHandle) -> Optional[str]:
        """Report whether a button currently shows a countdown.

        Args:
            element: Button element to inspect.

        Returns:
            The stripped button text when it looks like a countdown (ends with
            ``s``, contains ``秒``, or is all digits); otherwise None. None is
            also returned when reading the text fails.
        """
        try:
            btn_text = await element.inner_text()
            btn_text = btn_text.strip() if btn_text else ""

            # NOTE(review): the original test was `'' in btn_text`, which is
            # true for every string and flagged any non-empty label as a
            # countdown. The literal was presumably '秒' (seconds) lost to an
            # encoding problem - confirm against the page's real label.
            if btn_text and (btn_text.endswith('s') or '秒' in btn_text or btn_text.isdigit()):
                logger.warning(f"[倒计时检测] 按钮处于倒计时: {btn_text}")
                return btn_text

            return None
        except Exception as e:
            logger.error(f"[倒计时检测] 检查失败: {e}")
            return None

    async def random_delay(self, min_seconds: float = 0.5, max_seconds: float = 1.5) -> None:
        """Sleep for a random duration drawn uniformly from the given range.

        Args:
            min_seconds: Lower bound of the delay, in seconds.
            max_seconds: Upper bound of the delay, in seconds.
        """
        delay = random.uniform(min_seconds, max_seconds)
        await asyncio.sleep(delay)

    async def scroll_to_element(self, element: ElementHandle) -> None:
        """Scroll in several small steps until the element is vertically centred.

        Falls back to ``scroll_into_view_if_needed`` when the element has no
        bounding box or when the stepped scroll raises.

        Args:
            element: Element to bring to the middle of the viewport.
        """
        try:
            box = await element.bounding_box()

            if box:
                viewport = self.page.viewport_size
                if viewport:
                    viewport_height = viewport['height']
                else:
                    # viewport_size can be None; ask the page instead.
                    viewport_height = await self.page.evaluate('window.innerHeight')

                current_scroll = await self.page.evaluate('window.pageYOffset')

                # bounding_box() is relative to the viewport, so the offset of
                # the element centre from the viewport centre is already the
                # distance to scroll; it must not be compared with the
                # absolute page offset.
                distance = box['y'] + box['height'] / 2 - viewport_height / 2
                steps = max(3, int(abs(distance) / 100))  # roughly one step per 100px

                for i in range(steps):
                    progress = (i + 1) / steps
                    scroll_y = current_scroll + distance * progress
                    await self.page.evaluate('y => window.scrollTo(0, y)', scroll_y)
                    await asyncio.sleep(random.uniform(0.05, 0.1))

                logger.success("[平滑滚动] 已滚动到元素位置")
            else:
                # No bounding box: let Playwright scroll the element into view.
                await element.scroll_into_view_if_needed()
                logger.success("[平滑滚动] 使用降级方案滚动")
        except Exception as e:
            logger.error(f"[平滑滚动] 滚动失败: {e}")
            # Last resort; a failure here is deliberately ignored.
            try:
                await element.scroll_into_view_if_needed()
            except Exception:
                pass

    async def debug_print_inputs(self) -> None:
        """Log type, placeholder, name, class and visibility of the first 10 inputs."""
        try:
            logger.info("=" * 50)
            logger.info("[调试] 打印页面所有输入框...")
            logger.info("=" * 50)

            inputs = await self.page.query_selector_all('input')
            logger.info(f"[调试] 页面上找到 {len(inputs)} 个input元素")

            for i, inp in enumerate(inputs[:10]):  # only the first 10
                try:
                    placeholder = await inp.get_attribute('placeholder')
                    input_type = await inp.get_attribute('type')
                    name = await inp.get_attribute('name')
                    class_name = await inp.get_attribute('class')
                    is_visible = await inp.is_visible()

                    logger.info(f"[调试] Input {i+1}:")
                    logger.info(f" - type: {input_type}")
                    logger.info(f" - placeholder: {placeholder}")
                    logger.info(f" - name: {name}")
                    logger.info(f" - class: {class_name}")
                    logger.info(f" - visible: {is_visible}")
                except Exception as e:
                    logger.debug(f"[调试] 获取Input {i+1}信息失败: {e}")

            logger.info("=" * 50)
        except Exception as e:
            logger.error(f"[调试] 打印输入框信息失败: {e}")

    async def debug_print_buttons(self) -> None:
        """Log text, class and visibility of the first 10 button-like elements."""
        try:
            logger.info("=" * 50)
            logger.info("[调试] 打印页面所有按钮...")
            logger.info("=" * 50)

            buttons = await self.page.query_selector_all('button, div[role="button"], span[role="button"]')
            logger.info(f"[调试] 页面上找到 {len(buttons)} 个按钮元素")

            for i, btn in enumerate(buttons[:10]):  # only the first 10
                try:
                    text = await btn.inner_text()
                    class_name = await btn.get_attribute('class')
                    is_visible = await btn.is_visible()

                    logger.info(f"[调试] Button {i+1}:")
                    logger.info(f" - text: {text}")
                    logger.info(f" - class: {class_name}")
                    logger.info(f" - visible: {is_visible}")
                except Exception as e:
                    logger.debug(f"[调试] 获取Button {i+1}信息失败: {e}")

            logger.info("=" * 50)
        except Exception as e:
            logger.error(f"[调试] 打印按钮信息失败: {e}")
# Commonly used selector configuration, kept together in one structured place.
class XHSSelectors:
    """Selector lists for the Xiaohongshu login pages.

    Each attribute is a list of selector strings ordered from most to least
    preferred.
    """

    # Phone-number input (creator centre)
    PHONE_INPUT_CREATOR: List[str] = [
        'input[placeholder="手机号"]',
        'input.css-nt440g',
        'input[placeholder*="手机号"]',
        'input[type="tel"]',
    ]

    # Phone-number input (Xiaohongshu home page)
    PHONE_INPUT_HOME: List[str] = [
        'input[placeholder="输入手机号"]',
        'label.phone input',
        'input[name="blur"]',
    ]

    # Phone-number input, fallback selectors
    PHONE_INPUT_FALLBACK: List[str] = [
        'input[type="text"]',
        'input',
    ]

    # Verification-code input (creator centre)
    CODE_INPUT_CREATOR: List[str] = [
        'input[placeholder="验证码"]',
        'input.css-1ge5flv',
        'input[placeholder*="验证码"]',
        'input[type="text"]:not([placeholder*="手机"])',
    ]

    # Verification-code input (Xiaohongshu home page)
    CODE_INPUT_HOME: List[str] = [
        'input[placeholder="输入验证码"]',
        'label.auth-code input',
        'input[type="number"]',
        'input[placeholder*="验证码"]',
    ]

    # "Send verification code" button (creator centre)
    SEND_CODE_BTN_CREATOR: List[str] = [
        'div.css-uyobdj',
        'text="发送验证码"',
        'div:has-text("发送验证码")',
        'text="重新发送"',
        'text="获取验证码"',
    ]

    # "Send verification code" button (Xiaohongshu home page)
    SEND_CODE_BTN_HOME: List[str] = [
        'span.code-button',
        '.code-button',
        'text="获取验证码"',
        'span:has-text("获取验证码")',
    ]

    # Login button
    LOGIN_BTN: List[str] = [
        'button:has-text("登录")',
        'text="登录"',
        'div:has-text("登录")',
        '.login-button',
        'button.login',
    ]

    # Agreement checkbox (Xiaohongshu home page)
    AGREEMENT_CHECKBOX: List[str] = [
        '.agree-icon',
        '.agreements .icon-wrapper',
        'span.agree-icon',
        '.icon-wrapper',
    ]
# Convenience factory exported for callers.
def get_login_helper(page: Page) -> XHSLoginHelper:
    """Build a login helper bound to ``page``.

    Args:
        page: Playwright ``Page`` object.

    Returns:
        A new ``XHSLoginHelper`` wrapping ``page``.
    """
    helper = XHSLoginHelper(page)
    return helper