2026-01-16 22:06:46 +08:00
|
|
|
|
"""
|
|
|
|
|
|
数据库管理器
|
|
|
|
|
|
使用 MySQL 数据库
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
|
import random
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
from typing import List, Dict, Optional, Union
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from loguru import logger
|
|
|
|
|
|
from config import Config
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
import pymysql
|
|
|
|
|
|
MYSQL_AVAILABLE = True
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
MYSQL_AVAILABLE = False
|
|
|
|
|
|
logger.error("pymysql 未安装,请安装: pip install pymysql")
|
|
|
|
|
|
raise ImportError("使用 MySQL 需要安装 pymysql: pip install pymysql")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DatabaseManager:
|
|
|
|
|
|
"""数据库管理器,使用 MySQL"""
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, db_path: str = None):
|
|
|
|
|
|
"""
|
|
|
|
|
|
初始化数据库连接
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
db_path: 忽略,仅为了兼容性
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not MYSQL_AVAILABLE:
|
|
|
|
|
|
raise ImportError("使用 MySQL 需要安装 pymysql: pip install pymysql")
|
|
|
|
|
|
|
|
|
|
|
|
self.db_config = {
|
|
|
|
|
|
'host': Config.MYSQL_HOST,
|
|
|
|
|
|
'port': Config.MYSQL_PORT,
|
|
|
|
|
|
'user': Config.MYSQL_USER,
|
|
|
|
|
|
'password': Config.MYSQL_PASSWORD,
|
|
|
|
|
|
'database': Config.MYSQL_DATABASE,
|
|
|
|
|
|
'charset': 'utf8mb4'
|
|
|
|
|
|
}
|
2026-01-21 14:33:10 +08:00
|
|
|
|
|
2026-01-16 22:06:46 +08:00
|
|
|
|
def get_connection(self) -> 'pymysql.Connection':
|
|
|
|
|
|
"""获取MySQL数据库连接"""
|
|
|
|
|
|
conn = pymysql.connect(**self.db_config)
|
|
|
|
|
|
return conn
|
|
|
|
|
|
|
|
|
|
|
|
def _dict_from_row(self, row) -> Dict:
|
|
|
|
|
|
"""将数据库行转换为字典"""
|
|
|
|
|
|
if row is None:
|
|
|
|
|
|
return None
|
|
|
|
|
|
return dict(row) if isinstance(row, dict) else row
|
|
|
|
|
|
|
|
|
|
|
|
def _get_placeholder(self) -> str:
|
|
|
|
|
|
"""获取SQL占位符,MySQL使用 %s"""
|
|
|
|
|
|
return '%s'
|
|
|
|
|
|
|
|
|
|
|
|
def _execute_query(self, conn, sql: str, params: tuple = None):
|
|
|
|
|
|
"""执行SQL查询,使用DictCursor"""
|
|
|
|
|
|
cursor = conn.cursor(pymysql.cursors.DictCursor)
|
|
|
|
|
|
|
|
|
|
|
|
if params:
|
|
|
|
|
|
cursor.execute(sql, params)
|
|
|
|
|
|
else:
|
|
|
|
|
|
cursor.execute(sql)
|
|
|
|
|
|
|
|
|
|
|
|
return cursor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SiteManager(DatabaseManager):
|
|
|
|
|
|
"""站点管理"""
|
|
|
|
|
|
|
|
|
|
|
|
def add_site(self, site_url: str, site_name: str = None,
|
2026-01-21 14:33:10 +08:00
|
|
|
|
site_dimension: str = None, query_word: str = None,
|
|
|
|
|
|
frequency: int = None,
|
2026-01-16 22:06:46 +08:00
|
|
|
|
time_start: str = None, time_end: str = None,
|
|
|
|
|
|
interval_minutes: int = None) -> Optional[int]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
添加新站点
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
site_url: 网站URL
|
|
|
|
|
|
site_name: 网站名称
|
|
|
|
|
|
site_dimension: 网站维度标签
|
2026-01-21 14:33:10 +08:00
|
|
|
|
query_word: 来源查询词(从哪个关键词抓取)
|
2026-01-16 22:06:46 +08:00
|
|
|
|
frequency: 频次
|
|
|
|
|
|
time_start: 开始时间
|
|
|
|
|
|
time_end: 结束时间
|
|
|
|
|
|
interval_minutes: 执行间隔(分钟)
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
站点ID,失败返回None
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
|
|
|
|
|
|
cursor = conn.cursor(pymysql.cursors.DictCursor)
|
|
|
|
|
|
|
|
|
|
|
|
# 生成随机目标点击次数(兼容原有逻辑)
|
|
|
|
|
|
target_clicks = random.randint(
|
|
|
|
|
|
getattr(Config, 'MIN_CLICK_COUNT', 1),
|
|
|
|
|
|
getattr(Config, 'MAX_CLICK_COUNT', 10)
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
sql = f"""
|
|
|
|
|
|
INSERT INTO ai_mip_site (
|
|
|
|
|
|
site_url, site_name, status, frequency,
|
|
|
|
|
|
time_start, time_end, interval_minutes,
|
2026-01-21 14:33:10 +08:00
|
|
|
|
site_dimension, query_word, created_by
|
|
|
|
|
|
) VALUES ({ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph})
|
2026-01-16 22:06:46 +08:00
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
cursor.execute(sql, (
|
|
|
|
|
|
site_url,
|
|
|
|
|
|
site_name or site_url,
|
|
|
|
|
|
'active',
|
|
|
|
|
|
frequency or 1,
|
|
|
|
|
|
time_start or '09:00:00',
|
|
|
|
|
|
time_end or '21:00:00',
|
2026-01-21 14:33:10 +08:00
|
|
|
|
interval_minutes or 30,
|
2026-01-16 22:06:46 +08:00
|
|
|
|
site_dimension,
|
2026-01-21 14:33:10 +08:00
|
|
|
|
query_word, # 新增:来源查询词
|
2026-01-16 22:06:46 +08:00
|
|
|
|
'system'
|
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
|
|
site_id = cursor.lastrowid
|
|
|
|
|
|
conn.commit()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
|
2026-01-21 14:33:10 +08:00
|
|
|
|
logger.info(f"成功添加站点: {site_url} (ID: {site_id}, 查询词: {query_word})")
|
2026-01-16 22:06:46 +08:00
|
|
|
|
return site_id
|
|
|
|
|
|
|
|
|
|
|
|
except pymysql.IntegrityError:
|
|
|
|
|
|
logger.warning(f"站点URL已存在: {site_url}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"添加站点失败: {str(e)}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def get_site_by_url(self, site_url: str) -> Optional[Dict]:
|
|
|
|
|
|
"""根据URL获取站点信息"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
cursor = self._execute_query(conn, f"SELECT * FROM ai_mip_site WHERE site_url = {ph}", (site_url,))
|
|
|
|
|
|
row = cursor.fetchone()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
return self._dict_from_row(row) if row else None
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"查询站点失败: {str(e)}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def get_site_by_id(self, site_id: int) -> Optional[Dict]:
|
|
|
|
|
|
"""根据ID获取站点信息"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
cursor = self._execute_query(conn, f"SELECT * FROM ai_mip_site WHERE id = {ph}", (site_id,))
|
|
|
|
|
|
row = cursor.fetchone()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
return self._dict_from_row(row) if row else None
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"查询站点失败: {str(e)}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def get_active_sites(self) -> List[Dict]:
|
|
|
|
|
|
"""获取所有活跃站点"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
cursor = self._execute_query(conn, "SELECT * FROM ai_mip_site WHERE status = 'active' ORDER BY created_at DESC")
|
|
|
|
|
|
rows = cursor.fetchall()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
return [self._dict_from_row(row) for row in rows]
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"查询活跃站点失败: {str(e)}")
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
def get_all_sites(self) -> List[Dict]:
|
|
|
|
|
|
"""获取所有站点"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
cursor = self._execute_query(conn, "SELECT * FROM ai_mip_site ORDER BY created_at DESC")
|
|
|
|
|
|
rows = cursor.fetchall()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
return [self._dict_from_row(row) for row in rows]
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"查询所有站点失败: {str(e)}")
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
def update_site_status(self, site_id: int, status: str) -> bool:
|
|
|
|
|
|
"""更新站点状态"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
cursor = self._execute_query(
|
|
|
|
|
|
conn,
|
|
|
|
|
|
f"UPDATE ai_mip_site SET status = {ph}, updated_by = {ph} WHERE id = {ph}",
|
|
|
|
|
|
(status, 'system', site_id)
|
|
|
|
|
|
)
|
|
|
|
|
|
conn.commit()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
logger.info(f"更新站点状态: ID={site_id}, status={status}")
|
|
|
|
|
|
return True
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"更新站点状态失败: {str(e)}")
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
def increment_click_count(self, site_id: int, count: int = 1) -> bool:
|
|
|
|
|
|
"""增加点击次数"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
cursor = self._execute_query(
|
|
|
|
|
|
conn,
|
|
|
|
|
|
f"UPDATE ai_mip_site SET click_count = click_count + {ph} WHERE id = {ph}",
|
|
|
|
|
|
(count, site_id)
|
|
|
|
|
|
)
|
|
|
|
|
|
conn.commit()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
return True
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"更新点击次数失败: {str(e)}")
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
def increment_reply_count(self, site_id: int, count: int = 1) -> bool:
|
|
|
|
|
|
"""增加回复次数"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
cursor = self._execute_query(
|
|
|
|
|
|
conn,
|
|
|
|
|
|
f"UPDATE ai_mip_site SET reply_count = reply_count + {ph} WHERE id = {ph}",
|
|
|
|
|
|
(count, site_id)
|
|
|
|
|
|
)
|
|
|
|
|
|
conn.commit()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
return True
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"更新回复次数失败: {str(e)}")
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
def delete_site(self, site_id: int) -> bool:
|
|
|
|
|
|
"""删除站点"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
cursor = self._execute_query(conn, f"DELETE FROM ai_mip_site WHERE id = {ph}", (site_id,))
|
|
|
|
|
|
conn.commit()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
logger.info(f"已删除站点: ID={site_id}")
|
|
|
|
|
|
return True
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"删除站点失败: {str(e)}")
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ClickManager(DatabaseManager):
|
|
|
|
|
|
"""点击记录管理"""
|
|
|
|
|
|
|
|
|
|
|
|
def record_click(self, site_id: int, site_url: str,
|
|
|
|
|
|
user_ip: str = None, device_type: str = 'pc',
|
|
|
|
|
|
task_id: str = None) -> Optional[int]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
记录一次点击
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
site_id: 站点ID
|
|
|
|
|
|
site_url: 站点URL
|
|
|
|
|
|
user_ip: 用户IP(代理IP)
|
|
|
|
|
|
device_type: 设备类型
|
|
|
|
|
|
task_id: 任务ID
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
点击记录ID
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
|
|
|
|
|
|
cursor = conn.cursor(pymysql.cursors.DictCursor)
|
|
|
|
|
|
|
|
|
|
|
|
sql = f"""
|
|
|
|
|
|
INSERT INTO ai_mip_click (
|
|
|
|
|
|
site_id, site_url, click_time, user_ip,
|
|
|
|
|
|
device_type, task_id, operator
|
|
|
|
|
|
) VALUES ({ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph})
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
cursor.execute(sql, (
|
|
|
|
|
|
site_id,
|
|
|
|
|
|
site_url,
|
|
|
|
|
|
datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
|
|
|
|
|
user_ip,
|
|
|
|
|
|
device_type,
|
|
|
|
|
|
task_id or f'TASK_{datetime.now().strftime("%Y%m%d%H%M%S")}',
|
|
|
|
|
|
'RPA_SYSTEM'
|
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
|
|
click_id = cursor.lastrowid
|
|
|
|
|
|
conn.commit()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
|
|
|
|
|
|
# 更新站点点击次数
|
|
|
|
|
|
site_mgr = SiteManager()
|
|
|
|
|
|
site_mgr.increment_click_count(site_id)
|
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"记录点击: site_id={site_id}, click_id={click_id}")
|
|
|
|
|
|
return click_id
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"记录点击失败: {str(e)}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def get_clicks_by_site(self, site_id: int, limit: int = 100) -> List[Dict]:
|
|
|
|
|
|
"""获取站点的点击记录"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
cursor = conn.cursor()
|
|
|
|
|
|
cursor.execute("""
|
|
|
|
|
|
SELECT * FROM ai_mip_click
|
|
|
|
|
|
WHERE site_id = ?
|
|
|
|
|
|
ORDER BY click_time DESC
|
|
|
|
|
|
LIMIT ?
|
|
|
|
|
|
""", (site_id, limit))
|
|
|
|
|
|
rows = cursor.fetchall()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
return [self._dict_from_row(row) for row in rows]
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"查询点击记录失败: {str(e)}")
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
def get_click_count_by_site(self, site_id: int) -> int:
|
|
|
|
|
|
"""获取站点的总点击次数"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
cursor = conn.cursor()
|
|
|
|
|
|
cursor.execute("SELECT COUNT(*) as count FROM ai_mip_click WHERE site_id = ?", (site_id,))
|
|
|
|
|
|
row = cursor.fetchone()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
return row['count'] if row else 0
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"查询点击次数失败: {str(e)}")
|
|
|
|
|
|
return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class InteractionManager(DatabaseManager):
|
|
|
|
|
|
"""互动记录管理"""
|
|
|
|
|
|
|
|
|
|
|
|
def record_interaction(self, site_id: int, click_id: int = None,
|
|
|
|
|
|
task_id: str = None, interaction_type: str = 'reply',
|
|
|
|
|
|
reply_content: str = None, is_successful: bool = False,
|
|
|
|
|
|
response_received: bool = False, response_content: str = None,
|
|
|
|
|
|
proxy_ip: str = None, fingerprint_id: str = None,
|
|
|
|
|
|
error_message: str = None) -> Optional[int]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
记录一次互动
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
site_id: 站点ID
|
|
|
|
|
|
click_id: 关联的点击记录ID
|
|
|
|
|
|
task_id: 任务ID
|
|
|
|
|
|
interaction_type: 互动类型(reply/comment等)
|
|
|
|
|
|
reply_content: 回复内容
|
|
|
|
|
|
is_successful: 是否成功
|
|
|
|
|
|
response_received: 是否收到回复
|
|
|
|
|
|
response_content: 对方回复内容
|
|
|
|
|
|
proxy_ip: 使用的代理IP
|
|
|
|
|
|
fingerprint_id: 浏览器指纹ID
|
|
|
|
|
|
error_message: 错误信息
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
互动记录ID
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
|
|
|
|
|
|
cursor = conn.cursor(pymysql.cursors.DictCursor)
|
|
|
|
|
|
|
|
|
|
|
|
now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
|
|
|
|
|
|
|
|
|
|
|
sql = f"""
|
|
|
|
|
|
INSERT INTO ai_mip_interaction (
|
|
|
|
|
|
site_id, click_id, task_id, interaction_type,
|
|
|
|
|
|
interaction_time, interaction_status, reply_content,
|
|
|
|
|
|
execution_mode, browser_type, proxy_ip, fingerprint_id,
|
|
|
|
|
|
response_received, response_content, is_successful,
|
|
|
|
|
|
error_message, operator
|
|
|
|
|
|
) VALUES ({ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph})
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
cursor.execute(sql, (
|
|
|
|
|
|
site_id,
|
|
|
|
|
|
click_id,
|
|
|
|
|
|
task_id or f'TASK_{datetime.now().strftime("%Y%m%d%H%M%S")}',
|
|
|
|
|
|
interaction_type,
|
|
|
|
|
|
now,
|
|
|
|
|
|
'success' if is_successful else 'failed',
|
|
|
|
|
|
reply_content,
|
|
|
|
|
|
'auto',
|
|
|
|
|
|
'playwright',
|
|
|
|
|
|
proxy_ip,
|
|
|
|
|
|
fingerprint_id,
|
|
|
|
|
|
1 if response_received else 0,
|
|
|
|
|
|
response_content,
|
|
|
|
|
|
1 if is_successful else 0,
|
|
|
|
|
|
error_message,
|
|
|
|
|
|
'RPA_SYSTEM'
|
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
|
|
interaction_id = cursor.lastrowid
|
|
|
|
|
|
conn.commit()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
|
|
|
|
|
|
# 如果收到回复,更新站点回复次数
|
|
|
|
|
|
if response_received:
|
|
|
|
|
|
site_mgr = SiteManager()
|
|
|
|
|
|
site_mgr.increment_reply_count(site_id)
|
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"记录互动: site_id={site_id}, interaction_id={interaction_id}, success={is_successful}")
|
|
|
|
|
|
return interaction_id
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"记录互动失败: {str(e)}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def get_interactions_by_site(self, site_id: int, limit: int = 100) -> List[Dict]:
|
|
|
|
|
|
"""获取站点的互动记录"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
cursor = conn.cursor()
|
|
|
|
|
|
cursor.execute("""
|
|
|
|
|
|
SELECT * FROM ai_mip_interaction
|
|
|
|
|
|
WHERE site_id = ?
|
|
|
|
|
|
ORDER BY interaction_time DESC
|
|
|
|
|
|
LIMIT ?
|
|
|
|
|
|
""", (site_id, limit))
|
|
|
|
|
|
rows = cursor.fetchall()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
return [self._dict_from_row(row) for row in rows]
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"查询互动记录失败: {str(e)}")
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
def get_successful_interactions_count(self, site_id: int) -> int:
|
|
|
|
|
|
"""获取站点的成功互动次数"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
cursor = conn.cursor()
|
|
|
|
|
|
cursor.execute("""
|
|
|
|
|
|
SELECT COUNT(*) as count FROM ai_mip_interaction
|
|
|
|
|
|
WHERE site_id = ? AND is_successful = 1
|
|
|
|
|
|
""", (site_id,))
|
|
|
|
|
|
row = cursor.fetchone()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
return row['count'] if row else 0
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"查询成功互动次数失败: {str(e)}")
|
|
|
|
|
|
return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class StatisticsManager(DatabaseManager):
|
|
|
|
|
|
"""统计数据管理"""
|
|
|
|
|
|
|
|
|
|
|
|
def get_statistics(self) -> Dict:
|
|
|
|
|
|
"""
|
|
|
|
|
|
获取全局统计数据
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
统计数据字典
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
|
|
|
|
|
|
# 站点统计
|
|
|
|
|
|
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_site")
|
|
|
|
|
|
total_sites = cursor.fetchone()['total']
|
|
|
|
|
|
|
|
|
|
|
|
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_site WHERE status = 'active'")
|
|
|
|
|
|
active_sites = cursor.fetchone()['total']
|
|
|
|
|
|
|
|
|
|
|
|
# 点击统计
|
|
|
|
|
|
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_click")
|
|
|
|
|
|
total_clicks = cursor.fetchone()['total']
|
|
|
|
|
|
|
|
|
|
|
|
# 互动统计
|
|
|
|
|
|
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_interaction WHERE response_received = 1")
|
|
|
|
|
|
total_replies = cursor.fetchone()['total']
|
|
|
|
|
|
|
|
|
|
|
|
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_interaction WHERE is_successful = 1")
|
|
|
|
|
|
successful_interactions = cursor.fetchone()['total']
|
|
|
|
|
|
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
|
|
|
|
|
|
reply_rate = (total_replies / total_clicks * 100) if total_clicks > 0 else 0
|
|
|
|
|
|
success_rate = (successful_interactions / total_clicks * 100) if total_clicks > 0 else 0
|
|
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
|
'total_sites': total_sites,
|
|
|
|
|
|
'active_sites': active_sites,
|
|
|
|
|
|
'total_clicks': total_clicks,
|
|
|
|
|
|
'total_replies': total_replies,
|
|
|
|
|
|
'successful_interactions': successful_interactions,
|
|
|
|
|
|
'reply_rate': f"{reply_rate:.2f}%",
|
|
|
|
|
|
'success_rate': f"{success_rate:.2f}%"
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"获取统计数据失败: {str(e)}")
|
|
|
|
|
|
return {}
|
|
|
|
|
|
|
|
|
|
|
|
def get_site_statistics(self, site_id: int) -> Dict:
|
|
|
|
|
|
"""
|
|
|
|
|
|
获取单个站点的统计数据
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
site_id: 站点ID
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
站点统计数据
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
site_mgr = SiteManager(self.db_path)
|
|
|
|
|
|
click_mgr = ClickManager(self.db_path)
|
|
|
|
|
|
interaction_mgr = InteractionManager(self.db_path)
|
|
|
|
|
|
|
|
|
|
|
|
site = site_mgr.get_site_by_id(site_id)
|
|
|
|
|
|
if not site:
|
|
|
|
|
|
return {}
|
|
|
|
|
|
|
|
|
|
|
|
click_count = click_mgr.get_click_count_by_site(site_id)
|
|
|
|
|
|
success_count = interaction_mgr.get_successful_interactions_count(site_id)
|
|
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
|
'site_url': site['site_url'],
|
|
|
|
|
|
'site_name': site['site_name'],
|
|
|
|
|
|
'status': site['status'],
|
|
|
|
|
|
'click_count': click_count,
|
|
|
|
|
|
'reply_count': site['reply_count'],
|
|
|
|
|
|
'successful_interactions': success_count,
|
|
|
|
|
|
'reply_rate': f"{(site['reply_count'] / click_count * 100) if click_count > 0 else 0:.2f}%"
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"获取站点统计失败: {str(e)}")
|
|
|
|
|
|
return {}
|
2026-01-21 14:33:10 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class QueryTaskManager(DatabaseManager):
|
|
|
|
|
|
"""查询任务管理器"""
|
|
|
|
|
|
|
|
|
|
|
|
def create_task(self, query_word: str, task_date: str = None,
|
|
|
|
|
|
query_type: str = 'keyword', threshold_max: int = 100,
|
|
|
|
|
|
priority: int = 5, category: str = None,
|
|
|
|
|
|
source_platform: str = 'baidu',
|
|
|
|
|
|
created_by: str = 'system',
|
|
|
|
|
|
remark: str = None) -> Optional[int]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
创建查询任务
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
query_word: 查询词
|
|
|
|
|
|
task_date: 任务日期 YYYYMMDD,默认今天
|
|
|
|
|
|
query_type: 查询类型
|
|
|
|
|
|
threshold_max: 最大抓取数量
|
|
|
|
|
|
priority: 优先级 1-10
|
|
|
|
|
|
category: 分类标签
|
|
|
|
|
|
source_platform: 来源平台
|
|
|
|
|
|
created_by: 创建人
|
|
|
|
|
|
remark: 备注
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
任务ID,失败返回None
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
if task_date is None:
|
|
|
|
|
|
task_date = datetime.now().strftime('%Y%m%d')
|
|
|
|
|
|
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
|
|
|
|
|
|
sql = f"""
|
|
|
|
|
|
INSERT INTO ai_mip_query_task (
|
|
|
|
|
|
query_word, query_type, task_date, threshold_max,
|
|
|
|
|
|
priority, category, source_platform, created_by, remark
|
|
|
|
|
|
) VALUES ({ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph})
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
cursor = conn.cursor()
|
|
|
|
|
|
cursor.execute(sql, (
|
|
|
|
|
|
query_word, query_type, task_date, threshold_max,
|
|
|
|
|
|
priority, category, source_platform, created_by, remark
|
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
|
|
task_id = cursor.lastrowid
|
|
|
|
|
|
conn.commit()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"创建查询任务成功: {query_word} (ID: {task_id})")
|
|
|
|
|
|
return task_id
|
|
|
|
|
|
|
|
|
|
|
|
except pymysql.IntegrityError:
|
|
|
|
|
|
logger.warning(f"查询任务已存在: {query_word} @ {task_date}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"创建查询任务失败: {str(e)}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def get_task_by_id(self, task_id: int) -> Optional[Dict]:
|
|
|
|
|
|
"""根据ID获取任务"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
cursor = self._execute_query(
|
|
|
|
|
|
conn,
|
|
|
|
|
|
f"SELECT * FROM ai_mip_query_task WHERE id = {ph}",
|
|
|
|
|
|
(task_id,)
|
|
|
|
|
|
)
|
|
|
|
|
|
row = cursor.fetchone()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
return self._dict_from_row(row) if row else None
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"查询任务失败: {str(e)}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def get_ready_tasks(self, limit: int = None) -> List[Dict]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
获取准备执行的任务(按优先级排序)
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
limit: 限制数量
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
任务列表
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
sql = "SELECT * FROM ai_mip_query_task WHERE status = 'ready' ORDER BY priority ASC, created_at ASC"
|
|
|
|
|
|
|
|
|
|
|
|
if limit:
|
|
|
|
|
|
sql += f" LIMIT {limit}"
|
|
|
|
|
|
|
|
|
|
|
|
cursor = self._execute_query(conn, sql)
|
|
|
|
|
|
rows = cursor.fetchall()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
return [self._dict_from_row(row) for row in rows]
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"查询ready任务失败: {str(e)}")
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
def get_tasks_by_date(self, task_date: str) -> List[Dict]:
|
|
|
|
|
|
"""根据日期获取任务"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
cursor = self._execute_query(
|
|
|
|
|
|
conn,
|
|
|
|
|
|
f"SELECT * FROM ai_mip_query_task WHERE task_date = {ph} ORDER BY priority ASC",
|
|
|
|
|
|
(task_date,)
|
|
|
|
|
|
)
|
|
|
|
|
|
rows = cursor.fetchall()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
return [self._dict_from_row(row) for row in rows]
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"查询日期任务失败: {str(e)}")
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
def update_task_status(self, task_id: int, status: str,
|
|
|
|
|
|
error_message: str = None) -> bool:
|
|
|
|
|
|
"""
|
|
|
|
|
|
更新任务状态
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
task_id: 任务ID
|
|
|
|
|
|
status: 状态 ready/doing/failed/finished/closed
|
|
|
|
|
|
error_message: 错误信息(失败时)
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
|
|
|
|
|
|
# 根据状态更新时间字段
|
|
|
|
|
|
timestamp_field = None
|
|
|
|
|
|
if status == 'doing':
|
|
|
|
|
|
timestamp_field = 'started_at'
|
|
|
|
|
|
elif status in ['finished', 'failed']:
|
|
|
|
|
|
timestamp_field = 'finished_at'
|
|
|
|
|
|
elif status == 'closed':
|
|
|
|
|
|
timestamp_field = 'closed_at'
|
|
|
|
|
|
|
|
|
|
|
|
if timestamp_field:
|
|
|
|
|
|
sql = f"""
|
|
|
|
|
|
UPDATE ai_mip_query_task
|
|
|
|
|
|
SET status = {ph}, {timestamp_field} = NOW()
|
|
|
|
|
|
WHERE id = {ph}
|
|
|
|
|
|
"""
|
|
|
|
|
|
params = (status, task_id)
|
|
|
|
|
|
else:
|
|
|
|
|
|
sql = f"UPDATE ai_mip_query_task SET status = {ph} WHERE id = {ph}"
|
|
|
|
|
|
params = (status, task_id)
|
|
|
|
|
|
|
|
|
|
|
|
# 如果有错误信息,更新error_message
|
|
|
|
|
|
if error_message:
|
|
|
|
|
|
sql = sql.replace('WHERE', f", error_message = '{error_message}' WHERE")
|
|
|
|
|
|
|
|
|
|
|
|
cursor = conn.cursor()
|
|
|
|
|
|
cursor.execute(sql, params)
|
|
|
|
|
|
conn.commit()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"更新任务状态: {task_id} -> {status}")
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"更新任务状态失败: {str(e)}")
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
def increment_crawl_count(self, task_id: int,
|
|
|
|
|
|
crawl_count: int = 1,
|
|
|
|
|
|
valid_count: int = 0) -> bool:
|
|
|
|
|
|
"""
|
|
|
|
|
|
增加抓取计数
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
task_id: 任务ID
|
|
|
|
|
|
crawl_count: 抓取URL数量
|
|
|
|
|
|
valid_count: 有效URL数量(带广告)
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
|
|
|
|
|
|
sql = f"""
|
|
|
|
|
|
UPDATE ai_mip_query_task
|
|
|
|
|
|
SET crawl_url_count = crawl_url_count + {ph},
|
|
|
|
|
|
valid_url_count = valid_url_count + {ph},
|
|
|
|
|
|
current_count = current_count + {ph}
|
|
|
|
|
|
WHERE id = {ph}
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
cursor = conn.cursor()
|
|
|
|
|
|
cursor.execute(sql, (crawl_count, valid_count, valid_count, task_id))
|
|
|
|
|
|
conn.commit()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"更新抓取计数失败: {str(e)}")
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
def check_threshold(self, task_id: int) -> bool:
|
|
|
|
|
|
"""
|
|
|
|
|
|
检查是否达到阈值,达到则自动关闭任务
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
True=已达到阈值, False=未达到
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
task = self.get_task_by_id(task_id)
|
|
|
|
|
|
if not task:
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
if task['current_count'] >= task['threshold_max']:
|
|
|
|
|
|
self.update_task_status(task_id, 'closed')
|
|
|
|
|
|
logger.info(f"任务达到阈值并关闭: {task['query_word']} ({task['current_count']}/{task['threshold_max']})")
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"检查阈值失败: {str(e)}")
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
def get_task_statistics(self, task_date: str = None) -> Dict:
|
|
|
|
|
|
"""
|
|
|
|
|
|
获取任务统计信息
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
task_date: 日期,为None则统计所有
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
conn = self.get_connection()
|
|
|
|
|
|
|
|
|
|
|
|
if task_date:
|
|
|
|
|
|
ph = self._get_placeholder()
|
|
|
|
|
|
where_clause = f"WHERE task_date = {ph}"
|
|
|
|
|
|
params = (task_date,)
|
|
|
|
|
|
else:
|
|
|
|
|
|
where_clause = ""
|
|
|
|
|
|
params = None
|
|
|
|
|
|
|
|
|
|
|
|
sql = f"""
|
|
|
|
|
|
SELECT
|
|
|
|
|
|
COUNT(*) as total_tasks,
|
|
|
|
|
|
SUM(CASE WHEN status = 'ready' THEN 1 ELSE 0 END) as ready_count,
|
|
|
|
|
|
SUM(CASE WHEN status = 'doing' THEN 1 ELSE 0 END) as doing_count,
|
|
|
|
|
|
SUM(CASE WHEN status = 'finished' THEN 1 ELSE 0 END) as finished_count,
|
|
|
|
|
|
SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed_count,
|
|
|
|
|
|
SUM(CASE WHEN status = 'closed' THEN 1 ELSE 0 END) as closed_count,
|
|
|
|
|
|
SUM(crawl_url_count) as total_crawled,
|
|
|
|
|
|
SUM(valid_url_count) as total_valid
|
|
|
|
|
|
FROM ai_mip_query_task
|
|
|
|
|
|
{where_clause}
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
cursor = self._execute_query(conn, sql, params)
|
|
|
|
|
|
row = cursor.fetchone()
|
|
|
|
|
|
conn.close()
|
|
|
|
|
|
|
|
|
|
|
|
return self._dict_from_row(row) if row else {}
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"获取任务统计失败: {str(e)}")
|
|
|
|
|
|
return {}
|