819 lines
29 KiB
Python
819 lines
29 KiB
Python
"""
|
||
数据库管理器
|
||
使用 MySQL 数据库
|
||
"""
|
||
|
||
import json
|
||
import random
|
||
from datetime import datetime
|
||
from typing import List, Dict, Optional, Union
|
||
from pathlib import Path
|
||
from loguru import logger
|
||
from config import Config
|
||
|
||
try:
|
||
import pymysql
|
||
MYSQL_AVAILABLE = True
|
||
except ImportError:
|
||
MYSQL_AVAILABLE = False
|
||
logger.error("pymysql 未安装,请安装: pip install pymysql")
|
||
raise ImportError("使用 MySQL 需要安装 pymysql: pip install pymysql")
|
||
|
||
|
||
class DatabaseManager:
|
||
"""数据库管理器,使用 MySQL"""
|
||
|
||
def __init__(self, db_path: str = None):
|
||
"""
|
||
初始化数据库连接
|
||
|
||
Args:
|
||
db_path: 忽略,仅为了兼容性
|
||
"""
|
||
if not MYSQL_AVAILABLE:
|
||
raise ImportError("使用 MySQL 需要安装 pymysql: pip install pymysql")
|
||
|
||
self.db_config = {
|
||
'host': Config.MYSQL_HOST,
|
||
'port': Config.MYSQL_PORT,
|
||
'user': Config.MYSQL_USER,
|
||
'password': Config.MYSQL_PASSWORD,
|
||
'database': Config.MYSQL_DATABASE,
|
||
'charset': 'utf8mb4'
|
||
}
|
||
|
||
def get_connection(self) -> 'pymysql.Connection':
|
||
"""获取MySQL数据库连接"""
|
||
conn = pymysql.connect(**self.db_config)
|
||
return conn
|
||
|
||
def _dict_from_row(self, row) -> Dict:
|
||
"""将数据库行转换为字典"""
|
||
if row is None:
|
||
return None
|
||
return dict(row) if isinstance(row, dict) else row
|
||
|
||
def _get_placeholder(self) -> str:
|
||
"""获取SQL占位符,MySQL使用 %s"""
|
||
return '%s'
|
||
|
||
def _execute_query(self, conn, sql: str, params: tuple = None):
|
||
"""执行SQL查询,使用DictCursor"""
|
||
cursor = conn.cursor(pymysql.cursors.DictCursor)
|
||
|
||
if params:
|
||
cursor.execute(sql, params)
|
||
else:
|
||
cursor.execute(sql)
|
||
|
||
return cursor
|
||
|
||
|
||
class SiteManager(DatabaseManager):
|
||
"""站点管理"""
|
||
|
||
def add_site(self, site_url: str, site_name: str = None,
|
||
site_dimension: str = None, query_word: str = None,
|
||
frequency: int = None,
|
||
time_start: str = None, time_end: str = None,
|
||
interval_minutes: int = None) -> Optional[int]:
|
||
"""
|
||
添加新站点
|
||
|
||
Args:
|
||
site_url: 网站URL
|
||
site_name: 网站名称
|
||
site_dimension: 网站维度标签
|
||
query_word: 来源查询词(从哪个关键词抓取)
|
||
frequency: 频次
|
||
time_start: 开始时间
|
||
time_end: 结束时间
|
||
interval_minutes: 执行间隔(分钟)
|
||
|
||
Returns:
|
||
站点ID,失败返回None
|
||
"""
|
||
try:
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
|
||
cursor = conn.cursor(pymysql.cursors.DictCursor)
|
||
|
||
# 生成随机目标点击次数(兼容原有逻辑)
|
||
target_clicks = random.randint(
|
||
getattr(Config, 'MIN_CLICK_COUNT', 1),
|
||
getattr(Config, 'MAX_CLICK_COUNT', 10)
|
||
)
|
||
|
||
sql = f"""
|
||
INSERT INTO ai_mip_site (
|
||
site_url, site_name, status, frequency,
|
||
time_start, time_end, interval_minutes,
|
||
site_dimension, query_word, created_by
|
||
) VALUES ({ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph})
|
||
"""
|
||
|
||
cursor.execute(sql, (
|
||
site_url,
|
||
site_name or site_url,
|
||
'active',
|
||
frequency or 1,
|
||
time_start or '09:00:00',
|
||
time_end or '21:00:00',
|
||
interval_minutes or 30,
|
||
site_dimension,
|
||
query_word, # 新增:来源查询词
|
||
'system'
|
||
))
|
||
|
||
site_id = cursor.lastrowid
|
||
conn.commit()
|
||
conn.close()
|
||
|
||
logger.info(f"成功添加站点: {site_url} (ID: {site_id}, 查询词: {query_word})")
|
||
return site_id
|
||
|
||
except pymysql.IntegrityError:
|
||
logger.warning(f"站点URL已存在: {site_url}")
|
||
return None
|
||
except Exception as e:
|
||
logger.error(f"添加站点失败: {str(e)}")
|
||
return None
|
||
|
||
def get_site_by_url(self, site_url: str) -> Optional[Dict]:
|
||
"""根据URL获取站点信息"""
|
||
try:
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
cursor = self._execute_query(conn, f"SELECT * FROM ai_mip_site WHERE site_url = {ph}", (site_url,))
|
||
row = cursor.fetchone()
|
||
conn.close()
|
||
return self._dict_from_row(row) if row else None
|
||
except Exception as e:
|
||
logger.error(f"查询站点失败: {str(e)}")
|
||
return None
|
||
|
||
def get_site_by_id(self, site_id: int) -> Optional[Dict]:
|
||
"""根据ID获取站点信息"""
|
||
try:
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
cursor = self._execute_query(conn, f"SELECT * FROM ai_mip_site WHERE id = {ph}", (site_id,))
|
||
row = cursor.fetchone()
|
||
conn.close()
|
||
return self._dict_from_row(row) if row else None
|
||
except Exception as e:
|
||
logger.error(f"查询站点失败: {str(e)}")
|
||
return None
|
||
|
||
def get_active_sites(self) -> List[Dict]:
|
||
"""获取所有活跃站点"""
|
||
try:
|
||
conn = self.get_connection()
|
||
cursor = self._execute_query(conn, "SELECT * FROM ai_mip_site WHERE status = 'active' ORDER BY created_at DESC")
|
||
rows = cursor.fetchall()
|
||
conn.close()
|
||
return [self._dict_from_row(row) for row in rows]
|
||
except Exception as e:
|
||
logger.error(f"查询活跃站点失败: {str(e)}")
|
||
return []
|
||
|
||
def get_all_sites(self) -> List[Dict]:
|
||
"""获取所有站点"""
|
||
try:
|
||
conn = self.get_connection()
|
||
cursor = self._execute_query(conn, "SELECT * FROM ai_mip_site ORDER BY created_at DESC")
|
||
rows = cursor.fetchall()
|
||
conn.close()
|
||
return [self._dict_from_row(row) for row in rows]
|
||
except Exception as e:
|
||
logger.error(f"查询所有站点失败: {str(e)}")
|
||
return []
|
||
|
||
def update_site_status(self, site_id: int, status: str) -> bool:
|
||
"""更新站点状态"""
|
||
try:
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
cursor = self._execute_query(
|
||
conn,
|
||
f"UPDATE ai_mip_site SET status = {ph}, updated_by = {ph} WHERE id = {ph}",
|
||
(status, 'system', site_id)
|
||
)
|
||
conn.commit()
|
||
conn.close()
|
||
logger.info(f"更新站点状态: ID={site_id}, status={status}")
|
||
return True
|
||
except Exception as e:
|
||
logger.error(f"更新站点状态失败: {str(e)}")
|
||
return False
|
||
|
||
def increment_click_count(self, site_id: int, count: int = 1) -> bool:
|
||
"""增加点击次数"""
|
||
try:
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
cursor = self._execute_query(
|
||
conn,
|
||
f"UPDATE ai_mip_site SET click_count = click_count + {ph} WHERE id = {ph}",
|
||
(count, site_id)
|
||
)
|
||
conn.commit()
|
||
conn.close()
|
||
return True
|
||
except Exception as e:
|
||
logger.error(f"更新点击次数失败: {str(e)}")
|
||
return False
|
||
|
||
def increment_reply_count(self, site_id: int, count: int = 1) -> bool:
|
||
"""增加回复次数"""
|
||
try:
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
cursor = self._execute_query(
|
||
conn,
|
||
f"UPDATE ai_mip_site SET reply_count = reply_count + {ph} WHERE id = {ph}",
|
||
(count, site_id)
|
||
)
|
||
conn.commit()
|
||
conn.close()
|
||
return True
|
||
except Exception as e:
|
||
logger.error(f"更新回复次数失败: {str(e)}")
|
||
return False
|
||
|
||
def delete_site(self, site_id: int) -> bool:
|
||
"""删除站点"""
|
||
try:
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
cursor = self._execute_query(conn, f"DELETE FROM ai_mip_site WHERE id = {ph}", (site_id,))
|
||
conn.commit()
|
||
conn.close()
|
||
logger.info(f"已删除站点: ID={site_id}")
|
||
return True
|
||
except Exception as e:
|
||
logger.error(f"删除站点失败: {str(e)}")
|
||
return False
|
||
|
||
|
||
class ClickManager(DatabaseManager):
|
||
"""点击记录管理"""
|
||
|
||
def record_click(self, site_id: int, site_url: str,
|
||
user_ip: str = None, device_type: str = 'pc',
|
||
task_id: str = None) -> Optional[int]:
|
||
"""
|
||
记录一次点击
|
||
|
||
Args:
|
||
site_id: 站点ID
|
||
site_url: 站点URL
|
||
user_ip: 用户IP(代理IP)
|
||
device_type: 设备类型
|
||
task_id: 任务ID
|
||
|
||
Returns:
|
||
点击记录ID
|
||
"""
|
||
try:
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
|
||
cursor = conn.cursor(pymysql.cursors.DictCursor)
|
||
|
||
sql = f"""
|
||
INSERT INTO ai_mip_click (
|
||
site_id, site_url, click_time, user_ip,
|
||
device_type, task_id, operator
|
||
) VALUES ({ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph})
|
||
"""
|
||
|
||
cursor.execute(sql, (
|
||
site_id,
|
||
site_url,
|
||
datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
||
user_ip,
|
||
device_type,
|
||
task_id or f'TASK_{datetime.now().strftime("%Y%m%d%H%M%S")}',
|
||
'RPA_SYSTEM'
|
||
))
|
||
|
||
click_id = cursor.lastrowid
|
||
conn.commit()
|
||
conn.close()
|
||
|
||
# 更新站点点击次数
|
||
site_mgr = SiteManager()
|
||
site_mgr.increment_click_count(site_id)
|
||
|
||
logger.info(f"记录点击: site_id={site_id}, click_id={click_id}")
|
||
return click_id
|
||
|
||
except Exception as e:
|
||
logger.error(f"记录点击失败: {str(e)}")
|
||
return None
|
||
|
||
def get_clicks_by_site(self, site_id: int, limit: int = 100) -> List[Dict]:
|
||
"""获取站点的点击记录"""
|
||
try:
|
||
conn = self.get_connection()
|
||
cursor = conn.cursor()
|
||
cursor.execute("""
|
||
SELECT * FROM ai_mip_click
|
||
WHERE site_id = ?
|
||
ORDER BY click_time DESC
|
||
LIMIT ?
|
||
""", (site_id, limit))
|
||
rows = cursor.fetchall()
|
||
conn.close()
|
||
return [self._dict_from_row(row) for row in rows]
|
||
except Exception as e:
|
||
logger.error(f"查询点击记录失败: {str(e)}")
|
||
return []
|
||
|
||
def get_click_count_by_site(self, site_id: int) -> int:
|
||
"""获取站点的总点击次数"""
|
||
try:
|
||
conn = self.get_connection()
|
||
cursor = conn.cursor()
|
||
cursor.execute("SELECT COUNT(*) as count FROM ai_mip_click WHERE site_id = ?", (site_id,))
|
||
row = cursor.fetchone()
|
||
conn.close()
|
||
return row['count'] if row else 0
|
||
except Exception as e:
|
||
logger.error(f"查询点击次数失败: {str(e)}")
|
||
return 0
|
||
|
||
|
||
class InteractionManager(DatabaseManager):
|
||
"""互动记录管理"""
|
||
|
||
def record_interaction(self, site_id: int, click_id: int = None,
|
||
task_id: str = None, interaction_type: str = 'reply',
|
||
reply_content: str = None, is_successful: bool = False,
|
||
response_received: bool = False, response_content: str = None,
|
||
proxy_ip: str = None, fingerprint_id: str = None,
|
||
error_message: str = None) -> Optional[int]:
|
||
"""
|
||
记录一次互动
|
||
|
||
Args:
|
||
site_id: 站点ID
|
||
click_id: 关联的点击记录ID
|
||
task_id: 任务ID
|
||
interaction_type: 互动类型(reply/comment等)
|
||
reply_content: 回复内容
|
||
is_successful: 是否成功
|
||
response_received: 是否收到回复
|
||
response_content: 对方回复内容
|
||
proxy_ip: 使用的代理IP
|
||
fingerprint_id: 浏览器指纹ID
|
||
error_message: 错误信息
|
||
|
||
Returns:
|
||
互动记录ID
|
||
"""
|
||
try:
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
|
||
cursor = conn.cursor(pymysql.cursors.DictCursor)
|
||
|
||
now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
||
|
||
sql = f"""
|
||
INSERT INTO ai_mip_interaction (
|
||
site_id, click_id, task_id, interaction_type,
|
||
interaction_time, interaction_status, reply_content,
|
||
execution_mode, browser_type, proxy_ip, fingerprint_id,
|
||
response_received, response_content, is_successful,
|
||
error_message, operator
|
||
) VALUES ({ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph})
|
||
"""
|
||
|
||
cursor.execute(sql, (
|
||
site_id,
|
||
click_id,
|
||
task_id or f'TASK_{datetime.now().strftime("%Y%m%d%H%M%S")}',
|
||
interaction_type,
|
||
now,
|
||
'success' if is_successful else 'failed',
|
||
reply_content,
|
||
'auto',
|
||
'playwright',
|
||
proxy_ip,
|
||
fingerprint_id,
|
||
1 if response_received else 0,
|
||
response_content,
|
||
1 if is_successful else 0,
|
||
error_message,
|
||
'RPA_SYSTEM'
|
||
))
|
||
|
||
interaction_id = cursor.lastrowid
|
||
conn.commit()
|
||
conn.close()
|
||
|
||
# 如果收到回复,更新站点回复次数
|
||
if response_received:
|
||
site_mgr = SiteManager()
|
||
site_mgr.increment_reply_count(site_id)
|
||
|
||
logger.info(f"记录互动: site_id={site_id}, interaction_id={interaction_id}, success={is_successful}")
|
||
return interaction_id
|
||
|
||
except Exception as e:
|
||
logger.error(f"记录互动失败: {str(e)}")
|
||
return None
|
||
|
||
def get_interactions_by_site(self, site_id: int, limit: int = 100) -> List[Dict]:
|
||
"""获取站点的互动记录"""
|
||
try:
|
||
conn = self.get_connection()
|
||
cursor = conn.cursor()
|
||
cursor.execute("""
|
||
SELECT * FROM ai_mip_interaction
|
||
WHERE site_id = ?
|
||
ORDER BY interaction_time DESC
|
||
LIMIT ?
|
||
""", (site_id, limit))
|
||
rows = cursor.fetchall()
|
||
conn.close()
|
||
return [self._dict_from_row(row) for row in rows]
|
||
except Exception as e:
|
||
logger.error(f"查询互动记录失败: {str(e)}")
|
||
return []
|
||
|
||
def get_successful_interactions_count(self, site_id: int) -> int:
|
||
"""获取站点的成功互动次数"""
|
||
try:
|
||
conn = self.get_connection()
|
||
cursor = conn.cursor()
|
||
cursor.execute("""
|
||
SELECT COUNT(*) as count FROM ai_mip_interaction
|
||
WHERE site_id = ? AND is_successful = 1
|
||
""", (site_id,))
|
||
row = cursor.fetchone()
|
||
conn.close()
|
||
return row['count'] if row else 0
|
||
except Exception as e:
|
||
logger.error(f"查询成功互动次数失败: {str(e)}")
|
||
return 0
|
||
|
||
|
||
class StatisticsManager(DatabaseManager):
|
||
"""统计数据管理"""
|
||
|
||
def get_statistics(self) -> Dict:
|
||
"""
|
||
获取全局统计数据
|
||
|
||
Returns:
|
||
统计数据字典
|
||
"""
|
||
try:
|
||
conn = self.get_connection()
|
||
|
||
# 站点统计
|
||
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_site")
|
||
total_sites = cursor.fetchone()['total']
|
||
|
||
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_site WHERE status = 'active'")
|
||
active_sites = cursor.fetchone()['total']
|
||
|
||
# 点击统计
|
||
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_click")
|
||
total_clicks = cursor.fetchone()['total']
|
||
|
||
# 互动统计
|
||
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_interaction WHERE response_received = 1")
|
||
total_replies = cursor.fetchone()['total']
|
||
|
||
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_interaction WHERE is_successful = 1")
|
||
successful_interactions = cursor.fetchone()['total']
|
||
|
||
conn.close()
|
||
|
||
reply_rate = (total_replies / total_clicks * 100) if total_clicks > 0 else 0
|
||
success_rate = (successful_interactions / total_clicks * 100) if total_clicks > 0 else 0
|
||
|
||
return {
|
||
'total_sites': total_sites,
|
||
'active_sites': active_sites,
|
||
'total_clicks': total_clicks,
|
||
'total_replies': total_replies,
|
||
'successful_interactions': successful_interactions,
|
||
'reply_rate': f"{reply_rate:.2f}%",
|
||
'success_rate': f"{success_rate:.2f}%"
|
||
}
|
||
|
||
except Exception as e:
|
||
logger.error(f"获取统计数据失败: {str(e)}")
|
||
return {}
|
||
|
||
def get_site_statistics(self, site_id: int) -> Dict:
|
||
"""
|
||
获取单个站点的统计数据
|
||
|
||
Args:
|
||
site_id: 站点ID
|
||
|
||
Returns:
|
||
站点统计数据
|
||
"""
|
||
try:
|
||
site_mgr = SiteManager(self.db_path)
|
||
click_mgr = ClickManager(self.db_path)
|
||
interaction_mgr = InteractionManager(self.db_path)
|
||
|
||
site = site_mgr.get_site_by_id(site_id)
|
||
if not site:
|
||
return {}
|
||
|
||
click_count = click_mgr.get_click_count_by_site(site_id)
|
||
success_count = interaction_mgr.get_successful_interactions_count(site_id)
|
||
|
||
return {
|
||
'site_url': site['site_url'],
|
||
'site_name': site['site_name'],
|
||
'status': site['status'],
|
||
'click_count': click_count,
|
||
'reply_count': site['reply_count'],
|
||
'successful_interactions': success_count,
|
||
'reply_rate': f"{(site['reply_count'] / click_count * 100) if click_count > 0 else 0:.2f}%"
|
||
}
|
||
|
||
except Exception as e:
|
||
logger.error(f"获取站点统计失败: {str(e)}")
|
||
return {}
|
||
|
||
|
||
class QueryTaskManager(DatabaseManager):
|
||
"""查询任务管理器"""
|
||
|
||
def create_task(self, query_word: str, task_date: str = None,
|
||
query_type: str = 'keyword', threshold_max: int = 100,
|
||
priority: int = 5, category: str = None,
|
||
source_platform: str = 'baidu',
|
||
created_by: str = 'system',
|
||
remark: str = None) -> Optional[int]:
|
||
"""
|
||
创建查询任务
|
||
|
||
Args:
|
||
query_word: 查询词
|
||
task_date: 任务日期 YYYYMMDD,默认今天
|
||
query_type: 查询类型
|
||
threshold_max: 最大抓取数量
|
||
priority: 优先级 1-10
|
||
category: 分类标签
|
||
source_platform: 来源平台
|
||
created_by: 创建人
|
||
remark: 备注
|
||
|
||
Returns:
|
||
任务ID,失败返回None
|
||
"""
|
||
try:
|
||
if task_date is None:
|
||
task_date = datetime.now().strftime('%Y%m%d')
|
||
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
|
||
sql = f"""
|
||
INSERT INTO ai_mip_query_task (
|
||
query_word, query_type, task_date, threshold_max,
|
||
priority, category, source_platform, created_by, remark
|
||
) VALUES ({ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph})
|
||
"""
|
||
|
||
cursor = conn.cursor()
|
||
cursor.execute(sql, (
|
||
query_word, query_type, task_date, threshold_max,
|
||
priority, category, source_platform, created_by, remark
|
||
))
|
||
|
||
task_id = cursor.lastrowid
|
||
conn.commit()
|
||
conn.close()
|
||
|
||
logger.info(f"创建查询任务成功: {query_word} (ID: {task_id})")
|
||
return task_id
|
||
|
||
except pymysql.IntegrityError:
|
||
logger.warning(f"查询任务已存在: {query_word} @ {task_date}")
|
||
return None
|
||
except Exception as e:
|
||
logger.error(f"创建查询任务失败: {str(e)}")
|
||
return None
|
||
|
||
def get_task_by_id(self, task_id: int) -> Optional[Dict]:
|
||
"""根据ID获取任务"""
|
||
try:
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
cursor = self._execute_query(
|
||
conn,
|
||
f"SELECT * FROM ai_mip_query_task WHERE id = {ph}",
|
||
(task_id,)
|
||
)
|
||
row = cursor.fetchone()
|
||
conn.close()
|
||
return self._dict_from_row(row) if row else None
|
||
except Exception as e:
|
||
logger.error(f"查询任务失败: {str(e)}")
|
||
return None
|
||
|
||
def get_ready_tasks(self, limit: int = None) -> List[Dict]:
|
||
"""
|
||
获取准备执行的任务(按优先级排序)
|
||
|
||
Args:
|
||
limit: 限制数量
|
||
|
||
Returns:
|
||
任务列表
|
||
"""
|
||
try:
|
||
conn = self.get_connection()
|
||
sql = "SELECT * FROM ai_mip_query_task WHERE status = 'ready' ORDER BY priority ASC, created_at ASC"
|
||
|
||
if limit:
|
||
sql += f" LIMIT {limit}"
|
||
|
||
cursor = self._execute_query(conn, sql)
|
||
rows = cursor.fetchall()
|
||
conn.close()
|
||
return [self._dict_from_row(row) for row in rows]
|
||
except Exception as e:
|
||
logger.error(f"查询ready任务失败: {str(e)}")
|
||
return []
|
||
|
||
def get_tasks_by_date(self, task_date: str) -> List[Dict]:
|
||
"""根据日期获取任务"""
|
||
try:
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
cursor = self._execute_query(
|
||
conn,
|
||
f"SELECT * FROM ai_mip_query_task WHERE task_date = {ph} ORDER BY priority ASC",
|
||
(task_date,)
|
||
)
|
||
rows = cursor.fetchall()
|
||
conn.close()
|
||
return [self._dict_from_row(row) for row in rows]
|
||
except Exception as e:
|
||
logger.error(f"查询日期任务失败: {str(e)}")
|
||
return []
|
||
|
||
def update_task_status(self, task_id: int, status: str,
|
||
error_message: str = None) -> bool:
|
||
"""
|
||
更新任务状态
|
||
|
||
Args:
|
||
task_id: 任务ID
|
||
status: 状态 ready/doing/failed/finished/closed
|
||
error_message: 错误信息(失败时)
|
||
"""
|
||
try:
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
|
||
# 根据状态更新时间字段
|
||
timestamp_field = None
|
||
if status == 'doing':
|
||
timestamp_field = 'started_at'
|
||
elif status in ['finished', 'failed']:
|
||
timestamp_field = 'finished_at'
|
||
elif status == 'closed':
|
||
timestamp_field = 'closed_at'
|
||
|
||
if timestamp_field:
|
||
sql = f"""
|
||
UPDATE ai_mip_query_task
|
||
SET status = {ph}, {timestamp_field} = NOW()
|
||
WHERE id = {ph}
|
||
"""
|
||
params = (status, task_id)
|
||
else:
|
||
sql = f"UPDATE ai_mip_query_task SET status = {ph} WHERE id = {ph}"
|
||
params = (status, task_id)
|
||
|
||
# 如果有错误信息,更新error_message
|
||
if error_message:
|
||
sql = sql.replace('WHERE', f", error_message = '{error_message}' WHERE")
|
||
|
||
cursor = conn.cursor()
|
||
cursor.execute(sql, params)
|
||
conn.commit()
|
||
conn.close()
|
||
|
||
logger.info(f"更新任务状态: {task_id} -> {status}")
|
||
return True
|
||
|
||
except Exception as e:
|
||
logger.error(f"更新任务状态失败: {str(e)}")
|
||
return False
|
||
|
||
def increment_crawl_count(self, task_id: int,
|
||
crawl_count: int = 1,
|
||
valid_count: int = 0) -> bool:
|
||
"""
|
||
增加抓取计数
|
||
|
||
Args:
|
||
task_id: 任务ID
|
||
crawl_count: 抓取URL数量
|
||
valid_count: 有效URL数量(带广告)
|
||
"""
|
||
try:
|
||
conn = self.get_connection()
|
||
ph = self._get_placeholder()
|
||
|
||
sql = f"""
|
||
UPDATE ai_mip_query_task
|
||
SET crawl_url_count = crawl_url_count + {ph},
|
||
valid_url_count = valid_url_count + {ph},
|
||
current_count = current_count + {ph}
|
||
WHERE id = {ph}
|
||
"""
|
||
|
||
cursor = conn.cursor()
|
||
cursor.execute(sql, (crawl_count, valid_count, valid_count, task_id))
|
||
conn.commit()
|
||
conn.close()
|
||
|
||
return True
|
||
|
||
except Exception as e:
|
||
logger.error(f"更新抓取计数失败: {str(e)}")
|
||
return False
|
||
|
||
def check_threshold(self, task_id: int) -> bool:
|
||
"""
|
||
检查是否达到阈值,达到则自动关闭任务
|
||
|
||
Returns:
|
||
True=已达到阈值, False=未达到
|
||
"""
|
||
try:
|
||
task = self.get_task_by_id(task_id)
|
||
if not task:
|
||
return False
|
||
|
||
if task['current_count'] >= task['threshold_max']:
|
||
self.update_task_status(task_id, 'closed')
|
||
logger.info(f"任务达到阈值并关闭: {task['query_word']} ({task['current_count']}/{task['threshold_max']})")
|
||
return True
|
||
|
||
return False
|
||
|
||
except Exception as e:
|
||
logger.error(f"检查阈值失败: {str(e)}")
|
||
return False
|
||
|
||
def get_task_statistics(self, task_date: str = None) -> Dict:
|
||
"""
|
||
获取任务统计信息
|
||
|
||
Args:
|
||
task_date: 日期,为None则统计所有
|
||
"""
|
||
try:
|
||
conn = self.get_connection()
|
||
|
||
if task_date:
|
||
ph = self._get_placeholder()
|
||
where_clause = f"WHERE task_date = {ph}"
|
||
params = (task_date,)
|
||
else:
|
||
where_clause = ""
|
||
params = None
|
||
|
||
sql = f"""
|
||
SELECT
|
||
COUNT(*) as total_tasks,
|
||
SUM(CASE WHEN status = 'ready' THEN 1 ELSE 0 END) as ready_count,
|
||
SUM(CASE WHEN status = 'doing' THEN 1 ELSE 0 END) as doing_count,
|
||
SUM(CASE WHEN status = 'finished' THEN 1 ELSE 0 END) as finished_count,
|
||
SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed_count,
|
||
SUM(CASE WHEN status = 'closed' THEN 1 ELSE 0 END) as closed_count,
|
||
SUM(crawl_url_count) as total_crawled,
|
||
SUM(valid_url_count) as total_valid
|
||
FROM ai_mip_query_task
|
||
{where_clause}
|
||
"""
|
||
|
||
cursor = self._execute_query(conn, sql, params)
|
||
row = cursor.fetchone()
|
||
conn.close()
|
||
|
||
return self._dict_from_row(row) if row else {}
|
||
|
||
except Exception as e:
|
||
logger.error(f"获取任务统计失败: {str(e)}")
|
||
return {}
|