Files
ai_mip/db_manager.py
2026-01-21 14:33:10 +08:00

819 lines
29 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
数据库管理器
使用 MySQL 数据库
"""
import json
import random
from datetime import datetime
from typing import List, Dict, Optional, Union
from pathlib import Path
from loguru import logger
from config import Config
try:
import pymysql
MYSQL_AVAILABLE = True
except ImportError:
MYSQL_AVAILABLE = False
logger.error("pymysql 未安装,请安装: pip install pymysql")
raise ImportError("使用 MySQL 需要安装 pymysql: pip install pymysql")
class DatabaseManager:
"""数据库管理器,使用 MySQL"""
def __init__(self, db_path: str = None):
"""
初始化数据库连接
Args:
db_path: 忽略,仅为了兼容性
"""
if not MYSQL_AVAILABLE:
raise ImportError("使用 MySQL 需要安装 pymysql: pip install pymysql")
self.db_config = {
'host': Config.MYSQL_HOST,
'port': Config.MYSQL_PORT,
'user': Config.MYSQL_USER,
'password': Config.MYSQL_PASSWORD,
'database': Config.MYSQL_DATABASE,
'charset': 'utf8mb4'
}
def get_connection(self) -> 'pymysql.Connection':
"""获取MySQL数据库连接"""
conn = pymysql.connect(**self.db_config)
return conn
def _dict_from_row(self, row) -> Dict:
"""将数据库行转换为字典"""
if row is None:
return None
return dict(row) if isinstance(row, dict) else row
def _get_placeholder(self) -> str:
"""获取SQL占位符MySQL使用 %s"""
return '%s'
def _execute_query(self, conn, sql: str, params: tuple = None):
"""执行SQL查询使用DictCursor"""
cursor = conn.cursor(pymysql.cursors.DictCursor)
if params:
cursor.execute(sql, params)
else:
cursor.execute(sql)
return cursor
class SiteManager(DatabaseManager):
"""站点管理"""
def add_site(self, site_url: str, site_name: str = None,
site_dimension: str = None, query_word: str = None,
frequency: int = None,
time_start: str = None, time_end: str = None,
interval_minutes: int = None) -> Optional[int]:
"""
添加新站点
Args:
site_url: 网站URL
site_name: 网站名称
site_dimension: 网站维度标签
query_word: 来源查询词(从哪个关键词抓取)
frequency: 频次
time_start: 开始时间
time_end: 结束时间
interval_minutes: 执行间隔(分钟)
Returns:
站点ID失败返回None
"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = conn.cursor(pymysql.cursors.DictCursor)
# 生成随机目标点击次数(兼容原有逻辑)
target_clicks = random.randint(
getattr(Config, 'MIN_CLICK_COUNT', 1),
getattr(Config, 'MAX_CLICK_COUNT', 10)
)
sql = f"""
INSERT INTO ai_mip_site (
site_url, site_name, status, frequency,
time_start, time_end, interval_minutes,
site_dimension, query_word, created_by
) VALUES ({ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph})
"""
cursor.execute(sql, (
site_url,
site_name or site_url,
'active',
frequency or 1,
time_start or '09:00:00',
time_end or '21:00:00',
interval_minutes or 30,
site_dimension,
query_word, # 新增:来源查询词
'system'
))
site_id = cursor.lastrowid
conn.commit()
conn.close()
logger.info(f"成功添加站点: {site_url} (ID: {site_id}, 查询词: {query_word})")
return site_id
except pymysql.IntegrityError:
logger.warning(f"站点URL已存在: {site_url}")
return None
except Exception as e:
logger.error(f"添加站点失败: {str(e)}")
return None
def get_site_by_url(self, site_url: str) -> Optional[Dict]:
"""根据URL获取站点信息"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = self._execute_query(conn, f"SELECT * FROM ai_mip_site WHERE site_url = {ph}", (site_url,))
row = cursor.fetchone()
conn.close()
return self._dict_from_row(row) if row else None
except Exception as e:
logger.error(f"查询站点失败: {str(e)}")
return None
def get_site_by_id(self, site_id: int) -> Optional[Dict]:
"""根据ID获取站点信息"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = self._execute_query(conn, f"SELECT * FROM ai_mip_site WHERE id = {ph}", (site_id,))
row = cursor.fetchone()
conn.close()
return self._dict_from_row(row) if row else None
except Exception as e:
logger.error(f"查询站点失败: {str(e)}")
return None
def get_active_sites(self) -> List[Dict]:
"""获取所有活跃站点"""
try:
conn = self.get_connection()
cursor = self._execute_query(conn, "SELECT * FROM ai_mip_site WHERE status = 'active' ORDER BY created_at DESC")
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows]
except Exception as e:
logger.error(f"查询活跃站点失败: {str(e)}")
return []
def get_all_sites(self) -> List[Dict]:
"""获取所有站点"""
try:
conn = self.get_connection()
cursor = self._execute_query(conn, "SELECT * FROM ai_mip_site ORDER BY created_at DESC")
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows]
except Exception as e:
logger.error(f"查询所有站点失败: {str(e)}")
return []
def update_site_status(self, site_id: int, status: str) -> bool:
"""更新站点状态"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = self._execute_query(
conn,
f"UPDATE ai_mip_site SET status = {ph}, updated_by = {ph} WHERE id = {ph}",
(status, 'system', site_id)
)
conn.commit()
conn.close()
logger.info(f"更新站点状态: ID={site_id}, status={status}")
return True
except Exception as e:
logger.error(f"更新站点状态失败: {str(e)}")
return False
def increment_click_count(self, site_id: int, count: int = 1) -> bool:
"""增加点击次数"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = self._execute_query(
conn,
f"UPDATE ai_mip_site SET click_count = click_count + {ph} WHERE id = {ph}",
(count, site_id)
)
conn.commit()
conn.close()
return True
except Exception as e:
logger.error(f"更新点击次数失败: {str(e)}")
return False
def increment_reply_count(self, site_id: int, count: int = 1) -> bool:
"""增加回复次数"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = self._execute_query(
conn,
f"UPDATE ai_mip_site SET reply_count = reply_count + {ph} WHERE id = {ph}",
(count, site_id)
)
conn.commit()
conn.close()
return True
except Exception as e:
logger.error(f"更新回复次数失败: {str(e)}")
return False
def delete_site(self, site_id: int) -> bool:
"""删除站点"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = self._execute_query(conn, f"DELETE FROM ai_mip_site WHERE id = {ph}", (site_id,))
conn.commit()
conn.close()
logger.info(f"已删除站点: ID={site_id}")
return True
except Exception as e:
logger.error(f"删除站点失败: {str(e)}")
return False
class ClickManager(DatabaseManager):
"""点击记录管理"""
def record_click(self, site_id: int, site_url: str,
user_ip: str = None, device_type: str = 'pc',
task_id: str = None) -> Optional[int]:
"""
记录一次点击
Args:
site_id: 站点ID
site_url: 站点URL
user_ip: 用户IP代理IP
device_type: 设备类型
task_id: 任务ID
Returns:
点击记录ID
"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = conn.cursor(pymysql.cursors.DictCursor)
sql = f"""
INSERT INTO ai_mip_click (
site_id, site_url, click_time, user_ip,
device_type, task_id, operator
) VALUES ({ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph})
"""
cursor.execute(sql, (
site_id,
site_url,
datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
user_ip,
device_type,
task_id or f'TASK_{datetime.now().strftime("%Y%m%d%H%M%S")}',
'RPA_SYSTEM'
))
click_id = cursor.lastrowid
conn.commit()
conn.close()
# 更新站点点击次数
site_mgr = SiteManager()
site_mgr.increment_click_count(site_id)
logger.info(f"记录点击: site_id={site_id}, click_id={click_id}")
return click_id
except Exception as e:
logger.error(f"记录点击失败: {str(e)}")
return None
def get_clicks_by_site(self, site_id: int, limit: int = 100) -> List[Dict]:
"""获取站点的点击记录"""
try:
conn = self.get_connection()
cursor = conn.cursor()
cursor.execute("""
SELECT * FROM ai_mip_click
WHERE site_id = ?
ORDER BY click_time DESC
LIMIT ?
""", (site_id, limit))
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows]
except Exception as e:
logger.error(f"查询点击记录失败: {str(e)}")
return []
def get_click_count_by_site(self, site_id: int) -> int:
"""获取站点的总点击次数"""
try:
conn = self.get_connection()
cursor = conn.cursor()
cursor.execute("SELECT COUNT(*) as count FROM ai_mip_click WHERE site_id = ?", (site_id,))
row = cursor.fetchone()
conn.close()
return row['count'] if row else 0
except Exception as e:
logger.error(f"查询点击次数失败: {str(e)}")
return 0
class InteractionManager(DatabaseManager):
"""互动记录管理"""
def record_interaction(self, site_id: int, click_id: int = None,
task_id: str = None, interaction_type: str = 'reply',
reply_content: str = None, is_successful: bool = False,
response_received: bool = False, response_content: str = None,
proxy_ip: str = None, fingerprint_id: str = None,
error_message: str = None) -> Optional[int]:
"""
记录一次互动
Args:
site_id: 站点ID
click_id: 关联的点击记录ID
task_id: 任务ID
interaction_type: 互动类型reply/comment等
reply_content: 回复内容
is_successful: 是否成功
response_received: 是否收到回复
response_content: 对方回复内容
proxy_ip: 使用的代理IP
fingerprint_id: 浏览器指纹ID
error_message: 错误信息
Returns:
互动记录ID
"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = conn.cursor(pymysql.cursors.DictCursor)
now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
sql = f"""
INSERT INTO ai_mip_interaction (
site_id, click_id, task_id, interaction_type,
interaction_time, interaction_status, reply_content,
execution_mode, browser_type, proxy_ip, fingerprint_id,
response_received, response_content, is_successful,
error_message, operator
) VALUES ({ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph})
"""
cursor.execute(sql, (
site_id,
click_id,
task_id or f'TASK_{datetime.now().strftime("%Y%m%d%H%M%S")}',
interaction_type,
now,
'success' if is_successful else 'failed',
reply_content,
'auto',
'playwright',
proxy_ip,
fingerprint_id,
1 if response_received else 0,
response_content,
1 if is_successful else 0,
error_message,
'RPA_SYSTEM'
))
interaction_id = cursor.lastrowid
conn.commit()
conn.close()
# 如果收到回复,更新站点回复次数
if response_received:
site_mgr = SiteManager()
site_mgr.increment_reply_count(site_id)
logger.info(f"记录互动: site_id={site_id}, interaction_id={interaction_id}, success={is_successful}")
return interaction_id
except Exception as e:
logger.error(f"记录互动失败: {str(e)}")
return None
def get_interactions_by_site(self, site_id: int, limit: int = 100) -> List[Dict]:
"""获取站点的互动记录"""
try:
conn = self.get_connection()
cursor = conn.cursor()
cursor.execute("""
SELECT * FROM ai_mip_interaction
WHERE site_id = ?
ORDER BY interaction_time DESC
LIMIT ?
""", (site_id, limit))
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows]
except Exception as e:
logger.error(f"查询互动记录失败: {str(e)}")
return []
def get_successful_interactions_count(self, site_id: int) -> int:
"""获取站点的成功互动次数"""
try:
conn = self.get_connection()
cursor = conn.cursor()
cursor.execute("""
SELECT COUNT(*) as count FROM ai_mip_interaction
WHERE site_id = ? AND is_successful = 1
""", (site_id,))
row = cursor.fetchone()
conn.close()
return row['count'] if row else 0
except Exception as e:
logger.error(f"查询成功互动次数失败: {str(e)}")
return 0
class StatisticsManager(DatabaseManager):
"""统计数据管理"""
def get_statistics(self) -> Dict:
"""
获取全局统计数据
Returns:
统计数据字典
"""
try:
conn = self.get_connection()
# 站点统计
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_site")
total_sites = cursor.fetchone()['total']
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_site WHERE status = 'active'")
active_sites = cursor.fetchone()['total']
# 点击统计
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_click")
total_clicks = cursor.fetchone()['total']
# 互动统计
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_interaction WHERE response_received = 1")
total_replies = cursor.fetchone()['total']
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_interaction WHERE is_successful = 1")
successful_interactions = cursor.fetchone()['total']
conn.close()
reply_rate = (total_replies / total_clicks * 100) if total_clicks > 0 else 0
success_rate = (successful_interactions / total_clicks * 100) if total_clicks > 0 else 0
return {
'total_sites': total_sites,
'active_sites': active_sites,
'total_clicks': total_clicks,
'total_replies': total_replies,
'successful_interactions': successful_interactions,
'reply_rate': f"{reply_rate:.2f}%",
'success_rate': f"{success_rate:.2f}%"
}
except Exception as e:
logger.error(f"获取统计数据失败: {str(e)}")
return {}
def get_site_statistics(self, site_id: int) -> Dict:
"""
获取单个站点的统计数据
Args:
site_id: 站点ID
Returns:
站点统计数据
"""
try:
site_mgr = SiteManager(self.db_path)
click_mgr = ClickManager(self.db_path)
interaction_mgr = InteractionManager(self.db_path)
site = site_mgr.get_site_by_id(site_id)
if not site:
return {}
click_count = click_mgr.get_click_count_by_site(site_id)
success_count = interaction_mgr.get_successful_interactions_count(site_id)
return {
'site_url': site['site_url'],
'site_name': site['site_name'],
'status': site['status'],
'click_count': click_count,
'reply_count': site['reply_count'],
'successful_interactions': success_count,
'reply_rate': f"{(site['reply_count'] / click_count * 100) if click_count > 0 else 0:.2f}%"
}
except Exception as e:
logger.error(f"获取站点统计失败: {str(e)}")
return {}
class QueryTaskManager(DatabaseManager):
"""查询任务管理器"""
def create_task(self, query_word: str, task_date: str = None,
query_type: str = 'keyword', threshold_max: int = 100,
priority: int = 5, category: str = None,
source_platform: str = 'baidu',
created_by: str = 'system',
remark: str = None) -> Optional[int]:
"""
创建查询任务
Args:
query_word: 查询词
task_date: 任务日期 YYYYMMDD默认今天
query_type: 查询类型
threshold_max: 最大抓取数量
priority: 优先级 1-10
category: 分类标签
source_platform: 来源平台
created_by: 创建人
remark: 备注
Returns:
任务ID失败返回None
"""
try:
if task_date is None:
task_date = datetime.now().strftime('%Y%m%d')
conn = self.get_connection()
ph = self._get_placeholder()
sql = f"""
INSERT INTO ai_mip_query_task (
query_word, query_type, task_date, threshold_max,
priority, category, source_platform, created_by, remark
) VALUES ({ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph}, {ph})
"""
cursor = conn.cursor()
cursor.execute(sql, (
query_word, query_type, task_date, threshold_max,
priority, category, source_platform, created_by, remark
))
task_id = cursor.lastrowid
conn.commit()
conn.close()
logger.info(f"创建查询任务成功: {query_word} (ID: {task_id})")
return task_id
except pymysql.IntegrityError:
logger.warning(f"查询任务已存在: {query_word} @ {task_date}")
return None
except Exception as e:
logger.error(f"创建查询任务失败: {str(e)}")
return None
def get_task_by_id(self, task_id: int) -> Optional[Dict]:
"""根据ID获取任务"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = self._execute_query(
conn,
f"SELECT * FROM ai_mip_query_task WHERE id = {ph}",
(task_id,)
)
row = cursor.fetchone()
conn.close()
return self._dict_from_row(row) if row else None
except Exception as e:
logger.error(f"查询任务失败: {str(e)}")
return None
def get_ready_tasks(self, limit: int = None) -> List[Dict]:
"""
获取准备执行的任务(按优先级排序)
Args:
limit: 限制数量
Returns:
任务列表
"""
try:
conn = self.get_connection()
sql = "SELECT * FROM ai_mip_query_task WHERE status = 'ready' ORDER BY priority ASC, created_at ASC"
if limit:
sql += f" LIMIT {limit}"
cursor = self._execute_query(conn, sql)
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows]
except Exception as e:
logger.error(f"查询ready任务失败: {str(e)}")
return []
def get_tasks_by_date(self, task_date: str) -> List[Dict]:
"""根据日期获取任务"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = self._execute_query(
conn,
f"SELECT * FROM ai_mip_query_task WHERE task_date = {ph} ORDER BY priority ASC",
(task_date,)
)
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows]
except Exception as e:
logger.error(f"查询日期任务失败: {str(e)}")
return []
def update_task_status(self, task_id: int, status: str,
error_message: str = None) -> bool:
"""
更新任务状态
Args:
task_id: 任务ID
status: 状态 ready/doing/failed/finished/closed
error_message: 错误信息(失败时)
"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
# 根据状态更新时间字段
timestamp_field = None
if status == 'doing':
timestamp_field = 'started_at'
elif status in ['finished', 'failed']:
timestamp_field = 'finished_at'
elif status == 'closed':
timestamp_field = 'closed_at'
if timestamp_field:
sql = f"""
UPDATE ai_mip_query_task
SET status = {ph}, {timestamp_field} = NOW()
WHERE id = {ph}
"""
params = (status, task_id)
else:
sql = f"UPDATE ai_mip_query_task SET status = {ph} WHERE id = {ph}"
params = (status, task_id)
# 如果有错误信息更新error_message
if error_message:
sql = sql.replace('WHERE', f", error_message = '{error_message}' WHERE")
cursor = conn.cursor()
cursor.execute(sql, params)
conn.commit()
conn.close()
logger.info(f"更新任务状态: {task_id} -> {status}")
return True
except Exception as e:
logger.error(f"更新任务状态失败: {str(e)}")
return False
def increment_crawl_count(self, task_id: int,
crawl_count: int = 1,
valid_count: int = 0) -> bool:
"""
增加抓取计数
Args:
task_id: 任务ID
crawl_count: 抓取URL数量
valid_count: 有效URL数量带广告
"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
sql = f"""
UPDATE ai_mip_query_task
SET crawl_url_count = crawl_url_count + {ph},
valid_url_count = valid_url_count + {ph},
current_count = current_count + {ph}
WHERE id = {ph}
"""
cursor = conn.cursor()
cursor.execute(sql, (crawl_count, valid_count, valid_count, task_id))
conn.commit()
conn.close()
return True
except Exception as e:
logger.error(f"更新抓取计数失败: {str(e)}")
return False
def check_threshold(self, task_id: int) -> bool:
"""
检查是否达到阈值,达到则自动关闭任务
Returns:
True=已达到阈值, False=未达到
"""
try:
task = self.get_task_by_id(task_id)
if not task:
return False
if task['current_count'] >= task['threshold_max']:
self.update_task_status(task_id, 'closed')
logger.info(f"任务达到阈值并关闭: {task['query_word']} ({task['current_count']}/{task['threshold_max']})")
return True
return False
except Exception as e:
logger.error(f"检查阈值失败: {str(e)}")
return False
def get_task_statistics(self, task_date: str = None) -> Dict:
"""
获取任务统计信息
Args:
task_date: 日期为None则统计所有
"""
try:
conn = self.get_connection()
if task_date:
ph = self._get_placeholder()
where_clause = f"WHERE task_date = {ph}"
params = (task_date,)
else:
where_clause = ""
params = None
sql = f"""
SELECT
COUNT(*) as total_tasks,
SUM(CASE WHEN status = 'ready' THEN 1 ELSE 0 END) as ready_count,
SUM(CASE WHEN status = 'doing' THEN 1 ELSE 0 END) as doing_count,
SUM(CASE WHEN status = 'finished' THEN 1 ELSE 0 END) as finished_count,
SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed_count,
SUM(CASE WHEN status = 'closed' THEN 1 ELSE 0 END) as closed_count,
SUM(crawl_url_count) as total_crawled,
SUM(valid_url_count) as total_valid
FROM ai_mip_query_task
{where_clause}
"""
cursor = self._execute_query(conn, sql, params)
row = cursor.fetchone()
conn.close()
return self._dict_from_row(row) if row else {}
except Exception as e:
logger.error(f"获取任务统计失败: {str(e)}")
return {}