sjk
2026-02-24 12:46:35 +08:00
parent be0c13e1a6
commit 85224e01e6
116 changed files with 8380 additions and 9642 deletions


@@ -48,10 +48,32 @@ class DatabaseManager:
return conn
def _dict_from_row(self, row) -> Dict:
"""将数据库行转换为字典"""
"""将数据库行转换为字典,处理特殊类型"""
if row is None:
return None
return dict(row) if isinstance(row, dict) else row
result = dict(row) if isinstance(row, dict) else row
# Handle special types so the result is JSON-serializable
if isinstance(result, dict):
from datetime import datetime, date, timedelta
from decimal import Decimal
for key, value in result.items():
if isinstance(value, datetime):
result[key] = value.strftime('%Y-%m-%d %H:%M:%S')
elif isinstance(value, date):
result[key] = value.strftime('%Y-%m-%d')
elif isinstance(value, timedelta):
# Convert timedelta to an HH:MM:SS string
total_seconds = int(value.total_seconds())
hours, remainder = divmod(total_seconds, 3600)
minutes, seconds = divmod(remainder, 60)
result[key] = f'{hours:02d}:{minutes:02d}:{seconds:02d}'
elif isinstance(value, Decimal):
result[key] = float(value)
return result
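A quick sanity check of the conversion above (not part of the commit): json.dumps() rejects the datetime, timedelta, and Decimal objects MySQL drivers hand back, but serializes the coerced dict cleanly. The values below are hypothetical, and an already-configured DatabaseManager instance `db` is assumed.
import json
from datetime import datetime, timedelta
from decimal import Decimal

raw_row = {
    'click_time': datetime(2026, 2, 24, 12, 46, 35),  # DATETIME column
    'time_start': timedelta(hours=9, minutes=30),      # TIME column (pymysql returns timedelta)
    'click_count': Decimal('42'),                      # DECIMAL column
}
safe_row = db._dict_from_row(raw_row)  # db: an existing DatabaseManager instance (assumed)
print(json.dumps(safe_row))
# {"click_time": "2026-02-24 12:46:35", "time_start": "09:30:00", "click_count": 42.0}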
def _get_placeholder(self) -> str:
"""获取SQL占位符MySQL使用 %s"""
@@ -816,3 +838,801 @@ class QueryTaskManager(DatabaseManager):
except Exception as e:
logger.error(f"获取任务统计失败: {str(e)}")
return {}
class EnhancedSiteManager(SiteManager):
"""增强的站点管理器,支持分页、排序、筛选"""
def get_sites_paginated(
self,
page: int = 1,
page_size: int = 20,
status: str = None,
keyword: str = None,
sort_by: str = 'created_at',
sort_order: str = 'desc'
) -> tuple:
"""
Get sites with pagination
Returns:
(list of site dicts, total count)
"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
# Build WHERE conditions
conditions = []
params = []
if status:
conditions.append(f"status = {ph}")
params.append(status)
if keyword:
conditions.append(f"(site_url LIKE {ph} OR site_name LIKE {ph})")
params.extend([f'%{keyword}%', f'%{keyword}%'])
where_clause = ' AND '.join(conditions) if conditions else '1=1'
# Allowed sort fields
allowed_sort_fields = ['created_at', 'click_count', 'reply_count', 'site_url', 'status']
if sort_by not in allowed_sort_fields:
sort_by = 'created_at'
sort_order = 'DESC' if sort_order.upper() == 'DESC' else 'ASC'
# Query the total count
count_sql = f"SELECT COUNT(*) as total FROM ai_mip_site WHERE {where_clause}"
cursor = self._execute_query(conn, count_sql, tuple(params) if params else None)
total = cursor.fetchone()['total']
# Query the page of data
offset = (page - 1) * page_size
data_sql = f"""
SELECT * FROM ai_mip_site
WHERE {where_clause}
ORDER BY {sort_by} {sort_order}
LIMIT {ph} OFFSET {ph}
"""
params.extend([page_size, offset])
cursor = self._execute_query(conn, data_sql, tuple(params))
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows], total
except Exception as e:
logger.error(f"分页查询站点失败: {str(e)}")
return [], 0
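A minimal usage sketch for get_sites_paginated, assuming a Flask-style handler; the route, query-parameter names, and argument-free EnhancedSiteManager() construction are assumptions, not part of this commit.
from flask import Flask, request, jsonify

app = Flask(__name__)
site_manager = EnhancedSiteManager()  # assumes a no-argument constructor

@app.route('/api/sites')
def list_sites():
    # Pass query-string filters straight through; the method whitelists sort fields itself
    items, total = site_manager.get_sites_paginated(
        page=int(request.args.get('page', 1)),
        page_size=int(request.args.get('page_size', 20)),
        status=request.args.get('status'),
        keyword=request.args.get('keyword'),
        sort_by=request.args.get('sort_by', 'created_at'),
        sort_order=request.args.get('sort_order', 'desc'),
    )
    return jsonify({'items': items, 'total': total})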
def delete_sites_batch(self, site_ids: List[int]) -> int:
"""
Batch delete sites
Returns:
Number of rows deleted
"""
if not site_ids:
return 0
try:
conn = self.get_connection()
placeholders = ','.join(['%s'] * len(site_ids))
sql = f"DELETE FROM ai_mip_site WHERE id IN ({placeholders})"
cursor = conn.cursor()
cursor.execute(sql, tuple(site_ids))
deleted = cursor.rowcount
conn.commit()
conn.close()
logger.info(f"批量删除站点: {deleted}/{len(site_ids)}")
return deleted
except Exception as e:
logger.error(f"批量删除站点失败: {str(e)}")
return 0
def update_sites_status_batch(self, site_ids: List[int], status: str) -> int:
"""
Batch update site status
Returns:
Number of rows updated
"""
if not site_ids:
return 0
try:
conn = self.get_connection()
placeholders = ','.join(['%s'] * len(site_ids))
sql = f"UPDATE ai_mip_site SET status = %s WHERE id IN ({placeholders})"
cursor = conn.cursor()
cursor.execute(sql, (status, *site_ids))
updated = cursor.rowcount
conn.commit()
conn.close()
logger.info(f"批量更新站点状态为{status}: {updated}/{len(site_ids)}")
return updated
except Exception as e:
logger.error(f"批量更新站点状态失败: {str(e)}")
return 0
def export_sites(self, status: str = None, keyword: str = None) -> List[Dict]:
"""导出站点数据"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
conditions = []
params = []
if status:
conditions.append(f"status = {ph}")
params.append(status)
if keyword:
conditions.append(f"(site_url LIKE {ph} OR site_name LIKE {ph})")
params.extend([f'%{keyword}%', f'%{keyword}%'])
where_clause = ' AND '.join(conditions) if conditions else '1=1'
sql = f"""
SELECT id, site_url, site_name, status, click_count, reply_count,
frequency, time_start, time_end, site_dimension, query_word,
created_at
FROM ai_mip_site
WHERE {where_clause}
ORDER BY created_at DESC
"""
cursor = self._execute_query(conn, sql, tuple(params) if params else None)
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows]
except Exception as e:
logger.error(f"导出站点数据失败: {str(e)}")
return []
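Because export_sites returns plain dicts already coerced by _dict_from_row, dumping them to CSV is a one-step csv.DictWriter call. A sketch, not part of the commit; the helper name and file handling are assumptions.
import csv

def write_sites_csv(manager, path, status=None, keyword=None):
    """Hypothetical helper: dump export_sites() output to a CSV file, return the row count."""
    rows = manager.export_sites(status=status, keyword=keyword)
    if not rows:
        return 0
    with open(path, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)
    return len(rows)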
class EnhancedClickManager(ClickManager):
"""增强的点击记录管理器"""
def get_clicks_paginated(
self,
page: int = 1,
page_size: int = 20,
site_id: int = None,
start_date: str = None,
end_date: str = None,
sort_by: str = 'click_time',
sort_order: str = 'desc'
) -> tuple:
"""
Get click records with pagination
Returns:
(list of click record dicts, total count)
"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
conditions = []
params = []
if site_id:
conditions.append(f"c.site_id = {ph}")
params.append(site_id)
if start_date:
conditions.append(f"c.click_time >= {ph}")
params.append(f"{start_date} 00:00:00")
if end_date:
conditions.append(f"c.click_time <= {ph}")
params.append(f"{end_date} 23:59:59")
where_clause = ' AND '.join(conditions) if conditions else '1=1'
allowed_sort_fields = ['click_time', 'site_id', 'device_type']
if sort_by not in allowed_sort_fields:
sort_by = 'click_time'
sort_order = 'DESC' if sort_order.upper() == 'DESC' else 'ASC'
# Query the total count
count_sql = f"SELECT COUNT(*) as total FROM ai_mip_click c WHERE {where_clause}"
cursor = self._execute_query(conn, count_sql, tuple(params) if params else None)
total = cursor.fetchone()['total']
# Query the page of data
offset = (page - 1) * page_size
data_sql = f"""
SELECT c.*, s.site_name
FROM ai_mip_click c
LEFT JOIN ai_mip_site s ON c.site_id = s.id
WHERE {where_clause}
ORDER BY c.{sort_by} {sort_order}
LIMIT {ph} OFFSET {ph}
"""
params.extend([page_size, offset])
cursor = self._execute_query(conn, data_sql, tuple(params))
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows], total
except Exception as e:
logger.error(f"分页查询点击记录失败: {str(e)}")
return [], 0
def export_clicks(
self,
site_id: int = None,
start_date: str = None,
end_date: str = None
) -> List[Dict]:
"""导出点击记录"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
conditions = []
params = []
if site_id:
conditions.append(f"c.site_id = {ph}")
params.append(site_id)
if start_date:
conditions.append(f"c.click_time >= {ph}")
params.append(f"{start_date} 00:00:00")
if end_date:
conditions.append(f"c.click_time <= {ph}")
params.append(f"{end_date} 23:59:59")
where_clause = ' AND '.join(conditions) if conditions else '1=1'
sql = f"""
SELECT c.id, c.site_id, s.site_name, c.site_url, c.click_time,
c.user_ip, c.device_type, c.task_id
FROM ai_mip_click c
LEFT JOIN ai_mip_site s ON c.site_id = s.id
WHERE {where_clause}
ORDER BY c.click_time DESC
"""
cursor = self._execute_query(conn, sql, tuple(params) if params else None)
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows]
except Exception as e:
logger.error(f"导出点击记录失败: {str(e)}")
return []
class EnhancedInteractionManager(InteractionManager):
"""增强的互动记录管理器"""
def get_interactions_paginated(
self,
page: int = 1,
page_size: int = 20,
site_id: int = None,
start_date: str = None,
end_date: str = None,
status: str = None,
sort_by: str = 'interaction_time',
sort_order: str = 'desc'
) -> tuple:
"""
Get interaction records with pagination
Returns:
(list of interaction record dicts, total count)
"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
conditions = []
params = []
if site_id:
conditions.append(f"i.site_id = {ph}")
params.append(site_id)
if start_date:
conditions.append(f"i.interaction_time >= {ph}")
params.append(f"{start_date} 00:00:00")
if end_date:
conditions.append(f"i.interaction_time <= {ph}")
params.append(f"{end_date} 23:59:59")
if status:
conditions.append(f"i.interaction_status = {ph}")
params.append(status)
where_clause = ' AND '.join(conditions) if conditions else '1=1'
allowed_sort_fields = ['interaction_time', 'site_id', 'interaction_status']
if sort_by not in allowed_sort_fields:
sort_by = 'interaction_time'
sort_order = 'DESC' if sort_order.upper() == 'DESC' else 'ASC'
# Query the total count
count_sql = f"SELECT COUNT(*) as total FROM ai_mip_interaction i WHERE {where_clause}"
cursor = self._execute_query(conn, count_sql, tuple(params) if params else None)
total = cursor.fetchone()['total']
# Query the page of data
offset = (page - 1) * page_size
data_sql = f"""
SELECT i.*, s.site_name, s.site_url as site_url_ref
FROM ai_mip_interaction i
LEFT JOIN ai_mip_site s ON i.site_id = s.id
WHERE {where_clause}
ORDER BY i.{sort_by} {sort_order}
LIMIT {ph} OFFSET {ph}
"""
params.extend([page_size, offset])
cursor = self._execute_query(conn, data_sql, tuple(params))
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows], total
except Exception as e:
logger.error(f"分页查询互动记录失败: {str(e)}")
return [], 0
def export_interactions(
self,
site_id: int = None,
start_date: str = None,
end_date: str = None
) -> List[Dict]:
"""导出互动记录"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
conditions = []
params = []
if site_id:
conditions.append(f"i.site_id = {ph}")
params.append(site_id)
if start_date:
conditions.append(f"i.interaction_time >= {ph}")
params.append(f"{start_date} 00:00:00")
if end_date:
conditions.append(f"i.interaction_time <= {ph}")
params.append(f"{end_date} 23:59:59")
where_clause = ' AND '.join(conditions) if conditions else '1=1'
sql = f"""
SELECT i.id, i.site_id, s.site_name, s.site_url, i.interaction_time,
i.interaction_type, i.interaction_status, i.reply_content,
i.response_received, i.response_content, i.proxy_ip
FROM ai_mip_interaction i
LEFT JOIN ai_mip_site s ON i.site_id = s.id
WHERE {where_clause}
ORDER BY i.interaction_time DESC
"""
cursor = self._execute_query(conn, sql, tuple(params) if params else None)
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows]
except Exception as e:
logger.error(f"导出互动记录失败: {str(e)}")
return []
class EnhancedStatisticsManager(StatisticsManager):
"""增强的统计管理器,支持图表数据"""
def get_click_trend(self, days: int = 7) -> Dict:
"""
Get click trend data
Args:
days: number of days
Returns:
{'dates': [...], 'clicks': [...], 'successes': [...]}
"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
# Click trend
click_sql = f"""
SELECT DATE(click_time) as date, COUNT(*) as count
FROM ai_mip_click
WHERE click_time >= DATE_SUB(CURDATE(), INTERVAL {ph} DAY)
GROUP BY DATE(click_time)
ORDER BY date
"""
cursor = self._execute_query(conn, click_sql, (days,))
click_rows = cursor.fetchall()
# Success count trend (is_successful = 1)
success_sql = f"""
SELECT DATE(interaction_time) as date, COUNT(*) as count
FROM ai_mip_interaction
WHERE interaction_time >= DATE_SUB(CURDATE(), INTERVAL {ph} DAY)
AND is_successful = 1
GROUP BY DATE(interaction_time)
ORDER BY date
"""
cursor = self._execute_query(conn, success_sql, (days,))
success_rows = cursor.fetchall()
conn.close()
# Build the result (datetime is needed for datetime.now() below)
from datetime import datetime, timedelta
dates = []
clicks = []
successes = []
click_map = {str(row['date']): row['count'] for row in click_rows}
success_map = {str(row['date']): row['count'] for row in success_rows}
for i in range(days - 1, -1, -1):
date = (datetime.now() - timedelta(days=i)).strftime('%Y-%m-%d')
dates.append(date)
clicks.append(click_map.get(date, 0))
successes.append(success_map.get(date, 0))
return {
'dates': dates,
'clicks': clicks,
'successes': successes
}
except Exception as e:
logger.error(f"获取点击趋势失败: {str(e)}")
return {'dates': [], 'clicks': [], 'successes': []}
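The trend dict maps one-to-one onto the category axis and series arrays most chart libraries expect; a sketch of that mapping follows (ECharts-style option shown, names and the no-argument constructor are assumptions).
stats_manager = EnhancedStatisticsManager()  # assumes a no-argument constructor
trend = stats_manager.get_click_trend(days=7)
chart_option = {
    'xAxis': {'type': 'category', 'data': trend['dates']},
    'yAxis': {'type': 'value'},
    'series': [
        {'name': 'clicks', 'type': 'line', 'data': trend['clicks']},
        {'name': 'successes', 'type': 'line', 'data': trend['successes']},
    ],
}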
def get_hourly_distribution(self) -> Dict:
"""
Get clicks grouped by hour of day
Returns:
{'hours': [0-23], 'clicks': [...]}
"""
try:
conn = self.get_connection()
sql = """
SELECT HOUR(click_time) as hour, COUNT(*) as count
FROM ai_mip_click
WHERE click_time >= DATE_SUB(NOW(), INTERVAL 7 DAY)
GROUP BY HOUR(click_time)
ORDER BY hour
"""
cursor = self._execute_query(conn, sql)
rows = cursor.fetchall()
conn.close()
hour_map = {row['hour']: row['count'] for row in rows}
hours = list(range(24))
clicks = [hour_map.get(h, 0) for h in hours]
return {
'hours': hours,
'clicks': clicks
}
except Exception as e:
logger.error(f"获取时段分布失败: {str(e)}")
return {'hours': list(range(24)), 'clicks': [0] * 24}
def get_top_sites(self, limit: int = 10) -> List[Dict]:
"""
Get the top active sites
Args:
limit: number of sites to return
Returns:
List of sites [{'site_name', 'click_count', 'reply_count'}, ...]
"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
sql = f"""
SELECT id, site_name, site_url, click_count, reply_count
FROM ai_mip_site
WHERE status = 'active'
ORDER BY click_count DESC
LIMIT {ph}
"""
cursor = self._execute_query(conn, sql, (limit,))
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows]
except Exception as e:
logger.error(f"获取Top站点失败: {str(e)}")
return []
def get_reply_rate_distribution(self) -> Dict:
"""
Get reply-rate distribution data (for a pie chart)
Returns:
{'labels': [...], 'values': [...]}
"""
try:
conn = self.get_connection()
# Get total clicks and total replies
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_click")
total_clicks = cursor.fetchone()['total']
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_interaction WHERE response_received = 1")
total_replies = cursor.fetchone()['total']
conn.close()
no_reply = total_clicks - total_replies if total_clicks > total_replies else 0
return {
'labels': ['有回复', '无回复'],
'values': [total_replies, no_reply]
}
except Exception as e:
logger.error(f"获取回复率分布失败: {str(e)}")
return {'labels': ['有回复', '无回复'], 'values': [0, 0]}
class QueryImportLogManager(DatabaseManager):
"""Query导入日志管理器"""
def ensure_table(self):
"""确保 query_import_log 表存在"""
try:
conn = self.get_connection()
cursor = conn.cursor()
cursor.execute("""
CREATE TABLE IF NOT EXISTS `query_import_log` (
`id` INT AUTO_INCREMENT PRIMARY KEY,
`filename` VARCHAR(255) NOT NULL COMMENT 'Uploaded file name',
`filepath` VARCHAR(500) NOT NULL COMMENT 'Full file path',
`upload_time` DATETIME NOT NULL COMMENT 'Upload time',
`import_time` DATETIME NULL COMMENT 'Actual import time',
`status` VARCHAR(20) DEFAULT 'pending' COMMENT 'Import status',
`total_count` INT DEFAULT 0 COMMENT 'Total number of rows',
`success_count` INT DEFAULT 0 COMMENT 'Rows inserted successfully',
`skip_count` INT DEFAULT 0 COMMENT 'Rows skipped (already exist)',
`fail_count` INT DEFAULT 0 COMMENT 'Rows failed',
`error_message` TEXT NULL COMMENT 'Error message',
`created_at` DATETIME DEFAULT CURRENT_TIMESTAMP,
`updated_at` DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
INDEX `idx_status` (`status`),
INDEX `idx_upload_time` (`upload_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='Keyword import log table'
""")
conn.commit()
conn.close()
except Exception as e:
logger.error(f"创建 query_import_log 表失败: {e}")
def create_log(self, filename: str, filepath: str) -> Optional[int]:
"""创建导入日志记录"""
try:
self.ensure_table()
conn = self.get_connection()
ph = self._get_placeholder()
cursor = conn.cursor()
cursor.execute(
f"INSERT INTO query_import_log (filename, filepath, upload_time, status) VALUES ({ph}, {ph}, NOW(), 'pending')",
(filename, filepath)
)
log_id = cursor.lastrowid
conn.commit()
conn.close()
logger.info(f"创建导入日志: {filename} (ID: {log_id})")
return log_id
except Exception as e:
logger.error(f"创建导入日志失败: {e}")
return None
def update_status(self, log_id: int, status: str,
total_count: int = 0, success_count: int = 0,
skip_count: int = 0, fail_count: int = 0,
error_message: str = None):
"""更新导入状态和统计数据"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = conn.cursor()
import_time_sql = ", import_time = NOW()" if status in ('running', 'completed', 'failed') else ""
cursor.execute(
f"""UPDATE query_import_log
SET status = {ph}, total_count = {ph}, success_count = {ph},
skip_count = {ph}, fail_count = {ph}, error_message = {ph}
{import_time_sql}
WHERE id = {ph}""",
(status, total_count, success_count, skip_count, fail_count, error_message, log_id)
)
conn.commit()
conn.close()
except Exception as e:
logger.error(f"更新导入日志失败: {e}")
def get_pending_logs(self) -> List[Dict]:
"""获取待处理的导入日志"""
try:
self.ensure_table()
conn = self.get_connection()
cursor = self._execute_query(
conn, "SELECT * FROM query_import_log WHERE status = 'pending' ORDER BY created_at ASC"
)
rows = cursor.fetchall()
conn.close()
return [self._dict_from_row(row) for row in rows]
except Exception as e:
logger.error(f"查询待处理日志失败: {e}")
return []
def get_logs_paginated(self, page: int = 1, page_size: int = 20) -> Dict:
"""分页获取导入日志"""
try:
self.ensure_table()
conn = self.get_connection()
ph = self._get_placeholder()
# Total count
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM query_import_log")
total = cursor.fetchone()['total']
# Page data
offset = (page - 1) * page_size
cursor = self._execute_query(
conn,
f"SELECT * FROM query_import_log ORDER BY created_at DESC LIMIT {ph} OFFSET {ph}",
(page_size, offset)
)
rows = cursor.fetchall()
conn.close()
return {
'items': [self._dict_from_row(row) for row in rows],
'total': total,
'page': page,
'page_size': page_size
}
except Exception as e:
logger.error(f"分页查询导入日志失败: {e}")
return {'items': [], 'total': 0, 'page': page, 'page_size': page_size}
def is_file_logged(self, filepath: str) -> bool:
"""检查文件是否已有导入记录"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = self._execute_query(
conn,
f"SELECT COUNT(*) as cnt FROM query_import_log WHERE filepath = {ph}",
(filepath,)
)
cnt = cursor.fetchone()['cnt']
conn.close()
return cnt > 0
except Exception as e:
logger.error(f"检查文件日志失败: {e}")
return False
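QueryImportLogManager is evidently meant to be polled by a background importer: pick up pending logs, mark them running, import the file, then record the counts. A sketch of that loop follows; parse_keyword_file is a hypothetical placeholder for whatever parses the uploaded file, and both manager instances are assumed to be constructed elsewhere.
import time

def run_import_worker(log_mgr, kw_mgr, poll_seconds=30):
    """Hypothetical worker loop tying QueryImportLogManager to QueryKeywordManager."""
    while True:
        for log in log_mgr.get_pending_logs():
            log_mgr.update_status(log['id'], 'running')
            try:
                keyword_list = parse_keyword_file(log['filepath'])  # placeholder parser
                stats = kw_mgr.batch_insert_keywords(keyword_list)
                log_mgr.update_status(
                    log['id'], 'completed',
                    total_count=len(keyword_list),
                    success_count=stats['success'],
                    skip_count=stats['skip'],
                    fail_count=stats['fail'],
                )
            except Exception as exc:
                log_mgr.update_status(log['id'], 'failed', error_message=str(exc))
        time.sleep(poll_seconds)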
class QueryKeywordManager(DatabaseManager):
"""Query关键词管理器 - 操作 baidu_keyword 表"""
def insert_keyword(self, keyword: str, seed_id: int = 9999, seed_name: str = '手动提交',
crawled: int = 1, department: str = '', department_id: int = 0,
author_id: int = 0, author_name: str = '') -> int:
"""
Insert a single keyword into the baidu_keyword table (INSERT IGNORE)
Returns:
affected rows: 1 = newly inserted, 0 = skipped (already exists), -1 = failure
"""
try:
conn = self.get_connection()
ph = self._get_placeholder()
cursor = conn.cursor()
cursor.execute(
f"""INSERT IGNORE INTO baidu_keyword
(keyword, seed_id, seed_name, crawled, parents_id, created_at,
department, department_id, query_status, author_id, author_name)
VALUES ({ph}, {ph}, {ph}, {ph}, 0, NOW(), {ph}, {ph}, 'manual_review', {ph}, {ph})""",
(keyword, seed_id, seed_name, crawled, department, department_id, author_id, author_name)
)
affected = cursor.rowcount
conn.commit()
conn.close()
return affected
except Exception as e:
logger.error(f"插入关键词失败: {keyword} - {e}")
return -1
def batch_insert_keywords(self, keyword_list: list, seed_id: int = 9999,
seed_name: str = '手动提交', crawled: int = 1,
query_status: str = 'manual_review') -> dict:
"""
Batch insert keywords into the baidu_keyword table (INSERT IGNORE)
Args:
keyword_list: [{'keyword': str, 'department': str, 'seed_name': str (optional)}, ...]
query_status: query_status value to write ('draft' or 'manual_review')
Returns:
{'success': int, 'skip': int, 'fail': int}
"""
stats = {'success': 0, 'skip': 0, 'fail': 0}
if not keyword_list:
return stats
try:
conn = self.get_connection()
cursor = conn.cursor()
values = []
for item in keyword_list:
values.append((
item['keyword'], seed_id, seed_name, crawled,
item.get('department', ''), query_status
))
cursor.executemany(
"""INSERT IGNORE INTO baidu_keyword
(keyword, seed_id, seed_name, crawled, parents_id, created_at,
department, department_id, query_status, author_id, author_name)
VALUES (%s, %s, %s, %s, 0, NOW(), %s, 0, %s, 0, '')""",
values
)
# With INSERT IGNORE, executemany's rowcount reflects the rows actually inserted
inserted = cursor.rowcount
conn.commit()
conn.close()
stats['success'] = inserted
stats['skip'] = len(keyword_list) - inserted
return stats
except Exception as e:
logger.error(f"批量插入关键词失败: {e}")
stats['fail'] = len(keyword_list)
return stats
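A usage sketch for batch_insert_keywords, assuming a plain text file with one keyword per line; the helper name, file format, and no-argument QueryKeywordManager() construction are assumptions, not part of this commit.
def import_keywords_from_txt(path, department=''):
    """Hypothetical helper: read one keyword per line and batch-insert (INSERT IGNORE)."""
    with open(path, 'r', encoding='utf-8') as f:
        keyword_list = [
            {'keyword': line.strip(), 'department': department}
            for line in f if line.strip()
        ]
    manager = QueryKeywordManager()  # assumes a no-argument constructor
    return manager.batch_insert_keywords(keyword_list, query_status='manual_review')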