commit
This commit is contained in:
824
db_manager.py
824
db_manager.py
@@ -48,10 +48,32 @@ class DatabaseManager:
|
||||
return conn
|
||||
|
||||
def _dict_from_row(self, row) -> Dict:
|
||||
"""将数据库行转换为字典"""
|
||||
"""将数据库行转换为字典,处理特殊类型"""
|
||||
if row is None:
|
||||
return None
|
||||
return dict(row) if isinstance(row, dict) else row
|
||||
|
||||
result = dict(row) if isinstance(row, dict) else row
|
||||
|
||||
# 处理特殊类型,确保JSON可序列化
|
||||
if isinstance(result, dict):
|
||||
from datetime import datetime, date, timedelta
|
||||
from decimal import Decimal
|
||||
|
||||
for key, value in result.items():
|
||||
if isinstance(value, datetime):
|
||||
result[key] = value.strftime('%Y-%m-%d %H:%M:%S')
|
||||
elif isinstance(value, date):
|
||||
result[key] = value.strftime('%Y-%m-%d')
|
||||
elif isinstance(value, timedelta):
|
||||
# 将timedelta转换为字符串格式 HH:MM:SS
|
||||
total_seconds = int(value.total_seconds())
|
||||
hours, remainder = divmod(total_seconds, 3600)
|
||||
minutes, seconds = divmod(remainder, 60)
|
||||
result[key] = f'{hours:02d}:{minutes:02d}:{seconds:02d}'
|
||||
elif isinstance(value, Decimal):
|
||||
result[key] = float(value)
|
||||
|
||||
return result
|
||||
|
||||
def _get_placeholder(self) -> str:
|
||||
"""获取SQL占位符,MySQL使用 %s"""
|
||||
@@ -816,3 +838,801 @@ class QueryTaskManager(DatabaseManager):
|
||||
except Exception as e:
|
||||
logger.error(f"获取任务统计失败: {str(e)}")
|
||||
return {}
|
||||
|
||||
|
||||
class EnhancedSiteManager(SiteManager):
    """Site manager extended with pagination, sorting and filtering."""

    def get_sites_paginated(
        self,
        page: int = 1,
        page_size: int = 20,
        status: str = None,
        keyword: str = None,
        sort_by: str = 'created_at',
        sort_order: str = 'desc'
    ) -> tuple:
        """Fetch one page of sites.

        Args:
            page: 1-based page number.
            page_size: Rows per page.
            status: Optional exact-match status filter.
            keyword: Optional substring filter on site_url / site_name.
            sort_by: Sort column (whitelisted; falls back to 'created_at').
            sort_order: 'desc' or 'asc' (case-insensitive).

        Returns:
            (list of site dicts, total count); ([], 0) on failure.
        """
        try:
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()

                # Build parameterized WHERE conditions.
                conditions = []
                params = []

                if status:
                    conditions.append(f"status = {ph}")
                    params.append(status)

                if keyword:
                    conditions.append(f"(site_url LIKE {ph} OR site_name LIKE {ph})")
                    params.extend([f'%{keyword}%', f'%{keyword}%'])

                where_clause = ' AND '.join(conditions) if conditions else '1=1'

                # Whitelist sort fields — ORDER BY cannot be parameterized,
                # so this guards against SQL injection via sort_by.
                allowed_sort_fields = ['created_at', 'click_count', 'reply_count', 'site_url', 'status']
                if sort_by not in allowed_sort_fields:
                    sort_by = 'created_at'

                sort_order = 'DESC' if sort_order.upper() == 'DESC' else 'ASC'

                # Total row count for the current filter.
                count_sql = f"SELECT COUNT(*) as total FROM ai_mip_site WHERE {where_clause}"
                cursor = self._execute_query(conn, count_sql, tuple(params) if params else None)
                total = cursor.fetchone()['total']

                # Page of data.
                offset = (page - 1) * page_size
                data_sql = f"""
                    SELECT * FROM ai_mip_site
                    WHERE {where_clause}
                    ORDER BY {sort_by} {sort_order}
                    LIMIT {ph} OFFSET {ph}
                """
                params.extend([page_size, offset])

                cursor = self._execute_query(conn, data_sql, tuple(params))
                rows = cursor.fetchall()

                return [self._dict_from_row(row) for row in rows], total
            finally:
                # Close even when a query raises, so connections don't leak.
                conn.close()

        except Exception as e:
            logger.error(f"分页查询站点失败: {str(e)}")
            return [], 0

    def delete_sites_batch(self, site_ids: List[int]) -> int:
        """Delete multiple sites by id.

        Returns:
            Number of rows actually deleted (0 on empty input or failure).
        """
        if not site_ids:
            return 0

        try:
            conn = self.get_connection()
            try:
                # Use the shared placeholder helper for consistency with the
                # rest of the class (was a hard-coded '%s').
                ph = self._get_placeholder()
                placeholders = ','.join([ph] * len(site_ids))
                sql = f"DELETE FROM ai_mip_site WHERE id IN ({placeholders})"

                cursor = conn.cursor()
                cursor.execute(sql, tuple(site_ids))
                deleted = cursor.rowcount
                conn.commit()
            finally:
                conn.close()

            logger.info(f"批量删除站点: {deleted}/{len(site_ids)}")
            return deleted

        except Exception as e:
            logger.error(f"批量删除站点失败: {str(e)}")
            return 0

    def update_sites_status_batch(self, site_ids: List[int], status: str) -> int:
        """Set *status* on multiple sites by id.

        Returns:
            Number of rows actually updated (0 on empty input or failure).
        """
        if not site_ids:
            return 0

        try:
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()
                placeholders = ','.join([ph] * len(site_ids))
                sql = f"UPDATE ai_mip_site SET status = {ph} WHERE id IN ({placeholders})"

                cursor = conn.cursor()
                cursor.execute(sql, (status, *site_ids))
                updated = cursor.rowcount
                conn.commit()
            finally:
                conn.close()

            logger.info(f"批量更新站点状态为{status}: {updated}/{len(site_ids)}")
            return updated

        except Exception as e:
            logger.error(f"批量更新站点状态失败: {str(e)}")
            return 0

    def export_sites(self, status: str = None, keyword: str = None) -> List[Dict]:
        """Export site rows matching the optional status/keyword filters."""
        try:
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()

                conditions = []
                params = []

                if status:
                    conditions.append(f"status = {ph}")
                    params.append(status)

                if keyword:
                    conditions.append(f"(site_url LIKE {ph} OR site_name LIKE {ph})")
                    params.extend([f'%{keyword}%', f'%{keyword}%'])

                where_clause = ' AND '.join(conditions) if conditions else '1=1'

                sql = f"""
                    SELECT id, site_url, site_name, status, click_count, reply_count,
                           frequency, time_start, time_end, site_dimension, query_word,
                           created_at
                    FROM ai_mip_site
                    WHERE {where_clause}
                    ORDER BY created_at DESC
                """

                cursor = self._execute_query(conn, sql, tuple(params) if params else None)
                rows = cursor.fetchall()

                return [self._dict_from_row(row) for row in rows]
            finally:
                conn.close()

        except Exception as e:
            logger.error(f"导出站点数据失败: {str(e)}")
            return []
|
||||
|
||||
|
||||
class EnhancedClickManager(ClickManager):
    """Click-record manager extended with pagination and export."""

    def get_clicks_paginated(
        self,
        page: int = 1,
        page_size: int = 20,
        site_id: int = None,
        start_date: str = None,
        end_date: str = None,
        sort_by: str = 'click_time',
        sort_order: str = 'desc'
    ) -> tuple:
        """Fetch one page of click records (joined with the site name).

        Args:
            page: 1-based page number.
            page_size: Rows per page.
            site_id: Optional site filter.
            start_date / end_date: Optional 'YYYY-MM-DD' bounds (inclusive).
            sort_by: Sort column (whitelisted; falls back to 'click_time').
            sort_order: 'desc' or 'asc'.

        Returns:
            (list of click dicts, total count); ([], 0) on failure.
        """
        try:
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()

                conditions = []
                params = []

                if site_id:
                    conditions.append(f"c.site_id = {ph}")
                    params.append(site_id)

                if start_date:
                    conditions.append(f"c.click_time >= {ph}")
                    params.append(f"{start_date} 00:00:00")

                if end_date:
                    conditions.append(f"c.click_time <= {ph}")
                    params.append(f"{end_date} 23:59:59")

                where_clause = ' AND '.join(conditions) if conditions else '1=1'

                # Whitelist sort fields: ORDER BY cannot be parameterized.
                allowed_sort_fields = ['click_time', 'site_id', 'device_type']
                if sort_by not in allowed_sort_fields:
                    sort_by = 'click_time'

                sort_order = 'DESC' if sort_order.upper() == 'DESC' else 'ASC'

                # Total row count for the current filter.
                count_sql = f"SELECT COUNT(*) as total FROM ai_mip_click c WHERE {where_clause}"
                cursor = self._execute_query(conn, count_sql, tuple(params) if params else None)
                total = cursor.fetchone()['total']

                # Page of data with the owning site's name joined in.
                offset = (page - 1) * page_size
                data_sql = f"""
                    SELECT c.*, s.site_name
                    FROM ai_mip_click c
                    LEFT JOIN ai_mip_site s ON c.site_id = s.id
                    WHERE {where_clause}
                    ORDER BY c.{sort_by} {sort_order}
                    LIMIT {ph} OFFSET {ph}
                """
                params.extend([page_size, offset])

                cursor = self._execute_query(conn, data_sql, tuple(params))
                rows = cursor.fetchall()

                return [self._dict_from_row(row) for row in rows], total
            finally:
                # Close even when a query raises, so connections don't leak.
                conn.close()

        except Exception as e:
            logger.error(f"分页查询点击记录失败: {str(e)}")
            return [], 0

    def export_clicks(
        self,
        site_id: int = None,
        start_date: str = None,
        end_date: str = None
    ) -> List[Dict]:
        """Export click records matching the optional filters."""
        try:
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()

                conditions = []
                params = []

                if site_id:
                    conditions.append(f"c.site_id = {ph}")
                    params.append(site_id)

                if start_date:
                    conditions.append(f"c.click_time >= {ph}")
                    params.append(f"{start_date} 00:00:00")

                if end_date:
                    conditions.append(f"c.click_time <= {ph}")
                    params.append(f"{end_date} 23:59:59")

                where_clause = ' AND '.join(conditions) if conditions else '1=1'

                sql = f"""
                    SELECT c.id, c.site_id, s.site_name, c.site_url, c.click_time,
                           c.user_ip, c.device_type, c.task_id
                    FROM ai_mip_click c
                    LEFT JOIN ai_mip_site s ON c.site_id = s.id
                    WHERE {where_clause}
                    ORDER BY c.click_time DESC
                """

                cursor = self._execute_query(conn, sql, tuple(params) if params else None)
                rows = cursor.fetchall()

                return [self._dict_from_row(row) for row in rows]
            finally:
                conn.close()

        except Exception as e:
            logger.error(f"导出点击记录失败: {str(e)}")
            return []
|
||||
|
||||
|
||||
class EnhancedInteractionManager(InteractionManager):
    """Interaction-record manager extended with pagination and export."""

    def get_interactions_paginated(
        self,
        page: int = 1,
        page_size: int = 20,
        site_id: int = None,
        start_date: str = None,
        end_date: str = None,
        status: str = None,
        sort_by: str = 'interaction_time',
        sort_order: str = 'desc'
    ) -> tuple:
        """Fetch one page of interaction records (joined with site info).

        Args:
            page: 1-based page number.
            page_size: Rows per page.
            site_id: Optional site filter.
            start_date / end_date: Optional 'YYYY-MM-DD' bounds (inclusive).
            status: Optional interaction_status filter.
            sort_by: Sort column (whitelisted; falls back to 'interaction_time').
            sort_order: 'desc' or 'asc'.

        Returns:
            (list of interaction dicts, total count); ([], 0) on failure.
        """
        try:
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()

                conditions = []
                params = []

                if site_id:
                    conditions.append(f"i.site_id = {ph}")
                    params.append(site_id)

                if start_date:
                    conditions.append(f"i.interaction_time >= {ph}")
                    params.append(f"{start_date} 00:00:00")

                if end_date:
                    conditions.append(f"i.interaction_time <= {ph}")
                    params.append(f"{end_date} 23:59:59")

                if status:
                    conditions.append(f"i.interaction_status = {ph}")
                    params.append(status)

                where_clause = ' AND '.join(conditions) if conditions else '1=1'

                # Whitelist sort fields: ORDER BY cannot be parameterized.
                allowed_sort_fields = ['interaction_time', 'site_id', 'interaction_status']
                if sort_by not in allowed_sort_fields:
                    sort_by = 'interaction_time'

                sort_order = 'DESC' if sort_order.upper() == 'DESC' else 'ASC'

                # Total row count for the current filter.
                count_sql = f"SELECT COUNT(*) as total FROM ai_mip_interaction i WHERE {where_clause}"
                cursor = self._execute_query(conn, count_sql, tuple(params) if params else None)
                total = cursor.fetchone()['total']

                # Page of data; site_url aliased so it can't clash with i.site_url.
                offset = (page - 1) * page_size
                data_sql = f"""
                    SELECT i.*, s.site_name, s.site_url as site_url_ref
                    FROM ai_mip_interaction i
                    LEFT JOIN ai_mip_site s ON i.site_id = s.id
                    WHERE {where_clause}
                    ORDER BY i.{sort_by} {sort_order}
                    LIMIT {ph} OFFSET {ph}
                """
                params.extend([page_size, offset])

                cursor = self._execute_query(conn, data_sql, tuple(params))
                rows = cursor.fetchall()

                return [self._dict_from_row(row) for row in rows], total
            finally:
                # Close even when a query raises, so connections don't leak.
                conn.close()

        except Exception as e:
            logger.error(f"分页查询互动记录失败: {str(e)}")
            return [], 0

    def export_interactions(
        self,
        site_id: int = None,
        start_date: str = None,
        end_date: str = None
    ) -> List[Dict]:
        """Export interaction records matching the optional filters."""
        try:
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()

                conditions = []
                params = []

                if site_id:
                    conditions.append(f"i.site_id = {ph}")
                    params.append(site_id)

                if start_date:
                    conditions.append(f"i.interaction_time >= {ph}")
                    params.append(f"{start_date} 00:00:00")

                if end_date:
                    conditions.append(f"i.interaction_time <= {ph}")
                    params.append(f"{end_date} 23:59:59")

                where_clause = ' AND '.join(conditions) if conditions else '1=1'

                sql = f"""
                    SELECT i.id, i.site_id, s.site_name, s.site_url, i.interaction_time,
                           i.interaction_type, i.interaction_status, i.reply_content,
                           i.response_received, i.response_content, i.proxy_ip
                    FROM ai_mip_interaction i
                    LEFT JOIN ai_mip_site s ON i.site_id = s.id
                    WHERE {where_clause}
                    ORDER BY i.interaction_time DESC
                """

                cursor = self._execute_query(conn, sql, tuple(params) if params else None)
                rows = cursor.fetchall()

                return [self._dict_from_row(row) for row in rows]
            finally:
                conn.close()

        except Exception as e:
            logger.error(f"导出互动记录失败: {str(e)}")
            return []
|
||||
|
||||
|
||||
class EnhancedStatisticsManager(StatisticsManager):
    """Statistics manager extended with chart-oriented aggregations.

    NOTE(review): the SQL uses MySQL-specific functions (DATE_SUB, CURDATE,
    HOUR) — this class assumes a MySQL backend; confirm against get_connection.
    """

    def get_click_trend(self, days: int = 7) -> Dict:
        """Daily click and success counts for the last *days* days.

        Args:
            days: Number of trailing days to cover (today inclusive).

        Returns:
            {'dates': [...], 'clicks': [...], 'successes': [...]} with one
            entry per day in chronological order; empty lists on failure.
        """
        try:
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()

                # Clicks per day.
                click_sql = f"""
                    SELECT DATE(click_time) as date, COUNT(*) as count
                    FROM ai_mip_click
                    WHERE click_time >= DATE_SUB(CURDATE(), INTERVAL {ph} DAY)
                    GROUP BY DATE(click_time)
                    ORDER BY date
                """
                cursor = self._execute_query(conn, click_sql, (days,))
                click_rows = cursor.fetchall()

                # Successful interactions per day (is_successful = 1).
                success_sql = f"""
                    SELECT DATE(interaction_time) as date, COUNT(*) as count
                    FROM ai_mip_interaction
                    WHERE interaction_time >= DATE_SUB(CURDATE(), INTERVAL {ph} DAY)
                    AND is_successful = 1
                    GROUP BY DATE(interaction_time)
                    ORDER BY date
                """
                cursor = self._execute_query(conn, success_sql, (days,))
                success_rows = cursor.fetchall()
            finally:
                # Close even when a query raises, so connections don't leak.
                conn.close()

            # Import both names: the original only imported timedelta here
            # while also calling datetime.now(), relying on an (unverified)
            # module-level import of datetime.
            from datetime import datetime, timedelta

            # Fill gaps: days with no rows get an explicit 0.
            click_map = {str(row['date']): row['count'] for row in click_rows}
            success_map = {str(row['date']): row['count'] for row in success_rows}

            dates = []
            clicks = []
            successes = []
            for i in range(days - 1, -1, -1):
                date = (datetime.now() - timedelta(days=i)).strftime('%Y-%m-%d')
                dates.append(date)
                clicks.append(click_map.get(date, 0))
                successes.append(success_map.get(date, 0))

            return {
                'dates': dates,
                'clicks': clicks,
                'successes': successes
            }

        except Exception as e:
            logger.error(f"获取点击趋势失败: {str(e)}")
            return {'dates': [], 'clicks': [], 'successes': []}

    def get_hourly_distribution(self) -> Dict:
        """Click counts bucketed by hour of day over the last 7 days.

        Returns:
            {'hours': [0..23], 'clicks': [count per hour]}; zeros on failure.
        """
        try:
            conn = self.get_connection()
            try:
                sql = """
                    SELECT HOUR(click_time) as hour, COUNT(*) as count
                    FROM ai_mip_click
                    WHERE click_time >= DATE_SUB(NOW(), INTERVAL 7 DAY)
                    GROUP BY HOUR(click_time)
                    ORDER BY hour
                """
                cursor = self._execute_query(conn, sql)
                rows = cursor.fetchall()
            finally:
                conn.close()

            # Fill all 24 hours so missing buckets read as 0.
            hour_map = {row['hour']: row['count'] for row in rows}
            hours = list(range(24))
            clicks = [hour_map.get(h, 0) for h in hours]

            return {
                'hours': hours,
                'clicks': clicks
            }

        except Exception as e:
            logger.error(f"获取时段分布失败: {str(e)}")
            return {'hours': list(range(24)), 'clicks': [0] * 24}

    def get_top_sites(self, limit: int = 10) -> List[Dict]:
        """Most-clicked active sites.

        Args:
            limit: Maximum number of sites to return.

        Returns:
            Site dicts ordered by click_count descending; [] on failure.
        """
        try:
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()

                sql = f"""
                    SELECT id, site_name, site_url, click_count, reply_count
                    FROM ai_mip_site
                    WHERE status = 'active'
                    ORDER BY click_count DESC
                    LIMIT {ph}
                """
                cursor = self._execute_query(conn, sql, (limit,))
                rows = cursor.fetchall()

                return [self._dict_from_row(row) for row in rows]
            finally:
                conn.close()

        except Exception as e:
            logger.error(f"获取Top站点失败: {str(e)}")
            return []

    def get_reply_rate_distribution(self) -> Dict:
        """Replied vs. not-replied totals, shaped for a pie chart.

        Returns:
            {'labels': ['有回复', '无回复'], 'values': [replies, no_reply]}.
        """
        try:
            conn = self.get_connection()
            try:
                # Total clicks and total interactions that got a response.
                cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_click")
                total_clicks = cursor.fetchone()['total']

                cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_interaction WHERE response_received = 1")
                total_replies = cursor.fetchone()['total']
            finally:
                conn.close()

            # Clamp at 0: replies can exceed clicks if data is inconsistent.
            no_reply = total_clicks - total_replies if total_clicks > total_replies else 0

            return {
                'labels': ['有回复', '无回复'],
                'values': [total_replies, no_reply]
            }

        except Exception as e:
            logger.error(f"获取回复率分布失败: {str(e)}")
            return {'labels': ['有回复', '无回复'], 'values': [0, 0]}
|
||||
|
||||
|
||||
class QueryImportLogManager(DatabaseManager):
    """Manager for the query_import_log table (keyword-file import audit log)."""

    def ensure_table(self):
        """Create the query_import_log table if it does not exist yet."""
        try:
            conn = self.get_connection()
            try:
                cursor = conn.cursor()
                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS `query_import_log` (
                        `id` INT AUTO_INCREMENT PRIMARY KEY,
                        `filename` VARCHAR(255) NOT NULL COMMENT '上传的文件名',
                        `filepath` VARCHAR(500) NOT NULL COMMENT '文件完整路径',
                        `upload_time` DATETIME NOT NULL COMMENT '上传时间',
                        `import_time` DATETIME NULL COMMENT '实际导入时间',
                        `status` VARCHAR(20) DEFAULT 'pending' COMMENT '导入状态',
                        `total_count` INT DEFAULT 0 COMMENT '总行数',
                        `success_count` INT DEFAULT 0 COMMENT '成功插入数',
                        `skip_count` INT DEFAULT 0 COMMENT '跳过数(已存在)',
                        `fail_count` INT DEFAULT 0 COMMENT '失败数',
                        `error_message` TEXT NULL COMMENT '错误信息',
                        `created_at` DATETIME DEFAULT CURRENT_TIMESTAMP,
                        `updated_at` DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
                        INDEX `idx_status` (`status`),
                        INDEX `idx_upload_time` (`upload_time`)
                    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='关键词导入日志表'
                """)
                conn.commit()
            finally:
                # Close even on failure so connections don't leak.
                conn.close()
        except Exception as e:
            logger.error(f"创建 query_import_log 表失败: {e}")

    def create_log(self, filename: str, filepath: str) -> Optional[int]:
        """Insert a new pending import-log row.

        Args:
            filename: Original uploaded file name.
            filepath: Full path of the stored file.

        Returns:
            The new row id, or None on failure.
        """
        try:
            self.ensure_table()
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()
                cursor = conn.cursor()
                cursor.execute(
                    f"INSERT INTO query_import_log (filename, filepath, upload_time, status) VALUES ({ph}, {ph}, NOW(), 'pending')",
                    (filename, filepath)
                )
                log_id = cursor.lastrowid
                conn.commit()
            finally:
                conn.close()
            # Fixed: the message previously logged a literal placeholder
            # instead of the actual filename.
            logger.info(f"创建导入日志: {filename} (ID: {log_id})")
            return log_id
        except Exception as e:
            logger.error(f"创建导入日志失败: {e}")
            return None

    def update_status(self, log_id: int, status: str,
                      total_count: int = 0, success_count: int = 0,
                      skip_count: int = 0, fail_count: int = 0,
                      error_message: str = None):
        """Update status and counters of an import-log row.

        import_time is stamped whenever the status enters a processing or
        terminal state ('running', 'completed', 'failed').
        """
        try:
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()
                cursor = conn.cursor()

                # Fixed fragment, not user input — safe to interpolate.
                import_time_sql = ", import_time = NOW()" if status in ('running', 'completed', 'failed') else ""

                cursor.execute(
                    f"""UPDATE query_import_log
                        SET status = {ph}, total_count = {ph}, success_count = {ph},
                            skip_count = {ph}, fail_count = {ph}, error_message = {ph}
                            {import_time_sql}
                        WHERE id = {ph}""",
                    (status, total_count, success_count, skip_count, fail_count, error_message, log_id)
                )
                conn.commit()
            finally:
                conn.close()
        except Exception as e:
            logger.error(f"更新导入日志失败: {e}")

    def get_pending_logs(self) -> List[Dict]:
        """Return pending import-log rows, oldest first."""
        try:
            self.ensure_table()
            conn = self.get_connection()
            try:
                cursor = self._execute_query(
                    conn, "SELECT * FROM query_import_log WHERE status = 'pending' ORDER BY created_at ASC"
                )
                rows = cursor.fetchall()
                return [self._dict_from_row(row) for row in rows]
            finally:
                conn.close()
        except Exception as e:
            logger.error(f"查询待处理日志失败: {e}")
            return []

    def get_logs_paginated(self, page: int = 1, page_size: int = 20) -> Dict:
        """Return one page of import logs, newest first.

        Returns:
            {'items': [...], 'total': int, 'page': int, 'page_size': int}.
        """
        try:
            self.ensure_table()
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()

                # Total row count.
                cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM query_import_log")
                total = cursor.fetchone()['total']

                # Page of data.
                offset = (page - 1) * page_size
                cursor = self._execute_query(
                    conn,
                    f"SELECT * FROM query_import_log ORDER BY created_at DESC LIMIT {ph} OFFSET {ph}",
                    (page_size, offset)
                )
                rows = cursor.fetchall()
            finally:
                conn.close()

            return {
                'items': [self._dict_from_row(row) for row in rows],
                'total': total,
                'page': page,
                'page_size': page_size
            }
        except Exception as e:
            logger.error(f"分页查询导入日志失败: {e}")
            return {'items': [], 'total': 0, 'page': page, 'page_size': page_size}

    def is_file_logged(self, filepath: str) -> bool:
        """Return True when *filepath* already has an import-log record."""
        try:
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()
                cursor = self._execute_query(
                    conn,
                    f"SELECT COUNT(*) as cnt FROM query_import_log WHERE filepath = {ph}",
                    (filepath,)
                )
                cnt = cursor.fetchone()['cnt']
                return cnt > 0
            finally:
                conn.close()
        except Exception as e:
            logger.error(f"检查文件日志失败: {e}")
            return False
|
||||
|
||||
|
||||
class QueryKeywordManager(DatabaseManager):
    """Keyword manager operating on the baidu_keyword table."""

    def insert_keyword(self, keyword: str, seed_id: int = 9999, seed_name: str = '手动提交',
                       crawled: int = 1, department: str = '', department_id: int = 0,
                       author_id: int = 0, author_name: str = '') -> int:
        """Insert one keyword into baidu_keyword via INSERT IGNORE.

        Returns:
            1 = newly inserted, 0 = already existed (skipped), -1 = failure.
        """
        try:
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()
                cursor = conn.cursor()
                cursor.execute(
                    f"""INSERT IGNORE INTO baidu_keyword
                        (keyword, seed_id, seed_name, crawled, parents_id, created_at,
                         department, department_id, query_status, author_id, author_name)
                        VALUES ({ph}, {ph}, {ph}, {ph}, 0, NOW(), {ph}, {ph}, 'manual_review', {ph}, {ph})""",
                    (keyword, seed_id, seed_name, crawled, department, department_id, author_id, author_name)
                )
                # rowcount is 0 when INSERT IGNORE skipped a duplicate.
                affected = cursor.rowcount
                conn.commit()
            finally:
                # Close even on failure so connections don't leak.
                conn.close()
            return affected
        except Exception as e:
            logger.error(f"插入关键词失败: {keyword} - {e}")
            return -1

    def batch_insert_keywords(self, keyword_list: list, seed_id: int = 9999,
                              seed_name: str = '手动提交', crawled: int = 1,
                              query_status: str = 'manual_review') -> dict:
        """Bulk-insert keywords into baidu_keyword via INSERT IGNORE.

        Args:
            keyword_list: [{'keyword': str, 'department': str, 'seed_name': str(optional)}, ...]
            seed_id: Seed id written to every row.
            seed_name: Default seed name; a per-item 'seed_name' overrides it.
            crawled: crawled flag written to every row.
            query_status: query_status value to write, e.g. 'draft' or 'manual_review'.

        Returns:
            {'success': int, 'skip': int, 'fail': int}
        """
        stats = {'success': 0, 'skip': 0, 'fail': 0}
        if not keyword_list:
            return stats

        try:
            conn = self.get_connection()
            try:
                ph = self._get_placeholder()
                cursor = conn.cursor()

                # Honour the per-item 'seed_name' the docstring promises,
                # falling back to the method-level default.
                values = [
                    (
                        item['keyword'], seed_id, item.get('seed_name', seed_name), crawled,
                        item.get('department', ''), query_status
                    )
                    for item in keyword_list
                ]

                cursor.executemany(
                    f"""INSERT IGNORE INTO baidu_keyword
                        (keyword, seed_id, seed_name, crawled, parents_id, created_at,
                         department, department_id, query_status, author_id, author_name)
                        VALUES ({ph}, {ph}, {ph}, {ph}, 0, NOW(), {ph}, 0, {ph}, 0, '')""",
                    values
                )

                # executemany rowcount reports rows actually inserted;
                # some drivers return -1, so clamp to keep stats sane.
                inserted = cursor.rowcount
                conn.commit()
            finally:
                conn.close()

            stats['success'] = max(0, inserted)
            stats['skip'] = max(0, len(keyword_list) - stats['success'])
            return stats

        except Exception as e:
            logger.error(f"批量插入关键词失败: {e}")
            stats['fail'] = len(keyword_list)
            return stats
|
||||
|
||||
Reference in New Issue
Block a user