feat: 完善代理重试机制,添加数据验证告警,新增README文档

This commit is contained in:
shengyudong@yunqueai.net
2026-01-16 18:36:52 +08:00
parent 322ac74336
commit b518e6aacf
55 changed files with 13202 additions and 34781 deletions

View File

@@ -154,10 +154,8 @@ class CSVImporter:
continue
try:
# 处理slide_ratio值
# 处理slide_ratio值(CSV中已是小数格式)
slide_ratio_value = float(self.convert_value(row.get('slide_ratio', '0'), 'float') or 0.0)
if slide_ratio_value > 10:
slide_ratio_value = slide_ratio_value / 100
slide_ratio_value = min(slide_ratio_value, 9.9999)
# 获取channel
@@ -271,9 +269,8 @@ class CSVImporter:
continue
try:
# 处理avg_slide_ratio值(CSV中已是小数格式)
avg_slide_ratio_value = float(self.convert_value(row.get('avg_slide_ratio', '0'), 'float') or 0.0)
if avg_slide_ratio_value > 10:
avg_slide_ratio_value = avg_slide_ratio_value / 100
avg_slide_ratio_value = min(avg_slide_ratio_value, 9.9999)
# 获取channel并查询author_id
@@ -348,13 +345,14 @@ class CSVImporter:
return success_count > 0
def import_ai_statistics_days(self, batch_size: int = 50) -> bool:
"""导入 ai_statistics_days 表数据(使用批量提交
"""导入 ai_statistics_days 表数据(仅当日数据day_revenue
同时自动拆分数据到 ai_statistics_weekly 和 ai_statistics_monthly 表
Args:
batch_size: 批量提交大小,默认50条
"""
print("\n" + "="*70)
print("开始导入 ai_statistics_days 表数据")
print("开始导入 ai_statistics_days 表数据拆分到3个表")
print("="*70)
csv_file = self.csv_files['ai_statistics_days']
@@ -365,14 +363,27 @@ class CSVImporter:
self.logger.warning("ai_statistics_days表没有数据可导入")
return False
self.logger.info(f"开始导入ai_statistics_days表数据,共 {len(rows)} 条记录,批量大小: {batch_size}")
print(f"\n总计 {len(rows)} 条记录,分批导入(每批 {batch_size} 条)\n")
self.logger.info(f"开始导入数据,共 {len(rows)} 条记录,批量大小: {batch_size}")
print(f"\n总计 {len(rows)} 条记录,将拆分到3个表\n")
success_count = 0
# 三个表的统计
days_success = 0
weekly_success = 0
monthly_success = 0
failed_count = 0
batch_params = []
first_record_keys = None
sql_template = None
# 批量参数
days_batch = []
weekly_batch = []
monthly_batch = []
# SQL模板
days_sql = None
weekly_sql = None
monthly_sql = None
days_keys = None
weekly_keys = None
monthly_keys = None
for idx, row in enumerate(rows, 1):
author_name = row.get('author_name', '').strip()
@@ -388,68 +399,153 @@ class CSVImporter:
failed_count += 1
continue
# 处理day_revenue字段(每日收益)
day_revenue_value = self.convert_value(row.get('day_revenue', '0'), 'decimal')
if day_revenue_value is None:
day_revenue_value = Decimal('0')
stat_date = row.get('stat_date', '').strip()
record = {
# 1. ai_statistics_days 表数据(仅当日数据)
day_revenue = self.convert_value(row.get('day_revenue', '0'), 'decimal') or Decimal('0')
daily_published_count = self.convert_value(row.get('daily_published_count', '0'), 'int') or 0
cumulative_published_count = self.convert_value(row.get('cumulative_published_count', '0'), 'int') or 0
days_record = {
'author_id': author_id,
'author_name': author_name,
'channel': channel,
'stat_date': row.get('stat_date', '').strip(),
'daily_published_count': self.convert_value(row.get('daily_published_count', '0'), 'int') or 0,
'cumulative_published_count': self.convert_value(row.get('cumulative_published_count', '0'), 'int') or 0,
'day_revenue': day_revenue_value, # 每日收益
'monthly_revenue': self.convert_value(row.get('monthly_revenue', '0'), 'decimal') or Decimal('0'),
'weekly_revenue': self.convert_value(row.get('weekly_revenue', '0'), 'decimal') or Decimal('0'),
'revenue_mom_growth_rate': self.convert_value(row.get('revenue_mom_growth_rate', '0'), 'decimal') or Decimal('0'),
'revenue_wow_growth_rate': self.convert_value(row.get('revenue_wow_growth_rate', '0'), 'decimal') or Decimal('0'),
'updated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), # 添加更新时间戳,强制更新
'stat_date': stat_date,
'daily_published_count': daily_published_count,
'day_revenue': day_revenue,
'updated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
}
if sql_template is None:
first_record_keys = list(record.keys())
columns = ', '.join(first_record_keys)
placeholders = ', '.join(['%s'] * len(first_record_keys))
update_parts = [f"{key} = VALUES({key})" for key in first_record_keys if key not in ['author_name', 'channel', 'stat_date']]
sql_template = f"""
# 2. ai_statistics_weekly 表数据
weekly_revenue = self.convert_value(row.get('weekly_revenue', '0'), 'decimal') or Decimal('0')
revenue_wow_growth_rate = self.convert_value(row.get('revenue_wow_growth_rate', '0'), 'decimal') or Decimal('0')
# 计算该日期所在周次(格式:WW,如51)
from datetime import datetime as dt, timedelta
date_obj = dt.strptime(stat_date, '%Y-%m-%d')
# 使用isocalendar()获取ISO周数(周一为一周开始)
year, week_num, _ = date_obj.isocalendar()
stat_weekly = week_num # 直接使用数字
weekly_record = {
'author_id': author_id,
'author_name': author_name,
'channel': channel,
'stat_weekly': stat_weekly,
'weekly_revenue': weekly_revenue,
'revenue_wow_growth_rate': revenue_wow_growth_rate,
'updated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
}
# 3. ai_statistics_monthly 表数据
monthly_revenue = self.convert_value(row.get('monthly_revenue', '0'), 'decimal') or Decimal('0')
revenue_mom_growth_rate = self.convert_value(row.get('revenue_mom_growth_rate', '0'), 'decimal') or Decimal('0')
# 计算该日期所在月份(格式:YYYY-MM,如2025-12)
stat_monthly = date_obj.strftime('%Y-%m')
monthly_record = {
'author_id': author_id,
'author_name': author_name,
'channel': channel,
'stat_monthly': stat_monthly,
'monthly_revenue': monthly_revenue,
'revenue_mom_growth_rate': revenue_mom_growth_rate,
'updated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
}
# 构建SQL模板(首次)
if days_sql is None:
days_keys = list(days_record.keys())
columns = ', '.join(days_keys)
placeholders = ', '.join(['%s'] * len(days_keys))
update_parts = [f"{key} = VALUES({key})" for key in days_keys if key not in ['author_name', 'channel', 'stat_date']]
days_sql = f"""
INSERT INTO ai_statistics_days ({columns})
VALUES ({placeholders})
ON DUPLICATE KEY UPDATE {', '.join(update_parts)}
"""
if first_record_keys is not None:
batch_params.append(tuple(record[key] for key in first_record_keys))
if weekly_sql is None:
weekly_keys = list(weekly_record.keys())
columns = ', '.join(weekly_keys)
placeholders = ', '.join(['%s'] * len(weekly_keys))
update_parts = [f"{key} = VALUES({key})" for key in weekly_keys if key not in ['author_name', 'channel', 'stat_weekly']]
weekly_sql = f"""
INSERT INTO ai_statistics_weekly ({columns})
VALUES ({placeholders})
ON DUPLICATE KEY UPDATE {', '.join(update_parts)}
"""
if len(batch_params) >= batch_size or idx == len(rows):
if monthly_sql is None:
monthly_keys = list(monthly_record.keys())
columns = ', '.join(monthly_keys)
placeholders = ', '.join(['%s'] * len(monthly_keys))
update_parts = [f"{key} = VALUES({key})" for key in monthly_keys if key not in ['author_name', 'channel', 'stat_monthly']]
monthly_sql = f"""
INSERT INTO ai_statistics_monthly ({columns})
VALUES ({placeholders})
ON DUPLICATE KEY UPDATE {', '.join(update_parts)}
"""
# 添加到批量参数
days_batch.append(tuple(days_record[key] for key in days_keys))
weekly_batch.append(tuple(weekly_record[key] for key in weekly_keys))
monthly_batch.append(tuple(monthly_record[key] for key in monthly_keys))
# 批量提交
if len(days_batch) >= batch_size or idx == len(rows):
try:
result_count = self.db_manager.execute_many(sql_template, batch_params, autocommit=True)
success_count += result_count
print(f"[批次提交] 已导入 {success_count} 条记录(本批: {result_count}/{len(batch_params)}")
self.logger.info(f"ai_statistics_days表批量提交: {result_count}/{len(batch_params)}记录")
batch_params = []
except Exception as batch_error:
failed_count += len(batch_params)
print(f" [X] 批次提交失败: {batch_error}")
self.logger.error(f"ai_statistics_days表批量提交失败: {batch_error}")
batch_params = []
# 提交 ai_statistics_days
result = self.db_manager.execute_many(days_sql, days_batch, autocommit=True)
days_success += result
print(f"[days] 已导入 {days_success}")
days_batch = []
except Exception as e:
print(f" [X] days表提交失败: {e}")
self.logger.error(f"ai_statistics_days批量提交失败: {e}")
failed_count += len(days_batch)
days_batch = []
try:
# 提交 ai_statistics_weekly
result = self.db_manager.execute_many(weekly_sql, weekly_batch, autocommit=True)
weekly_success += result
print(f"[weekly] 已导入 {weekly_success}")
weekly_batch = []
except Exception as e:
print(f" [X] weekly表提交失败: {e}")
self.logger.error(f"ai_statistics_weekly批量提交失败: {e}")
weekly_batch = []
try:
# 提交 ai_statistics_monthly
result = self.db_manager.execute_many(monthly_sql, monthly_batch, autocommit=True)
monthly_success += result
print(f"[monthly] 已导入 {monthly_success}")
monthly_batch = []
except Exception as e:
print(f" [X] monthly表提交失败: {e}")
self.logger.error(f"ai_statistics_monthly批量提交失败: {e}")
monthly_batch = []
except Exception as e:
failed_count += 1
print(f" [X] 处理失败 ({author_name}): {e}")
self.logger.error(f"ai_statistics_days表处理失败: {author_name}, 错误: {e}")
self.logger.error(f"数据处理失败: {author_name}, 错误: {e}")
continue
print("\n" + "="*70)
print(f"[OK] ai_statistics_days 表数据导入完成")
print(f" 成功: {success_count}记录")
print(f"[OK] 数据导入完成拆分到3个表")
print(f" ai_statistics_days: {days_success}")
print(f" ai_statistics_weekly: {weekly_success}")
print(f" ai_statistics_monthly: {monthly_success}")
if failed_count > 0:
print(f" 失败: {failed_count}记录")
print(f" 失败: {failed_count}")
print("="*70)
self.logger.info(f"ai_statistics_days表数据导入完成: 成功 {success_count} 条,失败 {failed_count}")
return success_count > 0
self.logger.info(f"数据导入完成: days={days_success}, weekly={weekly_success}, monthly={monthly_success}, failed={failed_count}")
return days_success > 0
def import_all(self) -> bool:
"""导入所有CSV文件"""