feat: 完善代理重试机制,添加数据验证告警,新增README文档
This commit is contained in:
@@ -154,10 +154,8 @@ class CSVImporter:
|
||||
continue
|
||||
|
||||
try:
|
||||
# 处理slide_ratio值
|
||||
# 处理slide_ratio值(CSV中已是小数格式)
|
||||
slide_ratio_value = float(self.convert_value(row.get('slide_ratio', '0'), 'float') or 0.0)
|
||||
if slide_ratio_value > 10:
|
||||
slide_ratio_value = slide_ratio_value / 100
|
||||
slide_ratio_value = min(slide_ratio_value, 9.9999)
|
||||
|
||||
# 获取channel
|
||||
@@ -271,9 +269,8 @@ class CSVImporter:
|
||||
continue
|
||||
|
||||
try:
|
||||
# 处理avg_slide_ratio值(CSV中已是小数格式)
|
||||
avg_slide_ratio_value = float(self.convert_value(row.get('avg_slide_ratio', '0'), 'float') or 0.0)
|
||||
if avg_slide_ratio_value > 10:
|
||||
avg_slide_ratio_value = avg_slide_ratio_value / 100
|
||||
avg_slide_ratio_value = min(avg_slide_ratio_value, 9.9999)
|
||||
|
||||
# 获取channel并查询author_id
|
||||
@@ -348,13 +345,14 @@ class CSVImporter:
|
||||
return success_count > 0
|
||||
|
||||
def import_ai_statistics_days(self, batch_size: int = 50) -> bool:
|
||||
"""导入 ai_statistics_days 表数据(使用批量提交)
|
||||
"""导入 ai_statistics_days 表数据(仅当日数据:day_revenue)
|
||||
同时自动拆分数据到 ai_statistics_weekly 和 ai_statistics_monthly 表
|
||||
|
||||
Args:
|
||||
batch_size: 批量提交大小,默认50条
|
||||
"""
|
||||
print("\n" + "="*70)
|
||||
print("开始导入 ai_statistics_days 表数据")
|
||||
print("开始导入 ai_statistics_days 表数据(拆分到3个表)")
|
||||
print("="*70)
|
||||
|
||||
csv_file = self.csv_files['ai_statistics_days']
|
||||
@@ -365,14 +363,27 @@ class CSVImporter:
|
||||
self.logger.warning("ai_statistics_days表没有数据可导入")
|
||||
return False
|
||||
|
||||
self.logger.info(f"开始导入ai_statistics_days表数据,共 {len(rows)} 条记录,批量大小: {batch_size}")
|
||||
print(f"\n总计 {len(rows)} 条记录,分批导入(每批 {batch_size} 条)\n")
|
||||
self.logger.info(f"开始导入数据,共 {len(rows)} 条记录,批量大小: {batch_size}")
|
||||
print(f"\n总计 {len(rows)} 条记录,将拆分到3个表\n")
|
||||
|
||||
success_count = 0
|
||||
# 三个表的统计
|
||||
days_success = 0
|
||||
weekly_success = 0
|
||||
monthly_success = 0
|
||||
failed_count = 0
|
||||
batch_params = []
|
||||
first_record_keys = None
|
||||
sql_template = None
|
||||
|
||||
# 批量参数
|
||||
days_batch = []
|
||||
weekly_batch = []
|
||||
monthly_batch = []
|
||||
|
||||
# SQL模板
|
||||
days_sql = None
|
||||
weekly_sql = None
|
||||
monthly_sql = None
|
||||
days_keys = None
|
||||
weekly_keys = None
|
||||
monthly_keys = None
|
||||
|
||||
for idx, row in enumerate(rows, 1):
|
||||
author_name = row.get('author_name', '').strip()
|
||||
@@ -388,68 +399,153 @@ class CSVImporter:
|
||||
failed_count += 1
|
||||
continue
|
||||
|
||||
# 处理day_revenue字段(每日收益)
|
||||
day_revenue_value = self.convert_value(row.get('day_revenue', '0'), 'decimal')
|
||||
if day_revenue_value is None:
|
||||
day_revenue_value = Decimal('0')
|
||||
stat_date = row.get('stat_date', '').strip()
|
||||
|
||||
record = {
|
||||
# 1. ai_statistics_days 表数据(仅当日数据)
|
||||
day_revenue = self.convert_value(row.get('day_revenue', '0'), 'decimal') or Decimal('0')
|
||||
daily_published_count = self.convert_value(row.get('daily_published_count', '0'), 'int') or 0
|
||||
cumulative_published_count = self.convert_value(row.get('cumulative_published_count', '0'), 'int') or 0
|
||||
|
||||
days_record = {
|
||||
'author_id': author_id,
|
||||
'author_name': author_name,
|
||||
'channel': channel,
|
||||
'stat_date': row.get('stat_date', '').strip(),
|
||||
'daily_published_count': self.convert_value(row.get('daily_published_count', '0'), 'int') or 0,
|
||||
'cumulative_published_count': self.convert_value(row.get('cumulative_published_count', '0'), 'int') or 0,
|
||||
'day_revenue': day_revenue_value, # 每日收益
|
||||
'monthly_revenue': self.convert_value(row.get('monthly_revenue', '0'), 'decimal') or Decimal('0'),
|
||||
'weekly_revenue': self.convert_value(row.get('weekly_revenue', '0'), 'decimal') or Decimal('0'),
|
||||
'revenue_mom_growth_rate': self.convert_value(row.get('revenue_mom_growth_rate', '0'), 'decimal') or Decimal('0'),
|
||||
'revenue_wow_growth_rate': self.convert_value(row.get('revenue_wow_growth_rate', '0'), 'decimal') or Decimal('0'),
|
||||
'updated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), # 添加更新时间戳,强制更新
|
||||
'stat_date': stat_date,
|
||||
'daily_published_count': daily_published_count,
|
||||
'day_revenue': day_revenue,
|
||||
'updated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
||||
}
|
||||
|
||||
if sql_template is None:
|
||||
first_record_keys = list(record.keys())
|
||||
columns = ', '.join(first_record_keys)
|
||||
placeholders = ', '.join(['%s'] * len(first_record_keys))
|
||||
update_parts = [f"{key} = VALUES({key})" for key in first_record_keys if key not in ['author_name', 'channel', 'stat_date']]
|
||||
sql_template = f"""
|
||||
# 2. ai_statistics_weekly 表数据
|
||||
weekly_revenue = self.convert_value(row.get('weekly_revenue', '0'), 'decimal') or Decimal('0')
|
||||
revenue_wow_growth_rate = self.convert_value(row.get('revenue_wow_growth_rate', '0'), 'decimal') or Decimal('0')
|
||||
|
||||
# 计算该日期所在周次(格式:WW,如51)
|
||||
from datetime import datetime as dt, timedelta
|
||||
date_obj = dt.strptime(stat_date, '%Y-%m-%d')
|
||||
# 使用isocalendar()获取ISO周数(周一为一周开始)
|
||||
year, week_num, _ = date_obj.isocalendar()
|
||||
stat_weekly = week_num # 直接使用数字
|
||||
|
||||
weekly_record = {
|
||||
'author_id': author_id,
|
||||
'author_name': author_name,
|
||||
'channel': channel,
|
||||
'stat_weekly': stat_weekly,
|
||||
'weekly_revenue': weekly_revenue,
|
||||
'revenue_wow_growth_rate': revenue_wow_growth_rate,
|
||||
'updated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
||||
}
|
||||
|
||||
# 3. ai_statistics_monthly 表数据
|
||||
monthly_revenue = self.convert_value(row.get('monthly_revenue', '0'), 'decimal') or Decimal('0')
|
||||
revenue_mom_growth_rate = self.convert_value(row.get('revenue_mom_growth_rate', '0'), 'decimal') or Decimal('0')
|
||||
|
||||
# 计算该日期所在月份(格式:YYYY-MM,如2025-12)
|
||||
stat_monthly = date_obj.strftime('%Y-%m')
|
||||
|
||||
monthly_record = {
|
||||
'author_id': author_id,
|
||||
'author_name': author_name,
|
||||
'channel': channel,
|
||||
'stat_monthly': stat_monthly,
|
||||
'monthly_revenue': monthly_revenue,
|
||||
'revenue_mom_growth_rate': revenue_mom_growth_rate,
|
||||
'updated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
||||
}
|
||||
|
||||
# 构建SQL模板(首次)
|
||||
if days_sql is None:
|
||||
days_keys = list(days_record.keys())
|
||||
columns = ', '.join(days_keys)
|
||||
placeholders = ', '.join(['%s'] * len(days_keys))
|
||||
update_parts = [f"{key} = VALUES({key})" for key in days_keys if key not in ['author_name', 'channel', 'stat_date']]
|
||||
days_sql = f"""
|
||||
INSERT INTO ai_statistics_days ({columns})
|
||||
VALUES ({placeholders})
|
||||
ON DUPLICATE KEY UPDATE {', '.join(update_parts)}
|
||||
"""
|
||||
|
||||
if first_record_keys is not None:
|
||||
batch_params.append(tuple(record[key] for key in first_record_keys))
|
||||
if weekly_sql is None:
|
||||
weekly_keys = list(weekly_record.keys())
|
||||
columns = ', '.join(weekly_keys)
|
||||
placeholders = ', '.join(['%s'] * len(weekly_keys))
|
||||
update_parts = [f"{key} = VALUES({key})" for key in weekly_keys if key not in ['author_name', 'channel', 'stat_weekly']]
|
||||
weekly_sql = f"""
|
||||
INSERT INTO ai_statistics_weekly ({columns})
|
||||
VALUES ({placeholders})
|
||||
ON DUPLICATE KEY UPDATE {', '.join(update_parts)}
|
||||
"""
|
||||
|
||||
if len(batch_params) >= batch_size or idx == len(rows):
|
||||
if monthly_sql is None:
|
||||
monthly_keys = list(monthly_record.keys())
|
||||
columns = ', '.join(monthly_keys)
|
||||
placeholders = ', '.join(['%s'] * len(monthly_keys))
|
||||
update_parts = [f"{key} = VALUES({key})" for key in monthly_keys if key not in ['author_name', 'channel', 'stat_monthly']]
|
||||
monthly_sql = f"""
|
||||
INSERT INTO ai_statistics_monthly ({columns})
|
||||
VALUES ({placeholders})
|
||||
ON DUPLICATE KEY UPDATE {', '.join(update_parts)}
|
||||
"""
|
||||
|
||||
# 添加到批量参数
|
||||
days_batch.append(tuple(days_record[key] for key in days_keys))
|
||||
weekly_batch.append(tuple(weekly_record[key] for key in weekly_keys))
|
||||
monthly_batch.append(tuple(monthly_record[key] for key in monthly_keys))
|
||||
|
||||
# 批量提交
|
||||
if len(days_batch) >= batch_size or idx == len(rows):
|
||||
try:
|
||||
result_count = self.db_manager.execute_many(sql_template, batch_params, autocommit=True)
|
||||
success_count += result_count
|
||||
print(f"[批次提交] 已导入 {success_count} 条记录(本批: {result_count}/{len(batch_params)})")
|
||||
self.logger.info(f"ai_statistics_days表批量提交: {result_count}/{len(batch_params)} 条记录")
|
||||
batch_params = []
|
||||
except Exception as batch_error:
|
||||
failed_count += len(batch_params)
|
||||
print(f" [X] 批次提交失败: {batch_error}")
|
||||
self.logger.error(f"ai_statistics_days表批量提交失败: {batch_error}")
|
||||
batch_params = []
|
||||
# 提交 ai_statistics_days
|
||||
result = self.db_manager.execute_many(days_sql, days_batch, autocommit=True)
|
||||
days_success += result
|
||||
print(f"[days] 已导入 {days_success} 条")
|
||||
days_batch = []
|
||||
except Exception as e:
|
||||
print(f" [X] days表提交失败: {e}")
|
||||
self.logger.error(f"ai_statistics_days批量提交失败: {e}")
|
||||
failed_count += len(days_batch)
|
||||
days_batch = []
|
||||
|
||||
try:
|
||||
# 提交 ai_statistics_weekly
|
||||
result = self.db_manager.execute_many(weekly_sql, weekly_batch, autocommit=True)
|
||||
weekly_success += result
|
||||
print(f"[weekly] 已导入 {weekly_success} 条")
|
||||
weekly_batch = []
|
||||
except Exception as e:
|
||||
print(f" [X] weekly表提交失败: {e}")
|
||||
self.logger.error(f"ai_statistics_weekly批量提交失败: {e}")
|
||||
weekly_batch = []
|
||||
|
||||
try:
|
||||
# 提交 ai_statistics_monthly
|
||||
result = self.db_manager.execute_many(monthly_sql, monthly_batch, autocommit=True)
|
||||
monthly_success += result
|
||||
print(f"[monthly] 已导入 {monthly_success} 条")
|
||||
monthly_batch = []
|
||||
except Exception as e:
|
||||
print(f" [X] monthly表提交失败: {e}")
|
||||
self.logger.error(f"ai_statistics_monthly批量提交失败: {e}")
|
||||
monthly_batch = []
|
||||
|
||||
except Exception as e:
|
||||
failed_count += 1
|
||||
print(f" [X] 处理失败 ({author_name}): {e}")
|
||||
self.logger.error(f"ai_statistics_days表处理失败: {author_name}, 错误: {e}")
|
||||
self.logger.error(f"数据处理失败: {author_name}, 错误: {e}")
|
||||
continue
|
||||
|
||||
print("\n" + "="*70)
|
||||
print(f"[OK] ai_statistics_days 表数据导入完成")
|
||||
print(f" 成功: {success_count} 条记录")
|
||||
print(f"[OK] 数据导入完成(拆分到3个表)")
|
||||
print(f" ai_statistics_days: {days_success} 条")
|
||||
print(f" ai_statistics_weekly: {weekly_success} 条")
|
||||
print(f" ai_statistics_monthly: {monthly_success} 条")
|
||||
if failed_count > 0:
|
||||
print(f" 失败: {failed_count} 条记录")
|
||||
print(f" 失败: {failed_count} 条")
|
||||
print("="*70)
|
||||
|
||||
self.logger.info(f"ai_statistics_days表数据导入完成: 成功 {success_count} 条,失败 {failed_count} 条")
|
||||
return success_count > 0
|
||||
self.logger.info(f"数据导入完成: days={days_success}, weekly={weekly_success}, monthly={monthly_success}, failed={failed_count}")
|
||||
return days_success > 0
|
||||
|
||||
def import_all(self) -> bool:
|
||||
"""导入所有CSV文件"""
|
||||
|
||||
Reference in New Issue
Block a user