feat: 完善代理重试机制,添加数据验证告警,新增README文档
This commit is contained in:
355
export_to_csv.py
355
export_to_csv.py
@@ -12,6 +12,7 @@ import sys
|
||||
import os
|
||||
import json
|
||||
import csv
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
from decimal import Decimal
|
||||
@@ -67,6 +68,10 @@ class DataExporter:
|
||||
self.output_ai_statistics_day = os.path.join(self.script_dir, "ai_statistics_day.csv")
|
||||
self.output_ai_statistics_days = os.path.join(self.script_dir, "ai_statistics_days.csv")
|
||||
|
||||
# 备份文件夹路径
|
||||
self.backup_dir = os.path.join(self.script_dir, "csv_backups")
|
||||
self._ensure_backup_dir()
|
||||
|
||||
# 数据库模式
|
||||
self.use_database = use_database
|
||||
self.db_manager = None
|
||||
@@ -90,6 +95,51 @@ class DataExporter:
|
||||
# 缓存author_id映射(author_name -> author_id)
|
||||
self.author_id_cache = {}
|
||||
|
||||
def _ensure_backup_dir(self):
    """Make sure the backup directory ``self.backup_dir`` exists.

    Best effort: any failure is reported on stdout and swallowed so the
    caller is never interrupted by a backup-folder problem.
    """
    try:
        # Already a directory: nothing to do (and nothing to announce).
        if os.path.isdir(self.backup_dir):
            return

        # exist_ok=True tolerates another process creating the folder
        # between the check above and this call. If the path exists but is
        # NOT a directory, makedirs still raises, so the problem is reported
        # below instead of being silently ignored.
        os.makedirs(self.backup_dir, exist_ok=True)
        print(f"[OK] 创建备份文件夹: {self.backup_dir}")
    except Exception as e:
        print(f"[!] 创建备份文件夹失败: {e}")
|
||||
|
||||
def _backup_csv_file(self, csv_file_path: str) -> bool:
    """Copy a CSV file into the backup directory under a date-stamped name.

    The backup is named ``<YYYYMMDD>_<original file name>`` (for example
    ``20251226_ai_statistics.csv``). Because the stamp only has day
    resolution, a second backup of the same file on the same day overwrites
    the earlier one.

    Args:
        csv_file_path: Full path of the CSV file to back up.

    Returns:
        bool: True when the copy succeeded; False when the source file does
        not exist or any error occurred (errors are printed and logged,
        never raised).
    """
    try:
        if not os.path.exists(csv_file_path):
            print(f"[!] 文件不存在,跳过备份: {csv_file_path}")
            return False

        file_name = os.path.basename(csv_file_path)

        # Date-only stamp: at most one backup per file per day.
        timestamp = datetime.now().strftime('%Y%m%d')
        backup_file_name = f"{timestamp}_{file_name}"
        backup_file_path = os.path.join(self.backup_dir, backup_file_name)

        # The backup folder may be missing (earlier creation failed or it was
        # removed since); create it on demand instead of failing the copy.
        os.makedirs(self.backup_dir, exist_ok=True)

        # copy2 preserves file metadata (timestamps) along with the content.
        shutil.copy2(csv_file_path, backup_file_path)

        print(f" [备份] {file_name} -> {backup_file_name}")
        self.logger.info("备份CSV文件: %s", backup_file_path)
        return True
    except Exception as e:
        # Best effort: a failed backup must not abort the export itself.
        print(f" [!] 备份失败: {e}")
        self.logger.error("备份CSV文件失败: %s", e)
        return False
|
||||
|
||||
def get_author_id(self, author_name: str) -> int:
|
||||
"""获取作者ID
|
||||
|
||||
@@ -286,21 +336,25 @@ class DataExporter:
|
||||
print(f" [!] 从数据库计算当周发文量失败: {e}")
|
||||
return 0
|
||||
|
||||
def calculate_weekly_revenue_from_db(self, author_id: int, stat_date: str) -> float:
|
||||
"""从ai_statistics_days表汇总计算当周收益(周一至周日)
|
||||
def calculate_weekly_revenue_from_db(self, author_id: int, stat_date: str, today_revenue: float = 0.0) -> float:
|
||||
"""从ai_statistics_days表汇总计算当周收益(周一至当前日期)
|
||||
|
||||
基于day_revenue字段进行汇总计算
|
||||
计算逻辑:
|
||||
1. 从数据库查询本周一到stat_date前一天的day_revenue总和
|
||||
2. 加上today_revenue(当日收益,从API获取)
|
||||
3. 得到本周累计收益
|
||||
|
||||
Args:
|
||||
author_id: 作者ID
|
||||
stat_date: 统计日期 (YYYY-MM-DD)
|
||||
today_revenue: 当日收益(从API获取),默认0.0
|
||||
|
||||
Returns:
|
||||
当周收益总额
|
||||
"""
|
||||
if not self.db_manager or author_id == 0:
|
||||
print(f" [数据库] 未连接或author_id无效,无法计算当周收益")
|
||||
return 0.0
|
||||
return today_revenue # 如果数据库不可用,返回当日收益
|
||||
|
||||
try:
|
||||
from datetime import datetime, timedelta
|
||||
@@ -311,14 +365,21 @@ class DataExporter:
|
||||
# 计算本周一的日期(weekday: 0=周一, 6=周日)
|
||||
weekday = target_date.weekday()
|
||||
monday = target_date - timedelta(days=weekday)
|
||||
sunday = monday + timedelta(days=6)
|
||||
|
||||
# 昨天的日期(stat_date的前一天)
|
||||
yesterday = target_date - timedelta(days=1)
|
||||
|
||||
monday_str = monday.strftime('%Y-%m-%d')
|
||||
sunday_str = sunday.strftime('%Y-%m-%d')
|
||||
yesterday_str = yesterday.strftime('%Y-%m-%d')
|
||||
|
||||
print(f" [调试] 目标日期: {stat_date}, 周一: {monday_str}, 周日: {sunday_str}")
|
||||
print(f" [调试] 目标日期: {stat_date}, 本周一: {monday_str}, 昨天: {yesterday_str}")
|
||||
|
||||
# 查询数据库中本周的day_revenue总和
|
||||
# 如果stat_date就是周一,则没有历史数据,直接返回今日收益
|
||||
if target_date == monday:
|
||||
print(f" [数据库] 目标日期是周一,当周收益 = 今日收益: ¥{today_revenue:.2f}")
|
||||
return today_revenue
|
||||
|
||||
# 查询数据库中本周一到昨天的day_revenue总和
|
||||
sql = """
|
||||
SELECT SUM(day_revenue) as weekly_total, COUNT(*) as day_count
|
||||
FROM ai_statistics_days
|
||||
@@ -330,25 +391,33 @@ class DataExporter:
|
||||
|
||||
result = self.db_manager.execute_query(
|
||||
sql,
|
||||
(author_id, monday_str, sunday_str),
|
||||
(author_id, monday_str, yesterday_str),
|
||||
fetch_one=True,
|
||||
dict_cursor=True
|
||||
)
|
||||
|
||||
print(f" [调试] 查询结果: {result}")
|
||||
print(f" [调试] 数据库查询结果: {result}")
|
||||
|
||||
# 计算当周收益 = 本周历史收益 + 今日收益
|
||||
historical_revenue = 0.0
|
||||
day_count = 0
|
||||
|
||||
if result and result.get('weekly_total') is not None:
|
||||
weekly_total = float(result['weekly_total'] or 0)
|
||||
historical_revenue = float(result['weekly_total'] or 0)
|
||||
day_count = int(result.get('day_count', 0) or 0)
|
||||
print(f" [数据库] 当周收益 ({monday_str} 至 {sunday_str}): ¥{weekly_total:.2f} (基于{day_count}天的数据)")
|
||||
return weekly_total
|
||||
else:
|
||||
print(f" [数据库] 未找到当周数据 ({monday_str} 至 {sunday_str}),返回0")
|
||||
return 0.0
|
||||
|
||||
weekly_total = historical_revenue + today_revenue
|
||||
|
||||
print(f" [数据库] 当周收益计算:")
|
||||
print(f" 本周一至昨天 ({monday_str} ~ {yesterday_str}): ¥{historical_revenue:.2f} (基于{day_count}天)")
|
||||
print(f" 今日收益 ({stat_date}): ¥{today_revenue:.2f}")
|
||||
print(f" 当周总收益: ¥{weekly_total:.2f}")
|
||||
|
||||
return weekly_total
|
||||
|
||||
except Exception as e:
|
||||
print(f" [!] 从数据库计算当周收益失败: {e}")
|
||||
return 0.0
|
||||
return today_revenue # 出错时返回当日收益
|
||||
|
||||
def calculate_last_week_revenue_from_db(self, author_id: int, stat_date: str) -> float:
|
||||
"""从ai_statistics_days表汇总计算上周收益(上周一至上周日)
|
||||
@@ -407,6 +476,77 @@ class DataExporter:
|
||||
print(f" [!] 从数据库计算上周收益失败: {e}")
|
||||
return 0.0
|
||||
|
||||
def calculate_monthly_revenue_from_db(self, author_id: int, stat_date: str, today_revenue: float = 0.0) -> float:
    """Compute month-to-date revenue from the ai_statistics_days table.

    Calculation:
        1. Sum ``day_revenue`` from the database for the 1st of the month up
           to the day before ``stat_date`` (rows with ``channel = 1`` only).
        2. Add ``today_revenue`` (the revenue for ``stat_date`` itself, which
           may not have been written to the database yet).

    Args:
        author_id: Author ID; 0 is treated as invalid.
        stat_date: Statistics date formatted as YYYY-MM-DD.
        today_revenue: Revenue for ``stat_date``; defaults to 0.0.

    Returns:
        Month-to-date revenue as a float. Falls back to ``today_revenue``
        alone when the database is unavailable, ``author_id`` is 0,
        ``stat_date`` is the 1st of the month, no historical rows exist, or
        any error occurs.
    """
    # Normalise once so a Decimal/None/int value cannot make the later
    # ``float + today_revenue`` addition or the ``:.2f`` formatting raise and
    # silently discard the database total via the broad except below.
    try:
        today_revenue = float(today_revenue or 0)
    except (TypeError, ValueError):
        today_revenue = 0.0

    if not self.db_manager or author_id == 0:
        print(" [数据库] 未连接或author_id无效,无法计算当月收益")
        return today_revenue

    try:
        from datetime import datetime, timedelta

        target_date = datetime.strptime(stat_date, '%Y-%m-%d')

        # On the 1st there is no earlier day in this month to aggregate.
        if target_date.day == 1:
            print(f" [数据库] 当月第一天,当月收益 = 当日收益: ¥{today_revenue:.2f}")
            return today_revenue

        month_first_str = target_date.replace(day=1).strftime('%Y-%m-%d')
        # Query stops at the previous day; stat_date itself is covered by
        # today_revenue.
        yesterday_str = (target_date - timedelta(days=1)).strftime('%Y-%m-%d')

        sql = """
            SELECT SUM(day_revenue) as monthly_total
            FROM ai_statistics_days
            WHERE author_id = %s
              AND stat_date >= %s
              AND stat_date <= %s
              AND channel = 1
        """

        result = self.db_manager.execute_query(
            sql,
            (author_id, month_first_str, yesterday_str),
            fetch_one=True,
            dict_cursor=True
        )

        # SUM() yields NULL (None) when no rows match.
        if not result or result.get('monthly_total') is None:
            print(f" [数据库] 未找到当月历史数据 ({month_first_str} 至 {yesterday_str}),当月收益 = 当日收益: ¥{today_revenue:.2f}")
            return today_revenue

        db_total = float(result['monthly_total'] or 0)
        monthly_total = db_total + today_revenue
        print(f" [数据库] 当月收益 ({month_first_str} 至 {stat_date}): 数据库¥{db_total:.2f} + 当日¥{today_revenue:.2f} = ¥{monthly_total:.2f}")
        return monthly_total

    except Exception as e:
        # Best effort: never let a reporting query abort the export.
        print(f" [!] 从数据库计算当月收益失败: {e}")
        return today_revenue
|
||||
|
||||
def calculate_last_month_revenue_from_db(self, author_id: int, stat_date: str) -> float:
|
||||
"""从ai_statistics_days表汇总计算上月收益
|
||||
|
||||
@@ -510,14 +650,20 @@ class DataExporter:
|
||||
metrics['submission_count'] = int(total_info.get('publish_count', 0) or 0)
|
||||
metrics['read_count'] = int(total_info.get('view_count', 0) or 0)
|
||||
metrics['comment_count'] = int(total_info.get('comment_count', 0) or 0)
|
||||
metrics['comment_rate'] = float(total_info.get('comment_rate', 0) or 0)
|
||||
# 所有rate字段API返回的都是百分制(如0.30表示0.30%),需要除以100转换为小数
|
||||
comment_rate_raw = float(total_info.get('comment_rate', 0) or 0)
|
||||
metrics['comment_rate'] = comment_rate_raw / 100 if comment_rate_raw > 0 else 0.0
|
||||
metrics['like_count'] = int(total_info.get('likes_count', 0) or 0)
|
||||
metrics['like_rate'] = float(total_info.get('likes_rate', 0) or 0)
|
||||
like_rate_raw = float(total_info.get('likes_rate', 0) or 0)
|
||||
metrics['like_rate'] = like_rate_raw / 100 if like_rate_raw > 0 else 0.0
|
||||
metrics['favorite_count'] = int(total_info.get('collect_count', 0) or 0)
|
||||
metrics['favorite_rate'] = float(total_info.get('collect_rate', 0) or 0)
|
||||
favorite_rate_raw = float(total_info.get('collect_rate', 0) or 0)
|
||||
metrics['favorite_rate'] = favorite_rate_raw / 100 if favorite_rate_raw > 0 else 0.0
|
||||
metrics['share_count'] = int(total_info.get('share_count', 0) or 0)
|
||||
metrics['share_rate'] = float(total_info.get('share_rate', 0) or 0)
|
||||
metrics['slide_ratio'] = float(total_info.get('pic_slide_rate', 0) or 0)
|
||||
share_rate_raw = float(total_info.get('share_rate', 0) or 0)
|
||||
metrics['share_rate'] = share_rate_raw / 100 if share_rate_raw > 0 else 0.0
|
||||
slide_ratio_raw = float(total_info.get('pic_slide_rate', 0) or 0)
|
||||
metrics['slide_ratio'] = slide_ratio_raw / 100 if slide_ratio_raw > 0 else 0.0
|
||||
metrics['baidu_search_volume'] = int(total_info.get('disp_pv', 0) or 0) # 修正:使用disp_pv
|
||||
except Exception as e:
|
||||
print(f" [!] 提取汇总指标失败: {e}")
|
||||
@@ -529,7 +675,7 @@ class DataExporter:
|
||||
|
||||
注意:
|
||||
- weekly_revenue: 不再从API获取,在export_ai_statistics_days中从数据库计算
|
||||
- monthly_revenue: 使用currentMonth(当前自然月收益)
|
||||
- monthly_revenue: 不再从API获取,在export_ai_statistics_days中从数据库计算
|
||||
- day_revenue: 从yesterday提取昨日收益(当日收益)
|
||||
- revenue_wow_growth_rate: 周环比,从数据库计算(本周 vs 上周)
|
||||
- revenue_mom_growth_rate: 月环比,从数据库计算(当月 vs 上月)
|
||||
@@ -564,10 +710,8 @@ class DataExporter:
|
||||
# 这里保持为0,由export_ai_statistics_days方法计算
|
||||
print(f" 环比增长率: 将从数据库计算")
|
||||
|
||||
# 当前自然月收入(currentMonth)
|
||||
current_month = income_data.get('currentMonth', {})
|
||||
if current_month:
|
||||
metrics['monthly_revenue'] = float(current_month.get('income', 0) or 0)
|
||||
# monthly_revenue 不再从API获取,在导出时从数据库的day_revenue汇总计算
|
||||
print(f" 当月收益: 将从数据库计算")
|
||||
|
||||
except Exception as e:
|
||||
print(f" [!] 提取收入指标失败: {e}")
|
||||
@@ -650,6 +794,10 @@ class DataExporter:
|
||||
print(f"[OK] ai_statistics 表数据已导出到: {self.output_ai_statistics}")
|
||||
print(f" 共 {len(csv_rows)} 条记录")
|
||||
print(f"{'='*70}")
|
||||
|
||||
# 备份CSV文件
|
||||
self._backup_csv_file(self.output_ai_statistics)
|
||||
|
||||
return True
|
||||
else:
|
||||
print("\n[!] 没有数据可导出")
|
||||
@@ -729,11 +877,12 @@ class DataExporter:
|
||||
'total_like_count': int(latest_day_data.get('likes_count', 0) or 0),
|
||||
'total_favorite_count': int(latest_day_data.get('collect_count', 0) or 0),
|
||||
'total_share_count': int(latest_day_data.get('share_count', 0) or 0),
|
||||
'avg_comment_rate': f"{float(latest_day_data.get('comment_rate', 0) or 0):.4f}",
|
||||
'avg_like_rate': f"{float(latest_day_data.get('likes_rate', 0) or 0):.4f}",
|
||||
'avg_favorite_rate': f"{float(latest_day_data.get('collect_rate', 0) or 0):.4f}",
|
||||
'avg_share_rate': f"{float(latest_day_data.get('share_rate', 0) or 0):.4f}",
|
||||
'avg_slide_ratio': f"{float(latest_day_data.get('pic_slide_rate', 0) or 0):.4f}",
|
||||
# 所有rate字段API返回的都是百分制,需要除以100转换为小数
|
||||
'avg_comment_rate': f"{(float(latest_day_data.get('comment_rate', 0) or 0) / 100):.4f}",
|
||||
'avg_like_rate': f"{(float(latest_day_data.get('likes_rate', 0) or 0) / 100):.4f}",
|
||||
'avg_favorite_rate': f"{(float(latest_day_data.get('collect_rate', 0) or 0) / 100):.4f}",
|
||||
'avg_share_rate': f"{(float(latest_day_data.get('share_rate', 0) or 0) / 100):.4f}",
|
||||
'avg_slide_ratio': f"{(float(latest_day_data.get('pic_slide_rate', 0) or 0) / 100):.4f}",
|
||||
'total_baidu_search_volume': int(latest_day_data.get('disp_pv', 0) or 0),
|
||||
}
|
||||
|
||||
@@ -763,6 +912,10 @@ class DataExporter:
|
||||
print(f"[OK] ai_statistics_day 表数据已导出到: {self.output_ai_statistics_day}")
|
||||
print(f" 共 {len(csv_rows)} 条记录")
|
||||
print(f"{'='*70}")
|
||||
|
||||
# 备份CSV文件
|
||||
self._backup_csv_file(self.output_ai_statistics_day)
|
||||
|
||||
return True
|
||||
else:
|
||||
print("\n[!] 没有数据可导出")
|
||||
@@ -779,7 +932,7 @@ class DataExporter:
|
||||
注意:
|
||||
- daily_published_count: 优先从ai_articles表查询,否则使用API数据
|
||||
- cumulative_published_count: 优先从ai_articles表查询(从起始日到stat_date的累计发文量)
|
||||
- monthly_revenue: stat_date所在自然月的总收益(使用近30天收益作为近似值)
|
||||
- monthly_revenue: 从ai_statistics_days表汇总计算(当月1日至stat_date的day_revenue总和)
|
||||
- weekly_revenue: 优先从ai_statistics_days表汇总计算,否则使用API数据
|
||||
|
||||
Args:
|
||||
@@ -851,38 +1004,49 @@ class DataExporter:
|
||||
daily_published = int(latest_day_data.get('publish_count', 0) or 0)
|
||||
print(f" [使用API] 文章数据: 单日={daily_published}, 累计={cumulative_count}")
|
||||
|
||||
# 计算当周收益:数据库中本周已有的收益 + 当日新抓取的收益
|
||||
# 计算当周收益:从数据库汇总本周一至周日的day_revenue总和
|
||||
if use_db_weekly_revenue and author_id > 0:
|
||||
# 从数据库查询本周已有的收益(不包括今天,因为今天的数据还没导入)
|
||||
weekly_revenue_db = self.calculate_weekly_revenue_from_db(author_id, formatted_date)
|
||||
# 当周收益 = 数据库中的历史收益 + 当日新抓取的收益
|
||||
day_revenue = income_metrics['day_revenue']
|
||||
weekly_revenue_total = weekly_revenue_db + day_revenue
|
||||
# 从数据库查询本周的收益总和(传入当日收益)
|
||||
weekly_revenue_total = self.calculate_weekly_revenue_from_db(
|
||||
author_id,
|
||||
formatted_date,
|
||||
today_revenue=income_metrics['day_revenue'] # 传入当日收益
|
||||
)
|
||||
income_metrics['weekly_revenue'] = weekly_revenue_total
|
||||
print(f" [数据库] 本周已有收益: ¥{weekly_revenue_db:.2f}")
|
||||
print(f" [API] 当日新增收益: ¥{day_revenue:.2f}")
|
||||
print(f" [计算] 当周总收益: ¥{weekly_revenue_total:.2f}")
|
||||
print(f" [数据库] 当周收益: ¥{weekly_revenue_total:.2f}")
|
||||
|
||||
# 计算当月收益:从数据库汇总当月1日至stat_date的day_revenue总和
|
||||
monthly_revenue_total = self.calculate_monthly_revenue_from_db(
|
||||
author_id,
|
||||
formatted_date,
|
||||
today_revenue=income_metrics['day_revenue'] # 传入当日收益
|
||||
)
|
||||
income_metrics['monthly_revenue'] = monthly_revenue_total
|
||||
|
||||
# 计算周环比:本周 vs 上周
|
||||
# 公式:周环比 = (本周收益 - 上周收益) / 上周收益
|
||||
last_week_revenue = self.calculate_last_week_revenue_from_db(author_id, formatted_date)
|
||||
if last_week_revenue > 0:
|
||||
income_metrics['revenue_wow_growth_rate'] = (weekly_revenue_total - last_week_revenue) / last_week_revenue
|
||||
print(f" [计算] 周环比: {income_metrics['revenue_wow_growth_rate']:.2%} (本周¥{weekly_revenue_total:.2f} vs 上周¥{last_week_revenue:.2f})")
|
||||
else:
|
||||
print(f" [计算] 周环比: 无法计算(上周没有数据)")
|
||||
# 分母为0时设为1,避免除零错误
|
||||
denominator = last_week_revenue if last_week_revenue > 0 else 1
|
||||
wow_rate = (weekly_revenue_total - last_week_revenue) / denominator
|
||||
income_metrics['revenue_wow_growth_rate'] = wow_rate
|
||||
print(f" [计算] 周环比: {wow_rate:.4f} (本周¥{weekly_revenue_total:.2f} vs 上周¥{last_week_revenue:.2f})")
|
||||
|
||||
# 计算月环比:当月 vs 上月
|
||||
# 公式:月环比 = (当月收益 - 上月收益) / 上月收益
|
||||
last_month_revenue = self.calculate_last_month_revenue_from_db(author_id, formatted_date)
|
||||
monthly_revenue = income_metrics['monthly_revenue']
|
||||
if last_month_revenue > 0:
|
||||
income_metrics['revenue_mom_growth_rate'] = (monthly_revenue - last_month_revenue) / last_month_revenue
|
||||
print(f" [计算] 月环比: {income_metrics['revenue_mom_growth_rate']:.2%} (当月¥{monthly_revenue:.2f} vs 上月¥{last_month_revenue:.2f})")
|
||||
else:
|
||||
print(f" [计算] 月环比: 无法计算(上月没有数据)")
|
||||
# 分母为0时设为1,避免除零错误
|
||||
denominator = last_month_revenue if last_month_revenue > 0 else 1
|
||||
mom_rate = (monthly_revenue - last_month_revenue) / denominator
|
||||
income_metrics['revenue_mom_growth_rate'] = mom_rate
|
||||
print(f" [计算] 月环比: {mom_rate:.4f} (当月¥{monthly_revenue:.2f} vs 上月¥{last_month_revenue:.2f})")
|
||||
else:
|
||||
# 如果不使用数据库,weekly_revenue = 当日收益
|
||||
income_metrics['weekly_revenue'] = income_metrics['day_revenue']
|
||||
income_metrics['monthly_revenue'] = income_metrics['day_revenue']
|
||||
print(f" [跳过数据库] 当周收益 = 当日收益: ¥{income_metrics['day_revenue']:.2f}")
|
||||
print(f" [跳过数据库] 当月收益 = 当日收益: ¥{income_metrics['day_revenue']:.2f}")
|
||||
|
||||
row = {
|
||||
'author_id': author_id,
|
||||
@@ -940,6 +1104,10 @@ class DataExporter:
|
||||
print(f"[OK] ai_statistics_days 表数据已导出到: {self.output_ai_statistics_days}")
|
||||
print(f" 共 {len(csv_rows)} 条记录")
|
||||
print(f"{'='*70}")
|
||||
|
||||
# 备份CSV文件
|
||||
self._backup_csv_file(self.output_ai_statistics_days)
|
||||
|
||||
return True
|
||||
else:
|
||||
print("\n[!] 没有数据可导出")
|
||||
@@ -1439,9 +1607,6 @@ class DataExporter:
|
||||
|
||||
# 滑图占比需要限制在decimal(5,4)范围内(0-9.9999)
|
||||
slide_ratio_value = float(metrics['slide_ratio'])
|
||||
# 如果值大于10,说明是百分比格式,需要除以100
|
||||
if slide_ratio_value > 10:
|
||||
slide_ratio_value = slide_ratio_value / 100
|
||||
# 确保不超过9.9999
|
||||
slide_ratio_value = min(slide_ratio_value, 9.9999)
|
||||
|
||||
@@ -1547,14 +1712,28 @@ class DataExporter:
|
||||
else:
|
||||
print(f" [使用API] 投稿量: {total_submission_count}")
|
||||
|
||||
# 滑图占比需要限制在decimal(5,4)范围内(0-9.9999)
|
||||
slide_ratio_value = float(latest_day_data.get('pic_slide_rate', 0) or 0)
|
||||
# 如果值大于10,说明是百分比格式,需要除以100
|
||||
if slide_ratio_value > 10:
|
||||
slide_ratio_value = slide_ratio_value / 100
|
||||
# 确保不超过9.9999
|
||||
# 所有rate字段需要限制在decimal(5,4)范围内(0-9.9999)
|
||||
# API返回的都是百分制,需要除以100转换为小数
|
||||
slide_ratio_raw = float(latest_day_data.get('pic_slide_rate', 0) or 0)
|
||||
slide_ratio_value = (slide_ratio_raw / 100 if slide_ratio_raw > 0 else 0.0)
|
||||
slide_ratio_value = min(slide_ratio_value, 9.9999)
|
||||
|
||||
comment_rate_raw = float(latest_day_data.get('comment_rate', 0) or 0)
|
||||
comment_rate_value = (comment_rate_raw / 100 if comment_rate_raw > 0 else 0.0)
|
||||
comment_rate_value = min(comment_rate_value, 9.9999)
|
||||
|
||||
like_rate_raw = float(latest_day_data.get('likes_rate', 0) or 0)
|
||||
like_rate_value = (like_rate_raw / 100 if like_rate_raw > 0 else 0.0)
|
||||
like_rate_value = min(like_rate_value, 9.9999)
|
||||
|
||||
favorite_rate_raw = float(latest_day_data.get('collect_rate', 0) or 0)
|
||||
favorite_rate_value = (favorite_rate_raw / 100 if favorite_rate_raw > 0 else 0.0)
|
||||
favorite_rate_value = min(favorite_rate_value, 9.9999)
|
||||
|
||||
share_rate_raw = float(latest_day_data.get('share_rate', 0) or 0)
|
||||
share_rate_value = (share_rate_raw / 100 if share_rate_raw > 0 else 0.0)
|
||||
share_rate_value = min(share_rate_value, 9.9999)
|
||||
|
||||
record = {
|
||||
'author_id': author_id,
|
||||
'author_name': account_id,
|
||||
@@ -1566,10 +1745,10 @@ class DataExporter:
|
||||
'total_like_count': int(latest_day_data.get('likes_count', 0) or 0),
|
||||
'total_favorite_count': int(latest_day_data.get('collect_count', 0) or 0),
|
||||
'total_share_count': int(latest_day_data.get('share_count', 0) or 0),
|
||||
'avg_comment_rate': float(latest_day_data.get('comment_rate', 0) or 0),
|
||||
'avg_like_rate': float(latest_day_data.get('likes_rate', 0) or 0),
|
||||
'avg_favorite_rate': float(latest_day_data.get('collect_rate', 0) or 0),
|
||||
'avg_share_rate': float(latest_day_data.get('share_rate', 0) or 0),
|
||||
'avg_comment_rate': comment_rate_value,
|
||||
'avg_like_rate': like_rate_value,
|
||||
'avg_favorite_rate': favorite_rate_value,
|
||||
'avg_share_rate': share_rate_value,
|
||||
'avg_slide_ratio': slide_ratio_value,
|
||||
'total_baidu_search_volume': int(latest_day_data.get('disp_pv', 0) or 0),
|
||||
}
|
||||
@@ -1698,18 +1877,38 @@ class DataExporter:
|
||||
|
||||
|
||||
def main():
|
||||
import argparse
|
||||
|
||||
# 解析命令行参数
|
||||
parser = argparse.ArgumentParser(
|
||||
description='百家号数据导出工具 - 从 bjh_integrated_data.json 导出',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--mode',
|
||||
type=str,
|
||||
choices=['csv', 'database'],
|
||||
default='csv',
|
||||
help='导出模式:csv=导出CSV文件, database=直接插入数据库 (默认: csv)'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--no-confirm',
|
||||
action='store_true',
|
||||
help='跳过确认提示,直接执行(用于批量脚本)'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
print("\n" + "="*70)
|
||||
print("百家号数据导出工具 - 从 bjh_integrated_data.json 导出")
|
||||
print("="*70)
|
||||
|
||||
# 选择导出模式
|
||||
print("\n请选择导出模式:")
|
||||
print(" 1. 导出CSV文件")
|
||||
print(" 2. 直接插入数据库")
|
||||
use_database = (args.mode == 'database')
|
||||
|
||||
mode = input("\n输入选项 (1/2, 默认1): ").strip() or '1'
|
||||
|
||||
if mode == '2':
|
||||
if use_database:
|
||||
# 数据库模式
|
||||
exporter = DataExporter(use_database=True)
|
||||
|
||||
@@ -1728,13 +1927,15 @@ def main():
|
||||
print(" 3. ai_statistics_days.csv - 核心指标统计表(含发文量、收益、环比)")
|
||||
print("="*70)
|
||||
|
||||
confirm = input("\n是否继续? (y/n): ").strip().lower()
|
||||
# 确认执行(除非使用--no-confirm参数)
|
||||
if not args.no_confirm:
|
||||
confirm = input("\n是否继续? (y/n): ").strip().lower()
|
||||
|
||||
if confirm != 'y':
|
||||
print("\n已取消")
|
||||
return
|
||||
|
||||
if confirm == 'y':
|
||||
exporter.export_all_tables()
|
||||
else:
|
||||
print("\n已取消")
|
||||
return
|
||||
exporter.export_all_tables()
|
||||
|
||||
print("\n" + "="*70)
|
||||
print("完成")
|
||||
|
||||
Reference in New Issue
Block a user