feat: 完善代理重试机制,添加数据验证告警,新增README文档
This commit is contained in:
296
bjh_analytics.py
296
bjh_analytics.py
@@ -29,12 +29,12 @@ from database_config import DatabaseManager, DB_CONFIG
|
||||
|
||||
# Proxy configuration - Damai proxy IP service.
#
# NOTE(review): the default values below are live credentials committed to
# source control. Prefer supplying them through the environment variables of
# the same name and rotating the committed defaults.
import os  # harmless if the module already imports os at the top

# API endpoint that returns one proxy IP per call. Exactly one URL literal is
# kept here: placing two adjacent string literals inside the parentheses would
# silently concatenate them into a single malformed URL.
PROXY_API_URL = os.environ.get(
    'PROXY_API_URL',
    'https://api2.damaiip.com/index.php?s=/front/user/getIPlist&xsn=2912cb2b22d3b7ae724f045012790479&osn=TC_NO176707424165606223&tiqu=1',
)

# Damai proxy username/password authentication.
# Each name is assigned exactly once; the values are the ones that were
# effective before (the later of the duplicated assignments).
PROXY_USERNAME = os.environ.get('PROXY_USERNAME', '69538fdef04e1')
PROXY_PASSWORD = os.environ.get('PROXY_PASSWORD', '63v0kQBr2yJXnjf')
|
||||
|
||||
# 备用固定代理IP池(格式:'IP:端口', '用户名', '密码')
|
||||
BACKUP_PROXY_POOL = [
|
||||
@@ -62,7 +62,8 @@ class BaijiahaoAnalytics:
|
||||
|
||||
# 代理配置
|
||||
self.use_proxy = use_proxy
|
||||
self.current_proxy = None
|
||||
self.current_proxy = None # 当前IP,使用完后/失败后才重新获取
|
||||
self.proxy_fail_count = 0 # 当前代理失败次数
|
||||
|
||||
# 数据库配置
|
||||
self.load_from_db = load_from_db
|
||||
@@ -76,6 +77,8 @@ class BaijiahaoAnalytics:
|
||||
if self.use_proxy:
|
||||
self.logger.info("已启用代理模式")
|
||||
print("[配置] 已启用代理模式")
|
||||
# 初始化时获取第一个代理
|
||||
self.fetch_proxy(force_new=True)
|
||||
|
||||
if self.load_from_db:
|
||||
self.logger.info("已启用数据库加载模式")
|
||||
@@ -99,6 +102,12 @@ class BaijiahaoAnalytics:
|
||||
self.analytics_output = os.path.join(self.script_dir, "bjh_analytics_data.json")
|
||||
self.income_output = os.path.join(self.script_dir, "bjh_income_data_v2.json")
|
||||
|
||||
# 创建备份文件夹
|
||||
self.backup_dir = os.path.join(self.script_dir, "backup")
|
||||
if not os.path.exists(self.backup_dir):
|
||||
os.makedirs(self.backup_dir)
|
||||
print(f"[OK] 创建备份文件夹: {self.backup_dir}")
|
||||
|
||||
def cookie_string_to_dict(self, cookie_string: str) -> Dict:
|
||||
"""将Cookie字符串转换为字典格式
|
||||
|
||||
@@ -230,15 +239,23 @@ class BaijiahaoAnalytics:
|
||||
print(f"[OK] 已设置账号 {account_id} 的Cookie ({len(cookies)} 个字段)")
|
||||
return True
|
||||
|
||||
def fetch_proxy(self) -> Optional[Dict]:
|
||||
def fetch_proxy(self, force_new: bool = False) -> Optional[Dict]:
|
||||
"""从代理服务获取一个可用代理,失败时使用备用固定代理
|
||||
|
||||
Args:
|
||||
force_new: 是否强制获取新代理,默认False(优先使用当前IP)
|
||||
|
||||
Returns:
|
||||
代理配置字典,格式: {'http': 'http://...', 'https': 'http://...'}
|
||||
"""
|
||||
if not self.use_proxy:
|
||||
return None
|
||||
|
||||
# 如果已有可用代理且不强制获取新代理,直接返回
|
||||
if self.current_proxy and not force_new:
|
||||
return self.current_proxy
|
||||
|
||||
# 获取新代理
|
||||
try:
|
||||
# 使用大麦代理API获取IP
|
||||
resp = requests.get(PROXY_API_URL, timeout=10)
|
||||
@@ -247,21 +264,30 @@ class BaijiahaoAnalytics:
|
||||
# 首先尝试解析为纯文本格式(最常见)
|
||||
text = resp.text.strip()
|
||||
|
||||
# 检测是否返回错误信息
|
||||
if text.upper().startswith('ERROR'):
|
||||
raise Exception(f"代理API返回错误: {text}")
|
||||
|
||||
# 尝试直接解析为IP:PORT格式
|
||||
lines = text.split('\n')
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if ':' in line and not line.startswith('{') and not line.startswith('['):
|
||||
# 找到第一个IP:PORT格式
|
||||
ip_port = line.split()[0] if ' ' in line else line # 处理可能带有其他信息的情况
|
||||
ip_port = line.split()[0] if ' ' in line else line
|
||||
|
||||
if ip_port.count(':') == 1: # 确保是IP:PORT格式
|
||||
nowtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
|
||||
self.logger.info(f'提取大麦代理IP(文本): {ip_port} at {nowtime}')
|
||||
print(f'[代理] 提取大麦IP: {ip_port}')
|
||||
|
||||
# 大麦代理使用账号密码认证
|
||||
host, port = ip_port.split(':', 1)
|
||||
proxy_url = f'http://{PROXY_USERNAME}:{PROXY_PASSWORD}@{host}:{port}'
|
||||
if PROXY_USERNAME and PROXY_PASSWORD:
|
||||
proxy_url = f'http://{PROXY_USERNAME}:{PROXY_PASSWORD}@{host}:{port}'
|
||||
else:
|
||||
proxy_url = f'http://{host}:{port}'
|
||||
|
||||
self.current_proxy = {
|
||||
'http': proxy_url,
|
||||
'https': proxy_url,
|
||||
@@ -282,8 +308,12 @@ class BaijiahaoAnalytics:
|
||||
self.logger.info(f'提取大麦代理IP(JSON): {ip_port} at {nowtime}')
|
||||
print(f'[代理] 提取大麦IP: {ip_port}')
|
||||
|
||||
# 构建带账密的代理URL: http://username:password@host:port
|
||||
proxy_url = f'http://{PROXY_USERNAME}:{PROXY_PASSWORD}@{ip_info["ip"]}:{ip_info["port"]}'
|
||||
# 大麦代理使用账号密码认证
|
||||
if PROXY_USERNAME and PROXY_PASSWORD:
|
||||
proxy_url = f'http://{PROXY_USERNAME}:{PROXY_PASSWORD}@{ip_info["ip"]}:{ip_info["port"]}'
|
||||
else:
|
||||
proxy_url = f'http://{ip_info["ip"]}:{ip_info["port"]}'
|
||||
|
||||
self.current_proxy = {
|
||||
'http': proxy_url,
|
||||
'https': proxy_url,
|
||||
@@ -316,6 +346,34 @@ class BaijiahaoAnalytics:
|
||||
}
|
||||
return self.current_proxy
|
||||
|
||||
def mark_proxy_failed(self, max_failures: int = 3) -> bool:
    """Record one failure of the current proxy and rotate it at the limit.

    Each call increments ``self.proxy_fail_count``. When the count reaches
    ``max_failures`` (i.e. on the ``max_failures``-th failure, not after
    it), the current proxy is discarded, the counter is reset to zero and
    ``self.fetch_proxy(force_new=True)`` is called to request a replacement.

    Args:
        max_failures: Number of recorded failures at which the proxy is
            replaced. Defaults to 3, the previously hard-coded limit.

    Returns:
        bool: True if a replacement proxy was requested during this call;
        False otherwise, including when proxying is disabled or no proxy
        is currently set (in which case nothing is counted).
    """
    # Nothing to count when proxying is off or no proxy is in use.
    if not self.use_proxy or not self.current_proxy:
        return False

    self.proxy_fail_count += 1
    self.logger.warning(f"当前代理失败次数: {self.proxy_fail_count}")

    # Below the limit: keep using the same proxy.
    if self.proxy_fail_count < max_failures:
        return False

    self.logger.info("当前代理失败次数过多,重新获取新代理")
    print(f"[代理] 失败{self.proxy_fail_count}次,重新获取新代理")

    # Drop the failing proxy and start counting afresh before fetching.
    self.current_proxy = None
    self.proxy_fail_count = 0

    # The result of the fetch is not inspected here; True only signals that
    # a rotation was attempted.
    self.fetch_proxy(force_new=True)
    return True
|
||||
|
||||
def reset_proxy_fail_count(self):
    """Clear the failure counter of the current proxy.

    Meant to be called after a request succeeds, so that earlier failures
    no longer count toward replacing the proxy.
    """
    self.proxy_fail_count = 0
|
||||
|
||||
def get_common_headers(self) -> Dict:
|
||||
"""获取通用请求头"""
|
||||
return {
|
||||
@@ -425,6 +483,8 @@ class BaijiahaoAnalytics:
|
||||
|
||||
successful_data = []
|
||||
retry_count = 0
|
||||
proxy_change_count = 0 # 代理更换次数计数器
|
||||
max_proxy_changes = 3 # 最多更换3次代理(即最多使用4个不同代理)
|
||||
|
||||
while retry_count <= max_retries:
|
||||
try:
|
||||
@@ -438,6 +498,21 @@ class BaijiahaoAnalytics:
|
||||
# 获取代理(如果启用)
|
||||
proxies = self.fetch_proxy() if self.use_proxy else None
|
||||
|
||||
# 调试信息:显示代理使用情况
|
||||
if self.use_proxy:
|
||||
if proxies:
|
||||
proxy_url = proxies.get('http', '')
|
||||
if '@' in proxy_url:
|
||||
# 提取IP部分(隐藏账号密码)
|
||||
proxy_ip = proxy_url.split('@')[1]
|
||||
else:
|
||||
proxy_ip = proxy_url.replace('http://', '').replace('https://', '')
|
||||
self.logger.info(f"发文统计API 使用代理: {proxy_ip}")
|
||||
print(f" [代理] 使用IP: {proxy_ip}")
|
||||
else:
|
||||
self.logger.warning(f"发文统计API 代理未生效!use_proxy={self.use_proxy}")
|
||||
print(f" [!] 警告:代理未生效!use_proxy={self.use_proxy}")
|
||||
|
||||
response = self.session.get(
|
||||
api_url,
|
||||
headers=headers,
|
||||
@@ -462,6 +537,9 @@ class BaijiahaoAnalytics:
|
||||
self.logger.info("发文统计API调用成功")
|
||||
print(f" [✓] API调用成功")
|
||||
|
||||
# 请求成功,重置代理失败计数
|
||||
self.reset_proxy_fail_count()
|
||||
|
||||
# 提取发文统计数据
|
||||
total_info = data.get('data', {}).get('total_info', {})
|
||||
|
||||
@@ -490,6 +568,34 @@ class BaijiahaoAnalytics:
|
||||
else:
|
||||
self.logger.error(f"API返回错误: errno={errno}, errmsg={errmsg}")
|
||||
print(f" [X] API返回错误: errno={errno}, errmsg={errmsg}")
|
||||
|
||||
# 特别处理 errno=10000015 (异常请求),这通常是代理未生效
|
||||
if errno == 10000015 and self.use_proxy:
|
||||
self.logger.warning("检测到 errno=10000015(异常请求),代理未生效,立即强制更换新代理")
|
||||
print(f" [!] 检测到代理未生效,立即更换新代理")
|
||||
|
||||
# 检查是否超过代理更换上限
|
||||
if proxy_change_count >= max_proxy_changes:
|
||||
print(f" [X] 已达代理更换上限({max_proxy_changes}次),放弃重试")
|
||||
break
|
||||
|
||||
# 立即强制获取新代理(不等待3次)
|
||||
self.current_proxy = None
|
||||
self.proxy_fail_count = 0
|
||||
new_proxy = self.fetch_proxy(force_new=True)
|
||||
|
||||
if new_proxy:
|
||||
# 如果还没达到重试上限,尝试重试
|
||||
if retry_count < max_retries:
|
||||
proxy_change_count += 1
|
||||
self.logger.info(f"已更换新代理({proxy_change_count}/{max_proxy_changes}),将重试,当前第{retry_count+1}次")
|
||||
print(f" [!] 已更换新代理({proxy_change_count}/{max_proxy_changes}),将重试...")
|
||||
retry_count += 1
|
||||
continue
|
||||
else:
|
||||
self.logger.error("无法获取新代理,放弃重试")
|
||||
print(f" [X] 无法获取新代理")
|
||||
|
||||
break # API错误,不重试
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
@@ -521,6 +627,58 @@ class BaijiahaoAnalytics:
|
||||
if retry_count < max_retries:
|
||||
self.logger.warning(f"发文统计API代理连接错误: {error_type},将重试")
|
||||
print(f" [!] 代理连接错误: {error_type}")
|
||||
|
||||
# 标记代理失败
|
||||
self.mark_proxy_failed()
|
||||
|
||||
# 超时或连接错误立即更换代理(不等待3次失败)
|
||||
if self.use_proxy and ('Timeout' in error_type or 'Connection' in error_type or 'ProxyError' in error_type):
|
||||
# 检查是否超过代理更换上限
|
||||
if proxy_change_count >= max_proxy_changes:
|
||||
self.logger.error(f"已达代理更换上限({max_proxy_changes}次),放弃重试")
|
||||
print(f" [X] 已达代理更换上限({max_proxy_changes}次),放弃重试")
|
||||
break
|
||||
|
||||
self.logger.warning(f"检测到{error_type}错误,立即更换新代理")
|
||||
print(f" [!] 检测到{error_type},立即更换新代理")
|
||||
self.current_proxy = None
|
||||
self.proxy_fail_count = 0
|
||||
new_proxy = self.fetch_proxy(force_new=True)
|
||||
|
||||
if new_proxy:
|
||||
proxy_change_count += 1
|
||||
self.logger.info(f"已更换新代理({proxy_change_count}/{max_proxy_changes}),继续重试")
|
||||
print(f" [✓] 已更换新代理({proxy_change_count}/{max_proxy_changes}),继续重试")
|
||||
# 更换代理后,不增加retry_count,直接continue重试
|
||||
continue
|
||||
else:
|
||||
self.logger.error("无法获取新代理,放弃重试")
|
||||
print(f" [X] 无法获取新代理")
|
||||
break
|
||||
# 其他代理错误,等待3次失败后更换
|
||||
elif self.proxy_fail_count >= 3 and self.use_proxy:
|
||||
# 检查是否超过代理更换上限
|
||||
if proxy_change_count >= max_proxy_changes:
|
||||
self.logger.error(f"已达代理更换上限({max_proxy_changes}次),放弃重试")
|
||||
print(f" [X] 已达代理更换上限({max_proxy_changes}次),放弃重试")
|
||||
break
|
||||
|
||||
print(f" [!] 代理已失败{self.proxy_fail_count}次,强制更换新代理")
|
||||
self.current_proxy = None
|
||||
self.proxy_fail_count = 0
|
||||
new_proxy = self.fetch_proxy(force_new=True)
|
||||
if new_proxy:
|
||||
proxy_change_count += 1
|
||||
self.logger.info(f"已更换新代理({proxy_change_count}/{max_proxy_changes}),继续重试")
|
||||
print(f" [✓] 已更换新代理({proxy_change_count}/{max_proxy_changes}),继续重试")
|
||||
# 更换代理后,不增加retry_count,直接continue重试
|
||||
continue
|
||||
else:
|
||||
self.logger.error("无法获取新代理")
|
||||
print(f" [X] 无法获取新代理")
|
||||
break
|
||||
|
||||
# 其他情况才增加retry_count
|
||||
retry_count += 1
|
||||
continue
|
||||
else:
|
||||
@@ -701,6 +859,8 @@ class BaijiahaoAnalytics:
|
||||
print(f" API: {api_url}")
|
||||
|
||||
retry_count = 0
|
||||
proxy_change_count = 0 # 代理更换次数计数器
|
||||
max_proxy_changes = 3 # 最多更换3次代理(即最多使用4个不同代理)
|
||||
|
||||
while retry_count <= max_retries:
|
||||
try:
|
||||
@@ -714,6 +874,21 @@ class BaijiahaoAnalytics:
|
||||
# 获取代理(如果启用)
|
||||
proxies = self.fetch_proxy() if self.use_proxy else None
|
||||
|
||||
# 调试信息:显示代理使用情况
|
||||
if self.use_proxy:
|
||||
if proxies:
|
||||
proxy_url = proxies.get('http', '')
|
||||
if '@' in proxy_url:
|
||||
# 提取IP部分(隐藏账号密码)
|
||||
proxy_ip = proxy_url.split('@')[1]
|
||||
else:
|
||||
proxy_ip = proxy_url.replace('http://', '').replace('https://', '')
|
||||
self.logger.info(f"收入API 使用代理: {proxy_ip}")
|
||||
print(f" [代理] 使用IP: {proxy_ip}")
|
||||
else:
|
||||
self.logger.warning(f"收入API 代理未生效!use_proxy={self.use_proxy}")
|
||||
print(f" [!] 警告:代理未生效!use_proxy={self.use_proxy}")
|
||||
|
||||
response = self.session.get(
|
||||
api_url,
|
||||
headers=headers,
|
||||
@@ -735,6 +910,9 @@ class BaijiahaoAnalytics:
|
||||
self.logger.info("收入数据API调用成功")
|
||||
print(f" [✓] API调用成功")
|
||||
|
||||
# 请求成功,重置代理失败计数
|
||||
self.reset_proxy_fail_count()
|
||||
|
||||
# 显示收入数据摘要
|
||||
income_data = data.get('data', {}).get('income', {})
|
||||
if 'recent7Days' in income_data:
|
||||
@@ -752,6 +930,34 @@ class BaijiahaoAnalytics:
|
||||
else:
|
||||
self.logger.error(f"收入API返回错误: errno={errno}, errmsg={errmsg}")
|
||||
print(f" [X] API返回错误: errno={errno}, errmsg={errmsg}")
|
||||
|
||||
# 特别处理 errno=10000015 (异常请求),这通常是代理未生效
|
||||
if errno == 10000015 and self.use_proxy:
|
||||
self.logger.warning("检测到收入API errno=10000015(异常请求),代理未生效,立即强制更换新代理")
|
||||
print(f" [!] 检测到代理未生效,立即更换新代理")
|
||||
|
||||
# 检查是否超过代理更换上限
|
||||
if proxy_change_count >= max_proxy_changes:
|
||||
print(f" [X] 已达代理更换上限({max_proxy_changes}次),放弃重试")
|
||||
return None
|
||||
|
||||
# 立即强制获取新代理(不等待3次)
|
||||
self.current_proxy = None
|
||||
self.proxy_fail_count = 0
|
||||
new_proxy = self.fetch_proxy(force_new=True)
|
||||
|
||||
if new_proxy:
|
||||
# 如果还没达到重试上限,尝试重试
|
||||
if retry_count < max_retries:
|
||||
proxy_change_count += 1
|
||||
self.logger.info(f"已更换新代理({proxy_change_count}/{max_proxy_changes}),将重试收入API,当前第{retry_count+1}次")
|
||||
print(f" [!] 已更换新代理({proxy_change_count}/{max_proxy_changes}),将重试...")
|
||||
retry_count += 1
|
||||
continue
|
||||
else:
|
||||
self.logger.error("无法获取新代理,放弃重试")
|
||||
print(f" [X] 无法获取新代理")
|
||||
|
||||
return None
|
||||
except json.JSONDecodeError as e:
|
||||
self.logger.error(f"收入数据JSON解析失败: {e}")
|
||||
@@ -781,6 +987,58 @@ class BaijiahaoAnalytics:
|
||||
if retry_count < max_retries:
|
||||
self.logger.warning(f"收入数据API代理连接错误: {error_type},将重试")
|
||||
print(f" [!] 代理连接错误: {error_type}")
|
||||
|
||||
# 标记代理失败
|
||||
self.mark_proxy_failed()
|
||||
|
||||
# 超时或连接错误立即更换代理(不等待3次失败)
|
||||
if self.use_proxy and ('Timeout' in error_type or 'Connection' in error_type or 'ProxyError' in error_type):
|
||||
# 检查是否超过代理更换上限
|
||||
if proxy_change_count >= max_proxy_changes:
|
||||
self.logger.error(f"已达代理更换上限({max_proxy_changes}次),放弃重试")
|
||||
print(f" [X] 已达代理更换上限({max_proxy_changes}次),放弃重试")
|
||||
return None
|
||||
|
||||
self.logger.warning(f"检测到{error_type}错误,立即更换新代理")
|
||||
print(f" [!] 检测到{error_type},立即更换新代理")
|
||||
self.current_proxy = None
|
||||
self.proxy_fail_count = 0
|
||||
new_proxy = self.fetch_proxy(force_new=True)
|
||||
|
||||
if new_proxy:
|
||||
proxy_change_count += 1
|
||||
self.logger.info(f"已更换新代理({proxy_change_count}/{max_proxy_changes}),继续重试")
|
||||
print(f" [✓] 已更换新代理({proxy_change_count}/{max_proxy_changes}),继续重试")
|
||||
# 更换代理后,不增加retry_count,直接continue重试
|
||||
continue
|
||||
else:
|
||||
self.logger.error("无法获取新代理,放弃重试")
|
||||
print(f" [X] 无法获取新代理")
|
||||
return None
|
||||
# 其他代理错误,等待3次失败后更换
|
||||
elif self.proxy_fail_count >= 3 and self.use_proxy:
|
||||
# 检查是否超过代理更换上限
|
||||
if proxy_change_count >= max_proxy_changes:
|
||||
self.logger.error(f"已达代理更换上限({max_proxy_changes}次),放弃重试")
|
||||
print(f" [X] 已达代理更换上限({max_proxy_changes}次),放弃重试")
|
||||
return None
|
||||
|
||||
print(f" [!] 代理已失败{self.proxy_fail_count}次,强制更换新代理")
|
||||
self.current_proxy = None
|
||||
self.proxy_fail_count = 0
|
||||
new_proxy = self.fetch_proxy(force_new=True)
|
||||
if new_proxy:
|
||||
proxy_change_count += 1
|
||||
self.logger.info(f"已更换新代理({proxy_change_count}/{max_proxy_changes}),继续重试")
|
||||
print(f" [✓] 已更换新代理({proxy_change_count}/{max_proxy_changes}),继续重试")
|
||||
# 更换代理后,不增加retry_count,直接continue重试
|
||||
continue
|
||||
else:
|
||||
self.logger.error("无法获取新代理")
|
||||
print(f" [X] 无法获取新代理")
|
||||
return None
|
||||
|
||||
# 其他情况才增加retry_count
|
||||
retry_count += 1
|
||||
continue
|
||||
else:
|
||||
@@ -866,6 +1124,8 @@ class BaijiahaoAnalytics:
|
||||
errno = data.get('errno', -1)
|
||||
|
||||
if errno == 0:
|
||||
# 请求成功,重置代理失败计数
|
||||
self.reset_proxy_fail_count()
|
||||
return data
|
||||
else:
|
||||
self.logger.error(f"单日收入API返回错误: errno={errno}")
|
||||
@@ -895,6 +1155,10 @@ class BaijiahaoAnalytics:
|
||||
if is_proxy_error:
|
||||
if retry_count < max_retries:
|
||||
self.logger.warning(f"单日收入代理连接错误 ({target_date.strftime('%Y-%m-%d')}): {error_type},将重试")
|
||||
|
||||
# 标记代理失败
|
||||
self.mark_proxy_failed()
|
||||
|
||||
retry_count += 1
|
||||
continue
|
||||
else:
|
||||
@@ -1068,17 +1332,29 @@ class BaijiahaoAnalytics:
|
||||
return results
|
||||
|
||||
def save_results(self, results: List[Dict]):
|
||||
"""保存结果到文件
|
||||
"""保存结果到文件(同时备份带日期的副本)
|
||||
|
||||
Args:
|
||||
results: 数据分析结果列表
|
||||
"""
|
||||
import shutil
|
||||
|
||||
try:
|
||||
# 1. 保存到主文件(不带时间戳)
|
||||
with open(self.output_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(results, f, ensure_ascii=False, indent=2)
|
||||
|
||||
print(f"\n{'='*70}")
|
||||
print(f"[OK] 数据已保存到: {self.output_file}")
|
||||
|
||||
# 2. 创建带日期的备份文件(只保留日期)
|
||||
timestamp = datetime.now().strftime('%Y%m%d')
|
||||
backup_filename = f"bjh_integrated_data_{timestamp}.json"
|
||||
backup_file = os.path.join(self.backup_dir, backup_filename)
|
||||
|
||||
# 复制文件到备份目录
|
||||
shutil.copy2(self.output_file, backup_file)
|
||||
print(f"[OK] 备份已保存到: {backup_file}")
|
||||
print(f"{'='*70}")
|
||||
|
||||
# 显示统计
|
||||
|
||||
Reference in New Issue
Block a user