#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AI article monitoring/publishing script.

(Original docstring said: "monitors records with status 'topic' and calls the
Coze API to generate articles" — NOTE(review): the code below actually polls
``ai_articles`` for status ``published_review`` and batch-publishes them via
``/api/articles/batch-publish-auto``; the docstring appears inherited from a
sibling generator script — confirm.)
"""

import os
import sys
import time
import json
import logging
import requests
import pymysql  # NOTE(review): not referenced directly in this file; DB access goes through db_manager
from datetime import datetime
from typing import Dict, List, Optional, Any
import traceback
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed  # NOTE(review): unused in this file
from queue import Queue, Empty  # NOTE(review): unused in this file
import random
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Add the project root to the Python path so sibling modules import cleanly.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database_config import get_db_manager
from log_config import setup_logger

# Logger with date-based rotation and console output (see log_config).
logger = setup_logger(
    name='push_article',
    log_file='logs/push_article_published.log',
    error_log_file='logs/push_article_published_error.log',
    level=logging.INFO,
    console_output=True
)

# --- Configuration constants ---
#BASE_URL = "http://47.99.184.230:8324"   # production endpoint (kept for reference)
BASE_URL = "http://127.0.0.1:8324"        # API base URL currently in use
SLEEP_INTERVAL = 5    # polling interval of the monitor loop (seconds)
WORKER_COUNT = 10     # configured worker count (NOTE(review): run_monitor only ever uses 1 worker)

# Batch publishing configuration
BATCH_SIZE = 8        # number of articles submitted per batch
BATCH_INTERVAL = 2    # pause between batches (seconds)

# Network retry configuration
MAX_RETRIES = 3                                   # max retry count per request
BACKOFF_FACTOR = 1                                # exponential backoff factor
RETRY_STATUS_CODES = [500, 502, 503, 504, 429]    # HTTP statuses retried by the urllib3 Retry strategy
CONNECTION_TIMEOUT = 30                           # connect timeout (seconds)
READ_TIMEOUT = 120                                # read timeout (seconds)

# Module-level globals.
# NOTE(review): these three appear unused — state lives on the class instance instead.
AUTH_TOKEN = None
WORKFLOW_ID = None
JWT_TOKEN = None


class PushArticlePublished:
    """Polls for 'published_review' articles and batch-publishes them via the HTTP API."""

    def __init__(self) -> None:
        # API configuration
        self.base_url = BASE_URL

        # Auth state; jwt_token is populated by login_and_get_jwt_token().
        self.auth_token = None
        self.workflow_id = None
        self.jwt_token = None

        # Shared database manager (connection pooling handled there).
        self.db_manager = get_db_manager()

        # Login configuration.
        # NOTE(review): these credentials are duplicated verbatim inside
        # login_and_get_jwt_token(), which does NOT read this dict — keep in sync.
        self.login_credentials = {
            'username': 'user010',
            'password': '@5^2W6R7'
        }

        # Explicitly disable any environment proxies for API calls.
        self.proxies = {
            'http': None,
            'https': None
        }

        # Parallel-processing helpers.
        # NOTE(review): neither the lock nor the set is used in this file — confirm before relying on them.
        self.processing_lock = threading.Lock()
        self.processed_ids = set()

        # Session configured with a urllib3 retry strategy (see _create_session).
        self.session = self._create_session()

        # Counters reported periodically by log_network_stats().
        self.request_stats = {
            'total_requests': 0,
            'successful_requests': 0,
            'failed_requests': 0,
            'retry_attempts': 0,
            'connection_errors': 0,
            'timeout_errors': 0
        }

        logger.info("PushArticlePublished 初始化完成")

    def _create_session(self) -> requests.Session:
        """Build a requests.Session with a urllib3 Retry strategy mounted on both schemes."""
        session = requests.Session()

        # Status-code based retries (handled inside urllib3, transparent to callers).
        # NOTE(review): this layers UNDER the manual retry loop in
        # _make_request_with_retry, so connection errors can be retried up to
        # (MAX_RETRIES+1) * MAX_RETRIES times in the worst case — confirm intended.
        retry_strategy = Retry(
            total=MAX_RETRIES,
            status_forcelist=RETRY_STATUS_CODES,
            backoff_factor=BACKOFF_FACTOR,
            allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "OPTIONS", "TRACE"]
        )

        adapter = HTTPAdapter(
            max_retries=retry_strategy,
            pool_connections=10,
            pool_maxsize=20
        )
        session.mount("http://", adapter)
        session.mount("https://", adapter)

        # NOTE(review): requests.Session has no 'timeout' attribute — this line is
        # inert; the effective timeout is the one passed per-request in
        # _make_request_with_retry.
        session.timeout = (CONNECTION_TIMEOUT, READ_TIMEOUT)

        return session

    def _make_request_with_retry(self, method, url, **kwargs):
        """Send an HTTP request with manual retry + exponential backoff.

        Retries connection errors, timeouts and chunked-encoding errors up to
        MAX_RETRIES times; any other exception (and the final failed attempt)
        propagates to the caller. Returns the requests.Response on success.
        """
        # Counted once per logical call, not per attempt.
        self.request_stats['total_requests'] += 1

        for attempt in range(MAX_RETRIES + 1):
            try:
                response = self.session.request(
                    method=method,
                    url=url,
                    timeout=(CONNECTION_TIMEOUT, READ_TIMEOUT),
                    proxies=self.proxies,
                    **kwargs
                )
                # Success (any HTTP status that survived the urllib3 retry layer).
                self.request_stats['successful_requests'] += 1
                if attempt > 0:
                    logger.info(f"网络请求在第 {attempt + 1} 次尝试后成功")
                return response
            except requests.exceptions.ConnectionError as e:
                self.request_stats['connection_errors'] += 1
                if attempt < MAX_RETRIES:
                    self.request_stats['retry_attempts'] += 1
                    # Exponential backoff with jitter: factor * 2^attempt + U(0,1).
                    backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1)
                    logger.warning(f"连接错误 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}")
                    logger.info(f"等待 {backoff_time:.2f} 秒后重试...")
                    time.sleep(backoff_time)
                else:
                    self.request_stats['failed_requests'] += 1
                    logger.error(f"连接最终失败,已重试 {MAX_RETRIES} 次: {e}")
                    raise
            except requests.exceptions.Timeout as e:
                self.request_stats['timeout_errors'] += 1
                if attempt < MAX_RETRIES:
                    self.request_stats['retry_attempts'] += 1
                    backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1)
                    logger.warning(f"请求超时 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}")
                    logger.info(f"等待 {backoff_time:.2f} 秒后重试...")
                    time.sleep(backoff_time)
                else:
                    self.request_stats['failed_requests'] += 1
                    logger.error(f"请求超时最终失败,已重试 {MAX_RETRIES} 次: {e}")
                    raise
            except requests.exceptions.ChunkedEncodingError as e:
                # Data-transfer interruption mid-response; safe to retry.
                if attempt < MAX_RETRIES:
                    self.request_stats['retry_attempts'] += 1
                    backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1)
                    logger.warning(f"数据传输错误 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}")
                    logger.info(f"等待 {backoff_time:.2f} 秒后重试...")
                    time.sleep(backoff_time)
                else:
                    self.request_stats['failed_requests'] += 1
                    logger.error(f"数据传输最终失败,已重试 {MAX_RETRIES} 次: {e}")
                    raise
            except Exception as e:
                # Unexpected errors are never retried.
                self.request_stats['failed_requests'] += 1
                logger.error(f"网络请求发生未预期错误: {e}")
                raise

    def log_network_stats(self) -> None:
        """Log the cumulative network counters to the logger and the ai_logs table."""
        stats = self.request_stats
        success_rate = (stats['successful_requests'] / stats['total_requests'] * 100) if stats['total_requests'] > 0 else 0
        stats_msg = (
            f"网络统计 - 总请求: {stats['total_requests']}, "
            f"成功: {stats['successful_requests']}, "
            f"失败: {stats['failed_requests']}, "
            f"重试: {stats['retry_attempts']}, "
            f"连接错误: {stats['connection_errors']}, "
            f"超时错误: {stats['timeout_errors']}, "
            f"成功率: {success_rate:.1f}%"
        )
        logger.info(stats_msg)
        self.log_to_database('INFO', '网络统计', stats_msg)

    def get_db_connection(self):
        """Return a raw DB connection from the manager, or None on failure."""
        try:
            return self.db_manager.get_connection()
        except Exception as e:
            logger.error(f"数据库连接失败: {e}")
            return None

    def log_to_database(self, level: str, message: str, details: Optional[str] = None) -> None:
        """Best-effort audit logging into the ai_logs table.

        Maps log level -> ai_logs.status; failures are logged but never raised
        so DB logging can't break the caller.
        """
        try:
            with self.db_manager.get_cursor() as cursor:
                # Map the logging level onto the table's status enum.
                status_map = {
                    'INFO': 'success',
                    'WARNING': 'warning',
                    'ERROR': 'error'
                }
                status = status_map.get(level, 'success')
                sql = """
                    INSERT INTO ai_logs (user_id, action, description, status, error_message, created_at)
                    VALUES (%s, %s, %s, %s, %s, NOW())
                """
                cursor.execute(sql, (None, 'coze_generator', message, status, details))
                logger.info(f"日志已记录到数据库: {level} - {message}")
        except Exception as e:
            logger.error(f"记录日志到数据库失败: {e}")

    def login_and_get_jwt_token(self) -> bool:
        """Log in via /api/auth/login and cache the JWT token on self.

        Returns True on success; on any failure logs the reason and returns False.
        """
        try:
            login_url = f"{self.base_url}/api/auth/login"
            # Credentials specified by the user.
            # NOTE(review): duplicates self.login_credentials — keep both in sync.
            login_data = {
                "username": "user010",
                "password": "@5^2W6R7"
            }
            logger.info(f"尝试登录: {login_data['username']}")
            logger.info(f"登录URL: {login_url}")
            self.log_to_database('INFO', f"尝试登录用户: {login_data['username']}")

            response = self._make_request_with_retry(
                'POST', login_url,
                json=login_data,
                headers={'Content-Type': 'application/json'}
            )
            logger.info(f"响应状态码: {response.status_code}")
            logger.info(f"响应内容: {response.text[:500]}...")

            if response.status_code == 200:
                result = response.json()
                # The API wraps everything in {code, message, data}; code==200 means success.
                if result.get('code') == 200:
                    self.jwt_token = result['data']['token']
                    logger.info("JWT token获取成功")
                    self.log_to_database('INFO', "JWT token获取成功", json.dumps(result['data']))
                    return True
                else:
                    error_msg = f"登录失败: {result.get('message', '未知错误')}"
                    logger.error(error_msg)
                    self.log_to_database('ERROR', error_msg, json.dumps(result))
                    return False
            else:
                error_msg = f"登录请求失败: {response.status_code}"
                logger.error(error_msg)
                self.log_to_database('ERROR', error_msg, response.text)
                return False
        except Exception as e:
            error_msg = f"登录异常: {e}"
            logger.error(error_msg)
            self.log_to_database('ERROR', error_msg, traceback.format_exc())
            return False

    def batch_publish_auto(self, article_ids: List[int]) -> bool:
        """Submit article IDs to /api/articles/batch-publish-auto.

        Re-logs-in when the token is missing; on a 401 response re-logs-in and
        retries the request exactly once. Returns True only when the API
        replies with code == 200.
        """
        try:
            logger.info(f"开始批量提交 {len(article_ids)} 篇文章到batch-publish-auto接口")
            self.log_to_database('INFO', f"开始批量提交文章", f"article_ids: {article_ids}")

            # Ensure a JWT token is available before calling the API.
            if not self.jwt_token:
                logger.warning("JWT token缺失,尝试重新登录")
                self.log_to_database('WARNING', "JWT token缺失,重新登录")
                if not self.login_and_get_jwt_token():
                    error_msg = "重新登录失败"
                    logger.error(error_msg)
                    self.log_to_database('ERROR', error_msg)
                    return False

            # The endpoint only needs the list of article IDs.
            publish_data = {
                "article_ids": article_ids
            }
            logger.info(f"准备批量提交的数据: {json.dumps(publish_data, ensure_ascii=False)}")

            upload_url = f"{self.base_url}/api/articles/batch-publish-auto"
            headers = {
                'Authorization': f'Bearer {self.jwt_token}',
                'Content-Type': 'application/json',
                'Accept': 'application/json'
            }
            response = self._make_request_with_retry(
                'POST', upload_url,
                json=publish_data,
                headers=headers
            )
            logger.info(f"批量提交响应状态码: {response.status_code}")

            if response.status_code == 200:
                try:
                    result = response.json()
                    logger.info(f"批量提交响应内容: {result}")
                    # Success is determined by the envelope code, not the HTTP status alone.
                    if result.get('code') == 200:
                        data = result.get('data', {})
                        published_count = data.get('published_count', 0)
                        failed_count = data.get('failed_count', 0)
                        success_msg = f"批量提交成功,发布: {published_count}篇,失败: {failed_count}篇"
                        logger.info(success_msg)
                        self.log_to_database('INFO', success_msg, f"article_ids: {article_ids}")
                        return True
                    else:
                        error_msg = f"批量提交失败: {result.get('message', '未知错误')}"
                        logger.error(error_msg)
                        self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, response: {result}")
                        return False
                except json.JSONDecodeError as e:
                    error_msg = f"解析批量提交响应失败: {e}"
                    logger.error(error_msg)
                    self.log_to_database('ERROR', error_msg, f"response_text: {response.text}")
                    return False
            elif response.status_code == 401:
                # Token likely expired: re-login and retry the submission once.
                logger.warning("收到401错误,JWT token可能已过期,尝试重新登录")
                self.log_to_database('WARNING', "JWT token过期,重新登录", f"article_ids: {article_ids}")
                if self.login_and_get_jwt_token():
                    logger.info("重新登录成功,重试批量提交请求")
                    # Refresh the Authorization header with the new token.
                    headers['Authorization'] = f'Bearer {self.jwt_token}'
                    retry_response = self._make_request_with_retry(
                        'POST', upload_url,
                        json=publish_data,
                        headers=headers
                    )
                    if retry_response.status_code == 200:
                        try:
                            result = retry_response.json()
                            logger.info(f"重试批量提交响应内容: {result}")
                            if result.get('code') == 200:
                                data = result.get('data', {})
                                published_count = data.get('published_count', 0)
                                failed_count = data.get('failed_count', 0)
                                success_msg = f"重试批量提交成功,发布: {published_count}篇,失败: {failed_count}篇"
                                logger.info(success_msg)
                                self.log_to_database('INFO', success_msg, f"article_ids: {article_ids}")
                                return True
                            else:
                                error_msg = f"重试批量提交失败: {result.get('message', '未知错误')}"
                                logger.error(error_msg)
                                self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, response: {result}")
                                return False
                        except json.JSONDecodeError as e:
                            error_msg = f"解析重试批量提交响应失败: {e}"
                            logger.error(error_msg)
                            self.log_to_database('ERROR', error_msg, f"response_text: {retry_response.text}")
                            return False
                    else:
                        error_msg = f"重试批量提交请求失败,状态码: {retry_response.status_code}"
                        logger.error(error_msg)
                        self.log_to_database('ERROR', error_msg, f"response_text: {retry_response.text}")
                        return False
                else:
                    error_msg = "重新登录失败,无法重试批量提交"
                    logger.error(error_msg)
                    self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}")
                    return False
            else:
                error_msg = f"批量提交请求失败,状态码: {response.status_code}"
                logger.error(error_msg)
                self.log_to_database('ERROR', error_msg, f"response_text: {response.text}")
                return False
        except requests.exceptions.Timeout as e:
            error_msg = f"批量提交请求超时: {e}"
            logger.error(error_msg)
            self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, timeout: {CONNECTION_TIMEOUT}s/{READ_TIMEOUT}s")
            return False
        except requests.exceptions.ConnectionError as e:
            error_msg = f"批量提交连接错误: {e}"
            logger.error(error_msg)
            self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, base_url: {self.base_url}")
            return False
        except requests.exceptions.RequestException as e:
            error_msg = f"批量提交网络异常: {e}"
            logger.error(error_msg)
            self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, exception_type: {type(e).__name__}")
            return False
        except Exception as e:
            error_msg = f"批量提交异常: {e}"
            logger.error(error_msg)
            self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, traceback: {traceback.format_exc()}")
            return False

    def is_publish_time_allowed(self) -> bool:
        """Return True when the local clock is inside the allowed window (06:00-23:59).

        NOTE(review): uses naive local time — "Beijing time" in the original
        comment is only true if the host runs in CST; confirm server timezone.
        """
        current_hour = datetime.now().hour
        # 00:00-05:59 is a blackout window; anything from 06:00 onward is allowed.
        if current_hour >= 6:
            logger.info(f"当前时间 {datetime.now().strftime('%H:%M:%S')} 可以推送")
            return True
        else:
            logger.info(f"当前时间 {datetime.now().strftime('%H:%M:%S')} 在禁止发布时段(00:00-05:59),跳过推送")
            return False

    def filter_articles_by_daily_limit(self, articles: List[Dict]) -> List[Dict]:
        """Filter articles by per-author eligibility and daily publish quota.

        An article survives only when its author (a) passes the ai_authors
        check (daily_post_max > 0, status = 'active', channel = 1) and (b) has
        an ai_statistics_days row for today with remaining quota. Authors that
        fail (a) get their article reset to 'pending_review' as a side effect.
        On any exception the ORIGINAL list is returned so the pipeline is not
        blocked.
        """
        if not articles:
            return []
        try:
            today_date = datetime.now().strftime('%Y-%m-%d')
            filtered_articles = []
            with self.db_manager.get_cursor() as cursor:
                for article in articles:
                    author_id = article.get('author_id')
                    if not author_id:
                        logger.warning(f"文章ID {article['id']} 缺少author_id,跳过")
                        continue

                    # Eligibility gate: author must be active, on channel 1,
                    # and have a positive daily quota.
                    author_check_sql = """
                        SELECT id, author_name, daily_post_max, status, channel
                        FROM ai_authors
                        WHERE id = %s AND daily_post_max > 0 AND status = 'active' AND channel = 1
                    """
                    cursor.execute(author_check_sql, (author_id,))
                    author_result = cursor.fetchone()
                    if not author_result:
                        logger.info(f"[业务日志] 作者ID {author_id} 不符合发文条件(daily_post_max>0 AND status=active AND channel=1),文章ID {article['id']} 过滤掉")
                        # Side effect: push the article back into the review queue.
                        update_sql = "UPDATE ai_articles SET status = 'pending_review', updated_at = NOW() WHERE id = %s"
                        cursor.execute(update_sql, (article['id'],))
                        logger.info(f"[业务日志] 文章ID {article['id']} 状态已更新为pending_review,需重新审批")
                        continue

                    # Today's quota usage for this author.
                    sql = """
                        SELECT daily_published_count, daily_post_max
                        FROM ai_statistics_days
                        WHERE author_id = %s AND stat_date = %s
                    """
                    cursor.execute(sql, (author_id, today_date))
                    result = cursor.fetchone()
                    if result:
                        daily_published_count = result['daily_published_count'] or 0
                        daily_post_max = result['daily_post_max'] or 0
                        # A quota below 1 in the stats row disallows publishing.
                        if daily_post_max < 1:
                            #logger.info(f"[业务日志] 作者ID {author_id} daily_post_max={daily_post_max} 小于1,文章ID {article['id']} 过滤掉,不允许发文")
                            continue
                        if daily_published_count >= daily_post_max:
                            #logger.info(f"[业务日志] 作者ID {author_id} 今日已发 {daily_published_count} 篇,达到上限 {daily_post_max},文章ID {article['id']} 跳过")
                            continue
                        else:
                            #logger.info(f"[业务日志] 作者ID {author_id} 今日已发 {daily_published_count}/{daily_post_max},文章ID {article['id']} 允许发布")
                            filtered_articles.append(article)
                    else:
                        # No stats row means publishing is denied until the
                        # daily record has been initialized elsewhere.
                        logger.info(f"[业务日志] 作者ID {author_id} 无当日统计记录,文章ID {article['id']} 过滤掉,需先初始化统计记录")
                        continue
            logger.info(f"每日限制过滤完成: 原始 {len(articles)} 篇 -> 允许发布 {len(filtered_articles)} 篇")
            return filtered_articles
        except Exception as e:
            error_msg = f"检查每日发文限制异常: {e}"
            logger.error(error_msg)
            self.log_to_database('ERROR', error_msg, traceback.format_exc())
            # Fail open: return the unfiltered list rather than blocking.
            return articles

    def get_published_review_articles(self) -> List[Dict]:
        """Fetch all ai_articles rows with status 'published_review' (author_id > 0).

        The ROW_NUMBER window ranks each author's articles by update time,
        though the outer query currently returns every ranked row (no filter
        on author_rank). Returns [] on error.
        """
        try:
            with self.db_manager.get_cursor() as cursor:
                sql = """
                    SELECT id, title, status, created_at, updated_at, author_id
                    FROM (
                        SELECT id, title, status, created_at, updated_at, author_id,
                               ROW_NUMBER() OVER (
                                   PARTITION BY author_id
                                   ORDER BY updated_at ASC, id ASC
                               ) as author_rank
                        FROM ai_articles
                        WHERE status = 'published_review' AND author_id > 0
                    ) ranked_articles
                """
                cursor.execute(sql)
                results = cursor.fetchall()
                if results:
                    logger.info(f"查询到 {len(results)} 个待发布文章")
                    for result in results:
                        logger.info(f"待发布文章 - ID: {result['id']}, 标题: {result['title']}, 状态: {result['status']}")
                        #self.log_to_database('INFO', f"发现待发布文章: {result['title']}",
                        #f"ID: {result['id']}, 状态: {result['status']}")
                else:
                    logger.info("未查询到待发布文章")
                return results
        except Exception as e:
            error_msg = f"查询待发布文章异常: {e}"
            logger.error(error_msg)
            self.log_to_database('ERROR', error_msg, traceback.format_exc())
            return []

    def process_published_review_articles(self, published_articles: List[Dict], worker_id: int) -> int:
        """Worker routine: submit the given articles in BATCH_SIZE chunks.

        Returns the number of articles in batches that were submitted
        successfully. Sleeps BATCH_INTERVAL seconds between batches.
        """
        processed_count = 0
        thread_name = f"PublishWorker-{worker_id}"
        threading.current_thread().name = thread_name
        logger.info(f"[{thread_name}] 启动,准备处理待发布文章")

        # Walk the article list one batch at a time.
        for i in range(0, len(published_articles), BATCH_SIZE):
            batch = published_articles[i:i + BATCH_SIZE]
            article_ids = [article['id'] for article in batch]
            logger.info(f"[{thread_name}] 处理批次 {i//BATCH_SIZE + 1},文章ID: {article_ids}")

            if self.batch_publish_auto(article_ids):
                processed_count += len(article_ids)
                logger.info(f"[{thread_name}] 成功处理批次,文章数量: {len(article_ids)}")
            else:
                logger.error(f"[{thread_name}] 处理批次失败,文章ID: {article_ids}")

            # Throttle between batches (skipped after the final batch).
            if i + BATCH_SIZE < len(published_articles):
                logger.info(f"[{thread_name}] 等待 {BATCH_INTERVAL} 秒后处理下一批次")
                time.sleep(BATCH_INTERVAL)

        logger.info(f"[{thread_name}] 完成,共处理 {processed_count} 篇文章")
        return processed_count

    def run_monitor(self) -> None:
        """Main daemon loop: poll, gate by time window and quota, batch-publish.

        Runs until KeyboardInterrupt; any other exception is logged and the
        loop continues after a 5-second pause. Network stats are logged every
        ``stats_interval`` iterations.
        """
        logger.info(f"开始监控ai_articles表,使用 {WORKER_COUNT} 个worker并行处理...")
        self.log_to_database('INFO', f'启动文章自动生成监控服务,worker数量: {WORKER_COUNT}', 'run_monitor')

        loop_count = 0
        stats_interval = 60  # log network stats every 60 loops (~5 minutes at SLEEP_INTERVAL=5)

        while True:
            try:
                published_articles = self.get_published_review_articles()

                # Gate 1: time window (06:00-23:59 allowed; 00:00-05:59 blocked).
                if not self.is_publish_time_allowed():
                    published_articles = []
                    logger.info("当前处于禁止发布时段,清空待发布列表")

                # Gate 2: per-author daily publish quota.
                if published_articles:
                    published_articles = self.filter_articles_by_daily_limit(published_articles)

                if published_articles:
                    logger.info(f"发现 {len(published_articles)} 篇待发布文章,启动批量发布处理")
                    self.log_to_database('INFO', f'发现待发布文章,启动批量处理', f'文章数量: {len(published_articles)}')
                    # Single worker on purpose, to avoid concurrent publish conflicts.
                    try:
                        processed_count = self.process_published_review_articles(published_articles, 1)
                        logger.info(f"批量发布处理完成,共处理 {processed_count} 篇文章")
                        self.log_to_database('INFO', f'批量发布处理完成', f'共处理 {processed_count} 篇文章')
                    except Exception as e:
                        logger.error(f"批量发布处理异常: {e}")
                        self.log_to_database('ERROR', f'批量发布处理异常', str(e))

                if not published_articles:
                    logger.info("暂无待处理任务,继续监控...")

                time.sleep(SLEEP_INTERVAL)

                # Periodic network statistics.
                loop_count += 1
                if loop_count % stats_interval == 0:
                    self.log_network_stats()
            except KeyboardInterrupt:
                logger.info("收到中断信号,停止监控")
                self.log_to_database('INFO', '监控服务手动停止', 'KeyboardInterrupt')
                break
            except Exception as e:
                error_msg = f"监控循环异常: {e}"
                logger.error(error_msg)
                self.log_to_database('ERROR', error_msg, traceback.format_exc())
                time.sleep(5)  # brief pause before resuming after an error


def main() -> None:
    """Entry point: log in once, then run the monitor loop until interrupted."""
    generator = PushArticlePublished()
    try:
        # Obtain a JWT token up front; abort if login fails.
        logger.info("开始登录获取JWT token")
        if not generator.login_and_get_jwt_token():
            logger.error("登录失败,程序退出")
            return
        generator.run_monitor()
    except Exception as e:
        logger.error(f"程序运行异常: {e}")
        generator.log_to_database('ERROR', f'程序运行异常: {e}', traceback.format_exc())


if __name__ == "__main__":
    main()