680 lines
30 KiB
Python
680 lines
30 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
"""
|
|||
|
|
AI文章自动生成监控脚本
|
|||
|
|
监控数据库中status为topic的记录,自动调用Coze API生成文章并提交
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
import time
|
|||
|
|
import json
|
|||
|
|
import logging
|
|||
|
|
import requests
|
|||
|
|
import pymysql
|
|||
|
|
from datetime import datetime
|
|||
|
|
from typing import Dict, List, Optional, Any
|
|||
|
|
import traceback
|
|||
|
|
import threading
|
|||
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|||
|
|
from queue import Queue, Empty
|
|||
|
|
import random
|
|||
|
|
from requests.adapters import HTTPAdapter
|
|||
|
|
from urllib3.util.retry import Retry
|
|||
|
|
|
|||
|
|
# 添加项目根目录到Python路径
|
|||
|
|
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|||
|
|
|
|||
|
|
from database_config import get_db_manager
|
|||
|
|
from log_config import setup_logger
|
|||
|
|
|
|||
|
|
# 配置日志记录器,支持按日期切割和控制台输出
|
|||
|
|
logger = setup_logger(
|
|||
|
|
name='push_article',
|
|||
|
|
log_file='logs/push_article_published.log',
|
|||
|
|
error_log_file='logs/push_article_published_error.log',
|
|||
|
|
level=logging.INFO,
|
|||
|
|
console_output=True
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# 配置常量
|
|||
|
|
#BASE_URL = "http://47.99.184.230:8324"
|
|||
|
|
BASE_URL = "http://127.0.0.1:8324"
|
|||
|
|
SLEEP_INTERVAL = 5 # 监控间隔(秒)
|
|||
|
|
WORKER_COUNT = 10 # 并行处理worker数量,可配置
|
|||
|
|
|
|||
|
|
# 新增:批量发布配置
|
|||
|
|
BATCH_SIZE = 8 # 一次处理的文章数量,可调
|
|||
|
|
BATCH_INTERVAL = 2 # 批次间隔时间(秒),可调
|
|||
|
|
|
|||
|
|
# 网络重试配置
|
|||
|
|
MAX_RETRIES = 3 # 最大重试次数
|
|||
|
|
BACKOFF_FACTOR = 1 # 退避因子
|
|||
|
|
RETRY_STATUS_CODES = [500, 502, 503, 504, 429] # 需要重试的HTTP状态码
|
|||
|
|
CONNECTION_TIMEOUT = 30 # 连接超时(秒)
|
|||
|
|
READ_TIMEOUT = 120 # 读取超时(秒)
|
|||
|
|
|
|||
|
|
# 全局变量
|
|||
|
|
AUTH_TOKEN = None
|
|||
|
|
WORKFLOW_ID = None
|
|||
|
|
JWT_TOKEN = None
|
|||
|
|
|
|||
|
|
class PushArticlePublished:
|
|||
|
|
def __init__(self):
|
|||
|
|
# API配置
|
|||
|
|
self.base_url = BASE_URL
|
|||
|
|
|
|||
|
|
# 认证信息
|
|||
|
|
self.auth_token = None
|
|||
|
|
self.workflow_id = None
|
|||
|
|
self.jwt_token = None
|
|||
|
|
|
|||
|
|
# 使用统一的数据库管理器
|
|||
|
|
self.db_manager = get_db_manager()
|
|||
|
|
|
|||
|
|
# 登录配置
|
|||
|
|
self.login_credentials = {
|
|||
|
|
'username': 'user010',
|
|||
|
|
'password': '@5^2W6R7'
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
# 禁用代理
|
|||
|
|
self.proxies = {
|
|||
|
|
'http': None,
|
|||
|
|
'https': None
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
# 并行处理相关
|
|||
|
|
self.processing_lock = threading.Lock() # 用于线程安全的记录分配
|
|||
|
|
self.processed_ids = set() # 已处理的记录ID集合
|
|||
|
|
|
|||
|
|
# 创建会话和配置重试策略
|
|||
|
|
self.session = self._create_session()
|
|||
|
|
|
|||
|
|
# 网络统计
|
|||
|
|
self.request_stats = {
|
|||
|
|
'total_requests': 0,
|
|||
|
|
'successful_requests': 0,
|
|||
|
|
'failed_requests': 0,
|
|||
|
|
'retry_attempts': 0,
|
|||
|
|
'connection_errors': 0,
|
|||
|
|
'timeout_errors': 0
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
logger.info("PushArticlePublished 初始化完成")
|
|||
|
|
|
|||
|
|
def _create_session(self):
|
|||
|
|
"""创建配置了重试策略的requests会话"""
|
|||
|
|
session = requests.Session()
|
|||
|
|
|
|||
|
|
# 配置重试策略
|
|||
|
|
retry_strategy = Retry(
|
|||
|
|
total=MAX_RETRIES,
|
|||
|
|
status_forcelist=RETRY_STATUS_CODES,
|
|||
|
|
backoff_factor=BACKOFF_FACTOR,
|
|||
|
|
allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "OPTIONS", "TRACE"]
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# 配置HTTP适配器
|
|||
|
|
adapter = HTTPAdapter(
|
|||
|
|
max_retries=retry_strategy,
|
|||
|
|
pool_connections=10,
|
|||
|
|
pool_maxsize=20
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
session.mount("http://", adapter)
|
|||
|
|
session.mount("https://", adapter)
|
|||
|
|
|
|||
|
|
# 设置默认超时
|
|||
|
|
session.timeout = (CONNECTION_TIMEOUT, READ_TIMEOUT)
|
|||
|
|
|
|||
|
|
return session
|
|||
|
|
|
|||
|
|
def _make_request_with_retry(self, method, url, **kwargs):
|
|||
|
|
"""带重试机制的网络请求方法"""
|
|||
|
|
self.request_stats['total_requests'] += 1
|
|||
|
|
|
|||
|
|
for attempt in range(MAX_RETRIES + 1):
|
|||
|
|
try:
|
|||
|
|
# 使用会话发送请求
|
|||
|
|
response = self.session.request(
|
|||
|
|
method=method,
|
|||
|
|
url=url,
|
|||
|
|
timeout=(CONNECTION_TIMEOUT, READ_TIMEOUT),
|
|||
|
|
proxies=self.proxies,
|
|||
|
|
**kwargs
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# 请求成功
|
|||
|
|
self.request_stats['successful_requests'] += 1
|
|||
|
|
if attempt > 0:
|
|||
|
|
logger.info(f"网络请求在第 {attempt + 1} 次尝试后成功")
|
|||
|
|
return response
|
|||
|
|
|
|||
|
|
except requests.exceptions.ConnectionError as e:
|
|||
|
|
self.request_stats['connection_errors'] += 1
|
|||
|
|
if attempt < MAX_RETRIES:
|
|||
|
|
self.request_stats['retry_attempts'] += 1
|
|||
|
|
backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1)
|
|||
|
|
logger.warning(f"连接错误 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}")
|
|||
|
|
logger.info(f"等待 {backoff_time:.2f} 秒后重试...")
|
|||
|
|
time.sleep(backoff_time)
|
|||
|
|
else:
|
|||
|
|
self.request_stats['failed_requests'] += 1
|
|||
|
|
logger.error(f"连接最终失败,已重试 {MAX_RETRIES} 次: {e}")
|
|||
|
|
raise
|
|||
|
|
|
|||
|
|
except requests.exceptions.Timeout as e:
|
|||
|
|
self.request_stats['timeout_errors'] += 1
|
|||
|
|
if attempt < MAX_RETRIES:
|
|||
|
|
self.request_stats['retry_attempts'] += 1
|
|||
|
|
backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1)
|
|||
|
|
logger.warning(f"请求超时 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}")
|
|||
|
|
logger.info(f"等待 {backoff_time:.2f} 秒后重试...")
|
|||
|
|
time.sleep(backoff_time)
|
|||
|
|
else:
|
|||
|
|
self.request_stats['failed_requests'] += 1
|
|||
|
|
logger.error(f"请求超时最终失败,已重试 {MAX_RETRIES} 次: {e}")
|
|||
|
|
raise
|
|||
|
|
|
|||
|
|
except requests.exceptions.ChunkedEncodingError as e:
|
|||
|
|
if attempt < MAX_RETRIES:
|
|||
|
|
self.request_stats['retry_attempts'] += 1
|
|||
|
|
backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1)
|
|||
|
|
logger.warning(f"数据传输错误 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}")
|
|||
|
|
logger.info(f"等待 {backoff_time:.2f} 秒后重试...")
|
|||
|
|
time.sleep(backoff_time)
|
|||
|
|
else:
|
|||
|
|
self.request_stats['failed_requests'] += 1
|
|||
|
|
logger.error(f"数据传输最终失败,已重试 {MAX_RETRIES} 次: {e}")
|
|||
|
|
raise
|
|||
|
|
|
|||
|
|
except Exception as e:
|
|||
|
|
self.request_stats['failed_requests'] += 1
|
|||
|
|
logger.error(f"网络请求发生未预期错误: {e}")
|
|||
|
|
raise
|
|||
|
|
|
|||
|
|
def log_network_stats(self):
|
|||
|
|
"""记录网络统计信息"""
|
|||
|
|
stats = self.request_stats
|
|||
|
|
success_rate = (stats['successful_requests'] / stats['total_requests'] * 100) if stats['total_requests'] > 0 else 0
|
|||
|
|
|
|||
|
|
stats_msg = (
|
|||
|
|
f"网络统计 - 总请求: {stats['total_requests']}, "
|
|||
|
|
f"成功: {stats['successful_requests']}, "
|
|||
|
|
f"失败: {stats['failed_requests']}, "
|
|||
|
|
f"重试: {stats['retry_attempts']}, "
|
|||
|
|
f"连接错误: {stats['connection_errors']}, "
|
|||
|
|
f"超时错误: {stats['timeout_errors']}, "
|
|||
|
|
f"成功率: {success_rate:.1f}%"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
logger.info(stats_msg)
|
|||
|
|
self.log_to_database('INFO', '网络统计', stats_msg)
|
|||
|
|
|
|||
|
|
def get_db_connection(self):
|
|||
|
|
"""获取数据库连接"""
|
|||
|
|
try:
|
|||
|
|
return self.db_manager.get_connection()
|
|||
|
|
except Exception as e:
|
|||
|
|
logger.error(f"数据库连接失败: {e}")
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
def log_to_database(self, level: str, message: str, details: str = None):
|
|||
|
|
"""记录日志到数据库ai_logs表"""
|
|||
|
|
try:
|
|||
|
|
with self.db_manager.get_cursor() as cursor:
|
|||
|
|
# 映射日志级别到数据库状态
|
|||
|
|
status_map = {
|
|||
|
|
'INFO': 'success',
|
|||
|
|
'WARNING': 'warning',
|
|||
|
|
'ERROR': 'error'
|
|||
|
|
}
|
|||
|
|
status = status_map.get(level, 'success')
|
|||
|
|
|
|||
|
|
sql = """
|
|||
|
|
INSERT INTO ai_logs (user_id, action, description, status, error_message, created_at)
|
|||
|
|
VALUES (%s, %s, %s, %s, %s, NOW())
|
|||
|
|
"""
|
|||
|
|
cursor.execute(sql, (None, 'coze_generator', message, status, details))
|
|||
|
|
logger.info(f"日志已记录到数据库: {level} - {message}")
|
|||
|
|
except Exception as e:
|
|||
|
|
logger.error(f"记录日志到数据库失败: {e}")
|
|||
|
|
|
|||
|
|
def login_and_get_jwt_token(self) -> bool:
|
|||
|
|
"""登录获取JWT token,参考JavaScript逻辑"""
|
|||
|
|
try:
|
|||
|
|
login_url = f"{self.base_url}/api/auth/login"
|
|||
|
|
login_data = {
|
|||
|
|
"username": "user010", # 使用用户指定的账号
|
|||
|
|
"password": "@5^2W6R7"
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
logger.info(f"尝试登录: {login_data['username']}")
|
|||
|
|
logger.info(f"登录URL: {login_url}")
|
|||
|
|
self.log_to_database('INFO', f"尝试登录用户: {login_data['username']}")
|
|||
|
|
|
|||
|
|
response = self._make_request_with_retry(
|
|||
|
|
'POST',
|
|||
|
|
login_url,
|
|||
|
|
json=login_data,
|
|||
|
|
headers={'Content-Type': 'application/json'}
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
logger.info(f"响应状态码: {response.status_code}")
|
|||
|
|
logger.info(f"响应内容: {response.text[:500]}...")
|
|||
|
|
|
|||
|
|
if response.status_code == 200:
|
|||
|
|
result = response.json()
|
|||
|
|
if result.get('code') == 200:
|
|||
|
|
self.jwt_token = result['data']['token']
|
|||
|
|
logger.info("JWT token获取成功")
|
|||
|
|
self.log_to_database('INFO', "JWT token获取成功", json.dumps(result['data']))
|
|||
|
|
return True
|
|||
|
|
else:
|
|||
|
|
error_msg = f"登录失败: {result.get('message', '未知错误')}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, json.dumps(result))
|
|||
|
|
return False
|
|||
|
|
else:
|
|||
|
|
error_msg = f"登录请求失败: {response.status_code}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, response.text)
|
|||
|
|
return False
|
|||
|
|
|
|||
|
|
except Exception as e:
|
|||
|
|
error_msg = f"登录异常: {e}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
|||
|
|
return False
|
|||
|
|
|
|||
|
|
def batch_publish_auto(self, article_ids: List[int]) -> bool:
|
|||
|
|
"""批量提交文章到/api/articles/batch-publish-auto接口"""
|
|||
|
|
try:
|
|||
|
|
logger.info(f"开始批量提交 {len(article_ids)} 篇文章到batch-publish-auto接口")
|
|||
|
|
self.log_to_database('INFO', f"开始批量提交文章", f"article_ids: {article_ids}")
|
|||
|
|
|
|||
|
|
# 确保有JWT token
|
|||
|
|
if not self.jwt_token:
|
|||
|
|
logger.warning("JWT token缺失,尝试重新登录")
|
|||
|
|
self.log_to_database('WARNING', "JWT token缺失,重新登录")
|
|||
|
|
if not self.login_and_get_jwt_token():
|
|||
|
|
error_msg = "重新登录失败"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg)
|
|||
|
|
return False
|
|||
|
|
|
|||
|
|
# 构建批量发布数据 - 根据接口要求只需要article_ids
|
|||
|
|
publish_data = {
|
|||
|
|
"article_ids": article_ids
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
logger.info(f"准备批量提交的数据: {json.dumps(publish_data, ensure_ascii=False)}")
|
|||
|
|
|
|||
|
|
# 发送请求 - 修正接口路径
|
|||
|
|
upload_url = f"{self.base_url}/api/articles/batch-publish-auto"
|
|||
|
|
headers = {
|
|||
|
|
'Authorization': f'Bearer {self.jwt_token}',
|
|||
|
|
'Content-Type': 'application/json',
|
|||
|
|
'Accept': 'application/json'
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
response = self._make_request_with_retry(
|
|||
|
|
'POST',
|
|||
|
|
upload_url,
|
|||
|
|
json=publish_data,
|
|||
|
|
headers=headers
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
logger.info(f"批量提交响应状态码: {response.status_code}")
|
|||
|
|
|
|||
|
|
if response.status_code == 200:
|
|||
|
|
try:
|
|||
|
|
result = response.json()
|
|||
|
|
logger.info(f"批量提交响应内容: {result}")
|
|||
|
|
|
|||
|
|
# 根据接口实际返回格式判断成功
|
|||
|
|
if result.get('code') == 200:
|
|||
|
|
data = result.get('data', {})
|
|||
|
|
published_count = data.get('published_count', 0)
|
|||
|
|
failed_count = data.get('failed_count', 0)
|
|||
|
|
|
|||
|
|
success_msg = f"批量提交成功,发布: {published_count}篇,失败: {failed_count}篇"
|
|||
|
|
logger.info(success_msg)
|
|||
|
|
self.log_to_database('INFO', success_msg, f"article_ids: {article_ids}")
|
|||
|
|
return True
|
|||
|
|
else:
|
|||
|
|
error_msg = f"批量提交失败: {result.get('message', '未知错误')}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, response: {result}")
|
|||
|
|
return False
|
|||
|
|
except json.JSONDecodeError as e:
|
|||
|
|
error_msg = f"解析批量提交响应失败: {e}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, f"response_text: {response.text}")
|
|||
|
|
return False
|
|||
|
|
elif response.status_code == 401:
|
|||
|
|
# Token过期,尝试重新登录并重试一次
|
|||
|
|
logger.warning("收到401错误,JWT token可能已过期,尝试重新登录")
|
|||
|
|
self.log_to_database('WARNING', "JWT token过期,重新登录", f"article_ids: {article_ids}")
|
|||
|
|
|
|||
|
|
if self.login_and_get_jwt_token():
|
|||
|
|
logger.info("重新登录成功,重试批量提交请求")
|
|||
|
|
# 更新headers中的token
|
|||
|
|
headers['Authorization'] = f'Bearer {self.jwt_token}'
|
|||
|
|
|
|||
|
|
# 重试请求
|
|||
|
|
retry_response = self._make_request_with_retry(
|
|||
|
|
'POST',
|
|||
|
|
upload_url,
|
|||
|
|
json=publish_data,
|
|||
|
|
headers=headers
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
if retry_response.status_code == 200:
|
|||
|
|
try:
|
|||
|
|
result = retry_response.json()
|
|||
|
|
logger.info(f"重试批量提交响应内容: {result}")
|
|||
|
|
|
|||
|
|
if result.get('code') == 200:
|
|||
|
|
data = result.get('data', {})
|
|||
|
|
published_count = data.get('published_count', 0)
|
|||
|
|
failed_count = data.get('failed_count', 0)
|
|||
|
|
|
|||
|
|
success_msg = f"重试批量提交成功,发布: {published_count}篇,失败: {failed_count}篇"
|
|||
|
|
logger.info(success_msg)
|
|||
|
|
self.log_to_database('INFO', success_msg, f"article_ids: {article_ids}")
|
|||
|
|
return True
|
|||
|
|
else:
|
|||
|
|
error_msg = f"重试批量提交失败: {result.get('message', '未知错误')}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, response: {result}")
|
|||
|
|
return False
|
|||
|
|
except json.JSONDecodeError as e:
|
|||
|
|
error_msg = f"解析重试批量提交响应失败: {e}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, f"response_text: {retry_response.text}")
|
|||
|
|
return False
|
|||
|
|
else:
|
|||
|
|
error_msg = f"重试批量提交请求失败,状态码: {retry_response.status_code}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, f"response_text: {retry_response.text}")
|
|||
|
|
return False
|
|||
|
|
else:
|
|||
|
|
error_msg = "重新登录失败,无法重试批量提交"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}")
|
|||
|
|
return False
|
|||
|
|
else:
|
|||
|
|
error_msg = f"批量提交请求失败,状态码: {response.status_code}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, f"response_text: {response.text}")
|
|||
|
|
return False
|
|||
|
|
|
|||
|
|
except requests.exceptions.Timeout as e:
|
|||
|
|
error_msg = f"批量提交请求超时: {e}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, timeout: {CONNECTION_TIMEOUT}s/{READ_TIMEOUT}s")
|
|||
|
|
return False
|
|||
|
|
except requests.exceptions.ConnectionError as e:
|
|||
|
|
error_msg = f"批量提交连接错误: {e}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, base_url: {self.base_url}")
|
|||
|
|
return False
|
|||
|
|
except requests.exceptions.RequestException as e:
|
|||
|
|
error_msg = f"批量提交网络异常: {e}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, exception_type: {type(e).__name__}")
|
|||
|
|
return False
|
|||
|
|
except Exception as e:
|
|||
|
|
error_msg = f"批量提交异常: {e}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, traceback: {traceback.format_exc()}")
|
|||
|
|
return False
|
|||
|
|
|
|||
|
|
|
|||
|
|
def is_publish_time_allowed(self) -> bool:
|
|||
|
|
"""检查当前时间是否在允许发布的时间窗口内(北京时间6:00-23:59)"""
|
|||
|
|
current_hour = datetime.now().hour
|
|||
|
|
# 凌晨00:00-05:59禁止发布,6:00-23:59允许发布
|
|||
|
|
if current_hour >= 6:
|
|||
|
|
logger.info(f"当前时间 {datetime.now().strftime('%H:%M:%S')} 可以推送")
|
|||
|
|
return True
|
|||
|
|
else:
|
|||
|
|
logger.info(f"当前时间 {datetime.now().strftime('%H:%M:%S')} 在禁止发布时段(00:00-05:59),跳过推送")
|
|||
|
|
return False
|
|||
|
|
|
|||
|
|
def filter_articles_by_daily_limit(self, articles: List[Dict]) -> List[Dict]:
|
|||
|
|
"""根据作者每日发文限制过滤文章
|
|||
|
|
|
|||
|
|
检查ai_statistics_days表中daily_published_count是否超过daily_post_max
|
|||
|
|
如果超过,则该作者的文章今日不发
|
|||
|
|
"""
|
|||
|
|
if not articles:
|
|||
|
|
return []
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
today_date = datetime.now().strftime('%Y-%m-%d')
|
|||
|
|
filtered_articles = []
|
|||
|
|
|
|||
|
|
with self.db_manager.get_cursor() as cursor:
|
|||
|
|
for article in articles:
|
|||
|
|
author_id = article.get('author_id')
|
|||
|
|
if not author_id:
|
|||
|
|
logger.warning(f"文章ID {article['id']} 缺少author_id,跳过")
|
|||
|
|
continue
|
|||
|
|
|
|||
|
|
# 先检查ai_authors表:作者必须满足 daily_post_max > 0, status = 'active', channel = 1
|
|||
|
|
author_check_sql = """
|
|||
|
|
SELECT id, author_name, daily_post_max, status, channel
|
|||
|
|
FROM ai_authors
|
|||
|
|
WHERE id = %s AND daily_post_max > 0 AND status = 'active' AND channel = 1
|
|||
|
|
"""
|
|||
|
|
cursor.execute(author_check_sql, (author_id,))
|
|||
|
|
author_result = cursor.fetchone()
|
|||
|
|
|
|||
|
|
if not author_result:
|
|||
|
|
logger.info(f"[业务日志] 作者ID {author_id} 不符合发文条件(daily_post_max>0 AND status=active AND channel=1),文章ID {article['id']} 过滤掉")
|
|||
|
|
# 将文章状态更新为pending_review,重新走审批流程
|
|||
|
|
update_sql = "UPDATE ai_articles SET status = 'pending_review', updated_at = NOW() WHERE id = %s"
|
|||
|
|
cursor.execute(update_sql, (article['id'],))
|
|||
|
|
logger.info(f"[业务日志] 文章ID {article['id']} 状态已更新为pending_review,需重新审批")
|
|||
|
|
continue
|
|||
|
|
|
|||
|
|
# 查询该作者当天的发文统计
|
|||
|
|
sql = """
|
|||
|
|
SELECT daily_published_count, daily_post_max
|
|||
|
|
FROM ai_statistics_days
|
|||
|
|
WHERE author_id = %s AND stat_date = %s
|
|||
|
|
"""
|
|||
|
|
cursor.execute(sql, (author_id, today_date))
|
|||
|
|
result = cursor.fetchone()
|
|||
|
|
|
|||
|
|
if result:
|
|||
|
|
daily_published_count = result['daily_published_count'] or 0
|
|||
|
|
daily_post_max = result['daily_post_max'] or 0
|
|||
|
|
|
|||
|
|
# 检查daily_post_max是否小于1,小于1则不允许发文
|
|||
|
|
if daily_post_max < 1:
|
|||
|
|
#logger.info(f"[业务日志] 作者ID {author_id} daily_post_max={daily_post_max} 小于1,文章ID {article['id']} 过滤掉,不允许发文")
|
|||
|
|
continue
|
|||
|
|
|
|||
|
|
if daily_published_count >= daily_post_max:
|
|||
|
|
#logger.info(f"[业务日志] 作者ID {author_id} 今日已发 {daily_published_count} 篇,达到上限 {daily_post_max},文章ID {article['id']} 跳过")
|
|||
|
|
continue
|
|||
|
|
else:
|
|||
|
|
#logger.info(f"[业务日志] 作者ID {author_id} 今日已发 {daily_published_count}/{daily_post_max},文章ID {article['id']} 允许发布")
|
|||
|
|
filtered_articles.append(article)
|
|||
|
|
else:
|
|||
|
|
# 没有统计记录,默认不允许发布(需要先初始化统计记录)
|
|||
|
|
logger.info(f"[业务日志] 作者ID {author_id} 无当日统计记录,文章ID {article['id']} 过滤掉,需先初始化统计记录")
|
|||
|
|
continue
|
|||
|
|
|
|||
|
|
logger.info(f"每日限制过滤完成: 原始 {len(articles)} 篇 -> 允许发布 {len(filtered_articles)} 篇")
|
|||
|
|
return filtered_articles
|
|||
|
|
|
|||
|
|
except Exception as e:
|
|||
|
|
error_msg = f"检查每日发文限制异常: {e}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
|||
|
|
# 异常时返回原始列表,避免阻塞
|
|||
|
|
return articles
|
|||
|
|
|
|||
|
|
def get_published_review_articles(self) -> List[Dict]:
|
|||
|
|
"""获取状态为published_review的待发布文章"""
|
|||
|
|
try:
|
|||
|
|
with self.db_manager.get_cursor() as cursor:
|
|||
|
|
# 查询published_review状态的文章
|
|||
|
|
sql = """
|
|||
|
|
SELECT
|
|||
|
|
id,
|
|||
|
|
title,
|
|||
|
|
status,
|
|||
|
|
created_at,
|
|||
|
|
updated_at,
|
|||
|
|
author_id
|
|||
|
|
FROM (
|
|||
|
|
SELECT
|
|||
|
|
id,
|
|||
|
|
title,
|
|||
|
|
status,
|
|||
|
|
created_at,
|
|||
|
|
updated_at,
|
|||
|
|
author_id,
|
|||
|
|
ROW_NUMBER() OVER (
|
|||
|
|
PARTITION BY author_id
|
|||
|
|
ORDER BY updated_at ASC, id ASC
|
|||
|
|
) as author_rank
|
|||
|
|
FROM ai_articles
|
|||
|
|
WHERE status = 'published_review'
|
|||
|
|
AND author_id > 0
|
|||
|
|
) ranked_articles
|
|||
|
|
"""
|
|||
|
|
cursor.execute(sql)
|
|||
|
|
results = cursor.fetchall()
|
|||
|
|
|
|||
|
|
if results:
|
|||
|
|
logger.info(f"查询到 {len(results)} 个待发布文章")
|
|||
|
|
for result in results:
|
|||
|
|
logger.info(f"待发布文章 - ID: {result['id']}, 标题: {result['title']}, 状态: {result['status']}")
|
|||
|
|
#self.log_to_database('INFO', f"发现待发布文章: {result['title']}",
|
|||
|
|
#f"ID: {result['id']}, 状态: {result['status']}")
|
|||
|
|
else:
|
|||
|
|
logger.info("未查询到待发布文章")
|
|||
|
|
|
|||
|
|
return results
|
|||
|
|
except Exception as e:
|
|||
|
|
error_msg = f"查询待发布文章异常: {e}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
|||
|
|
return []
|
|||
|
|
|
|||
|
|
def process_published_review_articles(self, published_articles: List[Dict], worker_id: int) -> int:
|
|||
|
|
"""Worker线程处理published_review状态文章的方法"""
|
|||
|
|
processed_count = 0
|
|||
|
|
thread_name = f"PublishWorker-{worker_id}"
|
|||
|
|
threading.current_thread().name = thread_name
|
|||
|
|
|
|||
|
|
logger.info(f"[{thread_name}] 启动,准备处理待发布文章")
|
|||
|
|
|
|||
|
|
# 按批次处理文章
|
|||
|
|
for i in range(0, len(published_articles), BATCH_SIZE):
|
|||
|
|
batch = published_articles[i:i + BATCH_SIZE]
|
|||
|
|
article_ids = [article['id'] for article in batch]
|
|||
|
|
|
|||
|
|
logger.info(f"[{thread_name}] 处理批次 {i//BATCH_SIZE + 1},文章ID: {article_ids}")
|
|||
|
|
|
|||
|
|
# 批量提交文章
|
|||
|
|
if self.batch_publish_auto(article_ids):
|
|||
|
|
processed_count += len(article_ids)
|
|||
|
|
logger.info(f"[{thread_name}] 成功处理批次,文章数量: {len(article_ids)}")
|
|||
|
|
else:
|
|||
|
|
logger.error(f"[{thread_name}] 处理批次失败,文章ID: {article_ids}")
|
|||
|
|
|
|||
|
|
# 批次间隔
|
|||
|
|
if i + BATCH_SIZE < len(published_articles):
|
|||
|
|
logger.info(f"[{thread_name}] 等待 {BATCH_INTERVAL} 秒后处理下一批次")
|
|||
|
|
time.sleep(BATCH_INTERVAL)
|
|||
|
|
|
|||
|
|
logger.info(f"[{thread_name}] 完成,共处理 {processed_count} 篇文章")
|
|||
|
|
return processed_count
|
|||
|
|
|
|||
|
|
def run_monitor(self):
|
|||
|
|
"""运行监控循环,支持多worker并行处理"""
|
|||
|
|
logger.info(f"开始监控ai_articles表,使用 {WORKER_COUNT} 个worker并行处理...")
|
|||
|
|
self.log_to_database('INFO', f'启动文章自动生成监控服务,worker数量: {WORKER_COUNT}', 'run_monitor')
|
|||
|
|
|
|||
|
|
# 统计计数器
|
|||
|
|
loop_count = 0
|
|||
|
|
stats_interval = 60 # 每60次循环记录一次统计(约5分钟)
|
|||
|
|
|
|||
|
|
while True:
|
|||
|
|
try:
|
|||
|
|
# 获取待发布的文章
|
|||
|
|
published_articles = self.get_published_review_articles()
|
|||
|
|
|
|||
|
|
# 逻辑1: 检查时间窗口(北京时间6:00-23:59允许,00:00-05:59禁止)
|
|||
|
|
if not self.is_publish_time_allowed():
|
|||
|
|
published_articles = []
|
|||
|
|
logger.info("当前处于禁止发布时段,清空待发布列表")
|
|||
|
|
|
|||
|
|
# 逻辑2: 根据作者每日发文限制过滤文章
|
|||
|
|
if published_articles:
|
|||
|
|
published_articles = self.filter_articles_by_daily_limit(published_articles)
|
|||
|
|
|
|||
|
|
# 处理待发布文章
|
|||
|
|
if published_articles:
|
|||
|
|
logger.info(f"发现 {len(published_articles)} 篇待发布文章,启动批量发布处理")
|
|||
|
|
self.log_to_database('INFO', f'发现待发布文章,启动批量处理', f'文章数量: {len(published_articles)}')
|
|||
|
|
|
|||
|
|
# 使用单个worker处理批量发布(避免并发冲突)
|
|||
|
|
try:
|
|||
|
|
processed_count = self.process_published_review_articles(published_articles, 1)
|
|||
|
|
logger.info(f"批量发布处理完成,共处理 {processed_count} 篇文章")
|
|||
|
|
self.log_to_database('INFO', f'批量发布处理完成', f'共处理 {processed_count} 篇文章')
|
|||
|
|
except Exception as e:
|
|||
|
|
logger.error(f"批量发布处理异常: {e}")
|
|||
|
|
self.log_to_database('ERROR', f'批量发布处理异常', str(e))
|
|||
|
|
|
|||
|
|
# 如果没有任何待处理任务
|
|||
|
|
if not published_articles:
|
|||
|
|
logger.info("暂无待处理任务,继续监控...")
|
|||
|
|
|
|||
|
|
# 每次循环后休息
|
|||
|
|
time.sleep(SLEEP_INTERVAL)
|
|||
|
|
|
|||
|
|
# 定期记录网络统计
|
|||
|
|
loop_count += 1
|
|||
|
|
if loop_count % stats_interval == 0:
|
|||
|
|
self.log_network_stats()
|
|||
|
|
|
|||
|
|
except KeyboardInterrupt:
|
|||
|
|
logger.info("收到中断信号,停止监控")
|
|||
|
|
self.log_to_database('INFO', '监控服务手动停止', 'KeyboardInterrupt')
|
|||
|
|
break
|
|||
|
|
except Exception as e:
|
|||
|
|
error_msg = f"监控循环异常: {e}"
|
|||
|
|
logger.error(error_msg)
|
|||
|
|
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
|||
|
|
time.sleep(5) # 异常时等待5秒再继续
|
|||
|
|
|
|||
|
|
def main():
|
|||
|
|
"""主函数"""
|
|||
|
|
generator = PushArticlePublished()
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
# 先登录获取JWT token
|
|||
|
|
logger.info("开始登录获取JWT token")
|
|||
|
|
if not generator.login_and_get_jwt_token():
|
|||
|
|
logger.error("登录失败,程序退出")
|
|||
|
|
return
|
|||
|
|
|
|||
|
|
# 开始监控
|
|||
|
|
generator.run_monitor()
|
|||
|
|
|
|||
|
|
except Exception as e:
|
|||
|
|
logger.error(f"程序运行异常: {e}")
|
|||
|
|
generator.log_to_database('ERROR', f'程序运行异常: {e}', traceback.format_exc())
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
|
|||
|
|
main()
|