909 lines
44 KiB
Python
909 lines
44 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
AI文章自动生成监控脚本
|
||
监控数据库中status为topic的记录,自动调用Coze API生成文章并提交
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import time
|
||
import json
|
||
import logging
|
||
import requests
|
||
import pymysql
|
||
import random
|
||
from datetime import datetime
|
||
from typing import Dict, List, Optional, Any
|
||
import traceback
|
||
import threading
|
||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||
from queue import Queue, Empty
|
||
import emoji
|
||
|
||
# 添加项目根目录到Python路径
|
||
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
||
|
||
from database_config import get_db_manager
|
||
from log_config import setup_logger
|
||
from dashvector_get_similar_topic import search_chinese, init_dashvector_client
|
||
|
||
# 配置日志记录器,支持按日期切割和控制台输出
|
||
logger = setup_logger(
|
||
name='generate_Atlas_Qianwen',
|
||
log_file='logs/generate_Atlas_Qianwen_article.log',
|
||
error_log_file='logs/generate_Atlas_Qianwen_error.log',
|
||
level=logging.INFO,
|
||
console_output=True
|
||
)
|
||
|
||
# 配置常量
|
||
#BASE_URL = "http://47.99.184.230:8321"
|
||
BASE_URL_LOGIN = "http://127.0.0.1:8216" # 使用主API服务端口
|
||
BASE_URL = "http://127.0.0.1:8215" # 使用主API服务端口
|
||
COZE_API_URL = "https://api.coze.cn/v1/workflow/stream_run"
|
||
SLEEP_INTERVAL = 5 # 监控间隔(秒)
|
||
WORKER_COUNT = 6 # 并行处理worker数量,可配置
|
||
|
||
# 全局变量
|
||
AUTH_TOKEN = None
|
||
WORKFLOW_ID = None
|
||
JWT_TOKEN = None
|
||
|
||
class CozeArticleGenerator:
|
||
def __init__(self):
|
||
# API配置
|
||
self.base_url = BASE_URL
|
||
self.base_url_login = BASE_URL_LOGIN
|
||
self.coze_api_url = COZE_API_URL
|
||
|
||
# 认证信息
|
||
self.jwt_token = None
|
||
|
||
# 使用统一的数据库管理器
|
||
self.db_manager = get_db_manager()
|
||
|
||
# 登录配置
|
||
self.login_credentials = {
|
||
'username': 'user010',
|
||
'password': '@5^2W6R7'
|
||
}
|
||
|
||
# 禁用代理
|
||
self.proxies = {
|
||
'http': None,
|
||
'https': None
|
||
}
|
||
|
||
# 并行处理相关
|
||
self.processing_lock = threading.Lock() # 用于线程安全的记录分配
|
||
self.processed_ids = set() # 已处理的记录ID集合
|
||
|
||
# 初始化DashVector客户端(向量检索)
|
||
logger.info("开始初始化DashVector客户端")
|
||
if init_dashvector_client():
|
||
logger.info("DashVector客户端初始化成功")
|
||
else:
|
||
logger.warning("DashVector客户端初始化失败,相似topic检索功能将不可用")
|
||
|
||
logger.info("CozeArticleGenerator 初始化完成")
|
||
|
||
def log_to_database(self, level: str, message: str, details: str = None):
|
||
"""获取数据库连接"""
|
||
try:
|
||
return self.db_manager.get_connection()
|
||
except Exception as e:
|
||
logger.error(f"数据库连接失败: {e}")
|
||
return None
|
||
|
||
def get_article_contents_by_ids(self, article_ids: List[int]) -> str:
|
||
"""
|
||
根据article_id列表从数据库获取content内容
|
||
|
||
Args:
|
||
article_ids: 文章ID列表
|
||
|
||
Returns:
|
||
str: 合并后的content内容,多条用\n\n分隔
|
||
"""
|
||
if not article_ids:
|
||
logger.info("没有文章ID,返回空字符串")
|
||
return ""
|
||
|
||
try:
|
||
# 去重并限制最多2条
|
||
article_ids = list(set(article_ids))[:2]
|
||
|
||
logger.info(f"开始查询文章content,article_ids: {article_ids}")
|
||
|
||
with self.db_manager.get_cursor() as cursor:
|
||
# 构建IN查询
|
||
placeholders = ','.join(['%s'] * len(article_ids))
|
||
sql = f"""
|
||
SELECT id, content, created_at
|
||
FROM ai_articles
|
||
WHERE id IN ({placeholders})
|
||
AND content IS NOT NULL AND content != ''
|
||
ORDER BY created_at DESC
|
||
LIMIT 2
|
||
"""
|
||
|
||
cursor.execute(sql, article_ids)
|
||
results = cursor.fetchall()
|
||
|
||
if not results:
|
||
logger.warning(f"未查询到文章content,article_ids: {article_ids}")
|
||
return ""
|
||
|
||
logger.info(f"查询到 {len(results)} 条文章content")
|
||
|
||
# 合并content
|
||
contents = []
|
||
for row in results:
|
||
content = row.get('content', '').strip()
|
||
if content:
|
||
contents.append(content)
|
||
logger.info(f"添加文章content,ID: {row.get('id')}, 长度: {len(content)} 字符")
|
||
|
||
# 用两个换行符分隔
|
||
merged_content = "\n\n".join(contents)
|
||
logger.info(f"合并后的content总长度: {len(merged_content)} 字符")
|
||
|
||
return merged_content
|
||
|
||
except Exception as e:
|
||
error_msg = f"查询文章content异常: {e}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
||
return ""
|
||
|
||
def log_to_database(self, level: str, message: str, details: str = None):
|
||
"""记录日志到数据库ai_logs表"""
|
||
try:
|
||
with self.db_manager.get_cursor() as cursor:
|
||
# 映射日志级别到数据库状态
|
||
status_map = {
|
||
'INFO': 'success',
|
||
'WARNING': 'warning',
|
||
'ERROR': 'error'
|
||
}
|
||
status = status_map.get(level, 'success')
|
||
|
||
sql = """
|
||
INSERT INTO ai_logs (user_id, action, description, status, error_message, created_at)
|
||
VALUES (%s, %s, %s, %s, %s, NOW())
|
||
"""
|
||
cursor.execute(sql, (None, 'coze_generator', message, status, details))
|
||
logger.info(f"日志已记录到数据库: {level} - {message}")
|
||
except Exception as e:
|
||
logger.error(f"记录日志到数据库失败: {e}")
|
||
|
||
def login_and_get_jwt_token(self) -> bool:
|
||
"""登录获取JWT token,参考JavaScript逻辑"""
|
||
try:
|
||
login_url = f"{self.base_url_login}/api/auth/login"
|
||
login_data = {
|
||
"username": "13621242430", # 使用用户指定的账号
|
||
"password": "admin123"
|
||
}
|
||
|
||
logger.info(f"尝试登录: {login_data['username']}")
|
||
self.log_to_database('INFO', f"尝试登录用户: {login_data['username']}")
|
||
|
||
response = requests.post(
|
||
login_url,
|
||
json=login_data,
|
||
headers={'Content-Type': 'application/json'},
|
||
proxies=self.proxies # 禁用代理
|
||
)
|
||
|
||
if response.status_code == 200:
|
||
result = response.json()
|
||
if result.get('code') == 200:
|
||
self.jwt_token = result['data']['token']
|
||
logger.info("JWT token获取成功")
|
||
self.log_to_database('INFO', "JWT token获取成功", json.dumps(result['data']))
|
||
return True
|
||
else:
|
||
error_msg = f"登录失败: {result.get('message', '未知错误')}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, json.dumps(result))
|
||
return False
|
||
else:
|
||
error_msg = f"登录请求失败: {response.status_code}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, response.text)
|
||
return False
|
||
|
||
except Exception as e:
|
||
error_msg = f"登录异常: {e}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
||
return False
|
||
|
||
def generate_article_from_coze(self, title: str, context_injection: str, workflow_id: str, auth_token: str) -> Optional[Dict]:
|
||
"""调用Coze API生成文章,100%参考JavaScript流式处理逻辑"""
|
||
try:
|
||
logger.info(f"开始为主题'{title}'生成文章...")
|
||
logger.info(f"上下文注入内容长度: {len(context_injection)} 字符")
|
||
self.log_to_database('INFO', f"开始为主题生成文章: {title}", f"context_injection长度: {len(context_injection)}")
|
||
|
||
# 验证传入的认证信息
|
||
if not auth_token or not workflow_id:
|
||
error_msg = f"'{title}' - workflow_id 或 auth_token 参数缺失"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg)
|
||
return None
|
||
|
||
# 构建请求数据,增加context_injection参数
|
||
request_data = {
|
||
'workflow_id': workflow_id,
|
||
'parameters': {
|
||
'title': title,
|
||
'context_injection': context_injection # 新增上下文注入参数
|
||
}
|
||
}
|
||
logger.info(f"提交coze工作流数据详情: {json.dumps(request_data['parameters'], ensure_ascii=False)[:200]}...")
|
||
|
||
# 发送流式请求
|
||
headers = {
|
||
'Authorization': f'Bearer {auth_token}',
|
||
'Content-Type': 'application/json'
|
||
}
|
||
|
||
logger.info(f"'{title}' - 发送Coze API请求...")
|
||
|
||
response = requests.post(
|
||
COZE_API_URL,
|
||
json=request_data,
|
||
headers=headers,
|
||
stream=True,
|
||
timeout=300 # 5分钟超时
|
||
)
|
||
|
||
logger.info(f"'{title}' - Coze API响应状态码: {response.status_code}")
|
||
|
||
if not response.ok:
|
||
error_msg = f"'{title}' - Coze API请求失败,状态码: {response.status_code}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, response.text)
|
||
return None
|
||
|
||
# 调用流式响应解析方法
|
||
return self.parse_stream_response(response, title)
|
||
|
||
except Exception as e:
|
||
error_msg = f"生成文章异常: {e}, 主题: {title}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
||
return None
|
||
|
||
def parse_stream_response(self, response, title: str) -> Optional[Dict[str, Any]]:
|
||
"""解析流式响应,100%参考JavaScript事件处理逻辑"""
|
||
try:
|
||
buffer = ''
|
||
last_structured = None
|
||
all_img = []
|
||
|
||
logger.info(f"'{title}' - 开始接收流式数据...")
|
||
|
||
# 设置响应编码为UTF-8
|
||
response.encoding = 'utf-8'
|
||
|
||
for chunk in response.iter_content(chunk_size=1024, decode_unicode=True):
|
||
if chunk:
|
||
buffer += chunk
|
||
events = buffer.split('\n\n')
|
||
buffer = events.pop() or ''
|
||
|
||
for event_str in events:
|
||
if not event_str.strip():
|
||
continue
|
||
|
||
lines = event_str.split('\n')
|
||
event_type = ''
|
||
data_str = ''
|
||
|
||
# 解析事件类型和数据,完全按照JavaScript逻辑
|
||
for line in lines:
|
||
if line.startswith('event:'):
|
||
event_type = line[6:].strip()
|
||
elif line.startswith('data:'):
|
||
data_str = line[5:].strip()
|
||
|
||
logger.info(f"'{title}' - 收到事件: {event_type}")
|
||
self.log_to_database('INFO', f"收到Coze事件: {event_type}", f"主题: {title}")
|
||
|
||
# 处理错误事件
|
||
if event_type == 'Error':
|
||
logger.error(f"'{title}' - Coze API返回错误: {data_str}")
|
||
self.log_to_database('ERROR', f"Coze API返回错误: {title}", data_str)
|
||
try:
|
||
err_data = json.loads(data_str)
|
||
error_detail = f"错误代码: {err_data.get('error_code', '未知错误')}, 错误信息: {err_data.get('error_message', '无详细信息')}"
|
||
logger.error(f"'{title}' - {error_detail}")
|
||
self.log_to_database('ERROR', f"Coze API错误详情: {title}", error_detail)
|
||
except json.JSONDecodeError:
|
||
logger.error(f"'{title}' - 无法解析错误数据")
|
||
self.log_to_database('ERROR', f"无法解析Coze错误数据: {title}", data_str)
|
||
return None
|
||
|
||
# 跳过PING和End事件
|
||
if event_type in ['PING', 'End']:
|
||
continue
|
||
|
||
# 处理Message事件
|
||
if event_type == 'Message':
|
||
try:
|
||
logger.info(f"'{title}' - 收到Message事件,数据: {data_str[:200]}...")
|
||
data = json.loads(data_str)
|
||
|
||
# 解析content字段为JSON对象
|
||
content_obj = {}
|
||
if data.get('content') and isinstance(data['content'], str):
|
||
try:
|
||
content_obj = json.loads(data['content'])
|
||
logger.info(f"'{title}' - 解析后的content: {list(content_obj.keys())}")
|
||
except json.JSONDecodeError as e:
|
||
logger.error(f"'{title}' - 解析content字段失败: {e}")
|
||
continue
|
||
|
||
# 保存结构化数据 - 修改逻辑:即使API返回的title为空也保存数据
|
||
if content_obj.get('title') or content_obj.get('contents') or content_obj.get('introduction'):
|
||
# 使用API返回的title,如果为空则使用原始输入的title
|
||
final_title = content_obj.get('title') or title
|
||
last_structured = {
|
||
'title': final_title,
|
||
'tags': content_obj.get('tags', ''),
|
||
'introduction': content_obj.get('introduction', ''),
|
||
'conclusion': content_obj.get('conclusion', ''),
|
||
'contents': content_obj.get('contents', []) if isinstance(content_obj.get('contents'), list) else []
|
||
}
|
||
logger.info(f"'{title}' - 保存结构化数据,最终标题: {final_title}")
|
||
logger.info(f"'{title}' - 内容项数量: {len(last_structured['contents'])}")
|
||
|
||
except json.JSONDecodeError as e:
|
||
logger.error(f"'{title}' - 解析消息错误: {e}")
|
||
continue
|
||
|
||
if last_structured:
|
||
success_msg = f"'{title}' - 文章生成成功,包含{len(all_img)}张图片"
|
||
logger.info(success_msg)
|
||
self.log_to_database('INFO', success_msg, json.dumps(last_structured, ensure_ascii=False))
|
||
return last_structured
|
||
else:
|
||
warning_msg = f"'{title}' - 未获取到有效的文章内容"
|
||
logger.warning(warning_msg)
|
||
self.log_to_database('WARNING', warning_msg)
|
||
return None
|
||
|
||
except Exception as e:
|
||
error_msg = f"'{title}' - 解析流式响应异常: {e}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
||
return None
|
||
|
||
def convert_structured_to_dynamic(self, structured_data: Dict) -> str:
|
||
"""将结构化数据转换为Dynamic格式,参考JavaScript的convertStructuredToDynamic函数"""
|
||
try:
|
||
title = structured_data.get('title', '')
|
||
introduction = structured_data.get('introduction', '')
|
||
contents = structured_data.get('contents', [])
|
||
conclusion = structured_data.get('conclusion', '')
|
||
tags = structured_data.get('tags', '')
|
||
|
||
logger.info(f"'{title}' - 开始转换Dynamic格式")
|
||
|
||
html_content = ''
|
||
|
||
# 添加title
|
||
if title:
|
||
html_content += f"{title}\n\n"
|
||
|
||
# 添加引子部分
|
||
if introduction and introduction.strip():
|
||
html_content += f"{introduction.strip()}\n\n"
|
||
logger.info(f"'{title}' - 添加引言段落")
|
||
|
||
# 添加内容项
|
||
if contents and isinstance(contents, list):
|
||
for i, content in enumerate(contents):
|
||
if isinstance(content, dict):
|
||
# 修复bug:使用content_item字段而不是content字段,与JavaScript保持一致
|
||
content_text = content.get('content_item') or content.get('content', '')
|
||
# Emoji前缀逻辑处理
|
||
if content_text and content_text.strip():
|
||
# 检查content_text最前面是否有emoji
|
||
emojis_text = emoji.emoji_list(content_text)
|
||
has_emoji_text = emojis_text and emojis_text[0]['match_start'] == 0
|
||
|
||
if not has_emoji_text:
|
||
# content_text没有emoji,检查content_title
|
||
emojis_title = emoji.emoji_list(content_title) if content_title else []
|
||
has_emoji_title = emojis_title and emojis_title[0]['match_start'] == 0
|
||
|
||
if has_emoji_title:
|
||
# content_title有emoji,补充到content_text前缀
|
||
first_emoji = emojis_title[0]['emoji']
|
||
content_text = first_emoji + content_text
|
||
logger.info(f"'{title}' - contents[{i}] 从content_title补充emoji: {first_emoji}")
|
||
else:
|
||
# content_title也没有emoji,按索引循环选择
|
||
emoji_list = ['🔥', '💡', '⭕', '🌟']
|
||
rand_emoji = emoji_list[i % len(emoji_list)]
|
||
content_text = rand_emoji + content_text
|
||
logger.info(f"'{title}' - contents[{i}] content_text: none, content_title: none, rand_emoji: {rand_emoji}")
|
||
if content_text and content_text.strip():
|
||
# 将换行符转换为段落标签
|
||
paragraphs = content_text.split('\n')
|
||
filtered_paragraphs = [p.strip() for p in paragraphs if p.strip()]
|
||
for paragraph in filtered_paragraphs:
|
||
html_content += f"{paragraph}\n\n"
|
||
logger.info(f"'{title}' - 添加内容段落 {i+1},字段: {'content_item' if content.get('content_item') else 'content'}")
|
||
elif isinstance(content, str) and content.strip():
|
||
# 将换行符转换为段落标签
|
||
paragraphs = content.split('\n')
|
||
filtered_paragraphs = [p.strip() for p in paragraphs if p.strip()]
|
||
for paragraph in filtered_paragraphs:
|
||
html_content += f"{paragraph}\n\n"
|
||
logger.info(f"'{title}' - 添加内容段落 {i+1}")
|
||
|
||
# 添加结论部分
|
||
if conclusion and conclusion.strip():
|
||
html_content += f"{conclusion.strip()}\n\n"
|
||
logger.info(f"'{title}' - 添加结论段落")
|
||
|
||
# 添加tags
|
||
if tags:
|
||
#html_content += f"{tags}\n\n"
|
||
logger.info(f"'{title}' - 添加标签")
|
||
|
||
logger.info(f"'{title}' - Dynamic格式转换完成")
|
||
|
||
return html_content
|
||
|
||
except Exception as e:
|
||
error_msg = f"转换HTML格式异常: {e}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
||
return ""
|
||
|
||
def generate_article(self, structured_data: Dict, article_id: int, existing_batch_id: int) -> bool:
|
||
"""提交文章到generate_article接口,100%参考JavaScript的sendInfoToBaijiahao函数"""
|
||
try:
|
||
# 增加判断:structured_data['contents']为空,报错
|
||
if not structured_data or not structured_data.get('contents'):
|
||
logger.error(f"[Worker] 生成文章失败: structured_data['contents']为空")
|
||
# 移除直接数据库操作:不再直接更新状态为generate_failed
|
||
# 状态管理交给接口处理
|
||
return False
|
||
|
||
title = structured_data.get('title', 'Unknown')
|
||
logger.info(f"'{title}' - 开始提交文章到generate_article接口")
|
||
self.log_to_database('INFO', f"开始提交文章: {title}", f"article_id: {article_id}")
|
||
|
||
# 确保有JWT token
|
||
if not self.jwt_token:
|
||
logger.warning(f"'{title}' - JWT token缺失,尝试重新登录")
|
||
self.log_to_database('WARNING', f"JWT token缺失,重新登录: {title}")
|
||
if not self.login_and_get_jwt_token():
|
||
error_msg = f"'{title}' - 重新登录失败"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg)
|
||
return False
|
||
|
||
# 如果没有找到现有batch_id,生成新的unique_id
|
||
if not existing_batch_id:
|
||
timestamp = int(time.time())
|
||
random_num = str(int(time.time() * 1000) % 10000).zfill(4)
|
||
existing_batch_id = f"{timestamp}{random_num}"
|
||
logger.warning(f"'{title}' - 生成新的batch_id: {existing_batch_id}")
|
||
logger.error(f"'{title}' - 查询batch_id失败: {e}")
|
||
|
||
# 转换内容为HTML格式
|
||
html_content = self.convert_structured_to_dynamic(structured_data)
|
||
|
||
# 构建发文数据,使用现有的batch_id以触发更新模式
|
||
publish_data = {
|
||
"title": structured_data['title'],
|
||
"content": html_content,
|
||
"tags": structured_data.get('tags', ''),
|
||
"cover_image": structured_data.get('home_img', ''),
|
||
"article_id": article_id,
|
||
"batch_id": existing_batch_id, # 使用现有的batch_id
|
||
"uniq_id": existing_batch_id,
|
||
"source": "coze_auto_generator", # 标识来源
|
||
"username": self.login_credentials['username']
|
||
}
|
||
|
||
logger.info(f"'{title}' - 准备提交的数据: article_id={article_id}, batch_id={existing_batch_id}")
|
||
logger.info(f"'{title}' - 提交数据详情: {json.dumps(publish_data, ensure_ascii=False)[:200]}...")
|
||
|
||
# 发送请求
|
||
upload_url = f"{self.base_url}/api/generate_article"
|
||
headers = {
|
||
'Authorization': f'Bearer {self.jwt_token}',
|
||
'Content-Type': 'application/json',
|
||
'Accept': 'application/json'
|
||
}
|
||
|
||
response = requests.post(
|
||
upload_url,
|
||
json=publish_data,
|
||
headers=headers,
|
||
timeout=60,
|
||
proxies=self.proxies
|
||
)
|
||
|
||
logger.info(f"'{title}' - 提交响应状态码: {response.status_code}")
|
||
|
||
if response.status_code == 200:
|
||
try:
|
||
result = response.json()
|
||
logger.info(f"'{title}' - 提交响应内容: {result}")
|
||
|
||
if result.get('success') or result.get('errno') == 0:
|
||
success_msg = f"'{title}' - 文章提交成功, ID: {existing_batch_id}"
|
||
logger.info(success_msg)
|
||
self.log_to_database('INFO', success_msg, f"article_id: {article_id}, batch_id: {existing_batch_id}")
|
||
return True
|
||
else:
|
||
error_msg = f"'{title}' - 文章提交失败: {result.get('message', result.get('errmsg', '未知错误'))}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, f"article_id: {article_id}, response: {result}")
|
||
return False
|
||
except json.JSONDecodeError as e:
|
||
error_msg = f"'{title}' - 解析提交响应失败: {e}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, f"response_text: {response.text}")
|
||
return False
|
||
elif response.status_code == 401:
|
||
# 处理401错误:JWT token过期,重新登录后重试
|
||
logger.warning(f"'{title}' - JWT token过期(401),尝试重新登录")
|
||
self.log_to_database('WARNING', f"JWT token过期,重新登录: {title}", f"article_id: {article_id}")
|
||
|
||
if self.login_and_get_jwt_token():
|
||
logger.info(f"'{title}' - 重新登录成功,重试提交文章")
|
||
# 更新headers中的token
|
||
headers['Authorization'] = f'Bearer {self.jwt_token}'
|
||
|
||
# 重试请求
|
||
retry_response = requests.post(
|
||
upload_url,
|
||
json=publish_data,
|
||
headers=headers,
|
||
timeout=60,
|
||
proxies=self.proxies
|
||
)
|
||
|
||
logger.info(f"'{title}' - 重试响应状态码: {retry_response.status_code}")
|
||
|
||
if retry_response.status_code == 200:
|
||
try:
|
||
retry_result = retry_response.json()
|
||
logger.info(f"'{title}' - 重试响应内容: {retry_result}")
|
||
|
||
if retry_result.get('success') or retry_result.get('errno') == 0:
|
||
success_msg = f"'{title}' - 重试提交成功, ID: {existing_batch_id}"
|
||
logger.info(success_msg)
|
||
self.log_to_database('INFO', success_msg, f"article_id: {article_id}, batch_id: {existing_batch_id}")
|
||
return True
|
||
else:
|
||
error_msg = f"'{title}' - 重试提交失败: {retry_result.get('message', retry_result.get('errmsg', '未知错误'))}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, f"article_id: {article_id}, retry_response: {retry_result}")
|
||
return False
|
||
except json.JSONDecodeError as e:
|
||
error_msg = f"'{title}' - 解析重试响应失败: {e}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, f"retry_response_text: {retry_response.text}")
|
||
return False
|
||
else:
|
||
error_msg = f"'{title}' - 重试请求仍然失败,状态码: {retry_response.status_code}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, f"retry_response_text: {retry_response.text}")
|
||
return False
|
||
else:
|
||
error_msg = f"'{title}' - 重新登录失败,无法重试"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, f"article_id: {article_id}")
|
||
return False
|
||
else:
|
||
error_msg = f"'{title}' - 文章提交请求失败,状态码: {response.status_code}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, f"response_text: {response.text}")
|
||
return False
|
||
|
||
except requests.exceptions.Timeout:
|
||
error_msg = f"'{title}' - 提交文章请求超时"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, f"article_id: {article_id}")
|
||
return False
|
||
except requests.exceptions.RequestException as e:
|
||
error_msg = f"'{title}' - 提交文章网络异常: {e}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
||
return False
|
||
except Exception as e:
|
||
error_msg = f"'{title}' - 提交文章异常: {e}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
||
return False
|
||
|
||
def get_generate_topics(self) -> List[Dict]:
|
||
"""获取状态为topic或failed的待处理数据,支持失败重试"""
|
||
try:
|
||
with self.db_manager.get_cursor() as cursor:
|
||
# 查询topic状态和failed状态的文章(支持失败重试)
|
||
# LEFT JOIN ai_prompt_workflow 表获取 auth_token 和 workflow_id
|
||
sql = """
|
||
SELECT a.id, a.topic, a.batch_id, a.status, a.created_at, a.updated_at,
|
||
p.auth_token, p.workflow_id, p.prompt_workflow_name
|
||
FROM ai_articles a
|
||
LEFT JOIN ai_prompt_workflow p ON a.prompt_workflow_id = p.id
|
||
WHERE a.status IN ('generate', 'generate_failed')
|
||
AND a.topic > '' AND a.prompt_workflow_id > 0
|
||
ORDER BY
|
||
CASE WHEN a.status = 'generate' THEN 1 ELSE 2 END,
|
||
a.id ASC
|
||
LIMIT 1000
|
||
"""
|
||
cursor.execute(sql)
|
||
results = cursor.fetchall()
|
||
|
||
if results:
|
||
logger.info(f"查询到 {len(results)} 个待处理主题")
|
||
for result in results:
|
||
logger.info(f"待处理文章 - ID: {result['id']}, 主题: {result['topic']}, 状态: {result['status']}, auth_token: {result.get('auth_token', 'N/A')}, workflow_id: {result.get('workflow_id', 'N/A')}")
|
||
self.log_to_database('INFO', f"发现待处理文章: {result['topic']}",
|
||
f"ID: {result['id']}, 状态: {result['status']}, auth_token: {result.get('auth_token', 'N/A')}, workflow_id: {result.get('workflow_id', 'N/A')}")
|
||
else:
|
||
logger.info("未查询到待处理主题")
|
||
|
||
return results
|
||
except Exception as e:
|
||
error_msg = f"查询待处理主题异常: {e}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
||
return []
|
||
|
||
def get_next_available_topic(self, pending_topics: List[Dict]) -> Optional[Dict]:
|
||
"""线程安全地获取下一个可处理的主题"""
|
||
with self.processing_lock:
|
||
for topic_data in pending_topics:
|
||
article_id = topic_data['id']
|
||
if article_id not in self.processed_ids:
|
||
self.processed_ids.add(article_id)
|
||
return topic_data
|
||
return None
|
||
|
||
def process_single_topic(self, topic_data: Dict) -> bool:
|
||
"""处理单个主题"""
|
||
article_id = topic_data['id']
|
||
topic = topic_data['topic']
|
||
workflow_id = topic_data.get('workflow_id')
|
||
auth_token = topic_data.get('auth_token')
|
||
prompt_workflow_name = topic_data.get('prompt_workflow_name')
|
||
worker_id = threading.current_thread().name
|
||
batch_id = topic_data.get('batch_id')
|
||
|
||
# ====== 新增:查找相似topic ======
|
||
context_injection = "" # 初始化上下文注入内容
|
||
|
||
try:
|
||
logger.info(f"[Worker-{worker_id}] 开始查找相似topic,当前topic: '{topic}'")
|
||
self.log_to_database('INFO', f"开始查找相似topic: {topic}", f"article_id: {article_id}")
|
||
|
||
# 调用向量检索接口
|
||
similar_topics = search_chinese(
|
||
query_text=topic,
|
||
topk=3, # 查询top3,后面会过滤到最多2条
|
||
similarity_threshold=0.5 # 相似度阈值0.5
|
||
)
|
||
|
||
if similar_topics:
|
||
logger.info(f"[Worker-{worker_id}] 找到 {len(similar_topics)} 个相似topic")
|
||
logger.info(f"[Worker-{worker_id}] similar_topics完整返回值: {json.dumps(similar_topics, ensure_ascii=False)}")
|
||
|
||
# 提取article_id列表(注意:返回的字段是id,不是article_id)
|
||
article_ids = []
|
||
for item in similar_topics:
|
||
aid = item.get('id', '')
|
||
if aid:
|
||
try:
|
||
# 将字符串id转换为int
|
||
article_ids.append(int(aid))
|
||
except (ValueError, TypeError):
|
||
logger.warning(f"[Worker-{worker_id}] 无法转换id为整数: {aid}")
|
||
article_ids = [702, 699] #test测试rwl
|
||
if article_ids:
|
||
logger.info(f"[Worker-{worker_id}] 提取到文章ID列表: {article_ids}")
|
||
|
||
# 从数据库查询content
|
||
context_injection = self.get_article_contents_by_ids(article_ids)
|
||
|
||
if context_injection:
|
||
logger.info(f"[Worker-{worker_id}] 获取到上下文注入内容,长度: {len(context_injection)} 字符")
|
||
self.log_to_database(
|
||
'INFO',
|
||
f"获取上下文注入内容: {topic}",
|
||
f"article_id: {article_id}, 相似文章IDs: {article_ids}, 内容长度: {len(context_injection)}"
|
||
)
|
||
else:
|
||
logger.warning(f"[Worker-{worker_id}] 未从article_ids {article_ids} 查询到content")
|
||
self.log_to_database('WARNING', f"未查询到content: {topic}", f"article_ids: {article_ids}")
|
||
else:
|
||
logger.warning(f"[Worker-{worker_id}] 相似topic中没有有效的id")
|
||
|
||
# 打印相似topic详情
|
||
for i, similar in enumerate(similar_topics, 1):
|
||
logger.info(f"[Worker-{worker_id}] 相似topic[{i}]: {similar.get('title', 'N/A')}, 相似度: {similar.get('similar', 0):.4f}, 文章ID: {similar.get('id', 'N/A')}")
|
||
self.log_to_database(
|
||
'INFO',
|
||
f"找到相似topic: {topic}",
|
||
f"article_id: {article_id}, 相似topic数量: {len(similar_topics)}, 详情: {json.dumps(similar_topics, ensure_ascii=False)}"
|
||
)
|
||
else:
|
||
logger.info(f"[Worker-{worker_id}] 未找到相似topic(相似度>0.5)")
|
||
self.log_to_database('INFO', f"未找到相似topic: {topic}", f"article_id: {article_id}")
|
||
except Exception as e:
|
||
error_msg = f"[Worker-{worker_id}] 查找相似topic异常: {e}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
||
# 即使查找相似topic失败,也继续处理文章生成
|
||
# ====== 相似topic查找结束 ======
|
||
|
||
try:
|
||
logger.info(f"[Worker-{worker_id}] 开始处理主题 ID:{article_id}, Topic:'{topic}', Prompt={prompt_workflow_name}")
|
||
|
||
# 验证必要的参数
|
||
if not workflow_id or not auth_token:
|
||
error_msg = f"[Worker-{worker_id}] workflow_id 或 auth_token 缺失,Topic:'{topic}'"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, f"article_id: {article_id}")
|
||
return False
|
||
|
||
# 生成文章 - 开始计时
|
||
start_time = time.time()
|
||
structured_data = self.generate_article_from_coze(topic, context_injection, workflow_id, auth_token)
|
||
end_time = time.time()
|
||
elapsed_time = end_time - start_time
|
||
|
||
logger.info(f"[Worker-{worker_id}] Coze文章生成耗时: {elapsed_time:.2f}秒, Topic:'{topic}'")
|
||
|
||
if not structured_data:
|
||
logger.error(f"[Worker-{worker_id}] 生成文章失败: {topic}")
|
||
# 移除直接数据库操作:不再直接更新状态为generate_failed
|
||
# 状态管理交给接口处理
|
||
return False
|
||
|
||
# 增加判断:structured_data['contents']为空,报错
|
||
if not structured_data.get('contents'):
|
||
logger.error(f"[Worker-{worker_id}] 生成文章失败: {topic} - structured_data['contents']为空")
|
||
# 移除直接数据库操作:不再直接更新状态为generate_failed
|
||
# 状态管理交给接口处理
|
||
return False
|
||
|
||
# 提交文章
|
||
if self.generate_article(structured_data, article_id, batch_id):
|
||
logger.info(f"[Worker-{worker_id}] 文章处理完成: {topic}")
|
||
# Bug修复:正确发文状态应该是pending_review,不是draft
|
||
# 注意:调用接口后不应再直接操作数据库,接口内部会处理状态
|
||
return True
|
||
else:
|
||
logger.error(f"[Worker-{worker_id}] 文章提交失败: {topic}")
|
||
# 移除直接数据库操作:不再直接更新状态为generate_failed
|
||
# 状态管理交给接口处理
|
||
return False
|
||
|
||
except Exception as e:
|
||
logger.error(f"[Worker-{worker_id}] 处理主题异常: {e}")
|
||
# 移除直接数据库操作:不再直接更新状态为generate_failed
|
||
# 状态管理交给接口处理
|
||
return False
|
||
|
||
def worker_process_topics(self, pending_topics: List[Dict], worker_id: int) -> int:
|
||
"""Worker线程处理主题的方法"""
|
||
processed_count = 0
|
||
thread_name = f"Worker-{worker_id}"
|
||
threading.current_thread().name = thread_name
|
||
|
||
logger.info(f"[{thread_name}] 启动,准备处理主题")
|
||
|
||
while True:
|
||
# 线程安全地获取下一个待处理主题
|
||
topic_data = self.get_next_available_topic(pending_topics)
|
||
if not topic_data:
|
||
logger.info(f"[{thread_name}] 没有更多待处理主题,退出")
|
||
break
|
||
|
||
# 处理主题
|
||
if self.process_single_topic(topic_data):
|
||
processed_count += 1
|
||
logger.info(f"[{thread_name}] 成功处理主题: {topic_data['topic']}")
|
||
else:
|
||
logger.error(f"[{thread_name}] 处理主题失败: {topic_data['topic']}")
|
||
|
||
logger.info(f"[{thread_name}] 完成,共处理 {processed_count} 个主题")
|
||
return processed_count
|
||
|
||
def run_monitor(self):
|
||
"""运行监控循环,支持多worker并行处理"""
|
||
logger.info(f"开始监控ai_articles表,使用 {WORKER_COUNT} 个worker并行处理...")
|
||
self.log_to_database('INFO', f'启动文章自动生成监控服务,worker数量: {WORKER_COUNT}', 'run_monitor')
|
||
|
||
while True:
|
||
try:
|
||
# 获取待处理的主题
|
||
pending_topics = self.get_generate_topics()
|
||
|
||
if pending_topics:
|
||
logger.info(f"发现 {len(pending_topics)} 个待处理主题,启动 {WORKER_COUNT} 个worker并行处理")
|
||
self.log_to_database('INFO', f'发现待处理主题,启动并行处理', f'主题数量: {len(pending_topics)}, worker数量: {WORKER_COUNT}')
|
||
|
||
# 清空已处理记录集合
|
||
with self.processing_lock:
|
||
self.processed_ids.clear()
|
||
|
||
# 使用线程池并行处理
|
||
with ThreadPoolExecutor(max_workers=WORKER_COUNT, thread_name_prefix="CozeWorker") as executor:
|
||
# 提交worker任务
|
||
future_to_worker = {}
|
||
for worker_id in range(1, WORKER_COUNT + 1):
|
||
future = executor.submit(self.worker_process_topics, pending_topics, worker_id)
|
||
future_to_worker[future] = worker_id
|
||
|
||
# 等待所有worker完成
|
||
total_processed = 0
|
||
for future in as_completed(future_to_worker):
|
||
worker_id = future_to_worker[future]
|
||
try:
|
||
processed_count = future.result()
|
||
total_processed += processed_count
|
||
logger.info(f"Worker-{worker_id} 完成,处理了 {processed_count} 个主题")
|
||
except Exception as e:
|
||
logger.error(f"Worker-{worker_id} 执行异常: {e}")
|
||
self.log_to_database('ERROR', f'Worker-{worker_id} 执行异常', str(e))
|
||
|
||
logger.info(f"本轮并行处理完成,共处理 {total_processed} 个主题")
|
||
self.log_to_database('INFO', f'本轮并行处理完成', f'共处理 {total_processed} 个主题')
|
||
|
||
# 处理完一轮后稍作休息
|
||
time.sleep(5)
|
||
else:
|
||
logger.info("暂无待处理主题,继续监控...")
|
||
|
||
# 每秒检查一次
|
||
time.sleep(SLEEP_INTERVAL)
|
||
|
||
except KeyboardInterrupt:
|
||
logger.info("收到中断信号,停止监控")
|
||
self.log_to_database('INFO', '监控服务手动停止', 'KeyboardInterrupt')
|
||
break
|
||
except Exception as e:
|
||
error_msg = f"监控循环异常: {e}"
|
||
logger.error(error_msg)
|
||
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
||
time.sleep(5) # 异常时等待5秒再继续
|
||
|
||
def main():
|
||
"""主函数"""
|
||
generator = CozeArticleGenerator()
|
||
|
||
try:
|
||
# 先登录获取JWT token
|
||
logger.info("开始登录获取JWT token")
|
||
if not generator.login_and_get_jwt_token():
|
||
logger.error("登录失败,程序退出")
|
||
return
|
||
|
||
# 开始监控
|
||
generator.run_monitor()
|
||
|
||
except Exception as e:
|
||
logger.error(f"程序运行异常: {e}")
|
||
generator.log_to_database('ERROR', f'程序运行异常: {e}', traceback.format_exc())
|
||
|
||
if __name__ == "__main__":
|
||
main()
|