This commit is contained in:
“shengyudong”
2026-01-06 14:18:39 +08:00
commit 5a384b694e
10345 changed files with 2050918 additions and 0 deletions

View File

@@ -0,0 +1,813 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
文章管理接口
"""
from flask import Blueprint, request, jsonify
import logging
import time
import random
from datetime import datetime
from auth_utils import require_auth, AuthUtils
from database_config import get_db_manager, format_datetime_fields
from log_utils import log_create, log_update, log_delete, log_error, log_operation
logger = logging.getLogger('article_server')
# 创建蓝图
article_bp = Blueprint('article', __name__, url_prefix='/api/articles')
@article_bp.route('/list', methods=['GET'])
@require_auth
def get_articles_list():
    """Return a paginated article list for the caller's enterprise.

    Query params: ``page``, ``pageSize``, ``keyword`` (LIKE-matched against
    title/content/topic), ``product_id``, ``status``.  Each returned article
    is augmented with its image rows and its first tag row.

    Returns 400 for a missing enterprise or bad pagination input,
    404 never (empty list instead), 500 on unexpected errors.
    """
    client_ip = request.environ.get('HTTP_X_FORWARDED_FOR', request.environ.get('REMOTE_ADDR', '未知'))
    logger.info(f"[获取文章列表] 开始处理请求, IP: {client_ip}")
    try:
        current_user = AuthUtils.get_current_user()
        enterprise_id = current_user.get('enterprise_id')
        logger.info(f"[获取文章列表] 用户信息 - 用户ID: {current_user.get('user_id')}, 企业ID: {enterprise_id}, IP: {client_ip}")
        if not enterprise_id:
            logger.warning(f"[获取文章列表] 无法获取企业ID, IP: {client_ip}")
            return jsonify({
                'code': 400,
                'message': '无法获取企业ID',
                'data': None
            }), 400
        # Parse pagination explicitly: a non-numeric value previously raised
        # ValueError and surfaced as a 500; report it as a client error.
        try:
            page = int(request.args.get('page', 1))
            page_size = int(request.args.get('pageSize', 20))
        except (TypeError, ValueError):
            logger.warning(f"[获取文章列表] 分页参数非法, IP: {client_ip}")
            return jsonify({
                'code': 400,
                'message': '分页参数错误',
                'data': None
            }), 400
        # Guard non-positive values, which would yield a negative OFFSET
        # and a SQL error.
        page = max(page, 1)
        page_size = max(page_size, 1)
        keyword = request.args.get('keyword', '').strip()
        product_id = request.args.get('product_id', '').strip()
        status = request.args.get('status', '').strip()
        logger.info(f"[获取文章列表] 查询参数: page={page}, pageSize={page_size}, keyword={keyword}, product_id={product_id}, status={status}, 企业ID: {enterprise_id}, IP: {client_ip}")
        # Build the WHERE clause; every value goes through a placeholder.
        where_conditions = ["a.enterprise_id = %s"]
        params = [enterprise_id]
        if keyword:
            where_conditions.append("(a.title LIKE %s OR a.content LIKE %s OR a.topic LIKE %s)")
            keyword_pattern = f"%{keyword}%"
            params.extend([keyword_pattern, keyword_pattern, keyword_pattern])
        if product_id:
            where_conditions.append("a.product_id = %s")
            params.append(product_id)
        if status:
            where_conditions.append("a.status = %s")
            params.append(status)
        where_clause = " AND ".join(where_conditions)
        offset = (page - 1) * page_size
        db_manager = get_db_manager()
        # Total count for the pager.
        count_sql = f"""
        SELECT COUNT(*) as total
        FROM ai_articles a
        WHERE {where_clause}
        """
        count_result = db_manager.execute_query(count_sql, params)
        total = count_result[0]['total']
        # Page of articles, joined with product and prompt-workflow names.
        sql = f"""
        SELECT a.id, a.batch_id, a.enterprise_id, a.product_id, a.topic_type_id,
               a.prompt_workflow_id, a.topic, a.title, a.content, a.department,
               a.departmentids, a.author_id, a.author_name, a.department_id, a.department_name,
               a.created_user_id, a.review_user_id, a.publish_user_id, a.status, a.channel,
               a.review_comment, a.publish_time, a.publish_link, a.baijiahao_id, a.baijiahao_status,
               a.word_count, a.image_count, a.coze_tag, a.created_at, a.updated_at,
               p.name as product_name,
               pw.prompt_workflow_name as prompt_name
        FROM ai_articles a
        LEFT JOIN ai_products p ON a.product_id = p.id
        LEFT JOIN ai_prompt_workflow pw ON a.prompt_workflow_id = pw.id
        WHERE {where_clause}
        ORDER BY a.created_at DESC
        LIMIT %s OFFSET %s
        """
        params.extend([page_size, offset])
        articles = db_manager.execute_query(sql, params)
        # Attach images and the first tag to every article.
        # NOTE(review): this is an N+1 query pattern (two extra queries per
        # article per page) — acceptable for small pages, worth batching if
        # pageSize grows.
        for article in articles:
            article_id = article['id']
            images_sql = """
            SELECT id, image_id, image_url, image_thumb_url, image_tag_id,
                   sort_order, keywords_id, keywords_name, department_id,
                   department_name, image_source, created_at
            FROM ai_article_images
            WHERE article_id = %s
            ORDER BY sort_order ASC, created_at ASC
            """
            article['images'] = db_manager.execute_query(images_sql, (article_id,))
            tags_sql = """
            SELECT id, coze_tag, created_at
            FROM ai_article_tags
            WHERE article_id = %s
            """
            tags_result = db_manager.execute_query(tags_sql, (article_id,))
            # Only the first tag row is exposed (None when absent).
            article['tags'] = tags_result[0] if tags_result else None
        # Normalise datetime columns for JSON serialisation.
        articles = format_datetime_fields(articles)
        logger.info(f"[获取文章列表] 查询成功, 总数: {total}, 当前页: {page}, 每页: {page_size}, 返回数量: {len(articles)}, 企业ID: {enterprise_id}, IP: {client_ip}")
        return jsonify({
            'code': 200,
            'message': 'success',
            'data': {
                'total': total,
                'list': articles
            },
            'timestamp': int(datetime.now().timestamp() * 1000)
        })
    except Exception as e:
        logger.error(f"[获取文章列表] 处理请求时发生错误: {str(e)}", exc_info=True)
        return jsonify({
            'code': 500,
            'message': '服务器内部错误',
            'data': None
        }), 500
@article_bp.route('/generate', methods=['POST'])
@require_auth
def generate_article():
    """Create placeholder article rows for a batch of topics.

    Validates that the product and prompt workflow belong to the caller's
    enterprise, inserts one ``ai_articles`` row per topic (content left
    empty for a follow-up worker to fill), then bumps the article counters
    on the product, the enterprise, and the prompt workflow.

    Body: ``product_id``, ``prompt_workflow_id``, ``topics`` (list),
    optional ``count`` (max topics to use, default 1).
    """
    client_ip = request.environ.get('HTTP_X_FORWARDED_FOR', request.environ.get('REMOTE_ADDR', '未知'))
    logger.info(f"[生成文案] 开始处理生成文案请求, IP: {client_ip}")
    try:
        current_user = AuthUtils.get_current_user()
        enterprise_id = current_user.get('enterprise_id')
        logger.info(f"[生成文案] 用户信息 - 用户ID: {current_user.get('user_id')}, 企业ID: {enterprise_id}, IP: {client_ip}")
        if not enterprise_id:
            logger.warning(f"[生成文案] 无法获取企业ID, IP: {client_ip}")
            return jsonify({
                'code': 400,
                'message': '无法获取企业ID',
                'data': None
            }), 400
        data = request.get_json()
        if not data:
            logger.warning(f"[生成文案] 请求参数为空, 企业ID: {enterprise_id}, IP: {client_ip}")
            return jsonify({
                'code': 400,
                'message': '请求参数错误',
                'data': None
            }), 400
        logger.info(f"[生成文案] 收到生成请求, 产品ID: {data.get('product_id')}, 提示词ID: {data.get('prompt_workflow_id')}, 主题数: {len(data.get('topics', []))}, 企业ID: {enterprise_id}, IP: {client_ip}")
        # Validate required fields (an empty topics list also fails here).
        required_fields = ['product_id', 'prompt_workflow_id', 'topics']
        for field in required_fields:
            if not data.get(field):
                logger.warning(f"[生成文案] 缺少必需字段: {field}, 企业ID: {enterprise_id}, IP: {client_ip}")
                return jsonify({
                    'code': 400,
                    'message': f'缺少必需字段: {field}',
                    'data': None
                }), 400
        db_manager = get_db_manager()
        # Verify the product belongs to this enterprise.
        logger.info(f"[生成文案] 验证产品是否存在, 产品ID: {data['product_id']}, 企业ID: {enterprise_id}")
        check_product_sql = "SELECT id, name FROM ai_products WHERE id = %s AND enterprise_id = %s"
        product = db_manager.execute_query(check_product_sql, (data['product_id'], enterprise_id))
        if not product:
            logger.warning(f"[生成文案] 产品不存在, 产品ID: {data['product_id']}, 企业ID: {enterprise_id}, IP: {client_ip}")
            return jsonify({
                'code': 404,
                'message': '产品不存在',
                'data': None
            }), 404
        logger.info(f"[生成文案] 产品验证成功, 产品名称: {product[0]['name']}, ID: {data['product_id']}")
        # Verify the prompt workflow belongs to this enterprise.
        logger.info(f"[生成文案] 验证提示词是否存在, 提示词ID: {data['prompt_workflow_id']}, 企业ID: {enterprise_id}")
        check_prompt_sql = "SELECT id FROM ai_prompt_workflow WHERE id = %s AND enterprise_id = %s"
        prompt = db_manager.execute_query(check_prompt_sql, (data['prompt_workflow_id'], enterprise_id))
        if not prompt:
            logger.warning(f"[生成文案] 提示词不存在, 提示词ID: {data['prompt_workflow_id']}, 企业ID: {enterprise_id}, IP: {client_ip}")
            return jsonify({
                'code': 404,
                'message': '提示词不存在',
                'data': None
            }), 404
        logger.info(f"[生成文案] 提示词验证成功, ID: {data['prompt_workflow_id']}")
        # Coerce `count` defensively: a non-integer payload value previously
        # crashed the len() comparison and surfaced as a 500.
        try:
            count = max(int(data.get('count', 1)), 1)
        except (TypeError, ValueError):
            count = 1
        # list[:count] already caps at the list length, so the old
        # `if len(topics) >= count` conditional was redundant.
        topics = data['topics'][:count]
        logger.info(f"[生成文案] 开始生成文案, 主题数量: {len(topics)}, 产品: {product[0]['name']}, 企业ID: {enterprise_id}")
        generated_articles = []
        # batch_id = epoch seconds + 6 random digits; groups this request's rows.
        timestamp = int(time.time())
        random_num = random.randint(100000, 999999)
        batch_id = f"{timestamp}{random_num}"
        logger.info(f"[批量生成文章] 生成batch_id: {batch_id}, 待处理数据行数: {len(topics)}")
        for topic in topics:
            logger.info(f"[生成文案] 开始生成主题文案: {topic}, 产品: {product[0]['name']}")
            # TODO: call the AI service here; for now the title is the topic
            # itself and content stays empty for a later worker to fill.
            title = f"{topic}"
            sql = """
            INSERT INTO ai_articles
            (enterprise_id, product_id, prompt_workflow_id, title, topic, content, status, batch_id)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
            """
            article_id = db_manager.execute_insert(sql, (
                enterprise_id,
                data['product_id'],
                data['prompt_workflow_id'],
                title,
                topic,
                '',  # content intentionally empty — filled by a follow-up script
                'generate',
                batch_id
            ))
            logger.info(f"[生成文案] 文案生成成功, 文案ID: {article_id}, 主题: {topic}, 标题: {title}")
            generated_articles.append({
                'id': article_id,
                'title': title,
                'topic': topic
            })
        # Bump denormalised article counters.
        update_product_sql = "UPDATE ai_products SET articles_total = articles_total + %s WHERE id = %s"
        db_manager.execute_update(update_product_sql, (len(generated_articles), data['product_id']))
        update_enterprise_sql = "UPDATE ai_enterprises SET articles_total = articles_total + %s WHERE id = %s"
        db_manager.execute_update(update_enterprise_sql, (len(generated_articles), enterprise_id))
        update_prompt_sql = "UPDATE ai_prompt_workflow SET usage_count = usage_count + %s WHERE id = %s"
        db_manager.execute_update(update_prompt_sql, (len(generated_articles), data['prompt_workflow_id']))
        logger.info(f"生成文案成功: {len(generated_articles)}")
        return jsonify({
            'code': 200,
            'message': '生成成功',
            'data': {
                'generated': len(generated_articles),
                'articles': generated_articles
            },
            'timestamp': int(datetime.now().timestamp() * 1000)
        })
    except Exception as e:
        logger.error(f"[生成文案] 处理请求时发生错误: {str(e)}", exc_info=True)
        return jsonify({
            'code': 500,
            'message': '服务器内部错误',
            'data': None
        }), 500
@article_bp.route('/create_Discard', methods=['POST'])
@require_auth
def create_article():
    """Create an article together with its image rows and optional tag row.

    Body: requires ``title`` and ``content``; optional product/topic/author
    fields, an ``images`` list, and a ``coze_tag`` string.

    NOTE(review): the route name suggests this endpoint is discarded.  The
    master/image/tag inserts are not wrapped in a single transaction, so a
    mid-way failure can leave a partial article — confirm before reviving.
    """
    client_ip = request.environ.get('HTTP_X_FORWARDED_FOR', request.environ.get('REMOTE_ADDR', '未知'))
    logger.info(f"[创建文章] 开始处理请求, IP: {client_ip}")
    try:
        current_user = AuthUtils.get_current_user()
        enterprise_id = current_user.get('enterprise_id')
        user_id = current_user.get('user_id', 0)
        if not enterprise_id:
            return jsonify({
                'code': 400,
                'message': '无法获取企业ID',
                'data': None
            }), 400
        data = request.get_json()
        if not data:
            return jsonify({
                'code': 400,
                'message': '请求参数错误',
                'data': None
            }), 400
        # Validate required fields.
        required_fields = ['title', 'content']
        for field in required_fields:
            if not data.get(field):
                return jsonify({
                    'code': 400,
                    'message': f'缺少必需字段: {field}',
                    'data': None
                }), 400
        db_manager = get_db_manager()
        # Insert the article master row.
        article_sql = """
        INSERT INTO ai_articles
        (enterprise_id, product_id, topic_type_id, prompt_workflow_id, topic, title, content,
         department, departmentids, author_id, author_name, department_id, department_name,
         created_user_id, status, channel, word_count, image_count, batch_id)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
        article_id = db_manager.execute_insert(article_sql, (
            enterprise_id,
            data.get('product_id', 0),
            data.get('topic_type_id', 0),
            data.get('prompt_workflow_id', 0),
            data.get('topic', ''),
            data['title'],
            data['content'],
            data.get('department', ''),
            data.get('departmentids', ''),
            data.get('author_id'),
            data.get('author_name'),
            data.get('department_id'),
            data.get('department_name'),
            user_id,
            data.get('status', 'draft'),
            data.get('channel', 1),
            # word_count defaults to the raw character length of content
            data.get('word_count', len(data['content'])),
            data.get('image_count', 0),
            data.get('batch_id', 0)
        ))
        # Insert per-article image rows, if provided.
        if data.get('images'):
            for img in data['images']:
                image_sql = """
                INSERT INTO ai_article_images
                (enterprise_id, article_id, image_id, image_url, image_thumb_url,
                 image_tag_id, sort_order, keywords_id, keywords_name,
                 department_id, department_name, image_source)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """
                db_manager.execute_insert(image_sql, (
                    enterprise_id,
                    article_id,
                    img.get('image_id', 0),
                    img.get('image_url', ''),
                    img.get('image_thumb_url', ''),
                    img.get('image_tag_id', 0),
                    img.get('sort_order', 0),
                    img.get('keywords_id', 0),
                    img.get('keywords_name', ''),
                    img.get('department_id', 0),
                    img.get('department_name', ''),
                    img.get('image_source', 0)
                ))
        # Insert the single tag row, if provided.
        if data.get('coze_tag'):
            tag_sql = """
            INSERT INTO ai_article_tags (enterprise_id, article_id, coze_tag)
            VALUES (%s, %s, %s)
            """
            db_manager.execute_insert(tag_sql, (enterprise_id, article_id, data['coze_tag']))
        logger.info(f"[创建文章] 创建成功, 文章ID: {article_id}, 企业ID: {enterprise_id}")
        return jsonify({
            'code': 200,
            'message': '创建成功',
            'data': {
                'id': article_id,
                'title': data['title']
            },
            'timestamp': int(datetime.now().timestamp() * 1000)
        })
    except Exception as e:
        logger.error(f"[创建文章] 处理请求时发生错误: {str(e)}", exc_info=True)
        return jsonify({
            'code': 500,
            'message': '服务器内部错误',
            'data': None
        }), 500
@article_bp.route('/<int:article_id>', methods=['PUT'])
@require_auth
def update_article(article_id):
    """Update an article's fields, images, and tag; snapshot status changes.

    Only keys present in the request body (and in the whitelist below) are
    written.  ``images`` is replaced wholesale (delete + re-insert);
    ``coze_tag`` is upserted.  When ``status`` changes, a snapshot of the
    article is copied into ``ai_article_published_records``.
    """
    client_ip = request.environ.get('HTTP_X_FORWARDED_FOR', request.environ.get('REMOTE_ADDR', '未知'))
    logger.info(f"[更新文章] 开始处理请求, 文章ID: {article_id}, IP: {client_ip}")
    try:
        current_user = AuthUtils.get_current_user()
        enterprise_id = current_user.get('enterprise_id')
        if not enterprise_id:
            return jsonify({
                'code': 400,
                'message': '无法获取企业ID',
                'data': None
            }), 400
        data = request.get_json()
        if not data:
            return jsonify({
                'code': 400,
                'message': '请求参数错误',
                'data': None
            }), 400
        db_manager = get_db_manager()
        # Ensure the article exists and belongs to the caller's enterprise.
        check_sql = "SELECT id, status FROM ai_articles WHERE id = %s AND enterprise_id = %s"
        existing = db_manager.execute_query(check_sql, (article_id, enterprise_id))
        if not existing:
            return jsonify({
                'code': 404,
                'message': '文章不存在',
                'data': None
            }), 404
        old_status = existing[0]['status']
        # Build the dynamic UPDATE from whitelisted fields only; the mapping
        # is identity today but keeps the allowed column set explicit.
        update_fields = []
        params = []
        field_mapping = {
            'product_id': 'product_id',
            'topic_type_id': 'topic_type_id',
            'prompt_workflow_id': 'prompt_workflow_id',
            'topic': 'topic',
            'title': 'title',
            'content': 'content',
            'department': 'department',
            'departmentids': 'departmentids',
            'author_id': 'author_id',
            'author_name': 'author_name',
            'department_id': 'department_id',
            'department_name': 'department_name',
            'status': 'status',
            'channel': 'channel',
            'review_comment': 'review_comment',
            'publish_time': 'publish_time',
            'publish_link': 'publish_link',
            'baijiahao_id': 'baijiahao_id',
            'baijiahao_status': 'baijiahao_status',
            'word_count': 'word_count',
            'image_count': 'image_count',
            'batch_id': 'batch_id'
        }
        for field, db_field in field_mapping.items():
            if field in data:
                update_fields.append(f"{db_field} = %s")
                params.append(data[field])
        if update_fields:
            params.append(article_id)
            # Column names come from the whitelist above, values from
            # placeholders — not injectable.
            sql = f"UPDATE ai_articles SET {', '.join(update_fields)}, updated_at = NOW() WHERE id = %s"
            db_manager.execute_update(sql, params)
        # Replace the image rows wholesale (delete then re-insert).
        if 'images' in data:
            # Drop the old rows first.
            db_manager.execute_update("DELETE FROM ai_article_images WHERE article_id = %s", (article_id,))
            # Insert the replacement rows, if any.
            if data['images']:
                for img in data['images']:
                    image_sql = """
                    INSERT INTO ai_article_images
                    (enterprise_id, article_id, image_id, image_url, image_thumb_url,
                     image_tag_id, sort_order, keywords_id, keywords_name,
                     department_id, department_name, image_source)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """
                    db_manager.execute_insert(image_sql, (
                        enterprise_id,
                        article_id,
                        img.get('image_id', 0),
                        img.get('image_url', ''),
                        img.get('image_thumb_url', ''),
                        img.get('image_tag_id', 0),
                        img.get('sort_order', 0),
                        img.get('keywords_id', 0),
                        img.get('keywords_name', ''),
                        img.get('department_id', 0),
                        img.get('department_name', ''),
                        img.get('image_source', 0)
                    ))
        # Upsert the single coze_tag row.
        if 'coze_tag' in data:
            # Check whether a tag row already exists for this article.
            tag_check = db_manager.execute_query(
                "SELECT id FROM ai_article_tags WHERE article_id = %s",
                (article_id,)
            )
            if tag_check:
                # Update the existing row.
                db_manager.execute_update(
                    "UPDATE ai_article_tags SET coze_tag = %s WHERE article_id = %s",
                    (data['coze_tag'], article_id)
                )
            else:
                # Insert a fresh row.
                db_manager.execute_insert(
                    "INSERT INTO ai_article_tags (enterprise_id, article_id, coze_tag) VALUES (%s, %s, %s)",
                    (enterprise_id, article_id, data['coze_tag'])
                )
        # On a status transition, snapshot the article into the
        # publish-record table.
        new_status = data.get('status')
        if new_status and new_status != old_status:
            # NOTE(review): article_info is only checked for existence — its
            # selected columns are unused; the INSERT ... SELECT below
            # re-reads ai_articles directly.
            article_info = db_manager.execute_query(
                "SELECT product_id, topic, title FROM ai_articles WHERE id = %s",
                (article_id,)
            )
            if article_info:
                record_sql = """
                INSERT INTO ai_article_published_records
                (article_id, enterprise_id, product_id, topic, title, created_user_id,
                 status, channel, word_count, image_count, publish_time, publish_link)
                SELECT id, enterprise_id, product_id, topic, title, created_user_id,
                       status, channel, word_count, image_count, publish_time, publish_link
                FROM ai_articles
                WHERE id = %s
                """
                db_manager.execute_insert(record_sql, (article_id,))
        logger.info(f"[更新文章] 更新成功, 文章ID: {article_id}, 企业ID: {enterprise_id}")
        return jsonify({
            'code': 200,
            'message': '更新成功',
            'data': None,
            'timestamp': int(datetime.now().timestamp() * 1000)
        })
    except Exception as e:
        logger.error(f"[更新文章] 处理请求时发生错误: {str(e)}", exc_info=True)
        return jsonify({
            'code': 500,
            'message': '服务器内部错误',
            'data': None
        }), 500
@article_bp.route('/<int:article_id>', methods=['GET'])
@require_auth
def get_article_detail(article_id):
    """Fetch one article plus its images, first tag row, and publish history."""
    try:
        current_user = AuthUtils.get_current_user()
        enterprise_id = current_user.get('enterprise_id')
        if not enterprise_id:
            return jsonify({
                'code': 400,
                'message': '无法获取企业ID',
                'data': None
            }), 400
        db_manager = get_db_manager()
        # Main article row, joined with product and prompt-workflow names;
        # the enterprise filter enforces tenant isolation.
        detail_sql = """
        SELECT a.id, a.batch_id, a.enterprise_id, a.product_id, a.topic_type_id,
               a.prompt_workflow_id, a.topic, a.title, a.content, a.department,
               a.departmentids, a.author_id, a.author_name, a.department_id, a.department_name,
               a.created_user_id, a.review_user_id, a.publish_user_id, a.status, a.channel,
               a.review_comment, a.publish_time, a.publish_link, a.baijiahao_id, a.baijiahao_status,
               a.word_count, a.image_count, a.coze_tag, a.created_at, a.updated_at,
               p.name as product_name,
               pw.prompt_workflow_name as prompt_name
        FROM ai_articles a
        LEFT JOIN ai_products p ON a.product_id = p.id
        LEFT JOIN ai_prompt_workflow pw ON a.prompt_workflow_id = pw.id
        WHERE a.id = %s AND a.enterprise_id = %s
        """
        rows = db_manager.execute_query(detail_sql, (article_id, enterprise_id))
        if not rows:
            return jsonify({
                'code': 404,
                'message': '文章不存在',
                'data': None
            }), 404
        detail = rows[0]
        # Publish history, newest first.
        records_sql = """
        SELECT id, status, created_user_id, review_user_id, publish_user_id,
               review_comment, publish_time, publish_link, word_count, image_count, created_at
        FROM ai_article_published_records
        WHERE article_id = %s
        ORDER BY created_at DESC
        """
        detail['publish_records'] = db_manager.execute_query(records_sql, (article_id,))
        # First tag row only (None when the article has no tags).
        tags_sql = """
        SELECT id, coze_tag, created_at
        FROM ai_article_tags
        WHERE article_id = %s
        """
        tag_rows = db_manager.execute_query(tags_sql, (article_id,))
        detail['tags'] = tag_rows[0] if tag_rows else None
        # Image rows, in display order.
        images_sql = """
        SELECT id, image_id, image_url, image_thumb_url, image_tag_id,
               sort_order, keywords_id, keywords_name, department_id,
               department_name, image_source, created_at
        FROM ai_article_images
        WHERE article_id = %s
        ORDER BY sort_order ASC, created_at ASC
        """
        detail['images'] = db_manager.execute_query(images_sql, (article_id,))
        # Normalise datetime columns for JSON serialisation.
        detail = format_datetime_fields([detail])[0]
        logger.info(f"获取文章详情成功: ID {article_id}")
        return jsonify({
            'code': 200,
            'message': 'success',
            'data': detail,
            'timestamp': int(datetime.now().timestamp() * 1000)
        })
    except Exception as e:
        logger.error(f"[获取文章详情] 处理请求时发生错误: {str(e)}", exc_info=True)
        return jsonify({
            'code': 500,
            'message': '服务器内部错误',
            'data': None
        }), 500
@article_bp.route('/<int:article_id>', methods=['DELETE'])
@require_auth
def delete_article(article_id):
    """Delete an article, removing its image and tag rows first."""
    try:
        current_user = AuthUtils.get_current_user()
        enterprise_id = current_user.get('enterprise_id')
        if not enterprise_id:
            return jsonify({
                'code': 400,
                'message': '无法获取企业ID',
                'data': None
            }), 400
        db_manager = get_db_manager()
        # The article must exist and belong to the caller's enterprise.
        found = db_manager.execute_query(
            "SELECT id, title FROM ai_articles WHERE id = %s AND enterprise_id = %s",
            (article_id, enterprise_id)
        )
        if not found:
            return jsonify({
                'code': 404,
                'message': '文章不存在',
                'data': None
            }), 404
        # Remove dependent rows first, then the article itself.
        for child_table in ("ai_article_images", "ai_article_tags"):
            db_manager.execute_update(
                f"DELETE FROM {child_table} WHERE article_id = %s", (article_id,)
            )
        db_manager.execute_update("DELETE FROM ai_articles WHERE id = %s", (article_id,))
        logger.info(f"删除文章成功: ID {article_id}")
        return jsonify({
            'code': 200,
            'message': '删除成功',
            'data': None,
            'timestamp': int(datetime.now().timestamp() * 1000)
        })
    except Exception as e:
        logger.error(f"[删除文章] 处理请求时发生错误: {str(e)}", exc_info=True)
        return jsonify({
            'code': 500,
            'message': '服务器内部错误',
            'data': None
        }), 500
@article_bp.route('/list_dashboard', methods=['GET'])
@require_auth
def get_articles_dashboard():
    """Return dashboard counters: total, publishable, and published articles."""
    client_ip = request.environ.get('HTTP_X_FORWARDED_FOR', request.environ.get('REMOTE_ADDR', '未知'))
    logger.info(f"[文章仪表盘] 开始处理请求, IP: {client_ip}")
    try:
        current_user = AuthUtils.get_current_user()
        enterprise_id = current_user.get('enterprise_id')
        if not enterprise_id:
            logger.warning(f"[文章仪表盘] 无法获取企业ID, IP: {client_ip}")
            return jsonify({
                'code': 400,
                'message': '无法获取企业ID',
                'data': None
            }), 400
        db_manager = get_db_manager()
        # 1. Total articles.
        # NOTE(review): the original comment said drafts are excluded
        # (status != 'draft'), but this SQL counts every status — confirm
        # which behaviour is intended.
        articles_total_sql = """
        SELECT COUNT(id) as total
        FROM ai_articles
        WHERE enterprise_id = %s
        """
        articles_total_result = db_manager.execute_query(articles_total_sql, (enterprise_id,))
        articles_total = articles_total_result[0]['total'] if articles_total_result else 0
        # 2. Publishable articles (status = 'published_review').
        articles_available_sql = """
        SELECT COUNT(id) as total
        FROM ai_articles
        WHERE enterprise_id = %s AND status = 'published_review'
        """
        articles_available_result = db_manager.execute_query(articles_available_sql, (enterprise_id,))
        articles_available = articles_available_result[0]['total'] if articles_available_result else 0
        # 3. Successfully published articles (status = 'published').
        articles_published_sql = """
        SELECT COUNT(id) as total
        FROM ai_articles
        WHERE enterprise_id = %s AND status = 'published'
        """
        articles_published_result = db_manager.execute_query(articles_published_sql, (enterprise_id,))
        articles_published = articles_published_result[0]['total'] if articles_published_result else 0
        stats = {
            'articles_total': articles_total,
            'articles_available': articles_available,
            'articles_published': articles_published
        }
        logger.info(f"[文章仪表盘] 查询成功, 企业ID: {enterprise_id}, 总数: {articles_total}, 可发: {articles_available}, 已发: {articles_published}, IP: {client_ip}")
        return jsonify({
            'code': 200,
            'message': 'success',
            'data': stats,
            'timestamp': int(datetime.now().timestamp() * 1000)
        })
    except Exception as e:
        logger.error(f"[文章仪表盘] 处理请求时发生错误: {str(e)}", exc_info=True)
        return jsonify({
            'code': 500,
            'message': '服务器内部错误',
            'data': None
        }), 500

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,906 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AI文章自动生成监控脚本
监控数据库中status为topic的记录自动调用Coze API生成文章并提交
"""
import os
import sys
import time
import json
import logging
import requests
import pymysql
import random
from datetime import datetime
from typing import Dict, List, Optional, Any
import traceback
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from queue import Queue, Empty
import emoji
# 添加项目根目录到Python路径
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database_config import get_db_manager
from log_config import setup_logger
from dashvector_get_similar_topic import search_chinese, init_dashvector_client
# 配置日志记录器,支持按日期切割和控制台输出
logger = setup_logger(
name='generate_Atlas_Qianwen',
log_file='logs/generate_Atlas_Qianwen_article.log',
error_log_file='logs/generate_Atlas_Qianwen_error.log',
level=logging.INFO,
console_output=True
)
# 配置常量
#BASE_URL = "http://47.99.184.230:8321"
BASE_URL = "http://127.0.0.1:8215"
COZE_API_URL = "https://api.coze.cn/v1/workflow/stream_run"
SLEEP_INTERVAL = 5 # 监控间隔(秒)
WORKER_COUNT = 6 # 并行处理worker数量可配置
# 全局变量
AUTH_TOKEN = None
WORKFLOW_ID = None
JWT_TOKEN = None
class CozeArticleGenerator:
    def __init__(self):
        """Wire up API endpoints, DB access, worker state and DashVector.

        NOTE(review): construction has side effects (DashVector client
        initialisation, logging) — instantiation is not free.
        """
        # API endpoints
        self.base_url = BASE_URL
        self.coze_api_url = COZE_API_URL
        # Auth state; populated by login_and_get_jwt_token()
        self.jwt_token = None
        # Shared pooled database manager
        self.db_manager = get_db_manager()
        # Login configuration
        # NOTE(review): these credentials appear unused —
        # login_and_get_jwt_token() hardcodes a different account; confirm
        # which one is authoritative.
        self.login_credentials = {
            'username': 'user010',
            'password': '@5^2W6R7'
        }
        # Explicitly disable HTTP(S) proxies for outbound requests
        self.proxies = {
            'http': None,
            'https': None
        }
        # Parallel-processing state
        self.processing_lock = threading.Lock()  # guards record assignment across workers
        self.processed_ids = set()  # record IDs already handled
        # Initialise the DashVector client (vector search for similar topics)
        logger.info("开始初始化DashVector客户端")
        if init_dashvector_client():
            logger.info("DashVector客户端初始化成功")
        else:
            logger.warning("DashVector客户端初始化失败相似topic检索功能将不可用")
        logger.info("CozeArticleGenerator 初始化完成")
def log_to_database(self, level: str, message: str, details: str = None):
"""获取数据库连接"""
try:
return self.db_manager.get_connection()
except Exception as e:
logger.error(f"数据库连接失败: {e}")
return None
def get_article_contents_by_ids(self, article_ids: List[int]) -> str:
"""
根据article_id列表从数据库获取content内容
Args:
article_ids: 文章ID列表
Returns:
str: 合并后的content内容多条用\n\n分隔
"""
if not article_ids:
logger.info("没有文章ID返回空字符串")
return ""
try:
# 去重并限制最多2条
article_ids = list(set(article_ids))[:2]
logger.info(f"开始查询文章contentarticle_ids: {article_ids}")
with self.db_manager.get_cursor() as cursor:
# 构建IN查询
placeholders = ','.join(['%s'] * len(article_ids))
sql = f"""
SELECT id, content, created_at
FROM ai_articles
WHERE id IN ({placeholders})
AND content IS NOT NULL AND content != ''
ORDER BY created_at DESC
LIMIT 2
"""
cursor.execute(sql, article_ids)
results = cursor.fetchall()
if not results:
logger.warning(f"未查询到文章contentarticle_ids: {article_ids}")
return ""
logger.info(f"查询到 {len(results)} 条文章content")
# 合并content
contents = []
for row in results:
content = row.get('content', '').strip()
if content:
contents.append(content)
logger.info(f"添加文章contentID: {row.get('id')}, 长度: {len(content)} 字符")
# 用两个换行符分隔
merged_content = "\n\n".join(contents)
logger.info(f"合并后的content总长度: {len(merged_content)} 字符")
return merged_content
except Exception as e:
error_msg = f"查询文章content异常: {e}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, traceback.format_exc())
return ""
def log_to_database(self, level: str, message: str, details: str = None):
"""记录日志到数据库ai_logs表"""
try:
with self.db_manager.get_cursor() as cursor:
# 映射日志级别到数据库状态
status_map = {
'INFO': 'success',
'WARNING': 'warning',
'ERROR': 'error'
}
status = status_map.get(level, 'success')
sql = """
INSERT INTO ai_logs (user_id, action, description, status, error_message, created_at)
VALUES (%s, %s, %s, %s, %s, NOW())
"""
cursor.execute(sql, (None, 'coze_generator', message, status, details))
logger.info(f"日志已记录到数据库: {level} - {message}")
except Exception as e:
logger.error(f"记录日志到数据库失败: {e}")
def login_and_get_jwt_token(self) -> bool:
"""登录获取JWT token参考JavaScript逻辑"""
try:
login_url = f"{self.base_url}/api/auth/login"
login_data = {
"username": "13621242430", # 使用用户指定的账号
"password": "admin123"
}
logger.info(f"尝试登录: {login_data['username']}")
self.log_to_database('INFO', f"尝试登录用户: {login_data['username']}")
response = requests.post(
login_url,
json=login_data,
headers={'Content-Type': 'application/json'},
proxies=self.proxies # 禁用代理
)
if response.status_code == 200:
result = response.json()
if result.get('code') == 200:
self.jwt_token = result['data']['token']
logger.info("JWT token获取成功")
self.log_to_database('INFO', "JWT token获取成功", json.dumps(result['data']))
return True
else:
error_msg = f"登录失败: {result.get('message', '未知错误')}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, json.dumps(result))
return False
else:
error_msg = f"登录请求失败: {response.status_code}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, response.text)
return False
except Exception as e:
error_msg = f"登录异常: {e}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, traceback.format_exc())
return False
def generate_article_from_coze(self, title: str, context_injection: str, workflow_id: str, auth_token: str) -> Optional[Dict]:
"""调用Coze API生成文章100%参考JavaScript流式处理逻辑"""
try:
logger.info(f"开始为主题'{title}'生成文章...")
logger.info(f"上下文注入内容长度: {len(context_injection)} 字符")
self.log_to_database('INFO', f"开始为主题生成文章: {title}", f"context_injection长度: {len(context_injection)}")
# 验证传入的认证信息
if not auth_token or not workflow_id:
error_msg = f"'{title}' - workflow_id 或 auth_token 参数缺失"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg)
return None
# 构建请求数据增加context_injection参数
request_data = {
'workflow_id': workflow_id,
'parameters': {
'title': title,
'context_injection': context_injection # 新增上下文注入参数
}
}
logger.info(f"提交coze工作流数据详情: {json.dumps(request_data['parameters'], ensure_ascii=False)[:200]}...")
# 发送流式请求
headers = {
'Authorization': f'Bearer {auth_token}',
'Content-Type': 'application/json'
}
logger.info(f"'{title}' - 发送Coze API请求...")
response = requests.post(
COZE_API_URL,
json=request_data,
headers=headers,
stream=True,
timeout=300 # 5分钟超时
)
logger.info(f"'{title}' - Coze API响应状态码: {response.status_code}")
if not response.ok:
error_msg = f"'{title}' - Coze API请求失败状态码: {response.status_code}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, response.text)
return None
# 调用流式响应解析方法
return self.parse_stream_response(response, title)
except Exception as e:
error_msg = f"生成文章异常: {e}, 主题: {title}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, traceback.format_exc())
return None
    def parse_stream_response(self, response, title: str) -> Optional[Dict[str, Any]]:
        """Parse the Coze SSE stream and return the last structured payload.

        Events are delimited by blank lines; each carries ``event:`` and
        ``data:`` lines.  ``Error`` aborts with None, ``PING``/``End`` are
        skipped, and each ``Message`` whose content parses to a dict with a
        title/contents/introduction replaces ``last_structured``.

        NOTE(review): ``all_img`` is initialised but never appended to, so
        the success log always reports 0 images — confirm intent.
        """
        try:
            buffer = ''
            last_structured = None
            all_img = []
            logger.info(f"'{title}' - 开始接收流式数据...")
            # Force UTF-8 so decode_unicode yields text, not bytes.
            response.encoding = 'utf-8'
            for chunk in response.iter_content(chunk_size=1024, decode_unicode=True):
                if chunk:
                    buffer += chunk
                    # SSE events end with a blank line; keep the trailing
                    # (possibly partial) piece in the buffer for next time.
                    events = buffer.split('\n\n')
                    buffer = events.pop() or ''
                    for event_str in events:
                        if not event_str.strip():
                            continue
                        lines = event_str.split('\n')
                        event_type = ''
                        data_str = ''
                        # Extract event type and data (mirrors the JS client).
                        for line in lines:
                            if line.startswith('event:'):
                                event_type = line[6:].strip()
                            elif line.startswith('data:'):
                                data_str = line[5:].strip()
                        logger.info(f"'{title}' - 收到事件: {event_type}")
                        self.log_to_database('INFO', f"收到Coze事件: {event_type}", f"主题: {title}")
                        # Error event: log details and abort.
                        if event_type == 'Error':
                            logger.error(f"'{title}' - Coze API返回错误: {data_str}")
                            self.log_to_database('ERROR', f"Coze API返回错误: {title}", data_str)
                            try:
                                err_data = json.loads(data_str)
                                error_detail = f"错误代码: {err_data.get('error_code', '未知错误')}, 错误信息: {err_data.get('error_message', '无详细信息')}"
                                logger.error(f"'{title}' - {error_detail}")
                                self.log_to_database('ERROR', f"Coze API错误详情: {title}", error_detail)
                            except json.JSONDecodeError:
                                logger.error(f"'{title}' - 无法解析错误数据")
                                self.log_to_database('ERROR', f"无法解析Coze错误数据: {title}", data_str)
                            return None
                        # Skip keep-alive and terminator events.
                        if event_type in ['PING', 'End']:
                            continue
                        # Message event: parse and keep the latest structure.
                        if event_type == 'Message':
                            try:
                                logger.info(f"'{title}' - 收到Message事件数据: {data_str[:200]}...")
                                data = json.loads(data_str)
                                # The content field is itself a JSON string.
                                content_obj = {}
                                if data.get('content') and isinstance(data['content'], str):
                                    try:
                                        content_obj = json.loads(data['content'])
                                        logger.info(f"'{title}' - 解析后的content: {list(content_obj.keys())}")
                                    except json.JSONDecodeError as e:
                                        logger.error(f"'{title}' - 解析content字段失败: {e}")
                                        continue
                                # Keep the payload even when its title is
                                # empty — fall back to the caller's title.
                                if content_obj.get('title') or content_obj.get('contents') or content_obj.get('introduction'):
                                    final_title = content_obj.get('title') or title
                                    last_structured = {
                                        'title': final_title,
                                        'tags': content_obj.get('tags', ''),
                                        'introduction': content_obj.get('introduction', ''),
                                        'conclusion': content_obj.get('conclusion', ''),
                                        'contents': content_obj.get('contents', []) if isinstance(content_obj.get('contents'), list) else []
                                    }
                                    logger.info(f"'{title}' - 保存结构化数据,最终标题: {final_title}")
                                    logger.info(f"'{title}' - 内容项数量: {len(last_structured['contents'])}")
                            except json.JSONDecodeError as e:
                                logger.error(f"'{title}' - 解析消息错误: {e}")
                                continue
            if last_structured:
                success_msg = f"'{title}' - 文章生成成功,包含{len(all_img)}张图片"
                logger.info(success_msg)
                self.log_to_database('INFO', success_msg, json.dumps(last_structured, ensure_ascii=False))
                return last_structured
            else:
                warning_msg = f"'{title}' - 未获取到有效的文章内容"
                logger.warning(warning_msg)
                self.log_to_database('WARNING', warning_msg)
                return None
        except Exception as e:
            error_msg = f"'{title}' - 解析流式响应异常: {e}"
            logger.error(error_msg)
            self.log_to_database('ERROR', error_msg, traceback.format_exc())
            return None
def convert_structured_to_dynamic(self, structured_data: Dict) -> str:
    """Convert structured article data into the Dynamic plain-text format.

    Mirrors the JavaScript convertStructuredToDynamic function: emits the
    title, introduction, each content item and the conclusion as paragraphs
    separated by blank lines. Every dict content item is guaranteed to start
    with an emoji, sourced (in order) from the item text itself, the item's
    'content_title', or a rotating fallback list.

    Args:
        structured_data: Dict with 'title', 'introduction', 'contents'
            (list of dicts or strings), 'conclusion' and 'tags' keys.

    Returns:
        The assembled text, or "" when an exception occurs.
    """
    try:
        title = structured_data.get('title', '')
        introduction = structured_data.get('introduction', '')
        contents = structured_data.get('contents', [])
        conclusion = structured_data.get('conclusion', '')
        tags = structured_data.get('tags', '')
        logger.info(f"'{title}' - 开始转换Dynamic格式")
        html_content = ''
        # Title paragraph.
        if title:
            html_content += f"{title}\n\n"
        # Introduction paragraph.
        if introduction and introduction.strip():
            html_content += f"{introduction.strip()}\n\n"
            logger.info(f"'{title}' - 添加引言段落")
        # Content items.
        if contents and isinstance(contents, list):
            for i, content in enumerate(contents):
                if isinstance(content, dict):
                    # Use content_item (falling back to content) to stay consistent with the JS side.
                    content_text = content.get('content_item') or content.get('content', '')
                    # Bug fix: content_title was referenced below without ever being
                    # assigned, raising NameError whenever content_text lacked a leading emoji.
                    content_title = content.get('content_title', '')
                    # Emoji prefix handling.
                    if content_text and content_text.strip():
                        # Does content_text already start with an emoji?
                        emojis_text = emoji.emoji_list(content_text)
                        has_emoji_text = emojis_text and emojis_text[0]['match_start'] == 0
                        if not has_emoji_text:
                            # No emoji on the text; borrow one from content_title if present.
                            emojis_title = emoji.emoji_list(content_title) if content_title else []
                            has_emoji_title = emojis_title and emojis_title[0]['match_start'] == 0
                            if has_emoji_title:
                                first_emoji = emojis_title[0]['emoji']
                                content_text = first_emoji + content_text
                                logger.info(f"'{title}' - contents[{i}] 从content_title补充emoji: {first_emoji}")
                            else:
                                # Neither has an emoji: pick one by cycling through the index.
                                # NOTE(review): the third entry appears empty — possibly an
                                # emoji lost to an encoding issue; confirm against the JS list.
                                emoji_list = ['🔥', '💡', '', '🌟']
                                rand_emoji = emoji_list[i % len(emoji_list)]
                                content_text = rand_emoji + content_text
                                logger.info(f"'{title}' - contents[{i}] content_text: none, content_title: none, rand_emoji: {rand_emoji}")
                    if content_text and content_text.strip():
                        # Split on newlines into separate paragraphs.
                        paragraphs = content_text.split('\n')
                        filtered_paragraphs = [p.strip() for p in paragraphs if p.strip()]
                        for paragraph in filtered_paragraphs:
                            html_content += f"{paragraph}\n\n"
                        logger.info(f"'{title}' - 添加内容段落 {i+1},字段: {'content_item' if content.get('content_item') else 'content'}")
                elif isinstance(content, str) and content.strip():
                    # Plain-string items: split on newlines into paragraphs.
                    paragraphs = content.split('\n')
                    filtered_paragraphs = [p.strip() for p in paragraphs if p.strip()]
                    for paragraph in filtered_paragraphs:
                        html_content += f"{paragraph}\n\n"
                    logger.info(f"'{title}' - 添加内容段落 {i+1}")
        # Conclusion paragraph.
        if conclusion and conclusion.strip():
            html_content += f"{conclusion.strip()}\n\n"
            logger.info(f"'{title}' - 添加结论段落")
        # Tags: intentionally not appended to the output (line kept commented), only logged.
        if tags:
            #html_content += f"{tags}\n\n"
            logger.info(f"'{title}' - 添加标签")
        logger.info(f"'{title}' - Dynamic格式转换完成")
        return html_content
    except Exception as e:
        error_msg = f"转换HTML格式异常: {e}"
        logger.error(error_msg)
        self.log_to_database('ERROR', error_msg, traceback.format_exc())
        return ""
def generate_article(self, structured_data: Dict, article_id: int, existing_batch_id: int) -> bool:
    """Submit a generated article to the /api/generate_article endpoint.

    Closely mirrors the JavaScript sendInfoToBaijiahao flow: build the payload,
    POST it with the cached JWT token, and on a 401 re-login once and retry.
    Article status transitions are handled by the endpoint itself, never by
    direct database writes from this method.

    Args:
        structured_data: Parsed article payload; must contain a non-empty 'contents'.
        article_id: Primary key of the ai_articles row being published.
        existing_batch_id: Existing batch id to trigger update mode; a new
            timestamp-based id is minted when falsy.

    Returns:
        True when the endpoint acknowledges the submission, False otherwise.
    """
    # Defensive default so the except handlers below can always format messages,
    # even if an error occurs before the real title is extracted.
    title = 'Unknown'
    try:
        # Reject payloads without content items outright.
        if not structured_data or not structured_data.get('contents'):
            logger.error(f"[Worker] 生成文章失败: structured_data['contents']为空")
            # Status management is delegated to the API; no direct DB update here.
            return False
        title = structured_data.get('title', 'Unknown')
        logger.info(f"'{title}' - 开始提交文章到generate_article接口")
        self.log_to_database('INFO', f"开始提交文章: {title}", f"article_id: {article_id}")
        # Make sure we hold a JWT token before calling the API.
        if not self.jwt_token:
            logger.warning(f"'{title}' - JWT token缺失尝试重新登录")
            self.log_to_database('WARNING', f"JWT token缺失重新登录: {title}")
            if not self.login_and_get_jwt_token():
                error_msg = f"'{title}' - 重新登录失败"
                logger.error(error_msg)
                self.log_to_database('ERROR', error_msg)
                return False
        # No existing batch_id: mint a timestamp-based unique id instead.
        if not existing_batch_id:
            timestamp = int(time.time())
            random_num = str(int(time.time() * 1000) % 10000).zfill(4)
            existing_batch_id = f"{timestamp}{random_num}"
            logger.warning(f"'{title}' - 生成新的batch_id: {existing_batch_id}")
            # Bug fix: a stray log line referencing an undefined `e` used to live
            # here and raised NameError whenever this branch executed.
        # Convert structured content into the Dynamic text format.
        html_content = self.convert_structured_to_dynamic(structured_data)
        # Build the payload; reusing the existing batch_id triggers update mode.
        publish_data = {
            "title": structured_data['title'],
            "content": html_content,
            "tags": structured_data.get('tags', ''),
            "cover_image": structured_data.get('home_img', ''),
            "article_id": article_id,
            "batch_id": existing_batch_id,  # reuse existing batch_id
            "uniq_id": existing_batch_id,
            "source": "coze_auto_generator",  # origin marker
            "username": self.login_credentials['username']
        }
        logger.info(f"'{title}' - 准备提交的数据: article_id={article_id}, batch_id={existing_batch_id}")
        logger.info(f"'{title}' - 提交数据详情: {json.dumps(publish_data, ensure_ascii=False)[:200]}...")
        # Send the request.
        upload_url = f"{self.base_url}/api/generate_article"
        headers = {
            'Authorization': f'Bearer {self.jwt_token}',
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        }
        response = requests.post(
            upload_url,
            json=publish_data,
            headers=headers,
            timeout=60,
            proxies=self.proxies
        )
        logger.info(f"'{title}' - 提交响应状态码: {response.status_code}")
        if response.status_code == 200:
            try:
                result = response.json()
                logger.info(f"'{title}' - 提交响应内容: {result}")
                if result.get('success') or result.get('errno') == 0:
                    success_msg = f"'{title}' - 文章提交成功, ID: {existing_batch_id}"
                    logger.info(success_msg)
                    self.log_to_database('INFO', success_msg, f"article_id: {article_id}, batch_id: {existing_batch_id}")
                    return True
                else:
                    error_msg = f"'{title}' - 文章提交失败: {result.get('message', result.get('errmsg', '未知错误'))}"
                    logger.error(error_msg)
                    self.log_to_database('ERROR', error_msg, f"article_id: {article_id}, response: {result}")
                    return False
            except json.JSONDecodeError as e:
                error_msg = f"'{title}' - 解析提交响应失败: {e}"
                logger.error(error_msg)
                self.log_to_database('ERROR', error_msg, f"response_text: {response.text}")
                return False
        elif response.status_code == 401:
            # 401: JWT token expired — re-login once and retry the submission.
            logger.warning(f"'{title}' - JWT token过期(401),尝试重新登录")
            self.log_to_database('WARNING', f"JWT token过期重新登录: {title}", f"article_id: {article_id}")
            if self.login_and_get_jwt_token():
                logger.info(f"'{title}' - 重新登录成功,重试提交文章")
                # Refresh the Authorization header with the new token.
                headers['Authorization'] = f'Bearer {self.jwt_token}'
                retry_response = requests.post(
                    upload_url,
                    json=publish_data,
                    headers=headers,
                    timeout=60,
                    proxies=self.proxies
                )
                logger.info(f"'{title}' - 重试响应状态码: {retry_response.status_code}")
                if retry_response.status_code == 200:
                    try:
                        retry_result = retry_response.json()
                        logger.info(f"'{title}' - 重试响应内容: {retry_result}")
                        if retry_result.get('success') or retry_result.get('errno') == 0:
                            success_msg = f"'{title}' - 重试提交成功, ID: {existing_batch_id}"
                            logger.info(success_msg)
                            self.log_to_database('INFO', success_msg, f"article_id: {article_id}, batch_id: {existing_batch_id}")
                            return True
                        else:
                            error_msg = f"'{title}' - 重试提交失败: {retry_result.get('message', retry_result.get('errmsg', '未知错误'))}"
                            logger.error(error_msg)
                            self.log_to_database('ERROR', error_msg, f"article_id: {article_id}, retry_response: {retry_result}")
                            return False
                    except json.JSONDecodeError as e:
                        error_msg = f"'{title}' - 解析重试响应失败: {e}"
                        logger.error(error_msg)
                        self.log_to_database('ERROR', error_msg, f"retry_response_text: {retry_response.text}")
                        return False
                else:
                    error_msg = f"'{title}' - 重试请求仍然失败,状态码: {retry_response.status_code}"
                    logger.error(error_msg)
                    self.log_to_database('ERROR', error_msg, f"retry_response_text: {retry_response.text}")
                    return False
            else:
                error_msg = f"'{title}' - 重新登录失败,无法重试"
                logger.error(error_msg)
                self.log_to_database('ERROR', error_msg, f"article_id: {article_id}")
                return False
        else:
            error_msg = f"'{title}' - 文章提交请求失败,状态码: {response.status_code}"
            logger.error(error_msg)
            self.log_to_database('ERROR', error_msg, f"response_text: {response.text}")
            return False
    except requests.exceptions.Timeout:
        error_msg = f"'{title}' - 提交文章请求超时"
        logger.error(error_msg)
        self.log_to_database('ERROR', error_msg, f"article_id: {article_id}")
        return False
    except requests.exceptions.RequestException as e:
        error_msg = f"'{title}' - 提交文章网络异常: {e}"
        logger.error(error_msg)
        self.log_to_database('ERROR', error_msg, traceback.format_exc())
        return False
    except Exception as e:
        error_msg = f"'{title}' - 提交文章异常: {e}"
        logger.error(error_msg)
        self.log_to_database('ERROR', error_msg, traceback.format_exc())
        return False
def get_generate_topics(self) -> List[Dict]:
    """Fetch up to 1000 articles whose status is 'generate' or 'generate_failed'.

    Failed rows are included so generation can be retried, with fresh
    'generate' rows ordered ahead of retries. The LEFT JOIN pulls
    auth_token / workflow_id / prompt_workflow_name from ai_prompt_workflow.

    Returns:
        A list of row dicts, or [] on query failure.
    """
    query = """
                SELECT a.id, a.topic, a.batch_id, a.status, a.created_at, a.updated_at,
                       p.auth_token, p.workflow_id, p.prompt_workflow_name
                FROM ai_articles a
                LEFT JOIN ai_prompt_workflow p ON a.prompt_workflow_id = p.id
                WHERE a.status IN ('generate', 'generate_failed')
                AND a.topic > '' AND a.prompt_workflow_id = 28
                ORDER BY
                CASE WHEN a.status = 'generate' THEN 1 ELSE 2 END,
                a.id ASC
                LIMIT 1000
                """
    try:
        with self.db_manager.get_cursor() as cursor:
            cursor.execute(query)
            rows = cursor.fetchall()
            if not rows:
                logger.info("未查询到待处理主题")
                return rows
            logger.info(f"查询到 {len(rows)} 个待处理主题")
            for row in rows:
                logger.info(f"待处理文章 - ID: {row['id']}, 主题: {row['topic']}, 状态: {row['status']}, auth_token: {row.get('auth_token', 'N/A')}, workflow_id: {row.get('workflow_id', 'N/A')}")
                self.log_to_database('INFO', f"发现待处理文章: {row['topic']}",
                                     f"ID: {row['id']}, 状态: {row['status']}, auth_token: {row.get('auth_token', 'N/A')}, workflow_id: {row.get('workflow_id', 'N/A')}")
            return rows
    except Exception as e:
        error_msg = f"查询待处理主题异常: {e}"
        logger.error(error_msg)
        self.log_to_database('ERROR', error_msg, traceback.format_exc())
        return []
def get_next_available_topic(self, pending_topics: List[Dict]) -> Optional[Dict]:
    """Atomically claim the next unprocessed topic.

    Holds processing_lock while scanning so concurrent workers can never
    claim the same article id twice.

    Returns:
        The first topic dict whose id has not been claimed yet, marking it
        as claimed, or None when every topic is already taken.
    """
    with self.processing_lock:
        unclaimed = (t for t in pending_topics if t['id'] not in self.processed_ids)
        candidate = next(unclaimed, None)
        if candidate is not None:
            self.processed_ids.add(candidate['id'])
        return candidate
def process_single_topic(self, topic_data: Dict) -> bool:
    """Process one pending topic end-to-end.

    Steps: look up similar historical topics for context injection, generate
    the article via the Coze workflow, then submit it through the API.
    Context-lookup failures are logged but never block generation; article
    status transitions are handled by the API, never by direct DB writes here.

    Args:
        topic_data: Row from get_generate_topics() with id/topic/batch_id plus
            workflow credentials joined from ai_prompt_workflow.

    Returns:
        True on successful generation and submission, False otherwise.
    """
    article_id = topic_data['id']
    topic = topic_data['topic']
    workflow_id = topic_data.get('workflow_id')
    auth_token = topic_data.get('auth_token')
    prompt_workflow_name = topic_data.get('prompt_workflow_name')
    worker_id = threading.current_thread().name
    batch_id = topic_data.get('batch_id')
    # ====== Similar-topic lookup for context injection ======
    context_injection = ""  # text injected into the generation prompt
    try:
        logger.info(f"[Worker-{worker_id}] 开始查找相似topic当前topic: '{topic}'")
        self.log_to_database('INFO', f"开始查找相似topic: {topic}", f"article_id: {article_id}")
        # Vector search for semantically similar historical topics.
        similar_topics = search_chinese(
            query_text=topic,
            topk=3,  # fetch top-3; downstream filtering keeps at most 2
            similarity_threshold=0.5  # similarity cutoff 0.5
        )
        if similar_topics:
            logger.info(f"[Worker-{worker_id}] 找到 {len(similar_topics)} 个相似topic")
            logger.info(f"[Worker-{worker_id}] similar_topics完整返回值: {json.dumps(similar_topics, ensure_ascii=False)}")
            # Collect article ids (the search result field is 'id', not 'article_id').
            article_ids = []
            for item in similar_topics:
                aid = item.get('id', '')
                if aid:
                    try:
                        # Result ids arrive as strings; coerce to int.
                        article_ids.append(int(aid))
                    except (ValueError, TypeError):
                        logger.warning(f"[Worker-{worker_id}] 无法转换id为整数: {aid}")
            # Bug fix: removed leftover debug override `article_ids = [702, 699]`
            # that unconditionally discarded the real search results.
            if article_ids:
                logger.info(f"[Worker-{worker_id}] 提取到文章ID列表: {article_ids}")
                # Pull the referenced articles' content from the database.
                context_injection = self.get_article_contents_by_ids(article_ids)
                if context_injection:
                    logger.info(f"[Worker-{worker_id}] 获取到上下文注入内容,长度: {len(context_injection)} 字符")
                    self.log_to_database(
                        'INFO',
                        f"获取上下文注入内容: {topic}",
                        f"article_id: {article_id}, 相似文章IDs: {article_ids}, 内容长度: {len(context_injection)}"
                    )
                else:
                    logger.warning(f"[Worker-{worker_id}] 未从article_ids {article_ids} 查询到content")
                    self.log_to_database('WARNING', f"未查询到content: {topic}", f"article_ids: {article_ids}")
            else:
                logger.warning(f"[Worker-{worker_id}] 相似topic中没有有效的id")
            # Log each similar topic for traceability.
            for i, similar in enumerate(similar_topics, 1):
                logger.info(f"[Worker-{worker_id}] 相似topic[{i}]: {similar.get('title', 'N/A')}, 相似度: {similar.get('similar', 0):.4f}, 文章ID: {similar.get('id', 'N/A')}")
            self.log_to_database(
                'INFO',
                f"找到相似topic: {topic}",
                f"article_id: {article_id}, 相似topic数量: {len(similar_topics)}, 详情: {json.dumps(similar_topics, ensure_ascii=False)}"
            )
        else:
            logger.info(f"[Worker-{worker_id}] 未找到相似topic相似度>0.5")
            self.log_to_database('INFO', f"未找到相似topic: {topic}", f"article_id: {article_id}")
    except Exception as e:
        error_msg = f"[Worker-{worker_id}] 查找相似topic异常: {e}"
        logger.error(error_msg)
        self.log_to_database('ERROR', error_msg, traceback.format_exc())
        # Context lookup failures must not block article generation.
    # ====== End of similar-topic lookup ======
    try:
        logger.info(f"[Worker-{worker_id}] 开始处理主题 ID:{article_id}, Topic:'{topic}', Prompt={prompt_workflow_name}")
        # Validate the workflow credentials joined from ai_prompt_workflow.
        if not workflow_id or not auth_token:
            error_msg = f"[Worker-{worker_id}] workflow_id 或 auth_token 缺失Topic:'{topic}'"
            logger.error(error_msg)
            self.log_to_database('ERROR', error_msg, f"article_id: {article_id}")
            return False
        # Generate the article, timing the Coze call.
        start_time = time.time()
        structured_data = self.generate_article_from_coze(topic, context_injection, workflow_id, auth_token)
        end_time = time.time()
        elapsed_time = end_time - start_time
        logger.info(f"[Worker-{worker_id}] Coze文章生成耗时: {elapsed_time:.2f}秒, Topic:'{topic}'")
        if not structured_data:
            logger.error(f"[Worker-{worker_id}] 生成文章失败: {topic}")
            # Status management is delegated to the API; no direct DB update here.
            return False
        # An empty contents list is treated as a generation failure.
        if not structured_data.get('contents'):
            logger.error(f"[Worker-{worker_id}] 生成文章失败: {topic} - structured_data['contents']为空")
            return False
        # Submit the generated article.
        if self.generate_article(structured_data, article_id, batch_id):
            logger.info(f"[Worker-{worker_id}] 文章处理完成: {topic}")
            # Post-submit status (pending_review, not draft) is set by the API, not here.
            return True
        else:
            logger.error(f"[Worker-{worker_id}] 文章提交失败: {topic}")
            return False
    except Exception as e:
        logger.error(f"[Worker-{worker_id}] 处理主题异常: {e}")
        return False
def worker_process_topics(self, pending_topics: List[Dict], worker_id: int) -> int:
    """Drain the shared topic queue from one worker thread.

    Repeatedly claims topics via get_next_available_topic() until the queue
    is exhausted, processing each one in turn.

    Returns:
        The number of topics processed successfully by this worker.
    """
    thread_name = f"Worker-{worker_id}"
    threading.current_thread().name = thread_name
    logger.info(f"[{thread_name}] 启动,准备处理主题")
    succeeded = 0
    while True:
        # Thread-safe claim of the next unprocessed topic.
        claimed = self.get_next_available_topic(pending_topics)
        if not claimed:
            logger.info(f"[{thread_name}] 没有更多待处理主题,退出")
            break
        if self.process_single_topic(claimed):
            succeeded += 1
            logger.info(f"[{thread_name}] 成功处理主题: {claimed['topic']}")
        else:
            logger.error(f"[{thread_name}] 处理主题失败: {claimed['topic']}")
    logger.info(f"[{thread_name}] 完成,共处理 {succeeded} 个主题")
    return succeeded
def run_monitor(self):
    """Main monitoring loop: poll ai_articles and fan pending topics out to workers.

    Runs forever until KeyboardInterrupt. Each round fetches pending rows,
    clears the shared processed-id set under the lock, then processes topics
    in parallel with WORKER_COUNT threads. Loop-level exceptions are logged
    and the loop continues after a short back-off.
    """
    logger.info(f"开始监控ai_articles表使用 {WORKER_COUNT} 个worker并行处理...")
    self.log_to_database('INFO', f'启动文章自动生成监控服务worker数量: {WORKER_COUNT}', 'run_monitor')
    while True:
        try:
            # Fetch topics awaiting generation (includes failed retries).
            pending_topics = self.get_generate_topics()
            if pending_topics:
                logger.info(f"发现 {len(pending_topics)} 个待处理主题,启动 {WORKER_COUNT} 个worker并行处理")
                self.log_to_database('INFO', f'发现待处理主题,启动并行处理', f'主题数量: {len(pending_topics)}, worker数量: {WORKER_COUNT}')
                # Reset the claimed-id set so this round starts fresh.
                with self.processing_lock:
                    self.processed_ids.clear()
                # Process the round in parallel via a thread pool.
                with ThreadPoolExecutor(max_workers=WORKER_COUNT, thread_name_prefix="CozeWorker") as executor:
                    # Submit one long-lived draining task per worker.
                    future_to_worker = {}
                    for worker_id in range(1, WORKER_COUNT + 1):
                        future = executor.submit(self.worker_process_topics, pending_topics, worker_id)
                        future_to_worker[future] = worker_id
                    # Wait for every worker and tally their success counts.
                    total_processed = 0
                    for future in as_completed(future_to_worker):
                        worker_id = future_to_worker[future]
                        try:
                            processed_count = future.result()
                            total_processed += processed_count
                            logger.info(f"Worker-{worker_id} 完成,处理了 {processed_count} 个主题")
                        except Exception as e:
                            logger.error(f"Worker-{worker_id} 执行异常: {e}")
                            self.log_to_database('ERROR', f'Worker-{worker_id} 执行异常', str(e))
                logger.info(f"本轮并行处理完成,共处理 {total_processed} 个主题")
                self.log_to_database('INFO', f'本轮并行处理完成', f'共处理 {total_processed} 个主题')
                # Brief pause after completing a full round.
                time.sleep(5)
            else:
                logger.info("暂无待处理主题,继续监控...")
            # Poll again after the configured interval.
            time.sleep(SLEEP_INTERVAL)
        except KeyboardInterrupt:
            logger.info("收到中断信号,停止监控")
            self.log_to_database('INFO', '监控服务手动停止', 'KeyboardInterrupt')
            break
        except Exception as e:
            error_msg = f"监控循环异常: {e}"
            logger.error(error_msg)
            self.log_to_database('ERROR', error_msg, traceback.format_exc())
            time.sleep(5)  # back off 5s before resuming after an error
def main():
    """Entry point: authenticate once, then run the monitoring loop forever.

    Exits early when the initial login fails; any other top-level exception
    is logged both locally and to the database log table.
    """
    generator = CozeArticleGenerator()
    try:
        logger.info("开始登录获取JWT token")
        if generator.login_and_get_jwt_token():
            generator.run_monitor()
        else:
            logger.error("登录失败,程序退出")
    except Exception as exc:
        logger.error(f"程序运行异常: {exc}")
        generator.log_to_database('ERROR', f'程序运行异常: {exc}', traceback.format_exc())
# Script entry point: log in for a JWT token, then monitor ai_articles forever.
if __name__ == "__main__":
    main()