初始提交:文字匹配图片项目
55
.gitignore
vendored
Normal file
@@ -0,0 +1,55 @@
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# Virtual Environment
|
||||
venv/
|
||||
ENV/
|
||||
env/
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# Logs
|
||||
logs/
|
||||
*.log
|
||||
|
||||
# Database
|
||||
*.db
|
||||
*.sqlite3
|
||||
|
||||
# CSV files
|
||||
*.csv
|
||||
|
||||
# Temporary files
|
||||
temp_*
|
||||
*.tmp
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Project specific
|
||||
article_image_match_results.csv
|
||||
32
11111111.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""Generate an image via the Gemini API and save it to disk.

Reads the API key from the GENAI_API_KEY environment variable instead of
hard-coding it in source.  SECURITY: the key previously committed here is
compromised the moment it is pushed — rotate it and supply the replacement
through the environment only.
"""
import os

from google import genai
from google.genai.client import HttpOptions

# Fail fast with a clear message if no key is configured.
api_key = os.environ.get("GENAI_API_KEY")
if not api_key:
    raise RuntimeError("Set the GENAI_API_KEY environment variable")

client = genai.Client(
    http_options=HttpOptions(base_url="https://work.poloapi.com"),
    api_key=api_key,
)

prompt = "Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme"

response = client.models.generate_content(
    model="gemini-3-pro-image-preview",
    contents=[prompt],
)

# Inspect the response defensively: any level of candidates/content/parts
# may be missing, so report exactly what was absent.
if not response.candidates:
    print("API未返回任何候选答案")
else:
    candidate = response.candidates[0]
    if not candidate.content:
        print("API返回的候选答案中没有内容")
    elif not hasattr(candidate.content, 'parts') or not candidate.content.parts:
        print("API返回的候选答案内容中没有parts")
    else:
        for part in candidate.content.parts:
            if hasattr(part, 'text') and part.text is not None:
                print(part.text)
            elif hasattr(part, 'inline_data') and part.inline_data is not None:
                image_data = part.inline_data
                if image_data.data is not None:
                    # Persist the raw inline image bytes returned by the API.
                    with open('generated_image.png', 'wb') as f:
                        f.write(image_data.data)
                    print("图片生成成功: generated_image.png")
|
||||
BIN
20260108/1767867138994556.png
Normal file
|
After Width: | Height: | Size: 644 KiB |
BIN
20260108/1767867138994556_thumb.png
Normal file
|
After Width: | Height: | Size: 32 KiB |
BIN
20260108/1767867148035776.png
Normal file
|
After Width: | Height: | Size: 621 KiB |
BIN
20260108/1767867148035776_thumb.png
Normal file
|
After Width: | Height: | Size: 30 KiB |
BIN
20260108/1767867156936619.png
Normal file
|
After Width: | Height: | Size: 658 KiB |
BIN
20260108/1767867156936619_thumb.png
Normal file
|
After Width: | Height: | Size: 33 KiB |
BIN
20260108/1767867165665952.png
Normal file
|
After Width: | Height: | Size: 688 KiB |
BIN
20260108/1767867165665952_thumb.png
Normal file
|
After Width: | Height: | Size: 34 KiB |
61
create_test_csv.py
Normal file
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
创建测试CSV文件用于验证图片文章挂靠效果
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
|
||||
|
||||
def create_test_articles_csv():
    """Write test_articles.csv containing five sample articles.

    Each row carries an ID, a title, body text, and a JSON-encoded tag
    list; the themes match the images under the 20260108 folder.
    """
    # (id, title, body, tag list) tuples; tags are JSON-encoded on write.
    raw_rows = [
        (1, '美丽的自然风景欣赏', '自然界的风景总是让人感到心旷神怡。无论是山川河流还是森林草原,大自然的美景总能带给我们视觉上的享受和心灵上的宁静。', ['自然', '风景', '美丽']),
        (2, '户外活动的乐趣', '走出室内,亲近大自然是一种极好的放松方式。户外活动不仅能锻炼身体,还能让我们欣赏到美丽的自然风光。', ['户外', '活动', '自然']),
        (3, '摄影艺术中的自然之美', '摄影师们常常将镜头对准大自然的美景,捕捉那些令人惊叹的瞬间。每一张风景照片都是对自然之美的独特诠释。', ['摄影', '自然', '艺术']),
        (4, '风景旅游推荐指南', '想要寻找美丽的风景胜地吗?这里有几处绝佳的风景旅游目的地,每一处都有其独特的魅力和美景等待你的探索。', ['旅游', '风景', '推荐']),
        (5, '数字图像处理技术', '现代数字图像处理技术使得图片的缩放、裁剪和优化变得更加容易。无论是原图还是缩略图,都能在保持质量的同时方便使用。', ['图像处理', '技术', '缩略图']),
    ]

    fieldnames = ['ID', '标题', '内容', '标签']
    # utf-8-sig adds a BOM so Excel detects the encoding correctly.
    with open('test_articles.csv', 'w', newline='', encoding='utf-8-sig') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(
            {'ID': row_id, '标题': title, '内容': body,
             '标签': json.dumps(tags, ensure_ascii=False)}
            for row_id, title, body, tags in raw_rows
        )

    print('测试文章CSV文件已创建: test_articles.csv')
|
||||
|
||||
|
||||
def create_test_images_csv():
    """Write test_images.csv containing six sample image records.

    The image URLs point at files in the 20260108 folder (full-size
    images plus their *_thumb counterparts).
    """
    fieldnames = ['ID', '图像ID', '图像名称', '图像URL', '标签名称', '关键词名称', '部门名称', '附加文章数量']
    # One tuple per record, in the same order as fieldnames.
    records = [
        (1, 'IMG001', '风景图1', '20260108/1767867138994556.png', '风景,自然,美丽', 'Landscape,Nature,Beauty', '生活部', 2),
        (2, 'IMG002', '风景图2', '20260108/1767867148035776.png', '自然,风光,户外', 'Nature,Landscape,Outdoor', '生活部', 1),
        (3, 'IMG003', '风景图3', '20260108/1767867156936619.png', '景色,自然,美丽', 'Scenery,Nature,Beautiful', '生活部', 3),
        (4, 'IMG004', '风景图4', '20260108/1767867165665952.png', '自然风光,户外,美景', 'Natural Scenery,Outdoor,Beautiful View', '生活部', 0),
        (5, 'IMG005', '缩略图1', '20260108/1767867138994556_thumb.png', '缩略图,小图,预览', 'Thumbnail,Small Image,Preview', '技术部', 4),
        (6, 'IMG006', '缩略图2', '20260108/1767867148035776_thumb.png', '缩略图,预览,小尺寸', 'Thumbnail,Preview,Small Size', '技术部', 1),
    ]

    # utf-8-sig adds a BOM so Excel detects the encoding correctly.
    with open('test_images.csv', 'w', newline='', encoding='utf-8-sig') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(dict(zip(fieldnames, record)) for record in records)

    print('测试图片CSV文件已创建: test_images.csv')
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Generate both CSV fixture files when run as a script.
    create_test_articles_csv()
    create_test_images_csv()
    print('\n两个测试CSV文件已创建完成,可用于测试图片文章挂靠效果。')
|
||||
69
create_test_excel.py
Normal file
@@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
创建测试Excel文件用于验证图片文章挂靠效果
|
||||
"""
|
||||
|
||||
from openpyxl import Workbook
|
||||
import json
|
||||
|
||||
|
||||
def create_test_articles_excel():
    """Build test_articles.xlsx with a header row plus five sample articles.

    Column layout: ID, title, body text, JSON-encoded tag list.
    """
    header = ['ID', '标题', '内容', '标签']
    body_rows = [
        [1, '人工智能发展趋势', '人工智能技术正在快速发展,在各个领域都有广泛应用。机器学习、深度学习等技术不断突破,推动着社会进步。', json.dumps(['人工智能', '科技', '趋势'], ensure_ascii=False)],
        [2, '健康饮食的重要性', '合理膳食是保持身体健康的基础。均衡摄入各种营养素,有助于提高免疫力,预防疾病。', json.dumps(['健康', '饮食', '营养'], ensure_ascii=False)],
        [3, '环境保护与可持续发展', '环境保护是当今世界面临的重要挑战。通过可持续发展策略,我们可以平衡经济发展与生态保护。', json.dumps(['环保', '可持续发展', '生态'], ensure_ascii=False)],
        [4, '数字化转型对企业的影响', '数字化转型正在重塑企业运营模式。通过引入新技术,企业可以提升效率,优化客户体验。', json.dumps(['数字化', '企业', '转型'], ensure_ascii=False)],
        [5, '教育创新的未来方向', '教育创新是培养未来人才的关键。利用新技术手段,可以创造更加个性化和高效的学习环境。', json.dumps(['教育', '创新', '学习'], ensure_ascii=False)],
    ]

    workbook = Workbook()
    sheet = workbook.active
    # The active sheet always exists on a fresh Workbook; the guard keeps
    # static type checkers happy.
    if sheet:
        sheet.title = '测试文章数据'
        sheet.append(header)
        for record in body_rows:
            sheet.append(record)

    workbook.save('test_articles.xlsx')
    print('测试文章Excel文件已创建: test_articles.xlsx')
|
||||
|
||||
|
||||
def create_test_images_excel():
    """Build test_images.xlsx with a header row plus six sample image records.

    Column layout: ID, image ID, image name, URL, tag names, keyword
    names, department name, attached-article count.
    """
    header = ['ID', '图像ID', '图像名称', '图像URL', '标签名称', '关键词名称', '部门名称', '附加文章数量']
    body_rows = [
        [1, 'IMG001', 'AI概念图', 'https://example.com/images/ai_concept.jpg', '人工智能,科技,趋势', 'AI,Machine Learning,Deep Learning', '科技部', 2],
        [2, 'IMG002', '健康饮食图', 'https://example.com/images/healthy_food.jpg', '健康,饮食,营养', 'Nutrition,Health,Diet', '生活部', 1],
        [3, 'IMG003', '环保地球图', 'https://example.com/images/environment.jpg', '环保,可持续发展,生态', 'Environment,Sustainability,Eco', '环保部', 3],
        [4, 'IMG004', '数字化办公图', 'https://example.com/images/digital_office.jpg', '数字化,企业,转型', 'Digital,Enterprise,Transformation', '科技部', 0],
        [5, 'IMG005', '教育创新图', 'https://example.com/images/education_innovation.jpg', '教育,创新,学习', 'Education,Innovation,Learning', '教育部', 4],
        [6, 'IMG006', '网络安全图', 'https://example.com/images/cyber_security.jpg', '安全,网络,防护', 'Security,Cyber,Protection', '安全部', 1],
    ]

    workbook = Workbook()
    sheet = workbook.active
    # The active sheet always exists on a fresh Workbook; the guard keeps
    # static type checkers happy.
    if sheet:
        sheet.title = '测试图片数据'
        sheet.append(header)
        for record in body_rows:
            sheet.append(record)

    workbook.save('test_images.xlsx')
    print('测试图片Excel文件已创建: test_images.xlsx')
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Generate both Excel fixture files when run as a script.
    create_test_articles_excel()
    create_test_images_excel()
    print('\n两个测试Excel文件已创建完成,可用于测试图片文章挂靠效果。')
|
||||
160
database_config.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""
|
||||
数据库配置管理模块
|
||||
统一管理数据库连接和SQL操作
|
||||
"""
|
||||
import pymysql
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 数据库配置
|
||||
DB_CONFIG = {
|
||||
'host': '8.149.233.36',
|
||||
'user': 'ai_article_read',
|
||||
'password': '7aK_H2yvokVumr84lLNDt8fDBp6P',
|
||||
'database': 'ai_article',
|
||||
'charset': 'utf8mb4'
|
||||
}
|
||||
|
||||
|
||||
class DatabaseManager:
    """Database manager: centralised connection handling and SQL execution.

    Wraps pymysql with three helpers (query / update / batch). Each helper
    opens a fresh connection, logs the statement, and always closes the
    cursor and connection in ``finally``.
    """

    def __init__(self, config=None):
        """Initialise the database manager.

        Args:
            config: database configuration dict; defaults to DB_CONFIG.
        """
        self.config = config or DB_CONFIG

    def get_connection(self, autocommit=False):
        """Open a new database connection.

        Args:
            autocommit: whether to enable autocommit mode.

        Returns:
            A pymysql connection object.
        """
        return pymysql.connect(**self.config, autocommit=autocommit)

    def execute_query(self, sql, params=None, fetch_one=False):
        """Execute a query statement (SELECT).

        Args:
            sql: the SQL statement.
            params: SQL parameters (tuple or list).
            fetch_one: True to return a single record, False to return all.

        Returns:
            The query result.
        """
        conn = None
        cursor = None
        try:
            conn = self.get_connection()
            cursor = conn.cursor()

            logger.info(f'[SQL] {sql.strip()} | params: {params}')
            cursor.execute(sql, params or ())

            if fetch_one:
                result = cursor.fetchone()
            else:
                result = cursor.fetchall()

            logger.debug(f'[SQL结果] 返回 {len(result) if not fetch_one and result else (1 if result else 0)} 条记录')
            return result
        except Exception as e:
            logger.error(f'执行查询失败:{e}', exc_info=True)
            raise
        finally:
            # Always release resources, even when the query raises.
            if cursor:
                cursor.close()
            if conn:
                conn.close()

    def execute_update(self, sql, params=None, autocommit=True):
        """Execute a write statement (INSERT/UPDATE/DELETE).

        Args:
            sql: the SQL statement.
            params: SQL parameters (tuple or list).
            autocommit: whether to commit automatically.

        Returns:
            The number of affected rows.
        """
        conn = None
        cursor = None
        try:
            conn = self.get_connection(autocommit=autocommit)
            cursor = conn.cursor()

            logger.info(f'[SQL] {sql.strip()} | params: {params}')
            result = cursor.execute(sql, params or ())

            # Manual commit is only needed when autocommit is disabled.
            if not autocommit:
                conn.commit()

            logger.info(f'[SQL执行] 影响 {result} 行')
            return result
        except Exception as e:
            # Roll back a manually-managed transaction before re-raising.
            if not autocommit and conn:
                conn.rollback()
            logger.error(f'执行更新失败:{e}', exc_info=True)
            raise
        finally:
            if cursor:
                cursor.close()
            if conn:
                conn.close()

    def execute_many(self, sql, params_list, autocommit=True):
        """Execute one statement repeatedly with many parameter sets.

        Rows that fail are skipped (logged at debug level) rather than
        aborting the batch — deliberate best-effort semantics.

        Args:
            sql: the SQL statement.
            params_list: parameter list; each element is one parameter set.
            autocommit: whether to commit automatically.

        Returns:
            The number of successfully executed rows.
        """
        conn = None
        cursor = None
        try:
            conn = self.get_connection(autocommit=autocommit)
            cursor = conn.cursor()

            logger.info(f'[SQL批量] {sql.strip()} | 批次数: {len(params_list)}')

            success_count = 0
            for params in params_list:
                try:
                    result = cursor.execute(sql, params)
                    if result > 0:
                        success_count += 1
                except Exception as e:
                    # Best-effort: skip the failing row and continue.
                    logger.debug(f'批量执行跳过:params={params},错误:{e}')

            if not autocommit:
                conn.commit()

            logger.info(f'[SQL批量执行] 成功 {success_count}/{len(params_list)} 条')
            return success_count
        except Exception as e:
            if not autocommit and conn:
                conn.rollback()
            logger.error(f'批量执行失败:{e}', exc_info=True)
            raise
        finally:
            if cursor:
                cursor.close()
            if conn:
                conn.close()
|
||||
|
||||
|
||||
# Global shared database manager instance (uses DB_CONFIG).
db_manager = DatabaseManager()
|
||||
55
db/ai_articles.sql
Normal file
@@ -0,0 +1,55 @@
|
||||
-- AI article content table
-- Stores AI-generated article content and its lifecycle state
-- Supports multi-channel publishing (Baidu Baijiahao, Toutiao, WeChat, ...)
-- Tracks the full workflow from topic selection through generation and
-- review to publishing

CREATE TABLE `ai_articles` (
  `id` int NOT NULL AUTO_INCREMENT COMMENT '主键',
  `batch_id` bigint UNSIGNED NOT NULL DEFAULT 0 COMMENT '批次ID,用于批量生成文章的分组',
  `topic_type_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '选题类型ID',
  `prompt_workflow_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '提示词工作流ID,关联AI生成模板',
  `topic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '文章选题/主题',
  `title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '文章标题',
  `content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '文章正文内容',
  `department` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '部门名称(遗留字段)',
  `departmentids` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '部门ID列表(遗留字段)',
  `author_id` int NULL DEFAULT NULL COMMENT '作者ID,关联ai_authors.id(百家号账号)',
  `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称(百家号账号名)',
  `department_id` int NULL DEFAULT NULL COMMENT '部门ID',
  `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '部门名称',
  `created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID,关联ai_users.id',
  `review_user_id` int NULL DEFAULT NULL COMMENT '审核用户ID,关联ai_users.id',
  `publish_user_id` int NULL DEFAULT NULL COMMENT '发布用户ID,关联ai_users.id',
  `status` enum('topic','cover_image','generate','generate_failed','draft','pending_review','approved','rejected','published_review','published','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'draft' COMMENT '文章状态:topic=选题|cover_image=封面图|generate=生成中|generate_failed=生成失败|draft=草稿|pending_review=待审核(文章已生成)|approved=审核通过|rejected=审核拒绝|published_review=发布审核中|published=已发布|failed=发布失败',
  `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '发布渠道:1=百度百家号|2=今日头条|3=微信公众号',
  `review_comment` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL COMMENT '审核意见/备注',
  `publish_time` timestamp NULL DEFAULT NULL COMMENT '发布时间',
  `baijiahao_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '百家号文章ID',
  `baijiahao_status` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '百家号平台状态',
  `word_count` int NULL DEFAULT 0 COMMENT '文章字数',
  `image_count` int NULL DEFAULT 0 COMMENT '文章配图数量',
  `coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'Coze生成的标签',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
  PRIMARY KEY (`id`) USING BTREE,

  -- Index definitions
  INDEX `created_user_id`(`created_user_id` ASC) USING BTREE COMMENT '创建用户索引',
  INDEX `review_user_id`(`review_user_id` ASC) USING BTREE COMMENT '审核用户索引',
  INDEX `publish_user_id`(`publish_user_id` ASC) USING BTREE COMMENT '发布用户索引',
  INDEX `idx_articles_status_user_created`(`status` ASC, `created_user_id` ASC, `created_at` DESC) USING BTREE COMMENT '状态+创建用户+创建时间组合索引',
  INDEX `idx_articles_status_created`(`status` ASC, `created_at` DESC) USING BTREE COMMENT '状态+创建时间索引',
  INDEX `idx_articles_status`(`status` ASC) USING BTREE COMMENT '状态索引',
  INDEX `idx_articles_created_at`(`created_at` DESC) USING BTREE COMMENT '创建时间索引',
  INDEX `idx_status_id_author`(`status` ASC, `id` ASC, `author_id` ASC) USING BTREE COMMENT '状态+ID+作者组合索引',
  INDEX `idx_articles_updated_at`(`updated_at` DESC) USING BTREE COMMENT '更新时间索引',
  INDEX `idx_articles_status_prompt_topic_id`(`status` ASC, `prompt_workflow_id` ASC, `topic` ASC, `id` ASC) USING BTREE COMMENT '状态+工作流+选题+ID组合索引',
  INDEX `idx_status_author_updated_id`(`status` ASC, `author_id` ASC, `updated_at` ASC, `id` ASC) USING BTREE COMMENT '状态+作者+更新时间+ID组合索引',
  INDEX `idx_author_status_updated_id`(`author_id` ASC, `status` ASC, `updated_at` ASC, `id` ASC) USING BTREE COMMENT '作者+状态+更新时间+ID组合索引',

  -- Foreign key constraints (all reference ai_users / ai_authors with RESTRICT)
  CONSTRAINT `ai_articles_ibfk_1` FOREIGN KEY (`author_id`) REFERENCES `ai_authors` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
  CONSTRAINT `ai_articles_ibfk_2` FOREIGN KEY (`created_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
  CONSTRAINT `ai_articles_ibfk_3` FOREIGN KEY (`review_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
  CONSTRAINT `ai_articles_ibfk_4` FOREIGN KEY (`publish_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 1115 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
|
||||
BIN
db/ai_articles_backup_20260114_121742.sql
Normal file
BIN
db/split_tables.zip
Normal file
24
db/split_tables/ai_article_images.sql
Normal file
@@ -0,0 +1,24 @@
|
||||
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_article_images
-- Link table attaching images to articles: one row per (article, image)
-- pair, enforced by uk_article_image. Keyword and department names are
-- stored alongside their ids (denormalised copies).
--

CREATE TABLE `ai_article_images` (
  `id` int NOT NULL AUTO_INCREMENT,
  `article_id` int NOT NULL DEFAULT '0',
  `image_id` int NOT NULL DEFAULT '0',
  `image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
  `image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
  `image_tag_id` int NOT NULL DEFAULT '0',
  `sort_order` int DEFAULT '0',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
  `keywords_id` int NOT NULL DEFAULT '0',
  `keywords_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
  `department_id` int NOT NULL DEFAULT '0',
  `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
  `image_source` tinyint(1) NOT NULL DEFAULT '0' COMMENT '1=tag|2=change',
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`) USING BTREE,
  UNIQUE KEY `uk_article_image` (`article_id`,`image_id`) USING BTREE,
  KEY `image_id` (`image_id`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=1053298 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
14
db/split_tables/ai_article_tags.sql
Normal file
@@ -0,0 +1,14 @@
|
||||
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_article_tags
-- One Coze-generated tag string per article (uk_article_tag makes the
-- relation 1:1); rows are removed automatically when the article is
-- deleted (ON DELETE CASCADE).
--

CREATE TABLE `ai_article_tags` (
  `id` int NOT NULL AUTO_INCREMENT,
  `article_id` int NOT NULL,
  `coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT 'Coze生成的标签',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`) USING BTREE,
  UNIQUE KEY `uk_article_tag` (`article_id`) USING BTREE,
  CONSTRAINT `ai_article_tags_ibfk_1` FOREIGN KEY (`article_id`) REFERENCES `ai_articles` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT
) ENGINE=InnoDB AUTO_INCREMENT=476258 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
52
db/split_tables/ai_articles.sql
Normal file
@@ -0,0 +1,52 @@
|
||||
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_articles
-- NOTE(review): this is a second, comment-less dump of the same table
-- defined in db/ai_articles.sql (different AUTO_INCREMENT snapshot) —
-- confirm which copy is authoritative before applying either.
--

CREATE TABLE `ai_articles` (
  `id` int NOT NULL AUTO_INCREMENT,
  `batch_id` bigint unsigned NOT NULL DEFAULT '0' COMMENT '批次ID',
  `topic_type_id` int unsigned NOT NULL DEFAULT '0',
  `prompt_workflow_id` int unsigned NOT NULL DEFAULT '0',
  `topic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
  `title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
  `content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `department` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
  `departmentids` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
  `author_id` int DEFAULT NULL,
  `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
  `department_id` int DEFAULT NULL,
  `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
  `created_user_id` int NOT NULL DEFAULT '0',
  `review_user_id` int DEFAULT NULL,
  `publish_user_id` int DEFAULT NULL,
  `status` enum('topic','cover_image','generate','generate_failed','draft','pending_review','approved','rejected','published_review','published','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'draft',
  `channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
  `review_comment` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `publish_time` timestamp NULL DEFAULT NULL,
  `baijiahao_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
  `baijiahao_status` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
  `word_count` int DEFAULT '0',
  `image_count` int DEFAULT '0',
  `coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT 'Coze生成的标签',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`) USING BTREE,
  KEY `author_id` (`author_id`) USING BTREE,
  KEY `created_user_id` (`created_user_id`) USING BTREE,
  KEY `review_user_id` (`review_user_id`) USING BTREE,
  KEY `publish_user_id` (`publish_user_id`) USING BTREE,
  KEY `idx_articles_status_user_created` (`status`,`created_user_id`,`created_at` DESC),
  KEY `idx_articles_status_created` (`status`,`created_at` DESC),
  KEY `idx_articles_status` (`status`),
  KEY `idx_articles_created_at` (`created_at` DESC),
  KEY `idx_status_id_author` (`status`,`id`,`author_id`),
  KEY `idx_articles_updated_at` (`updated_at` DESC) USING BTREE,
  KEY `idx_articles_status_prompt_topic_id` (`status`,`prompt_workflow_id`,`topic`,`id`),
  KEY `idx_status_author_updated_id` (`status`,`author_id`,`updated_at`,`id`),
  KEY `idx_author_status_updated_id` (`author_id`,`status`,`updated_at`,`id`),
  CONSTRAINT `ai_articles_ibfk_1` FOREIGN KEY (`author_id`) REFERENCES `ai_authors` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
  CONSTRAINT `ai_articles_ibfk_2` FOREIGN KEY (`created_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
  CONSTRAINT `ai_articles_ibfk_3` FOREIGN KEY (`review_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
  CONSTRAINT `ai_articles_ibfk_4` FOREIGN KEY (`publish_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE=InnoDB AUTO_INCREMENT=535975 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
31
db/split_tables/ai_authors.sql
Normal file
@@ -0,0 +1,31 @@
|
||||
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_authors
-- Publishing accounts (app credentials, per-channel cookies, profile
-- fields) plus cumulative publish/revenue counters.
-- NOTE(review): app_token and *_cookie columns hold live credentials —
-- restrict access to this table accordingly.
--

CREATE TABLE `ai_authors` (
  `id` int NOT NULL AUTO_INCREMENT,
  `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `app_id` varchar(127) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `app_token` varchar(127) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `department_id` int NOT NULL DEFAULT '0',
  `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
  `title` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
  `hospital` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
  `specialty` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `toutiao_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `toutiao_images_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `introduction` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `avatar_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
  `cumulative_published_count` int DEFAULT '0' COMMENT '累计发文量(从起始日到stat_date的总和)',
  `cumulative_revenue_sum` int DEFAULT '0' COMMENT '累计收入(从起始日到stat_date的总和)',
  `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active',
  `channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`) USING BTREE,
  KEY `idx_ai_authors_status` (`status`),
  KEY `idx_ai_authors_status_id` (`status`,`id`),
  KEY `idx_ai_authors_department_id` (`department_id`)
) ENGINE=InnoDB AUTO_INCREMENT=392 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
21
db/split_tables/ai_batch_uploads.sql
Normal file
@@ -0,0 +1,21 @@
|
||||
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_batch_uploads
-- One row per bulk-upload job: tracks the uploaded file, per-row
-- success/failure counters and an overall processing status.
--

CREATE TABLE `ai_batch_uploads` (
  `id` int NOT NULL AUTO_INCREMENT,
  `user_id` int NOT NULL,
  `file_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `file_path` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `total_count` int DEFAULT '0',
  `success_count` int DEFAULT '0',
  `failed_count` int DEFAULT '0',
  `status` enum('processing','completed','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'processing',
  `error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`) USING BTREE,
  KEY `user_id` (`user_id`) USING BTREE,
  CONSTRAINT `ai_batch_uploads_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE=InnoDB AUTO_INCREMENT=101 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
13
db/split_tables/ai_departments.sql
Normal file
@@ -0,0 +1,13 @@
|
||||
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_departments
-- Simple lookup table of department names.
--

CREATE TABLE `ai_departments` (
  `id` int NOT NULL AUTO_INCREMENT,
  `department_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`) USING BTREE,
  KEY `idx_ai_departments_created_at` (`created_at` DESC)
) ENGINE=InnoDB AUTO_INCREMENT=110 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
33
db/split_tables/ai_image_tags.sql
Normal file
@@ -0,0 +1,33 @@
|
||||
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_image_tags
-- Link table pairing images with tags (unique per image/tag pair).
-- Image, keyword and department names are denormalised next to their
-- ids; image_attached_article_count caches how many articles use the
-- image.
--

CREATE TABLE `ai_image_tags` (
  `id` int NOT NULL AUTO_INCREMENT,
  `image_id` int NOT NULL,
  `image_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
  `tag_id` int NOT NULL,
  `tag_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `keywords_id` int NOT NULL,
  `keywords_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `department_id` int NOT NULL,
  `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `image_source` tinyint unsigned NOT NULL DEFAULT '1' COMMENT '1=clean_images|2=Flower_character',
  `created_user_id` int NOT NULL DEFAULT '0',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  `image_attached_article_count` int NOT NULL DEFAULT '0' COMMENT 'Number of articles the image is attached to',
  PRIMARY KEY (`id`) USING BTREE,
  UNIQUE KEY `uk_image_tag` (`image_id`,`tag_id`) USING BTREE,
  KEY `tag_id` (`tag_id`) USING BTREE,
  KEY `idx_id_desc` (`id` DESC),
  KEY `idx_image_id_id` (`image_id`,`id` DESC),
  KEY `idx_created_at` (`created_at` DESC),
  KEY `idx_department_id` (`department_id`),
  KEY `idx_keywords_id` (`keywords_id`),
  KEY `idx_dept_keywords` (`department_id`,`keywords_id`),
  CONSTRAINT `ai_image_tags_ibfk_2` FOREIGN KEY (`tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT
) ENGINE=InnoDB AUTO_INCREMENT=29065 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
25
db/split_tables/ai_images.sql
Normal file
@@ -0,0 +1,25 @@
|
||||
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_images
-- Image asset catalogue: URLs (full + thumbnail), dimensions, file
-- size, uploader, and a soft-delete style status column.
--

CREATE TABLE `ai_images` (
  `id` int NOT NULL AUTO_INCREMENT,
  `image_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
  `thumbnail_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
  `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
  `keywords` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
  `image_type` enum('medical','lifestyle','instruction') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'medical',
  `file_size` bigint DEFAULT NULL,
  `width` int DEFAULT NULL,
  `height` int DEFAULT NULL,
  `upload_user_id` int NOT NULL,
  `status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`) USING BTREE,
  KEY `upload_user_id` (`upload_user_id`) USING BTREE,
  CONSTRAINT `ai_images_ibfk_1` FOREIGN KEY (`upload_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE=InnoDB AUTO_INCREMENT=47096 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
15
db/split_tables/ai_keywords.sql
Normal file
@@ -0,0 +1,15 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_keywords
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_keywords` (
|
||||
`id` int NOT NULL AUTO_INCREMENT,
|
||||
`keywords_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
|
||||
`department_id` int NOT NULL DEFAULT '0',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
KEY `idx_ai_keywords_dept_created` (`department_id`,`created_at` DESC),
|
||||
KEY `idx_ai_keywords_created_at` (`created_at` DESC)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=417 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
21
db/split_tables/ai_prompt_workflow.sql
Normal file
@@ -0,0 +1,21 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_prompt_workflow
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_prompt_workflow` (
|
||||
`id` int NOT NULL AUTO_INCREMENT,
|
||||
`prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
|
||||
`auth_token` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
|
||||
`workflow_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
|
||||
`workflow_type_id` int unsigned NOT NULL DEFAULT '0',
|
||||
`workflow_type_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
|
||||
`created_user_id` int NOT NULL DEFAULT '0',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
KEY `idx_created_user_time` (`created_user_id`,`created_at`) USING BTREE,
|
||||
KEY `idx_created_at` (`created_at`) USING BTREE,
|
||||
KEY `idx_workflow_id` (`workflow_id`) USING BTREE,
|
||||
KEY `idx_prompt_workflow_name` (`prompt_workflow_name`) USING BTREE
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=27 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
14
db/split_tables/ai_query_category.sql
Normal file
@@ -0,0 +1,14 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_query_category
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_query_category` (
|
||||
`id` int NOT NULL AUTO_INCREMENT COMMENT '类型ID',
|
||||
`category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称',
|
||||
`created_user_id` int NOT NULL DEFAULT '0' COMMENT '创建用户ID',
|
||||
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active' COMMENT '状态',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
|
||||
PRIMARY KEY (`id`) USING BTREE
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=6 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
20
db/split_tables/ai_query_strategies.sql
Normal file
@@ -0,0 +1,20 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_query_strategies
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_query_strategies` (
|
||||
`id` int NOT NULL AUTO_INCREMENT,
|
||||
`category_id` int NOT NULL DEFAULT '0' COMMENT '分类ID',
|
||||
`category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称',
|
||||
`query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称',
|
||||
`query_type_id` int NOT NULL DEFAULT '0' COMMENT '类型ID',
|
||||
`define_context` varchar(2048) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '定义上下文',
|
||||
`for_example` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '案例',
|
||||
`created_user_id` int NOT NULL DEFAULT '0' COMMENT '创建用户ID',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
|
||||
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active',
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
KEY `query_type_id` (`query_type_id`) USING BTREE
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=136 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
16
db/split_tables/ai_query_type.sql
Normal file
@@ -0,0 +1,16 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_query_type
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_query_type` (
|
||||
`id` int NOT NULL AUTO_INCREMENT COMMENT '类型ID',
|
||||
`category_id` int NOT NULL DEFAULT '0' COMMENT '分类ID',
|
||||
`category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称',
|
||||
`query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称',
|
||||
`created_user_id` int NOT NULL DEFAULT '0' COMMENT '创建用户ID',
|
||||
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active' COMMENT '状态',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
|
||||
PRIMARY KEY (`id`) USING BTREE
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=137 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
30
db/split_tables/ai_statistics.sql
Normal file
@@ -0,0 +1,30 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_statistics
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_statistics` (
|
||||
`id` bigint NOT NULL AUTO_INCREMENT COMMENT 'Auto-increment ID',
|
||||
`author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID',
|
||||
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称',
|
||||
`channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
|
||||
`date` date NOT NULL COMMENT 'Date of statistics',
|
||||
`submission_count` int DEFAULT '0' COMMENT 'Number of submissions (投稿量)',
|
||||
`read_count` int DEFAULT '0' COMMENT 'Number of reads (阅读量)',
|
||||
`comment_count` int DEFAULT '0' COMMENT 'Number of comments (评论量)',
|
||||
`comment_rate` decimal(5,4) DEFAULT '0.0000' COMMENT 'Comment rate (评论率)',
|
||||
`like_count` int DEFAULT '0' COMMENT 'Number of likes (点赞量)',
|
||||
`like_rate` decimal(5,4) DEFAULT '0.0000' COMMENT 'Like rate (点赞率)',
|
||||
`favorite_count` int DEFAULT '0' COMMENT 'Number of favorites (收藏量)',
|
||||
`favorite_rate` decimal(5,4) DEFAULT '0.0000' COMMENT 'Favorite rate (收藏率)',
|
||||
`share_count` int DEFAULT '0' COMMENT 'Number of shares (分享量)',
|
||||
`share_rate` decimal(5,4) DEFAULT '0.0000' COMMENT 'Share rate (分享率)',
|
||||
`slide_ratio` decimal(5,4) DEFAULT '0.0000' COMMENT 'Slide view ratio (滑图占比)',
|
||||
`baidu_search_volume` int DEFAULT '0' COMMENT 'Baidu search volume (百度搜索量)',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'Creation timestamp',
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Update timestamp',
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
UNIQUE KEY `unique_date` (`date`,`author_id`) USING BTREE,
|
||||
KEY `idx_date` (`date`) USING BTREE,
|
||||
KEY `idx_author_id` (`author_id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=40720 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI Content Statistics';
|
||||
30
db/split_tables/ai_statistics_day.sql
Normal file
@@ -0,0 +1,30 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_statistics_day
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_statistics_day` (
|
||||
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键',
|
||||
`author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID',
|
||||
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称',
|
||||
`channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
|
||||
`stat_date` date NOT NULL COMMENT '统计日期(天)',
|
||||
`total_submission_count` int DEFAULT '0' COMMENT '投稿量(当日总计)',
|
||||
`total_read_count` int DEFAULT '0' COMMENT '阅读量(当日总计)',
|
||||
`total_comment_count` int DEFAULT '0' COMMENT '评论量(当日总计)',
|
||||
`total_like_count` int DEFAULT '0' COMMENT '点赞量(当日总计)',
|
||||
`total_favorite_count` int DEFAULT '0' COMMENT '收藏量(当日总计)',
|
||||
`total_share_count` int DEFAULT '0' COMMENT '分享量(当日总计)',
|
||||
`avg_comment_rate` decimal(5,4) DEFAULT '0.0000' COMMENT '评论率(当日平均)',
|
||||
`avg_like_rate` decimal(5,4) DEFAULT '0.0000' COMMENT '点赞率(当日平均)',
|
||||
`avg_favorite_rate` decimal(5,4) DEFAULT '0.0000' COMMENT '收藏率(当日平均)',
|
||||
`avg_share_rate` decimal(5,4) DEFAULT '0.0000' COMMENT '分享率(当日平均)',
|
||||
`avg_slide_ratio` decimal(5,4) DEFAULT '0.0000' COMMENT '滑图占比(当日平均)',
|
||||
`total_baidu_search_volume` int DEFAULT '0' COMMENT '百度搜索量(当日总计)',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
UNIQUE KEY `uk_stat_date` (`stat_date`,`author_id`) USING BTREE,
|
||||
KEY `idx_stat_date` (`stat_date`) USING BTREE,
|
||||
KEY `idx_author_id` (`author_id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=41142 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI内容每日汇总统计表';
|
||||
25
db/split_tables/ai_statistics_days.sql
Normal file
@@ -0,0 +1,25 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_statistics_days
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_statistics_days` (
|
||||
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键',
|
||||
`author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID',
|
||||
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称',
|
||||
`channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
|
||||
`stat_date` date NOT NULL COMMENT '统计日期(自然日)',
|
||||
`daily_published_count` int DEFAULT '0' COMMENT '单日发文量',
|
||||
`day_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当天收益(stat_date所在自然日)',
|
||||
`cumulative_published_count` int DEFAULT '0' COMMENT '累计发文量(从起始日到stat_date的总和)',
|
||||
`monthly_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当月收益(stat_date所在自然月的总收益)',
|
||||
`weekly_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当周收益(stat_date所在自然周的总收益,周一至周日)',
|
||||
`revenue_mom_growth_rate` decimal(10,6) DEFAULT '0.000000' COMMENT '收益月环比增长率((本月收益 - 上月收益) / NULLIF(上月收益, 0))',
|
||||
`revenue_wow_growth_rate` decimal(10,6) DEFAULT '0.000000' COMMENT '收益周环比增长率((本周收益 - 上周收益) / NULLIF(上周收益, 0))',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
UNIQUE KEY `uk_stat_date` (`stat_date`,`author_id`) USING BTREE,
|
||||
KEY `idx_stat_date` (`stat_date`) USING BTREE,
|
||||
KEY `idx_author_id` (`author_id`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=98484 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI内容每日核心指标汇总表(含累计、收益及环比)';
|
||||
20
db/split_tables/ai_statistics_monthly.sql
Normal file
@@ -0,0 +1,20 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_statistics_monthly
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_statistics_monthly` (
|
||||
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键',
|
||||
`author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID',
|
||||
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称',
|
||||
`channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
|
||||
`stat_monthly` varchar(48) NOT NULL COMMENT '统计日期(自然月)',
|
||||
`monthly_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当月收益(stat_date所在自然月的总收益)',
|
||||
`revenue_mom_growth_rate` decimal(10,6) DEFAULT '0.000000' COMMENT '收益月环比增长率((本月收益 - 上月收益) / NULLIF(上月收益, 0))',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
UNIQUE KEY `uk_author_stat_date` (`author_id`,`stat_monthly`) USING BTREE,
|
||||
KEY `idx_stat_date` (`stat_monthly`) USING BTREE,
|
||||
KEY `idx_author_id` (`author_id`) USING BTREE
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=41278 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI内容每月核心指标汇总表(含累计、收益及环比)';
|
||||
20
db/split_tables/ai_statistics_weekly.sql
Normal file
@@ -0,0 +1,20 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_statistics_weekly
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_statistics_weekly` (
|
||||
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键',
|
||||
`author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID',
|
||||
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称',
|
||||
`channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
|
||||
`stat_weekly` varchar(48) NOT NULL COMMENT '统计日期(自然周)',
|
||||
`weekly_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当周收益(stat_date所在自然周的总收益,周一至周日)',
|
||||
`revenue_wow_growth_rate` decimal(10,6) DEFAULT '0.000000' COMMENT '收益周环比增长率((本周收益 - 上周收益) / NULLIF(上周收益, 0))',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
UNIQUE KEY `uk_author_stat_date` (`author_id`,`stat_weekly`) USING BTREE,
|
||||
KEY `idx_stat_date` (`stat_weekly`) USING BTREE,
|
||||
KEY `idx_author_id` (`author_id`) USING BTREE
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=47934 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI内容每周核心指标汇总表(含累计、收益及环比)';
|
||||
18
db/split_tables/ai_tag_subsets.sql
Normal file
@@ -0,0 +1,18 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_tag_subsets
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_tag_subsets` (
|
||||
`id` int NOT NULL AUTO_INCREMENT,
|
||||
`parent_tag_id` int NOT NULL,
|
||||
`subset_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
|
||||
`subset_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
|
||||
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
|
||||
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
KEY `parent_tag_id` (`parent_tag_id`) USING BTREE,
|
||||
CONSTRAINT `ai_tag_subsets_ibfk_1` FOREIGN KEY (`parent_tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=25903 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
18
db/split_tables/ai_tags.sql
Normal file
@@ -0,0 +1,18 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_tags
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_tags` (
|
||||
`id` int NOT NULL AUTO_INCREMENT,
|
||||
`tag_name` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
|
||||
`tag_category` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
|
||||
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
|
||||
`description` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
|
||||
`usage_count` int DEFAULT '0',
|
||||
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
UNIQUE KEY `uk_tag_name` (`tag_name`) USING BTREE
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=13492 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
21
db/split_tables/ai_topic_type.sql
Normal file
@@ -0,0 +1,21 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_topic_type
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_topic_type` (
|
||||
`id` int NOT NULL AUTO_INCREMENT,
|
||||
`topic_type_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
|
||||
`type_id` int NOT NULL DEFAULT '0',
|
||||
`type_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
|
||||
`prompt_workflow_id` int unsigned NOT NULL DEFAULT '0',
|
||||
`prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
|
||||
`created_user_id` int NOT NULL DEFAULT '0',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
KEY `idx_created_user_time` (`created_user_id`,`created_at`) USING BTREE,
|
||||
KEY `idx_created_at` (`created_at`) USING BTREE,
|
||||
KEY `idx_type_id` (`type_id`) USING BTREE,
|
||||
KEY `idx_topic_type_name` (`topic_type_name`) USING BTREE
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=28 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
16
db/split_tables/ai_user_authors.sql
Normal file
@@ -0,0 +1,16 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_user_authors
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_user_authors` (
|
||||
`id` int NOT NULL AUTO_INCREMENT,
|
||||
`user_id` int unsigned NOT NULL DEFAULT '0',
|
||||
`username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
|
||||
`author_id` int NOT NULL DEFAULT '0',
|
||||
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT '',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
UNIQUE KEY `uk_user_author` (`user_id`,`author_id`) USING BTREE
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=15935 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
21
db/split_tables/ai_user_topics.sql
Normal file
@@ -0,0 +1,21 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_user_topics
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_user_topics` (
|
||||
`id` int NOT NULL AUTO_INCREMENT,
|
||||
`user_id` int unsigned NOT NULL DEFAULT '0',
|
||||
`username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
|
||||
`topic_type_id` int unsigned NOT NULL DEFAULT '0',
|
||||
`topic_type_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
|
||||
`prompt_workflow_id` int NOT NULL DEFAULT '0',
|
||||
`prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT '',
|
||||
`status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'inactive',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
KEY `idx_topic_type_id` (`topic_type_id`) USING BTREE,
|
||||
KEY `idx_prompt_workflow_id` (`prompt_workflow_id`) USING BTREE,
|
||||
KEY `idx_created_at` (`created_at`) USING BTREE
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=127 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
20
db/split_tables/ai_users.sql
Normal file
@@ -0,0 +1,20 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: ai_users
|
||||
--
|
||||
|
||||
CREATE TABLE `ai_users` (
|
||||
`id` int NOT NULL AUTO_INCREMENT,
|
||||
`username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
|
||||
`password` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
|
||||
`real_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
|
||||
`email` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
|
||||
`phone` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
|
||||
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
|
||||
`role` enum('admin','editor','reviewer','publisher') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'editor',
|
||||
`status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
UNIQUE KEY `uk_username` (`username`) USING BTREE
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=239 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
|
||||
38
db/split_tables/baidu_keyword.sql
Normal file
@@ -0,0 +1,38 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: baidu_keyword
|
||||
--
|
||||
|
||||
CREATE TABLE `baidu_keyword` (
|
||||
`id` int NOT NULL AUTO_INCREMENT,
|
||||
`keyword` varchar(255) NOT NULL,
|
||||
`crawled` tinyint DEFAULT '0',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`parents_id` int unsigned NOT NULL DEFAULT '0' COMMENT '父层级',
|
||||
`seed_id` int unsigned NOT NULL DEFAULT '0' COMMENT '种子',
|
||||
`seed_name` varchar(512) NOT NULL DEFAULT '' COMMENT '种子名称',
|
||||
`department` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '科室',
|
||||
`department_id` int unsigned NOT NULL DEFAULT '0' COMMENT '科室ID',
|
||||
`partsof_speech` varchar(128) NOT NULL DEFAULT '' COMMENT '词性',
|
||||
`partsof_speech_id` int unsigned NOT NULL DEFAULT '0' COMMENT '词性ID',
|
||||
`type` varchar(128) NOT NULL DEFAULT '' COMMENT '类型',
|
||||
`type_id` int unsigned NOT NULL DEFAULT '0' COMMENT '类型ID',
|
||||
`yesorno_question` enum('yes','no','unprocessed') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'unprocessed' COMMENT '是否是问题?',
|
||||
`query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称',
|
||||
`category_id` int NOT NULL DEFAULT '0' COMMENT '分类ID',
|
||||
`query_type_id` int NOT NULL DEFAULT '0' COMMENT '类型ID',
|
||||
`category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称',
|
||||
`created_user_id` int NOT NULL DEFAULT '0' COMMENT '创建用户ID',
|
||||
`query_summary_status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'ready',
|
||||
`query_status` enum('draft','ready','doing','failed','finished','similarity','automated_review','manual_review','generate','published') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'draft' COMMENT 'query完整扭转流程状态',
|
||||
`blocking_reason` varchar(255) NOT NULL DEFAULT '' COMMENT '审核不通过原因',
|
||||
`article_id` int NOT NULL DEFAULT '0' COMMENT '文章ID',
|
||||
`query_stage` enum('draft','created','summary','reviewed','generated','published') NOT NULL DEFAULT 'draft' COMMENT '分5个阶段,创建|总结|审核|生文|发布',
|
||||
`status` enum('draft','available','unavailable','successful','failed') NOT NULL DEFAULT 'draft' COMMENT '状态_分2个阶段|可用|不可用|发布成功|发布失败',
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
`review_user_id` int NOT NULL DEFAULT '0' COMMENT '审核用户ID',
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY `keyword` (`keyword`),
|
||||
KEY `idx_crawled_seed` (`crawled`,`seed_id`),
|
||||
KEY `idx_created_at` (`created_at`)
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=798537 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
|
||||
15
db/split_tables/baidu_seed_keywords.sql
Normal file
@@ -0,0 +1,15 @@
|
||||
-- SQL table definition
|
||||
-- Generated from splitting a larger SQL file
|
||||
-- Table: baidu_seed_keywords
|
||||
--
|
||||
|
||||
CREATE TABLE `baidu_seed_keywords` (
|
||||
`id` int NOT NULL AUTO_INCREMENT,
|
||||
`keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
|
||||
`crawled` tinyint DEFAULT '0',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'ready',
|
||||
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
UNIQUE KEY `keyword` (`keyword`) USING BTREE
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=231 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC;
|
||||
137
export_approved_articles.py
Normal file
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
导出审核通过的文章内容和标签到CSV文件
|
||||
此脚本将从ai_articles表中导出status为approved的文章内容和标签
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
from log_config import setup_logger
|
||||
|
||||
|
||||
def export_approved_articles_to_csv(output_file='approved_articles_export.csv'):
|
||||
"""
|
||||
导出审核通过的文章内容和标签到CSV文件
|
||||
|
||||
Args:
|
||||
output_file: 输出的CSV文件名
|
||||
"""
|
||||
# 设置日志记录器
|
||||
logger = setup_logger('article_export', 'logs/article_export.log', 'logs/article_export_error.log')
|
||||
|
||||
try:
|
||||
# 从数据库获取真实数据
|
||||
from database_config import db_manager
|
||||
|
||||
# 查询审核通过的文章,包含内容和标签
|
||||
sql = """
|
||||
SELECT id, title, content, coze_tag, created_at, updated_at
|
||||
FROM ai_articles
|
||||
WHERE status = 'approved'
|
||||
ORDER BY id
|
||||
"""
|
||||
|
||||
logger.info("开始查询审核通过的文章数据...")
|
||||
results = db_manager.execute_query(sql)
|
||||
|
||||
if not results:
|
||||
logger.warning("没有找到状态为approved的文章")
|
||||
print("没有找到状态为approved的文章")
|
||||
return
|
||||
|
||||
logger.info(f"查询到 {len(results)} 条审核通过的文章")
|
||||
print(f"查询到 {len(results)} 条审核通过的文章")
|
||||
|
||||
# 准备输出目录
|
||||
output_dir = os.path.dirname(output_file)
|
||||
if output_dir and not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
|
||||
# 写入CSV文件
|
||||
with open(output_file, 'w', newline='', encoding='utf-8-sig') as csvfile:
|
||||
fieldnames = ['ID', '标题', '内容', '标签', '创建时间', '更新时间']
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||
|
||||
# 写入表头
|
||||
writer.writeheader()
|
||||
|
||||
# 写入数据
|
||||
for row in results:
|
||||
id_val, title, content, coze_tag, created_at, updated_at = row
|
||||
|
||||
# 尝试解析标签,如果是JSON格式则转换为字符串
|
||||
parsed_tags = coze_tag
|
||||
if coze_tag:
|
||||
try:
|
||||
# 尝试解析JSON格式的标签
|
||||
tags_data = json.loads(coze_tag)
|
||||
if isinstance(tags_data, list):
|
||||
parsed_tags = ', '.join(tags_data)
|
||||
elif isinstance(tags_data, dict):
|
||||
# 如果是字典格式,提取值
|
||||
parsed_tags = ', '.join(str(v) for v in tags_data.values())
|
||||
except json.JSONDecodeError:
|
||||
# 如果不是JSON格式,保持原样
|
||||
parsed_tags = coze_tag
|
||||
|
||||
writer.writerow({
|
||||
'ID': id_val,
|
||||
'标题': title,
|
||||
'内容': content,
|
||||
'标签': parsed_tags or '',
|
||||
'创建时间': created_at.strftime('%Y-%m-%d %H:%M:%S') if created_at else '',
|
||||
'更新时间': updated_at.strftime('%Y-%m-%d %H:%M:%S') if updated_at else ''
|
||||
})
|
||||
|
||||
logger.info(f"成功导出 {len(results)} 条文章到 {output_file}")
|
||||
print(f"成功导出 {len(results)} 条文章到 {output_file}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"导出文章数据时发生错误: {e}", exc_info=True)
|
||||
print(f"导出文章数据时发生错误: {e}")
|
||||
raise
|
||||
|
||||
|
||||
def test_db_connection():
|
||||
"""
|
||||
测试数据库连接
|
||||
"""
|
||||
try:
|
||||
from database_config import db_manager
|
||||
# 尝试执行一个简单的查询来测试连接
|
||||
test_sql = "SELECT 1 as test"
|
||||
result = db_manager.execute_query(test_sql)
|
||||
print("数据库连接测试成功:", result)
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"数据库连接测试失败: {e}")
|
||||
return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 创建logs目录
|
||||
if not os.path.exists('logs'):
|
||||
os.makedirs('logs')
|
||||
|
||||
# 检查命令行参数
|
||||
import sys
|
||||
if len(sys.argv) > 1:
|
||||
output_filename = sys.argv[1]
|
||||
else:
|
||||
output_filename = 'approved_articles_export.csv'
|
||||
|
||||
# 测试数据库连接
|
||||
print("正在测试数据库连接...")
|
||||
if not test_db_connection():
|
||||
print("数据库连接失败,请检查数据库配置。")
|
||||
print("请确认以下信息:")
|
||||
print("- 数据库服务器是否正常运行")
|
||||
print("- 数据库地址、用户名、密码是否正确")
|
||||
print("- 网络连接是否正常")
|
||||
print("- 用户是否有查询ai_articles表的权限")
|
||||
exit(1)
|
||||
|
||||
export_approved_articles_to_csv(output_filename)
|
||||
137
export_image_tags.py
Normal file
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
导出符合条件的图像标签数据到CSV文件
|
||||
导出条件:image_attached_article_count < 5
|
||||
"""
|
||||
|
||||
import csv
|
||||
import os
|
||||
from datetime import datetime
|
||||
from database_config import db_manager
|
||||
from log_config import setup_logger
|
||||
|
||||
|
||||
def export_image_tags_to_csv(output_file='image_tags_filtered.csv'):
|
||||
"""
|
||||
导出符合条件的图像标签数据到CSV文件
|
||||
|
||||
Args:
|
||||
output_file: 输出的CSV文件名
|
||||
"""
|
||||
# 设置日志记录器
|
||||
logger = setup_logger('image_tags_export', 'logs/image_tags_export.log', 'logs/image_tags_export_error.log')
|
||||
|
||||
# 从数据库获取真实数据
|
||||
from database_config import db_manager
|
||||
|
||||
# 查询符合条件的图像标签数据
|
||||
sql = """
|
||||
SELECT id, image_id, image_name, image_url, image_thumb_url, tag_id, tag_name,
|
||||
keywords_id, keywords_name, department_id, department_name, image_source,
|
||||
created_user_id, created_at, updated_at, image_attached_article_count
|
||||
FROM ai_image_tags
|
||||
WHERE image_attached_article_count < 5
|
||||
ORDER BY id
|
||||
"""
|
||||
|
||||
logger.info("开始查询符合条件的图像标签数据...")
|
||||
results = db_manager.execute_query(sql)
|
||||
|
||||
if not results:
|
||||
logger.warning("没有找到符合条件的图像标签数据 (image_attached_article_count < 5)")
|
||||
print("没有找到符合条件的图像标签数据 (image_attached_article_count < 5)")
|
||||
return
|
||||
|
||||
logger.info(f"查询到 {len(results)} 条符合条件的图像标签数据")
|
||||
print(f"查询到 {len(results)} 条符合条件的图像标签数据")
|
||||
|
||||
# 准备输出目录
|
||||
output_dir = os.path.dirname(output_file)
|
||||
if output_dir and not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
|
||||
# 写入CSV文件
|
||||
with open(output_file, 'w', newline='', encoding='utf-8-sig') as csvfile:
|
||||
fieldnames = [
|
||||
'ID', '图像ID', '图像名称', '图像URL', '缩略图URL', '标签ID', '标签名称',
|
||||
'关键词ID', '关键词名称', '部门ID', '部门名称', '图像来源',
|
||||
'创建用户ID', '创建时间', '更新时间', '附加文章数量'
|
||||
]
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||
|
||||
# 写入表头
|
||||
writer.writeheader()
|
||||
|
||||
# 写入数据
|
||||
for row in results:
|
||||
(
|
||||
id_val, image_id, image_name, image_url, image_thumb_url,
|
||||
tag_id, tag_name, keywords_id, keywords_name,
|
||||
department_id, department_name, image_source,
|
||||
created_user_id, created_at, updated_at, image_attached_article_count
|
||||
) = row
|
||||
|
||||
writer.writerow({
|
||||
'ID': id_val,
|
||||
'图像ID': image_id,
|
||||
'图像名称': image_name,
|
||||
'图像URL': image_url,
|
||||
'缩略图URL': image_thumb_url,
|
||||
'标签ID': tag_id,
|
||||
'标签名称': tag_name,
|
||||
'关键词ID': keywords_id,
|
||||
'关键词名称': keywords_name,
|
||||
'部门ID': department_id,
|
||||
'部门名称': department_name,
|
||||
'图像来源': image_source,
|
||||
'创建用户ID': created_user_id,
|
||||
'创建时间': created_at.strftime('%Y-%m-%d %H:%M:%S') if created_at else '',
|
||||
'更新时间': updated_at.strftime('%Y-%m-%d %H:%M:%S') if updated_at else '',
|
||||
'附加文章数量': image_attached_article_count
|
||||
})
|
||||
|
||||
logger.info(f"成功导出 {len(results)} 条图像标签数据到 {output_file}")
|
||||
print(f"成功导出 {len(results)} 条图像标签数据到 {output_file}")
|
||||
|
||||
|
||||
def test_db_connection():
    """Check that the database is reachable.

    Runs a trivial ``SELECT 1`` probe through ``db_manager`` and reports
    the outcome on stdout.

    Returns:
        bool: True when the probe query succeeds, False otherwise.
    """
    probe_sql = "SELECT 1 as test"
    try:
        outcome = db_manager.execute_query(probe_sql)
    except Exception as exc:  # connection, credential or permission problems
        print(f"数据库连接测试失败: {exc}")
        return False
    print("数据库连接测试成功:", outcome)
    return True
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Ensure the logs directory exists before any logging happens
    if not os.path.exists('logs'):
        os.makedirs('logs')

    # Verify database connectivity before attempting the export
    print("正在测试数据库连接...")
    if not test_db_connection():
        print("数据库连接失败,请检查数据库配置。")
        print("请确认以下信息:")
        print("- 数据库服务器是否正常运行")
        print("- 数据库地址、用户名、密码是否正确")
        print("- 网络连接是否正常")
        print("- 用户是否有查询ai_image_tags表的权限")
        exit(1)

    # Default CSV output file name
    output_filename = 'image_tags_filtered.csv'

    # The first CLI argument, when present, overrides the output file name
    import sys
    if len(sys.argv) > 1:
        output_filename = sys.argv[1]

    export_image_tags_to_csv(output_filename)
|
||||
BIN
generated_image.png
Normal file
|
After Width: | Height: | Size: 708 KiB |
BIN
generated_image_3679d898-fab5-41b2-97c7-9ccd7168d0fc.png
Normal file
|
After Width: | Height: | Size: 1.0 MiB |
BIN
generated_image_6d5ade2f-633c-4782-93c2-6c8247ea5dee.png
Normal file
|
After Width: | Height: | Size: 617 KiB |
BIN
generated_image_93dfbfee-d664-4778-abc1-c9f3ef080de4.png
Normal file
|
After Width: | Height: | Size: 1.1 MiB |
337
log_config.py
Normal file
@@ -0,0 +1,337 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
统一日志配置模块
|
||||
提供按日期自动切割日志文件的功能
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import sys
|
||||
from logging.handlers import TimedRotatingFileHandler
|
||||
from datetime import datetime
|
||||
|
||||
def setup_logger(name, log_file, error_log_file=None, level=logging.INFO,
                 backup_count=30, error_backup_count=90, console_output=True, force_reinit=False):
    """
    Configure a named logger with daily (midnight) log-file rotation.

    Args:
        name: Logger name (``logging.getLogger(name)``).
        log_file: Path of the main log file.
        error_log_file: Path of the error-only log file (optional).
        level: Logging level for the main file and console handlers.
        backup_count: Days of rotated main-log files to keep.
        error_backup_count: Days of rotated error-log files to keep.
        console_output: Whether to also log to stdout.
        force_reinit: If True, drop any existing handlers and rebuild them.

    Returns:
        logging.Logger: The configured logger instance.
    """
    # Make sure the directory holding the log file exists
    log_dir = os.path.dirname(log_file)
    if log_dir and not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # Get (or lazily create) the named logger
    logger = logging.getLogger(name)
    logger.setLevel(level)

    # Handlers are only (re)attached when the logger has none yet,
    # or when the caller explicitly forces reinitialization
    need_reinit = force_reinit or not logger.handlers

    # On forced reinit, strip the existing handlers first
    if force_reinit and logger.handlers:
        print(f"强制重新初始化日志记录器: {name}")
        for handler in logger.handlers[:]:  # iterate over a copy while removing
            logger.removeHandler(handler)
        need_reinit = True

    if need_reinit:
        # Shared format for both file handlers
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )

        # 1. Main log file handler - rotated daily at midnight
        file_handler = TimedRotatingFileHandler(
            filename=log_file,
            when='midnight',   # rotate at midnight
            interval=1,        # once per day
            backupCount=backup_count,  # days of history to keep
            encoding='utf-8'
        )
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)

        # Rotated files are named filename.log.2025-07-21
        file_handler.suffix = "%Y-%m-%d"

        # Custom namer hook; currently a no-op that keeps the default name
        def namer(default_name):
            return default_name
        file_handler.namer = namer

        # Attach the main file handler
        logger.addHandler(file_handler)

        # 2. Error-only file handler (when a path was given)
        if error_log_file:
            error_file_handler = TimedRotatingFileHandler(
                filename=error_log_file,
                when='midnight',
                interval=1,
                backupCount=error_backup_count,  # errors are kept longer
                encoding='utf-8'
            )
            error_file_handler.setLevel(logging.ERROR)
            error_file_handler.setFormatter(formatter)
            error_file_handler.suffix = "%Y-%m-%d"
            error_file_handler.namer = namer
            logger.addHandler(error_file_handler)

        # 3. Console handler (when enabled)
        if console_output:
            console_handler = logging.StreamHandler(sys.stdout)
            console_handler.setLevel(level)
            console_formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                datefmt='%H:%M:%S'
            )
            console_handler.setFormatter(console_formatter)
            logger.addHandler(console_handler)

        # Quiet down chatty third-party libraries
        logging.getLogger('requests').setLevel(logging.WARNING)
        logging.getLogger('urllib3').setLevel(logging.WARNING)
        logging.getLogger('whoosh').setLevel(logging.WARNING)

        # Record the logging-system startup details
        logger.info(f"日志系统已启动 - 记录器: {name}")
        logger.info(f"主日志文件: {log_file}")
        if error_log_file:
            logger.info(f"错误日志文件: {error_log_file}")
        logger.info(f"日志保留策略: 每天午夜分割,主日志保留{backup_count}天")
        if error_log_file:
            logger.info(f"错误日志保留策略: 每天午夜分割,保留{error_backup_count}天")

    return logger
|
||||
|
||||
def setup_curl_convert_logger(force_reinit=False):
    """Build the logger used by curl_convert.py (30/90-day retention)."""
    config = dict(
        name='curl_convert',
        log_file='logs/curl_convert.log',
        error_log_file='logs/curl_convert_error.log',
        level=logging.INFO,
        backup_count=30,
        error_backup_count=90,
        console_output=True,
        force_reinit=force_reinit,
    )
    return setup_logger(**config)
|
||||
|
||||
def setup_article_server_logger(force_reinit=False):
    """Build the logger used by flask_article_server.py (3/9-day retention)."""
    config = dict(
        name='article_server',
        log_file='logs/article_server.log',
        error_log_file='logs/article_error.log',
        level=logging.INFO,
        backup_count=3,
        error_backup_count=9,
        console_output=True,
        force_reinit=force_reinit,
    )
    return setup_logger(**config)
|
||||
|
||||
def setup_article_server_search_logger(force_reinit=False):
    """Build the logger used by flask_article_server_search.py (3/9-day retention)."""
    config = dict(
        name='article_server_search',
        log_file='logs/article_server_search.log',
        error_log_file='logs/article_server_search_error.log',
        level=logging.INFO,
        backup_count=3,
        error_backup_count=9,
        console_output=True,
        force_reinit=force_reinit,
    )
    return setup_logger(**config)
|
||||
|
||||
def setup_aiarticle_server_logger(force_reinit=False):
    """Build the logger used by flask_aiarticle_server.py (30/90-day retention)."""
    config = dict(
        name='aiarticle_server',
        log_file='logs/aiarticle_server.log',
        error_log_file='logs/aiarticle_server_error.log',
        level=logging.INFO,
        backup_count=30,
        error_backup_count=90,
        console_output=True,
        force_reinit=force_reinit,
    )
    return setup_logger(**config)
|
||||
|
||||
def setup_whoosh_search_tags_logger(force_reinit=False):
    """Build the logger used by whoosh_search_tags.py (30/90-day retention)."""
    config = dict(
        name='whoosh_search_tags',
        log_file='logs/whoosh_search_tags.log',
        error_log_file='logs/whoosh_search_tags_error.log',
        level=logging.INFO,
        backup_count=30,
        error_backup_count=90,
        console_output=True,
        force_reinit=force_reinit,
    )
    return setup_logger(**config)
|
||||
|
||||
def setup_baidu_crawl_logger(force_reinit=False):
    """Build the logger used by baidu_crawl.py (3/3-day retention)."""
    config = dict(
        name='baidu_crawl',
        log_file='logs/baidu_crawl.log',
        error_log_file='logs/baidu_crawl_error.log',
        level=logging.INFO,
        backup_count=3,
        error_backup_count=3,
        console_output=True,
        force_reinit=force_reinit,
    )
    return setup_logger(**config)
|
||||
|
||||
def setup_baidu_seed_logger(force_reinit=False):
    """Build the logger used by baidu_seed.py (3/3-day retention)."""
    config = dict(
        name='baidu_seed',
        log_file='logs/baidu_seed.log',
        error_log_file='logs/baidu_seed_error.log',
        level=logging.INFO,
        backup_count=3,
        error_backup_count=3,
        console_output=True,
        force_reinit=force_reinit,
    )
    return setup_logger(**config)
|
||||
|
||||
def setup_baidu_crawl_again_logger(force_reinit=False):
    """Set up the logger for baidu_crawl_again.py (3/3-day retention)."""
    return setup_logger(
        name='baidu_crawl_again',
        log_file='logs/baidu_crawl_again.log',
        error_log_file='logs/baidu_crawl_again_error.log',
        level=logging.INFO,
        backup_count=3,
        error_backup_count=3,
        console_output=True,
        force_reinit=force_reinit
    )
|
||||
|
||||
def reinitialize_all_loggers():
    """Force-reinitialize every logger configured by this module.

    Rebuilds the handlers of each per-script logger so that rotated or
    externally-deleted log files get fresh handles.
    """
    print("重新初始化所有日志记录器...")

    # Force-reinit every setup_* logger defined in this module.
    # Fix: setup_baidu_crawl_again_logger was previously missing here even
    # though the function name promises ALL loggers are reinitialized.
    setup_curl_convert_logger(force_reinit=True)
    setup_article_server_logger(force_reinit=True)
    setup_article_server_search_logger(force_reinit=True)
    setup_aiarticle_server_logger(force_reinit=True)
    setup_whoosh_search_tags_logger(force_reinit=True)
    setup_baidu_crawl_logger(force_reinit=True)
    setup_baidu_seed_logger(force_reinit=True)
    setup_baidu_crawl_again_logger(force_reinit=True)

    print("所有日志记录器重新初始化完成")
|
||||
|
||||
def cleanup_old_logs(log_dir='logs', days_to_keep=30):
    """Delete log files older than *days_to_keep* days.

    Scans *log_dir* for both rotated (``*.log.*``) and current (``*.log``)
    log files and removes any whose modification time is older than the
    cutoff. Per-file failures are reported on stdout but do not abort the
    scan. A missing directory is silently ignored.

    Args:
        log_dir: Directory containing the log files.
        days_to_keep: Age threshold in days; older files are removed.
    """
    import glob
    from datetime import datetime, timedelta

    if not os.path.exists(log_dir):
        return

    cutoff = datetime.now() - timedelta(days=days_to_keep)

    # Rotated files first, then the live *.log files
    candidates = (glob.glob(os.path.join(log_dir, '*.log.*'))
                  + glob.glob(os.path.join(log_dir, '*.log')))

    for log_file in candidates:
        try:
            modified = datetime.fromtimestamp(os.path.getmtime(log_file))
            if modified < cutoff:
                os.remove(log_file)
                print(f"已删除旧日志文件: {log_file}")
        except Exception as e:
            print(f"删除日志文件失败 {log_file}: {e}")
|
||||
|
||||
def get_log_file_info(log_dir='logs'):
    """Collect size and mtime metadata for every ``*.log`` file in *log_dir*.

    Args:
        log_dir: Directory to scan.

    Returns:
        dict: Maps file name to a dict with ``size`` (bytes), ``size_mb``,
        ``modified`` (``YYYY-MM-DD HH:MM:SS``) and ``path``. On a per-file
        failure the value is ``{'error': <message>}``. Returns an empty
        dict when the directory does not exist.
    """
    if not os.path.exists(log_dir):
        return {}

    info = {}
    for name in os.listdir(log_dir):
        if not name.endswith('.log'):
            continue
        full_path = os.path.join(log_dir, name)
        try:
            byte_size = os.path.getsize(full_path)
            modified_at = datetime.fromtimestamp(os.path.getmtime(full_path))
            info[name] = {
                'size': byte_size,
                'size_mb': round(byte_size / (1024 * 1024), 2),
                'modified': modified_at.strftime('%Y-%m-%d %H:%M:%S'),
                'path': full_path,
            }
        except Exception as exc:
            info[name] = {'error': str(exc)}

    return info
|
||||
|
||||
if __name__ == "__main__":
    # Smoke-test the logging configuration
    print("测试日志配置...")

    # Exercise each per-script logger once
    logger1 = setup_curl_convert_logger()
    logger1.info("curl_convert 日志测试")

    logger2 = setup_article_server_logger()
    logger2.info("article_server 日志测试")

    logger3 = setup_article_server_search_logger()
    logger3.info("article_server_search 日志测试")

    logger4 = setup_aiarticle_server_logger()
    logger4.info("aiarticle_server 日志测试")

    logger5 = setup_whoosh_search_tags_logger()
    logger5.info("whoosh_search_tags 日志测试")

    # Dump current log-file metadata
    print("\n当前日志文件信息:")
    log_info = get_log_file_info()
    for filename, info in log_info.items():
        if 'error' not in info:
            # NOTE(review): the literal "(unknown)" below looks like extraction
            # damage of an f-string placeholder (probably "{filename}") - confirm
            # against the original repository before relying on this output.
            print(f"(unknown): {info['size_mb']}MB, 修改时间: {info['modified']}")
        else:
            print(f"(unknown): 错误 - {info['error']}")

    print("\n日志配置测试完成!")
|
||||
910
match_article_images.py
Normal file
@@ -0,0 +1,910 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
文章与图片智能挂靠脚本
|
||||
根据文章标签匹配ai_image_tags表中的图片,使用大模型进行处理,
|
||||
如果挂靠失败或没有相同标签的图片,则使用Gemini生成图片
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import requests
|
||||
import csv
|
||||
import pymysql
|
||||
from typing import List, Dict, Tuple, Optional
|
||||
from collections import defaultdict
|
||||
from database_config import db_manager
|
||||
from log_config import setup_logger
|
||||
import time
|
||||
import random
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
|
||||
def get_articles_with_tags_from_db() -> List[Dict]:
    """
    Fetch approved articles and their parsed tags from the database.

    Queries ``ai_articles`` for rows with ``status = 'approved'`` and parses
    each row's ``coze_tag`` column (JSON list/dict, or a comma-separated
    string) into a flat list of tags.

    Returns:
        List of dicts with keys ``id``, ``title``, ``content``, ``tags``.

    Raises:
        Re-raises any database/query exception after logging it.
    """
    # Per-call logger setup (handlers are only attached once per process)
    logger = setup_logger('article_matching', 'logs/article_matching.log', 'logs/article_matching_error.log')

    articles = []

    try:
        # Query approved articles, including content and tags
        sql = """
        SELECT id, title, content, coze_tag
        FROM ai_articles
        WHERE status = 'approved'
        ORDER BY id
        """

        logger.info("开始查询审核通过的文章数据...")
        results = db_manager.execute_query(sql)

        if not results:
            logger.warning("没有找到状态为approved的文章")
            print("没有找到状态为approved的文章")
            return articles

        logger.info(f"查询到 {len(results)} 条审核通过的文章")
        print(f"查询到 {len(results)} 条审核通过的文章")

        for row in results:
            article_id, title, content, coze_tag = row

            # Parse the stored tags into a flat list
            tags = []
            if coze_tag:
                try:
                    # Preferred path: the column holds JSON
                    tags_data = json.loads(coze_tag)
                    if isinstance(tags_data, list):
                        tags = tags_data
                    elif isinstance(tags_data, dict):
                        # Fix: the previous conditional indexed
                        # list(tags_data.values())[0], which crashed on an
                        # empty dict, and both of its branches produced the
                        # same value anyway. Taking the values directly is
                        # equivalent for non-empty dicts and safe for {}.
                        tags = list(tags_data.values())
                    else:
                        # Scalar JSON value: fall back to comma splitting
                        tags = [tag.strip() for tag in str(tags_data).split(',') if tag.strip()]
                except json.JSONDecodeError:
                    # Not JSON at all: treat as a comma-separated string
                    tags = [tag.strip() for tag in str(coze_tag).split(',') if tag.strip()]

            articles.append({
                'id': article_id,
                'title': title,
                'content': content,
                'tags': tags
            })
    except Exception as e:
        logger.error(f"从数据库获取文章数据时发生错误: {e}", exc_info=True)
        print(f"从数据库获取文章数据时发生错误: {e}")
        raise

    return articles
|
||||
|
||||
|
||||
def get_images_by_tags_from_db(tags: Optional[List[str]] = None, used_counts: Optional[Dict[str, int]] = None) -> List[Dict]:
    """
    Fetch images matching the given tags from ``ai_image_tags``.

    Only images whose base attachment count plus the in-memory usage count
    stays below 5 are returned. A tag matches when any requested tag is a
    case-insensitive substring of the image's ``tag_name``.

    Args:
        tags: Tag list to match against (``None``/empty returns ``[]``).
            Fix: previously a mutable default (``= []``) was used, which is
            a shared-state hazard; ``None`` sentinel is now used instead.
        used_counts: Map of image ID (str) to in-session usage count
            (same mutable-default fix applies).

    Returns:
        List of image dicts (``id``, ``image_id``, ``image_name``,
        ``image_url``, ``tag_name``, ``keywords_name``, ``department_name``,
        ``base_count``).

    Raises:
        Re-raises any database/query exception after logging it.
    """
    if tags is None:
        tags = []
    if used_counts is None:
        used_counts = {}

    if not tags:
        return []

    # Per-call logger setup (handlers are only attached once per process)
    logger = setup_logger('article_matching', 'logs/article_matching.log', 'logs/article_matching_error.log')

    images = []

    try:
        # Query candidate image-tag rows that still have attachment budget
        sql = """
        SELECT id, image_id, image_name, image_url, tag_name, keywords_name, department_name, image_attached_article_count
        FROM ai_image_tags
        WHERE image_attached_article_count < 5
        ORDER BY id
        """

        logger.info("开始查询符合条件的图像标签数据...")
        results = db_manager.execute_query(sql)

        if not results:
            logger.warning("没有找到符合条件的图像标签数据 (image_attached_article_count < 5)")
            print("没有找到符合条件的图像标签数据 (image_attached_article_count < 5)")
            return images

        logger.info(f"查询到 {len(results)} 条符合条件的图像标签数据")
        print(f"查询到 {len(results)} 条符合条件的图像标签数据")

        for row in results:
            (
                image_id, db_image_id, image_name, image_url, tag_name,
                keywords_name, department_name, base_count
            ) = row

            # Combined attachment count (DB count + in-session usage) must stay < 5
            used_count = used_counts.get(str(image_id), 0)
            total_count = base_count + used_count

            if total_count >= 5:
                continue

            # Substring tag match, case-insensitive
            if any(tag.lower() in tag_name.lower() for tag in tags):
                images.append({
                    'id': str(image_id),
                    'image_id': db_image_id,
                    'image_name': image_name,
                    'image_url': image_url,
                    'tag_name': tag_name,
                    'keywords_name': keywords_name,
                    'department_name': department_name,
                    'base_count': base_count
                })
    except Exception as e:
        logger.error(f"从数据库获取图片数据时发生错误: {e}", exc_info=True)
        print(f"从数据库获取图片数据时发生错误: {e}")
        raise

    print(f"从数据库找到 {len(images)} 张符合条件的匹配图片")
    return images
|
||||
|
||||
|
||||
def call_qwen_model(article: Dict, image_urls: List[str]) -> bool:
    """
    Ask the Qwen (Tongyi Qianwen) model whether images fit an article.

    Sends the article title, a truncated excerpt of its content, and the
    candidate image URLs to the DashScope text-generation API, then
    interprets the free-text answer.

    Args:
        article: Article dict with at least ``title`` and ``content``.
        image_urls: Candidate image URLs.

    Returns:
        True when the model's answer indicates a match; False on a negative
        answer, an unexpected response shape, an HTTP error, or any
        exception (False deliberately triggers image generation upstream).
    """
    # Qwen API configuration.
    # SECURITY NOTE(review): hard-coded API key checked into source - should
    # be moved to an environment variable / secret store and rotated.
    api_key = "sk-e6a38204022a4b538b8954f0584712af"
    api_url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"

    # Build the evaluation prompt
    content = f"""
    请评估以下文章与图片的匹配度:

    文章标题: {article['title']}
    文章内容: {article['content'][:500]}... # 限制内容长度

    图片URLs: {', '.join(image_urls)}

    请判断这些图片是否适合用于这篇文章。如果匹配度高,请回复"匹配成功";如果匹配度低,请回复"匹配失败"。
    """

    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }

    payload = {
        "model": "qwen-max",  # or another suitable model
        "input": {
            "messages": [
                {
                    "role": "user",
                    "content": content
                }
            ]
        },
        "parameters": {
            "temperature": 0.7
        }
    }

    try:
        # Fix: added an explicit timeout - requests has none by default and
        # a stalled connection would otherwise hang the whole pipeline.
        response = requests.post(api_url, headers=headers, json=payload, timeout=60)

        if response.status_code == 200:
            result = response.json()
            # Interpret the model's free-text verdict
            if 'output' in result and 'text' in result['output']:
                response_text = result['output']['text'].lower()
                # Keyword-based positive detection
                if '匹配成功' in response_text or '是的' in response_text or '合适' in response_text:
                    print(f"通义千问评估结果: 匹配成功 - 文章 '{article['title']}'")
                    return True
                else:
                    print(f"通义千问评估结果: 匹配失败 - 文章 '{article['title']}'")
                    return False
            else:
                print(f"通义千问API响应格式异常: {result}")
                return False
        else:
            print(f"通义千问API调用失败: {response.status_code} - {response.text}")
            # On API failure, return False so that image generation kicks in
            return False

    except Exception as e:
        print(f"调用通义千问API时发生错误: {e}")
        # On any error, return False to trigger image generation
        return False
|
||||
|
||||
|
||||
def insert_generated_image_to_db(image_name: str, image_url: str, article_tags: List[str]) -> Optional[Dict]:
    """
    Insert a Gemini-generated image into ``ai_images`` and ``ai_image_tags``.

    Looks up department/keyword metadata from an existing row matching the
    first article tag (falling back to defaults), then performs both inserts
    in a single transaction.

    Args:
        image_name: Image file name, e.g. "1755310671174988.png".
        image_url: Image URL path, e.g. "20250816/1755310671174988.png".
        article_tags: Article tags; the first one drives the metadata lookup.

    Returns:
        Dict with ``tag_image_id``, ``image_id``, ``image_url``,
        ``image_thumb_url``, ``keywords_id``, ``keywords_name``,
        ``department_id``, ``department_name``; ``None`` on any failure
        (the transaction is rolled back).
    """
    connection = db_manager.get_connection()
    if connection is None:
        print("无法连接到数据库")
        return None

    try:
        with connection.cursor(pymysql.cursors.DictCursor) as cursor:
            # 1. Resolve department/keyword metadata from ai_image_tags
            if article_tags:
                # Look up by the first tag only
                query = """
                SELECT department_name, keywords_name, department_id, keywords_id, tag_id
                FROM ai_image_tags
                WHERE tag_name = %s
                LIMIT 1
                """
                cursor.execute(query, (article_tags[0],))
                tag_info = cursor.fetchone()

                if tag_info:
                    department = tag_info['department_name']
                    keywords = tag_info['keywords_name']
                    department_id = tag_info['department_id']
                    keywords_id = tag_info['keywords_id']
                    tag_id = tag_info['tag_id']
                    tag_name = article_tags[0]
                else:
                    # No matching row: fall back to AI-generated defaults
                    department = "AI生成"
                    keywords = "AI图片"
                    department_id = 1
                    keywords_id = 1
                    tag_id = 1
                    tag_name = article_tags[0] if article_tags else "AI生成"
            else:
                # No tags at all: use the defaults
                department = "AI生成"
                keywords = "AI图片"
                department_id = 1
                keywords_id = 1
                tag_id = 1
                tag_name = "AI生成"

            # 2. Insert into ai_images
            insert_image_query = """
            INSERT INTO ai_images
            (image_name, image_url, image_thumb_url, department, keywords, image_type, upload_user_id, status)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
            """
            cursor.execute(insert_image_query, (
                image_name,
                image_url,
                '',  # image_thumb_url (none yet)
                department,
                keywords,
                'medical',  # image_type
                1,  # upload_user_id (default user)
                'active'  # status
            ))
            image_id = cursor.lastrowid
            print(f"图片信息已插入ai_images表,image_id: {image_id}")

            # 3. Insert into ai_image_tags
            insert_tag_query = """
            INSERT INTO ai_image_tags
            (image_id, image_name, image_url, image_thumb_url, tag_id, tag_name,
             keywords_id, keywords_name, department_id, department_name,
             image_source, created_user_id, image_attached_article_count)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """
            cursor.execute(insert_tag_query, (
                image_id,
                image_name,
                image_url,
                '',  # image_thumb_url
                tag_id,
                tag_name,
                keywords_id,
                keywords,
                department_id,
                department,
                3,  # image_source: 3 means AI-generated
                1,  # created_user_id
                0  # image_attached_article_count
            ))
            tag_image_id = cursor.lastrowid
            print(f"图片标签信息已插入ai_image_tags表,tag_image_id: {tag_image_id}")

            # Commit both inserts atomically
            connection.commit()

            # Return everything downstream callers need
            return {
                'tag_image_id': tag_image_id,
                'image_id': image_id,
                'image_url': image_url,
                'image_thumb_url': '',
                'keywords_id': keywords_id,
                'keywords_name': keywords,
                'department_id': department_id,
                'department_name': department
            }

    except Exception as e:
        print(f"插入图片信息到数据库失败: {e}")
        connection.rollback()
        return None
    finally:
        connection.close()
|
||||
|
||||
|
||||
def insert_article_image_relation(article_id: int, image_id: int, image_url: str, image_thumb_url: str,
                                  tag_image_id: int, keywords_id: int, keywords_name: str,
                                  department_id: int, department_name: str, image_source: int = 0) -> Optional[int]:
    """
    Link an article to an image via the ``ai_article_images`` table.

    Computes the next ``sort_order`` for the article (max existing + 1) and
    inserts one relation row inside a transaction.

    Args:
        article_id: Article ID.
        image_id: Image ID (``ai_images.id``).
        image_url: Image URL.
        image_thumb_url: Thumbnail URL.
        tag_image_id: Image-tag row ID (``ai_image_tags.id``).
        keywords_id: Keyword ID.
        keywords_name: Keyword name.
        department_id: Department ID.
        department_name: Department name.
        image_source: Image origin code (0 = default).

    Returns:
        The new ``ai_article_images`` row ID, or ``None`` on failure
        (the transaction is rolled back).
    """
    connection = db_manager.get_connection()
    if connection is None:
        print("无法连接到数据库")
        return None

    try:
        with connection.cursor(pymysql.cursors.DictCursor) as cursor:
            # 1. Find the article's current maximum sort_order
            query_max_sort = """
            SELECT COALESCE(MAX(sort_order), 0) as max_sort_order
            FROM ai_article_images
            WHERE article_id = %s
            """
            cursor.execute(query_max_sort, (article_id,))
            result = cursor.fetchone()
            max_sort_order = result['max_sort_order'] if result else 0
            new_sort_order = max_sort_order + 1

            print(f"文章 {article_id} 当前最大sort_order: {max_sort_order}, 新图片sort_order: {new_sort_order}")

            # 2. Insert the relation row
            insert_query = """
            INSERT INTO ai_article_images
            (article_id, image_id, image_url, image_thumb_url, image_tag_id, sort_order,
             keywords_id, keywords_name, department_id, department_name, image_source)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """
            cursor.execute(insert_query, (
                article_id,
                image_id,
                image_url,
                image_thumb_url,
                tag_image_id,
                new_sort_order,
                keywords_id,
                keywords_name,
                department_id,
                department_name,
                image_source
            ))
            article_image_id = cursor.lastrowid
            print(f"文章图片关联信息已插入ai_article_images表,id: {article_image_id}")

            # Commit the insert
            connection.commit()

            return article_image_id

    except Exception as e:
        print(f"插入文章图片关联信息失败: {e}")
        connection.rollback()
        return None
    finally:
        connection.close()
|
||||
|
||||
|
||||
def generate_image_with_gemini(prompt: str, article_tags: List[str], article_id: int) -> str:
    """
    Generate an image with Gemini, persist it, and link it to an article.

    Calls the Gemini image model, saves the returned inline image data to a
    temporary file, registers the image in the database, uploads the file to
    the image server, records the article-image relation, and returns the
    uploaded URL.

    Args:
        prompt: Image-generation prompt.
        article_tags: Article tags used to resolve department/keyword metadata.
        article_id: Article to link the generated image to.

    Returns:
        The uploaded image's public URL.

    Raises:
        Exception: When the API returns no candidates/content/image data, or
            any persistence/upload step fails.
    """
    # Import lazily so the module stays usable without google-genai installed
    try:
        from google import genai
        from google.genai import types
        from google.genai.client import HttpOptions

    except ImportError:
        print("错误:未安装google-genai库,请运行 'pip install google-genai' 进行安装")
        raise

    # SECURITY NOTE(review): hard-coded API key checked into source - should
    # be moved to an environment variable / secret store and rotated.
    client = genai.Client(http_options=HttpOptions(base_url="https://work.poloapi.com"),
                          api_key="sk-V4tPnDgzFPa7nxWrvKnNJsW8ZcBXXPuGmjfgvPVRnwpHoeob")

    print(f"正在调用Gemini API生成图片,提示词: {prompt[:50]}...")

    # Request image generation
    response = client.models.generate_content(
        model="gemini-3-pro-image-preview",
        contents=[prompt],
    )

    # Validate the response shape
    if not response.candidates:
        raise Exception("Gemini API未返回任何候选答案")

    # Walk the first candidate's content parts
    candidate = response.candidates[0]
    if not candidate.content or not candidate.content.parts:
        raise Exception("Gemini API返回的候选答案中没有内容部分")

    for part in candidate.content.parts:
        if hasattr(part, 'text') and part.text is not None:
            print(f"Gemini响应文本: {part.text}")
        elif hasattr(part, 'inline_data') and part.inline_data is not None:
            image_data = part.inline_data
            if image_data.data is not None:
                # Unique timestamp-based file name and date-based URL path
                from datetime import datetime

                timestamp_ms = int(time.time() * 1000)  # millisecond timestamp
                image_filename = f"{timestamp_ms}.png"
                today_date = datetime.now().strftime("%Y%m%d")
                image_url_path = f"{today_date}/{image_filename}"

                temp_filename = f"temp_generated_image_{timestamp_ms}.png"
                # Save the image bytes to a temporary file
                with open(temp_filename, 'wb') as f:
                    f.write(image_data.data)
                print(f"Gemini生成图片成功: {temp_filename}")

                # Fix: the temp file used to be deleted only on full success,
                # leaking it when any DB/upload step raised. try/finally now
                # guarantees cleanup on every path.
                try:
                    # Register the image in the DB first to obtain its IDs
                    image_info = insert_generated_image_to_db(image_filename, image_url_path, article_tags)

                    if not image_info:
                        raise Exception("插入图片信息到数据库失败")

                    print(f"图片信息已插入数据库,tag_image_id: {image_info['tag_image_id']}, image_id: {image_info['image_id']}")

                    # Upload the file to the image server using its tag ID
                    uploaded_url = upload_image_to_server(temp_filename, image_info['tag_image_id'])

                    # Record the article-image relation
                    article_image_id = insert_article_image_relation(
                        article_id=article_id,
                        image_id=image_info['image_id'],
                        image_url=image_info['image_url'],
                        image_thumb_url=image_info['image_thumb_url'],
                        tag_image_id=image_info['tag_image_id'],
                        keywords_id=image_info['keywords_id'],
                        keywords_name=image_info['keywords_name'],
                        department_id=image_info['department_id'],
                        department_name=image_info['department_name'],
                        image_source=0  # default value
                    )

                    if article_image_id:
                        print(f"文章图片关联信息已创建,ai_article_images.id: {article_image_id}")
                finally:
                    # Always remove the temporary file
                    if os.path.exists(temp_filename):
                        os.remove(temp_filename)

                print(f"图片已上传到服务器: {uploaded_url}")
                # Return the uploaded image URL
                return uploaded_url

    # No part carried image data
    raise Exception("Gemini API未返回有效的图片数据")
|
||||
|
||||
|
||||
def upload_image_to_server(image_path: str, tag_image_id: int) -> str:
    """
    Upload a local image file to the image server.

    Logs in to obtain a JWT, then POSTs the file (with the mandatory
    ``tag_image_id`` form field) to the upload endpoint.

    Args:
        image_path: Local path of the image file.
        tag_image_id: Image-tag row ID required by the upload API.

    Returns:
        The server-side image URL (``http_image_url``).

    Raises:
        Exception: When login fails, the upload request errors, or the
            server reports a non-200 business code.
    """
    import requests
    import json

    # Log in to obtain a JWT token
    base_url = "http://47.99.184.230:8324"  # public API endpoint
    jwt_token = login_and_get_jwt_token(base_url)

    if not jwt_token:
        raise Exception("获取JWT token失败,无法上传图片")

    # Prepare the upload request
    upload_url = f"{base_url}/api/images/upload"
    headers = {
        'Authorization': f'Bearer {jwt_token}',
    }

    # Stream the image file as multipart form data
    with open(image_path, 'rb') as image_file:
        files = {'file': image_file}
        data = {'tag_image_id': tag_image_id}  # mandatory parameter

        # Fix: added an explicit timeout - requests has none by default and
        # a stalled upload would otherwise hang the pipeline indefinitely.
        response = requests.post(upload_url, headers=headers, files=files, data=data, timeout=120)

    print(f"图片上传响应状态码: {response.status_code}")
    print(f"图片上传响应内容: {response.text}")

    if response.status_code == 200:
        result = response.json()
        if result.get('code') == 200:
            # Return the server-side image URL
            return result['data']['http_image_url']
        else:
            raise Exception(f"图片上传失败: {result.get('message', '未知错误')}")
    else:
        raise Exception(f"图片上传请求失败,状态码: {response.status_code}, 响应: {response.text}")
|
||||
|
||||
|
||||
def login_and_get_jwt_token(base_url: str) -> Optional[str]:
    """
    Log in to the API and return a JWT token.

    Args:
        base_url: API base URL, e.g. ``http://host:port``.

    Returns:
        The JWT token string, or ``None`` on any failure.
    """
    login_url = f"{base_url}/api/auth/login"
    # SECURITY NOTE(review): hard-coded credentials checked into source -
    # should be moved to environment variables / a secret store and rotated.
    login_data = {
        "username": "user010",  # fixed service account
        "password": "@5^2W6R7"
    }

    print(f"尝试登录: {login_data['username']}")
    print(f"登录URL: {login_url}")

    try:
        # Fix: added an explicit timeout - requests has none by default and
        # a stalled login would otherwise hang the caller indefinitely.
        response = requests.post(login_url, json=login_data,
                                 headers={'Content-Type': 'application/json'}, timeout=30)
        print(f"响应状态码: {response.status_code}")

        if response.status_code == 200:
            result = response.json()
            if result.get('code') == 200:
                jwt_token = result['data']['token']
                print("JWT token获取成功")
                return jwt_token
            else:
                print(f"登录失败: {result.get('message', '未知错误')}")
                return None
        else:
            print(f"登录请求失败: {response.status_code}")
            return None

    except Exception as e:
        print(f"登录异常: {e}")
        return None
|
||||
|
||||
|
||||
def batch_publish_articles(base_url: str, jwt_token: str, article_ids: List[int]) -> bool:
    """
    Submit article IDs to the ``/api/articles/batch-publish-auto`` endpoint.

    Args:
        base_url: API base URL.
        jwt_token: Bearer token obtained via :func:`login_and_get_jwt_token`.
        article_ids: IDs of the articles to publish.

    Returns:
        True when the API reports business code 200; False on any HTTP
        error, 401 (token likely expired), malformed response, or exception.
    """
    try:
        print(f"开始批量提交 {len(article_ids)} 篇文章到batch-publish-auto接口")

        # Build the batch-publish payload
        publish_data = {
            "article_ids": article_ids
        }

        print(f"准备批量提交的数据: {json.dumps(publish_data, ensure_ascii=False)}")

        # Send the request
        upload_url = f"{base_url}/api/articles/batch-publish-auto"
        headers = {
            'Authorization': f'Bearer {jwt_token}',
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        }

        # Fix: added an explicit timeout - requests has none by default and
        # a stalled request would otherwise hang the batch job indefinitely.
        response = requests.post(upload_url, json=publish_data, headers=headers, timeout=120)

        print(f"批量提交响应状态码: {response.status_code}")

        if response.status_code == 200:
            try:
                result = response.json()
                print(f"批量提交响应内容: {result}")

                # Interpret the API's business-level result
                if result.get('code') == 200:
                    data = result.get('data', {})
                    published_count = data.get('published_count', 0)
                    failed_count = data.get('failed_count', 0)

                    success_msg = f"批量提交成功,发布: {published_count}篇,失败: {failed_count}篇"
                    print(success_msg)
                    return True
                else:
                    print(f"批量提交失败: {result.get('message', '未知错误')}")
                    return False
            except json.JSONDecodeError as e:
                print(f"解析批量提交响应失败: {e}")
                return False
        elif response.status_code == 401:
            # Token expired
            print("收到401错误,JWT token可能已过期")
            return False
        else:
            print(f"批量提交请求失败,状态码: {response.status_code}")
            return False

    except Exception as e:
        print(f"批量提交异常: {e}")
        return False
|
||||
|
||||
|
||||
def _generate_and_record_image(article, match_results):
    """Fallback path: generate an illustration with Gemini and record it.

    Appends a '生成图片' row to *match_results* and returns True.
    (Extracted because this block was duplicated verbatim in both branches
    of process_single_article.)
    """
    # Build the generation prompt from the article title and tags.
    prompt = f"与'{article['title']}'相关的插图,标签: {', '.join(article['tags'])}"
    generated_image_url = generate_image_with_gemini(prompt, article['tags'], article['id'])
    print(f"生成的图片URL: {generated_image_url}")

    match_results.append({
        '文章ID': article['id'],
        '文章标题': article['title'],
        '文章内容': article['content'][:100] + '...' if len(article['content']) > 100 else article['content'],
        '标签': ', '.join(article['tags']),
        '匹配的图片URL': generated_image_url,
        '图片ID': 'N/A',
        '图片名称': 'Generated',
        '图片标签': 'N/A',
        '图片关键词': 'N/A',
        '图片部门': 'N/A',
        '匹配状态': '生成图片'
    })
    return True


def process_single_article(article, used_image_counts, match_results):
    """
    Match one article against the image library, falling back to Gemini
    image generation when no library image passes the LLM check.

    Args:
        article: article dict with 'id', 'title', 'content' and 'tags'.
        used_image_counts: mutable per-image usage counter (updated in place).
        match_results: list collecting one result dict per article.

    Returns:
        True (every article ends up either matched or with a generated image).
    """
    print(f"\n处理文章: {article['title']} (ID: {article['id']})")

    # Candidate images by tag, already filtered on usage limits.
    matched_images = get_images_by_tags_from_db(article['tags'], used_image_counts)

    if matched_images:
        print(f"找到 {len(matched_images)} 张符合条件的匹配图片")

        # Prefer the least-used images first.
        matched_images.sort(key=lambda x: x['base_count'])

        for img in matched_images:
            image_url = "http://images11.bxmkb.cn/Images/" + img['image_url']

            if image_url:  # guard against an empty stored URL
                # Ask the Qwen model whether this image fits the article.
                match_success = call_qwen_model(article, [image_url])

                if match_success:
                    print(f"文章与图片挂靠成功: {article['title']}")

                    # Track usage so the same image is not over-reused.
                    used_image_counts[img['id']] += 1

                    match_results.append({
                        '文章ID': article['id'],
                        '文章标题': article['title'],
                        '文章内容': article['content'][:100] + '...' if len(article['content']) > 100 else article['content'],
                        '标签': ', '.join(article['tags']),
                        '匹配的图片URL': image_url,
                        '图片ID': img['id'],
                        '图片名称': img['image_name'],
                        '图片标签': img['tag_name'],
                        '图片关键词': img['keywords_name'],
                        '图片部门': img['department_name'],
                        '匹配状态': '成功'
                    })
                    return True

        # No candidate passed the LLM check -> generate an image instead.
        # (The original tracked this with a dead `matched` flag that was
        # never set; reaching this line already means nothing matched.)
        print(f"文章未能与任何图片成功匹配,使用Gemini生成图片: {article['title']}")
        return _generate_and_record_image(article, match_results)
    else:
        print(f"没有找到符合条件的匹配图片,使用Gemini生成图片: {article['title']}")
        return _generate_and_record_image(article, match_results)
|
||||
|
||||
|
||||
def process_article_image_matching(test_mode=False, test_count=None):
    """
    Match every article to an image (library match or Gemini generation),
    write the results to CSV and, in normal mode, batch-publish the articles.

    Args:
        test_mode: when True only the first *test_count* articles are handled
            and no publishing happens.
        test_count: number of articles to handle in test mode (defaults to 3).
    """
    # Per-image usage counter, shared across all articles in this run.
    used_image_counts = defaultdict(int)
    # One result dict per processed article.
    match_results = []

    try:
        articles = get_articles_with_tags_from_db()

        if not articles:
            print("没有找到文章")
            return

        # Test mode only looks at the first N articles.
        if test_mode:
            if test_count is None:
                test_count = 3  # default: first 3 articles
            articles = articles[:test_count]
            print(f"测试模式:处理前 {len(articles)} 篇文章")

        success_count = 0
        processed_article_ids = []  # IDs handed to the publish endpoint later

        for article in articles:
            if process_single_article(article, used_image_counts, match_results):
                success_count += 1
                processed_article_ids.append(article['id'])
            else:
                print(f"处理文章 {article['id']} 失败")

        # BUG FIX: generated_count was declared but never incremented, so the
        # summary always reported 0; derive it from the recorded results.
        generated_count = sum(1 for r in match_results if r['匹配状态'] == '生成图片')

        # Persist the per-article results.
        output_csv = 'article_image_match_results.csv'
        with open(output_csv, 'w', newline='', encoding='utf-8-sig') as csvfile:
            fieldnames = [
                '文章ID', '文章标题', '文章内容', '标签',
                '匹配的图片URL', '图片ID', '图片名称',
                '图片标签', '图片关键词', '图片部门', '匹配状态'
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()
            for result in match_results:
                writer.writerow(result)

        if not test_mode:
            print(f"\n处理完成! 成功挂靠: {success_count} 篇, 生成图片: {generated_count} 篇")
            print(f"匹配结果已保存至: {output_csv}")

            # Publish everything that was processed.
            if processed_article_ids:
                print(f"\n开始发布处理过的 {len(processed_article_ids)} 篇文章...")

                base_url = "http://47.99.184.230:8324"  # external API address
                jwt_token = login_and_get_jwt_token(base_url)

                if jwt_token:
                    if batch_publish_articles(base_url, jwt_token, processed_article_ids):
                        print(f"成功发布 {len(processed_article_ids)} 篇文章")
                    else:
                        print("批量发布失败")
                else:
                    print("获取JWT token失败,无法发布文章")
            else:
                print("\n没有处理过的文章,跳过发布步骤")
        else:
            print(f"\n测试模式完成! 处理了 {len(articles)} 篇文章,成功挂靠: {success_count} 篇, 生成图片: {generated_count} 篇")
            print(f"处理结果已保存至: {output_csv}")

    except Exception as e:
        print(f"处理文章图片匹配时发生错误: {e}")
        raise
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    print("开始处理文章与图片的智能挂靠...")

    # CLI:
    #   python match_article_images.py              -> process all articles
    #   python match_article_images.py --test [N]   -> process only the first N
    if len(sys.argv) > 1:
        if sys.argv[1] == "--test" and len(sys.argv) > 2:
            # ROBUSTNESS: guard the conversion so a non-numeric argument shows
            # usage instead of an unhandled ValueError traceback.
            if sys.argv[2].isdigit():
                test_count = int(sys.argv[2])
                print(f"启动测试模式,处理前 {test_count} 篇文章")
                process_article_image_matching(test_mode=True, test_count=test_count)
            else:
                print("使用方法:")
                print("  正常模式: python match_article_images.py")
                print("  测试模式: python match_article_images.py --test [文章数量]")
        elif sys.argv[1] == "--test" and len(sys.argv) == 2:
            # Interactive test mode: ask how many articles to process.
            test_count_input = input("请输入要测试的文章数量 (默认3): ")
            test_count = int(test_count_input) if test_count_input.strip().isdigit() else 3
            print(f"启动测试模式,处理前 {test_count} 篇文章")
            process_article_image_matching(test_mode=True, test_count=test_count)
        else:
            print("使用方法:")
            print("  正常模式: python match_article_images.py")
            # BUG FIX: the help text said [文章ID] but the argument is a count.
            print("  测试模式: python match_article_images.py --test [文章数量]")
    else:
        # Normal mode: process every article.
        process_article_image_matching()
|
||||
680
push_article_published.py
Normal file
@@ -0,0 +1,680 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
AI文章自动生成监控脚本
|
||||
监控数据库中status为topic的记录,自动调用Coze API生成文章并提交
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import json
|
||||
import logging
|
||||
import requests
|
||||
import pymysql
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Any
|
||||
import traceback
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from queue import Queue, Empty
|
||||
import random
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
# 添加项目根目录到Python路径
|
||||
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from database_config import get_db_manager
|
||||
from log_config import setup_logger
|
||||
|
||||
# 配置日志记录器,支持按日期切割和控制台输出
|
||||
logger = setup_logger(
|
||||
name='push_article',
|
||||
log_file='logs/push_article_published.log',
|
||||
error_log_file='logs/push_article_published_error.log',
|
||||
level=logging.INFO,
|
||||
console_output=True
|
||||
)
|
||||
|
||||
# 配置常量
|
||||
#BASE_URL = "http://47.99.184.230:8324"
|
||||
BASE_URL = "http://127.0.0.1:8324"
|
||||
SLEEP_INTERVAL = 5 # 监控间隔(秒)
|
||||
WORKER_COUNT = 10 # 并行处理worker数量,可配置
|
||||
|
||||
# 新增:批量发布配置
|
||||
BATCH_SIZE = 8 # 一次处理的文章数量,可调
|
||||
BATCH_INTERVAL = 2 # 批次间隔时间(秒),可调
|
||||
|
||||
# 网络重试配置
|
||||
MAX_RETRIES = 3 # 最大重试次数
|
||||
BACKOFF_FACTOR = 1 # 退避因子
|
||||
RETRY_STATUS_CODES = [500, 502, 503, 504, 429] # 需要重试的HTTP状态码
|
||||
CONNECTION_TIMEOUT = 30 # 连接超时(秒)
|
||||
READ_TIMEOUT = 120 # 读取超时(秒)
|
||||
|
||||
# 全局变量
|
||||
AUTH_TOKEN = None
|
||||
WORKFLOW_ID = None
|
||||
JWT_TOKEN = None
|
||||
|
||||
class PushArticlePublished:
|
||||
    def __init__(self):
        """Set up API configuration, DB access, HTTP session and counters."""
        # API configuration
        self.base_url = BASE_URL

        # Authentication state (filled in by login_and_get_jwt_token)
        self.auth_token = None
        self.workflow_id = None
        self.jwt_token = None

        # Shared database manager (project-wide singleton)
        self.db_manager = get_db_manager()

        # Login configuration
        # SECURITY NOTE(review): credentials are hard-coded in source; they
        # should come from environment variables or a secrets store.
        self.login_credentials = {
            'username': 'user010',
            'password': '@5^2W6R7'
        }

        # Explicitly disable any system HTTP(S) proxy
        self.proxies = {
            'http': None,
            'https': None
        }

        # Parallel-processing bookkeeping
        self.processing_lock = threading.Lock()  # thread-safe record assignment
        self.processed_ids = set()  # IDs of records already handled

        # HTTP session with the retry strategy configured
        self.session = self._create_session()

        # Cumulative network statistics (reported by log_network_stats)
        self.request_stats = {
            'total_requests': 0,
            'successful_requests': 0,
            'failed_requests': 0,
            'retry_attempts': 0,
            'connection_errors': 0,
            'timeout_errors': 0
        }

        logger.info("PushArticlePublished 初始化完成")
|
||||
|
||||
def _create_session(self):
|
||||
"""创建配置了重试策略的requests会话"""
|
||||
session = requests.Session()
|
||||
|
||||
# 配置重试策略
|
||||
retry_strategy = Retry(
|
||||
total=MAX_RETRIES,
|
||||
status_forcelist=RETRY_STATUS_CODES,
|
||||
backoff_factor=BACKOFF_FACTOR,
|
||||
allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "OPTIONS", "TRACE"]
|
||||
)
|
||||
|
||||
# 配置HTTP适配器
|
||||
adapter = HTTPAdapter(
|
||||
max_retries=retry_strategy,
|
||||
pool_connections=10,
|
||||
pool_maxsize=20
|
||||
)
|
||||
|
||||
session.mount("http://", adapter)
|
||||
session.mount("https://", adapter)
|
||||
|
||||
# 设置默认超时
|
||||
session.timeout = (CONNECTION_TIMEOUT, READ_TIMEOUT)
|
||||
|
||||
return session
|
||||
|
||||
def _make_request_with_retry(self, method, url, **kwargs):
|
||||
"""带重试机制的网络请求方法"""
|
||||
self.request_stats['total_requests'] += 1
|
||||
|
||||
for attempt in range(MAX_RETRIES + 1):
|
||||
try:
|
||||
# 使用会话发送请求
|
||||
response = self.session.request(
|
||||
method=method,
|
||||
url=url,
|
||||
timeout=(CONNECTION_TIMEOUT, READ_TIMEOUT),
|
||||
proxies=self.proxies,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
# 请求成功
|
||||
self.request_stats['successful_requests'] += 1
|
||||
if attempt > 0:
|
||||
logger.info(f"网络请求在第 {attempt + 1} 次尝试后成功")
|
||||
return response
|
||||
|
||||
except requests.exceptions.ConnectionError as e:
|
||||
self.request_stats['connection_errors'] += 1
|
||||
if attempt < MAX_RETRIES:
|
||||
self.request_stats['retry_attempts'] += 1
|
||||
backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1)
|
||||
logger.warning(f"连接错误 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}")
|
||||
logger.info(f"等待 {backoff_time:.2f} 秒后重试...")
|
||||
time.sleep(backoff_time)
|
||||
else:
|
||||
self.request_stats['failed_requests'] += 1
|
||||
logger.error(f"连接最终失败,已重试 {MAX_RETRIES} 次: {e}")
|
||||
raise
|
||||
|
||||
except requests.exceptions.Timeout as e:
|
||||
self.request_stats['timeout_errors'] += 1
|
||||
if attempt < MAX_RETRIES:
|
||||
self.request_stats['retry_attempts'] += 1
|
||||
backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1)
|
||||
logger.warning(f"请求超时 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}")
|
||||
logger.info(f"等待 {backoff_time:.2f} 秒后重试...")
|
||||
time.sleep(backoff_time)
|
||||
else:
|
||||
self.request_stats['failed_requests'] += 1
|
||||
logger.error(f"请求超时最终失败,已重试 {MAX_RETRIES} 次: {e}")
|
||||
raise
|
||||
|
||||
except requests.exceptions.ChunkedEncodingError as e:
|
||||
if attempt < MAX_RETRIES:
|
||||
self.request_stats['retry_attempts'] += 1
|
||||
backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1)
|
||||
logger.warning(f"数据传输错误 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}")
|
||||
logger.info(f"等待 {backoff_time:.2f} 秒后重试...")
|
||||
time.sleep(backoff_time)
|
||||
else:
|
||||
self.request_stats['failed_requests'] += 1
|
||||
logger.error(f"数据传输最终失败,已重试 {MAX_RETRIES} 次: {e}")
|
||||
raise
|
||||
|
||||
except Exception as e:
|
||||
self.request_stats['failed_requests'] += 1
|
||||
logger.error(f"网络请求发生未预期错误: {e}")
|
||||
raise
|
||||
|
||||
def log_network_stats(self):
|
||||
"""记录网络统计信息"""
|
||||
stats = self.request_stats
|
||||
success_rate = (stats['successful_requests'] / stats['total_requests'] * 100) if stats['total_requests'] > 0 else 0
|
||||
|
||||
stats_msg = (
|
||||
f"网络统计 - 总请求: {stats['total_requests']}, "
|
||||
f"成功: {stats['successful_requests']}, "
|
||||
f"失败: {stats['failed_requests']}, "
|
||||
f"重试: {stats['retry_attempts']}, "
|
||||
f"连接错误: {stats['connection_errors']}, "
|
||||
f"超时错误: {stats['timeout_errors']}, "
|
||||
f"成功率: {success_rate:.1f}%"
|
||||
)
|
||||
|
||||
logger.info(stats_msg)
|
||||
self.log_to_database('INFO', '网络统计', stats_msg)
|
||||
|
||||
def get_db_connection(self):
|
||||
"""获取数据库连接"""
|
||||
try:
|
||||
return self.db_manager.get_connection()
|
||||
except Exception as e:
|
||||
logger.error(f"数据库连接失败: {e}")
|
||||
return None
|
||||
|
||||
def log_to_database(self, level: str, message: str, details: str = None):
|
||||
"""记录日志到数据库ai_logs表"""
|
||||
try:
|
||||
with self.db_manager.get_cursor() as cursor:
|
||||
# 映射日志级别到数据库状态
|
||||
status_map = {
|
||||
'INFO': 'success',
|
||||
'WARNING': 'warning',
|
||||
'ERROR': 'error'
|
||||
}
|
||||
status = status_map.get(level, 'success')
|
||||
|
||||
sql = """
|
||||
INSERT INTO ai_logs (user_id, action, description, status, error_message, created_at)
|
||||
VALUES (%s, %s, %s, %s, %s, NOW())
|
||||
"""
|
||||
cursor.execute(sql, (None, 'coze_generator', message, status, details))
|
||||
logger.info(f"日志已记录到数据库: {level} - {message}")
|
||||
except Exception as e:
|
||||
logger.error(f"记录日志到数据库失败: {e}")
|
||||
|
||||
def login_and_get_jwt_token(self) -> bool:
|
||||
"""登录获取JWT token,参考JavaScript逻辑"""
|
||||
try:
|
||||
login_url = f"{self.base_url}/api/auth/login"
|
||||
login_data = {
|
||||
"username": "user010", # 使用用户指定的账号
|
||||
"password": "@5^2W6R7"
|
||||
}
|
||||
|
||||
logger.info(f"尝试登录: {login_data['username']}")
|
||||
logger.info(f"登录URL: {login_url}")
|
||||
self.log_to_database('INFO', f"尝试登录用户: {login_data['username']}")
|
||||
|
||||
response = self._make_request_with_retry(
|
||||
'POST',
|
||||
login_url,
|
||||
json=login_data,
|
||||
headers={'Content-Type': 'application/json'}
|
||||
)
|
||||
|
||||
logger.info(f"响应状态码: {response.status_code}")
|
||||
logger.info(f"响应内容: {response.text[:500]}...")
|
||||
|
||||
if response.status_code == 200:
|
||||
result = response.json()
|
||||
if result.get('code') == 200:
|
||||
self.jwt_token = result['data']['token']
|
||||
logger.info("JWT token获取成功")
|
||||
self.log_to_database('INFO', "JWT token获取成功", json.dumps(result['data']))
|
||||
return True
|
||||
else:
|
||||
error_msg = f"登录失败: {result.get('message', '未知错误')}"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, json.dumps(result))
|
||||
return False
|
||||
else:
|
||||
error_msg = f"登录请求失败: {response.status_code}"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, response.text)
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"登录异常: {e}"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, traceback.format_exc())
|
||||
return False
|
||||
|
||||
def batch_publish_auto(self, article_ids: List[int]) -> bool:
|
||||
"""批量提交文章到/api/articles/batch-publish-auto接口"""
|
||||
try:
|
||||
logger.info(f"开始批量提交 {len(article_ids)} 篇文章到batch-publish-auto接口")
|
||||
self.log_to_database('INFO', f"开始批量提交文章", f"article_ids: {article_ids}")
|
||||
|
||||
# 确保有JWT token
|
||||
if not self.jwt_token:
|
||||
logger.warning("JWT token缺失,尝试重新登录")
|
||||
self.log_to_database('WARNING', "JWT token缺失,重新登录")
|
||||
if not self.login_and_get_jwt_token():
|
||||
error_msg = "重新登录失败"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg)
|
||||
return False
|
||||
|
||||
# 构建批量发布数据 - 根据接口要求只需要article_ids
|
||||
publish_data = {
|
||||
"article_ids": article_ids
|
||||
}
|
||||
|
||||
logger.info(f"准备批量提交的数据: {json.dumps(publish_data, ensure_ascii=False)}")
|
||||
|
||||
# 发送请求 - 修正接口路径
|
||||
upload_url = f"{self.base_url}/api/articles/batch-publish-auto"
|
||||
headers = {
|
||||
'Authorization': f'Bearer {self.jwt_token}',
|
||||
'Content-Type': 'application/json',
|
||||
'Accept': 'application/json'
|
||||
}
|
||||
|
||||
response = self._make_request_with_retry(
|
||||
'POST',
|
||||
upload_url,
|
||||
json=publish_data,
|
||||
headers=headers
|
||||
)
|
||||
|
||||
logger.info(f"批量提交响应状态码: {response.status_code}")
|
||||
|
||||
if response.status_code == 200:
|
||||
try:
|
||||
result = response.json()
|
||||
logger.info(f"批量提交响应内容: {result}")
|
||||
|
||||
# 根据接口实际返回格式判断成功
|
||||
if result.get('code') == 200:
|
||||
data = result.get('data', {})
|
||||
published_count = data.get('published_count', 0)
|
||||
failed_count = data.get('failed_count', 0)
|
||||
|
||||
success_msg = f"批量提交成功,发布: {published_count}篇,失败: {failed_count}篇"
|
||||
logger.info(success_msg)
|
||||
self.log_to_database('INFO', success_msg, f"article_ids: {article_ids}")
|
||||
return True
|
||||
else:
|
||||
error_msg = f"批量提交失败: {result.get('message', '未知错误')}"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, response: {result}")
|
||||
return False
|
||||
except json.JSONDecodeError as e:
|
||||
error_msg = f"解析批量提交响应失败: {e}"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, f"response_text: {response.text}")
|
||||
return False
|
||||
elif response.status_code == 401:
|
||||
# Token过期,尝试重新登录并重试一次
|
||||
logger.warning("收到401错误,JWT token可能已过期,尝试重新登录")
|
||||
self.log_to_database('WARNING', "JWT token过期,重新登录", f"article_ids: {article_ids}")
|
||||
|
||||
if self.login_and_get_jwt_token():
|
||||
logger.info("重新登录成功,重试批量提交请求")
|
||||
# 更新headers中的token
|
||||
headers['Authorization'] = f'Bearer {self.jwt_token}'
|
||||
|
||||
# 重试请求
|
||||
retry_response = self._make_request_with_retry(
|
||||
'POST',
|
||||
upload_url,
|
||||
json=publish_data,
|
||||
headers=headers
|
||||
)
|
||||
|
||||
if retry_response.status_code == 200:
|
||||
try:
|
||||
result = retry_response.json()
|
||||
logger.info(f"重试批量提交响应内容: {result}")
|
||||
|
||||
if result.get('code') == 200:
|
||||
data = result.get('data', {})
|
||||
published_count = data.get('published_count', 0)
|
||||
failed_count = data.get('failed_count', 0)
|
||||
|
||||
success_msg = f"重试批量提交成功,发布: {published_count}篇,失败: {failed_count}篇"
|
||||
logger.info(success_msg)
|
||||
self.log_to_database('INFO', success_msg, f"article_ids: {article_ids}")
|
||||
return True
|
||||
else:
|
||||
error_msg = f"重试批量提交失败: {result.get('message', '未知错误')}"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, response: {result}")
|
||||
return False
|
||||
except json.JSONDecodeError as e:
|
||||
error_msg = f"解析重试批量提交响应失败: {e}"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, f"response_text: {retry_response.text}")
|
||||
return False
|
||||
else:
|
||||
error_msg = f"重试批量提交请求失败,状态码: {retry_response.status_code}"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, f"response_text: {retry_response.text}")
|
||||
return False
|
||||
else:
|
||||
error_msg = "重新登录失败,无法重试批量提交"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}")
|
||||
return False
|
||||
else:
|
||||
error_msg = f"批量提交请求失败,状态码: {response.status_code}"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, f"response_text: {response.text}")
|
||||
return False
|
||||
|
||||
except requests.exceptions.Timeout as e:
|
||||
error_msg = f"批量提交请求超时: {e}"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, timeout: {CONNECTION_TIMEOUT}s/{READ_TIMEOUT}s")
|
||||
return False
|
||||
except requests.exceptions.ConnectionError as e:
|
||||
error_msg = f"批量提交连接错误: {e}"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, base_url: {self.base_url}")
|
||||
return False
|
||||
except requests.exceptions.RequestException as e:
|
||||
error_msg = f"批量提交网络异常: {e}"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, exception_type: {type(e).__name__}")
|
||||
return False
|
||||
except Exception as e:
|
||||
error_msg = f"批量提交异常: {e}"
|
||||
logger.error(error_msg)
|
||||
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, traceback: {traceback.format_exc()}")
|
||||
return False
|
||||
|
||||
|
||||
def is_publish_time_allowed(self) -> bool:
|
||||
"""检查当前时间是否在允许发布的时间窗口内(北京时间6:00-23:59)"""
|
||||
current_hour = datetime.now().hour
|
||||
# 凌晨00:00-05:59禁止发布,6:00-23:59允许发布
|
||||
if current_hour >= 6:
|
||||
logger.info(f"当前时间 {datetime.now().strftime('%H:%M:%S')} 可以推送")
|
||||
return True
|
||||
else:
|
||||
logger.info(f"当前时间 {datetime.now().strftime('%H:%M:%S')} 在禁止发布时段(00:00-05:59),跳过推送")
|
||||
return False
|
||||
|
||||
    def filter_articles_by_daily_limit(self, articles: List[Dict]) -> List[Dict]:
        """Filter *articles* down to those whose authors may still post today.

        An article survives only when:
          1. its author exists in ai_authors with daily_post_max > 0,
             status = 'active' and channel = 1, and
          2. an ai_statistics_days row for (author_id, today) exists with
             daily_post_max >= 1 and daily_published_count < daily_post_max.

        Side effect: articles failing check (1) are pushed back to status
        'pending_review' so they re-enter the approval flow.

        On any exception the ORIGINAL list is returned unchanged (fail-open),
        so a broken statistics query never blocks publishing.
        """
        if not articles:
            return []

        try:
            today_date = datetime.now().strftime('%Y-%m-%d')
            filtered_articles = []

            with self.db_manager.get_cursor() as cursor:
                for article in articles:
                    author_id = article.get('author_id')
                    if not author_id:
                        logger.warning(f"文章ID {article['id']} 缺少author_id,跳过")
                        continue

                    # Check ai_authors first: the author must be allowed to
                    # post at all (daily_post_max > 0, active, channel 1).
                    author_check_sql = """
                    SELECT id, author_name, daily_post_max, status, channel
                    FROM ai_authors
                    WHERE id = %s AND daily_post_max > 0 AND status = 'active' AND channel = 1
                    """
                    cursor.execute(author_check_sql, (author_id,))
                    author_result = cursor.fetchone()

                    if not author_result:
                        logger.info(f"[业务日志] 作者ID {author_id} 不符合发文条件(daily_post_max>0 AND status=active AND channel=1),文章ID {article['id']} 过滤掉")
                        # Kick the article back into the review queue.
                        update_sql = "UPDATE ai_articles SET status = 'pending_review', updated_at = NOW() WHERE id = %s"
                        cursor.execute(update_sql, (article['id'],))
                        logger.info(f"[业务日志] 文章ID {article['id']} 状态已更新为pending_review,需重新审批")
                        continue

                    # Today's publishing statistics for this author.
                    sql = """
                    SELECT daily_published_count, daily_post_max
                    FROM ai_statistics_days
                    WHERE author_id = %s AND stat_date = %s
                    """
                    cursor.execute(sql, (author_id, today_date))
                    result = cursor.fetchone()

                    if result:
                        # NULL columns are treated as 0.
                        daily_published_count = result['daily_published_count'] or 0
                        daily_post_max = result['daily_post_max'] or 0

                        # A daily cap below 1 means no publishing today.
                        if daily_post_max < 1:
                            #logger.info(f"[业务日志] 作者ID {author_id} daily_post_max={daily_post_max} 小于1,文章ID {article['id']} 过滤掉,不允许发文")
                            continue

                        # Already at or over the daily limit: skip.
                        if daily_published_count >= daily_post_max:
                            #logger.info(f"[业务日志] 作者ID {author_id} 今日已发 {daily_published_count} 篇,达到上限 {daily_post_max},文章ID {article['id']} 跳过")
                            continue
                        else:
                            #logger.info(f"[业务日志] 作者ID {author_id} 今日已发 {daily_published_count}/{daily_post_max},文章ID {article['id']} 允许发布")
                            filtered_articles.append(article)
                    else:
                        # No statistics row yet: publishing is NOT allowed
                        # until the daily record is initialised elsewhere.
                        logger.info(f"[业务日志] 作者ID {author_id} 无当日统计记录,文章ID {article['id']} 过滤掉,需先初始化统计记录")
                        continue

            logger.info(f"每日限制过滤完成: 原始 {len(articles)} 篇 -> 允许发布 {len(filtered_articles)} 篇")
            return filtered_articles

        except Exception as e:
            error_msg = f"检查每日发文限制异常: {e}"
            logger.error(error_msg)
            self.log_to_database('ERROR', error_msg, traceback.format_exc())
            # Fail open: return the unfiltered list rather than block publishing.
            return articles
|
||||
|
||||
    def get_published_review_articles(self) -> List[Dict]:
        """Fetch all articles whose status is 'published_review'.

        The inner query computes a per-author ROW_NUMBER() (author_rank,
        oldest first), but the outer query never filters on it, so every
        matching article is returned.
        NOTE(review): author_rank looks like it was meant to cap results per
        author (e.g. WHERE author_rank <= N) -- confirm the intent.

        Returns an empty list when nothing is pending or on any DB error.
        """
        try:
            with self.db_manager.get_cursor() as cursor:
                # Articles pending automatic publication, ranked per author.
                sql = """
                SELECT
                    id,
                    title,
                    status,
                    created_at,
                    updated_at,
                    author_id
                FROM (
                    SELECT
                        id,
                        title,
                        status,
                        created_at,
                        updated_at,
                        author_id,
                        ROW_NUMBER() OVER (
                            PARTITION BY author_id
                            ORDER BY updated_at ASC, id ASC
                        ) as author_rank
                    FROM ai_articles
                    WHERE status = 'published_review'
                    AND author_id > 0
                ) ranked_articles
                """
                cursor.execute(sql)
                results = cursor.fetchall()

                if results:
                    logger.info(f"查询到 {len(results)} 个待发布文章")
                    for result in results:
                        logger.info(f"待发布文章 - ID: {result['id']}, 标题: {result['title']}, 状态: {result['status']}")
                        #self.log_to_database('INFO', f"发现待发布文章: {result['title']}",
                        #f"ID: {result['id']}, 状态: {result['status']}")
                else:
                    logger.info("未查询到待发布文章")

                return results
        except Exception as e:
            error_msg = f"查询待发布文章异常: {e}"
            logger.error(error_msg)
            self.log_to_database('ERROR', error_msg, traceback.format_exc())
            return []
|
||||
|
||||
def process_published_review_articles(self, published_articles: List[Dict], worker_id: int) -> int:
|
||||
"""Worker线程处理published_review状态文章的方法"""
|
||||
processed_count = 0
|
||||
thread_name = f"PublishWorker-{worker_id}"
|
||||
threading.current_thread().name = thread_name
|
||||
|
||||
logger.info(f"[{thread_name}] 启动,准备处理待发布文章")
|
||||
|
||||
# 按批次处理文章
|
||||
for i in range(0, len(published_articles), BATCH_SIZE):
|
||||
batch = published_articles[i:i + BATCH_SIZE]
|
||||
article_ids = [article['id'] for article in batch]
|
||||
|
||||
logger.info(f"[{thread_name}] 处理批次 {i//BATCH_SIZE + 1},文章ID: {article_ids}")
|
||||
|
||||
# 批量提交文章
|
||||
if self.batch_publish_auto(article_ids):
|
||||
processed_count += len(article_ids)
|
||||
logger.info(f"[{thread_name}] 成功处理批次,文章数量: {len(article_ids)}")
|
||||
else:
|
||||
logger.error(f"[{thread_name}] 处理批次失败,文章ID: {article_ids}")
|
||||
|
||||
# 批次间隔
|
||||
if i + BATCH_SIZE < len(published_articles):
|
||||
logger.info(f"[{thread_name}] 等待 {BATCH_INTERVAL} 秒后处理下一批次")
|
||||
time.sleep(BATCH_INTERVAL)
|
||||
|
||||
logger.info(f"[{thread_name}] 完成,共处理 {processed_count} 篇文章")
|
||||
return processed_count
|
||||
|
||||
    def run_monitor(self):
        """Run the main monitoring loop until interrupted.

        Each iteration: fetch pending articles, drop them all outside the
        allowed time window, filter by per-author daily limits, then
        batch-publish whatever remains. Sleeps SLEEP_INTERVAL seconds per
        loop and logs network statistics roughly every stats_interval loops.
        Exits only on KeyboardInterrupt; other exceptions are logged and the
        loop continues after a 5-second pause.
        """
        logger.info(f"开始监控ai_articles表,使用 {WORKER_COUNT} 个worker并行处理...")
        self.log_to_database('INFO', f'启动文章自动生成监控服务,worker数量: {WORKER_COUNT}', 'run_monitor')

        # Loop counter for periodic stats reporting.
        loop_count = 0
        stats_interval = 60  # log network stats every 60 loops (~5 minutes)

        while True:
            try:
                # Articles waiting to be auto-published.
                published_articles = self.get_published_review_articles()

                # Rule 1: time window (06:00-23:59 allowed, 00:00-05:59 blocked).
                if not self.is_publish_time_allowed():
                    published_articles = []
                    logger.info("当前处于禁止发布时段,清空待发布列表")

                # Rule 2: per-author daily publishing limits.
                if published_articles:
                    published_articles = self.filter_articles_by_daily_limit(published_articles)

                # Publish whatever survived the filters.
                if published_articles:
                    logger.info(f"发现 {len(published_articles)} 篇待发布文章,启动批量发布处理")
                    self.log_to_database('INFO', f'发现待发布文章,启动批量处理', f'文章数量: {len(published_articles)}')

                    # Single worker on purpose: avoids concurrent publish conflicts.
                    try:
                        processed_count = self.process_published_review_articles(published_articles, 1)
                        logger.info(f"批量发布处理完成,共处理 {processed_count} 篇文章")
                        self.log_to_database('INFO', f'批量发布处理完成', f'共处理 {processed_count} 篇文章')
                    except Exception as e:
                        logger.error(f"批量发布处理异常: {e}")
                        self.log_to_database('ERROR', f'批量发布处理异常', str(e))

                # Nothing to do this round.
                if not published_articles:
                    logger.info("暂无待处理任务,继续监控...")

                # Pause between polling iterations.
                time.sleep(SLEEP_INTERVAL)

                # Periodically report cumulative network statistics.
                loop_count += 1
                if loop_count % stats_interval == 0:
                    self.log_network_stats()

            except KeyboardInterrupt:
                logger.info("收到中断信号,停止监控")
                self.log_to_database('INFO', '监控服务手动停止', 'KeyboardInterrupt')
                break
            except Exception as e:
                error_msg = f"监控循环异常: {e}"
                logger.error(error_msg)
                self.log_to_database('ERROR', error_msg, traceback.format_exc())
                time.sleep(5)  # back off briefly before resuming the loop
|
||||
|
||||
def main():
    """Entry point: authenticate once, then run the monitoring loop forever."""
    generator = PushArticlePublished()

    try:
        # A valid JWT is required before the monitor can publish anything.
        logger.info("开始登录获取JWT token")
        if not generator.login_and_get_jwt_token():
            logger.error("登录失败,程序退出")
            return

        generator.run_monitor()

    except Exception as e:
        logger.error(f"程序运行异常: {e}")
        generator.log_to_database('ERROR', f'程序运行异常: {e}', traceback.format_exc())
|
||||
|
||||
# Run the monitor only when executed directly (not when imported).
if __name__ == "__main__":
    main()
|
||||
4
requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
requests==2.31.0
google-genai==0.1.0
Pillow==10.0.0
openpyxl==3.1.2
# Imported by push_article_published.py but previously missing here:
PyMySQL==1.1.0
|
||||
14
setup_env.bat
Normal file
@@ -0,0 +1,14 @@
|
||||
@echo off
REM Create a local Python virtual environment, activate it for this shell,
REM and install the project dependencies from requirements.txt.
echo 正在创建虚拟环境...
python -m venv venv
echo 虚拟环境创建完成!

echo 正在激活虚拟环境...
call venv\Scripts\activate.bat

echo 正在安装依赖...
pip install -r requirements.txt

echo 虚拟环境设置完成!
echo 激活虚拟环境的命令: venv\Scripts\activate
pause
|
||||
13
setup_env.sh
Normal file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
# Create a local Python virtual environment and install project dependencies.
# ROBUSTNESS FIX: abort on the first failing command so a broken venv or a
# failed pip install does not leave a half-configured environment.
set -e

echo "正在创建虚拟环境..."
python3 -m venv venv
echo "虚拟环境创建完成!"

echo "正在激活虚拟环境..."
source venv/bin/activate

echo "正在安装依赖..."
pip install -r requirements.txt

echo "虚拟环境设置完成!"
echo "激活虚拟环境的命令: source venv/bin/activate"
|
||||
297
split_sql_tables.py
Normal file
@@ -0,0 +1,297 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
将包含多个表的SQL文件拆分为单个表的SQL文件
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def split_sql_tables(input_file_path):
    """
    Split every CREATE TABLE statement in *input_file_path* into its own
    .sql file under a sibling ``split_tables/`` directory.

    Each output file is named after the (sanitised) table name and contains
    a short header comment followed by the table's DDL.

    Args:
        input_file_path: path to the SQL dump containing multiple tables.
    """
    with open(input_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # BUG FIX: the original also built a malformed, never-used `table_pattern`
    # regex; it has been removed. This pattern matches each CREATE TABLE
    # statement up to the next CREATE TABLE (or end of file); group 1 is the
    # full statement, group 2 the table name.
    create_table_pattern = r'(CREATE TABLE\s+`?(\w+)`?\s*\(.+?)(?=\nCREATE TABLE|\Z)'
    tables = re.findall(create_table_pattern, content, re.DOTALL | re.IGNORECASE)

    # Fallback: split on lines that begin a CREATE TABLE statement.
    if not tables:
        parts = re.split(r'\n(?=CREATE TABLE)', content)
        tables = []

        for part in parts:
            if part.strip().upper().startswith('CREATE TABLE'):
                table_name_match = re.search(r'CREATE TABLE\s+`?(\w+)`?', part, re.IGNORECASE)
                if table_name_match:
                    tables.append((part.strip(), table_name_match.group(1)))

    # Ensure the output directory exists.
    output_dir = Path(input_file_path).parent / "split_tables"
    output_dir.mkdir(exist_ok=True)

    for table_sql, table_name in tables:
        # Sanitise the table name so it is always a valid file name.
        clean_table_name = re.sub(r'[^\w\-_\.]', '_', table_name)
        output_file_path = output_dir / f"{clean_table_name}.sql"

        with open(output_file_path, 'w', encoding='utf-8') as f:
            f.write("-- SQL table definition\n")
            f.write("-- Generated from splitting a larger SQL file\n")
            f.write("\n")
            f.write(table_sql.strip())
            f.write("\n")

        print(f"已创建表文件: {output_file_path}")
|
||||
|
||||
|
||||
def split_sql_tables_advanced(input_file_path):
    """Split the table definitions in a SQL file, one output file per table.

    Scans the file line by line, collecting each CREATE TABLE statement up
    to its terminating semicolon, and writes every statement out via
    ``save_table_to_file``.
    """
    with open(input_file_path, 'r', encoding='utf-8') as f:
        raw_lines = f.readlines()

    # Ensure the output directory exists.
    out_dir = Path(input_file_path).parent / "split_tables"
    out_dir.mkdir(exist_ok=True)

    pending = []        # lines of the statement currently being collected
    collecting = False  # True while inside a CREATE TABLE statement
    table_name = ""     # name extracted from the current statement

    for raw in raw_lines:
        stripped = raw.strip()

        if stripped.upper().startswith('CREATE TABLE'):
            # A new statement begins; flush any statement still in progress.
            if collecting and pending:
                save_table_to_file(table_name, pending, out_dir)
                pending = []

            collecting = True
            pending = [raw]

            # Extract the table name from the statement head.
            name_match = re.search(r'CREATE TABLE\s+`?(\w+)`?', stripped, re.IGNORECASE)
            if name_match:
                table_name = name_match.group(1)

            if stripped.endswith(';'):
                # The whole statement fits on a single line.
                save_table_to_file(table_name, pending, out_dir)
                pending = []
                collecting = False
            # Otherwise keep collecting lines until a semicolon appears.
        elif collecting:
            pending.append(raw)
            if stripped.endswith(';'):
                # Terminating semicolon: the statement is complete.
                save_table_to_file(table_name, pending, out_dir)
                pending = []
                collecting = False
        # Lines outside any CREATE TABLE statement are ignored.

    # Flush a trailing statement that never hit a semicolon.
    if collecting and pending:
        save_table_to_file(table_name, pending, out_dir)
def save_table_to_file(table_name, table_lines, output_dir):
    """Write one table definition to ``<output_dir>/<sanitized name>.sql``.

    Args:
        table_name: Raw table name; characters not valid in a file name
            are replaced with underscores before use as the file stem.
        table_lines: Lines of the CREATE TABLE statement.
        output_dir: Directory (``pathlib.Path``) that receives the file.
    """
    # Sanitize the table name so it is safe to use as a file name.
    safe_name = re.sub(r'[^\w\-_\.]', '_', table_name)
    target = output_dir / f"{safe_name}.sql"

    header = [
        "-- SQL table definition\n",
        "-- Generated from splitting a larger SQL file\n",
        "-- Table: " + table_name + "\n",
        "\n",
    ]

    with open(target, 'w', encoding='utf-8') as f:
        f.writelines(header)
        # Normalize trailing whitespace/newlines on every statement line.
        f.writelines(entry.rstrip() + '\n' for entry in table_lines)

    print(f"已创建表文件: {target}")
def extract_create_table_statements(input_file_path):
    """Extract every CREATE TABLE statement from a SQL file into its own file.

    Parses the file line by line: each statement runs from its
    ``CREATE TABLE`` line to the first line ending with a semicolon, and is
    written out via ``save_table_to_file``.
    """
    with open(input_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Simple line-by-line parse (an earlier unused regex and a block of
    # commented-out dead code were removed here).
    lines = content.split('\n')

    # Ensure the output directory exists.
    output_dir = Path(input_file_path).parent / "split_tables"
    output_dir.mkdir(exist_ok=True)

    current_table_lines = []
    in_table_definition = False
    current_table_name = ""

    for line in lines:
        stripped_line = line.strip()

        if stripped_line.upper().startswith('CREATE TABLE'):
            # A new statement begins; save any statement still in progress.
            if in_table_definition and current_table_lines:
                save_table_to_file(current_table_name, current_table_lines, output_dir)

            in_table_definition = True
            name_match = re.search(r'CREATE TABLE\s+`?(\w+)`?', stripped_line, re.IGNORECASE)
            if name_match:
                current_table_name = name_match.group(1)
            current_table_lines = [line]

            # Bug fix: a single-line statement ends right here. Previously
            # this case was not handled, so unrelated following lines were
            # absorbed into the statement until the next semicolon.
            if stripped_line.endswith(';'):
                save_table_to_file(current_table_name, current_table_lines, output_dir)
                current_table_lines = []
                in_table_definition = False
        elif in_table_definition:
            current_table_lines.append(line)
            # A trailing semicolon closes the current statement.
            if stripped_line.endswith(';'):
                save_table_to_file(current_table_name, current_table_lines, output_dir)
                current_table_lines = []
                in_table_definition = False
        # Lines outside any CREATE TABLE statement are skipped.

    # Save a trailing statement with no terminating semicolon.
    if in_table_definition and current_table_lines:
        save_table_to_file(current_table_name, current_table_lines, output_dir)
def parse_sql_file(input_file_path):
    """Parse a SQL file and split its table definitions into separate files.

    Walks the file line by line: a statement starts at ``CREATE TABLE``
    (optionally ``IF NOT EXISTS``) and ends at a semicolon-terminated line
    that heuristically looks like the end of a table definition. Each
    statement is written out via ``save_table_to_file_simple``.
    """
    with open(input_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Safer parsing approach: process the content line by line.
    all_lines = content.split('\n')

    # Ensure the output directory exists.
    target_dir = Path(input_file_path).parent / "split_tables"
    target_dir.mkdir(exist_ok=True)

    collected = []        # lines belonging to the statement in progress
    inside_table = False  # True while collecting a CREATE TABLE statement
    table_name = ""

    for line in all_lines:
        stripped = line.strip()

        if stripped.upper().startswith('CREATE TABLE'):
            # Starting a new statement; save any one still in progress.
            if inside_table and collected:
                save_table_to_file_simple(table_name, collected, target_dir)

            inside_table = True
            # Extract the table name (allowing an IF NOT EXISTS clause).
            match = re.search(r'CREATE TABLE\s+(?:IF NOT EXISTS\s+)?`?(\w+)`?',
                              stripped, re.IGNORECASE)
            if match:
                table_name = match.group(1)
            collected = [line]
        elif inside_table:
            collected.append(line)
            if stripped and stripped.endswith(';'):
                # Heuristic: the statement ends when the semicolon line also
                # carries table-option keywords, or its parentheses balance.
                has_option_keyword = ('ENGINE' in stripped or
                                      'CHARACTER SET' in stripped or
                                      'ROW_FORMAT' in stripped or
                                      ') ENGINE' in line)
                parens_balanced = line.count('(') <= line.count(')')
                if has_option_keyword or parens_balanced:
                    # This is a complete table definition.
                    save_table_to_file_simple(table_name, collected, target_dir)
                    collected = []
                    inside_table = False
        # Other lines outside a table definition are ignored.

    # Save a trailing table definition, if any.
    if inside_table and collected:
        save_table_to_file_simple(table_name, collected, target_dir)
def save_table_to_file_simple(table_name, table_lines, output_dir):
    """Write a table definition to ``<output_dir>/<sanitized name>.sql``.

    Simplified variant: each line is written verbatim, with only a newline
    appended after it.
    """
    # Replace characters that are unsafe in file names.
    stem = re.sub(r'[^\w\-_\.]', '_', table_name)
    out_path = output_dir / f"{stem}.sql"

    with open(out_path, 'w', encoding='utf-8') as handle:
        handle.write("-- SQL table definition\n")
        handle.write("-- Generated from splitting a larger SQL file\n")
        handle.write(f"-- Table: {table_name}\n")
        handle.write("--\n\n")
        handle.writelines(text + '\n' for text in table_lines)

    print(f"已创建表文件: {out_path}")
if __name__ == "__main__":
    import sys

    # Take the SQL file path from argv[1], or prompt for it interactively.
    if len(sys.argv) >= 2:
        input_file = sys.argv[1].strip('"\'')
    else:
        input_file = input("请输入SQL文件路径: ").strip().strip('"\'')

    # Abort with a non-zero exit status when the file is missing.
    if not os.path.exists(input_file):
        print(f"错误: 文件 {input_file} 不存在")
        sys.exit(1)

    print(f"正在拆分SQL文件: {input_file}")
    parse_sql_file(input_file)
    print("拆分完成!")