Initial commit: text-image matching project

2026-01-30 18:09:55 +08:00
commit fbf12f3fa3
57 changed files with 3552 additions and 0 deletions

.gitignore vendored Normal file (55 lines)

@@ -0,0 +1,55 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Virtual Environment
venv/
ENV/
env/

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# Logs
logs/
*.log

# Database
*.db
*.sqlite3

# CSV files
*.csv

# Temporary files
temp_*
*.tmp

# OS
.DS_Store
Thumbs.db

# Project specific
article_image_match_results.csv

11111111.py Normal file (32 lines)

@@ -0,0 +1,32 @@
import os

from google import genai
from google.genai import types

# The API key is read from the environment instead of being hard-coded;
# GEMINI_API_KEY is an assumed variable name, not one mandated by the SDK.
client = genai.Client(
    http_options=types.HttpOptions(base_url="https://work.poloapi.com"),
    api_key=os.environ["GEMINI_API_KEY"],
)
prompt = "Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme"
response = client.models.generate_content(
    model="gemini-3-pro-image-preview",
    contents=[prompt],
)
# Check whether any candidates were returned
if not response.candidates:
    print("API未返回任何候选答案")
else:
    candidate = response.candidates[0]
    if not candidate.content:
        print("API返回的候选答案中没有内容")
    elif not hasattr(candidate.content, 'parts') or not candidate.content.parts:
        print("API返回的候选答案内容中没有parts")
    else:
        for part in candidate.content.parts:
            if hasattr(part, 'text') and part.text is not None:
                print(part.text)
            elif hasattr(part, 'inline_data') and part.inline_data is not None:
                image_data = part.inline_data
                if image_data.data is not None:
                    # Save the image bytes to a file
                    with open('generated_image.png', 'wb') as f:
                        f.write(image_data.data)
                    print("图片生成成功: generated_image.png")

Binary image files not shown (8 files).

create_test_csv.py Normal file (61 lines)

@@ -0,0 +1,61 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Create test CSV files for verifying image-article attachment
"""
import csv
import json
def create_test_articles_csv():
"""创建测试文章CSV文件"""
# 创建测试文章数据与20260108文件夹中的图片主题相关
articles_data = [
{'ID': 1, '标题': '美丽的自然风景欣赏', '内容': '自然界的风景总是让人感到心旷神怡。无论是山川河流还是森林草原,大自然的美景总能带给我们视觉上的享受和心灵上的宁静。', '标签': json.dumps(['自然', '风景', '美丽'], ensure_ascii=False)},
{'ID': 2, '标题': '户外活动的乐趣', '内容': '走出室内,亲近大自然是一种极好的放松方式。户外活动不仅能锻炼身体,还能让我们欣赏到美丽的自然风光。', '标签': json.dumps(['户外', '活动', '自然'], ensure_ascii=False)},
{'ID': 3, '标题': '摄影艺术中的自然之美', '内容': '摄影师们常常将镜头对准大自然的美景,捕捉那些令人惊叹的瞬间。每一张风景照片都是对自然之美的独特诠释。', '标签': json.dumps(['摄影', '自然', '艺术'], ensure_ascii=False)},
{'ID': 4, '标题': '风景旅游推荐指南', '内容': '想要寻找美丽的风景胜地吗?这里有几处绝佳的风景旅游目的地,每一处都有其独特的魅力和美景等待你的探索。', '标签': json.dumps(['旅游', '风景', '推荐'], ensure_ascii=False)},
{'ID': 5, '标题': '数字图像处理技术', '内容': '现代数字图像处理技术使得图片的缩放、裁剪和优化变得更加容易。无论是原图还是缩略图,都能在保持质量的同时方便使用。', '标签': json.dumps(['图像处理', '技术', '缩略图'], ensure_ascii=False)}
]
    # Write the CSV file
with open('test_articles.csv', 'w', newline='', encoding='utf-8-sig') as csvfile:
fieldnames = ['ID', '标题', '内容', '标签']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
for row in articles_data:
writer.writerow(row)
print('测试文章CSV文件已创建: test_articles.csv')
def create_test_images_csv():
"""创建测试图片CSV文件"""
# 创建测试图片数据使用20260108文件夹中的图片
images_data = [
{'ID': 1, '图像ID': 'IMG001', '图像名称': '风景图1', '图像URL': '20260108/1767867138994556.png', '标签名称': '风景,自然,美丽', '关键词名称': 'Landscape,Nature,Beauty', '部门名称': '生活部', '附加文章数量': 2},
{'ID': 2, '图像ID': 'IMG002', '图像名称': '风景图2', '图像URL': '20260108/1767867148035776.png', '标签名称': '自然,风光,户外', '关键词名称': 'Nature,Landscape,Outdoor', '部门名称': '生活部', '附加文章数量': 1},
{'ID': 3, '图像ID': 'IMG003', '图像名称': '风景图3', '图像URL': '20260108/1767867156936619.png', '标签名称': '景色,自然,美丽', '关键词名称': 'Scenery,Nature,Beautiful', '部门名称': '生活部', '附加文章数量': 3},
{'ID': 4, '图像ID': 'IMG004', '图像名称': '风景图4', '图像URL': '20260108/1767867165665952.png', '标签名称': '自然风光,户外,美景', '关键词名称': 'Natural Scenery,Outdoor,Beautiful View', '部门名称': '生活部', '附加文章数量': 0},
{'ID': 5, '图像ID': 'IMG005', '图像名称': '缩略图1', '图像URL': '20260108/1767867138994556_thumb.png', '标签名称': '缩略图,小图,预览', '关键词名称': 'Thumbnail,Small Image,Preview', '部门名称': '技术部', '附加文章数量': 4},
{'ID': 6, '图像ID': 'IMG006', '图像名称': '缩略图2', '图像URL': '20260108/1767867148035776_thumb.png', '标签名称': '缩略图,预览,小尺寸', '关键词名称': 'Thumbnail,Preview,Small Size', '部门名称': '技术部', '附加文章数量': 1}
]
    # Write the CSV file
with open('test_images.csv', 'w', newline='', encoding='utf-8-sig') as csvfile:
fieldnames = ['ID', '图像ID', '图像名称', '图像URL', '标签名称', '关键词名称', '部门名称', '附加文章数量']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
for row in images_data:
writer.writerow(row)
print('测试图片CSV文件已创建: test_images.csv')
if __name__ == "__main__":
create_test_articles_csv()
create_test_images_csv()
    print('\n两个测试CSV文件已创建完成,可用于测试图片文章挂靠效果。')
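Both files are written with encoding='utf-8-sig' so Excel detects UTF-8 via the BOM; a consumer should read them back with the same encoding. A minimal read-back sketch (hypothetical consumer code, not part of this commit):

import csv

with open('test_articles.csv', newline='', encoding='utf-8-sig') as f:
    for row in csv.DictReader(f):
        print(row['ID'], row['标题'])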

create_test_excel.py Normal file (69 lines)

@@ -0,0 +1,69 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Create test Excel files for verifying image-article attachment
"""
from openpyxl import Workbook
import json
def create_test_articles_excel():
"""创建测试文章Excel文件"""
# 创建测试文章数据
articles_data = [
        ['ID', '标题', '内容', '标签'],  # header row
[1, '人工智能发展趋势', '人工智能技术正在快速发展,在各个领域都有广泛应用。机器学习、深度学习等技术不断突破,推动着社会进步。', json.dumps(['人工智能', '科技', '趋势'], ensure_ascii=False)],
[2, '健康饮食的重要性', '合理膳食是保持身体健康的基础。均衡摄入各种营养素,有助于提高免疫力,预防疾病。', json.dumps(['健康', '饮食', '营养'], ensure_ascii=False)],
[3, '环境保护与可持续发展', '环境保护是当今世界面临的重要挑战。通过可持续发展策略,我们可以平衡经济发展与生态保护。', json.dumps(['环保', '可持续发展', '生态'], ensure_ascii=False)],
[4, '数字化转型对企业的影响', '数字化转型正在重塑企业运营模式。通过引入新技术,企业可以提升效率,优化客户体验。', json.dumps(['数字化', '企业', '转型'], ensure_ascii=False)],
[5, '教育创新的未来方向', '教育创新是培养未来人才的关键。利用新技术手段,可以创造更加个性化和高效的学习环境。', json.dumps(['教育', '创新', '学习'], ensure_ascii=False)]
]
    # Create the Excel workbook
    wb = Workbook()
    ws = wb.active
    if ws:
        ws.title = '测试文章数据'
        # Append each row to the worksheet
        for row_data in articles_data:
            ws.append(row_data)
    # Save the workbook
    wb.save('test_articles.xlsx')
print('测试文章Excel文件已创建: test_articles.xlsx')
def create_test_images_excel():
"""创建测试图片Excel文件"""
# 创建测试图片数据
images_data = [
        ['ID', '图像ID', '图像名称', '图像URL', '标签名称', '关键词名称', '部门名称', '附加文章数量'],  # header row
[1, 'IMG001', 'AI概念图', 'https://example.com/images/ai_concept.jpg', '人工智能,科技,趋势', 'AI,Machine Learning,Deep Learning', '科技部', 2],
[2, 'IMG002', '健康饮食图', 'https://example.com/images/healthy_food.jpg', '健康,饮食,营养', 'Nutrition,Health,Diet', '生活部', 1],
[3, 'IMG003', '环保地球图', 'https://example.com/images/environment.jpg', '环保,可持续发展,生态', 'Environment,Sustainability,Eco', '环保部', 3],
[4, 'IMG004', '数字化办公图', 'https://example.com/images/digital_office.jpg', '数字化,企业,转型', 'Digital,Enterprise,Transformation', '科技部', 0],
[5, 'IMG005', '教育创新图', 'https://example.com/images/education_innovation.jpg', '教育,创新,学习', 'Education,Innovation,Learning', '教育部', 4],
[6, 'IMG006', '网络安全图', 'https://example.com/images/cyber_security.jpg', '安全,网络,防护', 'Security,Cyber,Protection', '安全部', 1]
]
    # Create the Excel workbook
    wb = Workbook()
    ws = wb.active
    if ws:
        ws.title = '测试图片数据'
        # Append each row to the worksheet
        for row_data in images_data:
            ws.append(row_data)
    # Save the workbook
    wb.save('test_images.xlsx')
print('测试图片Excel文件已创建: test_images.xlsx')
if __name__ == "__main__":
create_test_articles_excel()
create_test_images_excel()
    print('\n两个测试Excel文件已创建完成,可用于测试图片文章挂靠效果。')

database_config.py Normal file (160 lines)

@@ -0,0 +1,160 @@
"""
Database configuration module
Centralizes database connections and SQL operations
"""
import pymysql
import logging
logger = logging.getLogger(__name__)
# Database configuration
DB_CONFIG = {
'host': '8.149.233.36',
'user': 'ai_article_read',
'password': '7aK_H2yvokVumr84lLNDt8fDBp6P',
'database': 'ai_article',
'charset': 'utf8mb4'
}
class DatabaseManager:
"""数据库管理器:统一管理数据库连接和操作"""
def __init__(self, config=None):
"""初始化数据库管理器
Args:
config: 数据库配置字典,默认使用 DB_CONFIG
"""
self.config = config or DB_CONFIG
def get_connection(self, autocommit=False):
"""获取数据库连接
Args:
autocommit: 是否启用自动提交模式
Returns:
pymysql连接对象
"""
return pymysql.connect(**self.config, autocommit=autocommit)
def execute_query(self, sql, params=None, fetch_one=False):
"""执行查询SQLSELECT
Args:
sql: SQL语句
params: SQL参数tuple或list
fetch_one: True返回单条记录False返回所有记录
Returns:
查询结果
"""
conn = None
cursor = None
try:
conn = self.get_connection()
cursor = conn.cursor()
logger.info(f'[SQL] {sql.strip()} | params: {params}')
cursor.execute(sql, params or ())
if fetch_one:
result = cursor.fetchone()
else:
result = cursor.fetchall()
logger.debug(f'[SQL结果] 返回 {len(result) if not fetch_one and result else (1 if result else 0)} 条记录')
return result
except Exception as e:
logger.error(f'执行查询失败:{e}', exc_info=True)
raise
finally:
if cursor:
cursor.close()
if conn:
conn.close()
def execute_update(self, sql, params=None, autocommit=True):
"""执行更新SQLINSERT/UPDATE/DELETE
Args:
sql: SQL语句
params: SQL参数tuple或list
autocommit: 是否自动提交
Returns:
影响的行数
"""
conn = None
cursor = None
try:
conn = self.get_connection(autocommit=autocommit)
cursor = conn.cursor()
logger.info(f'[SQL] {sql.strip()} | params: {params}')
result = cursor.execute(sql, params or ())
if not autocommit:
conn.commit()
            logger.info(f'[SQL执行] 影响 {result} 行')
return result
except Exception as e:
if not autocommit and conn:
conn.rollback()
logger.error(f'执行更新失败:{e}', exc_info=True)
raise
finally:
if cursor:
cursor.close()
if conn:
conn.close()
def execute_many(self, sql, params_list, autocommit=True):
"""批量执行SQL
Args:
sql: SQL语句
params_list: 参数列表,每个元素是一组参数
autocommit: 是否自动提交
Returns:
成功执行的行数
"""
conn = None
cursor = None
try:
conn = self.get_connection(autocommit=autocommit)
cursor = conn.cursor()
logger.info(f'[SQL批量] {sql.strip()} | 批次数: {len(params_list)}')
success_count = 0
for params in params_list:
try:
result = cursor.execute(sql, params)
if result > 0:
success_count += 1
except Exception as e:
logger.debug(f'批量执行跳过params={params},错误:{e}')
if not autocommit:
conn.commit()
logger.info(f'[SQL批量执行] 成功 {success_count}/{len(params_list)}')
return success_count
except Exception as e:
if not autocommit and conn:
conn.rollback()
logger.error(f'批量执行失败:{e}', exc_info=True)
raise
finally:
if cursor:
cursor.close()
if conn:
conn.close()
# Module-level database manager instance
db_manager = DatabaseManager()
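Typical usage of the module-level manager, as a sketch (the table and column names come from the schema files under db/; the literal values are illustrative only):

from database_config import db_manager

# SELECT statements go through execute_query; pymysql uses %s placeholders
row = db_manager.execute_query(
    "SELECT id, title FROM ai_articles WHERE status = %s LIMIT 1",
    params=('approved',),
    fetch_one=True,
)

# Writes go through execute_update (autocommit by default)
if row:
    db_manager.execute_update(
        "UPDATE ai_articles SET image_count = image_count + 1 WHERE id = %s",
        params=(row[0],),
    )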

db/ai_articles.sql Normal file (55 lines)

@@ -0,0 +1,55 @@
-- AI article content table
-- Stores AI-generated article content and its lifecycle status
-- Supports multi-channel publishing (Baidu Baijiahao, Toutiao, WeChat, etc.)
-- Tracks the full flow from topic selection through generation, review, and publishing
CREATE TABLE `ai_articles` (
`id` int NOT NULL AUTO_INCREMENT COMMENT '主键',
`batch_id` bigint UNSIGNED NOT NULL DEFAULT 0 COMMENT '批次ID用于批量生成文章的分组',
`topic_type_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '选题类型ID',
`prompt_workflow_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '提示词工作流ID关联AI生成模板',
`topic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '文章选题/主题',
`title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '文章标题',
`content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '文章正文内容',
`department` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '部门名称(遗留字段)',
`departmentids` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '部门ID列表遗留字段',
`author_id` int NULL DEFAULT NULL COMMENT '作者ID关联ai_authors.id百家号账号',
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称(百家号账号名)',
`department_id` int NULL DEFAULT NULL COMMENT '部门ID',
`department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '部门名称',
`created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID关联ai_users.id',
`review_user_id` int NULL DEFAULT NULL COMMENT '审核用户ID关联ai_users.id',
`publish_user_id` int NULL DEFAULT NULL COMMENT '发布用户ID关联ai_users.id',
`status` enum('topic','cover_image','generate','generate_failed','draft','pending_review','approved','rejected','published_review','published','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'draft' COMMENT '文章状态topic=选题|cover_image=封面图|generate=生成中|generate_failed=生成失败|draft=草稿|pending_review=待审核(文章已生成)|approved=审核通过|rejected=审核拒绝|published_review=发布审核中|published=已发布|failed=发布失败',
`channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '发布渠道1=百度百家号|2=今日头条|3=微信公众号',
`review_comment` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL COMMENT '审核意见/备注',
`publish_time` timestamp NULL DEFAULT NULL COMMENT '发布时间',
`baijiahao_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '百家号文章ID',
`baijiahao_status` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '百家号平台状态',
`word_count` int NULL DEFAULT 0 COMMENT '文章字数',
`image_count` int NULL DEFAULT 0 COMMENT '文章配图数量',
`coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'Coze生成的标签',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE,
  -- Index definitions
INDEX `created_user_id`(`created_user_id` ASC) USING BTREE COMMENT '创建用户索引',
INDEX `review_user_id`(`review_user_id` ASC) USING BTREE COMMENT '审核用户索引',
INDEX `publish_user_id`(`publish_user_id` ASC) USING BTREE COMMENT '发布用户索引',
INDEX `idx_articles_status_user_created`(`status` ASC, `created_user_id` ASC, `created_at` DESC) USING BTREE COMMENT '状态+创建用户+创建时间组合索引',
INDEX `idx_articles_status_created`(`status` ASC, `created_at` DESC) USING BTREE COMMENT '状态+创建时间索引',
INDEX `idx_articles_status`(`status` ASC) USING BTREE COMMENT '状态索引',
INDEX `idx_articles_created_at`(`created_at` DESC) USING BTREE COMMENT '创建时间索引',
INDEX `idx_status_id_author`(`status` ASC, `id` ASC, `author_id` ASC) USING BTREE COMMENT '状态+ID+作者组合索引',
INDEX `idx_articles_updated_at`(`updated_at` DESC) USING BTREE COMMENT '更新时间索引',
INDEX `idx_articles_status_prompt_topic_id`(`status` ASC, `prompt_workflow_id` ASC, `topic` ASC, `id` ASC) USING BTREE COMMENT '状态+工作流+选题+ID组合索引',
INDEX `idx_status_author_updated_id`(`status` ASC, `author_id` ASC, `updated_at` ASC, `id` ASC) USING BTREE COMMENT '状态+作者+更新时间+ID组合索引',
INDEX `idx_author_status_updated_id`(`author_id` ASC, `status` ASC, `updated_at` ASC, `id` ASC) USING BTREE COMMENT '作者+状态+更新时间+ID组合索引',
  -- Foreign key constraints
CONSTRAINT `ai_articles_ibfk_1` FOREIGN KEY (`author_id`) REFERENCES `ai_authors` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
CONSTRAINT `ai_articles_ibfk_2` FOREIGN KEY (`created_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
CONSTRAINT `ai_articles_ibfk_3` FOREIGN KEY (`review_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
CONSTRAINT `ai_articles_ibfk_4` FOREIGN KEY (`publish_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 1115 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
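The composite indexes above are shaped for status-driven listing queries. As a sketch (reusing the DatabaseManager from database_config.py), a query like the following should be able to use idx_articles_status_created:

from database_config import db_manager

# Newest pending-review articles first; the WHERE + ORDER BY matches
# the (status, created_at DESC) index idx_articles_status_created
pending = db_manager.execute_query(
    "SELECT id, title, created_at FROM ai_articles "
    "WHERE status = 'pending_review' "
    "ORDER BY created_at DESC LIMIT 20"
)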

BIN db/split_tables.zip Normal file (binary, not shown)


@@ -0,0 +1,24 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_article_images
--
CREATE TABLE `ai_article_images` (
`id` int NOT NULL AUTO_INCREMENT,
`article_id` int NOT NULL DEFAULT '0',
`image_id` int NOT NULL DEFAULT '0',
`image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`image_tag_id` int NOT NULL DEFAULT '0',
`sort_order` int DEFAULT '0',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`keywords_id` int NOT NULL DEFAULT '0',
`keywords_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`department_id` int NOT NULL DEFAULT '0',
`department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`image_source` tinyint(1) NOT NULL DEFAULT '0' COMMENT '1=tag|2=change',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `uk_article_image` (`article_id`,`image_id`) USING BTREE,
KEY `image_id` (`image_id`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=1053298 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;
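The uk_article_image unique key means each (article_id, image_id) pair can be attached at most once. A hedged sketch of an idempotent attach through the DatabaseManager (the IDs and URL here are illustrative, not real data):

from database_config import db_manager

# INSERT IGNORE lets uk_article_image silently skip an already-attached pair;
# image_source=1 means the image was matched by tag (per the column comment)
db_manager.execute_update(
    "INSERT IGNORE INTO ai_article_images "
    "(article_id, image_id, image_url, image_source) VALUES (%s, %s, %s, %s)",
    params=(1001, 2002, '20260108/1767867138994556.png', 1),
)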


@@ -0,0 +1,14 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_article_tags
--
CREATE TABLE `ai_article_tags` (
`id` int NOT NULL AUTO_INCREMENT,
`article_id` int NOT NULL,
`coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT 'Coze生成的标签',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `uk_article_tag` (`article_id`) USING BTREE,
CONSTRAINT `ai_article_tags_ibfk_1` FOREIGN KEY (`article_id`) REFERENCES `ai_articles` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT
) ENGINE=InnoDB AUTO_INCREMENT=476258 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,52 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_articles
--
CREATE TABLE `ai_articles` (
`id` int NOT NULL AUTO_INCREMENT,
`batch_id` bigint unsigned NOT NULL DEFAULT '0' COMMENT '批次ID',
`topic_type_id` int unsigned NOT NULL DEFAULT '0',
`prompt_workflow_id` int unsigned NOT NULL DEFAULT '0',
`topic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`department` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`departmentids` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`author_id` int DEFAULT NULL,
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`department_id` int DEFAULT NULL,
`department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`created_user_id` int NOT NULL DEFAULT '0',
`review_user_id` int DEFAULT NULL,
`publish_user_id` int DEFAULT NULL,
`status` enum('topic','cover_image','generate','generate_failed','draft','pending_review','approved','rejected','published_review','published','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'draft',
`channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
`review_comment` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`publish_time` timestamp NULL DEFAULT NULL,
`baijiahao_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`baijiahao_status` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`word_count` int DEFAULT '0',
`image_count` int DEFAULT '0',
`coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT 'Coze生成的标签',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
KEY `author_id` (`author_id`) USING BTREE,
KEY `created_user_id` (`created_user_id`) USING BTREE,
KEY `review_user_id` (`review_user_id`) USING BTREE,
KEY `publish_user_id` (`publish_user_id`) USING BTREE,
KEY `idx_articles_status_user_created` (`status`,`created_user_id`,`created_at` DESC),
KEY `idx_articles_status_created` (`status`,`created_at` DESC),
KEY `idx_articles_status` (`status`),
KEY `idx_articles_created_at` (`created_at` DESC),
KEY `idx_status_id_author` (`status`,`id`,`author_id`),
KEY `idx_articles_updated_at` (`updated_at` DESC) USING BTREE,
KEY `idx_articles_status_prompt_topic_id` (`status`,`prompt_workflow_id`,`topic`,`id`),
KEY `idx_status_author_updated_id` (`status`,`author_id`,`updated_at`,`id`),
KEY `idx_author_status_updated_id` (`author_id`,`status`,`updated_at`,`id`),
CONSTRAINT `ai_articles_ibfk_1` FOREIGN KEY (`author_id`) REFERENCES `ai_authors` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
CONSTRAINT `ai_articles_ibfk_2` FOREIGN KEY (`created_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
CONSTRAINT `ai_articles_ibfk_3` FOREIGN KEY (`review_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
CONSTRAINT `ai_articles_ibfk_4` FOREIGN KEY (`publish_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE=InnoDB AUTO_INCREMENT=535975 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,31 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_authors
--
CREATE TABLE `ai_authors` (
`id` int NOT NULL AUTO_INCREMENT,
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`app_id` varchar(127) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`app_token` varchar(127) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`department_id` int NOT NULL DEFAULT '0',
`department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`title` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`hospital` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`specialty` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`toutiao_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`toutiao_images_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`introduction` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`avatar_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`cumulative_published_count` int DEFAULT '0' COMMENT '累计发文量从起始日到stat_date的总和',
`cumulative_revenue_sum` int DEFAULT '0' COMMENT '累计收入从起始日到stat_date的总和',
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active',
`channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
KEY `idx_ai_authors_status` (`status`),
KEY `idx_ai_authors_status_id` (`status`,`id`),
KEY `idx_ai_authors_department_id` (`department_id`)
) ENGINE=InnoDB AUTO_INCREMENT=392 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,21 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_batch_uploads
--
CREATE TABLE `ai_batch_uploads` (
`id` int NOT NULL AUTO_INCREMENT,
`user_id` int NOT NULL,
`file_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`file_path` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`total_count` int DEFAULT '0',
`success_count` int DEFAULT '0',
`failed_count` int DEFAULT '0',
`status` enum('processing','completed','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'processing',
`error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
KEY `user_id` (`user_id`) USING BTREE,
CONSTRAINT `ai_batch_uploads_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE=InnoDB AUTO_INCREMENT=101 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,13 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_departments
--
CREATE TABLE `ai_departments` (
`id` int NOT NULL AUTO_INCREMENT,
`department_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
KEY `idx_ai_departments_created_at` (`created_at` DESC)
) ENGINE=InnoDB AUTO_INCREMENT=110 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,33 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_image_tags
--
CREATE TABLE `ai_image_tags` (
`id` int NOT NULL AUTO_INCREMENT,
`image_id` int NOT NULL,
`image_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`tag_id` int NOT NULL,
`tag_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`keywords_id` int NOT NULL,
`keywords_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`department_id` int NOT NULL,
`department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`image_source` tinyint unsigned NOT NULL DEFAULT '1' COMMENT '1=clean_images|2=Flower_character',
`created_user_id` int NOT NULL DEFAULT '0',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`image_attached_article_count` int NOT NULL DEFAULT '0' COMMENT 'Number of articles the image is attached to',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `uk_image_tag` (`image_id`,`tag_id`) USING BTREE,
KEY `tag_id` (`tag_id`) USING BTREE,
KEY `idx_id_desc` (`id` DESC),
KEY `idx_image_id_id` (`image_id`,`id` DESC),
KEY `idx_created_at` (`created_at` DESC),
KEY `idx_department_id` (`department_id`),
KEY `idx_keywords_id` (`keywords_id`),
KEY `idx_dept_keywords` (`department_id`,`keywords_id`),
CONSTRAINT `ai_image_tags_ibfk_2` FOREIGN KEY (`tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT
) ENGINE=InnoDB AUTO_INCREMENT=29065 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,25 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_images
--
CREATE TABLE `ai_images` (
`id` int NOT NULL AUTO_INCREMENT,
`image_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`thumbnail_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`keywords` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`image_type` enum('medical','lifestyle','instruction') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'medical',
`file_size` bigint DEFAULT NULL,
`width` int DEFAULT NULL,
`height` int DEFAULT NULL,
`upload_user_id` int NOT NULL,
`status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
KEY `upload_user_id` (`upload_user_id`) USING BTREE,
CONSTRAINT `ai_images_ibfk_1` FOREIGN KEY (`upload_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE=InnoDB AUTO_INCREMENT=47096 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,15 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_keywords
--
CREATE TABLE `ai_keywords` (
`id` int NOT NULL AUTO_INCREMENT,
`keywords_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`department_id` int NOT NULL DEFAULT '0',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
KEY `idx_ai_keywords_dept_created` (`department_id`,`created_at` DESC),
KEY `idx_ai_keywords_created_at` (`created_at` DESC)
) ENGINE=InnoDB AUTO_INCREMENT=417 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,21 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_prompt_workflow
--
CREATE TABLE `ai_prompt_workflow` (
`id` int NOT NULL AUTO_INCREMENT,
`prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`auth_token` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`workflow_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`workflow_type_id` int unsigned NOT NULL DEFAULT '0',
`workflow_type_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`created_user_id` int NOT NULL DEFAULT '0',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
KEY `idx_created_user_time` (`created_user_id`,`created_at`) USING BTREE,
KEY `idx_created_at` (`created_at`) USING BTREE,
KEY `idx_workflow_id` (`workflow_id`) USING BTREE,
KEY `idx_prompt_workflow_name` (`prompt_workflow_name`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=27 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,14 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_query_category
--
CREATE TABLE `ai_query_category` (
`id` int NOT NULL AUTO_INCREMENT COMMENT '类型ID',
`category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称',
`created_user_id` int NOT NULL DEFAULT '0' COMMENT '创建用户ID',
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active' COMMENT '状态',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=6 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,20 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_query_strategies
--
CREATE TABLE `ai_query_strategies` (
`id` int NOT NULL AUTO_INCREMENT,
`category_id` int NOT NULL DEFAULT '0' COMMENT '分类ID',
`category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称',
`query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称',
`query_type_id` int NOT NULL DEFAULT '0' COMMENT '类型ID',
`define_context` varchar(2048) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '定义上下文',
`for_example` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '案例',
`created_user_id` int NOT NULL DEFAULT '0' COMMENT '创建用户ID',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active',
PRIMARY KEY (`id`) USING BTREE,
KEY `query_type_id` (`query_type_id`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=136 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,16 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_query_type
--
CREATE TABLE `ai_query_type` (
`id` int NOT NULL AUTO_INCREMENT COMMENT '类型ID',
`category_id` int NOT NULL DEFAULT '0' COMMENT '分类ID',
`category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称',
`query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称',
`created_user_id` int NOT NULL DEFAULT '0' COMMENT '创建用户ID',
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active' COMMENT '状态',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=137 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,30 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_statistics
--
CREATE TABLE `ai_statistics` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT 'Auto-increment ID',
`author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID',
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称',
`channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
`date` date NOT NULL COMMENT 'Date of statistics',
`submission_count` int DEFAULT '0' COMMENT 'Number of submissions (投稿量)',
`read_count` int DEFAULT '0' COMMENT 'Number of reads (阅读量)',
`comment_count` int DEFAULT '0' COMMENT 'Number of comments (评论量)',
`comment_rate` decimal(5,4) DEFAULT '0.0000' COMMENT 'Comment rate (评论率)',
`like_count` int DEFAULT '0' COMMENT 'Number of likes (点赞量)',
`like_rate` decimal(5,4) DEFAULT '0.0000' COMMENT 'Like rate (点赞率)',
`favorite_count` int DEFAULT '0' COMMENT 'Number of favorites (收藏量)',
`favorite_rate` decimal(5,4) DEFAULT '0.0000' COMMENT 'Favorite rate (收藏率)',
`share_count` int DEFAULT '0' COMMENT 'Number of shares (分享量)',
`share_rate` decimal(5,4) DEFAULT '0.0000' COMMENT 'Share rate (分享率)',
`slide_ratio` decimal(5,4) DEFAULT '0.0000' COMMENT 'Slide view ratio (滑图占比)',
`baidu_search_volume` int DEFAULT '0' COMMENT 'Baidu search volume (百度搜索量)',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'Creation timestamp',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Update timestamp',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `unique_date` (`date`,`author_id`) USING BTREE,
KEY `idx_date` (`date`) USING BTREE,
KEY `idx_author_id` (`author_id`)
) ENGINE=InnoDB AUTO_INCREMENT=40720 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI Content Statistics';


@@ -0,0 +1,30 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_statistics_day
--
CREATE TABLE `ai_statistics_day` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键',
`author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID',
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称',
`channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
`stat_date` date NOT NULL COMMENT '统计日期(天)',
`total_submission_count` int DEFAULT '0' COMMENT '投稿量(当日总计)',
`total_read_count` int DEFAULT '0' COMMENT '阅读量(当日总计)',
`total_comment_count` int DEFAULT '0' COMMENT '评论量(当日总计)',
`total_like_count` int DEFAULT '0' COMMENT '点赞量(当日总计)',
`total_favorite_count` int DEFAULT '0' COMMENT '收藏量(当日总计)',
`total_share_count` int DEFAULT '0' COMMENT '分享量(当日总计)',
`avg_comment_rate` decimal(5,4) DEFAULT '0.0000' COMMENT '评论率(当日平均)',
`avg_like_rate` decimal(5,4) DEFAULT '0.0000' COMMENT '点赞率(当日平均)',
`avg_favorite_rate` decimal(5,4) DEFAULT '0.0000' COMMENT '收藏率(当日平均)',
`avg_share_rate` decimal(5,4) DEFAULT '0.0000' COMMENT '分享率(当日平均)',
`avg_slide_ratio` decimal(5,4) DEFAULT '0.0000' COMMENT '滑图占比(当日平均)',
`total_baidu_search_volume` int DEFAULT '0' COMMENT '百度搜索量(当日总计)',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `uk_stat_date` (`stat_date`,`author_id`) USING BTREE,
KEY `idx_stat_date` (`stat_date`) USING BTREE,
KEY `idx_author_id` (`author_id`)
) ENGINE=InnoDB AUTO_INCREMENT=41142 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI内容每日汇总统计表';


@@ -0,0 +1,25 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_statistics_days
--
CREATE TABLE `ai_statistics_days` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键',
`author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID',
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称',
`channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
`stat_date` date NOT NULL COMMENT '统计日期(自然日)',
`daily_published_count` int DEFAULT '0' COMMENT '单日发文量',
`day_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当天收益stat_date所在自然日',
`cumulative_published_count` int DEFAULT '0' COMMENT '累计发文量从起始日到stat_date的总和',
`monthly_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当月收益stat_date所在自然月的总收益',
`weekly_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当周收益stat_date所在自然周的总收益周一至周日',
`revenue_mom_growth_rate` decimal(10,6) DEFAULT '0.000000' COMMENT '收益月环比增长率((本月收益 - 上月收益) / NULLIF(上月收益, 0)',
`revenue_wow_growth_rate` decimal(10,6) DEFAULT '0.000000' COMMENT '收益周环比增长率((本周收益 - 上周收益) / NULLIF(上周收益, 0)',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `uk_stat_date` (`stat_date`,`author_id`) USING BTREE,
KEY `idx_stat_date` (`stat_date`) USING BTREE,
KEY `idx_author_id` (`author_id`)
) ENGINE=InnoDB AUTO_INCREMENT=98484 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI内容每日核心指标汇总表含累计、收益及环比';


@@ -0,0 +1,20 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_statistics_monthly
--
CREATE TABLE `ai_statistics_monthly` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键',
`author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID',
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称',
`channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
`stat_monthly` varchar(48) NOT NULL COMMENT '统计日期(自然月)',
`monthly_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当月收益stat_date所在自然月的总收益',
`revenue_mom_growth_rate` decimal(10,6) DEFAULT '0.000000' COMMENT '收益月环比增长率((本月收益 - 上月收益) / NULLIF(上月收益, 0)',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `uk_author_stat_date` (`author_id`,`stat_monthly`) USING BTREE,
KEY `idx_stat_date` (`stat_monthly`) USING BTREE,
KEY `idx_author_id` (`author_id`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=41278 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI内容每月核心指标汇总表含累计、收益及环比';


@@ -0,0 +1,20 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_statistics_weekly
--
CREATE TABLE `ai_statistics_weekly` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键',
`author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID',
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称',
`channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin',
`stat_weekly` varchar(48) NOT NULL COMMENT '统计日期(自然周)',
`weekly_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当周收益stat_date所在自然周的总收益周一至周日',
`revenue_wow_growth_rate` decimal(10,6) DEFAULT '0.000000' COMMENT '收益周环比增长率((本周收益 - 上周收益) / NULLIF(上周收益, 0)',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `uk_author_stat_date` (`author_id`,`stat_weekly`) USING BTREE,
KEY `idx_stat_date` (`stat_weekly`) USING BTREE,
KEY `idx_author_id` (`author_id`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=47934 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI内容每周核心指标汇总表含累计、收益及环比';


@@ -0,0 +1,18 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_tag_subsets
--
CREATE TABLE `ai_tag_subsets` (
`id` int NOT NULL AUTO_INCREMENT,
`parent_tag_id` int NOT NULL,
`subset_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`subset_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
KEY `parent_tag_id` (`parent_tag_id`) USING BTREE,
CONSTRAINT `ai_tag_subsets_ibfk_1` FOREIGN KEY (`parent_tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT
) ENGINE=InnoDB AUTO_INCREMENT=25903 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,18 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_tags
--
CREATE TABLE `ai_tags` (
`id` int NOT NULL AUTO_INCREMENT,
`tag_name` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`tag_category` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`description` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`usage_count` int DEFAULT '0',
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `uk_tag_name` (`tag_name`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=13492 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,21 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_topic_type
--
CREATE TABLE `ai_topic_type` (
`id` int NOT NULL AUTO_INCREMENT,
`topic_type_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`type_id` int NOT NULL DEFAULT '0',
`type_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`prompt_workflow_id` int unsigned NOT NULL DEFAULT '0',
`prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`created_user_id` int NOT NULL DEFAULT '0',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
KEY `idx_created_user_time` (`created_user_id`,`created_at`) USING BTREE,
KEY `idx_created_at` (`created_at`) USING BTREE,
KEY `idx_type_id` (`type_id`) USING BTREE,
KEY `idx_topic_type_name` (`topic_type_name`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=28 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,16 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_user_authors
--
CREATE TABLE `ai_user_authors` (
`id` int NOT NULL AUTO_INCREMENT,
`user_id` int unsigned NOT NULL DEFAULT '0',
`username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`author_id` int NOT NULL DEFAULT '0',
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT '',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `uk_user_author` (`user_id`,`author_id`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=15935 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,21 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_user_topics
--
CREATE TABLE `ai_user_topics` (
`id` int NOT NULL AUTO_INCREMENT,
`user_id` int unsigned NOT NULL DEFAULT '0',
`username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`topic_type_id` int unsigned NOT NULL DEFAULT '0',
`topic_type_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`prompt_workflow_id` int NOT NULL DEFAULT '0',
`prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT '',
`status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'inactive',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
KEY `idx_topic_type_id` (`topic_type_id`) USING BTREE,
KEY `idx_prompt_workflow_id` (`prompt_workflow_id`) USING BTREE,
KEY `idx_created_at` (`created_at`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=127 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,20 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: ai_users
--
CREATE TABLE `ai_users` (
`id` int NOT NULL AUTO_INCREMENT,
`username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`password` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`real_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`email` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`phone` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL,
`role` enum('admin','editor','reviewer','publisher') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'editor',
`status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `uk_username` (`username`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=239 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC;


@@ -0,0 +1,38 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: baidu_keyword
--
CREATE TABLE `baidu_keyword` (
`id` int NOT NULL AUTO_INCREMENT,
`keyword` varchar(255) NOT NULL,
`crawled` tinyint DEFAULT '0',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`parents_id` int unsigned NOT NULL DEFAULT '0' COMMENT '父层级',
`seed_id` int unsigned NOT NULL DEFAULT '0' COMMENT '种子',
`seed_name` varchar(512) NOT NULL DEFAULT '' COMMENT '种子名称',
`department` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '科室',
`department_id` int unsigned NOT NULL DEFAULT '0' COMMENT '科室ID',
`partsof_speech` varchar(128) NOT NULL DEFAULT '' COMMENT '词性',
`partsof_speech_id` int unsigned NOT NULL DEFAULT '0' COMMENT '词性ID',
`type` varchar(128) NOT NULL DEFAULT '' COMMENT '类型',
`type_id` int unsigned NOT NULL DEFAULT '0' COMMENT '类型ID',
`yesorno_question` enum('yes','no','unprocessed') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'unprocessed' COMMENT '是否是问题?',
`query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称',
`category_id` int NOT NULL DEFAULT '0' COMMENT '分类ID',
`query_type_id` int NOT NULL DEFAULT '0' COMMENT '类型ID',
`category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称',
`created_user_id` int NOT NULL DEFAULT '0' COMMENT '创建用户ID',
`query_summary_status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'ready',
`query_status` enum('draft','ready','doing','failed','finished','similarity','automated_review','manual_review','generate','published') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'draft' COMMENT 'query完整扭转流程状态',
`blocking_reason` varchar(255) NOT NULL DEFAULT '' COMMENT '审核不通过原因',
`article_id` int NOT NULL DEFAULT '0' COMMENT '文章ID',
`query_stage` enum('draft','created','summary','reviewed','generated','published') NOT NULL DEFAULT 'draft' COMMENT '分5个阶段创建|总结|审核|生文|发布',
`status` enum('draft','available','unavailable','successful','failed') NOT NULL DEFAULT 'draft' COMMENT '状态_分2个阶段|可用|不可用|发布成功|发布失败',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`review_user_id` int NOT NULL DEFAULT '0' COMMENT '审核用户ID',
PRIMARY KEY (`id`),
UNIQUE KEY `keyword` (`keyword`),
KEY `idx_crawled_seed` (`crawled`,`seed_id`),
KEY `idx_created_at` (`created_at`)
) ENGINE=InnoDB AUTO_INCREMENT=798537 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;


@@ -0,0 +1,15 @@
-- SQL table definition
-- Generated from splitting a larger SQL file
-- Table: baidu_seed_keywords
--
CREATE TABLE `baidu_seed_keywords` (
`id` int NOT NULL AUTO_INCREMENT,
`keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
`crawled` tinyint DEFAULT '0',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'ready',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `keyword` (`keyword`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=231 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC;

export_approved_articles.py Normal file (137 lines)

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Export approved articles' content and tags to a CSV file
Exports the content and tags of articles whose status is 'approved' in the ai_articles table
"""
import csv
import json
import os
from datetime import datetime
from log_config import setup_logger
def export_approved_articles_to_csv(output_file='approved_articles_export.csv'):
"""
    Export approved articles' content and tags to a CSV file.
    Args:
        output_file: name of the output CSV file
"""
    # Set up the logger
logger = setup_logger('article_export', 'logs/article_export.log', 'logs/article_export_error.log')
try:
        # Fetch live data from the database
        from database_config import db_manager
        # Query approved articles, including their content and tags
sql = """
SELECT id, title, content, coze_tag, created_at, updated_at
FROM ai_articles
WHERE status = 'approved'
ORDER BY id
"""
logger.info("开始查询审核通过的文章数据...")
results = db_manager.execute_query(sql)
if not results:
logger.warning("没有找到状态为approved的文章")
print("没有找到状态为approved的文章")
return
logger.info(f"查询到 {len(results)} 条审核通过的文章")
print(f"查询到 {len(results)} 条审核通过的文章")
        # Ensure the output directory exists
output_dir = os.path.dirname(output_file)
if output_dir and not os.path.exists(output_dir):
os.makedirs(output_dir)
        # Write the CSV file
with open(output_file, 'w', newline='', encoding='utf-8-sig') as csvfile:
fieldnames = ['ID', '标题', '内容', '标签', '创建时间', '更新时间']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            # Write the header row
            writer.writeheader()
            # Write the data rows
for row in results:
id_val, title, content, coze_tag, created_at, updated_at = row
                # Try to parse the tags; if they are JSON, flatten them into a plain string
parsed_tags = coze_tag
if coze_tag:
try:
                        # Attempt to parse JSON-formatted tags
tags_data = json.loads(coze_tag)
if isinstance(tags_data, list):
parsed_tags = ', '.join(tags_data)
elif isinstance(tags_data, dict):
                            # Dict-formatted tags: join the values
parsed_tags = ', '.join(str(v) for v in tags_data.values())
except json.JSONDecodeError:
                        # Not JSON; keep the original value
parsed_tags = coze_tag
writer.writerow({
'ID': id_val,
'标题': title,
'内容': content,
'标签': parsed_tags or '',
'创建时间': created_at.strftime('%Y-%m-%d %H:%M:%S') if created_at else '',
'更新时间': updated_at.strftime('%Y-%m-%d %H:%M:%S') if updated_at else ''
})
logger.info(f"成功导出 {len(results)} 条文章到 {output_file}")
print(f"成功导出 {len(results)} 条文章到 {output_file}")
except Exception as e:
logger.error(f"导出文章数据时发生错误: {e}", exc_info=True)
print(f"导出文章数据时发生错误: {e}")
raise
def test_db_connection():
"""
    Test the database connection.
"""
try:
from database_config import db_manager
        # Run a trivial query to verify the connection
test_sql = "SELECT 1 as test"
result = db_manager.execute_query(test_sql)
print("数据库连接测试成功:", result)
return True
except Exception as e:
print(f"数据库连接测试失败: {e}")
return False
if __name__ == "__main__":
    # Ensure the logs directory exists
if not os.path.exists('logs'):
os.makedirs('logs')
    # Parse command-line arguments
import sys
if len(sys.argv) > 1:
output_filename = sys.argv[1]
else:
output_filename = 'approved_articles_export.csv'
    # Test the database connection first
print("正在测试数据库连接...")
if not test_db_connection():
print("数据库连接失败,请检查数据库配置。")
print("请确认以下信息:")
print("- 数据库服务器是否正常运行")
print("- 数据库地址、用户名、密码是否正确")
print("- 网络连接是否正常")
print("- 用户是否有查询ai_articles表的权限")
exit(1)
export_approved_articles_to_csv(output_filename)
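Besides the CLI form (python export_approved_articles.py [output.csv]), the exporter can also be called directly from other code; a sketch:

from export_approved_articles import export_approved_articles_to_csv

# Writes ID/标题/内容/标签 plus timestamps for every approved article;
# the output directory is created if it does not exist
export_approved_articles_to_csv('exports/approved_articles.csv')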

export_image_tags.py Normal file (137 lines)

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Export image-tag rows that match the filter to a CSV file
Filter: image_attached_article_count < 5
"""
import csv
import os
from datetime import datetime
from database_config import db_manager
from log_config import setup_logger
def export_image_tags_to_csv(output_file='image_tags_filtered.csv'):
"""
    Export image-tag rows that match the filter to a CSV file.
    Args:
        output_file: name of the output CSV file
"""
    # Set up the logger
logger = setup_logger('image_tags_export', 'logs/image_tags_export.log', 'logs/image_tags_export_error.log')
    # Query the matching image-tag rows (db_manager is already imported at module level)
sql = """
SELECT id, image_id, image_name, image_url, image_thumb_url, tag_id, tag_name,
keywords_id, keywords_name, department_id, department_name, image_source,
created_user_id, created_at, updated_at, image_attached_article_count
FROM ai_image_tags
WHERE image_attached_article_count < 5
ORDER BY id
"""
logger.info("开始查询符合条件的图像标签数据...")
results = db_manager.execute_query(sql)
if not results:
logger.warning("没有找到符合条件的图像标签数据 (image_attached_article_count < 5)")
print("没有找到符合条件的图像标签数据 (image_attached_article_count < 5)")
return
logger.info(f"查询到 {len(results)} 条符合条件的图像标签数据")
print(f"查询到 {len(results)} 条符合条件的图像标签数据")
    # Ensure the output directory exists
output_dir = os.path.dirname(output_file)
if output_dir and not os.path.exists(output_dir):
os.makedirs(output_dir)
    # Write the CSV file
with open(output_file, 'w', newline='', encoding='utf-8-sig') as csvfile:
fieldnames = [
'ID', '图像ID', '图像名称', '图像URL', '缩略图URL', '标签ID', '标签名称',
'关键词ID', '关键词名称', '部门ID', '部门名称', '图像来源',
'创建用户ID', '创建时间', '更新时间', '附加文章数量'
]
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        # Write the header row
        writer.writeheader()
        # Write the data rows
for row in results:
(
id_val, image_id, image_name, image_url, image_thumb_url,
tag_id, tag_name, keywords_id, keywords_name,
department_id, department_name, image_source,
created_user_id, created_at, updated_at, image_attached_article_count
) = row
writer.writerow({
'ID': id_val,
'图像ID': image_id,
'图像名称': image_name,
'图像URL': image_url,
'缩略图URL': image_thumb_url,
'标签ID': tag_id,
'标签名称': tag_name,
'关键词ID': keywords_id,
'关键词名称': keywords_name,
'部门ID': department_id,
'部门名称': department_name,
'图像来源': image_source,
'创建用户ID': created_user_id,
'创建时间': created_at.strftime('%Y-%m-%d %H:%M:%S') if created_at else '',
'更新时间': updated_at.strftime('%Y-%m-%d %H:%M:%S') if updated_at else '',
'附加文章数量': image_attached_article_count
})
logger.info(f"成功导出 {len(results)} 条图像标签数据到 {output_file}")
print(f"成功导出 {len(results)} 条图像标签数据到 {output_file}")
def test_db_connection():
"""
    Test the database connection.
"""
try:
        # Run a trivial query to verify the connection
test_sql = "SELECT 1 as test"
result = db_manager.execute_query(test_sql)
print("数据库连接测试成功:", result)
return True
except Exception as e:
print(f"数据库连接测试失败: {e}")
return False
if __name__ == "__main__":
    # Ensure the logs directory exists
if not os.path.exists('logs'):
os.makedirs('logs')
    # Test the database connection first
print("正在测试数据库连接...")
if not test_db_connection():
print("数据库连接失败,请检查数据库配置。")
print("请确认以下信息:")
print("- 数据库服务器是否正常运行")
print("- 数据库地址、用户名、密码是否正确")
print("- 网络连接是否正常")
print("- 用户是否有查询ai_image_tags表的权限")
exit(1)
    # Default output file name; it may be overridden on the command line
    output_filename = 'image_tags_filtered.csv'
    import sys
    if len(sys.argv) > 1:
        output_filename = sys.argv[1]
export_image_tags_to_csv(output_filename)

BIN generated_image.png Normal file (binary, not shown)
Binary image files not shown (3 more files).

log_config.py Normal file (337 lines)

@@ -0,0 +1,337 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Unified logging configuration module
Provides daily, date-based rotation of log files
"""
import os
import logging
import sys
from logging.handlers import TimedRotatingFileHandler
from datetime import datetime
def setup_logger(name, log_file, error_log_file=None, level=logging.INFO,
backup_count=30, error_backup_count=90, console_output=True, force_reinit=False):
"""
设置日志记录器,支持按日期自动切割
Args:
name: 日志记录器名称
log_file: 主日志文件路径
error_log_file: 错误日志文件路径(可选)
level: 日志级别
backup_count: 主日志文件保留天数
error_backup_count: 错误日志文件保留天数
console_output: 是否输出到控制台
force_reinit: 是否强制重新初始化删除现有handlers
Returns:
logging.Logger: 配置好的日志记录器
"""
# 创建logs目录
log_dir = os.path.dirname(log_file)
if log_dir and not os.path.exists(log_dir):
os.makedirs(log_dir)
# 获取或创建logger
logger = logging.getLogger(name)
logger.setLevel(level)
# 检查是否需要重新初始化
need_reinit = force_reinit or not logger.handlers
# 如果强制重新初始化或没有handlers则清除现有handlers
if force_reinit and logger.handlers:
print(f"强制重新初始化日志记录器: {name}")
for handler in logger.handlers[:]: # 使用切片创建副本
logger.removeHandler(handler)
need_reinit = True
# 如果没有handlers则添加新的handlers
if need_reinit:
# 创建日志格式
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
# 1. 主日志文件处理器 - 按日期切割
file_handler = TimedRotatingFileHandler(
filename=log_file,
when='midnight', # 每天午夜切割
interval=1, # 每1天切割一次
backupCount=backup_count, # 保留天数
encoding='utf-8'
)
file_handler.setLevel(level)
file_handler.setFormatter(formatter)
# 设置切割后的文件名格式filename.log.2025-07-21
file_handler.suffix = "%Y-%m-%d"
# 自定义文件名生成函数,确保格式正确
def namer(default_name):
# 确保文件名格式为 filename.log.2025-07-21
return default_name
file_handler.namer = namer
# 添加主日志处理器
logger.addHandler(file_handler)
# 2. 错误日志文件处理器(如果指定)
if error_log_file:
error_file_handler = TimedRotatingFileHandler(
filename=error_log_file,
when='midnight',
interval=1,
backupCount=error_backup_count, # 错误日志保留更长时间
encoding='utf-8'
)
error_file_handler.setLevel(logging.ERROR)
error_file_handler.setFormatter(formatter)
error_file_handler.suffix = "%Y-%m-%d"
error_file_handler.namer = namer
logger.addHandler(error_file_handler)
# 3. 控制台处理器(如果启用)
if console_output:
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(level)
console_formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%H:%M:%S'
)
console_handler.setFormatter(console_formatter)
logger.addHandler(console_handler)
# 设置第三方库的日志级别
logging.getLogger('requests').setLevel(logging.WARNING)
logging.getLogger('urllib3').setLevel(logging.WARNING)
logging.getLogger('whoosh').setLevel(logging.WARNING)
# 记录日志系统启动信息
logger.info(f"日志系统已启动 - 记录器: {name}")
logger.info(f"主日志文件: {log_file}")
if error_log_file:
logger.info(f"错误日志文件: {error_log_file}")
logger.info(f"日志保留策略: 每天午夜分割,主日志保留{backup_count}")
if error_log_file:
logger.info(f"错误日志保留策略: 每天午夜分割,保留{error_backup_count}")
return logger
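A short usage sketch (editor's addition; names and paths are illustrative). Rotation happens at midnight, and the previous day's file is renamed with the configured suffix:

    demo = setup_logger('demo', 'logs/demo.log', 'logs/demo_error.log')
    demo.info("goes to logs/demo.log and the console")
    demo.error("additionally goes to logs/demo_error.log")
    # after midnight, yesterday's records live in e.g. logs/demo.log.2025-07-21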
def setup_curl_convert_logger(force_reinit=False):
"""设置curl_convert.py的日志记录器"""
return setup_logger(
name='curl_convert',
log_file='logs/curl_convert.log',
error_log_file='logs/curl_convert_error.log',
level=logging.INFO,
backup_count=30,
error_backup_count=90,
console_output=True,
force_reinit=force_reinit
)
def setup_article_server_logger(force_reinit=False):
"""设置flask_article_server.py的日志记录器"""
return setup_logger(
name='article_server',
log_file='logs/article_server.log',
error_log_file='logs/article_error.log',
level=logging.INFO,
backup_count=3,
error_backup_count=9,
console_output=True,
force_reinit=force_reinit
)
def setup_article_server_search_logger(force_reinit=False):
"""设置flask_article_server_search.py的日志记录器"""
return setup_logger(
name='article_server_search',
log_file='logs/article_server_search.log',
error_log_file='logs/article_server_search_error.log',
level=logging.INFO,
backup_count=3,
error_backup_count=9,
console_output=True,
force_reinit=force_reinit
)
def setup_aiarticle_server_logger(force_reinit=False):
"""设置flask_aiarticle_server.py的日志记录器"""
return setup_logger(
name='aiarticle_server',
log_file='logs/aiarticle_server.log',
error_log_file='logs/aiarticle_server_error.log',
level=logging.INFO,
backup_count=30,
error_backup_count=90,
console_output=True,
force_reinit=force_reinit
)
def setup_whoosh_search_tags_logger(force_reinit=False):
"""设置whoosh_search_tags.py的日志记录器"""
return setup_logger(
name='whoosh_search_tags',
log_file='logs/whoosh_search_tags.log',
error_log_file='logs/whoosh_search_tags_error.log',
level=logging.INFO,
backup_count=30,
error_backup_count=90,
console_output=True,
force_reinit=force_reinit
)
def setup_baidu_crawl_logger(force_reinit=False):
"""设置baidu_crawl.py的日志记录器"""
return setup_logger(
name='baidu_crawl',
log_file='logs/baidu_crawl.log',
error_log_file='logs/baidu_crawl_error.log',
level=logging.INFO,
backup_count=3,
error_backup_count=3,
console_output=True,
force_reinit=force_reinit
)
def setup_baidu_seed_logger(force_reinit=False):
"""设置baidu_seed.py的日志记录器"""
return setup_logger(
name='baidu_seed',
log_file='logs/baidu_seed.log',
error_log_file='logs/baidu_seed_error.log',
level=logging.INFO,
backup_count=3,
error_backup_count=3,
console_output=True,
force_reinit=force_reinit
)
def setup_baidu_crawl_again_logger(force_reinit=False):
"""设置baidu_seed.py的日志记录器"""
return setup_logger(
name='baidu_crawl_again',
log_file='logs/baidu_crawl_again.log',
error_log_file='logs/baidu_crawl_again_error.log',
level=logging.INFO,
backup_count=3,
error_backup_count=3,
console_output=True,
force_reinit=force_reinit
)
def reinitialize_all_loggers():
"""重新初始化所有日志记录器"""
print("重新初始化所有日志记录器...")
# 重新初始化所有日志记录器
setup_curl_convert_logger(force_reinit=True)
setup_article_server_logger(force_reinit=True)
setup_article_server_search_logger(force_reinit=True)
setup_aiarticle_server_logger(force_reinit=True)
setup_whoosh_search_tags_logger(force_reinit=True)
setup_baidu_crawl_logger(force_reinit=True)
setup_baidu_seed_logger(force_reinit=True)
setup_baidu_crawl_again_logger(force_reinit=True)
print("所有日志记录器重新初始化完成")
def cleanup_old_logs(log_dir='logs', days_to_keep=30):
"""
清理旧的日志文件
Args:
log_dir: 日志目录
days_to_keep: 保留天数
"""
import glob
from datetime import datetime, timedelta
if not os.path.exists(log_dir):
return
cutoff_date = datetime.now() - timedelta(days=days_to_keep)
# 查找所有日志文件
log_patterns = [
os.path.join(log_dir, '*.log.*'), # 切割后的日志文件
os.path.join(log_dir, '*.log') # 当前日志文件
]
for pattern in log_patterns:
for log_file in glob.glob(pattern):
try:
# 获取文件修改时间
file_mtime = datetime.fromtimestamp(os.path.getmtime(log_file))
if file_mtime < cutoff_date:
os.remove(log_file)
print(f"已删除旧日志文件: {log_file}")
except Exception as e:
print(f"删除日志文件失败 {log_file}: {e}")
def get_log_file_info(log_dir='logs'):
"""
获取日志文件信息
Args:
log_dir: 日志目录
Returns:
dict: 日志文件信息
"""
if not os.path.exists(log_dir):
return {}
log_info = {}
for filename in os.listdir(log_dir):
if filename.endswith('.log'):
file_path = os.path.join(log_dir, filename)
try:
size = os.path.getsize(file_path)
mtime = datetime.fromtimestamp(os.path.getmtime(file_path))
log_info[filename] = {
'size': size,
'size_mb': round(size / (1024 * 1024), 2),
'modified': mtime.strftime('%Y-%m-%d %H:%M:%S'),
'path': file_path
}
except Exception as e:
log_info[filename] = {'error': str(e)}
return log_info
if __name__ == "__main__":
# 测试日志配置
print("测试日志配置...")
# 测试各个日志记录器
logger1 = setup_curl_convert_logger()
logger1.info("curl_convert 日志测试")
logger2 = setup_article_server_logger()
logger2.info("article_server 日志测试")
logger3 = setup_article_server_search_logger()
logger3.info("article_server_search 日志测试")
logger4 = setup_aiarticle_server_logger()
logger4.info("aiarticle_server 日志测试")
logger5 = setup_whoosh_search_tags_logger()
logger5.info("whoosh_search_tags 日志测试")
# 显示日志文件信息
print("\n当前日志文件信息:")
log_info = get_log_file_info()
for filename, info in log_info.items():
if 'error' not in info:
print(f"{filename}: {info['size_mb']}MB, 修改时间: {info['modified']}")
else:
print(f"{filename}: 错误 - {info['error']}")
print("\n日志配置测试完成!")

910
match_article_images.py Normal file

@@ -0,0 +1,910 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
文章与图片智能挂靠脚本
根据文章标签匹配ai_image_tags表中的图片使用大模型进行处理
如果挂靠失败或没有相同标签的图片则使用Gemini生成图片
"""
import json
import os
import re
import requests
import csv
import pymysql
from typing import List, Dict, Tuple, Optional
from collections import defaultdict
from database_config import db_manager
from log_config import setup_logger
import time
import random
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
def get_articles_with_tags_from_db() -> List[Dict]:
"""
从数据库获取文章及其标签
Returns:
包含文章信息的字典列表
"""
# 设置日志记录器
logger = setup_logger('article_matching', 'logs/article_matching.log', 'logs/article_matching_error.log')
articles = []
try:
# 查询审核通过的文章,包含内容和标签
sql = """
SELECT id, title, content, coze_tag
FROM ai_articles
WHERE status = 'approved'
ORDER BY id
"""
logger.info("开始查询审核通过的文章数据...")
results = db_manager.execute_query(sql)
if not results:
logger.warning("没有找到状态为approved的文章")
print("没有找到状态为approved的文章")
return articles
logger.info(f"查询到 {len(results)} 条审核通过的文章")
print(f"查询到 {len(results)} 条审核通过的文章")
for row in results:
article_id, title, content, coze_tag = row
# 解析标签
tags = []
if coze_tag:
try:
# 尝试解析JSON格式的标签
tags_data = json.loads(coze_tag)
if isinstance(tags_data, list):
tags = tags_data
elif isinstance(tags_data, dict):
# 如果是字典格式,提取值;展平一层,使 {"tags": ["a","b"]} 与 {"k": "a"} 都得到扁平列表
values = list(tags_data.values())
tags = [t for v in values for t in (v if isinstance(v, list) else [v])]
else:
# 如果是字符串,尝试按逗号分割
tags = [tag.strip() for tag in str(tags_data).split(',') if tag.strip()]
except json.JSONDecodeError:
# 如果不是JSON格式按逗号分割
tags = [tag.strip() for tag in str(coze_tag).split(',') if tag.strip()]
articles.append({
'id': article_id,
'title': title,
'content': content,
'tags': tags
})
except Exception as e:
logger.error(f"从数据库获取文章数据时发生错误: {e}", exc_info=True)
print(f"从数据库获取文章数据时发生错误: {e}")
raise
return articles
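The branch above accepts three coze_tag shapes; a standalone sketch of the parsing outcomes (editor's addition, mirroring the logic above):

    import json
    def parse_coze_tag(raw):
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            return [t.strip() for t in raw.split(',') if t.strip()]
        if isinstance(data, list):
            return data
        if isinstance(data, dict):
            return [t for v in data.values() for t in (v if isinstance(v, list) else [v])]
        return [t.strip() for t in str(data).split(',') if t.strip()]

    print(parse_coze_tag('["自然", "风景"]'))           # ['自然', '风景']
    print(parse_coze_tag('{"tags": ["自然", "风景"]}'))  # ['自然', '风景']
    print(parse_coze_tag('自然,风景'))                   # ['自然', '风景']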
def get_images_by_tags_from_db(tags: List[str] = [], used_counts: Dict[str, int] = {}) -> List[Dict]:
"""
从数据库根据标签获取图片
Args:
tags: 标签列表
used_counts: 已使用次数的字典key为图片IDvalue为使用次数
Returns:
包含图片信息的字典列表
"""
if not tags:
return []
# 设置日志记录器
logger = setup_logger('article_matching', 'logs/article_matching.log', 'logs/article_matching_error.log')
images = []
try:
# 查询符合条件的图像标签数据
sql = """
SELECT id, image_id, image_name, image_url, tag_name, keywords_name, department_name, image_attached_article_count
FROM ai_image_tags
WHERE image_attached_article_count < 5
ORDER BY id
"""
logger.info("开始查询符合条件的图像标签数据...")
results = db_manager.execute_query(sql)
if not results:
logger.warning("没有找到符合条件的图像标签数据 (image_attached_article_count < 5)")
print("没有找到符合条件的图像标签数据 (image_attached_article_count < 5)")
return images
logger.info(f"查询到 {len(results)} 条符合条件的图像标签数据")
print(f"查询到 {len(results)} 条符合条件的图像标签数据")
for row in results:
(
image_id, db_image_id, image_name, image_url, tag_name,
keywords_name, department_name, base_count
) = row
# 检查图片的附加文章数量是否小于5考虑已使用次数
used_count = used_counts.get(str(image_id), 0)
total_count = base_count + used_count
if total_count >= 5:
continue
# 检查标签是否匹配
if any(tag.lower() in tag_name.lower() for tag in tags):
images.append({
'id': str(image_id),
'image_id': db_image_id,
'image_name': image_name,
'image_url': image_url,
'tag_name': tag_name,
'keywords_name': keywords_name,
'department_name': department_name,
'base_count': base_count
})
except Exception as e:
logger.error(f"从数据库获取图片数据时发生错误: {e}", exc_info=True)
print(f"从数据库获取图片数据时发生错误: {e}")
raise
print(f"从数据库找到 {len(images)} 张符合条件的匹配图片")
return images
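Tag matching above is case-insensitive substring containment between each article tag and the image's tag_name; for example (editor's sketch):

    tags = ['风景']
    tag_name = '自然风景,户外摄影'
    print(any(t.lower() in tag_name.lower() for t in tags))  # True: '风景' occurs inside tag_name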
def call_qwen_model(article: Dict, image_urls: List[str]) -> bool:
"""
调用通义千问大模型进行文章与图片挂靠评估
Args:
article: 文章信息
image_urls: 图片URL列表
Returns:
挂靠是否成功
"""
# 通义千问API配置
api_key = "sk-e6a38204022a4b538b8954f0584712af"
api_url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"
# 构建请求内容
content = f"""
请评估以下文章与图片的匹配度:
文章标题: {article['title']}
文章内容: {article['content'][:500]}...
图片URLs: {', '.join(image_urls)}
请判断这些图片是否适合用于这篇文章。如果匹配度高,请回复"匹配成功";如果匹配度低,请回复"匹配失败"
"""
headers = {
'Authorization': f'Bearer {api_key}',
'Content-Type': 'application/json'
}
payload = {
"model": "qwen-max", # 或其他合适的模型
"input": {
"messages": [
{
"role": "user",
"content": content
}
]
},
"parameters": {
"temperature": 0.7
}
}
try:
response = requests.post(api_url, headers=headers, json=payload)
if response.status_code == 200:
result = response.json()
# 解析响应,判断匹配结果
if 'output' in result and 'text' in result['output']:
response_text = result['output']['text'].lower()
# 根据响应内容判断是否匹配
if '匹配成功' in response_text or '是的' in response_text or '合适' in response_text:
print(f"通义千问评估结果: 匹配成功 - 文章 '{article['title']}'")
return True
else:
print(f"通义千问评估结果: 匹配失败 - 文章 '{article['title']}'")
return False
else:
print(f"通义千问API响应格式异常: {result}")
return False
else:
print(f"通义千问API调用失败: {response.status_code} - {response.text}")
# API调用失败时仍然尝试匹配这里返回False触发图片生成
return False
except Exception as e:
print(f"调用通义千问API时发生错误: {e}")
# 发生错误时返回False以触发图片生成
return False
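A parsing check against a mocked response of the shape the code assumes (editor's sketch; the live DashScope payload may differ):

    mock = {'output': {'text': '匹配成功:图片与文章主题一致'}}
    text = mock['output']['text'].lower()
    print(any(k in text for k in ('匹配成功', '是的', '合适')))  # True -> treated as a successful match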
def insert_generated_image_to_db(image_name: str, image_url: str, article_tags: List[str]) -> Optional[Dict]:
"""
将Gemini生成的图片信息插入数据库
Args:
image_name: 图片文件名,如 "1755310671174988.png"
image_url: 图片URL路径"20250816/1755310671174988.png"
article_tags: 文章标签列表用于查询department和keywords
Returns:
包含插入信息的字典:{
'tag_image_id': tag_image_id,
'image_id': image_id,
'image_url': image_url,
'image_thumb_url': image_thumb_url,
'keywords_id': keywords_id,
'keywords_name': keywords_name,
'department_id': department_id,
'department_name': department_name
}
"""
connection = db_manager.get_connection()
if connection is None:
print("无法连接到数据库")
return None
try:
with connection.cursor(pymysql.cursors.DictCursor) as cursor:
# 1. 根据文章标签查询ai_image_tags表获取department和keywords信息
if article_tags:
# 使用第一个标签查询
query = """
SELECT department_name, keywords_name, department_id, keywords_id, tag_id
FROM ai_image_tags
WHERE tag_name = %s
LIMIT 1
"""
cursor.execute(query, (article_tags[0],))
tag_info = cursor.fetchone()
if tag_info:
department = tag_info['department_name']
keywords = tag_info['keywords_name']
department_id = tag_info['department_id']
keywords_id = tag_info['keywords_id']
tag_id = tag_info['tag_id']
tag_name = article_tags[0]
else:
# 如果没有找到,使用默认值
department = "AI生成"
keywords = "AI图片"
department_id = 1
keywords_id = 1
tag_id = 1
tag_name = article_tags[0] if article_tags else "AI生成"
else:
# 没有标签,使用默认值
department = "AI生成"
keywords = "AI图片"
department_id = 1
keywords_id = 1
tag_id = 1
tag_name = "AI生成"
# 2. 插入ai_images表
insert_image_query = """
INSERT INTO ai_images
(image_name, image_url, image_thumb_url, department, keywords, image_type, upload_user_id, status)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
"""
cursor.execute(insert_image_query, (
image_name,
image_url,
'', # image_thumb_url
department,
keywords,
'medical', # image_type
1, # upload_user_id默认用户ID
'active' # status
))
image_id = cursor.lastrowid
print(f"图片信息已插入ai_images表image_id: {image_id}")
# 3. 插入ai_image_tags表
insert_tag_query = """
INSERT INTO ai_image_tags
(image_id, image_name, image_url, image_thumb_url, tag_id, tag_name,
keywords_id, keywords_name, department_id, department_name,
image_source, created_user_id, image_attached_article_count)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""
cursor.execute(insert_tag_query, (
image_id,
image_name,
image_url,
'', # image_thumb_url
tag_id,
tag_name,
keywords_id,
keywords,
department_id,
department,
3, # image_source: 3表示AI生成
1, # created_user_id
0 # image_attached_article_count
))
tag_image_id = cursor.lastrowid
print(f"图片标签信息已插入ai_image_tags表tag_image_id: {tag_image_id}")
# 提交事务
connection.commit()
# 返回包含所有需要信息的字典
return {
'tag_image_id': tag_image_id,
'image_id': image_id,
'image_url': image_url,
'image_thumb_url': '',
'keywords_id': keywords_id,
'keywords_name': keywords,
'department_id': department_id,
'department_name': department
}
except Exception as e:
print(f"插入图片信息到数据库失败: {e}")
connection.rollback()
return None
finally:
connection.close()
def insert_article_image_relation(article_id: int, image_id: int, image_url: str, image_thumb_url: str,
tag_image_id: int, keywords_id: int, keywords_name: str,
department_id: int, department_name: str, image_source: int = 0) -> Optional[int]:
"""
将文章与图片的关联信息插入ai_article_images表
Args:
article_id: 文章ID
image_id: 图片IDai_images表的id
image_url: 图片URL
image_thumb_url: 缩略图URL
tag_image_id: 图片标签IDai_image_tags表的id
keywords_id: 关键词ID
keywords_name: 关键词名称
department_id: 部门ID
department_name: 部门名称
image_source: 图片来源0表示默认
Returns:
插入的ai_article_images表的ID
"""
connection = db_manager.get_connection()
if connection is None:
print("无法连接到数据库")
return None
try:
with connection.cursor(pymysql.cursors.DictCursor) as cursor:
# 1. 查询当前文章下已有图片的最大sort_order
query_max_sort = """
SELECT COALESCE(MAX(sort_order), 0) as max_sort_order
FROM ai_article_images
WHERE article_id = %s
"""
cursor.execute(query_max_sort, (article_id,))
result = cursor.fetchone()
max_sort_order = result['max_sort_order'] if result else 0
new_sort_order = max_sort_order + 1
print(f"文章 {article_id} 当前最大sort_order: {max_sort_order}, 新图片sort_order: {new_sort_order}")
# 2. 插入ai_article_images表
insert_query = """
INSERT INTO ai_article_images
(article_id, image_id, image_url, image_thumb_url, image_tag_id, sort_order,
keywords_id, keywords_name, department_id, department_name, image_source)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""
cursor.execute(insert_query, (
article_id,
image_id,
image_url,
image_thumb_url,
tag_image_id,
new_sort_order,
keywords_id,
keywords_name,
department_id,
department_name,
image_source
))
article_image_id = cursor.lastrowid
print(f"文章图片关联信息已插入ai_article_images表id: {article_image_id}")
# 提交事务
connection.commit()
return article_image_id
except Exception as e:
print(f"插入文章图片关联信息失败: {e}")
connection.rollback()
return None
finally:
connection.close()
def generate_image_with_gemini(prompt: str, article_tags: List[str], article_id: int) -> str:
"""
使用Gemini生成图片并上传到服务器
Args:
prompt: 图片生成提示词
article_tags: 文章标签列表用于查询department和keywords
article_id: 文章ID用于关联图片
Returns:
上传后的图片URL
"""
# 导入必要的库
try:
from google import genai
from google.genai import types
from google.genai.client import HttpOptions
except ImportError:
print("错误未安装google-genai库请运行 'pip install google-genai' 进行安装")
raise
client = genai.Client(http_options=HttpOptions(base_url="https://work.poloapi.com"),
api_key="sk-V4tPnDgzFPa7nxWrvKnNJsW8ZcBXXPuGmjfgvPVRnwpHoeob")
print(f"正在调用Gemini API生成图片提示词: {prompt[:50]}...")
# 生成内容
response = client.models.generate_content(
model="gemini-3-pro-image-preview",
contents=[prompt],
)
# 检查是否有候选答案
if not response.candidates:
raise Exception("Gemini API未返回任何候选答案")
# 处理响应 - 遍历第一个候选答案的内容部分
candidate = response.candidates[0]
if not candidate.content or not candidate.content.parts:
raise Exception("Gemini API返回的候选答案中没有内容部分")
for part in candidate.content.parts:
if hasattr(part, 'text') and part.text is not None:
print(f"Gemini响应文本: {part.text}")
elif hasattr(part, 'inline_data') and part.inline_data is not None:
image_data = part.inline_data
if image_data.data is not None:
# 生成唯一的文件名(基于时间戳)
# time 与 os 已在模块顶部导入,此处仅需 datetime
from datetime import datetime
timestamp_ms = int(time.time() * 1000) # 毫秒级时间戳
image_filename = f"{timestamp_ms}.png"
today_date = datetime.now().strftime("%Y%m%d")
image_url_path = f"{today_date}/{image_filename}"
temp_filename = f"temp_generated_image_{timestamp_ms}.png"
# 保存图片数据到临时文件
with open(temp_filename, 'wb') as f:
f.write(image_data.data)
print(f"Gemini生成图片成功: {temp_filename}")
# 先将图片信息插入数据库,获取相关信息
image_info = insert_generated_image_to_db(image_filename, image_url_path, article_tags)
if not image_info:
raise Exception("插入图片信息到数据库失败")
print(f"图片信息已插入数据库tag_image_id: {image_info['tag_image_id']}, image_id: {image_info['image_id']}")
# 使用tag_image_id上传图片到服务器
uploaded_url = upload_image_to_server(temp_filename, image_info['tag_image_id'])
# 将文章与图片的关联信息插入ai_article_images表
article_image_id = insert_article_image_relation(
article_id=article_id,
image_id=image_info['image_id'],
image_url=image_info['image_url'],
image_thumb_url=image_info['image_thumb_url'],
tag_image_id=image_info['tag_image_id'],
keywords_id=image_info['keywords_id'],
keywords_name=image_info['keywords_name'],
department_id=image_info['department_id'],
department_name=image_info['department_name'],
image_source=0 # 默认值
)
if article_image_id:
print(f"文章图片关联信息已创建ai_article_images.id: {article_image_id}")
# 删除临时文件
os.remove(temp_filename)
print(f"图片已上传到服务器: {uploaded_url}")
# 返回上传后的图片URL
return uploaded_url
# 如果没有返回图片数据,抛出异常
raise Exception("Gemini API未返回有效的图片数据")
def upload_image_to_server(image_path: str, tag_image_id: int) -> str:
"""
上传图片到服务器
Args:
image_path: 本地图片路径
tag_image_id: 图片标签ID
Returns:
服务器上的图片URL
"""
# 登录获取JWT token
base_url = "http://47.99.184.230:8324" # 使用外网API地址
jwt_token = login_and_get_jwt_token(base_url)
if not jwt_token:
raise Exception("获取JWT token失败无法上传图片")
# 准备上传请求
upload_url = f"{base_url}/api/images/upload"
headers = {
'Authorization': f'Bearer {jwt_token}',
}
# 读取图片文件
with open(image_path, 'rb') as image_file:
files = {'file': image_file}
data = {'tag_image_id': tag_image_id} # 添加必传参数
response = requests.post(upload_url, headers=headers, files=files, data=data)
print(f"图片上传响应状态码: {response.status_code}")
print(f"图片上传响应内容: {response.text}")
if response.status_code == 200:
result = response.json()
if result.get('code') == 200:
# 返回服务器上的图片URL
return result['data']['http_image_url']
else:
raise Exception(f"图片上传失败: {result.get('message', '未知错误')}")
else:
raise Exception(f"图片上传请求失败,状态码: {response.status_code}, 响应: {response.text}")
def login_and_get_jwt_token(base_url: str) -> Optional[str]:
"""
登录获取JWT token
"""
login_url = f"{base_url}/api/auth/login"
login_data = {
"username": "user010", # 使用固定的账号
"password": "@5^2W6R7"
}
print(f"尝试登录: {login_data['username']}")
print(f"登录URL: {login_url}")
try:
response = requests.post(login_url, json=login_data, headers={'Content-Type': 'application/json'})
print(f"响应状态码: {response.status_code}")
if response.status_code == 200:
result = response.json()
if result.get('code') == 200:
jwt_token = result['data']['token']
print("JWT token获取成功")
return jwt_token
else:
print(f"登录失败: {result.get('message', '未知错误')}")
return None
else:
print(f"登录请求失败: {response.status_code}")
return None
except Exception as e:
print(f"登录异常: {e}")
return None
def batch_publish_articles(base_url: str, jwt_token: str, article_ids: List[int]) -> bool:
"""
批量提交文章到/api/articles/batch-publish-auto接口
"""
try:
print(f"开始批量提交 {len(article_ids)} 篇文章到batch-publish-auto接口")
# 构建批量发布数据
publish_data = {
"article_ids": article_ids
}
print(f"准备批量提交的数据: {json.dumps(publish_data, ensure_ascii=False)}")
# 发送请求
upload_url = f"{base_url}/api/articles/batch-publish-auto"
headers = {
'Authorization': f'Bearer {jwt_token}',
'Content-Type': 'application/json',
'Accept': 'application/json'
}
response = requests.post(upload_url, json=publish_data, headers=headers)
print(f"批量提交响应状态码: {response.status_code}")
if response.status_code == 200:
try:
result = response.json()
print(f"批量提交响应内容: {result}")
# 根据接口实际返回格式判断成功
if result.get('code') == 200:
data = result.get('data', {})
published_count = data.get('published_count', 0)
failed_count = data.get('failed_count', 0)
success_msg = f"批量提交成功,发布: {published_count}篇,失败: {failed_count}"
print(success_msg)
return True
else:
print(f"批量提交失败: {result.get('message', '未知错误')}")
return False
except json.JSONDecodeError as e:
print(f"解析批量提交响应失败: {e}")
return False
elif response.status_code == 401:
# Token过期
print("收到401错误JWT token可能已过期")
return False
else:
print(f"批量提交请求失败,状态码: {response.status_code}")
return False
except Exception as e:
print(f"批量提交异常: {e}")
return False
def process_single_article(article, used_image_counts, match_results):
"""
处理单个文章与图片的匹配和挂靠
Args:
article: 单个文章数据
used_image_counts: 图片使用计数
match_results: 匹配结果列表
Returns:
是否处理成功
"""
print(f"\n处理文章: {article['title']} (ID: {article['id']})")
# 根据文章标签获取匹配的图片(考虑已使用次数)
matched_images = get_images_by_tags_from_db(article['tags'], used_image_counts)
if matched_images:
print(f"找到 {len(matched_images)} 张符合条件的匹配图片")
# 按基础使用次数排序,优先使用基础计数较低的图片
matched_images.sort(key=lambda x: x['base_count'])
for img in matched_images:
# 提取图片URL并添加前缀(先检查原始URL非空;拼接后的字符串恒为真,不能作为判断条件)
if img['image_url']:
    image_url = "http://images11.bxmkb.cn/Images/" + img['image_url']
# 调用通义千问大模型进行挂靠评估
match_success = call_qwen_model(article, [image_url])
if match_success:
print(f"文章与图片挂靠成功: {article['title']}")
# 更新图片使用次数
used_image_counts[img['id']] += 1
# 记录匹配结果
match_results.append({
'文章ID': article['id'],
'文章标题': article['title'],
'文章内容': article['content'][:100] + '...' if len(article['content']) > 100 else article['content'], # 限制内容长度
'标签': ', '.join(article['tags']),
'匹配的图片URL': image_url,
'图片ID': img['id'],
'图片名称': img['image_name'],
'图片标签': img['tag_name'],
'图片关键词': img['keywords_name'],
'图片部门': img['department_name'],
'匹配状态': '成功'
})
return True
# for 循环结束仍未返回,说明候选图片均未通过评估,使用Gemini生成图片
print(f"文章未能与任何图片成功匹配使用Gemini生成图片: {article['title']}")
# 使用文章标题和标签生成提示词
prompt = f"'{article['title']}'相关的插图,标签: {', '.join(article['tags'])}"
generated_image_url = generate_image_with_gemini(prompt, article['tags'], article['id'])
print(f"生成的图片URL: {generated_image_url}")
# 记录生成图片的结果
match_results.append({
'文章ID': article['id'],
'文章标题': article['title'],
'文章内容': article['content'][:100] + '...' if len(article['content']) > 100 else article['content'],
'标签': ', '.join(article['tags']),
'匹配的图片URL': generated_image_url,
'图片ID': 'N/A',
'图片名称': 'Generated',
'图片标签': 'N/A',
'图片关键词': 'N/A',
'图片部门': 'N/A',
'匹配状态': '生成图片'
})
return True
else:
print(f"没有找到符合条件的匹配图片使用Gemini生成图片: {article['title']}")
# 使用文章标题和标签生成提示词
prompt = f"'{article['title']}'相关的插图,标签: {', '.join(article['tags'])}"
generated_image_url = generate_image_with_gemini(prompt, article['tags'], article['id'])
print(f"生成的图片URL: {generated_image_url}")
# 记录生成图片的结果
match_results.append({
'文章ID': article['id'],
'文章标题': article['title'],
'文章内容': article['content'][:100] + '...' if len(article['content']) > 100 else article['content'],
'标签': ', '.join(article['tags']),
'匹配的图片URL': generated_image_url,
'图片ID': 'N/A',
'图片名称': 'Generated',
'图片标签': 'N/A',
'图片关键词': 'N/A',
'图片部门': 'N/A',
'匹配状态': '生成图片'
})
return True
def process_article_image_matching(test_mode=False, test_count=None):
"""
处理文章与图片的匹配和挂靠
Args:
test_mode: 是否为测试模式
test_count: 测试文章数量(仅在测试模式下使用)
"""
# 用于跟踪每张图片的使用次数
used_image_counts = defaultdict(int)
# 存储匹配结果
match_results = []
try:
# 根据模式决定获取哪些文章
articles = get_articles_with_tags_from_db()
if not articles:
print("没有找到文章")
return
# 如果是测试模式只取前test_count条数据
if test_mode:
if test_count is None:
test_count = 3 # 默认测试前3条
articles = articles[:test_count]
print(f"测试模式:处理前 {len(articles)} 篇文章")
success_count = 0
# 收集所有处理后的文章ID用于发布
processed_article_ids = []
for article in articles:
if process_single_article(article, used_image_counts, match_results):
success_count += 1
processed_article_ids.append(article['id'])
else:
print(f"处理文章 {article['id']} 失败")
# 将匹配结果写入CSV文件
output_csv = 'article_image_match_results.csv'
with open(output_csv, 'w', newline='', encoding='utf-8-sig') as csvfile:
fieldnames = [
'文章ID', '文章标题', '文章内容', '标签',
'匹配的图片URL', '图片ID', '图片名称',
'图片标签', '图片关键词', '图片部门', '匹配状态'
]
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
for result in match_results:
writer.writerow(result)
if not test_mode:
print(f"\n处理完成! 成功挂靠: {success_count} 篇, 生成图片: {generated_count}")
print(f"匹配结果已保存至: {output_csv}")
# 如果有处理过的文章,将它们提交到发布接口
if processed_article_ids:
print(f"\n开始发布处理过的 {len(processed_article_ids)} 篇文章...")
# 登录获取JWT token
base_url = "http://47.99.184.230:8324" # 使用外网API地址
jwt_token = login_and_get_jwt_token(base_url)
if jwt_token:
# 批量发布文章
if batch_publish_articles(base_url, jwt_token, processed_article_ids):
print(f"成功发布 {len(processed_article_ids)} 篇文章")
else:
print("批量发布失败")
else:
print("获取JWT token失败无法发布文章")
else:
print("\n没有处理过的文章,跳过发布步骤")
else:
print(f"\n测试模式完成! 处理了 {len(articles)} 篇文章,成功挂靠: {success_count} 篇, 生成图片: {len([r for r in match_results if r['匹配状态'] == '生成图片'])}")
print(f"处理结果已保存至: {output_csv}")
except Exception as e:
print(f"处理文章图片匹配时发生错误: {e}")
raise
if __name__ == "__main__":
import sys
print("开始处理文章与图片的智能挂靠...")
# 检查命令行参数
if len(sys.argv) > 1:
if sys.argv[1] == "--test" and len(sys.argv) > 2:
# 测试模式处理前N篇文章
test_count = int(sys.argv[2]) if sys.argv[2].isdigit() else 3
print(f"启动测试模式,处理前 {test_count} 篇文章")
process_article_image_matching(test_mode=True, test_count=test_count)
elif sys.argv[1] == "--test" and len(sys.argv) == 2:
# 提示用户输入要测试的文章数量
test_count_input = input("请输入要测试的文章数量 (默认3): ")
test_count = int(test_count_input) if test_count_input.strip().isdigit() else 3
print(f"启动测试模式,处理前 {test_count} 篇文章")
process_article_image_matching(test_mode=True, test_count=test_count)
else:
print("使用方法:")
print(" 正常模式: python match_article_images.py")
print(" 测试模式: python match_article_images.py --test [文章ID]")
else:
# 正常模式:处理所有文章
process_article_image_matching()

680
push_article_published.py Normal file

@@ -0,0 +1,680 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AI文章自动生成监控脚本
监控数据库中status为topic的记录自动调用Coze API生成文章并提交
"""
import os
import sys
import time
import json
import logging
import requests
import pymysql
from datetime import datetime
from typing import Dict, List, Optional, Any
import traceback
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from queue import Queue, Empty
import random
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
# 添加项目根目录到Python路径
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database_config import get_db_manager
from log_config import setup_logger
# 配置日志记录器,支持按日期切割和控制台输出
logger = setup_logger(
name='push_article',
log_file='logs/push_article_published.log',
error_log_file='logs/push_article_published_error.log',
level=logging.INFO,
console_output=True
)
# 配置常量
#BASE_URL = "http://47.99.184.230:8324"
BASE_URL = "http://127.0.0.1:8324"
SLEEP_INTERVAL = 5 # 监控间隔(秒)
WORKER_COUNT = 10 # 并行处理worker数量可配置
# 新增:批量发布配置
BATCH_SIZE = 8 # 一次处理的文章数量,可调
BATCH_INTERVAL = 2 # 批次间隔时间(秒),可调
# 网络重试配置
MAX_RETRIES = 3 # 最大重试次数
BACKOFF_FACTOR = 1 # 退避因子
RETRY_STATUS_CODES = [500, 502, 503, 504, 429] # 需要重试的HTTP状态码
CONNECTION_TIMEOUT = 30 # 连接超时(秒)
READ_TIMEOUT = 120 # 读取超时(秒)
# 全局变量
AUTH_TOKEN = None
WORKFLOW_ID = None
JWT_TOKEN = None
class PushArticlePublished:
def __init__(self):
# API配置
self.base_url = BASE_URL
# 认证信息
self.auth_token = None
self.workflow_id = None
self.jwt_token = None
# 使用统一的数据库管理器
self.db_manager = get_db_manager()
# 登录配置
self.login_credentials = {
'username': 'user010',
'password': '@5^2W6R7'
}
# 禁用代理
self.proxies = {
'http': None,
'https': None
}
# 并行处理相关
self.processing_lock = threading.Lock() # 用于线程安全的记录分配
self.processed_ids = set() # 已处理的记录ID集合
# 创建会话和配置重试策略
self.session = self._create_session()
# 网络统计
self.request_stats = {
'total_requests': 0,
'successful_requests': 0,
'failed_requests': 0,
'retry_attempts': 0,
'connection_errors': 0,
'timeout_errors': 0
}
logger.info("PushArticlePublished 初始化完成")
def _create_session(self):
"""创建配置了重试策略的requests会话"""
session = requests.Session()
# 配置重试策略
retry_strategy = Retry(
total=MAX_RETRIES,
status_forcelist=RETRY_STATUS_CODES,
backoff_factor=BACKOFF_FACTOR,
allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "OPTIONS", "TRACE"]
)
# 配置HTTP适配器
adapter = HTTPAdapter(
max_retries=retry_strategy,
pool_connections=10,
pool_maxsize=20
)
session.mount("http://", adapter)
session.mount("https://", adapter)
        # 注意: requests.Session 没有全局 timeout 属性;超时在 _make_request_with_retry 中逐请求显式传入
return session
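With MAX_RETRIES=3 and BACKOFF_FACTOR=1, the manual retry loop in _make_request_with_retry waits roughly 1s, 2s, 4s (plus up to 1s of jitter) before attempts 2-4; a quick check of the schedule (editor's sketch):

    for attempt in range(3):
        print(f"after failed attempt {attempt + 1}: wait ~{1 * (2 ** attempt)}s + jitter")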
def _make_request_with_retry(self, method, url, **kwargs):
"""带重试机制的网络请求方法"""
self.request_stats['total_requests'] += 1
for attempt in range(MAX_RETRIES + 1):
try:
# 使用会话发送请求
response = self.session.request(
method=method,
url=url,
timeout=(CONNECTION_TIMEOUT, READ_TIMEOUT),
proxies=self.proxies,
**kwargs
)
# 请求成功
self.request_stats['successful_requests'] += 1
if attempt > 0:
logger.info(f"网络请求在第 {attempt + 1} 次尝试后成功")
return response
except requests.exceptions.ConnectionError as e:
self.request_stats['connection_errors'] += 1
if attempt < MAX_RETRIES:
self.request_stats['retry_attempts'] += 1
backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1)
logger.warning(f"连接错误 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}")
logger.info(f"等待 {backoff_time:.2f} 秒后重试...")
time.sleep(backoff_time)
else:
self.request_stats['failed_requests'] += 1
logger.error(f"连接最终失败,已重试 {MAX_RETRIES} 次: {e}")
raise
except requests.exceptions.Timeout as e:
self.request_stats['timeout_errors'] += 1
if attempt < MAX_RETRIES:
self.request_stats['retry_attempts'] += 1
backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1)
logger.warning(f"请求超时 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}")
logger.info(f"等待 {backoff_time:.2f} 秒后重试...")
time.sleep(backoff_time)
else:
self.request_stats['failed_requests'] += 1
logger.error(f"请求超时最终失败,已重试 {MAX_RETRIES} 次: {e}")
raise
except requests.exceptions.ChunkedEncodingError as e:
if attempt < MAX_RETRIES:
self.request_stats['retry_attempts'] += 1
backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1)
logger.warning(f"数据传输错误 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}")
logger.info(f"等待 {backoff_time:.2f} 秒后重试...")
time.sleep(backoff_time)
else:
self.request_stats['failed_requests'] += 1
logger.error(f"数据传输最终失败,已重试 {MAX_RETRIES} 次: {e}")
raise
except Exception as e:
self.request_stats['failed_requests'] += 1
logger.error(f"网络请求发生未预期错误: {e}")
raise
def log_network_stats(self):
"""记录网络统计信息"""
stats = self.request_stats
success_rate = (stats['successful_requests'] / stats['total_requests'] * 100) if stats['total_requests'] > 0 else 0
stats_msg = (
f"网络统计 - 总请求: {stats['total_requests']}, "
f"成功: {stats['successful_requests']}, "
f"失败: {stats['failed_requests']}, "
f"重试: {stats['retry_attempts']}, "
f"连接错误: {stats['connection_errors']}, "
f"超时错误: {stats['timeout_errors']}, "
f"成功率: {success_rate:.1f}%"
)
logger.info(stats_msg)
self.log_to_database('INFO', '网络统计', stats_msg)
def get_db_connection(self):
"""获取数据库连接"""
try:
return self.db_manager.get_connection()
except Exception as e:
logger.error(f"数据库连接失败: {e}")
return None
def log_to_database(self, level: str, message: str, details: str = None):
"""记录日志到数据库ai_logs表"""
try:
with self.db_manager.get_cursor() as cursor:
# 映射日志级别到数据库状态
status_map = {
'INFO': 'success',
'WARNING': 'warning',
'ERROR': 'error'
}
status = status_map.get(level, 'success')
sql = """
INSERT INTO ai_logs (user_id, action, description, status, error_message, created_at)
VALUES (%s, %s, %s, %s, %s, NOW())
"""
cursor.execute(sql, (None, 'coze_generator', message, status, details))
logger.info(f"日志已记录到数据库: {level} - {message}")
except Exception as e:
logger.error(f"记录日志到数据库失败: {e}")
def login_and_get_jwt_token(self) -> bool:
"""登录获取JWT token参考JavaScript逻辑"""
try:
login_url = f"{self.base_url}/api/auth/login"
login_data = {
"username": "user010", # 使用用户指定的账号
"password": "@5^2W6R7"
}
logger.info(f"尝试登录: {login_data['username']}")
logger.info(f"登录URL: {login_url}")
self.log_to_database('INFO', f"尝试登录用户: {login_data['username']}")
response = self._make_request_with_retry(
'POST',
login_url,
json=login_data,
headers={'Content-Type': 'application/json'}
)
logger.info(f"响应状态码: {response.status_code}")
logger.info(f"响应内容: {response.text[:500]}...")
if response.status_code == 200:
result = response.json()
if result.get('code') == 200:
self.jwt_token = result['data']['token']
logger.info("JWT token获取成功")
self.log_to_database('INFO', "JWT token获取成功", json.dumps(result['data']))
return True
else:
error_msg = f"登录失败: {result.get('message', '未知错误')}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, json.dumps(result))
return False
else:
error_msg = f"登录请求失败: {response.status_code}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, response.text)
return False
except Exception as e:
error_msg = f"登录异常: {e}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, traceback.format_exc())
return False
def batch_publish_auto(self, article_ids: List[int]) -> bool:
"""批量提交文章到/api/articles/batch-publish-auto接口"""
try:
logger.info(f"开始批量提交 {len(article_ids)} 篇文章到batch-publish-auto接口")
self.log_to_database('INFO', f"开始批量提交文章", f"article_ids: {article_ids}")
# 确保有JWT token
if not self.jwt_token:
logger.warning("JWT token缺失尝试重新登录")
self.log_to_database('WARNING', "JWT token缺失重新登录")
if not self.login_and_get_jwt_token():
error_msg = "重新登录失败"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg)
return False
# 构建批量发布数据 - 根据接口要求只需要article_ids
publish_data = {
"article_ids": article_ids
}
logger.info(f"准备批量提交的数据: {json.dumps(publish_data, ensure_ascii=False)}")
# 发送请求 - 修正接口路径
upload_url = f"{self.base_url}/api/articles/batch-publish-auto"
headers = {
'Authorization': f'Bearer {self.jwt_token}',
'Content-Type': 'application/json',
'Accept': 'application/json'
}
response = self._make_request_with_retry(
'POST',
upload_url,
json=publish_data,
headers=headers
)
logger.info(f"批量提交响应状态码: {response.status_code}")
if response.status_code == 200:
try:
result = response.json()
logger.info(f"批量提交响应内容: {result}")
# 根据接口实际返回格式判断成功
if result.get('code') == 200:
data = result.get('data', {})
published_count = data.get('published_count', 0)
failed_count = data.get('failed_count', 0)
success_msg = f"批量提交成功,发布: {published_count}篇,失败: {failed_count}"
logger.info(success_msg)
self.log_to_database('INFO', success_msg, f"article_ids: {article_ids}")
return True
else:
error_msg = f"批量提交失败: {result.get('message', '未知错误')}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, response: {result}")
return False
except json.JSONDecodeError as e:
error_msg = f"解析批量提交响应失败: {e}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, f"response_text: {response.text}")
return False
elif response.status_code == 401:
# Token过期尝试重新登录并重试一次
logger.warning("收到401错误JWT token可能已过期尝试重新登录")
self.log_to_database('WARNING', "JWT token过期重新登录", f"article_ids: {article_ids}")
if self.login_and_get_jwt_token():
logger.info("重新登录成功,重试批量提交请求")
# 更新headers中的token
headers['Authorization'] = f'Bearer {self.jwt_token}'
# 重试请求
retry_response = self._make_request_with_retry(
'POST',
upload_url,
json=publish_data,
headers=headers
)
if retry_response.status_code == 200:
try:
result = retry_response.json()
logger.info(f"重试批量提交响应内容: {result}")
if result.get('code') == 200:
data = result.get('data', {})
published_count = data.get('published_count', 0)
failed_count = data.get('failed_count', 0)
success_msg = f"重试批量提交成功,发布: {published_count}篇,失败: {failed_count}"
logger.info(success_msg)
self.log_to_database('INFO', success_msg, f"article_ids: {article_ids}")
return True
else:
error_msg = f"重试批量提交失败: {result.get('message', '未知错误')}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, response: {result}")
return False
except json.JSONDecodeError as e:
error_msg = f"解析重试批量提交响应失败: {e}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, f"response_text: {retry_response.text}")
return False
else:
error_msg = f"重试批量提交请求失败,状态码: {retry_response.status_code}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, f"response_text: {retry_response.text}")
return False
else:
error_msg = "重新登录失败,无法重试批量提交"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}")
return False
else:
error_msg = f"批量提交请求失败,状态码: {response.status_code}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, f"response_text: {response.text}")
return False
except requests.exceptions.Timeout as e:
error_msg = f"批量提交请求超时: {e}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, timeout: {CONNECTION_TIMEOUT}s/{READ_TIMEOUT}s")
return False
except requests.exceptions.ConnectionError as e:
error_msg = f"批量提交连接错误: {e}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, base_url: {self.base_url}")
return False
except requests.exceptions.RequestException as e:
error_msg = f"批量提交网络异常: {e}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, exception_type: {type(e).__name__}")
return False
except Exception as e:
error_msg = f"批量提交异常: {e}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, traceback: {traceback.format_exc()}")
return False
def is_publish_time_allowed(self) -> bool:
"""检查当前时间是否在允许发布的时间窗口内北京时间6:00-23:59"""
current_hour = datetime.now().hour
# 凌晨00:00-05:59禁止发布6:00-23:59允许发布
if current_hour >= 6:
logger.info(f"当前时间 {datetime.now().strftime('%H:%M:%S')} 可以推送")
return True
else:
logger.info(f"当前时间 {datetime.now().strftime('%H:%M:%S')} 在禁止发布时段00:00-05:59跳过推送")
return False
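Note (editor's addition): the check uses datetime.now(), i.e. the host's local time, while the comment speaks of Beijing time. If the server may run in another timezone, a timezone-aware variant would be needed (sketch, assuming Python 3.9+ for zoneinfo):

    from datetime import datetime
    from zoneinfo import ZoneInfo

    def is_publish_time_allowed_beijing() -> bool:
        # evaluate the 06:00-23:59 window in Asia/Shanghai regardless of host timezone
        return datetime.now(ZoneInfo("Asia/Shanghai")).hour >= 6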
def filter_articles_by_daily_limit(self, articles: List[Dict]) -> List[Dict]:
"""根据作者每日发文限制过滤文章
检查ai_statistics_days表中daily_published_count是否超过daily_post_max
如果超过,则该作者的文章今日不发
"""
if not articles:
return []
try:
today_date = datetime.now().strftime('%Y-%m-%d')
filtered_articles = []
with self.db_manager.get_cursor() as cursor:
for article in articles:
author_id = article.get('author_id')
if not author_id:
logger.warning(f"文章ID {article['id']} 缺少author_id跳过")
continue
# 先检查ai_authors表作者必须满足 daily_post_max > 0, status = 'active', channel = 1
author_check_sql = """
SELECT id, author_name, daily_post_max, status, channel
FROM ai_authors
WHERE id = %s AND daily_post_max > 0 AND status = 'active' AND channel = 1
"""
cursor.execute(author_check_sql, (author_id,))
author_result = cursor.fetchone()
if not author_result:
logger.info(f"[业务日志] 作者ID {author_id} 不符合发文条件(daily_post_max>0 AND status=active AND channel=1)文章ID {article['id']} 过滤掉")
# 将文章状态更新为pending_review重新走审批流程
update_sql = "UPDATE ai_articles SET status = 'pending_review', updated_at = NOW() WHERE id = %s"
cursor.execute(update_sql, (article['id'],))
logger.info(f"[业务日志] 文章ID {article['id']} 状态已更新为pending_review需重新审批")
continue
# 查询该作者当天的发文统计
sql = """
SELECT daily_published_count, daily_post_max
FROM ai_statistics_days
WHERE author_id = %s AND stat_date = %s
"""
cursor.execute(sql, (author_id, today_date))
result = cursor.fetchone()
if result:
daily_published_count = result['daily_published_count'] or 0
daily_post_max = result['daily_post_max'] or 0
# 检查daily_post_max是否小于1小于1则不允许发文
if daily_post_max < 1:
#logger.info(f"[业务日志] 作者ID {author_id} daily_post_max={daily_post_max} 小于1文章ID {article['id']} 过滤掉,不允许发文")
continue
if daily_published_count >= daily_post_max:
#logger.info(f"[业务日志] 作者ID {author_id} 今日已发 {daily_published_count} 篇,达到上限 {daily_post_max}文章ID {article['id']} 跳过")
continue
else:
#logger.info(f"[业务日志] 作者ID {author_id} 今日已发 {daily_published_count}/{daily_post_max}文章ID {article['id']} 允许发布")
filtered_articles.append(article)
else:
# 没有统计记录,默认不允许发布(需要先初始化统计记录)
logger.info(f"[业务日志] 作者ID {author_id} 无当日统计记录文章ID {article['id']} 过滤掉,需先初始化统计记录")
continue
logger.info(f"每日限制过滤完成: 原始 {len(articles)} 篇 -> 允许发布 {len(filtered_articles)}")
return filtered_articles
except Exception as e:
error_msg = f"检查每日发文限制异常: {e}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, traceback.format_exc())
# 异常时返回原始列表,避免阻塞
return articles
def get_published_review_articles(self) -> List[Dict]:
"""获取状态为published_review的待发布文章"""
try:
with self.db_manager.get_cursor() as cursor:
# 查询published_review状态的文章
sql = """
SELECT
id,
title,
status,
created_at,
updated_at,
author_id
FROM (
SELECT
id,
title,
status,
created_at,
updated_at,
author_id,
ROW_NUMBER() OVER (
PARTITION BY author_id
ORDER BY updated_at ASC, id ASC
) as author_rank
FROM ai_articles
WHERE status = 'published_review'
AND author_id > 0
) ranked_articles
"""
cursor.execute(sql)
results = cursor.fetchall()
if results:
logger.info(f"查询到 {len(results)} 个待发布文章")
for result in results:
logger.info(f"待发布文章 - ID: {result['id']}, 标题: {result['title']}, 状态: {result['status']}")
#self.log_to_database('INFO', f"发现待发布文章: {result['title']}",
#f"ID: {result['id']}, 状态: {result['status']}")
else:
logger.info("未查询到待发布文章")
return results
except Exception as e:
error_msg = f"查询待发布文章异常: {e}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, traceback.format_exc())
return []
def process_published_review_articles(self, published_articles: List[Dict], worker_id: int) -> int:
"""Worker线程处理published_review状态文章的方法"""
processed_count = 0
thread_name = f"PublishWorker-{worker_id}"
threading.current_thread().name = thread_name
logger.info(f"[{thread_name}] 启动,准备处理待发布文章")
# 按批次处理文章
for i in range(0, len(published_articles), BATCH_SIZE):
batch = published_articles[i:i + BATCH_SIZE]
article_ids = [article['id'] for article in batch]
logger.info(f"[{thread_name}] 处理批次 {i//BATCH_SIZE + 1}文章ID: {article_ids}")
# 批量提交文章
if self.batch_publish_auto(article_ids):
processed_count += len(article_ids)
logger.info(f"[{thread_name}] 成功处理批次,文章数量: {len(article_ids)}")
else:
logger.error(f"[{thread_name}] 处理批次失败文章ID: {article_ids}")
# 批次间隔
if i + BATCH_SIZE < len(published_articles):
logger.info(f"[{thread_name}] 等待 {BATCH_INTERVAL} 秒后处理下一批次")
time.sleep(BATCH_INTERVAL)
logger.info(f"[{thread_name}] 完成,共处理 {processed_count} 篇文章")
return processed_count
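With BATCH_SIZE=8, a list of 20 articles is split into batches of 8, 8 and 4, with BATCH_INTERVAL seconds between them (editor's sketch of the slicing above):

    articles = list(range(20))
    BATCH_SIZE = 8
    batches = [articles[i:i + BATCH_SIZE] for i in range(0, len(articles), BATCH_SIZE)]
    print([len(b) for b in batches])  # [8, 8, 4]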
def run_monitor(self):
"""运行监控循环支持多worker并行处理"""
logger.info(f"开始监控ai_articles表使用 {WORKER_COUNT} 个worker并行处理...")
self.log_to_database('INFO', f'启动文章自动生成监控服务worker数量: {WORKER_COUNT}', 'run_monitor')
# 统计计数器
loop_count = 0
stats_interval = 60 # 每60次循环记录一次统计约5分钟
while True:
try:
# 获取待发布的文章
published_articles = self.get_published_review_articles()
# 逻辑1: 检查时间窗口北京时间6:00-23:59允许00:00-05:59禁止
if not self.is_publish_time_allowed():
published_articles = []
logger.info("当前处于禁止发布时段,清空待发布列表")
# 逻辑2: 根据作者每日发文限制过滤文章
if published_articles:
published_articles = self.filter_articles_by_daily_limit(published_articles)
# 处理待发布文章
if published_articles:
logger.info(f"发现 {len(published_articles)} 篇待发布文章,启动批量发布处理")
self.log_to_database('INFO', f'发现待发布文章,启动批量处理', f'文章数量: {len(published_articles)}')
# 使用单个worker处理批量发布避免并发冲突
try:
processed_count = self.process_published_review_articles(published_articles, 1)
logger.info(f"批量发布处理完成,共处理 {processed_count} 篇文章")
self.log_to_database('INFO', f'批量发布处理完成', f'共处理 {processed_count} 篇文章')
except Exception as e:
logger.error(f"批量发布处理异常: {e}")
self.log_to_database('ERROR', f'批量发布处理异常', str(e))
# 如果没有任何待处理任务
if not published_articles:
logger.info("暂无待处理任务,继续监控...")
# 每次循环后休息
time.sleep(SLEEP_INTERVAL)
# 定期记录网络统计
loop_count += 1
if loop_count % stats_interval == 0:
self.log_network_stats()
except KeyboardInterrupt:
logger.info("收到中断信号,停止监控")
self.log_to_database('INFO', '监控服务手动停止', 'KeyboardInterrupt')
break
except Exception as e:
error_msg = f"监控循环异常: {e}"
logger.error(error_msg)
self.log_to_database('ERROR', error_msg, traceback.format_exc())
time.sleep(5) # 异常时等待5秒再继续
def main():
"""主函数"""
generator = PushArticlePublished()
try:
# 先登录获取JWT token
logger.info("开始登录获取JWT token")
if not generator.login_and_get_jwt_token():
logger.error("登录失败,程序退出")
return
# 开始监控
generator.run_monitor()
except Exception as e:
logger.error(f"程序运行异常: {e}")
generator.log_to_database('ERROR', f'程序运行异常: {e}', traceback.format_exc())
if __name__ == "__main__":
main()

4
requirements.txt Normal file

@@ -0,0 +1,4 @@
requests==2.31.0
google-genai==0.1.0
Pillow==10.0.0
openpyxl==3.1.2
PyMySQL  # imported as pymysql by match_article_images.py and push_article_published.py

14
setup_env.bat Normal file

@@ -0,0 +1,14 @@
@echo off
echo 正在创建虚拟环境...
python -m venv venv
echo 虚拟环境创建完成!
echo 正在激活虚拟环境...
call venv\Scripts\activate.bat
echo 正在安装依赖...
pip install -r requirements.txt
echo 虚拟环境设置完成!
echo 激活虚拟环境的命令: venv\Scripts\activate
pause

13
setup_env.sh Normal file

@@ -0,0 +1,13 @@
#!/bin/bash
echo "正在创建虚拟环境..."
python3 -m venv venv
echo "虚拟环境创建完成!"
echo "正在激活虚拟环境..."
source venv/bin/activate
echo "正在安装依赖..."
pip install -r requirements.txt
echo "虚拟环境设置完成!"
echo "激活虚拟环境的命令: source venv/bin/activate"

297
split_sql_tables.py Normal file

@@ -0,0 +1,297 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
将包含多个表的SQL文件拆分为单个表的SQL文件
"""
import os
import re
from pathlib import Path
def split_sql_tables(input_file_path):
"""
将SQL文件中的每个表拆分为单独的文件
"""
# 读取输入文件
with open(input_file_path, 'r', encoding='utf-8') as f:
content = f.read()
    # 分割SQL内容,查找CREATE TABLE语句
    # 匹配模式:从CREATE TABLE开始,直到下一个CREATE TABLE或文件结尾
    create_table_pattern = r'(CREATE TABLE\s+`?(\w+)`?\s*\(.+?)(?=\nCREATE TABLE|\Z)'
# 分离出每个CREATE TABLE语句
tables = re.findall(create_table_pattern, content, re.DOTALL | re.IGNORECASE)
# 如果上面的正则没匹配到,尝试另一种方式
if not tables:
# 分割CREATE TABLE部分
parts = re.split(r'\n(?=CREATE TABLE)', content)
tables = []
for part in parts:
if part.strip().upper().startswith('CREATE TABLE'):
# 提取表名
table_name_match = re.search(r'CREATE TABLE\s+`?(\w+)`?', part, re.IGNORECASE)
if table_name_match:
table_name = table_name_match.group(1)
tables.append((part.strip(), table_name))
# 确保输出目录存在
output_dir = Path(input_file_path).parent / "split_tables"
output_dir.mkdir(exist_ok=True)
# 为每个表创建单独的文件
for table_sql, table_name in tables:
# 清理表名,确保它是有效的文件名
clean_table_name = re.sub(r'[^\w\-_\.]', '_', table_name)
# 创建输出文件路径
output_file_path = output_dir / f"{clean_table_name}.sql"
# 写入表定义到单独的文件
with open(output_file_path, 'w', encoding='utf-8') as f:
f.write("-- SQL table definition\n")
f.write("-- Generated from splitting a larger SQL file\n")
f.write("\n")
f.write(table_sql.strip())
f.write("\n")
print(f"已创建表文件: {output_file_path}")
def split_sql_tables_advanced(input_file_path):
"""
高级方法拆分SQL文件中的表定义
"""
with open(input_file_path, 'r', encoding='utf-8') as f:
lines = f.readlines()
# 确保输出目录存在
output_dir = Path(input_file_path).parent / "split_tables"
output_dir.mkdir(exist_ok=True)
current_table_lines = []
in_table_definition = False
current_table_name = ""
i = 0
while i < len(lines):
line = lines[i].strip()
# 检查是否是CREATE TABLE语句
if line.upper().startswith('CREATE TABLE'):
# 如果之前已经在处理表定义,保存之前的表
if in_table_definition and current_table_lines:
save_table_to_file(current_table_name, current_table_lines, output_dir)
current_table_lines = []
# 开始新的表定义
in_table_definition = True
current_table_lines.append(lines[i])
# 提取表名
table_name_match = re.search(r'CREATE TABLE\s+`?(\w+)`?', line, re.IGNORECASE)
if table_name_match:
current_table_name = table_name_match.group(1)
# 检查这一行是否以分号结束
if line.endswith(';'):
# 单行CREATE TABLE语句
save_table_to_file(current_table_name, current_table_lines, output_dir)
current_table_lines = []
in_table_definition = False
else:
# 多行CREATE TABLE语句继续收集行直到遇到分号
pass
elif in_table_definition:
current_table_lines.append(lines[i])
# 检查是否以分号结束
if line.endswith(';'):
# 结束当前表定义
save_table_to_file(current_table_name, current_table_lines, output_dir)
current_table_lines = []
in_table_definition = False
# 如果不在表定义中且遇到CREATE TABLE之前的行忽略或处理其他内容
i += 1
# 处理最后一个表(如果有)
if in_table_definition and current_table_lines:
save_table_to_file(current_table_name, current_table_lines, output_dir)
def save_table_to_file(table_name, table_lines, output_dir):
"""
将表定义保存到文件
"""
# 清理表名,确保它是有效的文件名
clean_table_name = re.sub(r'[^\w\-_\.]', '_', table_name)
# 创建输出文件路径
output_file_path = output_dir / f"{clean_table_name}.sql"
# 写入表定义到单独的文件
with open(output_file_path, 'w', encoding='utf-8') as f:
f.write("-- SQL table definition\n")
f.write("-- Generated from splitting a larger SQL file\n")
f.write("-- Table: " + table_name + "\n")
f.write("\n")
for line in table_lines:
f.write(line.rstrip() + '\n')
print(f"已创建表文件: {output_file_path}")
def extract_create_table_statements(input_file_path):
"""
提取SQL文件中的所有CREATE TABLE语句
"""
with open(input_file_path, 'r', encoding='utf-8') as f:
content = f.read()
    # 正则方案难以可靠处理多行定义与嵌套括号,这里使用简单方法:逐行解析
lines = content.split('\n')
# 确保输出目录存在
output_dir = Path(input_file_path).parent / "split_tables"
output_dir.mkdir(exist_ok=True)
current_table_lines = []
in_table_definition = False
current_table_name = ""
for line in lines:
stripped_line = line.strip()
if stripped_line.upper().startswith('CREATE TABLE'):
# 如果正在处理上一个表,保存它
if in_table_definition and current_table_lines:
save_table_to_file(current_table_name, current_table_lines, output_dir)
# 开始新表
in_table_definition = True
current_table_name_match = re.search(r'CREATE TABLE\s+`?(\w+)`?', stripped_line, re.IGNORECASE)
if current_table_name_match:
current_table_name = current_table_name_match.group(1)
current_table_lines = [line]
elif in_table_definition:
current_table_lines.append(line)
# 检查行是否以分号结尾,表示表定义结束
if stripped_line.endswith(';'):
# 这可能是一个完整的表定义
# 简单检查是否是表定义的结尾
save_table_to_file(current_table_name, current_table_lines, output_dir)
current_table_lines = []
in_table_definition = False
# 否则跳过非表定义的行
# 处理最后一个表
if in_table_definition and current_table_lines:
save_table_to_file(current_table_name, current_table_lines, output_dir)
def parse_sql_file(input_file_path):
"""
解析SQL文件并拆分表定义
"""
with open(input_file_path, 'r', encoding='utf-8') as f:
content = f.read()
# 查找所有CREATE TABLE语句
# 更安全的解析方法 - 逐行处理
lines = content.split('\n')
# 确保输出目录存在
output_dir = Path(input_file_path).parent / "split_tables"
output_dir.mkdir(exist_ok=True)
current_table_lines = []
in_table_definition = False
current_table_name = ""
for line in lines:
stripped_line = line.strip()
if stripped_line.upper().startswith('CREATE TABLE'):
# 如果正在处理上一个表,保存它
if in_table_definition and current_table_lines:
save_table_to_file_simple(current_table_name, current_table_lines, output_dir)
# 开始新表
in_table_definition = True
# 提取表名
table_name_match = re.search(r'CREATE TABLE\s+(?:IF NOT EXISTS\s+)?`?(\w+)`?', stripped_line, re.IGNORECASE)
if table_name_match:
current_table_name = table_name_match.group(1)
current_table_lines = [line]
elif in_table_definition:
current_table_lines.append(line)
# 检查行是否以分号结尾,表示表定义结束
if stripped_line and stripped_line.endswith(';'):
# 检查是否包含表定义的关键元素如ENGINE, CHARACTER SET等
# 或者是完整的CREATE TABLE语句
if ('ENGINE' in stripped_line or 'CHARACTER SET' in stripped_line or
'ROW_FORMAT' in stripped_line or ') ENGINE' in line or line.count('(') <= line.count(')')):
# 这是一个完整的表定义
save_table_to_file_simple(current_table_name, current_table_lines, output_dir)
current_table_lines = []
in_table_definition = False
# 否则跳过非表定义的行
# 处理最后一个表
if in_table_definition and current_table_lines:
save_table_to_file_simple(current_table_name, current_table_lines, output_dir)
def save_table_to_file_simple(table_name, table_lines, output_dir):
"""
将表定义保存到文件(简化版)
"""
# 清理表名
clean_table_name = re.sub(r'[^\w\-_\.]', '_', table_name)
# 创建输出文件路径
output_file_path = output_dir / f"{clean_table_name}.sql"
# 写入表定义到单独的文件
with open(output_file_path, 'w', encoding='utf-8') as f:
f.write("-- SQL table definition\n")
f.write("-- Generated from splitting a larger SQL file\n")
f.write(f"-- Table: {table_name}\n")
f.write("--\n\n")
for line in table_lines:
f.write(line)
f.write('\n')
print(f"已创建表文件: {output_file_path}")
if __name__ == "__main__":
import sys
if len(sys.argv) < 2:
input_file = input("请输入SQL文件路径: ").strip().strip('"\'')
else:
input_file = sys.argv[1].strip('"\'')
if not os.path.exists(input_file):
print(f"错误: 文件 {input_file} 不存在")
sys.exit(1)
print(f"正在拆分SQL文件: {input_file}")
parse_sql_file(input_file)
print("拆分完成!")

BIN test_articles.xlsx Normal file (binary file not shown)
BIN test_images.xlsx Normal file (binary file not shown)