commit fbf12f3fa3d5227f78a6e7ab2fc24bda64ba1b7c Author: shengyudong Date: Fri Jan 30 18:09:55 2026 +0800 初始提交:文字匹配图片项目 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a5e340b --- /dev/null +++ b/.gitignore @@ -0,0 +1,55 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual Environment +venv/ +ENV/ +env/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Logs +logs/ +*.log + +# Database +*.db +*.sqlite3 + +# CSV files +*.csv + +# Temporary files +temp_* +*.tmp + +# OS +.DS_Store +Thumbs.db + +# Project specific +article_image_match_results.csv diff --git a/11111111.py b/11111111.py new file mode 100644 index 0000000..d21a6c6 --- /dev/null +++ b/11111111.py @@ -0,0 +1,32 @@ +from google import genai +from google.genai import types +from PIL import Image +from google.genai.client import HttpOptions +client = genai.Client(http_options=HttpOptions(base_url="https://work.poloapi.com"),api_key="sk-V4tPnDgzFPa7nxWrvKnNJsW8ZcBXXPuGmjfgvPVRnwpHoeob") + +prompt = ("Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme") +response = client.models.generate_content( + model="gemini-3-pro-image-preview", + contents=[prompt], +) + +# 检查是否有候选答案 +if not response.candidates: + print("API未返回任何候选答案") +else: + candidate = response.candidates[0] + if not candidate.content: + print("API返回的候选答案中没有内容") + elif not hasattr(candidate.content, 'parts') or not candidate.content.parts: + print("API返回的候选答案内容中没有parts") + else: + for part in candidate.content.parts: + if hasattr(part, 'text') and part.text is not None: + print(part.text) + elif hasattr(part, 'inline_data') and part.inline_data is not None: + image_data = part.inline_data + if image_data.data is not None: + # 保存图片数据到文件 + with open('generated_image.png', 'wb') as f: + f.write(image_data.data) + print("图片生成成功: generated_image.png") \ No newline at end of file diff --git a/20260108/1767867138994556.png b/20260108/1767867138994556.png new file mode 100644 index 0000000..52c00fc Binary files /dev/null and b/20260108/1767867138994556.png differ diff --git a/20260108/1767867138994556_thumb.png b/20260108/1767867138994556_thumb.png new file mode 100644 index 0000000..8bc1cfc Binary files /dev/null and b/20260108/1767867138994556_thumb.png differ diff --git a/20260108/1767867148035776.png b/20260108/1767867148035776.png new file mode 100644 index 0000000..c33596d Binary files /dev/null and b/20260108/1767867148035776.png differ diff --git a/20260108/1767867148035776_thumb.png b/20260108/1767867148035776_thumb.png new file mode 100644 index 0000000..a484859 Binary files /dev/null and b/20260108/1767867148035776_thumb.png differ diff --git a/20260108/1767867156936619.png b/20260108/1767867156936619.png new file mode 100644 index 0000000..18bbd6e Binary files /dev/null and b/20260108/1767867156936619.png differ diff --git a/20260108/1767867156936619_thumb.png b/20260108/1767867156936619_thumb.png new file mode 100644 index 0000000..c5f6bfa Binary files /dev/null and b/20260108/1767867156936619_thumb.png differ diff --git a/20260108/1767867165665952.png b/20260108/1767867165665952.png new file mode 100644 index 0000000..b1a38da Binary files /dev/null and b/20260108/1767867165665952.png differ diff --git a/20260108/1767867165665952_thumb.png b/20260108/1767867165665952_thumb.png new file mode 100644 index 0000000..e975b28 Binary files /dev/null and b/20260108/1767867165665952_thumb.png differ diff --git a/create_test_csv.py b/create_test_csv.py new file mode 100644 index 0000000..f4b0ed3 --- /dev/null +++ b/create_test_csv.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +创建测试CSV文件用于验证图片文章挂靠效果 +""" + +import csv +import json + + +def create_test_articles_csv(): + """创建测试文章CSV文件""" + # 创建测试文章数据,与20260108文件夹中的图片主题相关 + articles_data = [ + {'ID': 1, '标题': '美丽的自然风景欣赏', '内容': '自然界的风景总是让人感到心旷神怡。无论是山川河流还是森林草原,大自然的美景总能带给我们视觉上的享受和心灵上的宁静。', '标签': json.dumps(['自然', '风景', '美丽'], ensure_ascii=False)}, + {'ID': 2, '标题': '户外活动的乐趣', '内容': '走出室内,亲近大自然是一种极好的放松方式。户外活动不仅能锻炼身体,还能让我们欣赏到美丽的自然风光。', '标签': json.dumps(['户外', '活动', '自然'], ensure_ascii=False)}, + {'ID': 3, '标题': '摄影艺术中的自然之美', '内容': '摄影师们常常将镜头对准大自然的美景,捕捉那些令人惊叹的瞬间。每一张风景照片都是对自然之美的独特诠释。', '标签': json.dumps(['摄影', '自然', '艺术'], ensure_ascii=False)}, + {'ID': 4, '标题': '风景旅游推荐指南', '内容': '想要寻找美丽的风景胜地吗?这里有几处绝佳的风景旅游目的地,每一处都有其独特的魅力和美景等待你的探索。', '标签': json.dumps(['旅游', '风景', '推荐'], ensure_ascii=False)}, + {'ID': 5, '标题': '数字图像处理技术', '内容': '现代数字图像处理技术使得图片的缩放、裁剪和优化变得更加容易。无论是原图还是缩略图,都能在保持质量的同时方便使用。', '标签': json.dumps(['图像处理', '技术', '缩略图'], ensure_ascii=False)} + ] + + # 写入CSV文件 + with open('test_articles.csv', 'w', newline='', encoding='utf-8-sig') as csvfile: + fieldnames = ['ID', '标题', '内容', '标签'] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + writer.writeheader() + for row in articles_data: + writer.writerow(row) + + print('测试文章CSV文件已创建: test_articles.csv') + + +def create_test_images_csv(): + """创建测试图片CSV文件""" + # 创建测试图片数据,使用20260108文件夹中的图片 + images_data = [ + {'ID': 1, '图像ID': 'IMG001', '图像名称': '风景图1', '图像URL': '20260108/1767867138994556.png', '标签名称': '风景,自然,美丽', '关键词名称': 'Landscape,Nature,Beauty', '部门名称': '生活部', '附加文章数量': 2}, + {'ID': 2, '图像ID': 'IMG002', '图像名称': '风景图2', '图像URL': '20260108/1767867148035776.png', '标签名称': '自然,风光,户外', '关键词名称': 'Nature,Landscape,Outdoor', '部门名称': '生活部', '附加文章数量': 1}, + {'ID': 3, '图像ID': 'IMG003', '图像名称': '风景图3', '图像URL': '20260108/1767867156936619.png', '标签名称': '景色,自然,美丽', '关键词名称': 'Scenery,Nature,Beautiful', '部门名称': '生活部', '附加文章数量': 3}, + {'ID': 4, '图像ID': 'IMG004', '图像名称': '风景图4', '图像URL': '20260108/1767867165665952.png', '标签名称': '自然风光,户外,美景', '关键词名称': 'Natural Scenery,Outdoor,Beautiful View', '部门名称': '生活部', '附加文章数量': 0}, + {'ID': 5, '图像ID': 'IMG005', '图像名称': '缩略图1', '图像URL': '20260108/1767867138994556_thumb.png', '标签名称': '缩略图,小图,预览', '关键词名称': 'Thumbnail,Small Image,Preview', '部门名称': '技术部', '附加文章数量': 4}, + {'ID': 6, '图像ID': 'IMG006', '图像名称': '缩略图2', '图像URL': '20260108/1767867148035776_thumb.png', '标签名称': '缩略图,预览,小尺寸', '关键词名称': 'Thumbnail,Preview,Small Size', '部门名称': '技术部', '附加文章数量': 1} + ] + + # 写入CSV文件 + with open('test_images.csv', 'w', newline='', encoding='utf-8-sig') as csvfile: + fieldnames = ['ID', '图像ID', '图像名称', '图像URL', '标签名称', '关键词名称', '部门名称', '附加文章数量'] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + writer.writeheader() + for row in images_data: + writer.writerow(row) + + print('测试图片CSV文件已创建: test_images.csv') + + +if __name__ == "__main__": + create_test_articles_csv() + create_test_images_csv() + print('\n两个测试CSV文件已创建完成,可用于测试图片文章挂靠效果。') \ No newline at end of file diff --git a/create_test_excel.py b/create_test_excel.py new file mode 100644 index 0000000..73aeef8 --- /dev/null +++ b/create_test_excel.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +创建测试Excel文件用于验证图片文章挂靠效果 +""" + +from openpyxl import Workbook +import json + + +def create_test_articles_excel(): + """创建测试文章Excel文件""" + # 创建测试文章数据 + articles_data = [ + ['ID', '标题', '内容', '标签'], # 表头 + [1, '人工智能发展趋势', '人工智能技术正在快速发展,在各个领域都有广泛应用。机器学习、深度学习等技术不断突破,推动着社会进步。', json.dumps(['人工智能', '科技', '趋势'], ensure_ascii=False)], + [2, '健康饮食的重要性', '合理膳食是保持身体健康的基础。均衡摄入各种营养素,有助于提高免疫力,预防疾病。', json.dumps(['健康', '饮食', '营养'], ensure_ascii=False)], + [3, '环境保护与可持续发展', '环境保护是当今世界面临的重要挑战。通过可持续发展策略,我们可以平衡经济发展与生态保护。', json.dumps(['环保', '可持续发展', '生态'], ensure_ascii=False)], + [4, '数字化转型对企业的影响', '数字化转型正在重塑企业运营模式。通过引入新技术,企业可以提升效率,优化客户体验。', json.dumps(['数字化', '企业', '转型'], ensure_ascii=False)], + [5, '教育创新的未来方向', '教育创新是培养未来人才的关键。利用新技术手段,可以创造更加个性化和高效的学习环境。', json.dumps(['教育', '创新', '学习'], ensure_ascii=False)] + ] + + # 创建Excel工作簿 + wb = Workbook() + ws = wb.active + if ws: + ws.title = '测试文章数据' + + # 将数据添加到工作表 + for row_data in articles_data: + ws.append(row_data) + + # 保存Excel文件 + wb.save('test_articles.xlsx') + print('测试文章Excel文件已创建: test_articles.xlsx') + + +def create_test_images_excel(): + """创建测试图片Excel文件""" + # 创建测试图片数据 + images_data = [ + ['ID', '图像ID', '图像名称', '图像URL', '标签名称', '关键词名称', '部门名称', '附加文章数量'], # 表头 + [1, 'IMG001', 'AI概念图', 'https://example.com/images/ai_concept.jpg', '人工智能,科技,趋势', 'AI,Machine Learning,Deep Learning', '科技部', 2], + [2, 'IMG002', '健康饮食图', 'https://example.com/images/healthy_food.jpg', '健康,饮食,营养', 'Nutrition,Health,Diet', '生活部', 1], + [3, 'IMG003', '环保地球图', 'https://example.com/images/environment.jpg', '环保,可持续发展,生态', 'Environment,Sustainability,Eco', '环保部', 3], + [4, 'IMG004', '数字化办公图', 'https://example.com/images/digital_office.jpg', '数字化,企业,转型', 'Digital,Enterprise,Transformation', '科技部', 0], + [5, 'IMG005', '教育创新图', 'https://example.com/images/education_innovation.jpg', '教育,创新,学习', 'Education,Innovation,Learning', '教育部', 4], + [6, 'IMG006', '网络安全图', 'https://example.com/images/cyber_security.jpg', '安全,网络,防护', 'Security,Cyber,Protection', '安全部', 1] + ] + + # 创建Excel工作簿 + wb = Workbook() + ws = wb.active + if ws: + ws.title = '测试图片数据' + + # 将数据添加到工作表 + for row_data in images_data: + ws.append(row_data) + + # 保存Excel文件 + wb.save('test_images.xlsx') + print('测试图片Excel文件已创建: test_images.xlsx') + + +if __name__ == "__main__": + create_test_articles_excel() + create_test_images_excel() + print('\n两个测试Excel文件已创建完成,可用于测试图片文章挂靠效果。') \ No newline at end of file diff --git a/database_config.py b/database_config.py new file mode 100644 index 0000000..d5d7455 --- /dev/null +++ b/database_config.py @@ -0,0 +1,160 @@ +""" +数据库配置管理模块 +统一管理数据库连接和SQL操作 +""" +import pymysql +import logging + +logger = logging.getLogger(__name__) + +# 数据库配置 +DB_CONFIG = { + 'host': '8.149.233.36', + 'user': 'ai_article_read', + 'password': '7aK_H2yvokVumr84lLNDt8fDBp6P', + 'database': 'ai_article', + 'charset': 'utf8mb4' +} + + +class DatabaseManager: + """数据库管理器:统一管理数据库连接和操作""" + + def __init__(self, config=None): + """初始化数据库管理器 + + Args: + config: 数据库配置字典,默认使用 DB_CONFIG + """ + self.config = config or DB_CONFIG + + def get_connection(self, autocommit=False): + """获取数据库连接 + + Args: + autocommit: 是否启用自动提交模式 + + Returns: + pymysql连接对象 + """ + return pymysql.connect(**self.config, autocommit=autocommit) + + def execute_query(self, sql, params=None, fetch_one=False): + """执行查询SQL(SELECT) + + Args: + sql: SQL语句 + params: SQL参数(tuple或list) + fetch_one: True返回单条记录,False返回所有记录 + + Returns: + 查询结果 + """ + conn = None + cursor = None + try: + conn = self.get_connection() + cursor = conn.cursor() + + logger.info(f'[SQL] {sql.strip()} | params: {params}') + cursor.execute(sql, params or ()) + + if fetch_one: + result = cursor.fetchone() + else: + result = cursor.fetchall() + + logger.debug(f'[SQL结果] 返回 {len(result) if not fetch_one and result else (1 if result else 0)} 条记录') + return result + except Exception as e: + logger.error(f'执行查询失败:{e}', exc_info=True) + raise + finally: + if cursor: + cursor.close() + if conn: + conn.close() + + def execute_update(self, sql, params=None, autocommit=True): + """执行更新SQL(INSERT/UPDATE/DELETE) + + Args: + sql: SQL语句 + params: SQL参数(tuple或list) + autocommit: 是否自动提交 + + Returns: + 影响的行数 + """ + conn = None + cursor = None + try: + conn = self.get_connection(autocommit=autocommit) + cursor = conn.cursor() + + logger.info(f'[SQL] {sql.strip()} | params: {params}') + result = cursor.execute(sql, params or ()) + + if not autocommit: + conn.commit() + + logger.info(f'[SQL执行] 影响 {result} 行') + return result + except Exception as e: + if not autocommit and conn: + conn.rollback() + logger.error(f'执行更新失败:{e}', exc_info=True) + raise + finally: + if cursor: + cursor.close() + if conn: + conn.close() + + def execute_many(self, sql, params_list, autocommit=True): + """批量执行SQL + + Args: + sql: SQL语句 + params_list: 参数列表,每个元素是一组参数 + autocommit: 是否自动提交 + + Returns: + 成功执行的行数 + """ + conn = None + cursor = None + try: + conn = self.get_connection(autocommit=autocommit) + cursor = conn.cursor() + + logger.info(f'[SQL批量] {sql.strip()} | 批次数: {len(params_list)}') + + success_count = 0 + for params in params_list: + try: + result = cursor.execute(sql, params) + if result > 0: + success_count += 1 + except Exception as e: + logger.debug(f'批量执行跳过:params={params},错误:{e}') + + if not autocommit: + conn.commit() + + logger.info(f'[SQL批量执行] 成功 {success_count}/{len(params_list)} 条') + return success_count + except Exception as e: + if not autocommit and conn: + conn.rollback() + logger.error(f'批量执行失败:{e}', exc_info=True) + raise + finally: + if cursor: + cursor.close() + if conn: + conn.close() + + +# 创建全局数据库管理器实例 +db_manager = DatabaseManager() diff --git a/db/ai_articles.sql b/db/ai_articles.sql new file mode 100644 index 0000000..1aabb8a --- /dev/null +++ b/db/ai_articles.sql @@ -0,0 +1,55 @@ +-- AI文章内容表 +-- 存储由AI生成的文章内容及其生命周期状态 +-- 支持多渠道发布(百度百家号、头条、微信等) +-- 记录文章从选题、生成、审核到发布的完整流程 + +CREATE TABLE `ai_articles` ( + `id` int NOT NULL AUTO_INCREMENT COMMENT '主键', + `batch_id` bigint UNSIGNED NOT NULL DEFAULT 0 COMMENT '批次ID,用于批量生成文章的分组', + `topic_type_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '选题类型ID', + `prompt_workflow_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '提示词工作流ID,关联AI生成模板', + `topic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '文章选题/主题', + `title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '文章标题', + `content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '文章正文内容', + `department` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '部门名称(遗留字段)', + `departmentids` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '部门ID列表(遗留字段)', + `author_id` int NULL DEFAULT NULL COMMENT '作者ID,关联ai_authors.id(百家号账号)', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称(百家号账号名)', + `department_id` int NULL DEFAULT NULL COMMENT '部门ID', + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '部门名称', + `created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID,关联ai_users.id', + `review_user_id` int NULL DEFAULT NULL COMMENT '审核用户ID,关联ai_users.id', + `publish_user_id` int NULL DEFAULT NULL COMMENT '发布用户ID,关联ai_users.id', + `status` enum('topic','cover_image','generate','generate_failed','draft','pending_review','approved','rejected','published_review','published','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'draft' COMMENT '文章状态:topic=选题|cover_image=封面图|generate=生成中|generate_failed=生成失败|draft=草稿|pending_review=待审核(文章已生成)|approved=审核通过|rejected=审核拒绝|published_review=发布审核中|published=已发布|failed=发布失败', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '发布渠道:1=百度百家号|2=今日头条|3=微信公众号', + `review_comment` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL COMMENT '审核意见/备注', + `publish_time` timestamp NULL DEFAULT NULL COMMENT '发布时间', + `baijiahao_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '百家号文章ID', + `baijiahao_status` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '百家号平台状态', + `word_count` int NULL DEFAULT 0 COMMENT '文章字数', + `image_count` int NULL DEFAULT 0 COMMENT '文章配图数量', + `coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'Coze生成的标签', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + + -- 索引定义 + INDEX `created_user_id`(`created_user_id` ASC) USING BTREE COMMENT '创建用户索引', + INDEX `review_user_id`(`review_user_id` ASC) USING BTREE COMMENT '审核用户索引', + INDEX `publish_user_id`(`publish_user_id` ASC) USING BTREE COMMENT '发布用户索引', + INDEX `idx_articles_status_user_created`(`status` ASC, `created_user_id` ASC, `created_at` DESC) USING BTREE COMMENT '状态+创建用户+创建时间组合索引', + INDEX `idx_articles_status_created`(`status` ASC, `created_at` DESC) USING BTREE COMMENT '状态+创建时间索引', + INDEX `idx_articles_status`(`status` ASC) USING BTREE COMMENT '状态索引', + INDEX `idx_articles_created_at`(`created_at` DESC) USING BTREE COMMENT '创建时间索引', + INDEX `idx_status_id_author`(`status` ASC, `id` ASC, `author_id` ASC) USING BTREE COMMENT '状态+ID+作者组合索引', + INDEX `idx_articles_updated_at`(`updated_at` DESC) USING BTREE COMMENT '更新时间索引', + INDEX `idx_articles_status_prompt_topic_id`(`status` ASC, `prompt_workflow_id` ASC, `topic` ASC, `id` ASC) USING BTREE COMMENT '状态+工作流+选题+ID组合索引', + INDEX `idx_status_author_updated_id`(`status` ASC, `author_id` ASC, `updated_at` ASC, `id` ASC) USING BTREE COMMENT '状态+作者+更新时间+ID组合索引', + INDEX `idx_author_status_updated_id`(`author_id` ASC, `status` ASC, `updated_at` ASC, `id` ASC) USING BTREE COMMENT '作者+状态+更新时间+ID组合索引', + + -- 外键约束 + CONSTRAINT `ai_articles_ibfk_1` FOREIGN KEY (`author_id`) REFERENCES `ai_authors` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, + CONSTRAINT `ai_articles_ibfk_2` FOREIGN KEY (`created_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, + CONSTRAINT `ai_articles_ibfk_3` FOREIGN KEY (`review_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, + CONSTRAINT `ai_articles_ibfk_4` FOREIGN KEY (`publish_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 1115 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; diff --git a/db/ai_articles_backup_20260114_121742.sql b/db/ai_articles_backup_20260114_121742.sql new file mode 100644 index 0000000..86c182f Binary files /dev/null and b/db/ai_articles_backup_20260114_121742.sql differ diff --git a/db/split_tables.zip b/db/split_tables.zip new file mode 100644 index 0000000..9045f6b Binary files /dev/null and b/db/split_tables.zip differ diff --git a/db/split_tables/ai_article_images.sql b/db/split_tables/ai_article_images.sql new file mode 100644 index 0000000..e589586 --- /dev/null +++ b/db/split_tables/ai_article_images.sql @@ -0,0 +1,24 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_article_images +-- + +CREATE TABLE `ai_article_images` ( + `id` int NOT NULL AUTO_INCREMENT, + `article_id` int NOT NULL DEFAULT '0', + `image_id` int NOT NULL DEFAULT '0', + `image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `image_tag_id` int NOT NULL DEFAULT '0', + `sort_order` int DEFAULT '0', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `keywords_id` int NOT NULL DEFAULT '0', + `keywords_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `department_id` int NOT NULL DEFAULT '0', + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `image_source` tinyint(1) NOT NULL DEFAULT '0' COMMENT '1=tag|2=change', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_article_image` (`article_id`,`image_id`) USING BTREE, + KEY `image_id` (`image_id`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=1053298 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_article_tags.sql b/db/split_tables/ai_article_tags.sql new file mode 100644 index 0000000..33a2398 --- /dev/null +++ b/db/split_tables/ai_article_tags.sql @@ -0,0 +1,14 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_article_tags +-- + +CREATE TABLE `ai_article_tags` ( + `id` int NOT NULL AUTO_INCREMENT, + `article_id` int NOT NULL, + `coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT 'Coze生成的标签', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_article_tag` (`article_id`) USING BTREE, + CONSTRAINT `ai_article_tags_ibfk_1` FOREIGN KEY (`article_id`) REFERENCES `ai_articles` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE=InnoDB AUTO_INCREMENT=476258 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_articles.sql b/db/split_tables/ai_articles.sql new file mode 100644 index 0000000..cf50bb5 --- /dev/null +++ b/db/split_tables/ai_articles.sql @@ -0,0 +1,52 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_articles +-- + +CREATE TABLE `ai_articles` ( + `id` int NOT NULL AUTO_INCREMENT, + `batch_id` bigint unsigned NOT NULL DEFAULT '0' COMMENT '批次ID', + `topic_type_id` int unsigned NOT NULL DEFAULT '0', + `prompt_workflow_id` int unsigned NOT NULL DEFAULT '0', + `topic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `departmentids` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `author_id` int DEFAULT NULL, + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `department_id` int DEFAULT NULL, + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `created_user_id` int NOT NULL DEFAULT '0', + `review_user_id` int DEFAULT NULL, + `publish_user_id` int DEFAULT NULL, + `status` enum('topic','cover_image','generate','generate_failed','draft','pending_review','approved','rejected','published_review','published','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'draft', + `channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin', + `review_comment` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, + `publish_time` timestamp NULL DEFAULT NULL, + `baijiahao_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `baijiahao_status` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `word_count` int DEFAULT '0', + `image_count` int DEFAULT '0', + `coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT 'Coze生成的标签', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + KEY `author_id` (`author_id`) USING BTREE, + KEY `created_user_id` (`created_user_id`) USING BTREE, + KEY `review_user_id` (`review_user_id`) USING BTREE, + KEY `publish_user_id` (`publish_user_id`) USING BTREE, + KEY `idx_articles_status_user_created` (`status`,`created_user_id`,`created_at` DESC), + KEY `idx_articles_status_created` (`status`,`created_at` DESC), + KEY `idx_articles_status` (`status`), + KEY `idx_articles_created_at` (`created_at` DESC), + KEY `idx_status_id_author` (`status`,`id`,`author_id`), + KEY `idx_articles_updated_at` (`updated_at` DESC) USING BTREE, + KEY `idx_articles_status_prompt_topic_id` (`status`,`prompt_workflow_id`,`topic`,`id`), + KEY `idx_status_author_updated_id` (`status`,`author_id`,`updated_at`,`id`), + KEY `idx_author_status_updated_id` (`author_id`,`status`,`updated_at`,`id`), + CONSTRAINT `ai_articles_ibfk_1` FOREIGN KEY (`author_id`) REFERENCES `ai_authors` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, + CONSTRAINT `ai_articles_ibfk_2` FOREIGN KEY (`created_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, + CONSTRAINT `ai_articles_ibfk_3` FOREIGN KEY (`review_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, + CONSTRAINT `ai_articles_ibfk_4` FOREIGN KEY (`publish_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT +) ENGINE=InnoDB AUTO_INCREMENT=535975 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_authors.sql b/db/split_tables/ai_authors.sql new file mode 100644 index 0000000..b41a8b0 --- /dev/null +++ b/db/split_tables/ai_authors.sql @@ -0,0 +1,31 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_authors +-- + +CREATE TABLE `ai_authors` ( + `id` int NOT NULL AUTO_INCREMENT, + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `app_id` varchar(127) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `app_token` varchar(127) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department_id` int NOT NULL DEFAULT '0', + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `title` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `hospital` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `specialty` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, + `toutiao_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, + `toutiao_images_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, + `introduction` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, + `avatar_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `cumulative_published_count` int DEFAULT '0' COMMENT '累计发文量(从起始日到stat_date的总和)', + `cumulative_revenue_sum` int DEFAULT '0' COMMENT '累计收入(从起始日到stat_date的总和)', + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active', + `channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + KEY `idx_ai_authors_status` (`status`), + KEY `idx_ai_authors_status_id` (`status`,`id`), + KEY `idx_ai_authors_department_id` (`department_id`) +) ENGINE=InnoDB AUTO_INCREMENT=392 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_batch_uploads.sql b/db/split_tables/ai_batch_uploads.sql new file mode 100644 index 0000000..c5a315c --- /dev/null +++ b/db/split_tables/ai_batch_uploads.sql @@ -0,0 +1,21 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_batch_uploads +-- + +CREATE TABLE `ai_batch_uploads` ( + `id` int NOT NULL AUTO_INCREMENT, + `user_id` int NOT NULL, + `file_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `file_path` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `total_count` int DEFAULT '0', + `success_count` int DEFAULT '0', + `failed_count` int DEFAULT '0', + `status` enum('processing','completed','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'processing', + `error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + KEY `user_id` (`user_id`) USING BTREE, + CONSTRAINT `ai_batch_uploads_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT +) ENGINE=InnoDB AUTO_INCREMENT=101 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_departments.sql b/db/split_tables/ai_departments.sql new file mode 100644 index 0000000..5d9ae4e --- /dev/null +++ b/db/split_tables/ai_departments.sql @@ -0,0 +1,13 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_departments +-- + +CREATE TABLE `ai_departments` ( + `id` int NOT NULL AUTO_INCREMENT, + `department_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + KEY `idx_ai_departments_created_at` (`created_at` DESC) +) ENGINE=InnoDB AUTO_INCREMENT=110 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_image_tags.sql b/db/split_tables/ai_image_tags.sql new file mode 100644 index 0000000..ea997e1 --- /dev/null +++ b/db/split_tables/ai_image_tags.sql @@ -0,0 +1,33 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_image_tags +-- + +CREATE TABLE `ai_image_tags` ( + `id` int NOT NULL AUTO_INCREMENT, + `image_id` int NOT NULL, + `image_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `tag_id` int NOT NULL, + `tag_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `keywords_id` int NOT NULL, + `keywords_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department_id` int NOT NULL, + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_source` tinyint unsigned NOT NULL DEFAULT '1' COMMENT '1=clean_images|2=Flower_character', + `created_user_id` int NOT NULL DEFAULT '0', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `image_attached_article_count` int NOT NULL DEFAULT '0' COMMENT 'Number of articles the image is attached to', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_image_tag` (`image_id`,`tag_id`) USING BTREE, + KEY `tag_id` (`tag_id`) USING BTREE, + KEY `idx_id_desc` (`id` DESC), + KEY `idx_image_id_id` (`image_id`,`id` DESC), + KEY `idx_created_at` (`created_at` DESC), + KEY `idx_department_id` (`department_id`), + KEY `idx_keywords_id` (`keywords_id`), + KEY `idx_dept_keywords` (`department_id`,`keywords_id`), + CONSTRAINT `ai_image_tags_ibfk_2` FOREIGN KEY (`tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE=InnoDB AUTO_INCREMENT=29065 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_images.sql b/db/split_tables/ai_images.sql new file mode 100644 index 0000000..0ce3152 --- /dev/null +++ b/db/split_tables/ai_images.sql @@ -0,0 +1,25 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_images +-- + +CREATE TABLE `ai_images` ( + `id` int NOT NULL AUTO_INCREMENT, + `image_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `thumbnail_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `keywords` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `image_type` enum('medical','lifestyle','instruction') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'medical', + `file_size` bigint DEFAULT NULL, + `width` int DEFAULT NULL, + `height` int DEFAULT NULL, + `upload_user_id` int NOT NULL, + `status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + KEY `upload_user_id` (`upload_user_id`) USING BTREE, + CONSTRAINT `ai_images_ibfk_1` FOREIGN KEY (`upload_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT +) ENGINE=InnoDB AUTO_INCREMENT=47096 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_keywords.sql b/db/split_tables/ai_keywords.sql new file mode 100644 index 0000000..bd59f69 --- /dev/null +++ b/db/split_tables/ai_keywords.sql @@ -0,0 +1,15 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_keywords +-- + +CREATE TABLE `ai_keywords` ( + `id` int NOT NULL AUTO_INCREMENT, + `keywords_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department_id` int NOT NULL DEFAULT '0', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + KEY `idx_ai_keywords_dept_created` (`department_id`,`created_at` DESC), + KEY `idx_ai_keywords_created_at` (`created_at` DESC) +) ENGINE=InnoDB AUTO_INCREMENT=417 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_prompt_workflow.sql b/db/split_tables/ai_prompt_workflow.sql new file mode 100644 index 0000000..826e7b6 --- /dev/null +++ b/db/split_tables/ai_prompt_workflow.sql @@ -0,0 +1,21 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_prompt_workflow +-- + +CREATE TABLE `ai_prompt_workflow` ( + `id` int NOT NULL AUTO_INCREMENT, + `prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `auth_token` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `workflow_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `workflow_type_id` int unsigned NOT NULL DEFAULT '0', + `workflow_type_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `created_user_id` int NOT NULL DEFAULT '0', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + KEY `idx_created_user_time` (`created_user_id`,`created_at`) USING BTREE, + KEY `idx_created_at` (`created_at`) USING BTREE, + KEY `idx_workflow_id` (`workflow_id`) USING BTREE, + KEY `idx_prompt_workflow_name` (`prompt_workflow_name`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=27 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_query_category.sql b/db/split_tables/ai_query_category.sql new file mode 100644 index 0000000..5ddab76 --- /dev/null +++ b/db/split_tables/ai_query_category.sql @@ -0,0 +1,14 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_query_category +-- + +CREATE TABLE `ai_query_category` ( + `id` int NOT NULL AUTO_INCREMENT COMMENT '类型ID', + `category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称', + `created_user_id` int NOT NULL DEFAULT '0' COMMENT '创建用户ID', + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active' COMMENT '状态', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=6 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_query_strategies.sql b/db/split_tables/ai_query_strategies.sql new file mode 100644 index 0000000..ca72e24 --- /dev/null +++ b/db/split_tables/ai_query_strategies.sql @@ -0,0 +1,20 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_query_strategies +-- + +CREATE TABLE `ai_query_strategies` ( + `id` int NOT NULL AUTO_INCREMENT, + `category_id` int NOT NULL DEFAULT '0' COMMENT '分类ID', + `category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称', + `query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称', + `query_type_id` int NOT NULL DEFAULT '0' COMMENT '类型ID', + `define_context` varchar(2048) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '定义上下文', + `for_example` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '案例', + `created_user_id` int NOT NULL DEFAULT '0' COMMENT '创建用户ID', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active', + PRIMARY KEY (`id`) USING BTREE, + KEY `query_type_id` (`query_type_id`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=136 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_query_type.sql b/db/split_tables/ai_query_type.sql new file mode 100644 index 0000000..b6205de --- /dev/null +++ b/db/split_tables/ai_query_type.sql @@ -0,0 +1,16 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_query_type +-- + +CREATE TABLE `ai_query_type` ( + `id` int NOT NULL AUTO_INCREMENT COMMENT '类型ID', + `category_id` int NOT NULL DEFAULT '0' COMMENT '分类ID', + `category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称', + `query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称', + `created_user_id` int NOT NULL DEFAULT '0' COMMENT '创建用户ID', + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active' COMMENT '状态', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=137 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_statistics.sql b/db/split_tables/ai_statistics.sql new file mode 100644 index 0000000..ade6115 --- /dev/null +++ b/db/split_tables/ai_statistics.sql @@ -0,0 +1,30 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_statistics +-- + +CREATE TABLE `ai_statistics` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT 'Auto-increment ID', + `author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin', + `date` date NOT NULL COMMENT 'Date of statistics', + `submission_count` int DEFAULT '0' COMMENT 'Number of submissions (投稿量)', + `read_count` int DEFAULT '0' COMMENT 'Number of reads (阅读量)', + `comment_count` int DEFAULT '0' COMMENT 'Number of comments (评论量)', + `comment_rate` decimal(5,4) DEFAULT '0.0000' COMMENT 'Comment rate (评论率)', + `like_count` int DEFAULT '0' COMMENT 'Number of likes (点赞量)', + `like_rate` decimal(5,4) DEFAULT '0.0000' COMMENT 'Like rate (点赞率)', + `favorite_count` int DEFAULT '0' COMMENT 'Number of favorites (收藏量)', + `favorite_rate` decimal(5,4) DEFAULT '0.0000' COMMENT 'Favorite rate (收藏率)', + `share_count` int DEFAULT '0' COMMENT 'Number of shares (分享量)', + `share_rate` decimal(5,4) DEFAULT '0.0000' COMMENT 'Share rate (分享率)', + `slide_ratio` decimal(5,4) DEFAULT '0.0000' COMMENT 'Slide view ratio (滑图占比)', + `baidu_search_volume` int DEFAULT '0' COMMENT 'Baidu search volume (百度搜索量)', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'Creation timestamp', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Update timestamp', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `unique_date` (`date`,`author_id`) USING BTREE, + KEY `idx_date` (`date`) USING BTREE, + KEY `idx_author_id` (`author_id`) +) ENGINE=InnoDB AUTO_INCREMENT=40720 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI Content Statistics'; diff --git a/db/split_tables/ai_statistics_day.sql b/db/split_tables/ai_statistics_day.sql new file mode 100644 index 0000000..5e8caf2 --- /dev/null +++ b/db/split_tables/ai_statistics_day.sql @@ -0,0 +1,30 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_statistics_day +-- + +CREATE TABLE `ai_statistics_day` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键', + `author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin', + `stat_date` date NOT NULL COMMENT '统计日期(天)', + `total_submission_count` int DEFAULT '0' COMMENT '投稿量(当日总计)', + `total_read_count` int DEFAULT '0' COMMENT '阅读量(当日总计)', + `total_comment_count` int DEFAULT '0' COMMENT '评论量(当日总计)', + `total_like_count` int DEFAULT '0' COMMENT '点赞量(当日总计)', + `total_favorite_count` int DEFAULT '0' COMMENT '收藏量(当日总计)', + `total_share_count` int DEFAULT '0' COMMENT '分享量(当日总计)', + `avg_comment_rate` decimal(5,4) DEFAULT '0.0000' COMMENT '评论率(当日平均)', + `avg_like_rate` decimal(5,4) DEFAULT '0.0000' COMMENT '点赞率(当日平均)', + `avg_favorite_rate` decimal(5,4) DEFAULT '0.0000' COMMENT '收藏率(当日平均)', + `avg_share_rate` decimal(5,4) DEFAULT '0.0000' COMMENT '分享率(当日平均)', + `avg_slide_ratio` decimal(5,4) DEFAULT '0.0000' COMMENT '滑图占比(当日平均)', + `total_baidu_search_volume` int DEFAULT '0' COMMENT '百度搜索量(当日总计)', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_stat_date` (`stat_date`,`author_id`) USING BTREE, + KEY `idx_stat_date` (`stat_date`) USING BTREE, + KEY `idx_author_id` (`author_id`) +) ENGINE=InnoDB AUTO_INCREMENT=41142 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI内容每日汇总统计表'; diff --git a/db/split_tables/ai_statistics_days.sql b/db/split_tables/ai_statistics_days.sql new file mode 100644 index 0000000..8d18e31 --- /dev/null +++ b/db/split_tables/ai_statistics_days.sql @@ -0,0 +1,25 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_statistics_days +-- + +CREATE TABLE `ai_statistics_days` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键', + `author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin', + `stat_date` date NOT NULL COMMENT '统计日期(自然日)', + `daily_published_count` int DEFAULT '0' COMMENT '单日发文量', + `day_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当天收益(stat_date所在自然日)', + `cumulative_published_count` int DEFAULT '0' COMMENT '累计发文量(从起始日到stat_date的总和)', + `monthly_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当月收益(stat_date所在自然月的总收益)', + `weekly_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当周收益(stat_date所在自然周的总收益,周一至周日)', + `revenue_mom_growth_rate` decimal(10,6) DEFAULT '0.000000' COMMENT '收益月环比增长率((本月收益 - 上月收益) / NULLIF(上月收益, 0))', + `revenue_wow_growth_rate` decimal(10,6) DEFAULT '0.000000' COMMENT '收益周环比增长率((本周收益 - 上周收益) / NULLIF(上周收益, 0))', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_stat_date` (`stat_date`,`author_id`) USING BTREE, + KEY `idx_stat_date` (`stat_date`) USING BTREE, + KEY `idx_author_id` (`author_id`) +) ENGINE=InnoDB AUTO_INCREMENT=98484 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI内容每日核心指标汇总表(含累计、收益及环比)'; diff --git a/db/split_tables/ai_statistics_monthly.sql b/db/split_tables/ai_statistics_monthly.sql new file mode 100644 index 0000000..5f2fdd5 --- /dev/null +++ b/db/split_tables/ai_statistics_monthly.sql @@ -0,0 +1,20 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_statistics_monthly +-- + +CREATE TABLE `ai_statistics_monthly` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键', + `author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin', + `stat_monthly` varchar(48) NOT NULL COMMENT '统计日期(自然月)', + `monthly_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当月收益(stat_date所在自然月的总收益)', + `revenue_mom_growth_rate` decimal(10,6) DEFAULT '0.000000' COMMENT '收益月环比增长率((本月收益 - 上月收益) / NULLIF(上月收益, 0))', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_author_stat_date` (`author_id`,`stat_monthly`) USING BTREE, + KEY `idx_stat_date` (`stat_monthly`) USING BTREE, + KEY `idx_author_id` (`author_id`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=41278 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI内容每月核心指标汇总表(含累计、收益及环比)'; diff --git a/db/split_tables/ai_statistics_weekly.sql b/db/split_tables/ai_statistics_weekly.sql new file mode 100644 index 0000000..6a8bd3a --- /dev/null +++ b/db/split_tables/ai_statistics_weekly.sql @@ -0,0 +1,20 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_statistics_weekly +-- + +CREATE TABLE `ai_statistics_weekly` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键', + `author_id` int NOT NULL DEFAULT '0' COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT '1' COMMENT '1=baidu|2=toutiao|3=weixin', + `stat_weekly` varchar(48) NOT NULL COMMENT '统计日期(自然周)', + `weekly_revenue` decimal(18,2) DEFAULT '0.00' COMMENT '当周收益(stat_date所在自然周的总收益,周一至周日)', + `revenue_wow_growth_rate` decimal(10,6) DEFAULT '0.000000' COMMENT '收益周环比增长率((本周收益 - 上周收益) / NULLIF(上周收益, 0))', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_author_stat_date` (`author_id`,`stat_weekly`) USING BTREE, + KEY `idx_stat_date` (`stat_weekly`) USING BTREE, + KEY `idx_author_id` (`author_id`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=47934 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC COMMENT='AI内容每周核心指标汇总表(含累计、收益及环比)'; diff --git a/db/split_tables/ai_tag_subsets.sql b/db/split_tables/ai_tag_subsets.sql new file mode 100644 index 0000000..d2e9369 --- /dev/null +++ b/db/split_tables/ai_tag_subsets.sql @@ -0,0 +1,18 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_tag_subsets +-- + +CREATE TABLE `ai_tag_subsets` ( + `id` int NOT NULL AUTO_INCREMENT, + `parent_tag_id` int NOT NULL, + `subset_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `subset_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + KEY `parent_tag_id` (`parent_tag_id`) USING BTREE, + CONSTRAINT `ai_tag_subsets_ibfk_1` FOREIGN KEY (`parent_tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE=InnoDB AUTO_INCREMENT=25903 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_tags.sql b/db/split_tables/ai_tags.sql new file mode 100644 index 0000000..bcb5333 --- /dev/null +++ b/db/split_tables/ai_tags.sql @@ -0,0 +1,18 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_tags +-- + +CREATE TABLE `ai_tags` ( + `id` int NOT NULL AUTO_INCREMENT, + `tag_name` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `tag_category` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `description` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, + `usage_count` int DEFAULT '0', + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_tag_name` (`tag_name`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=13492 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_topic_type.sql b/db/split_tables/ai_topic_type.sql new file mode 100644 index 0000000..6c5f42b --- /dev/null +++ b/db/split_tables/ai_topic_type.sql @@ -0,0 +1,21 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_topic_type +-- + +CREATE TABLE `ai_topic_type` ( + `id` int NOT NULL AUTO_INCREMENT, + `topic_type_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `type_id` int NOT NULL DEFAULT '0', + `type_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `prompt_workflow_id` int unsigned NOT NULL DEFAULT '0', + `prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `created_user_id` int NOT NULL DEFAULT '0', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + KEY `idx_created_user_time` (`created_user_id`,`created_at`) USING BTREE, + KEY `idx_created_at` (`created_at`) USING BTREE, + KEY `idx_type_id` (`type_id`) USING BTREE, + KEY `idx_topic_type_name` (`topic_type_name`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=28 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_user_authors.sql b/db/split_tables/ai_user_authors.sql new file mode 100644 index 0000000..0545cf1 --- /dev/null +++ b/db/split_tables/ai_user_authors.sql @@ -0,0 +1,16 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_user_authors +-- + +CREATE TABLE `ai_user_authors` ( + `id` int NOT NULL AUTO_INCREMENT, + `user_id` int unsigned NOT NULL DEFAULT '0', + `username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `author_id` int NOT NULL DEFAULT '0', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT '', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_user_author` (`user_id`,`author_id`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=15935 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_user_topics.sql b/db/split_tables/ai_user_topics.sql new file mode 100644 index 0000000..c34aa6f --- /dev/null +++ b/db/split_tables/ai_user_topics.sql @@ -0,0 +1,21 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_user_topics +-- + +CREATE TABLE `ai_user_topics` ( + `id` int NOT NULL AUTO_INCREMENT, + `user_id` int unsigned NOT NULL DEFAULT '0', + `username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `topic_type_id` int unsigned NOT NULL DEFAULT '0', + `topic_type_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `prompt_workflow_id` int NOT NULL DEFAULT '0', + `prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT '', + `status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'inactive', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + KEY `idx_topic_type_id` (`topic_type_id`) USING BTREE, + KEY `idx_prompt_workflow_id` (`prompt_workflow_id`) USING BTREE, + KEY `idx_created_at` (`created_at`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=127 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/ai_users.sql b/db/split_tables/ai_users.sql new file mode 100644 index 0000000..95623dd --- /dev/null +++ b/db/split_tables/ai_users.sql @@ -0,0 +1,20 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: ai_users +-- + +CREATE TABLE `ai_users` ( + `id` int NOT NULL AUTO_INCREMENT, + `username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `password` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `real_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `email` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `phone` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL, + `role` enum('admin','editor','reviewer','publisher') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'editor', + `status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'active', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `uk_username` (`username`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=239 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ROW_FORMAT=DYNAMIC; diff --git a/db/split_tables/baidu_keyword.sql b/db/split_tables/baidu_keyword.sql new file mode 100644 index 0000000..6fb386b --- /dev/null +++ b/db/split_tables/baidu_keyword.sql @@ -0,0 +1,38 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: baidu_keyword +-- + +CREATE TABLE `baidu_keyword` ( + `id` int NOT NULL AUTO_INCREMENT, + `keyword` varchar(255) NOT NULL, + `crawled` tinyint DEFAULT '0', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `parents_id` int unsigned NOT NULL DEFAULT '0' COMMENT '父层级', + `seed_id` int unsigned NOT NULL DEFAULT '0' COMMENT '种子', + `seed_name` varchar(512) NOT NULL DEFAULT '' COMMENT '种子名称', + `department` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '科室', + `department_id` int unsigned NOT NULL DEFAULT '0' COMMENT '科室ID', + `partsof_speech` varchar(128) NOT NULL DEFAULT '' COMMENT '词性', + `partsof_speech_id` int unsigned NOT NULL DEFAULT '0' COMMENT '词性ID', + `type` varchar(128) NOT NULL DEFAULT '' COMMENT '类型', + `type_id` int unsigned NOT NULL DEFAULT '0' COMMENT '类型ID', + `yesorno_question` enum('yes','no','unprocessed') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'unprocessed' COMMENT '是否是问题?', + `query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称', + `category_id` int NOT NULL DEFAULT '0' COMMENT '分类ID', + `query_type_id` int NOT NULL DEFAULT '0' COMMENT '类型ID', + `category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称', + `created_user_id` int NOT NULL DEFAULT '0' COMMENT '创建用户ID', + `query_summary_status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'ready', + `query_status` enum('draft','ready','doing','failed','finished','similarity','automated_review','manual_review','generate','published') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'draft' COMMENT 'query完整扭转流程状态', + `blocking_reason` varchar(255) NOT NULL DEFAULT '' COMMENT '审核不通过原因', + `article_id` int NOT NULL DEFAULT '0' COMMENT '文章ID', + `query_stage` enum('draft','created','summary','reviewed','generated','published') NOT NULL DEFAULT 'draft' COMMENT '分5个阶段,创建|总结|审核|生文|发布', + `status` enum('draft','available','unavailable','successful','failed') NOT NULL DEFAULT 'draft' COMMENT '状态_分2个阶段|可用|不可用|发布成功|发布失败', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `review_user_id` int NOT NULL DEFAULT '0' COMMENT '审核用户ID', + PRIMARY KEY (`id`), + UNIQUE KEY `keyword` (`keyword`), + KEY `idx_crawled_seed` (`crawled`,`seed_id`), + KEY `idx_created_at` (`created_at`) +) ENGINE=InnoDB AUTO_INCREMENT=798537 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; diff --git a/db/split_tables/baidu_seed_keywords.sql b/db/split_tables/baidu_seed_keywords.sql new file mode 100644 index 0000000..848c539 --- /dev/null +++ b/db/split_tables/baidu_seed_keywords.sql @@ -0,0 +1,15 @@ +-- SQL table definition +-- Generated from splitting a larger SQL file +-- Table: baidu_seed_keywords +-- + +CREATE TABLE `baidu_seed_keywords` ( + `id` int NOT NULL AUTO_INCREMENT, + `keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL, + `crawled` tinyint DEFAULT '0', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT 'ready', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE KEY `keyword` (`keyword`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=231 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC; diff --git a/export_approved_articles.py b/export_approved_articles.py new file mode 100644 index 0000000..0657f0b --- /dev/null +++ b/export_approved_articles.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +导出审核通过的文章内容和标签到CSV文件 +此脚本将从ai_articles表中导出status为approved的文章内容和标签 +""" + +import csv +import json +import os +from datetime import datetime +from log_config import setup_logger + + +def export_approved_articles_to_csv(output_file='approved_articles_export.csv'): + """ + 导出审核通过的文章内容和标签到CSV文件 + + Args: + output_file: 输出的CSV文件名 + """ + # 设置日志记录器 + logger = setup_logger('article_export', 'logs/article_export.log', 'logs/article_export_error.log') + + try: + # 从数据库获取真实数据 + from database_config import db_manager + + # 查询审核通过的文章,包含内容和标签 + sql = """ + SELECT id, title, content, coze_tag, created_at, updated_at + FROM ai_articles + WHERE status = 'approved' + ORDER BY id + """ + + logger.info("开始查询审核通过的文章数据...") + results = db_manager.execute_query(sql) + + if not results: + logger.warning("没有找到状态为approved的文章") + print("没有找到状态为approved的文章") + return + + logger.info(f"查询到 {len(results)} 条审核通过的文章") + print(f"查询到 {len(results)} 条审核通过的文章") + + # 准备输出目录 + output_dir = os.path.dirname(output_file) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir) + + # 写入CSV文件 + with open(output_file, 'w', newline='', encoding='utf-8-sig') as csvfile: + fieldnames = ['ID', '标题', '内容', '标签', '创建时间', '更新时间'] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + # 写入表头 + writer.writeheader() + + # 写入数据 + for row in results: + id_val, title, content, coze_tag, created_at, updated_at = row + + # 尝试解析标签,如果是JSON格式则转换为字符串 + parsed_tags = coze_tag + if coze_tag: + try: + # 尝试解析JSON格式的标签 + tags_data = json.loads(coze_tag) + if isinstance(tags_data, list): + parsed_tags = ', '.join(tags_data) + elif isinstance(tags_data, dict): + # 如果是字典格式,提取值 + parsed_tags = ', '.join(str(v) for v in tags_data.values()) + except json.JSONDecodeError: + # 如果不是JSON格式,保持原样 + parsed_tags = coze_tag + + writer.writerow({ + 'ID': id_val, + '标题': title, + '内容': content, + '标签': parsed_tags or '', + '创建时间': created_at.strftime('%Y-%m-%d %H:%M:%S') if created_at else '', + '更新时间': updated_at.strftime('%Y-%m-%d %H:%M:%S') if updated_at else '' + }) + + logger.info(f"成功导出 {len(results)} 条文章到 {output_file}") + print(f"成功导出 {len(results)} 条文章到 {output_file}") + + except Exception as e: + logger.error(f"导出文章数据时发生错误: {e}", exc_info=True) + print(f"导出文章数据时发生错误: {e}") + raise + + +def test_db_connection(): + """ + 测试数据库连接 + """ + try: + from database_config import db_manager + # 尝试执行一个简单的查询来测试连接 + test_sql = "SELECT 1 as test" + result = db_manager.execute_query(test_sql) + print("数据库连接测试成功:", result) + return True + except Exception as e: + print(f"数据库连接测试失败: {e}") + return False + + +if __name__ == "__main__": + # 创建logs目录 + if not os.path.exists('logs'): + os.makedirs('logs') + + # 检查命令行参数 + import sys + if len(sys.argv) > 1: + output_filename = sys.argv[1] + else: + output_filename = 'approved_articles_export.csv' + + # 测试数据库连接 + print("正在测试数据库连接...") + if not test_db_connection(): + print("数据库连接失败,请检查数据库配置。") + print("请确认以下信息:") + print("- 数据库服务器是否正常运行") + print("- 数据库地址、用户名、密码是否正确") + print("- 网络连接是否正常") + print("- 用户是否有查询ai_articles表的权限") + exit(1) + + export_approved_articles_to_csv(output_filename) \ No newline at end of file diff --git a/export_image_tags.py b/export_image_tags.py new file mode 100644 index 0000000..c7e5b5d --- /dev/null +++ b/export_image_tags.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +导出符合条件的图像标签数据到CSV文件 +导出条件:image_attached_article_count < 5 +""" + +import csv +import os +from datetime import datetime +from database_config import db_manager +from log_config import setup_logger + + +def export_image_tags_to_csv(output_file='image_tags_filtered.csv'): + """ + 导出符合条件的图像标签数据到CSV文件 + + Args: + output_file: 输出的CSV文件名 + """ + # 设置日志记录器 + logger = setup_logger('image_tags_export', 'logs/image_tags_export.log', 'logs/image_tags_export_error.log') + + # 从数据库获取真实数据 + from database_config import db_manager + + # 查询符合条件的图像标签数据 + sql = """ + SELECT id, image_id, image_name, image_url, image_thumb_url, tag_id, tag_name, + keywords_id, keywords_name, department_id, department_name, image_source, + created_user_id, created_at, updated_at, image_attached_article_count + FROM ai_image_tags + WHERE image_attached_article_count < 5 + ORDER BY id + """ + + logger.info("开始查询符合条件的图像标签数据...") + results = db_manager.execute_query(sql) + + if not results: + logger.warning("没有找到符合条件的图像标签数据 (image_attached_article_count < 5)") + print("没有找到符合条件的图像标签数据 (image_attached_article_count < 5)") + return + + logger.info(f"查询到 {len(results)} 条符合条件的图像标签数据") + print(f"查询到 {len(results)} 条符合条件的图像标签数据") + + # 准备输出目录 + output_dir = os.path.dirname(output_file) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir) + + # 写入CSV文件 + with open(output_file, 'w', newline='', encoding='utf-8-sig') as csvfile: + fieldnames = [ + 'ID', '图像ID', '图像名称', '图像URL', '缩略图URL', '标签ID', '标签名称', + '关键词ID', '关键词名称', '部门ID', '部门名称', '图像来源', + '创建用户ID', '创建时间', '更新时间', '附加文章数量' + ] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + # 写入表头 + writer.writeheader() + + # 写入数据 + for row in results: + ( + id_val, image_id, image_name, image_url, image_thumb_url, + tag_id, tag_name, keywords_id, keywords_name, + department_id, department_name, image_source, + created_user_id, created_at, updated_at, image_attached_article_count + ) = row + + writer.writerow({ + 'ID': id_val, + '图像ID': image_id, + '图像名称': image_name, + '图像URL': image_url, + '缩略图URL': image_thumb_url, + '标签ID': tag_id, + '标签名称': tag_name, + '关键词ID': keywords_id, + '关键词名称': keywords_name, + '部门ID': department_id, + '部门名称': department_name, + '图像来源': image_source, + '创建用户ID': created_user_id, + '创建时间': created_at.strftime('%Y-%m-%d %H:%M:%S') if created_at else '', + '更新时间': updated_at.strftime('%Y-%m-%d %H:%M:%S') if updated_at else '', + '附加文章数量': image_attached_article_count + }) + + logger.info(f"成功导出 {len(results)} 条图像标签数据到 {output_file}") + print(f"成功导出 {len(results)} 条图像标签数据到 {output_file}") + + +def test_db_connection(): + """ + 测试数据库连接 + """ + try: + # 尝试执行一个简单的查询来测试连接 + test_sql = "SELECT 1 as test" + result = db_manager.execute_query(test_sql) + print("数据库连接测试成功:", result) + return True + except Exception as e: + print(f"数据库连接测试失败: {e}") + return False + + +if __name__ == "__main__": + # 创建logs目录 + if not os.path.exists('logs'): + os.makedirs('logs') + + # 检查数据库连接 + print("正在测试数据库连接...") + if not test_db_connection(): + print("数据库连接失败,请检查数据库配置。") + print("请确认以下信息:") + print("- 数据库服务器是否正常运行") + print("- 数据库地址、用户名、密码是否正确") + print("- 网络连接是否正常") + print("- 用户是否有查询ai_image_tags表的权限") + exit(1) + + # 默认输出文件名 + output_filename = 'image_tags_filtered.csv' + + # 可以从命令行参数获取输出文件名 + import sys + if len(sys.argv) > 1: + output_filename = sys.argv[1] + + export_image_tags_to_csv(output_filename) \ No newline at end of file diff --git a/generated_image.png b/generated_image.png new file mode 100644 index 0000000..7fb674a Binary files /dev/null and b/generated_image.png differ diff --git a/generated_image_3679d898-fab5-41b2-97c7-9ccd7168d0fc.png b/generated_image_3679d898-fab5-41b2-97c7-9ccd7168d0fc.png new file mode 100644 index 0000000..a2a0bda Binary files /dev/null and b/generated_image_3679d898-fab5-41b2-97c7-9ccd7168d0fc.png differ diff --git a/generated_image_6d5ade2f-633c-4782-93c2-6c8247ea5dee.png b/generated_image_6d5ade2f-633c-4782-93c2-6c8247ea5dee.png new file mode 100644 index 0000000..d57e61b Binary files /dev/null and b/generated_image_6d5ade2f-633c-4782-93c2-6c8247ea5dee.png differ diff --git a/generated_image_93dfbfee-d664-4778-abc1-c9f3ef080de4.png b/generated_image_93dfbfee-d664-4778-abc1-c9f3ef080de4.png new file mode 100644 index 0000000..29f1414 Binary files /dev/null and b/generated_image_93dfbfee-d664-4778-abc1-c9f3ef080de4.png differ diff --git a/log_config.py b/log_config.py new file mode 100644 index 0000000..b0a05c7 --- /dev/null +++ b/log_config.py @@ -0,0 +1,337 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +统一日志配置模块 +提供按日期自动切割日志文件的功能 +""" + +import os +import logging +import sys +from logging.handlers import TimedRotatingFileHandler +from datetime import datetime + +def setup_logger(name, log_file, error_log_file=None, level=logging.INFO, + backup_count=30, error_backup_count=90, console_output=True, force_reinit=False): + """ + 设置日志记录器,支持按日期自动切割 + + Args: + name: 日志记录器名称 + log_file: 主日志文件路径 + error_log_file: 错误日志文件路径(可选) + level: 日志级别 + backup_count: 主日志文件保留天数 + error_backup_count: 错误日志文件保留天数 + console_output: 是否输出到控制台 + force_reinit: 是否强制重新初始化(删除现有handlers) + + Returns: + logging.Logger: 配置好的日志记录器 + """ + # 创建logs目录 + log_dir = os.path.dirname(log_file) + if log_dir and not os.path.exists(log_dir): + os.makedirs(log_dir) + + # 获取或创建logger + logger = logging.getLogger(name) + logger.setLevel(level) + + # 检查是否需要重新初始化 + need_reinit = force_reinit or not logger.handlers + + # 如果强制重新初始化或没有handlers,则清除现有handlers + if force_reinit and logger.handlers: + print(f"强制重新初始化日志记录器: {name}") + for handler in logger.handlers[:]: # 使用切片创建副本 + logger.removeHandler(handler) + need_reinit = True + + # 如果没有handlers,则添加新的handlers + if need_reinit: + # 创建日志格式 + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + + # 1. 主日志文件处理器 - 按日期切割 + file_handler = TimedRotatingFileHandler( + filename=log_file, + when='midnight', # 每天午夜切割 + interval=1, # 每1天切割一次 + backupCount=backup_count, # 保留天数 + encoding='utf-8' + ) + file_handler.setLevel(level) + file_handler.setFormatter(formatter) + + # 设置切割后的文件名格式:filename.log.2025-07-21 + file_handler.suffix = "%Y-%m-%d" + + # 自定义文件名生成函数,确保格式正确 + def namer(default_name): + # 确保文件名格式为 filename.log.2025-07-21 + return default_name + file_handler.namer = namer + + # 添加主日志处理器 + logger.addHandler(file_handler) + + # 2. 错误日志文件处理器(如果指定) + if error_log_file: + error_file_handler = TimedRotatingFileHandler( + filename=error_log_file, + when='midnight', + interval=1, + backupCount=error_backup_count, # 错误日志保留更长时间 + encoding='utf-8' + ) + error_file_handler.setLevel(logging.ERROR) + error_file_handler.setFormatter(formatter) + error_file_handler.suffix = "%Y-%m-%d" + error_file_handler.namer = namer + logger.addHandler(error_file_handler) + + # 3. 控制台处理器(如果启用) + if console_output: + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(level) + console_formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt='%H:%M:%S' + ) + console_handler.setFormatter(console_formatter) + logger.addHandler(console_handler) + + # 设置第三方库的日志级别 + logging.getLogger('requests').setLevel(logging.WARNING) + logging.getLogger('urllib3').setLevel(logging.WARNING) + logging.getLogger('whoosh').setLevel(logging.WARNING) + + # 记录日志系统启动信息 + logger.info(f"日志系统已启动 - 记录器: {name}") + logger.info(f"主日志文件: {log_file}") + if error_log_file: + logger.info(f"错误日志文件: {error_log_file}") + logger.info(f"日志保留策略: 每天午夜分割,主日志保留{backup_count}天") + if error_log_file: + logger.info(f"错误日志保留策略: 每天午夜分割,保留{error_backup_count}天") + + return logger + +def setup_curl_convert_logger(force_reinit=False): + """设置curl_convert.py的日志记录器""" + return setup_logger( + name='curl_convert', + log_file='logs/curl_convert.log', + error_log_file='logs/curl_convert_error.log', + level=logging.INFO, + backup_count=30, + error_backup_count=90, + console_output=True, + force_reinit=force_reinit + ) + +def setup_article_server_logger(force_reinit=False): + """设置flask_article_server.py的日志记录器""" + return setup_logger( + name='article_server', + log_file='logs/article_server.log', + error_log_file='logs/article_error.log', + level=logging.INFO, + backup_count=3, + error_backup_count=9, + console_output=True, + force_reinit=force_reinit + ) + +def setup_article_server_search_logger(force_reinit=False): + """设置flask_article_server_search.py的日志记录器""" + return setup_logger( + name='article_server_search', + log_file='logs/article_server_search.log', + error_log_file='logs/article_server_search_error.log', + level=logging.INFO, + backup_count=3, + error_backup_count=9, + console_output=True, + force_reinit=force_reinit + ) + +def setup_aiarticle_server_logger(force_reinit=False): + """设置flask_aiarticle_server.py的日志记录器""" + return setup_logger( + name='aiarticle_server', + log_file='logs/aiarticle_server.log', + error_log_file='logs/aiarticle_server_error.log', + level=logging.INFO, + backup_count=30, + error_backup_count=90, + console_output=True, + force_reinit=force_reinit + ) + +def setup_whoosh_search_tags_logger(force_reinit=False): + """设置whoosh_search_tags.py的日志记录器""" + return setup_logger( + name='whoosh_search_tags', + log_file='logs/whoosh_search_tags.log', + error_log_file='logs/whoosh_search_tags_error.log', + level=logging.INFO, + backup_count=30, + error_backup_count=90, + console_output=True, + force_reinit=force_reinit + ) + +def setup_baidu_crawl_logger(force_reinit=False): + """设置baidu_crawl.py的日志记录器""" + return setup_logger( + name='baidu_crawl', + log_file='logs/baidu_crawl.log', + error_log_file='logs/baidu_crawl_error.log', + level=logging.INFO, + backup_count=3, + error_backup_count=3, + console_output=True, + force_reinit=force_reinit + ) + +def setup_baidu_seed_logger(force_reinit=False): + """设置baidu_seed.py的日志记录器""" + return setup_logger( + name='baidu_seed', + log_file='logs/baidu_seed.log', + error_log_file='logs/baidu_seed_error.log', + level=logging.INFO, + backup_count=3, + error_backup_count=3, + console_output=True, + force_reinit=force_reinit + ) + +def setup_baidu_crawl_again_logger(force_reinit=False): + """设置baidu_seed.py的日志记录器""" + return setup_logger( + name='baidu_crawl_again', + log_file='logs/baidu_crawl_again.log', + error_log_file='logs/baidu_crawl_again_error.log', + level=logging.INFO, + backup_count=3, + error_backup_count=3, + console_output=True, + force_reinit=force_reinit + ) + +def reinitialize_all_loggers(): + """重新初始化所有日志记录器""" + print("重新初始化所有日志记录器...") + + # 重新初始化所有日志记录器 + setup_curl_convert_logger(force_reinit=True) + setup_article_server_logger(force_reinit=True) + setup_article_server_search_logger(force_reinit=True) + setup_aiarticle_server_logger(force_reinit=True) + setup_whoosh_search_tags_logger(force_reinit=True) + setup_baidu_crawl_logger(force_reinit=True) + setup_baidu_seed_logger(force_reinit=True) + + print("所有日志记录器重新初始化完成") + +def cleanup_old_logs(log_dir='logs', days_to_keep=30): + """ + 清理旧的日志文件 + + Args: + log_dir: 日志目录 + days_to_keep: 保留天数 + """ + import glob + from datetime import datetime, timedelta + + if not os.path.exists(log_dir): + return + + cutoff_date = datetime.now() - timedelta(days=days_to_keep) + + # 查找所有日志文件 + log_patterns = [ + os.path.join(log_dir, '*.log.*'), # 切割后的日志文件 + os.path.join(log_dir, '*.log') # 当前日志文件 + ] + + for pattern in log_patterns: + for log_file in glob.glob(pattern): + try: + # 获取文件修改时间 + file_mtime = datetime.fromtimestamp(os.path.getmtime(log_file)) + if file_mtime < cutoff_date: + os.remove(log_file) + print(f"已删除旧日志文件: {log_file}") + except Exception as e: + print(f"删除日志文件失败 {log_file}: {e}") + +def get_log_file_info(log_dir='logs'): + """ + 获取日志文件信息 + + Args: + log_dir: 日志目录 + + Returns: + dict: 日志文件信息 + """ + if not os.path.exists(log_dir): + return {} + + log_info = {} + + for filename in os.listdir(log_dir): + if filename.endswith('.log'): + file_path = os.path.join(log_dir, filename) + try: + size = os.path.getsize(file_path) + mtime = datetime.fromtimestamp(os.path.getmtime(file_path)) + log_info[filename] = { + 'size': size, + 'size_mb': round(size / (1024 * 1024), 2), + 'modified': mtime.strftime('%Y-%m-%d %H:%M:%S'), + 'path': file_path + } + except Exception as e: + log_info[filename] = {'error': str(e)} + + return log_info + +if __name__ == "__main__": + # 测试日志配置 + print("测试日志配置...") + + # 测试各个日志记录器 + logger1 = setup_curl_convert_logger() + logger1.info("curl_convert 日志测试") + + logger2 = setup_article_server_logger() + logger2.info("article_server 日志测试") + + logger3 = setup_article_server_search_logger() + logger3.info("article_server_search 日志测试") + + logger4 = setup_aiarticle_server_logger() + logger4.info("aiarticle_server 日志测试") + + logger5 = setup_whoosh_search_tags_logger() + logger5.info("whoosh_search_tags 日志测试") + + # 显示日志文件信息 + print("\n当前日志文件信息:") + log_info = get_log_file_info() + for filename, info in log_info.items(): + if 'error' not in info: + print(f"{filename}: {info['size_mb']}MB, 修改时间: {info['modified']}") + else: + print(f"{filename}: 错误 - {info['error']}") + + print("\n日志配置测试完成!") \ No newline at end of file diff --git a/match_article_images.py b/match_article_images.py new file mode 100644 index 0000000..2eaf13a --- /dev/null +++ b/match_article_images.py @@ -0,0 +1,910 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +文章与图片智能挂靠脚本 +根据文章标签匹配ai_image_tags表中的图片,使用大模型进行处理, +如果挂靠失败或没有相同标签的图片,则使用Gemini生成图片 +""" + +import json +import os +import re +import requests +import csv +import pymysql +from typing import List, Dict, Tuple, Optional +from collections import defaultdict +from database_config import db_manager +from log_config import setup_logger +import time +import random +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + + +def get_articles_with_tags_from_db() -> List[Dict]: + """ + 从数据库获取文章及其标签 + + Returns: + 包含文章信息的字典列表 + """ + # 设置日志记录器 + logger = setup_logger('article_matching', 'logs/article_matching.log', 'logs/article_matching_error.log') + + articles = [] + + try: + # 查询审核通过的文章,包含内容和标签 + sql = """ + SELECT id, title, content, coze_tag + FROM ai_articles + WHERE status = 'approved' + ORDER BY id + """ + + logger.info("开始查询审核通过的文章数据...") + results = db_manager.execute_query(sql) + + if not results: + logger.warning("没有找到状态为approved的文章") + print("没有找到状态为approved的文章") + return articles + + logger.info(f"查询到 {len(results)} 条审核通过的文章") + print(f"查询到 {len(results)} 条审核通过的文章") + + for row in results: + article_id, title, content, coze_tag = row + + # 解析标签 + tags = [] + if coze_tag: + try: + # 尝试解析JSON格式的标签 + tags_data = json.loads(coze_tag) + if isinstance(tags_data, list): + tags = tags_data + elif isinstance(tags_data, dict): + # 如果是字典格式,提取值 + tags = list(tags_data.values()) if isinstance(list(tags_data.values())[0], list) else list(tags_data.values()) + else: + # 如果是字符串,尝试按逗号分割 + tags = [tag.strip() for tag in str(tags_data).split(',') if tag.strip()] + except json.JSONDecodeError: + # 如果不是JSON格式,按逗号分割 + tags = [tag.strip() for tag in str(coze_tag).split(',') if tag.strip()] + + articles.append({ + 'id': article_id, + 'title': title, + 'content': content, + 'tags': tags + }) + except Exception as e: + logger.error(f"从数据库获取文章数据时发生错误: {e}", exc_info=True) + print(f"从数据库获取文章数据时发生错误: {e}") + raise + + return articles + + +def get_images_by_tags_from_db(tags: List[str] = [], used_counts: Dict[str, int] = {}) -> List[Dict]: + """ + 从数据库根据标签获取图片 + + Args: + tags: 标签列表 + used_counts: 已使用次数的字典,key为图片ID,value为使用次数 + + Returns: + 包含图片信息的字典列表 + """ + if not tags: + return [] + + # 设置日志记录器 + logger = setup_logger('article_matching', 'logs/article_matching.log', 'logs/article_matching_error.log') + + images = [] + + try: + # 查询符合条件的图像标签数据 + sql = """ + SELECT id, image_id, image_name, image_url, tag_name, keywords_name, department_name, image_attached_article_count + FROM ai_image_tags + WHERE image_attached_article_count < 5 + ORDER BY id + """ + + logger.info("开始查询符合条件的图像标签数据...") + results = db_manager.execute_query(sql) + + if not results: + logger.warning("没有找到符合条件的图像标签数据 (image_attached_article_count < 5)") + print("没有找到符合条件的图像标签数据 (image_attached_article_count < 5)") + return images + + logger.info(f"查询到 {len(results)} 条符合条件的图像标签数据") + print(f"查询到 {len(results)} 条符合条件的图像标签数据") + + for row in results: + ( + image_id, db_image_id, image_name, image_url, tag_name, + keywords_name, department_name, base_count + ) = row + + # 检查图片的附加文章数量是否小于5,考虑已使用次数 + used_count = used_counts.get(str(image_id), 0) + total_count = base_count + used_count + + if total_count >= 5: + continue + + # 检查标签是否匹配 + if any(tag.lower() in tag_name.lower() for tag in tags): + images.append({ + 'id': str(image_id), + 'image_id': db_image_id, + 'image_name': image_name, + 'image_url': image_url, + 'tag_name': tag_name, + 'keywords_name': keywords_name, + 'department_name': department_name, + 'base_count': base_count + }) + except Exception as e: + logger.error(f"从数据库获取图片数据时发生错误: {e}", exc_info=True) + print(f"从数据库获取图片数据时发生错误: {e}") + raise + + print(f"从数据库找到 {len(images)} 张符合条件的匹配图片") + return images + + +def call_qwen_model(article: Dict, image_urls: List[str]) -> bool: + """ + 调用通义千问大模型进行文章与图片挂靠评估 + + Args: + article: 文章信息 + image_urls: 图片URL列表 + + Returns: + 挂靠是否成功 + """ + # 通义千问API配置 + api_key = "sk-e6a38204022a4b538b8954f0584712af" + api_url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation" + + # 构建请求内容 + content = f""" + 请评估以下文章与图片的匹配度: + + 文章标题: {article['title']} + 文章内容: {article['content'][:500]}... # 限制内容长度 + + 图片URLs: {', '.join(image_urls)} + + 请判断这些图片是否适合用于这篇文章。如果匹配度高,请回复"匹配成功";如果匹配度低,请回复"匹配失败"。 + """ + + headers = { + 'Authorization': f'Bearer {api_key}', + 'Content-Type': 'application/json' + } + + payload = { + "model": "qwen-max", # 或其他合适的模型 + "input": { + "messages": [ + { + "role": "user", + "content": content + } + ] + }, + "parameters": { + "temperature": 0.7 + } + } + + try: + response = requests.post(api_url, headers=headers, json=payload) + + if response.status_code == 200: + result = response.json() + # 解析响应,判断匹配结果 + if 'output' in result and 'text' in result['output']: + response_text = result['output']['text'].lower() + # 根据响应内容判断是否匹配 + if '匹配成功' in response_text or '是的' in response_text or '合适' in response_text: + print(f"通义千问评估结果: 匹配成功 - 文章 '{article['title']}'") + return True + else: + print(f"通义千问评估结果: 匹配失败 - 文章 '{article['title']}'") + return False + else: + print(f"通义千问API响应格式异常: {result}") + return False + else: + print(f"通义千问API调用失败: {response.status_code} - {response.text}") + # API调用失败时,仍然尝试匹配,这里返回False触发图片生成 + return False + + except Exception as e: + print(f"调用通义千问API时发生错误: {e}") + # 发生错误时,返回False以触发图片生成 + return False + + +def insert_generated_image_to_db(image_name: str, image_url: str, article_tags: List[str]) -> Optional[Dict]: + """ + 将Gemini生成的图片信息插入数据库 + + Args: + image_name: 图片文件名,如 "1755310671174988.png" + image_url: 图片URL路径,如 "20250816/1755310671174988.png" + article_tags: 文章标签列表,用于查询department和keywords + + Returns: + 包含插入信息的字典:{ + 'tag_image_id': tag_image_id, + 'image_id': image_id, + 'image_url': image_url, + 'image_thumb_url': image_thumb_url, + 'keywords_id': keywords_id, + 'keywords_name': keywords_name, + 'department_id': department_id, + 'department_name': department_name + } + """ + connection = db_manager.get_connection() + if connection is None: + print("无法连接到数据库") + return None + + try: + with connection.cursor(pymysql.cursors.DictCursor) as cursor: + # 1. 根据文章标签查询ai_image_tags表,获取department和keywords信息 + if article_tags: + # 使用第一个标签查询 + query = """ + SELECT department_name, keywords_name, department_id, keywords_id, tag_id + FROM ai_image_tags + WHERE tag_name = %s + LIMIT 1 + """ + cursor.execute(query, (article_tags[0],)) + tag_info = cursor.fetchone() + + if tag_info: + department = tag_info['department_name'] + keywords = tag_info['keywords_name'] + department_id = tag_info['department_id'] + keywords_id = tag_info['keywords_id'] + tag_id = tag_info['tag_id'] + tag_name = article_tags[0] + else: + # 如果没有找到,使用默认值 + department = "AI生成" + keywords = "AI图片" + department_id = 1 + keywords_id = 1 + tag_id = 1 + tag_name = article_tags[0] if article_tags else "AI生成" + else: + # 没有标签,使用默认值 + department = "AI生成" + keywords = "AI图片" + department_id = 1 + keywords_id = 1 + tag_id = 1 + tag_name = "AI生成" + + # 2. 插入ai_images表 + insert_image_query = """ + INSERT INTO ai_images + (image_name, image_url, image_thumb_url, department, keywords, image_type, upload_user_id, status) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s) + """ + cursor.execute(insert_image_query, ( + image_name, + image_url, + '', # image_thumb_url + department, + keywords, + 'medical', # image_type + 1, # upload_user_id(默认用户ID) + 'active' # status + )) + image_id = cursor.lastrowid + print(f"图片信息已插入ai_images表,image_id: {image_id}") + + # 3. 插入ai_image_tags表 + insert_tag_query = """ + INSERT INTO ai_image_tags + (image_id, image_name, image_url, image_thumb_url, tag_id, tag_name, + keywords_id, keywords_name, department_id, department_name, + image_source, created_user_id, image_attached_article_count) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + """ + cursor.execute(insert_tag_query, ( + image_id, + image_name, + image_url, + '', # image_thumb_url + tag_id, + tag_name, + keywords_id, + keywords, + department_id, + department, + 3, # image_source: 3表示AI生成 + 1, # created_user_id + 0 # image_attached_article_count + )) + tag_image_id = cursor.lastrowid + print(f"图片标签信息已插入ai_image_tags表,tag_image_id: {tag_image_id}") + + # 提交事务 + connection.commit() + + # 返回包含所有需要信息的字典 + return { + 'tag_image_id': tag_image_id, + 'image_id': image_id, + 'image_url': image_url, + 'image_thumb_url': '', + 'keywords_id': keywords_id, + 'keywords_name': keywords, + 'department_id': department_id, + 'department_name': department + } + + except Exception as e: + print(f"插入图片信息到数据库失败: {e}") + connection.rollback() + return None + finally: + connection.close() + + +def insert_article_image_relation(article_id: int, image_id: int, image_url: str, image_thumb_url: str, + tag_image_id: int, keywords_id: int, keywords_name: str, + department_id: int, department_name: str, image_source: int = 0) -> Optional[int]: + """ + 将文章与图片的关联信息插入ai_article_images表 + + Args: + article_id: 文章ID + image_id: 图片ID(ai_images表的id) + image_url: 图片URL + image_thumb_url: 缩略图URL + tag_image_id: 图片标签ID(ai_image_tags表的id) + keywords_id: 关键词ID + keywords_name: 关键词名称 + department_id: 部门ID + department_name: 部门名称 + image_source: 图片来源(0表示默认) + + Returns: + 插入的ai_article_images表的ID + """ + connection = db_manager.get_connection() + if connection is None: + print("无法连接到数据库") + return None + + try: + with connection.cursor(pymysql.cursors.DictCursor) as cursor: + # 1. 查询当前文章下已有图片的最大sort_order + query_max_sort = """ + SELECT COALESCE(MAX(sort_order), 0) as max_sort_order + FROM ai_article_images + WHERE article_id = %s + """ + cursor.execute(query_max_sort, (article_id,)) + result = cursor.fetchone() + max_sort_order = result['max_sort_order'] if result else 0 + new_sort_order = max_sort_order + 1 + + print(f"文章 {article_id} 当前最大sort_order: {max_sort_order}, 新图片sort_order: {new_sort_order}") + + # 2. 插入ai_article_images表 + insert_query = """ + INSERT INTO ai_article_images + (article_id, image_id, image_url, image_thumb_url, image_tag_id, sort_order, + keywords_id, keywords_name, department_id, department_name, image_source) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + """ + cursor.execute(insert_query, ( + article_id, + image_id, + image_url, + image_thumb_url, + tag_image_id, + new_sort_order, + keywords_id, + keywords_name, + department_id, + department_name, + image_source + )) + article_image_id = cursor.lastrowid + print(f"文章图片关联信息已插入ai_article_images表,id: {article_image_id}") + + # 提交事务 + connection.commit() + + return article_image_id + + except Exception as e: + print(f"插入文章图片关联信息失败: {e}") + connection.rollback() + return None + finally: + connection.close() + + +def generate_image_with_gemini(prompt: str, article_tags: List[str], article_id: int) -> str: + """ + 使用Gemini生成图片并上传到服务器 + + Args: + prompt: 图片生成提示词 + article_tags: 文章标签列表,用于查询department和keywords + article_id: 文章ID,用于关联图片 + + Returns: + 上传后的图片URL + """ + # 导入必要的库 + try: + from google import genai + from google.genai import types + from google.genai.client import HttpOptions + + except ImportError: + print("错误:未安装google-genai库,请运行 'pip install google-genai' 进行安装") + raise + + client = genai.Client(http_options=HttpOptions(base_url="https://work.poloapi.com"), + api_key="sk-V4tPnDgzFPa7nxWrvKnNJsW8ZcBXXPuGmjfgvPVRnwpHoeob") + + print(f"正在调用Gemini API生成图片,提示词: {prompt[:50]}...") + + # 生成内容 + response = client.models.generate_content( + model="gemini-3-pro-image-preview", + contents=[prompt], + ) + + # 检查是否有候选答案 + if not response.candidates: + raise Exception("Gemini API未返回任何候选答案") + + # 处理响应 - 遍历第一个候选答案的内容部分 + candidate = response.candidates[0] + if not candidate.content or not candidate.content.parts: + raise Exception("Gemini API返回的候选答案中没有内容部分") + + for part in candidate.content.parts: + if hasattr(part, 'text') and part.text is not None: + print(f"Gemini响应文本: {part.text}") + elif hasattr(part, 'inline_data') and part.inline_data is not None: + image_data = part.inline_data + if image_data.data is not None: + # 生成唯一的文件名(基于时间戳) + import time + import os + from datetime import datetime + + timestamp_ms = int(time.time() * 1000) # 毫秒级时间戳 + image_filename = f"{timestamp_ms}.png" + today_date = datetime.now().strftime("%Y%m%d") + image_url_path = f"{today_date}/{image_filename}" + + temp_filename = f"temp_generated_image_{timestamp_ms}.png" + # 保存图片数据到临时文件 + with open(temp_filename, 'wb') as f: + f.write(image_data.data) + print(f"Gemini生成图片成功: {temp_filename}") + + # 先将图片信息插入数据库,获取相关信息 + image_info = insert_generated_image_to_db(image_filename, image_url_path, article_tags) + + if not image_info: + raise Exception("插入图片信息到数据库失败") + + print(f"图片信息已插入数据库,tag_image_id: {image_info['tag_image_id']}, image_id: {image_info['image_id']}") + + # 使用tag_image_id上传图片到服务器 + uploaded_url = upload_image_to_server(temp_filename, image_info['tag_image_id']) + + # 将文章与图片的关联信息插入ai_article_images表 + article_image_id = insert_article_image_relation( + article_id=article_id, + image_id=image_info['image_id'], + image_url=image_info['image_url'], + image_thumb_url=image_info['image_thumb_url'], + tag_image_id=image_info['tag_image_id'], + keywords_id=image_info['keywords_id'], + keywords_name=image_info['keywords_name'], + department_id=image_info['department_id'], + department_name=image_info['department_name'], + image_source=0 # 默认值 + ) + + if article_image_id: + print(f"文章图片关联信息已创建,ai_article_images.id: {article_image_id}") + + # 删除临时文件 + os.remove(temp_filename) + + print(f"图片已上传到服务器: {uploaded_url}") + # 返回上传后的图片URL + return uploaded_url + + # 如果没有返回图片数据,抛出异常 + raise Exception("Gemini API未返回有效的图片数据") + + +def upload_image_to_server(image_path: str, tag_image_id: int) -> str: + """ + 上传图片到服务器 + + Args: + image_path: 本地图片路径 + tag_image_id: 图片标签ID + + Returns: + 服务器上的图片URL + """ + import requests + import json + + # 登录获取JWT token + base_url = "http://47.99.184.230:8324" # 使用外网API地址 + jwt_token = login_and_get_jwt_token(base_url) + + if not jwt_token: + raise Exception("获取JWT token失败,无法上传图片") + + # 准备上传请求 + upload_url = f"{base_url}/api/images/upload" + headers = { + 'Authorization': f'Bearer {jwt_token}', + } + + # 读取图片文件 + with open(image_path, 'rb') as image_file: + files = {'file': image_file} + data = {'tag_image_id': tag_image_id} # 添加必传参数 + + response = requests.post(upload_url, headers=headers, files=files, data=data) + + print(f"图片上传响应状态码: {response.status_code}") + print(f"图片上传响应内容: {response.text}") + + if response.status_code == 200: + result = response.json() + if result.get('code') == 200: + # 返回服务器上的图片URL + return result['data']['http_image_url'] + else: + raise Exception(f"图片上传失败: {result.get('message', '未知错误')}") + else: + raise Exception(f"图片上传请求失败,状态码: {response.status_code}, 响应: {response.text}") + + +def login_and_get_jwt_token(base_url: str) -> Optional[str]: + """ + 登录获取JWT token + """ + login_url = f"{base_url}/api/auth/login" + login_data = { + "username": "user010", # 使用固定的账号 + "password": "@5^2W6R7" + } + + print(f"尝试登录: {login_data['username']}") + print(f"登录URL: {login_url}") + + try: + response = requests.post(login_url, json=login_data, headers={'Content-Type': 'application/json'}) + print(f"响应状态码: {response.status_code}") + + if response.status_code == 200: + result = response.json() + if result.get('code') == 200: + jwt_token = result['data']['token'] + print("JWT token获取成功") + return jwt_token + else: + print(f"登录失败: {result.get('message', '未知错误')}") + return None + else: + print(f"登录请求失败: {response.status_code}") + return None + + except Exception as e: + print(f"登录异常: {e}") + return None + + +def batch_publish_articles(base_url: str, jwt_token: str, article_ids: List[int]) -> bool: + """ + 批量提交文章到/api/articles/batch-publish-auto接口 + """ + try: + print(f"开始批量提交 {len(article_ids)} 篇文章到batch-publish-auto接口") + + # 构建批量发布数据 + publish_data = { + "article_ids": article_ids + } + + print(f"准备批量提交的数据: {json.dumps(publish_data, ensure_ascii=False)}") + + # 发送请求 + upload_url = f"{base_url}/api/articles/batch-publish-auto" + headers = { + 'Authorization': f'Bearer {jwt_token}', + 'Content-Type': 'application/json', + 'Accept': 'application/json' + } + + response = requests.post(upload_url, json=publish_data, headers=headers) + + print(f"批量提交响应状态码: {response.status_code}") + + if response.status_code == 200: + try: + result = response.json() + print(f"批量提交响应内容: {result}") + + # 根据接口实际返回格式判断成功 + if result.get('code') == 200: + data = result.get('data', {}) + published_count = data.get('published_count', 0) + failed_count = data.get('failed_count', 0) + + success_msg = f"批量提交成功,发布: {published_count}篇,失败: {failed_count}篇" + print(success_msg) + return True + else: + print(f"批量提交失败: {result.get('message', '未知错误')}") + return False + except json.JSONDecodeError as e: + print(f"解析批量提交响应失败: {e}") + return False + elif response.status_code == 401: + # Token过期 + print("收到401错误,JWT token可能已过期") + return False + else: + print(f"批量提交请求失败,状态码: {response.status_code}") + return False + + except Exception as e: + print(f"批量提交异常: {e}") + return False + + +def process_single_article(article, used_image_counts, match_results): + """ + 处理单个文章与图片的匹配和挂靠 + + Args: + article: 单个文章数据 + used_image_counts: 图片使用计数 + match_results: 匹配结果列表 + + Returns: + 是否处理成功 + """ + print(f"\n处理文章: {article['title']} (ID: {article['id']})") + + # 根据文章标签获取匹配的图片(考虑已使用次数) + matched_images = get_images_by_tags_from_db(article['tags'], used_image_counts) + + if matched_images: + print(f"找到 {len(matched_images)} 张符合条件的匹配图片") + + # 按基础使用次数排序,优先使用基础计数较低的图片 + matched_images.sort(key=lambda x: x['base_count']) + + matched = False + for img in matched_images: + # 提取图片URL并添加前缀 + image_url = "http://images11.bxmkb.cn/Images/" + img['image_url'] + + if image_url: # 确保图片URL存在 + # 调用通义千问大模型进行挂靠评估 + match_success = call_qwen_model(article, [image_url]) + + if match_success: + print(f"文章与图片挂靠成功: {article['title']}") + + # 更新图片使用次数 + used_image_counts[img['id']] += 1 + + # 记录匹配结果 + match_results.append({ + '文章ID': article['id'], + '文章标题': article['title'], + '文章内容': article['content'][:100] + '...' if len(article['content']) > 100 else article['content'], # 限制内容长度 + '标签': ', '.join(article['tags']), + '匹配的图片URL': image_url, + '图片ID': img['id'], + '图片名称': img['image_name'], + '图片标签': img['tag_name'], + '图片关键词': img['keywords_name'], + '图片部门': img['department_name'], + '匹配状态': '成功' + }) + + return True + + if not matched: + print(f"文章未能与任何图片成功匹配,使用Gemini生成图片: {article['title']}") + + # 使用文章标题和标签生成提示词 + prompt = f"与'{article['title']}'相关的插图,标签: {', '.join(article['tags'])}" + generated_image_url = generate_image_with_gemini(prompt, article['tags'], article['id']) + print(f"生成的图片URL: {generated_image_url}") + + # 记录生成图片的结果 + match_results.append({ + '文章ID': article['id'], + '文章标题': article['title'], + '文章内容': article['content'][:100] + '...' if len(article['content']) > 100 else article['content'], + '标签': ', '.join(article['tags']), + '匹配的图片URL': generated_image_url, + '图片ID': 'N/A', + '图片名称': 'Generated', + '图片标签': 'N/A', + '图片关键词': 'N/A', + '图片部门': 'N/A', + '匹配状态': '生成图片' + }) + + return True + else: + print(f"没有找到符合条件的匹配图片,使用Gemini生成图片: {article['title']}") + + # 使用文章标题和标签生成提示词 + prompt = f"与'{article['title']}'相关的插图,标签: {', '.join(article['tags'])}" + generated_image_url = generate_image_with_gemini(prompt, article['tags'], article['id']) + print(f"生成的图片URL: {generated_image_url}") + + # 记录生成图片的结果 + match_results.append({ + '文章ID': article['id'], + '文章标题': article['title'], + '文章内容': article['content'][:100] + '...' if len(article['content']) > 100 else article['content'], + '标签': ', '.join(article['tags']), + '匹配的图片URL': generated_image_url, + '图片ID': 'N/A', + '图片名称': 'Generated', + '图片标签': 'N/A', + '图片关键词': 'N/A', + '图片部门': 'N/A', + '匹配状态': '生成图片' + }) + + return True + + +def process_article_image_matching(test_mode=False, test_count=None): + """ + 处理文章与图片的匹配和挂靠 + + Args: + test_mode: 是否为测试模式 + test_count: 测试文章数量(仅在测试模式下使用) + """ + # 用于跟踪每张图片的使用次数 + used_image_counts = defaultdict(int) + # 存储匹配结果 + match_results = [] + + try: + # 根据模式决定获取哪些文章 + articles = get_articles_with_tags_from_db() + + if not articles: + print("没有找到文章") + return + + # 如果是测试模式,只取前test_count条数据 + if test_mode: + if test_count is None: + test_count = 3 # 默认测试前3条 + articles = articles[:test_count] + print(f"测试模式:处理前 {len(articles)} 篇文章") + + success_count = 0 + generated_count = 0 + + # 收集所有处理后的文章ID用于发布 + processed_article_ids = [] + + for article in articles: + if process_single_article(article, used_image_counts, match_results): + success_count += 1 + processed_article_ids.append(article['id']) + else: + print(f"处理文章 {article['id']} 失败") + + # 将匹配结果写入CSV文件 + output_csv = 'article_image_match_results.csv' + with open(output_csv, 'w', newline='', encoding='utf-8-sig') as csvfile: + fieldnames = [ + '文章ID', '文章标题', '文章内容', '标签', + '匹配的图片URL', '图片ID', '图片名称', + '图片标签', '图片关键词', '图片部门', '匹配状态' + ] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + writer.writeheader() + for result in match_results: + writer.writerow(result) + + if not test_mode: + print(f"\n处理完成! 成功挂靠: {success_count} 篇, 生成图片: {generated_count} 篇") + print(f"匹配结果已保存至: {output_csv}") + + # 如果有处理过的文章,将它们提交到发布接口 + if processed_article_ids: + print(f"\n开始发布处理过的 {len(processed_article_ids)} 篇文章...") + + # 登录获取JWT token + base_url = "http://47.99.184.230:8324" # 使用外网API地址 + jwt_token = login_and_get_jwt_token(base_url) + + if jwt_token: + # 批量发布文章 + if batch_publish_articles(base_url, jwt_token, processed_article_ids): + print(f"成功发布 {len(processed_article_ids)} 篇文章") + else: + print("批量发布失败") + else: + print("获取JWT token失败,无法发布文章") + else: + print("\n没有处理过的文章,跳过发布步骤") + else: + print(f"\n测试模式完成! 处理了 {len(articles)} 篇文章,成功挂靠: {success_count} 篇, 生成图片: {len([r for r in match_results if r['匹配状态'] == '生成图片'])} 篇") + print(f"处理结果已保存至: {output_csv}") + + except Exception as e: + print(f"处理文章图片匹配时发生错误: {e}") + raise + + +if __name__ == "__main__": + import sys + + print("开始处理文章与图片的智能挂靠...") + + # 检查命令行参数 + if len(sys.argv) > 1: + if sys.argv[1] == "--test" and len(sys.argv) > 2: + # 测试模式:处理前N篇文章 + test_count = int(sys.argv[2]) + print(f"启动测试模式,处理前 {test_count} 篇文章") + process_article_image_matching(test_mode=True, test_count=test_count) + elif sys.argv[1] == "--test" and len(sys.argv) == 2: + # 提示用户输入要测试的文章数量 + test_count_input = input("请输入要测试的文章数量 (默认3): ") + test_count = int(test_count_input) if test_count_input.strip().isdigit() else 3 + print(f"启动测试模式,处理前 {test_count} 篇文章") + process_article_image_matching(test_mode=True, test_count=test_count) + else: + print("使用方法:") + print(" 正常模式: python match_article_images.py") + print(" 测试模式: python match_article_images.py --test [文章ID]") + else: + # 正常模式:处理所有文章 + process_article_image_matching() \ No newline at end of file diff --git a/push_article_published.py b/push_article_published.py new file mode 100644 index 0000000..7347fc7 --- /dev/null +++ b/push_article_published.py @@ -0,0 +1,680 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +AI文章自动生成监控脚本 +监控数据库中status为topic的记录,自动调用Coze API生成文章并提交 +""" + +import os +import sys +import time +import json +import logging +import requests +import pymysql +from datetime import datetime +from typing import Dict, List, Optional, Any +import traceback +import threading +from concurrent.futures import ThreadPoolExecutor, as_completed +from queue import Queue, Empty +import random +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +# 添加项目根目录到Python路径 +sys.path.append(os.path.dirname(os.path.abspath(__file__))) + +from database_config import get_db_manager +from log_config import setup_logger + +# 配置日志记录器,支持按日期切割和控制台输出 +logger = setup_logger( + name='push_article', + log_file='logs/push_article_published.log', + error_log_file='logs/push_article_published_error.log', + level=logging.INFO, + console_output=True +) + +# 配置常量 +#BASE_URL = "http://47.99.184.230:8324" +BASE_URL = "http://127.0.0.1:8324" +SLEEP_INTERVAL = 5 # 监控间隔(秒) +WORKER_COUNT = 10 # 并行处理worker数量,可配置 + +# 新增:批量发布配置 +BATCH_SIZE = 8 # 一次处理的文章数量,可调 +BATCH_INTERVAL = 2 # 批次间隔时间(秒),可调 + +# 网络重试配置 +MAX_RETRIES = 3 # 最大重试次数 +BACKOFF_FACTOR = 1 # 退避因子 +RETRY_STATUS_CODES = [500, 502, 503, 504, 429] # 需要重试的HTTP状态码 +CONNECTION_TIMEOUT = 30 # 连接超时(秒) +READ_TIMEOUT = 120 # 读取超时(秒) + +# 全局变量 +AUTH_TOKEN = None +WORKFLOW_ID = None +JWT_TOKEN = None + +class PushArticlePublished: + def __init__(self): + # API配置 + self.base_url = BASE_URL + + # 认证信息 + self.auth_token = None + self.workflow_id = None + self.jwt_token = None + + # 使用统一的数据库管理器 + self.db_manager = get_db_manager() + + # 登录配置 + self.login_credentials = { + 'username': 'user010', + 'password': '@5^2W6R7' + } + + # 禁用代理 + self.proxies = { + 'http': None, + 'https': None + } + + # 并行处理相关 + self.processing_lock = threading.Lock() # 用于线程安全的记录分配 + self.processed_ids = set() # 已处理的记录ID集合 + + # 创建会话和配置重试策略 + self.session = self._create_session() + + # 网络统计 + self.request_stats = { + 'total_requests': 0, + 'successful_requests': 0, + 'failed_requests': 0, + 'retry_attempts': 0, + 'connection_errors': 0, + 'timeout_errors': 0 + } + + logger.info("PushArticlePublished 初始化完成") + + def _create_session(self): + """创建配置了重试策略的requests会话""" + session = requests.Session() + + # 配置重试策略 + retry_strategy = Retry( + total=MAX_RETRIES, + status_forcelist=RETRY_STATUS_CODES, + backoff_factor=BACKOFF_FACTOR, + allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "OPTIONS", "TRACE"] + ) + + # 配置HTTP适配器 + adapter = HTTPAdapter( + max_retries=retry_strategy, + pool_connections=10, + pool_maxsize=20 + ) + + session.mount("http://", adapter) + session.mount("https://", adapter) + + # 设置默认超时 + session.timeout = (CONNECTION_TIMEOUT, READ_TIMEOUT) + + return session + + def _make_request_with_retry(self, method, url, **kwargs): + """带重试机制的网络请求方法""" + self.request_stats['total_requests'] += 1 + + for attempt in range(MAX_RETRIES + 1): + try: + # 使用会话发送请求 + response = self.session.request( + method=method, + url=url, + timeout=(CONNECTION_TIMEOUT, READ_TIMEOUT), + proxies=self.proxies, + **kwargs + ) + + # 请求成功 + self.request_stats['successful_requests'] += 1 + if attempt > 0: + logger.info(f"网络请求在第 {attempt + 1} 次尝试后成功") + return response + + except requests.exceptions.ConnectionError as e: + self.request_stats['connection_errors'] += 1 + if attempt < MAX_RETRIES: + self.request_stats['retry_attempts'] += 1 + backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1) + logger.warning(f"连接错误 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}") + logger.info(f"等待 {backoff_time:.2f} 秒后重试...") + time.sleep(backoff_time) + else: + self.request_stats['failed_requests'] += 1 + logger.error(f"连接最终失败,已重试 {MAX_RETRIES} 次: {e}") + raise + + except requests.exceptions.Timeout as e: + self.request_stats['timeout_errors'] += 1 + if attempt < MAX_RETRIES: + self.request_stats['retry_attempts'] += 1 + backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1) + logger.warning(f"请求超时 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}") + logger.info(f"等待 {backoff_time:.2f} 秒后重试...") + time.sleep(backoff_time) + else: + self.request_stats['failed_requests'] += 1 + logger.error(f"请求超时最终失败,已重试 {MAX_RETRIES} 次: {e}") + raise + + except requests.exceptions.ChunkedEncodingError as e: + if attempt < MAX_RETRIES: + self.request_stats['retry_attempts'] += 1 + backoff_time = (BACKOFF_FACTOR * (2 ** attempt)) + random.uniform(0, 1) + logger.warning(f"数据传输错误 (尝试 {attempt + 1}/{MAX_RETRIES + 1}): {e}") + logger.info(f"等待 {backoff_time:.2f} 秒后重试...") + time.sleep(backoff_time) + else: + self.request_stats['failed_requests'] += 1 + logger.error(f"数据传输最终失败,已重试 {MAX_RETRIES} 次: {e}") + raise + + except Exception as e: + self.request_stats['failed_requests'] += 1 + logger.error(f"网络请求发生未预期错误: {e}") + raise + + def log_network_stats(self): + """记录网络统计信息""" + stats = self.request_stats + success_rate = (stats['successful_requests'] / stats['total_requests'] * 100) if stats['total_requests'] > 0 else 0 + + stats_msg = ( + f"网络统计 - 总请求: {stats['total_requests']}, " + f"成功: {stats['successful_requests']}, " + f"失败: {stats['failed_requests']}, " + f"重试: {stats['retry_attempts']}, " + f"连接错误: {stats['connection_errors']}, " + f"超时错误: {stats['timeout_errors']}, " + f"成功率: {success_rate:.1f}%" + ) + + logger.info(stats_msg) + self.log_to_database('INFO', '网络统计', stats_msg) + + def get_db_connection(self): + """获取数据库连接""" + try: + return self.db_manager.get_connection() + except Exception as e: + logger.error(f"数据库连接失败: {e}") + return None + + def log_to_database(self, level: str, message: str, details: str = None): + """记录日志到数据库ai_logs表""" + try: + with self.db_manager.get_cursor() as cursor: + # 映射日志级别到数据库状态 + status_map = { + 'INFO': 'success', + 'WARNING': 'warning', + 'ERROR': 'error' + } + status = status_map.get(level, 'success') + + sql = """ + INSERT INTO ai_logs (user_id, action, description, status, error_message, created_at) + VALUES (%s, %s, %s, %s, %s, NOW()) + """ + cursor.execute(sql, (None, 'coze_generator', message, status, details)) + logger.info(f"日志已记录到数据库: {level} - {message}") + except Exception as e: + logger.error(f"记录日志到数据库失败: {e}") + + def login_and_get_jwt_token(self) -> bool: + """登录获取JWT token,参考JavaScript逻辑""" + try: + login_url = f"{self.base_url}/api/auth/login" + login_data = { + "username": "user010", # 使用用户指定的账号 + "password": "@5^2W6R7" + } + + logger.info(f"尝试登录: {login_data['username']}") + logger.info(f"登录URL: {login_url}") + self.log_to_database('INFO', f"尝试登录用户: {login_data['username']}") + + response = self._make_request_with_retry( + 'POST', + login_url, + json=login_data, + headers={'Content-Type': 'application/json'} + ) + + logger.info(f"响应状态码: {response.status_code}") + logger.info(f"响应内容: {response.text[:500]}...") + + if response.status_code == 200: + result = response.json() + if result.get('code') == 200: + self.jwt_token = result['data']['token'] + logger.info("JWT token获取成功") + self.log_to_database('INFO', "JWT token获取成功", json.dumps(result['data'])) + return True + else: + error_msg = f"登录失败: {result.get('message', '未知错误')}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, json.dumps(result)) + return False + else: + error_msg = f"登录请求失败: {response.status_code}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, response.text) + return False + + except Exception as e: + error_msg = f"登录异常: {e}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, traceback.format_exc()) + return False + + def batch_publish_auto(self, article_ids: List[int]) -> bool: + """批量提交文章到/api/articles/batch-publish-auto接口""" + try: + logger.info(f"开始批量提交 {len(article_ids)} 篇文章到batch-publish-auto接口") + self.log_to_database('INFO', f"开始批量提交文章", f"article_ids: {article_ids}") + + # 确保有JWT token + if not self.jwt_token: + logger.warning("JWT token缺失,尝试重新登录") + self.log_to_database('WARNING', "JWT token缺失,重新登录") + if not self.login_and_get_jwt_token(): + error_msg = "重新登录失败" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg) + return False + + # 构建批量发布数据 - 根据接口要求只需要article_ids + publish_data = { + "article_ids": article_ids + } + + logger.info(f"准备批量提交的数据: {json.dumps(publish_data, ensure_ascii=False)}") + + # 发送请求 - 修正接口路径 + upload_url = f"{self.base_url}/api/articles/batch-publish-auto" + headers = { + 'Authorization': f'Bearer {self.jwt_token}', + 'Content-Type': 'application/json', + 'Accept': 'application/json' + } + + response = self._make_request_with_retry( + 'POST', + upload_url, + json=publish_data, + headers=headers + ) + + logger.info(f"批量提交响应状态码: {response.status_code}") + + if response.status_code == 200: + try: + result = response.json() + logger.info(f"批量提交响应内容: {result}") + + # 根据接口实际返回格式判断成功 + if result.get('code') == 200: + data = result.get('data', {}) + published_count = data.get('published_count', 0) + failed_count = data.get('failed_count', 0) + + success_msg = f"批量提交成功,发布: {published_count}篇,失败: {failed_count}篇" + logger.info(success_msg) + self.log_to_database('INFO', success_msg, f"article_ids: {article_ids}") + return True + else: + error_msg = f"批量提交失败: {result.get('message', '未知错误')}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, response: {result}") + return False + except json.JSONDecodeError as e: + error_msg = f"解析批量提交响应失败: {e}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, f"response_text: {response.text}") + return False + elif response.status_code == 401: + # Token过期,尝试重新登录并重试一次 + logger.warning("收到401错误,JWT token可能已过期,尝试重新登录") + self.log_to_database('WARNING', "JWT token过期,重新登录", f"article_ids: {article_ids}") + + if self.login_and_get_jwt_token(): + logger.info("重新登录成功,重试批量提交请求") + # 更新headers中的token + headers['Authorization'] = f'Bearer {self.jwt_token}' + + # 重试请求 + retry_response = self._make_request_with_retry( + 'POST', + upload_url, + json=publish_data, + headers=headers + ) + + if retry_response.status_code == 200: + try: + result = retry_response.json() + logger.info(f"重试批量提交响应内容: {result}") + + if result.get('code') == 200: + data = result.get('data', {}) + published_count = data.get('published_count', 0) + failed_count = data.get('failed_count', 0) + + success_msg = f"重试批量提交成功,发布: {published_count}篇,失败: {failed_count}篇" + logger.info(success_msg) + self.log_to_database('INFO', success_msg, f"article_ids: {article_ids}") + return True + else: + error_msg = f"重试批量提交失败: {result.get('message', '未知错误')}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, response: {result}") + return False + except json.JSONDecodeError as e: + error_msg = f"解析重试批量提交响应失败: {e}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, f"response_text: {retry_response.text}") + return False + else: + error_msg = f"重试批量提交请求失败,状态码: {retry_response.status_code}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, f"response_text: {retry_response.text}") + return False + else: + error_msg = "重新登录失败,无法重试批量提交" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}") + return False + else: + error_msg = f"批量提交请求失败,状态码: {response.status_code}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, f"response_text: {response.text}") + return False + + except requests.exceptions.Timeout as e: + error_msg = f"批量提交请求超时: {e}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, timeout: {CONNECTION_TIMEOUT}s/{READ_TIMEOUT}s") + return False + except requests.exceptions.ConnectionError as e: + error_msg = f"批量提交连接错误: {e}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, base_url: {self.base_url}") + return False + except requests.exceptions.RequestException as e: + error_msg = f"批量提交网络异常: {e}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, exception_type: {type(e).__name__}") + return False + except Exception as e: + error_msg = f"批量提交异常: {e}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, f"article_ids: {article_ids}, traceback: {traceback.format_exc()}") + return False + + + def is_publish_time_allowed(self) -> bool: + """检查当前时间是否在允许发布的时间窗口内(北京时间6:00-23:59)""" + current_hour = datetime.now().hour + # 凌晨00:00-05:59禁止发布,6:00-23:59允许发布 + if current_hour >= 6: + logger.info(f"当前时间 {datetime.now().strftime('%H:%M:%S')} 可以推送") + return True + else: + logger.info(f"当前时间 {datetime.now().strftime('%H:%M:%S')} 在禁止发布时段(00:00-05:59),跳过推送") + return False + + def filter_articles_by_daily_limit(self, articles: List[Dict]) -> List[Dict]: + """根据作者每日发文限制过滤文章 + + 检查ai_statistics_days表中daily_published_count是否超过daily_post_max + 如果超过,则该作者的文章今日不发 + """ + if not articles: + return [] + + try: + today_date = datetime.now().strftime('%Y-%m-%d') + filtered_articles = [] + + with self.db_manager.get_cursor() as cursor: + for article in articles: + author_id = article.get('author_id') + if not author_id: + logger.warning(f"文章ID {article['id']} 缺少author_id,跳过") + continue + + # 先检查ai_authors表:作者必须满足 daily_post_max > 0, status = 'active', channel = 1 + author_check_sql = """ + SELECT id, author_name, daily_post_max, status, channel + FROM ai_authors + WHERE id = %s AND daily_post_max > 0 AND status = 'active' AND channel = 1 + """ + cursor.execute(author_check_sql, (author_id,)) + author_result = cursor.fetchone() + + if not author_result: + logger.info(f"[业务日志] 作者ID {author_id} 不符合发文条件(daily_post_max>0 AND status=active AND channel=1),文章ID {article['id']} 过滤掉") + # 将文章状态更新为pending_review,重新走审批流程 + update_sql = "UPDATE ai_articles SET status = 'pending_review', updated_at = NOW() WHERE id = %s" + cursor.execute(update_sql, (article['id'],)) + logger.info(f"[业务日志] 文章ID {article['id']} 状态已更新为pending_review,需重新审批") + continue + + # 查询该作者当天的发文统计 + sql = """ + SELECT daily_published_count, daily_post_max + FROM ai_statistics_days + WHERE author_id = %s AND stat_date = %s + """ + cursor.execute(sql, (author_id, today_date)) + result = cursor.fetchone() + + if result: + daily_published_count = result['daily_published_count'] or 0 + daily_post_max = result['daily_post_max'] or 0 + + # 检查daily_post_max是否小于1,小于1则不允许发文 + if daily_post_max < 1: + #logger.info(f"[业务日志] 作者ID {author_id} daily_post_max={daily_post_max} 小于1,文章ID {article['id']} 过滤掉,不允许发文") + continue + + if daily_published_count >= daily_post_max: + #logger.info(f"[业务日志] 作者ID {author_id} 今日已发 {daily_published_count} 篇,达到上限 {daily_post_max},文章ID {article['id']} 跳过") + continue + else: + #logger.info(f"[业务日志] 作者ID {author_id} 今日已发 {daily_published_count}/{daily_post_max},文章ID {article['id']} 允许发布") + filtered_articles.append(article) + else: + # 没有统计记录,默认不允许发布(需要先初始化统计记录) + logger.info(f"[业务日志] 作者ID {author_id} 无当日统计记录,文章ID {article['id']} 过滤掉,需先初始化统计记录") + continue + + logger.info(f"每日限制过滤完成: 原始 {len(articles)} 篇 -> 允许发布 {len(filtered_articles)} 篇") + return filtered_articles + + except Exception as e: + error_msg = f"检查每日发文限制异常: {e}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, traceback.format_exc()) + # 异常时返回原始列表,避免阻塞 + return articles + + def get_published_review_articles(self) -> List[Dict]: + """获取状态为published_review的待发布文章""" + try: + with self.db_manager.get_cursor() as cursor: + # 查询published_review状态的文章 + sql = """ + SELECT + id, + title, + status, + created_at, + updated_at, + author_id + FROM ( + SELECT + id, + title, + status, + created_at, + updated_at, + author_id, + ROW_NUMBER() OVER ( + PARTITION BY author_id + ORDER BY updated_at ASC, id ASC + ) as author_rank + FROM ai_articles + WHERE status = 'published_review' + AND author_id > 0 + ) ranked_articles + """ + cursor.execute(sql) + results = cursor.fetchall() + + if results: + logger.info(f"查询到 {len(results)} 个待发布文章") + for result in results: + logger.info(f"待发布文章 - ID: {result['id']}, 标题: {result['title']}, 状态: {result['status']}") + #self.log_to_database('INFO', f"发现待发布文章: {result['title']}", + #f"ID: {result['id']}, 状态: {result['status']}") + else: + logger.info("未查询到待发布文章") + + return results + except Exception as e: + error_msg = f"查询待发布文章异常: {e}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, traceback.format_exc()) + return [] + + def process_published_review_articles(self, published_articles: List[Dict], worker_id: int) -> int: + """Worker线程处理published_review状态文章的方法""" + processed_count = 0 + thread_name = f"PublishWorker-{worker_id}" + threading.current_thread().name = thread_name + + logger.info(f"[{thread_name}] 启动,准备处理待发布文章") + + # 按批次处理文章 + for i in range(0, len(published_articles), BATCH_SIZE): + batch = published_articles[i:i + BATCH_SIZE] + article_ids = [article['id'] for article in batch] + + logger.info(f"[{thread_name}] 处理批次 {i//BATCH_SIZE + 1},文章ID: {article_ids}") + + # 批量提交文章 + if self.batch_publish_auto(article_ids): + processed_count += len(article_ids) + logger.info(f"[{thread_name}] 成功处理批次,文章数量: {len(article_ids)}") + else: + logger.error(f"[{thread_name}] 处理批次失败,文章ID: {article_ids}") + + # 批次间隔 + if i + BATCH_SIZE < len(published_articles): + logger.info(f"[{thread_name}] 等待 {BATCH_INTERVAL} 秒后处理下一批次") + time.sleep(BATCH_INTERVAL) + + logger.info(f"[{thread_name}] 完成,共处理 {processed_count} 篇文章") + return processed_count + + def run_monitor(self): + """运行监控循环,支持多worker并行处理""" + logger.info(f"开始监控ai_articles表,使用 {WORKER_COUNT} 个worker并行处理...") + self.log_to_database('INFO', f'启动文章自动生成监控服务,worker数量: {WORKER_COUNT}', 'run_monitor') + + # 统计计数器 + loop_count = 0 + stats_interval = 60 # 每60次循环记录一次统计(约5分钟) + + while True: + try: + # 获取待发布的文章 + published_articles = self.get_published_review_articles() + + # 逻辑1: 检查时间窗口(北京时间6:00-23:59允许,00:00-05:59禁止) + if not self.is_publish_time_allowed(): + published_articles = [] + logger.info("当前处于禁止发布时段,清空待发布列表") + + # 逻辑2: 根据作者每日发文限制过滤文章 + if published_articles: + published_articles = self.filter_articles_by_daily_limit(published_articles) + + # 处理待发布文章 + if published_articles: + logger.info(f"发现 {len(published_articles)} 篇待发布文章,启动批量发布处理") + self.log_to_database('INFO', f'发现待发布文章,启动批量处理', f'文章数量: {len(published_articles)}') + + # 使用单个worker处理批量发布(避免并发冲突) + try: + processed_count = self.process_published_review_articles(published_articles, 1) + logger.info(f"批量发布处理完成,共处理 {processed_count} 篇文章") + self.log_to_database('INFO', f'批量发布处理完成', f'共处理 {processed_count} 篇文章') + except Exception as e: + logger.error(f"批量发布处理异常: {e}") + self.log_to_database('ERROR', f'批量发布处理异常', str(e)) + + # 如果没有任何待处理任务 + if not published_articles: + logger.info("暂无待处理任务,继续监控...") + + # 每次循环后休息 + time.sleep(SLEEP_INTERVAL) + + # 定期记录网络统计 + loop_count += 1 + if loop_count % stats_interval == 0: + self.log_network_stats() + + except KeyboardInterrupt: + logger.info("收到中断信号,停止监控") + self.log_to_database('INFO', '监控服务手动停止', 'KeyboardInterrupt') + break + except Exception as e: + error_msg = f"监控循环异常: {e}" + logger.error(error_msg) + self.log_to_database('ERROR', error_msg, traceback.format_exc()) + time.sleep(5) # 异常时等待5秒再继续 + +def main(): + """主函数""" + generator = PushArticlePublished() + + try: + # 先登录获取JWT token + logger.info("开始登录获取JWT token") + if not generator.login_and_get_jwt_token(): + logger.error("登录失败,程序退出") + return + + # 开始监控 + generator.run_monitor() + + except Exception as e: + logger.error(f"程序运行异常: {e}") + generator.log_to_database('ERROR', f'程序运行异常: {e}', traceback.format_exc()) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e49ee14 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +requests==2.31.0 +google-genai==0.1.0 +Pillow==10.0.0 +openpyxl==3.1.2 \ No newline at end of file diff --git a/setup_env.bat b/setup_env.bat new file mode 100644 index 0000000..590ca9c --- /dev/null +++ b/setup_env.bat @@ -0,0 +1,14 @@ +@echo off +echo 正在创建虚拟环境... +python -m venv venv +echo 虚拟环境创建完成! + +echo 正在激活虚拟环境... +call venv\Scripts\activate.bat + +echo 正在安装依赖... +pip install -r requirements.txt + +echo 虚拟环境设置完成! +echo 激活虚拟环境的命令: venv\Scripts\activate +pause \ No newline at end of file diff --git a/setup_env.sh b/setup_env.sh new file mode 100644 index 0000000..b244066 --- /dev/null +++ b/setup_env.sh @@ -0,0 +1,13 @@ +#!/bin/bash +echo "正在创建虚拟环境..." +python3 -m venv venv +echo "虚拟环境创建完成!" + +echo "正在激活虚拟环境..." +source venv/bin/activate + +echo "正在安装依赖..." +pip install -r requirements.txt + +echo "虚拟环境设置完成!" +echo "激活虚拟环境的命令: source venv/bin/activate" \ No newline at end of file diff --git a/split_sql_tables.py b/split_sql_tables.py new file mode 100644 index 0000000..499e889 --- /dev/null +++ b/split_sql_tables.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +将包含多个表的SQL文件拆分为单个表的SQL文件 +""" + +import os +import re +from pathlib import Path + + +def split_sql_tables(input_file_path): + """ + 将SQL文件中的每个表拆分为单独的文件 + """ + # 读取输入文件 + with open(input_file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # 分割SQL内容,查找CREATE TABLE语句 + # 使用正则表达式匹配CREATE TABLE语句 + table_pattern = r'(CREATE TABLE\s+`?(\w+)`?\s*\([^;]*END\s*OF\s*DATA;)?)' + + # 更精确的匹配模式,寻找CREATE TABLE语句直到遇到下一个CREATE TABLE或文件结尾 + create_table_pattern = r'(CREATE TABLE\s+`?(\w+)`?\s*\(.+?)(?=\nCREATE TABLE|\Z)' + + # 分离出每个CREATE TABLE语句 + tables = re.findall(create_table_pattern, content, re.DOTALL | re.IGNORECASE) + + # 如果上面的正则没匹配到,尝试另一种方式 + if not tables: + # 分割CREATE TABLE部分 + parts = re.split(r'\n(?=CREATE TABLE)', content) + tables = [] + + for part in parts: + if part.strip().upper().startswith('CREATE TABLE'): + # 提取表名 + table_name_match = re.search(r'CREATE TABLE\s+`?(\w+)`?', part, re.IGNORECASE) + if table_name_match: + table_name = table_name_match.group(1) + tables.append((part.strip(), table_name)) + + # 确保输出目录存在 + output_dir = Path(input_file_path).parent / "split_tables" + output_dir.mkdir(exist_ok=True) + + # 为每个表创建单独的文件 + for table_sql, table_name in tables: + # 清理表名,确保它是有效的文件名 + clean_table_name = re.sub(r'[^\w\-_\.]', '_', table_name) + + # 创建输出文件路径 + output_file_path = output_dir / f"{clean_table_name}.sql" + + # 写入表定义到单独的文件 + with open(output_file_path, 'w', encoding='utf-8') as f: + f.write("-- SQL table definition\n") + f.write("-- Generated from splitting a larger SQL file\n") + f.write("\n") + f.write(table_sql.strip()) + f.write("\n") + + print(f"已创建表文件: {output_file_path}") + + +def split_sql_tables_advanced(input_file_path): + """ + 高级方法拆分SQL文件中的表定义 + """ + with open(input_file_path, 'r', encoding='utf-8') as f: + lines = f.readlines() + + # 确保输出目录存在 + output_dir = Path(input_file_path).parent / "split_tables" + output_dir.mkdir(exist_ok=True) + + current_table_lines = [] + in_table_definition = False + current_table_name = "" + + i = 0 + while i < len(lines): + line = lines[i].strip() + + # 检查是否是CREATE TABLE语句 + if line.upper().startswith('CREATE TABLE'): + # 如果之前已经在处理表定义,保存之前的表 + if in_table_definition and current_table_lines: + save_table_to_file(current_table_name, current_table_lines, output_dir) + current_table_lines = [] + + # 开始新的表定义 + in_table_definition = True + current_table_lines.append(lines[i]) + + # 提取表名 + table_name_match = re.search(r'CREATE TABLE\s+`?(\w+)`?', line, re.IGNORECASE) + if table_name_match: + current_table_name = table_name_match.group(1) + + # 检查这一行是否以分号结束 + if line.endswith(';'): + # 单行CREATE TABLE语句 + save_table_to_file(current_table_name, current_table_lines, output_dir) + current_table_lines = [] + in_table_definition = False + else: + # 多行CREATE TABLE语句,继续收集行直到遇到分号 + pass + elif in_table_definition: + current_table_lines.append(lines[i]) + # 检查是否以分号结束 + if line.endswith(';'): + # 结束当前表定义 + save_table_to_file(current_table_name, current_table_lines, output_dir) + current_table_lines = [] + in_table_definition = False + # 如果不在表定义中且遇到CREATE TABLE之前的行,忽略或处理其他内容 + + i += 1 + + # 处理最后一个表(如果有) + if in_table_definition and current_table_lines: + save_table_to_file(current_table_name, current_table_lines, output_dir) + + +def save_table_to_file(table_name, table_lines, output_dir): + """ + 将表定义保存到文件 + """ + # 清理表名,确保它是有效的文件名 + clean_table_name = re.sub(r'[^\w\-_\.]', '_', table_name) + + # 创建输出文件路径 + output_file_path = output_dir / f"{clean_table_name}.sql" + + # 写入表定义到单独的文件 + with open(output_file_path, 'w', encoding='utf-8') as f: + f.write("-- SQL table definition\n") + f.write("-- Generated from splitting a larger SQL file\n") + f.write("-- Table: " + table_name + "\n") + f.write("\n") + + for line in table_lines: + f.write(line.rstrip() + '\n') + + print(f"已创建表文件: {output_file_path}") + + +def extract_create_table_statements(input_file_path): + """ + 提取SQL文件中的所有CREATE TABLE语句 + """ + with open(input_file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # 正则表达式匹配CREATE TABLE语句 + # 匹配从CREATE TABLE开始到遇到下一个CREATE TABLE或文件结尾的内容 + pattern = r'(CREATE TABLE\s+`?\w+`?[^;]*(?:;|ENGINE.*?;))' + + # 更复杂的正则表达式,考虑多行和嵌套括号 + # complex_pattern = r'(CREATE TABLE\s+`?(\w+)`?\s*\(((?>[^()]+|\((?)|\)(?<-DEPTH>))*(?(DEPTH)(?!)))\)[^;]*;)' + + # 使用简单方法,逐行解析 + lines = content.split('\n') + + # 确保输出目录存在 + output_dir = Path(input_file_path).parent / "split_tables" + output_dir.mkdir(exist_ok=True) + + current_table_lines = [] + in_table_definition = False + current_table_name = "" + + for line in lines: + stripped_line = line.strip() + + if stripped_line.upper().startswith('CREATE TABLE'): + # 如果正在处理上一个表,保存它 + if in_table_definition and current_table_lines: + save_table_to_file(current_table_name, current_table_lines, output_dir) + + # 开始新表 + in_table_definition = True + current_table_name_match = re.search(r'CREATE TABLE\s+`?(\w+)`?', stripped_line, re.IGNORECASE) + if current_table_name_match: + current_table_name = current_table_name_match.group(1) + current_table_lines = [line] + elif in_table_definition: + current_table_lines.append(line) + # 检查行是否以分号结尾,表示表定义结束 + if stripped_line.endswith(';'): + # 这可能是一个完整的表定义 + # 简单检查是否是表定义的结尾 + save_table_to_file(current_table_name, current_table_lines, output_dir) + current_table_lines = [] + in_table_definition = False + # 否则跳过非表定义的行 + + # 处理最后一个表 + if in_table_definition and current_table_lines: + save_table_to_file(current_table_name, current_table_lines, output_dir) + + +def parse_sql_file(input_file_path): + """ + 解析SQL文件并拆分表定义 + """ + with open(input_file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # 查找所有CREATE TABLE语句 + # 更安全的解析方法 - 逐行处理 + lines = content.split('\n') + + # 确保输出目录存在 + output_dir = Path(input_file_path).parent / "split_tables" + output_dir.mkdir(exist_ok=True) + + current_table_lines = [] + in_table_definition = False + current_table_name = "" + + for line in lines: + stripped_line = line.strip() + + if stripped_line.upper().startswith('CREATE TABLE'): + # 如果正在处理上一个表,保存它 + if in_table_definition and current_table_lines: + save_table_to_file_simple(current_table_name, current_table_lines, output_dir) + + # 开始新表 + in_table_definition = True + # 提取表名 + table_name_match = re.search(r'CREATE TABLE\s+(?:IF NOT EXISTS\s+)?`?(\w+)`?', stripped_line, re.IGNORECASE) + if table_name_match: + current_table_name = table_name_match.group(1) + current_table_lines = [line] + elif in_table_definition: + current_table_lines.append(line) + # 检查行是否以分号结尾,表示表定义结束 + if stripped_line and stripped_line.endswith(';'): + # 检查是否包含表定义的关键元素,如ENGINE, CHARACTER SET等 + # 或者是完整的CREATE TABLE语句 + if ('ENGINE' in stripped_line or 'CHARACTER SET' in stripped_line or + 'ROW_FORMAT' in stripped_line or ') ENGINE' in line or line.count('(') <= line.count(')')): + # 这是一个完整的表定义 + save_table_to_file_simple(current_table_name, current_table_lines, output_dir) + current_table_lines = [] + in_table_definition = False + # 否则跳过非表定义的行 + + # 处理最后一个表 + if in_table_definition and current_table_lines: + save_table_to_file_simple(current_table_name, current_table_lines, output_dir) + + +def save_table_to_file_simple(table_name, table_lines, output_dir): + """ + 将表定义保存到文件(简化版) + """ + # 清理表名 + clean_table_name = re.sub(r'[^\w\-_\.]', '_', table_name) + + # 创建输出文件路径 + output_file_path = output_dir / f"{clean_table_name}.sql" + + # 写入表定义到单独的文件 + with open(output_file_path, 'w', encoding='utf-8') as f: + f.write("-- SQL table definition\n") + f.write("-- Generated from splitting a larger SQL file\n") + f.write(f"-- Table: {table_name}\n") + f.write("--\n\n") + + for line in table_lines: + f.write(line) + f.write('\n') + + print(f"已创建表文件: {output_file_path}") + + +if __name__ == "__main__": + import sys + + if len(sys.argv) < 2: + input_file = input("请输入SQL文件路径: ").strip().strip('"\'') + else: + input_file = sys.argv[1].strip('"\'') + + if not os.path.exists(input_file): + print(f"错误: 文件 {input_file} 不存在") + sys.exit(1) + + print(f"正在拆分SQL文件: {input_file}") + parse_sql_file(input_file) + print("拆分完成!") \ No newline at end of file diff --git a/test_articles.xlsx b/test_articles.xlsx new file mode 100644 index 0000000..fb9e17e Binary files /dev/null and b/test_articles.xlsx differ diff --git a/test_images.xlsx b/test_images.xlsx new file mode 100644 index 0000000..8882c3f Binary files /dev/null and b/test_images.xlsx differ