commit 2882852cd24df1238c11e010fcb8c0844a057739 Author: liangguodong Date: Fri Jan 30 18:30:05 2026 +0800 Initial commit: AI tagging images project diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bd8ccd1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,23 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so + +# Virtual environment +venv/ +env/ +.venv/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Logs +*.log + +# OS +.DS_Store +Thumbs.db diff --git a/README.md b/README.md new file mode 100644 index 0000000..19a8064 --- /dev/null +++ b/README.md @@ -0,0 +1,161 @@ +# AI 图片标签衍生系统 + +基于千问视觉大模型(Qwen-VL)的医疗健康图片标签自动衍生系统。 + +## 功能概述 + +- **离线批量处理**:遍历数据库图片,批量调用大模型进行标签衍生 +- **RESTful API 服务**:提供标签衍生的 HTTP 接口 +- **智能重试机制**:API 调用失败自动重试,支持指数退避 +- **统一配置管理**:支持环境变量配置,灵活部署 +- **完整日志系统**:文件 + 控制台双输出,便于问题排查 + +## 项目结构 + +``` +ai_tagging_images/ +├── config/ +│ ├── __init__.py +│ └── settings.py # 配置管理中心 +├── logs/ # 日志目录(自动创建) +├── promt/ +│ └── qwen_tag_derive_prompt.py +├── database_config.py # 数据库连接和 DAO +├── image_tag_derive.py # 离线批量处理脚本 +├── logger.py # 日志模块 +├── retry_handler.py # 重试机制 +├── tag_derive_api.py # FastAPI 服务 +├── query_tags.py # 标签查询工具 +├── check_results.py # 结果检查工具 +├── requirements.txt # 依赖清单 +└── ai_article.sql # 数据库结构 +``` + +## 快速开始 + +### 1. 安装依赖 + +```bash +pip install -r requirements.txt +``` + +### 2. 配置环境变量(可选) + +```bash +# Windows +set DASHSCOPE_API_KEY=your-api-key +set DB_HOST=localhost +set DB_PASSWORD=your-password + +# Linux/Mac +export DASHSCOPE_API_KEY=your-api-key +export DB_HOST=localhost +export DB_PASSWORD=your-password +``` + +### 3. 
运行离线脚本 + +```bash +# 处理全部待处理数据 +python image_tag_derive.py + +# 从指定ID开始处理(断点续传) +python image_tag_derive.py --start-id 100 + +# 指定ID范围处理 +python image_tag_derive.py --start-id 100 --end-id 200 + +# 指定起始ID和批次大小 +python image_tag_derive.py --start-id 100 --batch-size 3 + +# 按指定ID处理(单个或多个) +python image_tag_derive.py --id 16495 +python image_tag_derive.py --id 16495 16496 16497 +``` + +> 注意:所有模式都会检查衍生标签,已有衍生标签的记录会被跳过。 + +### 4. 启动 API 服务 + +```bash +python tag_derive_api.py +``` + +服务启动后访问: +- API 文档:http://127.0.0.1:8000/docs +- 健康检查:http://127.0.0.1:8000/health + +## API 接口 + +| 方法 | 端点 | 说明 | +|------|------|------| +| GET | `/` | 服务状态 | +| GET | `/health` | 健康检查 | +| POST | `/api/derive/single` | 单张图片标签衍生 | +| POST | `/api/derive/batch` | 批量标签衍生(最多5张) | +| POST | `/api/derive/async` | 异步批量任务 | +| GET | `/api/task/{task_id}` | 查询任务状态 | +| GET | `/api/stats` | 统计信息 | +| GET | `/api/pending` | 待处理列表 | + +### 示例请求 + +**单张图片衍生:** +```bash +curl -X POST http://127.0.0.1:8000/api/derive/single \ + -H "Content-Type: application/json" \ + -d '{ + "image_url": "https://example.com/image.jpg", + "tag_name": "高血压" + }' +``` + +**响应:** +```json +{ + "success": true, + "original_tag": "高血压", + "derived_tags": ["血压升高", "心血管疾病", "降压药", "血压监测"], + "merged_tag": "#高血压##血压升高##心血管疾病##降压药##血压监测#" +} +``` + +## 配置说明 + +| 环境变量 | 默认值 | 说明 | +|----------|--------|------| +| `DASHSCOPE_API_KEY` | - | 千问 API Key | +| `DB_HOST` | localhost | 数据库主机 | +| `DB_PORT` | 3306 | 数据库端口 | +| `DB_USER` | root | 数据库用户 | +| `DB_PASSWORD` | - | 数据库密码 | +| `DB_DATABASE` | ai_article | 数据库名 | +| `BATCH_SIZE` | 3 | 每批处理图片数 | +| `QWEN_MAX_RETRIES` | 3 | 最大重试次数 | +| `LOG_LEVEL` | INFO | 日志级别 | +| `API_PORT` | 8000 | API 服务端口 | + +## 技术栈 + +- **大模型**:阿里云千问 Qwen-VL-Max +- **Web 框架**:FastAPI +- **数据库**:MySQL 9.0 +- **Python**:3.10+ + +## 数据表 + +主要涉及以下数据表: +- `ai_image_tags`:图片标签关联表 +- `ai_tags`:标签主表 + +## 日志 + +日志文件保存在 `logs/` 目录,按日期命名: +``` +logs/ +└── tag_derive_20260130.log +``` + +## License + +MIT diff --git 
a/ai_article.sql b/ai_article.sql new file mode 100644 index 0000000..cd4d994 --- /dev/null +++ b/ai_article.sql @@ -0,0 +1,930 @@ +/* + Navicat Premium Dump SQL + + Source Server : mixue + Source Server Type : MySQL + Source Server Version : 90001 (9.0.1) + Source Host : localhost:3306 + Source Schema : ai_article + + Target Server Type : MySQL + Target Server Version : 90001 (9.0.1) + File Encoding : 65001 + + Date: 28/01/2026 14:04:39 +*/ + +SET NAMES utf8mb4; +SET FOREIGN_KEY_CHECKS = 0; + +-- ---------------------------- +-- Table structure for ai_article_images +-- ---------------------------- +DROP TABLE IF EXISTS `ai_article_images`; +CREATE TABLE `ai_article_images` ( + `id` int NOT NULL AUTO_INCREMENT, + `article_id` int NOT NULL DEFAULT 0, + `image_id` int NOT NULL DEFAULT 0, + `image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `image_tag_id` int NOT NULL DEFAULT 0, + `sort_order` int NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `keywords_id` int NOT NULL DEFAULT 0, + `keywords_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `department_id` int NOT NULL DEFAULT 0, + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `image_source` tinyint(1) NOT NULL DEFAULT 0 COMMENT '1=tag|2=change', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_article_image`(`article_id` ASC, `image_id` ASC) USING BTREE, + INDEX `image_id`(`image_id` ASC) USING BTREE, + INDEX `idx_tag_article_lookup`(`image_tag_id` ASC, `article_id` ASC) USING BTREE, + INDEX `idx_article_images_article_tag`(`article_id` ASC, `image_tag_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 699 CHARACTER SET = utf8mb4 COLLATE = 
utf8mb4_general_ci ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_article_tags +-- ---------------------------- +DROP TABLE IF EXISTS `ai_article_tags`; +CREATE TABLE `ai_article_tags` ( + `id` int NOT NULL AUTO_INCREMENT, + `article_id` int NOT NULL, + `coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'Coze生成的标签', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_article_tag`(`article_id` ASC) USING BTREE, + CONSTRAINT `ai_article_tags_ibfk_1` FOREIGN KEY (`article_id`) REFERENCES `ai_articles` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 943 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_articles +-- ---------------------------- +DROP TABLE IF EXISTS `ai_articles`; +CREATE TABLE `ai_articles` ( + `id` int NOT NULL AUTO_INCREMENT, + `batch_id` bigint UNSIGNED NOT NULL DEFAULT 0 COMMENT '批次ID', + `topic_type_id` int UNSIGNED NOT NULL DEFAULT 0, + `prompt_workflow_id` int UNSIGNED NOT NULL DEFAULT 0, + `topic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `departmentids` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `author_id` int NULL DEFAULT NULL, + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `department_id` int NULL DEFAULT NULL, + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `created_user_id` int NOT NULL DEFAULT 0, + `review_user_id` int NULL 
DEFAULT NULL, + `publish_user_id` int NULL DEFAULT NULL, + `status` enum('topic','cover_image','generate','generate_failed','draft','pending_review','approved','rejected','published_review','published','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'draft', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `review_comment` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `publish_time` timestamp NULL DEFAULT NULL, + `baijiahao_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `baijiahao_status` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `word_count` int NULL DEFAULT 0, + `image_count` int NULL DEFAULT 0, + `coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'Coze生成的标签', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `created_user_id`(`created_user_id` ASC) USING BTREE, + INDEX `review_user_id`(`review_user_id` ASC) USING BTREE, + INDEX `publish_user_id`(`publish_user_id` ASC) USING BTREE, + INDEX `idx_articles_status_user_created`(`status` ASC, `created_user_id` ASC, `created_at` DESC) USING BTREE, + INDEX `idx_articles_status_created`(`status` ASC, `created_at` DESC) USING BTREE, + INDEX `idx_articles_status`(`status` ASC) USING BTREE, + INDEX `idx_articles_created_at`(`created_at` DESC) USING BTREE, + INDEX `idx_status_id_author`(`status` ASC, `id` ASC, `author_id` ASC) USING BTREE, + INDEX `idx_articles_updated_at`(`updated_at` DESC) USING BTREE, + INDEX `idx_articles_status_prompt_topic_id`(`status` ASC, `prompt_workflow_id` ASC, `topic` ASC, `id` ASC) USING BTREE, + INDEX `idx_articles_status_author_created`(`status` ASC, `author_id` ASC, `created_at` DESC) USING BTREE, + INDEX `idx_articles_created_status_author`(`created_at` ASC, 
`status` ASC, `author_id` ASC) USING BTREE, + INDEX `idx_channel_status_publish_author`(`channel` ASC, `status` ASC, `publish_time` ASC, `author_id` ASC) USING BTREE, + INDEX `idx_author_channel_status_date`(`author_id` ASC, `channel` ASC, `status` ASC, `updated_at` ASC) USING BTREE, + INDEX `idx_audit_stats`(`author_id` ASC, `channel` ASC, `status` ASC, `updated_at` ASC) USING BTREE, + INDEX `idx_status_id`(`status` ASC, `id` ASC) USING BTREE, + INDEX `idx_status_dept_author`(`status` ASC, `department_id` ASC, `author_id` ASC) USING BTREE, + CONSTRAINT `ai_articles_ibfk_1` FOREIGN KEY (`author_id`) REFERENCES `ai_authors` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, + CONSTRAINT `ai_articles_ibfk_2` FOREIGN KEY (`created_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, + CONSTRAINT `ai_articles_ibfk_3` FOREIGN KEY (`review_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, + CONSTRAINT `ai_articles_ibfk_4` FOREIGN KEY (`publish_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 1180 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_authors +-- ---------------------------- +DROP TABLE IF EXISTS `ai_authors`; +CREATE TABLE `ai_authors` ( + `id` int NOT NULL AUTO_INCREMENT, + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `app_id` varchar(127) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `app_token` varchar(127) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department_id` int NOT NULL DEFAULT 0, + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `title` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `hospital` 
varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `specialty` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `toutiao_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `toutiao_images_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `toutiao_images` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `introduction` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `avatar_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `cumulative_published_count` int NULL DEFAULT 0 COMMENT '累计发文量(从起始日到stat_date的总和)', + `cumulative_revenue_sum` int NULL DEFAULT 0 COMMENT '累计收入(从起始日到stat_date的总和)', + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `daily_post_max` int NOT NULL DEFAULT 0 COMMENT '作者每日发文MAX', + `publishing_priority` decimal(18, 2) NULL DEFAULT 0.00 COMMENT '发文优先等级', + `stock_quantity` int NOT NULL DEFAULT 0 COMMENT '作者可发文库存量', + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_ai_authors_status`(`status` ASC) USING BTREE, + INDEX `idx_ai_authors_status_id`(`status` ASC, `id` ASC) USING BTREE, + INDEX `idx_status_created_at`(`status` ASC, `created_at` DESC) USING BTREE, + INDEX `idx_status_updated_at`(`status` ASC, `updated_at` DESC) USING BTREE, + INDEX `idx_status_cumulative_published`(`status` ASC, `cumulative_published_count` DESC) USING BTREE, + INDEX `idx_channel_status_id`(`channel` ASC, `status` ASC, `id` ASC) USING BTREE, + INDEX `idx_channel_status_daily_max`(`channel` ASC, `status` ASC, `daily_post_max` ASC, `id` ASC) USING BTREE, + INDEX `idx_channel_status_daily_max_id`(`channel` ASC, `status` ASC, `daily_post_max` ASC, `id` ASC) 
USING BTREE, + INDEX `idx_query_optimized`(`channel` ASC, `status` ASC, `id` ASC, `daily_post_max` ASC, `author_name` ASC) USING BTREE, + INDEX `idx_channel_status_dailymax_id`(`channel` ASC, `status` ASC, `daily_post_max` ASC, `id` ASC, `author_name` ASC) USING BTREE, + INDEX `idx_dept_channel_status`(`department_id` ASC, `channel` ASC, `status` ASC) USING BTREE, + INDEX `idx_ai_authors_department_id`(`department_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 256 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_batch_uploads +-- ---------------------------- +DROP TABLE IF EXISTS `ai_batch_uploads`; +CREATE TABLE `ai_batch_uploads` ( + `id` int NOT NULL AUTO_INCREMENT, + `user_id` int NOT NULL, + `file_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `file_path` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `total_count` int NULL DEFAULT 0, + `success_count` int NULL DEFAULT 0, + `failed_count` int NULL DEFAULT 0, + `status` enum('processing','completed','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'processing', + `error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `user_id`(`user_id` ASC) USING BTREE, + CONSTRAINT `ai_batch_uploads_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 101 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_departments +-- ---------------------------- +DROP TABLE IF EXISTS `ai_departments`; +CREATE TABLE `ai_departments` ( + `id` int NOT NULL AUTO_INCREMENT, + `department_name` 
varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `query_stock_quantity` int NOT NULL DEFAULT 0 COMMENT '科室下query审核存量', + `article_stock_quantity` int NOT NULL DEFAULT 0 COMMENT '科室下审核内容存量', + `max_stock_quantity` int NOT NULL DEFAULT 0 COMMENT '科室下设置发文总量max', + `published_stock_quantity` int NOT NULL DEFAULT 0 COMMENT '科室下发布成功的量', + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_ai_departments_created_at`(`created_at` DESC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 82 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_image_tags +-- ---------------------------- +DROP TABLE IF EXISTS `ai_image_tags`; +CREATE TABLE `ai_image_tags` ( + `id` int NOT NULL AUTO_INCREMENT, + `image_id` int NOT NULL, + `image_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `tag_id` int NOT NULL, + `tag_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `default_tag_id` int NOT NULL DEFAULT 0 COMMENT '初始标签ID', + `default_tag_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '初始标签', + `keywords_id` int NOT NULL, + `keywords_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department_id` int NOT NULL, + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_source` tinyint UNSIGNED NOT NULL DEFAULT 1 COMMENT '1=clean_images|2=Flower_character|3=gemini3', + `created_user_id` int NOT NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` 
timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `image_attached_article_count` int NOT NULL DEFAULT 0 COMMENT 'Number of articles the image is attached to', + `status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'draft' COMMENT '图片完整扭转流程状态', + `blocking_reason` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '审核不通过原因', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_image_tag`(`image_id` ASC, `tag_id` ASC) USING BTREE, + INDEX `tag_id`(`tag_id` ASC) USING BTREE, + INDEX `idx_id_desc`(`id` DESC) USING BTREE, + INDEX `idx_image_id_id`(`image_id` ASC, `id` DESC) USING BTREE, + INDEX `idx_created_at`(`created_at` DESC) USING BTREE, + INDEX `idx_department_id`(`department_id` ASC) USING BTREE, + INDEX `idx_keywords_id`(`keywords_id` ASC) USING BTREE, + INDEX `idx_dept_keywords`(`department_id` ASC, `keywords_id` ASC) USING BTREE, + INDEX `idx_dept_keywords_count_id`(`department_id` ASC, `keywords_id` ASC, `image_attached_article_count` ASC, `id` DESC) USING BTREE, + INDEX `idx_keywords_count_id`(`keywords_id` ASC, `image_attached_article_count` ASC, `id` DESC) USING BTREE, + INDEX `idx_dept_count_id`(`department_id` ASC, `image_attached_article_count` ASC, `id` DESC) USING BTREE, + INDEX `idx_count_id`(`image_attached_article_count` ASC, `id` DESC) USING BTREE, + INDEX `idx_tag_name`(`tag_name` ASC) USING BTREE, + INDEX `idx_tag_name_id`(`tag_name` ASC, `id` ASC) USING BTREE, + INDEX `idx_tag_notnull_id`(`id` ASC, `tag_name` ASC, `image_id` ASC, `created_at` ASC) USING BTREE, + CONSTRAINT `ai_image_tags_ibfk_2` FOREIGN KEY (`tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 929767 CHARACTER SET = 
utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_images +-- ---------------------------- +DROP TABLE IF EXISTS `ai_images`; +CREATE TABLE `ai_images` ( + `id` int NOT NULL AUTO_INCREMENT, + `image_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `thumbnail_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `keywords` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `image_type` enum('medical','lifestyle','instruction') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'medical', + `file_size` bigint NULL DEFAULT NULL, + `width` int NULL DEFAULT NULL, + `height` int NULL DEFAULT NULL, + `upload_user_id` int NOT NULL, + `status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `upload_user_id`(`upload_user_id` ASC) USING BTREE, + INDEX `idx_status_updated`(`status` ASC, `updated_at` ASC) USING BTREE, + CONSTRAINT `ai_images_ibfk_1` FOREIGN KEY (`upload_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 26832 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_keywords +-- ---------------------------- +DROP TABLE IF EXISTS `ai_keywords`; +CREATE TABLE `ai_keywords` ( + `id` int NOT NULL AUTO_INCREMENT, + `keywords_name` 
varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department_id` int NOT NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_ai_keywords_dept_created`(`department_id` ASC, `created_at` DESC) USING BTREE, + INDEX `idx_ai_keywords_created_at`(`created_at` DESC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 295 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_logs +-- ---------------------------- +DROP TABLE IF EXISTS `ai_logs`; +CREATE TABLE `ai_logs` ( + `id` int NOT NULL AUTO_INCREMENT, + `user_id` int NULL DEFAULT NULL, + `action` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `target_type` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `target_id` int NULL DEFAULT NULL, + `description` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `ip_address` varchar(45) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `user_agent` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `request_data` json NULL, + `response_data` json NULL, + `status` enum('success','error','warning') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'success', + `error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `user_id`(`user_id` ASC) USING BTREE, + INDEX `idx_created_at`(`created_at` DESC) USING BTREE, + CONSTRAINT `ai_logs_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 116027 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure 
for ai_mip_click +-- ---------------------------- +DROP TABLE IF EXISTS `ai_mip_click`; +CREATE TABLE `ai_mip_click` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `site_id` bigint NOT NULL COMMENT '关联站点ID(外键指向 ai_mip_site.id)', + `site_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '网站URL(冗余字段,便于查询优化)', + `click_time` datetime NOT NULL COMMENT '点击发生时间', + `user_ip` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '用户IP地址', + `user_agent` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '浏览器/设备信息', + `referer_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '来源页面URL', + `device_type` enum('mobile','pc','tablet') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '设备类型', + `click_count` int NULL DEFAULT 1 COMMENT '本次点击事件的计数(一般为1,可用于批量插入)', + `is_valid` tinyint(1) NULL DEFAULT 1 COMMENT '是否有效点击(防刷)', + `task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT 'RPA任务ID(可选)', + `operator` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '操作者(如自动系统)', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间', + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_site_id`(`site_id` ASC) USING BTREE, + INDEX `idx_click_time`(`click_time` ASC) USING BTREE, + INDEX `idx_site_url`(`site_url` ASC) USING BTREE, + INDEX `idx_click_time_site`(`click_time` ASC, `site_id` ASC) USING BTREE, + INDEX `idx_task_id`(`task_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告点击日志表' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_mip_interaction +-- ---------------------------- +DROP TABLE IF EXISTS `ai_mip_interaction`; +CREATE TABLE `ai_mip_interaction` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `site_id` 
bigint NOT NULL COMMENT '关联站点ID', + `click_id` bigint NULL DEFAULT NULL COMMENT '关联点击记录ID', + `task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT 'RPA任务ID', + `interaction_type` enum('reply','comment','message','form_submit','follow','like','share') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '互动类型', + `interaction_time` datetime NOT NULL COMMENT '互动发生时间', + `interaction_status` enum('pending','success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'pending' COMMENT '互动状态', + `reply_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '回复/评论的内容', + `reply_template_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的回复模板ID', + `ad_element_xpath` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告元素的XPath定位', + `ad_element_selector` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告元素的CSS选择器', + `ad_text_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '广告的文本内容', + `execution_mode` enum('auto','manual','semi_auto') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'auto' COMMENT '执行方式', + `rpa_script` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的RPA脚本名称', + `browser_type` enum('headless','headed','playwright','selenium') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '浏览器类型', + `anti_detection_method` json NULL COMMENT '万金油技术方案', + `proxy_ip` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的代理IP', + `user_agent` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '使用的User-Agent', + `custom_headers` json NULL COMMENT '自定义HTTP头', + `fingerprint_id` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '浏览器指纹ID', + 
`response_received` tinyint(1) NULL DEFAULT 0 COMMENT '是否收到回复', + `response_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '对方回复的内容', + `response_time` datetime NULL DEFAULT NULL COMMENT '收到回复的时间', + `response_delay_seconds` int NULL DEFAULT NULL COMMENT '回复延迟(秒)', + `is_successful` tinyint(1) NULL DEFAULT 0 COMMENT '是否成功互动', + `error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '失败原因/错误信息', + `retry_count` int NULL DEFAULT 0 COMMENT '重试次数', + `conversion_flag` tinyint(1) NULL DEFAULT 0 COMMENT '是否产生转化', + `site_dimension` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网址维度标签', + `campaign_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告活动ID', + `operator` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '操作者', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录更新时间', + `remark` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息', + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_site_id`(`site_id` ASC) USING BTREE, + INDEX `idx_click_id`(`click_id` ASC) USING BTREE, + INDEX `idx_task_id`(`task_id` ASC) USING BTREE, + INDEX `idx_interaction_time`(`interaction_time` ASC) USING BTREE, + INDEX `idx_interaction_status`(`interaction_status` ASC) USING BTREE, + INDEX `idx_composite`(`site_id` ASC, `interaction_time` ASC, `interaction_status` ASC) USING BTREE, + INDEX `idx_response_received`(`response_received` ASC) USING BTREE, + INDEX `idx_conversion`(`conversion_flag` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告互动回复日志表' ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_mip_query_task +-- ---------------------------- 
+DROP TABLE IF EXISTS `ai_mip_query_task`; +CREATE TABLE `ai_mip_query_task` ( + `id` int NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `query_word` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '查询词/关键词', + `query_type` enum('keyword','phrase','long_tail') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'keyword' COMMENT '查询类型:关键词/短语/长尾词', + `task_date` char(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '任务日期,格式:YYYYMMDD', + `threshold_max` int NOT NULL DEFAULT 100 COMMENT '最大抓取数量阈值', + `current_count` int NOT NULL DEFAULT 0 COMMENT '当前已抓取数量', + `status` enum('ready','doing','failed','finished','closed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'ready' COMMENT '任务状态:准备中/执行中/失败/完成/已关闭', + `priority` tinyint NOT NULL DEFAULT 5 COMMENT '优先级(1-10,数字越小优先级越高)', + `category` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '分类标签(如:医疗、教育、法律等)', + `source_platform` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'baidu' COMMENT '来源平台:baidu/sogou/360等', + `crawl_url_count` int NOT NULL DEFAULT 0 COMMENT '已爬取URL数量', + `valid_url_count` int NOT NULL DEFAULT 0 COMMENT '有效URL数量(带广告)', + `error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '错误信息', + `started_at` timestamp NULL DEFAULT NULL COMMENT '开始执行时间', + `finished_at` timestamp NULL DEFAULT NULL COMMENT '完成时间', + `closed_at` timestamp NULL DEFAULT NULL COMMENT '达到阈值关闭时间', + `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `created_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'system' COMMENT '创建人', + `remark` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX 
`uniq_query_date`(`query_word`(191) ASC, `task_date` ASC) USING BTREE COMMENT '同一查询词每天只有一个任务', + INDEX `idx_date_status`(`task_date` ASC, `status` ASC) USING BTREE COMMENT '按日期和状态查询', + INDEX `idx_status_priority`(`status` ASC, `priority` ASC) USING BTREE COMMENT '按状态和优先级查询', + INDEX `idx_category`(`category` ASC) USING BTREE COMMENT '按分类查询', + INDEX `idx_threshold`(`threshold_max` ASC, `current_count` ASC) USING BTREE COMMENT '阈值监控', + INDEX `idx_closed`(`closed_at` ASC) USING BTREE COMMENT '关闭时间索引' +) ENGINE = InnoDB AUTO_INCREMENT = 1 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = 'MIP查询任务表 - 用于存储查询词抓取网址任务' ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_mip_site +-- ---------------------------- +DROP TABLE IF EXISTS `ai_mip_site`; +CREATE TABLE `ai_mip_site` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `site_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '网站URL,唯一', + `site_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网站名称(可选)', + `status` enum('active','inactive','pending') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'active' COMMENT '状态:激活/停用/待审核', + `frequency` int NULL DEFAULT 1 COMMENT '频次(如每小时发几次)', + `time_start` time NULL DEFAULT '00:00:00' COMMENT '开始时间(HH:MM:SS)', + `time_end` time NULL DEFAULT '23:59:59' COMMENT '结束时间(HH:MM:SS)', + `interval_minutes` int NULL DEFAULT 60 COMMENT '执行间隔(分钟)', + `ad_feature` varchar(1024) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告特征描述(JSON格式,如:{\"color\":\"red\", \"position\":\"top\"})', + `click_count` bigint NULL DEFAULT 0 COMMENT '累计点击次数', + `reply_count` bigint NULL DEFAULT 0 COMMENT '累计回复次数', + `site_dimension` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网址维度标签(如:教育、医疗等)', + `query_word` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL 
COMMENT '来源查询词(从哪个关键词抓取)', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `created_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '创建人', + `updated_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '更新人', + `remark` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `site_url`(`site_url` ASC) USING BTREE, + UNIQUE INDEX `idx_site_url`(`site_url`(191) ASC) USING BTREE, + INDEX `idx_status`(`status` ASC) USING BTREE, + INDEX `idx_created_at`(`created_at` ASC) USING BTREE, + INDEX `idx_query_word`(`query_word`(191) ASC) USING BTREE COMMENT '按查询词查询' +) ENGINE = InnoDB AUTO_INCREMENT = 3 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告网址管理表' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_mip_task_log +-- ---------------------------- +DROP TABLE IF EXISTS `ai_mip_task_log`; +CREATE TABLE `ai_mip_task_log` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT 'RPA任务唯一ID', + `site_id` bigint NOT NULL COMMENT '关联站点ID', + `step_1_visit_time` datetime NULL DEFAULT NULL COMMENT '步骤1:访问网址时间', + `step_1_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤1状态', + `step_2_antibot_time` datetime NULL DEFAULT NULL COMMENT '步骤2:万金油技术方案执行时间', + `step_2_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤2状态', + `step_3_ad_detection_time` datetime NULL DEFAULT NULL COMMENT '步骤3:广告检测时间', + `step_3_has_ad` tinyint(1) NULL DEFAULT NULL COMMENT '是否检测到广告', + `step_3_ad_count` int NULL DEFAULT 0 COMMENT 
'检测到的广告数量', + `step_4_click_time` datetime NULL DEFAULT NULL COMMENT '步骤4:点击广告时间', + `step_4_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤4状态', + `step_5_reply_time` datetime NULL DEFAULT NULL COMMENT '步骤5:获取回复时间', + `step_5_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤5状态', + `task_start_time` datetime NOT NULL COMMENT '任务开始时间', + `task_end_time` datetime NULL DEFAULT NULL COMMENT '任务结束时间', + `task_duration_seconds` int NULL DEFAULT NULL COMMENT '任务执行时长(秒)', + `task_status` enum('running','completed','failed','timeout') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'running' COMMENT '任务整体状态', + `total_clicks` int NULL DEFAULT 0 COMMENT '本次任务总点击次数', + `total_interactions` int NULL DEFAULT 0 COMMENT '本次任务总互动次数', + `successful_interactions` int NULL DEFAULT 0 COMMENT '成功互动次数', + `failed_interactions` int NULL DEFAULT 0 COMMENT '失败互动次数', + `execution_mode` enum('auto','manual','scheduled') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'auto' COMMENT '执行模式', + `triggered_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '触发者(定时任务/手动触发/队列)', + `error_log` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '错误日志', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `task_id`(`task_id` ASC) USING BTREE, + UNIQUE INDEX `uk_task_id`(`task_id` ASC) USING BTREE, + INDEX `idx_site_id`(`site_id` ASC) USING BTREE, + INDEX `idx_task_status`(`task_status` ASC) USING BTREE, + INDEX `idx_start_time`(`task_start_time` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'RPA任务执行日志表' ROW_FORMAT = DYNAMIC; + +-- 
---------------------------- +-- Table structure for ai_prompt_workflow +-- ---------------------------- +DROP TABLE IF EXISTS `ai_prompt_workflow`; +CREATE TABLE `ai_prompt_workflow` ( + `id` int NOT NULL AUTO_INCREMENT, + `prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `auth_token` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `workflow_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `workflow_type_id` int UNSIGNED NOT NULL DEFAULT 0, + `workflow_type_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `created_user_id` int NOT NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `query_enable` tinyint NOT NULL DEFAULT 0 COMMENT 'query生效AI生文大模型', + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_created_user_time`(`created_user_id` ASC, `created_at` ASC) USING BTREE, + INDEX `idx_created_at`(`created_at` ASC) USING BTREE, + INDEX `idx_workflow_id`(`workflow_id` ASC) USING BTREE, + INDEX `idx_prompt_workflow_name`(`prompt_workflow_name` ASC) USING BTREE, + INDEX `idx_query_enable`(`query_enable` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 16 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_query_category +-- ---------------------------- +DROP TABLE IF EXISTS `ai_query_category`; +CREATE TABLE `ai_query_category` ( + `id` int NOT NULL AUTO_INCREMENT COMMENT '类型ID', + `category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称', + `created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID', + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active' COMMENT '状态', + `created_at` timestamp NULL 
DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 5 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_query_strategies +-- ---------------------------- +DROP TABLE IF EXISTS `ai_query_strategies`; +CREATE TABLE `ai_query_strategies` ( + `id` int NOT NULL AUTO_INCREMENT, + `category_id` int NOT NULL DEFAULT 0 COMMENT '分类ID', + `category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称', + `query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称', + `query_type_id` int NOT NULL DEFAULT 0 COMMENT '类型ID', + `define_context` varchar(2048) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '定义上下文', + `for_example` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '案例', + `created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active', + PRIMARY KEY (`id`) USING BTREE, + INDEX `query_type_id`(`query_type_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 136 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_query_type +-- ---------------------------- +DROP TABLE IF EXISTS `ai_query_type`; +CREATE TABLE `ai_query_type` ( + `id` int NOT NULL AUTO_INCREMENT COMMENT '类型ID', + `category_id` int NOT NULL DEFAULT 0 COMMENT '分类ID', + `category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL 
DEFAULT '' COMMENT '分类名称', + `query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称', + `created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID', + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active' COMMENT '状态', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 131 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_statistics +-- ---------------------------- +DROP TABLE IF EXISTS `ai_statistics`; +CREATE TABLE `ai_statistics` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT 'Auto-increment ID', + `author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `date` date NOT NULL COMMENT 'Date of statistics', + `submission_count` int NULL DEFAULT 0 COMMENT 'Number of submissions (投稿量)', + `read_count` int NULL DEFAULT 0 COMMENT 'Number of reads (阅读量)', + `comment_count` int NULL DEFAULT 0 COMMENT 'Number of comments (评论量)', + `comment_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Comment rate (评论率)', + `like_count` int NULL DEFAULT 0 COMMENT 'Number of likes (点赞量)', + `like_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Like rate (点赞率)', + `favorite_count` int NULL DEFAULT 0 COMMENT 'Number of favorites (收藏量)', + `favorite_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Favorite rate (收藏率)', + `share_count` int NULL DEFAULT 0 COMMENT 'Number of shares (分享量)', + `share_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Share rate (分享率)', + `slide_ratio` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 
'Slide view ratio (滑图占比)', + `baidu_search_volume` int NULL DEFAULT 0 COMMENT 'Baidu search volume (百度搜索量)', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'Creation timestamp', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Update timestamp', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_author_date`(`author_id` ASC, `date` ASC) USING BTREE, + INDEX `idx_date`(`date` ASC) USING BTREE, + INDEX `idx_author_id`(`author_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 51 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI Content Statistics' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_statistics_day +-- ---------------------------- +DROP TABLE IF EXISTS `ai_statistics_day`; +CREATE TABLE `ai_statistics_day` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键', + `author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `stat_date` date NOT NULL COMMENT '统计日期(天)', + `total_submission_count` int NULL DEFAULT 0 COMMENT '投稿量(当日总计)', + `total_read_count` int NULL DEFAULT 0 COMMENT '阅读量(当日总计)', + `total_comment_count` int NULL DEFAULT 0 COMMENT '评论量(当日总计)', + `total_like_count` int NULL DEFAULT 0 COMMENT '点赞量(当日总计)', + `total_favorite_count` int NULL DEFAULT 0 COMMENT '收藏量(当日总计)', + `total_share_count` int NULL DEFAULT 0 COMMENT '分享量(当日总计)', + `avg_comment_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '评论率(当日平均)', + `avg_like_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '点赞率(当日平均)', + `avg_favorite_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '收藏率(当日平均)', + `avg_share_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '分享率(当日平均)', + `avg_slide_ratio` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '滑图占比(当日平均)', + `total_baidu_search_volume` int NULL DEFAULT 0 
COMMENT '百度搜索量(当日总计)', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_author_stat_date`(`author_id` ASC, `stat_date` ASC) USING BTREE, + INDEX `idx_stat_date`(`stat_date` ASC) USING BTREE, + INDEX `idx_author_id`(`author_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 51 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI内容每日汇总统计表' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_statistics_days +-- ---------------------------- +DROP TABLE IF EXISTS `ai_statistics_days`; +CREATE TABLE `ai_statistics_days` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键', + `author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `stat_date` date NOT NULL COMMENT '统计日期(自然日)', + `daily_published_count` int NULL DEFAULT 0 COMMENT '单日发文量', + `day_revenue` decimal(18, 2) NULL DEFAULT 0.00 COMMENT '当天收益(stat_date所在自然日)', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `daily_post_max` int NOT NULL DEFAULT 0 COMMENT '作者每日发文MAX', + `stock_quantity` int NOT NULL DEFAULT 0 COMMENT '作者每日发文库存量', + `defect_quantity` int NOT NULL DEFAULT 0 COMMENT '作者每日发文失败量', + `is_full` tinyint(1) NOT NULL DEFAULT 0 COMMENT '是否发满:0-未发满,1-已发满', + `department_id` int NOT NULL DEFAULT 0 COMMENT '科室ID', + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '科室名称', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_author_stat_date`(`author_id` ASC, `stat_date` ASC) USING BTREE, + INDEX 
`idx_stat_date`(`stat_date` ASC) USING BTREE, + INDEX `idx_author_id`(`author_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 71003 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI内容每日核心指标汇总表(含累计、收益及环比)' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_statistics_monthly +-- ---------------------------- +DROP TABLE IF EXISTS `ai_statistics_monthly`; +CREATE TABLE `ai_statistics_monthly` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键', + `author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `stat_monthly` varchar(48) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '统计日期(自然月)', + `monthly_revenue` decimal(18, 2) NULL DEFAULT 0.00 COMMENT '当月收益(stat_date所在自然月的总收益)', + `revenue_mom_growth_rate` decimal(10, 6) NULL DEFAULT 0.000000 COMMENT '收益月环比增长率((本月收益 - 上月收益) / NULLIF(上月收益, 0))', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_author_stat_date`(`author_id` ASC, `stat_monthly` ASC) USING BTREE, + INDEX `idx_stat_date`(`stat_monthly` ASC) USING BTREE, + INDEX `idx_author_id`(`author_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 3069 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI内容每月核心指标汇总表(含累计、收益及环比)' ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_statistics_weekly +-- ---------------------------- +DROP TABLE IF EXISTS `ai_statistics_weekly`; +CREATE TABLE `ai_statistics_weekly` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键', + `author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE 
utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `stat_weekly` varchar(48) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '统计日期(自然周)', + `weekly_revenue` decimal(18, 2) NULL DEFAULT 0.00 COMMENT '当周收益(stat_date所在自然周的总收益,周一至周日)', + `revenue_wow_growth_rate` decimal(10, 6) NULL DEFAULT 0.000000 COMMENT '收益周环比增长率((本周收益 - 上周收益) / NULLIF(上周收益, 0))', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_author_stat_date`(`author_id` ASC, `stat_weekly` ASC) USING BTREE, + INDEX `idx_stat_date`(`stat_weekly` ASC) USING BTREE, + INDEX `idx_author_id`(`author_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 10644 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI内容每周核心指标汇总表(含累计、收益及环比)' ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_tag_subsets +-- ---------------------------- +DROP TABLE IF EXISTS `ai_tag_subsets`; +CREATE TABLE `ai_tag_subsets` ( + `id` int NOT NULL AUTO_INCREMENT, + `parent_tag_id` int NOT NULL, + `subset_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `subset_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `parent_tag_id`(`parent_tag_id` ASC) USING BTREE, + CONSTRAINT `ai_tag_subsets_ibfk_1` FOREIGN KEY (`parent_tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT +) 
ENGINE = InnoDB AUTO_INCREMENT = 20478 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_tags +-- ---------------------------- +DROP TABLE IF EXISTS `ai_tags`; +CREATE TABLE `ai_tags` ( + `id` int NOT NULL AUTO_INCREMENT, + `tag_name` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `tag_category` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `description` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `usage_count` int NULL DEFAULT 0, + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_tag_name`(`tag_name` ASC) USING BTREE, + INDEX `idx_status_updated`(`status` ASC, `updated_at` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 13417 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_topic_type +-- ---------------------------- +DROP TABLE IF EXISTS `ai_topic_type`; +CREATE TABLE `ai_topic_type` ( + `id` int NOT NULL AUTO_INCREMENT, + `topic_type_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `type_id` int NOT NULL DEFAULT 0, + `type_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `prompt_workflow_id` int UNSIGNED NOT NULL DEFAULT 0, + `prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `created_user_id` int NOT NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT 
CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_created_user_time`(`created_user_id` ASC, `created_at` ASC) USING BTREE, + INDEX `idx_created_at`(`created_at` ASC) USING BTREE, + INDEX `idx_type_id`(`type_id` ASC) USING BTREE, + INDEX `idx_topic_type_name`(`topic_type_name` ASC) USING BTREE, + INDEX `idx_prompt_workflow_id`(`prompt_workflow_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 16 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_user_authors +-- ---------------------------- +DROP TABLE IF EXISTS `ai_user_authors`; +CREATE TABLE `ai_user_authors` ( + `id` int NOT NULL AUTO_INCREMENT, + `user_id` int UNSIGNED NOT NULL DEFAULT 0, + `username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `author_id` int NOT NULL DEFAULT 0, + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT '', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_user_author`(`user_id` ASC, `author_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 208 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_user_departments +-- ---------------------------- +DROP TABLE IF EXISTS `ai_user_departments`; +CREATE TABLE `ai_user_departments` ( + `id` int UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `user_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '用户ID', + `username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '用户名', + `department_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '科室ID', + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '科室名称', + 
`created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_user_department`(`user_id` ASC, `department_id` ASC) USING BTREE, + INDEX `idx_dept_user`(`department_id` ASC, `user_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 77 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci COMMENT = '用户-科室关系表' ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_user_topics +-- ---------------------------- +DROP TABLE IF EXISTS `ai_user_topics`; +CREATE TABLE `ai_user_topics` ( + `id` int NOT NULL AUTO_INCREMENT, + `user_id` int UNSIGNED NOT NULL DEFAULT 0, + `username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `topic_type_id` int UNSIGNED NOT NULL DEFAULT 0, + `topic_type_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `prompt_workflow_id` int NOT NULL DEFAULT 0, + `prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT '', + `status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'inactive', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_topic_type_id`(`topic_type_id` ASC) USING BTREE, + INDEX `idx_prompt_workflow_id`(`prompt_workflow_id` ASC) USING BTREE, + INDEX `idx_created_at`(`created_at` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 81 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_users +-- ---------------------------- +DROP TABLE IF EXISTS `ai_users`; +CREATE TABLE `ai_users` ( + `id` int NOT NULL AUTO_INCREMENT, + `username` varchar(50) CHARACTER SET 
utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `password` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `real_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `email` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `phone` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `xhs_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL COMMENT '小红书Cookie', + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `role` enum('admin','editor','reviewer','publisher','each_title_reviewer','reviewer_query') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'editor' COMMENT '用户角色', + `status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_username`(`username` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 262 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for baidu_keyword +-- ---------------------------- +DROP TABLE IF EXISTS `baidu_keyword`; +CREATE TABLE `baidu_keyword` ( + `id` int NOT NULL AUTO_INCREMENT, + `keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL, + `crawled` tinyint NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `parents_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '父层级', + `seed_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '种子', + `seed_name` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '种子名称', + `department` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '科室', + `department_id` 
int UNSIGNED NOT NULL DEFAULT 0 COMMENT '科室ID', + `author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '作者名称', + `type` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '类型', + `type_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '类型D', + `partsof_speech` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '词性', + `partsof_speech_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '词性ID', + `yesorno_question` enum('yes','no','unprocessed') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'unprocessed' COMMENT '是否是问题?', + `query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称', + `query_type_id` int NOT NULL DEFAULT 0 COMMENT '类型ID', + `category_id` int NOT NULL DEFAULT 0 COMMENT '分类ID', + `category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称', + `created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID', + `query_summary_status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'ready', + `blocking_reason` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '审核不通过原因', + `article_id` int NOT NULL DEFAULT 0 COMMENT '文章ID', + `query_stage` enum('draft','created','summary','reviewed','generated','published') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT '分5个阶段,创建|总结|审核|生文|发布', + `query_status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','generate_review','generate','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'draft' COMMENT 'query完整扭转流程状态', + `status` 
enum('draft','available','unavailable','successful','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT '状态_分2个阶段|可用|不可用|发布成功|发布失败', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `review_user_id` int NOT NULL DEFAULT 0 COMMENT '审核用户ID', + `similarity` enum('draft','yes','calc','recalc') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT 'yes=是相似|calc=已计算|recalc=需要重新计算', + `similarity_query` int NOT NULL DEFAULT 0 COMMENT 'yes=是相似|把query_id写入', + `similarity_query_keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT 'yes=是相似|把query写入', + `similarity score` float NOT NULL DEFAULT 0 COMMENT '相似时候,计算相似度值', + `reviewed_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '审核日期', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `keyword`(`keyword` ASC) USING BTREE, + INDEX `idx_crawled_seed`(`crawled` ASC, `seed_id` ASC) USING BTREE, + INDEX `idx_created_at`(`created_at` ASC) USING BTREE, + INDEX `idx_query_status_id`(`query_status` ASC, `id` DESC) USING BTREE, + INDEX `idx_status_stage`(`query_status` ASC, `query_stage` ASC) USING BTREE, + INDEX `idx_review_status_user`(`query_status` ASC, `review_user_id` ASC) USING BTREE, + INDEX `idx_review_query`(`query_status` ASC, `review_user_id` ASC) USING BTREE, + INDEX `idx_status_user_created`(`query_status` ASC, `review_user_id` ASC, `created_at` ASC) USING BTREE, + INDEX `idx_article_id`(`article_id` ASC) USING BTREE, + INDEX `idx_department_id`(`department_id` ASC) USING BTREE, + INDEX `idx_dept_status`(`department_id` ASC, `query_status` ASC) USING BTREE, + INDEX `idx_dept_query_status`(`department_id` ASC, `query_status` ASC) USING BTREE, + INDEX `idx_dept_review_user`(`department_id` ASC, `review_user_id` ASC) USING BTREE, + INDEX `idx_query_status_dept_id`(`query_status` ASC, `department_id` ASC, `id` DESC) USING BTREE, + INDEX 
`idx_status_dept_created`(`query_status` ASC, `department_id` ASC, `created_at` DESC) USING BTREE, + INDEX `idx_status_dept_id`(`query_status` ASC, `department_id` ASC, `id` ASC) USING BTREE, + INDEX `idx_seed_created`(`seed_id` ASC, `created_at` ASC) USING BTREE, + INDEX `idx_baidu_query_status`(`query_status` ASC, `id` ASC) USING BTREE, + INDEX `idx_baidu_seed_created`(`seed_id` ASC, `created_at` ASC) USING BTREE, + INDEX `idx_status_id`(`query_status` ASC, `id` ASC) USING BTREE, + INDEX `idx_query_status_cover`(`query_status` ASC) USING BTREE, + INDEX `idx_query_status_id_asc`(`query_status` ASC, `id` ASC) USING BTREE, + INDEX `idx_status_order_covering`(`query_status` ASC, `id` ASC, `keyword` ASC) USING BTREE, + FULLTEXT INDEX `idx_keyword_fulltext`(`keyword`) +) ENGINE = InnoDB AUTO_INCREMENT = 901728 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for baidu_query_task +-- ---------------------------- +DROP TABLE IF EXISTS `baidu_query_task`; +CREATE TABLE `baidu_query_task` ( + `id` int NOT NULL AUTO_INCREMENT, + `seed_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '种子', + `seed_name` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '种子名称', + `task_date` char(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '任务日期,格式:YYYYMMDD', + `threshold_max` int NOT NULL DEFAULT 1000 COMMENT '最大阈值', + `current_count` int NOT NULL DEFAULT 0 COMMENT '当前增量', + `status` enum('ready','doing','failed','finished','closed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'ready', + `started_at` timestamp NULL DEFAULT NULL, + `finished_at` timestamp NULL DEFAULT NULL, + `closed_at` timestamp NULL DEFAULT NULL COMMENT '达到阈值关闭时间', + `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX 
`uniq_seed_date`(`seed_id` ASC, `task_date` ASC) USING BTREE, + INDEX `idx_date_status`(`task_date` ASC, `status` ASC) USING BTREE, + INDEX `idx_status_count`(`status` ASC, `current_count` ASC) USING BTREE, + INDEX `idx_threshold`(`threshold_max` ASC) USING BTREE, + INDEX `idx_closed`(`closed_at` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 184 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = '百度查询任务表' ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for baidu_seed_keywords +-- ---------------------------- +DROP TABLE IF EXISTS `baidu_seed_keywords`; +CREATE TABLE `baidu_seed_keywords` ( + `id` int NOT NULL AUTO_INCREMENT, + `batch_id` bigint UNSIGNED NOT NULL DEFAULT 0 COMMENT '批次ID', + `keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL, + `crawled` tinyint NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'ready', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `priority_weight` int NOT NULL DEFAULT 0 COMMENT '优先级和权重1~10000|更高的先处理', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `keyword`(`keyword` ASC) USING BTREE, + INDEX `idx_crawled_priority`(`crawled` ASC, `priority_weight` DESC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 48 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = DYNAMIC; + +SET FOREIGN_KEY_CHECKS = 1; diff --git a/ai_image_tags.txt b/ai_image_tags.txt new file mode 100644 index 0000000..386988d --- /dev/null +++ b/ai_image_tags.txt @@ -0,0 +1,34 @@ +8.149.233.36/ai_article/ai_image_tags/ http://47.99.184.230:8008/andes/index.php?route=/sql&pos=0&db=ai_article&table=ai_image_tags + + 正在显示第 25 - 49 行 (共 32937 行, 查询花费 0.0009 秒。) + + +SELECT * FROM `ai_image_tags` + + +id image_id image_name image_url image_thumb_url tag_id tag_name default_tag_id default_tag_name keywords_id 
keywords_name department_id department_name image_source created_user_id created_at updated_at image_attached_article_count status blocking_reason +16495 19346 1755312359566253.png 20250816/1755312359566253.png 20250816/1755312359566253_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:19:11 7 draft +16496 19347 1755312362360723.png 20250816/1755312362360723.png 20250816/1755312362360723_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:18:55 8 draft +16497 19348 1755312364406476.png 20250816/1755312364406476.png 20250816/1755312364406476_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:18:55 8 draft +16498 19349 1755312367284353.png 20250816/1755312367284353.png 20250816/1755312367284353_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:18:55 8 draft +16499 19350 1755312370484005.png 20250816/1755312370484005.png 20250816/1755312370484005_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:19:11 7 draft +16500 19351 1755312373245801.png 20250816/1755312373245801.png 20250816/1755312373245801_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:41 17 draft +16501 19352 1755312378278262.png 20250816/1755312378278262.png 20250816/1755312378278262_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:34:55 35 draft +16502 19353 1755312380298110.png 20250816/1755312380298110.png 20250816/1755312380298110_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:34:51 37 draft +16503 19354 1755312382399131.png 20250816/1755312382399131.png 20250816/1755312382399131_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 
14:33:30 93 draft +16504 19355 1755312386945978.png 20250816/1755312386945978.png 20250816/1755312386945978_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:33 20 draft +16505 19356 1755312388894962.png 20250816/1755312388894962.png 20250816/1755312388894962_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:06 30 draft +16506 19357 1755312391383717.png 20250816/1755312391383717.png 20250816/1755312391383717_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:34:30 49 draft +16507 19358 1755312393565035.png 20250816/1755312393565035.png 20250816/1755312393565035_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:32:57 135 draft +16508 19359 1755312396609453.png 20250816/1755312396609453.png 20250816/1755312396609453_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:18:55 8 draft +16509 19360 1755312401479871.png 20250816/1755312401479871.png 20250816/1755312401479871_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:41 17 draft +16510 19361 1755312407229190.png 20250816/1755312407229190.png 20250816/1755312407229190_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:29 21 draft +16511 19362 1755312410797310.png 20250816/1755312410797310.png 20250816/1755312410797310_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:08 29 draft +16512 19363 1755312437724619.png 20250816/1755312437724619.png 20250816/1755312437724619_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:59 69 draft +16513 19364 1755312440270419.png 20250816/1755312440270419.png 20250816/1755312440270419_thumb.png 12679 
#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:29 94 draft +16514 19365 1755312442259884.png 20250816/1755312442259884.png 20250816/1755312442259884_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:18 107 draft +16515 19366 1755312445610363.png 20250816/1755312445610363.png 20250816/1755312445610363_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:32:36 173 draft +16516 19367 1755312448884355.png 20250816/1755312448884355.png 20250816/1755312448884355_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:14 111 draft +16517 19368 1755312451681906.png 20250816/1755312451681906.png 20250816/1755312451681906_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:29 94 draft +16518 19369 1755312453351689.png 20250816/1755312453351689.png 20250816/1755312453351689_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:24 100 draft +16519 19370 1755312456284588.png 20250816/1755312456284588.png 20250816/1755312456284588_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:09 118 draft diff --git a/ai_tags.txt b/ai_tags.txt new file mode 100644 index 0000000..e69de29 diff --git a/check_results.py b/check_results.py new file mode 100644 index 0000000..f70b6fb --- /dev/null +++ b/check_results.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +import json + +with open('d:/标签运营/derive_results.json', 'r', encoding='utf-8') as f: + data = json.load(f) + +failed = [d for d in data if not d.get('success')] +success = [d for d in data if d.get('success')] + +print(f"总数: {len(data)}") +print(f"成功: {len(success)}") +print(f"失败: {len(failed)}") + +if failed: + print("\n失败详情:") + for d in failed: + print(f" ID: {d['image_id']}, 标签: 
# -*- coding: utf-8 -*-
"""
Configuration management module.

Supports environment variables with fallback defaults and centralises all
configuration.  Secrets (database password, DashScope API key) are
intentionally NOT hard-coded: supply them via the DB_PASSWORD and
DASHSCOPE_API_KEY environment variables (see Settings.from_env).
"""

import os
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class DatabaseConfig:
    """MySQL connection settings."""
    host: str = "localhost"
    port: int = 3306
    user: str = "root"
    # SECURITY: never commit real credentials; set DB_PASSWORD instead.
    password: str = ""
    database: str = "ai_article"
    charset: str = "utf8mb4"
    pool_size: int = 5


@dataclass
class QwenConfig:
    """Qwen (DashScope) large-model settings."""
    # SECURITY: never commit a real key; set DASHSCOPE_API_KEY instead.
    api_key: str = ""
    vision_model: str = "qwen-vl-max"  # multimodal (vision) model
    text_model: str = "qwen-turbo"     # text-only model
    max_retries: int = 3               # max retry attempts per call
    retry_delay: float = 1.0           # base delay between retries (seconds)
    timeout: int = 60                  # request timeout (seconds)


@dataclass
class TagDeriveConfig:
    """Tag-derivation behaviour settings."""
    batch_size: int = 3           # images processed per model call
    min_derived_tags: int = 5     # minimum derived tags per image
    max_derived_tags: int = 10    # maximum derived tags per image
    max_tag_length: int = 10      # maximum characters per single tag
    max_total_tags: Optional[int] = 8  # cap on merged tag count; None = unlimited
    image_cdn_base: str = "http://images11.bxmkb.cn/Images/"  # CDN prefix for image URLs


@dataclass
class LogConfig:
    """Logging settings."""
    level: str = "INFO"
    format: str = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    file_path: Optional[str] = None  # log file path; None = console only


@dataclass
class Settings:
    """Aggregated application settings."""
    db: DatabaseConfig = field(default_factory=DatabaseConfig)
    qwen: QwenConfig = field(default_factory=QwenConfig)
    tag_derive: TagDeriveConfig = field(default_factory=TagDeriveConfig)
    log: LogConfig = field(default_factory=LogConfig)

    # API service settings
    api_host: str = "0.0.0.0"
    api_port: int = 8000
    debug: bool = False

    @classmethod
    def from_env(cls) -> 'Settings':
        """Build Settings from environment variables, falling back to defaults.

        Environment variables take precedence over the dataclass defaults.
        """
        settings = cls()

        # Database configuration
        settings.db.host = os.getenv("DB_HOST", settings.db.host)
        settings.db.port = int(os.getenv("DB_PORT", settings.db.port))
        settings.db.user = os.getenv("DB_USER", settings.db.user)
        settings.db.password = os.getenv("DB_PASSWORD", settings.db.password)
        settings.db.database = os.getenv("DB_DATABASE", settings.db.database)
        settings.db.pool_size = int(os.getenv("DB_POOL_SIZE", settings.db.pool_size))

        # Qwen configuration
        settings.qwen.api_key = os.getenv("DASHSCOPE_API_KEY", settings.qwen.api_key)
        settings.qwen.vision_model = os.getenv("QWEN_VISION_MODEL", settings.qwen.vision_model)
        settings.qwen.text_model = os.getenv("QWEN_TEXT_MODEL", settings.qwen.text_model)
        settings.qwen.max_retries = int(os.getenv("QWEN_MAX_RETRIES", settings.qwen.max_retries))
        settings.qwen.retry_delay = float(os.getenv("QWEN_RETRY_DELAY", settings.qwen.retry_delay))

        # Tag-derivation configuration
        settings.tag_derive.batch_size = int(os.getenv("BATCH_SIZE", settings.tag_derive.batch_size))
        settings.tag_derive.min_derived_tags = int(os.getenv("MIN_DERIVED_TAGS", settings.tag_derive.min_derived_tags))
        settings.tag_derive.max_derived_tags = int(os.getenv("MAX_DERIVED_TAGS", settings.tag_derive.max_derived_tags))
        settings.tag_derive.image_cdn_base = os.getenv("IMAGE_CDN_BASE", settings.tag_derive.image_cdn_base)
        max_total = os.getenv("MAX_TOTAL_TAGS")
        if max_total:
            settings.tag_derive.max_total_tags = int(max_total)

        # Logging configuration
        settings.log.level = os.getenv("LOG_LEVEL", settings.log.level)
        settings.log.file_path = os.getenv("LOG_FILE_PATH", settings.log.file_path)

        # API configuration
        settings.api_host = os.getenv("API_HOST", settings.api_host)
        settings.api_port = int(os.getenv("API_PORT", settings.api_port))
        settings.debug = os.getenv("DEBUG", "false").lower() == "true"

        return settings


# Global configuration instance
settings = Settings.from_env()


# ============== Convenience accessors ==============
def get_db_config() -> dict:
    """Return the database configuration as a dict (mysql-connector kwargs)."""
    return {
        "host": settings.db.host,
        "port": settings.db.port,
        "user": settings.db.user,
        "password": settings.db.password,
        "database": settings.db.database,
        "charset": settings.db.charset,
        "collation": "utf8mb4_general_ci",
        "autocommit": True,
        "pool_name": "ai_article_pool",
        "pool_size": settings.db.pool_size
    }


def get_qwen_api_key() -> str:
    """Return the Qwen API key (empty string if not configured)."""
    return settings.qwen.api_key


if __name__ == "__main__":
    print("=" * 50)
    print("配置信息")
    print("=" * 50)
    print(f"数据库: {settings.db.host}:{settings.db.port}/{settings.db.database}")
    print(f"千问模型: {settings.qwen.vision_model}")
    print(f"批量大小: {settings.tag_derive.batch_size}")
    print(f"日志级别: {settings.log.level}")
    print(f"API端口: {settings.api_port}")
print(f"[DatabaseManager] 连接池初始化成功,池大小: {settings.db.pool_size}") + except Exception as e: + print(f"[DatabaseManager] 连接池初始化失败: {e}") + raise + + def get_connection(self): + """从连接池获取连接""" + return self._pool.get_connection() + + @contextmanager + def get_cursor(self, dictionary: bool = True): + """获取游标的上下文管理器""" + conn = self.get_connection() + cursor = conn.cursor(dictionary=dictionary) + try: + yield cursor + conn.commit() + except Exception as e: + conn.rollback() + raise e + finally: + cursor.close() + conn.close() + + def execute_query(self, sql: str, params: tuple = None) -> List[Dict[str, Any]]: + """执行查询SQL,返回结果列表""" + with self.get_cursor() as cursor: + cursor.execute(sql, params or ()) + return cursor.fetchall() + + def execute_one(self, sql: str, params: tuple = None) -> Optional[Dict[str, Any]]: + """执行查询SQL,返回单条结果""" + with self.get_cursor() as cursor: + cursor.execute(sql, params or ()) + return cursor.fetchone() + + def execute_update(self, sql: str, params: tuple = None) -> int: + """执行更新SQL,返回影响行数""" + with self.get_cursor() as cursor: + cursor.execute(sql, params or ()) + return cursor.rowcount + + def execute_insert(self, sql: str, params: tuple = None) -> int: + """执行插入SQL,返回插入ID""" + with self.get_cursor() as cursor: + cursor.execute(sql, params or ()) + return cursor.lastrowid + + def execute_many(self, sql: str, params_list: List[tuple]) -> int: + """批量执行SQL,返回影响行数""" + with self.get_cursor() as cursor: + cursor.executemany(sql, params_list) + return cursor.rowcount + + +class ImageTagsDAO: + """ai_image_tags 表数据访问对象""" + + def __init__(self): + self.db = DatabaseManager() + self.table = "ai_image_tags" + + def get_by_id(self, id: int) -> Optional[Dict[str, Any]]: + """根据ID获取记录""" + sql = f"SELECT * FROM {self.table} WHERE id = %s" + return self.db.execute_one(sql, (id,)) + + def get_list(self, limit: int = 10, offset: int = 0, + status: str = None, department_id: int = None) -> List[Dict[str, Any]]: + """获取列表""" + sql = f"SELECT * FROM 
{self.table} WHERE 1=1" + params = [] + + if status: + sql += " AND status = %s" + params.append(status) + if department_id: + sql += " AND department_id = %s" + params.append(department_id) + + sql += " ORDER BY id DESC LIMIT %s OFFSET %s" + params.extend([limit, offset]) + + return self.db.execute_query(sql, tuple(params)) + + def get_for_tag_derive(self, limit: int = 10, offset: int = 0) -> List[Dict[str, Any]]: + """获取用于标签衍生的数据(包含图片URL和标签名)""" + sql = f""" + SELECT id, image_id, image_name, image_url, image_thumb_url, + tag_id, tag_name, keywords_id, keywords_name, + department_id, department_name, status + FROM {self.table} + WHERE image_url != '' AND tag_name != '' + ORDER BY id DESC + LIMIT %s OFFSET %s + """ + return self.db.execute_query(sql, (limit, offset)) + + def get_by_tag_name(self, tag_name: str, limit: int = 100) -> List[Dict[str, Any]]: + """根据标签名获取记录""" + sql = f"SELECT * FROM {self.table} WHERE tag_name = %s LIMIT %s" + return self.db.execute_query(sql, (tag_name, limit)) + + def get_by_department(self, department_id: int, limit: int = 100) -> List[Dict[str, Any]]: + """根据科室获取记录""" + sql = f"SELECT * FROM {self.table} WHERE department_id = %s ORDER BY id DESC LIMIT %s" + return self.db.execute_query(sql, (department_id, limit)) + + def count_by_status(self) -> List[Dict[str, Any]]: + """按状态统计数量""" + sql = f"SELECT status, COUNT(*) as count FROM {self.table} GROUP BY status" + return self.db.execute_query(sql) + + def update_status(self, id: int, status: str) -> int: + """更新状态""" + sql = f"UPDATE {self.table} SET status = %s WHERE id = %s" + return self.db.execute_update(sql, (status, id)) + + def batch_update_status(self, ids: List[int], status: str) -> int: + """批量更新状态""" + if not ids: + return 0 + placeholders = ",".join(["%s"] * len(ids)) + sql = f"UPDATE {self.table} SET status = %s WHERE id IN ({placeholders})" + params = [status] + ids + return self.db.execute_update(sql, tuple(params)) + + def insert(self, data: Dict[str, Any]) -> int: + 
"""插入记录""" + columns = ", ".join(data.keys()) + placeholders = ", ".join(["%s"] * len(data)) + sql = f"INSERT INTO {self.table} ({columns}) VALUES ({placeholders})" + return self.db.execute_insert(sql, tuple(data.values())) + + def batch_insert(self, data_list: List[Dict[str, Any]]) -> int: + """批量插入记录""" + if not data_list: + return 0 + columns = ", ".join(data_list[0].keys()) + placeholders = ", ".join(["%s"] * len(data_list[0])) + sql = f"INSERT INTO {self.table} ({columns}) VALUES ({placeholders})" + params_list = [tuple(d.values()) for d in data_list] + return self.db.execute_many(sql, params_list) + + +# ============== 便捷函数 ============== +def get_db() -> DatabaseManager: + """获取数据库管理器实例""" + return DatabaseManager() + + +def get_image_tags_dao() -> ImageTagsDAO: + """获取 ImageTags DAO 实例""" + return ImageTagsDAO() + + +# ============== 测试代码 ============== +if __name__ == "__main__": + print("=" * 50) + print("数据库配置管理模块测试") + print("=" * 50) + + # 测试数据库连接 + print("\n[1] 测试数据库连接...") + try: + db = get_db() + result = db.execute_one("SELECT 1 as test") + print(f"连接成功: {result}") + except Exception as e: + print(f"连接失败: {e}") + + # 测试 DAO + print("\n[2] 测试 ImageTagsDAO...") + try: + dao = get_image_tags_dao() + + # 获取列表 + items = dao.get_list(limit=3) + print(f"获取到 {len(items)} 条记录") + for item in items: + print(f" - ID: {item['id']}, 标签: {item['tag_name']}") + + # 按状态统计 + stats = dao.count_by_status() + print(f"\n状态统计:") + for stat in stats: + print(f" - {stat['status']}: {stat['count']} 条") + + except Exception as e: + print(f"DAO测试失败: {e}") + + print("\n" + "=" * 50) + print("测试完成") diff --git a/derive_results.json b/derive_results.json new file mode 100644 index 0000000..26fcb10 --- /dev/null +++ b/derive_results.json @@ -0,0 +1,53 @@ +[ + { + "success": true, + "image_id": 16496, + "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "derived_tags": [ + "阴道炎", + "宫颈炎", + "盆腔炎", + "感染因素", + "个人卫生", + "抗生素治疗", + "抗炎药物", + "预防措施" + ], + "merged_tag": 
"#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##感染因素#", + "new_tag_id": 12681 + }, + { + "success": true, + "image_id": 16497, + "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "derived_tags": [ + "阴道炎", + "宫颈炎", + "盆腔炎", + "感染因素", + "个人卫生", + "抗生素治疗", + "抗炎药物", + "预防措施" + ], + "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##感染因素#", + "new_tag_id": 12681 + }, + { + "success": true, + "image_id": 16498, + "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "derived_tags": [ + "阴道炎", + "宫颈炎", + "盆腔炎", + "感染因素", + "个人卫生", + "抗生素治疗", + "抗炎药物", + "预防措施" + ], + "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##感染因素#", + "new_tag_id": 12681 + } +] \ No newline at end of file diff --git a/image_tag_derive.py b/image_tag_derive.py new file mode 100644 index 0000000..8014199 --- /dev/null +++ b/image_tag_derive.py @@ -0,0 +1,374 @@ +# -*- coding: utf-8 -*- +""" +千问大模型 - 图片标签衍生生成脚本 +流程:每次批量2-3张图片 -> 大模型返回各自衍生标签 -> 分别更新数据库 +""" + +import os +import json +from http import HTTPStatus +from typing import List, Dict, Optional + +from database_config import get_db +from config.settings import settings +from logger import get_logger, log_info, log_error +from retry_handler import retry + +# 初始化日志 +logger = get_logger("tag_derive") + +try: + import dashscope + from dashscope import MultiModalConversation +except ImportError: + logger.error("请先安装 dashscope: pip install dashscope") + exit(1) + +# 使用配置中心的API Key +dashscope.api_key = settings.qwen.api_key + + +# ============== Prompt模板 ============== +BATCH_DERIVE_PROMPT = """你是一个专业的医疗健康内容标签分析专家。 + +## 任务 +我提供了{image_count}张医疗健康相关图片,每张图片有一个原始标签。请分析每张图片,为每张图片生成衍生标签。 + +## 图片及原始标签 +{image_tags_list} + +## 要求 +1. 分析每张图片内容,结合其原始标签 +2. 为每张图片生成 5-8 个衍生标签 +3. 衍生标签包括:同义词、上位概念、下位概念、相关症状/治疗等 +4. 
def parse_tag_string(tag_str: str) -> List[str]:
    """Split a '#tag1##tag2#' style string into individual tag names.

    A plain string that is not wrapped in '#' is returned as a
    single-element list.
    """
    if tag_str.startswith('#') and tag_str.endswith('#'):
        # Already in '#x##y#' form: split out every non-blank segment.
        return [part for part in tag_str.split('#') if part.strip()]
    return [tag_str]


def merge_tags(original_tag: str, derived_tags: List[str], max_total_tags: int = None) -> str:
    """Merge original and derived tags into '#orig##d1##d2#' form.

    Args:
        original_tag: original tag string (may already be '#x##y#' form)
        derived_tags: list of derived tag names
        max_total_tags: cap on the total number of tags; None = unlimited
    """
    base = parse_tag_string(original_tag)

    # Trim derived tags so the merged total never exceeds the cap.
    if max_total_tags is not None:
        room = max(0, max_total_tags - len(base))
        derived_tags = derived_tags[:room]

    # De-duplicate while preserving first-seen order; drop empties.
    merged, seen = [], set()
    for tag in base + derived_tags:
        if tag and tag not in seen:
            seen.add(tag)
            merged.append(tag)

    return ''.join(f'#{t}#' for t in merged)
批量调用大模型 + try: + result = derive_tags_batch(items) + except Exception as e: + logger.error(f"批量处理失败: {e}") + return [{"success": False, "image_id": item['id'], "error": str(e)} for item in items] + + if not result.get('success'): + return [{"success": False, "image_id": item['id'], "error": result.get('error')} for item in items] + + api_results = result.get('results', []) + db = get_db() + process_results = [] + + # 2. 逐个匹配并更新 + for i, item in enumerate(items): + # 查找对应的衍生结果 + derived_tags = [] + for r in api_results: + if r.get('image_index') == i + 1 or r.get('original_tag') == item['tag_name']: + derived_tags = r.get('derived_tags', []) + break + + if not derived_tags and i < len(api_results): + derived_tags = api_results[i].get('derived_tags', []) + + if not derived_tags: + process_results.append({"success": False, "image_id": item['id'], "error": "未找到衍生标签"}) + continue + + logger.info(f" [{item['tag_name']}] 衍生: {derived_tags}") + + # 合并标签(限制总标签数量) + max_total = getattr(settings.tag_derive, 'max_total_tags', None) + merged_tag_name = merge_tags(item['tag_name'], derived_tags, max_total_tags=max_total) + + # 插入ai_tags + try: + new_tag_id = tags_dao.get_or_create(merged_tag_name, '衍生标签', item.get('department_name', '')) + except Exception as e: + process_results.append({"success": False, "image_id": item['id'], "error": str(e)}) + continue + + # 更新ai_image_tags + try: + sql = "UPDATE ai_image_tags SET tag_id = %s, tag_name = %s WHERE id = %s" + db.execute_update(sql, (new_tag_id, merged_tag_name, item['id'])) + process_results.append({ + "success": True, + "image_id": item['id'], + "original_tag": item['tag_name'], + "derived_tags": derived_tags, + "merged_tag": merged_tag_name, + "new_tag_id": new_tag_id + }) + logger.info(f" ✓ ID:{item['id']} -> tag_id:{new_tag_id}") + except Exception as e: + process_results.append({"success": False, "image_id": item['id'], "error": str(e)}) + + return process_results + + +def batch_derive_tags(batch_size: int = None, 
start_id: int = None, end_id: int = None, ids: List[int] = None) -> List[Dict]: + """ + 分批处理,每批2-3张图片 + + Args: + batch_size: 每批处理的图片数量 + start_id: 起始ID,从该ID开始处理(用于断点续传) + end_id: 结束ID,处理到该ID为止 + ids: 指定ID列表,只处理这些ID + """ + if batch_size is None: + batch_size = settings.tag_derive.batch_size + + tags_dao = TagsDAO() + db = get_db() + + # 查询需要处理的记录 + if ids: + # 按指定ID查询(同样检查是否已有衍生标签) + placeholders = ','.join(['%s'] * len(ids)) + sql = f""" + SELECT it.id, it.image_thumb_url, it.tag_id, it.tag_name, it.department_name + FROM ai_image_tags it + LEFT JOIN ai_tags t ON it.tag_id = t.id + WHERE it.id IN ({placeholders}) + AND it.image_thumb_url != '' AND it.tag_name != '' + AND (t.tag_category IS NULL OR t.tag_category != '衍生标签') + ORDER BY it.id + """ + items = db.execute_query(sql, ids) + else: + # 按条件查询 + sql = """ + SELECT it.id, it.image_thumb_url, it.tag_id, it.tag_name, it.department_name + FROM ai_image_tags it + LEFT JOIN ai_tags t ON it.tag_id = t.id + WHERE it.image_thumb_url != '' AND it.tag_name != '' + AND (t.tag_category IS NULL OR t.tag_category != '衍生标签') + """ + + params = [] + if start_id is not None: + sql += " AND it.id >= %s" + params.append(start_id) + if end_id is not None: + sql += " AND it.id <= %s" + params.append(end_id) + + sql += " ORDER BY it.id" + + items = db.execute_query(sql, params) if params else db.execute_query(sql) + + if not items: + logger.info("没有需要处理的数据") + return [] + + # 拼接完整图片URL + for item in items: + if item.get('image_thumb_url'): + item['image_url'] = settings.tag_derive.image_cdn_base + item['image_thumb_url'] + else: + item['image_url'] = '' + + total = len(items) + logger.info(f"待处理: {total} 条,每批 {batch_size} 张") + + all_results = [] + + # 分批处理 + for i in range(0, total, batch_size): + batch = items[i:i+batch_size] + batch_num = i // batch_size + 1 + logger.info(f"{'='*60}") + logger.info(f"批次 {batch_num}/{(total + batch_size - 1) // batch_size}") + + results = process_batch(batch, tags_dao) + 
all_results.extend(results) + + success = sum(1 for r in results if r.get('success')) + logger.info(f" 批次完成: {success}/{len(batch)}") + + success_count = sum(1 for r in all_results if r.get('success')) + logger.info(f"{'='*60}") + logger.info(f"全部完成: 成功 {success_count}/{len(all_results)} 条") + + return all_results + + +def print_summary(results: List[Dict]): + logger.info("=" * 60) + logger.info("处理结果摘要") + logger.info("=" * 60) + + success_count = sum(1 for r in results if r.get('success')) + logger.info(f"总数: {len(results)}, 成功: {success_count}, 失败: {len(results) - success_count}") + + logger.info("详细结果:") + for r in results: + if r.get('success'): + logger.info(f" [ID:{r['image_id']}] {r['original_tag']} -> {r['merged_tag'][:40]}...") + else: + logger.warning(f" [ID:{r.get('image_id')}] 失败: {r.get('error')}") + + +def main(): + import argparse + + parser = argparse.ArgumentParser(description='千问视觉大模型 - 图片标签衍生生成器') + parser.add_argument('--start-id', type=int, default=None, help='起始ID,从该ID开始处理(用于断点续传)') + parser.add_argument('--end-id', type=int, default=None, help='结束ID,处理到该ID为止') + parser.add_argument('--batch-size', type=int, default=None, help='每批处理的图片数量') + parser.add_argument('--id', type=int, nargs='+', default=None, help='指定ID,只处理这些ID(可指定多个)') + args = parser.parse_args() + + logger.info("=" * 60) + logger.info("千问视觉大模型 - 图片标签衍生生成器") + logger.info(f"模式: 每批{args.batch_size or settings.tag_derive.batch_size}张图片,各自返回衍生标签") + if args.id: + logger.info(f"指定ID: {args.id}") + elif args.start_id or args.end_id: + id_range = f"{args.start_id or '起始'} ~ {args.end_id or '结束'}" + logger.info(f"ID范围: {id_range}") + logger.info("=" * 60) + + results = batch_derive_tags(batch_size=args.batch_size, start_id=args.start_id, end_id=args.end_id, ids=args.id) + + if results: + print_summary(results) + + output_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "derive_results.json") + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(results, f, 
# Log directory: <project-parent>/logs (relative to this module's location).
LOG_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs")


def setup_logger(
    name: str = "tag_derive",
    level: str = "INFO",
    log_file: Optional[str] = None,
    console: bool = True
) -> logging.Logger:
    """Create and configure a logger, or return it unchanged if already set up.

    Args:
        name: logger name
        level: level name (DEBUG, INFO, WARNING, ERROR, CRITICAL)
        log_file: file path for a FileHandler; None disables file output
        console: attach a stdout StreamHandler when True

    Note: if the named logger already has handlers it is returned as-is,
    so repeated calls neither duplicate handlers nor change the level.
    """
    log = logging.getLogger(name)

    if log.handlers:
        return log

    log.setLevel(getattr(logging, level.upper(), logging.INFO))

    layout = logging.Formatter(
        fmt="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    )

    if console:
        stream = logging.StreamHandler(sys.stdout)
        stream.setFormatter(layout)
        log.addHandler(stream)

    if log_file:
        # Make sure the target directory exists before opening the file.
        os.makedirs(os.path.dirname(log_file), exist_ok=True)
        file_out = logging.FileHandler(log_file, encoding="utf-8")
        file_out.setFormatter(layout)
        log.addHandler(file_out)

    return log


def get_default_log_file() -> str:
    """Return today's default log-file path, creating the log directory."""
    os.makedirs(LOG_DIR, exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d")
    return os.path.join(LOG_DIR, f"tag_derive_{stamp}.log")
console=True + ) + return _default_logger + + +class LogMixin: + """日志混入类,为类提供日志能力""" + + @property + def logger(self) -> logging.Logger: + if not hasattr(self, '_logger'): + self._logger = get_logger(self.__class__.__name__) + return self._logger + + +# ============== 便捷函数 ============== +def log_info(msg: str, *args): + get_logger().info(msg, *args) + +def log_error(msg: str, *args): + get_logger().error(msg, *args) + +def log_warning(msg: str, *args): + get_logger().warning(msg, *args) + +def log_debug(msg: str, *args): + get_logger().debug(msg, *args) + + +if __name__ == "__main__": + # 测试日志 + logger = get_logger() + logger.info("日志系统初始化成功") + logger.debug("这是DEBUG日志") + logger.warning("这是WARNING日志") + logger.error("这是ERROR日志") + print(f"日志文件: {get_default_log_file()}") diff --git a/promt/qwen_tag_derive_prompt.py b/promt/qwen_tag_derive_prompt.py new file mode 100644 index 0000000..5f907c2 --- /dev/null +++ b/promt/qwen_tag_derive_prompt.py @@ -0,0 +1,261 @@ +# -*- coding: utf-8 -*- +""" +千问大模型 - 图片标签衍生生成器 +从 ai_image_tags 表获取图片和标签,调用千问大模型生成标签衍生 +配置统一从 config/settings.py 读取 +""" + +import os +import sys +import json +from http import HTTPStatus + +# 添加项目根目录到路径 +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +# 导入统一配置 +from config.settings import settings +from database_config import get_image_tags_dao, ImageTagsDAO + +# 尝试导入dashscope,如果没有安装会提示 +try: + import dashscope + from dashscope import MultiModalConversation +except ImportError: + print("请先安装 dashscope: pip install dashscope") + exit(1) + +# ============== 提示词模板 ============== +TAG_DERIVE_PROMPT_TEMPLATE = """你是一个专业的医疗健康内容标签分析专家。 + +## 任务 +请根据提供的图片和当前标签,生成相关的衍生标签。 + +## 当前标签 +{tag_name} + +## 要求 +1. 根据图片内容和当前标签,生成 5-10 个相关的衍生标签 +2. 衍生标签应该包括: + - 同义词/近义词标签 + - 上位概念标签(更宽泛的分类) + - 下位概念标签(更具体的细分) + - 相关联想标签(与主题相关但角度不同) + - 应用场景标签(使用场景或人群) +3. 标签要简洁,每个标签不超过10个字 +4. 
# -*- coding: utf-8 -*-
"""
Qwen LLM - image tag derivation generator.

Fetches image/tag rows from the ai_image_tags table and calls the Qwen
model (vision or text variant) to generate derived tags. All configuration
is read from config/settings.py.
"""

import os
import sys
import json
from http import HTTPStatus

# Make the project root importable when this file runs from promt/.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Unified configuration and database access.
from config.settings import settings
from database_config import get_image_tags_dao, ImageTagsDAO

# dashscope is a required third-party dependency; fail fast with a hint.
try:
    import dashscope
    from dashscope import MultiModalConversation
except ImportError:
    print("请先安装 dashscope: pip install dashscope")
    exit(1)

# ============== Prompt templates ==============
# Vision prompt: used when the image itself is attached to the request.
# Literal JSON braces are doubled because .format() is applied to it.
TAG_DERIVE_PROMPT_TEMPLATE = """你是一个专业的医疗健康内容标签分析专家。

## 任务
请根据提供的图片和当前标签,生成相关的衍生标签。

## 当前标签
{tag_name}

## 要求
1. 根据图片内容和当前标签,生成 5-10 个相关的衍生标签
2. 衍生标签应该包括:
   - 同义词/近义词标签
   - 上位概念标签(更宽泛的分类)
   - 下位概念标签(更具体的细分)
   - 相关联想标签(与主题相关但角度不同)
   - 应用场景标签(使用场景或人群)
3. 标签要简洁,每个标签不超过10个字
4. 标签要与医疗健康领域相关

## 输出格式
请以JSON格式输出,包含以下字段:
```json
{{
  "original_tag": "原始标签",
  "derived_tags": [
    {{"tag": "衍生标签1", "type": "同义词", "relevance": "高"}},
    {{"tag": "衍生标签2", "type": "上位概念", "relevance": "高"}},
    ...
  ],
  "tag_description": "对原始标签的简要描述",
  "suggested_keywords": ["关键词1", "关键词2", ...]
}}
```
"""

# Text-only prompt (no image attached); only the image name is supplied.
TAG_DERIVE_TEXT_PROMPT_TEMPLATE = """你是一个专业的医疗健康内容标签分析专家。

## 任务
请根据提供的标签,生成相关的衍生标签。

## 当前标签
{tag_name}

## 图片信息
图片名称:{image_name}

## 要求
1. 根据当前标签,生成 5-10 个相关的衍生标签
2. 衍生标签应该包括:
   - 同义词/近义词标签
   - 上位概念标签(更宽泛的分类)
   - 下位概念标签(更具体的细分)
   - 相关联想标签(与主题相关但角度不同)
   - 应用场景标签(使用场景或人群)
3. 标签要简洁,每个标签不超过10个字
4. 标签要与医疗健康领域相关

## 输出格式
请以JSON格式输出,包含以下字段:
```json
{{
  "original_tag": "原始标签",
  "derived_tags": [
    {{"tag": "衍生标签1", "type": "同义词", "relevance": "高"}},
    {{"tag": "衍生标签2", "type": "上位概念", "relevance": "高"}},
    ...
  ],
  "tag_description": "对原始标签的简要描述",
  "suggested_keywords": ["关键词1", "关键词2", ...]
}}
```
"""


class QwenTagDeriver:
    """Generates derived tags via the Qwen (DashScope) models."""

    def __init__(self, api_key: str = None):
        # Fall back to the configured key when none is passed explicitly.
        self.api_key = api_key or settings.qwen.api_key
        # NOTE(review): dashscope uses a module-level (global) API key, so
        # constructing two instances with different keys is not isolated.
        dashscope.api_key = self.api_key
        self.dao = get_image_tags_dao()  # shared database configuration

    def get_image_tags_from_db(self, limit: int = 10, offset: int = 0) -> list:
        """Fetch image/tag rows that are candidates for tag derivation."""
        return self.dao.get_for_tag_derive(limit=limit, offset=offset)

    def generate_prompt(self, tag_name: str, image_name: str = "", use_image: bool = False) -> str:
        """Build the prompt; the vision variant ignores image_name."""
        if use_image:
            return TAG_DERIVE_PROMPT_TEMPLATE.format(tag_name=tag_name)
        else:
            return TAG_DERIVE_TEXT_PROMPT_TEMPLATE.format(
                tag_name=tag_name,
                image_name=image_name
            )

    def call_qwen_with_image(self, image_url: str, tag_name: str) -> dict:
        """Call the Qwen multimodal model with the image attached.

        Returns {"success": True, "result": <model text>} on HTTP 200,
        otherwise {"success": False, "error": <message>}.
        """
        prompt = self.generate_prompt(tag_name, use_image=True)

        messages = [
            {
                "role": "user",
                "content": [
                    {"image": image_url},
                    {"text": prompt}
                ]
            }
        ]

        response = MultiModalConversation.call(
            model=settings.qwen.vision_model,  # Qwen vision model
            messages=messages
        )

        if response.status_code == HTTPStatus.OK:
            # Multimodal responses wrap the text in a content list.
            return {
                "success": True,
                "result": response.output.choices[0].message.content[0]["text"]
            }
        else:
            return {
                "success": False,
                "error": f"Error: {response.code} - {response.message}"
            }

    def call_qwen_text_only(self, tag_name: str, image_name: str = "") -> dict:
        """Call the Qwen text model (no image attached)."""
        from dashscope import Generation

        prompt = self.generate_prompt(tag_name, image_name, use_image=False)

        response = Generation.call(
            model=settings.qwen.text_model,  # configured text model
            prompt=prompt,
            result_format="message"
        )

        if response.status_code == HTTPStatus.OK:
            # Text responses return the content directly (no list wrapper).
            return {
                "success": True,
                "result": response.output.choices[0].message.content
            }
        else:
            return {
                "success": False,
                "error": f"Error: {response.code} - {response.message}"
            }

    def derive_tags_for_image(self, image_data: dict, use_image: bool = False) -> dict:
        """Derive tags for a single row; falls back to text-only mode
        when use_image is False or the row has no image URL."""
        tag_name = image_data.get("tag_name", "")
        image_url = image_data.get("image_url", "")
        image_name = image_data.get("image_name", "")

        print(f"\n处理标签: {tag_name}")
        print(f"图片URL: {image_url[:50]}..." if len(image_url) > 50 else f"图片URL: {image_url}")

        if use_image and image_url:
            result = self.call_qwen_with_image(image_url, tag_name)
        else:
            result = self.call_qwen_text_only(tag_name, image_name)

        return {
            "image_id": image_data.get("image_id"),
            "tag_id": image_data.get("tag_id"),
            "original_tag": tag_name,
            "image_url": image_url,
            "derive_result": result
        }

    def batch_derive_tags(self, limit: int = 5, use_image: bool = False) -> list:
        """Derive tags for up to `limit` rows fetched from the database."""
        image_tags = self.get_image_tags_from_db(limit=limit)
        results = []

        for item in image_tags:
            result = self.derive_tags_for_image(item, use_image)
            results.append(result)

        return results


def main():
    """Demo entry point: sample DB rows, preview a prompt, call the API once."""
    print("=" * 60)
    print("千问大模型 - 图片标签衍生生成器")
    print("=" * 60)

    deriver = QwenTagDeriver()

    # 1. Sample rows from the database.
    print("\n[1] 从数据库获取图片标签数据...")
    try:
        image_tags = deriver.get_image_tags_from_db(limit=3)
        if image_tags:
            print(f"获取到 {len(image_tags)} 条数据:")
            for item in image_tags:
                print(f"  - ID: {item['id']}, 标签: {item['tag_name']}")
        else:
            print("数据库中暂无数据")
    except Exception as e:
        print(f"数据库连接失败: {e}")
        image_tags = []

    # 2. Prompt preview (truncated to 500 chars for readability).
    print("\n[2] 生成提示词示例:")
    sample_tag = "高血压"
    sample_prompt = deriver.generate_prompt(sample_tag, "blood_pressure.jpg")
    print("-" * 40)
    print(sample_prompt[:500] + "..." if len(sample_prompt) > 500 else sample_prompt)
    print("-" * 40)

    # 3. Live API call (requires a valid API key).
    print("\n[3] 调用千问API生成衍生标签...")
    if not settings.qwen.api_key or settings.qwen.api_key == "your-api-key-here":
        print("请先设置有效的 DASHSCOPE_API_KEY")
        print("可以通过环境变量设置: export DASHSCOPE_API_KEY=your-key")
        print("或修改 config/settings.py 中的配置")
    else:
        # Text-only mode keeps the demo cheap.
        result = deriver.call_qwen_text_only(sample_tag, "示例图片")
        if result["success"]:
            print("生成结果:")
            print(result["result"])
        else:
            print(f"调用失败: {result['error']}")


if __name__ == "__main__":
    main()
diff --git a/query_tags.py b/query_tags.py new file mode 100644 index 0000000..decc9bd --- /dev/null +++ b/query_tags.py
# -*- coding: utf-8 -*-
"""Ad-hoc inspection script: dump all tag-related columns of ai_image_tags."""

from database_config import get_db

db = get_db()

# All tag-related columns, ordered by primary key.
sql = """
SELECT id, image_id, image_name,
       tag_id, tag_name,
       default_tag_id, default_tag_name,
       keywords_id, keywords_name,
       department_id, department_name,
       status
FROM ai_image_tags
ORDER BY id
"""

results = db.execute_query(sql)

print(f"{'=' * 120}")
print(f"ai_image_tags 表中共有 {len(results)} 条数据")
print(f"{'=' * 120}")

# Fixed-width header for the plain-text table below.
print(f"{'ID':<6} {'图片ID':<8} {'标签名':<15} {'初始标签名':<15} {'关键词':<12} {'科室':<10} {'状态':<10}")
print(f"{'-' * 120}")

for r in results:
    print(f"{r['id']:<6} {r['image_id']:<8} {r['tag_name']:<15} {r['default_tag_name']:<15} {r['keywords_name']:<12} {r['department_name']:<10} {r['status']:<10}")

print(f"{'=' * 120}")
print(f"总计: {len(results)} 条记录")
diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8d32bf2 Binary files /dev/null and b/requirements.txt differ
diff --git a/reset_image_tags_data.py b/reset_image_tags_data.py new file mode 100644 index 0000000..042f911 --- /dev/null +++ b/reset_image_tags_data.py
# -*- coding: utf-8 -*-
"""
Reset the ai_image_tags and ai_tags tables.
Re-imports data from ai_image_tags.txt (tab-separated export).
"""

import os
from database_config import get_db
from logger import get_logger

logger = get_logger("reset_data")


def parse_tsv_file(file_path: str) -> list:
    """
    Parse the tab-separated ai_image_tags.txt export.

    Returns a list of dicts keyed by column name. Non-data lines
    (connection banners, echoed SQL, pager text) are skipped.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    records = []
    header = None

    for line in lines:
        line = line.strip()
        if not line:
            continue

        # Skip tool banners / echoed SQL that the export file contains.
        if line.startswith('8.149') or line.startswith('http://') or '正在显示' in line or line.startswith('SELECT'):
            continue

        # Header row: remember the column list.
        if line.startswith('id\t'):
            header = line.split('\t')
            logger.info(f"解析到表头: {len(header)} 列")
            continue

        # Data rows start with a digit and only count once a header was seen.
        if header and line[0].isdigit():
            fields = line.split('\t')
            if len(fields) >= 19:  # at least 19 columns required
                # Empty numeric fields default to 0; blocking_reason is the
                # optional 20th column.
                record = {
                    'id': int(fields[0]) if fields[0] else 0,
                    'image_id': int(fields[1]) if fields[1] else 0,
                    'image_name': fields[2],
                    'image_url': fields[3],
                    'image_thumb_url': fields[4],
                    'tag_id': int(fields[5]) if fields[5] else 0,
                    'tag_name': fields[6],
                    'default_tag_id': int(fields[7]) if fields[7] else 0,
                    'default_tag_name': fields[8],
                    'keywords_id': int(fields[9]) if fields[9] else 0,
                    'keywords_name': fields[10],
                    'department_id': int(fields[11]) if fields[11] else 0,
                    'department_name': fields[12],
                    'image_source': int(fields[13]) if fields[13] else 1,
                    'created_user_id': int(fields[14]) if fields[14] else 0,
                    'created_at': fields[15] if fields[15] else None,
                    'updated_at': fields[16] if fields[16] else None,
                    'image_attached_article_count': int(fields[17]) if fields[17] else 0,
                    'status': fields[18] if fields[18] else 'draft',
                    'blocking_reason': fields[19] if len(fields) > 19 else ''
                }
                records.append(record)

    return records


def reset_tables_and_import(file_path: str):
    """
    Truncate both tables and re-import from the export file.

    Destructive and interactive: asks for a literal 'yes' before touching
    the database. Rows are inserted with their original explicit ids.
    """
    db = get_db()

    # Parse the export first so we can abort before touching the DB.
    logger.info(f"正在解析文件: {file_path}")
    records = parse_tsv_file(file_path)
    logger.info(f"共解析 {len(records)} 条记录")

    if not records:
        logger.error("没有解析到有效数据,操作取消")
        return

    # Collect the distinct tags referenced by the records (last name wins).
    tag_map = {}
    for r in records:
        if r['tag_id'] and r['tag_name']:
            tag_map[r['tag_id']] = r['tag_name']

    # Interactive confirmation before the destructive part.
    print(f"\n即将执行以下操作:")
    print(f"1. 清空 ai_image_tags 表")
    print(f"2. 清空 ai_tags 表")
    print(f"3. 导入 {len(tag_map)} 条 ai_tags 记录")
    print(f"4. 导入 {len(records)} 条 ai_image_tags 记录")
    print(f"\n注意: 此操作不可逆!")

    confirm = input("\n确认执行? (输入 yes 继续): ")
    if confirm.lower() != 'yes':
        logger.info("操作已取消")
        return

    try:
        # Empty the tables (child table first to satisfy FK constraints).
        logger.info("清空 ai_image_tags 表...")
        db.execute_update("DELETE FROM ai_image_tags")
        logger.info("清空 ai_tags 表...")
        db.execute_update("DELETE FROM ai_tags")

        # Reset the auto-increment counters for the now-empty tables.
        db.execute_update("ALTER TABLE ai_image_tags AUTO_INCREMENT = 1")
        db.execute_update("ALTER TABLE ai_tags AUTO_INCREMENT = 1")

        # Insert ai_tags with explicit ids; per-row failures are logged only.
        logger.info(f"插入 ai_tags...")
        for tag_id, tag_name in tag_map.items():
            sql = "INSERT INTO ai_tags (id, tag_name, status) VALUES (%s, %s, 'active')"
            try:
                db.execute_insert(sql, (tag_id, tag_name))
            except Exception as e:
                logger.warning(f"插入 tag {tag_id} 失败: {e}")

        # Insert ai_image_tags rows (explicit ids, best-effort per row).
        logger.info(f"插入 ai_image_tags...")
        success_count = 0
        for r in records:
            sql = """
            INSERT INTO ai_image_tags
            (id, image_id, image_name, image_url, image_thumb_url, tag_id, tag_name,
             default_tag_id, default_tag_name, keywords_id, keywords_name,
             department_id, department_name, image_source, created_user_id,
             created_at, updated_at, image_attached_article_count, status, blocking_reason)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """
            params = (
                r['id'], r['image_id'], r['image_name'], r['image_url'], r['image_thumb_url'],
                r['tag_id'], r['tag_name'], r['default_tag_id'], r['default_tag_name'],
                r['keywords_id'], r['keywords_name'], r['department_id'], r['department_name'],
                r['image_source'], r['created_user_id'], r['created_at'], r['updated_at'],
                r['image_attached_article_count'], r['status'], r['blocking_reason']
            )
            try:
                db.execute_insert(sql, params)
                success_count += 1
            except Exception as e:
                logger.error(f"插入记录 {r['id']} 失败: {e}")

        # Re-sync the auto-increment counters after explicit-id inserts.
        # NOTE(review): "= 1" relies on MySQL/InnoDB clamping AUTO_INCREMENT
        # up to max(id)+1 for non-empty tables — confirm on the target
        # MySQL version/engine before depending on it.
        db.execute_update("ALTER TABLE ai_image_tags AUTO_INCREMENT = 1")
        db.execute_update("ALTER TABLE ai_tags AUTO_INCREMENT = 1")

        logger.info(f"导入完成: 成功 {success_count}/{len(records)} 条")

    except Exception as e:
        logger.error(f"操作失败: {e}")
        raise


if __name__ == "__main__":
    # The export file is expected next to this script.
    file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ai_image_tags.txt")
    reset_tables_and_import(file_path)
diff --git a/reset_tags.py b/reset_tags.py new file mode 100644 index 0000000..10e8134 --- /dev/null +++ b/reset_tags.py
# -*- coding: utf-8 -*-
"""
Reset tag data:
1. restore ai_image_tags.tag_id/tag_name from default_tag_id/default_tag_name
2. delete derived tags ("衍生标签") from the ai_tags table
"""

from database_config import get_db


def reset_database():
    """Report current state, restore original tags, delete derived tags,
    then print a small verification sample."""
    db = get_db()

    # 1. Report the current state before changing anything.
    print("=" * 60)
    print("当前数据状态")
    print("=" * 60)

    # Count derived tags currently present.
    derived_count = db.execute_one(
        "SELECT COUNT(*) as cnt FROM ai_tags WHERE tag_category = '衍生标签'"
    )
    print(f"ai_tags 表中的衍生标签数量: {derived_count['cnt']}")

    # Count images whose tag differs from their original (default) tag.
    need_reset = db.execute_one("""
        SELECT COUNT(*) as cnt FROM ai_image_tags
        WHERE default_tag_id > 0
          AND (tag_id != default_tag_id OR tag_name != default_tag_name)
    """)
    print(f"需要恢复到初始标签的图片数量: {need_reset['cnt']}")

    print("\n" + "=" * 60)
    print("开始执行清理")
    print("=" * 60)

    # 2. Restore ai_image_tags rows to their original tags.
    print("\n[步骤1] 恢复 ai_image_tags 到初始标签...")
    affected = db.execute_update("""
        UPDATE ai_image_tags
        SET tag_id = default_tag_id,
            tag_name = default_tag_name
        WHERE default_tag_id > 0
    """)
    print(f"  已更新 {affected} 条记录")

    # 3. Delete the derived tags themselves.
    print("\n[步骤2] 删除 ai_tags 表中的衍生标签...")
    deleted = db.execute_update(
        "DELETE FROM ai_tags WHERE tag_category = '衍生标签'"
    )
    print(f"  已删除 {deleted} 条衍生标签")

    # 4. Verify: remaining derived tags should be zero; show a sample.
    print("\n" + "=" * 60)
    print("清理完成,验证结果")
    print("=" * 60)

    remaining_derived = db.execute_one(
        "SELECT COUNT(*) as cnt FROM ai_tags WHERE tag_category = '衍生标签'"
    )
    print(f"剩余衍生标签数量: {remaining_derived['cnt']}")

    sample = db.execute_query("""
        SELECT id, image_id, tag_name, default_tag_name
        FROM ai_image_tags
        WHERE default_tag_id > 0
        LIMIT 5
    """)
    print("\n前5条图片标签示例:")
    for row in sample:
        print(f"  ID:{row['id']} | tag_name: {row['tag_name']} | default_tag_name: {row['default_tag_name']}")


if __name__ == "__main__":
    # Destructive operation: require an explicit 'yes' before proceeding.
    confirm = input("确认执行数据库清理?这将:\n1. 把所有图片标签恢复为初始标签\n2. 删除所有衍生标签\n输入 'yes' 确认: ")
    if confirm.lower() == 'yes':
        reset_database()
        print("\n✓ 数据库已重置到干净状态")
    else:
        print("已取消操作")
diff --git a/retry_handler.py b/retry_handler.py new file mode 100644 index 0000000..2b76216 --- /dev/null +++ b/retry_handler.py
# -*- coding: utf-8 -*-
"""
Retry module.
Provides a retry decorator, a stateful RetryHandler, and a one-shot
retry_call helper — all with exponential backoff.
"""

import time
import functools
from typing import Callable, Type, Tuple, Optional, Any
from logger import get_logger

logger = get_logger("retry")


def retry(
    max_retries: int = 3,
    delay: float = 1.0,
    backoff: float = 2.0,
    exceptions: Tuple[Type[Exception], ...] = (Exception,),
    on_retry: Optional[Callable[[Exception, int], None]] = None
):
    """
    Retry decorator with exponential backoff.

    Args:
        max_retries: maximum number of retries (so up to max_retries+1 calls).
        delay: initial delay in seconds before the first retry.
        backoff: multiplier applied to the delay after each retry.
        exceptions: exception types that trigger a retry; others propagate.
        on_retry: optional callback (exception, attempt_number) invoked
            before sleeping on each retry.

    Raises:
        The last caught exception once retries are exhausted.

    Example:
        @retry(max_retries=3, delay=1.0, exceptions=(ConnectionError,))
        def call_api():
            ...
    """
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            current_delay = delay
            last_exception = None

            # attempt 0 is the initial call; 1..max_retries are retries.
            for attempt in range(max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as e:
                    last_exception = e

                    if attempt < max_retries:
                        logger.warning(
                            f"[{func.__name__}] 第{attempt + 1}次调用失败: {e}, "
                            f"{current_delay:.1f}秒后重试..."
                        )

                        # Notify the caller before backing off.
                        if on_retry:
                            on_retry(e, attempt + 1)

                        time.sleep(current_delay)
                        current_delay *= backoff
                    else:
                        logger.error(
                            f"[{func.__name__}] 已达最大重试次数({max_retries}), 最后错误: {e}"
                        )

            # All attempts failed — re-raise the last exception.
            raise last_exception

        return wrapper
    return decorator


class RetryHandler:
    """Stateful retry helper: tracks the attempt count across calls to
    should_retry() and sleeps with exponential backoff."""

    def __init__(
        self,
        max_retries: int = 3,
        delay: float = 1.0,
        backoff: float = 2.0
    ):
        self.max_retries = max_retries   # maximum retry attempts
        self.delay = delay               # initial backoff delay (seconds)
        self.backoff = backoff           # backoff multiplier
        self.attempt = 0                 # retries consumed so far
        self.last_error = None           # last exception seen

    def should_retry(self, error: Exception) -> bool:
        """Record the error; if budget remains, sleep (backoff) and return
        True, otherwise return False. NOTE: sleeping happens inside."""
        self.last_error = error
        self.attempt += 1

        if self.attempt <= self.max_retries:
            wait_time = self.delay * (self.backoff ** (self.attempt - 1))
            logger.warning(f"第{self.attempt}次重试, 等待{wait_time:.1f}秒...")
            time.sleep(wait_time)
            return True

        return False

    def reset(self):
        """Reset the retry counter and last error."""
        self.attempt = 0
        self.last_error = None

    def execute(self, func: Callable, *args, **kwargs) -> Any:
        """Run func with retries; re-raises once the budget is exhausted."""
        self.reset()

        while True:
            try:
                return func(*args, **kwargs)
            except Exception as e:
                if not self.should_retry(e):
                    raise


def retry_call(
    func: Callable,
    args: tuple = (),
    kwargs: dict = None,
    max_retries: int = 3,
    delay: float = 1.0,
    backoff: float = 2.0,
    exceptions: Tuple[Type[Exception], ...] = (Exception,)
) -> Any:
    """
    One-shot function call with retries (no decorator needed).

    Args:
        func: callable to invoke.
        args: positional arguments.
        kwargs: keyword arguments (None means {}).
        max_retries: maximum retries.
        delay: initial delay in seconds.
        backoff: backoff multiplier.
        exceptions: exception types that trigger a retry.

    Returns:
        Whatever func returns.

    Raises:
        The last caught exception once retries are exhausted.
    """
    kwargs = kwargs or {}
    current_delay = delay
    last_exception = None

    for attempt in range(max_retries + 1):
        try:
            return func(*args, **kwargs)
        except exceptions as e:
            last_exception = e

            if attempt < max_retries:
                logger.warning(
                    f"调用失败(尝试 {attempt + 1}/{max_retries + 1}): {e}"
                )
                time.sleep(current_delay)
                current_delay *= backoff

    raise last_exception


if __name__ == "__main__":
    # Self-test: fail twice, succeed on the third call.
    call_count = 0

    @retry(max_retries=3, delay=0.5, exceptions=(ValueError,))
    def test_func():
        global call_count
        call_count += 1
        if call_count < 3:
            raise ValueError(f"模拟失败 {call_count}")
        return "成功"

    try:
        result = test_func()
        print(f"结果: {result}, 调用次数: {call_count}")
    except ValueError as e:
        print(f"最终失败: {e}")
diff --git a/tag_derive_api.py b/tag_derive_api.py new file mode 100644 index 0000000..265ac2b --- /dev/null +++ b/tag_derive_api.py
# -*- coding: utf-8 -*-
"""
Tag derivation API service.
Wraps the tag-derivation pipeline in a FastAPI RESTful service.
"""

import os
import json
import time
from http import HTTPStatus
from typing import List, Dict, Optional, Any
from datetime import datetime
from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException, BackgroundTasks, Query
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field

# Project modules.
from config.settings import settings
from database_config import get_db, ImageTagsDAO
from logger import get_logger, log_info, log_error
from retry_handler import retry

logger = get_logger("api")

# dashscope is required; its API key is global (module-level).
try:
    import dashscope
    from dashscope import MultiModalConversation
    dashscope.api_key = settings.qwen.api_key
except ImportError:
    logger.error("请先安装 dashscope: pip install dashscope")
    raise


# ============== Pydantic models (request/response schemas) ==============
class TagDeriveRequest(BaseModel):
    """Single-image tag derivation request."""
    image_url: str = Field(..., description="图片URL")
    tag_name: str = Field(..., description="原始标签名")
    department: Optional[str] = Field(None, description="科室")


class BatchDeriveRequest(BaseModel):
    """Batch tag derivation request (at most 5 items)."""
    items: List[TagDeriveRequest] = Field(..., description="图片列表", max_length=5)


class TagDeriveResponse(BaseModel):
    """Per-image derivation result."""
    success: bool
    original_tag: str
    derived_tags: List[str] = []
    merged_tag: Optional[str] = None
    error: Optional[str] = None


class BatchDeriveResponse(BaseModel):
    """Aggregate result of a batch derivation."""
    success: bool
    total: int
    success_count: int
    failed_count: int
    results: List[TagDeriveResponse]


class TaskStatusResponse(BaseModel):
    """Status snapshot of an async background task."""
    task_id: str
    status: str    # pending, running, completed, failed
    progress: int  # 0-100
    total: int
    processed: int
    success_count: int
    failed_count: int
    started_at: Optional[str] = None
    completed_at: Optional[str] = None


class StatsResponse(BaseModel):
    """Processing statistics."""
    total_images: int
    processed_images: int
    pending_images: int
    derived_tags_count: int


# ============== Prompt template ==============
# Multi-image prompt; literal JSON braces are doubled for .format().
DERIVE_PROMPT = """你是一个专业的医疗健康内容标签分析专家。

## 任务
我提供了{image_count}张医疗健康相关图片,每张图片有一个原始标签。请分析每张图片,为每张图片生成衍生标签。

## 图片及原始标签
{image_tags_list}

## 要求
1. 分析每张图片内容,结合其原始标签
2. 为每张图片生成 {min_tags}-{max_tags} 个衍生标签
3. 衍生标签包括:同义词、上位概念、下位概念、相关症状/治疗等
4. 标签简洁,每个不超过{max_tag_length}个字

## 输出格式
请严格以JSON格式输出,按图片顺序返回:
```json
{{
  "results": [
    {{"image_index": 1, "original_tag": "原始标签1", "derived_tags": ["衍生1", "衍生2", "衍生3"]}},
    {{"image_index": 2, "original_tag": "原始标签2", "derived_tags": ["衍生1", "衍生2", "衍生3"]}}
  ]
}}
```

注意:只输出JSON,不要输出其他内容。results数组长度必须等于图片数量。
"""


# ============== Core service ==============
class TagDeriveService:
    """Tag derivation service: LLM calls, result merging, DB queries."""

    def __init__(self):
        # NOTE(review): constructed at module import below — this opens the
        # DB connection at import time; confirm that is intended.
        self.db = get_db()
        self.dao = ImageTagsDAO()
        self.config = settings.tag_derive

    @retry(max_retries=3, delay=1.0, backoff=2.0)
    def call_qwen_batch(self, items: List[Dict]) -> Dict:
        """
        Call the Qwen multimodal model for a batch of images (with retry).

        Returns {"success": True, "results": [...]} on a parsable response,
        otherwise a dict with "success": False and an "error" message.
        """
        # Describe each image's original tag for the prompt.
        image_tags_list = ""
        for i, item in enumerate(items):
            image_tags_list += f"- 图片{i+1}: 原始标签「{item['tag_name']}」\n"

        prompt = DERIVE_PROMPT.format(
            image_count=len(items),
            image_tags_list=image_tags_list.strip(),
            min_tags=self.config.min_derived_tags,
            max_tags=self.config.max_derived_tags,
            max_tag_length=self.config.max_tag_length
        )

        # One message carrying all images followed by the text prompt.
        content = []
        for item in items:
            content.append({"image": item['image_url']})
        content.append({"text": prompt})

        messages = [{"role": "user", "content": content}]

        response = MultiModalConversation.call(
            model=settings.qwen.vision_model,
            messages=messages
        )

        if response.status_code == HTTPStatus.OK:
            result_text = response.output.choices[0].message.content[0]["text"]

            # Extract the outermost JSON object from the model's reply.
            json_start = result_text.find('{')
            json_end = result_text.rfind('}') + 1
            if json_start != -1 and json_end > json_start:
                json_str = result_text[json_start:json_end]
                result_json = json.loads(json_str)
                return {"success": True, "results": result_json.get('results', [])}

            return {"success": False, "error": "JSON解析失败", "raw": result_text}
        else:
            return {"success": False, "error": f"{response.code}-{response.message}"}

    def derive_tags(self, items: List[Dict]) -> List[TagDeriveResponse]:
        """Run derivation for a batch and map model output back to items."""
        logger.info(f"开始处理 {len(items)} 张图片的标签衍生")

        try:
            result = self.call_qwen_batch(items)
        except Exception as e:
            # Retries exhausted — report failure for every item.
            logger.error(f"调用千问API失败: {e}")
            return [
                TagDeriveResponse(
                    success=False,
                    original_tag=item['tag_name'],
                    error=str(e)
                ) for item in items
            ]

        if not result.get('success'):
            error_msg = result.get('error', '未知错误')
            return [
                TagDeriveResponse(
                    success=False,
                    original_tag=item['tag_name'],
                    error=error_msg
                ) for item in items
            ]

        # Match each input item with its model result.
        api_results = result.get('results', [])
        responses = []

        for i, item in enumerate(items):
            derived_tags = []

            # Prefer matching by image_index or original_tag.
            for r in api_results:
                if r.get('image_index') == i + 1 or r.get('original_tag') == item['tag_name']:
                    derived_tags = r.get('derived_tags', [])
                    break

            # Fallback: positional match.
            if not derived_tags and i < len(api_results):
                derived_tags = api_results[i].get('derived_tags', [])

            if derived_tags:
                merged = self._merge_tags(item['tag_name'], derived_tags)
                responses.append(TagDeriveResponse(
                    success=True,
                    original_tag=item['tag_name'],
                    derived_tags=derived_tags,
                    merged_tag=merged
                ))
                logger.info(f"[{item['tag_name']}] 衍生成功: {len(derived_tags)} 个标签")
            else:
                responses.append(TagDeriveResponse(
                    success=False,
                    original_tag=item['tag_name'],
                    error="未获取到衍生标签"
                ))

        return responses

    def _merge_tags(self, original: str, derived: List[str]) -> str:
        """Merge original + derived tags into '#tag1##tag2#' form,
        de-duplicated, original tags first."""
        # The original may already be in '#a##b#' form.
        if original.startswith('#') and original.endswith('#'):
            original_tags = [t for t in original.split('#') if t.strip()]
        else:
            original_tags = [original]

        # Merge preserving order, dropping duplicates and empties.
        all_tags = []
        seen = set()
        for t in original_tags + derived:
            if t and t not in seen:
                all_tags.append(t)
                seen.add(t)

        return ''.join([f'#{t}#' for t in all_tags])

    def get_pending_images(self, limit: int = 100) -> List[Dict]:
        """Fetch rows whose tag is not yet a derived tag."""
        sql = """
        SELECT it.id, it.image_thumb_url, it.tag_id, it.tag_name, it.department_name
        FROM ai_image_tags it
        LEFT JOIN ai_tags t ON it.tag_id = t.id
        WHERE it.image_thumb_url != '' AND it.tag_name != ''
          AND (t.tag_category IS NULL OR t.tag_category != '衍生标签')
        ORDER BY it.id
        LIMIT %s
        """
        items = self.db.execute_query(sql, (limit,))

        # Prefix the thumbnail path with the configured CDN base URL.
        for item in items:
            if item.get('image_thumb_url'):
                item['image_url'] = self.config.image_cdn_base + item['image_thumb_url']
            else:
                item['image_url'] = ''

        return items

    def get_stats(self) -> Dict:
        """Aggregate processing statistics from the database."""
        # Total images with a thumbnail.
        total = self.db.execute_one(
            "SELECT COUNT(*) as cnt FROM ai_image_tags WHERE image_thumb_url != ''"
        )['cnt']

        # Images whose current tag is a derived tag (i.e. processed).
        processed = self.db.execute_one("""
            SELECT COUNT(*) as cnt FROM ai_image_tags it
            JOIN ai_tags t ON it.tag_id = t.id
            WHERE t.tag_category = '衍生标签'
        """)['cnt']

        # Number of derived tags on record.
        derived_count = self.db.execute_one(
            "SELECT COUNT(*) as cnt FROM ai_tags WHERE tag_category = '衍生标签'"
        )['cnt']

        return {
            "total_images": total,
            "processed_images": processed,
            "pending_images": total - processed,
            "derived_tags_count": derived_count
        }


# ============== Task management ==============
# Simple in-memory task store (use Redis or similar in production —
# state is lost on restart and not shared across workers).
tasks_store: Dict[str, Dict] = {}


def create_task(task_id: str, total: int):
    """Register a new task record in the store."""
    tasks_store[task_id] = {
        "status": "pending",
        "progress": 0,
        "total": total,
        "processed": 0,
        "success_count": 0,
        "failed_count": 0,
        "started_at": None,
        "completed_at": None,
        "results": []
    }


def update_task(task_id: str, **kwargs):
    """Update task fields and recompute the percentage progress."""
    if task_id in tasks_store:
        tasks_store[task_id].update(kwargs)
        if tasks_store[task_id]["total"] > 0:
            tasks_store[task_id]["progress"] = int(
                tasks_store[task_id]["processed"] / tasks_store[task_id]["total"] * 100
            )


# ============== Background task ==============
def process_batch_task(task_id: str, batch_size: int = 3):
    """Process pending images in batches; runs via BackgroundTasks."""
    service = TagDeriveService()

    update_task(task_id, status="running", started_at=datetime.now().isoformat())

    try:
        items = service.get_pending_images(limit=tasks_store[task_id]["total"])

        for i in range(0, len(items), batch_size):
            batch = items[i:i+batch_size]
            batch_data = [
                {"image_url": item["image_url"], "tag_name": item["tag_name"]}
                for item in batch
            ]

            results = service.derive_tags(batch_data)

            success = sum(1 for r in results if r.success)
            failed = len(results) - success

            update_task(
                task_id,
                processed=i + len(batch),
                success_count=tasks_store[task_id]["success_count"] + success,
                failed_count=tasks_store[task_id]["failed_count"] + failed
            )

            # Throttle to avoid API rate limits.
            time.sleep(0.5)

        update_task(task_id, status="completed", completed_at=datetime.now().isoformat())

    except Exception as e:
        logger.error(f"批量任务失败: {e}")
        update_task(task_id, status="failed", completed_at=datetime.now().isoformat())


# ============== FastAPI application ==============
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hooks (startup/shutdown logging)."""
    logger.info("标签衍生API服务启动")
    yield
    logger.info("标签衍生API服务关闭")


app = FastAPI(
    title="标签衍生API",
    description="基于千问大模型的图片标签衍生服务",
    version="1.0.0",
    lifespan=lifespan
)

# CORS: wide open — restrict allow_origins before exposing publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Shared service instance (NOTE(review): created at import time, see above).
service = TagDeriveService()


# ============== API routes ==============
@app.get("/", tags=["健康检查"])
async def root():
    """API root: service banner."""
    return {"message": "标签衍生API服务运行中", "version": "1.0.0"}


@app.get("/health", tags=["健康检查"])
async def health_check():
    """Liveness probe."""
    return {"status": "healthy", "timestamp": datetime.now().isoformat()}


@app.post("/api/derive/single", response_model=TagDeriveResponse, tags=["标签衍生"])
async def derive_single(request: TagDeriveRequest):
    """Derive tags for a single image.

    NOTE(review): service.derive_tags is a blocking (sync) LLM call inside
    an async endpoint — it blocks the event loop; consider run_in_executor.
    """
    items = [{"image_url": request.image_url, "tag_name": request.tag_name}]
    results = service.derive_tags(items)
    return results[0]


@app.post("/api/derive/batch", response_model=BatchDeriveResponse, tags=["标签衍生"])
async def derive_batch(request: BatchDeriveRequest):
    """Derive tags for a batch of images (at most 5, enforced by schema)."""
    items = [{"image_url": item.image_url, "tag_name": item.tag_name} for item in request.items]
    results = service.derive_tags(items)

    success_count = sum(1 for r in results if r.success)

    return BatchDeriveResponse(
        success=success_count > 0,
        total=len(results),
        success_count=success_count,
        failed_count=len(results) - success_count,
        results=results
    )


@app.post("/api/derive/async", tags=["异步任务"])
async def derive_async(
    background_tasks: BackgroundTasks,
    limit: int = Query(default=100, ge=1, le=1000, description="处理数量")
):
    """Kick off an async batch task; returns a task_id to poll."""
    # Millisecond-timestamp id — assumed unique per request.
    task_id = f"task_{int(time.time() * 1000)}"
    create_task(task_id, limit)

    background_tasks.add_task(process_batch_task, task_id, settings.tag_derive.batch_size)

    return {"task_id": task_id, "message": "任务已创建", "total": limit}


@app.get("/api/task/{task_id}", response_model=TaskStatusResponse, tags=["异步任务"])
async def get_task_status(task_id: str):
    """Poll the status of an async task; 404 for unknown ids."""
    if task_id not in tasks_store:
        raise HTTPException(status_code=404, detail="任务不存在")

    task = tasks_store[task_id]
    return TaskStatusResponse(
        task_id=task_id,
        status=task["status"],
        progress=task["progress"],
        total=task["total"],
        processed=task["processed"],
        success_count=task["success_count"],
        failed_count=task["failed_count"],
        started_at=task["started_at"],
        completed_at=task["completed_at"]
    )


@app.get("/api/stats", response_model=StatsResponse, tags=["统计"])
async def get_stats():
    """Processing statistics."""
    stats = service.get_stats()
    return StatsResponse(**stats)


@app.get("/api/pending", tags=["数据查询"])
async def get_pending_images(
    limit: int = Query(default=10, ge=1, le=100, description="返回数量")
):
    """List images still awaiting tag derivation."""
    items = service.get_pending_images(limit)
    return {"total": len(items), "items": items}


# ============== Entry point ==============
if __name__ == "__main__":
    import uvicorn

    logger.info(f"启动API服务: http://{settings.api_host}:{settings.api_port}")
    uvicorn.run(
        "tag_derive_api:app",
        host=settings.api_host,
        port=settings.api_port,
        reload=settings.debug
    )
diff --git a/增加字段.txt b/增加字段.txt new file mode 100644 index 0000000..20d3c98 --- /dev/null +++ b/增加字段.txt
ALTER TABLE ai_article.ai_image_tags ADD COLUMN derived_tag VARCHAR(1000) NOT NULL DEFAULT '' COMMENT '衍生标签,格式:#标签1##标签2#';