Initial commit: AI tagging images project

This commit is contained in:
2026-01-30 18:30:05 +08:00
commit 2882852cd2
20 changed files with 3310 additions and 0 deletions

23
.gitignore vendored Normal file
View File

@@ -0,0 +1,23 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
# Virtual environment
venv/
env/
.venv/
# IDE
.idea/
.vscode/
*.swp
*.swo
# Logs
*.log
# OS
.DS_Store
Thumbs.db

161
README.md Normal file
View File

@@ -0,0 +1,161 @@
# AI 图片标签衍生系统
基于千问视觉大模型(Qwen-VL)的医疗健康图片标签自动衍生系统。
## 功能概述
- **离线批量处理**:遍历数据库图片,批量调用大模型进行标签衍生
- **RESTful API 服务**:提供标签衍生的 HTTP 接口
- **智能重试机制**:API 调用失败自动重试,支持指数退避
- **统一配置管理**:支持环境变量配置,灵活部署
- **完整日志系统**:文件 + 控制台双输出,便于问题排查
## 项目结构
```
ai_tagging_images/
├── config/
│ ├── __init__.py
│ └── settings.py # 配置管理中心
├── logs/ # 日志目录(自动创建)
├── promt/
│ └── qwen_tag_derive_prompt.py
├── database_config.py # 数据库连接和 DAO
├── image_tag_derive.py # 离线批量处理脚本
├── logger.py # 日志模块
├── retry_handler.py # 重试机制
├── tag_derive_api.py # FastAPI 服务
├── query_tags.py # 标签查询工具
├── check_results.py # 结果检查工具
├── requirements.txt # 依赖清单
└── ai_article.sql # 数据库结构
```
## 快速开始
### 1. 安装依赖
```bash
pip install -r requirements.txt
```
### 2. 配置环境变量(可选)
```bash
# Windows
set DASHSCOPE_API_KEY=your-api-key
set DB_HOST=localhost
set DB_PASSWORD=your-password
# Linux/Mac
export DASHSCOPE_API_KEY=your-api-key
export DB_HOST=localhost
export DB_PASSWORD=your-password
```
### 3. 运行离线脚本
```bash
# 处理全部待处理数据
python image_tag_derive.py
# 从指定ID开始处理(断点续传)
python image_tag_derive.py --start-id 100
# 指定ID范围处理
python image_tag_derive.py --start-id 100 --end-id 200
# 指定起始ID和批次大小
python image_tag_derive.py --start-id 100 --batch-size 3
# 按指定ID处理单个或多个
python image_tag_derive.py --id 16495
python image_tag_derive.py --id 16495 16496 16497
```
> 注意:所有模式都会检查衍生标签,已有衍生标签的记录会被跳过。
### 4. 启动 API 服务
```bash
python tag_derive_api.py
```
服务启动后访问:
- API 文档:http://127.0.0.1:8000/docs
- 健康检查:http://127.0.0.1:8000/health
## API 接口
| 方法 | 端点 | 说明 |
|------|------|------|
| GET | `/` | 服务状态 |
| GET | `/health` | 健康检查 |
| POST | `/api/derive/single` | 单张图片标签衍生 |
| POST | `/api/derive/batch` | 批量标签衍生(最多5张) |
| POST | `/api/derive/async` | 异步批量任务 |
| GET | `/api/task/{task_id}` | 查询任务状态 |
| GET | `/api/stats` | 统计信息 |
| GET | `/api/pending` | 待处理列表 |
### 示例请求
**单张图片衍生:**
```bash
curl -X POST http://127.0.0.1:8000/api/derive/single \
-H "Content-Type: application/json" \
-d '{
"image_url": "https://example.com/image.jpg",
"tag_name": "高血压"
}'
```
**响应:**
```json
{
"success": true,
"original_tag": "高血压",
"derived_tags": ["血压升高", "心血管疾病", "降压药", "血压监测"],
"merged_tag": "#高血压##血压升高##心血管疾病##降压药##血压监测#"
}
```
## 配置说明
| 环境变量 | 默认值 | 说明 |
|----------|--------|------|
| `DASHSCOPE_API_KEY` | - | 千问 API Key |
| `DB_HOST` | localhost | 数据库主机 |
| `DB_PORT` | 3306 | 数据库端口 |
| `DB_USER` | root | 数据库用户 |
| `DB_PASSWORD` | - | 数据库密码 |
| `DB_DATABASE` | ai_article | 数据库名 |
| `BATCH_SIZE` | 3 | 每批处理图片数 |
| `QWEN_MAX_RETRIES` | 3 | 最大重试次数 |
| `LOG_LEVEL` | INFO | 日志级别 |
| `API_PORT` | 8000 | API 服务端口 |
## 技术栈
- **大模型**:阿里云千问 Qwen-VL-Max
- **Web 框架**:FastAPI
- **数据库**:MySQL 9.0
- **Python**:3.10+
## 数据表
主要涉及以下数据表:
- `ai_image_tags`:图片标签关联表
- `ai_tags`:标签主表
## 日志
日志文件保存在 `logs/` 目录,按日期命名:
```
logs/
└── tag_derive_20260130.log
```
## License
MIT

930
ai_article.sql Normal file
View File

@@ -0,0 +1,930 @@
/*
Navicat Premium Dump SQL
Source Server : mixue
Source Server Type : MySQL
Source Server Version : 90001 (9.0.1)
Source Host : localhost:3306
Source Schema : ai_article
Target Server Type : MySQL
Target Server Version : 90001 (9.0.1)
File Encoding : 65001
Date: 28/01/2026 14:04:39
*/
-- Dump preamble: force utf8mb4 for this session and disable foreign-key
-- checks so tables can be dropped/recreated in any order during import.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;
-- ----------------------------
-- Table structure for ai_article_images
-- ----------------------------
-- Junction table: which images are attached to which article.
-- keywords_*/department_* columns are denormalized copies for lookup speed.
-- uk_article_image prevents attaching the same image to an article twice.
DROP TABLE IF EXISTS `ai_article_images`;
CREATE TABLE `ai_article_images` (
`id` int NOT NULL AUTO_INCREMENT,
`article_id` int NOT NULL DEFAULT 0,
`image_id` int NOT NULL DEFAULT 0,
`image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`image_tag_id` int NOT NULL DEFAULT 0,
`sort_order` int NULL DEFAULT 0,
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`keywords_id` int NOT NULL DEFAULT 0,
`keywords_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`department_id` int NOT NULL DEFAULT 0,
`department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`image_source` tinyint(1) NOT NULL DEFAULT 0 COMMENT '1=tag|2=change',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_article_image`(`article_id` ASC, `image_id` ASC) USING BTREE,
INDEX `image_id`(`image_id` ASC) USING BTREE,
INDEX `idx_tag_article_lookup`(`image_tag_id` ASC, `article_id` ASC) USING BTREE,
INDEX `idx_article_images_article_tag`(`article_id` ASC, `image_tag_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 699 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for ai_article_tags
-- ----------------------------
-- One Coze-generated tag string per article (enforced by uk_article_tag);
-- rows are removed automatically when the parent article is deleted (CASCADE).
DROP TABLE IF EXISTS `ai_article_tags`;
CREATE TABLE `ai_article_tags` (
`id` int NOT NULL AUTO_INCREMENT,
`article_id` int NOT NULL,
`coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'Coze生成的标签',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_article_tag`(`article_id` ASC) USING BTREE,
CONSTRAINT `ai_article_tags_ibfk_1` FOREIGN KEY (`article_id`) REFERENCES `ai_articles` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 943 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_articles
-- ----------------------------
-- Core article table: each AI-generated article through its lifecycle
-- (topic -> generate -> review -> publish; see the `status` enum).
-- FIX: dropped `idx_audit_stats`, which was an exact duplicate of
-- `idx_author_channel_status_date` (identical columns, identical order).
-- Duplicate secondary indexes add write/storage overhead with zero read
-- benefit; the remaining index serves every query the duplicate could.
DROP TABLE IF EXISTS `ai_articles`;
CREATE TABLE `ai_articles` (
`id` int NOT NULL AUTO_INCREMENT,
`batch_id` bigint UNSIGNED NOT NULL DEFAULT 0 COMMENT '批次ID',
`topic_type_id` int UNSIGNED NOT NULL DEFAULT 0,
`prompt_workflow_id` int UNSIGNED NOT NULL DEFAULT 0,
`topic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`department` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`departmentids` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`author_id` int NULL DEFAULT NULL,
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`department_id` int NULL DEFAULT NULL,
`department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`created_user_id` int NOT NULL DEFAULT 0,
`review_user_id` int NULL DEFAULT NULL,
`publish_user_id` int NULL DEFAULT NULL,
`status` enum('topic','cover_image','generate','generate_failed','draft','pending_review','approved','rejected','published_review','published','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'draft',
`channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin',
`review_comment` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL,
`publish_time` timestamp NULL DEFAULT NULL,
`baijiahao_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`baijiahao_status` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`word_count` int NULL DEFAULT 0,
`image_count` int NULL DEFAULT 0,
`coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'Coze生成的标签',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
INDEX `created_user_id`(`created_user_id` ASC) USING BTREE,
INDEX `review_user_id`(`review_user_id` ASC) USING BTREE,
INDEX `publish_user_id`(`publish_user_id` ASC) USING BTREE,
INDEX `idx_articles_status_user_created`(`status` ASC, `created_user_id` ASC, `created_at` DESC) USING BTREE,
INDEX `idx_articles_status_created`(`status` ASC, `created_at` DESC) USING BTREE,
INDEX `idx_articles_status`(`status` ASC) USING BTREE,
INDEX `idx_articles_created_at`(`created_at` DESC) USING BTREE,
INDEX `idx_status_id_author`(`status` ASC, `id` ASC, `author_id` ASC) USING BTREE,
INDEX `idx_articles_updated_at`(`updated_at` DESC) USING BTREE,
INDEX `idx_articles_status_prompt_topic_id`(`status` ASC, `prompt_workflow_id` ASC, `topic` ASC, `id` ASC) USING BTREE,
INDEX `idx_articles_status_author_created`(`status` ASC, `author_id` ASC, `created_at` DESC) USING BTREE,
INDEX `idx_articles_created_status_author`(`created_at` ASC, `status` ASC, `author_id` ASC) USING BTREE,
INDEX `idx_channel_status_publish_author`(`channel` ASC, `status` ASC, `publish_time` ASC, `author_id` ASC) USING BTREE,
INDEX `idx_author_channel_status_date`(`author_id` ASC, `channel` ASC, `status` ASC, `updated_at` ASC) USING BTREE,
INDEX `idx_status_id`(`status` ASC, `id` ASC) USING BTREE,
INDEX `idx_status_dept_author`(`status` ASC, `department_id` ASC, `author_id` ASC) USING BTREE,
CONSTRAINT `ai_articles_ibfk_1` FOREIGN KEY (`author_id`) REFERENCES `ai_authors` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
CONSTRAINT `ai_articles_ibfk_2` FOREIGN KEY (`created_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
CONSTRAINT `ai_articles_ibfk_3` FOREIGN KEY (`review_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
CONSTRAINT `ai_articles_ibfk_4` FOREIGN KEY (`publish_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 1180 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_authors
-- ----------------------------
-- Author/persona accounts used for publishing, with per-channel credentials
-- and daily-quota / stock-quantity scheduling fields.
-- FIX: dropped `idx_channel_status_daily_max_id`, an exact duplicate of
-- `idx_channel_status_daily_max` (identical columns, identical order).
-- The duplicate only added write/storage overhead; any query it served is
-- served identically by the remaining index.
DROP TABLE IF EXISTS `ai_authors`;
CREATE TABLE `ai_authors` (
`id` int NOT NULL AUTO_INCREMENT,
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`app_id` varchar(127) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`app_token` varchar(127) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`department_id` int NOT NULL DEFAULT 0,
`department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`title` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`hospital` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`specialty` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL,
`toutiao_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL,
`toutiao_images_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL,
`toutiao_images` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL,
`introduction` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL,
`avatar_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`cumulative_published_count` int NULL DEFAULT 0 COMMENT '累计发文量从起始日到stat_date的总和',
`cumulative_revenue_sum` int NULL DEFAULT 0 COMMENT '累计收入从起始日到stat_date的总和',
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active',
`channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`daily_post_max` int NOT NULL DEFAULT 0 COMMENT '作者每日发文MAX',
`publishing_priority` decimal(18, 2) NULL DEFAULT 0.00 COMMENT '发文优先等级',
`stock_quantity` int NOT NULL DEFAULT 0 COMMENT '作者可发文库存量',
PRIMARY KEY (`id`) USING BTREE,
INDEX `idx_ai_authors_status`(`status` ASC) USING BTREE,
INDEX `idx_ai_authors_status_id`(`status` ASC, `id` ASC) USING BTREE,
INDEX `idx_status_created_at`(`status` ASC, `created_at` DESC) USING BTREE,
INDEX `idx_status_updated_at`(`status` ASC, `updated_at` DESC) USING BTREE,
INDEX `idx_status_cumulative_published`(`status` ASC, `cumulative_published_count` DESC) USING BTREE,
INDEX `idx_channel_status_id`(`channel` ASC, `status` ASC, `id` ASC) USING BTREE,
INDEX `idx_channel_status_daily_max`(`channel` ASC, `status` ASC, `daily_post_max` ASC, `id` ASC) USING BTREE,
INDEX `idx_query_optimized`(`channel` ASC, `status` ASC, `id` ASC, `daily_post_max` ASC, `author_name` ASC) USING BTREE,
INDEX `idx_channel_status_dailymax_id`(`channel` ASC, `status` ASC, `daily_post_max` ASC, `id` ASC, `author_name` ASC) USING BTREE,
INDEX `idx_dept_channel_status`(`department_id` ASC, `channel` ASC, `status` ASC) USING BTREE,
INDEX `idx_ai_authors_department_id`(`department_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 256 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_batch_uploads
-- ----------------------------
-- Tracks batch file uploads and their per-row success/failure counters
-- while the upload moves through processing -> completed/failed.
DROP TABLE IF EXISTS `ai_batch_uploads`;
CREATE TABLE `ai_batch_uploads` (
`id` int NOT NULL AUTO_INCREMENT,
`user_id` int NOT NULL,
`file_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`file_path` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`total_count` int NULL DEFAULT 0,
`success_count` int NULL DEFAULT 0,
`failed_count` int NULL DEFAULT 0,
`status` enum('processing','completed','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'processing',
`error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL,
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
INDEX `user_id`(`user_id` ASC) USING BTREE,
CONSTRAINT `ai_batch_uploads_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 101 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_departments
-- ----------------------------
-- Hospital departments plus per-department publishing stock counters
-- (query/article/max/published quantities used for quota management).
DROP TABLE IF EXISTS `ai_departments`;
CREATE TABLE `ai_departments` (
`id` int NOT NULL AUTO_INCREMENT,
`department_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`query_stock_quantity` int NOT NULL DEFAULT 0 COMMENT '科室下query审核存量',
`article_stock_quantity` int NOT NULL DEFAULT 0 COMMENT '科室下审核内容存量',
`max_stock_quantity` int NOT NULL DEFAULT 0 COMMENT '科室下设置发文总量max',
`published_stock_quantity` int NOT NULL DEFAULT 0 COMMENT '科室下发布成功的量',
PRIMARY KEY (`id`) USING BTREE,
INDEX `idx_ai_departments_created_at`(`created_at` DESC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 82 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_image_tags
-- ----------------------------
-- Image<->tag association table; per the README this is the main table the
-- tag-derivation scripts read and update. Stores both the current tag and
-- the original (`default_tag_*`) tag, denormalized keyword/department names,
-- and a `status` enum tracking the image's full review/publish pipeline.
-- uk_image_tag allows each (image, tag) pair only once.
DROP TABLE IF EXISTS `ai_image_tags`;
CREATE TABLE `ai_image_tags` (
`id` int NOT NULL AUTO_INCREMENT,
`image_id` int NOT NULL,
`image_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`tag_id` int NOT NULL,
`tag_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`default_tag_id` int NOT NULL DEFAULT 0 COMMENT '初始标签ID',
`default_tag_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '初始标签',
`keywords_id` int NOT NULL,
`keywords_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`department_id` int NOT NULL,
`department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`image_source` tinyint UNSIGNED NOT NULL DEFAULT 1 COMMENT '1=clean_images|2=Flower_character|3=gemini3',
`created_user_id` int NOT NULL DEFAULT 0,
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`image_attached_article_count` int NOT NULL DEFAULT 0 COMMENT 'Number of articles the image is attached to',
`status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'draft' COMMENT '图片完整扭转流程状态',
-- NOTE(review): this column uses utf8mb4_0900_ai_ci while the rest of the
-- table is utf8mb4_general_ci - confirm whether that mix is intentional.
`blocking_reason` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '审核不通过原因',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_image_tag`(`image_id` ASC, `tag_id` ASC) USING BTREE,
INDEX `tag_id`(`tag_id` ASC) USING BTREE,
INDEX `idx_id_desc`(`id` DESC) USING BTREE,
INDEX `idx_image_id_id`(`image_id` ASC, `id` DESC) USING BTREE,
INDEX `idx_created_at`(`created_at` DESC) USING BTREE,
INDEX `idx_department_id`(`department_id` ASC) USING BTREE,
INDEX `idx_keywords_id`(`keywords_id` ASC) USING BTREE,
INDEX `idx_dept_keywords`(`department_id` ASC, `keywords_id` ASC) USING BTREE,
INDEX `idx_dept_keywords_count_id`(`department_id` ASC, `keywords_id` ASC, `image_attached_article_count` ASC, `id` DESC) USING BTREE,
INDEX `idx_keywords_count_id`(`keywords_id` ASC, `image_attached_article_count` ASC, `id` DESC) USING BTREE,
INDEX `idx_dept_count_id`(`department_id` ASC, `image_attached_article_count` ASC, `id` DESC) USING BTREE,
INDEX `idx_count_id`(`image_attached_article_count` ASC, `id` DESC) USING BTREE,
INDEX `idx_tag_name`(`tag_name` ASC) USING BTREE,
INDEX `idx_tag_name_id`(`tag_name` ASC, `id` ASC) USING BTREE,
INDEX `idx_tag_notnull_id`(`id` ASC, `tag_name` ASC, `image_id` ASC, `created_at` ASC) USING BTREE,
CONSTRAINT `ai_image_tags_ibfk_2` FOREIGN KEY (`tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 929767 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_images
-- ----------------------------
-- Master image table: one row per uploaded image with file metadata
-- (size/dimensions), soft-delete style `status`, and uploader FK.
DROP TABLE IF EXISTS `ai_images`;
CREATE TABLE `ai_images` (
`id` int NOT NULL AUTO_INCREMENT,
`image_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`thumbnail_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`keywords` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`image_type` enum('medical','lifestyle','instruction') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'medical',
`file_size` bigint NULL DEFAULT NULL,
`width` int NULL DEFAULT NULL,
`height` int NULL DEFAULT NULL,
`upload_user_id` int NOT NULL,
`status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
INDEX `upload_user_id`(`upload_user_id` ASC) USING BTREE,
INDEX `idx_status_updated`(`status` ASC, `updated_at` ASC) USING BTREE,
CONSTRAINT `ai_images_ibfk_1` FOREIGN KEY (`upload_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 26832 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_keywords
-- ----------------------------
-- Keyword dictionary, each keyword optionally tied to a department
-- (department_id defaults to 0; no FK is declared on it here).
DROP TABLE IF EXISTS `ai_keywords`;
CREATE TABLE `ai_keywords` (
`id` int NOT NULL AUTO_INCREMENT,
`keywords_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`department_id` int NOT NULL DEFAULT 0,
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
INDEX `idx_ai_keywords_dept_created`(`department_id` ASC, `created_at` DESC) USING BTREE,
INDEX `idx_ai_keywords_created_at`(`created_at` DESC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 295 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_logs
-- ----------------------------
-- Audit/action log: who did what to which target, with request/response
-- payloads stored as JSON and a success/error/warning outcome.
DROP TABLE IF EXISTS `ai_logs`;
CREATE TABLE `ai_logs` (
`id` int NOT NULL AUTO_INCREMENT,
`user_id` int NULL DEFAULT NULL,
`action` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`target_type` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`target_id` int NULL DEFAULT NULL,
`description` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL,
`ip_address` varchar(45) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`user_agent` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL,
`request_data` json NULL,
`response_data` json NULL,
`status` enum('success','error','warning') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'success',
`error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL,
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
INDEX `user_id`(`user_id` ASC) USING BTREE,
INDEX `idx_created_at`(`created_at` DESC) USING BTREE,
CONSTRAINT `ai_logs_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 116027 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_mip_click
-- ----------------------------
-- MIP-page ad click log. `site_url` is a denormalized copy of
-- ai_mip_site.site_url (per its column comment) to speed up URL lookups;
-- no FK constraint is declared on `site_id`.
DROP TABLE IF EXISTS `ai_mip_click`;
CREATE TABLE `ai_mip_click` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID',
`site_id` bigint NOT NULL COMMENT '关联站点ID外键指向 ai_mip_site.id',
`site_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '网站URL冗余字段便于查询优化',
`click_time` datetime NOT NULL COMMENT '点击发生时间',
`user_ip` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '用户IP地址',
`user_agent` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '浏览器/设备信息',
`referer_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '来源页面URL',
`device_type` enum('mobile','pc','tablet') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '设备类型',
`click_count` int NULL DEFAULT 1 COMMENT '本次点击事件的计数一般为1可用于批量插入',
`is_valid` tinyint(1) NULL DEFAULT 1 COMMENT '是否有效点击(防刷)',
`task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT 'RPA任务ID可选',
`operator` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '操作者(如自动系统)',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间',
PRIMARY KEY (`id`) USING BTREE,
INDEX `idx_site_id`(`site_id` ASC) USING BTREE,
INDEX `idx_click_time`(`click_time` ASC) USING BTREE,
INDEX `idx_site_url`(`site_url` ASC) USING BTREE,
INDEX `idx_click_time_site`(`click_time` ASC, `site_id` ASC) USING BTREE,
INDEX `idx_task_id`(`task_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告点击日志表' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_mip_interaction
-- ----------------------------
-- MIP-page ad interaction log: one row per RPA-driven interaction attempt
-- (reply/comment/like/...), recording how it was executed (browser, proxy,
-- fingerprint), whether a response came back, and conversion outcome.
-- No FK constraints are declared on site_id/click_id here.
DROP TABLE IF EXISTS `ai_mip_interaction`;
CREATE TABLE `ai_mip_interaction` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID',
`site_id` bigint NOT NULL COMMENT '关联站点ID',
`click_id` bigint NULL DEFAULT NULL COMMENT '关联点击记录ID',
`task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT 'RPA任务ID',
`interaction_type` enum('reply','comment','message','form_submit','follow','like','share') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '互动类型',
`interaction_time` datetime NOT NULL COMMENT '互动发生时间',
`interaction_status` enum('pending','success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'pending' COMMENT '互动状态',
`reply_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '回复/评论的内容',
`reply_template_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的回复模板ID',
`ad_element_xpath` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告元素的XPath定位',
`ad_element_selector` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告元素的CSS选择器',
`ad_text_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '广告的文本内容',
`execution_mode` enum('auto','manual','semi_auto') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'auto' COMMENT '执行方式',
`rpa_script` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的RPA脚本名称',
`browser_type` enum('headless','headed','playwright','selenium') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '浏览器类型',
`anti_detection_method` json NULL COMMENT '万金油技术方案',
`proxy_ip` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的代理IP',
`user_agent` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '使用的User-Agent',
`custom_headers` json NULL COMMENT '自定义HTTP头',
`fingerprint_id` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '浏览器指纹ID',
`response_received` tinyint(1) NULL DEFAULT 0 COMMENT '是否收到回复',
`response_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '对方回复的内容',
`response_time` datetime NULL DEFAULT NULL COMMENT '收到回复的时间',
`response_delay_seconds` int NULL DEFAULT NULL COMMENT '回复延迟(秒)',
`is_successful` tinyint(1) NULL DEFAULT 0 COMMENT '是否成功互动',
`error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '失败原因/错误信息',
`retry_count` int NULL DEFAULT 0 COMMENT '重试次数',
`conversion_flag` tinyint(1) NULL DEFAULT 0 COMMENT '是否产生转化',
`site_dimension` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网址维度标签',
`campaign_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告活动ID',
`operator` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '操作者',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录更新时间',
`remark` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息',
PRIMARY KEY (`id`) USING BTREE,
INDEX `idx_site_id`(`site_id` ASC) USING BTREE,
INDEX `idx_click_id`(`click_id` ASC) USING BTREE,
INDEX `idx_task_id`(`task_id` ASC) USING BTREE,
INDEX `idx_interaction_time`(`interaction_time` ASC) USING BTREE,
INDEX `idx_interaction_status`(`interaction_status` ASC) USING BTREE,
INDEX `idx_composite`(`site_id` ASC, `interaction_time` ASC, `interaction_status` ASC) USING BTREE,
INDEX `idx_response_received`(`response_received` ASC) USING BTREE,
INDEX `idx_conversion`(`conversion_flag` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告互动回复日志表' ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for ai_mip_query_task
-- ----------------------------
-- Daily keyword-crawl task queue: one task per (query_word, task_date),
-- enforced by uniq_query_date (191-char prefix keeps the composite key
-- within InnoDB's index-length limit). Counters track crawl progress
-- against threshold_max; `status` drives the ready->doing->... workflow.
DROP TABLE IF EXISTS `ai_mip_query_task`;
CREATE TABLE `ai_mip_query_task` (
`id` int NOT NULL AUTO_INCREMENT COMMENT '主键ID',
`query_word` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '查询词/关键词',
`query_type` enum('keyword','phrase','long_tail') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'keyword' COMMENT '查询类型:关键词/短语/长尾词',
`task_date` char(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '任务日期格式YYYYMMDD',
`threshold_max` int NOT NULL DEFAULT 100 COMMENT '最大抓取数量阈值',
`current_count` int NOT NULL DEFAULT 0 COMMENT '当前已抓取数量',
`status` enum('ready','doing','failed','finished','closed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'ready' COMMENT '任务状态:准备中/执行中/失败/完成/已关闭',
`priority` tinyint NOT NULL DEFAULT 5 COMMENT '优先级1-10数字越小优先级越高',
`category` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '分类标签(如:医疗、教育、法律等)',
`source_platform` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'baidu' COMMENT '来源平台baidu/sogou/360等',
`crawl_url_count` int NOT NULL DEFAULT 0 COMMENT '已爬取URL数量',
`valid_url_count` int NOT NULL DEFAULT 0 COMMENT '有效URL数量带广告',
`error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '错误信息',
`started_at` timestamp NULL DEFAULT NULL COMMENT '开始执行时间',
`finished_at` timestamp NULL DEFAULT NULL COMMENT '完成时间',
`closed_at` timestamp NULL DEFAULT NULL COMMENT '达到阈值关闭时间',
`created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
`created_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'system' COMMENT '创建人',
`remark` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uniq_query_date`(`query_word`(191) ASC, `task_date` ASC) USING BTREE COMMENT '同一查询词每天只有一个任务',
INDEX `idx_date_status`(`task_date` ASC, `status` ASC) USING BTREE COMMENT '按日期和状态查询',
INDEX `idx_status_priority`(`status` ASC, `priority` ASC) USING BTREE COMMENT '按状态和优先级查询',
INDEX `idx_category`(`category` ASC) USING BTREE COMMENT '按分类查询',
INDEX `idx_threshold`(`threshold_max` ASC, `current_count` ASC) USING BTREE COMMENT '阈值监控',
INDEX `idx_closed`(`closed_at` ASC) USING BTREE COMMENT '关闭时间索引'
) ENGINE = InnoDB AUTO_INCREMENT = 1 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = 'MIP查询任务表 - 用于存储查询词抓取网址任务' ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for ai_mip_site
-- ----------------------------
-- MIP ad-site registry: per-site scheduling window (time_start/time_end,
-- interval_minutes, frequency) plus cumulative click/reply counters.
DROP TABLE IF EXISTS `ai_mip_site`;
CREATE TABLE `ai_mip_site` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID',
`site_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '网站URL唯一',
`site_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网站名称(可选)',
`status` enum('active','inactive','pending') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'active' COMMENT '状态:激活/停用/待审核',
`frequency` int NULL DEFAULT 1 COMMENT '频次(如每小时发几次)',
`time_start` time NULL DEFAULT '00:00:00' COMMENT '开始时间HH:MM:SS',
`time_end` time NULL DEFAULT '23:59:59' COMMENT '结束时间HH:MM:SS',
`interval_minutes` int NULL DEFAULT 60 COMMENT '执行间隔(分钟)',
`ad_feature` varchar(1024) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告特征描述JSON格式{\"color\":\"red\", \"position\":\"top\"}',
`click_count` bigint NULL DEFAULT 0 COMMENT '累计点击次数',
`reply_count` bigint NULL DEFAULT 0 COMMENT '累计回复次数',
`site_dimension` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网址维度标签(如:教育、医疗等)',
`query_word` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '来源查询词(从哪个关键词抓取)',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
`created_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '创建人',
`updated_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '更新人',
`remark` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息',
PRIMARY KEY (`id`) USING BTREE,
-- NOTE(review): two UNIQUE indexes cover site_url - the full-column one
-- below and the 191-char-prefix `idx_site_url`. The prefix index is the
-- stricter constraint (rejects rows that share the first 191 chars), so
-- they are not interchangeable; confirm intent before dropping either.
UNIQUE INDEX `site_url`(`site_url` ASC) USING BTREE,
UNIQUE INDEX `idx_site_url`(`site_url`(191) ASC) USING BTREE,
INDEX `idx_status`(`status` ASC) USING BTREE,
INDEX `idx_created_at`(`created_at` ASC) USING BTREE,
INDEX `idx_query_word`(`query_word`(191) ASC) USING BTREE COMMENT '按查询词查询'
) ENGINE = InnoDB AUTO_INCREMENT = 3 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告网址管理表' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_mip_task_log
-- ----------------------------
-- RPA task execution log: one row per task run against a site, recording the
-- per-step timestamps/status (visit, anti-bot, ad detection, click, reply),
-- overall task timing/state, and interaction counters.
DROP TABLE IF EXISTS `ai_mip_task_log`;
CREATE TABLE `ai_mip_task_log` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID',
`task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT 'RPA任务唯一ID',
`site_id` bigint NOT NULL COMMENT '关联站点ID',
`step_1_visit_time` datetime NULL DEFAULT NULL COMMENT '步骤1访问网址时间',
`step_1_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤1状态',
`step_2_antibot_time` datetime NULL DEFAULT NULL COMMENT '步骤2万金油技术方案执行时间',
`step_2_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤2状态',
`step_3_ad_detection_time` datetime NULL DEFAULT NULL COMMENT '步骤3广告检测时间',
`step_3_has_ad` tinyint(1) NULL DEFAULT NULL COMMENT '是否检测到广告',
`step_3_ad_count` int NULL DEFAULT 0 COMMENT '检测到的广告数量',
`step_4_click_time` datetime NULL DEFAULT NULL COMMENT '步骤4点击广告时间',
`step_4_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤4状态',
`step_5_reply_time` datetime NULL DEFAULT NULL COMMENT '步骤5获取回复时间',
`step_5_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤5状态',
`task_start_time` datetime NOT NULL COMMENT '任务开始时间',
`task_end_time` datetime NULL DEFAULT NULL COMMENT '任务结束时间',
`task_duration_seconds` int NULL DEFAULT NULL COMMENT '任务执行时长(秒)',
`task_status` enum('running','completed','failed','timeout') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'running' COMMENT '任务整体状态',
`total_clicks` int NULL DEFAULT 0 COMMENT '本次任务总点击次数',
`total_interactions` int NULL DEFAULT 0 COMMENT '本次任务总互动次数',
`successful_interactions` int NULL DEFAULT 0 COMMENT '成功互动次数',
`failed_interactions` int NULL DEFAULT 0 COMMENT '失败互动次数',
`execution_mode` enum('auto','manual','scheduled') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'auto' COMMENT '执行模式',
`triggered_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '触发者(定时任务/手动触发/队列)',
`error_log` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '错误日志',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录更新时间',
PRIMARY KEY (`id`) USING BTREE,
-- FIX: dropped the auto-named `task_id` unique index — it was an exact
-- duplicate of `uk_task_id` below (same column, same order), costing extra
-- maintenance on every write for zero additional guarantees.
UNIQUE INDEX `uk_task_id`(`task_id` ASC) USING BTREE,
INDEX `idx_site_id`(`site_id` ASC) USING BTREE,
INDEX `idx_task_status`(`task_status` ASC) USING BTREE,
INDEX `idx_start_time`(`task_start_time` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'RPA任务执行日志表' ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for ai_prompt_workflow
-- ----------------------------
-- Prompt-workflow registry: maps a named workflow (with its auth token and
-- external workflow_id) to a workflow type; `query_enable` marks the workflow
-- used for AI article generation from queries.
DROP TABLE IF EXISTS `ai_prompt_workflow`;
CREATE TABLE `ai_prompt_workflow` (
`id` int NOT NULL AUTO_INCREMENT,
`prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`auth_token` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`workflow_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`workflow_type_id` int UNSIGNED NOT NULL DEFAULT 0,
`workflow_type_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`created_user_id` int NOT NULL DEFAULT 0,
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`query_enable` tinyint NOT NULL DEFAULT 0 COMMENT 'query生效AI生文大模型',
PRIMARY KEY (`id`) USING BTREE,
INDEX `idx_created_user_time`(`created_user_id` ASC, `created_at` ASC) USING BTREE,
INDEX `idx_created_at`(`created_at` ASC) USING BTREE,
INDEX `idx_workflow_id`(`workflow_id` ASC) USING BTREE,
INDEX `idx_prompt_workflow_name`(`prompt_workflow_name` ASC) USING BTREE,
INDEX `idx_query_enable`(`query_enable` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 16 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_query_category
-- ----------------------------
-- Top-level query category dictionary (name + active/inactive status);
-- referenced denormalized as category_id/category_name in the query tables.
DROP TABLE IF EXISTS `ai_query_category`;
CREATE TABLE `ai_query_category` (
`id` int NOT NULL AUTO_INCREMENT COMMENT '类型ID',
`category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称',
`created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID',
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active' COMMENT '状态',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 5 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for ai_query_strategies
-- ----------------------------
-- Per-query-type strategy definitions: a free-text definition context plus
-- examples, keyed by (category, query_type); names are denormalized alongside
-- the IDs, matching the convention used across the other query tables.
DROP TABLE IF EXISTS `ai_query_strategies`;
CREATE TABLE `ai_query_strategies` (
`id` int NOT NULL AUTO_INCREMENT,
`category_id` int NOT NULL DEFAULT 0 COMMENT '分类ID',
`category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称',
`query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称',
`query_type_id` int NOT NULL DEFAULT 0 COMMENT '类型ID',
`define_context` varchar(2048) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '定义上下文',
`for_example` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '案例',
`created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active',
PRIMARY KEY (`id`) USING BTREE,
INDEX `query_type_id`(`query_type_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 136 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for ai_query_type
-- ----------------------------
-- Query type dictionary nested under a category (category fields are
-- denormalized copies of ai_query_category).
DROP TABLE IF EXISTS `ai_query_type`;
CREATE TABLE `ai_query_type` (
`id` int NOT NULL AUTO_INCREMENT COMMENT '类型ID',
`category_id` int NOT NULL DEFAULT 0 COMMENT '分类ID',
`category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称',
`query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称',
`created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID',
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active' COMMENT '状态',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 131 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_statistics
-- ----------------------------
-- Per-author, per-day content engagement statistics (reads, comments, likes,
-- favorites, shares and their rates); one row per (author_id, date), enforced
-- by uk_author_date.
DROP TABLE IF EXISTS `ai_statistics`;
CREATE TABLE `ai_statistics` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT 'Auto-increment ID',
`author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID',
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称',
-- NOTE(review): tinyint(1) conventionally signals boolean, but this column
-- stores a 3-way channel code (1/2/3) per its COMMENT — works, just misleading.
`channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin',
`date` date NOT NULL COMMENT 'Date of statistics',
`submission_count` int NULL DEFAULT 0 COMMENT 'Number of submissions (投稿量)',
`read_count` int NULL DEFAULT 0 COMMENT 'Number of reads (阅读量)',
`comment_count` int NULL DEFAULT 0 COMMENT 'Number of comments (评论量)',
`comment_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Comment rate (评论率)',
`like_count` int NULL DEFAULT 0 COMMENT 'Number of likes (点赞量)',
`like_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Like rate (点赞率)',
`favorite_count` int NULL DEFAULT 0 COMMENT 'Number of favorites (收藏量)',
`favorite_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Favorite rate (收藏率)',
`share_count` int NULL DEFAULT 0 COMMENT 'Number of shares (分享量)',
`share_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Share rate (分享率)',
`slide_ratio` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Slide view ratio (滑图占比)',
`baidu_search_volume` int NULL DEFAULT 0 COMMENT 'Baidu search volume (百度搜索量)',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'Creation timestamp',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Update timestamp',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_author_date`(`author_id` ASC, `date` ASC) USING BTREE,
INDEX `idx_date`(`date` ASC) USING BTREE,
INDEX `idx_author_id`(`author_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 51 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI Content Statistics' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_statistics_day
-- ----------------------------
-- Daily rollup of ai_statistics per author: totals for counts and averages
-- for rates; one row per (author_id, stat_date).
DROP TABLE IF EXISTS `ai_statistics_day`;
CREATE TABLE `ai_statistics_day` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键',
`author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID',
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称',
`channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin',
`stat_date` date NOT NULL COMMENT '统计日期(天)',
`total_submission_count` int NULL DEFAULT 0 COMMENT '投稿量(当日总计)',
`total_read_count` int NULL DEFAULT 0 COMMENT '阅读量(当日总计)',
`total_comment_count` int NULL DEFAULT 0 COMMENT '评论量(当日总计)',
`total_like_count` int NULL DEFAULT 0 COMMENT '点赞量(当日总计)',
`total_favorite_count` int NULL DEFAULT 0 COMMENT '收藏量(当日总计)',
`total_share_count` int NULL DEFAULT 0 COMMENT '分享量(当日总计)',
`avg_comment_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '评论率(当日平均)',
`avg_like_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '点赞率(当日平均)',
`avg_favorite_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '收藏率(当日平均)',
`avg_share_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '分享率(当日平均)',
`avg_slide_ratio` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '滑图占比(当日平均)',
`total_baidu_search_volume` int NULL DEFAULT 0 COMMENT '百度搜索量(当日总计)',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_author_stat_date`(`author_id` ASC, `stat_date` ASC) USING BTREE,
INDEX `idx_stat_date`(`stat_date` ASC) USING BTREE,
INDEX `idx_author_id`(`author_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 51 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI内容每日汇总统计表' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_statistics_days
-- ----------------------------
-- Daily publishing/revenue metrics per author (publish quota, stock, failures,
-- revenue, department); distinct from ai_statistics_day despite the similar
-- name — this one tracks output volume rather than engagement.
DROP TABLE IF EXISTS `ai_statistics_days`;
CREATE TABLE `ai_statistics_days` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键',
`author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID',
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称',
`channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin',
`stat_date` date NOT NULL COMMENT '统计日期(自然日)',
`daily_published_count` int NULL DEFAULT 0 COMMENT '单日发文量',
`day_revenue` decimal(18, 2) NULL DEFAULT 0.00 COMMENT '当天收益stat_date所在自然日',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
`daily_post_max` int NOT NULL DEFAULT 0 COMMENT '作者每日发文MAX',
`stock_quantity` int NOT NULL DEFAULT 0 COMMENT '作者每日发文库存量',
`defect_quantity` int NOT NULL DEFAULT 0 COMMENT '作者每日发文失败量',
`is_full` tinyint(1) NOT NULL DEFAULT 0 COMMENT '是否发满0-未发满1-已发满',
`department_id` int NOT NULL DEFAULT 0 COMMENT '科室ID',
`department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '科室名称',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_author_stat_date`(`author_id` ASC, `stat_date` ASC) USING BTREE,
INDEX `idx_stat_date`(`stat_date` ASC) USING BTREE,
INDEX `idx_author_id`(`author_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 71003 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI内容每日核心指标汇总表含累计、收益及环比' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_statistics_monthly
-- ----------------------------
-- Monthly revenue rollup per author with month-over-month growth rate;
-- one row per (author_id, stat_monthly).
DROP TABLE IF EXISTS `ai_statistics_monthly`;
CREATE TABLE `ai_statistics_monthly` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键',
`author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID',
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称',
`channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin',
`stat_monthly` varchar(48) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '统计日期(自然月)',
`monthly_revenue` decimal(18, 2) NULL DEFAULT 0.00 COMMENT '当月收益stat_date所在自然月的总收益',
`revenue_mom_growth_rate` decimal(10, 6) NULL DEFAULT 0.000000 COMMENT '收益月环比增长率((本月收益 - 上月收益) / NULLIF(上月收益, 0)',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_author_stat_date`(`author_id` ASC, `stat_monthly` ASC) USING BTREE,
INDEX `idx_stat_date`(`stat_monthly` ASC) USING BTREE,
INDEX `idx_author_id`(`author_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 3069 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI内容每月核心指标汇总表含累计、收益及环比' ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for ai_statistics_weekly
-- ----------------------------
-- Weekly revenue rollup per author with week-over-week growth rate;
-- one row per (author_id, stat_weekly). Mirrors ai_statistics_monthly.
DROP TABLE IF EXISTS `ai_statistics_weekly`;
CREATE TABLE `ai_statistics_weekly` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键',
`author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID',
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称',
`channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin',
`stat_weekly` varchar(48) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '统计日期(自然周)',
`weekly_revenue` decimal(18, 2) NULL DEFAULT 0.00 COMMENT '当周收益stat_date所在自然周的总收益周一至周日',
`revenue_wow_growth_rate` decimal(10, 6) NULL DEFAULT 0.000000 COMMENT '收益周环比增长率((本周收益 - 上周收益) / NULLIF(上周收益, 0)',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_author_stat_date`(`author_id` ASC, `stat_weekly` ASC) USING BTREE,
INDEX `idx_stat_date`(`stat_weekly` ASC) USING BTREE,
INDEX `idx_author_id`(`author_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 10644 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI内容每周核心指标汇总表含累计、收益及环比' ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for ai_tag_subsets
-- ----------------------------
-- Child tags grouped under a parent tag in ai_tags; rows are removed
-- automatically when the parent tag is deleted (ON DELETE CASCADE).
-- NOTE: the referenced table ai_tags is created later in this dump — the
-- forward FK reference assumes the import runs with FOREIGN_KEY_CHECKS = 0.
DROP TABLE IF EXISTS `ai_tag_subsets`;
CREATE TABLE `ai_tag_subsets` (
`id` int NOT NULL AUTO_INCREMENT,
`parent_tag_id` int NOT NULL,
`subset_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`subset_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL,
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
INDEX `parent_tag_id`(`parent_tag_id` ASC) USING BTREE,
CONSTRAINT `ai_tag_subsets_ibfk_1` FOREIGN KEY (`parent_tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 20478 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_tags
-- ----------------------------
-- Master tag dictionary (unique tag_name, usage counter, status); parent side
-- of the ai_tag_subsets foreign key.
DROP TABLE IF EXISTS `ai_tags`;
CREATE TABLE `ai_tags` (
`id` int NOT NULL AUTO_INCREMENT,
`tag_name` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`tag_category` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`description` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL,
`usage_count` int NULL DEFAULT 0,
`status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
-- NOTE(review): unique key over varchar(512) utf8mb4 needs 2048 bytes —
-- within InnoDB's 3072-byte limit for DYNAMIC rows, but close to it.
UNIQUE INDEX `uk_tag_name`(`tag_name` ASC) USING BTREE,
INDEX `idx_status_updated`(`status` ASC, `updated_at` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 13417 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_topic_type
-- ----------------------------
-- Topic-type registry linking a topic type to a prompt workflow
-- (workflow id/name denormalized from ai_prompt_workflow).
DROP TABLE IF EXISTS `ai_topic_type`;
CREATE TABLE `ai_topic_type` (
`id` int NOT NULL AUTO_INCREMENT,
`topic_type_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`type_id` int NOT NULL DEFAULT 0,
`type_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`prompt_workflow_id` int UNSIGNED NOT NULL DEFAULT 0,
`prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`created_user_id` int NOT NULL DEFAULT 0,
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
INDEX `idx_created_user_time`(`created_user_id` ASC, `created_at` ASC) USING BTREE,
INDEX `idx_created_at`(`created_at` ASC) USING BTREE,
INDEX `idx_type_id`(`type_id` ASC) USING BTREE,
INDEX `idx_topic_type_name`(`topic_type_name` ASC) USING BTREE,
INDEX `idx_prompt_workflow_id`(`prompt_workflow_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 16 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_user_authors
-- ----------------------------
-- User ↔ author assignment (many-to-many join table); each pair is unique
-- via uk_user_author. Names are denormalized alongside the IDs.
DROP TABLE IF EXISTS `ai_user_authors`;
CREATE TABLE `ai_user_authors` (
`id` int NOT NULL AUTO_INCREMENT,
`user_id` int UNSIGNED NOT NULL DEFAULT 0,
`username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`author_id` int NOT NULL DEFAULT 0,
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT '',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_user_author`(`user_id` ASC, `author_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 208 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for ai_user_departments
-- ----------------------------
-- User ↔ department relation table, indexed in both directions for lookups
-- by user and by department.
-- NOTE(review): columns use utf8mb4_general_ci while the table default is
-- utf8mb4_unicode_ci — mixed collations can surprise in joins; confirm intent.
DROP TABLE IF EXISTS `ai_user_departments`;
CREATE TABLE `ai_user_departments` (
`id` int UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键ID',
`user_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '用户ID',
`username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '用户名',
`department_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '科室ID',
`department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '科室名称',
`created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE,
INDEX `idx_user_department`(`user_id` ASC, `department_id` ASC) USING BTREE,
INDEX `idx_dept_user`(`department_id` ASC, `user_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 77 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci COMMENT = '用户-科室关系表' ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for ai_user_topics
-- ----------------------------
-- User ↔ topic-type subscription with the prompt workflow bound to it;
-- status defaults to 'inactive' until enabled.
DROP TABLE IF EXISTS `ai_user_topics`;
CREATE TABLE `ai_user_topics` (
`id` int NOT NULL AUTO_INCREMENT,
`user_id` int UNSIGNED NOT NULL DEFAULT 0,
`username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`topic_type_id` int UNSIGNED NOT NULL DEFAULT 0,
`topic_type_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '',
`prompt_workflow_id` int NOT NULL DEFAULT 0,
`prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT '',
`status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'inactive',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
INDEX `idx_topic_type_id`(`topic_type_id` ASC) USING BTREE,
INDEX `idx_prompt_workflow_id`(`prompt_workflow_id` ASC) USING BTREE,
INDEX `idx_created_at`(`created_at` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 81 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for ai_users
-- ----------------------------
-- Application user accounts: credentials, contact info, role (enum of six
-- roles), soft-delete via status, and a stored XiaoHongShu cookie.
DROP TABLE IF EXISTS `ai_users`;
CREATE TABLE `ai_users` (
`id` int NOT NULL AUTO_INCREMENT,
`username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
-- password is varchar(255): sized for a salted hash, not plaintext —
-- TODO confirm the application stores hashes here.
`password` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`real_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`email` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`phone` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`xhs_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL COMMENT '小红书Cookie',
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`role` enum('admin','editor','reviewer','publisher','each_title_reviewer','reviewer_query') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'editor' COMMENT '用户角色',
`status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_username`(`username` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 262 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for baidu_keyword
-- ----------------------------
-- Core query/keyword pipeline table: each keyword carries its seed lineage,
-- department/author routing, classification, similarity bookkeeping, and the
-- multi-stage workflow state (query_stage / query_status / status).
DROP TABLE IF EXISTS `baidu_keyword`;
CREATE TABLE `baidu_keyword` (
`id` int NOT NULL AUTO_INCREMENT,
`keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
`crawled` tinyint NULL DEFAULT 0,
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`parents_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '父层级',
`seed_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '种子',
`seed_name` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '种子名称',
`department` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '科室',
`department_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '科室ID',
`author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID',
`author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '作者名称',
`type` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '类型',
`type_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '类型D',
`partsof_speech` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '词性',
`partsof_speech_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '词性ID',
`yesorno_question` enum('yes','no','unprocessed') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'unprocessed' COMMENT '是否是问题?',
`query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称',
`query_type_id` int NOT NULL DEFAULT 0 COMMENT '类型ID',
`category_id` int NOT NULL DEFAULT 0 COMMENT '分类ID',
`category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称',
`created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID',
`query_summary_status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'ready',
`blocking_reason` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '审核不通过原因',
`article_id` int NOT NULL DEFAULT 0 COMMENT '文章ID',
`query_stage` enum('draft','created','summary','reviewed','generated','published') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT '分5个阶段创建|总结|审核|生文|发布',
`query_status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','generate_review','generate','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'draft' COMMENT 'query完整扭转流程状态',
`status` enum('draft','available','unavailable','successful','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT '状态_分2个阶段|可用|不可用|发布成功|发布失败',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`review_user_id` int NOT NULL DEFAULT 0 COMMENT '审核用户ID',
`similarity` enum('draft','yes','calc','recalc') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT 'yes=是相似|calc=已计算|recalc=需要重新计算',
`similarity_query` int NOT NULL DEFAULT 0 COMMENT 'yes=是相似|把query_id写入',
`similarity_query_keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT 'yes=是相似|把query写入',
-- NOTE(review): column name contains a space (`similarity score`) — legal
-- with backticks but inconsistent with every other snake_case column; a
-- rename to similarity_score would require coordinated application changes.
`similarity score` float NOT NULL DEFAULT 0 COMMENT '相似时候,计算相似度值',
`reviewed_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '审核日期',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `keyword`(`keyword` ASC) USING BTREE,
-- FIX: deduplicated the index set. Removed exact duplicates and indexes that
-- were left-prefixes of a wider composite (MySQL serves prefix lookups from
-- the composite), each of which only taxed every INSERT/UPDATE:
--   idx_review_query           = dup of (query_status, review_user_id)
--   idx_review_status_user     = prefix of idx_status_user_created
--   idx_dept_query_status      = dup of idx_dept_status
--   idx_baidu_seed_created     = dup of idx_seed_created
--   idx_status_id, idx_query_status_id_asc, idx_baidu_query_status
--                              = (query_status, id ASC), prefix of
--                                idx_status_order_covering
--   idx_query_status_cover     = (query_status), prefix of several composites
--   idx_department_id          = prefix of idx_dept_status
INDEX `idx_crawled_seed`(`crawled` ASC, `seed_id` ASC) USING BTREE,
INDEX `idx_created_at`(`created_at` ASC) USING BTREE,
INDEX `idx_query_status_id`(`query_status` ASC, `id` DESC) USING BTREE,
INDEX `idx_status_stage`(`query_status` ASC, `query_stage` ASC) USING BTREE,
INDEX `idx_status_user_created`(`query_status` ASC, `review_user_id` ASC, `created_at` ASC) USING BTREE,
INDEX `idx_article_id`(`article_id` ASC) USING BTREE,
INDEX `idx_dept_status`(`department_id` ASC, `query_status` ASC) USING BTREE,
INDEX `idx_dept_review_user`(`department_id` ASC, `review_user_id` ASC) USING BTREE,
INDEX `idx_query_status_dept_id`(`query_status` ASC, `department_id` ASC, `id` DESC) USING BTREE,
INDEX `idx_status_dept_created`(`query_status` ASC, `department_id` ASC, `created_at` DESC) USING BTREE,
INDEX `idx_status_dept_id`(`query_status` ASC, `department_id` ASC, `id` ASC) USING BTREE,
INDEX `idx_seed_created`(`seed_id` ASC, `created_at` ASC) USING BTREE,
INDEX `idx_status_order_covering`(`query_status` ASC, `id` ASC, `keyword` ASC) USING BTREE,
FULLTEXT INDEX `idx_keyword_fulltext`(`keyword`)
) ENGINE = InnoDB AUTO_INCREMENT = 901728 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for baidu_query_task
-- ----------------------------
-- Per-seed daily crawl task: tracks progress toward threshold_max via
-- current_count, with lifecycle timestamps (started/finished/closed).
DROP TABLE IF EXISTS `baidu_query_task`;
CREATE TABLE `baidu_query_task` (
`id` int NOT NULL AUTO_INCREMENT,
`seed_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '种子',
`seed_name` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '种子名称',
`task_date` char(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '任务日期格式YYYYMMDD',
`threshold_max` int NOT NULL DEFAULT 1000 COMMENT '最大阈值',
`current_count` int NOT NULL DEFAULT 0 COMMENT '当前增量',
`status` enum('ready','doing','failed','finished','closed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'ready',
`started_at` timestamp NULL DEFAULT NULL,
`finished_at` timestamp NULL DEFAULT NULL,
`closed_at` timestamp NULL DEFAULT NULL COMMENT '达到阈值关闭时间',
`created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`) USING BTREE,
-- NOTE(review): named `uniq_seed_date` but declared as a plain INDEX — it
-- does NOT enforce one task per seed per day, unlike the UNIQUE
-- `uniq_query_date` pattern used elsewhere in this schema. Confirm whether
-- duplicates are actually allowed before "fixing" it to UNIQUE.
INDEX `uniq_seed_date`(`seed_id` ASC, `task_date` ASC) USING BTREE,
INDEX `idx_date_status`(`task_date` ASC, `status` ASC) USING BTREE,
INDEX `idx_status_count`(`status` ASC, `current_count` ASC) USING BTREE,
INDEX `idx_threshold`(`threshold_max` ASC) USING BTREE,
INDEX `idx_closed`(`closed_at` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 184 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = '百度查询任务表' ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for baidu_seed_keywords
-- ----------------------------
-- Seed keyword queue for the crawler: unique keywords with crawl flag,
-- processing status, and a priority weight (higher processed first, per the
-- DESC ordering in idx_crawled_priority).
DROP TABLE IF EXISTS `baidu_seed_keywords`;
CREATE TABLE `baidu_seed_keywords` (
`id` int NOT NULL AUTO_INCREMENT,
`batch_id` bigint UNSIGNED NOT NULL DEFAULT 0 COMMENT '批次ID',
`keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
`crawled` tinyint NULL DEFAULT 0,
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'ready',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`priority_weight` int NOT NULL DEFAULT 0 COMMENT '优先级和权重1~10000|更高的先处理',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `keyword`(`keyword` ASC) USING BTREE,
INDEX `idx_crawled_priority`(`crawled` ASC, `priority_weight` DESC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 48 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = DYNAMIC;
-- Restore FK enforcement (the dump disables it during import so forward
-- references like ai_tag_subsets → ai_tags load cleanly).
SET FOREIGN_KEY_CHECKS = 1;

34
ai_image_tags.txt Normal file
View File

@@ -0,0 +1,34 @@
8.149.233.36/ai_article/ai_image_tags/ http://47.99.184.230:8008/andes/index.php?route=/sql&pos=0&db=ai_article&table=ai_image_tags
正在显示第 25 - 49 行 (共 32937 行, 查询花费 0.0009 秒。)
SELECT * FROM `ai_image_tags`
id image_id image_name image_url image_thumb_url tag_id tag_name default_tag_id default_tag_name keywords_id keywords_name department_id department_name image_source created_user_id created_at updated_at image_attached_article_count status blocking_reason
16495 19346 1755312359566253.png 20250816/1755312359566253.png 20250816/1755312359566253_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:19:11 7 draft
16496 19347 1755312362360723.png 20250816/1755312362360723.png 20250816/1755312362360723_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:18:55 8 draft
16497 19348 1755312364406476.png 20250816/1755312364406476.png 20250816/1755312364406476_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:18:55 8 draft
16498 19349 1755312367284353.png 20250816/1755312367284353.png 20250816/1755312367284353_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:18:55 8 draft
16499 19350 1755312370484005.png 20250816/1755312370484005.png 20250816/1755312370484005_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:19:11 7 draft
16500 19351 1755312373245801.png 20250816/1755312373245801.png 20250816/1755312373245801_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:41 17 draft
16501 19352 1755312378278262.png 20250816/1755312378278262.png 20250816/1755312378278262_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:34:55 35 draft
16502 19353 1755312380298110.png 20250816/1755312380298110.png 20250816/1755312380298110_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:34:51 37 draft
16503 19354 1755312382399131.png 20250816/1755312382399131.png 20250816/1755312382399131_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:30 93 draft
16504 19355 1755312386945978.png 20250816/1755312386945978.png 20250816/1755312386945978_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:33 20 draft
16505 19356 1755312388894962.png 20250816/1755312388894962.png 20250816/1755312388894962_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:06 30 draft
16506 19357 1755312391383717.png 20250816/1755312391383717.png 20250816/1755312391383717_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:34:30 49 draft
16507 19358 1755312393565035.png 20250816/1755312393565035.png 20250816/1755312393565035_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:32:57 135 draft
16508 19359 1755312396609453.png 20250816/1755312396609453.png 20250816/1755312396609453_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:18:55 8 draft
16509 19360 1755312401479871.png 20250816/1755312401479871.png 20250816/1755312401479871_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:41 17 draft
16510 19361 1755312407229190.png 20250816/1755312407229190.png 20250816/1755312407229190_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:29 21 draft
16511 19362 1755312410797310.png 20250816/1755312410797310.png 20250816/1755312410797310_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:08 29 draft
16512 19363 1755312437724619.png 20250816/1755312437724619.png 20250816/1755312437724619_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:59 69 draft
16513 19364 1755312440270419.png 20250816/1755312440270419.png 20250816/1755312440270419_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:29 94 draft
16514 19365 1755312442259884.png 20250816/1755312442259884.png 20250816/1755312442259884_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:18 107 draft
16515 19366 1755312445610363.png 20250816/1755312445610363.png 20250816/1755312445610363_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:32:36 173 draft
16516 19367 1755312448884355.png 20250816/1755312448884355.png 20250816/1755312448884355_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:14 111 draft
16517 19368 1755312451681906.png 20250816/1755312451681906.png 20250816/1755312451681906_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:29 94 draft
16518 19369 1755312453351689.png 20250816/1755312453351689.png 20250816/1755312453351689_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:24 100 draft
16519 19370 1755312456284588.png 20250816/1755312456284588.png 20250816/1755312456284588_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:09 118 draft

0
ai_tags.txt Normal file
View File

19
check_results.py Normal file
View File

@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
"""Quick inspection tool for derive_results.json produced by image_tag_derive.py."""
import json
import os
import sys


def summarize(data):
    """Split result records into (success, failed) lists by the 'success' flag."""
    failed = [d for d in data if not d.get('success')]
    success = [d for d in data if d.get('success')]
    return success, failed


def main():
    # Fix: the path was hardcoded to 'd:/标签运营/derive_results.json', but
    # image_tag_derive.py writes the file next to itself. Default to the file
    # beside this script; an explicit path may still be passed as argv[1].
    default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'derive_results.json')
    path = sys.argv[1] if len(sys.argv) > 1 else default_path
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    success, failed = summarize(data)
    print(f"总数: {len(data)}")
    print(f"成功: {len(success)}")
    print(f"失败: {len(failed)}")
    if failed:
        print("\n失败详情:")
        for d in failed:
            print(f"  ID: {d['image_id']}, 标签: {d['tag_name']}")
            print(f"  错误: {d.get('error', '未知')}")
            print()


if __name__ == "__main__":
    main()

4
config/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
# -*- coding: utf-8 -*-
# Package facade: re-export the module-level singleton and the Settings class
# so callers can write `from config import settings`.
from .settings import settings, Settings
__all__ = ['settings', 'Settings']

142
config/settings.py Normal file
View File

@@ -0,0 +1,142 @@
# -*- coding: utf-8 -*-
"""
配置管理模块
支持环境变量和默认值,统一管理所有配置项
"""
import os
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class DatabaseConfig:
    """MySQL connection settings (each field overridable via DB_* env vars
    in Settings.from_env)."""
    host: str = "localhost"
    port: int = 3306
    user: str = "root"
    # SECURITY NOTE(review): a real-looking password is committed as the
    # default here — it should be rotated and supplied only via DB_PASSWORD.
    password: str = "liang20020523"  # database password
    database: str = "ai_article"
    charset: str = "utf8mb4"
    pool_size: int = 5  # connection-pool size used by DatabaseManager
@dataclass
class QwenConfig:
    """Qwen (DashScope) model settings."""
    # SECURITY NOTE(review): a real-looking API key is committed as the
    # default — rotate it and supply only via DASHSCOPE_API_KEY.
    api_key: str = "sk-e6a38204022a4b538b8954f0584712af"
    vision_model: str = "qwen-vl-max"  # multimodal (vision) model name
    text_model: str = "qwen-turbo"  # text-only model name
    max_retries: int = 3  # max retry attempts for API calls
    retry_delay: float = 1.0  # initial retry interval (seconds)
    timeout: int = 60  # request timeout (seconds)
@dataclass
class TagDeriveConfig:
    """Tag-derivation pipeline settings."""
    batch_size: int = 3  # images sent per model call
    min_derived_tags: int = 5  # minimum derived tags requested
    max_derived_tags: int = 10  # maximum derived tags requested
    max_tag_length: int = 10  # max characters per single tag
    max_total_tags: Optional[int] = 8  # cap on merged tag count; None = unlimited
    image_cdn_base: str = "http://images11.bxmkb.cn/Images/"  # CDN prefix joined with image_thumb_url
@dataclass
class LogConfig:
    """Logging settings."""
    level: str = "INFO"
    format: str = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    file_path: Optional[str] = None  # log file path; None = console only
@dataclass
class Settings:
    """Aggregated application configuration (DB, Qwen, tag-derive, log, API)."""
    db: DatabaseConfig = field(default_factory=DatabaseConfig)
    qwen: QwenConfig = field(default_factory=QwenConfig)
    tag_derive: TagDeriveConfig = field(default_factory=TagDeriveConfig)
    log: LogConfig = field(default_factory=LogConfig)
    # API服务配置 — FastAPI bind address/port and debug flag.
    api_host: str = "0.0.0.0"
    api_port: int = 8000
    debug: bool = False
    @classmethod
    def from_env(cls) -> 'Settings':
        """Build a Settings instance; environment variables override defaults.

        Every getenv call falls back to the dataclass default, so a missing
        variable never changes behaviour. Numeric values are cast explicitly,
        making a malformed env var fail fast at import time.
        """
        settings = cls()
        # 数据库配置
        settings.db.host = os.getenv("DB_HOST", settings.db.host)
        settings.db.port = int(os.getenv("DB_PORT", settings.db.port))
        settings.db.user = os.getenv("DB_USER", settings.db.user)
        settings.db.password = os.getenv("DB_PASSWORD", settings.db.password)
        settings.db.database = os.getenv("DB_DATABASE", settings.db.database)
        settings.db.pool_size = int(os.getenv("DB_POOL_SIZE", settings.db.pool_size))
        # 千问配置
        settings.qwen.api_key = os.getenv("DASHSCOPE_API_KEY", settings.qwen.api_key)
        settings.qwen.vision_model = os.getenv("QWEN_VISION_MODEL", settings.qwen.vision_model)
        settings.qwen.text_model = os.getenv("QWEN_TEXT_MODEL", settings.qwen.text_model)
        settings.qwen.max_retries = int(os.getenv("QWEN_MAX_RETRIES", settings.qwen.max_retries))
        settings.qwen.retry_delay = float(os.getenv("QWEN_RETRY_DELAY", settings.qwen.retry_delay))
        # Fix: timeout existed in QwenConfig but had no env override.
        settings.qwen.timeout = int(os.getenv("QWEN_TIMEOUT", settings.qwen.timeout))
        # 标签衍生配置
        settings.tag_derive.batch_size = int(os.getenv("BATCH_SIZE", settings.tag_derive.batch_size))
        settings.tag_derive.min_derived_tags = int(os.getenv("MIN_DERIVED_TAGS", settings.tag_derive.min_derived_tags))
        settings.tag_derive.max_derived_tags = int(os.getenv("MAX_DERIVED_TAGS", settings.tag_derive.max_derived_tags))
        settings.tag_derive.image_cdn_base = os.getenv("IMAGE_CDN_BASE", settings.tag_derive.image_cdn_base)
        max_total = os.getenv("MAX_TOTAL_TAGS")
        if max_total:
            settings.tag_derive.max_total_tags = int(max_total)
        # 日志配置
        settings.log.level = os.getenv("LOG_LEVEL", settings.log.level)
        settings.log.file_path = os.getenv("LOG_FILE_PATH", settings.log.file_path)
        # API配置
        settings.api_host = os.getenv("API_HOST", settings.api_host)
        settings.api_port = int(os.getenv("API_PORT", settings.api_port))
        settings.debug = os.getenv("DEBUG", "false").lower() == "true"
        return settings
# 全局配置实例
# Module-level singleton, imported everywhere as `from config.settings import settings`.
# Environment variables are read exactly once, at first import of this module.
settings = Settings.from_env()
# ============== 便捷访问 ==============
def get_db_config() -> dict:
    """Return the database settings as a keyword dict for mysql.connector pooling."""
    cfg = settings.db
    return {
        "host": cfg.host,
        "port": cfg.port,
        "user": cfg.user,
        "password": cfg.password,
        "database": cfg.database,
        "charset": cfg.charset,
        "collation": "utf8mb4_general_ci",
        "autocommit": True,
        "pool_name": "ai_article_pool",
        "pool_size": cfg.pool_size,
    }
def get_qwen_api_key() -> str:
    """Return the DashScope API key from the loaded settings."""
    return settings.qwen.api_key
if __name__ == "__main__":
    # Smoke test: print the resolved configuration for a quick sanity check.
    print("=" * 50)
    print("配置信息")
    print("=" * 50)
    print(f"数据库: {settings.db.host}:{settings.db.port}/{settings.db.database}")
    print(f"千问模型: {settings.qwen.vision_model}")
    print(f"批量大小: {settings.tag_derive.batch_size}")
    print(f"日志级别: {settings.log.level}")
    print(f"API端口: {settings.api_port}")

239
database_config.py Normal file
View File

@@ -0,0 +1,239 @@
# -*- coding: utf-8 -*-
"""
数据库配置管理模块
统一管理数据库连接和SQL操作
配置统一从 config/settings.py 读取
"""
import mysql.connector
from mysql.connector import pooling
from contextlib import contextmanager
from typing import List, Dict, Any, Optional
# 导入统一配置
from config.settings import settings
class DatabaseManager:
    """数据库管理器 - 单例模式

    Holds one process-wide MySQLConnectionPool; all DAOs route through this
    class so connection settings live in a single place (config.settings).
    """
    _instance = None  # the singleton instance
    _pool = None  # class-level pool, created once on first construction
    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance
    def __init__(self):
        # __init__ runs on every DatabaseManager() call; guard so the pool
        # is only created the first time.
        if self._pool is None:
            self._init_pool()
    def _init_pool(self):
        """初始化连接池,使用统一配置"""
        try:
            self._pool = pooling.MySQLConnectionPool(
                pool_name="ai_article_pool",
                pool_size=settings.db.pool_size,
                host=settings.db.host,
                port=settings.db.port,
                user=settings.db.user,
                password=settings.db.password,
                database=settings.db.database,
                charset=settings.db.charset,
                collation="utf8mb4_general_ci",
                autocommit=True
            )
            print(f"[DatabaseManager] 连接池初始化成功,池大小: {settings.db.pool_size}")
        except Exception as e:
            print(f"[DatabaseManager] 连接池初始化失败: {e}")
            raise
    def get_connection(self):
        """从连接池获取连接"""
        return self._pool.get_connection()
    @contextmanager
    def get_cursor(self, dictionary: bool = True):
        """获取游标的上下文管理器

        Commits on success, rolls back on any exception, and always closes
        the cursor and returns the connection to the pool.
        """
        conn = self.get_connection()
        cursor = conn.cursor(dictionary=dictionary)
        try:
            yield cursor
            conn.commit()
        except Exception:
            conn.rollback()
            # Fix: was `raise e`, which re-raises from this frame; a bare
            # `raise` preserves the original traceback of the failing SQL.
            raise
        finally:
            cursor.close()
            conn.close()
    def execute_query(self, sql: str, params: tuple = None) -> List[Dict[str, Any]]:
        """执行查询SQL返回结果列表 (all rows as dicts)."""
        with self.get_cursor() as cursor:
            cursor.execute(sql, params or ())
            return cursor.fetchall()
    def execute_one(self, sql: str, params: tuple = None) -> Optional[Dict[str, Any]]:
        """执行查询SQL返回单条结果 (first row or None)."""
        with self.get_cursor() as cursor:
            cursor.execute(sql, params or ())
            return cursor.fetchone()
    def execute_update(self, sql: str, params: tuple = None) -> int:
        """执行更新SQL返回影响行数"""
        with self.get_cursor() as cursor:
            cursor.execute(sql, params or ())
            return cursor.rowcount
    def execute_insert(self, sql: str, params: tuple = None) -> int:
        """执行插入SQL返回插入ID (lastrowid)."""
        with self.get_cursor() as cursor:
            cursor.execute(sql, params or ())
            return cursor.lastrowid
    def execute_many(self, sql: str, params_list: List[tuple]) -> int:
        """批量执行SQL返回影响行数"""
        with self.get_cursor() as cursor:
            cursor.executemany(sql, params_list)
            return cursor.rowcount
class ImageTagsDAO:
    """Data-access object for the ai_image_tags table.

    Thin parameterized-SQL wrappers over DatabaseManager; every call borrows
    a pooled connection via the manager's cursor context manager.
    """
    def __init__(self):
        self.db = DatabaseManager()
        self.table = "ai_image_tags"
    def get_by_id(self, id: int) -> Optional[Dict[str, Any]]:
        """Fetch one row by primary key, or None."""
        sql = f"SELECT * FROM {self.table} WHERE id = %s"
        return self.db.execute_one(sql, (id,))
    def get_list(self, limit: int = 10, offset: int = 0,
                 status: str = None, department_id: int = None) -> List[Dict[str, Any]]:
        """Page through rows (newest first), optionally filtered by status and department."""
        sql = f"SELECT * FROM {self.table} WHERE 1=1"
        params = []
        if status:
            sql += " AND status = %s"
            params.append(status)
        if department_id:
            sql += " AND department_id = %s"
            params.append(department_id)
        sql += " ORDER BY id DESC LIMIT %s OFFSET %s"
        params.extend([limit, offset])
        return self.db.execute_query(sql, tuple(params))
    def get_for_tag_derive(self, limit: int = 10, offset: int = 0) -> List[Dict[str, Any]]:
        """Rows eligible for tag derivation: non-empty image_url and tag_name."""
        sql = f"""
            SELECT id, image_id, image_name, image_url, image_thumb_url,
                   tag_id, tag_name, keywords_id, keywords_name,
                   department_id, department_name, status
            FROM {self.table}
            WHERE image_url != '' AND tag_name != ''
            ORDER BY id DESC
            LIMIT %s OFFSET %s
        """
        return self.db.execute_query(sql, (limit, offset))
    def get_by_tag_name(self, tag_name: str, limit: int = 100) -> List[Dict[str, Any]]:
        """Fetch rows whose tag_name matches exactly."""
        sql = f"SELECT * FROM {self.table} WHERE tag_name = %s LIMIT %s"
        return self.db.execute_query(sql, (tag_name, limit))
    def get_by_department(self, department_id: int, limit: int = 100) -> List[Dict[str, Any]]:
        """Fetch rows for one department, newest first."""
        sql = f"SELECT * FROM {self.table} WHERE department_id = %s ORDER BY id DESC LIMIT %s"
        return self.db.execute_query(sql, (department_id, limit))
    def count_by_status(self) -> List[Dict[str, Any]]:
        """Return per-status row counts."""
        sql = f"SELECT status, COUNT(*) as count FROM {self.table} GROUP BY status"
        return self.db.execute_query(sql)
    def update_status(self, id: int, status: str) -> int:
        """Set one row's status; returns affected-row count."""
        sql = f"UPDATE {self.table} SET status = %s WHERE id = %s"
        return self.db.execute_update(sql, (status, id))
    def batch_update_status(self, ids: List[int], status: str) -> int:
        """Set status for many ids at once; no-op (returns 0) on an empty list."""
        if not ids:
            return 0
        placeholders = ",".join(["%s"] * len(ids))
        sql = f"UPDATE {self.table} SET status = %s WHERE id IN ({placeholders})"
        params = [status] + ids
        return self.db.execute_update(sql, tuple(params))
    def insert(self, data: Dict[str, Any]) -> int:
        """Insert one row from a column->value dict; returns the new id."""
        columns = ", ".join(data.keys())
        placeholders = ", ".join(["%s"] * len(data))
        sql = f"INSERT INTO {self.table} ({columns}) VALUES ({placeholders})"
        return self.db.execute_insert(sql, tuple(data.values()))
    def batch_insert(self, data_list: List[Dict[str, Any]]) -> int:
        """Insert many rows; columns are taken from the FIRST dict, so all
        dicts must share the same keys. Returns affected-row count."""
        if not data_list:
            return 0
        columns = ", ".join(data_list[0].keys())
        placeholders = ", ".join(["%s"] * len(data_list[0]))
        sql = f"INSERT INTO {self.table} ({columns}) VALUES ({placeholders})"
        params_list = [tuple(d.values()) for d in data_list]
        return self.db.execute_many(sql, params_list)
# ============== 便捷函数 ==============
def get_db() -> DatabaseManager:
    """Return the process-wide DatabaseManager singleton."""
    return DatabaseManager()
def get_image_tags_dao() -> ImageTagsDAO:
    """Return a new ImageTagsDAO (cheap; shares the singleton pool)."""
    return ImageTagsDAO()
# ============== 测试代码 ==============
if __name__ == "__main__":
    # Manual smoke test — requires a reachable MySQL configured in
    # config.settings; exercises the pool and the DAO read paths.
    print("=" * 50)
    print("数据库配置管理模块测试")
    print("=" * 50)
    # [1] Verify a pooled connection can run a trivial query.
    print("\n[1] 测试数据库连接...")
    try:
        db = get_db()
        result = db.execute_one("SELECT 1 as test")
        print(f"连接成功: {result}")
    except Exception as e:
        print(f"连接失败: {e}")
    # [2] Exercise the DAO list and aggregation helpers.
    print("\n[2] 测试 ImageTagsDAO...")
    try:
        dao = get_image_tags_dao()
        # Fetch a small page of rows.
        items = dao.get_list(limit=3)
        print(f"获取到 {len(items)} 条记录")
        for item in items:
            print(f" - ID: {item['id']}, 标签: {item['tag_name']}")
        # Per-status counts.
        stats = dao.count_by_status()
        print(f"\n状态统计:")
        for stat in stats:
            print(f" - {stat['status']}: {stat['count']}")
    except Exception as e:
        print(f"DAO测试失败: {e}")
    print("\n" + "=" * 50)
    print("测试完成")

53
derive_results.json Normal file
View File

@@ -0,0 +1,53 @@
[
{
"success": true,
"image_id": 16496,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"阴道炎",
"宫颈炎",
"盆腔炎",
"感染因素",
"个人卫生",
"抗生素治疗",
"抗炎药物",
"预防措施"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##感染因素#",
"new_tag_id": 12681
},
{
"success": true,
"image_id": 16497,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"阴道炎",
"宫颈炎",
"盆腔炎",
"感染因素",
"个人卫生",
"抗生素治疗",
"抗炎药物",
"预防措施"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##感染因素#",
"new_tag_id": 12681
},
{
"success": true,
"image_id": 16498,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"阴道炎",
"宫颈炎",
"盆腔炎",
"感染因素",
"个人卫生",
"抗生素治疗",
"抗炎药物",
"预防措施"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##感染因素#",
"new_tag_id": 12681
}
]

374
image_tag_derive.py Normal file
View File

@@ -0,0 +1,374 @@
# -*- coding: utf-8 -*-
"""
千问大模型 - 图片标签衍生生成脚本
流程每次批量2-3张图片 -> 大模型返回各自衍生标签 -> 分别更新数据库
"""
import os
import json
from http import HTTPStatus
from typing import List, Dict, Optional
from database_config import get_db
from config.settings import settings
from logger import get_logger, log_info, log_error
from retry_handler import retry
# 初始化日志
logger = get_logger("tag_derive")
try:
import dashscope
from dashscope import MultiModalConversation
except ImportError:
logger.error("请先安装 dashscope: pip install dashscope")
exit(1)
# 使用配置中心的API Key (process-global side effect for all dashscope calls)
dashscope.api_key = settings.qwen.api_key
# ============== Prompt模板 ==============
# Multi-image batch prompt. This is a runtime string sent to the model:
# the doubled braces {{ }} survive str.format() as literal JSON braces, and
# derive_tags_batch expects the {"results": [...]} shape described below,
# keyed by 1-based image_index.
BATCH_DERIVE_PROMPT = """你是一个专业的医疗健康内容标签分析专家。
## 任务
我提供了{image_count}张医疗健康相关图片,每张图片有一个原始标签。请分析每张图片,为每张图片生成衍生标签。
## 图片及原始标签
{image_tags_list}
## 要求
1. 分析每张图片内容,结合其原始标签
2. 为每张图片生成 5-8 个衍生标签
3. 衍生标签包括:同义词、上位概念、下位概念、相关症状/治疗等
4. 标签简洁每个不超过10个字
## 输出格式
请严格以JSON格式输出按图片顺序返回
```json
{{
"results": [
{{"image_index": 1, "original_tag": "原始标签1", "derived_tags": ["衍生1", "衍生2", "衍生3"]}},
{{"image_index": 2, "original_tag": "原始标签2", "derived_tags": ["衍生1", "衍生2", "衍生3"]}}
]
}}
```
注意只输出JSON不要输出其他内容。results数组长度必须等于图片数量。
"""
class TagsDAO:
    """Minimal DAO for the ai_tags table: lookup, insert, get-or-create."""
    def __init__(self):
        self.db = get_db()
        self.table = "ai_tags"
    def insert(self, tag_name: str, tag_category: str = None, department: str = None) -> int:
        """Insert a new 'active' tag row and return its auto-increment id."""
        sql = f"INSERT INTO {self.table} (tag_name, tag_category, department, status) VALUES (%s, %s, %s, 'active')"
        return self.db.execute_insert(sql, (tag_name, tag_category, department))
    def get_by_tag_name(self, tag_name: str) -> Optional[Dict]:
        """Return the first row matching tag_name, or None if absent."""
        sql = f"SELECT * FROM {self.table} WHERE tag_name = %s LIMIT 1"
        return self.db.execute_one(sql, (tag_name,))
    def get_or_create(self, tag_name: str, tag_category: str = None, department: str = None) -> int:
        """Return the id of an existing tag, inserting it first if missing."""
        row = self.get_by_tag_name(tag_name)
        return row['id'] if row else self.insert(tag_name, tag_category, department)
def parse_tag_string(tag_str: str) -> List[str]:
    """解析标签字符串,支持 #标签1##标签2# 格式和普通字符串

    Splits the stored '#a##b#' form on '#' (dropping empty/whitespace
    segments); any other string is treated as a single tag.
    """
    if tag_str.startswith('#') and tag_str.endswith('#'):
        # 已是 #标签# 格式,拆分出所有标签
        return [t for t in tag_str.split('#') if t.strip()]
    return [tag_str]


def merge_tags(original_tag: str, derived_tags: List[str], max_total_tags: int = None) -> str:
    """
    合并标签,格式: #原始标签##衍生标签1##衍生标签2#

    Original tags are always kept (deduplicated, order preserved); derived
    tags are then appended in order, skipping blanks and duplicates. When
    max_total_tags is set, derived tags are capped AFTER deduplication, so a
    derived tag that merely repeats an original one no longer wastes a slot
    (fix: the previous version truncated before deduplicating).

    Args:
        original_tag: 原始标签字符串 (may already be in '#x##y#' form)
        derived_tags: 衍生标签列表
        max_total_tags: 总标签数量上限None表示不限制
    """
    merged: List[str] = []
    seen = set()
    # 解析原始标签(可能已是 #xx##yy# 格式) — originals are never truncated.
    for tag in parse_tag_string(original_tag):
        if tag and tag not in seen:
            merged.append(tag)
            seen.add(tag)
    # Deduplicate derived tags against everything kept so far, then cap.
    novel = []
    for tag in derived_tags:
        if tag and tag not in seen:
            novel.append(tag)
            seen.add(tag)
    if max_total_tags is not None:
        novel = novel[:max(0, max_total_tags - len(merged))]
    merged.extend(novel)
    return ''.join(f'#{t}#' for t in merged)
@retry(max_retries=settings.qwen.max_retries, delay=settings.qwen.retry_delay, backoff=2.0)
def derive_tags_batch(items: List[Dict]) -> Dict:
    """
    批量调用千问大模型,每张图片独立返回衍生标签
    items: [{"id": 1, "image_url": "...", "tag_name": "高血压"}, ...]
    带重试机制

    Returns {"success": True, "results": [...]} when the reply parses, or
    {"success": False, "error": ...} when the reply contains no usable JSON.
    Transport/API failures raise so the @retry decorator can re-attempt.
    """
    logger.info(f"[批量处理] {len(items)} 张图片")
    # Build the numbered "image N -> original tag" list embedded in the prompt.
    image_tags_list = ""
    for i, item in enumerate(items):
        image_tags_list += f"- 图片{i+1}: 原始标签「{item['tag_name']}\n"
        logger.debug(f" 图片{i+1}: {item['tag_name']} - {item['image_url'][:50]}...")
    prompt = BATCH_DERIVE_PROMPT.format(
        image_count=len(items),
        image_tags_list=image_tags_list.strip()
    )
    # 构建多图消息 — all image parts first, then the single text prompt.
    content = []
    for item in items:
        content.append({"image": item['image_url']})
    content.append({"text": prompt})
    messages = [{"role": "user", "content": content}]
    response = MultiModalConversation.call(
        model=settings.qwen.vision_model,
        messages=messages
    )
    if response.status_code == HTTPStatus.OK:
        result_text = response.output.choices[0].message.content[0]["text"]
        # Extract the outermost {...} span in case the model wrapped the JSON
        # in markdown fences or extra prose.
        json_start = result_text.find('{')
        json_end = result_text.rfind('}') + 1
        if json_start == -1 or json_end <= json_start:
            # Fix: this case previously fell through and implicitly returned
            # None, crashing the caller on result.get(...).
            logger.error(" 响应中未找到JSON")
            return {"success": False, "error": "响应中未找到JSON"}
        try:
            result_json = json.loads(result_text[json_start:json_end])
            results = result_json.get('results', [])
            return {"success": True, "results": results}
        except json.JSONDecodeError as e:
            logger.error(f" JSON解析失败: {e}")
            return {"success": False, "error": "JSON解析失败"}
    else:
        error_msg = f"{response.code}-{response.message}"
        logger.error(f" API调用失败: {error_msg}")
        raise Exception(error_msg)  # 抛出异常触发重试
def process_batch(items: List[Dict], tags_dao: TagsDAO) -> List[Dict]:
    """Process one batch of images: derive tags via the model, then persist.

    Returns one result dict per input item, each carrying a 'success' flag
    and either the merged tag info or an 'error' message.
    """
    # 1. Call the vision model once for the whole batch.
    try:
        result = derive_tags_batch(items)
    except Exception as e:
        # Retries are exhausted at this point; mark the whole batch failed.
        logger.error(f"批量处理失败: {e}")
        return [{"success": False, "image_id": item['id'], "error": str(e)} for item in items]
    if not result.get('success'):
        return [{"success": False, "image_id": item['id'], "error": result.get('error')} for item in items]
    api_results = result.get('results', [])
    db = get_db()
    process_results = []
    # 2. Match each input item to its model result and update the DB.
    for i, item in enumerate(items):
        # Prefer an explicit match by image_index or original_tag ...
        derived_tags = []
        for r in api_results:
            if r.get('image_index') == i + 1 or r.get('original_tag') == item['tag_name']:
                derived_tags = r.get('derived_tags', [])
                break
        # ... falling back to positional alignment when the model mislabels.
        if not derived_tags and i < len(api_results):
            derived_tags = api_results[i].get('derived_tags', [])
        if not derived_tags:
            process_results.append({"success": False, "image_id": item['id'], "error": "未找到衍生标签"})
            continue
        logger.info(f" [{item['tag_name']}] 衍生: {derived_tags}")
        # Merge original + derived tags (capped by max_total_tags when set).
        max_total = getattr(settings.tag_derive, 'max_total_tags', None)
        merged_tag_name = merge_tags(item['tag_name'], derived_tags, max_total_tags=max_total)
        # Upsert the merged tag into ai_tags to obtain its id.
        try:
            new_tag_id = tags_dao.get_or_create(merged_tag_name, '衍生标签', item.get('department_name', ''))
        except Exception as e:
            process_results.append({"success": False, "image_id": item['id'], "error": str(e)})
            continue
        # Point the image row at the new merged tag.
        try:
            sql = "UPDATE ai_image_tags SET tag_id = %s, tag_name = %s WHERE id = %s"
            db.execute_update(sql, (new_tag_id, merged_tag_name, item['id']))
            process_results.append({
                "success": True,
                "image_id": item['id'],
                "original_tag": item['tag_name'],
                "derived_tags": derived_tags,
                "merged_tag": merged_tag_name,
                "new_tag_id": new_tag_id
            })
            logger.info(f" ✓ ID:{item['id']} -> tag_id:{new_tag_id}")
        except Exception as e:
            process_results.append({"success": False, "image_id": item['id'], "error": str(e)})
    return process_results
def batch_derive_tags(batch_size: int = None, start_id: int = None, end_id: int = None, ids: List[int] = None) -> List[Dict]:
    """Run derivation over the eligible rows in batches of `batch_size` images.

    Args:
        batch_size: images per model call (defaults to settings.tag_derive.batch_size)
        start_id: first ai_image_tags.id to process (resume support)
        end_id: last id to process (inclusive)
        ids: explicit id list; when given, the range filters are ignored

    Rows whose current tag already has tag_category = '衍生标签' are filtered
    out in SQL, so already-processed records are skipped on re-runs.
    """
    if batch_size is None:
        batch_size = settings.tag_derive.batch_size
    tags_dao = TagsDAO()
    db = get_db()
    # 查询需要处理的记录
    if ids:
        # 按指定ID查询同样检查是否已有衍生标签
        placeholders = ','.join(['%s'] * len(ids))
        sql = f"""
            SELECT it.id, it.image_thumb_url, it.tag_id, it.tag_name, it.department_name
            FROM ai_image_tags it
            LEFT JOIN ai_tags t ON it.tag_id = t.id
            WHERE it.id IN ({placeholders})
            AND it.image_thumb_url != '' AND it.tag_name != ''
            AND (t.tag_category IS NULL OR t.tag_category != '衍生标签')
            ORDER BY it.id
        """
        items = db.execute_query(sql, ids)
    else:
        # 按条件查询 (optional start/end id bounds appended below)
        sql = """
            SELECT it.id, it.image_thumb_url, it.tag_id, it.tag_name, it.department_name
            FROM ai_image_tags it
            LEFT JOIN ai_tags t ON it.tag_id = t.id
            WHERE it.image_thumb_url != '' AND it.tag_name != ''
            AND (t.tag_category IS NULL OR t.tag_category != '衍生标签')
        """
        params = []
        if start_id is not None:
            sql += " AND it.id >= %s"
            params.append(start_id)
        if end_id is not None:
            sql += " AND it.id <= %s"
            params.append(end_id)
        sql += " ORDER BY it.id"
        items = db.execute_query(sql, params) if params else db.execute_query(sql)
    if not items:
        logger.info("没有需要处理的数据")
        return []
    # 拼接完整图片URL — thumbnails are stored as CDN-relative paths.
    for item in items:
        if item.get('image_thumb_url'):
            item['image_url'] = settings.tag_derive.image_cdn_base + item['image_thumb_url']
        else:
            item['image_url'] = ''
    total = len(items)
    logger.info(f"待处理: {total} 条,每批 {batch_size}")
    all_results = []
    # 分批处理
    for i in range(0, total, batch_size):
        batch = items[i:i+batch_size]
        batch_num = i // batch_size + 1
        logger.info(f"{'='*60}")
        logger.info(f"批次 {batch_num}/{(total + batch_size - 1) // batch_size}")
        results = process_batch(batch, tags_dao)
        all_results.extend(results)
        success = sum(1 for r in results if r.get('success'))
        logger.info(f" 批次完成: {success}/{len(batch)}")
    success_count = sum(1 for r in all_results if r.get('success'))
    logger.info(f"{'='*60}")
    logger.info(f"全部完成: 成功 {success_count}/{len(all_results)}")
    return all_results
def print_summary(results: List[Dict]):
    """Log a human-readable run summary: totals plus one line per record."""
    divider = "=" * 60
    logger.info(divider)
    logger.info("处理结果摘要")
    logger.info(divider)
    ok = sum(1 for r in results if r.get('success'))
    logger.info(f"总数: {len(results)}, 成功: {ok}, 失败: {len(results) - ok}")
    logger.info("详细结果:")
    for r in results:
        if r.get('success'):
            logger.info(f" [ID:{r['image_id']}] {r['original_tag']} -> {r['merged_tag'][:40]}...")
        else:
            logger.warning(f" [ID:{r.get('image_id')}] 失败: {r.get('error')}")
def main():
    """CLI entry point: parse arguments, run the batch job, dump the results."""
    import argparse
    parser = argparse.ArgumentParser(description='千问视觉大模型 - 图片标签衍生生成器')
    parser.add_argument('--start-id', type=int, default=None, help='起始ID从该ID开始处理用于断点续传')
    parser.add_argument('--end-id', type=int, default=None, help='结束ID处理到该ID为止')
    parser.add_argument('--batch-size', type=int, default=None, help='每批处理的图片数量')
    parser.add_argument('--id', type=int, nargs='+', default=None, help='指定ID只处理这些ID可指定多个')
    args = parser.parse_args()
    logger.info("=" * 60)
    logger.info("千问视觉大模型 - 图片标签衍生生成器")
    logger.info(f"模式: 每批{args.batch_size or settings.tag_derive.batch_size}张图片,各自返回衍生标签")
    # --id takes precedence over the range filters (mirrors batch_derive_tags).
    if args.id:
        logger.info(f"指定ID: {args.id}")
    elif args.start_id or args.end_id:
        id_range = f"{args.start_id or '起始'} ~ {args.end_id or '结束'}"
        logger.info(f"ID范围: {id_range}")
    logger.info("=" * 60)
    results = batch_derive_tags(batch_size=args.batch_size, start_id=args.start_id, end_id=args.end_id, ids=args.id)
    if results:
        print_summary(results)
        # Persist the raw per-image results next to this script for later
        # inspection (read back by check_results.py).
        output_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "derive_results.json")
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        logger.info(f"结果已保存到: {output_file}")
if __name__ == "__main__":
    main()

118
logger.py Normal file
View File

@@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-
"""
日志管理模块
统一管理日志配置和输出
"""
import os
import logging
import sys
from datetime import datetime
from typing import Optional
# 日志目录
# Fix: the previous expression was dirname(dirname(__file__)), which — with
# logger.py at the project root, as the README's project tree shows — placed
# the logs directory OUTSIDE the project. Logs belong in <project>/logs.
LOG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "logs")
def setup_logger(
    name: str = "tag_derive",
    level: str = "INFO",
    log_file: Optional[str] = None,
    console: bool = True
) -> logging.Logger:
    """Create (or fetch) a named logger with console and/or file handlers.

    Args:
        name: logger name
        level: DEBUG / INFO / WARNING / ERROR / CRITICAL (unknown -> INFO)
        log_file: log file path; None disables file output
        console: whether to attach a stdout handler
    """
    logger = logging.getLogger(name)
    if logger.handlers:
        # Already configured in this process: return it untouched so
        # repeated calls never stack duplicate handlers.
        return logger
    logger.setLevel(getattr(logging, level.upper(), logging.INFO))
    formatter = logging.Formatter(
        fmt="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    )
    handlers = []
    if console:
        handlers.append(logging.StreamHandler(sys.stdout))
    if log_file:
        # Make sure the log directory exists before opening the file.
        os.makedirs(os.path.dirname(log_file), exist_ok=True)
        handlers.append(logging.FileHandler(log_file, encoding="utf-8"))
    for handler in handlers:
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger
def get_default_log_file() -> str:
    """Return today's log file path (LOG_DIR/tag_derive_YYYYMMDD.log),
    creating LOG_DIR if it does not exist yet."""
    os.makedirs(LOG_DIR, exist_ok=True)
    date_str = datetime.now().strftime("%Y%m%d")
    return os.path.join(LOG_DIR, f"tag_derive_{date_str}.log")
# 默认logger实例 — kept for backward compatibility; holds the first logger
# created by get_logger().
_default_logger = None
def get_logger(name: str = "tag_derive") -> logging.Logger:
    """Return a configured logger for *name*.

    Fix: the previous version cached the FIRST logger ever created and
    returned it for every later call, silently ignoring the requested name
    (so every LogMixin class shared one logger). Each distinct name now gets
    its own configured logger; setup_logger's handler guard keeps repeated
    calls idempotent.
    """
    global _default_logger
    logger = setup_logger(
        name=name,
        level="INFO",
        log_file=get_default_log_file(),
        console=True
    )
    if _default_logger is None:
        _default_logger = logger
    return logger
class LogMixin:
    """日志混入类,为类提供日志能力

    Gives any subclass a `self.logger` property backed by get_logger,
    requested under the concrete class's name.
    """
    @property
    def logger(self) -> logging.Logger:
        # Cache on the instance so the lookup only happens once.
        if not hasattr(self, '_logger'):
            self._logger = get_logger(self.__class__.__name__)
        return self._logger
# ============== 便捷函数 ==============
def log_info(msg: str, *args):
    """Shortcut: info-level message on the default logger."""
    get_logger().info(msg, *args)
def log_error(msg: str, *args):
    """Shortcut: error-level message on the default logger."""
    get_logger().error(msg, *args)
def log_warning(msg: str, *args):
    """Shortcut: warning-level message on the default logger."""
    get_logger().warning(msg, *args)
def log_debug(msg: str, *args):
    """Shortcut: debug-level message on the default logger."""
    get_logger().debug(msg, *args)
if __name__ == "__main__":
    # Smoke test: emit one message per level and show the active log file.
    logger = get_logger()
    logger.info("日志系统初始化成功")
    logger.debug("这是DEBUG日志")
    logger.warning("这是WARNING日志")
    logger.error("这是ERROR日志")
    print(f"日志文件: {get_default_log_file()}")

View File

@@ -0,0 +1,261 @@
# -*- coding: utf-8 -*-
"""
千问大模型 - 图片标签衍生生成器
从 ai_image_tags 表获取图片和标签,调用千问大模型生成标签衍生
配置统一从 config/settings.py 读取
"""
import os
import sys
import json
from http import HTTPStatus
# 添加项目根目录到路径
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# 导入统一配置
from config.settings import settings
from database_config import get_image_tags_dao, ImageTagsDAO
# 尝试导入dashscope如果没有安装会提示
try:
import dashscope
from dashscope import MultiModalConversation
except ImportError:
print("请先安装 dashscope: pip install dashscope")
exit(1)
# ============== 提示词模板 ==============
# Vision prompt: the image itself is attached to the request, so only the
# tag name is interpolated. NOTE: these templates are runtime strings sent
# to the model — the doubled braces {{ }} survive str.format() as literal
# JSON braces describing the expected reply shape.
TAG_DERIVE_PROMPT_TEMPLATE = """你是一个专业的医疗健康内容标签分析专家。
## 任务
请根据提供的图片和当前标签,生成相关的衍生标签。
## 当前标签
{tag_name}
## 要求
1. 根据图片内容和当前标签,生成 5-10 个相关的衍生标签
2. 衍生标签应该包括:
- 同义词/近义词标签
- 上位概念标签(更宽泛的分类)
- 下位概念标签(更具体的细分)
- 相关联想标签(与主题相关但角度不同)
- 应用场景标签(使用场景或人群)
3. 标签要简洁每个标签不超过10个字
4. 标签要与医疗健康领域相关
## 输出格式
请以JSON格式输出包含以下字段
```json
{{
"original_tag": "原始标签",
"derived_tags": [
{{"tag": "衍生标签1", "type": "同义词", "relevance": ""}},
{{"tag": "衍生标签2", "type": "上位概念", "relevance": ""}},
...
],
"tag_description": "对原始标签的简要描述",
"suggested_keywords": ["关键词1", "关键词2", ...]
}}
```
"""
# 纯文本模式的提示词(不使用图片) — only the tag name and the image's
# file name are interpolated; used when no image is uploaded.
TAG_DERIVE_TEXT_PROMPT_TEMPLATE = """你是一个专业的医疗健康内容标签分析专家。
## 任务
请根据提供的标签,生成相关的衍生标签。
## 当前标签
{tag_name}
## 图片信息
图片名称:{image_name}
## 要求
1. 根据当前标签,生成 5-10 个相关的衍生标签
2. 衍生标签应该包括:
- 同义词/近义词标签
- 上位概念标签(更宽泛的分类)
- 下位概念标签(更具体的细分)
- 相关联想标签(与主题相关但角度不同)
- 应用场景标签(使用场景或人群)
3. 标签要简洁每个标签不超过10个字
4. 标签要与医疗健康领域相关
## 输出格式
请以JSON格式输出包含以下字段
```json
{{
"original_tag": "原始标签",
"derived_tags": [
{{"tag": "衍生标签1", "type": "同义词", "relevance": ""}},
{{"tag": "衍生标签2", "type": "上位概念", "relevance": ""}},
...
],
"tag_description": "对原始标签的简要描述",
"suggested_keywords": ["关键词1", "关键词2", ...]
}}
```
"""
class QwenTagDeriver:
"""千问标签衍生生成器"""
    def __init__(self, api_key: str = None):
        """Set up the deriver; falls back to the configured API key.

        Assigning dashscope.api_key is a process-global side effect shared
        by every dashscope call.
        """
        self.api_key = api_key or settings.qwen.api_key
        dashscope.api_key = self.api_key
        self.dao = get_image_tags_dao()  # shared DAO using the unified DB config
def get_image_tags_from_db(self, limit: int = 10, offset: int = 0) -> list:
"""从数据库获取图片标签数据"""
return self.dao.get_for_tag_derive(limit=limit, offset=offset)
def generate_prompt(self, tag_name: str, image_name: str = "", use_image: bool = False) -> str:
"""生成提示词"""
if use_image:
return TAG_DERIVE_PROMPT_TEMPLATE.format(tag_name=tag_name)
else:
return TAG_DERIVE_TEXT_PROMPT_TEMPLATE.format(
tag_name=tag_name,
image_name=image_name
)
def call_qwen_with_image(self, image_url: str, tag_name: str) -> dict:
"""调用千问多模态模型(带图片)"""
prompt = self.generate_prompt(tag_name, use_image=True)
messages = [
{
"role": "user",
"content": [
{"image": image_url},
{"text": prompt}
]
}
]
response = MultiModalConversation.call(
model=settings.qwen.vision_model, # 千问视觉大模型
messages=messages
)
if response.status_code == HTTPStatus.OK:
return {
"success": True,
"result": response.output.choices[0].message.content[0]["text"]
}
else:
return {
"success": False,
"error": f"Error: {response.code} - {response.message}"
}
def call_qwen_text_only(self, tag_name: str, image_name: str = "") -> dict:
"""调用千问文本模型(不带图片)"""
from dashscope import Generation
prompt = self.generate_prompt(tag_name, image_name, use_image=False)
response = Generation.call(
model=settings.qwen.text_model, # 使用配置中的文本模型
prompt=prompt,
result_format="message"
)
if response.status_code == HTTPStatus.OK:
return {
"success": True,
"result": response.output.choices[0].message.content
}
else:
return {
"success": False,
"error": f"Error: {response.code} - {response.message}"
}
def derive_tags_for_image(self, image_data: dict, use_image: bool = False) -> dict:
"""为单个图片生成衍生标签"""
tag_name = image_data.get("tag_name", "")
image_url = image_data.get("image_url", "")
image_name = image_data.get("image_name", "")
print(f"\n处理标签: {tag_name}")
print(f"图片URL: {image_url[:50]}..." if len(image_url) > 50 else f"图片URL: {image_url}")
if use_image and image_url:
result = self.call_qwen_with_image(image_url, tag_name)
else:
result = self.call_qwen_text_only(tag_name, image_name)
return {
"image_id": image_data.get("image_id"),
"tag_id": image_data.get("tag_id"),
"original_tag": tag_name,
"image_url": image_url,
"derive_result": result
}
def batch_derive_tags(self, limit: int = 5, use_image: bool = False) -> list:
"""批量生成衍生标签"""
image_tags = self.get_image_tags_from_db(limit=limit)
results = []
for item in image_tags:
result = self.derive_tags_for_image(item, use_image)
results.append(result)
return results
def main():
    """Demo entry point: DB fetch sample, prompt preview, then one API call."""
    print("=" * 60)
    print("千问大模型 - 图片标签衍生生成器")
    print("=" * 60)
    # Initialize the deriver (reads API key and DAO from unified settings)
    deriver = QwenTagDeriver()
    # 1. Sample fetch from the database (non-fatal if the DB is unreachable)
    print("\n[1] 从数据库获取图片标签数据...")
    try:
        image_tags = deriver.get_image_tags_from_db(limit=3)
        if image_tags:
            print(f"获取到 {len(image_tags)} 条数据:")
            for item in image_tags:
                print(f"  - ID: {item['id']}, 标签: {item['tag_name']}")
        else:
            print("数据库中暂无数据")
    except Exception as e:
        print(f"数据库连接失败: {e}")
        image_tags = []
    # 2. Show what the generated prompt looks like (text-only mode)
    print("\n[2] 生成提示词示例:")
    sample_tag = "高血压"
    sample_prompt = deriver.generate_prompt(sample_tag, "blood_pressure.jpg")
    print("-" * 40)
    # Truncate long prompts so the console stays readable
    print(sample_prompt[:500] + "..." if len(sample_prompt) > 500 else sample_prompt)
    print("-" * 40)
    # 3. Call the Qwen API only when a real key is configured
    print("\n[3] 调用千问API生成衍生标签...")
    if not settings.qwen.api_key or settings.qwen.api_key == "your-api-key-here":
        print("请先设置有效的 DASHSCOPE_API_KEY")
        print("可以通过环境变量设置: export DASHSCOPE_API_KEY=your-key")
        print("或修改 config/settings.py 中的配置")
    else:
        # Text-only mode: cheaper, no image upload required for the demo
        result = deriver.call_qwen_text_only(sample_tag, "示例图片")
        if result["success"]:
            print("生成结果:")
            print(result["result"])
        else:
            print(f"调用失败: {result['error']}")

if __name__ == "__main__":
    main()

34
query_tags.py Normal file
View File

@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
"""Dump every ai_image_tags row with its tag-related columns to stdout."""
from database_config import get_db

db = get_db()
# Fetch all tag-related columns for every row
sql = """
SELECT id, image_id, image_name,
       tag_id, tag_name,
       default_tag_id, default_tag_name,
       keywords_id, keywords_name,
       department_id, department_name,
       status
FROM ai_image_tags
ORDER BY id
"""
results = db.execute_query(sql)


def _cell(value):
    """Render a column for aligned output.

    BUGFIX: NULL columns come back as None, and f"{None:<12}" raises
    TypeError (NoneType does not support format specs) — coerce to str first.
    """
    return '' if value is None else str(value)


print(f"{'=' * 120}")
print(f"ai_image_tags 表中共有 {len(results)} 条数据")
print(f"{'=' * 120}")
# Header row
print(f"{'ID':<6} {'图片ID':<8} {'标签名':<15} {'初始标签名':<15} {'关键词':<12} {'科室':<10} {'状态':<10}")
print(f"{'-' * 120}")
for r in results:
    print(f"{_cell(r['id']):<6} {_cell(r['image_id']):<8} {_cell(r['tag_name']):<15} {_cell(r['default_tag_name']):<15} {_cell(r['keywords_name']):<12} {_cell(r['department_name']):<10} {_cell(r['status']):<10}")
print(f"{'=' * 120}")
print(f"总计: {len(results)} 条记录")

BIN
requirements.txt Normal file

Binary file not shown.

162
reset_image_tags_data.py Normal file
View File

@@ -0,0 +1,162 @@
# -*- coding: utf-8 -*-
"""
重置 ai_image_tags 和 ai_tags 表数据
从 ai_image_tags.txt (Tab分隔格式) 导入数据
"""
import os
from database_config import get_db
from logger import get_logger
logger = get_logger("reset_data")
def parse_tsv_file(file_path: str) -> list:
    """Parse the tab-separated ai_image_tags.txt export into row dicts.

    Skips export chrome (host banner, URLs, phpMyAdmin status lines, echoed
    SQL), locates the header row, then maps each data row positionally.
    """
    def to_int(raw: str, fallback: int = 0) -> int:
        # Empty cells become the fallback instead of raising on int('')
        return int(raw) if raw else fallback

    with open(file_path, 'r', encoding='utf-8') as fh:
        raw_lines = fh.readlines()

    header = None
    records = []
    for raw in raw_lines:
        row = raw.strip()
        if not row:
            continue
        # Export preamble / viewer chrome — not data
        if row.startswith('8.149') or row.startswith('http://') or '正在显示' in row or row.startswith('SELECT'):
            continue
        # The header row names the columns and marks where data begins
        if row.startswith('id\t'):
            header = row.split('\t')
            logger.info(f"解析到表头: {len(header)} 列")
            continue
        # Data rows start with a numeric id and only count after the header
        if not (header and row[0].isdigit()):
            continue
        fields = row.split('\t')
        if len(fields) < 19:  # need at least 19 columns
            continue
        records.append({
            'id': to_int(fields[0]),
            'image_id': to_int(fields[1]),
            'image_name': fields[2],
            'image_url': fields[3],
            'image_thumb_url': fields[4],
            'tag_id': to_int(fields[5]),
            'tag_name': fields[6],
            'default_tag_id': to_int(fields[7]),
            'default_tag_name': fields[8],
            'keywords_id': to_int(fields[9]),
            'keywords_name': fields[10],
            'department_id': to_int(fields[11]),
            'department_name': fields[12],
            'image_source': to_int(fields[13], 1),
            'created_user_id': to_int(fields[14]),
            'created_at': fields[15] if fields[15] else None,
            'updated_at': fields[16] if fields[16] else None,
            'image_attached_article_count': to_int(fields[17]),
            'status': fields[18] if fields[18] else 'draft',
            'blocking_reason': fields[19] if len(fields) > 19 else ''
        })
    return records
def reset_tables_and_import(file_path: str):
    """
    Wipe ai_image_tags / ai_tags and re-import everything from *file_path*.

    Parses first so counts can be shown, then requires an interactive 'yes'
    before any destructive statement runs. Per-row insert failures are logged
    but do not abort the import.
    """
    db = get_db()
    # Parse before confirming so the operator sees real counts
    logger.info(f"正在解析文件: {file_path}")
    records = parse_tsv_file(file_path)
    logger.info(f"共解析 {len(records)} 条记录")
    if not records:
        logger.error("没有解析到有效数据,操作取消")
        return
    # Collect the distinct tags referenced by the records (id -> name)
    tag_map = {}
    for r in records:
        if r['tag_id'] and r['tag_name']:
            tag_map[r['tag_id']] = r['tag_name']
    # Show the plan and require explicit confirmation — this is irreversible
    print(f"\n即将执行以下操作:")
    print(f"1. 清空 ai_image_tags 表")
    print(f"2. 清空 ai_tags 表")
    print(f"3. 导入 {len(tag_map)} 条 ai_tags 记录")
    print(f"4. 导入 {len(records)} 条 ai_image_tags 记录")
    print(f"\n注意: 此操作不可逆!")
    confirm = input("\n确认执行? (输入 yes 继续): ")
    if confirm.lower() != 'yes':
        logger.info("操作已取消")
        return
    try:
        # Empty the tables (child table first, then parent, for FK safety)
        logger.info("清空 ai_image_tags 表...")
        db.execute_update("DELETE FROM ai_image_tags")
        logger.info("清空 ai_tags 表...")
        db.execute_update("DELETE FROM ai_tags")
        # Reset the auto-increment counters on the now-empty tables
        db.execute_update("ALTER TABLE ai_image_tags AUTO_INCREMENT = 1")
        db.execute_update("ALTER TABLE ai_tags AUTO_INCREMENT = 1")
        # Insert tags, preserving their original ids
        logger.info(f"插入 ai_tags...")
        for tag_id, tag_name in tag_map.items():
            sql = "INSERT INTO ai_tags (id, tag_name, status) VALUES (%s, %s, 'active')"
            try:
                db.execute_insert(sql, (tag_id, tag_name))
            except Exception as e:
                logger.warning(f"插入 tag {tag_id} 失败: {e}")
        # Insert image/tag rows; count successes, log failures, keep going
        logger.info(f"插入 ai_image_tags...")
        success_count = 0
        for r in records:
            sql = """
            INSERT INTO ai_image_tags
            (id, image_id, image_name, image_url, image_thumb_url, tag_id, tag_name,
             default_tag_id, default_tag_name, keywords_id, keywords_name,
             department_id, department_name, image_source, created_user_id,
             created_at, updated_at, image_attached_article_count, status, blocking_reason)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """
            params = (
                r['id'], r['image_id'], r['image_name'], r['image_url'], r['image_thumb_url'],
                r['tag_id'], r['tag_name'], r['default_tag_id'], r['default_tag_name'],
                r['keywords_id'], r['keywords_name'], r['department_id'], r['department_name'],
                r['image_source'], r['created_user_id'], r['created_at'], r['updated_at'],
                r['image_attached_article_count'], r['status'], r['blocking_reason']
            )
            try:
                db.execute_insert(sql, params)
                success_count += 1
            except Exception as e:
                logger.error(f"插入记录 {r['id']} 失败: {e}")
        # NOTE: setting AUTO_INCREMENT below MAX(id) makes MySQL bump it to
        # MAX(id)+1, so this effectively re-syncs the counters after import
        db.execute_update("ALTER TABLE ai_image_tags AUTO_INCREMENT = 1")
        db.execute_update("ALTER TABLE ai_tags AUTO_INCREMENT = 1")
        logger.info(f"导入完成: 成功 {success_count}/{len(records)}")
    except Exception as e:
        logger.error(f"操作失败: {e}")
        raise
if __name__ == "__main__":
    # The export file is expected to sit next to this script
    file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ai_image_tags.txt")
    reset_tables_and_import(file_path)

81
reset_tags.py Normal file
View File

@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
"""
重置标签数据脚本
1. 把 ai_image_tags 的 tag_id/tag_name 恢复为 default_tag_id/default_tag_name
2. 删除 ai_tags 表中的衍生标签
"""
from database_config import get_db
def reset_database():
    """
    Restore ai_image_tags to its default tags and purge all derived tags.

    Steps: report current state -> reset tag_id/tag_name from the default_*
    columns -> delete '衍生标签' rows from ai_tags -> print a verification
    sample. Rows with default_tag_id <= 0 are left untouched.
    """
    db = get_db()
    # 1. Report the current state before changing anything
    print("=" * 60)
    print("当前数据状态")
    print("=" * 60)
    # How many derived tags exist
    derived_count = db.execute_one(
        "SELECT COUNT(*) as cnt FROM ai_tags WHERE tag_category = '衍生标签'"
    )
    print(f"ai_tags 表中的衍生标签数量: {derived_count['cnt']}")
    # How many images currently carry a tag different from their default
    need_reset = db.execute_one("""
        SELECT COUNT(*) as cnt FROM ai_image_tags
        WHERE default_tag_id > 0
          AND (tag_id != default_tag_id OR tag_name != default_tag_name)
    """)
    print(f"需要恢复到初始标签的图片数量: {need_reset['cnt']}")
    print("\n" + "=" * 60)
    print("开始执行清理")
    print("=" * 60)
    # 2. Restore each image's tag columns from the default columns
    print("\n[步骤1] 恢复 ai_image_tags 到初始标签...")
    affected = db.execute_update("""
        UPDATE ai_image_tags
        SET tag_id = default_tag_id,
            tag_name = default_tag_name
        WHERE default_tag_id > 0
    """)
    print(f"  已更新 {affected} 条记录")
    # 3. Delete the derived tags themselves
    print("\n[步骤2] 删除 ai_tags 表中的衍生标签...")
    deleted = db.execute_update(
        "DELETE FROM ai_tags WHERE tag_category = '衍生标签'"
    )
    print(f"  已删除 {deleted} 条衍生标签")
    # 4. Verify: remaining derived tags should be zero
    print("\n" + "=" * 60)
    print("清理完成,验证结果")
    print("=" * 60)
    remaining_derived = db.execute_one(
        "SELECT COUNT(*) as cnt FROM ai_tags WHERE tag_category = '衍生标签'"
    )
    print(f"剩余衍生标签数量: {remaining_derived['cnt']}")
    sample = db.execute_query("""
        SELECT id, image_id, tag_name, default_tag_name
        FROM ai_image_tags
        WHERE default_tag_id > 0
        LIMIT 5
    """)
    print("\n前5条图片标签示例:")
    for row in sample:
        print(f"  ID:{row['id']} | tag_name: {row['tag_name']} | default_tag_name: {row['default_tag_name']}")
if __name__ == "__main__":
    # Require an explicit 'yes' before the destructive reset runs
    confirm = input("确认执行数据库清理?这将:\n1. 把所有图片标签恢复为初始标签\n2. 删除所有衍生标签\n输入 'yes' 确认: ")
    if confirm.lower() == 'yes':
        reset_database()
        print("\n✓ 数据库已重置到干净状态")
    else:
        print("已取消操作")

176
retry_handler.py Normal file
View File

@@ -0,0 +1,176 @@
# -*- coding: utf-8 -*-
"""
重试机制模块
提供带重试功能的装饰器和工具函数
"""
import time
import functools
from typing import Callable, Type, Tuple, Optional, Any
from logger import get_logger
logger = get_logger("retry")
def retry(
    max_retries: int = 3,
    delay: float = 1.0,
    backoff: float = 2.0,
    exceptions: Tuple[Type[Exception], ...] = (Exception,),
    on_retry: Optional[Callable[[Exception, int], None]] = None
):
    """Decorator that retries the wrapped callable with exponential backoff.

    Args:
        max_retries: retries allowed after the first failure.
        delay: seconds to wait before the first retry.
        backoff: multiplier applied to the wait after each failed attempt.
        exceptions: exception types that trigger a retry.
        on_retry: optional callback invoked as (exception, attempt_number).

    Example:
        @retry(max_retries=3, delay=1.0, exceptions=(ConnectionError,))
        def call_api():
            ...
    """
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            wait = delay
            failure = None
            for attempt in range(max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as exc:
                    failure = exc
                    if attempt >= max_retries:
                        # Budget exhausted — log once and fall through to raise
                        logger.error(
                            f"[{func.__name__}] 已达最大重试次数({max_retries}), 最后错误: {exc}"
                        )
                        break
                    logger.warning(
                        f"[{func.__name__}] 第{attempt + 1}次调用失败: {exc}, "
                        f"{wait:.1f}秒后重试..."
                    )
                    if on_retry is not None:
                        on_retry(exc, attempt + 1)
                    time.sleep(wait)
                    wait *= backoff
            raise failure
        return wrapper
    return decorator
class RetryHandler:
    """Stateful retry helper: counts attempts and sleeps with exponential backoff."""

    def __init__(
        self,
        max_retries: int = 3,
        delay: float = 1.0,
        backoff: float = 2.0
    ):
        self.max_retries = max_retries  # retries allowed after the first failure
        self.delay = delay              # base wait in seconds
        self.backoff = backoff          # wait multiplier per attempt
        self.attempt = 0
        self.last_error = None

    def should_retry(self, error: Exception) -> bool:
        """Record *error*; if budget remains, sleep the backoff delay and return True."""
        self.last_error = error
        self.attempt += 1
        if self.attempt > self.max_retries:
            return False
        wait_time = self.delay * (self.backoff ** (self.attempt - 1))
        logger.warning(f"第{self.attempt}次重试, 等待{wait_time:.1f}秒...")
        time.sleep(wait_time)
        return True

    def reset(self):
        """Clear the attempt counter and the recorded error."""
        self.attempt = 0
        self.last_error = None

    def execute(self, func: Callable, *args, **kwargs) -> Any:
        """Run *func* until it succeeds or the retry budget is exhausted."""
        self.reset()
        while True:
            try:
                return func(*args, **kwargs)
            except Exception as exc:
                if not self.should_retry(exc):
                    raise
def retry_call(
    func: Callable,
    args: tuple = (),
    kwargs: dict = None,
    max_retries: int = 3,
    delay: float = 1.0,
    backoff: float = 2.0,
    exceptions: Tuple[Type[Exception], ...] = (Exception,)
) -> Any:
    """
    Invoke *func* with retries and exponential backoff.

    Args:
        func: callable to invoke.
        args: positional arguments.
        kwargs: keyword arguments (None means none).
        max_retries: retries allowed after the first failure.
        delay: initial wait in seconds.
        backoff: wait multiplier applied after each failed attempt.
        exceptions: exception types that trigger a retry.

    Returns:
        Whatever *func* returns on its first successful call.
    """
    call_kwargs = {} if kwargs is None else kwargs
    wait = delay
    failure = None
    attempt = 0
    while attempt <= max_retries:
        try:
            return func(*args, **call_kwargs)
        except exceptions as exc:
            failure = exc
            if attempt < max_retries:
                logger.warning(
                    f"调用失败(尝试 {attempt + 1}/{max_retries + 1}): {exc}"
                )
                time.sleep(wait)
                wait *= backoff
            attempt += 1
    raise failure
if __name__ == "__main__":
    # Manual smoke test: the decorated function fails twice and succeeds on
    # the third call, so the decorator should absorb both failures.
    call_count = 0
    @retry(max_retries=3, delay=0.5, exceptions=(ValueError,))
    def test_func():
        global call_count
        call_count += 1
        if call_count < 3:
            raise ValueError(f"模拟失败 {call_count}")
        return "成功"
    try:
        result = test_func()
        print(f"结果: {result}, 调用次数: {call_count}")
    except ValueError as e:
        print(f"最终失败: {e}")

498
tag_derive_api.py Normal file
View File

@@ -0,0 +1,498 @@
# -*- coding: utf-8 -*-
"""
标签衍生 API 服务
基于 FastAPI 封装标签衍生功能,提供 RESTful API
"""
import os
import json
import time
from http import HTTPStatus
from typing import List, Dict, Optional, Any
from datetime import datetime
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException, BackgroundTasks, Query
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
# 导入项目模块
from config.settings import settings
from database_config import get_db, ImageTagsDAO
from logger import get_logger, log_info, log_error
from retry_handler import retry
# 初始化
logger = get_logger("api")
try:
import dashscope
from dashscope import MultiModalConversation
dashscope.api_key = settings.qwen.api_key
except ImportError:
logger.error("请先安装 dashscope: pip install dashscope")
raise
# ============== Pydantic models ==============
class TagDeriveRequest(BaseModel):
    """Request body for deriving tags from a single image."""
    image_url: str = Field(..., description="图片URL")
    tag_name: str = Field(..., description="原始标签名")
    department: Optional[str] = Field(None, description="科室")
class BatchDeriveRequest(BaseModel):
    """Request body for batch derivation (at most 5 images per call)."""
    items: List[TagDeriveRequest] = Field(..., description="图片列表", max_length=5)
class TagDeriveResponse(BaseModel):
    """Per-image derivation result."""
    success: bool
    original_tag: str
    derived_tags: List[str] = []
    # merged_tag is the '#a##b#' concatenation of original + derived tags
    merged_tag: Optional[str] = None
    error: Optional[str] = None
class BatchDeriveResponse(BaseModel):
    """Aggregate result for a batch derivation call."""
    success: bool
    total: int
    success_count: int
    failed_count: int
    results: List[TagDeriveResponse]
class TaskStatusResponse(BaseModel):
    """Status snapshot of an async batch task."""
    task_id: str
    status: str  # pending, running, completed, failed
    progress: int  # 0-100
    total: int
    processed: int
    success_count: int
    failed_count: int
    started_at: Optional[str] = None
    completed_at: Optional[str] = None
class StatsResponse(BaseModel):
    """Processing statistics over the image/tag tables."""
    total_images: int
    processed_images: int
    pending_images: int
    derived_tags_count: int
# ============== 提示词模板 ==============
DERIVE_PROMPT = """你是一个专业的医疗健康内容标签分析专家。
## 任务
我提供了{image_count}张医疗健康相关图片,每张图片有一个原始标签。请分析每张图片,为每张图片生成衍生标签。
## 图片及原始标签
{image_tags_list}
## 要求
1. 分析每张图片内容,结合其原始标签
2. 为每张图片生成 {min_tags}-{max_tags} 个衍生标签
3. 衍生标签包括:同义词、上位概念、下位概念、相关症状/治疗等
4. 标签简洁,每个不超过{max_tag_length}个字
## 输出格式
请严格以JSON格式输出按图片顺序返回
```json
{{
"results": [
{{"image_index": 1, "original_tag": "原始标签1", "derived_tags": ["衍生1", "衍生2", "衍生3"]}},
{{"image_index": 2, "original_tag": "原始标签2", "derived_tags": ["衍生1", "衍生2", "衍生3"]}}
]
}}
```
注意只输出JSON不要输出其他内容。results数组长度必须等于图片数量。
"""
# ============== Core service class ==============
class TagDeriveService:
    """Core service: batch-calls the Qwen vision model and merges derived tags."""
    def __init__(self):
        self.db = get_db()                  # raw DB access for ad-hoc SQL
        self.dao = ImageTagsDAO()           # DAO for structured access
        self.config = settings.tag_derive   # derivation tuning (tag counts, CDN base)
    @retry(max_retries=3, delay=1.0, backoff=2.0)
    def call_qwen_batch(self, items: List[Dict]) -> Dict:
        """
        Send all images in one multimodal request and parse the JSON reply.
        Retried up to 3 times with exponential backoff by the decorator.
        """
        # Describe each image's original tag for the prompt (1-based index)
        image_tags_list = ""
        for i, item in enumerate(items):
            image_tags_list += f"- 图片{i+1}: 原始标签「{item['tag_name']}」\n"
        prompt = DERIVE_PROMPT.format(
            image_count=len(items),
            image_tags_list=image_tags_list.strip(),
            min_tags=self.config.min_derived_tags,
            max_tags=self.config.max_derived_tags,
            max_tag_length=self.config.max_tag_length
        )
        # One user message carrying every image plus the instruction text
        content = []
        for item in items:
            content.append({"image": item['image_url']})
        content.append({"text": prompt})
        messages = [{"role": "user", "content": content}]
        response = MultiModalConversation.call(
            model=settings.qwen.vision_model,
            messages=messages
        )
        if response.status_code == HTTPStatus.OK:
            result_text = response.output.choices[0].message.content[0]["text"]
            # Extract the JSON object from the (possibly fenced) model reply
            json_start = result_text.find('{')
            json_end = result_text.rfind('}') + 1
            if json_start != -1 and json_end > json_start:
                json_str = result_text[json_start:json_end]
                result_json = json.loads(json_str)
                return {"success": True, "results": result_json.get('results', [])}
            return {"success": False, "error": "JSON解析失败", "raw": result_text}
        else:
            return {"success": False, "error": f"{response.code}-{response.message}"}
    def derive_tags(self, items: List[Dict]) -> List[TagDeriveResponse]:
        """Derive tags for *items*; always returns one response per input item."""
        logger.info(f"开始处理 {len(items)} 张图片的标签衍生")
        try:
            result = self.call_qwen_batch(items)
        except Exception as e:
            # Retries exhausted — fail every item with the same error
            logger.error(f"调用千问API失败: {e}")
            return [
                TagDeriveResponse(
                    success=False,
                    original_tag=item['tag_name'],
                    error=str(e)
                ) for item in items
            ]
        if not result.get('success'):
            error_msg = result.get('error', '未知错误')
            return [
                TagDeriveResponse(
                    success=False,
                    original_tag=item['tag_name'],
                    error=error_msg
                ) for item in items
            ]
        # Map model results back onto the input items
        api_results = result.get('results', [])
        responses = []
        for i, item in enumerate(items):
            derived_tags = []
            # Match by 1-based image index first, then by original tag name
            for r in api_results:
                if r.get('image_index') == i + 1 or r.get('original_tag') == item['tag_name']:
                    derived_tags = r.get('derived_tags', [])
                    break
            # Fallback: assume the model kept the input order
            if not derived_tags and i < len(api_results):
                derived_tags = api_results[i].get('derived_tags', [])
            if derived_tags:
                merged = self._merge_tags(item['tag_name'], derived_tags)
                responses.append(TagDeriveResponse(
                    success=True,
                    original_tag=item['tag_name'],
                    derived_tags=derived_tags,
                    merged_tag=merged
                ))
                logger.info(f"[{item['tag_name']}] 衍生成功: {len(derived_tags)} 个标签")
            else:
                responses.append(TagDeriveResponse(
                    success=False,
                    original_tag=item['tag_name'],
                    error="未获取到衍生标签"
                ))
        return responses
    def _merge_tags(self, original: str, derived: List[str]) -> str:
        """Merge original + derived tags into '#tag1##tag2#' form, de-duplicated."""
        # The original may already be in '#a##b#' form — split it back out
        if original.startswith('#') and original.endswith('#'):
            original_tags = [t for t in original.split('#') if t.strip()]
        else:
            original_tags = [original]
        # De-duplicate while preserving order (originals first)
        all_tags = []
        seen = set()
        for t in original_tags + derived:
            if t and t not in seen:
                all_tags.append(t)
                seen.add(t)
        return ''.join([f'#{t}#' for t in all_tags])
    def get_pending_images(self, limit: int = 100) -> List[Dict]:
        """Fetch rows whose current tag is not yet a derived tag; add full image_url."""
        sql = """
            SELECT it.id, it.image_thumb_url, it.tag_id, it.tag_name, it.department_name
            FROM ai_image_tags it
            LEFT JOIN ai_tags t ON it.tag_id = t.id
            WHERE it.image_thumb_url != '' AND it.tag_name != ''
              AND (t.tag_category IS NULL OR t.tag_category != '衍生标签')
            ORDER BY it.id
            LIMIT %s
        """
        items = self.db.execute_query(sql, (limit,))
        # Prepend the CDN base so the model can fetch each image by URL
        for item in items:
            if item.get('image_thumb_url'):
                item['image_url'] = self.config.image_cdn_base + item['image_thumb_url']
            else:
                item['image_url'] = ''
        return items
    def get_stats(self) -> Dict:
        """Aggregate counts backing the /api/stats endpoint."""
        # Total images that have a thumbnail
        total = self.db.execute_one(
            "SELECT COUNT(*) as cnt FROM ai_image_tags WHERE image_thumb_url != ''"
        )['cnt']
        # Images whose current tag is a derived tag (i.e. already processed)
        processed = self.db.execute_one("""
            SELECT COUNT(*) as cnt FROM ai_image_tags it
            JOIN ai_tags t ON it.tag_id = t.id
            WHERE t.tag_category = '衍生标签'
        """)['cnt']
        # Number of derived-tag rows in ai_tags
        derived_count = self.db.execute_one(
            "SELECT COUNT(*) as cnt FROM ai_tags WHERE tag_category = '衍生标签'"
        )['cnt']
        return {
            "total_images": total,
            "processed_images": processed,
            "pending_images": total - processed,
            "derived_tags_count": derived_count
        }
# ============== Task management ==============
# In-memory task registry keyed by task id (use Redis in production).
tasks_store: Dict[str, Dict] = {}

def create_task(task_id: str, total: int):
    """Register a fresh task record in the pending state."""
    tasks_store[task_id] = {
        "status": "pending",
        "progress": 0,
        "total": total,
        "processed": 0,
        "success_count": 0,
        "failed_count": 0,
        "started_at": None,
        "completed_at": None,
        "results": []
    }

def update_task(task_id: str, **kwargs):
    """Merge *kwargs* into the task record and refresh its progress percent."""
    record = tasks_store.get(task_id)
    if record is None:
        # Unknown task ids are silently ignored
        return
    record.update(kwargs)
    total = record["total"]
    if total > 0:
        record["progress"] = int(record["processed"] / total * 100)
# ============== Background task ==============
def process_batch_task(task_id: str, batch_size: int = 3):
    """Background job: derive tags for pending images in batches of *batch_size*.

    Progress is tracked in tasks_store via update_task; any uncaught failure
    marks the whole task as failed.
    """
    service = TagDeriveService()
    update_task(task_id, status="running", started_at=datetime.now().isoformat())
    try:
        items = service.get_pending_images(limit=tasks_store[task_id]["total"])
        # BUGFIX: the task was created with the *requested* limit as its total.
        # When fewer pending rows exist, progress could never reach 100% —
        # sync total to the number of rows actually fetched.
        update_task(task_id, total=len(items))
        for i in range(0, len(items), batch_size):
            batch = items[i:i+batch_size]
            batch_data = [
                {"image_url": item["image_url"], "tag_name": item["tag_name"]}
                for item in batch
            ]
            results = service.derive_tags(batch_data)
            success = sum(1 for r in results if r.success)
            failed = len(results) - success
            update_task(
                task_id,
                processed=i + len(batch),
                success_count=tasks_store[task_id]["success_count"] + success,
                failed_count=tasks_store[task_id]["failed_count"] + failed
            )
            # Throttle to avoid hitting the model API rate limit
            time.sleep(0.5)
        update_task(task_id, status="completed", completed_at=datetime.now().isoformat())
    except Exception as e:
        logger.error(f"批量任务失败: {e}")
        update_task(task_id, status="failed", completed_at=datetime.now().isoformat())
# ============== FastAPI application ==============
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: log service start-up and shutdown."""
    logger.info("标签衍生API服务启动")
    yield
    logger.info("标签衍生API服务关闭")
app = FastAPI(
    title="标签衍生API",
    description="基于千问大模型的图片标签衍生服务",
    version="1.0.0",
    lifespan=lifespan
)
# CORS: wide open — restrict allow_origins before exposing this publicly
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Shared service instance used by every route handler
service = TagDeriveService()
# ============== API routes ==============
@app.get("/", tags=["健康检查"])
async def root():
    """Service banner at the API root."""
    return {"message": "标签衍生API服务运行中", "version": "1.0.0"}
@app.get("/health", tags=["健康检查"])
async def health_check():
    """Liveness probe: always healthy with the current timestamp."""
    return {"status": "healthy", "timestamp": datetime.now().isoformat()}
@app.post("/api/derive/single", response_model=TagDeriveResponse, tags=["标签衍生"])
async def derive_single(request: TagDeriveRequest):
    """Derive tags for one image and return the single result."""
    payload = [{"image_url": request.image_url, "tag_name": request.tag_name}]
    return service.derive_tags(payload)[0]
@app.post("/api/derive/batch", response_model=BatchDeriveResponse, tags=["标签衍生"])
async def derive_batch(request: BatchDeriveRequest):
    """Derive tags for up to five images in one model call."""
    payload = [
        {"image_url": entry.image_url, "tag_name": entry.tag_name}
        for entry in request.items
    ]
    outcomes = service.derive_tags(payload)
    ok = sum(1 for outcome in outcomes if outcome.success)
    return BatchDeriveResponse(
        success=ok > 0,
        total=len(outcomes),
        success_count=ok,
        failed_count=len(outcomes) - ok,
        results=outcomes
    )
@app.post("/api/derive/async", tags=["异步任务"])
async def derive_async(
    background_tasks: BackgroundTasks,
    limit: int = Query(default=100, ge=1, le=1000, description="处理数量")
):
    """Kick off a background batch-derivation task and return its id."""
    # Millisecond timestamp as a cheap unique task id
    task_id = f"task_{int(time.time() * 1000)}"
    create_task(task_id, limit)
    background_tasks.add_task(process_batch_task, task_id, settings.tag_derive.batch_size)
    return {"task_id": task_id, "message": "任务已创建", "total": limit}
@app.get("/api/task/{task_id}", response_model=TaskStatusResponse, tags=["异步任务"])
async def get_task_status(task_id: str):
    """Return the current status snapshot of an async task."""
    record = tasks_store.get(task_id)
    if record is None:
        raise HTTPException(status_code=404, detail="任务不存在")
    snapshot = {
        key: record[key]
        for key in (
            "status", "progress", "total", "processed",
            "success_count", "failed_count", "started_at", "completed_at",
        )
    }
    return TaskStatusResponse(task_id=task_id, **snapshot)
@app.get("/api/stats", tags=["统计"], response_model=StatsResponse)
async def get_stats():
    """Return processing statistics (totals, processed, pending, derived)."""
    stats = service.get_stats()
    return StatsResponse(**stats)
@app.get("/api/pending", tags=["数据查询"])
async def get_pending_images(
    limit: int = Query(default=10, ge=1, le=100, description="返回数量")
):
    """List images still awaiting tag derivation."""
    pending = service.get_pending_images(limit)
    return {"total": len(pending), "items": pending}
# ============== Entry point ==============
if __name__ == "__main__":
    import uvicorn
    # Host/port come from settings; reload follows the debug flag
    logger.info(f"启动API服务: http://{settings.api_host}:{settings.api_port}")
    uvicorn.run(
        "tag_derive_api:app",
        host=settings.api_host,
        port=settings.api_port,
        reload=settings.debug
    )

1
增加字段.txt Normal file
View File

@@ -0,0 +1 @@
ALTER TABLE ai_article.ai_image_tags ADD COLUMN derived_tag VARCHAR(1000) NOT NULL DEFAULT '' COMMENT '衍生标签,格式:#标签1##标签2#';