Initial commit: Add AI tagging images project files

This commit is contained in:
2026-02-04 11:12:34 +08:00
parent 1697372b46
commit a6f203a3e3
15 changed files with 1868 additions and 406 deletions

36
.gitignore vendored
View File

@@ -3,21 +3,45 @@ __pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual environment
# Virtual Environment
venv/
env/
.venv/
ENV/
# IDE
.idea/
.vscode/
.idea/
*.swp
*.swo
# Logs
*.log
# OS
.DS_Store
Thumbs.db
# Logs
*.log
logs/
# Data files
# Ignore JSON files, except the specific ones we want to track
*.json
# Track this specific JSON file
!backup_data.json
# Local config
config/local_settings.py

BIN
1.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 72 KiB

View File

@@ -11,7 +11,7 @@
Target Server Version : 90001 (9.0.1)
File Encoding : 65001
Date: 28/01/2026 14:04:39
Date: 02/02/2026 16:34:15
*/
SET NAMES utf8mb4;
@@ -41,7 +41,7 @@ CREATE TABLE `ai_article_images` (
INDEX `image_id`(`image_id` ASC) USING BTREE,
INDEX `idx_tag_article_lookup`(`image_tag_id` ASC, `article_id` ASC) USING BTREE,
INDEX `idx_article_images_article_tag`(`article_id` ASC, `image_tag_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 699 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC;
) ENGINE = InnoDB AUTO_INCREMENT = 700 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for ai_article_tags
@@ -111,7 +111,7 @@ CREATE TABLE `ai_articles` (
CONSTRAINT `ai_articles_ibfk_2` FOREIGN KEY (`created_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
CONSTRAINT `ai_articles_ibfk_3` FOREIGN KEY (`review_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT,
CONSTRAINT `ai_articles_ibfk_4` FOREIGN KEY (`publish_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 1180 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
) ENGINE = InnoDB AUTO_INCREMENT = 1350 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_authors
@@ -178,6 +178,25 @@ CREATE TABLE `ai_batch_uploads` (
CONSTRAINT `ai_batch_uploads_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 101 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_department_config
-- ----------------------------
-- Department tag configuration table (table COMMENT: "department tag configuration table").
-- One row per department; `department_code` is unique. Any existing table of this name
-- is dropped (with its rows) before being recreated.
DROP TABLE IF EXISTS `ai_department_config`;
CREATE TABLE `ai_department_config` (
-- Primary key ID.
`id` int UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键ID',
-- Department name.
`department_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '科室名称',
-- Department code (unique, see uk_department_code).
`department_code` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '科室编码',
-- Associated keywords, stored as a JSON array in a text column (not a native JSON column,
-- so the database does not validate the JSON).
`keywords` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '关联关键词JSON数组',
-- Priority. NOTE(review): the ordering direction is not stated here — confirm with the consumer.
`priority` int NOT NULL DEFAULT 0 COMMENT '优先级',
-- Status: 0 = disabled, 1 = enabled (default 1).
`status` tinyint NOT NULL DEFAULT 1 COMMENT '状态0-禁用1-启用',
-- Free-form remark.
`remark` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '备注',
-- Creation time; set by the database on insert.
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
-- Update time; refreshed by the database on every row update.
`update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_department_code`(`department_code` ASC) USING BTREE,
INDEX `idx_status`(`status` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 3 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci COMMENT = '科室标签配置表' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_departments
-- ----------------------------
@@ -218,8 +237,11 @@ CREATE TABLE `ai_image_tags` (
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`image_attached_article_count` int NOT NULL DEFAULT 0 COMMENT 'Number of articles the image is attached to',
`status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'draft' COMMENT '图片完整扭转流程状态',
`status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','tag_extension','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','generate_review','generate','generate_failed','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'draft' COMMENT '图片完整扭转流程状态',
`blocking_reason` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '审核不通过原因',
`similarity` enum('draft','yes','calc','recalc') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT 'yes=是相似|calc=已计算|recalc=需要重新计算',
`similarity_image_tags_id` int NOT NULL DEFAULT 0 COMMENT 'yes=是相似|把image_tags_id写入',
`similarity score` float NOT NULL DEFAULT 0 COMMENT '相似时候,计算相似度值',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_image_tag`(`image_id` ASC, `tag_id` ASC) USING BTREE,
INDEX `tag_id`(`tag_id` ASC) USING BTREE,
@@ -237,7 +259,7 @@ CREATE TABLE `ai_image_tags` (
INDEX `idx_tag_name_id`(`tag_name` ASC, `id` ASC) USING BTREE,
INDEX `idx_tag_notnull_id`(`id` ASC, `tag_name` ASC, `image_id` ASC, `created_at` ASC) USING BTREE,
CONSTRAINT `ai_image_tags_ibfk_2` FOREIGN KEY (`tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 929767 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
) ENGINE = InnoDB AUTO_INCREMENT = 929784 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_images
@@ -263,7 +285,7 @@ CREATE TABLE `ai_images` (
INDEX `upload_user_id`(`upload_user_id` ASC) USING BTREE,
INDEX `idx_status_updated`(`status` ASC, `updated_at` ASC) USING BTREE,
CONSTRAINT `ai_images_ibfk_1` FOREIGN KEY (`upload_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 26832 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
) ENGINE = InnoDB AUTO_INCREMENT = 26849 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_keywords
@@ -302,7 +324,7 @@ CREATE TABLE `ai_logs` (
INDEX `user_id`(`user_id` ASC) USING BTREE,
INDEX `idx_created_at`(`created_at` DESC) USING BTREE,
CONSTRAINT `ai_logs_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 116027 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
) ENGINE = InnoDB AUTO_INCREMENT = 116565 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_mip_click
@@ -412,7 +434,7 @@ CREATE TABLE `ai_mip_query_task` (
INDEX `idx_category`(`category` ASC) USING BTREE COMMENT '按分类查询',
INDEX `idx_threshold`(`threshold_max` ASC, `current_count` ASC) USING BTREE COMMENT '阈值监控',
INDEX `idx_closed`(`closed_at` ASC) USING BTREE COMMENT '关闭时间索引'
) ENGINE = InnoDB AUTO_INCREMENT = 1 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = 'MIP查询任务表 - 用于存储查询词抓取网址任务' ROW_FORMAT = DYNAMIC;
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = 'MIP查询任务表 - 用于存储查询词抓取网址任务' ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Table structure for ai_mip_site
@@ -508,6 +530,61 @@ CREATE TABLE `ai_prompt_workflow` (
INDEX `idx_query_enable`(`query_enable` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 16 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_query_audit
-- ----------------------------
-- AI query audit record table (table COMMENT: "AI Query audit record table").
-- One row per distinct query hash; `query_hash` is unique. Any existing table of this
-- name is dropped (with its rows) before being recreated.
DROP TABLE IF EXISTS `ai_query_audit`;
CREATE TABLE `ai_query_audit` (
-- Primary key ID.
`id` bigint UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键ID',
-- Original query text.
`query_text` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT 'Query原文',
-- MD5 hash of the query, used for de-duplication (enforced by uk_query_hash).
-- NOTE(review): an MD5 hex digest is 32 characters but the column is varchar(64) —
-- confirm which hash/encoding the writer actually stores.
`query_hash` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT 'Query MD5哈希值用于去重',
-- Query status: 0 = pending review, 1 = filtered by blacklist, 2 = approved,
-- 3 = rejected, 4 = pending manual review.
`query_status` tinyint NOT NULL DEFAULT 0 COMMENT 'Query状态0-待审核1-已过滤黑名单2-已通过3-已拒绝4-待人工审核',
-- Reason the query was filtered.
`filter_reason` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '过滤原因',
-- Blacklist keywords that were hit, as a JSON array.
`matched_keywords` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '命中的黑名单关键词JSON数组',
-- Department tags as a JSON array (e.g. radiology, CT).
`department_tags` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '科室标签JSON数组影像科、CT等',
-- Batch tagging result, as JSON.
`batch_tag_result` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL COMMENT '批量打标签结果JSON',
-- AI score; decimal(5, 2) allows at most 3 integer digits and 2 fractional digits.
`ai_score` decimal(5, 2) NULL DEFAULT NULL COMMENT 'AI评分',
-- Whether the query is health related: 0 = no, 1 = yes (NULL when not yet set).
`is_health_related` tinyint NULL DEFAULT NULL COMMENT '是否健康相关0-否1-是',
-- Whether the query is a complete sentence: 0 = no, 1 = yes (NULL when not yet set).
`is_complete_sentence` tinyint NULL DEFAULT NULL COMMENT '是否完整语句0-否1-是',
-- Whether the query contains a person name: 0 = no, 1 = yes.
`has_person_name` tinyint NULL DEFAULT 0 COMMENT '是否包含人名0-否1-是',
-- Whether the query contains a place name: 0 = no, 1 = yes.
`has_location` tinyint NULL DEFAULT 0 COMMENT '是否包含地名0-否1-是',
-- Whether the query contains a hospital name: 0 = no, 1 = yes.
`has_hospital_name` tinyint NULL DEFAULT 0 COMMENT '是否包含医院名0-否1-是',
-- Source channel.
`source` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '来源渠道',
-- Creation time; set by the database on insert.
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
-- Update time; refreshed by the database on every row update.
`update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_query_hash`(`query_hash` ASC) USING BTREE,
INDEX `idx_query_status`(`query_status` ASC, `create_time` ASC) USING BTREE,
-- Prefix index over the first 100 characters of the JSON-array string.
-- NOTE(review): a B-tree prefix index only helps equality/left-anchored matches on the
-- whole string, not "array contains tag" lookups — confirm it matches the real queries.
INDEX `idx_department_tags`(`department_tags`(100) ASC) USING BTREE,
INDEX `idx_create_time`(`create_time` ASC) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci COMMENT = 'AI Query审核记录表' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_query_blacklist
-- ----------------------------
-- AI query blacklist word table (table COMMENT: "AI query blacklist word table").
-- Rows are soft-deleted via `is_deleted`. Any existing table of this name is dropped
-- (with its rows) before being recreated.
DROP TABLE IF EXISTS `ai_query_blacklist`;
CREATE TABLE `ai_query_blacklist` (
-- Primary key ID.
`id` bigint UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键ID',
-- Blacklisted keyword.
`keyword` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '黑名单关键词',
-- Keyword type: 1 = general term, 2 = person name, 3 = place name, 4 = hospital name, 5 = other.
`keyword_type` tinyint NOT NULL DEFAULT 1 COMMENT '关键词类型1-通用词汇2-人名3-地名4-医院名5-其他',
-- Filter rule: 1 = filter when the query contains the keyword, 2 = exact match, 3 = regex match.
`filter_rule` tinyint NOT NULL DEFAULT 1 COMMENT '过滤规则1-包含即过滤2-完全匹配3-正则匹配',
-- Regular expression, used when filter_rule = 3.
`regex_pattern` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '正则表达式当filter_rule=3时使用',
-- Category label, e.g. drugs, images, fees.
`category` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '分类标签:药品、图片、费用等',
-- Priority; a larger value means a higher priority.
`priority` int NOT NULL DEFAULT 0 COMMENT '优先级,数值越大优先级越高',
-- Status: 0 = disabled, 1 = enabled (default 1).
`status` tinyint NOT NULL DEFAULT 1 COMMENT '状态0-禁用1-启用',
-- Free-form remark.
`remark` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '备注说明',
-- Creator.
`creator` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '创建人',
-- Last updater.
`updater` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '更新人',
-- Creation time; set by the database on insert.
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
-- Update time; refreshed by the database on every row update.
`update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
-- Soft-delete flag: 0 = not deleted, 1 = deleted.
`is_deleted` tinyint NOT NULL DEFAULT 0 COMMENT '是否删除0-未删除1-已删除',
PRIMARY KEY (`id`) USING BTREE,
-- NOTE(review): uniqueness is on (keyword, is_deleted), so each keyword can have at most
-- one live row AND at most one soft-deleted row; soft-deleting the same keyword a second
-- time would violate this index. Confirm this is the intended behaviour.
UNIQUE INDEX `uk_keyword`(`keyword` ASC, `is_deleted` ASC) USING BTREE,
INDEX `idx_keyword_type`(`keyword_type` ASC, `status` ASC) USING BTREE,
INDEX `idx_category`(`category` ASC, `status` ASC) USING BTREE,
INDEX `idx_create_time`(`create_time` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 66 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci COMMENT = 'AI查询黑名单词表' ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_query_category
-- ----------------------------
@@ -700,7 +777,7 @@ CREATE TABLE `ai_tag_subsets` (
PRIMARY KEY (`id`) USING BTREE,
INDEX `parent_tag_id`(`parent_tag_id` ASC) USING BTREE,
CONSTRAINT `ai_tag_subsets_ibfk_1` FOREIGN KEY (`parent_tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT
) ENGINE = InnoDB AUTO_INCREMENT = 20478 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
) ENGINE = InnoDB AUTO_INCREMENT = 20495 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_tags
@@ -719,7 +796,7 @@ CREATE TABLE `ai_tags` (
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `uk_tag_name`(`tag_name` ASC) USING BTREE,
INDEX `idx_status_updated`(`status` ASC, `updated_at` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 13417 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
) ENGINE = InnoDB AUTO_INCREMENT = 13434 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for ai_topic_type
@@ -810,7 +887,7 @@ CREATE TABLE `ai_users` (
`phone` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`xhs_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL COMMENT '小红书Cookie',
`department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`role` enum('admin','editor','reviewer','publisher','each_title_reviewer','reviewer_query') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'editor' COMMENT '用户角色',
`role` enum('admin','editor','reviewer','publisher','each_title_reviewer','reviewer_query','reviewer_image') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'editor' COMMENT '用户角色',
`status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active',
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
@@ -848,7 +925,7 @@ CREATE TABLE `baidu_keyword` (
`blocking_reason` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '审核不通过原因',
`article_id` int NOT NULL DEFAULT 0 COMMENT '文章ID',
`query_stage` enum('draft','created','summary','reviewed','generated','published') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT '分5个阶段创建|总结|审核|生文|发布',
`query_status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','generate_review','generate','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'draft' COMMENT 'query完整扭转流程状态',
`query_status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','generate_review','generate','generate_failed','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'draft' COMMENT 'query完整扭转流程状态',
`status` enum('draft','available','unavailable','successful','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT '状态_分2个阶段|可用|不可用|发布成功|发布失败',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`review_user_id` int NOT NULL DEFAULT 0 COMMENT '审核用户ID',
@@ -857,6 +934,8 @@ CREATE TABLE `baidu_keyword` (
`similarity_query_keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT 'yes=是相似|把query写入',
`similarity score` float NOT NULL DEFAULT 0 COMMENT '相似时候,计算相似度值',
`reviewed_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '审核日期',
`fast_track` tinyint(1) NOT NULL DEFAULT 0 COMMENT '加急|0=否|1=是',
`automated_review_failed_reason` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '千问大模型审核query不符合原因',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `keyword`(`keyword` ASC) USING BTREE,
INDEX `idx_crawled_seed`(`crawled` ASC, `seed_id` ASC) USING BTREE,
@@ -881,8 +960,9 @@ CREATE TABLE `baidu_keyword` (
INDEX `idx_query_status_cover`(`query_status` ASC) USING BTREE,
INDEX `idx_query_status_id_asc`(`query_status` ASC, `id` ASC) USING BTREE,
INDEX `idx_status_order_covering`(`query_status` ASC, `id` ASC, `keyword` ASC) USING BTREE,
INDEX `idx_status_fast_id_keyword`(`query_status` ASC, `fast_track` ASC, `id` ASC, `keyword` ASC) USING BTREE,
FULLTEXT INDEX `idx_keyword_fulltext`(`keyword`)
) ENGINE = InnoDB AUTO_INCREMENT = 901728 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
) ENGINE = InnoDB AUTO_INCREMENT = 901869 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for baidu_query_task
@@ -922,9 +1002,14 @@ CREATE TABLE `baidu_seed_keywords` (
`status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'ready',
`updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`priority_weight` int NOT NULL DEFAULT 0 COMMENT '优先级和权重1~10000|更高的先处理',
`fast_track` tinyint(1) NOT NULL DEFAULT 0 COMMENT '加急|0=否|1=是',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `keyword`(`keyword` ASC) USING BTREE,
INDEX `idx_crawled_priority`(`crawled` ASC, `priority_weight` DESC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 48 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = DYNAMIC;
INDEX `idx_crawled_priority`(`crawled` ASC, `priority_weight` DESC) USING BTREE,
INDEX `idx_fast_track`(`fast_track` ASC) USING BTREE,
INDEX `idx_crawled_fast_weight`(`crawled` ASC, `fast_track` ASC, `priority_weight` DESC) USING BTREE,
INDEX `idx_crawled_fast_weight_covering`(`crawled` ASC, `fast_track` ASC, `priority_weight` DESC, `keyword` ASC, `id` ASC) USING BTREE,
INDEX `idx_crawled_fast_priority`(`crawled` ASC, `fast_track` ASC, `priority_weight` DESC, `id` ASC, `keyword` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 100001 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = DYNAMIC;
SET FOREIGN_KEY_CHECKS = 1;

View File

@@ -0,0 +1,9 @@
12679
#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#
NULL
妇科
NULL
0
active
2025-08-16 21:48:16
2025-08-16 21:48:16

642
backup_data.json Normal file
View File

@@ -0,0 +1,642 @@
{
"ai_tags": [
{
"id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"tag_category": null,
"department": "妇科",
"description": null,
"usage_count": 0,
"status": "active",
"created_at": "2025-08-16T21:48:16",
"updated_at": "2025-08-16T21:48:16"
}
],
"ai_image_tags": [
{
"id": 16495,
"image_id": 19346,
"image_name": "1755312359566253.png",
"image_url": "20250816/1755312359566253.png",
"image_thumb_url": "20250816/1755312359566253_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:19:11",
"image_attached_article_count": 7,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16496,
"image_id": 19347,
"image_name": "1755312362360723.png",
"image_url": "20250816/1755312362360723.png",
"image_thumb_url": "20250816/1755312362360723_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:18:55",
"image_attached_article_count": 8,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16497,
"image_id": 19348,
"image_name": "1755312364406476.png",
"image_url": "20250816/1755312364406476.png",
"image_thumb_url": "20250816/1755312364406476_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:18:55",
"image_attached_article_count": 8,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16498,
"image_id": 19349,
"image_name": "1755312367284353.png",
"image_url": "20250816/1755312367284353.png",
"image_thumb_url": "20250816/1755312367284353_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:18:55",
"image_attached_article_count": 8,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16499,
"image_id": 19350,
"image_name": "1755312370484005.png",
"image_url": "20250816/1755312370484005.png",
"image_thumb_url": "20250816/1755312370484005_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:19:11",
"image_attached_article_count": 7,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16500,
"image_id": 19351,
"image_name": "1755312373245801.png",
"image_url": "20250816/1755312373245801.png",
"image_thumb_url": "20250816/1755312373245801_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:35:41",
"image_attached_article_count": 17,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16501,
"image_id": 19352,
"image_name": "1755312378278262.png",
"image_url": "20250816/1755312378278262.png",
"image_thumb_url": "20250816/1755312378278262_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:34:55",
"image_attached_article_count": 35,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16502,
"image_id": 19353,
"image_name": "1755312380298110.png",
"image_url": "20250816/1755312380298110.png",
"image_thumb_url": "20250816/1755312380298110_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:34:51",
"image_attached_article_count": 37,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16503,
"image_id": 19354,
"image_name": "1755312382399131.png",
"image_url": "20250816/1755312382399131.png",
"image_thumb_url": "20250816/1755312382399131_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:33:30",
"image_attached_article_count": 93,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16504,
"image_id": 19355,
"image_name": "1755312386945978.png",
"image_url": "20250816/1755312386945978.png",
"image_thumb_url": "20250816/1755312386945978_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:35:33",
"image_attached_article_count": 20,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16505,
"image_id": 19356,
"image_name": "1755312388894962.png",
"image_url": "20250816/1755312388894962.png",
"image_thumb_url": "20250816/1755312388894962_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:35:06",
"image_attached_article_count": 30,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16506,
"image_id": 19357,
"image_name": "1755312391383717.png",
"image_url": "20250816/1755312391383717.png",
"image_thumb_url": "20250816/1755312391383717_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:34:30",
"image_attached_article_count": 49,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16507,
"image_id": 19358,
"image_name": "1755312393565035.png",
"image_url": "20250816/1755312393565035.png",
"image_thumb_url": "20250816/1755312393565035_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:32:57",
"image_attached_article_count": 135,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16508,
"image_id": 19359,
"image_name": "1755312396609453.png",
"image_url": "20250816/1755312396609453.png",
"image_thumb_url": "20250816/1755312396609453_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:18:55",
"image_attached_article_count": 8,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16509,
"image_id": 19360,
"image_name": "1755312401479871.png",
"image_url": "20250816/1755312401479871.png",
"image_thumb_url": "20250816/1755312401479871_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:35:41",
"image_attached_article_count": 17,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16510,
"image_id": 19361,
"image_name": "1755312407229190.png",
"image_url": "20250816/1755312407229190.png",
"image_thumb_url": "20250816/1755312407229190_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:35:29",
"image_attached_article_count": 21,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16511,
"image_id": 19362,
"image_name": "1755312410797310.png",
"image_url": "20250816/1755312410797310.png",
"image_thumb_url": "20250816/1755312410797310_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 186,
"keywords_name": "妇科炎症",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:35:08",
"image_attached_article_count": 29,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16512,
"image_id": 19363,
"image_name": "1755312437724619.png",
"image_url": "20250816/1755312437724619.png",
"image_thumb_url": "20250816/1755312437724619_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 265,
"keywords_name": "废止",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:33:59",
"image_attached_article_count": 69,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16513,
"image_id": 19364,
"image_name": "1755312440270419.png",
"image_url": "20250816/1755312440270419.png",
"image_thumb_url": "20250816/1755312440270419_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 265,
"keywords_name": "废止",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:33:29",
"image_attached_article_count": 94,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16514,
"image_id": 19365,
"image_name": "1755312442259884.png",
"image_url": "20250816/1755312442259884.png",
"image_thumb_url": "20250816/1755312442259884_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 265,
"keywords_name": "废止",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:33:18",
"image_attached_article_count": 107,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16515,
"image_id": 19366,
"image_name": "1755312445610363.png",
"image_url": "20250816/1755312445610363.png",
"image_thumb_url": "20250816/1755312445610363_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 265,
"keywords_name": "废止",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:32:36",
"image_attached_article_count": 173,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16516,
"image_id": 19367,
"image_name": "1755312448884355.png",
"image_url": "20250816/1755312448884355.png",
"image_thumb_url": "20250816/1755312448884355_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 265,
"keywords_name": "废止",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:33:14",
"image_attached_article_count": 111,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16517,
"image_id": 19368,
"image_name": "1755312451681906.png",
"image_url": "20250816/1755312451681906.png",
"image_thumb_url": "20250816/1755312451681906_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 265,
"keywords_name": "废止",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:33:29",
"image_attached_article_count": 94,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16518,
"image_id": 19369,
"image_name": "1755312453351689.png",
"image_url": "20250816/1755312453351689.png",
"image_thumb_url": "20250816/1755312453351689_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 265,
"keywords_name": "废止",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:33:24",
"image_attached_article_count": 100,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
},
{
"id": 16519,
"image_id": 19370,
"image_name": "1755312456284588.png",
"image_url": "20250816/1755312456284588.png",
"image_thumb_url": "20250816/1755312456284588_thumb.png",
"tag_id": 12679,
"tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"default_tag_id": 0,
"default_tag_name": "",
"keywords_id": 265,
"keywords_name": "废止",
"department_id": 11,
"department_name": "妇科",
"image_source": 1,
"created_user_id": 0,
"created_at": "2025-08-16T21:48:16",
"updated_at": "2026-01-30T14:33:09",
"image_attached_article_count": 118,
"status": "draft",
"blocking_reason": "",
"similarity": "draft",
"similarity_image_tags_id": 0,
"similarity score": 0.0
}
]
}

View File

@@ -35,7 +35,8 @@ class QwenConfig:
@dataclass
class TagDeriveConfig:
"""标签衍生配置"""
batch_size: int = 3 # 每批处理图片数
batch_size: int = 50 # 每批次从数据库读取的图片数
concurrency: int = 10 # 并发请求数同时发出的API请求数
min_derived_tags: int = 5 # 最少衍生标签数
max_derived_tags: int = 10 # 最多衍生标签数
max_tag_length: int = 10 # 单个标签最大长度
@@ -86,6 +87,7 @@ class Settings:
# 标签衍生配置
settings.tag_derive.batch_size = int(os.getenv("BATCH_SIZE", settings.tag_derive.batch_size))
settings.tag_derive.concurrency = int(os.getenv("CONCURRENCY", settings.tag_derive.concurrency))
settings.tag_derive.min_derived_tags = int(os.getenv("MIN_DERIVED_TAGS", settings.tag_derive.min_derived_tags))
settings.tag_derive.max_derived_tags = int(os.getenv("MAX_DERIVED_TAGS", settings.tag_derive.max_derived_tags))
settings.tag_derive.image_cdn_base = os.getenv("IMAGE_CDN_BASE", settings.tag_derive.image_cdn_base)

View File

@@ -1,4 +1,140 @@
[
{
"success": true,
"image_id": 16495,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"阴道炎",
"白带异常",
"妇科感染",
"盆腔炎",
"宫颈炎",
"分泌物增多",
"抗炎治疗",
"妇科检查"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##白带异常##妇科感染##盆腔炎#",
"new_tag_id": 13434
},
{
"success": true,
"image_id": 16508,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"阴道炎",
"宫颈炎",
"盆腔炎",
"白带异常",
"外阴瘙痒",
"妇科检查",
"抗炎治疗",
"女性健康"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##白带异常#",
"new_tag_id": 13435
},
{
"success": true,
"image_id": 16506,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"阴道炎",
"盆腔炎",
"白带异常",
"妇科检查",
"抗生素治疗",
"私处护理",
"月经不调",
"感染预防"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##盆腔炎##白带异常##妇科检查#",
"new_tag_id": 13436
},
{
"success": true,
"image_id": 16503,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"阴道炎",
"宫颈炎",
"盆腔炎",
"白带异常",
"外阴瘙痒",
"妇科感染",
"炎症治疗",
"妇科疾病"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##白带异常#",
"new_tag_id": 13435
},
{
"success": true,
"image_id": 16515,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"妇科疾病",
"阴道炎",
"盆腔炎",
"白带异常",
"月经不调",
"抗炎治疗",
"个人卫生",
"免疫力"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##妇科疾病##阴道炎##盆腔炎##白带异常#",
"new_tag_id": 13437
},
{
"success": true,
"image_id": 16512,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"阴道炎",
"盆腔炎",
"白带异常",
"月经不调",
"抗生素治疗",
"妇科检查",
"免疫力下降",
"激素变化"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##盆腔炎##白带异常##月经不调#",
"new_tag_id": 13438
},
{
"success": true,
"image_id": 16514,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"妇科疾病",
"阴道炎",
"盆腔炎",
"白带异常",
"抗生素治疗",
"个人卫生",
"免疫力下降",
"月经不调"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##妇科疾病##阴道炎##盆腔炎##白带异常#",
"new_tag_id": 13437
},
{
"success": true,
"image_id": 16513,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"阴道炎",
"宫颈炎",
"盆腔炎",
"白带异常",
"瘙痒",
"抗生素治疗",
"个人卫生",
"妇科检查"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##白带异常#",
"new_tag_id": 13435
},
{
"success": true,
"image_id": 16496,
@@ -7,47 +143,64 @@
"阴道炎",
"宫颈炎",
"盆腔炎",
"感染因素",
"个人卫生",
"白带异常",
"抗生素治疗",
"抗炎药物",
"预防措施"
"妇科检查",
"免疫力下降",
"性传播疾病"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##感染因素#",
"new_tag_id": 12681
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##白带异常#",
"new_tag_id": 13435
},
{
"success": true,
"image_id": 16497,
"image_id": 16500,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"妇科疾病",
"阴道炎",
"宫颈炎",
"盆腔炎",
"感染因素",
"个人卫生",
"白带异常",
"私处护理",
"抗生素治疗",
"抗炎药物",
"预防措施"
"免疫力提升",
"月经不调"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##感染因素#",
"new_tag_id": 12681
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##妇科疾病##阴道炎##盆腔炎##白带异常#",
"new_tag_id": 13437
},
{
"success": true,
"image_id": 16498,
"image_id": 16516,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"更年期症状",
"激素变化",
"月经紊乱",
"潮热出汗",
"骨质疏松",
"情绪波动",
"妇科保健",
"内分泌失调"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##更年期症状##激素变化##月经紊乱##潮热出汗#",
"new_tag_id": 13439
},
{
"success": true,
"image_id": 16518,
"original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#",
"derived_tags": [
"阴道炎",
"宫颈炎",
"盆腔炎",
"感染因素",
"个人卫生",
"白带异常",
"抗生素治疗",
"抗炎药物",
"预防措施"
"个人卫生",
"妇科检查",
"免疫力下降",
"性传播疾病"
],
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##感染因素#",
"new_tag_id": 12681
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##盆腔炎##白带异常##抗生素治疗#",
"new_tag_id": 13440
}
]

View File

@@ -1,13 +1,14 @@
# -*- coding: utf-8 -*-
"""
千问大模型 - 图片标签衍生生成脚本
流程:每次批量2-3张图片 -> 大模型返回各自衍生标签 -> 分别更新数据库
流程:每批次N张图片并发请求 -> 大模型返回各自衍生标签 -> 分别更新数据库
"""
import os
import json
from http import HTTPStatus
from typing import List, Dict, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed
from database_config import get_db
from config.settings import settings
@@ -29,32 +30,25 @@ dashscope.api_key = settings.qwen.api_key
# ============== Prompt模板 ==============
BATCH_DERIVE_PROMPT = """你是一个专业的医疗健康内容标签分析专家。
# 单张图片的Prompt
SINGLE_DERIVE_PROMPT = """你是一个专业的医疗健康内容标签分析专家。
## 任务
我提供了{image_count}张医疗健康相关图片,每张图片有一个原始标签。请分析每张图片,为每张图片生成衍生标签。
## 图片及原始标签
{image_tags_list}
我提供了一张医疗健康相关图片,原始标签为「{original_tag}」。请分析图片内容,生成衍生标签。
## 要求
1. 分析每张图片内容,结合原始标签
2. 为每张图片生成 5-8 个衍生标签
1. 分析图片内容,结合原始标签
2. 生成 5-8 个衍生标签
3. 衍生标签包括:同义词、上位概念、下位概念、相关症状/治疗等
4. 标签简洁每个不超过10个字
## 输出格式
请严格以JSON格式输出,按图片顺序返回
请严格以JSON格式输出
```json
{{
"results": [
{{"image_index": 1, "original_tag": "原始标签1", "derived_tags": ["衍生1", "衍生2", "衍生3"]}},
{{"image_index": 2, "original_tag": "原始标签2", "derived_tags": ["衍生1", "衍生2", "衍生3"]}}
]
}}
{{"derived_tags": ["衍生1", "衍生2", "衍生3", "衍生4", "衍生5"]}}
```
注意只输出JSON不要输出其他内容。results数组长度必须等于图片数量。
注意只输出JSON不要输出其他内容。
"""
@@ -116,30 +110,20 @@ def merge_tags(original_tag: str, derived_tags: List[str], max_total_tags: int =
@retry(max_retries=settings.qwen.max_retries, delay=settings.qwen.retry_delay, backoff=2.0)
def derive_tags_batch(items: List[Dict]) -> Dict:
def derive_tags_single(item: Dict) -> Dict:
"""
批量调用千问大模型,每张图片独立返回衍生标签
items: [{"id": 1, "image_url": "...", "tag_name": "高血压"}, ...]
带重试机制
单张图片调用千问大模型获取衍生标签
item: {"id": 1, "image_url": "...", "tag_name": "高血压", ...}
返回: {"success": True/False, "item": item, "derived_tags": [...], "error": "..."}
"""
logger.info(f"[批量处理] {len(items)} 张图片")
logger.debug(f" 处理 ID:{item['id']} - {item['tag_name']}")
# 构建图片标签列表描述
image_tags_list = ""
for i, item in enumerate(items):
image_tags_list += f"- 图片{i+1}: 原始标签「{item['tag_name']}\n"
logger.debug(f" 图片{i+1}: {item['tag_name']} - {item['image_url'][:50]}...")
prompt = SINGLE_DERIVE_PROMPT.format(original_tag=item['tag_name'])
prompt = BATCH_DERIVE_PROMPT.format(
image_count=len(items),
image_tags_list=image_tags_list.strip()
)
# 构建多图消息
content = []
for item in items:
content.append({"image": item['image_url']})
content.append({"text": prompt})
content = [
{"image": item['image_url']},
{"text": prompt}
]
messages = [{"role": "user", "content": content}]
@@ -150,6 +134,7 @@ def derive_tags_batch(items: List[Dict]) -> Dict:
if response.status_code == HTTPStatus.OK:
result_text = response.output.choices[0].message.content[0]["text"]
logger.debug(f" ID:{item['id']} 原始响应: {result_text[:200]}...")
try:
json_start = result_text.find('{')
@@ -157,50 +142,83 @@ def derive_tags_batch(items: List[Dict]) -> Dict:
if json_start != -1 and json_end > json_start:
json_str = result_text[json_start:json_end]
result_json = json.loads(json_str)
results = result_json.get('results', [])
return {"success": True, "results": results}
derived_tags = result_json.get('derived_tags', [])
if not derived_tags:
logger.warning(f" ID:{item['id']} 返回JSON中无derived_tags字段: {json_str[:100]}")
return {"success": True, "item": item, "derived_tags": derived_tags}
else:
logger.warning(f" ID:{item['id']} 未找到JSON内容: {result_text[:200]}")
return {"success": False, "item": item, "error": "未找到JSON内容"}
except json.JSONDecodeError as e:
logger.error(f" JSON解析失败: {e}")
logger.error(f" ID:{item['id']} JSON解析失败: {e}, 内容: {result_text[:200]}")
return {"success": False, "error": "JSON解析失败"}
return {"success": False, "item": item, "error": "JSON解析失败"}
else:
error_msg = f"{response.code}-{response.message}"
logger.error(f" API调用失败: {error_msg}")
logger.error(f" ID:{item['id']} API调用失败: {error_msg}")
raise Exception(error_msg) # 抛出异常触发重试
def process_batch(items: List[Dict], tags_dao: TagsDAO) -> List[Dict]:
def process_batch(items: List[Dict], tags_dao: TagsDAO, concurrency: int = None) -> List[Dict]:
"""
处理一批图片
并发处理一批图片
Args:
items: 要处理的图片列表
tags_dao: 标签 DAO
concurrency: 并发数(同时发出的请求数)
"""
# 1. 批量调用大模型
try:
result = derive_tags_batch(items)
except Exception as e:
logger.error(f"批量处理失败: {e}")
return [{"success": False, "image_id": item['id'], "error": str(e)} for item in items]
if concurrency is None:
concurrency = settings.tag_derive.concurrency
if not result.get('success'):
return [{"success": False, "image_id": item['id'], "error": result.get('error')} for item in items]
logger.info(f"[处理批次] {len(items)} 张图片,并发数: {concurrency}")
api_results = result.get('results', [])
db = get_db()
process_results = []
api_results = []
# 2. 逐个匹配并更新
for i, item in enumerate(items):
# 查找对应的衍生结果
derived_tags = []
for r in api_results:
if r.get('image_index') == i + 1 or r.get('original_tag') == item['tag_name']:
derived_tags = r.get('derived_tags', [])
break
# 1. 并发调用大模型(按并发数限制)
with ThreadPoolExecutor(max_workers=concurrency) as executor:
# 提交所有任务
future_to_item = {
executor.submit(derive_tags_single, item): item
for item in items
}
if not derived_tags and i < len(api_results):
derived_tags = api_results[i].get('derived_tags', [])
# 收集结果
for future in as_completed(future_to_item):
item = future_to_item[future]
try:
result = future.result()
api_results.append(result)
except Exception as e:
logger.error(f" ID:{item['id']} 处理异常: {e}")
api_results.append({
"success": False,
"item": item,
"error": str(e)
})
# 2. 逐个处理结果并更新数据库
for result in api_results:
item = result.get('item', {})
if not result.get('success'):
process_results.append({
"success": False,
"image_id": item.get('id'),
"error": result.get('error', '未知错误')
})
continue
derived_tags = result.get('derived_tags', [])
if not derived_tags:
process_results.append({"success": False, "image_id": item['id'], "error": "未找到衍生标签"})
process_results.append({
"success": False,
"image_id": item['id'],
"error": "未获取到衍生标签"
})
continue
logger.info(f" [{item['tag_name']}] 衍生: {derived_tags}")
@@ -211,14 +229,14 @@ def process_batch(items: List[Dict], tags_dao: TagsDAO) -> List[Dict]:
# 插入ai_tags
try:
new_tag_id = tags_dao.get_or_create(merged_tag_name, '衍生标签', item.get('department_name', ''))
new_tag_id = tags_dao.get_or_create(merged_tag_name, None, item.get('department_name', ''))
except Exception as e:
process_results.append({"success": False, "image_id": item['id'], "error": str(e)})
continue
# 更新ai_image_tags
# 更新ai_image_tags(包括 tag_id, tag_name, status
try:
sql = "UPDATE ai_image_tags SET tag_id = %s, tag_name = %s WHERE id = %s"
sql = "UPDATE ai_image_tags SET tag_id = %s, tag_name = %s, status = 'manual_review' WHERE id = %s"
db.execute_update(sql, (new_tag_id, merged_tag_name, item['id']))
process_results.append({
"success": True,
@@ -228,51 +246,52 @@ def process_batch(items: List[Dict], tags_dao: TagsDAO) -> List[Dict]:
"merged_tag": merged_tag_name,
"new_tag_id": new_tag_id
})
logger.info(f" ✓ ID:{item['id']} -> tag_id:{new_tag_id}")
logger.info(f" ✓ ID:{item['id']} -> tag_id:{new_tag_id}, status -> manual_review")
except Exception as e:
process_results.append({"success": False, "image_id": item['id'], "error": str(e)})
return process_results
def batch_derive_tags(batch_size: int = None, start_id: int = None, end_id: int = None, ids: List[int] = None) -> List[Dict]:
def batch_derive_tags(batch_size: int = None, concurrency: int = None, start_id: int = None, end_id: int = None, ids: List[int] = None) -> List[Dict]:
"""
分批处理每批2-3张图片
分批处理图片标签衍生
Args:
batch_size: 每批处理的图片数量
batch_size: 每批次从数据库读取的图片数量
concurrency: 并发请求数同时发出的API请求数
start_id: 起始ID从该ID开始处理用于断点续传
end_id: 结束ID处理到该ID为止
ids: 指定ID列表只处理这些ID
"""
if batch_size is None:
batch_size = settings.tag_derive.batch_size
if concurrency is None:
concurrency = settings.tag_derive.concurrency
tags_dao = TagsDAO()
db = get_db()
# 查询需要处理的记录
if ids:
# 按指定ID查询同样检查是否已有衍生标签
# 按指定ID查询查询 status='tag_extension' 的记录
placeholders = ','.join(['%s'] * len(ids))
sql = f"""
SELECT it.id, it.image_thumb_url, it.tag_id, it.tag_name, it.department_name
FROM ai_image_tags it
LEFT JOIN ai_tags t ON it.tag_id = t.id
WHERE it.id IN ({placeholders})
AND it.image_thumb_url != '' AND it.tag_name != ''
AND (t.tag_category IS NULL OR t.tag_category != '衍生标签')
AND it.status = 'tag_extension'
ORDER BY it.id
"""
items = db.execute_query(sql, ids)
else:
# 按条件查询
# 按条件查询 status='tag_extension' 的记录
sql = """
SELECT it.id, it.image_thumb_url, it.tag_id, it.tag_name, it.department_name
FROM ai_image_tags it
LEFT JOIN ai_tags t ON it.tag_id = t.id
WHERE it.image_thumb_url != '' AND it.tag_name != ''
AND (t.tag_category IS NULL OR t.tag_category != '衍生标签')
AND it.status = 'tag_extension'
"""
params = []
@@ -299,7 +318,7 @@ def batch_derive_tags(batch_size: int = None, start_id: int = None, end_id: int
item['image_url'] = ''
total = len(items)
logger.info(f"待处理: {total} 条,每批 {batch_size}")
logger.info(f"待处理: {total} 条,每批 {batch_size},并发数: {concurrency}")
all_results = []
@@ -310,7 +329,7 @@ def batch_derive_tags(batch_size: int = None, start_id: int = None, end_id: int
logger.info(f"{'='*60}")
logger.info(f"批次 {batch_num}/{(total + batch_size - 1) // batch_size}")
results = process_batch(batch, tags_dao)
results = process_batch(batch, tags_dao, concurrency)
all_results.extend(results)
success = sum(1 for r in results if r.get('success'))
@@ -345,13 +364,17 @@ def main():
parser = argparse.ArgumentParser(description='千问视觉大模型 - 图片标签衍生生成器')
parser.add_argument('--start-id', type=int, default=None, help='起始ID从该ID开始处理用于断点续传')
parser.add_argument('--end-id', type=int, default=None, help='结束ID处理到该ID为止')
parser.add_argument('--batch-size', type=int, default=None, help='每批处理的图片数量')
parser.add_argument('--batch-size', type=int, default=None, help='每批次从数据库读取的图片数量')
parser.add_argument('--concurrency', type=int, default=None, help='并发请求数同时发出的API请求数')
parser.add_argument('--id', type=int, nargs='+', default=None, help='指定ID只处理这些ID可指定多个')
args = parser.parse_args()
batch_size = args.batch_size or settings.tag_derive.batch_size
concurrency = args.concurrency or settings.tag_derive.concurrency
logger.info("=" * 60)
logger.info("千问视觉大模型 - 图片标签衍生生成器")
logger.info(f"模式: 每批{args.batch_size or settings.tag_derive.batch_size}张图片,各自返回衍生标签")
logger.info(f"模式: 每批 {batch_size} 张,并发 {concurrency} 个请求")
if args.id:
logger.info(f"指定ID: {args.id}")
elif args.start_id or args.end_id:
@@ -359,7 +382,13 @@ def main():
logger.info(f"ID范围: {id_range}")
logger.info("=" * 60)
results = batch_derive_tags(batch_size=args.batch_size, start_id=args.start_id, end_id=args.end_id, ids=args.id)
results = batch_derive_tags(
batch_size=args.batch_size,
concurrency=args.concurrency,
start_id=args.start_id,
end_id=args.end_id,
ids=args.id
)
if results:
print_summary(results)

View File

@@ -1,34 +1,185 @@
# -*- coding: utf-8 -*-
"""查询所有带标签字段的数据"""
from database_config import get_db
db = get_db()
# 查询所有带标签相关字段的数据
sql = """
SELECT id, image_id, image_name,
tag_id, tag_name,
default_tag_id, default_tag_name,
keywords_id, keywords_name,
department_id, department_name,
status
FROM ai_image_tags
ORDER BY id
"""
标签数据查询脚本
支持查询 ai_image_tags 和 ai_tags 表的相关数据
"""
results = db.execute_query(sql)
import argparse
from database_config import get_db
print(f"{'=' * 120}")
print(f"ai_image_tags 表中共有 {len(results)} 条数据")
print(f"{'=' * 120}")
# 表头
print(f"{'ID':<6} {'图片ID':<8} {'标签名':<15} {'初始标签名':<15} {'关键词':<12} {'科室':<10} {'状态':<10}")
print(f"{'-' * 120}")
def query_image_tags_by_status(status: str = None, limit: int = 20):
    """Print the newest ai_image_tags rows, optionally filtered by status.

    Args:
        status: Exact ``status`` value to filter on. ``None`` or an empty
            string lists rows of every status.
        limit: Maximum number of rows to fetch, ordered by ``id`` descending.
    """
    db = get_db()

    # The two variants only differ in the WHERE clause, so share the SELECT.
    select_clause = """
        SELECT id, image_id, image_name, tag_id, tag_name,
               default_tag_id, default_tag_name,
               keywords_name, department_name, status, created_at
        FROM ai_image_tags
    """
    if status:
        sql = select_clause + " WHERE status = %s ORDER BY id DESC LIMIT %s"
        params = (status, limit)
    else:
        sql = select_clause + " ORDER BY id DESC LIMIT %s"
        params = (limit,)
    results = db.execute_query(sql, params)

    print(f"\n{'=' * 130}")
    print(f"ai_image_tags 查询结果 (status={status or '全部'}, limit={limit})")
    print(f"{'=' * 130}")
    print(f"{'ID':<8} {'tag_id':<8} {'tag_name':<25} {'default_tag':<20} {'department':<12} {'status':<20}")
    print(f"{'-' * 130}")

    for r in results:
        tag_name = (r['tag_name'] or '')[:24]
        default_tag = (r['default_tag_name'] or '')[:19]
        dept = (r['department_name'] or '')[:11]
        # A NULL column arrives as None, and None rejects a width format spec
        # with TypeError; fall back the same way query_join_data does.
        tag_id = r['tag_id'] or 0
        status_text = r['status'] or ''
        print(f"{r['id']:<8} {tag_id:<8} {tag_name:<25} {default_tag:<20} {dept:<12} {status_text:<20}")

    print(f"{'=' * 130}")
    print(f"显示 {len(results)} 条记录")
for r in results:
print(f"{r['id']:<6} {r['image_id']:<8} {r['tag_name']:<15} {r['default_tag_name']:<15} {r['keywords_name']:<12} {r['department_name']:<10} {r['status']:<10}")
print(f"{'=' * 120}")
print(f"总计: {len(results)} 条记录")
def query_status_stats():
    """Print the number of ai_image_tags rows per status, followed by a total."""
    rows = get_db().execute_query(
        "SELECT status, COUNT(*) as cnt FROM ai_image_tags GROUP BY status ORDER BY cnt DESC"
    )

    print(f"\n{'=' * 50}")
    print(f"ai_image_tags 状态统计")
    print(f"{'=' * 50}")
    print(f"{'状态':<25} {'数量':>10}")
    print(f"{'-' * 50}")

    # Keep a running sum while printing so the result set is walked only once.
    grand_total = 0
    for row in rows:
        count = row['cnt']
        print(f"{row['status']:<25} {count:>10}")
        grand_total = grand_total + count

    print(f"{'-' * 50}")
    print(f"{'总计':<25} {grand_total:>10}")
def query_tags_by_category(category: str = None, limit: int = 20):
    """Print the newest ai_tags rows, optionally restricted to one category.

    Args:
        category: Exact ``tag_category`` value to filter on; a falsy value
            lists tags of every category.
        limit: Maximum number of rows to fetch, ordered by ``id`` descending.
    """
    db = get_db()

    if not category:
        sql = """
            SELECT id, tag_name, tag_category, department, usage_count, status, created_at
            FROM ai_tags
            ORDER BY id DESC
            LIMIT %s
        """
        params = (limit,)
    else:
        sql = """
            SELECT id, tag_name, tag_category, department, usage_count, status, created_at
            FROM ai_tags
            WHERE tag_category = %s
            ORDER BY id DESC
            LIMIT %s
        """
        params = (category, limit)
    results = db.execute_query(sql, params)

    print(f"\n{'=' * 120}")
    print(f"ai_tags 查询结果 (category={category or '全部'}, limit={limit})")
    print(f"{'=' * 120}")
    print(f"{'ID':<8} {'tag_name':<40} {'category':<15} {'department':<12} {'usage':<8}")
    print(f"{'-' * 120}")

    for record in results:
        # Truncate text columns one character short of their column width.
        name_cell = (record['tag_name'] or '')[:39]
        category_cell = (record['tag_category'] or '')[:14]
        dept_cell = (record['department'] or '')[:11]
        print(f"{record['id']:<8} {name_cell:<40} {category_cell:<15} {dept_cell:<12} {record['usage_count']:<8}")

    print(f"{'=' * 120}")
    print(f"显示 {len(results)} 条记录")
def query_join_data(status: str = 'tag_extension', limit: int = 20):
    """Print ai_image_tags rows of one status next to their joined ai_tags row.

    Args:
        status: ``ai_image_tags.status`` value to select.
        limit: Maximum number of rows to fetch, ordered by ``it.id`` descending.
    """
    join_sql = """
        SELECT it.id, it.image_id, it.tag_id, it.tag_name as it_tag_name,
               it.default_tag_name, it.department_name, it.status,
               t.tag_name as t_tag_name, t.tag_category
        FROM ai_image_tags it
        LEFT JOIN ai_tags t ON it.tag_id = t.id
        WHERE it.status = %s
        ORDER BY it.id DESC
        LIMIT %s
    """
    results = get_db().execute_query(join_sql, (status, limit))

    print(f"\n{'=' * 140}")
    print(f"关联查询 (ai_image_tags.status={status})")
    print(f"{'=' * 140}")
    print(f"{'it.id':<8} {'tag_id':<8} {'it.tag_name':<30} {'t.tag_name':<30} {'t.category':<15} {'department':<12}")
    print(f"{'-' * 140}")

    for row in results:
        # The LEFT JOIN leaves the ai_tags columns as None when no tag matches.
        image_tag = (row['it_tag_name'] or '')[:29]
        joined_tag = (row['t_tag_name'] or '')[:29]
        tag_category = (row['tag_category'] or '')[:14]
        department = (row['department_name'] or '')[:11]
        tag_id = row['tag_id'] or 0
        print(f"{row['id']:<8} {tag_id:<8} {image_tag:<30} {joined_tag:<30} {tag_category:<15} {department:<12}")

    print(f"{'=' * 140}")
    print(f"显示 {len(results)} 条记录")
def query_both_tables(status: str = None, category: str = None, limit: int = 20):
    """Print the status statistics, then the ai_image_tags and ai_tags listings.

    Args:
        status: Filter passed to the ai_image_tags listing.
        category: Filter passed to the ai_tags listing.
        limit: Row limit applied to both listings.
    """
    query_status_stats()
    query_image_tags_by_status(status, limit)
    query_tags_by_category(category, limit)
def main():
    """Parse the command line and run the first report whose flag is set.

    Flags are checked in the order --stats, --join, --image-tags, --tags; with
    none of them given, the combined report over both tables is printed.
    """
    parser = argparse.ArgumentParser(description='标签数据查询工具')
    parser.add_argument(
        '--status', '-s', type=str, default=None,
        help='按状态过滤 ai_image_tags (如: tag_extension, manual_review, draft)',
    )
    parser.add_argument(
        '--category', '-c', type=str, default=None,
        help='按分类过滤 ai_tags (如: 衍生标签)',
    )
    parser.add_argument(
        '--limit', '-l', type=int, default=20,
        help='返回记录数限制 (默认: 20)',
    )
    parser.add_argument('--stats', action='store_true', help='仅显示状态统计')
    parser.add_argument(
        '--join', '-j', action='store_true',
        help='关联查询 ai_image_tags 和 ai_tags',
    )
    parser.add_argument(
        '--image-tags', '-i', action='store_true',
        help='仅查询 ai_image_tags 表',
    )
    parser.add_argument('--tags', '-t', action='store_true', help='仅查询 ai_tags 表')
    opts = parser.parse_args()

    if opts.stats:
        query_status_stats()
        return
    if opts.join:
        # The join report always needs a status; default to the pending one.
        query_join_data(status=opts.status or 'tag_extension', limit=opts.limit)
        return
    if opts.image_tags:
        query_image_tags_by_status(status=opts.status, limit=opts.limit)
        return
    if opts.tags:
        query_tags_by_category(category=opts.category, limit=opts.limit)
        return
    # No mode flag: show both tables.
    query_both_tables(status=opts.status, category=opts.category, limit=opts.limit)
if __name__ == "__main__":
main()

310
rebuild_database.py Normal file
View File

@@ -0,0 +1,310 @@
# -*- coding: utf-8 -*-
"""
数据库重建脚本
根据 ai_article.sql 重建数据库结构,从 ai_image_tags.txt 导入数据
"""
import mysql.connector
import os
# Database connection settings. Every value can be overridden through an
# environment variable (DB_HOST, DB_PORT, DB_USER, DB_PASSWORD) so that
# credentials do not have to be edited in source.
# NOTE(review): the fallback password is a real credential committed to the
# repository - rotate it and remove the default once DB_PASSWORD is provisioned.
DB_CONFIG = {
    "host": os.getenv("DB_HOST", "localhost"),
    "port": int(os.getenv("DB_PORT", "3306")),
    "user": os.getenv("DB_USER", "root"),
    "password": os.getenv("DB_PASSWORD", "liang20020523"),
    "charset": "utf8mb4",
}
# Schema that is dropped and recreated by this script.
DATABASE_NAME = "ai_article"
# Input files, resolved relative to this script's directory.
SQL_FILE = "ai_article.sql"
DATA_FILE = "ai_image_tags.txt"
def get_connection(with_database=False):
    """Open a new MySQL connection built from DB_CONFIG.

    Args:
        with_database: When true, the connection selects DATABASE_NAME;
            otherwise no schema is selected (needed while the schema is
            being dropped and recreated).

    Returns:
        The connection object returned by ``mysql.connector.connect``.
    """
    extra = {"database": DATABASE_NAME} if with_database else {}
    # Merge into a fresh dict so the shared DB_CONFIG is never modified.
    return mysql.connector.connect(**{**DB_CONFIG, **extra})
def _split_sql_statements(sql_content):
    """Split a SQL dump into statements, dropping blank lines and comments.

    Lines starting with ``--`` and ``/* ... */`` blocks that start at the
    beginning of a line are removed. A block comment may open and close on the
    same line (this includes ``/*! ... */`` conditional comments, which are
    therefore skipped rather than executed). A statement ends at a line whose
    last non-blank character is ``;``; trailing text without a terminating
    ``;`` is discarded.
    """
    statements = []
    pending_lines = []
    in_comment = False
    for line in sql_content.split("\n"):
        stripped = line.strip()
        if not stripped:
            continue
        if in_comment:
            # Only look for the terminator while actually inside a comment,
            # so a "*/" elsewhere cannot make a statement line disappear.
            if "*/" in stripped:
                in_comment = False
            continue
        if stripped.startswith("--"):
            continue
        if stripped.startswith("/*"):
            # A comment closed on its own line must not leave the flag set.
            in_comment = "*/" not in stripped
            continue
        pending_lines.append(line)
        if stripped.endswith(";"):
            statements.append("\n".join(pending_lines).strip())
            pending_lines = []
    return statements


def rebuild_database_structure():
    """Drop and recreate DATABASE_NAME, then run every statement of SQL_FILE.

    The SQL file is read before anything is dropped, so a missing file aborts
    without touching the database. Each statement is executed and committed on
    its own; a failing statement is counted and reported (except for
    "already exists" errors, which are counted silently) and does not stop the
    run.
    """
    print("=" * 60)
    print("步骤1: 重建数据库结构")
    print("=" * 60)

    sql_path = os.path.join(os.path.dirname(__file__), SQL_FILE)
    print(f"读取SQL文件: {sql_path}")
    with open(sql_path, "r", encoding="utf-8") as f:
        sql_content = f.read()

    # Connect without selecting a schema: the schema is about to be dropped.
    conn = get_connection(with_database=False)
    cursor = conn.cursor()
    try:
        print(f"\n删除数据库 {DATABASE_NAME}(如果存在)...")
        cursor.execute(f"DROP DATABASE IF EXISTS `{DATABASE_NAME}`")
        print(f"创建数据库 {DATABASE_NAME}...")
        cursor.execute(f"CREATE DATABASE `{DATABASE_NAME}` CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci")
        print(f"切换到数据库 {DATABASE_NAME}...")
        cursor.execute(f"USE `{DATABASE_NAME}`")

        print("\n执行SQL脚本...")
        success_count = 0
        error_count = 0
        for stmt in _split_sql_statements(sql_content):
            if not stmt or stmt.strip() == ";":
                continue
            try:
                cursor.execute(stmt)
                conn.commit()
            except mysql.connector.Error as e:
                error_count += 1
                # Only report errors other than "already exists".
                if "already exists" not in str(e).lower():
                    print(f" ✗ SQL执行错误: {str(e)[:100]}")
            else:
                success_count += 1
                if "CREATE TABLE" in stmt.upper():
                    # The table name is the first back-quoted identifier.
                    table_name = stmt.split("`")[1] if "`" in stmt else "unknown"
                    print(f" ✓ 创建表: {table_name}")

        print(f"\nSQL执行完成: 成功 {success_count} 条, 失败 {error_count}")
    finally:
        cursor.close()
        conn.close()
def _parse_tag_rows(lines):
    """Return the tab-split data rows that follow the ``id<TAB>...`` header.

    Everything before the header line is ignored. After it, a line is kept
    when it has at least 10 tab-separated cells and its first cell is numeric.
    """
    rows = []
    header_seen = False
    for line_no, raw in enumerate(lines, start=1):
        text = raw.strip()
        if not text:
            continue
        if text.startswith("id\t"):
            header_seen = True
            print(f"找到列头(第{line_no}行): {text[:80]}...")
            continue
        if not header_seen:
            continue
        cells = text.split("\t")
        if len(cells) >= 10 and cells[0].isdigit():
            rows.append(cells)
    return rows


def _row_to_values(row):
    """Convert one tab-split row into the 20-value tuple used by the INSERT.

    Short rows are padded with empty strings up to 20 cells (in place, as the
    rows are not reused afterwards). Raises ValueError when a required integer
    cell is empty or not numeric.
    """
    while len(row) < 20:
        row.append("")

    def int_or(text, fallback):
        return int(text) if text else fallback

    return (
        int(row[0]),            # id
        int(row[1]),            # image_id
        row[2],                 # image_name
        row[3],                 # image_url
        row[4],                 # image_thumb_url
        int(row[5]),            # tag_id
        row[6],                 # tag_name
        int_or(row[7], 0),      # default_tag_id
        row[8],                 # default_tag_name
        int(row[9]),            # keywords_id
        row[10],                # keywords_name
        int(row[11]),           # department_id
        row[12],                # department_name
        int_or(row[13], 1),     # image_source
        int_or(row[14], 0),     # created_user_id
        row[15],                # created_at
        row[16],                # updated_at
        int_or(row[17], 0),     # image_attached_article_count
        row[18] or "draft",     # status
        row[19],                # blocking_reason (always present after padding)
    )


def import_image_tags_data():
    """Load DATA_FILE (tab-separated export) into the ai_image_tags table.

    Returns early when the file is missing or contains no data rows. Rows are
    inserted one by one with foreign-key checks disabled; a failing row is
    counted (the first three failures are printed) and the rest still load.
    All successful inserts are committed together at the end.
    """
    print("\n" + "=" * 60)
    print("步骤2: 导入 ai_image_tags 数据")
    print("=" * 60)

    data_path = os.path.join(os.path.dirname(__file__), DATA_FILE)
    print(f"读取数据文件: {data_path}")
    if not os.path.exists(data_path):
        print(f"数据文件不存在: {data_path}")
        return

    with open(data_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    data_rows = _parse_tag_rows(lines)
    print(f"解析到 {len(data_rows)} 条数据")
    if not data_rows:
        print("没有数据需要导入")
        return

    conn = get_connection(with_database=True)
    cursor = conn.cursor()
    try:
        cursor.execute("SET FOREIGN_KEY_CHECKS = 0")
        print("已禁用外键检查")

        insert_sql = """
            INSERT INTO ai_image_tags
            (id, image_id, image_name, image_url, image_thumb_url, tag_id, tag_name,
            default_tag_id, default_tag_name, keywords_id, keywords_name,
            department_id, department_name, image_source, created_user_id,
            created_at, updated_at, image_attached_article_count, status, blocking_reason)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """

        inserted = 0
        failed = 0
        for row in data_rows:
            try:
                cursor.execute(insert_sql, _row_to_values(row))
            except Exception as e:
                failed += 1
                # Keep the log short: only the first few failures are shown.
                if failed <= 3:
                    print(f" 插入错误 (id={row[0]}): {e}")
            else:
                inserted += 1

        conn.commit()
        print(f"\n数据导入完成: 成功 {inserted} 条, 失败 {failed}")

        cursor.execute("SET FOREIGN_KEY_CHECKS = 1")
        print("已恢复外键检查")
    finally:
        cursor.close()
        conn.close()
def verify_database():
    """Print a short sanity report: table list, row count and sample rows.

    Lists at most the first 10 tables of DATABASE_NAME, then the number of
    rows in ai_image_tags and, when there are any, up to three sample rows.
    """
    print("\n" + "=" * 60)
    print("步骤3: 验证数据库")
    print("=" * 60)

    conn = get_connection(with_database=True)
    cursor = conn.cursor()
    try:
        cursor.execute("SHOW TABLES")
        tables = cursor.fetchall()
        table_total = len(tables)
        print(f"\n数据库中共有 {table_total} 张表:")
        for entry in tables[:10]:
            print(f" - {entry[0]}")
        if table_total > 10:
            print(f" ... 还有 {table_total - 10} 张表")

        cursor.execute("SELECT COUNT(*) FROM ai_image_tags")
        (count,) = cursor.fetchone()[:1]
        print(f"\nai_image_tags 表共有 {count} 条记录")

        if count > 0:
            cursor.execute("SELECT id, tag_name, department_name FROM ai_image_tags LIMIT 3")
            samples = cursor.fetchall()
            print("示例数据:")
            for sample in samples:
                record_id, tag_name, department = sample[0], sample[1], sample[2]
                print(f" ID: {record_id}, 标签: {tag_name[:30]}..., 科室: {department}")
    finally:
        cursor.close()
        conn.close()
def main():
    """Ask for confirmation, then rebuild, import and verify the database.

    Anything other than ``yes`` (case-insensitive) at the prompt cancels the
    run before any database access happens.
    """
    rule = "=" * 60
    print("\n" + rule)
    print(" 数据库重建脚本")
    print(rule)
    print(f"数据库: {DATABASE_NAME}")
    print(f"SQL文件: {SQL_FILE}")
    print(f"数据文件: {DATA_FILE}")
    print(rule)

    # Destructive operation: require an explicit "yes".
    answer = input("\n警告: 此操作将删除并重建数据库,所有数据将丢失!\n确认继续? (输入 'yes' 确认): ")
    if answer.lower() == "yes":
        rebuild_database_structure()
        import_image_tags_data()
        verify_database()

        print("\n" + rule)
        print("数据库重建完成!")
        print(rule)
    else:
        print("操作已取消")
if __name__ == "__main__":
main()

View File

@@ -1,162 +0,0 @@
# -*- coding: utf-8 -*-
"""
重置 ai_image_tags 和 ai_tags 表数据
从 ai_image_tags.txt (Tab分隔格式) 导入数据
"""
import os
from database_config import get_db
from logger import get_logger
logger = get_logger("reset_data")
def parse_tsv_file(file_path: str) -> list:
    """Parse the tab-separated ai_image_tags export into a list of dicts.

    Export banner lines (server address, URL, row-range notice, the SELECT
    echo) are skipped. Data rows are only accepted after the ``id<TAB>...``
    header has been seen, must start with a digit and must have at least 19
    columns. Empty numeric cells fall back to 0 (``image_source`` to 1), empty
    timestamps to ``None`` and an empty status to ``'draft'``.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    def as_int(text, fallback=0):
        return int(text) if text else fallback

    records = []
    header = None
    for raw in lines:
        line = raw.strip()
        if not line:
            continue

        # Banner lines emitted by the export tool.
        if line.startswith(('8.149', 'http://', 'SELECT')) or '正在显示' in line:
            continue

        if line.startswith('id\t'):
            header = line.split('\t')
            logger.info(f"解析到表头: {len(header)}")
            continue

        if not (header and line[0].isdigit()):
            continue

        fields = line.split('\t')
        if len(fields) < 19:
            continue

        records.append({
            'id': as_int(fields[0]),
            'image_id': as_int(fields[1]),
            'image_name': fields[2],
            'image_url': fields[3],
            'image_thumb_url': fields[4],
            'tag_id': as_int(fields[5]),
            'tag_name': fields[6],
            'default_tag_id': as_int(fields[7]),
            'default_tag_name': fields[8],
            'keywords_id': as_int(fields[9]),
            'keywords_name': fields[10],
            'department_id': as_int(fields[11]),
            'department_name': fields[12],
            'image_source': as_int(fields[13], 1),
            'created_user_id': as_int(fields[14]),
            'created_at': fields[15] or None,
            'updated_at': fields[16] or None,
            'image_attached_article_count': as_int(fields[17]),
            'status': fields[18] or 'draft',
            # The last column is optional: stripping the line removes
            # trailing tabs when it is empty.
            'blocking_reason': fields[19] if len(fields) > 19 else '',
        })

    return records
def reset_tables_and_import(file_path: str):
    """Empty ai_image_tags and ai_tags, then reload both from a TSV export.

    The distinct (tag_id, tag_name) pairs found in the file become the new
    ai_tags rows; every parsed record becomes an ai_image_tags row. The user
    must confirm with ``yes`` before anything is deleted. Individual insert
    failures are logged and skipped; any other error is logged and re-raised.

    Args:
        file_path: Path of the tab-separated export to import.
    """
    db = get_db()

    logger.info(f"正在解析文件: {file_path}")
    records = parse_tsv_file(file_path)
    logger.info(f"共解析 {len(records)} 条记录")

    if not records:
        logger.error("没有解析到有效数据,操作取消")
        return

    # Later records win when the same tag_id appears with different names.
    tag_map = {
        rec['tag_id']: rec['tag_name']
        for rec in records
        if rec['tag_id'] and rec['tag_name']
    }

    print(f"\n即将执行以下操作:")
    print(f"1. 清空 ai_image_tags 表")
    print(f"2. 清空 ai_tags 表")
    print(f"3. 导入 {len(tag_map)} 条 ai_tags 记录")
    print(f"4. 导入 {len(records)} 条 ai_image_tags 记录")
    print(f"\n注意: 此操作不可逆!")

    answer = input("\n确认执行? (输入 yes 继续): ")
    if answer.lower() != 'yes':
        logger.info("操作已取消")
        return

    columns = (
        'id', 'image_id', 'image_name', 'image_url', 'image_thumb_url',
        'tag_id', 'tag_name', 'default_tag_id', 'default_tag_name',
        'keywords_id', 'keywords_name', 'department_id', 'department_name',
        'image_source', 'created_user_id', 'created_at', 'updated_at',
        'image_attached_article_count', 'status', 'blocking_reason',
    )
    tag_sql = "INSERT INTO ai_tags (id, tag_name, status) VALUES (%s, %s, 'active')"
    image_tag_sql = """
        INSERT INTO ai_image_tags
        (id, image_id, image_name, image_url, image_thumb_url, tag_id, tag_name,
        default_tag_id, default_tag_name, keywords_id, keywords_name,
        department_id, department_name, image_source, created_user_id,
        created_at, updated_at, image_attached_article_count, status, blocking_reason)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """

    try:
        # ai_image_tags is emptied before ai_tags (child table first).
        logger.info("清空 ai_image_tags 表...")
        db.execute_update("DELETE FROM ai_image_tags")
        logger.info("清空 ai_tags 表...")
        db.execute_update("DELETE FROM ai_tags")

        db.execute_update("ALTER TABLE ai_image_tags AUTO_INCREMENT = 1")
        db.execute_update("ALTER TABLE ai_tags AUTO_INCREMENT = 1")

        logger.info(f"插入 ai_tags...")
        for tag_id, tag_name in tag_map.items():
            try:
                db.execute_insert(tag_sql, (tag_id, tag_name))
            except Exception as e:
                logger.warning(f"插入 tag {tag_id} 失败: {e}")

        logger.info(f"插入 ai_image_tags...")
        success_count = 0
        for rec in records:
            params = tuple(rec[name] for name in columns)
            try:
                db.execute_insert(image_tag_sql, params)
                success_count += 1
            except Exception as e:
                logger.error(f"插入记录 {rec['id']} 失败: {e}")

        # Issued again after the inserts, same statements as before them.
        db.execute_update("ALTER TABLE ai_image_tags AUTO_INCREMENT = 1")
        db.execute_update("ALTER TABLE ai_tags AUTO_INCREMENT = 1")

        logger.info(f"导入完成: 成功 {success_count}/{len(records)}")
    except Exception as e:
        logger.error(f"操作失败: {e}")
        raise
if __name__ == "__main__":
file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ai_image_tags.txt")
reset_tables_and_import(file_path)

View File

@@ -1,81 +0,0 @@
# -*- coding: utf-8 -*-
"""
重置标签数据脚本
1. 把 ai_image_tags 的 tag_id/tag_name 恢复为 default_tag_id/default_tag_name
2. 删除 ai_tags 表中的衍生标签
"""
from database_config import get_db
def reset_database():
    """Restore image tags to their defaults and delete derived tags.

    Steps, each reported on stdout:
      1. Print how many ai_tags rows have tag_category '衍生标签' and how many
         ai_image_tags rows differ from their default tag.
      2. Copy default_tag_id/default_tag_name into tag_id/tag_name for every
         ai_image_tags row with default_tag_id > 0.
      3. Delete every ai_tags row whose tag_category is '衍生标签'.
      4. Print the remaining derived-tag count and up to five sample rows.
    """
    database = get_db()
    banner = "=" * 60
    count_derived_sql = "SELECT COUNT(*) as cnt FROM ai_tags WHERE tag_category = '衍生标签'"

    # 1. Show the state before anything is modified.
    print(banner)
    print("当前数据状态")
    print(banner)

    # Number of derived tags currently stored.
    before = database.execute_one(count_derived_sql)
    print(f"ai_tags 表中的衍生标签数量: {before['cnt']}")

    # Number of images whose current tag no longer matches the default tag.
    drifted = database.execute_one("""
        SELECT COUNT(*) as cnt FROM ai_image_tags
        WHERE default_tag_id > 0
        AND (tag_id != default_tag_id OR tag_name != default_tag_name)
    """)
    print(f"需要恢复到初始标签的图片数量: {drifted['cnt']}")

    print("\n" + banner)
    print("开始执行清理")
    print(banner)

    # 2. Point every image back at its default tag.
    print("\n[步骤1] 恢复 ai_image_tags 到初始标签...")
    updated_rows = database.execute_update("""
        UPDATE ai_image_tags
        SET tag_id = default_tag_id,
        tag_name = default_tag_name
        WHERE default_tag_id > 0
    """)
    print(f" 已更新 {updated_rows} 条记录")

    # 3. Remove the derived tags themselves.
    print("\n[步骤2] 删除 ai_tags 表中的衍生标签...")
    removed_rows = database.execute_update(
        "DELETE FROM ai_tags WHERE tag_category = '衍生标签'"
    )
    print(f" 已删除 {removed_rows} 条衍生标签")

    # 4. Verify the outcome.
    print("\n" + banner)
    print("清理完成,验证结果")
    print(banner)
    after = database.execute_one(count_derived_sql)
    print(f"剩余衍生标签数量: {after['cnt']}")

    examples = database.execute_query("""
        SELECT id, image_id, tag_name, default_tag_name
        FROM ai_image_tags
        WHERE default_tag_id > 0
        LIMIT 5
    """)
    print("\n前5条图片标签示例:")
    for example in examples:
        print(f" ID:{example['id']} | tag_name: {example['tag_name']} | default_tag_name: {example['default_tag_name']}")
if __name__ == "__main__":
    # The reset is destructive, so require an explicit (case-insensitive)
    # 'yes' before running it; anything else cancels.
    confirm = input("确认执行数据库清理?这将:\n1. 把所有图片标签恢复为初始标签\n2. 删除所有衍生标签\n输入 'yes' 确认: ")
    if confirm.lower() != 'yes':
        print("已取消操作")
    else:
        reset_database()
        print("\n✓ 数据库已重置到干净状态")

299
restore_database.py Normal file
View File

@@ -0,0 +1,299 @@
# -*- coding: utf-8 -*-
"""
数据库恢复脚本
根据 ai_article.sql 重建数据库结构,从 backup_data.json 恢复数据
"""
import mysql.connector
import json
import os
# Database connection settings.
# Host, port, user and password can each be overridden with the matching
# AI_ARTICLE_DB_* environment variable; the fallbacks are the values this
# script originally hard-coded, so existing invocations keep working.
# NOTE(review): the fallback password is a real credential committed to source
# control -- rotate it and supply AI_ARTICLE_DB_PASSWORD instead.
DB_CONFIG = {
    "host": os.environ.get("AI_ARTICLE_DB_HOST", "localhost"),
    "port": int(os.environ.get("AI_ARTICLE_DB_PORT", "3306")),
    "user": os.environ.get("AI_ARTICLE_DB_USER", "root"),
    "password": os.environ.get("AI_ARTICLE_DB_PASSWORD", "liang20020523"),
    "charset": "utf8mb4",
}
# Schema that is dropped/recreated and restored into.
DATABASE_NAME = "ai_article"
# File names of the schema dump and the JSON data backup.
SQL_FILE = "ai_article.sql"
BACKUP_FILE = "backup_data.json"
def get_connection(with_database=False):
    """Open a new MySQL connection built from DB_CONFIG.

    Args:
        with_database: when true, the connection also selects DATABASE_NAME;
            otherwise no default schema is set.

    Returns:
        Whatever ``mysql.connector.connect`` returns for those settings.
    """
    # Always work on a fresh dict so DB_CONFIG itself is never modified.
    if with_database:
        settings = {**DB_CONFIG, "database": DATABASE_NAME}
    else:
        settings = dict(DB_CONFIG)
    return mysql.connector.connect(**settings)
def _split_sql_statements(sql_content):
    """Split the text of a SQL dump into statements, dropping comments.

    The split is line-oriented: blank lines, ``--`` comment lines and
    ``/* ... */`` block comments are skipped, and a statement ends at a line
    whose stripped text ends with ``;``. A line that starts with ``/*`` is
    treated as a comment line in full, including one-line ``/* ... */`` and
    ``/*! ... */;`` forms.

    Args:
        sql_content: full text of the SQL file.

    Returns:
        List of statement strings, in file order.
    """
    statements = []
    pending = []
    in_comment = False
    for line in sql_content.split("\n"):
        stripped = line.strip()
        if in_comment:
            # Inside a block comment only its terminator matters.
            if "*/" in stripped:
                in_comment = False
            continue
        if not stripped or stripped.startswith("--"):
            continue
        if stripped.startswith("/*"):
            # Stay in comment mode only when the comment is not closed on
            # this same line; otherwise a one-line comment would swallow
            # every statement that follows it. Searching after the opener
            # keeps "/*/" from being read as already closed.
            in_comment = "*/" not in stripped[2:]
            continue
        pending.append(line)
        if stripped.endswith(";"):
            statements.append("\n".join(pending).strip())
            pending = []
    if pending:
        # Keep a final statement that has no terminating semicolon.
        statements.append("\n".join(pending).strip())
    return statements


def rebuild_database_structure():
    """Drop and recreate DATABASE_NAME, then run the statements in SQL_FILE.

    SQL_FILE is read from the directory containing this script. Statements
    are executed one at a time; a statement that raises
    ``mysql.connector.Error`` is reported (unless the message contains
    "already exists") and the run continues with the next one. Progress and
    the number of CREATE TABLE statements that succeeded are printed.
    """
    print("=" * 60)
    print("步骤1: 重建数据库结构")
    print("=" * 60)
    sql_path = os.path.join(os.path.dirname(__file__), SQL_FILE)
    print(f"读取SQL文件: {sql_path}")
    with open(sql_path, "r", encoding="utf-8") as f:
        sql_content = f.read()
    # Connect without a default schema: the schema is about to be dropped.
    conn = get_connection(with_database=False)
    cursor = conn.cursor()
    try:
        print(f"\n删除数据库 {DATABASE_NAME}(如果存在)...")
        cursor.execute(f"DROP DATABASE IF EXISTS `{DATABASE_NAME}`")
        print(f"创建数据库 {DATABASE_NAME}...")
        cursor.execute(f"CREATE DATABASE `{DATABASE_NAME}` CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci")
        print(f"切换到数据库 {DATABASE_NAME}...")
        cursor.execute(f"USE `{DATABASE_NAME}`")
        print("\n执行SQL脚本...")
        table_count = 0
        for stmt in _split_sql_statements(sql_content):
            if not stmt or stmt == ";":
                continue
            try:
                cursor.execute(stmt)
                conn.commit()
            except mysql.connector.Error as e:
                # Best effort: report the failure and keep going.
                if "already exists" not in str(e).lower():
                    print(f" ✗ SQL执行错误: {str(e)[:80]}")
                continue
            if "CREATE TABLE" in stmt.upper():
                # The first back-quoted identifier is taken as the table name.
                table_name = stmt.split("`")[1] if "`" in stmt else "unknown"
                print(f" ✓ 创建表: {table_name}")
                table_count += 1
        print(f"\n表结构创建完成: {table_count} 张表")
    finally:
        cursor.close()
        conn.close()
def restore_data():
    """Reload ai_tags and ai_image_tags from the JSON backup file.

    Returns early, after printing a notice, when BACKUP_FILE does not exist
    in this script's directory. Otherwise every backed-up row is inserted
    individually with foreign-key checks switched off on the connection; a
    row that raises ``mysql.connector.Error`` is reported and skipped. Each
    non-empty table is committed once after its rows were attempted.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("步骤2: 恢复表数据")
    print(rule)
    backup_path = os.path.join(os.path.dirname(__file__), BACKUP_FILE)
    print(f"读取备份文件: {backup_path}")
    if not os.path.exists(backup_path):
        print(f"备份文件不存在: {backup_path}")
        return
    with open(backup_path, "r", encoding="utf-8") as backup_file:
        data = json.load(backup_file)

    def insert_statement(table, row):
        # Parameterized INSERT naming exactly the keys present in this row.
        column_list = ", ".join([f"`{name}`" for name in row])
        value_slots = ", ".join("%s" for _ in row)
        return f"INSERT INTO {table} ({column_list}) VALUES ({value_slots})"

    conn = get_connection(with_database=True)
    cursor = conn.cursor()
    try:
        # Disable foreign-key checks while rows are inserted.
        cursor.execute("SET FOREIGN_KEY_CHECKS = 0")
        print("已禁用外键检查\n")

        # Restore ai_tags.
        tags = data.get("ai_tags", [])
        print(f"恢复 ai_tags 表 ({len(tags)} 条)...")
        if tags:
            for tag in tags:
                statement = insert_statement("ai_tags", tag)
                try:
                    cursor.execute(statement, list(tag.values()))
                except mysql.connector.Error as e:
                    print(f" 插入错误: {e}")
            conn.commit()
            print(" ✓ ai_tags 恢复完成")

        # Restore ai_image_tags, counting the rows that went in.
        image_tags = data.get("ai_image_tags", [])
        print(f"恢复 ai_image_tags 表 ({len(image_tags)} 条)...")
        if image_tags:
            inserted = 0
            for item in image_tags:
                statement = insert_statement("ai_image_tags", item)
                try:
                    cursor.execute(statement, list(item.values()))
                    inserted += 1
                except mysql.connector.Error as e:
                    print(f" 插入错误 (id={item.get('id')}): {e}")
            conn.commit()
            print(f" ✓ ai_image_tags 恢复完成 ({inserted} 条)")

        # Re-enable foreign-key checks.
        cursor.execute("SET FOREIGN_KEY_CHECKS = 1")
        print("\n已恢复外键检查")
    finally:
        cursor.close()
        conn.close()
def verify_database():
    """Print a short sanity report about the restored database.

    Reports the number of tables, the row counts of ai_tags and
    ai_image_tags, and up to three sample ai_tags rows (id, the first 40
    characters of tag_name, department).
    """
    rule = "=" * 60
    print("\n" + rule)
    print("步骤3: 验证数据库")
    print(rule)
    conn = get_connection(with_database=True)
    cursor = conn.cursor()
    try:
        cursor.execute("SHOW TABLES")
        table_rows = cursor.fetchall()
        print(f"\n数据库共有 {len(table_rows)} 张表")

        # Row counts of the two restored tables.
        for table in ("ai_tags", "ai_image_tags"):
            cursor.execute(f"SELECT COUNT(*) FROM {table}")
            (count,) = cursor.fetchone()[:1]
            print(f"{table} 表: {count} 条记录")

        # Show a few example rows.
        cursor.execute("SELECT id, tag_name, department FROM ai_tags LIMIT 3")
        samples = cursor.fetchall()
        if samples:
            print("\nai_tags 示例:")
            for tag_id, tag_name, department in samples:
                print(f" ID: {tag_id}, 标签: {tag_name[:40]}..., 科室: {department}")
    finally:
        cursor.close()
        conn.close()
def backup_current_data():
    """Dump every row of ai_tags and ai_image_tags into BACKUP_FILE as JSON.

    The file is written to this script's directory as
    ``{"ai_tags": [...], "ai_image_tags": [...]}``. Values that expose an
    ``isoformat`` method are stored as their ISO string. The JSON text is
    built completely in memory and written to a temporary sibling file that
    then replaces BACKUP_FILE, so a serialization or write failure leaves
    the previous backup untouched.

    Raises:
        TypeError: if a row holds a value ``json`` cannot serialize.
    """
    print("=" * 60)
    print("备份当前数据")
    print("=" * 60)
    conn = get_connection(with_database=True)
    cursor = conn.cursor(dictionary=True)
    try:
        # Export ai_tags.
        cursor.execute('SELECT * FROM ai_tags')
        tags = cursor.fetchall()
        print(f"ai_tags: {len(tags)}")
        # Export ai_image_tags.
        cursor.execute('SELECT * FROM ai_image_tags')
        image_tags = cursor.fetchall()
        print(f"ai_image_tags: {len(image_tags)}")

        def convert_datetime(row):
            # JSON has no date/time type: store such values as ISO strings.
            return {
                key: value.isoformat() if hasattr(value, 'isoformat') else value
                for key, value in row.items()
            }

        data = {
            'ai_tags': [convert_datetime(t) for t in tags],
            'ai_image_tags': [convert_datetime(t) for t in image_tags],
        }
        # Serialize before touching the disk: opening BACKUP_FILE with 'w'
        # first would truncate the last good backup even when dumping fails.
        payload = json.dumps(data, ensure_ascii=False, indent=2)
        backup_path = os.path.join(os.path.dirname(__file__), BACKUP_FILE)
        temp_path = backup_path + ".tmp"
        with open(temp_path, 'w', encoding='utf-8') as f:
            f.write(payload)
        # Swap the finished file into place in a single step.
        os.replace(temp_path, backup_path)
        print(f"\n数据已保存到 {backup_path}")
    finally:
        cursor.close()
        conn.close()
def main():
    """Show the interactive menu and run the selected operation.

    Option 1 rebuilds the schema, restores the data and verifies it, but
    only after the user types 'yes'. Option 2 restores and verifies without
    rebuilding. Option 3 writes a backup. Any other input cancels.
    """
    rule = "=" * 60
    print("\n" + rule)
    print(" 数据库恢复脚本")
    print(rule)
    print(f"数据库: {DATABASE_NAME}")
    print(f"SQL文件: {SQL_FILE}")
    print(f"备份文件: {BACKUP_FILE}")
    print(rule)
    print("\n请选择操作:")
    print(" 1. 完整恢复 (重建结构 + 恢复数据)")
    print(" 2. 仅恢复数据 (保留现有结构)")
    print(" 3. 备份当前数据")
    print(" 0. 取消")
    choice = input("\n请输入选项 (0-3): ").strip()

    if choice == "2":
        restore_data()
        verify_database()
        print("\n数据恢复完成!")
        return
    if choice == "3":
        backup_current_data()
        print("\n备份完成!")
        return
    if choice != "1":
        print("操作已取消")
        return

    # Option 1 drops the database, so ask for explicit confirmation first.
    confirm = input("\n警告: 此操作将删除并重建数据库!确认? (输入 'yes'): ")
    if confirm.lower() != "yes":
        print("操作已取消")
        return
    rebuild_database_structure()
    restore_data()
    verify_database()
    print("\n" + rule)
    print("数据库恢复完成!")
    print(rule)


if __name__ == "__main__":
    main()

View File

@@ -251,13 +251,12 @@ class TagDeriveService:
return ''.join([f'#{t}#' for t in all_tags])
def get_pending_images(self, limit: int = 100) -> List[Dict]:
"""获取待处理的图片"""
"""获取待处理的图片status='tag_extension'"""
sql = """
SELECT it.id, it.image_thumb_url, it.tag_id, it.tag_name, it.department_name
FROM ai_image_tags it
LEFT JOIN ai_tags t ON it.tag_id = t.id
WHERE it.image_thumb_url != '' AND it.tag_name != ''
AND (t.tag_category IS NULL OR t.tag_category != '衍生标签')
AND it.status = 'tag_extension'
ORDER BY it.id
LIMIT %s
"""
@@ -279,12 +278,15 @@ class TagDeriveService:
"SELECT COUNT(*) as cnt FROM ai_image_tags WHERE image_thumb_url != ''"
)['cnt']
# 处理数
processed = self.db.execute_one("""
SELECT COUNT(*) as cnt FROM ai_image_tags it
JOIN ai_tags t ON it.tag_id = t.id
WHERE t.tag_category = '衍生标签'
""")['cnt']
# 处理数status='tag_extension'
pending = self.db.execute_one(
"SELECT COUNT(*) as cnt FROM ai_image_tags WHERE status = 'tag_extension'"
)['cnt']
# 已处理数status='manual_review'
processed = self.db.execute_one(
"SELECT COUNT(*) as cnt FROM ai_image_tags WHERE status = 'manual_review'"
)['cnt']
# 衍生标签数
derived_count = self.db.execute_one(
@@ -294,7 +296,7 @@ class TagDeriveService:
return {
"total_images": total,
"processed_images": processed,
"pending_images": total - processed,
"pending_images": pending,
"derived_tags_count": derived_count
}

View File

@@ -1 +0,0 @@
-- Adds a derived_tag column to ai_article.ai_image_tags: VARCHAR(1000),
-- NOT NULL, defaulting to the empty string. Per the column comment, the value
-- holds derived tags written in the form #tag1##tag2#.
ALTER TABLE ai_article.ai_image_tags ADD COLUMN derived_tag VARCHAR(1000) NOT NULL DEFAULT '' COMMENT '衍生标签,格式:#标签1##标签2#';