diff --git a/.gitignore b/.gitignore index bd8ccd1..bb4ac20 100644 --- a/.gitignore +++ b/.gitignore @@ -3,21 +3,45 @@ __pycache__/ *.py[cod] *$py.class *.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg -# Virtual environment +# Virtual Environment venv/ env/ -.venv/ +ENV/ # IDE -.idea/ .vscode/ +.idea/ *.swp *.swo -# Logs -*.log - # OS .DS_Store Thumbs.db + +# Logs +*.log +logs/ + +# Data files: ignore all JSON except backup_data.json, the one specific file we want to track +*.json +!backup_data.json + +# Local config +config/local_settings.py \ No newline at end of file diff --git a/1.png b/1.png deleted file mode 100644 index f30afcc..0000000 Binary files a/1.png and /dev/null differ diff --git a/ai_article.sql b/ai_article.sql index cd4d994..0a40562 100644 --- a/ai_article.sql +++ b/ai_article.sql @@ -11,7 +11,7 @@ Target Server Version : 90001 (9.0.1) File Encoding : 65001 - Date: 28/01/2026 14:04:39 + Date: 02/02/2026 16:34:15 */ SET NAMES utf8mb4; @@ -41,7 +41,7 @@ CREATE TABLE `ai_article_images` ( INDEX `image_id`(`image_id` ASC) USING BTREE, INDEX `idx_tag_article_lookup`(`image_tag_id` ASC, `article_id` ASC) USING BTREE, INDEX `idx_article_images_article_tag`(`article_id` ASC, `image_tag_id` ASC) USING BTREE -) ENGINE = InnoDB AUTO_INCREMENT = 699 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC; +) ENGINE = InnoDB AUTO_INCREMENT = 700 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC; -- ---------------------------- -- Table structure for ai_article_tags @@ -111,7 +111,7 @@ CREATE TABLE `ai_articles` ( CONSTRAINT `ai_articles_ibfk_2` FOREIGN KEY (`created_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, CONSTRAINT `ai_articles_ibfk_3` FOREIGN KEY (`review_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, CONSTRAINT `ai_articles_ibfk_4` FOREIGN KEY 
(`publish_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT -) ENGINE = InnoDB AUTO_INCREMENT = 1180 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; +) ENGINE = InnoDB AUTO_INCREMENT = 1350 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; -- ---------------------------- -- Table structure for ai_authors @@ -178,6 +178,25 @@ CREATE TABLE `ai_batch_uploads` ( CONSTRAINT `ai_batch_uploads_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT ) ENGINE = InnoDB AUTO_INCREMENT = 101 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; +-- ---------------------------- +-- Table structure for ai_department_config +-- ---------------------------- +DROP TABLE IF EXISTS `ai_department_config`; +CREATE TABLE `ai_department_config` ( + `id` int UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `department_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '科室名称', + `department_code` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '科室编码', + `keywords` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '关联关键词(JSON数组)', + `priority` int NOT NULL DEFAULT 0 COMMENT '优先级', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态:0-禁用,1-启用', + `remark` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '备注', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_department_code`(`department_code` ASC) USING BTREE, + INDEX `idx_status`(`status` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 3 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci COMMENT = '科室标签配置表' ROW_FORMAT = Dynamic; + -- ---------------------------- -- Table structure for 
ai_departments -- ---------------------------- @@ -218,8 +237,11 @@ CREATE TABLE `ai_image_tags` ( `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, `image_attached_article_count` int NOT NULL DEFAULT 0 COMMENT 'Number of articles the image is attached to', - `status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'draft' COMMENT '图片完整扭转流程状态', + `status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','tag_extension','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','generate_review','generate','generate_failed','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'draft' COMMENT '图片完整扭转流程状态', `blocking_reason` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '审核不通过原因', + `similarity` enum('draft','yes','calc','recalc') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT 'yes=是相似|calc=已计算|recalc=需要重新计算', + `similarity_image_tags_id` int NOT NULL DEFAULT 0 COMMENT 'yes=是相似|把image_tags_id写入', + `similarity score` float NOT NULL DEFAULT 0 COMMENT '相似时候,计算相似度值', PRIMARY KEY (`id`) USING BTREE, UNIQUE INDEX `uk_image_tag`(`image_id` ASC, `tag_id` ASC) USING BTREE, INDEX `tag_id`(`tag_id` ASC) USING BTREE, @@ -237,7 +259,7 @@ CREATE TABLE `ai_image_tags` ( INDEX `idx_tag_name_id`(`tag_name` ASC, `id` ASC) USING BTREE, INDEX `idx_tag_notnull_id`(`id` ASC, `tag_name` ASC, `image_id` ASC, `created_at` ASC) USING BTREE, CONSTRAINT `ai_image_tags_ibfk_2` FOREIGN KEY (`tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT -) 
ENGINE = InnoDB AUTO_INCREMENT = 929767 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; +) ENGINE = InnoDB AUTO_INCREMENT = 929784 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; -- ---------------------------- -- Table structure for ai_images @@ -263,7 +285,7 @@ CREATE TABLE `ai_images` ( INDEX `upload_user_id`(`upload_user_id` ASC) USING BTREE, INDEX `idx_status_updated`(`status` ASC, `updated_at` ASC) USING BTREE, CONSTRAINT `ai_images_ibfk_1` FOREIGN KEY (`upload_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT -) ENGINE = InnoDB AUTO_INCREMENT = 26832 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; +) ENGINE = InnoDB AUTO_INCREMENT = 26849 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; -- ---------------------------- -- Table structure for ai_keywords @@ -302,7 +324,7 @@ CREATE TABLE `ai_logs` ( INDEX `user_id`(`user_id` ASC) USING BTREE, INDEX `idx_created_at`(`created_at` DESC) USING BTREE, CONSTRAINT `ai_logs_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT -) ENGINE = InnoDB AUTO_INCREMENT = 116027 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; +) ENGINE = InnoDB AUTO_INCREMENT = 116565 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; -- ---------------------------- -- Table structure for ai_mip_click @@ -412,7 +434,7 @@ CREATE TABLE `ai_mip_query_task` ( INDEX `idx_category`(`category` ASC) USING BTREE COMMENT '按分类查询', INDEX `idx_threshold`(`threshold_max` ASC, `current_count` ASC) USING BTREE COMMENT '阈值监控', INDEX `idx_closed`(`closed_at` ASC) USING BTREE COMMENT '关闭时间索引' -) ENGINE = InnoDB AUTO_INCREMENT = 1 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = 'MIP查询任务表 - 用于存储查询词抓取网址任务' ROW_FORMAT = DYNAMIC; +) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = 'MIP查询任务表 - 
用于存储查询词抓取网址任务' ROW_FORMAT = DYNAMIC; -- ---------------------------- -- Table structure for ai_mip_site @@ -508,6 +530,61 @@ CREATE TABLE `ai_prompt_workflow` ( INDEX `idx_query_enable`(`query_enable` ASC) USING BTREE ) ENGINE = InnoDB AUTO_INCREMENT = 16 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; +-- ---------------------------- +-- Table structure for ai_query_audit +-- ---------------------------- +DROP TABLE IF EXISTS `ai_query_audit`; +CREATE TABLE `ai_query_audit` ( + `id` bigint UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `query_text` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT 'Query原文', + `query_hash` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT 'Query MD5哈希值,用于去重', + `query_status` tinyint NOT NULL DEFAULT 0 COMMENT 'Query状态:0-待审核,1-已过滤(黑名单),2-已通过,3-已拒绝,4-待人工审核', + `filter_reason` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '过滤原因', + `matched_keywords` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '命中的黑名单关键词(JSON数组)', + `department_tags` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '科室标签(JSON数组):影像科、CT等', + `batch_tag_result` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL COMMENT '批量打标签结果(JSON)', + `ai_score` decimal(5, 2) NULL DEFAULT NULL COMMENT 'AI评分', + `is_health_related` tinyint NULL DEFAULT NULL COMMENT '是否健康相关:0-否,1-是', + `is_complete_sentence` tinyint NULL DEFAULT NULL COMMENT '是否完整语句:0-否,1-是', + `has_person_name` tinyint NULL DEFAULT 0 COMMENT '是否包含人名:0-否,1-是', + `has_location` tinyint NULL DEFAULT 0 COMMENT '是否包含地名:0-否,1-是', + `has_hospital_name` tinyint NULL DEFAULT 0 COMMENT '是否包含医院名:0-否,1-是', + `source` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '来源渠道', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL 
DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_query_hash`(`query_hash` ASC) USING BTREE, + INDEX `idx_query_status`(`query_status` ASC, `create_time` ASC) USING BTREE, + INDEX `idx_department_tags`(`department_tags`(100) ASC) USING BTREE, + INDEX `idx_create_time`(`create_time` ASC) USING BTREE +) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci COMMENT = 'AI Query审核记录表' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_query_blacklist +-- ---------------------------- +DROP TABLE IF EXISTS `ai_query_blacklist`; +CREATE TABLE `ai_query_blacklist` ( + `id` bigint UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `keyword` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '黑名单关键词', + `keyword_type` tinyint NOT NULL DEFAULT 1 COMMENT '关键词类型:1-通用词汇,2-人名,3-地名,4-医院名,5-其他', + `filter_rule` tinyint NOT NULL DEFAULT 1 COMMENT '过滤规则:1-包含即过滤,2-完全匹配,3-正则匹配', + `regex_pattern` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '正则表达式(当filter_rule=3时使用)', + `category` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '分类标签:药品、图片、费用等', + `priority` int NOT NULL DEFAULT 0 COMMENT '优先级,数值越大优先级越高', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态:0-禁用,1-启用', + `remark` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '备注说明', + `creator` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '创建人', + `updater` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '更新人', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `is_deleted` tinyint NOT NULL DEFAULT 0 COMMENT '是否删除:0-未删除,1-已删除', + PRIMARY KEY (`id`) USING BTREE, + 
UNIQUE INDEX `uk_keyword`(`keyword` ASC, `is_deleted` ASC) USING BTREE, + INDEX `idx_keyword_type`(`keyword_type` ASC, `status` ASC) USING BTREE, + INDEX `idx_category`(`category` ASC, `status` ASC) USING BTREE, + INDEX `idx_create_time`(`create_time` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 66 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci COMMENT = 'AI查询黑名单词表' ROW_FORMAT = Dynamic; + -- ---------------------------- -- Table structure for ai_query_category -- ---------------------------- @@ -700,7 +777,7 @@ CREATE TABLE `ai_tag_subsets` ( PRIMARY KEY (`id`) USING BTREE, INDEX `parent_tag_id`(`parent_tag_id` ASC) USING BTREE, CONSTRAINT `ai_tag_subsets_ibfk_1` FOREIGN KEY (`parent_tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT -) ENGINE = InnoDB AUTO_INCREMENT = 20478 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; +) ENGINE = InnoDB AUTO_INCREMENT = 20495 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; -- ---------------------------- -- Table structure for ai_tags @@ -719,7 +796,7 @@ CREATE TABLE `ai_tags` ( PRIMARY KEY (`id`) USING BTREE, UNIQUE INDEX `uk_tag_name`(`tag_name` ASC) USING BTREE, INDEX `idx_status_updated`(`status` ASC, `updated_at` ASC) USING BTREE -) ENGINE = InnoDB AUTO_INCREMENT = 13417 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; +) ENGINE = InnoDB AUTO_INCREMENT = 13434 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; -- ---------------------------- -- Table structure for ai_topic_type @@ -810,7 +887,7 @@ CREATE TABLE `ai_users` ( `phone` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, `xhs_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL COMMENT '小红书Cookie', `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, - `role` enum('admin','editor','reviewer','publisher','each_title_reviewer','reviewer_query') 
CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'editor' COMMENT '用户角色', + `role` enum('admin','editor','reviewer','publisher','each_title_reviewer','reviewer_query','reviewer_image') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'editor' COMMENT '用户角色', `status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active', `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, @@ -848,7 +925,7 @@ CREATE TABLE `baidu_keyword` ( `blocking_reason` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '审核不通过原因', `article_id` int NOT NULL DEFAULT 0 COMMENT '文章ID', `query_stage` enum('draft','created','summary','reviewed','generated','published') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT '分5个阶段,创建|总结|审核|生文|发布', - `query_status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','generate_review','generate','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'draft' COMMENT 'query完整扭转流程状态', + `query_status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','generate_review','generate','generate_failed','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'draft' COMMENT 'query完整扭转流程状态', `status` enum('draft','available','unavailable','successful','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT '状态_分2个阶段|可用|不可用|发布成功|发布失败', `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, `review_user_id` int NOT NULL 
DEFAULT 0 COMMENT '审核用户ID', @@ -857,6 +934,8 @@ CREATE TABLE `baidu_keyword` ( `similarity_query_keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT 'yes=是相似|把query写入', `similarity score` float NOT NULL DEFAULT 0 COMMENT '相似时候,计算相似度值', `reviewed_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '审核日期', + `fast_track` tinyint(1) NOT NULL DEFAULT 0 COMMENT '加急|0=否|1=是', + `automated_review_failed_reason` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '千问大模型审核query不符合原因', PRIMARY KEY (`id`) USING BTREE, UNIQUE INDEX `keyword`(`keyword` ASC) USING BTREE, INDEX `idx_crawled_seed`(`crawled` ASC, `seed_id` ASC) USING BTREE, @@ -881,8 +960,9 @@ CREATE TABLE `baidu_keyword` ( INDEX `idx_query_status_cover`(`query_status` ASC) USING BTREE, INDEX `idx_query_status_id_asc`(`query_status` ASC, `id` ASC) USING BTREE, INDEX `idx_status_order_covering`(`query_status` ASC, `id` ASC, `keyword` ASC) USING BTREE, + INDEX `idx_status_fast_id_keyword`(`query_status` ASC, `fast_track` ASC, `id` ASC, `keyword` ASC) USING BTREE, FULLTEXT INDEX `idx_keyword_fulltext`(`keyword`) -) ENGINE = InnoDB AUTO_INCREMENT = 901728 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic; +) ENGINE = InnoDB AUTO_INCREMENT = 901869 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic; -- ---------------------------- -- Table structure for baidu_query_task @@ -922,9 +1002,14 @@ CREATE TABLE `baidu_seed_keywords` ( `status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'ready', `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, `priority_weight` int NOT NULL DEFAULT 0 COMMENT '优先级和权重1~10000|更高的先处理', + `fast_track` tinyint(1) NOT NULL DEFAULT 0 COMMENT '加急|0=否|1=是', PRIMARY KEY (`id`) USING BTREE, UNIQUE INDEX `keyword`(`keyword` ASC) USING BTREE, - INDEX `idx_crawled_priority`(`crawled` 
ASC, `priority_weight` DESC) USING BTREE -) ENGINE = InnoDB AUTO_INCREMENT = 48 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = DYNAMIC; + INDEX `idx_crawled_priority`(`crawled` ASC, `priority_weight` DESC) USING BTREE, + INDEX `idx_fast_track`(`fast_track` ASC) USING BTREE, + INDEX `idx_crawled_fast_weight`(`crawled` ASC, `fast_track` ASC, `priority_weight` DESC) USING BTREE, + INDEX `idx_crawled_fast_weight_covering`(`crawled` ASC, `fast_track` ASC, `priority_weight` DESC, `keyword` ASC, `id` ASC) USING BTREE, + INDEX `idx_crawled_fast_priority`(`crawled` ASC, `fast_track` ASC, `priority_weight` DESC, `id` ASC, `keyword` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 100001 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = DYNAMIC; SET FOREIGN_KEY_CHECKS = 1; diff --git a/ai_tags.txt b/ai_tags.txt index e69de29..9d079e1 100644 --- a/ai_tags.txt +++ b/ai_tags.txt @@ -0,0 +1,9 @@ +12679 +#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# +NULL +妇科 +NULL +0 +active +2025-08-16 21:48:16 +2025-08-16 21:48:16 \ No newline at end of file diff --git a/backup_data.json b/backup_data.json new file mode 100644 index 0000000..4b4778a --- /dev/null +++ b/backup_data.json @@ -0,0 +1,642 @@ +{ + "ai_tags": [ + { + "id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "tag_category": null, + "department": "妇科", + "description": null, + "usage_count": 0, + "status": "active", + "created_at": "2025-08-16T21:48:16", + "updated_at": "2025-08-16T21:48:16" + } + ], + "ai_image_tags": [ + { + "id": 16495, + "image_id": 19346, + "image_name": "1755312359566253.png", + "image_url": "20250816/1755312359566253.png", + "image_thumb_url": "20250816/1755312359566253_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": 
"2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:19:11", + "image_attached_article_count": 7, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16496, + "image_id": 19347, + "image_name": "1755312362360723.png", + "image_url": "20250816/1755312362360723.png", + "image_thumb_url": "20250816/1755312362360723_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:18:55", + "image_attached_article_count": 8, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16497, + "image_id": 19348, + "image_name": "1755312364406476.png", + "image_url": "20250816/1755312364406476.png", + "image_thumb_url": "20250816/1755312364406476_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:18:55", + "image_attached_article_count": 8, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16498, + "image_id": 19349, + "image_name": "1755312367284353.png", + "image_url": "20250816/1755312367284353.png", + "image_thumb_url": "20250816/1755312367284353_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + 
"department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:18:55", + "image_attached_article_count": 8, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16499, + "image_id": 19350, + "image_name": "1755312370484005.png", + "image_url": "20250816/1755312370484005.png", + "image_thumb_url": "20250816/1755312370484005_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:19:11", + "image_attached_article_count": 7, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16500, + "image_id": 19351, + "image_name": "1755312373245801.png", + "image_url": "20250816/1755312373245801.png", + "image_thumb_url": "20250816/1755312373245801_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:35:41", + "image_attached_article_count": 17, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16501, + "image_id": 19352, + "image_name": "1755312378278262.png", + "image_url": "20250816/1755312378278262.png", + "image_thumb_url": "20250816/1755312378278262_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + 
"default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:34:55", + "image_attached_article_count": 35, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16502, + "image_id": 19353, + "image_name": "1755312380298110.png", + "image_url": "20250816/1755312380298110.png", + "image_thumb_url": "20250816/1755312380298110_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:34:51", + "image_attached_article_count": 37, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16503, + "image_id": 19354, + "image_name": "1755312382399131.png", + "image_url": "20250816/1755312382399131.png", + "image_thumb_url": "20250816/1755312382399131_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:33:30", + "image_attached_article_count": 93, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16504, + "image_id": 19355, + "image_name": "1755312386945978.png", + "image_url": "20250816/1755312386945978.png", + "image_thumb_url": 
"20250816/1755312386945978_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:35:33", + "image_attached_article_count": 20, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16505, + "image_id": 19356, + "image_name": "1755312388894962.png", + "image_url": "20250816/1755312388894962.png", + "image_thumb_url": "20250816/1755312388894962_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:35:06", + "image_attached_article_count": 30, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16506, + "image_id": 19357, + "image_name": "1755312391383717.png", + "image_url": "20250816/1755312391383717.png", + "image_thumb_url": "20250816/1755312391383717_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:34:30", + "image_attached_article_count": 49, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16507, + "image_id": 19358, + 
"image_name": "1755312393565035.png", + "image_url": "20250816/1755312393565035.png", + "image_thumb_url": "20250816/1755312393565035_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:32:57", + "image_attached_article_count": 135, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16508, + "image_id": 19359, + "image_name": "1755312396609453.png", + "image_url": "20250816/1755312396609453.png", + "image_thumb_url": "20250816/1755312396609453_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:18:55", + "image_attached_article_count": 8, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16509, + "image_id": 19360, + "image_name": "1755312401479871.png", + "image_url": "20250816/1755312401479871.png", + "image_thumb_url": "20250816/1755312401479871_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:35:41", + "image_attached_article_count": 17, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + 
"similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16510, + "image_id": 19361, + "image_name": "1755312407229190.png", + "image_url": "20250816/1755312407229190.png", + "image_thumb_url": "20250816/1755312407229190_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:35:29", + "image_attached_article_count": 21, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16511, + "image_id": 19362, + "image_name": "1755312410797310.png", + "image_url": "20250816/1755312410797310.png", + "image_thumb_url": "20250816/1755312410797310_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 186, + "keywords_name": "妇科炎症", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:35:08", + "image_attached_article_count": 29, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16512, + "image_id": 19363, + "image_name": "1755312437724619.png", + "image_url": "20250816/1755312437724619.png", + "image_thumb_url": "20250816/1755312437724619_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 265, + "keywords_name": "废止", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:33:59", + 
"image_attached_article_count": 69, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16513, + "image_id": 19364, + "image_name": "1755312440270419.png", + "image_url": "20250816/1755312440270419.png", + "image_thumb_url": "20250816/1755312440270419_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 265, + "keywords_name": "废止", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:33:29", + "image_attached_article_count": 94, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16514, + "image_id": 19365, + "image_name": "1755312442259884.png", + "image_url": "20250816/1755312442259884.png", + "image_thumb_url": "20250816/1755312442259884_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 265, + "keywords_name": "废止", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:33:18", + "image_attached_article_count": 107, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16515, + "image_id": 19366, + "image_name": "1755312445610363.png", + "image_url": "20250816/1755312445610363.png", + "image_thumb_url": "20250816/1755312445610363_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 265, + "keywords_name": "废止", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + 
"created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:32:36", + "image_attached_article_count": 173, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16516, + "image_id": 19367, + "image_name": "1755312448884355.png", + "image_url": "20250816/1755312448884355.png", + "image_thumb_url": "20250816/1755312448884355_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 265, + "keywords_name": "废止", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:33:14", + "image_attached_article_count": 111, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16517, + "image_id": 19368, + "image_name": "1755312451681906.png", + "image_url": "20250816/1755312451681906.png", + "image_thumb_url": "20250816/1755312451681906_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 265, + "keywords_name": "废止", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:33:29", + "image_attached_article_count": 94, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16518, + "image_id": 19369, + "image_name": "1755312453351689.png", + "image_url": "20250816/1755312453351689.png", + "image_thumb_url": "20250816/1755312453351689_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 265, + 
"keywords_name": "废止", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:33:24", + "image_attached_article_count": 100, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + }, + { + "id": 16519, + "image_id": 19370, + "image_name": "1755312456284588.png", + "image_url": "20250816/1755312456284588.png", + "image_thumb_url": "20250816/1755312456284588_thumb.png", + "tag_id": 12679, + "tag_name": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "default_tag_id": 0, + "default_tag_name": "", + "keywords_id": 265, + "keywords_name": "废止", + "department_id": 11, + "department_name": "妇科", + "image_source": 1, + "created_user_id": 0, + "created_at": "2025-08-16T21:48:16", + "updated_at": "2026-01-30T14:33:09", + "image_attached_article_count": 118, + "status": "draft", + "blocking_reason": "", + "similarity": "draft", + "similarity_image_tags_id": 0, + "similarity score": 0.0 + } + ] +} \ No newline at end of file diff --git a/config/settings.py b/config/settings.py index c6201ec..f012e74 100644 --- a/config/settings.py +++ b/config/settings.py @@ -35,7 +35,8 @@ class QwenConfig: @dataclass class TagDeriveConfig: """标签衍生配置""" - batch_size: int = 3 # 每批处理图片数 + batch_size: int = 50 # 每批次从数据库读取的图片数量 + concurrency: int = 10 # 并发请求数(同时发出的API请求数) min_derived_tags: int = 5 # 最少衍生标签数 max_derived_tags: int = 10 # 最多衍生标签数 max_tag_length: int = 10 # 单个标签最大长度 @@ -86,6 +87,7 @@ class Settings: # 标签衍生配置 settings.tag_derive.batch_size = int(os.getenv("BATCH_SIZE", settings.tag_derive.batch_size)) + settings.tag_derive.concurrency = int(os.getenv("CONCURRENCY", settings.tag_derive.concurrency)) settings.tag_derive.min_derived_tags = int(os.getenv("MIN_DERIVED_TAGS", settings.tag_derive.min_derived_tags)) settings.tag_derive.max_derived_tags = int(os.getenv("MAX_DERIVED_TAGS", 
settings.tag_derive.max_derived_tags)) settings.tag_derive.image_cdn_base = os.getenv("IMAGE_CDN_BASE", settings.tag_derive.image_cdn_base) diff --git a/derive_results.json b/derive_results.json index 26fcb10..6c59064 100644 --- a/derive_results.json +++ b/derive_results.json @@ -1,4 +1,140 @@ [ + { + "success": true, + "image_id": 16495, + "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "derived_tags": [ + "阴道炎", + "白带异常", + "妇科感染", + "盆腔炎", + "宫颈炎", + "分泌物增多", + "抗炎治疗", + "妇科检查" + ], + "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##白带异常##妇科感染##盆腔炎#", + "new_tag_id": 13434 + }, + { + "success": true, + "image_id": 16508, + "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "derived_tags": [ + "阴道炎", + "宫颈炎", + "盆腔炎", + "白带异常", + "外阴瘙痒", + "妇科检查", + "抗炎治疗", + "女性健康" + ], + "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##白带异常#", + "new_tag_id": 13435 + }, + { + "success": true, + "image_id": 16506, + "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "derived_tags": [ + "阴道炎", + "盆腔炎", + "白带异常", + "妇科检查", + "抗生素治疗", + "私处护理", + "月经不调", + "感染预防" + ], + "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##盆腔炎##白带异常##妇科检查#", + "new_tag_id": 13436 + }, + { + "success": true, + "image_id": 16503, + "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "derived_tags": [ + "阴道炎", + "宫颈炎", + "盆腔炎", + "白带异常", + "外阴瘙痒", + "妇科感染", + "炎症治疗", + "妇科疾病" + ], + "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##白带异常#", + "new_tag_id": 13435 + }, + { + "success": true, + "image_id": 16515, + "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "derived_tags": [ + "妇科疾病", + "阴道炎", + "盆腔炎", + "白带异常", + "月经不调", + "抗炎治疗", + "个人卫生", + "免疫力" + ], + "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##妇科疾病##阴道炎##盆腔炎##白带异常#", + "new_tag_id": 13437 + }, + { + "success": true, + "image_id": 16512, + "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "derived_tags": [ + "阴道炎", + "盆腔炎", + "白带异常", + "月经不调", + "抗生素治疗", + "妇科检查", + "免疫力下降", + "激素变化" + ], + 
"merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##盆腔炎##白带异常##月经不调#", + "new_tag_id": 13438 + }, + { + "success": true, + "image_id": 16514, + "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "derived_tags": [ + "妇科疾病", + "阴道炎", + "盆腔炎", + "白带异常", + "抗生素治疗", + "个人卫生", + "免疫力下降", + "月经不调" + ], + "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##妇科疾病##阴道炎##盆腔炎##白带异常#", + "new_tag_id": 13437 + }, + { + "success": true, + "image_id": 16513, + "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "derived_tags": [ + "阴道炎", + "宫颈炎", + "盆腔炎", + "白带异常", + "瘙痒", + "抗生素治疗", + "个人卫生", + "妇科检查" + ], + "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##白带异常#", + "new_tag_id": 13435 + }, { "success": true, "image_id": 16496, @@ -7,47 +143,64 @@ "阴道炎", "宫颈炎", "盆腔炎", - "感染因素", - "个人卫生", + "白带异常", "抗生素治疗", - "抗炎药物", - "预防措施" + "妇科检查", + "免疫力下降", + "性传播疾病" ], - "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##感染因素#", - "new_tag_id": 12681 + "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##白带异常#", + "new_tag_id": 13435 }, { "success": true, - "image_id": 16497, + "image_id": 16500, "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", "derived_tags": [ + "妇科疾病", "阴道炎", - "宫颈炎", "盆腔炎", - "感染因素", - "个人卫生", + "白带异常", + "私处护理", "抗生素治疗", - "抗炎药物", - "预防措施" + "免疫力提升", + "月经不调" ], - "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##感染因素#", - "new_tag_id": 12681 + "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##妇科疾病##阴道炎##盆腔炎##白带异常#", + "new_tag_id": 13437 }, { "success": true, - "image_id": 16498, + "image_id": 16516, + "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", + "derived_tags": [ + "更年期症状", + "激素变化", + "月经紊乱", + "潮热出汗", + "骨质疏松", + "情绪波动", + "妇科保健", + "内分泌失调" + ], + "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##更年期症状##激素变化##月经紊乱##潮热出汗#", + "new_tag_id": 13439 + }, + { + "success": true, + "image_id": 16518, "original_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办#", "derived_tags": [ "阴道炎", - "宫颈炎", "盆腔炎", - "感染因素", - "个人卫生", + "白带异常", 
"抗生素治疗", - "抗炎药物", - "预防措施" + "个人卫生", + "妇科检查", + "免疫力下降", + "性传播疾病" ], - "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##宫颈炎##盆腔炎##感染因素#", - "new_tag_id": 12681 + "merged_tag": "#妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办##阴道炎##盆腔炎##白带异常##抗生素治疗#", + "new_tag_id": 13440 } ] \ No newline at end of file diff --git a/image_tag_derive.py b/image_tag_derive.py index 8014199..028c988 100644 --- a/image_tag_derive.py +++ b/image_tag_derive.py @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- """ 千问大模型 - 图片标签衍生生成脚本 -流程:每次批量2-3张图片 -> 大模型返回各自衍生标签 -> 分别更新数据库 +流程:每批次N张图片并发请求 -> 大模型返回各自衍生标签 -> 分别更新数据库 """ import os import json from http import HTTPStatus from typing import List, Dict, Optional +from concurrent.futures import ThreadPoolExecutor, as_completed from database_config import get_db from config.settings import settings @@ -29,32 +30,25 @@ dashscope.api_key = settings.qwen.api_key # ============== Prompt模板 ============== -BATCH_DERIVE_PROMPT = """你是一个专业的医疗健康内容标签分析专家。 +# 单张图片的Prompt +SINGLE_DERIVE_PROMPT = """你是一个专业的医疗健康内容标签分析专家。 ## 任务 -我提供了{image_count}张医疗健康相关图片,每张图片有一个原始标签。请分析每张图片,为每张图片生成衍生标签。 - -## 图片及原始标签 -{image_tags_list} +我提供了一张医疗健康相关图片,原始标签为「{original_tag}」。请分析图片内容,生成衍生标签。 ## 要求 -1. 分析每张图片内容,结合其原始标签 -2. 为每张图片生成 5-8 个衍生标签 +1. 分析图片内容,结合原始标签 +2. 生成 5-8 个衍生标签 3. 衍生标签包括:同义词、上位概念、下位概念、相关症状/治疗等 4. 
标签简洁,每个不超过10个字 ## 输出格式 -请严格以JSON格式输出,按图片顺序返回: +请严格以JSON格式输出: ```json -{{ - "results": [ - {{"image_index": 1, "original_tag": "原始标签1", "derived_tags": ["衍生1", "衍生2", "衍生3"]}}, - {{"image_index": 2, "original_tag": "原始标签2", "derived_tags": ["衍生1", "衍生2", "衍生3"]}} - ] -}} +{{"derived_tags": ["衍生1", "衍生2", "衍生3", "衍生4", "衍生5"]}} ``` -注意:只输出JSON,不要输出其他内容。results数组长度必须等于图片数量。 +注意:只输出JSON,不要输出其他内容。 """ @@ -116,30 +110,20 @@ def merge_tags(original_tag: str, derived_tags: List[str], max_total_tags: int = @retry(max_retries=settings.qwen.max_retries, delay=settings.qwen.retry_delay, backoff=2.0) -def derive_tags_batch(items: List[Dict]) -> Dict: +def derive_tags_single(item: Dict) -> Dict: """ - 批量调用千问大模型,每张图片独立返回衍生标签 - items: [{"id": 1, "image_url": "...", "tag_name": "高血压"}, ...] - 带重试机制 + 单张图片调用千问大模型获取衍生标签 + item: {"id": 1, "image_url": "...", "tag_name": "高血压", ...} + 返回: {"success": True/False, "item": item, "derived_tags": [...], "error": "..."} """ - logger.info(f"[批量处理] {len(items)} 张图片") + logger.debug(f" 处理 ID:{item['id']} - {item['tag_name']}") - # 构建图片标签列表描述 - image_tags_list = "" - for i, item in enumerate(items): - image_tags_list += f"- 图片{i+1}: 原始标签「{item['tag_name']}」\n" - logger.debug(f" 图片{i+1}: {item['tag_name']} - {item['image_url'][:50]}...") + prompt = SINGLE_DERIVE_PROMPT.format(original_tag=item['tag_name']) - prompt = BATCH_DERIVE_PROMPT.format( - image_count=len(items), - image_tags_list=image_tags_list.strip() - ) - - # 构建多图消息 - content = [] - for item in items: - content.append({"image": item['image_url']}) - content.append({"text": prompt}) + content = [ + {"image": item['image_url']}, + {"text": prompt} + ] messages = [{"role": "user", "content": content}] @@ -150,6 +134,7 @@ def derive_tags_batch(items: List[Dict]) -> Dict: if response.status_code == HTTPStatus.OK: result_text = response.output.choices[0].message.content[0]["text"] + logger.debug(f" ID:{item['id']} 原始响应: {result_text[:200]}...") try: json_start = result_text.find('{') @@ 
-157,50 +142,83 @@ def derive_tags_batch(items: List[Dict]) -> Dict: if json_start != -1 and json_end > json_start: json_str = result_text[json_start:json_end] result_json = json.loads(json_str) - results = result_json.get('results', []) - return {"success": True, "results": results} + derived_tags = result_json.get('derived_tags', []) + if not derived_tags: + logger.warning(f" ID:{item['id']} 返回JSON中无derived_tags字段: {json_str[:100]}") + return {"success": True, "item": item, "derived_tags": derived_tags} + else: + logger.warning(f" ID:{item['id']} 未找到JSON内容: {result_text[:200]}") + return {"success": False, "item": item, "error": "未找到JSON内容"} except json.JSONDecodeError as e: - logger.error(f" JSON解析失败: {e}") + logger.error(f" ID:{item['id']} JSON解析失败: {e}, 内容: {result_text[:200]}") - return {"success": False, "error": "JSON解析失败"} + return {"success": False, "item": item, "error": "JSON解析失败"} else: error_msg = f"{response.code}-{response.message}" - logger.error(f" API调用失败: {error_msg}") + logger.error(f" ID:{item['id']} API调用失败: {error_msg}") raise Exception(error_msg) # 抛出异常触发重试 -def process_batch(items: List[Dict], tags_dao: TagsDAO) -> List[Dict]: +def process_batch(items: List[Dict], tags_dao: TagsDAO, concurrency: int = None) -> List[Dict]: """ - 处理一批图片 + 并发处理一批图片 + + Args: + items: 要处理的图片列表 + tags_dao: 标签 DAO + concurrency: 并发数(同时发出的请求数) """ - # 1. 批量调用大模型 - try: - result = derive_tags_batch(items) - except Exception as e: - logger.error(f"批量处理失败: {e}") - return [{"success": False, "image_id": item['id'], "error": str(e)} for item in items] + if concurrency is None: + concurrency = settings.tag_derive.concurrency - if not result.get('success'): - return [{"success": False, "image_id": item['id'], "error": result.get('error')} for item in items] + logger.info(f"[处理批次] {len(items)} 张图片,并发数: {concurrency}") - api_results = result.get('results', []) db = get_db() process_results = [] + api_results = [] - # 2. 
逐个匹配并更新 - for i, item in enumerate(items): - # 查找对应的衍生结果 - derived_tags = [] - for r in api_results: - if r.get('image_index') == i + 1 or r.get('original_tag') == item['tag_name']: - derived_tags = r.get('derived_tags', []) - break + # 1. 并发调用大模型(按并发数限制) + with ThreadPoolExecutor(max_workers=concurrency) as executor: + # 提交所有任务 + future_to_item = { + executor.submit(derive_tags_single, item): item + for item in items + } - if not derived_tags and i < len(api_results): - derived_tags = api_results[i].get('derived_tags', []) + # 收集结果 + for future in as_completed(future_to_item): + item = future_to_item[future] + try: + result = future.result() + api_results.append(result) + except Exception as e: + logger.error(f" ID:{item['id']} 处理异常: {e}") + api_results.append({ + "success": False, + "item": item, + "error": str(e) + }) + + # 2. 逐个处理结果并更新数据库 + for result in api_results: + item = result.get('item', {}) + + if not result.get('success'): + process_results.append({ + "success": False, + "image_id": item.get('id'), + "error": result.get('error', '未知错误') + }) + continue + + derived_tags = result.get('derived_tags', []) if not derived_tags: - process_results.append({"success": False, "image_id": item['id'], "error": "未找到衍生标签"}) + process_results.append({ + "success": False, + "image_id": item['id'], + "error": "未获取到衍生标签" + }) continue logger.info(f" [{item['tag_name']}] 衍生: {derived_tags}") @@ -211,14 +229,14 @@ def process_batch(items: List[Dict], tags_dao: TagsDAO) -> List[Dict]: # 插入ai_tags try: - new_tag_id = tags_dao.get_or_create(merged_tag_name, '衍生标签', item.get('department_name', '')) + new_tag_id = tags_dao.get_or_create(merged_tag_name, None, item.get('department_name', '')) except Exception as e: process_results.append({"success": False, "image_id": item['id'], "error": str(e)}) continue - # 更新ai_image_tags + # 更新ai_image_tags(包括 tag_id, tag_name, status) try: - sql = "UPDATE ai_image_tags SET tag_id = %s, tag_name = %s WHERE id = %s" + sql = "UPDATE 
ai_image_tags SET tag_id = %s, tag_name = %s, status = 'manual_review' WHERE id = %s" db.execute_update(sql, (new_tag_id, merged_tag_name, item['id'])) process_results.append({ "success": True, @@ -228,51 +246,52 @@ def process_batch(items: List[Dict], tags_dao: TagsDAO) -> List[Dict]: "merged_tag": merged_tag_name, "new_tag_id": new_tag_id }) - logger.info(f" ✓ ID:{item['id']} -> tag_id:{new_tag_id}") + logger.info(f" ✓ ID:{item['id']} -> tag_id:{new_tag_id}, status -> manual_review") except Exception as e: process_results.append({"success": False, "image_id": item['id'], "error": str(e)}) return process_results -def batch_derive_tags(batch_size: int = None, start_id: int = None, end_id: int = None, ids: List[int] = None) -> List[Dict]: +def batch_derive_tags(batch_size: int = None, concurrency: int = None, start_id: int = None, end_id: int = None, ids: List[int] = None) -> List[Dict]: """ - 分批处理,每批2-3张图片 + 分批处理图片标签衍生 Args: - batch_size: 每批处理的图片数量 + batch_size: 每批次从数据库读取的图片数量 + concurrency: 并发请求数(同时发出的API请求数) start_id: 起始ID,从该ID开始处理(用于断点续传) end_id: 结束ID,处理到该ID为止 ids: 指定ID列表,只处理这些ID """ if batch_size is None: batch_size = settings.tag_derive.batch_size + if concurrency is None: + concurrency = settings.tag_derive.concurrency tags_dao = TagsDAO() db = get_db() # 查询需要处理的记录 if ids: - # 按指定ID查询(同样检查是否已有衍生标签) + # 按指定ID查询(查询 status='tag_extension' 的记录) placeholders = ','.join(['%s'] * len(ids)) sql = f""" SELECT it.id, it.image_thumb_url, it.tag_id, it.tag_name, it.department_name FROM ai_image_tags it - LEFT JOIN ai_tags t ON it.tag_id = t.id WHERE it.id IN ({placeholders}) AND it.image_thumb_url != '' AND it.tag_name != '' - AND (t.tag_category IS NULL OR t.tag_category != '衍生标签') + AND it.status = 'tag_extension' ORDER BY it.id """ items = db.execute_query(sql, ids) else: - # 按条件查询 + # 按条件查询 status='tag_extension' 的记录 sql = """ SELECT it.id, it.image_thumb_url, it.tag_id, it.tag_name, it.department_name FROM ai_image_tags it - LEFT JOIN ai_tags t ON it.tag_id = t.id 
WHERE it.image_thumb_url != '' AND it.tag_name != '' - AND (t.tag_category IS NULL OR t.tag_category != '衍生标签') + AND it.status = 'tag_extension' """ params = [] @@ -299,7 +318,7 @@ def batch_derive_tags(batch_size: int = None, start_id: int = None, end_id: int item['image_url'] = '' total = len(items) - logger.info(f"待处理: {total} 条,每批 {batch_size} 张") + logger.info(f"待处理: {total} 条,每批 {batch_size} 张,并发数: {concurrency}") all_results = [] @@ -310,7 +329,7 @@ def batch_derive_tags(batch_size: int = None, start_id: int = None, end_id: int logger.info(f"{'='*60}") logger.info(f"批次 {batch_num}/{(total + batch_size - 1) // batch_size}") - results = process_batch(batch, tags_dao) + results = process_batch(batch, tags_dao, concurrency) all_results.extend(results) success = sum(1 for r in results if r.get('success')) @@ -345,13 +364,17 @@ def main(): parser = argparse.ArgumentParser(description='千问视觉大模型 - 图片标签衍生生成器') parser.add_argument('--start-id', type=int, default=None, help='起始ID,从该ID开始处理(用于断点续传)') parser.add_argument('--end-id', type=int, default=None, help='结束ID,处理到该ID为止') - parser.add_argument('--batch-size', type=int, default=None, help='每批处理的图片数量') + parser.add_argument('--batch-size', type=int, default=None, help='每批次从数据库读取的图片数量') + parser.add_argument('--concurrency', type=int, default=None, help='并发请求数(同时发出的API请求数)') parser.add_argument('--id', type=int, nargs='+', default=None, help='指定ID,只处理这些ID(可指定多个)') args = parser.parse_args() + batch_size = args.batch_size or settings.tag_derive.batch_size + concurrency = args.concurrency or settings.tag_derive.concurrency + logger.info("=" * 60) logger.info("千问视觉大模型 - 图片标签衍生生成器") - logger.info(f"模式: 每批{args.batch_size or settings.tag_derive.batch_size}张图片,各自返回衍生标签") + logger.info(f"模式: 每批 {batch_size} 张,并发 {concurrency} 个请求") if args.id: logger.info(f"指定ID: {args.id}") elif args.start_id or args.end_id: @@ -359,7 +382,13 @@ def main(): logger.info(f"ID范围: {id_range}") logger.info("=" * 60) - results = 
batch_derive_tags(batch_size=args.batch_size, start_id=args.start_id, end_id=args.end_id, ids=args.id) + results = batch_derive_tags( + batch_size=args.batch_size, + concurrency=args.concurrency, + start_id=args.start_id, + end_id=args.end_id, + ids=args.id + ) if results: print_summary(results) diff --git a/query_tags.py b/query_tags.py index decc9bd..95a29e0 100644 --- a/query_tags.py +++ b/query_tags.py @@ -1,34 +1,185 @@ # -*- coding: utf-8 -*- -"""查询所有带标签字段的数据""" - -from database_config import get_db - -db = get_db() - -# 查询所有带标签相关字段的数据 -sql = """ -SELECT id, image_id, image_name, - tag_id, tag_name, - default_tag_id, default_tag_name, - keywords_id, keywords_name, - department_id, department_name, - status -FROM ai_image_tags -ORDER BY id +""" +标签数据查询脚本 +支持查询 ai_image_tags 和 ai_tags 表的相关数据 """ -results = db.execute_query(sql) +import argparse +from database_config import get_db -print(f"{'=' * 120}") -print(f"ai_image_tags 表中共有 {len(results)} 条数据") -print(f"{'=' * 120}") -# 表头 -print(f"{'ID':<6} {'图片ID':<8} {'标签名':<15} {'初始标签名':<15} {'关键词':<12} {'科室':<10} {'状态':<10}") -print(f"{'-' * 120}") +def query_image_tags_by_status(status: str = None, limit: int = 20): + """按状态查询 ai_image_tags 表""" + db = get_db() + + if status: + sql = """ + SELECT id, image_id, image_name, tag_id, tag_name, + default_tag_id, default_tag_name, + keywords_name, department_name, status, created_at + FROM ai_image_tags + WHERE status = %s + ORDER BY id DESC + LIMIT %s + """ + results = db.execute_query(sql, (status, limit)) + else: + sql = """ + SELECT id, image_id, image_name, tag_id, tag_name, + default_tag_id, default_tag_name, + keywords_name, department_name, status, created_at + FROM ai_image_tags + ORDER BY id DESC + LIMIT %s + """ + results = db.execute_query(sql, (limit,)) + + print(f"\n{'=' * 130}") + print(f"ai_image_tags 查询结果 (status={status or '全部'}, limit={limit})") + print(f"{'=' * 130}") + print(f"{'ID':<8} {'tag_id':<8} {'tag_name':<25} {'default_tag':<20} 
{'department':<12} {'status':<20}") + print(f"{'-' * 130}") + + for r in results: + tag_name = (r['tag_name'] or '')[:24] + default_tag = (r['default_tag_name'] or '')[:19] + dept = (r['department_name'] or '')[:11] + print(f"{r['id']:<8} {r['tag_id']:<8} {tag_name:<25} {default_tag:<20} {dept:<12} {r['status']:<20}") + + print(f"{'=' * 130}") + print(f"显示 {len(results)} 条记录") -for r in results: - print(f"{r['id']:<6} {r['image_id']:<8} {r['tag_name']:<15} {r['default_tag_name']:<15} {r['keywords_name']:<12} {r['department_name']:<10} {r['status']:<10}") -print(f"{'=' * 120}") -print(f"总计: {len(results)} 条记录") +def query_status_stats(): + """统计各状态的数量""" + db = get_db() + + sql = "SELECT status, COUNT(*) as cnt FROM ai_image_tags GROUP BY status ORDER BY cnt DESC" + results = db.execute_query(sql) + + print(f"\n{'=' * 50}") + print(f"ai_image_tags 状态统计") + print(f"{'=' * 50}") + print(f"{'状态':<25} {'数量':>10}") + print(f"{'-' * 50}") + + total = 0 + for r in results: + print(f"{r['status']:<25} {r['cnt']:>10}") + total += r['cnt'] + + print(f"{'-' * 50}") + print(f"{'总计':<25} {total:>10}") + + +def query_tags_by_category(category: str = None, limit: int = 20): + """查询 ai_tags 表""" + db = get_db() + + if category: + sql = """ + SELECT id, tag_name, tag_category, department, usage_count, status, created_at + FROM ai_tags + WHERE tag_category = %s + ORDER BY id DESC + LIMIT %s + """ + results = db.execute_query(sql, (category, limit)) + else: + sql = """ + SELECT id, tag_name, tag_category, department, usage_count, status, created_at + FROM ai_tags + ORDER BY id DESC + LIMIT %s + """ + results = db.execute_query(sql, (limit,)) + + print(f"\n{'=' * 120}") + print(f"ai_tags 查询结果 (category={category or '全部'}, limit={limit})") + print(f"{'=' * 120}") + print(f"{'ID':<8} {'tag_name':<40} {'category':<15} {'department':<12} {'usage':<8}") + print(f"{'-' * 120}") + + for r in results: + tag_name = (r['tag_name'] or '')[:39] + category_val = (r['tag_category'] or '')[:14] + 
dept = (r['department'] or '')[:11] + print(f"{r['id']:<8} {tag_name:<40} {category_val:<15} {dept:<12} {r['usage_count']:<8}") + + print(f"{'=' * 120}") + print(f"显示 {len(results)} 条记录") + + +def query_join_data(status: str = 'tag_extension', limit: int = 20): + """关联查询 ai_image_tags 和 ai_tags""" + db = get_db() + + sql = """ + SELECT it.id, it.image_id, it.tag_id, it.tag_name as it_tag_name, + it.default_tag_name, it.department_name, it.status, + t.tag_name as t_tag_name, t.tag_category + FROM ai_image_tags it + LEFT JOIN ai_tags t ON it.tag_id = t.id + WHERE it.status = %s + ORDER BY it.id DESC + LIMIT %s + """ + results = db.execute_query(sql, (status, limit)) + + print(f"\n{'=' * 140}") + print(f"关联查询 (ai_image_tags.status={status})") + print(f"{'=' * 140}") + print(f"{'it.id':<8} {'tag_id':<8} {'it.tag_name':<30} {'t.tag_name':<30} {'t.category':<15} {'department':<12}") + print(f"{'-' * 140}") + + for r in results: + it_tag = (r['it_tag_name'] or '')[:29] + t_tag = (r['t_tag_name'] or '')[:29] + category = (r['tag_category'] or '')[:14] + dept = (r['department_name'] or '')[:11] + print(f"{r['id']:<8} {r['tag_id'] or 0:<8} {it_tag:<30} {t_tag:<30} {category:<15} {dept:<12}") + + print(f"{'=' * 140}") + print(f"显示 {len(results)} 条记录") + + +def query_both_tables(status: str = None, category: str = None, limit: int = 20): + """同时查询 ai_image_tags 和 ai_tags 两个表""" + query_status_stats() + query_image_tags_by_status(status=status, limit=limit) + query_tags_by_category(category=category, limit=limit) + + +def main(): + parser = argparse.ArgumentParser(description='标签数据查询工具') + parser.add_argument('--status', '-s', type=str, default=None, + help='按状态过滤 ai_image_tags (如: tag_extension, manual_review, draft)') + parser.add_argument('--category', '-c', type=str, default=None, + help='按分类过滤 ai_tags (如: 衍生标签)') + parser.add_argument('--limit', '-l', type=int, default=20, + help='返回记录数限制 (默认: 20)') + parser.add_argument('--stats', action='store_true', + help='仅显示状态统计') + 
parser.add_argument('--join', '-j', action='store_true', + help='关联查询 ai_image_tags 和 ai_tags') + parser.add_argument('--image-tags', '-i', action='store_true', + help='仅查询 ai_image_tags 表') + parser.add_argument('--tags', '-t', action='store_true', + help='仅查询 ai_tags 表') + args = parser.parse_args() + + if args.stats: + query_status_stats() + elif args.join: + query_join_data(status=args.status or 'tag_extension', limit=args.limit) + elif args.image_tags: + query_image_tags_by_status(status=args.status, limit=args.limit) + elif args.tags: + query_tags_by_category(category=args.category, limit=args.limit) + else: + # 默认同时查询两个表 + query_both_tables(status=args.status, category=args.category, limit=args.limit) + + +if __name__ == "__main__": + main() diff --git a/rebuild_database.py b/rebuild_database.py new file mode 100644 index 0000000..bc2afe6 --- /dev/null +++ b/rebuild_database.py @@ -0,0 +1,310 @@ +# -*- coding: utf-8 -*- +""" +数据库重建脚本 +根据 ai_article.sql 重建数据库结构,从 ai_image_tags.txt 导入数据 +""" + +import mysql.connector +import os + + +# 数据库配置 +DB_CONFIG = { + "host": "localhost", + "port": 3306, + "user": "root", + "password": "liang20020523", + "charset": "utf8mb4" +} + +DATABASE_NAME = "ai_article" +SQL_FILE = "ai_article.sql" +DATA_FILE = "ai_image_tags.txt" + + +def get_connection(with_database=False): + """获取数据库连接""" + config = DB_CONFIG.copy() + if with_database: + config["database"] = DATABASE_NAME + return mysql.connector.connect(**config) + + +def rebuild_database_structure(): + """重建数据库结构""" + print("=" * 60) + print("步骤1: 重建数据库结构") + print("=" * 60) + + # 读取SQL文件 + sql_path = os.path.join(os.path.dirname(__file__), SQL_FILE) + print(f"读取SQL文件: {sql_path}") + + with open(sql_path, "r", encoding="utf-8") as f: + sql_content = f.read() + + # 连接MySQL(不指定数据库) + conn = get_connection(with_database=False) + cursor = conn.cursor() + + try: + # 删除并重新创建数据库 + print(f"\n删除数据库 {DATABASE_NAME}(如果存在)...") + cursor.execute(f"DROP DATABASE IF EXISTS 
`{DATABASE_NAME}`") + + print(f"创建数据库 {DATABASE_NAME}...") + cursor.execute(f"CREATE DATABASE `{DATABASE_NAME}` CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") + + print(f"切换到数据库 {DATABASE_NAME}...") + cursor.execute(f"USE `{DATABASE_NAME}`") + + # 分割并执行SQL语句 + print("\n执行SQL脚本...") + + # 移除注释并分割SQL语句 + statements = [] + current_statement = "" + in_comment = False + + for line in sql_content.split("\n"): + stripped = line.strip() + + # 跳过空行 + if not stripped: + continue + + # 跳过单行注释 + if stripped.startswith("--"): + continue + + # 处理多行注释开始 + if stripped.startswith("/*"): + in_comment = True + continue + + # 处理多行注释结束 + if "*/" in stripped: + in_comment = False + continue + + # 跳过注释中的内容 + if in_comment: + continue + + current_statement += line + "\n" + + # 检查语句是否结束 + if stripped.endswith(";"): + statements.append(current_statement.strip()) + current_statement = "" + + # 执行每条SQL语句 + success_count = 0 + error_count = 0 + + for i, stmt in enumerate(statements): + if not stmt or stmt.strip() == ";": + continue + try: + cursor.execute(stmt) + conn.commit() + success_count += 1 + # 打印表创建信息 + if "CREATE TABLE" in stmt.upper(): + table_name = stmt.split("`")[1] if "`" in stmt else "unknown" + print(f" ✓ 创建表: {table_name}") + except mysql.connector.Error as e: + error_count += 1 + # 只打印关键错误 + if "already exists" not in str(e).lower(): + print(f" ✗ SQL执行错误: {str(e)[:100]}") + + print(f"\nSQL执行完成: 成功 {success_count} 条, 失败 {error_count} 条") + + finally: + cursor.close() + conn.close() + + +def import_image_tags_data(): + """从 ai_image_tags.txt 导入数据""" + print("\n" + "=" * 60) + print("步骤2: 导入 ai_image_tags 数据") + print("=" * 60) + + data_path = os.path.join(os.path.dirname(__file__), DATA_FILE) + print(f"读取数据文件: {data_path}") + + if not os.path.exists(data_path): + print(f"数据文件不存在: {data_path}") + return + + with open(data_path, "r", encoding="utf-8") as f: + lines = f.readlines() + + # 解析数据 + # 文件格式:第9行是列头,从第10行开始是数据 + data_rows = [] + header_line = None + + for i, line in 
enumerate(lines): + stripped = line.strip() + if not stripped: + continue + + # 找到列头行(包含 id, image_id 等) + if stripped.startswith("id\t"): + header_line = stripped + print(f"找到列头(第{i+1}行): {stripped[:80]}...") + continue + + # 跳过非数据行 + if header_line is None: + continue + + # 解析数据行(以数字开头) + parts = stripped.split("\t") + if len(parts) >= 10 and parts[0].isdigit(): + data_rows.append(parts) + + print(f"解析到 {len(data_rows)} 条数据") + + if not data_rows: + print("没有数据需要导入") + return + + # 连接数据库 + conn = get_connection(with_database=True) + cursor = conn.cursor() + + try: + # 禁用外键检查 + cursor.execute("SET FOREIGN_KEY_CHECKS = 0") + print("已禁用外键检查") + + # 插入数据 + insert_sql = """ + INSERT INTO ai_image_tags + (id, image_id, image_name, image_url, image_thumb_url, tag_id, tag_name, + default_tag_id, default_tag_name, keywords_id, keywords_name, + department_id, department_name, image_source, created_user_id, + created_at, updated_at, image_attached_article_count, status, blocking_reason) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + """ + + success_count = 0 + error_count = 0 + + for row in data_rows: + try: + # 处理数据,确保长度匹配 + while len(row) < 20: + row.append("") + + # 转换数据类型 + values = ( + int(row[0]), # id + int(row[1]), # image_id + row[2], # image_name + row[3], # image_url + row[4], # image_thumb_url + int(row[5]), # tag_id + row[6], # tag_name + int(row[7]) if row[7] else 0, # default_tag_id + row[8], # default_tag_name + int(row[9]), # keywords_id + row[10], # keywords_name + int(row[11]), # department_id + row[12], # department_name + int(row[13]) if row[13] else 1, # image_source + int(row[14]) if row[14] else 0, # created_user_id + row[15], # created_at + row[16], # updated_at + int(row[17]) if row[17] else 0, # image_attached_article_count + row[18] if row[18] else "draft", # status + row[19] if len(row) > 19 else "" # blocking_reason + ) + + cursor.execute(insert_sql, values) + success_count += 1 + + except Exception 
as e: + error_count += 1 + if error_count <= 3: + print(f" 插入错误 (id={row[0]}): {e}") + + conn.commit() + print(f"\n数据导入完成: 成功 {success_count} 条, 失败 {error_count} 条") + + # 恢复外键检查 + cursor.execute("SET FOREIGN_KEY_CHECKS = 1") + print("已恢复外键检查") + + finally: + cursor.close() + conn.close() + + +def verify_database(): + """验证数据库""" + print("\n" + "=" * 60) + print("步骤3: 验证数据库") + print("=" * 60) + + conn = get_connection(with_database=True) + cursor = conn.cursor() + + try: + # 检查表数量 + cursor.execute("SHOW TABLES") + tables = cursor.fetchall() + print(f"\n数据库中共有 {len(tables)} 张表:") + for t in tables[:10]: + print(f" - {t[0]}") + if len(tables) > 10: + print(f" ... 还有 {len(tables) - 10} 张表") + + # 检查 ai_image_tags 表数据 + cursor.execute("SELECT COUNT(*) FROM ai_image_tags") + count = cursor.fetchone()[0] + print(f"\nai_image_tags 表共有 {count} 条记录") + + if count > 0: + cursor.execute("SELECT id, tag_name, department_name FROM ai_image_tags LIMIT 3") + rows = cursor.fetchall() + print("示例数据:") + for row in rows: + print(f" ID: {row[0]}, 标签: {row[1][:30]}..., 科室: {row[2]}") + + finally: + cursor.close() + conn.close() + + +def main(): + print("\n" + "=" * 60) + print(" 数据库重建脚本") + print("=" * 60) + print(f"数据库: {DATABASE_NAME}") + print(f"SQL文件: {SQL_FILE}") + print(f"数据文件: {DATA_FILE}") + print("=" * 60) + + # 确认操作 + confirm = input("\n警告: 此操作将删除并重建数据库,所有数据将丢失!\n确认继续? 
(输入 'yes' 确认): ") + if confirm.lower() != "yes": + print("操作已取消") + return + + # 执行重建 + rebuild_database_structure() + import_image_tags_data() + verify_database() + + print("\n" + "=" * 60) + print("数据库重建完成!") + print("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/reset_image_tags_data.py b/reset_image_tags_data.py deleted file mode 100644 index 042f911..0000000 --- a/reset_image_tags_data.py +++ /dev/null @@ -1,162 +0,0 @@ -# -*- coding: utf-8 -*- -""" -重置 ai_image_tags 和 ai_tags 表数据 -从 ai_image_tags.txt (Tab分隔格式) 导入数据 -""" - -import os -from database_config import get_db -from logger import get_logger - -logger = get_logger("reset_data") - - -def parse_tsv_file(file_path: str) -> list: - """ - 解析 Tab 分隔的 ai_image_tags.txt 文件 - """ - with open(file_path, 'r', encoding='utf-8') as f: - lines = f.readlines() - - records = [] - header = None - - for line in lines: - line = line.strip() - if not line: - continue - - # 跳过头信息行 - if line.startswith('8.149') or line.startswith('http://') or '正在显示' in line or line.startswith('SELECT'): - continue - - # 解析表头 - if line.startswith('id\t'): - header = line.split('\t') - logger.info(f"解析到表头: {len(header)} 列") - continue - - # 解析数据行 - if header and line[0].isdigit(): - fields = line.split('\t') - if len(fields) >= 19: # 至少要有19列 - record = { - 'id': int(fields[0]) if fields[0] else 0, - 'image_id': int(fields[1]) if fields[1] else 0, - 'image_name': fields[2], - 'image_url': fields[3], - 'image_thumb_url': fields[4], - 'tag_id': int(fields[5]) if fields[5] else 0, - 'tag_name': fields[6], - 'default_tag_id': int(fields[7]) if fields[7] else 0, - 'default_tag_name': fields[8], - 'keywords_id': int(fields[9]) if fields[9] else 0, - 'keywords_name': fields[10], - 'department_id': int(fields[11]) if fields[11] else 0, - 'department_name': fields[12], - 'image_source': int(fields[13]) if fields[13] else 1, - 'created_user_id': int(fields[14]) if fields[14] else 0, - 'created_at': fields[15] if fields[15] else None, - 
'updated_at': fields[16] if fields[16] else None, - 'image_attached_article_count': int(fields[17]) if fields[17] else 0, - 'status': fields[18] if fields[18] else 'draft', - 'blocking_reason': fields[19] if len(fields) > 19 else '' - } - records.append(record) - - return records - - -def reset_tables_and_import(file_path: str): - """ - 清空表并导入数据 - """ - db = get_db() - - # 解析文件 - logger.info(f"正在解析文件: {file_path}") - records = parse_tsv_file(file_path) - logger.info(f"共解析 {len(records)} 条记录") - - if not records: - logger.error("没有解析到有效数据,操作取消") - return - - # 收集所有 tag - tag_map = {} - for r in records: - if r['tag_id'] and r['tag_name']: - tag_map[r['tag_id']] = r['tag_name'] - - # 确认操作 - print(f"\n即将执行以下操作:") - print(f"1. 清空 ai_image_tags 表") - print(f"2. 清空 ai_tags 表") - print(f"3. 导入 {len(tag_map)} 条 ai_tags 记录") - print(f"4. 导入 {len(records)} 条 ai_image_tags 记录") - print(f"\n注意: 此操作不可逆!") - - confirm = input("\n确认执行? (输入 yes 继续): ") - if confirm.lower() != 'yes': - logger.info("操作已取消") - return - - try: - # 清空表(先删子表,再删父表) - logger.info("清空 ai_image_tags 表...") - db.execute_update("DELETE FROM ai_image_tags") - logger.info("清空 ai_tags 表...") - db.execute_update("DELETE FROM ai_tags") - - # 重置自增ID - db.execute_update("ALTER TABLE ai_image_tags AUTO_INCREMENT = 1") - db.execute_update("ALTER TABLE ai_tags AUTO_INCREMENT = 1") - - # 插入 ai_tags - logger.info(f"插入 ai_tags...") - for tag_id, tag_name in tag_map.items(): - sql = "INSERT INTO ai_tags (id, tag_name, status) VALUES (%s, %s, 'active')" - try: - db.execute_insert(sql, (tag_id, tag_name)) - except Exception as e: - logger.warning(f"插入 tag {tag_id} 失败: {e}") - - # 插入 ai_image_tags - logger.info(f"插入 ai_image_tags...") - success_count = 0 - for r in records: - sql = """ - INSERT INTO ai_image_tags - (id, image_id, image_name, image_url, image_thumb_url, tag_id, tag_name, - default_tag_id, default_tag_name, keywords_id, keywords_name, - department_id, department_name, image_source, created_user_id, - 
created_at, updated_at, image_attached_article_count, status, blocking_reason) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - """ - params = ( - r['id'], r['image_id'], r['image_name'], r['image_url'], r['image_thumb_url'], - r['tag_id'], r['tag_name'], r['default_tag_id'], r['default_tag_name'], - r['keywords_id'], r['keywords_name'], r['department_id'], r['department_name'], - r['image_source'], r['created_user_id'], r['created_at'], r['updated_at'], - r['image_attached_article_count'], r['status'], r['blocking_reason'] - ) - try: - db.execute_insert(sql, params) - success_count += 1 - except Exception as e: - logger.error(f"插入记录 {r['id']} 失败: {e}") - - # 恢复自增ID到最大值 - db.execute_update("ALTER TABLE ai_image_tags AUTO_INCREMENT = 1") - db.execute_update("ALTER TABLE ai_tags AUTO_INCREMENT = 1") - - logger.info(f"导入完成: 成功 {success_count}/{len(records)} 条") - - except Exception as e: - logger.error(f"操作失败: {e}") - raise - - -if __name__ == "__main__": - file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ai_image_tags.txt") - reset_tables_and_import(file_path) diff --git a/reset_tags.py b/reset_tags.py deleted file mode 100644 index 10e8134..0000000 --- a/reset_tags.py +++ /dev/null @@ -1,81 +0,0 @@ -# -*- coding: utf-8 -*- -""" -重置标签数据脚本 -1. 把 ai_image_tags 的 tag_id/tag_name 恢复为 default_tag_id/default_tag_name -2. 删除 ai_tags 表中的衍生标签 -""" - -from database_config import get_db - - -def reset_database(): - db = get_db() - - # 1. 
先查看当前状态 - print("=" * 60) - print("当前数据状态") - print("=" * 60) - - # 查看衍生标签数量 - derived_count = db.execute_one( - "SELECT COUNT(*) as cnt FROM ai_tags WHERE tag_category = '衍生标签'" - ) - print(f"ai_tags 表中的衍生标签数量: {derived_count['cnt']}") - - # 查看需要恢复的图片数量 - need_reset = db.execute_one(""" - SELECT COUNT(*) as cnt FROM ai_image_tags - WHERE default_tag_id > 0 - AND (tag_id != default_tag_id OR tag_name != default_tag_name) - """) - print(f"需要恢复到初始标签的图片数量: {need_reset['cnt']}") - - print("\n" + "=" * 60) - print("开始执行清理") - print("=" * 60) - - # 2. 把 ai_image_tags 的标签恢复为初始标签 - print("\n[步骤1] 恢复 ai_image_tags 到初始标签...") - affected = db.execute_update(""" - UPDATE ai_image_tags - SET tag_id = default_tag_id, - tag_name = default_tag_name - WHERE default_tag_id > 0 - """) - print(f" 已更新 {affected} 条记录") - - # 3. 删除衍生标签 - print("\n[步骤2] 删除 ai_tags 表中的衍生标签...") - deleted = db.execute_update( - "DELETE FROM ai_tags WHERE tag_category = '衍生标签'" - ) - print(f" 已删除 {deleted} 条衍生标签") - - # 4. 验证结果 - print("\n" + "=" * 60) - print("清理完成,验证结果") - print("=" * 60) - - remaining_derived = db.execute_one( - "SELECT COUNT(*) as cnt FROM ai_tags WHERE tag_category = '衍生标签'" - ) - print(f"剩余衍生标签数量: {remaining_derived['cnt']}") - - sample = db.execute_query(""" - SELECT id, image_id, tag_name, default_tag_name - FROM ai_image_tags - WHERE default_tag_id > 0 - LIMIT 5 - """) - print("\n前5条图片标签示例:") - for row in sample: - print(f" ID:{row['id']} | tag_name: {row['tag_name']} | default_tag_name: {row['default_tag_name']}") - - -if __name__ == "__main__": - confirm = input("确认执行数据库清理?这将:\n1. 把所有图片标签恢复为初始标签\n2. 
删除所有衍生标签\n输入 'yes' 确认: ") - if confirm.lower() == 'yes': - reset_database() - print("\n✓ 数据库已重置到干净状态") - else: - print("已取消操作") diff --git a/restore_database.py b/restore_database.py new file mode 100644 index 0000000..5a88aae --- /dev/null +++ b/restore_database.py @@ -0,0 +1,299 @@ +# -*- coding: utf-8 -*- +""" +数据库恢复脚本 +根据 ai_article.sql 重建数据库结构,从 backup_data.json 恢复数据 +""" + +import mysql.connector +import json +import os + + +# 数据库配置 +DB_CONFIG = { + "host": "localhost", + "port": 3306, + "user": "root", + "password": "liang20020523", + "charset": "utf8mb4" +} + +DATABASE_NAME = "ai_article" +SQL_FILE = "ai_article.sql" +BACKUP_FILE = "backup_data.json" + + +def get_connection(with_database=False): + """获取数据库连接""" + config = DB_CONFIG.copy() + if with_database: + config["database"] = DATABASE_NAME + return mysql.connector.connect(**config) + + +def rebuild_database_structure(): + """重建数据库结构""" + print("=" * 60) + print("步骤1: 重建数据库结构") + print("=" * 60) + + sql_path = os.path.join(os.path.dirname(__file__), SQL_FILE) + print(f"读取SQL文件: {sql_path}") + + with open(sql_path, "r", encoding="utf-8") as f: + sql_content = f.read() + + conn = get_connection(with_database=False) + cursor = conn.cursor() + + try: + print(f"\n删除数据库 {DATABASE_NAME}(如果存在)...") + cursor.execute(f"DROP DATABASE IF EXISTS `{DATABASE_NAME}`") + + print(f"创建数据库 {DATABASE_NAME}...") + cursor.execute(f"CREATE DATABASE `{DATABASE_NAME}` CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci") + + print(f"切换到数据库 {DATABASE_NAME}...") + cursor.execute(f"USE `{DATABASE_NAME}`") + + print("\n执行SQL脚本...") + + statements = [] + current_statement = "" + in_comment = False + + for line in sql_content.split("\n"): + stripped = line.strip() + if not stripped: + continue + if stripped.startswith("--"): + continue + if stripped.startswith("/*"): + in_comment = True + continue + if "*/" in stripped: + in_comment = False + continue + if in_comment: + continue + + current_statement += line + "\n" + + if 
stripped.endswith(";"): + statements.append(current_statement.strip()) + current_statement = "" + + success_count = 0 + table_count = 0 + + for stmt in statements: + if not stmt or stmt.strip() == ";": + continue + try: + cursor.execute(stmt) + conn.commit() + success_count += 1 + if "CREATE TABLE" in stmt.upper(): + table_name = stmt.split("`")[1] if "`" in stmt else "unknown" + print(f" ✓ 创建表: {table_name}") + table_count += 1 + except mysql.connector.Error as e: + if "already exists" not in str(e).lower(): + print(f" ✗ SQL执行错误: {str(e)[:80]}") + + print(f"\n表结构创建完成: {table_count} 张表") + + finally: + cursor.close() + conn.close() + + +def restore_data(): + """从备份文件恢复数据""" + print("\n" + "=" * 60) + print("步骤2: 恢复表数据") + print("=" * 60) + + backup_path = os.path.join(os.path.dirname(__file__), BACKUP_FILE) + print(f"读取备份文件: {backup_path}") + + if not os.path.exists(backup_path): + print(f"备份文件不存在: {backup_path}") + return + + with open(backup_path, "r", encoding="utf-8") as f: + data = json.load(f) + + conn = get_connection(with_database=True) + cursor = conn.cursor() + + try: + # 禁用外键检查 + cursor.execute("SET FOREIGN_KEY_CHECKS = 0") + print("已禁用外键检查\n") + + # 恢复 ai_tags + tags = data.get("ai_tags", []) + print(f"恢复 ai_tags 表 ({len(tags)} 条)...") + + if tags: + for tag in tags: + columns = ", ".join(f"`{k}`" for k in tag.keys()) + placeholders = ", ".join(["%s"] * len(tag)) + sql = f"INSERT INTO ai_tags ({columns}) VALUES ({placeholders})" + try: + cursor.execute(sql, list(tag.values())) + except mysql.connector.Error as e: + print(f" 插入错误: {e}") + conn.commit() + print(f" ✓ ai_tags 恢复完成") + + # 恢复 ai_image_tags + image_tags = data.get("ai_image_tags", []) + print(f"恢复 ai_image_tags 表 ({len(image_tags)} 条)...") + + if image_tags: + success = 0 + for item in image_tags: + columns = ", ".join(f"`{k}`" for k in item.keys()) + placeholders = ", ".join(["%s"] * len(item)) + sql = f"INSERT INTO ai_image_tags ({columns}) VALUES ({placeholders})" + try: + 
cursor.execute(sql, list(item.values())) + success += 1 + except mysql.connector.Error as e: + print(f" 插入错误 (id={item.get('id')}): {e}") + conn.commit() + print(f" ✓ ai_image_tags 恢复完成 ({success} 条)") + + # 恢复外键检查 + cursor.execute("SET FOREIGN_KEY_CHECKS = 1") + print("\n已恢复外键检查") + + finally: + cursor.close() + conn.close() + + +def verify_database(): + """验证数据库""" + print("\n" + "=" * 60) + print("步骤3: 验证数据库") + print("=" * 60) + + conn = get_connection(with_database=True) + cursor = conn.cursor() + + try: + cursor.execute("SHOW TABLES") + tables = cursor.fetchall() + print(f"\n数据库共有 {len(tables)} 张表") + + cursor.execute("SELECT COUNT(*) FROM ai_tags") + count = cursor.fetchone()[0] + print(f"ai_tags 表: {count} 条记录") + + cursor.execute("SELECT COUNT(*) FROM ai_image_tags") + count = cursor.fetchone()[0] + print(f"ai_image_tags 表: {count} 条记录") + + # 显示示例数据 + cursor.execute("SELECT id, tag_name, department FROM ai_tags LIMIT 3") + rows = cursor.fetchall() + if rows: + print("\nai_tags 示例:") + for row in rows: + print(f" ID: {row[0]}, 标签: {row[1][:40]}..., 科室: {row[2]}") + + finally: + cursor.close() + conn.close() + + +def backup_current_data(): + """备份当前数据到 backup_data.json""" + print("=" * 60) + print("备份当前数据") + print("=" * 60) + + conn = get_connection(with_database=True) + cursor = conn.cursor(dictionary=True) + + try: + # 导出 ai_tags + cursor.execute('SELECT * FROM ai_tags') + tags = cursor.fetchall() + print(f"ai_tags: {len(tags)} 条") + + # 导出 ai_image_tags + cursor.execute('SELECT * FROM ai_image_tags') + image_tags = cursor.fetchall() + print(f"ai_image_tags: {len(image_tags)} 条") + + # 转换datetime为字符串 + def convert_datetime(obj): + for key, value in obj.items(): + if hasattr(value, 'isoformat'): + obj[key] = value.isoformat() + return obj + + tags = [convert_datetime(t) for t in tags] + image_tags = [convert_datetime(t) for t in image_tags] + + # 保存为JSON + backup_path = os.path.join(os.path.dirname(__file__), BACKUP_FILE) + data = {'ai_tags': tags, 
'ai_image_tags': image_tags} + with open(backup_path, 'w', encoding='utf-8') as f: + json.dump(data, f, ensure_ascii=False, indent=2) + + print(f"\n数据已保存到 {backup_path}") + + finally: + cursor.close() + conn.close() + + +def main(): + print("\n" + "=" * 60) + print(" 数据库恢复脚本") + print("=" * 60) + print(f"数据库: {DATABASE_NAME}") + print(f"SQL文件: {SQL_FILE}") + print(f"备份文件: {BACKUP_FILE}") + print("=" * 60) + + print("\n请选择操作:") + print(" 1. 完整恢复 (重建结构 + 恢复数据)") + print(" 2. 仅恢复数据 (保留现有结构)") + print(" 3. 备份当前数据") + print(" 0. 取消") + + choice = input("\n请输入选项 (0-3): ").strip() + + if choice == "1": + confirm = input("\n警告: 此操作将删除并重建数据库!确认? (输入 'yes'): ") + if confirm.lower() == "yes": + rebuild_database_structure() + restore_data() + verify_database() + print("\n" + "=" * 60) + print("数据库恢复完成!") + print("=" * 60) + else: + print("操作已取消") + + elif choice == "2": + restore_data() + verify_database() + print("\n数据恢复完成!") + + elif choice == "3": + backup_current_data() + print("\n备份完成!") + + else: + print("操作已取消") + + +if __name__ == "__main__": + main() diff --git a/tag_derive_api.py b/tag_derive_api.py index 265ac2b..9ec0872 100644 --- a/tag_derive_api.py +++ b/tag_derive_api.py @@ -251,13 +251,12 @@ class TagDeriveService: return ''.join([f'#{t}#' for t in all_tags]) def get_pending_images(self, limit: int = 100) -> List[Dict]: - """获取待处理的图片""" + """获取待处理的图片(status='tag_extension')""" sql = """ SELECT it.id, it.image_thumb_url, it.tag_id, it.tag_name, it.department_name FROM ai_image_tags it - LEFT JOIN ai_tags t ON it.tag_id = t.id WHERE it.image_thumb_url != '' AND it.tag_name != '' - AND (t.tag_category IS NULL OR t.tag_category != '衍生标签') + AND it.status = 'tag_extension' ORDER BY it.id LIMIT %s """ @@ -279,12 +278,15 @@ class TagDeriveService: "SELECT COUNT(*) as cnt FROM ai_image_tags WHERE image_thumb_url != ''" )['cnt'] - # 已处理数 - processed = self.db.execute_one(""" - SELECT COUNT(*) as cnt FROM ai_image_tags it - JOIN ai_tags t ON it.tag_id = t.id - WHERE 
t.tag_category = '衍生标签' - """)['cnt'] + # 待处理数(status='tag_extension') + pending = self.db.execute_one( + "SELECT COUNT(*) as cnt FROM ai_image_tags WHERE status = 'tag_extension'" + )['cnt'] + + # 已处理数(status='manual_review') + processed = self.db.execute_one( + "SELECT COUNT(*) as cnt FROM ai_image_tags WHERE status = 'manual_review'" + )['cnt'] # 衍生标签数 derived_count = self.db.execute_one( @@ -294,7 +296,7 @@ class TagDeriveService: return { "total_images": total, "processed_images": processed, - "pending_images": total - processed, + "pending_images": pending, "derived_tags_count": derived_count } diff --git a/增加字段.txt b/增加字段.txt deleted file mode 100644 index 20d3c98..0000000 --- a/增加字段.txt +++ /dev/null @@ -1 +0,0 @@ -ALTER TABLE ai_article.ai_image_tags ADD COLUMN derived_tag VARCHAR(1000) NOT NULL DEFAULT '' COMMENT '衍生标签,格式:#标签1##标签2#'; \ No newline at end of file