From 82b5fbf87574d1496a0e492b79d0c07de2b63869 Mon Sep 17 00:00:00 2001 From: liangguodong Date: Wed, 4 Feb 2026 14:36:13 +0800 Subject: [PATCH] Initial commit with .gitignore --- .gitignore | 27 + ai_article.sql | 1015 +++++++++++++++++++++++++++++++++++++ ai_image_tags.txt | 34 ++ basket.py | 72 +++ image_similarity_check.py | 386 ++++++++++++++ query_status.py | 59 +++ requirements.txt | 6 + reset_data.py | 51 ++ reset_vector.py | 54 ++ 9 files changed, 1704 insertions(+) create mode 100644 .gitignore create mode 100644 ai_article.sql create mode 100644 ai_image_tags.txt create mode 100644 basket.py create mode 100644 image_similarity_check.py create mode 100644 query_status.py create mode 100644 requirements.txt create mode 100644 reset_data.py create mode 100644 reset_vector.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..23a6975 --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# Virtual environment +venv/ +.venv/ +env/ +.env/ + +# Python cache +__pycache__/ +*.py[cod] +*$py.class +*.so + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Config with sensitive data +config.ini + +# Logs +*.log + +# OS files +.DS_Store +Thumbs.db diff --git a/ai_article.sql b/ai_article.sql new file mode 100644 index 0000000..0a40562 --- /dev/null +++ b/ai_article.sql @@ -0,0 +1,1015 @@ +/* + Navicat Premium Dump SQL + + Source Server : mixue + Source Server Type : MySQL + Source Server Version : 90001 (9.0.1) + Source Host : localhost:3306 + Source Schema : ai_article + + Target Server Type : MySQL + Target Server Version : 90001 (9.0.1) + File Encoding : 65001 + + Date: 02/02/2026 16:34:15 +*/ + +SET NAMES utf8mb4; +SET FOREIGN_KEY_CHECKS = 0; + +-- ---------------------------- +-- Table structure for ai_article_images +-- ---------------------------- +DROP TABLE IF EXISTS `ai_article_images`; +CREATE TABLE `ai_article_images` ( + `id` int NOT NULL AUTO_INCREMENT, + `article_id` int NOT NULL DEFAULT 0, + `image_id` int NOT NULL DEFAULT 0, + `image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `image_tag_id` int NOT NULL DEFAULT 0, + `sort_order` int NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `keywords_id` int NOT NULL DEFAULT 0, + `keywords_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `department_id` int NOT NULL DEFAULT 0, + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `image_source` tinyint(1) NOT NULL DEFAULT 0 COMMENT '1=tag|2=change', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_article_image`(`article_id` ASC, `image_id` ASC) USING BTREE, + INDEX `image_id`(`image_id` ASC) USING BTREE, + INDEX `idx_tag_article_lookup`(`image_tag_id` ASC, `article_id` ASC) USING BTREE, + INDEX `idx_article_images_article_tag`(`article_id` ASC, `image_tag_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 700 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_article_tags +-- ---------------------------- +DROP TABLE IF EXISTS `ai_article_tags`; +CREATE TABLE `ai_article_tags` ( + `id` int NOT NULL AUTO_INCREMENT, + `article_id` int NOT NULL, + `coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'Coze生成的标签', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_article_tag`(`article_id` ASC) USING BTREE, + CONSTRAINT `ai_article_tags_ibfk_1` FOREIGN KEY (`article_id`) REFERENCES `ai_articles` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 943 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_articles +-- ---------------------------- +DROP TABLE IF EXISTS `ai_articles`; +CREATE TABLE `ai_articles` ( + `id` int NOT NULL AUTO_INCREMENT, + `batch_id` bigint UNSIGNED NOT NULL DEFAULT 0 COMMENT '批次ID', + `topic_type_id` int UNSIGNED NOT NULL DEFAULT 0, + `prompt_workflow_id` int UNSIGNED NOT NULL DEFAULT 0, + `topic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `departmentids` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `author_id` int NULL DEFAULT NULL, + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `department_id` int NULL DEFAULT NULL, + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `created_user_id` int NOT NULL DEFAULT 0, + `review_user_id` int NULL DEFAULT NULL, + `publish_user_id` int NULL DEFAULT NULL, + `status` enum('topic','cover_image','generate','generate_failed','draft','pending_review','approved','rejected','published_review','published','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'draft', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `review_comment` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `publish_time` timestamp NULL DEFAULT NULL, + `baijiahao_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `baijiahao_status` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `word_count` int NULL DEFAULT 0, + `image_count` int NULL DEFAULT 0, + `coze_tag` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT 'Coze生成的标签', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `created_user_id`(`created_user_id` ASC) USING BTREE, + INDEX `review_user_id`(`review_user_id` ASC) USING BTREE, + INDEX `publish_user_id`(`publish_user_id` ASC) USING BTREE, + INDEX `idx_articles_status_user_created`(`status` ASC, `created_user_id` ASC, `created_at` DESC) USING BTREE, + INDEX `idx_articles_status_created`(`status` ASC, `created_at` DESC) USING BTREE, + INDEX `idx_articles_status`(`status` ASC) USING BTREE, + INDEX `idx_articles_created_at`(`created_at` DESC) USING BTREE, + INDEX `idx_status_id_author`(`status` ASC, `id` ASC, `author_id` ASC) USING BTREE, + INDEX `idx_articles_updated_at`(`updated_at` DESC) USING BTREE, + INDEX `idx_articles_status_prompt_topic_id`(`status` ASC, `prompt_workflow_id` ASC, `topic` ASC, `id` ASC) USING BTREE, + INDEX `idx_articles_status_author_created`(`status` ASC, `author_id` ASC, `created_at` DESC) USING BTREE, + INDEX `idx_articles_created_status_author`(`created_at` ASC, `status` ASC, `author_id` ASC) USING BTREE, + INDEX `idx_channel_status_publish_author`(`channel` ASC, `status` ASC, `publish_time` ASC, `author_id` ASC) USING BTREE, + INDEX `idx_author_channel_status_date`(`author_id` ASC, `channel` ASC, `status` ASC, `updated_at` ASC) USING BTREE, + INDEX `idx_audit_stats`(`author_id` ASC, `channel` ASC, `status` ASC, `updated_at` ASC) USING BTREE, + INDEX `idx_status_id`(`status` ASC, `id` ASC) USING BTREE, + INDEX `idx_status_dept_author`(`status` ASC, `department_id` ASC, `author_id` ASC) USING BTREE, + CONSTRAINT `ai_articles_ibfk_1` FOREIGN KEY (`author_id`) REFERENCES `ai_authors` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, + CONSTRAINT `ai_articles_ibfk_2` FOREIGN KEY (`created_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, + CONSTRAINT `ai_articles_ibfk_3` FOREIGN KEY (`review_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT, + CONSTRAINT `ai_articles_ibfk_4` FOREIGN KEY (`publish_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 1350 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_authors +-- ---------------------------- +DROP TABLE IF EXISTS `ai_authors`; +CREATE TABLE `ai_authors` ( + `id` int NOT NULL AUTO_INCREMENT, + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `app_id` varchar(127) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `app_token` varchar(127) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department_id` int NOT NULL DEFAULT 0, + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `title` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `hospital` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `specialty` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `toutiao_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `toutiao_images_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `toutiao_images` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `introduction` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `avatar_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `cumulative_published_count` int NULL DEFAULT 0 COMMENT '累计发文量(从起始日到stat_date的总和)', + `cumulative_revenue_sum` int NULL DEFAULT 0 COMMENT '累计收入(从起始日到stat_date的总和)', + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `daily_post_max` int NOT NULL DEFAULT 0 COMMENT '作者每日发文MAX', + `publishing_priority` decimal(18, 2) NULL DEFAULT 0.00 COMMENT '发文优先等级', + `stock_quantity` int NOT NULL DEFAULT 0 COMMENT '作者可发文库存量', + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_ai_authors_status`(`status` ASC) USING BTREE, + INDEX `idx_ai_authors_status_id`(`status` ASC, `id` ASC) USING BTREE, + INDEX `idx_status_created_at`(`status` ASC, `created_at` DESC) USING BTREE, + INDEX `idx_status_updated_at`(`status` ASC, `updated_at` DESC) USING BTREE, + INDEX `idx_status_cumulative_published`(`status` ASC, `cumulative_published_count` DESC) USING BTREE, + INDEX `idx_channel_status_id`(`channel` ASC, `status` ASC, `id` ASC) USING BTREE, + INDEX `idx_channel_status_daily_max`(`channel` ASC, `status` ASC, `daily_post_max` ASC, `id` ASC) USING BTREE, + INDEX `idx_channel_status_daily_max_id`(`channel` ASC, `status` ASC, `daily_post_max` ASC, `id` ASC) USING BTREE, + INDEX `idx_query_optimized`(`channel` ASC, `status` ASC, `id` ASC, `daily_post_max` ASC, `author_name` ASC) USING BTREE, + INDEX `idx_channel_status_dailymax_id`(`channel` ASC, `status` ASC, `daily_post_max` ASC, `id` ASC, `author_name` ASC) USING BTREE, + INDEX `idx_dept_channel_status`(`department_id` ASC, `channel` ASC, `status` ASC) USING BTREE, + INDEX `idx_ai_authors_department_id`(`department_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 256 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_batch_uploads +-- ---------------------------- +DROP TABLE IF EXISTS `ai_batch_uploads`; +CREATE TABLE `ai_batch_uploads` ( + `id` int NOT NULL AUTO_INCREMENT, + `user_id` int NOT NULL, + `file_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `file_path` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `total_count` int NULL DEFAULT 0, + `success_count` int NULL DEFAULT 0, + `failed_count` int NULL DEFAULT 0, + `status` enum('processing','completed','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'processing', + `error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `user_id`(`user_id` ASC) USING BTREE, + CONSTRAINT `ai_batch_uploads_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 101 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_department_config +-- ---------------------------- +DROP TABLE IF EXISTS `ai_department_config`; +CREATE TABLE `ai_department_config` ( + `id` int UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `department_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '科室名称', + `department_code` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '科室编码', + `keywords` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '关联关键词(JSON数组)', + `priority` int NOT NULL DEFAULT 0 COMMENT '优先级', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态:0-禁用,1-启用', + `remark` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '备注', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_department_code`(`department_code` ASC) USING BTREE, + INDEX `idx_status`(`status` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 3 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci COMMENT = '科室标签配置表' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_departments +-- ---------------------------- +DROP TABLE IF EXISTS `ai_departments`; +CREATE TABLE `ai_departments` ( + `id` int NOT NULL AUTO_INCREMENT, + `department_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `query_stock_quantity` int NOT NULL DEFAULT 0 COMMENT '科室下query审核存量', + `article_stock_quantity` int NOT NULL DEFAULT 0 COMMENT '科室下审核内容存量', + `max_stock_quantity` int NOT NULL DEFAULT 0 COMMENT '科室下设置发文总量max', + `published_stock_quantity` int NOT NULL DEFAULT 0 COMMENT '科室下发布成功的量', + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_ai_departments_created_at`(`created_at` DESC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 82 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_image_tags +-- ---------------------------- +DROP TABLE IF EXISTS `ai_image_tags`; +CREATE TABLE `ai_image_tags` ( + `id` int NOT NULL AUTO_INCREMENT, + `image_id` int NOT NULL, + `image_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `tag_id` int NOT NULL, + `tag_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `default_tag_id` int NOT NULL DEFAULT 0 COMMENT '初始标签ID', + `default_tag_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '初始标签', + `keywords_id` int NOT NULL, + `keywords_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department_id` int NOT NULL, + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_source` tinyint UNSIGNED NOT NULL DEFAULT 1 COMMENT '1=clean_images|2=Flower_character|3=gemini3', + `created_user_id` int NOT NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `image_attached_article_count` int NOT NULL DEFAULT 0 COMMENT 'Number of articles the image is attached to', + `status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','tag_extension','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','generate_review','generate','generate_failed','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'draft' COMMENT '图片完整扭转流程状态', + `blocking_reason` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '审核不通过原因', + `similarity` enum('draft','yes','calc','recalc') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT 'yes=是相似|calc=已计算|recalc=需要重新计算', + `similarity_image_tags_id` int NOT NULL DEFAULT 0 COMMENT 'yes=是相似|把image_tags_id写入', + `similarity score` float NOT NULL DEFAULT 0 COMMENT '相似时候,计算相似度值', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_image_tag`(`image_id` ASC, `tag_id` ASC) USING BTREE, + INDEX `tag_id`(`tag_id` ASC) USING BTREE, + INDEX `idx_id_desc`(`id` DESC) USING BTREE, + INDEX `idx_image_id_id`(`image_id` ASC, `id` DESC) USING BTREE, + INDEX `idx_created_at`(`created_at` DESC) USING BTREE, + INDEX `idx_department_id`(`department_id` ASC) USING BTREE, + INDEX `idx_keywords_id`(`keywords_id` ASC) USING BTREE, + INDEX `idx_dept_keywords`(`department_id` ASC, `keywords_id` ASC) USING BTREE, + INDEX `idx_dept_keywords_count_id`(`department_id` ASC, `keywords_id` ASC, `image_attached_article_count` ASC, `id` DESC) USING BTREE, + INDEX `idx_keywords_count_id`(`keywords_id` ASC, `image_attached_article_count` ASC, `id` DESC) USING BTREE, + INDEX `idx_dept_count_id`(`department_id` ASC, `image_attached_article_count` ASC, `id` DESC) USING BTREE, + INDEX `idx_count_id`(`image_attached_article_count` ASC, `id` DESC) USING BTREE, + INDEX `idx_tag_name`(`tag_name` ASC) USING BTREE, + INDEX `idx_tag_name_id`(`tag_name` ASC, `id` ASC) USING BTREE, + INDEX `idx_tag_notnull_id`(`id` ASC, `tag_name` ASC, `image_id` ASC, `created_at` ASC) USING BTREE, + CONSTRAINT `ai_image_tags_ibfk_2` FOREIGN KEY (`tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 929784 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_images +-- ---------------------------- +DROP TABLE IF EXISTS `ai_images`; +CREATE TABLE `ai_images` ( + `id` int NOT NULL AUTO_INCREMENT, + `image_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `image_thumb_url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `thumbnail_url` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `keywords` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `image_type` enum('medical','lifestyle','instruction') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'medical', + `file_size` bigint NULL DEFAULT NULL, + `width` int NULL DEFAULT NULL, + `height` int NULL DEFAULT NULL, + `upload_user_id` int NOT NULL, + `status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `upload_user_id`(`upload_user_id` ASC) USING BTREE, + INDEX `idx_status_updated`(`status` ASC, `updated_at` ASC) USING BTREE, + CONSTRAINT `ai_images_ibfk_1` FOREIGN KEY (`upload_user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 26849 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_keywords +-- ---------------------------- +DROP TABLE IF EXISTS `ai_keywords`; +CREATE TABLE `ai_keywords` ( + `id` int NOT NULL AUTO_INCREMENT, + `keywords_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `department_id` int NOT NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_ai_keywords_dept_created`(`department_id` ASC, `created_at` DESC) USING BTREE, + INDEX `idx_ai_keywords_created_at`(`created_at` DESC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 295 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_logs +-- ---------------------------- +DROP TABLE IF EXISTS `ai_logs`; +CREATE TABLE `ai_logs` ( + `id` int NOT NULL AUTO_INCREMENT, + `user_id` int NULL DEFAULT NULL, + `action` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `target_type` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `target_id` int NULL DEFAULT NULL, + `description` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `ip_address` varchar(45) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `user_agent` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `request_data` json NULL, + `response_data` json NULL, + `status` enum('success','error','warning') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'success', + `error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `user_id`(`user_id` ASC) USING BTREE, + INDEX `idx_created_at`(`created_at` DESC) USING BTREE, + CONSTRAINT `ai_logs_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `ai_users` (`id`) ON DELETE RESTRICT ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 116565 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_mip_click +-- ---------------------------- +DROP TABLE IF EXISTS `ai_mip_click`; +CREATE TABLE `ai_mip_click` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `site_id` bigint NOT NULL COMMENT '关联站点ID(外键指向 ai_mip_site.id)', + `site_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '网站URL(冗余字段,便于查询优化)', + `click_time` datetime NOT NULL COMMENT '点击发生时间', + `user_ip` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '用户IP地址', + `user_agent` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '浏览器/设备信息', + `referer_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '来源页面URL', + `device_type` enum('mobile','pc','tablet') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '设备类型', + `click_count` int NULL DEFAULT 1 COMMENT '本次点击事件的计数(一般为1,可用于批量插入)', + `is_valid` tinyint(1) NULL DEFAULT 1 COMMENT '是否有效点击(防刷)', + `task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT 'RPA任务ID(可选)', + `operator` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '操作者(如自动系统)', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间', + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_site_id`(`site_id` ASC) USING BTREE, + INDEX `idx_click_time`(`click_time` ASC) USING BTREE, + INDEX `idx_site_url`(`site_url` ASC) USING BTREE, + INDEX `idx_click_time_site`(`click_time` ASC, `site_id` ASC) USING BTREE, + INDEX `idx_task_id`(`task_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告点击日志表' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_mip_interaction +-- ---------------------------- +DROP TABLE IF EXISTS `ai_mip_interaction`; +CREATE TABLE `ai_mip_interaction` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `site_id` bigint NOT NULL COMMENT '关联站点ID', + `click_id` bigint NULL DEFAULT NULL COMMENT '关联点击记录ID', + `task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT 'RPA任务ID', + `interaction_type` enum('reply','comment','message','form_submit','follow','like','share') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '互动类型', + `interaction_time` datetime NOT NULL COMMENT '互动发生时间', + `interaction_status` enum('pending','success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'pending' COMMENT '互动状态', + `reply_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '回复/评论的内容', + `reply_template_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的回复模板ID', + `ad_element_xpath` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告元素的XPath定位', + `ad_element_selector` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告元素的CSS选择器', + `ad_text_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '广告的文本内容', + `execution_mode` enum('auto','manual','semi_auto') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'auto' COMMENT '执行方式', + `rpa_script` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的RPA脚本名称', + `browser_type` enum('headless','headed','playwright','selenium') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '浏览器类型', + `anti_detection_method` json NULL COMMENT '万金油技术方案', + `proxy_ip` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的代理IP', + `user_agent` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '使用的User-Agent', + `custom_headers` json NULL COMMENT '自定义HTTP头', + `fingerprint_id` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '浏览器指纹ID', + `response_received` tinyint(1) NULL DEFAULT 0 COMMENT '是否收到回复', + `response_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '对方回复的内容', + `response_time` datetime NULL DEFAULT NULL COMMENT '收到回复的时间', + `response_delay_seconds` int NULL DEFAULT NULL COMMENT '回复延迟(秒)', + `is_successful` tinyint(1) NULL DEFAULT 0 COMMENT '是否成功互动', + `error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '失败原因/错误信息', + `retry_count` int NULL DEFAULT 0 COMMENT '重试次数', + `conversion_flag` tinyint(1) NULL DEFAULT 0 COMMENT '是否产生转化', + `site_dimension` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网址维度标签', + `campaign_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告活动ID', + `operator` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '操作者', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录更新时间', + `remark` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息', + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_site_id`(`site_id` ASC) USING BTREE, + INDEX `idx_click_id`(`click_id` ASC) USING BTREE, + INDEX `idx_task_id`(`task_id` ASC) USING BTREE, + INDEX `idx_interaction_time`(`interaction_time` ASC) USING BTREE, + INDEX `idx_interaction_status`(`interaction_status` ASC) USING BTREE, + INDEX `idx_composite`(`site_id` ASC, `interaction_time` ASC, `interaction_status` ASC) USING BTREE, + INDEX `idx_response_received`(`response_received` ASC) USING BTREE, + INDEX `idx_conversion`(`conversion_flag` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告互动回复日志表' ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_mip_query_task +-- ---------------------------- +DROP TABLE IF EXISTS `ai_mip_query_task`; +CREATE TABLE `ai_mip_query_task` ( + `id` int NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `query_word` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '查询词/关键词', + `query_type` enum('keyword','phrase','long_tail') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'keyword' COMMENT '查询类型:关键词/短语/长尾词', + `task_date` char(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '任务日期,格式:YYYYMMDD', + `threshold_max` int NOT NULL DEFAULT 100 COMMENT '最大抓取数量阈值', + `current_count` int NOT NULL DEFAULT 0 COMMENT '当前已抓取数量', + `status` enum('ready','doing','failed','finished','closed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'ready' COMMENT '任务状态:准备中/执行中/失败/完成/已关闭', + `priority` tinyint NOT NULL DEFAULT 5 COMMENT '优先级(1-10,数字越小优先级越高)', + `category` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '分类标签(如:医疗、教育、法律等)', + `source_platform` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'baidu' COMMENT '来源平台:baidu/sogou/360等', + `crawl_url_count` int NOT NULL DEFAULT 0 COMMENT '已爬取URL数量', + `valid_url_count` int NOT NULL DEFAULT 0 COMMENT '有效URL数量(带广告)', + `error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '错误信息', + `started_at` timestamp NULL DEFAULT NULL COMMENT '开始执行时间', + `finished_at` timestamp NULL DEFAULT NULL COMMENT '完成时间', + `closed_at` timestamp NULL DEFAULT NULL COMMENT '达到阈值关闭时间', + `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `created_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'system' COMMENT '创建人', + `remark` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uniq_query_date`(`query_word`(191) ASC, `task_date` ASC) USING BTREE COMMENT '同一查询词每天只有一个任务', + INDEX `idx_date_status`(`task_date` ASC, `status` ASC) USING BTREE COMMENT '按日期和状态查询', + INDEX `idx_status_priority`(`status` ASC, `priority` ASC) USING BTREE COMMENT '按状态和优先级查询', + INDEX `idx_category`(`category` ASC) USING BTREE COMMENT '按分类查询', + INDEX `idx_threshold`(`threshold_max` ASC, `current_count` ASC) USING BTREE COMMENT '阈值监控', + INDEX `idx_closed`(`closed_at` ASC) USING BTREE COMMENT '关闭时间索引' +) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = 'MIP查询任务表 - 用于存储查询词抓取网址任务' ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_mip_site +-- ---------------------------- +DROP TABLE IF EXISTS `ai_mip_site`; +CREATE TABLE `ai_mip_site` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `site_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '网站URL,唯一', + `site_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网站名称(可选)', + `status` enum('active','inactive','pending') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'active' COMMENT '状态:激活/停用/待审核', + `frequency` int NULL DEFAULT 1 COMMENT '频次(如每小时发几次)', + `time_start` time NULL DEFAULT '00:00:00' COMMENT '开始时间(HH:MM:SS)', + `time_end` time NULL DEFAULT '23:59:59' COMMENT '结束时间(HH:MM:SS)', + `interval_minutes` int NULL DEFAULT 60 COMMENT '执行间隔(分钟)', + `ad_feature` varchar(1024) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告特征描述(JSON格式,如:{\"color\":\"red\", \"position\":\"top\"})', + `click_count` bigint NULL DEFAULT 0 COMMENT '累计点击次数', + `reply_count` bigint NULL DEFAULT 0 COMMENT '累计回复次数', + `site_dimension` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网址维度标签(如:教育、医疗等)', + `query_word` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '来源查询词(从哪个关键词抓取)', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `created_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '创建人', + `updated_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '更新人', + `remark` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `site_url`(`site_url` ASC) USING BTREE, + UNIQUE INDEX `idx_site_url`(`site_url`(191) ASC) USING BTREE, + INDEX `idx_status`(`status` ASC) USING BTREE, + INDEX `idx_created_at`(`created_at` ASC) USING BTREE, + INDEX `idx_query_word`(`query_word`(191) ASC) USING BTREE COMMENT '按查询词查询' +) ENGINE = InnoDB AUTO_INCREMENT = 3 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告网址管理表' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_mip_task_log +-- ---------------------------- +DROP TABLE IF EXISTS `ai_mip_task_log`; +CREATE TABLE `ai_mip_task_log` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT 'RPA任务唯一ID', + `site_id` bigint NOT NULL COMMENT '关联站点ID', + `step_1_visit_time` datetime NULL DEFAULT NULL COMMENT '步骤1:访问网址时间', + `step_1_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤1状态', + `step_2_antibot_time` datetime NULL DEFAULT NULL COMMENT '步骤2:万金油技术方案执行时间', + `step_2_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤2状态', + `step_3_ad_detection_time` datetime NULL DEFAULT NULL COMMENT '步骤3:广告检测时间', + `step_3_has_ad` tinyint(1) NULL DEFAULT NULL COMMENT '是否检测到广告', + `step_3_ad_count` int NULL DEFAULT 0 COMMENT '检测到的广告数量', + `step_4_click_time` datetime NULL DEFAULT NULL COMMENT '步骤4:点击广告时间', + `step_4_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤4状态', + `step_5_reply_time` datetime NULL DEFAULT NULL COMMENT '步骤5:获取回复时间', + `step_5_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤5状态', + `task_start_time` datetime NOT NULL COMMENT '任务开始时间', + `task_end_time` datetime NULL DEFAULT NULL COMMENT '任务结束时间', + `task_duration_seconds` int NULL DEFAULT NULL COMMENT '任务执行时长(秒)', + `task_status` enum('running','completed','failed','timeout') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'running' COMMENT '任务整体状态', + `total_clicks` int NULL DEFAULT 0 COMMENT '本次任务总点击次数', + `total_interactions` int NULL DEFAULT 0 COMMENT '本次任务总互动次数', + `successful_interactions` int NULL DEFAULT 0 COMMENT '成功互动次数', + `failed_interactions` int NULL DEFAULT 0 COMMENT '失败互动次数', + `execution_mode` enum('auto','manual','scheduled') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'auto' COMMENT '执行模式', + `triggered_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '触发者(定时任务/手动触发/队列)', + `error_log` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '错误日志', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `task_id`(`task_id` ASC) USING BTREE, + UNIQUE INDEX `uk_task_id`(`task_id` ASC) USING BTREE, + INDEX `idx_site_id`(`site_id` ASC) USING BTREE, + INDEX `idx_task_status`(`task_status` ASC) USING BTREE, + INDEX `idx_start_time`(`task_start_time` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'RPA任务执行日志表' ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_prompt_workflow +-- ---------------------------- +DROP TABLE IF EXISTS `ai_prompt_workflow`; +CREATE TABLE `ai_prompt_workflow` ( + `id` int NOT NULL AUTO_INCREMENT, + `prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `auth_token` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `workflow_id` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `workflow_type_id` int UNSIGNED NOT NULL DEFAULT 0, + `workflow_type_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `created_user_id` int NOT NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `query_enable` tinyint NOT NULL DEFAULT 0 COMMENT 'query生效AI生文大模型', + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_created_user_time`(`created_user_id` ASC, `created_at` ASC) USING BTREE, + INDEX `idx_created_at`(`created_at` ASC) USING BTREE, + INDEX `idx_workflow_id`(`workflow_id` ASC) USING BTREE, + INDEX `idx_prompt_workflow_name`(`prompt_workflow_name` ASC) USING BTREE, + INDEX `idx_query_enable`(`query_enable` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 16 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_query_audit +-- ---------------------------- +DROP TABLE IF EXISTS `ai_query_audit`; +CREATE TABLE `ai_query_audit` ( + `id` bigint UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `query_text` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT 'Query原文', + `query_hash` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT 'Query MD5哈希值,用于去重', + `query_status` tinyint NOT NULL DEFAULT 0 COMMENT 'Query状态:0-待审核,1-已过滤(黑名单),2-已通过,3-已拒绝,4-待人工审核', + `filter_reason` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '过滤原因', + `matched_keywords` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '命中的黑名单关键词(JSON数组)', + `department_tags` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '科室标签(JSON数组):影像科、CT等', + `batch_tag_result` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL COMMENT '批量打标签结果(JSON)', + `ai_score` decimal(5, 2) NULL DEFAULT NULL COMMENT 'AI评分', + `is_health_related` tinyint NULL DEFAULT NULL COMMENT '是否健康相关:0-否,1-是', + `is_complete_sentence` tinyint NULL DEFAULT NULL COMMENT '是否完整语句:0-否,1-是', + `has_person_name` tinyint NULL DEFAULT 0 COMMENT '是否包含人名:0-否,1-是', + `has_location` tinyint NULL DEFAULT 0 COMMENT '是否包含地名:0-否,1-是', + `has_hospital_name` tinyint NULL DEFAULT 0 COMMENT '是否包含医院名:0-否,1-是', + `source` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '来源渠道', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_query_hash`(`query_hash` ASC) USING BTREE, + INDEX `idx_query_status`(`query_status` ASC, `create_time` ASC) USING BTREE, + INDEX `idx_department_tags`(`department_tags`(100) ASC) USING BTREE, + INDEX `idx_create_time`(`create_time` ASC) USING BTREE +) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci COMMENT = 'AI Query审核记录表' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_query_blacklist +-- ---------------------------- +DROP TABLE IF EXISTS `ai_query_blacklist`; +CREATE TABLE `ai_query_blacklist` ( + `id` bigint UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `keyword` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '黑名单关键词', + `keyword_type` tinyint NOT NULL DEFAULT 1 COMMENT '关键词类型:1-通用词汇,2-人名,3-地名,4-医院名,5-其他', + `filter_rule` tinyint NOT NULL DEFAULT 1 COMMENT '过滤规则:1-包含即过滤,2-完全匹配,3-正则匹配', + `regex_pattern` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '正则表达式(当filter_rule=3时使用)', + `category` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '分类标签:药品、图片、费用等', + `priority` int NOT NULL DEFAULT 0 COMMENT '优先级,数值越大优先级越高', + `status` tinyint NOT NULL DEFAULT 1 COMMENT '状态:0-禁用,1-启用', + `remark` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '备注说明', + `creator` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '创建人', + `updater` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL COMMENT '更新人', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `is_deleted` tinyint NOT NULL DEFAULT 0 COMMENT '是否删除:0-未删除,1-已删除', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_keyword`(`keyword` ASC, `is_deleted` ASC) USING BTREE, + INDEX `idx_keyword_type`(`keyword_type` ASC, `status` ASC) USING BTREE, + INDEX `idx_category`(`category` ASC, `status` ASC) USING BTREE, + INDEX `idx_create_time`(`create_time` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 66 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci COMMENT = 'AI查询黑名单词表' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_query_category +-- ---------------------------- +DROP TABLE IF EXISTS `ai_query_category`; +CREATE TABLE `ai_query_category` ( + `id` int NOT NULL AUTO_INCREMENT COMMENT '类型ID', + `category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称', + `created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID', + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active' COMMENT '状态', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 5 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_query_strategies +-- ---------------------------- +DROP TABLE IF EXISTS `ai_query_strategies`; +CREATE TABLE `ai_query_strategies` ( + `id` int NOT NULL AUTO_INCREMENT, + `category_id` int NOT NULL DEFAULT 0 COMMENT '分类ID', + `category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称', + `query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称', + `query_type_id` int NOT NULL DEFAULT 0 COMMENT '类型ID', + `define_context` varchar(2048) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '定义上下文', + `for_example` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '案例', + `created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active', + PRIMARY KEY (`id`) USING BTREE, + INDEX `query_type_id`(`query_type_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 136 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_query_type +-- ---------------------------- +DROP TABLE IF EXISTS `ai_query_type`; +CREATE TABLE `ai_query_type` ( + `id` int NOT NULL AUTO_INCREMENT COMMENT '类型ID', + `category_id` int NOT NULL DEFAULT 0 COMMENT '分类ID', + `category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称', + `query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称', + `created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID', + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active' COMMENT '状态', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 131 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_statistics +-- ---------------------------- +DROP TABLE IF EXISTS `ai_statistics`; +CREATE TABLE `ai_statistics` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT 'Auto-increment ID', + `author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `date` date NOT NULL COMMENT 'Date of statistics', + `submission_count` int NULL DEFAULT 0 COMMENT 'Number of submissions (投稿量)', + `read_count` int NULL DEFAULT 0 COMMENT 'Number of reads (阅读量)', + `comment_count` int NULL DEFAULT 0 COMMENT 'Number of comments (评论量)', + `comment_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Comment rate (评论率)', + `like_count` int NULL DEFAULT 0 COMMENT 'Number of likes (点赞量)', + `like_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Like rate (点赞率)', + `favorite_count` int NULL DEFAULT 0 COMMENT 'Number of favorites (收藏量)', + `favorite_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Favorite rate (收藏率)', + `share_count` int NULL DEFAULT 0 COMMENT 'Number of shares (分享量)', + `share_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Share rate (分享率)', + `slide_ratio` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT 'Slide view ratio (滑图占比)', + `baidu_search_volume` int NULL DEFAULT 0 COMMENT 'Baidu search volume (百度搜索量)', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'Creation timestamp', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Update timestamp', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_author_date`(`author_id` ASC, `date` ASC) USING BTREE, + INDEX `idx_date`(`date` ASC) USING BTREE, + INDEX `idx_author_id`(`author_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 51 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI Content Statistics' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_statistics_day +-- ---------------------------- +DROP TABLE IF EXISTS `ai_statistics_day`; +CREATE TABLE `ai_statistics_day` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键', + `author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `stat_date` date NOT NULL COMMENT '统计日期(天)', + `total_submission_count` int NULL DEFAULT 0 COMMENT '投稿量(当日总计)', + `total_read_count` int NULL DEFAULT 0 COMMENT '阅读量(当日总计)', + `total_comment_count` int NULL DEFAULT 0 COMMENT '评论量(当日总计)', + `total_like_count` int NULL DEFAULT 0 COMMENT '点赞量(当日总计)', + `total_favorite_count` int NULL DEFAULT 0 COMMENT '收藏量(当日总计)', + `total_share_count` int NULL DEFAULT 0 COMMENT '分享量(当日总计)', + `avg_comment_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '评论率(当日平均)', + `avg_like_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '点赞率(当日平均)', + `avg_favorite_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '收藏率(当日平均)', + `avg_share_rate` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '分享率(当日平均)', + `avg_slide_ratio` decimal(5, 4) NULL DEFAULT 0.0000 COMMENT '滑图占比(当日平均)', + `total_baidu_search_volume` int NULL DEFAULT 0 COMMENT '百度搜索量(当日总计)', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_author_stat_date`(`author_id` ASC, `stat_date` ASC) USING BTREE, + INDEX `idx_stat_date`(`stat_date` ASC) USING BTREE, + INDEX `idx_author_id`(`author_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 51 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI内容每日汇总统计表' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_statistics_days +-- ---------------------------- +DROP TABLE IF EXISTS `ai_statistics_days`; +CREATE TABLE `ai_statistics_days` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键', + `author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `stat_date` date NOT NULL COMMENT '统计日期(自然日)', + `daily_published_count` int NULL DEFAULT 0 COMMENT '单日发文量', + `day_revenue` decimal(18, 2) NULL DEFAULT 0.00 COMMENT '当天收益(stat_date所在自然日)', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + `daily_post_max` int NOT NULL DEFAULT 0 COMMENT '作者每日发文MAX', + `stock_quantity` int NOT NULL DEFAULT 0 COMMENT '作者每日发文库存量', + `defect_quantity` int NOT NULL DEFAULT 0 COMMENT '作者每日发文失败量', + `is_full` tinyint(1) NOT NULL DEFAULT 0 COMMENT '是否发满:0-未发满,1-已发满', + `department_id` int NOT NULL DEFAULT 0 COMMENT '科室ID', + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '科室名称', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_author_stat_date`(`author_id` ASC, `stat_date` ASC) USING BTREE, + INDEX `idx_stat_date`(`stat_date` ASC) USING BTREE, + INDEX `idx_author_id`(`author_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 71003 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI内容每日核心指标汇总表(含累计、收益及环比)' ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_statistics_monthly +-- ---------------------------- +DROP TABLE IF EXISTS `ai_statistics_monthly`; +CREATE TABLE `ai_statistics_monthly` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键', + `author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `stat_monthly` varchar(48) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '统计日期(自然月)', + `monthly_revenue` decimal(18, 2) NULL DEFAULT 0.00 COMMENT '当月收益(stat_date所在自然月的总收益)', + `revenue_mom_growth_rate` decimal(10, 6) NULL DEFAULT 0.000000 COMMENT '收益月环比增长率((本月收益 - 上月收益) / NULLIF(上月收益, 0))', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_author_stat_date`(`author_id` ASC, `stat_monthly` ASC) USING BTREE, + INDEX `idx_stat_date`(`stat_monthly` ASC) USING BTREE, + INDEX `idx_author_id`(`author_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 3069 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI内容每月核心指标汇总表(含累计、收益及环比)' ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_statistics_weekly +-- ---------------------------- +DROP TABLE IF EXISTS `ai_statistics_weekly`; +CREATE TABLE `ai_statistics_weekly` ( + `id` bigint NOT NULL AUTO_INCREMENT COMMENT '自增主键', + `author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '作者名称', + `channel` tinyint(1) NOT NULL DEFAULT 1 COMMENT '1=baidu|2=toutiao|3=weixin', + `stat_weekly` varchar(48) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '统计日期(自然周)', + `weekly_revenue` decimal(18, 2) NULL DEFAULT 0.00 COMMENT '当周收益(stat_date所在自然周的总收益,周一至周日)', + `revenue_wow_growth_rate` decimal(10, 6) NULL DEFAULT 0.000000 COMMENT '收益周环比增长率((本周收益 - 上周收益) / NULLIF(上周收益, 0))', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_author_stat_date`(`author_id` ASC, `stat_weekly` ASC) USING BTREE, + INDEX `idx_stat_date`(`stat_weekly` ASC) USING BTREE, + INDEX `idx_author_id`(`author_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 10644 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'AI内容每周核心指标汇总表(含累计、收益及环比)' ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_tag_subsets +-- ---------------------------- +DROP TABLE IF EXISTS `ai_tag_subsets`; +CREATE TABLE `ai_tag_subsets` ( + `id` int NOT NULL AUTO_INCREMENT, + `parent_tag_id` int NOT NULL, + `subset_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `subset_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `parent_tag_id`(`parent_tag_id` ASC) USING BTREE, + CONSTRAINT `ai_tag_subsets_ibfk_1` FOREIGN KEY (`parent_tag_id`) REFERENCES `ai_tags` (`id`) ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE = InnoDB AUTO_INCREMENT = 20495 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_tags +-- ---------------------------- +DROP TABLE IF EXISTS `ai_tags`; +CREATE TABLE `ai_tags` ( + `id` int NOT NULL AUTO_INCREMENT, + `tag_name` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `tag_category` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `description` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL, + `usage_count` int NULL DEFAULT 0, + `status` enum('active','inactive') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_tag_name`(`tag_name` ASC) USING BTREE, + INDEX `idx_status_updated`(`status` ASC, `updated_at` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 13434 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_topic_type +-- ---------------------------- +DROP TABLE IF EXISTS `ai_topic_type`; +CREATE TABLE `ai_topic_type` ( + `id` int NOT NULL AUTO_INCREMENT, + `topic_type_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `type_id` int NOT NULL DEFAULT 0, + `type_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `prompt_workflow_id` int UNSIGNED NOT NULL DEFAULT 0, + `prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `created_user_id` int NOT NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_created_user_time`(`created_user_id` ASC, `created_at` ASC) USING BTREE, + INDEX `idx_created_at`(`created_at` ASC) USING BTREE, + INDEX `idx_type_id`(`type_id` ASC) USING BTREE, + INDEX `idx_topic_type_name`(`topic_type_name` ASC) USING BTREE, + INDEX `idx_prompt_workflow_id`(`prompt_workflow_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 16 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for ai_user_authors +-- ---------------------------- +DROP TABLE IF EXISTS `ai_user_authors`; +CREATE TABLE `ai_user_authors` ( + `id` int NOT NULL AUTO_INCREMENT, + `user_id` int UNSIGNED NOT NULL DEFAULT 0, + `username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `author_id` int NOT NULL DEFAULT 0, + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT '', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_user_author`(`user_id` ASC, `author_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 208 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_user_departments +-- ---------------------------- +DROP TABLE IF EXISTS `ai_user_departments`; +CREATE TABLE `ai_user_departments` ( + `id` int UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键ID', + `user_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '用户ID', + `username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '用户名', + `department_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '科室ID', + `department_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '科室名称', + `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_user_department`(`user_id` ASC, `department_id` ASC) USING BTREE, + INDEX `idx_dept_user`(`department_id` ASC, `user_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 77 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci COMMENT = '用户-科室关系表' ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_user_topics +-- ---------------------------- +DROP TABLE IF EXISTS `ai_user_topics`; +CREATE TABLE `ai_user_topics` ( + `id` int NOT NULL AUTO_INCREMENT, + `user_id` int UNSIGNED NOT NULL DEFAULT 0, + `username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `topic_type_id` int UNSIGNED NOT NULL DEFAULT 0, + `topic_type_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '', + `prompt_workflow_id` int NOT NULL DEFAULT 0, + `prompt_workflow_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT '', + `status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'inactive', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `idx_topic_type_id`(`topic_type_id` ASC) USING BTREE, + INDEX `idx_prompt_workflow_id`(`prompt_workflow_id` ASC) USING BTREE, + INDEX `idx_created_at`(`created_at` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 81 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for ai_users +-- ---------------------------- +DROP TABLE IF EXISTS `ai_users`; +CREATE TABLE `ai_users` ( + `id` int NOT NULL AUTO_INCREMENT, + `username` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `password` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL, + `real_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `email` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `phone` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `xhs_cookie` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL COMMENT '小红书Cookie', + `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL, + `role` enum('admin','editor','reviewer','publisher','each_title_reviewer','reviewer_query','reviewer_image') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'editor' COMMENT '用户角色', + `status` enum('active','inactive','deleted') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'active', + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `uk_username`(`username` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 262 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for baidu_keyword +-- ---------------------------- +DROP TABLE IF EXISTS `baidu_keyword`; +CREATE TABLE `baidu_keyword` ( + `id` int NOT NULL AUTO_INCREMENT, + `keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL, + `crawled` tinyint NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `parents_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '父层级', + `seed_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '种子', + `seed_name` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '种子名称', + `department` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '科室', + `department_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '科室ID', + `author_id` int NOT NULL DEFAULT 0 COMMENT '作者ID', + `author_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '作者名称', + `type` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '类型', + `type_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '类型D', + `partsof_speech` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '词性', + `partsof_speech_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '词性ID', + `yesorno_question` enum('yes','no','unprocessed') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'unprocessed' COMMENT '是否是问题?', + `query_type_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '类型名称', + `query_type_id` int NOT NULL DEFAULT 0 COMMENT '类型ID', + `category_id` int NOT NULL DEFAULT 0 COMMENT '分类ID', + `category_name` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '分类名称', + `created_user_id` int NOT NULL DEFAULT 0 COMMENT '创建用户ID', + `query_summary_status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'ready', + `blocking_reason` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '审核不通过原因', + `article_id` int NOT NULL DEFAULT 0 COMMENT '文章ID', + `query_stage` enum('draft','created','summary','reviewed','generated','published') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT '分5个阶段,创建|总结|审核|生文|发布', + `query_status` enum('draft','ready','doing','failed','finished','duplicates','calc_similarity','similarity','hit_yellow','automated_review','automated_review_failed','manual_review','manual_review_failed','generate_review','generate','generate_failed','published','published_failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'draft' COMMENT 'query完整扭转流程状态', + `status` enum('draft','available','unavailable','successful','failed') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT '状态_分2个阶段|可用|不可用|发布成功|发布失败', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `review_user_id` int NOT NULL DEFAULT 0 COMMENT '审核用户ID', + `similarity` enum('draft','yes','calc','recalc') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT 'draft' COMMENT 'yes=是相似|calc=已计算|recalc=需要重新计算', + `similarity_query` int NOT NULL DEFAULT 0 COMMENT 'yes=是相似|把query_id写入', + `similarity_query_keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT 'yes=是相似|把query写入', + `similarity score` float NOT NULL DEFAULT 0 COMMENT '相似时候,计算相似度值', + `reviewed_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '审核日期', + `fast_track` tinyint(1) NOT NULL DEFAULT 0 COMMENT '加急|0=否|1=是', + `automated_review_failed_reason` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '千问大模型审核query不符合原因', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `keyword`(`keyword` ASC) USING BTREE, + INDEX `idx_crawled_seed`(`crawled` ASC, `seed_id` ASC) USING BTREE, + INDEX `idx_created_at`(`created_at` ASC) USING BTREE, + INDEX `idx_query_status_id`(`query_status` ASC, `id` DESC) USING BTREE, + INDEX `idx_status_stage`(`query_status` ASC, `query_stage` ASC) USING BTREE, + INDEX `idx_review_status_user`(`query_status` ASC, `review_user_id` ASC) USING BTREE, + INDEX `idx_review_query`(`query_status` ASC, `review_user_id` ASC) USING BTREE, + INDEX `idx_status_user_created`(`query_status` ASC, `review_user_id` ASC, `created_at` ASC) USING BTREE, + INDEX `idx_article_id`(`article_id` ASC) USING BTREE, + INDEX `idx_department_id`(`department_id` ASC) USING BTREE, + INDEX `idx_dept_status`(`department_id` ASC, `query_status` ASC) USING BTREE, + INDEX `idx_dept_query_status`(`department_id` ASC, `query_status` ASC) USING BTREE, + INDEX `idx_dept_review_user`(`department_id` ASC, `review_user_id` ASC) USING BTREE, + INDEX `idx_query_status_dept_id`(`query_status` ASC, `department_id` ASC, `id` DESC) USING BTREE, + INDEX `idx_status_dept_created`(`query_status` ASC, `department_id` ASC, `created_at` DESC) USING BTREE, + INDEX `idx_status_dept_id`(`query_status` ASC, `department_id` ASC, `id` ASC) USING BTREE, + INDEX `idx_seed_created`(`seed_id` ASC, `created_at` ASC) USING BTREE, + INDEX `idx_baidu_query_status`(`query_status` ASC, `id` ASC) USING BTREE, + INDEX `idx_baidu_seed_created`(`seed_id` ASC, `created_at` ASC) USING BTREE, + INDEX `idx_status_id`(`query_status` ASC, `id` ASC) USING BTREE, + INDEX `idx_query_status_cover`(`query_status` ASC) USING BTREE, + INDEX `idx_query_status_id_asc`(`query_status` ASC, `id` ASC) USING BTREE, + INDEX `idx_status_order_covering`(`query_status` ASC, `id` ASC, `keyword` ASC) USING BTREE, + INDEX `idx_status_fast_id_keyword`(`query_status` ASC, `fast_track` ASC, `id` ASC, `keyword` ASC) USING BTREE, + FULLTEXT INDEX `idx_keyword_fulltext`(`keyword`) +) ENGINE = InnoDB AUTO_INCREMENT = 901869 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic; + +-- ---------------------------- +-- Table structure for baidu_query_task +-- ---------------------------- +DROP TABLE IF EXISTS `baidu_query_task`; +CREATE TABLE `baidu_query_task` ( + `id` int NOT NULL AUTO_INCREMENT, + `seed_id` int UNSIGNED NOT NULL DEFAULT 0 COMMENT '种子', + `seed_name` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '种子名称', + `task_date` char(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '任务日期,格式:YYYYMMDD', + `threshold_max` int NOT NULL DEFAULT 1000 COMMENT '最大阈值', + `current_count` int NOT NULL DEFAULT 0 COMMENT '当前增量', + `status` enum('ready','doing','failed','finished','closed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'ready', + `started_at` timestamp NULL DEFAULT NULL, + `finished_at` timestamp NULL DEFAULT NULL, + `closed_at` timestamp NULL DEFAULT NULL COMMENT '达到阈值关闭时间', + `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) USING BTREE, + INDEX `uniq_seed_date`(`seed_id` ASC, `task_date` ASC) USING BTREE, + INDEX `idx_date_status`(`task_date` ASC, `status` ASC) USING BTREE, + INDEX `idx_status_count`(`status` ASC, `current_count` ASC) USING BTREE, + INDEX `idx_threshold`(`threshold_max` ASC) USING BTREE, + INDEX `idx_closed`(`closed_at` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 184 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = '百度查询任务表' ROW_FORMAT = DYNAMIC; + +-- ---------------------------- +-- Table structure for baidu_seed_keywords +-- ---------------------------- +DROP TABLE IF EXISTS `baidu_seed_keywords`; +CREATE TABLE `baidu_seed_keywords` ( + `id` int NOT NULL AUTO_INCREMENT, + `batch_id` bigint UNSIGNED NOT NULL DEFAULT 0 COMMENT '批次ID', + `keyword` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL, + `crawled` tinyint NULL DEFAULT 0, + `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, + `status` enum('ready','doing','failed','finished') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT 'ready', + `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `priority_weight` int NOT NULL DEFAULT 0 COMMENT '优先级和权重1~10000|更高的先处理', + `fast_track` tinyint(1) NOT NULL DEFAULT 0 COMMENT '加急|0=否|1=是', + PRIMARY KEY (`id`) USING BTREE, + UNIQUE INDEX `keyword`(`keyword` ASC) USING BTREE, + INDEX `idx_crawled_priority`(`crawled` ASC, `priority_weight` DESC) USING BTREE, + INDEX `idx_fast_track`(`fast_track` ASC) USING BTREE, + INDEX `idx_crawled_fast_weight`(`crawled` ASC, `fast_track` ASC, `priority_weight` DESC) USING BTREE, + INDEX `idx_crawled_fast_weight_covering`(`crawled` ASC, `fast_track` ASC, `priority_weight` DESC, `keyword` ASC, `id` ASC) USING BTREE, + INDEX `idx_crawled_fast_priority`(`crawled` ASC, `fast_track` ASC, `priority_weight` DESC, `id` ASC, `keyword` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 100001 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = DYNAMIC; + +SET FOREIGN_KEY_CHECKS = 1; diff --git a/ai_image_tags.txt b/ai_image_tags.txt new file mode 100644 index 0000000..386988d --- /dev/null +++ b/ai_image_tags.txt @@ -0,0 +1,34 @@ +8.149.233.36/ai_article/ai_image_tags/ http://47.99.184.230:8008/andes/index.php?route=/sql&pos=0&db=ai_article&table=ai_image_tags + + 正在显示第 25 - 49 行 (共 32937 行, 查询花费 0.0009 秒。) + + +SELECT * FROM `ai_image_tags` + + +id image_id image_name image_url image_thumb_url tag_id tag_name default_tag_id default_tag_name keywords_id keywords_name department_id department_name image_source created_user_id created_at updated_at image_attached_article_count status blocking_reason +16495 19346 1755312359566253.png 20250816/1755312359566253.png 20250816/1755312359566253_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:19:11 7 draft +16496 19347 1755312362360723.png 20250816/1755312362360723.png 20250816/1755312362360723_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:18:55 8 draft +16497 19348 1755312364406476.png 20250816/1755312364406476.png 20250816/1755312364406476_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:18:55 8 draft +16498 19349 1755312367284353.png 20250816/1755312367284353.png 20250816/1755312367284353_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:18:55 8 draft +16499 19350 1755312370484005.png 20250816/1755312370484005.png 20250816/1755312370484005_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:19:11 7 draft +16500 19351 1755312373245801.png 20250816/1755312373245801.png 20250816/1755312373245801_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:41 17 draft +16501 19352 1755312378278262.png 20250816/1755312378278262.png 20250816/1755312378278262_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:34:55 35 draft +16502 19353 1755312380298110.png 20250816/1755312380298110.png 20250816/1755312380298110_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:34:51 37 draft +16503 19354 1755312382399131.png 20250816/1755312382399131.png 20250816/1755312382399131_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:30 93 draft +16504 19355 1755312386945978.png 20250816/1755312386945978.png 20250816/1755312386945978_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:33 20 draft +16505 19356 1755312388894962.png 20250816/1755312388894962.png 20250816/1755312388894962_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:06 30 draft +16506 19357 1755312391383717.png 20250816/1755312391383717.png 20250816/1755312391383717_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:34:30 49 draft +16507 19358 1755312393565035.png 20250816/1755312393565035.png 20250816/1755312393565035_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:32:57 135 draft +16508 19359 1755312396609453.png 20250816/1755312396609453.png 20250816/1755312396609453_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:18:55 8 draft +16509 19360 1755312401479871.png 20250816/1755312401479871.png 20250816/1755312401479871_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:41 17 draft +16510 19361 1755312407229190.png 20250816/1755312407229190.png 20250816/1755312407229190_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:29 21 draft +16511 19362 1755312410797310.png 20250816/1755312410797310.png 20250816/1755312410797310_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 186 妇科炎症 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:35:08 29 draft +16512 19363 1755312437724619.png 20250816/1755312437724619.png 20250816/1755312437724619_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:59 69 draft +16513 19364 1755312440270419.png 20250816/1755312440270419.png 20250816/1755312440270419_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:29 94 draft +16514 19365 1755312442259884.png 20250816/1755312442259884.png 20250816/1755312442259884_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:18 107 draft +16515 19366 1755312445610363.png 20250816/1755312445610363.png 20250816/1755312445610363_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:32:36 173 draft +16516 19367 1755312448884355.png 20250816/1755312448884355.png 20250816/1755312448884355_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:14 111 draft +16517 19368 1755312451681906.png 20250816/1755312451681906.png 20250816/1755312451681906_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:29 94 draft +16518 19369 1755312453351689.png 20250816/1755312453351689.png 20250816/1755312453351689_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:24 100 draft +16519 19370 1755312456284588.png 20250816/1755312456284588.png 20250816/1755312456284588_thumb.png 12679 #妇科炎症##妇科炎症原因##妇科炎症治疗##妇科炎症怎么办# 0 265 废止 11 妇科 1 0 2025-08-16 21:48:16 2026-01-30 14:33:09 118 draft diff --git a/basket.py b/basket.py new file mode 100644 index 0000000..974f3e0 --- /dev/null +++ b/basket.py @@ -0,0 +1,72 @@ +import requests +from dashvector import Client, Doc + +# === 配置 === +DASHVECTOR_API_KEY = 'sk-55x6oBXypSlPHQ8NvPHfyBABcMIMUE0407A0FCC2A11F0B9C802831A608ABB' +DASHVECTOR_ENDPOINT = 'vrs-cn-2ml4jm42o0001r.dashvector.cn-hangzhou.aliyuncs.com' + +# 从 DashScope 控制台获取(不是百炼 Model Studio!) +DASHSCOPE_API_KEY = 'sk-d3f235925afa4e4e83d707dde04b9e52' # 👈 替换这里! + +def get_embedding(text): + url = "https://dashscope.aliyuncs.com/api/v1/services/embeddings/text-embedding/text-embedding-v1" + headers = { + "Authorization": f"Bearer {DASHSCOPE_API_KEY}", + "Content-Type": "application/json" + } + data = { + "input": {"texts": [text]}, + "model": "text-embedding-v1" + } + resp = requests.post(url, headers=headers, json=data) + if resp.status_code == 200: + return resp.json()["output"]["embeddings"][0]["embedding"] + else: + raise Exception(f"❌ Embedding API 错误: {resp.status_code} - {resp.text}") + +# === 初始化 DashVector 客户端 === +client = Client(api_key=DASHVECTOR_API_KEY, endpoint=DASHVECTOR_ENDPOINT) + +# === 创建集合(注意维度是 1536!)=== +collection_name = "medical_topics" +try: + client.delete(collection_name) +except: + pass + +client.create(name=collection_name, dimension=1536) # text-embedding-v1 输出 1536 维 +collection = client.get(collection_name) +print("✅ 集合已创建并获取") + +# === 插入数据 === +topics = [ + "如何治疗阳痿、早泄和肾虚?", + "早泄可以吃哪些中药?", + "该如何治疗早泄?", + "前列腺肥大是什么原因引起的?" +] + +docs = [] +for i, text in enumerate(topics, 1): + emb = get_embedding(text) + docs.append(Doc(id=f"topic_{i}", vector=emb, fields={"content": text})) + +resp = collection.insert(docs) +if resp.success: + print("✅ 4 条中文话题已成功插入!") +else: + print("❌ 插入失败:", resp) + exit(1) + +# === 查询测试 === +query_text = "早泄的治疗方法有哪些?" +query_vec = get_embedding(query_text) + +rets = collection.query(vector=query_vec, topk=3, output_fields=["content"]) +if rets.success: + print(f"\n🔍 查询 '{query_text}' 的结果:") + for doc in rets.documents: + print(f" ID: {doc.id} | 相似度: {doc.score:.4f} | 内容: {doc.fields['content']}") +else: + print("❌ 查询失败:", rets) + diff --git a/image_similarity_check.py b/image_similarity_check.py new file mode 100644 index 0000000..52421c5 --- /dev/null +++ b/image_similarity_check.py @@ -0,0 +1,386 @@ +# -*- coding: utf-8 -*- +""" +图片去重审核脚本 - DashScope 多模态版 +采用: pHash预筛 + DashScope多模态Embedding + 异步批量处理 +""" + +import configparser +import logging +import asyncio +import aiohttp +import imagehash +import base64 +import time +import dashscope +from dashscope import MultiModalEmbedding +from io import BytesIO +from typing import Optional, Tuple, List, Dict + +import pymysql +from dashvector import Client, Doc +from PIL import Image + + +class ImageSimilarityChecker: + """图片相似度检查器 - DashScope 多模态版""" + + def __init__(self, config_path: str = 'config.ini'): + self.config = configparser.ConfigParser() + self.config.read(config_path, encoding='utf-8') + + self._setup_logging() + + # 连接 + self.db_conn = None + self.dashvector_client = None + self.collection = None + + # DashScope API + self.dashscope_api_key = self.config.get('dashscope', 'api_key') + dashscope.api_key = self.dashscope_api_key + + # pHash 缓存 {phash_str: image_tag_id} + self.phash_cache: Dict[str, int] = {} + + # 配置参数 + self.image_cdn_base = self.config.get('image', 'cdn_base') + self.phash_threshold = self.config.getint('similarity', 'phash_threshold') + self.vector_threshold = self.config.getfloat('similarity', 'vector_threshold') + self.batch_size = self.config.getint('process', 'batch_size') + self.concurrent_downloads = self.config.getint('process', 'concurrent_downloads') + + def _setup_logging(self): + log_level = self.config.get('process', 'log_level', fallback='INFO') + log_file = self.config.get('process', 'log_file', fallback='image_similarity.log') + + self.logger = logging.getLogger(__name__) + + # 避免重复添加 handler + if not self.logger.handlers: + self.logger.setLevel(getattr(logging, log_level)) + formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') + + fh = logging.FileHandler(log_file, encoding='utf-8') + fh.setFormatter(formatter) + self.logger.addHandler(fh) + + sh = logging.StreamHandler() + sh.setFormatter(formatter) + self.logger.addHandler(sh) + + def connect_db(self): + """连接数据库""" + self.db_conn = pymysql.connect( + host=self.config.get('database', 'host'), + port=self.config.getint('database', 'port'), + user=self.config.get('database', 'user'), + password=self.config.get('database', 'password'), + database=self.config.get('database', 'database'), + charset=self.config.get('database', 'charset'), + cursorclass=pymysql.cursors.DictCursor + ) + self.logger.info("数据库连接成功") + + def connect_dashvector(self): + """连接 DashVector""" + api_key = self.config.get('dashvector', 'api_key') + endpoint = self.config.get('dashvector', 'endpoint') + collection_name = self.config.get('dashvector', 'collection_name') + dimension = self.config.getint('dashvector', 'vector_dimension') + + self.dashvector_client = Client(api_key=api_key, endpoint=endpoint) + + # 检查集合是否存在 + existing = self.dashvector_client.get(collection_name) + if existing is None: + self.logger.info(f"创建集合 {collection_name},维度 {dimension}") + self.dashvector_client.create(collection_name, dimension=dimension) + else: + self.logger.info(f"集合 {collection_name} 已存在,直接复用") + + self.collection = self.dashvector_client.get(collection_name) + self.logger.info("DashVector 连接成功") + + def get_image_embedding(self, image_url: str = None, image_base64: str = None, max_retries: int = 5) -> Optional[List[float]]: + """ + 调用 DashScope 多模态 Embedding SDK 获取图片向量 + 支持传入 URL 或 base64,带限流退避重试 + """ + for attempt in range(max_retries): + try: + # 构建输入 + if image_url: + input_data = [{'image': image_url}] + elif image_base64: + input_data = [{'image': f'data:image/jpeg;base64,{image_base64}'}] + else: + return None + + resp = MultiModalEmbedding.call( + model='multimodal-embedding-v1', + input=input_data + ) + + if resp.status_code == 200: + return resp.output['embeddings'][0]['embedding'] + elif resp.status_code in (429, 403): + wait_time = 3 + attempt * 3 + self.logger.warning(f"API 限流,等待 {wait_time} 秒后重试 ({attempt + 1}/{max_retries})...") + time.sleep(wait_time) + else: + self.logger.warning(f"Embedding API 错误: {resp.status_code} - {resp.message}") + return None + except Exception as e: + self.logger.warning(f"Embedding API 异常: {e}") + time.sleep(2) + + return None + + def load_phash_cache(self): + """初始化 pHash 缓存""" + self.logger.info("pHash 缓存初始化完成") + + def compute_phash(self, image: Image.Image) -> str: + """计算感知哈希""" + return str(imagehash.phash(image)) + + + + def check_phash_duplicate(self, phash: str) -> Tuple[bool, Optional[int], Optional[int]]: + """通过 pHash 检查是否重复""" + phash_obj = imagehash.hex_to_hash(phash) + + for cached_phash, image_id in self.phash_cache.items(): + cached_obj = imagehash.hex_to_hash(cached_phash) + distance = phash_obj - cached_obj + + if distance <= self.phash_threshold: + return True, image_id, distance + + return False, None, None + + async def download_image_async(self, session: aiohttp.ClientSession, + image_id: int, url: str) -> Tuple[int, Optional[Image.Image], Optional[bytes]]: + """异步下载单张图片""" + try: + async with session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response: + if response.status == 200: + data = await response.read() + image = Image.open(BytesIO(data)).convert('RGB') + return image_id, image, data + except Exception as e: + self.logger.warning(f"下载失败 ID={image_id}: {e}") + + return image_id, None, None + + async def download_images_batch(self, image_records: List[dict]) -> Dict[int, Tuple[Image.Image, bytes, str]]: + """批量异步下载图片""" + images = {} + + connector = aiohttp.TCPConnector(limit=self.concurrent_downloads) + async with aiohttp.ClientSession(connector=connector) as session: + tasks = [ + self.download_image_async(session, rec['id'], rec['image_url']) + for rec in image_records + ] + results = await asyncio.gather(*tasks) + + for i, (image_id, image, data) in enumerate(results): + if image is not None: + url = image_records[i]['image_url'] + images[image_id] = (image, data, url) + + return images + + def search_similar(self, features: List[float], exclude_id: int) -> Tuple[bool, Optional[int], Optional[float]]: + """在 DashVector 中搜索相似图片(score越小越相似)""" + try: + results = self.collection.query(features, topk=3) + + if results and results.output: + for doc in results.output: + similar_id = int(doc.id) + if similar_id == exclude_id: + continue + + # score 是距离,越小越相似,转换为相似度 + similarity = 1.0 - doc.score + self.logger.info(f"搜索到: {similar_id}, 距离={doc.score:.4f}, 相似度={similarity:.4f}") + + if similarity >= self.vector_threshold: + return True, similar_id, similarity + + return False, None, None + except Exception as e: + self.logger.warning(f"搜索失败: {e}") + return False, None, None + + def upsert_to_dashvector(self, image_id: int, features: List[float]): + """存入 DashVector""" + try: + doc = Doc(id=str(image_id), vector=features) + result = self.collection.upsert([doc]) + if result.code == 0: + self.logger.info(f"向量入库成功: {image_id}") + else: + self.logger.warning(f"向量入库失败 ID={image_id}: code={result.code}, msg={result.message}") + except Exception as e: + self.logger.warning(f"存入 DashVector 异常 ID={image_id}: {e}") + + def get_draft_images(self) -> List[dict]: + """获取待处理图片""" + with self.db_conn.cursor() as cursor: + sql = """ + SELECT id, image_id, image_url, image_thumb_url, image_name + FROM ai_image_tags + WHERE status = 'draft' AND similarity = 'draft' + AND image_url != '' AND image_url IS NOT NULL + ORDER BY id ASC + LIMIT %s + """ + cursor.execute(sql, (self.batch_size,)) + return cursor.fetchall() + + def update_as_duplicate(self, image_id: int, similar_id: int, score: float): + """更新为重复图片""" + with self.db_conn.cursor() as cursor: + sql = """ + UPDATE ai_image_tags + SET status = 'similarity', + similarity = 'yes', + similarity_image_tags_id = %s, + `similarity score` = %s, + updated_at = NOW() + WHERE id = %s + """ + cursor.execute(sql, (similar_id, score, image_id)) + self.db_conn.commit() + self.logger.info(f"重复: {image_id} -> {similar_id} (分数={score:.4f})") + + def update_as_unique(self, image_id: int): + """更新为不重复图片""" + with self.db_conn.cursor() as cursor: + sql = """ + UPDATE ai_image_tags + SET status = 'tag_extension', + similarity = 'calc', + updated_at = NOW() + WHERE id = %s + """ + cursor.execute(sql, (image_id,)) + self.db_conn.commit() + self.logger.info(f"不重复: {image_id} -> tag_extension") + + def update_as_failed(self, image_id: int, reason: str): + """标记为处理失败""" + with self.db_conn.cursor() as cursor: + sql = """ + UPDATE ai_image_tags + SET status = 'draft', + similarity = 'recalc', + updated_at = NOW() + WHERE id = %s + """ + cursor.execute(sql, (image_id,)) + self.db_conn.commit() + + def process_batch(self, image_records: List[dict]) -> Tuple[int, int, int]: + """处理一批图片,返回 (重复数, 不重复数, 失败数)""" + if not image_records: + return 0, 0, 0 + + duplicates = 0 + unique = 0 + failed = 0 + + for rec in image_records: + image_id = rec['id'] + # 检查是否有有效的图像URL + if not rec['image_url'] or rec['image_url'].strip() == '': + self.logger.warning(f"图像URL为空,跳过处理: {image_id}") + self.update_as_failed(image_id, "图像URL为空") + failed += 1 + continue + + # 拼接 CDN URL,使用原图 + full_url = f"{self.image_cdn_base}{rec['image_url']}" + + + + try: + # 限流控制:免费版 2 QPS + time.sleep(0.5) + self.logger.info(f"获取 Embedding: {image_id} -> {full_url}") + + # 直接传 URL 给 DashScope + features = self.get_image_embedding(image_url=full_url) + + if features is None: + self.logger.warning(f"Embedding 获取失败: {image_id}") + self.update_as_failed(image_id, "Embedding API 失败") + failed += 1 + continue + + # DashVector 搜索相似图片 + is_dup, similar_id, score = self.search_similar(features, image_id) + + if is_dup: + self.update_as_duplicate(image_id, similar_id, score) + duplicates += 1 + else: + self.upsert_to_dashvector(image_id, features) + self.update_as_unique(image_id) + unique += 1 + + except Exception as e: + self.logger.error(f"处理失败 {image_id}: {e}") + self.update_as_failed(image_id, str(e)[:200]) + failed += 1 + continue + + return duplicates, unique, failed + + def run(self): + """运行主流程""" + self.logger.info("=" * 60) + self.logger.info("图片去重审核 - DashScope 多模态版") + self.logger.info("=" * 60) + + # 初始化 + self.connect_db() + self.connect_dashvector() + self.load_phash_cache() + + total_duplicates = 0 + total_unique = 0 + batch_num = 0 + + try: + while True: + images = self.get_draft_images() + + if not images: + self.logger.info("没有待处理的图片") + break + + batch_num += 1 + self.logger.info(f"\n--- 批次 {batch_num}: {len(images)} 张 ---") + + dup, uniq, fail = self.process_batch(images) + total_duplicates += dup + total_unique += uniq + + self.logger.info(f"批次结果: 重复={dup}, 不重复={uniq}, 失败={fail}") + + finally: + if self.db_conn: + self.db_conn.close() + + self.logger.info("=" * 60) + self.logger.info(f"完成! 总重复: {total_duplicates}, 总不重复: {total_unique}") + self.logger.info("=" * 60) + + +if __name__ == '__main__': + checker = ImageSimilarityChecker('config.ini') + checker.run() diff --git a/query_status.py b/query_status.py new file mode 100644 index 0000000..0a633fa --- /dev/null +++ b/query_status.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +""" +查询图片相似度状态脚本 +""" + +import configparser +import pymysql + + +def main(): + config = configparser.ConfigParser() + config.read('config.ini', encoding='utf-8') + + db_conn = pymysql.connect( + host=config.get('database', 'host'), + port=config.getint('database', 'port'), + user=config.get('database', 'user'), + password=config.get('database', 'password'), + database=config.get('database', 'database'), + charset=config.get('database', 'charset'), + cursorclass=pymysql.cursors.DictCursor + ) + + with db_conn.cursor() as cursor: + sql = """ + SELECT id, image_name, status, similarity, + similarity_image_tags_id, `similarity score`, blocking_reason + FROM ai_image_tags + ORDER BY id + """ + cursor.execute(sql) + rows = cursor.fetchall() + + db_conn.close() + + # 统计 + total = len(rows) + draft_count = sum(1 for r in rows if r['status'] == 'draft') + unique_count = sum(1 for r in rows if r['status'] == 'tag_extension') + dup_count = sum(1 for r in rows if r['status'] == 'similarity') + failed_count = sum(1 for r in rows if r['similarity'] == 'recalc') + + print("=" * 100) + print(f"{'ID':<8} {'图片名称':<30} {'状态':<15} {'相似性':<8} {'相似ID':<8} {'分数':<8} {'原因'}") + print("=" * 100) + + for r in rows: + score = f"{r['similarity score']:.4f}" if r['similarity score'] else "-" + similar_id = r['similarity_image_tags_id'] if r['similarity_image_tags_id'] else "-" + reason = r['blocking_reason'][:20] if r['blocking_reason'] else "-" + print(f"{r['id']:<8} {r['image_name'][:28]:<30} {r['status']:<15} {r['similarity']:<8} {similar_id:<8} {score:<8} {reason}") + + print("=" * 100) + print(f"总计: {total} | 待处理: {draft_count} | 不重复: {unique_count} | 重复: {dup_count} | 失败: {failed_count}") + print("=" * 100) + + +if __name__ == '__main__': + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1c589a8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +pymysql>=1.0.0 +dashvector>=1.0.0 +Pillow>=9.0.0 +aiohttp>=3.8.0 +imagehash>=4.3.0 +requests>=2.28.0 diff --git a/reset_data.py b/reset_data.py new file mode 100644 index 0000000..5a0b588 --- /dev/null +++ b/reset_data.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +""" +重置关系数据库脚本 +""" + +import configparser +import pymysql + + +def main(): + config = configparser.ConfigParser() + config.read('config.ini', encoding='utf-8') + + print("=" * 50) + print("重置 MySQL 数据") + print("=" * 50) + + db_conn = pymysql.connect( + host=config.get('database', 'host'), + port=config.getint('database', 'port'), + user=config.get('database', 'user'), + password=config.get('database', 'password'), + database=config.get('database', 'database'), + charset=config.get('database', 'charset'), + cursorclass=pymysql.cursors.DictCursor + ) + + with db_conn.cursor() as cursor: + sql = """ + UPDATE ai_image_tags + SET status = 'draft', + similarity = 'draft', + similarity_image_tags_id = 0, + `similarity score` = 0 + WHERE status != 'draft' OR similarity != 'draft' + """ + affected = cursor.execute(sql) + db_conn.commit() + + cursor.execute("SELECT COUNT(*) as total FROM ai_image_tags WHERE status = 'draft'") + result = cursor.fetchone() + + db_conn.close() + + print(f"更新记录数: {affected}") + print(f"当前 draft 状态总数: {result['total']}") + print("=" * 50) + + +if __name__ == '__main__': + main() diff --git a/reset_vector.py b/reset_vector.py new file mode 100644 index 0000000..35e6c59 --- /dev/null +++ b/reset_vector.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +""" +重置向量数据库集合脚本 +""" + +import configparser +import time +from dashvector import Client + + +def main(): + config = configparser.ConfigParser() + config.read('config.ini', encoding='utf-8') + + print("=" * 50) + print("重置 DashVector 集合") + print("=" * 50) + + api_key = config.get('dashvector', 'api_key') + endpoint = config.get('dashvector', 'endpoint') + collection_name = config.get('dashvector', 'collection_name') + dimension = config.getint('dashvector', 'vector_dimension') + + client = Client(api_key=api_key, endpoint=endpoint) + + # 删除 + del_result = client.delete(collection_name) + if del_result.code == 0: + print(f"已删除集合: {collection_name}") + print("等待删除完成...") + time.sleep(3) + else: + print(f"集合不存在,跳过删除") + + # 创建 + create_result = client.create(collection_name, dimension=dimension) + if create_result.code == 0: + print(f"已创建集合: {collection_name} (维度={dimension})") + else: + print(f"创建失败: {create_result.message}") + return + + # 验证 + collections = client.list() + if collection_name in collections.output: + print(f"验证通过: 集合已在列表中") + else: + print(f"警告: 集合未出现在列表中,请稍后重试") + + print("=" * 50) + + +if __name__ == '__main__': + main()