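实现按科室ID过滤图片：get_available_images_with_tags 方法新增 article_department_id 参数（默认 0，表示不按科室过滤、查询所有科室的图片）；match_article_with_images 移除 available_images 参数，改为按文章科室ID逐篇查询可用图片，对应科室无可用图片时不再回退到全部图片，而是直接跳过该文章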

2026-02-05 13:23:10 +08:00
parent 5304840c43
commit 1436129845
1 changed file with 71 additions and 56 deletions
--- a/article_auto_image_matching.py
+++ b/article_auto_image_matching.py
@@ -124,9 +124,9 @@ class ArticleImageMatcher:
            self.log_to_database('ERROR', error_msg, traceback.format_exc())
            return []
    
-    def get_available_images_with_tags(self) -> List[Dict]:
+    def get_available_images_with_tags(self, article_department_id: int = 0) -> List[Dict]:
        """
-        从ai_image_tags表获取可用的图片及其标签（状态为generate且挂载次数<5）
+        从ai_image_tags表获取可用的图片及其标签（按科室ID过滤且状态为generate且挂载次数<5）
        
        Returns:
            包含图片ID、标签等信息的列表
@@ -135,8 +135,37 @@ class ArticleImageMatcher:
            connection = self.db_manager.get_connection()
            try:
                with connection.cursor(pymysql.cursors.DictCursor) as cursor:
-                    # 查询状态为generate且附加文章数量小于5的图片（不使用JOIN）
+                    # 查询指定科室ID、状态为generate且附加文章数量小于5的图片（不使用JOIN）
                    # 包含image_source字段用于区分实拍图和模板图
+                    if article_department_id > 0:
+                        sql = """
+                        SELECT 
+                            it.id,
+                            it.image_id,
+                            it.image_name,
+                            it.image_url,
+                            it.image_thumb_url,
+                            it.tag_id,
+                            it.tag_name,
+                            it.keywords_id,
+                            it.keywords_name,
+                            it.department_id,
+                            it.department_name,
+                            it.image_attached_article_count,
+                            it.image_source
+                        FROM ai_image_tags it
+                        WHERE it.image_attached_article_count < 5
+                        AND it.department_id = %s
+                        AND EXISTS (
+                            SELECT 1 FROM ai_images i
+                            WHERE i.id = it.image_id
+                            AND i.status = 'generate'
+                        )
+                        ORDER BY it.image_attached_article_count ASC, it.id DESC
+                        """
+                        cursor.execute(sql, (article_department_id,))
+                    else:
+                        # 如果没有提供科室ID，则查询所有科室的图片
                        sql = """
                        SELECT 
                            it.id,
@@ -162,7 +191,6 @@ class ArticleImageMatcher:
                        ORDER BY it.image_attached_article_count ASC, it.id DESC
                        """
                        cursor.execute(sql)
-                    cursor.execute(sql)
                    results = cursor.fetchall()
                    
                    if results:
@@ -730,13 +758,12 @@ class ArticleImageMatcher:
            logger.error(f"插入文章图片关联信息失败: {e}")
            return None
    
-    def match_article_with_images(self, article_data: Dict, available_images: List[Dict]) -> bool:
+    def match_article_with_images(self, article_data: Dict) -> bool:
        """
        为单篇文章匹配图片，未成功匹配时调用Gemini生图
        
        Args:
            article_data: 文章数据
-            available_images: 可用图片列表
        
        Returns:
            是否匹配成功
@@ -746,6 +773,7 @@ class ArticleImageMatcher:
        article_content = article_data.get('content', '')
        coze_tag = article_data.get('coze_tag', '')
        article_department = article_data.get('department', '')
+        article_department_id = article_data.get('department_id', 0)
        
        try:
            # 解析文章标签
@@ -758,27 +786,21 @@ class ArticleImageMatcher:
                logger.warning(f"文章 {article_id} 没有有效标签，跳过")
                return False
            
-            logger.info(f"开始为文章 {article_id} 匹配图片 - 标题: {article_title}, 标签: {article_tags}, 科室: {article_department}")
+            logger.info(f"开始为文章 {article_id} 匹配图片 - 标题: {article_title}, 标签: {article_tags}, 科室: {article_department}, 科室ID: {article_department_id}")
            
-            # 根据文章科室ID缩小图片范围
-            department_filtered_images = []
-            article_dept_id = article_data.get('department_id', 0)
+            # 根据文章科室ID获取可用图片
+            available_images = self.get_available_images_with_tags(article_department_id)
            
-            for img in available_images:
-                # 匹配科室ID相同的图片
-                if img.get('department_id', 0) == article_dept_id:
-                    department_filtered_images.append(img)
-            
-            # 如果没有匹配科室的图片，则使用所有图片
-            if not department_filtered_images:
-                department_filtered_images = available_images
+            if not available_images:
+                logger.warning(f"文章 {article_id} 没有找到对应科室的可用图片，跳过")
+                return False
            
            # 根据图片类型（实拍图/模板图）进行分类处理
            # 根据image_source字段：1=clean_images(模板图), 2=Flower_character(实拍图)
            actual_photos = []  # 实拍图
            template_photos = []  # 模板图
            
-            for img in department_filtered_images:
+            for img in available_images:
                image_source = img.get('image_source', 1)  # 默认为模板图
                if image_source == 2:  # 实拍图
                    actual_photos.append(img)
@@ -859,7 +881,7 @@ class ArticleImageMatcher:
            return None
    
    def worker_process_articles(self, pending_articles: List[Dict], 
-                                available_images: List[Dict], worker_id: int) -> int:
+                                available_images: Optional[List[Dict]], worker_id: int) -> int:
        """Worker线程处理文章匹配"""
        processed_count = 0
        thread_name = f"Worker-{worker_id}"
@@ -875,7 +897,7 @@ class ArticleImageMatcher:
                break
            
            # 匹配文章与图片
-            if self.match_article_with_images(article_data, available_images):
+            if self.match_article_with_images(article_data):
                processed_count += 1
                logger.info(f"[{thread_name}] 成功处理文章: {article_data['article_id']}")
            else:
@@ -896,16 +918,9 @@ class ArticleImageMatcher:
                logger.info("没有需要匹配的文章")
                return
            
-            # 获取可用图片
-            available_images = self.get_available_images_with_tags()
-            if not available_images:
-                logger.warning("没有可用图片，无法进行匹配")
-                self.log_to_database('WARNING', '没有可用图片')
-                return
-            
-            logger.info(f"开始匹配 {len(pending_articles)} 篇文章与 {len(available_images)} 张图片")
+            logger.info(f"开始匹配 {len(pending_articles)} 篇文章")
            self.log_to_database('INFO', '开始批量匹配', 
-                               f'文章数: {len(pending_articles)}, 图片数: {len(available_images)}')
+                               f'文章数: {len(pending_articles)}')
            
            # 清空已处理记录集合
            with self.processing_lock:
@@ -919,7 +934,7 @@ class ArticleImageMatcher:
                    future = executor.submit(
                        self.worker_process_articles, 
                        pending_articles, 
-                        available_images, 
+                        None, 
                        worker_id
                    )
                    future_to_worker[future] = worker_id