diff --git a/article_auto_image_matching.py b/article_auto_image_matching.py index e7ee1a2..df55408 100644 --- a/article_auto_image_matching.py +++ b/article_auto_image_matching.py @@ -124,9 +124,9 @@ class ArticleImageMatcher: self.log_to_database('ERROR', error_msg, traceback.format_exc()) return [] - def get_available_images_with_tags(self) -> List[Dict]: + def get_available_images_with_tags(self, article_department_id: int = 0) -> List[Dict]: """ - 从ai_image_tags表获取可用的图片及其标签(状态为generate且挂载次数<5) + 从ai_image_tags表获取可用的图片及其标签(按科室ID过滤且状态为generate且挂载次数<5) Returns: 包含图片ID、标签等信息的列表 @@ -135,34 +135,62 @@ class ArticleImageMatcher: connection = self.db_manager.get_connection() try: with connection.cursor(pymysql.cursors.DictCursor) as cursor: - # 查询状态为generate且附加文章数量小于5的图片(不使用JOIN) + # 查询指定科室ID、状态为generate且附加文章数量小于5的图片(不使用JOIN) # 包含image_source字段用于区分实拍图和模板图 - sql = """ - SELECT - it.id, - it.image_id, - it.image_name, - it.image_url, - it.image_thumb_url, - it.tag_id, - it.tag_name, - it.keywords_id, - it.keywords_name, - it.department_id, - it.department_name, - it.image_attached_article_count, - it.image_source - FROM ai_image_tags it - WHERE it.image_attached_article_count < 5 - AND EXISTS ( - SELECT 1 FROM ai_images i - WHERE i.id = it.image_id - AND i.status = 'generate' - ) - ORDER BY it.image_attached_article_count ASC, it.id DESC - """ - cursor.execute(sql) - cursor.execute(sql) + if article_department_id > 0: + sql = """ + SELECT + it.id, + it.image_id, + it.image_name, + it.image_url, + it.image_thumb_url, + it.tag_id, + it.tag_name, + it.keywords_id, + it.keywords_name, + it.department_id, + it.department_name, + it.image_attached_article_count, + it.image_source + FROM ai_image_tags it + WHERE it.image_attached_article_count < 5 + AND it.department_id = %s + AND EXISTS ( + SELECT 1 FROM ai_images i + WHERE i.id = it.image_id + AND i.status = 'generate' + ) + ORDER BY it.image_attached_article_count ASC, it.id DESC + """ + cursor.execute(sql, (article_department_id,)) + else: + # 如果没有提供科室ID,则查询所有科室的图片 + sql = """ + SELECT + it.id, + it.image_id, + it.image_name, + it.image_url, + it.image_thumb_url, + it.tag_id, + it.tag_name, + it.keywords_id, + it.keywords_name, + it.department_id, + it.department_name, + it.image_attached_article_count, + it.image_source + FROM ai_image_tags it + WHERE it.image_attached_article_count < 5 + AND EXISTS ( + SELECT 1 FROM ai_images i + WHERE i.id = it.image_id + AND i.status = 'generate' + ) + ORDER BY it.image_attached_article_count ASC, it.id DESC + """ + cursor.execute(sql) results = cursor.fetchall() if results: @@ -730,13 +758,12 @@ class ArticleImageMatcher: logger.error(f"插入文章图片关联信息失败: {e}") return None - def match_article_with_images(self, article_data: Dict, available_images: List[Dict]) -> bool: + def match_article_with_images(self, article_data: Dict) -> bool: """ 为单篇文章匹配图片,未成功匹配时调用Gemini生图 Args: article_data: 文章数据 - available_images: 可用图片列表 Returns: 是否匹配成功 @@ -746,6 +773,7 @@ class ArticleImageMatcher: article_content = article_data.get('content', '') coze_tag = article_data.get('coze_tag', '') article_department = article_data.get('department', '') + article_department_id = article_data.get('department_id', 0) try: # 解析文章标签 @@ -758,27 +786,21 @@ class ArticleImageMatcher: logger.warning(f"文章 {article_id} 没有有效标签,跳过") return False - logger.info(f"开始为文章 {article_id} 匹配图片 - 标题: {article_title}, 标签: {article_tags}, 科室: {article_department}") + logger.info(f"开始为文章 {article_id} 匹配图片 - 标题: {article_title}, 标签: {article_tags}, 科室: {article_department}, 科室ID: {article_department_id}") - # 根据文章科室ID缩小图片范围 - department_filtered_images = [] - article_dept_id = article_data.get('department_id', 0) + # 根据文章科室ID获取可用图片 + available_images = self.get_available_images_with_tags(article_department_id) - for img in available_images: - # 匹配科室ID相同的图片 - if img.get('department_id', 0) == article_dept_id: - department_filtered_images.append(img) - - # 如果没有匹配科室的图片,则使用所有图片 - if not department_filtered_images: - department_filtered_images = available_images + if not available_images: + logger.warning(f"文章 {article_id} 没有找到对应科室的可用图片,跳过") + return False # 根据图片类型(实拍图/模板图)进行分类处理 # 根据image_source字段:1=clean_images(模板图), 2=Flower_character(实拍图) actual_photos = [] # 实拍图 template_photos = [] # 模板图 - for img in department_filtered_images: + for img in available_images: image_source = img.get('image_source', 1) # 默认为模板图 if image_source == 2: # 实拍图 actual_photos.append(img) @@ -859,7 +881,7 @@ class ArticleImageMatcher: return None def worker_process_articles(self, pending_articles: List[Dict], - available_images: List[Dict], worker_id: int) -> int: + available_images: Optional[List[Dict]], worker_id: int) -> int: """Worker线程处理文章匹配""" processed_count = 0 thread_name = f"Worker-{worker_id}" @@ -875,7 +897,7 @@ class ArticleImageMatcher: break # 匹配文章与图片 - if self.match_article_with_images(article_data, available_images): + if self.match_article_with_images(article_data): processed_count += 1 logger.info(f"[{thread_name}] 成功处理文章: {article_data['article_id']}") else: @@ -896,16 +918,9 @@ class ArticleImageMatcher: logger.info("没有需要匹配的文章") return - # 获取可用图片 - available_images = self.get_available_images_with_tags() - if not available_images: - logger.warning("没有可用图片,无法进行匹配") - self.log_to_database('WARNING', '没有可用图片') - return - - logger.info(f"开始匹配 {len(pending_articles)} 篇文章与 {len(available_images)} 张图片") + logger.info(f"开始匹配 {len(pending_articles)} 篇文章") self.log_to_database('INFO', '开始批量匹配', - f'文章数: {len(pending_articles)}, 图片数: {len(available_images)}') + f'文章数: {len(pending_articles)}') # 清空已处理记录集合 with self.processing_lock: @@ -919,7 +934,7 @@ class ArticleImageMatcher: future = executor.submit( self.worker_process_articles, pending_articles, - available_images, + None, worker_id ) future_to_worker[future] = worker_id