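实现按科室ID过滤图片:get_available_images_with_tags 方法新增 article_department_id 参数(默认为 0,表示查询所有科室的图片),match_article_with_images 不再接收 available_images 参数,改为按文章科室ID自行查询可用图片;未找到对应科室的可用图片时跳过该文章,不再回退到使用所有图片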

This commit is contained in:
2026-02-05 13:23:10 +08:00
parent 5304840c43
commit 1436129845

View File

@@ -124,9 +124,9 @@ class ArticleImageMatcher:
self.log_to_database('ERROR', error_msg, traceback.format_exc())
return []
def get_available_images_with_tags(self) -> List[Dict]:
def get_available_images_with_tags(self, article_department_id: int = 0) -> List[Dict]:
"""
从ai_image_tags表获取可用的图片及其标签状态为generate且挂载次数<5
从ai_image_tags表获取可用的图片及其标签按科室ID过滤且状态为generate且挂载次数<5
Returns:
包含图片ID、标签等信息的列表
@@ -135,8 +135,37 @@ class ArticleImageMatcher:
connection = self.db_manager.get_connection()
try:
with connection.cursor(pymysql.cursors.DictCursor) as cursor:
# 查询状态为generate且附加文章数量小于5的图片不使用JOIN
# 查询指定科室ID、状态为generate且附加文章数量小于5的图片不使用JOIN
# 包含image_source字段用于区分实拍图和模板图
if article_department_id > 0:
sql = """
SELECT
it.id,
it.image_id,
it.image_name,
it.image_url,
it.image_thumb_url,
it.tag_id,
it.tag_name,
it.keywords_id,
it.keywords_name,
it.department_id,
it.department_name,
it.image_attached_article_count,
it.image_source
FROM ai_image_tags it
WHERE it.image_attached_article_count < 5
AND it.department_id = %s
AND EXISTS (
SELECT 1 FROM ai_images i
WHERE i.id = it.image_id
AND i.status = 'generate'
)
ORDER BY it.image_attached_article_count ASC, it.id DESC
"""
cursor.execute(sql, (article_department_id,))
else:
# 如果没有提供科室ID则查询所有科室的图片
sql = """
SELECT
it.id,
@@ -162,7 +191,6 @@ class ArticleImageMatcher:
ORDER BY it.image_attached_article_count ASC, it.id DESC
"""
cursor.execute(sql)
cursor.execute(sql)
results = cursor.fetchall()
if results:
@@ -730,13 +758,12 @@ class ArticleImageMatcher:
logger.error(f"插入文章图片关联信息失败: {e}")
return None
def match_article_with_images(self, article_data: Dict, available_images: List[Dict]) -> bool:
def match_article_with_images(self, article_data: Dict) -> bool:
"""
为单篇文章匹配图片未成功匹配时调用Gemini生图
Args:
article_data: 文章数据
available_images: 可用图片列表
Returns:
是否匹配成功
@@ -746,6 +773,7 @@ class ArticleImageMatcher:
article_content = article_data.get('content', '')
coze_tag = article_data.get('coze_tag', '')
article_department = article_data.get('department', '')
article_department_id = article_data.get('department_id', 0)
try:
# 解析文章标签
@@ -758,27 +786,21 @@ class ArticleImageMatcher:
logger.warning(f"文章 {article_id} 没有有效标签,跳过")
return False
logger.info(f"开始为文章 {article_id} 匹配图片 - 标题: {article_title}, 标签: {article_tags}, 科室: {article_department}")
logger.info(f"开始为文章 {article_id} 匹配图片 - 标题: {article_title}, 标签: {article_tags}, 科室: {article_department}, 科室ID: {article_department_id}")
# 根据文章科室ID缩小图片范围
department_filtered_images = []
article_dept_id = article_data.get('department_id', 0)
# 根据文章科室ID获取可用图片
available_images = self.get_available_images_with_tags(article_department_id)
for img in available_images:
# 匹配科室ID相同的图片
if img.get('department_id', 0) == article_dept_id:
department_filtered_images.append(img)
# 如果没有匹配科室的图片,则使用所有图片
if not department_filtered_images:
department_filtered_images = available_images
if not available_images:
logger.warning(f"文章 {article_id} 没有找到对应科室的可用图片,跳过")
return False
# 根据图片类型(实拍图/模板图)进行分类处理
# 根据image_source字段1=clean_images(模板图), 2=Flower_character(实拍图)
actual_photos = [] # 实拍图
template_photos = [] # 模板图
for img in department_filtered_images:
for img in available_images:
image_source = img.get('image_source', 1) # 默认为模板图
if image_source == 2: # 实拍图
actual_photos.append(img)
@@ -859,7 +881,7 @@ class ArticleImageMatcher:
return None
def worker_process_articles(self, pending_articles: List[Dict],
available_images: List[Dict], worker_id: int) -> int:
available_images: Optional[List[Dict]], worker_id: int) -> int:
"""Worker线程处理文章匹配"""
processed_count = 0
thread_name = f"Worker-{worker_id}"
@@ -875,7 +897,7 @@ class ArticleImageMatcher:
break
# 匹配文章与图片
if self.match_article_with_images(article_data, available_images):
if self.match_article_with_images(article_data):
processed_count += 1
logger.info(f"[{thread_name}] 成功处理文章: {article_data['article_id']}")
else:
@@ -896,16 +918,9 @@ class ArticleImageMatcher:
logger.info("没有需要匹配的文章")
return
# 获取可用图片
available_images = self.get_available_images_with_tags()
if not available_images:
logger.warning("没有可用图片,无法进行匹配")
self.log_to_database('WARNING', '没有可用图片')
return
logger.info(f"开始匹配 {len(pending_articles)} 篇文章与 {len(available_images)} 张图片")
logger.info(f"开始匹配 {len(pending_articles)} 篇文章")
self.log_to_database('INFO', '开始批量匹配',
f'文章数: {len(pending_articles)}, 图片数: {len(available_images)}')
f'文章数: {len(pending_articles)}')
# 清空已处理记录集合
with self.processing_lock:
@@ -919,7 +934,7 @@ class ArticleImageMatcher:
future = executor.submit(
self.worker_process_articles,
pending_articles,
available_images,
None,
worker_id
)
future_to_worker[future] = worker_id