修改文章图片匹配逻辑:合并文章查询、按科室缩小图片范围、按挂载次数排序、按图片类型分类处理
This commit is contained in:
@@ -79,7 +79,7 @@ class ArticleImageMatcher:
 
     def get_articles_with_tags(self, limit: int = BATCH_SIZE) -> List[Dict]:
         """
-        从ai_article_tags表获取需要匹配图片的文章
+        从ai_articles表获取需要匹配图片的文章
 
         Returns:
             包含文章ID、标签等信息的列表
@@ -91,55 +91,32 @@ class ArticleImageMatcher:
                     # 查询有标签但未匹配图片的文章
                     sql = """
                     SELECT
-                        at.id,
-                        at.article_id,
-                        at.coze_tag
-                    FROM ai_article_tags at
-                    WHERE at.coze_tag IS NOT NULL
-                        AND at.coze_tag != ''
+                        a.id as article_id,
+                        a.title,
+                        a.content,
+                        a.coze_tag,
+                        a.department
+                    FROM ai_articles a
+                    WHERE a.coze_tag IS NOT NULL
+                        AND a.coze_tag != ''
                         AND NOT EXISTS (
                             SELECT 1 FROM ai_article_images ai
-                            WHERE ai.article_id = at.article_id
+                            WHERE ai.article_id = a.id
                         )
-                        AND at.article_id IN (
-                            SELECT id FROM ai_articles
-                            WHERE status = 'pending_review'
-                        )
-                    ORDER BY at.id DESC
+                        AND a.status = 'pending_review'
+                    ORDER BY a.id DESC
                     LIMIT %s
                     """
                     cursor.execute(sql, (limit,))
                     results = cursor.fetchall()
 
-                    # 为每个结果添加文章标题和内容
-                    processed_results = []
-                    for row in results:
-                        # 查询文章标题和内容
-                        article_sql = """
-                        SELECT title, content
-                        FROM ai_articles
-                        WHERE id = %s
-                        """
-                        cursor.execute(article_sql, (row['article_id'],))
-                        article_data = cursor.fetchone()
-
-                        if article_data:
-                            processed_row = {
-                                'id': row['id'],
-                                'article_id': row['article_id'],
-                                'coze_tag': row['coze_tag'],
-                                'title': article_data['title'],
-                                'content': article_data['content']
-                            }
-                            processed_results.append(processed_row)
-
-                    if processed_results:
-                        logger.info(f"查询到 {len(processed_results)} 篇需要匹配图片的文章")
-                        self.log_to_database('INFO', f"查询到需要匹配图片的文章", f"数量: {len(processed_results)}")
+                    if results:
+                        logger.info(f"查询到 {len(results)} 篇需要匹配图片的文章")
+                        self.log_to_database('INFO', f"查询到需要匹配图片的文章", f"数量: {len(results)}")
                     else:
                         logger.info("未查询到需要匹配图片的文章")
 
-                    return processed_results
+                    return results
             finally:
                 connection.close()
         except Exception as e:
@@ -159,7 +136,8 @@ class ArticleImageMatcher:
             connection = self.db_manager.get_connection()
             try:
                 with connection.cursor(pymysql.cursors.DictCursor) as cursor:
-                    # 查询状态为generate且附加文章数量小于5的图片
+                    # 查询状态为generate且附加文章数量小于5的图片(不使用JOIN)
+                    # 包含image_source字段用于区分实拍图和模板图
                     sql = """
                     SELECT
                         it.id,
@@ -173,11 +151,14 @@ class ArticleImageMatcher:
                         it.keywords_name,
                         it.department_id,
                         it.department_name,
-                        it.image_attached_article_count
+                        it.image_attached_article_count,
+                        it.image_source
                     FROM ai_image_tags it
-                    INNER JOIN ai_images i ON it.image_id = i.id
                     WHERE it.image_attached_article_count < 5
-                        AND i.status = 'generate'
+                        AND it.image_id IN (
+                            SELECT id FROM ai_images
+                            WHERE status = 'generate'
+                        )
                     ORDER BY it.image_attached_article_count ASC, it.id DESC
                     """
                     cursor.execute(sql)
@@ -763,6 +744,7 @@ class ArticleImageMatcher:
         article_title = article_data.get('title', '')
         article_content = article_data.get('content', '')
         coze_tag = article_data.get('coze_tag', '')
+        article_department = article_data.get('department', '')
 
         try:
             # 解析文章标签
@@ -771,15 +753,46 @@ class ArticleImageMatcher:
                 logger.warning(f"文章 {article_id} 没有有效标签,跳过")
                 return False
 
-            logger.info(f"开始为文章 {article_id} 匹配图片 - 标题: {article_title}, 标签: {article_tags}")
+            logger.info(f"开始为文章 {article_id} 匹配图片 - 标题: {article_title}, 标签: {article_tags}, 科室: {article_department}")
 
+            # 根据文章科室缩小图片范围
+            department_filtered_images = []
+            for img in available_images:
+                # 优先匹配科室相同的图片
+                if article_department and img.get('department_name', '').lower() == article_department.lower():
+                    department_filtered_images.append(img)
+
+            # 如果没有匹配科室的图片,则使用所有图片
+            if not department_filtered_images:
+                department_filtered_images = available_images
+
+            # 根据图片类型(实拍图/模板图)进行分类处理
+            # 根据image_source字段:1=clean_images(模板图), 2=Flower_character(实拍图)
+            actual_photos = [] # 实拍图
+            template_photos = [] # 模板图
+
+            for img in department_filtered_images:
+                image_source = img.get('image_source', 1) # 默认为模板图
+                if image_source == 2: # 实拍图
+                    actual_photos.append(img)
+                else: # 模板图
+                    template_photos.append(img)
+
+            # 按照挂载次数排序,优先选择挂载次数少的图片
+            actual_photos.sort(key=lambda x: x['image_attached_article_count'])
+            template_photos.sort(key=lambda x: x['image_attached_article_count'])
+
+            # 合并图片列表,优先使用实拍图,然后是模板图
+            filtered_images = actual_photos + template_photos
+
             best_match = None
             best_score = 0.0
 
-            # 遍历可用图片,找到最佳匹配
-            for image_data in available_images:
+            # 遍历筛选后的可用图片,找到最佳匹配
+            for image_data in filtered_images:
                 image_tags = [image_data['tag_name']]
                 image_keywords = image_data.get('keywords_name', '')
+                image_department = image_data.get('department_name', '')
 
                 # 调用通义千问评估匹配度
                 is_match, score = self.call_qwen_for_matching(
@@ -787,7 +800,7 @@ class ArticleImageMatcher:
                 )
 
                 # 记录评估结果
-                logger.info(f"文章 {article_id} vs 图片 {image_data['image_id']} - 匹配: {is_match}, 分数: {score}")
+                logger.info(f"文章 {article_id} vs 图片 {image_data['image_id']} - 匹配: {is_match}, 分数: {score}, 科室: {image_department}")
 
                 # 更新最佳匹配
                 if is_match and score > best_score and score >= MATCH_THRESHOLD:
Reference in New Issue
Block a user