129 lines
4.0 KiB
Python
129 lines
4.0 KiB
Python
"""
Test the query task management functionality.
"""
|
|
|
|
from loguru import logger
|
|
from db_manager import QueryTaskManager
|
|
from datetime import datetime
|
|
import sys
|
|
|
|
# Console log layout: green HH:mm:ss timestamp, level name left-aligned in an
# 8-character column, then the message (both styled by level).
_CONSOLE_LOG_FORMAT = "<green>{time:HH:mm:ss}</green> | <level>{level: <8}</level> | <level>{message}</level>"

# Remove every handler currently registered on the loguru logger, then send
# all log output to stdout using the layout above.
logger.remove()
logger.add(sys.stdout, format=_CONSOLE_LOG_FORMAT)
|
|
|
|
|
|
# Keyword arguments for each sample task, in creation order.
_SAMPLE_TASK_SPECS = (
    {
        "query_word": "高血压治疗方法",
        "query_type": "keyword",
        "threshold_max": 50,
        "priority": 3,
        "category": "医疗",
        "remark": "测试任务1",
    },
    {
        "query_word": "在线教育平台推荐",
        "query_type": "phrase",
        "threshold_max": 30,
        "priority": 5,
        "category": "教育",
        "remark": "测试任务2",
    },
    {
        "query_word": "法律咨询免费在线24小时",
        "query_type": "long_tail",
        "threshold_max": 20,
        "priority": 7,
        "category": "法律",
        "remark": "测试任务3",
    },
)

# (log label, statistics key) pairs, in the order they are logged.
_STATISTIC_FIELDS = (
    ("总任务数", "total_tasks"),
    ("准备中", "ready_count"),
    ("执行中", "doing_count"),
    ("已完成", "finished_count"),
    ("失败", "failed_count"),
    ("已关闭", "closed_count"),
    ("总抓取", "total_crawled"),
    ("总有效", "total_valid"),
)


def _print_banner(title):
    """Print ``title`` between two 70-character ``=`` rules."""
    rule = "=" * 70
    print(rule)
    print(title)
    print(rule)


def _create_sample_tasks(task_mgr):
    """Create every task in ``_SAMPLE_TASK_SPECS``; return the results in creation order."""
    return [task_mgr.create_task(**spec) for spec in _SAMPLE_TASK_SPECS]


def _log_statistics(stats):
    """Log each counter listed in ``_STATISTIC_FIELDS``, using 0 for missing keys."""
    logger.info("任务统计:")
    for label, key in _STATISTIC_FIELDS:
        logger.info(f" {label}: {stats.get(key, 0)}")


def test_query_task_manager():
    """Exercise the main ``QueryTaskManager`` operations in sequence.

    Steps, each announced through the logger:

    1. create three sample tasks;
    2. list up to five ready tasks;
    3. set the first task's status to ``'doing'``;
    4. add crawl/valid counts to the first task;
    5. check the first task's threshold;
    6. list the tasks for today's date (``YYYYMMDD``);
    7. log the statistics for today's date;
    8. set the first task's status to ``'finished'``.

    Steps 3, 4, 5 and 8 are skipped when the first ``create_task`` call
    returned a falsy value. The function makes no assertions and returns
    nothing; results are only printed and logged.
    """
    _print_banner(" 测试 QueryTaskManager")
    print()

    # Initialise the manager.
    task_mgr = QueryTaskManager()

    # 1. Create tasks. The date is captured first so the later per-date
    # queries use the same day the tasks were created on.
    logger.info("【测试1】创建查询任务")
    task_date = datetime.now().strftime('%Y%m%d')
    # Only the first task is used by the later steps; the other two exist so
    # the listing/statistics steps have more than one row to report.
    first_task_id = _create_sample_tasks(task_mgr)[0]
    print()

    # 2. Fetch ready tasks.
    logger.info("【测试2】获取ready任务")
    ready_tasks = task_mgr.get_ready_tasks(limit=5)
    logger.info(f"获取到 {len(ready_tasks)} 个ready任务")
    for task in ready_tasks:
        logger.info(f" - [{task['priority']}] {task['query_word']} ({task['category']}) - {task['status']}")
    print()

    # 3. Update the task status.
    if first_task_id:
        logger.info("【测试3】更新任务状态")
        task_mgr.update_task_status(first_task_id, 'doing')
        task = task_mgr.get_task_by_id(first_task_id)
        logger.info(f"任务状态: {task['status']}, 开始时间: {task['started_at']}")
        print()

    # 4. Increment the crawl counters.
    if first_task_id:
        logger.info("【测试4】增加抓取计数")
        task_mgr.increment_crawl_count(first_task_id, crawl_count=10, valid_count=7)
        task = task_mgr.get_task_by_id(first_task_id)
        logger.info(f"已抓取: {task['crawl_url_count']}, 有效: {task['valid_url_count']}, 当前计数: {task['current_count']}")
        print()

    # 5. Check the threshold.
    if first_task_id:
        logger.info("【测试5】检查阈值")
        reached = task_mgr.check_threshold(first_task_id)
        logger.info(f"是否达到阈值: {reached}")
        print()

    # 6. Fetch tasks by date.
    logger.info("【测试6】按日期获取任务")
    date_tasks = task_mgr.get_tasks_by_date(task_date)
    logger.info(f"今天的任务数: {len(date_tasks)}")
    print()

    # 7. Fetch and log the statistics.
    logger.info("【测试7】获取统计信息")
    _log_statistics(task_mgr.get_task_statistics(task_date))
    print()

    # 8. Finish the task.
    if first_task_id:
        logger.info("【测试8】完成任务")
        task_mgr.update_task_status(first_task_id, 'finished')
        task = task_mgr.get_task_by_id(first_task_id)
        logger.info(f"任务状态: {task['status']}, 完成时间: {task['finished_at']}")
        print()

    _print_banner(" 测试完成")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the walkthrough and report any failure.
    try:
        test_query_task_manager()
    except Exception as e:
        logger.error(f"测试失败: {e}")
        import traceback

        traceback.print_exc()
        # Exit non-zero so a shell or CI job can tell the run failed;
        # previously the error was logged but the process still exited 0.
        sys.exit(1)
|