Initial commit: 百家号文章采集系统
This commit is contained in:
222
check_taskworker.py
Normal file
222
check_taskworker.py
Normal file
@@ -0,0 +1,222 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
TaskWorker 状态检查和修复工具
|
||||
用于诊断和解决任务卡在等待中的问题
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import psutil
|
||||
import time
|
||||
|
||||
# Configure the root logger once for the whole script: INFO and above,
# each record prefixed with a timestamp and its level name.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.INFO,
)

# Module-level logger used by every check/repair function in this file.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def check_taskworker_lock(lock_file='data/taskworker.lock'):
    """Inspect the TaskWorker lock file and report whether its owner is alive.

    The lock file is expected to contain a single process ID as text.

    Args:
        lock_file: Path of the lock file to inspect. Defaults to the
            location this script has always used.

    Returns:
        tuple: ``(True, pid)`` when the lock file names a process that
        currently exists; ``(False, None)`` when there is no lock file, it
        cannot be read, its content is not an integer, or the recorded
        process no longer exists.
    """
    # EAFP: open directly instead of exists()+open(), so a file that
    # disappears between the two calls is reported as "no lock file".
    try:
        with open(lock_file, 'r') as f:
            pid = f.read().strip()
    except FileNotFoundError:
        logger.info("未发现锁文件")
        return False, None
    except (OSError, UnicodeError) as e:
        logger.error(f"读取锁文件失败: {e}")
        return False, None

    logger.info(f"发现锁文件,记录的PID: {pid}")

    try:
        pid_int = int(pid)
    except ValueError:
        logger.error(f"锁文件内容无效: {pid}")
        return False, None

    if not psutil.pid_exists(pid_int):
        logger.warning(f"进程 {pid} 不存在,锁文件已失效")
        return False, None

    try:
        proc = psutil.Process(pid_int)
        logger.info(f"进程 {pid} 存在: {proc.name()} - {proc.status()}")
    except psutil.NoSuchProcess:
        # The process exited between pid_exists() and the detail lookup:
        # that is a stale lock, not a read failure.
        logger.warning(f"进程 {pid} 不存在,锁文件已失效")
        return False, None
    except psutil.AccessDenied:
        # The process exists but belongs to another user; its details are
        # unreadable, yet the lock is still held.
        logger.info(f"进程 {pid} 存在 (无权限读取进程详情)")

    return True, pid_int
|
||||
|
||||
|
||||
def check_pending_tasks():
    """Count pending and processing tasks and log a preview of the pending ones.

    Returns:
        tuple: ``(pending_count, processing_count)``. ``(0, 0)`` is returned
        when the task queue cannot be imported or inspected; the failure is
        logged rather than raised.
    """
    try:
        # Imported lazily so a missing or broken task_queue module is logged
        # by this function instead of failing at script import time.
        from task_queue import get_task_queue

        queue = get_task_queue()
        # NOTE(review): assumes get_all_tasks() yields dict-like records -
        # confirm against task_queue. A None result is treated as empty, and
        # list() lets the records be scanned twice even if it is a generator.
        tasks = list(queue.get_all_tasks() or [])

        pending_tasks = [t for t in tasks if t.get('status') == 'pending']
        processing_tasks = [t for t in tasks if t.get('status') == 'processing']

        logger.info(f"待处理任务: {len(pending_tasks)} 个")
        logger.info(f"处理中任务: {len(processing_tasks)} 个")

        if pending_tasks:
            logger.info("待处理任务列表:")
            for task in pending_tasks[:5]:  # preview only the first five
                # A record without 'task_id', or with url=None, must not
                # abort the whole check and discard the computed counts.
                task_id = task.get('task_id', 'N/A')
                url = task.get('url') or 'N/A'
                logger.info(f"  - {task_id}: {str(url)[:50]}")

        return len(pending_tasks), len(processing_tasks)
    except Exception as e:
        logger.error(f"检查任务失败: {e}")
        return 0, 0
|
||||
|
||||
|
||||
def check_worker_threads():
    """Log the TaskWorker's state and count its live worker threads.

    Returns:
        tuple: ``(worker.running, alive_thread_count)``. ``(False, 0)`` is
        returned when the worker cannot be imported or inspected; the error
        and its traceback are logged.
    """
    try:
        from task_worker import get_task_worker

        worker = get_task_worker()

        status_lines = (
            f"TaskWorker 运行状态: {worker.running}",
            f"当前并发数: {worker.current_workers}/{worker.max_workers}",
            f"工作线程数: {len(worker.worker_threads)}",
            f"正在处理的任务: {len(worker.processing_tasks)}",
        )
        for line in status_lines:
            logger.info(line)

        # Empty slots (falsy entries) are skipped before asking is_alive().
        live = [th for th in worker.worker_threads if th and th.is_alive()]
        alive_threads = len(live)
        logger.info(f"活跃线程数: {alive_threads}")

        return worker.running, alive_threads
    except Exception as exc:
        import traceback

        logger.error(f"检查 TaskWorker 失败: {exc}")
        logger.error(traceback.format_exc())
        return False, 0
|
||||
|
||||
|
||||
def restart_taskworker():
    """Stop the TaskWorker if it is running, start it again, and verify it.

    Returns:
        bool: True when, after the restart, check_worker_threads() reports
        the worker as running with at least one live thread; False
        otherwise, including when any step raises (the error and traceback
        are logged).
    """
    logger.info("正在重启 TaskWorker...")

    try:
        from task_worker import get_task_worker

        task_worker = get_task_worker()

        # Shut down the current worker first, then pause before restarting.
        if task_worker.running:
            logger.info("停止现有 TaskWorker...")
            task_worker.stop()
            time.sleep(2)

        logger.info("启动新的 TaskWorker...")
        task_worker.start()
        time.sleep(1)

        # Confirm the restart actually produced live worker threads.
        is_up, live_count = check_worker_threads()
        if not (is_up and live_count > 0):
            logger.error("❌ TaskWorker 重启失败")
            return False

        logger.info("✅ TaskWorker 重启成功")
        return True
    except Exception as exc:
        logger.error(f"重启 TaskWorker 失败: {exc}")
        import traceback

        logger.error(traceback.format_exc())
        return False
|
||||
|
||||
|
||||
def clean_stale_lock(lock_file='data/taskworker.lock'):
    """Delete the TaskWorker lock file if it is present.

    This function does not itself verify that the lock is stale; callers
    are expected to decide that before calling.

    Args:
        lock_file: Path of the lock file to remove. Defaults to the
            location this script has always used.

    Returns:
        bool: True when the file was removed or was already absent; False
        when removal failed (the error is logged).
    """
    # EAFP: remove directly instead of exists()+remove(), so a file that
    # vanishes in between counts as success rather than a logged failure.
    try:
        os.remove(lock_file)
    except FileNotFoundError:
        return True
    except OSError as e:
        logger.error(f"清理锁文件失败: {e}")
        return False

    logger.info("✅ 已清理失效的锁文件")
    return True
|
||||
|
||||
|
||||
def main():
    """Run all TaskWorker diagnostics and optionally repair what is broken.

    Steps: inspect the lock file, count queued tasks, inspect the worker
    threads, print a diagnosis, and - only when ``--fix`` or ``--auto-fix``
    is present in ``sys.argv`` - remove a stale lock file and restart the
    worker. Without either flag, a hint on how to enable the repair is
    printed instead.
    """
    banner = "=" * 60
    rule = "-" * 60

    print(banner)
    print("TaskWorker 状态检查工具")
    print(banner)

    # 1. Lock file
    print("\n[1] 检查锁文件...")
    lock_alive, _ = check_taskworker_lock()
    # check_taskworker_lock() returns (False, None) both when no lock file
    # exists and when the lock is stale, so its result alone cannot reveal
    # a stale lock. A lock is stale when the file is still on disk (same
    # path check_taskworker_lock() reads) but no live owner was found.
    stale_lock = not lock_alive and os.path.exists('data/taskworker.lock')

    # 2. Task queue
    print("\n[2] 检查任务队列...")
    pending_count, _ = check_pending_tasks()

    # 3. Worker threads
    print("\n[3] 检查 TaskWorker 状态...")
    try:
        is_running, alive_threads = check_worker_threads()
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must still propagate.
        is_running, alive_threads = False, 0

    # 4. Diagnosis
    print("\n[4] 诊断结果:")
    print(rule)

    need_fix = False

    if pending_count > 0 and alive_threads == 0:
        print("❌ 问题: 有待处理任务,但没有活跃的工作线程")
        need_fix = True

    if stale_lock:
        print("⚠️  警告: 锁文件存在但进程不存在(僵尸锁)")
        need_fix = True

    if not is_running:
        print("❌ 问题: TaskWorker 未运行")
        need_fix = True

    if not need_fix:
        print("✅ TaskWorker 运行正常")
        return

    # 5. Repair
    print("\n[5] 开始修复...")
    print(rule)

    if '--fix' in sys.argv or '--auto-fix' in sys.argv:
        # Remove the lock only when it is actually stale; deleting the lock
        # of a live process would defeat the purpose of the lock.
        if stale_lock:
            clean_stale_lock()

        if restart_taskworker():
            print("\n✅ 修复完成!")
            print("\n重新检查状态...")
            time.sleep(2)
            check_worker_threads()
            check_pending_tasks()
        else:
            print("\n❌ 修复失败,请手动重启服务")
    else:
        print("\n提示: 使用 --fix 参数自动修复问题")
        print("示例: python check_taskworker.py --fix")
|
||||
|
||||
|
||||
# Script entry point: run the diagnostics, exit quietly on Ctrl-C, and log
# any other failure together with its traceback.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n用户中断")
    except Exception as exc:
        import traceback

        logger.error(f"执行失败: {exc}")
        traceback.print_exc()
|
||||
Reference in New Issue
Block a user