#!/bin/bash
# ============================================
# Keyword import system management script
# Supports limiting the number of running processes
# ============================================

# Configuration
BASE_DIR="/home/work/ai_import_quary"
VENV_PYTHON="/home/work/keyword_crawl/venv/bin/python"

# import_keywords settings
IMPORT_SCRIPT="${BASE_DIR}/import_keywords.py"
IMPORT_PID_FILE="${BASE_DIR}/import_keywords.pid"
IMPORT_LOG_FILE="${BASE_DIR}/import_keywords.log"
IMPORT_MAX_PROCESSES=1 # at most one running instance

# Color definitions (backslash escapes, expanded by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Print the `pgrep -f` pattern for a script path: its basename with regex
# metacharacters backslash-escaped, so "." only matches a literal dot.
#   $1 - path to the script
_script_pattern() {
  local base
  base=$(basename -- "$1")
  printf '%s' "$base" | sed 's/[][\.*^$+?(){}|]/\\&/g'
}

# Print the PIDs of every process whose command line mentions the script,
# space-separated on one line (nothing when there is no match).
#   $1 - path to the script
get_all_pids() {
  local pattern
  local -a found=()
  pattern=$(_script_pattern "$1")
  # pgrep exits non-zero when nothing matches; that is the normal
  # "not running" case, so its status and stderr are deliberately ignored.
  mapfile -t found < <(pgrep -f -- "$pattern" 2>/dev/null)
  printf '%s' "${found[*]}"
}

# Print how many processes are currently running the script.
#   $1 - path to the script
get_process_count() {
  local -a found=()
  read -r -a found <<<"$(get_all_pids "$1")"
  printf '%s\n' "${#found[@]}"
}

# Succeed only if the PID is numeric and is one of the script's processes.
# A bare `kill -0` is not enough: after the daemon dies its PID can be
# reused by an unrelated process, which would make a stale PID file look
# valid.
#   $1 - PID to check
#   $2 - path to the script
_pid_is_instance() {
  local wanted=$1 candidate
  [[ "$wanted" =~ ^[0-9]+$ ]] || return 1
  # Intentional word splitting: get_all_pids prints a space-separated list.
  for candidate in $(get_all_pids "$2"); do
    [[ "$candidate" == "$wanted" ]] && return 0
  done
  return 1
}

# Start one daemon instance unless the process limit is already reached.
#   $1 - script to run with $VENV_PYTHON
#   $2 - PID file to write
#   $3 - log file (stdout and stderr are appended to it)
#   $4 - display name used in messages
#   $5 - maximum number of instances (defaults to 1 when omitted)
# Returns 0 when an instance is running afterwards (started now or earlier),
# 1 when the launch failed.
start_single() {
  local script=$1
  local pid_file=$2
  local log_file=$3
  local name=$4
  local max_processes=${5:-1}
  local current_count first_pid recorded_pid backup_log new_pid

  current_count=$(get_process_count "$script")

  # Limit reached: do not launch, but re-sync the PID file with reality.
  if (( current_count >= max_processes )); then
    echo -e "${YELLOW}${name} 已达到最大进程数 (${current_count}/${max_processes}),跳过启动${NC}"
    read -r first_pid _ <<<"$(get_all_pids "$script")"
    if [[ -n "$first_pid" ]]; then
      echo "$first_pid" > "$pid_file"
    fi
    return 0
  fi

  # Honour the PID file only when the recorded PID really is this script.
  if [[ -f "$pid_file" ]]; then
    recorded_pid=$(cat "$pid_file" 2>/dev/null)
    if _pid_is_instance "$recorded_pid" "$script"; then
      echo -e "${YELLOW}${name} 已在运行(PID文件记录),PID: ${recorded_pid}${NC}"
      return 0
    fi
    # Stale record (process gone, or PID reused by something else).
    rm -f -- "$pid_file"
  fi

  # Fail fast with a clear message instead of discovering the problem only
  # after the post-launch wait.
  if [[ ! -x "$VENV_PYTHON" ]]; then
    echo -e "${RED}${name} 启动失败:找不到可执行的 Python 解释器 ${VENV_PYTHON}${NC}" >&2
    return 1
  fi
  if [[ ! -f "$script" ]]; then
    echo -e "${RED}${name} 启动失败:脚本不存在 ${script}${NC}" >&2
    return 1
  fi

  echo -e "${BLUE}正在启动 ${name}(守护模式)...${NC}"

  # Make sure the log directory exists.
  mkdir -p -- "$(dirname -- "$log_file")"

  # Snapshot the previous log. This is a copy, not a rotation: the live log
  # stays in place and the new run appends to it.
  if [[ -f "$log_file" ]]; then
    backup_log="${log_file}.$(date +%Y%m%d_%H%M%S).bak"
    if cp -- "$log_file" "$backup_log"; then
      echo -e "${BLUE}旧日志已备份到: ${backup_log}${NC}"
    else
      echo -e "${YELLOW}旧日志备份失败: ${backup_log}${NC}" >&2
    fi
  fi

  # Launch detached from the terminal.
  nohup "$VENV_PYTHON" "$script" >> "$log_file" 2>&1 &
  new_pid=$!

  # Give the process a moment so an immediate crash is detected below.
  sleep 2

  if kill -0 "$new_pid" 2>/dev/null; then
    echo "$new_pid" > "$pid_file"
    echo -e "${GREEN}${name} 已启动,PID: ${new_pid}${NC}"
    echo -e "${BLUE}日志文件: ${log_file}${NC}"
    return 0
  fi

  echo -e "${RED}${name} 启动失败,请检查日志${NC}"
  tail -n 20 "$log_file"
  rm -f -- "$pid_file"
  return 1
}

# Stop every instance of a script: SIGTERM first, SIGKILL for whatever is
# still alive after a 10-second grace period.
#   $1 - script whose processes are stopped
#   $2 - display name used in messages
#   $3 - PID file to remove once nothing is left
# Returns 0 when no instance remains, 1 otherwise.
stop_single_all() {
  local script=$1
  local name=$2
  local pid_file=$3
  local -r grace_seconds=10
  local pids pid count remaining waited
  local -a pid_list=()

  # Take the PID list and the count from one snapshot so they cannot
  # disagree.
  pids=$(get_all_pids "$script")
  read -r -a pid_list <<<"$pids"
  count=${#pid_list[@]}

  if (( count == 0 )); then
    echo -e "${YELLOW}${name} 没有运行中的进程${NC}"
    rm -f -- "$pid_file"
    return 0
  fi

  echo -e "${BLUE}正在停止 ${name} (${count}个进程)...${NC}"
  echo -e "进程PIDs: ${pids}"

  # Graceful termination first.
  for pid in "${pid_list[@]}"; do
    if kill -0 "$pid" 2>/dev/null; then
      echo -e " 发送SIGTERM到 PID $pid..."
      kill "$pid"
    fi
  done

  # Poll once per second until everything exited or the grace period ends.
  for (( waited = 0; waited < grace_seconds; waited++ )); do
    remaining=$(get_process_count "$script")
    if (( remaining == 0 )); then
      break
    fi
    echo -n "."
    sleep 1
  done
  echo ""

  # Escalate to SIGKILL for anything that ignored SIGTERM.
  remaining=$(get_process_count "$script")
  if (( remaining > 0 )); then
    echo -e "${YELLOW}还有 ${remaining} 个进程未退出,强制终止...${NC}"
    # Intentional word splitting of the space-separated PID list.
    for pid in $(get_all_pids "$script"); do
      if kill -0 "$pid" 2>/dev/null; then
        kill -9 "$pid" 2>/dev/null
      fi
    done
    sleep 2
  fi

  # Final verification.
  remaining=$(get_process_count "$script")
  if (( remaining == 0 )); then
    echo -e "${GREEN}${name} 所有进程已停止${NC}"
    rm -f -- "$pid_file"
    return 0
  fi

  echo -e "${RED}警告:仍有 ${remaining} 个进程无法终止${NC}"
  return 1
}

# Start the service.
# Returns the status of start_single, so callers can detect a failed launch
# (the closing banner would otherwise force the status to 0).
start() {
  local rc
  echo -e "${BLUE}========== 启动关键词导入系统 ==========${NC}"
  echo -e "${YELLOW}进程限制:最多启动${IMPORT_MAX_PROCESSES}个实例${NC}"
  echo ""
  start_single "$IMPORT_SCRIPT" "$IMPORT_PID_FILE" "$IMPORT_LOG_FILE" \
    "import_keywords" "$IMPORT_MAX_PROCESSES"
  rc=$?
  echo -e "${BLUE}========================================${NC}"
  return "$rc"
}

# Stop the service.
# Returns the status of stop_single_all, so callers can tell whether the
# stop actually succeeded.
stop() {
  local rc
  echo -e "${BLUE}========== 停止关键词导入系统 ==========${NC}"
  stop_single_all "$IMPORT_SCRIPT" "import_keywords" "$IMPORT_PID_FILE"
  rc=$?
  echo -e "${BLUE}========================================${NC}"
  return "$rc"
}

# Force stop
# force-stop: kill every import_keywords instance with SIGKILL, skipping the
# graceful SIGTERM phase that `stop` uses.
# Only PIDs reported by get_all_pids for $IMPORT_SCRIPT are signalled. A PID
# file that names a live process which is not one of them is treated as
# stale (the PID may have been reused) and that process is left alone.
# Returns 0 when no instance survives, 1 otherwise.
force-stop() {
  local pids pid recorded="" is_ours=0 remaining survivors=""

  echo -e "${RED}========== 强制停止关键词导入进程 ==========${NC}"

  pids=$(get_all_pids "$IMPORT_SCRIPT")

  # Cross-check the PID file against the real process list before trusting it.
  if [[ -f "$IMPORT_PID_FILE" ]]; then
    recorded=$(cat "$IMPORT_PID_FILE" 2>/dev/null)
  fi
  if [[ "$recorded" =~ ^[0-9]+$ ]] && kill -0 "$recorded" 2>/dev/null; then
    # Intentional word splitting: $pids is a space-separated PID list.
    for pid in $pids; do
      if [[ "$pid" == "$recorded" ]]; then
        is_ours=1
        break
      fi
    done
    if (( ! is_ours )); then
      echo -e "${YELLOW}PID文件记录的进程 ${recorded} 不是 import_keywords 进程,已跳过${NC}"
    fi
  fi

  # Kill every matching process.
  for pid in $pids; do
    kill -9 "$pid" 2>/dev/null
  done

  sleep 2
  rm -f -- "$IMPORT_PID_FILE"

  # Verify with the same matching rule that was used for killing.
  remaining=$(get_process_count "$IMPORT_SCRIPT")
  if (( remaining == 0 )); then
    echo -e "${GREEN}✅ 所有进程已强制停止${NC}"
    echo -e "${RED}==========================================${NC}"
    return 0
  fi

  echo -e "${RED}❌ 仍有 ${remaining} 个进程存活${NC}"
  for pid in $(get_all_pids "$IMPORT_SCRIPT"); do
    survivors+="${survivors:+,}${pid}"
  done
  if [[ -n "$survivors" ]]; then
    ps -f -p "$survivors" 2>/dev/null
  fi
  echo -e "${RED}==========================================${NC}"
  return 1
}

# Restart the service: graceful stop, short pause, start.
# Returns 1 without starting when the stop failed, otherwise the status of
# `start`.
restart() {
  local rc
  echo -e "${BLUE}========== 重启关键词导入系统 ==========${NC}"
  if ! stop; then
    echo -e "${RED}停止服务失败,请使用 force-restart${NC}"
    return 1
  fi
  sleep 3
  start
  rc=$?
  echo -e "${BLUE}========================================${NC}"
  return "$rc"
}

# Force restart: force-stop, short pause, start.
# The start is attempted whatever force-stop reported; returns the status of
# `start`.
force-restart() {
  local rc
  echo -e "${YELLOW}========== 强制重启关键词导入系统 ==========${NC}"
  force-stop
  sleep 3
  start
  rc=$?
  echo -e "${YELLOW}============================================${NC}"
  return "$rc"
}

# Show process count, per-process CPU/memory/elapsed time, PID-file validity
# and the last 10 log lines.
status() {
  local count pids pid cpu mem runtime recorded

  echo -e "${BLUE}========== 关键词导入系统状态 ==========${NC}"
  echo -e "${BLUE}系统时间: $(date)${NC}"
  echo -e "${BLUE}工作目录: ${BASE_DIR}${NC}"
  echo ""

  count=$(get_process_count "$IMPORT_SCRIPT")
  echo -e "${YELLOW}📊 进程状态:${NC}"
  echo -e " 进程数: ${count}"

  if (( count > 0 )); then
    pids=$(get_all_pids "$IMPORT_SCRIPT")
    echo -e " 进程PIDs: ${pids}"
    # CPU and memory usage. One ps call per PID; the empty "=" headers
    # suppress the header line. Fields stay empty if the process has just
    # exited.
    for pid in $pids; do
      cpu="" mem="" runtime=""
      read -r cpu mem runtime \
        < <(ps -p "$pid" -o %cpu= -o %mem= -o etime= 2>/dev/null)
      echo -e " PID ${pid}: CPU ${cpu}%, 内存 ${mem}%, 运行时间 ${runtime}"
    done
  else
    echo -e " ${YELLOW}没有运行中的进程${NC}"
  fi

  echo ""
  echo -e "${YELLOW}📁 PID文件状态:${NC}"
  if [[ -f "$IMPORT_PID_FILE" ]]; then
    recorded=$(cat "$IMPORT_PID_FILE" 2>/dev/null)
    if [[ -n "$recorded" ]] && kill -0 "$recorded" 2>/dev/null; then
      echo -e " ${GREEN}✓ import_keywords.pid: 有效 (PID: $recorded)${NC}"
    else
      echo -e " ${RED}✗ import_keywords.pid: 无效或进程不存在${NC}"
    fi
  else
    echo -e " ${YELLOW}○ import_keywords.pid: 不存在${NC}"
  fi

  echo ""
  echo -e "${YELLOW}📝 最近日志:${NC}"
  if [[ -f "$IMPORT_LOG_FILE" ]]; then
    echo -e "${BLUE}--- import_keywords.log (最后10行) ---${NC}"
    tail -n 10 "$IMPORT_LOG_FILE" 2>/dev/null
  else
    echo -e " ${YELLOW}日志文件不存在${NC}"
  fi
  echo -e "${BLUE}========================================${NC}"
}

# Print the last N lines of the log.
#   $1 - number of lines (default 50); must be a positive integer
# Returns 1 when the line count is invalid, 0 otherwise.
logs() {
  local lines=${1:-50}

  # Reject anything that is not a plain positive number before it reaches
  # tail as an option value.
  if [[ ! "$lines" =~ ^[1-9][0-9]*$ ]]; then
    echo -e "${RED}错误:行数必须是正整数: '${lines}'${NC}" >&2
    return 1
  fi

  echo -e "${BLUE}========== 查看日志 (最后 ${lines} 行) ==========${NC}"
  if [[ -f "$IMPORT_LOG_FILE" ]]; then
    tail -n "$lines" "$IMPORT_LOG_FILE"
  else
    echo -e "${YELLOW}日志文件不存在${NC}"
  fi
  echo -e "${BLUE}============================================${NC}"
}

# Follow the log until interrupted (Ctrl+C).
# Returns 1 when the log file does not exist.
logs-follow() {
  echo -e "${BLUE}========== 实时查看日志 (Ctrl+C 退出) ==========${NC}"
  if [[ ! -f "$IMPORT_LOG_FILE" ]]; then
    echo -e "${YELLOW}日志文件不存在${NC}"
    return 1
  fi
  tail -f "$IMPORT_LOG_FILE"
}

# Print usage, current configuration and examples.
show_help() {
  echo -e "${GREEN}关键词导入系统管理脚本${NC}"
  echo ""
  echo -e "${YELLOW}当前配置:${NC}"
  echo -e " 工作目录: ${BASE_DIR}"
  echo -e " 最大进程数: ${IMPORT_MAX_PROCESSES}"
  echo -e " 监控目录: query_upload/"
  echo -e " 轮询间隔: 60秒"
  echo ""
  echo -e "${BLUE}用法: $0 {命令}${NC}"
  echo ""
  echo -e "${GREEN}服务管理:${NC}"
  echo -e " ${YELLOW}start${NC} 启动服务(定频轮询模式)"
  echo -e " ${YELLOW}stop${NC} 停止服务"
  echo -e " ${YELLOW}force-stop${NC} 强制停止所有进程"
  echo -e " ${YELLOW}restart${NC} 重启服务"
  echo -e " ${YELLOW}force-restart${NC} 强制重启"
  echo ""
  echo -e "${GREEN}状态查看:${NC}"
  echo -e " ${YELLOW}status${NC} 显示进程状态"
  echo -e " ${YELLOW}logs [N]${NC} 查看最后N行日志(默认50)"
  echo -e " ${YELLOW}logs-follow${NC} 实时查看日志"
  echo ""
  echo -e "${GREEN}其他:${NC}"
  echo -e " ${YELLOW}help${NC} 显示帮助"
  echo ""
  echo -e "${GREEN}工作流程:${NC}"
  echo -e " 1. 定频轮询 query_upload/ 目录"
  echo -e " 2. 发现Excel文件后自动处理"
  echo -e " 3. 读取query列和科室列"
  echo -e " 4. 查询科室ID (ai_departments)"
  echo -e " 5. 随机获取作者信息 (ai_authors)"
  echo -e " 6. 判重后插入 baidu_keyword 表"
  echo -e " 7. 处理完成后删除源文件"
  echo ""
  echo -e "${GREEN}示例:${NC}"
  echo -e " $0 start # 启动定频轮询服务"
  echo -e " $0 status # 查看运行状态"
  echo -e " $0 logs-follow # 实时查看日志"
  echo -e " $0 restart # 重启服务"
}

# Main dispatch. The script exits with the status of the command that ran,
# so a failed start/stop/restart is visible to whatever invoked the script.
case "${1:-}" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  force-stop)
    force-stop
    ;;
  restart)
    restart
    ;;
  force-restart)
    force-restart
    ;;
  status)
    status
    ;;
  logs)
    logs "${2:-}"
    ;;
  logs-follow)
    logs-follow
    ;;
  help | --help | -h)
    show_help
    ;;
  *)
    echo -e "${RED}错误:未知命令 '$1'${NC}"
    echo ""
    show_help
    exit 1
    ;;
esac
exit $?