Files
ai_image_quary/start_import_keywords.sh

395 lines
11 KiB
Bash
Raw Permalink Normal View History

#!/bin/bash
# ============================================
# Keyword import system management script
# Supports limiting the number of running processes
# ============================================

# ---- Configuration ----
BASE_DIR='/home/work/ai_import_quary'
VENV_PYTHON='/home/work/keyword_crawl/venv/bin/python'

# import_keywords settings: script path, PID file and log file all live
# directly under BASE_DIR.
IMPORT_SCRIPT="$BASE_DIR/import_keywords.py"
IMPORT_PID_FILE="$BASE_DIR/import_keywords.pid"
IMPORT_LOG_FILE="$BASE_DIR/import_keywords.log"
IMPORT_MAX_PROCESSES=1 # allow at most one process

# Color definitions. The values keep a literal backslash sequence; it is
# turned into a real escape character only when printed with `echo -e`.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
NC="\033[0m" # No Color
# Print how many running processes match the given script.
#
# Arguments:
#   $1 - path of the script; only its basename is used as the pattern
# Outputs:
#   the number of PIDs reported by `pgrep -f` (0 when there are none)
#
# Note: `pgrep -f` matches the pattern against each process's full command
# line, so any command line containing the basename is counted.
get_process_count() {
  pgrep -f "$(basename "$1")" 2>/dev/null | wc -l
}
# Print the PIDs of all running processes that match the given script.
#
# Arguments:
#   $1 - path of the script; only its basename is used as the pattern
# Outputs:
#   the PIDs reported by `pgrep -f` on a single line, each followed by a
#   space (nothing when there are none)
get_all_pids() {
  pgrep -f "$(basename "$1")" 2>/dev/null | tr '\n' ' '
}
# Start one background instance of a script, honoring a process limit.
#
# Arguments:
#   $1 - path of the script to launch
#   $2 - PID file to maintain
#   $3 - log file receiving the script's stdout and stderr
#   $4 - display name used in messages
#   $5 - maximum number of matching processes allowed
# Globals:
#   VENV_PYTHON (read) - interpreter used to run the script
# Returns:
#   0 when a new instance was started, when the limit is already reached,
#   or when the PID file points at a live instance of this script;
#   1 when the new process is no longer alive after the start-up delay.
start_single() {
  local script=$1
  local pid_file=$2
  local log_file=$3
  local name=$4
  local max_processes=$5
  local script_name current_count first_pid pid backup_log new_pid

  script_name=$(basename "$script")
  current_count=$(get_process_count "$script")

  # Limit reached: do not launch, but re-point the PID file at a live match.
  if [ "$current_count" -ge "$max_processes" ]; then
    echo -e "${YELLOW}${name} 已达到最大进程数 (${current_count}/${max_processes}),跳过启动${NC}"
    first_pid=$(pgrep -f "$script_name" | head -n1)
    if [ -n "$first_pid" ]; then
      echo "$first_pid" > "$pid_file"
    fi
    return 0
  fi

  # Trust the PID file only if the recorded PID is alive AND is one of the
  # processes matching this script. `kill -0` alone would also succeed for
  # an unrelated process that reused the PID of a stale file, which would
  # wrongly block the start.
  if [ -f "$pid_file" ]; then
    pid=$(cat "$pid_file" 2>/dev/null)
    if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null \
      && pgrep -f "$script_name" 2>/dev/null | grep -qxF -- "$pid"; then
      echo -e "${YELLOW}${name} 已在运行PID文件记录PID: ${pid}${NC}"
      return 0
    fi
  fi

  echo -e "${BLUE}正在启动 ${name}(守护模式)...${NC}"

  # Make sure the log directory exists.
  mkdir -p "$(dirname "$log_file")"

  # Keep a timestamped copy of the existing log; new output is appended to
  # the original file.
  if [ -f "$log_file" ]; then
    backup_log="${log_file}.$(date +%Y%m%d_%H%M%S).bak"
    cp "$log_file" "$backup_log"
    echo -e "${BLUE}旧日志已备份到: ${backup_log}${NC}"
  fi

  # Launch detached from the terminal.
  nohup "$VENV_PYTHON" "$script" >> "$log_file" 2>&1 &
  new_pid=$!

  # Give the process a moment so an immediate crash is detected below.
  sleep 2

  if kill -0 "$new_pid" 2>/dev/null; then
    echo "$new_pid" > "$pid_file"
    echo -e "${GREEN}${name} 已启动PID: ${new_pid}${NC}"
    echo -e "${BLUE}日志文件: ${log_file}${NC}"
    return 0
  fi

  echo -e "${RED}${name} 启动失败,请检查日志${NC}"
  tail -20 "$log_file"
  rm -f "$pid_file"
  return 1
}
# Stop every running instance of a script: SIGTERM first, SIGKILL for
# whatever is still alive after the grace period.
#
# Arguments:
#   $1 - path of the script whose processes should be stopped
#   $2 - display name used in messages
#   $3 - PID file to remove once nothing is left running
# Returns:
#   0 when no matching process remains (including "nothing was running");
#   1 when some processes are still present after SIGKILL.
stop_single_all() {
  local script=$1
  local name=$2
  local pid_file=$3
  local wait_time=10
  # Loop variables are declared local so they do not leak to the caller.
  local pids count remaining pid i
  local -a pid_list=()

  # Take one snapshot and derive the count from it, so the reported count
  # and the PID list always agree.
  pids=$(get_all_pids "$script")
  read -r -a pid_list <<< "$pids"
  count=${#pid_list[@]}

  if [ "$count" -eq 0 ]; then
    echo -e "${YELLOW}${name} 没有运行中的进程${NC}"
    rm -f "$pid_file"
    return 0
  fi

  echo -e "${BLUE}正在停止 ${name} (${count}个进程)...${NC}"
  echo -e "进程PIDs: ${pids}"

  # Ask politely first.
  for pid in "${pid_list[@]}"; do
    if kill -0 "$pid" 2>/dev/null; then
      echo -e " 发送SIGTERM到 PID $pid..."
      # The process may exit between the check and the signal.
      kill "$pid" 2>/dev/null
    fi
  done

  # Poll once per second, up to wait_time seconds, for a graceful exit.
  for ((i = 1; i <= wait_time; i++)); do
    remaining=$(get_process_count "$script")
    if [ "$remaining" -eq 0 ]; then
      break
    fi
    echo -n "."
    sleep 1
  done
  echo ""

  # Force-kill anything that ignored SIGTERM.
  remaining=$(get_process_count "$script")
  if [ "$remaining" -gt 0 ]; then
    echo -e "${YELLOW}还有 ${remaining} 个进程未退出,强制终止...${NC}"
    pids=$(get_all_pids "$script")
    read -r -a pid_list <<< "$pids"
    for pid in "${pid_list[@]}"; do
      if kill -0 "$pid" 2>/dev/null; then
        kill -9 "$pid" 2>/dev/null
      fi
    done
    sleep 2
  fi

  # Final verification.
  remaining=$(get_process_count "$script")
  if [ "$remaining" -eq 0 ]; then
    echo -e "${GREEN}${name} 所有进程已停止${NC}"
    rm -f "$pid_file"
    return 0
  fi

  echo -e "${RED}警告:仍有 ${remaining} 个进程无法终止${NC}"
  return 1
}
# Start the keyword import service.
#
# Globals:
#   IMPORT_SCRIPT, IMPORT_PID_FILE, IMPORT_LOG_FILE, IMPORT_MAX_PROCESSES (read)
# Returns:
#   the exit status of start_single, so callers can detect a failed start
#   (the closing banner would otherwise mask it with 0).
start() {
  local rc=0
  echo -e "${BLUE}========== 启动关键词导入系统 ==========${NC}"
  echo -e "${YELLOW}进程限制最多启动1个实例${NC}"
  echo ""
  start_single "$IMPORT_SCRIPT" "$IMPORT_PID_FILE" "$IMPORT_LOG_FILE" "import_keywords" "$IMPORT_MAX_PROCESSES" || rc=$?
  echo -e "${BLUE}========================================${NC}"
  return "$rc"
}
# Stop the keyword import service.
#
# Globals:
#   IMPORT_SCRIPT, IMPORT_PID_FILE (read)
# Returns:
#   the exit status of stop_single_all, so callers that test the result of
#   `stop` see a failed stop (the closing banner would otherwise mask it
#   with 0).
stop() {
  local rc=0
  echo -e "${BLUE}========== 停止关键词导入系统 ==========${NC}"
  stop_single_all "$IMPORT_SCRIPT" "import_keywords" "$IMPORT_PID_FILE" || rc=$?
  echo -e "${BLUE}========================================${NC}"
  return "$rc"
}
# Forcefully stop (SIGKILL) every import process and remove the PID file.
#
# Globals:
#   IMPORT_SCRIPT, IMPORT_PID_FILE (read)
# Outputs:
#   a success line when no matching process is left, otherwise the number
#   of survivors followed by their `ps -f` listing.
force-stop() {
  local script_name pid remaining
  # One pattern, derived from the configured script, is used for the kill
  # and for the survivor check. A looser pattern such as "import_keywords"
  # also matches unrelated command lines (e.g. a manager script named
  # start_import_keywords.sh, or `tail -f import_keywords.log`) and would
  # report survivors that are not import processes.
  script_name=$(basename "$IMPORT_SCRIPT")

  echo -e "${RED}========== 强制停止关键词导入进程 ==========${NC}"

  # Kill the recorded PID only if it still belongs to the import script; a
  # stale PID file may point at an unrelated process that reused the PID.
  if [ -f "$IMPORT_PID_FILE" ]; then
    pid=$(cat "$IMPORT_PID_FILE" 2>/dev/null)
    if [ -n "$pid" ] \
      && pgrep -f "$script_name" 2>/dev/null | grep -qxF -- "$pid"; then
      kill -9 "$pid" 2>/dev/null
    fi
  fi

  # Kill every remaining matching process.
  pkill -9 -f "$script_name" 2>/dev/null
  sleep 2
  rm -f "$IMPORT_PID_FILE"

  remaining=$(pgrep -f "$script_name" 2>/dev/null | wc -l)
  if [ "$remaining" -eq 0 ]; then
    echo -e "${GREEN}✅ 所有进程已强制停止${NC}"
  else
    echo -e "${RED}❌ 仍有 ${remaining} 个进程存活${NC}"
    pgrep -f "$script_name" | xargs ps -fp 2>/dev/null
  fi
  echo -e "${RED}==========================================${NC}"
}
# Restart the service: stop it, wait briefly, then start it again.
#
# Returns:
#   1 without starting anything when `stop` returns non-zero;
#   otherwise the exit status of `start`, so a failed start is not
#   reported as a successful restart.
restart() {
  local rc=0
  echo -e "${BLUE}========== 重启关键词导入系统 ==========${NC}"
  if ! stop; then
    echo -e "${RED}停止服务失败,请使用 force-restart${NC}"
    return 1
  fi
  sleep 3
  start || rc=$?
  echo -e "${BLUE}========================================${NC}"
  return "$rc"
}
# Force a restart: SIGKILL everything via force-stop, wait briefly, then
# start again.
#
# Returns:
#   the exit status of `start`, so a failed start is not reported as a
#   successful restart.
force-restart() {
  local rc=0
  echo -e "${YELLOW}========== 强制重启关键词导入系统 ==========${NC}"
  force-stop
  sleep 3
  start || rc=$?
  echo -e "${YELLOW}============================================${NC}"
  return "$rc"
}
# Print a status report: matching processes with CPU / memory / elapsed
# time, whether the PID file points at a live process, and the last 10
# lines of the log.
#
# Globals:
#   BASE_DIR, IMPORT_SCRIPT, IMPORT_PID_FILE, IMPORT_LOG_FILE (read)
status() {
  # All working variables, including the loop variable, are local so
  # nothing leaks into the caller's scope.
  local count pids pid cpu mem runtime recorded_pid
  local -a pid_list=()

  echo -e "${BLUE}========== 关键词导入系统状态 ==========${NC}"
  echo -e "${BLUE}系统时间: $(date)${NC}"
  echo -e "${BLUE}工作目录: ${BASE_DIR}${NC}"
  echo ""

  count=$(get_process_count "$IMPORT_SCRIPT")
  echo -e "${YELLOW}📊 进程状态:${NC}"
  echo -e " 进程数: ${count}"
  if [ "$count" -gt 0 ]; then
    pids=$(get_all_pids "$IMPORT_SCRIPT")
    echo -e " 进程PIDs: ${pids}"
    read -r -a pid_list <<< "$pids"
    # One ps call per PID yields all three columns from the same sample;
    # `read` splits them on whitespace. The fields stay empty if the
    # process has already exited.
    for pid in "${pid_list[@]}"; do
      read -r cpu mem runtime \
        <<< "$(ps -p "$pid" -o %cpu,%mem,etime --no-headers 2>/dev/null)"
      echo -e " PID ${pid}: CPU ${cpu}%, 内存 ${mem}%, 运行时间 ${runtime}"
    done
  else
    echo -e "${YELLOW}没有运行中的进程${NC}"
  fi
  echo ""

  echo -e "${YELLOW}📁 PID文件状态${NC}"
  if [ -f "$IMPORT_PID_FILE" ]; then
    recorded_pid=$(cat "$IMPORT_PID_FILE" 2>/dev/null)
    if [ -n "$recorded_pid" ] && kill -0 "$recorded_pid" 2>/dev/null; then
      echo -e " ${GREEN}✓ import_keywords.pid: 有效 (PID: $recorded_pid)${NC}"
    else
      echo -e " ${RED}✗ import_keywords.pid: 无效或进程不存在${NC}"
    fi
  else
    echo -e " ${YELLOW}○ import_keywords.pid: 不存在${NC}"
  fi
  echo ""

  echo -e "${YELLOW}📝 最近日志:${NC}"
  if [ -f "$IMPORT_LOG_FILE" ]; then
    echo -e "${BLUE}--- import_keywords.log (最后10行) ---${NC}"
    tail -10 "$IMPORT_LOG_FILE" 2>/dev/null
  else
    echo -e "${YELLOW}日志文件不存在${NC}"
  fi
  echo -e "${BLUE}========================================${NC}"
}
# Show the last N lines of the import log.
#
# Arguments:
#   $1 - number of lines to show (optional, default 50); must consist of
#        digits only
# Globals:
#   IMPORT_LOG_FILE (read)
# Returns:
#   1 when $1 is not a non-negative integer, 0 otherwise.
logs() {
  local lines=${1:-50}

  # Validate before handing the value to tail: anything else would be
  # interpreted as an arbitrary tail option.
  case "$lines" in
    *[!0-9]*)
      printf '%b错误:行数必须为非负整数: %s%b\n' "$RED" "$lines" "$NC" >&2
      return 1
      ;;
  esac

  echo -e "${BLUE}========== 查看日志 (最后 ${lines} 行) ==========${NC}"
  if [ -f "$IMPORT_LOG_FILE" ]; then
    tail -n "$lines" "$IMPORT_LOG_FILE"
  else
    echo -e "${YELLOW}日志文件不存在${NC}"
  fi
  echo -e "${BLUE}============================================${NC}"
}
# Follow the import log in real time (until interrupted).
#
# Globals:
#   IMPORT_LOG_FILE (read)
# Returns:
#   1 with the same "log file does not exist" message that `logs` prints
#   when the file is missing; otherwise the exit status of `tail -f`.
logs-follow() {
  echo -e "${BLUE}========== 实时查看日志 (Ctrl+C 退出) ==========${NC}"
  if [ ! -f "$IMPORT_LOG_FILE" ]; then
    echo -e "${YELLOW}日志文件不存在${NC}"
    return 1
  fi
  tail -f "$IMPORT_LOG_FILE"
}
# Print the usage text: current configuration, available commands, the
# import workflow and a few example invocations.
#
# Globals:
#   BASE_DIR, IMPORT_MAX_PROCESSES and the color variables (read)
show_help() {
  # Each argument becomes one output line; %b expands the backslash
  # escapes stored in the color variables, as `echo -e` would.
  printf '%b\n' \
    "${GREEN}关键词导入系统管理脚本${NC}" \
    "" \
    "${YELLOW}当前配置:${NC}" \
    " 工作目录: ${BASE_DIR}" \
    " 最大进程数: ${IMPORT_MAX_PROCESSES}" \
    " 监控目录: query_upload/" \
    " 轮询间隔: 60秒" \
    "" \
    "${BLUE}用法: $0 {命令}${NC}" \
    "" \
    "${GREEN}服务管理:${NC}" \
    " ${YELLOW}start${NC} 启动服务(定频轮询模式)" \
    " ${YELLOW}stop${NC} 停止服务" \
    " ${YELLOW}force-stop${NC} 强制停止所有进程" \
    " ${YELLOW}restart${NC} 重启服务" \
    " ${YELLOW}force-restart${NC} 强制重启" \
    "" \
    "${GREEN}状态查看:${NC}" \
    " ${YELLOW}status${NC} 显示进程状态" \
    " ${YELLOW}logs [N]${NC} 查看最后N行日志(默认50)" \
    " ${YELLOW}logs-follow${NC} 实时查看日志" \
    "" \
    "${GREEN}其他:${NC}" \
    " ${YELLOW}help${NC} 显示帮助" \
    "" \
    "${GREEN}工作流程:${NC}" \
    " 1. 定频轮询 query_upload/ 目录" \
    " 2. 发现Excel文件后自动处理" \
    " 3. 读取query列和科室列" \
    " 4. 查询科室ID (ai_departments)" \
    " 5. 随机获取作者信息 (ai_authors)" \
    " 6. 判重后插入 baidu_keyword 表" \
    " 7. 处理完成后删除源文件" \
    "" \
    "${GREEN}示例:${NC}" \
    " $0 start # 启动定频轮询服务" \
    " $0 status # 查看运行状态" \
    " $0 logs-follow # 实时查看日志" \
    " $0 restart # 重启服务"
}
# Main dispatch: run the command named by the first argument.
# An unknown (or missing) command prints an error plus the help text and
# exits with status 1.
case "$1" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  force-stop)
    force-stop
    ;;
  restart)
    restart
    ;;
  force-restart)
    force-restart
    ;;
  status)
    status
    ;;
  logs)
    # Optional line count; an empty value falls back to the default inside logs.
    logs "$2"
    ;;
  logs-follow)
    logs-follow
    ;;
  help|--help|-h)
    show_help
    ;;
  *)
    echo -e "${RED}错误:未知命令 '$1'${NC}"
    echo ""
    show_help
    exit 1
    ;;
esac
# Exit with the status of the command that ran instead of a blanket 0, so
# callers (cron, other scripts) can detect a failed command.
exit "$?"