125 lines
2.8 KiB
Bash
125 lines
2.8 KiB
Bash
|
|
#!/bin/bash
#
# Service monitoring script: inspects the service's heartbeat file from the
# outside and reports whether the service still appears to be alive.
# Intended to be run periodically, e.g. from a cron job.
#
# Optional environment overrides:
#   HEARTBEAT_FILE  path of the heartbeat JSON file
#   CHECK_INTERVAL  maximum allowed heartbeat age, in seconds

# Configuration
# NOTE(review): SERVICE_NAME and ALERT_PHONE are not referenced by any
# function in the visible part of this script — confirm whether they are
# still needed.
SERVICE_NAME="AI小红书服务"
ALERT_PHONE="15707023967"
HEARTBEAT_FILE="${HEARTBEAT_FILE:-/tmp/ai_xhs_service_heartbeat.json}"
# Check interval (seconds): if the heartbeat has not been updated for longer
# than this, the service is considered down.
CHECK_INTERVAL="${CHECK_INTERVAL:-120}"
case "$CHECK_INTERVAL" in
    '' | *[!0-9]*) CHECK_INTERVAL=120 ;; # ignore a non-numeric override
esac
# Absolute directory containing this script. cd's stdout is discarded because
# cd prints the resolved directory when CDPATH is used, which would otherwise
# end up inside MONITOR_DIR.
MONITOR_DIR="$(cd -- "$(dirname -- "$0")" > /dev/null && pwd)"

# ANSI colour codes for log output (interpreted by the logging helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
|
||
|
|
|
||
|
|
# Print an informational message to stdout, prefixed with a green [INFO] tag
# and the current local time.
# Globals:   GREEN, NC (read) - colour escape codes
# Arguments: $1 - message text; printed literally (backslashes in the message
#                 are not interpreted)
log_info() {
    # %b expands the escape codes stored in the colour variables only;
    # the message itself goes through %s so it is never reinterpreted.
    printf '%b[INFO]%b %s %s\n' "${GREEN}" "${NC}" "$(date '+%Y-%m-%d %H:%M:%S')" "${1-}"
}
|
||
|
|
|
||
|
|
# Print a warning message to stdout, prefixed with a yellow [WARN] tag and
# the current local time.
# Globals:   YELLOW, NC (read) - colour escape codes
# Arguments: $1 - message text; printed literally (backslashes in the message
#                 are not interpreted)
log_warn() {
    # %b expands the escape codes stored in the colour variables only;
    # the message itself goes through %s so it is never reinterpreted.
    printf '%b[WARN]%b %s %s\n' "${YELLOW}" "${NC}" "$(date '+%Y-%m-%d %H:%M:%S')" "${1-}"
}
|
||
|
|
|
||
|
|
# Print an error message to stdout, prefixed with a red [ERROR] tag and the
# current local time.
# Globals:   RED, NC (read) - colour escape codes
# Arguments: $1 - message text; printed literally (backslashes in the message
#                 are not interpreted)
# Note: output goes to stdout, so callers whose stdout is captured as data
# must redirect this function's output themselves.
log_error() {
    # %b expands the escape codes stored in the colour variables only;
    # the message itself goes through %s so it is never reinterpreted.
    printf '%b[ERROR]%b %s %s\n' "${RED}" "${NC}" "$(date '+%Y-%m-%d %H:%M:%S')" "${1-}"
}
|
||
|
|
|
||
|
|
# Check that the heartbeat file exists as a regular file.
# Globals:   HEARTBEAT_FILE (read)
# Outputs:   an error line on stderr when the file is missing; nothing on
#            stdout, so the function is safe to call from code whose stdout
#            is captured as a value
# Returns:   0 if the file exists, 1 otherwise
check_heartbeat_file() {
    if [ ! -f "$HEARTBEAT_FILE" ]; then
        # Diagnostics go to stderr: the caller runs inside a command
        # substitution and must only emit the timestamp on stdout.
        log_error "心跳文件不存在: $HEARTBEAT_FILE" >&2
        return 1
    fi
    return 0
}
|
||
|
|
|
||
|
|
# Print the time of the last recorded heartbeat as a Unix timestamp.
# Globals:   HEARTBEAT_FILE (read)
# Outputs:   exactly one line on stdout: the timestamp in seconds, or "0"
#            when the file is missing, has no "last_heartbeat" string field,
#            or the field cannot be parsed as a date
# Returns:   0 in all of the cases above
get_last_heartbeat() {
    local raw_time epoch

    # stdout of this function is its result, so anything the existence check
    # logs is diverted to stderr instead of being mixed into the value.
    if ! check_heartbeat_file >&2; then
        echo "0"
        return
    fi

    # Lightweight extraction of the "last_heartbeat" string value; optional
    # whitespace around the colon is accepted so both compact and
    # pretty-printed JSON work. Only the first occurrence is used.
    raw_time=$(grep -o -- '"last_heartbeat"[[:space:]]*:[[:space:]]*"[^"]*"' "$HEARTBEAT_FILE" \
        | head -n 1 \
        | cut -d'"' -f4)

    if [ -z "$raw_time" ]; then
        echo "0"
        return
    fi

    # Convert to a Unix timestamp (relies on the -d option of date);
    # parse errors are intentionally silenced and reported as "0".
    if ! epoch=$(date -d "$raw_time" +%s 2>/dev/null); then
        echo "0"
        return
    fi

    echo "$epoch"
}
|
||
|
|
|
||
|
|
# Decide whether the service is alive based on the age of its last heartbeat.
# Globals:   CHECK_INTERVAL (read) - maximum heartbeat age in seconds;
#            falls back to 120 when unset or not a plain integer
# Outputs:   progress and result messages via log_info / log_error
# Returns:   0 if the heartbeat is at most CHECK_INTERVAL seconds old,
#            1 if it is older or no usable heartbeat timestamp was obtained
check_service_status() {
    local last_ts now age max_age

    log_info "开始检查服务状态..."

    last_ts=$(get_last_heartbeat)

    # Accept only a plain non-zero integer. Any other value (empty, "0", or
    # text mixed into the captured output) would break the arithmetic below
    # and abort the check before an alert could be raised.
    if [[ ! "$last_ts" =~ ^[0-9]+$ ]] || [[ "$last_ts" == "0" ]]; then
        log_error "无法获取心跳信息"
        return 1
    fi

    max_age="${CHECK_INTERVAL:-120}"
    [[ "$max_age" =~ ^[0-9]+$ ]] || max_age=120

    now=$(date +%s)
    # 10# forces base 10 so a value with leading zeros is not read as octal.
    age=$((now - 10#$last_ts))

    log_info "距离上次心跳: ${age}秒"

    if ((age > 10#$max_age)); then
        log_error "服务可能已宕机(超过${max_age}秒未更新心跳)"
        return 1
    fi

    log_info "服务运行正常"
    return 0
}
|
||
|
|
|
||
|
|
# Send the "service down" notification by running the Go alert program
# test_service_alert.go from the script directory.
# Globals:   MONITOR_DIR (read) - directory containing test_service_alert.go
# Outputs:   status messages via log_warn / log_info / log_error, plus
#            whatever the Go program prints
# Returns:   0 if the Go program exited successfully, 1 otherwise (including
#            when MONITOR_DIR is empty or cannot be entered)
send_alert() {
    log_warn "尝试发送宕机通知..."

    # Invoke the Go program to send the notification. The subshell keeps the
    # directory change local, and "&&" ensures go is never run from the wrong
    # directory when cd fails. ${MONITOR_DIR:?} rejects an empty path.
    if (cd -- "${MONITOR_DIR:?MONITOR_DIR is not set}" && go run test_service_alert.go); then
        log_info "宕机通知发送成功"
        return 0
    fi

    log_error "宕机通知发送失败"
    return 1
}
|
||
|
|
|
||
|
|
# Entry point: print a banner, run one status check and terminate the script.
# Exits with 0 when the service is healthy. Otherwise logs the failure,
# triggers send_alert and exits with 1 (regardless of whether the alert
# itself succeeded).
main() {
    local rule="========================================"

    printf '%s\n' "$rule"
    printf '%s\n' "服务监控检查 - $(date '+%Y-%m-%d %H:%M:%S')"
    printf '%s\n' "$rule"

    # Healthy path first; everything below handles the failure case.
    if check_service_status; then
        log_info "服务状态正常"
        exit 0
    fi

    log_error "检测到服务异常"
    send_alert
    exit 1
}
|
||
|
|
|
||
|
|
# Run the main function
|
||
|
|
main
|