This commit is contained in:
sjk
2026-01-06 19:36:42 +08:00
parent 15b579d64a
commit 19942144fb
261 changed files with 24034 additions and 5477 deletions

View File

@@ -0,0 +1,257 @@
package tools
import (
"ai_xhs/service"
"encoding/json"
"io/ioutil"
"log"
"os"
"os/signal"
"path/filepath"
"sync"
"syscall"
"time"
)
// ServiceMonitor watches the lifetime of the current process. It keeps a
// heartbeat file up to date, listens for exit signals, and sends a
// service-down SMS through smsService.
type ServiceMonitor struct {
	// Identity and alert routing.
	serviceName string
	alertPhone  string
	smsService  *service.SmsService

	// Runtime state. mutex is taken around writes to isRunning, alertSent
	// (in SetAlertSent) and lastHeartbeat, and around reads in IsRunning.
	mutex         sync.Mutex
	isRunning     bool
	alertSent     bool      // set once the down alert has gone out, to avoid duplicate sends
	lastHeartbeat time.Time // time of the most recent heartbeat

	// Plumbing.
	shutdownChan  chan os.Signal // receives exit signals and Shutdown requests
	heartbeatFile string         // path of the heartbeat file
}
// HeartbeatData is the JSON document stored in the heartbeat file.
//
// Field order is the order in which the fields are serialized.
type HeartbeatData struct {
	// ServiceName is the name of the monitored service.
	ServiceName string `json:"service_name"`
	// LastHeartbeat is the time at which this record was produced.
	LastHeartbeat time.Time `json:"last_heartbeat"`
	// PID is the process ID of the writer.
	PID int `json:"pid"`
	// StartTime is the start-time slot of the record.
	StartTime time.Time `json:"start_time"`
	// GracefulShut is true when the record was written as part of a
	// graceful shutdown.
	GracefulShut bool `json:"graceful_shutdown"`
}
// Process-wide singleton state used by GetServiceMonitor.
var (
	// monitorOnce makes sure monitorInstance is built exactly once.
	monitorOnce sync.Once
	// monitorInstance is the shared monitor handed out by GetServiceMonitor.
	monitorInstance *ServiceMonitor
)
// GetServiceMonitor returns the process-wide ServiceMonitor, building it on
// the first call.
//
// alertPhone and serviceName are only used by the call that actually creates
// the instance; later calls return the existing monitor and ignore their
// arguments. The heartbeat file is placed in the OS temp directory under a
// fixed file name.
func GetServiceMonitor(alertPhone string, serviceName string) *ServiceMonitor {
	build := func() {
		mon := new(ServiceMonitor)
		mon.heartbeatFile = filepath.Join(os.TempDir(), "ai_xhs_service_heartbeat.json")
		mon.alertPhone = alertPhone
		mon.serviceName = serviceName
		mon.smsService = service.GetSmsService()
		mon.isRunning = true
		mon.shutdownChan = make(chan os.Signal, 1)
		mon.alertSent = false
		mon.lastHeartbeat = time.Now()
		monitorInstance = mon
	}
	monitorOnce.Do(build)
	return monitorInstance
}
// StartMonitoring starts service monitoring.
//
// It inspects the heartbeat file left by the previous run, starts the
// periodic heartbeat writer, and subscribes to Interrupt, SIGTERM, SIGQUIT
// and SIGABRT. When one of those signals (or a Shutdown request) arrives, a
// background goroutine marks the monitor as stopped, writes the
// graceful-shutdown marker, sends the down alert unless one was already sent,
// waits two seconds and exits the process with status 0.
func (m *ServiceMonitor) StartMonitoring() {
	// Check whether the previous run ended abnormally.
	m.checkLastShutdown()

	// Start the heartbeat writer.
	m.startHeartbeat()

	// Subscribe to exit signals.
	signal.Notify(m.shutdownChan,
		os.Interrupt,    // Ctrl+C
		syscall.SIGTERM, // kill
		syscall.SIGQUIT, // Ctrl+\
		syscall.SIGABRT, // abort
	)

	go func() {
		sig := <-m.shutdownChan
		log.Printf("[服务监控] 捕获到退出信号: %v", sig)

		// Flip the running flag and snapshot alertSent in the same critical
		// section. SetAlertSent writes alertSent under this mutex, so reading
		// it without the lock would be a data race.
		m.mutex.Lock()
		m.isRunning = false
		alreadyAlerted := m.alertSent
		m.mutex.Unlock()

		// Record that this exit was a graceful one.
		m.markGracefulShutdown()

		if !alreadyAlerted {
			// sendAlert logs its own failures; there is nothing else to do
			// with the error while the process is exiting.
			_ = m.sendAlert("服务接收到退出信号")
		}

		// Give the SMS a moment to go out before exiting.
		time.Sleep(2 * time.Second)

		os.Exit(0)
	}()

	log.Printf("[服务监控] 服务监控已启动,监控电话: %s", m.alertPhone)
	log.Printf("[服务监控] 心跳文件: %s", m.heartbeatFile)
}
// SetAlertSent marks the down alert as already sent. It is meant for external
// callers that want to suppress a duplicate notification.
func (m *ServiceMonitor) SetAlertSent() {
	m.mutex.Lock()
	defer m.mutex.Unlock()

	m.alertSent = true
}
// SendManualAlert sends the service-down notification on demand.
//
// reason is passed straight to sendAlert, which logs it. The result is
// whatever sendAlert returns, including nil when an alert was already sent
// and this call is skipped.
func (m *ServiceMonitor) SendManualAlert(reason string) error {
return m.sendAlert(reason)
}
// sendAlert sends the service-down SMS to alertPhone unless one has already
// been sent.
//
// reason is only written to the log. It returns nil when the alert was sent
// or skipped as a duplicate, and the SMS service's error when sending fails;
// in that case alertSent stays false so a later call can try again.
func (m *ServiceMonitor) sendAlert(reason string) error {
	// alertSent is written under mutex by SetAlertSent, so it must be read
	// under the same lock. The lock is not held across the SMS call, which
	// keeps IsRunning and the heartbeat writer from waiting on it.
	m.mutex.Lock()
	alreadySent := m.alertSent
	m.mutex.Unlock()

	if alreadySent {
		log.Printf("[服务监控] 宕机通知已发送,跳过重复发送")
		return nil
	}

	log.Printf("[服务监控] 服务宕机,原因: %s", reason)

	if err := m.smsService.SendServiceDownAlert(m.alertPhone, m.serviceName); err != nil {
		log.Printf("[服务监控] 发送宕机通知失败: %v", err)
		return err
	}

	m.mutex.Lock()
	m.alertSent = true
	m.mutex.Unlock()

	log.Printf("[服务监控] 宕机通知已发送到 %s", m.alertPhone)
	return nil
}
// IsRunning reports whether the monitor still considers the service to be
// running. The flag is read under the mutex.
func (m *ServiceMonitor) IsRunning() bool {
	m.mutex.Lock()
	running := m.isRunning
	m.mutex.Unlock()

	return running
}
// Shutdown requests a graceful shutdown by queueing SIGTERM on the monitor's
// signal channel, which is the channel the StartMonitoring goroutine waits on.
//
// The send never blocks. If the channel's single buffer slot is already
// occupied, a signal is pending and this request is dropped, so repeated
// calls (or a call when nothing is receiving) cannot hang the caller.
func (m *ServiceMonitor) Shutdown() {
	if m.shutdownChan == nil {
		return
	}

	select {
	case m.shutdownChan <- syscall.SIGTERM:
	default:
		// A signal is already queued; one is enough to trigger the handler.
	}
}
// startHeartbeat writes one heartbeat immediately and then starts a goroutine
// that refreshes the heartbeat file every 30 seconds. The goroutine ends at
// the first tick at which IsRunning reports false.
func (m *ServiceMonitor) startHeartbeat() {
	// Write once right away so the file exists before the first tick.
	m.updateHeartbeat()

	ticker := time.NewTicker(30 * time.Second)
	go func() {
		// Stop the ticker when the loop exits so its resources are released.
		defer ticker.Stop()

		for range ticker.C {
			if !m.IsRunning() {
				return
			}
			m.updateHeartbeat()
		}
	}()

	log.Printf("[服务监控] 心跳任务已启动每30秒更新一次")
}
// updateHeartbeat records the current time as the latest heartbeat and
// rewrites the heartbeat file with a record whose GracefulShut is false.
// Serialization and write failures are logged and otherwise ignored.
func (m *ServiceMonitor) updateHeartbeat() {
	// Take one timestamp and use it for both the in-memory field and the
	// file. Re-reading m.lastHeartbeat after unlocking would be an
	// unsynchronized read.
	now := time.Now()

	m.mutex.Lock()
	m.lastHeartbeat = now
	m.mutex.Unlock()

	data := HeartbeatData{
		ServiceName:   m.serviceName,
		LastHeartbeat: now,
		PID:           os.Getpid(),
		StartTime:     now,   // TODO: should be the real process start time; the monitor does not track it
		GracefulShut:  false, // a running service has not shut down gracefully yet
	}

	jsonData, err := json.MarshalIndent(data, "", " ")
	if err != nil {
		log.Printf("[服务监控] 序列化心跳数据失败: %v", err)
		return
	}

	if err := ioutil.WriteFile(m.heartbeatFile, jsonData, 0644); err != nil {
		log.Printf("[服务监控] 写入心跳文件失败: %v", err)
	}
}
// markGracefulShutdown overwrites the heartbeat file with a record whose
// GracefulShut is true. Success is only logged when the file was actually
// written; serialization and write failures are logged and abort the
// operation.
func (m *ServiceMonitor) markGracefulShutdown() {
	// lastHeartbeat is written under mutex by the heartbeat goroutine, so
	// read it under the same lock.
	m.mutex.Lock()
	lastBeat := m.lastHeartbeat
	m.mutex.Unlock()

	data := HeartbeatData{
		ServiceName:   m.serviceName,
		LastHeartbeat: time.Now(),
		PID:           os.Getpid(),
		StartTime:     lastBeat, // NOTE(review): holds the last heartbeat time, not a real start time
		GracefulShut:  true,
	}

	jsonData, err := json.MarshalIndent(data, "", " ")
	if err != nil {
		log.Printf("[服务监控] 序列化关闭数据失败: %v", err)
		return
	}

	if err := ioutil.WriteFile(m.heartbeatFile, jsonData, 0644); err != nil {
		log.Printf("[服务监控] 写入关闭标记失败: %v", err)
		// Do not claim success below when the marker never reached disk.
		return
	}

	log.Printf("[服务监控] 已标记为正常关闭")
}
// checkLastShutdown inspects the heartbeat file left by the previous run and
// sends a service-down SMS when that run appears to have ended abnormally.
//
// A run counts as abnormal when its last record is not marked graceful and
// its last heartbeat is more than two minutes old; a younger heartbeat is
// treated as a quick restart and no SMS is sent. A missing, unreadable or
// unparsable file is logged and nothing else happens. The SMS is sent
// directly through smsService and does not change alertSent.
func (m *ServiceMonitor) checkLastShutdown() {
	// No file at all: most likely the first start on this machine.
	_, statErr := os.Stat(m.heartbeatFile)
	if os.IsNotExist(statErr) {
		log.Printf("[服务监控] 未找到历史心跳文件,可能是首次启动")
		return
	}

	raw, readErr := ioutil.ReadFile(m.heartbeatFile)
	if readErr != nil {
		log.Printf("[服务监控] 读取心跳文件失败: %v", readErr)
		return
	}

	var prev HeartbeatData
	if parseErr := json.Unmarshal(raw, &prev); parseErr != nil {
		log.Printf("[服务监控] 解析心跳数据失败: %v", parseErr)
		return
	}

	log.Printf("[服务监控] 上次心跳: %v, PID: %d, 正常关闭: %v",
		prev.LastHeartbeat.Format("2006-01-02 15:04:05"),
		prev.PID,
		prev.GracefulShut)

	// A graceful shutdown needs no follow-up.
	if prev.GracefulShut {
		return
	}

	// A heartbeat no older than two minutes is treated as a quick restart.
	elapsed := time.Since(prev.LastHeartbeat)
	if elapsed <= 2*time.Minute {
		log.Printf("[服务监控] 距离上次心跳仅%v可能是快速重启不发送通知", elapsed)
		return
	}

	log.Printf("[服务监控] 检测到上次服务异常关闭(%v前发送通知", elapsed)
	if sendErr := m.smsService.SendServiceDownAlert(m.alertPhone, m.serviceName); sendErr != nil {
		log.Printf("[服务监控] 发送异常关闭通知失败: %v", sendErr)
		return
	}
	log.Printf("[服务监控] 已发送异常关闭通知")
}