From 40b861ae3deec574fddd8bc4eb75d38c8d32717a Mon Sep 17 00:00:00 2001
From: liangguodong
Date: Thu, 5 Feb 2026 21:32:28 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E5=AE=88=E6=8A=A4=E6=A8=A1=E5=BC=8F?=
 =?UTF-8?q?=EF=BC=8C=E6=97=A0=E6=95=B0=E6=8D=AE=E6=97=B6=E7=AD=89=E5=BE=85?=
 =?UTF-8?q?10=E7=A7=92=E7=BB=A7=E7=BB=AD=E6=A3=80=E6=9F=A5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section is ignored by git am):

* The patch text had its line breaks and whitespace runs collapsed; one
  record per line has been restored. NOTE(review): leading indentation in
  the README/Python context lines and spacing inside echo strings are
  reconstructed, not recovered. Confirm against the target files, or apply
  with `git apply --ignore-whitespace`.
* start_similarity.sh is amended, so the blob id on its `index` line is the
  one from the original commit and no longer matches the content below.
* start_similarity.sh fixes:
  - pgrep -f takes an extended regex, where "\|" is a literal pipe, so the
    combined check|recalc process counts were always 0;
  - script-name patterns now escape the dot;
  - force-stop removes only its own two PID files, not every *.pid;
  - command failures are propagated as the exit status;
  - the `logs` line count is validated and passed via `tail -n`;
  - status aggregates CPU/memory with one ps+awk per worker (no bc);
  - the file is created executable, because README invokes it as
    ./start_similarity.sh.
* NOTE(review): both Python hunks call time.sleep(); no hunk adds
  `import time`. Confirm that both modules already import it.

 README.md                  |  36 +++
 image_similarity_check.py  |   5 +-
 image_similarity_recalc.py |   5 +-
 start_similarity.sh        | 550 +++++++++++++++++++++++++++++++++++++
 4 files changed, 592 insertions(+), 4 deletions(-)
 create mode 100755 start_similarity.sh

diff --git a/README.md b/README.md
index 44cbae3..ff79c2a 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,7 @@
 - 支持 pHash 感知哈希预筛选
 - 异步批量下载和处理图片
 - 自动标记重复图片并记录相似度分数
+- 守护模式运行,无数据时等待 10 秒后继续检查
 
 ## 环境要求
 
@@ -72,12 +73,47 @@ python image_similarity_recalc.py
 python stats_similarity.py
 ```
 
+## 服务器部署
+
+```bash
+# 启动所有服务
+./start_similarity.sh start
+
+# 只启动主处理脚本
+./start_similarity.sh start-check
+
+# 只启动重算脚本
+./start_similarity.sh start-recalc
+
+# 停止所有服务
+./start_similarity.sh stop
+
+# 强制停止
+./start_similarity.sh force-stop
+
+# 重启
+./start_similarity.sh restart
+
+# 查看进程状态
+./start_similarity.sh status
+
+# 查看统计报告
+./start_similarity.sh stats
+
+# 查看日志
+./start_similarity.sh logs
+
+# 实时查看日志
+./start_similarity.sh logs-follow
+```
+
 ## 项目结构
 
 ```
 ├── image_similarity_check.py # 主程序:处理新图片
 ├── image_similarity_recalc.py # 重算程序:处理失败的图片
 ├── stats_similarity.py # 统计脚本:查看处理结果
+├── start_similarity.sh # 部署脚本:服务启停管理
 ├── query_status.py # 查询处理状态
 ├── reset_data.py # 重置数据
 ├── reset_vector.py # 重置向量库
diff --git a/image_similarity_check.py b/image_similarity_check.py
index f2a2b1a..20f9d71 100644
--- a/image_similarity_check.py
+++ b/image_similarity_check.py
@@ -360,8 +360,9 @@ class ImageSimilarityChecker:
 
             images = self.get_draft_images()
             if not images:
-                self.logger.info("没有待处理的图片")
-                break
+                self.logger.info("没有待处理的图片,等待 10 秒后继续检查...")
+                time.sleep(10)
+                continue
 
             batch_num += 1
             self.logger.info(f"\n--- 批次 {batch_num}: {len(images)} 张 ---")
diff --git a/image_similarity_recalc.py b/image_similarity_recalc.py
index 92cd03f..8d78973 100644
--- a/image_similarity_recalc.py
+++ b/image_similarity_recalc.py
@@ -263,8 +263,9 @@ class ImageSimilarityRecalc:
 
             images = self.get_recalc_images()
             if not images:
-                self.logger.info("没有需要重新计算的图片")
-                break
+                self.logger.info("没有需要重新计算的图片,等待 10 秒后继续检查...")
+                time.sleep(10)
+                continue
 
             batch_num += 1
             self.logger.info(f"\n--- 批次 {batch_num}: {len(images)} 张 (recalc) ---")
diff --git a/start_similarity.sh b/start_similarity.sh
new file mode 100755
index 0000000..95b18f3
--- /dev/null
+++ b/start_similarity.sh
@@ -0,0 +1,550 @@
+#!/bin/bash
+
+# ============================================
+# Management script for the image de-duplication review system.
+# Starts, stops and inspects the two worker scripts and caps the
+# number of instances of each.
+# ============================================
+
+# Configuration
+BASE_DIR="/home/work/ai_Image_review"
+VENV_PYTHON="${BASE_DIR}/venv/bin/python"
+
+# image_similarity_check settings
+CHECK_SCRIPT="${BASE_DIR}/image_similarity_check.py"
+CHECK_PID_FILE="${BASE_DIR}/image_similarity_check.pid"
+CHECK_LOG_FILE="${BASE_DIR}/image_similarity.log"
+CHECK_MAX_PROCESSES=1 # at most one instance
+
+# image_similarity_recalc settings
+RECALC_SCRIPT="${BASE_DIR}/image_similarity_recalc.py"
+RECALC_PID_FILE="${BASE_DIR}/image_similarity_recalc.pid"
+RECALC_LOG_FILE="${BASE_DIR}/image_similarity_recalc.log"
+RECALC_MAX_PROCESSES=1 # at most one instance
+
+# stats_similarity settings (one-shot report, no resident process)
+STATS_SCRIPT="${BASE_DIR}/stats_similarity.py"
+
+# pgrep/pkill -f take an extended regex: alternation is a bare '|' (an
+# escaped '\|' matches a literal pipe) and '\.' matches only a dot.
+ALL_WORKERS_PATTERN='image_similarity_(check|recalc)\.py'
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Print an extended regex matching the basename of script path $1 literally.
+script_pattern() {
+    local name
+    name=$(basename "$1")
+    printf '%s' "${name//./\\.}"
+}
+
+# Print how many running processes mention script $1 on their command line.
+get_process_count() {
+    pgrep -f "$(script_pattern "$1")" 2>/dev/null | wc -l
+}
+
+# Print the PIDs of all processes running script $1, space separated.
+get_all_pids() {
+    pgrep -f "$(script_pattern "$1")" 2>/dev/null | tr '\n' ' '
+}
+
+# Print how many worker processes of either kind are running.
+count_all_workers() {
+    pgrep -f "$ALL_WORKERS_PATTERN" 2>/dev/null | wc -l
+}
+
+# Print "<cpu%> <mem%>" summed over the comma-separated PID list $1.
+sum_cpu_mem() {
+    ps -o pcpu= -o pmem= -p "$1" 2>/dev/null |
+        awk '{ cpu += $1; mem += $2 } END { printf "%.1f %.1f\n", cpu, mem }'
+}
+
+# Start one worker unless its instance cap is already reached.
+# Arguments: $1 script path, $2 PID file, $3 log file, $4 display name,
+#            $5 maximum number of instances
+# Returns:   0 if the worker is running afterwards, 1 if it failed to start
+start_single() {
+    local script=$1
+    local pid_file=$2
+    local log_file=$3
+    local name=$4
+    local max_processes=$5
+    local current_count first_pid pid backup_log new_pid
+
+    current_count=$(get_process_count "$script")
+
+    # Refuse to exceed the instance cap
+    if [ "$current_count" -ge "$max_processes" ]; then
+        echo -e "${YELLOW}${name} 已达到最大进程数 (${current_count}/${max_processes}),跳过启动${NC}"
+        # Re-sync the PID file with the first matching process
+        first_pid=$(pgrep -f "$(script_pattern "$script")" | head -n1)
+        if [ -n "$first_pid" ]; then
+            echo "$first_pid" > "$pid_file"
+        fi
+        return 0
+    fi
+
+    # Honor a live process recorded in the PID file
+    if [ -f "$pid_file" ]; then
+        pid=$(cat "$pid_file" 2>/dev/null)
+        if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
+            echo -e "${YELLOW}${name} 已在运行(PID文件记录),PID: ${pid}${NC}"
+            return 0
+        fi
+    fi
+
+    echo -e "${BLUE}正在启动 ${name}...${NC}"
+
+    # Make sure the log directory exists
+    mkdir -p "$(dirname "$log_file")"
+
+    # Keep a timestamped copy of the previous log
+    if [ -f "$log_file" ]; then
+        backup_log="${log_file}.$(date +%Y%m%d_%H%M%S).bak"
+        cp "$log_file" "$backup_log"
+        echo -e "${BLUE}旧日志已备份到: ${backup_log}${NC}"
+    fi
+
+    # Launch detached from the terminal, appending to the log
+    nohup "$VENV_PYTHON" "$script" >> "$log_file" 2>&1 &
+    new_pid=$!
+
+    # Give the process a moment to either settle or crash
+    sleep 2
+
+    if kill -0 "$new_pid" 2>/dev/null; then
+        echo "$new_pid" > "$pid_file"
+        echo -e "${GREEN}${name} 已启动,PID: ${new_pid}${NC}"
+        echo -e "${BLUE}日志文件: ${log_file}${NC}"
+        return 0
+    fi
+
+    echo -e "${RED}${name} 启动失败,请检查日志${NC}"
+    tail -n 20 "$log_file"
+    rm -f "$pid_file"
+    return 1
+}
+
+# Stop every instance of one worker: SIGTERM first, SIGKILL after 10 s.
+# Arguments: $1 script path, $2 display name, $3 PID file
+# Returns:   0 if no instance is left, 1 otherwise
+stop_single_all() {
+    local script=$1
+    local name=$2
+    local pid_file=$3
+    local pids count remaining pid i
+    local wait_time=10
+
+    pids=$(get_all_pids "$script")
+    count=$(get_process_count "$script")
+
+    if [ "$count" -eq 0 ]; then
+        echo -e "${YELLOW}${name} 没有运行中的进程${NC}"
+        rm -f "$pid_file"
+        return 0
+    fi
+
+    echo -e "${BLUE}正在停止 ${name} (${count}个进程)...${NC}"
+    echo -e "进程PIDs: ${pids}"
+
+    # Ask for a graceful shutdown first
+    for pid in $pids; do
+        if kill -0 "$pid" 2>/dev/null; then
+            echo -e "  发送SIGTERM到 PID $pid..."
+            kill "$pid"
+        fi
+    done
+
+    # Wait for the workers to exit
+    for ((i = 1; i <= wait_time; i++)); do
+        remaining=$(get_process_count "$script")
+        if [ "$remaining" -eq 0 ]; then
+            break
+        fi
+        echo -n "."
+        sleep 1
+    done
+
+    echo "" # newline after the progress dots
+
+    # Escalate to SIGKILL for anything still alive
+    remaining=$(get_process_count "$script")
+    if [ "$remaining" -gt 0 ]; then
+        echo -e "${YELLOW}还有 ${remaining} 个进程未退出,强制终止...${NC}"
+        pids=$(get_all_pids "$script")
+        for pid in $pids; do
+            if kill -0 "$pid" 2>/dev/null; then
+                kill -9 "$pid" 2>/dev/null
+            fi
+        done
+        sleep 2
+    fi
+
+    # Confirm that everything is gone
+    remaining=$(get_process_count "$script")
+    if [ "$remaining" -eq 0 ]; then
+        echo -e "${GREEN}${name} 所有进程已停止${NC}"
+        rm -f "$pid_file"
+        return 0
+    fi
+
+    echo -e "${RED}警告:仍有 ${remaining} 个进程无法终止${NC}"
+    return 1
+}
+
+# Start both workers; refuses when any worker is already running.
+# Returns: 0 if both started, 1 otherwise
+start() {
+    local total_before total_after
+    local has_errors=0
+
+    echo -e "${BLUE}========== 启动图片去重审核系统 ==========${NC}"
+    echo -e "${YELLOW}进程限制:每个服务最多启动1个实例${NC}"
+    echo ""
+
+    total_before=$(count_all_workers)
+    if [ "$total_before" -gt 0 ]; then
+        echo -e "${YELLOW}当前已有 ${total_before} 个审核进程在运行${NC}"
+        echo -e "${YELLOW}建议先停止现有进程: $0 stop${NC}"
+        echo -e "${YELLOW}或强制重启: $0 force-restart${NC}"
+        return 1
+    fi
+
+    # Main processing worker
+    if ! start_single "$CHECK_SCRIPT" "$CHECK_PID_FILE" "$CHECK_LOG_FILE" "image_similarity_check" "$CHECK_MAX_PROCESSES"; then
+        has_errors=1
+    fi
+    sleep 3
+
+    # Recalculation worker
+    if ! start_single "$RECALC_SCRIPT" "$RECALC_PID_FILE" "$RECALC_LOG_FILE" "image_similarity_recalc" "$RECALC_MAX_PROCESSES"; then
+        has_errors=1
+    fi
+
+    echo ""
+
+    total_after=$(count_all_workers)
+    if [ "$has_errors" -eq 0 ]; then
+        echo -e "${GREEN}✅ 启动完成,当前运行 ${total_after} 个进程${NC}"
+    else
+        echo -e "${YELLOW}⚠️ 启动完成,但有错误,当前运行 ${total_after} 个进程${NC}"
+    fi
+
+    echo -e "${BLUE}========================================${NC}"
+    return "$has_errors"
+}
+
+# Start only the main processing worker.
+start_check() {
+    local rc
+    echo -e "${BLUE}========== 启动主处理脚本 ==========${NC}"
+    start_single "$CHECK_SCRIPT" "$CHECK_PID_FILE" "$CHECK_LOG_FILE" "image_similarity_check" "$CHECK_MAX_PROCESSES"
+    rc=$?
+    echo -e "${BLUE}====================================${NC}"
+    return "$rc"
+}
+
+# Start only the recalculation worker.
+start_recalc() {
+    local rc
+    echo -e "${BLUE}========== 启动重算脚本 ==========${NC}"
+    start_single "$RECALC_SCRIPT" "$RECALC_PID_FILE" "$RECALC_LOG_FILE" "image_similarity_recalc" "$RECALC_MAX_PROCESSES"
+    rc=$?
+    echo -e "${BLUE}==================================${NC}"
+    return "$rc"
+}
+
+# Stop both workers gracefully.
+# Returns: 0 if no worker is left running, 1 otherwise
+stop() {
+    local total_before total_after
+    local has_errors=0
+
+    echo -e "${BLUE}========== 停止图片去重审核系统 ==========${NC}"
+
+    total_before=$(count_all_workers)
+    echo -e "${YELLOW}当前共有 ${total_before} 个审核进程${NC}"
+
+    # Recalculation worker
+    if ! stop_single_all "$RECALC_SCRIPT" "image_similarity_recalc" "$RECALC_PID_FILE"; then
+        has_errors=1
+    fi
+    sleep 2
+
+    # Main processing worker
+    if ! stop_single_all "$CHECK_SCRIPT" "image_similarity_check" "$CHECK_PID_FILE"; then
+        has_errors=1
+    fi
+
+    # Final check across both workers
+    total_after=$(count_all_workers)
+    echo ""
+
+    if [ "$total_after" -eq 0 ]; then
+        echo -e "${GREEN}✅ 所有审核进程已停止${NC}"
+    else
+        echo -e "${RED}❌ 仍有 ${total_after} 个进程无法停止${NC}"
+        has_errors=1
+    fi
+
+    echo -e "${BLUE}========================================${NC}"
+    return "$has_errors"
+}
+
+# Kill both workers with SIGKILL and remove their PID files.
+# Returns: 0 if no worker is left running, 1 otherwise
+force_stop() {
+    local remaining
+    local rc=0
+
+    echo -e "${RED}========== 强制停止所有审核进程 ==========${NC}"
+
+    pkill -9 -f "$(script_pattern "$CHECK_SCRIPT")" 2>/dev/null
+    pkill -9 -f "$(script_pattern "$RECALC_SCRIPT")" 2>/dev/null
+
+    sleep 2
+
+    # Remove only the PID files this script owns, not every *.pid in BASE_DIR
+    rm -f -- "$CHECK_PID_FILE" "$RECALC_PID_FILE"
+
+    remaining=$(count_all_workers)
+    if [ "$remaining" -eq 0 ]; then
+        echo -e "${GREEN}✅ 所有进程已强制停止${NC}"
+    else
+        echo -e "${RED}❌ 仍有 ${remaining} 个进程存活${NC}"
+        echo -e "请手动检查以下进程:"
+        ps -fp "$(pgrep -d, -f "$ALL_WORKERS_PATTERN")" 2>/dev/null
+        rc=1
+    fi
+
+    echo -e "${RED}==========================================${NC}"
+    return "$rc"
+}
+
+# Stop gracefully, then start again; gives up if the stop fails.
+restart() {
+    local rc
+    echo -e "${BLUE}========== 重启图片去重审核系统 ==========${NC}"
+
+    if ! stop; then
+        echo -e "${RED}停止服务失败,请使用 force-restart${NC}"
+        return 1
+    fi
+    sleep 3
+    start
+    rc=$?
+
+    echo -e "${BLUE}========================================${NC}"
+    return "$rc"
+}
+
+# Kill everything, then start again.
+force_restart() {
+    local rc
+    echo -e "${YELLOW}========== 强制重启图片去重审核系统 ==========${NC}"
+
+    force_stop
+    sleep 3
+    start
+    rc=$?
+
+    echo -e "${YELLOW}============================================${NC}"
+    return "$rc"
+}
+
+# Run the one-shot statistics report.
+stats() {
+    local rc
+    echo -e "${BLUE}========== 图片去重统计报告 ==========${NC}"
+    "$VENV_PYTHON" "$STATS_SCRIPT"
+    rc=$?
+    echo -e "${BLUE}======================================${NC}"
+    return "$rc"
+}
+
+# Show process counts, CPU/memory usage, PID-file state and recent log lines.
+status() {
+    local total_procs script pids count cpu mem pid_file label pid
+
+    echo -e "${BLUE}========== 图片去重审核系统状态 ==========${NC}"
+    echo -e "${BLUE}系统时间: $(date)${NC}"
+    echo -e "${BLUE}工作目录: ${BASE_DIR}${NC}"
+    echo ""
+
+    total_procs=$(count_all_workers)
+    echo -e "${YELLOW}📊 进程概览:${NC}"
+    echo -e "  总进程数: ${total_procs}"
+
+    if [ "$total_procs" -gt 0 ]; then
+        # Per-worker figures
+        for script in "$CHECK_SCRIPT" "$RECALC_SCRIPT"; do
+            pids=$(pgrep -d, -f "$(script_pattern "$script")")
+            if [ -z "$pids" ]; then
+                continue
+            fi
+            count=$(get_process_count "$script")
+            read -r cpu mem < <(sum_cpu_mem "$pids")
+
+            echo -e "\n${YELLOW}$(basename "$script"):${NC}"
+            echo -e "  进程数: ${count}"
+            echo -e "  CPU使用: ${cpu}%"
+            echo -e "  内存使用: ${mem}%"
+            echo -e "  进程PIDs: ${pids//,/ }"
+        done
+
+        # Totals across both workers
+        read -r cpu mem < <(sum_cpu_mem "$(pgrep -d, -f "$ALL_WORKERS_PATTERN")")
+        echo -e "\n${YELLOW}📈 总计:${NC}"
+        echo -e "  总CPU使用: ${cpu}%"
+        echo -e "  总内存使用: ${mem}%"
+    else
+        echo -e "${RED}没有运行中的审核进程${NC}"
+    fi
+
+    echo ""
+    echo -e "${YELLOW}📁 PID文件状态:${NC}"
+    for pid_file in "$CHECK_PID_FILE" "$RECALC_PID_FILE"; do
+        label=$(basename "$pid_file" .pid)
+        if [ ! -f "$pid_file" ]; then
+            echo -e "  ${YELLOW}○ ${label}.pid: 不存在${NC}"
+            continue
+        fi
+        pid=$(cat "$pid_file" 2>/dev/null)
+        if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
+            echo -e "  ${GREEN}✓ ${label}.pid: 有效 (PID: $pid)${NC}"
+        else
+            echo -e "  ${RED}✗ ${label}.pid: 无效或进程不存在${NC}"
+        fi
+    done
+
+    echo ""
+    echo -e "${YELLOW}📝 最近日志:${NC}"
+    if [ -f "$CHECK_LOG_FILE" ]; then
+        echo -e "${BLUE}--- image_similarity.log (最后5行) ---${NC}"
+        tail -n 5 "$CHECK_LOG_FILE" 2>/dev/null
+    fi
+
+    echo -e "${BLUE}========================================${NC}"
+}
+
+# Print the last N lines of both logs.
+# Arguments: $1 number of lines (default 50)
+logs() {
+    local lines=${1:-50}
+
+    # tail would treat a non-numeric value as an option or a file name
+    if ! [[ "$lines" =~ ^[0-9]+$ ]]; then
+        echo -e "${RED}错误:行数必须是非负整数 '${lines}'${NC}"
+        return 1
+    fi
+
+    echo -e "${BLUE}========== 查看日志 (最后 ${lines} 行) ==========${NC}"
+
+    if [ -f "$CHECK_LOG_FILE" ]; then
+        echo -e "\n${YELLOW}--- image_similarity.log ---${NC}"
+        tail -n "$lines" "$CHECK_LOG_FILE"
+    fi
+
+    if [ -f "$RECALC_LOG_FILE" ]; then
+        echo -e "\n${YELLOW}--- image_similarity_recalc.log ---${NC}"
+        tail -n "$lines" "$RECALC_LOG_FILE"
+    fi
+
+    echo -e "${BLUE}============================================${NC}"
+}
+
+# Follow both logs until interrupted.
+logs_follow() {
+    echo -e "${BLUE}========== 实时查看日志 (Ctrl+C 退出) ==========${NC}"
+    tail -f "$CHECK_LOG_FILE" "$RECALC_LOG_FILE"
+}
+
+# Print usage.
+show_help() {
+    echo -e "${GREEN}图片去重审核系统管理脚本${NC}"
+    echo ""
+    echo -e "${YELLOW}当前配置:${NC}"
+    echo -e "  工作目录: ${BASE_DIR}"
+    echo -e "  image_similarity_check: 最多 ${CHECK_MAX_PROCESSES} 个进程"
+    echo -e "  image_similarity_recalc: 最多 ${RECALC_MAX_PROCESSES} 个进程"
+    echo ""
+    echo -e "${BLUE}用法: $0 {命令}${NC}"
+    echo ""
+    echo -e "${GREEN}服务管理:${NC}"
+    echo -e "  ${YELLOW}start${NC}          启动所有服务"
+    echo -e "  ${YELLOW}start-check${NC}    只启动主处理脚本"
+    echo -e "  ${YELLOW}start-recalc${NC}   只启动重算脚本"
+    echo -e "  ${YELLOW}stop${NC}           停止所有服务"
+    echo -e "  ${YELLOW}force-stop${NC}     强制停止所有进程"
+    echo -e "  ${YELLOW}restart${NC}        重启所有服务"
+    echo -e "  ${YELLOW}force-restart${NC}  强制重启"
+    echo ""
+    echo -e "${GREEN}状态查看:${NC}"
+    echo -e "  ${YELLOW}status${NC}         显示进程状态"
+    echo -e "  ${YELLOW}stats${NC}          显示统计报告"
+    echo -e "  ${YELLOW}logs [N]${NC}       查看最后N行日志(默认50)"
+    echo -e "  ${YELLOW}logs-follow${NC}    实时查看日志"
+    echo ""
+    echo -e "${GREEN}其他:${NC}"
+    echo -e "  ${YELLOW}help${NC}           显示帮助"
+    echo ""
+    echo -e "${RED}注意:${NC}"
+    echo -e "  - 脚本会限制每个服务最多启动1个实例"
+    echo -e "  - 两个脚本可以同时运行,处理不同状态的数据"
+    echo -e "  - image_similarity_check: 处理 status='draft', similarity='draft'"
+    echo -e "  - image_similarity_recalc: 处理 status='draft', similarity='recalc'"
+}
+
+# Command dispatch
+case "$1" in
+    start)
+        start
+        ;;
+    start-check)
+        start_check
+        ;;
+    start-recalc)
+        start_recalc
+        ;;
+    stop)
+        stop
+        ;;
+    force-stop)
+        force_stop
+        ;;
+    restart)
+        restart
+        ;;
+    force-restart)
+        force_restart
+        ;;
+    status)
+        status
+        ;;
+    stats)
+        stats
+        ;;
+    logs)
+        logs "$2"
+        ;;
+    logs-follow)
+        logs_follow
+        ;;
+    help|--help|-h)
+        show_help
+        ;;
+    *)
+        echo -e "${RED}错误:未知命令 '$1'${NC}"
+        echo ""
+        show_help
+        exit 1
+        ;;
+esac
+
+# Propagate the command's status instead of always reporting success
+exit $?