diff --git a/README.md b/README.md index ff79c2a..3b90a3a 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ - 支持 pHash 感知哈希预筛选 - 异步批量下载和处理图片 - 自动标记重复图片并记录相似度分数 -- 守护模式运行,无数据时等待 10 秒后继续检查 +- 守护模式运行,无数据时等待 2 秒后继续检查 ## 环境要求 @@ -76,16 +76,10 @@ python stats_similarity.py ## 服务器部署 ```bash -# 启动所有服务 +# 启动服务 ./start_similarity.sh start -# 只启动主处理脚本 -./start_similarity.sh start-check - -# 只启动重算脚本 -./start_similarity.sh start-recalc - -# 停止所有服务 +# 停止服务 ./start_similarity.sh stop # 强制停止 diff --git a/image_similarity_check.py b/image_similarity_check.py index 20f9d71..9ce7d1a 100644 --- a/image_similarity_check.py +++ b/image_similarity_check.py @@ -77,7 +77,8 @@ class ImageSimilarityChecker: password=self.config.get('database', 'password'), database=self.config.get('database', 'database'), charset=self.config.get('database', 'charset'), - cursorclass=pymysql.cursors.DictCursor + cursorclass=pymysql.cursors.DictCursor, + autocommit=True ) self.logger.info("数据库连接成功") @@ -254,7 +255,6 @@ class ImageSimilarityChecker: WHERE id = %s """ cursor.execute(sql, (similar_id, score, image_id)) - self.db_conn.commit() self.logger.info(f"重复: {image_id} -> {similar_id} (分数={score:.4f})") def update_as_unique(self, image_id: int): @@ -268,7 +268,6 @@ class ImageSimilarityChecker: WHERE id = %s """ cursor.execute(sql, (image_id,)) - self.db_conn.commit() self.logger.info(f"不重复: {image_id} -> tag_extension") def update_as_failed(self, image_id: int, reason: str): @@ -282,7 +281,6 @@ class ImageSimilarityChecker: WHERE id = %s """ cursor.execute(sql, (image_id,)) - self.db_conn.commit() def process_batch(self, image_records: List[dict]) -> Tuple[int, int, int]: """处理一批图片,返回 (重复数, 不重复数, 失败数)""" @@ -360,8 +358,8 @@ class ImageSimilarityChecker: images = self.get_draft_images() if not images: - self.logger.info("没有待处理的图片,等待 10 秒后继续检查...") - time.sleep(10) + self.logger.info("没有待处理的图片,等待 2 秒后继续检查...") + time.sleep(2) continue batch_num += 1 diff --git a/image_similarity_recalc.py b/image_similarity_recalc.py index 8d78973..ecf184e 100644 --- a/image_similarity_recalc.py +++ b/image_similarity_recalc.py @@ -65,7 +65,8 @@ class ImageSimilarityRecalc: password=self.config.get('database', 'password'), database=self.config.get('database', 'database'), charset=self.config.get('database', 'charset'), - cursorclass=pymysql.cursors.DictCursor + cursorclass=pymysql.cursors.DictCursor, + autocommit=True ) self.logger.info("数据库连接成功") @@ -165,7 +166,6 @@ class ImageSimilarityRecalc: WHERE id = %s """ cursor.execute(sql, (similar_id, score, image_id)) - self.db_conn.commit() self.logger.info(f"重复: {image_id} -> {similar_id} (分数={score:.4f})") def update_as_unique(self, image_id: int): @@ -179,7 +179,6 @@ class ImageSimilarityRecalc: WHERE id = %s """ cursor.execute(sql, (image_id,)) - self.db_conn.commit() self.logger.info(f"不重复: {image_id} -> tag_extension") def update_as_failed(self, image_id: int, reason: str): @@ -191,7 +190,6 @@ class ImageSimilarityRecalc: WHERE id = %s """ cursor.execute(sql, (image_id,)) - self.db_conn.commit() self.logger.warning(f"处理失败 {image_id}: {reason}") def process_batch(self, image_records: List[dict]) -> Tuple[int, int, int]: @@ -263,8 +261,8 @@ class ImageSimilarityRecalc: images = self.get_recalc_images() if not images: - self.logger.info("没有需要重新计算的图片,等待 10 秒后继续检查...") - time.sleep(10) + self.logger.info("没有需要重新计算的图片,等待 2 秒后继续检查...") + time.sleep(2) continue batch_num += 1 diff --git a/start_similarity.sh b/start_similarity.sh index 95b18f3..4dcb8c2 100644 --- a/start_similarity.sh +++ b/start_similarity.sh @@ -15,12 +15,6 @@ CHECK_PID_FILE="${BASE_DIR}/image_similarity_check.pid" CHECK_LOG_FILE="${BASE_DIR}/image_similarity.log" CHECK_MAX_PROCESSES=1 # 限制最多1个进程 -# image_similarity_recalc 配置 -RECALC_SCRIPT="${BASE_DIR}/image_similarity_recalc.py" -RECALC_PID_FILE="${BASE_DIR}/image_similarity_recalc.pid" -RECALC_LOG_FILE="${BASE_DIR}/image_similarity_recalc.log" -RECALC_MAX_PROCESSES=1 # 限制最多1个进程 - # stats_similarity 配置(仅用于查看状态,不需要常驻进程) STATS_SCRIPT="${BASE_DIR}/stats_similarity.py" @@ -175,12 +169,12 @@ stop_single_all() { # 启动所有服务 start() { - echo -e "${BLUE}========== 启动图片去重审核系统 ==========${NC}" + echo -e "${BLUE}========== 启动图片去重审核系统 ===========${NC}" echo -e "${YELLOW}进程限制:每个服务最多启动1个实例${NC}" echo "" # 检查总进程数 - local total_before=$(pgrep -f "image_similarity_check\|image_similarity_recalc" | wc -l) + local total_before=$(pgrep -f "image_similarity_check" | wc -l) if [ $total_before -gt 0 ]; then echo -e "${YELLOW}当前已有 ${total_before} 个审核进程在运行${NC}" echo -e "${YELLOW}建议先停止现有进程: $0 stop${NC}" @@ -194,17 +188,11 @@ start() { if ! start_single "$CHECK_SCRIPT" "$CHECK_PID_FILE" "$CHECK_LOG_FILE" "image_similarity_check" "$CHECK_MAX_PROCESSES"; then has_errors=1 fi - sleep 3 - - # 启动重算脚本 - if ! start_single "$RECALC_SCRIPT" "$RECALC_PID_FILE" "$RECALC_LOG_FILE" "image_similarity_recalc" "$RECALC_MAX_PROCESSES"; then - has_errors=1 - fi echo "" # 检查启动结果 - local total_after=$(pgrep -f "image_similarity_check\|image_similarity_recalc" | wc -l) + local total_after=$(pgrep -f "image_similarity_check" | wc -l) if [ $has_errors -eq 0 ]; then echo -e "${GREEN}✅ 启动完成,当前运行 ${total_after} 个进程${NC}" else @@ -216,40 +204,27 @@ start() { # 只启动主脚本 start-check() { - echo -e "${BLUE}========== 启动主处理脚本 ==========${NC}" + echo -e "${BLUE}========== 启动主处理脚本 ===========${NC}" start_single "$CHECK_SCRIPT" "$CHECK_PID_FILE" "$CHECK_LOG_FILE" "image_similarity_check" "$CHECK_MAX_PROCESSES" echo -e "${BLUE}====================================${NC}" } -# 只启动重算脚本 -start-recalc() { - echo -e "${BLUE}========== 启动重算脚本 ==========${NC}" - start_single "$RECALC_SCRIPT" "$RECALC_PID_FILE" "$RECALC_LOG_FILE" "image_similarity_recalc" "$RECALC_MAX_PROCESSES" - echo -e "${BLUE}==================================${NC}" -} - # 停止所有服务 stop() { - echo -e "${BLUE}========== 停止图片去重审核系统 ==========${NC}" + echo -e "${BLUE}========== 停止图片去重审核系统 ===========${NC}" local has_errors=0 - local total_before=$(pgrep -f "image_similarity_check\|image_similarity_recalc" | wc -l) + local total_before=$(pgrep -f "image_similarity_check" | wc -l) echo -e "${YELLOW}当前共有 ${total_before} 个审核进程${NC}" - # 停止重算脚本 - if ! stop_single_all "$RECALC_SCRIPT" "image_similarity_recalc" "$RECALC_PID_FILE"; then - has_errors=1 - fi - sleep 2 - # 停止主处理脚本 if ! stop_single_all "$CHECK_SCRIPT" "image_similarity_check" "$CHECK_PID_FILE"; then has_errors=1 fi # 最终检查 - local total_after=$(pgrep -f "image_similarity_check\|image_similarity_recalc" | wc -l) + local total_after=$(pgrep -f "image_similarity_check" | wc -l) echo "" if [ $total_after -eq 0 ]; then @@ -265,11 +240,10 @@ stop() { # 强制停止所有服务 force-stop() { - echo -e "${RED}========== 强制停止所有审核进程 ==========${NC}" + echo -e "${RED}========== 强制停止所有审核进程 ===========${NC}" # 停止所有相关进程 pkill -9 -f "image_similarity_check.py" 2>/dev/null - pkill -9 -f "image_similarity_recalc.py" 2>/dev/null sleep 2 @@ -277,13 +251,13 @@ force-stop() { rm -f "${BASE_DIR}"/*.pid # 检查是否还有残留 - local remaining=$(pgrep -f "image_similarity_check\|image_similarity_recalc" | wc -l) + local remaining=$(pgrep -f "image_similarity_check" | wc -l) if [ $remaining -eq 0 ]; then echo -e "${GREEN}✅ 所有进程已强制停止${NC}" else echo -e "${RED}❌ 仍有 ${remaining} 个进程存活${NC}" echo -e "请手动检查以下进程:" - pgrep -f "image_similarity_check\|image_similarity_recalc" | xargs ps -fp 2>/dev/null + pgrep -f "image_similarity_check" | xargs ps -fp 2>/dev/null fi echo -e "${RED}==========================================${NC}" @@ -330,7 +304,7 @@ status() { echo -e "${BLUE}工作目录: ${BASE_DIR}${NC}" echo "" - local total_procs=$(pgrep -f "image_similarity_check\|image_similarity_recalc" | wc -l) + local total_procs=$(pgrep -f "image_similarity_check" | wc -l) echo -e "${YELLOW}📊 进程概览:${NC}" echo -e " 总进程数: ${total_procs}" @@ -366,7 +340,7 @@ status() { mem_sum=$(echo "$mem_sum + $mem" | bc) fi fi - done < <(pgrep -f "image_similarity_check\|image_similarity_recalc" | xargs ps -o pid,cmd --no-headers 2>/dev/null) + done < <(pgrep -f "image_similarity_check" | xargs ps -o pid,cmd --no-headers 2>/dev/null) # 显示每个脚本的统计 for script in "${!script_count[@]}"; do @@ -393,7 +367,7 @@ status() { echo "" echo -e "${YELLOW}📁 PID文件状态:${NC}" - for pid_file in "$CHECK_PID_FILE" "$RECALC_PID_FILE"; do + for pid_file in "$CHECK_PID_FILE"; do if [ -f "$pid_file" ]; then script_name=$(basename "$pid_file" .pid) pid=$(cat "$pid_file" 2>/dev/null) @@ -421,25 +395,20 @@ status() { # 查看日志 logs() { local lines=${1:-50} - echo -e "${BLUE}========== 查看日志 (最后 ${lines} 行) ==========${NC}" + echo -e "${BLUE}========== 查看日志 (最后 ${lines} 行) ===========${NC}" if [ -f "$CHECK_LOG_FILE" ]; then echo -e "\n${YELLOW}--- image_similarity.log ---${NC}" tail -$lines "$CHECK_LOG_FILE" fi - if [ -f "$RECALC_LOG_FILE" ]; then - echo -e "\n${YELLOW}--- image_similarity_recalc.log ---${NC}" - tail -$lines "$RECALC_LOG_FILE" - fi - echo -e "${BLUE}============================================${NC}" } # 实时查看日志 logs-follow() { - echo -e "${BLUE}========== 实时查看日志 (Ctrl+C 退出) ==========${NC}" - tail -f "$CHECK_LOG_FILE" "$RECALC_LOG_FILE" + echo -e "${BLUE}========== 实时查看日志 (Ctrl+C 退出) ===========${NC}" + tail -f "$CHECK_LOG_FILE" } # 显示帮助 @@ -449,17 +418,14 @@ show_help() { echo -e "${YELLOW}当前配置:${NC}" echo -e " 工作目录: ${BASE_DIR}" echo -e " image_similarity_check: 最多 ${CHECK_MAX_PROCESSES} 个进程" - echo -e " image_similarity_recalc: 最多 ${RECALC_MAX_PROCESSES} 个进程" echo "" echo -e "${BLUE}用法: $0 {命令}${NC}" echo "" echo -e "${GREEN}服务管理:${NC}" - echo -e " ${YELLOW}start${NC} 启动所有服务" - echo -e " ${YELLOW}start-check${NC} 只启动主处理脚本" - echo -e " ${YELLOW}start-recalc${NC} 只启动重算脚本" - echo -e " ${YELLOW}stop${NC} 停止所有服务" - echo -e " ${YELLOW}force-stop${NC} 强制停止所有进程" - echo -e " ${YELLOW}restart${NC} 重启所有服务" + echo -e " ${YELLOW}start${NC} 启动服务" + echo -e " ${YELLOW}stop${NC} 停止服务" + echo -e " ${YELLOW}force-stop${NC} 强制停止进程" + echo -e " ${YELLOW}restart${NC} 重启服务" echo -e " ${YELLOW}force-restart${NC} 强制重启" echo "" echo -e "${GREEN}状态查看:${NC}" @@ -472,10 +438,8 @@ show_help() { echo -e " ${YELLOW}help${NC} 显示帮助" echo "" echo -e "${RED}注意:${NC}" - echo -e " - 脚本会限制每个服务最多启动1个实例" - echo -e " - 两个脚本可以同时运行,处理不同状态的数据" - echo -e " - image_similarity_check: 处理 status='draft', similarity='draft'" - echo -e " - image_similarity_recalc: 处理 status='draft', similarity='recalc'" + echo -e " - 脚本会限制最多启动1个实例" + echo -e " - 处理 status='draft', similarity='draft' 的数据" } # 主逻辑 @@ -486,9 +450,6 @@ case "$1" in start-check) start-check ;; - start-recalc) - start-recalc - ;; stop) stop ;;