Files
yixiaogao/backend/api/server.go
2025-11-27 18:40:08 +08:00

544 lines
15 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
// Response is the uniform JSON envelope that the API handlers in this file
// pass to writeJSON.
type Response struct {
// Success reports whether the request was handled successfully.
Success bool `json:"success"`
// Message is a human-readable result or error description.
Message string `json:"message"`
// Data carries the handler-specific payload; the key is omitted from the
// JSON output when the field is nil.
Data interface{} `json:"data,omitempty"`
}
// TaskStatus describes the state of the crawler task reported by
// /api/task/status.
type TaskStatus struct {
// Running is true while a handler considers its task in progress.
Running bool `json:"running"`
// Progress is set to 0 when a handler starts a task and to 100 when the
// task finishes successfully.
Progress int `json:"progress"`
// Message is a human-readable description of the current step.
Message string `json:"message"`
// Error holds an error description; the key is omitted from the JSON
// output when the string is empty.
Error string `json:"error,omitempty"`
}
// currentTask is the single process-wide task status: handlers update it and
// getTaskStatusHandler returns it.
// NOTE(review): it is read and written from request handlers with no locking
// visible in this file — confirm whether concurrent requests are expected.
var currentTask = &TaskStatus{Running: false}
func main() {
// 启用CORS
http.HandleFunc("/", corsMiddleware(handleRoot))
http.HandleFunc("/api/homepage/extract", corsMiddleware(extractHomepageHandler))
http.HandleFunc("/api/article/download", corsMiddleware(downloadArticleHandler))
http.HandleFunc("/api/article/list", corsMiddleware(getArticleListHandler))
http.HandleFunc("/api/article/batch", corsMiddleware(batchDownloadHandler))
http.HandleFunc("/api/data/list", corsMiddleware(getDataListHandler))
http.HandleFunc("/api/task/status", corsMiddleware(getTaskStatusHandler))
http.HandleFunc("/api/download/", corsMiddleware(downloadFileHandler))
port := ":8080"
fmt.Println("===============================================")
fmt.Println(" 🚀 微信公众号文章爬虫 API 服务器")
fmt.Println("===============================================")
fmt.Printf("🌐 服务地址: http://localhost%s\n", port)
fmt.Printf("⏰ 启动时间: %s\n", time.Now().Format("2006-01-02 15:04:05"))
fmt.Println("===============================================\n")
if err := http.ListenAndServe(port, nil); err != nil {
log.Fatal("服务器启动失败:", err)
}
}
// corsMiddleware wraps next so that every response carries permissive CORS
// headers. Preflight (OPTIONS) requests are answered with 200 directly and
// never reach next.
func corsMiddleware(next http.HandlerFunc) http.HandlerFunc {
	corsHeaders := [][2]string{
		{"Access-Control-Allow-Origin", "*"},
		{"Access-Control-Allow-Methods", "GET, POST, OPTIONS"},
		{"Access-Control-Allow-Headers", "Content-Type"},
	}
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		hdr := w.Header()
		for _, kv := range corsHeaders {
			hdr.Set(kv[0], kv[1])
		}
		if r.Method != "OPTIONS" {
			next(w, r)
			return
		}
		// Preflight: the headers alone are the answer.
		w.WriteHeader(http.StatusOK)
	})
}
// handleRoot serves a small static HTML page that lists the available API
// endpoints and shows the server's current time.
func handleRoot(w http.ResponseWriter, r *http.Request) {
	// Everything before the timestamp.
	const pageHead = `
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>微信公众号文章爬虫 API</title>
<style>
body { font-family: Arial, sans-serif; max-width: 800px; margin: 50px auto; padding: 20px; }
h1 { color: #333; }
.endpoint { background: #f5f5f5; padding: 10px; margin: 10px 0; border-radius: 5px; }
.method { color: #4CAF50; font-weight: bold; }
</style>
</head>
<body>
<h1>🚀 微信公众号文章爬虫 API 服务器</h1>
<p>当前时间: `
	// Everything after the timestamp.
	const pageTail = `</p>
<h2>可用接口:</h2>
<div class="endpoint">
<span class="method">POST</span> /api/homepage/extract - 提取公众号主页
</div>
<div class="endpoint">
<span class="method">POST</span> /api/article/download - 下载单篇文章
</div>
<div class="endpoint">
<span class="method">POST</span> /api/article/list - 获取文章列表
</div>
<div class="endpoint">
<span class="method">POST</span> /api/article/batch - 批量下载文章
</div>
<div class="endpoint">
<span class="method">GET</span> /api/data/list - 获取数据列表
</div>
<div class="endpoint">
<span class="method">GET</span> /api/task/status - 获取任务状态
</div>
</body>
</html>
`
	now := time.Now().Format("2006-01-02 15:04:05")
	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	w.Write([]byte(pageHead + now + pageTail))
}
// extractHomepageHandler runs ../wechat-crawler.exe with the URL from the JSON
// request body ({"url": "..."}) and looks for the official-account homepage
// link in the program's combined output.
//
// It always answers with a Response envelope: on success Data holds the
// extracted "homepage" link and the raw "output"; on any failure Success is
// false and Message explains why.
func extractHomepageHandler(w http.ResponseWriter, r *http.Request) {
	var req struct {
		URL string `json:"url"`
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeJSON(w, Response{Success: false, Message: "请求参数错误"})
		return
	}

	// The value becomes a command-line argument: refuse empty input and
	// anything the crawler could mistake for an option.
	articleURL := strings.TrimSpace(req.URL)
	if articleURL == "" || strings.HasPrefix(articleURL, "-") {
		writeJSON(w, Response{Success: false, Message: "请求参数错误"})
		return
	}

	// The crawler lives in, and runs from, the parent directory.
	workDir, err := filepath.Abs("..")
	if err != nil {
		log.Printf("解析工作目录失败: %v", err)
		writeJSON(w, Response{Success: false, Message: "执行失败: " + err.Error()})
		return
	}
	exePath := filepath.Join(workDir, "wechat-crawler.exe")
	log.Printf("尝试执行: %s", exePath)

	cmd := exec.Command(exePath, articleURL)
	cmd.Dir = workDir
	output, err := cmd.CombinedOutput()
	if err != nil {
		log.Printf("执行失败: %v, 输出: %s", err, string(output))
		writeJSON(w, Response{Success: false, Message: "执行失败: " + string(output)})
		return
	}

	outputStr := string(output)
	homepageURL := homepageURLFromOutput(outputStr)
	if homepageURL == "" {
		writeJSON(w, Response{Success: false, Message: "未能提取到主页链接"})
		return
	}

	writeJSON(w, Response{
		Success: true,
		Message: "提取成功",
		Data: map[string]string{
			"homepage": homepageURL,
			"output":   outputStr,
		},
	})
}

// homepageURLFromOutput returns the first https:// URL found on a line of
// output that mentions the homepage marker text or the profile_ext address,
// or "" when there is none. Only the URL token itself is returned; any text
// following it on the same line is dropped.
func homepageURLFromOutput(output string) string {
	for _, line := range strings.Split(output, "\n") {
		if !strings.Contains(line, "公众号主页链接") &&
			!strings.Contains(line, "https://mp.weixin.qq.com/mp/profile_ext") {
			continue
		}
		idx := strings.Index(line, "https://")
		if idx == -1 {
			continue
		}
		// line[idx:] starts with "https://", so Fields is never empty here.
		return strings.Fields(line[idx:])[0]
	}
	return ""
}
// downloadArticleHandler is the placeholder endpoint for downloading a single
// article. It decodes the JSON request (url, save_image, save_content) and
// acknowledges it, echoing the URL back; no download is performed yet because
// the crawler has no command-line entry point for single-article downloads.
func downloadArticleHandler(w http.ResponseWriter, r *http.Request) {
	var req struct {
		URL         string `json:"url"`
		SaveImage   bool   `json:"save_image"`
		SaveContent bool   `json:"save_content"`
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeJSON(w, Response{Success: false, Message: "请求参数错误"})
		return
	}

	// Deliberately leave currentTask untouched: nothing is started here, so
	// nothing would ever clear a Running flag again, and /api/task/status
	// would report a running task forever.
	// TODO: invoke the crawler's single-article download once it exists and
	// track its progress in currentTask.

	writeJSON(w, Response{
		Success: true,
		Message: "下载任务已启动",
		Data: map[string]interface{}{
			"url": req.URL,
		},
	})
}
// getArticleListHandler runs the crawler's menu option 3 (fetch the article
// list via an access token) synchronously and then reports which list file
// was generated.
//
// Request body: {"access_token": "...", "pages": N}; a non-positive N is sent
// to the crawler as 0. After the crawler exits successfully the handler picks
// the most recently modified directory under ../data and, inside it, the
// preferred list file (see pickArticleListFile). The reply is always a
// Response envelope; on success Data holds "account", "filename" and
// "download".
func getArticleListHandler(w http.ResponseWriter, r *http.Request) {
	var req struct {
		AccessToken string `json:"access_token"`
		Pages       int    `json:"pages"`
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeJSON(w, Response{Success: false, Message: "请求参数错误"})
		return
	}
	// The token is fed to the crawler as one line of stdin; an embedded line
	// break would let the caller answer the following prompts as well.
	if strings.ContainsAny(req.AccessToken, "\r\n") {
		writeJSON(w, Response{Success: false, Message: "请求参数错误"})
		return
	}

	// fail marks the task as stopped and reports msg to the client.
	fail := func(msg string) {
		currentTask.Running = false
		writeJSON(w, Response{Success: false, Message: msg})
	}

	currentTask.Running = true
	currentTask.Progress = 0
	currentTask.Message = "正在获取文章列表..."

	workDir, err := filepath.Abs("..")
	if err != nil {
		log.Printf("解析工作目录失败: %v", err)
		fail("启动命令失败: " + err.Error())
		return
	}
	exePath := filepath.Join(workDir, "wechat-crawler.exe")
	log.Printf("启动功能3: %s, 工作目录: %s", exePath, workDir)

	pages := req.Pages
	if pages < 0 {
		pages = 0
	}
	// Menu answers, one per line: option 3, the token, the page count.
	answers := fmt.Sprintf("3\n%s\n%d\n", req.AccessToken, pages)

	cmd := exec.Command(exePath)
	cmd.Dir = workDir
	// os/exec copies the reader into the child's stdin and closes the pipe
	// at EOF, so no manual pipe handling is needed.
	cmd.Stdin = strings.NewReader(answers)

	if err := cmd.Start(); err != nil {
		log.Printf("启动命令失败: %v", err)
		fail("启动命令失败: " + err.Error())
		return
	}
	if err := cmd.Wait(); err != nil {
		log.Printf("命令执行失败: %v", err)
		fail("命令执行失败: " + err.Error())
		return
	}

	currentTask.Running = false
	currentTask.Progress = 100
	currentTask.Message = "文章列表获取完成"

	// Locate what the crawler produced.
	dataDir := "../data"
	entries, err := os.ReadDir(dataDir)
	if err != nil {
		fail("读取数据目录失败: " + err.Error())
		return
	}
	latestDir := newestSubdir(entries)
	if latestDir == "" {
		fail("未找到生成的数据目录")
		return
	}
	log.Printf("找到最新目录: %s", latestDir)

	files, err := os.ReadDir(filepath.Join(dataDir, latestDir))
	if err != nil {
		fail("读取公众号目录失败: " + err.Error())
		return
	}

	excelFile := pickArticleListFile(files)
	if excelFile == "" {
		// List what is there to make the failure debuggable.
		fileList := make([]string, 0, len(files))
		for _, file := range files {
			fileList = append(fileList, file.Name())
		}
		log.Printf("目录 %s 中的文件: %v", latestDir, fileList)
		fail("未找到Excel文件目录中的文件: " + strings.Join(fileList, ", "))
		return
	}

	writeJSON(w, Response{
		Success: true,
		Message: "文章列表获取成功",
		Data: map[string]interface{}{
			"account":  latestDir,
			"filename": excelFile,
			// NOTE(review): the file route is registered as /api/download/;
			// this link has no /api prefix. Presumably the client adds its
			// own base path — confirm before changing either side.
			"download": fmt.Sprintf("/download/%s/%s", latestDir, excelFile),
		},
	})
}

// newestSubdir returns the name of the most recently modified directory among
// entries, or "" when there is none. Entries whose metadata cannot be read
// (for example because they vanished after the listing) are skipped.
func newestSubdir(entries []os.DirEntry) string {
	var (
		name   string
		newest time.Time
	)
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		info, err := entry.Info()
		if err != nil {
			continue
		}
		if info.ModTime().After(newest) {
			newest = info.ModTime()
			name = entry.Name()
		}
	}
	return name
}

// pickArticleListFile returns the name of the first .xlsx or .txt file whose
// name contains, in order of preference, "直连链接", "原始链接" or "文章列表";
// it returns "" when no file matches.
func pickArticleListFile(files []os.DirEntry) string {
	for _, keyword := range []string{"直连链接", "原始链接", "文章列表"} {
		for _, file := range files {
			name := file.Name()
			if file.IsDir() || !strings.Contains(name, keyword) {
				continue
			}
			if strings.HasSuffix(name, ".xlsx") || strings.HasSuffix(name, ".txt") {
				log.Printf("找到%s文件: %s", keyword, name)
				return name
			}
		}
	}
	return ""
}
// batchDownloadHandler runs the crawler's menu option 5 (batch download)
// synchronously for one official account and reports how many entries ended
// up in that account's "文章详细" directory.
//
// Request body: {"official_account": "...", "save_image": bool,
// "save_content": bool}. save_content is decoded but not forwarded to the
// crawler. The reply is always a Response envelope; on success Data holds
// "account", "articleCount" and "path".
func batchDownloadHandler(w http.ResponseWriter, r *http.Request) {
	var req struct {
		OfficialAccount string `json:"official_account"`
		SaveImage       bool   `json:"save_image"`
		SaveContent     bool   `json:"save_content"`
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeJSON(w, Response{Success: false, Message: "请求参数错误"})
		return
	}
	// The account name is used both as a line of crawler input and as a
	// directory name under ../data, so it must be a single plain path element.
	if !isPlainAccountName(req.OfficialAccount) {
		writeJSON(w, Response{Success: false, Message: "请求参数错误"})
		return
	}

	// fail marks the task as stopped and reports msg to the client.
	fail := func(msg string) {
		currentTask.Running = false
		writeJSON(w, Response{Success: false, Message: msg})
	}

	currentTask.Running = true
	currentTask.Progress = 0
	currentTask.Message = "正在批量下载文章..."

	workDir, err := filepath.Abs("..")
	if err != nil {
		log.Printf("解析工作目录失败: %v", err)
		fail("启动命令失败: " + err.Error())
		return
	}
	exePath := filepath.Join(workDir, "wechat-crawler.exe")
	log.Printf("启动功能5: %s, 工作目录: %s", exePath, workDir)

	saveImages := "n"
	if req.SaveImage {
		saveImages = "y"
	}
	// Menu answers, one per line: option 5, the account, save images y/n.
	answers := "5\n" + req.OfficialAccount + "\n" + saveImages + "\n"

	cmd := exec.Command(exePath)
	cmd.Dir = workDir
	// os/exec copies the reader into the child's stdin and closes the pipe
	// at EOF, so no manual pipe handling is needed.
	cmd.Stdin = strings.NewReader(answers)

	if err := cmd.Start(); err != nil {
		log.Printf("启动命令失败: %v", err)
		fail("启动命令失败: " + err.Error())
		return
	}
	if err := cmd.Wait(); err != nil {
		log.Printf("命令执行失败: %v", err)
		fail("命令执行失败: " + err.Error())
		return
	}

	currentTask.Running = false
	currentTask.Progress = 100
	currentTask.Message = "批量下载完成"

	// Count what is in the account's detail directory; a missing or
	// unreadable directory simply counts as zero articles.
	accountPath := filepath.Join("../data", req.OfficialAccount, "文章详细")
	var articleCount int
	if entries, err := os.ReadDir(accountPath); err == nil {
		articleCount = len(entries)
	}

	writeJSON(w, Response{
		Success: true,
		Message: fmt.Sprintf("批量下载完成,共下载 %d 篇文章", articleCount),
		Data: map[string]interface{}{
			"account":      req.OfficialAccount,
			"articleCount": articleCount,
			"path":         accountPath,
		},
	})
}

// isPlainAccountName reports whether name can safely serve as one line of
// input and as one directory name: non-empty, not "." or "..", and free of
// line breaks and path separators.
func isPlainAccountName(name string) bool {
	if name == "" || name == "." || name == ".." {
		return false
	}
	return !strings.ContainsAny(name, "\r\n/\\")
}
// getDataListHandler lists every account directory under ../data together
// with its article count (number of entries in its "文章详细" subdirectory),
// its path and its last modification date.
//
// The reply is always successful; when the data directory cannot be read the
// list is empty. Data is always a JSON array, never null.
func getDataListHandler(w http.ResponseWriter, r *http.Request) {
	dataDir := "../data"
	// Start from a non-nil slice so an empty result encodes as [] rather
	// than null.
	accounts := make([]map[string]interface{}, 0)

	entries, err := os.ReadDir(dataDir)
	if err != nil {
		// A missing directory just means nothing has been crawled yet;
		// anything else is worth a log line but still yields an empty list.
		if !os.IsNotExist(err) {
			log.Printf("读取数据目录失败: %v", err)
		}
		writeJSON(w, Response{
			Success: true,
			Data:    accounts,
		})
		return
	}

	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		accountPath := filepath.Join(dataDir, entry.Name())

		// Article count: entries in the detail directory, 0 if unreadable.
		var articleCount int
		if detailEntries, err := os.ReadDir(filepath.Join(accountPath, "文章详细")); err == nil {
			articleCount = len(detailEntries)
		}

		// Last update: left empty when the entry's metadata is unavailable
		// (e.g. it was removed after the listing was taken).
		lastUpdate := ""
		if info, err := entry.Info(); err == nil {
			lastUpdate = info.ModTime().Format("2006-01-02")
		}

		accounts = append(accounts, map[string]interface{}{
			"name":         entry.Name(),
			"articleCount": articleCount,
			"path":         accountPath,
			"lastUpdate":   lastUpdate,
		})
	}

	writeJSON(w, Response{
		Success: true,
		Data:    accounts,
	})
}
// 获取任务状态
func getTaskStatusHandler(w http.ResponseWriter, r *http.Request) {
writeJSON(w, Response{
Success: true,
Data: currentTask,
})
}
// downloadFileHandler serves a generated file as an attachment. The request
// path has the form /api/download/<account>/<file> and is resolved against
// ../data.
//
// Responses: 400 when the path is malformed or resolves outside the data
// directory, 404 when the target does not exist or is a directory, 500 when
// it cannot be inspected; otherwise the file content with a spreadsheet
// content type (or text/plain for .txt files).
func downloadFileHandler(w http.ResponseWriter, r *http.Request) {
	rel := strings.TrimPrefix(r.URL.Path, "/api/download/")
	parts := strings.SplitN(rel, "/", 2)
	if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
		http.Error(w, "路径错误", http.StatusBadRequest)
		return
	}
	accountName, filename := parts[0], parts[1]

	dataRoot, err := filepath.Abs(filepath.Join("..", "data"))
	if err != nil {
		http.Error(w, "路径错误", http.StatusInternalServerError)
		return
	}
	absPath := filepath.Join(dataRoot, accountName, filename)

	// Join cleans ".." elements, so a crafted name could climb out of the
	// data directory; only serve paths that are still below it.
	below, err := filepath.Rel(dataRoot, absPath)
	if err != nil || below == ".." || strings.HasPrefix(below, ".."+string(filepath.Separator)) {
		http.Error(w, "路径错误", http.StatusBadRequest)
		return
	}

	info, err := os.Stat(absPath)
	switch {
	case err != nil && os.IsNotExist(err):
		http.Error(w, "文件不存在", http.StatusNotFound)
		return
	case err != nil:
		log.Printf("读取文件信息失败: %v", err)
		http.Error(w, "文件不存在", http.StatusInternalServerError)
		return
	case info.IsDir():
		// ServeFile would otherwise redirect to / list the directory.
		http.Error(w, "文件不存在", http.StatusNotFound)
		return
	}
	log.Printf("下载文件: %s", absPath)

	contentType := "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
	if strings.HasSuffix(filename, ".txt") {
		contentType = "text/plain; charset=utf-8"
	}
	w.Header().Set("Content-Type", contentType)
	// filename* takes a percent-encoded UTF-8 value; offer only the base name.
	w.Header().Set("Content-Disposition",
		fmt.Sprintf("attachment; filename*=UTF-8''%s", encodeExtValue(filepath.Base(absPath))))

	http.ServeFile(w, r, absPath)
}

// encodeExtValue percent-encodes every byte of s that is not an RFC 5987
// attr-char, producing a value usable after "filename*=UTF-8''".
func encodeExtValue(s string) string {
	var b strings.Builder
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9',
			strings.IndexByte("!#$&+-.^_`|~", c) >= 0:
			b.WriteByte(c)
		default:
			fmt.Fprintf(&b, "%%%02X", c)
		}
	}
	return b.String()
}
// writeJSON serializes data as JSON and writes it to w with a JSON content
// type, followed by a newline. If data cannot be serialized, the failure is
// logged and the client receives a 500 with a fixed JSON failure body instead
// of an empty 200 response.
func writeJSON(w http.ResponseWriter, data interface{}) {
	w.Header().Set("Content-Type", "application/json; charset=utf-8")

	// Marshal before touching the body so a serialization failure can still
	// change the status code.
	payload, err := json.Marshal(data)
	if err != nil {
		log.Printf("JSON序列化失败: %v", err)
		w.WriteHeader(http.StatusInternalServerError)
		payload = []byte(`{"success":false,"message":"响应序列化失败"}`)
	}
	if _, err := w.Write(append(payload, '\n')); err != nil {
		// The client has most likely gone away; nothing more can be sent.
		log.Printf("写入响应失败: %v", err)
	}
}