1226 lines
34 KiB
Go
1226 lines
34 KiB
Go
package main
|
||
|
||
import (
|
||
"crypto/rand"
|
||
"encoding/hex"
|
||
"encoding/json"
|
||
"fmt"
|
||
"log"
|
||
"net/http"
|
||
"net/url"
|
||
"os"
|
||
"os/exec"
|
||
"path/filepath"
|
||
"regexp"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/wechat-crawler/pkg/wechat"
|
||
)
|
||
|
||
// Response is the uniform JSON envelope written by the API handlers.
type Response struct {
	// Success reports whether the request was handled successfully.
	Success bool `json:"success"`
	// Message is a human-readable status or error description.
	Message string `json:"message"`
	// Data is the optional payload; it is left out of the JSON when nil.
	Data interface{} `json:"data,omitempty"`
	// Code is an optional application status code; it is left out when zero.
	Code int `json:"code,omitempty"`
}
|
||
|
||
// TaskStatus describes the progress of the crawler task most recently
// started through the API.
type TaskStatus struct {
	// Running is true while a task is in progress.
	Running bool `json:"running"`
	// Progress is the completion value the handlers set (0 at start, 100 at the end).
	Progress int `json:"progress"`
	// Message is a human-readable description of the current step.
	Message string `json:"message"`
	// Error carries a failure description; it is left out of the JSON when empty.
	Error string `json:"error,omitempty"`
}
|
||
|
||
// LoginRequest is the JSON body accepted by the login endpoint.
type LoginRequest struct {
	// Username is the account name to authenticate.
	Username string `json:"username"`
	// Password is the plaintext password supplied by the client.
	Password string `json:"password"`
}
|
||
|
||
// RegisterRequest is the JSON body accepted by the registration endpoint.
type RegisterRequest struct {
	// Username is the desired account name.
	Username string `json:"username"`
	// Password is the plaintext password supplied by the client.
	Password string `json:"password"`
	// Email is the address to associate with the new account.
	Email string `json:"email"`
}
|
||
|
||
// Session is one in-memory login session.
type Session struct {
	// Token is the opaque session identifier handed to the client.
	Token string
	// UserID is the ID of the user the session belongs to.
	UserID int
	// Expiry is the instant after which the session is no longer accepted.
	Expiry time.Time
}
|
||
|
||
var currentTask = &TaskStatus{Running: false}
|
||
var sessions = make(map[string]*Session)
|
||
|
||
func main() {
|
||
// 启用CORS
|
||
http.HandleFunc("/", corsMiddleware(handleRoot))
|
||
http.HandleFunc("/api/homepage/extract", corsMiddleware(extractHomepageHandler))
|
||
http.HandleFunc("/api/article/download", corsMiddleware(downloadArticleHandler))
|
||
http.HandleFunc("/api/article/list", corsMiddleware(getArticleListHandler))
|
||
http.HandleFunc("/api/article/batch", corsMiddleware(batchDownloadHandler))
|
||
http.HandleFunc("/api/article/detail", corsMiddleware(getArticleDetailHandler))
|
||
http.HandleFunc("/api/data/list", corsMiddleware(getDataListHandler))
|
||
http.HandleFunc("/api/task/status", corsMiddleware(getTaskStatusHandler))
|
||
http.HandleFunc("/api/download/", corsMiddleware(downloadFileHandler))
|
||
|
||
// 用户认证接口
|
||
http.HandleFunc("/api/user/register", corsMiddleware(registerHandler))
|
||
http.HandleFunc("/api/user/login", corsMiddleware(loginHandler))
|
||
http.HandleFunc("/api/user/logout", corsMiddleware(logoutHandler))
|
||
http.HandleFunc("/api/user/info", corsMiddleware(getUserInfoHandler))
|
||
http.HandleFunc("/api/user/update", corsMiddleware(updateUserHandler))
|
||
|
||
port := ":8080"
|
||
fmt.Println("===============================================")
|
||
fmt.Println(" 🚀 微信公众号文章爬虫 API 服务器")
|
||
fmt.Println("===============================================")
|
||
fmt.Printf("🌐 服务地址: http://localhost%s\n", port)
|
||
fmt.Printf("⏰ 启动时间: %s\n", time.Now().Format("2006-01-02 15:04:05"))
|
||
fmt.Println("===============================================\n")
|
||
|
||
if err := http.ListenAndServe(port, nil); err != nil {
|
||
log.Fatal("服务器启动失败:", err)
|
||
}
|
||
}
|
||
|
||
// corsMiddleware wraps next so that every response carries permissive CORS
// headers. OPTIONS (preflight) requests are answered with 200 directly and
// never reach next.
func corsMiddleware(next http.HandlerFunc) http.HandlerFunc {
	corsHeaders := [][2]string{
		{"Access-Control-Allow-Origin", "*"},
		{"Access-Control-Allow-Methods", "GET, POST, OPTIONS"},
		{"Access-Control-Allow-Headers", "Content-Type, Authorization"},
	}

	return func(w http.ResponseWriter, r *http.Request) {
		hdr := w.Header()
		for _, kv := range corsHeaders {
			hdr.Set(kv[0], kv[1])
		}

		if r.Method != "OPTIONS" {
			next(w, r)
			return
		}
		w.WriteHeader(http.StatusOK)
	}
}
|
||
|
||
// handleRoot serves a small HTML index page listing the available API
// endpoints.
//
// The "/" pattern of http.ServeMux matches every path that no other pattern
// claims, so any path other than exactly "/" is answered with 404 instead of
// the index page.
func handleRoot(w http.ResponseWriter, r *http.Request) {
	if r.URL.Path != "/" {
		http.NotFound(w, r)
		return
	}

	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	html := `
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>微信公众号文章爬虫 API</title>
<style>
body { font-family: Arial, sans-serif; max-width: 800px; margin: 50px auto; padding: 20px; }
h1 { color: #333; }
.endpoint { background: #f5f5f5; padding: 10px; margin: 10px 0; border-radius: 5px; }
.method { color: #4CAF50; font-weight: bold; }
</style>
</head>
<body>
<h1>🚀 微信公众号文章爬虫 API 服务器</h1>
<p>当前时间: ` + time.Now().Format("2006-01-02 15:04:05") + `</p>
<h2>可用接口:</h2>
<div class="endpoint">
<span class="method">POST</span> /api/homepage/extract - 提取公众号主页
</div>
<div class="endpoint">
<span class="method">POST</span> /api/article/download - 下载单篇文章
</div>
<div class="endpoint">
<span class="method">POST</span> /api/article/list - 获取文章列表
</div>
<div class="endpoint">
<span class="method">POST</span> /api/article/detail - 获取文章详情(阅读量、点赞数、评论等)
</div>
<div class="endpoint">
<span class="method">POST</span> /api/article/batch - 批量下载文章
</div>
<div class="endpoint">
<span class="method">GET</span> /api/data/list - 获取数据列表
</div>
<div class="endpoint">
<span class="method">GET</span> /api/task/status - 获取任务状态
</div>
</body>
</html>
`
	if _, err := w.Write([]byte(html)); err != nil {
		log.Printf("写入首页响应失败: %v", err)
	}
}
|
||
|
||
// 提取公众号主页
|
||
func extractHomepageHandler(w http.ResponseWriter, r *http.Request) {
|
||
var req struct {
|
||
URL string `json:"url"`
|
||
}
|
||
|
||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||
writeJSON(w, Response{Success: false, Message: "请求参数错误"})
|
||
return
|
||
}
|
||
|
||
// 执行命令(使用绝对路径)
|
||
exePath := filepath.Join("..", "wechat-crawler.exe")
|
||
absPath, _ := filepath.Abs(exePath)
|
||
log.Printf("尝试执行: %s", absPath)
|
||
|
||
cmd := exec.Command(absPath, req.URL)
|
||
workDir, _ := filepath.Abs("..")
|
||
cmd.Dir = workDir
|
||
output, err := cmd.CombinedOutput()
|
||
|
||
if err != nil {
|
||
log.Printf("执行失败: %v, 输出: %s", err, string(output))
|
||
writeJSON(w, Response{Success: false, Message: "执行失败: " + string(output)})
|
||
return
|
||
}
|
||
|
||
// 从输出中提取公众号主页链接
|
||
outputStr := string(output)
|
||
lines := strings.Split(outputStr, "\n")
|
||
var homepageURL string
|
||
|
||
for _, line := range lines {
|
||
if strings.Contains(line, "公众号主页链接") || strings.Contains(line, "https://mp.weixin.qq.com/mp/profile_ext") {
|
||
// 提取URL
|
||
if idx := strings.Index(line, "https://"); idx != -1 {
|
||
homepageURL = strings.TrimSpace(line[idx:])
|
||
break
|
||
}
|
||
}
|
||
}
|
||
|
||
if homepageURL == "" {
|
||
writeJSON(w, Response{Success: false, Message: "未能提取到主页链接"})
|
||
return
|
||
}
|
||
|
||
writeJSON(w, Response{
|
||
Success: true,
|
||
Message: "提取成功",
|
||
Data: map[string]string{
|
||
"homepage": homepageURL,
|
||
"output": outputStr,
|
||
},
|
||
})
|
||
}
|
||
|
||
// 下载单篇文章(这里需要实现具体逻辑)
|
||
func downloadArticleHandler(w http.ResponseWriter, r *http.Request) {
|
||
var req struct {
|
||
URL string `json:"url"`
|
||
SaveImage bool `json:"save_image"`
|
||
SaveContent bool `json:"save_content"`
|
||
}
|
||
|
||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||
writeJSON(w, Response{Success: false, Message: "请求参数错误"})
|
||
return
|
||
}
|
||
|
||
currentTask.Running = true
|
||
currentTask.Progress = 0
|
||
currentTask.Message = "正在下载文章..."
|
||
|
||
// 注意:这里需要实际调用爬虫的下载功能
|
||
// 由于当前后端程序没有单独的下载单篇文章的命令行接口
|
||
// 需要后续实现或使用其他方式
|
||
|
||
writeJSON(w, Response{
|
||
Success: true,
|
||
Message: "下载任务已启动",
|
||
Data: map[string]interface{}{
|
||
"url": req.URL,
|
||
},
|
||
})
|
||
}
|
||
|
||
// 获取文章列表
|
||
func getArticleListHandler(w http.ResponseWriter, r *http.Request) {
|
||
var req struct {
|
||
AccessToken string `json:"access_token"`
|
||
Pages int `json:"pages"`
|
||
}
|
||
|
||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||
writeJSON(w, Response{Success: false, Message: "请求参数错误"})
|
||
return
|
||
}
|
||
|
||
currentTask.Running = true
|
||
currentTask.Progress = 0
|
||
currentTask.Message = "正在获取文章列表..."
|
||
|
||
// 同步执行爬虫程序(功能2:获取文章列表)
|
||
exePath := filepath.Join("..", "wechat-crawler.exe")
|
||
absPath, _ := filepath.Abs(exePath)
|
||
workDir, _ := filepath.Abs("..")
|
||
|
||
log.Printf("启动功能2: %s, 工作目录: %s", absPath, workDir)
|
||
cmd := exec.Command(absPath)
|
||
cmd.Dir = workDir
|
||
|
||
// 创建输入管道
|
||
stdin, err := cmd.StdinPipe()
|
||
if err != nil {
|
||
log.Printf("创建输入管道失败: %v", err)
|
||
currentTask.Running = false
|
||
writeJSON(w, Response{Success: false, Message: "创建输入管道失败: " + err.Error()})
|
||
return
|
||
}
|
||
|
||
// 启动命令
|
||
if err := cmd.Start(); err != nil {
|
||
log.Printf("启动命令失败: %v", err)
|
||
currentTask.Running = false
|
||
writeJSON(w, Response{Success: false, Message: "启动命令失败: " + err.Error()})
|
||
return
|
||
}
|
||
|
||
// 发送选项"2"(功能2:通过access_token获取文章列表)
|
||
fmt.Fprintln(stdin, "2")
|
||
fmt.Fprintln(stdin, req.AccessToken)
|
||
if req.Pages > 0 {
|
||
fmt.Fprintf(stdin, "%d\n", req.Pages)
|
||
} else {
|
||
fmt.Fprintln(stdin, "0")
|
||
}
|
||
stdin.Close()
|
||
|
||
// 等待命令完成
|
||
if err := cmd.Wait(); err != nil {
|
||
log.Printf("命令执行失败: %v", err)
|
||
currentTask.Running = false
|
||
writeJSON(w, Response{Success: false, Message: "命令执行失败: " + err.Error()})
|
||
return
|
||
}
|
||
|
||
currentTask.Running = false
|
||
currentTask.Progress = 100
|
||
currentTask.Message = "文章列表获取完成"
|
||
|
||
// 查找生成的文件并返回下载链接
|
||
dataDir := "../data"
|
||
entries, err := os.ReadDir(dataDir)
|
||
if err != nil {
|
||
writeJSON(w, Response{Success: false, Message: "读取数据目录失败: " + err.Error()})
|
||
return
|
||
}
|
||
|
||
// 查找最新创建的公众号目录
|
||
var latestDir string
|
||
var latestTime time.Time
|
||
for _, entry := range entries {
|
||
if entry.IsDir() && entry.Name() != "." && entry.Name() != ".." {
|
||
info, _ := entry.Info()
|
||
if info.ModTime().After(latestTime) {
|
||
latestTime = info.ModTime()
|
||
latestDir = entry.Name()
|
||
}
|
||
}
|
||
}
|
||
|
||
if latestDir == "" {
|
||
writeJSON(w, Response{Success: false, Message: "未找到生成的数据目录"})
|
||
return
|
||
}
|
||
|
||
log.Printf("找到最新目录: %s", latestDir)
|
||
|
||
// 查找文章列表文件(优先查找直连链接文件)
|
||
accountPath := filepath.Join(dataDir, latestDir)
|
||
files, err := os.ReadDir(accountPath)
|
||
if err != nil {
|
||
writeJSON(w, Response{Success: false, Message: "读取公众号目录失败: " + err.Error()})
|
||
return
|
||
}
|
||
|
||
var excelFile string
|
||
// 优先查找直连链接文件(.xlsx或.txt)
|
||
for _, file := range files {
|
||
if !file.IsDir() && strings.Contains(file.Name(), "直连链接") {
|
||
if strings.HasSuffix(file.Name(), ".xlsx") || strings.HasSuffix(file.Name(), ".txt") {
|
||
excelFile = file.Name()
|
||
log.Printf("找到直连链接文件: %s", excelFile)
|
||
break
|
||
}
|
||
}
|
||
}
|
||
|
||
// 如果没有直连链接文件,查找原始链接文件
|
||
if excelFile == "" {
|
||
for _, file := range files {
|
||
if !file.IsDir() && strings.Contains(file.Name(), "原始链接") {
|
||
if strings.HasSuffix(file.Name(), ".xlsx") || strings.HasSuffix(file.Name(), ".txt") {
|
||
excelFile = file.Name()
|
||
log.Printf("找到原始链接文件: %s", excelFile)
|
||
break
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// 如果还是没有,查找任何文章列表文件
|
||
if excelFile == "" {
|
||
for _, file := range files {
|
||
if !file.IsDir() && strings.Contains(file.Name(), "文章列表") {
|
||
if strings.HasSuffix(file.Name(), ".xlsx") || strings.HasSuffix(file.Name(), ".txt") {
|
||
excelFile = file.Name()
|
||
log.Printf("找到文章列表文件: %s", excelFile)
|
||
break
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if excelFile == "" {
|
||
// 列出所有文件用于调试
|
||
var fileList []string
|
||
for _, file := range files {
|
||
fileList = append(fileList, file.Name())
|
||
}
|
||
log.Printf("目录 %s 中的文件: %v", latestDir, fileList)
|
||
writeJSON(w, Response{Success: false, Message: "未找到Excel文件,目录中的文件: " + strings.Join(fileList, ", ")})
|
||
return
|
||
}
|
||
|
||
writeJSON(w, Response{
|
||
Success: true,
|
||
Message: "文章列表获取成功",
|
||
Data: map[string]interface{}{
|
||
"account": latestDir,
|
||
"filename": excelFile,
|
||
"download": fmt.Sprintf("/download/%s/%s", latestDir, excelFile),
|
||
},
|
||
})
|
||
}
|
||
|
||
// 批量下载文章
|
||
func batchDownloadHandler(w http.ResponseWriter, r *http.Request) {
|
||
var req struct {
|
||
OfficialAccount string `json:"official_account"`
|
||
SaveImage bool `json:"save_image"`
|
||
SaveContent bool `json:"save_content"`
|
||
}
|
||
|
||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||
writeJSON(w, Response{Success: false, Message: "请求参数错误"})
|
||
return
|
||
}
|
||
|
||
currentTask.Running = true
|
||
currentTask.Progress = 0
|
||
currentTask.Message = "正在批量下载文章..."
|
||
|
||
// 同步执行爬虫程序(功能5)
|
||
exePath := filepath.Join("..", "wechat-crawler.exe")
|
||
absPath, _ := filepath.Abs(exePath)
|
||
workDir, _ := filepath.Abs("..")
|
||
|
||
log.Printf("启动功能5: %s, 工作目录: %s", absPath, workDir)
|
||
cmd := exec.Command(absPath)
|
||
cmd.Dir = workDir
|
||
|
||
// 创建输入管道
|
||
stdin, err := cmd.StdinPipe()
|
||
if err != nil {
|
||
log.Printf("创建输入管道失败: %v", err)
|
||
currentTask.Running = false
|
||
writeJSON(w, Response{Success: false, Message: "创建输入管道失败: " + err.Error()})
|
||
return
|
||
}
|
||
|
||
// 启动命令
|
||
if err := cmd.Start(); err != nil {
|
||
log.Printf("启动命令失败: %v", err)
|
||
currentTask.Running = false
|
||
writeJSON(w, Response{Success: false, Message: "启动命令失败: " + err.Error()})
|
||
return
|
||
}
|
||
|
||
// 发送选项"5"(功能5:批量下载)
|
||
fmt.Fprintln(stdin, "5")
|
||
fmt.Fprintln(stdin, req.OfficialAccount)
|
||
|
||
// 是否保存图片
|
||
if req.SaveImage {
|
||
fmt.Fprintln(stdin, "y")
|
||
} else {
|
||
fmt.Fprintln(stdin, "n")
|
||
}
|
||
stdin.Close()
|
||
|
||
// 等待命令完成
|
||
if err := cmd.Wait(); err != nil {
|
||
log.Printf("命令执行失败: %v", err)
|
||
currentTask.Running = false
|
||
writeJSON(w, Response{Success: false, Message: "命令执行失败: " + err.Error()})
|
||
return
|
||
}
|
||
|
||
currentTask.Running = false
|
||
currentTask.Progress = 100
|
||
currentTask.Message = "批量下载完成"
|
||
|
||
// 统计下载的文章数量
|
||
accountPath := filepath.Join("../data", req.OfficialAccount, "文章详细")
|
||
var articleCount int
|
||
if entries, err := os.ReadDir(accountPath); err == nil {
|
||
articleCount = len(entries)
|
||
}
|
||
|
||
writeJSON(w, Response{
|
||
Success: true,
|
||
Message: fmt.Sprintf("批量下载完成,共下载 %d 篇文章", articleCount),
|
||
Data: map[string]interface{}{
|
||
"account": req.OfficialAccount,
|
||
"articleCount": articleCount,
|
||
"path": accountPath,
|
||
},
|
||
})
|
||
}
|
||
|
||
// 获取文章详情(功能4:包括阅读量、点赞数、评论等)
|
||
func getArticleDetailHandler(w http.ResponseWriter, r *http.Request) {
|
||
var req struct {
|
||
AccessToken string `json:"access_token"`
|
||
Pages int `json:"pages"`
|
||
}
|
||
|
||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||
log.Printf("❌ 解析请求失败: %v", err)
|
||
writeJSON(w, Response{Success: false, Message: "请求参数错误: " + err.Error()})
|
||
return
|
||
}
|
||
|
||
if req.AccessToken == "" {
|
||
log.Printf("❌ Access Token 为空")
|
||
writeJSON(w, Response{Success: false, Message: "请输入Access Token URL"})
|
||
return
|
||
}
|
||
|
||
log.Printf("\n" + strings.Repeat("=", 60))
|
||
log.Printf("📊 开始获取文章详情功能")
|
||
log.Printf("接收到的 Access Token: %s", req.AccessToken[:min(100, len(req.AccessToken))])
|
||
log.Printf("获取页数: %d (0表示全部)", req.Pages)
|
||
|
||
currentTask.Running = true
|
||
currentTask.Progress = 0
|
||
currentTask.Message = "正在解析Access Token参数..."
|
||
|
||
// 从Access Token URL中提取参数
|
||
params, err := parseAccessToken(req.AccessToken)
|
||
if err != nil {
|
||
log.Printf("❌ 解析Access Token失败: %v", err)
|
||
currentTask.Running = false
|
||
writeJSON(w, Response{Success: false, Message: "Access Token 参数格式错误: " + err.Error()})
|
||
return
|
||
}
|
||
|
||
log.Printf("✅ 参数解析成功:")
|
||
log.Printf(" - biz: %s", params["biz"][:min(20, len(params["biz"]))])
|
||
log.Printf(" - uin: %s", params["uin"])
|
||
log.Printf(" - key: %s", params["key"][:min(20, len(params["key"]))])
|
||
log.Printf(" - pass_ticket: %s", params["pass_ticket"][:min(20, len(params["pass_ticket"]))])
|
||
|
||
// 创建爬虫实例
|
||
log.Printf("🔧 创建爬虫实例...")
|
||
crawler, err := wechat.NewWechatCrawler(
|
||
params["biz"],
|
||
params["uin"],
|
||
params["key"],
|
||
params["pass_ticket"],
|
||
nil,
|
||
)
|
||
if err != nil {
|
||
log.Printf("❌ 创建爬虫实例失败: %v", err)
|
||
currentTask.Running = false
|
||
writeJSON(w, Response{Success: false, Message: "创建爬虫实例失败: " + err.Error()})
|
||
return
|
||
}
|
||
log.Printf("✅ 爬虫实例创建成功")
|
||
|
||
currentTask.Progress = 20
|
||
currentTask.Message = "正在获取公众号名称..."
|
||
|
||
// 获取公众号名称
|
||
log.Printf("📱 获取公众号名称...")
|
||
officialName, err := crawler.GetOfficialAccountName()
|
||
if err != nil {
|
||
log.Printf("❌ 获取公众号名称失败: %v", err)
|
||
currentTask.Running = false
|
||
writeJSON(w, Response{Success: false, Message: "获取公众号名称失败: " + err.Error()})
|
||
return
|
||
}
|
||
log.Printf("✅ 公众号名称: %s", officialName)
|
||
|
||
currentTask.Progress = 40
|
||
currentTask.Message = "正在获取文章列表..."
|
||
|
||
// 获取文章列表
|
||
log.Printf("📋 获取文章列表...")
|
||
var articleList [][]string
|
||
|
||
if req.Pages > 0 {
|
||
// 只获取指定页数
|
||
log.Printf("📄 限制获取前 %d 页", req.Pages)
|
||
for offset := 0; offset < req.Pages; offset++ {
|
||
result, e := crawler.GetNextList(offset)
|
||
if e != nil {
|
||
log.Printf("❌ 获取第 %d 页失败: %v", offset+1, e)
|
||
err = e
|
||
break
|
||
}
|
||
|
||
// 检查是否有数据
|
||
mFlag, ok := result["m_flag"].(int)
|
||
if !ok {
|
||
if mFlagFloat, ok := result["m_flag"].(float64); ok {
|
||
mFlag = int(mFlagFloat)
|
||
}
|
||
}
|
||
if mFlag == 0 {
|
||
log.Printf("ℹ️ 第 %d 页无更多数据", offset+1)
|
||
break
|
||
}
|
||
|
||
// 获取当前页的文章列表
|
||
log.Printf("📝 尝试从 result 中提取 passage_list...")
|
||
|
||
// 先尝试 [][]string 类型(GetNextList 实际返回的类型)
|
||
if passageListStr, ok := result["passage_list"].([][]string); ok {
|
||
log.Printf("✅ passage_list 提取成功([][]string),包含 %d 个元素", len(passageListStr))
|
||
for idx, strArr := range passageListStr {
|
||
articleList = append(articleList, strArr)
|
||
log.Printf("✅ 添加第 %d 篇文章: %v", idx+1, strArr)
|
||
}
|
||
} else if passageList, ok := result["passage_list"].([]interface{}); ok {
|
||
// 备用:尝试 []interface{} 类型
|
||
log.Printf("✅ passage_list 提取成功([]interface{}),包含 %d 个元素", len(passageList))
|
||
for idx, item := range passageList {
|
||
if arr, ok := item.([]interface{}); ok {
|
||
strArr := make([]string, len(arr))
|
||
for i, v := range arr {
|
||
if s, ok := v.(string); ok {
|
||
strArr[i] = s
|
||
}
|
||
}
|
||
articleList = append(articleList, strArr)
|
||
log.Printf("✅ 添加第 %d 篇文章: %v", idx+1, strArr)
|
||
} else {
|
||
log.Printf("❌ 第 %d 个 item 不是 []interface{} 类型,实际类型: %T", idx+1, item)
|
||
}
|
||
}
|
||
} else {
|
||
log.Printf("❌ passage_list 类型断言失败,实际类型: %T", result["passage_list"])
|
||
}
|
||
|
||
log.Printf("✅ 已获取第 %d/%d 页,当前累计 %d 篇文章", offset+1, req.Pages, len(articleList))
|
||
|
||
// 添加延迟
|
||
if offset < req.Pages-1 {
|
||
time.Sleep(2 * time.Second)
|
||
}
|
||
}
|
||
|
||
// 转换链接
|
||
log.Printf("🔗 转换文章链接...转换前共 %d 篇", len(articleList))
|
||
articleList = crawler.TransformLinks(articleList)
|
||
log.Printf("✅ 链接转换完成,共 %d 篇文章", len(articleList))
|
||
} else {
|
||
// 获取全部文章
|
||
log.Printf("📄 获取全部文章")
|
||
articleList, err = crawler.GetArticleList()
|
||
}
|
||
if err != nil {
|
||
log.Printf("❌ 获取文章列表失败: %v", err)
|
||
currentTask.Running = false
|
||
writeJSON(w, Response{Success: false, Message: "获取文章列表失败: " + err.Error()})
|
||
return
|
||
}
|
||
|
||
if len(articleList) == 0 {
|
||
log.Printf("⚠️ 文章列表为空")
|
||
currentTask.Running = false
|
||
writeJSON(w, Response{Success: false, Message: "公众号文章列表为空,可能是 Access Token 无效或公众号无文章"})
|
||
return
|
||
}
|
||
|
||
log.Printf("✅ 获取到 %d 篇文章", len(articleList))
|
||
|
||
currentTask.Progress = 60
|
||
currentTask.Message = fmt.Sprintf("正在获取文章详情 (0/%d)...", len(articleList))
|
||
|
||
// 创建保存目录
|
||
dataDir := "../data"
|
||
officialPath := filepath.Join(dataDir, officialName)
|
||
log.Printf("📁 创建保存目录: %s", officialPath)
|
||
if err := os.MkdirAll(officialPath, 0755); err != nil {
|
||
log.Printf("❌ 创建保存目录失败: %v", err)
|
||
currentTask.Running = false
|
||
writeJSON(w, Response{Success: false, Message: "创建保存目录失败: " + err.Error()})
|
||
return
|
||
}
|
||
|
||
// 获取文章详情
|
||
log.Printf("📊 开始获取文章详情数据...")
|
||
err = crawler.GetDetailList(articleList, officialPath)
|
||
if err != nil {
|
||
log.Printf("❌ 获取文章详情失败: %v", err)
|
||
currentTask.Running = false
|
||
writeJSON(w, Response{Success: false, Message: "获取文章详情失败: " + err.Error()})
|
||
return
|
||
}
|
||
|
||
log.Printf("✅ 文章详情获取完成")
|
||
|
||
currentTask.Running = false
|
||
currentTask.Progress = 100
|
||
currentTask.Message = "文章详情获取完成"
|
||
|
||
// 统计文章详情文件数量
|
||
detailPath := filepath.Join(officialPath, "文章详细")
|
||
var detailFiles []string
|
||
if entries, err := os.ReadDir(detailPath); err == nil {
|
||
for _, entry := range entries {
|
||
if !entry.IsDir() && strings.HasSuffix(entry.Name(), "_文章详情.txt") {
|
||
detailFiles = append(detailFiles, entry.Name())
|
||
}
|
||
}
|
||
}
|
||
|
||
if len(detailFiles) == 0 {
|
||
// 检查主目录
|
||
log.Printf("⚠️ 文章详细目录下未找到文件,检查主目录...")
|
||
if entries, err := os.ReadDir(officialPath); err == nil {
|
||
for _, entry := range entries {
|
||
if !entry.IsDir() && strings.HasSuffix(entry.Name(), "_文章详情.txt") {
|
||
detailFiles = append(detailFiles, entry.Name())
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
log.Printf("✅ 找到 %d 个文章详情文件", len(detailFiles))
|
||
log.Printf(strings.Repeat("=", 60) + "\n")
|
||
|
||
writeJSON(w, Response{
|
||
Success: true,
|
||
Message: fmt.Sprintf("文章详情获取成功,共 %d 篇文章", len(detailFiles)),
|
||
Data: map[string]interface{}{
|
||
"account": officialName,
|
||
"articleCount": len(detailFiles),
|
||
"path": officialPath,
|
||
},
|
||
})
|
||
}
|
||
|
||
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
|
||
|
||
// accessTokenParamPatterns extracts the credential parameters from a bare
// query fragment. Each pattern is anchored with \b so that a parameter name
// is not matched inside a longer name (for example "key" inside
// "devicekey"). The patterns are compiled once at package initialization.
var accessTokenParamPatterns = []struct {
	name string
	re   *regexp.Regexp
}{
	{"biz", regexp.MustCompile(`\b__biz=([^&]+)`)},
	{"uin", regexp.MustCompile(`\buin=([^&]+)`)},
	{"key", regexp.MustCompile(`\bkey=([^&]+)`)},
	{"pass_ticket", regexp.MustCompile(`\bpass_ticket=([^&]+)`)},
}

// parseAccessToken extracts the "__biz", "uin", "key" and "pass_ticket"
// parameters from accessToken and returns them under the map keys "biz",
// "uin", "key" and "pass_ticket".
//
// When accessToken starts with "http://" or "https://" it is parsed as a URL
// and the values are taken from its (percent-decoded) query string. Otherwise
// the first "name=value" occurrence of each parameter is taken from the raw
// text, without decoding.
//
// An error is returned when the URL cannot be parsed or when any of the four
// parameters is missing or empty.
func parseAccessToken(accessToken string) (map[string]string, error) {
	params := make(map[string]string, len(accessTokenParamPatterns))

	if strings.HasPrefix(accessToken, "http://") || strings.HasPrefix(accessToken, "https://") {
		parsedURL, err := url.Parse(accessToken)
		if err != nil {
			return nil, fmt.Errorf("URL格式错误: %w", err)
		}
		query := parsedURL.Query()
		params["biz"] = query.Get("__biz")
		params["uin"] = query.Get("uin")
		params["key"] = query.Get("key")
		params["pass_ticket"] = query.Get("pass_ticket")
	} else {
		for _, p := range accessTokenParamPatterns {
			if match := p.re.FindStringSubmatch(accessToken); len(match) > 1 {
				params[p.name] = match[1]
			}
		}
	}

	// All four parameters are required.
	required := []struct{ key, label string }{
		{"biz", "__biz"},
		{"uin", "uin"},
		{"key", "key"},
		{"pass_ticket", "pass_ticket"},
	}
	for _, req := range required {
		if params[req.key] == "" {
			return nil, fmt.Errorf("缺少%s参数", req.label)
		}
	}

	return params, nil
}
|
||
|
||
// 获取数据列表
|
||
func getDataListHandler(w http.ResponseWriter, r *http.Request) {
|
||
dataDir := "../data"
|
||
var accounts []map[string]interface{}
|
||
|
||
entries, err := os.ReadDir(dataDir)
|
||
if err != nil {
|
||
// 如果目录不存在,返回空列表而不是错误
|
||
writeJSON(w, Response{
|
||
Success: true,
|
||
Data: accounts,
|
||
})
|
||
return
|
||
}
|
||
|
||
for _, entry := range entries {
|
||
if entry.IsDir() {
|
||
accountPath := filepath.Join(dataDir, entry.Name())
|
||
|
||
// 统计文章数量
|
||
detailPath := filepath.Join(accountPath, "文章详细")
|
||
var articleCount int
|
||
if detailEntries, err := os.ReadDir(detailPath); err == nil {
|
||
articleCount = len(detailEntries)
|
||
}
|
||
|
||
// 获取最后更新时间
|
||
info, _ := entry.Info()
|
||
lastUpdate := info.ModTime().Format("2006-01-02")
|
||
|
||
accounts = append(accounts, map[string]interface{}{
|
||
"name": entry.Name(),
|
||
"articleCount": articleCount,
|
||
"path": accountPath,
|
||
"lastUpdate": lastUpdate,
|
||
})
|
||
}
|
||
}
|
||
|
||
writeJSON(w, Response{
|
||
Success: true,
|
||
Data: accounts,
|
||
})
|
||
}
|
||
|
||
// 获取任务状态
|
||
func getTaskStatusHandler(w http.ResponseWriter, r *http.Request) {
|
||
writeJSON(w, Response{
|
||
Success: true,
|
||
Data: currentTask,
|
||
})
|
||
}
|
||
|
||
// downloadFileHandler serves a file from ../data as an attachment. The
// request path has the form /api/download/<account name>/<file name>.
//
// Responses: 400 when the path does not have both parts or resolves outside
// the data directory, 404 when the target does not exist or is a directory,
// 500 when the data directory cannot be resolved.
func downloadFileHandler(w http.ResponseWriter, r *http.Request) {
	rest := strings.TrimPrefix(r.URL.Path, "/api/download/")
	parts := strings.SplitN(rest, "/", 2)
	if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
		http.Error(w, "路径错误", http.StatusBadRequest)
		return
	}
	accountName, filename := parts[0], parts[1]

	baseDir, err := filepath.Abs(filepath.Join("..", "data"))
	if err != nil {
		log.Printf("解析数据目录失败: %v", err)
		http.Error(w, "服务器内部错误", http.StatusInternalServerError)
		return
	}

	// Join cleans the path; make sure ".." segments (or, on Windows,
	// backslash-separated ones) cannot lead out of the data directory.
	absPath := filepath.Join(baseDir, accountName, filename)
	rel, err := filepath.Rel(baseDir, absPath)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		http.Error(w, "路径错误", http.StatusBadRequest)
		return
	}

	// Only existing regular entries are served; directories are not listed.
	info, err := os.Stat(absPath)
	if err != nil || info.IsDir() {
		http.Error(w, "文件不存在", http.StatusNotFound)
		return
	}

	log.Printf("下载文件: %s", absPath)

	contentType := "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
	if strings.HasSuffix(filename, ".txt") {
		contentType = "text/plain; charset=utf-8"
	}
	w.Header().Set("Content-Type", contentType)
	// The filename* parameter must be percent-encoded UTF-8 (RFC 5987).
	w.Header().Set("Content-Disposition",
		"attachment; filename*=UTF-8''"+url.PathEscape(filepath.Base(absPath)))

	http.ServeFile(w, r, absPath)
}
|
||
|
||
// writeJSON sets the JSON content type on w and writes data as a single
// JSON document followed by a newline. An encoding or write failure is
// logged; by then part of the response may already have been sent, so no
// error response is attempted.
func writeJSON(w http.ResponseWriter, data interface{}) {
	w.Header().Set("Content-Type", "application/json; charset=utf-8")
	if err := json.NewEncoder(w).Encode(data); err != nil {
		log.Printf("写入JSON响应失败: %v", err)
	}
}
|
||
|
||
// generateToken returns a new session token: 32 bytes from crypto/rand,
// hex-encoded to 64 characters.
//
// It panics when the system random source fails, because continuing would
// hand out a token built from an all-zero (predictable) buffer.
func generateToken() string {
	b := make([]byte, 32)
	if _, err := rand.Read(b); err != nil {
		panic("generateToken: reading random bytes: " + err.Error())
	}
	return hex.EncodeToString(b)
}
|
||
|
||
// callPythonScript runs "python scriptPath args..." with ../../database as
// the working directory and returns the combined stdout and stderr.
//
// On failure the returned error wraps the underlying error and contains the
// script's output; the output is also returned as the first result so that
// callers can inspect the script's message.
func callPythonScript(scriptPath string, args ...string) (string, error) {
	// Scripts are resolved relative to the database directory.
	dbDir, err := filepath.Abs(filepath.Join("..", "..", "database"))
	if err != nil {
		return "", fmt.Errorf("%w: %s", err, "")
	}

	cmdArgs := append([]string{scriptPath}, args...)
	cmd := exec.Command("python", cmdArgs...)
	cmd.Dir = dbDir

	output, err := cmd.CombinedOutput()
	if err != nil {
		return string(output), fmt.Errorf("%w: %s", err, output)
	}
	return string(output), nil
}
|
||
|
||
// 用户注册处理
|
||
func registerHandler(w http.ResponseWriter, r *http.Request) {
|
||
if r.Method != "POST" {
|
||
writeJSON(w, Response{Success: false, Message: "仅支持POST请求", Code: 405})
|
||
return
|
||
}
|
||
|
||
var req RegisterRequest
|
||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||
writeJSON(w, Response{Success: false, Message: "请求参数错误", Code: 400})
|
||
return
|
||
}
|
||
|
||
// 验证输入
|
||
if req.Username == "" || req.Password == "" || req.Email == "" {
|
||
writeJSON(w, Response{Success: false, Message: "用户名、密码和邮箱不能为空", Code: 400})
|
||
return
|
||
}
|
||
|
||
// 调用Python脚本创建用户
|
||
scriptPath := "user_cli.py"
|
||
args := []string{"create", req.Username, req.Password, req.Email}
|
||
|
||
output, err := callPythonScript(scriptPath, args...)
|
||
if err != nil {
|
||
log.Printf("注册失败: %v, 输出: %s", err, output)
|
||
|
||
// 判断错误类型
|
||
if strings.Contains(output, "用户名已存在") || strings.Contains(output, "邮箱已被注册") {
|
||
writeJSON(w, Response{Success: false, Message: "用户名或邮箱已存在", Code: 409})
|
||
} else if strings.Contains(output, "验证错误") {
|
||
writeJSON(w, Response{Success: false, Message: output, Code: 400})
|
||
} else {
|
||
writeJSON(w, Response{Success: false, Message: "注册失败", Code: 500})
|
||
}
|
||
return
|
||
}
|
||
|
||
log.Printf("用户注册成功: %s", req.Username)
|
||
writeJSON(w, Response{
|
||
Success: true,
|
||
Message: "注册成功",
|
||
Code: 200,
|
||
Data: map[string]interface{}{
|
||
"username": req.Username,
|
||
},
|
||
})
|
||
}
|
||
|
||
// 用户登录处理
|
||
func loginHandler(w http.ResponseWriter, r *http.Request) {
|
||
if r.Method != "POST" {
|
||
writeJSON(w, Response{Success: false, Message: "仅支持POST请求", Code: 405})
|
||
return
|
||
}
|
||
|
||
var req LoginRequest
|
||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||
writeJSON(w, Response{Success: false, Message: "请求参数错误", Code: 400})
|
||
return
|
||
}
|
||
|
||
// 验证输入
|
||
if req.Username == "" || req.Password == "" {
|
||
writeJSON(w, Response{Success: false, Message: "用户名和密码不能为空", Code: 400})
|
||
return
|
||
}
|
||
|
||
// 调用Python脚本验证用户
|
||
scriptPath := "user_cli.py"
|
||
args := []string{"verify", req.Username, req.Password}
|
||
|
||
output, err := callPythonScript(scriptPath, args...)
|
||
log.Printf("🔍 Python输出: %s", output)
|
||
|
||
if err != nil {
|
||
log.Printf("❌ 登录失败: %v", err)
|
||
writeJSON(w, Response{Success: false, Message: "用户名或密码错误", Code: 401})
|
||
return
|
||
}
|
||
|
||
// 生成token
|
||
token := generateToken()
|
||
|
||
// 从输出中解析user_id和用户信息
|
||
var userData map[string]interface{}
|
||
if err := json.Unmarshal([]byte(output), &userData); err != nil {
|
||
log.Printf("❌ 解析用户数据失败: %v, 输出: %s", err, output)
|
||
writeJSON(w, Response{Success: false, Message: "服务器内部错误", Code: 500})
|
||
return
|
||
}
|
||
|
||
// 检查是否成功
|
||
if success, ok := userData["success"].(bool); !ok || !success {
|
||
log.Printf("❌ 用户验证失败: %v", userData)
|
||
writeJSON(w, Response{Success: false, Message: "用户名或密码错误", Code: 401})
|
||
return
|
||
}
|
||
|
||
userID := 0
|
||
if uid, ok := userData["user_id"].(float64); ok {
|
||
userID = int(uid)
|
||
}
|
||
|
||
// 存储session
|
||
sessions[token] = &Session{
|
||
Token: token,
|
||
UserID: userID,
|
||
Expiry: time.Now().Add(24 * time.Hour), // 24小时过期
|
||
}
|
||
|
||
log.Printf("✅ 用户登录成功: %s, token: %s", req.Username, token)
|
||
|
||
// 构建user_info,不包含密码相关和success标记
|
||
userInfo := make(map[string]interface{})
|
||
for k, v := range userData {
|
||
if k != "password_hash" && k != "success" {
|
||
userInfo[k] = v
|
||
}
|
||
}
|
||
|
||
writeJSON(w, Response{
|
||
Success: true,
|
||
Message: "登录成功",
|
||
Code: 200,
|
||
Data: map[string]interface{}{
|
||
"token": token,
|
||
"user_id": userID,
|
||
"user_info": userInfo,
|
||
},
|
||
})
|
||
}
|
||
|
||
// 用户登出处理
|
||
func logoutHandler(w http.ResponseWriter, r *http.Request) {
|
||
if r.Method != "POST" {
|
||
writeJSON(w, Response{Success: false, Message: "仅支持POST请求", Code: 405})
|
||
return
|
||
}
|
||
|
||
// 从请求头中获取token
|
||
token := r.Header.Get("Authorization")
|
||
if token == "" {
|
||
var req struct {
|
||
Token string `json:"token"`
|
||
}
|
||
json.NewDecoder(r.Body).Decode(&req)
|
||
token = req.Token
|
||
}
|
||
|
||
if token == "" {
|
||
writeJSON(w, Response{Success: false, Message: "Token不能为空", Code: 400})
|
||
return
|
||
}
|
||
|
||
// 删除session
|
||
delete(sessions, token)
|
||
|
||
log.Printf("用户登出成功, token: %s", token)
|
||
|
||
writeJSON(w, Response{
|
||
Success: true,
|
||
Message: "登出成功",
|
||
Code: 200,
|
||
})
|
||
}
|
||
|
||
// 获取用户信息处理
|
||
func getUserInfoHandler(w http.ResponseWriter, r *http.Request) {
|
||
if r.Method != "GET" {
|
||
writeJSON(w, Response{Success: false, Message: "仅支持GET请求", Code: 405})
|
||
return
|
||
}
|
||
|
||
// 从请求头中获取token
|
||
token := r.Header.Get("Authorization")
|
||
if token == "" {
|
||
token = r.URL.Query().Get("token")
|
||
}
|
||
|
||
if token == "" {
|
||
writeJSON(w, Response{Success: false, Message: "Token不能为空", Code: 401})
|
||
return
|
||
}
|
||
|
||
// 验证session
|
||
session, ok := sessions[token]
|
||
if !ok || session.Expiry.Before(time.Now()) {
|
||
if ok {
|
||
delete(sessions, token) // 删除过期session
|
||
}
|
||
writeJSON(w, Response{Success: false, Message: "Token无效或已过期", Code: 401})
|
||
return
|
||
}
|
||
|
||
// 调用Python脚本获取用户信息
|
||
scriptPath := "user_cli.py"
|
||
args := []string{"get", fmt.Sprintf("%d", session.UserID)}
|
||
|
||
output, err := callPythonScript(scriptPath, args...)
|
||
if err != nil {
|
||
log.Printf("获取用户信息失败: %v", err)
|
||
writeJSON(w, Response{Success: false, Message: "获取用户信息失败", Code: 500})
|
||
return
|
||
}
|
||
|
||
// 解析用户信息
|
||
var userData map[string]interface{}
|
||
if err := json.Unmarshal([]byte(output), &userData); err != nil {
|
||
log.Printf("解析用户信息失败: %v", err)
|
||
writeJSON(w, Response{Success: false, Message: "解析用户信息失败", Code: 500})
|
||
return
|
||
}
|
||
|
||
// 删除密码哈希
|
||
delete(userData, "password_hash")
|
||
|
||
writeJSON(w, Response{
|
||
Success: true,
|
||
Message: "获取成功",
|
||
Code: 200,
|
||
Data: userData,
|
||
})
|
||
}
|
||
|
||
// 更新用户信息处理
|
||
func updateUserHandler(w http.ResponseWriter, r *http.Request) {
|
||
if r.Method != "POST" {
|
||
writeJSON(w, Response{Success: false, Message: "仅支持POST请求", Code: 405})
|
||
return
|
||
}
|
||
|
||
// 从请求头中获取token
|
||
token := r.Header.Get("Authorization")
|
||
if token == "" {
|
||
writeJSON(w, Response{Success: false, Message: "Token不能为空", Code: 401})
|
||
return
|
||
}
|
||
|
||
// 验证session
|
||
session, ok := sessions[token]
|
||
if !ok || session.Expiry.Before(time.Now()) {
|
||
if ok {
|
||
delete(sessions, token) // 删除过期session
|
||
}
|
||
writeJSON(w, Response{Success: false, Message: "Token无效或已过期", Code: 401})
|
||
return
|
||
}
|
||
|
||
// 解析请求体
|
||
var req struct {
|
||
UserID int `json:"user_id"`
|
||
Email string `json:"email"`
|
||
Bio string `json:"bio"`
|
||
}
|
||
|
||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||
log.Printf("❌ 解析请求体失败: %v", err)
|
||
writeJSON(w, Response{Success: false, Message: "请求参数错误", Code: 400})
|
||
return
|
||
}
|
||
|
||
log.Printf("🔍 更新用户信息: user_id=%d, email=%s", req.UserID, req.Email)
|
||
|
||
// 验证用户ID与session一致
|
||
if req.UserID != session.UserID {
|
||
log.Printf("❌ 用户ID不匹配: req=%d, session=%d", req.UserID, session.UserID)
|
||
writeJSON(w, Response{Success: false, Message: "无权操作", Code: 403})
|
||
return
|
||
}
|
||
|
||
// 调用Python脚本更新用户信息
|
||
scriptPath := "user_cli.py"
|
||
args := []string{"update", fmt.Sprintf("%d", req.UserID)}
|
||
|
||
// 添加需要更新的字段
|
||
if req.Email != "" {
|
||
args = append(args, "--email", req.Email)
|
||
}
|
||
if req.Bio != "" {
|
||
args = append(args, "--bio", req.Bio)
|
||
}
|
||
|
||
output, err := callPythonScript(scriptPath, args...)
|
||
log.Printf("🔍 Python输出: %s", output)
|
||
|
||
if err != nil {
|
||
log.Printf("❌ 更新用户信息失败: %v", err)
|
||
writeJSON(w, Response{Success: false, Message: "更新失败", Code: 500})
|
||
return
|
||
}
|
||
|
||
// 解析响应
|
||
var result map[string]interface{}
|
||
if err := json.Unmarshal([]byte(output), &result); err != nil {
|
||
log.Printf("❌ 解析响应失败: %v", err)
|
||
writeJSON(w, Response{Success: false, Message: "服务器内部错误", Code: 500})
|
||
return
|
||
}
|
||
|
||
// 检查是否成功
|
||
if success, ok := result["success"].(bool); !ok || !success {
|
||
errMsg := "更新失败"
|
||
if msg, ok := result["error"].(string); ok {
|
||
errMsg = msg
|
||
}
|
||
writeJSON(w, Response{Success: false, Message: errMsg, Code: 500})
|
||
return
|
||
}
|
||
|
||
log.Printf("✅ 用户信息更新成功: user_id=%d", req.UserID)
|
||
|
||
writeJSON(w, Response{
|
||
Success: true,
|
||
Message: "更新成功",
|
||
Code: 200,
|
||
})
|
||
}
|