]*>([\s\S]*?)<\/div>`)
- if match := richMediaClassRegex.FindStringSubmatch(content); len(match) > 1 {
+ // 2.1 优先查找 id=img-content 的div(微信新版本文章容器)
+ imgContentIdRegex := regexp.MustCompile(`(?s)
`)
+ if match := imgContentIdRegex.FindStringSubmatch(content); len(match) > 1 {
rawContent = match[1]
} else if rawContent == "" {
- // 2.2 尝试查找id为js_content的元素
- jsContentIdRegex := regexp.MustCompile(`(?s)
]*>([\s\S]*?)<\/div>`)
- if match := jsContentIdRegex.FindStringSubmatch(content); len(match) > 1 {
+ // 2.2 查找rich_media_content类的div(微信文章核心内容容器)
+ richMediaClassRegex := regexp.MustCompile(`(?s)
]*>([\s\S]*?)
`)
+ if match := richMediaClassRegex.FindStringSubmatch(content); len(match) > 1 {
rawContent = match[1]
+ } else if rawContent == "" {
+ // 2.3 尝试查找id为js_content的元素
+ jsContentIdRegex := regexp.MustCompile(`(?s)
]*>([\s\S]*?)
`)
+ if match := jsContentIdRegex.FindStringSubmatch(content); len(match) > 1 {
+ rawContent = match[1]
+ }
}
}
}
@@ -509,9 +611,11 @@ func (w *WechatCrawler) ExtractArticleInfo(content string) (string, string, stri
// 方法5: 尝试从微信文章特有的段落结构提取
if rawContent == "" {
- // 查找带有rich_media_p类的p标签(微信文章特有的段落样式)
- pTagsRegex := regexp.MustCompile(`(?s)
([\s\S]*?)<\/p>`)
- if matches := pTagsRegex.FindAllStringSubmatch(content, -1); len(matches) > 0 {
+ // Python版本使用 BeautifulSoup 的 getText() 方法提取所有文本
+ // 这里我们直接提取所有段落,然后过滤JavaScript
+ // 查找带有data-pm-slice或js_darkmode类的p标签(微信文章特有样式)
+ specialPTagsRegex := regexp.MustCompile(`(?s)
]*(?:data-pm-slice|js_darkmode)[^>]*>([\s\S]*?)
`)
+ if matches := specialPTagsRegex.FindAllStringSubmatch(content, -1); len(matches) > 0 {
// 如果找到多个p标签,合并它们的内容
var combinedContent strings.Builder
for _, match := range matches {
@@ -521,10 +625,11 @@ func (w *WechatCrawler) ExtractArticleInfo(content string) (string, string, stri
}
}
rawContent = combinedContent.String()
- } else {
- // 尝试一般的p标签,这是微信文章的备用段落格式
- generalPTagsRegex := regexp.MustCompile(`(?s)
]*>([\s\S]*?)<\/p>`)
- if matches := generalPTagsRegex.FindAllStringSubmatch(content, -1); len(matches) > 10 { // 至少10个p标签才可能是文章内容
+ } else if rawContent == "" {
+ // 查找带有rich_media_p类的p标签(微信文章特有的段落样式)
+ pTagsRegex := regexp.MustCompile(`(?s)
([\s\S]*?)
`)
+ if matches := pTagsRegex.FindAllStringSubmatch(content, -1); len(matches) > 0 {
+ // 如果找到多个p标签,合并它们的内容
var combinedContent strings.Builder
for _, match := range matches {
if len(match) > 1 {
@@ -533,6 +638,29 @@ func (w *WechatCrawler) ExtractArticleInfo(content string) (string, string, stri
}
}
rawContent = combinedContent.String()
+ } else {
+ // 尝试一般的p标签,这是微信文章的备用段落格式
+ generalPTagsRegex := regexp.MustCompile(`(?s)
]*>([\s\S]*?)
`)
+ if matches := generalPTagsRegex.FindAllStringSubmatch(content, -1); len(matches) > 10 { // 至少10个p标签才可能是文章内容
+ var combinedContent strings.Builder
+ for _, match := range matches {
+ if len(match) > 1 {
+ // 过滤JavaScript代码:如果段落包含function、var、window等关键词,跳过
+ paragraph := match[1]
+ // 简单过滤:如果段落中包含大量的JavaScript关键词,跳过
+ if !strings.Contains(paragraph, "function") &&
+ !strings.Contains(paragraph, "var ") &&
+ !strings.Contains(paragraph, "window.") &&
+ !strings.Contains(paragraph, ".length") {
+ combinedContent.WriteString(paragraph)
+ combinedContent.WriteString("\n")
+ }
+ }
+ }
+ if combinedContent.Len() > 100 { // 只有当合并后的内容超过100字符才认为有效
+ rawContent = combinedContent.String()
+ }
+ }
}
}
}
@@ -759,7 +887,8 @@ func (w *WechatCrawler) ExtractArticleInfo(content string) (string, string, stri
cleanText = regexp.MustCompile(`(?s)\s*document\.writeln\s*\([^)]*\);`).ReplaceAllString(cleanText, "")
// 如果JavaScript关键词较少且中文密度较高,可能是有效的文章内容
- if (jsCount < 5 || chineseDensity > 0.3) && len(cleanText) > 50 {
+ // 降低要求:只要中文密度 > 5% 或 长度 > 100 就认为有效
+ if (jsCount < 10 || chineseDensity > 0.05) && len(cleanText) > 50 {
// 按句子或段落分割,避免一行过长
if len(cleanText) > 0 {
// 首先尝试按段落分割
@@ -775,25 +904,17 @@ func (w *WechatCrawler) ExtractArticleInfo(content string) (string, string, stri
}
// 只添加非空且长度合理的段落(避免添加JavaScript片段)
paragraph := strings.TrimSpace(paragraphs[i])
- // 增强过滤条件,避免JavaScript片段,同时考虑中文密度
+ // 降低过滤条件,增强中文密度考虑
paraDensity := w.calculateChineseDensity(paragraph)
paraJsCount := w.jsKeywordCount(paragraph)
- if len(paragraph) > 15 &&
- !strings.Contains(paragraph, "{") &&
- !strings.Contains(paragraph, "}") &&
- !strings.Contains(paragraph, "function") &&
- !strings.Contains(paragraph, "var") &&
- !strings.Contains(paragraph, "window.") &&
- !strings.Contains(paragraph, "WX_BJ_REPORT") &&
- !strings.Contains(paragraph, "BadJs") &&
- (paraJsCount < 2 || paraDensity > 0.4) { // 根据中文密度调整JavaScript关键词容忍度
+ if len(paragraph) > 10 && (paraJsCount < 3 || paraDensity > 0.1) {
textContent = append(textContent, paragraph)
}
}
}
// 如果没有成功分割成段落,直接添加整个文本
- if len(textContent) == 0 && len(cleanText) > 50 && (w.jsKeywordCount(cleanText) < 3 || chineseDensity > 0.5) {
+ if len(textContent) == 0 && len(cleanText) > 50 && (w.jsKeywordCount(cleanText) < 5 || chineseDensity > 0.1) {
textContent = append(textContent, cleanText)
}
}
@@ -801,56 +922,97 @@ func (w *WechatCrawler) ExtractArticleInfo(content string) (string, string, stri
}
// 最后的备选方案:尝试从整个页面中提取非JavaScript的文本内容
- if len(textContent) == 0 {
- // 移除所有HTML标签
- allText := regexp.MustCompile(`<[^>]*>`).ReplaceAllString(content, "")
+ // 【修改】参考Python版本,直接提取所有文本,然后过滤
+ if len(textContent) < 5 { // 如果提取的段落很少,说明前面的方法都失败了
+ fmt.Printf(" [调试] 前面提取方法只得到%d个段落,尝试简单提取方法\n", len(textContent))
- // 应用增强的JavaScript代码块过滤
- allText = w.filterJavaScriptBlocks(allText)
+ // 方法1:优先尝试从 id="js_content" 容器中提取
+ contentRegex := regexp.MustCompile(`(?s)
]*id=["']js_content["'][^>]*>(.*?)
\s*`)
+ styleRegex := regexp.MustCompile(`(?s)`)
+ allText := scriptRegex.ReplaceAllString(content, "")
+ allText = styleRegex.ReplaceAllString(allText, "")
+
+ // 移除所有HTML标签
+ tagRegex := regexp.MustCompile(`<[^>]*>`)
+ allText = tagRegex.ReplaceAllString(allText, "\n")
+
+ // 移除HTML实体
+ allText = strings.ReplaceAll(allText, "<", "<")
+ allText = strings.ReplaceAll(allText, ">", ">")
+ allText = strings.ReplaceAll(allText, """, "\"")
+ allText = strings.ReplaceAll(allText, "&", "&")
+ allText = strings.ReplaceAll(allText, " ", " ")
+
+ // 按行分割,过滤空行和JS代码
+ textContent = []string{} // 重置
+ lines := strings.Split(allText, "\n")
+ for _, line := range lines {
+ line = strings.TrimSpace(line)
+ // 基础过滤:只保留有中文的行,且不是明显JS代码
+ if len(line) > 0 &&
+ !strings.HasPrefix(line, "var ") &&
+ !strings.HasPrefix(line, "function") &&
+ !strings.Contains(line, "window.") &&
+ w.calculateChineseDensity(line) > 0.1 {
+ textContent = append(textContent, line)
+ }
+ }
+ fmt.Printf(" [调试] 全局提取到 %d 个段落\n", len(textContent))
}
}
// 对提取的内容应用最终过滤,确保只保留真正的文章正文
filteredContent := w.finalContentFilter(textContent)
+
+ // 【调试】输出过滤前后的对比
+ fmt.Printf(" [调试] 过滤前段落数: %d, 过滤后段落数: %d\n", len(textContent), len(filteredContent))
+ if len(filteredContent) == 0 && len(textContent) > 0 {
+ fmt.Printf(" [调试] ⚠️ finalContentFilter 过滤掉了所有内容!\n")
+ fmt.Printf(" [调试] 过滤前第一段示例: %s\n", textContent[0][:min(len(textContent[0]), 200)])
+ }
+
return createTime, title, commentID, reqID, w.extractAuthor(content), filteredContent
}
// min reports the smaller of the two integers a and b.
// (Hand-rolled because the built-in min requires Go 1.21+.)
func min(a, b int) int {
	if a > b {
		return b
	}
	return a
}
+
// calculateChineseDensity 计算文本中中文字符的密度
func (w *WechatCrawler) calculateChineseDensity(text string) float64 {
if len(text) == 0 {
@@ -921,91 +1083,77 @@ func (w *WechatCrawler) extractChineseText(text string) string {
}
// finalContentFilter 最终内容过滤,确保只保留真正的文章正文
-func (w *WechatCrawler) finalContentFilter(text string) string {
- // 1. 移除明显的JavaScript代码块
- // 移除WX_BJ_REPORT相关代码
- wxCodeRegex := regexp.MustCompile(`(?s)\s*WX_BJ_REPORT\s*\([^)]*\);|\s*var\s+WX_BJ_REPORT\s*=\s*function\s*\([^)]*\)\s*{[^}]*}\s*|\s*if\s*\(WX_BJ_REPORT\)[^;]*;`)
- text = wxCodeRegex.ReplaceAllString(text, "")
-
- // 移除BadJs相关代码
- badJsRegex := regexp.MustCompile(`(?s)\s*BadJs\s*\([^)]*\);|\s*var\s+BadJs\s*=\s*function\s*\([^)]*\)\s*{[^}]*}\s*|\s*if\s*\(BadJs\)[^;]*;`)
- text = badJsRegex.ReplaceAllString(text, "")
-
- // 移除window.logs相关代码
- logsRegex := regexp.MustCompile(`(?s)\s*window\.logs\s*=\s*\[.*?\];|\s*window\.logs\s*\..*?;`)
- text = logsRegex.ReplaceAllString(text, "")
-
- // 移除函数定义
- funcRegex := regexp.MustCompile(`(?s)\s*function\s+[^(]*\([^)]*\)\s*{[^}]*}\s*|\s*var\s+[^=]*=\s*function\s*\([^)]*\)\s*{[^}]*}\s*|\s*[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*function\s*\([^)]*\)\s*{[^}]*}\s*`)
- text = funcRegex.ReplaceAllString(text, "")
-
- // 移除变量声明
- varRegex := regexp.MustCompile(`(?s)\s*var\s+[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*{[^}]*}\s*;?|\s*let\s+[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*[^;]*;|\s*const\s+[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*[^;]*;|\s*window\.[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*[^;]*;`)
- text = varRegex.ReplaceAllString(text, "")
-
- // 移除控制流语句
- flowRegex := regexp.MustCompile(`(?s)\s*if\s*\([^)]*\)\s*{[^}]*}\s*|\s*for\s*\([^)]*\)\s*{[^}]*}\s*|\s*while\s*\([^)]*\)\s*{[^}]*}\s*`)
- text = flowRegex.ReplaceAllString(text, "")
-
- // 2. 提取真正的文章段落
- paragraphs := regexp.MustCompile(`[。!?.!?]\s*`).Split(text, -1)
- punctuations := regexp.MustCompile(`[。!?.!?]\s*`).FindAllString(text, -1)
-
+// 修改:大幅降低过滤门槛,参考Python版本的简单逻辑
+func (w *WechatCrawler) finalContentFilter(textContent []string) []string {
var validParagraphs []string
- for i := 0; i < len(paragraphs); i++ {
- if paragraphs[i] != "" {
- paragraph := paragraphs[i]
- if i < len(punctuations) {
- paragraph += punctuations[i]
- }
- paragraph = strings.TrimSpace(paragraph)
- // 计算段落特征
- paraDensity := w.calculateChineseDensity(paragraph)
- paraJsCount := w.jsKeywordCount(paragraph)
- chineseCount := 0
- for _, char := range paragraph {
- if char >= 0x4e00 && char <= 0x9fa5 {
- chineseCount++
- }
- }
-
- // 严格的过滤规则
- if len(paragraph) > 25 && // 足够长的段落
- !strings.Contains(paragraph, "{") &&
- !strings.Contains(paragraph, "}") &&
- !strings.Contains(paragraph, "function") &&
- !strings.Contains(paragraph, "var") &&
- !strings.Contains(paragraph, "window.") &&
- !strings.Contains(paragraph, "WX_BJ_REPORT") &&
- !strings.Contains(paragraph, "BadJs") &&
- chineseCount > 15 && // 至少15个中文字符
- paraDensity > 0.4 && // 中文密度大于40%
- paraJsCount < 3 { // JavaScript关键词少于3个
- validParagraphs = append(validParagraphs, paragraph)
+ // 【修改】如果提取的段落很少,说明可能是提取阶段的问题,直接返回
+ if len(textContent) <= 3 {
+ fmt.Printf(" [调试] 提取的段落太少(%d个),可能提取逻辑有问题,跳过过滤\n", len(textContent))
+ // 简单过滤:只去掉纯标题行和过短的内容
+ for _, text := range textContent {
+ text = strings.TrimSpace(text)
+ // 去掉明显的JavaScript关键词行
+ if len(text) > 5 &&
+ !strings.Contains(text, "function(") &&
+ !strings.Contains(text, "window.") &&
+ !strings.Contains(text, "var ") {
+ validParagraphs = append(validParagraphs, text)
}
}
+ return validParagraphs
}
- // 3. 如果没有找到有效的段落,尝试使用更宽松的规则
- if len(validParagraphs) == 0 {
- // 直接检查整个文本
- overallDensity := w.calculateChineseDensity(text)
- overallJsCount := w.jsKeywordCount(text)
- overallChineseCount := 0
+ // 【修改】降低过滤标准,参考Python版本
+ for _, text := range textContent {
+ // 基础清理
+ text = strings.TrimSpace(text)
+
+ // 计算中文字符数
+ chineseCount := 0
for _, char := range text {
if char >= 0x4e00 && char <= 0x9fa5 {
- overallChineseCount++
+ chineseCount++
}
}
- // 宽松条件:如果中文密度很高且JavaScript关键词较少
- if overallDensity > 0.6 && overallJsCount < 5 && overallChineseCount > 100 {
+ // 计算中文密度
+ paraDensity := w.calculateChineseDensity(text)
+ paraJsCount := w.jsKeywordCount(text)
+
+ // 【大幅降低门槛】:
+ // - 长度 > 10(原来25)
+ // - 中文字符 > 3(原来15)
+ // - 中文密度 > 0.15(原来0.4)
+ // - JavaScript关键词 < 5(原来3)
+ if len(text) > 10 &&
+ !strings.Contains(text, "function(") &&
+ !strings.Contains(text, "window.") &&
+ !strings.Contains(text, "WX_BJ_REPORT") &&
+ !strings.Contains(text, "BadJs") &&
+ chineseCount > 3 &&
+ paraDensity > 0.15 &&
+ paraJsCount < 5 {
validParagraphs = append(validParagraphs, text)
}
}
- return strings.Join(validParagraphs, "\n\n")
+ // 【新增】如果过滤后还是空的,使用最宽松的规则
+ if len(validParagraphs) == 0 && len(textContent) > 0 {
+ fmt.Printf(" [调试] 标准过滤后仍为空,使用最宽松规则\n")
+ for _, text := range textContent {
+ text = strings.TrimSpace(text)
+ // 只要有中文字符且不是明显的JS代码就保留
+ overallDensity := w.calculateChineseDensity(text)
+ overallJsCount := w.jsKeywordCount(text)
+
+ if len(text) > 5 && overallDensity > 0.1 && overallJsCount < 10 {
+ validParagraphs = append(validParagraphs, text)
+ }
+ }
+ }
+
+ return validParagraphs
}
// jsKeywordCount 计算文本中JavaScript关键词的数量 - 增强版
@@ -1270,8 +1418,16 @@ func (w *WechatCrawler) GetArticleList() ([][]string, error) {
}
// 检查是否还有更多文章
- mFlag, ok := result["m_flag"].(float64)
- if !ok || mFlag == 0 {
+ mFlag, ok := result["m_flag"].(int)
+ if !ok {
+ // 尝试转换为float64(JSON反序列化可能将数字解析为float64)
+ if mFlagFloat, ok := result["m_flag"].(float64); ok {
+ mFlag = int(mFlagFloat)
+ } else {
+ mFlag = 0
+ }
+ }
+ if mFlag == 0 {
break
}
@@ -1309,12 +1465,72 @@ func (w *WechatCrawler) SaveArticleListToExcel(officialPath string, articleList
filePath := fmt.Sprintf("%s/文章列表(article_list)_直连链接.txt", officialPath)
var content strings.Builder
+ // 添加 UTF-8 BOM 头,确保 Excel 正确识别编码
+ content.WriteString("\xEF\xBB\xBF")
+
// 写入标题行
content.WriteString("序号,创建时间,标题,链接\n")
// 写入文章列表
for i, article := range articleList {
- content.WriteString(fmt.Sprintf("%d,%s,%s,%s\n", i+1, article[1], article[2], article[3]))
+ if len(article) < 4 {
+ continue // 跳过不完整的数据
+ }
+
+ // 转换时间戳为可读格式(如果是时间戳)
+ createTime := article[1]
+
+ // 调试输出:查看原始时间戳
+ if i == 0 { // 只打印第一篇文章,避免输出过多
+ fmt.Printf("调试信息 - 第1篇文章\n")
+ fmt.Printf(" article[0]: '%s'\n", article[0])
+ fmt.Printf(" article[1] (时间戳): '%s'\n", article[1])
+ fmt.Printf(" article[2] (标题): '%s'\n", article[2])
+ fmt.Printf(" 时间戳长度: %d\n", len(article[1]))
+ }
+
+ if createTime != "" && createTime != "0" {
+ // 尝试将字符串转换为时间戳
+ var ts int64
+ n, err := fmt.Sscanf(createTime, "%d", &ts)
+ if i == 0 {
+ fmt.Printf(" Sscanf 结果: n=%d, err=%v, ts=%d\n", n, err, ts)
+ }
+ if err == nil && n == 1 && ts > 0 {
+ // 转换为可读的日期时间格式
+ createTime = time.Unix(ts, 0).Format("2006-01-02 15:04:05")
+ if i == 0 {
+ fmt.Printf(" 转换后的时间: %s\n", createTime)
+ }
+ } else {
+ // 如果转换失败,保留原始值
+ if i == 0 {
+ fmt.Printf(" 转换失败,保留原始值: %s\n", createTime)
+ }
+ }
+ } else {
+ if i == 0 {
+ fmt.Printf(" 时间戳为空或为0,设置为'未知时间'\n")
+ }
+ createTime = "未知时间"
+ }
+
+ // 清理和转义标题(移除换行符、制表符等)
+ title := strings.TrimSpace(article[2])
+ title = strings.ReplaceAll(title, "\n", " ")
+ title = strings.ReplaceAll(title, "\r", " ")
+ title = strings.ReplaceAll(title, "\t", " ")
+
+ // 如果标题包含逗号或引号,需要用双引号包裹并转义内部引号
+ if strings.Contains(title, ",") || strings.Contains(title, "\"") || strings.Contains(title, "\n") {
+ title = "\"" + strings.ReplaceAll(title, "\"", "\"\"") + "\""
+ }
+
+ // 清理链接
+ link := strings.TrimSpace(article[3])
+
+ // 写入CSV行
+ content.WriteString(fmt.Sprintf("%d,%s,%s,%s\n", i+1, createTime, title, link))
}
// 写入文件
@@ -1324,6 +1540,7 @@ func (w *WechatCrawler) SaveArticleListToExcel(officialPath string, articleList
}
fmt.Printf("文章列表已保存到: %s\n", filePath)
+ fmt.Printf("共保存 %d 篇文章\n", len(articleList))
return nil
}
@@ -1357,9 +1574,27 @@ func (w *WechatCrawler) GetArticleDetail(link string) (*ArticleDetail, error) {
return nil, err
}
+ // 【调试】保存原始HTML到文件,用于分析内容提取问题
+ debugPath := "./debug_article_raw.html"
+ if err := os.WriteFile(debugPath, []byte(content), 0644); err == nil {
+ fmt.Printf(" [调试] 原始HTML已保存: %s (长度: %d 字节)\n", debugPath, len(content))
+ }
+
// 提取文章信息
createTime, title, commentID, reqID, _, textContent := w.ExtractArticleInfo(content)
+ // 【调试】输出内容提取详情
+ fmt.Printf(" [调试] 提取结果 - 标题: %s, 段落数: %d\n", title, len(textContent))
+ if len(textContent) > 0 {
+ firstPara := textContent[0]
+ if len(firstPara) > 100 {
+ firstPara = firstPara[:100] + "..."
+ }
+ fmt.Printf(" [调试] 第一段: %s\n", firstPara)
+ } else {
+ fmt.Printf(" [调试] ⚠️ ExtractArticleInfo 未提取到任何内容!\n")
+ }
+
// 提取公众号名称
accountName := w.ExtractOfficialAccountName(content)
@@ -1428,8 +1663,19 @@ func (w *WechatCrawler) GetDetailList(articleList [][]string, officialPath strin
continue
}
- // 保存文章详情 - 确保使用文章标题作为文件名
- filePath := fmt.Sprintf("%s/%s_文章详情.txt", officialPath, detail.Title)
+ // 保存文章详情 - 确保使用文章标题作为文件名,并清理非法字符
+ // 清理标题中的非法字符
+ cleanTitle := detail.Title
+ invalidChars := []string{"\\", "/", ":", "*", "?", "\"", "<", ">", "|"}
+ for _, char := range invalidChars {
+ cleanTitle = strings.ReplaceAll(cleanTitle, char, "_")
+ }
+ // 限制文件名长度,避免路径过长
+ if len(cleanTitle) > 100 {
+ cleanTitle = cleanTitle[:100]
+ }
+
+ filePath := fmt.Sprintf("%s/%s_文章详情.txt", officialPath, cleanTitle)
if err := w.SaveArticleDetailToExcel(detail, filePath); err != nil {
fmt.Printf("保存文章详情失败: %v\n", err)
errorCount++
@@ -1465,9 +1711,18 @@ func (w *WechatCrawler) GetDetailList(articleList [][]string, officialPath strin
// SaveArticleDetailToExcel 保存文章详情到Excel
func (c *WechatCrawler) SaveArticleDetailToExcel(article *ArticleDetail, filePath string) error {
- // 简化实现,保存为文本文件
+ // 【修复】不要清理整个路径!只需要确保目录存在即可
+ // filePath 已经在调用处清理过了文件名部分
+ // 这里直接使用即可
+
var content strings.Builder
+ // 添加 UTF-8 BOM 头,确保正确显示中文
+ content.WriteString("\xEF\xBB\xBF")
+
+ content.WriteString("=")
+ content.WriteString(strings.Repeat("=", 80))
+ content.WriteString("\n")
content.WriteString(fmt.Sprintf("本地创建时间: %s\n", article.LocalTime))
content.WriteString(fmt.Sprintf("文章发布时间: %s\n", article.CreateTime))
content.WriteString(fmt.Sprintf("公众号名称: %s\n", article.OfficialName))
@@ -1477,15 +1732,57 @@ func (c *WechatCrawler) SaveArticleDetailToExcel(article *ArticleDetail, filePat
content.WriteString(fmt.Sprintf("点赞数: %s\n", article.LikeCount))
content.WriteString(fmt.Sprintf("转发数: %s\n", article.ShareCount))
content.WriteString(fmt.Sprintf("在看数: %s\n", article.ShowRead))
- content.WriteString("\n文章内容:\n")
+ content.WriteString(strings.Repeat("=", 80))
+ content.WriteString("\n\n")
- for _, line := range article.Content {
- content.WriteString(line)
- content.WriteString("\n")
+ content.WriteString("文章内容:\n")
+ content.WriteString(strings.Repeat("-", 80))
+ content.WriteString("\n")
+
+ for i, line := range article.Content {
+ // 清理内容,移除多余的空白字符
+ cleanLine := strings.TrimSpace(line)
+ if cleanLine != "" {
+ content.WriteString(cleanLine)
+ content.WriteString("\n")
+
+ // 每个段落后添加空行,提高可读性
+ if i < len(article.Content)-1 {
+ content.WriteString("\n")
+ }
+ }
}
+ // 如果有评论,添加评论区
+ if len(article.Comments) > 0 {
+ content.WriteString("\n")
+ content.WriteString(strings.Repeat("=", 80))
+ content.WriteString("\n")
+ content.WriteString(fmt.Sprintf("评论区 (共 %d 条评论):\n", len(article.Comments)))
+ content.WriteString(strings.Repeat("-", 80))
+ content.WriteString("\n\n")
+
+ for i, comment := range article.Comments {
+ content.WriteString(fmt.Sprintf("%d. %s", i+1, comment))
+ if i < len(article.CommentLikes) && article.CommentLikes[i] != "" {
+ content.WriteString(fmt.Sprintf(" (点赞: %s)", article.CommentLikes[i]))
+ }
+ content.WriteString("\n\n")
+ }
+ }
+
+ content.WriteString("\n")
+ content.WriteString(strings.Repeat("=", 80))
+ content.WriteString("\n")
+ content.WriteString("文件结束\n")
+
// 写入文件
- return os.WriteFile(filePath, []byte(content.String()), 0644)
+ err := os.WriteFile(filePath, []byte(content.String()), 0644)
+ if err != nil {
+ return fmt.Errorf("保存文章详情失败: %v", err)
+ }
+
+ return nil
}
// GetListArticleFromFile 根据公众号名称或文章链接,从文件中读取文章列表并下载内容
@@ -1495,14 +1792,14 @@ func (w *WechatCrawler) GetListArticleFromFile(nameLink string, imgSaveFlag bool
if strings.Contains(nameLink, "http") {
fmt.Println("检测到输入为链接,开始获取公众号名称")
// 从文章链接获取公众号信息
- _, err := w.GetOfficialAccountLinkFromArticle(nameLink)
+ content, err := w.GetOneArticle(nameLink)
if err != nil {
- return fmt.Errorf("获取公众号信息失败: %v", err)
+ return fmt.Errorf("获取文章内容失败: %v", err)
}
- // 获取公众号名称
- nickname, err = w.GetOfficialAccountName()
- if err != nil {
- return fmt.Errorf("获取公众号名称失败: %v", err)
+ // 从内容中提取公众号名称
+ nickname = w.ExtractOfficialAccountName(content)
+ if nickname == "" {
+ return fmt.Errorf("无法从文章中提取公众号名称")
}
fmt.Printf("获取到公众号名称: %s\n", nickname)
} else {
@@ -1512,8 +1809,9 @@ func (w *WechatCrawler) GetListArticleFromFile(nameLink string, imgSaveFlag bool
// 2. 构建文件路径
rootPath := "./data/"
- officialNamesHead := "公众号----"
- officialPath := rootPath + officialNamesHead + nickname
+ officialPath := rootPath + nickname
+ // 【新增】创建"文章详细"子目录
+ articleDetailPath := officialPath + "/文章详细"
articleListPath := officialPath + "/文章列表(article_list)_直连链接.txt"
// 3. 检查文件是否存在
@@ -1529,47 +1827,140 @@ func (w *WechatCrawler) GetListArticleFromFile(nameLink string, imgSaveFlag bool
lines := strings.Split(string(fileContent), "\n")
var articleLinks []string
+ var articleTitles []string
+ var articleTimes []string
- // 跳过标题行,提取链接
+ // 跳过BOM头和标题行,提取链接
for i, line := range lines {
if i == 0 || line == "" {
continue
}
- parts := strings.Split(line, ",")
+ // 移除可能的BOM头
+ line = strings.TrimPrefix(line, "\xEF\xBB\xBF")
+ line = strings.TrimSpace(line)
+ if line == "" {
+ continue
+ }
+
+ // 解析CSV行(处理带引号的字段)
+ var parts []string
+ inQuote := false
+ currentPart := ""
+ for _, char := range line {
+ if char == '"' {
+ inQuote = !inQuote
+ } else if char == ',' && !inQuote {
+ parts = append(parts, currentPart)
+ currentPart = ""
+ } else {
+ currentPart += string(char)
+ }
+ }
+ parts = append(parts, currentPart) // 添加最后一个字段
+
if len(parts) >= 4 {
- link := parts[3]
- // 清理链接中的引号
- link = strings.TrimSpace(link)
+ // 序号,创建时间,标题,链接
+ time := strings.TrimSpace(parts[1])
+ title := strings.TrimSpace(parts[2])
+ link := strings.TrimSpace(parts[3])
+ // 清理引号
link = strings.Trim(link, "\"")
- articleLinks = append(articleLinks, link)
+ title = strings.Trim(title, "\"")
+
+ if link != "" && link != "链接" { // 跳过标题行
+ articleLinks = append(articleLinks, link)
+ articleTitles = append(articleTitles, title)
+ articleTimes = append(articleTimes, time)
+ }
}
}
- fmt.Printf("成功读取到%d篇文章链接\n", len(articleLinks))
+ fmt.Printf("成功读取到 %d 篇文章链接\n", len(articleLinks))
+ if len(articleLinks) == 0 {
+ return fmt.Errorf("未能从文件中提取到有效的文章链接")
+ }
// 5. 遍历下载每篇文章
successCount := 0
errorCount := 0
+ errorLinks := [][]string{} // 保存失败的文章信息
+
+ // 【新增】确保"文章详细"目录存在
+ if err := os.MkdirAll(articleDetailPath, 0755); err != nil {
+ return fmt.Errorf("创建文章详细目录失败: %v", err)
+ }
+ fmt.Printf("文章详细将保存到: %s\n", articleDetailPath)
for i, link := range articleLinks {
- fmt.Printf("正在处理第%d篇文章,链接: %s\n", i+1, link)
+ title := ""
+ if i < len(articleTitles) {
+ title = articleTitles[i]
+ }
+ creatTime := ""
+ if i < len(articleTimes) {
+ creatTime = articleTimes[i]
+ }
+
+ fmt.Printf("\n正在处理第 %d/%d 篇文章\n", i+1, len(articleLinks))
+ fmt.Printf("标题: %s\n", title)
+ fmt.Printf("链接: %s\n", link)
// 获取文章详情
detail, err := w.GetArticleDetail(link)
if err != nil {
- fmt.Printf("获取文章详情失败: %v\n", err)
+ fmt.Printf("❌ 获取文章详情失败: %v\n", err)
errorCount++
+ // 记录失败的文章
+ errorLinks = append(errorLinks, []string{
+ fmt.Sprintf("%d", i+1),
+ creatTime,
+ title,
+ link,
+ })
continue
}
// 保存文章内容
if contentSaveFlag {
- filePath := fmt.Sprintf("%s/%s_文章详情.txt", officialPath, detail.Title)
+ // 清理标题中的非法字符
+ cleanTitle := detail.Title
+ invalidChars := []string{"\\", "/", ":", "*", "?", "\"", "<", ">", "|"}
+ for _, char := range invalidChars {
+ cleanTitle = strings.ReplaceAll(cleanTitle, char, "_")
+ }
+ // 限制文件名长度
+ if len(cleanTitle) > 100 {
+ cleanTitle = cleanTitle[:100]
+ }
+
+ // 【修改】生成文件路径,保存到"文章详细"子目录中
+ filePath := fmt.Sprintf("%s/%s_文章详情.txt", articleDetailPath, cleanTitle)
+
+ // 调试:打印文件保存路径和内容长度
+ fmt.Printf(" 保存路径: %s\n", filePath)
+ fmt.Printf(" 内容段落数: %d\n", len(detail.Content))
+ if len(detail.Content) > 0 {
+ previewLen := 50
+ if len(detail.Content[0]) < previewLen {
+ previewLen = len(detail.Content[0])
+ }
+ fmt.Printf(" 第一段内容预览: %s...\n", detail.Content[0][:previewLen])
+ } else {
+ fmt.Printf(" ⚠️ 警告:文章内容为空!\n")
+ }
+
if err := w.SaveArticleDetailToExcel(detail, filePath); err != nil {
- fmt.Printf("保存文章详情失败: %v\n", err)
+ fmt.Printf("❌ 保存文章详情失败: %v\n", err)
errorCount++
+ errorLinks = append(errorLinks, []string{
+ fmt.Sprintf("%d", i+1),
+ creatTime,
+ title,
+ link,
+ })
continue
}
+ fmt.Printf("✅ 文章保存成功: %s\n", detail.Title)
}
// TODO: 保存图片功能(如果需要)
@@ -1578,12 +1969,44 @@ func (w *WechatCrawler) GetListArticleFromFile(nameLink string, imgSaveFlag bool
}
successCount++
- fmt.Printf("第%d篇文章处理成功: %s\n", i+1, detail.Title)
// 添加延迟,避免被封
- time.Sleep(3 * time.Second)
+ if i < len(articleLinks)-1 { // 不是最后一篇
+ delayTime := 3 + i/10 // 基础延迟3秒,每10篇增加1秒
+ fmt.Printf("为预防被封禁,延时 %d 秒...\n", delayTime)
+ time.Sleep(time.Duration(delayTime) * time.Second)
+ }
}
- fmt.Printf("文章列表处理完成: 成功%d篇, 失败%d篇\n", successCount, errorCount)
+ // 6. 保存失败的文章链接
+ if len(errorLinks) > 0 {
+ errorPath := officialPath + "/问题链接(error_links).txt"
+ var errorContent strings.Builder
+ // 添加 BOM 头
+ errorContent.WriteString("\xEF\xBB\xBF")
+ errorContent.WriteString("序号,创建时间,标题,链接\n")
+ for _, errorLink := range errorLinks {
+ // 处理标题中的逗号和引号
+ title := errorLink[2]
+ if strings.Contains(title, ",") || strings.Contains(title, "\"") {
+ title = "\"" + strings.ReplaceAll(title, "\"", "\"\"") + "\""
+ }
+ errorContent.WriteString(fmt.Sprintf("%s,%s,%s,%s\n",
+ errorLink[0], errorLink[1], title, errorLink[3]))
+ }
+ err := os.WriteFile(errorPath, []byte(errorContent.String()), 0644)
+ if err != nil {
+ fmt.Printf("⚠️ 保存错误链接失败: %v\n", err)
+ } else {
+ fmt.Printf("\n已保存失败的文章链接到: %s\n", errorPath)
+ }
+ }
+
+ fmt.Printf("\n" + strings.Repeat("=", 60) + "\n")
+ fmt.Printf("文章列表处理完成!\n")
+ fmt.Printf(" 成功: %d 篇\n", successCount)
+ fmt.Printf(" 失败: %d 篇\n", errorCount)
+ fmt.Printf(" 总计: %d 篇\n", len(articleLinks))
+ fmt.Printf(strings.Repeat("=", 60) + "\n")
return nil
}
diff --git a/backend/run.bat b/backend/run.bat
deleted file mode 100644
index 6715ae3..0000000
--- a/backend/run.bat
+++ /dev/null
@@ -1,48 +0,0 @@
-@echo off
-
-echo WeChat Public Article Crawler Startup Script
-echo =================================
-
-REM Check if cookie.txt file exists
-if not exist "cookie.txt" (
- echo Error: cookie.txt file not found!
- echo Please create cookie.txt file in backend directory and add WeChat public platform cookie information.
- echo.
- echo cookie.txt format example:
- echo __biz=xxx; uin=xxx; key=xxx; pass_ticket=xxx;
- echo.
- pause
- exit /b 1
-)
-
-REM Set Go environment variables (if needed)
-REM set GOPATH=%USERPROFILE%\go
-REM set GOROOT=C:\Go
-REM set PATH=%PATH%;%GOROOT%\bin;%GOPATH%\bin
-
-echo Downloading dependencies...
-go mod tidy
-if %errorlevel% neq 0 (
- echo Failed to download dependencies!
- pause
- exit /b 1
-)
-
-echo Compiling program...
-go build -o output\wechat-crawler.exe cmd\main.go
-if %errorlevel% neq 0 (
- echo Compilation failed!
- pause
- exit /b 1
-)
-
-echo Compilation successful! Starting program...
-echo.
-
-REM Ensure data directory exists
-if not exist "data" mkdir data
-
-REM Run the program
-output\wechat-crawler.exe
-
-pause
\ No newline at end of file
diff --git a/backend/run_article_link.bat b/backend/run_article_link.bat
deleted file mode 100644
index b356bf5..0000000
--- a/backend/run_article_link.bat
+++ /dev/null
@@ -1,57 +0,0 @@
-@echo off
-
-rem WeChat Official Account Article Crawler - Script for crawling via article link
-setlocal enabledelayedexpansion
-
-REM 检查是否有命令行参数传入
-if "%1" neq "" (
- REM 如果有参数,直接将其作为文章链接传入程序
- echo.
- echo Compiling and running...
- go run "cmd/main.go" "%1"
-
- if errorlevel 1 (
- echo.
- echo Failed to run, please check error messages above
- pause
- exit /b 1
- )
-
- echo.
- echo Crawling completed successfully!
- pause
- exit /b 0
-) else (
- REM 如果没有参数,运行交互式模式
- :input_loop
- cls
- echo ========================================
- echo WeChat Official Account Article Crawler
- echo ========================================
- echo.
- echo Please enter WeChat article link:
- echo Example: https://mp.weixin.qq.com/s/4r_LKJu0mOeUc70ZZXK9LA
- set /p ARTICLE_LINK=
-
- if "%ARTICLE_LINK%"=="" (
- echo.
- echo Error: Article link cannot be empty!
- pause
- goto input_loop
- )
-
- echo.
- echo Compiling and running...
- go run "cmd/main.go" "%ARTICLE_LINK%"
-
- if errorlevel 1 (
- echo.
- echo Failed to run, please check error messages above
- pause
- exit /b 1
- )
-
- echo.
- echo Crawling completed successfully!
- pause
-)
\ No newline at end of file
diff --git a/backend/tools/view_db.bat b/backend/tools/view_db.bat
new file mode 100644
index 0000000..482bed2
--- /dev/null
+++ b/backend/tools/view_db.bat
@@ -0,0 +1,21 @@
@echo off
rem Database viewer launcher: runs the Go helper that dumps the crawler DB.
rem Switch the console to UTF-8 so Chinese text renders correctly.
chcp 65001 >nul
cls

echo ===============================================
echo 📊 数据库内容查看工具
echo ===============================================
echo.

rem Run from this script's own directory so relative paths resolve.
cd /d "%~dp0"

echo 正在查询数据库...
echo.

go run view_db.go

echo.
echo ===============================================
echo 查询完成!
echo ===============================================
pause
diff --git a/backend/tools/view_db.go b/backend/tools/view_db.go
new file mode 100644
index 0000000..d0e4f19
--- /dev/null
+++ b/backend/tools/view_db.go
@@ -0,0 +1,231 @@
+package main
+
import (
	"database/sql"
	"encoding/json"
	"fmt"
	"log"
	"strings"

	_ "modernc.org/sqlite"
)
+
+func main() {
+ // 打开数据库
+ db, err := sql.Open("sqlite", "../../data/wechat_articles.db")
+ if err != nil {
+ log.Fatal("打开数据库失败:", err)
+ }
+ defer db.Close()
+
+ fmt.Println("=" + repeatStr("=", 80))
+ fmt.Println("📊 微信公众号文章数据库内容查看")
+ fmt.Println("=" + repeatStr("=", 80))
+
+ // 查询公众号
+ fmt.Println("\n📢 【公众号列表】")
+ fmt.Println(repeatStr("-", 80))
+ queryOfficialAccounts(db)
+
+ // 查询文章
+ fmt.Println("\n📝 【文章列表】")
+ fmt.Println(repeatStr("-", 80))
+ queryArticles(db)
+
+ // 查询文章内容
+ fmt.Println("\n📄 【文章详细内容】")
+ fmt.Println(repeatStr("-", 80))
+ queryArticleContents(db)
+
+ fmt.Println("\n" + repeatStr("=", 80))
+}
+
// queryOfficialAccounts prints every row of the official_accounts table,
// one labelled block per account, followed by a total count.
func queryOfficialAccounts(db *sql.DB) {
	rows, err := db.Query(`
		SELECT id, biz, nickname, homepage, description, created_at, updated_at
		FROM official_accounts
		ORDER BY id
	`)
	if err != nil {
		log.Printf("查询公众号失败: %v\n", err)
		return
	}
	defer rows.Close()

	total := 0
	for rows.Next() {
		var (
			id                                                         int
			biz, nickname, homepage, description, createdAt, updatedAt string
		)
		if err := rows.Scan(&id, &biz, &nickname, &homepage, &description, &createdAt, &updatedAt); err != nil {
			// Skip unreadable rows but keep dumping the rest.
			log.Printf("读取数据失败: %v\n", err)
			continue
		}
		total++

		fmt.Printf("\n🔹 公众号 #%d\n", id)
		fmt.Printf(" 名称: %s\n", nickname)
		fmt.Printf(" BIZ: %s\n", biz)
		fmt.Printf(" 主页: %s\n", homepage)
		fmt.Printf(" 简介: %s\n", description)
		fmt.Printf(" 创建时间: %s\n", createdAt)
		fmt.Printf(" 更新时间: %s\n", updatedAt)
	}

	if total == 0 {
		fmt.Println(" 暂无数据")
	} else {
		fmt.Printf("\n总计: %d 个公众号\n", total)
	}
}
+
+func queryArticles(db *sql.DB) {
+ rows, err := db.Query(`
+ SELECT a.id, a.official_id, a.title, a.author, a.link, a.publish_time,
+ a.read_num, a.like_num, a.share_num, a.paragraph_count,
+ a.content_preview, a.created_at, oa.nickname
+ FROM articles a
+ LEFT JOIN official_accounts oa ON a.official_id = oa.id
+ ORDER BY a.id
+ `)
+ if err != nil {
+ log.Printf("查询文章失败: %v\n", err)
+ return
+ }
+ defer rows.Close()
+
+ count := 0
+ for rows.Next() {
+ var id, officialID, readNum, likeNum, shareNum, paragraphCount int
+ var title, author, link, publishTime, contentPreview, createdAt, officialName sql.NullString
+ err := rows.Scan(&id, &officialID, &title, &author, &link, &publishTime,
+ &readNum, &likeNum, &shareNum, ¶graphCount, &contentPreview, &createdAt, &officialName)
+ if err != nil {
+ log.Printf("读取数据失败: %v\n", err)
+ continue
+ }
+ count++
+
+ fmt.Printf("\n🔹 文章 #%d\n", id)
+ fmt.Printf(" 标题: %s\n", getStringValue(title))
+ if officialName.Valid {
+ fmt.Printf(" 公众号: %s\n", officialName.String)
+ }
+ fmt.Printf(" 作者: %s\n", getStringValue(author))
+ fmt.Printf(" 链接: %s\n", getStringValue(link))
+ fmt.Printf(" 发布时间: %s\n", getStringValue(publishTime))
+ fmt.Printf(" 阅读数: %d | 点赞数: %d | 分享数: %d\n", readNum, likeNum, shareNum)
+ fmt.Printf(" 段落数: %d\n", paragraphCount)
+ if contentPreview.Valid && contentPreview.String != "" {
+ preview := contentPreview.String
+ if len(preview) > 100 {
+ preview = preview[:100] + "..."
+ }
+ fmt.Printf(" 内容预览: %s\n", preview)
+ }
+ fmt.Printf(" 抓取时间: %s\n", getStringValue(createdAt))
+ }
+
+ if count == 0 {
+ fmt.Println(" 暂无数据")
+ } else {
+ fmt.Printf("\n总计: %d 篇文章\n", count)
+ }
+}
+
+func queryArticleContents(db *sql.DB) {
+ rows, err := db.Query(`
+ SELECT ac.id, ac.article_id, ac.html_content, ac.text_content,
+ ac.paragraphs, ac.images, ac.created_at, a.title
+ FROM article_contents ac
+ LEFT JOIN articles a ON ac.article_id = a.id
+ ORDER BY ac.id
+ `)
+ if err != nil {
+ log.Printf("查询文章内容失败: %v\n", err)
+ return
+ }
+ defer rows.Close()
+
+ count := 0
+ for rows.Next() {
+ var id, articleID int
+ var htmlContent, textContent, paragraphs, images, createdAt, title sql.NullString
+ err := rows.Scan(&id, &articleID, &htmlContent, &textContent,
+ ¶graphs, &images, &createdAt, &title)
+ if err != nil {
+ log.Printf("读取数据失败: %v\n", err)
+ continue
+ }
+ count++
+
+ fmt.Printf("\n🔹 内容 #%d (文章ID: %d)\n", id, articleID)
+ if title.Valid {
+ fmt.Printf(" 文章标题: %s\n", title.String)
+ }
+
+ // HTML内容长度
+ htmlLen := 0
+ if htmlContent.Valid {
+ htmlLen = len(htmlContent.String)
+ }
+ fmt.Printf(" HTML内容长度: %d 字符\n", htmlLen)
+
+ // 文本内容
+ if textContent.Valid && textContent.String != "" {
+ text := textContent.String
+ if len(text) > 200 {
+ text = text[:200] + "..."
+ }
+ fmt.Printf(" 文本内容: %s\n", text)
+ }
+
+ // 段落信息
+ if paragraphs.Valid && paragraphs.String != "" {
+ var paragraphList []interface{}
+ if err := json.Unmarshal([]byte(paragraphs.String), ¶graphList); err == nil {
+ fmt.Printf(" 段落数量: %d\n", len(paragraphList))
+ }
+ }
+
+ // 图片信息
+ if images.Valid && images.String != "" {
+ var imageList []interface{}
+ if err := json.Unmarshal([]byte(images.String), &imageList); err == nil {
+ fmt.Printf(" 图片数量: %d\n", len(imageList))
+ if len(imageList) > 0 {
+ fmt.Printf(" 图片URL:\n")
+ for i, img := range imageList {
+ if i >= 3 {
+ fmt.Printf(" ... 还有 %d 张图片\n", len(imageList)-3)
+ break
+ }
+ fmt.Printf(" %d. %v\n", i+1, img)
+ }
+ }
+ }
+ }
+
+ fmt.Printf(" 存储时间: %s\n", getStringValue(createdAt))
+ }
+
+ if count == 0 {
+ fmt.Println(" 暂无数据")
+ } else {
+ fmt.Printf("\n总计: %d 条详细内容\n", count)
+ }
+}
+
+func getStringValue(s sql.NullString) string {
+ if s.Valid {
+ return s.String
+ }
+ return ""
+}
+
// repeatStr returns s repeated n times; n <= 0 yields "".
// Delegates to strings.Repeat instead of the original quadratic
// += concatenation (each += re-copies the accumulated string).
// The guard preserves the original's behaviour for non-positive n,
// since strings.Repeat panics on a negative count.
func repeatStr(s string, n int) string {
	if n <= 0 {
		return ""
	}
	return strings.Repeat(s, n)
}
diff --git a/启动Web系统.bat b/启动Web系统.bat
new file mode 100644
index 0000000..f4b7eec
--- /dev/null
+++ b/启动Web系统.bat
@@ -0,0 +1,49 @@
@echo off
rem Web-system launcher: starts the API server and the frontend server in
rem separate console windows, then prints usage hints for the operator.
rem UTF-8 code page so the Chinese/emoji banner text renders correctly.
chcp 65001 >nul
cls

echo ===============================================
echo 🚀 微信公众号文章爬虫 - Web系统启动器
echo ===============================================
echo.
echo 正在启动系统,请稍候...
echo.

rem Step 1: launch the API server in its own background window.
echo [1/2] 启动 API 服务器...
cd backend\api
start "微信爬虫-API服务器" cmd /c "start_api.bat"
cd ..\..
rem Give the API a moment to bind its port before the frontend starts.
timeout /t 2 /nobreak >nul

rem Step 2: launch the frontend server in its own window.
echo [2/2] 启动 前端服务器...
cd frontend
start "微信爬虫-前端服务器" cmd /c "start_web.bat"
cd ..

echo.
echo ===============================================
echo ✅ 系统启动完成!
echo ===============================================
echo.
echo 📝 重要提示:
echo.
echo 1️⃣ API服务器: http://localhost:8080
echo - 提供后端接口服务
echo - 窗口标题: "微信爬虫-API服务器"
echo.
echo 2️⃣ 前端界面: http://localhost:8000
echo - Web操作界面
echo - 窗口标题: "微信爬虫-前端服务器"
echo.
echo ⚠️ 请不要关闭这两个窗口!
echo.
echo 💡 使用说明:
echo - 浏览器会自动打开前端界面
echo - 如未自动打开,请手动访问 http://localhost:8000
echo - 使用完毕后,关闭两个服务器窗口即可
echo.
echo ===============================================

pause