新版可用
This commit is contained in:
11
backend/cmd/data/研招网资讯/文章列表(article_list)_直连链接.txt
Normal file
11
backend/cmd/data/研招网资讯/文章列表(article_list)_直连链接.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
序号,创建时间,标题,链接
|
||||
1,0,专家分析2026年考研报名人数,http://mp.weixin.qq.com/s?__biz=MzI3NzQzODQ5OA==&mid=2247500657&idx=1&sn=81eae7df4bfa2fdfc8bca69389489c52&chksm=ea981e22044aca6bbe5633849bfcd4903cb6f491646cd2ccf9321f4d9c852c64fe036f033c14&scene=27#wechat_redirect
|
||||
2,0,教育部:2026年全国硕士研究生报名人数为343万,http://mp.weixin.qq.com/s?__biz=MzI3NzQzODQ5OA==&mid=2247500650&idx=1&sn=9f230bbfefb24d98c18e42bd3651ad53&chksm=eac72972d56ff9b66f3658f0c3b1e6e363e56ddf879d56aba9c9c8f587b53ef00bcabe7992ff&scene=27#wechat_redirect
|
||||
3,0,【小研来了】“务必再坚持坚持”,http://mp.weixin.qq.com/s?__biz=MzI3NzQzODQ5OA==&mid=2247500645&idx=1&sn=8e1d5921861dc4e3647f7bf8adaada81&chksm=ea26b17ce2f7255aacd9d1d6358c9aeb8d4e043c692efb8b4d8183cfc8363b3068be79d585c2&scene=27#wechat_redirect
|
||||
4,0,学累了不?点进来看看这4个“续航”方法,http://mp.weixin.qq.com/s?__biz=MzI3NzQzODQ5OA==&mid=2247500631&idx=1&sn=b640b0e43378e368166e50a7f46735f2&chksm=ea71f10a83b7811e1896cd9704eac5d064b763f3e020b5b37c72727c55bb1b0862a92e9c4cf0&scene=27#wechat_redirect
|
||||
5,0,教育部:在“双一流”建设高校开展科技教育硕士培养,http://mp.weixin.qq.com/s?__biz=MzI3NzQzODQ5OA==&mid=2247500589&idx=1&sn=539d1229c9475ba5a2371698a362e9a7&chksm=ea4f97d3831139a276e50050f2f3307868b9c6ec7eb115bb9e288312f08572c47128a8016dce&scene=27#wechat_redirect
|
||||
6,0,“研味儿”正浓,冲刺在即!请你一定别放弃,http://mp.weixin.qq.com/s?__biz=MzI3NzQzODQ5OA==&mid=2247500584&idx=1&sn=294b6ba8d12f0948913abf04af8cb188&chksm=ea4cfb5b16684bdd12634b6e46d8d8f3ab72ca9108be0d4d7f83dfded09c6ecb9f31b1531e31&scene=27#wechat_redirect
|
||||
7,0,4个思维升级,让我找回了读研的掌控感,http://mp.weixin.qq.com/s?__biz=MzI3NzQzODQ5OA==&mid=2247500579&idx=1&sn=fa00084c8711e3009ff7e31fe0b3bc51&chksm=eaff1ec212ddbb738d20542a965bbd1b79ae3a9d2e5af5704ddcf41de3a8b8d658e562771f0c&scene=27#wechat_redirect
|
||||
8,0,研考网上确认成功后,需重点关注四件事,http://mp.weixin.qq.com/s?__biz=MzI3NzQzODQ5OA==&mid=2247500569&idx=1&sn=7707b698932ff6847de39d7351d3ac98&chksm=ea402eec6a96125a5bb02600aff24c3c1211eb5aaf5347080bbfe1f5861e9ca97fe9c400df21&scene=27#wechat_redirect
|
||||
9,0,,
|
||||
10,0,【小研来了】“小研,没有准考证照片怎么办?”,http://mp.weixin.qq.com/s?__biz=MzI3NzQzODQ5OA==&mid=2247500553&idx=1&sn=4fc6fd69684f02222e72d457c1004a81&chksm=eafc91ea346080790f9b641495fc3d9e31302ee5c2c9957eb4fa2bc9a139eda78163899b9219&scene=27#wechat_redirect
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -600,21 +601,48 @@ func parseAccessTokenParams(accessToken string) (string, string, string, string,
|
||||
if err != nil {
|
||||
return "", "", "", "", fmt.Errorf("未找到__biz参数")
|
||||
}
|
||||
// URL解码biz参数
|
||||
biz, err = url.QueryUnescape(biz)
|
||||
if err != nil {
|
||||
fmt.Printf("警告: URL解码__biz失败: %v,使用原始值\n", err)
|
||||
}
|
||||
|
||||
uin, err := utils.ExtractFromRegex(accessToken, "uin=([^&]*)")
|
||||
if err != nil {
|
||||
return "", "", "", "", fmt.Errorf("未找到uin参数")
|
||||
}
|
||||
// URL解码uin参数
|
||||
uin, err = url.QueryUnescape(uin)
|
||||
if err != nil {
|
||||
fmt.Printf("警告: URL解码uin失败: %v,使用原始值\n", err)
|
||||
}
|
||||
|
||||
key, err := utils.ExtractFromRegex(accessToken, "key=([^&]*)")
|
||||
if err != nil {
|
||||
return "", "", "", "", fmt.Errorf("未找到key参数")
|
||||
}
|
||||
// URL解码key参数
|
||||
key, err = url.QueryUnescape(key)
|
||||
if err != nil {
|
||||
fmt.Printf("警告: URL解码key失败: %v,使用原始值\n", err)
|
||||
}
|
||||
|
||||
passTicket, err := utils.ExtractFromRegex(accessToken, "pass_ticket=([^&]*)")
|
||||
if err != nil {
|
||||
return "", "", "", "", fmt.Errorf("未找到pass_ticket参数")
|
||||
}
|
||||
// URL解码pass_ticket参数
|
||||
passTicket, err = url.QueryUnescape(passTicket)
|
||||
if err != nil {
|
||||
fmt.Printf("警告: URL解码pass_ticket失败: %v,使用原始值\n", err)
|
||||
}
|
||||
|
||||
// 打印解码后的参数用于调试
|
||||
fmt.Printf("\n提取到的参数(已解码):\n")
|
||||
fmt.Printf(" __biz: %s\n", biz)
|
||||
fmt.Printf(" uin: %s\n", uin)
|
||||
fmt.Printf(" key长度: %d 字符\n", len(key))
|
||||
fmt.Printf(" pass_ticket长度: %d 字符\n", len(passTicket))
|
||||
|
||||
return biz, uin, key, passTicket, nil
|
||||
}
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/wechat-crawler/pkg/wechat"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Println("开始测试文章内容提取功能...")
|
||||
|
||||
// 创建一个简单的爬虫实例
|
||||
crawler := wechat.NewSimpleCrawler()
|
||||
|
||||
// 设置公众号名称(根据实际情况修改)
|
||||
officialAccountName := "验证"
|
||||
|
||||
// 调用GetListArticleFromFile函数测试
|
||||
err := crawler.GetListArticleFromFile(officialAccountName, false, true)
|
||||
if err != nil {
|
||||
fmt.Printf("测试失败: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
fmt.Println("测试完成!请检查文章内容是否已正确提取。")
|
||||
}
|
||||
Reference in New Issue
Block a user