Initial commit: 百家号文章采集系统

This commit is contained in:
sjk
2025-12-19 22:48:58 +08:00
commit 0d5bbb1864
37 changed files with 11774 additions and 0 deletions

23
test_html.py Normal file
View File

@@ -0,0 +1,23 @@
from app import BaijiahaoScraper
# Manual smoke check for BaijiahaoScraper's HTML-parsing retrieval path.
# Runs top to bottom as a plain script and reports its progress on stdout.
app_id = "1700253559210167"
print(f"测试app_id: {app_id}\n")

# The result of get_uk_from_app_id is unpacked into a (uk, cookies) pair;
# both values are handed straight to the scraper constructor below.
uk, cookies = BaijiahaoScraper.get_uk_from_app_id(app_id)
print(f"UK: {uk}\n")

scraper = BaijiahaoScraper(uk, cookies)

# Exercise the HTML-parsing retrieval path.
print("使用HTML解析方式:")
articles = scraper.get_articles_from_html(app_id=app_id)

if articles:
    print(f"\n成功! 获取到 {len(articles)} 篇文章")
    print("\n前3篇:")
    # Preview at most the first three entries, numbered from 1.
    for i, article in enumerate(articles[:3], 1):
        print(f"{i}. {article['标题']}")
        # Only the first 80 characters of the link are shown.
        print(f" {article['链接'][:80]}...")
else:
    # Reached when the result is empty or otherwise falsy.
    print("未获取到文章")