Initial commit: 百家号文章采集系统
This commit is contained in:
23
test_html.py
Normal file
23
test_html.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from app import BaijiahaoScraper
|
||||
|
||||
# Manual check script: resolves a UK value and cookies for one app_id, then
# fetches that account's articles via the scraper's HTML-based method and
# prints a short preview. Runs top to bottom when the file is executed.
app_id = "1700253559210167"

print(f"测试app_id: {app_id}\n")

# get_uk_from_app_id is called on the class itself and is unpacked as a
# (uk, cookies) pair; both values are passed to the scraper constructor below.
uk, cookies = BaijiahaoScraper.get_uk_from_app_id(app_id)
print(f"UK: {uk}\n")

scraper = BaijiahaoScraper(uk, cookies)

# Test the HTML-parsing retrieval path.
print("使用HTML解析方式:")
articles = scraper.get_articles_from_html(app_id=app_id)

if not articles:
    # Falsy result (e.g. None or an empty collection): nothing to preview.
    print("未获取到文章")
else:
    print(f"\n成功! 获取到 {len(articles)} 篇文章")
    print("\n前3篇:")
    preview = articles[:3]
    for i, article in enumerate(preview, start=1):
        print(f"{i}. {article['标题']}")
        # Only the first 80 characters of the link are shown.
        link_head = article['链接'][:80]
        print(f" {link_head}...")
|
||||
Reference in New Issue
Block a user