This commit is contained in:
sjk
2026-01-16 22:06:46 +08:00
parent 816bf29a2a
commit 3d402639da
114 changed files with 10763 additions and 419 deletions

62
batch_insert_urls.py Normal file
View File

@@ -0,0 +1,62 @@
"""Batch-insert URLs into the database."""
from data_manager import DataManager
# URLs to insert.
urls = [
    "https://health.baidu.com/m/detail/ar_1763832104063502612",
    "https://health.baidu.com/m/detail/ar_3234161746463547514",
    "https://health.baidu.com/m/detail/ar_2979413891570169996",
    "https://health.baidu.com/m/detail/ar_2956015846029041423",
    "https://health.baidu.com/m/detail/ar_168792171069657865",
    "https://health.baidu.com/m/detail/ar_6465728881863076989",
    "https://health.baidu.com/m/detail/ar_5239302258777444788",
    "https://health.baidu.com/m/detail/ar_4713935339392349406",
    "https://health.baidu.com/m/detail/ar_5279303492380349045",
    "https://health.baidu.com/m/detail/ar_3049436766450657685",
    "https://health.baidu.com/m/detail/ar_2014490668952387433",
]

# Width of the "=" rules that frame each section title.
BANNER_WIDTH = 60
# Upper bound on how many active URLs are printed in the final listing.
MAX_LISTED_URLS = 15


def print_banner(title):
    """Print *title* framed by a rule of "=" characters above and below."""
    rule = "=" * BANNER_WIDTH
    print(rule)
    print(title)
    print(rule)


def insert_urls(dm, url_list):
    """Insert each URL through ``dm.add_url`` and print per-item progress.

    Args:
        dm: Object exposing ``add_url(url)``. A truthy return value is
            counted as a success, anything else as a failure.
        url_list: Sequence of URLs, inserted in order.

    Returns:
        A ``(success_count, failed_count)`` tuple.
    """
    total = len(url_list)
    success_count = 0
    for idx, url in enumerate(url_list, 1):
        print(f"[{idx}/{total}] 插入: {url}")
        if dm.add_url(url):
            success_count += 1
            print(" ✓ 成功")
        else:
            # A falsy result is only reported, never raised; the message
            # suggests "already exists" as a likely cause, but the real
            # reason is not visible from here.
            print(" × 失败(可能已存在)")
    return success_count, total - success_count


def format_site(idx, site):
    """Return the display line for one active-URL record.

    Args:
        idx: 1-based position shown in front of the URL.
        site: Mapping describing the site. The URL is read from
            ``'site_url'`` and falls back to ``'url'``; ``'click_count'``
            defaults to 0 when absent.
    """
    site_url = site.get('site_url', site.get('url'))
    click_count = site.get('click_count', 0)
    return f" {idx}. {site_url} (点击: {click_count}次)"


def main():
    """Insert ``urls`` via a ``DataManager`` and print a summary report.

    The work lives in a function behind the ``__main__`` guard so that
    importing this module (for example to reuse ``urls``) does not write
    to the database.
    """
    print_banner("批量插入URL到数据库")

    # Create the data manager and report which storage backend it uses.
    dm = DataManager()
    print(f"\n存储方式: {'SQLite数据库' if dm.use_database else 'JSON文件'}")
    print(f"总URL数: {len(urls)}\n")

    # Batch insert.
    success_count, failed_count = insert_urls(dm, urls)

    # Result summary (the bare print() emits the blank separator line).
    print()
    print_banner("插入完成")
    print(f"成功: {success_count}")
    print(f"失败: {failed_count}")

    # Current statistics as reported by the data manager.
    print("\n数据库统计:")
    for key, value in dm.get_statistics().items():
        print(f" {key}: {value}")

    # Active URLs, capped at MAX_LISTED_URLS entries.
    print("\n活跃URL列表:")
    for idx, site in enumerate(dm.get_active_urls()[:MAX_LISTED_URLS], 1):
        print(format_site(idx, site))


if __name__ == "__main__":
    main()