# File-viewer header captured with the paste (not part of the program): 26 lines, 858 B, Python
"""Scrape an article title and its "大家还在搜" list from a Baidu page.

Opens ``url`` in headless Chromium through Playwright, reads the article
title and the text of every "大家还在搜" entry, and writes both to
``youlai_result.txt`` (UTF-8) in the current working directory.
"""
from playwright.sync_api import sync_playwright

url = 'https://m.baidu.com/bh/m/detail/ar_15450602323263592250'

# NOTE(review): the suffixes on these class names look build-generated and
# will presumably change when the site is redeployed -- confirm before reuse.
TITLE_SELECTOR = (
    'div.index_articleTitleContainer__cbWVZ.index_pcTitleWrapper__VPR8_ '
    'h1.index_articleTitle__g5diF'
)
REMC_SELECTOR = 'div.index_remcList__YtEhr div.index_remcItemText__oJJRv'

with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        page = browser.new_page()
        page.goto(url)

        # Fixed pause so the page can finish rendering.
        # NOTE(review): kept as-is because all_inner_texts() below appears to
        # return whatever is present without waiting -- confirm against the
        # Playwright docs before replacing this with an explicit wait.
        page.wait_for_timeout(3000)

        # Article title.
        title = page.locator(TITLE_SELECTOR).inner_text()

        # Text of each "大家还在搜" entry.
        remc_items = page.locator(REMC_SELECTOR).all_inner_texts()
    finally:
        # Close the browser even when navigation or a locator call raises.
        browser.close()

# Write the results once scraping has succeeded; the browser is no longer
# needed at this point, so it has already been released above.
with open('youlai_result.txt', 'w', encoding='utf-8') as f:
    f.write('标题:\n')
    f.write(title + '\n')
    f.write('\n大家还在搜:\n')
    f.writelines(item + '\n' for item in remc_items)