init commit
This commit is contained in:
119
baidu.py
Normal file
119
baidu.py
Normal file
@@ -0,0 +1,119 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
def fetch_related_words(keyword, *, timeout=10, output_path='baidu_result.txt'):
    """Fetch Baidu's "people also search" and "related searches" terms.

    Sends one GET request to ``https://www.baidu.com/s`` for ``keyword``,
    parses the returned HTML, writes both term lists to ``output_path`` and
    returns them.

    Args:
        keyword: Search term; sent as the ``wd``, ``oq`` and ``bs`` query
            parameters and URL-quoted into the ``Referer`` header.
        timeout: Seconds to wait for the HTTP request. Without a timeout
            ``requests`` may block indefinitely on a stalled connection.
        output_path: Text file (UTF-8) that receives both lists. It is
            overwritten on every call.

    Returns:
        A ``(result, related_search)`` tuple of two lists of strings. A list
        is empty when its container element is not present in the page.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status.
        OSError: If ``output_path`` cannot be written.
    """
    from urllib.parse import quote

    # NOTE(review): these cookie values, and the tokens in ``params`` below,
    # look like they were copied from one browser session; presumably they
    # expire and need refreshing -- confirm.
    cookies = {
        'PSTM': '1764302604',
        'BAIDUID': '17E56B6A4915D5B98222C8D7A7CFF059:FG=1',
        'BD_HOME': '1',
        'H_PS_PSSID': '63140_64007_65866_66117_66218_66194_66236_66243_66168_66362_66281_66264_66393_66395_66479_66510_66529_66553_66589_66590_66602_66614_66647_66679_66692_66695_66687',
        'delPer': '0',
        'BD_CK_SAM': '1',
        'PSINO': '3',
        'BAIDUID_BFESS': '17E56B6A4915D5B98222C8D7A7CFF059:FG=1',
        'PAD_BROWSER': '1',
        'BD_UPN': '12314753',
        'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
        'BA_HECTOR': 'ala10k2421a0ag25a5a40524a10l8n1kii7of24',
        'BIDUPSID': 'C047CB4D757AC8632D7B5792A4254C89',
        'ZFY': 'hLpeh2:BHPDeKfEN3yuM7C:A7dmFl03pP:AkeekLlPw5J4:C',
        'channel': 'baidusearch',
        'H_WISE_SIDS': '63140_64007_65866_66117_66218_66194_66236_66243_66168_66362_66281_66264_66393_66395_66479_66510_66529_66553_66589_66590_66602_66614_66647_66679_66692_66695_66687',
        'baikeVisitId': '7f510782-16ce-4371-ad1d-cc8c0ba5ccc8',
        'COOKIE_SESSION': '0_0_1_0_0_0_1_0_1_1_7462_1_0_0_0_0_0_0_1764302605%7C1%230_0_1764302605%7C1',
        'H_PS_645EC': 'c7554ktAJah5Z6fmLi0RDEpB3a2TvS0rgHEQ7JP12K2UeBuFhGHrlxODIbY',
        'BDSVRTM': '16',
        'WWW_ST': '1764310101036',
    }

    # The keyword is percent-encoded for the Referer header. The 'is_pbs' and
    # 'is_referer' headers were already disabled in the original code and
    # stay omitted here.
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Referer': f'https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&tn=baidu&wd={quote(keyword)}',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'is_xhr': '1',
        'sec-ch-ua': '"Chromium";v="142", "Google Chrome";v="142", "Not_A Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    # A list of pairs (not a dict) because 'ie' appears twice; the duplicate
    # is kept so the outgoing query string is unchanged.
    params = [
        ('ie', 'utf-8'),
        ('mod', '1'),
        ('isbd', '1'),
        ('isid', 'b857f27e00205440'),
        ('ie', 'utf-8'),
        ('f', '8'),
        ('rsv_bp', '1'),
        ('tn', 'baidu'),
        ('wd', keyword),
        ('oq', keyword),
        ('rsv_pq', 'b857f27e00205440'),
        ('rsv_t', 'c7554ktAJah5Z6fmLi0RDEpB3a2TvS0rgHEQ7JP12K2UeBuFhGHrlxODIbY'),
        ('rqlang', 'cn'),
        ('rsv_enter', '1'),
        ('rsv_dl', 'tb'),
        ('rsv_btype', 't'),
        ('inputT', '23852'),
        ('rsv_sug2', '0'),
        ('rsv_sug3', '15'),
        ('rsv_sug1', '23'),
        ('rsv_sug7', '100'),
        ('rsv_sug4', '23852'),
        ('bs', keyword),
        ('rsv_sid', 'undefined'),
        ('_ss', '1'),
        ('clist', 'ddad409c4a1855aa'),
        ('hsug', ''),
        ('f4s', '1'),
        ('csor', '3'),
        ('_cr1', '35542'),
    ]

    response = requests.get(
        'https://www.baidu.com/s',
        params=params,
        cookies=cookies,
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page into empty lists.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    result = _extract_people_also_search(soup)
    related_search = _extract_related_search(soup)
    _save_results(output_path, result, related_search)

    return result, related_search


def _extract_people_also_search(soup):
    """Return the second-<span> text of each 'item_3WKCf' link in the 'list_1V4Yg' div."""
    container = soup.find('div', class_='list_1V4Yg')
    if container is None:
        return []
    terms = []
    for link in container.find_all('a', class_='item_3WKCf'):
        spans = link.find_all('span')
        # Links with fewer than two spans carry no term text and are skipped.
        if len(spans) > 1:
            terms.append(spans[1].get_text(strip=True))
    return terms


def _extract_related_search(soup):
    """Return the 'rs-text_3K5mR' span text of each link in the related-search table."""
    # A class_ string containing a space is matched against the exact value of
    # the class attribute, so both classes must appear in this order.
    label = soup.find('div', class_='c-color-t rs-label_ihUhK')
    if label is None:
        return []
    table = label.find_next('table', class_='rs-table_3RiQc')
    if table is None:
        return []
    terms = []
    for link in table.find_all('a', class_='rs-link_2DE3Q'):
        span = link.find('span', class_='rs-text_3K5mR')
        if span is not None:
            terms.append(span.get_text(strip=True))
    return terms


def _save_results(path, result, related_search):
    """Write both term lists to ``path`` as UTF-8 text, one term per line under its heading."""
    lines = ['大家都在搜:\n']
    lines.extend(item + '\n' for item in result)
    lines.append('\n相关搜索:\n')
    lines.extend(item + '\n' for item in related_search)
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(lines)
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: prompt for a search term, run the scrape, then echo
    # both returned lists and the name of the file the results were saved to.
    query = input('请输入关键词:')
    suggestions, related = fetch_related_words(query)
    for label, terms in (("大家都在搜:", suggestions), ("相关搜索:", related)):
        print(label, terms)
    print("所有内容已保存到 baidu_result.txt")
|
||||
Reference in New Issue
Block a user