# Source file: ai_keyword_baidu/youlai.py (70 lines, 6.9 KiB, Python)
# Last modified: 2025-12-08 15:44:38 +08:00
import requests
from bs4 import BeautifulSoup
# Cookies sent with the Baidu page request (passed as `cookies=` to
# requests.get).
# NOTE(review): these look like values copied from a browser session and are
# presumably short-lived -- confirm they are still valid before relying on them.

# Value shared by BAIDUID and BAIDUID_BFESS.
_BAIDUID = '17E56B6A4915D5B98222C8D7A7CFF059:FG=1'
# Value shared by H_PS_PSSID, H_WISE_SIDS and H_WISE_SIDS_BFESS.
_SIDS = '63140_64007_65866_66117_66218_66194_66236_66243_66168_66362_66281_66264_66393_66395_66479_66510_66529_66553_66589_66590_66602_66614_66647_66679_66692_66695_66687_66711'

cookies = {
    'PSTM': '1764302604',
    'BAIDUID': _BAIDUID,
    'delPer': '0',
    'PSINO': '3',
    'BAIDUID_BFESS': _BAIDUID,
    'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
    'BA_HECTOR': 'ala10k2421a0ag25a5a40524a10l8n1kii7of24',
    'BIDUPSID': 'C047CB4D757AC8632D7B5792A4254C89',
    'ZFY': 'hLpeh2:BHPDeKfEN3yuM7C:A7dmFl03pP:AkeekLlPw5J4:C',
    'H_PS_PSSID': _SIDS,
    'H_WISE_SIDS': _SIDS,
    'Hmery-Time': '1179046931',
    'ppfuid': 'FOCoIC3q5fKa8fgJnwzbE67EJ49BGJeplOzf+4l4EOvDuu2RXBRv6R3A1AZMa49I27C0gDDLrJyxcIIeAeEhD8JYsoLTpBiaCXhLqvzbzmvy3SeAW17tKgNq/Xx+RgOdb8TWCFe62MVrDTY6lMf2GrfqL8c87KLF2qFER3obJGnsqkZri/4OJbm7r4CyJIowGEimjy3MrXEpSuItnI4KD1ziJfZNxo8yAiRbKjVuLfyDFMkrGDsyg5dNhHl7/LyWn+snuJUZLcdRLoqHtKxcMxJsVwXkGdF24AsEQ3K5XBbh9EHAWDOg2T1ejpq0s2eFy9ar/j566XqWDobGoNNfmfpaEhZpob9le2b5QIEdiQez0E9SVndeXkd9EampG0PcXhLZ126CPFCIEuj/nWa+RCvURLVm4bpFOvBv8e58/dDOXSxFWocn8LvXoXRLp3fo5/fbkvdNVYtfeSgcfqcJz7tVNIB9KqyLqpAKG/6PN5nHZFO3SaB4GS7zlBrG2cLm8lTRl19JYcYcqvy3P/50mxpWDwUUC4pvKOF9e+pwNq7l6HzKEZyCMUDd+W6AiaksYiu+4AAz72OnMQfgAyNUbW3IyzL5c+UBht87WUigOY9alcIuR+n1gwn+Dmf3unATYGtv0zKmAog3Ny9wFYiQ/gdKSrR9D25HSwrLQyIe5QKTkKSlY6nVev8MhaT3AUPwNqYIvWCQZXWkhuuU0ZXLMYAKJSeHY7mTrwwSSKC3ZaJ+A4OPRiYBTUqCc5JsBepONKqWGBPjd/5ec+gZP1XIjqkyA7LjmymBvzpMPSbNaTB91rSPWb2eeCt263/A+EJVR/A8+3BQ92SIDoXabq8Wb8ZGN9BAsC9g5OdjE6lhwzTadptHqT7mZN901gDzA4lMYEG/kekC+0J5/N5yVy+ei7UKhQHejRjxCO2+98Bn9oZ+yRR2B/06x2clYZDIZOXZ+pcZgcVUJxrUXTkrJcTxF9d7jPkj5hbBFquQKM4S+tDJ34jmplOTrqqKT7PPVfrdgd4OkK13pEy86BsJ8M0gKXgtivUgM8Bjl1m/pkg0SuDyntWLdrmMxcZYvgySvSSwQ2Qtm8EkKHIMyR/XgfHnpX5vadGpRMro2qaE8u+x8w18LAHUcIuTizQDTMQvVpJH1qeHlbtIIUXAPQK2AKm25kN9e++uG7KATaiQSHPJR40Dj0k/dKJ/JpAnSBWWUb3OOODKY6cFi7NgWAK6Jc1G4bJvJ++n215hSNvqitQjGGwIBF37aBhyiPWPAOeYXBqA',
    'H_WISE_SIDS_BFESS': _SIDS,
    '__bsi': '11490570190430627224_00_21_R_R_50_0303_c02f_Y',
    'ab_sr': '1.0.1_ZjZlMjkzNzE2YzY5Y2EyNTkzMTI2NzBlYTUzZDc1NzcxYTllODFiNDIwNTVkMmY5YjIxZDhkMTljNWU2NDFhNzhjMDZkYTBhMGIzNWQ2MzUxNzFkOTE2MDAzMDRiODVlMDgzY2Q0Y2NkNzI5ZTEzMDdmNTA3YTkzOWJhNWMzY2Q5YWY3Y2I1ZGI1ODNlZWJkYzNiOWUzMjBlYjJmNjY4Yg==',
}
# Browser-like request headers (desktop Chrome on Windows) sent with the page
# request. Cookies are deliberately NOT set here: they are supplied through the
# separate `cookies` dict passed to requests.get, so the old commented-out
# 'Cookie' header (a stale duplicate whose wrapped second line broke the dict
# literal) has been removed.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Chromium";v="142", "Google Chrome";v="142", "Not_A Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}
# Fetch the article detail page using the browser-like headers and cookies
# defined above.
response = requests.get(
    'https://m.baidu.com/bh/m/detail/ar_15450602323263592250',
    cookies=cookies,
    headers=headers,
    # requests has no default timeout; without one a stalled connection
    # would block the script forever.
    timeout=10,
)
# Stop on 4xx/5xx responses instead of parsing an error page and writing an
# empty result file.
response.raise_for_status()
# Dump the raw HTML to stdout (kept from the original script for inspection).
print(response.text)
# Parse the fetched HTML and extract the article title and the
# "大家还在搜" ("people also search for") entries.
# BeautifulSoup is already imported at the top of the file, so the duplicate
# mid-script import was dropped.
soup = BeautifulSoup(response.text, 'html.parser')

# Article title: the <h1> inside the title container. `title` stays '' when
# either element is missing from the page.
# NOTE: passing a multi-class string to `class_` matches only when the
# element's class attribute is exactly this string, in this order.
title = ''
title_div = soup.find('div', class_='index_articleTitleContainer__cbWVZ index_pcTitleWrapper__VPR8_')
h1 = title_div.find('h1', class_='index_articleTitle__g5diF') if title_div else None
if h1:
    title = h1.get_text(strip=True)

# Related searches: locate the section heading, then the first list container
# that follows it in the document, and collect the text of every item that
# has a text element. `remc_list` stays empty when the section is absent.
remc_list = []
remc_title = soup.find('div', class_='index_remcTitle__U4959')
remc_list_div = remc_title.find_next('div', class_='index_remcList__YtEhr') if remc_title else None
if remc_list_div:
    for item in remc_list_div.find_all('div', class_='index_remcItem__V2OJg'):
        text_div = item.find('div', class_='index_remcItemText__oJJRv')
        if text_div:
            remc_list.append(text_div.get_text(strip=True))
# Save the extracted title and related searches to a UTF-8 text file,
# overwriting any previous result.
with open('youlai_result.txt', 'w', encoding='utf-8') as f:
    # Title section: label line, then the title itself.
    f.writelines(['标题:\n', title + '\n', '\n大家还在搜:\n'])
    # One related-search entry per line.
    f.writelines(entry + '\n' for entry in remc_list)