Files
baijiahao_data_crawl/add_account_from_cookie.py

262 lines
10 KiB
Python
Raw Permalink Normal View History

2025-12-25 11:16:59 +08:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
从Cookie字符串添加百家号账号到captured_account_cookies.json
"""
import sys
import os
import json
import requests
import urllib3
from datetime import datetime
from typing import Dict, Optional
# Force UTF-8 console output on Windows: this script prints Chinese text and
# emoji, which can fail to encode on a legacy console code page.
if sys.platform == 'win32':
    import io
    _stdout_encoding = (getattr(sys.stdout, 'encoding', None) or '').lower()
    # sys.stdout can be None (no console attached) or a replacement object
    # installed by an IDE/test runner, so probe for capabilities instead of
    # assuming a TextIOWrapper with a ``.buffer`` attribute.
    if sys.stdout is not None and _stdout_encoding not in ('utf-8', 'utf8'):
        if hasattr(sys.stdout, 'reconfigure'):
            # Python 3.7+: change the encoding in place, keeping the same
            # stream object and its buffering settings.
            sys.stdout.reconfigure(encoding='utf-8')
        elif hasattr(sys.stdout, 'buffer'):
            # Older interpreters: re-wrap the underlying binary stream.
            sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
        # Otherwise there is no binary stream to re-wrap; leave stdout alone
        # rather than crash at import time.
# Silence urllib3's InsecureRequestWarning. AccountAdder creates its session
# with certificate verification turned off, and this keeps the resulting
# per-request warning out of the console output.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class AccountAdder:
    """Add a Baijiahao account to the local cookie store from a cookie string.

    Workflow (see ``add_from_cookie_string``): parse the raw cookie text,
    call the Baijiahao ``appinfo`` API with those cookies to fetch the account
    profile, then store cookies plus profile fields in
    ``captured_account_cookies.json`` next to this file, keyed by nickname.
    """

    def __init__(self):
        """Set up the API base URL, the store path and the HTTP session."""
        self.base_url = "https://baijiahao.baidu.com"
        self.cookies_file = os.path.join(os.path.dirname(__file__), "captured_account_cookies.json")
        # Initialise the HTTP session.
        self.session = requests.Session()
        # NOTE(review): certificate verification is disabled while login
        # cookies are being sent, which exposes them to interception.
        # Kept for backward compatibility; enable verification if possible.
        self.session.verify = False
        # Disable proxies. ``trust_env = False`` already makes this session
        # ignore proxy environment variables; the os.environ writes below
        # additionally affect every other HTTP client in this process.
        # NOTE(review): kept because other code may rely on that side effect.
        os.environ['HTTP_PROXY'] = ''
        os.environ['HTTPS_PROXY'] = ''
        self.session.trust_env = False

    def parse_cookie_string(self, cookie_string: str) -> Dict[str, str]:
        """Parse raw cookie text into a ``{name: value}`` dictionary.

        Two layouts are accepted, and may be mixed line by line:

        * one cookie per line, ``name: value``
        * header style, ``name1=value1; name2=value2``

        For each line the separator that occurs first (``:`` or ``=``)
        decides the layout, so values that themselves contain ``:`` or ``=``
        are preserved. Surrounding whitespace and blank lines are ignored,
        and entries with an empty name or empty value are skipped.

        Args:
            cookie_string: Raw cookie text.

        Returns:
            Parsed cookies; empty when nothing could be parsed.
        """
        cookies: Dict[str, str] = {}
        text = (cookie_string or '').strip()
        for line in text.split('\n'):
            line = line.strip()
            if not line:
                continue
            colon_at = line.find(':')
            equals_at = line.find('=')
            if colon_at != -1 and (equals_at == -1 or colon_at < equals_at):
                # "name: value" -- the whole line is a single cookie.
                pairs = [line.split(':', 1)]
            else:
                # "name=value; name=value" -- possibly several cookies.
                pairs = [item.split('=', 1) for item in line.split(';') if '=' in item]
            for name, value in pairs:
                name = name.strip()
                value = value.strip()
                if name and value:
                    cookies[name] = value
        return cookies

    def get_account_info(self, cookies: Dict[str, str]) -> Optional[Dict]:
        """Fetch account information from the Baijiahao ``appinfo`` API.

        Args:
            cookies: Cookie dictionary; must contain ``bjhStoken`` or
                ``devStoken``, which is sent as the ``token`` header.

        Returns:
            The ``data`` object of the API response, or ``None`` when the
            token is missing, the request fails, the HTTP status is not 200,
            the API reports a non-zero ``errno``, or ``data`` is not an object.
        """
        # Look up the token before doing any session work.
        token = cookies.get('bjhStoken') or cookies.get('devStoken')
        if not token:
            print("[X] Cookie中未找到 bjhStoken 或 devStoken")
            return None
        api_url = f"{self.base_url}/builder/app/appinfo"
        # Start from an empty jar so cookies left over from a previous call
        # (a different account) are never mixed into this request.
        self.session.cookies.clear()
        for name, value in cookies.items():
            self.session.cookies.set(name, value, domain='.baijiahao.baidu.com')
        # Request headers.
        headers = {
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-CN',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Referer': f'{self.base_url}/builder/rc/incomecenter',
            'token': token,
        }
        try:
            print("正在请求百家号API...")
            response = self.session.get(api_url, headers=headers, timeout=15)
            if response.status_code != 200:
                print(f"[X] HTTP错误: {response.status_code}")
                return None
            data = response.json()
            if data.get('errno') != 0:
                print(f"[X] API错误: {data.get('errmsg', 'Unknown error')}")
                return None
            payload = data.get('data', {})
            # Callers treat the result as a dict; reject any other shape here.
            return payload if isinstance(payload, dict) else None
        except Exception as e:
            # Deliberately broad: any network, decoding or response-shape
            # problem is reported and turned into a failed lookup.
            print(f"[X] 请求失败: {e}")
            return None

    def _load_existing_accounts(self) -> Dict[str, Dict]:
        """Load the account store, setting an unreadable store aside.

        Returns:
            The stored accounts, or an empty dict when the file is absent.
            If the file exists but is not a JSON object, it is renamed to
            ``<file>.corrupt-<timestamp>`` (so nothing is lost) and an empty
            dict is returned.

        Raises:
            OSError: If the unreadable file cannot be renamed.
        """
        if not os.path.exists(self.cookies_file):
            return {}
        try:
            with open(self.cookies_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both JSON syntax errors and invalid UTF-8.
            problem = str(e)
        else:
            if isinstance(loaded, dict):
                return loaded
            problem = "顶层结构不是JSON对象"
        backup_path = f"{self.cookies_file}.corrupt-{datetime.now().strftime('%Y%m%d%H%M%S')}"
        os.replace(self.cookies_file, backup_path)
        print(f"[!] 现有账号文件无法解析 ({problem})，已备份为: {backup_path}")
        return {}

    def _write_accounts(self, accounts: Dict[str, Dict]) -> None:
        """Write the account store via a temp file and an atomic rename."""
        tmp_path = self.cookies_file + '.tmp'
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(accounts, f, ensure_ascii=False, indent=2)
            # Swap only after a complete, successful write so a crash cannot
            # leave a truncated store behind.
            os.replace(tmp_path, self.cookies_file)
        except BaseException:
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # temp file was never created or is already gone
            raise

    def save_account(self, account_id: str, cookies: Dict[str, str], account_info: Dict):
        """Save one account into ``captured_account_cookies.json``.

        An existing entry with the same ``account_id`` is overwritten; all
        other entries are kept.

        Args:
            account_id: Key for the account (the caller passes the nickname).
            cookies: Cookie dictionary to store.
            account_info: Account information returned by the API; profile
                fields are read from its ``user`` object.

        Raises:
            OSError: If the store cannot be written or backed up.
        """
        # Load existing data.
        existing_data = self._load_existing_accounts()
        # Extract user information; tolerate a missing or null "user" object.
        user_info = account_info.get('user') or {}
        # Build the account record.
        account_data = {
            'cookies': cookies,
            'capture_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'capture_domain': 'baijiahao.baidu.com',
            'nick': user_info.get('name', ''),
            'username': user_info.get('username', ''),
            'app_id': str(user_info.get('shoubai_c_appid', '')),
            'user_id': user_info.get('userid', 0),
            'level': user_info.get('status_cn', ''),
            'domain': user_info.get('domain', ''),
            'wishes': user_info.get('wishes', ''),
            'media_type': user_info.get('media_type', ''),
        }
        # Add to the store and persist.
        existing_data[account_id] = account_data
        self._write_accounts(existing_data)
        print(f"\n{'='*70}")
        print(f"✅ 账号已保存到: {self.cookies_file}")
        print(f"{'='*70}")
        print(f"账号ID: {account_id}")
        print(f"昵称: {account_data['nick']}")
        print(f"用户名: {account_data['username']}")
        print(f"App ID: {account_data['app_id']}")
        print(f"等级: {account_data['level']}")
        print(f"领域: {account_data['domain']}")
        print(f"{'='*70}")

    def add_from_cookie_string(self, cookie_string: str):
        """Add an account from a raw cookie string.

        Runs the three steps (parse, fetch account info, save) and prints
        progress; stops with a message as soon as a step fails.

        Args:
            cookie_string: Cookie text in any layout accepted by
                ``parse_cookie_string``.
        """
        print("\n" + "="*70)
        print("从Cookie字符串添加百家号账号")
        print("="*70)
        # Step 1: parse the cookies.
        print("\n[1/3] 解析Cookie...")
        cookies = self.parse_cookie_string(cookie_string)
        if not cookies:
            print("[X] Cookie解析失败或为空")
            return
        print(f"[OK] 解析到 {len(cookies)} 个Cookie字段")
        # Step 2: fetch the account information.
        print("\n[2/3] 获取账号信息...")
        account_info = self.get_account_info(cookies)
        if not account_info:
            print("[X] 获取账号信息失败")
            return
        user_info = account_info.get('user') or {}
        nick = user_info.get('name', '')
        if not nick:
            print("[X] 未获取到账号昵称")
            return
        print(f"[OK] 账号昵称: {nick}")
        # Step 3: save, using the nickname as the account key.
        print("\n[3/3] 保存账号...")
        self.save_account(nick, cookies, account_info)
def main():
    """Script entry point: read a cookie string and register the account.

    The cookie text is taken from the first source that provides one:

    1. the file named by the first command-line argument,
    2. the ``BAIJIAHAO_COOKIE`` environment variable,
    3. standard input, when it is piped or redirected.

    Login cookies are live credentials, so they are read at run time instead
    of being embedded in this file. An earlier revision hard-coded a full
    cookie set here (including BDUSS and the Stoken values); treat those
    values as exposed and invalidate that session.
    """
    if len(sys.argv) > 1:
        cookie_path = sys.argv[1]
        try:
            # utf-8-sig also accepts files saved with a leading BOM.
            with open(cookie_path, 'r', encoding='utf-8-sig') as f:
                cookie_string = f.read()
        except (OSError, UnicodeDecodeError) as e:
            print(f"[X] 无法读取Cookie文件 {cookie_path}: {e}")
            return
    else:
        cookie_string = os.environ.get('BAIJIAHAO_COOKIE', '')
        # Only read stdin when it is not an interactive terminal, so running
        # the script with no input prints usage instead of blocking.
        if not cookie_string.strip() and sys.stdin is not None and not sys.stdin.isatty():
            cookie_string = sys.stdin.read()
    if not cookie_string.strip():
        print("[X] 未提供Cookie字符串")
        print("用法: python add_account_from_cookie.py <cookie文件路径>")
        print("      或通过环境变量 BAIJIAHAO_COOKIE / 标准输入提供Cookie")
        return
    adder = AccountAdder()
    adder.add_from_cookie_string(cookie_string)


if __name__ == '__main__':
    main()