607 lines
21 KiB
Python
607 lines
21 KiB
Python
"""
|
||
小红书笔记发布脚本
|
||
提供Cookie、文案(标题、内容、标签、图片)完成发布操作
|
||
支持本地图片路径和网络URL图片
|
||
"""
|
||
import sys
|
||
import json
|
||
import asyncio
|
||
import io
|
||
import os
|
||
import re
|
||
import aiohttp
|
||
import hashlib
|
||
import unicodedata
|
||
from typing import List, Dict, Any, Union
|
||
from pathlib import Path
|
||
from xhs_login import XHSLoginService
|
||
|
||
|
||
class XHSPublishService:
|
||
"""小红书笔记发布服务"""
|
||
|
||
def __init__(self, cookies: Union[List[Dict[str, Any]], Dict[str, str]], proxy: str | None = None, user_agent: str | None = None):
|
||
"""
|
||
初始化发布服务
|
||
|
||
Args:
|
||
cookies: Cookie数据,支持两种格式:
|
||
1. Playwright格式(列表): [{"name": "a1", "value": "xxx", "domain": "...", ...}]
|
||
2. 键值对格式(字典): {"a1": "xxx", "webId": "yyy", ...}
|
||
proxy: 可选的代理地址(例如 http://user:pass@ip:port)
|
||
user_agent: 可选的自定义User-Agent
|
||
"""
|
||
# 转换Cookie格式
|
||
self.cookies = self._normalize_cookies(cookies)
|
||
self.proxy = proxy
|
||
self.user_agent = user_agent
|
||
self.service = XHSLoginService()
|
||
self.temp_dir = "temp_downloads" # 临时下载目录
|
||
self.downloaded_files = [] # 记录下载的文件,用于清理
|
||
|
||
def _normalize_cookies(self, cookies: Union[List[Dict[str, Any]], Dict[str, str]]) -> List[Dict[str, Any]]:
|
||
"""
|
||
将Cookie标准化为Playwright格式
|
||
|
||
Args:
|
||
cookies: 输入的Cookie(支持两种格式)
|
||
|
||
Returns:
|
||
Playwright格式的Cookie列表
|
||
"""
|
||
# 如果已经是列表格式(Playwright格式)
|
||
if isinstance(cookies, list):
|
||
# 检查是否包含必要字段
|
||
if cookies and 'name' in cookies[0] and 'value' in cookies[0]:
|
||
print("✅ 使用 Playwright 格式的 Cookie", file=sys.stderr)
|
||
return cookies
|
||
|
||
# 如果是字典格式(键值对格式),转换为Playwright格式
|
||
if isinstance(cookies, dict):
|
||
print("✅ 检测到键值对格式的 Cookie,转换为 Playwright 格式", file=sys.stderr)
|
||
playwright_cookies = []
|
||
for name, value in cookies.items():
|
||
cookie = {
|
||
"name": name,
|
||
"value": str(value),
|
||
"domain": ".xiaohongshu.com",
|
||
"path": "/",
|
||
"expires": -1, # 会话Cookie
|
||
"httpOnly": False,
|
||
"secure": False,
|
||
"sameSite": "Lax"
|
||
}
|
||
|
||
# 特殊处理某些Cookie的属性
|
||
if name == "web_session":
|
||
cookie["httpOnly"] = True
|
||
cookie["secure"] = True
|
||
elif name in ["acw_tc"]:
|
||
cookie["httpOnly"] = True
|
||
|
||
playwright_cookies.append(cookie)
|
||
|
||
print(f" 转换了 {len(playwright_cookies)} 个 Cookie", file=sys.stderr)
|
||
return playwright_cookies
|
||
|
||
# 如果格式不支持,抛出异常
|
||
raise ValueError(f"不支持的Cookie格式: {type(cookies)}。请使用列表或字典格式。")
|
||
|
||
def _calculate_title_width(self, title: str) -> int:
|
||
width = 0
|
||
for ch in title:
|
||
if unicodedata.east_asian_width(ch) in ("F", "W"):
|
||
width += 2
|
||
else:
|
||
width += 1
|
||
return width
|
||
|
||
def is_url(self, path: str) -> bool:
|
||
"""
|
||
判断是否为网络URL
|
||
|
||
Args:
|
||
path: 图片路径或URL
|
||
|
||
Returns:
|
||
是否为URL
|
||
"""
|
||
url_pattern = re.compile(r'^https?://', re.IGNORECASE)
|
||
return bool(url_pattern.match(path))
|
||
|
||
async def download_image(self, url: str, index: int = 0) -> str:
|
||
"""
|
||
下载网络图片到本地临时目录
|
||
|
||
Args:
|
||
url: 图片URL
|
||
index: 图片索引(用于命名)
|
||
|
||
Returns:
|
||
本地文件路径
|
||
"""
|
||
try:
|
||
print(f" 正在下载图片 [{index + 1}]: {url}", file=sys.stderr)
|
||
|
||
# 创建临时目录
|
||
Path(self.temp_dir).mkdir(exist_ok=True)
|
||
|
||
# 生成文件名(使用URL的hash值)
|
||
url_hash = hashlib.md5(url.encode()).hexdigest()[:10]
|
||
|
||
# 从URL提取文件扩展名
|
||
ext = '.jpg' # 默认扩展名
|
||
url_path = url.split('?')[0] # 去除URL参数
|
||
if '.' in url_path:
|
||
ext = '.' + url_path.split('.')[-1].lower()
|
||
if ext not in ['.jpg', '.jpeg', '.png', '.gif', '.webp']:
|
||
ext = '.jpg'
|
||
|
||
filename = f"image_{index}_{url_hash}{ext}"
|
||
filepath = os.path.join(self.temp_dir, filename)
|
||
|
||
# 下载图片
|
||
async with aiohttp.ClientSession() as session:
|
||
async with session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
|
||
if response.status == 200:
|
||
content = await response.read()
|
||
|
||
# 保存文件
|
||
with open(filepath, 'wb') as f:
|
||
f.write(content)
|
||
|
||
# 记录已下载文件
|
||
self.downloaded_files.append(filepath)
|
||
|
||
# 获取文件大小
|
||
file_size = len(content) / 1024 # KB
|
||
print(f" ✅ 下载成功: {filename} ({file_size:.1f}KB)", file=sys.stderr)
|
||
|
||
return os.path.abspath(filepath)
|
||
else:
|
||
raise Exception(f"下载失败,HTTP状态码: {response.status}")
|
||
|
||
except asyncio.TimeoutError:
|
||
raise Exception(f"下载超时: {url}")
|
||
except Exception as e:
|
||
raise Exception(f"下载图片失败 ({url}): {str(e)}")
|
||
|
||
async def process_images(self, images: List[str]) -> List[str]:
|
||
"""
|
||
处理图片列表,将网络URL下载到本地
|
||
|
||
Args:
|
||
images: 图片路径列表(可以是本地路径或网络URL)
|
||
|
||
Returns:
|
||
本地图片路径列表
|
||
"""
|
||
if not images:
|
||
return []
|
||
|
||
local_images = []
|
||
|
||
# OSS域名前缀(用于补充不完整的图片路径)
|
||
oss_prefix = "https://bxmkb-beijing.oss-cn-beijing.aliyuncs.com/Images/"
|
||
|
||
print(f"\n正在处理 {len(images)} 张图片...", file=sys.stderr)
|
||
|
||
for i, img in enumerate(images):
|
||
# 检查是否需要补充OSS前缀
|
||
original_img = img
|
||
print(f" [调试] 处理图片 {i+1}: '{img}'", file=sys.stderr)
|
||
print(f" [调试] is_url={self.is_url(img)}, isabs={os.path.isabs(img)}", file=sys.stderr)
|
||
|
||
if not self.is_url(img) and not os.path.isabs(img):
|
||
# 不是URL也不是绝对路径,检查是否需要补充OSS前缀
|
||
print(f" [调试] 不是URL也不是绝对路径", file=sys.stderr)
|
||
# 如果路径不包含协议且不以/开头,可能是相对OSS路径
|
||
if '/' in img and not img.startswith('/'):
|
||
# 可能是OSS相对路径,补充前缀
|
||
img = oss_prefix + img
|
||
print(f" ✅ 检测到相对路径,补充OSS前缀: {original_img} -> {img}", file=sys.stderr)
|
||
else:
|
||
print(f" [调试] 不满足补充条件: '/' in img={('/' in img)}, not startswith('/')={not img.startswith('/')}", file=sys.stderr)
|
||
|
||
if self.is_url(img):
|
||
# 网络URL,需要下载
|
||
try:
|
||
local_path = await self.download_image(img, i)
|
||
local_images.append(local_path)
|
||
except Exception as e:
|
||
print(f" ⚠️ 图片下载失败: {str(e)}", file=sys.stderr)
|
||
# 继续处理其他图片
|
||
continue
|
||
else:
|
||
# 本地路径
|
||
# 先尝试直接使用,如果不存在则尝试相对路径
|
||
abs_path = None
|
||
|
||
# 1. 尝试作为绝对路径
|
||
if os.path.isabs(img) and os.path.exists(img):
|
||
abs_path = img
|
||
# 2. 尝试相对于当前工作目录
|
||
elif os.path.exists(img):
|
||
abs_path = os.path.abspath(img)
|
||
# 3. 尝试相对于 static 目录
|
||
elif os.path.exists(os.path.join('static', img)):
|
||
abs_path = os.path.abspath(os.path.join('static', img))
|
||
# 4. 尝试相对于 ../go_backend/static 目录
|
||
elif os.path.exists(os.path.join('..', 'go_backend', 'static', img)):
|
||
abs_path = os.path.abspath(os.path.join('..', 'go_backend', 'static', img))
|
||
|
||
if abs_path:
|
||
local_images.append(abs_path)
|
||
print(f" ✅ 本地图片 [{i + 1}]: {os.path.basename(abs_path)} ({abs_path})", file=sys.stderr)
|
||
else:
|
||
print(f" ⚠️ 本地图片不存在: {img}", file=sys.stderr)
|
||
|
||
print(f"\n成功处理 {len(local_images)}/{len(images)} 张图片", file=sys.stderr)
|
||
return local_images
|
||
|
||
def cleanup_temp_files(self):
|
||
"""
|
||
清理临时下载的文件
|
||
"""
|
||
if not self.downloaded_files:
|
||
return
|
||
|
||
print(f"\n清理 {len(self.downloaded_files)} 个临时文件...", file=sys.stderr)
|
||
for filepath in self.downloaded_files:
|
||
try:
|
||
if os.path.exists(filepath):
|
||
os.remove(filepath)
|
||
print(f" 已删除: {os.path.basename(filepath)}", file=sys.stderr)
|
||
except Exception as e:
|
||
print(f" 删除失败 {filepath}: {e}", file=sys.stderr)
|
||
|
||
# 清空记录
|
||
self.downloaded_files = []
|
||
|
||
async def publish(
|
||
self,
|
||
title: str,
|
||
content: str,
|
||
images: List[str] = None,
|
||
tags: List[str] = None,
|
||
cleanup: bool = True
|
||
) -> Dict[str, Any]:
|
||
"""
|
||
发布笔记
|
||
|
||
Args:
|
||
title: 笔记标题
|
||
content: 笔记内容
|
||
images: 图片路径列表(支持本地文件路径或网络URL)
|
||
tags: 标签列表(例如:["美食", "探店"])
|
||
cleanup: 是否清理临时下载的图片文件(默认True)
|
||
|
||
Returns:
|
||
Dict containing success status, message, and publish result
|
||
"""
|
||
try:
|
||
print("\n========== 开始发布小红书笔记 ==========", file=sys.stderr)
|
||
print(f"标题: {title}", file=sys.stderr)
|
||
print(f"内容: {content[:100]}{'...' if len(content) > 100 else ''}", file=sys.stderr)
|
||
print(f"图片: {len(images) if images else 0} 张", file=sys.stderr)
|
||
print(f"标签: {tags if tags else []}", file=sys.stderr)
|
||
|
||
width = self._calculate_title_width(title)
|
||
if width > 40:
|
||
return {
|
||
"success": False,
|
||
"error": f"标题长度超过限制(当前宽度 {width},平台限制 40)"
|
||
}
|
||
|
||
if tags:
|
||
if len(tags) > 10:
|
||
tags = tags[:10]
|
||
print("⚠️ 标签数量超过10,已截取前10个标签", file=sys.stderr)
|
||
|
||
local_images = None
|
||
if images:
|
||
local_images = await self.process_images(images)
|
||
if not local_images:
|
||
print("⚠️ 警告:没有可用的图片", file=sys.stderr)
|
||
return {
|
||
"success": False,
|
||
"error": "没有可用的图片,无法发布笔记"
|
||
}
|
||
|
||
# 初始化浏览器并注入Cookie
|
||
print("\n1. 初始化浏览器...", file=sys.stderr)
|
||
await self.service.init_browser(cookies=self.cookies, proxy=self.proxy, user_agent=self.user_agent)
|
||
|
||
# 验证登录状态
|
||
print("\n2. 验证登录状态...", file=sys.stderr)
|
||
verify_result = await self.service.verify_login_status()
|
||
|
||
if not verify_result.get('logged_in'):
|
||
return {
|
||
"success": False,
|
||
"error": "Cookie已失效或未登录",
|
||
"details": verify_result
|
||
}
|
||
|
||
print("✅ 登录状态有效", file=sys.stderr)
|
||
|
||
# 发布笔记
|
||
print("\n3. 开始发布笔记...", file=sys.stderr)
|
||
result = await self.service.publish_note(
|
||
title=title,
|
||
content=content,
|
||
images=local_images,
|
||
topics=tags
|
||
)
|
||
|
||
print("\n========== 发布完成 ==========", file=sys.stderr)
|
||
return result
|
||
|
||
except Exception as e:
|
||
print(f"\n发布异常: {str(e)}", file=sys.stderr)
|
||
return {
|
||
"success": False,
|
||
"error": str(e)
|
||
}
|
||
|
||
finally:
|
||
# 关闭浏览器
|
||
await self.service.close_browser()
|
||
|
||
# 清理临时文件
|
||
if cleanup:
|
||
self.cleanup_temp_files()
|
||
|
||
|
||
async def publish_from_config(config_file: str) -> Dict[str, Any]:
    """Publish a note described by a JSON configuration file.

    The file must contain a JSON object. Recognised keys are ``cookies``,
    ``title``, ``content``, ``images``, ``tags``, ``proxy`` and
    ``user_agent``; ``cookies``, ``title`` and ``content`` are required.

    Args:
        config_file: Path of the JSON configuration file.

    Returns:
        The publish result, or ``{"success": False, "error": ...}`` when the
        file cannot be read, a required value is missing, or publishing
        fails. This function does not raise.
    """
    # Only the read/parse step is reported as a config-read failure.
    try:
        with open(config_file, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except Exception as e:
        return {
            "success": False,
            "error": f"读取配置文件失败: {str(e)}"
        }

    if not isinstance(config, dict):
        return {
            "success": False,
            "error": "读取配置文件失败: 配置文件顶层必须是 JSON 对象"
        }

    cookies = config.get('cookies', [])
    title = config.get('title', '')
    content = config.get('content', '')
    images = config.get('images', [])
    tags = config.get('tags', [])
    proxy = config.get('proxy')
    user_agent = config.get('user_agent')

    if not cookies:
        return {
            "success": False,
            "error": "缺少Cookie参数"
        }

    if not title or not content:
        return {
            "success": False,
            "error": "标题和内容不能为空"
        }

    # Image existence is not checked here because entries may be network
    # URLs; XHSPublishService.process_images handles that.
    try:
        publisher = XHSPublishService(cookies, proxy=proxy, user_agent=user_agent)
        return await publisher.publish(
            title=title,
            content=content,
            images=images,
            tags=tags
        )
    except Exception as e:
        # e.g. an unsupported cookie format rejected by the constructor.
        return {
            "success": False,
            "error": str(e)
        }
|
||
|
||
|
||
async def publish_from_params(
    cookies_json: str,
    title: str,
    content: str,
    images_json: str = None,
    tags_json: str = None
) -> Dict[str, Any]:
    """Publish a note from command-line style string parameters.

    Progress messages are written to stderr so that stdout carries nothing
    but the JSON result printed by the caller.

    Args:
        cookies_json: Either a cookie JSON string or the path of a file
            containing cookie JSON.
        title: Note title.
        content: Note body.
        images_json: Optional JSON array string of image paths/URLs.
        tags_json: Optional JSON array string of tags.

    Returns:
        The publish result, or ``{"success": False, "error": ...}`` on any
        failure. This function does not raise.
    """
    try:
        # The cookie argument may name a file or be the JSON text itself.
        if os.path.isfile(cookies_json):
            try:
                with open(cookies_json, 'r', encoding='utf-8') as f:
                    cookies = json.load(f)
                print(f"✅ 从文件加载 Cookie: {cookies_json}", file=sys.stderr)
            except Exception as e:
                return {
                    "success": False,
                    "error": f"读取 Cookie 文件失败: {str(e)}"
                }
        else:
            try:
                cookies = json.loads(cookies_json)
                print("✅ 从 JSON 字符串解析 Cookie", file=sys.stderr)
            except json.JSONDecodeError as e:
                return {
                    "success": False,
                    "error": f"Cookie 参数既不是有效文件路径,也不是有效 JSON 字符串: {str(e)}"
                }

        if not cookies:
            return {
                "success": False,
                "error": "Cookie 为空"
            }

        images = json.loads(images_json) if images_json else []
        tags = json.loads(tags_json) if tags_json else []

        # Proxy and custom User-Agent are not supported in command-line mode.
        publisher = XHSPublishService(cookies)
        return await publisher.publish(
            title=title,
            content=content,
            images=images,
            tags=tags
        )

    except json.JSONDecodeError as e:
        return {
            "success": False,
            "error": f"JSON解析失败: {str(e)}"
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }
|
||
|
||
|
||
def main():
    """Command-line entry point.

    Usage:
        1. Publish from a configuration file:
           python xhs_publish.py --config publish_config.json

        2. Publish from command-line parameters:
           python xhs_publish.py --cookies '<cookies_json>' --title '标题' --content '内容' [--images '<images_json>'] [--tags '<tags_json>']

    The result is printed to stdout as JSON. Usage errors and unexpected
    exceptions print a JSON error object and exit with status 1.
    """
    # Force UTF-8 on the standard streams on Windows.
    if sys.platform == 'win32':
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')

    def _fail(message):
        # Emit a JSON error object and terminate with a non-zero status.
        print(json.dumps({
            "success": False,
            "error": message
        }, ensure_ascii=False))
        sys.exit(1)

    if len(sys.argv) < 2:
        _fail("缺少参数,请使用 --config 或 --cookies")

    try:
        args = sys.argv[1:]
        mode = args[0]

        if mode == '--config':
            # Mode 1: every parameter comes from a JSON config file.
            if len(args) < 2:
                _fail("缺少配置文件路径")

            outcome = asyncio.run(publish_from_config(args[1]))
            print(json.dumps(outcome, ensure_ascii=False, indent=2))

        elif mode == '--cookies':
            # Mode 2: parameters are given as "--flag value" pairs. A flag
            # without a following value is skipped; later duplicates win.
            flag_names = {
                '--cookies': 'cookies',
                '--title': 'title',
                '--content': 'content',
                '--images': 'images',
                '--tags': 'tags',
            }
            params = {}
            pos = 0
            total = len(args)
            while pos < total:
                key = flag_names.get(args[pos])
                if key is not None and pos + 1 < total:
                    params[key] = args[pos + 1]
                    pos += 2
                else:
                    pos += 1

            if 'cookies' not in params:
                _fail("缺少 --cookies 参数")

            if not ('title' in params and 'content' in params):
                _fail("缺少 --title 或 --content 参数")

            outcome = asyncio.run(publish_from_params(
                cookies_json=params['cookies'],
                title=params['title'],
                content=params['content'],
                images_json=params.get('images'),
                tags_json=params.get('tags')
            ))
            print(json.dumps(outcome, ensure_ascii=False, indent=2))

        else:
            _fail(f"未知参数: {mode},请使用 --config 或 --cookies")

    except Exception as e:
        # SystemExit raised by _fail is not an Exception and passes through.
        _fail(str(e))
|
||
|
||
|
||
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||
|
||
|