Files
ai_wht_wechat/backend/xhs_publish.py
2025-12-19 22:36:48 +08:00

572 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
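Xiaohongshu (RED) note publishing script.

Publishes a note from the supplied cookies, title, body content, tags and images.
Images may be given as local file paths or as network URLs.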
"""
import asyncio
import hashlib
import io
import json
import os
import re
import sys
import unicodedata
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from urllib.parse import urlsplit

import aiohttp

from xhs_login import XHSLoginService
class XHSPublishService:
    """Publish Xiaohongshu notes through a cookie-authenticated browser session.

    The service normalizes the supplied cookies, downloads remote images into
    a temporary directory, and delegates the browser work to
    ``XHSLoginService`` (``init_browser``, ``verify_login_status``,
    ``publish_note`` and ``close_browser``). All progress messages are written
    to stderr.
    """

    # Maximum display width of a title (wide/full-width characters count as 2).
    MAX_TITLE_WIDTH = 40
    # Maximum number of tags forwarded to the publisher; extras are dropped.
    MAX_TAGS = 10
    # Extensions kept as-is when derived from an image URL; anything else
    # falls back to ".jpg".
    _IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.webp')
    # Compiled once instead of on every is_url() call.
    _URL_PATTERN = re.compile(r'^https?://', re.IGNORECASE)

    def __init__(
        self,
        cookies: Union[List[Dict[str, Any]], Dict[str, str]],
        proxy: Optional[str] = None,
        user_agent: Optional[str] = None,
    ):
        """Initialize the publishing service.

        Args:
            cookies: Cookie data in one of two formats:
                1. Playwright-style list:
                   [{"name": "a1", "value": "xxx", "domain": "...", ...}]
                2. Plain key/value dict: {"a1": "xxx", "webId": "yyy", ...}
            proxy: Optional proxy address (e.g. http://user:pass@ip:port).
            user_agent: Optional custom User-Agent.

        Raises:
            ValueError: If ``cookies`` is neither a valid list nor a dict.
        """
        self.cookies = self._normalize_cookies(cookies)
        self.proxy = proxy
        self.user_agent = user_agent
        self.service = XHSLoginService()
        self.temp_dir = "temp_downloads"  # directory for downloaded images
        self.downloaded_files = []  # downloaded paths, tracked for cleanup

    def _normalize_cookies(
        self, cookies: Union[List[Dict[str, Any]], Dict[str, str]]
    ) -> List[Dict[str, Any]]:
        """Convert cookies to the Playwright list format.

        Args:
            cookies: Either a Playwright-style list of cookie dicts or a
                plain ``{name: value}`` dict.

        Returns:
            A list of Playwright-style cookie dicts. A valid list input is
            returned unchanged (same object).

        Raises:
            ValueError: If the list is empty, contains an entry that is not a
                dict with ``name`` and ``value`` keys, or the input is neither
                a list nor a dict.
        """
        if isinstance(cookies, list):
            # Every entry must be a dict: a bare `'name' in entry` check would
            # also succeed as a substring test on strings such as "name=value".
            if cookies and all(
                isinstance(entry, dict) and 'name' in entry and 'value' in entry
                for entry in cookies
            ):
                print("✅ 使用 Playwright 格式的 Cookie", file=sys.stderr)
                return cookies
            raise ValueError("Cookie列表为空,或存在缺少 name/value 字段的条目")

        if isinstance(cookies, dict):
            print("✅ 检测到键值对格式的 Cookie转换为 Playwright 格式", file=sys.stderr)
            playwright_cookies = []
            for name, value in cookies.items():
                cookie = {
                    "name": name,
                    "value": str(value),
                    "domain": ".xiaohongshu.com",
                    "path": "/",
                    "expires": -1,  # session cookie
                    "httpOnly": False,
                    "secure": False,
                    "sameSite": "Lax",
                }
                # Some cookies carry stricter attributes.
                if name == "web_session":
                    cookie["httpOnly"] = True
                    cookie["secure"] = True
                elif name in ["acw_tc"]:
                    cookie["httpOnly"] = True
                playwright_cookies.append(cookie)
            print(f" 转换了 {len(playwright_cookies)} 个 Cookie", file=sys.stderr)
            return playwright_cookies

        raise ValueError(f"不支持的Cookie格式: {type(cookies)}。请使用列表或字典格式。")

    def _calculate_title_width(self, title: str) -> int:
        """Return the display width of ``title``.

        Characters whose East Asian width is Fullwidth ("F") or Wide ("W")
        count as 2; every other character counts as 1.
        """
        return sum(
            2 if unicodedata.east_asian_width(ch) in ("F", "W") else 1
            for ch in title
        )

    def is_url(self, path: str) -> bool:
        """Return True if ``path`` is a string starting with http:// or https://.

        The scheme comparison is case-insensitive. Non-string input is treated
        as "not a URL" instead of raising.
        """
        return isinstance(path, str) and bool(self._URL_PATTERN.match(path))

    @classmethod
    def _guess_extension(cls, url: str) -> str:
        """Return the lower-cased image extension found in the URL path.

        Only the path component is inspected, so query strings and fragments
        cannot leak into the extension. Unknown or missing extensions fall
        back to ".jpg".
        """
        suffix = os.path.splitext(urlsplit(url).path)[1].lower()
        return suffix if suffix in cls._IMAGE_EXTENSIONS else '.jpg'

    async def download_image(self, url: str, index: int = 0) -> str:
        """Download a remote image into the temporary directory.

        Args:
            url: Image URL.
            index: Position of the image in the input list (used in the
                generated file name and in log messages).

        Returns:
            Absolute path of the downloaded file.

        Raises:
            Exception: On timeout, a non-200 HTTP status, or any other
                download/write failure. The original error is chained.
        """
        try:
            print(f" 正在下载图片 [{index + 1}]: {url}", file=sys.stderr)
            Path(self.temp_dir).mkdir(parents=True, exist_ok=True)

            # The file name combines the index with a short hash of the URL.
            url_hash = hashlib.md5(url.encode()).hexdigest()[:10]
            filename = f"image_{index}_{url_hash}{self._guess_extension(url)}"
            filepath = os.path.join(self.temp_dir, filename)

            async with aiohttp.ClientSession() as session:
                async with session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
                    if response.status != 200:
                        raise Exception(f"下载失败HTTP状态码: {response.status}")
                    content = await response.read()

            # Record the path before writing so that a partially written file
            # is still removed by cleanup_temp_files().
            self.downloaded_files.append(filepath)
            with open(filepath, 'wb') as f:
                f.write(content)

            file_size = len(content) / 1024  # KB
            print(f" ✅ 下载成功: {filename} ({file_size:.1f}KB)", file=sys.stderr)
            return os.path.abspath(filepath)
        except asyncio.TimeoutError as e:
            raise Exception(f"下载超时: {url}") from e
        except Exception as e:
            raise Exception(f"下载图片失败 ({url}): {str(e)}") from e

    async def process_images(self, images: List[str]) -> List[str]:
        """Resolve a list of images to local absolute paths.

        URLs are downloaded; local paths are kept if they exist. Entries that
        fail to download or do not exist are logged and skipped.

        Args:
            images: Local paths and/or network URLs.

        Returns:
            Absolute local paths of the usable images, in input order.
        """
        if not images:
            return []

        local_images = []
        print(f"\n正在处理 {len(images)} 张图片...", file=sys.stderr)
        for i, img in enumerate(images):
            if self.is_url(img):
                try:
                    local_images.append(await self.download_image(img, i))
                except Exception as e:
                    # Keep going: one failed download must not drop the rest.
                    print(f" ⚠️ 图片下载失败: {str(e)}", file=sys.stderr)
                continue

            # Non-string entries cannot be paths; report them as missing.
            if isinstance(img, str) and os.path.exists(img):
                local_images.append(os.path.abspath(img))
                print(f" ✅ 本地图片 [{i + 1}]: {os.path.basename(img)}", file=sys.stderr)
            else:
                print(f" ⚠️ 本地图片不存在: {img}", file=sys.stderr)

        print(f"\n成功处理 {len(local_images)}/{len(images)} 张图片", file=sys.stderr)
        return local_images

    def cleanup_temp_files(self):
        """Delete every file recorded in ``downloaded_files`` and reset the list.

        Deletion failures are logged and do not stop the remaining deletions.
        """
        if not self.downloaded_files:
            return

        print(f"\n清理 {len(self.downloaded_files)} 个临时文件...", file=sys.stderr)
        for filepath in self.downloaded_files:
            try:
                if os.path.exists(filepath):
                    os.remove(filepath)
                    print(f" 已删除: {os.path.basename(filepath)}", file=sys.stderr)
            except Exception as e:
                print(f" 删除失败 {filepath}: {e}", file=sys.stderr)
        self.downloaded_files = []

    async def publish(
        self,
        title: str,
        content: str,
        images: Optional[List[str]] = None,
        tags: Optional[List[str]] = None,
        cleanup: bool = True
    ) -> Dict[str, Any]:
        """Publish a note.

        Args:
            title: Note title; its display width must not exceed
                ``MAX_TITLE_WIDTH``.
            content: Note body.
            images: Local file paths and/or network URLs.
            tags: Tag list (e.g. ["美食", "探店"]); only the first
                ``MAX_TAGS`` entries are used.
            cleanup: Whether to delete downloaded temporary images afterwards
                (default True).

        Returns:
            On failure, a dict with ``"success": False`` and an ``"error"``
            message (plus ``"details"`` when the login check fails). Otherwise
            the value returned by ``service.publish_note`` is passed through.
        """
        try:
            print("\n========== 开始发布小红书笔记 ==========", file=sys.stderr)
            print(f"标题: {title}", file=sys.stderr)
            print(f"内容: {content[:100]}{'...' if len(content) > 100 else ''}", file=sys.stderr)
            print(f"图片: {len(images) if images else 0}", file=sys.stderr)
            print(f"标签: {tags if tags else []}", file=sys.stderr)

            width = self._calculate_title_width(title)
            if width > self.MAX_TITLE_WIDTH:
                return {
                    "success": False,
                    "error": f"标题长度超过限制(当前宽度 {width},平台限制 {self.MAX_TITLE_WIDTH})"
                }

            if tags and len(tags) > self.MAX_TAGS:
                tags = tags[:self.MAX_TAGS]
                print("⚠️ 标签数量超过10已截取前10个标签", file=sys.stderr)

            local_images = None
            if images:
                local_images = await self.process_images(images)
                if not local_images:
                    # Images were requested but none could be resolved.
                    print("⚠️ 警告:没有可用的图片", file=sys.stderr)
                    return {
                        "success": False,
                        "error": "没有可用的图片,无法发布笔记"
                    }

            print("\n1. 初始化浏览器...", file=sys.stderr)
            await self.service.init_browser(cookies=self.cookies, proxy=self.proxy, user_agent=self.user_agent)

            print("\n2. 验证登录状态...", file=sys.stderr)
            verify_result = await self.service.verify_login_status()
            if not verify_result.get('logged_in'):
                return {
                    "success": False,
                    "error": "Cookie已失效或未登录",
                    "details": verify_result
                }
            print("✅ 登录状态有效", file=sys.stderr)

            print("\n3. 开始发布笔记...", file=sys.stderr)
            result = await self.service.publish_note(
                title=title,
                content=content,
                images=local_images,
                topics=tags
            )
            print("\n========== 发布完成 ==========", file=sys.stderr)
            return result
        except Exception as e:
            print(f"\n发布异常: {str(e)}", file=sys.stderr)
            return {
                "success": False,
                "error": str(e)
            }
        finally:
            # A failing browser shutdown must neither skip the temp-file
            # cleanup nor replace the result being returned.
            try:
                await self.service.close_browser()
            except Exception as e:
                print(f"关闭浏览器失败: {e}", file=sys.stderr)
            finally:
                if cleanup:
                    self.cleanup_temp_files()
async def publish_from_config(config_file: str) -> Dict[str, Any]:
    """Read publish parameters from a JSON config file and publish the note.

    The config is a JSON object with the keys ``cookies``, ``title``,
    ``content`` (all required) and ``images``, ``tags``, ``proxy``,
    ``user_agent`` (optional).

    Args:
        config_file: Path to the JSON config file.

    Returns:
        The publish result, or a dict with ``"success": False`` and an
        ``"error"`` message when the config cannot be read, is incomplete,
        or publishing fails.
    """
    # Only file access and parsing are reported as a "read config" failure;
    # later errors carry their own message.
    try:
        with open(config_file, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except Exception as e:
        return {
            "success": False,
            "error": f"读取配置文件失败: {str(e)}"
        }

    if not isinstance(config, dict):
        return {
            "success": False,
            "error": "读取配置文件失败: 配置文件顶层必须是JSON对象"
        }

    cookies = config.get('cookies', [])
    title = config.get('title', '')
    content = config.get('content', '')
    images = config.get('images', [])
    tags = config.get('tags', [])
    proxy = config.get('proxy')
    user_agent = config.get('user_agent')

    if not cookies:
        return {
            "success": False,
            "error": "缺少Cookie参数"
        }
    if not title or not content:
        return {
            "success": False,
            "error": "标题和内容不能为空"
        }

    # Image files are not checked here because entries may be network URLs;
    # the publisher resolves and validates them itself.
    try:
        publisher = XHSPublishService(cookies, proxy=proxy, user_agent=user_agent)
        return await publisher.publish(
            title=title,
            content=content,
            images=images,
            tags=tags
        )
    except Exception as e:
        # e.g. an unsupported cookie format rejected by the constructor.
        return {
            "success": False,
            "error": str(e)
        }
async def publish_from_params(
    cookies_json: str,
    title: str,
    content: str,
    images_json: str = None,
    tags_json: str = None
) -> Dict[str, Any]:
    """Publish a note from command-line style string parameters.

    Args:
        cookies_json: Either a cookie JSON string or the path of a file
            containing cookie JSON.
        title: Note title.
        content: Note body.
        images_json: Optional JSON array of image paths/URLs.
        tags_json: Optional JSON array of tags.

    Returns:
        The publish result, or a dict with ``"success": False`` and an
        ``"error"`` message when a parameter cannot be parsed or publishing
        fails.
    """
    try:
        # The cookie argument is treated as a file path when such a file
        # exists, otherwise as an inline JSON string.
        cookies = None
        if os.path.isfile(cookies_json):
            try:
                with open(cookies_json, 'r', encoding='utf-8') as f:
                    cookies = json.load(f)
                # Status goes to stderr: stdout carries the JSON result.
                print(f"✅ 从文件加载 Cookie: {cookies_json}", file=sys.stderr)
            except Exception as e:
                return {
                    "success": False,
                    "error": f"读取 Cookie 文件失败: {str(e)}"
                }
        else:
            try:
                cookies = json.loads(cookies_json)
                print("✅ 从 JSON 字符串解析 Cookie", file=sys.stderr)
            except json.JSONDecodeError as e:
                return {
                    "success": False,
                    "error": f"Cookie 参数既不是有效文件路径,也不是有效 JSON 字符串: {str(e)}"
                }

        if not cookies:
            return {
                "success": False,
                "error": "Cookie 为空"
            }

        images = json.loads(images_json) if images_json else []
        if not isinstance(images, list):
            # A JSON string would otherwise be iterated character by character.
            return {
                "success": False,
                "error": "--images 必须是 JSON 数组"
            }

        tags = json.loads(tags_json) if tags_json else []
        if not isinstance(tags, list):
            return {
                "success": False,
                "error": "--tags 必须是 JSON 数组"
            }

        # Proxy and custom User-Agent are not supported in command-line mode.
        publisher = XHSPublishService(cookies)
        return await publisher.publish(
            title=title,
            content=content,
            images=images,
            tags=tags
        )
    except json.JSONDecodeError as e:
        return {
            "success": False,
            "error": f"JSON解析失败: {str(e)}"
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }
def _print_error_and_exit(message: str) -> None:
    """Print a failure result as JSON on stdout and exit with status 1."""
    print(json.dumps({
        "success": False,
        "error": message
    }, ensure_ascii=False))
    sys.exit(1)


def main():
    """Command-line entry point.

    Usage:
        1. Publish from a config file:
           python xhs_publish.py --config publish_config.json
        2. Publish from command-line parameters:
           python xhs_publish.py --cookies '<cookies_json>' --title '标题' --content '内容' [--images '<images_json>'] [--tags '<tags_json>']

    The result is printed to stdout as JSON. Usage errors and unexpected
    exceptions are printed as a JSON failure result and exit with status 1.
    """
    # Force UTF-8 output on Windows. reconfigure() switches the encoding in
    # place and keeps each stream's existing buffering, whereas wrapping the
    # raw buffer in a new TextIOWrapper creates a stream without line
    # buffering.
    if sys.platform == 'win32':
        for stream in (sys.stdout, sys.stderr):
            if hasattr(stream, 'reconfigure'):
                stream.reconfigure(encoding='utf-8')

    if len(sys.argv) < 2:
        _print_error_and_exit("缺少参数,请使用 --config 或 --cookies")

    try:
        args = sys.argv[1:]

        # Mode 1: read everything from a config file.
        if args[0] == '--config':
            if len(args) < 2:
                _print_error_and_exit("缺少配置文件路径")
            result = asyncio.run(publish_from_config(args[1]))
            print(json.dumps(result, ensure_ascii=False, indent=2))

        # Mode 2: individual command-line parameters.
        elif args[0] == '--cookies':
            # Flags that consume the following argument as their value.
            value_flags = {
                '--cookies': 'cookies',
                '--title': 'title',
                '--content': 'content',
                '--images': 'images',
                '--tags': 'tags',
            }
            params = {}
            i = 0
            while i < len(args):
                key = value_flags.get(args[i])
                if key is not None and i + 1 < len(args):
                    params[key] = args[i + 1]
                    i += 2
                else:
                    # Unknown tokens, or a flag without a value, are skipped.
                    i += 1

            if 'cookies' not in params:
                _print_error_and_exit("缺少 --cookies 参数")
            if 'title' not in params or 'content' not in params:
                _print_error_and_exit("缺少 --title 或 --content 参数")

            result = asyncio.run(publish_from_params(
                cookies_json=params['cookies'],
                title=params['title'],
                content=params['content'],
                images_json=params.get('images'),
                tags_json=params.get('tags')
            ))
            print(json.dumps(result, ensure_ascii=False, indent=2))

        else:
            _print_error_and_exit(f"未知参数: {args[0]},请使用 --config 或 --cookies")
    except Exception as e:
        # SystemExit is not an Exception subclass, so the exits above pass
        # through; only unexpected errors are reported here.
        _print_error_and_exit(str(e))


if __name__ == "__main__":
    main()