commit
100
.env.development
Normal file
@@ -0,0 +1,100 @@
|
||||
# ==============================================================================
|
||||
# 开发环境配置文件
|
||||
# ==============================================================================
|
||||
|
||||
# ---------- 环境标识 ----------
|
||||
ENV=development
|
||||
|
||||
# ---------- AdsPower浏览器配置 ----------
|
||||
# AdsPower API地址(本地默认端口50325)
|
||||
ADSPOWER_API_URL=http://127.0.0.1:50325
|
||||
|
||||
# AdsPower用户ID(登录AdsPower后台获取)
|
||||
ADSPOWER_USER_ID=user_h235l72
|
||||
|
||||
# AdsPower API密钥(可选,某些版本需要)
|
||||
ADSPOWER_API_KEY=e5afd5a4cead5589247febbeabc39bcb
|
||||
|
||||
# ---------- 服务配置 ----------
|
||||
# 服务监听地址(0.0.0.0为允许外部访问)
|
||||
SERVER_HOST=127.0.0.1
|
||||
|
||||
# 服务监听端口
|
||||
SERVER_PORT=5000
|
||||
|
||||
# ---------- 点击策略配置 ----------
|
||||
# 每个站点每日最少点击次数
|
||||
MIN_CLICK_COUNT=1
|
||||
|
||||
# 每个站点每日最多点击次数
|
||||
MAX_CLICK_COUNT=3
|
||||
|
||||
# 同一站点两次点击之间的间隔(分钟)
|
||||
CLICK_INTERVAL_MINUTES=5
|
||||
|
||||
# 不同站点任务间最小间隔(分钟)
|
||||
MIN_TASK_INTERVAL_MINUTES=1
|
||||
|
||||
# 不同站点任务间最大间隔(分钟)
|
||||
MAX_TASK_INTERVAL_MINUTES=1
|
||||
|
||||
# 最大并发执行任务数(1为串行执行)
|
||||
MAX_CONCURRENT_WORKERS=2
|
||||
|
||||
# 工作开始时间(小时,24小时制)
|
||||
WORK_START_HOUR=9
|
||||
|
||||
# 工作结束时间(小时,24小时制)
|
||||
WORK_END_HOUR=23
|
||||
|
||||
# 回复等待超时时间(秒)
|
||||
REPLY_WAIT_TIMEOUT=10
|
||||
|
||||
# ---------- 爬虫调度配置 ----------
|
||||
# 是否启用爬虫定时任务(True/False)
|
||||
CRAWLER_ENABLED=False
|
||||
|
||||
# 爬虫执行时间(HH:MM格式,24小时制)
|
||||
CRAWLER_SCHEDULE_TIME=02:00
|
||||
|
||||
# 每次爬取的任务数量
|
||||
CRAWLER_BATCH_SIZE=10
|
||||
|
||||
# ---------- 数据存储路径 ----------
|
||||
# 数据目录(开发环境与生产环境分离)
|
||||
DATA_DIR=./data_dev
|
||||
|
||||
# 日志目录(开发环境与生产环境分离)
|
||||
LOG_DIR=./logs_dev
|
||||
|
||||
# Query挖掘上传目录
|
||||
QUERY_UPLOAD_DIR=./query_upload
|
||||
|
||||
# ---------- 调试配置 ----------
|
||||
# 是否开启调试模式(True/False)
|
||||
DEBUG=True
|
||||
|
||||
# ---------- 测试配置 ----------
|
||||
# 测试完成后是否自动关闭浏览器(True/False)
|
||||
AUTO_CLOSE_BROWSER=True
|
||||
|
||||
# ---------- MySQL数据库配置 ----------
|
||||
# 数据库主机地址
|
||||
MYSQL_HOST=localhost
|
||||
|
||||
# 数据库端口
|
||||
MYSQL_PORT=3306
|
||||
|
||||
# 数据库用户名
|
||||
MYSQL_USER=root
|
||||
|
||||
# 数据库密码
|
||||
MYSQL_PASSWORD=JKjk20011115
|
||||
|
||||
# 数据库名称
|
||||
MYSQL_DATABASE=ai_article
|
||||
|
||||
|
||||
QWEN_API_KEY=sk-6d22dd845a624d9c92a821d24a50e2e8
|
||||
|
||||
QWEN_API_URL=https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions
|
||||
100
.env.production
Normal file
@@ -0,0 +1,100 @@
|
||||
# ==============================================================================
|
||||
# 生产环境配置文件
|
||||
# ==============================================================================
|
||||
|
||||
# ---------- 环境标识 ----------
|
||||
ENV=production
|
||||
|
||||
# ---------- AdsPower浏览器配置 ----------
|
||||
# AdsPower API地址(本地默认端口50325)
|
||||
ADSPOWER_API_URL=http://127.0.0.1:50325
|
||||
|
||||
# AdsPower用户ID(登录AdsPower后台获取)
|
||||
ADSPOWER_USER_ID=user_h23kr4w
|
||||
|
||||
# AdsPower API密钥(可选,某些版本需要)
|
||||
ADSPOWER_API_KEY=4f0329bfdfe85c48370c9970bab9d684
|
||||
|
||||
# ---------- 服务配置 ----------
|
||||
# 服务监听地址(0.0.0.0为允许外部访问)
|
||||
SERVER_HOST=0.0.0.0
|
||||
|
||||
# 服务监听端口
|
||||
SERVER_PORT=8090
|
||||
|
||||
# ---------- 点击策略配置 ----------
|
||||
# 每个站点每日最少点击次数
|
||||
MIN_CLICK_COUNT=1
|
||||
|
||||
# 每个站点每日最多点击次数
|
||||
MAX_CLICK_COUNT=3
|
||||
|
||||
# 同一站点两次点击之间的间隔(分钟)
|
||||
CLICK_INTERVAL_MINUTES=30
|
||||
|
||||
# 不同站点任务间最小间隔(分钟)
|
||||
MIN_TASK_INTERVAL_MINUTES=3
|
||||
|
||||
# 不同站点任务间最大间隔(分钟)
|
||||
MAX_TASK_INTERVAL_MINUTES=5
|
||||
|
||||
# 最大并发执行任务数(1为串行执行)
|
||||
MAX_CONCURRENT_WORKERS=3
|
||||
|
||||
# 工作开始时间(小时,24小时制)
|
||||
WORK_START_HOUR=9
|
||||
|
||||
# 工作结束时间(小时,24小时制)
|
||||
WORK_END_HOUR=21
|
||||
|
||||
# 回复等待超时时间(秒)
|
||||
REPLY_WAIT_TIMEOUT=30
|
||||
|
||||
# ---------- 爬虫调度配置 ----------
|
||||
# 是否启用爬虫定时任务(True/False)
|
||||
CRAWLER_ENABLED=False
|
||||
|
||||
# 爬虫执行时间(HH:MM格式,24小时制)
|
||||
CRAWLER_SCHEDULE_TIME=02:00
|
||||
|
||||
# 每次爬取的任务数量
|
||||
CRAWLER_BATCH_SIZE=10
|
||||
|
||||
# ---------- 数据存储路径 ----------
|
||||
# 数据目录(生产环境)
|
||||
DATA_DIR=/home/work/ai_mip/data
|
||||
|
||||
# 日志目录(生产环境)
|
||||
LOG_DIR=/home/work/ai_mip/logs
|
||||
|
||||
# Query挖掘上传目录
|
||||
QUERY_UPLOAD_DIR=/home/work/ai_mip/query_upload
|
||||
|
||||
# ---------- 调试配置 ----------
|
||||
# 是否开启调试模式(True/False)
|
||||
DEBUG=False
|
||||
|
||||
# ---------- 测试配置 ----------
|
||||
# 测试完成后是否自动关闭浏览器(True/False)
|
||||
AUTO_CLOSE_BROWSER=True
|
||||
|
||||
# ---------- MySQL数据库配置 ----------
|
||||
# 数据库主机地址
|
||||
MYSQL_HOST=8.149.233.36
|
||||
|
||||
# 数据库端口
|
||||
MYSQL_PORT=3306
|
||||
|
||||
# 数据库用户名
|
||||
MYSQL_USER=ai_article_read
|
||||
|
||||
# 数据库密码
|
||||
MYSQL_PASSWORD=7aK_H2yvokVumr84lLNDt8fDBp6P
|
||||
|
||||
# 数据库名称
|
||||
MYSQL_DATABASE=ai_article
|
||||
|
||||
# ---------- 分布式部署配置 ----------
|
||||
# 远程点击服务地址(仅Web服务需要配置)
|
||||
# 为空表示本地模式,设置URL表示远程模式
|
||||
CLICK_SERVICE_URL=http://60.205.132.82:8090
|
||||
1392
ad_automation.py
@@ -477,9 +477,10 @@ class AdsPowerClient:
|
||||
use_proxy: 是否使用代理
|
||||
|
||||
Returns:
|
||||
浏览器信息
|
||||
浏览器信息,如果使用代理会包含 proxy_id 字段
|
||||
"""
|
||||
target_user_id = user_id or self.user_id
|
||||
proxy_id = None
|
||||
|
||||
if use_proxy:
|
||||
# 1. 获取大麦IP代理
|
||||
@@ -507,10 +508,18 @@ class AdsPowerClient:
|
||||
# 3. 更新 Profile 使用新代理
|
||||
if not self.update_profile_proxy(target_user_id, proxy_id):
|
||||
logger.warning("更新 Profile 代理失败,将不使用代理启动浏览器")
|
||||
# 删除刚创建的代理
|
||||
self.delete_proxy(proxy_id)
|
||||
return self.start_browser(user_id=target_user_id)
|
||||
|
||||
# 4. 启动浏览器
|
||||
return self.start_browser(user_id=target_user_id)
|
||||
result = self.start_browser(user_id=target_user_id)
|
||||
|
||||
# 5. 如果启动成功且有代理,在返回结果中添加 proxy_id
|
||||
if result and proxy_id:
|
||||
result['proxy_id'] = proxy_id
|
||||
|
||||
return result
|
||||
|
||||
def start_browser(self, user_id: str = None) -> Optional[Dict]:
|
||||
"""
|
||||
@@ -542,7 +551,11 @@ class AdsPowerClient:
|
||||
# 准备请求体
|
||||
payload = {
|
||||
"profile_id": target_user_id,
|
||||
"launch_args": [], # 可以根据需要添加启动参数
|
||||
"launch_args": [
|
||||
"--no-sandbox", # 禁用沙箱(root用户必需)
|
||||
"--disable-setuid-sandbox", # 禁用setuid沙箱
|
||||
"--disable-dev-shm-usage" # 避免/dev/shm空间不足
|
||||
],
|
||||
"headless": "0",
|
||||
"last_opened_tabs": "1",
|
||||
"proxy_detection": "1",
|
||||
@@ -619,53 +632,31 @@ class AdsPowerClient:
|
||||
Playwright Browser 实例
|
||||
"""
|
||||
try:
|
||||
import asyncio
|
||||
import sys
|
||||
# 获取 CDP WebSocket 端点
|
||||
ws_endpoint = browser_info['data']['ws']['puppeteer']
|
||||
|
||||
# 检测是否在 asyncio 事件循环中
|
||||
# 检查是否在 asyncio 事件循环中
|
||||
import asyncio
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
logger.warning("检测到 asyncio 事件循环,将在新线程中执行 Playwright")
|
||||
|
||||
# 在新线程中执行 Playwright 同步 API
|
||||
import threading
|
||||
result_container = {'browser': None, 'error': None}
|
||||
|
||||
def run_playwright():
|
||||
try:
|
||||
# 获取 CDP WebSocket 端点
|
||||
ws_endpoint = browser_info['data']['ws']['puppeteer']
|
||||
|
||||
# 创建新的 Playwright 实例
|
||||
playwright = sync_playwright().start()
|
||||
|
||||
# 通过 CDP 连接到浏览器
|
||||
browser = playwright.chromium.connect_over_cdp(ws_endpoint)
|
||||
logger.info("成功通过 CDP 连接到 AdsPower 浏览器")
|
||||
|
||||
# 保存引用
|
||||
self.playwright = playwright
|
||||
self.browser = browser
|
||||
result_container['browser'] = browser
|
||||
except Exception as e:
|
||||
result_container['error'] = str(e)
|
||||
|
||||
thread = threading.Thread(target=run_playwright)
|
||||
thread.start()
|
||||
thread.join(timeout=30)
|
||||
|
||||
if result_container['error']:
|
||||
raise Exception(result_container['error'])
|
||||
|
||||
return result_container['browser']
|
||||
|
||||
# 如果有运行中的循环,使用线程来执行同步代码
|
||||
import concurrent.futures
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
future = executor.submit(self._connect_browser_sync, ws_endpoint)
|
||||
return future.result(timeout=30)
|
||||
except RuntimeError:
|
||||
# 没有运行中的事件循环,正常执行
|
||||
pass
|
||||
# 没有运行中的循环,直接执行
|
||||
return self._connect_browser_sync(ws_endpoint)
|
||||
|
||||
# 获取 CDP WebSocket 端点
|
||||
ws_endpoint = browser_info['data']['ws']['puppeteer']
|
||||
except Exception as e:
|
||||
logger.error(f"CDP 连接失败: {str(e)}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return None
|
||||
|
||||
def _connect_browser_sync(self, ws_endpoint: str) -> Optional[Browser]:
|
||||
"""同步执行浏览器连接"""
|
||||
try:
|
||||
# 创建新的 Playwright 实例
|
||||
playwright = sync_playwright().start()
|
||||
|
||||
@@ -678,11 +669,8 @@ class AdsPowerClient:
|
||||
self.browser = browser
|
||||
|
||||
return browser
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"CDP 连接失败: {str(e)}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
logger.error(f"CDP 连接失败(sync): {str(e)}")
|
||||
return None
|
||||
|
||||
def get_page(self, browser: Browser) -> Optional[Page]:
|
||||
@@ -804,6 +792,18 @@ class AdsPowerClient:
|
||||
logger.error(f"停止浏览器异常: {str(e)}")
|
||||
return False
|
||||
|
||||
def close_browser(self, profile_id: str = None) -> bool:
    """
    Close the browser for the given profile.

    Thin compatibility alias that simply delegates to ``stop_browser``.

    Args:
        profile_id: Profile ID; when None, ``stop_browser`` falls back to
            the client's configured default profile.

    Returns:
        True if the browser was stopped successfully, False otherwise.
    """
    return self.stop_browser(user_id=profile_id)
|
||||
|
||||
def get_damai_proxy(self) -> Optional[Dict]:
|
||||
"""
|
||||
从大麦IP代理池获取代理
|
||||
@@ -996,6 +996,67 @@ class AdsPowerClient:
|
||||
logger.error(f"查询代理列表异常: {str(e)}")
|
||||
return None
|
||||
|
||||
def delete_proxy(self, proxy_id: str) -> bool:
    """
    Delete a proxy entry via AdsPower API v2.

    Args:
        proxy_id: ID of the proxy to delete.

    Returns:
        True on success; False on any failure (HTTP/transport error,
        non-JSON response body, or an API-level error code).
    """
    try:
        url = f"{self.api_url}/api/v2/proxy-list/delete"

        # Optional bearer auth — same convention as the other v2 endpoints.
        headers = {
            'Content-Type': 'application/json'
        }
        if self.api_key:
            headers['Authorization'] = f'Bearer {self.api_key}'

        # The v2 delete endpoint expects an array of proxy IDs.
        payload = {
            "proxy_id": [proxy_id]
        }

        logger.info("\n" + "="*70)
        logger.info("删除 AdsPower 代理")
        logger.info("="*70)
        logger.info(f"URL: {url}")
        logger.info(f"Method: POST")
        logger.info(f"Payload: {json.dumps(payload, indent=2, ensure_ascii=False)}")

        response = requests.post(url, json=payload, headers=headers, timeout=30)

        logger.info("\n" + "-"*70)
        logger.info("响应信息")
        logger.info("-"*70)
        logger.info(f"Status Code: {response.status_code}")

        # BUGFIX: parse the body exactly once. The original retried
        # `response.json()` via a `locals()` check after a failed parse,
        # which raised a second time and masked the real response content.
        try:
            result = response.json()
            logger.info(f"Response Body: {json.dumps(result, indent=2, ensure_ascii=False)}")
        except ValueError:
            # Non-JSON body: log the raw text and treat as failure.
            logger.info(f"Response Body (Raw): {response.text}")
            logger.info("="*70 + "\n")
            return False

        logger.info("="*70 + "\n")

        if result.get('code') == 0:
            logger.success(f"成功删除代理,ID: {proxy_id}")
            return True
        else:
            logger.error(f"删除代理失败: {result.get('msg')}")
            return False

    except Exception as e:
        logger.error(f"删除代理异常: {str(e)}")
        return False
|
||||
|
||||
def check_browser_status(self, user_id: str = None) -> Optional[Dict]:
|
||||
"""
|
||||
检查浏览器状态
|
||||
@@ -1127,9 +1188,16 @@ class AdsPowerClient:
|
||||
if result and result.get('code') == 0:
|
||||
groups = result.get('data', {}).get('list', [])
|
||||
if groups:
|
||||
group_id = groups[0].get('group_id')
|
||||
logger.success(f"获取到分组ID: {group_id}")
|
||||
# 精确匹配分组名称(API返回的可能包含多个包含关键词的分组)
|
||||
for group in groups:
|
||||
if group.get('group_name') == group_name:
|
||||
group_id = group.get('group_id')
|
||||
logger.success(f"获取到分组ID: {group_id} (名称: {group_name})")
|
||||
return group_id
|
||||
|
||||
# 如果没有精确匹配,记录警告
|
||||
logger.warning(f"未找到精确匹配的分组 '{group_name}',返回的分组: {[g.get('group_name') for g in groups]}")
|
||||
return None
|
||||
else:
|
||||
logger.warning(f"未找到名为 '{group_name}' 的分组")
|
||||
return None
|
||||
@@ -1258,6 +1326,85 @@ class AdsPowerClient:
|
||||
logger.error(f"查询 Profile 异常: {str(e)}")
|
||||
return None
|
||||
|
||||
def create_profile(self, group_id: str, name: str = None, proxy_id: str = None) -> Optional[str]:
    """
    Create a new browser profile via AdsPower API v2.

    Args:
        group_id: Group ID the profile belongs to.
        name: Profile name; auto-generated as "auto_<timestamp>" when omitted.
        proxy_id: Proxy ID to bind to the profile.

    Returns:
        The new profile ID, or None on failure.
    """
    try:
        url = f"{self.api_url}/api/v2/browser-profile/create"

        # Optional bearer auth — same convention as the other v2 endpoints.
        headers = {
            'Content-Type': 'application/json'
        }
        if self.api_key:
            headers['Authorization'] = f'Bearer {self.api_key}'

        import time
        profile_name = name or f"auto_{int(time.time())}"

        payload = {
            "group_id": group_id,
            "name": profile_name,
            "platform": "health.baidu.com",
            "proxyid": proxy_id,
            "fingerprint_config": {
                "automatic_timezone": "1",
                "language": ["zh-CN", "zh"],
                "ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
            }
        }

        logger.info("\n" + "="*70)
        logger.info("创建 Profile (API v2)")
        logger.info("="*70)
        logger.info(f"URL: {url}")
        logger.info(f"Method: POST")
        logger.info(f"Payload: {json.dumps(payload, indent=2, ensure_ascii=False)}")

        response = requests.post(url, json=payload, headers=headers, timeout=30)

        logger.info("\n" + "-"*70)
        logger.info("响应信息")
        logger.info("-"*70)
        logger.info(f"Status Code: {response.status_code}")

        # BUGFIX: parse the body exactly once. The original retried
        # `response.json()` via a `locals()` check after a failed parse,
        # which raised a second time and masked the real response content.
        try:
            result = response.json()
            logger.info(f"Response Body: {json.dumps(result, indent=2, ensure_ascii=False)}")
        except ValueError:
            # Non-JSON body: log the raw text and treat as failure.
            logger.info(f"Response Body (Raw): {response.text}")
            logger.info("="*70 + "\n")
            return None

        logger.info("="*70 + "\n")

        if result.get('code') == 0:
            profile_id = result.get('data', {}).get('profile_id')
            if profile_id:
                logger.success(f"成功创建 Profile,ID: {profile_id}")
                return profile_id
            else:
                logger.error("创建 Profile 成功但未返回ID")
                return None
        else:
            logger.error(f"创建 Profile 失败: {result.get('msg')}")
            return None

    except Exception as e:
        logger.error(f"创建 Profile 异常: {str(e)}")
        return None
|
||||
|
||||
def delete_profile(self, profile_id: str) -> bool:
|
||||
"""
|
||||
删除 Profile
|
||||
|
||||
17
config.py
@@ -48,6 +48,7 @@ class BaseConfig:
|
||||
CLICK_INTERVAL_MINUTES = int(os.getenv('CLICK_INTERVAL_MINUTES', 30)) # 点击间隔(分钟)
|
||||
MIN_TASK_INTERVAL_MINUTES = int(os.getenv('MIN_TASK_INTERVAL_MINUTES', 3)) # 任务间最小间隔(分钟)
|
||||
MAX_TASK_INTERVAL_MINUTES = int(os.getenv('MAX_TASK_INTERVAL_MINUTES', 5)) # 任务间最大间隔(分钟)
|
||||
MAX_CONCURRENT_WORKERS = int(os.getenv('MAX_CONCURRENT_WORKERS', 2)) # 最大并发任务数(默认2)
|
||||
WORK_START_HOUR = int(os.getenv('WORK_START_HOUR', 9)) # 工作开始时间
|
||||
WORK_END_HOUR = int(os.getenv('WORK_END_HOUR', 21)) # 工作结束时间
|
||||
REPLY_WAIT_TIMEOUT = int(os.getenv('REPLY_WAIT_TIMEOUT', 30)) # 回复等待超时(秒)
|
||||
@@ -60,6 +61,7 @@ class BaseConfig:
|
||||
# 数据存储路径
|
||||
DATA_DIR = os.getenv('DATA_DIR', './data')
|
||||
LOG_DIR = os.getenv('LOG_DIR', './logs')
|
||||
QUERY_UPLOAD_DIR = os.getenv('QUERY_UPLOAD_DIR', './query_upload') # Query挖掘上传目录
|
||||
|
||||
# 调试模式
|
||||
DEBUG = os.getenv('DEBUG', 'False').lower() == 'true'
|
||||
@@ -74,11 +76,24 @@ class BaseConfig:
|
||||
MYSQL_PASSWORD = os.getenv('MYSQL_PASSWORD', '')
|
||||
MYSQL_DATABASE = os.getenv('MYSQL_DATABASE', 'ai_article')
|
||||
|
||||
# 远程点击服务配置(分布式部署时使用)
|
||||
# 为空表示本地模式,调度器在Web服务内运行
|
||||
# 设置URL表示远程模式,Web服务转发请求到远程点击服务
|
||||
CLICK_SERVICE_URL = os.getenv('CLICK_SERVICE_URL', '') # 例如: http://192.168.1.100:8888
|
||||
|
||||
# 服务模式:web=提供前端界面, click=仅提供调度API
|
||||
SERVICE_MODE = os.getenv('SERVICE_MODE', 'web')
|
||||
|
||||
# 千问大模型API配置
|
||||
QWEN_API_KEY = os.getenv('QWEN_API_KEY', 'sk-6d22dd845a624d9c92a821d24a50e2e8')
|
||||
QWEN_API_URL = os.getenv('QWEN_API_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions')
|
||||
|
||||
@classmethod
|
||||
def ensure_dirs(cls):
|
||||
"""确保必要的目录存在"""
|
||||
os.makedirs(cls.DATA_DIR, exist_ok=True)
|
||||
os.makedirs(cls.LOG_DIR, exist_ok=True)
|
||||
os.makedirs(cls.QUERY_UPLOAD_DIR, exist_ok=True)
|
||||
|
||||
|
||||
class DevelopmentConfig(BaseConfig):
|
||||
@@ -88,7 +103,7 @@ class DevelopmentConfig(BaseConfig):
|
||||
|
||||
class ProductionConfig(BaseConfig):
    """Production environment configuration."""
    # BUGFIX: debug mode must stay OFF in production — it leaks stack traces
    # to clients and (with Flask's interactive debugger) allows arbitrary
    # code execution. The diff had flipped this to True.
    DEBUG = False
|
||||
|
||||
|
||||
# 根据环境选择配置
|
||||
|
||||
261
config_manager.py
Normal file
@@ -0,0 +1,261 @@
|
||||
"""
|
||||
配置管理模块
|
||||
提供配置的读取、更新和持久化功能
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional
|
||||
from config import Config
|
||||
|
||||
|
||||
class ConfigManager:
    """Configuration manager.

    Reads the effective runtime configuration, validates updates against a
    declarative schema, applies them to the process environment and the
    ``Config`` class, and persists them to the active ``.env`` file.
    """

    # User-editable configuration items, grouped by category. Each entry
    # declares the expected type, optional bounds/pattern, and a display label.
    CONFIGURABLE_ITEMS = {
        'click_strategy': {
            'MIN_CLICK_COUNT': {'type': 'int', 'min': 1, 'max': 100, 'label': '最小点击次数'},
            'MAX_CLICK_COUNT': {'type': 'int', 'min': 1, 'max': 100, 'label': '最大点击次数'},
            'CLICK_INTERVAL_MINUTES': {'type': 'int', 'min': 1, 'max': 1440, 'label': '点击间隔(分钟)'},
        },
        'work_time': {
            'WORK_START_HOUR': {'type': 'int', 'min': 0, 'max': 23, 'label': '工作开始时间'},
            'WORK_END_HOUR': {'type': 'int', 'min': 0, 'max': 23, 'label': '工作结束时间'},
        },
        'task_config': {
            'MIN_TASK_INTERVAL_MINUTES': {'type': 'int', 'min': 1, 'max': 60, 'label': '任务最小间隔(分钟)'},
            'MAX_TASK_INTERVAL_MINUTES': {'type': 'int', 'min': 1, 'max': 60, 'label': '任务最大间隔(分钟)'},
            'REPLY_WAIT_TIMEOUT': {'type': 'int', 'min': 5, 'max': 300, 'label': '回复等待超时(秒)'},
        },
        'crawler_config': {
            'CRAWLER_ENABLED': {'type': 'bool', 'label': '启用爬虫'},
            'CRAWLER_SCHEDULE_TIME': {'type': 'str', 'pattern': r'^\d{2}:\d{2}$', 'label': '爬虫执行时间'},
            'CRAWLER_BATCH_SIZE': {'type': 'int', 'min': 1, 'max': 100, 'label': '每次爬取数量'},
        },
        'server_config': {
            'SERVER_PORT': {'type': 'int', 'min': 1024, 'max': 65535, 'label': '服务端口'},
            'DEBUG': {'type': 'bool', 'label': '调试模式'},
        }
    }

    def __init__(self):
        # The active environment decides which .env file updates persist to.
        self.env = os.getenv('ENV', 'development')
        self.env_file = f'.env.{self.env}' if self.env in ['development', 'production'] else '.env'
        self.env_path = Path(self.env_file)

    def get_current_config(self) -> Dict[str, Any]:
        """Return the effective configuration as a nested dict for the UI."""
        config = {
            'env': self.env,
            'click_strategy': {
                'min_click_count': getattr(Config, 'MIN_CLICK_COUNT', 1),
                'max_click_count': getattr(Config, 'MAX_CLICK_COUNT', 3),
                'click_interval_minutes': getattr(Config, 'CLICK_INTERVAL_MINUTES', 30),
            },
            'work_time': {
                'start_hour': getattr(Config, 'WORK_START_HOUR', 9),
                'end_hour': getattr(Config, 'WORK_END_HOUR', 21),
            },
            'task_config': {
                'min_interval_minutes': getattr(Config, 'MIN_TASK_INTERVAL_MINUTES', 3),
                'max_interval_minutes': getattr(Config, 'MAX_TASK_INTERVAL_MINUTES', 5),
                'reply_wait_timeout': getattr(Config, 'REPLY_WAIT_TIMEOUT', 30),
            },
            'crawler_config': {
                'enabled': getattr(Config, 'CRAWLER_ENABLED', True),
                'schedule_time': getattr(Config, 'CRAWLER_SCHEDULE_TIME', '02:00'),
                'batch_size': getattr(Config, 'CRAWLER_BATCH_SIZE', 10),
            },
            'server_config': {
                'host': getattr(Config, 'SERVER_HOST', '0.0.0.0'),
                'port': getattr(Config, 'SERVER_PORT', 5000),
                'debug': getattr(Config, 'DEBUG', False),
            },
            'adspower_config': {
                'api_url': getattr(Config, 'ADSPOWER_API_URL', 'http://local.adspower.net:50325'),
                'user_id': getattr(Config, 'ADSPOWER_USER_ID', ''),
            },
            'paths': {
                'data_dir': getattr(Config, 'DATA_DIR', './data'),
                'log_dir': getattr(Config, 'LOG_DIR', './logs'),
            }
        }
        return config

    @staticmethod
    def _coerce_value(value: Any, config_def: Dict) -> Any:
        """Convert *value* to the Python type declared in *config_def*.

        Raises:
            ValueError, TypeError: when the value cannot be converted.
        """
        config_type = config_def.get('type', 'str')
        if config_type == 'int':
            return int(value)
        if config_type == 'bool':
            if isinstance(value, str):
                return value.lower() in ('true', '1', 'yes')
            return bool(value)
        return str(value)

    @staticmethod
    def _to_env_string(value: Any) -> str:
        """Render a value in the lowercase true/false form .env files use."""
        if isinstance(value, bool):
            return 'true' if value else 'false'
        return str(value)

    def validate_config(self, key: str, value: Any, config_def: Dict) -> tuple:
        """
        Validate a configuration value against its schema definition.

        Returns:
            (is_valid, error_message) — error_message is None when valid.
        """
        config_type = config_def.get('type', 'str')
        try:
            coerced = self._coerce_value(value, config_def)

            if config_type == 'int':
                min_val = config_def.get('min')
                max_val = config_def.get('max')
                if min_val is not None and coerced < min_val:
                    return False, f'{key} 不能小于 {min_val}'
                if max_val is not None and coerced > max_val:
                    return False, f'{key} 不能大于 {max_val}'

            elif config_type == 'str':
                pattern = config_def.get('pattern')
                if pattern and not re.match(pattern, coerced):
                    return False, f'{key} 格式不正确'

            return True, None

        except (ValueError, TypeError) as e:
            return False, f'{key} 值无效: {str(e)}'

    def update_config(self, updates: Dict[str, Any]) -> Dict[str, Any]:
        """
        Apply configuration updates (in memory, ``os.environ`` and .env file).

        Returns:
            {'success': bool, 'message': str, 'updated': list,
             'requires_restart': bool, 'errors': list}
        """
        result = {
            'success': True,
            'message': '',
            'updated': [],
            'requires_restart': False,
            'errors': []
        }

        # Maps external (API) field names to .env / Config attribute names.
        config_mapping = {
            'min_click_count': 'MIN_CLICK_COUNT',
            'max_click_count': 'MAX_CLICK_COUNT',
            'click_interval_minutes': 'CLICK_INTERVAL_MINUTES',
            'start_hour': 'WORK_START_HOUR',
            'end_hour': 'WORK_END_HOUR',
            'min_interval_minutes': 'MIN_TASK_INTERVAL_MINUTES',
            'max_interval_minutes': 'MAX_TASK_INTERVAL_MINUTES',
            'reply_wait_timeout': 'REPLY_WAIT_TIMEOUT',
            'crawler_enabled': 'CRAWLER_ENABLED',
            'schedule_time': 'CRAWLER_SCHEDULE_TIME',
            'batch_size': 'CRAWLER_BATCH_SIZE',
            'server_port': 'SERVER_PORT',
            'debug': 'DEBUG',
        }

        # Items that only take effect after a service restart.
        restart_required_keys = ['server_port', 'debug', 'crawler_enabled', 'schedule_time']

        env_updates = {}

        for key, value in updates.items():
            env_key = config_mapping.get(key)
            if not env_key:
                continue  # unknown fields are silently ignored

            # Locate the schema entry for this key.
            config_def = None
            for items in self.CONFIGURABLE_ITEMS.values():
                if env_key in items:
                    config_def = items[env_key]
                    break

            if not config_def:
                continue

            is_valid, error = self.validate_config(key, value, config_def)
            if not is_valid:
                result['errors'].append(error)
                continue

            # BUGFIX: store the *coerced* value instead of the raw client
            # input — the original discarded the conversion done inside
            # validate_config, so e.g. Config.SERVER_PORT became the string
            # "8090" and CRAWLER_ENABLED the string "true".
            env_updates[env_key] = self._coerce_value(value, config_def)
            result['updated'].append(key)

            if key in restart_required_keys:
                result['requires_restart'] = True

        if result['errors']:
            result['success'] = False
            result['message'] = '部分配置验证失败: ' + '; '.join(result['errors'])
            return result

        if not env_updates:
            result['message'] = '没有需要更新的配置'
            return result

        # Apply in-process first so the change is visible immediately.
        # BUGFIX: write booleans as 'true'/'false' (matching how the .env
        # file is written and how Config parses them), not str(True)='True'.
        for key, value in env_updates.items():
            os.environ[key] = self._to_env_string(value)
            if hasattr(Config, key):
                setattr(Config, key, value)

        # Best-effort persistence: an I/O failure keeps the in-memory update.
        try:
            self._update_env_file(env_updates)
        except Exception as e:
            result['message'] = f'配置已更新到内存,但写入文件失败: {str(e)}'
            return result

        result['message'] = f'成功更新 {len(result["updated"])} 项配置'
        if result['requires_restart']:
            result['message'] += '(部分配置需要重启服务生效)'

        return result

    def _update_env_file(self, updates: Dict[str, Any]):
        """Rewrite the .env file: update existing keys, append new ones."""
        if not self.env_path.exists():
            # Missing file: start from scratch and append everything below.
            lines = []
        else:
            with open(self.env_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()

        updated_keys = set()
        new_lines = []
        for line in lines:
            stripped = line.strip()
            if stripped and not stripped.startswith('#') and '=' in stripped:
                key = stripped.split('=')[0].strip()
                if key in updates:
                    new_lines.append(f'{key}={self._to_env_string(updates[key])}\n')
                    updated_keys.add(key)
                else:
                    new_lines.append(line)
            else:
                new_lines.append(line)

        # BUGFIX: guarantee a trailing newline before appending, otherwise a
        # final line lacking '\n' would fuse with the first appended entry.
        if new_lines and not new_lines[-1].endswith('\n'):
            new_lines[-1] += '\n'

        # Append configuration keys not already present in the file.
        for key, value in updates.items():
            if key not in updated_keys:
                new_lines.append(f'{key}={self._to_env_string(value)}\n')

        with open(self.env_path, 'w', encoding='utf-8') as f:
            f.writelines(new_lines)

    def get_config_schema(self) -> Dict:
        """Return CONFIGURABLE_ITEMS in a frontend-friendly flat shape."""
        schema = {}
        for category, items in self.CONFIGURABLE_ITEMS.items():
            schema[category] = {}
            for key, config_def in items.items():
                schema[category][key] = {
                    'type': config_def['type'],
                    'label': config_def['label'],
                    'min': config_def.get('min'),
                    'max': config_def.get('max'),
                    'pattern': config_def.get('pattern'),
                }
        return schema
|
||||
@@ -1,107 +0,0 @@
|
||||
# AI MIP Query Task 表创建说明
|
||||
|
||||
## 1. 创建表
|
||||
|
||||
在MySQL数据库中执行以下文件:
|
||||
|
||||
```bash
|
||||
mysql -u your_user -p your_database < db/ai_mip_query_task.sql
|
||||
```
|
||||
|
||||
或者在MySQL客户端中直接执行 `db/ai_mip_query_task.sql` 文件内容。
|
||||
|
||||
## 2. 表结构说明
|
||||
|
||||
### 字段列表
|
||||
|
||||
| 字段名 | 类型 | 说明 |
|
||||
|--------|------|------|
|
||||
| id | int | 主键ID |
|
||||
| query_word | varchar(512) | 查询词/关键词 |
|
||||
| query_type | enum | 查询类型:keyword/phrase/long_tail |
|
||||
| task_date | char(8) | 任务日期 YYYYMMDD |
|
||||
| threshold_max | int | 最大抓取数量阈值 |
|
||||
| current_count | int | 当前已抓取数量 |
|
||||
| status | enum | 任务状态:ready/doing/failed/finished/closed |
|
||||
| priority | tinyint | 优先级 1-10 |
|
||||
| category | varchar(64) | 分类标签 |
|
||||
| source_platform | varchar(64) | 来源平台 |
|
||||
| crawl_url_count | int | 已爬取URL数量 |
|
||||
| valid_url_count | int | 有效URL数量(带广告) |
|
||||
| error_message | text | 错误信息 |
|
||||
| started_at | timestamp | 开始执行时间 |
|
||||
| finished_at | timestamp | 完成时间 |
|
||||
| closed_at | timestamp | 达到阈值关闭时间 |
|
||||
| created_at | timestamp | 创建时间 |
|
||||
| updated_at | timestamp | 更新时间 |
|
||||
| created_by | varchar(64) | 创建人 |
|
||||
| remark | varchar(512) | 备注信息 |
|
||||
|
||||
### 索引
|
||||
|
||||
- `uniq_query_date`: 同一查询词每天只有一个任务
|
||||
- `idx_date_status`: 按日期和状态查询
|
||||
- `idx_status_priority`: 按状态和优先级查询
|
||||
- `idx_category`: 按分类查询
|
||||
- `idx_threshold`: 阈值监控
|
||||
- `idx_closed`: 关闭时间索引
|
||||
|
||||
## 3. 使用示例
|
||||
|
||||
### Python代码
|
||||
|
||||
```python
|
||||
from db_manager import QueryTaskManager
|
||||
|
||||
# 初始化管理器
|
||||
task_mgr = QueryTaskManager()
|
||||
|
||||
# 创建任务
|
||||
task_id = task_mgr.create_task(
|
||||
query_word="糖尿病治疗",
|
||||
query_type="keyword",
|
||||
threshold_max=50,
|
||||
priority=3,
|
||||
category="医疗"
|
||||
)
|
||||
|
||||
# 获取ready任务
|
||||
ready_tasks = task_mgr.get_ready_tasks(limit=10)
|
||||
|
||||
# 更新任务状态
|
||||
task_mgr.update_task_status(task_id, 'doing')
|
||||
|
||||
# 增加抓取计数
|
||||
task_mgr.increment_crawl_count(task_id, crawl_count=5, valid_count=3)
|
||||
|
||||
# 检查阈值
|
||||
task_mgr.check_threshold(task_id)
|
||||
|
||||
# 获取统计信息
|
||||
stats = task_mgr.get_task_statistics('20260119')
|
||||
```
|
||||
|
||||
## 4. 测试
|
||||
|
||||
运行测试脚本:
|
||||
|
||||
```bash
|
||||
python test_query_task.py
|
||||
```
|
||||
|
||||
## 5. 任务状态流转
|
||||
|
||||
```
|
||||
ready (准备中)
|
||||
↓
|
||||
doing (执行中)
|
||||
↓
|
||||
finished (完成) / failed (失败) / closed (达到阈值关闭)
|
||||
```
|
||||
|
||||
## 6. 注意事项
|
||||
|
||||
1. **唯一约束**:同一查询词在同一天只能有一个任务
|
||||
2. **阈值检查**:达到threshold_max时自动关闭任务
|
||||
3. **优先级**:数字越小优先级越高(1-10)
|
||||
4. **时间戳**:状态变更会自动更新对应的时间字段
|
||||
@@ -1,51 +0,0 @@
|
||||
/*
|
||||
Navicat Premium Dump SQL
|
||||
|
||||
Source Server : mixue
|
||||
Source Server Type : MySQL
|
||||
Source Server Version : 90001 (9.0.1)
|
||||
Source Host : localhost:3306
|
||||
Source Schema : ai_article
|
||||
|
||||
Target Server Type : MySQL
|
||||
Target Server Version : 90001 (9.0.1)
|
||||
File Encoding : 65001
|
||||
|
||||
Date: 12/01/2026 20:31:43
|
||||
*/
|
||||
|
||||
SET NAMES utf8mb4;
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
|
||||
-- ----------------------------
|
||||
-- Table structure for ai_mip_click
|
||||
-- ----------------------------
|
||||
DROP TABLE IF EXISTS `ai_mip_click`;
|
||||
CREATE TABLE `ai_mip_click` (
|
||||
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID',
|
||||
`site_id` bigint NOT NULL COMMENT '关联站点ID(外键指向 ai_mip_site.id)',
|
||||
`site_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '网站URL(冗余字段,便于查询优化)',
|
||||
`click_time` datetime NOT NULL COMMENT '点击发生时间',
|
||||
`user_ip` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '用户IP地址',
|
||||
`user_agent` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '浏览器/设备信息',
|
||||
`referer_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '来源页面URL',
|
||||
`device_type` enum('mobile','pc','tablet') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '设备类型',
|
||||
`click_count` int NULL DEFAULT 1 COMMENT '本次点击事件的计数(一般为1,可用于批量插入)',
|
||||
`is_valid` tinyint(1) NULL DEFAULT 1 COMMENT '是否有效点击(防刷)',
|
||||
`task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT 'RPA任务ID(可选)',
|
||||
`operator` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '操作者(如自动系统)',
|
||||
`created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间',
|
||||
PRIMARY KEY (`id`) USING BTREE,
|
||||
INDEX `idx_site_id`(`site_id` ASC) USING BTREE,
|
||||
INDEX `idx_click_time`(`click_time` ASC) USING BTREE,
|
||||
INDEX `idx_site_url`(`site_url` ASC) USING BTREE,
|
||||
INDEX `idx_click_time_site`(`click_time` ASC, `site_id` ASC) USING BTREE,
|
||||
INDEX `idx_task_id`(`task_id` ASC) USING BTREE
|
||||
) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告点击日志表' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
|
||||
-- Records of ai_mip_click
|
||||
-- ----------------------------
|
||||
INSERT INTO `ai_mip_click` VALUES (1, 1, 'https://example.com', '2026-01-12 20:25:09', NULL, NULL, NULL, NULL, 1, 1, 'TASK20260112001', 'RPA_SYSTEM', '2026-01-12 20:25:09');
|
||||
|
||||
SET FOREIGN_KEY_CHECKS = 1;
|
||||
@@ -1,75 +0,0 @@
|
||||
/*
|
||||
Navicat Premium Dump SQL
|
||||
|
||||
Source Server : mixue
|
||||
Source Server Type : MySQL
|
||||
Source Server Version : 90001 (9.0.1)
|
||||
Source Host : localhost:3306
|
||||
Source Schema : ai_article
|
||||
|
||||
Target Server Type : MySQL
|
||||
Target Server Version : 90001 (9.0.1)
|
||||
File Encoding : 65001
|
||||
|
||||
Date: 12/01/2026 20:31:30
|
||||
*/
|
||||
|
||||
SET NAMES utf8mb4;
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
|
||||
-- ----------------------------
-- Table structure for ai_mip_interaction
-- One row per automated ad interaction attempt (reply/comment/...),
-- including how it was executed and what response came back.
-- ----------------------------
DROP TABLE IF EXISTS `ai_mip_interaction`;
CREATE TABLE `ai_mip_interaction` (
  `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID',
  `site_id` bigint NOT NULL COMMENT '关联站点ID',
  `click_id` bigint NULL DEFAULT NULL COMMENT '关联点击记录ID',
  `task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT 'RPA任务ID',
  `interaction_type` enum('reply','comment','message','form_submit','follow','like','share') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '互动类型',
  `interaction_time` datetime NOT NULL COMMENT '互动发生时间',
  `interaction_status` enum('pending','success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'pending' COMMENT '互动状态',
  `reply_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '回复/评论的内容',
  `reply_template_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的回复模板ID',
  `ad_element_xpath` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告元素的XPath定位',
  `ad_element_selector` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告元素的CSS选择器',
  `ad_text_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '广告的文本内容',
  `execution_mode` enum('auto','manual','semi_auto') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'auto' COMMENT '执行方式',
  `rpa_script` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的RPA脚本名称',
  `browser_type` enum('headless','headed','playwright','selenium') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '浏览器类型',
  `anti_detection_method` json NULL COMMENT '万金油技术方案',
  `proxy_ip` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的代理IP',
  `user_agent` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '使用的User-Agent',
  `custom_headers` json NULL COMMENT '自定义HTTP头',
  `fingerprint_id` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '浏览器指纹ID',
  `response_received` tinyint(1) NULL DEFAULT 0 COMMENT '是否收到回复',
  `response_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '对方回复的内容',
  `response_time` datetime NULL DEFAULT NULL COMMENT '收到回复的时间',
  `response_delay_seconds` int NULL DEFAULT NULL COMMENT '回复延迟(秒)',
  `is_successful` tinyint(1) NULL DEFAULT 0 COMMENT '是否成功互动',
  `error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '失败原因/错误信息',
  `retry_count` int NULL DEFAULT 0 COMMENT '重试次数',
  `conversion_flag` tinyint(1) NULL DEFAULT 0 COMMENT '是否产生转化',
  `site_dimension` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网址维度标签',
  `campaign_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告活动ID',
  `operator` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '操作者',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间',
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录更新时间',
  `remark` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息',
  PRIMARY KEY (`id`) USING BTREE,
  INDEX `idx_site_id`(`site_id` ASC) USING BTREE,
  INDEX `idx_click_id`(`click_id` ASC) USING BTREE,
  INDEX `idx_task_id`(`task_id` ASC) USING BTREE,
  INDEX `idx_interaction_time`(`interaction_time` ASC) USING BTREE,
  INDEX `idx_interaction_status`(`interaction_status` ASC) USING BTREE,
  INDEX `idx_composite`(`site_id` ASC, `interaction_time` ASC, `interaction_status` ASC) USING BTREE,
  INDEX `idx_response_received`(`response_received` ASC) USING BTREE,
  INDEX `idx_conversion`(`conversion_flag` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告互动回复日志表' ROW_FORMAT = DYNAMIC;
|
||||
|
||||
-- ----------------------------
-- Records of ai_mip_interaction
-- ----------------------------
-- One sample auto-reply row matching click id 1 / task TASK20260112001.
INSERT INTO `ai_mip_interaction` VALUES (1, 1, 1, 'TASK20260112001', 'reply', '2026-01-12 20:25:09', 'success', '您好,请问有什么可以帮助您的吗?', NULL, NULL, NULL, NULL, 'auto', NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, 1, NULL, 0, 0, NULL, NULL, NULL, '2026-01-12 20:25:09', '2026-01-12 20:25:09', NULL);

SET FOREIGN_KEY_CHECKS = 1;
|
||||
@@ -1,60 +0,0 @@
|
||||
/*
|
||||
MIP Query Task Table
|
||||
用于存储查询词任务,抓取需要自动点击的网址
|
||||
|
||||
Date: 2026-01-19
|
||||
*/
|
||||
|
||||
SET NAMES utf8mb4;
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
|
||||
-- ----------------------------
-- Table structure for ai_mip_query_task
-- Daily crawl tasks: one row per (query_word, task_date) pair, tracking
-- how many URLs were crawled against a threshold.
-- NOTE(review): columns mix utf8mb4_0900_ai_ci and utf8mb4_general_ci
-- collations — presumably intentional for cross-server compatibility;
-- confirm before unifying.
-- ----------------------------
DROP TABLE IF EXISTS `ai_mip_query_task`;
CREATE TABLE `ai_mip_query_task` (
  `id` int NOT NULL AUTO_INCREMENT COMMENT '主键ID',
  `query_word` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '查询词/关键词',
  `query_type` enum('keyword','phrase','long_tail') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'keyword' COMMENT '查询类型:关键词/短语/长尾词',
  `task_date` char(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '任务日期,格式:YYYYMMDD',
  `threshold_max` int NOT NULL DEFAULT 100 COMMENT '最大抓取数量阈值',
  `current_count` int NOT NULL DEFAULT 0 COMMENT '当前已抓取数量',
  `status` enum('ready','doing','failed','finished','closed') CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT 'ready' COMMENT '任务状态:准备中/执行中/失败/完成/已关闭',
  `priority` tinyint NOT NULL DEFAULT 5 COMMENT '优先级(1-10,数字越小优先级越高)',
  `category` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '分类标签(如:医疗、教育、法律等)',
  `source_platform` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'baidu' COMMENT '来源平台:baidu/sogou/360等',
  `crawl_url_count` int NOT NULL DEFAULT 0 COMMENT '已爬取URL数量',
  `valid_url_count` int NOT NULL DEFAULT 0 COMMENT '有效URL数量(带广告)',
  `error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '错误信息',
  `started_at` timestamp NULL DEFAULT NULL COMMENT '开始执行时间',
  `finished_at` timestamp NULL DEFAULT NULL COMMENT '完成时间',
  `closed_at` timestamp NULL DEFAULT NULL COMMENT '达到阈值关闭时间',
  `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
  `created_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'system' COMMENT '创建人',
  `remark` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息',
  PRIMARY KEY (`id`) USING BTREE,
  -- 191-char prefix keeps the composite unique key within InnoDB key limits.
  UNIQUE INDEX `uniq_query_date`(`query_word`(191) ASC, `task_date` ASC) USING BTREE COMMENT '同一查询词每天只有一个任务',
  INDEX `idx_date_status`(`task_date` ASC, `status` ASC) USING BTREE COMMENT '按日期和状态查询',
  INDEX `idx_status_priority`(`status` ASC, `priority` ASC) USING BTREE COMMENT '按状态和优先级查询',
  INDEX `idx_category`(`category` ASC) USING BTREE COMMENT '按分类查询',
  INDEX `idx_threshold`(`threshold_max` ASC, `current_count` ASC) USING BTREE COMMENT '阈值监控',
  INDEX `idx_closed`(`closed_at` ASC) USING BTREE COMMENT '关闭时间索引'
) ENGINE = InnoDB AUTO_INCREMENT = 1 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = 'MIP查询任务表 - 用于存储查询词抓取网址任务' ROW_FORMAT = DYNAMIC;
|
||||
|
||||
-- ----------------------------
-- Sample data: one task per query type for 2026-01-19.
-- ----------------------------
INSERT INTO `ai_mip_query_task`
    (`query_word`, `query_type`, `task_date`, `threshold_max`, `priority`, `category`, `source_platform`, `remark`)
VALUES
    ('糖尿病治疗', 'keyword', '20260119', 50, 3, '医疗', 'baidu', '医疗类关键词测试'),
    ('在线教育平台', 'phrase', '20260119', 30, 5, '教育', 'baidu', '教育类短语测试'),
    ('法律咨询免费在线', 'long_tail', '20260119', 20, 7, '法律', 'baidu', '法律类长尾词测试');

SET FOREIGN_KEY_CHECKS = 1;
|
||||
@@ -1,55 +0,0 @@
|
||||
/*
|
||||
Navicat Premium Dump SQL
|
||||
|
||||
Source Server : mixue
|
||||
Source Server Type : MySQL
|
||||
Source Server Version : 90001 (9.0.1)
|
||||
Source Host : localhost:3306
|
||||
Source Schema : ai_article
|
||||
|
||||
Target Server Type : MySQL
|
||||
Target Server Version : 90001 (9.0.1)
|
||||
File Encoding : 65001
|
||||
|
||||
Date: 12/01/2026 20:31:23
|
||||
*/
|
||||
|
||||
SET NAMES utf8mb4;
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
|
||||
-- ----------------------------
-- Table structure for ai_mip_site
-- Master list of MIP ad URLs plus their click-scheduling configuration.
-- ----------------------------
DROP TABLE IF EXISTS `ai_mip_site`;
CREATE TABLE `ai_mip_site` (
  `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID',
  `site_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '网站URL,唯一',
  `site_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网站名称(可选)',
  `status` enum('active','inactive','pending') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'active' COMMENT '状态:激活/停用/待审核',
  `frequency` int NULL DEFAULT 1 COMMENT '频次(如每小时发几次)',
  `time_start` time NULL DEFAULT '00:00:00' COMMENT '开始时间(HH:MM:SS)',
  `time_end` time NULL DEFAULT '23:59:59' COMMENT '结束时间(HH:MM:SS)',
  `interval_minutes` int NULL DEFAULT 60 COMMENT '执行间隔(分钟)',
  `ad_feature` varchar(1024) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告特征描述(JSON格式,如:{\"color\":\"red\", \"position\":\"top\"})',
  `click_count` bigint NULL DEFAULT 0 COMMENT '累计点击次数',
  `reply_count` bigint NULL DEFAULT 0 COMMENT '累计回复次数',
  `site_dimension` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网址维度标签(如:教育、医疗等)',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
  `created_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '创建人',
  `updated_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '更新人',
  `remark` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息',
  PRIMARY KEY (`id`) USING BTREE,
  -- FIX: the original dump declared a second UNIQUE index
  -- `idx_site_url`(`site_url`(191)) alongside this full-column unique key.
  -- The 191-char prefix-unique variant is redundant write overhead and would
  -- wrongly reject distinct URLs sharing their first 191 characters, so only
  -- the full-column unique index is kept.
  UNIQUE INDEX `site_url`(`site_url` ASC) USING BTREE,
  INDEX `idx_status`(`status` ASC) USING BTREE,
  INDEX `idx_created_at`(`created_at` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 3 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告网址管理表' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Records of ai_mip_site
-- ----------------------------
-- Two sample sites with default scheduling (hourly, all day).
INSERT INTO `ai_mip_site` VALUES (1, 'https://example.com', '示例网站1', 'active', 1, '00:00:00', '23:59:59', 60, NULL, 0, 0, '教育', '2026-01-12 20:24:18', '2026-01-12 20:24:18', 'admin', NULL, NULL);
INSERT INTO `ai_mip_site` VALUES (2, 'https://test.com', '测试网站2', 'active', 1, '00:00:00', '23:59:59', 60, NULL, 0, 0, '医疗', '2026-01-12 20:24:18', '2026-01-12 20:24:18', 'admin', NULL, NULL);

SET FOREIGN_KEY_CHECKS = 1;
|
||||
@@ -1,14 +0,0 @@
|
||||
/*
|
||||
为ai_mip_site表添加query_word字段
|
||||
用于记录该URL是从哪个查询词抓取的
|
||||
|
||||
Date: 2026-01-19
|
||||
*/
|
||||
|
||||
-- Add query_word to ai_mip_site: records which search keyword this URL was
-- crawled from. Column and its lookup index are added in a single ALTER so
-- the table is rebuilt only once.
ALTER TABLE `ai_mip_site`
    ADD COLUMN `query_word` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '来源查询词(从哪个关键词抓取)' AFTER `site_dimension`,
    ADD INDEX `idx_query_word`(`query_word`(191) ASC) USING BTREE COMMENT '按查询词查询';
|
||||
@@ -1,100 +0,0 @@
|
||||
#!/usr/bin/env python3
"""SQLite database bootstrap script.

Creates the development database (ai_mip_dev.db) and the production
database (ai_mip_prod.db) from the SQL scripts that live next to this file.
"""

import os
import sqlite3
from pathlib import Path

# Database files live alongside this script.
DB_DIR = Path(__file__).parent
DEV_DB = DB_DIR.joinpath("ai_mip_dev.db")
PROD_DB = DB_DIR.joinpath("ai_mip_prod.db")

# SQL scripts: schema definition plus dev-only seed data.
INIT_SQL = DB_DIR.joinpath("init_sqlite.sql")
SEED_DEV_SQL = DB_DIR.joinpath("seed_dev.sql")
|
||||
|
||||
|
||||
def execute_sql_file(conn, sql_file):
    """Run every statement in *sql_file* against *conn* and commit.

    Args:
        conn: An open sqlite3 connection.
        sql_file: Path to a UTF-8 encoded SQL script.
    """
    sql_text = sql_file.read_text(encoding="utf-8")

    # executescript handles a whole multi-statement script in one call.
    conn.executescript(sql_text)
    conn.commit()
    print(f"✓ 已执行: {sql_file.name}")
|
||||
|
||||
|
||||
def init_database(db_path, with_seed=False):
    """Create (or interactively recreate) the SQLite database at *db_path*.

    Args:
        db_path: Path of the database file to create.
        with_seed: When True, load the dev seed data after the schema.
    """
    if db_path.exists():
        # Ask before clobbering an existing database file.
        answer = input(f"\n数据库 {db_path.name} 已存在,是否覆盖? (y/n): ").strip().lower()
        if answer != 'y':
            print(f"跳过 {db_path.name}")
            return
        os.remove(db_path)
        print(f"已删除旧数据库: {db_path.name}")

    print(f"\n创建数据库: {db_path.name}")

    # Connecting creates the file on demand.
    conn = sqlite3.connect(db_path)
    try:
        execute_sql_file(conn, INIT_SQL)

        if with_seed:
            execute_sql_file(conn, SEED_DEV_SQL)

        print(f"✓ 数据库 {db_path.name} 创建成功")

        # Sanity check: list the tables that were just created.
        rows = conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
        print(f" 创建的表: {', '.join(row[0] for row in rows)}")
    except Exception as e:
        print(f"✗ 创建数据库失败: {str(e)}")
        raise
    finally:
        conn.close()
|
||||
|
||||
|
||||
def main():
    """Entry point: (re)create the dev and prod SQLite databases."""
    banner = "=" * 60
    print(banner)
    print("SQLite数据库初始化工具")
    print(banner)

    # Bail out early when the schema script is missing.
    if not INIT_SQL.exists():
        print(f"错误: 找不到初始化脚本 {INIT_SQL}")
        return

    # Dev database carries seed/test data; prod starts empty.
    print("\n[1] 初始化开发环境数据库")
    init_database(DEV_DB, with_seed=True)

    print("\n[2] 初始化生产环境数据库")
    init_database(PROD_DB, with_seed=False)

    print("\n" + banner)
    print("数据库初始化完成")
    print(banner)
    print(f"开发数据库: {DEV_DB}")
    print(f"生产数据库: {PROD_DB}")
    print("\n使用方法:")
    print(" 开发环境: 在 .env.development 中设置 DATABASE_PATH=db/ai_mip_dev.db")
    print(" 生产环境: 在 .env.production 中设置 DATABASE_PATH=db/ai_mip_prod.db")


if __name__ == "__main__":
    main()
|
||||
@@ -1,125 +0,0 @@
|
||||
-- SQLite数据库初始化脚本
|
||||
-- 适用于开发环境(ai_mip_dev.db)和生产环境(ai_mip_prod.db)
|
||||
|
||||
-- ----------------------------
-- Table structure for ai_mip_site (SQLite port of the MySQL schema;
-- enums become TEXT + CHECK constraints, timestamps become DATETIME).
-- ----------------------------
DROP TABLE IF EXISTS ai_mip_site;
CREATE TABLE ai_mip_site (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    site_url TEXT NOT NULL UNIQUE,
    site_name TEXT,
    status TEXT CHECK(status IN ('active', 'inactive', 'pending')) DEFAULT 'active',
    -- Scheduling configuration.
    frequency INTEGER DEFAULT 1,
    time_start TEXT DEFAULT '00:00:00',
    time_end TEXT DEFAULT '23:59:59',
    interval_minutes INTEGER DEFAULT 60,
    ad_feature TEXT,
    -- Aggregated counters.
    click_count INTEGER DEFAULT 0,
    reply_count INTEGER DEFAULT 0,
    site_dimension TEXT,
    -- Audit columns (updated_at maintained by trigger, see below).
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    created_by TEXT,
    updated_by TEXT,
    remark TEXT
);

-- Indexes for ai_mip_site.
CREATE UNIQUE INDEX idx_site_url ON ai_mip_site(site_url);
CREATE INDEX idx_status ON ai_mip_site(status);
CREATE INDEX idx_created_at ON ai_mip_site(created_at);
|
||||
|
||||
-- ----------------------------
-- Table structure for ai_mip_click (SQLite): one row per ad click event.
-- ----------------------------
DROP TABLE IF EXISTS ai_mip_click;
CREATE TABLE ai_mip_click (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    site_id INTEGER NOT NULL,
    site_url TEXT NOT NULL,
    click_time DATETIME NOT NULL,
    -- Client fingerprint captured at click time.
    user_ip TEXT,
    user_agent TEXT,
    referer_url TEXT,
    device_type TEXT CHECK(device_type IN ('mobile', 'pc', 'tablet')),
    click_count INTEGER DEFAULT 1,
    is_valid INTEGER DEFAULT 1,
    task_id TEXT,
    operator TEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (site_id) REFERENCES ai_mip_site(id)
);

-- Indexes for ai_mip_click.
CREATE INDEX idx_site_id ON ai_mip_click(site_id);
CREATE INDEX idx_click_time ON ai_mip_click(click_time);
CREATE INDEX idx_site_url_click ON ai_mip_click(site_url);
CREATE INDEX idx_click_time_site ON ai_mip_click(click_time, site_id);
CREATE INDEX idx_task_id ON ai_mip_click(task_id);
|
||||
|
||||
-- ----------------------------
-- Table structure for ai_mip_interaction (SQLite): one row per automated
-- interaction attempt; JSON columns from MySQL are stored as TEXT.
-- ----------------------------
DROP TABLE IF EXISTS ai_mip_interaction;
CREATE TABLE ai_mip_interaction (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    site_id INTEGER NOT NULL,
    click_id INTEGER,
    task_id TEXT,
    -- What happened and when.
    interaction_type TEXT CHECK(interaction_type IN ('reply', 'comment', 'message', 'form_submit', 'follow', 'like', 'share')) NOT NULL,
    interaction_time DATETIME NOT NULL,
    interaction_status TEXT CHECK(interaction_status IN ('pending', 'success', 'failed', 'skipped')) DEFAULT 'pending',
    reply_content TEXT,
    reply_template_id TEXT,
    -- Which ad element was targeted.
    ad_element_xpath TEXT,
    ad_element_selector TEXT,
    ad_text_content TEXT,
    -- How the interaction was executed.
    execution_mode TEXT CHECK(execution_mode IN ('auto', 'manual', 'semi_auto')) DEFAULT 'auto',
    rpa_script TEXT,
    browser_type TEXT CHECK(browser_type IN ('headless', 'headed', 'playwright', 'selenium')),
    anti_detection_method TEXT,
    proxy_ip TEXT,
    user_agent TEXT,
    custom_headers TEXT,
    fingerprint_id TEXT,
    -- Response tracking.
    response_received INTEGER DEFAULT 0,
    response_content TEXT,
    response_time DATETIME,
    response_delay_seconds INTEGER,
    -- Outcome.
    is_successful INTEGER DEFAULT 0,
    error_message TEXT,
    retry_count INTEGER DEFAULT 0,
    conversion_flag INTEGER DEFAULT 0,
    site_dimension TEXT,
    campaign_id TEXT,
    operator TEXT,
    -- Audit columns (updated_at maintained by trigger, see below).
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    remark TEXT,
    FOREIGN KEY (site_id) REFERENCES ai_mip_site(id),
    FOREIGN KEY (click_id) REFERENCES ai_mip_click(id)
);

-- Indexes for ai_mip_interaction.
CREATE INDEX idx_site_id_interaction ON ai_mip_interaction(site_id);
CREATE INDEX idx_click_id_interaction ON ai_mip_interaction(click_id);
CREATE INDEX idx_task_id_interaction ON ai_mip_interaction(task_id);
CREATE INDEX idx_interaction_time ON ai_mip_interaction(interaction_time);
CREATE INDEX idx_interaction_status ON ai_mip_interaction(interaction_status);
CREATE INDEX idx_composite ON ai_mip_interaction(site_id, interaction_time, interaction_status);
CREATE INDEX idx_response_received ON ai_mip_interaction(response_received);
CREATE INDEX idx_conversion ON ai_mip_interaction(conversion_flag);
|
||||
|
||||
-- Triggers: keep updated_at current on every UPDATE (SQLite has no
-- ON UPDATE CURRENT_TIMESTAMP; recursive_triggers is off by default, so the
-- inner UPDATE does not re-fire the trigger).
CREATE TRIGGER update_ai_mip_site_timestamp
AFTER UPDATE ON ai_mip_site
BEGIN
    UPDATE ai_mip_site SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
END;

CREATE TRIGGER update_ai_mip_interaction_timestamp
AFTER UPDATE ON ai_mip_interaction
BEGIN
    UPDATE ai_mip_interaction SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
END;
|
||||
145
db/mip_table.txt
@@ -1,145 +0,0 @@
|
||||
|
||||
-- ----------------------------
-- Table structure for ai_mip_click
-- One row per ad click event; site_url is denormalized from ai_mip_site
-- for query performance.
-- ----------------------------
DROP TABLE IF EXISTS `ai_mip_click`;
CREATE TABLE `ai_mip_click` (
  `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID',
  `site_id` bigint NOT NULL COMMENT '关联站点ID(外键指向 ai_mip_site.id)',
  `site_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '网站URL(冗余字段,便于查询优化)',
  `click_time` datetime NOT NULL COMMENT '点击发生时间',
  `user_ip` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '用户IP地址',
  `user_agent` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '浏览器/设备信息',
  `referer_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '来源页面URL',
  `device_type` enum('mobile','pc','tablet') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '设备类型',
  `click_count` int NULL DEFAULT 1 COMMENT '本次点击事件的计数(一般为1,可用于批量插入)',
  `is_valid` tinyint(1) NULL DEFAULT 1 COMMENT '是否有效点击(防刷)',
  `task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT 'RPA任务ID(可选)',
  `operator` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '操作者(如自动系统)',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间',
  PRIMARY KEY (`id`) USING BTREE,
  INDEX `idx_site_id`(`site_id` ASC) USING BTREE,
  INDEX `idx_click_time`(`click_time` ASC) USING BTREE,
  INDEX `idx_site_url`(`site_url` ASC) USING BTREE,
  INDEX `idx_click_time_site`(`click_time` ASC, `site_id` ASC) USING BTREE,
  INDEX `idx_task_id`(`task_id` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告点击日志表' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Table structure for ai_mip_interaction
-- One row per automated ad interaction attempt (reply/comment/...),
-- including how it was executed and what response came back.
-- ----------------------------
DROP TABLE IF EXISTS `ai_mip_interaction`;
CREATE TABLE `ai_mip_interaction` (
  `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID',
  `site_id` bigint NOT NULL COMMENT '关联站点ID',
  `click_id` bigint NULL DEFAULT NULL COMMENT '关联点击记录ID',
  `task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT 'RPA任务ID',
  `interaction_type` enum('reply','comment','message','form_submit','follow','like','share') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '互动类型',
  `interaction_time` datetime NOT NULL COMMENT '互动发生时间',
  `interaction_status` enum('pending','success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'pending' COMMENT '互动状态',
  `reply_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '回复/评论的内容',
  `reply_template_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的回复模板ID',
  `ad_element_xpath` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告元素的XPath定位',
  `ad_element_selector` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告元素的CSS选择器',
  `ad_text_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '广告的文本内容',
  `execution_mode` enum('auto','manual','semi_auto') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'auto' COMMENT '执行方式',
  `rpa_script` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的RPA脚本名称',
  `browser_type` enum('headless','headed','playwright','selenium') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '浏览器类型',
  `anti_detection_method` json NULL COMMENT '万金油技术方案',
  `proxy_ip` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '使用的代理IP',
  `user_agent` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '使用的User-Agent',
  `custom_headers` json NULL COMMENT '自定义HTTP头',
  `fingerprint_id` varchar(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '浏览器指纹ID',
  `response_received` tinyint(1) NULL DEFAULT 0 COMMENT '是否收到回复',
  `response_content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '对方回复的内容',
  `response_time` datetime NULL DEFAULT NULL COMMENT '收到回复的时间',
  `response_delay_seconds` int NULL DEFAULT NULL COMMENT '回复延迟(秒)',
  `is_successful` tinyint(1) NULL DEFAULT 0 COMMENT '是否成功互动',
  `error_message` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '失败原因/错误信息',
  `retry_count` int NULL DEFAULT 0 COMMENT '重试次数',
  `conversion_flag` tinyint(1) NULL DEFAULT 0 COMMENT '是否产生转化',
  `site_dimension` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网址维度标签',
  `campaign_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告活动ID',
  `operator` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '操作者',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间',
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录更新时间',
  `remark` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息',
  PRIMARY KEY (`id`) USING BTREE,
  INDEX `idx_site_id`(`site_id` ASC) USING BTREE,
  INDEX `idx_click_id`(`click_id` ASC) USING BTREE,
  INDEX `idx_task_id`(`task_id` ASC) USING BTREE,
  INDEX `idx_interaction_time`(`interaction_time` ASC) USING BTREE,
  INDEX `idx_interaction_status`(`interaction_status` ASC) USING BTREE,
  INDEX `idx_composite`(`site_id` ASC, `interaction_time` ASC, `interaction_status` ASC) USING BTREE,
  INDEX `idx_response_received`(`response_received` ASC) USING BTREE,
  INDEX `idx_conversion`(`conversion_flag` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告互动回复日志表' ROW_FORMAT = DYNAMIC;
|
||||
|
||||
-- ----------------------------
-- Table structure for ai_mip_site
-- Master list of MIP ad URLs plus their click-scheduling configuration.
-- ----------------------------
DROP TABLE IF EXISTS `ai_mip_site`;
CREATE TABLE `ai_mip_site` (
  `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID',
  `site_url` varchar(512) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '网站URL,唯一',
  `site_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网站名称(可选)',
  `status` enum('active','inactive','pending') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'active' COMMENT '状态:激活/停用/待审核',
  `frequency` int NULL DEFAULT 1 COMMENT '频次(如每小时发几次)',
  `time_start` time NULL DEFAULT '00:00:00' COMMENT '开始时间(HH:MM:SS)',
  `time_end` time NULL DEFAULT '23:59:59' COMMENT '结束时间(HH:MM:SS)',
  `interval_minutes` int NULL DEFAULT 60 COMMENT '执行间隔(分钟)',
  `ad_feature` varchar(1024) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '广告特征描述(JSON格式,如:{\"color\":\"red\", \"position\":\"top\"})',
  `click_count` bigint NULL DEFAULT 0 COMMENT '累计点击次数',
  `reply_count` bigint NULL DEFAULT 0 COMMENT '累计回复次数',
  `site_dimension` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网址维度标签(如:教育、医疗等)',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
  `created_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '创建人',
  `updated_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '更新人',
  `remark` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '备注信息',
  PRIMARY KEY (`id`) USING BTREE,
  -- FIX: the original dump declared a second UNIQUE index
  -- `idx_site_url`(`site_url`(191)) alongside this full-column unique key.
  -- The 191-char prefix-unique variant is redundant write overhead and would
  -- wrongly reject distinct URLs sharing their first 191 characters, so only
  -- the full-column unique index is kept.
  UNIQUE INDEX `site_url`(`site_url` ASC) USING BTREE,
  INDEX `idx_status`(`status` ASC) USING BTREE,
  INDEX `idx_created_at`(`created_at` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 3 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'MIP页广告网址管理表' ROW_FORMAT = Dynamic;
|
||||
|
||||
-- ----------------------------
-- Table structure for ai_mip_task_log
-- One row per RPA task run, tracking each pipeline step (visit, anti-bot,
-- ad detection, click, reply) plus aggregate outcome counters.
-- ----------------------------
DROP TABLE IF EXISTS `ai_mip_task_log`;
CREATE TABLE `ai_mip_task_log` (
  `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键ID',
  `task_id` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT 'RPA任务唯一ID',
  `site_id` bigint NOT NULL COMMENT '关联站点ID',
  `step_1_visit_time` datetime NULL DEFAULT NULL COMMENT '步骤1:访问网址时间',
  `step_1_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤1状态',
  `step_2_antibot_time` datetime NULL DEFAULT NULL COMMENT '步骤2:万金油技术方案执行时间',
  `step_2_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤2状态',
  `step_3_ad_detection_time` datetime NULL DEFAULT NULL COMMENT '步骤3:广告检测时间',
  `step_3_has_ad` tinyint(1) NULL DEFAULT NULL COMMENT '是否检测到广告',
  `step_3_ad_count` int NULL DEFAULT 0 COMMENT '检测到的广告数量',
  `step_4_click_time` datetime NULL DEFAULT NULL COMMENT '步骤4:点击广告时间',
  `step_4_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤4状态',
  `step_5_reply_time` datetime NULL DEFAULT NULL COMMENT '步骤5:获取回复时间',
  `step_5_status` enum('success','failed','skipped') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '步骤5状态',
  `task_start_time` datetime NOT NULL COMMENT '任务开始时间',
  `task_end_time` datetime NULL DEFAULT NULL COMMENT '任务结束时间',
  `task_duration_seconds` int NULL DEFAULT NULL COMMENT '任务执行时长(秒)',
  `task_status` enum('running','completed','failed','timeout') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'running' COMMENT '任务整体状态',
  `total_clicks` int NULL DEFAULT 0 COMMENT '本次任务总点击次数',
  `total_interactions` int NULL DEFAULT 0 COMMENT '本次任务总互动次数',
  `successful_interactions` int NULL DEFAULT 0 COMMENT '成功互动次数',
  `failed_interactions` int NULL DEFAULT 0 COMMENT '失败互动次数',
  `execution_mode` enum('auto','manual','scheduled') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT 'auto' COMMENT '执行模式',
  `triggered_by` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '触发者(定时任务/手动触发/队列)',
  `error_log` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '错误日志',
  `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间',
  `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录更新时间',
  PRIMARY KEY (`id`) USING BTREE,
  -- FIX: the original dump declared two byte-identical UNIQUE indexes on
  -- `task_id` (`task_id` and `uk_task_id`), doubling index maintenance on
  -- every write for no benefit; the duplicate `uk_task_id` is removed.
  UNIQUE INDEX `task_id`(`task_id` ASC) USING BTREE,
  INDEX `idx_site_id`(`site_id` ASC) USING BTREE,
  INDEX `idx_task_status`(`task_status` ASC) USING BTREE,
  INDEX `idx_start_time`(`task_start_time` ASC) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 2 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci COMMENT = 'RPA任务执行日志表' ROW_FORMAT = DYNAMIC;
|
||||
@@ -1,20 +0,0 @@
|
||||
-- 开发环境测试数据
|
||||
-- 用于 ai_mip_dev.db
|
||||
|
||||
-- 插入测试站点
|
||||
INSERT INTO ai_mip_site (site_url, site_name, status, frequency, time_start, time_end, interval_minutes, click_count, reply_count, site_dimension, created_by) VALUES
|
||||
('https://health.baidu.com/m/detail/ar_2366617956693492811', '百度健康测试页面', 'active', 3, '09:00:00', '21:00:00', 45, 0, 0, '医疗健康', 'admin'),
|
||||
('https://example.com/test', '测试网站1', 'active', 2, '10:00:00', '20:00:00', 60, 0, 0, '教育', 'admin'),
|
||||
('https://demo.com/page', '演示网站', 'inactive', 1, '00:00:00', '23:59:59', 120, 0, 0, '商业', 'admin');
|
||||
|
||||
-- 插入测试点击记录
|
||||
INSERT INTO ai_mip_click (site_id, site_url, click_time, user_ip, device_type, task_id, operator) VALUES
|
||||
(1, 'https://health.baidu.com/m/detail/ar_2366617956693492811', datetime('now'), '192.168.1.100', 'pc', 'TASK_DEV_001', 'RPA_SYSTEM'),
|
||||
(1, 'https://health.baidu.com/m/detail/ar_2366617956693492811', datetime('now', '-1 hour'), '192.168.1.101', 'mobile', 'TASK_DEV_002', 'RPA_SYSTEM'),
|
||||
(2, 'https://example.com/test', datetime('now', '-2 hours'), '192.168.1.102', 'pc', 'TASK_DEV_003', 'RPA_SYSTEM');
|
||||
|
||||
-- 插入测试互动记录
|
||||
INSERT INTO ai_mip_interaction (site_id, click_id, task_id, interaction_type, interaction_time, interaction_status, reply_content, execution_mode, browser_type, is_successful, operator) VALUES
|
||||
(1, 1, 'TASK_DEV_001', 'reply', datetime('now'), 'success', '测试回复内容', 'auto', 'playwright', 1, 'RPA_SYSTEM'),
|
||||
(1, 2, 'TASK_DEV_002', 'comment', datetime('now', '-1 hour'), 'success', '测试评论内容', 'auto', 'playwright', 1, 'RPA_SYSTEM'),
|
||||
(2, 3, 'TASK_DEV_003', 'reply', datetime('now', '-2 hours'), 'pending', NULL, 'auto', 'playwright', 0, 'RPA_SYSTEM');
|
||||
824
db_manager.py
@@ -48,10 +48,32 @@ class DatabaseManager:
|
||||
return conn
|
||||
|
||||
def _dict_from_row(self, row) -> Dict:
|
||||
"""将数据库行转换为字典"""
|
||||
"""将数据库行转换为字典,处理特殊类型"""
|
||||
if row is None:
|
||||
return None
|
||||
return dict(row) if isinstance(row, dict) else row
|
||||
|
||||
result = dict(row) if isinstance(row, dict) else row
|
||||
|
||||
# 处理特殊类型,确保JSON可序列化
|
||||
if isinstance(result, dict):
|
||||
from datetime import datetime, date, timedelta
|
||||
from decimal import Decimal
|
||||
|
||||
for key, value in result.items():
|
||||
if isinstance(value, datetime):
|
||||
result[key] = value.strftime('%Y-%m-%d %H:%M:%S')
|
||||
elif isinstance(value, date):
|
||||
result[key] = value.strftime('%Y-%m-%d')
|
||||
elif isinstance(value, timedelta):
|
||||
# 将timedelta转换为字符串格式 HH:MM:SS
|
||||
total_seconds = int(value.total_seconds())
|
||||
hours, remainder = divmod(total_seconds, 3600)
|
||||
minutes, seconds = divmod(remainder, 60)
|
||||
result[key] = f'{hours:02d}:{minutes:02d}:{seconds:02d}'
|
||||
elif isinstance(value, Decimal):
|
||||
result[key] = float(value)
|
||||
|
||||
return result
|
||||
|
||||
def _get_placeholder(self) -> str:
|
||||
"""获取SQL占位符,MySQL使用 %s"""
|
||||
@@ -816,3 +838,801 @@ class QueryTaskManager(DatabaseManager):
|
||||
except Exception as e:
|
||||
logger.error(f"获取任务统计失败: {str(e)}")
|
||||
return {}
|
||||
|
||||
|
||||
class EnhancedSiteManager(SiteManager):
|
||||
"""增强的站点管理器,支持分页、排序、筛选"""
|
||||
|
||||
def get_sites_paginated(
|
||||
self,
|
||||
page: int = 1,
|
||||
page_size: int = 20,
|
||||
status: str = None,
|
||||
keyword: str = None,
|
||||
sort_by: str = 'created_at',
|
||||
sort_order: str = 'desc'
|
||||
) -> tuple:
|
||||
"""
|
||||
分页获取站点列表
|
||||
|
||||
Returns:
|
||||
(站点列表, 总数)
|
||||
"""
|
||||
try:
|
||||
conn = self.get_connection()
|
||||
ph = self._get_placeholder()
|
||||
|
||||
# 构建WHERE条件
|
||||
conditions = []
|
||||
params = []
|
||||
|
||||
if status:
|
||||
conditions.append(f"status = {ph}")
|
||||
params.append(status)
|
||||
|
||||
if keyword:
|
||||
conditions.append(f"(site_url LIKE {ph} OR site_name LIKE {ph})")
|
||||
params.extend([f'%{keyword}%', f'%{keyword}%'])
|
||||
|
||||
where_clause = ' AND '.join(conditions) if conditions else '1=1'
|
||||
|
||||
# 允许的排序字段
|
||||
allowed_sort_fields = ['created_at', 'click_count', 'reply_count', 'site_url', 'status']
|
||||
if sort_by not in allowed_sort_fields:
|
||||
sort_by = 'created_at'
|
||||
|
||||
sort_order = 'DESC' if sort_order.upper() == 'DESC' else 'ASC'
|
||||
|
||||
# 查询总数
|
||||
count_sql = f"SELECT COUNT(*) as total FROM ai_mip_site WHERE {where_clause}"
|
||||
cursor = self._execute_query(conn, count_sql, tuple(params) if params else None)
|
||||
total = cursor.fetchone()['total']
|
||||
|
||||
# 查询数据
|
||||
offset = (page - 1) * page_size
|
||||
data_sql = f"""
|
||||
SELECT * FROM ai_mip_site
|
||||
WHERE {where_clause}
|
||||
ORDER BY {sort_by} {sort_order}
|
||||
LIMIT {ph} OFFSET {ph}
|
||||
"""
|
||||
params.extend([page_size, offset])
|
||||
|
||||
cursor = self._execute_query(conn, data_sql, tuple(params))
|
||||
rows = cursor.fetchall()
|
||||
conn.close()
|
||||
|
||||
return [self._dict_from_row(row) for row in rows], total
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"分页查询站点失败: {str(e)}")
|
||||
return [], 0
|
||||
|
||||
def delete_sites_batch(self, site_ids: List[int]) -> int:
    """Batch-delete sites from ai_mip_site.

    Args:
        site_ids: primary-key ids of the rows to delete.

    Returns:
        Number of rows actually deleted (0 on empty input or error).
    """
    if not site_ids:
        return 0

    conn = None
    try:
        conn = self.get_connection()
        # Use the class-wide placeholder helper instead of a hard-coded
        # '%s' so this stays consistent with the other query builders.
        ph = self._get_placeholder()
        placeholders = ','.join([ph] * len(site_ids))
        sql = f"DELETE FROM ai_mip_site WHERE id IN ({placeholders})"

        cursor = conn.cursor()
        cursor.execute(sql, tuple(site_ids))
        deleted = cursor.rowcount
        conn.commit()

        logger.info(f"批量删除站点: {deleted}/{len(site_ids)}")
        return deleted

    except Exception as e:
        logger.error(f"批量删除站点失败: {str(e)}")
        return 0
    finally:
        # Previously the connection leaked when execute/commit raised.
        if conn is not None:
            conn.close()
|
||||
|
||||
def update_sites_status_batch(self, site_ids: List[int], status: str) -> int:
    """Batch-update the status column for the given sites.

    Args:
        site_ids: primary-key ids of the rows to update.
        status: new status value to write.

    Returns:
        Number of rows actually updated (0 on empty input or error).
    """
    if not site_ids:
        return 0

    conn = None
    try:
        conn = self.get_connection()
        # Use the class-wide placeholder helper instead of a hard-coded
        # '%s' so this stays consistent with the other query builders.
        ph = self._get_placeholder()
        placeholders = ','.join([ph] * len(site_ids))
        sql = f"UPDATE ai_mip_site SET status = {ph} WHERE id IN ({placeholders})"

        cursor = conn.cursor()
        cursor.execute(sql, (status, *site_ids))
        updated = cursor.rowcount
        conn.commit()

        logger.info(f"批量更新站点状态为{status}: {updated}/{len(site_ids)}")
        return updated

    except Exception as e:
        logger.error(f"批量更新站点状态失败: {str(e)}")
        return 0
    finally:
        # Previously the connection leaked when execute/commit raised.
        if conn is not None:
            conn.close()
|
||||
|
||||
def export_sites(self, status: str = None, keyword: str = None) -> List[Dict]:
|
||||
"""导出站点数据"""
|
||||
try:
|
||||
conn = self.get_connection()
|
||||
ph = self._get_placeholder()
|
||||
|
||||
conditions = []
|
||||
params = []
|
||||
|
||||
if status:
|
||||
conditions.append(f"status = {ph}")
|
||||
params.append(status)
|
||||
|
||||
if keyword:
|
||||
conditions.append(f"(site_url LIKE {ph} OR site_name LIKE {ph})")
|
||||
params.extend([f'%{keyword}%', f'%{keyword}%'])
|
||||
|
||||
where_clause = ' AND '.join(conditions) if conditions else '1=1'
|
||||
|
||||
sql = f"""
|
||||
SELECT id, site_url, site_name, status, click_count, reply_count,
|
||||
frequency, time_start, time_end, site_dimension, query_word,
|
||||
created_at
|
||||
FROM ai_mip_site
|
||||
WHERE {where_clause}
|
||||
ORDER BY created_at DESC
|
||||
"""
|
||||
|
||||
cursor = self._execute_query(conn, sql, tuple(params) if params else None)
|
||||
rows = cursor.fetchall()
|
||||
conn.close()
|
||||
|
||||
return [self._dict_from_row(row) for row in rows]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"导出站点数据失败: {str(e)}")
|
||||
return []
|
||||
|
||||
|
||||
class EnhancedClickManager(ClickManager):
|
||||
"""增强的点击记录管理器"""
|
||||
|
||||
def get_clicks_paginated(
|
||||
self,
|
||||
page: int = 1,
|
||||
page_size: int = 20,
|
||||
site_id: int = None,
|
||||
start_date: str = None,
|
||||
end_date: str = None,
|
||||
sort_by: str = 'click_time',
|
||||
sort_order: str = 'desc'
|
||||
) -> tuple:
|
||||
"""
|
||||
分页获取点击记录
|
||||
|
||||
Returns:
|
||||
(点击记录列表, 总数)
|
||||
"""
|
||||
try:
|
||||
conn = self.get_connection()
|
||||
ph = self._get_placeholder()
|
||||
|
||||
conditions = []
|
||||
params = []
|
||||
|
||||
if site_id:
|
||||
conditions.append(f"c.site_id = {ph}")
|
||||
params.append(site_id)
|
||||
|
||||
if start_date:
|
||||
conditions.append(f"c.click_time >= {ph}")
|
||||
params.append(f"{start_date} 00:00:00")
|
||||
|
||||
if end_date:
|
||||
conditions.append(f"c.click_time <= {ph}")
|
||||
params.append(f"{end_date} 23:59:59")
|
||||
|
||||
where_clause = ' AND '.join(conditions) if conditions else '1=1'
|
||||
|
||||
allowed_sort_fields = ['click_time', 'site_id', 'device_type']
|
||||
if sort_by not in allowed_sort_fields:
|
||||
sort_by = 'click_time'
|
||||
|
||||
sort_order = 'DESC' if sort_order.upper() == 'DESC' else 'ASC'
|
||||
|
||||
# 查询总数
|
||||
count_sql = f"SELECT COUNT(*) as total FROM ai_mip_click c WHERE {where_clause}"
|
||||
cursor = self._execute_query(conn, count_sql, tuple(params) if params else None)
|
||||
total = cursor.fetchone()['total']
|
||||
|
||||
# 查询数据
|
||||
offset = (page - 1) * page_size
|
||||
data_sql = f"""
|
||||
SELECT c.*, s.site_name
|
||||
FROM ai_mip_click c
|
||||
LEFT JOIN ai_mip_site s ON c.site_id = s.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY c.{sort_by} {sort_order}
|
||||
LIMIT {ph} OFFSET {ph}
|
||||
"""
|
||||
params.extend([page_size, offset])
|
||||
|
||||
cursor = self._execute_query(conn, data_sql, tuple(params))
|
||||
rows = cursor.fetchall()
|
||||
conn.close()
|
||||
|
||||
return [self._dict_from_row(row) for row in rows], total
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"分页查询点击记录失败: {str(e)}")
|
||||
return [], 0
|
||||
|
||||
def export_clicks(
|
||||
self,
|
||||
site_id: int = None,
|
||||
start_date: str = None,
|
||||
end_date: str = None
|
||||
) -> List[Dict]:
|
||||
"""导出点击记录"""
|
||||
try:
|
||||
conn = self.get_connection()
|
||||
ph = self._get_placeholder()
|
||||
|
||||
conditions = []
|
||||
params = []
|
||||
|
||||
if site_id:
|
||||
conditions.append(f"c.site_id = {ph}")
|
||||
params.append(site_id)
|
||||
|
||||
if start_date:
|
||||
conditions.append(f"c.click_time >= {ph}")
|
||||
params.append(f"{start_date} 00:00:00")
|
||||
|
||||
if end_date:
|
||||
conditions.append(f"c.click_time <= {ph}")
|
||||
params.append(f"{end_date} 23:59:59")
|
||||
|
||||
where_clause = ' AND '.join(conditions) if conditions else '1=1'
|
||||
|
||||
sql = f"""
|
||||
SELECT c.id, c.site_id, s.site_name, c.site_url, c.click_time,
|
||||
c.user_ip, c.device_type, c.task_id
|
||||
FROM ai_mip_click c
|
||||
LEFT JOIN ai_mip_site s ON c.site_id = s.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY c.click_time DESC
|
||||
"""
|
||||
|
||||
cursor = self._execute_query(conn, sql, tuple(params) if params else None)
|
||||
rows = cursor.fetchall()
|
||||
conn.close()
|
||||
|
||||
return [self._dict_from_row(row) for row in rows]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"导出点击记录失败: {str(e)}")
|
||||
return []
|
||||
|
||||
|
||||
class EnhancedInteractionManager(InteractionManager):
|
||||
"""增强的互动记录管理器"""
|
||||
|
||||
def get_interactions_paginated(
|
||||
self,
|
||||
page: int = 1,
|
||||
page_size: int = 20,
|
||||
site_id: int = None,
|
||||
start_date: str = None,
|
||||
end_date: str = None,
|
||||
status: str = None,
|
||||
sort_by: str = 'interaction_time',
|
||||
sort_order: str = 'desc'
|
||||
) -> tuple:
|
||||
"""
|
||||
分页获取互动记录
|
||||
|
||||
Returns:
|
||||
(互动记录列表, 总数)
|
||||
"""
|
||||
try:
|
||||
conn = self.get_connection()
|
||||
ph = self._get_placeholder()
|
||||
|
||||
conditions = []
|
||||
params = []
|
||||
|
||||
if site_id:
|
||||
conditions.append(f"i.site_id = {ph}")
|
||||
params.append(site_id)
|
||||
|
||||
if start_date:
|
||||
conditions.append(f"i.interaction_time >= {ph}")
|
||||
params.append(f"{start_date} 00:00:00")
|
||||
|
||||
if end_date:
|
||||
conditions.append(f"i.interaction_time <= {ph}")
|
||||
params.append(f"{end_date} 23:59:59")
|
||||
|
||||
if status:
|
||||
conditions.append(f"i.interaction_status = {ph}")
|
||||
params.append(status)
|
||||
|
||||
where_clause = ' AND '.join(conditions) if conditions else '1=1'
|
||||
|
||||
allowed_sort_fields = ['interaction_time', 'site_id', 'interaction_status']
|
||||
if sort_by not in allowed_sort_fields:
|
||||
sort_by = 'interaction_time'
|
||||
|
||||
sort_order = 'DESC' if sort_order.upper() == 'DESC' else 'ASC'
|
||||
|
||||
# 查询总数
|
||||
count_sql = f"SELECT COUNT(*) as total FROM ai_mip_interaction i WHERE {where_clause}"
|
||||
cursor = self._execute_query(conn, count_sql, tuple(params) if params else None)
|
||||
total = cursor.fetchone()['total']
|
||||
|
||||
# 查询数据
|
||||
offset = (page - 1) * page_size
|
||||
data_sql = f"""
|
||||
SELECT i.*, s.site_name, s.site_url as site_url_ref
|
||||
FROM ai_mip_interaction i
|
||||
LEFT JOIN ai_mip_site s ON i.site_id = s.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY i.{sort_by} {sort_order}
|
||||
LIMIT {ph} OFFSET {ph}
|
||||
"""
|
||||
params.extend([page_size, offset])
|
||||
|
||||
cursor = self._execute_query(conn, data_sql, tuple(params))
|
||||
rows = cursor.fetchall()
|
||||
conn.close()
|
||||
|
||||
return [self._dict_from_row(row) for row in rows], total
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"分页查询互动记录失败: {str(e)}")
|
||||
return [], 0
|
||||
|
||||
def export_interactions(
|
||||
self,
|
||||
site_id: int = None,
|
||||
start_date: str = None,
|
||||
end_date: str = None
|
||||
) -> List[Dict]:
|
||||
"""导出互动记录"""
|
||||
try:
|
||||
conn = self.get_connection()
|
||||
ph = self._get_placeholder()
|
||||
|
||||
conditions = []
|
||||
params = []
|
||||
|
||||
if site_id:
|
||||
conditions.append(f"i.site_id = {ph}")
|
||||
params.append(site_id)
|
||||
|
||||
if start_date:
|
||||
conditions.append(f"i.interaction_time >= {ph}")
|
||||
params.append(f"{start_date} 00:00:00")
|
||||
|
||||
if end_date:
|
||||
conditions.append(f"i.interaction_time <= {ph}")
|
||||
params.append(f"{end_date} 23:59:59")
|
||||
|
||||
where_clause = ' AND '.join(conditions) if conditions else '1=1'
|
||||
|
||||
sql = f"""
|
||||
SELECT i.id, i.site_id, s.site_name, s.site_url, i.interaction_time,
|
||||
i.interaction_type, i.interaction_status, i.reply_content,
|
||||
i.response_received, i.response_content, i.proxy_ip
|
||||
FROM ai_mip_interaction i
|
||||
LEFT JOIN ai_mip_site s ON i.site_id = s.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY i.interaction_time DESC
|
||||
"""
|
||||
|
||||
cursor = self._execute_query(conn, sql, tuple(params) if params else None)
|
||||
rows = cursor.fetchall()
|
||||
conn.close()
|
||||
|
||||
return [self._dict_from_row(row) for row in rows]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"导出互动记录失败: {str(e)}")
|
||||
return []
|
||||
|
||||
|
||||
class EnhancedStatisticsManager(StatisticsManager):
|
||||
"""增强的统计管理器,支持图表数据"""
|
||||
|
||||
def get_click_trend(self, days: int = 7) -> Dict:
|
||||
"""
|
||||
获取点击趋势数据
|
||||
|
||||
Args:
|
||||
days: 天数
|
||||
|
||||
Returns:
|
||||
{'dates': [...], 'clicks': [...], 'successes': [...]}
|
||||
"""
|
||||
try:
|
||||
conn = self.get_connection()
|
||||
ph = self._get_placeholder()
|
||||
|
||||
# 点击趋势
|
||||
click_sql = f"""
|
||||
SELECT DATE(click_time) as date, COUNT(*) as count
|
||||
FROM ai_mip_click
|
||||
WHERE click_time >= DATE_SUB(CURDATE(), INTERVAL {ph} DAY)
|
||||
GROUP BY DATE(click_time)
|
||||
ORDER BY date
|
||||
"""
|
||||
cursor = self._execute_query(conn, click_sql, (days,))
|
||||
click_rows = cursor.fetchall()
|
||||
|
||||
# 成功次数趋势(is_successful=1)
|
||||
success_sql = f"""
|
||||
SELECT DATE(interaction_time) as date, COUNT(*) as count
|
||||
FROM ai_mip_interaction
|
||||
WHERE interaction_time >= DATE_SUB(CURDATE(), INTERVAL {ph} DAY)
|
||||
AND is_successful = 1
|
||||
GROUP BY DATE(interaction_time)
|
||||
ORDER BY date
|
||||
"""
|
||||
cursor = self._execute_query(conn, success_sql, (days,))
|
||||
success_rows = cursor.fetchall()
|
||||
conn.close()
|
||||
|
||||
# 构建结果
|
||||
from datetime import timedelta
|
||||
|
||||
dates = []
|
||||
clicks = []
|
||||
successes = []
|
||||
|
||||
click_map = {str(row['date']): row['count'] for row in click_rows}
|
||||
success_map = {str(row['date']): row['count'] for row in success_rows}
|
||||
|
||||
for i in range(days - 1, -1, -1):
|
||||
date = (datetime.now() - timedelta(days=i)).strftime('%Y-%m-%d')
|
||||
dates.append(date)
|
||||
clicks.append(click_map.get(date, 0))
|
||||
successes.append(success_map.get(date, 0))
|
||||
|
||||
return {
|
||||
'dates': dates,
|
||||
'clicks': clicks,
|
||||
'successes': successes
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"获取点击趋势失败: {str(e)}")
|
||||
return {'dates': [], 'clicks': [], 'successes': []}
|
||||
|
||||
def get_hourly_distribution(self) -> Dict:
    """Return click counts bucketed by hour of day.

    Aggregates ai_mip_click over the last 7 days.

    Returns:
        {'hours': [0..23], 'clicks': [count per hour, zero-filled]}
    """
    try:
        conn = self.get_connection()

        sql = """
            SELECT HOUR(click_time) as hour, COUNT(*) as count
            FROM ai_mip_click
            WHERE click_time >= DATE_SUB(NOW(), INTERVAL 7 DAY)
            GROUP BY HOUR(click_time)
            ORDER BY hour
        """
        cursor = self._execute_query(conn, sql)
        records = cursor.fetchall()
        conn.close()

        # Zero-fill the 24 buckets; hours with no clicks are absent from the rows.
        counts_by_hour = {record['hour']: record['count'] for record in records}
        all_hours = list(range(24))

        return {
            'hours': all_hours,
            'clicks': [counts_by_hour.get(h, 0) for h in all_hours]
        }

    except Exception as e:
        logger.error(f"获取时段分布失败: {str(e)}")
        return {'hours': list(range(24)), 'clicks': [0] * 24}
|
||||
|
||||
def get_top_sites(self, limit: int = 10) -> List[Dict]:
    """Return the most-clicked active sites.

    Args:
        limit: maximum number of sites to return.

    Returns:
        List of site dicts (id, site_name, site_url, click_count,
        reply_count), ordered by click_count descending; [] on error.
    """
    try:
        conn = self.get_connection()
        ph = self._get_placeholder()

        sql = f"""
            SELECT id, site_name, site_url, click_count, reply_count
            FROM ai_mip_site
            WHERE status = 'active'
            ORDER BY click_count DESC
            LIMIT {ph}
        """
        cursor = self._execute_query(conn, sql, (limit,))
        records = cursor.fetchall()
        conn.close()

        return [self._dict_from_row(record) for record in records]

    except Exception as e:
        logger.error(f"获取Top站点失败: {str(e)}")
        return []
|
||||
|
||||
def get_reply_rate_distribution(self) -> Dict:
    """Return reply-rate distribution data for a pie chart.

    Compares total clicks against interactions that received a response.

    Returns:
        {'labels': [...], 'values': [replied_count, unreplied_count]}
    """
    try:
        conn = self.get_connection()

        # Total clicks vs interactions that got a response.
        cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_click")
        clicks_total = cursor.fetchone()['total']

        cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM ai_mip_interaction WHERE response_received = 1")
        replies_total = cursor.fetchone()['total']

        conn.close()

        # Clamp at zero in case replies outnumber recorded clicks.
        unreplied = max(clicks_total - replies_total, 0)

        return {
            'labels': ['有回复', '无回复'],
            'values': [replies_total, unreplied]
        }

    except Exception as e:
        logger.error(f"获取回复率分布失败: {str(e)}")
        return {'labels': ['有回复', '无回复'], 'values': [0, 0]}
|
||||
|
||||
|
||||
class QueryImportLogManager(DatabaseManager):
|
||||
"""Query导入日志管理器"""
|
||||
|
||||
def ensure_table(self):
|
||||
"""确保 query_import_log 表存在"""
|
||||
try:
|
||||
conn = self.get_connection()
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS `query_import_log` (
|
||||
`id` INT AUTO_INCREMENT PRIMARY KEY,
|
||||
`filename` VARCHAR(255) NOT NULL COMMENT '上传的文件名',
|
||||
`filepath` VARCHAR(500) NOT NULL COMMENT '文件完整路径',
|
||||
`upload_time` DATETIME NOT NULL COMMENT '上传时间',
|
||||
`import_time` DATETIME NULL COMMENT '实际导入时间',
|
||||
`status` VARCHAR(20) DEFAULT 'pending' COMMENT '导入状态',
|
||||
`total_count` INT DEFAULT 0 COMMENT '总行数',
|
||||
`success_count` INT DEFAULT 0 COMMENT '成功插入数',
|
||||
`skip_count` INT DEFAULT 0 COMMENT '跳过数(已存在)',
|
||||
`fail_count` INT DEFAULT 0 COMMENT '失败数',
|
||||
`error_message` TEXT NULL COMMENT '错误信息',
|
||||
`created_at` DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
`updated_at` DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
INDEX `idx_status` (`status`),
|
||||
INDEX `idx_upload_time` (`upload_time`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='关键词导入日志表'
|
||||
""")
|
||||
conn.commit()
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logger.error(f"创建 query_import_log 表失败: {e}")
|
||||
|
||||
def create_log(self, filename: str, filepath: str) -> Optional[int]:
    """Create a pending import-log row for an uploaded file.

    Args:
        filename: original name of the uploaded file.
        filepath: full path where the uploaded file was stored.

    Returns:
        Auto-increment id of the new log row, or None on failure.
    """
    try:
        self.ensure_table()
        conn = self.get_connection()
        ph = self._get_placeholder()
        cursor = conn.cursor()
        cursor.execute(
            f"INSERT INTO query_import_log (filename, filepath, upload_time, status) VALUES ({ph}, {ph}, NOW(), 'pending')",
            (filename, filepath)
        )
        log_id = cursor.lastrowid
        conn.commit()
        conn.close()
        # Log the actual filename; the message previously contained a
        # stale "(unknown)" placeholder instead of the file name.
        logger.info(f"创建导入日志: {filename} (ID: {log_id})")
        return log_id
    except Exception as e:
        logger.error(f"创建导入日志失败: {e}")
        return None
|
||||
|
||||
def update_status(self, log_id: int, status: str,
|
||||
total_count: int = 0, success_count: int = 0,
|
||||
skip_count: int = 0, fail_count: int = 0,
|
||||
error_message: str = None):
|
||||
"""更新导入状态和统计数据"""
|
||||
try:
|
||||
conn = self.get_connection()
|
||||
ph = self._get_placeholder()
|
||||
cursor = conn.cursor()
|
||||
|
||||
import_time_sql = ", import_time = NOW()" if status in ('running', 'completed', 'failed') else ""
|
||||
|
||||
cursor.execute(
|
||||
f"""UPDATE query_import_log
|
||||
SET status = {ph}, total_count = {ph}, success_count = {ph},
|
||||
skip_count = {ph}, fail_count = {ph}, error_message = {ph}
|
||||
{import_time_sql}
|
||||
WHERE id = {ph}""",
|
||||
(status, total_count, success_count, skip_count, fail_count, error_message, log_id)
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logger.error(f"更新导入日志失败: {e}")
|
||||
|
||||
def get_pending_logs(self) -> List[Dict]:
|
||||
"""获取待处理的导入日志"""
|
||||
try:
|
||||
self.ensure_table()
|
||||
conn = self.get_connection()
|
||||
cursor = self._execute_query(
|
||||
conn, "SELECT * FROM query_import_log WHERE status = 'pending' ORDER BY created_at ASC"
|
||||
)
|
||||
rows = cursor.fetchall()
|
||||
conn.close()
|
||||
return [self._dict_from_row(row) for row in rows]
|
||||
except Exception as e:
|
||||
logger.error(f"查询待处理日志失败: {e}")
|
||||
return []
|
||||
|
||||
def get_logs_paginated(self, page: int = 1, page_size: int = 20) -> Dict:
|
||||
"""分页获取导入日志"""
|
||||
try:
|
||||
self.ensure_table()
|
||||
conn = self.get_connection()
|
||||
ph = self._get_placeholder()
|
||||
|
||||
# 总数
|
||||
cursor = self._execute_query(conn, "SELECT COUNT(*) as total FROM query_import_log")
|
||||
total = cursor.fetchone()['total']
|
||||
|
||||
# 分页数据
|
||||
offset = (page - 1) * page_size
|
||||
cursor = self._execute_query(
|
||||
conn,
|
||||
f"SELECT * FROM query_import_log ORDER BY created_at DESC LIMIT {ph} OFFSET {ph}",
|
||||
(page_size, offset)
|
||||
)
|
||||
rows = cursor.fetchall()
|
||||
conn.close()
|
||||
|
||||
return {
|
||||
'items': [self._dict_from_row(row) for row in rows],
|
||||
'total': total,
|
||||
'page': page,
|
||||
'page_size': page_size
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"分页查询导入日志失败: {e}")
|
||||
return {'items': [], 'total': 0, 'page': page, 'page_size': page_size}
|
||||
|
||||
def is_file_logged(self, filepath: str) -> bool:
    """Return True when an import-log row already exists for *filepath*."""
    try:
        conn = self.get_connection()
        ph = self._get_placeholder()
        query = f"SELECT COUNT(*) as cnt FROM query_import_log WHERE filepath = {ph}"
        cursor = self._execute_query(conn, query, (filepath,))
        matched = cursor.fetchone()['cnt']
        conn.close()
        return matched > 0
    except Exception as e:
        logger.error(f"检查文件日志失败: {e}")
        return False
|
||||
|
||||
|
||||
class QueryKeywordManager(DatabaseManager):
|
||||
"""Query关键词管理器 - 操作 baidu_keyword 表"""
|
||||
|
||||
def insert_keyword(self, keyword: str, seed_id: int = 9999, seed_name: str = '手动提交',
|
||||
crawled: int = 1, department: str = '', department_id: int = 0,
|
||||
author_id: int = 0, author_name: str = '') -> int:
|
||||
"""
|
||||
插入单条关键词到 baidu_keyword 表(INSERT IGNORE)
|
||||
|
||||
Returns:
|
||||
affected rows: 1=新插入, 0=已存在被跳过, -1=失败
|
||||
"""
|
||||
try:
|
||||
conn = self.get_connection()
|
||||
ph = self._get_placeholder()
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
f"""INSERT IGNORE INTO baidu_keyword
|
||||
(keyword, seed_id, seed_name, crawled, parents_id, created_at,
|
||||
department, department_id, query_status, author_id, author_name)
|
||||
VALUES ({ph}, {ph}, {ph}, {ph}, 0, NOW(), {ph}, {ph}, 'manual_review', {ph}, {ph})""",
|
||||
(keyword, seed_id, seed_name, crawled, department, department_id, author_id, author_name)
|
||||
)
|
||||
affected = cursor.rowcount
|
||||
conn.commit()
|
||||
conn.close()
|
||||
return affected
|
||||
except Exception as e:
|
||||
logger.error(f"插入关键词失败: {keyword} - {e}")
|
||||
return -1
|
||||
|
||||
def batch_insert_keywords(self, keyword_list: list, seed_id: int = 9999,
                          seed_name: str = '手动提交', crawled: int = 1,
                          query_status: str = 'manual_review') -> dict:
    """Bulk-insert keywords into baidu_keyword using INSERT IGNORE.

    Args:
        keyword_list: [{'keyword': str, 'department': str, 'seed_name': str(optional)}, ...]
        seed_id: seed id written to every row.
        seed_name: default seed name; a per-item 'seed_name' overrides it.
        crawled: crawled flag written to every row.
        query_status: query_status value to write, e.g. 'draft' or 'manual_review'.

    Returns:
        {'success': int, 'skip': int, 'fail': int}
    """
    stats = {'success': 0, 'skip': 0, 'fail': 0}
    if not keyword_list:
        return stats

    try:
        conn = self.get_connection()
        cursor = conn.cursor()

        # Honor a per-item seed_name when provided, as documented;
        # previously the per-item value was silently ignored.
        values = [
            (
                item['keyword'], seed_id, item.get('seed_name', seed_name),
                crawled, item.get('department', ''), query_status
            )
            for item in keyword_list
        ]

        cursor.executemany(
            """INSERT IGNORE INTO baidu_keyword
               (keyword, seed_id, seed_name, crawled, parents_id, created_at,
                department, department_id, query_status, author_id, author_name)
               VALUES (%s, %s, %s, %s, 0, NOW(), %s, 0, %s, 0, '')""",
            values
        )

        # For executemany, rowcount is the number of rows actually
        # inserted; rows skipped by INSERT IGNORE are not counted.
        inserted = cursor.rowcount
        conn.commit()
        conn.close()

        stats['success'] = inserted
        stats['skip'] = len(keyword_list) - inserted
        return stats

    except Exception as e:
        logger.error(f"批量插入关键词失败: {e}")
        stats['fail'] = len(keyword_list)
        return stats
|
||||
|
||||
|
Before Width: | Height: | Size: 67 KiB |
85
deploy.sh
@@ -1,85 +0,0 @@
|
||||
#!/bin/bash
|
||||
# AI MIP 服务部署脚本
|
||||
# 用法: sudo bash deploy.sh
|
||||
|
||||
set -e
|
||||
|
||||
echo "=========================================="
|
||||
echo " AI MIP 服务部署脚本"
|
||||
echo "=========================================="
|
||||
|
||||
# 配置变量
|
||||
PROJECT_DIR="/opt/ai_mip"
|
||||
SERVICE_NAME="ai_mip"
|
||||
SERVICE_FILE="${SERVICE_NAME}.service"
|
||||
LOG_DIR="/var/log/ai_mip"
|
||||
VENV_DIR="${PROJECT_DIR}/venv"
|
||||
USER="www-data"
|
||||
GROUP="www-data"
|
||||
|
||||
# 检查是否root权限
|
||||
if [[ $EUID -ne 0 ]]; then
|
||||
echo "❌ 错误: 请使用 sudo 运行此脚本"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "📦 步骤1: 创建项目目录"
|
||||
mkdir -p ${PROJECT_DIR}
|
||||
mkdir -p ${LOG_DIR}
|
||||
echo "✅ 目录创建完成"
|
||||
|
||||
echo ""
|
||||
echo "📂 步骤2: 复制项目文件"
|
||||
echo "请确保当前目录是项目根目录"
|
||||
cp -r ./* ${PROJECT_DIR}/
|
||||
echo "✅ 文件复制完成"
|
||||
|
||||
echo ""
|
||||
echo "🐍 步骤3: 创建Python虚拟环境"
|
||||
if [ ! -d "${VENV_DIR}" ]; then
|
||||
python3 -m venv ${VENV_DIR}
|
||||
echo "✅ 虚拟环境创建完成"
|
||||
else
|
||||
echo "⚠️ 虚拟环境已存在,跳过创建"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "📦 步骤4: 安装依赖"
|
||||
${VENV_DIR}/bin/pip install --upgrade pip
|
||||
${VENV_DIR}/bin/pip install -r ${PROJECT_DIR}/requirements.txt
|
||||
echo "✅ 依赖安装完成"
|
||||
|
||||
echo ""
|
||||
echo "🔐 步骤5: 设置权限"
|
||||
chown -R ${USER}:${GROUP} ${PROJECT_DIR}
|
||||
chown -R ${USER}:${GROUP} ${LOG_DIR}
|
||||
chmod +x ${PROJECT_DIR}/main.py
|
||||
echo "✅ 权限设置完成"
|
||||
|
||||
echo ""
|
||||
echo "⚙️ 步骤6: 安装systemd服务"
|
||||
cp ${PROJECT_DIR}/${SERVICE_FILE} /etc/systemd/system/
|
||||
systemctl daemon-reload
|
||||
echo "✅ 服务文件已安装"
|
||||
|
||||
echo ""
|
||||
echo "🚀 步骤7: 启动服务"
|
||||
systemctl enable ${SERVICE_NAME}
|
||||
systemctl restart ${SERVICE_NAME}
|
||||
echo "✅ 服务已启动"
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo " 部署完成!"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
echo "📋 常用命令:"
|
||||
echo " 查看状态: sudo systemctl status ${SERVICE_NAME}"
|
||||
echo " 查看日志: sudo journalctl -u ${SERVICE_NAME} -f"
|
||||
echo " 查看服务日志: tail -f ${LOG_DIR}/service.log"
|
||||
echo " 查看错误日志: tail -f ${LOG_DIR}/error.log"
|
||||
echo " 重启服务: sudo systemctl restart ${SERVICE_NAME}"
|
||||
echo " 停止服务: sudo systemctl stop ${SERVICE_NAME}"
|
||||
echo " 健康检查: curl http://localhost:8899/health"
|
||||
echo ""
|
||||
348
export_clicks_to_csv.py
Normal file
@@ -0,0 +1,348 @@
|
||||
"""
|
||||
导出点击记录到CSV文件
|
||||
支持开发环境(dev)和生产环境(prod)
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import csv
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from config import Config
|
||||
from db_manager import ClickManager
|
||||
|
||||
# 配置日志
|
||||
logger.remove()
|
||||
logger.add(
|
||||
sys.stdout,
|
||||
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <level>{message}</level>",
|
||||
level="INFO"
|
||||
)
|
||||
|
||||
|
||||
class ClickExporter:
    """Export click records, optionally joined with site/interaction data, to CSV.

    The target database (development vs. production) is selected by the ENV
    environment variable consumed by ``Config``.
    """

    def __init__(self):
        """Bind the active configuration and create the click DB manager."""
        self.config = Config
        self.click_manager = ClickManager()

        logger.info("=" * 70)
        logger.info("点击记录导出器已初始化")
        logger.info(f"当前环境: {self.config.ENV}")
        logger.info("数据库配置:")
        logger.info(f" - Host: {self.config.MYSQL_HOST}:{self.config.MYSQL_PORT}")
        logger.info(f" - Database: {self.config.MYSQL_DATABASE}")
        logger.info(f" - User: {self.config.MYSQL_USER}")
        logger.info("=" * 70)
        logger.info("提示: 通过设置环境变量 ENV=production 切换到生产环境")
        logger.info("=" * 70)

    def get_all_clicks(self, start_date: str = None, end_date: str = None,
                       site_id: int = None, limit: int = None, join_mode: str = 'simple') -> list:
        """Query click records.

        Args:
            start_date: inclusive start date, YYYY-MM-DD.
            end_date: inclusive end date, YYYY-MM-DD.
            site_id: optional site-id filter.
            limit: optional cap on the number of rows returned.
            join_mode: 'simple' = click table only; 'full' = LEFT JOIN the
                site and interaction tables as well.

        Returns:
            List of row dicts (datetime values rendered as
            'YYYY-MM-DD HH:MM:SS'); an empty list on any error.
        """
        conn = None
        try:
            conn = self.click_manager.get_connection()
            cursor = conn.cursor()

            # Build the SELECT depending on the requested mode.
            if join_mode == 'full':
                # Three-table join: clicks + owning site + per-click interactions.
                sql = """
                    SELECT
                        c.id as click_id,
                        c.site_id,
                        c.site_url,
                        c.click_time,
                        c.user_ip,
                        c.device_type,
                        c.task_id as click_task_id,
                        c.operator as click_operator,
                        s.site_name,
                        s.status as site_status,
                        s.site_dimension,
                        s.click_count as total_click_count,
                        s.reply_count as total_reply_count,
                        i.id as interaction_id,
                        i.interaction_type,
                        i.interaction_time,
                        i.interaction_status,
                        i.reply_content,
                        i.response_received,
                        i.response_content,
                        i.is_successful as interaction_successful,
                        i.proxy_ip as interaction_proxy_ip,
                        i.fingerprint_id,
                        i.error_message
                    FROM ai_mip_click c
                    LEFT JOIN ai_mip_site s ON c.site_id = s.id
                    LEFT JOIN ai_mip_interaction i ON c.id = i.click_id
                    WHERE 1=1
                """
            else:
                # Simple mode: the click table alone.
                sql = "SELECT * FROM ai_mip_click WHERE 1=1"

            params = []
            # In 'full' mode the click table is aliased as "c".
            col_prefix = "c." if join_mode == 'full' else ""

            if start_date:
                sql += f" AND {col_prefix}click_time >= %s"
                params.append(f"{start_date} 00:00:00")

            if end_date:
                sql += f" AND {col_prefix}click_time <= %s"
                params.append(f"{end_date} 23:59:59")

            if site_id:
                sql += f" AND {col_prefix}site_id = %s"
                params.append(site_id)

            sql += f" ORDER BY {col_prefix}click_time DESC"

            if limit:
                # Bind LIMIT as a parameter rather than interpolating it into
                # the SQL string (avoids any chance of SQL injection and keeps
                # the statement fully parameterized).
                sql += " LIMIT %s"
                params.append(int(limit))

            logger.info(f"查询模式: {join_mode}")
            logger.info(f"执行查询: {sql[:200]}..." if len(sql) > 200 else f"执行查询: {sql}")
            logger.info(f"参数: {params}")

            cursor.execute(sql, params if params else None)

            # Column names from the cursor metadata.
            columns = [desc[0] for desc in cursor.description]

            rows = cursor.fetchall()

            # Convert tuples to dicts, stringifying datetimes for CSV output.
            clicks = []
            for row in rows:
                click_dict = {}
                for idx, col in enumerate(columns):
                    value = row[idx]
                    if isinstance(value, datetime):
                        value = value.strftime('%Y-%m-%d %H:%M:%S')
                    click_dict[col] = value
                clicks.append(click_dict)

            logger.success(f"查询成功,获取到 {len(clicks)} 条记录")
            return clicks

        except Exception as e:
            logger.error(f"查询点击记录失败: {str(e)}")
            return []
        finally:
            # Close the connection on every path (the original leaked it on
            # exceptions, since close() only ran on the success path).
            if conn is not None:
                conn.close()

    def export_to_csv(self, clicks: list, output_file: str, encoding: str = 'utf-8-sig'):
        """Write click records to a CSV file with Chinese column headers.

        Args:
            clicks: list of row dicts as returned by :meth:`get_all_clicks`.
            output_file: output file path (parent dirs are created as needed).
            encoding: file encoding; default utf-8-sig for Excel compatibility.

        Returns:
            True on success, False when there is nothing to export or on error.
        """
        if not clicks:
            logger.warning("没有数据可导出")
            return False

        try:
            # Make sure the target directory exists.
            output_path = Path(output_file)
            output_path.parent.mkdir(parents=True, exist_ok=True)

            # English column name -> Chinese header mapping. Unknown columns
            # fall back to their English name.
            field_mapping = {
                # Click-table columns
                'id': '点击ID',
                'click_id': '点击ID',
                'site_id': '站点ID',
                'site_url': '站点URL',
                'click_time': '点击时间',
                'user_ip': '用户IP',
                'user_agent': '浏览器标识',
                'referer_url': '来源页面',
                'device_type': '设备类型',
                'click_count': '点击次数',
                'is_valid': '是否有效',
                'task_id': '任务ID',
                'click_task_id': '任务ID',
                'operator': '操作者',
                'click_operator': '操作者',
                'created_at': '创建时间',
                # Site-table columns
                'site_name': '站点名称',
                'site_status': '站点状态',
                'status': '状态',
                'site_dimension': '站点维度',
                'total_click_count': '总点击数',
                'total_reply_count': '总回复数',
                # Interaction-table columns
                'interaction_id': '互动ID',
                'interaction_type': '互动类型',
                'interaction_time': '互动时间',
                'interaction_status': '互动状态',
                'reply_content': '回复内容',
                'response_received': '是否收到回复',
                'response_content': '对方回复内容',
                'interaction_successful': '互动是否成功',
                'is_successful': '是否成功',
                'interaction_proxy_ip': '代理IP',
                'proxy_ip': '代理IP',
                'fingerprint_id': '浏览器指纹ID',
                'error_message': '错误信息',
            }

            # Column order follows the first record's key order.
            fieldnames = list(clicks[0].keys())
            chinese_fieldnames = [field_mapping.get(field, field) for field in fieldnames]

            with open(output_file, 'w', newline='', encoding=encoding) as csvfile:
                writer = csv.writer(csvfile)
                writer.writerow(chinese_fieldnames)
                for click in clicks:
                    row = [click.get(field, '') for field in fieldnames]
                    writer.writerow(row)

            logger.success(f"成功导出 {len(clicks)} 条记录到: {output_file}")
            logger.info(f"文件大小: {output_path.stat().st_size / 1024:.2f} KB")

            return True

        except Exception as e:
            logger.error(f"导出CSV失败: {str(e)}")
            return False

    def print_summary(self, clicks: list):
        """Log a short summary of the records: count, device mix, time span."""
        if not clicks:
            return

        logger.info("\n" + "=" * 70)
        logger.info("数据摘要")
        logger.info("=" * 70)
        logger.info(f"总记录数: {len(clicks)}")

        # Device-type frequency.
        device_stats = {}
        for click in clicks:
            device = click.get('device_type', 'unknown')
            device_stats[device] = device_stats.get(device, 0) + 1

        logger.info("设备类型分布:")
        for device, count in device_stats.items():
            logger.info(f" - {device}: {count} 条")

        # Click-time range (clicks is non-empty here; the redundant
        # inner `if clicks:` of the original was dropped).
        times = [click.get('click_time') for click in clicks if click.get('click_time')]
        if times:
            logger.info(f"时间范围: {min(times)} ~ {max(times)}")

        logger.info("=" * 70)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, query click records, export to CSV.

    Exit codes: 0 on success; 1 on query/export failure or when no records
    match; 130 on Ctrl-C.
    """
    parser = argparse.ArgumentParser(
        description='导出点击记录到CSV文件(环境通过ENV环境变量控制)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
使用示例:
# 导出开发环境所有点击记录(默认,仅点击表)
python export_clicks_to_csv.py -o clicks_dev.csv

# 导出联合三表的完整数据(包含站点信息和互动记录)
python export_clicks_to_csv.py -o clicks_full.csv --join-mode full

# 导出生产环境点击记录(设置ENV环境变量)
$env:ENV="production"; python export_clicks_to_csv.py -o clicks_prod.csv --join-mode full

# 按日期范围导出
python export_clicks_to_csv.py -o clicks.csv --start-date 2026-01-01 --end-date 2026-01-31 --join-mode full

# 导出指定站点的记录
python export_clicks_to_csv.py -o clicks.csv --site-id 123 --join-mode full

# 限制导出数量
python export_clicks_to_csv.py -o clicks.csv --limit 1000 --join-mode full
"""
    )

    parser.add_argument('-o', '--output', required=True,
                        help='输出CSV文件路径')
    parser.add_argument('--start-date', help='开始日期 YYYY-MM-DD')
    parser.add_argument('--end-date', help='结束日期 YYYY-MM-DD')
    parser.add_argument('--site-id', type=int, help='站点ID筛选')
    parser.add_argument('--limit', type=int, help='限制导出数量')
    parser.add_argument('--join-mode', choices=['simple', 'full'], default='simple',
                        help='查询模式: simple=仅点击表, full=联合三表(点击+站点+互动)')
    parser.add_argument('--encoding', default='utf-8-sig',
                        help='文件编码,默认utf-8-sig(Excel兼容)')

    args = parser.parse_args()

    try:
        # Create the exporter (connects to the DB selected by ENV).
        exporter = ClickExporter()

        # Run the query with the CLI filters.
        logger.info("\n开始查询点击记录...")
        clicks = exporter.get_all_clicks(
            start_date=args.start_date,
            end_date=args.end_date,
            site_id=args.site_id,
            limit=args.limit,
            join_mode=args.join_mode
        )

        if not clicks:
            logger.warning("没有找到符合条件的记录")
            sys.exit(1)

        # Log a short summary before writing the file.
        exporter.print_summary(clicks)

        # Export to CSV.
        logger.info(f"\n开始导出到: {args.output}")
        success = exporter.export_to_csv(clicks, args.output, args.encoding)

        if success:
            logger.success("\n导出完成!")
            sys.exit(0)
        else:
            logger.error("\n导出失败")
            sys.exit(1)

    except KeyboardInterrupt:
        # Conventional exit code for SIGINT.
        logger.warning("\n用户中断导出")
        sys.exit(130)
    except Exception as e:
        logger.error(f"导出失败: {str(e)}")
        sys.exit(1)
|
||||
|
||||
|
||||
# Script entry point.
if __name__ == '__main__':
    main()
|
||||
@@ -25,38 +25,9 @@ logger.add(
|
||||
class ExcelImporter:
|
||||
"""Excel数据导入器"""
|
||||
|
||||
def __init__(self, env: str = None):
|
||||
"""
|
||||
初始化导入器
|
||||
|
||||
Args:
|
||||
env: 环境标识,dev或prod,默认使用当前配置
|
||||
"""
|
||||
# 如果指定了环境,临时设置环境变量
|
||||
if env:
|
||||
original_env = os.getenv('ENV')
|
||||
if env == 'dev':
|
||||
os.environ['ENV'] = 'development'
|
||||
elif env == 'prod':
|
||||
os.environ['ENV'] = 'production'
|
||||
else:
|
||||
raise ValueError(f"无效的环境标识: {env},必须是 dev 或 prod")
|
||||
|
||||
# 重新加载配置
|
||||
import importlib
|
||||
import config as config_module
|
||||
importlib.reload(config_module)
|
||||
from config import Config as ReloadedConfig
|
||||
self.config = ReloadedConfig
|
||||
|
||||
# 恢复原始环境变量
|
||||
if original_env:
|
||||
os.environ['ENV'] = original_env
|
||||
else:
|
||||
os.environ.pop('ENV', None)
|
||||
else:
|
||||
def __init__(self):
|
||||
"""初始化导入器"""
|
||||
self.config = Config
|
||||
|
||||
self.site_manager = SiteManager()
|
||||
|
||||
logger.info("=" * 70)
|
||||
@@ -67,6 +38,8 @@ class ExcelImporter:
|
||||
logger.info(f" - Database: {self.config.MYSQL_DATABASE}")
|
||||
logger.info(f" - User: {self.config.MYSQL_USER}")
|
||||
logger.info("=" * 70)
|
||||
logger.info("提示: 通过设置环境变量 ENV=production 切换到生产环境")
|
||||
logger.info("=" * 70)
|
||||
|
||||
def read_excel(self, file_path: str) -> pd.DataFrame:
|
||||
"""
|
||||
@@ -258,27 +231,25 @@ class ExcelImporter:
|
||||
def main():
|
||||
"""主函数"""
|
||||
parser = argparse.ArgumentParser(
|
||||
description='从Excel文件导入URL数据到数据库',
|
||||
description='从Excel文件导入URL数据到数据库(环境通过ENV环境变量控制)',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
使用示例:
|
||||
# 导入到开发环境
|
||||
python import_excel_to_db.py -f "广告链接 1.26(962条).xlsx" -e dev
|
||||
# 导入到开发环境(默认)
|
||||
python import_excel_to_db.py -f "广告链接 1.26(962条).xlsx"
|
||||
|
||||
# 导入到生产环境
|
||||
python import_excel_to_db.py -f "广告链接 1.26(962条).xlsx" -e prod
|
||||
# 导入到生产环境(设置ENV环境变量)
|
||||
$env:ENV="production"; python import_excel_to_db.py -f "广告链接 1.26(962条).xlsx"
|
||||
|
||||
# 指定查询词和维度
|
||||
python import_excel_to_db.py -f "广告链接.xlsx" -e dev -q "关键词" -d "医疗"
|
||||
python import_excel_to_db.py -f "广告链接.xlsx" -q "关键词" -d "医疗"
|
||||
|
||||
# 试运行模式(不实际插入)
|
||||
python import_excel_to_db.py -f "广告链接.xlsx" -e dev --dry-run
|
||||
python import_excel_to_db.py -f "广告链接.xlsx" --dry-run
|
||||
"""
|
||||
)
|
||||
|
||||
parser.add_argument('-f', '--file', required=True, help='Excel文件路径')
|
||||
parser.add_argument('-e', '--env', choices=['dev', 'prod'], required=True,
|
||||
help='目标环境: dev=开发环境, prod=生产环境')
|
||||
parser.add_argument('-q', '--query-word', help='查询词(默认:None)')
|
||||
parser.add_argument('-d', '--dimension', help='站点维度(默认:None)')
|
||||
parser.add_argument('--frequency', type=int, default=1, help='频次(默认:1)')
|
||||
@@ -291,7 +262,7 @@ def main():
|
||||
|
||||
try:
|
||||
# 创建导入器
|
||||
importer = ExcelImporter(env=args.env)
|
||||
importer = ExcelImporter()
|
||||
|
||||
# 读取Excel
|
||||
df = importer.read_excel(args.file)
|
||||
@@ -307,7 +278,7 @@ def main():
|
||||
|
||||
# 确认导入
|
||||
if not args.dry_run:
|
||||
logger.warning(f"\n即将导入 {len(df)} 条数据到【{args.env.upper()}】环境")
|
||||
logger.warning(f"\n即将导入 {len(df)} 条数据到【{importer.config.ENV.upper()}】环境")
|
||||
logger.warning(f"数据库: {importer.config.MYSQL_HOST}:{importer.config.MYSQL_PORT}/{importer.config.MYSQL_DATABASE}")
|
||||
|
||||
response = input("\n确认继续?[y/N]: ")
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
# localAPI
|
||||
AdsPower supports Local API, which has functions like reading and writing account configuration information, opening and closing browsers, searching for accounts. Besides, it can cooperate with Selenium and Puppeteer to execute browser operations automatically.
|
||||
|
||||
<br />
|
||||
|
||||
> ## How to Use AdsPower Local API
|
||||
|
||||
- Users of AdsPower team collaboration version have access to API
|
||||
- Start AdsPower, log in the account with API permission
|
||||
- Go to Account Management-> Setting-> Local API to check the following items
|
||||
- API status: Success
|
||||
- API address: http://local.adspower.net:50325/ or http://localhost:50325/ (port: 50325, which might change and subjects to the address in the setting).
|
||||
- Script can go to Profile Management-> click Settings-> click Cache folder-> local_api file to obtain API address
|
||||
- Use script or http request tool to invoke Local API, allow to configure account data, browser fingerprint, open or close browser and other operations
|
||||
- API parameter types are strings and the POST body format is JSON; optional parameters may simply be omitted from the request
|
||||
- Access frequency control for all APIs, max. access frequency: 1 request/second
|
||||
- At the same time, it supports the mode of no-interface api-key to start the Local API service. For details, see: [Local Api Doc](https://localapi-doc-en.adspower.com/)
|
||||
|
||||
<br />
|
||||
|
||||
> ## **What the Local API supports**
|
||||
|
||||
- [x] API Status
|
||||
- [x] Browser Operation
|
||||
- [x] Open Browser
|
||||
- [x] Close Browser
|
||||
- [x] Check Open Status
|
||||
- [x] Group Management
|
||||
- [x] Create Group
|
||||
- [ ] Update Group(coming soon)
|
||||
- [x] Query Group
|
||||
- [x] Profile Management
|
||||
- [x] Create Profile
|
||||
- [x] Update Profile
|
||||
- [x] Query Profile
|
||||
- [x] Delete Profile
|
||||
- [x] Update Profile Group
|
||||
- [x] Delete Profile Cache
|
||||
|
||||
<br />
|
||||
|
||||
## More Details
|
||||
|
||||
👉[Local Api Doc](https://localapi-doc-en.adspower.com/)
|
||||
@@ -1,249 +0,0 @@
|
||||
{
|
||||
"ad": "Andorra",
|
||||
"ae": "United Arab Emirates",
|
||||
"af": "Afghanistan",
|
||||
"ag": "Antigua & Barbuda",
|
||||
"ai": "Anguilla",
|
||||
"al": "Albania",
|
||||
"am": "Armenia",
|
||||
"ao": "Angola",
|
||||
"aq": "Antarctica",
|
||||
"ar": "Argentina",
|
||||
"as": "American Samoa",
|
||||
"at": "Austria",
|
||||
"au": "Australia",
|
||||
"aw": "Aruba",
|
||||
    "ax": "Åland Islands",
|
||||
"az": "Azerbaijan",
|
||||
"ba": "Bosnia & Herzegovina",
|
||||
"bb": "Barbados",
|
||||
"bd": "Bangladesh",
|
||||
"be": "Belgium",
|
||||
"bf": "Burkina",
|
||||
"bg": "Bulgaria",
|
||||
"bh": "Bahrain",
|
||||
"bi": "Burundi",
|
||||
"bj": "Benin",
|
||||
"bl": "Saint Barthélemy",
|
||||
"bm": "Bermuda",
|
||||
"bn": "Brunei",
|
||||
"bo": "Bolivia",
|
||||
"bq": "Caribbean Netherlands",
|
||||
"br": "Brazil",
|
||||
"bs": "The Bahamas",
|
||||
"bt": "Bhutan",
|
||||
"bv": "Bouvet Island",
|
||||
"bw": "Botswana",
|
||||
"by": "Belarus",
|
||||
"bz": "Belize",
|
||||
"ca": "Canada",
|
||||
"cc": "Cocos (Keeling) Islands",
|
||||
"cf": "Central African Republic",
|
||||
"ch": "Switzerland",
|
||||
"cl": "Chile",
|
||||
"cm": "Cameroon",
|
||||
"co": "Colombia",
|
||||
"cr": "Costa Rica",
|
||||
"cu": "Cuba",
|
||||
"cv": "Cape Verde",
|
||||
"cx": "Christmas Island",
|
||||
"cy": "Cyprus",
|
||||
"cz": "Czech Republic",
|
||||
"de": "Germany",
|
||||
"dj": "Djibouti",
|
||||
"dk": "Denmark",
|
||||
"dm": "Dominica",
|
||||
"do": "Dominican Republic",
|
||||
"dz": "Algeria",
|
||||
"ec": "Ecuador",
|
||||
"ee": "Estonia",
|
||||
"eg": "Egypt",
|
||||
"eh": "Western Sahara",
|
||||
"er": "Eritrea",
|
||||
"es": "Spain",
|
||||
"fi": "Finland",
|
||||
"fj": "Fiji",
|
||||
"fk": "Falkland Islands",
|
||||
"fm": "Federated States of Micronesia",
|
||||
"fo": "Faroe Islands",
|
||||
"fr": "France",
|
||||
"ga": "Gabon",
|
||||
"gd": "Grenada",
|
||||
"ge": "Georgia",
|
||||
"gf": "French Guiana",
|
||||
"gh": "Ghana",
|
||||
"gi": "Gibraltar",
|
||||
"gl": "Greenland",
|
||||
"gn": "Guinea",
|
||||
"gp": "Guadeloupe",
|
||||
"gq": "Equatorial Guinea",
|
||||
"gr": "Greece",
|
||||
"gs": "South Georgia and the South Sandwich Islands",
|
||||
"gt": "Guatemala",
|
||||
"gu": "Guam",
|
||||
"gw": "Guinea-Bissau",
|
||||
"gy": "Guyana",
|
||||
"hk": "China Hong Kong",
|
||||
"hm": "Heard Island and McDonald Islands",
|
||||
"hn": "Honduras",
|
||||
"hr": "Croatia",
|
||||
"ht": "Haiti",
|
||||
"hu": "Hungary",
|
||||
"id": "Indonesia",
|
||||
"ie": "Ireland",
|
||||
"il": "Israel",
|
||||
"im": "Isle of Man",
|
||||
"in": "India",
|
||||
"io": "British Indian Ocean Territory",
|
||||
"iq": "Iraq",
|
||||
"ir": "Iran",
|
||||
"is": "Iceland",
|
||||
"it": "Italy",
|
||||
"je": "Jersey",
|
||||
"jm": "Jamaica",
|
||||
"jo": "Jordan",
|
||||
"jp": "Japan",
|
||||
"kh": "Cambodia",
|
||||
"ki": "Kiribati",
|
||||
"km": "The Comoros",
|
||||
"kw": "Kuwait",
|
||||
"ky": "Cayman Islands",
|
||||
"lb": "Lebanon",
|
||||
"li": "Liechtenstein",
|
||||
"lk": "Sri Lanka",
|
||||
"lr": "Liberia",
|
||||
"ls": "Lesotho",
|
||||
"lt": "Lithuania",
|
||||
"lu": "Luxembourg",
|
||||
"lv": "Latvia",
|
||||
"ly": "Libya",
|
||||
"ma": "Morocco",
|
||||
"mc": "Monaco",
|
||||
"md": "Moldova",
|
||||
"me": "Montenegro",
|
||||
"mf": "Saint Martin (France)",
|
||||
"mg": "Madagascar",
|
||||
"mh": "Marshall islands",
|
||||
"mk": "Republic of Macedonia (FYROM)",
|
||||
"ml": "Mali",
|
||||
"mm": "Myanmar (Burma)",
|
||||
"mo": "China Macao",
|
||||
"mq": "Martinique",
|
||||
"mr": "Mauritania",
|
||||
"ms": "Montserrat",
|
||||
"mt": "Malta",
|
||||
"mv": "Maldives",
|
||||
"mw": "Malawi",
|
||||
"mx": "Mexico",
|
||||
"my": "Malaysia",
|
||||
"na": "Namibia",
|
||||
"ne": "Niger",
|
||||
"nf": "Norfolk Island",
|
||||
"ng": "Nigeria",
|
||||
"ni": "Nicaragua",
|
||||
"nl": "Netherlands",
|
||||
"no": "Norway",
|
||||
"np": "Nepal",
|
||||
"nr": "Nauru",
|
||||
"om": "Oman",
|
||||
"pa": "Panama",
|
||||
"pe": "Peru",
|
||||
"pf": "French polynesia",
|
||||
"pg": "Papua New Guinea",
|
||||
"ph": "The Philippines",
|
||||
"pk": "Pakistan",
|
||||
"pl": "Poland",
|
||||
"pn": "Pitcairn Islands",
|
||||
"pr": "Puerto Rico",
|
||||
"ps": "Palestinian territories",
|
||||
"pw": "Palau",
|
||||
"py": "Paraguay",
|
||||
"qa": "Qatar",
|
||||
"re": "Réunion",
|
||||
"ro": "Romania",
|
||||
"rs": "Serbia",
|
||||
"ru": "Russian Federation",
|
||||
"rw": "Rwanda",
|
||||
"sb": "Solomon Islands",
|
||||
"sc": "Seychelles",
|
||||
"sd": "Sudan",
|
||||
"se": "Sweden",
|
||||
"sg": "Singapore",
|
||||
"si": "Slovenia",
|
||||
"sj": "Template:Country data SJM Svalbard",
|
||||
"sk": "Slovakia",
|
||||
"sl": "Sierra Leone",
|
||||
"sm": "San Marino",
|
||||
"sn": "Senegal",
|
||||
"so": "Somalia",
|
||||
"sr": "Suriname",
|
||||
"ss": "South Sudan",
|
||||
"st": "Sao Tome & Principe",
|
||||
"sv": "El Salvador",
|
||||
"sy": "Syria",
|
||||
"sz": "Swaziland",
|
||||
"tc": "Turks & Caicos Islands",
|
||||
"td": "Chad",
|
||||
"tg": "Togo",
|
||||
"th": "Thailand",
|
||||
"tk": "Tokelau",
|
||||
"tl": "Timor-Leste (East Timor)",
|
||||
"tn": "Tunisia",
|
||||
"to": "Tonga",
|
||||
"tr": "Turkey",
|
||||
"tv": "Tuvalu",
|
||||
"tz": "Tanzania",
|
||||
"ua": "Ukraine",
|
||||
"ug": "Uganda",
|
||||
"us": "United States of America (USA)",
|
||||
"uy": "Uruguay",
|
||||
"va": "Vatican City (The Holy See)",
|
||||
"ve": "Venezuela",
|
||||
"vg": "British Virgin Islands",
|
||||
"vi": "United States Virgin Islands",
|
||||
"vn": "Vietnam",
|
||||
"wf": "Wallis and Futuna",
|
||||
"ws": "Samoa",
|
||||
"ye": "Yemen",
|
||||
"yt": "Mayotte",
|
||||
"za": "South Africa",
|
||||
"zm": "Zambia",
|
||||
"zw": "Zimbabwe",
|
||||
"cn": "China",
|
||||
"cg": "Republic of the Congo",
|
||||
"cd": "Democratic Republic of the Congo",
|
||||
"mz": "Mozambique",
|
||||
"gg": "Guernsey",
|
||||
"gm": "Gambia",
|
||||
"mp": "Northern Mariana Islands",
|
||||
"et": "Ethiopia",
|
||||
"nc": "New Caledonia",
|
||||
"vu": "Vanuatu",
|
||||
"tf": "French Southern Territories",
|
||||
"nu": "Niue",
|
||||
"um": "United States Minor Outlying Islands",
|
||||
"ck": "Cook Islands",
|
||||
"gb": "Great Britain",
|
||||
"tt": "Trinidad & Tobago",
|
||||
"vc": "St. Vincent & the Grenadines",
|
||||
"tw": "China Taiwan",
|
||||
"nz": "New Zealand",
|
||||
"sa": "Saudi Arabia",
|
||||
"la": "Laos",
|
||||
"kp": "North Korea",
|
||||
"kr": "South Korea",
|
||||
"pt": "Portugal",
|
||||
"kg": "Kyrgyzstan",
|
||||
"kz": "Kazakhstan",
|
||||
"tj": "Tajikistan",
|
||||
"tm": "Turkmenistan",
|
||||
"uz": "Uzbekistan",
|
||||
"kn": "St. Kitts & Nevis",
|
||||
"pm": "Saint-Pierre and Miquelon",
|
||||
"sh": "St. Helena & Dependencies",
|
||||
"lc": "St. Lucia",
|
||||
"mu": "Mauritius",
|
||||
    "ci": "Côte d'Ivoire",
|
||||
"ke": "Kenya",
|
||||
"mn": "Mongolia"
|
||||
}
|
||||
@@ -1,185 +0,0 @@
|
||||
const fonts = [
|
||||
"Arial",
|
||||
"Calibri",
|
||||
"Cambria",
|
||||
"Cambria Math",
|
||||
"Candara",
|
||||
"Comic Sans MS",
|
||||
"Comic Sans MS Bold",
|
||||
"Comic Sans",
|
||||
"Consolas",
|
||||
"Constantia",
|
||||
"Corbel",
|
||||
"Courier New",
|
||||
"Caurier Regular",
|
||||
"Ebrima",
|
||||
"Fixedsys Regular",
|
||||
"Franklin Gothic",
|
||||
"Gabriola Regular",
|
||||
"Gadugi",
|
||||
"Georgia",
|
||||
"HoloLens MDL2 Assets Regular",
|
||||
"Impact Regular",
|
||||
"Javanese Text Regular",
|
||||
"Leelawadee UI",
|
||||
"Lucida Console Regular",
|
||||
"Lucida Sans Unicode Regular",
|
||||
"Malgun Gothic",
|
||||
"Microsoft Himalaya Regular",
|
||||
"Microsoft JhengHei",
|
||||
"Microsoft JhengHei UI",
|
||||
"Microsoft PhangsPa",
|
||||
"Microsoft Sans Serif Regular",
|
||||
"Microsoft Tai Le",
|
||||
"Microsoft YaHei",
|
||||
"Microsoft YaHei UI",
|
||||
"Microsoft Yi Baiti Regular",
|
||||
"MingLiU_HKSCS-ExtB Regular",
|
||||
"MingLiu-ExtB Regular",
|
||||
"Modern Regular",
|
||||
"Mongolia Baiti Regular",
|
||||
"MS Gothic Regular",
|
||||
"MS PGothic Regular",
|
||||
"MS Sans Serif Regular",
|
||||
"MS Serif Regular",
|
||||
"MS UI Gothic Regular",
|
||||
"MV Boli Regular",
|
||||
"Myanmar Text",
|
||||
"Nimarla UI",
|
||||
"MV Boli Regular",
|
||||
"Myanmar Tet",
|
||||
"Nirmala UI",
|
||||
"NSimSun Regular",
|
||||
"Palatino Linotype",
|
||||
"PMingLiU-ExtB Regular",
|
||||
"Roman Regular",
|
||||
"Script Regular",
|
||||
"Segoe MDL2 Assets Regular",
|
||||
"Segoe Print",
|
||||
"Segoe Script",
|
||||
"Segoe UI",
|
||||
"Segoe UI Emoji Regular",
|
||||
"Segoe UI Historic Regular",
|
||||
"Segoe UI Symbol Regular",
|
||||
"SimSun Regular",
|
||||
"SimSun-ExtB Regular",
|
||||
"Sitka Banner",
|
||||
"Sitka Display",
|
||||
"Sitka Heading",
|
||||
"Sitka Small",
|
||||
"Sitka Subheading",
|
||||
"Sitka Text",
|
||||
"Small Fonts Regular",
|
||||
"Sylfaen Regular",
|
||||
"Symbol Regular",
|
||||
"System Bold",
|
||||
"Tahoma",
|
||||
"Terminal",
|
||||
"Times New Roman",
|
||||
"Trebuchet MS",
|
||||
"Verdana",
|
||||
"Webdings Regular",
|
||||
"Wingdings Regular",
|
||||
"Yu Gothic",
|
||||
"Yu Gothic UI",
|
||||
"Arial",
|
||||
"Arial Black",
|
||||
"Calibri",
|
||||
"Calibri Light",
|
||||
"Cambria",
|
||||
"Cambria Math",
|
||||
"Candara",
|
||||
"Comic Sans MS",
|
||||
"Consolas",
|
||||
"Constantia",
|
||||
"Corbel",
|
||||
"Courier",
|
||||
"Courier New",
|
||||
"Ebrima",
|
||||
"Fixedsys",
|
||||
"Franklin Gothic Medium",
|
||||
"Gabriola",
|
||||
"Gadugi",
|
||||
"Georgia",
|
||||
"HoloLens MDL2 Assets",
|
||||
"Impact",
|
||||
"Javanese Text",
|
||||
"Leelawadee UI",
|
||||
"Leelawadee UI Semilight",
|
||||
"Lucida Console",
|
||||
"Lucida Sans Unicode",
|
||||
"MS Gothic",
|
||||
"MS PGothic",
|
||||
"MS Sans Serif",
|
||||
"MS Serif",
|
||||
"MS UI Gothic",
|
||||
"MV Boli",
|
||||
"Malgun Gothic",
|
||||
"Malgun Gothic Semilight",
|
||||
"Marlett",
|
||||
"Microsoft Himalaya",
|
||||
"Microsoft JhengHei",
|
||||
"Microsoft JhengHei Light",
|
||||
"Microsoft JhengHei UI",
|
||||
"Microsoft JhengHei UI Light",
|
||||
"Microsoft New Tai Lue",
|
||||
"Microsoft PhagsPa",
|
||||
"Microsoft Sans Serif",
|
||||
"Microsoft Tai Le",
|
||||
"Microsoft YaHei",
|
||||
"Microsoft YaHei Light",
|
||||
"Microsoft YaHei UI",
|
||||
"Microsoft YaHei UI Light",
|
||||
"Microsoft Yi Baiti",
|
||||
"MingLiU-ExtB",
|
||||
"MingLiU_HKSCS-ExtB",
|
||||
"Modern",
|
||||
"Mongolian Baiti",
|
||||
"Myanmar Text",
|
||||
"NSimSun",
|
||||
"Nirmala UI",
|
||||
"Nirmala UI Semilight",
|
||||
"PMingLiU-ExtB",
|
||||
"Palatino Linotype",
|
||||
"Roman",
|
||||
"Script",
|
||||
"Segoe MDL2 Assets",
|
||||
"Segoe Print",
|
||||
"Segoe Script",
|
||||
"Segoe UI",
|
||||
"Segoe UI Black",
|
||||
"Segoe UI Emoji",
|
||||
"Segoe UI Historic",
|
||||
"Segoe UI Light",
|
||||
"Segoe UI Semibold",
|
||||
"Segoe UI Semilight",
|
||||
"Segoe UI Symbol",
|
||||
"SimSun",
|
||||
"SimSun-ExtB",
|
||||
"Sitka Banner",
|
||||
"Sitka Display",
|
||||
"Sitka Heading",
|
||||
"Sitka Small",
|
||||
"Sitka Subheading",
|
||||
"Sitka Text",
|
||||
"Small Fonts",
|
||||
"Sylfaen",
|
||||
"Symbol",
|
||||
"System",
|
||||
"Tahoma",
|
||||
"Terminal",
|
||||
"Times New Roman",
|
||||
"Trebuchet MS",
|
||||
"Verdana",
|
||||
"Webdings",
|
||||
"Wingdings",
|
||||
"Yu Gothic",
|
||||
"Yu Gothic Light",
|
||||
"Yu Gothic Medium",
|
||||
"Yu Gothic UI",
|
||||
"Yu Gothic UI Light",
|
||||
"Yu Gothic UI Semibold",
|
||||
"Yu Gothic UI Semilight"
|
||||
];
|
||||
|
||||
module.exports = fonts;
|
||||
@@ -1,17 +0,0 @@
|
||||
const axios = require('axios');
|
||||
|
||||
const profileId = 'XX';
|
||||
|
||||
const config = {
|
||||
method: 'get',
|
||||
url: `http://localhost:50325/api/v1/browser/active?user_id=${profileId}`,
|
||||
headers: { }
|
||||
};
|
||||
|
||||
axios(config)
|
||||
.then((response) => {
|
||||
console.log(JSON.stringify(response.data));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.log(error);
|
||||
});
|
||||
@@ -1,21 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const data = {
|
||||
group_name: "your_group_name"
|
||||
};
|
||||
|
||||
const config = {
|
||||
method: 'post',
|
||||
url: 'http://local.adspower.net:50325/api/v1/group/create',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
data : data
|
||||
};
|
||||
|
||||
axios(config)
|
||||
.then((response) => {
|
||||
console.log(JSON.stringify(response.data));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.log(error);
|
||||
});
|
||||
@@ -1,48 +0,0 @@
|
||||
var axios = require('axios');
|
||||
var data = {
|
||||
"name": "test",
|
||||
"group_id": "0",
|
||||
"domain_name": "facebook.com",
|
||||
"repeat_config": [
|
||||
"0"
|
||||
],
|
||||
"country": "us",
|
||||
"fingerprint_config": {
|
||||
"language": [
|
||||
"en-US"
|
||||
],
|
||||
"ua": "Mozilla/5.0 (Linux; Android 8.0.0; BND-AL10 Build/HONORBND-AL10; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/63.0.3239.83 Mobile Safari/537.36 T7/11.5 baiduboxapp/11.5.0.10 (Baidu; P1 8.0.0)",
|
||||
"flash": "block",
|
||||
"scan_port_type": "1",
|
||||
"screen_resolution": "1024_600",
|
||||
"fonts": [
|
||||
"all"
|
||||
],
|
||||
"longitude": "180",
|
||||
"latitude": "90",
|
||||
"webrtc": "proxy",
|
||||
"do_not_track": "true",
|
||||
"hardware_concurrency": "default",
|
||||
"device_memory": "default"
|
||||
},
|
||||
"user_proxy_config": {
|
||||
"proxy_soft": "no_proxy"
|
||||
}
|
||||
};
|
||||
|
||||
var config = {
|
||||
method: 'post',
|
||||
url: 'http://local.adspower.net:50325/api/v1/user/create',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
data : data
|
||||
};
|
||||
|
||||
axios(config)
|
||||
.then((response) => {
|
||||
console.log(JSON.stringify(response.data));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.log(error);
|
||||
});
|
||||
@@ -1,15 +0,0 @@
|
||||
const axios = require('axios');
|
||||
|
||||
const config = {
|
||||
method: 'post',
|
||||
url: 'http://localhost:50325/api/v1/user/delete-cache',
|
||||
headers: { }
|
||||
};
|
||||
|
||||
axios(config)
|
||||
.then((response) => {
|
||||
console.log(JSON.stringify(response.data));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.log(error);
|
||||
});
|
||||
@@ -1,23 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const data = {
|
||||
"user_ids": [
|
||||
"XX"
|
||||
]
|
||||
};
|
||||
|
||||
const config = {
|
||||
method: 'post',
|
||||
url: 'http://localhost:50325/api/v1/user/delete',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
data : data
|
||||
};
|
||||
|
||||
axios(config)
|
||||
.then((response) => {
|
||||
console.log(JSON.stringify(response.data));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.log(error);
|
||||
});
|
||||
@@ -1,15 +0,0 @@
|
||||
const axios = require('axios');
|
||||
|
||||
const config = {
|
||||
method: 'get',
|
||||
url: 'http://local.adspower.net:50325/api/v1/group/list?page=1&page_size=15',
|
||||
headers: { }
|
||||
};
|
||||
|
||||
axios(config)
|
||||
.then((response) => {
|
||||
console.log(JSON.stringify(response.data));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.log(error);
|
||||
});
|
||||
@@ -1,15 +0,0 @@
|
||||
const axios = require('axios');
|
||||
|
||||
const config = {
|
||||
method: 'get',
|
||||
url: 'http://local.adspower.net:50325/api/v1/user/list?page=1&page_size=100',
|
||||
headers: { }
|
||||
};
|
||||
|
||||
axios(config)
|
||||
.then((response) => {
|
||||
console.log(JSON.stringify(response.data));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.log(error);
|
||||
});
|
||||
@@ -1,24 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const data = {
|
||||
"user_ids": [
|
||||
"XX"
|
||||
],
|
||||
"group_id": "0"
|
||||
};
|
||||
|
||||
const config = {
|
||||
method: 'post',
|
||||
url: 'http://local.adspower.net:50325/api/v1/user/regroup',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
data : data
|
||||
};
|
||||
|
||||
axios(config)
|
||||
.then((response) => {
|
||||
console.log(JSON.stringify(response.data));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.log(error);
|
||||
});
|
||||
@@ -1,27 +0,0 @@
|
||||
const axios = require('axios');
|
||||
const puppeteer = require('puppeteer-core');
|
||||
|
||||
const profileId = 'XXX';
|
||||
|
||||
// http://local.adspower.net:50325: Script can go to Profile Management-> click Settings-> click Cache folder-> local_api file to obtain API address
|
||||
axios.get(`http://local.adspower.net:50325/api/v1/browser/start?user_id=${profileId}`).then(async (res) => {
|
||||
console.log(res.data);
|
||||
|
||||
if(res.data.code === 0 && res.data.data.ws && res.data.data.ws.puppeteer) {
|
||||
try{
|
||||
const browser = await puppeteer.connect({
|
||||
browserWSEndpoint: res.data.data.ws.puppeteer,
|
||||
defaultViewport:null
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
await page.goto('https://www.adspower.com');
|
||||
await page.screenshot({ path: './adspower.png' });
|
||||
await browser.close();
|
||||
} catch(err){
|
||||
console.log(err.message);
|
||||
}
|
||||
}
|
||||
}).catch((err) => {
|
||||
console.log(err)
|
||||
})
|
||||
@@ -1,16 +0,0 @@
|
||||
const axios = require('axios');
|
||||
|
||||
const profileId = 'XX';
|
||||
const config = {
|
||||
method: 'get',
|
||||
url: `http://local.adspower.net:50325/api/v1/browser/stop?user_id=${profileId}`,
|
||||
headers: { }
|
||||
};
|
||||
|
||||
axios(config)
|
||||
.then((response) => {
|
||||
console.log(JSON.stringify(response.data));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.log(error);
|
||||
});
|
||||
@@ -1,41 +0,0 @@
|
||||
// Update an existing AdsPower profile's settings via the local API.
const axios = require('axios');

const payload = {
  "user_id": "XX",
  "name": "test",
  "domain_name": "facebook.com",
  "repeat_config": [
    "0"
  ],
  "open_urls": [
    "http://www.baidu.com",
    "https://www.google.com"
  ],
  "country": "us",
  "remark": "remark",
  "fingerprint_config": {
    "webrtc": "proxy",
    "do_not_track": "true",
    "hardware_concurrency": "default",
    "device_memory": "default"
  },
  "user_proxy_config": {
    "proxy_soft": "no_proxy"
  }
};

axios({
  method: 'post',
  url: 'http://local.adspower.net:50325/api/v1/user/update',
  headers: { 'Content-Type': 'application/json' },
  data: payload,
})
  .then((response) => {
    console.log(JSON.stringify(response.data));
  })
  .catch((error) => {
    console.log(error);
  });
||||
@@ -1,8 +0,0 @@
|
||||
"""Query whether an AdsPower browser profile is currently active."""
import requests

profileId = 'XX'

response = requests.get(
    f"http://localhost:50325/api/v1/browser/active?user_id={profileId}"
)
print(response.text)
||||
@@ -1,15 +0,0 @@
|
||||
"""Create a new profile group via the AdsPower local API."""
import json
import requests

response = requests.post(
    "http://local.adspower.net:50325/api/v1/group/create",
    headers={'Content-Type': 'application/json'},
    json={"group_name": "your_group_name"},
)
print(response.text)
||||
@@ -1,41 +0,0 @@
|
||||
"""Create a new AdsPower browser profile with a fixed fingerprint config."""
import requests

API_URL = "http://local.adspower.net:50325/api/v1/user/create"

profile = {
    "name": "test",
    "group_id": "0",
    "domain_name": "facebook.com",
    "repeat_config": ["0"],
    "country": "us",
    "fingerprint_config": {
        "language": ["en-US"],
        "ua": "Mozilla/5.0 (Linux; Android 8.0.0; BND-AL10 Build/HONORBND-AL10; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/63.0.3239.83 Mobile Safari/537.36 T7/11.5 baiduboxapp/11.5.0.10 (Baidu; P1 8.0.0)",
        "flash": "block",
        "scan_port_type": "1",
        "screen_resolution": "1024_600",
        "fonts": ["all"],
        "longitude": "180",
        "latitude": "90",
        "webrtc": "proxy",
        "do_not_track": "true",
        "hardware_concurrency": "default",
        "device_memory": "default",
    },
    "user_proxy_config": {"proxy_soft": "no_proxy"},
}

response = requests.post(
    API_URL,
    headers={'Content-Type': 'application/json'},
    json=profile,
)
print(response.text)
||||
@@ -1,10 +0,0 @@
|
||||
"""Clear locally cached profile data via the AdsPower local API."""
import requests

response = requests.post(
    "http://localhost:50325/api/v1/user/delete-cache",
    headers={},
    json={},
)
print(response.text)
||||
@@ -1,16 +0,0 @@
|
||||
"""Delete AdsPower profiles by id via the local API."""
import requests

body = {"user_ids": ["XX"]}

response = requests.post(
    "http://localhost:50325/api/v1/user/delete",
    headers={'Content-Type': 'application/json'},
    json=body,
)
print(response.text)
||||
@@ -1,10 +0,0 @@
|
||||
"""List profile groups (first page, 15 per page) via the AdsPower local API."""
import requests

response = requests.get(
    "http://local.adspower.net:50325/api/v1/group/list?page=1&page_size=15"
)
print(response.text)
||||
@@ -1,10 +0,0 @@
|
||||
"""List browser profiles (first page, 100 per page) via the AdsPower local API."""
import requests

response = requests.get(
    "http://local.adspower.net:50325/api/v1/user/list?page=1&page_size=100"
)
print(response.text)
||||
@@ -1,17 +0,0 @@
|
||||
"""Move the listed AdsPower profiles into a target group via the local API."""
import requests

body = {
    "user_ids": ["XX"],
    "group_id": "0",
}

response = requests.post(
    "http://local.adspower.net:50325/api/v1/user/regroup",
    headers={'Content-Type': 'application/json'},
    json=body,
)
print(response.text)
||||
@@ -1,27 +0,0 @@
|
||||
# The sample passed the test in selenium version 3.141.0
# Start an AdsPower profile, attach Selenium to its debugger address,
# visit a page, then stop the profile again.

import sys
import time

import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

ads_id = "XXX"
# http://local.adspower.net:50325 Script can go to Profile Management-> click Settings-> click Cache folder-> local_api file to obtain API address
open_url = f"http://local.adspower.net:50325/api/v1/browser/start?user_id={ads_id}"
close_url = f"http://local.adspower.net:50325/api/v1/browser/stop?user_id={ads_id}"

resp = requests.get(open_url).json()
if resp["code"] != 0:
    print(resp["msg"])
    print("please check ads_id")
    sys.exit()

chrome_driver = resp["data"]["webdriver"]
chrome_options = Options()
chrome_options.add_experimental_option("debuggerAddress", resp["data"]["ws"]["selenium"])

driver = webdriver.Chrome(chrome_driver, options=chrome_options)
print(driver.title)
driver.get("https://www.adspower.com")
time.sleep(5)
driver.quit()

requests.get(close_url)
||||
@@ -1,8 +0,0 @@
|
||||
"""Stop a running AdsPower browser profile via the local API."""
import requests

# Fixed typo: the variable was previously spelled ``profildId``.
profile_id = 'XX'
url = "http://local.adspower.net:50325/api/v1/browser/stop?user_id=" + profile_id

response = requests.request("GET", url, headers={}, data={})

print(response.text)
||||
@@ -1,34 +0,0 @@
|
||||
"""Update an existing AdsPower profile's settings via the local API."""
import requests

API_URL = "http://local.adspower.net:50325/api/v1/user/update"

profile_update = {
    "user_id": "XX",
    "name": "test",
    "domain_name": "facebook.com",
    "repeat_config": ["0"],
    "open_urls": [
        "http://www.baidu.com",
        "https://www.google.com",
    ],
    "country": "us",
    "remark": "remark",
    "fingerprint_config": {
        "webrtc": "proxy",
        "do_not_track": "true",
        "hardware_concurrency": "default",
        "device_memory": "default",
    },
    "user_proxy_config": {"proxy_soft": "no_proxy"},
}

response = requests.post(
    API_URL,
    headers={'Content-Type': 'application/json'},
    json=profile_update,
)
print(response.text)
||||
254
log_manager.py
Normal file
@@ -0,0 +1,254 @@
|
||||
"""
|
||||
日志管理模块
|
||||
提供日志读取、过滤、搜索和导出功能
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
from config import Config
|
||||
|
||||
|
||||
class LogManager:
    """Log manager: read, filter, search and summarize application log files.

    Expects loguru-style lines of the form
    ``YYYY-MM-DD HH:MM:SS | LEVEL | message``; continuation lines
    (tracebacks etc.) are folded into the preceding entry's message.
    """

    # One timestamped log line; DOTALL lets the message group capture the
    # remainder even if it contains odd characters.
    LOG_PATTERN = re.compile(
        r'(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})\s*\|\s*(\w+)\s*\|\s*(.*)',
        re.DOTALL
    )

    # Relative severity of each level (exposed for callers that rank levels).
    LEVEL_PRIORITY = {
        'DEBUG': 0,
        'INFO': 1,
        'WARNING': 2,
        'ERROR': 3,
        'CRITICAL': 4
    }

    def __init__(self, log_dir: str = None):
        """Create a manager rooted at *log_dir* (defaults to Config.LOG_DIR)."""
        self.log_dir = Path(log_dir or Config.LOG_DIR)
        # exist_ok avoids the check-then-create race of the original code.
        self.log_dir.mkdir(parents=True, exist_ok=True)

    def get_log_files(self) -> List[Dict]:
        """Return metadata for every ``*.log`` file, newest first.

        Each dict has: name, path, size (bytes), size_human, modified_time.
        """
        log_files = []
        for file_path in self.log_dir.glob('*.log'):
            stat = file_path.stat()
            log_files.append({
                'name': file_path.name,
                'path': str(file_path),
                'size': stat.st_size,
                'size_human': self._format_size(stat.st_size),
                'modified_time': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S')
            })
        # ISO-like timestamps sort correctly as strings.
        return sorted(log_files, key=lambda x: x['modified_time'], reverse=True)

    def _format_size(self, size: float) -> str:
        """Format a byte count as a human-readable string, e.g. '2.0 KB'."""
        for unit in ['B', 'KB', 'MB', 'GB']:
            if size < 1024:
                return f"{size:.1f} {unit}"
            size /= 1024
        return f"{size:.1f} TB"

    def _parse_log_line(self, line: str) -> Optional[Dict]:
        """Parse one log line into {time, level, message}, or None if it
        does not start a new entry (e.g. a traceback continuation line)."""
        match = self.LOG_PATTERN.match(line.strip())
        if match:
            return {
                'time': match.group(1),
                'level': match.group(2).upper(),
                'message': match.group(3).strip()
            }
        return None

    def _read_log_file_reverse(self, file_path: Path, limit: int = 500) -> List[str]:
        """Return (up to) the last *limit* lines of *file_path*, in file order.

        Uses a bounded deque so only *limit* lines are kept in memory,
        instead of materializing the whole file first.
        """
        from collections import deque
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                return list(deque(f, maxlen=limit))
        except OSError:
            # Best effort: an unreadable file contributes no lines.
            return []

    def _collect_entries(self, lines) -> List[Dict]:
        """Fold an iterable of raw lines into parsed entries, appending
        continuation lines to the previous entry's message."""
        entries = []
        current = None
        for line in lines:
            parsed = self._parse_log_line(line)
            if parsed:
                if current:
                    entries.append(current)
                current = parsed
            elif current and line.strip():
                # Multi-line entry (e.g. traceback): extend the message.
                current['message'] += '\n' + line.strip()
        if current:
            entries.append(current)
        return entries

    @staticmethod
    def _parse_datetime(value: Optional[str], end_of_day: bool = False) -> Optional[datetime]:
        """Parse 'YYYY-MM-DD HH:MM:SS' or 'YYYY-MM-DD'; None if neither fits.

        With *end_of_day*, a date-only value maps to 23:59:59 of that day
        so it can serve as an inclusive upper bound.
        """
        if not value:
            return None
        try:
            return datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
        except ValueError:
            pass
        try:
            dt = datetime.strptime(value, '%Y-%m-%d')
            return dt.replace(hour=23, minute=59, second=59) if end_of_day else dt
        except ValueError:
            return None

    def get_latest_logs(self, limit: int = 100, level: str = 'ALL') -> List[Dict]:
        """Return up to *limit* most recent entries of the newest log file,
        optionally filtered by *level* ('ALL' disables filtering)."""
        log_files = self.get_log_files()
        if not log_files:
            return []

        # Read twice as many raw lines as requested so that multi-line
        # entries and level filtering still leave enough results.
        lines = self._read_log_file_reverse(Path(log_files[0]['path']), limit * 2)
        logs = self._collect_entries(lines)

        if level and level.upper() != 'ALL':
            logs = [entry for entry in logs if entry['level'] == level.upper()]

        return logs[-limit:]

    def search_logs(
        self,
        keyword: str = None,
        level: str = 'ALL',
        start_time: str = None,
        end_time: str = None,
        page: int = 1,
        page_size: int = 100
    ) -> Tuple[List[Dict], int]:
        """Search recent log files.

        Args:
            keyword: case-insensitive substring the message must contain.
            level: level to keep, or 'ALL' for no level filter.
            start_time/end_time: inclusive 'YYYY-MM-DD[ HH:MM:SS]' bounds.
            page, page_size: 1-based pagination of the filtered result.

        Returns:
            (entries for the requested page, total number of matches)
        """
        start_dt = self._parse_datetime(start_time)
        end_dt = self._parse_datetime(end_time, end_of_day=True)

        all_logs = []
        # Only the 5 most recent files are scanned, to bound the work.
        for log_file in self.get_log_files()[:5]:
            try:
                with open(log_file['path'], 'r', encoding='utf-8', errors='ignore') as f:
                    all_logs.extend(self._collect_entries(f))
            except OSError:
                continue

        level_filter = level.upper() if level and level.upper() != 'ALL' else None
        keyword_lower = keyword.lower() if keyword else None

        filtered_logs = []
        for entry in all_logs:
            if level_filter and entry['level'] != level_filter:
                continue
            if keyword_lower and keyword_lower not in entry['message'].lower():
                continue
            try:
                log_dt = datetime.strptime(entry['time'], '%Y-%m-%d %H:%M:%S')
                if start_dt and log_dt < start_dt:
                    continue
                if end_dt and log_dt > end_dt:
                    continue
            except ValueError:
                # Entries with unparsable timestamps are kept, as before.
                pass
            filtered_logs.append(entry)

        # Newest first.
        filtered_logs.sort(key=lambda e: e['time'], reverse=True)

        total = len(filtered_logs)
        start_idx = (page - 1) * page_size
        return filtered_logs[start_idx:start_idx + page_size], total

    def get_log_content(self, filename: str) -> str:
        """Return the full text of *filename* inside the log directory.

        Rejects names containing path components (e.g. '../x') since the
        filename may originate from an HTTP request; returns "" on any
        failure, matching the original best-effort contract.
        """
        # Path-traversal guard: the name must be a bare file name.
        if os.path.basename(filename) != filename:
            return ""
        file_path = self.log_dir / filename
        if not file_path.exists():
            return ""

        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                return f.read()
        except OSError:
            return ""

    def get_log_stats(self) -> Dict:
        """Summarize the log directory.

        Returns totals (file count, byte size) plus, over the last 1000
        lines of the newest file, per-level counts and the most recent
        ERROR and WARNING entries.
        """
        stats = {
            'total_files': 0,
            'total_size': 0,
            'total_size_human': '0 B',
            'level_counts': {
                'DEBUG': 0,
                'INFO': 0,
                'WARNING': 0,
                'ERROR': 0,
                'CRITICAL': 0
            },
            'latest_error': None,
            'latest_warning': None
        }

        log_files = self.get_log_files()
        stats['total_files'] = len(log_files)
        stats['total_size'] = sum(f['size'] for f in log_files)
        stats['total_size_human'] = self._format_size(stats['total_size'])

        if log_files:
            lines = self._read_log_file_reverse(Path(log_files[0]['path']), 1000)
            for line in lines:
                parsed = self._parse_log_line(line)
                if not parsed:
                    continue
                lvl = parsed['level']
                if lvl in stats['level_counts']:
                    stats['level_counts'][lvl] += 1

                # Lines arrive oldest -> newest, so overwriting keeps the
                # NEWEST entry. (The original kept the first/oldest one,
                # which contradicted the 'latest_*' field names.)
                if lvl == 'ERROR':
                    stats['latest_error'] = parsed
                elif lvl == 'WARNING':
                    stats['latest_warning'] = parsed

        return stats
||||
277
main.py
@@ -28,6 +28,7 @@ import signal
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
@@ -108,6 +109,10 @@ MIP广告点击服务
|
||||
self.total_clicks_today = 0
|
||||
self.error_count = 0
|
||||
|
||||
# 线程安全锁(用于并发执行)
|
||||
self._click_records_lock = threading.Lock()
|
||||
self._stats_lock = threading.Lock()
|
||||
|
||||
# 健康检查API
|
||||
self.health_app = Flask(__name__)
|
||||
self.health_app.logger.disabled = True # 禁用Flask日志
|
||||
@@ -116,10 +121,10 @@ MIP广告点击服务
|
||||
logger.info(f"调度器初始化完成")
|
||||
logger.info(f"工作时间: {self.work_start_hour:02d}:00 - {self.work_end_hour:02d}:00")
|
||||
logger.info(f"点击间隔: {self.click_interval_minutes} 分钟")
|
||||
logger.info(f"并发数: {max_workers}")
|
||||
logger.info(f"并发模式: {'启用' if max_workers > 1 else '禁用'} (最大并发数: {max_workers})")
|
||||
|
||||
def _setup_health_api(self):
|
||||
"""配置健康检查API"""
|
||||
"""配置健康检查API和调度器控制API"""
|
||||
@self.health_app.route('/health', methods=['GET'])
|
||||
def health_check():
|
||||
"""健康检查端点"""
|
||||
@@ -141,6 +146,44 @@ MIP广告点击服务
|
||||
'is_working_time': self.is_working_time()
|
||||
})
|
||||
|
||||
@self.health_app.route('/scheduler/status', methods=['GET'])
|
||||
def scheduler_status():
|
||||
"""获取调度器状态(供远程Web服务调用)"""
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'data': {
|
||||
'status': 'running' if self.running else 'stopped',
|
||||
'is_working_time': self.is_working_time(),
|
||||
'total_sites': len(self.click_records),
|
||||
'completed_sites': sum(1 for r in self.click_records.values() if r['today_count'] >= r['target_count']),
|
||||
'total_clicks_today': sum(r['today_count'] for r in self.click_records.values()),
|
||||
'error_count': self.error_count
|
||||
}
|
||||
})
|
||||
|
||||
@self.health_app.route('/scheduler/start', methods=['POST'])
|
||||
def scheduler_start():
|
||||
"""启动调度器(供远程Web服务调用)"""
|
||||
if self.running:
|
||||
return jsonify({'success': True, 'message': '调度器已在运行中'})
|
||||
|
||||
# 重新初始化并启动
|
||||
self.running = True
|
||||
self.start_time = datetime.now()
|
||||
self.reset_daily_records()
|
||||
logger.info("调度器已通过远程API启动")
|
||||
return jsonify({'success': True, 'message': '调度器已启动'})
|
||||
|
||||
@self.health_app.route('/scheduler/stop', methods=['POST'])
|
||||
def scheduler_stop():
|
||||
"""停止调度器(供远程Web服务调用)"""
|
||||
if not self.running:
|
||||
return jsonify({'success': True, 'message': '调度器未运行'})
|
||||
|
||||
self.running = False
|
||||
logger.info("调度器已通过远程API停止")
|
||||
return jsonify({'success': True, 'message': '调度器已停止'})
|
||||
|
||||
def _acquire_lock(self) -> bool:
|
||||
"""
|
||||
获取进程锁,防止重复启动
|
||||
@@ -222,7 +265,8 @@ MIP广告点击服务
|
||||
'last_click': None,
|
||||
'today_count': 0,
|
||||
'target_count': target_count,
|
||||
'site_url': site.get('site_url')
|
||||
'site_url': site.get('site_url'),
|
||||
'click_count': site.get('click_count', 0) # 历史总点击次数
|
||||
}
|
||||
logger.info(f"站点 {site_id}: {site.get('site_url')} - 今日目标 {target_count} 次")
|
||||
|
||||
@@ -233,12 +277,40 @@ MIP广告点击服务
|
||||
获取待点击的站点列表
|
||||
|
||||
Returns:
|
||||
待点击的站点列表
|
||||
待点击的站点列表(优先返回今日未点击的站点)
|
||||
"""
|
||||
if not self.click_records:
|
||||
logger.warning("点击记录为空,执行重置")
|
||||
self.reset_daily_records()
|
||||
|
||||
# 动态检测新导入的站点
|
||||
current_sites = self.dm.get_active_urls()
|
||||
current_site_ids = {site.get('id') for site in current_sites}
|
||||
existing_site_ids = set(self.click_records.keys())
|
||||
|
||||
# 发现新站点,自动添加到点击记录
|
||||
new_site_ids = current_site_ids - existing_site_ids
|
||||
if new_site_ids:
|
||||
logger.info(f"发现 {len(new_site_ids)} 个新导入的站点,加入点击队列")
|
||||
for site in current_sites:
|
||||
site_id = site.get('id')
|
||||
if site_id in new_site_ids:
|
||||
target_count = random.randint(Config.MIN_CLICK_COUNT, Config.MAX_CLICK_COUNT)
|
||||
self.click_records[site_id] = {
|
||||
'last_click': None,
|
||||
'today_count': 0,
|
||||
'target_count': target_count,
|
||||
'site_url': site.get('site_url'),
|
||||
'click_count': site.get('click_count', 0) # 历史总点击次数
|
||||
}
|
||||
logger.info(f"新站点 {site_id}: {site.get('site_url')} - 今日目标 {target_count} 次")
|
||||
|
||||
# 移除已删除的站点
|
||||
removed_site_ids = existing_site_ids - current_site_ids
|
||||
for site_id in removed_site_ids:
|
||||
del self.click_records[site_id]
|
||||
logger.info(f"站点 {site_id} 已从数据库删除,移除出点击队列")
|
||||
|
||||
now = datetime.now()
|
||||
pending_sites = []
|
||||
|
||||
@@ -257,9 +329,21 @@ MIP广告点击服务
|
||||
'id': site_id,
|
||||
'site_url': record['site_url'],
|
||||
'today_count': record['today_count'],
|
||||
'target_count': record['target_count']
|
||||
'target_count': record['target_count'],
|
||||
'click_count': record.get('click_count', 0), # 历史总点击次数
|
||||
'last_click': record['last_click']
|
||||
})
|
||||
|
||||
# 排序优先级:
|
||||
# 1. 历史从未点击的(click_count=0)最优先
|
||||
# 2. 今日未点击的(today_count=0)次优先
|
||||
# 3. 按上次点击时间升序(最久未点击的优先)
|
||||
pending_sites.sort(key=lambda x: (
|
||||
x['click_count'] > 0, # False(历史0次) 排在最前面
|
||||
x['today_count'] > 0, # False(今日0次) 排在前面
|
||||
x['last_click'] or datetime.min # 从未点击的排在前面
|
||||
))
|
||||
|
||||
return pending_sites
|
||||
|
||||
def execute_click_task(self, site: Dict):
|
||||
@@ -341,13 +425,28 @@ MIP广告点击服务
|
||||
|
||||
logger.info(f"找到 {len(pending_sites)} 个待点击站点")
|
||||
|
||||
# 随机打乱顺序(模拟真实行为)
|
||||
random.shuffle(pending_sites)
|
||||
# 分组:历史从未点击的 vs 历史点击过的
|
||||
never_clicked = [s for s in pending_sites if s.get('click_count', 0) == 0]
|
||||
has_clicked = [s for s in pending_sites if s.get('click_count', 0) > 0]
|
||||
|
||||
# 各组内随机打乱,但保持"历史从未点击优先"的整体顺序
|
||||
random.shuffle(never_clicked)
|
||||
random.shuffle(has_clicked)
|
||||
pending_sites = never_clicked + has_clicked
|
||||
|
||||
if never_clicked:
|
||||
logger.info(f"其中 {len(never_clicked)} 个站点历史从未点击(最优先处理)")
|
||||
|
||||
# 根据并发数执行
|
||||
if self.max_workers == 1:
|
||||
# 串行执行
|
||||
for site in pending_sites:
|
||||
# 每次执行前检查工作时间
|
||||
if not self.is_working_time():
|
||||
current_time = datetime.now().strftime('%H:%M')
|
||||
logger.info(f"当前时间 {current_time} 已超出工作时间,停止执行剩余任务")
|
||||
break
|
||||
|
||||
self.execute_click_task(site)
|
||||
|
||||
# 任务间随机间隔(使用配置文件中的范围)
|
||||
@@ -356,11 +455,8 @@ MIP广告点击服务
|
||||
logger.info(f"等待 {wait_minutes} 分钟后执行下一个任务...")
|
||||
time.sleep(wait_minutes * 60)
|
||||
else:
|
||||
# 并发执行(暂不支持,避免资源冲突)
|
||||
logger.warning("当前版本仅支持串行执行")
|
||||
for site in pending_sites:
|
||||
self.execute_click_task(site)
|
||||
time.sleep(random.randint(Config.MIN_TASK_INTERVAL_MINUTES, Config.MAX_TASK_INTERVAL_MINUTES) * 60)
|
||||
# 并发执行
|
||||
self._run_concurrent_cycle(pending_sites)
|
||||
|
||||
# 显示今日进度
|
||||
completed = sum(1 for r in self.click_records.values() if r['today_count'] >= r['target_count'])
|
||||
@@ -373,6 +469,150 @@ MIP广告点击服务
|
||||
logger.info(f"点击次数: {total_clicks}/{target_clicks} 次")
|
||||
logger.info("-" * 60)
|
||||
|
||||
    def _run_concurrent_cycle(self, pending_sites: List[Dict]):
        """
        Run click tasks concurrently, in batches of at most
        ``self.max_workers`` sites.

        Each batch fans out over its own ThreadPoolExecutor; between
        batches the loop sleeps a random interval and re-checks working
        hours, stopping early once outside of them.

        Args:
            pending_sites: site dicts (with 'id' and 'site_url') still
                due for clicks today.
        """
        # Partition into batches of max_workers sites each.
        batch_size = self.max_workers
        batches = [pending_sites[i:i+batch_size] for i in range(0, len(pending_sites), batch_size)]

        logger.info(f"并发模式: 共 {len(pending_sites)} 个任务,分 {len(batches)} 批执行(每批最多 {batch_size} 个)")

        for batch_idx, batch in enumerate(batches):
            # Stop scheduling new batches outside working hours.
            if not self.is_working_time():
                current_time = datetime.now().strftime('%H:%M')
                logger.info(f"当前时间 {current_time} 已超出工作时间,停止执行剩余批次")
                break

            logger.info(f"=" * 40)
            logger.info(f"开始批次 {batch_idx+1}/{len(batches)}: {len(batch)} 个任务并发执行")
            for i, site in enumerate(batch, 1):
                logger.info(f" - [Worker {i}] Site {site['id']}: {site['site_url'][:50]}...")

            batch_start_time = time.time()
            success_count = 0
            fail_count = 0

            # Fan out over a pool sized exactly to this batch; worker_id is
            # 1-based and used by the wrapper for staggered start-up.
            with ThreadPoolExecutor(max_workers=len(batch)) as executor:
                futures = {
                    executor.submit(self._execute_click_task_wrapper, site, worker_id): site
                    for worker_id, site in enumerate(batch, 1)
                }

                # Block until every task in the batch has finished.
                for future in as_completed(futures):
                    site = futures[future]
                    try:
                        result = future.result()
                        if result.get('success'):
                            success_count += 1
                        else:
                            fail_count += 1
                    except Exception as e:
                        fail_count += 1
                        logger.error(f"任务异常: Site {site['id']} - {e}")

            batch_duration = (time.time() - batch_start_time) / 60
            logger.info(f"批次 {batch_idx+1} 完成: 成功 {success_count}, 失败 {fail_count}, 耗时 {batch_duration:.1f} 分钟")

            # Random pause between batches (skipped after the last one).
            if batch_idx < len(batches) - 1:
                # Re-check working hours before committing to the sleep.
                if not self.is_working_time():
                    current_time = datetime.now().strftime('%H:%M')
                    logger.info(f"当前时间 {current_time} 已超出工作时间,停止执行")
                    break

                wait_minutes = random.randint(Config.MIN_TASK_INTERVAL_MINUTES, Config.MAX_TASK_INTERVAL_MINUTES)
                logger.info(f"等待 {wait_minutes} 分钟后执行下一批次...")
                time.sleep(wait_minutes * 60)
||||
|
||||
    def _execute_click_task_wrapper(self, site: Dict, worker_id: int) -> Dict:
        """
        Thread-safe wrapper around a single click task.

        Builds a dedicated TaskExecutor (own browser profile/proxy) for
        this worker, runs the task, updates shared counters under their
        locks, and always releases the browser in ``finally``.

        Args:
            site: site info dict; must contain 'id' and 'site_url'.
            worker_id: 1-based worker number, used for log tags and for
                staggered start-up.

        Returns:
            Result dict containing at least a 'success' bool; failures
            also carry an 'error' message.
        """
        site_id = site['id']
        site_url = site['site_url']

        # Staggered start: worker k waits (k-1) * 5..10 seconds so that
        # concurrent AdsPower API calls don't trip its rate limit.
        if worker_id > 1:
            stagger_delay = (worker_id - 1) * random.randint(5, 10)
            logger.info(f"[Worker {worker_id}] [Site {site_id}] 错峰等待 {stagger_delay} 秒后启动...")
            time.sleep(stagger_delay)

        logger.info(f"[Worker {worker_id}] [Site {site_id}] 开始点击: {site_url[:50]}...")

        executor = None
        try:
            # Independent TaskExecutor per worker (no shared browser state).
            executor = TaskExecutor(max_workers=1, use_proxy=self.use_proxy)

            # Fresh browser environment (profile + proxy) for this worker.
            profile_info = executor.create_browser_profile(worker_id)
            if not profile_info:
                logger.error(f"[Worker {worker_id}] [Site {site_id}] 创建浏览器环境失败")
                with self._stats_lock:
                    self.error_count += 1
                return {'success': False, 'error': '创建浏览器环境失败'}

            time.sleep(2)

            # Re-fetch the full site record; the 'site' argument carries
            # only a summary of the scheduling state.
            all_sites = self.dm.get_active_urls()
            target_site = next((s for s in all_sites if s.get('id') == site_id), None)

            if not target_site:
                logger.error(f"[Worker {worker_id}] [Site {site_id}] 未找到站点信息")
                return {'success': False, 'error': '未找到站点信息'}

            # Run the actual click task in the prepared browser profile.
            result = executor.execute_single_task(target_site, worker_id, profile_info['profile_id'])

            if result['success']:
                # Update the shared click records under their lock; stats
                # use a separate lock to keep critical sections short.
                with self._click_records_lock:
                    if site_id in self.click_records:
                        self.click_records[site_id]['last_click'] = datetime.now()
                        self.click_records[site_id]['today_count'] += 1
                with self._stats_lock:
                    self.total_clicks_today += 1

                logger.info(f"[Worker {worker_id}] [Site {site_id}] ✅ 点击完成")
            else:
                with self._stats_lock:
                    self.error_count += 1
                logger.warning(f"[Worker {worker_id}] [Site {site_id}] ⚠️ 点击失败: {result.get('error', '未知错误')}")

            return result

        except Exception as e:
            with self._stats_lock:
                self.error_count += 1
            logger.error(f"[Worker {worker_id}] [Site {site_id}] ❌ 异常: {str(e)}")
            import traceback
            traceback.print_exc()
            return {'success': False, 'error': str(e)}
        finally:
            # Always release browser resources, even on error paths.
            if executor:
                try:
                    executor.close_browser()
                except Exception as e:
                    logger.warning(f"[Worker {worker_id}] 清理资源失败: {e}")
||||
|
||||
def run_crawler_cycle(self):
|
||||
"""执行一次爬虫循环"""
|
||||
if not self.crawler:
|
||||
@@ -397,6 +637,15 @@ MIP广告点击服务
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
def run_query_import_scan(self):
|
||||
"""扫描Query上传目录,导入待处理文件"""
|
||||
try:
|
||||
from query_keyword_importer import QueryKeywordImporter
|
||||
importer = QueryKeywordImporter()
|
||||
importer.scan_and_import()
|
||||
except Exception as e:
|
||||
logger.error(f"Query导入扫描异常: {e}")
|
||||
|
||||
def start(self):
|
||||
"""启动调度器"""
|
||||
# 获取进程锁
|
||||
@@ -457,6 +706,10 @@ MIP广告点击服务
|
||||
else:
|
||||
logger.info(" - 网址爬取未启用")
|
||||
|
||||
# 4. Query挖掘目录扫描(每15分钟)
|
||||
schedule.every(15).minutes.do(self.run_query_import_scan)
|
||||
logger.info(" - 每 15 分钟扫描Query上传目录")
|
||||
|
||||
logger.info("")
|
||||
|
||||
# 立即执行一次(如果在工作时间内)
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
@echo off
REM Convenience launcher: opens the database management page of the
REM MIP ad-automation tool in the system default browser.
REM Assumes the backend service is already running (python app.py).
chcp 65001 >nul
echo ====================================
echo MIP广告自动化 - 数据库管理页面
echo ====================================
echo.

echo 正在启动浏览器打开管理页面...
echo.

REM Open the management page in the default browser.
start http://localhost:5000/static/database.html

echo.
echo ✅ 已在浏览器中打开数据库管理页面
echo.
echo 📌 如果浏览器未自动打开,请手动访问:
echo http://localhost:5000/static/database.html
echo.
echo 💡 提示: 请确保后端服务已启动 (python app.py)
echo.
pause
@@ -1,166 +0,0 @@
|
||||
{
|
||||
"info": {
|
||||
"name": "MIP广告点击服务 API",
|
||||
"description": "MIP页面广告自动化点击服务的API接口集合",
|
||||
"schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
|
||||
},
|
||||
"item": [
|
||||
{
|
||||
"name": "健康检查",
|
||||
"request": {
|
||||
"method": "GET",
|
||||
"header": [],
|
||||
"url": {
|
||||
"raw": "{{base_url}}/health",
|
||||
"host": ["{{base_url}}"],
|
||||
"path": ["health"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "添加单个URL",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"header": [
|
||||
{
|
||||
"key": "Content-Type",
|
||||
"value": "application/json"
|
||||
}
|
||||
],
|
||||
"body": {
|
||||
"mode": "raw",
|
||||
"raw": "{\n \"url\": \"https://example.com/mip-page\"\n}"
|
||||
},
|
||||
"url": {
|
||||
"raw": "{{base_url}}/api/urls",
|
||||
"host": ["{{base_url}}"],
|
||||
"path": ["api", "urls"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "批量添加URL",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"header": [
|
||||
{
|
||||
"key": "Content-Type",
|
||||
"value": "application/json"
|
||||
}
|
||||
],
|
||||
"body": {
|
||||
"mode": "raw",
|
||||
"raw": "{\n \"urls\": [\n \"https://example.com/mip-page-1\",\n \"https://example.com/mip-page-2\",\n \"https://example.com/mip-page-3\"\n ]\n}"
|
||||
},
|
||||
"url": {
|
||||
"raw": "{{base_url}}/api/urls",
|
||||
"host": ["{{base_url}}"],
|
||||
"path": ["api", "urls"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "获取所有URL",
|
||||
"request": {
|
||||
"method": "GET",
|
||||
"header": [],
|
||||
"url": {
|
||||
"raw": "{{base_url}}/api/urls",
|
||||
"host": ["{{base_url}}"],
|
||||
"path": ["api", "urls"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "获取URL详情",
|
||||
"request": {
|
||||
"method": "GET",
|
||||
"header": [],
|
||||
"url": {
|
||||
"raw": "{{base_url}}/api/urls/https://example.com/mip-page",
|
||||
"host": ["{{base_url}}"],
|
||||
"path": ["api", "urls", "https://example.com/mip-page"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "删除URL",
|
||||
"request": {
|
||||
"method": "DELETE",
|
||||
"header": [],
|
||||
"url": {
|
||||
"raw": "{{base_url}}/api/urls/https://example.com/mip-page",
|
||||
"host": ["{{base_url}}"],
|
||||
"path": ["api", "urls", "https://example.com/mip-page"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "重置URL",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"header": [],
|
||||
"url": {
|
||||
"raw": "{{base_url}}/api/urls/https://example.com/mip-page/reset",
|
||||
"host": ["{{base_url}}"],
|
||||
"path": ["api", "urls", "https://example.com/mip-page", "reset"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "获取统计数据",
|
||||
"request": {
|
||||
"method": "GET",
|
||||
"header": [],
|
||||
"url": {
|
||||
"raw": "{{base_url}}/api/statistics",
|
||||
"host": ["{{base_url}}"],
|
||||
"path": ["api", "statistics"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "启动调度器",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"header": [],
|
||||
"url": {
|
||||
"raw": "{{base_url}}/api/scheduler/start",
|
||||
"host": ["{{base_url}}"],
|
||||
"path": ["api", "scheduler", "start"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "停止调度器",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"header": [],
|
||||
"url": {
|
||||
"raw": "{{base_url}}/api/scheduler/stop",
|
||||
"host": ["{{base_url}}"],
|
||||
"path": ["api", "scheduler", "stop"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "查询调度器状态",
|
||||
"request": {
|
||||
"method": "GET",
|
||||
"header": [],
|
||||
"url": {
|
||||
"raw": "{{base_url}}/api/scheduler/status",
|
||||
"host": ["{{base_url}}"],
|
||||
"path": ["api", "scheduler", "status"]
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"variable": [
|
||||
{
|
||||
"key": "base_url",
|
||||
"value": "http://localhost:5000",
|
||||
"type": "string"
|
||||
}
|
||||
]
|
||||
}
|
||||
204
query_keyword_importer.py
Normal file
@@ -0,0 +1,204 @@
|
||||
"""
|
||||
Query关键词导入模块
|
||||
从Excel文件读取关键词,批量导入到baidu_keyword表
|
||||
"""
|
||||
import os
|
||||
import glob
|
||||
import time
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from config import Config
|
||||
from db_manager import QueryKeywordManager, QueryImportLogManager
|
||||
|
||||
|
||||
class QueryKeywordImporter:
    """Imports query keywords from Excel files into the baidu_keyword table.

    Reads .xlsx/.xls files, extracts keyword rows (optionally with a
    department column), and bulk-inserts them via QueryKeywordManager,
    tracking progress in QueryImportLogManager.
    """

    # Recognized column headers for the query column.
    # NOTE(review): currently unused by import_file(), which maps columns by
    # position; kept for backward compatibility with _detect_column callers.
    QUERY_COLUMNS = ['query', 'Query', '查询词', 'keyword', '关键词']
    # Recognized column headers for the department column (see note above's caveat).
    DEPT_COLUMNS = ['科室', 'department', 'Department', '部门']
    # Number of rows inserted per batch call.
    BATCH_SIZE = 500

    def __init__(self):
        self.keyword_mgr = QueryKeywordManager()
        self.log_mgr = QueryImportLogManager()

    def _detect_column(self, df_columns, candidates):
        """Return the first candidate column name present in df_columns, else None."""
        for col in candidates:
            if col in df_columns:
                return col
        return None

    def import_file(self, filepath: str, log_id: int = None, import_mode: str = 'query_only') -> dict:
        """Import a single Excel file into the baidu_keyword table (batched).

        Args:
            filepath: Path to the Excel file.
            log_id: Existing import-log row id, if one was already created.
            import_mode: Import mode:
                - 'query_only': single query column, query_status=draft
                - 'full_import': three columns (department/keyword/query),
                  query_status=manual_review

        Returns:
            {'success': bool, 'stats': {...}} plus 'error' on failure.
        """
        filename = os.path.basename(filepath)
        stats = {'total': 0, 'success': 0, 'skip': 0, 'fail': 0}

        # Map the import mode to the status the inserted rows will carry.
        query_status = 'draft' if import_mode == 'query_only' else 'manual_review'

        # Create a log record if the caller did not supply one.
        if log_id is None:
            log_id = self.log_mgr.create_log(filename, filepath)

        try:
            # Mark the log entry as running before any heavy work.
            if log_id:
                self.log_mgr.update_status(log_id, 'running')

            logger.info(f"[Query导入] 开始导入文件: {filename}, 模式: {import_mode}, query_status: {query_status}")

            # 1. Read the Excel file.
            if not os.path.exists(filepath):
                raise FileNotFoundError(f"文件不存在: {filepath}")

            df = pd.read_excel(filepath)
            logger.info(f"[Query导入] 文件 {filename} 包含 {len(df)} 行, 列名: {df.columns.tolist()}")

            # 2. Resolve the column mapping by position, per mode.
            if import_mode == 'query_only':
                # Mode 1: only a query column (first column, whatever its header).
                query_col = df.columns[0]
                dept_col = None
                keyword_col = None
                logger.info(f"[Query导入] 仅导入Query模式,使用列: {query_col}")
            else:
                # Mode 2: three columns - department / keyword / query (by position).
                dept_col = df.columns[0]
                keyword_col = df.columns[1]
                query_col = df.columns[2]
                logger.info(f"[Query导入] 完整导入模式,科室列: {dept_col}, 关键字列: {keyword_col}, query列: {query_col}")

            # 3. Preprocess: collect all valid keyword rows.
            keyword_list = []
            for idx, row in df.iterrows():
                query_val = str(row[query_col]).strip() if pd.notna(row[query_col]) else ''
                # Blank cells and the literal string 'nan' count as failures.
                if not query_val or query_val == 'nan':
                    stats['fail'] += 1
                    continue

                item = {'keyword': query_val, 'department': ''}

                if import_mode == 'full_import':
                    # Extract the department, ignoring blanks and 'nan'.
                    if dept_col and pd.notna(row[dept_col]):
                        dept_val = str(row[dept_col]).strip()
                        if dept_val != 'nan':
                            item['department'] = dept_val

                keyword_list.append(item)

            stats['total'] = len(df)
            total_valid = len(keyword_list)
            logger.info(f"[Query导入] 有效关键词: {total_valid} / {stats['total']}")

            # 4. Insert in batches of BATCH_SIZE.
            batch_count = (total_valid + self.BATCH_SIZE - 1) // self.BATCH_SIZE
            processed = 0

            for batch_idx in range(batch_count):
                start = batch_idx * self.BATCH_SIZE
                end = min(start + self.BATCH_SIZE, total_valid)
                batch = keyword_list[start:end]

                batch_stats = self.keyword_mgr.batch_insert_keywords(batch, query_status=query_status)
                stats['success'] += batch_stats['success']
                stats['skip'] += batch_stats['skip']
                stats['fail'] += batch_stats['fail']
                processed += len(batch)

                progress = (processed / total_valid) * 100 if total_valid > 0 else 100
                logger.info(
                    f"[Query导入] [{filename}] 批次 {batch_idx+1}/{batch_count} | "
                    f"进度: {processed}/{total_valid} ({progress:.1f}%) | "
                    f"成功: {stats['success']} | 跳过: {stats['skip']} | 失败: {stats['fail']}"
                )

            # 5. Finalize the import log with the accumulated counters.
            if log_id:
                self.log_mgr.update_status(
                    log_id, 'completed',
                    total_count=stats['total'],
                    success_count=stats['success'],
                    skip_count=stats['skip'],
                    fail_count=stats['fail']
                )

            logger.info(
                f"[Query导入] 文件 {filename} 导入完成 | "
                f"总数: {stats['total']} | 成功: {stats['success']} | "
                f"跳过: {stats['skip']} | 失败: {stats['fail']}"
            )

            return {'success': True, 'stats': stats}

        except Exception as e:
            error_msg = str(e)
            logger.error(f"[Query导入] 文件 {filename} 导入失败: {error_msg}")

            # Record the failure (with whatever partial counters we have).
            if log_id:
                self.log_mgr.update_status(
                    log_id, 'failed',
                    total_count=stats['total'],
                    success_count=stats['success'],
                    skip_count=stats['skip'],
                    fail_count=stats['fail'],
                    error_message=error_msg
                )

            return {'success': False, 'stats': stats, 'error': error_msg}

    def scan_and_import(self):
        """Scan the upload directory and import any outstanding files.

        1. Process records already marked status='pending' in the database.
        2. Import Excel files found on disk that have no log record yet.
        """
        upload_dir = Config.QUERY_UPLOAD_DIR

        if not os.path.exists(upload_dir):
            # Nothing to scan yet; create the directory for future uploads.
            os.makedirs(upload_dir, exist_ok=True)
            return

        # 1. Process database records still in 'pending' state.
        pending_logs = self.log_mgr.get_pending_logs()
        if pending_logs:
            logger.info(f"[Query扫描] 发现 {len(pending_logs)} 个待处理导入任务")
            for log in pending_logs:
                filepath = log['filepath']
                if os.path.exists(filepath):
                    logger.info(f"[Query扫描] 处理待导入文件: {log['filename']}")
                    self.import_file(filepath, log_id=log['id'])
                else:
                    # The file disappeared between registration and import.
                    logger.warning(f"[Query扫描] 文件不存在,标记为失败: {filepath}")
                    self.log_mgr.update_status(log['id'], 'failed', error_message='文件不存在')

        # 2. Scan the directory for files with no log record.
        excel_files = []
        for pattern in ['*.xlsx', '*.xls']:
            excel_files.extend(glob.glob(os.path.join(upload_dir, pattern)))

        for filepath in excel_files:
            filepath = os.path.abspath(filepath)
            if not self.log_mgr.is_file_logged(filepath):
                filename = os.path.basename(filepath)
                logger.info(f"[Query扫描] 发现未登记文件: {filename}")
                log_id = self.log_mgr.create_log(filename, filepath)
                self.import_file(filepath, log_id=log_id)
|
||||
12
restart.sh
@@ -1,12 +0,0 @@
|
||||
#!/bin/bash
# AI MIP restart script: stop the service, wait briefly, then start it again.

PROJECT_DIR="/opt/ai_mip"

echo "[INFO] 正在停止服务..."
bash "${PROJECT_DIR}/stop.sh"

# Give the old process a moment to release its resources before restarting.
sleep 2

echo "[INFO] 正在启动服务..."
bash "${PROJECT_DIR}/start.sh"
|
||||
680
scheduler.py
Normal file
@@ -0,0 +1,680 @@
|
||||
"""
|
||||
调度器模块 - 集成真正的任务执行
|
||||
提供Web界面所需的调度器功能,并执行实际点击任务
|
||||
"""
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from config import Config
|
||||
from data_manager import DataManager
|
||||
|
||||
|
||||
class ClickScheduler:
    """
    Click scheduler - web-integrated version.

    Exposes start/stop/status controls for the web UI and runs real click
    tasks on a background thread. Sites that have never been clicked are
    prioritized (see get_pending_sites).
    """

    def __init__(self):
        self.data_manager = DataManager()
        # True while the scheduler is considered running (set by start/stop).
        self.running = False
        self.start_time = None

        # Per-site click bookkeeping:
        # {site_id: {'last_click': datetime, 'today_count': int, 'target_count': int, ...}}
        self.click_records: Dict[int, dict] = {}

        # Working-window and pacing configuration (fall back to defaults
        # if the Config attribute is absent).
        self.work_start_hour = getattr(Config, 'WORK_START_HOUR', 9)
        self.work_end_hour = getattr(Config, 'WORK_END_HOUR', 21)
        self.click_interval_minutes = getattr(Config, 'CLICK_INTERVAL_MINUTES', 30)

        # Background scheduler thread and its cooperative stop signal.
        self._scheduler_thread: Optional[threading.Thread] = None
        self._stop_event = threading.Event()

        # Runtime state exposed to the web UI.
        self.current_task = None  # task currently executing (serial path only)
        self.current_executor = None  # current TaskExecutor instance
        self.current_profile_id = None  # current browser profile id
        self.last_cycle_time = None
        self.total_clicks_today = 0
        self.error_count = 0

        # Concurrency: number of workers per batch (1 means serial execution).
        self.max_workers = getattr(Config, 'MAX_CONCURRENT_WORKERS', 2)

        # Locks protecting click_records and the counters above when the
        # concurrent path is used.
        self._click_records_lock = threading.Lock()
        self._stats_lock = threading.Lock()

        # Whether TaskExecutor instances should use a proxy.
        self.use_proxy = True

        logger.info("Web调度器初始化完成(集成任务执行)")

    def start_scheduler(self):
        """Start the scheduler: reset daily records and spawn the loop thread."""
        if self.running:
            logger.warning("调度器已在运行中")
            return

        self.running = True
        self.start_time = datetime.now()
        self._stop_event.clear()

        # Initialize today's per-site click targets.
        self.reset_daily_records()

        # Daemon thread so it never blocks interpreter shutdown.
        self._scheduler_thread = threading.Thread(
            target=self._scheduler_loop,
            name="SchedulerThread",
            daemon=True
        )
        self._scheduler_thread.start()

        logger.info("调度器已启动,后台任务线程运行中")

    def stop_scheduler(self):
        """Stop the scheduler and terminate any in-flight task/browser."""
        if not self.running:
            logger.warning("调度器未运行")
            return

        logger.info("正在停止调度器...")
        self.running = False
        self._stop_event.set()

        # Close the browser belonging to the currently running (serial) task,
        # if any. Concurrent workers clean up their own browsers.
        if self.current_profile_id and self.current_executor:
            logger.info(f"正在关闭浏览器 (profile: {self.current_profile_id})...")
            try:
                self.current_executor.client.close_browser(self.current_profile_id)
                logger.info("浏览器已关闭")
            except Exception as e:
                # Best-effort: a failed close should not block shutdown.
                logger.warning(f"关闭浏览器失败: {str(e)}")

        # Clear the current-task state exposed to the UI.
        self.current_executor = None
        self.current_profile_id = None
        self.current_task = None

        # Wait for the loop thread to exit (bounded to 10 seconds).
        if self._scheduler_thread and self._scheduler_thread.is_alive():
            self._scheduler_thread.join(timeout=10)

        logger.info("调度器已停止")

    def is_working_time(self) -> bool:
        """Return True if the current hour is inside [work_start_hour, work_end_hour)."""
        now = datetime.now()
        return self.work_start_hour <= now.hour < self.work_end_hour

    def reset_daily_records(self):
        """Rebuild click_records for all active sites with fresh daily targets."""
        logger.info("=" * 50)
        logger.info("重置每日点击记录")
        logger.info("=" * 50)

        # Fetch all currently active sites.
        sites = self.data_manager.get_active_urls()

        # Assign each site a random click target for today.
        # NOTE(review): rebuilds the dict without holding _click_records_lock;
        # concurrent workers may be mutating it — confirm this only runs from
        # the scheduler thread.
        self.click_records = {}
        for site in sites:
            site_id = site.get('id')
            target_count = random.randint(
                getattr(Config, 'MIN_CLICK_COUNT', 1),
                getattr(Config, 'MAX_CLICK_COUNT', 3)
            )
            self.click_records[site_id] = {
                'last_click': None,
                'today_count': 0,
                'target_count': target_count,
                'site_url': site.get('site_url'),
                'click_count': site.get('click_count', 0)  # lifetime click total
            }

        total_target = sum(r['target_count'] for r in self.click_records.values())
        logger.info(f"共 {len(sites)} 个站点,总目标点击次数: {total_target}")

    def get_pending_sites(self) -> List[Dict]:
        """
        Return the list of sites still eligible for clicks today.

        Priority order:
        1. Sites with a lifetime click count of 0 first
        2. Fewer clicks today first
        3. Longest time since last click first (never-clicked sorts first)

        Also syncs click_records against the database: newly imported sites
        are added with fresh targets, deleted sites are removed.

        Returns:
            Pending sites, already sorted by priority.
        """
        if not self.click_records:
            logger.warning("点击记录为空,执行重置")
            self.reset_daily_records()

        # Detect sites added to the database since the last reset.
        current_sites = self.data_manager.get_active_urls()
        current_site_ids = {site.get('id') for site in current_sites}
        existing_site_ids = set(self.click_records.keys())

        # Register new sites so they join today's queue immediately.
        new_site_ids = current_site_ids - existing_site_ids
        if new_site_ids:
            logger.info(f"发现 {len(new_site_ids)} 个新导入的站点,加入点击队列")
            for site in current_sites:
                site_id = site.get('id')
                if site_id in new_site_ids:
                    target_count = random.randint(
                        getattr(Config, 'MIN_CLICK_COUNT', 1),
                        getattr(Config, 'MAX_CLICK_COUNT', 3)
                    )
                    self.click_records[site_id] = {
                        'last_click': None,
                        'today_count': 0,
                        'target_count': target_count,
                        'site_url': site.get('site_url'),
                        'click_count': site.get('click_count', 0)
                    }
                    logger.info(f"新站点 {site_id}: {site.get('site_url')} - 今日目标 {target_count} 次")

        # Drop sites that were deleted from the database.
        removed_site_ids = existing_site_ids - current_site_ids
        for site_id in removed_site_ids:
            del self.click_records[site_id]
            logger.info(f"站点 {site_id} 已从数据库删除,移除出点击队列")

        now = datetime.now()
        pending_sites = []

        for site_id, record in self.click_records.items():
            # Skip sites that already reached today's target.
            if record['today_count'] >= record['target_count']:
                continue

            # Enforce the minimum interval between two clicks on one site.
            if record['last_click']:
                elapsed = (now - record['last_click']).total_seconds() / 60
                if elapsed < self.click_interval_minutes:
                    continue

            pending_sites.append({
                'id': site_id,
                'site_url': record['site_url'],
                'today_count': record['today_count'],
                'target_count': record['target_count'],
                'click_count': record.get('click_count', 0),  # lifetime total
                'last_click': record['last_click']
            })

        # Priority sort:
        # 1. never-clicked (lifetime count 0) first
        # 2. fewer clicks today first
        # 3. older last click first (None sorts first)
        def sort_key(site):
            click_count = site.get('click_count', 0)
            today_count = site.get('today_count', 0)
            last_click = site.get('last_click')

            # 0 for never-clicked; otherwise 1 + lifetime count, so heavily
            # clicked sites sort last.
            priority1 = 0 if click_count == 0 else (1 + click_count)

            # Fewer clicks today sorts earlier.
            priority2 = today_count

            # Never clicked sorts first; otherwise earlier timestamps first.
            if last_click is None:
                priority3 = 0
            else:
                priority3 = last_click.timestamp()

            return (priority1, priority2, priority3)

        pending_sites.sort(key=sort_key)

        return pending_sites

    def execute_click_task(self, site: Dict) -> bool:
        """
        Execute one click task for a single site (serial path).

        Args:
            site: Site info dict (id, site_url, today_count, target_count, ...).

        Returns:
            True on success, False on failure or early abort.
        """
        site_id = site['id']
        site_url = site['site_url']

        # Publish the running task so the web UI can display it.
        self.current_task = {
            'site_id': site_id,
            'site_url': site_url,
            'start_time': datetime.now()
        }

        logger.info(f"[站点 {site_id}] 开始点击: {site_url}")
        logger.info(f"[站点 {site_id}] 今日进度: {site['today_count'] + 1}/{site['target_count']}, 历史总点击: {site.get('click_count', 0)}")

        try:
            # Abort early if a stop was requested.
            if self._stop_event.is_set():
                logger.info(f"[站点 {site_id}] 调度器已停止,跳过任务")
                return False

            # Imported lazily to avoid a circular import with task_executor.
            from task_executor import TaskExecutor

            # One-off executor for this task.
            executor = TaskExecutor(
                max_workers=1,
                use_proxy=self.use_proxy
            )
            self.current_executor = executor  # kept so stop_scheduler can close the browser

            # Re-fetch the full site record from the database.
            all_sites = self.data_manager.get_active_urls()
            target_site = next((s for s in all_sites if s.get('id') == site_id), None)

            if not target_site:
                logger.error(f"[站点 {site_id}] 未找到站点信息")
                return False

            # Check stop again before the expensive browser launch.
            if self._stop_event.is_set():
                logger.info(f"[站点 {site_id}] 调度器已停止,跳过任务")
                return False

            # Create the browser environment (AdsPower profile).
            logger.info(f"[站点 {site_id}] 创建浏览器环境...")
            profile_info = executor.create_browser_profile(1)
            if not profile_info:
                logger.error(f"[站点 {site_id}] 创建浏览器环境失败")
                return False

            self.current_profile_id = profile_info['profile_id']  # for stop_scheduler cleanup

            # If stopped while the browser was being created, close it now.
            if self._stop_event.is_set():
                logger.info(f"[站点 {site_id}] 调度器已停止,关闭浏览器")
                executor.client.close_browser(profile_info['profile_id'])
                return False

            time.sleep(2)

            # Run the actual click task.
            # NOTE(review): if execute_single_task raises, the browser profile
            # is not closed here (unlike _execute_click_task_wrapper's finally)
            # — possible browser leak; confirm intended.
            logger.info(f"[站点 {site_id}] 执行点击任务...")
            result = executor.execute_single_task(target_site, 1, profile_info['profile_id'])

            # Clear current-task references on the happy path.
            self.current_executor = None
            self.current_profile_id = None

            if result['success']:
                # Update bookkeeping.
                # NOTE(review): mutated without _click_records_lock, unlike the
                # concurrent wrapper — fine only if serial mode never overlaps
                # with concurrent workers; verify.
                self.click_records[site_id]['last_click'] = datetime.now()
                self.click_records[site_id]['today_count'] += 1
                self.click_records[site_id]['click_count'] = self.click_records[site_id].get('click_count', 0) + 1
                self.total_clicks_today += 1

                logger.info(f"[站点 {site_id}] 点击完成: {self.click_records[site_id]['today_count']}/{self.click_records[site_id]['target_count']}")
                return True
            else:
                self.error_count += 1
                logger.warning(f"[站点 {site_id}] 点击失败: {result.get('error', '未知错误')}")
                return False

        except Exception as e:
            self.error_count += 1
            logger.error(f"[站点 {site_id}] 点击异常: {str(e)}")
            import traceback
            traceback.print_exc()
            return False
        finally:
            # Always clear the published task state, even on exceptions.
            self.current_task = None
            self.current_executor = None
            self.current_profile_id = None

    def _scheduler_loop(self):
        """Main scheduler loop (runs on the background thread)."""
        logger.info("调度器线程启动")

        # Poll every 60s; run a click cycle every 10 minutes.
        check_interval = 60
        cycle_interval = 10 * 60
        last_cycle = 0  # 0 forces a cycle on the first iteration

        while not self._stop_event.is_set():
            try:
                now = time.time()

                # Time for the next click cycle?
                if now - last_cycle >= cycle_interval:
                    self._run_click_cycle()
                    last_cycle = now

                # Reset daily records when the calendar date changes.
                current_date = datetime.now().date()
                if hasattr(self, '_last_reset_date') and self._last_reset_date != current_date:
                    self.reset_daily_records()
                    self._last_reset_date = current_date
                else:
                    self._last_reset_date = current_date

                # Sleep until the next check, waking early if stopped.
                self._stop_event.wait(timeout=check_interval)

            except Exception as e:
                logger.error(f"调度器循环异常: {str(e)}")
                import traceback
                traceback.print_exc()
                time.sleep(30)  # back off before retrying after an error

        logger.info("调度器线程结束")

    def _run_click_cycle(self):
        """Run one click cycle over all pending sites."""
        # Respect the configured working-hours window.
        if not self.is_working_time():
            current_time = datetime.now().strftime('%H:%M')
            logger.info(f"当前时间 {current_time} 不在工作时间 ({self.work_start_hour}:00-{self.work_end_hour}:00),跳过")
            return

        self.last_cycle_time = datetime.now()

        logger.info("-" * 50)
        logger.info(f"开始点击循环 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        logger.info("-" * 50)

        # Pending sites, already priority-sorted.
        pending_sites = self.get_pending_sites()

        if not pending_sites:
            logger.info("没有待点击的站点")
            return

        # Log how many never-clicked sites are in the queue.
        never_clicked = sum(1 for s in pending_sites if s.get('click_count', 0) == 0)
        logger.info(f"找到 {len(pending_sites)} 个待点击站点,其中 {never_clicked} 个从未点击过(优先执行)")

        # Serial vs concurrent execution, per configuration.
        if self.max_workers == 1:
            # Serial: one site at a time with random inter-task waits.
            for site in pending_sites:
                # Stop requested?
                if self._stop_event.is_set() or not self.running:
                    logger.info("调度器已停止,终止点击循环")
                    break

                # Still inside working hours?
                if not self.is_working_time():
                    current_time = datetime.now().strftime('%H:%M')
                    logger.info(f"当前时间 {current_time} 已超出工作时间,停止执行剩余任务")
                    break

                # Execute the click.
                self.execute_click_task(site)

                # Random pause between tasks (skipped after the last one).
                # NOTE(review): `site != pending_sites[-1]` compares dicts by
                # value — works here, but identity (`is not`) would be safer.
                if site != pending_sites[-1] and not self._stop_event.is_set():
                    wait_minutes = random.randint(
                        getattr(Config, 'MIN_TASK_INTERVAL_MINUTES', 3),
                        getattr(Config, 'MAX_TASK_INTERVAL_MINUTES', 5)
                    )
                    logger.info(f"等待 {wait_minutes} 分钟后执行下一个任务...")
                    self._stop_event.wait(timeout=wait_minutes * 60)
        else:
            # Concurrent: batched thread-pool execution.
            self._run_concurrent_cycle(pending_sites)

        # Summarize today's progress.
        completed = sum(1 for r in self.click_records.values() if r['today_count'] >= r['target_count'])
        total = len(self.click_records)
        total_clicks = sum(r['today_count'] for r in self.click_records.values())
        target_clicks = sum(r['target_count'] for r in self.click_records.values())

        logger.info("-" * 50)
        logger.info(f"今日进度: {completed}/{total} 个站点完成")
        logger.info(f"点击次数: {total_clicks}/{target_clicks} 次")
        logger.info("-" * 50)

    def _run_concurrent_cycle(self, pending_sites: List[Dict]):
        """
        Execute click tasks concurrently, in batches of max_workers.

        Args:
            pending_sites: Priority-sorted list of sites to click.
        """
        # Split into batches of at most max_workers sites.
        batch_size = self.max_workers
        batches = [pending_sites[i:i+batch_size] for i in range(0, len(pending_sites), batch_size)]

        logger.info(f"并发模式: 共 {len(pending_sites)} 个任务,分 {len(batches)} 批执行(每批最多 {batch_size} 个)")

        for batch_idx, batch in enumerate(batches):
            # Stop requested?
            if self._stop_event.is_set() or not self.running:
                logger.info("调度器已停止,终止点击循环")
                break

            # Still inside working hours?
            if not self.is_working_time():
                current_time = datetime.now().strftime('%H:%M')
                logger.info(f"当前时间 {current_time} 已超出工作时间,停止执行剩余批次")
                break

            logger.info(f"=" * 40)
            logger.info(f"开始批次 {batch_idx+1}/{len(batches)}: {len(batch)} 个任务并发执行")
            for i, site in enumerate(batch, 1):
                logger.info(f"  - [Worker {i}] Site {site['id']}: {site['site_url'][:50]}...")

            batch_start_time = time.time()
            success_count = 0
            fail_count = 0

            # Fan out the batch across a thread pool; one worker per site.
            with ThreadPoolExecutor(max_workers=len(batch)) as executor:
                futures = {
                    executor.submit(self._execute_click_task_wrapper, site, worker_id): site
                    for worker_id, site in enumerate(batch, 1)
                }

                # Collect results as workers finish.
                for future in as_completed(futures):
                    site = futures[future]
                    try:
                        result = future.result()
                        if result.get('success'):
                            success_count += 1
                        else:
                            fail_count += 1
                    except Exception as e:
                        fail_count += 1
                        logger.error(f"任务异常: Site {site['id']} - {e}")

            batch_duration = (time.time() - batch_start_time) / 60
            logger.info(f"批次 {batch_idx+1} 完成: 成功 {success_count}, 失败 {fail_count}, 耗时 {batch_duration:.1f} 分钟")

            # Random pause between batches (skipped after the last one).
            if batch_idx < len(batches) - 1 and not self._stop_event.is_set():
                # Re-check working hours before committing to the wait.
                if not self.is_working_time():
                    current_time = datetime.now().strftime('%H:%M')
                    logger.info(f"当前时间 {current_time} 已超出工作时间,停止执行")
                    break

                wait_minutes = random.randint(
                    getattr(Config, 'MIN_TASK_INTERVAL_MINUTES', 3),
                    getattr(Config, 'MAX_TASK_INTERVAL_MINUTES', 5)
                )
                logger.info(f"等待 {wait_minutes} 分钟后执行下一批次...")
                self._stop_event.wait(timeout=wait_minutes * 60)

    def _execute_click_task_wrapper(self, site: Dict, worker_id: int) -> Dict:
        """
        Thread-safe task wrapper used by the concurrent path.

        Args:
            site: Site info dict.
            worker_id: Worker number within the batch (for log prefixes).

        Returns:
            Result dict with at least a 'success' key.
        """
        from task_executor import TaskExecutor

        site_id = site['id']
        site_url = site['site_url']

        # Staggered start: 5-10s per worker slot, to avoid hammering the
        # AdsPower API with simultaneous calls (rate limiting).
        if worker_id > 1:
            stagger_delay = (worker_id - 1) * random.randint(5, 10)
            logger.info(f"[Worker {worker_id}] [Site {site_id}] 错峰等待 {stagger_delay} 秒后启动...")
            time.sleep(stagger_delay)

        logger.info(f"[Worker {worker_id}] [Site {site_id}] 开始点击: {site_url[:50]}...")

        executor = None
        try:
            # Each worker gets its own TaskExecutor instance.
            executor = TaskExecutor(max_workers=1, use_proxy=self.use_proxy)

            # Each worker gets its own browser profile (and proxy).
            profile_info = executor.create_browser_profile(worker_id)
            if not profile_info:
                logger.error(f"[Worker {worker_id}] [Site {site_id}] 创建浏览器环境失败")
                with self._stats_lock:
                    self.error_count += 1
                return {'success': False, 'error': '创建浏览器环境失败'}

            time.sleep(2)

            # Re-fetch the full site record from the database.
            all_sites = self.data_manager.get_active_urls()
            target_site = next((s for s in all_sites if s.get('id') == site_id), None)

            if not target_site:
                logger.error(f"[Worker {worker_id}] [Site {site_id}] 未找到站点信息")
                return {'success': False, 'error': '未找到站点信息'}

            # Run the click task.
            result = executor.execute_single_task(target_site, worker_id, profile_info['profile_id'])

            if result['success']:
                # Update shared bookkeeping under the locks.
                with self._click_records_lock:
                    if site_id in self.click_records:
                        self.click_records[site_id]['last_click'] = datetime.now()
                        self.click_records[site_id]['today_count'] += 1
                        self.click_records[site_id]['click_count'] = self.click_records[site_id].get('click_count', 0) + 1
                with self._stats_lock:
                    self.total_clicks_today += 1

                logger.info(f"[Worker {worker_id}] [Site {site_id}] ✅ 点击完成")
            else:
                with self._stats_lock:
                    self.error_count += 1
                logger.warning(f"[Worker {worker_id}] [Site {site_id}] ⚠️ 点击失败: {result.get('error', '未知错误')}")

            return result

        except Exception as e:
            with self._stats_lock:
                self.error_count += 1
            logger.error(f"[Worker {worker_id}] [Site {site_id}] ❌ 异常: {str(e)}")
            import traceback
            traceback.print_exc()
            return {'success': False, 'error': str(e)}
        finally:
            # Always release browser/profile resources for this worker.
            if executor:
                try:
                    executor.close_browser()
                except Exception as e:
                    logger.warning(f"[Worker {worker_id}] 清理资源失败: {e}")

    # ========== Original web-interface methods below ==========

    def add_url(self, url: str) -> bool:
        """Add a URL to the schedule queue and seed its click record."""
        try:
            site_id = self.data_manager.add_url(url)
            if site_id:
                # Seed a fresh click record with a random daily target.
                self.click_records[site_id] = {
                    'last_click': None,
                    'today_count': 0,
                    'target_count': random.randint(
                        getattr(Config, 'MIN_CLICK_COUNT', 1),
                        getattr(Config, 'MAX_CLICK_COUNT', 3)
                    ),
                    'click_count': 0
                }
                return True
            return False
        except Exception as e:
            logger.error(f"添加URL失败: {str(e)}")
            return False

    def add_urls(self, urls: List[str]) -> int:
        """Add multiple URLs; return how many were added successfully."""
        count = 0
        for url in urls:
            if self.add_url(url):
                count += 1
        return count

    def get_url_detail(self, url: str) -> Optional[Dict]:
        """Return stored details for a URL, or None if unknown."""
        return self.data_manager.get_url_detail(url)

    def get_statistics(self) -> Dict:
        """Return aggregate statistics from the data manager."""
        return self.data_manager.get_statistics()

    def get_queue_status(self) -> Dict:
        """Return queue status for the web UI: pending/running/completed sites."""
        sites = self.data_manager.get_active_urls()

        pending = []
        completed = []

        for site in sites:
            site_id = site.get('id')
            record = self.click_records.get(site_id, {})

            site_info = {
                'site_id': site_id,
                'site_url': site.get('site_url'),
                'site_name': site.get('site_name'),
                'today_count': record.get('today_count', 0),
                'target_count': record.get('target_count', 0),
                'click_count': site.get('click_count', 0),  # lifetime total
                'last_click': record.get('last_click')
            }

            # Sites with no record yet compare 0 >= 0 and land in completed.
            if record.get('today_count', 0) >= record.get('target_count', 0):
                completed.append(site_info)
            else:
                pending.append(site_info)

        return {
            'pending': pending[:20],  # cap lists for the UI
            'running': self.current_task,
            'completed': completed[:20],
            'scheduler_status': 'running' if self.running else 'stopped',
            'is_working_time': self.is_working_time(),
            'total_pending': len(pending),
            'total_completed': len(completed),
            'total_clicks_today': self.total_clicks_today,
            'error_count': self.error_count
        }
|
||||
58
start.bat
@@ -1,58 +0,0 @@
|
||||
@echo off
chcp 65001 >nul
echo ========================================
echo MIP广告点击服务 - 快速启动脚本
echo ========================================
echo.

REM Check that Python is installed
python --version >nul 2>&1
if %errorlevel% neq 0 (
    echo 错误: 未检测到Python,请先安装Python 3.8+
    pause
    exit /b 1
)

REM Create the virtual environment if it does not exist yet
if not exist "venv" (
    echo 未检测到虚拟环境,正在创建...
    python -m venv venv
    echo 虚拟环境创建完成
    echo.
)

REM Activate the virtual environment
echo 激活虚拟环境...
call venv\Scripts\activate.bat

REM Install dependencies (and the Playwright browser) if flask is missing
pip show flask >nul 2>&1
if %errorlevel% neq 0 (
    echo 正在安装依赖包...
    pip install -r requirements.txt
    echo 依赖安装完成
    echo.
    echo 正在安装 Playwright 浏览器...
    python -m playwright install chromium
    echo Playwright 浏览器安装完成
    echo.
)

REM Warn (but allow continuing) when the .env config file is absent
if not exist ".env" (
    echo 警告: 未检测到.env配置文件
    echo 请复制.env.example为.env并配置相关参数
    echo.
    echo 是否继续启动服务?
    pause
)

REM Start the service (development config by default)
echo 正在启动MIP广告点击服务...
echo.
echo 提示:默认使用开发环境配置
echo 如需使用生产环境,请设置环境变量:set ENV=production
echo.
python app.py

pause
|
||||
45
start.sh
@@ -1,45 +0,0 @@
|
||||
#!/bin/bash
# AI MIP background start script: stops any running instance, verifies the
# virtualenv and dependencies, then launches main.py with nohup.

PROJECT_DIR="/home/work/ai_mip"
cd ${PROJECT_DIR}

echo "[INFO] 检查是否有运行中的服务..."
# Find and stop any already-running instance of the service
OLD_PID=$(pgrep -f "python main.py")

if [ ! -z "$OLD_PID" ]; then
    echo "[WARN] 发现运行中的服务 (PID: $OLD_PID),正在停止..."
    pkill -f "python main.py"
    sleep 2
    echo "[INFO] 旧服务已停止"
else
    echo "[INFO] 没有运行中的服务"
fi

echo "[INFO] 正在启动服务..."
# Activate the virtualenv and run in the background
if [ ! -d "venv" ]; then
    echo "[ERROR] 虚拟环境不存在,请先执行: python3 -m venv venv"
    exit 1
fi

if [ ! -f "venv/bin/activate" ]; then
    echo "[ERROR] 虚拟环境激活脚本不存在"
    exit 1
fi

source venv/bin/activate

# Install dependencies if they are missing (probe via the schedule module)
if ! python -c "import schedule" 2>/dev/null; then
    echo "[WARN] 依赖未安装,正在安装..."
    pip install -r requirements.txt
fi

# Launch detached; all output goes to logs/service.log
nohup python main.py --workers 3 --health-port 8899 > logs/service.log 2>&1 &

NEW_PID=$!
echo "[INFO] 服务已启动"
echo "[INFO] 进程ID: $NEW_PID"
echo "[INFO] 查看日志: tail -f ${PROJECT_DIR}/logs/service.log"
||||
@@ -1,57 +0,0 @@
|
||||
@echo off
chcp 65001 >nul
echo ========================================
echo MIP广告点击服务 - 生产环境启动
echo ========================================
echo.

REM Select the production environment configuration
set ENV=production

REM Check that Python is installed
python --version >nul 2>&1
if %errorlevel% neq 0 (
    echo 错误: 未检测到Python,请先安装Python 3.8+
    pause
    exit /b 1
)

REM Create the virtual environment if it does not exist yet
if not exist "venv" (
    echo 未检测到虚拟环境,正在创建...
    python -m venv venv
    echo 虚拟环境创建完成
    echo.
)

REM Activate the virtual environment
echo 激活虚拟环境...
call venv\Scripts\activate.bat

REM Install dependencies (and the Playwright browser) if flask is missing
pip show flask >nul 2>&1
if %errorlevel% neq 0 (
    echo 正在安装依赖包...
    pip install -r requirements.txt
    echo 依赖安装完成
    echo.
    echo 正在安装 Playwright 浏览器...
    python -m playwright install chromium
    echo Playwright 浏览器安装完成
    echo.
)

REM Fail fast if the production config file is missing (unlike dev start.bat)
if not exist ".env.production" (
    echo 错误: 未检测到生产环境配置文件 .env.production
    echo 请先配置生产环境参数
    pause
    exit /b 1
)

REM Start the service under the production configuration
echo 正在启动MIP广告点击服务(生产环境)...
echo.
python app.py

pause
|
||||
@@ -1,259 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# MIP广告自动点击系统 - 生产环境启动脚本
|
||||
# 适用于 Ubuntu/Debian 系统
|
||||
|
||||
set -e # 遇到错误立即退出
|
||||
|
||||
echo "============================================================"
|
||||
echo "MIP广告自动点击系统 - 生产环境启动"
|
||||
echo "============================================================"
|
||||
|
||||
# 颜色定义
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# 项目目录(脚本所在目录)
|
||||
PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
cd "$PROJECT_DIR"
|
||||
|
||||
echo -e "${GREEN}项目目录: $PROJECT_DIR${NC}"
|
||||
|
||||
# 检查Python版本
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
echo "检查Python环境"
|
||||
echo "============================================================"
|
||||
|
||||
if ! command -v python3 &> /dev/null; then
|
||||
echo -e "${RED}错误: 未找到 python3${NC}"
|
||||
echo "请安装Python 3.8+: sudo apt-get install python3 python3-pip python3-venv"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
PYTHON_VERSION=$(python3 --version | awk '{print $2}')
|
||||
echo -e "${GREEN}Python版本: $PYTHON_VERSION${NC}"
|
||||
|
||||
# 检查Python版本是否 >= 3.8
|
||||
PYTHON_MAJOR=$(echo $PYTHON_VERSION | cut -d. -f1)
|
||||
PYTHON_MINOR=$(echo $PYTHON_VERSION | cut -d. -f2)
|
||||
|
||||
if [ "$PYTHON_MAJOR" -lt 3 ] || ([ "$PYTHON_MAJOR" -eq 3 ] && [ "$PYTHON_MINOR" -lt 8 ]); then
|
||||
echo -e "${RED}错误: Python版本过低,需要 3.8+${NC}"
|
||||
echo "当前版本: $PYTHON_VERSION"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 虚拟环境目录
|
||||
VENV_DIR="$PROJECT_DIR/venv"
|
||||
|
||||
# 创建虚拟环境(如果不存在)
|
||||
if [ ! -d "$VENV_DIR" ]; then
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
echo "创建Python虚拟环境"
|
||||
echo "============================================================"
|
||||
|
||||
# 检查是否安装了 venv 模块
|
||||
if ! python3 -m venv --help &> /dev/null; then
|
||||
echo -e "${YELLOW}警告: python3-venv 未安装,正在尝试安装...${NC}"
|
||||
|
||||
# 尝试安装(需要sudo权限)
|
||||
if command -v apt-get &> /dev/null; then
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y python3-venv
|
||||
else
|
||||
echo -e "${RED}错误: 无法自动安装 python3-venv${NC}"
|
||||
echo "请手动执行: sudo apt-get install python3-venv"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
echo -e "${GREEN}正在创建虚拟环境...${NC}"
|
||||
python3 -m venv "$VENV_DIR"
|
||||
echo -e "${GREEN}✓ 虚拟环境创建成功${NC}"
|
||||
else
|
||||
echo ""
|
||||
echo -e "${GREEN}✓ 虚拟环境已存在: $VENV_DIR${NC}"
|
||||
fi
|
||||
|
||||
# 激活虚拟环境
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
echo "激活虚拟环境"
|
||||
echo "============================================================"
|
||||
|
||||
source "$VENV_DIR/bin/activate"
|
||||
|
||||
if [ "$VIRTUAL_ENV" != "" ]; then
|
||||
echo -e "${GREEN}✓ 虚拟环境已激活: $VIRTUAL_ENV${NC}"
|
||||
echo -e "${GREEN}Python路径: $(which python)${NC}"
|
||||
else
|
||||
echo -e "${RED}错误: 虚拟环境激活失败${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 升级pip
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
echo "升级pip"
|
||||
echo "============================================================"
|
||||
python -m pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
|
||||
# 安装依赖
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
echo "安装项目依赖"
|
||||
echo "============================================================"
|
||||
|
||||
if [ -f "requirements.txt" ]; then
|
||||
echo -e "${GREEN}从 requirements.txt 安装依赖...${NC}"
|
||||
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
else
|
||||
echo -e "${YELLOW}警告: requirements.txt 不存在${NC}"
|
||||
echo "手动安装核心依赖..."
|
||||
pip install flask playwright requests loguru apscheduler python-dotenv -i https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
|
||||
# 安装Playwright浏览器
|
||||
echo "安装Playwright浏览器驱动..."
|
||||
playwright install chromium
|
||||
fi
|
||||
|
||||
# 检查配置文件
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
echo "检查配置文件"
|
||||
echo "============================================================"
|
||||
|
||||
if [ ! -f ".env.production" ]; then
|
||||
echo -e "${YELLOW}警告: .env.production 不存在${NC}"
|
||||
|
||||
if [ -f ".env.example" ]; then
|
||||
echo "从 .env.example 创建配置文件..."
|
||||
cp .env.example .env.production
|
||||
echo -e "${GREEN}✓ 已创建 .env.production${NC}"
|
||||
echo -e "${RED}请编辑 .env.production 配置文件后重新启动${NC}"
|
||||
exit 1
|
||||
else
|
||||
echo -e "${RED}错误: 缺少配置文件模板${NC}"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo -e "${GREEN}✓ 配置文件存在: .env.production${NC}"
|
||||
fi
|
||||
|
||||
# 初始化数据库
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
echo "初始化数据库"
|
||||
echo "============================================================"
|
||||
|
||||
if [ ! -f "db/ai_mip_prod.db" ]; then
|
||||
echo -e "${YELLOW}数据库不存在,正在初始化...${NC}"
|
||||
|
||||
if [ -f "db/init_databases.py" ]; then
|
||||
# 自动创建生产数据库(跳过交互)
|
||||
python << EOF
|
||||
from pathlib import Path
|
||||
import sqlite3
|
||||
|
||||
db_dir = Path('db')
|
||||
db_path = db_dir / 'ai_mip_prod.db'
|
||||
|
||||
if not db_path.exists():
|
||||
print(f"创建数据库: {db_path}")
|
||||
|
||||
# 读取并执行SQL脚本
|
||||
init_sql = db_dir / 'init_sqlite.sql'
|
||||
if init_sql.exists():
|
||||
with open(init_sql, 'r', encoding='utf-8') as f:
|
||||
sql_script = f.read()
|
||||
|
||||
conn = sqlite3.connect(str(db_path))
|
||||
conn.executescript(sql_script)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
print("✓ 数据库初始化完成")
|
||||
else:
|
||||
print("错误: 找不到 init_sqlite.sql")
|
||||
exit(1)
|
||||
EOF
|
||||
echo -e "${GREEN}✓ 数据库初始化成功${NC}"
|
||||
else
|
||||
echo -e "${RED}错误: 找不到数据库初始化脚本${NC}"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo -e "${GREEN}✓ 数据库已存在: db/ai_mip_prod.db${NC}"
|
||||
fi
|
||||
|
||||
# 创建必要的目录
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
echo "创建必要目录"
|
||||
echo "============================================================"
|
||||
|
||||
mkdir -p logs data
|
||||
echo -e "${GREEN}✓ 目录创建完成${NC}"
|
||||
|
||||
# 检查AdsPower连接
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
echo "检查AdsPower连接"
|
||||
echo "============================================================"
|
||||
|
||||
python << EOF
|
||||
import os
|
||||
os.environ['ENV'] = 'production'
|
||||
|
||||
try:
|
||||
from adspower_client import AdsPowerClient
|
||||
client = AdsPowerClient()
|
||||
profiles = client.list_profiles()
|
||||
|
||||
if profiles:
|
||||
print("\033[0;32m✓ AdsPower连接正常\033[0m")
|
||||
profile_count = len(profiles.get('data', {}).get('list', []))
|
||||
print(f"\033[0;32m Profile数量: {profile_count}\033[0m")
|
||||
else:
|
||||
print("\033[1;33m警告: AdsPower连接失败,请检查配置\033[0m")
|
||||
except Exception as e:
|
||||
print(f"\033[1;33m警告: AdsPower连接异常: {str(e)}\033[0m")
|
||||
print("\033[1;33m 请确保AdsPower客户端已启动\033[0m")
|
||||
EOF
|
||||
|
||||
# 启动服务
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
echo "启动Flask服务"
|
||||
echo "============================================================"
|
||||
|
||||
export ENV=production
|
||||
|
||||
# 检查端口是否被占用
|
||||
PORT=5000
|
||||
if command -v netstat &> /dev/null; then
|
||||
if netstat -tuln | grep ":$PORT " > /dev/null; then
|
||||
echo -e "${YELLOW}警告: 端口 $PORT 已被占用${NC}"
|
||||
echo "尝试查找占用进程..."
|
||||
lsof -i :$PORT || true
|
||||
fi
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo -e "${GREEN}启动服务中...${NC}"
|
||||
echo "访问地址: http://127.0.0.1:5000"
|
||||
echo "按 Ctrl+C 停止服务"
|
||||
echo ""
|
||||
|
||||
# 使用nohup在后台运行(可选)
|
||||
# nohup python app.py > logs/app.log 2>&1 &
|
||||
# echo $! > app.pid
|
||||
# echo -e "${GREEN}✓ 服务已启动(后台运行)${NC}"
|
||||
# echo "PID: $(cat app.pid)"
|
||||
# echo "日志: logs/app.log"
|
||||
|
||||
# 前台运行(便于调试)
|
||||
python app.py
|
||||
3114
static/app.html
22
status.sh
@@ -1,22 +0,0 @@
|
||||
#!/bin/bash
|
||||
# AI MIP 服务状态查看
|
||||
|
||||
echo "=========================================="
|
||||
echo " AI MIP 服务状态"
|
||||
echo "=========================================="
|
||||
|
||||
# 查找进程
|
||||
PID=$(pgrep -f "python main.py")
|
||||
|
||||
if [ -z "$PID" ]; then
|
||||
echo "[INFO] 服务未运行"
|
||||
else
|
||||
echo "[INFO] 服务运行中"
|
||||
echo "[INFO] 进程ID: $PID"
|
||||
echo ""
|
||||
echo "进程详情:"
|
||||
ps aux | grep "python main.py" | grep -v grep
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
11
stop.sh
@@ -1,11 +0,0 @@
|
||||
#!/bin/bash
|
||||
# AI MIP 停止脚本
|
||||
|
||||
# 查找并杀死进程
|
||||
pkill -f "python main.py"
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "[INFO] 服务已停止"
|
||||
else
|
||||
echo "[WARN] 未找到运行中的服务"
|
||||
fi
|
||||
@@ -1,65 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# MIP广告自动点击系统 - 停止服务脚本
|
||||
|
||||
set -e
|
||||
|
||||
echo "============================================================"
|
||||
echo "停止MIP广告自动点击系统"
|
||||
echo "============================================================"
|
||||
|
||||
# 颜色定义
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m'
|
||||
|
||||
PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
cd "$PROJECT_DIR"
|
||||
|
||||
# 检查PID文件
|
||||
if [ -f "app.pid" ]; then
|
||||
PID=$(cat app.pid)
|
||||
echo "找到PID文件: $PID"
|
||||
|
||||
# 检查进程是否存在
|
||||
if ps -p $PID > /dev/null 2>&1; then
|
||||
echo -e "${YELLOW}正在停止服务 (PID: $PID)...${NC}"
|
||||
kill $PID
|
||||
|
||||
# 等待进程结束
|
||||
sleep 2
|
||||
|
||||
if ps -p $PID > /dev/null 2>&1; then
|
||||
echo -e "${YELLOW}进程未响应,强制终止...${NC}"
|
||||
kill -9 $PID
|
||||
fi
|
||||
|
||||
rm -f app.pid
|
||||
echo -e "${GREEN}✓ 服务已停止${NC}"
|
||||
else
|
||||
echo -e "${YELLOW}进程不存在,清理PID文件${NC}"
|
||||
rm -f app.pid
|
||||
fi
|
||||
else
|
||||
echo -e "${YELLOW}未找到PID文件,尝试通过端口查找进程...${NC}"
|
||||
|
||||
# 通过端口查找进程
|
||||
PORT=5000
|
||||
PID=$(lsof -ti :$PORT 2>/dev/null || echo "")
|
||||
|
||||
if [ -n "$PID" ]; then
|
||||
echo "找到占用端口 $PORT 的进程: $PID"
|
||||
echo -e "${YELLOW}正在停止...${NC}"
|
||||
kill $PID
|
||||
sleep 2
|
||||
echo -e "${GREEN}✓ 服务已停止${NC}"
|
||||
else
|
||||
echo -e "${GREEN}没有运行中的服务${NC}"
|
||||
fi
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
echo "服务已停止"
|
||||
echo "============================================================"
|
||||
399
task_executor.py
@@ -1,303 +1,250 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
任务执行器模块
|
||||
|
||||
提供广告点击任务的执行能力,包括:
|
||||
- 浏览器环境创建
|
||||
- 单个任务执行
|
||||
- 批量任务调度
|
||||
负责管理浏览器生命周期和执行点击任务
|
||||
"""
|
||||
|
||||
import time
|
||||
from typing import Dict, Optional
|
||||
from loguru import logger
|
||||
|
||||
from config import Config
|
||||
from adspower_client import AdsPowerClient
|
||||
from ad_automation import MIPAdAutomation
|
||||
from config import Config
|
||||
from data_manager import DataManager
|
||||
import time
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
|
||||
class TaskExecutor:
|
||||
"""
|
||||
任务执行器
|
||||
|
||||
负责执行单个或批量广告点击任务。
|
||||
支持代理配置、浏览器环境管理、任务结果追踪。
|
||||
负责:
|
||||
1. 管理AdsPower浏览器环境
|
||||
2. 执行MIP广告点击任务
|
||||
3. 记录执行结果
|
||||
"""
|
||||
|
||||
_browser_start_lock = threading.Lock()
|
||||
|
||||
def __init__(self, max_workers: int = 1, use_proxy: bool = True):
|
||||
"""
|
||||
初始化任务执行器
|
||||
|
||||
Args:
|
||||
max_workers: 最大并发数(1=串行,>1=并发)
|
||||
max_workers: 最大并发数(当前仅支持1)
|
||||
use_proxy: 是否使用代理
|
||||
"""
|
||||
self.max_workers = max_workers
|
||||
self.use_proxy = use_proxy
|
||||
self.client = AdsPowerClient()
|
||||
self.dm = DataManager()
|
||||
self._browser_info = None
|
||||
self._proxy_id = None # 保存创建的代理ID,用于关闭时清理
|
||||
self._profile_id = None # 保存创建的Profile ID,用于关闭时清理
|
||||
|
||||
# 创建截图目录(按日期组织)
|
||||
timestamp = datetime.now().strftime('%Y%m%d')
|
||||
self.screenshot_dir = Path("./test") / f"batch_{timestamp}"
|
||||
self.screenshot_dir.mkdir(parents=True, exist_ok=True)
|
||||
logger.info(f"TaskExecutor 初始化: max_workers={max_workers}, use_proxy={use_proxy}")
|
||||
|
||||
logger.debug(f"TaskExecutor initialized: workers={max_workers}, proxy={use_proxy}")
|
||||
|
||||
def create_browser_profile(self, index: int) -> Optional[Dict]:
|
||||
def create_browser_profile(self, index: int = 1) -> Optional[Dict]:
|
||||
"""
|
||||
创建浏览器环境
|
||||
创建浏览器环境(启动AdsPower浏览器)
|
||||
|
||||
流程:
|
||||
1. 根据当前运行环境获取对应的分组ID(dev/prod)
|
||||
2. 获取代理并创建AdsPower代理
|
||||
3. 用代理ID创建新的profile
|
||||
4. 启动浏览器
|
||||
|
||||
Args:
|
||||
index: 环境编号
|
||||
index: 任务索引
|
||||
|
||||
Returns:
|
||||
环境信息字典,失败返回None
|
||||
包含 profile_id 的字典,失败返回 None
|
||||
"""
|
||||
try:
|
||||
# 获取分组ID
|
||||
logger.info(f"[Task {index}] 创建浏览器环境...")
|
||||
|
||||
# 1. 获取当前环境对应的分组ID (dev/prod)
|
||||
group_id = self.client.get_group_by_env()
|
||||
time.sleep(0.5)
|
||||
if not group_id:
|
||||
logger.error(f"[Task {index}] 获取分组ID失败,请确保AdsPower中存在dev或prod分组")
|
||||
return None
|
||||
|
||||
# 如果使用代理,获取代理配置
|
||||
proxy_config = {}
|
||||
logger.info(f"[Task {index}] 使用分组ID: {group_id}")
|
||||
|
||||
# 2. 获取大麦代理并创建AdsPower代理
|
||||
proxy_id = None
|
||||
proxy_info = None
|
||||
|
||||
if self.use_proxy:
|
||||
logger.info(f"[环境 {index}] 获取代理IP...")
|
||||
logger.info(f"[Task {index}] 获取大麦IP代理...")
|
||||
proxy_info = self.client.get_damai_proxy()
|
||||
time.sleep(0.5)
|
||||
|
||||
if proxy_info:
|
||||
logger.info(f"[环境 {index}] 代理IP: {proxy_info['host']}:{proxy_info['port']}")
|
||||
|
||||
proxy_data = {
|
||||
proxy_config = {
|
||||
"type": "http",
|
||||
"host": proxy_info["host"],
|
||||
"port": proxy_info["port"],
|
||||
"user": self.client.DAMAI_USER,
|
||||
"password": self.client.DAMAI_PASSWORD,
|
||||
"remark": f"任务代理_{index}"
|
||||
"ipchecker": "ip2location",
|
||||
"remark": "Damai Auto Proxy"
|
||||
}
|
||||
|
||||
proxy_id = self.client.create_proxy(proxy_data)
|
||||
time.sleep(0.5)
|
||||
|
||||
proxy_id = self.client.create_proxy(proxy_config)
|
||||
if proxy_id:
|
||||
logger.info(f"[环境 {index}] 创建代理: {proxy_id}")
|
||||
proxy_config = {"proxyid": proxy_id}
|
||||
|
||||
# 根据环境变量决定操作系统
|
||||
os_type = "Linux" if Config.ENV == "production" else "Windows"
|
||||
|
||||
profile_data = {
|
||||
"name": f"任务_{index}_{datetime.now().strftime('%H%M%S')}",
|
||||
"group_id": str(group_id) if group_id else "0",
|
||||
"platform": "health.baidu.com",
|
||||
"repeat_config": [],
|
||||
"ignore_cookie_error": "1",
|
||||
"country": "cn",
|
||||
"city": "beijing",
|
||||
"remark": f"任务环境 #{index}",
|
||||
"fingerprint_config": {
|
||||
"automatic_timezone": "1",
|
||||
"flash": "block",
|
||||
"scan_port_type": "1",
|
||||
"location": "ask",
|
||||
"location_switch": "1",
|
||||
"canvas": "0",
|
||||
"webgl": "0",
|
||||
"audio": "0",
|
||||
"webrtc": "local",
|
||||
"do_not_track": "true",
|
||||
"hardware_concurrency": "default",
|
||||
"device_memory": "default",
|
||||
"gpu": "2",
|
||||
"mac_address_config": {
|
||||
"model": "1",
|
||||
"address": ""
|
||||
},
|
||||
"browser_kernel_config": {
|
||||
"version": "latest",
|
||||
"type": "chrome"
|
||||
},
|
||||
"random_ua": {
|
||||
"ua_system_version": [os_type]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logger.debug(f"[环境 {index}] 操作系统: {os_type} (ENV={Config.ENV})")
|
||||
|
||||
if proxy_config:
|
||||
profile_data.update(proxy_config)
|
||||
|
||||
response = self.client._make_request(
|
||||
'POST',
|
||||
'/api/v2/browser-profile/create',
|
||||
json=profile_data
|
||||
)
|
||||
|
||||
if response and response.get('code') == 0:
|
||||
profile_id = response.get('data', {}).get('profile_id')
|
||||
logger.info(f"✅ 创建环境 #{index}: {profile_id}")
|
||||
return {
|
||||
'index': index,
|
||||
'profile_id': profile_id,
|
||||
'name': profile_data['name'],
|
||||
'proxy': proxy_info,
|
||||
'proxy_id': proxy_id
|
||||
}
|
||||
self._proxy_id = proxy_id
|
||||
logger.info(f"[Task {index}] 创建代理成功: {proxy_id}")
|
||||
else:
|
||||
logger.error(f"❌ 创建环境 #{index} 失败: {response}")
|
||||
logger.warning(f"[Task {index}] 创建代理失败,将不使用代理")
|
||||
else:
|
||||
logger.warning(f"[Task {index}] 获取大麦代理失败,将不使用代理")
|
||||
|
||||
# 3. 创建新的profile(必须带proxy_id)
|
||||
if not proxy_id:
|
||||
logger.error(f"[Task {index}] 没有代理ID,无法创建profile")
|
||||
return None
|
||||
|
||||
import time
|
||||
profile_name = f"task_{index}_{int(time.time())}"
|
||||
profile_id = self.client.create_profile(group_id=group_id, name=profile_name, proxy_id=proxy_id)
|
||||
|
||||
if not profile_id:
|
||||
logger.error(f"[Task {index}] 创建profile失败")
|
||||
# 删除已创建的代理
|
||||
if self._proxy_id:
|
||||
self.client.delete_proxy(self._proxy_id)
|
||||
self._proxy_id = None
|
||||
return None
|
||||
|
||||
self._profile_id = profile_id
|
||||
logger.info(f"[Task {index}] 创建profile: {profile_id} (名称: {profile_name})")
|
||||
|
||||
# 4. 启动浏览器
|
||||
browser_info = self.client.start_browser(user_id=profile_id)
|
||||
|
||||
if not browser_info or browser_info.get('code') != 0:
|
||||
error_msg = browser_info.get('msg', '未知错误') if browser_info else '无响应'
|
||||
logger.error(f"[Task {index}] 启动浏览器失败: {error_msg}")
|
||||
# 清理资源
|
||||
self.client.delete_profile(profile_id)
|
||||
self._profile_id = None
|
||||
if self._proxy_id:
|
||||
self.client.delete_proxy(self._proxy_id)
|
||||
self._proxy_id = None
|
||||
return None
|
||||
|
||||
self._browser_info = browser_info
|
||||
self.client.user_id = profile_id
|
||||
|
||||
logger.info(f"[Task {index}] 浏览器已启动, profile_id: {profile_id}, proxy_id: {self._proxy_id}")
|
||||
|
||||
return {
|
||||
'profile_id': profile_id,
|
||||
'browser_info': browser_info,
|
||||
'proxy_id': self._proxy_id
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ 创建环境 #{index} 异常: {str(e)}")
|
||||
logger.error(f"[Task {index}] 创建浏览器环境异常: {str(e)}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return None
|
||||
|
||||
def execute_single_task(self, site_info: Dict, task_index: int, profile_id: str = None) -> Dict:
|
||||
def execute_single_task(self, site: Dict, index: int, profile_id: str) -> Dict:
|
||||
"""
|
||||
执行单个点击任务
|
||||
|
||||
Args:
|
||||
site_info: 站点信息
|
||||
task_index: 任务编号
|
||||
profile_id: 已创建的Profile ID(可选)
|
||||
site: 站点信息,包含 id, site_url 等
|
||||
index: 任务索引
|
||||
profile_id: 浏览器 Profile ID
|
||||
|
||||
Returns:
|
||||
执行结果字典
|
||||
执行结果字典 {'success': bool, 'error': str}
|
||||
"""
|
||||
# 设置线程名称
|
||||
threading.current_thread().name = f"Task-{task_index}"
|
||||
site_id = site.get('id')
|
||||
site_url = site.get('site_url')
|
||||
|
||||
site_id = site_info.get('id')
|
||||
site_url = site_info.get('site_url', site_info.get('url'))
|
||||
|
||||
result = {
|
||||
'task_index': task_index,
|
||||
'site_id': site_id,
|
||||
'site_url': site_url,
|
||||
'success': False,
|
||||
'click_count': 0,
|
||||
'has_ad': False,
|
||||
'has_reply': False,
|
||||
'error': None
|
||||
}
|
||||
|
||||
# 创建任务目录
|
||||
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||
task_folder = self.screenshot_dir / f"task_{task_index}_{timestamp}"
|
||||
task_folder.mkdir(exist_ok=True)
|
||||
|
||||
# 每个线程创建自己的客户端实例
|
||||
client = AdsPowerClient()
|
||||
logger.info(f"[Task {index}] 开始执行: site_id={site_id}, url={site_url}")
|
||||
|
||||
try:
|
||||
logger.info(f"[任务 {task_index}] 开始执行: {site_url}")
|
||||
|
||||
# 如果没有传入profile_id,则创建新的
|
||||
if not profile_id:
|
||||
profiles_data = client.list_profiles()
|
||||
if not profiles_data:
|
||||
result['error'] = "获取Profile列表失败"
|
||||
return result
|
||||
|
||||
profiles = profiles_data.get('data', {}).get('list', [])
|
||||
if not profiles:
|
||||
result['error'] = "没有可用的Profile"
|
||||
return result
|
||||
|
||||
profile_id = profiles[0].get('profile_id')
|
||||
logger.info(f"[任务 {task_index}] 使用Profile: {profile_id}")
|
||||
|
||||
# 使用锁控制浏览器启动
|
||||
with self._browser_start_lock:
|
||||
logger.debug(f"[任务 {task_index}] 启动浏览器...")
|
||||
browser_info = client.start_browser(user_id=profile_id)
|
||||
if not browser_info:
|
||||
result['error'] = "启动浏览器失败"
|
||||
return result
|
||||
time.sleep(1.5)
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
# 连接浏览器
|
||||
browser = client.connect_browser(browser_info)
|
||||
if not self._browser_info:
|
||||
return {'success': False, 'error': '浏览器未启动'}
|
||||
|
||||
browser = self.client.connect_browser(self._browser_info)
|
||||
if not browser:
|
||||
result['error'] = "CDP连接失败"
|
||||
return result
|
||||
return {'success': False, 'error': '连接浏览器失败'}
|
||||
|
||||
# 获取页面
|
||||
context = browser.contexts[0]
|
||||
all_pages = context.pages
|
||||
logger.debug(f"[任务 {task_index}] 当前标签页数: {len(all_pages)}")
|
||||
page = self.client.get_page(browser)
|
||||
if not page:
|
||||
return {'success': False, 'error': '获取页面失败'}
|
||||
|
||||
# 关闭AdsPower启动页
|
||||
for p in all_pages:
|
||||
try:
|
||||
if 'start.adspower.net' in p.url:
|
||||
p.close()
|
||||
except:
|
||||
pass
|
||||
# 清理多余标签页
|
||||
self._cleanup_tabs(browser)
|
||||
|
||||
# 获取或创建页面
|
||||
remaining_pages = context.pages
|
||||
page = remaining_pages[0] if remaining_pages else context.new_page()
|
||||
|
||||
# 执行广告点击和消息发送流程
|
||||
logger.info(f"[任务 {task_index}] 开始执行广告点击和咨询流程...")
|
||||
automation = MIPAdAutomation(page, task_index=task_index)
|
||||
click_success, has_reply = automation.check_and_click_ad(
|
||||
url=site_url,
|
||||
site_id=site_id
|
||||
)
|
||||
# 创建自动化实例并执行
|
||||
automation = MIPAdAutomation(page, task_index=index)
|
||||
click_success, has_reply = automation.check_and_click_ad(site_url, site_id=site_id)
|
||||
|
||||
if click_success:
|
||||
result['success'] = True
|
||||
result['click_count'] = 1
|
||||
result['has_ad'] = True
|
||||
result['has_reply'] = has_reply
|
||||
logger.info(f"[任务 {task_index}] ✅ 任务完成: 点击成功={click_success}, 收到回复={has_reply}")
|
||||
logger.info(f"[Task {index}] 点击成功, 收到回复: {has_reply}")
|
||||
return {'success': True, 'has_reply': has_reply}
|
||||
else:
|
||||
result['error'] = "广告点击失败"
|
||||
logger.warning(f"[任务 {task_index}] ❌ 广告点击失败")
|
||||
|
||||
# 关闭浏览器
|
||||
try:
|
||||
if browser:
|
||||
browser.close()
|
||||
time.sleep(0.5)
|
||||
except:
|
||||
pass
|
||||
|
||||
# 停止浏览器
|
||||
try:
|
||||
client.stop_browser(user_id=profile_id)
|
||||
logger.debug(f"[任务 {task_index}] 浏览器已关闭")
|
||||
time.sleep(1)
|
||||
except Exception as e:
|
||||
logger.warning(f"[任务 {task_index}] 停止浏览器失败: {str(e)}")
|
||||
|
||||
# 删除浏览器Profile(释放资源)
|
||||
try:
|
||||
logger.debug(f"[任务 {task_index}] 删除浏览器Profile: {profile_id}")
|
||||
client.delete_profile(profile_id)
|
||||
except Exception as e:
|
||||
logger.warning(f"[任务 {task_index}] 删除Profile异常: {str(e)}")
|
||||
logger.warning(f"[Task {index}] 点击失败")
|
||||
return {'success': False, 'error': '点击广告失败'}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[任务 {task_index}] 执行异常: {str(e)}")
|
||||
result['error'] = str(e)
|
||||
logger.error(f"[Task {index}] 执行异常: {str(e)}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return {'success': False, 'error': str(e)}
|
||||
|
||||
return result
|
||||
finally:
|
||||
# 关闭浏览器
|
||||
self.close_browser(profile_id)
|
||||
|
||||
def close_browser(self, profile_id: str = None):
|
||||
"""
|
||||
关闭浏览器并清理资源(代理和Profile)
|
||||
|
||||
Args:
|
||||
profile_id: Profile ID(可选)
|
||||
"""
|
||||
target_profile_id = profile_id or self._profile_id
|
||||
|
||||
try:
|
||||
# 1. 关闭浏览器
|
||||
logger.info(f"关闭浏览器: {target_profile_id or self.client.user_id}")
|
||||
self.client.stop_browser(user_id=target_profile_id)
|
||||
self._browser_info = None
|
||||
|
||||
# 2. 删除创建的代理
|
||||
if self._proxy_id:
|
||||
logger.info(f"删除代理: {self._proxy_id}")
|
||||
self.client.delete_proxy(self._proxy_id)
|
||||
self._proxy_id = None
|
||||
|
||||
# 3. 删除创建的Profile
|
||||
if self._profile_id:
|
||||
logger.info(f"删除Profile: {self._profile_id}")
|
||||
self.client.delete_profile(self._profile_id)
|
||||
self._profile_id = None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"关闭浏览器异常: {str(e)}")
|
||||
|
||||
def _cleanup_tabs(self, browser):
|
||||
"""
|
||||
清理多余的标签页,只保留一个
|
||||
|
||||
Args:
|
||||
browser: Playwright Browser 实例
|
||||
"""
|
||||
try:
|
||||
if browser.contexts:
|
||||
context = browser.contexts[0]
|
||||
pages = context.pages
|
||||
|
||||
# 如果有多个标签页,关闭多余的
|
||||
if len(pages) > 1:
|
||||
logger.info(f"清理多余标签页: {len(pages)} -> 1")
|
||||
for page in pages[1:]:
|
||||
try:
|
||||
page.close()
|
||||
except:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.debug(f"清理标签页异常: {str(e)}")
|
||||
|
||||
@@ -1,777 +0,0 @@
|
||||
"""
|
||||
AdsPower + Playwright CDP 集成测试
|
||||
演示如何通过 CDP 连接到 AdsPower 指纹浏览器
|
||||
"""
|
||||
|
||||
from loguru import logger
|
||||
from adspower_client import AdsPowerClient
|
||||
from config import Config
|
||||
from db_manager import SiteManager, ClickManager, InteractionManager
|
||||
import sys
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# 配置日志
|
||||
logger.remove()
|
||||
logger.add(
|
||||
sys.stdout,
|
||||
format="<green>{time:HH:mm:ss}</green> | <level>{level: <8}</level> | <level>{message}</level>",
|
||||
level="DEBUG" # 改为DEBUG级别
|
||||
)
|
||||
|
||||
|
||||
def create_test_folder(test_url: str):
|
||||
"""为每次测试创建独立的文件夹"""
|
||||
# 创建 test 目录
|
||||
test_base_dir = Path("./test")
|
||||
test_base_dir.mkdir(exist_ok=True)
|
||||
|
||||
# 生成文件夹名:时间_域名
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
|
||||
# 提取域名作为文件夹名称的一部分
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(test_url)
|
||||
domain = parsed.netloc.replace('.', '_').replace(':', '_')
|
||||
|
||||
# 创建测试文件夹
|
||||
test_folder = test_base_dir / f"{timestamp}_{domain}"
|
||||
test_folder.mkdir(exist_ok=True)
|
||||
|
||||
# 在文件夹中创建 info.txt 记录测试信息
|
||||
info_file = test_folder / "info.txt"
|
||||
with open(info_file, 'w', encoding='utf-8') as f:
|
||||
f.write(f"测试时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
|
||||
f.write(f"测试URL: {test_url}\n")
|
||||
f.write(f"测试环境: {Config.ENV}\n")
|
||||
|
||||
return test_folder
|
||||
|
||||
|
||||
def test_adspower_connection(use_proxy: bool = False, proxy_info: dict = None, use_api_v1: bool = False):
|
||||
"""测试 AdsPower + Playwright CDP 连接
|
||||
|
||||
Args:
|
||||
use_proxy: 是否使用大麦IP代理
|
||||
proxy_info: 已获取的代理信息
|
||||
use_api_v1: 是否使用API v1方式更新代理
|
||||
"""
|
||||
|
||||
# ==================== 配置区 ====================
|
||||
# 访问的网页地址,在这里修改
|
||||
TEST_URL = "https://health.baidu.com/m/detail/ar_2366617956693492811" # IP检测网站,可查看代理是否生效
|
||||
# 其他可选项:
|
||||
# TEST_URL = "https://www.baidu.com" # 百度
|
||||
# TEST_URL = "https://www.google.com" # Google
|
||||
# TEST_URL = "https://你的MIP页面地址" # 你的目标网页
|
||||
# =====================================================
|
||||
|
||||
client = AdsPowerClient()
|
||||
site_id = None
|
||||
click_id = None
|
||||
sent_message = None
|
||||
|
||||
# ============ 新增:创建测试文件夹 ============
|
||||
test_folder = create_test_folder(TEST_URL)
|
||||
logger.info(f"测试文件夹: {test_folder}")
|
||||
|
||||
# 配置日志输出到文件
|
||||
log_file = test_folder / "test.log"
|
||||
logger.add(
|
||||
str(log_file),
|
||||
format="{time:HH:mm:ss} | {level: <8} | {message}",
|
||||
level="DEBUG"
|
||||
)
|
||||
logger.info("=" * 60)
|
||||
logger.info("开始测试")
|
||||
logger.info("=" * 60)
|
||||
# ============================================
|
||||
|
||||
try:
|
||||
# ============ 新增:初始化数据库站点 ============
|
||||
logger.info("=" * 60)
|
||||
logger.info("初始化: 创建或获取测试站点")
|
||||
logger.info("=" * 60)
|
||||
|
||||
site_mgr = SiteManager()
|
||||
site = site_mgr.get_site_by_url(TEST_URL)
|
||||
|
||||
if not site:
|
||||
site_id = site_mgr.add_site(
|
||||
site_url=TEST_URL,
|
||||
site_name="测试站点-Playwright",
|
||||
site_dimension="医疗健康"
|
||||
)
|
||||
logger.info(f"✅ 创建测试站点: site_id={site_id}")
|
||||
else:
|
||||
site_id = site['id']
|
||||
logger.info(f"✅ 使用已存在站点: site_id={site_id}")
|
||||
|
||||
logger.info("")
|
||||
# ============================================
|
||||
# 0. 先根据环境查询分组,然后查询 Profile 列表
|
||||
logger.info("=" * 60)
|
||||
logger.info("步骤 0: 根据环境查询 Profile 列表")
|
||||
logger.info("=" * 60)
|
||||
|
||||
# 获取当前环境对应的分组ID
|
||||
group_id = client.get_group_by_env()
|
||||
if group_id:
|
||||
logger.info(f"当前环境: {Config.ENV}, 分组ID: {group_id}")
|
||||
else:
|
||||
logger.warning(f"未找到环境 {Config.ENV} 对应的分组,将查询所有Profile")
|
||||
|
||||
# 查询Profile列表(自动使用环境对应的分组)
|
||||
result = client.list_profiles(group_id=group_id)
|
||||
if not result:
|
||||
logger.error("查询 Profile 失败")
|
||||
return False
|
||||
|
||||
profiles = result.get('data', {}).get('list', [])
|
||||
if not profiles:
|
||||
logger.error(f"在分组 {group_id} 中没有可用的 Profile")
|
||||
logger.error("请在 AdsPower 中创建 Profile 并分配到对应分组")
|
||||
logger.error(f"提示: {Config.ENV} 环境需要创建名为 '{'dev' if Config.ENV == 'development' else 'prod'}' 的分组")
|
||||
return False
|
||||
|
||||
# 使用第一个 Profile
|
||||
first_profile = profiles[0]
|
||||
profile_id = first_profile.get('profile_id')
|
||||
profile_name = first_profile.get('name', 'N/A')
|
||||
|
||||
logger.info(f"将使用 Profile: {profile_name} (ID: {profile_id})")
|
||||
logger.info("")
|
||||
|
||||
# 1. 启动 AdsPower 浏览器(可选使用代理)
|
||||
logger.info("=" * 60)
|
||||
logger.info(f"步骤 1: {'[使用代理] ' if use_proxy else ''}启动 AdsPower 浏览器")
|
||||
if use_proxy:
|
||||
logger.info(f"代理更新方式: {'API v1 (直接传入proxy_config)' if use_api_v1 else 'API v2 (使用proxy_id引用)'}")
|
||||
logger.info("=" * 60)
|
||||
|
||||
if use_proxy and proxy_info:
|
||||
if use_api_v1:
|
||||
# 使用 API v1 方式:直接传入 proxy_config
|
||||
logger.info("使用 API v1 方式更新代理...")
|
||||
proxy_config_v1 = {
|
||||
"proxy_type": "http",
|
||||
"proxy_host": proxy_info["host"],
|
||||
"proxy_port": proxy_info["port"],
|
||||
"proxy_user": client.DAMAI_USER,
|
||||
"proxy_password": client.DAMAI_PASSWORD,
|
||||
"proxy_soft": "other"
|
||||
}
|
||||
|
||||
# 直接更新 Profile
|
||||
success = client.update_profile_proxy_v1(profile_id, proxy_config_v1)
|
||||
if not success:
|
||||
logger.warning("更新代理失败 (API v1),将不使用代理启动")
|
||||
else:
|
||||
# 使用 API v2 方式:先创建代理,再引用
|
||||
logger.info("使用 API v2 方式更新代理...")
|
||||
proxy_config = {
|
||||
"type": "http",
|
||||
"host": proxy_info["host"],
|
||||
"port": proxy_info["port"],
|
||||
"user": client.DAMAI_USER,
|
||||
"password": client.DAMAI_PASSWORD,
|
||||
"ipchecker": "ip2location",
|
||||
"remark": "Damai Auto Proxy"
|
||||
}
|
||||
|
||||
# 创建代理
|
||||
proxy_id = client.create_proxy(proxy_config)
|
||||
if proxy_id:
|
||||
# 更新 Profile
|
||||
client.update_profile_proxy(profile_id, proxy_id)
|
||||
else:
|
||||
logger.warning("创建代理失败,将不使用代理启动")
|
||||
|
||||
browser_info = client.start_browser(user_id=profile_id)
|
||||
else:
|
||||
browser_info = client.start_browser(user_id=profile_id)
|
||||
if not browser_info:
|
||||
logger.error("启动 AdsPower 浏览器失败")
|
||||
return False
|
||||
|
||||
logger.info(f"浏览器信息: {browser_info}")
|
||||
|
||||
# 2. 通过 CDP 连接到浏览器
|
||||
logger.info("")
|
||||
logger.info("=" * 60)
|
||||
logger.info("步骤 2: 通过 CDP 连接到浏览器")
|
||||
logger.info("=" * 60)
|
||||
|
||||
browser = client.connect_browser(browser_info)
|
||||
if not browser:
|
||||
logger.error("CDP 连接失败")
|
||||
return False
|
||||
|
||||
logger.info(f"浏览器版本: {browser.version}")
|
||||
logger.info(f"上下文数量: {len(browser.contexts)}")
|
||||
|
||||
# 3. 获取页面
|
||||
logger.info("")
|
||||
logger.info("=" * 60)
|
||||
logger.info("步骤 3: 获取浏览器页面")
|
||||
logger.info("=" * 60)
|
||||
|
||||
page = client.get_page(browser)
|
||||
if not page:
|
||||
logger.error("获取页面失败")
|
||||
return False
|
||||
|
||||
logger.info(f"页面 URL: {page.url}")
|
||||
|
||||
# 3.5. 关闭其他标签页,只保留AdsPower启动页
|
||||
logger.info("")
|
||||
logger.info("=" * 60)
|
||||
logger.info("步骤 3.5: 清理多余标签页")
|
||||
logger.info("=" * 60)
|
||||
|
||||
context = browser.contexts[0]
|
||||
all_pages = context.pages
|
||||
logger.info(f"当前打开的标签页数: {len(all_pages)}")
|
||||
|
||||
# 遍历所有页面,关闭非 AdsPower 启动页
|
||||
closed_count = 0
|
||||
for p in all_pages:
|
||||
try:
|
||||
page_url = p.url
|
||||
# 保留 AdsPower 启动页
|
||||
if 'start.adspower.net' in page_url:
|
||||
logger.info(f"保留启动页: {page_url}")
|
||||
else:
|
||||
logger.info(f"关闭标签页: {page_url}")
|
||||
p.close()
|
||||
closed_count += 1
|
||||
except Exception as e:
|
||||
logger.warning(f"关闭页面失败: {str(e)}")
|
||||
|
||||
logger.info(f"已关闭 {closed_count} 个标签页")
|
||||
|
||||
# 重新获取当前页面列表
|
||||
remaining_pages = context.pages
|
||||
logger.info(f"剩余标签页数: {len(remaining_pages)}")
|
||||
|
||||
# 如果所有页面都被关闭了,创建一个新页面
|
||||
if len(remaining_pages) == 0:
|
||||
logger.info("所有页面已关闭,创建新标签页")
|
||||
page = context.new_page()
|
||||
else:
|
||||
# 使用第一个剩余页面
|
||||
page = remaining_pages[0]
|
||||
logger.info(f"使用剩余页面: {page.url}")
|
||||
|
||||
# 4. 测试页面操作
|
||||
logger.info("")
|
||||
logger.info("=" * 60)
|
||||
logger.info("步骤 4: 测试页面操作")
|
||||
logger.info("=" * 60)
|
||||
|
||||
# 访问配置的网页
|
||||
logger.info(f"访问测试页面: {TEST_URL}")
|
||||
page.goto(TEST_URL, wait_until='domcontentloaded', timeout=60000)
|
||||
|
||||
# 等待页面完全加载
|
||||
logger.info("等待页面完全加载...")
|
||||
import time
|
||||
time.sleep(3)
|
||||
try:
|
||||
page.wait_for_load_state('networkidle', timeout=10000)
|
||||
except Exception:
|
||||
logger.warning("网络空闲超时,继续执行")
|
||||
time.sleep(2)
|
||||
|
||||
# 获取页面标题
|
||||
title = page.title()
|
||||
logger.info(f"页面标题: {title}")
|
||||
|
||||
# 获取页面 URL
|
||||
current_url = page.url
|
||||
logger.info(f"当前 URL: {current_url}")
|
||||
|
||||
# 截图测试(点击前)
|
||||
screenshot_path = test_folder / "01_before_click.png"
|
||||
page.screenshot(path=str(screenshot_path))
|
||||
logger.info(f"截图已保存: {screenshot_path}")
|
||||
|
||||
# 查找并点击广告
|
||||
logger.info("")
|
||||
logger.info("-" * 60)
|
||||
logger.info("开始查找广告元素...")
|
||||
|
||||
try:
|
||||
# 查找所有广告元素
|
||||
ad_selector = 'span.ec-tuiguang.ecfc-tuiguang.xz81bbe'
|
||||
ad_elements = page.locator(ad_selector)
|
||||
ad_count = ad_elements.count()
|
||||
|
||||
logger.info(f"找到 {ad_count} 个广告元素")
|
||||
|
||||
if ad_count > 0:
|
||||
# 点击第一个广告
|
||||
logger.info("准备点击第一个广告...")
|
||||
|
||||
# 滚动到元素可见
|
||||
first_ad = ad_elements.first
|
||||
first_ad.scroll_into_view_if_needed()
|
||||
time.sleep(1)
|
||||
|
||||
# 记录点击前的URL
|
||||
old_url = page.url
|
||||
logger.info(f"点击前URL: {old_url}")
|
||||
|
||||
# 点击广告(页面内跳转)
|
||||
first_ad.click()
|
||||
logger.info("✅ 已点击第一个广告")
|
||||
|
||||
# ============ 记录点击到数据库 ============
|
||||
click_mgr = ClickManager()
|
||||
click_id = click_mgr.record_click(
|
||||
site_id=site_id,
|
||||
site_url=TEST_URL,
|
||||
user_ip=None,
|
||||
device_type='pc'
|
||||
)
|
||||
logger.info(f"✅ 已记录点击: click_id={click_id}")
|
||||
# ============================================
|
||||
|
||||
# 等待页面跳转
|
||||
time.sleep(3)
|
||||
page.wait_for_load_state('domcontentloaded')
|
||||
|
||||
# 获取跳转后的URL
|
||||
new_url = page.url
|
||||
new_title = page.title()
|
||||
logger.info(f"跳转后URL: {new_url}")
|
||||
logger.info(f"跳转后标题: {new_title}")
|
||||
|
||||
# 截图(跳转后)
|
||||
screenshot_path_after = test_folder / "02_after_click.png"
|
||||
page.screenshot(path=str(screenshot_path_after))
|
||||
logger.info(f"跳转后截图已保存: {screenshot_path_after}")
|
||||
|
||||
# ============ 新增:发送咨询消息 ============
|
||||
logger.info("")
|
||||
logger.info("-" * 60)
|
||||
logger.info("开始发送咨询消息...")
|
||||
|
||||
# 预设消息列表
|
||||
consultation_messages = [
|
||||
"我想要预约一个医生,有什么推荐吗?",
|
||||
"我现在本人不在当地,医生什么时候有空,是随时能去吗?有没有推荐的医生。",
|
||||
"咱们医院是周六日是否上班,随时去吗?",
|
||||
"想找医生看看,有没有推荐的医生",
|
||||
"最近很不舒服,也说不出来全部的症状,能不能直接对话医生?"
|
||||
]
|
||||
|
||||
# 随机选择一条消息
|
||||
import random
|
||||
message = random.choice(consultation_messages)
|
||||
logger.info(f"选择的消息: {message}")
|
||||
|
||||
# 等待输入框加载
|
||||
time.sleep(2)
|
||||
|
||||
# 滚动到页面底部,确保输入框可见
|
||||
page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
|
||||
time.sleep(1)
|
||||
|
||||
# 输出页面HTML用于调试
|
||||
logger.info("正在分析页面结构...")
|
||||
html_content = page.content()
|
||||
html_file = test_folder / "page_html.txt"
|
||||
with open(html_file, 'w', encoding='utf-8') as f:
|
||||
f.write(html_content)
|
||||
logger.info(f"页面HTML已保存: {html_file}")
|
||||
|
||||
# 尝试查找输入框(通用策略)
|
||||
input_selectors = [
|
||||
# contenteditable 类型
|
||||
"textarea[contenteditable='true']",
|
||||
"div[contenteditable='true']",
|
||||
"*[contenteditable='true']",
|
||||
# 直接查找textarea
|
||||
"textarea",
|
||||
# 常见的 textarea
|
||||
"textarea[placeholder]",
|
||||
"textarea.input",
|
||||
"textarea[class*='input']",
|
||||
"textarea[class*='text']",
|
||||
"textarea[class*='box']",
|
||||
"textarea[class*='chat']",
|
||||
"textarea[class*='message']",
|
||||
# 常见的 input
|
||||
"input[type='text'][placeholder]",
|
||||
"input[class*='input']",
|
||||
"input[class*='text']",
|
||||
"input[class*='chat']",
|
||||
"input[class*='message']",
|
||||
# 全局备选
|
||||
"input[type='text']"
|
||||
]
|
||||
|
||||
input_found = False
|
||||
for selector in input_selectors:
|
||||
try:
|
||||
logger.debug(f"尝试选择器: {selector}")
|
||||
count = page.locator(selector).count()
|
||||
logger.debug(f" 找到 {count} 个匹配元素")
|
||||
|
||||
if count > 0:
|
||||
# 遍历所有匹配的元素,找第一个可见的
|
||||
for i in range(count):
|
||||
try:
|
||||
input_elem = page.locator(selector).nth(i)
|
||||
is_visible = input_elem.is_visible(timeout=1000)
|
||||
logger.debug(f" 元素 {i}: 可见={is_visible}")
|
||||
|
||||
if is_visible:
|
||||
logger.info(f"✅ 找到可见输入框: {selector} (第{i}个)")
|
||||
|
||||
# 滚动到输入框可见区域
|
||||
input_elem.scroll_into_view_if_needed()
|
||||
time.sleep(0.5)
|
||||
|
||||
# 点击输入框获取焦点
|
||||
input_elem.click()
|
||||
time.sleep(0.5)
|
||||
|
||||
# 输入消息
|
||||
input_elem.fill(message)
|
||||
logger.info("✅ 已输入消息")
|
||||
time.sleep(1)
|
||||
|
||||
# 保存已发送的消息
|
||||
sent_message = message
|
||||
|
||||
input_found = True
|
||||
break
|
||||
except Exception as e2:
|
||||
logger.debug(f" 元素 {i} 失败: {str(e2)}")
|
||||
continue
|
||||
|
||||
if input_found:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.debug(f" 失败: {str(e)}")
|
||||
continue
|
||||
|
||||
if not input_found:
|
||||
logger.warning("⚠️ 未找到输入框")
|
||||
debug_screenshot = test_folder / "debug_no_input.png"
|
||||
page.screenshot(path=str(debug_screenshot))
|
||||
logger.info(f"已保存调试截图: {debug_screenshot}")
|
||||
|
||||
# 兔底方案:点击页面底部上方位置,然后输入
|
||||
logger.info("尝试兔底方案:点击页面底部区域...")
|
||||
try:
|
||||
# 获取页面高度
|
||||
viewport_height = page.viewport_size['height']
|
||||
# 点击底部上方10px的位置(水平居中)
|
||||
click_x = page.viewport_size['width'] // 2
|
||||
click_y = viewport_height - 10
|
||||
|
||||
logger.debug(f"点击位置: ({click_x}, {click_y})")
|
||||
page.mouse.click(click_x, click_y)
|
||||
time.sleep(1)
|
||||
|
||||
# 直接输入文本
|
||||
page.keyboard.type(message, delay=50)
|
||||
logger.info("✅ 已输入消息(兔底方案)")
|
||||
time.sleep(1)
|
||||
|
||||
sent_message = message
|
||||
input_found = True
|
||||
except Exception as e:
|
||||
logger.error(f"兔底方案失败: {str(e)}")
|
||||
else:
|
||||
# 尝试发送消息
|
||||
message_sent = False
|
||||
|
||||
# 方法1: 先尝试按 Enter 键
|
||||
logger.info("尝试按 Enter 键发送...")
|
||||
try:
|
||||
page.keyboard.press('Enter')
|
||||
logger.info("✅ 已按 Enter 键")
|
||||
time.sleep(2)
|
||||
message_sent = True
|
||||
except Exception as e:
|
||||
logger.warning(f"按 Enter 键失败: {str(e)}")
|
||||
|
||||
# 方法2: 如果 Enter 键失败,查找并点击发送按钮
|
||||
if not message_sent:
|
||||
send_button_selectors = [
|
||||
# 按文本查找
|
||||
"button:has-text('发送')",
|
||||
"a:has-text('发送')",
|
||||
"span:has-text('发送')",
|
||||
"div:has-text('发送')",
|
||||
# 按类名查找
|
||||
"button[class*='send']",
|
||||
"button[class*='submit']",
|
||||
"a[class*='send']",
|
||||
"div[class*='send']",
|
||||
"span[class*='send']",
|
||||
# 按类型查找
|
||||
"button[type='submit']",
|
||||
# 全局备选
|
||||
"button"
|
||||
]
|
||||
|
||||
for selector in send_button_selectors:
|
||||
try:
|
||||
send_btn = page.locator(selector).first
|
||||
if send_btn.is_visible() and send_btn.is_enabled():
|
||||
send_btn.click()
|
||||
logger.info(f"✅ 已点击发送按钮: {selector}")
|
||||
message_sent = True
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if message_sent:
|
||||
logger.info("✅✅✅ 消息发送成功!")
|
||||
time.sleep(2)
|
||||
|
||||
# ============ 记录互动到数据库 ============
|
||||
interaction_mgr = InteractionManager()
|
||||
interaction_id = interaction_mgr.record_interaction(
|
||||
site_id=site_id,
|
||||
click_id=click_id,
|
||||
interaction_type='message', # 修复:使用 'message' 而非 'consultation'
|
||||
reply_content=sent_message,
|
||||
is_successful=True,
|
||||
response_received=False, # 后续可以添加检测逻辑
|
||||
response_content=None
|
||||
)
|
||||
logger.info(f"✅ 已记录互动: interaction_id={interaction_id}")
|
||||
# ============================================
|
||||
|
||||
# 截图(发送后)
|
||||
screenshot_path_sent = test_folder / "03_after_send.png"
|
||||
page.screenshot(path=str(screenshot_path_sent))
|
||||
logger.info(f"发送后截图已保存: {screenshot_path_sent}")
|
||||
else:
|
||||
logger.warning("⚠️ 未能发送消息")
|
||||
|
||||
# ============================================
|
||||
|
||||
else:
|
||||
logger.warning("⚠ 未找到广告元素")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"查找/点击广告或发送消息失败: {str(e)}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
# 保存错误时的截图
|
||||
try:
|
||||
error_screenshot = test_folder / "error.png"
|
||||
page.screenshot(path=str(error_screenshot))
|
||||
logger.info(f"错误截图已保存: {error_screenshot}")
|
||||
except:
|
||||
pass
|
||||
|
||||
# 5. 清理资源
|
||||
logger.info("")
|
||||
logger.info("=" * 60)
|
||||
logger.info("步骤 5: 测试完成 - 查询数据库记录")
|
||||
logger.info("=" * 60)
|
||||
|
||||
# ============ 查询数据库记录 ============
|
||||
if site_id:
|
||||
logger.info(f"\n站点ID: {site_id}")
|
||||
logger.info("-" * 60)
|
||||
|
||||
# 查询点击记录
|
||||
click_mgr = ClickManager()
|
||||
clicks = click_mgr.get_clicks_by_site(site_id, limit=5)
|
||||
click_count = click_mgr.get_click_count_by_site(site_id)
|
||||
logger.info(f"总点击次数: {click_count}")
|
||||
if clicks:
|
||||
last_click = clicks[0]
|
||||
logger.info(f"最新点击时间: {last_click['click_time']}")
|
||||
|
||||
# 查询互动记录
|
||||
interaction_mgr = InteractionManager()
|
||||
interactions = interaction_mgr.get_interactions_by_site(site_id, limit=5)
|
||||
success_count = interaction_mgr.get_successful_interactions_count(site_id)
|
||||
logger.info(f"成功互动次数: {success_count}")
|
||||
if interactions:
|
||||
last_interaction = interactions[0]
|
||||
logger.info(f"最新互动内容: {last_interaction['reply_content']}")
|
||||
logger.info(f"是否收到回复: {'是' if last_interaction['response_received'] else '否'}")
|
||||
# ============================================
|
||||
|
||||
logger.info("")
|
||||
# 关闭浏览器前,截图聊天页面最终状态
|
||||
try:
|
||||
logger.info("截图聊天页面...")
|
||||
# 等待可能的回复消息加载
|
||||
time.sleep(2)
|
||||
# 滚动到页面顶部,确保看到完整对话
|
||||
page.evaluate("window.scrollTo(0, 0)")
|
||||
time.sleep(0.5)
|
||||
# 截图整个页面
|
||||
screenshot_path_final = test_folder / "04_final_chat.png"
|
||||
page.screenshot(path=str(screenshot_path_final), full_page=True)
|
||||
logger.info(f"✅ 聊天页面截图已保存: {screenshot_path_final}")
|
||||
except Exception as screenshot_err:
|
||||
logger.warning(f"截图失败: {str(screenshot_err)}")
|
||||
|
||||
logger.info("")
|
||||
# 优雅关闭 Playwright 连接,避免 CancelledError
|
||||
try:
|
||||
if browser:
|
||||
logger.debug("关闭 Playwright 浏览器连接...")
|
||||
browser.close()
|
||||
time.sleep(0.5)
|
||||
except Exception as close_err:
|
||||
logger.debug(f"关闭浏览器连接异常: {str(close_err)}")
|
||||
|
||||
# 根据配置决定是否关闭浏览器
|
||||
if Config.AUTO_CLOSE_BROWSER:
|
||||
logger.info("正在关闭浏览器...")
|
||||
try:
|
||||
client.stop_browser(user_id=profile_id)
|
||||
logger.info("✅ 浏览器已关闭")
|
||||
except Exception as e:
|
||||
logger.warning(f"关闭浏览器失败: {str(e)}")
|
||||
else:
|
||||
logger.info("浏览器保持运行状态,可继续手动操作")
|
||||
logger.info("如需停止浏览器,请在AdsPower中手动关闭")
|
||||
|
||||
logger.info("")
|
||||
logger.info("="*60)
|
||||
logger.info("测试完成!")
|
||||
if Config.AUTO_CLOSE_BROWSER:
|
||||
logger.info("浏览器已关闭")
|
||||
else:
|
||||
logger.info("浏览器未关闭")
|
||||
logger.info("="*60)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"测试异常: {str(e)}")
|
||||
return False
|
||||
|
||||
finally:
|
||||
# 注意:不自动清理资源,保持浏览器运行
|
||||
pass
|
||||
|
||||
|
||||
def test_multiple_pages():
    """测试多页面操作

    Starts an AdsPower browser profile, opens two pages (Baidu and Bing)
    in the same context, then closes the second page.

    Returns:
        True when every step succeeded, False otherwise.

    The started browser profile is always stopped (best effort) in the
    ``finally`` block, regardless of the outcome.
    """
    client = AdsPowerClient()
    profile_id = None

    try:
        logger.info("=" * 60)
        logger.info("测试多页面操作")
        logger.info("=" * 60)

        # Resolve the profile group for the current environment and list
        # available profiles.
        group_id = client.get_group_by_env()
        result = client.list_profiles(group_id=group_id)
        if not result:
            return False

        profiles = result.get('data', {}).get('list', [])
        if not profiles:
            logger.error("没有可用的 Profile")
            return False

        profile_id = profiles[0].get('profile_id')

        # Start the browser and attach to it over CDP.
        browser_info = client.start_browser(user_id=profile_id)
        if not browser_info:
            return False

        browser = client.connect_browser(browser_info)
        if not browser:
            return False

        # Page 1: reuse the browser's existing page.
        page1 = client.get_page(browser)
        logger.info("访问百度...")
        page1.goto("https://www.baidu.com")
        logger.info(f"页面1标题: {page1.title()}")

        # Page 2: open a new tab in the same context.
        context = browser.contexts[0]
        page2 = context.new_page()
        logger.info("访问必应...")
        page2.goto("https://www.bing.com")
        logger.info(f"页面2标题: {page2.title()}")

        logger.info(f"当前打开的页面数: {len(context.pages)}")

        # Close the second page again.
        page2.close()
        logger.info("已关闭页面2")

        return True

    except Exception as e:
        logger.error(f"测试异常: {str(e)}")
        return False

    finally:
        # Best-effort cleanup: stop whatever browser profile we started.
        # FIX: narrowed the original bare `except:` to `except Exception`
        # so KeyboardInterrupt/SystemExit are not swallowed.
        try:
            if profile_id:
                client.stop_browser(user_id=profile_id)
            else:
                client.stop_browser()
        except Exception:
            # Cleanup failure must not mask the test result.
            pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
    logger.info("开始测试 AdsPower + Playwright CDP 集成")
    logger.info("")

    logger.info(f"当前环境: {Config.ENV}")
    logger.info(f"AdsPower API: {Config.ADSPOWER_API_URL}")
    logger.info("")

    # Build the AdsPower API client used by every step below.
    client = AdsPowerClient()

    # ==================== Configuration ====================
    # Route the browser through a proxy; set to False to disable.
    use_proxy = True
    # API flavor selector: True = API v1 (pass proxy_config directly),
    # False = API v2 (reference a created proxy by id).
    use_api_v1 = True
    # =======================================================

    proxy_info = None

    # When a proxy is requested, fetch it up front so a failure aborts
    # the run before any browser is started.
    if use_proxy:
        logger.info("")
        logger.info(f"使用代理模式: {'API v1 (直接传入proxy_config)' if use_api_v1 else 'API v2 (使用proxy_id引用)'}")
        logger.info("步骤 0: 提前获取大麦IP代理")
        proxy_info = client.get_damai_proxy()
        if not proxy_info:
            logger.error("获取代理失败,终止测试")
            sys.exit(1)
        logger.info(f"代理地址: {proxy_info['host']}:{proxy_info['port']}")
        logger.info("")

    # Run the basic connection test; exit non-zero on failure.
    if test_adspower_connection(use_proxy=use_proxy, proxy_info=proxy_info, use_api_v1=use_api_v1):
        logger.info("\n基本连接测试通过\n")
    else:
        logger.error("\n基本连接测试失败\n")
        sys.exit(1)

    # Multi-page test (disabled by default).
    # if test_multiple_pages():
    #     logger.info("\n多页面操作测试通过\n")
    # else:
    #     logger.error("\n多页面操作测试失败\n")
|
||||
@@ -1,98 +0,0 @@
|
||||
"""
|
||||
测试百度搜索爬虫
|
||||
"""
|
||||
|
||||
from loguru import logger
|
||||
from baidu_crawler import BaiduSearchCrawler
|
||||
from db_manager import QueryTaskManager
|
||||
from datetime import datetime
|
||||
import sys
|
||||
|
||||
logger.remove()
|
||||
logger.add(sys.stdout, format="<green>{time:HH:mm:ss}</green> | <level>{level: <8}</level> | <level>{message}</level>")
|
||||
|
||||
|
||||
def test_single_query():
    """测试爬取单个查询词

    Crawl one query word with a visible browser window and print the
    summary returned by ``BaiduSearchCrawler.crawl_query``.
    """
    banner = "=" * 70
    print(banner)
    print(" 测试爬取单个查询词")
    print(banner)

    # headless=False keeps the browser window visible; the crawler
    # auto-scrolls until no new content appears.
    crawler = BaiduSearchCrawler(headless=False)

    # Single test query with a result threshold of 50.
    query_word = "糖尿病治疗"
    result = crawler.crawl_query(query_word, category="医疗", threshold_max=50)

    print("\n爬取结果:")
    print(f" 查询词: {result['query_word']}")
    print(f" 是否成功: {result['success']}")
    print(f" 爬取数量: {result['crawled_count']}")
    print(f" 有效数量: {result['valid_count']}")
    print(f" 新增数量: {result['new_count']}")
    if result['error']:
        print(f" 错误信息: {result['error']}")
|
||||
|
||||
|
||||
def test_batch_crawl():
    """测试批量爬取任务

    Seed a few query tasks for today's date, run a batch crawl over
    them, and print the aggregate statistics.
    """
    banner = "=" * 70
    print(banner)
    print(" 测试批量爬取任务")
    print(banner)

    # Seed test tasks dated today.
    task_mgr = QueryTaskManager()
    task_date = datetime.now().strftime('%Y%m%d')

    # (query_word, query_type, category, priority, threshold_max)
    test_queries = [
        ("高血压怎么治疗", "keyword", "医疗", 3, 30),
        ("在线教育平台哪个好", "phrase", "教育", 5, 20),
        ("免费法律咨询", "keyword", "法律", 4, 25),
    ]

    logger.info("创建测试任务...")
    for query, qtype, category, priority, threshold in test_queries:
        task_mgr.create_task(
            query_word=query,
            query_type=qtype,
            task_date=task_date,
            threshold_max=threshold,  # per-task threshold
            priority=priority,
            category=category,
            remark="测试任务"
        )

    print()

    # Run the batch crawl; the crawler scrolls until each task's
    # threshold is reached.
    crawler = BaiduSearchCrawler(headless=False)
    stats = crawler.crawl_tasks(limit=3)

    print("\n批量爬取统计:")
    print(f" 总任务数: {stats['total_tasks']}")
    print(f" 成功: {stats['success_count']}")
    print(f" 失败: {stats['failed_count']}")
    print(f" 总爬取: {stats['total_crawled']}")
    print(f" 新增保存: {stats['total_saved']}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='测试百度搜索爬虫')
    parser.add_argument('--mode', choices=['single', 'batch'], default='single',
                        help='测试模式:single=单个查询, batch=批量任务')
    args = parser.parse_args()

    # Dispatch to the selected test and surface any failure with a
    # full traceback.
    try:
        if args.mode == 'single':
            test_single_query()
        else:
            test_batch_crawl()
    except Exception as e:
        logger.error(f"测试失败: {str(e)}")
        import traceback
        traceback.print_exc()
|
||||
@@ -1,581 +0,0 @@
|
||||
"""
|
||||
并发测试:批量创建浏览器环境并执行广告点击+聊天操作
|
||||
"""
|
||||
|
||||
from loguru import logger
|
||||
from adspower_client import AdsPowerClient
|
||||
from config import Config
|
||||
from db_manager import SiteManager, ClickManager, InteractionManager
|
||||
import sys
|
||||
import time
|
||||
import random
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import List, Dict
|
||||
import threading # 添加线程锁支持
|
||||
|
||||
# 配置日志
|
||||
logger.remove()
|
||||
logger.add(
|
||||
sys.stdout,
|
||||
format="<green>{time:HH:mm:ss}</green> | <level>{level: <8}</level> | <level>{message}</level>",
|
||||
level="INFO"
|
||||
)
|
||||
|
||||
|
||||
class ConcurrentTester:
|
||||
"""并发测试管理器"""
|
||||
|
||||
# 类级别的浏览器启动锁,确保启动操作串行化
|
||||
_browser_start_lock = threading.Lock()
|
||||
|
||||
def __init__(self, test_url: str, max_workers: int = 3):
|
||||
"""
|
||||
初始化并发测试器
|
||||
|
||||
Args:
|
||||
test_url: 测试的目标URL
|
||||
max_workers: 最大并发数
|
||||
"""
|
||||
self.test_url = test_url
|
||||
self.max_workers = max_workers
|
||||
self.client = AdsPowerClient()
|
||||
self.created_profiles = [] # 记录创建的环境ID
|
||||
self.created_proxies = [] # 记录创建的代理ID
|
||||
|
||||
# 创建测试目录
|
||||
self.test_base_dir = Path("./test_concurrent")
|
||||
self.test_base_dir.mkdir(exist_ok=True)
|
||||
|
||||
# 初始化数据库
|
||||
self.site_mgr = SiteManager()
|
||||
self.site_id = self._init_site()
|
||||
|
||||
def _init_site(self) -> int:
|
||||
"""初始化或获取站点"""
|
||||
site = self.site_mgr.get_site_by_url(self.test_url)
|
||||
if not site:
|
||||
site_id = self.site_mgr.add_site(
|
||||
site_url=self.test_url,
|
||||
site_name="并发测试站点",
|
||||
site_dimension="医疗健康"
|
||||
)
|
||||
logger.info(f"✅ 创建测试站点: site_id={site_id}")
|
||||
else:
|
||||
site_id = site['id']
|
||||
logger.info(f"✅ 使用已存在站点: site_id={site_id}")
|
||||
return site_id
|
||||
|
||||
def create_browser_profile(self, index: int) -> Dict:
|
||||
"""
|
||||
创建浏览器环境
|
||||
|
||||
Args:
|
||||
index: 环境编号
|
||||
|
||||
Returns:
|
||||
环境信息字典
|
||||
"""
|
||||
try:
|
||||
# 获取分组ID
|
||||
group_id = self.client.get_group_by_env()
|
||||
time.sleep(0.5) # API 调用间隔
|
||||
|
||||
# 获取大麦IP代理
|
||||
logger.info(f"[环境 {index}] 获取代理IP...")
|
||||
proxy_info = self.client.get_damai_proxy()
|
||||
time.sleep(0.5) # API 调用间隔
|
||||
|
||||
if not proxy_info:
|
||||
logger.warning(f"[环境 {index}] 获取代理失败,将使用随机代理")
|
||||
proxy_config = {}
|
||||
proxy_id = None
|
||||
else:
|
||||
logger.info(f"[环境 {index}] 代理IP: {proxy_info['host']}:{proxy_info['port']}")
|
||||
|
||||
# 创建代理并记录ID
|
||||
proxy_data = {
|
||||
"type": "http",
|
||||
"host": proxy_info["host"],
|
||||
"port": proxy_info["port"],
|
||||
"user": self.client.DAMAI_USER,
|
||||
"password": self.client.DAMAI_PASSWORD,
|
||||
"remark": f"并发测试代理_{index}"
|
||||
}
|
||||
|
||||
proxy_id = self.client.create_proxy(proxy_data)
|
||||
time.sleep(0.5) # API 调用间隔
|
||||
|
||||
if proxy_id:
|
||||
self.created_proxies.append(proxy_id)
|
||||
logger.info(f"[环境 {index}] 创建代理: {proxy_id}")
|
||||
proxy_config = {"proxyid": proxy_id}
|
||||
else:
|
||||
logger.warning(f"[环境 {index}] 创建代理失败")
|
||||
proxy_config = {}
|
||||
proxy_id = None
|
||||
|
||||
profile_data = {
|
||||
"name": f"并发测试_{index}_{datetime.now().strftime('%H%M%S')}",
|
||||
"group_id": str(group_id) if group_id else "0",
|
||||
"platform": "health.baidu.com",
|
||||
"tabs": [self.test_url],
|
||||
"repeat_config": [],
|
||||
"ignore_cookie_error": "1",
|
||||
"country": "cn",
|
||||
"city": "beijing",
|
||||
"remark": f"并发测试环境 #{index}",
|
||||
"fingerprint_config": {
|
||||
"automatic_timezone": "1",
|
||||
"flash": "block",
|
||||
"scan_port_type": "1",
|
||||
"location": "ask",
|
||||
"location_switch": "1",
|
||||
"canvas": "0",
|
||||
"webgl": "0",
|
||||
"audio": "0",
|
||||
"webrtc": "local",
|
||||
"do_not_track": "true",
|
||||
"hardware_concurrency": "default",
|
||||
"device_memory": "default",
|
||||
"gpu": "2",
|
||||
"mac_address_config": {
|
||||
"model": "1",
|
||||
"address": ""
|
||||
},
|
||||
"browser_kernel_config": {
|
||||
"version": "latest",
|
||||
"type": "chrome"
|
||||
},
|
||||
"random_ua": {
|
||||
"ua_system_version": ["Windows"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# 如果有代理配置,添加到profile_data
|
||||
if proxy_config:
|
||||
profile_data.update(proxy_config)
|
||||
|
||||
response = self.client._make_request(
|
||||
'POST',
|
||||
'/api/v2/browser-profile/create',
|
||||
json=profile_data
|
||||
)
|
||||
|
||||
if response and response.get('code') == 0:
|
||||
profile_id = response.get('data', {}).get('profile_id')
|
||||
logger.info(f"✅ 创建环境 #{index}: {profile_id}")
|
||||
self.created_profiles.append(profile_id)
|
||||
return {
|
||||
'index': index,
|
||||
'profile_id': profile_id,
|
||||
'name': profile_data['name'],
|
||||
'proxy': proxy_info if proxy_info else None
|
||||
}
|
||||
else:
|
||||
logger.error(f"❌ 创建环境 #{index} 失败: {response}")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ 创建环境 #{index} 异常: {str(e)}")
|
||||
return None
|
||||
|
||||
def run_single_task(self, profile_info: Dict) -> Dict:
|
||||
"""
|
||||
执行单个浏览器任务
|
||||
|
||||
Args:
|
||||
profile_info: 环境信息
|
||||
|
||||
Returns:
|
||||
执行结果
|
||||
"""
|
||||
index = profile_info['index']
|
||||
profile_id = profile_info['profile_id']
|
||||
|
||||
result = {
|
||||
'index': index,
|
||||
'profile_id': profile_id,
|
||||
'success': False,
|
||||
'click_id': None,
|
||||
'interaction_id': None,
|
||||
'error': None
|
||||
}
|
||||
|
||||
# 创建任务文件夹
|
||||
task_folder = self.test_base_dir / f"task_{index}_{datetime.now().strftime('%H%M%S')}"
|
||||
task_folder.mkdir(exist_ok=True)
|
||||
|
||||
# 每个线程创建自己的 AdsPowerClient 实例
|
||||
client = AdsPowerClient()
|
||||
|
||||
try:
|
||||
logger.info(f"[任务 {index}] 启动浏览器: {profile_id}")
|
||||
|
||||
# 使用类锁确保浏览器启动串行化,避免 API 频率限制
|
||||
with self._browser_start_lock:
|
||||
logger.debug(f"[任务 {index}] 获取启动锁...")
|
||||
# 启动浏览器
|
||||
browser_info = client.start_browser(user_id=profile_id)
|
||||
if not browser_info:
|
||||
result['error'] = "启动浏览器失败"
|
||||
return result
|
||||
# 启动后等待,避免下一个启动请求过快
|
||||
time.sleep(1.5)
|
||||
logger.debug(f"[任务 {index}] 释放启动锁")
|
||||
|
||||
time.sleep(1) # 额外等待浏览器完全启动
|
||||
|
||||
# 连接浏览器
|
||||
browser = client.connect_browser(browser_info)
|
||||
if not browser:
|
||||
result['error'] = "CDP连接失败"
|
||||
return result
|
||||
|
||||
# 获取页面
|
||||
context = browser.contexts[0]
|
||||
pages = context.pages
|
||||
|
||||
# 清理多余页面
|
||||
for p in pages:
|
||||
if 'start.adspower.net' in p.url:
|
||||
pages.remove(p)
|
||||
|
||||
if pages:
|
||||
page = pages[0]
|
||||
else:
|
||||
page = context.new_page()
|
||||
|
||||
logger.info(f"[任务 {index}] 访问页面: {self.test_url}")
|
||||
page.goto(self.test_url, wait_until='domcontentloaded', timeout=60000)
|
||||
time.sleep(3)
|
||||
|
||||
# 等待页面完全加载
|
||||
try:
|
||||
page.wait_for_load_state('networkidle', timeout=10000)
|
||||
except Exception:
|
||||
logger.warning(f"[任务 {index}] 网络空闲超时,继续执行")
|
||||
time.sleep(2)
|
||||
|
||||
# 截图
|
||||
page.screenshot(path=str(task_folder / "01_loaded.png"))
|
||||
|
||||
# 查找并点击广告
|
||||
ad_selector = 'span.ec-tuiguang.ecfc-tuiguang.xz81bbe'
|
||||
ad_elements = page.locator(ad_selector)
|
||||
ad_count = ad_elements.count()
|
||||
|
||||
logger.info(f"[任务 {index}] 找到 {ad_count} 个广告")
|
||||
|
||||
if ad_count > 0:
|
||||
# 点击第一个广告
|
||||
first_ad = ad_elements.first
|
||||
first_ad.scroll_into_view_if_needed()
|
||||
time.sleep(1)
|
||||
|
||||
# 点击,忽略超时错误
|
||||
try:
|
||||
first_ad.click(timeout=60000)
|
||||
logger.info(f"[任务 {index}] ✅ 已点击广告")
|
||||
except Exception as click_err:
|
||||
logger.warning(f"[任务 {index}] 点击超时,但可能已跳转")
|
||||
|
||||
# 记录点击
|
||||
click_mgr = ClickManager()
|
||||
click_id = click_mgr.record_click(
|
||||
site_id=self.site_id,
|
||||
site_url=self.test_url,
|
||||
user_ip=None,
|
||||
device_type='pc'
|
||||
)
|
||||
result['click_id'] = click_id
|
||||
|
||||
# 等待跳转
|
||||
time.sleep(3)
|
||||
page.wait_for_load_state('domcontentloaded')
|
||||
page.screenshot(path=str(task_folder / "02_after_click.png"))
|
||||
|
||||
# 发送消息
|
||||
messages = [
|
||||
"我想要预约一个医生,有什么推荐吗?",
|
||||
"我现在本人不在当地,医生什么时候有空,是随时能去吗?",
|
||||
"咱们医院是周六日是否上班,随时去吗?",
|
||||
"想找医生看看,有没有推荐的医生",
|
||||
"最近很不舒服,也说不出来全部的症状,能不能直接对话医生?"
|
||||
]
|
||||
message = random.choice(messages)
|
||||
|
||||
# 滚动到底部
|
||||
page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
|
||||
time.sleep(1)
|
||||
|
||||
# 查找输入框
|
||||
input_selectors = [
|
||||
"textarea[contenteditable='true']",
|
||||
"textarea",
|
||||
"textarea[placeholder]",
|
||||
"input[type='text']"
|
||||
]
|
||||
|
||||
input_found = False
|
||||
for selector in input_selectors:
|
||||
try:
|
||||
count = page.locator(selector).count()
|
||||
if count > 0:
|
||||
for i in range(count):
|
||||
input_elem = page.locator(selector).nth(i)
|
||||
if input_elem.is_visible(timeout=1000):
|
||||
input_elem.scroll_into_view_if_needed()
|
||||
time.sleep(0.5)
|
||||
input_elem.click()
|
||||
time.sleep(0.5)
|
||||
input_elem.fill(message)
|
||||
logger.info(f"[任务 {index}] ✅ 已输入消息")
|
||||
input_found = True
|
||||
break
|
||||
if input_found:
|
||||
break
|
||||
except:
|
||||
continue
|
||||
|
||||
# 兜底方案
|
||||
if not input_found:
|
||||
logger.warning(f"[任务 {index}] 未找到输入框,尝试兜底方案...")
|
||||
try:
|
||||
# 检查 viewport_size 是否为 None
|
||||
if page.viewport_size is None:
|
||||
logger.warning(f"[任务 {index}] viewport_size 为 None,设置默认视口")
|
||||
# 设置默认视口大小
|
||||
page.set_viewport_size({"width": 1280, "height": 720})
|
||||
time.sleep(0.5)
|
||||
|
||||
viewport_height = page.viewport_size['height']
|
||||
click_x = page.viewport_size['width'] // 2
|
||||
click_y = viewport_height - 10
|
||||
|
||||
logger.debug(f"[任务 {index}] 点击位置: ({click_x}, {click_y})")
|
||||
page.mouse.click(click_x, click_y)
|
||||
time.sleep(1)
|
||||
page.keyboard.type(message, delay=50)
|
||||
logger.info(f"[任务 {index}] ✅ 已输入消息(兜底)")
|
||||
input_found = True
|
||||
except Exception as fallback_err:
|
||||
logger.error(f"[任务 {index}] 兜底方案失败: {str(fallback_err)}")
|
||||
|
||||
# 发送消息
|
||||
if input_found:
|
||||
try:
|
||||
page.keyboard.press('Enter')
|
||||
logger.info(f"[任务 {index}] ✅ 已发送消息")
|
||||
time.sleep(2)
|
||||
|
||||
# 记录互动
|
||||
interaction_mgr = InteractionManager()
|
||||
interaction_id = interaction_mgr.record_interaction(
|
||||
site_id=self.site_id,
|
||||
click_id=click_id,
|
||||
interaction_type='message',
|
||||
reply_content=message,
|
||||
is_successful=True,
|
||||
response_received=False,
|
||||
response_content=None
|
||||
)
|
||||
result['interaction_id'] = interaction_id
|
||||
|
||||
page.screenshot(path=str(task_folder / "03_sent.png"))
|
||||
result['success'] = True
|
||||
except Exception as e:
|
||||
logger.warning(f"[任务 {index}] 发送失败: {str(e)}")
|
||||
|
||||
# 关闭浏览器前,截图聊天页面最终状态
|
||||
try:
|
||||
logger.info(f"[任务 {index}] 截图聊天页面...")
|
||||
# 等待可能的回复消息加载
|
||||
time.sleep(2)
|
||||
# 滚动到页面顶部,确保看到完整对话
|
||||
page.evaluate("window.scrollTo(0, 0)")
|
||||
time.sleep(0.5)
|
||||
# 截图整个页面
|
||||
page.screenshot(path=str(task_folder / "04_final_chat.png"), full_page=True)
|
||||
logger.info(f"[任务 {index}] ✅ 聊天页面截图已保存")
|
||||
except Exception as screenshot_err:
|
||||
logger.warning(f"[任务 {index}] 截图失败: {str(screenshot_err)}")
|
||||
|
||||
# 优雅关闭 Playwright 连接,避免 CancelledError
|
||||
try:
|
||||
if browser:
|
||||
logger.debug(f"[任务 {index}] 关闭 Playwright 浏览器连接...")
|
||||
browser.close()
|
||||
time.sleep(0.5)
|
||||
except Exception as close_err:
|
||||
logger.debug(f"[任务 {index}] 关闭浏览器连接异常: {str(close_err)}")
|
||||
|
||||
# 根据配置决定是否关闭浏览器进程
|
||||
if Config.AUTO_CLOSE_BROWSER:
|
||||
try:
|
||||
client.stop_browser(user_id=profile_id)
|
||||
logger.info(f"[任务 {index}] 浏览器已关闭")
|
||||
except Exception as stop_err:
|
||||
logger.warning(f"[任务 {index}] 关闭浏览器失败: {str(stop_err)}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[任务 {index}] 执行异常: {str(e)}")
|
||||
result['error'] = str(e)
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
return result
|
||||
|
||||
def delete_profiles(self, profile_ids: List[str]):
|
||||
"""
|
||||
批量删除环境
|
||||
|
||||
Args:
|
||||
profile_ids: 环境ID列表
|
||||
"""
|
||||
if not profile_ids:
|
||||
return
|
||||
|
||||
try:
|
||||
response = self.client._make_request(
|
||||
'POST',
|
||||
'/api/v2/browser-profile/delete',
|
||||
json={'profile_id': profile_ids}
|
||||
)
|
||||
|
||||
if response and response.get('code') == 0:
|
||||
logger.info(f"✅ 已删除 {len(profile_ids)} 个环境")
|
||||
else:
|
||||
logger.error(f"❌ 删除环境失败: {response}")
|
||||
except Exception as e:
|
||||
logger.error(f"❌ 删除环境异常: {str(e)}")
|
||||
|
||||
def delete_proxies(self, proxy_ids: List[str]):
|
||||
"""
|
||||
批量删除代理
|
||||
|
||||
Args:
|
||||
proxy_ids: 代理ID列表
|
||||
"""
|
||||
if not proxy_ids:
|
||||
return
|
||||
|
||||
try:
|
||||
response = self.client._make_request(
|
||||
'POST',
|
||||
'/api/v2/proxy-list/delete',
|
||||
json={'proxy_id': proxy_ids}
|
||||
)
|
||||
|
||||
if response and response.get('code') == 0:
|
||||
logger.info(f"✅ 已删除 {len(proxy_ids)} 个代理")
|
||||
else:
|
||||
logger.error(f"❌ 删除代理失败: {response}")
|
||||
except Exception as e:
|
||||
logger.error(f"❌ 删除代理异帰常: {str(e)}")
|
||||
|
||||
def run_concurrent_test(self, num_tasks: int):
    """Run the end-to-end concurrent browser test.

    Creates ``num_tasks`` browser profiles, executes one task per profile
    with at most ``self.max_workers`` running in parallel, logs a result
    summary, and — when ``Config.AUTO_CLOSE_BROWSER`` is enabled — deletes
    the profiles and proxies that were created during the run.

    Args:
        num_tasks: Number of tasks (one browser profile each) to launch.
    """
    logger.info("=" * 60)
    logger.info(f"开始并发测试: {num_tasks} 个任务, 最大并发数: {self.max_workers}")
    logger.info("=" * 60)

    # Step 1: create the browser profiles up front.
    logger.info("\n步骤 1: 创建浏览器环境")
    logger.info("-" * 60)

    profiles = []
    for i in range(num_tasks):
        profile_info = self.create_browser_profile(i + 1)
        if profile_info:
            profiles.append(profile_info)
            proxy_info = profile_info.get('proxy')
            if proxy_info:
                logger.info(f"环境 #{i+1} 使用代理: {proxy_info['host']}:{proxy_info['port']}")
        # Pause between profile creations to avoid tripping the API rate limit.
        time.sleep(3)  # wait 3 seconds after each profile is created

    logger.info(f"✅ 成功创建 {len(profiles)} 个环境\n")

    if not profiles:
        logger.error("没有成功创建任何环境,退出测试")
        return

    # Step 2: execute the tasks concurrently.
    logger.info("步骤 2: 并发执行任务")
    logger.info("-" * 60)

    results = []
    with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
        futures = {
            executor.submit(self.run_single_task, profile): profile
            for profile in profiles
        }

        for future in as_completed(futures):
            profile = futures[future]
            try:
                result = future.result()
                results.append(result)

                status = "✅ 成功" if result['success'] else "❌ 失败"
                logger.info(f"[任务 {result['index']}] {status}")

            except Exception as e:
                logger.error(f"[任务 {profile['index']}] 执行异常: {str(e)}")

    # Step 3: summarize the outcomes.
    logger.info("\n步骤 3: 测试结果统计")
    logger.info("=" * 60)

    success_count = sum(1 for r in results if r['success'])
    failed_count = len(results) - success_count

    logger.info(f"总任务数: {len(results)}")
    logger.info(f"成功数: {success_count}")
    logger.info(f"失败数: {failed_count}")
    # Fixed: guard against ZeroDivisionError when every future raised and
    # no result was collected.
    if results:
        logger.info(f"成功率: {success_count/len(results)*100:.1f}%")
    else:
        logger.info("成功率: N/A")

    # Step 4: optional cleanup of everything created for this run.
    if Config.AUTO_CLOSE_BROWSER:
        logger.info("\n步骤 4: 清理测试环境")
        logger.info("-" * 60)

        # Delete the browser profiles.
        if self.created_profiles:
            self.delete_profiles(self.created_profiles)

        # Delete the proxies.
        if self.created_proxies:
            self.delete_proxies(self.created_proxies)

    logger.info("\n" + "=" * 60)
    logger.info("并发测试完成!")
    logger.info("=" * 60)
if __name__ == "__main__":
    # Print a startup banner with the active environment and API endpoint.
    logger.info("并发测试工具")
    logger.info(f"当前环境: {Config.ENV}")
    logger.info(f"AdsPower API: {Config.ADSPOWER_API_URL}")
    logger.info("")

    # ==================== configuration ====================
    test_url = "https://health.baidu.com/m/detail/ar_2366617956693492811"
    num_tasks = 3    # number of concurrent tasks
    max_workers = 3  # concurrency cap (keep at 3 or fewer)
    # =======================================================

    runner = ConcurrentTester(test_url=test_url, max_workers=max_workers)
    runner.run_concurrent_test(num_tasks=num_tasks)
Before Width: | Height: | Size: 70 KiB |
|
Before Width: | Height: | Size: 232 KiB |
|
Before Width: | Height: | Size: 55 KiB |
|
Before Width: | Height: | Size: 92 KiB |
|
Before Width: | Height: | Size: 69 KiB |
|
Before Width: | Height: | Size: 238 KiB |
|
Before Width: | Height: | Size: 10 KiB |
|
Before Width: | Height: | Size: 252 KiB |
|
Before Width: | Height: | Size: 10 KiB |
|
Before Width: | Height: | Size: 117 KiB |
|
Before Width: | Height: | Size: 117 KiB |
|
Before Width: | Height: | Size: 258 KiB |
|
Before Width: | Height: | Size: 684 KiB |
|
Before Width: | Height: | Size: 251 KiB |
|
Before Width: | Height: | Size: 233 KiB |
|
Before Width: | Height: | Size: 160 KiB |
|
Before Width: | Height: | Size: 136 KiB |
|
Before Width: | Height: | Size: 245 KiB |
|
Before Width: | Height: | Size: 111 KiB |
|
Before Width: | Height: | Size: 104 KiB |
|
Before Width: | Height: | Size: 233 KiB |
|
Before Width: | Height: | Size: 53 KiB |
|
Before Width: | Height: | Size: 253 KiB |
|
Before Width: | Height: | Size: 163 KiB |
|
Before Width: | Height: | Size: 96 KiB |
|
Before Width: | Height: | Size: 242 KiB |
|
Before Width: | Height: | Size: 181 KiB |
|
Before Width: | Height: | Size: 196 KiB |
|
Before Width: | Height: | Size: 256 KiB |
|
Before Width: | Height: | Size: 679 KiB |
|
Before Width: | Height: | Size: 142 KiB |
|
Before Width: | Height: | Size: 10 KiB |