# ai_mip/test_db_tasks.py
"""
从数据库读取URL并执行批量点击任务
支持单线程和并发两种模式
"""
from loguru import logger
from adspower_client import AdsPowerClient
from ad_automation import MIPAdAutomation
from config import Config
from db_manager import SiteManager, ClickManager, InteractionManager
from data_manager import DataManager
import sys
import time
import random
from datetime import datetime
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Optional
import threading
2026-01-17 13:11:40 +08:00
# 配置日志(添加线程标识 + 文件输出)
2026-01-16 22:06:46 +08:00
logger.remove()
2026-01-17 13:11:40 +08:00
# 控制台输出
2026-01-16 22:06:46 +08:00
logger.add(
sys.stdout,
format="<green>{time:HH:mm:ss}</green> | <cyan>[{thread.name}]</cyan> | <level>{level: <8}</level> | <level>{message}</level>",
level="INFO"
)
2026-01-17 13:11:40 +08:00
# 文件输出
from pathlib import Path
log_dir = Path("./logs")
log_dir.mkdir(exist_ok=True)
logger.add(
log_dir / "db_tasks_{time:YYYY-MM-DD}.log",
format="{time:YYYY-MM-DD HH:mm:ss} | [{thread.name}] | {level: <8} | {message}",
level="INFO",
rotation="00:00", # 每天凌晨切割日志
retention="30 days", # 保留30天
encoding="utf-8"
)
2026-01-16 22:06:46 +08:00
class DatabaseTaskExecutor:
    """Executes batch click tasks whose target URLs come from the database."""

    # Serializes browser start-up across worker threads.
    _browser_start_lock = threading.Lock()

    def __init__(self, max_workers: int = 1, use_proxy: bool = True):
        """
        Initialize the task executor.

        Args:
            max_workers: maximum concurrency (1 = serial, >1 = concurrent)
            use_proxy: whether browsers should run behind a proxy
        """
        self.max_workers = max_workers
        self.use_proxy = use_proxy
        self.client = AdsPowerClient()
        self.dm = DataManager()

        # Screenshot directory, organized by date (one batch folder per day).
        batch_date = datetime.now().strftime('%Y%m%d')
        self.screenshot_dir = Path("./test") / f"batch_{batch_date}"
        self.screenshot_dir.mkdir(parents=True, exist_ok=True)

        mode = '并发' if max_workers > 1 else '串行'
        logger.info(f"执行模式: {mode}")
        logger.info(f"最大并发数: {max_workers}")
        logger.info(f"使用代理: {use_proxy}")
        logger.info(f"截图目录: {self.screenshot_dir}")
def get_active_tasks(self, limit: Optional[int] = None) -> List[Dict]:
"""
从数据库获取活跃的站点任务
Args:
limit: 限制数量None表示获取全部
Returns:
站点列表
"""
active_sites = self.dm.get_active_urls()
if limit and limit > 0:
active_sites = active_sites[:limit]
logger.info(f"从数据库获取 {len(active_sites)} 个活跃站点")
return active_sites
def create_browser_profile(self, index: int) -> Dict:
"""
创建浏览器环境
Args:
index: 环境编号
Returns:
环境信息字典
"""
try:
# 获取分组ID
group_id = self.client.get_group_by_env()
time.sleep(0.5)
# 如果使用代理,获取代理配置
proxy_config = {}
proxy_id = None
proxy_info = None
if self.use_proxy:
logger.info(f"[环境 {index}] 获取代理IP...")
proxy_info = self.client.get_damai_proxy()
time.sleep(0.5)
if proxy_info:
logger.info(f"[环境 {index}] 代理IP: {proxy_info['host']}:{proxy_info['port']}")
proxy_data = {
"type": "http",
"host": proxy_info["host"],
"port": proxy_info["port"],
"user": self.client.DAMAI_USER,
"password": self.client.DAMAI_PASSWORD,
"remark": f"DB任务代理_{index}"
}
proxy_id = self.client.create_proxy(proxy_data)
time.sleep(0.5)
if proxy_id:
logger.info(f"[环境 {index}] 创建代理: {proxy_id}")
proxy_config = {"proxyid": proxy_id}
# 创建 Profile
# 根据环境变量决定操作系统
from config import Config
os_type = "Linux" if Config.ENV == "production" else "Windows"
profile_data = {
"name": f"DB任务_{index}_{datetime.now().strftime('%H%M%S')}",
"group_id": str(group_id) if group_id else "0",
"platform": "health.baidu.com",
"repeat_config": [],
"ignore_cookie_error": "1",
"country": "cn",
"city": "beijing",
"remark": f"DB任务环境 #{index}",
"fingerprint_config": {
"automatic_timezone": "1",
"flash": "block",
"scan_port_type": "1",
"location": "ask",
"location_switch": "1",
"canvas": "0",
"webgl": "0",
"audio": "0",
"webrtc": "local",
"do_not_track": "true",
"hardware_concurrency": "default",
"device_memory": "default",
"gpu": "2",
"mac_address_config": {
"model": "1",
"address": ""
},
"browser_kernel_config": {
"version": "latest",
"type": "chrome"
},
"random_ua": {
"ua_system_version": [os_type] # 根据环境动态设置
}
}
}
logger.info(f"[环境 {index}] 操作系统: {os_type} (ENV={Config.ENV})")
if proxy_config:
profile_data.update(proxy_config)
response = self.client._make_request(
'POST',
'/api/v2/browser-profile/create',
json=profile_data
)
if response and response.get('code') == 0:
profile_id = response.get('data', {}).get('profile_id')
logger.info(f"✅ 创建环境 #{index}: {profile_id}")
return {
'index': index,
'profile_id': profile_id,
'name': profile_data['name'],
'proxy': proxy_info,
'proxy_id': proxy_id
}
else:
logger.error(f"❌ 创建环境 #{index} 失败: {response}")
return None
except Exception as e:
logger.error(f"❌ 创建环境 #{index} 异常: {str(e)}")
return None
def execute_single_task(self, site_info: Dict, task_index: int, profile_id: str = None) -> Dict:
"""
执行单个点击任务
Args:
site_info: 站点信息
task_index: 任务编号
profile_id: 已创建的Profile ID可选
Returns:
执行结果
"""
# 设置线程名称
threading.current_thread().name = f"Task-{task_index}"
site_id = site_info.get('id')
site_url = site_info.get('site_url', site_info.get('url'))
result = {
'task_index': task_index,
'site_id': site_id,
'site_url': site_url,
'success': False,
'click_count': 0,
'has_ad': False,
'has_reply': False,
'error': None
}
# 创建任务目录
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
task_folder = self.screenshot_dir / f"task_{task_index}_{timestamp}"
task_folder.mkdir(exist_ok=True)
# 每个线程创建自己的客户端实例
client = AdsPowerClient()
proxy_info = None
try:
logger.info(f"[任务 {task_index}] 开始执行: {site_url}")
# 如果没有传入profile_id则创建新的
if not profile_id:
# 获取Profile列表
profiles_data = client.list_profiles()
if not profiles_data:
result['error'] = "获取Profile列表失败"
return result
profiles = profiles_data.get('data', {}).get('list', [])
if not profiles:
result['error'] = "没有可用的Profile"
return result
# 使用第一个Profile
profile_id = profiles[0].get('profile_id')
logger.info(f"[任务 {task_index}] 使用Profile: {profile_id}")
# 如果使用代理更新Profile代理配置
if self.use_proxy:
logger.info(f"[任务 {task_index}] 获取代理IP...")
proxy_info = client.get_damai_proxy()
time.sleep(0.5)
if proxy_info:
logger.info(f"[任务 {task_index}] 代理IP: {proxy_info['host']}:{proxy_info['port']}")
proxy_config = {
"proxy_type": "http",
"proxy_host": proxy_info["host"],
"proxy_port": proxy_info["port"],
"proxy_user": client.DAMAI_USER,
"proxy_password": client.DAMAI_PASSWORD,
"proxy_soft": "other"
}
# 更新代理使用API v1方式
success = client.update_profile_proxy_v1(profile_id, proxy_config)
if success:
logger.info(f"[任务 {task_index}] 代理配置成功")
else:
logger.warning(f"[任务 {task_index}] 代理配置失败,继续执行")
time.sleep(1)
# 使用锁控制浏览器启动
with self._browser_start_lock:
logger.debug(f"[任务 {task_index}] 启动浏览器...")
browser_info = client.start_browser(user_id=profile_id)
if not browser_info:
result['error'] = "启动浏览器失败"
return result
time.sleep(1.5)
time.sleep(1)
# 连接浏览器
browser = client.connect_browser(browser_info)
if not browser:
result['error'] = "CDP连接失败"
return result
# 获取页面
context = browser.contexts[0]
all_pages = context.pages
logger.info(f"[任务 {task_index}] 当前标签页数: {len(all_pages)}")
# 关闭AdsPower启动页
closed_count = 0
for p in all_pages:
try:
if 'start.adspower.net' in p.url:
logger.debug(f"[任务 {task_index}] 关闭启动页: {p.url}")
p.close()
closed_count += 1
except:
pass
if closed_count > 0:
logger.info(f"[任务 {task_index}] 已关闭 {closed_count} 个启动页")
# 获取或创建页面
remaining_pages = context.pages
if remaining_pages:
page = remaining_pages[0]
else:
page = context.new_page()
# 使用 MIPAdAutomation 执行完整的广告点击和消息发送流程
logger.info(f"[任务 {task_index}] 开始执行广告点击和咨询流程...")
automation = MIPAdAutomation(page, task_index=task_index) # 传入task_index创建日志目录
click_success, has_reply = automation.check_and_click_ad(
url=site_url,
site_id=site_id
)
if click_success:
result['success'] = True
result['click_count'] = 1
result['has_ad'] = True
result['has_reply'] = has_reply
logger.info(f"[任务 {task_index}] ✅ 任务完成: 点击成功={click_success}, 收到回复={has_reply}")
else:
result['error'] = "广告点击失败"
logger.warning(f"[任务 {task_index}] ❌ 广告点击失败")
# 关闭浏览器连接
try:
if browser:
browser.close()
time.sleep(0.5)
except:
pass
# 停止浏览器
try:
client.stop_browser(user_id=profile_id)
logger.info(f"[任务 {task_index}] 浏览器已关闭")
time.sleep(1)
except Exception as e:
logger.warning(f"[任务 {task_index}] 停止浏览器失败: {str(e)}")
# 删除浏览器Profile释放资源
try:
logger.info(f"[任务 {task_index}] 删除浏览器Profile: {profile_id}")
delete_result = client.delete_profile(profile_id)
if delete_result:
logger.info(f"[任务 {task_index}] ✅ Profile已删除")
else:
logger.warning(f"[任务 {task_index}] Profile删除失败")
except Exception as e:
logger.warning(f"[任务 {task_index}] 删除Profile异常: {str(e)}")
except Exception as e:
logger.error(f"[任务 {task_index}] 执行异常: {str(e)}")
result['error'] = str(e)
import traceback
traceback.print_exc()
return result
def run_tasks(self, limit: Optional[int] = None):
"""
执行批量任务边创建边执行
Args:
limit: 限制处理的站点数量None表示处理全部
"""
logger.info("=" * 60)
logger.info("从数据库执行批量点击任务")
logger.info("=" * 60)
# 获取任务列表
sites = self.get_active_tasks(limit=limit)
if not sites:
logger.warning("没有可执行的任务")
return
logger.info(f"\n准备执行 {len(sites)} 个任务\n")
# 显示任务列表
for idx, site in enumerate(sites, 1):
site_url = site.get('site_url', site.get('url'))
click_count = site.get('click_count', 0)
logger.info(f" {idx}. {site_url} (已点击: {click_count}次)")
logger.info("\n" + "-" * 60)
logger.info("边创建环境边执行任务...\n")
start_time = time.time()
results = []
if self.max_workers == 1:
# 串行执行:创建一个,执行一个
for idx, site in enumerate(sites, 1):
# 创建环境
logger.info(f"[任务 {idx}] 创建浏览器环境...")
profile_info = self.create_browser_profile(idx)
if not profile_info:
logger.error(f"[任务 {idx}] 创建环境失败,跳过")
continue
if profile_info.get('proxy'):
logger.info(f"[任务 {idx}] 使用代理: {profile_info['proxy']['host']}:{profile_info['proxy']['port']}")
time.sleep(2) # 环境创建间隔
# 立即执行任务
result = self.execute_single_task(site, idx, profile_info['profile_id'])
results.append(result)
# 任务间隔
if idx < len(sites):
wait_time = random.randint(3, 5)
logger.info(f"等待 {wait_time} 秒后执行下一个任务...\n")
time.sleep(wait_time)
else:
# 并发执行:边创建边提交
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
futures = []
for idx, site in enumerate(sites, 1):
# 创建环境
logger.info(f"[任务 {idx}] 创建浏览器环境...")
profile_info = self.create_browser_profile(idx)
if not profile_info:
logger.error(f"[任务 {idx}] 创建环境失败,跳过")
continue
if profile_info.get('proxy'):
logger.info(f"[任务 {idx}] 使用代理: {profile_info['proxy']['host']}:{profile_info['proxy']['port']}")
# 立即提交任务到线程池
future = executor.submit(self.execute_single_task, site, idx, profile_info['profile_id'])
futures.append((future, idx))
time.sleep(2) # 环境创建间隔
# 等待所有任务完成
for future, idx in futures:
try:
result = future.result()
results.append(result)
status = "成功" if result['success'] else "失败"
logger.info(f"[任务 {result['task_index']}] {status}")
except Exception as e:
logger.error(f"[任务 {idx}] 执行异常: {str(e)}")
# 统计结果
elapsed_time = time.time() - start_time
logger.info("\n" + "=" * 60)
logger.info("任务执行完成")
logger.info("=" * 60)
success_count = sum(1 for r in results if r['success'])
failed_count = len(results) - success_count
has_ad_count = sum(1 for r in results if r['has_ad'])
has_reply_count = sum(1 for r in results if r.get('has_reply', False))
total_clicks = sum(r['click_count'] for r in results)
logger.info(f"总任务数: {len(results)}")
logger.info(f"成功数: {success_count}")
logger.info(f"失败数: {failed_count}")
logger.info(f"有广告页面: {has_ad_count}")
logger.info(f"总点击次数: {total_clicks}")
logger.info(f"收到回复数: {has_reply_count}")
logger.info(f"成功率: {success_count/len(results)*100:.1f}%")
logger.info(f"回复率: {has_reply_count/total_clicks*100 if total_clicks > 0 else 0:.1f}%")
logger.info(f"耗时: {elapsed_time:.1f}")
# 显示数据库统计
logger.info("\n数据库统计:")
stats = self.dm.get_statistics()
for key, value in stats.items():
logger.info(f" {key}: {value}")
logger.info("\n" + "=" * 60)
if __name__ == "__main__":
logger.info("数据库任务执行器")
logger.info(f"当前环境: {Config.ENV}")
logger.info(f"AdsPower API: {Config.ADSPOWER_API_URL}")
logger.info("")
# ==================== 配置区 ====================
# 执行模式1=串行,>1=并发
MAX_WORKERS = 1
# 是否使用代理
USE_PROXY = True
# 限制执行数量None=全部,数字=限制数量)
LIMIT = 3
# 是否自动关闭浏览器测试时可设为False保持浏览器打开
Config.AUTO_CLOSE_BROWSER = False
# =====================================================
executor = DatabaseTaskExecutor(
max_workers=MAX_WORKERS,
use_proxy=USE_PROXY
)
executor.run_tasks(limit=LIMIT)