Files
ai_mip/test_db_tasks.py
2026-01-16 22:06:46 +08:00

520 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
从数据库读取URL并执行批量点击任务
支持单线程和并发两种模式
"""
from loguru import logger
from adspower_client import AdsPowerClient
from ad_automation import MIPAdAutomation
from config import Config
from db_manager import SiteManager, ClickManager, InteractionManager
from data_manager import DataManager
import sys
import time
import random
from datetime import datetime
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Optional
import threading
# Configure logging (adds a thread-name tag so concurrent task logs are distinguishable)
logger.remove()
logger.add(
    sys.stdout,
    format="<green>{time:HH:mm:ss}</green> | <cyan>[{thread.name}]</cyan> | <level>{level: <8}</level> | <level>{message}</level>",
    level="INFO"
)
class DatabaseTaskExecutor:
    """Reads active site URLs from the database and runs batch ad-click tasks.

    Supports serial (max_workers == 1) and concurrent (max_workers > 1)
    execution. Each task provisions a dedicated AdsPower browser profile
    (optionally behind a proxy), drives the ad-click / consult flow via
    MIPAdAutomation, then stops the browser and deletes the profile.
    """

    # Serializes browser start-up across worker threads; launching several
    # AdsPower profiles at the exact same moment is unreliable.
    _browser_start_lock = threading.Lock()

    def __init__(self, max_workers: int = 1, use_proxy: bool = True):
        """Initialize the executor.

        Args:
            max_workers: Maximum concurrency (1 = serial, >1 = concurrent).
            use_proxy: Whether browser traffic should go through a proxy.
        """
        self.max_workers = max_workers
        self.use_proxy = use_proxy
        self.client = AdsPowerClient()
        self.dm = DataManager()
        # Per-task screenshot folders are created under this directory
        self.screenshot_dir = Path("./screenshots")
        self.screenshot_dir.mkdir(exist_ok=True)
        logger.info(f"执行模式: {'并发' if max_workers > 1 else '串行'}")
        logger.info(f"最大并发数: {max_workers}")
        logger.info(f"使用代理: {use_proxy}")

    def get_active_tasks(self, limit: Optional[int] = None) -> List[Dict]:
        """Fetch active site tasks from the database.

        Args:
            limit: Maximum number of sites to return (None = all).

        Returns:
            List of site dicts.
        """
        active_sites = self.dm.get_active_urls()
        if limit and limit > 0:
            active_sites = active_sites[:limit]
        logger.info(f"从数据库获取 {len(active_sites)} 个活跃站点")
        return active_sites

    def create_browser_profile(self, index: int) -> Optional[Dict]:
        """Create a fresh AdsPower browser profile, optionally with a proxy.

        Args:
            index: Environment sequence number, used in names and log lines.

        Returns:
            Dict with 'index', 'profile_id', 'name', 'proxy', 'proxy_id',
            or None if creation failed.
        """
        try:
            # Resolve the profile group for the current environment
            group_id = self.client.get_group_by_env()
            time.sleep(0.5)  # pace consecutive AdsPower API calls

            # When proxying, acquire a proxy IP and register it with AdsPower
            proxy_config = {}
            proxy_id = None
            proxy_info = None
            if self.use_proxy:
                logger.info(f"[环境 {index}] 获取代理IP...")
                proxy_info = self.client.get_damai_proxy()
                time.sleep(0.5)
                if proxy_info:
                    logger.info(f"[环境 {index}] 代理IP: {proxy_info['host']}:{proxy_info['port']}")
                    proxy_data = {
                        "type": "http",
                        "host": proxy_info["host"],
                        "port": proxy_info["port"],
                        "user": self.client.DAMAI_USER,
                        "password": self.client.DAMAI_PASSWORD,
                        "remark": f"DB任务代理_{index}"
                    }
                    proxy_id = self.client.create_proxy(proxy_data)
                    time.sleep(0.5)
                    if proxy_id:
                        logger.info(f"[环境 {index}] 创建代理: {proxy_id}")
                        proxy_config = {"proxyid": proxy_id}

            # Pick the fingerprint OS from the deployment environment.
            # (Config is already imported at module level; the previous local
            # re-import was redundant and has been removed.)
            os_type = "Linux" if Config.ENV == "production" else "Windows"
            profile_data = {
                "name": f"DB任务_{index}_{datetime.now().strftime('%H%M%S')}",
                "group_id": str(group_id) if group_id else "0",
                "platform": "health.baidu.com",
                "repeat_config": [],
                "ignore_cookie_error": "1",
                "country": "cn",
                "city": "beijing",
                "remark": f"DB任务环境 #{index}",
                "fingerprint_config": {
                    "automatic_timezone": "1",
                    "flash": "block",
                    "scan_port_type": "1",
                    "location": "ask",
                    "location_switch": "1",
                    "canvas": "0",
                    "webgl": "0",
                    "audio": "0",
                    "webrtc": "local",
                    "do_not_track": "true",
                    "hardware_concurrency": "default",
                    "device_memory": "default",
                    "gpu": "2",
                    "mac_address_config": {
                        "model": "1",
                        "address": ""
                    },
                    "browser_kernel_config": {
                        "version": "latest",
                        "type": "chrome"
                    },
                    "random_ua": {
                        "ua_system_version": [os_type]  # set dynamically per environment
                    }
                }
            }
            logger.info(f"[环境 {index}] 操作系统: {os_type} (ENV={Config.ENV})")
            if proxy_config:
                profile_data.update(proxy_config)

            response = self.client._make_request(
                'POST',
                '/api/v2/browser-profile/create',
                json=profile_data
            )
            if response and response.get('code') == 0:
                profile_id = response.get('data', {}).get('profile_id')
                logger.info(f"✅ 创建环境 #{index}: {profile_id}")
                return {
                    'index': index,
                    'profile_id': profile_id,
                    'name': profile_data['name'],
                    'proxy': proxy_info,
                    'proxy_id': proxy_id
                }
            else:
                logger.error(f"❌ 创建环境 #{index} 失败: {response}")
                return None
        except Exception as e:
            logger.error(f"❌ 创建环境 #{index} 异常: {str(e)}")
            return None

    def execute_single_task(self, site_info: Dict, task_index: int, profile_id: str = None) -> Dict:
        """Run one ad-click task end to end.

        Args:
            site_info: Site record (expects 'id' and 'site_url'/'url' keys).
            task_index: Task sequence number, used for logs and folders.
            profile_id: Pre-created profile ID; when None, the first profile
                returned by the AdsPower API is used.

        Returns:
            Result dict with success/click_count/has_ad/has_reply/error.
        """
        # Tag the worker thread so log lines identify the task
        threading.current_thread().name = f"Task-{task_index}"
        site_id = site_info.get('id')
        site_url = site_info.get('site_url', site_info.get('url'))
        result = {
            'task_index': task_index,
            'site_id': site_id,
            'site_url': site_url,
            'success': False,
            'click_count': 0,
            'has_ad': False,
            'has_reply': False,
            'error': None
        }
        # Per-task screenshot folder
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        task_folder = self.screenshot_dir / f"task_{task_index}_{timestamp}"
        task_folder.mkdir(exist_ok=True)
        # Each thread uses its own client instance
        client = AdsPowerClient()
        proxy_info = None
        browser = None
        browser_started = False  # tracks whether the AdsPower browser must be torn down
        try:
            logger.info(f"[任务 {task_index}] 开始执行: {site_url}")

            # Resolve a profile if the caller did not supply one
            if not profile_id:
                profiles_data = client.list_profiles()
                if not profiles_data:
                    result['error'] = "获取Profile列表失败"
                    return result
                profiles = profiles_data.get('data', {}).get('list', [])
                if not profiles:
                    result['error'] = "没有可用的Profile"
                    return result
                # Fall back to the first available profile
                profile_id = profiles[0].get('profile_id')
            logger.info(f"[任务 {task_index}] 使用Profile: {profile_id}")

            # Attach a proxy to the profile when requested
            if self.use_proxy:
                logger.info(f"[任务 {task_index}] 获取代理IP...")
                proxy_info = client.get_damai_proxy()
                time.sleep(0.5)
                if proxy_info:
                    logger.info(f"[任务 {task_index}] 代理IP: {proxy_info['host']}:{proxy_info['port']}")
                    proxy_config = {
                        "proxy_type": "http",
                        "proxy_host": proxy_info["host"],
                        "proxy_port": proxy_info["port"],
                        "proxy_user": client.DAMAI_USER,
                        "proxy_password": client.DAMAI_PASSWORD,
                        "proxy_soft": "other"
                    }
                    # Update the proxy via the API v1 endpoint
                    success = client.update_profile_proxy_v1(profile_id, proxy_config)
                    if success:
                        logger.info(f"[任务 {task_index}] 代理配置成功")
                    else:
                        logger.warning(f"[任务 {task_index}] 代理配置失败,继续执行")
                    time.sleep(1)

            # Serialize browser start-up across threads
            with self._browser_start_lock:
                logger.debug(f"[任务 {task_index}] 启动浏览器...")
                browser_info = client.start_browser(user_id=profile_id)
                if not browser_info:
                    result['error'] = "启动浏览器失败"
                    return result
                browser_started = True
                time.sleep(1.5)
            time.sleep(1)

            # Attach to the browser over CDP
            browser = client.connect_browser(browser_info)
            if not browser:
                result['error'] = "CDP连接失败"
                return result

            context = browser.contexts[0]
            all_pages = context.pages
            logger.info(f"[任务 {task_index}] 当前标签页数: {len(all_pages)}")

            # Close AdsPower start pages so they don't interfere
            closed_count = 0
            for p in all_pages:
                try:
                    if 'start.adspower.net' in p.url:
                        logger.debug(f"[任务 {task_index}] 关闭启动页: {p.url}")
                        p.close()
                        closed_count += 1
                except Exception:
                    pass  # best-effort: a page may already be gone
            if closed_count > 0:
                logger.info(f"[任务 {task_index}] 已关闭 {closed_count} 个启动页")

            # Reuse an existing page or open a new one
            remaining_pages = context.pages
            if remaining_pages:
                page = remaining_pages[0]
            else:
                page = context.new_page()

            # Run the full ad-click + consult flow via MIPAdAutomation
            logger.info(f"[任务 {task_index}] 开始执行广告点击和咨询流程...")
            automation = MIPAdAutomation(page, task_index=task_index)  # task_index drives its log directory
            click_success, has_reply = automation.check_and_click_ad(
                url=site_url,
                site_id=site_id
            )
            if click_success:
                result['success'] = True
                result['click_count'] = 1
                result['has_ad'] = True
                result['has_reply'] = has_reply
                logger.info(f"[任务 {task_index}] ✅ 任务完成: 点击成功={click_success}, 收到回复={has_reply}")
            else:
                result['error'] = "广告点击失败"
                logger.warning(f"[任务 {task_index}] ❌ 广告点击失败")
        except Exception as e:
            logger.error(f"[任务 {task_index}] 执行异常: {str(e)}")
            result['error'] = str(e)
            import traceback
            traceback.print_exc()
        finally:
            # FIX: cleanup used to run only on the success path, leaking a
            # running browser and an orphaned profile whenever an exception
            # (or a CDP-connect failure) occurred after start-up. It now runs
            # on every path where the browser was actually started.
            if browser_started:
                # Close the CDP connection
                try:
                    if browser:
                        browser.close()
                        time.sleep(0.5)
                except Exception:
                    pass
                # Stop the AdsPower browser process
                try:
                    client.stop_browser(user_id=profile_id)
                    logger.info(f"[任务 {task_index}] 浏览器已关闭")
                    time.sleep(1)
                except Exception as e:
                    logger.warning(f"[任务 {task_index}] 停止浏览器失败: {str(e)}")
                # Delete the profile to release resources
                try:
                    logger.info(f"[任务 {task_index}] 删除浏览器Profile: {profile_id}")
                    delete_result = client.delete_profile(profile_id)
                    if delete_result:
                        logger.info(f"[任务 {task_index}] ✅ Profile已删除")
                    else:
                        logger.warning(f"[任务 {task_index}] Profile删除失败")
                except Exception as e:
                    logger.warning(f"[任务 {task_index}] 删除Profile异常: {str(e)}")
        return result

    def run_tasks(self, limit: Optional[int] = None):
        """Execute the batch (environments are created and run one by one).

        Args:
            limit: Maximum number of sites to process (None = all).
        """
        logger.info("=" * 60)
        logger.info("从数据库执行批量点击任务")
        logger.info("=" * 60)
        sites = self.get_active_tasks(limit=limit)
        if not sites:
            logger.warning("没有可执行的任务")
            return
        logger.info(f"\n准备执行 {len(sites)} 个任务\n")
        # Show the task list up front
        for idx, site in enumerate(sites, 1):
            site_url = site.get('site_url', site.get('url'))
            click_count = site.get('click_count', 0)
            logger.info(f"  {idx}. {site_url} (已点击: {click_count}次)")
        logger.info("\n" + "-" * 60)
        logger.info("边创建环境边执行任务...\n")
        start_time = time.time()
        results = []
        if self.max_workers == 1:
            # Serial: create one environment, run it, repeat
            for idx, site in enumerate(sites, 1):
                logger.info(f"[任务 {idx}] 创建浏览器环境...")
                profile_info = self.create_browser_profile(idx)
                if not profile_info:
                    logger.error(f"[任务 {idx}] 创建环境失败,跳过")
                    continue
                if profile_info.get('proxy'):
                    logger.info(f"[任务 {idx}] 使用代理: {profile_info['proxy']['host']}:{profile_info['proxy']['port']}")
                time.sleep(2)  # spacing between environment creations
                result = self.execute_single_task(site, idx, profile_info['profile_id'])
                results.append(result)
                # Random pause between tasks
                if idx < len(sites):
                    wait_time = random.randint(3, 5)
                    logger.info(f"等待 {wait_time} 秒后执行下一个任务...\n")
                    time.sleep(wait_time)
        else:
            # Concurrent: submit each task as soon as its environment exists
            with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
                futures = []
                for idx, site in enumerate(sites, 1):
                    logger.info(f"[任务 {idx}] 创建浏览器环境...")
                    profile_info = self.create_browser_profile(idx)
                    if not profile_info:
                        logger.error(f"[任务 {idx}] 创建环境失败,跳过")
                        continue
                    if profile_info.get('proxy'):
                        logger.info(f"[任务 {idx}] 使用代理: {profile_info['proxy']['host']}:{profile_info['proxy']['port']}")
                    future = executor.submit(self.execute_single_task, site, idx, profile_info['profile_id'])
                    futures.append((future, idx))
                    time.sleep(2)  # spacing between environment creations
                # Drain all futures
                for future, idx in futures:
                    try:
                        result = future.result()
                        results.append(result)
                        status = "成功" if result['success'] else "失败"
                        logger.info(f"[任务 {result['task_index']}] {status}")
                    except Exception as e:
                        logger.error(f"[任务 {idx}] 执行异常: {str(e)}")
        # Summary statistics
        elapsed_time = time.time() - start_time
        logger.info("\n" + "=" * 60)
        logger.info("任务执行完成")
        logger.info("=" * 60)
        success_count = sum(1 for r in results if r['success'])
        failed_count = len(results) - success_count
        has_ad_count = sum(1 for r in results if r['has_ad'])
        has_reply_count = sum(1 for r in results if r.get('has_reply', False))
        total_clicks = sum(r['click_count'] for r in results)
        logger.info(f"总任务数: {len(results)}")
        logger.info(f"成功数: {success_count}")
        logger.info(f"失败数: {failed_count}")
        logger.info(f"有广告页面: {has_ad_count}")
        logger.info(f"总点击次数: {total_clicks}")
        logger.info(f"收到回复数: {has_reply_count}")
        # FIX: guard against ZeroDivisionError when every environment
        # creation failed and results is empty.
        if results:
            logger.info(f"成功率: {success_count/len(results)*100:.1f}%")
        logger.info(f"回复率: {has_reply_count/total_clicks*100 if total_clicks > 0 else 0:.1f}%")
        logger.info(f"耗时: {elapsed_time:.1f}")
        # Database-side statistics
        logger.info("\n数据库统计:")
        stats = self.dm.get_statistics()
        for key, value in stats.items():
            logger.info(f"  {key}: {value}")
        logger.info("\n" + "=" * 60)
if __name__ == "__main__":
    logger.info("数据库任务执行器")
    logger.info(f"当前环境: {Config.ENV}")
    logger.info(f"AdsPower API: {Config.ADSPOWER_API_URL}")
    logger.info("")
    # ==================== Configuration ====================
    # Execution mode (1 = serial, >1 = concurrent)
    MAX_WORKERS = 1
    # Whether to use a proxy
    USE_PROXY = True
    # Limit on how many sites to process (None = all, number = cap)
    LIMIT = 3
    # Whether to auto-close the browser (set False while testing to keep it open)
    Config.AUTO_CLOSE_BROWSER = False
    # =====================================================
    executor = DatabaseTaskExecutor(
        max_workers=MAX_WORKERS,
        use_proxy=USE_PROXY
    )
    executor.run_tasks(limit=LIMIT)