commit
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
import time
|
import time
|
||||||
import random
|
import random
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple, List
|
||||||
from playwright.sync_api import Page, ElementHandle
|
from playwright.sync_api import Page, ElementHandle
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from config import Config
|
from config import Config
|
||||||
@@ -81,7 +81,7 @@ class MIPAdAutomation:
|
|||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
|
|
||||||
# 检查是否存在商业广告
|
# 检查是否存在商业广告
|
||||||
has_ad, ad_element = self._detect_commercial_ad()
|
has_ad, ad_elements = self._detect_commercial_ad()
|
||||||
|
|
||||||
if not has_ad:
|
if not has_ad:
|
||||||
logger.info("未检测到商业广告,跳过该链接")
|
logger.info("未检测到商业广告,跳过该链接")
|
||||||
@@ -89,12 +89,25 @@ class MIPAdAutomation:
|
|||||||
self._record_click_failure(url, "未检测到商业广告")
|
self._record_click_failure(url, "未检测到商业广告")
|
||||||
return False, False
|
return False, False
|
||||||
|
|
||||||
# 点击广告
|
# 逐个尝试点击广告,直到成功
|
||||||
logger.info("检测到商业广告,准备点击")
|
logger.info(f"检测到商业广告,准备点击(共 {len(ad_elements)} 个)")
|
||||||
if not self._click_advertisement(ad_element):
|
click_success = False
|
||||||
logger.warning("点击广告失败")
|
|
||||||
|
for idx, ad_element in enumerate(ad_elements, 1):
|
||||||
|
logger.info(f"尝试点击第 {idx}/{len(ad_elements)} 个广告...")
|
||||||
|
if self._click_advertisement(ad_element):
|
||||||
|
logger.info(f"✅ 第 {idx} 个广告点击成功")
|
||||||
|
click_success = True
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
logger.warning(f"❌ 第 {idx} 个广告点击失败,尝试下一个...")
|
||||||
|
# 等待一下再点下一个
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
if not click_success:
|
||||||
|
logger.warning("所有广告均点击失败")
|
||||||
# 记录点击失败
|
# 记录点击失败
|
||||||
self._record_click_failure(url, "广告点击失败,页面未跳转")
|
self._record_click_failure(url, f"所有广告({len(ad_elements)}个)均点击失败")
|
||||||
return False, False
|
return False, False
|
||||||
|
|
||||||
# 记录点击到数据库
|
# 记录点击到数据库
|
||||||
@@ -124,26 +137,26 @@ class MIPAdAutomation:
|
|||||||
# 尝试关闭当前标签页,返回主窗口
|
# 尝试关闭当前标签页,返回主窗口
|
||||||
self._close_current_tab()
|
self._close_current_tab()
|
||||||
|
|
||||||
def _detect_commercial_ad(self) -> Tuple[bool, Optional[ElementHandle]]:
|
def _detect_commercial_ad(self) -> Tuple[bool, List[ElementHandle]]:
|
||||||
"""
|
"""
|
||||||
检测页面是否存在商业广告
|
检测页面是否存在商业广告
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
(是否存在商业广告, 广告元素)
|
(是否存在商业广告, 广告元素列表)
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# 等待评论区加载
|
# 等待评论区加载
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
# 方法1: 查找包含"广告"标识的元素
|
# 查找包含“广告”标识的元素
|
||||||
# 根据实际页面结构调整选择器
|
|
||||||
ad_selectors = [
|
ad_selectors = [
|
||||||
"//div[contains(@class, 'ad') or contains(@class, 'advertisement')]",
|
"//div[contains(@class, 'ad') or contains(@class, 'advertisement')]",
|
||||||
"//div[contains(text(), '广告')]",
|
"//div[contains(text(), '广告')]",
|
||||||
"//*[contains(text(), '广告')]//ancestor::div[contains(@class, 'card')]",
|
"//*[contains(text(), '广告')]//ancestor::div[contains(@class, 'card')]",
|
||||||
"//a[contains(@class, 'ad-link')]",
|
"//a[contains(@class, 'ad-link')]",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
ad_elements = []
|
||||||
for selector in ad_selectors:
|
for selector in ad_selectors:
|
||||||
try:
|
try:
|
||||||
elements = self.page.locator(f"xpath={selector}").all()
|
elements = self.page.locator(f"xpath={selector}").all()
|
||||||
@@ -152,19 +165,25 @@ class MIPAdAutomation:
|
|||||||
for elem in elements:
|
for elem in elements:
|
||||||
if elem.is_visible():
|
if elem.is_visible():
|
||||||
# 进一步验证是否是商业广告(非AI健康管家)
|
# 进一步验证是否是商业广告(非AI健康管家)
|
||||||
elem_text = elem.inner_text().lower()
|
try:
|
||||||
if '广告' in elem_text and 'ai健康' not in elem_text:
|
elem_text = elem.inner_text().lower()
|
||||||
logger.info("检测到商业广告")
|
if '广告' in elem_text and 'ai健康' not in elem_text:
|
||||||
return True, elem
|
ad_elements.append(elem)
|
||||||
|
except:
|
||||||
|
continue
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if ad_elements:
|
||||||
|
logger.info(f"检测到 {len(ad_elements)} 个商业广告")
|
||||||
|
return True, ad_elements
|
||||||
|
|
||||||
logger.info("未检测到商业广告")
|
logger.info("未检测到商业广告")
|
||||||
return False, None
|
return False, []
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"检测广告异常: {str(e)}")
|
logger.error(f"检测广告异常: {str(e)}")
|
||||||
return False, None
|
return False, []
|
||||||
|
|
||||||
def _click_advertisement(self, ad_element: ElementHandle) -> bool:
|
def _click_advertisement(self, ad_element: ElementHandle) -> bool:
|
||||||
"""
|
"""
|
||||||
@@ -197,7 +216,21 @@ class MIPAdAutomation:
|
|||||||
time.sleep(check_interval)
|
time.sleep(check_interval)
|
||||||
if self.page.url != original_url:
|
if self.page.url != original_url:
|
||||||
logger.info(f"✅ 页面已导航(耗时{i+1}秒): {original_url} -> {self.page.url}")
|
logger.info(f"✅ 页面已导航(耗时{i+1}秒): {original_url} -> {self.page.url}")
|
||||||
self.page.wait_for_load_state('domcontentloaded')
|
|
||||||
|
# 等待页面加载完成(最多15秒)
|
||||||
|
try:
|
||||||
|
logger.info("等待页面加载完成...")
|
||||||
|
self.page.wait_for_load_state('domcontentloaded', timeout=15000)
|
||||||
|
logger.info("✅ 页面加载完成")
|
||||||
|
except Exception as load_err:
|
||||||
|
logger.warning(f"⚠️ 页面加载超时,尝试刷新页面...")
|
||||||
|
try:
|
||||||
|
self.page.reload(wait_until='domcontentloaded', timeout=15000)
|
||||||
|
logger.info("✅ 页面刷新成功")
|
||||||
|
except Exception as refresh_err:
|
||||||
|
logger.error(f"❌ 页面刷新失败: {str(refresh_err)}")
|
||||||
|
return False
|
||||||
|
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
# 循环正常结束(未跳转)
|
# 循环正常结束(未跳转)
|
||||||
|
|||||||
@@ -517,7 +517,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
# ==================== 配置区 ====================
|
# ==================== 配置区 ====================
|
||||||
# 执行模式:1=串行,>1=并发
|
# 执行模式:1=串行,>1=并发
|
||||||
MAX_WORKERS = 1
|
MAX_WORKERS = 3
|
||||||
|
|
||||||
# 是否使用代理
|
# 是否使用代理
|
||||||
USE_PROXY = True
|
USE_PROXY = True
|
||||||
|
|||||||
Reference in New Issue
Block a user