This commit is contained in:
sjk
2026-01-19 09:28:03 +08:00
parent a6326a8ccb
commit c4711fc84f
2 changed files with 57 additions and 24 deletions

View File

@@ -1,6 +1,6 @@
import time import time
import random import random
from typing import Optional, Tuple from typing import Optional, Tuple, List
from playwright.sync_api import Page, ElementHandle from playwright.sync_api import Page, ElementHandle
from loguru import logger from loguru import logger
from config import Config from config import Config
@@ -81,7 +81,7 @@ class MIPAdAutomation:
time.sleep(3) time.sleep(3)
# 检查是否存在商业广告 # 检查是否存在商业广告
has_ad, ad_element = self._detect_commercial_ad() has_ad, ad_elements = self._detect_commercial_ad()
if not has_ad: if not has_ad:
logger.info("未检测到商业广告,跳过该链接") logger.info("未检测到商业广告,跳过该链接")
@@ -89,12 +89,25 @@ class MIPAdAutomation:
self._record_click_failure(url, "未检测到商业广告") self._record_click_failure(url, "未检测到商业广告")
return False, False return False, False
# 点击广告 # 逐个尝试点击广告,直到成功
logger.info("检测到商业广告,准备点击") logger.info(f"检测到商业广告,准备点击(共 {len(ad_elements)} 个)")
if not self._click_advertisement(ad_element): click_success = False
logger.warning("点击广告失败")
for idx, ad_element in enumerate(ad_elements, 1):
logger.info(f"尝试点击第 {idx}/{len(ad_elements)} 个广告...")
if self._click_advertisement(ad_element):
logger.info(f"✅ 第 {idx} 个广告点击成功")
click_success = True
break
else:
logger.warning(f"❌ 第 {idx} 个广告点击失败,尝试下一个...")
# 等待一下再点下一个
time.sleep(1)
if not click_success:
logger.warning("所有广告均点击失败")
# 记录点击失败 # 记录点击失败
self._record_click_failure(url, "广告点击失败,页面未跳转") self._record_click_failure(url, f"所有广告({len(ad_elements)}个)均点击失败")
return False, False return False, False
# 记录点击到数据库 # 记录点击到数据库
@@ -124,26 +137,26 @@ class MIPAdAutomation:
# 尝试关闭当前标签页,返回主窗口 # 尝试关闭当前标签页,返回主窗口
self._close_current_tab() self._close_current_tab()
def _detect_commercial_ad(self) -> Tuple[bool, Optional[ElementHandle]]: def _detect_commercial_ad(self) -> Tuple[bool, List[ElementHandle]]:
""" """
检测页面是否存在商业广告 检测页面是否存在商业广告
Returns: Returns:
(是否存在商业广告, 广告元素) (是否存在商业广告, 广告元素列表)
""" """
try: try:
# 等待评论区加载 # 等待评论区加载
time.sleep(2) time.sleep(2)
# 方法1: 查找包含"广告"标识的元素 # 查找包含广告标识的元素
# 根据实际页面结构调整选择器
ad_selectors = [ ad_selectors = [
"//div[contains(@class, 'ad') or contains(@class, 'advertisement')]", "//div[contains(@class, 'ad') or contains(@class, 'advertisement')]",
"//div[contains(text(), '广告')]", "//div[contains(text(), '广告')]",
"//*[contains(text(), '广告')]//ancestor::div[contains(@class, 'card')]", "//*[contains(text(), '广告')]//ancestor::div[contains(@class, 'card')]",
"//a[contains(@class, 'ad-link')]", "//a[contains(@class, 'ad-link')]",
] ]
ad_elements = []
for selector in ad_selectors: for selector in ad_selectors:
try: try:
elements = self.page.locator(f"xpath={selector}").all() elements = self.page.locator(f"xpath={selector}").all()
@@ -152,19 +165,25 @@ class MIPAdAutomation:
for elem in elements: for elem in elements:
if elem.is_visible(): if elem.is_visible():
# 进一步验证是否是商业广告(非AI健康管家) # 进一步验证是否是商业广告(非AI健康管家)
elem_text = elem.inner_text().lower() try:
if '广告' in elem_text and 'ai健康' not in elem_text: elem_text = elem.inner_text().lower()
logger.info("检测到商业广告") if '广告' in elem_text and 'ai健康' not in elem_text:
return True, elem ad_elements.append(elem)
except:
continue
except Exception: except Exception:
continue continue
if ad_elements:
logger.info(f"检测到 {len(ad_elements)} 个商业广告")
return True, ad_elements
logger.info("未检测到商业广告") logger.info("未检测到商业广告")
return False, None return False, []
except Exception as e: except Exception as e:
logger.error(f"检测广告异常: {str(e)}") logger.error(f"检测广告异常: {str(e)}")
return False, None return False, []
def _click_advertisement(self, ad_element: ElementHandle) -> bool: def _click_advertisement(self, ad_element: ElementHandle) -> bool:
""" """
@@ -197,7 +216,21 @@ class MIPAdAutomation:
time.sleep(check_interval) time.sleep(check_interval)
if self.page.url != original_url: if self.page.url != original_url:
logger.info(f"✅ 页面已导航(耗时{i+1}秒): {original_url} -> {self.page.url}") logger.info(f"✅ 页面已导航(耗时{i+1}秒): {original_url} -> {self.page.url}")
self.page.wait_for_load_state('domcontentloaded')
# 等待页面加载完成(最多15秒)
try:
logger.info("等待页面加载完成...")
self.page.wait_for_load_state('domcontentloaded', timeout=15000)
logger.info("✅ 页面加载完成")
except Exception as load_err:
logger.warning(f"⚠️ 页面加载超时,尝试刷新页面...")
try:
self.page.reload(wait_until='domcontentloaded', timeout=15000)
logger.info("✅ 页面刷新成功")
except Exception as refresh_err:
logger.error(f"❌ 页面刷新失败: {str(refresh_err)}")
return False
break break
else: else:
# 循环正常结束(未跳转) # 循环正常结束(未跳转)

View File

@@ -517,7 +517,7 @@ if __name__ == "__main__":
# ==================== 配置区 ==================== # ==================== 配置区 ====================
# 执行模式: 1=串行, >1=并发 # 执行模式: 1=串行, >1=并发
MAX_WORKERS = 1 MAX_WORKERS = 3
# 是否使用代理 # 是否使用代理
USE_PROXY = True USE_PROXY = True