224 lines
7.9 KiB
Python
224 lines
7.9 KiB
Python
|
|
"""
|
|||
|
|
准确的Playwright代理IP验证脚本
|
|||
|
|
验证Playwright是否正确使用了带认证信息的代理IP
|
|||
|
|
"""
|
|||
|
|
import asyncio
|
|||
|
|
from playwright.async_api import async_playwright
|
|||
|
|
import requests
|
|||
|
|
|
|||
|
|
|
|||
|
|
async def get_my_ip_requests():
    """Return this machine's public IP as reported by httpbin.org.

    The request is made with ``requests`` and no explicit proxy argument.
    Because ``requests.get`` is a blocking call, it is dispatched to the
    default executor so the event loop is not stalled for up to the 10 s
    timeout while waiting on the network.

    Returns:
        The ``origin`` field of the JSON response (``'Unknown'`` when the
        field is absent), or ``None`` when the request raises or the
        response status is not 200.
    """
    loop = asyncio.get_running_loop()
    try:
        response = await loop.run_in_executor(
            None,
            lambda: requests.get('http://httpbin.org/ip', timeout=10),
        )
        if response.status_code == 200:
            return response.json().get('origin', 'Unknown')
    except Exception as e:
        # Best-effort lookup: report the failure and fall through to None
        # so callers can still run the proxy checks.
        print(f"获取本机IP失败: {e}")
    return None
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _build_playwright_proxy(proxy_url):
    """Split a proxy URL into the dict Playwright's ``proxy=`` option takes.

    ``[scheme://]user:pass@host:port`` becomes
    ``{"server": "scheme://host:port", "username": ..., "password": ...}``.
    A URL without credentials is passed through as ``{"server": proxy_url}``.
    """
    scheme, scheme_sep, remainder = proxy_url.partition('://')
    if not scheme_sep:
        # No explicit scheme: the whole string is ``[user:pass@]host:port``.
        scheme, remainder = 'http', proxy_url

    # Split on the LAST '@' and the FIRST ':' so that passwords containing
    # '@' or ':' are kept intact instead of raising or being truncated.
    credentials, at_sign, server = remainder.rpartition('@')
    if not at_sign:
        return {"server": proxy_url}

    username, _, password = credentials.partition(':')
    return {
        "server": f"{scheme}://{server}",
        "username": username,
        "password": password,
    }


def _extract_origin_ip(content):
    """Return the ``origin`` value from the JSON embedded in *content*.

    Returns ``'No JSON Found'`` when no ``{...}`` span exists,
    ``'JSON Parse Error'`` when the span is not valid JSON, and
    ``'Unknown'`` when the JSON has no ``origin`` key.
    """
    import json
    import re

    # page.content() returns the rendered HTML, so the JSON payload has to
    # be located inside the surrounding markup.
    json_match = re.search(r'\{.*\}', content, re.DOTALL)
    if json_match is None:
        print(f" 未找到JSON,原始内容: {content[:200]}...")
        return 'No JSON Found'

    try:
        ip_data = json.loads(json_match.group())
    except json.JSONDecodeError:
        print(f" JSON解析失败,原始内容: {content[:200]}...")
        return 'JSON Parse Error'
    return ip_data.get('origin', 'Unknown')


async def get_ip_with_playwright_proxy_correct(proxy_url):
    """Fetch http://httpbin.org/ip through Playwright using *proxy_url*.

    Credentials embedded in the URL are separated into the ``username`` and
    ``password`` fields of Playwright's proxy configuration.

    Args:
        proxy_url: Proxy address, optionally with a scheme and
            ``user:pass@`` credentials, e.g. ``http://user:pass@host:port``.

    Returns:
        The IP string reported by httpbin, or one of the marker strings
        ``'Unknown'``, ``'No JSON Found'``, ``'JSON Parse Error'`` or
        ``'Error: <message>'``. This function never raises ``Exception``
        subclasses; failures are reported through the return value.
    """
    try:
        proxy_config = _build_playwright_proxy(proxy_url)
        if "username" in proxy_config:
            # NOTE(review): this prints the proxy password in clear text.
            print(f" 使用代理配置: {proxy_config}")

        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True, proxy=proxy_config)
            try:
                context = await browser.new_context()
                page = await context.new_page()

                # Visit the IP echo service and grab the rendered page.
                await page.goto('http://httpbin.org/ip', wait_until='networkidle', timeout=15000)
                content = await page.content()
            finally:
                # Close the browser even when navigation fails or times out.
                await browser.close()

        return _extract_origin_ip(content)

    except Exception as e:
        print(f" 通过Playwright+代理获取IP失败: {e}")
        return f'Error: {e}'
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _proxy_result_is_success(result, local_ip):
    """Return True when *result* is a real IP string different from *local_ip*.

    ``get_ip_with_playwright_proxy_correct`` reports failures through marker
    strings, including ``'Error: <message>'`` whose tail varies, so error
    results must be detected by prefix rather than by exact match.
    """
    if not isinstance(result, str) or result.startswith('Error:'):
        return False
    if result in ('JSON Parse Error', 'No JSON Found', 'Unknown'):
        return False
    # NOTE: when local_ip is None (local lookup failed) any IP counts as changed.
    return result != local_ip


async def test_proxy_formats():
    """Check the first two configured proxies using the full-URL format.

    For each proxy returned by ``damai_proxy_config.get_proxy_config(i)``
    (``i`` in 0..1), builds ``scheme://username:password@host:port``, fetches
    the outward IP through ``get_ip_with_playwright_proxy_correct`` and prints
    whether it differs from the local IP. Results are only printed; nothing
    is returned.
    """
    print("="*60)
    print("🔍 测试不同代理格式")
    print("="*60)

    # Proxy settings come from the project's proxy configuration module.
    from damai_proxy_config import get_proxy_config

    # Look up the local (non-proxied) IP for comparison.
    print("1️⃣ 获取本机IP...")
    local_ip = await get_my_ip_requests()
    print(f" 本机IP: {local_ip}")

    for i in range(2):
        print(f"\n2️⃣ 测试代理 {i+1}...")
        proxy_config = get_proxy_config(i)

        print(f" 代理信息: {proxy_config}")

        # Format 1: scheme://username:password@host:port
        # Split on '://' instead of slicing a fixed 7 characters so that
        # servers not starting with exactly 'http://' are handled correctly.
        scheme, scheme_sep, host_port = proxy_config['server'].partition('://')
        if not scheme_sep:
            scheme, host_port = 'http', proxy_config['server']
        proxy_url_format1 = (
            f"{scheme}://{proxy_config['username']}:{proxy_config['password']}@{host_port}"
        )
        print(f" 格式1 (完整URL): {proxy_url_format1}")

        # Exercise format 1.
        ip_with_proxy1 = await get_ip_with_playwright_proxy_correct(proxy_url_format1)
        print(f" 使用格式1的IP: {ip_with_proxy1}")

        if _proxy_result_is_success(ip_with_proxy1, local_ip):
            print(" ✅ 格式1成功: IP已改变,代理生效")
        else:
            print(" ❌ 格式1失败: IP未改变或出错")

        print()
|
|||
|
|
|
|||
|
|
|
|||
|
|
async def test_direct_proxy_config():
    """Check the first two configured proxies by passing a proxy dict directly.

    For each proxy returned by ``damai_proxy_config.get_proxy_config(i)``
    (``i`` in 0..1), launches Chromium with a
    ``{"server", "username", "password"}`` proxy dict, loads
    http://httpbin.org/ip and prints whether the reported IP differs from the
    local one. Results are only printed; nothing is returned.
    """
    import json
    import re

    print("="*60)
    print("🔍 测试直接使用代理配置对象")
    print("="*60)

    # Look up the local (non-proxied) IP for comparison.
    print("1️⃣ 获取本机IP...")
    local_ip = await get_my_ip_requests()
    print(f" 本机IP: {local_ip}")

    from damai_proxy_config import get_proxy_config

    for i in range(2):
        print(f"\n2️⃣ 测试代理 {i+1} (直接配置)...")
        proxy_config = get_proxy_config(i)

        # Build the proxy configuration object Playwright expects.
        playwright_proxy_config = {
            "server": proxy_config['server'],
            "username": proxy_config['username'],
            "password": proxy_config['password']
        }

        # NOTE(review): this prints the proxy password in clear text.
        print(f" Playwright代理配置: {playwright_proxy_config}")

        try:
            async with async_playwright() as p:
                browser = await p.chromium.launch(headless=True, proxy=playwright_proxy_config)
                try:
                    context = await browser.new_context()
                    page = await context.new_page()

                    # Visit the IP echo service and grab the rendered page.
                    await page.goto('http://httpbin.org/ip', wait_until='networkidle', timeout=15000)
                    content = await page.content()
                finally:
                    # Close the browser even when navigation fails or times out.
                    await browser.close()
        except Exception as e:
            print(f" ❌ 代理{i+1}连接失败: {str(e)}")
            continue

        # page.content() is rendered HTML; locate the JSON payload inside it.
        json_match = re.search(r'\{.*\}', content, re.DOTALL)
        if not json_match:
            print(f" ❌ 代理{i+1} 未找到IP信息: {content[:200]}...")
            continue

        try:
            ip_data = json.loads(json_match.group())
        except json.JSONDecodeError:
            print(f" ❌ 代理{i+1} JSON解析失败: {content[:200]}...")
            continue

        ip_address = ip_data.get('origin')
        if ip_address is None:
            # Valid JSON without an 'origin' field carries no IP, so it must
            # not be reported as a working proxy.
            print(f" ❌ 代理{i+1} 未找到IP信息: {content[:200]}...")
            continue

        print(f" 代理{i+1} IP: {ip_address}")
        if ip_address != local_ip:
            print(f" ✅ 代理{i+1}成功: IP已改变,代理生效")
        else:
            print(f" ❌ 代理{i+1}失败: IP未改变")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def explain_proxy_formats():
    """Print a short guide to the two ways of giving Playwright a proxy.

    Covers the dict form (server / username / password), the URL form with
    embedded credentials, and the caveats about keeping credentials out of
    ``proxy.server``. Writes to stdout only and returns ``None``.
    """
    banner = "=" * 60
    guide_lines = (
        # Header.
        banner,
        "📋 代理格式说明",
        banner,
        "\n在Playwright中使用代理的两种方式:",
        # Option 1: dictionary form.
        "\n1️⃣ 字典格式(推荐):",
        " proxy = {",
        " 'server': 'http://proxy-server:port',",
        " 'username': 'your_username',",
        " 'password': 'your_password'",
        " }",
        " browser = await playwright.chromium.launch(proxy=proxy)",
        # Option 2: URL with embedded credentials.
        "\n2️⃣ URL格式(包含认证信息):",
        " proxy_url = 'http://username:password@proxy-server:port'",
        " # 需要从中提取认证信息并构建字典格式",
        # Caveats.
        "\n⚠️ 注意:",
        " - 不能直接使用包含认证信息的URL字符串作为proxy.server",
        " - 必须将认证信息分离到单独的username和password字段",
        " - 代理服务器地址格式应为: http://host:port",
    )
    for text in guide_lines:
        print(text)
|
|||
|
|
|
|||
|
|
|
|||
|
|
async def main():
    """Run the whole verification: print the guide, then both proxy checks.

    Order: format explanation, direct proxy-dict check, URL-format check,
    then a closing banner. Each check is preceded by a blank line and a
    60-character divider.
    """
    divider = "=" * 60

    explain_proxy_formats()

    # Direct proxy-config check first, then the URL-format check.
    for run_check in (test_direct_proxy_config, test_proxy_formats):
        print("\n" + divider)
        await run_check()

    print(f"\n{divider}")
    print("✅ 验证完成!")
    print(divider)
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    import sys

    # On Windows, select the proactor event loop policy before the loop starts.
    # NOTE(review): presumably needed because Playwright launches the browser
    # as a subprocess - confirm against the Python version in use.
    running_on_windows = sys.platform == 'win32'
    if running_on_windows:
        proactor_policy = asyncio.WindowsProactorEventLoopPolicy()
        asyncio.set_event_loop_policy(proactor_policy)

    asyncio.run(main())
|