32 lines
1.1 KiB
Python
32 lines
1.1 KiB
Python
import scrapy
|
||
|
||
class MimvpSpider(scrapy.spiders.Spider):
    """Demo spider that fetches mimvp.com's proxy-check page through
    explicit per-request proxies.

    Proxy setup style 1: set the proxy directly on each request via
    ``meta={'proxy': ...}``.
    """

    name = "mimvp"
    allowed_domains = ["mimvp.com"]
    start_urls = [
        "http://proxy.mimvp.com/exist.php",
        "https://proxy.mimvp.com/exist.php",
    ]

    def start_requests(self):
        """Yield one request per target URL, routing each through a
        proxy chosen by the URL's scheme."""
        targets = (
            "http://proxy.mimvp.com/exist.php",
            "https://proxy.mimvp.com/exist.php",
        )
        for target in targets:
            proxy = ""
            # NOTE: "https://..." does not match the "http://" prefix, so the
            # two branches are mutually exclusive regardless of order.
            if target.startswith("https://"):
                proxy = "http://109.108.87.136:53281"  # proxy used for HTTPS targets
            elif target.startswith("http://"):
                proxy = "http://180.96.27.12:88"  # proxy used for HTTP targets

            yield scrapy.Request(url=target, callback=self.parse, meta={'proxy': proxy})

    def parse(self, response):
        """Print the URL that was requested and the raw response body."""
        fetched_url = response.url  # URL of the request that produced this response
        payload = response.body  # raw page content (bytes)

        print("mimvp_url : " + str(fetched_url))
        print("body : " + str(payload))