import scrapy


class MimvpSpider(scrapy.spiders.Spider):
    """Spider that fetches the mimvp proxy-check page through fixed proxies.

    Each start URL is requested once, with the outgoing proxy chosen by the
    URL's scheme prefix, and the response URL and body are printed.
    """

    name = "mimvp"
    allowed_domains = ["mimvp.com"]
    start_urls = [
        "http://proxy.mimvp.com/exist.php",
        "https://proxy.mimvp.com/exist.php",
    ]

    # Proxy setup method 1: set the proxy directly on each request.
    # Maps a target-URL prefix to the proxy endpoint used for such URLs.
    _PROXY_BY_PREFIX = {
        "http://": "http://180.96.27.12:88",  # proxy for http targets
        "https://": "http://109.108.87.136:53281",  # proxy for https targets
    }

    def start_requests(self):
        """Yield one request per entry in ``start_urls`` with its proxy set.

        The proxy is passed through ``meta['proxy']``. A URL that matches
        neither known prefix is requested without a ``proxy`` meta key,
        rather than with an empty-string proxy.

        Yields:
            scrapy.Request: a request whose callback is :meth:`parse`.
        """
        for url in self.start_urls:
            meta = {}
            for prefix, proxy in self._PROXY_BY_PREFIX.items():
                if url.startswith(prefix):
                    meta["proxy"] = proxy
                    break
            yield scrapy.Request(url=url, callback=self.parse, meta=meta)

    def parse(self, response):
        """Print the URL and the body of ``response``.

        Args:
            response: the downloaded response passed in by Scrapy.
        """
        mimvp_url = response.url  # URL that was requested for this response
        body = response.body  # content of the returned page

        print("mimvp_url : " + str(mimvp_url))
        print("body : " + str(body))