Initial commit: 百家号文章采集系统
This commit is contained in:
32
scrapy_proxy.py
Normal file
32
scrapy_proxy.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import scrapy
|
||||
|
||||
class MimvpSpider(scrapy.spiders.Spider):
    """Spider that probes mimvp.com's ``exist.php`` endpoint over both HTTP
    and HTTPS, routing each request through a scheme-matched proxy server.

    Usage style 1: set the proxy directly on each request via ``meta['proxy']``
    (picked up by Scrapy's built-in HttpProxyMiddleware).
    """

    name = "mimvp"
    allowed_domains = ["mimvp.com"]
    start_urls = [
        "http://proxy.mimvp.com/exist.php",
        "https://proxy.mimvp.com/exist.php",
    ]

    def start_requests(self):
        """Yield one request per start URL with a proxy attached in ``meta``.

        The proxy is chosen by the target URL's scheme.
        NOTE(review): proxy addresses are hard-coded samples and are likely
        stale — replace with live proxies before real use.
        """
        # Reuse the class-level start_urls instead of duplicating the list here.
        for url in self.start_urls:
            meta_proxy = ""
            if url.startswith("http://"):
                meta_proxy = "http://180.96.27.12:88"  # HTTP proxy
            elif url.startswith("https://"):
                meta_proxy = "http://109.108.87.136:53281"  # HTTPS proxy

            yield scrapy.Request(url=url, callback=self.parse, meta={'proxy': meta_proxy})

    def parse(self, response):
        """Print the fetched URL and raw body so the proxy's visible IP
        (as reported by exist.php) can be inspected.
        """
        mimvp_url = response.url  # URL that was actually requested
        body = response.body      # raw response payload (bytes)

        print("mimvp_url : " + str(mimvp_url))
        print("body : " + str(body))
|
||||
Reference in New Issue
Block a user