I wrote an IP proxy pool and wanted it to resend the request with a new IP every time the response code was not 200. But each run only gets through the third IP, and then it stops.
class DailishiyanDownloaderMiddleware(object):
    """Scrapy downloader middleware that rotates proxy IPs and User-Agents.

    Why the original stopped: ``order`` was a finite generator, so once every
    DB row had been yielded, the next ``self.a.__next__()`` call inside
    ``process_request`` raised StopIteration and the crawl died after the
    last IP. ``order`` now cycles over the fetched rows indefinitely.
    """

    def canshu(self):
        """Fetch all proxy rows from the ``this_time_ip`` table.

        Returns a tuple of rows; each row is expected to hold the proxy
        address at index 1 (see ``process_request``).
        """
        # NOTE(review): the original paste garbled '#' into '-sharp';
        # the password below is restored on that assumption -- confirm.
        db = pymysql.connect("localhost", "root", "Cliu123#", "chen1")
        try:
            cursor = db.cursor()
            cursor.execute("select * from this_time_ip")
            return cursor.fetchall()
        finally:
            # Close the connection even on error -- the original leaked it.
            db.close()

    def order(self):
        """Yield proxy rows forever, looping over the fetched list.

        An empty table ends the generator immediately (no proxies to use).
        """
        rows = self.canshu()
        while rows:
            for row in rows:
                yield row

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your middlewares.
        s = cls()
        s.a = s.order()  # shared, now-infinite proxy iterator
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_request(self, request, spider):
        """Attach the next proxy and a random User-Agent to each request."""
        aa = self.a.__next__()
        ua = random.choice(user_agent_list)
        print("this time ua:", ua)
        request.headers.setdefault("User-Agent", ua)
        # Row layout assumed: column 1 is "host:port" -- TODO confirm schema.
        request.meta["proxy"] = "http://" + aa[1]
        print("ip:", str(aa[1]))
        return None

    def process_response(self, request, response, spider):
        # Called with the response returned from the downloader.
        # Re-schedule the request (it will get a fresh proxy) on any
        # non-200 status; otherwise pass the response through.
        print(response.status)
        a = response.status
        if a != 200:
            return request
        else:
            return response

    def process_exception(self, request, exception, spider):
        pass

    def spider_opened(self, spider):
        spider.logger.info("Spider opened: %s" % spider.name)
This is the downloader middleware I rewrote, and the above is the result of the run.
I don't understand why it stopped.