import time,random
from scrapy.downloadermiddlewares.retry import RetryMiddleware
class processallexceptionmiddleware(RetryMiddleware):
    """Downloader middleware that forces a retry on bad HTTP statuses,
    'forbidden' URLs, and download exceptions.

    NOTE(review): the class name is kept lowercase on purpose — the
    DOWNLOADER_MIDDLEWARES setting references it by this exact dotted path,
    so renaming it to PascalCase would break the configuration.
    """

    def process_response(self, request, response, spider):
        """Retry 3xx/4xx/5xx responses and 'forbidden' URLs.

        Returns a retry copy of ``request`` for bad responses, otherwise
        passes ``response`` through unchanged.
        """
        # 3xx/4xx/5xx -> retry.  Compare the integer status directly instead
        # of string prefixes.  (Retrying 3xx also swallows normal redirects —
        # kept from the original logic; confirm this is intended.)
        if 300 <= response.status < 600:
            spider.logger.warning(
                "Bad status %s for %s, retrying", response.status, response.url
            )
            return self._retry_copy(request)
        if "forbidden" in response.url:
            spider.logger.warning("Forbidden URL %s, retrying", response.url)
            return self._retry_copy(request)
        return response

    def process_exception(self, request, exception, spider):
        """Log any download exception, back off a few seconds, and retry."""
        if isinstance(exception, self.EXCEPTIONS_TO_RETRY):
            spider.logger.info("Known retryable exception caught")
        spider.logger.error("Got exception: %s", exception)
        # WARNING: time.sleep blocks Scrapy's (Twisted) event loop; kept from
        # the original behavior, but a non-blocking delay (e.g.
        # twisted.internet.task.deferLater) would be preferable.
        time.sleep(random.randint(3, 5))
        return self._retry_copy(request)

    @staticmethod
    def _retry_copy(request):
        # A plain `return request` is silently dropped by the scheduler's
        # dupefilter (the URL was already seen), which is why the original
        # middleware appeared to "never get called" again.  Returning a copy
        # with dont_filter=True actually re-queues it.  NOTE: this retries
        # without a cap — consider honoring self.max_retry_times.
        retryreq = request.copy()
        retryreq.dont_filter = True
        return retryreq
# Spider settings: per-request random delay plus the proxy and
# retry-everything downloader middlewares (544 runs after the proxy at 543).
# Fix: the outer dict literal was never closed in the original paste, which
# is a SyntaxError; closing brace and trailing commas added.
custom_settings = {
    "RANDOM_DELAY": 20,
    "DOWNLOADER_MIDDLEWARES": {
        "Espider.middlewares.proxymiddleware.proxy_middleware": 543,
        "Espider.middlewares.processallexceptionmiddleware.processallexceptionmiddleware": 544,
    },
}
# After handling it like this, the middleware was never invoked again on retries —
# asking for advice on why. (Suggestion received: use the logging module, e.g.
# spider.logger, instead of print to record the crawler's behavior.)