import time

from twisted.internet import defer
from twisted.internet.error import TimeoutError, DNSLookupError, \
    ConnectionRefusedError, ConnectionDone, ConnectError, \
    ConnectionLost, TCPTimedOutError
from twisted.web.client import ResponseFailed

from scrapy.core.downloader.handlers.http11 import TunnelError
class ProcessAllExceptionMiddleware(object):
    ALL_EXCEPTIONS = (defer.TimeoutError, TimeoutError, DNSLookupError,
                      ConnectionRefusedError, ConnectionDone, ConnectError,
                      ConnectionLost, TCPTimedOutError, ResponseFailed,
                      IOError, TunnelError)

    def process_response(self, request, response, spider):
        # Catch 3xx/4xx/5xx responses and retry the request
        if str(response.status)[0] in ("3", "4", "5"):
            print("Got %s response, retrying" % response.status)
            return request
        if "forbidden" in response.url:
            return request
        return response

    def process_exception(self, request, exception, spider):
        # Catch any of the exceptions listed in ALL_EXCEPTIONS
        if isinstance(exception, self.ALL_EXCEPTIONS):
            print("Got exception: %s" % exception)
            # Wait, then return the request so it is rescheduled instead of
            # the failure being passed on to the spider
            time.sleep(5)
            return request
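Side note, a common gotcha rather than something the traceback below shows: a request returned from process_response or process_exception goes back through the scheduler and its duplicate filter, so the retry can be dropped silently unless it is marked dont_filter. A minimal sketch of the pattern (the built-in RetryMiddleware does the same when it retries):

retryreq = request.copy()
retryreq.dont_filter = True  # bypass the dupefilter so the retry is not discarded
return retryreq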
custom_settings = {
    "RANDOM_DELAY": 20,
    "DOWNLOADER_MIDDLEWARES": {
        "Espider.middlewares.proxymiddleware.proxy_middleware": 543,
        "Espider.middlewares.processallexceptionmiddleware.ProcessAllExceptionMiddleware": 544,
    },
}
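One thing worth checking, based on the retry.py frames in the traceback below: for process_exception, downloader middlewares run in descending priority order, so the built-in RetryMiddleware (default priority 550) sees the timeout before this middleware at 544 does, and whatever it returns or raises ends the chain there. A minimal sketch that removes it so the custom middleware receives the exception first, assuming nothing else relies on the built-in retries:

custom_settings = {
    "DOWNLOADER_MIDDLEWARES": {
        "Espider.middlewares.processallexceptionmiddleware.ProcessAllExceptionMiddleware": 544,
        # Setting the built-in RetryMiddleware to None disables it, so it can
        # no longer consume (or crash on) the exception at priority 550.
        "scrapy.downloadermiddlewares.retry.RetryMiddleware": None,
    },
}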
Excuse me, I wrote this fault-tolerant ProcessAllExceptionMiddleware for my spider. Why is the middleware never called?
The error is as follows:
Traceback (most recent call last):
  File "/home/shenjianlin/.local/lib/python3.4/site-packages/twisted/internet/defer.py", line 1384, in _inlineCallbacks
    result = result.throwExceptionIntoGenerator(g)
  File "/home/shenjianlin/.local/lib/python3.4/site-packages/twisted/python/failure.py", line 422, in throwExceptionIntoGenerator
    return g.throw(self.type, self.value, self.tb)
  File "/home/shenjianlin/.local/lib/python3.4/site-packages/scrapy/core/downloader/middleware.py", line 43, in process_request
    defer.returnValue((yield download_func(request=request, spider=spider)))
  File "/home/shenjianlin/.local/lib/python3.4/site-packages/twisted/internet/defer.py", line 653, in _runCallbacks
    current.result = callback(current.result, *args, **kw)
  File "/home/shenjianlin/.local/lib/python3.4/site-packages/scrapy/core/downloader/handlers/http11.py", line 351, in _cb_timeout
    raise TimeoutError("Getting %s took longer than %s seconds." % (url, timeout))
twisted.internet.error.TimeoutError: User timeout caused connection failure: Getting https://www.lagou.com/gongsi/. took longer than 180.0 seconds..

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/shenjianlin/.local/lib/python3.4/site-packages/twisted/internet/defer.py", line 1386, in _inlineCallbacks
    result = g.send(result)
  File "/home/shenjianlin/.local/lib/python3.4/site-packages/scrapy/core/downloader/middleware.py", line 66, in process_exception
    spider=spider)
  File "/home/shenjianlin/.local/lib/python3.4/site-packages/scrapy/downloadermiddlewares/retry.py", line 61, in process_exception
    return self._retry(request, exception, spider)
  File "/home/shenjianlin/.local/lib/python3.4/site-packages/scrapy/downloadermiddlewares/retry.py", line 71, in _retry
    stats = spider.crawler.stats
AttributeError: 'lagou' object has no attribute 'crawler'
A timeout error occurs here, but the print statement in my middleware's process_exception is never executed.
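For reference, the AttributeError at the bottom comes from the built-in RetryMiddleware reading spider.crawler.stats. A spider only gets its crawler attribute when Scrapy builds it through Spider.from_crawler, so a spider instantiated by hand, or a from_crawler override that skips the base implementation, will not have it. A minimal sketch under that assumption (LagouSpider is a hypothetical class name taken from the "lagou" in the traceback):

import scrapy

class LagouSpider(scrapy.Spider):
    name = "lagou"

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        # Delegating to the base class is what attaches spider.crawler
        # (and spider.settings); skipping it causes exactly this AttributeError.
        spider = super().from_crawler(crawler, *args, **kwargs)
        return spider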