之前处理超时异常时都是在 DownloaderMiddleware 中处理,但总感觉很费劲。
今天查 Scrapy 文档发现,可以在 Request 的 errback 回调中处理。
from scrapy.spidermiddlewares.httperror import HttpError
from twisted.internet.error import DNSLookupError
from twisted.internet.error import TimeoutError, TCPTimedOutError
yield scrapy.Request(url=full_url, errback=self.error_httpbin, dont_filter=True, callback=self.parse_list, meta={"hd": header})
def error_httpbin(self, failure):
# failure.request 就是Request对象,如果需要重试,直接yield即可
# if failure.check(HttpError):
# these exceptions come from HttpError spider middleware
# you can get the non-200 response
# response = failure.value.response
# self.logger.error('HttpError on %s', response.url)
if failure.check(DNSLookupError):
print("DNSLookupError------->")
# this is the original request
request = failure.request
yield request
# self.logger.error('DNSLookupError on %s', request.url)
elif failure.check(TimeoutError, TCPTimedOutError):
print("timeout------->")
request = failure.request
yield request
# self.logger.error('TimeoutError on %s', request.url)
特此记录一下:之前一直没有用 errback 这种方式处理超时异常。