class jingzhun(RedisCrawlSpider):
    """Spider for rong.36kr.com that sends a fixed set of headers and cookies.

    The first request is issued by ``start_requests`` against the company
    list API; ``redis_key`` names the Redis key associated with this spider's
    start URLs.
    """

    name = "jingzhun"
    allowed_domains = []
    # start_urls = ["https://rong.36kr.com/"]
    custom_settings = {
        # NOTE(review): not a built-in Scrapy setting; presumably read by a
        # project middleware -- confirm.
        "RANDOM_DELAY": 20,
    }
    redis_key = "jingzhun:starturls"

    def __init__(self, *args, **kwargs):
        """Configure allowed domains, request headers, cookies and the JS source.

        Keyword Args:
            domain: Optional comma-separated list of domains; empty entries
                are dropped. Defaults to ``""`` (no domain restriction).

        All remaining positional and keyword arguments are forwarded to the
        base class.
        """
        domain = kwargs.pop("domain", "")
        # Materialize as a list: on Python 3 ``filter`` returns a one-shot
        # iterator, which would be empty after its first traversal.
        self.allowed_domains = list(filter(None, domain.split(",")))
        super(jingzhun, self).__init__(*args, **kwargs)

        # NOTE(review): hard-coded session cookie containing what looks like
        # an auth token; consider loading it from settings or the environment.
        self.cookie_str = "acw_tc=b65cfd2515395760831792797e7a30fed7278a95d7c68d0dcad0b9cbc4ac1b; kwlo_iv=1h; kr_stat_uuid=TRRfp25694452; Hm_lvt_e8ec47088ed7458ec32cde3617b23ee3=1541062621,1541150329,1541661241; Hm_lpvt_e8ec47088ed7458ec32cde3617b23ee3=1541667148; download_animation=1; _kr_p_se=9867c144-9614-4298-96f7-0e46ed5efefe; krid_user_id=2014445492; krid_user_version=2; kr_plus_id=2014445492; kr_plus_token=8dnyAhS2t87wW1PU1p91L_jUAHPFmepeJJ75____; kr_plus_utype=0; device-uid=5fa2cef0-e334-11e8-978f-67115035d613"

        # Headers without the cookie; used together with the ``cookies=``
        # argument of the request in ``start_requests``.
        self.headers = {
            "Referer": "https://rong.36kr.com/list/detail&?sortField=HOT_SCORE",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
            "Host": "rong.36kr.com",
        }
        # Same headers plus the raw cookie string as a "cookie" header.
        self.co_headers = dict(self.headers, cookie=self.cookie_str)

        # Read the JavaScript source once and close the file immediately.
        with open("./js/jingzhun.js", "r") as js_file:
            self.js_read = js_file.read()

    def start_requests(self):
        """Yield the initial request for page 1 of the HOT_SCORE company list.

        The request carries ``self.headers`` and the cookies parsed from
        ``self.cookie_str``; ``dont_filter=True`` lets it bypass the duplicate
        filter. Responses are handled by ``self.get_all_info``.

        NOTE(review): this request is built here, not from ``redis_key``.
        Requests the base class creates for URLs popped from Redis presumably
        do not carry these headers/cookies; if they must, override the base
        class's request-construction hook (``make_request_from_data`` in
        scrapy-redis) -- confirm against the installed version.
        """
        yield scrapy.Request(
            url="https://rong.36kr.com/n/api/column/0/company?sortField=HOT_SCORE&p=1",
            callback=self.get_all_info,
            headers=self.headers,
            dont_filter=True,
            cookies=get_cookies(self.cookie_str),
        )
The redis_key here is read from the Redis queue, but this URL now needs cookies and headers in the request in order to return data. How should the spider be written to handle this situation? Any advice is appreciated, thank you.