for link in link_list:
yield Request(url=link,callback=self.get_title(),dont_filter=True,meta={"title_list":title_list})
# publish time
time_list = re.findall("},"published_at":"(.*?)",", response.text, re.S)
for i in range(0, len(time_list)):
print(time_list[i] + "\n")
self._requests_(title_list,link_list,time_list,"36kr")
def get_title(self, response):
    """Extract the page title from ``response`` and append it to the shared list.

    The list is read from ``response.meta["title_list"]`` and mutated in
    place; nothing is returned.

    Args:
        response: Object exposing ``url``, ``text`` and ``meta``.

    Raises:
        KeyError: If ``response.meta`` has no ``"title_list"`` entry.
        IndexError: If the title pattern / ``<title>`` element is not found.
    """
    title_list = response.meta["title_list"]
    # NOTE(review): when used as a request callback this presumably runs once
    # per response in arrival order, which need not match the order of the
    # link list -- confirm before pairing titles with links by index.
    if "video" in response.url:
        # Raw, single-quoted pattern: the double quotes are literal characters
        # of the JSON embedded in the page, so they need no escaping here.
        found = re.findall(
            r'"small_image","template_title":"(.*?)",', response.text, re.S
        )
        title_list.append(found[0].replace("_36", ""))
    # Kept as an independent check (not ``elif``), as in the original: a URL
    # containing both markers appends a title from each branch.
    if "html" in response.url:
        head_titles = etree.HTML(response.text).xpath("//head/title/text()")
        title_list.append(head_titles[0].replace("_36", ""))
def _requests_(self, title_list, link_list, time_list, *args):
for num in range(0, len(time_list)):
data = self.http.set_post().http_send("/spider/news/save-source",
{"title": title_list[num], "publishTime": time_list[num],
"link": link_list[num], "source": args[0]})
time.sleep(2)
print(data)
Inside the `for link in link_list` loop I yield a Request whose callback is `get_title`, but now the problem is how to call `_requests_`, the special interface method, so that it is passed the three lists `time_list`, `title_list`, and `link_list`.