problem description
The pyspider installation is the latest version.
the environmental background of the problems and what methods you have tried
I reinstalled with Python 3.6, the version used by the latest pyspider on GitHub.
related codes
// Please paste the code as text below (do not replace the code with pictures)
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2018-07-13 15:59:53
# Project: TripAdvisor
from pyspider.libs.base_handler import *
import pymongo
class Handler(BaseHandler):
    """pyspider handler that scrapes London attraction pages from TripAdvisor.

    The crawl starts from one attraction listing page, follows every
    attraction link found on it, extracts a handful of text fields from
    each detail page and stores the resulting dict in the ``london``
    collection of the ``trip`` MongoDB database.
    """

    crawl_config = {}

    # The client is created once, when the class body is executed, and is
    # shared by every call to save_to_mongo().
    client = pymongo.MongoClient("localhost")
    db = client["trip"]

    @every(minutes=24 * 60)
    def on_start(self):
        """Queue the attraction listing page as the seed of the crawl."""
        self.crawl(
            "https://www.tripadvisor.cn/Attractions-g186338-Activities-c47-t163-London_England.html",
            callback=self.index_page,
            validate_cert=False,
        )

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue a detail-page crawl for every attraction link on the listing.

        Args:
            response: the fetched listing page; ``response.doc`` is queried
                with a CSS selector.
        """
        # "#ATTR_ENTRY_" is an ID selector. The pasted source had the "#"
        # mangled into "-sharp", which matches nothing on the page.
        links = response.doc(
            "#ATTR_ENTRY_ > div.attraction_clarity_cell > div > div > div.listing_info > div.listing_title > a"
        )
        for each in links.items():
            # NOTE(review): on_start passes validate_cert=False for this same
            # host but this call does not -- confirm whether detail pages
            # need the flag too.
            self.crawl(each.attr.href, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Extract the attraction fields from a detail page.

        Args:
            response: the fetched detail page.

        Returns:
            dict with the keys ``name``, ``rating``, ``address``, ``phone``,
            ``duration`` and ``url``. Every value except ``url`` is the text
            of the elements matched by the corresponding CSS selector.
        """
        return {
            "name": response.doc(".heading_title").text(),
            "rating": response.doc("div > .more").text(),
            "address": response.doc(".location > .address").text(),
            "phone": response.doc(".phone > div").text(),
            "duration": response.doc(".hours > .duration").text(),
            "url": response.url,
        }

    def on_result(self, result):
        """Persist a non-empty result to MongoDB.

        Falsy results (``None``, empty dict) are ignored.
        """
        if result:
            self.save_to_mongo(result)

    def save_to_mongo(self, result):
        """Insert ``result`` into the ``london`` collection and log success.

        Args:
            result: the dict to store as one document.
        """
        # Collection.insert() is deprecated since PyMongo 3.0 and removed in
        # 4.0; insert_one() is the supported single-document call.
        outcome = self.db["london"].insert_one(result)
        if outcome.inserted_id is not None:
            print("saved to mongo", result)