# -*- coding: utf-8 -*-
import json
import scrapy
from scrapy import Request
from cosmetics.items import CosmeticsItem
class CosSpider(scrapy.Spider):
    """Scrape lipstick listings from JD.com search results.

    Yields one ``CosmeticsItem`` per product ``<li>`` on the results page,
    then re-requests the page while a "next page" link is present.
    """

    name = "cos"
    # allowed_domains = ["www.jd.com"]
    # NOTE(review): the commented start_urls below is the real search page;
    # the root URL used in start_requests will not contain #J_goodsList.
    # start_urls = ["https://search.jd.com/Search?keyword=%E5%8F%A3%E7%BA%A2&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&stock=1&page=1&s=54&click=0"]

    def start_requests(self):
        """Issue the initial request that kicks off parsing."""
        start_url = "https://search.jd.com/"
        yield Request(url=start_url, callback=self.parse, meta={"data": "0"})

    def parse(self, response):
        """Extract one item per product in the goods list.

        The original bug: ``li.xpath("//div[...]")`` uses an *absolute*
        XPath, which always searches the WHOLE document — so every loop
        iteration returned data for the entire page (or the first product)
        instead of the current ``<li>``.  A relative XPath (``.//``) scopes
        the query to the selector it is called on.
        """
        page_next = response.css("#J_bottomPage > span.p-num > a.pn-next")

        for li in response.css("#J_goodsList > ul > li"):
            # A fresh item per <li>: reusing one item object would make every
            # yielded item the same mutated instance.
            item = CosmeticsItem()
            # extract_first("") avoids IndexError when a node is missing.
            # Field names assumed to match CosmeticsItem.fields — TODO confirm.
            item["img"] = "https:" + li.xpath('.//div[@class="p-img"]/a/img/@src').extract_first("")
            item["price"] = li.xpath('.//div[@class="p-price"]//i/text()').extract()
            item["name"] = li.xpath('.//div[@class="p-name p-name-type-2"]//em/text()').extract_first("").strip()
            item["commits"] = li.xpath('.//div[@class="p-commit"]//a/text()').extract_first("")
            item["shop"] = li.xpath('.//div[@class="p-shop"]/span/a/text()').extract_first("")
            yield item

        # Follow pagination while a "next" link exists; dont_filter allows
        # revisiting the same URL.
        if page_next:
            yield Request(url=response.url, callback=self.parse,
                          dont_filter=True, meta={"data": "2"})
Question: in the spider above I iterate over the extracted list of `<li>` selectors, but each XPath call returns the data for the whole page at once instead of just the current `<li>` — as if the query were not scoped to the element being traversed. Why does iterating one by one not restrict the XPath to each element, and how can I fix this?

Answer: `li.xpath("//div[...]")` starts with `//`, which is an *absolute* XPath and always searches the entire document regardless of which selector it is called on. Use a *relative* XPath, `li.xpath(".//div[...]")`, so the query is scoped to the current `<li>`.