I copied a crawler from a website to scrape some product information, but I don't know why the individual product pages just can't be crawled.
However, I tested all the other pages and they are crawlable. Why?
The error output below is what I found, but it doesn't seem to have anything to do with the crawler itself.
return error
events.js:183
throw er; // Unhandled "error" event
^
Error: read ECONNRESET
at _errnoException (util.js:1022:11)
at TLSWrap.onread (net.js:628:25)
Attached code:
// Third-party modules: cheerio parses HTML, iconv-lite decodes the response bytes.
var cheerio = require("cheerio");
// NOTE: despite the name, this is the `https` module (the target site is https).
var http = require("https");
var iconv = require("iconv-lite");
// Crawl state shared by getTitle() / getData().
var index = 1; // first results page to fetch
var n = 0; // index of the first collected link to fetch in getData()
var url = "https://www.thermofisher.com/search/browse/category/cn/zh/602198?navId=10861&resultPage=";
var titles = []; // product links collected from the listing pages
var datas = []; // product names extracted from the detail pages
// Step 1: collect the product detail-page links (hrefs) from the paginated listing.
/**
 * Fetches one results page of the category listing, pushes every product
 * link (`h2 > a` href) onto the module-level `titles` array, then either
 * recurses to the next page (up to page 5) or hands off to getData().
 *
 * Fixes:
 *  - The recursive call used the undefined variable `PPindex` (a garbled
 *    `++index`), which threw a ReferenceError; it now advances with `i + 1`.
 *  - An "error" listener is attached to the request. Without it, a dropped
 *    connection (read ECONNRESET) is emitted as an unhandled "error" event
 *    and crashes the whole process — the exact symptom reported above.
 *
 * @param {string} url base listing URL (page number is appended)
 * @param {number} i   1-based page number to fetch
 */
function getTitle(url, i) {
    console.log("now get " + i + " page");
    http.get(url + i + "&resultsPerPage=15", function(sres) {
        var chunks = [];
        sres.on("data", function(chunk) {
            chunks.push(chunk);
        });
        sres.on("end", function() {
            var html = iconv.decode(Buffer.concat(chunks), "UTF-8");
            var $ = cheerio.load(html, { decodeEntities: false });
            $("h2>a").each(function(idx, element) {
                var $element = $(element);
                titles.push({
                    title: $element.attr("href")
                });
            });
            if (i < 5) {
                // Advance to the next listing page (was the undefined `PPindex`).
                getTitle(url, i + 1);
            } else {
                console.log(titles);
                console.log("over");
                getData(titles, n);
            }
        });
    }).on("error", function(err) {
        // Log instead of crashing on connection resets.
        console.error("listing page " + i + " failed: " + err.message);
    });
}
// Step 2: fetch each collected product page and extract the product name.
/**
 * Fetches the n-th collected product page, extracts the product name from
 * ".container h1" into the module-level `datas` array, then recurses to the
 * next link until the list is exhausted.
 *
 * Fixes:
 *  - The recursive call used the undefined variable `PPn` (a garbled `++n`),
 *    which threw a ReferenceError; it now advances with `n + 1`.
 *  - The recursion now passes the `urls` parameter instead of reaching for
 *    the global `titles`, so the function works for any list it is given.
 *  - An "error" listener is attached to the request. A single product page
 *    resetting the connection (read ECONNRESET) previously crashed the
 *    process via an unhandled "error" event; now it is logged and the crawl
 *    continues with the next link.
 *
 * @param {Array<{title: string}>} urls links collected by getTitle()
 * @param {number} n                    index of the link to fetch
 */
function getData(urls, n) {
    console.log("now get " + n + " data");
    var link = urls[n].title.toString();
    console.log(link);
    http.get(link, function(sres) {
        var chunks = [];
        sres.on("data", function(chunk) {
            chunks.push(chunk);
        });
        sres.on("end", function() {
            var html = iconv.decode(Buffer.concat(chunks), "UTF-8");
            var $ = cheerio.load(html, { decodeEntities: false });
            $(".container h1").each(function(idx, element) {
                var $element = $(element);
                console.log($element);
                datas.push({
                    name: $element.text()
                });
            });
            if (n < urls.length - 1) {
                // Advance to the next link (was `getData(titles, PPn)`).
                getData(urls, n + 1);
            } else {
                console.log("ok");
                console.log(datas);
            }
        });
    }).on("error", function(err) {
        // Don't let one reset connection kill the crawl; skip to the next link.
        console.error("product page " + n + " failed: " + err.message);
        if (n < urls.length - 1) {
            getData(urls, n + 1);
        }
    });
}
/**
 * Entry point: announces the start of the crawl and kicks off the listing
 * scrape at the first results page.
 */
function main() {
    console.log("start");
    getTitle(url, index);
}

main();