the problem with the old code is res.on ("end",.) Is asynchronous, and maybe titleHref is still empty. Now I have updated the code to solve this problem, but how to print the resulting ans array, print is Synchronize, always print out empty data.
New Code:
const cheerio = require("cheerio");
const http = require("http");
const iconv = require("iconv-lite");
let baseUrl = "http://www.ygdy8.net/html/gndy/dyzz/list_23_";
let Host = "http://www.ygdy8.net/";
const totalPage = 2; //
let ans = [];
//
function getTitleHref(url,page) {
let startUrl = url+page+".html";
http.get(startUrl,function(res) {
const { statusCode } = res;
let chunks = [];
res.on("data",function(chunk){
chunks.push(chunk);
});
res.on("end",function(){
let title = [];
let html = iconv.decode(Buffer.concat(chunks),"gb2312");
let $ = cheerio.load(html, {decodeEntities: false});
// console.log($);
$(".co_content8 .ulink").each(function(i,d) {
let $d = $(d);
let titleHref = [];
titleHref.push({
href: $d.attr("href")
});
getLink(titleHref)
});
// console.log(ans);
});
});
}
// /*
//
function getLink(titleHref) {
console.log("getLink");
console.log(titleHref);
if(titleHref) {
titleHref.forEach(function(v,k) {
console.log("~~~~~~~~~~~~~~~~~~~~");
let infoUrl = Host + v.href;
// console.log(infoUrl);
http.get(infoUrl,function(res) {
const { statusCode } = res;
const contentType = res.headers["content-type"];
let error;
if (statusCode !== 200) {
error = new Error("\n" +
`: ${statusCode}`);
}
if (error) {
console.error(error.message);
//
res.resume();
return;
}
console.log("getlink http");
let chunks = [];
res.on("data",function(chunk) {
chunks.push(chunk);
});
res.on("end", function(){
try {
let html = iconv.decode(Buffer.concat(chunks),"gb2312");
let $ = cheerio.load(html, {decodeEntities: false});
let bt = "";
bt = $("-sharpZoom td").children("a").attr("href");
// console.log(bt);
// console.log(typeof bt)
ans.push(bt);
}catch (e) {
console.error("bt",e.message);
}
})
}).on("error", (e) => {
console.error(`: ${e.message}`);
});
});
}
};
// */
for(let i = 1; i <= totalPage; iPP) {
getTitleHref(baseUrl,i);
};
-- split line-
const cheerio = require("cheerio");
const http = require("http");
const iconv = require("iconv-lite");
let baseUrl = "http://www.ygdy8.net/html/gndy/dyzz/list_23_";
let Host = "http://www.ygdy8.net/";
let titleHref = [];
const totalPage = 1; //
let res = [];
//
function getTitleHref(url,page) {
let startUrl = url+page+".html";
http.get(startUrl,function(res) {
let chunks = [];
res.on("data",function(chunk){
chunks.push(chunk);
});
res.on("end",function(){
let title = [];
let html = iconv.decode(Buffer.concat(chunks),"gb2312");
let $ = cheerio.load(html, {decodeEntities: false});
// console.log($);
$(".co_content8 .ulink").each(function(i,d) {
let $d = $(d);
titleHref.push({
href: $d.attr("href")
});
});
console.log(titleHref);
});
if(page <= totalPage) {
getTitleHref(url,PPpage);
}else {
console.log(page);
getLink(titleHref);
}
});
}
//
function getLink(titleHref) {
console.log("getLink");
titleHref.forEach(function(v,k) {
console.log("~~~~~~~~~~~~~~~~~~~~");
let infoUrl = Host + v.href;
console.log(infoUrl);
// try {
http.get(infoUrl,function(res) {
console.log("getlink http");
let chunks = [];
res.on("data",function(chunk) {
chunks.push(chunk);
});
res.on("end", function(){
let html = iconv.decode(Buffer.concat(chunks),"gb2312");
let $ = cheerio.load(html, {decodeEntities: false});
let reg = /.*/;
let info = "";
let bt = "";
let textInfo = $(".co_content8 -sharpZoom p").eq(0).text();
info = textInfo.match(reg)[0];
bt = $("-sharpZoom td").children("a").attr("href");
res.push({
Info:info,
Bt:bt
});
console.log(res);
})
//
//res.on("error",function(){
// console.log("error");
//})
})
// }catch(e) {
// console.log(e);
// }
});
};
getTitleHref(baseUrl,1)
if there is too much asynchronism in the above code, I don"t know what the problem is. Ask node to instruct the titleHref.forEach in the, getLink () function, and the node thread will collapse. Console.log ("enter getlink http") is also not printed