1. Requirement: automatically go through listing pages 1-10 and, on each page, open the links of the entries whose title contains "aaa".
2. Processing a single page currently works, but I want to process multiple pages automatically.
3. How can I call nightmare repeatedly to capture the link data from page 1 through page 10 (that is, run nightmare 10 times)? I have tried both async and eventproxy without success. Any help is appreciated!
var c = require("child_process");
var request = require("request");
var cheerio = require("cheerio");
var path = require("path");
var fs = require("fs");
var Nightmare = require("nightmare");
var nightmare = Nightmare({}); // show: true
var async = require("async");
var EventProxy = require("eventproxy");
// Site origin; acquireData prepends it to the href values scraped from the listing.
var baseUrl = "http://www.abc.cn";
// Listing URL without the page number; getPageUrls appends the page to load.
var requrl = "http://www.abc.cn/index.html?uid=123456&pageNum=";
// First and last listing page of the intended scrape range.
// NOTE(review): neither value is referenced by the code visible in this file - confirm where they are used.
var startPage = 1;
var endPage = 10;
// how to fix the code ...
/**
 * Loads one listing page in a headless browser, refreshes the listing, and
 * hands the resulting markup to acquireData.
 *
 * Every call drives its own Nightmare instance: an instance is shut down by
 * .end() and cannot be reused afterwards, so sharing one instance across
 * calls only ever works for the first page.
 *
 * @param {number|string} page - Page number appended to `requrl`.
 * @returns {Promise<*>} Resolves with whatever acquireData returns for the
 *   page, or with undefined when the page could not be scraped (the failure
 *   is logged, not rethrown, so a batch run can continue with the next page).
 */
function getPageUrls(page) {
  const browser = Nightmare({}); // pass { show: true } to watch the browser window

  return browser
    .goto(requrl + page)
    .inject("js", "./js/jquery.min.js")
    .click(".default_pgRefresh")
    // Fixed delay that gives the listing time to reload after the refresh click.
    .wait(6000)
    .evaluate(function () {
      return document.querySelector(".default_pgContainer").innerHTML;
    })
    .end()
    .then(function (result) {
      // `result` is the value returned from the evaluate() callback above.
      return acquireData(result);
    })
    .catch(function (error) {
      console.error("Failed to scrape page " + page + ":", error);
    });
}

/**
 * Scrapes a range of listing pages one after another.
 *
 * Pages are processed sequentially so that only one browser instance is
 * alive at any time.
 *
 * @param {number} firstPage - First page number to scrape (inclusive).
 * @param {number} lastPage - Last page number to scrape (inclusive).
 * @returns {Promise<Array<*>>} Resolves with one getPageUrls result per page,
 *   in page order.
 */
function getPageUrlsInRange(firstPage, lastPage) {
  const results = [];
  let chain = Promise.resolve();

  for (let page = firstPage; page <= lastPage; page++) {
    chain = chain
      .then(function () {
        return getPageUrls(page);
      })
      .then(function (pageResult) {
        results.push(pageResult);
      });
  }

  return chain.then(function () {
    return results;
  });
}
/**
 * Opens a URL with the Windows `start` shell command.
 *
 * @param {string} url - Address to open.
 */
function openInBrowser(url) {
  // The URL comes from scraped markup, so it is quoted before reaching the
  // shell: unquoted, an `&` in a query string would be read by cmd.exe as a
  // command separator. `start` treats its first quoted argument as a window
  // title, hence the empty "" in front.
  const quotedUrl = '"' + url.replace(/"/g, "%22") + '"';
  c.exec('start "" ' + quotedUrl, function (error) {
    if (error) {
      console.error("Could not open " + url + ":", error.message);
    }
  });
}

/**
 * Scans scraped listing markup for entries whose title contains "aaa", opens
 * each matching detail page, and logs its URL.
 *
 * The link of an entry is read from the second child node of its <li>;
 * entries that have no element there, or whose element lacks a title or
 * href attribute, are skipped.
 *
 * @param {string} data - HTML fragment containing the <li> entries.
 * @returns {string[]} URLs (baseUrl + href) of the matching entries, in
 *   document order.
 */
function acquireData(data) {
  const $ = cheerio.load(data);
  const items = $("li").toArray();
  const matches = [];

  for (let i = 0; i < items.length; i++) {
    const link = items[i].children && items[i].children[1];
    const attribs = link && link.attribs;
    if (!attribs || typeof attribs.title !== "string" || typeof attribs.href !== "string") {
      continue;
    }
    // indexOf returns 0 for a title that starts with "aaa", so compare with -1.
    if (attribs.title.indexOf("aaa") === -1) {
      continue;
    }

    const url = baseUrl + attribs.href;
    openInBrowser(url);
    console.log(url);
    matches.push(url);
  }

  return matches;
}