Crawler: I want to automatically open every page whose link title contains "aaa", but calling nightmare repeatedly never succeeds. Please help!

1. Requirement: on list pages 1-10, automatically open every link whose title contains "aaa".
2. Opening a single page currently works, but I want to process multiple pages automatically.
3. How can I call nightmare repeatedly to capture the link data from page 1 through page 10 (that is, call nightmare 10 times)? I have tried both async and eventproxy without success. Please help!

// child_process is bound to the short name `c`.
var c = require("child_process");
var request = require("request");
var cheerio = require("cheerio");
var path = require("path");
var fs = require("fs");
var Nightmare = require("nightmare");       
// Module-level Nightmare instance created with default options
// (pass { show: true } to display the browser window).
var nightmare = Nightmare({});
var async = require("async");
var EventProxy = require("eventproxy");
// Site origin.
var baseUrl = "http://www.abc.cn";
// Listing-page URL; the page number is appended after "pageNum=".
var requrl = "http://www.abc.cn/index.html?uid=123456&pageNum=";

// Presumably the first and last listing page to crawl (inclusive) — confirm against the caller.
var startPage = 1;
var endPage = 10;

// how to fix the code ...

/**
 * Loads one listing page, refreshes its pager, and hands the HTML of the
 * `.default_pgContainer` element to `acquireData`.
 *
 * @param {number|string} page - Page number appended to `requrl`.
 * @returns {Promise<void>} Settles once the page has been processed; failures
 *   are logged rather than rejected, so the promise can be awaited to run
 *   pages one after another.
 */
function getPageUrls(page) {
  // `.end()` closes the underlying Electron process, so an instance that has
  // been ended cannot serve another call. Each call therefore gets its own
  // instance instead of sharing one.
  var browser = Nightmare({});

  return browser
    .goto(requrl + page)
    .inject("js", "./js/jquery.min.js")
    .click(".default_pgRefresh")
    // Fixed delay kept from the original; presumably gives the refreshed list time to render.
    .wait(6000)
    .evaluate(function () {
      return document.querySelector(".default_pgContainer").innerHTML;
    })
    .end()
    .then(function (result) {
      // `result` is the value returned by the evaluate() callback above.
      acquireData(result);
    })
    .catch(function (error) {
      console.error("Search failed:", error);
    });
}

/**
 * Parses a fragment of listing HTML and, for every `<li>` whose link title
 * contains "aaa", opens the detail page via the shell `start` command and
 * logs its URL.
 *
 * @param {string} data - HTML containing the `<li>` entries of one listing page.
 */
function acquireData(data) {
    // Nothing to parse (e.g. the page evaluation produced no HTML).
    if (!data) {
        return;
    }

    var $ = cheerio.load(data);
    var tableData = $("li").toArray();

    for (var i = 0; i < tableData.length; i++) {
        // The link is read from child node index 1 of each <li>.
        let children = tableData[i]["children"] || [];
        let link = children[1];
        if (!link || !link.attribs) {
            continue;
        }

        let title = link.attribs.title;
        let detailUrl = link.attribs.href;
        if (typeof title !== "string" || typeof detailUrl !== "string") {
            continue;
        }

        // includes() also matches titles that START with "aaa".
        if (title.includes("aaa")) {
            // NOTE(review): detailUrl comes from scraped HTML and is concatenated
            // into a shell command unescaped; validate or quote it before
            // trusting arbitrary pages.
            c.exec("start " + baseUrl + detailUrl);
            console.log(baseUrl + detailUrl);
        }
    }
}



Mar.06,2021

First collect the links of the 10 pages, then crawl them one by one.


I solved the problem of using nightmare inside a loop. The code is as follows:

/**
 * Visits each listing URL in turn with a single Nightmare instance and passes
 * the HTML of `.default_pgContainer` to `acquireData`.
 *
 * A failure on one page is logged and the loop moves on to the next URL.
 *
 * @returns {Promise<void>} Resolves after all URLs were tried and the browser
 *   was closed.
 */
async function main() {
  const urls = [
    'http://www.abc.cn/index.html?uid=123456&pageNum=60',
    'http://www.abc.cn/index.html?uid=123456&pageNum=61',
    'http://www.abc.cn/index.html?uid=123456&pageNum=62'
  ]

  const nightmare = Nightmare({ show: true })

  try {
    for (const url of urls) {
      try {
        // The instance is reused across iterations, so .end() must NOT be
        // called inside the loop; each awaited chain runs one page.
        const html = await nightmare
          .goto(url)
          .inject('js', './js/jquery.min.js')
          .click('.default_pgRefresh')
          // Fixed delay kept from the original; presumably gives the refreshed list time to render.
          .wait(5000)
          .evaluate(function () {
            return document.querySelector('.default_pgContainer').innerHTML;
          })

        acquireData(html)
      } catch (error) {
        console.error('Search failed:', url, error)
      }
    }
  } finally {
    // Always close the browser, even if something above threw.
    await nightmare.end()
  }
}

main().catch(console.error)
MySQL Query : SELECT * FROM `codeshelper`.`v9_news` WHERE status=99 AND catid='6' ORDER BY rand() LIMIT 5
MySQL Error : Disk full (/tmp/#sql-temptable-64f5-1eaf17c-47ffe.MAI); waiting for someone to free some space... (errno: 28 "No space left on device")
MySQL Errno : 1021
Message : Disk full (/tmp/#sql-temptable-64f5-1eaf17c-47ffe.MAI); waiting for someone to free some space... (errno: 28 "No space left on device")
Need Help?