import requests
from lxml import html
from requests.exceptions import RequestException
import time
import queue
import threading
class MyThread (threading.Thread):
def __init__(self, func):
threading.Thread.__init__(self)
self.func = func
def run(self):
self.func()
def worker ():
while not q.empty():
page = q.get() -sharp
print(" : " + str(page) + "url")
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"}
main_page_url = "https://www.qiushibaike.com/hot/page/" + str(page)
url_list = []
try:
rep = requests.get(main_page_url, headers = headers)
time.sleep(1)
if rep.status_code == 200:
print("" +str(page) + "")
con = rep.content
sel = html.fromstring(con)
urls = sel.xpath("//a[@class="contentHerf"]/@href")
for url in urls:
message_url = "https://www.qiushibaike.com" + url
url_list.append(message_url)
print(url_list)
**-sharpreturn url_list**
except RequestException:
print("")
return None
time.sleep(1)
def main ():
threads = []
-sharpall_url = []
-sharpurl_list = worker()
for page in range(1, 7): -sharp6
q.put(page)
for i in range(threadNum): -sharp2
thread = MyThread(worker)
thread.start()
-sharpall_url.append(url_list)
threads.append(thread)
for thread in threads:
thread.join() -sharp22
if name ="_ _ main__":
q = queue.Queue()
threadNum = 2 -sharp
main()
in worker, each return only runs the first two threads and then ends. Normally, it runs two threads and then continues to run two threads. Or the way I return the data is wrong.