problem description
I am trying to crawl the penalty information of several listed companies from the Shenzhen Stock Exchange website, using multiple processes so that the companies are handled at the same time. However, the code only runs for one of the company names, which is very confusing.
I would like to ask how the code below should be changed, and what else in it could be optimized. Thank you very much!
the code is as follows:
related codes
#
import requests,time,os,multiprocessing
from selenium import webdriver
def szse(name):
    """Query the SZSE credit-record page for one company and save the results.

    Opens an Edge browser, loads the Shenzhen Stock Exchange credit-record
    page, types ``name`` into the search box and submits the query.  A
    screenshot of the result page is always saved under ``.\\_``.  When the
    result page contains matching links, the document each link points to
    is downloaded from ``reportdocs.static.szse.cn`` and written to
    ``.\\_\\<name>__<n>.pdf`` (``n`` counts from 1).

    Args:
        name: Company name to type into the search box.

    Raises:
        requests.HTTPError: If a document download returns an error status.
        Exception: Any Selenium error raised while driving the page is
            propagated after the browser has been closed.
    """
    driver = webdriver.Edge()
    try:
        driver.maximize_window()
        driver.get("http://www.szse.cn/disclosure/listed/credit/record/index.html")
        time.sleep(5)  # fixed pause so the page can finish loading

        name_input = driver.find_element_by_id("1759_cxda_tab1_txtDsr")
        # The button carries two classes.  find_element_by_class_name accepts
        # a single class only, so match both with a CSS selector instead.
        query_button = driver.find_element_by_css_selector(
            ".confirm-query.btn-query-primary"
        )
        name_input.clear()
        name_input.send_keys(name)
        query_button.click()
        time.sleep(20)  # fixed pause so the query results can render

        # NOTE(review): the link text here is an empty string; it looks like
        # the original (non-ASCII) text was lost -- confirm the intended value.
        pdf_links = driver.find_elements_by_link_text("")

        if not pdf_links:
            # save_screenshot could also be spelled get_screenshot_as_file
            driver.save_screenshot(".\\_\\%s.png" % (name + "_"))
            print(name + ":")
        else:
            driver.save_screenshot(".\\_\\%s.png" % (name + "_"))
            print(name + "pdf")
            for index, link in enumerate(pdf_links, start=1):
                partial_url = link.get_attribute("encode-open")
                url = "http://reportdocs.static.szse.cn/" + partial_url
                # A timeout keeps one stalled download from hanging the worker.
                res = requests.get(url, timeout=60)
                res.raise_for_status()
                time.sleep(10)  # pause between downloads
                # "with" guarantees the file is flushed and closed.
                with open(".\\_\\%s__%d.pdf" % (name, index), "wb") as pdf_file:
                    pdf_file.write(res.content)
                print(name + str(index))
            print(name + str(len(pdf_links)))
    finally:
        # Always close the browser, even when a step above raised, so failed
        # runs do not leave Edge windows and driver processes behind.
        driver.quit()
#
if __name__ == "__main__":
    # Script entry point: read company names, crawl each one in its own
    # worker process, then print the elapsed minutes and seconds.
    print("")
    nameSZ = input()  # all company names on one line, separated by whitespace
    st = time.time()
    # split() with no argument drops the empty strings that split(" ")
    # produces for repeated, leading or trailing spaces.
    SZlist = nameSZ.split()
    # exist_ok=True lets the script be rerun when the folder already exists.
    os.makedirs(".\\_", exist_ok=True)

    pool = multiprocessing.Pool()
    # Submit one task per company.  The worker defined in this file is szse;
    # args must be a one-element tuple per call -- passing the whole list
    # (args=SZlist) would unpack every name as a separate positional argument.
    tasks = [
        (name, pool.apply_async(func=szse, args=(name,)))
        for name in SZlist
    ]
    pool.close()
    pool.join()

    # apply_async keeps a worker's exception inside its AsyncResult; it only
    # surfaces when get() is called.  Without this loop failures are silent.
    for name, task in tasks:
        try:
            task.get()
        except Exception as exc:  # top-level boundary: report and continue
            print("%s: crawl failed: %r" % (name, exc))

    elapsed = time.time() - st
    print(" %d %d " % (elapsed // 60, round(elapsed % 60)))