-sharp -*- coding: utf-8 -*-
import time
import json
import tempfile
from PIL import Image
from selenium import webdriver
-sharp keys
from selenium.webdriver.common.keys import Keys
class CreditSpider(object):
def __init__(self):
-sharp PhantomJS
self.driver = webdriver.PhantomJS()
-sharp self.driver.maximize_window() -sharp
-sharp PhantomJS
-sharp self.driver = webdriver.PhantomJS(executable_path="/Users/bianyachao/Downloads/phantomjs-2.1.1-macosx/bin/phantomjs")
self.driver.set_window_size(1366, 1098)
def get_credit(self):
url = "http://hd.chinatax.gov.cn/fagui/action/InitCredit.do"
-sharp gettime.sleep(2)
self.driver.get(url)
time.sleep(2)
-sharp
-sharp self.driver.save_screenshot("nsrxy.png")
self.on_click(1)
def on_click(self, n):
while n < 3:
if n == 1:
self.driver.find_element_by_xpath("""//a[@onclick="changeParam("articleField01","")"]""").click()
else:
self.driver.find_element_by_xpath("//*[@title=""]").click()
time.sleep(2)
self.driver.save_screenshot("yzm.png")
self.jietu_img(n)
self.click_yzm(n)
n += 1
return
def click_yzm(self, n):
print("the path is {}".format(n))
element = self.driver.find_element_by_id("verifyCode")
self.driver.find_element_by_id("verifyCode").clear()
-sharp im = Image.open("codeImage.png")
-sharp im.show()
yzm = input("please input code: ")
element.send_keys(yzm)
self.driver.find_element_by_xpath("""//*[@id="layui-layer1"]/div[3]/a[1]""").click()
time.sleep(4)
-sharp self.driver.save_screenshot("result{}.png".format(n))
self.parse_page(n)
return
def parse_page(self, n):
try:
res_list = []
data = self.driver.find_elements_by_xpath("//td[@class="sv_hei"]//tr/td")
if not data:
self.on_click(n)
for a in data:
d = a.text
res_list.append(d)
self.save_res(res_list)
except Exception as e:
print(e)
return
def save_res(self, res_list):
cont_list = []
del res_list[-1]
for i in range(0, 3):
del res_list[0]
for i in range(0, len(res_list), 3):
res_dic = {
"NSSBH": res_list[i],
"NSRMC": res_list[i+1],
"YEAR": res_list[i+2],
}
cont_list.append(res_dic)
print(cont_list)
-sharp json_str = json.dumps(cont_list)
-sharp with open("res.text", "a+") as f:
-sharp f.write(json_str + "\n\n")
return
def jietu_img(self, n):
-sharp
-sharp x
-sharp y
-sharp x
-sharp y
png = Image.open("yzm.png")
if n == 1:
bbox = (531, 510, 731, 560)
else:
bbox = (531, 512, 731, 562)
region = png.crop(bbox) -sharp region
time.sleep(1)
region.save("codeImage.png")
return
if __name__ == "__main__":
s = time.time()
a = CreditSpider()
a.get_credit()
-sharp a.jietu_img_2()
print(time.time()-s)
error report:
Traceback (most recent call last):
File "credit_spider.py", line 122, in <module>
a.get_credit()
File "credit_spider.py", line 31, in get_credit
self.on_click(1)
File "credit_spider.py", line 36, in on_click
self.driver.find_element_by_xpath("""//a[@onclick="changeParam("articleField01","")"]""").click()
File "/root/anaconda3/lib/python3.6/site-packages/selenium/webdriver/remote/webelement.py", line 80, in click
self._execute(Command.CLICK_ELEMENT)
File "/root/anaconda3/lib/python3.6/site-packages/selenium/webdriver/remote/webelement.py", line 501, in _execute
return self._parent.execute(command, params)
File "/root/anaconda3/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 308, in execute
self.error_handler.check_response(response)
File "/root/anaconda3/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.ElementNotVisibleException: Message: {"errorMessage":"Element is not currently visible and may not be manipulated","request":{"headers":{"Accept":"application/json","Accept-Encoding":"identity","Connection":"close","Content-Length":"81","Content-Type":"application/json;charset=UTF-8","Host":"127.0.0.1:48854","User-Agent":"Python http auth"},"httpVersion":"1.1","method":"POST","post":"{\"id\": \":wdc:1531296937655\", \"sessionId\": \"9443d970-84e2-11e8-9955-092f7358cfea\"}","url":"/click","urlParsed":{"anchor":"","query":"","file":"click","directory":"/","path":"/click","relative":"/click","port":"","host":"","password":"","user":"","userInfo":"","authority":"","protocol":"","source":"/click","queryKey":{},"chunks":["click"]},"urlOriginal":"/session/9443d970-84e2-11e8-9955-092f7358cfea/element/:wdc:1531296937655/click"}}
Screenshot: available via screen
what is the reason for this? Ask for God"s guidance, thank you!