multiple processes call Baidu Map api, to get longitude and latitude first, and then use longitude and latitude to obtain tolls and save them to excel (get one to save one). Because I want to speed up, I wrote multi-process, but after testing, multi-process is even slower than a single process. I would like to know why? Attached code (my ak is hidden):
< H2 > single process < / H2 >import requests
from openpyxl import load_workbook
import time
-sharp
def geocode(address):
base = url = "http://api.map.baidu.com/geocoder/v2/?address=" + address + "&output=json&ak="
response = requests.get(base)
answer = response.json()
return answer["result"]["location"]
-sharp
def get(origin_lat,origin_lng,destination_lat,destination_lng):
base = url = "http://api.map.baidu.com/direction/v2/driving?origin=" + str(origin_lng) + "," + str(origin_lat) + "&destination=" \
+ str(destination_lng)+","+str(destination_lat) + "&output=json&ak="
response = requests.get(base)
answer = response.json()
info = [answer["result"]["routes"][0]["duration"]/60,answer["result"]["routes"][0]["distance"]/1000,answer["result"]["routes"][0]["toll"]]
return info
if __name__=="__main__":
start = time.clock()
data = load_workbook(r"ODdata.xlsx")
table = data.get_sheet_by_name("locationcode")
nrows = table.max_row
ncols = table.max_column
origin_table = data.get_sheet_by_name("OD")
origin_nrows = origin_table.max_row
origin_ncols = origin_table.max_column
go_outset = []
go_destination = []
for r in range(2,nrows+1):
go_outset.append(table.cell(row=r,column=2).value) -sharp
for r in range(2,nrows+1):
go_destination.append(table.cell(row=r,column=5).value) -sharp
go_outset_count=1
go_destination_count=1
go_outset_locationcode = []
for i in go_outset:
try:
go_outset_locationcode.append(geocode(i)) -sharp
print("%d"%go_outset_count)
go_outset_count+=1
except:
go_outset_locationcode.append({"lat":"wrong","lng":"wrong"})
go_destination_locationcode = []
for i in go_destination:
try:
go_destination_locationcode.append(geocode(i))-sharp
print("%d" % go_destination_count)
go_destination_count+=1
except:
go_destination_locationcode.append({"lat":"wrong","lng":"wrong"})
go_outset_locationcodelist = []
go_destination_locationcodelist = []
for i in range(len(go_outset_locationcode)):
go_outset_locationcodelist.append(go_outset_locationcode[i].values())
for i in range(len(go_destination_locationcode)):
go_destination_locationcodelist.append(go_destination_locationcode[i].values())
-sharp excel
for i in range(2,nrows+1):
for j in range(3,5):
_ = table.cell(column=j, row=i, value=list(go_outset_locationcodelist[i-2])[j-3])
for i in range(2,nrows+1):
for j in range(6,8):
_ = table.cell(column=j, row=i, value=list(go_destination_locationcodelist[i-2])[j-6])
data.save(r"ODdata.xlsx")
-sharp
info = []
go_count=1
for i in range(0,len(go_outset)):
if list(go_outset_locationcodelist[i])[0]=="wrong":
continue
else:
try:
info.append(get(list(go_outset_locationcodelist[i])[0],list(go_outset_locationcodelist[i])[1],list(go_destination_locationcodelist[i])[0],list(go_destination_locationcodelist[i])[1]))
print("%d" % go_count)
go_count+=1
except:
info.append(["wrong","wrong","wrong"])
print("%d"%i)
finally:
for j in range(8,11):
_ = origin_table.cell(column=j, row=i+3, value=info[i][j - 8])
data.save(r"ODdata.xlsx")
elapsed = (time.clock() - start)
print("Time used:", elapsed)
< H2 > multiple processes < / H2 >
import requests
from openpyxl import load_workbook
import multiprocessing
from multiprocessing import Lock,Pool
import time
-sharp
def geocode(address):
base = url = "http://api.map.baidu.com/geocoder/v2/?address=" + address + "&output=json&ak="
response = requests.get(base)
answer = response.json()
return answer["result"]["location"]
-sharp excel
def save(info):
-sharp data_new = load_workbook(r"ODdata.xlsx")
-sharp origin_table_new = data_new["OD"]
for j in range(8, 11):
_ = origin_table.cell(column=j, row=i + 3, value=info[j-8])
data.save(r"ODdata.xlsx")
print("%d " % (i + 1))
-sharp
def getall(i,origin_lat,origin_lng,destination_lat,destination_lng):
try:
base = url = "http://api.map.baidu.com/direction/v2/driving?origin=" + str(origin_lng) + "," + str(
origin_lat) + "&destination=" \
+ str(destination_lng) + "," + str(
destination_lat) + "&output=json&ak="
response = requests.get(base)
answer = response.json()
info = [answer["result"]["routes"][0]["duration"] / 60, answer["result"]["routes"][0]["distance"] / 1000,
answer["result"]["routes"][0]["toll"]]
print("%d " % (i+1))
except:
info=["wrong", "wrong", "wrong"]
print("%d " % (i+1))
-sharp finally:
-sharp for j in range(8, 11):
-sharp _ = origin_table.cell(column=j, row=i + 3, value=info[j - 8])
-sharp data.save(r"ODdata.xlsx")
-sharp print("%d " % (i + 1))
return info
if __name__=="__main__":
start = time.clock()
data = load_workbook(r"ODdata.xlsx")
table = data["locationcode"]
nrows = table.max_row
ncols = table.max_column
origin_table = data["OD"]
origin_nrows = origin_table.max_row
origin_ncols = origin_table.max_column
go_outset = []
go_destination = []
for r in range(2,nrows+1):
go_outset.append(table.cell(row=r,column=2).value) -sharp
for r in range(2,nrows+1):
go_destination.append(table.cell(row=r,column=5).value) -sharp
go_outset_count=1
go_destination_count=1
go_outset_locationcode = []
for i in go_outset:
try:
go_outset_locationcode.append(geocode(i)) -sharp
print("%d"%go_outset_count)
go_outset_count+=1
except:
go_outset_locationcode.append({"lat":"wrong","lng":"wrong"})
go_destination_locationcode = []
for i in go_destination:
try:
go_destination_locationcode.append(geocode(i))-sharp
print("%d" % go_destination_count)
go_destination_count+=1
except:
go_destination_locationcode.append({"lat":"wrong","lng":"wrong"})
go_outset_locationcodelist = []
go_destination_locationcodelist = []
for i in range(len(go_outset_locationcode)):
go_outset_locationcodelist.append(go_outset_locationcode[i].values())
for i in range(len(go_destination_locationcode)):
go_destination_locationcodelist.append(go_destination_locationcode[i].values())
-sharp excel
for i in range(2,nrows+1):
for j in range(3,5):
_ = table.cell(column=j, row=i, value=list(go_outset_locationcodelist[i-2])[j-3])
for i in range(2,nrows+1):
for j in range(6,8):
_ = table.cell(column=j, row=i, value=list(go_destination_locationcodelist[i-2])[j-6])
data.save(r"ODdata.xlsx")
-sharp
for i in range(0,len(go_outset)):
if list(go_outset_locationcodelist[i])[0]=="wrong":
continue
else:
pool = multiprocessing.Pool(processes=5)
pool.apply_async(getall,(i,list(go_outset_locationcodelist[i])[0], list(go_outset_locationcodelist[i])[1],list(go_destination_locationcodelist[i])[0], list(go_destination_locationcodelist[i])[1],),callback=save)
-sharp pool.apply(getall, (i, list(go_outset_locationcodelist[i])[0], list(go_outset_locationcodelist[i])[1],
-sharp list(go_destination_locationcodelist[i])[0],
-sharp list(go_destination_locationcodelist[i])[1],))
-sharp p = multiprocessing.Process(target=save, args=(i,list(go_outset_locationcodelist[i])[0], list(go_outset_locationcodelist[i])[1],
-sharp list(go_destination_locationcodelist[i])[0], list(go_destination_locationcodelist[i])[1],lock))
-sharp p.start()
pool.close()
pool.join()
elapsed = (time.clock() - start)
print("Time used:", elapsed)