GIS中有个专业术语叫“最邻近分析”,属于空间统计或空间分析的范畴。ArcGIS中有近邻分析、生成近邻表、计算邻近点距离等相关功能。
举例如下,有下面两张模拟表:
需求是要计算出每个小区同城市的最近的充电桩。也就是对于表二每行小区,在表一对应相同城市的充电桩里找到最近的一行。
如下图,对于表二中的A城市的9000003小区,从表一同为A城市的充电桩中找到最近的800008充电桩。
python代码如下:
# -*- coding:utf-8 -*-
from math import radians, cos, sin, asin, sqrt,pi
import pandas as pd
import itertools
import json
def geodistance(lng1, lat1, lng2, lat2):
    """Return the great-circle distance between two points, in whole metres.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lng1, lat1: Longitude and latitude of the first point, in degrees.
            Anything accepted by ``float()`` works (numbers or numeric
            strings).
        lng2, lat2: Longitude and latitude of the second point, in degrees.

    Returns:
        The distance in metres, truncated to an ``int``.

    Raises:
        ValueError / TypeError: If a coordinate cannot be converted by
            ``float()``.
    """
    lng1, lat1, lng2, lat2 = map(
        radians, [float(lng1), float(lat1), float(lng2), float(lat2)]
    )
    dlon = lng2 - lng1
    dlat = lat2 - lat1
    # Haversine term: the two addends must be summed (the operator between
    # them was missing, which made the expression a syntax error).
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push ``a`` marginally above 1 for
    # near-antipodal points, which would make asin() raise; clamp it.
    a = min(1.0, a)
    earth_radius_km = 6371
    distance = 2 * asin(sqrt(a)) * earth_radius_km * 1000
    # Truncate (not round) to whole metres.
    return int(distance)
def getNear(point1, regionField, lngField, latField):
    """Find the record in ``df_points2`` nearest to ``point1`` in its region.

    Reads the module-level ``df_points2``, which ``getResult`` indexes by the
    region column before calling this function.

    Args:
        point1: A row (``pandas.Series``) holding at least the region,
            longitude and latitude fields.
        regionField: Name of the region field in ``point1``; its value is
            looked up in the index of ``df_points2``.
        lngField: Name of the longitude field (same name in both tables).
        latField: Name of the latitude field (same name in both tables).

    Returns:
        A flat list: the values of ``point1``, then the region, the remaining
        columns of the nearest ``df_points2`` row, and the distance in metres.
        When the region has no rows in ``df_points2`` every one of those
        trailing positions is filled with ``'-'``.
    """
    region = point1[regionField]
    if region not in df_points2.index:
        # One placeholder for the region, one per remaining column, and one
        # for the distance, so the row width matches the matched case.
        return point1.to_list() + ['-'] + ['-' for _ in df_points2.columns] + ['-']

    candidates = df_points2.loc[region]
    # .loc returns a Series when exactly one row matches; normalise it to a
    # one-row DataFrame so the row-wise apply below works either way.
    if isinstance(candidates, pd.Series):
        candidates = pd.DataFrame([candidates])

    # Append the distance to point1 as an extra trailing column.
    candidates = candidates.apply(
        lambda row: row.to_list()
        + [geodistance(row[lngField], row[latField],
                       point1[lngField], point1[latField])],
        axis=1,
        result_type='expand',
    )
    distanceField = candidates.columns[-1]
    # reset_index turns the region index into the first column of the row.
    nearest = candidates.sort_values(distanceField).reset_index().iloc[0]
    return point1.to_list() + nearest.tolist()
def getDistance(point1, regionField, lngField, latField):
    """Pair ``point1`` with every same-region row of ``df_points2``.

    Reads the module-level ``df_points2``, which ``getResult`` indexes by the
    region column before calling this function.

    Args:
        point1: A row (``pandas.Series``) holding at least the region,
            longitude and latitude fields.
        regionField: Name of the region field in ``point1``; its value is
            looked up in the index of ``df_points2``.
        lngField: Name of the longitude field (same name in both tables).
        latField: Name of the latitude field (same name in both tables).

    Returns:
        A JSON string encoding a list of records. Each record holds the
        values of ``point1`` followed by the region, the remaining columns of
        one ``df_points2`` row and the distance in metres, keyed by position
        ("0", "1", ...). Records are sorted by ascending distance. When the
        region has no rows in ``df_points2`` the list contains a single
        record whose trailing positions are ``'-'``.
    """
    region = point1[regionField]
    if region not in df_points2.index:
        placeholder = (
            point1.to_list() + ['-'] + ['-' for _ in df_points2.columns] + ['-']
        )
        # Key by position so the record has the same shape as the records
        # produced by to_json(orient='records') in the matched case.
        result = dict(zip([str(i) for i in range(len(placeholder))], placeholder))
        print(result)
        return json.dumps([result])

    candidates = df_points2.loc[region]
    # .loc returns a Series when exactly one row matches; normalise it to a
    # one-row DataFrame so the row-wise apply below works either way.
    if isinstance(candidates, pd.Series):
        candidates = pd.DataFrame([candidates])

    # Append the distance to point1 as an extra trailing column.
    candidates = candidates.apply(
        lambda row: row.to_list()
        + [geodistance(row[lngField], row[latField],
                       point1[lngField], point1[latField])],
        axis=1,
        result_type='expand',
    )
    distanceField = candidates.columns[-1]
    # reset_index turns the region index into the first column of each row.
    candidates = candidates.sort_values(distanceField).reset_index()
    result = candidates.apply(
        lambda row: point1.to_list() + row.to_list(),
        result_type='expand',
        axis=1,
    )
    return result.to_json(orient='records')
def getResult(xlsxPath, regionField, lngField, latField, outPath, mode='near'):
    """Match each row of one sheet against same-region rows of another.

    Reads two sheets from ``xlsxPath``: sheet index 1 becomes the global
    ``df_points1`` (the rows to match, labelled "(表1)" in the output header)
    and sheet index 0 becomes the global ``df_points2`` (the candidates,
    labelled "(表2)"). Both sheets must use the same names for the region,
    longitude and latitude columns.

    Args:
        xlsxPath: Path of the input workbook.
        regionField: Name of the region column; rows are only compared within
            the same region. Values are compared as stripped strings.
        lngField: Name of the longitude column.
        latField: Name of the latitude column.
        outPath: Output path. The CSV output is written next to it with the
            extension replaced by ``.csv``.
        mode: ``'near'`` writes one row per ``df_points1`` record with its
            nearest candidate (CSV only). ``'distance'`` writes one row per
            (record, candidate) pair, to both ``outPath`` (Excel) and the CSV.
            Any other value writes nothing.

    Returns:
        None. Results are written to disk; ``df_points1`` and ``df_points2``
        are left set as module globals.
    """
    import os

    global df_points1
    global df_points2

    df_points1 = pd.read_excel(xlsxPath, 1)
    df_points2 = pd.read_excel(xlsxPath, 0)

    # Normalise the region values so lookups are not defeated by stray
    # whitespace or by numeric-vs-string typing differences between sheets.
    df_points1[regionField] = df_points1[regionField].apply(lambda x: str(x).strip())
    df_points2[regionField] = df_points2[regionField].apply(lambda x: str(x).strip())
    df_points2.set_index(regionField, inplace=True)

    # Build the header AFTER set_index: result rows carry the table-2 region
    # first (it comes back from reset_index), followed by the remaining
    # table-2 columns, so the labels must follow that same order.
    header = (
        ['(表1)%s' % i for i in df_points1.columns]
        + ['(表2)%s' % regionField]
        + ['(表2)%s' % i for i in df_points2.columns]
        + ['距离(米)']
    )

    # Swap only the file extension; str.replace('xlsx', 'csv') would also
    # rewrite any "xlsx" occurring in a directory or base name.
    csvPath = os.path.splitext(outPath)[0] + '.csv'

    if mode == 'near':
        result = df_points1.apply(
            lambda x: getNear(x, regionField, lngField, latField),
            axis=1,
            result_type='expand',
        )
        result.to_csv(csvPath, header=header, index=False)

    if mode == 'distance':
        # Each call returns a JSON list of records; decode and flatten them
        # into one list of row dicts.
        encoded = df_points1.apply(
            lambda x: getDistance(x, regionField, lngField, latField),
            axis=1,
        )
        result = list(itertools.chain.from_iterable(json.loads(x) for x in encoded))
        print(result)
        result = pd.DataFrame(result)
        print(result)
        # NOTE(review): the original indentation was lost; these two writes
        # are assumed to belong to the 'distance' branch, since the 'near'
        # branch already writes its own CSV - confirm.
        result.to_excel(outPath, header=header, index=False)
        result.to_csv(csvPath, header=header, index=False)
# Script entry point: intentionally does nothing; call getResult(...) here
# with real paths and column names to run an analysis.
if __name__ == '__main__':
    pass