pandas简单应用---近邻分析

2021-07-15 18:00:58 浏览数 (1)

GIS中有个专业术语叫“最邻近分析”,属于空间统计或空间分析的范畴。ArcGIS中提供了近邻分析、生成近邻表、计算近邻点距离等相关功能。

举例如下,有下面两张模拟表:

小区表
充电桩表

需求是要计算出每个小区同城市的最近的充电桩。也就是对于表二每行小区,在表一对应相同城市的充电桩里找到最近的一行。

如下图,对于表二中的A城市的9000003小区,从表一同为A城市的充电桩中找到最近的800008充电桩。

python代码如下:

代码语言:python代码运行次数:0复制
# -*- coding:utf-8 -*-
from math import radians, cos, sin, asin, sqrt,pi
import pandas as pd
import itertools
import json
def geodistance(lng1,lat1,lng2,lat2):
    """Return the great-circle distance between two points in whole metres.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lng1, lat1: Longitude and latitude of the first point, in degrees.
            Any value accepted by ``float()`` works (numbers or numeric strings).
        lng2, lat2: Longitude and latitude of the second point, in degrees.

    Returns:
        The distance in metres, truncated to an ``int``.
    """
    lng1, lat1, lng2, lat2 = map(radians, [float(lng1), float(lat1), float(lng2), float(lat2)])
    dlon = lng2 - lng1
    dlat = lat2 - lat1
    # Haversine term: the two addends must be summed.
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt/asin raise ValueError.
    a = min(1.0, max(0.0, a))
    # 6371 km mean Earth radius, converted to metres.
    distance = 2 * asin(sqrt(a)) * 6371 * 1000
    return int(distance)

def getNear(point1,regionField,lngField,latField):
    """Find the row of ``df_points2`` in the same region that is closest to ``point1``.

    Reads the module-level ``df_points2``, which ``getResult`` sets up with the
    region column as its index.

    Args:
        point1: One row (a pandas Series) holding at least ``regionField``,
            ``lngField`` and ``latField``.
        regionField: Name of the region column in ``point1``; its value is
            looked up in the index of ``df_points2``.
        lngField: Name of the longitude column (same name in both tables).
        latField: Name of the latitude column (same name in both tables).

    Returns:
        A flat list: the values of ``point1``, then the region index value of
        the matched row, then that row's remaining column values, then the
        distance in metres. When no row of ``df_points2`` shares the region,
        every position after the ``point1`` values is filled with ``'-'``.
    """
    region = point1[regionField]
    if region not in df_points2.index:
        # Placeholders keep the row width equal to a matched row:
        # one for the region index column, one per data column, one for distance.
        return point1.to_list() + ['-'] + ['-' for _ in df_points2.columns] + ['-']

    df0 = df_points2.loc[region]
    # .loc returns a Series when exactly one row matches; normalise to a frame.
    if isinstance(df0, pd.Series):
        df0 = pd.DataFrame([df0])

    # Append the distance to point1 as an extra trailing column.
    df0 = df0.apply(
        lambda x: x.to_list()
        + [geodistance(x[lngField], x[latField], point1[lngField], point1[latField])],
        axis=1, result_type='expand')
    distanceField = df0.columns[-1]
    # reset_index moves the region index into the first column of the row.
    df0 = df0.sort_values(distanceField).reset_index()
    return point1.to_list() + df0.iloc[0].tolist()


def getDistance(point1,regionField,lngField,latField):
    """Return, as JSON, ``point1`` paired with every same-region row of ``df_points2``.

    Reads the module-level ``df_points2``, which ``getResult`` sets up with the
    region column as its index.

    Args:
        point1: One row (a pandas Series) holding at least ``regionField``,
            ``lngField`` and ``latField``.
        regionField: Name of the region column in ``point1``; its value is
            looked up in the index of ``df_points2``.
        lngField: Name of the longitude column (same name in both tables).
        latField: Name of the latitude column (same name in both tables).

    Returns:
        A JSON string encoding a list of records keyed by column position
        ("0", "1", ...). Each record holds the values of ``point1``, the region
        index value of a matched row, that row's remaining values and the
        distance in metres; records are ordered by ascending distance. When no
        row shares the region, a single record padded with ``'-'`` is returned.
    """
    region = point1[regionField]
    if region not in df_points2.index:
        # Placeholders keep the record width equal to a matched record:
        # one for the region index column, one per data column, one for distance.
        result = point1.to_list() + ['-'] + ['-' for _ in df_points2.columns] + ['-']
        # String positional keys mirror what DataFrame.to_json(orient='records')
        # emits for the matched case below.
        result = dict(zip([str(i) for i in range(len(result))], result))
        print(result)
        return json.dumps([result])

    df0 = df_points2.loc[region]
    # .loc returns a Series when exactly one row matches; normalise to a frame.
    if isinstance(df0, pd.Series):
        df0 = pd.DataFrame([df0])

    # Append the distance to point1 as an extra trailing column.
    df0 = df0.apply(
        lambda x: x.to_list()
        + [geodistance(x[lngField], x[latField], point1[lngField], point1[latField])],
        axis=1, result_type='expand')
    distanceField = df0.columns[-1]
    # reset_index moves the region index into the first column of each row.
    df0 = df0.sort_values(distanceField).reset_index()

    # Prefix every candidate row with the values of point1.
    result = df0.apply(lambda x: point1.to_list() + x.to_list(), result_type='expand', axis=1)
    return result.to_json(orient='records')
def getResult(xlsxPath,regionField,lngField,latField,outPath,mode='near'):
    """Match rows of two Excel sheets by region and distance, then write the result.

    The sheet at index 1 is loaded into the module-level ``df_points1`` and the
    sheet at index 0 into the module-level ``df_points2`` (per the original
    author's note, the residential-community table and the charging-station
    table respectively). Both sheets must use the same column names for
    region, longitude and latitude.

    Args:
        xlsxPath: Path of the input workbook.
        regionField: Name of the region column in both sheets.
        lngField: Name of the longitude column in both sheets.
        latField: Name of the latitude column in both sheets.
        outPath: Path of the Excel file to write; a CSV with the same base
            name and a ``.csv`` extension is written next to it.
        mode: ``'near'`` keeps only the closest ``df_points2`` row for each
            ``df_points1`` row; ``'distance'`` keeps every same-region pair.

    Raises:
        ValueError: If ``mode`` is neither ``'near'`` nor ``'distance'``.
    """
    global df_points1
    global df_points2
    import os

    if mode not in ('near', 'distance'):
        raise ValueError("mode must be 'near' or 'distance', got %r" % (mode,))

    df_points1 = pd.read_excel(xlsxPath, 1)
    df_points2 = pd.read_excel(xlsxPath, 0)

    # Normalise region labels to stripped strings so the lookup keys match.
    df_points1[regionField] = df_points1[regionField].apply(lambda x: str(x).strip())
    df_points2[regionField] = df_points2[regionField].apply(lambda x: str(x).strip())
    # getNear/getDistance look regions up through the index of df_points2.
    df_points2.set_index(regionField, inplace=True)

    # Each output row is: df_points1 values, the region index of the matched
    # df_points2 row, that row's remaining columns, then the distance. The
    # header is built after set_index so its order follows that layout even
    # when the region is not the first column of the sheet.
    header = (['(表1)%s' % i for i in df_points1.columns]
              + ['(表2)%s' % regionField]
              + ['(表2)%s' % i for i in df_points2.columns]
              + ['距离(米)'])

    if mode == 'near':
        result = df_points1.apply(
            lambda x: getNear(x, regionField, lngField, latField),
            axis=1, result_type='expand')
    else:
        # Each row yields a JSON list of records; decode and flatten them all.
        encoded = df_points1.apply(
            lambda x: getDistance(x, regionField, lngField, latField), axis=1)
        result = list(itertools.chain.from_iterable(json.loads(s) for s in encoded))
        print(result)

    result = pd.DataFrame(result)
    print(result)
    result.to_excel(outPath, header=header, index=False)
    # Swap only the file extension; a plain substring replace would also hit
    # "xlsx" inside directory names, and would leave other extensions unchanged
    # so the CSV would overwrite the Excel output.
    csvPath = os.path.splitext(outPath)[0] + '.csv'
    result.to_csv(csvPath, header=header, index=False)
# Script entry point: intentionally a no-op. Call getResult(...) with the
# workbook path, column names and output path to run the analysis.
if __name__=='__main__':
    pass
    
    
    

0 人点赞