众(小众)所周知,excel只能存一百万条数据,csv文件只能显示一百万条数据。。。无可避免的需要使用数据库,而我所知的开源数据库中,postgresql有个很大的特点,就是对地理数据支持度较高。无可避免的又要用python去操作,那。。。
'''
pm2.5-数据库
'''
import psycopg2
conn=psycopg2.connect(database="postgres",user="postgres",password="1234",host="127.0.0.1",port="5432")
cur = conn.cursor()
cur.execute("CREATE TABLE mxndata1(data timestamp,point varchar,long double precision,lat double precision,pm25 double precision,
pm10 double precision,so2 double precision,no2 double precision,co double precision,
o3 double precision,qy double precision,wd double precision,xdsd double precision,fs double precision,fx double precision);")
conn.commit()
cur.close()
conn.close()
'''
postgres=# create table mxndata1
postgres-# (data timestamp,point varchar,long double precision,lat double precision,pm25 double precision,pm10 double precision,so2 double precision,no2 double precision,co double precision,o3 double precision,qy double precision,wd double precision,xdsd double precision,fs double precision,fx double precision);
'''
import psycopg2
from sqlalchemy import create_engine
import pandas as pd
from io import StringIO
data=pd.read_csv(r'D:/minxinan/wrw/2018/2018.csv',header=None,encoding='gbk')
data1 = pd.DataFrame(data)
output = StringIO()
data1.to_csv(output, sep='t', index=False, header=False)
output1 = output.getvalue()
conn=psycopg2.connect(database="postgres",user="postgres",password="1234",host="127.0.0.1",port="5432")
cur = conn.cursor()
#cur.execute("CREATE TABLE mxndata1();")
cur.copy_from(StringIO(output1), 'mxndata1',columns=('data','point','long','lat','pm25','pm10','so2','no2','co','o3','qy','wd','xdsd','fs','fx'))
#cur.copy_expert("""COPY mxndata1 FROM 'D:/minxinan/wrw/2018/2018.csv' WITH (FORMAT CSV)""", StringIO(output1))
conn.commit()
cur.close()
conn.close()
print('done')
打开复制了10多秒,也算很快了