获取impala下所有的数据库建表语句

2022-10-04 19:05:01 浏览数 (2)

大家好,又见面了,我是你们的朋友全栈君。

本博文介绍三种方法,推荐使用第三种,前两种都是尝试。

方法一:

现在的导出还是有缺陷的,导出的文件中还是存在其他不必要的信息

代码语言:javascript复制
#!/bin/bash
##获取数据库
databases=$(hive -e "show databases; exit;") 
for database in $databases;
do        
#获取hive建表语句        
tables=$(hive -e "use $database; show tables;")       
 for table in $tables;        
 do                             
 echo "--=========== db: $database , table: $table ===========" >> $database.sql
 echo "$(hive -e "use $database;show create table $table;");" >> $database.sql
 done
done

还没有找到其他方法。有其他解决方法,可以Mark一下我

方法二:

20191108今天有想出来一个方法,有点繁杂,但也是可以的,用impala-shell

1.先准备一个文件(tables_name.txt),我们会读这个文件

代码语言:javascript复制
[root@bigdata zw]# more tables_name.txt 
show create table cdata.c01_bill_distr_stat
show create table cdata.c01_bill_distr_stat_temp1
show create table cdata.c01_bill_pro_bal
show create table cdata.c01_bill_repay_stat
show create table cdata.c01_bill_repay_stat_temp1

2.一个小脚本

代码语言:javascript复制
#!/usr/bin/python
# -*- coding:utf-8 -*-

import time,sys
import os
reload(sys)
sys.setdefaultencoding("utf8")



file=open("tables_name.txt")
send_file = file.readlines()
for i in send_file:
   os_cmd1 = "impala-shell -q 'use cdata' "
   os_cmd2 = "impala-shell -q '"  i.strip('n')  "'"
   os.system(os_cmd2)
file.close()

都放在一个目录下,运行python脚本,这时候,日志会打印到屏幕上,需要获取屏幕上的日志内容即可。

我用的xshell工具

这个时候,所有的日志都会打印到文件中(bigdata_2019-11-08_17-20-11),可以找到自己想要的内容。

方法三:

代码语言:javascript复制
#!/usr/bin/env python
#-*- coding:utf8 -*-
# 从mysql中提取hive建表语句
import os,sys
import fileinput
import datetime
import mysql.connector
reload(sys)
sys.setdefaultencoding("utf8")
def hive_create_table():
conn = mysql.connector.connect(host="192.168.xxx.xxx",user='hive',passwd='123456',database='hive',charset='utf8')
mycursor = conn.cursor()
# 获取DB_ID
select_DB_ID = "select DB_ID from DBS;"
mycursor.execute(select_DB_ID)
result_DB_ID = mycursor.fetchall()
fo = open("create_tab.sql", "w")
for dir_DB_ID in result_DB_ID :
# 获取数据库名
DB_ID = str(dir_DB_ID)[1:].split(',')[0]
print(DB_ID)
select_DB_NAME = "select NAME from DBS where DB_ID=" DB_ID ";"
print(select_DB_NAME )
mycursor.execute(select_DB_NAME)
result_DB_NAME = mycursor.fetchone()
fo.write("n===========数据库:" str(result_DB_NAME).split(''')[1] "===========n")
DBname=str(result_DB_NAME).split(''')[1]
print '数据库名字:'   DBname
print(result_DB_NAME)
# 获取表名
select_table_name_sql = "select TBL_NAME from TBLS where DB_ID=" DB_ID ";"
mycursor.execute(select_table_name_sql)
result_table_names = mycursor.fetchall()
for table_name in result_table_names :
fo.write("nCREATE TABLE " DBname  '.`' str(table_name).split(''')[1] "`(n")
# 根据表名获取SD_ID
select_table_SD_ID = "select SD_ID from TBLS where tbl_name='" str(table_name).split(''')[1] "' and DB_ID=" DB_ID ";"
print(select_table_SD_ID)
mycursor.execute(select_table_SD_ID)
result_SD_ID = mycursor.fetchone()
print(result_SD_ID )
# 根据SD_ID获取CD_ID
SD_ID=str(result_SD_ID)[1:].split(',')[0]
select_table_CD_ID = "select CD_ID from SDS where SD_ID=" str(result_SD_ID)[1:].split(',')[0] ";"
print(select_table_CD_ID)
mycursor.execute(select_table_CD_ID)
result_CD_ID = mycursor.fetchone()
print(result_CD_ID)        
# 根据CD_ID获取表的列
CD_ID=str(result_CD_ID)[1:].split(',')[0]
select_table_COLUMN_NAME = "select COLUMN_NAME,TYPE_NAME,COMMENT from COLUMNS_V2 where CD_ID=" str(result_CD_ID)[1:].split(',')[0] " order by INTEGER_IDX;"
print(select_table_COLUMN_NAME)
mycursor.execute(select_table_COLUMN_NAME)
result_COLUMN_NAME = mycursor.fetchall()
print(result_COLUMN_NAME)        
index=0
for col,col_type,col_name in result_COLUMN_NAME:
print(col)
print(col_type)
print(col_name)
print(len(result_COLUMN_NAME) )
# 写入表的列和列的类型到文件
if col_name is None:
fo.write("  `" str(col) "`  " str(col_type))
else:
fo.write("  `" str(col) "`  " str(col_type)   " COMMENT '"   str(col_name)   "'")
if index < len(result_COLUMN_NAME)-1:
index = index   1
fo.write(",n")
elif index == len(result_COLUMN_NAME)-1:
fo.write("n)")
# 根据表名获取TBL_ID
select_table_SD_ID = "select TBL_ID from TBLS where tbl_name='" str(table_name).split(''')[1] "' and DB_ID=" DB_ID ";"
print(select_table_SD_ID)
mycursor.execute(select_table_SD_ID)
result_TBL_ID = mycursor.fetchone()
print(result_TBL_ID)
# 根据TBL_ID获取分区信息
select_table_PKEY_NAME_TYPE = "select PKEY_NAME,PKEY_TYPE,PKEY_COMMENT from PARTITION_KEYS where TBL_ID=" str(result_TBL_ID)[1:].split(',')[0] " order by INTEGER_IDX;"
print(select_table_PKEY_NAME_TYPE)
mycursor.execute(select_table_PKEY_NAME_TYPE)
result_PKEY_NAME_TYPE = mycursor.fetchall()
print(result_PKEY_NAME_TYPE)
if len(result_PKEY_NAME_TYPE) > 0:
fo.write("nPARTITIONED BY (n")
else :
fo.write("n")
i=0
for pkey_name,pkey_type,PKEY_COMMENT in result_PKEY_NAME_TYPE:
if str(PKEY_COMMENT) is None:
fo.write("  `" str(pkey_name) "`  " str(pkey_type))
else:
fo.write("  `" str(pkey_name) "`  " str(pkey_type)   " COMMENT '"   str(PKEY_COMMENT)   "'n")
if i < len(result_PKEY_NAME_TYPE)- 1:
i = i   1
fo.write(",")
elif i == len(result_PKEY_NAME_TYPE) - 1:
fo.write(")n")
# 根据表TBL_ID 获得中文名称
select_PARAM_VALUE01 = "select PARAM_VALUE from TABLE_PARAMS  WHERE TBL_ID=( select TBL_ID from TBLS where tbl_name='" str(table_name).split(''')[1] "' and DB_ID=" DB_ID ") and PARAM_KEY='comment';"
print(select_PARAM_VALUE01)
mycursor.execute(select_PARAM_VALUE01)
result_PARAM_VALUE01 = mycursor.fetchone()
print result_PARAM_VALUE01
if result_PARAM_VALUE01 is None:
print '未设置表名'
elif not result_PARAM_VALUE01[0]:
print '表名为空'
else:
fo.write("COMMENT '"   str(result_PARAM_VALUE01[0])  "' n" )
# 根据SD_ID和CD_ID获取SERDE_ID
select_SERDE_ID = "select SERDE_ID from SDS where SD_ID=" SD_ID " and CD_ID=" CD_ID ";"
print(select_SERDE_ID)
mycursor.execute(select_SERDE_ID)
result_SERDE_ID = mycursor.fetchone()
print(result_SERDE_ID)
# 根据SERDE_ID获取PARAM_VALUE(列分隔符)
select_PARAM_VALUE = "select PARAM_VALUE from SERDE_PARAMS where SERDE_ID=" str(result_SERDE_ID)[1:].split(",")[0] " and PARAM_KEY='field.delim';"
print(select_PARAM_VALUE)
mycursor.execute(select_PARAM_VALUE)
result_PARAM_VALUE = mycursor.fetchone()
print(result_PARAM_VALUE)
if result_PARAM_VALUE is not None:
fo.write("ROW FORMAT DELIMITEDn")
fo.write("FIELDS TERMINATED BY '" str(result_PARAM_VALUE).split(''')[1] "'n")
# 根据SERDE_ID获取PARAM_VALUE(行分隔符)
select_PARAM_HNAG = "select PARAM_VALUE from SERDE_PARAMS where SERDE_ID=" str(result_SERDE_ID)[1:].split(",")[0] " and PARAM_KEY='line.delim';"
print(select_PARAM_HNAG)
mycursor.execute(select_PARAM_HNAG)
RESULT_PARAM_HNAG = mycursor.fetchone()
print(RESULT_PARAM_HNAG)
if RESULT_PARAM_HNAG is not None:
fo.write("LINES TERMINATED BY '" str(RESULT_PARAM_HNAG).split(''')[1] "'n")
# 根据SD_ID和CD_ID获取输入输出格式
select_table_STORE_FORMAT = "select INPUT_FORMAT from SDS where SD_ID=" SD_ID " and CD_ID=" CD_ID ";"
print(select_table_STORE_FORMAT)
mycursor.execute(select_table_STORE_FORMAT)
result_table_STORE_FORMAT= mycursor.fetchall()
print(result_table_STORE_FORMAT)
for store_format in result_table_STORE_FORMAT:
if "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat" in str(store_format):
fo.write("STORED AS ORC;n")
elif "org.apache.hadoop.mapred.TextInputFormat" in str(store_format):
fo.write("STORED AS TEXTFILE;n")
elif "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat" in str(store_format):
fo.write("STORED AS PARQUET;n")
elif "org.apache.kudu.mapreduce.KuduTableInputFormat" in str(store_format):
fo.write("STORED AS KuduTable;n")
else :
fo.write("STORED AS null;n")
fo.close()
hive_create_table()

直接生成建表脚本的SQL文件。可以直接运行建表

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。

发布者:全栈程序员栈长,转载请注明出处:https://javaforall.cn/196206.html原文链接:https://javaforall.cn

0 人点赞