版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/u014365862/article/details/100982613
spark-env.sh环境配置:(添加hive支持)
代码语言:bash
export JAVA_HOME=/usr/lib/jdk1.8.0_171
# JVM options for the Spark history server: Kerberos is switched off, so the
# principal and keytab properties are left empty.
export SPARK_HISTORY_OPTS="-Dspark.history.kerberos.enabled=false
-Dspark.history.kerberos.principal=
-Dspark.history.kerberos.keytab="
# Hadoop installation root. The Hadoop and YARN configuration directories are
# derived from it so the installation path is written exactly once and each
# variable is exported exactly once.
export HADOOP_HOME=/***/emr-hadoop-2.7.2
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
export YARN_CONF_DIR="${HADOOP_CONF_DIR}"
# Hive support: append Hive's lib directory to CLASSPATH and point at its conf dir.
export CLASSPATH=$CLASSPATH:/***/emr-apache-hive-2.3.2-bin/lib
export HIVE_CONF_DIR=/***/emr-apache-hive-2.3.2-bin/conf
# Append the MySQL connector jar to Spark's classpath (presumably the JDBC
# driver for the Hive metastore - confirm).
# NOTE(review): SPARK_CLASSPATH is deprecated in newer Spark releases in favour
# of spark.driver.extraClassPath / spark.executor.extraClassPath - confirm
# against the Spark version in use.
export SPARK_CLASSPATH=$SPARK_CLASSPATH:/***/emr-apache-hive-2.3.2-bin/lib/mysql-connector-java-5.1.38.jar
scala代码:
代码语言:scala
import scala.io.Source

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.hive.HiveContext
/**
 * Shared Spark entry points for the examples in this file.
 *
 * Every member is a plain `val`, so all of them are created eagerly, in
 * declaration order, the first time this object is accessed.
 *
 * NOTE(review): `HiveContext` and `SQLContext` are marked deprecated in
 * Spark 2.x in favour of `SparkSession` - confirm against the Spark version
 * in use before relying on them in new code.
 */
object SparkConfTrait {

  /** Base configuration; only the application name is set here. */
  val conf: SparkConf = new SparkConf().setAppName("TestSpark Pipeline")

  /** Context built from [[conf]]; the two legacy contexts below wrap it. */
  val sparkContext: SparkContext = new SparkContext(conf)

  /** Hive-aware SQL entry point built on [[sparkContext]]. */
  val hiveContext: HiveContext = new HiveContext(sparkContext)

  /** Plain SQL entry point built on [[sparkContext]]. */
  val sqlContext: SQLContext = new SQLContext(sparkContext)

  /** Session with Hive support enabled, obtained through `getOrCreate()`. */
  val spark: SparkSession =
    SparkSession
      .builder()
      .enableHiveSupport()
      .appName("TestSpark")
      .getOrCreate()
}
/** Runs a SQL query stored in a text file through the shared `SparkSession`. */
object SparkSQL {

  /**
   * Reads the whole of `dataSqlFile` into a string and executes it with
   * `SparkConfTrait.spark.sql`.
   *
   * The file handle is always closed, even when reading fails.
   *
   * NOTE(review): the entire file content is passed to `sql` as one string,
   * so it presumably has to hold a single statement - confirm.
   *
   * @param dataSqlFile path of the file containing the SQL text
   * @return the `DataFrame` produced by the query
   */
  def sqlFromFile(dataSqlFile: String): DataFrame = {
    val source = Source.fromFile(dataSqlFile)
    // Close the handle explicitly: `Source.fromFile(...).mkString` alone leaves it open.
    val sqlQuery =
      try source.mkString
      finally source.close()
    SparkConfTrait.spark.sql(sqlQuery)
  }

  /**
   * Manual test entry point: runs the query file and prints the result.
   *
   * @param args optional; the first argument is the SQL file path, and
   *             `path/to/data.sql` is used when no argument is given
   */
  def main(args: Array[String]): Unit = {
    val sqlFile = args.headOption.getOrElse("path/to/data.sql")
    val trainDataSqlFrame = sqlFromFile(sqlFile)
    trainDataSqlFrame.show()
  }
}
/** Runs a query stored in a text file through the shared `HiveContext`. */
object HiveQL {

  /**
   * Reads the whole of `dataSqlFile` into a string and executes it with
   * `SparkConfTrait.hiveContext.sql`.
   *
   * The file handle is always closed, even when reading fails.
   *
   * NOTE(review): the entire file content is passed to `sql` as one string,
   * so it presumably has to hold a single statement - confirm.
   *
   * @param dataSqlFile path of the file containing the query text
   * @return the `DataFrame` produced by the query
   */
  def sqlFromFile(dataSqlFile: String): DataFrame = {
    val source = Source.fromFile(dataSqlFile)
    // Close the handle explicitly: `Source.fromFile(...).mkString` alone leaves it open.
    val sqlQuery =
      try source.mkString
      finally source.close()
    SparkConfTrait.hiveContext.sql(sqlQuery)
  }

  /**
   * Manual test entry point: runs the query file and prints the result.
   *
   * @param args optional; the first argument is the query file path, and
   *             `path/to/data.sql` is used when no argument is given
   */
  def main(args: Array[String]): Unit = {
    val sqlFile = args.headOption.getOrElse("path/to/data.sql")
    val trainDataSqlFrame = sqlFromFile(sqlFile)
    trainDataSqlFrame.show()
  }
}