4. Functions
1. Sorting
order by (global sort): rarely used in practice, because it forces all rows through a single reducer. sort by / distribute by: commonly used; sort by orders rows within each reducer, and distribute by controls which reducer each row is sent to.
set mapreduce.job.reduces=3;
select * from emp sort by empno desc;
select sal,deptno from emp distribute by sal sort by sal;
cluster by: ascending order only; shorthand for distribute by + sort by on the same column.
select sal,deptno from emp cluster by sal;
2. Built-in functions
select concat_ws('_','sdfsdf','sdfs','123');                                  -- sdfsdf_sdfs_123
select cast('000000000000123123123123' as bigint);                            -- 123123123123
select parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1','HOST');  -- facebook.com
select get_json_object('{"name":"jack","age":"20"}','$.name');                -- jack
Example: word count (explode + lateral view)
create table t_movie(
  name string,
  type string
) row format delimited fields terminated by ' ' lines terminated by '\n'
location '/data/inner/ODS/01/t_movie';
load data inpath '/data/inner/RAW/01/t_movie/movie' into table t_movie;
select * from t_movie;
select name,split(type,',') from t_movie;
select explode(split(type,',')) from t_movie;
-- a UDTF such as explode cannot appear alongside other columns in the select list; lateral view joins each exploded row back to its source row
select name,tcol from t_movie LATERAL VIEW explode(split(type,',')) typetable AS tcol;
create table t_wordcount(
  wordline string
) row format delimited fields terminated by '\n'
location '/data/inner/ODS/01/t_wordcount';
load data inpath '/data/inner/RAW/01/t_wordcount/harry' into table t_wordcount;
-- word count via lateral view
select word,count(word) from t_wordcount lateral view explode(split(wordline,' ')) eswtable as word group by word;
-- word count via a subquery
select word,count(word) from (select explode(split(wordline,' ')) word from t_wordcount) esw group by word;
3. User-defined functions
Steps: 1. extend the appropriate base class; 2. override its methods (implement the logic); 3. package the jar; 4. upload it and create the function.
Maven dependencies:
<dependencies>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.6.5</version>
  </dependency>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.6.5</version>
  </dependency>
  <dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>1.2.1</version>
  </dependency>
</dependencies>
UDF (one row in, one value out):
import org.apache.hadoop.hive.ql.exec.UDF;

public class UDFHello extends UDF {
    // called once per row; Hive locates evaluate() by reflection, conventionally an instance method rather than static
    public String evaluate(String value) {
        return value == null ? null : "Hello:" + value;  // example body; the logic was elided in the original notes
    }
}
hadoop fs -mkdir -p /bdp/hive/bin/lib
hadoop fs -put ./demouf.jar /bdp/hive/bin/lib
create function sxt_hello as 'com.vincent.UDFHello' using jar 'hdfs:///bdp/hive/bin/lib/demouf.jar';
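Once registered, the function is called like any built-in, e.g. select sxt_hello(ename) from emp;. A session-scoped alternative is add jar followed by create temporary function sxt_hello as 'com.vincent.UDFHello';, which disappears when the session ends.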
UDAF (old-style API: an outer class extending UDAF holds a public static evaluator inner class):
public class SxtUDAF extends UDAF {  // outer wrapper; the name is illustrative, the original notes omitted it
    public static class SxtInnerClass implements UDAFEvaluator {
        // reset the aggregation state
        @Override
        public void init() {}
        // map side: consume one input row
        public boolean iterate(String value) {}
        // emit the partial aggregate as map-side output
        public String terminatePartial() {}
        // combine a partial aggregate from another task (missing in the original skeleton)
        public boolean merge(String partial) {}
        // reduce side: emit the final result
        public String terminate() {}
    }
}
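A minimal runnable sketch of this pattern, assuming the old UDAF API that ships with hive-exec 1.2.1; the names SxtMax and MaxEvaluator are illustrative, not from the original notes:

import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;

// illustrative UDAF computing the maximum of an integer column
public class SxtMax extends UDAF {
    public static class MaxEvaluator implements UDAFEvaluator {
        private Integer max;  // current aggregation state

        @Override
        public void init() {
            max = null;  // reset state between groups
        }

        // map side: fold one input row into the state
        public boolean iterate(Integer value) {
            if (value != null && (max == null || value > max)) {
                max = value;
            }
            return true;
        }

        // map-side output: the partial aggregate
        public Integer terminatePartial() {
            return max;
        }

        // combine a partial aggregate produced by another task
        public boolean merge(Integer partial) {
            return iterate(partial);
        }

        // reduce side: the final result
        public Integer terminate() {
            return max;
        }
    }
}

After packaging and registering it the same way as the UDF above, it would be used like any aggregate, e.g. select deptno, sxt_max(sal) from emp group by deptno; (the function name sxt_max is likewise an assumption).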
UDTF (one row in, multiple rows out):
public class ExplodeMap extends GenericUDTF {
    // input side: validate the arguments and declare the output schema
    @Override
    public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {}
    // per input row: implement the logic and emit output rows via forward()
    @Override
    public void process(Object[] args) throws HiveException {}
    // cleanup when processing ends
    @Override
    public void close() throws HiveException {}
}
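A runnable sketch of the ExplodeMap skeleton above, assuming hive-exec 1.2.1; the input format 'k1:v1;k2:v2' and the output column names key/value are assumptions for illustration:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

// illustrative UDTF: splits "k1:v1;k2:v2" into rows of (key, value)
public class ExplodeMap extends GenericUDTF {

    @Override
    public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
        if (args.length != 1) {
            throw new UDFArgumentException("ExplodeMap takes exactly one argument");
        }
        // declare two string output columns named key and value
        List<String> fieldNames = new ArrayList<String>();
        List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
        fieldNames.add("key");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldNames.add("value");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    @Override
    public void process(Object[] args) throws HiveException {
        if (args[0] == null) {
            return;  // skip NULL input rows
        }
        // one forward() call per output row
        String[] entries = args[0].toString().split(";");
        for (String entry : entries) {
            String[] kv = entry.split(":", 2);
            forward(new Object[] { kv[0], kv.length > 1 ? kv[1] : null });
        }
    }

    @Override
    public void close() throws HiveException {
        // nothing to clean up
    }
}

It is registered like the UDF above and then used either directly (select explode_map(info) as (k, v) from t) or through lateral view; the function name explode_map is likewise an assumption.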