1,现有数据
现在有两个表scores和part_student。
代码语言:javascript复制hive (demo)> select * from scores;
OK
scores.student_id scores.scores
1 {"Chinese":91,"mathematics":100,"Physics":87,"Chemistry":97,"English":93}
2 {"Chinese":62,"mathematics":93,"Physics":77,"Chemistry":96,"English":97}
3 {"Chinese":43,"mathematics":73,"Physics":87,"Chemistry":66,"English":97}
4 {"Chinese":97,"mathematics":97,"Physics":63,"Chemistry":97,"English":96}
5 {"Chinese":96,"mathematics":99,"Physics":87,"Chemistry":99,"English":95}
6 {"Chinese":93,"mathematics":67,"Physics":89,"Chemistry":95,"English":84}
7 {"Chinese":96,"mathematics":53,"Physics":86,"Chemistry":80,"English":92}
8 {"Chinese":87,"mathematics":71,"Physics":84,"Chemistry":76,"English":93}
9 {"Chinese":77,"mathematics":73,"Physics":93,"Chemistry":91,"English":91}
10 {"Chinese":94,"mathematics":81,"Physics":81,"Chemistry":93,"English":90}
11 {"Chinese":91,"mathematics":90,"Physics":87,"Chemistry":97,"English":93}
12 {"Chinese":97,"mathematics":84,"Physics":87,"Chemistry":78,"English":90}
13 {"Chinese":92,"mathematics":83,"Physics":87,"Chemistry":96,"English":97}
14 {"Chinese":97,"mathematics":97,"Physics":63,"Chemistry":97,"English":96}
15 {"Chinese":96,"mathematics":99,"Physics":87,"Chemistry":99,"English":95}
16 {"Chinese":93,"mathematics":67,"Physics":89,"Chemistry":95,"English":94}
17 {"Chinese":96,"mathematics":83,"Physics":86,"Chemistry":90,"English":92}
18 {"Chinese":87,"mathematics":71,"Physics":84,"Chemistry":76,"English":93}
19 {"Chinese":87,"mathematics":73,"Physics":83,"Chemistry":91,"English":91}
20 {"Chinese":84,"mathematics":71,"Physics":81,"Chemistry":93,"English":90}
21 {"Chinese":91,"mathematics":100,"Physics":87,"Chemistry":97,"English":93}
22 {"Chinese":87,"mathematics":94,"Physics":87,"Chemistry":78,"English":90}
23 {"Chinese":92,"mathematics":93,"Physics":77,"Chemistry":86,"English":97}
24 {"Chinese":97,"mathematics":87,"Physics":83,"Chemistry":87,"English":86}
25 {"Chinese":96,"mathematics":99,"Physics":87,"Chemistry":99,"English":95}
26 {"Chinese":93,"mathematics":87,"Physics":89,"Chemistry":95,"English":84}
27 {"Chinese":86,"mathematics":53,"Physics":86,"Chemistry":90,"English":92}
28 {"Chinese":87,"mathematics":71,"Physics":84,"Chemistry":76,"English":93}
29 {"Chinese":87,"mathematics":73,"Physics":83,"Chemistry":91,"English":91}
30 {"Chinese":94,"mathematics":71,"Physics":81,"Chemistry":93,"English":90}
31 {"Chinese":98,"mathematics":83,"Physics":93,"Chemistry":91,"English":81}
32 {"Chinese":84,"mathematics":81,"Physics":91,"Chemistry":93,"English":90}
Time taken: 0.087 seconds, Fetched: 32 row(s)
代码语言:javascript复制hive (demo)> select * from part_student;
OK
part_student.id part_student.info part_student.year part_student.month part_student.day
1 {"name":"Jerry","age":"19"} 2022 2 1
2 {"name":"Tom","age":"19"} 2022 2 1
3 {"name":"Peter","age":"19"} 2022 2 1
4 {"name":"Jessca","age":"19"} 2022 2 1
5 {"name":"White","age":"18"} 2022 2 1
6 {"name":"Terry","age":"19"} 2022 9 1
7 {"name":"Zhi","age":"19"} 2022 9 1
8 {"name":"Cindy","age":"19"} 2022 9 1
9 {"name":"Smith","age":"19"} 2022 9 1
10 {"name":"Toy","age":"19"} 2022 9 1
11 {"name":"Kerry","age":"19"} 2023 2 1
12 {"name":"Linda","age":"19"} 2023 2 1
13 {"name":"Susan","age":"19"} 2023 2 1
14 {"name":"Yao","age":"19"} 2023 2 1
15 {"name":"Jack","age":"19"} 2023 2 1
16 {"name":"Mokey","age":"20"} 2023 9 1
17 {"name":"Mouse","age":"20"} 2023 9 1
18 {"name":"Cat","age":"20"} 2023 9 1
19 {"name":"Dog","age":"20"} 2023 9 1
20 {"name":"Snack","age":"20"} 2023 9 1
21 {"name":"Qian","age":"20"} 2024 2 1
22 {"name":"Yong","age":"20"} 2024 2 1
23 {"name":"Xiang","age":"20"} 2024 2 1
24 {"name":"Kun","age":"20"} 2024 2 1
25 {"name":"Hunter","age":"20"} 2024 2 1
26 {"name":"Qian","age":"20"} 2024 9 1
27 {"name":"Yong","age":"20"} 2024 9 1
28 {"name":"Xiang","age":"20"} 2024 9 1
29 {"name":"Kun","age":"20"} 2024 9 1
30 {"name":"Hunter","age":"20"} 2024 9 1
36 {"name":"Harry","age":"19"} 2025 2 1
37 {"name":"Jeckson","age":"19"} 2025 2 1
38 {"name":"JiM","age":"19"} 2025 2 1
39 {"name":"White","age":"18"} 2025 2 1
40 {"name":"BaiDEn","age":"18"} 2025 2 1
Time taken: 0.091 seconds, Fetched: 35row(s)
2,Hive的JOIN连接语法
Hive JOIN的语法如下:
INNER JOIN内连接:只有进行连接的两个表中都存在与连接条件相匹配的数据才会被保留下来。
代码语言:javascript复制hive> SELECT * FROM teacher t INNER JOIN
course c ON t.t_id = c.t_id; -- inner可省略
LEFT OUTER JOIN左外连接:左边所有数据会被返回,右边符合条件的被返回
代码语言:javascript复制hive> SELECT * FROM teacher t LEFT JOIN
course c ON t.t_id = c.t_id; -- outer可省略
RIGHT OUTER JOIN右外连接:右边所有数据会被返回,左边符合条件的被返回
代码语言:javascript复制hive> SELECT * FROM teacher t RIGHT JOIN
course c ON t.t_id = c.t_id;
FULL OUTER JOIN满外(全外)连接: 将会返回所有表中符合条件的所有记录。如果任一表的指定字段没有符合条件的值的话,那么就使用NULL值替代。
代码语言:javascript复制SELECT * FROM teacher t FULL JOIN course c
ON t.t_id = c.t_id ;
注意:
- Hive2版本已经支持不等值连接,就是 JOIN ON条件后面可以使用大于小于符号了;并且也支持 JOIN ON 条件后跟or (早前版本 ON 后只支持 = 和 and,不支持 > < 和OR)
- 如hive执行引擎使用MapReduce,一个JOIN就会启动一个job,一条SQL语句中如有多个JOIN,则会启动多个job。
另外
表之间用逗号(,)连接和 INNER JOIN 是一样的
代码语言:javascript复制hive> SELECT * FROM table_a,table_b
WHERE table_a.id=table_b.id;
等同于
代码语言:javascript复制hive> SELECT * FROM table_a INNER JOIN table_b
ON table_a.id=table_b.id;
它们的执行效率没有区别,只是书写方式不同,用逗号是SQL 89标准,JOIN 是SQL 92标准。用逗号连接后面过滤条件用 WHERE ,用 JOIN 连接后面过滤条件是ON。
3,实例
1)[INNER] JOIN…ON
代码语言:javascript复制hive> SELECT sc.student_id,
get_json_object(st.info, '$.name') AS name,
get_json_object(st.info, '$.age') AS age,
get_json_object(sc.scores, '$.Chinese') AS Chinese,
get_json_object(sc.scores, '$.mathematics') AS mathematics,
get_json_object(sc.scores, '$.Physics') AS Physics,
get_json_object(sc.scores, '$.Chemistry') AS Chemistry,
get_json_object(sc.scores, '$.English') AS English
FROM scores sc JOIN part_student st ON
sc.student_id=st.id;
OK
sc.student_id name age chinese mathematics physics chemistry english
1 Jerry 19 91 100 87 97 93
2 Tom 19 62 93 77 96 97
3 Peter 19 43 73 87 66 97
4 Jessca 19 97 97 63 97 96
5 White 18 96 99 87 99 95
6 Terry 19 93 67 89 95 84
7 Zhi 19 96 53 86 80 92
8 Cindy 19 87 71 84 76 93
9 Smith 19 77 73 93 91 91
10 Toy 19 94 81 81 93 90
11 Kerry 19 91 90 87 97 93
12 Linda 19 97 84 87 78 90
13 Susan 19 92 83 87 96 97
14 Yao 19 97 97 63 97 96
15 Jack 19 96 99 87 99 95
16 Mokey 20 93 67 89 95 94
17 Mouse 20 96 83 86 90 92
18 Cat 20 87 71 84 76 93
19 Dog 20 87 73 83 91 91
20 Snack 20 84 71 81 93 90
21 Qian 20 91 100 87 97 93
22 Yong 20 87 94 87 78 90
23 Xiang 20 92 93 77 86 97
24 Kun 20 97 87 83 87 86
25 Hunter 20 96 99 87 99 95
26 Qian 20 93 87 89 95 84
27 Yong 20 86 53 86 90 92
28 Xiang 20 87 71 84 76 93
29 Kun 20 87 73 83 91 91
30 Hunter 20 94 71 81 93 90
Time taken: 9.961 seconds, Fetched: 30
row(s)
2) LEFT [OUTER] JOIN…ON
代码语言:javascript复制hive> SELECT sc.student_id,
get_json_object(st.info, '$.name') AS name,
get_json_object(st.info, '$.age') AS age,
get_json_object(sc.scores, '$.Chinese') AS Chinese,
get_json_object(sc.scores, '$.mathematics') AS mathematics,
get_json_object(sc.scores, '$.Physics') AS Physics,
get_json_object(sc.scores, '$.Chemistry') AS Chemistry,
get_json_object(sc.scores, '$.English') AS English
FROM scores sc LEFT JOIN part_student st ON
sc.student_id=st.id;
sc.student_id name age chinese mathematics physics chemistry english
1 Jerry 19 91 100 87 97 93
2 Tom 19 62 93 77 96 97
3 Peter 19 43 73 87 66 97
4 Jessca 19 97 97 63 97 96
5 White 18 96 99 87 99 95
6 Terry 19 93 67 89 95 84
7 Zhi 19 96 53 86 80 92
8 Cindy 19 87 71 84 76 93
9 Smith 19 77 73 93 91 91
10 Toy 19 94 81 81 93 90
11 Kerry 19 91 90 87 97 93
12 Linda 19 97 84 87 78 90
13 Susan 19 92 83 87 96 97
14 Yao 19 97 97 63 97 96
15 Jack 19 96 99 87 99 95
16 Mokey 20 93 67 89 95 94
17 Mouse 20 96 83 86 90 92
18 Cat 20 87 71 84 76 93
19 Dog 20 87 73 83 91 91
20 Snack 20 84 71 81 93 90
21 Qian 20 91 100 87 97 93
22 Yong 20 87 94 87 78 90
23 Xiang 20 92 93 77 86 97
24 Kun 20 97 87 83 87 86
25 Hunter 20 96 99 87 99 95
26 Qian 20 93 87 89 95 84
27 Yong 20 86 53 86 90 92
28 Xiang 20 87 71 84 76 93
29 Kun 20 87 73 83 91 91
30 Hunter 20 94 71 81 93 90
31 NULL NULL 98 83 93 91 81
32 NULL NULL 84 81 91 93 90
显示左表scores sc所有记录,右表part_student st没有的记录以NULL表示
3)RIGHT [OUTER] JOIN…ON
代码语言:javascript复制hive> SELECT sc.student_id,
get_json_object(st.info, '$.name') AS name,
get_json_object(st.info, '$.age') AS age,
get_json_object(sc.scores, '$.Chinese') AS Chinese,
get_json_object(sc.scores, '$.mathematics') AS mathematics,
get_json_object(sc.scores, '$.Physics') AS Physics,
get_json_object(sc.scores, '$.Chemistry') AS Chemistry,
get_json_object(sc.scores, '$.English') AS English
FROM scores sc RIGHT JOIN part_student st
ON sc.student_id=st.id;
.student_id name age chinese mathematics physics chemistry english
1 Jerry 19 91 100 87 97 93
2 Tom 19 62 93 77 96 97
3 Peter 19 43 73 87 66 97
4 Jessca 19 97 97 63 97 96
5 White 18 96 99 87 99 95
6 Terry 19 93 67 89 95 84
7 Zhi 19 96 53 86 80 92
8 Cindy 19 87 71 84 76 93
9 Smith 19 77 73 93 91 91
10 Toy 19 94 81 81 93 90
11 Kerry 19 91 90 87 97 93
12 Linda 19 97 84 87 78 90
13 Susan 19 92 83 87 96 97
14 Yao 19 97 97 63 97 96
15 Jack 19 96 99 87 99 95
16 Mokey 20 93 67 89 95 94
17 Mouse 20 96 83 86 90 92
18 Cat 20 87 71 84 76 93
19 Dog 20 87 73 83 91 91
20 Snack 20 84 71 81 93 90
21 Qian 20 91 100 87 97 93
22 Yong 20 87 94 87 78 90
23 Xiang 20 92 93 77 86 97
24 Kun 20 97 87 83 87 86
25 Hunter 20 96 99 87 99 95
26 Qian 20 93 87 89 95 84
27 Yong 20 86 53 86 90 92
28 Xiang 20 87 71 84 76 93
29 Kun 20 87 73 83 91 91
30 Hunter 20 94 71 81 93 90
NULL Harry 19 NULL NULL NULL NULL NULL
NULL Jeckson 19 NULL NULL NULL NULL NULL
NULL JiM 19 NULL NULL NULL NULL NULL
NULL White 18 NULL NULL NULL NULL NULL
NULL BaiDEn 18 NULL NULL NULL NULL NULL
Time taken: 8.651 seconds, Fetched: 35
row(s)
显示右表part_student st所有记录,左表scores sc没有的记录以NULL表示。
4)FULL [OUTER] JOIN…ON
代码语言:javascript复制hive> SELECT sc.student_id,
get_json_object(st.info, '$.name') AS name,
get_json_object(st.info, '$.age') AS age,
get_json_object(sc.scores, '$.Chinese') AS Chinese,
get_json_object(sc.scores, '$.mathematics') AS mathematics,
get_json_object(sc.scores, '$.Physics') AS Physics,
get_json_object(sc.scores, '$.Chemistry') AS Chemistry,
get_json_object(sc.scores, '$.English') AS English
FROM scores sc FULL JOIN part_student st ON
sc.student_id=st.id; sc.student_id name age chinese mathematics physics chemistry english
1 Jerry 19 91 100 87 97 93
2 Tom 19 62 93 77 96 97
3 Peter 19 43 73 87 66 97
4 Jessca 19 97 97 63 97 96
5 White 18 96 99 87 99 95
6 Terry 19 93 67 89 95 84
7 Zhi 19 96 53 86 80 92
8 Cindy 19 87 71 84 76 93
9 Smith 19 77 73 93 91 91
10 Toy 19 94 81 81 93 90
11 Kerry 19 91 90 87 97 93
12 Linda 19 97 84 87 78 90
13 Susan 19 92 83 87 96 97
14 Yao 19 97 97 63 97 96
15 Jack 19 96 99 87 99 95
16 Mokey 20 93 67 89 95 94
17 Mouse 20 96 83 86 90 92
18 Cat 20 87 71 84 76 93
19 Dog 20 87 73 83 91 91
20 Snack 20 84 71 81 93 90
21 Qian 20 91 100 87 97 93
22 Yong 20 87 94 87 78 90
23 Xiang 20 92 93 77 86 97
24 Kun 20 97 87 83 87 86
25 Hunter 20 96 99 87 99 95
26 Qian 20 93 87 89 95 84
27 Yong 20 86 53 86 90 92
28 Xiang 20 87 71 84 76 93
29 Kun 20 87 73 83 91 91
30 Hunter 20 94 71 81 93 90
31 NULL NULL 98 83 93 91 81
32 NULL NULL 84 81 91 93 90
NULL Harry 19 NULL NULL NULL NULL NULL
NULL Jeckson 19 NULL NULL NULL NULL NULL
NULL JiM 19 NULL NULL NULL NULL NULL
NULL White 18 NULL NULL NULL NULL NULL
NULL BaiDEn 18 NULL NULL NULL NULL NULL
Time taken: 1.374 seconds, Fetched: 37
row(s)
显示左右表所有记录,对应不能存在的记录以NULL表示。