[WITH CommonTableExpression (, CommonTableExpression)*]
SELECT [ALL|DISTINCT] select_expr,select_expr,...
FROM table_reference
[WHERE where_condition]
[GROUP BY col_list]
[ORDER BY col_list]
[CLUSTER BY col_list | [DISTRIBUTE BY col_list][SORT BY col_list]]
[LIMIT [offset,] rows];
-- Project three specific columns from the COVID-19 table.
SELECT
    country,
    cases,
    deaths
FROM t_usa_covid19_p;
-- With quoted identifiers disabled, a backquoted name is no longer taken as a
-- literal identifier; Hive interprets it as a regular expression instead.
SET hive.support.quoted.identifiers = none;
-- Select every column whose name matches the regular expression ^C.*
SELECT `^C.*`
FROM t_usa_covid19_p;
-- current_database() needs no table, so the FROM clause is omitted.
SELECT current_database();
-- COUNT(column) counts only the rows whose country value is non-NULL.
SELECT COUNT(country)
FROM t_usa_covid19_p;
-- Without a modifier, every matching row is returned...
SELECT state
FROM t_usa_covid19_p;
-- ...which is equivalent to spelling out ALL (the default):
SELECT ALL state
FROM t_usa_covid19_p;
-- DISTINCT removes duplicate state values from the result.
SELECT DISTINCT state
FROM t_usa_covid19_p;
-- Keep only the rows whose state name is longer than 10 characters.
SELECT *
FROM t_usa_covid19_p
WHERE length(state) > 10;
-- Uncorrelated IN subquery: keep the rows of A whose a value appears in B.foo.
SELECT *
FROM A
WHERE A.a IN (
    SELECT foo
    FROM B
);
-- Total deaths per state on 2021-01-28, keeping only states above 10000.
-- WHERE filters rows before grouping; HAVING filters the groups afterwards.
SELECT
    state,
    sum(deaths)
FROM t_usa_covid19_p
WHERE count_date = "2021-01-28"
GROUP BY state
HAVING sum(deaths) > 10000;
-- LIMIT offset,rows: the offset is zero-based, so "2,3" skips the first two
-- rows and returns the next three (rows 3 through 5).
SELECT *
FROM t_usa_covid19_p
WHERE count_date = "2021-01-28"
  AND state = "California"
LIMIT 2, 3;
-- Same aggregation as a per-state death total, but HAVING refers to the
-- aggregate through its SELECT-list alias (cnts).
SELECT
    state,
    sum(deaths) AS cnts
FROM t_usa_covid19_p
WHERE count_date = "2021-01-28"
GROUP BY state
HAVING cnts > 10000;
-- Top three California rows on 2021-01-28, ordered by deaths descending.
SELECT *
FROM t_usa_covid19_p
WHERE count_date = "2021-01-28"
  AND state = "California"
ORDER BY deaths DESC
LIMIT 3;
-- Set the number of reduce tasks explicitly; when it is not set, Hive
-- estimates it from the input data volume.
SET mapreduce.job.reduces=2;
-- CLUSTER BY num: rows are distributed to reducers by num and sorted by num
-- within each reducer.
SELECT *
FROM student
CLUSTER BY num;
-- DISTRIBUTE BY + SORT BY: distribute by one column (sex) while sorting each
-- reducer's rows by a different one (age, descending).
SELECT *
FROM student
DISTRIBUTE BY sex
SORT BY age DESC;
select_statement UNION [ALL|DISTINCT] select_statement UNION ...
-- Concatenate both student tables; UNION ALL keeps duplicate rows.
SELECT
    num,
    name
FROM student_local
UNION ALL
SELECT
    num,
    name
FROM student_hdfs;
-- To apply LIMIT to each side of the union separately, each limited SELECT
-- is wrapped in its own subquery.
SELECT
    num,
    name
FROM (
    SELECT num, name
    FROM student_local
    LIMIT 2
) subq1
UNION ALL
SELECT
    num,
    name
FROM (
    SELECT num, name
    FROM student_hdfs
    LIMIT 3
) subq2;
[WITH CommonTableExpression (, CommonTableExpression)*]
-- Basic CTE: q1 names a one-student result that the main query reads.
WITH q1 AS (
    SELECT num, name, age
    FROM student
    WHERE num = 95002
)
SELECT *
FROM q1;
-- Chained CTEs: q2 is built on q1, and the main query reads q2 through a
-- FROM-clause subquery.
WITH q1 AS (
    SELECT *
    FROM student
    WHERE num = 95002
),
q2 AS (
    SELECT num, name, age
    FROM q1
)
SELECT *
FROM (
    SELECT num
    FROM q2
) a;
-- Two independent CTEs combined with UNION ALL in the main query.
WITH q1 AS (
    SELECT *
    FROM student
    WHERE num = 95002
),
q2 AS (
    SELECT *
    FROM student
    WHERE num = 95004
)
SELECT *
FROM q1
UNION ALL
SELECT *
FROM q2;
-- A CTE can feed an INSERT: overwrite table_name with the rows of q1.
WITH q1 AS (
    SELECT *
    FROM student
    WHERE num = 95002
)
INSERT OVERWRITE TABLE table_name
SELECT *
FROM q1;
-- CREATE TABLE AS SELECT whose defining query starts with a CTE.
CREATE TABLE table_name AS
WITH q1 AS (
    SELECT *
    FROM student
    WHERE num = 95002
)
SELECT *
FROM q1;
table_reference [INNER] JOIN table_factor [join_condition]
| table_reference {LEFT|RIGHT|FULL} [OUTER] JOIN table_reference join_condition
| table_reference LEFT SEMI JOIN table_reference join_condition
| table_reference CROSS JOIN table_reference [join_condition]
join_condition: ON expression
-- Inner join: only employees that have a matching address row are returned.
SELECT
    e.id,
    e.name,
    e_a.city,
    e_a.street
FROM employee e
INNER JOIN employee_adress e_a
    ON e.id = e_a.id;
-- Implicit (comma) join notation: the join condition is written in WHERE.
SELECT
    e.id,
    e.name,
    e_a.city,
    e_a.street
FROM employee e,
     employee_adress e_a
WHERE e.id = e_a.id;
-- Left join: every employee is kept; the address columns are NULL when no
-- address row matches.
SELECT
    e.id,
    e.name,
    e_a.city,
    e_a.street
FROM employee e
LEFT JOIN employee_adress e_a
    ON e.id = e_a.id;
-- Non-equi join conditions can also be used:
-- NOTE(review): non-equality conditions in ON are only accepted by newer Hive
-- releases - confirm against the target version.
SELECT
    e.id,
    e.name,
    e_a.city,
    e_a.street
FROM employee e
LEFT JOIN employee_adress e_a
    ON (e.id <> e_a.id);
-- Full outer join: unmatched rows from both tables are kept, with NULLs
-- filling the columns of the missing side.
SELECT
    e.id,
    e.name,
    e_a.city,
    e_a.street
FROM employee e
FULL OUTER JOIN employee_adress e_a
    ON e.id = e_a.id;
-- Left semi join: return the employee rows for which a matching address row
-- exists.
SELECT *
FROM employee e
LEFT SEMI JOIN employee_adress e_a
    ON e.id = e_a.id;
-- In terms of its result, a LEFT SEMI JOIN on the same condition corresponds to
-- this inner join that projects only the left table's columns.
-- NOTE(review): the two match exactly only if each employee has at most one
-- address row; the inner join repeats an employee once per matching address.
SELECT e.*
FROM employee e
INNER JOIN employee_adress e_a
    ON e.id = e_a.id;
-- CROSS JOIN; Hive's grammar allows an optional join condition here, which
-- then restricts the row combinations that are returned.
SELECT e.*
FROM employee e
CROSS JOIN employee_adress e_a
    ON e.id = e_a.id;
-- Both join conditions use only b.key1 as b's join key, so Hive converts the
-- whole statement into a single MapReduce job.
SELECT
    a.val,
    b.val,
    c.val
FROM a
JOIN b
    ON (a.key = b.key1)
JOIN c
    ON (c.key = b.key1);
-- The joins use two different columns of b (key1 and key2), so Hive converts
-- the statement into two MapReduce jobs.
SELECT
    a.val,
    b.val,
    c.val
FROM a
JOIN b
    ON (a.key = b.key1)
JOIN c
    ON (c.key = b.key2);
-- STREAMTABLE hint: names table a as the one to be streamed during the join
-- rather than buffered.
SELECT /*+STREAMTABLE(a) */
    a.val,
    b.val,
    c.val
FROM a
JOIN b
    ON (a.key = b.key1)
JOIN c
    ON (c.key = b.key2);
-- MAPJOIN hint: requests a map-side join with table b.
-- NOTE(review): some Hive configurations ignore this hint - confirm for the
-- target cluster.
SELECT /*+MAPJOIN(b) */
    a.val,
    b.val,
    c.val
FROM a
JOIN b
    ON (a.key = b.key1)
JOIN c
    ON (c.key = b.key2);