参考:SQL 各子句的逻辑执行顺序
from & join --确定数据源
where --行级过滤
group by --分组
having --组级过滤
窗口函数 --计算窗口函数结果
select --选择列
distinct --去重
order by --最终排序(可对窗口函数结果进行排序)
limit/offset --分页
-- c1: running total of c inside each partition of a, accumulated in b order
-- from the start of the partition up to the current row (no explicit frame,
-- so the default frame applies).
select
    a,
    b,
    c,
    sum(c) over (
        partition by a
        order by b
    ) as c1
from bigdata_t1
-- c2: the same running total as c1, with the frame written out explicitly:
-- every row from the start of the partition up to the current row.
-- NOTE(review): a ROWS frame counts physical rows, so this can differ from a
-- default-frame running total when b has ties inside a partition.
select
    a,
    b,
    c,
    sum(c) over (
        partition by a
        order by b
        rows between unbounded preceding and current row
    ) as c2
from bigdata_t1
-- c3: total of c over the whole partition of a. There is no order by, so
-- every row of a partition carries the same sum.
select
    a,
    b,
    c,
    sum(c) over (
        partition by a
    ) as c3
from bigdata_t1
-- c4: sliding sum inside each partition of a: the current row plus the
-- 3 rows before it (in b order).
select
    a,
    b,
    c,
    sum(c) over (
        partition by a
        order by b
        rows between 3 preceding and current row
    ) as c4
from bigdata_t1
-- c5: sliding sum inside each partition of a: the 3 rows before the current
-- row, the current row, and the 1 row after it (in b order).
select
    a,
    b,
    c,
    sum(c) over (
        partition by a
        order by b
        rows between 3 preceding and 1 following
    ) as c5
from bigdata_t1
-- c6: sum inside each partition of a from the current row through the last
-- row of the partition (in b order).
select
    a,
    b,
    c,
    sum(c) over (
        partition by a
        order by b
        rows between current row and unbounded following
    ) as c6
from bigdata_t1
-- Three ways of numbering rows inside each partition of a, highest b first.
select
    a,
    b,
    c,
    -- unique consecutive numbers; equal values still get different numbers
    row_number() over (partition by a order by b desc) as rn1,
    -- equal values share a rank; the ranks that follow are skipped
    rank() over (partition by a order by b desc) as rn2,
    -- equal values share a rank; the ranks that follow are not skipped
    dense_rank() over (partition by a order by b desc) as rn3
from bigdata_t1
-- ntile(n): split the ordered rows into n buckets and return the bucket
-- number of the current row.
select
    a,
    b,
    c,
    -- 2 buckets inside each partition of a
    ntile(2) over (partition by a order by b) as rn1,
    -- 3 buckets inside each partition of a
    ntile(3) over (partition by a order by b) as rn2,
    -- 4 buckets over all rows (no partition)
    ntile(4) over (order by b) as rn3
-- table name corrected from the typo "bigdate_t1"
from bigdata_t1
ntile(n) 可以看成是:把分区内按 order by 排好序的行依次切成 n 个桶,并返回当前行所在桶的编号(从 1 开始);行数不能被 n 整除时,靠前的桶各多分一行。
-- lag(col, n, default): value of col from the row n positions before the
-- current row inside the partition, in b order.
select
    a,
    b,
    c,
    -- previous row's b; the default literal is returned when there is no
    -- previous row (literal fixed: it was missing the last digit of the seconds)
    lag(b, 1, '1970-01-01 00:00:00') over (partition by a order by b) as last_1_b,
    -- b from two rows back; no default is given, so NULL when there is no such row
    lag(b, 2) over (partition by a order by b) as last_2_b
from bigdata_t4
-- lead(col, n, default): value of col from the row n positions after the
-- current row inside the partition, in b order.
select
    a,
    b,
    c,  -- the comma after c was missing, which made the statement unparsable
    -- next row's b; the default literal is returned when there is no next row
    lead(b, 1, '1970-01-01 00:00:00') over (partition by a order by b) as next_1_b,
    -- b from two rows ahead; no default is given, so NULL when there is no such row
    lead(b, 2) over (partition by a order by b) as next_2_b
from bigdata_t4
-- first_value: c of the first row of each partition of a, in b order.
-- (Function name corrected from "first_val", which is not a window function.)
select
    a,
    b,
    c,
    first_value(c) over (partition by a order by b) as first_c
from bigdata_t4
-- last_value: c of the last row of the window frame, in b order.
-- No frame is given, so the default frame ends at the current row: the
-- result is the value "as of the current row", not the last value of the
-- whole partition. Add
--     rows between unbounded preceding and unbounded following
-- to the over() clause to get the partition-wide last value instead.
select
    a,
    b,
    c,
    -- alias renamed from last_b: the value comes from column c
    last_value(c) over (partition by a order by b) as last_c
from bigdata_t4
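上述两个函数如果改用 desc 倒序排序:first_value 取到的是正序时的最后一个值;last_value 在默认窗口(分区起点到当前行)下取到的仍是截止到当前行的值,只有显式指定 rows between unbounded preceding and unbounded following 时,才会取到正序时的第一个值。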
5. 序列分析函数,不支持window子句
-- cume_dist: share of rows whose c is less than or equal to the current row's c.
select
    a,
    b,
    c,
    -- share computed over all rows
    cume_dist() over (order by c) as rn1,
    -- share computed inside each partition of a
    cume_dist() over (partition by a order by c) as rn2
from bigdata_t3
-- percent_rank, shown next to the two inputs of its formula (rn11 and rn12).
select
    a,
    b,
    c,
    percent_rank() over (order by c) as rn1,
    -- rank of the row; no partition is set, so it is taken over all rows
    rank() over (order by c) as rn11,
    -- total row count; the constant partition key keeps all rows in one partition
    sum(1) over (partition by null) as rn12,
    percent_rank() over (partition by a order by c) as rn2
from bigdata_t3
-- rn1 = (rn11 - 1) / (rn12 - 1); no partition is set, so it is the percentage over all rows
-- rn2: the same formula, with rn11 and rn12 both taken inside each partition of a
-- grouping sets: aggregate once per listed column combination in one query.
select
    col1,
    col2,
    sum(col3)
from bigdata_t3
group by grouping sets (
    (col1, col2),  -- combination 1
    (col1),        -- combination 2
    (col2),        -- combination 3
    ()             -- grand-total row
)
-- cube(col1, col2): aggregate over every combination of the listed columns.
select
    col1,
    col2,
    sum(col3)
from bigdata_t3
group by cube(col1, col2)
-- Equivalent to the following clause (kept as a comment so the statement
-- above does not end up with a second GROUP BY clause):
-- group by grouping sets (
--     (col1, col2),  -- combination 1
--     (col1),        -- combination 2
--     (col2),        -- combination 3
--     ()             -- grand-total row
-- )
-- rollup(col1, col2): hierarchical subtotals, from the most detailed level
-- (col1, col2) up to the grand total.
select
    col1,
    col2,
    sum(col3)
from bigdata_t3
group by rollup(col1, col2)
-- Equivalent to the following clause (kept as a comment so the statement
-- above does not end up with a second GROUP BY clause):
-- group by grouping sets (
--     (col1, col2),  -- level 1
--     (col1),        -- level 2
--     ()             -- grand-total row
-- )