窗口函数
CREATE TABLE students ( id INT PRIMARY KEY auto_increment, NAME VARCHAR ( 30 ), math INT, english INT, num INT );
INSERT INTO students ( NAME, math, english, num )
VALUES
( "张三", 99, 88, 4 ),
( "李四", 77, 88, 5 ),
( "王五", 96, 74, 3 ),
( "赵六", 76, 89, 1 ),
( "天好", 85, 81, 2 );
SELECT
id,
math,
lag( math ) over ( ORDER BY math ) AS lag1,
lag( math, 1 ) over ( ORDER BY math ) AS lag2,
lag( math, 1, NULL ) over ( ORDER BY math ) AS lag3,
lead( math ) over ( ORDER BY math ) AS lead1,
class,
FIRST_VALUE( math ) over ( PARTITION BY class ORDER BY id ASC rows BETWEEN unbounded preceding AND unbounded following ) AS FIRST,
LAST_VALUE( math ) over ( PARTITION BY class ORDER BY id ASC rows BETWEEN unbounded preceding AND unbounded following ) AS last,
sum( math ) over ( ORDER BY math ) AS sum1,
sum( math ) over ( ORDER BY math rows BETWEEN unbounded preceding AND current ROW ) AS sum2,
sum( math ) over ( PARTITION BY class ) AS sum3,
sum( math ) over ( PARTITION BY class rows BETWEEN unbounded preceding AND unbounded following ) AS sum4,
sum( math ) over ( PARTITION BY class ORDER BY math ) AS sum5,
sum( math ) over ( PARTITION BY class ORDER BY math rows BETWEEN unbounded preceding AND current ROW ) AS sum6,
sum( math ) over ( ORDER BY math RANGE BETWEEN 10 preceding AND 10 following ) AS sum7,
rank() over ( PARTITION BY class ORDER BY math ) AS rank1,
dense_rank() over ( PARTITION BY class ORDER BY math ) AS rank2,
row_number() over ( PARTITION BY class ) AS row_num
FROM
students;
lag(math)
: 窗口函数,需要搭配 over
使用,有三个参数,后两个参数可以省略。
lag(math) over (ORDER BY math)
: 按数学成绩升序,得到之前一行的同学的数学成绩
lag(math, 1) over (ORDER BY math)
: 同上,参数 1
表示错位一行,即上一行,默认为1,可以省略,
lag(math, 1, NULL) over (ORDER BY math)
: 同上,参数null
表示若不存在前一行,该值为null,默认为null,可以省略
lead(math) over (ORDER BY math)
:按数学成绩升序排序,得到后一行的同学的数学成绩。
FIRST_VALUE(math)
: 获取指定列math在窗口范围内的第一行的值
over ( )
:指定窗口函数的窗口范围。
partition by class
: 按班级分组
order by math
: 窗口内行记录的排序规则,在窗口内按数学成绩排序
rows BETWEEN unbounded preceding AND unbounded following
: 限制窗口的边界,这里表示窗口范围从当前行之前的第一行(无限制)到当前行之后的最后一行(无限制)
FIRST_VALUE( math ) over ( PARTITION BY class ORDER BY id ASC rows BETWEEN unbounded preceding AND unbounded following ) AS FIRST
: 在每个班级内,学生按照id排序,获取排序第一的学生的数学成绩。这样,每一行都会有一个额外的列"FIRST",其中包含了每个班级内第一个学生的数学成绩。
LAST_VALUE( math )
: 用于获取指定列math在窗口范围内的最后一个值。
sum( math ) over ( ORDER BY math )
: 窗口按数学成绩排序,获取排序后当前行及之前所有行的数学成绩总和。
sum( math ) over ( ORDER BY math rows BETWEEN unbounded preceding AND current ROW )
: 同上,显示指定了滑动窗口范围是 排序后的当前行及之前所有行
sum( math ) over ( PARTITION BY class )
: 获取每个班的所有学生数学成绩的总和
rank() over ( PARTITION BY class ORDER BY math )
: 获取每个班内的数学成绩排名
- 排名相同的行会被分配相同的排名值,而下一行就会跳过这些排名值
- 如 1、1、3、4、5、5、5、8
dense_rank()
:也是窗口函数,需要搭配over使用
- 允许有相同的排名
- 排名值整体连续
- 如1、2、2、3、4、5、6、7、7、7、8
row_number
: 也是窗口函数,需要搭配over使用,给窗口内的数据分配行号
- 行号依次递增
- 如果两个行排序字段相同,行号也是依次递增
- 如:1、2、3、5、6、7、8、9
sum(math) over (ORDER BY math RANGE BETWEEN 10 preceding AND 10 following)
:
- 按数学成绩排序
RANGE BETWEEN 10 preceding AND 10 following
: 当前行的窗口范围是 数学成绩不低于当前行十分的且不高于当前行十分的所有学生。
range
表示按照数值选取范围
rows
:表示按照行来选取范围
查询结果排序
select * from students order by math;
select * from students distribute by class sort by math;
- distribute by: 指定class作为分发键,保证了相同的class一定会分发给相同的producer处理
- sort by:在单个producer内进行排序的规则,能保证局部有序,不保证全局有序。
select * from students cluster by math;
- cluster by math 等价于 distribute by math sort by math