GROUP BY是MySQL中用于分组聚合数据的重要子句,它通常与聚合函数(如COUNT, SUM, AVG等)一起使用,对结果集进行分组计算。
-- General shape of a grouped query. Square brackets mark optional clauses.
-- The clauses must appear in this order: HAVING follows GROUP BY, and
-- ORDER BY comes after HAVING.
SELECT column_name(s), aggregate_function(column_name)
FROM table_name
[WHERE condition]
GROUP BY column_name(s)
[HAVING condition]
[ORDER BY column_name(s)];
-- Headcount per department: one output row for each distinct department value.
SELECT
    e.department,
    COUNT(*) AS employee_count
FROM employees AS e
GROUP BY e.department;
-- Headcount per (department, job_title) combination.
SELECT
    e.department,
    e.job_title,
    COUNT(*) AS employee_count
FROM employees AS e
GROUP BY
    e.department,
    e.job_title;
-- Salary statistics per department: average, maximum, and minimum salary.
SELECT e.department,
       AVG(e.salary) AS avg_salary,
       MAX(e.salary) AS max_salary,
       MIN(e.salary) AS min_salary
FROM employees AS e
GROUP BY e.department;
WHERE在分组前过滤数据:
-- Headcount for the Sales department only.
-- WHERE removes the other departments' rows before any grouping happens.
SELECT
    e.department,
    COUNT(*)
FROM employees AS e
WHERE e.department = 'Sales'
GROUP BY e.department;
HAVING在分组后过滤结果:
-- Departments with more than 5 employees.
-- HAVING is evaluated per group, after the rows have been aggregated.
SELECT
    e.department,
    COUNT(*) AS emp_count
FROM employees AS e
GROUP BY e.department
HAVING COUNT(*) > 5;
-- Average salary per department, highest average first.
SELECT
    e.department,
    AVG(e.salary) AS avg_salary
FROM employees AS e
GROUP BY e.department
ORDER BY AVG(e.salary) DESC;
使用WITH ROLLUP生成小计和总计行:
-- Headcount and salary total per (department, job_title).
-- WITH ROLLUP appends super-aggregate rows: a subtotal per department and a
-- final grand total, with NULL in the rolled-up grouping column(s).
SELECT
    e.department,
    e.job_title,
    COUNT(*)      AS emp_count,
    SUM(e.salary) AS total_salary
FROM employees AS e
GROUP BY e.department, e.job_title WITH ROLLUP;
-- Number of orders per calendar year, grouping on an expression (YEAR of order_date).
SELECT
    YEAR(o.order_date) AS order_year,
    COUNT(*)
FROM orders AS o
GROUP BY YEAR(o.order_date);
-- Headcount per first-name length, measured in characters.
-- CHAR_LENGTH() counts characters, whereas MySQL's LENGTH() counts bytes and
-- would therefore put multibyte names (e.g. UTF-8 Chinese names) in the wrong
-- bucket.
SELECT
    CHAR_LENGTH(first_name) AS name_length,
    COUNT(*)
FROM employees
GROUP BY CHAR_LENGTH(first_name);
-- Headcount per age bracket.
-- A NULL age fails every comparison, so without the explicit IS NULL branch it
-- would fall through to ELSE and be counted as 'Over 30'.
-- NOTE(review): assumes employees.age may be NULL; if the column is NOT NULL
-- the 'Unknown' bucket simply never appears.
SELECT
    CASE
        WHEN age IS NULL THEN 'Unknown'
        WHEN age < 20 THEN 'Under 20'
        WHEN age BETWEEN 20 AND 30 THEN '20-30'
        ELSE 'Over 30'
    END AS age_group,
    COUNT(*) AS emp_count
FROM employees
GROUP BY age_group;
SELECT列表规则:SELECT列表中的非聚合列必须出现在GROUP BY子句中(或函数依赖于分组列);MySQL 5.7.5起默认启用的ONLY_FULL_GROUP_BY模式会拒绝不符合该规则的查询。
NULL值处理:GROUP BY会把分组列中的所有NULL值归入同一个分组。
性能考虑:在分组列上建立合适的索引,并尽量先用WHERE减少参与分组的行数,可以提升GROUP BY的执行效率。
与DISTINCT的区别:DISTINCT只用于去除重复行;GROUP BY在分组的同时还可以配合聚合函数对每个分组进行计算。
-- Incorrect example (kept on purpose): employee_name is neither aggregated nor
-- listed in GROUP BY, so it is not a valid select-list column for a query that
-- groups only by department.
SELECT department, employee_name, COUNT(*)
FROM employees
GROUP BY department;
-- Correct form: every non-aggregated select-list column also appears in GROUP BY.
SELECT
    e.department,
    e.employee_name,
    COUNT(*)
FROM employees AS e
GROUP BY
    e.department,
    e.employee_name;
-- Incorrect example (kept on purpose): salary > 5000 is a per-row condition, so
-- it belongs in WHERE (applied before grouping), not in HAVING (applied to the
-- groups after aggregation).
-- NOTE(review): HAVING is also written before GROUP BY here, which does not
-- match MySQL's clause order (GROUP BY first, then HAVING).
SELECT department, AVG(salary)
FROM employees
HAVING salary > 5000
GROUP BY department;
-- Correct form: the per-row salary filter goes in WHERE, so only rows with
-- salary above 5000 reach the grouping and the average.
SELECT
    e.department,
    AVG(e.salary)
FROM employees AS e
WHERE e.salary > 5000
GROUP BY e.department;
GROUP BY是数据分析中不可或缺的工具,合理使用可以高效完成各种数据汇总和统计任务。