SQL 中的 SELECT DISTINCT 用于从查询结果中去除重复的行,返回唯一值。以下是几种常见的实现方式:
SELECT DISTINCT column_name
FROM table_name;
SELECT DISTINCT column1, column2, column3
FROM table_name;
这会基于所有指定列的 组合 来去重。
-- 统计不重复的个数
SELECT COUNT(DISTINCT column_name)
FROM table_name;
-- 可以与多个聚合函数结合
SELECT
COUNT(DISTINCT category) as unique_categories,
AVG(DISTINCT price) as avg_unique_price
FROM products;
GROUP BY 也可以实现类似效果,通常用于需要聚合数据时:
-- 等同于 SELECT DISTINCT column1, column2
SELECT column1, column2
FROM table_name
GROUP BY column1, column2;
-- 结合聚合函数
SELECT column1, COUNT(*)
FROM table_name
GROUP BY column1;
对于更复杂的去重逻辑(如保留最新记录):
-- 保留每个分组中的第一条记录
WITH ranked_data AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY column1, column2 ORDER BY id) as rn
FROM table_name
)
SELECT column1, column2, column3
FROM ranked_data
WHERE rn = 1;
| id | name | department | salary |
|---|---|---|---|
| 1 | Alice | IT | 5000 |
| 2 | Bob | HR | 4500 |
| 3 | Alice | IT | 5000 |
| 4 | Charlie | IT | 5500 |
-- 获取所有不重复的部门
SELECT DISTINCT department
FROM employees;
-- 结果: IT, HR
-- 获取不重复的姓名和部门组合
SELECT DISTINCT name, department
FROM employees;
-- 结果: (Alice, IT), (Bob, HR), (Charlie, IT)
-- 统计不重复的姓名数量
SELECT COUNT(DISTINCT name)
FROM employees;
-- 结果: 3
DISTINCT 与 GROUP BY 性能:
GROUP BY 可能更快(特别是需要聚合时)索引优化:
-- 为经常去重的列创建索引
CREATE INDEX idx_column ON table_name(column_name);
NULL 值处理:
DISTINCT 将多个 NULL 值视为相同-- 去重并排序
SELECT DISTINCT column_name
FROM table_name
ORDER BY column_name;
-- 去重并限制结果数
SELECT DISTINCT column_name
FROM table_name
LIMIT 10;
-- 使用子查询去重
SELECT *
FROM table1 t1
WHERE t1.id IN (
SELECT DISTINCT table1_id
FROM table2
);
-- 使用 DISTINCT ON 按指定列去重,保留每组的第一行
SELECT DISTINCT ON (department) id, name, department
FROM employees
ORDER BY department, salary DESC;
-- MySQL 8.0+ 支持窗口函数
-- 较早版本可能需要使用 GROUP BY 或临时表
选择哪种方法取决于具体需求、数据量、数据库类型和性能要求。对于简单去重,SELECT DISTINCT 通常是最直观的选择;对于需要聚合或复杂逻辑的去重,考虑使用 GROUP BY 或窗口函数。