MySQL查找重复数据,删除重复数据
数据库版本 Server version: 5.1.41-community-log MySQL Community Server (GPL)
例1,表中有主键(可唯一标识的字段),且该字段为数字类型
例1测试数据
代码语言:javascript复制/* 表结构 */DROP TABLE IF EXISTS `t1`;CREATE TABLE IF NOT EXISTS `t1`( `id` INT(1) NOT NULL AUTO_INCREMENT, `name` VARCHAR(20) NOT NULL, `add` VARCHAR(20) NOT NULL,PRIMARY KEY(`id`))Engine=InnoDB;/* 插入测试数据 */INSERT INTO `t1`(`name`,`add`) VALUES('abc',"123"),('abc',"123"),('abc',"321"),('abc',"123"),('xzy',"123"),('xzy',"456"),('xzy',"456"),('xzy',"456"),('xzy',"789"),('xzy',"987"),('xzy',"789"),('ijk',"147"),('ijk',"147"),('ijk',"852"),('opq',"852"),('opq',"963"),('opq',"741"),('tpk',"741"),('tpk',"963"),('tpk',"963"),('wer',"546"),('wer',"546"),('once',"546");SELECT * FROM `t1`; ---- ------ ----- | id | name | add | ---- ------ ----- | 1 | abc | 123 || 2 | abc | 123 || 3 | abc | 321 || 4 | abc | 123 || 5 | xzy | 123 || 6 | xzy | 456 || 7 | xzy | 456 || 8 | xzy | 456 || 9 | xzy | 789 || 10 | xzy | 987 || 11 | xzy | 789 || 12 | ijk | 147 || 13 | ijk | 147 || 14 | ijk | 852 || 15 | opq | 852 || 16 | opq | 963 || 17 | opq | 741 || 18 | tpk | 741 || 19 | tpk | 963 || 20 | tpk | 963 || 21 | wer | 546 || 22 | wer | 546 || 23 | once | 546 | ---- ------ ----- 23 rows in set (0.00 sec)
查找id最小的重复数据(只查找id字段)
代码语言:javascript复制/* 查找id最小的重复数据(只查找id字段) */SELECT DISTINCT MIN(`id`) AS `id`FROM `t1`GROUP BY `name`,`add`HAVING COUNT(1) > 1; ------ | id | ------ | 1 || 12 || 19 || 21 || 6 || 9 | ------ 6 rows in set (0.00 sec)
查找所有重复数据源码
代码语言:javascript复制/* 查找所有重复数据 */SELECT `t1`.*FROM `t1`,(SELECT `name`,`add`FROM `t1`GROUP BY `name`,`add`HAVING COUNT(1) > 1) AS `t2`WHERE `t1`.`name` = `t2`.`name`AND `t1`.`add` = `t2`.`add`; ---- ------ ----- | id | name | add | ---- ------ ----- | 1 | abc | 123 || 2 | abc | 123 || 4 | abc | 123 || 6 | xzy | 456 || 7 | xzy | 456 || 8 | xzy | 456 || 9 | xzy | 789 || 11 | xzy | 789 || 12 | ijk | 147 || 13 | ijk | 147 || 19 | tpk | 963 || 20 | tpk | 963 || 21 | wer | 546 || 22 | wer | 546 | ---- ------ ----- 14 rows in set (0.00 sec)
查找除id最小的数据外的重复数据
代码语言:javascript复制/* 查找除id最小的数据外的重复数据 */SELECT `t1`.*FROM `t1`,(SELECT DISTINCT MIN(`id`) AS `id`,`name`,`add`FROM `t1`GROUP BY `name`,`add`HAVING COUNT(1) > 1) AS `t2`WHERE `t1`.`name` = `t2`.`name`AND `t1`.`add` = `t2`.`add`AND `t1`.`id` <> `t2`.`id`; ---- ------ ----- | id | name | add | ---- ------ ----- | 2 | abc | 123 || 4 | abc | 123 || 7 | xzy | 456 || 8 | xzy | 456 || 11 | xzy | 789 || 13 | ijk | 147 || 20 | tpk | 963 || 22 | wer | 546 | ---- ------ ----- 8 rows in set (0.00 sec)
例2,表中没有主键(可唯一标识的字段),或者主键并非数字类型(也可以删除重复数据,但效率上肯定比较慢)
例2测试数据
代码语言:javascript复制/* 表结构 */DROP TABLE IF EXISTS `noid`;CREATE TABLE IF NOT EXISTS `noid`( `pk` VARCHAR(20) NOT NULL COMMENT '字符串主键', `name` VARCHAR(20) NOT NULL, `add` VARCHAR(20) NOT NULL,PRIMARY KEY(`pk`))Engine=InnoDB;/* 测试数据,与上例一样的测试数据,只是主键变为字符串形式 */INSERT INTO `noid`(`pk`,`name`,`add`) VALUES('a','abc',"123"),('b','abc',"123"),('c','abc',"321"),('d','abc',"123"),('e','xzy',"123"),('f','xzy',"456"),('g','xzy',"456"),('h','xzy',"456"),('i','xzy',"789"),('j','xzy',"987"),('k','xzy',"789"),('l','ijk',"147"),('m','ijk',"147"),('n','ijk',"852"),('o','opq',"852"),('p','opq',"963"),('q','opq',"741"),('r','tpk',"741"),('s','tpk',"963"),('t','tpk',"963"),('u','wer',"546"),('v','wer',"546"),('w','once',"546");SELECT * FROM `noid`; ---- ------ ----- | pk | name | add | ---- ------ ----- | a | abc | 123 || b | abc | 123 || c | abc | 321 || d | abc | 123 || e | xzy | 123 || f | xzy | 456 || g | xzy | 456 || h | xzy | 456 || i | xzy | 789 || j | xzy | 987 || k | xzy | 789 || l | ijk | 147 || m | ijk | 147 || n | ijk | 852 || o | opq | 852 || p | opq | 963 || q | opq | 741 || r | tpk | 741 || s | tpk | 963 || t | tpk | 963 || u | wer | 546 || v | wer | 546 || w | once | 546 | ---- ------ ----- 23 rows in set (0.00 sec)
为表添加自增长的id字段
代码语言:javascript复制/* 为表添加自增长的id字段 */ALTER TABLE `noid` ADD `id` INT(1) NOT NULL AUTO_INCREMENT, ADD INDEX `id`(`id`);Query OK, 23 rows affected (0.16 sec)Records: 23 Duplicates: 0 Warnings: 0SELECT * FROM `noid`; ---- ------ ----- ---- | pk | name | add | id | ---- ------ ----- ---- | a | abc | 123 | 1 || b | abc | 123 | 2 || c | abc | 321 | 3 || d | abc | 123 | 4 || e | xzy | 123 | 5 || f | xzy | 456 | 6 || g | xzy | 456 | 7 || h | xzy | 456 | 8 || i | xzy | 789 | 9 || j | xzy | 987 | 10 || k | xzy | 789 | 11 || l | ijk | 147 | 12 || m | ijk | 147 | 13 || n | ijk | 852 | 14 || o | opq | 852 | 15 || p | opq | 963 | 16 || q | opq | 741 | 17 || r | tpk | 741 | 18 || s | tpk | 963 | 19 || t | tpk | 963 | 20 || u | wer | 546 | 21 || v | wer | 546 | 22 || w | once | 546 | 23 | ---- ------ ----- ---- 23 rows in set (0.00 sec)
MySQL中必须是有索引的字段才可以使用AUTO_INCREMENT
删除重复数据与上例一样,记得删除完数据把id字段也删除了
删除重复数据,只保留一条数据
代码语言:javascript复制/* 删除重复数据,只保留一条数据 */DELETE FROM `noid`USING `noid`,(SELECT DISTINCT MIN(`id`) AS `id`,`name`,`add`FROM `noid`GROUP BY `name`,`add`HAVING COUNT(1) > 1) AS `t2`WHERE `noid`.`name` = `t2`.`name`AND `noid`.`add` = `t2`.`add`AND `noid`.`id` <> `t2`.`id`;Query OK, 8 rows affected (0.05 sec)/* 删除id字段 */ALTER TABLE `noid` DROP `id`;Query OK, 15 rows affected (0.16 sec)Records: 15 Duplicates: 0 Warnings: 0SELECT * FROM `noid`; ---- ------ ----- | pk | name | add | ---- ------ ----- | a | abc | 123 || c | abc | 321 || e | xzy | 123 || f | xzy | 456 || i | xzy | 789 || j | xzy | 987 || l | ijk | 147 || n | ijk | 852 || o | opq | 852 || p | opq | 963 || q | opq | 741 || r | tpk | 741 || s | tpk | 963 || u | wer | 546 || w | once | 546 | ---- ------ ----- 15 rows in set (0.00 sec)