[sql] view plain copy
- -- 环境:64位11.2G
- -- 一、建立测试表,生成2000万测试数据,其中200万重复
- CREATE TABLE test_t
- (
- id NUMBER (8) NOT NULL PRIMARY KEY,
- name VARCHAR2 (32)
- );
- BEGIN
- FOR i IN 1 .. 18000000
- LOOP
- INSERT INTO test_t (id, name)
- VALUES (i, SYS_GUID ());
- END LOOP;
- COMMIT;
- END;
- /
- INSERT INTO test_t (id, name)
- SELECT 18000000 (id / 9), name
- FROM test_t
- WHERE MOD (id, 9) = 0;
- COMMIT;
- CREATE INDEX idx_test_anme
- ON test_t (name);
- ANALYZE TABLE test_t COMPUTE STATISTICS FOR TABLE FOR ALL INDEXES FOR ALL INDEXED COLUMNS;
- -- 二、两种去重方案对比执行时间
- -- 方案1:建立中间表,生成辅助对象,删除原表,改表名
- CREATE TABLE temp
- AS
- SELECT *
- FROM test_t a
- WHERE a.ROWID = (SELECT MIN (b.ROWID)
- FROM test_t b
- WHERE a.name = b.name);
- -- 用时:08:14.79
- CREATE INDEX idx
- ON temp (name);
- -- 用时:02:45.73
- ALTER TABLE temp ADD CONSTRAINT temp_pk PRIMARY KEY (id);
- -- 用时:01:15.79
- -- 总用时:12分16秒
- -- 方案2:直接删除原表中的重复数据
- DELETE test_t a
- WHERE a.ROWID > (SELECT MIN (b.ROWID)
- FROM test_t b
- WHERE a.name = b.name);
- -- 用时:2个小时没出来,中断退出。