Oracle 大数据量去重实验

2019-05-25 19:44:57 浏览数 (1)

[sql] view plain copy

  1. -- 环境:64位11.2G
  2. -- 一、建立测试表,生成2000万测试数据,其中200万重复
  3. CREATE TABLE test_t
  4. (
  5. id NUMBER (8) NOT NULL PRIMARY KEY,
  6. name VARCHAR2 (32)
  7. );
  8. BEGIN
  9. FOR i IN 1 .. 18000000
  10. LOOP
  11. INSERT INTO test_t (id, name)
  12. VALUES (i, SYS_GUID ());
  13. END LOOP;
  14. COMMIT;
  15. END;
  16. /
  17. INSERT INTO test_t (id, name)
  18. SELECT 18000000 (id / 9), name
  19. FROM test_t
  20. WHERE MOD (id, 9) = 0;
  21. COMMIT;
  22. CREATE INDEX idx_test_anme
  23. ON test_t (name);
  24. ANALYZE TABLE test_t COMPUTE STATISTICS FOR TABLE FOR ALL INDEXES FOR ALL INDEXED COLUMNS;
  25. -- 二、两种去重方案对比执行时间
  26. -- 方案1:建立中间表,生成辅助对象,删除原表,改表名
  27. CREATE TABLE temp
  28. AS
  29. SELECT *
  30. FROM test_t a
  31. WHERE a.ROWID = (SELECT MIN (b.ROWID)
  32. FROM test_t b
  33. WHERE a.name = b.name);
  34. -- 用时:08:14.79
  35. CREATE INDEX idx
  36. ON temp (name);
  37. -- 用时:02:45.73
  38. ALTER TABLE temp ADD CONSTRAINT temp_pk PRIMARY KEY (id);
  39. -- 用时:01:15.79
  40. -- 总用时:12分16秒
  41. -- 方案2:直接删除原表中的重复数据
  42. DELETE test_t a
  43. WHERE a.ROWID > (SELECT MIN (b.ROWID)
  44. FROM test_t b
  45. WHERE a.name = b.name);
  46. -- 用时:2个小时没出来,中断退出。

0 人点赞