MySQL 正则表达式 - regexp_count、regexp_extract

2023-10-14 09:49:58 浏览数 (3)

        在学习 MySQL 正则表达式时可能会发现,有些常用功能并未提供。最典型的两个是:返回匹配项数目;一次返回所有匹配项。但我们可以创建自定义函数,在其中使用递归查询来实现。

一、regexp_count

代码语言:javascript复制
drop function if exists regexp_count;
delimiter //
create function regexp_count(x text,r text, match_type varchar(5)) returns bigint
    reads sql data
    deterministic
begin
    set @ret=0;

    with recursive cte as 
    (select n, regexp_substr(x,r,1,t.n,match_type) b from (select 1 n) t
      union all
     select n 1, regexp_substr(x,r,1, n   1,match_type) from cte 
      where b is not null)
    select count(*) into @ret from cte where b is not null; 

    return @ret;
end;
//
delimiter ;

二、regexp_extract

代码语言:javascript复制
drop function if exists regexp_extract;
delimiter //
create function regexp_extract(x text,r text, match_type varchar(5)) returns text charset utf8mb4
    reads sql data
    deterministic
begin
    set @ret='';

    with recursive cte as 
    (select n, regexp_substr(x,r,1,t.n,match_type) b from (select 1 n) t
      union all
     select n 1, regexp_substr(x,r,1, n   1,match_type) from cte 
      where b is not null)
    select * into @ret from 
    (select convert(group_concat(b) using utf8mb4) a from cte) t
     where a is not null;

    return @ret;
end;
//
delimiter ;

三、regexp_extract_index

代码语言:javascript复制
drop function if exists regexp_extract_index;
delimiter //
create function regexp_extract_index(x text,r text, return_option int, match_type varchar(5)) returns text charset utf8mb4
    reads sql data
    deterministic
begin
    set @ret='';
 
    with recursive cte as 
    (select n, regexp_instr(x,r,1,t.n,return_option,match_type) b from (select 1 n) t
      union all
     select n 1, regexp_instr(x,r,1, n   1,return_option,match_type) from cte 
      where b > 0)
    select * into @ret from 
    (select convert(group_concat(b) using utf8mb4) a from cte where b >0) t;
 
    return @ret;
end;
//
delimiter ;

四、测试

1. 测试数据

代码语言:javascript复制
drop table if exists t_regexp;
create table t_regexp(a text);
insert into t_regexp values (
'THE RIME OF THE ANCYENT MARINERE, IN SEVEN PARTS.
ARGUMENT.
How a Ship having passed the Line was driven by Storms to the cold Country
towards the South Pole; and how from thence she made her course to the tropical
Latitude of the Great Pacific Ocean; and of the strange things that befell;
and in what manner the Ancyent Marinere came back to his own Country.
I.
1       It is an ancyent Marinere,
2          And he stoppeth one of three:
3       "By thy long grey beard and thy glittering eye
4          "Now wherefore stoppest me?');
 
insert into t_regexp values ('THE RIME OF THE ANCYENT MARINERE, IN SEVEN PARTS.'),
('ARGUMENT.'),
('How a Ship having passed the Line was driven by Storms to the cold Country'),
('towards the South Pole; and how from thence she made her course to the tropical'),
('Latitude of the Great Pacific Ocean; and of the strange things that befell;'),
('and in what manner the Ancyent Marinere came back to his own Country.'),
('I.'),
('1       It is an ancyent Marinere,'),
('2          And he stoppeth one of three:'),
('3       "By thy long grey beard and thy glittering eye'),
('4          "Now wherefore stoppest me?');

2. 测试结果

代码语言:javascript复制
mysql> -- 匹配单词the,统计出现的次数
mysql> select regexp_extract(a,'\bthe\b','') a,regexp_count(a,'\bthe\b','') c from t_regexp;
 ------------------------------------- ------ 
| a                                   | c    |
 ------------------------------------- ------ 
| THE,THE,the,the,the,the,the,the,the |    9 |
| THE,THE                             |    2 |
|                                     |    0 |
| the,the                             |    2 |
| the,the                             |    2 |
| the,the                             |    2 |
| the                                 |    1 |
|                                     |    0 |
|                                     |    0 |
|                                     |    0 |
|                                     |    0 |
|                                     |    0 |
 ------------------------------------- ------ 
12 rows in set (0.00 sec)

mysql> -- 匹配小写单词the,统计出现的次数
mysql> select regexp_extract(a,'\bthe\b','c') a,regexp_count(a,'\bthe\b','c') c from t_regexp;
 ----------------------------- ------ 
| a                           | c    |
 ----------------------------- ------ 
| the,the,the,the,the,the,the |    7 |
|                             |    0 |
|                             |    0 |
| the,the                     |    2 |
| the,the                     |    2 |
| the,the                     |    2 |
| the                         |    1 |
|                             |    0 |
|                             |    0 |
|                             |    0 |
|                             |    0 |
|                             |    0 |
 ----------------------------- ------ 
12 rows in set (0.00 sec)

mysql> -- 多行模式匹配的所有单词,统计单词个数
mysql> select regexp_extract(a,'\w ','') a,regexp_count(a,'\w ','m') c from t_regexp;
 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ ------ 
| a                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          | c    |
 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ ------ 
| THE,RIME,OF,THE,ANCYENT,MARINERE,IN,SEVEN,PARTS,ARGUMENT,How,a,Ship,having,passed,the,Line,was,driven,by,Storms,to,the,cold,Country,towards,the,South,Pole,and,how,from,thence,she,made,her,course,to,the,tropical,Latitude,of,the,Great,Pacific,Ocean,and,of,the,strange,things,that,befell,and,in,what,manner,the,Ancyent,Marinere,came,back,to,his,own,Country,I,1,It,is,an,ancyent,Marinere,2,And,he,stoppeth,one,of,three,3,By,thy,long,grey,beard,and,thy,glittering,eye,4,Now,wherefore,stoppest,me |   95 |
| THE,RIME,OF,THE,ANCYENT,MARINERE,IN,SEVEN,PARTS                                                                                                                                                                                                                                                                                                                                                                                                                                                            |    9 |
| ARGUMENT                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |    1 |
| How,a,Ship,having,passed,the,Line,was,driven,by,Storms,to,the,cold,Country                                                                                                                                                                                                                                                                                                                                                                                                                                 |   15 |
| towards,the,South,Pole,and,how,from,thence,she,made,her,course,to,the,tropical                                                                                                                                                                                                                                                                                                                                                                                                                             |   15 |
| Latitude,of,the,Great,Pacific,Ocean,and,of,the,strange,things,that,befell                                                                                                                                                                                                                                                                                                                                                                                                                                  |   13 |
| and,in,what,manner,the,Ancyent,Marinere,came,back,to,his,own,Country                                                                                                                                                                                                                                                                                                                                                                                                                                       |   13 |
| I                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |    1 |
| 1,It,is,an,ancyent,Marinere                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |    6 |
| 2,And,he,stoppeth,one,of,three                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |    7 |
| 3,By,thy,long,grey,beard,and,thy,glittering,eye                                                                                                                                                                                                                                                                                                                                                                                                                                                            |   10 |
| 4,Now,wherefore,stoppest,me                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |    5 |
 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ ------ 
12 rows in set (0.01 sec)

mysql> -- 区分大小写、多行、dotall模式匹配999,统计出现的次数
mysql> select regexp_extract(a,'999','') a,regexp_count(a,'999','cmn') c from t_regexp;
 ------ ------ 
| a    | c    |
 ------ ------ 
|      |    0 |
|      |    0 |
|      |    0 |
|      |    0 |
|      |    0 |
|      |    0 |
|      |    0 |
|      |    0 |
|      |    0 |
|      |    0 |
|      |    0 |
|      |    0 |
 ------ ------ 
12 rows in set (0.00 sec)

mysql> -- 返回单词the出现的位置
mysql> select regexp_extract_index(a,'\bthe\b',0,'') a from t_regexp;
 --------------------------------- 
| a                               |
 --------------------------------- 
| 1,13,86,119,144,203,228,260,311 |
| 1,13                            |
| NULL                            |
| 26,59                           |
| 9,68                            |
| 13,45                           |
| 20                              |
| NULL                            |
| NULL                            |
| NULL                            |
| NULL                            |
| NULL                            |
 --------------------------------- 
12 rows in set (0.00 sec)

mysql> -- 返回小写单词the出现的位置(后)
mysql> select regexp_extract_index(a,'\bthe\b',1,'c') a from t_regexp;
 ---------------------------- 
| a                          |
 ---------------------------- 
| 89,122,147,206,231,263,314 |
| NULL                       |
| NULL                       |
| 29,62                      |
| 12,71                      |
| 16,48                      |
| 23                         |
| NULL                       |
| NULL                       |
| NULL                       |
| NULL                       |
| NULL                       |
 ---------------------------- 
12 rows in set (0.00 sec)

mysql> -- 多行模式匹配,返回所有单词出现的位置
mysql> select regexp_extract_index(a,'\w ',0,'m') a from t_regexp;
 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 
| a                                                                                                                                                                                                                                                                                                                                                                      |
 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 
| 1,5,10,13,17,25,35,38,44,51,61,65,67,72,79,86,90,95,99,106,109,116,119,123,128,136,144,148,154,160,164,168,173,180,184,189,193,200,203,207,216,225,228,232,238,246,253,257,260,264,272,279,284,292,296,299,304,311,315,323,332,337,342,345,349,353,362,365,373,376,379,382,390,400,411,415,418,427,431,434,441,450,453,457,462,467,473,477,481,492,496,508,512,522,531 |
| 1,5,10,13,17,25,35,38,44                                                                                                                                                                                                                                                                                                                                               |
| 1                                                                                                                                                                                                                                                                                                                                                                      |
| 1,5,7,12,19,26,30,35,39,46,49,56,59,63,68                                                                                                                                                                                                                                                                                                                              |
| 1,9,13,19,25,29,33,38,45,49,54,58,65,68,72                                                                                                                                                                                                                                                                                                                             |
| 1,10,13,17,23,31,38,42,45,49,57,64,69                                                                                                                                                                                                                                                                                                                                  |
| 1,5,8,13,20,24,32,41,46,51,54,58,62                                                                                                                                                                                                                                                                                                                                    |
| 1                                                                                                                                                                                                                                                                                                                                                                      |
| 1,9,12,15,18,26                                                                                                                                                                                                                                                                                                                                                        |
| 1,12,16,19,28,32,35                                                                                                                                                                                                                                                                                                                                                    |
| 1,10,13,17,22,27,33,37,41,52                                                                                                                                                                                                                                                                                                                                           |
| 1,13,17,27,36                                                                                                                                                                                                                                                                                                                                                          |
 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 
12 rows in set (0.00 sec)

mysql> -- 区分大小写、多行、dotall模式匹配,返回999出现的位置
mysql> select regexp_extract_index(a,'999',1,'cmn') a from t_regexp;
 ------ 
| a    |
 ------ 
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
 ------ 
12 rows in set (0.00 sec)

0 人点赞