在学习 MySQL 正则表达式时可能会发现,有些常用功能并未提供。最典型的两个是:返回匹配项数目;一次返回所有匹配项。但我们可以创建自定义函数,在其中使用递归查询来实现。
一、regexp_count
代码语言:javascript复制drop function if exists regexp_count;
delimiter //
create function regexp_count(x text,r text, match_type varchar(5)) returns bigint
reads sql data
deterministic
begin
set @ret=0;
with recursive cte as
(select n, regexp_substr(x,r,1,t.n,match_type) b from (select 1 n) t
union all
select n 1, regexp_substr(x,r,1, n 1,match_type) from cte
where b is not null)
select count(*) into @ret from cte where b is not null;
return @ret;
end;
//
delimiter ;
二、regexp_extract
代码语言:javascript复制drop function if exists regexp_extract;
delimiter //
create function regexp_extract(x text,r text, match_type varchar(5)) returns text charset utf8mb4
reads sql data
deterministic
begin
set @ret='';
with recursive cte as
(select n, regexp_substr(x,r,1,t.n,match_type) b from (select 1 n) t
union all
select n 1, regexp_substr(x,r,1, n 1,match_type) from cte
where b is not null)
select * into @ret from
(select convert(group_concat(b) using utf8mb4) a from cte) t
where a is not null;
return @ret;
end;
//
delimiter ;
三、regexp_extract_index
代码语言:javascript复制drop function if exists regexp_extract_index;
delimiter //
create function regexp_extract_index(x text,r text, return_option int, match_type varchar(5)) returns text charset utf8mb4
reads sql data
deterministic
begin
set @ret='';
with recursive cte as
(select n, regexp_instr(x,r,1,t.n,return_option,match_type) b from (select 1 n) t
union all
select n 1, regexp_instr(x,r,1, n 1,return_option,match_type) from cte
where b > 0)
select * into @ret from
(select convert(group_concat(b) using utf8mb4) a from cte where b >0) t;
return @ret;
end;
//
delimiter ;
四、测试
1. 测试数据
代码语言:javascript复制drop table if exists t_regexp;
create table t_regexp(a text);
insert into t_regexp values (
'THE RIME OF THE ANCYENT MARINERE, IN SEVEN PARTS.
ARGUMENT.
How a Ship having passed the Line was driven by Storms to the cold Country
towards the South Pole; and how from thence she made her course to the tropical
Latitude of the Great Pacific Ocean; and of the strange things that befell;
and in what manner the Ancyent Marinere came back to his own Country.
I.
1 It is an ancyent Marinere,
2 And he stoppeth one of three:
3 "By thy long grey beard and thy glittering eye
4 "Now wherefore stoppest me?');
insert into t_regexp values ('THE RIME OF THE ANCYENT MARINERE, IN SEVEN PARTS.'),
('ARGUMENT.'),
('How a Ship having passed the Line was driven by Storms to the cold Country'),
('towards the South Pole; and how from thence she made her course to the tropical'),
('Latitude of the Great Pacific Ocean; and of the strange things that befell;'),
('and in what manner the Ancyent Marinere came back to his own Country.'),
('I.'),
('1 It is an ancyent Marinere,'),
('2 And he stoppeth one of three:'),
('3 "By thy long grey beard and thy glittering eye'),
('4 "Now wherefore stoppest me?');
2. 测试结果
代码语言:javascript复制mysql> -- 匹配单词the,统计出现的次数
mysql> select regexp_extract(a,'\bthe\b','') a,regexp_count(a,'\bthe\b','') c from t_regexp;
------------------------------------- ------
| a | c |
------------------------------------- ------
| THE,THE,the,the,the,the,the,the,the | 9 |
| THE,THE | 2 |
| | 0 |
| the,the | 2 |
| the,the | 2 |
| the,the | 2 |
| the | 1 |
| | 0 |
| | 0 |
| | 0 |
| | 0 |
| | 0 |
------------------------------------- ------
12 rows in set (0.00 sec)
mysql> -- 匹配小写单词the,统计出现的次数
mysql> select regexp_extract(a,'\bthe\b','c') a,regexp_count(a,'\bthe\b','c') c from t_regexp;
----------------------------- ------
| a | c |
----------------------------- ------
| the,the,the,the,the,the,the | 7 |
| | 0 |
| | 0 |
| the,the | 2 |
| the,the | 2 |
| the,the | 2 |
| the | 1 |
| | 0 |
| | 0 |
| | 0 |
| | 0 |
| | 0 |
----------------------------- ------
12 rows in set (0.00 sec)
mysql> -- 多行模式匹配的所有单词,统计单词个数
mysql> select regexp_extract(a,'\w ','') a,regexp_count(a,'\w ','m') c from t_regexp;
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ ------
| a | c |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ ------
| THE,RIME,OF,THE,ANCYENT,MARINERE,IN,SEVEN,PARTS,ARGUMENT,How,a,Ship,having,passed,the,Line,was,driven,by,Storms,to,the,cold,Country,towards,the,South,Pole,and,how,from,thence,she,made,her,course,to,the,tropical,Latitude,of,the,Great,Pacific,Ocean,and,of,the,strange,things,that,befell,and,in,what,manner,the,Ancyent,Marinere,came,back,to,his,own,Country,I,1,It,is,an,ancyent,Marinere,2,And,he,stoppeth,one,of,three,3,By,thy,long,grey,beard,and,thy,glittering,eye,4,Now,wherefore,stoppest,me | 95 |
| THE,RIME,OF,THE,ANCYENT,MARINERE,IN,SEVEN,PARTS | 9 |
| ARGUMENT | 1 |
| How,a,Ship,having,passed,the,Line,was,driven,by,Storms,to,the,cold,Country | 15 |
| towards,the,South,Pole,and,how,from,thence,she,made,her,course,to,the,tropical | 15 |
| Latitude,of,the,Great,Pacific,Ocean,and,of,the,strange,things,that,befell | 13 |
| and,in,what,manner,the,Ancyent,Marinere,came,back,to,his,own,Country | 13 |
| I | 1 |
| 1,It,is,an,ancyent,Marinere | 6 |
| 2,And,he,stoppeth,one,of,three | 7 |
| 3,By,thy,long,grey,beard,and,thy,glittering,eye | 10 |
| 4,Now,wherefore,stoppest,me | 5 |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ ------
12 rows in set (0.01 sec)
mysql> -- 区分大小写、多行、dotall模式匹配999,统计出现的次数
mysql> select regexp_extract(a,'999','') a,regexp_count(a,'999','cmn') c from t_regexp;
------ ------
| a | c |
------ ------
| | 0 |
| | 0 |
| | 0 |
| | 0 |
| | 0 |
| | 0 |
| | 0 |
| | 0 |
| | 0 |
| | 0 |
| | 0 |
| | 0 |
------ ------
12 rows in set (0.00 sec)
mysql> -- 返回单词the出现的位置
mysql> select regexp_extract_index(a,'\bthe\b',0,'') a from t_regexp;
---------------------------------
| a |
---------------------------------
| 1,13,86,119,144,203,228,260,311 |
| 1,13 |
| NULL |
| 26,59 |
| 9,68 |
| 13,45 |
| 20 |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
---------------------------------
12 rows in set (0.00 sec)
mysql> -- 返回小写单词the出现的位置(后)
mysql> select regexp_extract_index(a,'\bthe\b',1,'c') a from t_regexp;
----------------------------
| a |
----------------------------
| 89,122,147,206,231,263,314 |
| NULL |
| NULL |
| 29,62 |
| 12,71 |
| 16,48 |
| 23 |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
----------------------------
12 rows in set (0.00 sec)
mysql> -- 多行模式匹配,返回所有单词出现的位置
mysql> select regexp_extract_index(a,'\w ',0,'m') a from t_regexp;
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| a |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| 1,5,10,13,17,25,35,38,44,51,61,65,67,72,79,86,90,95,99,106,109,116,119,123,128,136,144,148,154,160,164,168,173,180,184,189,193,200,203,207,216,225,228,232,238,246,253,257,260,264,272,279,284,292,296,299,304,311,315,323,332,337,342,345,349,353,362,365,373,376,379,382,390,400,411,415,418,427,431,434,441,450,453,457,462,467,473,477,481,492,496,508,512,522,531 |
| 1,5,10,13,17,25,35,38,44 |
| 1 |
| 1,5,7,12,19,26,30,35,39,46,49,56,59,63,68 |
| 1,9,13,19,25,29,33,38,45,49,54,58,65,68,72 |
| 1,10,13,17,23,31,38,42,45,49,57,64,69 |
| 1,5,8,13,20,24,32,41,46,51,54,58,62 |
| 1 |
| 1,9,12,15,18,26 |
| 1,12,16,19,28,32,35 |
| 1,10,13,17,22,27,33,37,41,52 |
| 1,13,17,27,36 |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
12 rows in set (0.00 sec)
mysql> -- 区分大小写、多行、dotall模式匹配,返回999出现的位置
mysql> select regexp_extract_index(a,'999',1,'cmn') a from t_regexp;
------
| a |
------
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
------
12 rows in set (0.00 sec)