简介
官方7.9版本:https://www.elastic.co/guide/en/elasticsearch/reference/7.9/modules-scripting.html
ES脚本(Scripting)允许用户在一些特定的API中对自定义表达式进行求值,可用来解决常规API难以直接处理的需求。
1. ES Scripting历史
版本 | 使用脚本 |
---|---|
< Elasticsearch 1.4 | MVEL 脚本 |
< Elasticsearch 5.0 | Groovy 脚本 |
>= Elasticsearch 5.0 | Painless 脚本 |
2. Painless Scripting 简介
Painless是一种简单,安全的脚本语言,专为与Elasticsearch一起使用而设计。它是Elasticsearch的默认脚本语言,可以安全地用于内联和存储脚本。
Painless特点:
- 性能优秀:Painless脚本运行速度比备选方案(包括Groovy)快几倍。
- 安全性强:使用白名单来限制函数与字段的访问,避免了可能的安全隐患。
- 可选类型:变量和参数可以使用显式类型或动态def类型。
- 上手容易:扩展了Java的基本语法,并兼容Groovy风格的脚本语言特性。
- 特定优化:Painless是ES官方专为Elasticsearch脚本编写而设计的语言。
3. Scripting 应用场景
增删改查能解决业务场景80%的问题,Painless脚本操作一般应用于相对复杂的业务场景中。
- 自定义字段
- 自定义评分
- 自定义更新
- 自定义reindex
- 聚合
- 其他自定义操作
4. Scripting 使用模板
"script": {
"lang": "...", # 代表language脚本语言,默认指定为:painless
"source" | "id": "...", # 脚本的核心部分,id应用于:stored script
"params": { ... } # 传递给脚本使用的变量参数
}
Scripting应用
1. 自定义字段
如:需要给每个doc的complexrank字段翻倍并返回翻倍后的值
POST sphinx-doctor/_search
{
"from": 0,
"size": 20,
"query": {
"match_all": {}
},
"_source": "complexrank",
"script_fields": {
"custom_field": {
"script": {
"lang": "expression", # 这里脚本语言选择的expression
"source": "doc['complexrank'] * multiplier",
"params": {
"multiplier": 2
}
}
}
}
}
说明:将原索引中complexrank字段的值乘以2,并通过custom_field字段返回。
{
"took":20,
"timed_out":false,
"_shards":{
"total":1,
"successful":1,
"skipped":0,
"failed":0
},
"hits":{
"total":194354,
"max_score":1,
"hits":[
{
"_index":"sphinx-doctor-20.11.06-103145",
"_type":"_doc",
"_id":"740",
"_score":1,
"_source":{
"complexrank":"5985"
},
"fields":{
"custom_field":[
11970
]
}
},
{
"_index":"sphinx-doctor-20.11.06-103145",
"_type":"_doc",
"_id":"742",
"_score":1,
"_source":{
"complexrank":"5325"
},
"fields":{
"custom_field":[
10650
]
}
}
]
}
}
如:返回日期字段中的“年”或“月”或“日”等
POST drug/_search
{
"from": 0,
"size": 20,
"query": {
"match_all": {}
},
"script_fields": {
"custom_field": {
"script": {
"source": "doc.ctime.value.year" # 年:year;月:monthValue;日:dayOfMonth
}
}
}
}
{
"took":57,
"timed_out":false,
"_shards":{
"total":1,
"successful":1,
"skipped":0,
"failed":0
},
"hits":{
"total":173536,
"max_score":1,
"hits":[
{
"_index":"drug-20.12.03-151452",
"_type":"_doc",
"_id":"1883795984",
"_score":1,
"fields":{
"custom_field":[
2014
]
}
},
{
"_index":"drug-20.12.03-151452",
"_type":"_doc",
"_id":"1883795985",
"_score":1,
"fields":{
"custom_field":[
2014
]
}
}
]
}
}
2. 自定义评分
POST sphinx-doctor/_search
{
"from": 0,
"size": 2,
"_source": "hospitalname",
"query": {
"function_score": {
"query": {
"match": {
"hospitalname": {
"query": "北京协和医院"
}
}
},
"functions": [
{
"filter": {
"match_all": {
"boost": 1
}
},
"script_score": {
"script": {
"source": "_score * params._score + doc['rank'].value * params.rank + doc['adminlevel'].value * params.adminlevel",
"lang": "painless", # 这里脚本语言选择的是painless(而非Lucene的expression脚本语言)
"params": {
"adminlevel": 2500,
"rank": 0.5,
"_score": 1
}
}
}
}
],
"score_mode": "sum",
"max_boost": 3.4028235e+38,
"boost": 1
}
},
"track_scores": true
}
{
"took" : 487,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 777965,
"max_score" : 449721.3,
"hits" : [
{
"_index" : "sphinx-doctor-20.12.09-010000",
"_type" : "_doc",
"_id" : "175477",
"_score" : 449721.3,
"_source" : {
"hospitalname" : "北京协和医院"
}
},
{
"_index" : "sphinx-doctor-20.12.09-010000",
"_type" : "_doc",
"_id" : "221609",
"_score" : 440269.34,
"_source" : {
"hospitalname" : "北京协和医院"
}
}
]
}
}
Java API:排序
String scriptText = "_score * params._score + doc['rank'].value * params.rank + doc['adminlevel'].value * params.adminlevel";
Map<String, Object> params = new HashMap<>();
params.put("_score", 1.0f);
params.put("rank", 0.5f);
params.put("adminlevel", 2500f);
Script script = new Script(ScriptType.INLINE, "painless", scriptText, params);
ScriptScoreFunctionBuilder scriptScoreFunctionBuilder = ScoreFunctionBuilders.scriptFunction(script);
FunctionScoreQueryBuilder functionScoreQueryBuilder = QueryBuilders.functionScoreQuery(boolQueryBuilder, scriptScoreFunctionBuilder)
.scoreMode(FunctionScoreQuery.ScoreMode.SUM);
builder.query(functionScoreQueryBuilder);
builder.from(offset);
builder.size(limit);
//当使用 sort 指定按某些字段排序时,默认不会计算得分;需要设置 "track_scores":true,这样分数才会被计算和跟踪。
builder.trackScores(true);
3. 自定义更新
update
使用params中传入的值更新文档中指定字段的值。
POST sphinx-doctor/_doc/102647/_update
{
"script": {
"lang": "painless",
"source": "ctx._source.hospitalname = params.hospitalname;ctx._source.name = params.name",
"params": {
"hospitalname": "北京协和医院",
"name": "协和医生"
}
}
}
update_by_query
POST sphinx-doctor/_update_by_query
{
"query": {
"term": {
"id": {
"value": "102647"
}
}
},
"script": {
"lang": "painless",
"source": """
if (ctx._source.registerordercount >= 500) {
ctx._source.registerordercount = 200;
} else {
ctx.op = "noop";
}
"""
}
}
对row<=3且sold为false的,将cost字段值减2
POST /seats/_update_by_query
{
"query": {
"bool": {
"filter": [
{
"range": {
"row": {
"lte": 3
}
}
},
{
"match": {
"sold": false
}
}]
}
},
"script": {
"source": "ctx._source.cost -= params.discount",
"lang": "painless",
"params": {
"discount": 2
}
}
}
ingest pipeline
在ingest pipeline中使用script processor:如果字段locales的值为空字符串,则将其设置成"en-en"
PUT _ingest/pipeline/fix_locales
{
"processors": [
{
"script": {
"source": """
if (ctx.locales == "") {
ctx.locales = "en-en";
}
"""
}
}
]
}
值得注意的是,在ingest processor上下文中,通过ctx变量访问文档字段时不需要加 _source(例如直接写 ctx.locales)。
4.自定义reindex
POST _reindex
{
"source": {
"index": "sphinx-doctor-20.12.02-150616",
"size": 4000
},
"dest": {
"index": "sphinx-doctor-20.12.03-151452"
},
"script": {
"source": "ctx._source.complexrank = 10;\nctx._source.netcaseprice = 100;"
}
}
5. 聚合
POST sphinx-doctor/_search
{
"from": 0,
"size": 20,
"query": {
"match": {
"hospitalname": "北京协和医院"
}
},
"aggs": {
"grade": {
"terms": {
"script": {
"source": "doc['grade.keyword'].value",
"lang": "painless"
}
}
}
}
}
{
"took" : 187,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 193445,
"max_score" : 44.18691,
"hits" : [
{
"_index" : "sphinx-doctor-20.11.06-103145",
"_type" : "_doc",
"_id" : "30626487346",
"_score" : 44.18691,
"_source" : {
"registerordercount" : 0,
"hospitalfacultyname" : "神经内科",
"idx_doctoridprimary" : "DoctorIdPrimary_335",
"hospitaldistrict" : "东城",
"caseopened" : true,
"isonline" : null,
"dummy" : "SELECT_ALL",
"facultyid" : "1007000",
"price" : "10",
"rank" : "0",
"ctime" : "1568269475",
"id" : "30626487346",
"clinicprice" : "20.00",
"idx_hospitalfacultyid" : "HospitalFacultyId_335",
"educategrade" : "",
"adminlevel" : "1",
"isprimarydoctor" : 1,
"isexpert" : true,
"bookingvisitnum" : 0,
"primaryid" : "30626487346",
"hospitalfacultyid" : "335",
"spaceid" : "200007660088",
"hospitalcity" : "北京",
"isopenvideo" : true,
"grade" : "主任医师",
"name" : "3 0626487346",
"idx_fix_bigcity" : "北京协和医院北京协和医院北京协和医院北京协和医院测试用",
"phoneopened" : true,
"onlineschedule" : "20201207,20201214,20201221",
"isvideotpl" : false,
"issanjia" : true,
"hospitalprovince" : "北京",
"servicelevel" : 2,
"title" : "",
"confirmed" : "1",
"isvip" : false,
"general" : 0,
"menzhenprice" : "10",
"isopencosvideo" : false,
"hospitalname" : "北京协和医院北京协和医院北京协和医院北京协和医院测试用",
"netcaseprice" : "20.00",
"registrprice" : 0,
"free" : 0,
"workstatus" : "0",
"registtationopened" : null,
"indextype" : "doctor",
"utime" : 1606978403,
"complexrank" : "5985",
"sex" : "1",
"reservationopened" : null,
"idx_doctorid" : "DoctorId_30626487346",
"idx_hospital_info" : "北京协和医院北京协和医院北京协和医院北京协和医院测试用,北京协和医院,33北京协和医院,协和医院,协和东院,协和东院区,北京协和医院东院,协和西院,北京邮电总医院,邮电总医院,北京协和西院,北京协和医院西院区,北京协和医院西院,北京协和,北京市协和,北京邮电医院,协合医院,北京市协和医院,中国医学科学院北京协和医院,北京协合医院45,协和医院,北京协和,北京市协和,协和东院,北京协和医院东院,协和西院,北京邮电医院,北京协和西院,北京协和医院西院,协合医院,北京市协和医院,中国医学科学院北京协和医院,北京协合医院",
"isopenrecipe" : false,
"hits" : "0",
"iscosmetologydoctor" : null,
"hospitalid" : "1",
"idx_facultyid" : "FacultyId_1007000",
"activitytime" : "1568269520",
"hospitalgrade" : "6",
"idx_hospitalid" : "HospitalId_1",
"username" : "francoliu"
}
},
{
"_index" : "sphinx-doctor-20.11.06-103145",
"_type" : "_doc",
"_id" : "310346050824",
"_score" : 40.138744,
"_source" : {
"registerordercount" : 0,
"hospitalfacultyname" : "心血管内科",
"idx_doctoridprimary" : "DoctorIdPrimary_341",
"hospitaldistrict" : "东城",
"caseopened" : false,
"isonline" : null,
"dummy" : "SELECT_ALL",
"facultyid" : "1010000",
"price" : 0,
"rank" : "0",
"ctime" : "1605859848",
"id" : "310346050824",
"clinicprice" : 0,
"idx_hospitalfacultyid" : "HospitalFacultyId_341",
"educategrade" : "教授",
"adminlevel" : "4",
"isprimarydoctor" : 1,
"isexpert" : true,
"bookingvisitnum" : 0,
"primaryid" : "310346050824",
"hospitalfacultyid" : "341",
"spaceid" : "0",
"hospitalcity" : "北京",
"isopenvideo" : null,
"grade" : "主任医师",
"name" : "地名",
"idx_fix_bigcity" : "北京协和医院",
"phoneopened" : false,
"onlineschedule" : "",
"isvideotpl" : false,
"issanjia" : true,
"hospitalprovince" : "北京",
"servicelevel" : 0,
"title" : "",
"confirmed" : "",
"isvip" : false,
"general" : 0,
"menzhenprice" : 0,
"isopencosvideo" : false,
"hospitalname" : "北京协和医院",
"netcaseprice" : 0,
"registrprice" : 0,
"free" : 0,
"workstatus" : "0",
"registtationopened" : null,
"indextype" : "doctor",
"utime" : 1605859882,
"complexrank" : "0",
"sex" : "1",
"reservationopened" : null,
"idx_doctorid" : "DoctorId_310346050824",
"idx_hospital_info" : "北京协和医院,北京协和医院,33北京协和医院,协和医院,协和东院,协和东院区,北京协和医院东院,协和西院,北京邮电总医院,邮电总医院,北京协和西院,北京协和医院西院区,北京协和医院西院,北京协和,北京市协和,北京邮电医院,协合医院,北京市协和医院,中国医学科学院北京协和医院,北京协合医院45,协和医院,北京协和,北京市协和,协和东院,北京协和医院东院,协和西院,北京邮电医院,北京协和西院,北京协和医院西院,协合医院,北京市协和医院,中国医学科学院北京协和医院,北京协合医院",
"isopenrecipe" : null,
"hits" : "0",
"iscosmetologydoctor" : null,
"hospitalid" : "1",
"idx_facultyid" : "FacultyId_1010000",
"activitytime" : null,
"hospitalgrade" : "6",
"idx_hospitalid" : "HospitalId_1",
"username" : ""
}
}
]
},
"aggregations" : {
"grade" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 479,
"buckets" : [
{
"key" : "主任医师",
"doc_count" : 77699
},
{
"key" : "副主任医师",
"doc_count" : 72939
},
{
"key" : "主治医师",
"doc_count" : 20399
},
{
"key" : "",
"doc_count" : 13558
},
{
"key" : "住院医师",
"doc_count" : 5731
},
{
"key" : "副主任技师",
"doc_count" : 896
},
{
"key" : "副主任药师",
"doc_count" : 612
},
{
"key" : "主管技师",
"doc_count" : 468
},
{
"key" : "主任药师",
"doc_count" : 355
},
{
"key" : "副主任检验师",
"doc_count" : 309
}
]
}
}
}
6.自定义排序
两个或多个字段进行运算返回一个值进行排序
POST sphinx-doctor/_search
{
"_source": [
"clinicprice",
"menzhenprice"
],
"query": {
"match_all": {}
},
"sort": {
"_script": {
"type": "number", # 排序值的类型,也可以是 string
"order": "desc",
"script": {
"lang": "painless",
"source": "doc['clinicprice'].value + doc['menzhenprice'].value"
}
}
}
}
{
"took" : 334,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 782852,
"max_score" : null,
"hits" : [
{
"_index" : "sphinx-doctor-20.12.09-010000",
"_type" : "_doc",
"_id" : "102647",
"_score" : null,
"_source" : {
"menzhenprice" : "1400",
"clinicprice" : "2700.00"
},
"sort" : [
4100.0
]
},
{
"_index" : "sphinx-doctor-20.12.09-010000",
"_type" : "_doc",
"_id" : "8231",
"_score" : null,
"_source" : {
"menzhenprice" : "2000.00",
"clinicprice" : "2000.00"
},
"sort" : [
4000.0
]
}
]
}
}
使用逻辑运算符(三元表达式)进行判断来影响排序
POST sphinx-doctor/_search
{
"_source": [
"clinicprice",
"menzhenprice"
],
"from": 0,
"size": 2,
"query": {
"match_all": {}
},
"sort": [
{
"complexrank": {
"order": "desc"
}
},
{
"_script": {
"script": {
"source": "'北京'==doc['hospitalprovince.keyword'].value?0:('陕西'==doc['hospitalprovince.keyword'].value?1:2)"
},
"type": "number",
"order": "asc"
}
}
]
}
{
"took" : 80,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 782852,
"max_score" : null,
"hits" : [
{
"_index" : "sphinx-doctor-20.12.09-010000",
"_type" : "_doc",
"_id" : "5279893112",
"_score" : null,
"_source" : {
"menzhenprice" : "500",
"clinicprice" : 0
},
"sort" : [
981,
2.0
]
},
{
"_index" : "sphinx-doctor-20.12.09-010000",
"_type" : "_doc",
"_id" : "124096",
"_score" : null,
"_source" : {
"menzhenprice" : "200.00",
"clinicprice" : "200.00"
},
"sort" : [
980,
2.0
]
}
]
}
}
在 update、update-by-query 或 reindex API 中使用脚本时,需要通过 ctx 去访问文档中的字段:
- ctx._source:访问文档的 _source 字段
- ctx.op:对文档对应的操作,包括 index 和 delete
- ctx._index:访问文档的 meta-fields
- 其他字段或变量的访问见:update context
7.删除一个字段
我们可以使用脚本删除字段/嵌套字段。您要做的就是使用 remove 方法并传入字段/嵌套字段名称。例如,假设我们要删除 ID 为 5 的文档中 info 对象下的嵌套字段 “device”。
POST tweets/_update/5
{
"script": {
"source": "ctx._source.info.remove(params.fieldname)",
"params": {
"fieldname": "device"
}
}
}