1. 准备测试数据
代码语言:javascript复制PUT nba
{
"mappings": {
"_doc": {
"properties": {
"birthDay": {"type": "date"},
"birthDayStr": {"type": "keyword"},
"age": {"type": "integer"},
"code": {"type": "text"},
"country": {"type": "text"},
"countryEn": {"type": "text"},
"displayAffiliation": {"type": "text"},
"displayName": {"type": "text"},
"displayNameEn": {"type": "text"},
"draft": {"type": "long"},
"heightValue": {"type": "float"},
"jerseyNo": {"type": "text"},
"playYear": {"type": "long"},
"playerId": {"type": "keyword"},
"position": {"type": "text"},
"schoolType": {"type": "text"},
"teamCity": {"type": "text"},
"teamCityEn": {"type": "text"},
"teamConference": {"type": "keyword"},
"teamConferenceEn": {"type": "keyword"},
"teamName": {"type": "keyword"},
"teamNameEn": {"type": "keyword"},
"weight": {"type": "text"}
}
}
}
}
POST /_bulk
{"index":{"_index":"nba","_type":"_doc","_id":"1"}}
{"countryEn":"United States","teamName":"老鹰","birthDay":831182400000,"country":"美国","teamCityEn":"Atlanta","code":"jaylen_adams","displayAffiliation":"United States","displayName":"杰伦 亚当斯","schoolType":"College","teamConference":"东部","teamConferenceEn":"Eastern","weight":"86.2 公斤","teamCity":"亚特兰大","playYear":1,"jerseyNo":"10","teamNameEn":"Hawks","draft":2018,"displayNameEn":"Jaylen Adams","heightValue":1.88,"birthDayStr":"1996-05-04","position":"后卫","age":23,"playerId":"1629121"}
{"index":{"_index":"nba","_type":"_doc","_id":"2"}}
{"countryEn":"New Zealand","teamName":"雷霆","birthDay":743140800000,"country":"新西兰","teamCityEn":"Oklahoma City","code":"steven_adams","displayAffiliation":"Pittsburgh/New Zealand","displayName":"斯蒂文 亚当斯","schoolType":"College","teamConference":"西部","teamConferenceEn":"Western","weight":"120.2 公斤","teamCity":"俄克拉荷马城","playYear":6,"jerseyNo":"12","teamNameEn":"Thunder","draft":2013,"displayNameEn":"Steven Adams","heightValue":2.13,"birthDayStr":"1993-07-20","position":"中锋","age":26,"playerId":"203500"}
{"index":{"_index":"nba","_type":"_doc","_id":"3"}}
{"countryEn":"United States","teamName":"热火","birthDay":869198400000,"country":"美国","teamCityEn":"Miami","code":"bam_adebayo","displayAffiliation":"Kentucky/United States","displayName":"巴姆 阿德巴约","schoolType":"College","teamConference":"东部","teamConferenceEn":"Eastern","weight":"115.7 公斤","teamCity":"迈阿密","playYear":2,"jerseyNo":"13","teamNameEn":"Heat","draft":2017,"displayNameEn":"Bam Adebayo","heightValue":2.08,"birthDayStr":"1997-07-18","position":"中锋-前锋","age":22,"playerId":"1628389"}
{"index":{"_index":"nba","_type":"_doc","_id":"4"}}
{"countryEn":"South Sudan","teamName":"骑士","birthDay":854773200000,"country":"南苏丹","teamCityEn":"Cleveland","code":"deng_adel","displayAffiliation":"University of Louisville/South Sudan","displayName":"邓 Adel","schoolType":"","teamConference":"东部","teamConferenceEn":"Eastern","weight":"90.7 公斤","teamCity":"克利夫兰","playYear":1,"jerseyNo":"32","teamNameEn":"Cavaliers","draft":2018,"displayNameEn":"Deng Adel","heightValue":2.01,"birthDayStr":"1997-02-01","position":"前锋","age":22,"playerId":"1629061"}
{"index":{"_index":"nba","_type":"_doc","_id":"5"}}
{"countryEn":"United States","teamName":"马刺","birthDay":490593600000,"country":"美国","teamCityEn":"San Antonio","code":"lamarcus_aldridge","displayAffiliation":"Texas/United States","displayName":"拉马库斯 阿尔德里奇","schoolType":"College","teamConference":"西部","teamConferenceEn":"Western","weight":"117.9 公斤","teamCity":"圣安东尼奥","playYear":13,"jerseyNo":"12","teamNameEn":"Spurs","draft":2006,"displayNameEn":"LaMarcus Aldridge","heightValue":2.11,"birthDayStr":"1985-07-19","position":"中锋-前锋","age":34,"playerId":"200746"}
{"index":{"_index":"nba","_type":"_doc","_id":"6"}}
{"countryEn":"Canada","teamName":"鹈鹕","birthDay":887000400000,"country":"加拿大","teamCityEn":"New Orleans","code":"nickeil_alexander-walker","displayAffiliation":"Virginia Tech/Canada","displayName":"Nickeil Alexander-Walker","schoolType":"College","teamConference":"西部","teamConferenceEn":"Western","weight":"92.5 公斤","teamCity":"新奥尔良","playYear":0,"jerseyNo":"","teamNameEn":"Pelicans","draft":2019,"displayNameEn":"Nickeil Alexander-Walker","heightValue":1.96,"birthDayStr":"1998-02-09","position":"后卫","age":21,"playerId":"1629638"}
{"index":{"_index":"nba","_type":"_doc","_id":"7"}}
{"countryEn":"United States","teamName":"公牛","birthDay":878101200000,"country":"美国","teamCityEn":"Chicago","code":"rawle_alkins","displayAffiliation":"University of Arizona/United States","displayName":"劳勒 Alkins","schoolType":"","teamConference":"东部","teamConferenceEn":"Eastern","weight":"102.1 公斤","teamCity":"芝加哥","playYear":1,"jerseyNo":"20","teamNameEn":"Bulls","draft":2018,"displayNameEn":"Rawle Alkins","heightValue":1.96,"birthDayStr":"1997-10-29","position":"后卫","age":22,"playerId":"1628959"}
{"index":{"_index":"nba","_type":"_doc","_id":"8"}}
{"countryEn":"United States","teamName":"灰熊","birthDay":813124800000,"country":"美国","teamCityEn":"Memphis","code":"","displayAffiliation":"Duke University/United States","displayName":"格雷森 艾伦","schoolType":"College","teamConference":"西部","teamConferenceEn":"Western","weight":"89.8 公斤","teamCity":"孟菲斯","playYear":1,"jerseyNo":"","teamNameEn":"Grizzlies","draft":2018,"displayNameEn":"Grayson Allen","heightValue":1.96,"birthDayStr":"1995-10-08","position":"后卫","age":24,"playerId":"1628960"}
{"index":{"_index":"nba","_type":"_doc","_id":"9"}}
{"countryEn":"United States","teamName":"篮网","birthDay":893131200000,"country":"美国","teamCityEn":"Brooklyn","code":"jarrett_allen","displayAffiliation":"Texas/United States","displayName":"贾瑞特 艾伦","schoolType":"College","teamConference":"东部","teamConferenceEn":"Eastern","weight":"107.5 公斤","teamCity":"布鲁克林","playYear":2,"jerseyNo":"31","teamNameEn":"Nets","draft":2017,"displayNameEn":"Jarrett Allen","heightValue":2.11,"birthDayStr":"1998-04-21","position":"中锋","age":21,"playerId":"1628386"}
{"index":{"_index":"nba","_type":"_doc","_id":"10"}}
{"countryEn":"United States","teamName":"尼克斯","birthDay":727074000000,"country":"美国","teamCityEn":"New York","code":"kadeem_allen","displayAffiliation":"Arizona/United States","displayName":"卡迪姆 艾伦","schoolType":"College","teamConference":"东部","teamConferenceEn":"Eastern","weight":"90.7 公斤","teamCity":"纽约","playYear":2,"jerseyNo":"0","teamNameEn":"Knicks","draft":2017,"displayNameEn":"Kadeem Allen","heightValue":1.9,"birthDayStr":"1993-01-15","position":"后卫","age":26,"playerId":"1628443"}
2. Term查询
2.1 Term Query:精确匹配查询
代码语言:javascript复制# 查找号码为32号的球员
GET /nba/_doc/_search
{
"query": {
"term": {
"jerseyNo": "32"
}
}
}
2.2 Exists Query:查询某字段非空的document
代码语言:javascript复制# 查询"teamNameEn"字段非空的全部文档
GET /nba/_doc/_search
{
"query": {
"exists": {
"field": "teamNameEn"
}
}
}
2.3 Prefix Query:查询某字段的值以指定字符串为前缀的全部文档,指定的前缀不会被分词,按原样精确匹配(区分大小写)
代码语言:javascript复制# 查询"teamNameEn"字段的前缀是"Kni"的全部文档
# 有结果
GET /nba/_doc/_search
{
"query": {
"prefix": {
"teamNameEn": "Kni"
}
}
}
# 无结果
GET /nba/_doc/_search
{
"query": {
"prefix": {
"teamNameEn": "kni"
}
}
}
2.4 Wildcard Query:通配符查询
代码语言:javascript复制# 搜索"teamNameEn"以"Kn"开头以"s"结尾的全部document
# *可以代表零个或多个字符
GET /nba/_doc/_search
{
"query": {
"wildcard": {
"teamNameEn": "Kn*s"
}
}
}
# ?只能代表一个字符
GET /nba/_doc/_search
{
"query": {
"wildcard": {
"teamNameEn": "Knic?s"
}
}
}
2.5 Regexp Query:正则表达式查询
代码语言:javascript复制GET /nba/_doc/_search
{
"query": {
"regexp": {
"teamNameEn": "Kn.*s"
}
}
}
2.6 Ids Query:查询多个指定id的document
代码语言:javascript复制GET /nba/_doc/_search
{
"query": {
"ids": {
"values": [1, 2, 3]
}
}
}
3. 范围查询
代码语言:javascript复制# 查询在nba打了[2,10]年的球员
GET /nba/_doc/_search
{
"query": {
"range": {
"playYear": {
"gte": 2,
"lte": 10
}
}
}
}
# 查询出生日期在1980年到1999年1月1日之间的球员(gte按yyyy格式解析,lte按dd/MM/yyyy格式解析)
GET /nba/_doc/_search
{
"query": {
"range": {
"birthDay": {
"gte": "1980",
"lte": "01/01/1999",
"format": "dd/MM/yyyy||yyyy"
}
}
}
}
4. 排序查询
代码语言:javascript复制# 查询篮网队的球员,并按照球龄降序排序
GET /nba/_doc/_search
{
"query": {
"match": {
"teamNameEn": "Nets"
}
},
"sort": {
"playYear": {
"order": "desc"
}
}
}
# 查询篮网队的球员,并按照球龄降序排序,如果球龄相同,那么按照身高升序排序
GET /nba/_doc/_search
{
"query": {
"match": {
"teamNameEn": "Nets"
}
},
"sort": [
{
"playYear": {
"order": "desc"
}
},
{
"heightValue": {
"order": "asc"
}
}
]
}
5. 聚合统计
5.1 max/min/sum/avg
代码语言:javascript复制# 求尼克斯队球员的平均年龄
GET /nba/_doc/_search
{
"query": {
"term": {
"teamNameEn": {
"value": "Knicks"
}
}
},
"aggs": {
"avgAge": {
"avg": {
"field": "age"
}
}
},
"size": 0
}
5.2 value_count:统计某字段非空的document数
代码语言:javascript复制# 求尼克斯队球员打球时间不为空的数量
GET /nba/_doc/_search
{
"query": {
"term": {
"teamNameEn": {
"value": "Knicks"
}
}
},
"aggs": {
"countPlayYear": {
"value_count": {
"field": "playYear"
}
}
},
"size": 0
}
5.3 Cardinality:去重计数,统计某字段不同值的个数(结果为近似值)
代码语言:javascript复制# 统计尼克斯队的球员有多少种不同的年龄
GET /nba/_doc/_search
{
"query": {
"term": {
"teamNameEn": {
"value": "Knicks"
}
}
},
"aggs": {
"countAge": {
"cardinality": {
"field": "age"
}
}
},
"size": 0
}
5.4 stats:统计count/max/min/avg/sum这5个值
代码语言:javascript复制GET /nba/_doc/_search
{
"query": {
"term": {
"teamNameEn": {
"value": "Knicks"
}
}
},
"aggs": {
"statsAge": {
"stats": {
"field": "age"
}
}
},
"size": 0
}
{
"took" : 8,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"statsAge" : {
"count" : 1,
"min" : 26.0,
"max" : 26.0,
"avg" : 26.0,
"sum" : 26.0
}
}
}
5.5 extended_stats:除stats统计的5个值,还加入了平方和、方差、标准差、平均值加/减两个标准差的区间
代码语言:javascript复制GET /nba/_doc/_search
{
"query": {
"term": {
"teamNameEn": {
"value": "Knicks"
}
}
},
"aggs": {
"extendedStatsAge": {
"extended_stats": {
"field": "age"
}
}
},
"size": 0
}
{
"took" : 6,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"extendedStatsAge" : {
"count" : 1,
"min" : 26.0,
"max" : 26.0,
"avg" : 26.0,
"sum" : 26.0,
"sum_of_squares" : 676.0,
"variance" : 0.0,
"std_deviation" : 0.0,
"std_deviation_bounds" : {
"upper" : 26.0,
"lower" : 26.0
}
}
}
}
5.6 percentiles:占比百分位对应的值统计,默认返回[1, 5, 25, 50, 75, 95, 99]分位上的值
代码语言:javascript复制GET /nba/_doc/_search
{
"query": {
"term": {
"teamNameEn": {
"value": "Knicks"
}
}
},
"aggs": {
"percentAge": {
"percentiles": {
"field": "age"
}
}
},
"size": 0
}
# 结果分析
# 1%的age在26.0以内
# 5%的age在26.0以内
# 25%的age在26.0以内
# 50%的age在26.0以内
# ......
{
"took" : 19,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"percentAge" : {
"values" : {
"1.0" : 26.0,
"5.0" : 26.0,
"25.0" : 26.0,
"50.0" : 26.0,
"75.0" : 26.0,
"95.0" : 26.0,
"99.0" : 26.0
}
}
}
}
# 指定分位值
GET /nba/_doc/_search
{
"query": {
"term": {
"teamNameEn": {
"value": "Knicks"
}
}
},
"aggs": {
"percentAge": {
"percentiles": {
"field": "age",
"percents": [20, 50, 75]
}
}
},
"size": 0
}
6. 分组聚合
6.1 Terms Aggregation:根据字段值进行分组聚合
代码语言:javascript复制# 将尼克斯队的球员根据年龄进行分组,并统计每组中document的个数
GET /nba/_doc/_search
{
"query": {
"term": {
"teamNameEn": {
"value": "Knicks"
}
}
},
"aggs": {
"aggsAge": {
"terms": {
"field": "age"
}
}
},
"size": 0
}
# 将尼克斯队的球员根据年龄进行分组,并统计每组中document的个数,结果按照年龄降序排序
GET /nba/_doc/_search
{
"query": {
"term": {
"teamNameEn": {
"value": "Knicks"
}
}
},
"aggs": {
"aggsAge": {
"terms": {
"field": "age",
"order": {
"_key": "desc"
}
}
}
},
"size": 0
}
# 将尼克斯队的球员根据年龄进行分组,并统计每组中document的个数,结果按照每组中的文档个数降序排序
GET /nba/_doc/_search
{
"query": {
"term": {
"teamNameEn": {
"value": "Knicks"
}
}
},
"aggs": {
"aggsAge": {
"terms": {
"field": "age",
"order": {
"_count": "desc"
}
}
}
},
"size": 0
}
# 按照队名进行分组,最多返回5个分组,其余分组不返回
# 各分组之间按照组内球员的平均年龄降序排序
GET /nba/_doc/_search
{
"aggs": {
"aggsTeamName": {
"terms": {
"field": "teamNameEn",
"size": 5,
"order": {
"avgAge": "desc"
}
},
"aggs": {
"avgAge": {
"avg": {
"field": "age"
}
}
}
}
},
"size": 0
}
# 按照队名进行分组,可以指定只对"Hawks"和"Nets"队进行分组以及不对"Heat"队进行分组
# 各分组之间按照组内球员的平均年龄降序排序
GET /nba/_doc/_search
{
"aggs": {
"aggsTeamName": {
"terms": {
"field": "teamNameEn",
"include": ["Hawks", "Nets"],
"exclude": ["Heat"],
"order": {
"avgAge": "desc"
}
},
"aggs": {
"avgAge": {
"avg": {
"field": "age"
}
}
}
}
},
"size": 0
}
# 分组筛选时可以使用正则表达式
GET /nba/_doc/_search
{
"aggs": {
"aggsTeamName": {
"terms": {
"field": "teamNameEn",
"include": "Hawks|Ne.*|Kn.*",
"exclude": "Heat",
"order": {
"avgAge": "desc"
}
},
"aggs": {
"avgAge": {
"avg": {
"field": "age"
}
}
}
}
},
"size": 0
}
6.2 Range Aggregation: 范围分组聚合
代码语言:javascript复制# 统计nba小于20岁、年龄在[20,30)之间以及年龄>=30的球员个数
GET /nba/_doc/_search
{
"aggs": {
"ageRange": {
"range": {
"field": "age",
"ranges": [
{"to": 20},
{"from": 20, "to": 30},
{"from": 30}
]
}
}
},
"size": 0
}
# 自定义组名
GET /nba/_doc/_search
{
"aggs": {
"ageRange": {
"range": {
"field": "age",
"ranges": [
{"to": 20, "key": "A"},
{"from": 20, "to": 30, "key": "B"},
{"from": 30, "key": "C"}
]
}
}
},
"size": 0
}
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 10,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"ageRange" : {
"buckets" : [
{
"key" : "A",
"to" : 20.0,
"doc_count" : 0
},
{
"key" : "B",
"from" : 20.0,
"to" : 30.0,
"doc_count" : 9
},
{
"key" : "C",
"from" : 30.0,
"doc_count" : 1
}
]
}
}
}
6.3 Date Range Aggregation:时间范围分组聚合
代码语言:javascript复制# 统计nba球员出生年份小于1980、出生年份在[1980,1990)之间以及出生年份>=1990年的球员个数
GET /nba/_doc/_search
{
"aggs": {
"birthDayRange": {
"date_range": {
"field": "birthDay",
"format": "yyyy",
"ranges": [
{"to": "1980"},
{"from": "1980", "to": "1990"},
{"from": "1990"}
]
}
}
},
"size": 0
}
6.4 Date Histogram Aggregation:时间柱状图聚合
代码语言:javascript复制# 求每个年份出生的球员的个数
GET /nba/_doc/_search
{
"aggs": {
"birthDayAggs": {
"date_histogram": {
"field": "birthDay",
"format": "yyyy",
"interval": "year"
}
}
},
"size": 0
}
# interval的值可以是:
year
quarter
month
week
day
hour
minute
second
7. 滚动查询
如果一次性要查询大量数据,比如10万条数据,那么性能会很差,此时一般会采用scroll滚动查询,一批一批地查,直到所有数据都查询完、处理完。
使用scroll滚动搜索,可以先搜索一批数据,然后下次再搜索一批数据,以此类推,直到搜索出全部的数据来。scroll搜索会在第一次搜索的时候,保存一个当时的视图快照,之后只会基于该旧的视图快照提供数据搜索,如果这个期间数据变更,是不会让用户看到的。
采用基于_doc进行排序的方式,性能较高。每次发送scroll请求,我们还需要指定一个scroll参数,指定一个时间窗口,每次搜索请求只要在这个时间窗口内能完成就可以了。
代码语言:javascript复制# 第一次查询
# scroll=1m,本次滚动查询返回的scroll_id的有效期为1min
# "sort": [ "_doc" ],基于_doc进行排序,提升查询性能
# "size": 1,本次查询1条数据
GET /nba/_doc/_search?scroll=1m
{
"query": {
"match_all": {}
},
"sort": [ "_doc" ],
"size": 1
}
# 返回一个_scroll_id
{
"_scroll_id" : "DnF1ZXJ5VGhlbkZldGNoBQAAAAAAAO6xFmhnMlMxTUN1UVVxQXlZbkhnREJkSEEAAAAAAAAAdBZ2ZVB2ZHFURFNtQ0xHZ1laUjJFVVNnAAAAAAAAAHMWdmVQdmRxVERTbUNMR2dZWlIyRVVTZwAAAAAAAPEYFmJma2dQcWxoU2JXSDhPNmZMMVI0N1EAAAAAAADushZoZzJTMU1DdVFVcUF5WW5IZ0RCZEhB",
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 10,
"max_score" : null,
"hits" : [
{
"_index" : "nba",
"_type" : "_doc",
"_id" : "5",
"_score" : null,
"_source" : {
"countryEn" : "United States",
"teamName" : "马刺",
"birthDay" : 490593600000,
"country" : "美国",
"teamCityEn" : "San Antonio",
"code" : "lamarcus_aldridge",
"displayAffiliation" : "Texas/United States",
"displayName" : "拉马库斯 阿尔德里奇",
"schoolType" : "College",
"teamConference" : "西部",
"teamConferenceEn" : "Western",
"weight" : "117.9 公斤",
"teamCity" : "圣安东尼奥",
"playYear" : 13,
"jerseyNo" : "12",
"teamNameEn" : "Spurs",
"draft" : 2006,
"displayNameEn" : "LaMarcus Aldridge",
"heightValue" : 2.11,
"birthDayStr" : "1985-07-19",
"position" : "中锋-前锋",
"age" : 34,
"playerId" : "200746"
},
"sort" : [
0
]
}
]
}
}
# 之后的每一次查询都要带上上一次查询时返回的scroll_id
GET /_search/scroll
{
"scroll": "1m",
"scroll_id" : "DnF1ZXJ5VGhlbkZldGNoBQAAAAAAAO6xFmhnMlMxTUN1UVVxQXlZbkhnREJkSEEAAAAAAAAAdBZ2ZVB2ZHFURFNtQ0xHZ1laUjJFVVNnAAAAAAAAAHMWdmVQdmRxVERTbUNMR2dZWlIyRVVTZwAAAAAAAPEYFmJma2dQcWxoU2JXSDhPNmZMMVI0N1EAAAAAAADushZoZzJTMU1DdVFVcUF5WW5IZ0RCZEhB"
}
# 可以手动清除scroll_id
DELETE /_search/scroll
{
"scroll_id": "xxx"
}