聚合结果buckets默认以doc_count排序方式呈现。terms聚合的默认顺序是按doc_count降序,即: _count desc 表达。其它还有 _term, _key 为排序控制元素(_term自6.0起已被弃用,由_key取代)。_key同样适用于histogram,date_histogram。下面是一个指定 _count asc 的例子:
代码语言:javascript复制POST /cartxns/_search
{
"aggs": {
"colors": {
"terms": {
"field": "color.keyword",
"order": {
"_count": "asc"
}
}
}
}
}
...
"aggregations" : {
"colors" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "blue",
"doc_count" : 2
},
{
"key" : "green",
"doc_count" : 2
},
{
"key" : "red",
"doc_count" : 4
}
]
}
}
elastic4s表达式如下:
// Terms aggregation over color.keyword with buckets ordered by document count.
// The second TermsOrder argument is `false`; the request dump printed by
// `show` renders that as "desc", i.e. the opposite direction of the
// `"_count": "asc"` REST request shown before this listing.
val aggTerms = search("cartxns").aggregations(
  termsAggregation("colors")
    .field("color.keyword")
    .order(TermsOrder("_count", false))
)
// Dump the generated request before sending it.
println(aggTerms.show)
val termsResult = client.execute(aggTerms).await
if (termsResult.isSuccess) {
  // One output line per bucket: key,docCount
  for (bucket <- termsResult.result.aggregations.terms("colors").buckets)
    println(s"${bucket.key},${bucket.docCount}")
} else {
  println(s"error: ${termsResult.error.causedBy.getOrElse("unknown")}")
}
...
POST:/cartxns/_search?
StringEntity({"aggs":{"colors":{"terms":{"field":"color.keyword","order":{"_count":"desc"}}}}},Some(application/json))
red,4
blue,2
green,2
再来一个date_histogram聚合例子:
代码语言:javascript复制POST /cartxns/_search
{
"aggs": {
"monthly_sales": {
"date_histogram": {
"field": "sold",
"calendar_interval": "1M",
"format": "yyyy-MM-dd"
, "order": {
"_count": "desc"
}
}
}
}
}
...
"aggregations" : {
"monthly_sales" : {
"buckets" : [
{
"key_as_string" : "2014-11-01",
"key" : 1414800000000,
"doc_count" : 2
},
{
"key_as_string" : "2014-01-01",
"key" : 1388534400000,
"doc_count" : 1
},
{
"key_as_string" : "2014-02-01",
"key" : 1391212800000,
"doc_count" : 1
},
{
"key_as_string" : "2014-05-01",
"key" : 1398902400000,
"doc_count" : 1
},
{
"key_as_string" : "2014-07-01",
"key" : 1404172800000,
"doc_count" : 1
},
{
"key_as_string" : "2014-08-01",
"key" : 1406851200000,
"doc_count" : 1
},
{
"key_as_string" : "2014-10-01",
"key" : 1412121600000,
"doc_count" : 1
},
{
"key_as_string" : "2014-03-01",
"key" : 1393632000000,
"doc_count" : 0
},
{
"key_as_string" : "2014-04-01",
"key" : 1396310400000,
"doc_count" : 0
},
{
"key_as_string" : "2014-06-01",
"key" : 1401580800000,
"doc_count" : 0
},
{
"key_as_string" : "2014-09-01",
"key" : 1409529600000,
"doc_count" : 0
}
]
}
}
elastic4s示例:
// Monthly date_histogram over the `sold` field.
// Differences from the REST request shown before this listing:
//   - minDocCount(1) is set here, so empty months are not returned;
//   - buckets are ordered by `_key` with the flag `false` instead of by `_count`.
val aggHist = search("cartxns").aggregations(
  dateHistogramAggregation("monthly_sales")
    .field("sold")
    .calendarInterval(DateHistogramInterval.Month)
    .format("yyyy-MM-dd")
    .minDocCount(1)
    .order(HistogramOrder("_key", false))
)
// Fixed: the original printed `aggTerms.show` (a copy-paste slip), so it dumped
// the earlier `colors` terms request instead of this date_histogram request.
// NOTE: the request dump reproduced after this listing was captured with that
// slip in place, which is why it still shows the `colors` terms request.
println(aggHist.show)
val histResult = client.execute(aggHist).await
if (histResult.isSuccess) {
  // One output line per bucket: date,docCount
  histResult.result.aggregations.dateHistogram("monthly_sales").buckets
    .foreach(b => println(s"${b.date},${b.docCount}"))
} else {
  println(s"error: ${histResult.error.causedBy.getOrElse("unknown")}")
}
...
POST:/cartxns/_search?
StringEntity({"aggs":{"colors":{"terms":{"field":"color.keyword","order":{"_count":"desc"}}}}},Some(application/json))
2014-11-01,2
2014-10-01,1
2014-08-01,1
2014-07-01,1
2014-05-01,1
2014-02-01,1
2014-01-01,1
_count,_term,_key三种固定排序当然是不足以表达实际的聚合结果。以度量结果进行排序才真正能够做到有针对性的,灵活的,广覆盖面的聚合结果排序,如:
代码语言:javascript复制POST /cartxns/_search
{
"aggs": {
"makes": {
"terms": {
"field": "make.keyword",
"size": 10
, "order": {
"avg_price": "desc"
}
},
"aggs": {
"avg_price": {
"avg": {"field": "price"}
}
}
}
}
}
...
"aggregations" : {
"makes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "bmw",
"doc_count" : 1,
"avg_price" : {
"value" : 80000.0
}
},
{
"key" : "ford",
"doc_count" : 2,
"avg_price" : {
"value" : 27500.0
}
},
{
"key" : "honda",
"doc_count" : 3,
"avg_price" : {
"value" : 16666.666666666668
}
},
{
"key" : "toyota",
"doc_count" : 2,
"avg_price" : {
"value" : 13500.0
}
}
]
}
}
以上是个以avg_price倒排序的例子。elastic4s示范:
// Terms aggregation over make.keyword, ordered by the value of the `avg_price`
// sub-aggregation. The order name must match the sub-aggregation name.
val aggAvg = search("cartxns").aggregations(
  termsAggregation("makes")
    .field("make.keyword")
    .order(TermsOrder("avg_price", false))
    .subaggs(avgAggregation("avg_price").field("price"))
)
// Dump the generated request before sending it.
println(aggAvg.show)
val avgResult = client.execute(aggAvg).await
if (avgResult.isSuccess) {
  // One output line per bucket: key,docCount,average price
  for (bucket <- avgResult.result.aggregations.terms("makes").buckets)
    println(s"${bucket.key},${bucket.docCount},${bucket.avg("avg_price").value}")
} else {
  println(s"error: ${avgResult.error.causedBy.getOrElse("unknown")}")
}
...
POST:/cartxns/_search?
StringEntity({"aggs":{"makes":{"terms":{"field":"make.keyword","order":{"avg_price":"desc"}},"aggs":{"avg_price":{"avg":{"field":"price"}}}}}},Some(application/json))
bmw,1,80000.0
ford,2,27500.0
honda,3,16666.666666666668
toyota,2,13500.0
可以用 .path 方式来实现对多值度量结果的排序,如:
代码语言:javascript复制POST /cartxns/_search
{
"aggs": {
"colors": {
"terms": {
"field": "color.keyword",
"size": 10,
"order": {
"stats.sum": "desc"
}
},
"aggs": {
"stats": {
"extended_stats": {
"field": "price"
}
}
}
}
}
}
...
"aggregations" : {
"colors" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "red",
"doc_count" : 4,
"stats" : {
"count" : 4,
"min" : 10000.0,
"max" : 80000.0,
"avg" : 32500.0,
"sum" : 130000.0,
"sum_of_squares" : 7.3E9,
"variance" : 7.6875E8,
"std_deviation" : 27726.341266023544,
"std_deviation_bounds" : {
"upper" : 87952.6825320471,
"lower" : -22952.68253204709
}
}
},
{
"key" : "green",
"doc_count" : 2,
"stats" : {
"count" : 2,
"min" : 12000.0,
"max" : 30000.0,
"avg" : 21000.0,
"sum" : 42000.0,
"sum_of_squares" : 1.044E9,
"variance" : 8.1E7,
"std_deviation" : 9000.0,
"std_deviation_bounds" : {
"upper" : 39000.0,
"lower" : 3000.0
}
}
},
{
"key" : "blue",
"doc_count" : 2,
"stats" : {
"count" : 2,
"min" : 15000.0,
"max" : 25000.0,
"avg" : 20000.0,
"sum" : 40000.0,
"sum_of_squares" : 8.5E8,
"variance" : 2.5E7,
"std_deviation" : 5000.0,
"std_deviation_bounds" : {
"upper" : 30000.0,
"lower" : 10000.0
}
}
}
]
}
}
extended_stats返回多值。我们可以用stats.sum来选择sum值。elastic4s示范:
// Terms aggregation ordered by one value of a multi-value metric: the
// `<sub-aggregation name>.<metric>` path selects `sum` from extended_stats.
// Note this snippet buckets by make.keyword and names the metric `price_stats`,
// whereas the REST request before this listing buckets by color.keyword and
// names it `stats`.
val aggStats = search("cartxns").aggregations(
  termsAggregation("makes")
    .field("make.keyword")
    .order(TermsOrder("price_stats.sum", false))
    .subaggs(extendedStatsAggregation("price_stats").field("price"))
)
// Dump the generated request before sending it.
println(aggStats.show)
val sumResult = client.execute(aggStats).await
if (sumResult.isSuccess) {
  // One output line per bucket: key,docCount,sum of price.
  // Fixed: the original had two adjacent string literals with no `+` between
  // them, which does not compile; a single interpolated string is used instead.
  sumResult.result.aggregations.terms("makes").buckets
    .foreach(b => println(s"${b.key},${b.docCount},${b.extendedStats("price_stats").sum}"))
} else {
  println(s"error: ${sumResult.error.causedBy.getOrElse("unknown")}")
}
...
POST:/cartxns/_search?
StringEntity({"aggs":{"makes":{"terms":{"field":"make.keyword","order":{"price_stats.sum":"desc"}},"aggs":{"price_stats":{"extended_stats":{"field":"price"}}}}}},Some(application/json))
bmw,1,80000.0
ford,2,55000.0
honda,3,50000.0
toyota,2,27000.0
最后,用作排序的度量结果可能在聚合结构的内层。我们可以用 > 符号来分隔聚合层级(> 左边是上一层聚合,右边是它的内层聚合):
代码语言:javascript复制POST /cartxns/_search
{
"aggs": {
"sales": {
"histogram": {
"field": "price",
"interval": 20000,
"min_doc_count": 1,
"order": {
"red_green>stats.avg": "desc"
}
},
"aggs": {
"red_green": {
"filter": {"terms": {"color.keyword" : ["red","green"]}
},
"aggs": {
"stats": {
"extended_stats": {
"field": "price"
}
}
}
}
}
}
}
}
...
"aggregations" : {
"sales" : {
"buckets" : [
{
"key" : 80000.0,
"doc_count" : 1,
"red_green" : {
"doc_count" : 1,
"stats" : {
"count" : 1,
"min" : 80000.0,
"max" : 80000.0,
"avg" : 80000.0,
"sum" : 80000.0,
"sum_of_squares" : 6.4E9,
"variance" : 0.0,
"std_deviation" : 0.0,
"std_deviation_bounds" : {
"upper" : 80000.0,
"lower" : 80000.0
}
}
}
},
{
"key" : 20000.0,
"doc_count" : 4,
"red_green" : {
"doc_count" : 3,
"stats" : {
"count" : 3,
"min" : 20000.0,
"max" : 30000.0,
"avg" : 23333.333333333332,
"sum" : 70000.0,
"sum_of_squares" : 1.7E9,
"variance" : 2.222222222222225E7,
"std_deviation" : 4714.04520791032,
"std_deviation_bounds" : {
"upper" : 32761.42374915397,
"lower" : 13905.242917512693
}
}
}
},
{
"key" : 0.0,
"doc_count" : 3,
"red_green" : {
"doc_count" : 2,
"stats" : {
"count" : 2,
"min" : 10000.0,
"max" : 12000.0,
"avg" : 11000.0,
"sum" : 22000.0,
"sum_of_squares" : 2.44E8,
"variance" : 1000000.0,
"std_deviation" : 1000.0,
"std_deviation_bounds" : {
"upper" : 13000.0,
"lower" : 9000.0
}
}
}
}
]
}
}
elastic4s:
// Histogram over price (bucket width 20000, empty buckets dropped) ordered by a
// metric that lives one level deeper: `red_green>stats.sum` walks from the
// histogram bucket into the `red_green` filter aggregation and picks `sum`
// from its `stats` extended_stats sub-aggregation.
// Note the REST request before this listing orders by `red_green>stats.avg`;
// this snippet orders by `.sum`.
val innerStats = search("cartxns").aggregations(
  histogramAggregation("sales")
    .field("price")
    .interval(20000)
    .minDocCount(1)
    .order(HistogramOrder("red_green>stats.sum", false))
    .subaggs(
      filterAggregation("red_green")
        .query(termsQuery("color.keyword", "red", "green"))
        .subaggs(extendedStatsAggregation("stats").field("price"))
    )
)
// Dump the generated request before sending it.
println(innerStats.show)
val innerResult = client.execute(innerStats).await
if (innerResult.isSuccess) {
  val hist = innerResult.result.aggregations.histogram("sales")
  // One output line per bucket: key,docCount,sum of price within red_green.
  // Fixed: the original had two adjacent string literals with no `+` between
  // them, which does not compile; a single interpolated string is used instead.
  hist.buckets.foreach { b =>
    println(s"${b.key},${b.docCount},${b.filter("red_green").extendedStats("stats").sum}")
  }
} else {
  println(s"error: ${innerResult.error.causedBy.getOrElse("unknown")}")
}
...
POST:/cartxns/_search?
StringEntity({"aggs":{"sales":{"histogram":{"interval":20000.0,"min_doc_count":1,"order":{"red_green>stats.sum":"desc"},"field":"price"},"aggs":{"red_green":{"filter":{"terms":{"color.keyword":["red","green"]}},"aggs":{"stats":{"extended_stats":{"field":"price"}}}}}}}},Some(application/json))
80000.0,1,80000.0
20000.0,4,70000.0
0.0,3,22000.0