search(15)- elastic4s-sorting buckets

2020-05-18 22:48:52 浏览数 (2)

聚合结果buckets默认按doc_count降序排列,相当于 _count desc。此外还有 _term, _key 两个排序控制元素,其中_key适用于histogram,date_histogram。下面是一个显式指定 _count asc 的terms聚合例子:

代码语言:javascript复制
POST /cartxns/_search
{
  "aggs": {
    "colors": {
      "terms": {
        "field": "color.keyword",
        "order": {
          "_count": "asc"
        }
      }
    }
  }
}

...

  "aggregations" : {
    "colors" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "blue",
          "doc_count" : 2
        },
        {
          "key" : "green",
          "doc_count" : 2
        },
        {
          "key" : "red",
          "doc_count" : 4
        }
      ]
    }
  }

elastic4s表达式如下(这里TermsOrder的第二个参数为false,即改为 _count 降序):

代码语言:javascript复制
 // Terms aggregation over color.keyword, ordered by bucket document count.
  // The second TermsOrder argument is false, which the printed request renders as "desc".
  val aggTerms = search("cartxns").aggregations(
    termsAggregation("colors").field("color.keyword").order(TermsOrder("_count", false))
  )
  println(aggTerms.show)

  // Run the search and await its response.
  val termsResult = client.execute(aggTerms).await

  if (termsResult.isSuccess) {
    // One line per bucket: "key,docCount".
    for (bucket <- termsResult.result.aggregations.terms("colors").buckets)
      println(s"${bucket.key},${bucket.docCount}")
  } else {
    println(s"error: ${termsResult.error.causedBy.getOrElse("unknown")}")
  }

...

POST:/cartxns/_search?
StringEntity({"aggs":{"colors":{"terms":{"field":"color.keyword","order":{"_count":"desc"}}}}},Some(application/json))
red,4
blue,2
green,2

再来一个date_histogram聚合例子:

代码语言:javascript复制
POST /cartxns/_search
{
  "aggs": {
    "monthly_sales": {
      "date_histogram": {
        "field": "sold",
        "calendar_interval": "1M",
        "format": "yyyy-MM-dd"
        , "order": {
          "_count": "desc"
        }
      }
    }
  }
}

...

  "aggregations" : {
    "monthly_sales" : {
      "buckets" : [
        {
          "key_as_string" : "2014-11-01",
          "key" : 1414800000000,
          "doc_count" : 2
        },
        {
          "key_as_string" : "2014-01-01",
          "key" : 1388534400000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2014-02-01",
          "key" : 1391212800000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2014-05-01",
          "key" : 1398902400000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2014-07-01",
          "key" : 1404172800000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2014-08-01",
          "key" : 1406851200000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2014-10-01",
          "key" : 1412121600000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2014-03-01",
          "key" : 1393632000000,
          "doc_count" : 0
        },
        {
          "key_as_string" : "2014-04-01",
          "key" : 1396310400000,
          "doc_count" : 0
        },
        {
          "key_as_string" : "2014-06-01",
          "key" : 1401580800000,
          "doc_count" : 0
        },
        {
          "key_as_string" : "2014-09-01",
          "key" : 1409529600000,
          "doc_count" : 0
        }
      ]
    }
  }

elastic4s示例(这里改为按 _key 降序,并用minDocCount(1)过滤掉doc_count为0的bucket):

代码语言:javascript复制
 // Monthly date_histogram on "sold" with keys formatted as yyyy-MM-dd.
  // minDocCount(1) is set on the aggregation, and buckets are ordered by _key
  // with the order flag set to false.
  val aggHist = search("cartxns").aggregations(
    dateHistogramAggregation("monthly_sales")
      .field("sold")
      .calendarInterval(DateHistogramInterval.Month)
      .format("yyyy-MM-dd")
      .minDocCount(1)
      .order(HistogramOrder("_key", false))
  )

  // Print the date-histogram request itself. Printing aggTerms here would echo
  // the earlier terms query instead of the request that is actually executed.
  println(aggHist.show)

  // Run the search and await its response.
  val histResult = client.execute(aggHist).await

  if (histResult.isSuccess) {
    // One line per bucket: "date,docCount".
    histResult.result.aggregations.dateHistogram("monthly_sales").buckets
      .foreach(b => println(s"${b.date},${b.docCount}"))
  } else println(s"error: ${histResult.error.causedBy.getOrElse("unknown")}")

...

POST:/cartxns/_search?
StringEntity({"aggs":{"colors":{"terms":{"field":"color.keyword","order":{"_count":"desc"}}}}},Some(application/json))
2014-11-01,2
2014-10-01,1
2014-08-01,1
2014-07-01,1
2014-05-01,1
2014-02-01,1
2014-01-01,1

_count,_term,_key三种固定排序当然不足以满足实际的聚合结果排序需求。以度量结果进行排序才真正能够做到有针对性的,灵活的,广覆盖面的聚合结果排序,如:

代码语言:javascript复制
POST /cartxns/_search
{
  "aggs": {
    "makes": {
      "terms": {
        "field": "make.keyword",
        "size": 10
        , "order": {
          "avg_price": "desc"
        }
      },
      "aggs": {
        "avg_price": {
          "avg": {"field": "price"}
        }
      }
    }
  }
}

...

  "aggregations" : {
    "makes" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "bmw",
          "doc_count" : 1,
          "avg_price" : {
            "value" : 80000.0
          }
        },
        {
          "key" : "ford",
          "doc_count" : 2,
          "avg_price" : {
            "value" : 27500.0
          }
        },
        {
          "key" : "honda",
          "doc_count" : 3,
          "avg_price" : {
            "value" : 16666.666666666668
          }
        },
        {
          "key" : "toyota",
          "doc_count" : 2,
          "avg_price" : {
            "value" : 13500.0
          }
        }
      ]
    }
  }

以上是个以avg_price倒排序的例子。elastic4s示范:

代码语言:javascript复制
 // Terms aggregation on make.keyword, ordered by the "avg_price" sub-aggregation.
  // The order flag is false, which the printed request renders as "desc".
  val aggAvg = search("cartxns").aggregations(
    termsAggregation("makes")
      .field("make.keyword")
      .order(TermsOrder("avg_price", false))
      .subaggs(avgAggregation("avg_price").field("price"))
  )
  println(aggAvg.show)

  // Run the search and await its response.
  val avgResult = client.execute(aggAvg).await

  if (avgResult.isSuccess) {
    // One line per bucket: "key,docCount,average price".
    for (bucket <- avgResult.result.aggregations.terms("makes").buckets)
      println(s"${bucket.key},${bucket.docCount},${bucket.avg("avg_price").value}")
  } else {
    println(s"error: ${avgResult.error.causedBy.getOrElse("unknown")}")
  }

...

POST:/cartxns/_search?
StringEntity({"aggs":{"makes":{"terms":{"field":"make.keyword","order":{"avg_price":"desc"}},"aggs":{"avg_price":{"avg":{"field":"price"}}}}}},Some(application/json))
bmw,1,80000.0
ford,2,27500.0
honda,3,16666.666666666668
toyota,2,13500.0

可以用 .path 方式来实现对多值度量结果的排序,如:

代码语言:javascript复制
POST /cartxns/_search
{
  "aggs": {
    "colors": {
      "terms": {
        "field": "color.keyword",
        "size": 10,
        "order": {
          "stats.sum": "desc"
        }
      },
      "aggs": {
        "stats": {
          "extended_stats": {
            "field": "price"
          }
        }
      }
    }
  }
}

...

"aggregations" : {
    "colors" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "red",
          "doc_count" : 4,
          "stats" : {
            "count" : 4,
            "min" : 10000.0,
            "max" : 80000.0,
            "avg" : 32500.0,
            "sum" : 130000.0,
            "sum_of_squares" : 7.3E9,
            "variance" : 7.6875E8,
            "std_deviation" : 27726.341266023544,
            "std_deviation_bounds" : {
              "upper" : 87952.6825320471,
              "lower" : -22952.68253204709
            }
          }
        },
        {
          "key" : "green",
          "doc_count" : 2,
          "stats" : {
            "count" : 2,
            "min" : 12000.0,
            "max" : 30000.0,
            "avg" : 21000.0,
            "sum" : 42000.0,
            "sum_of_squares" : 1.044E9,
            "variance" : 8.1E7,
            "std_deviation" : 9000.0,
            "std_deviation_bounds" : {
              "upper" : 39000.0,
              "lower" : 3000.0
            }
          }
        },
        {
          "key" : "blue",
          "doc_count" : 2,
          "stats" : {
            "count" : 2,
            "min" : 15000.0,
            "max" : 25000.0,
            "avg" : 20000.0,
            "sum" : 40000.0,
            "sum_of_squares" : 8.5E8,
            "variance" : 2.5E7,
            "std_deviation" : 5000.0,
            "std_deviation_bounds" : {
              "upper" : 30000.0,
              "lower" : 10000.0
            }
          }
        }
      ]
    }
  }

extended_stats返回多值。我们可以用stats.sum来选择sum值。elastic4s示范(这里改为按make.keyword分组,子聚合命名为price_stats,因此排序路径是price_stats.sum):

代码语言:javascript复制
// Terms aggregation on make.keyword, ordered by the "sum" value of the
// "price_stats" extended_stats sub-aggregation (path "price_stats.sum").
// The order flag is false, which the printed request renders as "desc".
val aggStats = search("cartxns").aggregations(
    termsAggregation("makes")
      .field("make.keyword")
      .order(TermsOrder("price_stats.sum", false)).subaggs(
      extendedStatsAggregation("price_stats").field("price")
    )
  )
  println(aggStats.show)

  // Run the search and await its response.
  val sumResult = client.execute(aggStats).await

  if (sumResult.isSuccess) {
    // One line per bucket: "key,docCount,sum". The two interpolated strings
    // must be joined with `+`; adjacent string literals do not compile.
    sumResult.result.aggregations.terms("makes").buckets
      .foreach(b => println(s"${b.key},${b.docCount}," +
        s"${b.extendedStats("price_stats").sum}"))
  } else println(s"error: ${sumResult.error.causedBy.getOrElse("unknown")}")

...

POST:/cartxns/_search?
StringEntity({"aggs":{"makes":{"terms":{"field":"make.keyword","order":{"price_stats.sum":"desc"}},"aggs":{"price_stats":{"extended_stats":{"field":"price"}}}}}},Some(application/json))
bmw,1,80000.0
ford,2,55000.0
honda,3,50000.0
toyota,2,27000.0

最后,用作排序的度量结果可能在聚合结构的内层。我们可以用>符号来连接各层聚合名称,>左边代表上一层结构:

代码语言:javascript复制
POST /cartxns/_search
{
  "aggs": {
    "sales": {
      "histogram": {
        "field": "price",
        "interval": 20000,
        "min_doc_count": 1, 
        "order": {
          "red_green>stats.avg": "desc"
        }
      },
      "aggs": {
        "red_green": {
          "filter": {"terms": {"color.keyword" : ["red","green"]}
          },
          "aggs": {
            "stats": {
              "extended_stats": {
                "field": "price"
              }
            }
          }
        }
      }
    }
  }
}

...

  "aggregations" : {
    "sales" : {
      "buckets" : [
        {
          "key" : 80000.0,
          "doc_count" : 1,
          "red_green" : {
            "doc_count" : 1,
            "stats" : {
              "count" : 1,
              "min" : 80000.0,
              "max" : 80000.0,
              "avg" : 80000.0,
              "sum" : 80000.0,
              "sum_of_squares" : 6.4E9,
              "variance" : 0.0,
              "std_deviation" : 0.0,
              "std_deviation_bounds" : {
                "upper" : 80000.0,
                "lower" : 80000.0
              }
            }
          }
        },
        {
          "key" : 20000.0,
          "doc_count" : 4,
          "red_green" : {
            "doc_count" : 3,
            "stats" : {
              "count" : 3,
              "min" : 20000.0,
              "max" : 30000.0,
              "avg" : 23333.333333333332,
              "sum" : 70000.0,
              "sum_of_squares" : 1.7E9,
              "variance" : 2.222222222222225E7,
              "std_deviation" : 4714.04520791032,
              "std_deviation_bounds" : {
                "upper" : 32761.42374915397,
                "lower" : 13905.242917512693
              }
            }
          }
        },
        {
          "key" : 0.0,
          "doc_count" : 3,
          "red_green" : {
            "doc_count" : 2,
            "stats" : {
              "count" : 2,
              "min" : 10000.0,
              "max" : 12000.0,
              "avg" : 11000.0,
              "sum" : 22000.0,
              "sum_of_squares" : 2.44E8,
              "variance" : 1000000.0,
              "std_deviation" : 1000.0,
              "std_deviation_bounds" : {
                "upper" : 13000.0,
                "lower" : 9000.0
              }
            }
          }
        }
      ]
    }
  }

elastic4s(这里改为按red_green>stats.sum排序):

代码语言:javascript复制
 // Histogram on price (interval 20000, minDocCount 1), ordered by a metric nested
  // one level down: the "sum" of the "stats" extended_stats aggregation inside the
  // "red_green" filter aggregation (path "red_green>stats.sum").
  // The order flag is false, which the printed request renders as "desc".
  val innerStats = search("cartxns").aggregations(
    histogramAggregation("sales")
        .field("price")
        .interval(20000)
        .minDocCount(1)
        .order(HistogramOrder("red_green>stats.sum", false)).subaggs(
      filterAggregation("red_green").query(
        termsQuery("color.keyword", "red", "green")
      ).subaggs(
        extendedStatsAggregation("stats").field("price")
      )
    )
  )

  println(innerStats.show)

  // Run the search and await its response.
  val innerResult = client.execute(innerStats).await

  if (innerResult.isSuccess) {
    val hist = innerResult.result.aggregations.histogram("sales")
    // One line per bucket: "key,docCount,sum of the filtered stats". The two
    // interpolated strings must be joined with `+`; adjacent literals do not compile.
    hist.buckets
      .foreach(b => println(s"${b.key},${b.docCount}," +
        s"${b.filter("red_green").extendedStats("stats").sum}"))
  } else println(s"error: ${innerResult.error.causedBy.getOrElse("unknown")}")

...

POST:/cartxns/_search?
StringEntity({"aggs":{"sales":{"histogram":{"interval":20000.0,"min_doc_count":1,"order":{"red_green>stats.sum":"desc"},"field":"price"},"aggs":{"red_green":{"filter":{"terms":{"color.keyword":["red","green"]}},"aggs":{"stats":{"extended_stats":{"field":"price"}}}}}}}},Some(application/json))
80000.0,1,80000.0
20000.0,4,70000.0
0.0,3,22000.0

0 人点赞