背景
go-redis提供了简单易用的API帮助我们使用redis, 但是经过对组内各个业务线的调研发现大家都有一个共同的需求: 希望对redis的每个操作集成Prometheus监控统计, 以方便业务侧进行更加细致的分析和优化
方案设计
故在使用go-redis作为客户端的前提下, 针对go-redis和Prometheus的集成方案进行了一次调研, 总结出3个方案:
方案 | 描述 | 优缺点 |
---|---|---|
方案1 | 不做封装,直接在使用的时候打点 | 简单, 代码侵入性强 |
方案2 | 将Redis Client集成Prometheus在一起, 并重写常用的命令 | 代码复用, 但需要重写常用的redis命令, 实现复杂, 后期维护困难 |
方案3 | 使用go-redis自带的hook集成Prometheus | 插件化, 即用即插 |
经过分析, 决定使用方案3进行实现
实现
定义指标并实现hook方法
代码语言: go
package redis
import (
"context"
"github.com/go-redis/redis/v8"
"github.com/prometheus/client_golang/prometheus"
"time"
)
// redisServiceNameKey is the context key under which callers store the service
// name; AfterProcess and AfterProcessPipeline read it to fill the service-name
// label of the metrics.
var redisServiceNameKey = "service_name"
// RedisMetricsHook is a go-redis hook that records Prometheus metrics for
// redis commands and pipelines.
type RedisMetricsHook struct {
// requestCount counts processed commands/pipelines, partitioned by labels.
requestCount *prometheus.CounterVec
// requestLatency records the observed duration of commands/pipelines,
// partitioned by the same labels.
requestLatency *prometheus.HistogramVec
}
// NewRedisMetricsHook builds a RedisMetricsHook whose counter and histogram
// are created and registered with the given namespace, subsystem, histogram
// buckets and label names.
//
// The hook's After* methods supply two label values, the service name and the
// error text, in that order.
func NewRedisMetricsHook(namespace, subsystem string, buckets []float64, labels []string) *RedisMetricsHook {
	hook := new(RedisMetricsHook)
	hook.initMetrics(namespace, subsystem, buckets, labels)
	return hook
}
// initMetrics creates the request counter and the latency histogram and
// registers both with the default Prometheus registry.
//
// namespace and subsystem are used as metric name prefixes, buckets are the
// histogram bucket upper bounds, and labels are the label names shared by
// both metrics.
//
// If an identical collector is already registered (for example because a
// second hook was created with the same namespace and subsystem), the
// existing collector is reused instead of panicking. Any other registration
// error still panics, as it did with MustRegister.
func (h *RedisMetricsHook) initMetrics(namespace, subsystem string, buckets []float64, labels []string) {
	h.requestCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "request_count",
			Help:      "Number of requests received.",
		},
		labels,
	)
	h.requestLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "request_latency_microseconds",
			Help:      "Total duration of requests in microseconds.",
			Buckets:   buckets,
		},
		labels,
	)

	if err := prometheus.Register(h.requestCount); err != nil {
		are, ok := err.(prometheus.AlreadyRegisteredError)
		if !ok {
			panic(err)
		}
		existing, ok := are.ExistingCollector.(*prometheus.CounterVec)
		if !ok {
			panic(err)
		}
		// Share the collector that is already exported by the registry.
		h.requestCount = existing
	}

	if err := prometheus.Register(h.requestLatency); err != nil {
		are, ok := err.(prometheus.AlreadyRegisteredError)
		if !ok {
			panic(err)
		}
		existing, ok := are.ExistingCollector.(*prometheus.HistogramVec)
		if !ok {
			panic(err)
		}
		h.requestLatency = existing
	}
}
// BeforeProcess runs before a single command is processed. It returns a
// derived context carrying the current time under the "begin" key and never
// returns an error.
func (h *RedisMetricsHook) BeforeProcess(ctx context.Context, cmd redis.Cmder) (context.Context, error) {
	return context.WithValue(ctx, "begin", time.Now()), nil
}
// AfterProcess runs after a single command has been processed. It increments
// the request counter and observes the elapsed time, in microseconds, since
// the "begin" timestamp stored in ctx.
//
// Both metrics are labelled with the service name read from ctx and the
// command's error text (empty when the command succeeded). The command's
// error is returned unchanged.
//
// NOTE(review): using the raw error text as a label value may create many
// time series if error messages vary — confirm this is acceptable.
func (h *RedisMetricsHook) AfterProcess(ctx context.Context, cmd redis.Cmder) error {
	// Checked assertion: a context without a service name yields an empty
	// label instead of a panic.
	serviceName, _ := ctx.Value(redisServiceNameKey).(string)

	errLabel := ""
	if cmdErr := cmd.Err(); cmdErr != nil {
		errLabel = cmdErr.Error()
	}
	h.requestCount.WithLabelValues(serviceName, errLabel).Inc()

	// Only observe latency when the start time is present in the context;
	// without it there is nothing meaningful to measure.
	if begin, ok := ctx.Value("begin").(time.Time); ok {
		elapsed := time.Since(begin)
		h.requestLatency.WithLabelValues(serviceName, errLabel).Observe(float64(elapsed.Microseconds()))
	}
	return cmd.Err()
}
// BeforeProcessPipeline runs before a pipeline is processed. It returns a
// derived context carrying the current time under the "begin" key and never
// returns an error.
func (h *RedisMetricsHook) BeforeProcessPipeline(ctx context.Context, cmds []redis.Cmder) (context.Context, error) {
	startedAt := time.Now()
	stamped := context.WithValue(ctx, "begin", startedAt)
	return stamped, nil
}
// AfterProcessPipeline runs after a pipeline has been processed. The whole
// pipeline is counted as one request, and the elapsed time since the "begin"
// timestamp stored in ctx is observed in microseconds, matching the unit of
// the latency histogram.
//
// Both metrics are labelled with the service name read from ctx and the error
// text of the first failed command (empty when all commands succeeded). That
// first error, or nil, is returned.
func (h *RedisMetricsHook) AfterProcessPipeline(ctx context.Context, cmds []redis.Cmder) error {
	// Checked assertion: a context without a service name yields an empty
	// label instead of a panic.
	serviceName, _ := ctx.Value(redisServiceNameKey).(string)

	firstErr := handlerPipeline(cmds)
	errLabel := ""
	if firstErr != nil {
		errLabel = firstErr.Error()
	}
	h.requestCount.WithLabelValues(serviceName, errLabel).Inc()

	// Only observe latency when the start time is present in the context.
	if begin, ok := ctx.Value("begin").(time.Time); ok {
		elapsed := time.Since(begin)
		h.requestLatency.WithLabelValues(serviceName, errLabel).Observe(float64(elapsed.Microseconds()))
	}
	return firstErr
}
// handlerPipeline returns the error of the first command in cmds that
// failed, or nil when no command reports an error (including when cmds is
// empty).
func handlerPipeline(cmds []redis.Cmder) error {
	for _, cmd := range cmds {
		if err := cmd.Err(); err != nil {
			return err
		}
	}
	return nil
}
注册hook到客户端, 并开启Prometheus指标收集
代码语言: go
package main
import (
"context"
"fmt"
"git.code.oa.com/PlanX/global/redis"
"github.com/prometheus/client_golang/prometheus/promhttp"
"net/http"
)
// main demonstrates the metrics hook end to end: it connects to redis,
// attaches the hook, issues one SET command and then serves the collected
// metrics on :8080/metrics.
func main() {
	// Step 1: obtain a redis client. Without a client nothing below can run,
	// so stop here on failure instead of dereferencing a nil client.
	client, err := GetRedisClient()
	if err != nil {
		fmt.Println(err.Error())
		return
	}

	// Step 2: add the metrics hook; request data is passed through the context.
	// Latency bucket unit: microseconds.
	buckets := []float64{10, 50, 100, 200, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000}
	labels := []string{"service_name", "error"}
	hook := redis.NewRedisMetricsHook("DDD", "test001", buckets, labels)
	client.AddHook(hook)

	ctx0 := context.Background()
	ctx1 := context.WithValue(ctx0, "service_name", "test001")

	// Step 3: simulate a business request.
	_, err = client.Set(ctx1, "testkey", "test", 0).Result()
	if err != nil {
		fmt.Println(err.Error())
	}

	// Step 4: expose the Prometheus metrics endpoint. ListenAndServe only
	// returns on failure, so report the reason.
	http.Handle("/metrics", promhttp.Handler())
	if err := http.ListenAndServe(":8080", nil); err != nil {
		fmt.Println(err.Error())
	}
}
// GetRedisClient creates a redis client for 127.0.0.1:6379 with no password
// and verifies the connection with a PING.
//
// It returns the client when the PING succeeds. When the PING fails, the
// client is closed and the PING error is returned with a nil client.
//
// NOTE(review): the redis identifier here must provide NewClient, Options and
// Client (go-redis), while this file's visible imports only bring in the hook
// package under that name — confirm the import setup.
func GetRedisClient() (*redis.Client, error) {
	client := redis.NewClient(&redis.Options{
		Addr:     "127.0.0.1:6379",
		Password: "", // no password set
	})
	if _, err := client.Ping(context.Background()).Result(); err != nil {
		// The client is unusable; release its resources. The close error is
		// dropped on purpose because the PING failure is the one that matters.
		_ = client.Close()
		return nil, err
	}
	return client, nil
}
查看指标统计情况
代码语言: txt
$ curl 'localhost:8080/metrics'
# HELP DDD_test001_request_count Number of requests received.
# TYPE DDD_test001_request_count counter
DDD_test001_request_count{error="",service_name="test001"} 1
# HELP DDD_test001_request_latency_microseconds Total duration of requests in microseconds.
# TYPE DDD_test001_request_latency_microseconds histogram
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="10"} 0
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="50"} 0
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="100"} 0
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="200"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="500"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="1000"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="1500"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="2000"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="2500"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="3000"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="3500"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="4000"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="4500"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="5000"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="+Inf"} 1
DDD_test001_request_latency_microseconds_sum{error="",service_name="test001"} 114
DDD_test001_request_latency_microseconds_count{error="",service_name="test001"} 1
...