Redis Client集成Prometheus指标

2022-07-21 20:52:19 浏览数 (1)

背景

go-redis提供了简单易用的API帮助我们使用redis, 但是经过对组内各个业务线的调研发现大家都有一个共同的需求: 希望对redis的每个操作集成Prometheus监控统计, 以方便业务侧进行更加细致的分析和优化

方案设计

故在使用go-redis作为客户端的前提下, 针对go-redis和Prometheus的集成方案进行了一次调研, 总结出3个方案:

方案

描述

优缺点

方案1

不做封装,直接在使用的时候打点

简单, 代码侵入性强

方案2

将Redis Client集成Prometheus在一起, 并重写常用的命令

代码复用, 但需要重写常用的redis命令, 实现复杂, 后期维护困难

方案3

使用go-redis自带的hook集成Prometheus

插件化, 即用即插

经过分析, 决定使用方案3进行实现

实现

定义指标并实现hook方法

代码语言:go复制
package redis

import (
	"context"
	"github.com/go-redis/redis/v8"
	"github.com/prometheus/client_golang/prometheus"
	"time"
)

// redisServiceNameKey is the context key under which callers store the service
// name; the hook reads it and uses the value as a metric label value.
var redisServiceNameKey = "service_name"

// RedisMetricsHook is a go-redis hook that records Prometheus metrics for
// redis commands and pipelines.
type RedisMetricsHook struct {
	// requestCount is incremented once per processed command or pipeline.
	requestCount   *prometheus.CounterVec
	// requestLatency receives the measured duration of each command or pipeline.
	requestLatency *prometheus.HistogramVec
}

// NewRedisMetricsHook creates a RedisMetricsHook and initializes its metrics
// with the given namespace, subsystem, histogram buckets and label names.
func NewRedisMetricsHook(namespace, subsystem string, buckets []float64, labels []string) *RedisMetricsHook {
	hook := new(RedisMetricsHook)
	hook.initMetrics(namespace, subsystem, buckets, labels)
	return hook
}

// initMetrics builds the request counter and the latency histogram under the
// given namespace/subsystem with the supplied label names, then registers both
// via prometheus.MustRegister, which panics if registration fails.
func (h *RedisMetricsHook) initMetrics(namespace, subsystem string, buckets []float64, labels []string) {
	counterOpts := prometheus.CounterOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "request_count",
		Help:      "Number of requests received.",
	}
	histogramOpts := prometheus.HistogramOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "request_latency_microseconds",
		Help:      "Total duration of requests in microseconds.",
		Buckets:   buckets,
	}

	h.requestCount = prometheus.NewCounterVec(counterOpts, labels)
	h.requestLatency = prometheus.NewHistogramVec(histogramOpts, labels)

	// Counter first, then histogram — same registration order as before.
	prometheus.MustRegister(h.requestCount, h.requestLatency)
}

// BeforeProcess runs before a single command is executed. It stores the current
// time in the returned context under the "begin" key so that AfterProcess can
// compute the command latency.
func (h *RedisMetricsHook) BeforeProcess(ctx context.Context, cmd redis.Cmder) (context.Context, error) {
	return context.WithValue(ctx, "begin", time.Now()), nil
}

// AfterProcess runs after a single command has finished and records the
// request counter and the latency histogram (in microseconds).
//
// The service-name label value is read from ctx under redisServiceNameKey and
// the start time from the "begin" value stored by BeforeProcess. A missing or
// mistyped value does not panic: the service name falls back to "" and the
// latency observation is skipped when no start time is available.
//
// Exactly two label values (service name, error text) are passed, so the label
// names given at construction must have two entries; WithLabelValues panics on
// a cardinality mismatch.
//
// It returns cmd.Err() unchanged.
func (h *RedisMetricsHook) AfterProcess(ctx context.Context, cmd redis.Cmder) error {
	// Comma-ok assertion: a context without the service name must not crash the caller.
	serviceName, _ := ctx.Value(redisServiceNameKey).(string)

	errLabel := ""
	if cmdErr := cmd.Err(); cmdErr != nil {
		errLabel = cmdErr.Error()
	}

	h.requestCount.WithLabelValues(serviceName, errLabel).Add(1)

	// Only observe latency when BeforeProcess actually stored a start time.
	if begin, ok := ctx.Value("begin").(time.Time); ok {
		elapsed := time.Since(begin).Microseconds()
		h.requestLatency.WithLabelValues(serviceName, errLabel).Observe(float64(elapsed))
	}
	return cmd.Err()
}

// BeforeProcessPipeline runs before a pipeline is executed. It stores the
// current time in the returned context under the "begin" key so that
// AfterProcessPipeline can compute the pipeline latency.
func (h *RedisMetricsHook) BeforeProcessPipeline(ctx context.Context, cmds []redis.Cmder) (context.Context, error) {
	startedCtx := context.WithValue(ctx, "begin", time.Now())
	return startedCtx, nil
}

// AfterProcessPipeline runs after a pipeline has finished and records the
// request counter and the latency histogram for the pipeline as a whole.
//
// The error label is the message of the first failed command in cmds (empty
// when all succeeded). Latency is observed in microseconds, matching the
// histogram's name and the unit used by AfterProcess.
//
// The service-name label value is read from ctx under redisServiceNameKey and
// the start time from the "begin" value stored by BeforeProcessPipeline. A
// missing or mistyped value does not panic: the service name falls back to ""
// and the latency observation is skipped when no start time is available.
//
// It returns the first command error, or nil.
func (h *RedisMetricsHook) AfterProcessPipeline(ctx context.Context, cmds []redis.Cmder) error {
	// Comma-ok assertion: a context without the service name must not crash the caller.
	serviceName, _ := ctx.Value(redisServiceNameKey).(string)

	firstErr := handlerPipeline(cmds)
	errLabel := ""
	if firstErr != nil {
		errLabel = firstErr.Error()
	}

	h.requestCount.WithLabelValues(serviceName, errLabel).Add(1)

	// Only observe latency when BeforeProcessPipeline actually stored a start time.
	if begin, ok := ctx.Value("begin").(time.Time); ok {
		// Microseconds, not milliseconds: the metric is request_latency_microseconds.
		elapsed := time.Since(begin).Microseconds()
		h.requestLatency.WithLabelValues(serviceName, errLabel).Observe(float64(elapsed))
	}
	return firstErr
}

// handlerPipeline returns the error of the first command in cmds whose Err()
// is non-nil, or nil when every command succeeded (including an empty slice).
func handlerPipeline(cmds []redis.Cmder) error {
	for _, cmd := range cmds {
		if err := cmd.Err(); err != nil {
			return err
		}
	}
	return nil
}

注册hook到客户端, 并开启Prometheus指标收集

代码语言:go复制
package main

import (
	"context"
	"fmt"
	"git.code.oa.com/PlanX/global/redis"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"net/http"
)

// main demonstrates the hook end to end: it connects to redis, attaches the
// Prometheus metrics hook, issues one sample command and then serves the
// collected metrics on :8080/metrics.
func main() {
	// step1. Obtain a redis client connection.
	client, err := GetRedisClient()
	if err != nil {
		// Without a client nothing below can run; continuing would dereference nil.
		fmt.Println(err.Error())
		return
	}

	// step2. Attach the metrics hook; per-request data is passed through the context.
	// Latency buckets are expressed in microseconds.
	buckets := []float64{10, 50, 100, 200, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000}
	labels := []string{"service_name", "error"}
	hook := redis.NewRedisMetricsHook("DDD", "test001", buckets, labels)
	client.AddHook(hook)

	ctx0 := context.Background()
	ctx1 := context.WithValue(ctx0, "service_name", "test001")

	// step3. Simulate a business request.
	_, err = client.Set(ctx1, "testkey", "test", 0).Result()
	if err != nil {
		fmt.Println(err.Error())
	}

	// step4. Expose the Prometheus metrics endpoint.
	http.Handle("/metrics", promhttp.Handler())
	// ListenAndServe only returns on failure (e.g. the port is already in use).
	if err := http.ListenAndServe(":8080", nil); err != nil {
		fmt.Println(err.Error())
	}
}


// GetRedisClient creates a redis client for 127.0.0.1:6379 (no password) and
// verifies the connection with a PING. It returns the client on success; on a
// failed ping the client is closed and the ping error is returned.
func GetRedisClient() (*redis.Client, error) {
	ctx := context.Background()
	client := redis.NewClient(&redis.Options{
		Addr:     "127.0.0.1:6379",
		Password: "", // no password set
	})
	if _, err := client.Ping(ctx).Result(); err != nil {
		// Best-effort cleanup so the unused client is not leaked; the ping
		// error is what the caller needs, so the Close error is ignored.
		_ = client.Close()
		return nil, err
	}
	return client, nil
}

查看指标统计情况

代码语言:txt复制
$ curl 'localhost:8080/metrics'
# HELP DDD_test001_request_count Number of requests received.
# TYPE DDD_test001_request_count counter
DDD_test001_request_count{error="",service_name="test001"} 1
# HELP DDD_test001_request_latency_microseconds Total duration of requests in microseconds.
# TYPE DDD_test001_request_latency_microseconds histogram
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="10"} 0
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="50"} 0
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="100"} 0
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="200"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="500"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="1000"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="1500"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="2000"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="2500"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="3000"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="3500"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="4000"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="4500"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="5000"} 1
DDD_test001_request_latency_microseconds_bucket{error="",service_name="test001",le="+Inf"} 1
DDD_test001_request_latency_microseconds_sum{error="",service_name="test001"} 114
DDD_test001_request_latency_microseconds_count{error="",service_name="test001"} 1
...

0 人点赞