基于Prometheus的监控告警系统的Python开发(三)

2024-10-01 08:39:46 浏览数 (3)

篇一、 https://cloud.tencent.com/developer/article/2195953

篇二、 https://cloud.tencent.com/developer/article/2395624

在篇一里,告警截图是通过调用chrome来实现的。这种方式代码写起来比较简单,但缺点是速度比较慢。因此这里再补充一种基于Python的matplotlib的绘图方法。

绘图的代码如下:

代码语言:python代码运行次数:0复制
# -*- coding: utf-8 -*-
# 调用prometheus接口,将指定表达式的趋势图绘制出来
# 参考阳明的博客 https://cloud.tencent.com/developer/article/1972462

# pip install matplotlib

import requests
from datetime import datetime, timedelta
import json
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

def draw_pic(raw_data):
    """Render Prometheus range-query series as a line chart and save it as a PNG.

    Args:
        raw_data: Iterable of series dicts. Each dict has a ``'metric'``
            mapping of label name to label value and a ``'values'`` list of
            ``(unix_timestamp, value)`` pairs.

    The chart is written to ``metrics_chart.png`` in the current working
    directory. Nothing is returned.
    """
    # Create the figure and its single axes.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        # Show full date and time on the x axis.
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

        has_series = False
        for metric in raw_data:
            labels = metric['metric']

            # Collect the custom labels (everything except instance/job) from
            # THIS series; series in one result may carry different label sets.
            extra = "".join(
                " - " + str(value)
                for name, value in labels.items()
                if name not in ('instance', 'job')
            )

            # Split the (timestamp, value) pairs into x and y sequences.
            timestamps = [datetime.fromtimestamp(ts) for ts, _ in metric['values']]
            values = [value for _, value in metric['values']]

            # Aggregated expressions may drop job/instance, so do not assume
            # they are present.
            job = labels.get('job', 'unknown')
            instance = labels.get('instance', 'unknown')
            label = f"{job} ({instance}) {extra}"

            # Plot the time series.
            ax.plot(timestamps, values, marker='o', label=label)
            has_series = True

        # A legend without any plotted series only produces a warning.
        if has_series:
            ax.legend()

        # Title and axis labels.
        ax.set_title('Prometheus Metrics')
        ax.set_xlabel('Time')
        ax.set_ylabel('Value')

        # Rotate tick labels 45 degrees and right-align them for readability.
        plt.xticks(rotation=45, ha='right')

        # Let matplotlib adjust the layout so labels are not clipped.
        fig.tight_layout()

        # Save the chart; dpi and file format can be changed as needed.
        output_filename = 'metrics_chart.png'
        fig.savefig(output_filename, dpi=300)
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated calls do not accumulate memory.
        plt.close(fig)



def metrics(server, query, query_time, duration, step, timeout=10):
    """Run a Prometheus range query and return the matching series.

    Args:
        server: ``host:port`` of the Prometheus server (queried over plain HTTP).
        query: PromQL expression to evaluate.
        query_time: ``datetime`` marking the end of the query window.
        duration: ``timedelta`` giving the window length before ``query_time``.
        step: Query resolution step, passed to Prometheus unchanged.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(matrix, error)`` tuple. On success ``matrix`` is a list of
        ``{'metric': labels, 'values': [(timestamp, value), ...]}`` dicts with
        both tuple members converted to ``float``, and ``error`` is ``None``.
        On failure ``matrix`` is ``None`` and ``error`` is a message string.
    """
    # Compute the start and end of the query window.
    start = query_time - duration
    end = query_time

    # Build the query parameters.
    params = {
        'query': query,
        'start': int(start.timestamp()),
        'end': int(end.timestamp()),
        'step': step
    }

    # Send the request to Prometheus. Without a timeout an unresponsive server
    # would block the caller indefinitely.
    try:
        response = requests.get(
            f"http://{server}/api/v1/query_range",
            params=params,
            timeout=timeout,
        )
        response.raise_for_status()  # raises on 4xx/5xx responses
        # A body that is not valid JSON raises a ValueError subclass.
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        return None, f"Failed to query Prometheus: {e}"

    # Check the status reported in the response body.
    if data.get('status') != 'success':
        return None, f"Query failed: {data.get('error', 'Unknown error')}"

    # Extract the result series; tolerate a missing section instead of raising.
    result = data.get('data', {}).get('result', [])

    # Convert the string samples to floats, keeping the per-series layout.
    matrix = []
    for item in result:
        metric = {
            'metric': item['metric'],
            'values': [(float(ts), float(value)) for ts, value in item['values']]
        }
        matrix.append(metric)

    return matrix, None



if __name__ == "__main__":
    # Everything the range query needs, gathered in one place.
    query_args = {
        'server': "localhost:9090",
        # Alert expression to chart.
        'query': 'irate(node_disk_written_bytes_total[5m])>100000',
        # End of the query window.
        'query_time': datetime.now(),
        # Only pull the last hour so the chart does not get too crowded.
        'duration': timedelta(minutes=60),
        # One sample every 15 seconds.
        'step': 15,
    }

    matrix, failure = metrics(**query_args)

    if not failure:
        # For debugging, the raw series can be dumped with json.dumps(matrix).
        draw_pic(raw_data=matrix)
        print("image saved")
    else:
        print(failure)

绘制的图片大致长这样:

(图:告警截图)

实际测试,上面这种基于matplotlib的绘图方式大概耗时3秒,而之前调用chrome截图大概要10秒,耗时缩短到原来的三分之一左右(速度约为原来的3倍)。

0 人点赞