采集Linux服务器上内存占用Top的进程信息

2024-06-27 10:37:14 浏览数 (3)

建议采集下Linux服务器上内存占用Top的进程信息,在内存抖动的时候便于排查问题。

下面是一个 Python 版的 DEMO,尚待修改完善;生产环境建议使用 Golang 编写。

代码语言:python
# -*- coding: utf-8 -*-
# Collect the processes using the most memory and push them to a Prometheus
# Pushgateway.

import socket

import psutil
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway

# How many of the heaviest processes to report.
TOP_N = 10

# Pushgateway address.
pushgateway_url = 'http://localhost:9091'

# Snapshot (pid, name, memory_percent, memory_info) for every process.
# process_iter() stores None for an attribute it could not read (for example
# on access denied), so such rows are dropped here: a None memory_percent
# would make the sort below raise TypeError, and a None memory_info has no
# rss field. A memory_percent of 0 is dropped as well, since it usually means
# no accurate figure was available.
proc_info = []
for proc in psutil.process_iter(['pid', 'name', 'memory_percent', 'memory_info']):
    info = proc.info
    memory_percent = info['memory_percent']
    memory_info = info['memory_info']
    if not memory_percent or memory_info is None:
        continue
    proc_info.append((info['pid'], info['name'], memory_percent, memory_info))

# Sort by memory usage, highest first.
sorted_proc_info = sorted(proc_info, key=lambda x: x[2], reverse=True)

# Print the heaviest processes and keep them for export.
data = []
for pid, name, memory_percent, memory_info in sorted_proc_info[:TOP_N]:
    rss = memory_info.rss
    print(f"PID: {pid}, Name: {name}, Memory Usage: {memory_percent}% , RSS_Mem: {rss}")
    data.append({"pid": pid, "name": name, "mem_usage": memory_percent, "rss_mem": rss})

# Registry that holds only the metrics pushed by this script.
registry = CollectorRegistry()

# One gauge per exported figure, labelled by process id and process name.
metrics = {
    'mem_usage': Gauge('memory_usage', 'Percentage of usage', registry=registry, labelnames=['pid', 'name']),
    'rss_mem': Gauge('rss_mem', 'Resident Set Size in bytes', registry=registry, labelnames=['pid', 'name']),
}

# Fill in one sample per process.
for item in data:
    labels = {'pid': item['pid'], 'name': item['name']}
    metrics['mem_usage'].labels(**labels).set(item['mem_usage'])
    metrics['rss_mem'].labels(**labels).set(item['rss_mem'])

# Push once, after every sample is set: the registry already contains all
# processes, so pushing inside the loop only repeated the same request.
# The grouping key identifies this host by its hostname.
push_to_gateway(
    pushgateway_url,
    job='process_metrics',
    registry=registry,
    grouping_key={"instance": socket.gethostname()},
)

print("Metrics successfully pushed to Pushgateway")

执行如下:

最终grafana的效果如下(建议根据instance绘图,在左上角配置个下拉列表):

UPDATE 20240627 用golang重写了下,便于各处拷贝运行,代码如下:

代码语言:go
package main

import (
    "fmt"
    "flag"
    "log"
    "net"
    "sort"
    "os"
    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/push"
    "github.com/shirou/gopsutil/v3/process"
)

// main collects the resident set size (RSS) of every readable process, keeps
// the largest ones and pushes them to a Prometheus Pushgateway as the
// "top_process_rss" gauge, grouped under an "instance" label.
func main() {
    // Maximum number of processes reported per push.
    const topN = 10

    // Parse command-line flags.
    pushgatewayURL := flag.String("url", "http://localhost:9091", "The URL of the Pushgateway.")
    flag.Parse()

    // List all processes.
    processes, err := process.Processes()
    if err != nil {
        log.Fatalf("Failed to get processes: %v", err)
    }

    // ProcessInfo is the per-process record; RSS is the sort key.
    type ProcessInfo struct {
        PID  int32
        Name string
        RSS  uint64
    }

    var processInfos []ProcessInfo

    // Collect name and RSS for each process, skipping the ones that cannot
    // be read.
    for _, p := range processes {
        memInfo, err := p.MemoryInfo()
        if err != nil {
            log.Printf("Failed to get memory info for PID %d: %v", p.Pid, err)
            continue
        }

        name, err := p.Name()
        if err != nil {
            log.Printf("Failed to get name for PID %d: %v", p.Pid, err)
            continue
        }

        processInfos = append(processInfos, ProcessInfo{
            PID:  p.Pid,
            Name: name,
            RSS:  memInfo.RSS,
        })
    }

    // Sort by RSS, largest first.
    sort.Slice(processInfos, func(i, j int) bool {
        return processInfos[i].RSS > processInfos[j].RSS
    })

    // Keep at most topN entries. Slicing unconditionally with [:10] panics
    // when fewer processes were readable.
    topProcesses := processInfos
    if len(topProcesses) > topN {
        topProcesses = topProcesses[:topN]
    }

    // Identify this host: the first non-loopback IPv4 address, otherwise the
    // hostname.
    var instanceID string
    addrs, err := net.InterfaceAddrs()
    if err != nil {
        log.Printf("Failed to get IP addresses: %v, falling back to hostname.", err)
        instanceID = hostnameFallback()
    } else {
        for _, addr := range addrs {
            if ipnet, ok := addr.(*net.IPNet); ok && !ipnet.IP.IsLoopback() {
                if ipnet.IP.To4() != nil {
                    instanceID = ipnet.IP.String()
                    break
                }
            }
        }
        if instanceID == "" {
            // No suitable IPv4 address was found.
            instanceID = hostnameFallback()
        }
    }

    // Gauge vector holding one RSS sample per process. It is handed to the
    // pusher directly, so no separate registry is needed.
    rssGauge := prometheus.NewGaugeVec(
        prometheus.GaugeOpts{
            Name: "top_process_rss",
            Help: "Top 10 process Resident Set Size (RSS)",
        },
        []string{"pid", "process_name"},
    )

    // Set the gauge values.
    for _, proc := range topProcesses {
        rssGauge.WithLabelValues(fmt.Sprintf("%d", proc.PID), proc.Name).Set(float64(proc.RSS))
    }

    // Push to the Pushgateway under the instance resolved above.
    if err := push.New(*pushgatewayURL, "top_memory_processes").
        Collector(rssGauge).
        Grouping("instance", instanceID).
        Push(); err != nil {
        log.Fatalf("Failed to push to Pushgateway: %v", err)
    }

    fmt.Println("Top 10 memory consuming processes data pushed to Pushgateway.")
}

// hostnameFallback returns the host name reported by os.Hostname. The
// HOSTNAME environment variable is not guaranteed to be set, so it is only
// consulted when os.Hostname fails or returns an empty name.
func hostnameFallback() string {
    if name, err := os.Hostname(); err == nil && name != "" {
        return name
    }
    return os.Getenv("HOSTNAME")
}

执行方法:

./main -url=http://192.168.31.181:9091

其中 -url 填写你的 Pushgateway 地址;不加该参数时,默认上报到本机的 9091 端口(http://localhost:9091)。

0 人点赞