背景:
部署在远程服务器上的爬虫, 经常会因为系统负载、CPU 或内存占用过高, 导致本地无法通过 ssh 连接服务器
废话不多说, 直接上代码, 主要解释写在代码注释里, 主要使用的是 psutil 模块
######
大神请略过.......
######
# -*- coding: utf-8 -*-
# @Author: Mehaei
# @Date: 2019-08-27 16:57:58
# @Last Modified by: Mehaei
# @Last Modified time: 2019-08-30 17:00:04
import os
import sys
# Switch to the script's own directory and add its parent to sys.path so the
# project-local modules can be imported no matter where the script is launched from.
work_dir, file_name = os.path.split(__file__)
os.chdir(work_dir if work_dir else "./")
sys.path.append("../")
import time
# psutil is the main module this script relies on.
import psutil
from submit_data.to_email import ToEmail
# Project name; used in the subject of the alert email.
PROJECT_NAME = "Test"
# Seconds between two checks (one check every 10 minutes).
_CHECK_TIME_INTERVAL = 60 * 10
# Memory usage threshold, in percent.
_MEMORY_NORMAL = 90
# Load-average threshold: the number of logical CPUs.
# psutil.cpu_count() may return None when the count cannot be determined;
# fall back to 1 so the later numeric comparison never sees None.
_LOADAVG_NORMAL = psutil.cpu_count() or 1
# CPU usage threshold, in percent.
_CPU_NORMAL = 90
# Paths whose disk usage is checked.
_DISK_MONITOR_LIST = ["/home"]
# Disk usage threshold, in percent.
_DISK_NORMAL = 90
# Crawler script file names; a "python3 <path>" process running one of them is killed.
_CRAWL_SPIDER_FILE = ["spider.py"]
# Exact crawler start commands; a process with one of these command lines is killed.
_KILL_PROCESS_COMMAND = ["python3 ./spider.py"]
CPU = "cpu"
MEMORY = "memory"
DISK = "disk"
SYS_LOAD = "sys_load"
NORMAL = "Normal"
# Resources to monitor.
MONITOR_LIST = [CPU, MEMORY, SYS_LOAD]
# "inform" or "kill":
# "inform" sends an email, "kill" kills the crawler processes.
EXCEPTION_HANDLING_METHOD = "inform"
# While a problem persists, no further email is sent within this many seconds.
SEND_EMAIL_INTERVAL = 60 * 60
class ResourceMonitor(object):
    """Periodically check memory, CPU, system load (and optionally disk) usage.

    When a monitored resource exceeds its threshold, the monitor reacts
    according to EXCEPTION_HANDLING_METHOD: "inform" sends an email,
    "kill" terminates the crawler processes.

    Creating an instance starts the monitoring loop right away, so the
    constructor does not return.
    """

    def __init__(self):
        self._keep_check()

    def _keep_check(self) -> None:
        """Run the check / react cycle forever, sleeping between rounds."""
        last_sent = 0
        while True:
            check_result = self.check_hardware_status()
            if any(status != NORMAL for status in check_result.values()):
                last_sent = self._handle_exception(check_result, last_sent)
            time.sleep(_CHECK_TIME_INTERVAL)

    def _handle_exception(self, check_result: dict, last_sent: float) -> float:
        """React to an abnormal check result.

        params: check_result, the mapping returned by check_hardware_status
                last_sent, timestamp of the last alert email (0 if none yet)
        return: the timestamp of the most recent alert email
        """
        if EXCEPTION_HANDLING_METHOD == "inform":
            # Throttle: at most one email per SEND_EMAIL_INTERVAL seconds.
            if (time.time() - last_sent) < SEND_EMAIL_INTERVAL:
                return last_sent
            ToEmail(SUBJECT="%s ResourceMonitor Exception" % PROJECT_NAME).send(
                ["%s: %s" % (h, s) for h, s in check_result.items()]
            )
            return time.time()
        if EXCEPTION_HANDLING_METHOD == "kill":
            self.kill_crawl_process()
        return last_sent

    def check_hardware_status(self) -> dict:
        """Check every resource listed in MONITOR_LIST.

        return: dict mapping the resource name to NORMAL or to an error
                message that contains the measured value
        """
        result = {}
        error_msg = "%s exception, usage rate: %s"
        if MEMORY in MONITOR_LIST:
            memory_usage_rate = self.memory_monitor()
            if memory_usage_rate > _MEMORY_NORMAL:
                result[MEMORY] = error_msg % (MEMORY, memory_usage_rate)
            else:
                result[MEMORY] = NORMAL
        if CPU in MONITOR_LIST:
            cpu_usage_rate = self.cpu_monitor()
            if cpu_usage_rate > _CPU_NORMAL:
                result[CPU] = error_msg % (CPU, cpu_usage_rate)
            else:
                result[CPU] = NORMAL
        if SYS_LOAD in MONITOR_LIST:
            # Only the first value of the load-average tuple is compared.
            load_first = self.loadavg_monitor()[0]
            if load_first > _LOADAVG_NORMAL:
                result[SYS_LOAD] = error_msg % (SYS_LOAD, load_first)
            else:
                result[SYS_LOAD] = NORMAL
        if DISK in MONITOR_LIST:
            # Same threshold rule as disk_monitor: a path is fine only below _DISK_NORMAL.
            over_limit = {}
            for path in _DISK_MONITOR_LIST:
                usage_rate = self.disk_status(path)
                if usage_rate >= _DISK_NORMAL:
                    over_limit[path] = usage_rate
            if over_limit:
                result[DISK] = error_msg % (DISK, over_limit)
            else:
                result[DISK] = NORMAL
        return result

    # Memory monitoring
    def memory_monitor(self) -> float:
        """Return the memory usage rate reported by psutil.virtual_memory()."""
        return psutil.virtual_memory().percent

    # CPU monitoring
    def cpu_monitor(self) -> float:
        """Return the overall CPU usage rate."""
        # A non-zero interval is used so the call does not just return 0 or 100.0.
        return psutil.cpu_percent(interval=0.01, percpu=False)

    # Disk monitoring
    def disk_monitor(self) -> dict:
        """Check every path in _DISK_MONITOR_LIST.

        return: dict mapping each path to "Normal" (usage below
                _DISK_NORMAL) or "Error"
        """
        check_result = {}
        for path in _DISK_MONITOR_LIST:
            if self.disk_status(path) < _DISK_NORMAL:
                check_result[path] = "Normal"
            else:
                check_result[path] = "Error"
        return check_result

    def disk_status(self, path: str) -> float:
        """Return the disk usage rate of the given path.

        params: path, the path to check, example: /amazon
        """
        return psutil.disk_usage(path).percent

    # System load monitoring
    def loadavg_monitor(self) -> tuple:
        """Return the system load average tuple from psutil.getloadavg()."""
        return psutil.getloadavg()

    @staticmethod
    def _is_crawl_command(cmdline: list) -> bool:
        """Tell whether a process command line belongs to a crawler.

        A command line matches when its space-joined form is listed in
        _KILL_PROCESS_COMMAND, or when it is exactly "python3 <path>" and
        the file name of <path> is listed in _CRAWL_SPIDER_FILE.
        """
        if " ".join(cmdline) in _KILL_PROCESS_COMMAND:
            return True
        # Short-circuit so an empty command line never gets indexed.
        return (
            len(cmdline) == 2
            and cmdline[0] == "python3"
            and cmdline[-1].split("/")[-1] in _CRAWL_SPIDER_FILE
        )

    # Kill the crawler processes
    def kill_crawl_process(self) -> None:
        """Walk over all processes and terminate the crawler processes."""
        for pid in psutil.pids():
            try:
                proc = psutil.Process(pid)
                if self._is_crawl_command(proc.cmdline()):
                    # terminate() lives on the process object, not on its command line.
                    proc.terminate()
            except psutil.Error:
                # The process vanished or cannot be inspected / signalled; skip it.
                continue
if __name__ == "__main__":
    # Start monitoring; constructing the monitor enters its check loop.
    ResourceMonitor()