需求描述
在默认配置文件 process.cfg
下,实现一个脚本工具,该脚本提供类似 supervisor 功能,可以对配置文件中的进程进行管理
- 一键查看所有进程运行状态
- 单个或批量启动进程,单个或批量停止进程
- 提供进程分组功能,可以按组查看进程运行状态,可以按组启动或停止该组内所有进程
示例
查看状态
按照组查看进程状态
代码语言:bash
> bash supervisor.sh -g DB show
-----------------------------
欢迎使用 supervisor 管理器
-----------------------------
ProcessName---------GroupName---Status----PID---CPU----MEMORY----StartTime---
mysql DB RUNNING 1114 0.0 0.0 Sat Feb 25 14:31:46 2023
postgresql DB STOPPED NULL NULL NULL NULL
oracle DB STOPPED NULL NULL NULL NULL
redis DB RUNNING 1222 0.1 0.2 Sat Feb 25 14:31:47 2023
按照名称查看进程状态
代码语言:bash
> bash supervisor.sh -p nginx show
-----------------------------
欢迎使用 supervisor 管理器
-----------------------------
ProcessName---------GroupName---Status----PID---CPU----MEMORY----StartTime---
nginx WEB RUNNING 7474 0.0 0.0 Sat Feb 25 16:18:51 2023
启动
按照组启动进程
代码语言:bash
> bash supervisor.sh -g WEB start
-----------------------------
欢迎使用 supervisor 管理器
-----------------------------
ProcessName---------GroupName---Status----PID---CPU----MEMORY----StartTime---
web-nginx.conftail: cannot determine location of ‘/www/wwwroot/code/shell/learn-shell/best-practices/web-nginx.conf’. reverting to polling: Input/output error
nginx WEB RUNNING 1121 0.0 0.0 Sat Feb 25 16:59:55 2023
httpd WEB RUNNING 3180 0.0 0.0 Sat Feb 25 17:14:47 2023
按照名称启动进程
代码语言:bash
> bash supervisor.sh -p nginx start
-----------------------------
欢迎使用 supervisor 管理器
-----------------------------
ProcessName---------GroupName---Status----PID---CPU----MEMORY----StartTime---
nginx WEB RUNNING 1121 0.0 0.0 Sat Feb 25 16:59:55 2023
停止
和启动类似。按照组停止进程
代码语言:bash
> bash supervisor.sh -g DB stop
按照名称停止进程
代码语言:bash
> bash supervisor.sh -p nginx stop
代码
配置文件代码
代码语言:ini
[GROUP]
WEB
DB
YARN
[WEB]
nginx
httpd
[DB]
mysql
postgresql
oracle
redis
[YARN]
resourcemanager
nodemanager
[nginx]
description="Web Server 1"
program_name=tail
parameter=-f /www/wwwroot/code/shell/learn-shell/best-practices/web-nginx.conf
[httpd]
description="Web Server 2"
program_name=tail
parameter=-f /www/wwwroot/code/shell/learn-shell/best-practices/web-httpd.conf
[mysql]
description="High Performance DataBase"
program_name=tail
parameter=-f /www/wwwroot/code/shell/learn-shell/best-practices/mysql.conf
[postgresql]
description="PG Server"
program_name=tail
parameter=-f /www/wwwroot/code/shell/learn-shell/best-practices/postgresql.conf
[oracle]
description="The Best DB Server In The World"
program_name=tail
parameter=-f /www/wwwroot/code/shell/learn-shell/best-practices/oracle.conf
脚本
显示进程状态函数库
代码语言:bash
#!/bin/bash
# Base directory used to locate the configuration file: the current working
# directory with a trailing slash, so a file name can be appended directly.
# NOTE(review): this overwrites the standard HOME environment variable for
# this shell; the functions below read the path under this name.
HOME="$(pwd)/"
# File name of the process configuration, appended to HOME.
CONFIG_FILE="process.cfg"
# PID of the shell running this script.
this_pid="$$"
# Takes no arguments. Prints, on one space-separated line, every group name
# listed under the [GROUP] section of ${HOME}${CONFIG_FILE} (e.g. "WEB DB").
# Blank lines and lines starting with '#' inside the section are ignored.
function get_all_group {
  # Section headers are compared as literal strings. Written as a regex, an
  # unescaped "[GROUP]" is a bracket expression that matches any line holding
  # one of those letters, which selects far more than the intended section.
  awk '
    { gsub(/^[ \t]+|[ \t\r]+$/, "") }   # tolerate surrounding blanks and CR
    /^\[.*\]$/ { in_section = ($0 == "[GROUP]"); next }
    in_section && NF && $0 !~ /^#/ {
      for (i = 1; i <= NF; i++) { printf "%s%s", sep, $i; sep = " " }
    }
    END { print "" }
  ' "${HOME}${CONFIG_FILE}"
}
# Takes no arguments. For every group returned by get_all_group, prints one
# line holding the space-separated process names listed in that group's
# section of ${HOME}${CONFIG_FILE}.
function get_all_process {
  local g
  for g in $(get_all_group); do
    # Literal comparison of the section header: "[WEB]" used as a regex would
    # be a bracket expression, not the header text. The file path comes from
    # the shared HOME/CONFIG_FILE settings instead of a hard-coded name.
    awk -v section="[${g}]" '
      { gsub(/^[ \t]+|[ \t\r]+$/, "") }   # tolerate surrounding blanks and CR
      /^\[.*\]$/ { in_section = ($0 == section); next }
      in_section && NF && $0 !~ /^#/ {
        for (i = 1; i <= NF; i++) { printf "%s%s", sep, $i; sep = " " }
      }
      END { print "" }
    ' "${HOME}${CONFIG_FILE}"
  done
}
# Arguments: $1 - group name.
# Prints, on one space-separated line, the process names listed in the
# [$1] section of ${HOME}${CONFIG_FILE}.
# Returns non-zero, with a message on stderr and nothing on stdout, when the
# group is not known to is_group_in_config.
function get_all_process_by_group {
  # Callers word-split this function's stdout into process names, so the
  # diagnostic must not be written there.
  if ! is_group_in_config "$1" >/dev/null 2>&1; then
    echo "GroupName $1 is not in process.cfg" >&2
    return 1
  fi
  # Literal comparison of the section header: "[$1]" used as a regex would be
  # a bracket expression matching single characters.
  awk -v section="[$1]" '
    { gsub(/^[ \t]+|[ \t\r]+$/, "") }   # tolerate surrounding blanks and CR
    /^\[.*\]$/ { in_section = ($0 == section); next }
    in_section && NF && $0 !~ /^#/ {
      for (i = 1; i <= NF; i++) { printf "%s%s", sep, $i; sep = " " }
    }
    END { print "" }
  ' "${HOME}${CONFIG_FILE}"
}
# Arguments: $1 - process name.
# Prints the name of the first group (in get_all_group order) whose member
# list contains $1. Prints nothing when no group contains it.
function get_group_by_process_name {
  local group member
  for group in $(get_all_group); do
    for member in $(get_all_process_by_group "$group"); do
      # Quoted on both sides: safe with an empty argument, and the right-hand
      # side is compared literally rather than as a glob pattern.
      if [[ "$member" == "$1" ]]; then
        echo "$group"
        # Stop at the first hit so callers always receive a single group name.
        return 0
      fi
    done
  done
}
# Arguments: $1 - process ID.
# Sets four global variables describing that process:
#   pro_status      "RUNNING" when ps reports the PID, otherwise "STOPPED"
#   pro_cpu         CPU usage percentage reported by ps (empty when stopped)
#   pro_mem         memory usage percentage reported by ps (empty when stopped)
#   pro_start_time  start timestamp reported by ps (empty when stopped)
function get_process_info_by_pid {
  local info
  # "STOPPED" is the spelling format_print and the documentation use.
  pro_status="STOPPED"
  pro_cpu=""
  pro_mem=""
  pro_start_time=""
  # One ps call with empty column headers replaces four separate ps runs; the
  # "=" suffix suppresses the header line, so no grep is needed to drop it.
  if info=$(ps -p "$1" -o pcpu= -o pmem= -o lstart= 2>/dev/null) \
    && [[ -n "$info" ]]; then
    pro_status="RUNNING"
    # The last variable receives the rest of the line (the full timestamp).
    read -r pro_cpu pro_mem pro_start_time <<<"$info"
  fi
}
# Arguments: $1 - process name.
# Prints, on one space-separated line, the PID of every process whose
# `ps -ef` line contains $1, excluding lines that contain this script's own
# name ($0). Returns 1 without output unless exactly one non-empty argument
# is given.
function get_process_pid_by_name {
  if [[ $# -ne 1 || -z "$1" ]]; then
    return 1
  fi
  local listing
  # Snapshot the process table first, so the filter below is not running yet
  # and therefore cannot match itself.
  listing=$(ps -ef) || return 1
  # index() does fixed-string matching, so regex metacharacters in the name
  # or in $0 are harmless. Only the PID column is printed.
  awk -v name="$1" -v self="$0" '
    NR > 1 && index($0, name) && !index($0, self) {
      printf "%s%s", sep, $2; sep = " "
    }
    END { print "" }
  ' <<<"$listing"
}
# Arguments: $1 - group name.
# Returns 0 when $1 is one of the names printed by get_all_group.
# Otherwise writes a message to stderr and returns 1.
function is_group_in_config {
  # A local loop variable keeps this check from overwriting a caller's
  # variable of the same name.
  local candidate
  for candidate in $(get_all_group); do
    if [[ "$candidate" == "$1" ]]; then
      return 0
    fi
  done
  # stderr, so the diagnostic never ends up inside a captured name list.
  echo "Group $1 is not in process.cfg" >&2
  return 1
}
# Arguments: $1 - process name.
# Returns 0 when $1 is one of the names printed by get_all_process.
# Otherwise writes a message to stderr and returns 1.
function is_process_in_config {
  # A local loop variable keeps this check from overwriting a caller's
  # variable of the same name.
  local candidate
  for candidate in $(get_all_process); do
    if [[ "$candidate" == "$1" ]]; then
      return 0
    fi
  done
  # stderr, so the diagnostic never ends up inside a captured name list.
  echo "Process $1 is not in process.cfg" >&2
  return 1
}
# Arguments: $1 - process name, $2 - group name.
# Prints one fixed-width status row per PID that get_process_pid_by_name
# reports for $1, using the pro_* globals that get_process_info_by_pid sets.
# When no PID is reported, prints a single "STOPPED" row with NULL fields.
function format_print {
  local pids pid
  pids=$(get_process_pid_by_name "$1")
  if [[ -z "$pids" ]]; then
    printf '%-20s%-12s%-10s%-6s%-7s%-10s%-20s\n' \
      "$1" "$2" "STOPPED" "NULL" "NULL" "NULL" "NULL"
    return 0
  fi
  for pid in $pids; do
    get_process_info_by_pid "$pid"
    # One printf call with all seven fields; the row ends with a real newline.
    printf '%-20s%-12s%-10s%-6s%-7s%-10s%-20s\n' \
      "$1" "$2" "$pro_status" "$pid" "$pro_cpu" "$pro_mem" "$pro_start_time"
  done
}
# 输出前置标题
#awk 'BEGIN{printf "%-20s%-10s%-10s%-6s%-7s%-10s%-20sn","ProcessName---------","GroupName---","Status----","PID---","CPU----","MEMORY----","StartTime---"}'
# 执行有三种情况:
# 1 无参数 列出配置文件中所有进程的运行信息
# 2 -g GroupName 列出GroupName组内的所有进程
# 3 process_name1 列出指定进程的运行信息
#if [ $# -gt 0 ]; then
# if [ "$1" == '-g' ]; then
# # 2 -g GroupName 列出GroupName组内的所有进程
# shift
# for gn in $@; do
# is_group_in_config $gn || continue
# for pn in $(get_all_process_by_group $gn); do
# is_process_in_config $pn && format_print $pn $gn
# done
# done
# else
# # 3 process_name1 列出指定进程的运行信息
# for pn in $@; do
# gn=$(get_group_by_process_name $pn)
# is_process_in_config $pn && format_print $pn $gn
# done
# fi
#else
# # 1 无参数 列出配置文件中所有进程的运行信息
# for pn in $(get_all_process); do
# gn=$(get_group_by_process_name $pn)
# is_process_in_config $pn && format_print $pn $gn
# done
#fi
启动进程函数库
代码语言:bash
#!/bin/bash
# Base directory used to locate the configuration file: the current working
# directory with a trailing slash, so a file name can be appended directly.
# NOTE(review): this overwrites the standard HOME environment variable for
# this shell; the functions below read the path under this name.
HOME="$(pwd)/"
# File name of the process configuration, appended to HOME.
CONFIG_FILE="process.cfg"
# Private helper. Arguments: $1 - section (process) name, $2 - key.
# Prints the text after the first "=" of the first "key=value" line found in
# the [$1] section of ${HOME}${CONFIG_FILE}; prints nothing when absent.
function _get_config_value() {
  # The section header is compared as a literal string: "[$1]" used as a
  # regex would be a bracket expression matching single characters.
  awk -v section="[$1]" -v key="$2" '
    { gsub(/^[ \t]+|[ \t\r]+$/, "") }   # tolerate surrounding blanks and CR
    /^\[.*\]$/ { in_section = ($0 == section); next }
    in_section {
      eq = index($0, "=")
      # Split on the first "=" only, so values may themselves contain "=".
      if (eq > 0 && substr($0, 1, eq - 1) == key) {
        print substr($0, eq + 1)
        exit
      }
    }
  ' "${HOME}${CONFIG_FILE}"
}

# Arguments: $1 - process name. Prints that process's program_name value.
function get_config_program_name() {
  _get_config_value "$1" "program_name"
}

# Arguments: $1 - process name. Prints that process's description value,
# exactly as written in the file (surrounding quotes included).
function get_config_description() {
  _get_config_value "$1" "description"
}

# Arguments: $1 - process name. Prints that process's parameter value.
function get_config_parameter() {
  _get_config_value "$1" "parameter"
}
# Arguments: $1 - process name as it appears in the configuration.
# Looks up the program and its parameters through get_config_program_name
# and get_config_parameter, then launches the command in the background.
# Returns 1, with a message on stderr, when no program is configured or the
# program cannot be found; otherwise returns the status of the background
# launch.
function start() {
  local name config
  local -a cmd=()
  name=$(get_config_program_name "$1")
  config=$(get_config_parameter "$1")
  # Split on whitespace into an array: same word boundaries as an unquoted
  # expansion, but without pathname expansion of characters such as "*".
  read -r -a cmd <<<"$name $config"
  if (( ${#cmd[@]} == 0 )) || [[ -z "$name" ]]; then
    echo "Process $1 has no program_name in process.cfg" >&2
    return 1
  fi
  if ! command -v "${cmd[0]}" >/dev/null 2>&1; then
    echo "Program ${cmd[0]} for process $1 was not found" >&2
    return 1
  fi
  "${cmd[@]}" &
}
supervisor 脚本
代码语言:bash
#!/bin/bash
# Load the status and start helper libraries from the current directory.
source ./app_status.lib
source ./app_start.lib

# Number of command-line arguments as originally given, recorded before any
# shift below.
parameter_total=$#
# Value of the last positional parameter, looked up by number through
# indirect expansion.
parameter_last=${!parameter_total}

# Selection mode: '-p' treats the remaining arguments as process names (the
# default), '-g' treats them as group names. A leading mode flag is consumed.
mode='-p'
case "${1-}" in
  -p | -g)
    mode=$1
    shift
    ;;
esac
# Prints the welcome banner: a rule line, the greeting, and a second rule.
# Command format, for reference:
#   sh supervisor.sh [-g|-p] [group name | process name] ... [start | stop | show]
#   e.g. sh supervisor.sh -g DB start
function print_tips() {
  local rule='-----------------------------'
  printf '%s\n' "$rule" "欢迎使用 supervisor 管理器" "$rule"
}
# Arguments: the command-line words left after the optional mode flag; the
# action word 'show' is ignored wherever it appears.
# Globals:   mode (read) - '-p' for process names, '-g' for group names.
# Prints one status row per selected process through format_print. When no
# target remains after dropping 'show', every configured process is listed,
# so a bare `supervisor.sh show` lists everything.
function to_process_to_show() {
  local -a targets=()
  local arg pn gn
  for arg in "$@"; do
    [[ "$arg" == 'show' ]] || targets+=("$arg")
  done

  if (( ${#targets[@]} == 0 )); then
    # Names come straight from get_all_process, so no membership check is
    # needed here.
    for pn in $(get_all_process); do
      gn=$(get_group_by_process_name "$pn")
      format_print "$pn" "$gn"
    done
    return 0
  fi

  case "$mode" in
    -p)
      for pn in "${targets[@]}"; do
        is_process_in_config "$pn" || continue
        gn=$(get_group_by_process_name "$pn")
        format_print "$pn" "$gn"
      done
      ;;
    -g)
      for gn in "${targets[@]}"; do
        is_group_in_config "$gn" || continue
        for pn in $(get_all_process_by_group "$gn"); do
          format_print "$pn" "$gn"
        done
      done
      ;;
  esac
}
# Arguments: the command-line words left after the optional mode flag; the
# action word 'start' is ignored wherever it appears.
# Globals:   mode (read) - '-p' for process names, '-g' for group names.
# Prints the table header, then starts each selected process and prints its
# status row through format_print. When no target remains after dropping
# 'start', every configured process is started.
function to_process_start
{
  local -a targets=()
  local arg pn gn
  for arg in "$@"; do
    [[ "$arg" == 'start' ]] || targets+=("$arg")
  done

  # The header line ends with a real newline so the first row starts on its
  # own line.
  printf '%-20s%-10s%-10s%-6s%-7s%-10s%-20s\n' \
    "ProcessName---------" "GroupName---" "Status----" "PID---" \
    "CPU----" "MEMORY----" "StartTime---"

  if (( ${#targets[@]} == 0 )); then
    for pn in $(get_all_process); do
      start "$pn"
      gn=$(get_group_by_process_name "$pn")
      format_print "$pn" "$gn"
    done
  elif [[ "$mode" == '-p' ]]; then
    for pn in "${targets[@]}"; do
      is_process_in_config "$pn" || continue
      start "$pn" || continue
      gn=$(get_group_by_process_name "$pn")
      format_print "$pn" "$gn"
    done
  elif [[ "$mode" == '-g' ]]; then
    for gn in "${targets[@]}"; do
      is_group_in_config "$gn" || continue
      for pn in $(get_all_process_by_group "$gn"); do
        is_process_in_config "$pn" && start "$pn" && format_print "$pn" "$gn"
      done
    done
  fi
}
# Private helper. Arguments: $1 - process name, $2 - group name.
# Sends SIGTERM to every PID that get_process_pid_by_name reports for $1,
# waits up to about one second per PID for it to disappear, then prints the
# status row for $1 through format_print.
# NOTE(review): PIDs are selected by whatever matching rule
# get_process_pid_by_name applies; confirm it is strict enough before using
# this on a shared host.
function _supervisor_stop_one
{
  local pn=$1 gn=$2 pid tries
  local -a pids=()
  read -r -a pids <<<"$(get_process_pid_by_name "$pn")"
  for pid in "${pids[@]}"; do
    # Only plain positive numbers are passed to kill; anything else (for
    # example "-1") could signal unrelated processes.
    [[ "$pid" =~ ^[0-9]+$ ]] || continue
    kill -TERM "$pid" 2>/dev/null
  done
  for pid in "${pids[@]}"; do
    [[ "$pid" =~ ^[0-9]+$ ]] || continue
    tries=10
    while (( tries-- > 0 )) && kill -0 "$pid" 2>/dev/null; do
      sleep 0.1
    done
  done
  format_print "$pn" "$gn"
}

# Arguments: the command-line words left after the optional mode flag; the
# action word 'stop' is ignored wherever it appears.
# Globals:   mode (read) - '-p' for process names, '-g' for group names.
# Prints the table header, then stops each selected process and prints its
# status row. When no target remains after dropping 'stop', every configured
# process is stopped.
function to_process_stop
{
  local -a targets=()
  local arg pn gn
  for arg in "$@"; do
    [[ "$arg" == 'stop' ]] || targets+=("$arg")
  done

  printf '%-20s%-10s%-10s%-6s%-7s%-10s%-20s\n' \
    "ProcessName---------" "GroupName---" "Status----" "PID---" \
    "CPU----" "MEMORY----" "StartTime---"

  if (( ${#targets[@]} == 0 )); then
    for pn in $(get_all_process); do
      gn=$(get_group_by_process_name "$pn")
      _supervisor_stop_one "$pn" "$gn"
    done
  elif [[ "$mode" == '-p' ]]; then
    for pn in "${targets[@]}"; do
      is_process_in_config "$pn" || continue
      gn=$(get_group_by_process_name "$pn")
      _supervisor_stop_one "$pn" "$gn"
    done
  elif [[ "$mode" == '-g' ]]; then
    for gn in "${targets[@]}"; do
      is_group_in_config "$gn" || continue
      for pn in $(get_all_process_by_group "$gn"); do
        _supervisor_stop_one "$pn" "$gn"
      done
    done
  fi
}
# Entry point: print the banner, then dispatch on the last command-line word.
# Anything other than "stop" or "start" (including no action at all) is
# treated as "show".
print_tips
case "$parameter_last" in
  "stop")
    # "$@" is quoted so each argument reaches the handler as one word.
    to_process_stop "$@"
    ;;
  "start")
    to_process_start "$@"
    ;;
  "show" | *)
    # Table header; the format ends with a real newline so the first row
    # starts on its own line.
    printf '%-20s%-10s%-10s%-6s%-7s%-10s%-20s\n' \
      "ProcessName---------" "GroupName---" "Status----" "PID---" \
      "CPU----" "MEMORY----" "StartTime---"
    to_process_to_show "$@"
    ;;
esac