#!/bin/bash
#
# systools -- system toolbox: an interactive host health inspection.
#
# Usage:
#   systools help                 print the usage text
#   systools all                  run every check
#   systools CHECK [CHECK...]     run the named checks, in the given order
#
# Checks:
#   clo   load averages              cuse  CPU utilisation and top threads
#   muse  memory utilisation         swu   swap utilisation
#   psdr  processes in D / R state   ino   inode usage
#   size  largest directories        iot   disk and per-process I/O
#   fde   open file descriptors
#
# Every check prints to the terminal and appends the same data to the per-run
# status log ($logname).  Progress and errors of the tool itself are appended
# to the run log ($toolslog).
#
# NOTE(review): the copy of this file that was reviewed had lost three spans
# of text (the usage text plus the whole configuration section, and part of
# the result output in cuse and muse).  The places that had to be
# reconstructed are marked NOTE(review) below; compare them with the original
# file before deploying.

readonly TOOLS_HOME=/var/sysadmin_toolbox/other/systools
readonly STATUS_LOG_DIR=/var/log/systatus
readonly CHECK_NAMES="clo cuse muse swu psdr ino size iot fde"
# The expensive checks (swap per process, directory sizes, open files) are
# repeated at most once per this many seconds (30 minutes).
readonly RECHECK_SECONDS=1800

#######################################
# Print the usage text.
# NOTE(review): the original help text was lost; this one is derived from the
# check functions defined in this file.
#######################################
usage() {
  cat <<EOF
Usage: $0 help | all | CHECK [CHECK...]

Checks:
  clo    system load averages
  cuse   cpu usage and top 10 threads
  muse   memory usage and top 10 processes
  swu    swap usage and top 10 processes
  psdr   processes in D and R state
  ino    inode usage (top 3 mount points)
  size   directory space usage (top 3, two levels)
  iot    disk and process io status
  fde    file descriptor usage
  all    run every check listed above
EOF
}

# Current time in the format used by every log line.
now_stamp() {
  date +%F_%H:%M:%S
}

#######################################
# Print one line on stdout and append it to the status log.
# Globals:   logname (read)
# Arguments: the text to print
#######################################
both() {
  printf '%s\n' "$*"
  printf '%s\n' "$*" >> "$logname"
}

#######################################
# printf to stdout and, identically, to the status log.
# Globals:   logname (read)
# Arguments: $1 - printf format, remaining - printf arguments
#######################################
row() {
  local fmt=$1
  shift
  # shellcheck disable=SC2059 -- the format is a literal chosen by the caller
  printf "$fmt" "$@"
  # shellcheck disable=SC2059
  printf "$fmt" "$@" >> "$logname"
}

#######################################
# Common heading of a check: coloured banner on the terminal, section tag in
# the status log, a timestamp and a blank line in both.
# Arguments: $1 - banner text, $2 - log tag,
#            $3 - pause after the banner, $4 - pause after the timestamp
#######################################
begin_check() {
  local banner=$1 tag=$2 pause_before=$3 pause_after=$4
  echo -e "\033[1;35m${banner}\033[0m"
  sleep "$pause_before"
  echo "$tag" >> "$logname"
  both "time: $(now_stamp)"
  sleep "$pause_after"
  both ""
}

#######################################
# Classify a percentage against three ascending thresholds.
# Arguments: $1 - value (may be fractional or empty), $2 $3 $4 - thresholds
# Outputs:   normal | !relatively high! | !!high!! | !!!extremely high!!!
#######################################
usage_level() {
  awk -v v="$1" -v low="$2" -v mid="$3" -v high="$4" 'BEGIN {
    v += 0
    if (v >= high)     print "!!!extremely high!!!"
    else if (v >= mid) print "!!high!!"
    else if (v >= low) print "!relatively high!"
    else               print "normal"
  }'
}

# Succeeds when $1 is the name of one of the check functions.
is_check_name() {
  local known
  for known in $CHECK_NAMES; do
    if [[ "$1" == "$known" ]]; then
      return 0
    fi
  done
  return 1
}

# Succeeds when at least RECHECK_SECONDS have passed since epoch second $1
# (an empty $1 counts as "never ran").
interval_elapsed() {
  local last=${1:-0} now
  now=$(date +%s)
  (( now - last >= RECHECK_SECONDS ))
}

# Start time (ps STIME column) of process $1; empty if it no longer exists.
proc_stime() {
  ps -p "$1" -o stime --no-headers 2>/dev/null | awk '{ print $1 }'
}

##1) load averages
clo() {
  local load1 load5 load15 cpu_count
  # /proc/loadavg is locale independent, unlike the text printed by uptime.
  read -r load1 load5 load15 _ < /proc/loadavg
  cpu_count=$(grep -c '^processor' /proc/cpuinfo)

  begin_check "-------------check cpu load------------" "###cpu load status###" 2 2
  both "logical cpu num: $cpu_count"
  both "cpu load status: 1m $load1, 5m $load5, 15m $load15"
  both ""
}

##2) cpu utilisation
cuse() {
  local cpu_user cpu_nice cpu_sys cpu_io cpu_st cpu_used cpu_level
  local pid tid stime pcpu comm

  begin_check "-------------check cpu used------------" "###cpu used status###" 2 0

  # Last line of "sar 1 3" is the average row: fields 3-7 are
  # %user %nice %system %iowait %steal.
  read -r cpu_user cpu_nice cpu_sys cpu_io cpu_st cpu_used < <(
    sar 1 3 | awk '
      { last = $0 }
      END {
        split(last, f, " ")
        printf "%.1f %.1f %.1f %.1f %.1f %.1f\n",
          f[3], f[4], f[5], f[6], f[7], f[3] + f[4] + f[5] + f[6] + f[7]
      }'
  )

  # 50-60% / 60-70% / 70% and above.
  # NOTE(review): the "normal" fallback and the "cpu used" line below were in
  # the lost part of the file and are reconstructed by analogy with swu.
  cpu_level=$(usage_level "$cpu_used" 50 60 70)
  both "cpu used: ${cpu_used}%"
  both "Composition: %user $cpu_user, %nice $cpu_nice, %sys $cpu_sys, %io $cpu_io %steal $cpu_st"
  both ""
  both "detection result: $cpu_level"
  both ""
  sleep 2

  both "Top 10 cpu usage(current):"
  row "%-20s %-10s %-10s %-10s %-10s %-10s\n" "NAME(THREAD)" "TID" "PID" "STIME" "%CPU"
  # comm is read last so that thread names containing blanks stay in one column.
  while read -r pid tid stime pcpu comm; do
    [[ -n "$pid" ]] || continue
    row "%-20s %-10s %-10s %-10s %-10s %-10s\n" "$comm" "$tid" "$pid" "$stime" "${pcpu}%"
  done < <(ps H -eo pid,tid,stime,%cpu,comm --no-headers --sort -%cpu | head -10)
  both ""
}

##3) memory utilisation
muse() {
  local free_out top_mem total_kb used_base free_kb shmem used_kb
  local used_pct free_pct mem_level pid stime pmem comm

  begin_check "-----------check memory used-----------" "###memory used status###" 2 1

  free_out=$(free -k)
  top_mem=$(ps -eo pid,stime,%mem,comm --no-headers --sort -%mem | head -10)

  # Used memory follows the monitoring system's formula: "used" as reported
  # by free plus Shmem from /proc/meminfo.  Old free versions (CentOS 6,
  # SUSE) report the figure on the "-/+ buffers/cache:" line, newer ones on
  # the "Mem:" line; the cache line, when present, comes later and wins.
  read -r total_kb used_base free_kb < <(
    awk '
      /Mem:/   { total = $2; used = $3; free = $4 }
      /cache:/ { used = $3; free = $4 }
      END      { print total + 0, used + 0, free + 0 }' <<< "$free_out"
  )
  shmem=$(awk '$1 == "Shmem:" { print $2 }' /proc/meminfo)
  used_kb=$(( used_base + ${shmem:-0} ))
  if (( total_kb > 0 )); then
    used_pct=$(( used_kb * 100 / total_kb ))
  else
    used_pct=0
  fi
  free_pct=$(( 100 - used_pct ))

  # NOTE(review): only the 50-60% branch survived in the reviewed copy.  The
  # 60/70 thresholds mirror cuse, and the three summary lines below replace
  # output whose original wording is unknown.
  mem_level=$(usage_level "$used_pct" 50 60 70)
  both "memory total(KB): $total_kb"
  both "memory used(KB): $used_kb (${used_pct}%)"
  both "memory free(KB): $free_kb (${free_pct}%)"
  both "detection result: $mem_level"
  both ""
  sleep 2

  both "Top 10 memory usage:"
  row "%-20s %-10s %-10s %-10s %-10s\n" "NAME(process)" "PID" "STIME" "%MEM"
  while read -r pid stime pmem comm; do
    [[ -n "$pid" ]] || continue
    row "%-20s %-10s %-10s %-10s %-10s\n" "$comm" "$pid" "$stime" "${pmem}%"
  done <<< "$top_mem"
  both ""
}

#######################################
# One row per process: name, pid, start time, swap used (MB) and its share
# of the total swap space.
#######################################
getswap() {
  local total_mb proc_dir pid stime comm swap_kb used_mb used_pct
  total_mb=$(free -m | awk '/^Swap:/ { print $2 + 0 }')
  for proc_dir in /proc/[0-9]*/; do
    pid=${proc_dir#/proc/}
    pid=${pid%/}
    stime=""
    comm=""
    read -r stime comm < <(ps -p "$pid" -o stime,comm --no-headers)
    # Anchor on "Swap:" so that the SwapPss: lines of newer kernels are not
    # added on top.  Unreadable or vanished smaps files count as 0.
    swap_kb=$(awk '/^Swap:/ { kb += $2 } END { print kb + 0 }' \
      "${proc_dir}smaps" 2>/dev/null)
    read -r used_mb used_pct < <(
      awk -v kb="${swap_kb:-0}" -v total="${total_mb:-0}" 'BEGIN {
        mb = kb / 1024
        printf "%.1f %.1f\n", mb, (total > 0 ? mb / total * 100 : 0)
      }'
    )
    printf "%-20s %-10s %-10s %-13s %-15s\n" \
      "$comm" "$pid" "$stime" "${used_mb}M" "${used_pct}%"
  done
}

##4) swap utilisation
# Globals: swnum (run counter), oldsta (epoch of the last per-process scan)
# Returns: 1 when the per-process scan was skipped (swap off, swap unused,
#          or last scan less than 30 minutes ago)
swu() {
  local swap_total swap_used swap_pct swap_in swap_out top_swap

  begin_check "------------check swap used------------" "###swap used status###" 2 2

  read -r swap_total swap_used < <(free -k | awk '/^Swap:/ { print $2 + 0, $3 + 0 }')
  if (( ${swap_total:-0} == 0 )); then
    echo "swap status: --close--"
    both ""
    return 1
  fi

  swap_pct=$(awk -v used="$swap_used" -v total="$swap_total" \
    'BEGIN { printf "%.1f", used / total * 100 }')
  # vmstat columns 7 and 8 are si / so.
  read -r swap_in swap_out < <(vmstat | awk 'END { print $7, $8 }')

  echo "swap status: open"
  both "swap total: $(free -h | awk '/^Swap:/ { print $2 }')"
  both "swap used: ${swap_pct}%"
  # 20-30% / 30-40% / 40% and above.
  both "detection result: $(usage_level "$swap_pct" 20 30 40)"
  both ""
  both "swap in(kb/s) $swap_in, swap out(kb/s) $swap_out"
  both ""

  if [[ "$swap_pct" == "0.0" ]]; then
    both ""
    return 1
  fi
  sleep 2

  if (( swnum > 1 )) && ! interval_elapsed "$oldsta"; then
    both "process swap usage check time interval less than 30min in last check"
    both ""
    return 1
  fi

  top_swap=$(getswap | sort -nr -k4 | head -10)
  both "Top 10 in swap usage:"
  both "excute num: $swnum"
  row "%-20s %-10s %-10s %-10s %-15s\n" "NAME(PROCESS)" "PID" "STIME" "USED_VALUE(M)" "USED(%)"
  both "$top_swap"
  both ""
  oldsta=$(date +%s)
  swnum=$(( swnum + 1 ))
}

#######################################
# One row (pid, name, start time, wchan) per process whose State: line in
# /proc/PID/status contains $1.  Processes that vanish while being inspected
# (any field empty) are skipped.
#######################################
procs_in_state() {
  local needle=$1 status_file pid name stime wchan
  for status_file in /proc/[0-9]*/status; do
    grep -q "^State:.*${needle}" "$status_file" 2>/dev/null || continue
    pid=${status_file#/proc/}
    pid=${pid%/status}
    name=$(awk '/^Name:/ { print $2; exit }' "$status_file" 2>/dev/null)
    stime=$(proc_stime "$pid")
    wchan=$(cat "/proc/$pid/wchan" 2>/dev/null)
    [[ -n "$name" && -n "$stime" && -n "$wchan" ]] || continue
    printf "%-10s %-20s %-15s %-15s\n" "$pid" "$name" "$stime" "$wchan"
  done
}

##5) processes in D (uninterruptible sleep) and R (running) state
psdr() {
  local d_rows d_count r_rows r_count

  begin_check "-----check process in D&&R status------" "###process in D&&R status###" 2 2

  d_rows=$(procs_in_state "disk sleep")
  d_count=$(grep -c . <<< "$d_rows")
  both "---process in D state---"
  both "D state number: $d_count"
  if (( d_count == 0 )); then
    both "detection result: not exist"
  else
    both "detection result: !!exist!!"
  fi
  both ""
  if (( d_count > 0 )); then
    both "PROCESS DETAIL(D):"
    row "%-10s %-20s %-15s %-15s\n" "PID" "NAME" "STIME" "WCHAN"
    both "$d_rows"
  fi
  both ""
  sleep 2

  r_rows=$(procs_in_state "running")
  r_count=$(grep -c . <<< "$r_rows")
  both "---process in R state---"
  both "R state number: $r_count"
  if (( r_count > 0 )); then
    both ""
    both "PROCESS DETAIL(R):"
    row "%-10s %-20s %-15s %-15s\n" "PID" "NAME" "STIME" "WCHAN"
    both "$r_rows"
  fi
  both ""
}

##6) inode usage: the three mount points with the highest IUse%
ino() {
  local used mount

  begin_check "------------check inode used-----------" "###inode used status###" 2 2

  both "inode usage(top 3):"
  row "%-20s %-10s\n" "MOUNT" "USED(%)"
  # -P keeps every filesystem on one line; mount is read last so that mount
  # points containing blanks are kept whole.
  while read -r _ _ _ _ used mount; do
    [[ -n "$mount" ]] || continue
    row "%-20s %-10s\n" "$mount" "$used"
    sleep 1
  done < <(df -iP | tail -n +2 | sort -nr -k5 | head -3)
  both ""
  sleep 2
}

##7) the three largest entries under / and, for each, its three largest children
# Globals: lonum (loop counter), sinum (run counter),
#          oldstasi (epoch of the last scan)
# Returns: 1 when skipped because the last scan is less than 30 minutes old
size() {
  local top_dirs dir dir_size sub sub_size

  begin_check "------check directory size status------" "###directory size status###" 0 2

  if (( lonum > 1 )) && ! interval_elapsed "$oldstasi"; then
    both "directory size status check time interval less than 30min in last check"
    both ""
    return 1
  fi

  top_dirs=$(du -sh /* 2>/dev/null | sort -rh -k1 | head -3)
  both "directory space usage(top 3,secondary directory):"
  both "excute num: $sinum"
  # du separates size and path with a tab.
  while IFS=$'\t' read -r dir_size dir; do
    [[ -n "$dir" ]] || continue
    row "%-10s %-10s\n" "$dir" "$dir_size"
    sleep 1
    # Errors are hidden on purpose: a top-level entry may be a plain file.
    while IFS=$'\t' read -r sub_size sub; do
      [[ -n "$sub" ]] || continue
      row "%-25s %-10s\n" " --- $sub" "$sub_size"
    done < <(du -sh "$dir"/* 2>/dev/null | sort -rh -k1 | head -3)
    both ""
    sleep 1
  done <<< "$top_dirs"

  sinum=$(( sinum + 1 ))
  oldstasi=$(date +%s)
}

##8) disk and per-process I/O
iot() {
  local iostat_x iostat_plain iowait disk tps await svctm util
  local pidstat_avg pid rd wr ccwr name io_procs

  begin_check "---------check system io status--------" "###system io status###" 2 2

  both "---disk io status---"
  iostat_x=$(iostat -x)
  iostat_plain=$(iostat)
  # The line after the avg-cpu header; field 4 is %iowait.
  iowait=$(awk '/avg-cpu/ { getline; print $4; exit }' <<< "$iostat_x")
  both "system iowait(%): $iowait"
  both ""
  sleep 2

  both "disk usage io in detail"
  row "%-10s %-10s %-10s %-10s %-10s\n" "DISK" "TPS" "AWAIT(ms)" "SVCTM(ms)" "UTIL(%)"
  while read -r disk; do
    [[ -n "$disk" ]] || continue
    tps=$(awk -v d="$disk" '$1 == d { print $2; exit }' <<< "$iostat_plain")
    await=""
    svctm=""
    util=""
    # NOTE(review): columns 10/13/14 (await, svctm, %util) are kept from the
    # original; the layout of "iostat -x" differs between sysstat versions.
    # '|' (not whitespace) is the separator so that empty fields stay in place.
    IFS='|' read -r await svctm util < <(
      awk -v d="$disk" -v OFS='|' '$1 == d { print $10, $13, $14; exit }' <<< "$iostat_x"
    )
    row "%-10s %-10s %-10s %-10s %-10s\n" "$disk" "$tps" "$await" "$svctm" "$util"
    sleep 1
  done < <(lsblk | grep '^[a-z]d[a-z]' | awk '{ print $1 }')
  sleep 1
  both ""

  both "---process io status---"
  pidstat_avg=$(pidstat -d 1 3 | grep -i 'Average' | grep -v 'UID')
  if [[ -z "$pidstat_avg" ]]; then
    both "process use io is not exist"
  else
    both "process io usage in detail(current):"
    row "%-15s %-10s %-10s %-10s %-10s %-10s\n" "NAME" "PID" "STIME" "IO_R(KB/S)" "IO_W(KB/S)" "IO_CCWR(KB/S)"
    io_procs=0
    # Average rows: tag, UID, PID, kB_rd/s, kB_wr/s, kB_ccwr/s, then field 7,
    # which the original printed as the process name.
    while read -r _ _ pid rd wr ccwr name _; do
      [[ -n "$pid" ]] || continue
      row "%-15s %-10s %-10s %-10s %-10s %-10s\n" \
        "$name" "$pid" "$(proc_stime "$pid")" "$rd" "$wr" "$ccwr"
      io_procs=$(( io_procs + 1 ))
    done <<< "$pidstat_avg"
    both "use io process num(current): $io_procs"
  fi
  both ""
}

##9) open file descriptors
# Globals: lonum (loop counter), finum (run counter),
#          fdotime (epoch of the last scan), tempdir
# Returns: 1 when skipped because the last scan is less than 30 minutes old
fde() {
  local fd_list file_max user_max ps_table open_total open_pct
  local pid count user stime name pct

  begin_check "---------check system fd status--------" "###system fd status###" 2 2

  if (( lonum > 1 )) && ! interval_elapsed "$fdotime"; then
    both "system fd check time interval less than 30min in last check"
    both ""
    return 1
  fi
  echo "excute num: $finum"

  # System-wide limit and the per-user limit of the invoking shell.
  file_max=$(< /proc/sys/fs/file-max)
  user_max=$(ulimit -n)
  echo "max open file number in file(system config): $file_max"
  echo "max open file number(user config): $user_max"
  echo ""
  {
    echo "max open number in file(system config): $file_max"
    echo "max open number in file(user config): $user_max"
    echo ""
  } >> "$logname"
  sleep 2

  ps_table=$(ps -A -o pid,user,stime,comm)
  # lsof output can be very large, so it goes to a file instead of a variable.
  fd_list="$tempdir/fdfile.tmp"
  lsof -n | grep -v "^COMMAND.*PID" > "$fd_list"
  open_total=$(grep -c . "$fd_list")
  open_pct=$(awk -v n="$open_total" -v max="$file_max" \
    'BEGIN { printf "%.2f", (max > 0 ? n / max * 100 : 0) }')
  both "file open number(current): $open_total"
  both "file open used(%): ${open_pct}%"
  both ""
  sleep 2

  both "process open file number in Top 10:"
  row "%-20s %-10s %-10s %-10s %-10s %-10s\n" "NAME" "USER" "PID" "STIME" "OPENNUM" "USED(%)"
  # The list is cut to 10 entries before the loop so that the terminal and
  # the log receive exactly the same rows.
  while read -r pid count; do
    [[ -n "$pid" ]] || continue
    user=""
    stime=""
    name=""
    read -r user stime name < <(
      awk -v p="$pid" '$1 == p { print $2, $3, $4; exit }' <<< "$ps_table"
    )
    pct=$(awk -v n="$count" -v max="$file_max" \
      'BEGIN { printf "%.2f", (max > 0 ? n / max * 100 : 0) }')
    row "%-20s %-10s %-10s %-10s %-10s %-10s\n" \
      "$name" "$user" "$pid" "$stime" "$count" "${pct}%"
  done < <(
    awk '{ n[$2]++ } END { for (p in n) print p, n[p] }' "$fd_list" \
      | sort -nr -k2 | head -10
  )
  both ""

  fdotime=$(date +%s)
  finum=$(( finum + 1 ))
}

#######################################
# Handler for TERM, TSTP and for exits before the inspection has finished:
# remove the temp directory, record the abort and leave with status 10.
# NOTE(review): the first message written to $toolslog here was lost from
# the reviewed copy; its wording is a reconstruction.
#######################################
on_abort() {
  trap - EXIT # "exit" below must not run this handler a second time
  rm -rf -- "${tempdir:?}"
  echo "Systools is stopped,time: $(now_stamp) (pid: $pid_lo)" >> "$toolslog"
  if [[ -f "$logname" ]]; then
    echo 'systatus log file:' "$logname"
    echo "systatus log location: $logname (pid: $pid_lo)" >> "$toolslog"
  fi
  exit 10
}

#######################################
# EXIT handler once all runs are complete.  Status 10 on normal completion
# is kept from the original script; callers may depend on it.
#######################################
on_finish() {
  rm -rf -- "${tempdir:?}"
  {
    echo "Systools execution completed,time: $(now_stamp) (pid: $pid_lo)"
    echo "number of runs: $finlonum (pid: $pid_lo)"
    echo "systatus log location: $logname (pid: $pid_lo)"
  } >> "$toolslog"
  echo "pid: $pid_lo number of runs in this time:" "$finlonum"
  echo 'systatus log file:' "$logname"
  exit 10
}

#######################################
# Refuse to run when the root partition has 50 MB or less free, or when the
# status log directory has grown to 50 MB or more.
#######################################
check_disk_space() {
  local root_free_mb log_dir_kb

  echo -e "\033[1msystools space check: \033[0m"
  echo "systools space check:" >> "$toolslog"
  root_free_mb=$(df -Pm / | awk 'END { print $4 }')
  if (( ${root_free_mb:-0} <= 50 )); then
    echo "Root partition free space: !!abnormal!!"
    echo "Error: Root partition free space: abnormal (pid: $pid_lo)" >> "$toolslog"
    exit 10
  fi
  echo "Root partition free space: ok"
  echo "Root partition free space: ok (pid: $pid_lo)" >> "$toolslog"

  if [[ -d "/var/log/systatus/" ]]; then
    log_dir_kb=$(du -s /var/log/systatus | awk '{ print $1 }')
    if (( ${log_dir_kb:-0} >= 51200 )); then
      echo "systatus log director size: !!abnormal!!"
      echo "Error: Systatus log director size: abnormal (pid: $pid_lo)" >> "$toolslog"
      exit 10
    fi
    echo "systatus log director size: ok"
    echo "systatus log director size: ok (pid: $pid_lo)" >> "$toolslog"
  fi
  echo ""
  sleep 2
}

#######################################
# Stop one earlier run: USR1 first, SIGKILL if it is still there 5 s later.
# Arguments: $1 - pid
#######################################
stop_stale_run() {
  local pid=$1
  kill -USR1 "$pid"
  sleep 5
  if ! ps -p "$pid" > /dev/null 2>&1; then
    echo "pid $pid: is stopped in single \"USR1\""
    echo "pid $pid is stoped (single:USR1)" >> "$toolslog"
  elif kill -9 "$pid"; then
    echo "$pid is killed in single 9"
    echo "pid:$pid is killed (single:9)" >> "$toolslog"
  else
    echo "!!$pid: is not killed,please check!!"
    echo "Error: pid $pid not to killed" >> "$toolslog"
  fi
  sleep 2
}

#######################################
# Look for earlier runs that never exited and let the user decide whether
# they are stopped (y), left alone (n); any other answer aborts.
#######################################
handle_stale_runs() {
  local stale stale_pids answer pid

  # -w: only drop lines that carry our pid as a whole word (PID or PPID
  # column), not every pid that merely contains the same digits.
  stale=$(ps -ef | grep 'systatus' | grep -v 'grep' | grep -vw "$pid_lo")
  stale_pids=$(awk '{ print $2 }' <<< "$stale")
  [[ -n "$stale_pids" ]] || return 0

  echo "old systatus process:"
  printf '%s\n' "$stale"
  read -r -p "process is exist,whether to kill[y/n]" answer
  case "$answer" in
    y)
      for pid in $stale_pids; do
        stop_stale_run "$pid"
      done
      ;;
    n)
      echo "continue to run $0"
      ;;
    *)
      echo 'print error'
      exit 10
      ;;
  esac
  echo ""
}

#######################################
# Delete status logs that are at least one day old and run logs older than
# five days.  Only regular files are removed.
#######################################
purge_old_logs() {
  local removed

  # "-mtime +0" is "at least 24 h old", i.e. the union of the original
  # "-mtime 1" and "-mtime +1", but with -type f applied to both.
  removed=$(find "$STATUS_LOG_DIR" -type f -mtime +0 -print \
    -exec rm -f -- {} + 2>/dev/null)
  if [[ -n "$removed" ]]; then
    echo ""
    echo "Old systatus log clean finished"
    echo "Old systatus log clean finished" >> "$toolslog"
  fi

  removed=$(find "$TOOLS_HOME/logs" -type f -mtime +5 -print \
    -exec rm -f -- {} + 2>/dev/null)
  if [[ -n "$removed" ]]; then
    echo ""
    echo "Old systools run log clean finished"
    echo "Old systools run log clean finished" >> "$toolslog"
  fi
}

#######################################
# Print check time, pid, first non-loopback IPv4 address and OS release.
# Arguments: $1 - check time
#######################################
print_base_info() {
  local check_time=$1 sysver

  if [[ -f /etc/redhat-release ]]; then
    sysver=$(< /etc/redhat-release)
  elif [[ -f /etc/SuSE-release ]]; then
    sysver=$(head -n 1 /etc/SuSE-release)
  elif [[ -f /etc/Suse-release ]]; then
    sysver=$(head -n 1 /etc/Suse-release)
  elif [[ -r /etc/os-release ]]; then
    sysver=$(. /etc/os-release && echo "$PRETTY_NAME")
  else
    sysver=""
  fi

  echo ""
  echo -e "\033[1mbase information: \033[0m"
  echo "check time: $check_time"
  echo "run pid: $pid_lo"
  echo "ip: $(ifconfig -a | grep -v 127.0.0.1 | grep inet | grep -v inet6 | awk '{print $2}' | head -1)"
  echo "system version: $sysver"
  echo ""
  echo -e "\033[1mInspection items: \033[0m"
  sleep 2
}

#######################################
# Run the checks named on the command line, in order.  "all" runs every
# check; arguments that are not check names are ignored, never executed.
#######################################
run_requested_checks() {
  local arg check
  for arg in "$@"; do
    if [[ "$arg" == "all" ]]; then
      for check in $CHECK_NAMES; do
        "$check"
      done
    elif is_check_name "$arg"; then
      "$arg"
    fi
  done
}

main() {
  ##1) with no argument, or "help", only print the usage text
  if [[ -z "${1:-}" || "$1" == "help" ]]; then
    usage
    return 0
  fi

  # NOTE(review): the original configuration section was lost from the
  # reviewed copy.  The directories below are the ones the surviving code
  # cleans up; the file names, and taking the run count (lofre) and the pause
  # between runs in seconds (extime) from the environment, are a
  # reconstruction.  Restore the original logic here.
  local started log_kb
  pid_lo=$$
  lofre=${lofre:-1}
  extime=${extime:-0}
  if ! [[ "$lofre" =~ ^[1-9][0-9]*$ ]]; then
    echo "lofre must be a positive integer: $lofre" >&2
    exit 10
  fi
  started=$(date +%Y%m%d_%H%M%S)
  tempdir=${tempdir:-$TOOLS_HOME/tmp/systools_${pid_lo}}
  toolslog=${toolslog:-$TOOLS_HOME/logs/systools_${started}.log}
  logname=${logname:-$STATUS_LOG_DIR/systatus_${started}_${pid_lo}.log}
  mkdir -p -- "$tempdir" "$(dirname -- "$toolslog")" "$(dirname -- "$logname")" \
    || exit 10

  trap on_abort TERM TSTP EXIT

  echo -e "\033[1;32m##############system status check begin###############\033[0m"
  echo ""
  echo "Systools is running begin,time: $(now_stamp) (pid: $pid_lo)" >> "$toolslog"
  sleep 2

  check_disk_space
  handle_stale_runs
  started=$(now_stamp)
  purge_old_logs
  print_base_info "$started"

  lonum=1
  swnum=1
  sinum=1
  finum=1
  while (( lonum <= lofre )); do
    # Stop before a long-running loop lets the status log grow past 20 MB.
    if [[ -f "$logname" ]]; then
      log_kb=$(du -s "$logname" | awk '{ print $1 }')
      if (( ${log_kb:-0} >= 20480 )); then
        echo "!!Systatus log is more than 20M!!"
        echo "Error: Systatus log is more than 20M,exit to run script" >> "$toolslog"
        exit 10
      fi
    fi

    run_requested_checks "$@"

    echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++"
    echo -e "\033[1;32mexecution completed number: $lonum\033[0m"
    echo "------execution completed number: $lonum------" >> "$logname"
    both ""
    if [[ "$lofre" != "$lonum" ]]; then
      sleep "$extime"
    fi
    lonum=$(( lonum + 1 ))
  done
  finlonum=$(( lonum - 1 ))

  trap on_finish EXIT
  echo -e "\033[1;32m##############system status check finish##############\033[0m"
}

main "$@"