Monitoring LA

Материал из Webko Wiki
Перейти к навигации Перейти к поиску
vim mon_la.sh
chmod +x mon_la.sh
#!/bin/sh

## Environment: fixed search path and the C locale so tool output is predictable.
PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin
LC_ALL=C
export PATH LC_ALL

## Host name used in mail subjects, and the notification recipient.
srv=$(uname -n)
# NOTE(review): this value looks like an e-mail obfuscation placeholder, not a
# deliverable address - confirm and replace with the real recipient.
info_mail='[email protected]'

## Load-average thresholds (integer part of the 1-minute LA), lowest to highest.
la_info=10
la_warning=15
la_critical=20
la_death=80

## State files, one per level; $lock is a glob covering all of them.
lock='/usr/local/var/mon_la.*'
lock_info='/usr/local/var/mon_la.info'
lock_warning='/usr/local/var/mon_la.warning'
lock_critical='/usr/local/var/mon_la.critical'
lock_death='/usr/local/var/mon_la.death'

## Miscellaneous settings.
death_services='httpd perl'
topout='/var/tmp/top.tmp'

## Current load: everything before the first '.' in /proc/loadavg, i.e. the
## integer part of the first (1-minute) figure.
la=$(cut -d'.' -f1 < /proc/loadavg)
# la=12    # uncomment to force a load value when testing by hand

# normal LA level
# Load is below the lowest threshold: if any level left a state file behind,
# wipe all of them, then stop - nothing else in the script applies.
if [ "$la" -lt "$la_info" ]; then
	if [ -e "$lock_info" ] || [ -e "$lock_warning" ] ||
	   [ -e "$lock_critical" ] || [ -e "$lock_death" ]; then
		# $lock holds a glob pattern; it is left unquoted on purpose so the
		# shell expands it to every mon_la.* state file.
		rm -rf $lock >/dev/null 2>&1
#		mail -s "[monitoring] LA on $srv change to normal value = $la" $info_mail
	fi
	exit
fi

# info LA level
# $lock_info stores a run counter. Each run in this band advances it 1..5;
# a run that finds 5 sends a mail and restarts the counter at 1. A missing
# file or any other content also (re)starts it at 1.
# NOTE(review): the "last 5 min." subject presumably assumes the script is
# scheduled once per minute - confirm against the cron entry.
if [ "$la" -ge "$la_info" ] && [ "$la" -lt "$la_warning" ]; then
	# With no counter file, n keeps whatever value it already had.
	[ -e "$lock_info" ] && n=$(cat "$lock_info")
	case $n in
		1|2|3|4)
			echo "$((n + 1))" > "$lock_info"
			;;
		5)
			# $info_mail stays unquoted so that its value is word-split into
			# separate recipient arguments. No body is supplied here; mail
			# reads it from the script's standard input.
			mail -s "[monitoring] LA=$la last 5 min. on $srv" $info_mail
			echo 1 > "$lock_info"
			;;
		*)
			echo 1 > "$lock_info"
			;;
	esac
	exit
fi

# warning LA level
# Notify once: the state file marks that the warning mail has already been
# sent, so later runs in this band skip this section and fall through.
if [ "$la" -ge "$la_warning" ] && [ "$la" -lt "$la_critical" ] &&
   [ ! -e "$lock_warning" ]; then
	touch "$lock_warning"
	# $info_mail stays unquoted so that its value is word-split into separate
	# recipient arguments. No body is supplied; mail reads it from stdin.
	mail -s "[monitoring] WARNING LA=$la on $srv" $info_mail
	exit
fi

# critical LA level
# Runs once per excursion (guarded by $lock_critical): take a batch snapshot
# of top, SIGKILL the httpd/proftpd/perl processes in it whose line does not
# mention the root or ftp user, then mail the snapshot.
if [ "$la" -ge "$la_critical" ] && [ "$la" -lt "$la_death" ] && [ ! -e "$lock_critical" ]
then
	touch "$lock_critical"
	top -b -n1 > "$topout"
	# Keep lines ending in one of the target command names (followed by
	# trailing whitespace), drop lines with a root/ftp column, and take the
	# first column of what is left as the PID.
	grep -E "(httpd\s+$|proftpd\s+$|perl\s+$)" "$topout" | grep -Ev "(\s+root\s+|\s+ftp\s+)" | awk '{print $1}' | while read -r pid
	do
		kill -9 "$pid"
	done
	# printf instead of `echo -e`: under /bin/sh the -e flag is not portable
	# (it can be printed literally), and echo's escape processing would also
	# mangle any backslashes inside the captured top output.
	# $info_mail stays unquoted so that its value is word-split into separate
	# recipient arguments.
	printf 'top output:\n\n%s\n' "$(cat "$topout")" | mail -s "[monitoring] CRITICAL LA=$la on $srv" $info_mail
	rm -rf "$topout"
	exit
fi

# death LA level
# Unlike the lower levels there is no state-file guard in the condition, so
# this section runs on every invocation while the load is at or above
# $la_death: snapshot top, SIGKILL the httpd/proftpd/perl processes whose line
# does not mention the root or ftp user, and mail the snapshot.
if [ "$la" -ge "$la_death" ]
then
	touch "$lock_death"
	# Write and read the snapshot through $topout everywhere, so the capture,
	# the grep, the mail body and the cleanup always refer to the same file.
	top -b -n1 > "$topout"
	# Keep lines ending in one of the target command names (followed by
	# trailing whitespace), drop lines with a root/ftp column, and take the
	# first column of what is left as the PID.
	grep -E "(httpd\s+$|proftpd\s+$|perl\s+$)" "$topout" | grep -Ev "(\s+root\s+|\s+ftp\s+)" | awk '{print $1}' | while read -r pid
	do
		kill -9 "$pid"
	done
	# printf instead of `echo -e`: under /bin/sh the -e flag is not portable
	# (it can be printed literally), and echo's escape processing would also
	# mangle any backslashes inside the captured top output.
	# $info_mail stays unquoted so that its value is word-split into separate
	# recipient arguments.
	printf 'top output:\n\n%s\n' "$(cat "$topout")" | mail -s "[monitoring] DEATH LA=$la on $srv" $info_mail
	rm -rf "$topout"
	exit
fi