本帖最后由 jinm 于 2016-3-8 14:51 编辑
公司的监控脚本大多数是python写的,我目前只看得懂一些简单的sh脚本,发几个给大家参考一下。某些具体的IP地址我都替换成了“IP”,脚本基本上都是组合的,存在互相调用的情况。
1、
#!/bin/bash
# Website availability check for ifaxin.com.
#
# Requests the URL once with curl and looks only at the HTTP status code.
# Any status other than 200 is treated as an outage: a timestamped line is
# appended to the status log, the SMS alert script is invoked, and
# notice_alarm.py is called. The script always exits 0.
alert='/root/monitor_sc/smsalert.sh'
web_stat_log='/root/monitor_sc/outbound_status.log'
url="http://ifaxin.com/"

# -o /dev/null discards the response body, -s silences progress output and
# -w '%{http_code}' makes curl print just the numeric status code, which is
# what gets captured here. Both timeouts are 30 seconds.
server_status_code=$(curl -o /dev/null -s -m 30 --connect-timeout 30 -w '%{http_code}' "$url")

if [ "$server_status_code" != "200" ]; then
  echo "$(date '+%Y-%m-%d %H:%M:%S') ifaxin website is unreacheable!!!" >>"$web_stat_log"
  # sms
  /bin/bash "$alert" "" "ifaxin website is unreacheable!!!" "sms"
  # mail (disabled)
  #/bin/bash $alert "outbound $outbound 2501 port error!!!" "$outbound 2501 port error!!!" "mail"
  /usr/local/bin/python /root/monitor_sc/notice_alarm.py "ifaxin website is down !!!" "ifaxin website is unreacheable !!" "ifaxin website监控"
fi
exit 0
2、
#!/bin/bash
# MySQL load check.
#
# For four MySQL endpoints, reads the number of rows in
# information_schema.processlist (connections) and the number of
# "show processlist" lines containing "Query" (running queries), then calls
# notice_alarm.py once for every connection count above 5000 and once for
# every query count above 2000.
#
# NOTE(review): set -x echoes every command to stderr, including the -p
# password arguments below - consider removing it once debugging is done.
set -x

conn_limit=5000
query_limit=2000
notice='/root/monitor_sc/notice_alarm.py'

# over_limit VALUE LIMIT
# Succeeds only when VALUE is a plain non-negative integer greater than
# LIMIT. Empty or non-numeric values (e.g. when the mysql call failed) never
# count as over the limit; with expr they were compared as strings, which
# could raise a false alarm.
over_limit() {
  [[ "$1" =~ ^[0-9]+$ ]] && [ "$1" -gt "$2" ]
}

# grep -v 'count' drops the "count(host)" header row so only the number is
# left; grep -c counts the processlist lines that contain "Query".
conn1=$(mysql -h IP -P3306 -u2 -p1 -e "use information_schema;select count(host) from processlist;" | grep -v 'count')
query1=$(mysql -h IP -P3306 -u2 -p1 -e "show processlist;" | grep -c "Query")
conn2=$(mysql -h IP -P3306 -u1 -p2 -e "use information_schema;select count(host) from processlist;" | grep -v 'count')
query2=$(mysql -h IP -P3306 -u1 -p2 -e "show processlist;" | grep -c "Query")
conn3=$(mysql -h IP -P3306 -u1 -p2 -e "use information_schema;select count(host) from processlist;" | grep -v 'count')
query3=$(mysql -h IP -P3306 -u1 -p2 -e "show processlist;" | grep -c "Query")
conn4=$(mysql -h IP -P3306 -u2 -p2 -e "use information_schema;select count(host) from processlist;" | grep -v 'count')
# The original had "-u1-p2" (no space), which passes "1-p2" as the user name
# and no password at all.
# NOTE(review): conn4 logs in with -u2 while this query uses -u1 - confirm
# which account is intended.
query4=$(mysql -h IP -P3306 -u1 -p2 -e "show processlist;" | grep -c "Query")

# The lists are left unquoted on purpose: an empty result simply contributes
# no iteration.
for conn in $conn1 $conn2 $conn3 $conn4; do
  if over_limit "$conn" "$conn_limit"; then
    /usr/local/bin/python "$notice" "mysql conn max" "mysql conn max" "mysql 检查"
  fi
done

for query in $query1 $query2 $query3 $query4; do
  if over_limit "$query" "$query_limit"; then
    /usr/local/bin/python "$notice" "mysql query max" "mysql query max" "mysql 检查"
  fi
done
3、
#!/bin/bash
#
# script_name: chk_ping.sh
#
# Network reachability check. Every host listed in /opt/lm/all_host is pinged
# with 20 packets. When the reported packet loss reaches loss_limit the host
# is alerted as unreachable (SMS, notice_alarm.py, mail), the ping output is
# appended to the error log and a per-host flag file is written. When a host
# is below the limit and still has a flag file, "network ok" notifications
# are sent and the flag file is removed.
#
# Sample ping output the loss parsing relies on:
#
# ping -f -c 20 IP
#PING IP (IP) 56(84) bytes of data.
#....................
#--- IP ping statistics ---
#20 packets transmitted, 0 received, 100% packet loss, time 236ms
#
# ping -f -c 20 199.192.152.2
#PING 199.192.152.2 (199.192.152.2) 56(84) bytes of data.
#--- 199.192.152.2 ping statistics ---
#20 packets transmitted, 20 received, 0% packet loss, time 227ms
#rtt min/avg/max/mdev = 266.758/268.957/272.593/1.787 ms, pipe 20, ipg/ewma 11.959/269.829 ms
#
# variables
delay_limit=100 # not referenced anywhere else in this script
loss_limit=95   # packet loss percentage at or above which a host is alerted
sh_dir="/root/monitor_sc"
#hosts=`cat /root/monitor_sc/all_hosts`
hosts=$(cat /opt/lm/all_host)
alert='/root/monitor_sc/smsalert.sh'
error_log="$sh_dir/log/ping_error.log"
ping_log="$sh_dir/log/ping.log"
ping_loss="$sh_dir/log/ping.loss"

# parse_loss TEXT
# Prints the integer part of the "N% packet loss" figure found in TEXT.
# The figure is located by its "% packet loss" suffix rather than by field
# position, so a summary containing an extra "+N errors," field is still read
# correctly. Prints 100 when TEXT holds no such figure (ping produced no
# summary line), so that a host that could not be measured is reported as
# unreachable instead of being silently treated as healthy.
parse_loss() {
  local pattern='([0-9]+)(\.[0-9]+)?% packet loss'
  if [[ "$1" =~ $pattern ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    printf '100\n'
  fi
}

# $hosts is left unquoted on purpose: the file content is split on whitespace
# into one host per iteration.
for HOST in $hosts; do
  flag_ping_file="$sh_dir/log/$HOST.ping"

  # The log files are shared by all hosts and overwritten on every iteration.
  ping -W 60 -f -c 20 "$HOST" >"$ping_log" 2>&1
  grep "packet loss" "$ping_log" >"$ping_loss"
  loss=$(parse_loss "$(cat "$ping_loss")")

  if [ "$loss" -ge "$loss_limit" ]; then
    # loss at or above the limit: host is considered unreachable
    #if [ ! -f $flag_ping_file ];then
    # sms; notice_alarm.py runs only when the SMS script succeeded, and only
    # its output goes to test.log
    /bin/bash "$alert" "" "$HOST is unreachable" "sms" && /usr/local/bin/python /root/monitor_sc/notice_alarm.py "$HOST is unreachable" "$HOST is unreachable" "SendCloud主机存活监控" >>/root/monitor_sc/test.log 2>&1
    # mail
    /bin/bash "$alert" "$HOST is unreachable" "$HOST is unreachable" "mail"
    # log
    date +'%F %T' >>"$error_log"
    cat "$ping_log" >>"$error_log"
    # flag: remember the outage so the recovery can be announced later
    echo "$HOST network error" >"$flag_ping_file"
    #fi
  else
    # loss below the limit: announce recovery only if an outage was flagged
    if [ -f "$flag_ping_file" ]; then
      # sms
      /bin/bash "$alert" "" "$HOST network ok" "sms"
      /usr/local/bin/python /root/monitor_sc/notice_alarm.py "$HOST network ok" "$HOST network ok" "SendCloud主机存活监控"
      /usr/local/bin/python /root/monitor_sc/duanxin_alarm.py
      # mail
      /bin/bash "$alert" "$HOST network ok" "$HOST network ok" "mail"
      # delete flag
      rm -f "$flag_ping_file"
    fi
  fi
done
公司的监控脚本大多数是python写的,我目前只看得懂一些简单的sh脚本,发几个给大家参考一下。某些具体的IP地址我都替换成了“IP”,脚本基本上都是组合的,存在互相调用的情况。
1、
#!/bin/bash
# Website availability check for ifaxin.com.
#
# Requests the URL once with curl and looks only at the HTTP status code.
# Any status other than 200 is treated as an outage: a timestamped line is
# appended to the status log, the SMS alert script is invoked, and
# notice_alarm.py is called. The script always exits 0.
alert='/root/monitor_sc/smsalert.sh'
web_stat_log='/root/monitor_sc/outbound_status.log'
url="http://ifaxin.com/"

# -o /dev/null discards the response body, -s silences progress output and
# -w '%{http_code}' makes curl print just the numeric status code, which is
# what gets captured here. Both timeouts are 30 seconds.
server_status_code=$(curl -o /dev/null -s -m 30 --connect-timeout 30 -w '%{http_code}' "$url")

if [ "$server_status_code" != "200" ]; then
  echo "$(date '+%Y-%m-%d %H:%M:%S') ifaxin website is unreacheable!!!" >>"$web_stat_log"
  # sms
  /bin/bash "$alert" "" "ifaxin website is unreacheable!!!" "sms"
  # mail (disabled)
  #/bin/bash $alert "outbound $outbound 2501 port error!!!" "$outbound 2501 port error!!!" "mail"
  /usr/local/bin/python /root/monitor_sc/notice_alarm.py "ifaxin website is down !!!" "ifaxin website is unreacheable !!" "ifaxin website监控"
fi
exit 0
2、
#!/bin/bash
# MySQL load check.
#
# For four MySQL endpoints, reads the number of rows in
# information_schema.processlist (connections) and the number of
# "show processlist" lines containing "Query" (running queries), then calls
# notice_alarm.py once for every connection count above 5000 and once for
# every query count above 2000.
#
# NOTE(review): set -x echoes every command to stderr, including the -p
# password arguments below - consider removing it once debugging is done.
set -x

conn_limit=5000
query_limit=2000
notice='/root/monitor_sc/notice_alarm.py'

# over_limit VALUE LIMIT
# Succeeds only when VALUE is a plain non-negative integer greater than
# LIMIT. Empty or non-numeric values (e.g. when the mysql call failed) never
# count as over the limit; with expr they were compared as strings, which
# could raise a false alarm.
over_limit() {
  [[ "$1" =~ ^[0-9]+$ ]] && [ "$1" -gt "$2" ]
}

# grep -v 'count' drops the "count(host)" header row so only the number is
# left; grep -c counts the processlist lines that contain "Query".
conn1=$(mysql -h IP -P3306 -u2 -p1 -e "use information_schema;select count(host) from processlist;" | grep -v 'count')
query1=$(mysql -h IP -P3306 -u2 -p1 -e "show processlist;" | grep -c "Query")
conn2=$(mysql -h IP -P3306 -u1 -p2 -e "use information_schema;select count(host) from processlist;" | grep -v 'count')
query2=$(mysql -h IP -P3306 -u1 -p2 -e "show processlist;" | grep -c "Query")
conn3=$(mysql -h IP -P3306 -u1 -p2 -e "use information_schema;select count(host) from processlist;" | grep -v 'count')
query3=$(mysql -h IP -P3306 -u1 -p2 -e "show processlist;" | grep -c "Query")
conn4=$(mysql -h IP -P3306 -u2 -p2 -e "use information_schema;select count(host) from processlist;" | grep -v 'count')
# The original had "-u1-p2" (no space), which passes "1-p2" as the user name
# and no password at all.
# NOTE(review): conn4 logs in with -u2 while this query uses -u1 - confirm
# which account is intended.
query4=$(mysql -h IP -P3306 -u1 -p2 -e "show processlist;" | grep -c "Query")

# The lists are left unquoted on purpose: an empty result simply contributes
# no iteration.
for conn in $conn1 $conn2 $conn3 $conn4; do
  if over_limit "$conn" "$conn_limit"; then
    /usr/local/bin/python "$notice" "mysql conn max" "mysql conn max" "mysql 检查"
  fi
done

for query in $query1 $query2 $query3 $query4; do
  if over_limit "$query" "$query_limit"; then
    /usr/local/bin/python "$notice" "mysql query max" "mysql query max" "mysql 检查"
  fi
done
3、
#!/bin/bash
#
# script_name: chk_ping.sh
#
# Network reachability check. Every host listed in /opt/lm/all_host is pinged
# with 20 packets. When the reported packet loss reaches loss_limit the host
# is alerted as unreachable (SMS, notice_alarm.py, mail), the ping output is
# appended to the error log and a per-host flag file is written. When a host
# is below the limit and still has a flag file, "network ok" notifications
# are sent and the flag file is removed.
#
# Sample ping output the loss parsing relies on:
#
# ping -f -c 20 IP
#PING IP (IP) 56(84) bytes of data.
#....................
#--- IP ping statistics ---
#20 packets transmitted, 0 received, 100% packet loss, time 236ms
#
# ping -f -c 20 199.192.152.2
#PING 199.192.152.2 (199.192.152.2) 56(84) bytes of data.
#--- 199.192.152.2 ping statistics ---
#20 packets transmitted, 20 received, 0% packet loss, time 227ms
#rtt min/avg/max/mdev = 266.758/268.957/272.593/1.787 ms, pipe 20, ipg/ewma 11.959/269.829 ms
#
# variables
delay_limit=100 # not referenced anywhere else in this script
loss_limit=95   # packet loss percentage at or above which a host is alerted
sh_dir="/root/monitor_sc"
#hosts=`cat /root/monitor_sc/all_hosts`
hosts=$(cat /opt/lm/all_host)
alert='/root/monitor_sc/smsalert.sh'
error_log="$sh_dir/log/ping_error.log"
ping_log="$sh_dir/log/ping.log"
ping_loss="$sh_dir/log/ping.loss"

# parse_loss TEXT
# Prints the integer part of the "N% packet loss" figure found in TEXT.
# The figure is located by its "% packet loss" suffix rather than by field
# position, so a summary containing an extra "+N errors," field is still read
# correctly. Prints 100 when TEXT holds no such figure (ping produced no
# summary line), so that a host that could not be measured is reported as
# unreachable instead of being silently treated as healthy.
parse_loss() {
  local pattern='([0-9]+)(\.[0-9]+)?% packet loss'
  if [[ "$1" =~ $pattern ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    printf '100\n'
  fi
}

# $hosts is left unquoted on purpose: the file content is split on whitespace
# into one host per iteration.
for HOST in $hosts; do
  flag_ping_file="$sh_dir/log/$HOST.ping"

  # The log files are shared by all hosts and overwritten on every iteration.
  ping -W 60 -f -c 20 "$HOST" >"$ping_log" 2>&1
  grep "packet loss" "$ping_log" >"$ping_loss"
  loss=$(parse_loss "$(cat "$ping_loss")")

  if [ "$loss" -ge "$loss_limit" ]; then
    # loss at or above the limit: host is considered unreachable
    #if [ ! -f $flag_ping_file ];then
    # sms; notice_alarm.py runs only when the SMS script succeeded, and only
    # its output goes to test.log
    /bin/bash "$alert" "" "$HOST is unreachable" "sms" && /usr/local/bin/python /root/monitor_sc/notice_alarm.py "$HOST is unreachable" "$HOST is unreachable" "SendCloud主机存活监控" >>/root/monitor_sc/test.log 2>&1
    # mail
    /bin/bash "$alert" "$HOST is unreachable" "$HOST is unreachable" "mail"
    # log
    date +'%F %T' >>"$error_log"
    cat "$ping_log" >>"$error_log"
    # flag: remember the outage so the recovery can be announced later
    echo "$HOST network error" >"$flag_ping_file"
    #fi
  else
    # loss below the limit: announce recovery only if an outage was flagged
    if [ -f "$flag_ping_file" ]; then
      # sms
      /bin/bash "$alert" "" "$HOST network ok" "sms"
      /usr/local/bin/python /root/monitor_sc/notice_alarm.py "$HOST network ok" "$HOST network ok" "SendCloud主机存活监控"
      /usr/local/bin/python /root/monitor_sc/duanxin_alarm.py
      # mail
      /bin/bash "$alert" "$HOST network ok" "$HOST network ok" "mail"
      # delete flag
      rm -f "$flag_ping_file"
    fi
  fi
done
编辑回复