#!/usr/bin/ksh
#
# Program : tng_checks.ksh
#
# Date : 4th August, 2003
#
# Author : Troy Muller
#
# Syntax : tng_checks.ksh
#
# Description : This script performs various local
# checks that are logged to syslog and can be picked up
# by TNG for alerting.
#
# Amendments :
###################################################################################
# Define variables here
progname=${0##*/}                       # The name of this program (path stripped from $0)
hostname=$(hostname)                    # The name of the host running the program
basedir=/var/adm/scripts                # top level directory for scripts
bindir=${basedir}                       # executables directory
logdir=${basedir}/logs                  # logs directory
etcdir=${basedir}/etc                   # input files directory
outdir=${basedir}/output                # output files directory
tmpdir=${basedir}/tmp                   # temporary files directory
arcdir=${basedir}/archives              # archived files directory
logfile=${logdir}/${progname%%.*}.log   # log file named after the program, extension removed
localfs=${etcdir}/chk_dsk_${hostname}   # per-host file under etcdir
# Make sure the log directory exists; otherwise the redirection below fails
# and nothing this script reports would be recorded.
[[ -d ${logdir} ]] || mkdir -p "${logdir}"
# Redirect all output (stdout and stderr) to the logfile, appending.
# The target is quoted because it is derived from $0.
exec >>"${logfile}" 2>&1
# Define functions here
# log_msg MESSAGE
# Writes "<progname> : <current date> : MESSAGE" to stdout and sends
# "<progname> : MESSAGE" to syslog via logger at priority local1.alert.
# Reads the global ${progname}; sets the global ${message}.
log_msg()
{
    message=$1
    # $(date) inserts the actual timestamp; a bare word "date" would be
    # printed literally.
    echo "${progname} : $(date) : ${message}"
    logger -p local1.alert "${progname} : ${message}"
}
# cycle_logs LOGFILE
# Rotates LOGFILE through numbered generations: LOGFILE.9 is removed,
# LOGFILE.8 becomes LOGFILE.9, ... LOGFILE.1 becomes LOGFILE.2, and finally
# LOGFILE itself becomes LOGFILE.1. Generations that do not exist are skipped.
# Calls log_msg to record the rotation.
# Returns 1 (and does nothing) when LOGFILE is empty.
# Sets the globals ${logf}, ${loop} and ${loop2}.
cycle_logs()
{
    logf=$1
    if [[ -z ${logf} ]]; then
        # Without a name the commands below would act on ".9", ".8", ...
        echo "cycle_logs : no log file given." >&2
        return 1
    fi
    log_msg "Cycling log file ${logf}."
    loop=9
    # -f keeps rm quiet when the oldest generation does not exist
    # (rm reports that on stderr, so redirecting stdout would not hide it).
    rm -f "${logf}.${loop}"
    while [[ ${loop} -gt 1 ]]
    do
        (( loop2 = loop - 1 ))
        # Only shift generations that are actually present.
        if [[ -f ${logf}.${loop2} ]]; then
            mv "${logf}.${loop2}" "${logf}.${loop}"
        fi
        (( loop = loop - 1 ))
    done
    if [[ -f ${logf} ]]; then
        mv "${logf}" "${logf}.1"
    fi
}
# Script starts here
# Mark the beginning of a run.
log_msg "------------------------------------"
log_msg "Started."

# CPU check: take 10 one-second vmstat samples, add up fields 14 and 15 of
# every sample line, and alert when the sum divided by 10 exceeds 80.
# Lines containing "kthr", "-" or the letter "r" are discarded as headers.
# (Fields 14/15 are presumably user and system CPU - confirm against this
# platform's vmstat layout.)
log_msg "Checking CPU..."
gtotal=$(vmstat 1 10 |
    awk '!/kthr/ && !/-/ && !/r/ { busy += $14 + $15 } END { print busy + 0 }')
(( avcpu = gtotal / 10 ))
if (( avcpu > 80 )); then
    log_msg "CPU problem : CPU bottleneck"
fi
# Check memory thresholds
# Reads the paging counters from "vmstat -s" ten times, averages them, and
# alerts when paging-space paging (ins + outs) exceeds 40% of total paging
# (ins + outs).
# NOTE(review): the ten passes run back to back with no delay between them -
# confirm whether a pause between samples was intended.
log_msg "Checking memory..."
avppaging=0
avpaging=0
for loop in 1 2 3 4 5 6 7 8 9 10
do
    # Take one snapshot per pass so all four counters come from the same report.
    vmstat_s=$(vmstat -s)
    # Each pattern selects exactly one line; "exit" guards against duplicates,
    # which would otherwise break the arithmetic below.
    ppageins=$(echo "${vmstat_s}" | awk '/paging space page ins/ { print $1; exit }')
    ppageouts=$(echo "${vmstat_s}" | awk '/paging space page outs/ { print $1; exit }')
    pageins=$(echo "${vmstat_s}" | awk '/page ins/ && !/paging/ { print $1; exit }')
    pageouts=$(echo "${vmstat_s}" | awk '/page outs/ && !/paging/ { print $1; exit }')
    # Missing counters count as 0.
    (( totalppaging = ${ppageins:-0} + ${ppageouts:-0} ))
    (( totalpaging = ${pageins:-0} + ${pageouts:-0} ))
    (( avppaging = avppaging + totalppaging ))
    (( avpaging = avpaging + totalpaging ))
done
(( avppaging = avppaging / 10 ))
(( avpaging = avpaging / 10 ))
# 40% of the average total paging.
perc_paging=$(( avpaging * 4 / 10 ))
if [[ ${avppaging} -gt ${perc_paging} ]]; then
    log_msg "Memory problem : memory bottleneck"
fi
# Check to see if thrashing is or has occurred
# Starts a trace with "-aj207" and stops it straight away, then searches the
# trace report for the text "thrashing = -1".
# (Presumably -a runs the trace asynchronously and -j 207 limits it to hook
# 207 - confirm against the trace documentation.)
trace -aj207 ; trcstop
# Capture the matching report lines; the assignment needs command substitution,
# otherwise "trcrpt" is only stored as a string and never run.
thrashing=$(trcrpt | grep "thrashing = -1")
if [[ -n ${thrashing} ]]; then
    log_msg "Memory problem : thrashing has occurred"
fi
# Check excessive memory activity
# Alerts when the average free memory (in Mb) over 10 vmstat samples is below
# 10% of (real memory - 8) AND the average of vmstat field 7 is above 5.
gtotal=0
ptotal=0
# Second column of the "size" attribute line of mem0.
realmem=$(lsattr -El mem0 | awk '$1 == "size" { print $2; exit }')
(( compare = ${realmem:-0} - 8 ))
# The accumulators are updated inside the while loop, which is the last stage
# of the pipeline; this relies on ksh running that stage in the current shell.
# NOTE(review): field 7 is read into "pi" - confirm it is the intended column
# of this platform's vmstat output.
vmstat 1 10 | grep -v kthr | grep -v "\-" | grep -v r | awk '{print $4,$7}' | while read fre pi
do
    # convert free pages to Mb: pages * 4096 / 1024 / 1024 == pages / 256.
    # Dividing directly avoids overflowing shell arithmetic on large values.
    (( mb_value = fre / 256 ))
    (( gtotal = gtotal + mb_value ))
    (( ptotal = ptotal + pi ))
done
(( avfree = gtotal / 10 ))
(( avpi = ptotal / 10 ))
# 10% of the comparison value.
(( perc_comp = compare / 10 ))
if [[ ${avfree} -lt ${perc_comp} && ${avpi} -gt 5 ]]; then
    log_msg "Memory problem : excessive memory activity"
fi
# I/O check: take 10 one-second vmstat samples, add up fields 15 and 17 of
# every sample line, and alert when the sum divided by 10 exceeds 80.
# Lines containing "kthr", "-" or the letter "r" are discarded as headers.
# (Fields 15/17 are presumably system CPU and I/O wait - confirm against this
# platform's vmstat layout.)
log_msg "Checking i/o..."
gtotal=$(vmstat 1 10 |
    awk '!/kthr/ && !/-/ && !/r/ { busy += $15 + $17 } END { print busy + 0 }')
(( avcpu = gtotal / 10 ))
if (( avcpu > 80 )); then
    log_msg "I/O problem : I/O bottleneck"
fi
# Check MQ
# Reads the MQ status file and alerts when no SDR (sender) line, or no RCVR
# (receiver) line, contains the word RUNNING. A missing or unreadable file
# yields empty results and therefore raises both alerts.
# (The file is presumably produced by another job - not visible here.)
log_msg "Checking MQ..."
MQ_FILE=${outdir}/mq_info.txt
# grep reads the file directly; the results must be captured with command
# substitution, otherwise ${MQ_FILE} itself would be executed as a command.
sender=$(grep SDR "${MQ_FILE}")
receiv=$(grep RCVR "${MQ_FILE}")
if [[ ${sender} != *RUNNING* ]]; then
    log_msg "MQ Sender is down."
fi
if [[ ${receiv} != *RUNNING* ]]; then
    log_msg "MQ Receiver is down."
fi
# Check for network IP layer packet drops
# Scans "netstat -s" for overflow counters and alerts for every line whose
# leading count is non-zero; the rest of the line is used as the description.
log_msg "Checking network..."
netstat -s | grep overflow | while read -r value element
do
    # Skip lines that do not start with a plain number, so the numeric
    # comparison below cannot fail on text.
    case ${value} in
        ''|*[!0-9]*) continue ;;
    esac
    if [[ ${value} -ne 0 ]]; then
        log_msg "Network problem : $element"
    fi
done
log_msg "Finished."
cycle_logs "${logfile}"
# Plain exit: a trailing "|" here would turn exit into the first stage of a
# pipeline, so it would only leave a subshell and execution would carry on.
exit 0
#!/usr/bin/ksh
#
# Program : tng_checks.ksh
#
# Date : 4th August, 2003
#
# Author : Troy Muller
#
# Syntax : tng_checks.ksh
#
# Description : This script performs various local
# checks that are logged to syslog and can be picked up
# by TNG for alerting.
#
# Amendments :
###################################################################################
# Define variables here
progname=${0##*/}                       # The name of this program (path stripped from $0)
hostname=$(hostname)                    # The name of the host running the program
basedir=/var/adm/scripts                # top level directory for scripts
bindir=${basedir}                       # executables directory
logdir=${basedir}/logs                  # logs directory
etcdir=${basedir}/etc                   # input files directory
outdir=${basedir}/output                # output files directory
tmpdir=${basedir}/tmp                   # temporary files directory
arcdir=${basedir}/archives              # archived files directory
logfile=${logdir}/${progname%%.*}.log   # log file named after the program, extension removed
localfs=${etcdir}/chk_dsk_${hostname}   # per-host file under etcdir
# Make sure the log directory exists; otherwise the redirection below fails
# and nothing this script reports would be recorded.
[[ -d ${logdir} ]] || mkdir -p "${logdir}"
# Redirect all output (stdout and stderr) to the logfile, appending.
# The target is quoted because it is derived from $0.
exec >>"${logfile}" 2>&1
# Define functions here
# log_msg MESSAGE
# Writes "<progname> : <current date> : MESSAGE" to stdout and sends
# "<progname> : MESSAGE" to syslog via logger at priority local1.alert.
# Reads the global ${progname}; sets the global ${message}.
log_msg()
{
    message=$1
    # $(date) inserts the actual timestamp; a bare word "date" would be
    # printed literally.
    echo "${progname} : $(date) : ${message}"
    logger -p local1.alert "${progname} : ${message}"
}
# cycle_logs LOGFILE
# Rotates LOGFILE through numbered generations: LOGFILE.9 is removed,
# LOGFILE.8 becomes LOGFILE.9, ... LOGFILE.1 becomes LOGFILE.2, and finally
# LOGFILE itself becomes LOGFILE.1. Generations that do not exist are skipped.
# Calls log_msg to record the rotation.
# Returns 1 (and does nothing) when LOGFILE is empty.
# Sets the globals ${logf}, ${loop} and ${loop2}.
cycle_logs()
{
    logf=$1
    if [[ -z ${logf} ]]; then
        # Without a name the commands below would act on ".9", ".8", ...
        echo "cycle_logs : no log file given." >&2
        return 1
    fi
    log_msg "Cycling log file ${logf}."
    loop=9
    # -f keeps rm quiet when the oldest generation does not exist
    # (rm reports that on stderr, so redirecting stdout would not hide it).
    rm -f "${logf}.${loop}"
    while [[ ${loop} -gt 1 ]]
    do
        (( loop2 = loop - 1 ))
        # Only shift generations that are actually present.
        if [[ -f ${logf}.${loop2} ]]; then
            mv "${logf}.${loop2}" "${logf}.${loop}"
        fi
        (( loop = loop - 1 ))
    done
    if [[ -f ${logf} ]]; then
        mv "${logf}" "${logf}.1"
    fi
}
# Script starts here
# Mark the beginning of a run.
log_msg "------------------------------------"
log_msg "Started."

# CPU check: take 10 one-second vmstat samples, add up fields 14 and 15 of
# every sample line, and alert when the sum divided by 10 exceeds 80.
# Lines containing "kthr", "-" or the letter "r" are discarded as headers.
# (Fields 14/15 are presumably user and system CPU - confirm against this
# platform's vmstat layout.)
log_msg "Checking CPU..."
gtotal=$(vmstat 1 10 |
    awk '!/kthr/ && !/-/ && !/r/ { busy += $14 + $15 } END { print busy + 0 }')
(( avcpu = gtotal / 10 ))
if (( avcpu > 80 )); then
    log_msg "CPU problem : CPU bottleneck"
fi
# Check memory thresholds
# Reads the paging counters from "vmstat -s" ten times, averages them, and
# alerts when paging-space paging (ins + outs) exceeds 40% of total paging
# (ins + outs).
# NOTE(review): the ten passes run back to back with no delay between them -
# confirm whether a pause between samples was intended.
log_msg "Checking memory..."
avppaging=0
avpaging=0
for loop in 1 2 3 4 5 6 7 8 9 10
do
    # Take one snapshot per pass so all four counters come from the same report.
    vmstat_s=$(vmstat -s)
    # Each pattern selects exactly one line; "exit" guards against duplicates,
    # which would otherwise break the arithmetic below.
    ppageins=$(echo "${vmstat_s}" | awk '/paging space page ins/ { print $1; exit }')
    ppageouts=$(echo "${vmstat_s}" | awk '/paging space page outs/ { print $1; exit }')
    pageins=$(echo "${vmstat_s}" | awk '/page ins/ && !/paging/ { print $1; exit }')
    pageouts=$(echo "${vmstat_s}" | awk '/page outs/ && !/paging/ { print $1; exit }')
    # Missing counters count as 0.
    (( totalppaging = ${ppageins:-0} + ${ppageouts:-0} ))
    (( totalpaging = ${pageins:-0} + ${pageouts:-0} ))
    (( avppaging = avppaging + totalppaging ))
    (( avpaging = avpaging + totalpaging ))
done
(( avppaging = avppaging / 10 ))
(( avpaging = avpaging / 10 ))
# 40% of the average total paging.
perc_paging=$(( avpaging * 4 / 10 ))
if [[ ${avppaging} -gt ${perc_paging} ]]; then
    log_msg "Memory problem : memory bottleneck"
fi
# Check to see if thrashing is or has occurred
# Starts a trace with "-aj207" and stops it straight away, then searches the
# trace report for the text "thrashing = -1".
# (Presumably -a runs the trace asynchronously and -j 207 limits it to hook
# 207 - confirm against the trace documentation.)
trace -aj207 ; trcstop
# Capture the matching report lines; the assignment needs command substitution,
# otherwise "trcrpt" is only stored as a string and never run.
thrashing=$(trcrpt | grep "thrashing = -1")
if [[ -n ${thrashing} ]]; then
    log_msg "Memory problem : thrashing has occurred"
fi
# Check excessive memory activity
# Alerts when the average free memory (in Mb) over 10 vmstat samples is below
# 10% of (real memory - 8) AND the average of vmstat field 7 is above 5.
gtotal=0
ptotal=0
# Second column of the "size" attribute line of mem0.
realmem=$(lsattr -El mem0 | awk '$1 == "size" { print $2; exit }')
(( compare = ${realmem:-0} - 8 ))
# The accumulators are updated inside the while loop, which is the last stage
# of the pipeline; this relies on ksh running that stage in the current shell.
# NOTE(review): field 7 is read into "pi" - confirm it is the intended column
# of this platform's vmstat output.
vmstat 1 10 | grep -v kthr | grep -v "\-" | grep -v r | awk '{print $4,$7}' | while read fre pi
do
    # convert free pages to Mb: pages * 4096 / 1024 / 1024 == pages / 256.
    # Dividing directly avoids overflowing shell arithmetic on large values.
    (( mb_value = fre / 256 ))
    (( gtotal = gtotal + mb_value ))
    (( ptotal = ptotal + pi ))
done
(( avfree = gtotal / 10 ))
(( avpi = ptotal / 10 ))
# 10% of the comparison value.
(( perc_comp = compare / 10 ))
if [[ ${avfree} -lt ${perc_comp} && ${avpi} -gt 5 ]]; then
    log_msg "Memory problem : excessive memory activity"
fi
# I/O check: take 10 one-second vmstat samples, add up fields 15 and 17 of
# every sample line, and alert when the sum divided by 10 exceeds 80.
# Lines containing "kthr", "-" or the letter "r" are discarded as headers.
# (Fields 15/17 are presumably system CPU and I/O wait - confirm against this
# platform's vmstat layout.)
log_msg "Checking i/o..."
gtotal=$(vmstat 1 10 |
    awk '!/kthr/ && !/-/ && !/r/ { busy += $15 + $17 } END { print busy + 0 }')
(( avcpu = gtotal / 10 ))
if (( avcpu > 80 )); then
    log_msg "I/O problem : I/O bottleneck"
fi
# Check MQ
# Reads the MQ status file and alerts when no SDR (sender) line, or no RCVR
# (receiver) line, contains the word RUNNING. A missing or unreadable file
# yields empty results and therefore raises both alerts.
# (The file is presumably produced by another job - not visible here.)
log_msg "Checking MQ..."
MQ_FILE=${outdir}/mq_info.txt
# grep reads the file directly; the results must be captured with command
# substitution, otherwise ${MQ_FILE} itself would be executed as a command.
sender=$(grep SDR "${MQ_FILE}")
receiv=$(grep RCVR "${MQ_FILE}")
if [[ ${sender} != *RUNNING* ]]; then
    log_msg "MQ Sender is down."
fi
if [[ ${receiv} != *RUNNING* ]]; then
    log_msg "MQ Receiver is down."
fi
# Check for network IP layer packet drops
# Scans "netstat -s" for overflow counters and alerts for every line whose
# leading count is non-zero; the rest of the line is used as the description.
log_msg "Checking network..."
netstat -s | grep overflow | while read -r value element
do
    # Skip lines that do not start with a plain number, so the numeric
    # comparison below cannot fail on text.
    case ${value} in
        ''|*[!0-9]*) continue ;;
    esac
    if [[ ${value} -ne 0 ]]; then
        log_msg "Network problem : $element"
    fi
done
log_msg "Finished."
cycle_logs "${logfile}"
exit 0