diff --git a/README.md b/README.md index 0acad49..2bd8b13 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,4 @@ # nagios-plugins-linux_stats +This Nagios plugin checks Linux system performance (CPU, memory, load, disk usage, disk I/O, network usage, open files and processes). + diff --git a/check_linux_stats.pl b/check_linux_stats.pl new file mode 100644 index 0000000..748cf48 --- /dev/null +++ b/check_linux_stats.pl @@ -0,0 +1,688 @@ +#!/usr/bin/perl +# ---------------------------------------------------- # +# File : check_linux_stats +# Author : Damien SIAUD +# Date : 07/12/2009 +# Rev. Date : 07/05/2010 +# ---------------------------------------------------- # +# This script requires Sys::Statistics::Linux +# +# Check plugin for Nagios +# +# License Information: +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. 
+# +# ---------------------------------------------------- # + +use FindBin; +use lib $FindBin::Bin; +#use lib "/usr/local/nagios/libexec"; +use utils qw($TIMEOUT %ERRORS &print_revision &support); +use Getopt::Long; +use Sys::Statistics::Linux; +use Sys::Statistics::Linux::Processes; +use Sys::Statistics::Linux::SysInfo; +#use Data::Dumper; + +use vars qw($script_name $script_version $o_sleep $o_pattern $o_cpu $o_procs $o_process $o_mem $o_net $o_disk $o_io $o_load $o_file $o_socket $o_paging $o_uptime $o_help $o_version $o_warning $o_critical $o_unit); +use strict; + +# --------------------------- globals -------------------------- # + +$script_name = "check_linux_stats"; +$script_version = "1.3.1"; +$o_help = undef; +$o_pattern = undef; +$o_version = undef; +$o_warning = 0; +$o_critical = 0; +$o_sleep = 1; +$o_unit = "MB"; +my $status = 'UNKNOWN'; + +# ---------------------------- main ----------------------------- # +check_options(); + +if($o_cpu){ + check_cpu(); +} +elsif($o_mem){ + check_mem(); +} +elsif($o_disk){ + check_disk(); +} +elsif($o_io){ + check_io(); +} +elsif($o_net){ + check_net(); +} +elsif($o_load){ + check_load(); +} +elsif($o_file){ + check_file(); +} +elsif($o_procs){ + check_procs(); +} +elsif($o_socket){ + check_socket(); +} +elsif($o_process){ + check_process(); +} +elsif($o_paging){ + check_paging(); +} +elsif($o_uptime){ + check_uptime(); +} +else { + help(); +} + +print "\n"; + +exit $ERRORS{$status}; + + +sub check_cpu { + my $lxs = Sys::Statistics::Linux->new(cpustats => 1); + $lxs->init; + sleep $o_sleep; + my $stat = $lxs->get; + + if(defined($stat->cpustats)) { + $status = "OK"; + my $cpu = $stat->cpustats->{cpu}; + my $cpu_used=sprintf("%.2f", (100-$cpu->{idle})); + + if ($cpu_used >= $o_critical) { + $status = "CRITICAL"; + } + elsif ($cpu_used >= $o_warning) { + $status = "WARNING"; + } + + my $perfdata .= "|" + ."user=$cpu->{user}% " + ."system=$cpu->{system}% " + ."iowait=$cpu->{iowait}% " + 
."idle=$cpu->{idle}%;$o_warning;$o_critical"; + + print "CPU $status : idle $cpu->{idle}% $perfdata"; + } + else { + print "No data"; + } +} + +sub check_procs { + my $lxs = Sys::Statistics::Linux->new(procstats => 1); + $lxs->init; + sleep $o_sleep; + my $stat = $lxs->get; + + if(defined($stat->procstats)) { + $status = "OK"; + my $procs = $stat->procstats; + + if($procs->{count} >= $o_critical) { + $status = "CRITICAL"; + } + elsif ($procs->{count} >= $o_warning) { + $status = "WARNING"; + } + + my $perfdata .= "|" + ."count=$procs->{count};$o_warning;$o_critical " + ."runqueue=$procs->{runqueue} " + ."blocked=$procs->{blocked} " + ."running=$procs->{running} " + ."new=$procs->{new}"; + print "PROCS $status : count $procs->{count} $perfdata"; + } +} + + +sub check_process { + my $return_str = ""; + my $perfdata = ""; + # pidfiles + my @pids = (); + for my $file (split(/,/, $o_pattern)) { + open FILE, $file or die "Could not read from $file, program halting."; + # read the record, and chomp off the newline + chomp(my $pid = ); + close FILE; + if($pid=~/^\d+$/){ + push @pids,$pid; + } + } + + if($#pids>-1) { + my $lxs = Sys::Statistics::Linux::Processes->new(pids => \@pids); + $lxs->init; + sleep $o_sleep; + my $processes = $lxs->get; + my @pname = (); + + if(defined($processes)) { + $status = "OK"; + + my $crit = 0; #critical counter + my $warn = 0; #warning counter + foreach my $process (keys (%$processes)) { + my $vsize = $processes->{$process}->{vsize}; + my $nswap = $processes->{$process}->{nswap}; + my $cnswap = $processes->{$process}->{cnswap}; + my $cpu = $processes->{$process}->{cpu}; + my $cmd = $processes->{$process}->{cmd}; + $cmd =~s/\W+//g; + + if($vsize >= $o_critical) {$crit++; push @pname,$cmd;} + elsif($vsize >= $o_warning){ $warn++; push @pname,$cmd;} + + $perfdata .= "|" + .$cmd."_vsize=$vsize;$o_warning;$o_critical " + .$cmd."_nswap=$nswap " + .$cmd."_cnswap=$cnswap " + .$cmd."_cpu=$cpu"; + } + + if($crit>0) {$status="CRITICAL";} + 
elsif($warn>0) {$status="WARNING";} + + } + print "PROCESSES $status : ".join(',',@pname)." $perfdata"; + } +} + +sub check_socket { + my $lxs = Sys::Statistics::Linux->new(sockstats => 1); + $lxs->init; + sleep $o_sleep; + my $stat = $lxs->get; + + if(defined($stat->sockstats)) { + $status = "OK"; + my $socks = $stat->sockstats; + + if($socks->{used} >= $o_critical) { + $status = "CRITICAL"; + } + elsif($socks->{used} >= $o_warning) { + $status = "WARNING"; + } + + my $perfdata .= "|" + ."used=$socks->{used};$o_warning;$o_critical " + ."tcp=$socks->{tcp} " + ."udp=$socks->{udp} raw=$socks->{raw}"; + + print "SOCKET USAGE $status : used $socks->{used} $perfdata"; + } + else { + print "No data"; + } +} + +sub check_file { + my $lxs = Sys::Statistics::Linux->new(filestats => 1); + $lxs->init; + sleep $o_sleep; + my $stat = $lxs->get; + + if(defined($stat->filestats)) { + $status = "OK"; + my $file = $stat->filestats; + + my ($fh_crit,$inode_crit) = split(/,/,$o_critical); + my ($fh_warn,$inode_warn) = split(/,/,$o_warning); + + if(($file->{fhalloc}>=$fh_crit)||($file->{inalloc}>=$inode_crit)) { + $status = "CRITICAL"; + } + elsif(($file->{fhalloc}>=$fh_warn)||($file->{inalloc}>=$inode_warn)) { + $status = "WARNING"; + } + + my $perfdata .= "|" + ."fhalloc=$file->{fhalloc};$fh_warn;$fh_crit;$file->{fhmax} " + ."inalloc=$file->{inalloc};$inode_warn;$inode_crit;$file->{inmax} " + ."dentries=$file->{dentries}"; + + print "OPEN FILES $status allocated: $file->{fhalloc} (inodes: $file->{inalloc}) $perfdata"; + } + else { + print "No data"; + } +} + +sub check_mem { + my $lxs = Sys::Statistics::Linux->new(memstats => 1); + $lxs->init; + sleep $o_sleep; + my $stat = $lxs->get; + + if(defined($stat->memstats)) { + $status = "OK"; + + my ($mem_crit,$swap_crit) = split(/,/,$o_critical); + my ($mem_warn,$swap_warn) = split(/,/,$o_warning); + + my $mem = $stat->memstats; + my $memused = sprintf("%.2f", ($mem->{memused}/$mem->{memtotal})*100); + my $memcached = sprintf("%.2f", 
($mem->{cached}/$mem->{memtotal})*100); + my $swapused = sprintf("%.2f", ($mem->{swapused}/$mem->{swaptotal})*100); + my $swapcached = sprintf("%.2f", ($mem->{swapcached}/$mem->{swaptotal})*100); + my $active = sprintf("%.2f", ($mem->{active}/$mem->{memtotal})*100); + + if(($memused>=$mem_crit)||($swapused>=$swap_crit)) { + $status = "CRITICAL"; + } + elsif (($memused>=$mem_warn)||($swapused>=$swap_warn)) { + $status = "WARNING"; + } + + my $perfdata .= "|" + ."MemUsed=$memused%;$mem_warn;$mem_crit " + ."SwapUsed=$swapused;$swap_warn;$swap_crit " + ."MemCached=$memcached SwapCached=$swapcached " + ."Active=$active"; + + print "MEMORY $status : Mem used: $memused%, Swap used: $swapused% $perfdata"; + } + else { + print "No data"; + } +} + +sub check_disk { + my $lxs = Sys::Statistics::Linux->new(diskusage => 1); + $lxs->init; + sleep $o_sleep; + my $stat = $lxs->get; + my $return_str = ""; + my $perfdata = ""; + + if(defined($stat->diskusage)) { + $status = "OK"; + + my $disk = $stat->diskusage; + if(!defined($o_pattern)){ $o_pattern = 'all';} + + my $checkthis; + map {$checkthis->{$_}++} split(/,/, $o_pattern); + + my $crit = 0; #critical counter + my $warn = 0; #warning counter + foreach my $device (keys (%$disk)) { + my $usage = $disk->{$device}->{usage}; # KB + my $free = $disk->{$device}->{free}; # KB + my $total = $disk->{$device}->{total}; # KB + my $mountpoint = $disk->{$device}->{mountpoint}; + my $percentused = sprintf("%.2f", ($usage/$total)*100); + my $percentfree = sprintf("%.2f", ($free/$total)*100); + + if(defined($checkthis->{$mountpoint})||defined($checkthis->{all})){ + $return_str .= " $mountpoint $percentfree% free"; + + if($o_unit =~ /\%/) { + if($percentfree<=$o_critical){ $crit++;} + elsif($percentfree<=$o_warning){ $warn++;} + + $perfdata .= " $mountpoint=$usage".'KB'; + } + else { + # KB + my $tmpfree = $free; + my $tmpusage = $usage; + my $tmptotal = $total; + + if($o_unit =~ /MB/i) { + $tmpfree = sprintf("%.2f", ($free/1024)); + $tmpusage = 
sprintf("%.2f", ($usage/1024)); + $tmptotal = sprintf("%.2f", ($total/1024)); + } + elsif($o_unit =~ /GB/i) { + $tmpfree = sprintf("%.2f", ($free/1048576)); + $tmpusage = sprintf("%.2f", ($usage/1048576)); + $tmptotal = sprintf("%.2f", ($total/1048576)); + } + + if($tmpfree<=$o_warning){ $warn++;} + elsif($tmpfree<=$o_critical){ $crit++;} + + $perfdata .= " $mountpoint=$usage$o_unit"; + } + } + } + + if($crit>0) {$status="CRITICAL";} + elsif($warn>0) {$status="WARNING";} + } + print "DISK $status used : $return_str |$perfdata"; +} + +sub check_io { + my $lxs = Sys::Statistics::Linux->new(diskstats => 1); + $lxs->init; + sleep $o_sleep; + my $stat = $lxs->get; + my $return_str = "io :"; + my $perfdata = ""; + + if(defined($stat->diskstats)) { + $status = "OK"; + + my $disk = $stat->diskstats; + if(!defined($o_pattern)){ $o_pattern = 'all';} + + my $checkthis; + map {$checkthis->{$_}++} split(/,/, $o_pattern); + + my ($read_crit,$write_crit) = split(/,/,$o_critical); + my ($read_warn,$write_warn) = split(/,/,$o_warning); + + my $crit = 0; #critical counter + my $warn = 0; #warning counter + foreach my $device (keys (%$disk)) { + my $rdreq = $disk->{$device}->{rdreq}; + my $wrtreq = $disk->{$device}->{wrtreq}; + my $ttreq = $disk->{$device}->{ttreq}; + my $rdbyt = $disk->{$device}->{rdbyt}; + my $wrtbyt = $disk->{$device}->{wrtbyt}; + my $ttbyt = $disk->{$device}->{ttbyt}; + + if(defined($checkthis->{$device})||defined($checkthis->{all})){ + if($o_unit =~ /BYTES/i) { + if(($rdbyt>=$read_crit)||($wrtbyt>=$write_crit)){ $crit++;} + elsif(($rdbyt>=$read_warn)||($wrtbyt>=$write_warn)){ $warn++;} + + $perfdata .= "|" + .$device."_read=$rdbyt;$read_warn;$read_crit " + .$device."_write=$wrtbyt;$write_warn;$write_crit"; + } + else { + if(($rdreq>=$read_crit)||($wrtreq>=$write_crit)){ $crit++;} + elsif(($rdreq>=$read_warn)||($wrtreq>=$write_warn)){ $warn++;} + + $perfdata .= "|" + .$device."_read=$rdreq;$read_warn;$read_crit " + 
.$device."_write=$wrtreq;$write_warn;$write_crit"; + } + } + } + if($crit>0) {$status="CRITICAL";} + elsif($warn>0) {$status="WARNING";} + + print "DISK $status $return_str $perfdata"; + } +} + +sub check_net { + my $lxs = Sys::Statistics::Linux->new(netstats => 1); + $lxs->init; + sleep $o_sleep; + my $stat = $lxs->get; + + my $return_str = ""; + my $perfdata = ""; + if(defined($stat->netstats)) { + $status = "OK"; + my $net = $stat->netstats; + if(!defined($o_pattern)){ $o_pattern = 'all';} + + my $checkthis; + map {$checkthis->{$_}++} split(/,/, $o_pattern); + + my $crit = 0; #critical counter + my $warn = 0; #warning counter + foreach my $device (keys (%$net)) { + my $txbyt = $net->{$device}->{txbyt}; + my $rxerrs = $net->{$device}->{rxerrs}; + my $ttbyt = $net->{$device}->{ttbyt}; + my $txerrs = $net->{$device}->{txerrs}; + my $txdrop = $net->{$device}->{txdrop}; + my $txcolls = $net->{$device}->{txcolls}; + my $rxbyt = $net->{$device}->{rxbyt}; + my $rxdrop = $net->{$device}->{rxdrop}; + + if(defined($checkthis->{$device})||defined($checkthis->{all})){ + if($ttbyt>=$o_critical){ $crit++;} + elsif($ttbyt>=$o_warning){ $warn++;} + + $return_str .= $device.":".bytes_to_readable($ttbyt)." 
"; + + $perfdata .= "|" + .$device."_txbyt=".$txbyt."B " + .$device."_txerrs=".$txerrs."B " + .$device."_rxbyt=".$rxbyt."B " + .$device."_rxerrs=".$rxerrs."B"; + } + } + + if($crit>0) {$status="CRITICAL";} + elsif($warn>0) {$status="WARNING";} + + print "NET USAGE $status $return_str $perfdata"; + } +} + +sub check_load { + my $lxs = Sys::Statistics::Linux->new(loadavg => 1); + $lxs->init; + sleep $o_sleep; + my $stat = $lxs->get; + + if(defined($stat->loadavg)) { + $status = "OK"; + my $load = $stat->loadavg; + my ($warn_1,$warn_5,$warn_15) = split(/,/,$o_warning); + my ($crit_1,$crit_5,$crit_15) = split(/,/,$o_critical); + + if(($load->{avg_1}>=$crit_1)||($load->{avg_5}>=$crit_5)||($load->{avg_15}>=$crit_15)) { + $status = "CRITICAL"; + } + elsif(($load->{avg_1}>=$warn_1)||($load->{avg_5}>=$warn_5)||($load->{avg_15}>=$warn_15)) { + $status = "WARNING"; + } + + my $perfdata = "|" + ."load1=$load->{avg_1};$warn_1;$crit_1;0 " + ."load5=$load->{avg_5};$warn_5;$crit_5;0 " + ."load15=$load->{avg_15};$warn_15;$crit_15;0"; + + print "LOAD AVERAGE $status : $load->{avg_1},$load->{avg_5},$load->{avg_15} $perfdata"; + } + else { + print "No data"; + } +} + +sub check_paging { + my $lxs = Sys::Statistics::Linux->new(pgswstats => 1); + $lxs->init; + sleep $o_sleep; + my $stat = $lxs->get; + if(defined($stat->pgswstats)) { + $status = "OK"; + my $page = $stat->pgswstats; + my ($warn_in,$warn_out) = split(/,/,$o_warning); + my ($crit_in,$crit_out) = split(/,/,$o_critical); + if(($page->{pgpgin}>=$crit_in)||($page->{pgpgout}>=$crit_out)) { + $status = "CRITICAL"; + } + elsif(($page->{pgpgin}>=$warn_in)||($page->{pgpgout}>=$warn_out)) { + $status = "WARNING"; + } + + my $perfdata = "|" + ."pgpgin=$page->{pgpgin};$warn_in;$crit_in;0 " + ."pgpgout=$page->{pgpgout};$warn_out;$crit_out;0 " + ."pswpin=$page->{pswpin} pswpout=$page->{pswpout}"; + + print "Paging $status : in:$page->{pgpgin},out:$page->{pgpgout} $perfdata"; + } + else { + print "No data"; + } +} + +sub check_uptime { + 
# Read the uptime in seconds from /proc/uptime + open FILE, "< /proc/uptime" or return ("Cannot open /proc/uptime: $!"); + my ($uptime, undef) = split / /, ; + close FILE; + + if(defined($uptime)) { + $status = "OK"; + my $days = int($uptime / 86400); + my $seconds = $uptime % 86400; + my $hours = int($seconds / 3600); + $seconds = $seconds % 3600; + my $minutes = int($seconds / 60); + $seconds = $seconds % 60; + + $status = "WARNING" if($o_warning && (int($uptime/60))<=$o_warning); + + print "$status : up $days days, " + .sprintf("%02d", $hours).":" + .sprintf("%02d", $minutes).":" + .sprintf("%02d", $seconds) + ." |uptime=".int($uptime); + } + else { + print "No data"; + } +} + +sub usage { + print "Usage: $0 -C|-P|-M|-N|-D|-I|-L|-F|-S|-W|-U -p -w -c [-s ] [-u ] [-V] [-h]\n"; +} + + +sub version { + print "$script_name v$script_version\n"; +} + + +sub help { + version(); + usage(); + + print < \$o_help, 'help' => \$o_help, + 's:i' => \$o_sleep, 'sleep:i' => \$o_sleep, + 'C' => \$o_cpu, 'cpu' => \$o_cpu, + 'P' => \$o_procs, 'procs' => \$o_procs, + 'T' => \$o_process, 'top' => \$o_process, + 'M' => \$o_mem, 'memory' => \$o_mem, + 'N' => \$o_net, 'network' => \$o_net, + 'D' => \$o_disk, 'disk' => \$o_disk, + 'I' => \$o_io, 'io' => \$o_io, + 'L' => \$o_load, 'load' => \$o_load, + 'F' => \$o_file, 'file' => \$o_file, + 'S' => \$o_socket, 'socket' => \$o_socket, + 'W' => \$o_paging, 'paging' => \$o_paging, + 'U' => \$o_uptime, 'uptime' => \$o_uptime, + 'V' => \$o_version, 'version' => \$o_version, + 'p:s' => \$o_pattern, 'pattern:s' => \$o_pattern, + 'w:s' => \$o_warning, 'warning:s' => \$o_warning, + 'c:s' => \$o_critical, 'critical:s' => \$o_critical, + 'u:s' => \$o_unit, 'unit:s' => \$o_unit + ); + + if(defined($o_help)) { + help(); + exit $ERRORS{'UNKNOWN'}; + } + + if(defined($o_version)) { + version(); + exit $ERRORS{'UNKNOWN'}; + } +} + +sub bytes_to_readable { + my ($bignum) = @_; + + foreach my $unit ("B","KB","MB","GB") { + return 
sprintf("%.2f",$bignum)."$unit" if $bignum < 1024; + $bignum /= 1024; + } +} + +sub bytes_to_kilobytes { + my ($bignum) = @_; + + return sprintf("%.2f", $bignum/1024); +} + +sub bytes_to_megabytes { + my ($bignum) = @_; + + return sprintf("%.2f", $bignum/1048576); +} + +sub bytes_to_gigabytes { + my ($bignum) = @_; + + return sprintf("%.2f", $bignum/1073741824); +} + diff --git a/nagios-plugins-linux_stats.spec b/nagios-plugins-linux_stats.spec new file mode 100644 index 0000000..456f371 --- /dev/null +++ b/nagios-plugins-linux_stats.spec @@ -0,0 +1,39 @@ +Name: nagios-plugins-linux_stats +Version: 1.3 +Release: 1mamba +Summary: Nagios plugin to check Linux system performance +Group: Network/Monitoring +Vendor: openmamba +Distribution: openmamba +Packager: Davide Madrisan +URL: http://exchange.nagios.org/directory/Plugins/Operating-Systems/Linux/check_linux_stats/details +Source0: check_linux_stats.pl +License: GPL +## AUTOBUILDREQ-BEGIN +## AUTOBUILDREQ-END +Requires: nagios-nrpe +Requires: perl-Sys-Statistics-Linux +BuildRoot: %{_tmppath}/%{name}-%{version}-root + +%description +This Nagios plugin checks Linux system performance (CPU, memory, load, disk usage, disk I/O, network usage, open files and processes). + +%prep + +%build + +%install +[ "%{buildroot}" != / ] && rm -rf "%{buildroot}" +install -d %{buildroot}%{_libdir}/nagios/plugins +install -p -m 0755 %{S:0} %{buildroot}%{_libdir}/nagios/plugins/check_linux_stats.pl + +%clean +[ "%{buildroot}" != / ] && rm -rf "%{buildroot}" + +%files +%defattr(-,root,root) +%{_libdir}/nagios/plugins/check_linux_stats.pl + +%changelog +* Tue May 28 2013 Davide Madrisan 1.3-1mamba +- package created by autospec