#!/usr/bin/perl -w
#
#   watchdog - Check disk space and load, send mails if specified numbers
#	       are exceeded.
#
#   Author:	Jochen Wiedmann
#		Am Eisteich 9
#		72555 Metzingen
#		Germany
#
#		E-Mail: joe@ispsoft.de
#
############################################################################
use strict;

#
#   Configurable section
#
my $MAX_LOAD = 3;	# Send mail if the 1, 5 or 15 minute load average
			# exceeds this number (default for --max-load)
my $MAX_DISC = 80;	# Send mail if a partition's usage (blocks or
			# inodes) in percent exceeds this number
			# (default for --max-disc)
my $MAX_DISC_INC = 10;  # Send mail if a partition's usage has grown by
			# at least this many percentage points since the
			# previous check (default for --max-disc-inc)
my $ADMIN = 'root';	# Default recipient of the mails
my $HOST = 'monitor.wuestenrot.de';	# Host name quoted in the mails


############################################################################
#
#   Features:
#
#	- Mail will be sent only once for a specified event until the
#	  event happens to be fixed.
#
#   These features depend on the existence of the following cache file:
#
############################################################################

# Everything that depends on the operating system, keyed by $^O:
#
#   cache_file  Default location of the cache file.
#   mail        Command used for sending mail; must read the mail body
#               from stdin. The literal tokens $subject and $to are
#               replaced before the command is run.
#   df          Command to determine disk usage; must print to stdout.
#   dfi         Command to determine inode usage; must print to stdout.
#   uptime      Command to determine the load; must print to stdout.
#   use_locks   True if the cache file is protected with flock().
my %OS_SETTINGS = (
    'linux' => {
        'cache_file' => "/var/log/watchdog.log",
        'mail'       => '/usr/bin/mail -s $subject $to',
        'df'         => '/bin/df -vk',
        'dfi'        => '/bin/df -i',
        'uptime'     => '/usr/bin/uptime',
        'use_locks'  => 1,
    },
    'sco3.2v5.0' => {
        'cache_file' => '/var/adm/watchdog.log',
        'mail'       => '/usr/bin/mail -s $subject $to',
        'df'         => '/bin/df -Bk',
        'dfi'        => '/bin/df -I',
        'uptime'     => '/usr/bin/uptime',
        'use_locks'  => 0,
    },
);

# Refuse to run on a system we have no settings for.
die "Unknown OS: $^O" unless exists $OS_SETTINGS{$^O};

my $CACHE_FILE = $OS_SETTINGS{$^O}->{'cache_file'};
my $MAIL       = $OS_SETTINGS{$^O}->{'mail'};
my $DF         = $OS_SETTINGS{$^O}->{'df'};
my $DFI        = $OS_SETTINGS{$^O}->{'dfi'};
my $UPTIME     = $OS_SETTINGS{$^O}->{'uptime'};
my $USE_LOCKS  = $OS_SETTINGS{$^O}->{'use_locks'};


# If you want to specify a different capacity limit (in percent) for some
# drive, you can do it here. The key is the device name exactly as it
# appears in the first column of the df output; the limit is applied to
# both the disk usage and the inode usage check. Devices not listed here
# use the general limit (by default $MAX_DISC).

my %CAPACITIES = (
    # Example:
    # '/dev/hda1' => 75,
    # '/dev/hda2' => 70
);


use vars qw($debug $verbose);


############################################################################
#
#   We use *no* external modules here.
#   This command will be executed from within cron, thus it should be
#   small and not use much resources.
#
############################################################################

# Minimal copy of gensym() from the "Symbol" module (see "perldoc Symbol"),
# included here so that the module itself need not be loaded.
package Symbol;

{
  # Package that briefly holds each generated glob, and the counter that
  # keeps the generated names unique.
  my $genpkg = "Symbol::";
  my $genseq = 0;

  # gensym() - Return a reference to a new, anonymous typeglob that can be
  # used as a file handle.
  sub gensym () {
    my $short = "GEN" . $genseq;
    $genseq += 1;

    no strict 'refs';
    my $glob = \*{$genpkg . $short};
    # Drop the symbol table entry again; from now on the glob lives only
    # as long as the returned reference does.
    delete $$genpkg{$short};
    return $glob;
  }
}


# This code is similar to Data::Dumper. We save a complex hash ref
#   { var1 => 'val1',
#     var2 => 'val2',
#     var3 => { var4 => 'val4',
#               var5 => 'val5' }
#   }
# into the following format:
#     var1=val1
#     var2=val2
#     var3__var4=val4
#     var3__var5=val5
#
# Limitations of the format: a key must not be empty, must not contain
# "=", a newline or the separator "__" and must not start with "#" or
# white space; a value must not contain a newline.
#
package Dump;

# _new($ref, $prefix) - Flatten the hash referenced by $ref into
# "name=value" lines and return them as one string. $prefix is the
# already flattened name of the enclosing hash; it is empty at the top
# level. Keys are written in sorted order, so that the same data always
# produces the same text.
sub _new {
    my $proto = shift; my $ref = shift; my $prefix = shift;
    $prefix = '' unless defined($prefix);
    my $dump = '';
    foreach my $var (sort keys %$ref) {
        my $val = $ref->{$var};
        # Only put the separator between two names; a top level key has
        # no prefix and must not start with "__". Test the length rather
        # than the truth of the prefix, so that a key "0" works, too.
        my $name = length($prefix) ? "${prefix}__${var}" : $var;
        if (ref($val)) {
            $dump .= $proto->_new($val, $name);
        } else {
            $dump .= "$name=$val\n";
        }
    }
    $dump;
}

# new($ref) - Create a dump object from the hash referenced by $ref. The
# object is a blessed reference to the flattened text.
sub new {
    my $proto = shift; my $ref = shift;
    my $dump = $proto->_new($ref, '');
    my $self = \$dump;
    bless($self, (ref($proto) || $proto));
}

# Dump() - Return the flattened text stored in the object.
sub Dump { my $str = shift; $$str }

# Read($file) - Parse a file in the above format and return a reference
# to the resulting (possibly nested) hash. Empty lines and lines starting
# with "#" are ignored. Problems are reported on STDERR: if the file
# cannot be opened, an empty hash is returned; an invalid line is
# skipped.
sub Read {
    my $proto = shift; my $file = shift;
    my $ref = {};
    my $num = 0;
    my $fh = Symbol::gensym();
    if (!open($fh, "<$file")) {
        print STDERR "Failed to open file $file: $!\n";
        return $ref;
    }
  LINE:
    while (defined(my $line = <$fh>)) {
        ++$num;
        # Ignore comments and empty lines
        next LINE if $line =~ /^\s*$/ || $line =~ /^\s*\#/;
        # The name is everything up to the first "="; it is not restricted
        # to word characters, because names derived from device names may
        # contain characters like "-", "." or ":".
        my($var, $val) = ($line =~ /^([^=]+)=(.*)/);
        if (!defined($var)) {
            print STDERR "Invalid line $num in file $file.\n";
            next LINE;
        }
        my @vars = split(/__/, $var);
        print "Dump::Read: Setting ", join("->", @vars), " to $val.\n"
            if $main::verbose;
        my $r = $ref;
        while (defined(my $v = shift @vars)) {
            if (@vars) {
                $r->{$v} = {} unless $r->{$v};
                # A name must not be used both as a value and as a
                # hash; descending into a string would be fatal.
                if (ref($r->{$v}) ne 'HASH') {
                    print STDERR "Invalid line $num in file $file.\n";
                    next LINE;
                }
                $r = $r->{$v};
            } else {
                $r->{$v} = $val;
            }
        }
    }
    close($fh);
    $ref;
}


package main;

# Mail($subject, $body [, $to]) - Send a mail by piping $body into the
# mail command.
#
# The command is built from the template $MAIL by replacing the tokens
# $subject and $to with the shell-quoted subject and recipient. $to
# defaults to $ADMIN. In verbose mode the command and the body are
# printed; in debug mode nothing is sent and nothing is returned.
#
# Returns a true value if the body was handed to the mail command
# without error and a false value otherwise; failures are reported on
# STDERR.
sub Mail ($$;$) {
    my($subject, $body, $to) = @_;
    # $to defaults to $ADMIN
    $to ||= $ADMIN;
    my %value_of = ('subject' => quotemeta($subject),
                    'to'      => quotemeta($to));
    my $command = $MAIL;
    # Replace both tokens in a single pass, so that text inserted for one
    # token is never scanned for the other one.
    $command =~ s/\$(subject|to)/$value_of{$1}/g;
    if ($verbose) {
        print "Sending mail via command: $command\n$body\n";
    }
    return if $debug;

    my $pipe = Symbol::gensym();
    if (!open($pipe, "| $command")) {
        print STDERR "Failed to open pipe to mail command $command: $!\n";
        return 0;
    }
    my $ok = 1;
    if (!(print $pipe $body)) {
        print STDERR "Failed to write to mail command $command: $!\n";
        $ok = 0;
    }
    # Always close the pipe, even after a failed write; close() waits for
    # the command and fails if it did not exit successfully.
    if (!close($pipe)) {
        print STDERR ($! ? "Failed to close pipe to mail command $command: $!\n"
                         : "Mail command $command failed with status $?\n");
        $ok = 0;
    }
    return $ok;
}

# DiskUsage($DF, $MAX_DISC, $MAX_DISC_INC, $cache, $key) - Run the df
# command $DF and check every file system line of its output.
#
#   $DF            Command to run; its output is read through a pipe.
#   $MAX_DISC      Allowed capacity in percent; overridden per device by
#                  %CAPACITIES.
#   $MAX_DISC_INC  Allowed growth of the capacity, in percentage points,
#                  since the value stored in the cache. A false value
#                  (0) disables this check.
#   $cache         Hash ref with the state of the previous run; updated
#                  in place below the key 'diskusage'.
#   $key           Prefix that keeps the cache entries of different df
#                  commands apart (the caller passes "f" for disk usage
#                  and "i" for inode usage).
#
# A mail is sent when a limit is exceeded, but only once per event: the
# flags 'mail_sent' and 'inc_mail_sent' in the cache are set after the
# mail and cleared as soon as the limit is no longer exceeded.
#
# Returns 0 on success, 1 if the command could not be started.
sub DiskUsage ($$$$$) {
    my($DF, $MAX_DISC, $MAX_DISC_INC, $cache, $key) = @_;

    my $ph = Symbol::gensym();
    if (!open($ph, "$DF |")) {
        print STDERR "Failed to open pipe to command $DF: $!\n";
        return 1;
    }
    while (defined(my $line = <$ph>)) {
        my($device, $total, $used, $avail, $capacity, $mount) =
            ($line =~ /^(\S+)                   # Device
                        \s+(\d+)                # Blocks total
                        \s+(\d+)                # Blocks used
                        \s+(\d+)                # Blocks available
                        \s+(\d+(?:\.\d+)?)\%    # Capacity (in percent)
                        \s+(\S.*)/x);           # Mount point
        # Skip the header and every other line that does not describe a
        # file system.
        next unless defined($device);

        # Derive the cache key from the device name. The key ends up in
        # the name part of a cache file line, where "__" separates the
        # levels of the hash; thus map everything but word characters to
        # "_" and never leave two underscores in a row.
        my $name = $key . $device;
        $name =~ s/\W/_/g;
        $name =~ s/__+/_/g;

        my $entry = ($cache->{'diskusage'}->{$name} ||= {});
        my $old_capacity = $entry->{'capacity'};
        $entry->{'capacity'} = $capacity;

        print "Capacity of $mount ($device) is $capacity\%.\n" if $verbose;
        my $cap = exists($CAPACITIES{$device}) ?
            $CAPACITIES{$device} : $MAX_DISC;

        #
        #   Check 1: Absolute capacity
        #
        if (!($capacity <= $cap)) {
            print "=> Exceeds allowed capacity of $cap\%.\n" if $verbose;
            if ($entry->{'mail_sent'}) {
                print "Mail already sent, ignoring.\n" if $verbose;
            } else {
                Mail("Disk capacity exceeded on $HOST", <<"EOF");

Drive $mount ($device) exceeds its allowed disk capacity of $cap \%.
The current parameters are:

	Mount point:	$mount
	Device:		$device
	Blocks total:	$total
	       used:	$used
	       avail:   $avail
	Capacity:	$capacity %
	Allowed:	$cap %

This message is generated by the script $0.
EOF
                $entry->{'mail_sent'} = 1;
            }
        } else {
            $entry->{'mail_sent'} = 0;
            print "=> Doesn't exceed allowed capacity of $cap\%.\n"
                if $verbose;
        }

        #
        #   Check 2: Increment since the previous run
        #
        # Use print, not printf: the device name comes from the df output
        # and must not be interpreted as a format string.
        print("Checking increment for $device: $capacity <=> ",
              (defined($old_capacity) ? $old_capacity : "undef"), ".\n")
            if $verbose;
        # There is nothing to compare with on the first run, and an
        # allowed increment of 0 switches the check off.
        my $inc_exceeded = (defined($old_capacity)  &&  $MAX_DISC_INC  &&
                            !($capacity < $old_capacity + $MAX_DISC_INC));
        if ($inc_exceeded) {
            print "=> Exceeds allowed increment $MAX_DISC_INC\%.\n"
                if $verbose;
            if ($entry->{'inc_mail_sent'}) {
                print "Mail already sent, ignoring.\n" if $verbose;
            } else {
                Mail("Disk capacity raised on $HOST", <<"EOF");

Drive $mount ($device) has raised its capacity from $old_capacity
to $capacity since the last check. The current parameters are:

	Mount point:	$mount
	Device:		$device
	Blocks total:	$total
	       used:	$used
	       avail:   $avail
	Capacity:	$capacity %
	Allowed:	$cap %
        Old capacity:   $old_capacity %

This message is generated by the script $0.
EOF
                $entry->{'inc_mail_sent'} = 1;
            }
        } else {
            print "=> Doesn't exceed allowed increment of $MAX_DISC_INC.\n"
                if $verbose;
            $entry->{'inc_mail_sent'} = 0;
        }
    }
    # Close the pipe to wait for the command. Its exit status is
    # deliberately not treated as an error: the lines that could be read
    # have been checked anyway.
    close($ph);
    return 0;
}

# Uptime($MAX_LOAD, $cache) - Run the command $UPTIME and check the load
# averages found in the first line of its output.
#
#   $MAX_LOAD  Allowed load; a mail is sent if the 1, 5 or 15 minute
#              average exceeds it.
#   $cache     Hash ref with the state of the previous run; the flag
#              'mail_sent' below the key 'uptime' makes sure that only
#              one mail is sent until the load is back to normal.
#
# Returns 0 on success, 1 if the command could not be started or its
# output could not be parsed.
sub Uptime ($$) {
    my $MAX_LOAD = shift; my $cache = shift;

    my $ph = Symbol::gensym();
    if (!open($ph, "$UPTIME |")) {
        print STDERR "Failed to open pipe to load command $UPTIME: $!\n";
        return 1;
    }
    my $line = <$ph>;
    # Only the first line is of interest
    close($ph);
    if (!$line) {
        print STDERR "Uptime command $UPTIME returned empty output.\n";
        return 1;
    }

    my($one, $five, $fifteen) =
        ($line =~ /load\s+average\:\s+
                        (\d+(?:\.\d+)?)         # 1 Minute average
                        ,\s+(\d+(?:\.\d+)?)     # 5 Minutes average
                        ,\s+(\d+(?:\.\d+)?)     # 15 Minutes average
                  $/x);
    if (!defined($one)) {
        # Show the offending line; $! has no meaning here, because no
        # system call has failed.
        my $text = $line;
        chomp($text);
        print STDERR
            "Failed to parse output of Uptime command $UPTIME: $text\n";
        return 1;
    }
    printf("Detected average loads %s, %s, %s (1, 5 and 15 minutes)\n",
           $one, $five, $fifteen) if $verbose;

    if (!($one <= $MAX_LOAD  &&  $five <= $MAX_LOAD  &&
          $fifteen <= $MAX_LOAD)) {
        print "=> Exceeds allowed maximum of $MAX_LOAD.\n" if $verbose;
        if ($cache->{'uptime'}->{'mail_sent'}) {
            print "Mail already sent, ignoring.\n" if $verbose;
        } else {
            Mail("Maximum load exceeded on $HOST", <<"EOF");

The maximum load of $MAX_LOAD is exceeded on host $HOST. The average loads
are:

    Last minute:	$one
    Last 5 minutes:	$five
    Last 15 minutes:	$fifteen

This message is generated by the script $0.
EOF
            $cache->{'uptime'}->{'mail_sent'} = 1;
        }
    } else {
        print "=> Doesn't exceed allowed maximum of $MAX_LOAD.\n"
            if $verbose;
        $cache->{'uptime'}->{'mail_sent'} = 0;
    }
    return 0;
}


# Usage() - Print a summary of the command line options, including the
# built-in defaults, to STDERR and terminate the script with exit
# status 1.
sub Usage {
    my $help = <<"EOF";
Usage: $0 [options]

Possible options are:

  --cache-file=<file>  Store results in the given file; will be used for
		       watching increments and to prevent sending more than
		       one mail for a certain event. Defaults to
		       $CACHE_FILE.
  --max-load=<num>     Set maximum load; defaults to $MAX_LOAD
  --max-disc=<num>     Set maximum disk capacity in percent; defaults to
                       $MAX_DISC.
  --max-disc-inc=<num> Set the maximum increment of the disk capacity
		       between two checks. Defaults to $MAX_DISC_INC.
  --debug	       Enable debugging mode (implies --verbose)
  --verbose	       Enable verbose mode
  --help	       Print this message
EOF
    print STDERR $help;
    exit 1;
}


# LoadCacheFile($file, $ref) - Replace the contents of the hash
# referenced by $ref with the data stored in the cache file $file.
#
# Nothing is done (and nothing is returned) if no file name is given, if
# the name is "none" or if the file doesn't exist. If $USE_LOCKS is set,
# a shared lock on "$file.lock" is held while the file is read.
#
# Returns 0 on success, 1 if the lock could not be obtained.
sub LoadCacheFile ($$) {
    my $file = shift; my $ref = shift;
    return unless ($file and $file ne "none" and -f $file);
    print "Loading cache file $file.\n" if $verbose;
    # The lock lasts only as long as the handle exists; thus the handle
    # must be declared here and not inside the block below, or the lock
    # would be gone before the file is read.
    my $lfh;
    if ($USE_LOCKS) {
        $lfh = Symbol::gensym();
        # 1 = LOCK_SH (shared lock)
        if (!open($lfh, ">>$file.lock")  ||  !flock($lfh, 1)) {
            print STDERR "Failed to open lock file $file.lock: $!\n";
            return 1;
        }
    }
    %$ref = %{Dump->Read($file)};
    # Closing the handle releases the lock
    close($lfh) if $lfh;
    return 0;
}

# SaveCacheFile($file, $ref) - Write the hash referenced by $ref to the
# cache file $file.
#
# The data is written to "$file.new" first; then the previous cache file
# is kept as "$file.bak" and the new file is renamed to $file. If
# $USE_LOCKS is set, an exclusive lock on "$file.lock" is held until the
# function returns. In verbose mode the data is printed; in debug mode
# nothing is written. Nothing is done if no file name is given or if the
# name is "none", matching LoadCacheFile.
#
# Returns 0 on success, 1 on failure; failures are reported on STDERR.
sub SaveCacheFile ($$) {
    my $file = shift; my $ref = shift;
    # Same convention as in LoadCacheFile: no cache file is used at all
    return 0 unless ($file and $file ne "none");

    # The lock lasts only as long as the handle exists; thus the handle
    # must be declared here and not inside the block below, or the lock
    # would be gone before anything is written.
    my $lfh;
    if ($USE_LOCKS) {
        $lfh = Symbol::gensym();
        # 2 = LOCK_EX (exclusive lock)
        if (!open($lfh, ">>$file.lock")  ||  !flock($lfh, 2)) {
            print STDERR "Failed to open lock file $file.lock: $!\n";
            return 1;
        }
    }
    my $cfh = Symbol::gensym();
    my $dump = Dump->new($ref)->Dump();
    my $time = localtime();
    $dump = <<"EOF";
#
# Automatically generated by $0 at $time.
# Do not edit.
#
$dump
EOF
    print "Saving cache:\n$dump\n" if $verbose;
    return 0 if $debug;
    if (!open($cfh, ">$file.new")  or
        !(print $cfh $dump)  or
        !close($cfh)) {
        print STDERR "Failed to write cache file $file.new: $!\n";
        return 1;
    }
    if (-f "$file.bak"  &&  !unlink "$file.bak") {
        print STDERR "Failed to remove backup file $file.bak: $!\n";
        return 1;
    }
    if (-f $file  &&  !rename $file, "$file.bak") {
        print STDERR "Failed to rename $file to $file.bak: $!\n";
        return 1;
    }
    if (!rename "$file.new", $file) {
        print STDERR "Failed to rename $file.new to $file: $!\n";
        return 1;
    }
    # The lock is released when $lfh goes out of scope
    return 0;
}


############################################################################
#
#   This is main().
#
############################################################################

{
    my $max_load = $MAX_LOAD;
    my $max_disc = $MAX_DISC;
    my $max_disc_inc = $MAX_DISC_INC;
    my $cache_file = $CACHE_FILE;

    # The limits are compared numerically; accept only non-negative
    # decimal numbers like "3", "2.5" or ".5" for them.
    my $number = '^(?:\d+\.?\d*|\.\d+)$';

    # Every option takes its value either as "--option=value" or as the
    # following argument. Anything unknown, including --help, prints the
    # usage message.
    while (defined(my $arg = shift @ARGV)) {
        if ($arg =~ /^\-\-cache-file(?:=(.*))?$/) {
            $cache_file = defined($1) ? $1 : shift(@ARGV);
            Usage() unless defined($cache_file);
        } elsif ($arg =~ /^\-\-max\-load(?:=(.*))?$/) {
            $max_load = defined($1) ? $1 : shift(@ARGV);
            Usage() unless (defined($max_load)  &&
                            $max_load =~ /$number/);
        } elsif ($arg =~ /^\-\-max\-disc-inc(?:=(.*))?$/) {
            $max_disc_inc = defined($1) ? $1 : shift(@ARGV);
            Usage() unless (defined($max_disc_inc)  &&
                            $max_disc_inc =~ /$number/);
        } elsif ($arg =~ /^\-\-max\-disc(?:=(.*))?$/) {
            $max_disc = defined($1) ? $1 : shift(@ARGV);
            Usage() unless (defined($max_disc)  &&
                            $max_disc =~ /$number/);
        } elsif ($arg eq "--debug") {
            $debug = $verbose = 1;
        } elsif ($arg eq "--verbose") {
            $verbose = 1;
        } else {
            Usage();
        }
    }

    my %cache;
    # Without the previous state we cannot tell which mails have been
    # sent already; thus give up if the cache cannot be loaded.
    exit 1 if LoadCacheFile($cache_file, \%cache);

    # The checks don't depend on each other: run all of them, even if
    # one fails, and always save the cache. Otherwise a single failing
    # check would throw away the flags of the mails sent by the other
    # checks, and these mails would be repeated on every run.
    my $status = 0;
    $status = 1 if DiskUsage($DF, $max_disc, $max_disc_inc, \%cache, "f");
    $status = 1 if DiskUsage($DFI, $max_disc, $max_disc_inc, \%cache, "i");
    $status = 1 if Uptime($max_load, \%cache);
    $status = 1 if SaveCacheFile($cache_file, \%cache);
    exit $status;
}