releasesystem
view release on metacpan or search on metacpan
utils/sysmonitor/sysmonitord view on Meta::CPAN
#!/usr/local/bin/perl
# System Process Monitor Daemon
# by Kevin Greene
# copyright 1996-2000 by hewlett-packard company
# may be distributed under the terms of the artistic license
use subs qw(fork_this);
use Time::Local; # get timelocal function
use FileHandle; # get select methods
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use Mail::Send;
use Sys::Hostname;
use Time::Period; # get inPeriod function
# Reset request flag: starts at 0 and is set to 1 by the HUP and ALRM handlers
# installed below. The code that reacts to the flag is not in this section.
$ALARM_VAR = 0; #an alarm will set this to 1 - this will initiate a reset
# (reinitialize the variables)
# Call fork_this (predeclared by "use subs" above; defined elsewhere) when the
# first command-line argument is '-f'. This happens before the signal handlers
# are installed. Presumably it puts the monitor in the background - confirm
# against fork_this.
fork_this() if (defined $ARGV[0] and $ARGV[0] eq '-f');
# Both handlers log the signal with the "INFO" tag and raise the reset flag.
$SIG{'HUP'} = sub {&log ("Caught HUP Alarm Signal", "INFO"); $ALARM_VAR=1;}; #trigger reset
$SIG{'ALRM'} = sub {&log ("Caught Alarm Signal", "INFO"); $ALARM_VAR=1;}; #trigger reset
###############################################################
# Global Setup
$DEBUG = 0; # 0 selects the standard values below, 1 selects the debug values

# Address made of the invoking user's login name plus the fixed mail domain
$DEBUG_EMAIL = (getlogin || (getpwuid ($<))[0]) . "\@nafohq.hp.com";

# Sleep intervals (in seconds) and file locations. Only the selected set of
# values is evaluated, so $ENV{HOME} is consulted in debug mode only.
($STARTUP_SLEEP, $SLEEP_TIME, $CONFIG_FILE, $SYSCHECK_LOG, $PID_FILE) = $DEBUG
    ? ( # debug values
        1,  # 1 second
        10, # 10 seconds
        "$ENV{HOME}/public_html/wmTools/scripts/syscheck/syscheck.rc",
        "$ENV{HOME}/public_html/wmTools/scripts/syscheck/log",
        "$ENV{HOME}/public_html/wmTools/scripts/syscheck/pid",
      )
    : ( # standard values
        60 * 10, # 10 minutes
        60 * 3,  # 3 minutes
        "/opt/ims/local/sysmonitor/syscheck.rc",
        "/opt/ims/local/sysmonitor/log",
        "/opt/ims/local/sysmonitor/pid",
      );

# Uncomment this only if running *inside* the firewall
# $HTTP_PROXY = 'http://web_proxy:8088';
$PS = '/bin/ps -ef ';     # command whose output get_ps_list parses
$HOSTNAME = hostname ();  # from Sys::Hostname
# $NO_PROXY = 'hp.com';
# Positions of the fields of a config file entry. The first item on a config
# line is the id; the fields below follow it in this order.
(
    $SEMAPHORE_PARAMETER,
    $REGEX_PARAMETER,
    $URL_PARAMETER,
    $COMMAND_PARAMETER,
    $EMAIL_PARAMETER,
    $SUBJECT_PARAMETER,
    $THRESHOLD_PARAMETER,
    $ERROR_REGEX_PARAMETER,
    $OPERATING_HOURS_PARAMETER,
    $DEAD_PARAMETER,    # NOTE! - this must ALWAYS be the last one because
                        # it does not actually exist in the config file
) = (0 .. 9);
###############################################################
# Run the monitor, then leave with the default (zero) exit status.
&main ();
exit (0);
###############################################################
sub main
{
    # Entry point: creates the (initially empty) table of monitored items,
    # hands a reference to it to initialize, then hands the same reference
    # to event_loop.
    my (%monitored);
    my ($table) = \%monitored;

    &initialize ($table);
    &event_loop ($table);
}
###############################################################
sub initialize
{
my ($listref) = @_;
my ($pid) = $$;
my ($killit) = 1 if $ARGV[0] eq '-kill'; #cheap command line
#processing
my ($startup_sleep) = $STARTUP_SLEEP;
my ($curpid) = &Get_PID ($PID_FILE);
&log ("Current PID: $curpid", "INFO", 1);
if ($curpid && (kill 0, $curpid) && (!$killit)) #is pid listed valid?
{
die "Only one process daemon can be running! Current pid: $curpid\n";
}
elsif ($curpid && (kill 0, $curpid) && $killit) #is pid listed valid?
{
&log ("Current pid: $curpid. Killing...", "INFO");
&log ("Kill successful", "INFO") if kill 'KILL', $curpid;
}
&Set_PID ($PID_FILE, $pid);
#print "[" . &Get_PID($PID_FILE) . "]"; ###!!!remove, for debug
&log ("Loading config file: $CONFIG_FILE", "INFO");
&read_config ($CONFIG_FILE, $listref);
utils/sysmonitor/sysmonitord view on Meta::CPAN
###############################################################
sub get_ps_list
{
    # Runs the command held in $PS and returns one list element per line of
    # its output: the 9th whitespace-separated field of the line (index 8 of
    # a split limited to 9 fields), i.e. everything from that field to the
    # end of the line, newline included. A line with fewer than 9 fields
    # contributes undef, so the result always has one entry per output line.
    # Returns an empty list (and warns) when the command cannot be started.
    #
    # NOTE(review): split /\s+/ produces an empty leading field for a line
    # that starts with whitespace, so which ps column ends up at index 8
    # depends on the platform's ps layout - confirm on the target system.
    my ($ps_string) = "$PS";
    my (@commands);
    local (*PID);    # keep the handle private to this call

    unless (open (PID, "$ps_string|"))
    {
        # An unchecked failure would look exactly like "no processes running"
        warn "get_ps_list: cannot run '$ps_string': $!\n";
        return @commands;
    }
    my (@raw_pid_output) = <PID>;
    close (PID);

    foreach my $line (@raw_pid_output)
    {
        # scalar assignment from the slice stores undef when field 8 is absent
        my $command = (split /\s+/, $line, 9)[8]; #keep process name
        push (@commands, $command);
    }
    @commands;
}
###############################################################
sub check_process_status
{
    # Checks every monitored item that has a process regex against the
    # current process list.
    #
    # $listref - reference to a hash of item id => array reference; the array
    #            is indexed by the *_PARAMETER values ($REGEX_PARAMETER,
    #            $OPERATING_HOURS_PARAMETER, $DEAD_PARAMETER are read here).
    #
    # For each item that has a regex and is inside its operating hours:
    #   - no entry of the process list matches: process_error is called with
    #     'missing process' and a success flag of 0;
    #   - an entry matches and the item's DEAD field is true: process_error
    #     is called with 'success' and a success flag of 1.
    my ($listref) = @_;
    my (@ps) = &get_ps_list ();    # explicit empty argument list
    my ($id, $regex, $now, $period);

    foreach $id (keys %$listref)
    {
        next unless ($regex = $$listref{$id}->[$REGEX_PARAMETER]); #ignore if no process name regex
        $now = time;
        $period = $$listref{$id}->[$OPERATING_HOURS_PARAMETER];
        next unless (inPeriod ($now, $period)); #ignore if not in operating hours

        #search for this process regex in the process list via grep
        if (grep (m{$regex}, @ps)) #process in list
        {
            # only report when the item was previously flagged as dead
            &process_error ($listref, $id, 'process', $regex, 'success', 1)
                if ($$listref{$id}->[$DEAD_PARAMETER]);
        }
        else # missing process
        {
            &process_error ($listref, $id, 'process', $regex, 'missing process', 0);
        }
    }
}
###############################################################
sub get_url_status
{
    # Fetches $url with a GET request and reports whether it looks healthy.
    #
    # $url   - URL to fetch
    # $regex - optional error pattern; when true and the response content
    #          matches it, the URL is reported as failed
    #
    # Returns a two-element list (success, code):
    #   (0, "request failed: <reason>")  the request died or gave no response
    #   (0, "regex error match:<regex>") the content matched the error pattern
    #   (is_success, response code)      otherwise
    #
    # $HTTP_PROXY and $NO_PROXY are applied to the user agent when set.
    my ($url, $regex) = @_;
    my ($ua, $request, $response, $failure);

    $ua = LWP::UserAgent->new;
    $ua->proxy ('http', $HTTP_PROXY) if $HTTP_PROXY;
    $ua->no_proxy ($NO_PROXY) if $NO_PROXY;
    $request = HTTP::Request->new ('GET', $url);

    #eval is a hack to stop a timeout from dying in the program...
    #(which is called somewhere in IO::Select, I think...)
    eval {$response = $ua->request ($request);};
    $failure = $@;    # capture at once, before anything can clobber $@

    # If the request died there is no response object; calling a method on
    # it would itself be fatal, so report the failure to the caller instead.
    unless (defined $response)
    {
        $failure = 'no response object returned' unless $failure;
        chomp ($failure);
        return (0, "request failed: $failure");
    }

    if ($regex)
    {
        if ($response->content =~ /$regex/) #if error - ie regex matched
        {
            return (0, "regex error match:$regex"); #return error, matched err string
        }
    }
    ($response->is_success (), $response->code ());
}
###############################################################
sub check_urls
{
    # Probes the URL of every monitored item that has one and is inside its
    # operating hours. A failed probe is passed to process_error with a
    # success flag of 0; a successful probe is passed on (flag 1) only when
    # the item's DEAD field is true.
    #
    # $listref - reference to a hash of item id => array reference indexed
    #            by the *_PARAMETER values
    my ($listref) = @_;

    foreach my $item (keys %$listref)
    {
        my $url = $listref->{$item}->[$URL_PARAMETER];
        next unless $url; # nothing to probe for this item

        my $period = $listref->{$item}->[$OPERATING_HOURS_PARAMETER];
        next unless inPeriod (time, $period); # outside operating hours

        my ($ok, $status) =
            &get_url_status ($url, $listref->{$item}->[$ERROR_REGEX_PARAMETER]);

        if (!$ok)
        {
            &process_error ($listref, $item, 'URL', $url, $status, 0);
            next;
        }

        # probe succeeded: only report when the item was flagged as dead
        &process_error ($listref, $item, 'URL', $url, $status, 1)
            if $listref->{$item}->[$DEAD_PARAMETER];
    }
}
###############################################################
# $listref = reference to the syscheck file list
# $id      = item id
# $label   = type of item (human readable)
# $name    = item value - URL or process regex
# $code    = error
# $success = 1 if the item had failed before and is now working again
#          = 0 on an error condition
( run in 0.410 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )