releasesystem

 view release on metacpan or  search on metacpan

utils/sysmonitor/sysmonitord  view on Meta::CPAN

#!/usr/local/bin/perl

# System Process Monitor Daemon

# by Kevin Greene

# copyright 1996-2000 by hewlett-packard company

# may be distributed under the terms of the artistic license


use subs qw(fork_this);

use Time::Local;                      # get timelocal function
use FileHandle;                       # get select methods
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use Mail::Send;
use Sys::Hostname;
use Time::Period;                     # get inPeriod function

# Reset flag shared with the signal handlers installed below: both the HUP
# and ALRM handlers set it to 1.
$ALARM_VAR = 0;  #an alarm will set this to 1 - this will initiate a reset
		 # (reinitialize the variables)

# '-f' as the first command-line argument calls fork_this, which is
# predeclared via "use subs" and defined elsewhere in this file.
# NOTE(review): presumably this detaches the daemon into the background -
# confirm against the definition of fork_this.
fork_this() if (defined $ARGV[0] and $ARGV[0] eq '-f');

# Both handlers log the signal and raise $ALARM_VAR; neither does any other
# work inside the handler itself.
$SIG{'HUP'} = sub {&log ("Caught HUP Alarm Signal", "INFO"); $ALARM_VAR=1;}; #trigger reset
$SIG{'ALRM'} = sub {&log ("Caught Alarm Signal", "INFO"); $ALARM_VAR=1;}; #trigger reset

###############################################################
# Global Setup

$DEBUG       = 0;            # Normally set to 0. Use 1 for debugging
# Debug mail address: the login name (falling back to the passwd entry for
# the real uid) at the fixed domain below.
$DEBUG_EMAIL = (getlogin || (getpwuid ($<))[0]) . "\@nafohq.hp.com";

# $DEBUG selects between short timings with per-user paths under $HOME and
# the production timings with paths under /opt/ims/local/sysmonitor.
if ($DEBUG)                  # debug values
{
  $STARTUP_SLEEP = 1;        #  1 seconds
  $SLEEP_TIME    = 10;       # 10 seconds
  $CONFIG_FILE   = "$ENV{HOME}/public_html/wmTools/scripts/syscheck/syscheck.rc";
  $SYSCHECK_LOG  = "$ENV{HOME}/public_html/wmTools/scripts/syscheck/log";
  $PID_FILE      = "$ENV{HOME}/public_html/wmTools/scripts/syscheck/pid";
}
else #standard values
{
  $STARTUP_SLEEP = 60 * 10;  # 10 minutes
  $SLEEP_TIME    = 60 * 3;   #  3 minutes 
  $CONFIG_FILE   = "/opt/ims/local/sysmonitor/syscheck.rc";
  $SYSCHECK_LOG  = "/opt/ims/local/sysmonitor/log";
  $PID_FILE      = "/opt/ims/local/sysmonitor/pid";
}

# Uncomment this only if running *inside* the firewall
# $HTTP_PROXY = 'http://web_proxy:8088';

# Command whose output get_ps_list parses; the trailing space is part of the
# command string.
$PS         = '/bin/ps -ef ';
$HOSTNAME   = hostname;      # from Sys::Hostname

# $NO_PROXY   = 'hp.com';

# Indices into each per-id array of config values.
#Parameters for config file (1st on line is id, these below follow)
$SEMAPHORE_PARAMETER       = 0;
$REGEX_PARAMETER           = 1;  # process-name regex (check_process_status)
$URL_PARAMETER             = 2;  # URL to fetch (check_urls)
$COMMAND_PARAMETER         = 3;
$EMAIL_PARAMETER           = 4;
$SUBJECT_PARAMETER         = 5;
$THRESHOLD_PARAMETER       = 6; 
$ERROR_REGEX_PARAMETER     = 7;  # regex that marks a fetched page as an error
$OPERATING_HOURS_PARAMETER = 8;  # period string handed to inPeriod
$DEAD_PARAMETER            = 9; # NOTE! - this must ALWAYS be the last one
			        # because it does not actually exist in the
			        # config file. The check_* subs test this slot
			        # before reporting a recovery.
###############################################################

# Run the daemon, then exit explicitly so that execution never falls through
# into the sub definitions below.
&main;
exit;

###############################################################
# Program entry point.
#
# Creates the (initially empty) table of monitored items, hands a reference
# to it to initialize, and then passes the same reference to event_loop.
# Takes no arguments; returns whatever event_loop returns.
sub main
{
  my $monitored = {};   # one shared hash, seen by both calls below

  &initialize ($monitored);
  return &event_loop ($monitored);
}

###############################################################
sub initialize
{
  my ($listref) = @_;
  my ($pid) = $$;

  my ($killit) = 1 if $ARGV[0] eq '-kill';  #cheap command line
					    #processing
  my ($startup_sleep) = $STARTUP_SLEEP;
  
  my ($curpid) = &Get_PID ($PID_FILE);
  &log ("Current PID: $curpid", "INFO", 1);
  if ($curpid && (kill 0, $curpid) && (!$killit))  #is pid listed valid?
  {
    die "Only one process daemon can be running! Current pid: $curpid\n";
  }
  elsif ($curpid && (kill 0, $curpid) && $killit)  #is pid listed valid?
  {
    &log ("Current pid: $curpid.  Killing...", "INFO");
    &log ("Kill successful", "INFO") if kill 'KILL', $curpid;
  }

  &Set_PID ($PID_FILE, $pid);
  #print "[" . &Get_PID($PID_FILE) . "]";  ###!!!remove, for debug

  &log ("Loading config file: $CONFIG_FILE", "INFO");
  &read_config ($CONFIG_FILE, $listref);

utils/sysmonitor/sysmonitord  view on Meta::CPAN

###############################################################
# Run the process-listing command held in $PS and return the command column
# of every output line.
#
# Each line is split on whitespace into at most nine fields and the ninth
# (index 8) is kept, so the rest of the line - arguments and the trailing
# newline included - stays in the returned string. Lines with fewer than
# nine fields carry no command text and are dropped.
#
# Takes no arguments. Returns a list of strings; the list is empty when the
# command cannot be started, in which case the failure is logged.
sub get_ps_list
{
  my ($ps_string) = "$PS";
  my ($ps_fh);

  # Checked, three-argument piped open on a lexical handle: a missing or
  # unrunnable ps is reported instead of silently yielding no processes.
  unless (open ($ps_fh, '-|', $ps_string))
  {
    &log ("Unable to run '$ps_string': $!", "FAILURE");
    return ();
  }
  my (@raw_pid_output) = <$ps_fh>;
  close ($ps_fh);

  #keep process name
  my (@commands) = grep { defined ($_) }
                   map  { (split /\s+/, $_, 9)[8] } @raw_pid_output;

  return @commands;
}

###############################################################
# Check every configured item that has a process-name regex against the
# current process list.
#
# $listref - reference to the hash of config entries, keyed by item id; each
#            value is an array indexed by the *_PARAMETER constants.
#
# For each id with a regex, and only while inPeriod accepts the entry's
# operating-hours string for the current time:
#   - no process line matches   -> process_error (..., 'missing process', 0)
#   - a line matches and the entry's $DEAD_PARAMETER slot is set
#                               -> process_error (..., 'success', 1)
# A regex that does not compile is logged and that id is skipped, so one bad
# config line cannot terminate the daemon. No meaningful return value.
sub check_process_status
{
  my ($listref) = @_;
  my (@ps) = get_ps_list ();   # explicit (): do not forward our own @_
  my ($id, $regex, $pattern, $now, $period);

  foreach $id (keys %$listref)
  {
    next unless ($regex = $listref->{$id}->[$REGEX_PARAMETER]);  #ignore if no process name regex

    $now = time;
    $period = $listref->{$id}->[$OPERATING_HOURS_PARAMETER];
    next unless (inPeriod ($now, $period));                      #ignore if not in operating hours

    # Compile once, under eval: the pattern comes from the config file and
    # may be malformed.
    $pattern = eval { qr/$regex/ };
    unless (defined $pattern)
    {
      (my $why = $@) =~ s/\s+$//;
      &log ("Invalid process regex for $id ($regex): $why", "FAILURE");
      next;
    }

    if (grep { defined ($_) && $_ =~ $pattern } @ps)  #process in list
    {
      # Only report success when the entry was previously flagged as dead.
      if ($listref->{$id}->[$DEAD_PARAMETER])
      {
        &process_error ($listref, $id, 'process', $regex, 'success', 1);
      }
    }
    else  #missing process
    {
      &process_error ($listref, $id, 'process', $regex, 'missing process', 0);
    }
  }
}

###############################################################
# Fetch a URL and report whether it is healthy.
#
# $url   - URL to GET.
# $regex - optional error pattern; if the response body matches it the URL
#          is treated as failed regardless of the HTTP status.
#
# Returns a two-element list (success, code):
#   (0, "request failed: <reason>")  the request died or gave no response
#   (0, "regex error match:<regex>") the body matched $regex
#   (is_success, HTTP status code)   otherwise
# $HTTP_PROXY and $NO_PROXY are applied to the user agent when set.
sub get_url_status
{
  my ($url, $regex) = @_;
  my ($ua, $request, $response);

  $ua = LWP::UserAgent->new;

  $ua->proxy ('http', $HTTP_PROXY) if $HTTP_PROXY;
  $ua->no_proxy ($NO_PROXY) if $NO_PROXY;

  $request = HTTP::Request->new ('GET', $url);

  # The eval keeps a die raised during the request (e.g. on a timeout) from
  # terminating the program.
  eval {$response = $ua->request ($request);};

  # If the request died, $response is still undefined; report a failure
  # here rather than dying on the method calls below.
  unless (defined $response)
  {
    my ($why) = $@ || 'no response returned';
    $why =~ s/\s+$//;
    return (0, "request failed: $why");
  }

  if ($regex)
  {
    if ($response->content =~ /$regex/) #if error - ie regex matched
    {
      return (0, "regex error match:$regex"); #return error, matched err string
    }
  }
  ($response->is_success (), $response->code ());
}

###############################################################
# Probe the URL of every configured item that has one.
#
# $listref - reference to the hash of config entries, keyed by item id; each
#            value is an array indexed by the *_PARAMETER constants.
#
# Entries without a URL, or whose operating-hours string is rejected by
# inPeriod for the current time, are skipped. For the rest:
#   - get_url_status reports failure -> process_error (..., $status, 0)
#   - it reports success and the entry's $DEAD_PARAMETER slot is set
#                                    -> process_error (..., $status, 1)
sub check_urls
{
  my ($listref) = @_;

  foreach my $id (keys %$listref)
  {
    my $url = $listref->{$id}->[$URL_PARAMETER];
    next if (!$url);                                # ignore if no url

    my $checked_at = time;
    my $hours      = $listref->{$id}->[$OPERATING_HOURS_PARAMETER];
    next if (!inPeriod ($checked_at, $hours));      # ignore if not in operating hours

    my ($ok, $status) =
      &get_url_status ($url, $listref->{$id}->[$ERROR_REGEX_PARAMETER]);

    if (!$ok)
    {
      &process_error ($listref, $id, 'URL', $url, $status, 0);
    }
    elsif ($listref->{$id}->[$DEAD_PARAMETER])      # working again after a failure
    {
      &process_error ($listref, $id, 'URL', $url, $status, 1);
    }
  }
}

###############################################################
#$listref = reference to syscheck file list,
#$id = item id, $label = type of item (human readable),
#$name = item value - url or process regex, $code = error code or description
#$success = 1 if the item was failing before and is now working again
#         = 0 for an error condition



( run in 0.410 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )