DB2-Admin

 view release on metacpan or  search on metacpan

sample/db2_check_lock.pl  view on Meta::CPAN

#!/usr/bin/perl5
#
# db2_check_lock - Check for blocked applications, optionally kill
#
# $Id: db2_check_lock.pl,v 145.1 2007/11/20 21:51:24 biersma Exp $
#

use strict;
use Carp;
use DB2::Admin;
use Getopt::Std;
use Pod::Usage;
use Socket;

sample/db2_check_lock.pl  view on Meta::CPAN

			       )
			   ],
	      'Node' => 'SQLM_ALL_NODES', # V8 or higher
	     );
unless (defined $snap) {
    print "No active databases. Nothing to do.\n";
    exit(0);
}

my %agent_ids;			# Agent Id -> Appl node
my %blocked_apps;		# Agent Id -> Agent Id (blocked -> lock owner)
my $no_long_blocked = 0;	# No of jobs blocked for > block time
my $now = time();
foreach my $node ($snap->findNodes('APPL')) {
    #print $node->Format(); exit(0);

    #
    # Implement filtering on -D
    #
    my $db_name = $node->findValue('APPL_INFO/DB_NAME');
    $db_name =~ s/\s+$//;
    next if (defined $args{D} && lc $args{D} ne lc $db_name);

    my $agent_id = $node->findValue('APPL_INFO/AGENT_ID');
    $agent_ids{$agent_id} = $node;

    my $is_blocked = $node->findValue('LOCKS_WAITING');
    next unless ($is_blocked);
    my $blocker = $node->findValue('LOCK_WAIT/AGENT_ID_HOLDING_LK');
    $blocked_apps{$agent_id} = $blocker;

    my $start = $node->findValue('LOCK_WAIT/LOCK_WAIT_START_TIME/SECONDS');
    if (($now - $start) > $args{t}) {
	$no_long_blocked++;
    }
}

if ($args{k}) {
    my $agent_id = $args{k};
    unless (defined $agent_id) {
	warn "Invalid '-kill $agent_id' - application is not (no longer) active\n";
	exit(1);
    }
    my $appl_node = $agent_ids{$agent_id};

sample/db2_check_lock.pl  view on Meta::CPAN

	warn "Invalid '-kill $agent_id' - application does not hold any locks\n";
	exit(1);
    }

    print "Killing application with agent id $agent_id\n";
    my $db_name = $appl_node->findValue('APPL_INFO/DB_NAME');
    DB2::Admin::->ForceApplications($agent_id);
    exit(0);
}

unless ($no_long_blocked) {
    print "No applications are blocked for over $args{t} seconds\n";
    exit(0);
}

if (0) {
    #
    # Fake a tree
    #
    $blocked_apps{4} = 5;
    $blocked_apps{5} = 6;
    $blocked_apps{6} = 8;
    $blocked_apps{7} = 8;
    #  $blocked_apps{8} = 5; # Deadlock
}

#
# See if we can compute a blocking tree
#
my %lock_owner;			# Blocked -> Final lock owner
my %lock_children;		# Lock owner -> Child -> 1
BLOCKED:
foreach my $agent_id (keys %blocked_apps) {
    my $blocker = $blocked_apps{$agent_id};
    my $parent = $blocked_apps{$blocker};
    if (defined $parent) {
	#print "Have lock hierarchy: $agent_id -> $blocker -> $parent\n";
	while (defined $blocked_apps{$parent}) {
	    if ($parent == $blocker) {
		print "Deadlock detected ($agent_id -> $blocker ... $blocker)\n";
		next BLOCKED;
	    }
	    $parent = $blocked_apps{$parent};
	}
	$lock_owner{$agent_id} = $parent;
	$lock_children{$parent}{$agent_id} = 1;
    } else {
	$lock_owner{$agent_id} = $blocker;
	$lock_children{$blocker}{$agent_id} = 1;
    }
}
print STDERR "Have lock owners [", join(',', sort keys %lock_children), "]\n"
  if ($args{d});

sample/db2_check_lock.pl  view on Meta::CPAN

#
# Generate the lock alert text for a single lock holder.
# We send this by mail - YMMV
#
sub generate_lock_alert {
    my ($lock_owner) = @_;
    print STDERR "Generate alert for [$lock_owner]\n"
      if ($args{d});

    my $owner_node = $agent_ids{$lock_owner};
    my $no_blocked = keys %{ $lock_children{$lock_owner} };

    #
    # Alert should look like: Error: Server NYPIBA4 agent id 71 has been
    # blocking X applications since Aug 17 2004 2:00AM
    #
    my $since;
    foreach my $blocked (keys %{ $lock_children{$lock_owner} }) {
	my $appl_node = $agent_ids{$blocked};
	my $start = $appl_node->findValue('LOCK_WAIT/LOCK_WAIT_START_TIME/SECONDS');
	#print "Have [$blocked] lock wait start time [$start] [" . localtime($start) . "]\n";
	if (! defined $since || $start < $since) {
	    $since = $start;
	}
    }
    my @time_elems = localtime($since);
    my $display_time = sprintf("%4d/%02d/%02d %02d:%02d",
			       $time_elems[5] + 1900,
			       $time_elems[4] + 1,
			       $time_elems[3],
			       $time_elems[2],
			       $time_elems[1]);
    my $alert = "Server $ENV{DB2INSTANCE} agent id $lock_owner has been blocking $no_blocked applications since $display_time";

    my $now = localtime(time);
    my $dbname = $owner_node->findValue('APPL_INFO/DB_NAME');
    $dbname =~ s/\s+$//;

    my @descendants = keys %{ $lock_children{$lock_owner} };
    my @direct_children;
    foreach my $id (@descendants) {
	next unless ($blocked_apps{$id} == $lock_owner);
	push @direct_children, $id;
    }
    my %blocked_tables;
    my %schema_names;
    foreach my $id (@direct_children) {
	my $node = $agent_ids{$id};
	foreach my $wl ($node->findNodes('LOCK_WAIT')) {
	    my $schema = $wl->findValue('TABLE_SCHEMA');
	    $schema =~ s/\s+$//;
	    $schema_names{$schema} = 1;
	    my $table = $wl->findValue('TABLE_NAME');
	    $table =~ s/\s+$//;
	    $blocked_tables{"$schema.$table"} = 1;
	}
    }
    my $table_list = join(', ', sort keys %blocked_tables);

    my $desc = "$ENV{DB2INSTANCE}.$dbname";

    my $email = <<_END_HEADER_;
Please be advised that there is blocking in $desc.
The blocking is caused by agent id $lock_owner in the table(s) $table_list.
Please deal with the blocking process so that other processing
may continue.

Thank You.

sample/db2_check_lock.pl  view on Meta::CPAN

    $email .= "Host name and pid of the blocking process:\n";
    $email .= '#' x 20 . "\n";
    $email .= display_client_info($lock_owner);
    $email .= "\n";

    $email .= "For an explanation of the status column, go to:\n";
    $email .= "http://publib.boulder.ibm.com/infocenter/db2help/topic/com.ibm.db2.udb.doc/admin/r0001162.htm\n";
    $email .= "\n";

    #
    # Show at most 10 blocked processes
    #
    if (@descendants > 10) {
	$email .= "There are " . scalar(@descendants) . " processes being blocked\n";
	$email .= "The first 10 blocked processes are listed below\n";
	$email .= '#' x 20 . "\n";
	$email .= display_agents(@descendants[0..9]);
	$email .= "\n";
    } else {
	$email .= "The following processes are being blocked:\n";
	$email .= '#' x 20 . "\n";
	$email .= display_agents(@descendants);
	$email .= "\n";
    }

    #
    # Fire off mail - you'll have to adjust this
    #
    print "Generated the following output:\n\n$email\n";

sample/db2_check_lock.pl  view on Meta::CPAN

	my $hostname = $values->{'CLIENT_NNAME'};
	unless (defined $hostname && $hostname =~ /\S/) {
	    my ($ip_addr) = ($values->{'APPL_ID'} =~ /^(\S+) port /);
	    my $host = gethostbyaddr(inet_aton($ip_addr), AF_INET);
	    $hostname = $host || $ip_addr;
	}
	my $hostproc = $values->{'CLIENT_PID'};
	my $status = $values->{'APPL_STATUS'};
	my $block_start = $appl_node->
          findValue('LOCK_WAIT/LOCK_WAIT_START_TIME/SECONDS');
	my $time_blocked = (defined $block_start ?
			    $snap_time - $block_start : 'n/a');

	#
	# For multi-tier apps, add the TP Monitor information if
	# available.
	#
	if ($values->{TPMON_CLIENT_USERID} =~ /\S/) {
	    $userid .= " (for $values->{TPMON_CLIENT_USERID})";
	}
	if ($values->{TPMON_CLIENT_WKSTN} =~ /\S/) {
	    $hostname .= " (for $values->{TPMON_CLIENT_WKSTN})";
	}
	if ($values->{TPMON_CLIENT_APP} =~ /\S/) {
	    $program .= " (for $values->{TPMON_CLIENT_APP})";
	}

	$table->row($id, $userid, $cmd, $program, $hostname,
		    $hostproc, $status, $time_blocked);
    }

    return $table->render(120);
}


#
# Display locks for a single agent id
#
sub display_agent_locks  {

sample/db2_check_lock.pl  view on Meta::CPAN

=head1 SYNOPSIS

  db2_check_lock [ -D<database> ]
                 [ -k<agent id> ]
                 [ -t<block time> ]
                 [ -d ]

=head1 DESCRIPTION

This script performs a database instance snapshot to determine
blocking.  If any agent process is blocked for over a specified time,
an email is generated with information on the blocking process and its
victims. Optionally, the blocking agent process can be killed.  This
script must be run on the database server.

This script first gets an application snapshot with basic locking
information.  If the C<-k> parameter is specified, the script verifies
whather the specified agent id still exists and still is holding a
lock. If so, the application is forced.

In the absence of a C<-k> parameter, the script verifies whether any
applications is blocked for over the specified time (taken from the
JobParams table, defaulting to 120 seconds). If not, the script takes
no action.

If there is long blocking, the script find the responsible agent
process by following the lock chain.  For each responsible agent, an
email is generated that lists the agent, the locks it holds, the
responsible SQL statement, client process information, plus a list of
the victim processes.

=head1 OPTIONS



( run in 0.474 second using v1.01-cache-2.11-cpan-49f99fa48dc )