Parallel-Mpich-MPD
view release on metacpan or search on metacpan
lib/Parallel/Mpich/MPD.pm view on Meta::CPAN
package Parallel::Mpich::MPD;
use warnings;
use strict;
use File::Temp;
use IO::All;
use Carp;
use Time::HiRes qw( usleep);
use Data::Dumper;
use Parallel::Mpich::MPD::Common;
use Parallel::Mpich::MPD::Job;
=head1 NAME
Parallel::Mpich::MPD - Mpich MPD wrapper
=item I<$VERSION>
=cut
our $VERSION = '0.9.3';
=head1 SYNOPSIS
use Parallel::Mpich::MPD;
# VERBOSE LEVEL
#$Parallel::Mpich::MPD::Common::WARN=1;
#$Parallel::Mpich::MPD::Common::DEBUG=1;
#CHECK ENV
Parallel::Mpich::MPD::Common::env_Hostsfile('/path/to/machinesfile');
Parallel::Mpich::MPD::Common::env_MpichHome('/path/to/mpdhome');
Parallel::Mpich::MPD::Common::env_Check();
#CHECK MPD AND NETWORK
my %hostsup;
my %hostsdown;
my %info=Parallel::Mpich::MPD::info(); #check mpd master
print Dumper(\%info);
%hostsup= Parallel::Mpich::MPD::Common::checkHosts(hostsdown => \%hostsdown ); #check ping and ssh on machines
%hostsup= Parallel::Mpich::MPD::check( reboot => 1, hostsdown=>\%hostsdown); #check mpds instances and try to repair (use reboot => 0 to check only)
...
# USE MPD
Parallel::Mpich::MPD::boot(); #start mpd instances defined by default machinesfile
my $alias1=Parallel::Mpich::MPD::makealias();
if (Parallel::Mpich::MPD::createJob(cmd => $cmd, params => $parms, machinesfile => $hostsfile, alias => $alias1)){
my $job=Parallel::Mpich::MPD::findJob(jobalias => $alias1, getone => 1);
$job->sig_kill() if defined $job;
}
=head1 DESCRIPTION
I<Parallel::Mpich::MPD> is a wrapper module for the MPICH2 Process Management toolkit from L<http://www-unix.mcs.anl.gov/mpi/mpich2/>.
The wrapper includes the following tools: basic configuration, mpdcheck, mpdboot, mpdcleanup, mpdtrace,
mpdringtest, mpdallexit, mpiexec, mpdsigjob and mpdlistjobs.
=over 4
=item boot(hosts => @hosts, machinesfile => $machines, checkOnly => 1|0, output => \$output)
Starts a set of mpd's on a list of machines. boot tries to verify that the hosts in the host
file are up before attempting to start mpds on any of them.
=item rebootHost(host => $hostname)
restart mpd on the specified host. rebootHost will kill old mpds before restarting a new one.
The killed MPDS are filtered by specific port and host.
=item check(machinesfile => $file, hostsup => \%hosts, hostsdown => \%hostsdown , reboot => 1)
Checks whether the MPD master and nodes are up. If the MPD master is down, it tries to ping and ssh to the machines.
If you use the reboot option, check will try to restart mpd on the specified machines or to reboot the master.
=item info( )
Returns an %info hash describing the master, with the following keys: master, hostname, port.
=item validateMachinesfile(machinefiles => $filename)
check with mpdtrace if all machines specified by filename are up. If not, a temporary file is
created with the resized machinesfile
=item shutdown( )
causes all mpds in the ring to exit
=item createJob(cmd => $cmd , machinesfile=> $filename, [params => $params], [ncpu => $ncpu], [alias => $alias])
Starts a new job with the given command line and its params. It returns true if ok.
WARNING: ncpu could be redefined if mpdtrace returns a small hosts list.
Example:
Parallel::Mpich::MPD::createJob(cmd => $cmd, params => $parms, ncpu => '3', alias => 'job1');
=item listJobs([mpdlistjobs_contents=>$str])
Returns an array of Parallel::Mpich::MPD::Job objects for all available jobs.
If the mpdlistjobs_contents argument is present, the code will not call mpdlistjobs but
will take the parameter as a fake result of this command.
=item findJob([%criteria][, return=>(getone|host2pidlist)])
Finds a job from the criteria. It returns a Job instance, or undef if there is no match.
=over 4
=item Criteria can be of
=item username=>'somename' or username=>\@arrayOfNames
=item jobid=>'somename' or jobid=>\@arrayOfJobid
=item jobalias=>'somename' or jobalias=>\@arrayOfJobalias
( run in 0.936 second using v1.01-cache-2.11-cpan-71847e10f99 )