GRID-Cluster

 view release on metacpan or  search on metacpan

README  view on Meta::CPAN

NAME
    GRID::Cluster - Virtual clusters using SSH links

SYNOPSIS
      use GRID::Cluster;

      my $np = 4;     # Number of processes
      my $N = 1000;   # Number of iterations
      my $clean = 0;  # The files are not removed when the execution is finished

      my $machine = [ 'host1', 'host2', 'host3' ];                # Hosts
      my $debug = { host1 => 0, host2 => 0, host3 => 0 };         # Debug mode in every host
      my $max_num_np = { host1 => 1, host2 => 1, host3 => 1 };    # Maximum number of processes supported by every host

      my $c = GRID::Cluster->new(host_names => $machine, debug => $debug, max_num_np => $max_num_np)
        || die "No machines has been initialized in the cluster";

      # Transference of files to remote hosts

examples/pi/qx/pi_grid.pl  view on Meta::CPAN

# Switch the cluster object to the pi/ directory (presumably on every remote
# host -- confirm against GRID::Cluster::chdir); abort when that fails.
$c->chdir("pi/") || die "Can't change to pi/\n";

# Build one command line per process: ./pi <process index> <N> <np>.
my @commands = map {  "./pi $_ $N $np " } 0..$np-1;

# Take the start timestamp right before the commands are dispatched.
my $t0 = [gettimeofday];

# Run all the commands through the cluster's qx method and add up the values
# in the array reference it returns (presumably one partial result per
# command -- confirm against GRID::Cluster::qx).
my $pi = sum @{$c->qx(@commands)};

# Time elapsed since $t0, i.e. the time spent inside the qx call.
my $elapsed = tv_interval($t0);

# Report the run parameters, the elapsed time and the accumulated value.
print "Calculating Pi with $N iterations and $np processes\n";
print "Elapsed Time: $elapsed seconds\n";
print "Pi Value: $pi\n";

__END__


=head1 NAME

pi_grid.pl -- A simple example of parallel distributed computing

examples/pi/qx/pi_grid.pl  view on Meta::CPAN

       flag: clean files after execution

=cut

#
#
cat MachineConfig.pm
return ({europa => 0, beowulf => 0, orion => 0}, {europa => 4, beowulf => 1, orion => 1});

pp2@europa:~/LGRID-Cluster-edusegre/examples/pi/open$ ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 1
Calculating Pi with 1000000000 iterations and 1 processes
Elapsed Time: 62.915575 seconds
Pi Value: 3.141593
pp2@europa:~/LGRID-Cluster-edusegre/examples/pi/open$ ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 6
Calculating Pi with 1000000000 iterations and 6 processes
Elapsed Time: 10.586874 seconds
Pi Value: 3.141594
pp2@europa:~/LGRID-Cluster-edusegre/examples/pi/open$ ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 3
Calculating Pi with 1000000000 iterations and 3 processes
Elapsed Time: 20.986131 seconds
Pi Value: 3.141594
pp2@europa:~/LGRID-Cluster-edusegre/examples/pi/open$ !cat
pp2@europa:~/LGRID-Cluster-edusegre/examples/pi/open$ ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 12
Calculating Pi with 1000000000 iterations and 12 processes
Elapsed Time: 10.736294 seconds
Pi Value: 3.141588


*********************************************************
cat MachineConfig.pm
return ({europa => 0, beowulf => 0, orion => 0}, {europa => 1, beowulf => 1, orion => 1});

pp2@europa:~/LGRID-Cluster-edusegre/examples/pi/open$ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 3
Calculating Pi with 1000000000 iterations and 3 processes
Elapsed Time: 20.956588 seconds
Pi Value: 3.141594

real    0m22.549s
user    0m0.296s
sys     0m0.068s
pp2@europa:~/LGRID-Cluster-edusegre/examples/pi/open$ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 6
Calculating Pi with 1000000000 iterations and 6 processes
Elapsed Time: 15.694753 seconds
Pi Value: 3.141594

real    0m17.285s
user    0m0.304s
sys     0m0.104s
# this run wins because europa has 4 and beowulf has 2
pp2@europa:~/LGRID-Cluster-edusegre/examples/pi/open$ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 12
Calculating Pi with 1000000000 iterations and 12 processes
Elapsed Time: 13.246352 seconds
Pi Value: 3.141588

real    0m14.798s
user    0m0.328s
sys     0m0.116s
pp2@europa:~/LGRID-Cluster-edusegre/examples/pi/open$ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 15
Calculating Pi with 1000000000 iterations and 15 processes
Elapsed Time: 12.924256 seconds
Pi Value: 3.1416

real    0m14.500s
user    0m0.372s
sys     0m0.108s

pp2@europa:~/LGRID-Cluster-edusegre/examples/pi/open$ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 18
Calculating Pi with 1000000000 iterations and 18 processes
Elapsed Time: 14.406338 seconds
Pi Value: 3.141594

real    0m16.008s
user    0m0.364s
sys     0m0.120s

lib/GRID/Cluster.pm  view on Meta::CPAN


=head1 NAME

GRID::Cluster - Virtual clusters using SSH links

=head1 SYNOPSIS

  use GRID::Cluster;

  my $np = 4;     # Number of processes
  my $N = 1000;   # Number of iterations
  my $clean = 0;  # The files are not removed when the execution is finished

  my $machine = [ 'host1', 'host2', 'host3' ];                # Hosts
  my $debug = { host1 => 0, host2 => 0, host3 => 0 };         # Debug mode in every host
  my $max_num_np = { host1 => 1, host2 => 1, host3 => 1 };    # Maximum number of processes supported by every host

  my $c = GRID::Cluster->new(host_names => $machine, debug => $debug, max_num_np => $max_num_np)
    || die "No machines has been initialized in the cluster";

  # Transference of files to remote hosts

lib/GRID/Cluster/Tutorial.pm  view on Meta::CPAN

__END__

=head1 NAME

GRID::Cluster::Tutorial - An introduction to parallel computing using components

=head1 SYNOPSIS

  $ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 1
  Calculating Pi with 1000000000 iterations and 1 processes
  Elapsed Time: 56.591251 seconds
  Pi Value: 3.141593

  real    0m58.374s
  user    0m0.520s
  sys     0m0.048s

  $ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 2
  Calculating Pi with 1000000000 iterations and 2 processes
  Elapsed Time: 28.459958 seconds
  Pi Value: 3.141592

  real    0m30.610s
  user    0m0.524s
  sys     0m0.056s

  $ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 3
  Calculating Pi with 1000000000 iterations and 3 processes
  Elapsed Time: 20.956588 seconds
  Pi Value: 3.141594

  real    0m22.549s
  user    0m0.296s
  sys     0m0.068s

  $ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 6
  Calculating Pi with 1000000000 iterations and 6 processes
  Elapsed Time: 15.694753 seconds
  Pi Value: 3.141594

  real    0m17.285s
  user    0m0.304s
  sys     0m0.104s

  $ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 12
  Calculating Pi with 1000000000 iterations and 12 processes
  Elapsed Time: 13.246352 seconds
  Pi Value: 3.141588

  real    0m14.798s
  user    0m0.328s
  sys     0m0.116s

  $ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 15
  Calculating Pi with 1000000000 iterations and 15 processes
  Elapsed Time: 12.924256 seconds
  Pi Value: 3.1416

  real    0m14.500s
  user    0m0.372s
  sys     0m0.108s

=head1 SUMMARY

Programming is difficult. Parallel programming is harder.

lib/GRID/Cluster/Tutorial.pm  view on Meta::CPAN

directory of every remote machine associated to the virtual parallel machine.

 43 my @commands = map {  "./pi $_ $N $np " } 0..$np-1;
 44
 45 my $t0 = [gettimeofday];
 46
 47 my $pi = sum @{$c->qx(@commands)};
 48
 49 my $elapsed = tv_interval($t0);
 50
 51 print "Calculating Pi with $N iterations and $np processes\n";
 52 print "Elapsed Time: $elapsed seconds\n";
 53 print "Pi Value: $pi\n";

The last step consists of creating the commands that are going to be executed
on different machines (line 43) by means of the method I<qx> of a GRID::Cluster
object. This method allows the execution of different tasks or processes
following a farm-based approach; that is, initially, a maximum number
of processes are run, and when one of them finishes its execution, a new process
is run, if there are more pending processes to be executed. This feature allows
a good load balancing among different machines.

lib/GRID/Cluster/Tutorial.pm  view on Meta::CPAN

  user    0m28.654s
  sys     0m0.049s

These results indicate that the first machine is slower than the other two.

Now let us run the driver using only the fastest machine and one process. The time
spent is comparable to the pure C<C> time, and that is great because the overhead
introduced by the coordination tasks is not as large:

  $ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 1
  Calculating Pi with 1000000000 iterations and 1 processes
  Elapsed Time: 30.919523 seconds
  Pi Value: 3.141593

  real    0m32.690s
  user    0m0.516s
  sys     0m0.060s

Now we are going to execute the driver using two different machines, each one with only
one process:

  $ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 2
  Calculating Pi with 1000000000 iterations and 2 processes
  Elapsed Time: 28.459958 seconds
  Pi Value: 3.141592

  real    0m30.610s
  user    0m0.524s
  sys     0m0.056s

We can see that the sequential pure C version took 56 seconds on the slowest machine.
By using two machines, each one with one process, the time has been reduced to 31 seconds.
This is a factor of 56/31 = 1.80 times faster. This factor is even better if I don't consider
the set-up time: 56/29 = 1.93. The total time decreases if three machines are used, every
one with only one process:

  $ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 3
  Calculating Pi with 1000000000 iterations and 3 processes
  Elapsed Time: 20.956588 seconds
  Pi Value: 3.141594

  real    0m22.549s
  user    0m0.296s
  sys     0m0.068s

which gives a speed factor of 56/23 = 2.43 or not considering the set-up time 56/21 = 2.66.

If you increase the number of processes, the use of the method I<qx> makes it possible to obtain
better results, due to the load balancing produced by the use of a mechanism based
on a farm. The results of increasing the number of processes (but only using three machines,
each one running a single process at any given moment) are shown in the following lines:

  $ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 6
  Calculating Pi with 1000000000 iterations and 6 processes
  Elapsed Time: 15.694753 seconds
  Pi Value: 3.141594

  real    0m17.285s
  user    0m0.304s
  sys     0m0.104s

  $ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 12
  Calculating Pi with 1000000000 iterations and 12 processes
  Elapsed Time: 13.246352 seconds
  Pi Value: 3.141588

  real    0m14.798s
  user    0m0.328s
  sys     0m0.116s

  $ time ./pi_grid.pl -co MachineConfig.pm -N 1000000000 -np 15
  Calculating Pi with 1000000000 iterations and 15 processes
  Elapsed Time: 12.924256 seconds
  Pi Value: 3.1416

  real    0m14.500s
  user    0m0.372s
  sys     0m0.108s

Using 3 processes with 3 machines (every one with only one process), the
fastest machines have to wait for the slowest one to finish the execution.
Using 6, 12 and 15 processes, the time is decreased. Because of the

t/GRID-Cluster.t  view on Meta::CPAN

# Path of the example driver exercised by the tests below.
my $executable = "pi_qx/pi_grid.pl";

# If the driver is not executable from the current directory, move into t/
# so that the relative path can resolve from there.
chdir "t" unless -x $executable;
SKIP: {
  skip("Developer test", 12) unless ($ENV{DEVELOPER} && $ENV{GRID_REMOTE_MACHINES} && -x "$executable" && ($^O =~ /nux$|darwin/));

     my $output = `perl $executable -N 1000 2>&1`;
     like($output, qr{Pi Value: 3.14159.*}, "Example to calculate PI with 1000 iterations");
     
     $output = `perl $executable -N 1000000 2>&1`;
     like($output, qr{Pi Value: 3.14159.*}, "Example to calculate PI with 1000000 iterations");
     
     $output = `perl $executable -N 1000000000 2>&1`;
     like($output, qr{Pi Value: 3.14159.*}, "Example to calculate PI with 1000000000 iterations");
     
     my $old_machines = $ENV{GRID_REMOTE_MACHINES};
     $ENV{GRID_REMOTE_MACHINES} = "";
     
     $output = `perl $executable 2>&1`;
     like($output,
          qr{No machines has been initialized in the cluster at $executable line 26.},
          "Error: No machines have been initialized in the cluster");

     $ENV{GRID_REMOTE_MACHINES} = "not_exists";



( run in 0.629 second using v1.01-cache-2.11-cpan-71847e10f99 )