frame results from the CPAN

frame

Apache-Hadoop-Config

view release on metacpan or search on metacpan

lib/Apache/Hadoop/Config.pm view on Meta::CPAN

            disk       => $args{'diskinfo'} || undef,
        },
    };
    bless $self, $class;
    return $self;
}

# internal utils
# Return the numerically smallest value of a list.
#
# Parameters:
#   $self - invocant (not used by the computation)
#   @arr  - numbers to compare
# Returns the smallest element, or undef when @arr is empty.
sub _minimum {
    my ($self, @arr) = (@_);
    my $min;
    for my $value (@arr) {
        # Test definedness, not truth: a running minimum of 0 is a valid
        # result and must not be treated as "nothing seen yet".
        $min = $value if !defined $min || $value < $min;
    }
    return $min;
}

# Return the numerically largest value of a list.
#
# Parameters:
#   $self - invocant (not used by the computation)
#   @arr  - numbers to compare
# Returns the largest element, or undef when @arr is empty.
sub _maximum {
    my ($self, @arr) = (@_);
    my $max;
    for my $value (@arr) {
        # Test definedness, not truth: a running maximum of 0 is a valid
        # result (e.g. for non-positive input) and must not be overwritten
        # by the next, smaller element.
        $max = $value if !defined $max || $value > $max;
    }
    return $max;
}

# Fold a configuration template into $self->{'config'}.
#
# Parameters:
#   $self   - object (hashref) holding the 'config' slot
#   $config - hashref of the form { file => { parameter => value } }
#
# When no configuration is stored yet, $config itself (the same reference)
# becomes the stored configuration. Otherwise only parameters that are not
# already defined in the stored configuration are copied over; existing
# values are never overwritten.
sub _copyconf {
    my ($self, $config) = (@_);

    # first call: adopt the template wholesale
    if ( !defined $self->{'config'} ) {
        $self->{'config'} = $config;
        return;
    }

    my $stored = $self->{'config'};
    foreach my $file ( keys %{$config} ) {
        foreach my $param ( keys %{$config->{$file}} ) {
            # values that are already set take precedence over the template
            next if defined $stored->{$file}->{$param};
            $stored->{$file}->{$param} = $config->{$file}->{$param};
        }
    }
}

# get config template
# Build the basic configuration template from the object's settings and
# merge it into the stored configuration through _copyconf.
#
# Reads from $self: namenode, hdfs_tmp, hdfs_name_disks, hdfs_data_disks,
# secondary, proxynode and proxyport. Returns whatever _copyconf returns.
sub basic_config {
    my ($self) = (@_);

    # NOTE(review): Hadoop's fs.defaultFS is normally an hdfs:// URI; the
    # http:// scheme below looks suspicious -- confirm before changing it.
    my %core_site = (
        'fs.defaultFS'   => 'http://'.$self->{namenode}.':9000',
        'hadoop.tmp.dir' => $self->{'hdfs_tmp'},
    );

    my %hdfs_site = (
        'dfs.replication' => 1,

        # every disk becomes a file:// URI; the URIs are comma separated
        'dfs.namenode.name.dir' => join ( ',', map { 'file://'.$_ } @{$self->{'hdfs_name_disks'}} ),
        'dfs.datanode.data.dir' => join ( ',', map { 'file://'.$_ } @{$self->{'hdfs_data_disks'}} ),

        # secondary namenode
        'dfs.namenode.secondary.http-address'  => $self->{'secondary'}.':50090',
        'dfs.namenode.secondary.https-address' => $self->{'secondary'}.':50091',
    );

    my %yarn_site = (
        'yarn.nodemanager.aux-services' => 'mapreduce_shuffle',
        'yarn.nodemanager.aux-services.mapreduce.shuffle.class' => 'org.apache.hadoop.mapred.ShuffleHandler',
        'yarn.web-proxy.address' => $self->{'proxynode'}.':'.$self->{'proxyport'},
    );

    my %mapred_site = (
        'mapreduce.framework.name' => 'yarn',
    );

    $self->_copyconf ({
        'core-site.xml'   => \%core_site,
        'hdfs-site.xml'   => \%hdfs_site,
        'yarn-site.xml'   => \%yarn_site,
        'mapred-site.xml' => \%mapred_site,
    });
}

#
# directory management
#
# Create a directory together with all of its missing parent directories.
#
# Parameters (named):
#   directory - path to create; absolute and relative paths are supported
#   mode      - permission bits for every created component (default 0750)
#
# The umask is cleared while the directories are created so that the mode is
# applied exactly as given, and it is restored afterwards. The process
# working directory is never changed. Failures of mkdir (for instance
# because a component already exists) are ignored on purpose: the operation
# is best effort.
sub _mkdir {
    my ($self, %opts) = (@_);
    my $mode      = $opts{'mode'} || 0750;
    my $directory = $opts{'directory'};
    return unless defined $directory && length $directory;

    my $old_umask = umask (0);

    # Build the path up component by component instead of chdir-ing into
    # each one; a leading slash is kept so absolute paths stay absolute.
    my $path = $directory =~ m{\A/} ? '/' : '';
    for my $part ( grep { length } split (/\//, $directory) ) {
        $path .= $part;
        mkdir $path, $mode;
        $path .= '/';
    }

    # umask returns undef where it is not implemented; nothing to restore then
    umask ($old_umask) if defined $old_umask;
    return;
}

# Create every directory listed in $self->{'hdfs_name_disks'} via _mkdir,
# announcing each one on standard output when the 'debug' setting is defined.
sub create_hdfs_name_disks {
    my ($self) = (@_);
    foreach my $name_dir ( @{$self->{'hdfs_name_disks'}} ) {
        if ( defined $self->{'debug'} ) {
            print "creating ".$name_dir, "\n";
        }
        $self->_mkdir( directory => $name_dir );
    }
}

# Create every directory listed in $self->{'hdfs_data_disks'} via _mkdir,
# announcing each one on standard output when the 'debug' setting is defined.
sub create_hdfs_data_disks {
    my ($self) = (@_);
    foreach my $data_dir ( @{$self->{'hdfs_data_disks'}} ) {
        if ( defined $self->{'debug'} ) {
            print "creating ".$data_dir, "\n";
        }
        $self->_mkdir( directory => $data_dir );
    }
}

# Create the directory named by $self->{'hdfs_tmp'} via _mkdir with mode
# 01775, announcing it on standard output when the 'debug' setting is
# defined. Returns whatever _mkdir returns.
sub create_hdfs_tmpdir {
    my ($self) = (@_);
    if ( defined $self->{'debug'} ) {
        print "creating ".$self->{'hdfs_tmp'}, "\n";
    }
    $self->_mkdir( mode => 01775, directory => $self->{'hdfs_tmp'} );
}

# Create the log directories listed in $self->{'hadoop_logs'} via _mkdir.
# Each entry is appended to $self->{'hadoop_install'} to form the path, and
# the directory is created with mode 01775. Every path is announced on
# standard output when the 'debug' setting is defined.
sub create_hadoop_logdir {
    my ($self) = (@_);
    foreach my $log_dir ( @{$self->{'hadoop_logs'}} ) {
        if ( defined $self->{'debug'} ) {
            print "creating ".$self->{'hadoop_install'}.$log_dir, "\n";
        }
        $self->_mkdir( mode => 01775, directory => $self->{'hadoop_install'}.$log_dir );
    }
}
# ends

#
# begin recommended settings
#

# get cpu core count
# Count the lines starting with "processor" in a cpuinfo file and store the
# count in $self->{'sysinfo'}->{'cpu'}.
#
# Parameters:
#   $self    - object (hashref) whose sysinfo/cpu slot is filled in
#   $cpuinfo - optional path of the file to scan (default: /proc/cpuinfo)
#
# Dies when the file cannot be opened. Returns the result of close() on the
# file handle, which is what the sub has always evaluated to.
sub _get_cpu_cores {
    my ($self, $cpuinfo) = (@_);
    $cpuinfo = '/proc/cpuinfo' unless defined $cpuinfo;

    # lexical handle and three-argument open: no package-global CPU handle,
    # and the path can never be interpreted as an open mode
    open my $cpu_fh, '<', $cpuinfo or die "Cannot open $cpuinfo, $!\n";

    my $cores = 0;
    while ( my $line = <$cpu_fh> ) {
        $cores++ if $line =~ /^processor/;
    }
    $self->{'sysinfo'}->{'cpu'} = $cores;

    return close $cpu_fh;
}

lib/Apache/Hadoop/Config.pm view on Meta::CPAN


=head1 EXAMPLES

Below are a few examples of different uses. The first example creates
recommended configurations for the localhost, or for system data supplied
on the command line:

        #!/usr/bin/perl -w
        use strict;
        use warnings;
        use Apache::Hadoop::Config;
        use Getopt::Long;
        
        my %opts;
        GetOptions (\%opts, 'disks=s','memory=s','cores=s');
        
        my $h = Apache::Hadoop::Config->new (
                meminfo=>$opts{'memory'} || undef,
                cpuinfo=>$opts{'cores'} || undef,
                diskinfo=>$opts{'disks'} || undef,
                );
        
        # setup configs
        $h->basic_config;
        $h->memory_config;
        
        # print and save
        $h->print_config;
        $h->write_config (confdir=>'.');
        
        exit(0);

The above gives an output like below, if no argument is supplied:

        min cont size (mb)    : 256
        num of containers     : 7
        mem per container (mb): 368
         disk : 4
          cpu : 4
          mem : 3.52075958251953
        ---------------
        hdfs-site.xml
          dfs.namenode.secondary.http-address: 0.0.0.0:50090
          dfs.replication: 1
          dfs.datanode.data.dir: file:///hdfs/data1,file:///hdfs/data2,file:///hdfs/data3,file:///hdfs/data4
          dfs.namenode.secondary.https-address: 0.0.0.0:50091
          dfs.namenode.name.dir: file:///hdfs/name1,file:///hdfs/name2
        yarn-site.xml
          yarn.web-proxy.address: localhost:8888
          yarn.nodemanager.aux-services: mapreduce_shuffle
          yarn.scheduler.minimum-allocation-mb: 368
          yarn.scheduler.maximum-allocation-mb: 2576
          yarn.nodemanager.aux-services.mapreduce.shuffle.class: org.apache.hadoop.mapred.ShuffleHandler
          yarn.nodemanager.resource.memory-mb: 2576
        core-site.xml
          hadoop.tmp.dir: /hdfs/tmp
          fs.defaultFS: http://localhost:9000
        mapred-site.xml
          mapreduce.reduce.java.opts: -Xmx588m
          mapreduce.map.memory.mb: 368
          mapreduce.map.java.opts: -Xmx294m
          mapreduce.framework.name: yarn
          mapreduce.reduce.memory.mb: 736
        ---------------
        -> writing to ./hdfs-site.xml ...
        -> writing to ./yarn-site.xml ...
        -> writing to ./core-site.xml ...
        -> writing to ./mapred-site.xml ...

If some arguments are supplied, for example to describe a different cluster,
the configuration files can still be generated:

        $ perl hadoop_config.pl --cores 16 --memory 64 --disks 6
        min cont size (mb)    : 2048
        num of containers     : 10
        mem per container (mb): 5734
         disk : 6
          cpu : 16
          mem : 64
        ---------------
        hdfs-site.xml
          dfs.namenode.secondary.http-address: 0.0.0.0:50090
          dfs.replication: 1
          dfs.datanode.data.dir: file:///hdfs/data1,file:///hdfs/data2,file:///hdfs/data3,file:///hdfs/data4
          dfs.namenode.secondary.https-address: 0.0.0.0:50091
          dfs.namenode.name.dir: file:///hdfs/name1,file:///hdfs/name2
        yarn-site.xml
          yarn.web-proxy.address: localhost:8888
          yarn.nodemanager.aux-services: mapreduce_shuffle
          yarn.scheduler.minimum-allocation-mb: 5734
          yarn.scheduler.maximum-allocation-mb: 57340
          yarn.nodemanager.aux-services.mapreduce.shuffle.class: org.apache.hadoop.mapred.ShuffleHandler
          yarn.nodemanager.resource.memory-mb: 57340
        core-site.xml
          hadoop.tmp.dir: /hdfs/tmp
          fs.defaultFS: http://localhost:9000
        mapred-site.xml
          mapreduce.reduce.java.opts: -Xmx9174m
          mapreduce.map.memory.mb: 5734
          mapreduce.map.java.opts: -Xmx4587m
          mapreduce.framework.name: yarn
          mapreduce.reduce.memory.mb: 11468
        ---------------
        -> writing to ./hdfs-site.xml ...
        -> writing to ./yarn-site.xml ...
        -> writing to ./core-site.xml ...
        -> writing to ./mapred-site.xml ...

Further customization can be done using the object's constructor arguments.


=head1 SEE ALSO

hadoop.apache.org - The Hadoop documentation and authoritative source for 
Apache Hadoop and its components.


=head1 AUTHOR

Snehasis Sinha, E<lt>snehasis@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2015 by Snehasis Sinha

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.1 or,
at your option, any later version of Perl 5 you may have available.


=cut

( run in 2.206 seconds using v1.01-cache-2.11-cpan-df04353d9ac )