Apache-Hadoop-Config
view release on metacpan or search on metacpan
lib/Apache/Hadoop/Config.pm view on Meta::CPAN
disk => $args{'diskinfo'} || undef,
},
};
bless $self, $class;
return $self;
}
# internal utils
# Return the numerically smallest value of a list.
#
# Called as a method: $self->_minimum(@values); $self itself is not used.
# Returns undef when no values are supplied.
sub _minimum {
    my ($self, @arr) = @_;
    my $min;
    for my $value (@arr) {
        # Test definedness rather than truth: a running minimum of 0 is a
        # legitimate result and must not be mistaken for "nothing seen yet".
        $min = $value if !defined $min || $value < $min;
    }
    return $min;
}
# Return the numerically largest value of a list.
#
# Called as a method: $self->_maximum(@values); $self itself is not used.
# Returns undef when no values are supplied.
sub _maximum {
    my ($self, @arr) = @_;
    my $max;
    for my $value (@arr) {
        # Test definedness rather than truth: a running maximum of 0 is a
        # legitimate result (e.g. when every other value is negative).
        $max = $value if !defined $max || $value > $max;
    }
    return $max;
}
# Merge a configuration template into the configuration stored on the object.
#
#   $config - hashref keyed by file name; each value is a hashref of
#             parameter name => value.
#
# When nothing is stored yet, the given hashref itself becomes the stored
# configuration (the same reference, not a copy). Otherwise only parameters
# that are not yet defined in the store are filled in; values that are
# already defined are never overwritten.
sub _copyconf {
    my ($self, $config) = @_;

    # First call: adopt the template wholesale.
    if ( !defined $self->{'config'} ) {
        $self->{'config'} = $config;
        return;
    }

    my $stored = $self->{'config'};
    for my $file ( keys %{$config} ) {
        for my $param ( keys %{$config->{$file}} ) {
            next if defined $stored->{$file}->{$param};
            $stored->{$file}->{$param} = $config->{$file}->{$param};
        }
    }
}
# get config template
# Build the basic configuration template and merge it into the object's
# configuration via _copyconf, so values already present are kept.
#
# Reads from $self: namenode, hdfs_tmp, hdfs_name_disks, hdfs_data_disks
# (both array refs of directory paths), secondary, proxynode and proxyport.
# Returns whatever _copyconf returns.
sub basic_config {
    my ($self) = @_;
    my $config = {
        'core-site.xml' => {
            # fs.defaultFS names the default file system; for a NameNode it
            # must use the hdfs:// scheme (an http:// URI is not accepted as
            # a NameNode address).
            'fs.defaultFS'   => 'hdfs://'.$self->{namenode}.':9000',
            'hadoop.tmp.dir' => $self->{'hdfs_tmp'},
        },
        'hdfs-site.xml' => {
            'dfs.replication'       => 1,
            # Each local directory is published as a file:// URI,
            # comma-separated.
            'dfs.namenode.name.dir' => join ( ',', map { 'file://'.$_ } @{$self->{'hdfs_name_disks'}} ),
            'dfs.datanode.data.dir' => join ( ',', map { 'file://'.$_ } @{$self->{'hdfs_data_disks'}} ),
            # secondary namenode
            'dfs.namenode.secondary.http-address'  => $self->{'secondary'}.':50090',
            'dfs.namenode.secondary.https-address' => $self->{'secondary'}.':50091',
        },
        'yarn-site.xml' => {
            'yarn.nodemanager.aux-services' => 'mapreduce_shuffle',
            'yarn.nodemanager.aux-services.mapreduce.shuffle.class' => 'org.apache.hadoop.mapred.ShuffleHandler',
            'yarn.web-proxy.address' => $self->{'proxynode'}.':'.$self->{'proxyport'},
        },
        'mapred-site.xml' => {
            'mapreduce.framework.name' => 'yarn',
        },
    };
    return $self->_copyconf ( $config );
}
#
# directory management
#
# Create a directory together with any missing parent directories.
#
# Options:
#   directory - path to create (absolute or relative to the current
#               working directory)
#   mode      - permission bits applied to every directory that gets
#               created; defaults to 0750
#
# The umask is cleared while creating so the requested mode is applied
# exactly, and is restored afterwards. The process working directory is
# never changed. Returns 1 when the whole path exists afterwards, 0 when
# no directory was given or a component could not be created (a warning
# is emitted in the latter case).
sub _mkdir {
    my ($self, %opts) = @_;
    my $mode      = $opts{'mode'} || 0750;
    my $directory = $opts{'directory'};
    return 0 unless defined $directory && length $directory;

    my $saved_umask = umask(0);

    # Grow the path one component at a time instead of chdir-ing into each
    # level; keep the leading slash so absolute paths stay absolute.
    my $path = ( $directory =~ m{^/} ) ? '/' : '';
    my $ok   = 1;
    for my $part ( grep { length } split m{/}, $directory ) {
        $path .= $part;
        unless ( -d $path or mkdir( $path, $mode ) ) {
            warn "Cannot create directory $path: $!\n";
            $ok = 0;
            last;
        }
        $path .= '/';
    }

    umask($saved_umask) if defined $saved_umask;
    return $ok;
}
# Create every directory listed in $self->{'hdfs_name_disks'} through
# _mkdir, without passing an explicit mode. When $self->{'debug'} is
# defined, a "creating <dir>" line is printed before each directory.
sub create_hdfs_name_disks {
    my ($self) = @_;
    my $verbose = defined $self->{'debug'};
    for my $path ( @{ $self->{'hdfs_name_disks'} } ) {
        if ($verbose) {
            print "creating ".$path, "\n";
        }
        $self->_mkdir( directory => $path );
    }
}
# Create every directory listed in $self->{'hdfs_data_disks'} through
# _mkdir, without passing an explicit mode. When $self->{'debug'} is
# defined, a "creating <dir>" line is printed before each directory.
sub create_hdfs_data_disks {
    my ($self) = @_;
    my $verbose = defined $self->{'debug'};
    for my $path ( @{ $self->{'hdfs_data_disks'} } ) {
        if ($verbose) {
            print "creating ".$path, "\n";
        }
        $self->_mkdir( directory => $path );
    }
}
# Create the directory named by $self->{'hdfs_tmp'} through _mkdir,
# requesting mode 01775. When $self->{'debug'} is defined, a
# "creating <dir>" line is printed first. Returns what _mkdir returns.
sub create_hdfs_tmpdir {
    my ($self) = @_;
    if ( defined $self->{'debug'} ) {
        print "creating ".$self->{'hdfs_tmp'}, "\n";
    }
    return $self->_mkdir(
        directory => $self->{'hdfs_tmp'},
        mode      => 01775,
    );
}
# Create the Hadoop log directories. Each entry of $self->{'hadoop_logs'}
# is appended to $self->{'hadoop_install'} (plain string concatenation, no
# separator is inserted) and created through _mkdir with mode 01775.
# When $self->{'debug'} is defined, a "creating <dir>" line is printed
# before each directory.
sub create_hadoop_logdir {
    my ($self) = @_;
    my $verbose = defined $self->{'debug'};
    for my $subdir ( @{ $self->{'hadoop_logs'} } ) {
        if ($verbose) {
            print "creating ".$self->{'hadoop_install'}.$subdir, "\n";
        }
        $self->_mkdir(
            directory => $self->{'hadoop_install'}.$subdir,
            mode      => 01775,
        );
    }
}
# ends
#
# begin recommended settings
#
# get cpu core count
# Count the processors listed in a cpuinfo file and store the count in
# $self->{'sysinfo'}->{'cpu'}.
#
#   $cpuinfo - optional path of the file to read; defaults to
#              /proc/cpuinfo
#
# Every line that begins with "processor" counts as one core. Dies when
# the file cannot be opened. Returns the result of closing the file.
sub _get_cpu_cores {
    my ($self, $cpuinfo) = @_;
    $cpuinfo = '/proc/cpuinfo' unless defined $cpuinfo;

    # Lexical handle and three-argument open: no global bareword handle and
    # no mode characters taken from the file name.
    open my $fh, '<', $cpuinfo or die "Cannot open $cpuinfo, $!\n";

    # Read line by line rather than slurping the whole file into a list.
    my $cores = 0;
    while ( my $line = <$fh> ) {
        $cores++ if $line =~ /^processor/;
    }
    $self->{'sysinfo'}->{'cpu'} = $cores;

    return close $fh;
}
lib/Apache/Hadoop/Config.pm view on Meta::CPAN
=head1 EXAMPLES
Below are a few examples of different uses. The first example creates
recommended configurations for the local host, or for hardware details
supplied on the command line:
#!/usr/bin/perl -w
use strict;
use warnings;
use Apache::Hadoop::Config;
use Getopt::Long;
my %opts;
GetOptions (\%opts, 'disks=s','memory=s','cores=s');
my $h = Apache::Hadoop::Config->new (
meminfo=>$opts{'memory'} || undef,
cpuinfo=>$opts{'cores'} || undef,
diskinfo=>$opts{'disks'} || undef,
);
# setup configs
$h->basic_config;
$h->memory_config;
# print and save
$h->print_config;
$h->write_config (confdir=>'.');
exit(0);
With no arguments supplied, the script above produces output like the following:
min cont size (mb) : 256
num of containers : 7
mem per container (mb): 368
disk : 4
cpu : 4
mem : 3.52075958251953
---------------
hdfs-site.xml
dfs.namenode.secondary.http-address: 0.0.0.0:50090
dfs.replication: 1
dfs.datanode.data.dir: file:///hdfs/data1,file:///hdfs/data2,file:///hdfs/data3,file:///hdfs/data4
dfs.namenode.secondary.https-address: 0.0.0.0:50091
dfs.namenode.name.dir: file:///hdfs/name1,file:///hdfs/name2
yarn-site.xml
yarn.web-proxy.address: localhost:8888
yarn.nodemanager.aux-services: mapreduce_shuffle
yarn.scheduler.minimum-allocation-mb: 368
yarn.scheduler.maximum-allocation-mb: 2576
yarn.nodemanager.aux-services.mapreduce.shuffle.class: org.apache.hadoop.mapred.ShuffleHandler
yarn.nodemanager.resource.memory-mb: 2576
core-site.xml
hadoop.tmp.dir: /hdfs/tmp
fs.defaultFS: http://localhost:9000
mapred-site.xml
mapreduce.reduce.java.opts: -Xmx588m
mapreduce.map.memory.mb: 368
mapreduce.map.java.opts: -Xmx294m
mapreduce.framework.name: yarn
mapreduce.reduce.memory.mb: 736
---------------
-> writing to ./hdfs-site.xml ...
-> writing to ./yarn-site.xml ...
-> writing to ./core-site.xml ...
-> writing to ./mapred-site.xml ...
If arguments are supplied, for example to describe a different cluster, the
configuration files can still be generated:
$ perl hadoop_config.pl --cores 16 --memory 64 --disks 6
min cont size (mb) : 2048
num of containers : 10
mem per container (mb): 5734
disk : 6
cpu : 16
mem : 64
---------------
hdfs-site.xml
dfs.namenode.secondary.http-address: 0.0.0.0:50090
dfs.replication: 1
dfs.datanode.data.dir: file:///hdfs/data1,file:///hdfs/data2,file:///hdfs/data3,file:///hdfs/data4
dfs.namenode.secondary.https-address: 0.0.0.0:50091
dfs.namenode.name.dir: file:///hdfs/name1,file:///hdfs/name2
yarn-site.xml
yarn.web-proxy.address: localhost:8888
yarn.nodemanager.aux-services: mapreduce_shuffle
yarn.scheduler.minimum-allocation-mb: 5734
yarn.scheduler.maximum-allocation-mb: 57340
yarn.nodemanager.aux-services.mapreduce.shuffle.class: org.apache.hadoop.mapred.ShuffleHandler
yarn.nodemanager.resource.memory-mb: 57340
core-site.xml
hadoop.tmp.dir: /hdfs/tmp
fs.defaultFS: http://localhost:9000
mapred-site.xml
mapreduce.reduce.java.opts: -Xmx9174m
mapreduce.map.memory.mb: 5734
mapreduce.map.java.opts: -Xmx4587m
mapreduce.framework.name: yarn
mapreduce.reduce.memory.mb: 11468
---------------
-> writing to ./hdfs-site.xml ...
-> writing to ./yarn-site.xml ...
-> writing to ./core-site.xml ...
-> writing to ./mapred-site.xml ...
Further customization is possible through the object's constructor arguments.
=head1 SEE ALSO
hadoop.apache.org - The Hadoop documentation and authoritative source for
Apache Hadoop and its components.
=head1 AUTHOR
Snehasis Sinha, E<lt>snehasis@cpan.orgE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2015 by Snehasis Sinha
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.1 or,
at your option, any later version of Perl 5 you may have available.
=cut
( run in 2.206 seconds using v1.01-cache-2.11-cpan-df04353d9ac )