Net-Hadoop-Oozie

 view release on metacpan or  search on metacpan

lib/Net/Hadoop/Oozie.pm  view on Meta::CPAN

    lazy    => 1,
);

# Read-write attribute, initialised to a false value (0).
# NOTE(review): presumably toggles expansion of XML job configuration in
# returned data -- confirm against the code that reads this flag.
has expand_xml_conf => (
    default => sub { return 0 },
    is      => 'rw',
);

# Read-write attribute, initialised to a false value (0).
# NOTE(review): presumably lets a caller-supplied callback end processing
# early -- confirm against the code that reads this flag.
has shortcircuit_via_callback => (
    default => sub { return 0 },
    is      => 'rw',
);

#------------------------------------------------------------------------------#

# API

# Query an "admin/<endpoint>" resource and return whatever agent_request()
# yields for it.
#
# Dies (with a stack trace) when no endpoint is given, or when the resulting
# "admin/<endpoint>" path does not match the validation pattern registered
# for the current api_version.
sub admin {
    my ( $self, $endpoint ) = @_;

    confess "No endpoint specified for admin" if !$endpoint;

    my $pattern = $RE_VALID_ENDPOINT->{ $self->api_version };
    my $path    = "admin/$endpoint";

    confess( sprintf '%s is not a valid admin endpoint!', $endpoint )
        if $path !~ $pattern;

    return $self->agent_request( $self->_make_full_uri($path) );
}

# Report whether the server configuration says Kerberos is enabled.
#
# Fetches admin/configuration and looks at a single key (see $krb_key
# below). Returns a true value when that key holds the string 'true', a
# false value when it holds anything else, and nothing (bare return) when
# the key is missing or false. Dies when the configuration cannot be
# collected.
sub kerberos_enabled {
    my ($self) = @_;

    # All relevant config keys:
    #
    # oozie.authentication.kerberos.keytab
    # oozie.authentication.kerberos.name.rules
    # oozie.authentication.kerberos.principal
    # oozie.authentication.type
    # oozie.server.authentication.type
    # oozie.service.HadoopAccessorService.kerberos.enabled
    # oozie.service.HadoopAccessorService.kerberos.principal
    # oozie.service.HadoopAccessorService.keytab.file
    #
    # Only this one is consulted.
    state $krb_key = 'oozie.service.HadoopAccessorService.kerberos.enabled';

    my $conf = $self->admin('configuration');
    confess "Failed to collect admin/configuration" if !$conf;

    my $flag = $conf->{$krb_key};
    return if !$flag;

    return $flag eq 'true';
}

# Return the "buildVersion" field of the admin/build-version response.
#
# Dies with a stack trace when the response is false, mirroring how
# kerberos_enabled() guards admin/configuration. Without the guard a failed
# request would surface as an opaque "undefined value as a HASH reference"
# error from the dereference below.
sub build_version {
    my $self = shift;
    my $info = $self->admin("build-version")
                    || confess "Failed to collect admin/build-version";
    return $info->{buildVersion};
}

# Return the part of build_version() that precedes the first dash,
# e.g. "4.1.0" for a build string such as "4.1.0-something".
sub oozie_version {
    my ($self) = @_;

    # Limit of 2: only the leading field is needed, the rest stays unsplit.
    my ($leading) = split m{ [-] }xms, $self->build_version, 2;

    return $leading;
}

# Return the maximum node name length for the version reported by
# oozie_version(): 128 from v4.3.0 onwards, 50 for anything older.
#
# Version components are compared numerically, one by one. A plain string
# comparison ("ge") would sort '4.10.0' and '10.0.0' below '4.3.0' and
# report the old limit for them.
#
# A version that does not start with a dotted number (or is undefined)
# yields the conservative limit of 50.
sub max_node_name_len {
    my $self    = shift;
    my $version = $self->oozie_version;

    # A simple grep in oozie.git shows that it was always set to "50"
    # up until v4.3.0. So, no need to check any older version for even
    # lower limits.
    my @threshold = ( 4, 3, 0 );

    # Keep only the leading dotted-number part, ignoring any suffix.
    my ($dotted) = ( $version // '' ) =~ m{ \A ( \d+ (?: [.] \d+ )* ) }xms;
    my @have     = defined $dotted ? split( m{[.]}xms, $dotted ) : ();

    # Missing components count as 0, so "4.3" is treated like "4.3.0".
    my $cmp = 0;
    for my $i ( 0 .. $#threshold ) {
        $cmp = ( $have[$i] // 0 ) <=> $threshold[$i];
        last if $cmp;
    }

    return $cmp >= 0 ? 128 : 50;
}

# Query the "jobs" resource and return the response.
#
# Options may be passed either as a flat key/value list or as a single hash
# reference. Recognised keys: len, offset, jobtype, filter.
sub jobs {
    my ( $proto, @args ) = @_;

    # Operate on a clone, as we are clobbering lots of attributes below.
    my $self = $proto->clone;

    my $options;
    if ( @args > 1 ) {
        $options = {@args};
    }
    else {
        $options = $args[0] || {};
    }

    # TODO: this is a broken logic!
    #
    # Only options that were explicitly given (defined) are copied over.
    for my $attr (qw(len offset jobtype)) {
        my $value = $options->{$attr};
        next if !defined $value;
        $self->$attr($value);
    }

    # TODO: rework this, logic makes no sense. Filter should have a default
    # and be overridable in a flexible manner (maybe merge instead?).
    #
    # Precedence: explicit option, then the current attribute value, then a
    # fallback restricting the query to running jobs.
    my $filter = $options->{filter}
              || $self->filter
              || { status => "RUNNING" };
    $self->filter($filter);

    my $response = $self->agent_request( $self->_make_full_uri('jobs') );

    # make this optional given the horrible implementation?
    $self->_expand_meta_data($response);

    return $response;
}

# IMPORTANT ! FIXME ?
#
# When querying a coordinator, the actions field will contain action details
# in execution order. Since the defaults are offset 1 and len 50, only the
# first 50 actions are returned, so for most coordinators this information
# will be useless. The proper way of querying would then be (to obtain the
# last 50 actions):
#
#  my $details = Net::Hadoop::Oozie->new({ len => 1 })->job( $coordJobId );
#  my $total_actions = $details->{total};
#  my $offset = $details->{total} - 49;
#  $offset = 1 if $offset < 1;
#  $details = Net::Hadoop::Oozie->new({ len => 50, offset => $offset })->job( $coordJobId );
#
#  NOTE: this should be fixed in oozie 4, which has an 'order' (asc by default, can be desc) parameter

sub job {
    my $self = shift->clone; # as we are clobbering lots of attributes
    my $id = shift || confess "No job id specified";



( run in 0.653 second using v1.01-cache-2.11-cpan-71847e10f99 )