App-Oozie

 view release on metacpan or  search on metacpan

lib/App/Oozie/Deploy/Validate/DAG/Workflow.pm  view on Meta::CPAN


use App::Oozie::Constants qw( EMPTY_STRING );
use App::Oozie::Deploy::Validate::DAG::Vertex;

use Carp ();
use Graph::Directed;
use Moo;
use Ref::Util       qw( is_hashref );
use Types::Standard qw( HashRef    );
use XML::LibXML;

with qw(
    App::Oozie::Role::Log
);

# Table of the workflow node types this validator knows about, keyed by node
# type name. Each entry may carry:
#   to    - where the outgoing transition target(s) of that node type are
#           read from (presumably element/attribute paths resolved by the
#           traversal code elsewhere in this module -- TODO confirm)
#   vname - a fixed vertex name; only `start` declares one
# `end` and `kill` have no outgoing transitions, hence the empty specs.
has node_types => (
    is      => 'ro',
    isa     => HashRef,
    default => sub {
        my %spec_for = (
            action   => { to => [ 'ok.to', 'error.to' ] },
            decision => { to => [ 'switch/case.to', 'switch/default.to' ] },
            end      => {},
            fork     => { to => 'path.start' },
            join     => { to => 'to' },
            kill     => {},
            start    => { vname => 'start', to => 'to' },
        );
        return \%spec_for;
    },
);

# The graph object of the workflow being inspected. Starts out undefined;
# _dump_perl() dies with "current_graph is not set!" while it is false.
has current_graph => (
    is      => 'rw',
    default => sub { return },
);

# The node collection of the workflow being inspected. Starts out undefined;
# _dump_perl() dies with "current_nodes is not set!" while it is false.
has current_nodes => (
    is      => 'rw',
    default => sub { return },
);

# The vertex collection of the workflow being inspected. Starts out
# undefined.
has current_vertices => (
    is      => 'rw',
    default => sub { return },
);

# Output file name for a rendered graph image (see the GraphViz example in
# the "Possible Extensions" POD section).
has graph_filename => (
    is      => 'rw',
    default => sub { return 'graph.png' },
);

# Private lookup table, initialised to an empty hash for every instance.
has _vertex_lookup => (
    is      => 'ro',
    isa     => HashRef,
    default => sub { return {} },
);

# assert( $file )
#
# Runs validate() on $file and turns its findings into a hard failure.
#
# Every finding returned by validate() is a two element array reference
# (message, explanation). When there is at least one finding, a header line
# and then both elements of every finding are logged at fatal level, and the
# method dies with "Errors found, aborting.\n".
#
# When validate() reports nothing, a single info line is logged and the
# method returns nothing.
sub assert {
    my ( $self, $file ) = @_;

    my @problems = $self->validate($file);

    # Happy path first: nothing to report.
    if ( !@problems ) {
        $self->logger->info("$file validated OK");
        return;
    }

    $self->logger->fatal('Some errors were encountered.');

    for my $problem (@problems) {
        # Index 0 is the message, index 1 the explanation; log both in order.
        $self->logger->fatal( $problem->[$_] ) for 0, 1;
    }

    die "Errors found, aborting.\n";
}

sub validate {
    my $self = shift;
    my $file = shift || die 'No file was specified!';

    state $is_end_or_kill = { map { $_ => 1 } qw( end kill ) };

    $self->logger->info( "DAG validation for $file" );

    if ( ! -e $file ) {
        die sprintf 'File %s does not exist', $file;
    }

    my $xml  = XML::LibXML->load_xml( location => $file );
    my $root = $xml->getDocumentElement;

    my($all_nodes, $all_vertices) = $self->_descend( $root );

    my $g = Graph::Directed->new( refvertexed_stringified => 1 );

    my @errors;
    for my $v ( values %{ $all_vertices } ) {
        for my $to ( @{ $v->{data}{to} || [] } ) {
            if ( !$all_vertices->{$to} ) {
                push @errors,
                    [
                        sprintf(
                            q{vertex `%s` has an edge to `%s`, which doesn't exist},
                                $v,
                                $to,
                        ),
                        'nodes cannot reference nodes that do not exist in the workflow',
                    ];
                next;
            }
            $g->add_edge( $v, $all_vertices->{$to} );
        }
    }

    if ( ! $g->is_dag ) {
        push @errors,
            [
                'graph is not a DAG',
                q{an oozie workflow should always be a directed acyclic graph; why this one isn't can probably be found in the next errors},
            ];
    }

    if ( my @cycle = $g->find_a_cycle ) {
        push @errors,
            [
                sprintf(
                    'at least one cycle found: %s',
                        join( ' -> ', @cycle )
                ),
                'since an oozie workflow is a DAG, there should be no cycles (loops)'
            ];
        }

    if ( !$g->is_weakly_connected ) {
        push @errors,
            [
                'graph is not fully connected',

lib/App/Oozie/Deploy/Validate/DAG/Workflow.pm  view on Meta::CPAN

# _dump_perl()
#
# Debugging aid: serialises the current graph and node collection with
# Data::Dumper (under the variable name `graph`) and sends the text to the
# logger at debug level.
#
# Dies when current_graph or current_nodes holds a false value; the graph is
# checked first. Returns nothing.
sub _dump_perl {
    my ($self) = @_;

    my $graph = $self->current_graph
        or die 'current_graph is not set!';
    my $nodes = $self->current_nodes
        or die 'current_nodes is not set!';

    # Loaded on demand, only once both values are known to be set.
    require Data::Dumper;

    $self->logger->debug(
        Data::Dumper->Dump(
            [ { nodes => $nodes, graph => $graph } ],
            ['graph'],
        )
    );

    return;
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

App::Oozie::Deploy::Validate::DAG::Workflow - Part of the Oozie workflow DAG validator.

=head1 VERSION

version 0.020

=head1 SYNOPSIS

=head1 DESCRIPTION

Used by the Oozie deploy tool to catch mistakes before a workflow is submitted.
Checks that the workflow is a properly formed DAG (directed acyclic graph).

=head1 NAME

App::Oozie::Deploy::Validate::DAG::Workflow - Part of the Oozie workflow DAG validator.

=head1 Methods

=head2 assert

=head2 current_graph

=head2 current_nodes

=head2 current_vertices

=head2 dump_graph

=head2 graph_filename

=head2 node_types

=head2 validate

=head1 Possible Extensions

    sub _dump_graphviz {
        my $self = shift;
        my $g    = $self->current_graph || die "current_graph is not set!";
        my $file = $self->graph_filename;

        require Graph::Writer::GraphViz;

        Graph::Writer::GraphViz->new(
            -edge_color => 1,
            -fontsize   => 8,
            -format     => 'png',
            -layout     => 'twopi',
            -node_color => 2,
            -ranksep    => 1.5,
        )->write_graph( $g, $file );

        $self->logger->info( "$file is created." );

        return;
    }

    sub _dump_d3 {
        my $self = shift;
        my $g    = $self->current_graph || die "current_graph is not set!";
        require Graph::D3;
        my $d3 = Graph::D3->new(
                    graph => $g,
                    type  => 'json',
                );
        print $d3->force_directed_graph;
    }

=head1 SEE ALSO

L<App::Oozie>.

=head1 AUTHORS

=over 4

=item *

David Morel

=item *

Burak Gursoy

=back

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2023 by Booking.com.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.



( run in 0.745 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )