Algorithm-ConstructDFA
view release on metacpan or search on metacpan
lib/Algorithm/ConstructDFA.pm view on Meta::CPAN
);
local $Storable::canonical = 1;
sub _memoizess {
my ($sub) = @_;
my %cache;
return sub {
my ($s) = @_;
if (not exists $cache{$s}) {
$cache{$s} = $sub->($s);
}
return $cache{$s};
};
}
sub _get_graph {
my ($roots, $labelf, $nullablef, $successorsf, $acceptingf) = @_;
my $m = Data::AutoBimap->new();
my $label = _memoizess(sub { $labelf->($m->n2s($_[0])) });
my $successors = memoize(sub {
map { $m->s2n($_) } $successorsf->($m->n2s($_[0]))
});
my $accepting = sub {
!!$acceptingf->(map { $m->n2s($_) } @_)
};
my %nullable;
my $nullable = _memoizess(sub {
!!$nullablef->($m->n2s($_[0]));
});
my $all_reachable_and_self = _memoizess(sub {
my ($v) = @_;
my %seen;
my @todo = ($v);
while (@todo) {
my $c = pop @todo;
next if $seen{$c}++;
push @todo, $successors->($c) if $nullable->($c);
}
[keys %seen];
});
my $all_reachable_and_self_many = sub {
my %seen;
my @todo = (@_);
while (@todo) {
my $c = pop @todo;
next if $seen{$c}++;
push @todo, $successors->($c) if $nullable->($c);
}
keys %seen;
};
my $start = [
sort { $a cmp $b }
uniq map {
$nullable->($_) ? @{ $all_reachable_and_self->($_) } : $_
}
map { $m->s2n($_) }
@$roots
];
my $start_s = join ' ', @$start;
my @todo = ($start);
my %seen;
my $dfa;
my @accepting_dfa_states;
my %predecessors;
while (@todo) {
my $src = pop @todo;
my @src = @{ $src };
my $src_s = join ' ', @src;
next if $seen{$src_s}++;
my $src_accepts = $accepting->(@src);
push @accepting_dfa_states, $src_s if $src_accepts;
my %p = partition_by { $label->($_) }
grep { defined $label->($_) } @src;
while (my ($k, $v) = each %p) {
my @dst = sort { $a cmp $b } uniq
$all_reachable_and_self_many->(map { $successors->($_) } @$v);
push @todo, \@dst;
my $dst_s = join ' ', @dst;
$dfa->{$src_s}->{$k} = $dst_s;
$predecessors{$dst_s}->{$src_s}++;
}
}
my %reachable = do {
my %seen;
my @todo = @accepting_dfa_states;
while (@todo) {
my $c = pop @todo;
next if $seen{$c}++;
push @todo, keys %{ $predecessors{$c} };
}
map { $_ => 1 } keys %seen;
};
my $o = Data::AutoBimap->new(start => 0);
# Ensure that DFA state 0 is the one that corresponds to no
# vertices in the input graph. This is an API convention and
# does not have significance beyond that.
my $r = { $o->s2n('') => {
Combines => [],
Accepts => $accepting->()
} };
# Ensure start state is 1 also as a convention
$o->s2n($start_s);
while (my ($src, $x) = each %$dfa) {
# Merge dead states
$src = '' unless $reachable{$src};
my @src_combines = map { $m->n2s($_) } split/ /, $src;
$r->{$o->s2n($src)}{Combines} //= \@src_combines;
$r->{$o->s2n($src)}{Combines} = [ sort { $a cmp $b }
uniq (@{$r->{$o->s2n($src)}{Combines} // []}, @src_combines) ]
if $src eq '';
$r->{$o->s2n($src)}{Accepts} //=
0 + $accepting->(split/ /, $src);
while (my ($k, $dst) = each %{$x}) {
$dst = '' unless $reachable{$dst};
$r->{$o->s2n($src)}{NextOver}{$k} = $o->s2n($dst);
if ((not defined $r->{$o->s2n($dst)}{Combines}) or $dst eq '') {
my @dst_combines = map { $m->n2s($_) } split/ /, $dst;
$r->{$o->s2n($dst)}{Combines} //= \@dst_combines;
$r->{$o->s2n($dst)}{Combines} = [ sort { $a cmp $b }
uniq (@{$r->{$o->s2n($dst)}{Combines} // []}, @dst_combines) ]
if $dst eq '';
}
$r->{$o->s2n($dst)}{Accepts} //=
0 + $accepting->(split/ /, $dst);
}
}
return $r;
}
sub construct_dfa {
my (%o) = @_;
die unless ref $o{is_nullable};
die unless ref $o{is_accepting} or exists $o{final};
die unless ref $o{successors};
die unless ref $o{get_label};
die unless exists $o{start};
die if ref $o{is_accepting} and exists $o{final};
if (exists $o{final}) {
my %in_final = map { $_ => 1 } @{ $o{final} };
$o{is_accepting} = sub {
grep { $in_final{$_} } @_
};
}
_get_graph($o{start}, $o{get_label}, $o{is_nullable},
$o{successors}, $o{is_accepting});
}
1;
__END__
=head1 NAME
Algorithm::ConstructDFA - Deterministic finite automaton construction
=head1 SYNOPSIS
use Algorithm::ConstructDFA;
my $dfa = construct_dfa(
start => [ $start_vertex ],
is_accepting => sub { grep { $_ eq $final_vertex } @_ },
is_nullable => sub {
$g->has_vertex_attribute($_[0], 'label')
},
successors => sub { $g->successors($_[0]) },
get_label => sub {
$g->get_vertex_attribute($_[0], 'label')
},
);
=head1 DESCRIPTION
This module provides a generic deterministic finite automaton
construction function. The input model is a graph with possibly
labeled (usually with "non-terminals") vertices. Edges in the
graph are always unlabeled.
=head1 FUNCTIONS
=over
=item construct_dfa(%options)
Construct a DFA using the given options.
=over
=item start
An array of start states for the initial configuration of the
automaton.
=item final
An array of final accepting states. This can be used instead
of specifying a subroutine in C<is_accepting>.
=item is_accepting
A subroutine returning a boolean indicating whether this is an
accepting final state of the automaton. It is passed all the
vertices the states combines. For single-vertex acceptance, it
would usually C<grep> over the arguments. Having access to all
the states of the input automaton allows more complex acceptance
conditions (e.g. to compute the intersection of two graphs).
=item is_nullable
A subroutine returning a boolean indicating whether the automaton
can move past the supplied state without consuming any input.
=item successors
A subroutine that returns a list of all immediate successors of
the given vertex.
=item get_label
A subroutine that returns a caller-defined string representing what
kind of input is expected to move past the supplied vertex. Can also
be C<undef> for vertices without label.
=back
The function returns the DFA as hash reference with integer keys. The
key C<0> is a non-accepting state with no transitions to other states
(the automaton would go into this state if the match has failed). The
key C<1> is the start state. The value of each entry is another hash
reference. As an example:
'1':
Accepts: 1
Combines:
- 0
- 1
- 2
NextOver:
a: 0
b: 1
The C<Accepts> key indicates whether this is an accepting state. The
C<Combines> key provides access to the list of states in the input
automaton this DFA state corresponds to. The C<NextOver> field is the
transition table out of this state. This automaton matches any sequence
of zero or more C<b>s. The alphabet also includes the label C<a> but
the automaton moves from the start state over the label C<a> to the
non-accepting sink state C<0> and would never enter an accepting state
after that.
An exception to the rule above is when C<is_accepting> returns a true
value when passed no arguments (i.e., the automaton accepts when it is
in none of the states in the input automaton). Then state C<0> is made
an accepting state (and combines states from which the final vertex is
unreachable as before). This can be useful to compute complement graphs.
=back
=head1 EXPORTS
The functions C<construct_dfa> and C<construct_dfa_as_graph> by default.
=head1 AUTHOR / COPYRIGHT / LICENSE
Copyright (c) 2014 Bjoern Hoehrmann <bjoern@hoehrmann.de>.
This module is licensed under the same terms as Perl itself.
=cut
( run in 1.984 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )