Algorithm-ConstructDFA-XS
view release on metacpan or search on metacpan
See https://github.com/Perl-XS/notes/issues/7 for details.
0.17 March 2013
- Fixes for `edges_from` support
0.15 March 2013
- Initial support for `edges_from` parameter to accomodate
input with labeled edges instead of labeled vertices
0.13 February 2014
- Fixed bugs in handling of multiple start states
0.09 February 2014
- added many_start option (documented in the Pure Perl version)
0.03 February 2014
- Initial release
ConstructDFA.xs view on Meta::CPAN
build_dfa(SV* accept_sv, AV* args) {
typedef map<pair<StatesId, Label>, StatesId> Automaton;
StatesBimap m;
VectorBasedSet<State> sub_todo;
// Input from Perl
map<State, vector<State>> successors;
map<State, bool> nullable;
map<State, Label> label;
map<size_t, States> start_states;
I32 args_len = av_len(args);
for (int ix = 0; ix <= args_len; ++ix) {
SV** current_svp = av_fetch(args, ix, 0);
if (current_svp == NULL)
croak("Bad arguments");
SV* current_sv = (SV*)*current_svp;
if (!( SvROK(current_sv) && SvTYPE(SvRV(current_sv)) == SVt_PVAV))
croak("Bad arguments");
AV* current_av = (AV*)SvRV(current_sv);
// [vertex, label, nullable, in_start, successors...]
if (av_len(current_av) < 3)
croak("Bad arguments");
SV** vertex_svp = av_fetch(current_av, 0, 0);
SV** label_svp = av_fetch(current_av, 1, 0);
SV** null_svp = av_fetch(current_av, 2, 0);
SV** start_svp = av_fetch(current_av, 3, 0);
if (!(vertex_svp && label_svp && null_svp && start_svp))
croak("Internal error");
nullable[SvUV(*vertex_svp)] = SvTRUE(*null_svp);
if (SvOK(*label_svp))
label[SvUV(*vertex_svp)] = SvUV(*label_svp);
if (!( SvROK(*start_svp) && SvTYPE(SvRV(*start_svp)) == SVt_PVAV))
croak("Bad arguments");
AV* start_av = (AV*)SvRV(*start_svp);
I32 start_av_len = av_len(start_av);
for (int k = 0; k <= start_av_len; ++k) {
SV** item_svp = av_fetch(start_av, k, 0);
if (SvUV(*item_svp) == 0) {
croak("Bad arguments (start vertex)");
}
start_states[SvUV(*item_svp)].insert(SvUV(*vertex_svp));
}
I32 current_av_len = av_len(current_av);
for (int k = 4; k <= current_av_len; ++k) {
SV** successor_svp = av_fetch(current_av, k, 0);
if (!successor_svp)
croak("Internal error");
ConstructDFA.xs view on Meta::CPAN
}
VectorBasedSet<State> sub_temp;
set<StatesId> seen;
list<StatesId> todo;
set<StatesId> final_states;
Automaton automaton;
map<StatesId, set<StatesId>> predecessors;
map<StatesId, bool> accepting;
for (auto s = start_states.begin(); s != start_states.end(); ++s) {
States& start_state = s->second;
sub_temp.clear();
for (auto i = start_state.begin(); i != start_state.end(); ++i) {
sub_temp.insert(*i);
}
add_all_reachable_and_self(sub_todo, sub_temp, nullable, successors);
start_state.insert(sub_temp.elements.begin(), sub_temp.elements.end());
auto startId = m.states_to_id(start_state);
todo.push_front(startId);
}
while (!todo.empty()) {
StatesId currentId = todo.front();
todo.pop_front();
if (seen.find(currentId) != seen.end()) {
continue;
}
ConstructDFA.xs view on Meta::CPAN
auto sinkId = m.states_to_id(sink);
if (accepting.find(sinkId) == accepting.end()) {
accepting[sinkId] = does_accept(accept_sv, m.id_to_states(sinkId));
}
seen.insert(sinkId);
map<StatesId, size_t> state_map;
state_map[sinkId] = 0;
size_t state_next = 1 + start_states.size();
map<StatesId, size_t> start_ix_to_state_map_id;
for (auto s = start_states.begin(); s != start_states.end(); ++s) {
auto startIx = s->first;
auto state = s->second;
auto startId = m.states_to_id(state);
if (reachable.find(startId) == reachable.end()) {
croak("start state %u unreachable", startIx);
}
if (state_map.find(startId) != state_map.end()) {
// This happens when equivalent start states are passed to the
// construction function.
} else {
state_map[startId] = startIx;
}
}
// ...
map<size_t, HV*> dfa;
reachable.insert(sinkId);
for (auto s = reachable.begin(); s != reachable.end(); ++s) {
if (state_map.find(*s) == state_map.end()) {
state_map[*s] = state_next++;
}
}
map<size_t, StatesId> state_map_r;
for (auto s = state_map.begin(); s != state_map.end(); ++s) {
state_map_r[s->second] = s->first;
}
// If multiple start states are passed to the construction function and
// they either are identical, or turn out to be equivalent once all the
// epsilon-reachable states are added to them, mapping distinct states
// to distinct numbers leaves out the duplicates. Since the API conven-
// tion is that states 1..n in the generated DFA correspond to the 1..n
// start state in the input, the duplicates have to be generated here.
for (auto s = start_states.begin(); s != start_states.end(); ++s) {
auto startIx = s->first;
auto state = s->second;
auto startId = m.states_to_id(state);
state_map_r[startIx] = startId;
}
multimap<StatesId, HV*> id_to_hvs;
for (auto s = state_map_r.begin(); s != state_map_r.end(); ++s) {
HV* state_hv = newHV();
AV* combines_av = newAV();
SV* combines_rv = newRV_noinc((SV*)combines_av);
HV* next_over_hv = newHV();
lib/Algorithm/ConstructDFA/XS.pm view on Meta::CPAN
require XSLoader;
XSLoader::load('Algorithm::ConstructDFA::XS', $VERSION);
sub construct_dfa_xs {
my (%o) = @_;
die unless ref $o{is_nullable};
die unless ref $o{is_accepting} or exists $o{final};
die unless ref $o{successors} or ref $o{edges_from};
die unless ref $o{get_label} or ref $o{edges_from};
die unless exists $o{start} or exists $o{many_start};
die if ref $o{is_accepting} and exists $o{final};
die if ref $o{successors} and exists $o{edges_from};
die if ref $o{get_label} and ref $o{edges_from};
my $class = 'Algorithm::ConstructDFA::XS::Synth';
if (exists $o{edges_from}) {
my $old_accepting = $o{is_accepting};
$o{is_accepting} = sub {
lib/Algorithm/ConstructDFA/XS.pm view on Meta::CPAN
}
if (exists $o{final}) {
my %in_final = map { $_ => 1 } @{ $o{final} };
$o{is_accepting} = sub {
grep { $in_final{$_} } @_
};
}
$o{many_start} //= [$o{start}];
my $dfa = _construct_dfa_xs($o{many_start}, $o{get_label},
$o{is_nullable}, $o{successors}, $o{is_accepting});
if (exists $o{edges_from}) {
for (values %$dfa) {
$_->{Combines} = [ grep {
ref $_ ne $class;
} @{ $_->{Combines} } ];
}
}
lib/Algorithm/ConstructDFA/XS.pm view on Meta::CPAN
}
sub _construct_dfa_xs {
my ($roots, $labelf, $nullablef, $successorsf, $acceptingf) = @_;
my @todo = map { @$_ } @$roots;
my %seen;
my @args;
my $sm = Data::AutoBimap->new;
my $rm = Data::AutoBimap->new;
my %is_start;
for (my $ix = 0; $ix < @$roots; ++$ix) {
for my $v (@{ $roots->[$ix] }) {
push @{ $is_start{$v} }, $ix + 1;
}
}
while (@todo) {
my $c = pop @todo;
next if $seen{$c}++;
my $is_nullable = !!$nullablef->($c);
my $label = $labelf->($c);
my $label_x = defined $label ? $rm->s2n($label) : undef;
# [vertex, label, nullable, start, successors...]
my @data = ($sm->s2n($c), $label_x, !!$is_nullable, $is_start{$c} // []);
for ($successorsf->($c)) {
push @data, $sm->s2n($_);
push @todo, $_;
}
push @args, \@data;
}
my %h = _internal_construct_dfa_xs(sub {
t/02simple.t view on Meta::CPAN
for my $via (keys %{ $dfa->{$src}{NextOver} }) {
my $dst = $dfa->{$src}{NextOver}{$via};
push @{ $edges_from{$dst} }, [$src, $via];
}
}
return construct_dfa_xs(
is_nullable => sub { 1 },
is_accepting => sub { grep { $_ eq '1' } @_ },
edges_from => sub { @{ $edges_from{$_[0]} } },
start => [ grep { $dfa->{$_}{Accepts} } keys %$dfa ],
);
}
for (1 .. 30) {
my $g = Graph::Directed->random_graph(
vertices => int(rand(32)),
edges_fill => 0.2
);
my %labels;
my @vertices = $g->vertices;
for my $v (@vertices) {
next unless rand > 0.3;
my $label = ['a', 'b', 'c']->[int rand 3];
$g->set_vertex_attribute($v, 'label', $label);
}
my $start = [sort_by { scalar $g->successors($_) } @vertices]->[-1];
next unless defined $start;
my $final = [$g->all_successors($start), $start]
->[int rand(1 + scalar $g->all_successors($start))];
next unless defined $final;
my $dfa = construct_dfa_xs(
is_nullable => sub {
not $g->has_vertex_attribute($_[0], 'label')
},
is_accepting => sub { grep { $_ eq $final } @_ },
successors => sub { $g->successors($_[0]) },
get_label => sub { $g->get_vertex_attribute($_[0], 'label') },
start => [ $start ],
);
if (rand > 0.5) {
$dfa = construct_dfa_xs(
is_nullable => sub {
return 1;
},
is_accepting => sub { grep { $dfa->{$_}{Accepts} } @_ },
edges_from => sub {
my ($src) = @_;
my @edges;
for my $via (keys %{ $dfa->{$src}{NextOver} }) {
my $dst = $dfa->{$src}{NextOver}{$via};
push @edges, [$dst, $via];
}
return @edges;
},
start => [ 1 ],
);
} elsif (rand > 0.5) {
$dfa = revdet(revdet( $dfa ));
}
my $dfa_g = Graph::Directed->new;
my $dfa_g_final = "final";
for my $s (keys %$dfa) {
for my $label (keys %{$dfa->{$s}{NextOver}}) {
t/02simple.t view on Meta::CPAN
last unless defined $s;
push @path, $s;
}
unless ($path[-1] eq $dst) {
splice @path, $#path, 1, $copy->SP_Dijkstra($path[-1], $dst);
}
return @path;
}
};
for my $config ([$g, $start, $final, $dfa_g, 1, $dfa_g_final],
[$dfa_g, 1, $dfa_g_final, $g, $start, $final]
) {
my ($g1, $start, $final, $g2, $start2, $final2) = @$config;
my $rnd;
eval {
$rnd = Graph::RandomPath->create_generator($g1, $start, $final);
};
next if $@;
for (1 .. 4) {
my @path = $rnd->();
my @word =
map { $g1->get_vertex_attribute($_, 'label') }
grep { $g1->has_vertex_attribute($_, 'label') }
@path[0 .. $#path - 1];
# use YAML::XS;
# warn Dump { path => \@path, word => join('/', @word), };
my @word_copy = @word;
my @state = $start2;
while (1) {
my %seen;
my @todo = @state;
@state = ();
while (@todo) {
my $t = pop @todo;
next if $seen{$t}++;
push @state, $t;
next if $g2->has_vertex_attribute($t, 'label');
);
my %labels;
my @vertices = $g->vertices;
for my $v (@vertices) {
next unless rand > 0.3;
my $label = ['a', 'b', 'c']->[int rand 3];
$g->set_vertex_attribute($v, 'label', $label);
}
my $start = [sort_by { scalar $g->successors($_) } @vertices]->[-1];
next unless defined $start;
my $final = [$g->all_successors($start), $start]
->[int rand(1 + scalar $g->all_successors($start))];
next unless defined $final;
my $dfa_xs = construct_dfa_xs(
is_nullable => sub {
not $g->has_vertex_attribute($_[0], 'label')
},
is_accepting => sub { grep { $_ eq $final } @_ },
successors => sub { $g->successors($_[0]) },
get_label => sub { $g->get_vertex_attribute($_[0], 'label') // '' },
start => [ $start ],
);
my $dfa_pp = construct_dfa(
is_nullable => sub {
not $g->has_vertex_attribute($_[0], 'label')
},
is_accepting => sub { grep { $_ eq $final } @_ },
successors => sub { $g->successors($_[0]) },
get_label => sub { $g->get_vertex_attribute($_[0], 'label') // '' },
start => [ $start ],
);
# use YAML::XS;
# print Dump $dfa_xs;
my %pp = partition_by { join ' ', sort @{ $_->{Combines} } } values %$dfa_pp;
my %xs = partition_by { join ' ', sort @{ $_->{Combines} } } values %$dfa_xs;
# print join "\n", sort keys %pp;
# print "###\n";
# print join "\n", sort keys %xs;
( run in 0.264 second using v1.01-cache-2.11-cpan-0d8aa00de5b )