Algorithm-ConstructDFA-XS

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

    See https://github.com/Perl-XS/notes/issues/7 for details.

0.17  March 2013
  - Fixes for `edges_from` support

0.15  March 2013
  - Initial support for `edges_from` parameter to accomodate
    input with labeled edges instead of labeled vertices

0.13  February 2014
  - Fixed bugs in handling of multiple start states

0.09  February 2014
  - added many_start option (documented in the Pure Perl version)

0.03  February 2014
	- Initial release

ConstructDFA.xs  view on Meta::CPAN

build_dfa(SV* accept_sv, AV* args) {

  typedef map<pair<StatesId, Label>, StatesId> Automaton;
  StatesBimap               m;
  VectorBasedSet<State>     sub_todo;

  // Input from Perl
  map<State, vector<State>> successors;
  map<State, bool>          nullable;
  map<State, Label>         label;
  map<size_t, States>       start_states;
  
  I32 args_len = av_len(args);

  for (int ix = 0; ix <= args_len; ++ix) {
    SV** current_svp = av_fetch(args, ix, 0);

    if (current_svp == NULL)
      croak("Bad arguments");

    SV* current_sv = (SV*)*current_svp;

    if (!( SvROK(current_sv) && SvTYPE(SvRV(current_sv)) == SVt_PVAV))
      croak("Bad arguments");

    AV* current_av = (AV*)SvRV(current_sv);

    // [vertex, label, nullable, in_start, successors...]

    if (av_len(current_av) < 3)
      croak("Bad arguments");

    SV** vertex_svp = av_fetch(current_av, 0, 0);
    SV** label_svp  = av_fetch(current_av, 1, 0);
    SV** null_svp   = av_fetch(current_av, 2, 0);
    SV** start_svp  = av_fetch(current_av, 3, 0);

    if (!(vertex_svp && label_svp && null_svp && start_svp))
      croak("Internal error");

    nullable[SvUV(*vertex_svp)] = SvTRUE(*null_svp);

    if (SvOK(*label_svp))
      label[SvUV(*vertex_svp)] = SvUV(*label_svp);


    if (!( SvROK(*start_svp) && SvTYPE(SvRV(*start_svp)) == SVt_PVAV))
      croak("Bad arguments");

    AV* start_av = (AV*)SvRV(*start_svp);
    I32 start_av_len = av_len(start_av);
    
    for (int k = 0; k <= start_av_len; ++k) {
      SV** item_svp  = av_fetch(start_av, k, 0);
      if (SvUV(*item_svp) == 0) {
        croak("Bad arguments (start vertex)");
      }
      start_states[SvUV(*item_svp)].insert(SvUV(*vertex_svp));
    }
    
    I32 current_av_len = av_len(current_av);

    for (int k = 4; k <= current_av_len; ++k) {
      SV** successor_svp = av_fetch(current_av, k, 0);
      
      if (!successor_svp)
        croak("Internal error");

ConstructDFA.xs  view on Meta::CPAN

  }

  VectorBasedSet<State>         sub_temp;
  set<StatesId>                 seen;
  list<StatesId>                todo;
  set<StatesId>                 final_states;
  Automaton                     automaton;
  map<StatesId, set<StatesId>>  predecessors;
  map<StatesId, bool>           accepting;
  
  for (auto s = start_states.begin(); s != start_states.end(); ++s) {
    States& start_state = s->second;

    sub_temp.clear();
  
    for (auto i = start_state.begin(); i != start_state.end(); ++i) {
      sub_temp.insert(*i);
    }

    add_all_reachable_and_self(sub_todo, sub_temp, nullable, successors);

    start_state.insert(sub_temp.elements.begin(), sub_temp.elements.end());

    auto startId = m.states_to_id(start_state);
    todo.push_front(startId);
  }

  while (!todo.empty()) {
    StatesId currentId = todo.front();
    todo.pop_front();

    if (seen.find(currentId) != seen.end()) {
      continue;
    }

ConstructDFA.xs  view on Meta::CPAN

  auto sinkId = m.states_to_id(sink);
  
  if (accepting.find(sinkId) == accepting.end()) {
    accepting[sinkId] = does_accept(accept_sv, m.id_to_states(sinkId));
  }

  seen.insert(sinkId);

  map<StatesId, size_t> state_map;
  state_map[sinkId] = 0;
  size_t state_next = 1 + start_states.size();
  
  map<StatesId, size_t> start_ix_to_state_map_id;
  
  for (auto s = start_states.begin(); s != start_states.end(); ++s) {
    auto startIx = s->first;
    auto state = s->second;
    auto startId = m.states_to_id(state);
    
    if (reachable.find(startId) == reachable.end()) {
      croak("start state %u unreachable", startIx);
    }
    
    if (state_map.find(startId) != state_map.end()) {
      // This happens when equivalent start states are passed to the
      // construction function.
    } else {
      state_map[startId] = startIx;
    }
  }

  // ...
  map<size_t, HV*> dfa;
  
  reachable.insert(sinkId);

  for (auto s = reachable.begin(); s != reachable.end(); ++s) {
    if (state_map.find(*s) == state_map.end()) {
      state_map[*s] = state_next++;
    }
  }

  map<size_t, StatesId> state_map_r;

  for (auto s = state_map.begin(); s != state_map.end(); ++s) {
    state_map_r[s->second] = s->first;
  }
  
  // If multiple start states are passed to the construction function and
  // they either are identical, or turn out to be equivalent once all the
  // epsilon-reachable states are added to them, mapping distinct states
  // to distinct numbers leaves out the duplicates. Since the API conven-
  // tion is that states 1..n in the generated DFA correspond to the 1..n
  // start state in the input, the duplicates have to be generated here.

  for (auto s = start_states.begin(); s != start_states.end(); ++s) {
    auto startIx = s->first;
    auto state = s->second;
    auto startId = m.states_to_id(state);
    state_map_r[startIx] = startId;
  }

  multimap<StatesId, HV*> id_to_hvs;
  
  for (auto s = state_map_r.begin(); s != state_map_r.end(); ++s) {
    
    HV* state_hv     = newHV();
    AV* combines_av  = newAV();
    SV* combines_rv  = newRV_noinc((SV*)combines_av);
    HV* next_over_hv = newHV();

lib/Algorithm/ConstructDFA/XS.pm  view on Meta::CPAN

require XSLoader;
XSLoader::load('Algorithm::ConstructDFA::XS', $VERSION);

sub construct_dfa_xs {
  my (%o) = @_;
  
  die unless ref $o{is_nullable};
  die unless ref $o{is_accepting} or exists $o{final};
  die unless ref $o{successors} or ref $o{edges_from};
  die unless ref $o{get_label} or ref $o{edges_from};
  die unless exists $o{start} or exists $o{many_start};
  die if ref $o{is_accepting} and exists $o{final};
  die if ref $o{successors} and exists $o{edges_from};
  die if ref $o{get_label} and ref $o{edges_from};
  
  my $class = 'Algorithm::ConstructDFA::XS::Synth';
  
  if (exists $o{edges_from}) {

    my $old_accepting = $o{is_accepting};
    $o{is_accepting} = sub {

lib/Algorithm/ConstructDFA/XS.pm  view on Meta::CPAN


  }

  if (exists $o{final}) {
    my %in_final = map { $_ => 1 } @{ $o{final} };
    $o{is_accepting} = sub {
      grep { $in_final{$_} } @_
    };
  }

  $o{many_start} //= [$o{start}];
  
  my $dfa = _construct_dfa_xs($o{many_start}, $o{get_label},
    $o{is_nullable}, $o{successors}, $o{is_accepting});
    
  if (exists $o{edges_from}) {
    for (values %$dfa) {
      $_->{Combines} = [ grep {
        ref $_ ne $class;
      } @{ $_->{Combines} } ];
    }
  }
  

lib/Algorithm/ConstructDFA/XS.pm  view on Meta::CPAN

}

sub _construct_dfa_xs {
  my ($roots, $labelf, $nullablef, $successorsf, $acceptingf) = @_;

  my @todo = map { @$_ } @$roots;
  my %seen;
  my @args;
  my $sm = Data::AutoBimap->new;
  my $rm = Data::AutoBimap->new;
  my %is_start;
  
  for (my $ix = 0; $ix < @$roots; ++$ix) {
    for my $v (@{ $roots->[$ix] }) {
      push @{ $is_start{$v} }, $ix + 1;
    }
  }
  
  while (@todo) {
    my $c = pop @todo;
    
    next if $seen{$c}++;
    
    my $is_nullable = !!$nullablef->($c);
    my $label = $labelf->($c);
    my $label_x = defined $label ? $rm->s2n($label) : undef;
    
    # [vertex, label, nullable, start, successors...]
    my @data = ($sm->s2n($c), $label_x, !!$is_nullable, $is_start{$c} // []);

    for ($successorsf->($c)) {
      push @data, $sm->s2n($_);
      push @todo, $_;
    }
    
    push @args, \@data;
  }

  my %h = _internal_construct_dfa_xs(sub {

t/02simple.t  view on Meta::CPAN

    for my $via (keys %{ $dfa->{$src}{NextOver} }) {
      my $dst = $dfa->{$src}{NextOver}{$via};
      push @{ $edges_from{$dst} }, [$src, $via];
    }
  }
  
  return construct_dfa_xs(
    is_nullable  => sub { 1 },
    is_accepting => sub { grep { $_ eq '1' } @_ },
    edges_from   => sub { @{ $edges_from{$_[0]} } },
    start        => [ grep { $dfa->{$_}{Accepts} } keys %$dfa ],
  );
}

for (1 .. 30) {
  my $g = Graph::Directed->random_graph(
    vertices   => int(rand(32)),
    edges_fill => 0.2
  );

  my %labels;
  my @vertices = $g->vertices;
  for my $v (@vertices) {
    next unless rand > 0.3;
    my $label = ['a', 'b', 'c']->[int rand 3];
    $g->set_vertex_attribute($v, 'label', $label);
  }
  
  my $start = [sort_by { scalar $g->successors($_) } @vertices]->[-1];
  
  next unless defined $start;
  
  my $final = [$g->all_successors($start), $start]
    ->[int rand(1 + scalar $g->all_successors($start))];
    
  next unless defined $final;
    
  my $dfa = construct_dfa_xs(
    is_nullable  => sub {
      not $g->has_vertex_attribute($_[0], 'label')
    },
    is_accepting => sub { grep { $_ eq $final } @_ },
    successors   => sub { $g->successors($_[0]) },
    get_label    => sub { $g->get_vertex_attribute($_[0], 'label') },
    start        => [ $start ],
  );
  
  if (rand > 0.5) {
    $dfa = construct_dfa_xs(
      is_nullable  => sub {
        return 1;
      },
      is_accepting => sub { grep { $dfa->{$_}{Accepts} } @_ },
      edges_from   => sub {
        my ($src) = @_;
        my @edges;
        for my $via (keys %{ $dfa->{$src}{NextOver} }) {
          my $dst = $dfa->{$src}{NextOver}{$via};
          push @edges, [$dst, $via];
        }
        return @edges;
      },
      start        => [ 1 ],
    );

  } elsif (rand > 0.5) {
    $dfa = revdet(revdet( $dfa ));
  }
  
  my $dfa_g = Graph::Directed->new;
  my $dfa_g_final = "final";
  for my $s (keys %$dfa) {
    for my $label (keys %{$dfa->{$s}{NextOver}}) {

t/02simple.t  view on Meta::CPAN

        last unless defined $s;
        push @path, $s;
      }
      unless ($path[-1] eq $dst) {
        splice @path, $#path, 1, $copy->SP_Dijkstra($path[-1], $dst);
      }
      return @path;
    }
  };

  for my $config ([$g, $start, $final, $dfa_g, 1, $dfa_g_final],
                  [$dfa_g, 1, $dfa_g_final, $g, $start, $final]
                  ) {

    my ($g1, $start, $final, $g2, $start2, $final2) = @$config;

    my $rnd;
    eval {
      $rnd = Graph::RandomPath->create_generator($g1, $start, $final);
    };
    next if $@;

    for (1 .. 4) {
      my @path =  $rnd->();
      
      my @word =
        map { $g1->get_vertex_attribute($_, 'label') } 
        grep { $g1->has_vertex_attribute($_, 'label') }
        @path[0 .. $#path - 1];

#      use YAML::XS;
#      warn Dump { path => \@path, word => join('/', @word),  };
        
      my @word_copy = @word;

      my @state = $start2;

      while (1) {
        my %seen;
        my @todo = @state;
        @state = ();
        while (@todo) {
          my $t = pop @todo;
          next if $seen{$t}++;
          push @state, $t;
          next if $g2->has_vertex_attribute($t, 'label');

t/03cmp.t  view on Meta::CPAN

  );

  my %labels;
  my @vertices = $g->vertices;
  for my $v (@vertices) {
    next unless rand > 0.3;
    my $label = ['a', 'b', 'c']->[int rand 3];
    $g->set_vertex_attribute($v, 'label', $label);
  }
  
  my $start = [sort_by { scalar $g->successors($_) } @vertices]->[-1];
  
  next unless defined $start;
  
  my $final = [$g->all_successors($start), $start]
    ->[int rand(1 + scalar $g->all_successors($start))];
    
  next unless defined $final;
    
  my $dfa_xs = construct_dfa_xs(
    is_nullable  => sub {
      not $g->has_vertex_attribute($_[0], 'label')
    },
    is_accepting => sub { grep { $_ eq $final } @_ },
    successors   => sub { $g->successors($_[0]) },
    get_label    => sub { $g->get_vertex_attribute($_[0], 'label') // '' },
    start        => [ $start ],
  );

  my $dfa_pp = construct_dfa(
    is_nullable  => sub {
      not $g->has_vertex_attribute($_[0], 'label')
    },
    is_accepting => sub { grep { $_ eq $final } @_ },
    successors   => sub { $g->successors($_[0]) },
    get_label    => sub { $g->get_vertex_attribute($_[0], 'label') // '' },
    start        => [ $start ],
  );

#  use YAML::XS;
#  print Dump $dfa_xs;
  
  my %pp = partition_by { join ' ', sort @{ $_->{Combines} } } values %$dfa_pp;
  my %xs = partition_by { join ' ', sort @{ $_->{Combines} } } values %$dfa_xs;
#  print join "\n", sort keys %pp;
#  print "###\n";
#  print join "\n", sort keys %xs;



( run in 0.255 second using v1.01-cache-2.11-cpan-0d8aa00de5b )