Algorithm-ConstructDFA2

 view release on metacpan or  search on metacpan

lib/Algorithm/ConstructDFA2.pm  view on Meta::CPAN

  $self->_dbh->commit();
  return $state_id;
}

sub _vertex_str_from_partial_list {
  my ($self, @vertices) = @_;

  return $self->_vertex_str_from_vertices() unless @vertices;

  my $escaped_roots = join ", ", map {
    $self->_dbh->quote($_)
  } @vertices;

  my ($vertex_str) = $self->_dbh->selectrow_array(qq{
    SELECT _canonical(json_group_array(closure.e_reachable))
    FROM Closure
    WHERE root IN ($escaped_roots)
  });

  return $vertex_str;
}

sub find_or_create_state_id {
  my ($self, @vertices) = @_;

  my $vertex_str = _vertex_str_from_partial_list($self, @vertices);

  return _find_or_create_state_from_vertex_str($self, $vertex_str);
}

sub vertices_in_state {
  my ($self, $state_id) = @_;

  return map { @$_ } $self->_dbh->selectall_array(q{
    SELECT vertex FROM Configuration WHERE state = ?
  }, {}, $state_id);
}

sub cleanup_dead_states {
  my ($self, $vertices_accept) = @_;

  $self->_dbh->sqlite_create_function( '_vertices_accept', 1, sub {
    my @vertices = $self->_vertex_str_to_vertices(@_);
    return !! $vertices_accept->(@vertices);
  });

  $self->_dbh->begin_work();

  $self->_dbh->do(q{
    CREATE TEMPORARY TABLE accepting AS
    SELECT state_id AS state
    FROM State
    WHERE _vertices_accept(vertex_str)+0 = 1
  });

  my @accepting = map { @$_ } $self->_dbh->selectall_array(q{
    SELECT state FROM accepting
  });

  # NOTE: this also renames states in transitions involving
  # possible start states, but they would then simply have no
  # transitions, which should be fine.

  $self->_dbh->do(q{
    WITH RECURSIVE all_living(state) AS (
      SELECT state FROM accepting
      
      UNION
      
      SELECT src AS state
      FROM Transition
        INNER JOIN all_living
          ON (Transition.dst = all_living.state)
    )
    UPDATE Transition
    SET dst = ?
    WHERE dst NOT IN (SELECT state FROM all_living)
  }, {}, $self->dead_state_id);

  $self->_dbh->do(q{
    DROP TABLE accepting;
  });

  $self->_dbh->commit();

  # TODO: is there a better way to drop the function?
  $self->_dbh->sqlite_create_function( '_vertices_accept', 1, undef );

  return @accepting;
}

sub compute_some_transitions {
  my ($self, $limit) = @_;

  $limit //= 1_000;

  my $sth = $self->_dbh->prepare_cached(q{
    SELECT
        s.state_id AS src 
      , i.value AS input
      , _canonical(json_group_array(closure.e_reachable))
          AS dst_vertex_str
    FROM 
      state s 
      CROSS JOIN input i
      LEFT JOIN configuration c
        ON (s.state_id = c.state)
      LEFT JOIN match m
        ON (m.vertex = c.vertex AND m.input = i.value)
      LEFT JOIN edge
        ON (m.vertex = edge.src)
      LEFT JOIN closure
        ON (edge.dst = closure.root)
      LEFT JOIN transition t
        ON (t.src = s.state_id AND t.input = i.value)
    WHERE
      t.dst IS NULL
    GROUP BY
      s.state_id, i.rowid
    ORDER BY
      s.state_id, i.rowid

lib/Algorithm/ConstructDFA2.pm  view on Meta::CPAN


sub transitions_as_5tuples {
  my ($self) = @_;

  return $self->_dbh->selectall_array(q{
    SELECT * FROM view_transitions_as_5tuples
  });
}

sub backup_to_file {
  my ($self, $schema_version, $file) = @_;
  die unless $schema_version eq 'v0';
  $self->_dbh->sqlite_backup_to_file($file);
}

# sub backup_to_dbh {
#   my ($self, $schema_version) = @_;
# 
#   die unless $schema_version eq 'v0';
# 
#   require File::Temp;
# 
#   my ($fh, $filename) = File::Temp::tempfile();
# 
#   $self->_dbh->sqlite_backup_to_file($filename);
# 
#   my $dbh = DBI->connect('dbi:SQLite:dbname=:memory:');
# 
#   $dbh->sqlite_backup_from_file($filename);
# 
#   File::Temp::unlink0($fh, $filename);
# 
#   undef $fh;
# 
#   return $dbh;
# }

1;

__END__

=head1 NAME

Algorithm::ConstructDFA2 - Deterministic finite automaton construction

=head1 SYNOPSIS

  use Algorithm::ConstructDFA2;

  my $dfa = Algorithm::ConstructDFA2->new(
    input_alphabet     => [ @symbols ],
    input_vertices     => [ qw/ 2 3 4 / ],
    input_edges        => [ [ 2, 3 ], [ 3, 4 ] ],

    vertex_nullable    => sub($vertex)         { ... },
    vertex_matches     => sub($vertex, $input) { ... },

    storage_dsn        => 'dbi:SQLite:dbname=...',
  );

  my $start_id = $dfa->find_or_create_state_id(qw/ 2 /);

  while (my $count = $dfa->compute_some_transitions(1_000)) {
    ...
  }

  my @accepting = $dfa->cleanup_dead_states(sub(@vertices) {
    ...
  });

=head1 DESCRIPTION

This module computes deterministic finite automata from equivalent
non-deterministic finite automata. The input NFA must be expressed
as directed graph with labeled vertices. Vertex labels indicate if
vertices match a particular terminal symbol from an input alphabet,
or match the empty string, meaning they can be crossed without any
input when matching a string.

This is slightly different from how NFA graphs are usually encoded
in literature (as graph with labeled edges), but the conversion is
straightforward (turn edges into additional vertices). Finding a
suitable alphabet is more difficult, L<Set::IntSpan::Partition> can
help with that (the module splits sets of sets of terminals like
"letters" and "digits" and "hexdigits" into non-overlapping sets,
each of which can then be used as a terminal for this module).

DFAs can be exponentially larger than equivalent NFAs; to accomodate
large or complicated NFAs, computed data is held in a SQLite database
to reduce memory use. Since a DFA is basically just the result of
exhaustively computing cross-products, most computation is done in
SQL, leaving only minimal Perl code.

=head1 CONSTRUCTOR

=over

=item new(%options)

The C<%options> hash supports the following keys:

=over

=item C<input_vertices>

Array of vertices (unsigned integers) in the input graph.

=item C<input_edges>

Array of edges (arrays of two vertices) in the input graph.

=item C<input_alphabet>

Array of terminal symbols (unsigned integers).

=item C<vertex_nullable>

Code reference called for each vertex in the input graph. Should
return a true value if and only if the vertex matches the empty
string.

=item C<vertex_matches>

Code reference called for each pair of input vertex and input symbol
from the input alphabet. Should return a true value if and only if
the vertex matches the input symbol.

=item C<storage_dsn>

Database to use for computations, C<dbi:SQLite:dbname=:memory:> by
default.

=back

=back

=head1 METHODS

=over

=item $dfa->find_or_create_state_id(@vertices)

Given a list of vertices, computes a new state, adds it to the
automaton if it does not already exist, and returns an identifier
for the state. This is used to create a start state in the DFA.

=item $dfa->compute_some_transitions($limit)

Computes up to C<$limit> additional transitions and returns the
number of transitions actually computed. A return value of zero
indicates that all transitions have been computed.

=item $dfa->dead_state_id()

Returns the state identifier for a fixed dead state (from which
no accepting configuration can be reached).

=item $dfa->cleanup_dead_states(\&vertices_accept)

Given a code reference that takes a list of vertices and returns
true if and only if the vertices are an accepting configuration,
this method changes the automaton so that dead states have no
transitions to different dead states.

If, for example, the input NFA has a special "final" vertex that
indicates acceptance when reached, the code reference would check
if the vertex list contains this vertex.

=item $dfa->transitions_as_3tuples()

Returns a list of all transitions computed so far as. Transitions
are arrays with three identifiers for the source state, the input
symbol, and the destination state.

  for my $transition ( $dfa->transitions_as_3tuples() ) {
    my ($src_state, $input, $dst_state) = @$transition;
    ...
  }

=item $dfa->vertices_in_state($state_id)

Returns a list of vertices in the state C<$state_id>.

=item $dfa->transitions_as_5tuples()

Returns a list of all transitions computed so far as. Transitions
are arrays with five identifiers: the source state, an input vertex
included in the source state, the input symbol, the destination state
and an input vertex included in the destination state.

  for my $transition ( $dfa->transitions_as_5tuples() ) {
    my ($src_state, $src_vertex, $input, $dst_state, $dst_vertex) =
      @$transition;
    ...
  }

Note that unlike C<transitions_as_3tuples> this omits transitions
involving the main dead state.

=item $dfa->backup_to_file('v0', $file)

Create a backup of the database used to store input and computed data
into C<$file>. The first parameter must be C<v0> and indicates the
version of the database schema.



( run in 2.089 seconds using v1.01-cache-2.11-cpan-5a3173703d6 )