Algorithm-ConstructDFA-XS

 view release on metacpan or  search on metacpan

ConstructDFA.xs  view on Meta::CPAN

#include <bitset>
#include <map>
#include <list>
#include <vector>
#include <set>
#include <cstdint>
#include <queue>
#include <algorithm>
#include <iterator>
#include <utility>
#include <stack>
#include <deque>

extern "C"
{
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
}

using namespace std;

// TODO: more error handling when Perl API functions fail
// TODO: use a vector for lookup tables like `successors`
// TODO: exploit that a list rather than a sub can be used
//       to identify accepting configurations; the call_sv
//       and the associated array copying are a bottleneck

// Basic vocabulary types used throughout this file.
using State    = UV;          // identifier of a single state
using States   = set<State>;  // ordered set of state identifiers
using Label    = UV;          // label value associated with a state
using StatesId = size_t;      // numeric id assigned to a set of states

// Set of integer-like values that also remembers insertion order.
//
// `included` is a membership bitmap indexed directly by the element
// value, and `elements` lists the current members in the order they
// were inserted.  Because values are used as indices, `included` grows
// to (largest inserted value + 1) entries.  back()/pop_back() allow the
// set to be used as a stack without duplicate entries.
template <class T>
class VectorBasedSet {
public:
  std::vector<bool>  included;
  std::vector<T> elements;

  // True when the set currently has no members.
  bool empty() const { return elements.empty(); }

  // True when `s` is currently a member.  Values beyond the end of the
  // bitmap have never been inserted and are therefore not members.
  bool contains(const T& s) const {
    return s < included.size() && included[s];
  }

  // Adds `s` to the set; a value that is already present is ignored so
  // `elements` never holds duplicates.
  void insert(const T& s) {
    if (contains(s)) {
      return;
    }
    if (included.size() <= s) {
      included.resize(s + 1);
    }
    included[s] = true;
    elements.push_back(s);
  }

  // Most recently inserted member.  Precondition: !empty().
  T& back() {
    return elements.back();
  }

  // Removes the most recently inserted member.  Precondition: !empty().
  void pop_back() {
    const T back_element = back();
    included[back_element] = false;
    elements.pop_back();
  }

  // Removes all members.
  void clear() {
    included.clear();
    elements.clear();
  }
};

// Expands `s` in place so that it also contains every state reachable
// from its members by repeatedly following `successors` edges out of
// nullable states.  Non-nullable states are added to `s` when reached
// but their successors are not followed.
//
//   todo       - work list; any entries present on entry are processed
//                as well, and it is empty when the function returns.
//   s          - the set being expanded (input and output).
//   nullable   - per-state flag; looked up with operator[], so a state
//                without an entry gets a `false` entry inserted.
//   successors - per-state successor list; looked up with operator[],
//                so a state without an entry gets an empty list inserted.
void
add_all_reachable_and_self(
  VectorBasedSet<State>& todo,
  VectorBasedSet<State>& s,
  map<State, bool>& nullable,
  map<State, vector<State>>& successors) {

  // Seed the work list with the successors of the nullable states that
  // are already in the set.
  for (auto i = s.elements.begin(); i != s.elements.end(); ++i) {
    if (!nullable[*i])
      continue;

    // Bind by reference: copying the successor list for every visited
    // state is wasted work, and nothing in the loop modifies the map.
    const vector<State>& next = successors[*i];

    for (auto k = next.begin(); k != next.end(); ++k)
      todo.insert(*k);
  }

  while (!todo.empty()) {
    const State current = todo.back();
    todo.pop_back();

    // pop_back() clears the work list's membership bit, so a state can
    // be queued again later; skip it if it has already been added.
    if (s.contains(current)) {
      continue;
    }

    s.insert(current);

    if (nullable[current]) {
      const vector<State>& next = successors[current];

      for (auto k = next.begin(); k != next.end(); ++k)
        todo.insert(*k);
    }
  }
}

// Calls the Perl callback `accept_sv` in scalar context with the states
// in `s` as its argument list and returns whether the callback's result
// is true.
//
// Each state is passed as a fresh mortal unsigned-integer SV.  If
// call_sv() reports anything other than exactly one return value, a
// "bad accept" warning is emitted and false is returned.
bool does_accept(SV* accept_sv, vector<State> s) {
  dSP;

  ENTER;
  SAVETMPS;

  PUSHMARK(SP);

  for (auto i = s.begin(); i != s.end(); ++i) {
    mXPUSHs(newSVuv(*i));
  }

  PUTBACK;

  const I32 count = call_sv(accept_sv, G_SCALAR);

  SPAGAIN;

  bool result = false;

  if (count == 1) {
    // Test the result with Perl's own truthiness rules instead of
    // coercing it to an integer: values such as "yes" or 0.5 are true
    // in Perl but convert to the integer 0.  Pop into a variable first
    // so the popping macro is expanded exactly once.
    SV* const returned = POPs;
    result = SvTRUE(returned) ? true : false;
  } else {
    warn("bad accept");
  }

  // The returned SV is only released here, after it has been examined.
  PUTBACK;
  FREETMPS;
  LEAVE;

  return result;
}

// Two-way mapping between sets of states and dense numeric ids.
//
// `s2id` maps a sorted state vector to its id and `id2s` maps an id
// back to that sorted vector.  Ids are handed out in order of first
// registration, starting at 0; the constructor registers the empty set
// first, so the empty set always has id 0.
class StatesBimap {
public:
  std::map<vector<State>, StatesId> s2id;
  std::vector<vector<State>> id2s;

  StatesBimap() {
    // Reserve id 0 for the empty set.
    states_to_id(States());
  }

  // Returns the id for the members of `s`, registering them first if
  // this combination has not been seen before.
  StatesId states_to_id(const States& s) {
    const vector<State> v(s.begin(), s.end());
    return states_to_id(v);
  }

  // Returns the id for `s`, registering it first if it has not been
  // seen before.  The input is sorted before lookup so element order
  // does not matter; duplicate elements are kept as they are.
  StatesId states_to_id(const vector<State>& s) {
    vector<State> key(s);
    std::sort(key.begin(), key.end());

    // One lookup: insert() leaves an existing entry untouched and
    // reports through `.second` whether a new entry was created.
    const auto inserted =
      s2id.insert(std::make_pair(std::move(key), id2s.size()));

    if (inserted.second) {
      id2s.push_back(inserted.first->first);
    }
    return inserted.first->second;
  }

  // Returns the sorted state vector registered under `id`.
  const vector<State>& id_to_states(StatesId id) {
    if (id >= id2s.size()) {
      // StatesId is size_t; cast so the argument matches the format.
      warn("Programmer error looking up %lu",
        static_cast<unsigned long>(id));
      // NOTE(review): execution continues after the warning and the
      // access below is then out of range.
    }
    return id2s[id];
  }
};

map<size_t, HV*>
build_dfa(SV* accept_sv, AV* args) {

  typedef map<pair<StatesId, Label>, StatesId> Automaton;
  StatesBimap               m;
  VectorBasedSet<State>     sub_todo;

  // Input from Perl
  map<State, vector<State>> successors;
  map<State, bool>          nullable;
  map<State, Label>         label;
  map<size_t, States>       start_states;
  
  I32 args_len = av_len(args);

  for (int ix = 0; ix <= args_len; ++ix) {
    SV** current_svp = av_fetch(args, ix, 0);

    if (current_svp == NULL)
      croak("Bad arguments");

    SV* current_sv = (SV*)*current_svp;

    if (!( SvROK(current_sv) && SvTYPE(SvRV(current_sv)) == SVt_PVAV))
      croak("Bad arguments");



( run in 0.406 second using v1.01-cache-2.11-cpan-39bf76dae61 )