Ufal-UDPipe

 view release on metacpan or  search on metacpan

udpipe/udpipe.cpp  view on Meta::CPAN


// SGD -- plain stochastic gradient descent.
// No per-weight state is needed, so backpropagate_template does not have to
// allocate trainer_data for this trainer.
bool neural_network_trainer::trainer_sgd::need_trainer_data = false;

// Computes the update for a single weight.
//   gradient: gradient value for the weight
//   trainer:  supplies the learning_rate
//   (third argument): per-weight state, ignored by this trainer
// Returns the gradient scaled by the learning rate.
float neural_network_trainer::trainer_sgd::delta(float gradient, const network_trainer& trainer, workspace::trainer_data& /*data*/) {
  const auto step = trainer.learning_rate * gradient;
  return step;
}

// SGD with momentum.
// The previous update of every weight must be remembered, hence per-weight
// trainer_data is required.
bool neural_network_trainer::trainer_sgd_momentum::need_trainer_data = true;

// Computes the update for a single weight.
//   gradient: gradient value for the weight
//   trainer:  supplies momentum and learning_rate
//   data:     per-weight state; data.delta holds the previous update and is
//             overwritten with the new one
// Returns the new update: momentum * previous update + learning_rate * gradient.
float neural_network_trainer::trainer_sgd_momentum::delta(float gradient, const network_trainer& trainer, workspace::trainer_data& data) {
  auto& velocity = data.delta;
  // Kept as one expression so the evaluation matches the usual formulation.
  velocity = trainer.momentum * velocity + trainer.learning_rate * gradient;
  return velocity;
}

// AdaGrad.
// A running sum of squared gradients is stored for every weight, hence
// per-weight trainer_data is required.
bool neural_network_trainer::trainer_adagrad::need_trainer_data = true;

// Computes the update for a single weight.
//   gradient: gradient value for the weight
//   trainer:  supplies learning_rate and epsilon
//   data:     per-weight state; data.gradient accumulates the squared gradients
// Returns learning_rate / sqrt(accumulated squares + epsilon) * gradient.
float neural_network_trainer::trainer_adagrad::delta(float gradient, const network_trainer& trainer, workspace::trainer_data& data) {
  auto& squared_sum = data.gradient;
  squared_sum += gradient * gradient;

  // epsilon is added under the square root, so the denominator stays positive
  // as long as epsilon is positive.
  const auto denominator = sqrt(squared_sum + trainer.epsilon);
  return trainer.learning_rate / denominator * gradient;
}

// AdaDelta.
// Decaying averages of squared gradients and of squared updates are stored
// for every weight, hence per-weight trainer_data is required.
bool neural_network_trainer::trainer_adadelta::need_trainer_data = true;

// Computes the update for a single weight.
//   gradient: gradient value for the weight
//   trainer:  supplies momentum (used as the decay rate of both averages)
//             and epsilon; learning_rate is not used by this trainer
//   data:     per-weight state; data.gradient is the decaying average of
//             squared gradients, data.delta the decaying average of squared
//             updates
// Returns sqrt(avg squared update + epsilon) / sqrt(avg squared gradient + epsilon) * gradient.
float neural_network_trainer::trainer_adadelta::delta(float gradient, const network_trainer& trainer, workspace::trainer_data& data) {
  auto& gradient_average = data.gradient;
  auto& update_average = data.delta;
  const auto fresh_weight = 1 - trainer.momentum;

  // The gradient average is refreshed first, so the step below already uses
  // the current gradient in its denominator.
  gradient_average = trainer.momentum * gradient_average + fresh_weight * gradient * gradient;

  // The numerator still uses the update average from the previous calls.
  const auto numerator = sqrt(update_average + trainer.epsilon);
  const auto denominator = sqrt(gradient_average + trainer.epsilon);
  const float step = numerator / denominator * gradient;

  update_average = trainer.momentum * update_average + fresh_weight * step * step;
  return step;
}

// Adam.
// Decaying averages of the gradient and of the squared gradient are stored
// for every weight, hence per-weight trainer_data is required.
bool neural_network_trainer::trainer_adam::need_trainer_data = true;

// Computes the update for a single weight.
//   gradient: gradient value for the weight
//   trainer:  supplies momentum (decay of the gradient average), momentum2
//             (decay of the squared-gradient average), learning_rate and epsilon
//   data:     per-weight state; data.gradient is the decaying average of
//             gradients, data.delta the decaying average of squared gradients
// Returns learning_rate * avg gradient / sqrt(avg squared gradient + epsilon).
// Note: no bias correction of the averages is performed in this function.
float neural_network_trainer::trainer_adam::delta(float gradient, const network_trainer& trainer, workspace::trainer_data& data) {
  auto& first_moment = data.gradient;
  auto& second_moment = data.delta;

  first_moment = trainer.momentum * first_moment + (1 - trainer.momentum) * gradient;
  second_moment = trainer.momentum2 * second_moment + (1 - trainer.momentum2) * gradient * gradient;

  // epsilon is added under the square root (not after it).
  const auto denominator = sqrt(second_moment + trainer.epsilon);
  return trainer.learning_rate * first_moment / denominator;
}

// Backpropagation
template <class TRAINER>
void neural_network_trainer::backpropagate_template(vector<embedding>& embeddings, const vector<const vector<int>*>& embedding_ids_sequences, unsigned required_outcome, workspace& w) {
  size_t hidden_layer_size = network.weights[0].front().size();
  size_t outcomes_size = network.weights[1].front().size();

  // Allocate space for delta accumulators
  if (network.weights[0].size() > w.weights_batch[0].size()) w.weights_batch[0].resize(network.weights[0].size());
  if (network.weights[1].size() > w.weights_batch[1].size()) w.weights_batch[1].resize(network.weights[1].size());
  if (embeddings.size() > w.error_embedding.size()) w.error_embedding.resize(embeddings.size());
  if (embeddings.size() > w.error_embedding_nonempty.size()) w.error_embedding_nonempty.resize(embeddings.size());

  // Allocate space for trainer_data (if required)
  workspace::trainer_data none_trainer_data;
  if (TRAINER::need_trainer_data) {
    while (network.weights[0].size() > w.weights_trainer[0].size()) w.weights_trainer[0].emplace_back(network.weights[0].front().size());
    while (network.weights[1].size() > w.weights_trainer[1].size()) w.weights_trainer[1].emplace_back(outcomes_size);
  }

  // Compute error vector
  w.error_outcomes.resize(outcomes_size);
  for (unsigned i = 0; i < outcomes_size; i++)
    w.error_outcomes[i] = (i == required_outcome) - w.outcomes[i];

  // Backpropagate error_outcomes to error_hidden
  w.error_hidden.assign(hidden_layer_size, 0);
  for (auto&& i : w.hidden_kept)
    for (unsigned j = 0; j < outcomes_size; j++)
      w.error_hidden[i] += network.weights[1][i][j] * w.error_outcomes[j];
  // Dropout normalization
  if (dropout_hidden) {
    float dropout_factor = 1. / (1. - dropout_hidden);
    for (auto&& i : w.hidden_kept)
      w.error_hidden[i] *= dropout_factor;
  }

  // Perform activation function derivation
  switch (network.hidden_layer_activation) {
    case activation_function::TANH:
      for (auto&& i : w.hidden_kept)
        w.error_hidden[i] *= 1 - w.hidden_layer[i] * w.hidden_layer[i];
      break;
    case activation_function::CUBIC:
      for (auto&& i : w.hidden_kept) {
        float hidden_layer = cbrt(w.hidden_layer[i]);
        w.error_hidden[i] *= 3 * hidden_layer * hidden_layer;
      }
      break;
    case activation_function::RELU:
      for (auto&& i : w.hidden_kept)
        if (w.hidden_layer[i] <= 0)
          w.error_hidden[i] = 0;
      break;
  }

  // Update weights[1]
  for (auto&& i : w.hidden_kept) {
    if (w.weights_batch[1][i].empty()) w.weights_batch[1][i].resize(outcomes_size);
    for (unsigned j = 0; j < outcomes_size; j++)
      w.weights_batch[1][i][j] += w.hidden_layer[i] * w.error_outcomes[j];
  }
  // Bias
  if (w.weights_batch[1][hidden_layer_size].empty()) w.weights_batch[1][hidden_layer_size].resize(outcomes_size);
  for (unsigned i = 0; i < outcomes_size; i++)
    w.weights_batch[1][hidden_layer_size][i] += w.error_outcomes[i];

  // Dropout normalization
  if (dropout_input) {
    float dropout_factor = 1. / (1. - dropout_input);
    for (auto&& i : w.hidden_kept)
      w.error_hidden[i] *= dropout_factor;
  }
  // Update weights[0] and backpropagate to error_embedding
  unsigned index = 0;
  for (auto&& embedding_ids : embedding_ids_sequences)
    // Note: The seemingly unnecessary braces on the following for loop are
    // needed to compile on VS 2015 Update 3, which otherwise fails to compile it.
    for (unsigned i = 0; i < embeddings.size(); i++) {
      if (embedding_ids && (*embedding_ids)[i] >= 0) {
        int embedding_id = (*embedding_ids)[i];

        float* error_embedding = nullptr; // Accumulate embedding error if required
        if (embeddings[i].can_update_weights(embedding_id)) {



( run in 0.842 second using v1.01-cache-2.11-cpan-39bf76dae61 )