Ufal-UDPipe
view release on metacpan or search on metacpan
udpipe/udpipe.cpp view on Meta::CPAN
// Plain stochastic gradient descent.
bool neural_network_trainer::trainer_sgd::need_trainer_data = false;
// The update is simply the gradient scaled by the current learning rate;
// no per-weight optimizer state is needed (hence need_trainer_data = false).
float neural_network_trainer::trainer_sgd::delta(float gradient, const network_trainer& trainer, workspace::trainer_data& /*data*/) {
  const float step = trainer.learning_rate * gradient;
  return step;
}
// Stochastic gradient descent with momentum.
bool neural_network_trainer::trainer_sgd_momentum::need_trainer_data = true;
// Maintains an exponentially decaying velocity per weight:
//   v <- momentum * v + learning_rate * g
// and returns the new velocity as the update.
float neural_network_trainer::trainer_sgd_momentum::delta(float gradient, const network_trainer& trainer, workspace::trainer_data& data) {
  float velocity = trainer.momentum * data.delta + trainer.learning_rate * gradient;
  data.delta = velocity;  // persist velocity for the next call
  return velocity;
}
// AdaGrad adaptive learning rate.
bool neural_network_trainer::trainer_adagrad::need_trainer_data = true;
// Accumulates the sum of squared gradients per weight and scales the step
// inversely to its square root, so frequently-updated weights get smaller
// steps; epsilon guards against division by zero.
float neural_network_trainer::trainer_adagrad::delta(float gradient, const network_trainer& trainer, workspace::trainer_data& data) {
  data.gradient += gradient * gradient;
  float scaled_rate = trainer.learning_rate / sqrt(data.gradient + trainer.epsilon);
  return scaled_rate * gradient;
}
// AdaDelta adaptive learning rate.
bool neural_network_trainer::trainer_adadelta::need_trainer_data = true;
// Keeps two exponential moving averages per weight: squared gradients
// (data.gradient) and squared updates (data.delta). The step size is the
// ratio of their RMS values, so no explicit learning rate is used.
float neural_network_trainer::trainer_adadelta::delta(float gradient, const network_trainer& trainer, workspace::trainer_data& data) {
  // Refresh the squared-gradient average first.
  data.gradient = trainer.momentum * data.gradient + (1 - trainer.momentum) * gradient * gradient;
  // The update uses the PREVIOUS squared-update average (data.delta)...
  float rms_update = sqrt(data.delta + trainer.epsilon);
  float rms_gradient = sqrt(data.gradient + trainer.epsilon);
  float update = rms_update / rms_gradient * gradient;
  // ...which is only refreshed afterwards with the square of this update.
  data.delta = trainer.momentum * data.delta + (1 - trainer.momentum) * update * update;
  return update;
}
// Adam optimizer (Kingma & Ba).
bool neural_network_trainer::trainer_adam::need_trainer_data = true;
// Keeps exponential moving averages of the gradient (first moment, decay
// `momentum`, stored in data.gradient) and of the squared gradient (second
// moment, decay `momentum2`, stored in data.delta) per weight.
// NOTE(review): unlike the original Adam paper, this omits the
// bias-correction factors 1/(1 - beta^t) for both moments (no step counter
// is visible in trainer_data), and epsilon is added INSIDE the square root
// rather than outside — presumably intentional deviations, but any change
// here would alter already-trained models; confirm before "fixing".
float neural_network_trainer::trainer_adam::delta(float gradient, const network_trainer& trainer, workspace::trainer_data& data) {
data.gradient = trainer.momentum * data.gradient + (1 - trainer.momentum) * gradient; // m_t
data.delta = trainer.momentum2 * data.delta + (1 - trainer.momentum2) * gradient * gradient; // v_t
return trainer.learning_rate * data.gradient / sqrt(data.delta + trainer.epsilon); // lr * m / sqrt(v + eps)
}
// Backpropagation
template <class TRAINER>
void neural_network_trainer::backpropagate_template(vector<embedding>& embeddings, const vector<const vector<int>*>& embedding_ids_sequences, unsigned required_outcome, workspace& w) {
size_t hidden_layer_size = network.weights[0].front().size();
size_t outcomes_size = network.weights[1].front().size();
// Allocate space for delta accumulators
if (network.weights[0].size() > w.weights_batch[0].size()) w.weights_batch[0].resize(network.weights[0].size());
if (network.weights[1].size() > w.weights_batch[1].size()) w.weights_batch[1].resize(network.weights[1].size());
if (embeddings.size() > w.error_embedding.size()) w.error_embedding.resize(embeddings.size());
if (embeddings.size() > w.error_embedding_nonempty.size()) w.error_embedding_nonempty.resize(embeddings.size());
// Allocate space for trainer_data if required
workspace::trainer_data none_trainer_data;
if (TRAINER::need_trainer_data) {
while (network.weights[0].size() > w.weights_trainer[0].size()) w.weights_trainer[0].emplace_back(network.weights[0].front().size());
while (network.weights[1].size() > w.weights_trainer[1].size()) w.weights_trainer[1].emplace_back(outcomes_size);
}
// Compute error vector
w.error_outcomes.resize(outcomes_size);
for (unsigned i = 0; i < outcomes_size; i++)
w.error_outcomes[i] = (i == required_outcome) - w.outcomes[i];
// Backpropagate error_outcomes to error_hidden
w.error_hidden.assign(hidden_layer_size, 0);
for (auto&& i : w.hidden_kept)
for (unsigned j = 0; j < outcomes_size; j++)
w.error_hidden[i] += network.weights[1][i][j] * w.error_outcomes[j];
// Dropout normalization
if (dropout_hidden) {
float dropout_factor = 1. / (1. - dropout_hidden);
for (auto&& i : w.hidden_kept)
w.error_hidden[i] *= dropout_factor;
}
// Perform activation function derivation
switch (network.hidden_layer_activation) {
case activation_function::TANH:
for (auto&& i : w.hidden_kept)
w.error_hidden[i] *= 1 - w.hidden_layer[i] * w.hidden_layer[i];
break;
case activation_function::CUBIC:
for (auto&& i : w.hidden_kept) {
float hidden_layer = cbrt(w.hidden_layer[i]);
w.error_hidden[i] *= 3 * hidden_layer * hidden_layer;
}
break;
case activation_function::RELU:
for (auto&& i : w.hidden_kept)
if (w.hidden_layer[i] <= 0)
w.error_hidden[i] = 0;
break;
}
// Update weights[1]
for (auto&& i : w.hidden_kept) {
if (w.weights_batch[1][i].empty()) w.weights_batch[1][i].resize(outcomes_size);
for (unsigned j = 0; j < outcomes_size; j++)
w.weights_batch[1][i][j] += w.hidden_layer[i] * w.error_outcomes[j];
}
// Bias
if (w.weights_batch[1][hidden_layer_size].empty()) w.weights_batch[1][hidden_layer_size].resize(outcomes_size);
for (unsigned i = 0; i < outcomes_size; i++)
w.weights_batch[1][hidden_layer_size][i] += w.error_outcomes[i];
// Dropout normalization
if (dropout_input) {
float dropout_factor = 1. / (1. - dropout_input);
for (auto&& i : w.hidden_kept)
w.error_hidden[i] *= dropout_factor;
}
// Update weights[0] and backpropagate to error_embedding
unsigned index = 0;
for (auto&& embedding_ids : embedding_ids_sequences)
// Note: The unnecessary brackets on the following for cycle are needed
// to compile on VS 2015 Update 3, which otherwise fails to compile it.
for (unsigned i = 0; i < embeddings.size(); i++) {
if (embedding_ids && (*embedding_ids)[i] >= 0) {
int embedding_id = (*embedding_ids)[i];
float* error_embedding = nullptr; // Accumulate embedding error if required
if (embeddings[i].can_update_weights(embedding_id)) {
( run in 0.842 second using v1.01-cache-2.11-cpan-39bf76dae61 )