Games-Go-AGA-BayRate

 view release on metacpan or  search on metacpan

lib/Games/Go/AGA/BayRate/Collection.pm  view on Meta::CPAN

    print("my_f v:\n"); print_vector($raw_v);
    my $pt = $collection->calc_pt($raw_v);
    printf("my_f returns % .24g\n", $pt);
    return -$pt;
}

sub DEBUG_df {
    my ($raw_v, $collection, $raw_df) = @_;

    print("my_df v:\n"); print_vector($raw_v);
    print("my_df df:\n"); print_vector($raw_df);
    $collection->calc_pt_df($raw_v, $raw_df);
    gsl_vector_scale($raw_df, -1.0);
    print("my_df returns v:\n"); print_vector($raw_v);
    print("my_df returns df:\n"); print_vector($raw_df);
}

sub DEBUG_fdf {
    my ($raw_v, $collection, $f, $raw_df) = @_;

    print("my_fdf v:\n"); print_vector($raw_v);
    print("my_fdf df:\n"); print_vector($raw_df);
    ${$f} = my_f($raw_v, $collection);
    my_df($raw_v, $collection, $raw_df);
    print("my_fdf returns v:\n"); print_vector($raw_v);
    print("my_fdf returns df:\n"); print_vector($raw_df);
    printf("my_fdf sets: \$f = % .24g\n", ${$f});
}

# same functions as previous three, but without debug prints
sub gsl_f {
    my ($raw_v, $collection)  = @_;

    my $pt = $collection->calc_pt($raw_v);
    return -$pt;
}

sub gsl_df {
    my ($raw_v, $collection, $raw_df) = @_;

    $collection->calc_pt_df($raw_v, $raw_df);
    gsl_vector_scale($raw_df, -1.0);
}

sub gsl_fdf {
    my ($raw_v, $collection, $f, $raw_df) = @_;

    ${$f} = my_f($raw_v, $collection);
    my_df($raw_v, $collection, $raw_df);
}

sub new {
    my ($proto, %args) = @_;

    # Initialize a random number generator
    # NOTE: we use a different method, see "Populate the storage vector"
    # $self->{rng} = gsl_rng_alloc(gsl_rng_default);

    my $self = {};
    bless($self, ref($proto) || $proto);
    $self->{fdf_iterations}    = 10000;
    $self->{fdf_gradient_spec} = 0.001;
    $self->{f_iterations}      = 1000000;
    $self->{f_size}            = 0.00001;
    foreach my $name (                  # optional arguments
            'iter_hook',                # called once per f or fdf iteration
            'fdf_iterations',           # number fdf_iterations to perform
            'fdf_gradient_spec',        # fdf gradient to test against
            'f_iterations',             # number f_iterations to perform
            'f_size',                   # f size to test against
            'calc_ratings_failover',    # force failover to calc_ratings_f
            'calc_sigma_failover',      # force failover to calc_sigma2
            'strict_compliance',        # adnere exactly to original bayrate C++ code
            'verbose',                  # print lots of info about vectos and matrix assignments
        ) {
        $self->{$name} = delete $args{$name} if (exists $args{$name});
    }
    $self->tournamentDate(delete $args{tournamentDate} || '0000-01-01');
    if (keys %args) {
        croak sprintf "Unknown argument: %s", join(', ', keys %args);
    }
    $self->{players_array} = [];
    $self->{players_by_id} = {};
    $self->{games} = [];

    # fiddle with the symbol table to alias various calls to
    # the debugging version for help in debugging
    if ($self->{verbose}) {
        # alias to the chatty versions
        *MY_gsl_vector_set     = \&DEBUG_gsl_vector_set;
        *MY_gsl_vector_get     = \&DEBUG_gsl_vector_get;
        *MY_gsl_matrix_set     = \&DEBUG_gsl_matrix_set;
        *MY_gsl_matrix_get     = \&DEBUG_gsl_matrix_get;
        *my_f                  = \&DEBUG_f;
        *my_df                 = \&DEBUG_df;
        *my_fdf                = \&DEBUG_fdf;
    }
    else {  # quiet versions:
        *MY_gsl_vector_set     = \&gsl_vector_set;
        *MY_gsl_vector_get     = \&gsl_vector_get;
        *MY_gsl_matrix_set     = \&gsl_matrix_set;
        *MY_gsl_matrix_get     = \&gsl_matrix_get;
        *my_f                  = \&gsl_f;
        *my_df                 = \&gsl_df;
        *my_fdf                = \&gsl_fdf;
    }
    return($self);
}

sub add_game {
    my ($self, %args) = @_;

    foreach my $name (qw(black white handicap komi)) {
        if (not exists $args{$name}) {
            croak("$name not defined - usage: add_game(white=>\$white, black=>\$black, handicap=>\$handicap, komi=>\$komi)\n");
        }
    }
    foreach my $color (qw(black white)) {
        my $id = $args{$color}->get_id;
        if (not $self->player_with_id($id)) {
            croak "The $color player (id=$id) isn't in my player list\n";
        }
    }
    my $game;
    eval {
        # this can croak for various handicap/komi out or range reasons
        $game = Games::Go::AGA::BayRate::Game->new(
            white     => $args{white},
            black     => $args{black},

lib/Games/Go/AGA/BayRate/Collection.pm  view on Meta::CPAN

            $dp = -$dp / gsl_sf_erfc($rd / ($M_SQRT2 * $sigma_px));
        }
        MY_gsl_vector_set($raw_df, $w_idx, MY_gsl_vector_get($raw_df, $w_idx) + $dp);
        MY_gsl_vector_set($raw_df, $b_idx, MY_gsl_vector_get($raw_df, $b_idx) - $dp);
    }

    return 0;
}

#****************************************************************
#
# calc_ratings_f ()
#
# Calculate ratings using a multidimensional simplex method.  This
# technique is slower than the conjuagate gradient method, but it
# is more reliable.
#
# This function should be slow, but foolproof.  If an error occurs here
# the program prints an error message and fails.
#
#****************************************************************
sub calc_ratings_f {
    my ($self) = @_;

    foreach my $game (@{$self->{games}}) {
        $game->calc_handicapeqv;
    }

    # $self->close_boundary;
    # Starting point
    my $count = scalar @{$self->{players_array}};
    my $x = Math::GSL::Vector->new($count);
    my $raw_x = $x->raw;

    my (@idx, @val);
    foreach my $player (@{$self->{players_array}}) {
        MY_gsl_vector_set($raw_x, $player->get_index, $player->get_cseed);
    }

    # Set initial step sizes to 2
    my $ss = Math::GSL::Vector->new($count);
    my $raw_ss = $ss->raw;
    gsl_vector_set_all($raw_ss, 2);

    # minimizer 'state'
    my $state = my_gsl_multimin_fminimizer_set(
        $gsl_multimin_fminimizer_nmsimplex,    # type
        # gsl_multimin_function_f structure members:
            \&my_f,    # f       function
            $count,    # n       number of free variables
            $self,     # params  function params passed to f
        # end of gsl_multimin_function_f structure members:
        $raw_x,    # vector containing the player seeds
        $raw_ss,   # step size
    );

    my $iter = 0;
    my $status = $GSL_CONTINUE;
    my $f_size = $self->{f_size};
    while (    $status == $GSL_CONTINUE
           and $iter   <= $self->{f_iterations}) {
        $iter++;
        $status = gsl_multimin_fminimizer_iterate($state);

        last if ($status != $GSL_SUCCESS);  # eh?  why not CONTINUE?

        $status = gsl_multimin_test_size(gsl_multimin_fminimizer_size($state), $f_size);
        if ($self->{iter_hook}) {
            &{$self->{iter_hook}}($self, $state, $iter, $status);     # call user iteration hook
        }

    #   my $f = $state->fval;
    #   if ($status == $GSL_SUCCESS) {
    #       printf "\nConverged to minimum. f() = $f\n";
    #   }
    #   else {
    #       print "Iteration $iter\tf() = $f \tsimplex f_size = $f_size\n",
    #   }
    }

    if ($status == $GSL_CONTINUE) {
        # carp(gsl_strerror($status));
    }
    elsif ($status != $GSL_SUCCESS) {
        # $self->open_boundary;
        croak(gsl_strerror($status));
    }

    # Update new ratings
    my $xx = gsl_multimin_fminimizer_x($state);
    foreach my $player (@{$self->{players_array}}) {
        $player->set_crating(MY_gsl_vector_get($xx, $player->get_index));
    }

    # Calculate new sigmas
    $self->calc_sigma;

    # $self->open_boundary;
    return $status;
}

#****************************************************************
#
# calc_ratings_fdf ()
#
# Calculate ratings using a conjugate gradient method.  Technique fails if the initial guess
# happens to be exactly correct, which makes 'easy' test cases a little more difficult.
#
#****************************************************************

sub calc_ratings_fdf {
    my ($self) = @_;

    foreach my $game (@{$self->{games}}) {
        $game->calc_handicapeqv;
    }

    # $self->close_boundary;
    # Storage vector for player ratings
    my $count = scalar @{$self->{players_array}};
    my $x = Math::GSL::Vector->new($count);
    my $raw_x = $x->raw;

    # Populate the storage vector
    # This function crashes if we happen to seed players at a point where the gradient is
    # identically zero.  This sounds improbable, but two new players entering the rating system
    # at the same rank and who break even in a match against each other will trigger this case.
    # Accordingly, we add a small random offset to each initial guess to take it away from the
    # potential minimum point.
    #
    # reid: change seed only if it actually collides with
    #           an already existing seed
    my %seeds;
    foreach my $player (@{$self->{players_array}}) {
        my $seed = $player->get_cseed;
        while (exists $seeds{$seed}) {
            $seed = $player->get_cseed + rand(0.001);
        }

        $seeds{$seed} = 1;
        # $x->set($player->get_index, $player->get_cseed + gsl_ran_flat(r, 0, 0.1));
        MY_gsl_vector_set($raw_x, $player->get_index, $seed);
    }

    # minimizer 'state'
    my $state = my_gsl_multimin_fdfminimizer_set(
        $gsl_multimin_fdfminimizer_vector_bfgs2,    # type
        # gsl_multimin_function_fdf structure members:
            \&my_f,    # f       function
            \&my_df,   # df      derivative of f
            \&my_fdf,  # fdf     f and df
            $count,    # n       number of free variables
            $self,     # params  function params passed to f, df, and fdf
        # end of gsl_multimin_function_fdf structure members:
        $raw_x,    # vector containing the player seeds
        2.0,       # step size
        0.1,       # accuracy required
    );

    # Main loop.  Continue iterating until the likelihood function hits an extreme, or
    # until an error occurs.
    my $iter = 0;
    my $status = $GSL_CONTINUE;
    my $gradient_spec = $self->{fdf_gradient_spec};
    while (    $status == $GSL_CONTINUE
           and $iter < $self->{fdf_iterations}) {
        $iter++;
        gsl_multimin_fdfminimizer_iterate($state);

        last if ($status != $GSL_CONTINUE);

        my $gradient = raw_gsl_multimin_fdfminimizer_gradient($state);
        $status = gsl_multimin_test_gradient($gradient, $gradient_spec);
        if ($self->{iter_hook}) {
            &{$self->{iter_hook}}($self, $state, $iter, $status);     # call user iteration hook
        }

   #    printf "%s %5d  f()=%g  Norm=%g %s\n",
   #        ($status == $GSL_SUCCESS)
   #            ?  "Converged:"
   #            :  "Iteration:",
   #        $iter,
   #        $state->minimum,
   #        $state->gradient->blas_dnrm2,
   #        ($status == $GSL_CONTINUE) ? '' : gsl_strerror($status);
    }

    die "Test calc_ratings_failover" if ($self->{calc_ratings_failover});
    if ($status == $GSL_CONTINUE) {
        # carp(gsl_strerror($status));
    }
    elsif ($status != $GSL_SUCCESS) {
        # Can hit an error by accident if the initial guess on player ratings happens to be exactly right.
        # In that case, the gradient vector vanishes and the suggested update doesn't pass the tolerance
        # threshold.
        # $self->open_boundary;
        croak(gsl_strerror($status));
    }

    # Update new ratings
    my $xx = gsl_multimin_fdfminimizer_x($state);
    foreach my $player (@{$self->{players_array}}) {
        $player->set_crating(MY_gsl_vector_get($xx, $player->get_index));
    }

    # Calculate new sigmas
    $self->calc_sigma;
    # $self->open_boundary;

    return $status;
}

#****************************************************************
#
# calc_ratings ()
#
# Calculate ratings using first the fdf method, and if that fails,
# use the slower (but more reliable) _f method.
#
#****************************************************************
sub calc_ratings {
    my ($self) = @_;

    eval {
        $self->calc_ratings_fdf;    # croaks on failure
    };

lib/Games/Go/AGA/BayRate/Collection.pm  view on Meta::CPAN

#            $player->get_sigma);
#        if ($tdList_player) {
#            printf("\tTD List: %g,\t%g",
#                $tdList_player->{rating},
#                $tdList_player->{sigma});
#        }
#        printf("\n");
    }

    # Assign individual handicap equivalents and sigma_px parameters to each game.
    foreach my $game (@{$self->{games}}) {
        $game->calc_handicapeqv;
    }
}

#****************************************************************
#
# findImprobables ()
#
# Identify games that are highly improbable (<10% chance of being correct)
#    reid:  Probablity calculation moved to a Game method.
#           Comment says <10% but code looks like <1%.  In any case, I
#               changed to pass the probablility threshold as parameter,
#               and also to return array of improbable games.
# Improbables usually indicates a data entry error or a player who has
# improved dramatically since their last rating who needs to be reseeded.
#
#****************************************************************
sub findImprobables {
    my ($self, $prob) = @_;

    $prob ||= 0.01;     # one percent
    my @improbable_games;
    foreach my $game (@{$self->{games}}) {
        my $p = $game->seed_probability;
        if ($p < $prob) {
            my $white = $game->get_white;
            my $black = $game->get_black;
            printf "    White: %s Rating = %g\n", $white->get_id, $white->get_seed;
            printf "    Black: %s Rating = %g\n", $black->get_id, $black->get_seed;
            printf "    H/K: %d/%d\n", $game->get_handicap, $game->get_komi;
            printf "    Result' %s wins\n", $game->get_whiteWins ? 'White' : 'Black';
            printf "    Prob: %g\n", $p;
            push @improbable_games, $game;
        }
    }
    return wantarray ? @improbable_games : \@improbable_games;
}

1;

__END__

=head1 SYNOPSIS

  use Games::Go::AGA::BayRate::Collection;
  my $collection = Games::Go::AGA::BayRate::Collection->new(
          iter_hook      => \&iter_hook,
          tournamentDate => $tournamentDate,
          strict_compliance => $strict_compliance, # match original bayrate C++ code exactly
          #f_iterations   => 50,    # iteration limit for f method
          #fdf_iterations => 50,    # iteration limit for fdf method
          #verbose => 1,            # lots of debugging info (to STDOUT)
  );

    foreach my $player (@players) {
        $collection->add_player(
            id     => $_player->id,
            seed   => $_player->seed,
          # sigma  => 0.0,  # may not need this
        );
    }

    # enter all the games
    foreach my $game (@games) {
        $collection->add_game(
            white     => $game->white->id,
            black     => $game->black->id,
            whiteWins => $game->whiteWins,
            handicap  => $game->handicap,
            komi      => $game->komi,
        );
    }

    $collection->initSeeding($tdList);

    $collection->calc_ratings;
    # Copy the new ratings into the internal TDList for the next tournament update
    foreach my $c_player (@{$collection->players}) {
        ... # do something with new $c_player->get_rating
    }

=head1 DESCRIPTION

Games::Go::AGA::BayRate::Collection is a perl implementation of
collection.cpp found in the C<bayrate.zip> package from the American Go
Association (http://usgo.org).

Much of the following documentation is shamelessly stolen directly from
C<collection.cpp>.

=head1 METHODS

=over

= $collection->add_player( %args )

Add tournament players.  C<%args> B<must> include: C<

        id   => unique_strings,
        seed => value,

C<%args> B<may> include:

        sigma => value,

C<id>s are unique for all players in the tournament.

C<seed> is the AGA rating converted (if necessary) to a floating point
number with a 'gap' between -1.0 (1 kyu) and +1.0 (1 dan).
Specifically, a medium-strength 1 dan converts to 1.5.  A player who is
just barely above 1 kyu is 1.01.  A player who is just below 1 dan is



( run in 1.000 second using v1.01-cache-2.11-cpan-71847e10f99 )