GitInsight
view release on metacpan or search on metacpan
lib/GitInsight.pm view on Meta::CPAN
package GitInsight;
# XXX: Add behavioural change detection, focusing on that period for predictions
# Compile-time setup, executed before the rest of this file is compiled.
BEGIN {
$| = 1; # autoflush the currently selected output handle
$^W = 1; # switch on Perl's global warnings flag
}
our $VERSION = '0.06';
#use Carp::Always;
use GitInsight::Obj -base;
use strict;
use warnings;
use 5.008_005;
use GD::Simple;
use Carp;
use Storable qw(dclone);
use POSIX;
use Time::Local;
use GitInsight::Util
qw(markov markov_list LABEL_DIM gen_m_mat gen_trans_mat info error warning wday label prob label_step);
use List::Util qw(max);
use LWP::UserAgent;
use POSIX qw(strftime ceil);
# Object attributes. `has` is presumably supplied by `use GitInsight::Obj -base`
# (confirm there); where a sub is given it provides the attribute's default.
# username: account to analyse; contribs: hash built by decode();
# calendar: copy of the raw calendar data stored by decode().
has [qw(username contribs calendar)];
# verbose: when true, progress messages are emitted through info(). Default 0.
has 'verbose' => sub {0};
# no_day_stats: when true, one transition matrix is used instead of one per
# weekday. Default 0.
has 'no_day_stats' => sub {0};
# statistics: when 1, decode() counts labels in {stats} and process() runs
# _gen_stats. Default 0.
has 'statistics' => sub {0};
# ca_output: when 1, process() calls draw_ca(). Default 1.
has 'ca_output' => sub {1};
# accuracy: when 1, process() runs _accuracy. Default 0.
has 'accuracy' => sub {0};
# left_cutoff / cutoff_offset: bounds read by decode();
# file_output: base name (".png" is appended) read by draw_ca().
has [qw(left_cutoff cutoff_offset file_output)];
# contrib_calendar([$username])
#
# Fetches https://github.com/users/<username>/contributions and feeds the
# response body to decode().
#
# $username is optional and falls back to the object's username attribute;
# when the attribute is still empty it is set from the argument.
#
# Returns the value of $self->contribs after decoding.
# Dies with the HTTP status line when the request is not successful, and
# croaks when no username is available at all.
sub contrib_calendar {
    my $self     = shift;
    my $username = shift || $self->username;

    # Without a username the URL below would be malformed; fail early with a
    # message that points at the caller.
    croak "contrib_calendar() called without an username"
        if !defined $username || !length $username;

    $self->username($username) if !$self->username;

    # Build the URL once so the log line and the request cannot diverge.
    my $url = 'https://github.com/users/' . $username . '/contributions';

    # Log before the request is sent, so the message is visible even when the
    # request hangs until the timeout.
    info( "Getting " . $url ) if $self->verbose;

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;    # honour the *_proxy environment variables

    my $response = $ua->get($url);
    die $response->status_line if !$response->is_success;

    $self->decode( $response->decoded_content );
    return $self->contribs;
}
# draw_ca(@CA)
#
# Renders the list of cells as a grid of coloured squares, 7 rows per column,
# filled column by column. Each element of @CA is expected to be an array
# reference usable as arguments to GD::Simple's bgcolor/fgcolor (the callers
# in this file pass [ r, g, b ] triples); false elements are skipped.
#
# The last 7 cells receive a red outline and the first column is labelled
# with the names from @GitInsight::Util::wday.
#
# Returns the written file name ("<file_output>.png") when the file_output
# attribute is defined, otherwise the PNG data itself.
# Croaks when the output file cannot be opened or closed.
sub draw_ca {
    my $self = shift;
    my @CA   = @_;

    my $rows        = 7;
    my $cols        = ceil( $#CA / 7 ) + 1;
    my $cell_width  = 50;
    my $cell_height = 50;
    my $border      = 3;
    my $width       = $cols * $cell_width;
    my $height      = $rows * $cell_height;

    my $img = GD::Simple->new( $width, $height );
    $img->font(gdSmallFont);    # needed for the weekday labels below

    for ( my $c = 0; $c < $cols; $c++ ) {
        for ( my $r = 0; $r < $rows; $r++ ) {
            my $cell = $c * $rows + $r;

            # Cells without a colour (including positions past the end of
            # @CA) are left blank.
            my $color = $CA[$cell] or next;

            my @topleft  = ( $c * $cell_width, $r * $cell_height );
            my @botright = (
                $topleft[0] + $cell_width - $border,
                $topleft[1] + $cell_height - $border
            );

            # Best effort: an entry that cannot be used as a colour must not
            # abort the whole drawing; the cell is then drawn with whatever
            # colours are currently set.
            eval {
                $img->bgcolor( @{$color} );
                $img->fgcolor( @{$color} );
            };
            $img->rectangle( @topleft, @botright );
            $img->moveTo( $topleft[0] + 2, $botright[1] + 2 );

            # Outline the last 7 cells in red.
            if ( $cell >= ( scalar(@CA) - 7 ) ) {
                $img->fgcolor( 255, 0, 0 )
                    and $img->rectangle( @topleft, @botright );
            }

            # Weekday names go into the first column only.
            if ( $c == 0 ) {
                $img->fgcolor( 0, 0, 0 )
                    and $img->string( $GitInsight::Util::wday[$r] );
            }
        }
    }

    return $img->png if !defined $self->file_output;

    my $filename = $self->file_output . ".png";

    # Three-argument open keeps characters in the file name from being read
    # as an open mode; every step is checked so a failed write is reported
    # instead of returning the name of a file that was never written.
    open my $PNG, '>', $filename
        or croak "Cannot open $filename for writing: $!";
    binmode($PNG);
    print {$PNG} $img->png
        or croak "Cannot write to $filename: $!";
    close $PNG
        or croak "Cannot close $filename: $!";

    info( "File written in : " . $filename ) if $self->verbose;
    return $filename;
}
# Read-only helpers for inspecting the object after decode()/process().

# start_day: the value decode() stored in {first_day}{data}.
sub start_day {
    my ($self) = @_;
    return $self->{first_day}->{data};
}

# last_day: third field of the last entry in {result}.
sub last_day {
    my ($self) = @_;
    my $results = $self->{result};
    return @{$results}[-1]->[2];
}

# prediction_start_day: third field of the first entry in {result}.
sub prediction_start_day {
    my ($self) = @_;
    my $results = $self->{result};
    return @{$results}[0]->[2];
}
# _accuracy()
#
# Estimates how well the predictions match the known calendar. The calendar
# is walked in chunks of 7 entries; after each chunk a fresh GitInsight
# object (PNG generation disabled) is fed every entry seen so far and asked
# to process it. Each predicted entry whose date is present in
# $self->contribs counts towards the total, and counts as a hit when its
# predicted label equals the stored label.
#
# Stores prob( total, hits ) in $self->{accuracy} and returns $self.
sub _accuracy {
    my $self = shift;

    # Iterate over a copy: splicing $self->calendar itself would leave the
    # object's calendar empty once this method has run.
    my @remaining = @{ $self->calendar };
    my $contribs  = $self->contribs;

    my @commits;
    my $total_days = 0;
    my $accuracy   = 0;

    while ( my @week = splice @remaining, 0, 7 ) {
        push @commits, @week;

        my $Insight = GitInsight->new(
            no_day_stats => $self->no_day_stats,
            ca_output    => 0,                     # disable png generation
            username     => $self->username
        );
        $Insight->decode( [@commits] );
        $Insight->process;

        foreach my $res ( @{ $Insight->{result} } ) {

            # Fetch the entry first: testing exists on the nested key
            # directly would create an empty entry in contribs for every
            # predicted date that is not part of the calendar.
            my $known = $contribs->{ $res->[2] };
            next if !defined $known || !exists $known->{l};

            $accuracy++ if $known->{l} == $res->[1];
            $total_days++;
        }
    }

    my $accuracy_prob = prob( $total_days, $accuracy );
    $self->{accuracy} = $accuracy_prob;

    if ( $self->verbose ) {
        info("Accuracy is $accuracy / $total_days");
        info( sprintf( "%.5f", $accuracy_prob * 100 ) . " \%" );
    }
    return $self;
}
# _decode_calendar($content)
#
# Scans $content for every data-count="..." data-date="..." attribute pair
# and returns an array reference of [ date, count ] pairs in order of
# appearance. The invocant is ignored.
sub _decode_calendar {
    my ( undef, $content ) = @_;

    my @pairs;
    while ( $content =~ m/data\-count="(.*?)" data\-date="(.*?)"/g ) {
        my ( $count, $date ) = ( $1, $2 );
        push @pairs, [ $date, $count ];
    }
    return \@pairs;
}
# decode($data)
#
# Loads a contribution calendar into the object and collects the label
# transitions that the later prediction steps work from.
#
# $data is either an array reference of [ date, commit_count ] pairs, e.g.
# [ [ '2013-01-20', 9 ], ... ], or any other value, which is handed to
# _decode_calendar() to be turned into that form.
#
# Effects visible in this sub: stores a deep copy of the data in calendar,
# empties {result}, fills {first_day}, appends colours to {ca}, creates
# {transition} and {last_week}, counts transitions in {transition_hash} and
# {transition}, counts labels in {stats} when statistics == 1, and stores a
# date => { c => commits, l => label [, d => weekday ] } hash through
# contribs(). Returns the value of contribs.
#
# NOTE(review): {ca} is only pushed to here and never reset, so decoding twice
# on the same object looks like it accumulates cells - confirm.
# NOTE(review): the splice calls below remove the selected entries from
# @$response, which is the caller's own array when an arrayref was passed.
sub decode {
my $self = shift;
#my $response = ref $_[0] ne "ARRAY" ? eval(shift) : shift;
my $response
= ref $_[0] ne "ARRAY" ? $self->_decode_calendar(shift) : shift;
$self->calendar( dclone($response) );
# NOTE(review): %commits_count is declared but only referenced in
# commented-out lines below.
my %commits_count;
my $min = $self->left_cutoff || 0;
$self->{result} = []; #empty the result
$min = 0 if ( $min < 0 ); # avoid negative numbers
# $max is used below both as a splice length ($max + 1 entries) and, added to
# $min, as the end index of the last-week slice.
my $max
= $self->cutoff_offset || ( scalar( @{$response} ) - 1 );
$max = scalar( @{$response} )
if $max > scalar( @{$response} )
; # maximum cutoff boundary it's array element number
info "$min -> $max portion" if $self->verbose;
my $max_commit
= max( map { $_->[1] } @{$response} ); #Calculating label steps
label_step( 0 .. $max_commit ); #calculating quartiles over commit count
info( "Max commit is: " . $max_commit ) if $self->verbose;
$self->{first_day}->{day} = wday( $response->[0]->[0] )
; #getting the first day of the commit calendar, it's where the ca will start
# position of that weekday name within @GitInsight::Util::wday
my ($index)
= grep { $GitInsight::Util::wday[$_] eq $self->{first_day}->{day} }
0 .. $#GitInsight::Util::wday;
$self->{first_day}->{index} = $index;
$self->{first_day}->{data} = $response->[$min]->[0];
# pads {ca} with scalar(@wday) + $index white cells before the real data
push( @{ $self->{ca} }, [ 255, 255, 255 ] )
for (
0 .. scalar(@GitInsight::Util::wday) #white fill for labels
+ ( $index - 1 )
); #white fill for no contribs
$self->{transition} = gen_trans_mat( $self->no_day_stats );
# $last holds the previously seen label while the map blocks below run
my $last;
# NOTE(review): the slice indices below are not bounds-checked; with fewer
# than 7 entries, or with a non-zero $min and no cutoff_offset, they fall
# outside @$response - confirm the intended range.
$self->{last_week}
= [ map { [ $_->[0], label( $_->[1] ) ] }
( @{$response} )[ ( ( $max + $min ) - 6 ) .. ( $max + $min ) ] ]
; # cutting the last week from the answer and substituting the label instead of the commit number
#print( $self->{transition}->{$_} ) for (last_week keys $self->{transition} );
# $self->{max_commit} =0;
# Both branches walk the selected entries in order; the first counts into a
# single matrix, the second into one matrix per weekday.
$self->contribs(
$self->no_day_stats
? { map {
my $l = label( $_->[1] );
push( @{ $self->{ca} }, $GitInsight::Util::CA_COLOURS{$l} )
; #building the ca
$last = $l if ( !$last );
# $commits_count{ $_->[1] } = 1;
$self->{stats}->{$l}++
if $self->statistics == 1; #filling stats hashref
$self->{transition_hash}->{$last}->{$l}++
; #filling transition_hash hashref from $last (last seen label) to current label
$self->{transition}
->slice("$last,$l")++; #filling transition matrix
#$self->{max_commit} = $_->[1] if ($_->[1]>$self->{max_commit});
$last = $l;
$_->[0] => {
c => $_->[1], #commits
l => $l #label
}
} splice( @{$response}, $min, ( $max + 1 ) )
}
: { map {
my $w = wday( $_->[0] );
my $l = label( $_->[1] );
push( @{ $self->{ca} }, $GitInsight::Util::CA_COLOURS{$l} );
$last = $l if ( !$last );
# $commits_count{ $_->[1] } = 1;
$self->{stats}->{$w}->{$l}++
if $self->statistics == 1; #filling stats hashref
$self->{transition_hash}->{$w}->{$last}
->{$l}++; #filling transition_hash hashref, keyed by weekday first
$self->{transition}->{$w}
->slice("$last,$l")++; #filling transition matrix
$last = $l;
$_->[0] => {
c => $_->[1], #commits
d => $w, #day in the week
l => $l #label
}
} splice( @{$response}, $min, ( $max + 1 ) )
}
);
return $self->contribs;
}
# process()
#
# Runs the whole pipeline on the object: fetches the calendar when no
# contribs are loaded yet, builds the transition probabilities, runs the
# Markov step and then the optional parts (statistics, image, accuracy)
# according to the corresponding attributes.
#
# Croaks when no username is set. Returns $self.
sub process {
    my ($self) = @_;

    if ( !$self->username ) {
        croak "process() called while you have not specified an username";
    }

    # Nothing decoded yet: download the calendar for the configured user.
    if ( !$self->contribs ) {
        $self->contrib_calendar( $self->username ) if $self->username;
    }

    $self->_transition_matrix;
    $self->_markov;

    if ( $self->statistics ) {
        $self->_gen_stats;
    }

    if ( $self->ca_output == 1 ) {
        $self->{png} = $self->draw_ca( @{ $self->{ca} } );
    }

    # Expose the label steps computed by GitInsight::Util.
    $self->{steps} = \%GitInsight::Util::LABEL_STEPS;

    if ( $self->accuracy && $self->accuracy == 1 ) {
        $self->_accuracy;
    }

    return $self;
}
# _gen_stats()
#
# Replaces the raw counters in $self->{stats} with probabilities formatted
# to five decimals. With no_day_stats set, {stats} maps label => count and
# the total is taken over all labels; otherwise {stats} maps
# weekday => { label => count } and the total is taken per weekday.
# Each probability is obtained from prob( total, count ).
sub _gen_stats {
    my $self = shift;

    if ( $self->no_day_stats ) {
        my $total = 0;
        for my $count ( values %{ $self->{stats} } ) {
            $total += $count;
        }

        for my $label ( keys %{ $self->{stats} } ) {
            info( "Calculating probability for label $label $total / "
                    . $self->{stats}->{$label} )
                if $self->verbose;
            my $prob = prob( $total, $self->{stats}->{$label} );
            info("Is: $prob") if $self->verbose;
            $self->{stats}->{$label} = sprintf "%.5f", $prob;
        }
    }
    else {
        for my $weekday ( keys %{ $self->{stats} } ) {

            # The total restarts for every weekday.
            my $total = 0;
            for my $count ( values %{ $self->{stats}->{$weekday} } ) {
                $total += $count;
            }

            for my $label ( keys %{ $self->{stats}->{$weekday} } ) {
                info(
                    "Calculating probability for $weekday -> label $label $total / "
                        . $self->{stats}->{$weekday}->{$label} )
                    if $self->verbose;
                my $prob
                    = prob( $total, $self->{stats}->{$weekday}->{$label} );
                info("Is: $prob") if $self->verbose;
                $self->{stats}->{$weekday}->{$label} = sprintf "%.5f", $prob;
            }
        }
    }
}
sub _markov {
my $self = shift;
info "Markov chain phase" if $self->verbose;
my $dayn = 1;
info "Calculating predictions for "
. ( scalar( @{ $self->{last_week} } ) ) . " days"
if $self->verbose;
foreach my $day ( @{ $self->{last_week} } ) { #cycling the last week
my $wd = wday( $day->[0] ); #computing the weekday
my $ld = $day->[1]; #getting the label
my $M = markov_list(
gen_m_mat($ld),
$self->no_day_stats
? $self->{transition}
: $self->{transition}->{$wd},
$dayn
); #Computing the markov for the state
lib/GitInsight.pm view on Meta::CPAN
# _transition_matrix()
#
# Turns the transition counters collected by decode() into probabilities, in
# place. For every matrix the totals are taken once with sumover(); each cell
# (x, y) is then overwritten with prob( total at y, count at (x, y) ).
# With no_day_stats set there is a single matrix in $self->{transition};
# otherwise $self->{transition} holds one matrix per key and each of them is
# processed the same way.
sub _transition_matrix {
    my $self = shift;
    info("Going to build transation matrix probabilities") if $self->verbose;

    # Shared by both branches: normalise one matrix in place.
    my $to_probabilities = sub {
        my ($matrix) = @_;

        # Totals are computed before any cell is overwritten.
        my $totals = $matrix->sumover();

        for my $x ( 0 .. LABEL_DIM() ) {
            for my $y ( 0 .. LABEL_DIM() ) {
                $matrix->slice("$x,$y")
                    .= prob( $totals->at($y), $matrix->at( $x, $y ) );
            }
        }
    };

    if ( $self->no_day_stats ) {
        $to_probabilities->( $self->{transition} );
    }
    else {
        for my $weekday ( keys %{ $self->{transition} } ) {
            $to_probabilities->( $self->{transition}->{$weekday} );
        }
    }
}
1;
__END__
=encoding utf-8
=head1 NAME
GitInsight - Predict your github contributions using Bayesian inference and Markov chain
=head1 SYNOPSIS
gitinsight --username [githubusername] (--nodaystats) (--accuracy) #using the shipped bin
#or using the module
my $Insight= GitInsight->new(no_day_stats => 0, username => "markov", accuracy=> 1);
my $Result= $Insight->process;
my $accuracy = $Insight->{accuracy};
$Result = $Insight->{result};
# $Result contains the next week predictions and is an arrayref of arrayrefs [ [ 'Sat', 1, '2014-07-1', [ 0 , '0.151515151515152', '0.0606060606060606', '0.0404040404040404', 0 ] ], .. [ 'DayofWeek', 'winner_label', '...
=head1 DESCRIPTION
GitInsight is a module that allows you to predict your github contributions in the "calendar contribution" style of github (the table of contributions that you see on your profile page).
=head1 HOW DOES IT WORK?
GitInsight generates a transition probability matrix from your github contrib_calendar to compute the possible states for the following days. Given that GitHub splits the contributions into 5 states (here also called labels), the probability can be infer...
=head2 THEORY
We trace the state transitions in a matrix, increasing the count each time we observe a transition (L<https://en.wikipedia.org/wiki/Transition_matrix>), then we infer the probabilities using the Bayesian method L<https://en.wikipedia.org/wiki/Baye...
=head1 INSTALLATION
GitInsight requires the installation of gsl (GNU scientific library), gd(http://libgd.org/), PDL and PDL::Stats (to be installed after the gsl library set).
on Debian:
apt-get install gsl-bin libgsl0-dev
apt-get install pdl libpdl-stats-perl libgd2-xpm-dev
It's recommended to use cpanm to install all the required deps; install it through your package manager or just do:
cpan App::cpanminus
After the installation of gsl, clone the repository and install all the dependencies with cpanm:
cpanm --installdeps .
Then install it as usual:
perl Build.PL
./Build
./Build test #ensure that the module works correctly
./Build install
=head1 OPTIONS
=head2 username
required, it's the GitHub username used to calculate the prediction
=head2 ca_output
you can enable/disable the cellular automata output using this option (1/0)
=head2 no_day_stats
setting this option to 1 will slightly change the prediction: a single transition matrix is calculated instead of one for each day of the week
=head2 left_cutoff
used to cut the days from the start (e.g. if you want to delete the first 20 days from the prediction, just set this to 20)
=head2 cutoff_offset
used to select a range where the prediction happens (e.g. if you want to calculate the prediction of a portion of your year of contribution)
=head2 file_output
here you can choose the file output name for ca.
=head2 accuracy
Enable/disable accuracy calculation (1/0)
=head2 verbose
Enable/disable verbosity (1/0)
=head1 METHODS
=head2 contrib_calendar($username)
Fetches the github contrib_calendar of the specified user
=head2 process
Calculate the predictions and generate the CA
=head2 start_day
Returns the first day of the contrib_calendar
=head2 last_day
Returns the last day of the contrib calendar (prediction included)
=head2 prediction_start_day
Returns the first day of the prediction (7 days of predictions)
=head1 AUTHOR
mudler E<lt>mudler@dark-lab.netE<gt>
=head1 COPYRIGHT
Copyright 2014- mudler
=head1 LICENSE
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
L<GitInsight::Util>, L<PDL>, L<PDL::Stats>
=cut
( run in 0.628 second using v1.01-cache-2.11-cpan-39bf76dae61 )