Acme-KeyboardMarathon

 view release on metacpan or  search on metacpan

source-tree-marathon.pl  view on Meta::CPAN

# Final flush: persist anything added during this run. check_stats() only
# syncs on every 250th addition, so a trailing batch may still be pending.
if ( $add ) {
  print STDERR "\nSyncing...\n";
  $ref->sync();
}

### Process stats

# Per-extension tallies, keyed by the text after the last dot in the file name.
my %filecounts;   # extension => number of files recorded
my %filedists;    # extension => summed distance (Math::BigInt)

# bzero() states the intent (start from zero) explicitly instead of relying on
# what new() does when it is called without an argument.
my $grand_total = Math::BigInt->bzero();

for my $file ( keys %data ) {
  # Files without an extension are left out of every statistic, including
  # the grand total.
  next unless $file =~ /\.([^\.\/]+)$/;

  my $type = $1;
  $filecounts{$type}++;

  # Records are stored as "mtime:size:distance"; only the distance is needed.
  my $dist = ( split ':', $data{$file}, 3 )[2];

  $filedists{$type} = Math::BigInt->bzero() unless defined $filedists{$type};
  $filedists{$type} += $dist;
  $grand_total += $dist;
}

print "Generated on " . scalar(localtime) . "\n\n";

print "\nGrand total: ", display($grand_total), "\n\nTop 10 distance:\n\n";

# Rank extensions by distance, largest first, and keep at most ten of them.
# (A post-increment test such as `last if $i++ > 10` with $i starting at 1
# lets an eleventh row through, which contradicts the "Top 10" heading.)
my @by_distance = sort { $filedists{$b} <=> $filedists{$a} } keys %filedists;
splice @by_distance, 10 if @by_distance > 10;

for my $type ( @by_distance ) {
  printf "%20s : %4s files : %s\n", $type, $filecounts{$type}, display($filedists{$type});
}

print "\nDistances by file count:\n\n";

# Full listing, ordered by how many files carry each extension.
for my $type ( sort { $filecounts{$b} <=> $filecounts{$a} } keys %filecounts ) {
  printf "%20s : %4s files : %s\n", $type, $filecounts{$type}, display($filedists{$type});
}

### Subroutines

# Examine one directory entry and, if it is a measurable text file that is not
# already cached with the same mtime and size, compute its typing distance and
# store it in %data as "mtime:size:distance".
#
# Written in the shape of a File::Find "wanted" callback: it takes no
# arguments and reads $_, $File::Find::name and $File::Find::dir.
#
# Side effects: bumps the $skip / $cache / $add counters, logs one progress
# line per entry to STDERR, and calls $ref->sync() after every 250th addition.
#
# Every counter is incremented and its message printed unconditionally. The
# `$counter++ and print ... and return` idiom must not be used here: the
# post-increment yields the OLD value, so while the counter is still 0 the
# chain short-circuits and the first entry of each kind is neither reported
# nor skipped.
sub check_stats {
  my $entry = $_;
  my $path  = $File::Find::name;

  if ( -d $entry ) {
    # Report directories relative to the base directory when possible.
    # \Q...\E keeps regex metacharacters in the path from being interpreted.
    my $localdir = $path;
    $localdir = $1 if $localdir =~ /^\Q$base_dir\E\/(.+)$/;
    print STDERR "DIR: $localdir\n";
    return;
  }

  # Count and report one skipped entry.
  my $note_skip = sub {
    my ( $what, $why ) = @_;
    $skip++;
    print STDERR "SKIP: $what ($why)\n";
  };

  if ( $entry =~ /$skip_file_extension_regex/i ) {
    $note_skip->( $entry, 'regex' );
    return;
  }

  my $localdir = $File::Find::dir;
  $localdir = $1 if $localdir =~ /^\Q$base_dir\E\/(.+)$/;
  if ( $localdir =~ /$skip_dirs_regex/ ) {
    $note_skip->( $localdir, 'directory' );
    return;
  }

  if ( -l $path ) {
    $note_skip->( $entry, 'symlink' );
    return;
  }
  if ( -z $path ) {
    $note_skip->( $entry, 'zero size' );
    return;
  }
  if ( -B $path ) {
    $note_skip->( $entry, 'binary' );
    return;
  }

  my @stat  = stat($path);
  my $mtime = $stat[9];
  my $size  = $stat[7];

  # A stored record that starts with the current "mtime:size:" is still valid.
  if ( defined $data{$path} and index( $data{$path}, "$mtime:$size:" ) == 0 ) {
    $cache++;
    print STDERR "CACHE: $entry\n";
    return;
  }

  $add++;
  print STDERR "ADD: $entry ";

  my $text = read_file($path);
  my $dist = $akm->distance($text);

  $data{$path} = "$mtime:$size:$dist";

  # Completes the "ADD: ..." line started above.
  print STDERR "(".display($dist).")\n";

  # Flush periodically so a long run does not leave everything unsynced.
  unless ( $add % 250 ) {
    print STDERR "syncing...\n";
    $ref->sync();
  }
}

# Format a distance given in centimetres for human reading.
#
#   more than 100000 cm -> kilometres with two decimals ("2.50 km")
#   more than 100 cm    -> metres with two decimals     ("1.50 m")
#   otherwise           -> the value unchanged          ("50 cm")
#
# The argument may be a plain number or an object that stringifies to one
# (the callers in this file pass Math::BigInt totals).
sub display {
  my ($distance) = @_;

  # Stringify first so the comparisons and divisions below operate on an
  # ordinary Perl scalar rather than on the object that was passed in.
  my $cm = "$distance";

  return sprintf('%0.2f', $cm / 100000) . ' km' if $cm > 100000;
  return sprintf('%0.2f', $cm / 100) . ' m'     if $cm > 100;
  return $cm . ' cm';
}



( run in 2.391 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )