ApacheLog-Parser
view release on metacpan or search on metacpan
bin/loghack view on Meta::CPAN
}
}
# Open a write handle whose data is compressed into $file.
#
# The compressor is chosen from the file extension (.gz => gzip,
# .bz2 => bzip2) and is run as a forked child with its STDOUT redirected
# to $file.  Returns the parent's end of the pipe.
#
# Dies if the extension is not recognised, if the fork fails, or (in the
# child) if $file cannot be opened or the compressor cannot be exec'd.
sub pipe_out {
  my ($file) = @_;

  $file =~ m/\.(gz|bz2)$/ or die "unknown extension on $file";
  my $ext = $1;
  my %prog = (
    gz  => 'gzip',
    bz2 => 'bzip2',
  );
  my $prog = $prog{$ext} or die "cannot write $ext files";

  # Forking open: returns the child's pid in the parent, 0 in the child
  # and undef when the fork itself failed.
  my $pid = open(my $fh, '|-');

  # Without this check a failed fork would fall into the "child" branch
  # below and replace the *parent* process with the compressor.
  defined($pid) or die "cannot fork for $prog $!";

  unless($pid) {
    # child: send our STDOUT to the target file and become the compressor
    local $SIG{CHLD};
    open(STDOUT, '>', $file) or die "cannot write '$file' $!";
    exec($prog, '-c') or die "ack $!";
  }

  #warn "launch $prog > $file on $pid\n";
  return($fh);
}
# Command-line entry point: parse the options, then dispatch to a mode.
#
# Arguments: the command-line words (options, the mode name and the
# mode's own arguments).
#
# With --daemon, control is handed to daemon() and the process exits
# afterwards.  Otherwise the first remaining argument must be one of the
# known modes; it is run through cluster() when --cluster was given, or
# through the local do_<mode> sub.
#
# Dies with a usage message when the mode is missing or unknown, and when
# no do_<mode> sub can be found.
sub main {
  my (@args) = @_;

  # option defaults
  my %o = (
    archive    => '',
    repository => '',
    missok     => 0,
    daemon     => '',
    cluster    => '',
    skip       => 1,
    quiet      => 0,
  );

  my $hopt = Getopt::Helpful->new(
    usage => 'CALLER <mode> [options] <arguments>',
    ['a|archive=s',    \$o{archive},    '<dir>',   'archive dir'],
    ['r|repository=s', \$o{repository}, '<dir>',   'repository dir'],
    ['missok',         \$o{missok},     '',        'skip missing files'],
    ['d|daemon=s',     \$o{daemon},     '<dir>',   'daemon mode - needs chdir'],
    ['c|cluster=s',    \$o{cluster},    '<hosts>', 'cluster mode'],
    ['s|skip!',        \$o{skip},       '',        'use skipper (default yes)'],
    ['q|quiet',        \$o{quiet},      '',        'suppress status'],
    '+help',
  );
  # NOTE(review): presumably strips the recognised options out of @args,
  # since the mode is shifted off the front afterwards -- confirm.
  $hopt->Get_from(\@args);

  # default the repository to the current directory if it looks like one
  if(not $o{repository}) {
    $o{repository} = '.' if(-e '.config');
  }

  if($o{daemon}) {
    daemon(\%o, @args);
    exit;
  }

  my %modes = map({$_ => 1} qw(
    makelinks
    import
    prep check sweep verify confirm list
    unique day_unique month_unique month_unique2
    compile
    aggregate report date dump tabulate count reskip
  ));

  my $mode = shift(@args);
  # The defined() guard keeps a missing mode from being used as a hash key
  # (an uninitialized-value warning) before the usage message is shown.
  (defined($mode) and $modes{$mode}) or die "USAGE: mode must be one of ",
    join(", ", sort(keys(%modes))), "\n";

  # TODO deal with the do_ stuff
  if($o{cluster}) {
    cluster(\%o, $mode, @args);
  }
  else {
    my $run = __PACKAGE__->can('do_' . $mode) or
      die "cannot find method 'do_$mode'";
    $run->(\%o, @args);
  }
}
# Reduce a file name to the YYYY-MM-DD date embedded in it.
#
# The date must be followed by a '.'; anything up to a '.' in front of it
# and everything after it is discarded, as is any leading directory part.
# Croaks with "weird name" when no such date is found.
sub name_as_date {
  my ($name) = @_;

  unless($name =~ s/(?:.*\.)?(\d{4}-\d{2}-\d{2})\..*/$1/) {
    croak("weird name -- $name");
  }

  # the substitution may leave a directory prefix in front of the date
  $name =~ s#.*/##;

  return($name);
}
# Build a short display pattern for a file path.
#
# The final path component is turned into "*.<date>.*" via
# name_as_date(); when the path has leading components, the last of them
# is kept in front, giving "<dir>/*.<date>.*".
sub nice_name {
  my ($name) = @_;

  my @parts = split(/\/+/, $name);
  my $basename = pop(@parts);
  my $pattern = '*.' . name_as_date($basename) . '.*';

  # no directory part at all: just the pattern
  return($pattern) unless(@parts);

  return(join("/", $parts[-1], $pattern));
}
# Record which source file produced a given checksum.
#
# Writes the basename of $file (plus a newline) into the file named by
# $dir and $md5 concatenated, creating $dir first if needed.  No separator
# is inserted between $dir and $md5.  If that file already exists, a
# "skipping" warning is issued and nothing is written.
#
# Dies if the file cannot be opened or closed.
sub record_source {
  my ($opt, $file, $dir, $md5) = @_;

  want_dir($dir);

  my $target = "$dir$md5";
  if(-e $target) {
    warn "skipping $target ($file)\n";
    return;
  }

  my $source_name = File::Basename::basename($file);
  open(my $out, '>', $target) or die "cannot write '$target' $!";
  print {$out} $source_name, "\n";
  close($out) or die "cannot write '$target' $!";
}
# Make sure the directory $dir exists, creating it if necessary.
#
# Only the last path component is created (plain mkdir).  A mkdir failure
# is tolerated when the directory turns out to exist afterwards (e.g.
# somebody else created it in the meantime); otherwise dies with the
# reason mkdir failed.
sub want_dir {
  my ($dir) = @_;

  return if(-d $dir);
  return if(mkdir($dir));

  # Capture mkdir's error now: the -d test below does its own stat() and
  # would otherwise overwrite $! (e.g. turning "Permission denied" into
  # "No such file or directory").
  my $mkdir_error = "$!";

  return if(-d $dir);
  die "cannot create $dir $mkdir_error";
}
sub daemon {
my ($opt, @args) = @_;
bin/loghack view on Meta::CPAN
}
# TODO include timezone in this calc
my $datestring = get_datestring($date);
# make the tz three digits
(my $tzout = $tz) =~ s/00$//;
$tzout = '+' . $tzout if(length($tzout) == 2);
my $outfile = $outpath . $datestring . ".$hour$tzout.$chunk.tsv.gz";
push(@loaded, $outfile);
#warn "writing $outfile\n";
if(-e $outfile) {
# XXX how to decide whether to skip completely?
die "already have $outfile\n";
}
$chunk = 1; # from now on
if($skipper) { # TODO how to reset skipcount?
my $skipfile = skipfilename($opt, $outfile);
$sw = $skipper->new_writer($skipfile);
}
$out = pipe_out($outfile);
print $ch File::Basename::basename($outfile), "\n";
$outhandles{"$date$hour$tz"} = [$out, $sw];
};
my $cdate = '';
my %lc;
while(my $line = <$fh>) {
$linecount++;
chomp($line);
my $v = parse_line($line);
# check date/time
my ($d, $h, $rest) = split(/:/, $v->[dtime], 3);
my ($tz) = ($rest =~ m/ ([-+]?\d+)/);
if("$d$h$tz" ne $cdate) {
$next_chunk->($d, $h, $tz);
$cdate = "$d$h$tz";
$lc{$cdate} ||= 0;
#warn "$d $h $tz\n";
}
my $lnum = ++$lc{$cdate};
# create skiplist
if($doskip->($v)) {$sw->skip($lnum);}
print $out join("\t", @$v), "\n";
}
print $ch "$linecount\n";
close($ch) or die "write '$checkfile' failed $!";
# TODO race checks/chmod
}
wait(); # XXX need this?
return(@loaded);
}
=for doc ###############################################################
Examine the */.loaded files and verify that each one has a linecount
(finished loading.)
loghack check */.loaded/*
=cut
# Report every check-file that run_check() flags.
#
# For each file for which run_check() returns a list of parts, prints
# "NC <file> (<n> parts)"; files for which it returns nothing produce no
# output.
sub do_check {
  my ($opt, @files) = @_;

  for my $checkfile (@files) {
    my $parts = run_check($checkfile);
    next unless($parts);

    my $count = @$parts;
    print "NC $checkfile (", $count, " parts)\n";
  }
}
# Like do_check, but also remove what run_check() flags.
#
# For each file for which run_check() returns a list of parts: prints the
# "NC" summary line and each part, unlinks every listed part that exists,
# and finally unlinks the check-file itself.  Dies if an unlink fails.
sub do_sweep {
  my ($opt, @files) = @_;

  for my $checkfile (@files) {
    my $parts = run_check($checkfile);
    next unless($parts);

    my $count = @$parts;
    print "NC $checkfile (", $count, " parts)\n";

    for my $part (@$parts) {
      print " $part\n";
      # a listed part may never have been written
      next unless(-e $part);
      unlink($part) or die "cannot unlink('$part') $!";
    }

    unlink($checkfile) or die "cannot unlink('$checkfile') $!";
  }
}
# Decide whether a check-file describes a finished load.
#
# A check-file lists one name per line; it counts as finished when its
# last line consists only of digits (the linecount) -- including a count
# of 0.
#
# Returns nothing for a finished file.  Otherwise returns an array
# reference holding every line of the file prefixed with the directory two
# levels above the check-file (an empty check-file gives an empty array).
#
# Dies if $checkfile is a directory or cannot be read.
sub run_check {
  my ($checkfile) = @_;

  die "'$checkfile' is a directory" if(-d $checkfile);

  open(my $fh, '<', $checkfile) or die "cannot read '$checkfile' $!";
  chomp(my @list = <$fh>);
  close($fh);

  # Match on the pattern alone rather than on truthiness, so that a
  # trailing linecount of "0" is accepted as finished too.
  return() if(@list and $list[-1] =~ m/^\d+$/);

  my $dir = File::Basename::dirname(File::Basename::dirname($checkfile));
  return([map({"$dir/$_"} @list)]);
}
# Expand the word 'thru' in a list of dates.
#
# Items are passed through unchanged, except that "A thru B" is replaced
# by whatever date(A)->thru(date(B)) returns (A is taken back off the
# output and B is consumed from the input).
# NOTE(review): date() and its thru() method are defined outside this
# view -- presumably they yield every date from A to B; confirm.
sub _date_dwim {
  my (@in) = @_;

  my @dates;
  while(@in) {
    my $item = shift(@in);

    unless($item eq 'thru') {
      push(@dates, $item);
      next;
    }

    # range: previous output item through the next input item
    my $start = pop(@dates);
    my $stop  = shift(@in);
    push(@dates, date($start)->thru(date($stop)));
  }

  return(@dates);
}
=for doc ###############################################################
Given a date range, verify that all files + hours for that server are
done (with the exception of those listed in the .MIA file.)
=cut
# Print, per directory and date, how many entries exist for that date.
#
# The arguments are expanded through _date_dwim(); dies if that yields no
# dates.  For every directory in the current working directory and every
# date, prints "<dir>/<date> <count>" where <count> is the number of
# paths matching "<dir>/<date>*".
sub do_verify {
  my ($opt, @in) = @_;

  my @dates = _date_dwim(@in) or die "you gave no dates";

  for my $entry (glob('*')) {
    next unless(-d $entry);

    for my $date (@dates) {
      my @matches = glob("$entry/$date*");
      my $count = @matches;
      print "$entry/$date ", $count, "\n";
    }
  }
}
=for doc ###############################################################
Make sure that all files are claimed somewhere. This is useful when a
load-in crashed.
loghack confirm *
=cut
# Print every file in the given directories which no check-file claims.
#
# For each directory, the names listed in all (non-dot) files under
# "<dir>/.loaded" are collected, ignoring a trailing all-digit linecount
# line.  Then every non-dot entry of the directory itself which is not
# among those names is printed as "<dir>/<name>".
#
# Dies if a directory or check-file cannot be read.
sub do_confirm {
  my ($opt, @dirs) = @_;

  foreach my $dir (@dirs) {
    # names claimed by the check-files of this directory
    my %loaded;
    my $s_dir = $dir . '/.loaded';
    if(-d $s_dir) {
      opendir(my $sdh, $s_dir) or die "cannot opendir '$s_dir' $!";
      my @checkfiles = grep({$_ !~ m/^\./} readdir($sdh));
      closedir($sdh);

      foreach my $name (@checkfiles) {
        my $file = "$s_dir/$name";
        open(my $fh, '<', $file) or die "cannot read '$file' $!";
        chomp(my @list = <$fh>);
        close($fh);

        # An empty check-file (load crashed before its first part) has no
        # last line to examine.
        pop(@list) if(@list and $list[-1] =~ m/^\d+$/);

        $loaded{$_} = 1 for(@list);
      }
    }

    # normalise to exactly one trailing slash for the output below
    $dir =~ s#/*$#/#;
    opendir(my $dh, $dir) or die "cannot opendir '$dir' $!";
    my @names = grep({$_ !~ m/^\./} readdir($dh));
    closedir($dh);

    foreach my $name (@names) {
      unless($loaded{$name}) {
        print "$dir$name\n";
      }
    }
  }
  # TODO exit with error?
  return;
}
=head2 list
List files in the repository.
loghack list 2008-01-01 thru 2008-01-31 in *
=cut
sub do_list {
( run in 0.789 second using v1.01-cache-2.11-cpan-39bf76dae61 )