ApacheLog-Parser
view release on metacpan or search on metacpan
bin/loghack view on Meta::CPAN
my $checksum = checksum($fh, 50);
my $checkfile = "$outpath.loaded/$checksum";
my $linecount = 0;
my $ch;
if(-e "$outpath/.loaded/$checksum") {
warn "assume $nicename is done\n";
{local $SIG{CHLD}; close($fh);} # stupid macs
next;
# TODO fast-forward support
# $linecount = $old_linecount; and etc
}
else {
record_source($opt, $file, "$outpath.sources/", $checksum);
# TODO this could stand to be more atomic
{ # record results
want_dir("$outpath.loaded");
my $tag = ($ENV{HOSTNAME} || '') . '.' . $$;
open($ch, '>', "$checkfile.$tag") or
die "cannot write '$checkfile.$tag' $!";
# TODO chmod
rename("$checkfile.$tag", $checkfile) or
die "cannot make $checkfile $!";
}
# TODO a replayable pipe would be nice
{local $SIG{CHLD}; close($fh);} # stupid macs
$fh = open_file($file);
}
$opt->{quiet} or print "$nicename -- ",
sprintf("%02d:%02d:%02d", (localtime)[2,1,0]), "\n";
my %outhandles;
my $sw;
my $out;
my $chunk = 2;
my $next_chunk = sub {
my ($date, $hour, $tz) = @_;
# might have already started that chunk
if(my $handles = $outhandles{"$date$hour$tz"}) {
#warn "back to $date:$hour$tz\n";
($out, $sw) = @$handles;
return;
}
# TODO include timezone in this calc
my $datestring = get_datestring($date);
# make the tz three digits
(my $tzout = $tz) =~ s/00$//;
$tzout = '+' . $tzout if(length($tzout) == 2);
my $outfile = $outpath . $datestring . ".$hour$tzout.$chunk.tsv.gz";
push(@loaded, $outfile);
#warn "writing $outfile\n";
if(-e $outfile) {
# XXX how to decide whether to skip completely?
die "already have $outfile\n";
}
$chunk = 1; # from now on
if($skipper) { # TODO how to reset skipcount?
my $skipfile = skipfilename($opt, $outfile);
$sw = $skipper->new_writer($skipfile);
}
$out = pipe_out($outfile);
print $ch File::Basename::basename($outfile), "\n";
$outhandles{"$date$hour$tz"} = [$out, $sw];
};
my $cdate = '';
my %lc;
while(my $line = <$fh>) {
$linecount++;
chomp($line);
my $v = parse_line($line);
# check date/time
my ($d, $h, $rest) = split(/:/, $v->[dtime], 3);
my ($tz) = ($rest =~ m/ ([-+]?\d+)/);
if("$d$h$tz" ne $cdate) {
$next_chunk->($d, $h, $tz);
$cdate = "$d$h$tz";
$lc{$cdate} ||= 0;
#warn "$d $h $tz\n";
}
my $lnum = ++$lc{$cdate};
# create skiplist
if($doskip->($v)) {$sw->skip($lnum);}
print $out join("\t", @$v), "\n";
}
print $ch "$linecount\n";
close($ch) or die "write '$checkfile' failed $!";
# TODO race checks/chmod
}
wait(); # XXX need this?
return(@loaded);
}
=for doc ###############################################################
Examine the */.loaded files and verify that each one has a linecount
(i.e. that it finished loading).
loghack check */.loaded/*
=cut
# do_check($opt, @files)
#
# Runs run_check() on every path in @files and prints one "NC" line for
# each path where run_check() returns a true value. That value is
# dereferenced as an array; its element count is reported as the number
# of "parts".
#
#   $opt   - options value; accepted but not used by this sub.
#   @files - paths to check (each is passed unchanged to run_check()).
#
# NOTE(review): "NC" presumably stands for "not complete" -- confirm
# against run_check().
sub do_check {
    my ($opt, @files) = @_;

    for my $path (@files) {
        my $problems = run_check($path);

        # A false result means there is nothing to report for this path.
        next unless $problems;

        my $part_count = scalar(@$problems);
        print "NC $path (", $part_count, " parts)\n";
    }
}
( run in 1.421 second using v1.01-cache-2.11-cpan-39bf76dae61 )