ApacheLog-Compressor
view release on metacpan or search on metacpan
0.004 2011-06-11 17:31:35 Europe/London
Provide on_bad_data_event so that unparsable lines can be handled separately.
Includes some extra code for parsing data as UTF8, including unescaping URLS - currently commented out since
in most cases it's either not useful, or just wrong.
0.003 2011-02-20 18:20:53 Europe/London
Improve filtering support, basic callbacks for things that need to know when new URL packets etc. are seen,
and apply post processing before sending the log line event.
0.002 2011-02-20 04:28:50 Europe/London
Handle partial incoming data streams for expanding, and avoid sending timestamp
for filtered entries.
0.001 2011-02-20 01:12:14 Europe/London
Initial CPAN upload
Changes
LICENSE
MANIFEST
META.yml
Makefile.PL
README
dist.ini
examples/compress.pl
examples/expand.pl
lib/ApacheLog/Compressor.pm
t/00-compile.t
t/00-pod.t
t/00-use.t
t/author-test-eol.t
t/compress.t
t/release-mojibake.t
t/release-pod-coverage.t
t/release-pod-linkcheck.t
t/release-pod-syntax.t
examples/expand.pl view on Meta::CPAN
# Write all data to plain text file
open my $out_fh, '>', $out or die "Failed to create output file $out - $!";
binmode $out_fh, ':encoding(utf8)';
use Data::Dumper;
# Provide a callback to send data through to the file
my $alc = ApacheLog::Compressor->new(
on_log_line => sub {
my ($self, $data) = @_;
# Use the helper method to expand back to plain text representation
print { $out_fh } $self->data_to_text($data) . "\n";
},
);
# Input file - normally use whichever one's just been closed + rotated
open my $in_fh, '<', $in or die "Failed to open input file $in - $!";
binmode $in_fh;
# Read and expand all the lines in the files
my $buffer = '';
while(read($in_fh, my $data, 1024) >= 0) {
$buffer .= $data;
$alc->expand(\$buffer);
}
close $in_fh or die $!;
close $out_fh or die $!;
# Dump the stats in case anyone finds them useful
$alc->stats;
lib/ApacheLog/Compressor.pm view on Meta::CPAN
process_in => sub {
my ($self, $data) = @_;
return $data->{url} = '' unless defined $data->{url};
($data->{url}, $data->{query}) = split /\?/, $data->{url}, 2;
# Dodgy UTF8 handling, currently disabled - no guarantee that URLs are UTF8 anyway
# if(length $data->{url}) {
# URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
# my $txt = $data->{url};
# $txt = encode_utf8($txt); # turn OFF utf8
# $txt =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/ge; # expand
# $txt = decode_utf8($txt); # turn ON utf8 where applicable
# $data->{url} = $txt;
# }
# if(defined $data->{query} && length $data->{query}) {
# URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
# (my $txt = $data->{query}) =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/eg;
# $data->{query} = decode_utf8($txt, FB_DEFAULT);
# }
}
},
lib/ApacheLog/Compressor.pm view on Meta::CPAN
push @{ $self->{entry_index}->{$type} }, $v;
++$self->{entry_count}->{$type};
$id = $self->{entry_cache}->{$type}->{$v} = scalar(@{ $self->{entry_index}->{$type} }) - 1;
$self->send_packet($type, id => $id, data => encode_utf8($v));
}
return $id;
}
=head2 from_cache
Read a value from the cache, for expanding compressed log format entries.
=cut
sub from_cache {
my $self = shift;
my ($type, $id) = @_;
die "ID $id not found for $type\n" unless defined $self->{entry_index}->{$type}->[$id];
return $self->{entry_index}->{$type}->[$id];
}
=head2 set_key
Set a cache index key to a value when expanding a packet stream.
=cut
sub set_key {
my $self = shift;
my $type = shift;
my %args = @_;
my $v = decode_utf8($args{data});
$self->{entry_cache}->{$type}->{$v} = $args{id};
$self->{entry_index}->{$type}->[$args{id}] = $v;
lib/ApacheLog/Compressor.pm view on Meta::CPAN
Write a packet to the output handler.
=cut
sub write_packet {
my ($self, $pkt) = @_;
$self->{on_write}->($self, $pkt);
return $self;
}
=head2 expand
Expand incoming data.
=cut
sub expand {
my $self = shift;
my $pkt = shift;
my $type = unpack('C1', $$pkt);
unless($self->{packet_handler}->{$type}) {
print substr $$pkt, 0, 16;
die "what is $type?";
}
my $method = 'handle_' . $self->{packet_handler}->{$type};
return $self->$method($pkt) if $self->can($method);
t/compress.t view on Meta::CPAN
},
on_log_line => sub {
my ($self, $data) = @_;
is($self->data_to_text($data), 'api.example.com 105327 123.15.16.108 - apiuser@example.com [19/Dec/2009:03:12:07 +0000] "POST /api/status.json HTTP/1.1" 200 80516 "-" "-"', 'converted line matches');
}
]);
ok($comp->send_packet('server',
hostname => 'apache-server1'
), 'send initial server packet');
is_hex($buffer, '01 61 70 61 63 68 65 2d 73 65 72 76 65 72 31 00', 'initial server packet is correct');
$exp->expand(\$buffer);
is($buffer, '', 'buffer now empty');
ok($comp->compress('api.example.com 105327 123.15.16.108 - apiuser@example.com [19/Dec/2009:03:12:07 +0000] "POST /api/status.json HTTP/1.1" 200 80516 "-" "-"'), 'compress a line');
my $copy = $buffer;
is_hex(substr($buffer, 0, 5, ''), '02 4b 2c 44 87', 'timestamp packet is correct');
is_hex(substr($buffer, 0, 21, ''), '03 00 00 00 00 61 70 69 2e 65 78 61 6d 70 6c 65 2e 63 6f 6d 00', 'vhost packet is correct');
is_hex(substr($buffer, 0, 25, ''), '04 00 00 00 00 61 70 69 75 73 65 72 40 65 78 61 6d 70 6c 65 2e 63 6f 6d 00', 'user packet is correct');
is_hex(substr($buffer, 0, 22, ''), '07 00 00 00 00 2f 61 70 69 2f 73 74 61 74 75 73 2e 6a 73 6f 6e 00', 'URL packet is correct');
is_hex(substr($buffer, 0, 6, ''), '0a 00 00 00 00 00', 'query packet is correct');
is_hex(substr($buffer, 0, 7, ''), '06 00 00 00 00 2d 00', 'referer packet is correct');
is_hex(substr($buffer, 0, 7, ''), '05 00 00 00 00 2d 00', 'useragent packet is correct');
is_hex(substr($buffer, 0, 33, ''), '00 00 00 00 01 9b 6f 7b 0f 10 6c 00 00 03 00 00 00 00 00 00 00 00 01 00 c8 00 01 3a 84 00 00 00 00', 'log packet is correct');
is(length($buffer), 0, 'buffer now empty');
my $idx = 0;
$exp->expand(\$copy) while length $copy && ++$idx < 100;
{
my $bad_data = 'api.example.com 105327 123.15.16.108 - apiuser@example.com [19/Dec/2009:03:12:07 +0000] "SOME INVALID DATA HERE" 14124 1231 -';
local $comp->{on_bad_data} = sub {
my $data = shift;
pass('have bad data event');
is($data, $bad_data, 'data matches');
};
ok($comp->compress($bad_data), 'pass bad data into compressor');
}
t/unicode.t view on Meta::CPAN
ok($comp->compress($line), 'compress the line');
# Try to prove we have binary data
open my $out_fh, '>', \my $tmp or die $!;
binmode $out_fh;
print $out_fh $buffer;
close $out_fh;
$buffer = $tmp;
ok(!is_utf8($buffer), 'utf8 not set');
my $idx = 0;
$exp->expand(\$buffer) while length $buffer && ++$idx < 100;
( run in 2.645 seconds using v1.01-cache-2.11-cpan-5b529ec07f3 )