ApacheLog-Compressor
view release on metacpan or search on metacpan
examples/compress.pl view on Meta::CPAN
#!/usr/bin/perl
use strict;
use warnings;
use ApacheLog::Compressor 0.004;
use Sys::Hostname qw(hostname);
binmode STDOUT, ':encoding(utf8)';
binmode STDERR, ':encoding(utf8)';
my ($in, $out) = @ARGV;
die "No input file provided" unless defined $in && length $in;
die "No output file provided" unless defined $out && length $out;
# Write all data to binary output file
open my $out_fh, '>', $out or die "Failed to create output file $out - $!";
binmode $out_fh;
# Provide a callback to send data through to the file
examples/compress.pl view on Meta::CPAN
return 0 unless $data->{timestamp};
# Also skip irrelevant entries, in this case regular OPTIONS * server pings from loadbalancer
return 0 if $ApacheLog::Compressor::HTTP_METHOD_LIST[$data->{method}] eq 'OPTIONS' && $data->{url} eq '*';
return 1;
}
);
# Input file - normally use whichever one's just been closed + rotated
open my $in_fh, '<', $in or die "Failed to open input file $in - $!";
binmode $in_fh, ':encoding(utf8)';
# Initial packet to identify which server this came from
$alc->send_packet('server',
hostname => hostname(),
);
# Read and compress all the lines in the files
while(my $line = <$in_fh>) {
$alc->compress($line);
}
examples/expand.pl view on Meta::CPAN
#!/usr/bin/perl
use strict;
use warnings;
use ApacheLog::Compressor 0.004;
use Encode qw(decode_utf8 is_utf8);
my ($in, $out) = @ARGV;
die "No input file provided" unless defined $in && length $in;
die "No output file provided" unless defined $out && length $out;
binmode STDOUT, ':encoding(utf8)';
binmode STDERR, ':encoding(utf8)';
# Write all data to plain text file
open my $out_fh, '>', $out or die "Failed to create output file $out - $!";
binmode $out_fh, ':encoding(utf8)';
use Data::Dumper;
# Provide a callback to send data through to the file
my $alc = ApacheLog::Compressor->new(
on_log_line => sub {
my ($self, $data) = @_;
# Use the helper method to expand back to plain text representation
print { $out_fh } $self->data_to_text($data) . "\n";
},
);
lib/ApacheLog/Compressor.pm view on Meta::CPAN
# ABSTRACT: Convert Apache/CLF data to binary format
use strict;
use warnings;
use Socket qw(inet_aton inet_ntoa);
use Date::Parse qw(str2time);
use List::Util qw(min);
use URI;
use URI::Escape qw(uri_unescape);
use DateTime;
use Encode qw(encode_utf8 decode_utf8 FB_DEFAULT is_utf8 FB_CROAK);
use POSIX qw{strftime};
our $VERSION = '0.005';
=head1 NAME
ApacheLog::Compressor - convert Apache / CLF log files into a binary format for transfer
=head1 VERSION
lib/ApacheLog/Compressor.pm view on Meta::CPAN
id => 0x07,
type => 'N1',
regex => qr{([^ ]+)},
process_in => sub {
my ($self, $data) = @_;
return $data->{url} = '' unless defined $data->{url};
($data->{url}, $data->{query}) = split /\?/, $data->{url}, 2;
# Dodgy UTF8 handling, currently disabled - no guarantee that URLs are UTF8 anyway
# if(length $data->{url}) {
# URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
# my $txt = $data->{url};
# $txt = encode_utf8($txt); # turn OFF utf8
# $txt =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/ge; # expand
# $txt = decode_utf8($txt); # turn ON utf8 where applicable
# $data->{url} = $txt;
# }
# if(defined $data->{query} && length $data->{query}) {
# URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
# (my $txt = $data->{query}) =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/eg;
# $data->{query} = decode_utf8($txt, FB_DEFAULT);
# }
}
},
query => { id => 0x0A, type => 'N1', },
ver => {
type => 'C1',
regex => qr{HTTP/(\d+\.\d+)"},
process_in => sub {
my ($self, $data) = @_;
$data->{ver} = ($data->{ver} eq '1.0' ? 0 : 1);
lib/ApacheLog/Compressor.pm view on Meta::CPAN
sub cached {
my $self = shift;
my ($type, $v) = @_;
$v = '' unless defined $v;
my $id = $self->{entry_cache}->{$type}->{$v};
unless(defined $id) {
push @{ $self->{entry_index}->{$type} }, $v;
++$self->{entry_count}->{$type};
$id = $self->{entry_cache}->{$type}->{$v} = scalar(@{ $self->{entry_index}->{$type} }) - 1;
$self->send_packet($type, id => $id, data => encode_utf8($v));
}
return $id;
}
=head2 from_cache
Read a value from the cache, for expanding compressed log format entries.
=cut
lib/ApacheLog/Compressor.pm view on Meta::CPAN
=head2 set_key
Set a cache index key to a value when expanding a packet stream.
=cut
sub set_key {
my $self = shift;
my $type = shift;
my %args = @_;
my $v = decode_utf8($args{data});
$self->{entry_cache}->{$type}->{$v} = $args{id};
$self->{entry_index}->{$type}->[$args{id}] = $v;
$self->{"on_set_$type"}->($self, $args{id}, $v) if $self->{"on_set_$type"};
$self->{"on_set_key"}->($self, $type, $args{id}, $v) if $self->{on_set_key};
return $self;
}
=head2 compress
General compression function. Given a line of data, sends packets as required to transmit that information.
t/unicode.t view on Meta::CPAN
use strict;
use warnings;
use utf8;
use Test::More tests => 7;
use ApacheLog::Compressor;
use Encode qw(is_utf8);
binmode STDOUT, ':encoding(utf8)';
binmode STDERR, ':encoding(utf8)';
my $buffer = '';
my $comp = new_ok('ApacheLog::Compressor' => [
on_write => sub {
my ($self, $pkt) = @_;
$buffer .= $pkt;
}
]);
my $exp = new_ok('ApacheLog::Compressor' => [
on_write => sub {
t/unicode.t view on Meta::CPAN
hostname => 'apache-server1'
), 'send initial server packet');
ok($comp->compress($line), 'compress the line');
# Try to prove we have binary data
open my $out_fh, '>', \my $tmp or die $!;
binmode $out_fh;
print $out_fh $buffer;
close $out_fh;
$buffer = $tmp;
ok(!is_utf8($buffer), 'utf8 not set');
my $idx = 0;
$exp->expand(\$buffer) while length $buffer && ++$idx < 100;
( run in 1.078 second using v1.01-cache-2.11-cpan-49f99fa48dc )