ApacheLog-Compressor

 view release on metacpan or  search on metacpan

examples/compress.pl  view on Meta::CPAN

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#!/usr/bin/perl
use strict;
 
use Sys::Hostname qw(hostname);
 
binmode STDOUT, ':encoding(utf8)';
binmode STDERR, ':encoding(utf8)';
 
my ($in, $out) = @ARGV;
die "No input file provided" unless defined $in && length $in;
die "No output file provided" unless defined $out && length $out;
 
# Write all data to binary output file
open my $out_fh, '>', $out or die "Failed to create output file $out - $!";
binmode $out_fh;
 
# Provide a callback to send data through to the file

examples/compress.pl  view on Meta::CPAN

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
                return 0 unless $data->{timestamp};
 
                # Also skip irrelevant entries, in this case regular OPTIONS * server pings from loadbalancer
                return 0 if $ApacheLog::Compressor::HTTP_METHOD_LIST[$data->{method}] eq 'OPTIONS' && $data->{url} eq '*';
                return 1;
        }
);
 
# Input file - normally use whichever one's just been closed + rotated
open my $in_fh, '<', $in or die "Failed to open input file $in - $!";
binmode $in_fh, ':encoding(utf8)';
 
# Initial packet to identify which server this came from
$alc->send_packet('server',
        hostname        => hostname(),
);
 
# Read and compress all the lines in the files
while(my $line = <$in_fh>) {
        $alc->compress($line);
}

examples/expand.pl  view on Meta::CPAN

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/perl
use strict;
 
use Encode qw(decode_utf8 is_utf8);
 
my ($in, $out) = @ARGV;
die "No input file provided" unless defined $in && length $in;
die "No output file provided" unless defined $out && length $out;
 
binmode STDOUT, ':encoding(utf8)';
binmode STDERR, ':encoding(utf8)';
 
# Write all data to plain text file
open my $out_fh, '>', $out or die "Failed to create output file $out - $!";
binmode $out_fh, ':encoding(utf8)';
 
# Provide a callback to send data through to the file
my $alc = ApacheLog::Compressor->new(
        on_log_line     => sub {
                my ($self, $data) = @_;
                # Use the helper method to expand back to plain text representation
                print { $out_fh } $self->data_to_text($data) . "\n";
        },
);

lib/ApacheLog/Compressor.pm  view on Meta::CPAN

2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# ABSTRACT: Convert Apache/CLF data to binary format
use strict;
 
use Socket qw(inet_aton inet_ntoa);
use Date::Parse qw(str2time);
use List::Util qw(min);
use URI;
use URI::Escape qw(uri_unescape);
use Encode qw(encode_utf8 decode_utf8 FB_DEFAULT is_utf8 FB_CROAK);
use POSIX qw{strftime};
 
our $VERSION = '0.005';
 
=head1 NAME
 
ApacheLog::Compressor - convert Apache / CLF log files into a binary format for transfer
 
=head1 VERSION

lib/ApacheLog/Compressor.pm  view on Meta::CPAN

247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
                        id => 0x07,
                        type => 'N1',
                        regex => qr{([^ ]+)},
                        process_in => sub {
                                my ($self, $data) = @_;
                                return $data->{url} = '' unless defined $data->{url};
 
                                ($data->{url}, $data->{query}) = split /\?/, $data->{url}, 2;
# Dodgy UTF8 handling, currently disabled - no guarantee that URLs are UTF8 anyway
#                               if(length $data->{url}) {
                                # URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
#                                       my $txt = $data->{url};
#                                       $txt = encode_utf8($txt); # turn OFF utf8
#                                       $txt =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/ge; # expand
#                                       $txt = decode_utf8($txt); # turn ON utf8 where applicable
#                                       $data->{url} = $txt;
#                               }
#                               if(defined $data->{query} && length $data->{query}) {
                                # URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
#                                       (my $txt = $data->{query}) =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/eg;
#                                       $data->{query} = decode_utf8($txt, FB_DEFAULT);
#                               }
                        }
                },
                query           => { id => 0x0A, type => 'N1', },
                ver             => {
                        type => 'C1',
                        regex => qr{HTTP/(\d+\.\d+)"},
                        process_in => sub {
                                my ($self, $data) = @_;
                                $data->{ver} = ($data->{ver} eq '1.0' ? 0 : 1);

lib/ApacheLog/Compressor.pm  view on Meta::CPAN

369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
sub cached {
        my $self = shift;
        my ($type, $v) = @_;
        $v = '' unless defined $v;
        my $id = $self->{entry_cache}->{$type}->{$v};
        unless(defined $id) {
                push @{ $self->{entry_index}->{$type} }, $v;
                ++$self->{entry_count}->{$type};
                $id = $self->{entry_cache}->{$type}->{$v} = scalar(@{ $self->{entry_index}->{$type} }) - 1;
                $self->send_packet($type, id => $id, data => encode_utf8($v));
        }
        return $id;
}
 
=head2 from_cache
 
Read a value from the cache, for expanding compressed log format entries.
 
=cut

lib/ApacheLog/Compressor.pm  view on Meta::CPAN

397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
=head2 set_key
 
Set a cache index key to a value when expanding a packet stream.
 
=cut
 
sub set_key {
        my $self = shift;
        my $type = shift;
        my %args = @_;
        my $v = decode_utf8($args{data});
        $self->{entry_cache}->{$type}->{$v} = $args{id};
        $self->{entry_index}->{$type}->[$args{id}] = $v;
        $self->{"on_set_$type"}->($self, $args{id}, $v) if $self->{"on_set_$type"};
        $self->{"on_set_key"}->($self, $type, $args{id}, $v) if $self->{on_set_key};
        return $self;
}
 
=head2 compress
 
General compression function. Given a line of data, sends packets as required to transmit that information.

t/unicode.t  view on Meta::CPAN

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
use strict;
use utf8;
 
use Test::More tests => 7;
use Encode qw(is_utf8);
binmode STDOUT, ':encoding(utf8)';
binmode STDERR, ':encoding(utf8)';
 
my $buffer = '';
my $comp = new_ok('ApacheLog::Compressor' => [
        on_write        => sub {
                my ($self, $pkt) = @_;
                $buffer .= $pkt;
        }
]);
my $exp = new_ok('ApacheLog::Compressor' => [
        on_write => sub {

t/unicode.t  view on Meta::CPAN

36
37
38
39
40
41
42
43
44
45
46
47
48
        hostname        => 'apache-server1'
), 'send initial server packet');
ok($comp->compress($line), 'compress the line');
 
# Try to prove we have binary data
open my $out_fh, '>', \my $tmp or die $!;
binmode $out_fh;
print $out_fh $buffer;
close $out_fh;
$buffer = $tmp;
ok(!is_utf8($buffer), 'utf8 not set');
my $idx = 0;
$exp->expand(\$buffer) while length $buffer && ++$idx < 100;



( run in 0.955 second using v1.01-cache-2.11-cpan-49f99fa48dc )