ApacheLog-Compressor

 view release on metacpan or  search on metacpan

examples/compress.pl  view on Meta::CPAN

#!/usr/bin/perl
use strict;
use warnings;

use ApacheLog::Compressor 0.004;
use Sys::Hostname qw(hostname);

# Console handles get an encoding layer so wide characters print cleanly
binmode($_, ':encoding(utf8)') for \*STDOUT, \*STDERR;

# Two positional arguments: the input path, then the output path
my ($in, $out) = @ARGV;
die "No input file provided" if !defined($in) || !length($in);
die "No output file provided" if !defined($out) || !length($out);

# The output file is binary, so its handle is switched to raw mode
# with no encoding layer
open(my $out_fh, '>', $out) or die "Failed to create output file $out - $!";
binmode($out_fh);

# Provide a callback to send data through to the file

examples/compress.pl  view on Meta::CPAN

		return 0 unless $data->{timestamp};

		# Also skip irrelevant entries, in this case regular OPTIONS * server pings from loadbalancer
		return 0 if $ApacheLog::Compressor::HTTP_METHOD_LIST[$data->{method}] eq 'OPTIONS' && $data->{url} eq '*';
		return 1;
	}
);

# Source log - typically the file that has just been closed and rotated.
# It is read through the utf8 encoding layer.
open(my $in_fh, '<', $in) or die "Failed to open input file $in - $!";
binmode($in_fh, ':encoding(utf8)');

# The first packet carries this machine's hostname so the stream
# identifies which server it came from
$alc->send_packet(server => (hostname => hostname()));

# Hand each input line to the compressor, one at a time
while (defined(my $line = readline($in_fh))) {
	$alc->compress($line);
}

examples/expand.pl  view on Meta::CPAN

#!/usr/bin/perl
use strict;
use warnings;

use ApacheLog::Compressor 0.004;
use Encode qw(decode_utf8 is_utf8);

# Two positional arguments: the input path, then the output path
my ($in, $out) = @ARGV;
die "No input file provided" if !defined($in) || !length($in);
die "No output file provided" if !defined($out) || !length($out);

# Console handles get an encoding layer so wide characters print cleanly
binmode($_, ':encoding(utf8)') for \*STDOUT, \*STDERR;

# Expanded log lines are plain text, written through the utf8 layer
open(my $out_fh, '>', $out) or die "Failed to create output file $out - $!";
binmode($out_fh, ':encoding(utf8)');

use Data::Dumper;
# The compressor invokes on_log_line with itself and a data structure
# for a log entry; each entry is written out as one line of text
my $alc = ApacheLog::Compressor->new(
	on_log_line	=> sub {
		my $self = shift;
		my $data = shift;
		# data_to_text is the helper that rebuilds the plain text form
		my $text = $self->data_to_text($data);
		print {$out_fh} "$text\n";
	},
);

lib/ApacheLog/Compressor.pm  view on Meta::CPAN

# ABSTRACT: Convert Apache/CLF data to binary format
use strict;
use warnings;

use Socket qw(inet_aton inet_ntoa);
use Date::Parse qw(str2time);
use List::Util qw(min);
use URI;
use URI::Escape qw(uri_unescape);
use DateTime;
use Encode qw(encode_utf8 decode_utf8 FB_DEFAULT is_utf8 FB_CROAK);
use POSIX qw{strftime};

our $VERSION = '0.005';

=head1 NAME

ApacheLog::Compressor - convert Apache / CLF log files into a binary format for transfer

=head1 VERSION

lib/ApacheLog/Compressor.pm  view on Meta::CPAN

			id => 0x07,
			type => 'N1',
			regex => qr{([^ ]+)},
			process_in => sub {
				my ($self, $data) = @_;
				return $data->{url} = '' unless defined $data->{url};

				($data->{url}, $data->{query}) = split /\?/, $data->{url}, 2;
# Dodgy UTF8 handling, currently disabled - no guarantee that URLs are UTF8 anyway
#				if(length $data->{url}) {
				# URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
#					my $txt = $data->{url};
#					$txt = encode_utf8($txt); # turn OFF utf8
#					$txt =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/ge; # expand
#					$txt = decode_utf8($txt); # turn ON utf8 where applicable
#					$data->{url} = $txt;
#				}
#				if(defined $data->{query} && length $data->{query}) {
				# URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
#					(my $txt = $data->{query}) =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/eg;
#					$data->{query} = decode_utf8($txt, FB_DEFAULT);
#				}
			}
		},
		query		=> { id => 0x0A, type => 'N1', },
		ver		=> {
			type => 'C1',
			regex => qr{HTTP/(\d+\.\d+)"},
			process_in => sub {
				my ($self, $data) = @_;
				$data->{ver} = ($data->{ver} eq '1.0' ? 0 : 1);

lib/ApacheLog/Compressor.pm  view on Meta::CPAN


# Look up (or allocate) the numeric ID for a value of the given type.
#
# Takes the type name and the value; an undefined value is treated as the
# empty string. If the value has been seen before for this type, the ID
# assigned at that time is returned. Otherwise the value is appended to the
# per-type index, its position there becomes the ID, the per-type counter
# is incremented, and a packet of that type carrying the ID and the
# UTF-8 encoded value is sent via send_packet.
#
# Returns the ID.
sub cached {
	my ($self, $type, $v) = @_;
	$v = '' if !defined $v;

	# Seen before - reuse the ID assigned the first time round
	my $known = $self->{entry_cache}{$type}{$v};
	return $known if defined $known;

	# First sighting: the new ID is the position of the value in the index
	my $index = ($self->{entry_index}{$type} ||= []);
	push @$index, $v;
	$self->{entry_count}{$type}++;

	my $id = $#{$index};
	$self->{entry_cache}{$type}{$v} = $id;

	# Send the new mapping as a packet, with the value as UTF-8 bytes
	$self->send_packet($type, id => $id, data => encode_utf8($v));
	return $id;
}

=head2 from_cache

Read a value from the cache, for expanding compressed log format entries.

=cut

lib/ApacheLog/Compressor.pm  view on Meta::CPAN

=head2 set_key

Set a cache index key to a value when expanding a packet stream.

=cut

# Record an ID <=> value mapping for the given type.
#
# Takes the type name followed by named arguments:
#   id   - index to assign
#   data - the value, which is passed through decode_utf8 before storing
#
# Both the value-to-ID cache and the ID-to-value index are updated. Then
# the optional callbacks are invoked if set: "on_set_<type>" receives
# ($self, $id, $value) and "on_set_key" receives ($self, $type, $id, $value).
#
# Returns $self.
sub set_key {
	my ($self, $type, %args) = @_;
	my $id = $args{id};
	my $v  = decode_utf8($args{data});

	# Keep both lookup directions in step
	$self->{entry_cache}{$type}{$v}  = $id;
	$self->{entry_index}{$type}[$id] = $v;

	# Type-specific callback first, then the generic one
	if ($self->{"on_set_$type"}) {
		$self->{"on_set_$type"}->($self, $id, $v);
	}
	if ($self->{on_set_key}) {
		$self->{on_set_key}->($self, $type, $id, $v);
	}

	return $self;
}

=head2 compress

General compression function. Given a line of data, sends packets as required to transmit that information.

t/unicode.t  view on Meta::CPAN

use strict;
use warnings;
use utf8;

use Test::More tests => 7;
use ApacheLog::Compressor;
use Encode qw(is_utf8);
# Console handles get an encoding layer so wide characters print cleanly
binmode($_, ':encoding(utf8)') for \*STDOUT, \*STDERR;

# Collects every packet the compressing instance writes
my $buffer = '';
my $comp = new_ok(
	'ApacheLog::Compressor',
	[
		on_write => sub {
			my (undef, $pkt) = @_;
			$buffer .= $pkt;
		},
	],
);
my $exp = new_ok('ApacheLog::Compressor' => [
	on_write => sub {

t/unicode.t  view on Meta::CPAN

	hostname	=> 'apache-server1'
), 'send initial server packet');
ok($comp->compress($line), 'compress the line');

# Try to prove we have binary data: push the collected packets through an
# in-memory handle with no encoding layer, then check the copy read back
# does not carry the utf8 flag
my $tmp;
open(my $out_fh, '>', \$tmp) or die $!;
binmode($out_fh);
print {$out_fh} $buffer;
close($out_fh);
$buffer = $tmp;
ok(!is_utf8($buffer), 'utf8 not set');

# Expand until the buffer is drained; the counter caps the loop in case
# the buffer never empties
my $idx = 0;
while (length($buffer) && ++$idx < 100) {
	$exp->expand(\$buffer);
}



( run in 1.078 second using v1.01-cache-2.11-cpan-49f99fa48dc )