ApacheLog-Compressor

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

0.004     2011-06-11 17:31:35 Europe/London
	Provide on_bad_data_event so that unparsable lines can be handled separately.
	Includes some extra code for parsing data as UTF8, including unescaping URLS - currently commented out since
	 in most cases it's either not useful, or just wrong.

0.003     2011-02-20 18:20:53 Europe/London
	Improve filtering support, basic callbacks for things that need to know when new URL packets etc. are seen,
	 and apply post processing before sending the log line event.

0.002     2011-02-20 04:28:50 Europe/London
	Handle partial incoming data streams for expanding, and avoid sending timestamp
	 for filtered entries.

0.001     2011-02-20 01:12:14 Europe/London
	Initial CPAN upload

MANIFEST  view on Meta::CPAN

Changes
LICENSE
MANIFEST
META.yml
Makefile.PL
README
dist.ini
examples/compress.pl
examples/expand.pl
lib/ApacheLog/Compressor.pm
t/00-compile.t
t/00-pod.t
t/00-use.t
t/author-test-eol.t
t/compress.t
t/release-mojibake.t
t/release-pod-coverage.t
t/release-pod-linkcheck.t
t/release-pod-syntax.t

examples/expand.pl  view on Meta::CPAN


# Write all data to plain text file
open my $out_fh, '>', $out or die "Failed to create output file $out - $!";
binmode $out_fh, ':encoding(utf8)';

use Data::Dumper;
# Provide a callback to send data through to the file
my $alc = ApacheLog::Compressor->new(
	on_log_line	=> sub {
		my ($self, $data) = @_;
		# Use the helper method to expand back to plain text representation
		print { $out_fh } $self->data_to_text($data) . "\n";
	},
);

# Input file - normally use whichever one's just been closed + rotated
open my $in_fh, '<', $in or die "Failed to open input file $in - $!";
binmode $in_fh;

# Read and expand all the lines in the files
my $buffer = '';
while(read($in_fh, my $data, 1024) >= 0) {
	$buffer .= $data;
        $alc->expand(\$buffer);
}
close $in_fh or die $!;
close $out_fh or die $!;

# Dump the stats in case anyone finds them useful
$alc->stats;

lib/ApacheLog/Compressor.pm  view on Meta::CPAN

			process_in => sub {
				my ($self, $data) = @_;
				return $data->{url} = '' unless defined $data->{url};

				($data->{url}, $data->{query}) = split /\?/, $data->{url}, 2;
# Dodgy UTF8 handling, currently disabled - no guarantee that URLs are UTF8 anyway
#				if(length $data->{url}) {
				# URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
#					my $txt = $data->{url};
#					$txt = encode_utf8($txt); # turn OFF utf8
#					$txt =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/ge; # expand
#					$txt = decode_utf8($txt); # turn ON utf8 where applicable
#					$data->{url} = $txt;
#				}
#				if(defined $data->{query} && length $data->{query}) {
				# URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
#					(my $txt = $data->{query}) =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/eg;
#					$data->{query} = decode_utf8($txt, FB_DEFAULT);
#				}
			}
		},

lib/ApacheLog/Compressor.pm  view on Meta::CPAN

		push @{ $self->{entry_index}->{$type} }, $v;
		++$self->{entry_count}->{$type};
		$id = $self->{entry_cache}->{$type}->{$v} = scalar(@{ $self->{entry_index}->{$type} }) - 1;
		$self->send_packet($type, id => $id, data => encode_utf8($v));
	}
	return $id;
}

=head2 from_cache

Read a value from the cache, for expanding compressed log format entries.

=cut

sub from_cache {
	my $self = shift;
	my ($type, $id) = @_;
	die "ID $id not found for $type\n" unless defined $self->{entry_index}->{$type}->[$id];
	return $self->{entry_index}->{$type}->[$id];
}

=head2 set_key

Set a cache index key to a value when expanding a packet stream.

=cut

sub set_key {
	my $self = shift;
	my $type = shift;
	my %args = @_;
	my $v = decode_utf8($args{data});
	$self->{entry_cache}->{$type}->{$v} = $args{id};
	$self->{entry_index}->{$type}->[$args{id}] = $v;

lib/ApacheLog/Compressor.pm  view on Meta::CPAN

Write a packet to the output handler.

=cut

sub write_packet {
	my ($self, $pkt) = @_;
	$self->{on_write}->($self, $pkt);
	return $self;
}

=head2 expand

Expand incoming data.

=cut

sub expand {
	my $self = shift;
	my $pkt = shift;
	my $type = unpack('C1', $$pkt);
	unless($self->{packet_handler}->{$type}) {
		print substr $$pkt, 0, 16;
		die "what is $type?";
	}
	my $method = 'handle_' . $self->{packet_handler}->{$type};
	return $self->$method($pkt) if $self->can($method);

t/compress.t  view on Meta::CPAN

	},
	on_log_line => sub {
		my ($self, $data) = @_;
		is($self->data_to_text($data), 'api.example.com 105327 123.15.16.108 - apiuser@example.com [19/Dec/2009:03:12:07 +0000] "POST /api/status.json HTTP/1.1" 200 80516 "-" "-"', 'converted line matches');
	}
]);
ok($comp->send_packet('server',
	hostname	=> 'apache-server1'
), 'send initial server packet');
is_hex($buffer, '01 61 70 61 63 68 65 2d 73 65 72 76 65 72 31 00', 'initial server packet is correct');
$exp->expand(\$buffer);
is($buffer, '', 'buffer now empty');
ok($comp->compress('api.example.com 105327 123.15.16.108 - apiuser@example.com [19/Dec/2009:03:12:07 +0000] "POST /api/status.json HTTP/1.1" 200 80516 "-" "-"'), 'compress a line');
my $copy = $buffer;
is_hex(substr($buffer, 0, 5, ''),  '02 4b 2c 44 87', 'timestamp packet is correct');
is_hex(substr($buffer, 0, 21, ''), '03 00 00 00 00 61 70 69 2e 65 78 61 6d 70 6c 65 2e 63 6f 6d 00', 'vhost packet is correct');
is_hex(substr($buffer, 0, 25, ''), '04 00 00 00 00 61 70 69 75 73 65 72 40 65 78 61 6d 70 6c 65 2e 63 6f 6d 00', 'user packet is correct');
is_hex(substr($buffer, 0, 22, ''), '07 00 00 00 00 2f 61 70 69 2f 73 74 61 74 75 73 2e 6a 73 6f 6e 00', 'URL packet is correct');
is_hex(substr($buffer, 0, 6, ''),  '0a 00 00 00 00 00', 'query packet is correct');
is_hex(substr($buffer, 0, 7, ''),  '06 00 00 00 00 2d 00', 'referer packet is correct');
is_hex(substr($buffer, 0, 7, ''),  '05 00 00 00 00 2d 00', 'useragent packet is correct');
is_hex(substr($buffer, 0, 33, ''), '00 00 00 00 01 9b 6f 7b 0f 10 6c 00 00 03 00 00 00 00 00 00 00 00 01 00 c8 00 01 3a 84 00 00 00 00', 'log packet is correct');
is(length($buffer), 0, 'buffer now empty');
my $idx = 0;
$exp->expand(\$copy) while length $copy && ++$idx < 100;

{
	my $bad_data = 'api.example.com 105327 123.15.16.108 - apiuser@example.com [19/Dec/2009:03:12:07 +0000] "SOME INVALID DATA HERE" 14124 1231 -';
	local $comp->{on_bad_data} = sub {
		my $data = shift;
		pass('have bad data event');
		is($data, $bad_data, 'data matches');
	};
	ok($comp->compress($bad_data), 'pass bad data into compressor');
}

t/unicode.t  view on Meta::CPAN

ok($comp->compress($line), 'compress the line');

# Try to prove we have binary data
open my $out_fh, '>', \my $tmp or die $!;
binmode $out_fh;
print $out_fh $buffer;
close $out_fh;
$buffer = $tmp;
ok(!is_utf8($buffer), 'utf8 not set');
my $idx = 0;
$exp->expand(\$buffer) while length $buffer && ++$idx < 100;



( run in 2.645 seconds using v1.01-cache-2.11-cpan-5b529ec07f3 )