ApacheLog-Compressor
view release on metacpan or search on metacpan
examples/compress.pl view on Meta::CPAN
12345678910111213141516171819#!/usr/bin/perl
use strict;
use warnings;
use ApacheLog::Compressor 0.004;

# Console output and diagnostics are emitted as UTF-8.
for my $console (\*STDOUT, \*STDERR) {
    binmode $console, ':encoding(utf8)';
}

# Two positional arguments are required: the input path, then the output path.
my ($in, $out) = @ARGV;
unless (defined $in && length $in) {
    die "No input file provided";
}
unless (defined $out && length $out) {
    die "No output file provided";
}

# Write all data to binary output file
open(my $out_fh, '>', $out)
    or die "Failed to create output file $out - $!";
binmode $out_fh;    # no encoding layer: the handle carries raw bytes
# Provide a callback to send data through to the file
examples/compress.pl view on Meta::CPAN
293031323334353637383940414243444546474849
return
0
unless
$data
->{timestamp};
# Also skip irrelevant entries, in this case regular OPTIONS * server pings from loadbalancer
return
0
if
$ApacheLog::Compressor::HTTP_METHOD_LIST
[
$data
->{method}] eq
'OPTIONS'
&&
$data
->{url} eq
'*'
;
return
1;
}
);
# Input file - normally use whichever one's just been closed + rotated
open(my $in_fh, '<', $in)
    or die "Failed to open input file $in - $!";
binmode $in_fh, ':encoding(utf8)';    # input lines are decoded as UTF-8 text

# Initial packet to identify which server this came from
$alc->send_packet(server => (hostname => hostname()));

# Read and compress all the lines in the files, one line per compress() call
while (defined(my $line = <$in_fh>)) {
    $alc->compress($line);
}
examples/expand.pl view on Meta::CPAN
123456789101112131415161718192021222324252627#!/usr/bin/perl
use strict;
use warnings;
use ApacheLog::Compressor 0.004;

# Two positional arguments are required: the input path, then the output path.
my ($in, $out) = @ARGV;
die "No input file provided" unless defined $in && length $in;
die "No output file provided" unless defined $out && length $out;

binmode STDOUT, ':encoding(utf8)';
binmode STDERR, ':encoding(utf8)';

# Write all data to plain text file
open my $out_fh, '>', $out or die "Failed to create output file $out - $!";
binmode $out_fh, ':encoding(utf8)';

use Data::Dumper;

# Provide a callback to send data through to the file
my $alc = ApacheLog::Compressor->new(
    on_log_line => sub {
        my ($self, $data) = @_;

        # Use the helper method to expand back to plain text representation.
        # The handle goes in a block so it is parsed as the filehandle, not
        # as the first item of the list being printed.
        print {$out_fh} $self->data_to_text($data) . "\n"
            or die "Failed to write to output file $out - $!";
    },
);
lib/ApacheLog/Compressor.pm view on Meta::CPAN
lib/ApacheLog/Compressor.pm view on Meta::CPAN
247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
id
=> 0x07,
type
=>
'N1'
,
regex
=>
qr{([^ ]+)}
,
process_in => sub {
    my ($self, $entry) = @_;

    # No URL captured: normalise to an empty string and stop here.
    unless (defined $entry->{url}) {
        return $entry->{url} = '';
    }

    # Everything after the first '?' becomes the query; the rest stays as the URL.
    @{$entry}{qw(url query)} = split /\?/, $entry->{url}, 2;

    # Dodgy UTF8 handling, currently disabled - no guarantee that URLs are UTF8 anyway
    # if(length $data->{url}) {
    #     # URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
    #     my $txt = $data->{url};
    #     $txt = encode_utf8($txt); # turn OFF utf8
    #     $txt =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/ge; # expand
    #     $txt = decode_utf8($txt); # turn ON utf8 where applicable
    #     $data->{url} = $txt;
    # }
    # if(defined $data->{query} && length $data->{query}) {
    #     # URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
    #     (my $txt = $data->{query}) =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/eg;
    #     $data->{query} = decode_utf8($txt, FB_DEFAULT);
    # }
}
},
query
=> {
id
=> 0x0A,
type
=>
'N1'
, },
ver
=> {
type
=>
'C1'
,
regex
=>
qr{HTTP/(\d+\.\d+)"}
,
process_in
=>
sub
{
my
(
$self
,
$data
) =
@_
;
$data
->{ver} = (
$data
->{ver} eq
'1.0'
? 0 : 1);
lib/ApacheLog/Compressor.pm view on Meta::CPAN
# cached($type, $v)
#
# Look up $v in the per-type cache and return its numeric id. A value seen
# for the first time is appended to the index for $type, given the next free
# id (its position in that index), and announced with a packet of that type
# carrying the id and the UTF-8 encoded value. An undefined $v is treated as
# the empty string.
sub cached {
    my ($self, $type, $v) = @_;
    $v = '' unless defined $v;

    # Already known: no packet needs to be sent.
    my $known = $self->{entry_cache}{$type}{$v};
    return $known if defined $known;

    # New value: its id is its position at the end of the index.
    push @{ $self->{entry_index}{$type} }, $v;
    ++$self->{entry_count}{$type};
    my $id = $#{ $self->{entry_index}{$type} };
    $self->{entry_cache}{$type}{$v} = $id;

    # The packet payload is sent as encoded bytes, not as a character string.
    $self->send_packet($type, id => $id, data => encode_utf8($v));
    return $id;
}
=head2 from_cache
Read a value from the cache, for expanding compressed log format entries.
=cut
lib/ApacheLog/Compressor.pm view on Meta::CPAN
=head2 set_key
Set a cache index key to a value when expanding a packet stream.
=cut
sub set_key {
    my ($self, $type, %args) = @_;

    # The payload arrives as bytes; decode it before using it as a key.
    my $id    = $args{id};
    my $value = decode_utf8($args{data});

    # Record both directions: value -> id and id -> value.
    $self->{entry_cache}{$type}{$value} = $id;
    $self->{entry_index}{$type}[$id]    = $value;

    # Optional per-type hook, called as ($self, $id, $value).
    if ($self->{"on_set_$type"}) {
        $self->{"on_set_$type"}->($self, $id, $value);
    }

    # Optional catch-all hook, called as ($self, $type, $id, $value).
    if ($self->{on_set_key}) {
        $self->{"on_set_key"}->($self, $type, $id, $value);
    }

    return $self;
}
=head2 compress
General compression function. Given a line of data, sends packets as required to transmit that information.
t/unicode.t view on Meta::CPAN
12345678910111213141516171819
t/unicode.t view on Meta::CPAN
36373839404142434445464748
hostname
=>
'apache-server1'
),
'send initial server packet'
);
ok($comp->compress($line), 'compress the line');

# Try to prove we have binary data: write the buffer through a raw in-memory
# filehandle, read back what was written, and check its utf8 flag.
open my $out_fh, '>', \my $tmp or die $!;
binmode $out_fh;
print {$out_fh} $buffer;
close $out_fh;
$buffer = $tmp;
ok(!is_utf8($buffer), 'utf8 not set');

# Expand until the buffer is empty; the counter stops the loop after at most
# 99 expand() calls if the buffer never empties.
my $idx = 0;
$exp->expand(\$buffer) while length $buffer && ++$idx < 100;
( run in 0.955 second using v1.01-cache-2.11-cpan-49f99fa48dc )