ApacheLog-Compressor

 view release on metacpan or  search on metacpan

lib/ApacheLog/Compressor.pm  view on Meta::CPAN

		url		=> {
			id => 0x07,
			type => 'N1',
			regex => qr{([^ ]+)},
			process_in => sub {
				my ($self, $data) = @_;
				return $data->{url} = '' unless defined $data->{url};

				($data->{url}, $data->{query}) = split /\?/, $data->{url}, 2;
# Dodgy UTF8 handling, currently disabled - no guarantee that URLs are UTF8 anyway
#				if(length $data->{url}) {
				# URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
#					my $txt = $data->{url};
#					$txt = encode_utf8($txt); # turn OFF utf8
#					$txt =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/ge; # expand
#					$txt = decode_utf8($txt); # turn ON utf8 where applicable
#					$data->{url} = $txt;
#				}
#				if(defined $data->{query} && length $data->{query}) {
				# URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually
#					(my $txt = $data->{query}) =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/eg;
#					$data->{query} = decode_utf8($txt, FB_DEFAULT);
#				}
			}
		},
		query		=> { id => 0x0A, type => 'N1', },
		ver		=> {
			type => 'C1',
			regex => qr{HTTP/(\d+\.\d+)"},
			process_in => sub {
				my ($self, $data) = @_;
				$data->{ver} = ($data->{ver} eq '1.0' ? 0 : 1);
			}, process_out => sub {
				my ($self, $data) = @_;
				$data->{ver} = ($data->{ver} ? '1.1' : '1.0');
			}
		},
		result		=> { type => 'n1', regex => qr{(\d+)} },
		size		=> {
			type => 'N1',
			regex => qr{(\d+|-)},
			process_in => sub {
				my ($self, $data) = @_;
				$data->{size} = ($data->{size} eq '-') ? -1 : $data->{size};
			}, process_out => sub {
				my ($self, $data) = @_;
				$data->{size} = ($data->{size} == 4294967295) ? '-' : $data->{size};
			}
		},
		refer		=> { id => 0x06, type => 'n1', regex => qr{"([^"]*)"} },
		useragent	=> { id => 0x05, type => 'n1', regex => qr{"([^"]*)"} },
	];
}

=head2 update_mapping

Refresh the mapping from format keys and internal definitions.

=cut

sub update_mapping {
	my $self = shift;
	my %fmt = @{ $self->{format} };
	$self->{format_hash} = \%fmt;
	$self->{packet_handler} = {
		0x00 => 'log',
		0x01 => 'server',
		0x80 => 'reset',
		map { $fmt{$_}->{id} => $_ } grep { exists $fmt{$_}->{id} } keys %fmt
	};

# Extract information from format strings so that we know how big the packets are and where the data goes
	my @fmt = @{$self->{format}};
	my $pack_str = '';
	my $log_len = 0;
	my @format_keys;
	my @regex;
	ITEM:
	while(@fmt) {
		my $k = shift(@fmt);
		my $v = shift(@fmt);
		$v = { type => $v } unless ref $v;
		if(exists $v->{regex}) {
			push @regex, $v->{regex};
			push @{$self->{log_regex_keys}}, $k;
		}
		if(exists $v->{process_in}) {
			push @{$self->{log_process}}, $v->{process_in};
		}
		if(exists $v->{process_out}) {
			push @{$self->{log_process_out}}, $v->{process_out};
		}

		my $type = $v->{type};
		next ITEM unless $type;

		push @format_keys, $k;
		$pack_str .= $type;

# Obviously these will need updating if we use any other pack() datatypes
		if($type =~ /^C(\d+)/) {
			$log_len += $1;
		} elsif($type =~ /^n(\d+)/) {
			$log_len += 2 * $1;
		} elsif($type =~ /^N(\d+)/) {
			$log_len += 4 * $1;
		} else {
			die "no idea what $type is";
		}
	}
	my $regex = join(' ', @regex);
	$self->{log_regex} = qr{^$regex};
	$self->{log_format} = $pack_str;
	$self->{log_record_length} = $log_len;
	$self->{format_keys} = \@format_keys;
	return $self;
}

=head2 cached

Returns the index for the given type and value, generating a packet if no previous value was found.



( run in 1.755 second using v1.01-cache-2.11-cpan-df04353d9ac )