App-Elog
view release on metacpan or search on metacpan
}
sub parse_access_line_format {
my ($line, $log) = @_;
if ($line =~ /^\s*$/) {
return undef;
}
my $a = {};
my $captures = $log->{captures};
for my $c (@$captures) {
my $regex = $c->{regex};
my $letter = $c->{letter};
if ($line !~ /\G$regex/gc) {
# The log entry doesnt match the format, however some of it might have
# matched, so we can use that. If it didn't get at least a datestr,
# then report the error.
if (!$a->{datestr}) {
warn "Unable to parse: $line\n";
}
last;
}
my $value = $1;
if (!$letter) {
# we only matched text, nothing to capture
}
elsif ($letter eq "a") {
$a->{ip} = $value;
}
elsif ($letter eq "A") {
$a->{local_ip} = $value;
}
elsif ($letter eq "B") {
$a->{bytes} = $value;
}
elsif ($letter eq "D") {
$a->{reqtime} = $value;
}
elsif ($letter eq "e") {
my $name = lc $c->{curly};
$name =~ s/-/_/g;
$a->{$name} = $value;
}
elsif ($letter eq "f") {
$a->{file} = $value;
}
elsif ($letter eq "h") {
$a->{host} = $value;
if (!$a->{ip}) {
$a->{ip} = $a->{host};
}
}
elsif ($letter eq "i") {
my $name = lc $c->{curly};
$name =~ s/-/_/g;
$a->{$name} = $value;
}
elsif ($letter eq "I") {
$a->{req_bytes} = $value;
}
elsif ($letter eq "k") {
$a->{keepalives} = $value;
}
elsif ($letter eq "l") {
$a->{ident} = $value;
}
elsif ($letter eq "L") {
$a->{errid} = $value;
}
elsif ($letter eq "m") {
$a->{method} = $value;
}
elsif ($letter eq "O") {
# bytes with headers
$a->{byteswh} = $value;
}
elsif ($letter eq "p") {
if ($c->{curly} && $c->{curly} eq "local") {
$a->{local_port} = $value;
}
elsif ($c->{curly} && $c->{curly} eq "remote") {
$a->{remote_port} = $value;
}
}
elsif ($letter eq "P") {
if ($c->{curly} && $c->{curly} eq "tid") {
$a->{tid} = $value;
}
else {
$a->{pid} = $value;
}
}
elsif ($letter eq "q") {
$a->{query} = $value;
}
elsif ($letter eq "r") {
$a->{request} = $value;
}
elsif ($letter eq "R") {
$a->{handler} = $value;
}
elsif ($letter eq "s") {
if ($c->{angle} && $c->{angle} eq ">") {
$a->{final_status} = $value;
if (!$a->{status}) {
$a->{status} = $value;
}
}
else {
$a->{status} = $value;
}
}
elsif ($letter eq "t") {
if ($value =~ /^\[([^]]*)\]/) {
$a->{datestr} = $1;
}
else {
$a->{datestr} = $value;
}
}
elsif ($letter eq "u") {
$a->{user} = $value;
sub parse_log_format {
my ($format) = @_;
my @captures;
my $prev;
while (1) {
my $c = {};
if ($format =~ /\G\s+/gc) {
$c->{regex} = qr{\s+};
$c->{whitespace} = 1;
}
elsif ($format =~ /\G([^%\\\s]+)/gc) {
$c->{regex} = quotemeta($1);
}
elsif ($format =~ /\G\\t/gc) {
$c->{regex} = qr{\s+};
$c->{whitespace} = 1;
}
elsif ($format =~ /\G%t/gc) {
$c->{letter} = "t";
$c->{regex} = qr{\[([^\]]*)\]};
}
elsif ($format =~ /\G%r/gc) {
$c->{letter} ="r";
if ($prev && $prev->{regex} =~ /"$/) {
$c->{regex} = qr{((?:[^"]*|\\")*)};
}
else {
$c->{regex} = qr{(\S*)};
}
}
elsif ($format =~ /\G%(\{([^\}]*)\}|([<>]))?(\w)/gc) {
$c->{curly} = $2;
$c->{angle} = $3;
$c->{letter} = $4;
if ($prev && $prev->{regex} =~ /"$/) {
$c->{regex} = qr{((?:[^"]*|\\")*)};
}
else {
$c->{regex} = qr{(\S*)};
}
}
else {
last;
}
if ($prev && $prev->{whitespace} && $c->{whitespace}) {
next;
}
push @captures, $c;
$prev = $c;
}
if (!@captures) {
die "Unable to parse format: $format\n";
}
return \@captures;
}
sub parse_access_line_different {
my ($line) = @_;
# For lines like:
# unique_id:"Y0gPiJCuUIKvl11H6djHxgAAAUM" remote_host:"184.94.203.3" client_IP:"184.94.203.3" X-Real-IP:"92.205.104.221" ident:"-" user:"-" time:"[13/Oct/2022:08:15:52 -0500]" req:"GET /foo.cgi?asdf=436 HTTP/1.1"...
my $a = {};
my %key_trans = (
"time" => "datestr",
"client_ip" => "ip",
"ua" => "user_agent",
"req" => "request",
"initial_status" => "status",
"final_status" => "status",
"reqtime_usec" => "reqtime",
);
while ($line =~ m{\G\s*([\w-]+):("([^"]|\\")*"|\S*)}gc) {
my $key = $1;
my $value = $2;
$value =~ s/^"|"$//g;
$value =~ s/^\[|\]$//g;
$key = lc $key;
$key =~ s/-/_/g;
if ($key_trans{$key}) {
$key = $key_trans{$key};
}
$a->{$key} = $value;
}
if (!$a->{datestr}) {
warn "Unable to parse: $line\n";
}
process_access($a);
return $a;
}
sub process_access {
my ($a) = @_;
delete $a->{ident} if !$a->{ident} || $a->{ident} eq "-";
delete $a->{user} if !$a->{user} || $a->{user} eq "-";
delete $a->{user_agent} if !$a->{user_agent} || $a->{user_agent} eq "-";
$a->{bytes} = 0 if $a->{bytes} && $a->{bytes} eq "-";
delete $a->{referer} if $a->{referer} && $a->{referer} eq "-";
$a->{request} ||= "";
if ($a->{request} =~ /^ (\S+) \s+ (\S+) \s+ (\S+) $/x) {
$a->{method} = $1;
$a->{uri} = $2;
$a->{protocol} = $3;
$a->{loc} = $a->{uri};
$a->{loc} =~ s/\?.*//;
delete $a->{request};
}
else {
$a->{method} = $a->{uri} = $a->{protocol} = $a->{loc} = "";
}
if ($a->{loc} =~ /\.(jpe?g|gif|png|ico)$/i) {
$a->{type} = "image";
}
elsif ($a->{loc} =~ /\.(cgi)$/i) {
$a->{type} = "cgi";
}
elsif ($a->{loc} =~ /\.(php)$/i) {
$a->{type} = "php";
}
elsif ($a->{loc} =~ /\.(html?)$/i) {
$a->{type} = "html";
}
( run in 0.866 second using v1.01-cache-2.11-cpan-df04353d9ac )