Apache-ParseLog

 view release on metacpan or  search on metacpan

ParseLog.pm  view on Meta::CPAN

                $dtswitch++; 
                if ($visitorswitch) {
                    $visitorswitch = 0;
                    $visitordone++;
                    ("VISITORBYDATE", "VISITORBYTIME", "VISITORBYDATETIME", "HITBYDATE", "HITBYTIME", "HITBYDATETIME") 
                } else {
                    ("HITBYDATE", "HITBYTIME", "HITBYDATETIME")
                }
            } else {
                $_
            }
        } elsif (((m"DATETIME") || (m"BYTE")) && ($dtbyteswitch)) {
            if (m"DATETIME") { 
                $dtswitch++;
                if ($visitorswitch) {
                    $visitorswitch = 0;
                    $visitordone++;
                    ("VISITORBYDATE", "VISITORBYTIME", "VISITORBYDATETIME", "HITBYDATE", "HITBYTIME", "HITBYDATETIME", "BYTEBYDATE", "BYTEBYTIME", "BYTEBYDATETIME") 
                } else {
                    ("HITBYDATE", "HITBYTIME", "HITBYDATETIME", "BYTEBYDATE", "BYTEBYTIME", "BYTEBYDATETIME") 
                }
            } else { 
                ($_, "BYTEBYDATE", "BYTEBYTIME", "BYTEBYDATETIME") 
            }
        } elsif (m"REQUEST") {
            $fileswitch++;
            ("METHOD", "FILE", "QUERYSTRING", "PROTO")
        } elsif ((m"FILENAME") || (m"URL")) {
            $fileswitch++;
            $_
        } elsif ((m"SEC") && ($fileswitch)) {
            $_
        } elsif (m"UAGENT") {
            ($_, "UAVERSION", "BROWSER", "PLATFORM", "BROWSERBYOS")
        } elsif (m"REFERER") {
            ($_, "REFERERDETAIL")
        } else { 
            $_ 
        }
    } @elements;
    push(@METHODS, "HIT");  # the hit => { %hit } is always there
    @METHODS = map { lc } @METHODS;
    ### reports placeholders
    my(%host);               # hosts (visitors)
    my(%topdomain);          # top domains
    my(%secdomain);          # secondary domains
    my(%login);              # logins
    my(%user);               # users
    my(%visitorbydate);      # unique visitors (hosts) by date
    my(%visitorbytime);      # unique visitors by time
    my(%visitorbydatetime);  # unique visitors by date/time
    my(%hitbydate);          # hits by date
    my(%hitbytime);          # hits by time
    my(%hitbydatetime);      # hits by date/time
    my(%method);             # methods (get, post, etc.)
    my(%file);               # files
    my(%querystring);        # Query String
    my(%proto);              # protos (HTTP/1.0, etc.)
    my(%ostatus);            # original status (..)
    my(%lstatus);            # last status (use with %STATUS_BY_CODE)
    my(%byte);               # Bytes transferred (* contains one key "total")
    my(%bytebydate);         # bytes by date
    my(%bytebytime);         # bytes by time
    my(%bytebydatetime);     # bytes by date/time
    my(%filename);           # filenames (= files)
    my(%addr);               # IPs (=~ hosts)
    my(%port);               # ports
    my(%proc);               # procs
    my(%sec);                # seconds (time in sec)
    my(%url);                # URLs (=~ files)
    my(%hostname);           # hostnames (=~ hosts)
    my(%referer);            # referer (site only)
    my(%refererdetail);      # referer (detail)
    my(%uagent);             # agents
    my(%uaversion);          # uagent w/ versions (Mozilla/4.04, Slurp/2.0)
    my(%browser);            # browsers w/ version
    my(%platform);           # platforms only
    my(%browserbyos);        # browsers w/ platforms
    my(%hit);                # Total number of hits (lines)
    ### Routine
    if ((scalar(@elements) == 1) && ($elements[0] eq "UAGENT")) { 
        $FORMAT =~ s#\?## 
    }
    my($regex) = $FORMAT;
    my($line);
    my($fh) = openFile($logfile);
    while (defined($line = <$fh>)) { 
        chomp($line);
        $line =~ m#$regex#;
        # Scan each match
        for ($i = 0; $i < scalar(@elements); $i++) {
            my($mi) = $i + 1;          # index for match; $1, $2,...
            ${$elements[$i]} = ${$mi}; # assign the back ref
        }
        my($date, $time, $method, $file, $proto);
        ### create reports ###
        { # HOST RELATED BLOCK
            # HOST
            $host{$HOST}++ if $HOST;
            # HOSTNAME
            $hostname{$HOSTNAME}++ if $HOSTNAME;
            my($domain) = ($HOST ? $HOST : $HOSTNAME);
            # (TOP|SEC)DOMAIN
            if ($domain) {
                if ($domain !~ /^\d{1,3}(?:\.\d{1,3}){3}$/) {
                    if ($domain =~ m/\.([A-Za-z0-9\-]+\.)(\w+)$/) {
                        my($secdomain) = $1;
                        my($topdomain) = $2;
                        $topdomain{$topdomain}++;
                        $secdomain = $secdomain . $topdomain;
                        $secdomain{$secdomain}++;
                    } else {
                        $topdomain{$domain}++;
                        $secdomain{$domain}++;
                    }
                } else {
                    $topdomain{'unknown'}++;
                    $secdomain{'unknown'}++;
                }
            }
        }



( run in 2.642 seconds using v1.01-cache-2.11-cpan-75ffa21a3d4 )