App-Fetchware

 view release on metacpan or  search on metacpan

lib/App/Fetchware.pm  view on Meta::CPAN

        # Use HTML::TreeBuilder to parse the scalar of html into a tree of tags.
        my $tree = HTML::TreeBuilder->new_from_content($http_listing);

        my @filename_listing;
        my @matching_links = $tree->look_down(
            _tag => 'a',
            sub {
                my $h = shift;

                #parse out archive name.
                my $link = $h->as_text();
                # NOTE: The weird alternations adding .asc, .md5, and .sha.?,
                # and also a KEYS file are to allow fetchware new to also use
                # this subroutine to parse http file listings to analyze the
                # contents of the user's lookup_url. It does not make any sense
                # to copy and paste this function or even add a callback argument
                # allowing you to change the regex.
                if ($link =~
                    /(\.(tar\.(gz|bz2|xz)|(tgz|tbz2|txz))|(asc|md5|sha.?))|KEYS$/) {
                    # Should I strip out dirs just to be safe?
                    my $filename = $link;
                    # Obtain the tag to the right of the archive link to find the
                    # timestamp.
                    if (my $rh = $h->right()) {
                        my $listing_line;
                        if (blessed($rh)) {
                            $listing_line = $rh->as_text();
                        } else {
                            $listing_line = $rh;
                        }
                        my @fields = split ' ', $listing_line;
                        ###BUGALERT### Internationalization probably breaks this
                        #datetime parsing? Can a library do it?
                        # day-month-year   time
                        # $fields[0]      $fields[1]
                        # Normalize format for lookup algorithms .
                        my ($day, $month, $year) = split /-/, $fields[0];
                        # Ditch the ':' in the time.
                        $fields[1] =~ s/://;
                        # Some dirlistings use string months Aug, Jun, etc...
                        if (looks_like_number($month)) {
                            # Strip leading 0 if it exists by converting the
                            # string with the useless leading 0 into an integer.
                            # The %num_month hash lookup will add back a leading
                            # 0 if there was one. This stupid roundabout code is
                            # to ensure that there always is a leading 0 if the
                            # number is less than 10 to ensure that all of the
                            # numbers this hacky datetime parser outputs all
                            # have the same length so that the numbers can
                            # easily be compared with each other.
                            $month = sprintf("%u", $month);
                            push @filename_listing, [$filename,
                                "$year$num_month{$month}$day$fields[1]"];
                        # ...and some use numbers 8, 6, etc....
                        } else {
                            push @filename_listing, [$filename,
                                "$year$month{$month}$day$fields[1]"];
                        }
                    } else {
###BUGALERT### Add support for other http servers such as lighttpd, nginx,
#cherokee, starman?, AND use the Server: header to determine which algorithm to
#use.
                        die <<EOD;
App-Fetchware: run-time error. A hardcoded algorithm to parse HTML directory
listings has failed! Fetchware currently only supports parseing Apache HTML
directory listings. This is a huge limitation, but surprisingly pretty much
everyone who runs a mirror uses apache for http support. This is a bug so
please report it. Also, if you want to try a possible workaround, just use a ftp
mirror instead of a http one, because ftp directory listings are a easy to
parse. See perldoc App::Fetchware.
EOD
                    }
                }
            }
        );


        # Delete the $tree, so perl can garbage collect it.
        $tree = $tree->delete;

        return \@filename_listing;
    }


} # end bare block for %month.





sub file_parse_filelist {
    my $file_listing = shift;

    for my $file (@$file_listing) {
        my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,
            $blksize,$blocks)
            = stat($file) or die <<EOD;
App-Fetchware: Fetchware failed to stat() the file [$file] while trying to parse
your local [file://] lookup_url. The OS error was [$!]. This should not happen,
and is either a bug in fetchware or some sort of race condition.
EOD

        # Replace scalar filename with a arrayref of the filename with its
        # assocated timestamp for later processing for lookup().
        # 
        # Use Path::Class's file() constructor & basename() method to strip out
        # all unneeded directory information leaving just the file's name.
        # Add all of the timestamp numbers together, so that only one numberical
        # sort is needed instead of a descending list of numerical sorts.
        $file = [file($file)->basename(), $mtime ];
    }

    return $file_listing;
}



sub  lookup_by_timestamp {
    my $file_listing = shift;
    
    # Sort the timstamps to determine the latest one. The one with the higher



( run in 0.497 second using v1.01-cache-2.11-cpan-e93a5daba3e )