App-Fetchware
view release on metacpan or search on metacpan
lib/App/FetchwareX/HTMLPageSync.pm view on Meta::CPAN
msg <<EOM;
Uninstalling this HTMLPageSync package by deleting your destination directory.
EOM
###BUGALERT### Before release go through all of Fetchware's API, and subify
#each main component like lookup and download were; the later ones were not
#done this way. That way I can put, say, chdir_to_build_path() here instead of
#basically copying and pasting the code like I do below. Also
#chdir_to_build_path() can be put in :OVERRIDE_UNINSTALL!!! Which I can use
#here.
chdir $build_path or die <<EOD;
App-FetchwareX-HTMLPageSync: Failed to uninstall the specified package and specifically to change
working directory to [$build_path] before running make uninstall or the
uninstall_commands provided in the package's Fetchwarefile. Os error [$!].
EOD
if ( defined config('destination_directory')) {
# Use File::Path's remove_tree() to delete the destination_directory
# thereby "uninstalling" this package. Will throw an exception that I'll
# let the main eval in bin/fetchware catch, print, and exit 1.
vmsg <<EOM;
Deleting entire destination directory [@{[config('destination_directory')]}].
EOM
remove_tree(config('destination_directory'));
} else {
die <<EOD;
App-FetchwareX-HTMLPageSync: Failed to uninstall the specified App::FetchwareX::HTMLPageSync
package, because no destination_directory is specified in its Fetchwarefile.
This configuration option is required and must be specified.
EOD
}
# keep_destination_directory was set, so don't delete destination directory.
} else {
msg <<EOM;
Uninstalling this HTMLPageSync package but keeping your destination directory.
EOM
}
return 'True for success.';
}
# upgrade($download_path)
#
# Decides whether any of the URLs in @$download_path still need to be
# downloaded, by comparing the file name of each URL (the last segment of its
# path) against the names of the entries already present in the configured
# destination_directory.
#
# Parameters:
#   $download_path - array reference of download URLs. When new URLs are
#                    found, the referenced array is trimmed in place so that
#                    it lists only those new URLs (original order preserved).
#
# Returns 'New URLs Found.' if at least one URL has no matching file in
# destination_directory; otherwise returns nothing (bare return).
#
# Dies if destination_directory exists but cannot be opened for reading.
#
# NOTE(review): assumes downloaded files are stored under the unescaped last
# path segment of their URL -- confirm against the download step.
sub upgrade {
    my $download_path = shift; # $fetchware_package_path is not used in HTMLPageSync.

    my $destination_directory = config('destination_directory');

    # Collect the names of the already downloaded files. The directory is read
    # with readdir() instead of glob(), because glob() on the bare directory
    # name yields the directory itself rather than its contents, and it also
    # splits its argument on whitespace.
    my %installed;
    if (defined $destination_directory and -d $destination_directory) {
        opendir my $dh, $destination_directory or die <<EOD;
App-FetchwareX-HTMLPageSync: Failed to open your destination directory
[$destination_directory] to determine which files have already been downloaded.
Os error [$!].
EOD
        $installed{$_} = 1 for grep { $_ ne '.' and $_ ne '..' } readdir $dh;
        closedir $dh;
    }

    # Keep only the URLs whose file name is not already installed. Both sides
    # are reduced to plain file names first, so that URL schemes, hosts, and
    # differing directory prefixes cannot defeat the comparison. A URL with no
    # usable file name can't be matched, so it is treated as new.
    my @new_urls_to_download = grep {
        my $url_path = ( uri_split($_) )[2];
        my $filename = defined $url_path
            ? ( split m{/}, $url_path )[-1]
            : undef;
        !( defined $filename && exists $installed{$filename} );
    } @$download_path;

    if (@new_urls_to_download) {
        # Alter the caller's array through the reference so it only lists the
        # new URLs. Assigning a fresh array reference to $download_path would
        # merely rebind this sub's lexical and leave the caller's list as is,
        # causing the already downloaded files to be downloaded again.
        @$download_path = @new_urls_to_download;

        return 'New URLs Found.';
    }

    return;
}
1;
=pod
=head1 NAME
App::FetchwareX::HTMLPageSync - An App::Fetchware extension that downloads files based on an HTML page.
=head1 VERSION
version 1.016
=head1 SYNOPSIS
=head2 Example App::FetchwareX::HTMLPageSync Fetchwarefile.
page_name 'Cool Wallpapers';
html_page_url 'http://some-html-page-with-cool.urls';
destination_directory 'wallpapers';
# pretend to be firefox
user_agent 'Mozilla/5.0 (X11; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1';
# Customize the callbacks.
html_treebuilder_callback sub {
# Get one HTML::Element.
my $h = shift;
# Return true or false to indicate if this HTML::Element should be a
# download link.
if (something) {
return 'True';
} else {
return undef;
}
};
download_links_callback sub {
my @download_urls = @_;
( run in 3.185 seconds using v1.01-cache-2.11-cpan-140bd7fdf52 )