App-Wax

 view release on metacpan or  search on metacpan

lib/App/Wax.pm  view on Meta::CPAN

        unlink($filename) || $self->log(WARN => "Can't unlink %s: %s", $filename, $!);
    }
}

# issue a HEAD request for the URL and return its content-type, or an empty
# string if the request fails. a successful response which doesn't supply a
# content-type is assigned DEFAULT_CONTENT_TYPE
method content_type ($_url) {
    my ($url, $url_index) = @$_url;
    my $response = $self->_lwp_user_agent->head($url);

    # nothing to report if the request failed
    return '' unless ($response->is_success);

    # the initial (pre-semicolon) part of the mime-type, trimmed and lowercased.
    my $type = $response->headers->content_type;

    unless ($type) {
        # the server didn't say, so fall back to the default
        $type = DEFAULT_CONTENT_TYPE;
        $self->debug('content-type (%d): %s (default)', $url_index, $type);
        return $type;
    }

    $self->debug('content-type (%d): %s', $url_index, $type);

    return $type;
}

# save the URL to a local filename; returns an error message if an error occurred,
# or a falsey value otherwise.
#
# the download is skipped if caching is enabled and the file already exists.
# otherwise the file is fetched with the user agent's `mirror` method (where a
# 304 Not Modified response counts as an up-to-date file rather than a failure)
# or with a plain GET, depending on the `mirror` setting.
#
# failures which occur while the file is being saved are reported as errors
# as well: `mirror` signals them by dying, and `get` records them in the
# response's X-Died header while still returning the server's (successful) status
method download ($_url, $filename) {
    my ($url, $url_index) = @$_url;
    my $ua = $self->_lwp_user_agent;
    my ($downloaded, $error, $reason);

    if ($self->cache && (-e $filename)) {
        $downloaded = 0;
    } else {
        my $mirror = $self->mirror;

        try {
            my $response = $mirror
                ? $ua->mirror($url, $filename)
                : $ua->get($url, ':content_file' => $filename);

            # set when collecting the response body was aborted, e.g. because
            # the file couldn't be written to
            my $died = $response->header('X-Died');

            if (defined($died) && length($died)) {
                $reason = $died;
            } elsif ($response->is_success) {
                $downloaded = 1;
            } elsif ($mirror && ($response->code == 304)) {
                $downloaded = 0; # the local copy is already up to date
            } else {
                $reason = $response->status_line;
            }
        } catch {
            # honour the documented contract (return the error) rather than
            # letting the exception escape
            $reason = "$_";
        };
    }

    if (defined $downloaded) {
        $self->debug('download (%d): %s', $url_index,  ($downloaded ? 'yes' : 'no'));
    } else {
        $reason =~ s/\s+\z//; # exception messages typically end with a newline
        $error = "can't download URL #$url_index ($url) to filename ($filename): $reason";
    }

    return $error;
}

# helper for `dump_command`: escape/quote a shell argument on POSIX shells.
#
# the argument is wrapped in single quotes, with each run of embedded single
# quotes spliced in as a double-quoted segment, e.g.:
#
#     it's  ->  'it'"'"'s'
#
# an empty argument is rendered as '' so that it doesn't vanish from the
# command line
fun _escape ($arg) {
    # without this, the cleanup step below would strip the quotes around an
    # empty argument and return nothing at all
    return q{''} if ($arg eq '');

    # https://stackoverflow.com/a/1250279
    # https://github.com/boazy/any-shell-escape/issues/1#issuecomment-36226734
    $arg =~ s!('{1,})!'"$1"'!g;
    $arg = "'$arg'";

    # remove the redundant empty pair of quotes left at either end when the
    # argument begins or ends with a single quote
    $arg =~ s{^''|''$}{}g;

    return $arg;
}

# set the `directory` attribute to the cache directory File::BaseDir reports
# for the app's name
method _use_default_directory () {
    # only loaded when the default directory is actually needed
    require File::BaseDir;

    # "${XDG_CACHE_HOME:-$HOME/.cache}/wax"
    my $cache_home = \&File::BaseDir::cache_home;

    return $self->directory($cache_home->($self->app_name));
}

# print the module's $VERSION, followed by $/, then terminate the process
# with a zero exit status
method _dump_version () {
    my @output = ($VERSION, $/);

    print @output;
    exit(0);
}

# write a message to stderr (via `warn`), prefixed with the app's name and the
# supplied log level. the template is only passed through sprintf if there are
# arguments to format; otherwise it's used verbatim
method log ($level, $template, @args) {
    my $app = $self->app_name;
    my $text = $template;

    if (@args) {
        $text = sprintf($template, @args);
    }

    warn "$app: $level: $text", $/;
}

# return a best-effort guess at the URL's file extension based on its content
# type, e.g. ".md" or ".tar.gz", or an empty string if one can't be determined.
#
# the extension is taken from the URL's path if the content type is listed in
# INFER_EXTENSION and the URL has no query string; otherwise (or if the path
# doesn't yield one) the first extension registered for the content type is
# used.
#
# XXX note: makes a network request to determine the content type
method extension ($_url) {
    my ($url, $url_index) = @$_url;
    my $extension = '';
    my $split = $self->is_url($url);

    return $extension unless ($split);

    # only the path and query are needed here
    my (undef, undef, $path, $query) = @$split;
    my $content_type = $self->content_type($_url);

    return $extension unless ($content_type); # empty if the request failed

    if (INFER_EXTENSION->{$content_type}) {
        # try to get a more specific extension from the path
        if (!defined($query) && $path && ($path =~ EXTENSION)) {
            $extension = $+;
        }
    }

    unless ($extension) {
        # servers can send content types the MIME database has no entry for,
        # in which case there's no type object to query: leave the extension
        # empty rather than dying on an undefined value
        my $mime_type = $self->mime_types->type($content_type);
        my @extensions = $mime_type ? $mime_type->extensions : ();

        if (@extensions) {
            $extension = '.' . $extensions[0];
        }
    }

    $self->debug('extension (%d): %s', $url_index, $extension);

    return $extension;
}

# return a truthy value (an arrayref containing the URL's components:
# scheme, domain, path, query and fragment) if the supplied value can be
# parsed as a URL, or a falsey value otherwise.
#
# the failure cases return explicitly (undef in scalar context, an empty list
# in list context) rather than falling off the end of the method and
# returning whatever the last condition happened to evaluate to
method is_url ($url) {
    return unless ($url =~ m{^[a-zA-Z][\w+]*://}); # basic sanity check

    my ($scheme, $domain, $path, $query, $fragment) = uri_split($url);

    # no domain for file:// URLs
    return unless ($scheme && ($domain || $path));

    return [$scheme, $domain, $path, $query, $fragment];
}

# write a message to stderr (via `warn`), prefixed with the app's name, but
# only if the `verbose` option is enabled. the template is only passed through
# sprintf if there are arguments to format
method debug ($template, @args) {
    if ($self->verbose) {
        my $app = $self->app_name;
        my $text = $template;

        if (@args) {
            $text = sprintf($template, @args);
        }

        warn "$app: $text", $/;
    }
}

# post-download housekeeping for a resolved [$command_index, $filename, $error]
# triple: substitute the file path for the placeholder in the command; queue
# the path for deletion unless files are being kept; and log the error, if any.
# returns E_DOWNLOAD if there was an error, or OK otherwise
#
# XXX give this a more descriptive name, e.g. _handle_download or _after_download
method _handle ($resolved, $command, $unlink) {
    my ($position, $path, $failure) = @$resolved;

    # swap the placeholder for the local path
    $command->[$position] = $path;

    # temporary files are cleaned up later
    push(@$unlink, $path) unless ($self->keep);

    return OK unless ($failure);

    $self->log(ERROR => $failure);

    return E_DOWNLOAD;
}

# return the command (an arrayref of arguments) as a single space-separated
# string. arguments which contain characters outside a conservative "safe" set
# are quoted, and empty arguments are rendered as '' so that they don't
# silently disappear from the output.
#
# this is purely for diagnostic purposes, i.e. there's no guarantee
# that the dumped command can be used as a command line. a better
# (but still imperfect/incomplete) implementation would require at
# least two extra modules: Win32::ShellQuote and String::ShellQuote:
# https://rt.cpan.org/Public/Bug/Display.html?id=37348
method dump_command ($args) {
    my @words = map {
        my $arg = $_;

        if (not length($arg)) {
            $arg = q{''}; # keep empty arguments visible
        } elsif ($arg =~ /[^0-9A-Za-z+,.\/:=\@_-]/) {
            $arg = _escape($arg);
        }

        $arg;
    } @$args;

    return join(' ', @words);
}

# map a URL to a local file: work out the target filename (a persistent name
# if files are being kept, a temporary one otherwise), then download the URL
# to it unless an error has already occurred. returns a $filename => $error
# pair (as a list in list context, or as an arrayref otherwise), where the
# error is the first error message encountered
method resolve ($_url) {
    my ($filename, $error) = $self->keep
        ? $self->resolve_keep($_url)
        : $self->resolve_temp($_url);

    # don't attempt the download if the filename couldn't be resolved cleanly
    unless ($error) {
        $error = $self->download($_url, $filename);
    }

    my @pair = ($filename, $error);

    return wantarray ? @pair : \@pair;
}

# takes a URL and returns a $filename => $error pair for cacheable files.
# in order to calculate the filename, we need to determine the URL's extension,
# which requires a network request for the content type. to avoid hitting the
# network for subsequent requests, we cache the extension in an index file.
#
# the file is placed in the configured directory, if there is one, or the
# system's temp directory otherwise, and is named after the app and the SHA-1
# digest of the URL
method resolve_keep ($_url) {
    my ($url, $url_index) = @$_url;
    my $directory = $self->has_directory ? $self->directory : File::Spec->tmpdir;
    my $id = sprintf('%s_%s', $self->app_name, sha1_hex($url));
    my $index_file = File::Spec->catfile($directory, sprintf(INDEX, $id));

    # initialised so that the filename below is still well-defined (i.e. has no
    # extension) if the index can't be read
    my $extension = '';
    my $error;

    # -s: if /tmp is full, the index file may get written as an empty file, so
    # make sure it's non-empty
    if (-s $index_file) {
        $self->debug('index (%d): %s (exists)', $url_index, $index_file);

        try {
            $extension = read_text($index_file);
        } catch {
            $error = "unable to load index #$url_index ($index_file): $_";
        };
    } else {
        $self->debug('index (%d): %s (create)', $url_index, $index_file);
        $extension = $self->extension($_url);

        try {
            write_text($index_file, $extension);
        } catch {
            $error = "unable to save index #$url_index ($index_file): $_";
        };
    }

    my $filename = File::Spec->catfile($directory, "$id$extension");

    return ($filename, $error);
}

# takes a URL and returns a $filename => $error pair for
# temporary files (i.e. files which will be automatically unlinked)



( run in 1.990 second using v1.01-cache-2.11-cpan-5735350b133 )