App-grepurl
view release on metacpan or search on metacpan
lib/App/grepurl.pm view on Meta::CPAN
=over 4
=item Operate over an entire directory or website
=back
=head1 SEE ALSO
urifind by darren chamberlain E<lt>darren@cpan.orgE<gt>
=head1 SOURCE AVAILABILITY
This source is in Github:
https://github.com/briandfoy/app-grepurl
=head1 AUTHOR
brian d foy, C<< <briandfoy@pobox.com> >>
=head1 COPYRIGHT
Copyright © 2004-2025, brian d foy <briandfoy@pobox.com>. All rights reserved.
You may use this program under the terms of the Artistic License 2.0.
=cut
use File::Basename;
use FindBin;
use Getopt::Std;
use Mojo::DOM;
use Mojo::URL;
use Mojo::UserAgent;
use Mojo::Util qw(dumper);
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
our $VERSION = '1.013';
run(@ARGV) unless caller;
sub new {
my $self = bless {}, $_[0];
$self->init;
$self;
}
sub init {}
sub debug { warn join "\n", @_, '' }
sub run {
my( $class, @args ) = @_;
unless( @args ) {
print "$FindBin::Script $VERSION\n";
exit;
}
my %opts;
{
local @ARGV = @args;
getopts( 'bdv1' . 'aAiIjJ' . 'e:E:f:h:H:p:P:s:S:t:u:', \%opts );
}
# print STDERR Dumper( \%opts ); use Data::Dumper;
# print STDERR "Processed opts\n";
my $obj = $class->new();
$obj->{opts} = \%opts;
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
$obj->{Debug} = $opts{d} || $ENV{GREPURL_DEBUG} || 0;
{ no warnings 'redefine'; *debug = sub { 0 } unless $obj->{Debug} }
$obj->{Verbose} = $opts{v} || $ENV{GREPURL_VERBOSE} || 0;
$obj->{Either} = $obj->{Debug} || $obj->{Verbose} || 0;
$obj->{Hosts} = uncommify( $opts{h} );
$obj->{No_hosts} = uncommify( $opts{H} );
$obj->{Schemes} = uncommify( $opts{'s'} );
$obj->{No_schemes} = uncommify( $opts{S} );
$obj->{Extensions} = uncommify( $opts{e} );
$obj->{No_extensions} = uncommify( $opts{E} );
$obj->{Path} = regex( $opts{p} );
$obj->{No_path} = regex( $opts{P} );
$obj->{Regex} = regex( $opts{r} );
$obj->{No_regex} = regex( $opts{R} );
$obj->debug_summary if $obj->{Debug};
debug( "Moving on\n" );
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
my $text = $obj->get_text;
die "There is no text!\n" unless( defined $text && length $text > 0 );
my $urls = $obj->extract_from_html( $text );
debug( "Got URLs:\n" . dumper($urls) );
@$urls = do {
if( defined $opts{b} ) {
my $base = Mojo::URL->new( $opts{b} );
debug( "Base url is $base\n" );
map { Mojo::URL->new( $_ )->base( $base )->to_abs } @$urls;
}
else {
map { Mojo::URL->new( $_ ) } @$urls;
}
};
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Filters
#
# To select things, only pass through those elements
#
# To not select things, pass through anything that does not match
@$urls = map {
my $s = eval { $_->scheme };
( run in 1.036 second using v1.01-cache-2.11-cpan-97f6503c9c8 )