App-grepurl

 view release on metacpan or  search on metacpan

lib/App/grepurl.pm  view on Meta::CPAN

=over 4

=item Operate over an entire directory or website

=back

=head1 SEE ALSO

urifind by darren chamberlain E<lt>darren@cpan.orgE<gt>

=head1 SOURCE AVAILABILITY

This source is in Github:

	https://github.com/briandfoy/app-grepurl

=head1 AUTHOR

brian d foy, C<< <briandfoy@pobox.com> >>

=head1 COPYRIGHT

Copyright © 2004-2025, brian d foy <briandfoy@pobox.com>. All rights reserved.

You may use this program under the terms of the Artistic License 2.0.

=cut

use File::Basename;
use FindBin;
use Getopt::Std;
use Mojo::DOM;
use Mojo::URL;
use Mojo::UserAgent;
use Mojo::Util qw(dumper);

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
our $VERSION = '1.013';

run(@ARGV) unless caller;

sub new {
	my $self = bless {}, $_[0];
	$self->init;
	$self;
	}

sub init {}

sub debug { warn join "\n", @_, '' }

sub run {
	my( $class, @args ) = @_;
	unless( @args ) {
		print "$FindBin::Script $VERSION\n";
		exit;
		}

	my %opts;
	{
	local @ARGV = @args;
	getopts( 'bdv1' . 'aAiIjJ' . 'e:E:f:h:H:p:P:s:S:t:u:', \%opts );
	}
#	print STDERR Dumper( \%opts ); use Data::Dumper;
#	print STDERR "Processed opts\n";

	my $obj = $class->new();
	$obj->{opts} = \%opts;

	# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
	$obj->{Debug}         = $opts{d} || $ENV{GREPURL_DEBUG} || 0;
	{ no warnings 'redefine'; *debug = sub { 0 } unless $obj->{Debug} }

	$obj->{Verbose}       = $opts{v} || $ENV{GREPURL_VERBOSE} || 0;
	$obj->{Either}        = $obj->{Debug} || $obj->{Verbose} || 0;

	$obj->{Hosts}         = uncommify( $opts{h} );
	$obj->{No_hosts}      = uncommify( $opts{H} );

	$obj->{Schemes}       = uncommify( $opts{'s'} );
	$obj->{No_schemes}    = uncommify( $opts{S} );

	$obj->{Extensions}    = uncommify( $opts{e} );
	$obj->{No_extensions} = uncommify( $opts{E} );

	$obj->{Path}          = regex( $opts{p} );
	$obj->{No_path}       = regex( $opts{P} );

	$obj->{Regex}         = regex( $opts{r} );
	$obj->{No_regex}      = regex( $opts{R} );

	$obj->debug_summary if $obj->{Debug};

	debug( "Moving on\n" );

	# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
	my $text = $obj->get_text;

	die "There is no text!\n" unless( defined $text && length $text > 0 );
	my $urls = $obj->extract_from_html( $text );
	debug( "Got URLs:\n" . dumper($urls) );

	@$urls = do {
		if( defined $opts{b} ) {
			my $base = Mojo::URL->new( $opts{b} );
			debug( "Base url is $base\n" );
			map { Mojo::URL->new( $_ )->base( $base )->to_abs } @$urls;
			}
		else {
			map { Mojo::URL->new( $_ ) } @$urls;
			}
		};

	# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
	# Filters
	#
	# To select things, only pass through those elements
	#
	# To not select things, pass through anything that does not match
	@$urls = map {
		my $s = eval { $_->scheme };



( run in 1.036 second using v1.01-cache-2.11-cpan-97f6503c9c8 )