App-Licensecheck

 view release on metacpan or  search on metacpan

bin/licensecheck  view on Meta::CPAN


my $progname = path($0)->basename;

our %OPT = ();
my @OPT = ();

=head1 OPTIONS

=head2 Resolving patterns

=over 16

=item B<--shortname-scheme>

I<Since v3.2.>

comma-separated priority list of license naming schemes
to use for license identifiers
S<(default value: unset (use verbose description))>

=item B<--list-licenses>

I<Since v3.2.>

list identifiers for all detectable licenses and exit

=item B<--list-naming-schemes>

I<Since v3.2.>

list all available license naming schemes and exit

=back

=cut

push @OPT, qw(
	shortname-scheme=s
	list-licenses
	list-naming-schemes
);

=head2 Selecting files

=over 16

=item B<-c> I<REGEX>, B<--check>=I<REGEX>

I<Since v2.10.10.>

regular expression of files to include
when more than one F<PATH> is provided
S<(default value: common source files)>

=item B<-i> I<REGEX>, B<--ignore>=I<REGEX>

I<Since v2.10.10.>

regular expression of files to skip
when more than one F<PATH> is provided
S<(default value: some backup and VCS files)>

=item B<-r>, B<--recursive>

I<Since v2.10.7.>

traverse directories recursively

=back

=cut

push @OPT, qw(
	check|c=s
	ignore|i=s
	recursive|r
);
$OPT{check}  = 'common source files';
$OPT{ignore} = 'some backup and VCS files';

=head2 Parsing contents

=over 16

=item B<-l> I<N>, B<--lines>=I<N>

I<Since v2.10.3.>

number of lines to parse from top of each file;
implies optimistic search
including only first cluster of detected copyrights or licenses;
set to I<0> to parse the whole file
(and ignore B<--tail>)
S<(default value: I<60>)>

=item B<--tail>=I<N>

I<Since v2.15.10.>

number of bytes to parse from bottom of each file
when parsing only from top of each file and finding nothing there;
set to 0 to avoid parsing from end of file
(or set B<--lines> to I<0> and ignore this setting)
S<(default value: 5000 (roughly 60 lines))>

=item B<-e> I<CODEC>, B<--encoding>=I<CODEC>

I<Since v2.15.10.>

try decode source files from the specified codec,
with C<iso-8859-1> as fallback
S<(default value: unset (no decoding))>

=back

=cut

push @OPT, qw(
	lines|l=i
	tail=i
	encoding|e=s
);
$OPT{lines} = 60;
$OPT{tail}  = 5000;

=head2 Reporting results

=over 16

=item B<--copyright>

I<Since v2.10.7.>

add copyright statements to license information

=item B<-s>, B<--skipped>

I<Since v3.3.0.>

bin/licensecheck  view on Meta::CPAN

}
if ( $OPT{noconf} ) {
	$log->warn('option --no-conf ingored: obsolete');    # since 2016
}
if ( $OPT{noverbose} ) {
	$log->warn('option --no-verbose ignored: obsolete');    # since 2021
}

pod2usage("$progname: No paths provided.")
	unless @ARGV;

my $app = App::Licensecheck->new(

	# parse
	top_lines => $OPT{lines},
	end_bytes => $OPT{tail},
	encoding  => $OPT{encoding},

	# report
	naming => $naming,
);

my $default_check_regex = q!
	/[\w-]+$ # executable scripts or README like file
	|\.( # search for file suffix
		c(c|pp|xx)? # c and c++
		|h(h|pp|xx)? # header files for c and c++
		|S
		|css|less # HTML css and similar
		|f(77|90)?
		|go
		|groovy
		|lisp
		|scala
		|clj
		|p(l|m)?6?|t|xs|pod6? # perl5 or perl6
		|sh
		|php
		|py(|x)
		|rb
		|java
		|js
		|vala
		|el
		|sc(i|e)
		|cs
		|pas
		|inc
		|dtd|xsl
		|mod
		|m
		|md|markdown
		|tex
		|mli?
		|(c|l)?hs
	)$
!;

# From dpkg-source
my $default_ignore_regex = q!
	# Ignore general backup files
	~$|
	# Ignore emacs recovery files
	(?:^|/)\.#|
	# Ignore vi swap files
	(?:^|/)\..*\.swp$|
	# Ignore baz-style junk files or directories
	(?:^|/),,.*(?:$|/.*$)|
	# File-names that should be ignored (never directories)
	(?:^|/)(?:DEADJOE|\.cvsignore|\.arch-inventory|\.bzrignore|\.gitignore)$|
	# File or directory names that should be ignored
	(?:^|/)(?:CVS|RCS|\.pc|\.deps|\{arch\}|\.arch-ids|\.svn|\.hg|_darcs|\.git|
	\.shelf|_MTN|\.bzr(?:\.backup|tags)?)(?:$|/.*$)
!;

my $check_regex = $OPT{check};
if ( !$check_regex or $check_regex eq 'common source files' ) {
	$check_regex = qr/$default_check_regex/x;
}
else {
	$check_regex = qr/$check_regex/;
}

my $ignore_regex = $OPT{ignore};
if ( !$ignore_regex or $ignore_regex eq 'some backup and VCS files' ) {
	$ignore_regex = qr/$default_ignore_regex/x;
}
else {
	$ignore_regex = qr/$ignore_regex/;
}

my %patternfiles;
my %patternownerlines;
my %patternlicense;

my @paths = @ARGV;

my $do      = Path::Iterator::Rule->new;
my %options = (
	follow_symlinks => 0,
);

$do->max_depth(1)
	unless $OPT{recursive};
$do->not( sub {/$ignore_regex/} );
$do->file->nonempty;

if ( @paths >> 1 ) {
	if ( $log->is_debug or $OPT{skipped} && $log->is_warn ) {
		my $dont = $do->clone->not( sub {/$check_regex/} );
		foreach ( $dont->all( @paths, \%options ) ) {
			if ( $OPT{skipped} ) {
				$log->warnf( 'skipped file %s', $_ );
			}
			else {
				$log->debugf( 'skipped file %s', $_ );
			}
		}
	}
	$do->and( sub {/$check_regex/} );
}

foreach my $file ( $do->all( @paths, \%options ) ) {
	my ( $license, $copyright ) = $app->parse($file);

	# drop duplicates
	my @copyrights = uniqstr sort { $b cmp $a } split /^/, $copyright;
	chomp @copyrights;

	if ( $OPT{'deb-machine'} ) {
		my @ownerlines_clean        = ();
		my %owneryears              = ();
		my $owneryears_seem_correct = 1;
		for my $ownerline (@copyrights) {
			my ( $owneryear, $owner )
				= $ownerline =~ /^(\d{4}(?:(?:-|, )\d{4})*)? ?(\S.*)?/;
			$owneryears_seem_correct = 0 unless ($owneryear);
			$owner =~ s/,?\s+All Rights Reserved\.?//gi if ($owner);
			push @ownerlines_clean,
				join unbackslash( $OPT{'copyright-delimiter'}, ),
				$owneryear || (), $owner || ();
			push @{ $owneryears{ $owner || '' } }, $owneryear;
		}
		my @owners = sort keys %owneryears;
		@owners = ()



( run in 2.487 seconds using v1.01-cache-2.11-cpan-2398b32b56e )