PUT results from the CPAN

App-nauniq
view release on metacpan or search on metacpan
#!perl

use 5.010001;
use strict;
use warnings;

use App::nauniq;
use Getopt::Long;

# Distribution metadata. $VERSION is what the --version switch prints.
# NOTE(review): the trailing "# AUTHORITY" / "# DATE" / "# DIST" / "# VERSION"
# comments look like markers maintained by the release tooling -- keep them
# intact when editing these lines.
our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
our $DATE = '2025-09-24'; # DATE
our $DIST = 'App-nauniq'; # DIST
our $VERSION = '0.112'; # VERSION

# Default value of every command-line setting. parse_cmdline() overwrites
# entries in place; the whole hash is then handed to App::nauniq::run().
my %Opts = (
    # Which lines get printed (switched together by --unique / --repeated).
    show_unique    => 1,
    show_repeated  => 0,

    # How lines are compared.
    ignore_case    => 0,
    skip_chars     => 0,
    check_chars    => -1,       # -1: check all characters in a line

    # What is remembered about lines already seen.
    num_entries    => -1,       # -1: unlimited
    md5            => 0,
    forget_pattern => undef,    # no pattern unless --forget-pattern is given

    # Output file handling.
    append         => 0,
    read_output    => 0,
);

# Parse the command line (@ARGV) into the file-level %Opts hash.
#
# Takes no arguments and returns nothing useful. Side effects:
#   - recognised switches update %Opts;
#   - --help prints the usage summary and exits 0;
#   - --version prints the version and exits 0;
#   - any option error reported by Getopt::Long, or an invalid
#     --forget-pattern regex, exits with status 99.
sub parse_cmdline {
    # Option names are matched case-sensitively (no_ignore_case).
    Getopt::Long::Configure("gnu_getopt", "no_ignore_case");

    # Raw text of --forget-pattern. It is compiled only after every option
    # has been seen, so that -i applies to the pattern no matter whether it
    # appears before or after --forget-pattern on the command line.
    my $forget_pattern;

    my $res = GetOptions(
        'repeated|d'       =>
            sub { $Opts{show_unique} = 0; $Opts{show_repeated} = 1 },
        'ignore-case|i'    => \$Opts{ignore_case},
        'num-entries=i'    => \$Opts{num_entries},
        'skip-chars|s=i'   => \$Opts{skip_chars},
        'unique|u'         =>
            sub { $Opts{show_unique} = 1; $Opts{show_repeated} = 0 },
        'check-chars|w=i'  => \$Opts{check_chars},
        # -a is shorthand for --append --read-output.
        'a'                => sub {
            $Opts{append} = 1; $Opts{read_output} = 1;
        },
        'append'           => \$Opts{append},
        'forget-pattern=s' => \$forget_pattern,
        'md5'              => \$Opts{md5},
        'read-output'      => \$Opts{read_output},
        'help|h'           => sub {
            # --num-entries has no short form, so none is advertised here.
            print <<USAGE;
Usage:
  nauniq [OPTIONS]... [INPUT [OUTPUT]]
  nauniq --help
Options:
  --repeated, -d
  --ignore-case, -i
  --num-entries=N
  --skip-chars=N, -s
  --unique, -u
  --check-chars=N, -w
  --append
  --read-output
  -a
  --md5
  --forget-pattern=S
For more details, see the manpage/documentation.
USAGE
            exit 0;
        },
        'version|v' => sub {
            print "nauniq version ", ($main::VERSION // 'dev'), "\n";
            exit 0;
        },
    );
    exit 99 if !$res;

    if (defined $forget_pattern) {
        # qr// dies on a malformed pattern; a successful compile always
        # yields a defined value, so undef reliably signals failure.
        my $re = eval {
            $Opts{ignore_case} ? qr/$forget_pattern/i : qr/$forget_pattern/;
        };
        if (!defined $re) {
            warn "Invalid regex pattern in --forget-pattern: $@\n";
            exit 99;
        }
        $Opts{forget_pattern} = $re;
    }

    return;
}

# MAIN
#
# Fill %Opts from the command line (parse_cmdline exits by itself on --help,
# --version or an option error), then hand the options to App::nauniq::run.
# NOTE(review): the positional INPUT/OUTPUT arguments are not passed here;
# presumably run() picks them up from @ARGV -- confirm in App::nauniq.

parse_cmdline();
App::nauniq::run(%Opts);

1;
# ABSTRACT: Non-adjacent uniq
# PODNAME: nauniq

__END__

=pod

=encoding UTF-8

=head1 NAME

nauniq - Non-adjacent uniq

=head1 VERSION

This document describes version 0.112 of nauniq (from Perl distribution App-nauniq), released on 2025-09-24.

=head1 SYNOPSIS

 nauniq [OPTION]... [INPUT [OUTPUT]]

=head1 DESCRIPTION

C<nauniq> is similar to the Unix command C<uniq> but detects repeated lines even
if they are not adjacent. To do this, C<nauniq> must remember the lines being
fed to it. It's basically a glorified form of something like these:

 % awk '!mem[$0]++' INPUT
 % perl -ne'print unless $mem{$_}++' INPUT

There are options to control memory usage: option to only remember a certain
number of unique lines, option to remember a certain number of characters for
each line, and option to only remember the MD5 hash (instead of the content) of
each line. There are also other options like append, forget, and so on.

=head1 EXIT CODES

0 on success.

255 on I/O error.

99 on command-line options error.

=head1 OPTIONS

=over

=item * --repeated, -d

Print only duplicate lines. The opposite of C<--unique>.

=item * --ignore-case, -i

Ignore case.

=item * --num-entries=N

Number of unique entries to remember. The default is -1 (unlimited). This option
is to control memory usage, but the consequence is that lines that are too far
apart will be forgotten.

=item * --skip-chars=N, -s

Number of characters from the beginning of line to skip when checking
uniqueness.

=item * --unique, -u

Print only unique lines. This is the default. The opposite of C<--repeated>.

=item * --check-chars=N, -w

The number of characters to check for uniqueness. The default is -1 (check all
characters in a line).

=item * --append

Open output file in append mode. See also C<-a>.

=item * -a

Equivalent to C<--append --read-output>.

=item * --forget-pattern=S

This is an alternative to C<--num-entries>. Instead of instructing C<nauniq> to
remember only a fixed number of entries, you can specify a regex pattern to
trigger forgetting the remembered lines. An example use case is when you have a
file like this:
( run in 0.610 second using v1.01-cache-2.11-cpan-140bd7fdf52 )