App-freqtable

 view release on metacpan or  search on metacpan

script/freqtable  view on Meta::CPAN

#!perl

use strict;
use warnings;

use Getopt::Long qw(:config bundling no_ignore_case);
use POSIX qw(ceil);

our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
our $DATE = '2025-08-03'; # DATE
our $DIST = 'App-freqtable'; # DIST
our $VERSION = '0.010'; # VERSION

# Raw value of --rank/-r as given on the command line. parse_cmdline()
# validates it and splits it into $Opts{min_rank} and $Opts{max_rank}.
my $tmp_rank;
# Program settings. The values below are the defaults; parse_cmdline()
# overrides them from the command-line options.
my %Opts = (
    # Unit being counted: 'byte', 'char', 'word', 'line', 'number' or 'integer'.
    mode => 'line',
    # When true, items are lowercased before being counted.
    ignore_case => 0,
    # Switched on by --print-total/-t, off by --no-print-total/-T.
    print_total => 0,
    # Switched on by --print-freq, off by --no-print-freq/-F.
    print_freq => 1,
    # XXX options to limit memory usage, e.g. max keys, max line length, --md5 (like in nauniq), ...
    # Rank bounds taken from --rank (N, M-N, M- or -N); a bound that was not
    # given stays undef.
    min_rank => undef,
    max_rank => undef,
    # KEY=VALUE pairs collected from repeated --sort-arg options.
    sort_args => {},
    # Value of --sort-sub; -a sets it to 'asciibetically'.
    sort_sub => undef,
    # Counter: incremented once for every --percent/-p on the command line.
    percent => 0,
    # Set to 1 by --clear-before-output.
    clear_before_output => 0,
    # When set to a non-zero integer N, the table is redisplayed after every
    # N input items.
    output_every => undef,
    # printf template used for each output row; it is passed the count, the
    # item and the percentage, in that order.
    format => undef,
);
# Incremented when the mode is 'number' or 'integer'.
my $Numeric;
# Occurrence count per item, keyed by the item itself. The spelling (sic) is
# kept because other code in this file refers to the variable by this name.
my %Occurences;

# Parse the command-line options in @ARGV and store the result in %Opts.
#
# The mode switches (-c, -m, -w, -l, -n, -i) each overwrite $Opts{mode}, so
# the last one given wins. --help/-h prints a usage summary to STDOUT and
# exits with status 0.
#
# The --rank value is validated after option parsing. It accepts N, M-N, M-
# or -N and is split into $Opts{min_rank} and $Opts{max_rank}. A range whose
# lower bound exceeds its upper bound is rejected.
#
# Takes no arguments and returns nothing useful. Exits with status 99 when
# GetOptions() reports an error or when --rank is invalid.
sub parse_cmdline {
    my $res = GetOptions(
        'bytes|c'   => sub { $Opts{mode} = 'byte' },
        'chars|m'   => sub { $Opts{mode} = 'char' },
        'words|w'   => sub { $Opts{mode} = 'word' },
        'lines|l'   => sub { $Opts{mode} = 'line' },
        'number|n'  => sub { $Opts{mode} = 'number' },
        'integer|i' => sub { $Opts{mode} = 'integer' },
        'ignore-case|f' => \$Opts{ignore_case},
        'no-print-freq|F' => sub { $Opts{print_freq} = 0 },
        'print-freq'      => sub { $Opts{print_freq} = 1 },
        'no-print-total|T' => sub { $Opts{print_total} = 0 },
        'print-total|t'    => sub { $Opts{print_total} = 1 },
        'rank|r=s' => \$tmp_rank,
        'sort-sub=s' => \$Opts{sort_sub},
        'sort-arg=s%' => $Opts{sort_args},
        'a'   => sub { $Opts{sort_sub} = 'asciibetically' },
        # -p may be repeated; the count is kept, not just a flag.
        'percent|p' => sub { $Opts{percent}++ },
        'clear-before-output' => sub { $Opts{clear_before_output} = 1 },
        'output-every=i' => \$Opts{output_every},
        'format=s' =>\$Opts{format},
        'help|h'  => sub {
            # Single-quoted heredoc: the usage text is literal, so nothing in
            # it can be interpolated by accident.
            print <<'USAGE';
Usage:
  freqtable [OPTIONS]... < INPUT
  freqtable --help (or -h)
Options:
  --bytes, -c
  --chars, -m
  --words, -w
  --lines, -l
  --number, -n
  --integer, -i
  --ignore-case, -f
  --print-freq
  --no-print-freq, -F
  --print-total, -t
  --no-print-total, -T
  --percent, -p
  --format=FMT
  --rank N|M-N|M-|-N, -r
  --sort-sub=SPEC
  --sort-arg=ARG=VAL
  -a
  --output-every=N
  --clear-before-output
For more details, see the manpage/documentation.
USAGE
            exit 0;
        },
    );

    if (defined $tmp_rank) {
        if ($tmp_rank =~ /\A\d+\z/) {
            # A single rank: lower and upper bound are the same.
            $Opts{min_rank} = $Opts{max_rank} = $tmp_rank;
        } elsif ($tmp_rank =~ /\A-(\d+)\z/) {
            $Opts{max_rank} = $1;
        } elsif ($tmp_rank =~ /\A(\d+)-\z/) {
            $Opts{min_rank} = $1;
        } elsif ($tmp_rank =~ /\A(\d+)-(\d+)\z/) {
            my ($min, $max) = ($1, $2);
            if ($min > $max) {
                # A reversed range can never select anything; report it
                # instead of silently accepting it.
                warn "freqtable: Invalid value for --rank: '$tmp_rank', ".
                    "minimum rank must not be greater than maximum rank\n";
                $res = 0;
            } else {
                $Opts{min_rank} = $min;
                $Opts{max_rank} = $max;
            }
        } else {
            warn "freqtable: Invalid value for --rank: '$tmp_rank', ".
                "please specify N|M-N|M-|-N\n";
            $res = 0;
        }
    }

    exit 99 if !$res;
}

sub _print_freqline {
    my ($n, $k, $totoccurrences) = @_;

    if (defined $Opts{format}) {
        my $pct = $totoccurrences == 0 ? 0 : $n/$totoccurrences*100;
        {
            no warnings; # XXX only disable warning 'redundant argument in printf'
            printf $Opts{format}, $n, $k, $pct;
            print "\n";
        }
    } else {
        if ($Opts{print_freq}) {
            unless ($Opts{percent} && $Opts{percent} < 2) {

script/freqtable  view on Meta::CPAN

                $i++;
                if ($Opts{ignore_case}) {
                    $Occurences{lc $1}++;
                } else {
                    $Occurences{$1}++;
                }
                _display_table() if $Opts{output_every} && $i % $Opts{output_every} == 0;
            }
        }
    } elsif ($Opts{mode} eq 'line') {
        while (defined(my $line = <>)) {
            $i++;
            chomp $line;
            if ($Opts{ignore_case}) {
                $Occurences{lc $line}++;
            } else {
                $Occurences{$line}++;
            }
            _display_table() if $Opts{output_every} && $i % $Opts{output_every} == 0;
        }
    } elsif ($Opts{mode} eq 'number' || $Opts{mode} eq 'integer') {
        $Numeric++;
        while (defined(my $line = <>)) {
            $i++;
            my $num = $Opts{mode} eq 'integer' ? int($line) : $line + 0;
            $Occurences{$num}++;
            _display_table() if $Opts{output_every} && $i % $Opts{output_every} == 0;
        }
    } else {
        die "freqtable: BUG: Unknown mode '$Opts{mode}'";
    }

    _display_table();
}

# MAIN
# parse_cmdline() fills %Opts from @ARGV (and exits on --help or on invalid
# options); run() is then called to do the actual work.

parse_cmdline();
run();

1;
# ABSTRACT: Print frequency table of lines/words/characters/bytes/numbers
# PODNAME: freqtable

__END__

=pod

=encoding UTF-8

=head1 NAME

freqtable - Print frequency table of lines/words/characters/bytes/numbers

=head1 VERSION

This document describes version 0.010 of freqtable (from Perl distribution App-freqtable), released on 2025-08-03.

=head1 SYNOPSIS

 % freqtable [OPTIONS] < INPUT

Sample input:

 % cat input-lines.txt
 one
 one
 two
 three
 four
 five
 five
 five
 six
 seven
 eight
 eight
 nine

 % cat input-words.txt
 one one two three four five five five six seven eight eight nine

 % cat input-nums.txt
 9.99 cents
 9.99 dollars
 9 cents

=head2 Modes

Display frequency table (by default: lines):

 % freqtable input-lines.txt
 3       five
 2       eight
 2       one
 1       four
 1       nine
 1       seven
 1       six
 1       three
 1       two

Display frequency table (words):

 % freqtable -w input-words.txt
 3       five
 2       eight
 2       one
 1       four
 1       nine
 1       seven
 1       six
 1       three
 1       two

Display frequency table (bytes; use C<-m> for characters):

 % freqtable -c input-words.txt
 12
 12      e
  7      i



( run in 1.727 second using v1.01-cache-2.11-cpan-13bb782fe5a )