App-freqtable
view release on metacpan or search on metacpan
script/freqtable view on Meta::CPAN
#!perl
use strict;
use warnings;
use Getopt::Long qw(:config bundling no_ignore_case);
use POSIX qw(ceil);
our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
our $DATE = '2025-08-03'; # DATE
our $DIST = 'App-freqtable'; # DIST
our $VERSION = '0.010'; # VERSION
# Raw string given to --rank/-r; parse_cmdline() validates it and splits it
# into $Opts{min_rank} / $Opts{max_rank}.
my $tmp_rank;
# Effective program options. Defaults are set here and overridden by
# parse_cmdline() from the command line.
my %Opts = (
# What is counted: 'byte', 'char', 'word', 'line', 'number' or 'integer'
# (selected with -c, -m, -w, -l, -n, -i respectively).
mode => 'line',
# True when --ignore-case/-f is given; keys are then lowercased with lc()
# before being counted.
ignore_case => 0,
# Switched on by --print-total/-t and off by --no-print-total/-T.
print_total => 0,
# Switched on by --print-freq and off by --no-print-freq/-F.
print_freq => 1,
# XXX options to limit memory usage, e.g. max keys, max line length, --md5 (like in nauniq), ...
# Rank bounds derived from --rank (N, M-N, M- or -N); undef means the
# corresponding bound was not given.
min_rank => undef,
max_rank => undef,
# KEY=VALUE pairs collected from repeated --sort-arg options.
sort_args => {},
# Name given to --sort-sub; -a sets it to 'asciibetically'.
sort_sub => undef,
# Counter: incremented once for every --percent/-p on the command line.
percent => 0,
# Set to 1 by --clear-before-output.
clear_before_output => 0,
# Integer from --output-every; when true, the table is also displayed after
# every that-many input items while input is still being read.
output_every => undef,
# printf() format string from --format; when defined, each frequency line is
# printed with it, receiving the count, the key and the percentage.
format => undef,
);
# Incremented when reading input in 'number' or 'integer' mode.
# NOTE(review): its consumer is not visible in this chunk -- presumably it
# selects numeric handling of keys when the table is displayed; confirm.
my $Numeric;
# Maps each counted key (line, word, number, ...) to its occurrence count.
my %Occurences;
# Parse the command line into %Opts and validate the --rank argument.
#
# Options are read from @ARGV with Getopt::Long's GetOptions() (imported with
# the 'bundling' and 'no_ignore_case' configuration, so -t and -T are distinct
# and single-letter options can be bundled). Results are stored in the
# file-level %Opts hash; the raw --rank string goes through $tmp_rank first.
#
# Takes no arguments and has no meaningful return value.
#
# Exits the process:
#   - with status 0 after printing the usage text for --help/-h;
#   - with status 99 when GetOptions() reports a failure or when the --rank
#     value is malformed or describes an inverted range (M greater than N).
sub parse_cmdline {
    my $res = GetOptions(
        'bytes|c' => sub { $Opts{mode} = 'byte' },
        'chars|m' => sub { $Opts{mode} = 'char' },
        'words|w' => sub { $Opts{mode} = 'word' },
        'lines|l' => sub { $Opts{mode} = 'line' },
        'number|n' => sub { $Opts{mode} = 'number' },
        'integer|i' => sub { $Opts{mode} = 'integer' },
        'ignore-case|f' => \$Opts{ignore_case},
        'no-print-freq|F' => sub { $Opts{print_freq} = 0 },
        'print-freq' => sub { $Opts{print_freq} = 1 },
        'no-print-total|T' => sub { $Opts{print_total} = 0 },
        'print-total|t' => sub { $Opts{print_total} = 1 },
        'rank|r=s' => \$tmp_rank,
        'sort-sub=s' => \$Opts{sort_sub},
        'sort-arg=s%' => $Opts{sort_args},
        'a' => sub { $Opts{sort_sub} = 'asciibetically' },
        # A counter rather than a flag: each repetition of -p increments it.
        'percent|p' => sub { $Opts{percent}++ },
        'clear-before-output' => sub { $Opts{clear_before_output} = 1 },
        'output-every=i' => \$Opts{output_every},
        'format=s' =>\$Opts{format},
        'help|h' => sub {
            # The heredoc body and its terminator must stay at column 0.
            print <<USAGE;
Usage:
freqtable [OPTIONS]... < INPUT
freqtable --help (or -h)
Options:
--bytes, -c
--chars, -m
--words, -w
--lines, -l
--number, -n
--integer, -i
--ignore-case, -f
--print-freq
--no-print-freq, -F
--print-total, -t
--no-print-total, -T
--percent, -p
--format=FMT
--rank N|M-N|M-|-N, -r
--sort-sub=SPEC
--sort-arg=ARG=VAL
-a
--output-every=i
--clear-before-output
For more details, see the manpage/documentation.
USAGE
            exit 0;
        },
    );

    # Translate the raw --rank string into min_rank/max_rank.
    if (defined $tmp_rank) {
        if ($tmp_rank =~ /\A\d+\z/) {
            # N: a single rank.
            $Opts{min_rank} = $Opts{max_rank} = $tmp_rank;
        } elsif ($tmp_rank =~ /\A-(\d+)\z/) {
            # -N: upper bound only.
            $Opts{max_rank} = $1;
        } elsif ($tmp_rank =~ /\A(\d+)-\z/) {
            # M-: lower bound only.
            $Opts{min_rank} = $1;
        } elsif ($tmp_rank =~ /\A(\d+)-(\d+)\z/) {
            # M-N: both bounds. Copy the captures before doing anything else
            # so they cannot be clobbered by a later match.
            my ($min, $max) = ($1, $2);
            if ($min > $max) {
                # No rank can satisfy an inverted range; report it instead of
                # silently accepting a range that selects nothing.
                warn "freqtable: Invalid value for --rank: '$tmp_rank', ".
                    "minimum rank must not be greater than maximum rank\n";
                $res = 0;
            } else {
                $Opts{min_rank} = $min;
                $Opts{max_rank} = $max;
            }
        } else {
            warn "freqtable: Invalid value for --rank: '$tmp_rank', ".
                "please specify N|M-N|M-|-N\n";
            $res = 0;
        }
    }

    # GetOptions() has already printed its own diagnostics on failure.
    exit 99 if !$res;
}
sub _print_freqline {
my ($n, $k, $totoccurrences) = @_;
if (defined $Opts{format}) {
my $pct = $totoccurrences == 0 ? 0 : $n/$totoccurrences*100;
{
no warnings; # XXX only disable warning 'redundant argument in printf'
printf $Opts{format}, $n, $k, $pct;
print "\n";
}
} else {
if ($Opts{print_freq}) {
unless ($Opts{percent} && $Opts{percent} < 2) {
script/freqtable view on Meta::CPAN
$i++;
if ($Opts{ignore_case}) {
$Occurences{lc $1}++;
} else {
$Occurences{$1}++;
}
_display_table() if $Opts{output_every} && $i % $Opts{output_every} == 0;
}
}
} elsif ($Opts{mode} eq 'line') {
while (defined(my $line = <>)) {
$i++;
chomp $line;
if ($Opts{ignore_case}) {
$Occurences{lc $line}++;
} else {
$Occurences{$line}++;
}
_display_table() if $Opts{output_every} && $i % $Opts{output_every} == 0;
}
} elsif ($Opts{mode} eq 'number' || $Opts{mode} eq 'integer') {
$Numeric++;
while (defined(my $line = <>)) {
$i++;
my $num = $Opts{mode} eq 'integer' ? int($line) : $line + 0;
$Occurences{$num}++;
_display_table() if $Opts{output_every} && $i % $Opts{output_every} == 0;
}
} else {
die "freqtable: BUG: Unknown mode '$Opts{mode}'";
}
_display_table();
}
# MAIN
# Parse the command line first: parse_cmdline() fills %Opts and may exit
# (status 0 for --help, 99 for invalid options), so run() is only reached
# with a valid configuration.
parse_cmdline();
run();
# Trailing true value as the script's last statement.
1;
# ABSTRACT: Print frequency table of lines/words/characters/bytes/numbers
# PODNAME: freqtable
__END__
=pod
=encoding UTF-8
=head1 NAME
freqtable - Print frequency table of lines/words/characters/bytes/numbers
=head1 VERSION
This document describes version 0.010 of freqtable (from Perl distribution App-freqtable), released on 2025-08-03.
=head1 SYNOPSIS
% freqtable [OPTIONS] < INPUT
Sample input:
% cat input-lines.txt
one
one
two
three
four
five
five
five
six
seven
eight
eight
nine
% cat input-words.txt
one one two three four five five five six seven eight eight nine
% cat input-nums.txt
9.99 cents
9.99 dollars
9 cents
=head2 Modes
Display frequency table (by default: lines):
% freqtable input-lines.txt
3 five
2 eight
2 one
1 four
1 nine
1 seven
1 six
1 three
1 two
Display frequency table (words):
% freqtable -w input-words.txt
3 five
2 eight
2 one
1 four
1 nine
1 seven
1 six
1 three
1 two
Display frequency table (bytes; use C<-m> instead of C<-c> to count characters):
% freqtable -c input-words.txt
12
12 e
7 i
( run in 1.727 second using v1.01-cache-2.11-cpan-13bb782fe5a )