App-CSVUtils

 view release on metacpan or  search on metacpan

lib/App/CSVUtils/csv_pick_rows.pm  view on Meta::CPAN


# Package-level release metadata for this module: authority, release date,
# distribution name, and version.
# NOTE(review): the trailing "# AUTHORITY" / "# DATE" / "# DIST" / "# VERSION"
# markers look like placeholders maintained by the release tooling -- confirm
# before editing these lines by hand.
our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
our $DATE = '2025-02-04'; # DATE
our $DIST = 'App-CSVUtils'; # DIST
our $VERSION = '1.036'; # VERSION

use App::CSVUtils qw(
                        gen_csv_util
                );

# Define the csv_pick_rows utility: pick one or more rows at random from the
# input CSV in a single pass. Rows are collected in the stash while reading
# and only printed once the whole input has been consumed.
gen_csv_util(
    name => 'csv_pick_rows',
    summary => 'Return one or more random rows from CSV',
    description => <<'_',


_
    add_args => {
        num_rows => {
            summary => 'Number of rows to pick',
            schema => 'posint*',
            default => 1,
            cmdline_aliases => { n => {} },
        },
    },
    links => [
        { url => 'prog:csv-pick-fields' },
        { url => 'prog:csv-pick-cells' },
    ],
    tags => ['category:extracting', 'random'],

    examples => [
        {
            summary => 'Pick a random row from CSV',
            argv => ['file.csv'],
            test => 0,
            'x.doc.show_result' => 0,
        },
        {
            summary => 'Pick 5 random rows from CSV',
            argv => ['file.csv', '-n5'],
            test => 0,
            'x.doc.show_result' => 0,
        },
    ],

    # Header hook: prepare the stash before any data row is seen.
    on_input_header_row => sub {
        my $stash = shift;

        # Our own stash key: the rows selected so far.
        $stash->{picked_rows} = [];

        # The input_* keys are cleared before after_read_input runs, so the
        # output field list has to be captured here.
        $stash->{output_fields} = $stash->{input_fields};
    },

    # Data-row hook: decide whether the current row enters the selection.
    # NOTE(review): the probabilities below assume input_data_rownum is the
    # 1-based count of data rows read so far -- confirm against gen_csv_util.
    on_input_data_row => sub {
        my $stash = shift;

        my $wanted = $stash->{util_args}{num_rows};

        # Single-row case (algorithm from Learning Perl): the current row
        # replaces the held one with probability 1/rownum.
        if ($wanted == 1) {
            if (rand($stash->{input_data_rownum}) < 1) {
                $stash->{picked_rows}[0] = $stash->{input_row};
            }
            return;
        }

        # Multi-row case (same algorithm, modified to hold several rows).
        my $selection = $stash->{picked_rows};
        my $held      = @$selection;

        if ($held < $wanted) {
            # Still filling up: insert the row at a random position, which
            # may be anywhere from the front to just past the last element.
            splice @$selection, rand($held + 1), 0, $stash->{input_row};
        }
        else {
            # Already holding the requested number of rows: with probability
            # held/rownum, overwrite one randomly chosen slot.
            if (rand($stash->{input_data_rownum}) < $held) {
                splice @$selection, rand($held), 1, $stash->{input_row};
            }
        }
    },

    # End-of-input hook: emit every selected row through the row printer
    # stored in the stash.
    after_read_input => sub {
        my $stash = shift;

        foreach my $picked (@{ $stash->{picked_rows} }) {
            $stash->{code_print_row}($picked);
        }
    },
);

1;
# ABSTRACT: Return one or more random rows from CSV

__END__

=pod

=encoding UTF-8

=head1 NAME

App::CSVUtils::csv_pick_rows - Return one or more random rows from CSV

=head1 VERSION

This document describes version 1.036 of App::CSVUtils::csv_pick_rows (from Perl distribution App-CSVUtils), released on 2025-02-04.

=head1 FUNCTIONS


=head2 csv_pick_rows

Usage:

 csv_pick_rows(%args) -> [$status_code, $reason, $payload, \%result_meta]

Return one or more random rows from CSV.

Examples:

=over

=item * Pick a random row from CSV:



( run in 0.522 second using v1.01-cache-2.11-cpan-d7a12ab2c7f )