Lingua-Norms-SUBTLEX

 view release on metacpan or  search on metacpan

lib/Lingua/Norms/SUBTLEX.pm  view on Meta::CPAN

=cut

sub select_strings {
    my ( $self, %args ) = @_;
    my %patterns = ();
    if ( hascontent( $args{'regex'} ) ) {
        $patterns{'regex'} = qr/$args{'regex'}/xms;
    }
    if ( hascontent( $args{'cv_pattern'} ) ) {
        my $tmp = q{};
        my @c = split m//ms, uc( $args{'cv_pattern'} );
        foreach (@c) {
            $tmp .= $_ eq 'C' ? '[BCDFGHJKLMNPQRSTVWXYZ]' : '[AEIOU]';
        }
        $patterns{'cv_pattern'} = qr/^$tmp$/ixms;
    }

    my @list = ();
    open my $fh, q{<}, $self->{'_PATH'} or croak $OS_ERROR;
  LINES:
    while (<$fh>) {
        next if $INPUT_LINE_NUMBER == 1;    # skip column heading line
        my @line = split m/\Q$self->{'_DELIM'}\E/xms;
        next if !_in_range( length( $line[0] ), @{ $args{'length'} } );
        for ( keys %patterns ) {
            next LINES if unidecode( $line[0] ) !~ $patterns{$_};
        }
        for (qw/frq_count frq_opm frq_log frq_zipf cd_count cd_pct cd_log/) {
            if (
                ref $args{$_}
                and hascontent(
                    $self->{'_FIELDS'}->datum( $self->{'_LANG'}, $_ . '_idx' )
                )

lib/Lingua/Norms/SUBTLEX.pm  view on Meta::CPAN

=cut

sub random_string {
    my ( $self, %args ) = @_;
    eval { require File::RandomLine; };
    croak 'Need to install and access module File::RandomLine' if $EVAL_ERROR;
    my $rl =
      File::RandomLine->new( $self->{'_PATH'}, { algorithm => 'uniform' } );
    my @ari = ();
    while ( not scalar @ari or $ari[0] eq 'Word' ) {
        @ari = split m/\Q$self->{'_DELIM'}\E/xms, $rl->next;
    }
    return wantarray ? @ari : $ari[0];
}
*random_word = \&random_string;

=head2 Miscellaneous

=head3 n_lines

 $num = $subtlex->n_lines();

lib/Lingua/Norms/SUBTLEX.pm  view on Meta::CPAN

    my $val = [];

    # Search for the string, and isolate the requested values:
    open( my $fh, q{<}, $path ) or croak $OS_ERROR;
    while (<$fh>) {
        next if $INPUT_LINE_NUMBER == 1;    # skip column heading line
        my $file_str =
          _get_file_str( $_, $delim );    # have to declare as can be empty (!)
        next if nocontent($file_str);
        if ( $eq_fn->( $str, $file_str ) ) {
            my @line = split m/\Q$delim\E/xms;
            for my $col_i ( @{$col_i_aref} ) {
                push @{$val}, _clean_value( $line[$col_i] );
            }
            last;
        }
    }
    close $fh or croak;

# return the reference to array if there is more than 1 value, otherwise just the single value itself
    ## but if the string itself was not found, return the empty string for the number of requested fields:

lib/Lingua/Norms/SUBTLEX.pm  view on Meta::CPAN

    my @usr_strings = sort { $a cmp $b } @{$str_aref};

    # Search for the string, and isolate the requested values:
    open( my $fh, q{<}, $path ) or croak $OS_ERROR;
    while (<$fh>) {
        next if $INPUT_LINE_NUMBER == 1;    # skip column heading line
        my $file_str =
          _get_file_str( $_, $delim );    # have to declare as can be empty (!)
        next if nocontent($file_str);
        if ( my $found = first { $eq_fn->( $_, $file_str ) } @usr_strings ) {
            my @line = split m/\Q$delim\E/xms;    # split the line
            for my $col_i ( keys %{$col_i_href} ) {
                $string_vals{$file_str}->{ $col_i_href->{$col_i} } =
                  _clean_value( $line[$col_i] );
            }
            last if scalar keys %string_vals == scalar @{$str_aref};
            splice @usr_strings, ( firstidx { $_ eq $found } @usr_strings ), 1;

            #print STDERR "checking ",join(q{,}, @usr_strings),"\n";
        }
    }



( run in 1.926 second using v1.01-cache-2.11-cpan-71847e10f99 )