Lingua-Norms-SUBTLEX
view release on metacpan or search on metacpan
lib/Lingua/Norms/SUBTLEX.pm view on Meta::CPAN
=cut
sub select_strings {
my ( $self, %args ) = @_;
my %patterns = ();
if ( hascontent( $args{'regex'} ) ) {
$patterns{'regex'} = qr/$args{'regex'}/xms;
}
if ( hascontent( $args{'cv_pattern'} ) ) {
my $tmp = q{};
my @c = split m//ms, uc( $args{'cv_pattern'} );
foreach (@c) {
$tmp .= $_ eq 'C' ? '[BCDFGHJKLMNPQRSTVWXYZ]' : '[AEIOU]';
}
$patterns{'cv_pattern'} = qr/^$tmp$/ixms;
}
my @list = ();
open my $fh, q{<}, $self->{'_PATH'} or croak $OS_ERROR;
LINES:
while (<$fh>) {
next if $INPUT_LINE_NUMBER == 1; # skip column heading line
my @line = split m/\Q$self->{'_DELIM'}\E/xms;
next if !_in_range( length( $line[0] ), @{ $args{'length'} } );
for ( keys %patterns ) {
next LINES if unidecode( $line[0] ) !~ $patterns{$_};
}
for (qw/frq_count frq_opm frq_log frq_zipf cd_count cd_pct cd_log/) {
if (
ref $args{$_}
and hascontent(
$self->{'_FIELDS'}->datum( $self->{'_LANG'}, $_ . '_idx' )
)
lib/Lingua/Norms/SUBTLEX.pm view on Meta::CPAN
=cut
sub random_string {
my ( $self, %args ) = @_;
eval { require File::RandomLine; };
croak 'Need to install and access module File::RandomLine' if $EVAL_ERROR;
my $rl =
File::RandomLine->new( $self->{'_PATH'}, { algorithm => 'uniform' } );
my @ari = ();
while ( not scalar @ari or $ari[0] eq 'Word' ) {
@ari = split m/\Q$self->{'_DELIM'}\E/xms, $rl->next;
}
return wantarray ? @ari : $ari[0];
}
*random_word = \&random_string;
=head2 Miscellaneous
=head3 n_lines
$num = $subtlex->n_lines();
lib/Lingua/Norms/SUBTLEX.pm view on Meta::CPAN
my $val = [];
# Search for the string, and isolate the requested values:
open( my $fh, q{<}, $path ) or croak $OS_ERROR;
while (<$fh>) {
next if $INPUT_LINE_NUMBER == 1; # skip column heading line
my $file_str =
_get_file_str( $_, $delim ); # have to declare as can be empty (!)
next if nocontent($file_str);
if ( $eq_fn->( $str, $file_str ) ) {
my @line = split m/\Q$delim\E/xms;
for my $col_i ( @{$col_i_aref} ) {
push @{$val}, _clean_value( $line[$col_i] );
}
last;
}
}
close $fh or croak;
# return the reference to array if there is more than 1 value, otherwise just the single value itself
## but if the string itself was not found, return the empty string for the number of requested fields:
lib/Lingua/Norms/SUBTLEX.pm view on Meta::CPAN
my @usr_strings = sort { $a cmp $b } @{$str_aref};
# Search for the string, and isolate the requested values:
open( my $fh, q{<}, $path ) or croak $OS_ERROR;
while (<$fh>) {
next if $INPUT_LINE_NUMBER == 1; # skip column heading line
my $file_str =
_get_file_str( $_, $delim ); # have to declare as can be empty (!)
next if nocontent($file_str);
if ( my $found = first { $eq_fn->( $_, $file_str ) } @usr_strings ) {
my @line = split m/\Q$delim\E/xms; # split the line
for my $col_i ( keys %{$col_i_href} ) {
$string_vals{$file_str}->{ $col_i_href->{$col_i} } =
_clean_value( $line[$col_i] );
}
last if scalar keys %string_vals == scalar @{$str_aref};
splice @usr_strings, ( firstidx { $_ eq $found } @usr_strings ), 1;
#print STDERR "checking ",join(q{,}, @usr_strings),"\n";
}
}
( run in 1.926 second using v1.01-cache-2.11-cpan-71847e10f99 )