Lingua-IdSplitter
view release on metacpan or search on metacpan
lib/Lingua/IdSplitter.pm view on Meta::CPAN
foreach (@{$self->{hard}}) {
$str .= "Technique: $_->{tech}\n";
$str .= "Terms: ".join(',',@{$_->{terms}});
$str .= "\n";
}
}
if ( $self->{explain_rank}) {
$str .= "\n## soft split rank(s):\n";
$str .= join("\n", @{$self->{explain_rank}});
}
return $str;
}
# Build a plain-text report of the entries stored in $self->{rank}.
#
# $self->{rank} is read as an array reference of hash references, each with:
#   terms - array reference of hashes carrying a 't' and an 's' value
#   expr  - text printed before the "="
#   score - value printed after the "="
#
# Every rank entry produces one newline-terminated line of the form
#   t1,t2(<-s2) ---> EXPR = SCORE
# A term is printed as just its 't' value when 't' and 's' are equal,
# otherwise as "t(<-s)".
#
# Returns the concatenated lines. When there are no rank entries the empty
# string is returned (never undef), so the result can be joined or printed
# without triggering "uninitialized" warnings.
sub _explain_rank {
    my ($self) = @_;

    my $report = '';

    # The "|| []" fallbacks stop foreach (which aliases its list in lvalue
    # context) from autovivifying missing keys on the object being reported.
    for my $candidate (@{ $self->{rank} || [] }) {
        my @parts;
        for my $term (@{ $candidate->{terms} || [] }) {
            push @parts,
                $term->{t} eq $term->{s}
                ? $term->{t}
                : "$term->{t}(<-$term->{s})";
        }
        $report .= join(',', @parts)
            . " ---> $candidate->{expr} = $candidate->{score}\n";
    }

    return $report;
}
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
Lingua::IdSplitter - split identifiers into words
=head1 VERSION
version 0.03
=head1 SYNOPSIS
use Lingua::IdSplitter;
my $splitter = Lingua::IdSplitter->new;
$splitter->split($identifier);
=head1 DESCRIPTION
This module implements an algorithm to identify and split multi-word
identifiers into their individual words. For example, "UserFind" into "user"
and "find", or "timesort" into "time" and "sort".
For more details on the algorithm check the following
L<article|http://www.sciencedirect.com/science/article/pii/S0164121214002179>
(also available L<here|http://hdl.handle.net/10198/11577>).
=head1 FUNCTIONS
=head2 new
Create a new splitter object. A list of specific dictionaries is optional;
see the C<bin/id-splitter> command for an example of how to use more
dictionaries.
=head2 soft_split
Perform a soft split of the identifier, i.e. split words without using
explicit markers (e.g. the underscore character, or CamelCase notation).
=head2 hard_split
Perform a hard split of the identifier, i.e. split words using
explicit markers (e.g. the underscore character, or CamelCase notation).
=head2 split
Perform a split by first applying a hard split, and then applying a soft split
to the set of terms produced by the hard split.
=head2 explain
Show the computed ranks (including scores) for a split.
=head1 AUTHOR
Nuno Carvalho <smash@cpan.org>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2014-2015 by Project Natura <natura@natura.di.uminho.pt>.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
( run in 0.724 second using v1.01-cache-2.11-cpan-71847e10f99 )