Unicode-Precis
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
lib/Unicode/Precis.pm view on Meta::CPAN
#-*- perl -*-
#-*- coding: utf-8 -*-
package Unicode::Precis;
use 5.008007; # Use Unicode 4.1.0 or later.
use strict;
use warnings;
use Encode qw(is_utf8 _utf8_on _utf8_off);
use Unicode::BiDiRule qw(check);
use Unicode::Normalize qw(normalize);
use Unicode::Precis::Preparation qw(prepare FreeFormClass IdentifierClass);
use Unicode::Precis::Utils
qw(compareExactly decomposeWidth foldCase mapSpace);
our $VERSION = '1.100';
$VERSION = eval $VERSION; # see L<perlmodstyle>
sub new {
my $class = shift;
my %options = @_;
bless {%options} => $class;
}
sub compare {
my $self = shift;
my $stringA = $self->enforce(shift);
my $stringB = $self->enforce(shift);
return compareExactly($stringA, $stringB);
}
sub enforce {
my ($self, $string) = @_;
return undef unless defined $string;
if (lc($self->{WidthMappingRule} || '') eq 'decomposition') {
decomposeWidth($string);
}
my $mappingrule = lc($self->{AdditionalMappingRule} || '');
if ($mappingrule =~ /\bmapspace/) {
mapSpace($string);
}
if ($mappingrule =~ /\bstripspace/) {
$string =~ s/\A\x20+//;
$string =~ s/\x20+\z//;
}
if ($mappingrule =~ /\bunifyspace/) {
$string =~ s/\x20\x20+/\x20/g;
}
if (lc($self->{CaseMappingRule} || '') eq 'fold') {
foldCase($string);
}
if ($self->{NormalizationRule}) {
if (is_utf8($string)) {
$string =
eval { normalize(uc $self->{NormalizationRule}, $string) };
} elsif ("\t" eq "\005") { # EBCDIC
$string = Encode::decode('UTF-8', $string);
$string =
eval { normalize(uc $self->{NormalizationRule}, $string) };
$string = Encode::encode('UTF-8', $string) if defined $string;
} else {
_utf8_on($string);
$string =
eval { normalize(uc $self->{NormalizationRule}, $string) };
_utf8_off($string);
}
return undef unless defined $string;
}
if (lc($self->{DirectionalityRule} || '') eq 'bidi') {
return undef unless defined check($string, 0);
}
my $stringclass = {
freeformclass => FreeFormClass,
identifierclass => IdentifierClass,
}->{lc($self->{StringClass} || '')}
|| 0;
return undef
unless defined prepare($string, $stringclass);
if (ref $self->{OtherRule} eq 'CODE') {
view all matches for this distributionview release on metacpan - search on metacpan
( run in 1.312 second using v1.00-cache-2.02-grep-82fe00e-cpan-cec75d87357c )