Unicode-Precis

 view release on metacpan or  search on metacpan

lib/Unicode/Precis.pm  view on Meta::CPAN

#-*- perl -*-
#-*- coding: utf-8 -*-

package Unicode::Precis;

use 5.008007;    # Use Unicode 4.1.0 or later.
use strict;
use warnings;

use Encode qw(is_utf8 _utf8_on _utf8_off);
use Unicode::BiDiRule qw(check);
use Unicode::Normalize qw(normalize);
use Unicode::Precis::Preparation qw(prepare FreeFormClass IdentifierClass);
use Unicode::Precis::Utils
    qw(compareExactly decomposeWidth foldCase mapSpace);

our $VERSION = '1.100';
$VERSION = eval $VERSION;    # see L<perlmodstyle>

sub new {
    my $class   = shift;
    my %options = @_;

    bless {%options} => $class;
}

sub compare {
    my $self    = shift;
    my $stringA = $self->enforce(shift);
    my $stringB = $self->enforce(shift);

    return compareExactly($stringA, $stringB);
}

sub enforce {
    my ($self, $string) = @_;

    return undef unless defined $string;

    if (lc($self->{WidthMappingRule} || '') eq 'decomposition') {
        decomposeWidth($string);
    }
    my $mappingrule = lc($self->{AdditionalMappingRule} || '');
    if ($mappingrule =~ /\bmapspace/) {
        mapSpace($string);
    }
    if ($mappingrule =~ /\bstripspace/) {
        $string =~ s/\A\x20+//;
        $string =~ s/\x20+\z//;
    }
    if ($mappingrule =~ /\bunifyspace/) {
        $string =~ s/\x20\x20+/\x20/g;
    }
    if (lc($self->{CaseMappingRule} || '') eq 'fold') {
        foldCase($string);
    }
    if ($self->{NormalizationRule}) {
        if (is_utf8($string)) {
            $string =
                eval { normalize(uc $self->{NormalizationRule}, $string) };
        } elsif ("\t" eq "\005") {    # EBCDIC
            $string = Encode::decode('UTF-8', $string);
            $string =
                eval { normalize(uc $self->{NormalizationRule}, $string) };
            $string = Encode::encode('UTF-8', $string) if defined $string;
        } else {
            _utf8_on($string);
            $string =
                eval { normalize(uc $self->{NormalizationRule}, $string) };
            _utf8_off($string);
        }
        return undef unless defined $string;
    }
    if (lc($self->{DirectionalityRule} || '') eq 'bidi') {
        return undef unless defined check($string, 0);
    }
    my $stringclass = {
        freeformclass   => FreeFormClass,
        identifierclass => IdentifierClass,
        }->{lc($self->{StringClass} || '')}
        || 0;
    return undef
        unless defined prepare($string, $stringclass);
    if (ref $self->{OtherRule} eq 'CODE') {

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 1.312 second using v1.00-cache-2.02-grep-82fe00e-cpan-cec75d87357c )