Perl6-Str

 view release on metacpan or  search on metacpan

lib/Perl6/Str.pm  view on Meta::CPAN

        while ($pattern =~ m/(\X)/g){
            $last_pattern = $1;
            last unless $$self =~ m/(\X)/g;
            my $s = $1;
            $copy .= $func->($s, $last_pattern);
        }
        if (pos($$self)){
            # $$self longer than $pattern
            while ($$self =~ m/(\X)/g){
                $copy .= $func->($1, $last_pattern);
            }
        }
        pos $$self = $old_self_pos;
        pos $pattern = $old_pattern_pos;
        return $self->new($copy);
    }
}

BEGIN {

    *samecase   = _same_stuff(\&_copy_case);
    *sameaccent = _same_stuff(\&_copy_markings);

    for (qw(uc lc ucfirst lcfirst)) {
        eval qq{
            sub $_ {
                return \$_[0]->new(CORE::$_ \${\$_[0]});
            }
        };
    }

    for (qw(NFD NFC NFKD NFKC)) {
        eval qq{
            sub $_ {
                return \$_[0]->new(Unicode::Normalize::$_ \${\$_[0]});
            }
        };
    }
}

sub capitalize {
    my $self = shift;
    my $copy = CORE::lc $$self;
    $copy =~ s/(\w+)/CORE::ucfirst $1/eg;
    return $self->new($copy);
}

sub _copy_case {
    my ($chr, $pattern) = @_;
    if ($pattern =~ m/\p{IsTitle}|\p{IsUpper}/){
        return CORE::uc $chr;
    } elsif ($pattern =~ m/\p{IsLower}/){
        return CORE::lc $chr;
    } else {
        return $chr;
    }
}

sub _split_markings {
    my $char = Unicode::Normalize::NFKD(shift);
    return split m//, $char, 2;
}

sub _copy_markings {
    my ($source, $pattern) = @_;
    my (undef, $accents) = _split_markings($pattern);
    my ($base, undef)    = _split_markings($source);
    return $base . $accents;
}

1;

__END__

=head1 NAME

Perl6::Str - Grapheme level string implementation for Perl 5

=head1 SYNOPSIS

    use Perl6::Str;
    use charnames qw(:full);
    my $s = Perl6::Str->new("a\N{COMBINING ACUTE ACCENT}");
    my $other = "\N{LATIN SMALL LETTER A WITH ACUTE}";

    if ($s eq $other) {
        print "Equality compared at grapheme level\n";
    }

    # just one grapheme:
    printf "'%s' has %d logical characters\n", $s, $s->graphs;

    # prints the whole grapheme, not just the accent:
    print $s->substr(-1, 1); 
    print $s->uc;

    # adjust case of characters according to template:
    # prints 'AbcDE'
    print $s->new('abcdE')->samecase('Xy Z');

=head1 DESCRIPTION

Perl 5 offers string manipulation at the byte level (for non-upgraded strings)
and at the codepoint level (for decoded strings). However it fails to provide
string manipulation at the grapheme level, that is it has no easy way of
treating a sequence of codepoints, in which all but the first are combining
characters (like accents, for example) as one character.

C<Perl6::Str> tries to solve this problem by introducing a string object with
an API similar to that of Perl 6 (as far as possible), and emulating common
operations such as C<substr>, C<chomp> and C<chop> at the grapheme level.
It also introduces builtin string methods found in Perl 6 such as C<samecase>.

C<Perl6::Str> is written in pure Perl 5.

For a description of the Perl 6 C<Str> type, please see
L<http://doc.perl6.org/type/Str>.

=head1 CAVEATS

C<Perl6::Str> is implemented in terms of a blessed reference to the



( run in 0.409 second using v1.01-cache-2.11-cpan-71847e10f99 )