Perl6-Str
view release on metacpan or search on metacpan
lib/Perl6/Str.pm view on Meta::CPAN
while ($pattern =~ m/(\X)/g){
$last_pattern = $1;
last unless $$self =~ m/(\X)/g;
my $s = $1;
$copy .= $func->($s, $last_pattern);
}
if (pos($$self)){
# $$self longer than $pattern
while ($$self =~ m/(\X)/g){
$copy .= $func->($1, $last_pattern);
}
}
pos $$self = $old_self_pos;
pos $pattern = $old_pattern_pos;
return $self->new($copy);
}
}
BEGIN {
*samecase = _same_stuff(\&_copy_case);
*sameaccent = _same_stuff(\&_copy_markings);
for (qw(uc lc ucfirst lcfirst)) {
eval qq{
sub $_ {
return \$_[0]->new(CORE::$_ \${\$_[0]});
}
};
}
for (qw(NFD NFC NFKD NFKC)) {
eval qq{
sub $_ {
return \$_[0]->new(Unicode::Normalize::$_ \${\$_[0]});
}
};
}
}
sub capitalize {
my $self = shift;
my $copy = CORE::lc $$self;
$copy =~ s/(\w+)/CORE::ucfirst $1/eg;
return $self->new($copy);
}
sub _copy_case {
my ($chr, $pattern) = @_;
if ($pattern =~ m/\p{IsTitle}|\p{IsUpper}/){
return CORE::uc $chr;
} elsif ($pattern =~ m/\p{IsLower}/){
return CORE::lc $chr;
} else {
return $chr;
}
}
sub _split_markings {
my $char = Unicode::Normalize::NFKD(shift);
return split m//, $char, 2;
}
sub _copy_markings {
my ($source, $pattern) = @_;
my (undef, $accents) = _split_markings($pattern);
my ($base, undef) = _split_markings($source);
return $base . $accents;
}
1;
__END__
=head1 NAME
Perl6::Str - Grapheme level string implementation for Perl 5
=head1 SYNOPSIS
use Perl6::Str;
use charnames qw(:full);
my $s = Perl6::Str->new("a\N{COMBINING ACUTE ACCENT}");
my $other = "\N{LATIN SMALL LETTER A WITH ACUTE}";
if ($s eq $other) {
print "Equality compared at grapheme level\n";
}
# just one grapheme:
printf "'%s' has %d logical characters\n", $s, $s->graphs;
# prints the whole grapheme, not just the accent:
print $s->substr(-1, 1);
print $s->uc;
# adjust case of characters according to template:
# prints 'AbcDE'
print $s->new('abcdE')->samecase('Xy Z');
=head1 DESCRIPTION
Perl 5 offers string manipulation at the byte level (for non-upgraded strings)
and at the codepoint level (for decoded strings). However it fails to provide
string manipulation at the grapheme level, that is it has no easy way of
treating a sequence of codepoints, in which all but the first are combining
characters (like accents, for example) as one character.
C<Perl6::Str> tries to solve this problem by introducing a string object with
an API similar to that of Perl 6 (as far as possible), and emulating common
operations such as C<substr>, C<chomp> and C<chop> at the grapheme level.
It also introduces builtin string methods found in Perl 6 such as C<samecase>.
C<Perl6::Str> is written in pure Perl 5.
For a description of the Perl 6 C<Str> type, please see
L<http://doc.perl6.org/type/Str>.
=head1 CAVEATS
C<Perl6::Str> is implemented in terms of a blessed reference to the
( run in 0.409 second using v1.01-cache-2.11-cpan-71847e10f99 )