Unicode-Tussle
view release on metacpan or search on metacpan
script/hantest view on Meta::CPAN
return $cols;
};
# banner_paragraph(NAME, TEXT)
#
# Prints one titled section: NAME is upper-cased and handed to banner(),
# then TEXT is handed to wrap_paragraph().  Whatever wrap_paragraph()
# returns is passed straight back to the caller.
sub banner_paragraph($$) {
    my $heading = uc(shift);
    my $body    = shift;

    banner($heading);
    return wrap_paragraph($body);
}
UNITCHECK {
### Public Configuration Attributes (unused variable!!)
state $LB_default_config = {
BreakIndent => 'YES',
CharactersMax => 998,
ColumnsMin => 0,
ColumnsMax => 76,
ComplexBreaking => 'YES',
Context => 'NONEASTASIAN',
Format => "SIMPLE",
script/hantest view on Meta::CPAN
for (split /\R{2,}/, $text) {
s/(?:(?![\N{NO-BREAK SPACE}\t])\p{White_Space})+/ /g;
s/^\s+//;
s/\s+$//;
say $formatter->break($_), "\n";
}
}
} # end UNITCHECK
UNITCHECK {
state $uh = new Unicode::Unihan;
sub char_inform(_) {
state $seen = { };
my $string = shift;
for my $char ( split //, $string ) {
# next if $seen->{$char}++;
script/hantest view on Meta::CPAN
$best = apply_tones($lang, $best);
}
for ($best) {
s/\h.*//;
}
push @retlist, $best;
}
return join(" ", @retlist);
}
} # end UNITCHECK
sub apply_tones($$) {
my ($lang, $string) = @_;
return $string unless $string =~ / \d \b /x;
state $mandarin_tones = {
# don't use COMBINING TONE MARKs because they don't evaporate when NFC'd
1 => "\N{COMBINING MACRON}", # 1 is macron é qīng qing1
script/uniquote view on Meta::CPAN
s,\r,\\r,g;
# s,\n,\\n,g;
s,\f,\\f,g;
s,\e,\\e,g;
s/(?!\n)([\0-\37\177])/sprintf("\\c%s", chr(ord($1) ^ 64))/ge;
return $_;
}
UNITCHECK {
my %html4_2unicode = (
# Number aliases: these are \p{Other_Number}
"sup1" => "SUPERSCRIPT ONE", # ¹ U+00B9
"sup2" => "SUPERSCRIPT TWO", # ² U+00B2
"sup3" => "SUPERSCRIPT THREE", # ³ U+00B3
"frac12" => "VULGAR FRACTION ONE HALF", # ½ U+00BD
"frac14" => "VULGAR FRACTION ONE QUARTER", # ¼ U+00BC
"frac34" => "VULGAR FRACTION THREE QUARTERS", # ¾ U+00BE
script/uniquote view on Meta::CPAN
s/'/'/g;
s {
( $Ugly_RX )
}{
$entity->{$1} || sprintf("&#%d;", ord $1)
}gxe;
return $_;
}
} # end UNITCHECK private scope for hash
sub convert_to_xml(_) {
local $_ = shift();
no warnings "utf8";
s/&/&/g;
s/</</g;
s/>/>/g;
s/"/"/g;
s/'/'/g;
script/vowel-sigs view on Meta::CPAN
# Forward declarations so these subs can be called before their bodies
# appear.  The (_) prototype means each takes a single scalar argument
# and falls back to $_ when called with none.
sub cvmap ( _ );
sub cvlen ( _ );
sub as_CV ( _ );
sub nsyl ( _ );
# NOTE(review): what MANY counts is not visible in this excerpt --
# check its uses before changing the value.
use constant MANY => 8;
# Package globals.  NOTE(review): presumably filled in by init_subs()
# below -- confirm against its definition.
our $RX_Subs;
our ($V, $C, $VC, $CV);
# UNITCHECK runs as soon as this compilation unit has finished compiling,
# i.e. before any of the run-time statements below execute.
UNITCHECK {
init_subs();
}
# Flush the currently selected output handle after every write.
$| = 1;
use Lingua::EN::Syllable;
# With no file arguments and STDIN attached to a terminal (a bare -t tests
# STDIN), fall back to the system word list instead of waiting on input.
@ARGV = "/usr/share/dict/words" if !@ARGV && -t;
# Hand control to the script's entry point (defined elsewhere in the file).
main();
( run in 0.275 second using v1.01-cache-2.11-cpan-748bfb374f4 )