Unicode-Tussle

 view release on metacpan or  search on metacpan

script/hantest  view on Meta::CPAN

    return $cols;
};

# banner_paragraph NAME, TEXT
#
# Hands the upper-cased NAME to banner(), then passes TEXT to
# wrap_paragraph().  The value of the wrap_paragraph() call is the
# sub's (implicit) return value.
sub banner_paragraph($$) {
    my $name = shift;
    my $text = shift;

    banner(uc($name));
    wrap_paragraph($text);
}


UNITCHECK {

### Public Configuration Attributes (unused variable!!)
state $LB_default_config = {
    BreakIndent => 'YES',
    CharactersMax => 998,
    ColumnsMin => 0,
    ColumnsMax => 76,
    ComplexBreaking => 'YES',
    Context => 'NONEASTASIAN',
    Format => "SIMPLE",

script/hantest  view on Meta::CPAN


    for (split /\R{2,}/, $text) {
        s/(?:(?![\N{NO-BREAK SPACE}\t])\p{White_Space})+/ /g;
        s/^\s+//;
        s/\s+$//;
        say $formatter->break($_), "\n";
    }

}

} # end UNITCHECK

UNITCHECK {

state $uh = new Unicode::Unihan;

sub char_inform(_) {

    state $seen = { };

    my $string = shift;
    for my $char ( split //, $string ) {
        # next if $seen->{$char}++;

script/hantest  view on Meta::CPAN

            $best = apply_tones($lang, $best);
        }
        for ($best) {
            s/\h.*//;
        }
        push @retlist, $best;
    }
    return join(" ", @retlist);
}

}  # end UNITCHECK


sub apply_tones($$) {
    my ($lang, $string) = @_;

    return $string unless $string =~ / \d \b /x;

    state $mandarin_tones = {
    # don't use COMBINING TONE MARKs because they don't evaporate when NFC'd
        1 => "\N{COMBINING MACRON}",            # 1 is macron 青 qīng qing1

script/uniquote  view on Meta::CPAN

    s,\r,\\r,g;
  # s,\n,\\n,g;
    s,\f,\\f,g;
    s,\e,\\e,g;
    s/(?!\n)([\0-\37\177])/sprintf("\\c%s", chr(ord($1) ^ 64))/ge;

    return $_;
}


UNITCHECK {

my %html4_2unicode = (

# Number aliases: these are \p{Other_Number}
      "sup1" => "SUPERSCRIPT ONE",                            # ¹ U+00B9
      "sup2" => "SUPERSCRIPT TWO",                            # ² U+00B2
      "sup3" => "SUPERSCRIPT THREE",                          # ³ U+00B3
    "frac12" => "VULGAR FRACTION ONE HALF",                   # ½ U+00BD
    "frac14" => "VULGAR FRACTION ONE QUARTER",                # ¼ U+00BC
    "frac34" => "VULGAR FRACTION THREE QUARTERS",             # ¾ U+00BE

script/uniquote  view on Meta::CPAN

    s/'/'/g;

    s {
        ( $Ugly_RX )
    }{
        $entity->{$1} || sprintf("&#%d;", ord $1)
    }gxe;
    return $_;
}

} # end UNITCHECK private scope for hash

sub convert_to_xml(_) {
    local $_ = shift();
    no warnings "utf8";

    s/&/&/g;
    s/</&lt;/g;
    s/>/&gt;/g;
    s/"/&quot;/g;
    s/'/&apos;/g;

script/vowel-sigs  view on Meta::CPAN

# Forward declarations.  The (_) prototype means each sub takes a single
# scalar argument that defaults to $_ when omitted; declaring them here
# lets later code call them before their bodies have been compiled.
sub cvmap    ( _   );
sub cvlen  ( _   );
sub as_CV  ( _   );
sub nsyl  ( _   );

# Compile-time constant; its use is not visible in this excerpt.
use constant MANY => 8;

# Package globals.  NOTE(review): presumably populated by init_subs()
# below -- confirm against its definition.
our $RX_Subs;
our ($V, $C, $VC, $CV);

# UNITCHECK runs as soon as this compilation unit has finished compiling,
# i.e. before any of the run-time statements below execute.
UNITCHECK { 
    init_subs(); 
}

# Turn on autoflush for the currently selected output handle.
$| = 1;

use Lingua::EN::Syllable;

# With no command-line arguments and STDIN attached to a terminal
# (bare -t tests STDIN), fall back to the system word list as input.
@ARGV = "/usr/share/dict/words" if !@ARGV && -t;

# Run the program's entry point (defined elsewhere in the file).
main();



( run in 0.275 second using v1.01-cache-2.11-cpan-748bfb374f4 )