App-Basis-ConvertText2-UtfTransform

 view release on metacpan or  search on metacpan

lib/App/Basis/ConvertText2/UtfTransform.pm  view on Meta::CPAN

# ABSTRACT: Convert ascii text into UTF8 to simulate text formatting

=head1 NAME

App::Basis::ConvertText2::UtfTransform

=head1 SYNOPSIS

    use 5.10.0 ;
    use strict ;
    use warnings ;
    use App::Basis::ConvertText2::UtfTransform ;

    my $string = "<b>bold text</b> 
        <i>italic text</i>
        <f>flipped upside down text and reversed</f>
        <l>Some Leet speak</l>
        <o>text in bubbles</o>
        <s>script text</s>
        <l>are you leet</l>" ;

    say utf_transform( $string) ;

    my $smile = ":beer: is food!  :) I <3 :cake: ;)" ;

    say utf_smilies( $smile ) ;

=head1 DESCRIPTION

A number of popular websites (eg twitter) do not allow the use of HTML to create
bold/italic font effects or perform smiley transformations.

However we can simulate this with some clever transformations of plain ascii text
into UTF8 codes which are a different font and so effectively create the same effect.

We have transformations for flip (reverses the string and flips it upside down),
bold, italic, bubbles, script and leet.

We can transform A-Z a-z 0-9 and ? ! ,

I have only implemented a small set of smilies, ones that I am likely to use

=head1 Note

You cannot embed one format within another, so you cannot have bold script, or 
bold italic.

=head1 See Also 

L<http://txtn.us/>

=head1 Functions

=over 4

=cut

package App::Basis::ConvertText2::UtfTransform;
$App::Basis::ConvertText2::UtfTransform::VERSION = '0.4.0';
use 5.014;
use warnings;
use strict;
use Acme::LeetSpeak;
use Text::Emoticon;
use Exporter;
# Exporter supplies the import() method; the two package variables below
# configure it. Declared with 'our' rather than the obsolete 'use vars' pragma.
our @ISA = qw(Exporter);

# this is the list of things that will get imported into the loading packages
# namespace
our @EXPORT = qw(
    utf_transform
    utf_smilies
);

# ----------------------------------------------------------------------------

# UTF8 codes to transform normal ascii to different UTF8 codes
# to perform text effects that can be used on websites that allow UTF8 but
# do not allow HTML codes

# ----------------------------------------------------------------------------

# Lookup table for the flip effect: each supported ASCII character maps to the
# character that looks like it after a 180 degree rotation. Only lower case
# letters are listed because _transform lower-cases the text before using this
# table. Characters that have no distinct rotated form map to themselves.
my %flip = (
    "a" => "\x{0250}",
    "b" => "q",
    "c" => "\x{0254}",
    "d" => "p",
    "e" => "\x{01DD}",
    "f" => "\x{025F}",
    "g" => "\x{0183}",
    "h" => "\x{0265}",
    "i" => "\x{0131}",
    "j" => "\x{027E}",
    "k" => "\x{029E}",
    "l" => "\x{0283}",
    "m" => "\x{026F}",
    "n" => "u",
    "o" => "o",
    "p" => "d",
    "q" => "b",           # mirror of "b" => "q"; was wrongly left as "q"
    "r" => "\x{0279}",
    "s" => "s",
    "t" => "\x{0287}",
    "u" => "n",
    "v" => "\x{028C}",
    "w" => "\x{028D}",
    "x" => "x",
    "y" => "\x{028E}",
    "z" => "z",
    "0" => "0",
    "1" => "1",
    "2" => "2",
    "3" => "3",
    "4" => "4",
    "5" => "5",
    "6" => "9",           # 6 and 9 rotate into each other
    "7" => "7",
    "8" => "8",
    "9" => "6",
    "?" => "\x{00BF}",
    "!" => "\x{00A1}",
    "," => ",",
);

my %bold = (
    "A" => "\x{1D400}",
    "B" => "\x{1D401}",
    "C" => "\x{1D402}",
    "D" => "\x{1D403}",
    "E" => "\x{1D404}",
    "F" => "\x{1D405}",
    "G" => "\x{1D406}",
    "H" => "\x{1D407}",
    "I" => "\x{1D408}",
    "J" => "\x{1D409}",
    "K" => "\x{1D40A}",
    "L" => "\x{1D40B}",
    "M" => "\x{1D40C}",
    "N" => "\x{1D40D}",
    "O" => "\x{1D40E}",
    "P" => "\x{1D40F}",
    "Q" => "\x{1D410}",
    "R" => "\x{1D411}",
    "S" => "\x{1D412}",
    "T" => "\x{1D413}",
    "U" => "\x{1D414}",
    "V" => "\x{1D415}",
    "W" => "\x{1D416}",
    "X" => "\x{1D417}",
    "Y" => "\x{1D418}",
    "Z" => "\x{1D419}",
    "a" => "\x{1D41A}",
    "b" => "\x{1D41B}",
    "c" => "\x{1D41C}",
    "d" => "\x{1D41D}",
    "e" => "\x{1D41E}",
    "f" => "\x{1D41F}",
    "g" => "\x{1D420}",
    "h" => "\x{1D421}",
    "i" => "\x{1D422}",
    "j" => "\x{1D423}",
    "k" => "\x{1D424}",
    "l" => "\x{1D425}",
    "m" => "\x{1D426}",
    "n" => "\x{1D427}",
    "o" => "\x{1D428}",
    "p" => "\x{1D429}",
    "q" => "\x{1D42A}",
    "r" => "\x{1D42B}",
    "s" => "\x{1D42C}",
    "t" => "\x{1D42D}",
    "u" => "\x{1D42E}",
    "v" => "\x{1D42F}",
    "w" => "\x{1D430}",
    "x" => "\x{1D431}",
    "y" => "\x{1D432}",
    "z" => "\x{1D433}",
    "0" => "\x{1D7CE}",
    "1" => "\x{1D7CF}",
    "2" => "\x{1D7D0}",
    "3" => "\x{1D7D1}",
    "4" => "\x{1D7D2}",
    "5" => "\x{1D7D3}",
    "6" => "\x{1D7D4}",
    "7" => "\x{1D7D5}",

lib/App/Basis/ConvertText2/UtfTransform.pm  view on Meta::CPAN

    "H" => "\x{1D43B}",
    "I" => "\x{1D43C}",
    "J" => "\x{1D43D}",
    "K" => "\x{1D43E}",
    "L" => "\x{1D43F}",
    "M" => "\x{1D440}",
    "N" => "\x{1D441}",
    "O" => "\x{1D442}",
    "P" => "\x{1D443}",
    "Q" => "\x{1D444}",
    "R" => "\x{1D445}",
    "S" => "\x{1D446}",
    "T" => "\x{1D447}",
    "U" => "\x{1D448}",
    "V" => "\x{1D449}",
    "W" => "\x{1D44A}",
    "X" => "\x{1D44B}",
    "Y" => "\x{1D44C}",
    "Z" => "\x{1D44D}",
    "a" => "\x{1D622}",
    "b" => "\x{1D623}",
    "c" => "\x{1D624}",
    "d" => "\x{1D625}",
    "e" => "\x{1D626}",
    "f" => "\x{1D627}",
    "g" => "\x{1D628}",
    "h" => "\x{1d629}",
    "i" => "\x{1D62a}",
    "j" => "\x{1D62b}",
    "k" => "\x{1D62c}",
    "l" => "\x{1D62d}",
    "m" => "\x{1D62e}",
    "n" => "\x{1D62f}",
    "o" => "\x{1D630}",
    "p" => "\x{1D631}",
    "q" => "\x{1D632}",
    "r" => "\x{1D633}",
    "s" => "\x{1D634}",
    "t" => "\x{1D635}",
    "u" => "\x{1D636}",
    "v" => "\x{1D637}",
    "w" => "\x{1D638}",
    "x" => "\x{1D639}",
    "y" => "\x{1D63a}",
    "z" => "\x{1D63b}",
    "0" => "0",
    "1" => "1",
    "2" => "2",
    "3" => "3",
    "4" => "4",
    "5" => "5",
    "6" => "6",
    "7" => "7",
    "8" => "8",
    "9" => "9",
    "?" => "?",
    "!" => "!",
    "," => ",",
);

# mathematical bold script capital and small
# http://www.fileformat.info/info/unicode/category/Lu/list.htm
# http://www.fileformat.info/info/unicode/category/Ll/list.htm

my %script = (
    "A" => "\x{1d4d0}",
    "B" => "\x{1d4d1}",
    "C" => "\x{1d4d2}",
    "D" => "\x{1d4d3}",
    "E" => "\x{1d4d4}",
    "F" => "\x{1d4d5}",
    "G" => "\x{1d4d6}",
    "H" => "\x{1d4d7}",
    "I" => "\x{1d4d8}",
    "J" => "\x{1d4d9}",
    "K" => "\x{1d4da}",
    "L" => "\x{1d4db}",
    "M" => "\x{1d4dc}",
    "N" => "\x{1d4dd}",
    "O" => "\x{1d4de}",
    "P" => "\x{1d4df}",
    "Q" => "\x{1d4e0}",
    "R" => "\x{1d4e1}",
    "S" => "\x{1d4e2}",
    "T" => "\x{1D4e3}",
    "U" => "\x{1D4e4}",    ## special
    "V" => "\x{1D4e5}",
    "W" => "\x{1D4e6}",
    "X" => "\x{1D4e7}",
    "Y" => "\x{1D4e8}",
    "Z" => "\x{1D4e9}",
    "a" => "\x{1D4ea}",
    "b" => "\x{1D4eb}",
    "c" => "\x{1D4ec}",
    "d" => "\x{1D4ed}",
    "e" => "\x{1D4ee}",
    "f" => "\x{1D4ef}",
    "g" => "\x{1D4f0}",
    "h" => "\x{1d4f1}",
    "i" => "\x{1D4f2}",
    "j" => "\x{1D4f3}",
    "k" => "\x{1D4f4}",
    "l" => "\x{1D4f5}",
    "m" => "\x{1D4f6}",
    "n" => "\x{1D4f7}",
    "o" => "\x{1D4f8}",
    "p" => "\x{1D4f9}",
    "q" => "\x{1D4fa}",
    "r" => "\x{1D4fb}",
    "s" => "\x{1D4fc}",
    "t" => "\x{1D4fd}",
    "u" => "\x{1D4fe}",
    "v" => "\x{1D4ff}",
    "w" => "\x{1D500}",
    "x" => "\x{1D501}",
    "y" => "\x{1D502}",
    "z" => "\x{1D503}",
    "0" => "0",
    "1" => "1",
    "2" => "2",
    "3" => "3",

lib/App/Basis/ConvertText2/UtfTransform.pm  view on Meta::CPAN

    "x" => "\x{24E7}",
    "y" => "\x{24E8}",
    "z" => "\x{24E9}",
    "0" => "\x{24EA}",
    "1" => "\x{2460}",
    "2" => "\x{2461}",
    "3" => "\x{2462}",
    "4" => "\x{2463}",
    "5" => "\x{2464}",
    "6" => "\x{2465}",
    "7" => "\x{2466}",
    "8" => "\x{2467}",
    "9" => "\x{2468}",
    "?" => "?",
    "!" => "!",
    "," => ",",
);

# http://www.fileformat.info/info/unicode/category/So/list.htm
# Lookup table of smilie text => the single UTF character that replaces it.
# Every key appears only once: a repeated key in a hash list silently replaces
# the earlier value, which previously turned :yes: and :no: into thumbs instead
# of the tick and cross described in the utf_smilies documentation.
my %smilies = (
    '<3'           => "\x{2665}",     # heart
    ':heart:'      => "\x{2665}",     # heart
    ':)'           => "\x{1f600}",    # smile
    ':D'           => "\x{1f603}",    # grin
    '8-)'          => "\x{1f60e}",    # cool
    ':P'           => "\x{1f61b}",    # pull tongue
    ":'("          => "\x{1f622}",    # cry
    ':('           => "\x{2639}",     # sad
    ";)"           => "\x{1f609}",    # wink
    ":sleep:"      => "\x{1f634}",    # sleep
    ":halo:"       => "\x{1f607}",    # halo
    ":devil:"      => "\x{1f608}",    # devil
    ":horns:"      => "\x{1f608}",    # devil
    "(c)"          => "\x{00a9}",     # copyright
    "(r)"          => "\x{00ae}",     # registered
    "(tm)"         => "\x{2122}",     # trademark (U+0099 is a control character)
    ":email:"      => "\x{2709}",     # email
    ":yes:"        => "\x{2713}",     # tick
    ":no:"         => "\x{2715}",     # cross
    ":beer:"       => "\x{1F37A}",    # beer
    ":wine:"       => "\x{1f377}",    # wine
    ":wine_glass:" => "\x{1f377}",    # wine
    ":cake:"       => "\x{1f382}",    # cake
    ":star:"       => "\x{2606}",     # star
    ":ok:"         => "\x{1f44d}",    # ok = thumbsup
    ":thumbsup:"   => "\x{1f44d}",    # thumbsup
    ":thumbsdown:" => "\x{1f44e}",    # thumbsdown
    ":bad:"        => "\x{1f44e}",    # bad = thumbsdown
    ":ghost:"      => "\x{1f47b}",    # ghost
    ":skull:"      => "\x{1f480}",    # skull
    ":time:"       => "\x{231a}",     # time, watch face
    ":hourglass:"  => "\x{231b}",     # hourglass
);

# every smilie key escaped with quotemeta and joined with '|'
# NOTE(review): presumably interpolated into a regexp as an alternation; the
# code that uses it is not visible in this part of the file - confirm
my $smiles = join '|' => map { quotemeta $_ } keys %smilies;

# effect code (the letter used in the pseudo HTML element) => lookup table
# holding the character replacements for that effect
my %code_map = (
    b => \%bold,
    f => \%flip,
    i => \%italic,
    o => \%bubbles,
    s => \%script,
);

# ----------------------------------------------------------------------------
# regexp replace function
#
# Applies one text effect to a string; utf_transform calls it for every
# pseudo HTML element that it finds.
#
# Parameters
#   $code    one letter effect code: a key of %code_map, or 'l' for leet.
#            Upper and lower case are treated the same.
#   $string  the text found between the opening and closing element
#
# Returns the transformed string. A code that is neither 'l' nor a key of
# %code_map returns the string unchanged.
sub _transform {
    my ( $code, $string ) = @_;

    # utf_transform matches elements with /i, so <B>text</B> arrives as 'B'
    $code = lc $code;

    # leet has no lookup table, the whole string is handed to leet()
    return leet($string) if $code eq 'l';

    my $map = $code_map{$code};
    return $string if !$map;

    # needs to be reversed and in lower case for flip
    $string = reverse lc($string) if $code eq 'f';

    # The class was A-ZA-z, and the A-z range also covers [ \ ] ^ _ and `
    # which are in none of the tables, so they were replaced by undef.
    # Any character without a table entry is now kept as it is.
    $string =~ s{([A-Za-z0-9?!,])}{ $map->{$1} // $1 }gesm;

    return $string;
}

# ----------------------------------------------------------------------------

=item utf_transform

transform A-Za-z0-9!?, into UTF8 forms suitable for websites that do not allow
HTML codes for these

we use the following pseudo HTML elements

    flip     <f>text</f>      upside down and reversed
    bold     <b>text</b>
    italic   <i>text</i>
    bubbles  <o>text</o>
    script   <s>text</s>
    leet     <l>text</l>      LeetSpeak

B<Parameters>  

incoming string    

B<Returns>

transformed string

=cut

sub utf_transform {
    my $text = shift;

    # every <x>...</x> pair (x being a single word character, matched without
    # regard to case) is replaced by whatever _transform returns for the
    # element letter and the enclosed text
    $text =~ s|<(\w)>(.*?)</\1>|_transform( $1, $2)|egsi;

    return $text;
}

# ----------------------------------------------------------------------------

=item utf_smilies

transform some character strings into UTF smilies

I have only implemented a small set of smilies, ones that I am likely to use

    | smilie                    | symbol      |
    |---------------------------+-------------|
    | <3, :heart:               | heart       |
    | :)                        | smile       |
    | :D                        | grin        |
    | 8-)                       | cool        |
    | :P                        | pull tongue |
    | :'(                       | cry         |
    | :(                        | sad         |
    | ;)                        | wink        |
    | :halo:                    | halo        |
    | :devil:, :horns:          | devil horns |
    | (c)                       | copyright   |
    | (r)                       | registered  |
    | (tm)                      | trademark   |
    | :email:                   | email       |
    | :yes:                     | tick        |
    | :no:                      | cross       |
    | :beer:                    | beer        |
    | :wine:, :wine_glass:      | wine        |
    | :cake:                    | cake        |
    | :star:                    | star        |
    | :ok:, :thumbsup:          | thumbsup    |
    | :bad:, :thumbsdown:       | thumbsdown  |
    | :ghost:                   | ghost       |
    | :skull:                   | skull       |
    | :hourglass:               | hourglass   |
    | :time:                    | watch face  |



( run in 1.992 second using v1.01-cache-2.11-cpan-99c4e6809bf )