Acme-Unicodify

 view release on metacpan or  search on metacpan

META.json  view on Meta::CPAN

{
   "abstract" : "Convert ASCII text into look-somewhat-alike unicode",
   "author" : [
      "Joelle Maslak <jmaslak@antelope.net>"
   ],
   "dynamic_config" : 0,
   "generated_by" : "Dist::Zilla version 6.014, CPAN::Meta::Converter version 2.150010",
   "license" : [
      "perl_5"
   ],
   "meta-spec" : {
      "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",

META.yml  view on Meta::CPAN

---
abstract: 'Convert ASCII text into look-somewhat-alike unicode'
author:
  - 'Joelle Maslak <jmaslak@antelope.net>'
build_requires:
  Encode: '0'
  ExtUtils::MakeMaker: '0'
  File::Spec: '0'
  File::Temp: '0'
  Test::More: '0'
  Test::UseAllModules: '0.17'
configure_requires:

Makefile.PL  view on Meta::CPAN

# This file was automatically generated by Dist::Zilla::Plugin::MakeMaker v6.014.
use strict;
use warnings;

use 5.022000;

use ExtUtils::MakeMaker;

my %WriteMakefileArgs = (
  "ABSTRACT" => "Convert ASCII text into look-somewhat-alike unicode",
  "AUTHOR" => "Joelle Maslak <jmaslak\@antelope.net>",
  "BUILD_REQUIRES" => {
    "Test::UseAllModules" => "0.17"
  },
  "CONFIGURE_REQUIRES" => {
    "ExtUtils::MakeMaker" => 0
  },
  "DISTNAME" => "Acme-Unicodify",
  "LICENSE" => "perl",
  "MIN_PERL_VERSION" => "5.022000",

README.md  view on Meta::CPAN

# NAME

Acme::Unicodify - Convert ASCII text into look-somewhat-alike unicode

# VERSION

version 1.202110

# SYNOPSIS

    my $translate = Acme::Unicodify->new();

    $foo = $translate->to_unicode('Hello, World');
    $bar = $translate->back_to_ascii($unified_string);

    $translate->file_to_unicode('/tmp/infile', '/tmp/outfile');
    $translate->file_back_to_ascii('/tmp/infile', '/tmp/outfile');

# DESCRIPTION

This is intended to translate basic 7 bit ASCII into characters
that use several Unicode features, such as accent marks and
non-Latin characters.  While this can be used just for fun, a
better use perhaps is to use it as part of a test suite, to
allow you to easily pass in Unicode and determine if your system
handles Unicode without corrupting the text.

# METHODS

## new

Create a new instance of the Unicodify object.

## to\_unicode($str)

Takes an input string and translates it into look-alike Unicode
characters.  Input is any string.

Basic ASCII letters are translated into Unicode "look alikes", while
any other character (Unicode or not) is passed through unchanged.

## back\_to\_ascii($str)

Takes an input string that has perhaps previously been produced
by `to_unicode` and translates the look-alike characters back
into 7 bit ASCII.  Any other characters (Unicode or ASCII) are
passed through unchanged.

## file\_to\_unicode($infile, $outfile)

This method reads the file with the name passed as the first
argument, and produces a new output file with the name passed
as the second argument.

The routine will call `to_unicode` on the contents of the file.

Note this will overwrite existing files and it assumes the input
and output files are in UTF-8 encoding (or plain ASCII in the
case that no codepoints >127 are used).

This also assumes that there is sufficient memory to slurp the
entire contents of the file into memory.

## file\_back\_to\_ascii($infile, $outfile)

README.pod  view on Meta::CPAN

=pod

=encoding UTF-8

=head1 NAME

Acme::Unicodify - Convert ASCII text into look-somewhat-alike unicode

=head1 VERSION

version 1.202110

=head1 SYNOPSIS

  my $translate = Acme::Unicodify->new();

  $foo = $translate->to_unicode('Hello, World');
  $bar = $translate->back_to_ascii($unified_string);

  $translate->file_to_unicode('/tmp/infile', '/tmp/outfile');
  $translate->file_back_to_ascii('/tmp/infile', '/tmp/outfile');

=head1 DESCRIPTION

This is intended to translate basic 7 bit ASCII into characters
that use several Unicode features, such as accent marks and
non-Latin characters.  While this can be used just for fun, a
better use perhaps is to use it as part of a test suite, to
allow you to easily pass in Unicode and determine if your system
handles Unicode without corrupting the text.

=head1 METHODS

=head2 new

Create a new instance of the Unicodify object.

=head2 to_unicode($str)

Takes an input string and translates it into look-alike Unicode
characters.  Input is any string.

Basic ASCII letters are translated into Unicode "look alikes", while
any other character (Unicode or not) is passed through unchanged.

=head2 back_to_ascii($str)

Takes an input string that has perhaps previously been produced
by C<to_unicode> and translates the look-alike characters back
into 7 bit ASCII.  Any other characters (Unicode or ASCII) are
passed through unchanged.

=head2 file_to_unicode($infile, $outfile)

This method reads the file with the name passed as the first
argument, and produces a new output file with the name passed
as the second argument.

The routine will call C<to_unicode> on the contents of the file.

Note this will overwrite existing files and it assumes the input
and output files are in UTF-8 encoding (or plain ASCII in the
case that no codepoints >127 are used).

This also assumes that there is sufficient memory to slurp the
entire contents of the file into memory.

=head2 file_back_to_ascii($infile, $outfile)

lib/Acme/Unicodify.pm  view on Meta::CPAN

#
# Copyright (C) 2015 Joelle Maslak
# All Rights Reserved - See License
#

package Acme::Unicodify;
# ABSTRACT: Convert ASCII text into look-somewhat-alike unicode
$Acme::Unicodify::VERSION = '1.202110';
use utf8;
use v5.22;

use strict;
use warnings;

use File::Slurper 0.008 qw(read_text write_text);


lib/Acme/Unicodify.pm  view on Meta::CPAN


    my $self = {};
    bless $self, $class;

    $self->_define_cache();

    return $self;
}


sub to_unicode {
    my $self = shift;
    my $str  = shift;

    if ( !defined($str) ) { return; }

    my @parts = split /\b{gcb}/, $str;
    my $out = '';
    foreach my $l (@parts) {
        if ( exists( $_TRANSLATE{$l} ) ) {
            $out .= $_TRANSLATE{$l};

lib/Acme/Unicodify.pm  view on Meta::CPAN

            $out .= $self->{_ASCII_CACHE}->{$l};
        } else {
            $out .= $l;
        }
    }

    return $out;
}


# Convert the contents of one file to look-alike Unicode and write the
# result to another file.
#
# Arguments (exactly three, including the invocant):
#   $self   - the Acme::Unicodify object
#   $in_fn  - name of the file to read
#   $out_fn - name of the file to write
#
# Any other argument count aborts via confess with 'invalid call'.
# The whole input is read at once, passed through to_unicode(), and
# handed to write_text().  Returns nothing.
sub file_to_unicode {
    confess('invalid call') unless @_ == 3;

    my $self   = shift;
    my $in_fn  = shift;
    my $out_fn = shift;

    write_text( $out_fn, $self->to_unicode( read_text($in_fn) ) );

    return;
}


sub file_back_to_ascii {
    if ( $#_ != 2 ) { confess 'invalid call' }
    my ( $self, $in_fn, $out_fn ) = @_;

lib/Acme/Unicodify.pm  view on Meta::CPAN

}

# Populate $self->{_ASCII_CACHE}, the reverse of the %_TRANSLATE table.
#
# Each key of %_TRANSLATE is passed through to_unicode(), and the result
# becomes a cache key whose value is the original %_TRANSLATE key.
# Any cache already stored on the object is replaced.
#
# Returns nothing.
sub _define_cache {
    my $self = shift;

    # Start from an empty hash so entries from an earlier build never linger.
    $self->{_ASCII_CACHE} = {};

    foreach my $key ( keys %_TRANSLATE ) {
        $self->{_ASCII_CACHE}->{ $self->to_unicode($key) } = $key;
    }

    return;
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Acme::Unicodify - Convert ASCII text into look-somewhat-alike unicode

=head1 VERSION

version 1.202110

=head1 SYNOPSIS

  my $translate = Acme::Unicodify->new();

  $foo = $translate->to_unicode('Hello, World');
  $bar = $translate->back_to_ascii($unified_string);

  $translate->file_to_unicode('/tmp/infile', '/tmp/outfile');
  $translate->file_back_to_ascii('/tmp/infile', '/tmp/outfile');

=head1 DESCRIPTION

This is intended to translate basic 7 bit ASCII into characters
that use several Unicode features, such as accent marks and
non-Latin characters.  While this can be used just for fun, a
better use perhaps is to use it as part of a test suite, to
allow you to easily pass in Unicode and determine if your system
handles Unicode without corrupting the text.

=head1 METHODS

=head2 new

Create a new instance of the Unicodify object.

=head2 to_unicode($str)

Takes an input string and translates it into look-alike Unicode
characters.  Input is any string.

Basic ASCII letters are translated into Unicode "look alikes", while
any other character (Unicode or not) is passed through unchanged.

=head2 back_to_ascii($str)

Takes an input string that has perhaps previously been produced
by C<to_unicode> and translates the look-alike characters back
into 7 bit ASCII.  Any other characters (Unicode or ASCII) are
passed through unchanged.

=head2 file_to_unicode($infile, $outfile)

This method reads the file with the name passed as the first
argument, and produces a new output file with the name passed
as the second argument.

The routine will call C<to_unicode> on the contents of the file.

Note this will overwrite existing files and it assumes the input
and output files are in UTF-8 encoding (or plain ASCII in the
case that no codepoints >127 are used).

This also assumes that there is sufficient memory to slurp the
entire contents of the file into memory.

=head2 file_back_to_ascii($infile, $outfile)

t/01-ToFromUnicode.t  view on Meta::CPAN

# One translator instance is shared by every check in this script.
my $unify = Acme::Unicodify->new();
ok( defined $unify, 'Can create object' );

# Sample input.  It is single-quoted, so the \n is a literal backslash
# followed by the letter n, not a newline.
my $ascii_sample = 'AaaaBbbbCcccDdddEeee\n A';

# Plain ASCII must survive back_to_ascii() untouched.
is(
    $unify->back_to_ascii($ascii_sample),
    $ascii_sample,
    'No change when converting ASCII to ASCII'
);

# to_unicode() must actually alter a string containing ASCII letters.
isnt(
    $unify->to_unicode($ascii_sample),
    $ascii_sample,
    'String changes when ASCII letters passed to to_unicode'
);

# A full round trip must reproduce the original string.
my $round_trip = $unify->back_to_ascii( $unify->to_unicode($ascii_sample) );
is(
    $round_trip,
    $ascii_sample,
    'Conversion to/from Unicode is lossless'
);

# Both directions are expected to hand back undef for undef input.
is(
    $unify->to_unicode(undef),
    undef,
    'to_unicode() handles undef'
);

is(
    $unify->back_to_ascii(undef),
    undef,
    'back_to_ascii() handles undef'
);

my $text = <<'END_FILE';
This is a test.  I want to throw in a bogus Unicode character just to validate that it is preserved.

t/01-ToFromUnicode.t  view on Meta::CPAN

It should stay a camel.
END_FILE

# Fixture for the file-based checks: ASCII prose plus one code point
# above the ASCII range (U+1F42A).
$text = "
This is a test.  I want to throw in a bogus Unicode character just to validate that it is preserved.
This is line 2
This is a camel: \x{1F42A}
It should stay a camel.";

# The three files used below all live under $dir.
my $plain_path    = "$dir/infile.txt";
my $unicoded_path = "$dir/unifile.txt";
my $restored_path = "$dir/output.txt";

# Write the fixture, convert it, then read both files back.
write_text( $plain_path, $text );
$unify->file_to_unicode( $plain_path, $unicoded_path );
my $textin = read_text($plain_path);
my $text1  = read_text($unicoded_path);

isnt( $text, $text1, 'Unicoded file does not match non-unicoded file' );

# Compare the number of pieces produced by splitting on grapheme
# cluster boundaries, rather than comparing raw string lengths.
my $unicoded_count = scalar( split /\b{gcb}/, $text1 );
my $plain_count    = scalar( split /\b{gcb}/, $textin );
is( $unicoded_count, $plain_count, 'File length is unchanged' );

# Converting back must reproduce the fixture exactly.
$unify->file_back_to_ascii( $unicoded_path, $restored_path );
my $text2 = read_text($restored_path);

is( $text2, $text, 'Text files are lossless-ly processed' );



( run in 1.188 second using v1.01-cache-2.11-cpan-88abd93f124 )