unicode results from the CPAN

unicode

Result:

found more than 821 distributions - search limited to the first 2001 files matching your query ( run in 0.798 )

Data-Password-zxcvbn-French

1 match

view release on metacpan or search on metacpan

lib/Data/Password/zxcvbn/RankedDictionaries/French.pm view on Meta::CPAN

    'under' => 16915,
    'underground' => 13281,
    'une' => 13,
    'unes' => 5685,
    'unesco' => 26765,
    'unicode' => 12795,
    'unies' => 5565,
    'unifie' => 11831,
    'unifiee' => 16032,
    'unifiees' => 25287,
    'unifier' => 19918,

view all matches for this distribution

Data-Password-zxcvbn

2 results

view release on metacpan or search on metacpan

lib/Data/Password/zxcvbn/Match/Sequence.pm view on Meta::CPAN

my $MAX_DELTA = 5;

sub make {
    my ($class, $password) = @_;
    # Identifies sequences by looking for repeated differences in
    # unicode codepoint.  this allows skipping, such as 9753, and also
    # matches some extended unicode sequences such as Greek and
    # Cyrillic alphabets.
    #
    # for example, consider the input 'abcdb975zy'
    #
    # password: a   b   c   d   b    9   7   5   z   y

view all matches for this distribution

Data-Peek

2 results

view release on metacpan or search on metacpan

Peek.pm view on Meta::CPAN

    local $Data::Dumper::Indent    = 1;
    local $Data::Dumper::Quotekeys = 0;
    local $Data::Dumper::Deparse   = 1;
    local $Data::Dumper::Terse     = 1;
    local $Data::Dumper::Purity    = 1;
    local $Data::Dumper::Useqq     = 0;	# I want unicode visible

    my $s = Data::Dumper::Dumper (@_);
    $s =~ s/^(\s*)(.*?)\s*=>/sprintf "%s%-16s =>", $1, $2/gme;  # Align =>
    $s =~ s/\bbless\s*\(\s*/bless (/gm and $s =~ s/\s+\)([;,])$/)$1/gm;
    $s =~ s/^(?=\s*[]}](?:[;,]|$))/  /gm;

view all matches for this distribution

Data-Pond

3 results

view release on metacpan or search on metacpan

lib/Data/Pond.pm view on Meta::CPAN

=item B<undef_is_empty>

If false (the default), C<undef> will be treated as invalid data.
If true, C<undef> will be serialised as an empty string.

=item B<unicode>

If false (the default), the datum will be expressed using only ASCII
characters.  If true, non-ASCII characters may be used in string literals.

=back

lib/Data/Pond.pm view on Meta::CPAN

	return $str if $str =~ /\A(?:0|[1-9][0-9]{0,8})\z/;
	die "Pond data error: invalid character\n"
		unless $str =~ /\A[\x{0}-\x{7fffffff}]*\z/;
	$str =~ s/([\x00-\x1f\"\$\@\\\x7f-\xa0])/$str_encode{$1}/eg;
	$str =~ s/([^\x00-\x7f])/sprintf("\\x{%02x}", ord($1))/eg
		unless $options->{unicode};
	return "\"$str\"";
}

sub _strdatum_to_bareword($$) {
	return $_[0] =~ /\A[A-Za-z_][0-9A-Za-z_]*\z/ ? $_[0] :

view all matches for this distribution

Data-Pretty

2 results

view release on metacpan or search on metacpan

MANIFEST view on Meta::CPAN

t/005_eval.t
t/006_filtered.t
t/007_glob.t
t/008_hash.t
t/009_quote.t
t/010_quote-unicode.t
t/011_ref.t
t/012_regexp.t
t/013_scalar-obj.t
t/014_scalar.t
t/015_tied.t

view all matches for this distribution

Data-Printer-Theme-Zellner

2 results

view release on metacpan or search on metacpan

lib/Data/Printer/Theme/Zellner.pm view on Meta::CPAN

        format      => '#000000',    # format type
        repeated    => '#000000',    # references to seen values
        caller_info => '#878787',    # details on what's being printed
        weak        => '#000000',    # weak references flag
        tainted     => '#870000',    # tainted flag
        unicode     => '#000000',    # utf8 flag
        escaped     => '#ff00ff',    # escaped characters (\t, \n, etc)
        brackets    => '#000000',    # (), {}, []
        separator   => '#000000',    # the "," between hash pairs, array elements, etc
        quotes      => '#000000',    # q(")
        unknown     => '#878787',    # any (potential) data type unknown to Data::Printer

lib/Data/Printer/Theme/Zellner.pm view on Meta::CPAN

        format      => '#000000',    # format type
        repeated    => '#000000',    # references to seen values
        caller_info => '#878787',    # details on what's being printed
        weak        => '#000000',    # weak references flag
        tainted     => '#870000',    # tainted flag
        unicode     => '#000000',    # utf8 flag
        escaped     => '#ff00ff',    # escaped characters (\t, \n, etc)
        brackets    => '#000000',    # (), {}, []
        separator   => '#000000',    # the "," between hash pairs, array elements, etc
        quotes      => '#000000',    # q(")
        unknown     => '#878787',    # any (potential) data type unknown to Data::Printer

view all matches for this distribution

Data-Printer

15 results

view release on metacpan or search on metacpan

lib/Data/Printer.pm view on Meta::CPAN

default values. See L<Data::Printer::Object> for further information on
each of them:

    # scalar options
    show_tainted      = 1
    show_unicode      = 1
    show_lvalue       = 1
    print_escapes     = 0
    scalar_quotes     = "
    escape_chars      = none
    string_max        = 4096
    string_preserve   = begin
    string_overflow   = '(...skipping __SKIPPED__ chars...)'
    unicode_charnames = 0

    # array options
    array_max      = 100
    array_preserve = begin
    array_overflow = '(...skipping __SKIPPED__ items...)'

view all matches for this distribution

Data-Random-Structure-UTF8

10 results

view release on metacpan or search on metacpan

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN

use Scalar::Util qw( looks_like_number );

sub	new {
	# Constructor: accepts the parent's options plus our own
	# 'only-unicode' key (an undefined or absent value means 0).
	my ($class, %options) = @_;

	# Remove our private key before delegating, so that the parent
	# constructor never sees an option it does not know about.
	# delete() hands back the removed value (undef when the key
	# was absent), which covers both the "missing" and the
	# "present but undefined" cases in one go.
	my $only_unicode = delete $options{'only-unicode'};
	$only_unicode = 0 unless defined $only_unicode;

	# Per the original author's note, the parent's constructor
	# ends up calling our _init(), which in turn calls the
	# parent's _init().
	my $self = $class->SUPER::new(%options);

	# Apply the requested mode now that the object exists.
	$self->only_unicode($only_unicode);

	return $self;
}
sub	_reset {
	my $self = shift;
	# we are interfering with the internals of the parent... not good

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN

	my $self = shift;
	$self->_reset();
	$self->SUPER::_init(@_);
	push @{$self->{_scalar_types}}, 'string-UTF8'
}
sub	only_unicode {
	# Combined getter/setter for the 'only-unicode' mode.
	#   no argument (or undef) : return the stored mode
	#   0                      : re-initialise, keep every scalar type
	#   1                      : re-initialise, drop the plain 'string' type
	#   greater than 1         : re-initialise, keep only 'string-UTF8'
	# When setting, the value just stored is returned.
	my ($self, $mode) = @_;

	# getter
	if( ! defined $mode ){
		return $self->{'_only-unicode'};
	}

	# setter: re-run _init() first, then prune the list of
	# scalar types according to the requested mode
	$self->_init();
	$self->{'_only-unicode'} = $mode;

	if( $mode == 1 ){
		# keep every type except plain 'string': other scalar
		# types survive but strings will be exclusively unicode
		@{$self->{_scalar_types}} =
			grep { $_ ne 'string' } @{$self->{_scalar_types}};
	} elsif( $mode > 1 ){
		# throw away all scalar types and leave only our own,
		# so nothing but unicode strings is ever produced
		@{$self->{_scalar_types}} = ('string-UTF8');
	}

	return $mode;
}
sub	random_char_UTF8 {
	# the crucial part borrowed from The Effective Perler:
	# https://www.effectiveperlprogramming.com/2018/08/find-the-new-emojis-in-perls-unicode-support/
#	my $achar;
#	for(my $trials=100;$trials-->0;){
#		$achar = chr(int(rand(0x10FFF+1)));
#		return $achar if $achar =~ /\p{Present_In: 8.0}/;
#	}

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN

}
sub	check_content_recursively {
	# Translate the caller's "what to look for" hashref (2nd argument,
	# required) into a bit mask and hand the data (1st argument) over
	# to _check_content_recursively(), whose result is returned.
	# A key contributes its bits only if it exists AND its value == 1.
	my $looking_for = $_[1];

	# key in the hashref => bits it switches on
	#   1 : numbers, 2 : unicode strings, 4 : plain strings
	my @bits_for = (
		[ 'numbers',         1     ],
		[ 'strings-unicode', 2     ],
		[ 'strings-plain',   4     ],
		[ 'strings',         (2+4) ], # either kind of string
	);

	my $bitparams = 0;
	for my $entry (@bits_for){
		my ($key, $bits) = @$entry;
		next unless exists($looking_for->{$key});
		$bitparams |= $bits if $looking_for->{$key}==1;
	}

	return _check_content_recursively($_[0], $bitparams);
}
# returns 1 if we are looking for it and it was found
# returns 0 if what we were looking for was not found.
# 'looking_for' can be more than one things.
# it is a bit string, 1st bit if set looks for numbers,
# 2nd bit, if set, looks for unicode strings,
# 3rd bit, if set, looks for non-unicode strings (plain)
# if you set 'numbers'=>0, it simply means "do not check for numbers"
# and so it will not check if it has any numbers
# by giving nothing to check, it return 0, nothing was found
sub	_check_content_recursively {
	my $inp = $_[0];

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN

		if( looks_like_number($inp) ){
			return 1 if $looking_for & 1; # a number
			return 0;
		}
		if( _has_utf8($inp) ){
			return 1 if $looking_for & 2; # unicode string
			return 0;
		}
		return 1 if $looking_for & 4; # plain string
		return 0;
	} elsif( $aref eq 'ARRAY' ){

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN

			return 1 if $r;
		}
	} else { die "don't know how to deal with this ref '$aref'" }
}
sub	_has_utf8 {
	# Succeeds when the given string holds at least one character
	# outside the 7-bit ASCII range (i.e. above 0x7f). The result of
	# the match is handed back as-is, in the caller's context.
	my ($candidate) = @_;
	return $candidate =~ /[^\x00-\x7f]/;
}
# this does not work for unicode strings
# from https://www.perlmonks.org/?node_id=958679
# and https://www.perlmonks.org/?node_id=791677
#sub isnum ($) {
#    return 0 if $_[0] eq '';
#    $_[0] & ~$_[0] ? 0 : 1

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN


=encoding utf8

=head1 NAME

Data::Random::Structure::UTF8 - Produce nested data structures with unicode keys, values, elements.

=head1 VERSION

Version 0.06

=head1 SYNOPSIS

This module produces random, arbitrarily deep and long,
nested Perl data structures  with unicode content for the
keys, values and/or array elements. Content can be forced
to be exclusively strings and exclusively unicode. Or
the strings can be unicode. Or anything goes, mixed
unicode and non-unicode strings as well as integers, floats, etc.

This is an object-oriented module
which inherits from
L<Data::Random::Structure> and extends its functionality by
providing for unicode keys and values for hashtables and
unicode content for array elements or scalars, randomly mixed with the
usual repertoire of L<Data::Random::Structure>, which is
non-unicode strings,
numerical, boolean values and the assorted entourage to the court
of Emperor Computer, post-Turing.

For example, it produces these:

=over 4

=item * unicode scalars: e.g. C<"αβγ">,

=item * mixed arrays: e.g. C<["αβγ", "123", "xyz"]>

=item * hashtables with some/all keys and/or values as unicode: e.g.
C<< {"αβγ" => "123", "xyz" => "αβγ"} >>

=item * exclusive unicode arrays or hashtables: e.g. C<["αβγ", "χψζ"]>

=back

This is accomplished by adding an extra
type C<string-UTF8> (invisible to the user) and the
respective generator method. All these are invisible to the user,
who will get the old functionality plus some (or maybe none,
because this is a random process which does not eliminate non-unicode
strings, at the moment) unicode strings.

    use Data::Random::Structure::UTF8;

    my $randomiser = Data::Random::Structure::UTF8->new(
        'max_depth' => 5,
        'max_elements' => 20,
        # all the strings produced (keys, values, elements)
	# will be unicode strings
	'only-unicode' => 1,
        # all the strings produced (keys, values, elements)
	# will be a mixture of unicode and non-unicode
	# this is the default behaviour
	#'only-unicode' => 0,
        # only unicode strings will be produced for (keys, values, elements),
	# there will be no numbers, no bool, only unicode strings
	#'only-unicode' => 2,
    );
    my $perl_var = $randomiser->generate() or die;
    print pp($perl_var);

    # which prints the usual escape mess of Dump and Dumper

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN

      "xyz" => [1, 2, "\x{7D5A}\x{4EC1}"],
    },
  ],

    # can control the scalar type (for keys, values, items) on the fly
    # this produces unicode strings in addition to
    # Data::Random::Structure's usual repertoire:
    # non-unicode-string, numbers, bool, integer, float, etc.
    # (see there for the list)
    $randomiser->only_unicode(0); # the default: anything plus unicode strings
    print $randomiser->only_unicode();

    # this produces unicode strings in addition to
    # Data::Random::Structure's usual repertoire:
    # numbers, bool, integer, float, etc.
    # (see there for the list)
    # EXCEPT non-unicode-strings, (all strings will be unicode)
    $randomiser->only_unicode(1);
    print $randomiser->only_unicode();

    # this produces unicode strings ONLY
    # Data::Random::Structure's usual repertoire does not apply
    # there will be no numbers, no bool, no integer, no float, no nothing
    $randomiser->only_unicode(2);
    print $randomiser->only_unicode();

=head1 METHODS

This is an object oriented module which has exactly the same API as
L<Data::Random::Structure>.

=head2 C<new>

Constructor. In addition to the L<Data::Random::Structure> C<< new() >>
API, it takes the parameter C<< 'only-unicode' >> with
a valid value of 0, 1 or 2. Default is 0.

=over 4

=item * 0 : keys, values, elements of the produced data structure will be
a mixture of unicode strings, plus L<Data::Random::Structure>'s full
repertoire which includes non-unicode strings, integers, floats etc.

=item * 1 : keys, values, elements of the produced data structure will be
a mixture of unicode strings, plus L<Data::Random::Structure>'s full
repertoire except non-unicode strings. That is, all strings will be
unicode. But there will possibly be integers etc.

=item * 2 : keys, values, elements of the produced data structure will be
only unicode strings. Nothing of L<Data::Random::Structure>'s
repertoire applies. Only unicode strings, no integers, no nothing.

=back

Controlling the scalar data types can also be done on the fly, after
the object has been created, using the
L<Data::Random::Structure::UTF8> C<< only_unicode() >>
method.

Additionally, L<Data::Random::Structure> C<< new() >>'s API reports that
the constructor takes 2 optional arguments, C<max_depth> and C<max_elements>.
See L<Data::Random::Structure> C<< new() >> for up-to-date, official information.

=head2 C<only_unicode>

Controls which scalar types are included in the nested
data structures generated. With no parameters it returns
the current setting. Otherwise, valid input parameters and their
meanings are listed in L<Data::Random::Structure::UTF8> C<< new() >>

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN


It returns the Perl data structure as a reference.

=head2 C<generate_scalar>

Generate a scalar which may contain unicode content.
See L<Data::Random::Structure::generate_scalar> for
all options. This method is overridden by this module but
calls the parent's too.

It returns a Perl string.

=head2 C<generate_array>

Generate an array with random, possibly unicode, content.
See L<Data::Random::Structure::generate_array> for
all options. This method is not overridden by this module.

It returns the Perl array as a reference.

=head2 C<generate_hash>

Generate a hash with random, possibly unicode, content.
See L<Data::Random::Structure::generate_hash> for
all options. This method is not overridden by this module.

It returns the Perl hash as a reference.

=head2 C<random_char_UTF8>

Return a random unicode character, guaranteed to be valid.
This is a very simple method which selects characters
from some pre-set code pages (Greek, Cyrillic, Cherokee,
Ethiopic, Javanese) with equal probability.
These pages and ranges were selected so that there are
no "holes" between them which would produce an invalid
character. Therefore, not all characters from the
particular code page will be produced.

Returns a random unicode character guaranteed to be valid.

=head2 C<random_chars_UTF8>

  my $ret = random_chars_UTF8($optional_paramshash)

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN


=back

=back

Return a random unicode-only string, optionally specifying
minimum and maximum length. See
L<Data::Random::Structure::UTF8> C<< random_chars_UTF8() >>
for the range of characters it returns. The returned string
is unicode and all its characters are guaranteed to be valid.

=head1 SUBROUTINES

=head2 C<check_content_recursively>

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN

If set to 1 and a number C<123> or C<"123"> is found, this sub returns 1.
Set it to 0 to not look for numbers at all (and not report if
there are no numbers) - I<don't bother checking for numbers>, that's what
setting this to zero means.

=item * C<'strings-unicode'> set it to 1 to look for unicode strings (possibly among other things).
The definition of "unicode string" is that at least one of its characters is unicode.
If set to 1 and a "unicode string" is found, this sub returns 1.

=item * C<'strings-plain'> set it to 1 to look for plain strings (possibly among other things).
The definition of "plain string" is that none of its characters is unicode.
If set to 1 and a "plain string" is found, this sub returns 1.

=item * C<'strings'> set it to 1 to look for plain or unicode strings (possibly among other things).
If set to 1 and a "plain string" or "unicode string" is found, this sub returns 1. Basically,
it returns 1 when a string is found (as opposed to a "number").

=back

=back

In general, by setting C<< 'strings-unicode'=>1 >> you are checking whether
the input Perl variable contains a unicode string in a key, a value,
an array element, or a scalar reference.

But setting C<< 'strings-unicode'=>0 >> simply means do not look for
this. It does not mean I<report if there are NO unicode strings>.

Return value: 1 or 0 depending on whether what
it was looking for was found.

This is not an object-oriented method. It is called thusly:

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN

		# look for numbers, are there any?
		'numbers' => 1,
	}
    ) ){ print "data structure contains numbers\n" }

    # check if it contains no numbers but it does unicode strings
    if( Data::Random::Structure::UTF8::check_content_recursively(
	{'abc'=>123, 'xyz'=>[1,2,3]},
	{
		# don't look for numbers
		'numbers' => 0,
		# look for unicode strings, are there any?
		'strings-unicode' => 1,
	}
    ) ){ print "data structure contains unicode strings\n" }

CAVEAT: as its name suggests, this is a recursive function. Beware
of extremely deep data structures. Deep, not long. If you do get

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN


=over 4

=item * The parent class L<Data::Random::Structure>.

=item * L<Data::Roundtrip> for stringifying possibly-unicode Perl data structures.

=back

=head1 AUTHOR

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN


=head1 CAVEATS

There are two issues users should know about.

The first issue is that the unicode produced can make
L<Data::Dump> complain with

   Operation "lc" returns its argument for UTF-16 surrogate U+DA4B at /usr/local/share/perl5/Data/Dump.pm line 302.

This, I have found, can be fixed with the following workaround (from L<github user iafan|https://github.com/evernote/serge/commit/865402bbde42101345a5bee4cd0a855b9b76bdd7>, thank you):

lib/Data/Random/Structure/UTF8.pm view on Meta::CPAN

For the moment, I have placed a catch-all, fall-back condition
to handle this but it will be called for all kind of types
and not only the types we have added.

So, this issue is not going to make the module die but may make it
skew the random results in favour of unicode strings (which
is the fallback, default action when it can't parse the type).

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

view all matches for this distribution

Data-RecordStore

1 match

view release on metacpan or search on metacpan

VERSIONS view on Meta::CPAN


1.09 - added Data::RecordStore::has_id method
	
1.08 - added JSON requirement for tests
	
1.07 - updated test to include unicode characters
	
1.06 - added version to Make.PL

1.05 - added version to Build.PL. changed name from DB::DataStore to Data::RecordStore

view all matches for this distribution

Data-Roundtrip

25 results

view release on metacpan or search on metacpan

lib/Data/Roundtrip.pm view on Meta::CPAN

use strict;
use warnings;

our $VERSION = '0.30';

# import params is just one 'no-unicode-escape-permanently'
# if set, then unicode escaping will not happen at
# all, even if 'dont-bloody-escape-unicode' is set.
# Dump's filter and Dumper's qquote overwrite will be permanent
# which is more efficient but removes the flexibility
# of having unicode escaped and rendered at will.

use Encode qw/encode_utf8 decode_utf8/;
use JSON qw/decode_json encode_json/;
use Unicode::Escape qw/escape unescape/;
# YAML v1.30 fails for {"\"aaa'bbb" => "aaa","bbb" => 1,}

lib/Data/Roundtrip.pm view on Meta::CPAN


sub import {
	# what comes here is (package, param1, param2...) = @_
	# for something like
	# use Data::Roundtrip qw/param1 params2 .../;
	# we are looking for a param, eq to 'no-unicode-escape-permanently'
	# or 'unicode-escape-permanently'
	# the rest we must pass to the Exporter::import() but in a tricky way
	# so as it injects all these subs in the proper namespace.
	# that call is at the end, but with our parameter removed from the list
	for(my $i=@_;$i-->1;){
		if( $_[$i] eq 'no-unicode-escape-permanently' ){
			splice @_, $i, 1; # remove it from the list
			$Data::Dumper::Useperl = 1;
			$Data::Dumper::Useqq='utf8';
			no warnings 'redefine';
			*Data::Dumper::qquote = \& _qquote_redefinition_by_Corion;
			$_permanent_override = 1;

			# add a filter to Data::Dump
			Data::Dump::Filtered::add_dump_filter( \& DataDumpFilterino );
			$_permanent_filter = 1;
		} elsif( $_[$i] eq 'unicode-escape-permanently' ){
			splice @_, $i, 1; # remove it from the list
			# this is the case which we want to escape unicode permanently
			# which is the default behaviour for Dump and Dumper
			$_permanent_override = 2;
			$_permanent_filter = 2;
		}
	}

lib/Data/Roundtrip.pm view on Meta::CPAN

	my $pv = $_[0];
	my $params = defined($_[1]) ? $_[1] : {};
	my $pretty_printing = exists($params->{'pretty'}) && defined($params->{'pretty'})
		? $params->{'pretty'} : 0
	;
	my $escape_unicode = exists($params->{'escape-unicode'}) && defined($params->{'escape-unicode'})
		? $params->{'escape-unicode'} : 0
	;
	my $convert_blessed = exists($params->{'convert_blessed'}) && defined($params->{'convert_blessed'})
		? $params->{'convert_blessed'} : 0
	;
	my $json_string;

lib/Data/Roundtrip.pm view on Meta::CPAN

	# for example if your object stores the important data you
	# want to print in $self->{'data'}
	# then sub TO_JSON { shift->{'data'} }
	# see https://perldoc.perl.org/JSON::PP#2.-convert_blessed-is-enabled-and-the-object-has-a-TO_JSON-method.
	$encoder = $encoder->convert_blessed if $convert_blessed;
	if( $escape_unicode ){
		$json_string = eval { $encoder->utf8(1)->encode($pv) };
		if( ! defined($json_string) ){ print STDERR "error, call to ".'JSON->new->utf8(1)->encode()'." has failed".((defined($@)&&($@!~/^\s*$/))?" with this exception:\n".$@:".")."\n"; return undef }
		if ( _has_utf8($json_string) ){
			$json_string = Unicode::Escape::escape($json_string, 'utf8');
			if( ! defined($json_string) ){ print STDERR "error, call to ".'Unicode::Escape::escape()'." has failed.\n"; return undef }

lib/Data/Roundtrip.pm view on Meta::CPAN

		? $params->{'pretty'} : 0
	;
	print STDERR "perl2yaml() : pretty-printing is not supported for YAML output\n" and $pretty_printing=0
		if $pretty_printing;

	my $escape_unicode = exists($params->{'escape-unicode'}) && defined($params->{'escape-unicode'})
		? $params->{'escape-unicode'} : 0
	;
	my ($yaml_string, $escaped);
	if( $escape_unicode ){
		#if( $pretty_printing ){
			# it's here just for historic purposes, this is not supported and a warning is issued
			#$yaml_string = eval { YAML::PP::Dump($pv) };
			#if( ! defined $yaml_string ){ print STDERR "error, call to ".'YAML::PP::Dump()'." has failed with this exception:\n".$@."\n"; return undef }
			# this does not work :( no pretty printing for yaml

lib/Data/Roundtrip.pm view on Meta::CPAN

# The redefinition code is by [Corion] @ Perlmonks and cpan
# see https://perlmonks.org/?node_id=11115271
# So, it still uses Data::Dumper to dump the input perl var
# but with its qquote() sub redefined. See section CAVEATS
# for a wee problem that may appear in the future.
# The default behaviour is NOT to escape unicode
# (which is the opposite of what Data::Dumper is doing)
# see options, below, on how to change this.
# input is the perl variable (as a reference, e.g. scalar, hashref, arrayref)
# followed by optional hashref of options which can be
#   terse
#   indent
#   dont-bloody-escape-unicode,
#   escape-unicode,
#   The last 2 control how unicode is printed, either escaped,
#   like \x{3b1} or 'a' <<< which is unicoded greek alpha but did not want to pollute with unicode this file
#   the former behaviour can be with dont-bloody-escape-unicode=>0 or escape-unicode=>1,
#   the latter behaviour is the default. but setting the opposite of above will set it.
# NOTE: there are 2 alternatives to this
# perl2dump_filtered() which uses Data::Dump filters to control unicode escaping but
# lacks in aesthetics and functionality and handling all the cases Dump and Dumper
# do quite well.
# perl2dump_homebrew() uses the same dump-recursively engine but does not involve
# Data::Dump at all.
sub	perl2dump {

lib/Data/Roundtrip.pm view on Meta::CPAN

		? $params->{'indent'} : 1
	;

	if( ($_permanent_override == 0)
        && ((
		exists($params->{'dont-bloody-escape-unicode'}) && defined($params->{'dont-bloody-escape-unicode'})
		 && ($params->{'dont-bloody-escape-unicode'}==1)
	    ) || (
		exists($params->{'escape-unicode'}) && defined($params->{'escape-unicode'})
		 && ($params->{'escape-unicode'}==0)
	    )
	   )
	){
		# this is the case where no 'no-unicode-escape-permanently'
		# was used at loading the module
		# we have to use the special qquote each time caller
		# sets 'dont-bloody-escape-unicode'=>1
		# which will be replaced with the original sub
		# once we exit this scope.
		# make benchmarks will compare all cases if you ever
		# want to get more efficiency out of this
		local $Data::Dumper::Useperl = 1;

lib/Data/Roundtrip.pm view on Meta::CPAN

	my $pv = $_[0];
	my $params = defined($_[1]) ? $_[1] : {};

	if( ($_permanent_filter == 0)
        && ((
		exists($params->{'dont-bloody-escape-unicode'}) && defined($params->{'dont-bloody-escape-unicode'})
		 && ($params->{'dont-bloody-escape-unicode'}==1)
	    ) || (
		exists($params->{'escape-unicode'}) && defined($params->{'escape-unicode'})
		 && ($params->{'escape-unicode'}==0)
	    )
	   )
	){
		Data::Dump::Filtered::add_dump_filter( \& DataDumpFilterino );
		my $ret = Data::Dump::pp($pv);

lib/Data/Roundtrip.pm view on Meta::CPAN

	my $pv = $_[0];
	my $params = defined($_[1]) ? $_[1] : {};

	if( ($_permanent_override == 1)
        || (
		exists($params->{'dont-bloody-escape-unicode'}) && defined($params->{'dont-bloody-escape-unicode'})
		 && ($params->{'dont-bloody-escape-unicode'}==1)
	    ) || (
		exists($params->{'escape-unicode'}) && defined($params->{'escape-unicode'})
		 && ($params->{'escape-unicode'}==0)
	    )
	){
		return dump_perl_var_recursively($pv);
	}
	return Data::Dumper::Dumper($pv);
}
# this will take a perl var (as a scalar or an arbitrarily nested data structure)
# and emulate a very very basic
# Dump/Dumper but with rendering unicode (for keys or values or array items)
# it returns a string representation of the input perl var
# There are 2 obvious limitations:
# 1) indentation is very basic,
# 2) it supports only scalars, hashes and arrays,
#    (which will dive into them no problem)

lib/Data/Roundtrip.pm view on Meta::CPAN

    if ($high eq "iso8859") {   # Doesn't escape the Latin1 printables
      if ($Data_Dumper_IS_ASCII) {
        s/([\200-\240])/'\\'.sprintf('%o',ord($1))/eg;
      }
      elsif ($] ge 5.007_003) {
        my $high_control = utf8::unicode_to_native(0x9F);
        s/$high_control/sprintf('\\%o',ord($1))/eg;
      }
    } elsif ($high eq "utf8") {
#     Some discussion of what to do here is in
#       https://rt.perl.org/Ticket/Display.html?id=113088

lib/Data/Roundtrip.pm view on Meta::CPAN


=encoding utf8

=head1 NAME

Data::Roundtrip - convert between Perl data structures, YAML and JSON with unicode support (I believe ...)

=head1 VERSION

Version 0.30

=head1 SYNOPSIS

This module contains a collection of utilities for converting between
JSON, YAML, Perl variable and a Perl variable's string representation (aka dump).
Hopefully, all unicode content will be handled correctly between
the conversions and optionally escaped or un-escaped. Also JSON can
be presented in a pretty format or in a condensed, machine-readable
format (no spaces, indentation or line breaks).

    use Data::Roundtrip qw/:all/;

lib/Data/Roundtrip.pm view on Meta::CPAN

    # and/or truncated (replaced with ...)
    #---
    #Artist: Καζαντζίδης Στέλιος/Βίρβος Κώστας
    #Songname: Απόκληρος της κοινωνίας

    $yamlstr = json2yaml($jsonstr, {'escape-unicode'=>1});
    print $yamlstr;
    #---
    #Artist: \u039a\u03b1\u03b6\u03b1 ...
    #Songname: \u0391\u03c0\u03cc\u03ba ...

lib/Data/Roundtrip.pm view on Meta::CPAN

    # of following JSON structure:
    # {"Artist":"Καζαντζίδης Στέλιος/Βίρβος Κώστας",
    #  "Songname":"Απόκληρος της κοινωνίας"}

    # This is useful when sending JSON via
    # a POST request and it needs unicode escaped:
    $backtojson = yaml2json($yamlstr, {'escape-unicode'=>1});
    # $backtojson is a string representation
    # of following JSON structure:
    # but this time with unicode escaped
    # (pod content truncated for readability)
    # {"Artist":"\u039a\u03b1\u03b6 ...",
    #  "Songname":"\u0391\u03c0\u03cc ..."}
    # this is the usual Data::Dumper dump:
    print json2dump($jsonstr);

lib/Data/Roundtrip.pm view on Meta::CPAN

    #  'Songname' => "\x{391}\x{3c0}\x{3cc} ...",
    #  'Artist' => "\x{39a}\x{3b1}\x{3b6} ...",
    #};

    # and this is a more human-readable version:
    print json2dump($jsonstr, {'dont-bloody-escape-unicode'=>1});
    # $VAR1 = {
    #   "Artist" => "Καζαντζίδης Στέλιος/Βίρβος Κώστας",
    #   "Songname" => "Απόκληρος της κοινωνίας"
    # };

    # pass some parameters to Data::Dumper
    # like: be terse (no $VAR1):
    print json2dump($jsonstr,
      {'dont-bloody-escape-unicode'=>0, 'terse'=>1}
     #{'dont-bloody-escape-unicode'=>0, 'terse'=>1, 'indent'=>0}
    );
    # {
    #  "Artist" => "Καζαντζίδης Στέλιος/Βίρβος Κώστας",
    #  "Songname" => "Απόκληρος της κοινωνίας"
    # }

    # this is how to reformat a JSON string to
    # have its unicode content escaped:
    my $json_with_unicode_escaped =
          json2json($jsonstr, {'escape-unicode'=>1});

    # With version 0.18 and up two more exported-on-demand
    # subs were added to read JSON or YAML directly from a file:
    # jsonfile2perl() and yamlfile2perl()
    my $perldata = jsonfile2perl("file.json");
    my $perldata = yamlfile2perl("file.yaml");
    die "failed" unless defined $perldata;

    # For some of the above functions there exist command-line scripts:
    perl2json.pl -i "perl-data-structure.pl" -o "output.json" --pretty
    json2json.pl -i "with-unicode.json" -o "unicode-escaped.json" --escape-unicode
    # etc.

    # only for *2dump: perl2dump, json2dump, yaml2dump
    # and if no escape-unicode is required (i.e.
    # setting 'dont-bloody-escape-unicode' => 1 permanently)
    # and if efficiency is important,
    # meaning that perl2dump is run in a loop thousand of times,
    # then import the module like this:
    use Data::Roundtrip qw/:all no-unicode-escape-permanently/;
    # or like this
    use Data::Roundtrip qw/:all unicode-escape-permanently/;

    # then perl2dump() is more efficient but unicode characters
    # will be permanently not-escaped (1st case) or escaped (2nd case).

=head1 EXPORT

By default no symbols are exported. However, the following export tags are available (:all will export all of them):

lib/Data/Roundtrip.pm view on Meta::CPAN

in the usual way (e.g. C<use Data::Roundtrip qw/dump2perl perl2json .../>).
Section CAVEATS, under L</dump2perl>, describes how these
subs C<eval()> a string possibly coming from user,
possibly being unchecked.

=item * C<no-unicode-escape-permanently> : this is not an
export keyword/parameter but a parameter which affects
all the C<< *2dump* >> subs by setting unicode escaping
permanently to false. See L</EFFICIENCY>.

=item * C<unicode-escape-permanently> : this is not an
export keyword/parameter but a parameter which affects
all the C<< *2dump* >> subs by setting unicode escaping
permanently to true. See L</EFFICIENCY>.

=back

=head1 EFFICIENCY

The export keyword/parameter C<< no-unicode-escape-permanently >>
affects
all the C<< *2dump* >> subs by setting unicode escaping
permanently to false. This improves efficiency, although
one will only ever need to
use this in extreme situations where a C<< *2dump* >>
sub is called repeatedly in a loop of
a few hundreds or thousands of iterations or more.

Each time a C<< *2dump* >> is called, the
C<< dont-bloody-escape-unicode >> flag is checked
and if it is set, then  L<Data::Dumper>'s C<< qquote() >>
is overridden with C<< _qquote_redefinition_by_Corion() >>
just for that instance and will be restored as soon as
the dump is finished. Similarly, a filter for
not escaping unicode is added to L<Data::Dump>
just for that particular call and is removed immediately
after. This has some computational cost and can be
avoided completely by overriding the sub
and adding the filter once, at loading (in C<< import() >>).

The price to pay for this added efficiency is that
unicode in any dump will never be escaped (e.g. C<< \x{3b1} >>),
but will be rendered (e.g. C<< α >>, a Greek alpha). Always.
The option
C<< dont-bloody-escape-unicode >> will permanently be set to true.

Similarly, the export keyword/parameter
C<< unicode-escape-permanently >>
affects
all the C<< *2dump* >> subs by setting unicode escaping
permanently to true. This improves efficiency as well.

See L</BENCHMARKS> on how to find the fastest C<< *2dump* >>
sub.

lib/Data/Roundtrip.pm view on Meta::CPAN

The special Makefile target C<< benchmarks >> will time
calls to each of the C<< *2dump* >> subs under

    use Data::Roundtrip;

    use Data::Roundtrip qw/no-unicode-escape-permanently/;

    use Data::Roundtrip qw/unicode-escape-permanently/;

and for C<< 'dont-bloody-escape-unicode' => 0 >> and
C<< 'dont-bloody-escape-unicode' => 1 >>.

In general, L</perl2dump> is faster by 25% when one of the
permanent import parameters is used
(either of the last two cases above).

lib/Data/Roundtrip.pm view on Meta::CPAN

=back

Given an input C<$perlvar> (which can be a simple scalar or
a nested data structure, but not an object), it will return
the equivalent JSON string. In C<$optional_paramshashref>
one can specify whether to escape unicode with
C<< 'escape-unicode' => 1 >>
and/or prettify the returned result with C<< 'pretty' => 1 >>
and/or allow conversion of blessed objects with C<< 'convert_blessed' => 1 >>.

The latter is useful when the input (Perl) data structure
contains Perl objects (blessed refs!). But in addition to

lib/Data/Roundtrip.pm view on Meta::CPAN

=back

Given an input C<$perlvar> (which can be a simple scalar or
a nested data structure, but not an object), it will return
the equivalent YAML string. In C<$optional_paramshashref>
one can specify whether to escape unicode with
C<< 'escape-unicode' => 1 >>. Prettify is not supported yet.
The output can be fed to L</yaml2perl>
for getting the Perl variable back.

It returns the YAML string on success or C<undef> on failure.

lib/Data/Roundtrip.pm view on Meta::CPAN


Given an input C<$perlvar> (which can be a simple scalar or
a nested data structure, but not an object), it will return
the equivalent string (via L<Data::Dumper>).
In C<$optional_paramshashref>
one can specify whether to escape unicode with
C<< 'dont-bloody-escape-unicode' => 0 >>,
(or C<< 'escape-unicode' => 1 >>). The DEFAULT
behaviour is to NOT ESCAPE unicode.

Additionally, use terse output with C<< 'terse' => 1 >> and remove
all the incessant indentation with C<< 'indent' => 0 >>
which unfortunately goes to the other extreme of
producing a space-less output, not fit for human consumption.

lib/Data/Roundtrip.pm view on Meta::CPAN

It returns the string representation of the input perl variable
on success or C<undef> on failure.

The output can be fed back to L</dump2perl>.

CAVEAT: when not escaping unicode (which is the default
behaviour), each call to this sub will override L<Data::Dumper>'s
C<qquote()> sub then
call L<Data::Dumper>'s C<Dumper()> and save its output to
a temporary variable, restore C<qquote()> sub to its original
code ref and return the

lib/Data/Roundtrip.pm view on Meta::CPAN


Note that there are two other alternative subs which offer more-or-less
the same functionality and their output can be fed back to all the C<< dump2*() >>
subs. These are
L</perl2dump_filtered> which uses L<Data::Dump::Filtered>
to add a filter to control unicode escaping but
lacks in aesthetics and functionality, and does not handle all the
cases that Dump and Dumper handle quite well.

There is also C<< perl2dump_homebrew() >> which
uses the same dump-recursively engine as

lib/Data/Roundtrip.pm view on Meta::CPAN


=back

This sub will take a Perl var (as a scalar or an arbitrarily nested data structure)
and emulate a very very basic
Dump/Dumper but with enforced rendering of unicode (for keys, values or array items)
and no escaping of unicode - this is not optional.
It returns a string representation of the input Perl var.

There are 2 obvious limitations:

=over 4

lib/Data/Roundtrip.pm view on Meta::CPAN

It returns the JSON string on success or C<undef> on failure.

=head2 C<json2json> C<yaml2yaml>

Transform a json or yaml string via pretty printing or via
escaping unicode or via un-escaping unicode. Parameters
like above will be accepted.

=head2 C<json2dump> C<dump2json> C<yaml2dump> C<dump2yaml>

These subs offer similar functionality as their counterparts

lib/Data/Roundtrip.pm view on Meta::CPAN


  my $dumpstr = '...';
  my $newdumpstr = dump2dump(
    $dumpstr,
    {
      'dont-bloody-escape-unicode' => 1,
      'terse' => 0,
    }
  );

lib/Data/Roundtrip.pm view on Meta::CPAN


=head1 SEE ALSO

=over 4

=item L<Convert JSON to Perl and back with unicode|https://perlmonks.org/?node_id=11115241>

=item L<RFC: PerlE<lt>-E<gt>JSONE<lt>-E<gt>YAMLE<lt>-E<gt>Dumper : roundtripping and possibly with unicode|https://perlmonks.org/?node_id=11115280>

=back

=head1 SUPPORT

lib/Data/Roundtrip.pm view on Meta::CPAN


=item L<haukex|https://perlmonks.org/?node_id=830549>

=item L<Corion|https://perlmonks.org/?node_id=5348> (the
C<< _qquote_redefinition_by_Corion() >> which harnesses
L<Data::Dumper>'s incessant unicode escaping)

=item L<kcott|https://perlmonks.org/?node_id=861371>
(The EXPORT section among other suggestions)

=item L<jwkrahn|https://perlmonks.org/?node_id=540414>

view all matches for this distribution

Data-Section

1 match

view release on metacpan or search on metacpan

t/encodings.t view on Meta::CPAN

my $builder = Test::More->builder;
binmode $builder->output,         ":utf8";
binmode $builder->failure_output, ":utf8";
binmode $builder->todo_output,    ":utf8";

my $unicode = "\x{263a}";
my $latin1  = "Ricardo Juli\x{E1}n Besteiro Signes";

my %test_data = (
    Unicode_nopragma => $unicode,
    Unicode_pragma   => $unicode,
    Latin1           => $latin1,
);

for my $class ( keys %test_data ) {
    eval "require $class; 1" or die $@; ## no critic

view all matches for this distribution

( run in 0.798 second using v1.01-cache-2.11-cpan-88abd93f124 )