unicode results from the CPAN

Data-Dumper

view release on metacpan or search on metacpan

my $XS;

# Force Data::Dumper::Dump to use perl. We test Dumpxs explicitly by calling
# it direct. Out here it lets us knobble the next if to test that the perl
# only tests do work (and count correctly)
$Data::Dumper::Useperl = 1;
if (defined &Data::Dumper::Dumpxs) {
    print "### XS extension loaded, will run XS tests\n";
    $XS = 1;
}
else {
    print "### XS extensions not loaded, will NOT run XS tests\n";
    $XS = 0;
}

our ( @a, $c, $d, $foo, @foo, %foo, @globs, $v, $ping, %ping );
our ( @dogs, %kennel, $mutts );

our ( @numbers, @strings );
our ( @numbers_s, @numbers_i, @numbers_is, @numbers_n, @numbers_ns, @numbers_ni, @numbers_nis );
our ( @strings_s, @strings_i, @strings_is, @strings_n, @strings_ns, @strings_ni, @strings_nis );

# Perl 5.16 was the first version that correctly handled Unicode in typeglob
# names. Tests for how globs are dumped must revise their expectations
# downwards when run on earlier Perls.
sub change_glob_expectation {
    my ($input) = @_;
    if ($] < 5.016) {
        $input =~ s<\\x\{([0-9a-f]+)\}>{
            my $s = chr hex $1;
            utf8::encode($s);
            join '', map sprintf('\\%o', ord), split //, $s;
        }ge;
    }
    return $input;
}

sub convert_to_native {
    my $input = shift;

    my @output;

    # The input should always be one of the following constructs
    while ($input =~ m/ ( \\ [0-7]+ )
                      | ( \\ x \{ [[:xdigit:]]+ } )
                      | ( \\ . )
                      | ( . ) /gx)
    {
        #print STDERR __LINE__, ": ", $&, "\n";
        my $index;
        my $replacement;
        if (defined $4) {       # Literal
            $index = ord $4;
            $replacement = $4;
        }
        elsif (defined $3) {    # backslash escape
            $index = ord eval "\"$3\"";
            $replacement = $3;
        }
        elsif (defined $2) {    # Hex
            $index = utf8::unicode_to_native(ord eval "\"$2\"");

            # But low hex numbers are always in octal.  These are all
            # controls.
            my $format = ($index < ord(" "))
                         ? "\\%o"
                         : "\\x{%x}";
            $replacement = sprintf($format, $index);
        }
        elsif (defined $1) {    # Octal
            $index = utf8::unicode_to_native(ord eval "\"$1\"");
            $replacement = sprintf("\\%o", $index);
        }
        else {
            die "Unexpected match in convert_to_native()";
        }

        if (defined $output[$index]) {
            print STDERR "ordinal $index already has '$output[$index]'; skipping '$replacement'\n";
            next;
        }

        $output[$index] = $replacement;
    }

    return join "", grep { defined } @output;
}

sub TEST {
    my ($string, $desc, $want) = @_;
    Carp::confess("Tests must have a description")
            unless $desc;

    local $Test::Builder::Level = $Test::Builder::Level + 1;
 SKIP: {
        my $have = do {
            no strict;
            eval $string;
        };
        my $error = $@;

        if (defined $error && length $error) {
            is($error, "", "$desc set \$@");
            skip('No point in running eval after an error', 2);
        }

        $have =~ s/([A-Z]+)\(0x[0-9a-f]+\)/$1(0xdeadbeef)/g
            if $want =~ /deadbeef/;
        is($have, $want, $desc);

        {
            no strict;
            eval "$have";
        }

        is($@, "", "$desc - output did not eval")
            or skip('No point in restesting if output failed eval');

        $have = do {
            no strict;
            eval $string;
        };
        $error = $@;

        if (defined $error && length $error) {
            is($error, "", "$desc after eval set \$@");
        }
        else {
            $have =~ s/([A-Z]+)\(0x[0-9a-f]+\)/$1(0xdeadbeef)/g
                if $want =~ /deadbeef/;
            is($have, $want, "$desc after eval");

t/dumper.t view on Meta::CPAN

  # generated \65.66.77 (no v). Now fixed.
  my $ABC_native = chr(65) . chr(66) . chr(67);
  my $want = $XS ? <<"VSTRINGS_CORRECT" : <<"NO_vstring_HELPER";
#\$a = \\v65.66.67;
#\$b = \\v65.66.067;
#\$c = \\v65.66.6_7;
#\$d = \\'$ABC_native';
VSTRINGS_CORRECT
#\$a = \\v65.66.67;
#\$b = \\v65.66.67;
#\$c = \\v65.66.67;
#\$d = \\'$ABC_native';
NO_vstring_HELPER

  @::_v = (
    \v65.66.67,
    \(eval 'v65.66.067'),
    \v65.66.6_7,
    \~v190.189.188
  );
  if ($] >= 5.010) {
    TEST_BOTH(q(Data::Dumper->Dumpxs(\@::_v, [qw(a b c d)])),
              'vstrings',
              $want);
  }
  else { # Skip tests before 5.10. vstrings considered funny before
    SKIP_BOTH("vstrings considered funny before 5.10.0");
  }
}

#############
{
  # [perl #107372] blessed overloaded globs
  my $want = <<'EOW';
#$VAR1 = bless( \*::finkle, 'overtest' );
EOW
  {
    package overtest;
    use overload fallback=>1, q\""\=>sub{"oaoaa"};
  }
  TEST_BOTH(q(Data::Dumper->Dumpxs([bless \*finkle, "overtest"])),
            'blessed overloaded globs',
            $want);
}
#############
{
  # [perl #74798] uncovered behaviour
  my $want = <<'EOW';
#$VAR1 = "\0000";
EOW
  local $Data::Dumper::Useqq = 1;
  TEST_BOTH(q(Data::Dumper->Dumpxs(["\x000"])),
            "\\ octal followed by digit",
            $want);

  $want = <<'EOW';
#$VAR1 = "\x{100}\0000";
EOW
  local $Data::Dumper::Useqq = 1;
  TEST_BOTH(q(Data::Dumper->Dumpxs(["\x{100}\x000"])),
            "\\ octal followed by digit unicode",
            $want);

  $want = <<'EOW';
#$VAR1 = "\0\x{660}";
EOW
  TEST_BOTH(q(Data::Dumper->Dumpxs(["\\x00\\x{0660}"])),
            "\\ octal followed by unicode digit",
            $want);

  # [perl #118933 - handling of digits
  $want = <<'EOW';
#$VAR1 = 0;
#$VAR2 = 1;
#$VAR3 = 90;
#$VAR4 = -10;
#$VAR5 = "010";
#$VAR6 = 112345678;
#$VAR7 = "1234567890";
EOW
  TEST_BOTH(q(Data::Dumper->Dumpxs([0, 1, 90, -10, "010", "112345678", "1234567890" ])),
            "numbers and number-like scalars",
            $want);
}
#############
{
  # [github #18614 - handling of Unicode characters in regexes]
  # [github #18764 - ... without breaking subsequent Latin-1]
  if ($] lt '5.010') {
      SKIP_BOTH("Incomplete support for UTF-8 in old perls");
      last;
  }
  my $want = <<"EOW";
#\$VAR1 = [
#  "\\x{41f}",
#  qr/\x{8b80}/,
#  qr/\x{41f}/,
#  qr/\x{e4}/,
#  '\xE4'
#];
EOW
  if ($] lt '5.010001') {
      $want =~ s!qr/!qr/(?-xism:!g;
      $want =~ s!/,!)/,!g;
  }
  elsif ($] gt '5.014') {
      $want =~ s{/(,?)$}{/u$1}mg;
  }
  my $want_xs = $want;
  $want_xs =~ s/'\xE4'/"\\x{e4}"/;
  $want_xs =~ s<([^\0-\177])> <sprintf '\\x{%x}', ord $1>ge;
  TEST_BOTH(qq(Data::Dumper->Dumpxs([ [qq/\x{41f}/, qr/\x{8b80}/, qr/\x{41f}/, qr/\x{e4}/, "\xE4"] ])),
            "string with Unicode + regexp with Unicode",
            $want, $want_xs);
}
#############
{
  # [more perl #58608 tests]
  my $bs = "\\\\";
  my $want = <<"EOW";
#\$VAR1 = [
#  qr/ \\/ /,
#  qr/ \\?\\/ /,
#  qr/ $bs\\/ /,
#  qr/ $bs:\\/ /,
#  qr/ \\?$bs:\\/ /,
#  qr/ $bs$bs\\/ /,
#  qr/ $bs$bs:\\/ /,

t/dumper.t view on Meta::CPAN

      $want =~ s!qr/!qr/(?-xism:!g;
      $want =~ s!/,!)/,!g;
  }
  my $want_xs = $want;
  $want_xs =~ s/'\x{A3}'/"\\x{a3}"/;
  $want_xs =~ s/\x{A3}/\\x{a3}/;
  $want_xs =~ s/\x{203D}/\\x{203d}/g;
  my $have = <<"EOT";
Data::Dumper->Dumpxs([ [
  "\\x{2e18}",
  qr/^\$/,
  qr'^\$',
  qr'\$foo',
  qr/\\\$foo/,
  qr'\$ \x{A3} ',
  qr'\$ \x{203d} ',
  qr/\\\$ \x{203d} /,
  qr'\\\\\$ \x{203d} ',
  qr/ \$| \x{203d} /,
  qr/ (\$) \x{203d} /,
  '\xA3'
] ]);
EOT
  TEST_BOTH($have, "CPAN #84569", $want, $want_xs);
}
#############
{
  # [perl #82948]
  # re::regexp_pattern was moved to universal.c in v5.10.0-252-g192c1e2
  # and apparently backported to maint-5.10
  my $want = $] > 5.010 ? <<'NEW' : <<'OLD';
#$VAR1 = qr/abc/;
#$VAR2 = qr/abc/i;
NEW
#$VAR1 = qr/(?-xism:abc)/;
#$VAR2 = qr/(?i-xsm:abc)/;
OLD
  TEST_BOTH(q(Data::Dumper->Dumpxs([ qr/abc/, qr/abc/i ])), "qr// xs", $want);
}
#############

{
  sub foo {}
  my $want = <<'EOW';
#*a = sub { "DUMMY" };
#$b = \&a;
EOW

  TEST_BOTH(q(Data::Dumper->new([ \&foo, \\&foo ], [ "*a", "b" ])->Dumpxs),
            "name of code in *foo",
            $want);
}
#############

{
    # There is special code to handle the single control that in EBCDIC is
    # not in the block with all the other controls, when it is UTF-8 and
    # there are no variants in it (All controls in EBCDIC are invariant.)
    # This tests that.  There is no harm in testing this works on ASCII,
    # and is better to not have split code paths.
    my $outlier = chr utf8::unicode_to_native(0x9F);
    my $outlier_hex = sprintf "%x", ord $outlier;
    my $want = <<EOT;
#\$VAR1 = \"\\x{$outlier_hex}\";
EOT
    $foo = "$outlier\x{100}";
    chop $foo;
    local $Data::Dumper::Useqq = 1;
    TEST_BOTH (q(Data::Dumper::DumperX($foo)),
               'EBCDIC outlier control: DumperX',
               $want);
}
############# [perl #124091]
{
    my $want = <<'EOT';
#$VAR1 = "\n";
EOT
    local $Data::Dumper::Useqq = 1;
    TEST_BOTH(qq(Data::Dumper::DumperX("\n")),
              '\n alone',
              $want);
}
#############
{
    no strict 'refs';
    @globs = map { $_, \$_ } map { *$_ } map { $_, "s::$_" }
        "foo", "\1bar", "L\x{e9}on", "m\x{100}cron", "snow\x{2603}";
}

{
  my $want = change_glob_expectation(<<'EOT');
#$globs = [
#  *::foo,
#  \*::foo,
#  *s::foo,
#  \*s::foo,
#  *{"::\1bar"},
#  \*{"::\1bar"},
#  *{"s::\1bar"},
#  \*{"s::\1bar"},
#  *{"::L\351on"},
#  \*{"::L\351on"},
#  *{"s::L\351on"},
#  \*{"s::L\351on"},
#  *{"::m\x{100}cron"},
#  \*{"::m\x{100}cron"},
#  *{"s::m\x{100}cron"},
#  \*{"s::m\x{100}cron"},
#  *{"::snow\x{2603}"},
#  \*{"::snow\x{2603}"},
#  *{"s::snow\x{2603}"},
#  \*{"s::snow\x{2603}"}
#];
EOT
  local $Data::Dumper::Useqq = 1;
  if (ord("A") == 65) {
    TEST_BOTH(q(Data::Dumper->Dumpxs([\@globs], ["globs"])), 'globs: Dumpxs()',
              $want);
  }
  else {
    SKIP_BOTH("ASCII-dependent test");

( run in 1.275 second using v1.01-cache-2.11-cpan-39bf76dae61 )