unicode results from the CPAN

unicode

CPANPLUS

view release on metacpan or search on metacpan

inc/bundle/JSON/PP.pm view on Meta::CPAN

package JSON::PP;

# JSON-2.0

use 5.008;
use strict;

use Exporter ();
BEGIN { our @ISA = ('Exporter') }

use overload ();
use JSON::PP::Boolean;

use Carp ();
use Scalar::Util qw(blessed reftype refaddr);
#use Devel::Peek;

our $VERSION = '4.16';

our @EXPORT = qw(encode_json decode_json from_json to_json);

# instead of hash-access, i tried index-access for speed.
# but this method is not faster than what i expected. so it will be changed.

use constant P_ASCII                => 0;
use constant P_LATIN1               => 1;
use constant P_UTF8                 => 2;
use constant P_INDENT               => 3;
use constant P_CANONICAL            => 4;
use constant P_SPACE_BEFORE         => 5;
use constant P_SPACE_AFTER          => 6;
use constant P_ALLOW_NONREF         => 7;
use constant P_SHRINK               => 8;
use constant P_ALLOW_BLESSED        => 9;
use constant P_CONVERT_BLESSED      => 10;
use constant P_RELAXED              => 11;

use constant P_LOOSE                => 12;
use constant P_ALLOW_BIGNUM         => 13;
use constant P_ALLOW_BAREKEY        => 14;
use constant P_ALLOW_SINGLEQUOTE    => 15;
use constant P_ESCAPE_SLASH         => 16;
use constant P_AS_NONBLESSED        => 17;

use constant P_ALLOW_UNKNOWN        => 18;
use constant P_ALLOW_TAGS           => 19;

use constant USE_B => $ENV{PERL_JSON_PP_USE_B} || 0;
use constant CORE_BOOL => defined &builtin::is_bool;

my $invalid_char_re;

BEGIN {
    $invalid_char_re = "[";
    for my $i (0 .. 0x01F, 0x22, 0x5c) { # '/' is ok
        $invalid_char_re .= quotemeta chr utf8::unicode_to_native($i);
    }

    $invalid_char_re = qr/$invalid_char_re]/;
}

BEGIN {
    if (USE_B) {
        require B;
    }
}

BEGIN {
    my @xs_compati_bit_properties = qw(
            latin1 ascii utf8 indent canonical space_before space_after allow_nonref shrink
            allow_blessed convert_blessed relaxed allow_unknown
            allow_tags
    );
    my @pp_bit_properties = qw(
            allow_singlequote allow_bignum loose
            allow_barekey escape_slash as_nonblessed
    );

    for my $name (@xs_compati_bit_properties, @pp_bit_properties) {
        my $property_id = 'P_' . uc($name);

        eval qq/
            sub $name {
                my \$enable = defined \$_[1] ? \$_[1] : 1;

                if (\$enable) {
                    \$_[0]->{PROPS}->[$property_id] = 1;
                }
                else {
                    \$_[0]->{PROPS}->[$property_id] = 0;
                }

                \$_[0];
            }

            sub get_$name {
                \$_[0]->{PROPS}->[$property_id] ? 1 : '';
            }
        /;
    }

}



# Functions

my $JSON; # cache

sub encode_json ($) { # encode
    ($JSON ||= __PACKAGE__->new->utf8)->encode(@_);
}


sub decode_json { # decode
    ($JSON ||= __PACKAGE__->new->utf8)->decode(@_);

inc/bundle/JSON/PP.pm view on Meta::CPAN



    sub _sort {
        defined $keysort ? (sort $keysort (keys %{$_[0]})) : keys %{$_[0]};
    }


    sub _up_indent {
        my $self  = shift;
        my $space = ' ' x $indent_length;

        my ($pre,$post) = ('','');

        $post = "\n" . $space x $indent_count;

        $indent_count++;

        $pre = "\n" . $space x $indent_count;

        return ($pre,$post);
    }


    sub _down_indent { $indent_count--; }


    sub PP_encode_box {
        {
            depth        => $depth,
            indent_count => $indent_count,
        };
    }

} # Convert


sub _encode_ascii {
    join('',
        map {
            chr($_) =~ /[[:ascii:]]/ ?
                chr($_) :
            $_ <= 65535 ?
                sprintf('\u%04x', $_) : sprintf('\u%x\u%x', _encode_surrogates($_));
        } unpack('U*', $_[0])
    );
}


sub _encode_latin1 {
    join('',
        map {
            $_ <= 255 ?
                chr($_) :
            $_ <= 65535 ?
                sprintf('\u%04x', $_) : sprintf('\u%x\u%x', _encode_surrogates($_));
        } unpack('U*', $_[0])
    );
}


sub _encode_surrogates { # from perlunicode
    my $uni = $_[0] - 0x10000;
    return ($uni / 0x400 + 0xD800, $uni % 0x400 + 0xDC00);
}


sub _is_bignum {
    $_[0]->isa('Math::BigInt') or $_[0]->isa('Math::BigFloat');
}



#
# JSON => Perl
#

my $max_intsize;

BEGIN {
    my $checkint = 1111;
    for my $d (5..64) {
        $checkint .= 1;
        my $int   = eval qq| $checkint |;
        if ($int =~ /[eE]/) {
            $max_intsize = $d - 1;
            last;
        }
    }
}

{ # PARSE 

    my %escapes = ( #  by Jeremy Muhlich <jmuhlich [at] bitflood.org>
        b    => "\b",
        t    => "\t",
        n    => "\n",
        f    => "\f",
        r    => "\r",
        '\\' => '\\',
        '"'  => '"',
        '/'  => '/',
    );

    my $text; # json data
    my $at;   # offset
    my $ch;   # first character
    my $len;  # text length (changed according to UTF8 or NON UTF8)
    # INTERNAL
    my $depth;          # nest counter
    my $encoding;       # json text encoding
    my $is_valid_utf8;  # temp variable
    my $utf8_len;       # utf8 byte length
    # FLAGS
    my $utf8;           # must be utf8
    my $max_depth;      # max nest number of objects and arrays
    my $max_size;
    my $relaxed;
    my $cb_object;
    my $cb_sk_object;

    my $F_HOOK;

inc/bundle/JSON/PP.pm view on Meta::CPAN


    sub string {
        my $utf16;
        my $is_utf8;

        ($is_valid_utf8, $utf8_len) = ('', 0);

        my $s = ''; # basically UTF8 flag on

        if($ch eq '"' or ($singlequote and $ch eq "'")){
            my $boundChar = $ch;

            OUTER: while( defined(next_chr()) ){

                if($ch eq $boundChar){
                    next_chr();

                    if ($utf16) {
                        decode_error("missing low surrogate character in surrogate pair");
                    }

                    utf8::decode($s) if($is_utf8);

                    return $s;
                }
                elsif($ch eq '\\'){
                    next_chr();
                    if(exists $escapes{$ch}){
                        $s .= $escapes{$ch};
                    }
                    elsif($ch eq 'u'){ # UNICODE handling
                        my $u = '';

                        for(1..4){
                            $ch = next_chr();
                            last OUTER if($ch !~ /[0-9a-fA-F]/);
                            $u .= $ch;
                        }

                        # U+D800 - U+DBFF
                        if ($u =~ /^[dD][89abAB][0-9a-fA-F]{2}/) { # UTF-16 high surrogate?
                            $utf16 = $u;
                        }
                        # U+DC00 - U+DFFF
                        elsif ($u =~ /^[dD][c-fC-F][0-9a-fA-F]{2}/) { # UTF-16 low surrogate?
                            unless (defined $utf16) {
                                decode_error("missing high surrogate character in surrogate pair");
                            }
                            $is_utf8 = 1;
                            $s .= _decode_surrogates($utf16, $u) || next;
                            $utf16 = undef;
                        }
                        else {
                            if (defined $utf16) {
                                decode_error("surrogate pair expected");
                            }

                            my $hex = hex( $u );
                            if ( chr $u =~ /[[:^ascii:]]/ ) {
                                $is_utf8 = 1;
                                $s .= _decode_unicode($u) || next;
                            }
                            else {
                                $s .= chr $hex;
                            }
                        }

                    }
                    else{
                        unless ($loose) {
                            $at -= 2;
                            decode_error('illegal backslash escape sequence in string');
                        }
                        $s .= $ch;
                    }
                }
                else{

                    if ( $ch =~ /[[:^ascii:]]/ ) {
                        unless( $ch = is_valid_utf8($ch) ) {
                            $at -= 1;
                            decode_error("malformed UTF-8 character in JSON string");
                        }
                        else {
                            $at += $utf8_len - 1;
                        }

                        $is_utf8 = 1;
                    }

                    if (!$loose) {
                        if ($ch =~ $invalid_char_re)  { # '/' ok
                            if (!$relaxed or $ch ne "\t") {
                                $at--;
                                decode_error(sprintf "invalid character 0x%X"
                                   . " encountered while parsing JSON string",
                                   ord $ch);
                            }
                        }
                    }

                    $s .= $ch;
                }
            }
        }

        decode_error("unexpected end of string while parsing JSON string");
    }


    sub white {
        while( defined $ch  ){
            if($ch eq '' or $ch =~ /\A[ \t\r\n]\z/){
                next_chr();
            }
            elsif($relaxed and $ch eq '/'){
                next_chr();
                if(defined $ch and $ch eq '/'){
                    1 while(defined(next_chr()) and $ch ne "\n" and $ch ne "\r");
                }
                elsif(defined $ch and $ch eq '*'){

inc/bundle/JSON/PP.pm view on Meta::CPAN

            }
            else {
                $n .= $ch;
            }

            while(defined(next_chr) and $ch =~ /\d/){
                $n .= $ch;
            }
        }

        if(defined $ch and ($ch eq 'e' or $ch eq 'E')){
            $n .= $ch;
            $is_exp = 1;
            next_chr;

            if(defined($ch) and ($ch eq '+' or $ch eq '-')){
                $n .= $ch;
                next_chr;
                if (!defined $ch or $ch =~ /\D/) {
                    decode_error("malformed number (no digits after exp sign)");
                }
                $n .= $ch;
            }
            elsif(defined($ch) and $ch =~ /\d/){
                $n .= $ch;
            }
            else {
                decode_error("malformed number (no digits after exp sign)");
            }

            while(defined(next_chr) and $ch =~ /\d/){
                $n .= $ch;
            }

        }

        $v .= $n;

        if ($is_dec or $is_exp) {
            if ($allow_bignum) {
                require Math::BigFloat;
                return Math::BigFloat->new($v);
            }
        } else {
            if (length $v > $max_intsize) {
                if ($allow_bignum) { # from Adam Sussman
                    require Math::BigInt;
                    return Math::BigInt->new($v);
                }
                else {
                    return "$v";
                }
            }
        }

        return $is_dec ? $v/1.0 : 0+$v;
    }

    # Compute how many bytes are in the longest legal official Unicode
    # character
    my $max_unicode_length = do {
      no warnings 'utf8';
      chr 0x10FFFF;
    };
    utf8::encode($max_unicode_length);
    $max_unicode_length = length $max_unicode_length;

    sub is_valid_utf8 {

        # Returns undef (setting $utf8_len to 0) unless the next bytes in $text
        # comprise a well-formed UTF-8 encoded character, in which case,
        # return those bytes, setting $utf8_len to their count.

        my $start_point = substr($text, $at - 1);

        # Look no further than the maximum number of bytes in a single
        # character
        my $limit = $max_unicode_length;
        $limit = length($start_point) if $limit > length($start_point);

        # Find the number of bytes comprising the first character in $text
        # (without having to know the details of its internal representation).
        # This loop will iterate just once on well-formed input.
        while ($limit > 0) {    # Until we succeed or exhaust the input
            my $copy = substr($start_point, 0, $limit);

            # decode() will return true if all bytes are valid; false
            # if any aren't.
            if (utf8::decode($copy)) {

                # Is valid: get the first character, convert back to bytes,
                # and return those bytes.
                $copy = substr($copy, 0, 1);
                utf8::encode($copy);
                $utf8_len = length $copy;
                return substr($start_point, 0, $utf8_len);
            }

            # If it didn't work, it could be that there is a full legal character
            # followed by a partial or malformed one.  Narrow the window and
            # try again.
            $limit--;
        }

        # Failed to find a legal UTF-8 character.
        $utf8_len = 0;
        return;
    }


    sub decode_error {
        my $error  = shift;
        my $no_rep = shift;
        my $str    = defined $text ? substr($text, $at) : '';
        my $mess   = '';
        my $type   = 'U*';

        for my $c ( unpack( $type, $str ) ) { # emulate pv_uni_display() ?
            my $chr_c = chr($c);
            $mess .=  $chr_c eq '\\' ? '\\\\'
                    : $chr_c =~ /[[:print:]]/ ? $chr_c
                    : $chr_c eq '\a' ? '\a'
                    : $chr_c eq '\t' ? '\t'
                    : $chr_c eq '\n' ? '\n'
                    : $chr_c eq '\r' ? '\r'
                    : $chr_c eq '\f' ? '\f'
                    : sprintf('\x{%x}', $c)
                    ;
            if ( length $mess >= 20 ) {
                $mess .= '...';
                last;
            }
        }

        unless ( length $mess ) {
            $mess = '(end of string)';
        }

        Carp::croak (
            $no_rep ? "$error" : "$error, at character offset $at (before \"$mess\")"
        );

    }


    sub _json_object_hook {
        my $o    = $_[0];
        my @ks = keys %{$o};

        if ( $cb_sk_object and @ks == 1 and exists $cb_sk_object->{ $ks[0] } and ref $cb_sk_object->{ $ks[0] } ) {
            my @val = $cb_sk_object->{ $ks[0] }->( $o->{$ks[0]} );
            if (@val == 0) {
                return $o;
            }
            elsif (@val == 1) {
                return $val[0];
            }
            else {
                Carp::croak("filter_json_single_key_object callbacks must not return more than one scalar");
            }
        }

        my @val = $cb_object->($o) if ($cb_object);
        if (@val == 0) {
            return $o;
        }
        elsif (@val == 1) {
            return $val[0];
        }
        else {
            Carp::croak("filter_json_object callbacks must not return more than one scalar");
        }
    }


    sub PP_decode_box {
        {
            text    => $text,
            at      => $at,
            ch      => $ch,
            len     => $len,
            depth   => $depth,
            encoding      => $encoding,
            is_valid_utf8 => $is_valid_utf8,
        };
    }

} # PARSE


sub _decode_surrogates { # from perlunicode
    my $uni = 0x10000 + (hex($_[0]) - 0xD800) * 0x400 + (hex($_[1]) - 0xDC00);
    my $un  = pack('U*', $uni);
    utf8::encode( $un );
    return $un;
}


sub _decode_unicode {
    my $un = pack('U', hex shift);
    utf8::encode( $un );
    return $un;
}

sub incr_parse {
    local $Carp::CarpLevel = 1;
    ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_parse( @_ );
}


sub incr_skip {
    ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_skip;
}


sub incr_reset {
    ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_reset;
}

sub incr_text : lvalue {
    $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new;

    if ( $_[0]->{_incr_parser}->{incr_pos} ) {
        Carp::croak("incr_text cannot be called when the incremental parser already started parsing");
    }
    $_[0]->{_incr_parser}->{incr_text};
}


###############################
# Utilities
#

# shamelessly copied and modified from JSON::XS code.

$JSON::PP::true  = do { bless \(my $dummy = 1), "JSON::PP::Boolean" };
$JSON::PP::false = do { bless \(my $dummy = 0), "JSON::PP::Boolean" };

sub is_bool {
  if (blessed $_[0]) {
    return (
      $_[0]->isa("JSON::PP::Boolean")
      or $_[0]->isa("Types::Serialiser::BooleanBase")
      or $_[0]->isa("JSON::XS::Boolean")
    );
  }
  elsif (CORE_BOOL) {
    BEGIN { CORE_BOOL and warnings->unimport('experimental::builtin') }
    return builtin::is_bool($_[0]);
  }
  return !!0;
}

sub true  { $JSON::PP::true  }
sub false { $JSON::PP::false }
sub null  { undef; }

###############################

( run in 2.860 seconds using v1.01-cache-2.11-cpan-140bd7fdf52 )