Plack-App-MCCS

 view release on metacpan or  search on metacpan

local/lib/perl5/HTTP/Message.pm  view on Meta::CPAN



sub add_content
{
    my $self = shift;
    $self->_content unless exists $self->{_content};
    my $chunkref = \$_[0];
    $chunkref = $$chunkref if ref($$chunkref);  # legacy

    _utf8_downgrade($$chunkref);

    my $ref = ref($self->{_content});
    if (!$ref) {
	$self->{_content} .= $$chunkref;
    }
    elsif ($ref eq "SCALAR") {
	${$self->{_content}} .= $$chunkref;
    }
    else {
	Carp::croak("Can't append to $ref content");
    }
    delete $self->{_parts};
}

sub add_content_utf8 {
    my($self, $buf)  = @_;
    utf8::upgrade($buf);
    utf8::encode($buf);
    $self->add_content($buf);
}

sub content_ref
{
    my $self = shift;
    $self->_content unless exists $self->{_content};
    delete $self->{_parts};
    my $old = \$self->{_content};
    my $old_cref = $self->{_content_ref};
    if (@_) {
	my $new = shift;
	Carp::croak("Setting content_ref to a non-ref") unless ref($new);
	delete $self->{_content};  # avoid modifying $$old
	$self->{_content} = $new;
	$self->{_content_ref}++;
    }
    $old = $$old if $old_cref;
    return $old;
}


sub content_charset
{
    my $self = shift;
    if (my $charset = $self->content_type_charset) {
	return $charset;
    }

    # time to start guessing
    my $cref = $self->decoded_content(ref => 1, charset => "none");

    # Unicode BOM
    for ($$cref) {
	return "UTF-8"     if /^\xEF\xBB\xBF/;
	return "UTF-32LE" if /^\xFF\xFE\x00\x00/;
	return "UTF-32BE" if /^\x00\x00\xFE\xFF/;
	return "UTF-16LE" if /^\xFF\xFE/;
	return "UTF-16BE" if /^\xFE\xFF/;
    }

    if ($self->content_is_xml) {
	# http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing
	# XML entity not accompanied by external encoding information and not
	# in UTF-8 or UTF-16 encoding must begin with an XML encoding declaration,
	# in which the first characters must be '<?xml'
	for ($$cref) {
	    return "UTF-32BE" if /^\x00\x00\x00</;
	    return "UTF-32LE" if /^<\x00\x00\x00/;
	    return "UTF-16BE" if /^(?:\x00\s)*\x00</;
	    return "UTF-16LE" if /^(?:\s\x00)*<\x00/;
	    if (/^\s*(<\?xml[^\x00]*?\?>)/) {
		if ($1 =~ /\sencoding\s*=\s*(["'])(.*?)\1/) {
		    my $enc = $2;
		    $enc =~ s/^\s+//; $enc =~ s/\s+\z//;
		    return $enc if $enc;
		}
	    }
	}
	return "UTF-8";
    }
    elsif ($self->content_is_html) {
	# look for <META charset="..."> or <META content="...">
	# http://dev.w3.org/html5/spec/Overview.html#determining-the-character-encoding
	require IO::HTML;
	# Use relaxed search to match previous versions of HTTP::Message:
	my $encoding = IO::HTML::find_charset_in($$cref, { encoding    => 1,
	                                                   need_pragma => 0 });
	return $encoding->mime_name if $encoding;
    }
    elsif ($self->content_type eq "application/json") {
	for ($$cref) {
	    # RFC 4627, ch 3
	    return "UTF-32BE" if /^\x00\x00\x00./s;
	    return "UTF-32LE" if /^.\x00\x00\x00/s;
	    return "UTF-16BE" if /^\x00.\x00./s;
	    return "UTF-16LE" if /^.\x00.\x00/s;
	    return "UTF-8";
	}
    }
    if ($self->content_type =~ /^text\//) {
	for ($$cref) {
	    if (length) {
		return "US-ASCII" unless /[\x80-\xFF]/;
		require Encode;
		eval {
		    Encode::decode_utf8($_, Encode::FB_CROAK() | Encode::LEAVE_SRC());
		};
		return "UTF-8" unless $@;
		return "ISO-8859-1";
	    }
	}
    }



( run in 0.503 second using v1.01-cache-2.11-cpan-39bf76dae61 )