XML-SAX

 view release on metacpan or  search on metacpan

lib/XML/SAX/PurePerl.pm  view on Meta::CPAN

        $data =~ /^<\// or $self->parser_error("No close tag marker", $reader);
        $reader->move_along(2);
        my $end_name = $self->Name($reader);
        $end_name eq $name || $self->parser_error("End tag mismatch ($end_name != $name)", $reader);
        $self->skip_whitespace($reader);
        $reader->match('>') or $self->parser_error("No close '>' on end tag", $reader);
    }
        
    my %end_el = %$el;
    delete $end_el{Attributes};
    $self->end_element(\%end_el);

    for my $ns (@new_ns) {
        $self->end_prefix_mapping($ns);
    }
    $self->{NSHelper}->pop_context;
    
    return 1;
}

sub content {
    my ($self, $reader) = @_;
    
    while (1) {
        $self->CharData($reader);
        
        my $data = $reader->data(2);
        
        if ($data =~ /^<\//) {
            return 1;
        }
        elsif ($data =~ /^&/) {
            $self->Reference($reader) or $self->parser_error("bare & not allowed in content", $reader);
            next;
        }
        elsif ($data =~ /^<!/) {
            ($self->CDSect($reader)
             or
             $self->Comment($reader))
             and next;
        }
        elsif ($data =~ /^<\?/) {
            $self->PI($reader) and next;
        }
        elsif ($data =~ /^</) {
            $self->element($reader) and next;
        }
        last;
    }
    
    return 1;
}

sub CDSect {
    my ($self, $reader) = @_;
    
    my $data = $reader->data(9);
    return 0 unless $data =~ /^<!\[CDATA\[/;
    $reader->move_along(9);
    
    $self->start_cdata({});
    
    $data = $reader->data;
    while (1) {
        $self->parser_error("EOF looking for CDATA section end", $reader)
            unless length($data);
        
        if ($data =~ /^(.*?)\]\]>/s) {
            my $chars = $1;
            $reader->move_along(length($chars) + 3);
            $self->characters({Data => $chars});
            last;
        }
        else {
            $self->characters({Data => $data});
            $reader->move_along(length($data));
            $data = $reader->data;
        }
    }
    $self->end_cdata({});
    return 1;
}

sub CharData {
    my ($self, $reader) = @_;
    
    my $data = $reader->data;
    
    while (1) {
        return unless length($data);
        
        if ($data =~ /^([^<&]*)[<&]/s) {
            my $chars = $1;
            $self->parser_error("String ']]>' not allowed in character data", $reader)
                if $chars =~ /\]\]>/;
            $reader->move_along(length($chars));
            $self->characters({Data => $chars}) if length($chars);
            last;
        }
        else {
            $self->characters({Data => $data});
            $reader->move_along(length($data));
            $data = $reader->data;
        }
    }
}

sub Misc {
    my ($self, $reader) = @_;
    if ($self->Comment($reader)) {
        return 1;
    }
    elsif ($self->PI($reader)) {
        return 1;
    }
    elsif ($self->skip_whitespace($reader)) {
        return 1;
    }
    
    return 0;
}

sub Reference {
    my ($self, $reader) = @_;
    
    return 0 unless $reader->match('&');
    
    my $data = $reader->data;

    # Fetch more data if we have an incomplete numeric reference
    if ($data =~ /^(#\d*|#x[0-9a-fA-F]*)$/) {
        $data = $reader->data(length($data) + 6);
    }
    
    if ($data =~ /^#x([0-9a-fA-F]+);/) {
        my $ref = $1;
        $reader->move_along(length($ref) + 3);
        my $char = chr_ref(hex($ref));
        $self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader)
            unless $char =~ /$SingleChar/o;

lib/XML/SAX/PurePerl.pm  view on Meta::CPAN

# TODO: ditto above
    if (exists $self->{ParseOptions}{entities}{$name}) {
        return 1;
    }
    return 0;
}

sub _stringify_entity {
    my ($self, $name) = @_;
# TODO: ditto above
    if (exists $self->{ParseOptions}{expanded_entity}{$name}) {
        return $self->{ParseOptions}{expanded_entity}{$name};
    }
    # expand
    my $reader = XML::SAX::PurePerl::Reader::URI->new($self->{ParseOptions}{entities}{$name});
    my $ent = '';
    while(1) {
        my $data = $reader->data;
        $ent .= $data;
        $reader->move_along(length($data)) or last;
    }
    return $self->{ParseOptions}{expanded_entity}{$name} = $ent;
}

sub _get_entity {
    my ($self, $name) = @_;
# TODO: ditto above
    return $self->{ParseOptions}{entities}{$name};
}

sub skip_whitespace {
    my ($self, $reader) = @_;
    
    my $data = $reader->data;
    
    my $found = 0;
    while ($data =~ s/^([\x20\x0A\x0D\x09]*)//) {
        last unless length($1);
        $found++;
        $reader->move_along(length($1));
        $data = $reader->data;
    }
    
    return $found;
}

sub Attribute {
    my ($self, $reader) = @_;
    
    $self->skip_whitespace($reader) || return;
    
    my $data = $reader->data(2);
    return if $data =~ /^\/?>/;
    
    if (my $name = $self->Name($reader)) {
        $self->skip_whitespace($reader);
        $reader->match('=') or $self->parser_error("No '=' in Attribute", $reader);
        $self->skip_whitespace($reader);
        my $value = $self->AttValue($reader);

        if (!$self->cdata_attrib($name)) {
            $value =~ s/^\x20*//; # discard leading spaces
            $value =~ s/\x20*$//; # discard trailing spaces
            $value =~ s/ {1,}/ /g; # all >1 space to single space
        }
        
        return $name, $value;
    }
    
    return;
}

sub cdata_attrib {
    # TODO implement this!
    return 1;
}

sub AttValue {
    my ($self, $reader) = @_;
    
    my $quote = $self->quote($reader);
    
    my $value = '';
    
    while (1) {
        my $data = $reader->data;
        $self->parser_error("EOF found while looking for the end of attribute value", $reader)
            unless length($data);
        if ($data =~ /^([^$quote]*)$quote/) {
            $reader->move_along(length($1) + 1);
            $value .= $1;
            last;
        }
        else {
            $value .= $data;
            $reader->move_along(length($data));
        }
    }
    
    if ($value =~ /</) {
        $self->parser_error("< character not allowed in attribute values", $reader);
    }
    
    $value =~ s/[\x09\x0A\x0D]/\x20/g;
    $value =~ s/&(#(x[0-9a-fA-F]+)|#([0-9]+)|\w+);/$self->AttReference($1, $reader)/geo;
    
    return $value;
}

sub Comment {
    my ($self, $reader) = @_;
    
    my $data = $reader->data(4);
    if ($data =~ /^<!--/) {
        $reader->move_along(4);
        my $comment_str = '';
        while (1) {
            my $data = $reader->data;
            $self->parser_error("End of data seen while looking for close comment marker", $reader)
                unless length($data);
            if ($data =~ /^(.*?)-->/s) {
                $comment_str .= $1;
                $self->parser_error("Invalid comment (dash)", $reader) if $comment_str =~ /-$/;
                $reader->move_along(length($1) + 3);
                last;
            }
            else {
                $comment_str .= $data;
                $reader->move_along(length($data));
            }
        }
        
        $self->comment({ Data => $comment_str });



( run in 0.914 second using v1.01-cache-2.11-cpan-39bf76dae61 )