SGML-PYX
view release on metacpan or search on metacpan
# Parse file.
#
# Feeds $sgml_file to the tag reader stored in $self->{'_tag_reader'}, reads
# it token by token and, for every supported token, passes the result of the
# matching helper (char, comment, start_element, end_element, instruction)
# to the $self->{'output'} callback. Doctype tokens are skipped.
#
# Arguments:
#   $sgml_file - Value handed to the tag reader's set_file() method.
# Returns: nothing (bare return).
# Raises: err() for a token type that none of the branches below handles.
sub parsefile {
    my ($self, $sgml_file) = @_;

    # Set file.
    $self->{'_tag_reader'}->set_file($sgml_file);

    # Process. The list assignment is evaluated in scalar context, so the
    # loop ends as soon as gettoken() returns an empty list.
    while (my ($data, $tag_type) = $self->{'_tag_reader'}->gettoken) {

        # Decode data to internal form.
        $data = decode_utf8($data);

        # Data.
        if ($tag_type eq '!data') {
            $self->{'output'}->(char(decode(entity_decode($data))));

        # Comment.
        } elsif ($tag_type eq '!--') {
            $data =~ s/^<!--//ms;
            $data =~ s/-->$//ms;
            $self->{'output'}->(comment($data));

        # End of element.
        } elsif ($tag_type =~ m/^\//ms) {
            my $element = $data;
            $element =~ s/^<\///ms;
            $element =~ s/>$//ms;
            $self->{'output'}->(end_element($element));

        # Begin of element.
        } elsif ($tag_type =~ m/^\w+/ms) {
            $data =~ s/^<//ms;

            # Deliberately no /m on the next two substitutions: with /m,
            # '$' also matches in front of every inner newline, so in a
            # tag spread over several lines a '>' or '/' that merely ends
            # a line (e.g. inside an attribute value) would be removed
            # instead of the real tag terminator.
            $data =~ s/>$//s;

            # Trailing slash marks an element that is closed immediately.
            my $end = ($data =~ s/\/$//s) ? 1 : 0;

            # Split into the element name and the raw attribute string.
            (my $element, $data) = ($data =~ m/^([^\s]+)\s*(.*)$/ms);
            my @attrs = $self->_parse_attributes($data);
            $self->{'output'}->(start_element($element, @attrs));
            if ($end) {
                $self->{'output'}->(end_element($element));
            }

        # Doctype.
        } elsif ($tag_type eq '!doctype') {

            # Nop.

        # CData.
        } elsif ($tag_type eq '![cdata[') {
            $data =~ s/^<!\[[cC][dD][aA][tT][aA]\[//ms;
            $data =~ s/\]\]>$//ms;

            # The content of a CDATA section is literal text: entity
            # references inside it are not markup, so it is not passed
            # through entity_decode() the way ordinary data is.
            # NOTE(review): assumes entity_decode() expands entity
            # references - confirm against PYX::Utils.
            $self->{'output'}->(char(decode($data)));

        # Instruction.
        } elsif ($tag_type =~ m/^\?/ms) {
            $data =~ s/^<\?//ms;
            $data =~ s/\s*\?>$//ms;

            # First word is the target, the rest (if any) is the code.
            my ($target, $code) = split m/\s+/ms, $data, 2;
            $self->{'output'}->(instruction($target, $code));

        } else {
            err("Unsupported tag type '$tag_type'.");
        }
    }

    return;
}
# Parse attributes.
#
# Splits the attribute part of a start tag (the text that follows the
# element name) into a flat list of name/value pairs.
#
# Arguments:
#   $data - Attribute string, e.g. 'par="val" other=foo checked'.
# Returns: list (name1, value1, name2, value2, ...). An attribute without
#   '=' gets its own name as value; 'par =' with nothing after it gets an
#   empty string. An empty or undefined $data gives an empty list.
# Raises: err() when the remaining text does not start with an attribute.
sub _parse_attributes {
    my ($self, $data) = @_;

    # Kept untouched for the error message.
    my $original_data = $data;

    # Attribute name.
    my $name_re = qr{[_\w:][\.\-\w:]*}ms;

    my @attrs;

    # Test the length, not the truth of the string: a remaining '0' is
    # false in boolean context but is still an attribute to parse.
    while (defined $data && length $data) {

        # All patterns are anchored with \A and \z. With '^' under /m the
        # regex engine would, after failing at offset 0, retry at the
        # start of every following line and silently skip the attributes
        # in front of it (e.g. "checked\nb=\"c\"" lost 'checked').

        # <example par="val"> or <example par = "val">
        if ($data =~ m/\A($name_re)\s*=\s*"(.*?)"\s*(.*)\z/ms

            # <example par='val'> or <example par = 'val'>
            || $data =~ m/\A($name_re)\s*=\s*'(.*?)'\s*(.*)\z/ms

            # <example par=foo> or <example par = foo >.
            || $data =~ m/\A($name_re)\s*=\s*([^\s]+)\s*(.*)\z/ms) {

            # $1..$3 belong to whichever alternative matched.
            push @attrs, $1, $2;
            $data = $3;

        # <example par = >
        } elsif ($data =~ m/\A($name_re)\s*=\s*\z/ms) {
            push @attrs, $1, '';
            $data = '';

        # <example checked>
        } elsif ($data =~ m/\A($name_re)\s*(.*)\z/ms) {
            push @attrs, $1, $1;
            $data = $2;

        } else {
            err('Problem with attribute parsing.',
                'data', $original_data);
        }
    }

    return (@attrs);
}
1;
__END__
=pod
=encoding utf8
=head1 NAME
SGML::PYX - Convertor between SGML and PYX.
( run in 0.683 second using v1.01-cache-2.11-cpan-71847e10f99 )