Lingua-Ogmios
view release on metacpan or search on metacpan
lib/Lingua/Ogmios/Annotations.pm view on Meta::CPAN
# Cut the canonical document string at every opening <section ...>, <list>
# or <item> tag.  For each tag found:
#   - the text located before the tag is appended to
#     @canonicalDocument_sections;
#   - a { type, title } record describing the tag is appended to
#     @section_infos.
# On exit $temp_canonicalDocumentString holds whatever follows the last
# opening tag.
my @section_infos;
my $section_string;
while ($temp_canonicalDocumentString =~ /<section[^>]*>|<list>|<item>/) {
    # Slice around the match with the @- / @+ offset arrays instead of
    # $`, $& and $': on perls older than 5.20 the mere presence of those
    # variables slows down every regex match in the program.  The offsets
    # must be read before any other regex is executed.
    my $tag_start = $-[0];
    my $tag_end   = $+[0];
    $section_string = substr($temp_canonicalDocumentString, $tag_start, $tag_end - $tag_start);
    push @canonicalDocument_sections, substr($temp_canonicalDocumentString, 0, $tag_start);
    $temp_canonicalDocumentString = substr($temp_canonicalDocumentString, $tag_end);

    # Default record: a plain narrative section without a title.
    my %info = ('type' => "narrative", 'title' => undef);
    if ($section_string =~ /<item>/) {
        $info{'type'} = "item";
    }
    elsif ($section_string =~ /<list>/) {
        $info{'type'} = "list";
    }
    elsif ($section_string =~ /<section(\s+sectionType=\"(?<st>[^"]+)\")?(\s+title=\"(?<t>[^"]+)\")?>/) {
        # Both attributes are optional in the pattern, so a bare <section>
        # (or one with only a title) also matches.  Keep the "narrative"
        # default when sectionType is absent instead of storing undef.
        $info{'type'}  = $+{st} // "narrative";
        $info{'title'} = $+{t};
    }
    # Any other <section ...> form (unknown attributes, different attribute
    # order) keeps the default record.
    push @section_infos, \%info;
}
# The remainder after the last opening tag is the final chunk.
push @canonicalDocument_sections, $temp_canonicalDocumentString;
# Discard the first chunk: text located before the first opening tag does
# not belong to any section.
shift @canonicalDocument_sections;
# shift @section_infos;

$start_position = 0;
$end_position = 0;
warn "[LOG] Identifying start position of the sections\n";

# For every chunk, record the triple [ start offset, tag record, raw chunk ]
# in @section_starts, then advance the running offset by the length of the
# chunk once its markup has been removed and the text has been passed
# through _xmldecode (defined in Lingua::Ogmios::Annotations::Element;
# presumably it decodes XML entities -- confirm against that module).
my $j = 0;
while ($j < @canonicalDocument_sections) {
    $section = $canonicalDocument_sections[$j];
    push @section_starts, [ $start_position, $section_infos[$j], $section ];

    # Only the text counts towards offsets, so drop every tag first.
    $section =~ s/<[^>]+>//go;
    $start_position += length(Lingua::Ogmios::Annotations::Element->_xmldecode($section));

    $j++;
}
warn "\n[LOG] Identifying end position of the sections\n";
# Split the document on closing tags: each chunk runs from the previous
# closing tag up to the next one, so the running text offset reached at the
# end of a chunk is the end offset of the element closed there.
@canonicalDocument_sections = split m!</section>|</list>|</item>!, $canonicalDocumentString;
foreach $section (@canonicalDocument_sections) {
    if ($section eq "") {
        # Two closing tags in a row: the enclosing element ends on the same
        # character as the element closed just before it (offset 0 is kept
        # as is when nothing has been consumed yet).
        push @section_ends, ($end_position != 0 ? $end_position - 1 : $end_position);
    } else {
        # Consume the chunk one tag at a time, counting the text found in
        # front of each tag.  The text is passed through _xmldecode before
        # being measured so that end offsets are expressed in the same
        # coordinates as the start offsets and the final document string,
        # which are both measured after _xmldecode.
        # NOTE(review): _xmldecode is defined outside this excerpt; this
        # assumes it changes the string length (e.g. decodes entities).
        while ($section =~ s/([^<]*)<[^>]+>//s) {
            # Copy the capture before calling a method that may itself run
            # regexes and clobber $1.
            my $leading_text = $1;
            $end_position += length(Lingua::Ogmios::Annotations::Element->_xmldecode($leading_text));
        }
        # Whatever remains after the last tag is plain text.
        $end_position += length(Lingua::Ogmios::Annotations::Element->_xmldecode($section));
        # The end offset is inclusive, hence the -1; $end_position itself
        # keeps pointing at the first character of the next chunk.
        push @section_ends, $end_position - 1;
    }
}
# split drops trailing empty fields, so the closing tags at the very end of
# the document produce no chunk.  Pad @section_ends by repeating the last
# known end offset until there are as many ends as starts.
for ($i = scalar(@section_ends); $i < scalar @section_starts; $i++) {
    $section_ends[$i] = $section_ends[$#section_ends];
}
# Reduce the canonical document to its text: remove every tag, then pass
# the result through _xmldecode (defined in
# Lingua::Ogmios::Annotations::Element; presumably decodes XML entities --
# confirm against that module).
$canonicalDocumentString =~ s/<[^>]+>//go;
$canonicalDocumentString = Lingua::Ogmios::Annotations::Element->_xmldecode($canonicalDocumentString);
warn "[LOG] Merging identification of the end and start position\n";
# Reset both cursors; they are handed to _merge_sections by reference, so
# the helper can update them.
$start_position = 0;
$end_position = 0;
# _merge_sections is defined outside this excerpt.  It receives the start
# triples, the end offsets, the two cursors, the output array @sections and
# the tag records, followed by the literal arguments 0 and undef.
# NOTE(review): presumably it pairs starts with ends to fill @sections and
# the last two arguments are a nesting level and a parent -- verify against
# the helper's definition.
&_merge_sections(\@section_starts, \@section_ends, \$start_position, \$end_position, \@sections, \@section_infos, 0, undef);
# Disabled developer check: prints the text span covered by each section.
# if ($debug_devel_level == 1) {
# warn "[LOG/$debug_devel_level] Check merging identification of the end and start position\n";
# foreach $section (@sections) {
# ($start_position, $end_position) = ($section->getFrom, $section->getTo);
# warn "[LOG/$debug_devel_level] Section from $start_position to $end_position\n";
# print STDERR "\t" . substr($canonicalDocumentString, $start_position, $end_position - $start_position + 1) . "\n";
# }
# }
( run in 0.670 second using v1.01-cache-2.11-cpan-5511b514fd6 )