CORBA-IDLtree
view release on metacpan or search on metacpan
lib/CORBA/IDLtree.pm view on Meta::CPAN
# CORBA/IDLtree.pm IDL to symbol tree translator
# This module is distributed under the same terms as Perl itself.
# Copyright (C) 1998-2025, O. Kellogg <olivermkellogg@gail.com>
# Main Authors: Oliver Kellogg, Heiko Schroeder
#
# -----------------------------------------------------------------------------
# Ver. | Date | Recent changes (for complete history see file Changes)
# -----+----------+------------------------------------------------------------
# 2.06 2025-04-20 * In the SUBORDINATES of ENUM, when $enable_comments is set
# change the layout for a comment to conform to the REMARK
# node layout.
# * Change sub info to only print if $verbose is set.
# * Fix handling of annotations applied on members of
# constructed types.
# * On encountering unknown annotation, downgrade severity
# from error to warning.
# 2.05 2021/06/13 * Increase minimum required perl version to 5.8 due to
# addition of "use utf8".
# * Add handling of Windows CP-1252 character encoding in
# input file:
# - Add `use utf8`.
# - Require module Encode::Guess.
# - In sub get_items:
# - On encountering a non printable character call
# Encode::Guess->guess.
# - If the call returns a ref then a decoder was found
# and no special action is required.
# - If the call returns "No appropriate encodings found"
# then assign $l from Encode::decode("cp-1252", $l).
# - If the call returns none of the above then print a
# warning "Unsupported character encoding" and replace
# the non printable characters in $l by space.
# - In sub Parse_File_i case $file case $emucpp call to
# `open $in`, the encoding directive for UTF-8 is no
# longer needed due to use of Encode::Guess (see above).
# * In sub skip_input fix handling of preprocessor directives
# where the "#" is not placed in column 1 but is preceded by
# whitespace.
# * Fix sub scoped_name in case of chained module reopenings.
#
# 2.04 2020/06/20 * In sub Parse_File_i case $file case $emucpp open $in
# with encoding(UTF-8) to ensure that IDL files are parsed
# as utf8.
# * New sub discard_bom discards a possible Unicode or UTF-8
# BOM (Byte Order Mark) at the start of the given line.
# In sub get_items add optional argument $firstline.
# If $firstline is given and true then discard_bom will be
# called on the first line read from file.
# In sub Parse_File_i outer while-loop add local
# $firstline for call to sub get_items.
# * New sub has_default_branch checks whether the given union
# subordinates contain a DEFAULT branch. This fixes a bug
# related to checking that a union has an enum type as its
# switch and does not have a default branch.
# A false warning was generated in case the default branch
# was preceded by a comment.
# * Improvements to preprocessor emulation:
# - Support "#if defined XYZ" without parentheses around
# the symbol. Fix evaluation of the symbol.
# - Do not attempt evaluating preprocessor directives when
# inside multi line comments.
# - Fix handling of #endif in nested #if/#ifdef/#ifndef.
# * In @annoDefs add java_mapping annotations defined by the
# IDL4 to Java mapping proposal.
# 2.03 2019/04/27 * Fixed a bug related to Dump_Symbols whereby when using
# a string array ref as the optional argument, repeated
# calls to the sub would accumulate the text.
# * In sub parse_members, optional argument $comment fixes
# processing of trailing comment at members of struct,
# exception, and valuetype.
# 2.02 2018/08/15 * Fixed a few typos in documentation.
# * Added support for IDL4 struct inheritance defined by the
# Building Block Extended Data-Types:
# In case of STRUCT, the first SUBORDINATES element of may
# be a reference to a further STRUCT node instead of the
# reference to quintuplet. In this case, the first element
# indicates the IDL4 parent struct type of the current
# struct. The function isnode() can be used for detecting
# this case. The support for IDL4 struct inheritance is
# implemented in sub Parse_File_i case $kw eq 'struct'.
# * In sub is_elementary_type return early on undefined
# $tdesc.
# * In sub info check for valid $currfile and @infilename
# before accessing $infilename[$currfile].
# * In sub error avoid code duplication by reusing the
lib/CORBA/IDLtree.pm view on Meta::CPAN
$count++;
} elsif ($directive eq 'endif') {
$count--;
}
# For #elif, the count remains the same.
}
error "skip_input: fell off end of file";
}
# If the given line begins with the Unicode or UTF-8 BOM (Byte Order Mark) then
# discard the BOM in the returned line.
sub discard_bom {
my $line = shift;
if (length($line) > 2) {
# Check for UTF-8 BOM (Byte Order Mark) 0xEF,0xBB,0xBF
my $ord0 = ord(substr($line, 0, 1));
if ($ord0 == 0xFEFF) {
$line = substr($line, 1); # Unicode
} elsif ($ord0 == 0xEF) {
my $ord1 = ord(substr($line, 1, 1));
my $ord2 = ord(substr($line, 2, 1));
if ($ord1 == 0xBB && $ord2 == 0xBF) {
$line = substr($line, 3); # UTF-8
}
}
}
return $line;
}
sub get_items { # returns empty list for end-of-file or fatal error
my $in = shift;
my $firstline;
if (@_) {
$firstline = shift;
}
my @items = ();
if (@global_items) {
@items = @global_items;
@global_items = ();
return @items;
}
my $first = 1;
my $in_comment = 0;
my $seen_token = 0;
my $line = "";
$starting_line_number_of_remark = 0;
$line_number_of_post_comment = 0;
my $l;
@remark = ();
@post_comment = ();
line:
while (($l = <$in>)) {
$line_number[$currfile]++;
chomp $l;
$l =~ s/\r//g; # zap DOS line ending
if ($firstline) {
$l = discard_bom($l);
$firstline = 0;
}
if ($l =~ /[^\t\f[:print:]]/) {
my $decoder = Encode::Guess->guess($l);
unless (ref $decoder) {
# info($decoder);
if ($decoder =~ /No appropriate encodings found/) {
$l = Encode::decode("cp-1252", $l);
} else {
info "Unsupported character encoding - $decoder";
$l =~ s/[^\t\f[:print:]]/ /g;
}
}
}
if ($l =~ /^\s*$/) { # empty
if ($in_comment) {
if ($seen_token) {
push @post_comment, "";
} else {
push @remark, "";
}
}
next;
}
if ($in_comment) {
if ($l =~ /\/\*/) {
info "warning: nested comments not supported!";
}
if ($l =~ /\*\//) {
my $cpos = index($l, "*/");
my $cmnt = substr($l, 0, $cpos);
$cmnt =~ s/\s*$//;
$l = substr($l, $cpos+2);
#my $cmnt = $l;
#$cmnt =~ s/\s*\*\/.*$//;
if ($seen_token) {
push @post_comment, $cmnt;
} else {
push @remark, $cmnt;
}
$in_comment = 0; # end of multi-line comment
#$l =~ s/^.*\*\///;
if ($seen_token) {
if ($l !~ /^\s*$/) {
error "unsupported comment/token combination";
}
last;
}
next if ($l =~ /^\s*$/);
} else {
if ($seen_token) {
push @post_comment, $l;
} else {
push @remark, $l;
}
next;
}
}
if ($l =~ /^\s*\/\/(.*)/) { # single-line comment by itself
my $cmnt = $1;
unless (@remark) {
$starting_line_number_of_remark = $line_number[$currfile];
}
push @remark, $cmnt;
( run in 1.007 second using v1.01-cache-2.11-cpan-39bf76dae61 )