CORBA-IDLtree

 view release on metacpan or  search on metacpan

lib/CORBA/IDLtree.pm  view on Meta::CPAN

# CORBA/IDLtree.pm   IDL to symbol tree translator
# This module is distributed under the same terms as Perl itself.
# Copyright  (C) 1998-2025, O. Kellogg <olivermkellogg@gail.com>
# Main Authors:  Oliver Kellogg, Heiko Schroeder
#
# -----------------------------------------------------------------------------
# Ver. |   Date   | Recent changes (for complete history see file Changes)
# -----+----------+------------------------------------------------------------
# 2.06  2025-04-20  * In the SUBORDINATES of ENUM, when $enable_comments is set
#                     change the layout for a comment to conform to the REMARK
#                     node layout.
#                   * Change sub info to only print if $verbose is set.
#                   * Fix handling of annotations applied on members of
#                     constructed types.
#                   * On encountering unknown annotation, downgrade severity
#                     from error to warning.
# 2.05  2021/06/13  * Increase minimum required perl version to 5.8 due to
#                     addition of "use utf8".
#                   * Add handling of Windows CP-1252 character encoding in
#                     input file:
#                     - Add `use utf8`.
#                     - Require module Encode::Guess.
#                     - In sub get_items:
#                       - On encountering a non printable character call
#                         Encode::Guess->guess.
#                       - If the call returns a ref then a decoder was found
#                         and no special action is required.
#                       - If the call returns "No appropriate encodings found"
#                         then assign $l from Encode::decode("cp-1252", $l).
#                       - If the call returns none of the above then print a
#                         warning "Unsupported character encoding" and replace
#                         the non printable characters in $l by space.
#                     - In sub Parse_File_i case $file case $emucpp call to
#                       `open $in`, the encoding directive for UTF-8 is no
#                       longer needed due to use of Encode::Guess (see above).
#                   * In sub skip_input fix handling of preprocessor directives
#                     where the "#" is not placed in column 1 but is preceded by
#                     whitespace.
#                   * Fix sub scoped_name in case of chained module reopenings.
#
# 2.04  2020/06/20  * In sub Parse_File_i case $file case $emucpp open $in
#                     with encoding(UTF-8) to ensure that IDL files are parsed
#                     as utf8.
#                   * New sub discard_bom discards a possible Unicode or UTF-8
#                     BOM (Byte Order Mark) at the start of the given line.
#                     In sub get_items add optional argument $firstline.
#                     If $firstline is given and true then discard_bom will be
#                     called on the first line read from file.
#                     In sub Parse_File_i outer while-loop add local
#                     $firstline for call to sub get_items.
#                   * New sub has_default_branch checks whether the given union
#                     subordinates contain a DEFAULT branch.  This fixes a bug
#                     related to checking that a union has an enum type as its
#                     switch and does not have a default branch.
#                     A false warning was generated in case the default branch
#                     was preceded by a comment.
#                   * Improvements to preprocessor emulation:
#                     - Support "#if defined XYZ" without parentheses around
#                       the symbol.  Fix evaluation of the symbol.
#                     - Do not attempt evaluating preprocessor directives when
#                       inside multi line comments.
#                     - Fix handling of #endif in nested #if/#ifdef/#ifndef.
#                   * In @annoDefs add java_mapping annotations defined by the
#                     IDL4 to Java mapping proposal.
# 2.03  2019/04/27  * Fixed a bug related to Dump_Symbols whereby when using
#                     a string array ref as the optional argument, repeated
#                     calls to the sub would accumulate the text.
#                   * In sub parse_members, optional argument $comment fixes
#                     processing of trailing comment at members of struct,
#                     exception, and valuetype.
# 2.02  2018/08/15  * Fixed a few typos in documentation.
#                   * Added support for IDL4 struct inheritance defined by the
#                     Building Block Extended Data-Types:
#                     In case of STRUCT, the first SUBORDINATES element of may
#                     be a reference to a further STRUCT node instead of the
#                     reference to quintuplet. In this case, the first element
#                     indicates the IDL4 parent struct type of the current
#                     struct.  The function isnode() can be used for detecting
#                     this case. The support for IDL4 struct inheritance is
#                     implemented in sub Parse_File_i case $kw eq 'struct'.
#                   * In sub is_elementary_type return early on undefined
#                     $tdesc.
#                   * In sub info check for valid $currfile and @infilename
#                     before accessing $infilename[$currfile].
#                   * In sub error avoid code duplication by reusing the

lib/CORBA/IDLtree.pm  view on Meta::CPAN

            $count++;
        } elsif ($directive eq 'endif') {
            $count--;
        }
        # For #elif, the count remains the same.
    }
    error "skip_input: fell off end of file";
}

# If the given line begins with the Unicode or UTF-8 BOM (Byte Order Mark) then
# discard the BOM in the returned line.
sub discard_bom {
    my $line = shift;
    if (length($line) > 2) {
        # Check for UTF-8 BOM (Byte Order Mark) 0xEF,0xBB,0xBF
        my $ord0 = ord(substr($line, 0, 1));
        if ($ord0 == 0xFEFF) {
            $line = substr($line, 1);         # Unicode
        } elsif ($ord0 == 0xEF) {
            my $ord1 = ord(substr($line, 1, 1));
            my $ord2 = ord(substr($line, 2, 1));
            if ($ord1 == 0xBB && $ord2 == 0xBF) {
                $line = substr($line, 3);     # UTF-8
            }
        }
    }
    return $line;
}

sub get_items {  # returns empty list for end-of-file or fatal error
    my $in = shift;
    my $firstline;
    if (@_) {
        $firstline = shift;
    }
    my @items = ();
    if (@global_items) {
        @items = @global_items;
        @global_items = ();
        return @items;
    }
    my $first = 1;
    my $in_comment = 0;
    my $seen_token = 0;
    my $line = "";
    $starting_line_number_of_remark = 0;
    $line_number_of_post_comment    = 0;
    my $l;
    @remark = ();
    @post_comment = ();
    line:
    while (($l = <$in>)) {
        $line_number[$currfile]++;
        chomp $l;
        $l =~ s/\r//g;  # zap DOS line ending
        if ($firstline) {
            $l = discard_bom($l);
            $firstline = 0;
        }
        if ($l =~ /[^\t\f[:print:]]/) {
            my $decoder = Encode::Guess->guess($l);
            unless (ref $decoder) {
                # info($decoder);
                if ($decoder =~ /No appropriate encodings found/) {
                    $l = Encode::decode("cp-1252", $l);
                } else {
                    info "Unsupported character encoding - $decoder";
                    $l =~ s/[^\t\f[:print:]]/ /g;
                }
            }
        }
        if ($l =~ /^\s*$/) {           # empty
            if ($in_comment) {
                if ($seen_token) {
                    push @post_comment, "";
                } else {
                    push @remark, "";
                }
            }
            next;
        }
        if ($in_comment) {
            if ($l =~ /\/\*/) {
                info "warning: nested comments not supported!";
            }
            if ($l =~ /\*\//) {
                my $cpos = index($l, "*/");
                my $cmnt = substr($l, 0, $cpos);
                $cmnt =~ s/\s*$//;
                $l = substr($l, $cpos+2);
                #my $cmnt = $l;
                #$cmnt =~ s/\s*\*\/.*$//;
                if ($seen_token) {
                    push @post_comment, $cmnt;
                } else {
                    push @remark, $cmnt;
                }
                $in_comment = 0;     # end of multi-line comment
                #$l =~ s/^.*\*\///;
                if ($seen_token) {
                    if ($l !~ /^\s*$/) {
                        error "unsupported comment/token combination";
                    }
                    last;
                }
                next if ($l =~ /^\s*$/);
            } else {
                if ($seen_token) {
                    push @post_comment, $l;
                } else {
                    push @remark, $l;
                }
                next;
            }
        }
        if ($l =~ /^\s*\/\/(.*)/) {  # single-line comment by itself
            my $cmnt = $1;
            unless (@remark) {
                $starting_line_number_of_remark = $line_number[$currfile];
            }
            push @remark, $cmnt;



( run in 1.007 second using v1.01-cache-2.11-cpan-39bf76dae61 )