App-PythonToPerl

 view release on metacpan or  search on metacpan

lib/Python/File.pm  view on Meta::CPAN

                    # indentation whitespace and other characters before closing """, replace the last indentation character with an additional '#' character
                    elsif ($ARG =~ m/^(\s+)(.+)\"\"\"\s*$/) {
                        $python_preparsed_target->[-1]->{perl_source_code} .= (substr $1, 0, -1) . '#' . $2;
                    }
                    # non-whitespace characters before closing """, prepend additional '#' character by shifting all characters to the right
                    elsif ($ARG =~ m/^(.+)\"\"\"\s*$/) {
                        $python_preparsed_target->[-1]->{perl_source_code} .= '#' . $1;
                    }
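                    # else, no characters at all before closing """, so there is nothing to prepend;
                    # in all of the above cases, append the commented-out closing quotes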
                    $python_preparsed_target->[-1]->{perl_source_code} .= q{#""};

                    # set ending line number, indicating we are no longer inside this multi-line component
                    $python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number;

print 'in python_file_to_python_preparsed(), ending multi-line double-quotes \"\"\"comment\"\"\", have $python_preparsed_target->[-1]->{perl_source_code} = ', "\n", $python_preparsed_target->[-1]->{perl_source_code}, "\n";
#die 'TMP DEBUG, MULTI-LINE COMMENT DOUBLE QUOTES';
                }

                next;
            }
            elsif ($ARG =~ m/\"\"\"/) {
                croak 'ERROR EPYFI002: have multi-line double-quotes comment closing, but not at end of line, do not know how to handle, croaking';
            }
            elsif ($ARG =~ m/\'\'\'/) {
                carp 'WARNING WPYFI002: have multi-line single-quotes comment while currently inside multi-line double-quotes comment, ignoring, carping';
            }

            # prepend '#' character for non-blank comments,
            # either replacing last indentation space or shifting all characters to the right
            my string $comment = $ARG;
            if (($comment eq '') or
                ($python_preparsed_target->[-1]->{is_actually_string_literal}))
            { 1; }
            elsif ($comment =~ m/^(\s+)(.*)$/) { 
                # if indented at least 2 spaces, then we can vertically align all '#' characters
                if (((length $python_preparsed_target->[-1]->{indentation}) >= 2) and
                    ((length $1) >= 2)) {
                    substr $comment, ((length $python_preparsed_target->[-1]->{indentation}) - 2), 1, '#';
                }
                else { $comment = (substr $1, 0, -2) . '# ' . $2; }
            }
            else { $comment = '#' . $comment; }

            # accumulate non-last Perl line of multi-line component; copy comments verbatim
            $python_preparsed_target->[-1]->{perl_source_code} .= "\n" . $comment;

            # did not end multi-line component, go on to next line
            next;
        }
        # pre-parse & accumulate everything inside multi-line include statement
        elsif (((scalar @{$python_preparsed_target}) > 0) and
               $python_preparsed_target->[-1]->isa('Python::Include') and 
              ($python_preparsed_target->[-1]->{python_line_number_end} < 0)) {
print 'in python_file_to_python_preparsed(), inside multi-line include', "\n";
            # accumulate current (possibly last) Python line of multi-line component
            chomp $ARG;
            $python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;

            # update last active character
            $python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";

            # multi-line includes with parentheses end differently than those without parentheses
            if ($python_preparsed_target->[-1]->{python_has_parentheses}) {
                # end multi-line include (w/ parentheses) when the last non-whitespace non-comment character is a close parentheses
                if ($ARG =~ m/^.*\)\s*(?:\#.*)?$/) {
print 'in python_file_to_python_preparsed(), ending multi-line include w/ parentheses', "\n";
                    # set ending line number, indicating we are no longer inside this multi-line component
                    $python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number;

                    next;
                }
            }
            else {
                # end multi-line include (w/out parentheses) when the last non-whitespace character is not a backslash
#                if ($ARG =~ m/^.*[^\\]\s*$/) {  # does not match correctly?
                if ($ARG !~ m/^.*\\\s*$/) {
print 'in python_file_to_python_preparsed(), ending multi-line include w/out parentheses', "\n";
                    # set ending line number, indicating we are no longer inside this multi-line component
                    $python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number;

                    next;
                }
            }

            # error if multi-line component invalidly nested inside other multi-line component
            if ($ARG =~ m/\'\'\'/) {
                croak 'ERROR EPYFI003a: have multi-line single-quotes comment while currently inside multi-line include statement, do not know how to handle, croaking';
            }
            elsif ($ARG =~ m/\"\"\"/) {
                croak 'ERROR EPYFI003b: have multi-line double-quotes comment while currently inside multi-line include statement, do not know how to handle, croaking';
            }

            # did not end multi-line component, go on to next line
            next;
        }
        # pre-parse & accumulate everything inside multi-line function header
        elsif (((scalar @{$python_preparsed_target}) > 0) and
               $python_preparsed_target->[-1]->isa('Python::Function') and 
              ($python_preparsed_target->[-1]->{python_line_number_end_header} < 0)) {
print 'in python_file_to_python_preparsed(), inside multi-line function header', "\n";
            # accumulate current (possibly last) Python line of multi-line component
            chomp $ARG;
            $python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;

            # update last active character
            $python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";

            # end multi-line function header when it matches the entire regex;

            # DEV NOTE, CORRELATION PYFI100: all regex changes must be reflected in both locations,
            # the only difference should be the optional trailing comment pattern \s*(?:\#.*\n)?\s*
            # which is not in the header-opening regex and is used twice in the header-closing regex;
            # DEV NOTE: do NOT join multiple lines into one line for regex match,
            # need \n characters to detect trailing comments,
            # \s matches \n so multiple lines do not need to be combined
                #  $1         $2           $3                                                                                                                                                                                                             ...
            if (# Python
                ($python_preparsed_target->[-1]->{python_source_code} =~
                m/^(\s*)def\s+(\w+)\s*\(\s*((?:[\w\.\*]+\s*(?::\s*[\w\.]+\s*)?(?:\[.*\]\s*)?(?:\=\s*(?:(?:\'.*\')|(?:\".*\")|(?:\(.*\))|(?:\[.*\])|[\w\.\-\(\)]+))?\s*\,\s*(?:\#.*\n)?\s*)*[\w\.\*]+\s*(?::\s*[\w\.]+\s*)?(?:\[.*\]\s*)?(?:\=\s*(?:(?:\'.*...
# NEED ANSWER: does Pyrex accept both C and Python types?  if so, update Pyrex regex below to accept ':str' Python types
# NEED ANSWER: does Pyrex accept both C and Python types?  if so, update Pyrex regex below to accept ':str' Python types
# NEED ANSWER: does Pyrex accept both C and Python types?  if so, update Pyrex regex below to accept ':str' Python types
                # Pyrex
                ($python_preparsed_target->[-1]->{python_source_code} =~
                m/^(\s*)def\s+(\w+)\s*\(\s*((?:(?:(?:const\s+)?(?:[\w\.]+\s*(?:\[[\:\d\,\s]+\])?\s+))?[\w\.\*]+(?:\=\s*(?:(?:\'.*\')|(?:\".*\")|(?:\(.*\))|(?:\[.*\])|[\w\.\-\(\)]+))?\s*\,\s*(?:\#.*\n)?\s*)*(?:(?:const\s+)?(?:[\w\.]+\s*(?:\[[\:\d\,\s]...




print 'in python_file_to_python_preparsed(), ending multi-line function header', "\n";

                # all function header sub-components have been received, so accept them all
                if (defined $3) { $python_preparsed_target->[-1]->{arguments} = $3; }
                if (defined $4) { $python_preparsed_target->[-1]->{return_type} = $4; }

                # set ending line number, indicating we are no longer inside this multi-line component
                $python_preparsed_target->[-1]->{python_line_number_end_header} = $python_line_number;

print 'in python_file_to_python_preparsed(), ending multi-line function header, have $python_preparsed_target->[-1] = ', Dumper($python_preparsed_target->[-1]), "\n";
#die 'TMP DEBUG, END MULTI-LINE FUNCTION HEADER' if ($python_preparsed_target->[-1]->{symbol} eq '__init__');

                next;
            }

            # did not end multi-line component, go on to next line
            next;
        }
        # pre-parse & accumulate everything inside multi-line class header
        elsif (((scalar @{$python_preparsed_target}) > 0) and
               $python_preparsed_target->[-1]->isa('Python::Class') and 
              ($python_preparsed_target->[-1]->{python_line_number_end_header} < 0)) {
print 'in python_file_to_python_preparsed(), inside multi-line class header', "\n";
            # accumulate current (possibly last) Python line of multi-line component
            chomp $ARG;
            $python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;

            # update last active character
            $python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";

            # end multi-line class header when it matches the entire regex;

            # DEV NOTE, CORRELATION PYFI101: all regex changes must be reflected in both locations,
            # the only difference should be the optional trailing comment pattern \s*(?:\#.*\n)?\s*
            # which is not in the header-opening regex and is used twice in the header-closing regex;
            # DEV NOTE: do NOT join multiple lines into one line for regex match,
            # need \n characters to detect trailing comments,
            # \s matches \n so multiple lines do not need to be combined
            if (# Python
                ($python_preparsed_target->[-1]->{python_source_code} =~ 
                #  $1           $2              $3                                                                          $4    $5
                m/^(\s*)class\s+(\w+)\s*(?:\(\s*((?:[\w\.=]+\s*\,\s*(?:\#.*\n)?\s*)*[\w\.=]+\s*\,?\s*(?:\#.*\n)?)?\s*\)\s*)?(:)\s*(\#.*)?$/) or
                # Pyrex
                ($python_preparsed_target->[-1]->{python_source_code} =~ 
                m/^(\s*)cdef\s+class\s+(\w+)(?:\{\{\w+\}\})?\s*(?:\(\s*((?:[\w\.=]+(?:\{\{\w+\}\})?[\w\.=]*\s*\,\s*(?:\#.*\n)?\s*)*[\w\.=]+(?:\{\{\w+\}\})?[\w\.=]*\s*\,?\s*(?:\#.*\n)?)?\s*\)\s*)?(:)\s*(\#.*)?$/)) {
print 'in python_file_to_python_preparsed(), ending multi-line class header', "\n";

                # all class header sub-components have been received, so accept them all
                if (defined $3) { $python_preparsed_target->[-1]->{parents} = $3; }

                # set ending line number, indicating we are no longer inside this multi-line component
                $python_preparsed_target->[-1]->{python_line_number_end_header} = $python_line_number;

print 'in python_file_to_python_preparsed(), ending multi-line class header, have $python_preparsed_target->[-1] = ', Dumper($python_preparsed_target->[-1]), "\n";
#die 'TMP DEBUG, END MULTI-LINE CLASS HEADER' if ($python_preparsed_target->[-1]->{symbol} eq '__init__');

                next;
            }

            # did not end multi-line component, go on to next line
            next;
        }

        # DEV NOTE: multi-line classes & functions can contain multi-line comments & includes, so break elsif() and start new if();
        # pre-parse & accumulate everything inside multi-line namespaces (functions & classes)
        if ((scalar @{$python_namespaces}) > 0) {
print 'in python_file_to_python_preparsed(), inside multi-line namespace', "\n";
#print 'in python_file_to_python_preparsed(), have all outer namespaces $python_namespaces = ', Dumper($python_namespaces), "\n";
#print 'in python_file_to_python_preparsed(), have next outer namespace $python_namespaces->[-1] = ', Dumper($python_namespaces->[-1]), "\n";
print 'in python_file_to_python_preparsed(), have next outer namespace $python_namespaces->[-1]->{symbol_scoped} = \'', $python_namespaces->[-1]->{symbol_scoped}, '\'', "\n";

            # end multi-line namespace(s) when the indentation level returns to the same as, or less than, the first line of its definition,
            # not counting blank (empty) lines or whitespace-only lines
            $ARG =~ m/^(\s*)[^\s]/;

print 'in python_file_to_python_preparsed(), have current line leading whitespace $1 = \'', (defined($1) ? $1 : '<<<undef>>>'), '\'', "\n";
print 'in python_file_to_python_preparsed(), have next outer namespace $python_namespaces->[-1]->{indentation} = \'', $python_namespaces->[-1]->{indentation}, '\'', "\n";

            # if regex above does not match, then $1 will be undefined;
            # this can only happen with blank (empty) lines and whitespace-only lines
            if ((defined $1) and
                ((length $1) <= (length $python_namespaces->[-1]->{indentation}))) {
print 'in python_file_to_python_preparsed(), ending one or more multi-line namespaces', "\n";

                # continue removing namespaces from the stack, as long as the stack is not empty and
                # the current line's indentation is less than or equal to that of the namespace on top of the stack
                while (((scalar @{$python_namespaces}) > 0) and
                       ((length $1) <= (length $python_namespaces->[-1]->{indentation}))) {
print 'in python_file_to_python_preparsed(), ending multi-line namespace \'', $python_namespaces->[-1]->{symbol_scoped}, '\'', "\n";

lib/Python/File.pm  view on Meta::CPAN

                # create new component
                push @{$python_preparsed_target},
                Python::Whitespace->new(
                {
                    component_type => 'Python::Whitespace',
                    python_line_number_begin => $python_line_number,
                    python_line_number_end => $python_line_number,
                    python_source_code => $ARG,
                    perl_source_code => $ARG  # copy whitespace lines verbatim
                });
            }

            next;
        }
        # pre-parse & skip single-line # comments
        elsif ($ARG =~ m/^\s*\#/) {
print 'in python_file_to_python_preparsed(), have single-line # comment', "\n";
            chomp $ARG;  # trim trailing newline, if present

            # check if previous component was same type
            if (((scalar @{$python_preparsed_target}) > 0) and
                $python_preparsed_target->[-1]->isa('Python::Comment')) {
print 'in python_file_to_python_preparsed(), have single-line # comment, accumulating', "\n";
                # accumulate multiple single-line components into a multi-line component
                $python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number;  # update ending line number
                $python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;
                $python_preparsed_target->[-1]->{perl_source_code} .= "\n" . $ARG;  # copy comments verbatim
            }
            else {
print 'in python_file_to_python_preparsed(), have single-line # comment, creating', "\n";
                # create new component
                push @{$python_preparsed_target},
                Python::Comment->new(
                {
                    component_type => 'Python::Comment',
                    python_line_number_begin => $python_line_number,
                    python_line_number_end => $python_line_number,
                    python_source_code => $ARG,
                    perl_source_code => $ARG  # copy comments verbatim
                });
            }

            next;
        }

# NEED ANSWER: other than left parentheses and comma, what other characters indicate non-void context???
# NEED ANSWER: other than left parentheses and comma, what other characters indicate non-void context???
# NEED ANSWER: other than left parentheses and comma, what other characters indicate non-void context???

        # pre-parse & skip single-line '''comments''';
        # DEV NOTE: if last active character is left parentheses or comma,
        # then context is not void and this is not a comment
        elsif (($ARG =~ m/^(\s*)\'\'\'(.*)\'\'\'\s*$/) and
               ($python_last_active_character ne '(') and
               ($python_last_active_character ne ',')) {
print 'in python_file_to_python_preparsed(), have single-line \'\'\'comment\'\'\'', "\n";
            chomp $ARG;  # trim trailing newline, if present

            # update last active character
            $python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";

            # check if previous component was same type
            if (((scalar @{$python_preparsed_target}) > 0) and
                $python_preparsed_target->[-1]->isa('Python::CommentSingleQuotes')) {
print 'in python_file_to_python_preparsed(), have single-line \'\'\'comment\'\'\', accumulating', "\n";
                # accumulate multiple single-line components into a multi-line component
                $python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number;  # update ending line number
                $python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;
                $python_preparsed_target->[-1]->{perl_source_code} .= "\n" . ($1 . '#  ' . $2);  # reformat comments & retain spacing
            }
            else {
print 'in python_file_to_python_preparsed(), have single-line \'\'\'comment\'\'\', creating', "\n";
                # create new component
                push @{$python_preparsed_target},
                Python::CommentSingleQuotes->new(
                {
                    component_type => 'Python::CommentSingleQuotes',
                    python_line_number_begin => $python_line_number,
                    python_line_number_end => $python_line_number,
                    python_source_code => $ARG,
                    perl_source_code => ($1 . '#  ' . $2)  # reformat comments & retain spacing
                });
            }

            next;
        }
        # pre-parse & skip single-line """comments""";
        # DEV NOTE: if last active character is left parentheses or comma,
        # then context is not void and this is not a comment
        elsif (($ARG =~ m/^(\s*)\"\"\"(.*)\"\"\"\s*$/) and
               ($python_last_active_character ne '(') and
               ($python_last_active_character ne ',')) {
print 'in python_file_to_python_preparsed(), have single-line \"\"\"comment\"\"\"', "\n";
            chomp $ARG;  # trim trailing newline, if present

            # update last active character
            $python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";

            # check if previous component was same type
            if (((scalar @{$python_preparsed_target}) > 0) and
                $python_preparsed_target->[-1]->isa('Python::CommentDoubleQuotes')) {
print 'in python_file_to_python_preparsed(), have single-line \"\"\"comment\"\"\", accumulating', "\n";
                # accumulate multiple single-line components into a multi-line component
                $python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number;  # update ending line number
                $python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;
                $python_preparsed_target->[-1]->{perl_source_code} .= "\n" . ($1 . '#  ' . $2);  # reformat comments & retain spacing
            }
            else {
print 'in python_file_to_python_preparsed(), have single-line \"\"\"comment\"\"\", creating', "\n";
                # create new component
                push @{$python_preparsed_target},
                Python::CommentDoubleQuotes->new(
                {
                    component_type => 'Python::CommentDoubleQuotes',
                    python_line_number_begin => $python_line_number,
                    python_line_number_end => $python_line_number,
                    python_source_code => $ARG,
                    perl_source_code => ($1 . '#  ' . $2)  # reformat comments & retain spacing
                });
            }

            next;
        }
        # start multi-line '''comments''';
        elsif ($ARG =~ m/^(\s*)\'\'\'(.*)$/) {
            # DEV NOTE: if last active character is left parentheses or comma,
            # then context is not void and this is not a comment
            my boolean $is_actually_string_literal;
            if (($python_last_active_character eq '(') or
                ($python_last_active_character eq ',')) {
print 'in python_file_to_python_preparsed(), have multi-line \'\'\'string literal, starting', "\n";
                $is_actually_string_literal = 1;
            }
            else {
print 'in python_file_to_python_preparsed(), have multi-line \'\'\'comment, starting', "\n";
                $is_actually_string_literal = 0;
            }

            push @{$python_preparsed_target},
            Python::CommentSingleQuotes->new(
            {
                component_type => 'Python::CommentSingleQuotes',
                indentation => $1,
                is_actually_string_literal => $is_actually_string_literal,
                python_line_number_begin => $python_line_number,
                python_line_number_end => -1,  # negative value means we are currently inside multi-line component
                python_source_code => champ($ARG),
#                perl_source_code => champ('=x ' . $1 . $2)  # DEV NOTE: don't use POD for multi-line comments, POD parsers are inconsistent
                perl_source_code => champ($1 . q{#''} . $2)  # reformat comments & retain spacing
            });
            next;
        }
        # start multi-line """comments""";
        # DEV NOTE: if last active character is left parentheses or comma,
        # then context is not void and this is not a comment
        elsif ($ARG =~ m/^(\s*)\"\"\"(.*)$/) {
            # DEV NOTE: if last active character is left parentheses or comma,
            # then context is not void and this is not a comment
            my boolean $is_actually_string_literal;
            if (($python_last_active_character eq '(') or
                ($python_last_active_character eq ',')) {
print 'in python_file_to_python_preparsed(), have multi-line \"\"\"string literal, starting', "\n";
                $is_actually_string_literal = 1;
            }   
            else {
print 'in python_file_to_python_preparsed(), have multi-line \"\"\"comment, starting', "\n";
                $is_actually_string_literal = 0;
            }

            push @{$python_preparsed_target},
            Python::CommentDoubleQuotes->new(
            {
                component_type => 'Python::CommentDoubleQuotes',
                indentation => $1,
                is_actually_string_literal => $is_actually_string_literal,
                python_line_number_begin => $python_line_number,
                python_line_number_end => -1,  # negative value means we are currently inside multi-line component
                python_source_code => champ($ARG),
#                perl_source_code => champ('=x ' . $1 . $2)  # DEV NOTE: don't use POD for multi-line comments, POD parsers are inconsistent
                perl_source_code => champ($1 . q{#""} . $2)  # reformat comments & retain spacing
            });
            next;
        }
        # start multi-line include statements, either with enclosing parentheses,
        # or with long lines ending in backslash AKA line continuation;
        # match any line starting with 'from' and including an open but not close parentheses,
        # or starting with either 'from' or 'import', or 'cimport' for Pyrex, and ending in backslash
        # https://python-reference.readthedocs.io/en/latest/docs/operators/slash.html
        # NEED ANSWER: can the line continuation backslash appear without any preceding whitespace?
        elsif (($ARG =~ m/^\s*from\s+.*(\()[^\)]*$/) or
               ($ARG =~ m/^\s*from\s+.*\\$/) or
               ($ARG =~ m/^\s*c?import\s+.*\\$/)) {
print 'in python_file_to_python_preparsed(), have multi-line include, starting', "\n";
            chomp $ARG;  # trim trailing newline, if present

            # update last active character
            $python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";

            push @{$python_preparsed_target},
            Python::Include->new(
            {
                component_type => 'Python::Include',
                python_file_path => $self->{python_file_path},
                python_line_number_begin => $python_line_number,
                python_line_number_end => -1,  # negative value means we are currently inside multi-line component
                python_source_code => $ARG,
                python_has_parentheses => (defined $1) ? 1 : 0,
                perl_source_code => undef,  # includes are not translated during pre-parse phase
            });
            next;
        }
        # pre-parse single-line include statements;
        # 'import', or 'cimport' for Pyrex, must be followed by open parentheses or whitespace
        elsif ($ARG =~ m/^\s*(from\s+.+\s+)?c?import[\(\s].+$/) {
print 'in python_file_to_python_preparsed(), have single-line include', "\n";
            chomp $ARG;  # trim trailing newline, if present

            # update last active character
            $python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";

            push @{$python_preparsed_target},
            Python::Include->new(
            {
                component_type => 'Python::Include',
                python_file_path => $self->{python_file_path},
                python_line_number_begin => $python_line_number,
                python_line_number_end => $python_line_number,
                python_source_code => $ARG,
                perl_source_code => undef,  # includes are not translated during pre-parse phase
            });

            next;
        }

# NEED UPGRADE: support single-line Python functions; also, decorators on same line as function definition?
# NEED UPGRADE: support single-line Python functions; also, decorators on same line as function definition?
# NEED UPGRADE: support single-line Python functions; also, decorators on same line as function definition?

        # start function definitions
        # DEV NOTE: can have whitespace around commas
        # DEV NOTE: can have whitespace but NOT newline in between function name and open parentheses
        # DEV NOTE: can have whitespace but NOT newline in between close parentheses and colon
        # DEV NOTE: can have trailing comma at the end of function argument list

        # Python function header examples
        # def FOO():
        # def FOO(BAR):
        # def FOO ( BAR, BAT, BAZ, ) :
        # def FOO(BAR, BAT, BAZ) -> FooReturnType:
        # def __FOO__(self, *, BAR=None, var_BAR=1e-9, force_alpha="warn"):
        # def FOO_BAR(code, extra_preargs=[], extra_postargs=[]):
        # def FOO (
        #
        # ) :
        # def __FOO__(
        #     self,
        #     n_clusters=2,
        #     *,
        #     affinity="deprecated",  # TODO(1.4): Remove
        #     metric=None,  # TODO(1.4): Set to "euclidean"
        #     memory=None,
        #     connectivity=None,
        #     compute_full_tree="auto",
        #     linkage="ward",
        #     distance_threshold=None,
        #     compute_distances=False,
        #     damping=0.5,
        #     eps=np.finfo(np.float64).eps,
        #     slice_=(slice(70, 195), slice(78, 172)),
        # ):
        # def FOO(
        #     BAR: BarType, BAT:BatType="howdy", BAX : BaxType = 23 
        # ) -> FooReturnType:
        # def FOO(
        #     BAR: int,
        #     BAT1: Optional [ str ],
        #     BAT2: typing.Optional[str],
        #     BAX: float = 1.0,
        #     BAZ1: Union[int, str],
        #     BAZ2: typing.Union [ int , str ] = 'howdy',
        # ) -> Dict[str, Any]:

        # Pyrex function header examples
        # def FOO(
        #     const cnp.uint8_t[::1] BAR,
        #     object[:] BAT,
        #     cnp.npy_intp [::1] BAX
        # ):
        # def FOO(
        #     const cnp.float64_t[::1] BAR,
        #     const cnp.float64_t[:, ::1] BAT,
        #     const cnp.intp_t[::1] BAX,
        #     BAZ,
        #     cnp.float64_t[::1] QUUX
        # ):
        # def FOO(cnp.intp_t BAR, BAT, cnp.intp_t BAX):

        # DEV NOTE: either match the entire regex for single-line function header,
        # or match only the start of the function header 'def FOO' and 
        # start multi-line component to accumulate source code lines until entire regex can be matched
        # DEV NOTE, CORRELATION PYFI100: all regex changes must be reflected in both locations,
        # the only difference should be the optional trailing comment pattern \s*(?:\#.*\n)?\s*
        # which is not in the header-opening regex and is used twice in the header-closing regex;
        elsif ( # Python
                #           $1         $2           $3                                                                                                                                                                                                    ...
                ($ARG =~ m/^(\s*)def\s+(\w+)\s*\(\s*((?:[\w\.\*]+\s*(?::\s*[\w\.]+\s*)?(?:\[.*\]\s*)?(?:\=\s*(?:(?:\'.*\')|(?:\".*\")|(?:\(.*\))|(?:\[.*\])|[\w\.\-\(\)]+))?\s*\,\s*(?:\#.*\n)?\s*)*[\w\.\*]+\s*(?::\s*[\w\.]+\s*)?(?:\[.*\]\s*)?(?:\=\s*(...
# NEED ANSWER: does Pyrex accept both C and Python types?  if so, update Pyrex regex below to accept ':str' Python types
# NEED ANSWER: does Pyrex accept both C and Python types?  if so, update Pyrex regex below to accept ':str' Python types
# NEED ANSWER: does Pyrex accept both C and Python types?  if so, update Pyrex regex below to accept ':str' Python types
                # Pyrex
                #           $1         $2           $3                                                                                                                                                                                                    ...
                ($ARG =~ m/^(\s*)def\s+(\w+)\s*\(\s*((?:(?:(?:const\s+)?(?:[\w\.]+\s*(?:\[[\:\d\,\s]+\])?\s+))?[\w\.\*]+(?:\=\s*(?:(?:\'.*\')|(?:\".*\")|(?:\(.*\))|(?:\[.*\])|[\w\.\-\(\)]+))?\s*\,\s*)*(?:(?:const\s+)?(?:[\w\.]+\s*(?:\[[\:\d\,\s]+\])?...
                # Python or Pyrex
                ($ARG =~ m/^(\s*)def\s+(\w+)/)) {
print 'in python_file_to_python_preparsed(), have function, starting header', "\n";
#die 'TMP DEBUG, FUNCTION HEADER';

# NEED UPGRADE: utilize optional trailing comment $6, include in generated Perl source code if present
# NEED UPGRADE: utilize optional trailing comment $6, include in generated Perl source code if present
# NEED UPGRADE: utilize optional trailing comment $6, include in generated Perl source code if present

            chomp $ARG;  # trim trailing newline, if present

            # update last active character
            $python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";

            # if multi-line function header, we must receive the function name as part of the first line,
            # in order to pre-parse correctly below
            $python_namespace_name = $2;

            $python_component = 
            {
                component_type => undef,  # set below, either Python::Function or Python::Method or Python::InnerFunction
                decorators => '',  # empty value means no declared decorators; possibly set below
                indentation => $1,  # empty match returns empty string '', not undef
                symbol => $python_namespace_name,  # set now to get non-scoped symbol
                symbol_scoped => undef,  # set below, possibly-scoped symbol
                arguments => '',  # empty value means no declared arguments; possibly set below
                return_type => '',  # empty value means no declared return type; possibly set below
                python_file_path => $self->{python_file_path},
                python_line_number_begin => $python_line_number,
                python_line_number_end => -1,  # negative value means we are currently inside multi-line component
                python_line_number_end_header => -1,  # negative value means we are currently inside multi-line header; possibly set below
                python_source_code => $ARG,  # only function header, remaining source code will be nested in python_preparsed below
                python_preparsed => [],  # nested pre-parsed data structures of all source code inside this function
                python_preparsed_decorators => [],  # nested pre-parsed data structures of all decorators above this function
                perl_source_code => undef  # functions are not translated during pre-parse phase
            };

            # if all function header sub-components have been received, then accept them all;
            # consider final colon ':' captured in $5 to be the ending character of the function header
            if (defined $5) {
print 'in python_file_to_python_preparsed(), have function, ending header', "\n";
                if (defined $3) { $python_component->{arguments} = $3; }
                if (defined $4) { $python_component->{return_type} = $4; }

                # set header ending line number, indicating this is not a multi-line function header
                $python_component->{python_line_number_end_header} = $python_line_number;
            }
            else {
print 'in python_file_to_python_preparsed(), have function, no closing colon found, NOT ending header', "\n";
            }
#die 'TMP DEBUG, PARSE FUNCTION HEADER';

            # look-back to capture function decorators (@abstractmethod, @staticmethod, etc) on previous lines
            while ((scalar @{$python_preparsed_target}) > 0) {
print 'in python_file_to_python_preparsed(), have function, top of look-back loop', "\n";
                # pop blank / whitespace / comment lines onto temporary stack,
                # to either be captured along with decorators or put back if no decorator encountered
                if ($python_preparsed_target->[-1]->isa('Python::Blank') or
                    $python_preparsed_target->[-1]->isa('Python::Whitespace') or
                    $python_preparsed_target->[-1]->isa('Python::Comment')) {
print 'in python_file_to_python_preparsed(), have function, in look-back loop, moving blank / whitespace / comment line to temporary stack', "\n";

                    # utilize as-yet-unused function body python_preparsed as temporary stack
                    unshift @{$python_component->{python_preparsed}}, (pop @{$python_preparsed_target});
                    next;
                }

                # capture function decorators
                # @FOO
                # @FOO.BAR(scope="function")
                if ($python_preparsed_target->[-1]->isa('Python::Unknown') and 
                   ($python_preparsed_target->[-1]->{python_source_code} =~ m/^\s*@.+$/)) {
print 'in python_file_to_python_preparsed(), have function, in look-back loop, capturing decorator \'', $python_preparsed_target->[-1]->{python_source_code}, '\' by deleting sleep_seconds & sleep_retry_multiplier & retries_max object properties & re-...

lib/Python/File.pm  view on Meta::CPAN

            }
            else {
                # save reference to current function along with all other functions, for easy name-based access
                $python_functions->{$python_namespace_name} = $python_preparsed_target->[-1];
            }

            # being inside this new function increases the namespace stack (deepens the current scope)
            push @{$python_namespaces}, $python_preparsed_target->[-1];
            next;
        }

        # start class definitions
        # NEED ANSWER: are all Python classes multi-line?
        # DEV NOTE: can have whitespace around commas
        # DEV NOTE: can have whitespace but NOT newline in between class name and open parentheses
        # DEV NOTE: can have whitespace but NOT newline in between close parentheses and colon
        # DEV NOTE: can have trailing comma at the end of parent class list

        # Python class header examples
        # class FOO:
        # class FOO(BAR):
        # class FOO ( B.AR ) :
        # class FOO (
        #
        # ) :
        # class FOO(BAR, BAT, BAX):
        # class FOO(B.AR, BA.T, BAX):
        # class _FOO (BAR, BAT, BAX=BAY) :
        # class FOO ( BAR,
        #     BAT, BAX,
        # ) :

        # Pyrex class header examples
        # cdef class FOO:
        # cdef class FOO(BAR):

        # DEV NOTE: either match the entire regex for single-line class header,
        # or match only the start of the class header 'class FOO(' and 
        # start multi-line component to accumulate source code lines until entire regex can be matched
        # DEV NOTE, CORRELATION PYFI101: all regex changes must be reflected in both locations,
        # the only difference should be the optional trailing comment pattern \s*(?:\#.*\n)?\s*
        # which is not in the header-opening regex and is used twice in the header-closing regex;
        #                   $1           $2              $3                                              $4    $5
        elsif ( # Python
                ($ARG =~ m/^(\s*)class\s+(\w+)\s*(?:\(\s*((?:[\w\.=]+\s*\,\s*)*[\w\.=]+\s*\,?)?\s*\)\s*)?(:)\s*(\#.*)?$/) or
                ($ARG =~ m/^(\s*)class\s+(\w+)\s*\(/) or
                # Pyrex
                ($ARG =~ m/^(\s*)cdef\s+class\s+(\w+)\s*(?:\(\s*((?:[\w\.=]+(?:\{\{\w+\}\})?[\w\.=]*\s*\,\s*)*[\w\.=]+(?:\{\{\w+\}\})?[\w\.=]*\s*\,?)?\s*\)\s*)?(:)\s*(\#.*)?$/) or
                ($ARG =~ m/^(\s*)cdef\s+class\s+(\w+)\s*\(/)) {

# NEED UPGRADE: utilize optional trailing comment $5, include in generated Perl source code if present
# NEED UPGRADE: utilize optional trailing comment $5, include in generated Perl source code if present
# NEED UPGRADE: utilize optional trailing comment $5, include in generated Perl source code if present

print 'in python_file_to_python_preparsed(), have class, starting header', "\n";

            chomp $ARG;  # trim trailing newline, if present

            # update last active character
            $python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";

            # if multi-line class header, we must receive the class name as part of the first line,
            # in order to pre-parse correctly below
            $python_namespace_name = $2;

            $python_component = 
            {
                component_type => undef,  # set below, either Python::Class or Python::LocalClass or Python::InnerClass
                indentation => $1,  # empty match returns empty string '', not undef
                symbol => $python_namespace_name,
                parents => '',  # empty value means no declared parent classes; possibly set below
                python_file_path => $self->{python_file_path},
                python_line_number_begin => $python_line_number,
                python_line_number_end => -1,  # negative value means we are currently inside multi-line component
                python_line_number_end_header => -1,  # negative value means we are currently inside multi-line header; possibly set below
                python_source_code => $ARG,  # only class header, remaining source code will be nested in python_preparsed below
                python_preparsed => [],  # nested pre-parsed data structures of all source code inside this class
                perl_source_code => undef  # classes are not translated during pre-parse phase
            };

            # if all class header sub-components have been received, then accept them all;
            # consider final colon ':' captured in $4 to be the ending character of the class header
            if (defined $4) {
print 'in python_file_to_python_preparsed(), have class, ending header', "\n";
                if (defined $3) { $python_component->{parents} = $3; }

                # set header ending line number, indicating this is not a multi-line class header
                $python_component->{python_line_number_end_header} = $python_line_number;
            }
            else {
print 'in python_file_to_python_preparsed(), have class, no closing colon found, NOT ending header', "\n";
            }
#die 'TMP DEBUG, PARSE CLASS HEADER';

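            # determine if class is a normal class, a local class, or an inner class;
            # NOTE(review): the fallback error branch below (EPYFI005a) dumps $python_namespaces->[1],
            # but the namespace actually being tested is $python_namespaces->[-1]; looks like a typo, confirm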
            if ((scalar @{$python_namespaces}) > 0) {
                # prepend all encompassing namespaces to class name, to create scoped class name;
                # immediately enclosing component already has scoped name, no need to loop through entire namespace stack
                $python_namespace_name = $python_namespaces->[-1]->{symbol_scoped} . '.' . $python_namespace_name;
                $python_component->{symbol_scoped} = $python_namespace_name;

                if ($python_namespaces->[-1]->isa('Python::Class')) {
                    # a class defined (nested) inside another class is an inner class
                    $python_component->{component_type} = 'Python::InnerClass';
                    push @{$python_preparsed_target}, Python::InnerClass->new($python_component);

print 'in python_file_to_python_preparsed(), Python inner class named \'', $python_namespace_name, '\' defined inside outer class named \'', $python_namespaces->[-1]->{symbol_scoped}, '\'', "\n";
                }
                elsif ($python_namespaces->[-1]->isa('Python::Function')) {
                    # a class defined inside a function is a local class
                    $python_component->{component_type} = 'Python::LocalClass';
                    push @{$python_preparsed_target}, Python::LocalClass->new($python_component);

print 'in python_file_to_python_preparsed(), Python local class named \'', $python_namespace_name, '\' defined inside function named \'', $python_namespaces->[-1]->{symbol_scoped}, '\'', "\n";
                }
                else {
print 'in python_file_to_python_preparsed(), have enclosing namespace ', Dumper($python_namespaces->[1]), "\n";
                    croak 'ERROR EPYFI005a: Unrecognized enclosing namespace, only Functions & Classes accepted; ', Dumper($python_namespaces->[1]), ', croaking';
                }
            }
            else {
                # a class defined outside all namespaces (classes or functions) is just a normal class
                $python_component->{symbol_scoped} = $python_namespace_name;  # scoped symbol is same as non-scoped for normal classes
                $python_component->{component_type} = 'Python::Class';
                push @{$python_preparsed_target}, Python::Class->new($python_component);
            }

            # can't have the same class declared twice
            if (exists $python_classes->{$python_namespace_name}) {
                croak 'ERROR EPYFI005b: Python class named \'', $python_namespace_name, '\' already pre-parsed, croaking';
            }

            # save reference to current class along with all other classes, for easy name-based access
            $python_classes->{$python_namespace_name} = $python_preparsed_target->[-1];

            # being inside this new class increases the namespace stack (deepens the current scope)
            push @{$python_namespaces}, $python_preparsed_target->[-1];
            next;
        }
        else {
print 'in python_file_to_python_preparsed(), have UNKNOWN line of code', "\n";
            chomp $ARG;  # trim trailing newline, if present

            # update last active character
            $python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";

            # ensure we correctly parse all namespaces (functions & classes)
            if ($ARG =~ m/^(\s*)def\s+/) {
                croak 'ERROR EPYFI006a: Python function with UNKNOWN format, croaking';
            }
            elsif ($ARG =~ m/^(\s*)class\s+/) {
                croak 'ERROR EPYFI006b: Python class with UNKNOWN format, croaking';
            }

            # DEV NOTE, CORRELATION PYFI102: all Unknown logic in this if-elsif-else block must be copied
            # to every other location tagged PYFI102, so that all copies stay in sync;
            # check if previous component was same type
            if (((scalar @{$python_preparsed_target}) > 0) and
                $python_preparsed_target->[-1]->isa('Python::Unknown')) {
print 'in python_file_to_python_preparsed(), have UNKNOWN line, accumulating', "\n";
                # accumulate multiple single-line components into a multi-line component
                $python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;
                # update ending line number
                $python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number;
            }
            # merge Unknown components when separated by only Blank components
            elsif (((scalar @{$python_preparsed_target}) > 1) and
                $python_preparsed_target->[-1]->isa('Python::Blank') and
                $python_preparsed_target->[-2]->isa('Python::Unknown')) {
print 'in python_file_to_python_preparsed(), have UNKNOWN line preceded by blank line(s) and other UNKNOWN line(s), merging components', "\n";

                # merge 3 components into a single component;
                # Unknown + Blank + Unknown = Unknown
                $python_preparsed_target->[-2]->{python_source_code} .= 
                    "\n" . $python_preparsed_target->[-1]->{python_source_code} . "\n" . $ARG;
                # update ending line number
                $python_preparsed_target->[-2]->{python_line_number_end} = $python_line_number;
                # discard now-redundant Blank component
                pop @{$python_preparsed_target};
            }
            else {
print 'in python_file_to_python_preparsed(), have UNKNOWN line, creating', "\n";
                # create new component
                push @{$python_preparsed_target},
                Python::Unknown->new(
                {   
                    component_type => 'Python::Unknown',
                    python_line_number_begin => $python_line_number,
                    python_line_number_end => $python_line_number,
                    python_source_code => $ARG,
                    perl_source_code => undef  # unknown code is not translated during pre-parse phase
                });
            }

            next;
        }
    }

print 'in python_file_to_python_preparsed(), EOF end of file \'', $self->{python_file_path}, '\'', "\n";

    # close file after reading
    close($PYTHON_FILE)
        or croak 'ERROR EPYFI000b: failed to close Python source code file \'', $self->{python_file_path}, 
            '\' after reading, received OS error message \'', $OS_ERROR, '\', croaking';

    # ensure we finish parsing all multi-line function headers



( run in 1.761 second using v1.01-cache-2.11-cpan-140bd7fdf52 )