App-PythonToPerl
view release on metacpan or search on metacpan
lib/Python/File.pm view on Meta::CPAN
# indentation whitespace and other characters before closing """, append additional '#' character to indentation
elsif ($ARG =~ m/^(\s+)(.+)\"\"\"\s*$/) {
$python_preparsed_target->[-1]->{perl_source_code} .= (substr $1, 0, -1) . '#' . $2;
}
# non-whitespace characters before closing """, prepend additional '#' character by shifting all characters to the right
elsif ($ARG =~ m/^(.+)\"\"\"\s*$/) {
$python_preparsed_target->[-1]->{perl_source_code} .= '#' . $1;
}
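# else, no characters at all before closing """, so there is nothing to prepend;
# the statement below is not an else-body, it runs after the whole if/elsif chain and always appends the converted closing delimiter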
$python_preparsed_target->[-1]->{perl_source_code} .= q{#""};
# set ending line number, indicating we are no longer inside this multi-line component
$python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number;
print 'in python_file_to_python_preparsed(), ending multi-line double-quotes \"\"\"comment\"\"\", have $python_preparsed_target->[-1]->{perl_source_code} = ', "\n", $python_preparsed_target->[-1]->{perl_source_code}, "\n";
#die 'TMP DEBUG, MULTI-LINE COMMENT DOUBLE QUOTES';
}
next;
}
elsif ($ARG =~ m/\"\"\"/) {
croak 'ERROR EPYFI002: have multi-line double-quotes comment closing, but not at end of line, do not know how to handle, croaking';
}
elsif ($ARG =~ m/\'\'\'/) {
carp 'WARNING WPYFI002: have multi-line single-quotes comment while currently inside multi-line double-quotes comment, ignoring, carping';
}
# prepend '#' character for non-blank comments,
# either replacing last indentation space or shifting all characters to the right
my string $comment = $ARG;
if (($comment eq '') or
($python_preparsed_target->[-1]->{is_actually_string_literal}))
{ 1; }
elsif ($comment =~ m/^(\s+)(.*)$/) {
# if indented at least 2 spaces, then we can vertically align all '#' characters
if (((length $python_preparsed_target->[-1]->{indentation}) >= 2) and
((length $1) >= 2)) {
substr $comment, ((length $python_preparsed_target->[-1]->{indentation}) - 2), 1, '#';
}
else { $comment = (substr $1, 0, -2) . '# ' . $2; }
}
else { $comment = '#' . $comment; }
# accumulate non-last Perl line of multi-line component; copy comments verbatim
$python_preparsed_target->[-1]->{perl_source_code} .= "\n" . $comment;
# did not end multi-line component, go on to next line
next;
}
# pre-parse & accumulate everything inside multi-line include statement
elsif (((scalar @{$python_preparsed_target}) > 0) and
$python_preparsed_target->[-1]->isa('Python::Include') and
($python_preparsed_target->[-1]->{python_line_number_end} < 0)) {
print 'in python_file_to_python_preparsed(), inside multi-line include', "\n";
# accumulate current (possibly last) Python line of multi-line component
chomp $ARG;
$python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;
# update last active character
$python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";
# multi-line includes with parentheses end differently than those without parentheses
if ($python_preparsed_target->[-1]->{python_has_parentheses}) {
# end multi-line include (w/ parentheses) when the last non-whitespace non-comment character is a close parentheses
if ($ARG =~ m/^.*\)\s*(?:\#.*)?$/) {
print 'in python_file_to_python_preparsed(), ending multi-line include w/ parentheses', "\n";
# set ending line number, indicating we are no longer inside this multi-line component
$python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number;
next;
}
}
else {
# end multi-line include (w/out parentheses) when the last non-whitespace character is not a backslash
# if ($ARG =~ m/^.*[^\\]\s*$/) { # does not match correctly?
if ($ARG !~ m/^.*\\\s*$/) {
print 'in python_file_to_python_preparsed(), ending multi-line include w/out parentheses', "\n";
# set ending line number, indicating we are no longer inside this multi-line component
$python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number;
next;
}
}
# error if multi-line component invalidly nested inside other multi-line component
if ($ARG =~ m/\'\'\'/) {
croak 'ERROR EPYFI003a: have multi-line single-quotes comment while currently inside multi-line include statement, do not know how to handle, croaking';
}
elsif ($ARG =~ m/\"\"\"/) {
croak 'ERROR EPYFI003b: have multi-line double-quotes comment while currently inside multi-line include statement, do not know how to handle, croaking';
}
# did not end multi-line component, go on to next line
next;
}
# pre-parse & accumulate everything inside multi-line function header
elsif (((scalar @{$python_preparsed_target}) > 0) and
$python_preparsed_target->[-1]->isa('Python::Function') and
($python_preparsed_target->[-1]->{python_line_number_end_header} < 0)) {
print 'in python_file_to_python_preparsed(), inside multi-line function header', "\n";
# accumulate current (possibly last) Python line of multi-line component
chomp $ARG;
$python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;
# update last active character
$python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";
# end multi-line function header when it matches the entire regex;
# DEV NOTE, CORRELATION PYFI100: all regex changes must be reflected in both locations,
# the only difference should be the optional trailing comment pattern \s*(?:\#.*\n)?\s*
# which is not in the header-opening regex and is used twice in the header-closing regex;
# DEV NOTE: do NOT join multiple lines into one line for regex match,
# need \n characters to detect trailing comments,
# \s matches \n so multiple lines do not need to be combined
# $1 $2 $3 ...
if (# Python
($python_preparsed_target->[-1]->{python_source_code} =~
m/^(\s*)def\s+(\w+)\s*\(\s*((?:[\w\.\*]+\s*(?::\s*[\w\.]+\s*)?(?:\[.*\]\s*)?(?:\=\s*(?:(?:\'.*\')|(?:\".*\")|(?:\(.*\))|(?:\[.*\])|[\w\.\-\(\)]+))?\s*\,\s*(?:\#.*\n)?\s*)*[\w\.\*]+\s*(?::\s*[\w\.]+\s*)?(?:\[.*\]\s*)?(?:\=\s*(?:(?:\'.*...
# NEED ANSWER: does Pyrex accept both C and Python types? if so, update Pyrex regex below to accept ':str' Python types
# NEED ANSWER: does Pyrex accept both C and Python types? if so, update Pyrex regex below to accept ':str' Python types
# NEED ANSWER: does Pyrex accept both C and Python types? if so, update Pyrex regex below to accept ':str' Python types
# Pyrex
($python_preparsed_target->[-1]->{python_source_code} =~
m/^(\s*)def\s+(\w+)\s*\(\s*((?:(?:(?:const\s+)?(?:[\w\.]+\s*(?:\[[\:\d\,\s]+\])?\s+))?[\w\.\*]+(?:\=\s*(?:(?:\'.*\')|(?:\".*\")|(?:\(.*\))|(?:\[.*\])|[\w\.\-\(\)]+))?\s*\,\s*(?:\#.*\n)?\s*)*(?:(?:const\s+)?(?:[\w\.]+\s*(?:\[[\:\d\,\s]...
print 'in python_file_to_python_preparsed(), ending multi-line function header', "\n";
# all function header sub-components have been received, so accept them all
if (defined $3) { $python_preparsed_target->[-1]->{arguments} = $3; }
if (defined $4) { $python_preparsed_target->[-1]->{return_type} = $4; }
# set ending line number, indicating we are no longer inside this multi-line component
$python_preparsed_target->[-1]->{python_line_number_end_header} = $python_line_number;
print 'in python_file_to_python_preparsed(), ending multi-line function header, have $python_preparsed_target->[-1] = ', Dumper($python_preparsed_target->[-1]), "\n";
#die 'TMP DEBUG, END MULTI-LINE FUNCTION HEADER' if ($python_preparsed_target->[-1]->{symbol} eq '__init__');
next;
}
# did not end multi-line component, go on to next line
next;
}
# pre-parse & accumulate everything inside multi-line class header
elsif (((scalar @{$python_preparsed_target}) > 0) and
$python_preparsed_target->[-1]->isa('Python::Class') and
($python_preparsed_target->[-1]->{python_line_number_end_header} < 0)) {
print 'in python_file_to_python_preparsed(), inside multi-line class header', "\n";
# accumulate current (possibly last) Python line of multi-line component
chomp $ARG;
$python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;
# update last active character
$python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";
# end multi-line class header when it matches the entire regex;
# DEV NOTE, CORRELATION PYFI101: all regex changes must be reflected in both locations,
# the only difference should be the optional trailing comment pattern \s*(?:\#.*\n)?\s*
# which is not in the header-opening regex and is used twice in the header-closing regex;
# DEV NOTE: do NOT join multiple lines into one line for regex match,
# need \n characters to detect trailing comments,
# \s matches \n so multiple lines do not need to be combined
if (# Python
($python_preparsed_target->[-1]->{python_source_code} =~
# $1 $2 $3 $4 $5
m/^(\s*)class\s+(\w+)\s*(?:\(\s*((?:[\w\.=]+\s*\,\s*(?:\#.*\n)?\s*)*[\w\.=]+\s*\,?\s*(?:\#.*\n)?)?\s*\)\s*)?(:)\s*(\#.*)?$/) or
# Pyrex
($python_preparsed_target->[-1]->{python_source_code} =~
m/^(\s*)cdef\s+class\s+(\w+)(?:\{\{\w+\}\})?\s*(?:\(\s*((?:[\w\.=]+(?:\{\{\w+\}\})?[\w\.=]*\s*\,\s*(?:\#.*\n)?\s*)*[\w\.=]+(?:\{\{\w+\}\})?[\w\.=]*\s*\,?\s*(?:\#.*\n)?)?\s*\)\s*)?(:)\s*(\#.*)?$/)) {
print 'in python_file_to_python_preparsed(), ending multi-line class header', "\n";
# all class header sub-components have been received, so accept them all
if (defined $3) { $python_preparsed_target->[-1]->{parents} = $3; }
# set ending line number, indicating we are no longer inside this multi-line component
$python_preparsed_target->[-1]->{python_line_number_end_header} = $python_line_number;
print 'in python_file_to_python_preparsed(), ending multi-line class header, have $python_preparsed_target->[-1] = ', Dumper($python_preparsed_target->[-1]), "\n";
#die 'TMP DEBUG, END MULTI-LINE CLASS HEADER' if ($python_preparsed_target->[-1]->{symbol} eq '__init__');
next;
}
# did not end multi-line component, go on to next line
next;
}
# DEV NOTE: multi-line classes & functions can contain multi-line comments & includes, so break elsif() and start new if();
# pre-parse & accumulate everything inside multi-line namespaces (functions & classes)
if ((scalar @{$python_namespaces}) > 0) {
print 'in python_file_to_python_preparsed(), inside multi-line namespace', "\n";
#print 'in python_file_to_python_preparsed(), have all outer namespaces $python_namespaces = ', Dumper($python_namespaces), "\n";
#print 'in python_file_to_python_preparsed(), have next outer namespace $python_namespaces->[-1] = ', Dumper($python_namespaces->[-1]), "\n";
print 'in python_file_to_python_preparsed(), have next outer namespace $python_namespaces->[-1]->{symbol_scoped} = \'', $python_namespaces->[-1]->{symbol_scoped}, '\'', "\n";
# end multi-line namespace(s) when the indentation level returns to the same as, or less than, the first line of its definition,
# not counting blank (empty) lines or whitespace-only lines
$ARG =~ m/^(\s*)[^\s]/;
print 'in python_file_to_python_preparsed(), have current line leading whitespace $1 = \'', (defined($1) ? $1 : '<<<undef>>>'), '\'', "\n";
print 'in python_file_to_python_preparsed(), have next outer namespace $python_namespaces->[-1]->{indentation} = \'', $python_namespaces->[-1]->{indentation}, '\'', "\n";
# if regex above does not match, then $1 will be undefined;
# this can only happen with blank (empty) lines and whitespace-only lines
if ((defined $1) and
((length $1) <= (length $python_namespaces->[-1]->{indentation}))) {
print 'in python_file_to_python_preparsed(), ending one or more multi-line namespaces', "\n";
# continue removing namespaces from the stack, as long as the stack is not empty and the indentation level is less or equal
while (((scalar @{$python_namespaces}) > 0) and
((length $1) <= (length $python_namespaces->[-1]->{indentation}))) {
print 'in python_file_to_python_preparsed(), ending multi-line namespace \'', $python_namespaces->[-1]->{symbol_scoped}, '\'', "\n";
lib/Python/File.pm view on Meta::CPAN
# create new component
push @{$python_preparsed_target},
Python::Whitespace->new(
{
component_type => 'Python::Whitespace',
python_line_number_begin => $python_line_number,
python_line_number_end => $python_line_number,
python_source_code => $ARG,
perl_source_code => $ARG # copy whitespace lines verbatim
});
}
next;
}
# pre-parse & skip single-line # comments
elsif ($ARG =~ m/^\s*\#/) {
print 'in python_file_to_python_preparsed(), have single-line # comment', "\n";
chomp $ARG; # trim trailing newline, if present
# check if previous component was same type
if (((scalar @{$python_preparsed_target}) > 0) and
$python_preparsed_target->[-1]->isa('Python::Comment')) {
print 'in python_file_to_python_preparsed(), have single-line # comment, accumulating', "\n";
# accumulate multiple single-line components into a multi-line component
$python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number; # update ending line number
$python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;
$python_preparsed_target->[-1]->{perl_source_code} .= "\n" . $ARG; # copy comments verbatim
}
else {
print 'in python_file_to_python_preparsed(), have single-line # comment, creating', "\n";
# create new component
push @{$python_preparsed_target},
Python::Comment->new(
{
component_type => 'Python::Comment',
python_line_number_begin => $python_line_number,
python_line_number_end => $python_line_number,
python_source_code => $ARG,
perl_source_code => $ARG # copy comments verbatim
});
}
next;
}
# NEED ANSWER: other than left parentheses and comma, what other characters indicate non-void context???
# NEED ANSWER: other than left parentheses and comma, what other characters indicate non-void context???
# NEED ANSWER: other than left parentheses and comma, what other characters indicate non-void context???
# pre-parse & skip single-line '''comments''';
# DEV NOTE: if last active character is left parentheses or comma,
# then context is not void and this is not a comment
elsif (($ARG =~ m/^(\s*)\'\'\'(.*)\'\'\'\s*$/) and
($python_last_active_character ne '(') and
($python_last_active_character ne ',')) {
print 'in python_file_to_python_preparsed(), have single-line \'\'\'comment\'\'\'', "\n";
chomp $ARG; # trim trailing newline, if present
# update last active character
$python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";
# check if previous component was same type
if (((scalar @{$python_preparsed_target}) > 0) and
$python_preparsed_target->[-1]->isa('Python::CommentSingleQuotes')) {
print 'in python_file_to_python_preparsed(), have single-line \'\'\'comment\'\'\', accumulating', "\n";
# accumulate multiple single-line components into a multi-line component
$python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number; # update ending line number
$python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;
$python_preparsed_target->[-1]->{perl_source_code} .= "\n" . ($1 . '# ' . $2); # reformat comments & retain spacing
}
else {
print 'in python_file_to_python_preparsed(), have single-line \'\'\'comment\'\'\', creating', "\n";
# create new component
push @{$python_preparsed_target},
Python::CommentSingleQuotes->new(
{
component_type => 'Python::CommentSingleQuotes',
python_line_number_begin => $python_line_number,
python_line_number_end => $python_line_number,
python_source_code => $ARG,
perl_source_code => ($1 . '# ' . $2) # reformat comments & retain spacing
});
}
next;
}
# pre-parse & skip single-line """comments""";
# DEV NOTE: if last active character is left parentheses or comma,
# then context is not void and this is not a comment
elsif (($ARG =~ m/^(\s*)\"\"\"(.*)\"\"\"\s*$/) and
($python_last_active_character ne '(') and
($python_last_active_character ne ',')) {
print 'in python_file_to_python_preparsed(), have single-line \"\"\"comment\"\"\"', "\n";
chomp $ARG; # trim trailing newline, if present
# update last active character
$python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";
# check if previous component was same type
if (((scalar @{$python_preparsed_target}) > 0) and
$python_preparsed_target->[-1]->isa('Python::CommentDoubleQuotes')) {
print 'in python_file_to_python_preparsed(), have single-line \"\"\"comment\"\"\", accumulating', "\n";
# accumulate multiple single-line components into a multi-line component
$python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number; # update ending line number
$python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;
$python_preparsed_target->[-1]->{perl_source_code} .= "\n" . ($1 . '# ' . $2); # reformat comments & retain spacing
}
else {
print 'in python_file_to_python_preparsed(), have single-line \"\"\"comment\"\"\", creating', "\n";
# create new component
push @{$python_preparsed_target},
Python::CommentDoubleQuotes->new(
{
component_type => 'Python::CommentDoubleQuotes',
python_line_number_begin => $python_line_number,
python_line_number_end => $python_line_number,
python_source_code => $ARG,
perl_source_code => ($1 . '# ' . $2) # reformat comments & retain spacing
});
}
next;
}
# start multi-line '''comments''';
elsif ($ARG =~ m/^(\s*)\'\'\'(.*)$/) {
# DEV NOTE: if last active character is left parentheses or comma,
# then context is not void and this is not a comment
my boolean $is_actually_string_literal;
if (($python_last_active_character eq '(') or
($python_last_active_character eq ',')) {
print 'in python_file_to_python_preparsed(), have multi-line \'\'\'string literal, starting', "\n";
$is_actually_string_literal = 1;
}
else {
print 'in python_file_to_python_preparsed(), have multi-line \'\'\'comment, starting', "\n";
$is_actually_string_literal = 0;
}
push @{$python_preparsed_target},
Python::CommentSingleQuotes->new(
{
component_type => 'Python::CommentSingleQuotes',
indentation => $1,
is_actually_string_literal => $is_actually_string_literal,
python_line_number_begin => $python_line_number,
python_line_number_end => -1, # negative value means we are currently inside multi-line component
python_source_code => champ($ARG),
# perl_source_code => champ('=x ' . $1 . $2) # DEV NOTE: don't use POD for multi-line comments, POD parsers are inconsistent
perl_source_code => champ($1 . q{#''} . $2) # reformat comments & retain spacing
});
next;
}
# start multi-line """comments""";
# DEV NOTE: if last active character is left parentheses or comma,
# then context is not void and this is not a comment
elsif ($ARG =~ m/^(\s*)\"\"\"(.*)$/) {
# DEV NOTE: if last active character is left parentheses or comma,
# then context is not void and this is not a comment
my boolean $is_actually_string_literal;
if (($python_last_active_character eq '(') or
($python_last_active_character eq ',')) {
print 'in python_file_to_python_preparsed(), have multi-line \"\"\"string literal, starting', "\n";
$is_actually_string_literal = 1;
}
else {
print 'in python_file_to_python_preparsed(), have multi-line \"\"\"comment, starting', "\n";
$is_actually_string_literal = 0;
}
push @{$python_preparsed_target},
Python::CommentDoubleQuotes->new(
{
component_type => 'Python::CommentDoubleQuotes',
indentation => $1,
is_actually_string_literal => $is_actually_string_literal,
python_line_number_begin => $python_line_number,
python_line_number_end => -1, # negative value means we are currently inside multi-line component
python_source_code => champ($ARG),
# perl_source_code => champ('=x ' . $1 . $2) # DEV NOTE: don't use POD for multi-line comments, POD parsers are inconsistent
perl_source_code => champ($1 . q{#""} . $2) # reformat comments & retain spacing
});
next;
}
# start multi-line include statements, either with enclosing parentheses,
# or with long lines ending in backslash AKA line continuation;
# match any line starting with 'from' and including an open but not close parentheses,
# or starting with either 'from' or 'import', or 'cimport' for Pyrex, and ending in backslash
# https://python-reference.readthedocs.io/en/latest/docs/operators/slash.html
# NEED ANSWER: can the line continuation backslash appear without any preceding whitespace?
elsif (($ARG =~ m/^\s*from\s+.*(\()[^\)]*$/) or
($ARG =~ m/^\s*from\s+.*\\$/) or
($ARG =~ m/^\s*c?import\s+.*\\$/)) {
print 'in python_file_to_python_preparsed(), have multi-line include, starting', "\n";
chomp $ARG; # trim trailing newline, if present
# update last active character
$python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";
push @{$python_preparsed_target},
Python::Include->new(
{
component_type => 'Python::Include',
python_file_path => $self->{python_file_path},
python_line_number_begin => $python_line_number,
python_line_number_end => -1, # negative value means we are currently inside multi-line component
python_source_code => $ARG,
python_has_parentheses => (defined $1) ? 1 : 0,
perl_source_code => undef, # includes are not translated during pre-parse phase
});
next;
}
# pre-parse single-line include statements;
# 'import', or 'cimport' for Pyrex, must be followed by open parentheses or whitespace
elsif ($ARG =~ m/^\s*(from\s+.+\s+)?c?import[\(\s].+$/) {
print 'in python_file_to_python_preparsed(), have single-line include', "\n";
chomp $ARG; # trim trailing newline, if present
# update last active character
$python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";
push @{$python_preparsed_target},
Python::Include->new(
{
component_type => 'Python::Include',
python_file_path => $self->{python_file_path},
python_line_number_begin => $python_line_number,
python_line_number_end => $python_line_number,
python_source_code => $ARG,
perl_source_code => undef, # includes are not translated during pre-parse phase
});
next;
}
# NEED UPGRADE: support single-line Python functions; also, decorators on same line as function definition?
# NEED UPGRADE: support single-line Python functions; also, decorators on same line as function definition?
# NEED UPGRADE: support single-line Python functions; also, decorators on same line as function definition?
# start function definitions
# DEV NOTE: can have whitespace around commas
# DEV NOTE: can have whitespace but NOT newline in between function name and open parentheses
# DEV NOTE: can have whitespace but NOT newline in between close parentheses and colon
# DEV NOTE: can have trailing comma at the end of function argument list
# Python function header examples
# def FOO():
# def FOO(BAR):
# def FOO ( BAR, BAT, BAZ, ) :
# def FOO(BAR, BAT, BAZ) -> FooReturnType:
# def __FOO__(self, *, BAR=None, var_BAR=1e-9, force_alpha="warn"):
# def FOO_BAR(code, extra_preargs=[], extra_postargs=[]):
# def FOO (
#
# ) :
# def __FOO__(
# self,
# n_clusters=2,
# *,
# affinity="deprecated", # TODO(1.4): Remove
# metric=None, # TODO(1.4): Set to "euclidean"
# memory=None,
# connectivity=None,
# compute_full_tree="auto",
# linkage="ward",
# distance_threshold=None,
# compute_distances=False,
# damping=0.5,
# eps=np.finfo(np.float64).eps,
# slice_=(slice(70, 195), slice(78, 172)),
# ):
# def FOO(
# BAR: BarType, BAT:BatType="howdy", BAX : BaxType = 23
# ) -> FooReturnType:
# def FOO(
# BAR: int,
# BAT1: Optional [ str ],
# BAT2: typing.Optional[str],
# BAX: float = 1.0,
# BAZ1: Union[int, str],
# BAZ2: typing.Union [ int , str ] = 'howdy',
# ) -> Dict[str, Any]:
# Pyrex function header examples
# def FOO(
# const cnp.uint8_t[::1] BAR,
# object[:] BAT,
# cnp.npy_intp [::1] BAX
# ):
# def FOO(
# const cnp.float64_t[::1] BAR,
# const cnp.float64_t[:, ::1] BAT,
# const cnp.intp_t[::1] BAX,
# BAZ,
# cnp.float64_t[::1] QUUX
# ):
# def FOO(cnp.intp_t BAR, BAT, cnp.intp_t BAX):
# DEV NOTE: either match the entire regex for single-line function header,
# or match only the start of the function header 'def FOO' and
# start multi-line component to accumulate source code lines until entire regex can be matched
# DEV NOTE, CORRELATION PYFI100: all regex changes must be reflected in both locations,
# the only difference should be the optional trailing comment pattern \s*(?:\#.*\n)?\s*
# which is not in the header-opening regex and is used twice in the header-closing regex;
elsif ( # Python
# $1 $2 $3 ...
($ARG =~ m/^(\s*)def\s+(\w+)\s*\(\s*((?:[\w\.\*]+\s*(?::\s*[\w\.]+\s*)?(?:\[.*\]\s*)?(?:\=\s*(?:(?:\'.*\')|(?:\".*\")|(?:\(.*\))|(?:\[.*\])|[\w\.\-\(\)]+))?\s*\,\s*(?:\#.*\n)?\s*)*[\w\.\*]+\s*(?::\s*[\w\.]+\s*)?(?:\[.*\]\s*)?(?:\=\s*(...
# NEED ANSWER: does Pyrex accept both C and Python types? if so, update Pyrex regex below to accept ':str' Python types
# NEED ANSWER: does Pyrex accept both C and Python types? if so, update Pyrex regex below to accept ':str' Python types
# NEED ANSWER: does Pyrex accept both C and Python types? if so, update Pyrex regex below to accept ':str' Python types
# Pyrex
# $1 $2 $3 ...
($ARG =~ m/^(\s*)def\s+(\w+)\s*\(\s*((?:(?:(?:const\s+)?(?:[\w\.]+\s*(?:\[[\:\d\,\s]+\])?\s+))?[\w\.\*]+(?:\=\s*(?:(?:\'.*\')|(?:\".*\")|(?:\(.*\))|(?:\[.*\])|[\w\.\-\(\)]+))?\s*\,\s*)*(?:(?:const\s+)?(?:[\w\.]+\s*(?:\[[\:\d\,\s]+\])?...
# Python or Pyrex
($ARG =~ m/^(\s*)def\s+(\w+)/)) {
print 'in python_file_to_python_preparsed(), have function, starting header', "\n";
#die 'TMP DEBUG, FUNCTION HEADER';
# NEED UPGRADE: utilize optional trailing comment $6, include in generated Perl source code if present
# NEED UPGRADE: utilize optional trailing comment $6, include in generated Perl source code if present
# NEED UPGRADE: utilize optional trailing comment $6, include in generated Perl source code if present
chomp $ARG; # trim trailing newline, if present
# update last active character
$python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";
# if multi-line function header, we must receive the function name as part of the first line,
# in order to pre-parse correctly below
$python_namespace_name = $2;
$python_component =
{
component_type => undef, # set below, either Python::Function or Python::Method or Python::InnerFunction
decorators => '', # empty value means no declared decorators; possibly set below
indentation => $1, # empty match returns empty string '', not undef
symbol => $python_namespace_name, # set now to get non-scoped symbol
symbol_scoped => undef, # set below, possibly-scoped symbol
arguments => '', # empty value means no declared arguments; possibly set below
return_type => '', # empty value means no declared return type; possibly set below
python_file_path => $self->{python_file_path},
python_line_number_begin => $python_line_number,
python_line_number_end => -1, # negative value means we are currently inside multi-line component
python_line_number_end_header => -1, # negative value means we are currently inside multi-line header; possibly set below
python_source_code => $ARG, # only function header, remaining source code will be nested in python_preparsed below
python_preparsed => [], # nested pre-parsed data structures of all source code inside this function
python_preparsed_decorators => [], # nested pre-parsed data structures of all decorators above this function
perl_source_code => undef # functions are not translated during pre-parse phase
};
# if all function header sub-components have been received, then accept them all;
# consider final colon ':' captured in $5 to be the ending character of the function header
if (defined $5) {
print 'in python_file_to_python_preparsed(), have function, ending header', "\n";
if (defined $3) { $python_component->{arguments} = $3; }
if (defined $4) { $python_component->{return_type} = $4; }
# set header ending line number, indicating this is not a multi-line function header
$python_component->{python_line_number_end_header} = $python_line_number;
}
else {
print 'in python_file_to_python_preparsed(), have function, no closing colon found, NOT ending header', "\n";
}
#die 'TMP DEBUG, PARSE FUNCTION HEADER';
# look-back to capture function decorators (@abstractmethod, @staticmethod, etc) on previous lines
while ((scalar @{$python_preparsed_target}) > 0) {
print 'in python_file_to_python_preparsed(), have function, top of look-back loop', "\n";
# pop blank / whitespace / comment lines onto temporary stack,
# to either be captured along with decorators or put back if no decorator encountered
if ($python_preparsed_target->[-1]->isa('Python::Blank') or
$python_preparsed_target->[-1]->isa('Python::Whitespace') or
$python_preparsed_target->[-1]->isa('Python::Comment')) {
print 'in python_file_to_python_preparsed(), have function, in look-back loop, moving blank / whitespace / comment line to temporary stack', "\n";
# utilize as-yet-unused function body python_preparsed as temporary stack
unshift @{$python_component->{python_preparsed}}, (pop @{$python_preparsed_target});
next;
}
# capture function decorators
# @FOO
# @FOO.BAR(scope="function")
if ($python_preparsed_target->[-1]->isa('Python::Unknown') and
($python_preparsed_target->[-1]->{python_source_code} =~ m/^\s*@.+$/)) {
print 'in python_file_to_python_preparsed(), have function, in look-back loop, capturing decorator \'', $python_preparsed_target->[-1]->{python_source_code}, '\' by deleting sleep_seconds & sleep_retry_multiplier & retries_max object properties & re-...
lib/Python/File.pm view on Meta::CPAN
}
else {
# save reference to current function along with all other functions, for easy name-based access
$python_functions->{$python_namespace_name} = $python_preparsed_target->[-1];
}
# being inside this new function increases the namespace stack (deepens the current scope)
push @{$python_namespaces}, $python_preparsed_target->[-1];
next;
}
# start class definitions
# NEED ANSWER: are all Python classes multi-line?
# DEV NOTE: can have whitespace around commas
# DEV NOTE: can have whitespace but NOT newline in between class name and open parentheses
# DEV NOTE: can have whitespace but NOT newline in between close parentheses and colon
# DEV NOTE: can have trailing comma at the end of parent class list
# Python class header examples
# class FOO:
# class FOO(BAR):
# class FOO ( B.AR ) :
# class FOO (
#
# ) :
# class FOO(BAR, BAT, BAX):
# class FOO(B.AR, BA.T, BAX):
# class _FOO (BAR, BAT, BAX=BAY) :
# class FOO ( BAR,
# BAT, BAX,
# ) :
# Pyrex class header examples
# cdef class FOO:
# cdef class FOO(BAR):
# DEV NOTE: either match the entire regex for single-line class header,
# or match only the start of the class header 'class FOO(' and
# start multi-line component to accumulate source code lines until entire regex can be matched
# DEV NOTE, CORRELATION PYFI101: all regex changes must be reflected in both locations,
# the only difference should be the optional trailing comment pattern \s*(?:\#.*\n)?\s*
# which is not in the header-opening regex and is used twice in the header-closing regex;
# $1 $2 $3 $4 $5
elsif ( # Python
($ARG =~ m/^(\s*)class\s+(\w+)\s*(?:\(\s*((?:[\w\.=]+\s*\,\s*)*[\w\.=]+\s*\,?)?\s*\)\s*)?(:)\s*(\#.*)?$/) or
($ARG =~ m/^(\s*)class\s+(\w+)\s*\(/) or
# Pyrex
($ARG =~ m/^(\s*)cdef\s+class\s+(\w+)\s*(?:\(\s*((?:[\w\.=]+(?:\{\{\w+\}\})?[\w\.=]*\s*\,\s*)*[\w\.=]+(?:\{\{\w+\}\})?[\w\.=]*\s*\,?)?\s*\)\s*)?(:)\s*(\#.*)?$/) or
($ARG =~ m/^(\s*)cdef\s+class\s+(\w+)\s*\(/)) {
# NEED UPGRADE: utilize optional trailing comment $5, include in generated Perl source code if present
# NEED UPGRADE: utilize optional trailing comment $5, include in generated Perl source code if present
# NEED UPGRADE: utilize optional trailing comment $5, include in generated Perl source code if present
print 'in python_file_to_python_preparsed(), have class, starting header', "\n";
chomp $ARG; # trim trailing newline, if present
# update last active character
$python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";
# if multi-line class header, we must receive the class name as part of the first line,
# in order to pre-parse correctly below
$python_namespace_name = $2;
$python_component =
{
component_type => undef, # set below, either Python::Class or Python::LocalClass or Python::InnerClass
indentation => $1, # empty match returns empty string '', not undef
symbol => $python_namespace_name,
parents => '', # empty value means no declared parent classes; possibly set below
python_file_path => $self->{python_file_path},
python_line_number_begin => $python_line_number,
python_line_number_end => -1, # negative value means we are currently inside multi-line component
python_line_number_end_header => -1, # negative value means we are currently inside multi-line header; possibly set below
python_source_code => $ARG, # only class header, remaining source code will be nested in python_preparsed below
python_preparsed => [], # nested pre-parsed data structures of all source code inside this class
perl_source_code => undef # classes are not translated during pre-parse phase
};
# if all class header sub-components have been received, then accept them all;
# consider final colon ':' captured in $4 to be the ending character of the class header
if (defined $4) {
print 'in python_file_to_python_preparsed(), have class, ending header', "\n";
if (defined $3) { $python_component->{parents} = $3; }
# set header ending line number, indicating this is not a multi-line class header
$python_component->{python_line_number_end_header} = $python_line_number;
}
else {
print 'in python_file_to_python_preparsed(), have class, no closing colon found, NOT ending header', "\n";
}
#die 'TMP DEBUG, PARSE CLASS HEADER';
# determine if class is a normal class, a local class, or an inner class;
# in every accepted case the component receives its scoped symbol & component type,
# then is constructed via the matching class & appended to the current pre-parse target
if ((scalar @{$python_namespaces}) > 0) {
    # prepend all encompassing namespaces to class name, to create scoped class name;
    # immediately enclosing component already has scoped name, no need to loop through entire namespace stack
    $python_namespace_name = $python_namespaces->[-1]->{symbol_scoped} . '.' . $python_namespace_name;
    $python_component->{symbol_scoped} = $python_namespace_name;
    if ($python_namespaces->[-1]->isa('Python::Class')) {
        # a class defined (nested) inside another class is an inner class
        $python_component->{component_type} = 'Python::InnerClass';
        push @{$python_preparsed_target}, Python::InnerClass->new($python_component);
        print 'in python_file_to_python_preparsed(), Python inner class named \'', $python_namespace_name, '\' defined inside outer class named \'', $python_namespaces->[-1]->{symbol_scoped}, '\'', "\n";
    }
    elsif ($python_namespaces->[-1]->isa('Python::Function')) {
        # a class defined inside a function is a local class
        $python_component->{component_type} = 'Python::LocalClass';
        push @{$python_preparsed_target}, Python::LocalClass->new($python_component);
        print 'in python_file_to_python_preparsed(), Python local class named \'', $python_namespace_name, '\' defined inside function named \'', $python_namespaces->[-1]->{symbol_scoped}, '\'', "\n";
    }
    else {
        # report the immediately enclosing namespace (top of stack, index -1), which is the
        # component that failed both isa() checks above; index 1 would be the second element
        # of the stack, which may not exist or may be an unrelated outer namespace
        print 'in python_file_to_python_preparsed(), have enclosing namespace ', Dumper($python_namespaces->[-1]), "\n";
        croak 'ERROR EPYFI005a: Unrecognized enclosing namespace, only Functions & Classes accepted; ', Dumper($python_namespaces->[-1]), ', croaking';
    }
}
else {
    # a class defined outside all namespaces (classes or functions) is just a normal class
    $python_component->{symbol_scoped} = $python_namespace_name;  # scoped symbol is same as non-scoped for normal classes
    $python_component->{component_type} = 'Python::Class';
    push @{$python_preparsed_target}, Python::Class->new($python_component);
}
# can't have the same class declared twice
if (exists $python_classes->{$python_namespace_name}) {
croak 'ERROR EPYFI005b: Python class named \'', $python_namespace_name, '\' already pre-parsed, croaking';
}
# save reference to current class along with all other classes, for easy name-based access
$python_classes->{$python_namespace_name} = $python_preparsed_target->[-1];
# being inside this new class increases the namespace stack (deepens the current scope)
push @{$python_namespaces}, $python_preparsed_target->[-1];
next;
}
else {
print 'in python_file_to_python_preparsed(), have UNKNOWN line of code', "\n";
chomp $ARG; # trim trailing newline, if present
# update last active character
$python_last_active_character = $self->python_last_active_character_find($python_last_active_character, $ARG);
print 'in python_file_to_python_preparsed(), possibly updated last active character to \'', $python_last_active_character, '\'', "\n";
# ensure we correctly parse all namespaces (functions & classes)
if ($ARG =~ m/^(\s*)def\s+/) {
croak 'ERROR EPYFI006a: Python function with UNKNOWN format, croaking';
}
elsif ($ARG =~ m/^(\s*)class\s+/) {
croak 'ERROR EPYFI006b: Python class with UNKNOWN format, croaking';
}
# DEV NOTE, CORRELATION PYFI102: all Unknown logic in this if-elsif-else block must be copied
# check if previous component was same type
if (((scalar @{$python_preparsed_target}) > 0) and
$python_preparsed_target->[-1]->isa('Python::Unknown')) {
print 'in python_file_to_python_preparsed(), have UNKNOWN line, accumulating', "\n";
# accumulate multiple single-line components into a multi-line component
$python_preparsed_target->[-1]->{python_source_code} .= "\n" . $ARG;
# update ending line number
$python_preparsed_target->[-1]->{python_line_number_end} = $python_line_number;
}
# merge Unknown components when separated by only Blank components
elsif (((scalar @{$python_preparsed_target}) > 1) and
$python_preparsed_target->[-1]->isa('Python::Blank') and
$python_preparsed_target->[-2]->isa('Python::Unknown')) {
print 'in python_file_to_python_preparsed(), have UNKNOWN line preceded by blank line(s) and other UNKNOWN line(s), merging components', "\n";
# merge 3 components into a single component;
# Unknown + Blank + Unknown = Unknown
$python_preparsed_target->[-2]->{python_source_code} .=
"\n" . $python_preparsed_target->[-1]->{python_source_code} . "\n" . $ARG;
# update ending line number
$python_preparsed_target->[-2]->{python_line_number_end} = $python_line_number;
# discard now-redundant Blank component
pop @{$python_preparsed_target};
}
else {
print 'in python_file_to_python_preparsed(), have UNKNOWN line, creating', "\n";
# create new component
push @{$python_preparsed_target},
Python::Unknown->new(
{
component_type => 'Python::Unknown',
python_line_number_begin => $python_line_number,
python_line_number_end => $python_line_number,
python_source_code => $ARG,
perl_source_code => undef # unknown code is not translated during pre-parse phase
});
}
next;
}
}
print 'in python_file_to_python_preparsed(), EOF end of file \'', $self->{python_file_path}, '\'', "\n";
# close file after reading
close($PYTHON_FILE)
or croak 'ERROR EPYFI000b: failed to close Python source code file \'', $self->{python_file_path},
'\' after reading, received OS error message \'', $OS_ERROR, '\', croaking';
# ensure we finish parsing all multi-line function headers
( run in 1.761 second using v1.01-cache-2.11-cpan-140bd7fdf52 )