Acme-Pythonic
view release on metacpan or search on metacpan
lib/Acme/Pythonic.pm view on Meta::CPAN
FILTER_ONLY code => sub {
unpythonize();
cuddle_elses_and_friends();
if ($DEBUG) {
s/$Filter::Simple::placeholder/BLANKED_OUT/g;
print;
$_ = '1;';
}
};
# This regexp matches a 7-bit ASCII identifier. We use atomic grouping
# because an identifier cannot be backtracked.
my $id = qr/(?>[_a-zA-Z](?:[_a-zA-Z0-9']|::)*)/;
# Shorthand to put an eventual trailing comment in some regexps.
my $tc = qr/(?<!\$)#.*/;
# Tries its best at converting Pythonic code to Perl.
sub unpythonize {
# Sometimes Filter::Simple adds newlines blanking out stuff, which
# interferes with Pythonic conventions.
my %bos = (); # BlanketOutS
my $count = 0;
s<$Filter::Simple::placeholder>
<my $bo = "$;BLANKED_OUT_".$count++."$;";
$bos{$bo} = $&;
$bo>geo;
# In addition, we can now normalize newlines without breaking
# Filter::Simple's identifiers.
normalize_newlines();
my @lines = split /\n/;
return unless @lines;
# If unsure about the ending indentation level, add an extra
# non-indented line to ensure the stack gets emptied.
push @lines, '1; # added by Acme::Pythonic' if $lines[-1] =~ /^(?:\s|\s*#)/;
my ($comment, # comment in the current line, if any
$indent, # indentation of the current logical line
$id_at_sob, # identifier at StartOfBlock, for instance "else", or "eval"
$prev_line_with_code, # previous line with code
$might_be_modifier, # flag: current logical line might be a modifier
$line_with_modifier, # physical line which started the current modifier
$joining, # flag: are we joining lines?
$unbalanced_paren, # flag: we opened a paren that remains to be closed
@stack, # keeps track of indentation stuff
);
@stack = ();
foreach my $line (@lines) {
# We remove any trailing comment so that we can assert stuff
# easily about the end of the code in this line. It is later
# appended back in the continue block below.
$comment = $line =~ s/(\s*$tc)//o ? $1 : '';
next if $line =~ /^\s*$/;
if (!$joining) {
$unbalanced_paren = left_parenthesize($line);
$might_be_modifier = $line =~ /^\s*(?:if|unless|while|until|for|foreach)\b/;
$line_with_modifier = \$line if $might_be_modifier;
($indent) = $line =~ /^(\s*)/;
$indent = length(expand($indent));
}
if ($line =~ /(?:,|=>)\s*$/ || $line =~ s/\\\s*$//) {
++$joining;
next if $joining > 1; # if 1 we need yet to handle indentation
} else {
$joining = 0;
}
# Handle trailing colons, which can be Pythonic, mark a labeled
# block, mean some map, or &-sub call, etc.
#
# We check the parity of the number of ending colons to try to
# avoid breaking things like
#
# print for keys %main::
#
my $bracket_opened_by = '';
if ($line =~ /(:+)$/ && length($1) % 2) {
$might_be_modifier = 0;
# We perform some checks because labels have to keep their colon.
if ($line !~ /^\s*$id:$/o ||
$line =~ /[[:lower:]]/ || # labels are not allowed to have lower-case letters
$line =~ /^\s*(?:BEGIN|CHECK|INIT|END):$/) {
chop $line;
if ($unbalanced_paren) {
$line .= ")";
$unbalanced_paren = 0;
} else {
($bracket_opened_by) = $line =~ /($id)\s*$/o;
}
}
} elsif (!$joining) {
$$line_with_modifier =~ s/\(// if $might_be_modifier;
$unbalanced_paren = 0;
$line .= ';';
}
# Handle indentation. Language::Pythonesque was the basis of
# this code.
my $prev_indent = @stack ? $stack[-1]{indent} : 0;
if ($prev_indent < $indent) {
push @stack, {indent => $indent, id_at_sob => $id_at_sob};
$$prev_line_with_code .= " {" unless $$prev_line_with_code =~ s/(?=\s*$tc)/ {/o;
} elsif ($prev_indent > $indent) {
do {
my $prev_id_at_sob = $stack[-1]{id_at_sob};
pop @stack;
$prev_indent = @stack ? $stack[-1]{indent} : 0;
$$prev_line_with_code .= "\n" . ((' ' x $prev_indent) . "}");
$$prev_line_with_code .= ";" if needs_semicolon($prev_id_at_sob);
} while $prev_indent > $indent;
$$prev_line_with_code =~ s/;$/ / if $might_be_modifier;
}
$id_at_sob = $bracket_opened_by;
} continue {
$line =~ s/^\s*pass;?\s*$//;
$prev_line_with_code = \$line if !$joining && $line =~ /\S/;
$line .= $comment;
}
$_ = join "\n", @lines;
s/$;BLANKED_OUT_\d+$;/$bos{$&}/go;
}
# In the trials I've done seems like the Python interpreter understands
# any of the three conventions, even if they are not the ones in the
# platform, and even if they are mixed in the same file.
#
# In addition, it guarantees make test works no matter the platform.
sub normalize_newlines {
s/\015\012/\n/g;
tr/\015/\n/ unless "\n" eq "\015";
tr/\012/\n/ unless "\n" eq "\012";
}
# Put an opening paren in the places we forgive parens. It will be later
# closed or removed as needed in the main subroutine.
sub left_parenthesize {
$_[0] =~ s/^(\s*\b(?:if|elsif|unless)\b\s*)/$1(/ ||
$_[0] =~ s/^(\s*(?:$id\s*:)?\s*\b(?:while|until)\b(\s*))/$2 eq '' ? "$1 (" : "$1("/eo ||
$_[0] =~ s/^(\s*(?:$id\s*:\s*)?\bfor(?:each)?\b\s*)(.*)/fortype_guesser($1,$2)/oxe
}
# Tries its best at guessing a for(each) type or, at least, where to put
# the opening paren.
#
# Returns a string which is a copy of the original with the paren
# inserted.
sub fortype_guesser {
my ($for, $rest) = @_;
my $guess = "";
# Try to match "for VAR in LIST", and "for VAR LIST"
if ($rest =~ m/^((?:my|our)? \s* \$ $id\s+) in\s* ((?: (?:[\$\@%&\\]) | (?:\b\w) ) .*)$/ox ||
$rest =~ m/^((?:my|our)? \s* \$ $id\s*) ((?: (?:[\$\@%&\\]) | (?:\b\w) ) .*)$/ox) {
$guess = "$for$1($2";
} else {
# We are not sure whether this is a for or a foreach, but it is
# very likely that putting parens around gets it right.
$rest =~ s/^\s*in\b//; # fixes "foreach in LIST"
$guess = "$for($rest";
}
return $guess;
}
# Guesses whether a block started by $id_at_sob needs a semicolon after the
# ending bracket.
sub needs_semicolon {
my $id_at_sob = shift;
return 0 if !$id_at_sob;
return 1 if $id_at_sob =~ /^(do|sub|eval)$/;
my $proto = $id_at_sob =~ /::/ ? prototype($id_at_sob) : prototype("${CALLER}::$id_at_sob");
return 0 if not defined $proto;
return $proto =~ /^;?&$/;
}
# We follow perlstyle here, as we did until now.
sub cuddle_elses_and_friends {
s/^([ \t]*})\s*(?=(?:elsif|else|continue)\b)/$1 /gm;
s/^([ \t]*})\s*(?=(?:if|unless|while|until|for|foreach)\b(?!.*{$tc?$))/$1 /gm;
}
1;
__END__
=head1 NAME
Acme::Pythonic - Python whitespace conventions for Perl
=head1 SYNOPSIS
use Acme::Pythonic; # this semicolon yet needed
sub delete_edges:
my $G = shift
while my ($u, $v) = splice(@_, 0, 2):
if defined $v:
$G->delete_edge($u, $v)
else:
my @e = $G->edges($u)
while ($u, $v) = splice(@e, 0, 2):
$G->delete_edge($u, $v)
=head1 DESCRIPTION
Acme::Pythonic brings Python whitespace conventions to Perl. Just C<use>
it and Pythonic code will become valid on the fly. No file is generated,
no file is modified.
This module is thought for those who embrace contradictions. A humble
contribution for walkers of the Whitespace Matters Way in their pursuit
of highest realization, only attained with L<SuperPython>.
=head1 OVERVIEW
Acme::Pythonic provides I<grosso modo> these conventions:
=over 4
=item * Blocks are marked by indentation and an opening colon instead of
braces.
=item * Simple statements are separated by newlines instead of
semicolons.
=item * EXPRs in control flow structures do not need parentheses around.
=back
Additionally, the filter understands the keywords C<pass> and C<in>.
for my $n in 1..100:
while $n != 1:
if $n % 2:
$n = 3*$n + 1
else:
$n /= 2
=head1 DETAILS
=head2 Labels
The syntax this module provides introduces an ambiguity: Given
if $flag:
do_this()
else:
do_that()
there's no way to know whether that is meant to be
if ($flag) {
do_this();
} else {
do_that();
}
or rather
if ($flag) {
do_this();
}
else: {
do_that();
}
The former is a regular if/else, whereas the latter consists of an if and a
labeled block, so C<do_that()> is unconditionally executed.
To solve this B<labels in Pythonic code have to be in upper case>.
In addition, to be able to write BEGIN blocks and friends this way:
BEGIN:
$foo = 3
C<BEGIN>, C<CHECK>, C<INIT>, C<END> cannot be used as labels.
Let's see some examples. This is the Pythonic version of the snippet in L<perlsyn>:
OUTER: for my $wid in @ary1:
INNER: for my $jet in @ary2:
next OUTER if $wid > $jet
$wid += $jet
And here we have a labeled block:
( run in 1.048 second using v1.01-cache-2.11-cpan-e1769b4cff6 )