Acme-Pythonic

 view release on metacpan or  search on metacpan

lib/Acme/Pythonic.pm  view on Meta::CPAN

# me know! The versions of Filter::Simple, Text::Tabs, and Test::More
# would be useful as well.
use 5.006_001;
use strict;
use warnings;

our ($VERSION, $DEBUG, $CALLER);
$VERSION = '0.47';

use Text::Tabs;

sub import {
    my ($package, %cfg) = @_;
    $DEBUG = $cfg{debug};
    $CALLER = caller() # to be able to check sub prototypes
}


use Filter::Simple;
FILTER_ONLY code => sub {
    unpythonize();
    cuddle_elses_and_friends();
    if ($DEBUG) {
        s/$Filter::Simple::placeholder/BLANKED_OUT/g;
        print;
        $_ = '1;';
    }
};


# This regexp matches a 7-bit ASCII identifier. We use atomic grouping
# because an identifier cannot be backtracked.
my $id = qr/(?>[_a-zA-Z](?:[_a-zA-Z0-9']|::)*)/;

# Shorthand to put an eventual trailing comment in some regexps.
my $tc = qr/(?<!\$)#.*/;


# Tries its best at converting Pythonic code to Perl.
sub unpythonize {
    # Sometimes Filter::Simple adds newlines blanking out stuff, which
    # interferes with Pythonic conventions.
    my %bos = (); # BlanketOutS
    my $count = 0;
    s<$Filter::Simple::placeholder>
     <my $bo = "$;BLANKED_OUT_".$count++."$;";
      $bos{$bo} = $&;
      $bo>geo;

    # In addition, we can now normalize newlines without breaking
    # Filter::Simple's identifiers.
    normalize_newlines();
    my @lines = split /\n/;
    return unless @lines;

    # If unsure about the ending indentation level, add an extra
    # non-indented line to ensure the stack gets emptied.
    push @lines, '1; # added by Acme::Pythonic' if $lines[-1] =~ /^(?:\s|\s*#)/;

    my ($comment,             # comment in the current line, if any
        $indent,              # indentation of the current logical line
        $id_at_sob,           # identifier at StartOfBlock, for instance "else", or "eval"
        $prev_line_with_code, # previous line with code
        $might_be_modifier,   # flag: current logical line might be a modifier
        $line_with_modifier,  # physical line which started the current modifier
        $joining,             # flag: are we joining lines?
        $unbalanced_paren,    # flag: we opened a paren that remains to be closed
        @stack,               # keeps track of indentation stuff
    );

    @stack = ();
    foreach my $line (@lines) {
        # We remove any trailing comment so that we can assert stuff
        # easily about the end of the code in this line. It is later
        # appended back in the continue block below.
        $comment = $line =~ s/(\s*$tc)//o ? $1 : '';
        next if $line =~ /^\s*$/;

        if (!$joining) {
            $unbalanced_paren = left_parenthesize($line);
            $might_be_modifier = $line =~ /^\s*(?:if|unless|while|until|for|foreach)\b/;
            $line_with_modifier = \$line if $might_be_modifier;
            ($indent) = $line =~ /^(\s*)/;
            $indent = length(expand($indent));
        }

        if ($line =~ /(?:,|=>)\s*$/ || $line =~ s/\\\s*$//) {
            ++$joining;
            next if $joining > 1; # if 1 we need yet to handle indentation
        } else {
            $joining = 0;
        }

        # Handle trailing colons, which can be Pythonic, mark a labeled
        # block, mean some map, or &-sub call, etc.
        #
        # We check the parity of the number of ending colons to try to
        # avoid breaking things like
        #
        #    print for keys %main::
        #
        my $bracket_opened_by = '';
        if ($line =~ /(:+)$/ && length($1) % 2) {
            $might_be_modifier = 0;
            # We perform some checks because labels have to keep their colon.
            if ($line !~ /^\s*$id:$/o ||
                $line =~ /[[:lower:]]/ || # labels are not allowed to have lower-case letters
                $line =~ /^\s*(?:BEGIN|CHECK|INIT|END):$/) {
                chop $line;
                if ($unbalanced_paren) {
                    $line .= ")";
                    $unbalanced_paren = 0;
                } else {
                    ($bracket_opened_by) = $line =~ /($id)\s*$/o;
                }
            }
        } elsif (!$joining) {
            $$line_with_modifier =~ s/\(// if $might_be_modifier;
            $unbalanced_paren = 0;
            $line .= ';';
        }

        # Handle indentation. Language::Pythonesque was the basis of
        # this code.

lib/Acme/Pythonic.pm  view on Meta::CPAN

means we are dealing with C<$_> as usual:

    foreach in @array:
        s/foo/bar/

but can't be used when the loop acts as a modifier:

    print foreach in @array # ERROR

This keyword is not supported as sequence membership operator.

=head2 &-Prototyped subroutines

C<&>-prototyped subroutines can be used like this:

    sub mygrep (&@):
        my $code = shift
        my @result
        foreach @_:
            push @result, $_ if &$code
        return @result

    @array = mygrep:
        my $aux = $_
        $aux *= 3
        $aux += 1
        $aux % 2
    reverse 0..5

If the prototype is exactly C<&>, however, Acme::Pythonic needs to know
it in advance. Thus, if any module defines such a subroutine C<use()> it
I<before> Acme::Pythonic:

    use Thread 'async';
    use Acme::Pythonic; # now Acme::Pythonic knows async() has prototype "&"

    async:
        do_this()
        do_that()

If such a subroutine is defined in the very code being filtered we need to
declare it before Acme::Pythonic is C<use()>d:

    sub twice (&);      # declaration
    use Acme::Pythonic; # now Acme::Pythonic knows twice() has prototype "&"

    # the definition itself can be Pythonic
    sub twice (&):
         my $code = shift
         $code->() for 1..2

    twice:
         do_this_twice()

Nevertheless, the module is not smart enough to handle optional arguments as in
a subroutine with prototype C<&;$>.


=head2 Line joining

As in Python, you can break a logical line in several physical lines
using a backslash at the end:

    my $total = total_products() + \
                total_delivery() + \
                total_taxes()

and in that case the indentation of those additional lines is irrelevant.

Unlike Python, backslashes in a line with a comment are allowed

    my $foo = 1 + \  # comment, no problem
        2

If a line ends in a comma or arrow (C<< => >>) it is conceptually joined
with the following as well:

    my %authors = (Perl   => "Larry Wall",
                   Python => "Guido van Rossum")

As in Python, comments can be intermixed there:

    my %hello = (Catalan => 'Hola',   # my mother tongue
                 English => 'Hello',)


Acme::Pythonic munges a source that has already been processed by L<Filter::Simple>. In particular, L<Filter::Simple> blanks out quotelikes whose content is not even seen by Acme::Pythonic so backslashes in C<qw//> and friends won't be removed:

    # Do not put backslashes here because qw// is bypassed
    my @colors = qw(Red
                    Blue
                    Green)


=head1 CAVEATS

Although this module makes possible some Python-like syntax in Perl,
there are some remarkable limitations in the current implementation:

=over 4

=item * Compound statement bodies are not recognized in header
lines. This would be valid according to Python syntax:

    if $n % 2: $n = 3*$n + 1
    else: $n /= 2

but it does not work in Acme::Pythonic. The reason for this is that it
would be hard to identify the colon that closes the expression without
parsing Perl, consider for instance:

    if keys %foo::bar ? keys %main:: : keys %foo::: print "foo\n"

=item * In Python statements may span lines if they're enclosed in
C<()>, C<{}>, or C<[]> pairs. Acme::Pythonic does not support this rule,
however, though it understands the common case where you break the line
in a comma in list literals, subroutine calls, etc.

=back

Remember that source filters do not work if they are called at runtime,



( run in 2.456 seconds using v1.01-cache-2.11-cpan-e1769b4cff6 )