Regexp-Optimizer

 view release on metacpan or  search on metacpan

lib/Regexp/Optimizer.pm  view on Meta::CPAN

package Regexp::Optimizer;

use 5.008001;
use strict;
use warnings FATAL => 'all';
use Regexp::Assemble;
our $VERSION = sprintf "%d.%02d", q$Revision: 0.23 $ =~ /(\d+)/g;

my $re_nested;
$re_nested = qr{
  \(                   # open paren
  ((?:                 # start capture  
    (?>[^()]+)       | # Non-parens w/o backtracking or ...
    (??{ $re_nested }) # Group with matching parens
  )*)                  # end capture
  \)                   # close paren
}msx;

my $re_optimize = qr{(?<=[^\\])\|}ms;

sub new {
    my $class = shift;
    bless {@_}, $class;
}

sub _assemble {
    my $str = shift;
    return $str if $str !~ $re_optimize;
    if ( $str !~ m/[(]/ms ) {
        my $ra = Regexp::Assemble->new();
        $ra->add( split m{[|]}, $str );
        return $ra->as_string;
    }
    $str =~ s{$re_nested}{
        no warnings 'uninitialized';
        my $sub = $1;
        if ($sub =~ m/\A\?(?:[\?\{\(PR]|[\+\-]?[0-9])/ms) {
            "($sub)";  # (?{CODE}) and like ruled out
        }else{
            my $mod = ($sub =~ s/\A\?//) ? '?' : '';
            if ($mod) {
                $sub =~ s{\A(
                              [\w\^\-]*: | # modifier
                              [<]?[=!]   | # assertions
                              [<]\w+[>]  | # named capture
                              [']\w+[']  | # ditto
                              [|]          # branch reset
                          )
                     }{}msx;
                $mod .= $1;
            }
            '(' . $mod . _assemble($sub) . ')'
        }
    }msxge;
    $str;
}

sub as_string {
    my ( $self, $str ) = @_;
    return $str if $str !~ $re_optimize;
    my ($mod) = ($str =~ m/\A\(\?(.*?):/);
    if ( $mod =~ /x/ ) {
        $str =~ s{^\s+}{}mg;
        $str =~ s{(?<=[^\\])\s*?#.*?$}{}mg;
        $str =~ s{\s+[|]\s+}{|}mg;
        $str =~ s{(?:\r\n?|\n)}{}msg;
        $str =~ s{[ ]+}{ }msgx;
        # warn $str;
    }
    # escape all occurance of '\(' and '\)'
    $str =~ s/\\([\(\)])/sprintf "\\x%02x" , ord $1/ge;
    _assemble($str);
}

sub optimize {
    my $self = shift;
    my $re   = $self->as_string(shift);
    qr{$re};
}

1; # End of Regexp::Optimizer

__END__

=head1 NAME

Regexp::Optimizer - optimizes regular expressions

=head1 VERSION

$Id: Optimizer.pm,v 0.23 2013/02/26 05:47:41 dankogai Exp dankogai $



( run in 1.545 second using v1.01-cache-2.11-cpan-71847e10f99 )