Regexp-Optimizer
view release on metacpan or search on metacpan
lib/Regexp/Optimizer.pm view on Meta::CPAN
package Regexp::Optimizer;
use 5.008001;
use strict;
use warnings FATAL => 'all';
use Regexp::Assemble;
our $VERSION = sprintf "%d.%02d", q$Revision: 0.23 $ =~ /(\d+)/g;
my $re_nested;
$re_nested = qr{
\( # open paren
((?: # start capture
(?>[^()]+) | # Non-parens w/o backtracking or ...
(??{ $re_nested }) # Group with matching parens
)*) # end capture
\) # close paren
}msx;
my $re_optimize = qr{(?<=[^\\])\|}ms;
sub new {
my $class = shift;
bless {@_}, $class;
}
sub _assemble {
my $str = shift;
return $str if $str !~ $re_optimize;
if ( $str !~ m/[(]/ms ) {
my $ra = Regexp::Assemble->new();
$ra->add( split m{[|]}, $str );
return $ra->as_string;
}
$str =~ s{$re_nested}{
no warnings 'uninitialized';
my $sub = $1;
if ($sub =~ m/\A\?(?:[\?\{\(PR]|[\+\-]?[0-9])/ms) {
"($sub)"; # (?{CODE}) and like ruled out
}else{
my $mod = ($sub =~ s/\A\?//) ? '?' : '';
if ($mod) {
$sub =~ s{\A(
[\w\^\-]*: | # modifier
[<]?[=!] | # assertions
[<]\w+[>] | # named capture
[']\w+['] | # ditto
[|] # branch reset
)
}{}msx;
$mod .= $1;
}
'(' . $mod . _assemble($sub) . ')'
}
}msxge;
$str;
}
sub as_string {
my ( $self, $str ) = @_;
return $str if $str !~ $re_optimize;
my ($mod) = ($str =~ m/\A\(\?(.*?):/);
if ( $mod =~ /x/ ) {
$str =~ s{^\s+}{}mg;
$str =~ s{(?<=[^\\])\s*?#.*?$}{}mg;
$str =~ s{\s+[|]\s+}{|}mg;
$str =~ s{(?:\r\n?|\n)}{}msg;
$str =~ s{[ ]+}{ }msgx;
# warn $str;
}
# escape all occurance of '\(' and '\)'
$str =~ s/\\([\(\)])/sprintf "\\x%02x" , ord $1/ge;
_assemble($str);
}
sub optimize {
my $self = shift;
my $re = $self->as_string(shift);
qr{$re};
}
1; # End of Regexp::Optimizer
__END__
=head1 NAME
Regexp::Optimizer - optimizes regular expressions
=head1 VERSION
$Id: Optimizer.pm,v 0.23 2013/02/26 05:47:41 dankogai Exp dankogai $
( run in 1.545 second using v1.01-cache-2.11-cpan-71847e10f99 )