Algorithm-MLCS
view release on metacpan or search on metacpan
lib/Algorithm/MLCS.pm view on Meta::CPAN
package Algorithm::MLCS;
use strict;
use warnings FATAL => 'all';
use vars qw/ $VERSION @ISA @EXPORT /;
require Exporter;
@ISA = qw/ Exporter /;
@EXPORT = qw/ lcs /;
$VERSION = '1.02';
# Gets arrayref of sequences (arrayrefs) and return LCS array in list context
# or length of LCS in scalar context
sub lcs {
my ( @seq, @lcs ) = map { _build_seq($_) } _get_dict( $_[0] );
while ( @seq && !( grep { !@$_ } @seq ) ) {
my %dict = ( %{ $seq[0][0] } );
for my $s ( @seq[ 1 .. $#seq ] ) {
%dict = map {
$_ => $dict{$_} > $s->[0]{$_}
? $s->[0]{$_} : $dict{$_}
} grep { $s->[0]{$_} } keys %dict;
}
last unless %dict;
push @lcs, ( sort { $dict{$b} <=> $dict{$a} } keys %dict )[0];
for (@seq) {
while (@$_) { last if @$_ == ( shift @$_ )->{ $lcs[-1] } }
}
}
wantarray ? @lcs : scalar @lcs;
}
# Auxiliary function that gets single sequence arrayref and
# build specific data structure for further processing
# in order to find LCS
sub _build_seq {
my ( $seq, %dict, @seq_st ) = @_;
for ( 0 .. $#{$seq} ) { push @{ $dict{ $seq->[$_] } }, $_ }
for my $i ( 0 .. $#{$seq} ) {
my %tok;
for ( keys %dict ) {
$tok{$_} = @{$seq} - $dict{$_}[0];
if ( $dict{$_}[0] == $i ) {
shift @{ $dict{$_} };
delete $dict{$_} if !@{ $dict{$_} };
}
}
$seq_st[$i] = \%tok;
}
return \@seq_st;
}
# Auxiliary function that gets arrayref of sequences (arrayrefs),
# builds dictionary of unique tokens presented in all given sequences
# and returns the arrayref of new sequences with only tokens from dictionary
sub _get_dict {
my $seq = shift;
my %dict = map { $_ => 1 } @{ $seq->[0] };
for ( @{$seq}[ 1 .. $#{$seq} ] ) {
%dict = map { $_ => 1 } grep { $dict{$_} } @$_;
last unless %dict;
}
return map { [ grep { $dict{$_} } @$_ ] } @{$seq};
}
1;
=head1 NAME
Algorithm::MLCS - Fast heuristic algorithm for finding Longest Common Subsequence
of multiple sequences
=head1 VERSION
Version 1.02
=head1 SYNOPSIS
use Data::Dumper;
use Algorithm::MLCS;
my @seqs = (
[ qw/a b c d f g h j q z/ ],
[ qw/a b c d f g h j q z/ ],
[ qw/a b c x f h j q z/ ],
[ qw/a b c f g j q z/ ],
);
my @lcs = lcs( \@seqs );
( run in 3.432 seconds using v1.01-cache-2.11-cpan-524268b4103 )