Algorithm-MLCS

 view release on metacpan or  search on metacpan

lib/Algorithm/MLCS.pm  view on Meta::CPAN

package Algorithm::MLCS;

use strict;
use warnings FATAL => 'all';

use vars qw/ $VERSION @ISA @EXPORT /;

require Exporter;

@ISA     = qw/ Exporter /;
@EXPORT  = qw/ lcs /;
$VERSION = '1.02';

# Gets arrayref of sequences (arrayrefs) and return LCS array in list context
# or length of LCS in scalar context
sub lcs {
    my ( @seq, @lcs ) = map { _build_seq($_) } _get_dict( $_[0] );

    while ( @seq && !( grep { !@$_ } @seq ) ) {
        my %dict = ( %{ $seq[0][0] } );

        for my $s ( @seq[ 1 .. $#seq ] ) {
            %dict = map {
                      $_ => $dict{$_} > $s->[0]{$_}
                    ? $s->[0]{$_} : $dict{$_}
            } grep { $s->[0]{$_} } keys %dict;
        }

        last unless %dict;

        push @lcs, ( sort { $dict{$b} <=> $dict{$a} } keys %dict )[0];

        for (@seq) {
            while (@$_) { last if @$_ == ( shift @$_ )->{ $lcs[-1] } }
        }
    }

    wantarray ? @lcs : scalar @lcs;
}

# Auxiliary function that gets single sequence arrayref and
# build specific data structure for further processing
# in order to find LCS
sub _build_seq {
    my ( $seq, %dict, @seq_st ) = @_;

    for ( 0 .. $#{$seq} ) { push @{ $dict{ $seq->[$_] } }, $_ }

    for my $i ( 0 .. $#{$seq} ) {
        my %tok;
        for ( keys %dict ) {
            $tok{$_} = @{$seq} - $dict{$_}[0];
            if ( $dict{$_}[0] == $i ) {
                shift @{ $dict{$_} };
                delete $dict{$_} if !@{ $dict{$_} };
            }
        }
        $seq_st[$i] = \%tok;
    }

    return \@seq_st;
}

# Auxiliary function that gets arrayref of sequences (arrayrefs),
# builds dictionary of unique tokens presented in all given sequences
# and returns the arrayref of new sequences with only tokens from dictionary
sub _get_dict {
    my $seq = shift;
    my %dict = map { $_ => 1 } @{ $seq->[0] };

    for ( @{$seq}[ 1 .. $#{$seq} ] ) {
        %dict = map { $_ => 1 } grep { $dict{$_} } @$_;
        last unless %dict;
    }

    return map { [ grep { $dict{$_} } @$_ ] } @{$seq};
}

1;

=head1 NAME

Algorithm::MLCS - Fast heuristic algorithm for finding Longest Common Subsequence
of multiple sequences

=head1 VERSION

Version 1.02

=head1 SYNOPSIS

    use Data::Dumper;
    use Algorithm::MLCS;

    my @seqs = (
        [ qw/a b c d f g h j q z/ ],
        [ qw/a b c d f g h j q z/ ],
        [ qw/a b c x f h j q z/   ],
        [ qw/a b c f g j q z/     ],
    );

    my @lcs = lcs( \@seqs );



( run in 3.432 seconds using v1.01-cache-2.11-cpan-524268b4103 )