Algorithm-LCSS

 view release on metacpan or  search on metacpan

LCSS.pm  view on Meta::CPAN

package Algorithm::LCSS;

use 5.006;
use strict;
use warnings;
use Algorithm::Diff qw(traverse_sequences);
require Exporter;
use vars qw( @ISA @EXPORT_OK $VERSION );
our @ISA = qw(Exporter);
@EXPORT_OK = qw( LCSS CSS CSS_Sorted );
$VERSION = '0.01';

sub _tokenize { [split //, $_[0]] }

sub CSS {
    my $is_array = ref $_[0] eq 'ARRAY' ? 1 : 0;
    my ( $seq1, $seq2, @match, $from_match );
    my $i = 0;
    if ( $is_array ) {
        $seq1 = $_[0];
        $seq2 = $_[1];
        traverse_sequences( $seq1, $seq2, {
            MATCH => sub { push @{$match[$i]}, $seq1->[$_[0]]; $from_match = 1 },
            DISCARD_A => sub { do{$i++; $from_match = 0} if $from_match },
            DISCARD_B => sub { do{$i++; $from_match = 0} if $from_match },
        });
    }
    else {
        $seq1 = _tokenize($_[0]);
        $seq2 = _tokenize($_[1]);
        traverse_sequences( $seq1, $seq2, {
            MATCH => sub { $match[$i] .= $seq1->[$_[0]]; $from_match = 1 },
            DISCARD_A => sub { do{$i++; $from_match = 0} if $from_match },
            DISCARD_B => sub { do{$i++; $from_match = 0} if $from_match },
        });
    }
  return \@match;
}

sub CSS_Sorted {
    my $match = CSS(@_);
    if ( ref $_[0] eq 'ARRAY' ) {
       @$match = map{$_->[0]}sort{$b->[1]<=>$a->[1]}map{[$_,scalar(@$_)]}@$match
    }
    else {
       @$match = map{$_->[0]}sort{$b->[1]<=>$a->[1]}map{[$_,length($_)]}@$match
    }
  return $match;
}

sub LCSS {
    my $is_array = ref $_[0] eq 'ARRAY' ? 1 : 0;
    my $css = CSS(@_);
    my $index;
    my $length = 0;
    if ( $is_array ) {
        for( my $i = 0; $i < @$css; $i++ ) {
            next unless @{$css->[$i]}>$length;
            $index = $i;
            $length = @{$css->[$i]};
        }
    }
    else {
        for( my $i = 0; $i < @$css; $i++ ) {
            next unless length($css->[$i])>$length;
            $index = $i;
            $length = length($css->[$i]);
        }
    }
  return $css->[$index];
}

1;
__END__

=head1 NAME

Algorithm::LCSS - Perl extension for getting the Longest Common Sub-Sequence

=head1 SYNOPSIS

    use Algorithm::LCSS qw( LCSS CSS CSS_Sorted );
    my $lcss_ary_ref = LCSS( \@SEQ1, \@SEQ2 );  # ref to array
    my $lcss_string  = LCSS( $STR1, $STR2 );    # string
    my $css_ary_ref = CSS( \@SEQ1, \@SEQ2 );    # ref to array of arrays
    my $css_str_ref = CSS( $STR1, $STR2 );      # ref to array of strings
    my $css_ary_ref = CSS_Sorted( \@SEQ1, \@SEQ2 );  # ref to array of arrays
    my $css_str_ref = CSS_Sorted( $STR1, $STR2 );    # ref to array of strings

=head1 DESCRIPTION

This module uses Algoritm::Diff to implement LCSS and is orders of magnitude
faster than String::LCSS.

If you pass the methods array refs you get back array (ref) format data. If
you pass strings you get a string or a ref to an array of strings.

=head1 METHODS

=head2 LCSS

Returns the longest common sub sequence. If there may be more than one (with
exactly the same length) and it matters use CSS instead.

    my $lcss_ary_ref = LCSS( \@SEQ1, \@SEQ2 );  # ref to array
    my $lcss_string  = LCSS( $STR1, $STR2 );    # string

=head2 CSS

Returns all the common sub sequences, unsorted.

    my $css_ary_ref = CSS( \@SEQ1, \@SEQ2 );  # ref to array of arrays
    my $css_str_ref = CSS( $STR1, $STR2 );    # ref to array of strings

=head2 CSS_Sorted

Returns all the common sub strings, sorted from longest to shortest CSS.

    my $css_ary_ref = CSS_Sorted( \@SEQ1, \@SEQ2 );  # ref to array of arrays
    my $css_str_ref = CSS_Sorted( $STR1, $STR2 );    # ref to array of strings

=head1 EXPORT

None by default.

=head1 AUTHOR

Dr James Freeman <james.freeman@id3.org.uk>

=head1 SEE ALSO

L<perl>.

=cut



( run in 0.683 second using v1.01-cache-2.11-cpan-140bd7fdf52 )