AI-Embedding
view release on metacpan or search on metacpan
lib/AI/Embedding.pm view on Meta::CPAN
if ($text) {
srand scalar split /\s+/, $text;
}
my @vector;
for (1...$dimension) {
push @vector, rand(2) - 1;
}
return join ',', @vector;
}
# Convert a CSV Embedding into a hashref
sub _make_vector {
my ($self, $embed_string) = @_;
if (!defined $embed_string) {
$self->{'error'} = 'Nothing to compare!';
return;
}
my %vector;
my @embed = split /,/, $embed_string;
for (my $i = 0; $i < @embed; $i++) {
$vector{'feature' . $i} = $embed[$i];
}
return \%vector;
}
# Return a comparator to compare to a set vector
sub comparator {
my($self, $embed) = @_;
$self->{'error'} = '';
my $vector1 = $self->_make_vector($embed);
return sub {
my($embed2) = @_;
my $vector2 = $self->_make_vector($embed2);
return $self->_compare_vector($vector1, $vector2);
};
}
# Compare 2 Embeddings
sub compare {
my ($self, $embed1, $embed2) = @_;
my $vector1 = $self->_make_vector($embed1);
my $vector2;
if (defined $embed2) {
$vector2 = $self->_make_vector($embed2);
} else {
$vector2 = $self->{'comparator'};
}
if (!defined $vector2) {
$self->{'error'} = 'Nothing to compare!';
return;
}
if (scalar keys %$vector1 != scalar keys %$vector2) {
$self->{'error'} = 'Embeds are unequal length';
return;
}
return $self->_compare_vector($vector1, $vector2);
}
# Compare 2 Vectors
sub _compare_vector {
my ($self, $vector1, $vector2) = @_;
my $cs = Data::CosineSimilarity->new;
$cs->add( label1 => $vector1 );
$cs->add( label2 => $vector2 );
return $cs->similarity('label1', 'label2')->cosine;
}
1;
__END__
=encoding utf8
=head1 NAME
AI::Embedding - Perl module for working with text embeddings using various APIs
=head1 VERSION
Version 1.11
=head1 SYNOPSIS
use AI::Embedding;
my $embedding = AI::Embedding->new(
api => 'OpenAI',
key => 'your-api-key'
);
my $csv_embedding = $embedding->embedding('Some sample text');
my $test_embedding = $embedding->test_embedding('Some sample text');
my @raw_embedding = $embedding->raw_embedding('Some sample text');
my $cmp = $embedding->comparator($csv_embedding2);
my $similarity = $cmp->($csv_embedding1);
my $similarity_with_other_embedding = $embedding->compare($csv_embedding1, $csv_embedding2);
=head1 DESCRIPTION
The L<AI::Embedding> module provides an interface for working with text embeddings using various APIs. It currently supports the L<OpenAI|https://www.openai.com> L<Embeddings API|https://platform.openai.com/docs/guides/embeddings/what-are-embeddings>...
Embeddings allow the meaning of passages of text to be compared for similarity. This is more natural and useful to humans than using traditional keyword based comparisons.
An Embedding is a multi-dimensional vector representing the meaning of a piece of text. The Embedding vector is created by an AI Model. The default model (OpenAI's C<text-embedding-ada-002>) produces a 1536 dimensional vector. The resulting vector...
=head2 Comparator
Embeddings are used to compare similarity of meaning between two passages of text. A typical work case is to store a number of pieces of text (e.g. articles or blogs) in a database and compare each one to some user supplied search text. L<AI::Embed...
Alternatively, the C<comparator> method can be called with one Embedding. The C<comparator> returns a reference to a method that takes a single Embedding to be compared to the Embedding from which the Comparator was created.
( run in 0.964 second using v1.01-cache-2.11-cpan-39bf76dae61 )