AI-Embedding
view release on metacpan or search on metacpan
lib/AI/Embedding.pm view on Meta::CPAN
use HTTP::Tiny;
use JSON::PP;
use Data::CosineSimilarity;
# Module version. The string eval turns the quoted version into a plain
# number, so $VERSION holds 1.11 rather than the string '1.11'.
our $VERSION = '1.11';
$VERSION = eval $VERSION;
# File-scoped HTTP::Tiny client, created once when this file is loaded.
my $http = HTTP::Tiny->new;
# Create Embedding object
sub new {
my $class = shift;
my %attr = @_;
$attr{'error'} = '';
$attr{'api'} = 'OpenAI' unless $attr{'api'};
$attr{'error'} = 'Invalid API' unless $attr{'api'} eq 'OpenAI';
$attr{'error'} = 'API Key missing' unless $attr{'key'};
$attr{'model'} = 'text-embedding-ada-002' unless $attr{'model'};
lib/AI/Embedding.pm view on Meta::CPAN
# Endpoint URL for each supported API, keyed by API name
my %url = (
    'OpenAI' => 'https://api.openai.com/v1/embeddings',
);
# Define HTTP Headers for APIs
#
# The header is built once, when this file is loaded, and without an object,
# so the stored 'Authorization' value is 'Bearer ' followed by an empty key.
#
# The sub is called with an explicit empty argument list. The previous
# '&_get_header_openai' form (no parentheses) passed along whatever @_ was
# current when the file was loaded; the sub would then shift the first
# element off that array and could write a 'key' entry into it.
my %header = (
    'OpenAI' => _get_header_openai(),
);
# Report whether the last operation succeeded.
# Returns a true value when the object's 'error' field is empty (or otherwise
# false), and a false value when an error message is stored there.
sub success {
    my ($self) = @_;

    my $last_error = $self->{'error'};
    return !$last_error;
}
# Accessor for the error message of the last operation.
# Returns whatever is stored in the object's 'error' field; the other methods
# in this file set it to an empty string when there is no error.
sub error {
    my ($self) = @_;

    return $self->{'error'};
}
# Build the HTTP headers used when calling OpenAI.
#
# Side effect: when the object's 'key' field is undefined it is set to an
# empty string, so the Authorization header is always a well-formed string.
#
# Returns a hashref with 'Authorization' and 'Content-type' entries.
sub _get_header_openai {
    my ($self) = @_;

    if (!defined $self->{'key'}) {
        $self->{'key'} = '';
    }

    my %openai_header = (
        'Authorization' => "Bearer $self->{'key'}",
        'Content-type'  => 'application/json',
    );
    return \%openai_header;
}
# Fetch Embedding response
sub _get_embedding {
my ($self, $text) = @_;
my $response = $http->post($url{$self->{'api'}}, {
'headers' => {
'Authorization' => 'Bearer ' . $self->{'key'},
'Content-type' => 'application/json'
},
content => encode_json {
input => $text,
model => $self->{'model'},
lib/AI/Embedding.pm view on Meta::CPAN
die 'Incorrect API Key - check your API Key is correct';
}
return $response;
}
# TODO:
# Make 'headers' use $header{$self->{'api'}}
# Currently hard coded to OpenAI
# Testing aid only - not part of the public interface.
# Looks up the object's 'api' name in the file-level %header table and
# returns the entry stored there.
sub _test {
    my ($self) = @_;

    my $api_name = $self->{'api'};
    return $header{$api_name};
}
# Return Embedding as a CSV string
#
# Parameters:
#   $text    - the text to send to the embedding API
#   $verbose - optional; when defined, a failed request returns the raw
#              HTTP response hashref instead of undef
#
# Returns the embedding values joined with commas on success. On failure the
# object's 'error' field is set to 'HTTP Error - <reason>' and undef (or the
# response, see $verbose) is returned.
sub embedding {
    my ($self, $text, $verbose) = @_;

    my $response = $self->_get_embedding($text);

    if ($response->{'success'}) {
        # Clear any message left behind by an earlier failed call, otherwise
        # success() would keep reporting failure after this successful one.
        $self->{'error'} = '';

        my $embedding = decode_json($response->{'content'});
        return join(',', @{ $embedding->{'data'}[0]->{'embedding'} });
    }

    $self->{'error'} = 'HTTP Error - ' . $response->{'reason'};
    return $response if defined $verbose;

    # Explicit undef is kept on purpose: this method yields a single scalar,
    # so callers using it in list context still receive exactly one value.
    return undef;
}
# Return Embedding as an array
#
# Parameters:
#   $text    - the text to send to the embedding API
#   $verbose - optional; when defined, a failed request returns the raw
#              HTTP response hashref instead of nothing
#
# Returns the list of embedding values on success. On failure the object's
# 'error' field is set to 'HTTP Error - <reason>' and an empty list (undef in
# scalar context) is returned, unless $verbose is defined.
sub raw_embedding {
    my ($self, $text, $verbose) = @_;

    my $response = $self->_get_embedding($text);

    if ($response->{'success'}) {
        # Clear any message left behind by an earlier failed call, otherwise
        # success() would keep reporting failure after this successful one.
        $self->{'error'} = '';

        my $embedding = decode_json($response->{'content'});
        return @{ $embedding->{'data'}[0]->{'embedding'} };
    }

    $self->{'error'} = 'HTTP Error - ' . $response->{'reason'};
    return $response if defined $verbose;

    # Bare return: callers that collect the result in an array get an empty
    # array on failure rather than a one-element list holding undef, which
    # would test as true. Scalar-context callers still receive undef.
    return;
}
# Return a pseudo-random test embedding as a CSV string, without calling any API.
#
# Parameters:
#   $text      - optional; when true, the random generator is seeded with the
#                number of whitespace-separated fields in the text
#   $dimension - optional number of values to generate (default 1536)
#
# Clears the object's 'error' field. Each value is drawn as rand(2) - 1.
sub test_embedding {
    my ($self, $text, $dimension) = @_;

    $self->{'error'} = '';
    my $size = defined $dimension ? $dimension : 1536;

    if ($text) {
        # Seed from the word count so texts of equal length give equal output.
        my @words = split /\s+/, $text;
        srand scalar @words;
    }

    my @vector;
    push @vector, rand(2) - 1 for 1 .. $size;

    return join ',', @vector;
}
# Convert a CSV Embedding into a hashref.
#
# The string is split on commas and each value is stored under the key
# 'feature<N>', where N is its zero-based position.
#
# When $embed_string is undefined, the object's 'error' field is set to
# 'Nothing to compare!' and nothing is returned.
sub _make_vector {
    my ($self, $embed_string) = @_;

    unless (defined $embed_string) {
        $self->{'error'} = 'Nothing to compare!';
        return;
    }

    my @values = split /,/, $embed_string;

    my %vector;
    for my $position (0 .. $#values) {
        $vector{"feature$position"} = $values[$position];
    }

    return \%vector;
}
# Build a comparator bound to one fixed embedding.
#
# Clears the object's 'error' field, converts $embed to a vector once, and
# returns a code reference. Calling that code reference with another CSV
# embedding converts it and returns the result of _compare_vector for the
# fixed vector and the new one.
sub comparator {
    my ($self, $embed) = @_;

    $self->{'error'} = '';
    my $fixed_vector = $self->_make_vector($embed);

    my $compare_to_fixed = sub {
        my ($candidate) = @_;

        my $candidate_vector = $self->_make_vector($candidate);
        return $self->_compare_vector($fixed_vector, $candidate_vector);
    };

    return $compare_to_fixed;
}
# Compare 2 Embeddings
sub compare {
my ($self, $embed1, $embed2) = @_;
my $vector1 = $self->_make_vector($embed1);
my $vector2;
if (defined $embed2) {
$vector2 = $self->_make_vector($embed2);
} else {
$vector2 = $self->{'comparator'};
}
lib/AI/Embedding.pm view on Meta::CPAN
if (scalar keys %$vector1 != scalar keys %$vector2) {
$self->{'error'} = 'Embeds are unequal length';
return;
}
return $self->_compare_vector($vector1, $vector2);
}
# Compare 2 Vectors.
#
# Registers both vectors with a fresh Data::CosineSimilarity object under the
# labels 'label1' and 'label2', then returns the 'cosine' value of the
# similarity computed between those two labels.
sub _compare_vector {
    my ($self, $vector1, $vector2) = @_;

    my $calculator = Data::CosineSimilarity->new;

    my %vector_for = (
        label1 => $vector1,
        label2 => $vector2,
    );
    for my $label (qw(label1 label2)) {
        $calculator->add( $label => $vector_for{$label} );
    }

    my $similarity = $calculator->similarity('label1', 'label2');
    return $similarity->cosine;
}
1;
__END__
( run in 0.372 second using v1.01-cache-2.11-cpan-a5abf4f5562 )