AI-Embedding

 view release on metacpan or  search on metacpan

lib/AI/Embedding.pm  view on Meta::CPAN

use HTTP::Tiny;
use JSON::PP;
use Data::CosineSimilarity;

our $VERSION = '1.11';
$VERSION = eval $VERSION;

my $http = HTTP::Tiny->new;

# Create Embedding object
sub new {
    my $class = shift;
    my %attr  = @_;

    $attr{'error'}      = '';

    $attr{'api'}        = 'OpenAI' unless $attr{'api'};
    $attr{'error'}      = 'Invalid API' unless $attr{'api'} eq 'OpenAI';
    $attr{'error'}      = 'API Key missing' unless $attr{'key'};

    $attr{'model'}      = 'text-embedding-ada-002' unless $attr{'model'};

lib/AI/Embedding.pm  view on Meta::CPAN

my %url    = (
    'OpenAI' => 'https://api.openai.com/v1/embeddings',
);

# Define HTTP Headers for APIs
my %header = (
    'OpenAI' => &_get_header_openai,
);

# Returns true if last operation was success
sub success {
    my $self = shift;
    return !$self->{'error'};
}

# Returns error if last operation failed
sub error {
    my $self = shift;
    return $self->{'error'};
}

# Header for calling OpenAI
sub _get_header_openai {
    my $self = shift;
    $self->{'key'} = '' unless defined $self->{'key'};
    return {
         'Authorization' => 'Bearer ' . $self->{'key'},
         'Content-type'  => 'application/json'
     };
 }

 # Fetch Embedding response
 sub _get_embedding {
     my ($self, $text) = @_;

     my $response = $http->post($url{$self->{'api'}}, {
         'headers' => {
             'Authorization' => 'Bearer ' . $self->{'key'},
             'Content-type'  => 'application/json'
         },
         content => encode_json {
             input  => $text,
             model  => $self->{'model'},

lib/AI/Embedding.pm  view on Meta::CPAN

         die 'Incorrect API Key - check your API Key is correct';
     }
     return $response;
 }

 # TODO:
 # Make 'headers' use $header{$self->{'api'}}
 # Currently hard coded to OpenAI

 # Added purely for testing - IGNORE!
 sub _test {
     my $self = shift;
#    return $self->{'api'};
     return $header{$self->{'api'}};
 }

 # Return Embedding as a CSV string
 sub embedding {
     my ($self, $text, $verbose) = @_;

     my $response = $self->_get_embedding($text);
     if ($response->{'success'}) {
         my $embedding = decode_json($response->{'content'});
         return join (',', @{$embedding->{'data'}[0]->{'embedding'}});
     }
     $self->{'error'} = 'HTTP Error - ' . $response->{'reason'};
     return $response if defined $verbose;
     return undef;
 }

 # Return Embedding as an array
 sub raw_embedding {
     my ($self, $text, $verbose) = @_;

     my $response = $self->_get_embedding($text);
     if ($response->{'success'}) {
         my $embedding = decode_json($response->{'content'});
         return @{$embedding->{'data'}[0]->{'embedding'}};
     }
     $self->{'error'} = 'HTTP Error - ' . $response->{'reason'};
     return $response if defined $verbose;
     return undef;
 }

 # Return Test Embedding
 sub test_embedding {
     my ($self, $text, $dimension) = @_;
     $self->{'error'} = '';

     $dimension = 1536 unless defined $dimension;

     if ($text) {
         srand scalar split /\s+/, $text;
     }

     my @vector;
     for (1...$dimension) {
         push @vector, rand(2) - 1;
     }
     return join ',', @vector;
 }

# Convert a CSV Embedding into a hashref
sub _make_vector {
    my ($self, $embed_string) = @_;

    if (!defined $embed_string) {
        $self->{'error'} = 'Nothing to compare!';
        return;
    }

    my %vector;
    my @embed = split /,/, $embed_string;
    for (my $i = 0; $i < @embed; $i++) {
       $vector{'feature' . $i} = $embed[$i];
   }
   return \%vector;
}

# Return a comparator to compare to a set vector
sub comparator {
    my($self, $embed) = @_;
    $self->{'error'} = '';

    my $vector1 = $self->_make_vector($embed);
    return sub {
        my($embed2) = @_;
        my $vector2 = $self->_make_vector($embed2);
        return $self->_compare_vector($vector1, $vector2);
    };
}

# Compare 2 Embeddings
sub compare {
    my ($self, $embed1, $embed2) = @_;

    my $vector1 = $self->_make_vector($embed1);
    my $vector2;
    if (defined $embed2) {
        $vector2 = $self->_make_vector($embed2);
    } else {
        $vector2 = $self->{'comparator'};
    }

lib/AI/Embedding.pm  view on Meta::CPAN


    if (scalar keys %$vector1 != scalar keys %$vector2) {
        $self->{'error'} = 'Embeds are unequal length';
        return;
    }

    return $self->_compare_vector($vector1, $vector2);
}

# Compare 2 Vectors
sub _compare_vector {
    my ($self, $vector1, $vector2) = @_;
    my $cs = Data::CosineSimilarity->new;
    $cs->add( label1 => $vector1 );
    $cs->add( label2 => $vector2 );
    return $cs->similarity('label1', 'label2')->cosine;
}

1;

__END__



( run in 0.372 second using v1.01-cache-2.11-cpan-a5abf4f5562 )