AI-Embedding

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

Revision history for AI-Embedding

1.11     20th December 2023
          Corrected minimum Perl version

1.10     18th December 2023
          Die if an incorrect API is provided instead os a random error

1.01	 10th June 2023
	  Corrected minor errors in POD including version number

1.0      9th June 2023
	  First release version
	  
	  Added acknowledgments to Ken Cotterill and Hugo van der Sanden
	  
0.1_4    8th June 2023
	  NOTE - This is still a development release - DO NOT USE IN PRODUCTION
	  
	  Corrected badly declared dependency and opted for the core JSON::PP module	  

lib/AI/Embedding.pm  view on Meta::CPAN

our $VERSION = '1.11';
$VERSION = eval $VERSION;

my $http = HTTP::Tiny->new;

# Create Embedding object
sub new {
    my $class = shift;
    my %attr  = @_;

    $attr{'error'}      = '';

    $attr{'api'}        = 'OpenAI' unless $attr{'api'};
    $attr{'error'}      = 'Invalid API' unless $attr{'api'} eq 'OpenAI';
    $attr{'error'}      = 'API Key missing' unless $attr{'key'};

    $attr{'model'}      = 'text-embedding-ada-002' unless $attr{'model'};

    return bless \%attr, $class;
}

# Define endpoints for APIs
my %url    = (
    'OpenAI' => 'https://api.openai.com/v1/embeddings',
);

# Define HTTP Headers for APIs
my %header = (
    'OpenAI' => &_get_header_openai,
);

# Returns true if last operation was success
sub success {
    my $self = shift;
    return !$self->{'error'};
}

# Returns error if last operation failed
sub error {
    my $self = shift;
    return $self->{'error'};
}

# Header for calling OpenAI
sub _get_header_openai {
    my $self = shift;
    $self->{'key'} = '' unless defined $self->{'key'};
    return {
         'Authorization' => 'Bearer ' . $self->{'key'},
         'Content-type'  => 'application/json'
     };

lib/AI/Embedding.pm  view on Meta::CPAN


 # Return Embedding as a CSV string
 sub embedding {
     my ($self, $text, $verbose) = @_;

     my $response = $self->_get_embedding($text);
     if ($response->{'success'}) {
         my $embedding = decode_json($response->{'content'});
         return join (',', @{$embedding->{'data'}[0]->{'embedding'}});
     }
     $self->{'error'} = 'HTTP Error - ' . $response->{'reason'};
     return $response if defined $verbose;
     return undef;
 }

 # Return Embedding as an array
 sub raw_embedding {
     my ($self, $text, $verbose) = @_;

     my $response = $self->_get_embedding($text);
     if ($response->{'success'}) {
         my $embedding = decode_json($response->{'content'});
         return @{$embedding->{'data'}[0]->{'embedding'}};
     }
     $self->{'error'} = 'HTTP Error - ' . $response->{'reason'};
     return $response if defined $verbose;
     return undef;
 }

 # Return Test Embedding
 sub test_embedding {
     my ($self, $text, $dimension) = @_;
     $self->{'error'} = '';

     $dimension = 1536 unless defined $dimension;

     if ($text) {
         srand scalar split /\s+/, $text;
     }

     my @vector;
     for (1...$dimension) {
         push @vector, rand(2) - 1;
     }
     return join ',', @vector;
 }

# Convert a CSV Embedding into a hashref
sub _make_vector {
    my ($self, $embed_string) = @_;

    if (!defined $embed_string) {
        $self->{'error'} = 'Nothing to compare!';
        return;
    }

    my %vector;
    my @embed = split /,/, $embed_string;
    for (my $i = 0; $i < @embed; $i++) {
       $vector{'feature' . $i} = $embed[$i];
   }
   return \%vector;
}

# Return a comparator to compare to a set vector
sub comparator {
    my($self, $embed) = @_;
    $self->{'error'} = '';

    my $vector1 = $self->_make_vector($embed);
    return sub {
        my($embed2) = @_;
        my $vector2 = $self->_make_vector($embed2);
        return $self->_compare_vector($vector1, $vector2);
    };
}

# Compare 2 Embeddings

lib/AI/Embedding.pm  view on Meta::CPAN


    my $vector1 = $self->_make_vector($embed1);
    my $vector2;
    if (defined $embed2) {
        $vector2 = $self->_make_vector($embed2);
    } else {
        $vector2 = $self->{'comparator'};
    }

    if (!defined $vector2) {
        $self->{'error'} = 'Nothing to compare!';
        return;
    }

    if (scalar keys %$vector1 != scalar keys %$vector2) {
        $self->{'error'} = 'Embeds are unequal length';
        return;
    }

    return $self->_compare_vector($vector1, $vector2);
}

# Compare 2 Vectors
sub _compare_vector {
    my ($self, $vector1, $vector2) = @_;
    my $cs = Data::CosineSimilarity->new;

lib/AI/Embedding.pm  view on Meta::CPAN

C<model> - The language model to use.  Defaults to C<text-embedding-ada-002> - see L<OpenAI docs|https://platform.openai.com/docs/guides/embeddings/what-are-embeddings>

=back

=head1 METHODS

=head2 success

Returns true if the last method call was successful

=head2 error

Returns the last error message or an empty string if B<success> returned true

=head2 embedding

    my $csv_embedding = $embedding->embedding('Some text passage', [$verbose]);

Generates an embedding for the given text and returns it as a comma-separated string. The C<embedding> method takes a single parameter, the text to generate the embedding for.

Returns a (rather long) string that can be stored in a C<TEXT> database field.

If the method call fails it sets the L</"error"> message and returns C<undef>.  If the optional C<verbose> parameter is true, the complete L<HTTP::Tiny> response object is also returned to aid with debugging issues when using this module.

=head2 raw_embedding

    my @raw_embedding = $embedding->raw_embedding('Some text passage', [$verbose]);

Generates an embedding for the given text and returns it as an array. The C<raw_embedding> method takes a single parameter, the text to generate the embedding for.

It is not normally necessary to use this method as the Embedding will almost always be used as a single homogeneous unit.

If the method call fails it sets the L</"error"> message and returns C<undef>.  If the optional C<verbose> parameter is true, the complete L<HTTP::Tiny> response object is also returned to aid with debugging issues when using this module.

=head2 test_embedding

    my $test_embedding = $embedding->test_embedding('Some text passage', $dimensions);

Used for testing code without making a chargeable call to the API.

Provides a CSV string of the same size and format as L<embedding> but with meaningless random data.

Returns a random embedding.  Both parameters are optional.  If a text string is provided, the returned embedding will always be the same random embedding otherwise it will be random and different every time.  The C<dimension> parameter controls the n...

t/01-openai.t  view on Meta::CPAN

    'key'	=> '0123456789',
    'api'	=> 'OpenAI',
);

ok( $embed_pass->isa( 'AI::Embedding' ), 'Instantiation' );
ok( $embed_pass->success, 'Successful object creation' );

my $comp_fail = $embed_pass->compare('-0.6,-0.5,-0.4,-0.3,-0.2,0.0,0.2,0.3,0.4,0.5', '-0.6,-0.5,-0.4,-0.3,-0.2');

ok( !$embed_pass->success, 'Compare mismatch' );
ok( $embed_pass->error eq 'Embeds are unequal length', 'Correct error message');

my $comp_pass1 = $embed_pass->compare('-0.6,-0.5,-0.4,-0.3,-0.2,0.0,0.2,0.3,0.4,0.5', '-0.6,-0.5,-0.4,-0.3,-0.2,0.0,0.2,0.3,0.4,0.5');

is( $comp_pass1, 1, "Compare got $comp_pass1");

my $cmp = $embed_pass->comparator('-0.6,-0.5,-0.4,-0.3,-0.2,0.0,0.2,0.3,0.4,0.5');

ok( $embed_pass->success, "Comparator created" );
ok( defined $cmp, "Comparator exists" );



( run in 0.964 second using v1.01-cache-2.11-cpan-49f99fa48dc )