AI-Ollama-Client
use 5.020;
use Moo 2;
with 'Role::EventEmitter';
use experimental 'signatures';
use PerlX::Maybe;
use Carp 'croak';
# These should go into a ::Role
use YAML::PP;
use Mojo::UserAgent;
use Mojo::URL;
use URI::Template;
use Mojo::JSON 'encode_json', 'decode_json';
use OpenAPI::Modern;
use File::ShareDir 'module_file';
use Future::Mojo;
use Future::Queue;
our $SCHEMA_VERSION = "0.1.9";
use AI::Ollama::CopyModelRequest;
use AI::Ollama::CreateModelRequest;
use AI::Ollama::CreateModelResponse;
use AI::Ollama::DeleteModelRequest;
use AI::Ollama::GenerateChatCompletionRequest;
use AI::Ollama::GenerateChatCompletionResponse;
use AI::Ollama::GenerateCompletionRequest;
use AI::Ollama::GenerateCompletionResponse;
use AI::Ollama::GenerateEmbeddingRequest;
use AI::Ollama::GenerateEmbeddingResponse;
use AI::Ollama::Message;
use AI::Ollama::Model;
use AI::Ollama::ModelInfo;
use AI::Ollama::ModelInfoRequest;
use AI::Ollama::ModelsResponse;
use AI::Ollama::PullModelRequest;
use AI::Ollama::PullModelResponse;
use AI::Ollama::PushModelRequest;
use AI::Ollama::PushModelResponse;
use AI::Ollama::RequestOptions;
=encoding utf8
=head1 SYNOPSIS
my $client = AI::Ollama::Client::Impl->new(
schema_file => '...',
);
=head1 PROPERTIES
=head2 B<< schema_file >>
The OpenAPI schema file we use for validation.
=head2 B<< schema >>
The OpenAPI schema data structure we use for validation. If not given,
we will create one using the C<schema_file> parameter.
=head2 B<< openapi >>
The L<OpenAPI::Modern> object we use for validation. If not given,
we will create one using the C<schema> parameter.
=head2 B<< ua >>
The L<Mojo::UserAgent> to use.
=head2 B<< server >>
The server to access.
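For example, a client pointed at a local Ollama instance might be constructed
like this (a minimal sketch; the server URL is an assumption for a default
local install):

    my $client = AI::Ollama::Client::Impl->new(
        server => 'http://localhost:11434/api',
    );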
=cut
has 'schema_file' => (
is => 'lazy',
default => sub { require AI::Ollama::Client::Impl; module_file('AI::Ollama::Client::Impl', 'ollama-curated.yaml') },
);
has 'schema' => (
is => 'lazy',
default => sub {
if( my $fn = $_[0]->schema_file ) {
YAML::PP->new( boolean => 'JSON::PP' )->load_file($fn);
}
},
);
has 'validate_requests' => (
is => 'rw',
default => 1,
);
has 'validate_responses' => (
is => 'rw',
default => 1,
);
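# Example: both validation switches are read-write attributes, so OpenAPI
# validation can be toggled at runtime (a minimal sketch):
#
#   $client->validate_requests(0);   # skip request validation
#   $client->validate_responses(0);  # skip response validation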
has 'openapi' => (
is => 'lazy',
default => sub {
if( my $schema = $_[0]->schema ) {
OpenAPI::Modern->new( openapi_schema => $schema, openapi_uri => '' )
}
},
);
# The HTTP stuff should go into a ::Role I guess
has 'ua' => (
is => 'lazy',
default => sub { Mojo::UserAgent->new },
);
has 'server' => (
is => 'ro',
);
return $tx
}
sub createBlob( $self, %options ) {
my $tx = $self->build_createBlob_request(%options);
my $res = Future::Mojo->new();
my $r1 = Future::Mojo->new();
$r1->then( sub( $tx ) {
my $resp = $tx->res;
$self->emit(response => $resp);
# Should we validate using OpenAPI::Modern here?!
if( $resp->code == 201 ) {
# Blob was successfully created
$res->done($resp);
} else {
# An unknown/unhandled response, likely an error
$res->fail( sprintf( "unknown_unhandled code %d: %s", $resp->code, $resp->body ), $resp);
}
})->retain;
# Start our transaction
$self->emit(request => $tx);
$tx = $self->ua->start_p($tx)->then(sub($tx) {
$r1->resolve( $tx );
undef $r1;
})->catch(sub($err) {
$self->emit(response => $tx, $err);
$r1->fail( $err => $tx );
undef $r1;
});
return $res
}
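# Example: since this class consumes Role::EventEmitter, the request and
# response events emitted above can be observed from outside (a sketch):
#
#   $client->on( request  => sub( $client, $tx )   { say '-> ' . $tx->req->url } );
#   $client->on( response => sub( $client, $resp ) { say '<- ' . $resp->code } );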
=head2 C<< build_generateChatCompletion_request >>
Build an HTTP request as a L<Mojo::Transaction> object. For the parameters, see below.
=head2 C<< generateChatCompletion >>
use Future::Utils 'repeat';
my $response = $client->generateChatCompletion();
my $streamed = $response->get();
repeat {
my ($res) = $streamed->shift->get;
if( $res and $res->message ) {
    say $res->message->content;
}
Future::Mojo->done( !defined $res );
} until => sub($done) { $done->get };
Generate the next message in a chat with a provided model.
This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request.
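For example (a minimal sketch; C<llama2> is a placeholder model name, and
whether plain hashrefs are coerced into L<AI::Ollama::Message> objects is an
assumption here):

    my $streamed = $client->generateChatCompletion(
        model    => 'llama2',
        messages => [
            { role => 'user', content => 'Why is the sky blue?' },
        ],
    )->get;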
=head3 Options
=over 4
=item C<< format >>
The format to return a response in. Currently the only accepted value is C<json>.
Enable JSON mode by setting the C<format> parameter to C<json>. This will structure the response as valid JSON.
Note: it's important to instruct the model to use JSON in the prompt. Otherwise, the model may generate large amounts of whitespace.
=item C<< keep_alive >>
How long (in minutes) to keep the model loaded in memory.
=over
=item -
If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
=item -
If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
=item -
If set to 0, the model will be unloaded immediately once finished.
=item -
If not set, the model will stay loaded for 5 minutes by default.
=back
=item C<< messages >>
The messages of the chat; these can be used to keep a chat memory.
=item C<< model >>
The model name.
Model names follow a C<model:tag> format. Some examples are C<orca-mini:3b-q4_1> and C<llama2:70b>. The tag is optional and, if not provided, will default to C<latest>. The tag is used to identify a specific version.
=item C<< options >>
Additional model parameters listed in the documentation for the Modelfile, such as C<temperature>.
=item C<< stream >>
If C<false> the response will be returned as a single response object, otherwise the response will be streamed as a series of objects.
$self->emit(response => $resp);
# Should we validate using OpenAPI::Modern here?!
if( $resp->code == 200 ) {
# Successful operation.
my $queue = Future::Queue->new( prototype => 'Future::Mojo' );
$res->done( $queue );
my $ct = $resp->headers->content_type;
return unless $ct;
$ct =~ s/;\s+.*//;
if( $ct eq 'application/x-ndjson' ) {
# we only handle ndjson currently
my $handled_offset = 0;
$resp->on(progress => sub($msg,@) {
# Only consider complete lines; leave any unfinished trailing line for the next event
my $body = $msg->body;
$body =~ s/[^\r\n]+\z//; # Strip any unfinished line
my $fresh = substr( $body, $handled_offset );
$handled_offset = length $body;
my @lines = grep { length } split /\n/, $fresh;
for (@lines) {
my $payload = decode_json( $_ );
$self->validate_response( $payload, $tx );
$queue->push(
AI::Ollama::CreateModelResponse->new($payload),
);
};
if( $msg->is_finished ) {
$queue->finish();
}
});
} else {
# Unknown/unhandled content type
$res->fail( sprintf("unknown_unhandled content type '%s'", $ct), $resp );
}
} else {
# An unknown/unhandled response, likely an error
$res->fail( sprintf( "unknown_unhandled code %d", $resp->code ), $resp);
}
});
my $_tx;
$tx->res->once( progress => sub($msg, @) {
$r1->resolve( $tx );
undef $_tx;
undef $r1;
});
$self->emit(request => $tx);
$_tx = $self->ua->start_p($tx);
return $res
}
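# Note: the streaming methods resolve to a Future::Queue whose ->shift returns
# a future yielding the next response object, or nothing once the stream has
# finished. A sketch of draining such a queue without Future::Utils:
#
#   my $queue = $response->get;
#   while( my ($chunk) = $queue->shift->get ) {
#       ...; # process one response object
#   }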
=head2 C<< build_deleteModel_request >>
Build an HTTP request as a L<Mojo::Transaction> object. For the parameters, see below.
=head2 C<< deleteModel >>
my $res = $client->deleteModel( name => 'llama2:70b' )->get;
Delete a model and its data.
=head3 Options
=over 4
=item C<< name >>
The model name.
Model names follow a C<model:tag> format. Some examples are C<orca-mini:3b-q4_1> and C<llama2:70b>. The tag is optional and, if not provided, will default to C<latest>. The tag is used to identify a specific version.
=back
=cut
sub build_deleteModel_request( $self, %options ) {
my $method = 'DELETE';
my $path = '/delete';
my $url = Mojo::URL->new( $self->server . $path );
my $request = AI::Ollama::DeleteModelRequest->new( \%options )->as_hash;
my $tx = $self->ua->build_tx(
$method => $url,
{
"Content-Type" => 'application/json',
}
=> json => $request,
);
$self->validate_request( $tx );
return $tx
}
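# Example: the build_* methods allow inspecting (or customizing) a request
# without sending it (a minimal sketch):
#
#   my $tx = $client->build_deleteModel_request( name => 'llama2:70b' );
#   say $tx->req->to_string;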
sub deleteModel( $self, %options ) {
my $tx = $self->build_deleteModel_request(%options);
my $res = Future::Mojo->new();
my $r1 = Future::Mojo->new();
$r1->then( sub( $tx ) {
my $resp = $tx->res;
$self->emit(response => $resp);
# Should we validate using OpenAPI::Modern here?!
if( $resp->code == 200 ) {
# Successful operation.
$res->done($resp);
} else {
# An unknown/unhandled response, likely an error
$res->fail( sprintf( "unknown_unhandled code %d: %s", $resp->code, $resp->body ), $resp);
}
})->retain;
# Start our transaction
$self->emit(request => $tx);
$tx = $self->ua->start_p($tx)->then(sub($tx) {
$r1->then( sub( $tx ) {
my $resp = $tx->res;
$self->emit(response => $resp);
# Should we validate using OpenAPI::Modern here?!
if( $resp->code == 200 ) {
# Successful operation.
my $ct = $resp->headers->content_type;
$ct =~ s/;\s+.*//;
if( $ct eq 'application/json' ) {
my $payload = $resp->json();
$self->validate_response( $payload, $tx );
$res->done(
AI::Ollama::GenerateEmbeddingResponse->new($payload),
);
} else {
# Unknown/unhandled content type
$res->fail( sprintf("unknown_unhandled content type '%s'", $ct), $resp );
}
} else {
# An unknown/unhandled response, likely an error
$res->fail( sprintf( "unknown_unhandled code %d: %s", $resp->code, $resp->body ), $resp);
}
})->retain;
# Start our transaction
$self->emit(request => $tx);
$tx = $self->ua->start_p($tx)->then(sub($tx) {
$r1->resolve( $tx );
undef $r1;
})->catch(sub($err) {
$self->emit(response => $tx, $err);
$r1->fail( $err => $tx );
undef $r1;
});
return $res
}
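# Example (a sketch; assuming the surrounding method is generateEmbedding, that
# its model/prompt parameters follow the Ollama embeddings endpoint, and that
# the response object has an embedding accessor):
#
#   my $res = $client->generateEmbedding(
#       model  => 'llama2',
#       prompt => 'Here is an article about llamas...',
#   )->get;
#   my @vector = @{ $res->embedding };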
=head2 C<< build_generateCompletion_request >>
Build an HTTP request as a L<Mojo::Transaction> object. For the parameters, see below.
=head2 C<< generateCompletion >>
use Future::Utils 'repeat';
my $response = $client->generateCompletion();
my $streamed = $response->get();
repeat {
my ($res) = $streamed->shift->get;
if( $res ) {
    print $res->response // '';
}
Future::Mojo->done( !defined $res );
} until => sub($done) { $done->get };
Generate a response for a given prompt with a provided model.
The final response object will include statistics and additional data from the request.
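For example (a minimal sketch; the option list below is truncated here, and
C<prompt> is an assumption based on the Ollama generate endpoint):

    my $streamed = $client->generateCompletion(
        model  => 'llama2',
        prompt => 'Why is the sky blue?',
    )->get;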
=head3 Options
=over 4
=item C<< context >>
The context parameter returned from a previous request to C<generateCompletion>; this can be used to keep a short conversational memory.
=item C<< format >>
The format to return a response in. Currently the only accepted value is C<json>.
Enable JSON mode by setting the C<format> parameter to C<json>. This will structure the response as valid JSON.
Note: it's important to instruct the model to use JSON in the prompt. Otherwise, the model may generate large amounts of whitespace.
=item C<< images >>
(optional) A list of Base64-encoded images to include in the message (for multimodal models such as C<llava>).
=item C<< keep_alive >>
How long (in minutes) to keep the model loaded in memory.
=over
=item -
If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
=item -
If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
=item -
If set to 0, the model will be unloaded immediately once finished.
=item -
If not set, the model will stay loaded for 5 minutes by default.
=back
=item C<< model >>
The model name.
Model names follow a C<model:tag> format. Some examples are C<orca-mini:3b-q4_1> and C<llama2:70b>. The tag is optional and, if not provided, will default to C<latest>. The tag is used to identify a specific version.
=item C<< options >>
Additional model parameters listed in the documentation for the Modelfile, such as C<temperature>.