Makefile.PL
# -*- mode: perl; c-basic-offset: 4; indent-tabs-mode: nil; -*-
use strict;
use ExtUtils::MakeMaker qw(WriteMakefile);
# See lib/ExtUtils/MakeMaker.pm for details of how to influence
# the contents of the Makefile that is written.
# Normalize version strings like 6.30_02 to 6.3002,
# so that we can do numerical comparisons on it.
my $eumm_version = $ExtUtils::MakeMaker::VERSION;
$eumm_version =~ s/_//;
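The normalized value is what later numeric checks in a generated Makefile.PL typically compare against. A minimal sketch of such a guard follows; the distribution metadata and the version threshold are illustrative, not taken from this file:

    # Illustrative continuation: only pass arguments that this
    # ExtUtils::MakeMaker version understands.
    my %WriteMakefileArgs = (
        NAME         => 'AI::Ollama::Client',
        VERSION_FROM => 'lib/AI/Ollama/Client.pm',
    );
    # Older EUMM releases do not understand the LICENSE key
    # (threshold illustrative)
    $WriteMakefileArgs{LICENSE} = 'artistic_2'
        if $eumm_version >= 6.3002;
    WriteMakefile(%WriteMakefileArgs);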
lib/AI/Ollama/Client/Impl.pm
=head3 Options
=over 4
=item C<< format >>
The format to return a response in. Currently the only accepted value is json.
Enable JSON mode by setting the format parameter to json. This will structure the response as valid JSON.
Note: it's important to instruct the model to use JSON in the prompt. Otherwise, the model may generate large amounts of whitespace.
=item C<< keep_alive >>
How long (in minutes) to keep the model loaded in memory.
=over
=item -
If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
=item -
If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
=item -
If set to 0, the model will be unloaded immediately once finished.
=item -
If not set, the model will stay loaded for 5 minutes by default.
=back
=item C<< messages >>
The messages of the chat; this can be used to keep a chat memory.
=item C<< model >>
lib/AI/Ollama/Client/Impl.pm
# Should we validate using OpenAPI::Modern here?!
if( $resp->code == 200 ) {
    # Successful operation.
    my $queue = Future::Queue->new( prototype => 'Future::Mojo' );
    $res->done( $queue );
    my $ct = $resp->headers->content_type;
    return unless $ct;
    $ct =~ s/;\s+.*//;
    if( $ct eq 'application/x-ndjson' ) {
        # we only handle ndjson currently
        my $handled_offset = 0;
        $resp->on(progress => sub($msg,@) {
            # Work only on complete lines: strip any unfinished trailing line
            my $body = $msg->body;
            $body =~ s/[^\r\n]+\z//;
            # Decode just the complete lines we have not processed yet
            my $fresh = substr( $body, $handled_offset );
            $handled_offset = length $body;
            my @lines = grep { length } split /\n/, $fresh;
            for (@lines) {
                my $payload = decode_json( $_ );
                $self->validate_response( $payload, $tx );
                $queue->push(
                    AI::Ollama::GenerateChatCompletionResponse->new($payload),
                );
            }
            if( $msg->{state} eq 'finished' ) {
lib/AI/Ollama/Client/Impl.pm
    if( $res ) {
        my $str = $res->get;
        say $str;
    }
    Future::Mojo->done( defined $res );
} until => sub($done) { $done->get };
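For orientation, here is a minimal consumer sketch for the streaming interface shown above. It assumes an AI::Ollama::Client object whose generateChatCompletion method resolves to the Future::Queue that the handler fills with AI::Ollama::GenerateChatCompletionResponse objects; the server URL, the named-argument style, the message layout and the C<message>/C<content> accessors are assumptions rather than verbatim API.

    use AI::Ollama::Client;

    # Server URL and model name are illustrative
    my $client = AI::Ollama::Client->new(
        server => 'http://127.0.0.1:11434/api',
    );

    # ->get blocks on the Mojo::IOLoop until the response headers arrive
    # and the Future::Queue for the streamed chunks is available
    my ($responses) = $client->generateChatCompletion(
        model    => 'llama2',
        messages => [{ role => 'user', content => 'Why is the sky blue?' }],
    )->get;

    # Drain the queue chunk by chunk; each item is assumed to be an
    # AI::Ollama::GenerateChatCompletionResponse
    while( my ($chunk) = $responses->shift->get ) {
        print $chunk->message->content;
    }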
Create a model from a Modelfile.
It is recommended to set C<modelfile> to the content of the Modelfile rather than just set C<path>. This is a requirement for remote create. Remote model creation should also create any file blobs, fields such as C<FROM> and C<ADAPTER>, explicitly wi...
=head3 Options
=over 4
=item C<< modelfile >>
The contents of the Modelfile.
lib/AI/Ollama/Client/Impl.pm
# Should we validate using OpenAPI::Modern here?!
if( $resp->code == 200 ) {
    # Successful operation.
    my $queue = Future::Queue->new( prototype => 'Future::Mojo' );
    $res->done( $queue );
    my $ct = $resp->headers->content_type;
    return unless $ct;
    $ct =~ s/;\s+.*//;
    if( $ct eq 'application/x-ndjson' ) {
        # we only handle ndjson currently
        my $handled_offset = 0;
        $resp->on(progress => sub($msg,@) {
            # Work only on complete lines: strip any unfinished trailing line
            my $body = $msg->body;
            $body =~ s/[^\r\n]+\z//;
            # Decode just the complete lines we have not processed yet
            my $fresh = substr( $body, $handled_offset );
            $handled_offset = length $body;
            my @lines = grep { length } split /\n/, $fresh;
            for (@lines) {
                my $payload = decode_json( $_ );
                $self->validate_response( $payload, $tx );
                $queue->push(
                    AI::Ollama::CreateModelResponse->new($payload),
                );
            }
            if( $msg->{state} eq 'finished' ) {
lib/AI/Ollama/Client/Impl.pm
=over 4
=item C<< context >>
The context parameter returned from a previous request to [generateCompletion]; this can be used to keep a short conversational memory.
=item C<< format >>
The format to return a response in. Currently the only accepted value is json.
Enable JSON mode by setting the format parameter to json. This will structure the response as valid JSON.
Note: it's important to instruct the model to use JSON in the prompt. Otherwise, the model may generate large amounts of whitespace.
=item C<< images >>
(optional) a list of Base64-encoded images to include in the message (for multimodal models such as llava)
=item C<< keep_alive >>
How long (in minutes) to keep the model loaded in memory.
=over
=item -
If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
=item -
If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
=item -
If set to 0, the model will be unloaded immediately once finished.
=item -
If not set, the model will stay loaded for 5 minutes by default.
=back
=item C<< model >>
The model name.
Model names follow a C<model:tag> format. Some examples are C<orca-mini:3b-q4_1> and C<llama2:70b>. The tag is optional and, if not provided, will default to C<latest>. The tag is used to identify a specific version.
lib/AI/Ollama/Client/Impl.pm
# Should we validate using OpenAPI::Modern here?!
if( $resp->code == 200 ) {
    # Successful operation.
    my $queue = Future::Queue->new( prototype => 'Future::Mojo' );
    $res->done( $queue );
    my $ct = $resp->headers->content_type;
    return unless $ct;
    $ct =~ s/;\s+.*//;
    if( $ct eq 'application/x-ndjson' ) {
        # we only handle ndjson currently
        my $handled_offset = 0;
        $resp->on(progress => sub($msg,@) {
            # Work only on complete lines: strip any unfinished trailing line
            my $body = $msg->body;
            $body =~ s/[^\r\n]+\z//;
            # Decode just the complete lines we have not processed yet
            my $fresh = substr( $body, $handled_offset );
            $handled_offset = length $body;
            my @lines = grep { length } split /\n/, $fresh;
            for (@lines) {
                my $payload = decode_json( $_ );
                $self->validate_response( $payload, $tx );
                $queue->push(
                    AI::Ollama::GenerateCompletionResponse->new($payload),
                );
            }
            if( $msg->{state} eq 'finished' ) {
lib/AI/Ollama/Client/Impl.pm
# Should we validate using OpenAPI::Modern here?!
if( $resp->code == 200 ) {
    # Successful operation.
    my $queue = Future::Queue->new( prototype => 'Future::Mojo' );
    $res->done( $queue );
    my $ct = $resp->headers->content_type;
    return unless $ct;
    $ct =~ s/;\s+.*//;
    if( $ct eq 'application/x-ndjson' ) {
        # we only handle ndjson currently
        my $handled_offset = 0;
        $resp->on(progress => sub($msg,@) {
            # Work only on complete lines: strip any unfinished trailing line
            my $body = $msg->body;
            $body =~ s/[^\r\n]+\z//;
            # Decode just the complete lines we have not processed yet
            my $fresh = substr( $body, $handled_offset );
            $handled_offset = length $body;
            my @lines = grep { length } split /\n/, $fresh;
            for (@lines) {
                my $payload = decode_json( $_ );
                $self->validate_response( $payload, $tx );
                $queue->push(
                    AI::Ollama::PullModelResponse->new($payload),
                );
            }
            if( $msg->{state} eq 'finished' ) {
lib/AI/Ollama/GenerateChatCompletionRequest.pm
sub as_hash( $self ) {
return { $self->%* }
}
=head1 PROPERTIES
=head2 C<< format >>
The format to return a response in. Currently the only accepted value is json.
Enable JSON mode by setting the format parameter to json. This will structure the response as valid JSON.
Note: it's important to instruct the model to use JSON in the prompt. Otherwise, the model may generate large amounts of whitespace.
=cut
has 'format' => (
is => 'ro',
isa => Enum[
"json",
],
);
=head2 C<< keep_alive >>
How long (in minutes) to keep the model loaded in memory.
- If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
- If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
- If set to 0, the model will be unloaded immediately once finished.
- If not set, the model will stay loaded for 5 minutes by default.
=cut
has 'keep_alive' => (
is => 'ro',
isa => Int,
);
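Taken together with C<as_hash> above, these attributes map directly onto the JSON request body. A minimal construction sketch follows; the model name and message content are illustrative, and passing C<messages> as plain hashrefs is an assumption:

    my $request = AI::Ollama::GenerateChatCompletionRequest->new(
        model      => 'llama2',
        format     => 'json',  # currently the only accepted value
        keep_alive => 10,      # keep the model loaded for 10 minutes
        messages   => [
            { role => 'user', content => 'List three colours as a JSON array.' },
        ],
    );
    my $payload = $request->as_hash;   # plain hash, ready for JSON encoding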
=head2 C<< messages >>
lib/AI/Ollama/GenerateCompletionRequest.pm
has 'context' => (
is => 'ro',
isa => ArrayRef[Int],
);
=head2 C<< format >>
The format to return a response in. Currently the only accepted value is json.
Enable JSON mode by setting the format parameter to json. This will structure the response as valid JSON.
Note: it's important to instruct the model to use JSON in the prompt. Otherwise, the model may generate large amounts of whitespace.
=cut
has 'format' => (
is => 'ro',
isa => Enum[
"json",
],
lib/AI/Ollama/GenerateCompletionRequest.pm
has 'images' => (
is => 'ro',
isa => ArrayRef[Str],
);
=head2 C<< keep_alive >>
How long (in minutes) to keep the model loaded in memory.
- If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
- If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
- If set to 0, the model will be unloaded immediately once finished.
- If not set, the model will stay loaded for 5 minutes by default.
=cut
has 'keep_alive' => (
is => 'ro',
isa => Int,
);
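A corresponding sketch for a follow-up completion request that reuses the C<context> of a previous response as short conversational memory; the image variable and the C<context> accessor on the previous response object are assumptions:

    my $request = AI::Ollama::GenerateCompletionRequest->new(
        model      => 'llava:latest',
        prompt     => 'What is shown in this picture?',
        images     => [ $base64_png ],     # hypothetical Base64-encoded image data
        context    => $previous->context,  # from an earlier GenerateCompletionResponse
        keep_alive => 10,
    );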
=head2 C<< model >>
lib/AI/Ollama/RequestOptions.pm
=cut
has 'num_predict' => (
is => 'ro',
isa => Int,
);
=head2 C<< num_thread >>
Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores).
=cut
has 'num_thread' => (
is => 'ro',
isa => Int,
);
=head2 C<< numa >>
lib/AI/Ollama/RequestOptions.pm
=cut
has 'temperature' => (
is => 'ro',
isa => Num,
);
=head2 C<< tfs_z >>
Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1)
=cut
has 'tfs_z' => (
is => 'ro',
isa => Num,
);
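These tuning parameters are usually collected into a single options object and sent along with a generate request; a small sketch with illustrative values:

    my $options = AI::Ollama::RequestOptions->new(
        temperature => 0.7,
        num_thread  => 8,      # number of physical CPU cores
        tfs_z       => 1.0,    # 1.0 disables tail free sampling
        num_predict => 128,    # maximum number of tokens to predict
    );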
=head2 C<< top_k >>
ollama/ollama-curated.json
{"openapi":"3.0.3","components":{"schemas":{"PushModelResponse":{"properties":{"total":{"type":"integer","description":"total size of the model","example":"2142590208"},"status":{"$ref":"#/components/schemas/PushModelStatus"},"digest":{"example":"sha...
ollama/ollama-curated.yaml
content:
application/json:
schema:
$ref: '#/components/schemas/GenerateEmbeddingResponse'
/create:
post:
operationId: createModel
tags:
- Models
summary: Create a model from a Modelfile.
description: It is recommended to set `modelfile` to the content of the Modelfile rather than just set `path`. This is a requirement for remote create. Remote model creation should also create any file blobs, fields such as `FROM` and `ADAPTER`...
requestBody:
description: Create a new model from a Modelfile.
content:
application/json:
schema:
$ref: '#/components/schemas/CreateModelRequest'
responses:
'200':
description: Successful operation.
content:
ollama/ollama-curated.yaml
stream:
type: boolean
description: &stream |
If `false` the response will be returned as a single response object; otherwise the response will be streamed as a series of objects.
default: false
keep_alive:
type: integer
description: &keep_alive |
How long (in minutes) to keep the model loaded in memory.
- If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
- If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
- If set to 0, the model will be unloaded immediately once finished.
- If not set, the model will stay loaded for 5 minutes by default.
required:
- model
- prompt
RequestOptions:
type: object
description: Additional model parameters listed in the documentation for the Modelfile such as `temperature`.
properties:
num_keep:
type: integer
description: |
ollama/ollama-curated.yaml
Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
top_p:
type: number
format: float
description: |
Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
tfs_z:
type: number
format: float
description: |
Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1)
typical_p:
type: number
format: float
description: |
Typical p is used to reduce the impact of less probable tokens from the output.
repeat_last_n:
type: integer
description: |
Sets how far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)
temperature:
ollama/ollama-curated.yaml
description: |
The base of the rope frequency scale. (Default: 1.0)
rope_frequency_scale:
type: number
format: float
description: |
The scale of the rope frequency. (Default: 1.0)
num_thread:
type: integer
description: |
Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number o...
ResponseFormat:
type: string
description: |
The format to return a response in. Currently the only accepted value is json.
Enable JSON mode by setting the format parameter to json. This will structure the response as valid JSON.
Note: it's important to instruct the model to use JSON in the prompt. Otherwise, the model may generate large amounts of whitespace.
enum:
- json
GenerateCompletionResponse:
type: object
description: The response class for the generate endpoint.
properties:
model:
type: string