AI-Ollama-Client
view release on metacpan or search on metacpan
lib/AI/Ollama/RequestOptions.pm view on Meta::CPAN
=cut
# Boolean-ish flag (no isa constraint declared); read-only accessor.
has embedding_only => ( is => 'ro' );
=head2 C<< f16_kv >>

Enable f16 key/value. (Default: false)

=cut

has f16_kv => ( is => 'ro' );
=head2 C<< frequency_penalty >>

Positive values penalize new tokens based on their existing frequency in the
text so far, decreasing the model's likelihood to repeat the same line
verbatim.

=cut

has frequency_penalty => ( is => 'ro', isa => Num );
=head2 C<< logits_all >>

Enable logits all. (Default: false)

=cut

has logits_all => ( is => 'ro' );
=head2 C<< low_vram >>

Enable low VRAM mode. (Default: false)

=cut

has low_vram => ( is => 'ro' );
=head2 C<< main_gpu >>

The GPU to use for the main model. Default is 0.

=cut

has main_gpu => ( is => 'ro', isa => Int );
=head2 C<< mirostat >>

Enable Mirostat sampling for controlling perplexity.
(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)

=cut

has mirostat => ( is => 'ro', isa => Int );
=head2 C<< mirostat_eta >>

Influences how quickly the algorithm responds to feedback from the generated
text. A lower learning rate will result in slower adjustments, while a higher
learning rate will make the algorithm more responsive. (Default: 0.1)

=cut

has mirostat_eta => ( is => 'ro', isa => Num );
=head2 C<< mirostat_tau >>

Controls the balance between coherence and diversity of the output. A lower
value will result in more focused and coherent text. (Default: 5.0)

=cut

has mirostat_tau => ( is => 'ro', isa => Num );
=head2 C<< num_batch >>

Sets the number of batches to use for generation. (Default: 1)

=cut

has num_batch => ( is => 'ro', isa => Int );
=head2 C<< num_ctx >>

Sets the size of the context window used to generate the next token.

=cut

has num_ctx => ( is => 'ro', isa => Int );
=head2 C<< num_gpu >>

The number of layers to send to the GPU(s). On macOS it defaults to 1 to
enable metal support, 0 to disable.

=cut

has num_gpu => ( is => 'ro', isa => Int );
=head2 C<< num_gqa >>

The number of GQA groups in the transformer layer. Required for some models,
for example it is 8 for C<llama2:70b>.

=cut

has num_gqa => ( is => 'ro', isa => Int );
=head2 C<< num_keep >>

Number of tokens to keep from the prompt.

=cut

has num_keep => ( is => 'ro', isa => Int );
=head2 C<< num_predict >>

Maximum number of tokens to predict when generating text.
(Default: 128, -1 = infinite generation, -2 = fill context)

=cut

has num_predict => ( is => 'ro', isa => Int );
=head2 C<< num_thread >>

Sets the number of threads to use during computation. By default, Ollama will
detect this for optimal performance. It is recommended to set this value to
the number of physical CPU cores your system has (as opposed to the logical
number of cores).

=cut

has num_thread => ( is => 'ro', isa => Int );
=head2 C<< numa >>

Enable NUMA support. (Default: false)

=cut

has numa => ( is => 'ro' );
=head2 C<< penalize_newline >>

Penalize newlines in the output. (Default: false)

=cut

has penalize_newline => ( is => 'ro' );
=head2 C<< presence_penalty >>

Positive values penalize new tokens based on whether they appear in the text
so far, increasing the model's likelihood to talk about new topics.

=cut

has presence_penalty => ( is => 'ro', isa => Num );
=head2 C<< repeat_last_n >>

Sets how far back for the model to look back to prevent repetition.
(Default: 64, 0 = disabled, -1 = num_ctx)

=cut

has repeat_last_n => ( is => 'ro', isa => Int );
=head2 C<< repeat_penalty >>

Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will
penalize repetitions more strongly, while a lower value (e.g., 0.9) will be
more lenient. (Default: 1.1)

=cut

has repeat_penalty => ( is => 'ro', isa => Num );
=head2 C<< rope_frequency_base >>

The base of the rope frequency scale. (Default: 1.0)

=cut

has rope_frequency_base => ( is => 'ro', isa => Num );
=head2 C<< rope_frequency_scale >>

The scale of the rope frequency. (Default: 1.0)

=cut

has rope_frequency_scale => ( is => 'ro', isa => Num );
=head2 C<< seed >>

Sets the random number seed to use for generation. Setting this to a specific
number will make the model generate the same text for the same prompt.
(Default: 0)

=cut

has seed => ( is => 'ro', isa => Int );
=head2 C<< stop >>

Sequences where the API will stop generating further tokens. The returned
text will not contain the stop sequence.

=cut

has stop => ( is => 'ro', isa => ArrayRef[Str] );
=head2 C<< temperature >>

The temperature of the model. Increasing the temperature will make the model
answer more creatively. (Default: 0.8)

=cut

has temperature => ( is => 'ro', isa => Num );
=head2 C<< tfs_z >>

Tail free sampling is used to reduce the impact of less probable tokens from
the output. A higher value (e.g., 2.0) will reduce the impact more, while a
value of 1.0 disables this setting. (default: 1)

=cut

has tfs_z => ( is => 'ro', isa => Num );
=head2 C<< top_k >>

Reduces the probability of generating nonsense. A higher value (e.g. 100)
will give more diverse answers, while a lower value (e.g. 10) will be more
conservative. (Default: 40)

=cut

has top_k => ( is => 'ro', isa => Int );
=head2 C<< top_p >>

Works together with top-k. A higher value (e.g., 0.95) will lead to more
diverse text, while a lower value (e.g., 0.5) will generate more focused and
conservative text. (Default: 0.9)

=cut

has top_p => ( is => 'ro', isa => Num );
=head2 C<< typical_p >>

Typical p is used to reduce the impact of less probable tokens from the
output.

=cut

has typical_p => ( is => 'ro', isa => Num );
=head2 C<< use_mlock >>

Enable mlock. (Default: false)

=cut

has use_mlock => ( is => 'ro' );
=head2 C<< use_mmap >>

Enable mmap. (Default: false)

=cut

has use_mmap => ( is => 'ro' );
( run in 0.930 second using v1.01-cache-2.11-cpan-39bf76dae61 )