AI-Ollama-Client
view release on metacpan or search on metacpan
ollama/ollama-curated.yaml view on Meta::CPAN
schema:
type: string
required: true
description: the SHA256 digest of the blob
example: sha256:c8edda1f17edd2f1b60253b773d837bda7b9d249a61245931a4d7c9a8d350250
responses:
'200':
description: Blob exists on the server
'404':
description: Blob was not found
# Upload operation for a blob identified by its SHA256 digest.
post:
  operationId: createBlob
  summary: Create a blob from a file. Returns the server file path.
  tags: [Models]
  parameters:
    # The digest travels in the URL path, hence it is mandatory.
    - name: digest
      in: path
      required: true
      description: the SHA256 digest of the blob
      schema:
        type: string
      example: 'sha256:c8edda1f17edd2f1b60253b773d837bda7b9d249a61245931a4d7c9a8d350250'
  requestBody:
    content:
      # The body is an opaque byte stream rather than a JSON document.
      application/octet-stream:
        schema:
          type: string
          format: binary
  responses:
    '201':
      description: Blob was successfully created
components:
schemas:
# Request body schema for the generate endpoint. `model` and `prompt` are
# mandatory; every other property is optional.
# This schema also declares the `model_name`, `stream` and `keep_alive`
# description anchors. `model_name` and `stream` are aliased by other schemas
# in this file, so the anchors must stay on these nodes.
GenerateCompletionRequest:
  type: object
  description: Request class for the generate endpoint.
  properties:
    model:
      type: string
      description: &model_name |
        The model name.
        Model names follow a `model:tag` format. Some examples are `orca-mini:3b-q4_1` and `llama2:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
      example: llama2:7b
    prompt:
      type: string
      description: The prompt to generate a response.
      example: Why is the sky blue?
    images:
      type: array
      description: (optional) a list of Base64-encoded images to include in the message (for multimodal models such as llava)
      items:
        type: string
        contentEncoding: base64
        description: Base64-encoded image (for multimodal models such as llava)
        example: iVBORw0KGgoAAAANSUhEUgAAAAkAAAANCAIAAAD0YtNRAAAABnRSTlMA/AD+APzoM1ogAAAAWklEQVR4AWP48+8PLkR7uUdzcMvtU8EhdykHKAciEXL3pvw5FQIURaBDJkARoDhY3zEXiCgCHbNBmAlUiyaBkENoxZSDWnOtBmoAQu7TnT+3WuDOA7KBIkAGAGwiNeqjusp/AAAAAElFTkSuQmCC
    system:
      type: string
      # Wording fixed: the sentence previously read "The system prompt to (overrides ...".
      description: The system prompt to use (overrides what is defined in the Modelfile).
    template:
      type: string
      description: The full prompt or prompt template (overrides what is defined in the Modelfile).
    context:
      type: array
      description: The context parameter returned from a previous request to [generateCompletion], this can be used to keep a short conversational memory.
      items:
        type: integer
    options:
      $ref: '#/components/schemas/RequestOptions'
    format:
      $ref: '#/components/schemas/ResponseFormat'
    raw:
      type: boolean
      description: |
        If `true` no formatting will be applied to the prompt and no context will be returned.
        You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.
    stream:
      type: boolean
      description: &stream |
        If `false` the response will be returned as a single response object, otherwise the response will be streamed as a series of objects.
      default: false
    keep_alive:
      type: integer
      # The bullet lines below are literal text inside the block scalar,
      # not a YAML sequence.
      description: &keep_alive |
        How long (in minutes) to keep the model loaded in memory.
        - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
        - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
        - If set to 0, the model will be unloaded immediately once finished.
        - If not set, the model will stay loaded for 5 minutes by default
  required:
    - model
    - prompt
RequestOptions:
type: object
description: Additional model parameters listed in the documentation for the Modelfile such as `temperature`.
properties:
num_keep:
type: integer
description: |
Number of tokens to keep from the prompt.
seed:
type: integer
description: |
Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0)
num_predict:
type: integer
description: |
Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)
top_k:
type: integer
description: |
Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
top_p:
type: number
format: float
description: |
Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
tfs_z:
type: number
format: float
ollama/ollama-curated.yaml view on Meta::CPAN
example: llama2:7b
destination:
type: string
description: Name of the new model.
example: llama2-backup
required:
- source
- destination
# Payload for deleting a model; the model name is its only property.
DeleteModelRequest:
  type: object
  description: Request class for deleting a model.
  required: [name]
  properties:
    name:
      type: string
      # Reuses the shared model-name description through its YAML alias.
      description: *model_name
      example: 'llama2:13b'
# Payload for pulling a model; only `name` is mandatory.
PullModelRequest:
  type: object
  description: Request class for pulling a model.
  required: [name]
  properties:
    name:
      type: string
      # Reuses the shared model-name description through its YAML alias.
      description: *model_name
      example: 'llama2:7b'
    insecure:
      type: boolean
      default: false
      description: |
        Allow insecure connections to the library.
        Only use this if you are pulling from your own library during development.
    stream:
      type: boolean
      default: false
      # Reuses the shared streaming description through its YAML alias.
      description: *stream
# One progress object reported while pulling a model. No property is listed
# as required.
PullModelResponse:
  type: object
  description: |
    Response class for pulling a model.
    The first object is the manifest. Then there is a series of downloading responses. Until any of the download is completed, the `completed` key may not be included.
    The number of files to be downloaded depends on the number of layers specified in the manifest.
  properties:
    status:
      $ref: '#/components/schemas/PullModelStatus'
    digest:
      type: string
      example: 'sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711a'
      description: The model's digest.
    total:
      type: integer
      example: 2142590208
      description: Total size of the model.
    completed:
      type: integer
      example: 2142590208
      description: Total bytes transferred.
# Closed set of status strings used by PullModelResponse.status.
# NOTE(review): `downloading digestname` reads like a placeholder for a
# concrete digest — confirm what the server actually emits.
PullModelStatus:
  type: string
  description: Status pulling the model.
  example: pulling manifest
  enum:
    - 'pulling manifest'
    - 'downloading digestname'
    - 'verifying sha256 digest'
    - 'writing manifest'
    - 'removing any unused layers'
    - 'success'
# Payload for pushing a model; only `name` is mandatory.
PushModelRequest:
  type: object
  description: Request class for pushing a model.
  required: [name]
  properties:
    name:
      type: string
      description: 'The name of the model to push in the form of <namespace>/<model>:<tag>.'
      example: mattw/pygmalion:latest
    insecure:
      type: boolean
      default: false
      description: |
        Allow insecure connections to the library.
        Only use this if you are pushing to your library during development.
    stream:
      type: boolean
      default: false
      # Reuses the shared streaming description through its YAML alias.
      description: *stream
# One progress object reported while pushing a model. No property is listed
# as required.
PushModelResponse:
  description: Response class for pushing a model.
  type: object
  properties:
    status:
      $ref: '#/components/schemas/PushModelStatus'
    digest:
      type: string
      example: 'sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711a'
      description: the model's digest
    total:
      type: integer
      example: 2142590208
      description: total size of the model
# String enum of the status values referenced by PushModelResponse.status.
# NOTE(review): the visible excerpt ends right after the enum list — confirm
# whether further keys (e.g. an example) follow in the full file.
PushModelStatus:
type: string
description: Status pushing the model.
enum:
- retrieving manifest
- starting upload
- pushing manifest
- success
( run in 0.728 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )