fern/apis/text-gen/openapi/openapi.json

{
  "components": {
    "schemas": {
      "ChatCompletionChoice": {
        "description": "A single chat completion choice. A response will contain one or\nmore of these based on the setting of `n`.",
        "properties": {
          "finish_reason": {
            "anyOf": [
              {
                "$ref": "#/components/schemas/FinishReason"
              },
              {
                "type": "null"
              }
            ],
            "description": "The reason the response finished."
          },
          "index": {
            "description": "A unique identifier for the completion.",
            "title": "Index",
            "type": "integer"
          },
          "logprobs": {
            "anyOf": [
              {
                "$ref": "#/components/schemas/Logprobs"
              },
              {
                "type": "null"
              }
            ],
            "description": "Number of log probabilities to return per output token."
          },
          "message": {
            "allOf": [
              {
                "$ref": "#/components/schemas/ChatMessage"
              }
            ],
            "description": "A chat completion message generated by the model."
          }
        },
        "required": [
          "index",
          "message"
        ],
        "title": "ChatCompletionChoice",
        "type": "object"
      },
      "ChatCompletionChunk": {
        "description": "Represents a single chunk of a streaming chat completion response.\n\nThis object's schema is compatible with OpenAI's Chat Completion API.",
        "examples": [
          {
            "choices": [],
            "created": 11,
            "id": "chat-cmpl-123",
            "model": "my_model"
          }
        ],
        "properties": {
          "choices": {
            "description": "A list of chat completion choices.Can be more than one if n is greater than 1.",
            "items": {
              "$ref": "#/components/schemas/ChatCompletionChunkChoice"
            },
            "title": "Choices",
            "type": "array"
          },
          "created": {
            "description": "The Unix timestamp (in seconds) of when the chat completion was created.",
            "examples": [
              1672342342
            ],
            "title": "Created",
            "type": "integer"
          },
          "id": {
            "description": "A unique identifier for the entire chat completion request. Each chunk in the stream has the same ID.",
            "title": "ID",
            "type": "string"
          },
          "model": {
            "description": "The model used for the chat completion.",
            "title": "Model",
            "type": "string"
          },
          "object": {
            "const": "chat.completion.chunk",
            "default": "chat.completion.chunk",
            "description": "Object type field which is a constant and preset.",
            "enum": [
              "chat.completion.chunk"
            ],
            "title": "Object",
            "type": "string"
          },
          "usage": {
            "anyOf": [
              {
                "$ref": "#/components/schemas/UsageStats"
              },
              {
                "type": "null"
              }
            ]
          }
        },
        "required": [
          "id",
          "created",
          "model",
          "choices"
        ],
        "title": "ChatCompletionChunk",
        "type": "object"
      },
      "ChatCompletionChunkChoice": {
        "description": "An OpenAPI compatible schema for a chat completion chunk choice.",
        "properties": {
          "delta": {
            "allOf": [
              {
                "$ref": "#/components/schemas/ChatCompletionDelta"
              }
            ],
            "description": "The generated response delta."
          },
          "finish_reason": {
            "anyOf": [
              {
                "$ref": "#/components/schemas/FinishReason"
              },
              {
                "type": "null"
              }
            ],
            "description": "The reason the response finished."
          },
          "index": {
            "description": "A unique identifier for the completion.",
            "title": "Index",
            "type": "integer"
          },
          "logprobs": {
            "anyOf": [
              {
                "$ref": "#/components/schemas/Logprobs"
              },
              {
                "type": "null"
              }
            ],
            "description": "Number of log probabilities to return per output token."
          }
        },
        "required": [
          "index",
          "delta"
        ],
        "title": "ChatCompletionChunkChoice",
        "type": "object"
      },
      "ChatCompletionDelta": {
        "description": "An OpenAPI compatible schema for a chat completion choice delta.",
        "properties": {
          "content": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "The response content.",
            "title": "Content"
          },
          "role": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "The generated response role.",
            "title": "Role"
          },
          "tool_calls": {
            "anyOf": [
              {
                "items": {
                  "$ref": "#/components/schemas/ChatCompletionDeltaToolCall"
                },
                "type": "array"
              },
              {
                "type": "null"
              }
            ],
            "description": "The tool calls generated by the model, such as function calls.",
            "title": "Tool Calls"
          }
        },
        "title": "ChatCompletionDelta",
        "type": "object"
      },
      "ChatCompletionDeltaToolCall": {
        "description": "A single delta representing one partial piece of a tool call.\n\nToday this only supports function calls, but the schema supports\nindirection for different tool calls in the future.",
        "properties": {
          "function": {
            "anyOf": [
              {
                "$ref": "#/components/schemas/ChatCompletionDeltaToolCallFunction"
              },
              {
                "type": "null"
              }
            ],
            "description": "The delta of the function call generated by the model."
          },
          "id": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "The ID of the tool call.",
            "title": "Id"
          },
          "index": {
            "description": "The sequence index of the tool call.",
            "title": "Index",
            "type": "integer"
          },
          "type": {
            "anyOf": [
              {
                "const": "function",
                "enum": [
                  "function"
                ],
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "default": "function",
            "description": "The type of tool. Today only `function` is supported.",
            "title": "Type"
          }
        },
        "required": [
          "index"
        ],
        "title": "ChatCompletionDeltaToolCall",
        "type": "object"
      },
      "ChatCompletionDeltaToolCallFunction": {
        "description": "A single delta representing one partial piece of a \"function\" tool call.\n\nIt can contain part of the arguments, the name of the function, or both.",
        "properties": {
          "arguments": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "A partial chunk of the arguments to call the function with.",
            "title": "Arguments"
          },
          "name": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "A partial chunk of the function name to call.",
            "title": "Name"
          }
        },
        "title": "ChatCompletionDeltaToolCallFunction",
        "type": "object"
      },
      "ChatCompletionResponse": {
        "description": "An OpenAI API compatible schema for a chat completion response object.",
        "examples": [
          {
            "choices": [],
            "created": 11,
            "id": "chat-cmpl-123",
            "model": "my_model",
            "usage": {
              "completion_tokens": 20,
              "prompt_tokens": 10,
              "total_tokens": 30
            }
          }
        ],
        "properties": {
          "choices": {
            "description": "A list of chat completion choices.Can be more than one if n is greater than 1.",
            "items": {
              "$ref": "#/components/schemas/ChatCompletionChoice"
            },
            "title": "Choices",
            "type": "array"
          },
          "created": {
            "description": "The Unix timestamp (in seconds) of when the chat completion was created.",
            "examples": [
              1672342342
            ],
            "title": "Created",
            "type": "integer"
          },
          "id": {
            "description": "A unique identifier for the chat completion.",
            "title": "Id",
            "type": "string"
          },
          "model": {
            "description": "The model used for the chat completion.",
            "title": "Model",
            "type": "string"
          },
          "object": {
            "const": "chat.completion",
            "default": "chat.completion",
            "description": "Object type field which is a constant and preset.",
            "enum": [
              "chat.completion"
            ],
            "title": "Object",
            "type": "string"
          },
          "usage": {
            "allOf": [
              {
                "$ref": "#/components/schemas/UsageStats"
              }
            ],
            "description": "Usage statistics for the request."
          }
        },
        "required": [
          "id",
          "created",
          "model",
          "choices",
          "usage"
        ],
        "title": "ChatCompletionResponse",
        "type": "object"
      },
      "ChatCompletionResponseFormat": {
        "description": "OctoAI chat completion response format.",
        "properties": {
          "schema": {
            "anyOf": [
              {
                "type": "object"
              },
              {
                "type": "null"
              }
            ],
            "description": "The schema for the response format.",
            "title": "Schema"
          },
          "type": {
            "description": "The type of response format.",
            "title": "Type",
            "type": "string"
          }
        },
        "required": [
          "type"
        ],
        "title": "ChatCompletionResponseFormat",
        "type": "object"
      },
      "ChatMessage": {
        "description": "An OpenAI API compatible schema for a single ChatMessage.",
        "properties": {
          "content": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "items": {
                  "discriminator": {
                    "mapping": {
                      "image_url": "#/components/schemas/MultiModalImageMessage",
                      "text": "#/components/schemas/MultiModalTextMessage"
                    },
                    "propertyName": "type"
                  },
                  "oneOf": [
                    {
                      "$ref": "#/components/schemas/MultiModalImageMessage"
                    },
                    {
                      "$ref": "#/components/schemas/MultiModalTextMessage"
                    }
                  ]
                },
                "type": "array"
              },
              {
                "type": "null"
              }
            ],
            "description": "The contents of the message.",
            "title": "Content"
          },
          "role": {
            "description": "The role of the author of this message.",
            "title": "Role",
            "type": "string"
          },
          "tool_calls": {
            "anyOf": [
              {
                "items": {
                  "$ref": "#/components/schemas/ToolCall"
                },
                "type": "array"
              },
              {
                "type": "null"
              }
            ],
            "description": "The tool calls generated by the model, such as function calls.",
            "title": "Tool Calls"
          }
        },
        "required": [
          "role",
          "content"
        ],
        "title": "ChatMessage",
        "type": "object"
      },
      "CompletionChoice": {
        "description": "Completion choice for completion requests.",
        "properties": {
          "finish_reason": {
            "anyOf": [
              {
                "$ref": "#/components/schemas/FinishReason"
              },
              {
                "type": "null"
              }
            ],
            "description": "The reason the response finished."
          },
          "index": {
            "description": "A unique identifier for the completion.",
            "title": "Index",
            "type": "integer"
          },
          "logprobs": {
            "anyOf": [
              {
                "$ref": "#/components/schemas/Logprobs"
              },
              {
                "type": "null"
              }
            ],
            "description": "Number of log probabilities to return per output token."
          },
          "text": {
            "description": "The generated text in completion response.",
            "title": "Text",
            "type": "string"
          }
        },
        "required": [
          "index",
          "text"
        ],
        "title": "CompletionChoice",
        "type": "object"
      },
      "CompletionResponse": {
        "description": "Represents a completion response from the API.\nNote: both the streamed and non-streamed response objects\nshare the same shape (unlike the chat endpoint).",
        "examples": [
          {
            "choices": [],
            "created": 11,
            "id": "cmpl-123",
            "model": "my_model",
            "system_fingerprint": "system_fingerprint"
          }
        ],
        "properties": {
          "choices": {
            "description": "A list of completion choices.",
            "items": {
              "$ref": "#/components/schemas/CompletionChoice"
            },
            "title": "Choices",
            "type": "array"
          },
          "created": {
            "description": "The Unix timestamp (in seconds) of when the completion was created.",
            "examples": [
              1672342342
            ],
            "title": "Created",
            "type": "integer"
          },
          "id": {
            "description": "A unique identifier for the completion.",
            "title": "Id",
            "type": "string"
          },
          "model": {
            "description": "The model used for the completion.",
            "title": "Model",
            "type": "string"
          },
          "object": {
            "const": "text_completion",
            "default": "text_completion",
            "description": "Object type field which is a constant and preset.",
            "enum": [
              "text_completion"
            ],
            "title": "Object",
            "type": "string"
          },
          "system_fingerprint": {
            "description": "The system response fingerprint.",
            "title": "System Fingerprint",
            "type": "string"
          },
          "usage": {
            "anyOf": [
              {
                "$ref": "#/components/schemas/UsageStats"
              },
              {
                "type": "null"
              }
            ],
            "description": "Usage statistics for the request."
          }
        },
        "required": [
          "id",
          "choices",
          "created",
          "model",
          "system_fingerprint"
        ],
        "title": "CompletionResponse",
        "type": "object"
      },
      "CreateChatCompletionRequest": {
        "description": "An OpenAI API compatible chat completion request.",
        "examples": [
          {
            "max_tokens": 128,
            "messages": [
              {
                "content": "You are a helpful assistant. Keep your responses limited to one short paragraph if possible.",
                "role": "system"
              },
              {
                "content": "Hello world",
                "role": "user"
              }
            ],
            "model": "llama-2-13b-chat-fp16",
            "temperature": 0.1,
            "top_p": 0.9
          }
        ],
        "properties": {
          "frequency_penalty": {
            "default": 0.0,
            "description": "Penalizes new tokens based on their frequency in the generated text so far.",
            "maximum": 2.0,
            "minimum": -2.0,
            "title": "Frequency Penalty",
            "type": "number"
          },
          "ignore_eos": {
            "default": false,
            "description": "Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.",
            "title": "Ignore Eos",
            "type": "boolean"
          },
          "log_prompt": {
            "default": false,
            "description": "OctoAI stores request prompt if True.",
            "title": "Log Prompt",
            "type": "boolean"
          },
          "logit_bias": {
            "anyOf": [
              {
                "additionalProperties": {
                  "type": "number"
                },
                "type": "object"
              },
              {
                "type": "null"
              }
            ],
            "description": "Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {'50256': -100} to prevent the <|endoftext|> token from being generated.",
            "title": "Logit Bias"
          },
          "loglikelihood": {
            "default": false,
            "description": "Return log probabilities for all prompt tokens excluding the first one from prefill step if True.",
            "title": "Loglikelihood",
            "type": "boolean"
          },
          "logprobs": {
            "default": false,
            "description": "Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.",
            "title": "Logprobs",
            "type": "boolean"
          },
          "max_tokens": {
            "default": 512,
            "description": "Maximum number of tokens to generate per output sequence.",
            "minimum": 1.0,
            "title": "Max Tokens",
            "type": "integer"
          },
          "messages": {
            "description": "A list of messages comprising the conversation so far.",
            "items": {
              "$ref": "#/components/schemas/ChatMessage"
            },
            "title": "Messages",
            "type": "array"
          },
          "min_p": {
            "default": 0.0,
            "description": "Sets a dynamic threshold of the top tokens to consider based on the probability value of the most likely next token. Following https://arxiv.org/abs/2407.01082.",
            "maximum": 1.0,
            "minimum": 0.0,
            "title": "Min P",
            "type": "number"
          },
          "model": {
            "description": "The identifier of the model to use. Can be a shared tenancy or custom model identifier.",
            "title": "Model",
            "type": "string"
          },
          "n": {
            "default": 1,
            "description": "Number of output sequences to return.",
            "minimum": 1.0,
            "title": "N",
            "type": "integer"
          },
          "peft": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "Parameter-efficient fine-tuning ID.",
            "examples": [
              "example_peft_id"
            ],
            "title": "PEFT"
          },
          "presence_penalty": {
            "default": 0.0,
            "description": "Penalizes new tokens based on whether they appear in the generated text so far.",
            "maximum": 2.0,
            "minimum": -2.0,
            "title": "Presence Penalty",
            "type": "number"
          },
          "repetition_penalty": {
            "default": 1.0,
            "description": "Controls the likelihood of the model generating repeated texts.",
            "exclusiveMinimum": 0.0,
            "title": "Repetition Penalty",
            "type": "number"
          },
          "response_format": {
            "anyOf": [
              {
                "$ref": "#/components/schemas/ChatCompletionResponseFormat"
              },
              {
                "type": "null"
              }
            ],
            "description": "Allows specification of a response format and associated schema that will constrain the LLM output to that structure. For example, using the `json_object` type allows you to provide a desired json schema for the output to follow."
          },
          "stop": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "items": {
                  "type": "string"
                },
                "type": "array"
              },
              {
                "type": "null"
              }
            ],
            "description": "Generation stop condition.",
            "title": "Stop"
          },
          "stream": {
            "default": false,
            "description": "If set, tokens will be streamed incrementally to users. One or more tokens will be sent as server-sent events (SSE) as they become available. For more information on read above.",
            "title": "Stream",
            "type": "boolean"
          },
          "stream_options": {
            "allOf": [
              {
                "$ref": "#/components/schemas/StreamOptions"
              }
            ],
            "description": "If set, usageStats will be streamed on the last content-containing chunk."
          },
          "temperature": {
            "default": 1.0,
            "description": "Controls the randomness of the sampling.",
            "maximum": 2.0,
            "minimum": 0.0,
            "title": "Temperature",
            "type": "number"
          },
          "tool_choice": {
            "anyOf": [
              {
                "const": "auto",
                "enum": [
                  "auto"
                ],
                "type": "string"
              },
              {
                "const": "required",
                "enum": [
                  "required"
                ],
                "type": "string"
              },
              {
                "$ref": "#/components/schemas/FunctionChoice"
              },
              {
                "type": "null"
              }
            ],
            "default": "auto",
            "description": "Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {\"type\": \"function\", \"function\": {\"name\": \"my_function\"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.",
            "title": "Tool Choice"
          },
          "tools": {
            "anyOf": [
              {
                "items": {
                  "$ref": "#/components/schemas/ToolDefinition"
                },
                "type": "array"
              },
              {
                "type": "null"
              }
            ],
            "description": "A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.",
            "title": "Tools"
          },
          "top_logprobs": {
            "default": 0,
            "description": "An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to `true` if this parameter is used.",
            "title": "Top Logprobs",
            "type": "integer"
          },
          "top_p": {
            "default": 1.0,
            "description": "Controls the cumulative probability of the top tokens to consider.",
            "exclusiveMinimum": 0.0,
            "maximum": 1.0,
            "title": "Top P",
            "type": "number"
          },
          "user": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "A unique identifier representing your end-user.",
            "title": "User"
          }
        },
        "required": [
          "model",
          "messages"
        ],
        "title": "CreateChatCompletionRequest",
        "type": "object"
      },
      "CreateCompletionRequest": {
        "description": "`POST https://text.octoai.run/v1/completions`\n\nCompletes the provided prefix prompt given the parameters.\n\nYou can view this as the \"raw\" API to the model which provides\nmore complete user control than the chat completions API.\n\nThis API is compatible with OpenAI's API.",
        "properties": {
          "best_of": {
            "default": 1,
            "description": "Number of sequences that are generated from the prompt.`best_of` must be greater than or equal to `n`.",
            "title": "Best Of",
            "type": "integer"
          },
          "echo": {
            "default": false,
            "description": "Echo back the prompt in addition to the completion.",
            "title": "Echo",
            "type": "boolean"
          },
          "frequency_penalty": {
            "default": 0.0,
            "description": "Penalizes new tokens based on their frequency in the generated text so far.",
            "maximum": 2.0,
            "minimum": -2.0,
            "title": "Frequency Penalty",
            "type": "number"
          },
          "ignore_eos": {
            "default": false,
            "description": "Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.",
            "title": "Ignore Eos",
            "type": "boolean"
          },
          "log_prompt": {
            "default": false,
            "description": "OctoAI stores request prompt if True.",
            "title": "Log Prompt",
            "type": "boolean"
          },
          "logit_bias": {
            "anyOf": [
              {
                "additionalProperties": {
                  "type": "number"
                },
                "type": "object"
              },
              {
                "type": "null"
              }
            ],
            "description": "Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {'50256': -100} to prevent the <|endoftext|> token from being generated.",
            "title": "Logit Bias"
          },
          "loglikelihood": {
            "default": false,
            "description": "Return log probabilities for all prompt tokens excluding the first one from prefill step if True.",
            "title": "Loglikelihood",
            "type": "boolean"
          },
          "logprobs": {
            "anyOf": [
              {
                "maximum": 5.0,
                "minimum": 0.0,
                "type": "integer"
              },
              {
                "type": "null"
              }
            ],
            "description": "Number of log probabilities to return per output token.",
            "title": "Logprobs"
          },
          "max_tokens": {
            "default": 512,
            "description": "Maximum number of tokens to generate per output sequence.",
            "minimum": 1.0,
            "title": "Max Tokens",
            "type": "integer"
          },
          "min_p": {
            "default": 0.0,
            "description": "Sets a dynamic threshold of the top tokens to consider based on the probability value of the most likely next token. Following https://arxiv.org/abs/2407.01082.",
            "maximum": 1.0,
            "minimum": 0.0,
            "title": "Min P",
            "type": "number"
          },
          "model": {
            "description": "The identifier of the model to use. Can be a shared tenancy or custom model identifier.",
            "title": "Model",
            "type": "string"
          },
          "n": {
            "default": 1,
            "description": "Number of output sequences to return.",
            "minimum": 1.0,
            "title": "N",
            "type": "integer"
          },
          "peft": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "Parameter-efficient fine-tuning ID.",
            "examples": [
              "example_peft_id"
            ],
            "title": "PEFT"
          },
          "presence_penalty": {
            "default": 0.0,
            "description": "Penalizes new tokens based on whether they appear in the generated text so far.",
            "maximum": 2.0,
            "minimum": -2.0,
            "title": "Presence Penalty",
            "type": "number"
          },
          "prompt": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "items": {
                  "type": "string"
                },
                "type": "array"
              },
              {
                "items": {
                  "type": "integer"
                },
                "type": "array"
              },
              {
                "items": {
                  "items": {
                    "type": "integer"
                  },
                  "type": "array"
                },
                "type": "array"
              },
              {
                "type": "null"
              }
            ],
            "description": "The prompt to generate completions from.",
            "title": "Prompt"
          },
          "repetition_penalty": {
            "default": 1.0,
            "description": "Controls the likelihood of the model generating repeated texts.",
            "exclusiveMinimum": 0.0,
            "title": "Repetition Penalty",
            "type": "number"
          },
          "seed": {
            "default": 0,
            "description": "If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result. Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.",
            "title": "Seed",
            "type": "integer"
          },
          "stop": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "items": {
                  "type": "string"
                },
                "type": "array"
              },
              {
                "type": "null"
              }
            ],
            "description": "Generation stop condition.",
            "title": "Stop"
          },
          "stream": {
            "default": false,
            "description": "If set, tokens will be streamed incrementally to users. One or more tokens will be sent as server-sent events (SSE) as they become available. For more information on streaming, see the response description of this endpoint.",
            "title": "Stream",
            "type": "boolean"
          },
          "stream_options": {
            "allOf": [
              {
                "$ref": "#/components/schemas/StreamOptions"
              }
            ],
            "description": "If set, usageStats will be streamed on the last content-containing chunk."
          },
          "suffix": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "The suffix that comes after a completion of inserted text.",
            "title": "Suffix"
          },
          "temperature": {
            "default": 1.0,
            "description": "Controls the randomness of the sampling.",
            "maximum": 2.0,
            "minimum": 0.0,
            "title": "Temperature",
            "type": "number"
          },
          "top_p": {
            "default": 1.0,
            "description": "Controls the cumulative probability of the top tokens to consider.",
            "exclusiveMinimum": 0.0,
            "maximum": 1.0,
            "title": "Top P",
            "type": "number"
          },
          "user": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "A unique identifier representing your end-user.",
            "title": "User"
          }
        },
        "required": [
          "model",
          "prompt"
        ],
        "title": "CreateCompletionRequest",
        "type": "object"
      },
      "ErrorResponse": {
        "description": "An OpenAI API compatible schema for an error response.",
        "properties": {
          "code": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "Optional error code.",
            "title": "Code"
          },
          "message": {
            "description": "A human-readable error message.",
            "title": "Message",
            "type": "string"
          },
          "object": {
            "const": "error",
            "default": "error",
            "description": "The object type, always error.",
            "enum": [
              "error"
            ],
            "title": "Object",
            "type": "string"
          },
          "param": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "Optional error parameters.",
            "title": "Param"
          },
          "type": {
            "description": "A type identifier for the error.",
            "title": "Type",
            "type": "string"
          },
          "validation_errors": {
            "anyOf": [
              {
                "additionalProperties": {
                  "anyOf": [
                    {
                      "type": "string"
                    },
                    {
                      "items": {
                        "type": "string"
                      },
                      "type": "array"
                    }
                  ]
                },
                "type": "object"
              },
              {
                "type": "null"
              }
            ],
            "description": "Validation error in API.",
            "title": "Validation Errors"
          }
        },
        "required": [
          "message",
          "type"
        ],
        "title": "ErrorResponse",
        "type": "object"
      },
      "FinishReason": {
        "description": "The reason why the model stopped generating tokens.\n\nThis will be `stop` if the model naturally completed generation or encountered a\nprovided stop sequence, `length` if the maximum number of tokens specified in the\nrequest was reached, `content_filter` if content was omitted due to a flag from our\ncontent filters, `tool_calls` if the model called a tool, or `function_call`\n(deprecated) if the model called a function.",
        "enum": [
          "stop",
          "length",
          "tool_calls",
          "content_filter",
          "function_call",
          "cancelled"
        ],
        "title": "FinishReason",
        "type": "string"
      },
      "FunctionCall": {
        "description": "The representation of a function called during tool use.",
        "properties": {
          "arguments": {
            "description": "The arguments to the function call.",
            "title": "Arguments",
            "type": "string"
          },
          "name": {
            "description": "The name of the function to call.",
            "title": "Name",
            "type": "string"
          }
        },
        "required": [
          "name",
          "arguments"
        ],
        "title": "FunctionCall",
        "type": "object"
      },
      "FunctionChoice": {
        "description": "The choice of function to call.",
        "properties": {
          "function": {
            "$ref": "#/components/schemas/FunctionHandle"
          },
          "type": {
            "const": "function",
            "default": "function",
            "description": "The type of tool. Today only `function` is supported.",
            "enum": [
              "function"
            ],
            "title": "Type",
            "type": "string"
          }
        },
        "required": [
          "function"
        ],
        "title": "FunctionChoice",
        "type": "object"
      },
      "FunctionDefinition": {
        "description": "A definition of a function to call in tool use.",
        "properties": {
          "description": {
            "default": "",
            "description": "A description of what the function does. This can be used by the model to choose when and how to call the function.",
            "title": "Description",
            "type": "string"
          },
          "name": {
            "description": "The name of the function to be defined.",
            "title": "Name",
            "type": "string"
          },
          "parameters": {
            "description": "The parameters the function accepts described as a JSON Schema.",
            "title": "Parameters",
            "type": "object"
          }
        },
        "required": [
          "name"
        ],
        "title": "FunctionDefinition",
        "type": "object"
      },
      "FunctionHandle": {
        "description": "The name of the function to call.",
        "properties": {
          "name": {
            "description": "The name of the function that was called.",
            "title": "Name",
            "type": "string"
          }
        },
        "required": [
          "name"
        ],
        "title": "FunctionHandle",
        "type": "object"
      },
      "HTTPValidationError": {
        "description": "HTTP validation error in API.",
        "properties": {
          "detail": {
            "description": "Detail of the http validation error.",
            "items": {
              "$ref": "#/components/schemas/ValidationError"
            },
            "title": "Detail",
            "type": "array"
          }
        },
        "title": "HTTPValidationError",
        "type": "object"
      },
      "ImageURL": {
        "description": "Image URL for multi modal messages.",
        "properties": {
          "detail": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "Specifies the detail level of the image.",
            "title": "Detail"
          },
          "url": {
            "description": "Either a URL of the image or base64 encoded image data.",
            "title": "Url",
            "type": "string"
          }
        },
        "required": [
          "url"
        ],
        "title": "ImageURL",
        "type": "object"
      },
      "Logprobs": {
        "description": "An OpenAI API compatible schema for logprobs output.\nSee details in `https://platform.openai.com/docs/api-reference/chat/object#chat-create-logprobs`.",
        "properties": {
          "content": {
            "description": "List of logprobs info.",
            "items": {
              "anyOf": [
                {
                  "$ref": "#/components/schemas/LogprobsContent"
                },
                {
                  "type": "null"
                }
              ]
            },
            "title": "Content",
            "type": "array"
          }
        },
        "title": "Logprobs",
        "type": "object"
      },
      "LogprobsContent": {
        "description": "An OpenAI API compatible schema for logprobs output.",
        "properties": {
          "bytes": {
            "anyOf": [
              {
                "items": {},
                "type": "array"
              },
              {
                "type": "null"
              }
            ],
            "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.",
            "title": "Bytes"
          },
          "logprob": {
            "description": "Logprob corresponding to the token.",
            "title": "Logprob",
            "type": "number"
          },
          "token": {
            "description": "New generated token or token from prompt for loglikelihood case.",
            "title": "Token",
            "type": "string"
          },
          "top_logprobs": {
            "description": "List of top tokens info.",
            "items": {
              "$ref": "#/components/schemas/TopLogprobs"
            },
            "title": "Top Logprobs",
            "type": "array"
          }
        },
        "required": [
          "token",
          "logprob"
        ],
        "title": "LogprobsContent",
        "type": "object"
      },
      "MultiModalImageMessage": {
        "description": "Multi modal image message.",
        "properties": {
          "image_url": {
            "allOf": [
              {
                "$ref": "#/components/schemas/ImageURL"
              }
            ],
            "description": "The image URL which could be either a URL or base64 encoded image data."
          },
          "type": {
            "const": "image_url",
            "default": "image_url",
            "description": "The type of the content part.",
            "enum": [
              "image_url"
            ],
            "title": "Type",
            "type": "string"
          }
        },
        "required": [
          "image_url"
        ],
        "title": "MultiModalImageMessage",
        "type": "object"
      },
      "MultiModalTextMessage": {
        "description": "Multi modal text message.",
        "properties": {
          "text": {
            "description": "The text content of a multimodal message.",
            "title": "Text",
            "type": "string"
          },
          "type": {
            "const": "text",
            "default": "text",
            "description": "The type of the content part.",
            "enum": [
              "text"
            ],
            "title": "Type",
            "type": "string"
          }
        },
        "required": [
          "text"
        ],
        "title": "MultiModalTextMessage",
        "type": "object"
      },
      "StreamOptions": {
        "description": "Streaming options for the request.",
        "properties": {
          "include_usage": {
            "default": false,
            "description": "Whether or not to include token usage stats on the final chunk before the [Done] message.",
            "title": "Include Usage",
            "type": "boolean"
          }
        },
        "title": "StreamOptions",
        "type": "object"
      },
      "ToolCall": {
        "description": "An OpenAI API compatible schema for a tool invocation.",
        "properties": {
          "function": {
            "allOf": [
              {
                "$ref": "#/components/schemas/FunctionCall"
              }
            ],
            "description": "The function called by the model."
          },
          "id": {
            "description": "A unique ID for the tool call used to reference it in the response.",
            "title": "Id",
            "type": "string"
          },
          "type": {
            "const": "function",
            "default": "function",
            "description": "The type of tool. Today only `function` is supported.",
            "enum": [
              "function"
            ],
            "title": "Type",
            "type": "string"
          }
        },
        "required": [
          "function"
        ],
        "title": "ToolCall",
        "type": "object"
      },
      "ToolDefinition": {
        "description": "A definition of a tool that can be used by the model.\n\nNote: currently can only be a function.\n\nContains the JSON schema definition of the function including\nits parameters, return type, and description.",
        "properties": {
          "function": {
            "allOf": [
              {
                "$ref": "#/components/schemas/FunctionDefinition"
              }
            ],
            "description": "The function definition."
          },
          "type": {
            "const": "function",
            "default": "function",
            "description": "The type of tool. Today only `function` is supported.",
            "enum": [
              "function"
            ],
            "title": "Type",
            "type": "string"
          }
        },
        "required": [
          "function"
        ],
        "title": "ToolDefinition",
        "type": "object"
      },
      "TopLogprobs": {
        "description": "An OpenAI API compatible schema for logprobs output.",
        "properties": {
          "bytes": {
            "anyOf": [
              {
                "items": {},
                "type": "array"
              },
              {
                "type": "null"
              }
            ],
            "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.",
            "title": "Bytes"
          },
          "logprob": {
            "description": "Logprob corresponding to the top token.",
            "title": "Logprob",
            "type": "number"
          },
          "token": {
            "description": "Token from the top list.",
            "title": "Token",
            "type": "string"
          }
        },
        "required": [
          "token",
          "logprob"
        ],
        "title": "TopLogprobs",
        "type": "object"
      },
      "UsageStats": {
        "description": "The token usage statistics for a request.",
        "properties": {
          "completion_tokens": {
            "description": "Number of tokens in the generated completion.",
            "title": "Completion Tokens",
            "type": "integer"
          },
          "prompt_tokens": {
            "description": "Number of tokens in the prompt.",
            "title": "Prompt Tokens",
            "type": "integer"
          },
          "total_tokens": {
            "description": "Total number of tokens used in the request (prompt + completion).",
            "title": "Total Tokens",
            "type": "integer"
          }
        },
        "required": [
          "prompt_tokens",
          "completion_tokens",
          "total_tokens"
        ],
        "title": "UsageStats",
        "type": "object"
      },
      "ValidationError": {
        "description": "Validation error in API.",
        "properties": {
          "loc": {
            "description": "Error location in ValidationError.",
            "items": {
              "anyOf": [
                {
                  "type": "string"
                },
                {
                  "type": "integer"
                }
              ]
            },
            "title": "Location",
            "type": "array"
          },
          "msg": {
            "description": "Error message in ValidationError.",
            "title": "Message",
            "type": "string"
          },
          "type": {
            "description": "Error type in ValidationError.",
            "title": "Error Type",
            "type": "string"
          }
        },
        "required": [
          "loc",
          "msg",
          "type"
        ],
        "title": "ValidationError",
        "type": "object"
      }
    }
  },
  "info": {
    "description": "Text Generation API.",
    "title": "ollm-api",
    "version": "0.1"
  },
  "openapi": "3.1.0",
  "paths": {
    "/v1/chat/completions": {
      "post": {
        "description": "Create a Chat Completion.",
        "operationId": "create_chat_completion_v1_chat_completions_post",
        "requestBody": {
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/CreateChatCompletionRequest"
              }
            }
          },
          "required": true
        },
        "responses": {
          "200": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ChatCompletionResponse",
                  "anyOf": [
                    {
                      "$ref": "#/components/schemas/ChatCompletionResponse"
                    },
                    {
                      "$ref": "#/components/schemas/ChatCompletionChunk"
                    }
                  ],
                  "title": "Response 200 Create Chat Completion V1 Chat Completions Post"
                }
              },
              "text/event-stream": {
                "schema": {
                  "$ref": "#/components/schemas/ChatCompletionChunk"
                }
              }
            },
            "description": "An OctoAI text endpoint can be requested in either a synchronous or streaming mode. When the request body has `stream: false` set the content type will be `application/json`. When the request body has `stream: true` set the content type will be `text/event-stream` and will respond with a stream of [server-sent events (SSE)](https://en.wikipedia.org/wiki/Server-sent_events)."
          },
          "422": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            },
            "description": "Validation Error"
          },
          "500": {
            "content": {
              "application/json": {
                "example": {
                  "message": "An example error",
                  "object": "error",
                  "type": "internal_error"
                },
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "description": "Internal Server Error"
          }
        },
        "summary": "Create Chat Completion",
        "tags": [
          "text"
        ],
        "x-fern-streaming": {
          "response": {
            "$ref": "#/components/schemas/ChatCompletionResponse"
          },
          "response-stream": {
            "$ref": "#/components/schemas/ChatCompletionChunk"
          },
          "stream-condition": "$request.stream"
        }
      }
    },
    "/v1/completions": {
      "post": {
        "description": "Create a Completion.",
        "operationId": "create_completion_v1_completions_post",
        "requestBody": {
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/CreateCompletionRequest"
              }
            }
          },
          "required": true
        },
        "responses": {
          "200": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CompletionResponse"
                }
              },
              "text/event-stream": {
                "schema": {
                  "$ref": "#/components/schemas/CompletionResponse"
                }
              }
            },
            "description": "An OctoAI text endpoint can be requested in either a synchronous or streaming mode. When the request body has `stream: false` set the content type will be `application/json`. When the request body has `stream: true` set the content type will be `text/event-stream` and will respond with a stream of [server-sent events (SSE)](https://en.wikipedia.org/wiki/Server-sent_events)."
          },
          "422": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            },
            "description": "Validation Error"
          },
          "500": {
            "content": {
              "application/json": {
                "example": {
                  "message": "An example error",
                  "object": "error",
                  "type": "internal_error"
                },
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "description": "Internal Server Error"
          }
        },
        "summary": "Create Completion",
        "tags": [
          "text"
        ],
        "x-fern-streaming": {
          "response": {
            "$ref": "#/components/schemas/CompletionResponse"
          },
          "response-stream": {
            "$ref": "#/components/schemas/CompletionResponse"
          },
          "stream-condition": "$request.stream"
        }
      }
    }
  },
  "servers": [
    {
      "url": "https://text.octoai.run"
    }
  ]
}