From 4304c6451ff0c876f08a075dfe0c03570ddb07a2 Mon Sep 17 00:00:00 2001
From: bitbucket-pipelines
Date: Tue, 13 Feb 2024 22:48:08 +0000
Subject: [PATCH 1/3] Auto-generated. Updating Vectara public protos.
 (00b322398b00caecaa9e01ba767f1ca7f452a800).

---
 admin_apikey.proto | 5 +++++
 services.proto     | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/admin_apikey.proto b/admin_apikey.proto
index c7fe344..6b0aadd 100644
--- a/admin_apikey.proto
+++ b/admin_apikey.proto
@@ -19,6 +19,8 @@ enum ApiKeyType {
   API_KEY_TYPE__SERVING = 1;
   // ApiKey for serving and indexing. Gives access to both query and index data.
   API_KEY_TYPE__SERVING_INDEXING = 2;
+  // ApiKey used as a personal access key.
+  API_KEY_TYPE__PERSONAL = 3;
 }
 
 // Status of ApiKey.
@@ -84,6 +86,9 @@ message ListApiKeysRequest {
   // use the page key returned in the previous response; all other
   // fields are then ignored.
   bytes page_key = 2;
+  // [Optional] Get API keys of these types only.
+  // Default: if not set, API_KEY_TYPE__SERVING and API_KEY_TYPE__SERVING_INDEXING are returned.
+  repeated ApiKeyType api_key_type = 3;
   // If set, returns the corpus name & id associated with each api key.
   bool read_corpora_info = 1000;
 }
diff --git a/services.proto b/services.proto
index 20e512a..9ab82eb 100644
--- a/services.proto
+++ b/services.proto
@@ -71,7 +71,7 @@ service QueryService {
 
   // A streamed response interface for when lower latency is absolutely critical.
   rpc StreamQuery(com.vectara.serving.BatchQueryRequest)
-      returns (stream com.vectara.serving.QueryResponsePart) {
+      returns (stream com.vectara.serving.QueryResponsePart) {
   }
 }
 
@@ -216,3 +216,4 @@ service DocumentService {
     };
   }
 }
+
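Usage sketch (illustrative; not part of the patch). The new api_key_type filter can be exercised from any generated client. Below is a minimal Python example; it assumes stubs generated from these protos with grpc_tools.protoc, an AdminService exposing a ListApiKeys RPC (defined elsewhere in services.proto), and placeholder endpoint and auth metadata.

import grpc
import admin_apikey_pb2
import services_pb2_grpc

# Placeholder endpoint; the real address and auth metadata depend on the
# deployment and are not specified by this patch.
channel = grpc.secure_channel("api.example.com:443", grpc.ssl_channel_credentials())
stub = services_pb2_grpc.AdminServiceStub(channel)  # assumed service name

request = admin_apikey_pb2.ListApiKeysRequest(
    # New in this patch: restrict results to personal access keys. If
    # api_key_type is left empty, the server defaults to returning
    # API_KEY_TYPE__SERVING and API_KEY_TYPE__SERVING_INDEXING keys.
    api_key_type=[admin_apikey_pb2.API_KEY_TYPE__PERSONAL],
    read_corpora_info=True,  # also return the corpus name & id for each key
)
response = stub.ListApiKeys(request, metadata=[("customer-id", "1234567890")])
print(response)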
From a2be26f594742584be642dc875511ab2a50c3497 Mon Sep 17 00:00:00 2001
From: bitbucket-pipelines
Date: Tue, 20 Feb 2024 19:15:45 +0000
Subject: [PATCH 2/3] Auto-generated. Updating Vectara public protos.
 (33db6fd5d5ed9f1eb26b30a33fa03de430503bed).

---
 chat.proto     | 106 ++++++++++++++++++++++++++++++++++++++++++++++++
 services.proto |  49 +++++++++++++++++++++++
 serving.proto  |  37 +++++++++++++++++
 3 files changed, 192 insertions(+)
 create mode 100644 chat.proto

diff --git a/chat.proto b/chat.proto
new file mode 100644
index 0000000..75473cc
--- /dev/null
+++ b/chat.proto
@@ -0,0 +1,106 @@
+syntax = "proto3";
+
+import "status.proto";
+
+option java_package = "com.vectara.chat";
+option java_outer_classname = "ChatProtos";
+
+option go_package = "vectara.com/public/proto/chat";
+
+package com.vectara.chat;
+
+// A turn in a conversation is a single exchange of query and answer.
+// A conversation is composed of several turns.
+message Turn {
+  // The ID of the turn. The ID of the first turn in a conversation is the same as the
+  // ID of the conversation. This is unique within the chat history corpus.
+  string id = 1;
+  // The ID of the conversation this turn belongs to. This is the same as the ID of the
+  // first turn in the conversation.
+  string conversation_id = 5;
+  // The query text.
+  string query = 10;
+  // The answer text.
+  string answer = 15;
+  // Whether this turn is enabled. If a turn is disabled, it will not be used when
+  // generating answers for subsequent queries in the conversation.
+  bool enabled = 20;
+  // The time at which this turn was created, in seconds since the epoch.
+  int64 epoch_secs = 25;
+}
+
+// A chat contains several back-and-forth messages called turns.
+message Conversation {
+  // The ID of the conversation. This is unique within the chat history corpus.
+  string id = 1;
+  // The turns comprising this conversation.
+  repeated Turn turn = 5;
+}
+
+message ListConversationsRequest {
+
+  // Maximum number of conversations to return per page.
+  uint32 num_results = 5;
+
+  // A key that is passed in to retrieve a specific page of results.
+  // Leave empty to retrieve the first page. Subsequent page requests should
+  // use the page key returned in the previous response; all other
+  // fields are then ignored.
+  bytes page_key = 10;
+}
+
+message ListConversationsResponse {
+  // The first turn in each conversation.
+  // This does not comprise all turns in each conversation; only the first turn
+  // of each conversation is returned.
+  repeated Turn conversation = 1;
+  Status status = 5;
+
+  // A key that is passed in to retrieve a specific page of results.
+  // Pass this as-is in the next request to retrieve the next page of results.
+  bytes page_key = 10;
+}
+
+message ReadConversationsRequest {
+  // The IDs of the conversations to read. Limit: 10 conversations.
+  repeated string conversation_id = 5;
+}
+
+message ReadConversationsResponse {
+  repeated Conversation Conversation = 1;
+  Status status = 5;
+}
+
+message DeleteConversationsRequest {
+  // The IDs of the conversations to delete. Limit: 1000 conversations.
+  repeated string conversation_id = 5;
+}
+
+message DeleteConversationsResponse {
+  Status status = 1;
+}
+
+message DeleteTurnsRequest {
+  // The ID of the conversation from which to delete turns.
+  string conversation_id = 5;
+  // The ID of the turn to start deletion from. All turns in this conversation, starting
+  // from this turn (inclusive), will be deleted.
+  string turn_id = 10;
+}
+
+message DeleteTurnsResponse {
+  Status status = 1;
+}
+
+message DisableTurnsRequest {
+  // The ID of the conversation in which to disable turns.
+  string conversation_id = 5;
+  // The ID of the turn to start disabling from. All turns in this conversation starting
+  // from this turn will be disabled.
+  string turn_id = 10;
+}
+
+message DisableTurnsResponse {
+  Status status = 1;
+}
+
diff --git a/services.proto b/services.proto
index 9ab82eb..22eba7d 100644
--- a/services.proto
+++ b/services.proto
@@ -8,6 +8,7 @@ import "admin_metric.proto";
 import "admin_security.proto";
 import "admin_user.proto";
 
+import "chat.proto";
 import "common.proto";
 import "indexing.proto";
 import "serving.proto";
@@ -217,3 +218,51 @@ service DocumentService {
   }
 }
 
+// Service for working with chat conversations.
+service ChatService {
+
+  // List all conversations.
+  rpc ListConversations(com.vectara.chat.ListConversationsRequest)
+      returns (com.vectara.chat.ListConversationsResponse) {
+    option (google.api.http) = {
+      post: "/v1/list-conversations"
+      body: "*"
+    };
+  }
+
+  // Read all turns within the passed conversations.
+  rpc ReadConversations(com.vectara.chat.ReadConversationsRequest)
+      returns (com.vectara.chat.ReadConversationsResponse) {
+    option (google.api.http) = {
+      post: "/v1/read-conversations"
+      body: "*"
+    };
+  }
+
+  // Delete conversations, including all turns in them.
+  rpc DeleteConversations(com.vectara.chat.DeleteConversationsRequest)
+      returns (com.vectara.chat.DeleteConversationsResponse) {
+    option (google.api.http) = {
+      post: "/v1/delete-conversations"
+      body: "*"
+    };
+  }
+
+  // Delete turns.
+  rpc DeleteTurns(com.vectara.chat.DeleteTurnsRequest)
+      returns (com.vectara.chat.DeleteTurnsResponse) {
+    option (google.api.http) = {
+      post: "/v1/delete-turns"
+      body: "*"
+    };
+  }
+
+  // Disable turns. Disabled turns are no longer used when generating answers in the conversation.
+  rpc DisableTurns(com.vectara.chat.DisableTurnsRequest)
+      returns (com.vectara.chat.DisableTurnsResponse) {
+    option (google.api.http) = {
+      post: "/v1/disable-turns"
+      body: "*"
+    };
+  }
+}
diff --git a/serving.proto b/serving.proto
index a2063dc..705057e 100644
--- a/serving.proto
+++ b/serving.proto
@@ -61,6 +61,19 @@ message SummarizationRequest {
   // the auto-detected language of the incoming query should be used.
   string response_lang = 20;
 
+
+  // Vectara manages both system and user roles and prompts for the generative
+  // LLM out of the box by default. However, Scale customers can override the
+  // prompt_text via this variable. The prompt_text is in the form of an
+  // Apache Velocity template. For more details on how to configure the
+  // prompt_text, see the long-form documentation at
+  // https://docs.vectara.com/docs/prompts/vectara-prompt-engine
+  string prompt_text = 200;
+
+
+  // If present, the query will be treated as a chat query.
+  // When using chat, only one summarization request is allowed per query.
+  ChatRequest chat = 225;
 }
 
@@ -150,6 +163,27 @@ message QueryRequest {
 }
 
+// The chat request.
+message ChatRequest {
+  // Whether to store the query/answer pair.
+  bool store = 5;
+
+  // The conversation ID of the chat.
+  // If empty, a new conversation will be started.
+  string conversation_id = 15;
+}
+
+message Chat {
+  // The conversation ID of the chat.
+  string conversation_id = 5;
+  // The ID assigned to this query and answer.
+  string turn_id = 10;
+
+
+  // Any errors encountered while processing the chat request.
+  Status status = 1000;
+}
+
 message Attribute {
   string name = 5;
   string value = 10;
 }
@@ -164,6 +198,9 @@ message Summary {
 
   string lang = 15;
 
+  // Populated if chat was requested in the SummarizationRequest.
+  Chat chat = 205;
+
   // Statuses are marked “repeated” for consistency and flexibility. A failed
   // summary should bubble up into the status code of the entire ResponseSet.
   repeated Status status = 1000;
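Usage sketch (illustrative; not part of the patch). The conversation model above can be driven end to end with the new ChatService: listing returns only the first Turn of each conversation, and ReadConversations fills in the full histories. The Python stubs (chat_pb2, services_pb2_grpc) are assumed to be generated from these protos; the endpoint and auth metadata are placeholders.

import grpc
import chat_pb2
import services_pb2_grpc

channel = grpc.secure_channel("api.example.com:443", grpc.ssl_channel_credentials())
stub = services_pb2_grpc.ChatServiceStub(channel)
auth = [("customer-id", "1234567890")]  # placeholder auth metadata

# Each listed entry is the first Turn of a conversation, so its id (and
# conversation_id) identify the conversation itself.
page = stub.ListConversations(
    chat_pb2.ListConversationsRequest(num_results=10), metadata=auth)
ids = [turn.conversation_id for turn in page.conversation]

# ReadConversations accepts at most 10 conversation IDs per request.
full = stub.ReadConversations(
    chat_pb2.ReadConversationsRequest(conversation_id=ids[:10]), metadata=auth)
for conv in full.Conversation:  # note the capitalized field name in the response
    for turn in conv.turn:
        print(f"{turn.query!r} -> {turn.answer!r} (enabled={turn.enabled})")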
From 52b069b057a176a26f2debf69715560f9a5b253a Mon Sep 17 00:00:00 2001
From: bitbucket-pipelines
Date: Fri, 23 Feb 2024 14:39:58 +0000
Subject: [PATCH 3/3] Auto-generated. Updating Vectara public protos.
 (3d38692e9fff9e88b11e9b74e9e9244bda8eecba).

---
 serving.proto | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/serving.proto b/serving.proto
index b2d8280..a389b3b 100644
--- a/serving.proto
+++ b/serving.proto
@@ -61,14 +61,46 @@ message SummarizationRequest {
   // the auto-detected language of the incoming query should be used.
   string response_lang = 20;
 
+
   // Vectara manages both system and user roles and prompts for the generative
   // LLM out of the box by default. However, Scale customers can override the
   // prompt_text via this variable. The prompt_text is in the form of an
   // Apache Velocity template. For more details on how to configure the
   // prompt_text, see the long-form documentation at
   // https://docs.vectara.com/docs/prompts/vectara-prompt-engine
+  // See https://vectara.com/pricing/ for more details on becoming a Scale customer.
   string prompt_text = 200;
 
+  // Debugging the generative prompt is currently a Scale-only feature.
+  // See https://vectara.com/pricing/ for more details on becoming a Scale customer.
+  bool debug = 205;
+
+  // Controls the length of the summary.
+  // This is a rough estimate, not a hard limit: the final summary can be longer or shorter
+  // than this value. This is currently a Scale-only feature.
+  // See https://vectara.com/pricing/ for more details on becoming a Scale customer.
+  uint32 response_chars = 210;
+
+  // Parameters for the summarizer model. These are currently a Scale-only feature.
+  // See https://vectara.com/pricing/ for more details on becoming a Scale customer.
+  // WARNING: This is an experimental feature, and may break at any point with virtually
+  // no notice. It is meant for experimentation to converge on optimal parameters that can
+  // then be set in the prompt definitions.
+  message ModelParams {
+    optional uint32 max_tokens = 5;  // Maximum number of tokens the model may generate.
+    // The sampling temperature to use. Higher values make the summary more random, while lower
+    // values make it more focused and deterministic.
+    optional float temperature = 10;
+    // Higher values penalize new tokens based on their existing frequency in the text so far,
+    // decreasing the model's likelihood to repeat the same line verbatim.
+    optional float frequency_penalty = 15;
+    // Higher values penalize new tokens based on whether they appear in the text so far,
+    // increasing the model's likelihood to talk about new topics.
+    optional float presence_penalty = 20;
+  }
+  ModelParams model_params = 215;
+
+
   // If present, the query will be treated as a chat query.
   // When using chat, only one summarization request is allowed per query.
   ChatRequest chat = 225;
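Usage sketch (illustrative; not part of the patch). Patches 2 and 3 combine in SummarizationRequest: a query can start or continue a chat while tuning the Scale-only generation parameters. The Python below only constructs the message, assuming serving_pb2 was generated from serving.proto; attaching it to a QueryRequest/BatchQueryRequest, defined earlier in serving.proto, is omitted.

import serving_pb2

summarization = serving_pb2.SummarizationRequest(
    response_lang="en",
    response_chars=300,  # rough target length, not a hard limit (Scale-only)
    model_params=serving_pb2.SummarizationRequest.ModelParams(
        max_tokens=512,
        temperature=0.2,        # lower = more focused and deterministic
        frequency_penalty=0.5,  # discourage verbatim repetition
    ),
    # An empty conversation_id starts a new conversation; store=True persists
    # the query/answer pair so later turns can build on it. Only one
    # summarization request per query may carry a chat.
    chat=serving_pb2.ChatRequest(store=True),
)
print(summarization)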