From a74623e936eca7b20ddeb5b14496128bcb19d6f1 Mon Sep 17 00:00:00 2001 From: NguyenNguyen205 <92852929+NguyenNguyen205@users.noreply.github.com> Date: Sun, 8 Dec 2024 08:45:19 +0700 Subject: [PATCH] API for /v1/files (#101) * Add tokens count for chat completion Signed-off-by: NguyenNguyen205 * Set up files APIs Signed-off-by: NguyenNguyen205 * Add check file format on endpoint Signed-off-by: NguyenNguyen205 * Add get all endpoints, save data to lance db, modify npm run dev Signed-off-by: NguyenNguyen205 * Add documentation for 2 endpoints Signed-off-by: NguyenNguyen205 * Add code commenting and fix small bugs Signed-off-by: NguyenNguyen205 --------- Signed-off-by: NguyenNguyen205 --- actions/file.js | 95 ++++++++++ database/file-handling.js | 61 +++++++ database/index.js | 11 +- database/types.js | 3 +- package.json | 6 +- routes/file.js | 13 ++ routes/index.js | 2 + swagger.json | 347 +++++++++++++++++++++++++++++++------ tools/enabledApiDecoder.js | 8 +- 9 files changed, 485 insertions(+), 61 deletions(-) create mode 100644 actions/file.js create mode 100644 database/file-handling.js create mode 100644 routes/file.js diff --git a/actions/file.js b/actions/file.js new file mode 100644 index 0000000..2c8bc51 --- /dev/null +++ b/actions/file.js @@ -0,0 +1,95 @@ +// coding=utf-8 + +import { randomUUID } from "crypto"; +import { extractAPIKeyFromRequest, validateAPIKey } from "../tools/apiKey.js"; +import * as fs from 'fs'; +import { getAllFilesData, loadFileToDatabase } from "../database/file-handling.js"; + +// Copyright [2024] [SkywardAI] +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+
+//     http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Upload constraints: size cap in bytes (10MB, decimal), the only accepted
+// MIME type, and the directory (relative to the working directory) that
+// uploaded files are written to.
+const MAX_FILE_BYTES = 10 * 1000 * 1000;
+const ACCEPTED_MIME_TYPE = "application/json";
+const UPLOAD_DIR = "files";
+
+/**
+ * Handle a file upload: validate the uploaded file, write it to disk and
+ * record its metadata in the database.
+ *
+ * Responds 401 without a valid API key, 400 when the file is missing, larger
+ * than 10MB or not `application/json`, 500 when the file or its metadata
+ * cannot be stored, and 200 with the stored metadata otherwise.
+ * @param {Request} req `req.file` is read, so a multipart parser must have
+ * populated it before this handler runs
+ * @param {Response} res
+ */
+export async function uploadFile(req, res) {
+    if (!validateAPIKey(extractAPIKeyFromRequest(req))) {
+        res.status(401).send("Not Authorized!");
+        return;
+    }
+
+    const { file } = req;
+    if (!file) {
+        res.status(400).send("Input file not specified");
+        return;
+    }
+
+    // Check file size limit (10MB)
+    if (file.size > MAX_FILE_BYTES) {
+        res.status(400).send("Only accepting file size smaller than 10MB");
+        return;
+    }
+
+    // Check file format
+    if (file.mimetype !== ACCEPTED_MIME_TYPE) {
+        res.status(400).send("File format not supported");
+        return;
+    }
+
+    const resBody = {
+        id: randomUUID(),
+        bytes: file.size,
+        created_at: Date.now(),
+        filename: file.originalname,
+    };
+
+    // Write the file before recording it, so a failed write can never leave a
+    // metadata row for a file that was not stored.
+    // NOTE(review): files are stored under their original name, so a second
+    // upload with the same name overwrites the first while both rows remain.
+    try {
+        await fs.promises.mkdir(UPLOAD_DIR, { recursive: true });
+        await fs.promises.writeFile(`${UPLOAD_DIR}/${file.originalname}`, file.buffer);
+        console.log("File has been saved");
+    } catch (err) {
+        console.error("Can't save uploaded file:", err);
+        res.status(500).send("Can't save file");
+        return;
+    }
+
+    // loadFileToDatabase reports failure by rejecting, so the rejection is
+    // caught here instead of relying on its return value alone.
+    try {
+        const stored = await loadFileToDatabase(resBody);
+        if (!stored) throw new Error("loadFileToDatabase reported failure");
+    } catch (err) {
+        console.error("Can't save file metadata:", err);
+        res.status(500).send("Can't save to database");
+        return;
+    }
+
+    res.status(200).send(resBody);
+}
+
+/**
+ * Respond with the metadata of every stored file, wrapped as `{ data: [...] }`.
+ *
+ * Responds 401 without a valid API key and 500 when the metadata cannot be
+ * read from the database.
+ * @param {Request} req
+ * @param {Response} res
+ */
+export async function getAllFiles(req, res) {
+    if (!validateAPIKey(extractAPIKeyFromRequest(req))) {
+        res.status(401).send("Not Authorized!");
+        return;
+    }
+
+    let data;
+    try {
+        data = await getAllFilesData();
+    } catch (err) {
+        // Answer the request instead of leaving the rejection unhandled.
+        console.error("Can't read files metadata:", err);
+        res.status(500).send("Can't read from database");
+        return;
+    }
+
+    res.status(200).send({ data });
+}
\ No newline at end of file
diff --git a/database/file-handling.js b/database/file-handling.js
new file mode 100644
index 0000000..73a56d9
--- /dev/null
+++ b/database/file-handling.js
@@ -0,0 +1,59 @@
+// coding=utf-8
+
+// Copyright [2024] [SkywardAI]
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//     http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import { getTable } from "./index.js";
+import { FILE_TABLE } from "./types.js";
+
+/**
+ * @typedef FileMetadataStructure
+ * @property {String} id Id of the uploaded file
+ * @property {Number} bytes Size of the uploaded file
+ * @property {Number} created_at Upload time, in milliseconds since the Unix epoch
+ * @property {String} filename Name of the file
+ */
+
+/**
+ * Get all files metadata currently in database
+ * @returns {Promise} Resolves to an array of FileMetadataStructure, one per stored row
+ */
+export async function getAllFilesData() {
+    const file_table = await getTable(FILE_TABLE);
+    const rows = await file_table.query().toArray();
+
+    // created_at lives in an Int64 column (see initDB); Number() normalises the
+    // value read back - presumably a BigInt - into a plain number.
+    return rows.map(({ id, bytes, created_at, filename }) => ({
+        id,
+        bytes,
+        created_at: Number(created_at),
+        filename,
+    }));
+}
+
+/**
+ * Upload file metadata into database
+ * @param {FileMetadataStructure} fileData Metadata of the uploaded file
+ * @returns {Promise} Resolves to `true` once the row has been added; rejects
+ * when opening the table or adding the row fails
+ */
+export async function loadFileToDatabase(fileData) {
+    const file_table = await getTable(FILE_TABLE);
+    const { id, bytes, created_at, filename } = fileData;
+
+    // Insert only the four schema columns, whatever else fileData carries.
+    await file_table.add([{ id, bytes, created_at, filename }]);
+
+    return true;
+}
diff --git a/database/index.js b/database/index.js index 7e0d1cd..2fad7c0 100644 --- a/database/index.js +++ b/database/index.js @@ -16,11 +16,11 @@ import { connect } from "@lancedb/lancedb"; import { Schema, Field, FixedSizeList, - Float32, Utf8, Int32, + Float32, Utf8, Int32, Int64, // eslint-disable-next-line Table } from "apache-arrow"; -import { API_KEY_TABLE, DATASET_TABLE, SYSTEM_TABLE } from "./types.js"; +import { API_KEY_TABLE, DATASET_TABLE, FILE_TABLE, SYSTEM_TABLE } from "./types.js"; const uri = "/tmp/lancedb/"; const db = await connect(uri); @@ -44,6 +44,13 @@ export async function initDB(force = false) { new Field("api_key", new Utf8()), new Field("usage", new Int32()) ]), open_options); + // create or re-open file table + await db.createEmptyTable(FILE_TABLE, new Schema([ + new Field("id", new Utf8()), + new Field("bytes", new Int32()), + new Field("created_at", new Int64()), + new Field("filename", new Utf8()) + ]), open_options); } /** diff --git a/database/types.js b/database/types.js index 97f975a..6400a4f 100644 --- a/database/types.js +++ b/database/types.js @@ -15,4 +15,5 @@ export const SYSTEM_TABLE = 'system'; export const DATASET_TABLE = 'dataset'; -export const API_KEY_TABLE = 'api_tokens' \ No newline at end of file +export const API_KEY_TABLE = 'api_tokens'; +export const FILE_TABLE = 'files' \ No newline at end of file diff --git a/package.json b/package.json index 9f43f66..edcde55 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,7 @@ "main": "index.js", "scripts": { "start": "node --disable-warning=ExperimentalWarning index.js", - "dev": "nodemon --disable-warning=ExperimentalWarning index.js", + "dev": "nodemon --disable-warning=ExperimentalWarning index.js --ignore files", "lint": "npx eslint .", "build": "npm install && node
index.js" }, @@ -25,8 +25,10 @@ "eslint": "^9.8.0", "express": "^4.19.2", "globals": "^15.8.0", + "multer": "^1.4.5-lts.1", + "nodemon": "^3.1.7", "prom-client": "12", "swagger-stats": "^0.99.7", "swagger-ui-express": "^5.0.1" } -} +} \ No newline at end of file
diff --git a/routes/file.js b/routes/file.js
new file mode 100644
index 0000000..c446ed1
--- /dev/null
+++ b/routes/file.js
@@ -0,0 +1,23 @@
+import { Router } from "express";
+import { getAllFiles, uploadFile } from "../actions/file.js";
+import multer from "multer";
+
+/**
+ * Build the router for the file endpoints. It is mounted under `/files`
+ * by routes/index.js when the "file" route is enabled.
+ *
+ * NOTE(review): multer() is created without `limits`, and uploadFile reads
+ * `file.buffer`, so the upload looks to be held in memory in full before the
+ * 10MB check in uploadFile runs - consider passing `limits.fileSize`.
+ * @returns {Router} Router with `GET ''` and `POST ''` registered
+ */
+export default function fileRoute() {
+    const fileRouter = Router();
+    const multipartParser = multer();
+
+    // Listing needs no body parsing; uploads expect one file in field 'input'.
+    fileRouter.get('', getAllFiles);
+    fileRouter.post('', multipartParser.single('input'), uploadFile);
+
+    return fileRouter;
+}
\ No newline at end of file
diff --git a/routes/index.js b/routes/index.js index 58f5476..c5cf592 100644 --- a/routes/index.js +++ b/routes/index.js @@ -24,6 +24,7 @@ import embeddingRoute from "./embedding.js"; import versionRoute from "./version.js"; import { isRouteEnabled } from "../tools/enabledApiDecoder.js"; import { generateScript } from "../tools/web_embed.js"; +import fileRoute from "./file.js"; function indexRoute() { const router = Router(); @@ -55,6 +56,7 @@ function generateAPIRouters() { // api_router.use('/encoder', encoderRoute()); // api_router.use('/decoder', decoderRoute()); isRouteEnabled("version") && api_router.use('/version', versionRoute()); + isRouteEnabled("file") && api_router.use('/files', fileRoute()); return api_router; } diff --git a/swagger.json b/swagger.json index 9a18c22..506aab4 100644 --- a/swagger.json +++ b/swagger.json @@ -36,6 +36,10 @@ { "name": "Version", "description": "v1 Version APIs" + }, + { + "name": "File", + "description": "v1 File APIs" } ], "paths": { @@ -44,7 +48,7 @@ "tags": [ "Index" ], - "summary" : "Route of this page", + "summary": "Route of this page", "description": "Route to get this page" } }, @@ -61,7 +65,9 @@ "content": { "text/plain; charset=utf-8": { "examples": { - "ok":
{"value": "ok"} + "ok": { + "value": "ok" + } } } } @@ -74,13 +80,15 @@ }, "/stats": { "get": { - "tags": ["Index"], - "summary" : "Route to check stats", + "tags": [ + "Index" + ], + "summary": "Route to check stats", "description": "Graphical server stats, Click [here](/stats) to get the page.", "responses": { "200": { "description": "Returns The page of checking stats", - "content":{ + "content": { "text/html": {} } } @@ -89,8 +97,10 @@ }, "/chatbox": { "get": { - "tags": ["Index"], - "summary" : "Route to embed chatbot", + "tags": [ + "Index" + ], + "summary": "Route to embed chatbot", "description": "Please see this [README](https://github.com/SkywardAI/voyager?tab=readme-ov-file#embed-your-chatbot-into-website).", "parameters": [ { @@ -103,7 +113,7 @@ "responses": { "200": { "description": "Returns the load script, usually load with script tag in HTML", - "content":{ + "content": { "application/json; charset=utf-8": {} } } @@ -127,8 +137,14 @@ }, "example": { "messages": [ - { "role": "system", "content": "You are a helpful assistant who helps users solve their questions." }, - { "role": "user", "content": "Hello, tell me more about you!" } + { + "role": "system", + "content": "You are a helpful assistant who helps users solve their questions." + }, + { + "role": "user", + "content": "Hello, tell me more about you!" + } ] } } @@ -141,8 +157,12 @@ "application/json": { "schema": { "oneOf": [ - {"$ref": "#/components/schemas/CompletionResponseStream"}, - {"$ref": "#/components/schemas/CompletionResponseEntire"} + { + "$ref": "#/components/schemas/CompletionResponseStream" + }, + { + "$ref": "#/components/schemas/CompletionResponseEntire" + } ] } } @@ -156,13 +176,17 @@ } }, "security": [ - {"api_key": []} + { + "api_key": [] + } ] } }, "/v1/chat/rag-completions": { "post": { - "tags": ["Chat"], + "tags": [ + "Chat" + ], "summary": "AI chat completion with RAG dataset.", "description": "Start a conversation with given messages and QAs from dataset as context. 
Please load dataset and pass the correct dataset name to make it work.", "requestBody": { @@ -174,8 +198,14 @@ }, "example": { "messages": [ - { "role": "system", "content": "You are a helpful assistant who helps users solve their questions." }, - { "role": "user", "content": "tell me something interest about massachusetts" } + { + "role": "system", + "content": "You are a helpful assistant who helps users solve their questions." + }, + { + "role": "user", + "content": "tell me something interest about massachusetts" + } ], "dataset_name": "aisuko/squad01-v2" } @@ -196,15 +226,21 @@ "properties": { "context": { "type": "string", - "examples": ["Questioin:In what year did Massachusetts first require children to be educated in schools? Answer:1852"] + "examples": [ + "Questioin:In what year did Massachusetts first require children to be educated in schools? Answer:1852" + ] }, "identifier": { "type": "string", - "examples": ["Private_school"] + "examples": [ + "Private_school" + ] }, "_distance": { "type": "number", - "examples": [0.4810483455657959] + "examples": [ + 0.4810483455657959 + ] } } } @@ -221,13 +257,17 @@ } }, "security": [ - {"api_key": []} + { + "api_key": [] + } ] } }, "/v1/embeddings": { "post": { - "tags": ["Embedding"], + "tags": [ + "Embedding" + ], "summary": "Get embedding of input", "description": "Get the embedding value of given input, in OpenAI format", "requestBody": { @@ -237,17 +277,23 @@ "properties": { "input": { "type": "string", - "examples": ["Hello, world!"] + "examples": [ + "Hello, world!" 
+ ] }, "model": { "type": "string", "description": "You can pass model, but this won't work at current stage", - "examples": ["all-MiniLM-L6-v2"] + "examples": [ + "all-MiniLM-L6-v2" + ] }, "encoding_format": { "type": "string", "description": "You can pass encoding_format, but this won't work as we currently only support float.", - "examples": ["float"] + "examples": [ + "float" + ] } }, "required": [ @@ -270,7 +316,9 @@ "properties": { "object": { "type": "string", - "examples": ["list"] + "examples": [ + "list" + ] }, "data": { "type": "array", @@ -279,13 +327,19 @@ "properties": { "object": { "type": "string", - "examples": ["embedding"] + "examples": [ + "embedding" + ] }, "embedding": { "type": "array", "items": { "type": "number", - "examples": [-0.02184351161122322, 0.049017686396837234, 0.06728602200746536] + "examples": [ + -0.02184351161122322, + 0.049017686396837234, + 0.06728602200746536 + ] }, "description": "The length should be exactly 384 items", "example": [ @@ -300,14 +354,18 @@ }, "index": { "type": "integer", - "examples": [0] + "examples": [ + 0 + ] } } } }, "model": { "type": "string", - "examples": ["all-MiniLM-L6-v2"] + "examples": [ + "all-MiniLM-L6-v2" + ] }, "usage": { "type": "array", @@ -316,11 +374,15 @@ "properties": { "prompt_tokens": { "type": "integer", - "examples": [0] + "examples": [ + 0 + ] }, "total_tokens": { "type": "integer", - "examples": [0] + "examples": [ + 0 + ] } } } @@ -341,13 +403,17 @@ } }, "security": [ - {"api_key": []} + { + "api_key": [] + } ] } }, "/v1/embeddings/dataset": { "post": { - "tags": ["Embedding"], + "tags": [ + "Embedding" + ], "summary": "Loads dataset into database", "description": "Load a dataset from url or directly pass a dataset array. 
Requires either url or json specified.", "requestBody": { @@ -358,7 +424,7 @@ "required": [ "name" ], - "example":{ + "example": { "name": "aisuko/squad01-v2", "url": "https://datasets-server.huggingface.co/rows?dataset=aisuko%2Fsquad01-v2&config=default&split=validation&offset=0&length=100" } @@ -378,13 +444,17 @@ } }, "security": [ - {"api_key": []} + { + "api_key": [] + } ] } }, "/v1/token/api-key": { "get": { - "tags": [ "Token" ], + "tags": [ + "Token" + ], "summary": "Get api key", "description": "The route to get api key, please keep it yourself.", "responses": { @@ -411,7 +481,9 @@ "get": { "summary": "Get engine versions", "description": "Get version of various engines used by this project", - "tags": ["Version"], + "tags": [ + "Version" + ], "responses": { "200": { "description": "Get a json of versions", @@ -421,7 +493,9 @@ "properties": { "inference_engine_version": { "type": "string", - "examples": ["server--b1-2321a5e"] + "examples": [ + "server--b1-2321a5e" + ] } } } @@ -430,6 +504,142 @@ } } } + }, + "/v1/files": { + "post": { + "summary": "Upload a file", + "description": "Upload a file to the voyager endpoint", + "tags": [ + "File" + ], + "requestBody": { + "required": true, + "content": { + "multipart/form-data": { + "description": "Request body for file upload", + "schema": { + "type": "object", + "properties": { + "input": { + "type": "string", + "format": "binary" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Upload a file successfully", + "content": { + "application/json": { + "schema": { + "properties": { + "id": { + "type": "string", + "examples": [ + "file_abc123" + ] + }, + "bytes": { + "type": "integer", + "examples": [ + 120001 + ] + }, + "created_at": { + "type": "integer", + "examples": [ + 1677610603 + ] + }, + "filename": { + "type": "string", + "examples": [ + "test.json" + ] + } + } + } + } + } + }, + "400": { + "description": "Incorrect or missing file" + }, + "401": { + "description": "Not authorized" + 
}, + "500": { + "description": "Server error" + } + }, + "security": [ + { + "api_key": [] + } + ] + }, + "get": { + "summary": "Get all files", + "description": "Get all files metadata available in the table", + "tags": [ + "File" + ], + "responses": { + "200": { + "description": "Get all files metadata successfully", + "content": { + "application/json": { + "schema": { + "properties": { + "data": { + "type": "array", + "items": { + "properties": { + "id": { + "type": "string", + "examples": [ + "file_abc123" + ] + }, + "bytes": { + "type": "number", + "examples": [ + "120001" + ] + }, + "created_at": { + "type": "number", + "examples": [ + "1677610603" + ] + }, + "filename": { + "type": "string", + "examples": [ + "test.json" + ] + } + } + } + } + } + } + } + } + }, + "401": { + "description": "Not authorized" + } + }, + "security": [ + { + "api_key": [] + } + ] + } } }, "components": { @@ -440,11 +650,13 @@ "type": "array", "items": { "type": "object", - "properties":{ + "properties": { "role": { "type": "string", "examples": [ - "system", "user", "assistant" + "system", + "user", + "assistant" ] }, "content": { @@ -459,14 +671,20 @@ }, "max_tokens": { "type": "integer", - "examples": [ 32, 128, 512 ] + "examples": [ + 32, + 128, + 512 + ] }, "end": { "type": "array", "description": "When AI outputs the end pattern, end response.", "items": { "type": "string", - "examples": ["<|user|>"] + "examples": [ + "<|user|>" + ] } }, "stream": { @@ -484,11 +702,13 @@ "type": "array", "items": { "type": "object", - "properties":{ + "properties": { "role": { "type": "string", "examples": [ - "system", "user", "assistant" + "system", + "user", + "assistant" ] }, "content": { @@ -503,14 +723,20 @@ }, "max_tokens": { "type": "integer", - "examples": [ 32, 128, 512 ] + "examples": [ + 32, + 128, + 512 + ] }, "end": { "type": "array", "description": "When AI outputs the end pattern, end response.", "items": { "type": "string", - "examples": ["<|user|>"] + "examples": [ + 
"<|user|>" + ] } }, "stream": { @@ -534,7 +760,9 @@ "name": { "type": "string", "description": "Dataset name", - "examples": ["aisuko/squad01-v2"] + "examples": [ + "aisuko/squad01-v2" + ] }, "url": { "type": "string", @@ -551,7 +779,7 @@ "identifier": { "type": "string", "description": "Identifier of the row, not necessarily unique", - "examples":[ + "examples": [ "Chloroplast" ] }, @@ -643,7 +871,9 @@ "logprobs": { "type": "number", "nullable": true, - "examples": [null] + "examples": [ + null + ] }, "finish_reason": { "type": "string", @@ -661,15 +891,21 @@ "properties": { "prompt_tokens": { "type": "integer", - "examples":[0] + "examples": [ + 0 + ] }, "completion_tokens": { "type": "integer", - "examples":[0] + "examples": [ + 0 + ] }, "total_tokens": { "type": "integer", - "examples":[0] + "examples": [ + 0 + ] } } } @@ -738,12 +974,15 @@ "logprobs": { "type": "number", "nullable": true, - "examples": [null] + "examples": [ + null + ] }, "finish_reason": { "type": "string", "examples": [ - null, "stop" + null, + "stop" ] } } diff --git a/tools/enabledApiDecoder.js b/tools/enabledApiDecoder.js index 2341ec3..083358e 100644 --- a/tools/enabledApiDecoder.js +++ b/tools/enabledApiDecoder.js @@ -33,6 +33,9 @@ const allow_paths = { }, version: { allowed: false + }, + file: { + allowed: false } } @@ -41,7 +44,8 @@ const allow_indexes = { inference: false, token: false, embedding: false, - version: false + version: false, + file: false } export function decodeEnabledAPIs() { @@ -71,7 +75,7 @@ export function decodeEnabledAPIs() { /** * check if provided route enabled - * @param {"index"|"inference"|"embedding"|"version"|"token"} index_path_name name of route index + * @param {"index"|"inference"|"embedding"|"version"|"token"|"file"} index_path_name name of route index * @param {"docs"|"stats"|"healthy"|"chatbox"|"completions"|"rag"|"calculate"|"dataset"} api_name name of specific api * @returns {Boolean} */