diff --git a/.dockerignore b/.dockerignore index b0c8185..23e3147 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,7 +3,7 @@ .github .vscode .gitignore -Dockerfile +*Dockerfile README.md node_modules package-lock.json @@ -11,4 +11,5 @@ pnpm-lock.yaml eslint.config.mjs LICENSE volumes -docker-compose.yaml \ No newline at end of file +docker-compose* +Makefile \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index f96d875..bfeb2ab 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,8 +3,6 @@ WORKDIR /app COPY . . HEALTHCHECK --interval=300s --timeout=30s --start-period=5s --retries=3 CMD [ "node", "healthy-check.js" ] -# RUN npm install -g pnpm && pnpm install -RUN npm install -g pnpm nodemon && pnpm install +RUN npm install -g pnpm && pnpm install EXPOSE 8000 -# ENTRYPOINT [ "npm", "start" ] -ENTRYPOINT [ "npm", "run", "dev" ] \ No newline at end of file +ENTRYPOINT [ "npm", "start" ] \ No newline at end of file diff --git a/Makefile b/Makefile index 50a90b2..d9490d5 100644 --- a/Makefile +++ b/Makefile @@ -54,6 +54,7 @@ model-prepare: @mkdir -p $(MODEL_SAVE_PATH) && [ -f $(MODEL_SAVE_PATH)/$(LANGUAGE_MODEL_NAME) ] || wget -O $(MODEL_SAVE_PATH)/$(LANGUAGE_MODEL_NAME) $(LANGUAGE_MODEL_URL) @mkdir -p $(MODEL_SAVE_PATH) && [ -f $(MODEL_SAVE_PATH)/$(EMBEDDING_MODEL_NAME) ] || wget -O $(MODEL_SAVE_PATH)/$(EMBEDDING_MODEL_NAME) $(EMBEDDING_MODEL_URL) +# normal build & up .PHONY: compose-build compose-build: env model-prepare @docker compose -f docker-compose.yaml build @@ -61,3 +62,17 @@ compose-build: env model-prepare .PHONY: up up: compose-build @docker compose -f docker-compose.yaml up -d + +# dev build & up +.PHONY: compose-build-dev +compose-build-dev: env model-prepare + @docker compose -f docker-compose-dev.yaml build + +.PHONY: dev +dev: env model-prepare + @docker compose -f docker-compose-dev.yaml up -d + +# stop +.PHONY: stop +stop: + docker compose stop \ No newline at end of file diff --git a/README.md b/README.md index 1bb243f..8497328 100644 
--- a/README.md +++ b/README.md @@ -32,7 +32,12 @@ make build # if you want to start only this project in docker, please run make start # PLEASE NOTE: make start will automatically run make build first + +# to run a container bound to your local machine volume, run +make dev +# this will do the same thing as `make up` but allows you to make changes and sync them with the container ``` +**NOTE:** `make dev` requires a Node.js environment installed, or at least the `node_modules` specified in `package.json` installed on your server. Please see the [Local Machine](#local-machine) section. ## Lint To start lint your code, simply run @@ -41,4 +46,5 @@ npm run lint ``` ## Monitor -This project got monitor build with swagger-stats, when you got this project running, just go to `:/stats` \ No newline at end of file +This project has monitoring built with swagger-stats; once you have this project running, just go to `:/stats`. +For example, [http://localhost:8000/stats](http://localhost:8000/stats) \ No newline at end of file diff --git a/actions/inference.js b/actions/inference.js index 3463936..32258b5 100644 --- a/actions/inference.js +++ b/actions/inference.js @@ -44,6 +44,8 @@ function generateResponseContent(id, object, model, system_fingerprint, stream, return resp; } +const default_stop_keywords = ['### user:'] + export async function chatCompletion(req, res) { const api_key = (req.headers.authorization || '').split('Bearer ').pop(); if(!api_key) { return; } - const system_fingerprint = generateFingerprint(); - let {messages, ...request_body} = req.body; + let {messages, max_tokens, ...request_body} = req.body; + + // format the OpenAI-style request into llama.cpp's input format request_body.prompt = formatOpenAIContext(messages); + if(max_tokens) request_body.n_predict = max_tokens; + if(!request_body.stop) request_body.stop = [...default_stop_keywords]; + + // extra response metadata + const system_fingerprint = generateFingerprint(); const model = 
request_body.model || process.env.LANGUAGE_MODEL_NAME if(request_body.stream) { diff --git a/devDockerfile b/devDockerfile new file mode 100644 index 0000000..ec3cc1d --- /dev/null +++ b/devDockerfile @@ -0,0 +1,8 @@ +FROM node:20.15.1-slim +WORKDIR /app +COPY . . + +HEALTHCHECK --interval=300s --timeout=30s --start-period=5s --retries=3 CMD [ "node", "healthy-check.js" ] +RUN npm install -g pnpm nodemon && pnpm install +EXPOSE 8000 +ENTRYPOINT [ "npm", "run", "dev" ] \ No newline at end of file diff --git a/docker-compose-dev.yaml b/docker-compose-dev.yaml new file mode 100644 index 0000000..d34616f --- /dev/null +++ b/docker-compose-dev.yaml @@ -0,0 +1,48 @@ +services: + llamacpp: + container_name: ${INFERENCE_ENG} + image: gclub/llama.cpp:${INFERENCE_ENG_VERSION} + restart: always + deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md + resources: + reservations: + cpus: "${NUM_CPU_CORES}" + volumes: + - "${DOCKER_VOLUME_DIRECTORY:-.}/${MODEL_SAVE_PATH}:/models" + expose: + - ${ENG_ACCESS_PORT} + ports: + - ${INFERENCE_ENG_PORT}:${ENG_ACCESS_PORT} + command: ["-m", "models/${LANGUAGE_MODEL_NAME}","-c","8192"] + + embedding_eng: + container_name: ${EMBEDDING_ENG} + image: gclub/llama.cpp:${INFERENCE_ENG_VERSION} + restart: always + deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md + resources: + reservations: + cpus: "${NUM_CPU_CORES_EMBEDDING}" + volumes: + - "${DOCKER_VOLUME_DIRECTORY:-.}/${MODEL_SAVE_PATH}:/models" + expose: + - ${ENG_ACCESS_PORT} + ports: + - ${EMBEDDING_ENG_PORT}:${ENG_ACCESS_PORT} + command: ["-m", "models/${EMBEDDING_MODEL_NAME}","--embeddings","--pooling","mean","-c","512"] + + voyager: + container_name: voyager + restart: always + build: + dockerfile: devDockerfile + context: . 
+ volumes: + - .:/app + expose: + - ${APP_PORT} + ports: + - ${APP_PORT}:${APP_PORT} + depends_on: + - llamacpp + - embedding_eng \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index 72c67d6..d87728b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -37,8 +37,6 @@ services: build: dockerfile: Dockerfile context: . - volumes: - - .:/app expose: - ${APP_PORT} ports: