
Completion improve #13

Merged
merged 3 commits into from Aug 1, 2024
5 changes: 3 additions & 2 deletions .dockerignore
@@ -3,12 +3,13 @@
.github
.vscode
.gitignore
Dockerfile
*Dockerfile
README.md
node_modules
package-lock.json
pnpm-lock.yaml
eslint.config.mjs
LICENSE
volumes
docker-compose.yaml
docker-compose*
Makefile
6 changes: 2 additions & 4 deletions Dockerfile
@@ -3,8 +3,6 @@ WORKDIR /app
COPY . .

HEALTHCHECK --interval=300s --timeout=30s --start-period=5s --retries=3 CMD [ "node", "healthy-check.js" ]
# RUN npm install -g pnpm && pnpm install
RUN npm install -g pnpm nodemon && pnpm install
RUN npm install -g pnpm && pnpm install
EXPOSE 8000
# ENTRYPOINT [ "npm", "start" ]
ENTRYPOINT [ "npm", "run", "dev" ]
ENTRYPOINT [ "npm", "start" ]
15 changes: 15 additions & 0 deletions Makefile
@@ -54,10 +54,25 @@ model-prepare:
@mkdir -p $(MODEL_SAVE_PATH) && [ -f $(MODEL_SAVE_PATH)/$(LANGUAGE_MODEL_NAME) ] || wget -O $(MODEL_SAVE_PATH)/$(LANGUAGE_MODEL_NAME) $(LANGUAGE_MODEL_URL)
@mkdir -p $(MODEL_SAVE_PATH) && [ -f $(MODEL_SAVE_PATH)/$(EMBEDDING_MODEL_NAME) ] || wget -O $(MODEL_SAVE_PATH)/$(EMBEDDING_MODEL_NAME) $(EMBEDDING_MODEL_URL)

# normal build & up
.PHONY: compose-build
compose-build: env model-prepare
@docker compose -f docker-compose.yaml build

.PHONY: up
up: compose-build
@docker compose -f docker-compose.yaml up -d

# dev build & up
.PHONY: compose-build-dev
compose-build-dev: env model-prepare
@docker compose -f docker-compose-dev.yaml build

.PHONY: dev
dev: env model-prepare
@docker compose -f docker-compose-dev.yaml up -d

# stop
.PHONY: stop
stop:
docker compose stop
8 changes: 7 additions & 1 deletion README.md
@@ -32,7 +32,12 @@ make build
# if you want to start only this project in docker, please run
make start
# PLEASE NOTE: make start will automatically run make build first

# to run a container bind to your local machine volume, run
make dev
# this does the same thing as `make up`, but also syncs your local changes into the container
```
**NOTE:** `make dev` requires a Node.js environment, or at least the dependencies listed in `package.json` installed as `node_modules` on your server. Please see the [Local Machine](#local-machine) section.

## Lint
To start lint your code, simply run
@@ -41,4 +46,5 @@ npm run lint
```

## Monitor
This project got monitor build with swagger-stats, when you got this project running, just go to `<Your Server>:<Your Port>/stats`
This project has monitoring built in with swagger-stats; once the project is running, just go to `<Your Server>:<Your Port>/stats`.
For example: [http://localhost:8000/stats](http://localhost:8000/stats)
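For context (not part of this PR's diff), a hedged example of calling the service once it is up; the `/v1/chat/completions` path is an assumption based on the OpenAI-style handler changed below, while the Bearer key requirement and `max_tokens` handling come from the `actions/inference.js` changes in this PR:

```javascript
// Hypothetical usage sketch; the endpoint path and API key value are assumptions.
const res = await fetch('http://localhost:8000/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    Authorization: 'Bearer <your-api-key>', // chatCompletion rejects requests without a Bearer key
  },
  body: JSON.stringify({
    messages: [{ role: 'user', content: 'Hello' }],
    max_tokens: 128, // mapped to llama.cpp's n_predict by this PR
  }),
});
console.log(await res.json());
```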
12 changes: 10 additions & 2 deletions actions/inference.js
@@ -44,16 +44,24 @@ function generateResponseContent(id, object, model, system_fingerprint, stream,
return resp;
}

const default_stop_keywords = ['### user:']

export async function chatCompletion(req, res) {
const api_key = (req.headers.authorization || '').split('Bearer ').pop();
if(!api_key) {
res.status(401).send('Not Authorized');
return;
}

const system_fingerprint = generateFingerprint();
let {messages, ...request_body} = req.body;
let {messages, max_tokens, ...request_body} = req.body;

// format requests to llamacpp format input
request_body.prompt = formatOpenAIContext(messages);
if(max_tokens) request_body.n_predict = max_tokens;
if(!request_body.stop) request_body.stop = [...default_stop_keywords];

// extra
const system_fingerprint = generateFingerprint();
const model = request_body.model || process.env.LANGUAGE_MODEL_NAME

if(request_body.stream) {
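To make the new request mapping above easier to follow, here is a minimal self-contained sketch (not part of the PR). The helper name `toLlamaCppBody` is hypothetical; the `max_tokens` to `n_predict` translation, the `stop` default, and the prompt formatting call are taken directly from the diff:

```javascript
// Minimal sketch of the mapping logic added to chatCompletion.
const default_stop_keywords = ['### user:'];

function toLlamaCppBody(openaiBody, formatOpenAIContext) {
  const { messages, max_tokens, ...request_body } = openaiBody;
  request_body.prompt = formatOpenAIContext(messages); // chat messages -> flat llama.cpp prompt
  if (max_tokens) request_body.n_predict = max_tokens; // OpenAI max_tokens -> llama.cpp n_predict
  if (!request_body.stop) request_body.stop = [...default_stop_keywords]; // default stop sequence
  return request_body;
}

// e.g. { model: 'x', messages: [...], max_tokens: 128 }
// ->   { model: 'x', prompt: '...', n_predict: 128, stop: ['### user:'] }
```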
8 changes: 8 additions & 0 deletions devDockerfile
@@ -0,0 +1,8 @@
FROM node:20.15.1-slim
WORKDIR /app
COPY . .

HEALTHCHECK --interval=300s --timeout=30s --start-period=5s --retries=3 CMD [ "node", "healthy-check.js" ]
RUN npm install -g pnpm nodemon && pnpm install
EXPOSE 8000
ENTRYPOINT [ "npm", "run", "dev" ]
48 changes: 48 additions & 0 deletions docker-compose-dev.yaml
@@ -0,0 +1,48 @@
services:
llamacpp:
container_name: ${INFERENCE_ENG}
image: gclub/llama.cpp:${INFERENCE_ENG_VERSION}
restart: always
deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md
resources:
reservations:
cpus: "${NUM_CPU_CORES}"
volumes:
- "${DOCKER_VOLUME_DIRECTORY:-.}/${MODEL_SAVE_PATH}:/models"
expose:
- ${ENG_ACCESS_PORT}
ports:
- ${INFERENCE_ENG_PORT}:${ENG_ACCESS_PORT}
command: ["-m", "models/${LANGUAGE_MODEL_NAME}","-c","8192"]

embedding_eng:
container_name: ${EMBEDDING_ENG}
image: gclub/llama.cpp:${INFERENCE_ENG_VERSION}
restart: always
deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md
resources:
reservations:
cpus: "${NUM_CPU_CORES_EMBEDDING}"
volumes:
- "${DOCKER_VOLUME_DIRECTORY:-.}/${MODEL_SAVE_PATH}:/models"
expose:
- ${ENG_ACCESS_PORT}
ports:
- ${EMBEDDING_ENG_PORT}:${ENG_ACCESS_PORT}
command: ["-m", "models/${EMBEDDING_MODEL_NAME}","--embeddings","--pooling","mean","-c","512"]

voyager:
container_name: voyager
restart: always
build:
dockerfile: devDockerfile
context: .
volumes:
- .:/app
expose:
- ${APP_PORT}
ports:
- ${APP_PORT}:${APP_PORT}
depends_on:
- llamacpp
- embedding_eng
2 changes: 0 additions & 2 deletions docker-compose.yaml
@@ -37,8 +37,6 @@ services:
build:
dockerfile: Dockerfile
context: .
volumes:
- .:/app
expose:
- ${APP_PORT}
ports: