diff --git a/.github/workflows/build_push_image.yml b/.github/workflows/build_push_image.yml index c38630c..5192dca 100644 --- a/.github/workflows/build_push_image.yml +++ b/.github/workflows/build_push_image.yml @@ -2,7 +2,9 @@ name: "CD: build & push image" on: push: - branches: [build-image] + branches: + - build-push-llama3-image + - build-push-phi3-image workflow_dispatch: env: @@ -12,7 +14,7 @@ env: jobs: deploy: - timeout-minutes: 15 + timeout-minutes: 30 runs-on: group: bulkier steps: @@ -31,13 +33,22 @@ jobs: run: | python -m pip install transformers torch + - name: Set environment variables based on branch + run: | + if [[ "${{ github.ref }}" == "refs/heads/build-push-llama3-image" ]]; then + echo "MODEL_NAME=llama3" >> $GITHUB_ENV + elif [[ "${{ github.ref }}" == "refs/heads/build-push-phi3-image" ]]; then + echo "MODEL_NAME=phi3" >> $GITHUB_ENV + fi + - name: Docker build and push run: | - df -h - IMAGE_NAME="${DOCKER_REPO_NAME}:${TAG_VERSION}" + IMAGE_NAME="${DOCKER_REPO_NAME}-${MODEL_NAME}:${TAG_VERSION}" cd src/compute_horde_prompt_gen - python download_model.py --model_name phi3 --huggingface_token "${{ secrets.HUGGINGFACE_API_KEY }}" + python download_model.py --model_name ${{ env.MODEL_NAME }} --huggingface_token "${{ secrets.HUGGINGFACE_API_KEY }}" docker build -t $IMAGE_NAME . + + docker push $IMAGE_NAME diff --git a/src/compute_horde_prompt_gen/Dockerfile b/src/compute_horde_prompt_gen/Dockerfile index ca7b698..a504b03 100644 --- a/src/compute_horde_prompt_gen/Dockerfile +++ b/src/compute_horde_prompt_gen/Dockerfile @@ -18,7 +18,7 @@ RUN mkdir /output # Copy your Python script into the container COPY saved_models/ /app/saved_models/ -COPY *.py . 
+COPY *.py ./ # Set the entrypoint to run your script ENTRYPOINT ["python3", "run.py"] diff --git a/src/compute_horde_prompt_gen/download_model.py b/src/compute_horde_prompt_gen/download_model.py index 7516d76..8e062b5 100644 --- a/src/compute_horde_prompt_gen/download_model.py +++ b/src/compute_horde_prompt_gen/download_model.py @@ -31,16 +31,35 @@ default="./saved_models/", help="Path to save the model and tokenizer to", ) + parser.add_argument( + "--quantize", + action="store_true", + help="Quantize the model", + default=False, + ) args = parser.parse_args() save_path = os.path.join(args.save_path, args.model_name) model_name = MODEL_PATHS[args.model_name] print(f"Saving {model_name} model to {save_path}") + quantization_config = None + if args.quantize: + import torch + from transformers import BitsAndBytesConfig + + quantization_config = BitsAndBytesConfig( + llm_int8_enable_fp32_cpu_offload=False, + load_in_4bit=True, + bnb_4bit_compute_dtype=torch.float16, + ) + print("using quantized model") + model = AutoModelForCausalLM.from_pretrained( model_name, # either give token directly or assume logged in with huggingface-cli token=args.huggingface_token or True, + quantization_config=quantization_config, ) model.save_pretrained(save_path) diff --git a/src/compute_horde_prompt_gen/model.py b/src/compute_horde_prompt_gen/model.py index d68b513..43218f3 100644 --- a/src/compute_horde_prompt_gen/model.py +++ b/src/compute_horde_prompt_gen/model.py @@ -15,7 +15,7 @@ def __init__(self): pass def generate(self, prompts: list[str], num_return_sequences: int, **_kwargs): - content = f"Here is the list of prompts:\nHow are you?\nDescribe something\nCount to ten\n" + content = "Here is the list of prompts:\nHow are you?\nDescribe something\nCount to ten\n" return [content for _ in range(len(prompts) * num_return_sequences)] @@ -23,7 +23,6 @@ class GenerativeModel: def __init__(self, model_path: str, quantize: bool = False): self.input_prompt_ending = None - import torch 
from transformers import ( AutoTokenizer, AutoModelForCausalLM, @@ -31,6 +30,7 @@ def __init__(self, model_path: str, quantize: bool = False): quantization_config = None if quantize: + import torch from transformers import BitsAndBytesConfig quantization_config = BitsAndBytesConfig(