# -*- coding: utf-8 -*-
"""Quantize LLMs to GGUF.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1kvvWdDKefT_IIZ5fvDku-B569kS486qh
"""
# Clone llama.cpp and build it with CUDA support (LLAMA_CUBLAS=1 enables the
# cuBLAS backend in the Makefile build), then install the Python dependencies
# needed by its conversion scripts.
!git clone https://github.com/ggerganov/llama.cpp
!cd llama.cpp && LLAMA_CUBLAS=1 make && pip install -r requirements.txt
# Download the original Hugging Face checkpoint, then convert it to a
# full-precision (FP16) GGUF file, which serves as input to the quantizer.
from huggingface_hub import snapshot_download

model_name = "Qwen/Qwen1.5-1.8B"
methods = ['q4_k_m']  # quantization methods to produce
base_model = "./original_model/"
quantized_path = "./quantized_model/"

snapshot_download(repo_id=model_name, local_dir=base_model, local_dir_use_symlinks=False)

fp16_model = quantized_path + 'FP16.gguf'
!mkdir -p ./quantized_model/
!python llama.cpp/convert-hf-to-gguf.py ./original_model/ --outtype f16 --outfile ./quantized_model/FP16.gguf
# Quantize the FP16 GGUF into each of the requested formats.
import os

for m in methods:
    qtype = f"{quantized_path}{m.upper()}.gguf"
    os.system(f"./llama.cpp/quantize {fp16_model} {qtype} {m}")
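# Optional sanity check (a minimal sketch, not part of the original notebook):
# list the generated GGUF files with their sizes to confirm the quantized
# model came out substantially smaller than the FP16 original.
for fname in sorted(os.listdir(quantized_path)):
    size_gb = os.path.getsize(os.path.join(quantized_path, fname)) / 1e9
    print(f"{fname}: {size_gb:.2f} GB")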
# Spot-check the quantized model with an interactive chat session
# (-n caps generation at 90 tokens, -r sets the reverse prompt that
# hands control back to the user).
! ./llama.cpp/main -m ./quantized_model/Q4_K_M.gguf -n 90 --repeat_penalty 1.0 --color -i -r "User:" -f llama.cpp/prompts/chat-with-bob.txt
# Authenticate with the Hugging Face Hub, create a public repository, and
# upload the quantized model to it.
from huggingface_hub import notebook_login
notebook_login()

from huggingface_hub import HfApi, create_repo

model_path = "./quantized_model/Q4_K_M.gguf"  # your model's local path
repo_name = "qwen1.5-llm"                     # desired HF Hub repository name
repo_url = create_repo(repo_name, private=False)

api = HfApi()
api.upload_file(
    path_or_fileobj=model_path,
    path_in_repo="Q4_K_M.gguf",
    repo_id=repo_url.repo_id,  # e.g. "skuma307/qwen1.5-llm"
    repo_type="model",
)
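# Usage sketch (an assumption, not part of the original notebook): once the
# upload finishes, anyone can fetch the quantized file from the Hub and point
# llama.cpp at it.
from huggingface_hub import hf_hub_download

gguf_path = hf_hub_download(repo_id=repo_url.repo_id, filename="Q4_K_M.gguf")
print(f"Quantized model downloaded to {gguf_path}")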