add more helpful comments
horheynm committed Jan 14, 2025
1 parent 8e89c3b commit fc35707
Showing 6 changed files with 22 additions and 14 deletions.
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-compressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_FP8_Dynamic_compressed
-uncompressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_FP8_Dynamic_uncompressed
+compressed_model_stub: nm-testing/TinyLlama_1.1B_Chat_v1.0_FP8_Dynamic_compressed
+uncompressed_model_stub: nm-testing/TinyLlama_1.1B_Chat_v1.0_FP8_Dynamic_uncompressed
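These YAML stubs are what parse_params(COMPRESSED_LINEAR_CONFIG_DIR) feeds into @parameterized_class in the test file below. As a rough sketch of how such configs can be collected, assuming they sit as .yaml files in one directory; load_regression_configs is a hypothetical stand-in, not the repo's actual helper:

from pathlib import Path

import yaml  # PyYAML


def load_regression_configs(config_dir: str) -> list[dict]:
    """Collect every YAML config in config_dir into a list of dicts."""
    configs = []
    for path in sorted(Path(config_dir).glob("*.yaml")):
        with open(path) as f:
            configs.append(yaml.safe_load(f))
    return configs


# Each dict then carries the keys the tests rely on:
# cadence, test_type, compressed_model_stub, uncompressed_model_stub
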
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-compressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_W4A16_G128_compressed
-uncompressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_W4A16_G128_uncompressed
+compressed_model_stub: nm-testing/TinyLlama_1.1B_Chat_v1.0_W4A16_G128_compressed
+uncompressed_model_stub: nm-testing/TinyLlama_1.1B_Chat_v1.0_W4A16_G128_uncompressed
@@ -0,0 +1,4 @@
+cadence: "commit"
+test_type: "regression"
+compressed_model_stub: nm-testing/TinyLlama_1.1B_Chat_v1.0_W8A16_G128_compressed
+uncompressed_model_stub: nm-testing/TinyLlama_1.1B_Chat_v1.0_W8A16_G128_uncompressed

This file was deleted.

@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-compressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_W8A8_Dynamic_Per_Token_compressed
-uncompressed_model_stub: horheynm/TinyLlama_1.1B_Chat_v1.0_W8A8_Dynamic_Per_Token_uncompressed
+compressed_model_stub: nm-testing/TinyLlama_1.1B_Chat_v1.0_W8A8_Dynamic_Per_Token_compressed
+uncompressed_model_stub: nm-testing/TinyLlama_1.1B_Chat_v1.0_W8A8_Dynamic_Per_Token_uncompressed
@@ -18,9 +18,9 @@

 @requires_gpu
 @parameterized_class(parse_params(COMPRESSED_LINEAR_CONFIG_DIR))
-class TestUncompressedDecompressed(unittest.TestCase):
+class Test_Decompressed_Linear_Uncompressed_Linear(unittest.TestCase):
     """
-    Uncompressed-decompressed check
+    Uncompressed-Linear-forward decompressed-Linear-forward check
     Uncompressed: Optimized model saved as run_compressed=False, no need to decompress
     Decompressed: Optimized model saved as run_compressed=True, and decompressed using
@@ -38,18 +38,24 @@ def setUpClass(cls):
         cls.test_dir = tempfile.mkdtemp()
 
         quantization_config = CompressedTensorsConfig(run_compressed=False)
+
+        # Decompressed using HFQuantizer
+        # Linear forward
         cls.decompressed_model = AutoModelForCausalLM.from_pretrained(
             cls.compressed_model_stub,
             torch_dtype="auto",
             device_map="auto",
             quantization_config=quantization_config,
         )
 
+        # Load the model as-is, in its uncompressed state
+        # Linear forward
         cls.uncompressed_model = AutoModelForCausalLM.from_pretrained(
             cls.uncompressed_model_stub,
             torch_dtype=cls.decompressed_model.dtype,
             device_map=cls.decompressed_model.device,
         )
 
         cls.tokenizer = AutoTokenizer.from_pretrained(cls.compressed_model_stub)

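The assertions that exercise these two models are truncated above ("Expand Down"). A minimal sketch of the kind of check the class name implies, assuming the test compares deterministic greedy generations; the prompt, token budget, and helper name are illustrative, not the repo's actual code:

import torch


def check_generations_match(decompressed_model, uncompressed_model, tokenizer):
    # Illustrative prompt and token budget; the real test inputs are truncated above
    prompt = "Why is the sky blue?"
    inputs = tokenizer(prompt, return_tensors="pt").to(decompressed_model.device)

    with torch.no_grad():
        # Greedy decoding keeps the comparison deterministic
        out_a = decompressed_model.generate(**inputs, max_new_tokens=32, do_sample=False)
        out_b = uncompressed_model.generate(**inputs, max_new_tokens=32, do_sample=False)

    text_a = tokenizer.decode(out_a[0], skip_special_tokens=True)
    text_b = tokenizer.decode(out_b[0], skip_special_tokens=True)
    assert text_a == text_b, "decompressed and uncompressed outputs diverged"
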
@@ -94,9 +100,9 @@ def tearDownClass(cls):

 @requires_gpu
 @parameterized_class(parse_params(COMPRESSED_LINEAR_CONFIG_DIR))
-class TestCompressedDecompressed(unittest.TestCase):
+class Test_Compressed_CompressedLinear_Decompressed_Linear(unittest.TestCase):
     """
-    Compressed-decompressed check
+    Compressed-CompressedLinear, Decompressed-Linear check
     Compressed: Optimized model saved as run_compressed=True, no decompression
     Decompressed: Optimized model saved as run_compressed=True, and decompressed using
@@ -113,13 +119,15 @@ def setUpClass(cls):
         cls.test_dir = tempfile.mkdtemp()
 
         # Should have CompressedLinear modules
+        # CompressedLinear forward
         cls.compressed_model = AutoModelForCausalLM.from_pretrained(
             cls.compressed_model_stub,
             torch_dtype="auto",
             device_map="auto",
         )
 
         # Should just be linear modules
+        # Linear forward
         quantization_config = CompressedTensorsConfig(run_compressed=False)
         cls.decompressed_model = AutoModelForCausalLM.from_pretrained(
             cls.compressed_model_stub,
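The remainder of the hunk is truncated, but the split the new comments describe (CompressedLinear modules versus plain Linear modules) can be verified directly. A minimal sketch; the CompressedLinear import path is an assumption about the compressed-tensors package layout, and count_linear_kinds is a hypothetical helper:

import torch.nn as nn
# Assumed import path within the compressed-tensors package
from compressed_tensors.linear.compressed_linear import CompressedLinear


def count_linear_kinds(model) -> dict:
    """Count CompressedLinear vs plain nn.Linear modules in a loaded model."""
    counts = {"compressed": 0, "plain": 0}
    for module in model.modules():
        # Check CompressedLinear first, since it may subclass nn.Linear
        if isinstance(module, CompressedLinear):
            counts["compressed"] += 1
        elif isinstance(module, nn.Linear):
            counts["plain"] += 1
    return counts


# Expected under this commit's setup (illustrative, not asserted in the diff):
#   count_linear_kinds(cls.compressed_model)["compressed"] > 0    # run_compressed=True
#   count_linear_kinds(cls.decompressed_model)["compressed"] == 0  # run_compressed=False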
