Skip to content

Commit

Permalink
.
Browse files: browse the repository at this point in the history
  • Loading branch information
mgoin committed May 10, 2024
1 parent 935dd70 commit 332d98a
Showing 1 changed file with 6 additions and 8 deletions.
14 changes: 6 additions & 8 deletions auto_fp8/modeling.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,12 +28,12 @@ def from_pretrained(
quantize_config: BaseQuantizeConfig,
**model_init_kwargs,
):
"""load un-quantized pretrained model to cpu"""
"""Load the un-quantized pretrained model"""

if not torch.cuda.is_available():
raise EnvironmentError(
"Load pretrained model to do quantization requires CUDA available."
)
# if not torch.cuda.is_available():
# raise EnvironmentError(
# "Load pretrained model to do quantization requires CUDA available."
# )

def skip(*args, **kwargs):
pass
Expand Down Expand Up @@ -88,9 +88,7 @@ def skip(*args, **kwargs):
model.seqlen = model_config[key]
break
else:
print(
"can't get model's sequence length from model config, will set to 2048."
)
print("Can't get model's sequence length, setting to 2048.")
model.seqlen = 2048
model.eval()

Expand Down

0 comments on commit 332d98a

Please sign in to comment.