Skip to content

Commit

Permalink
Update example
Browse files: browse the repository at this point in the history
  • Loading branch information
mgoin committed Jul 18, 2024
1 parent ab3dad3 commit be6eef2
Showing 1 changed file with 1 addition and 8 deletions.
9 changes: 1 addition & 8 deletions example_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,10 @@
 tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
 tokenizer.pad_token = tokenizer.eos_token

-MAX_SEQUENCE_LENGTH = 2048
 ds = load_dataset("mgoin/ultrachat_2k", split="train_sft").select(range(512))
 def preprocess(example):
     example = tokenizer.apply_chat_template(example["messages"], tokenize=False)
-    return tokenizer(
-        example,
-        padding=False,
-        max_length=MAX_SEQUENCE_LENGTH,
-        truncation=True,
-        add_special_tokens=False,
-    )
+    return tokenizer(example, max_length=2048, truncation=True, add_special_tokens=False)
 ds = ds.map(preprocess, remove_columns=ds.column_names)

 quantize_config = BaseQuantizeConfig(quant_method="fp8", activation_scheme="static")
Expand Down

0 comments on commit be6eef2

Please sign in to comment.