Commit 90c75f0

small refactor; remove unused var
vwxyzjn committed Jan 14, 2024
1 parent 2166b4f commit 90c75f0
Showing 1 changed file with 8 additions and 9 deletions.
17 changes: 8 additions & 9 deletions lm_human_preference_details/summarize/reward.py
@@ -269,6 +269,13 @@ def evaluate(args: Args, accelerator, tokenizer, model, dataloader):
     args.local_batch_size = args.local_micro_batch_size * args.gradient_accumulation_steps
     args.micro_batch_size = int(args.local_micro_batch_size * args.world_size)
     args.batch_size = int(args.local_batch_size * args.world_size)
+    tokenizer = AutoTokenizer.from_pretrained(
+        args.base_model,
+        padding_side="right",
+        trust_remote_code=True,
+    )
+    # we use the padding token manually but do not resize the token embedding of the model
+    tokenizer.add_special_tokens({"pad_token": "[PAD]"})
 
     # load dataset
     dataset = load_dataset(args.label_dataset, split="train")
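
Note on the relocated block: `add_special_tokens` gives `[PAD]` an id one past the pretrained vocabulary, and since the embedding matrix is deliberately not resized, that id must never reach the embedding lookup. A minimal sketch of the masking pattern the comment alludes to (the helper name here is illustrative, not from this file):

import torch

def forward_with_manual_padding(model, query_responses, pad_token_id):
    # Pad ids fall outside the model's embedding table (it was never resized),
    # so mask them out of attention and swap in a valid dummy id before the forward.
    attention_mask = query_responses != pad_token_id
    input_ids = query_responses.masked_fill(~attention_mask, 0)
    return model(input_ids=input_ids, attention_mask=attention_mask)
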
@@ -288,7 +295,6 @@ def evaluate(args: Args, accelerator, tokenizer, model, dataloader):
         ],
     )
     dataloader = DataLoader(dataset, batch_size=args.local_micro_batch_size)
-    eval_datasets = []
     eval_dataloaders = {}
     for split in ["validation", "validation_cnndm"]:
         validation_dataset = load_dataset(args.label_dataset, split=split).flatten()
@@ -309,7 +315,6 @@ def evaluate(args: Args, accelerator, tokenizer, model, dataloader):
                 "policies",
             ],
         )
-        eval_datasets.append(validation_dataset)
         eval_dataloaders[split] = DataLoader(validation_dataset, batch_size=args.local_eval_batch_size)
     accelerator.print("The number of samples in validation_dataset", len(validation_dataset))
     accelerator.print("The number of samples in dataset", len(dataset))
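
The deleted `eval_datasets` list was write-only: downstream code reads the per-split dataloaders from `eval_dataloaders`, so the parallel list was dead state. A sketch of the consumption implied by the `evaluate` signature in the hunk headers (the actual call site sits outside this diff):

for split, eval_dataloader in eval_dataloaders.items():
    # signature per the hunk header: evaluate(args: Args, accelerator, tokenizer, model, dataloader)
    evaluate(args, accelerator, tokenizer, model, eval_dataloader)
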
@@ -345,13 +350,7 @@ def evaluate(args: Args, accelerator, tokenizer, model, dataloader):
     np.random.seed(local_seed)
     torch.manual_seed(local_seed)
     torch.backends.cudnn.deterministic = True
-    tokenizer = AutoTokenizer.from_pretrained(
-        args.base_model,
-        padding_side="right",
-        trust_remote_code=True,
-    )
-    # we use the padding token manually but do not resize the token embedding of the model
-    tokenizer.add_special_tokens({"pad_token": "[PAD]"})
+
     model_config = AutoConfig.from_pretrained(args.base_model)
     configure_dropout(model_config, args.dropout_layer_keys, 0.0)  # disable dropout
     scalar_model_config = ScalarModelConfig(
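
`configure_dropout(model_config, args.dropout_layer_keys, 0.0)` disables dropout so the scalar reward head sees consistent activations across forward passes. The helper is defined elsewhere in the repo; a plausible sketch consistent with this call site (assumed, not copied from the file):

def configure_dropout(model_config, dropout_layer_keys, dropout):
    # For each named dropout field the config actually defines
    # (e.g. "attn_pdrop", "embd_pdrop", "resid_pdrop" on GPT-2-style configs),
    # overwrite it with the requested rate.
    if dropout is not None:
        for key in dropout_layer_keys:
            if hasattr(model_config, key):
                setattr(model_config, key, dropout)
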