Showing 33 changed files with 1,342,876 additions and 0 deletions.
@@ -0,0 +1,123 @@
'''
Code for implementing 1-PL Item Response Theory (student ability and item difficulty) in Python
'''
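# Background: under the 1-PL (Rasch) model, the probability that student s answers
# item i correctly is sigmoid(theta_s - b_i), where theta_s is the student's ability
# and b_i is the item's difficulty. IRTModel.forward below returns the logit
# theta_s - b_i, and BCEWithLogitsLoss applies the sigmoid during training.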
import os
import random
import numpy as np
import json
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split


# setting the seed
def set_seed(seed_val=37):
    '''
    set random seed for reproducibility
    '''
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

def read_dataset(num_items):
    '''
    Read the IRT dataset and return the student IDs along with each student's
    responses to the first num_items items
    '''
    data_path = 'IRT_dataset/502_45/IRT_dataset.json'
    with open(data_path) as f:
        data = json.load(f)
    student_ids = []
    outputs = []
    for student_id, student_data in data.items():
        student_ids.append(student_id)
        outputs.append(list(student_data.values())[:num_items])
    return student_ids, outputs

class IRTModel(torch.nn.Module):
    def __init__(self, num_students, num_items, load_params=False):
        super(IRTModel, self).__init__()
        self.num_students = num_students
        self.num_items = num_items
        self.student_ability = torch.nn.parameter.Parameter(torch.normal(0.0, 0.1, (self.num_students,)))
        if not load_params:
            self.item_difficulty = torch.nn.parameter.Parameter(torch.normal(0.0, 0.1, (self.num_items,)))
        else:
            # load the saved item difficulties and freeze them
            device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
            self.item_difficulty = torch.load('IRT/IRT_parameters/item_difficulty.pt', map_location=device).requires_grad_(False)

    def forward(self, student_ids, item_ids):
        '''
        student_ids and item_ids need not have the same length; the output has
        shape (len(student_ids), len(item_ids))
        '''
        student_ability = self.student_ability[student_ids]
        # broadcast student_ability to match the number of items
        student_ability = student_ability.unsqueeze(1).expand(-1, len(item_ids))
        item_difficulty = self.item_difficulty[item_ids]
        predictions = student_ability - item_difficulty
        return predictions

def play_with_model(model):
    # play with the model on a small batch to check the output shape
    student_ids = torch.tensor([0, 1])
    item_ids = torch.tensor([0, 1, 2])

    predictions = model(student_ids, item_ids)
    print('Sample Predictions Shape: ', predictions.shape)

def get_dataloader(batch_size, student_ids, outputs):
    output = torch.tensor(outputs, dtype=torch.float32)
    student_ids = torch.tensor(student_ids, dtype=torch.int64)
    data = TensorDataset(student_ids, output)
    return DataLoader(data, batch_size=batch_size, shuffle=True)

def get_model_info(num_students, num_questions, load_params=False, verbose=True):
    '''
    Return the IRT model, loss function, optimizer, number of epochs, and device
    '''
    # select device
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    if verbose:
        print('Using device:', device)
    # build the model
    model = IRTModel(num_students, num_questions, load_params).to(device)
    # loss function
    loss_fn = torch.nn.BCEWithLogitsLoss()
    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-3)
    # number of training epochs
    num_epochs = 1000
    return model, loss_fn, optimizer, num_epochs, device

def train_IRT(item_ids_lst, model, loss_fn, optimizer, num_epochs, device, train_dataloader, verbose=True):
    '''
    Train the model
    '''
    item_ids = torch.tensor(item_ids_lst, dtype=torch.int64).to(device)
    for epoch in range(num_epochs):
        if verbose:
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-' * 10)
        # only a training phase is used here (no validation split)
        for phase in ['train']:
            if phase == 'train':
                model.train()  # Set model to training mode
            # Iterate over data.
            for student_ids, output in train_dataloader:
                student_ids = student_ids.to(device)
                output = output.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward
                # track history only if in train
                with torch.set_grad_enabled(phase == 'train'):
                    predictions = model(student_ids, item_ids)
                    loss = loss_fn(predictions, output)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                if verbose:
                    print('Loss: ', loss.item())
    return model
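The forward pass returns logits (ability minus difficulty); applying a sigmoid turns them into predicted probabilities of a correct answer. A minimal sketch, assuming a trained model instance named `model` and the same toy IDs used in `play_with_model`:

# sketch: turn the model's logits into predicted probabilities of a correct answer
device = next(model.parameters()).device
student_ids = torch.tensor([0, 1], device=device)
item_ids = torch.tensor([0, 1, 2], device=device)
with torch.no_grad():
    probs = torch.sigmoid(model(student_ids, item_ids))  # shape (2, 3), values in (0, 1)
print(probs)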
@@ -0,0 +1,26 @@
'''
Loads the saved parameters from IRT_parameters folder
'''
import torch


def load_irt_parameters():
    '''
    Load the saved parameters from IRT_parameters folder
    '''
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    student_ability = torch.load('IRT/IRT_parameters/student_ability.pt', map_location=device)
    item_difficulty = torch.load('IRT/IRT_parameters/item_difficulty.pt', map_location=device)
    return student_ability, item_difficulty


def main():
    student_ability, item_difficulty = load_irt_parameters()

    print(student_ability)
    print(item_difficulty)


if __name__ == '__main__':
    main()
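A small illustrative use of the loaded tensors, for instance ranking students by estimated ability and items by estimated difficulty (the variable names below are only for illustration):

student_ability, item_difficulty = load_irt_parameters()
# student indices ordered from lowest to highest estimated ability
print(torch.argsort(student_ability.detach()))
# item indices ordered from easiest to hardest
print(torch.argsort(item_difficulty.detach()))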
@@ -0,0 +1,43 @@
from implement_irt import *


def main():
    CONSIDER_TEST_CASES = 36

    # set seed
    set_seed(37)

    # read dataset
    student_ids, outputs = read_dataset(CONSIDER_TEST_CASES)

    # model parameters
    num_students = len(outputs)
    # num_items = len(outputs[0])
    num_items = CONSIDER_TEST_CASES

    model, loss_fn, optimizer, num_epochs, device = get_model_info(num_students, num_items)

    # play with the model
    play_with_model(model)

    # get dataloader
    batch_size = 128
    train_dataloader = get_dataloader(batch_size, [i for i in range(num_students)], outputs)

    # train the model
    item_ids = [i for i in range(num_items)]
    model = train_IRT(item_ids, model, loss_fn, optimizer, num_epochs, device, train_dataloader)

    # Save the student ability and item difficulty separately
    save_dir = 'IRT/IRT_parameters'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    torch.save(model.student_ability, '{:s}/student_ability.pt'.format(save_dir))
    torch.save(model.item_difficulty, '{:s}/item_difficulty.pt'.format(save_dir))


if __name__ == '__main__':
    main()
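Since the saved ability and difficulty vectors fully determine the 1-PL model, the full student-by-item probability matrix can be reconstructed from them alone. A brief sketch, assuming training has completed and the files above were written:

# sketch: rebuild P(correct) = sigmoid(theta_s - b_i) for every student and item
theta = torch.load('IRT/IRT_parameters/student_ability.pt', map_location='cpu').detach()
b = torch.load('IRT/IRT_parameters/item_difficulty.pt', map_location='cpu').detach()
prob_matrix = torch.sigmoid(theta.unsqueeze(1) - b.unsqueeze(0))  # (num_students, num_items)
print(prob_matrix.shape)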