Skip to content

Commit

Permalink
add files
Browse files Browse the repository at this point in the history
  • Loading branch information
Nish-19 committed Dec 26, 2023
1 parent a62478f commit f043809
Show file tree
Hide file tree
Showing 33 changed files with 1,342,876 additions and 0 deletions.
Binary file added .DS_Store
Binary file not shown.
Binary file added IRT/IRT_parameters/item_difficulty.pt
Binary file not shown.
Binary file added IRT/IRT_parameters/student_ability.pt
Binary file not shown.
Empty file added IRT/__init__.py
Empty file.
Binary file added IRT/__pycache__/__init__.cpython-310.pyc
Binary file not shown.
Binary file added IRT/__pycache__/__init__.cpython-311.pyc
Binary file not shown.
Binary file added IRT/__pycache__/implement_irt.cpython-310.pyc
Binary file not shown.
Binary file added IRT/__pycache__/implement_irt.cpython-311.pyc
Binary file not shown.
Binary file added IRT/__pycache__/load_params.cpython-310.pyc
Binary file not shown.
123 changes: 123 additions & 0 deletions IRT/implement_irt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
'''
Code for implementing 1-PL Item Response Theory (student ability and item difficulty) in Python
'''
import os
import random
import numpy as np
import json
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split

# setting the seed
def set_seed(seed_val=37):
    '''
    Seed every random number generator used here with the same value.

    Covers Python's ``random`` module, NumPy's global generator, and
    PyTorch's CPU and CUDA generators, in that order.
    '''
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed_val)


def read_dataset(num_items, data_path='IRT_dataset/502_45/IRT_dataset.json'):
    '''
    Read the IRT dataset from a JSON file.

    The file must hold a JSON object whose values are themselves JSON
    objects; each top-level key is treated as a student id and the values
    of its nested object as that student's per-item outputs (in file order).

    Args:
        num_items: keep at most this many leading outputs per student.
        data_path: location of the JSON file. Defaults to the path the
            original implementation hard-coded, so existing callers are
            unaffected.

    Returns:
        (student_ids, outputs): the list of top-level keys and, aligned with
        it, a list of per-student output lists truncated to ``num_items``.
    '''
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(data_path, encoding='utf-8') as f:
        data = json.load(f)
    student_ids = list(data)
    outputs = [
        list(student_data.values())[:num_items]
        for student_data in data.values()
    ]
    return student_ids, outputs

class IRTModel(torch.nn.Module):
    '''
    1-PL IRT model: one ability scalar per student and one difficulty
    scalar per item; the prediction for a (student, item) pair is
    ``ability - difficulty``.
    '''

    def __init__(self, num_students, num_items, load_params=False):
        '''
        Create the ability and difficulty vectors.

        Abilities are always freshly drawn from N(0, 0.1). Difficulties are
        drawn the same way unless ``load_params`` is true, in which case they
        are read from 'IRT/IRT_parameters/item_difficulty.pt' and frozen.
        '''
        super().__init__()
        self.num_students = num_students
        self.num_items = num_items
        # Abilities are sampled before difficulties; keeping that order keeps
        # seeded runs reproducible.
        self.student_ability = torch.nn.Parameter(
            torch.normal(0.0, 0.1, (self.num_students,))
        )
        if load_params:
            # Without CUDA the saved tensor is remapped onto the CPU; with
            # CUDA, torch.load's default placement is used.
            if torch.cuda.is_available():
                load_kwargs = {}
            else:
                load_kwargs = {'map_location': torch.device('cpu')}
            saved = torch.load('IRT/IRT_parameters/item_difficulty.pt', **load_kwargs)
            # Freeze the loaded difficulties so they receive no gradient.
            self.item_difficulty = saved.requires_grad_(False)
        else:
            self.item_difficulty = torch.nn.Parameter(
                torch.normal(0.0, 0.1, (self.num_items,))
            )

    def forward(self, student_ids, item_ids):
        '''
        Return ``ability - difficulty`` for every (student, item) pair.

        ``student_ids`` and ``item_ids`` need not have the same length; the
        result has one row per entry of ``student_ids`` and one column per
        entry of ``item_ids``.
        '''
        ability_column = self.student_ability[student_ids].unsqueeze(1)
        # Repeat each student's ability across all requested items.
        ability_grid = ability_column.expand(-1, len(item_ids))
        return ability_grid - self.item_difficulty[item_ids]

def play_with_model(model):
    '''
    Smoke-test ``model`` on a tiny fixed input and print the output shape.

    Calls the model with student ids [0, 1] and item ids [0, 1, 2].
    '''
    sample_students = torch.tensor([0, 1])
    sample_items = torch.tensor([0, 1, 2])
    sample_shape = model(sample_students, sample_items).shape
    print('Sample Predictions Shape: ', sample_shape)

def get_dataloader(batch_size, student_ids, outputs):
    '''
    Build a shuffling DataLoader of (student id, outputs) pairs.

    ``outputs`` is converted to a float32 tensor and ``student_ids`` to an
    int64 tensor; the two are paired along their first dimension.
    '''
    response_tensor = torch.tensor(outputs, dtype=torch.float32)
    id_tensor = torch.tensor(student_ids, dtype=torch.int64)
    paired = TensorDataset(id_tensor, response_tensor)
    return DataLoader(paired, shuffle=True, batch_size=batch_size)

def get_model_info(num_students, num_questions, load_params=False, verbose=True,
                   lr=5e-3, num_epochs=1000):
    '''
    Build everything needed to train the IRT model.

    Args:
        num_students: number of student ability parameters.
        num_questions: number of item difficulty parameters.
        load_params: forwarded to ``IRTModel``; when true the item
            difficulties are loaded from disk instead of initialised.
        verbose: print the selected device.
        lr: learning rate for the Adam optimizer (default 5e-3, the value
            that used to be hard-coded).
        num_epochs: number of training epochs to report back to the caller
            (default 1000, the value that used to be hard-coded).

    Returns:
        (model, loss_fn, optimizer, num_epochs, device)
    '''
    # Prefer the GPU when one is available.
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    if verbose:
        print('Using device:', device)
    model = IRTModel(num_students, num_questions, load_params).to(device)
    # The model outputs raw ability - difficulty scores, so use the loss that
    # applies the sigmoid itself.
    loss_fn = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    return model, loss_fn, optimizer, num_epochs, device

def train_IRT(item_ids_lst, model, loss_fn, optimizer, num_epochs, device, train_dataloader, verbose=True):
    '''
    Train ``model`` in place and return it.

    Args:
        item_ids_lst: item indices scored for every student in a batch.
        model: callable as ``model(student_ids, item_ids)``.
        loss_fn: called as ``loss_fn(predictions, output)``.
        optimizer: optimizer stepping the model's parameters.
        num_epochs: number of passes over ``train_dataloader``.
        device: device the batches and item ids are moved to.
        train_dataloader: iterable yielding ``(student_ids, output)`` batches.
        verbose: print an epoch header and, per epoch, the loss of the last
            batch processed in that epoch.

    Returns:
        The same ``model`` object after training.
    '''
    item_ids = torch.tensor(item_ids_lst, dtype=torch.int64).to(device)
    for epoch in range(num_epochs):
        if verbose:
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-' * 10)
        model.train()  # Set model to training mode
        # Stays None when the dataloader yields nothing, so the report below
        # can be skipped instead of failing on an unbound name.
        loss = None
        for student_ids, output in train_dataloader:
            student_ids = student_ids.to(device)
            output = output.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()
            # Force gradient tracking on even if the caller disabled it.
            with torch.enable_grad():
                predictions = model(student_ids, item_ids)
                loss = loss_fn(predictions, output)
                loss.backward()
                optimizer.step()
        if verbose and loss is not None:
            print('Loss: ', loss.item())
    return model
26 changes: 26 additions & 0 deletions IRT/load_params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
'''
Loads the saved parameters from IRT_parameters folder
'''
import torch

def load_irt_parameters(params_dir='IRT/IRT_parameters'):
    '''
    Load the saved student abilities and item difficulties.

    Args:
        params_dir: directory containing 'student_ability.pt' and
            'item_difficulty.pt'. Defaults to the folder that used to be
            hard-coded, so existing callers are unaffected.

    Returns:
        (student_ability, item_difficulty) as returned by ``torch.load``.
    '''
    import os

    # Without CUDA, remap the saved tensors onto the CPU; with CUDA, keep
    # torch.load's default placement.
    if torch.cuda.is_available():
        load_kwargs = {}
    else:
        load_kwargs = {'map_location': torch.device('cpu')}
    student_ability = torch.load(os.path.join(params_dir, 'student_ability.pt'), **load_kwargs)
    item_difficulty = torch.load(os.path.join(params_dir, 'item_difficulty.pt'), **load_kwargs)
    return student_ability, item_difficulty

def main():
    '''
    Load the saved IRT parameters and print abilities, then difficulties.
    '''
    abilities, difficulties = load_irt_parameters()
    # One print call with a newline separator emits the same two lines.
    print(abilities, difficulties, sep='\n')

# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
43 changes: 43 additions & 0 deletions IRT/run_irt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from implement_irt import *

def main():
    '''
    Train the 1-PL IRT model on the dataset and save its parameters.

    Reads the dataset, builds the model and optimizer, trains, and writes
    the student abilities and item difficulties as separate files under
    'IRT/IRT_parameters'.
    '''
    # Number of leading items kept per student.
    CONSIDER_TEST_CASES = 36

    # set seed
    set_seed(37)

    # read dataset; the ids from the file are not used below because
    # students are addressed by their position in ``outputs``.
    _student_ids, outputs = read_dataset(CONSIDER_TEST_CASES)

    # model parameters
    num_students = len(outputs)
    num_items = CONSIDER_TEST_CASES

    model, loss_fn, optimizer, num_epochs, device = get_model_info(num_students, num_items)

    # play with the model
    play_with_model(model)

    # get dataloader
    batch_size = 128
    train_dataloader = get_dataloader(batch_size, list(range(num_students)), outputs)

    # train the model
    item_ids = list(range(num_items))
    model = train_IRT(item_ids, model, loss_fn, optimizer, num_epochs, device, train_dataloader)

    # Save the student ability and item difficulty separately
    save_dir = 'IRT/IRT_parameters'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    torch.save(model.student_ability, '{:s}/student_ability.pt'.format(save_dir))
    torch.save(model.item_difficulty, '{:s}/item_difficulty.pt'.format(save_dir))


# Run the training pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()
Loading

0 comments on commit f043809

Please sign in to comment.