Showing 33 changed files with 1,342,876 additions and 0 deletions.
@@ -0,0 +1,123 @@
'''
Code for implementing 1-PL Item Response Theory (student ability and item difficulty) in Python
'''
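# Background: under the 1-PL (Rasch) model, the probability that student s answers
# item i correctly is sigmoid(theta_s - b_i), where theta_s is the student's ability
# and b_i is the item's difficulty. IRTModel.forward below returns the logit
# theta_s - b_i, and BCEWithLogitsLoss applies the sigmoid during training.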
import os
import random
import numpy as np
import json
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split


# setting the seed
def set_seed(seed_val=37):
    '''
    set random seed for reproducibility
    '''
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

def read_dataset(num_items):
    '''
    Read the IRT dataset and return the student IDs along with each student's
    responses to the first num_items items
    '''
    data_path = 'IRT_dataset/502_45/IRT_dataset.json'
    with open(data_path) as f:
        data = json.load(f)
    student_ids = []
    outputs = []
    for student_id, student_data in data.items():
        student_ids.append(student_id)
        outputs.append(list(student_data.values())[:num_items])
    return student_ids, outputs

class IRTModel(torch.nn.Module):
    def __init__(self, num_students, num_items, load_params=False):
        super(IRTModel, self).__init__()
        self.num_students = num_students
        self.num_items = num_items
        self.student_ability = torch.nn.parameter.Parameter(torch.normal(0.0, 0.1, (self.num_students,)))
        if not load_params:
            self.item_difficulty = torch.nn.parameter.Parameter(torch.normal(0.0, 0.1, (self.num_items,)))
        else:
            # load the saved item difficulties and freeze them
            device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
            self.item_difficulty = torch.load('IRT/IRT_parameters/item_difficulty.pt', map_location=device).requires_grad_(False)

    def forward(self, student_ids, item_ids):
        '''
        student_ids and item_ids need not have the same length; the output has
        shape (len(student_ids), len(item_ids))
        '''
        student_ability = self.student_ability[student_ids]
        # broadcast student_ability to match the number of items
        student_ability = student_ability.unsqueeze(1).expand(-1, len(item_ids))
        item_difficulty = self.item_difficulty[item_ids]
        predictions = student_ability - item_difficulty
        return predictions

def play_with_model(model):
    # play with the model on a small batch to check the output shape
    student_ids = torch.tensor([0, 1])
    item_ids = torch.tensor([0, 1, 2])

    predictions = model(student_ids, item_ids)
    print('Sample Predictions Shape: ', predictions.shape)

def get_dataloader(batch_size, student_ids, outputs):
    output = torch.tensor(outputs, dtype=torch.float32)
    student_ids = torch.tensor(student_ids, dtype=torch.int64)
    data = TensorDataset(student_ids, output)
    return DataLoader(data, batch_size=batch_size, shuffle=True)

def get_model_info(num_students, num_questions, load_params=False, verbose=True):
    '''
    Return the IRT model, loss function, optimizer, number of epochs, and device
    '''
    # select device
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    if verbose:
        print('Using device:', device)
    # build the model
    model = IRTModel(num_students, num_questions, load_params).to(device)
    # loss function
    loss_fn = torch.nn.BCEWithLogitsLoss()
    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-3)
    # number of training epochs
    num_epochs = 1000
    return model, loss_fn, optimizer, num_epochs, device

def train_IRT(item_ids_lst, model, loss_fn, optimizer, num_epochs, device, train_dataloader, verbose=True):
    '''
    Train the model
    '''
    item_ids = torch.tensor(item_ids_lst, dtype=torch.int64).to(device)
    for epoch in range(num_epochs):
        if verbose:
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-' * 10)
        # only a training phase is used here (no validation split)
        for phase in ['train']:
            if phase == 'train':
                model.train()  # Set model to training mode
            # Iterate over data.
            for student_ids, output in train_dataloader:
                student_ids = student_ids.to(device)
                output = output.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward
                # track history only if in train
                with torch.set_grad_enabled(phase == 'train'):
                    predictions = model(student_ids, item_ids)
                    loss = loss_fn(predictions, output)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                if verbose:
                    print('Loss: ', loss.item())
    return model
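The forward pass returns logits (ability minus difficulty); applying a sigmoid turns them into predicted probabilities of a correct answer. A minimal sketch, assuming a trained model instance named `model` and the same toy IDs used in `play_with_model`:

# sketch: turn the model's logits into predicted probabilities of a correct answer
device = next(model.parameters()).device
student_ids = torch.tensor([0, 1], device=device)
item_ids = torch.tensor([0, 1, 2], device=device)
with torch.no_grad():
    probs = torch.sigmoid(model(student_ids, item_ids))  # shape (2, 3), values in (0, 1)
print(probs)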
@@ -0,0 +1,26 @@
'''
Loads the saved parameters from IRT_parameters folder
'''
import torch


def load_irt_parameters():
    '''
    Load the saved parameters from IRT_parameters folder
    '''
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    student_ability = torch.load('IRT/IRT_parameters/student_ability.pt', map_location=device)
    item_difficulty = torch.load('IRT/IRT_parameters/item_difficulty.pt', map_location=device)
    return student_ability, item_difficulty


def main():
    student_ability, item_difficulty = load_irt_parameters()

    print(student_ability)
    print(item_difficulty)


if __name__ == '__main__':
    main()
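A small illustrative use of the loaded tensors, for instance ranking students by estimated ability and items by estimated difficulty (the variable names below are only for illustration):

student_ability, item_difficulty = load_irt_parameters()
# student indices ordered from lowest to highest estimated ability
print(torch.argsort(student_ability.detach()))
# item indices ordered from easiest to hardest
print(torch.argsort(item_difficulty.detach()))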
@@ -0,0 +1,43 @@
from implement_irt import *


def main():
    CONSIDER_TEST_CASES = 36

    # set seed
    set_seed(37)

    # read dataset
    student_ids, outputs = read_dataset(CONSIDER_TEST_CASES)

    # model parameters
    num_students = len(outputs)
    # num_items = len(outputs[0])
    num_items = CONSIDER_TEST_CASES

    model, loss_fn, optimizer, num_epochs, device = get_model_info(num_students, num_items)

    # play with the model
    play_with_model(model)

    # get dataloader
    batch_size = 128
    train_dataloader = get_dataloader(batch_size, [i for i in range(num_students)], outputs)

    # train the model
    item_ids = [i for i in range(num_items)]
    model = train_IRT(item_ids, model, loss_fn, optimizer, num_epochs, device, train_dataloader)

    # Save the student ability and item difficulty separately
    save_dir = 'IRT/IRT_parameters'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    torch.save(model.student_ability, '{:s}/student_ability.pt'.format(save_dir))
    torch.save(model.item_difficulty, '{:s}/item_difficulty.pt'.format(save_dir))


if __name__ == '__main__':
    main()
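Since the saved ability and difficulty vectors fully determine the 1-PL model, the full student-by-item probability matrix can be reconstructed from them alone. A brief sketch, assuming training has completed and the files above were written:

# sketch: rebuild P(correct) = sigmoid(theta_s - b_i) for every student and item
theta = torch.load('IRT/IRT_parameters/student_ability.pt', map_location='cpu').detach()
b = torch.load('IRT/IRT_parameters/item_difficulty.pt', map_location='cpu').detach()
prob_matrix = torch.sigmoid(theta.unsqueeze(1) - b.unsqueeze(0))  # (num_students, num_items)
print(prob_matrix.shape)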