import numpy as np
import torch
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
import math
import random
import os
import time
from tqdm import tqdm
import json
import string
from argparse import ArgumentParser
import pickle
unk = '<UNK>'
# Consult the PyTorch documentation for information on the functions used below:
# https://pytorch.org/docs/stable/torch.html
class RNN(nn.Module):
    def __init__(self, input_dim, h):
        super(RNN, self).__init__()
        self.h = h  # hidden layer dimension
        self.numOfLayer = 1
        self.rnn = nn.RNN(input_dim, h, self.numOfLayer, nonlinearity='tanh')  # single-layer Elman RNN
        self.W = nn.Linear(h, 5)  # hidden -> output weights (5 star classes)
        self.softmax = nn.LogSoftmax(dim=1)
        self.loss = nn.NLLLoss()
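        # Note: LogSoftmax followed by NLLLoss computes the same quantity as applying
        # nn.CrossEntropyLoss directly to the raw summed output scores.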

    def compute_Loss(self, predicted_vector, gold_label):
        return self.loss(predicted_vector, gold_label)

    def forward(self, inputs):
        # Obtain the hidden layer representation one time step at a time
        # (https://pytorch.org/docs/stable/generated/torch.nn.RNN.html)
        seq_len, batch_size, _ = inputs.size()
        h_t_minus_1 = torch.zeros(self.numOfLayer, self.h)
        output_matrix = torch.zeros(seq_len, 5)
        for i in range(seq_len):
            # Run one RNN step, then compute the output layer scores for this step
            _, h_t = self.rnn(inputs[i, :, :], h_t_minus_1)
            output_matrix[i] = torch.matmul(h_t, self.W.weight.T) + self.W.bias
            h_t_minus_1 = h_t
        # Sum the per-step output scores over the sequence
        output_sum = output_matrix.sum(dim=0).view(1, 5)
        # Obtain the probability distribution over the 5 classes
        predicted_vector = self.softmax(output_sum)
        return predicted_vector
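
    # As a sketch of an alternative (not used here), nn.RNN can also consume the whole
    # (seq_len, batch, input_dim) tensor in a single call and return every hidden state:
    #   all_h, h_n = self.rnn(inputs)           # all_h: (seq_len, batch, h)
    #   output_sum = self.W(all_h).sum(dim=0)   # (batch, 5)
    #   predicted_vector = self.softmax(output_sum)
    # The explicit time-step loop above keeps the recurrence easier to follow.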

def load_data(train_data, val_data):
    with open(train_data) as training_f:
        training = json.load(training_f)
    with open(val_data) as valid_f:
        validation = json.load(valid_f)
    tra = []
    val = []
    for elt in training:
        tra.append((elt["text"].split(), int(elt["stars"] - 1)))
    for elt in validation:
        val.append((elt["text"].split(), int(elt["stars"] - 1)))
    return tra, val
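
# load_data expects each JSON file to hold a list of objects with at least a "text" string
# and a "stars" rating in 1..5; ratings are shifted down by one to give class labels 0..4.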

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("-hd", "--hidden_dim", type=int, required=True, help="hidden_dim")
    parser.add_argument("-e", "--epochs", type=int, required=True, help="num of epochs to train")
    parser.add_argument("--train_data", required=True, help="path to training data")
    parser.add_argument("--val_data", required=True, help="path to validation data")
    parser.add_argument("--test_data", default="to fill", help="path to test data")
    parser.add_argument('--do_train', action='store_true')
    args = parser.parse_args()

    print("========== Loading data ==========")
    train_data, valid_data = load_data(args.train_data, args.val_data)  # X_data is a list of pairs (document, y); y in {0,1,2,3,4}

    # Think about the type of function that an RNN describes. To apply it, you will need to convert the text data into vector representations.
    # Further, think about where the vectors will come from. There are 3 reasonable choices:
    # 1) Randomly assign the input to vectors and learn better embeddings during training; see the PyTorch documentation for guidance.
    # 2) Assign the input to vectors using pretrained word embeddings (e.g. Word2Vec, GloVe, or FastText) and do not train/update these embeddings.
    # 3) Do the same as 2) but continue training (fine-tuning) the pretrained embeddings.
    # Option 3 will be the most time consuming, so we do not recommend starting with it. A sketch of option 1 is given below.
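    # The loop below follows option 2: fixed word vectors are loaded from word_embedding.pkl
    # and looked up per token. As a rough sketch (hypothetical names, not used below), option 1
    # would instead build a vocabulary index and a trainable nn.Embedding layer:
    #   vocab = {w: i for i, w in enumerate(sorted({tok for doc, _ in train_data for tok in doc}))}
    #   embedding_layer = nn.Embedding(len(vocab) + 1, 50)    # last index reserved for unknown words
    #   ids = torch.tensor([vocab.get(tok, len(vocab)) for tok in doc])
    #   vectors = embedding_layer(ids).view(len(ids), 1, -1)  # learned jointly with the RNN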
print("========== Vectorizing data ==========")
model = RNN(50, args.hidden_dim) # Fill in parameters
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.01)
word_embedding = pickle.load(open('./word_embedding.pkl', 'rb'))
stopping_condition = False
epoch = 0
last_train_accuracy = 0
last_validation_accuracy = 0

    while not stopping_condition:
        random.shuffle(train_data)
        model.train()
        # The training loop below mirrors the structure used in ffnn.py
        print("Training started for epoch {}".format(epoch + 1))
        correct = 0
        total = 0
        minibatch_size = 512
        N = len(train_data)
        loss_total = 0
        loss_count = 0
        for minibatch_index in tqdm(range(N // minibatch_size)):
            optimizer.zero_grad()
            loss = None
            for example_index in range(minibatch_size):
                input_words, gold_label = train_data[minibatch_index * minibatch_size + example_index]
                input_words = " ".join(input_words)
                # Remove punctuation
                input_words = input_words.translate(input_words.maketrans("", "", string.punctuation)).split()
                # Look up the word embedding dictionary; unknown words fall back to the 'unk' vector
                vectors = [word_embedding[i.lower()] if i.lower() in word_embedding else word_embedding['unk'] for i in input_words]
                # Reshape the input to (seq_len, batch_size=1, embedding_dim)
                vectors = torch.tensor(vectors).view(len(vectors), 1, -1)
                output = model(vectors)
                # Get loss
                example_loss = model.compute_Loss(output.view(1, -1), torch.tensor([gold_label]))
                # Get predicted label
                predicted_label = torch.argmax(output)
                correct += int(predicted_label == gold_label)
                total += 1
                if loss is None:
                    loss = example_loss
                else:
                    loss += example_loss
            # Average the accumulated loss over the minibatch before backpropagating
            loss = loss / minibatch_size
            loss_total += loss.item()
            loss_count += 1
            loss.backward()
            optimizer.step()
        print("Average training loss for epoch {}: {}".format(epoch + 1, loss_total / loss_count))
        print("Training completed for epoch {}".format(epoch + 1))
        print("Training accuracy for epoch {}: {}".format(epoch + 1, correct / total))
        training_accuracy = correct / total

        model.eval()
        correct = 0
        total = 0
        random.shuffle(valid_data)
        print("Validation started for epoch {}".format(epoch + 1))
        for input_words, gold_label in tqdm(valid_data):
            input_words = " ".join(input_words)
            input_words = input_words.translate(input_words.maketrans("", "", string.punctuation)).split()
            vectors = [word_embedding[i.lower()] if i.lower() in word_embedding else word_embedding['unk'] for i in input_words]
            vectors = torch.tensor(vectors).view(len(vectors), 1, -1)
            output = model(vectors)
            predicted_label = torch.argmax(output)
            correct += int(predicted_label == gold_label)
            total += 1
        print("Validation completed for epoch {}".format(epoch + 1))
        print("Validation accuracy for epoch {}: {}".format(epoch + 1, correct / total))
        validation_accuracy = correct / total

        # Early stopping: halt once validation accuracy drops while training accuracy is still improving
        if validation_accuracy < last_validation_accuracy and training_accuracy > last_train_accuracy:
            stopping_condition = True
            print("Training done to avoid overfitting!")
            print("Best validation accuracy is:", last_validation_accuracy)
        else:
            last_validation_accuracy = validation_accuracy
            last_train_accuracy = training_accuracy
        epoch += 1

    # You may find it be