# torch-tutorial.py
import torch
import numpy as np
#tensors are n-dimensional arrays
#tensor init functions
t = torch.empty(1) #uninitialized one-element tensor
t = torch.empty(0) #empty tensor
t = torch.zeros(3, 2, 2)
t = torch.ones(3, 2, 2)
t = torch.rand(3, 2, 2)
t = torch.empty(3, 2, dtype=torch.double) #the datatype can always be specified
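#a quick extra sketch: building a tensor from existing data with torch.tensor
#and inspecting its shape and dtype
t = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) #tensor from a nested Python list
print(t.size())  #torch.Size([2, 2])
print(t.dtype)   #torch.float32, inferred from the data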
#these operations are element-wise
x = torch.rand(2, 2)
y = torch.ones(2, 2)
z = torch.mul(x, y) #element-wise multiplication, same as x * y
y.mul_(x) #the trailing underscore marks the in-place version: it modifies y itself
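#for contrast, a short sketch of real matrix multiplication (torch.matmul / @),
#which is not element-wise
a = torch.rand(2, 3)
b = torch.rand(3, 2)
c = torch.matmul(a, b) #shape (2, 2); a @ b is equivalent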
#slicing works as in numpy
x = torch.rand(4, 4)
print(x[:, 0]) #column 0
print(x[0, :]) #row 0
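#related extras: .item() extracts a Python number from a one-element tensor,
#and .view() / .reshape() change the shape
print(x[1, 1].item())       #a plain Python float
print(x.view(16).size())    #flattened: torch.Size([16])
print(x.view(-1, 8).size()) #torch.Size([2, 8]); -1 lets torch infer that dimension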
#conversion of a tensor to numpy array
x = torch.ones(3)
y = x.numpy()
#but they will share the same memory location (if they are on the cpu)
x.add_(1) #will also modify y
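#the conversion also works the other way via torch.from_numpy; on the CPU the
#tensor and the array share memory as well
a = np.ones(3)
b = torch.from_numpy(a)
a += 1 #also modifies b, since they share the same buffer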
#autograd: requires_grad=True makes torch record operations for backpropagation
x = torch.randn(3, requires_grad=True)
y = x + 2 #y is the output of a node in the computational graph
z = y * y * 2 #z.grad_fn is the backward function of the multiplication
z = z.mean() #now the last grad_fn is the mean
#compute the gradients
z.backward() #dz/dx
print(x.grad) #can only access the grad attribute of leaf tensors
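#a brief extra sketch: the usual ways to stop autograd from tracking a tensor
a = torch.randn(3, requires_grad=True)
a.requires_grad_(False) #in-place flag change
b = torch.randn(3, requires_grad=True).detach() #detached copy, no grad history
with torch.no_grad():
    c = b * 2 #operations inside this block are not recorded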
w = torch.ones(3, requires_grad=True)
for epoch in range(3):
    output = (w * 3).sum() #scalar output
    output.backward() #doutput/dw
    w.grad.zero_() #clear the gradients at each iteration, otherwise they accumulate
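#to see the accumulation the comment above warns about: without zero_() the
#gradient of (w * 3).sum() adds 3 per element on every backward pass
w2 = torch.ones(3, requires_grad=True)
for _ in range(3):
    (w2 * 3).sum().backward()
print(w2.grad) #tensor([9., 9., 9.]) after three accumulated passes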
#a single training sample: x=1, y=2, one trainable weight w
x = torch.tensor(1.0)
y = torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad=True)
#forward pass
y_hat = w * x
loss = (y_hat - y)**2
print(loss)
#backward pass
loss.backward()
print(w.grad)
#update weights
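#a minimal sketch of the update step (one gradient-descent step; the 0.01
#learning rate is an arbitrary choice here)
with torch.no_grad(): #the update itself must stay out of the computational graph
    w -= 0.01 * w.grad
w.grad.zero_() #reset the gradient before any further backward pass
print(w) #w has moved from 1.0 towards the optimum (2.0 for this sample)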
#the same regression done manually with numpy
x = np.array([1, 2, 3, 4], dtype=np.float32)
y = np.array([1, 2, 3, 4], dtype=np.float32)
# w = torch.tensor(0.0, requires_grad=True)
w = 0.0
def forward(x): #linear model f(x) = w * x; with this data w should converge to 1
    return w * x #one layer
def loss(y, y_predict): #mean squared error loss
    return ((y_predict - y)**2).mean()
#gradient computed by hand; note that np.dot already sums over the samples,
#so this is the gradient of the summed squared error (the .mean() of the
#resulting scalar is a no-op)
def gradient(x, y, y_predict):
    return np.dot(2*x, y_predict - y).mean()
print(f'prediction before training: f(5) = {forward(5):.3f}')
#training loop
learning_rate = 0.01
n_iters = 50
for epoch in range(n_iters):
    y_pred = forward(x) #forward pass through the model
    l = loss(y, y_pred) #loss for the current pass
    dw = gradient(x, y, y_pred) #'backward' step: gradient computed by hand
    w -= learning_rate * dw #weight update
    if epoch % 5 == 0:
        print(f'{epoch}: w = {w:.3f}, loss = {l:.8f}')
print(f'prediction after training: f(5) = {forward(5):.3f}')
#now do the same thing with pytorch autograd
x = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
y = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True) #autograd computes the gradient, so no manual gradient() function is needed
learning_rate = 0.01
n_iters = 50
def forward2(x):
    return w * x
def loss2(y, y_predicted):
    return ((y_predicted - y)**2).mean()
print(f'PyTorch only: prediction before training: f(5) = {forward2(5).item():.3f}')
for epoch in range(n_iters):
    y_pred = forward2(x)
    tmp_loss = loss2(y, y_pred)
    tmp_loss.backward() #backprop: the gradient is computed automatically
    #update the weights
    with torch.no_grad(): #the update must not become part of the computational graph
        w -= learning_rate * w.grad
    #zero the gradients - they must not accumulate across iterations
    w.grad.zero_()
    if epoch % (n_iters // 10) == 0:
        print(f'{epoch}: w = {w.item():.3f}, loss = {tmp_loss.item():.8f}')
print(f'PyTorch only: prediction after training: f(5) = {forward2(5).item():.3f}')
#the pytorch version converges more slowly than the numpy one above: autograd uses the
#gradient of the mean squared error, while the hand-coded numpy gradient sums over the
#samples and therefore takes larger steps per iteration
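#a sketch of the usual next step: let torch.nn and torch.optim provide the model,
#the loss and the optimizer instead of coding them by hand (same y = x data as above)
X = torch.tensor([[1.0], [2.0], [3.0], [4.0]]) #nn.Linear expects 2-D input: (n_samples, n_features)
Y = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
model = torch.nn.Linear(in_features=1, out_features=1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for epoch in range(100):
    y_pred = model(X) #forward pass
    l = criterion(y_pred, Y) #loss
    l.backward() #backward pass via autograd
    optimizer.step() #weight update
    optimizer.zero_grad() #clear the gradients for the next iteration
print(f'nn/optim version: f(5) = {model(torch.tensor([[5.0]])).item():.3f}')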