layer_benchmark.py
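"""Micro-benchmark of per-layer forward/backward wall-clock time in PyTorch.

Runs each layer for n_iters iterations (the first is treated as warm-up and
discarded) and reports the mean forward, backward, and total time per
iteration in milliseconds.
"""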
import time

import torch
import torch.nn as nn

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
n_iters = 101  # 1 warm-up iteration + 100 timed iterations
def run_benchmarks(input_tensor, grad_tensor, layer):
    forward_pass_time = []
    backward_pass_time = []
    total_time = []
    # Track gradients on the input so backward() also works for
    # parameter-free layers such as the pooling layers below.
    input_tensor.requires_grad_()
    for i in range(n_iters):
        start_time_forward = time.time()
        result = layer(input_tensor)
        if torch.cuda.is_available():
            # CUDA kernels launch asynchronously; wait for them before timing.
            torch.cuda.synchronize()
        end_time_forward = time.time()
        start_time_backward = time.time()
        result.backward(grad_tensor)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        end_time_backward = time.time()
        if i == 0:  # Ignore the first (warm-up) iteration
            continue
        forward_pass_time.append(end_time_forward - start_time_forward)
        backward_pass_time.append(end_time_backward - start_time_backward)
        total_time.append(forward_pass_time[-1] + backward_pass_time[-1])
    # Display mean time per iteration in ms over the (n_iters - 1) timed runs
    print("Forward Pass Time : {0:.4f}ms".format(1000 * sum(forward_pass_time) / (n_iters - 1)))
    print("Backward Pass Time : {0:.4f}ms".format(1000 * sum(backward_pass_time) / (n_iters - 1)))
    print("Total Time : {0:.4f}ms".format(1000 * sum(total_time) / (n_iters - 1)))
input_tensor = torch.rand(1, 3, 224, 224).to(device)
grad_tensor = torch.ones(1, 64, 224, 224).to(device)
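# 3x3 conv, stride 1: padding=1 keeps the 224x224 spatial size.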
layer = nn.Conv2d(3, 64, 3, padding=1).to(device)
print("Benchmarks for Conv3x3/1")
run_benchmarks(input_tensor, grad_tensor, layer)
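
# 5x5 conv, stride 1: padding=2 also preserves 224x224, so grad_tensor is reused.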
layer = nn.Conv2d(3, 64, 5, padding=2).to(device)
print("Benchmarks for Conv5x5/1")
run_benchmarks(input_tensor, grad_tensor, layer)
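
# Stride 2 halves the output to 112x112, so the gradient shrinks to match.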
layer = nn.Conv2d(3, 64, 3, padding=1, stride=2).to(device)
grad_tensor = torch.ones(1, 64, 112, 112).to(device)
print("Benchmarks for Conv3x3/2")
run_benchmarks(input_tensor, grad_tensor, layer)
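
# 5x5 conv with stride 2 also produces a (1, 64, 112, 112) output.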
layer = nn.Conv2d(3, 64, 5, padding=2, stride=2).to(device)
print("Benchmarks for Conv5x5/2")
run_benchmarks(input_tensor, grad_tensor, layer)
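
# Pooling keeps the 3 input channels; kernel 3, stride 2, padding 1 halves
# the spatial size to 112x112.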
layer = nn.MaxPool2d(3, stride=2, padding=1).to(device)
grad_tensor = torch.ones(1, 3, 112, 112).to(device)
print("Benchmarks for Maxpool")
run_benchmarks(input_tensor, grad_tensor, layer)
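
# Average pooling with the same geometry as the max-pool benchmark above.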
layer = nn.AvgPool2d(3, stride=2, padding=1).to(device)
print("Benchmarks for Meanpool")
run_benchmarks(input_tensor, grad_tensor, layer)
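
# BatchNorm2d preserves the input shape, so the gradient is 224x224 again.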
grad_tensor = torch.ones(1, 3, 224, 224).to(device)
layer = nn.BatchNorm2d(3).to(device)
print("Benchmarks for BatchNorm")
run_benchmarks(input_tensor, grad_tensor, layer)
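
# Fully connected 1024 -> 512 layer on a flat (1, 1024) input.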
input_tensor = torch.rand(1, 1024).to(device)
grad_tensor = torch.ones(1, 512).to(device)
layer = nn.Linear(1024, 512).to(device)
print("Benchmarks for Dense")
run_benchmarks(input_tensor, grad_tensor, layer)