diff --git a/save/bisenet_training1/automated_log.txt b/save/bisenet_training1/automated_log.txt deleted file mode 100644 index 09c3ae2..0000000 --- a/save/bisenet_training1/automated_log.txt +++ /dev/null @@ -1,126 +0,0 @@ -Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate -1 3.9792 3.9001 0.0000 0.3599 0.00050000 -2 3.5550 3.3912 0.0000 0.4236 0.00049700 -3 3.4146 3.3582 0.0000 0.4412 0.00049400 -4 3.3489 3.5177 0.0000 0.4532 0.00049099 -5 3.2692 3.6933 0.0000 0.4839 0.00048798 -6 3.2231 3.5844 0.0000 0.4533 0.00048497 -7 3.1911 4.1798 0.0000 0.4076 0.00048196 -8 3.1515 3.2479 0.0000 0.5060 0.00047895 -9 3.1084 3.5135 0.0000 0.5562 0.00047593 -10 3.1014 3.5033 0.0000 0.5431 0.00047292 -11 3.0681 3.7079 0.0000 0.5293 0.00046990 -12 3.0532 3.2032 0.0000 0.5600 0.00046688 -13 3.0484 3.3251 0.0000 0.5934 0.00046385 -14 3.0155 3.8794 0.0000 0.4733 0.00046083 -15 3.0064 3.2069 0.0000 0.5381 0.00045780 -16 2.9752 3.5603 0.0000 0.5861 0.00045477 -17 2.9469 3.5183 0.0000 0.5954 0.00045173 -18 2.9506 3.4961 0.0000 0.5658 0.00044870 -19 2.9242 3.4800 0.0000 0.5621 0.00044566 -20 2.9282 3.8263 0.0000 0.5277 0.00044262 -21 2.9019 3.6284 0.0000 0.5649 0.00043958 -22 2.8859 3.4999 0.0000 0.5970 0.00043653 -23 2.8831 3.8472 0.0000 0.5607 0.00043349 -24 2.8798 3.8634 0.0000 0.5826 0.00043044 -25 2.8788 3.8610 0.0000 0.5682 0.00042739 -26 2.8386 3.7606 0.0000 0.5850 0.00042433 -27 2.8220 3.8189 0.0000 0.6167 0.00042128 -28 2.8052 3.5966 0.0000 0.6143 0.00041822 -29 2.7949 3.8360 0.0000 0.5768 0.00041516 -30 2.8160 3.7362 0.0000 0.6112 0.00041209 -31 2.7986 3.7714 0.0000 0.5650 0.00040903 -32 2.7979 3.8223 0.0000 0.6013 0.00040596 -33 2.7663 4.0719 0.0000 0.5800 0.00040289 -34 2.7599 4.1707 0.0000 0.5984 0.00039981 -35 2.7504 3.9280 0.0000 0.5985 0.00039673 -36 2.7713 3.8951 0.0000 0.5953 0.00039366 -37 2.7262 4.0748 0.0000 0.6329 0.00039057 -38 2.7137 3.9763 0.0000 0.6288 0.00038749 -39 2.7054 4.0973 0.0000 0.6214 0.00038440 -40 2.7175 4.2663 0.0000 0.5983 0.00038131 -41 2.6948 3.9869 0.0000 0.6177 0.00037822 -42 2.6946 4.1193 0.0000 0.6079 0.00037512 -43 2.6772 4.1540 0.0000 0.6095 0.00037202 -44 2.6852 4.1108 0.0000 0.6114 0.00036892 -45 2.7099 4.7217 0.0000 0.5490 0.00036582 -46 2.6982 4.1744 0.0000 0.6263 0.00036271 -47 2.6520 4.1139 0.0000 0.6226 0.00035960 -48 2.6393 4.5040 0.0000 0.6379 0.00035649 -49 2.6345 4.5977 0.0000 0.5833 0.00035337 -50 2.6514 4.3122 0.0000 0.6229 0.00035025 -51 2.6691 4.2598 0.0000 0.6107 0.00034713 -52 2.6328 4.2943 0.0000 0.6095 0.00034400 -53 2.6049 4.5418 0.0000 0.6112 0.00034087 -54 2.6121 4.2598 0.0000 0.6026 0.00033774 -55 2.6286 4.3250 0.0000 0.6064 0.00033460 -56 2.5921 4.5597 0.0000 0.6224 0.00033147 -57 2.5757 4.5647 0.0000 0.6242 0.00032832 -58 2.5736 4.5556 0.0000 0.6137 0.00032518 -59 2.6799 3.9880 0.0000 0.5662 0.00032203 -60 2.6341 4.2541 0.0000 0.6363 0.00031888 -61 2.5860 4.4965 0.0000 0.6342 0.00031572 -62 2.5570 4.7645 0.0000 0.6327 0.00031256 -63 2.5560 4.7079 0.0000 0.6201 0.00030940 -64 2.5483 4.6514 0.0000 0.6274 0.00030624 -65 2.5457 4.7897 0.0000 0.6016 0.00030307 -66 2.5868 4.4803 0.0000 0.5811 0.00029989 -67 2.5540 4.6763 0.0000 0.6291 0.00029671 -68 2.5288 4.7423 0.0000 0.6411 0.00029353 -69 2.5221 4.9406 0.0000 0.6307 0.00029035 -70 2.5360 4.6918 0.0000 0.6291 0.00028716 -71 2.5478 4.6280 0.0000 0.6092 0.00028397 -72 2.5647 4.7926 0.0000 0.6090 0.00028077 -73 2.5213 4.7577 0.0000 0.6345 0.00027757 -74 2.5093 4.2881 0.0000 0.6069 0.00027437 -75 2.5288 4.7355 0.0000 0.6280 0.00027116 -76 2.4998 4.6320 0.0000 0.6282 0.00026794 -77 2.4980 
4.7502 0.0000 0.6302 0.00026473 -78 2.5601 4.8274 0.0000 0.5543 0.00026150 -79 2.5232 4.7493 0.0000 0.6235 0.00025828 -80 2.4924 4.9141 0.0000 0.6360 0.00025505 -81 2.4931 4.8881 0.0000 0.6248 0.00025181 -82 2.4796 4.7319 0.0000 0.6318 0.00024857 -83 2.4699 4.9581 0.0000 0.6297 0.00024533 -84 2.4683 5.0742 0.0000 0.6364 0.00024208 -85 2.4727 5.2385 0.0000 0.6274 0.00023882 -86 2.4617 5.0890 0.0000 0.6461 0.00023556 -87 2.4572 5.0742 0.0000 0.6446 0.00023230 -88 2.4538 5.0830 0.0000 0.6310 0.00022903 -89 2.4542 5.0321 0.0000 0.6410 0.00022576 -87 2.4491 5.2838 0.0000 0.6085 0.00023230 -88 2.4952 4.4150 0.0000 0.5880 0.00022903 -89 2.4883 4.9423 0.0000 0.6306 0.00022576 -90 2.4563 5.1033 0.0000 0.6239 0.00022248 -91 2.4667 4.9450 0.0000 0.6286 0.00021919 -92 2.4558 5.0789 0.0000 0.6367 0.00021590 -93 2.4310 5.0094 0.0000 0.6404 0.00021260 -94 2.4277 5.0384 0.0000 0.6380 0.00020930 -95 2.4276 5.1439 0.0000 0.6268 0.00020600 -96 2.4752 4.6773 0.0000 0.5955 0.00020268 -97 2.4505 5.0564 0.0000 0.6330 0.00019936 -98 2.4209 5.2378 0.0000 0.6422 0.00019604 -99 2.4079 5.3240 0.0000 0.6340 0.00019270 -100 2.4036 5.4218 0.0000 0.6420 0.00018937 -101 2.4018 5.3210 0.0000 0.6409 0.00018602 -102 2.3999 5.4496 0.0000 0.6426 0.00018267 -103 2.4160 5.3478 0.0000 0.6409 0.00017931 -104 2.3974 5.3341 0.0000 0.6423 0.00017594 -105 2.3928 5.5072 0.0000 0.6367 0.00017257 -106 2.3960 5.4686 0.0000 0.6307 0.00016919 -107 2.3930 5.5572 0.0000 0.6386 0.00016580 -108 2.3943 5.5435 0.0000 0.6284 0.00016241 -109 2.3897 5.5520 0.0000 0.6314 0.00015901 -110 2.3795 5.7242 0.0000 0.6352 0.00015559 -111 2.3756 5.6513 0.0000 0.6345 0.00015217 -112 2.3699 5.7341 0.0000 0.6400 0.00014875 -113 2.3912 5.6502 0.0000 0.6416 0.00014531 -114 2.3737 5.5110 0.0000 0.6451 0.00014186 -115 2.3604 5.6169 0.0000 0.6415 0.00013841 -116 2.3565 5.7300 0.0000 0.6366 0.00013494 -117 2.3539 5.8203 0.0000 0.6426 0.00013147 -118 2.3542 5.7647 0.0000 0.6388 0.00012798 -119 2.3504 5.8215 0.0000 0.6452 0.00012449 -120 2.3596 5.7152 0.0000 0.6405 0.00012098 -121 2.3441 5.9977 0.0000 0.6410 0.00011746 -122 2.3391 5.8876 0.0000 0.6410 0.00011393 \ No newline at end of file diff --git a/save/bisenet_training1/best.txt b/save/bisenet_training1/best.txt deleted file mode 100644 index 81c503b..0000000 --- a/save/bisenet_training1/best.txt +++ /dev/null @@ -1 +0,0 @@ -Best epoch is 86, with Val-IoU= 0.6461 \ No newline at end of file diff --git a/save/bisenet_training1/bisenetv1.py b/save/bisenet_training1/bisenetv1.py deleted file mode 100644 index 22fd22b..0000000 --- a/save/bisenet_training1/bisenetv1.py +++ /dev/null @@ -1,306 +0,0 @@ -#!/usr/bin/python -# -*- encoding: utf-8 -*- - - -import torch -import torch.nn as nn -import torch.nn.functional as F -import torchvision - -from resnet import Resnet18 - -from torch.nn import BatchNorm2d - - -class ConvBNReLU(nn.Module): - def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs): - super(ConvBNReLU, self).__init__() - self.conv = nn.Conv2d(in_chan, out_chan, kernel_size=ks, stride=stride, padding=padding, bias=False) - self.bn = BatchNorm2d(out_chan) - self.relu = nn.ReLU(inplace=True) - self.init_weight() - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - x = self.relu(x) - return x - - def init_weight(self): - for ly in self.children(): - if isinstance(ly, nn.Conv2d): - nn.init.kaiming_normal_(ly.weight, a=1) - if not ly.bias is None: - nn.init.constant_(ly.bias, 0) - - -class UpSample(nn.Module): - def __init__(self, n_chan, factor=2): - super(UpSample, 
self).__init__() - out_chan = n_chan * factor * factor - self.proj = nn.Conv2d(n_chan, out_chan, 1, 1, 0) - self.up = nn.PixelShuffle(factor) - self.init_weight() - - def forward(self, x): - feat = self.proj(x) - feat = self.up(feat) - return feat - - def init_weight(self): - nn.init.xavier_normal_(self.proj.weight, gain=1.0) - - -class BiSeNetOutput(nn.Module): - def __init__(self, in_chan, mid_chan, n_classes, up_factor=32, *args, **kwargs): - super(BiSeNetOutput, self).__init__() - self.up_factor = up_factor - out_chan = n_classes - self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1) - self.conv_out = nn.Conv2d(mid_chan, out_chan, kernel_size=1, bias=True) - self.up = nn.Upsample(scale_factor=up_factor, mode='bilinear', align_corners=False) - self.init_weight() - - def forward(self, x): - x = self.conv(x) - x = self.conv_out(x) - x = self.up(x) - return x - - def init_weight(self): - for ly in self.children(): - if isinstance(ly, nn.Conv2d): - nn.init.kaiming_normal_(ly.weight, a=1) - if not ly.bias is None: - nn.init.constant_(ly.bias, 0) - - def get_params(self): - wd_params, nowd_params = [], [] - for name, module in self.named_modules(): - if isinstance(module, (nn.Linear, nn.Conv2d)): - wd_params.append(module.weight) - if not module.bias is None: - nowd_params.append(module.bias) - elif isinstance(module, nn.modules.batchnorm._BatchNorm): - nowd_params += list(module.parameters()) - return wd_params, nowd_params - - -class AttentionRefinementModule(nn.Module): - def __init__(self, in_chan, out_chan, *args, **kwargs): - super(AttentionRefinementModule, self).__init__() - self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1) - self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False) - self.bn_atten = BatchNorm2d(out_chan) - # self.sigmoid_atten = nn.Sigmoid() - self.init_weight() - - def forward(self, x): - feat = self.conv(x) - atten = torch.mean(feat, dim=(2, 3), keepdim=True) - atten = self.conv_atten(atten) - atten = self.bn_atten(atten) - # atten = self.sigmoid_atten(atten) - atten = atten.sigmoid() - out = torch.mul(feat, atten) - return out - - def init_weight(self): - for ly in self.children(): - if isinstance(ly, nn.Conv2d): - nn.init.kaiming_normal_(ly.weight, a=1) - if not ly.bias is None: - nn.init.constant_(ly.bias, 0) - - -class ContextPath(nn.Module): - def __init__(self, *args, **kwargs): - super(ContextPath, self).__init__() - self.resnet = Resnet18() - self.arm16 = AttentionRefinementModule(256, 128) - self.arm32 = AttentionRefinementModule(512, 128) - self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1) - self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1) - self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0) - self.up32 = nn.Upsample(scale_factor=2.0) - self.up16 = nn.Upsample(scale_factor=2.0) - - self.init_weight() - - def forward(self, x): - feat8, feat16, feat32 = self.resnet(x) - - avg = torch.mean(feat32, dim=(2, 3), keepdim=True) - avg = self.conv_avg(avg) - - feat32_arm = self.arm32(feat32) - feat32_sum = feat32_arm + avg - feat32_up = self.up32(feat32_sum) - feat32_up = self.conv_head32(feat32_up) - - feat16_arm = self.arm16(feat16) - feat16_sum = feat16_arm + feat32_up - feat16_up = self.up16(feat16_sum) - feat16_up = self.conv_head16(feat16_up) - - return feat16_up, feat32_up # x8, x16 - - def init_weight(self): - for ly in self.children(): - if isinstance(ly, nn.Conv2d): - nn.init.kaiming_normal_(ly.weight, a=1) - if not ly.bias is None: - 
nn.init.constant_(ly.bias, 0) - - def get_params(self): - wd_params, nowd_params = [], [] - for name, module in self.named_modules(): - if isinstance(module, (nn.Linear, nn.Conv2d)): - wd_params.append(module.weight) - if not module.bias is None: - nowd_params.append(module.bias) - elif isinstance(module, nn.modules.batchnorm._BatchNorm): - nowd_params += list(module.parameters()) - return wd_params, nowd_params - - -class SpatialPath(nn.Module): - def __init__(self, *args, **kwargs): - super(SpatialPath, self).__init__() - self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3) - self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) - self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) - self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0) - self.init_weight() - - def forward(self, x): - feat = self.conv1(x) - feat = self.conv2(feat) - feat = self.conv3(feat) - feat = self.conv_out(feat) - return feat - - def init_weight(self): - for ly in self.children(): - if isinstance(ly, nn.Conv2d): - nn.init.kaiming_normal_(ly.weight, a=1) - if not ly.bias is None: - nn.init.constant_(ly.bias, 0) - - def get_params(self): - wd_params, nowd_params = [], [] - for name, module in self.named_modules(): - if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): - wd_params.append(module.weight) - if not module.bias is None: - nowd_params.append(module.bias) - elif isinstance(module, nn.modules.batchnorm._BatchNorm): - nowd_params += list(module.parameters()) - return wd_params, nowd_params - - -class FeatureFusionModule(nn.Module): - def __init__(self, in_chan, out_chan, *args, **kwargs): - super(FeatureFusionModule, self).__init__() - self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0) - ## use conv-bn instead of 2 layer mlp, so that tensorrt 7.2.3.4 can work for fp16 - self.conv = nn.Conv2d(out_chan, out_chan, kernel_size=1, stride=1, padding=0, bias=False) - self.bn = nn.BatchNorm2d(out_chan) - # self.conv1 = nn.Conv2d(out_chan, - # out_chan//4, - # kernel_size = 1, - # stride = 1, - # padding = 0, - # bias = False) - # self.conv2 = nn.Conv2d(out_chan//4, - # out_chan, - # kernel_size = 1, - # stride = 1, - # padding = 0, - # bias = False) - # self.relu = nn.ReLU(inplace=True) - self.init_weight() - - def forward(self, fsp, fcp): - fcat = torch.cat([fsp, fcp], dim=1) - feat = self.convblk(fcat) - atten = torch.mean(feat, dim=(2, 3), keepdim=True) - atten = self.conv(atten) - atten = self.bn(atten) - # atten = self.conv1(atten) - # atten = self.relu(atten) - # atten = self.conv2(atten) - atten = atten.sigmoid() - feat_atten = torch.mul(feat, atten) - feat_out = feat_atten + feat - return feat_out - - def init_weight(self): - for ly in self.children(): - if isinstance(ly, nn.Conv2d): - nn.init.kaiming_normal_(ly.weight, a=1) - if not ly.bias is None: - nn.init.constant_(ly.bias, 0) - - def get_params(self): - wd_params, nowd_params = [], [] - for name, module in self.named_modules(): - if isinstance(module, (nn.Linear, nn.Conv2d)): - wd_params.append(module.weight) - if not module.bias is None: - nowd_params.append(module.bias) - elif isinstance(module, nn.modules.batchnorm._BatchNorm): - nowd_params += list(module.parameters()) - return wd_params, nowd_params - - -class Net(nn.Module): - def __init__(self, n_classes, aux_mode='train', *args, **kwargs): - super(Net, self).__init__() - self.cp = ContextPath() - self.sp = SpatialPath() - self.ffm = FeatureFusionModule(256, 256) - self.conv_out = BiSeNetOutput(256, 256, n_classes, up_factor=8) - 
self.aux_mode = aux_mode - if self.aux_mode == 'train': - self.conv_out16 = BiSeNetOutput(128, 64, n_classes, up_factor=8) - self.conv_out32 = BiSeNetOutput(128, 64, n_classes, up_factor=16) - self.init_weight() - - def forward(self, x): - H, W = x.size()[2:] - feat_cp8, feat_cp16 = self.cp(x) - feat_sp = self.sp(x) - feat_fuse = self.ffm(feat_sp, feat_cp8) - - feat_out = self.conv_out(feat_fuse) - if self.aux_mode == 'train': - feat_out16 = self.conv_out16(feat_cp8) - feat_out32 = self.conv_out32(feat_cp16) - return feat_out, feat_out16, feat_out32 - elif self.aux_mode == 'eval': - return (feat_out,) - elif self.aux_mode == 'pred': - feat_out = feat_out.argmax(dim=1) - return feat_out - else: - raise NotImplementedError - - def init_weight(self): - for ly in self.children(): - if isinstance(ly, nn.Conv2d): - nn.init.kaiming_normal_(ly.weight, a=1) - if not ly.bias is None: - nn.init.constant_(ly.bias, 0) - - def get_params(self): - wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], [] - for name, child in self.named_children(): - child_wd_params, child_nowd_params = child.get_params() - if isinstance(child, (FeatureFusionModule, BiSeNetOutput)): - lr_mul_wd_params += child_wd_params - lr_mul_nowd_params += child_nowd_params - else: - wd_params += child_wd_params - nowd_params += child_nowd_params - return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params diff --git a/save/bisenet_training1/model.txt b/save/bisenet_training1/model.txt deleted file mode 100644 index 3e6245c..0000000 --- a/save/bisenet_training1/model.txt +++ /dev/null @@ -1,181 +0,0 @@ -DataParallel( - (module): Net( - (cp): ContextPath( - (resnet): Resnet18( - (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) - (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) - (layer1): Sequential( - (0): BasicBlock( - (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - (1): BasicBlock( - (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - ) - (layer2): Sequential( - (0): BasicBlock( - (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) - (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - (downsample): Sequential( - (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False) - (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - ) - (1): BasicBlock( - (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), 
bias=False) - (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - ) - (layer3): Sequential( - (0): BasicBlock( - (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) - (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - (downsample): Sequential( - (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False) - (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - ) - (1): BasicBlock( - (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - ) - (layer4): Sequential( - (0): BasicBlock( - (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) - (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - (downsample): Sequential( - (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False) - (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - ) - (1): BasicBlock( - (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - ) - ) - (arm16): AttentionRefinementModule( - (conv): ConvBNReLU( - (conv): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - (conv_atten): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) - (bn_atten): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (arm32): AttentionRefinementModule( - (conv): ConvBNReLU( - (conv): Conv2d(512, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - (conv_atten): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) - (bn_atten): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (conv_head32): ConvBNReLU( - (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): 
ReLU(inplace=True) - ) - (conv_head16): ConvBNReLU( - (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - (conv_avg): ConvBNReLU( - (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) - (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - (up32): Upsample(scale_factor=2.0, mode='nearest') - (up16): Upsample(scale_factor=2.0, mode='nearest') - ) - (sp): SpatialPath( - (conv1): ConvBNReLU( - (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) - (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - (conv2): ConvBNReLU( - (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) - (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - (conv3): ConvBNReLU( - (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) - (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - (conv_out): ConvBNReLU( - (conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) - (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - ) - (ffm): FeatureFusionModule( - (convblk): ConvBNReLU( - (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) - (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) - (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (conv_out): BiSeNetOutput( - (conv): ConvBNReLU( - (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - (conv_out): Conv2d(256, 20, kernel_size=(1, 1), stride=(1, 1)) - (up): Upsample(scale_factor=8.0, mode='bilinear') - ) - (conv_out16): BiSeNetOutput( - (conv): ConvBNReLU( - (conv): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - (conv_out): Conv2d(64, 20, kernel_size=(1, 1), stride=(1, 1)) - (up): Upsample(scale_factor=8.0, mode='bilinear') - ) - (conv_out32): BiSeNetOutput( - (conv): ConvBNReLU( - (conv): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) - (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (relu): ReLU(inplace=True) - ) - (conv_out): Conv2d(64, 20, kernel_size=(1, 1), stride=(1, 1)) - (up): Upsample(scale_factor=16.0, mode='bilinear') - ) - ) -) \ No newline at end of file diff --git a/save/bisenet_training1/opts.txt b/save/bisenet_training1/opts.txt deleted file mode 100644 index 429067f..0000000 --- a/save/bisenet_training1/opts.txt +++ /dev/null @@ -1 +0,0 @@ -Namespace(cuda=True, model='bisenetv1', state=None, port=8097, datadir='/content/cityscapes', height=512, num_epochs=150, num_workers=4, batch_size=8, steps_loss=50, steps_plot=50, epochs_save=0, 
savedir='bisenet_training1', decoder=False, pretrainedEncoder=None, visualize=False, iouTrain=False, iouVal=True, resume=True, erfnet=False)
\ No newline at end of file
diff --git a/save/enet_fine_tuning/automated_log.txt b/save/enet_fine_tuning/automated_log.txt
new file mode 100644
index 0000000..acd5882
--- /dev/null
+++ b/save/enet_fine_tuning/automated_log.txt
@@ -0,0 +1,28 @@
+Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate
+1 0.8305 0.5683 0.0000 0.3718 0.00050000
+1 0.8246 0.9483 0.0000 0.4791 0.00005000
+2 0.6623 0.8353 0.0000 0.5023 0.00005000
+3 0.5222 0.6835 0.0000 0.5324 0.00005000
+4 0.4194 0.6383 0.0000 0.5477 0.00005000
+1 0.8242 0.9656 0.0000 0.0215 0.00005000
+2 0.6608 0.8214 0.0000 0.0174 0.00005000
+1 0.8246 0.9579 0.0000 0.5736 0.00005000
+2 0.6616 0.8275 0.0000 0.5857 0.00005000
+3 0.5234 0.6817 0.0000 0.5788 0.00005000
+4 0.4193 0.6404 0.0000 0.5825 0.00005000
+5 0.3840 0.6279 0.0000 0.5822 0.00005000
+6 0.3689 0.6160 0.0000 0.5892 0.00005000
+7 0.3583 0.6157 0.0000 0.5810 0.00000500
+8 0.3558 0.6186 0.0000 0.5814 0.00000500
+9 0.3507 0.6105 0.0000 0.5832 0.00000500
+10 0.3520 0.6148 0.0000 0.5830 0.00000500
+11 0.3518 0.6102 0.0000 0.5855 0.00000500
+12 0.3506 0.6107 0.0000 0.5847 0.00000500
+13 0.3487 0.6135 0.0000 0.5836 0.00000500
+14 0.3489 0.6152 0.0000 0.5847 0.00000050
+15 0.3484 0.6141 0.0000 0.5884 0.00000050
+16 0.3483 0.6179 0.0000 0.5805 0.00000050
+17 0.3470 0.6068 0.0000 0.5859 0.00000050
+18 0.3475 0.6137 0.0000 0.5865 0.00000050
+19 0.3468 0.6183 0.0000 0.5865 0.00000050
+20 0.3484 0.6223 0.0000 0.5820 0.00000050
\ No newline at end of file
diff --git a/save/enet_fine_tuning/best.txt b/save/enet_fine_tuning/best.txt
new file mode 100644
index 0000000..333a99b
--- /dev/null
+++ b/save/enet_fine_tuning/best.txt
@@ -0,0 +1 @@
+Best epoch is 6, with Val-IoU= 0.5892
\ No newline at end of file
diff --git a/save/enet_fine_tuning/enet.py b/save/enet_fine_tuning/enet.py
new file mode 100644
index 0000000..73fc7a3
--- /dev/null
+++ b/save/enet_fine_tuning/enet.py
@@ -0,0 +1,555 @@
+# Author: David Silva
+# Link: https://github.com/davidtvs/PyTorch-ENet/
+
+import torch.nn as nn
+import torch
+
+
+class InitialBlock(nn.Module):
+    """The initial block is composed of two branches:
+    1. a main branch which performs a regular convolution with stride 2;
+    2. an extension branch which performs max-pooling.
+
+    Doing both operations in parallel and concatenating their results
+    allows for efficient downsampling and expansion. The main branch
+    outputs 13 feature maps while the extension branch outputs 3, for a
+    total of 16 feature maps after concatenation.
+
+    Keyword arguments:
+    - in_channels (int): the number of input channels.
+    - out_channels (int): the number of output channels.
+    - kernel_size (int, optional): the kernel size of the filters used in
+    the convolution layer. Default: 3.
+    - padding (int, optional): zero-padding added to both sides of the
+    input. Default: 0.
+    - bias (bool, optional): Adds a learnable bias to the output if
+    ``True``. Default: False.
+    - relu (bool, optional): When ``True`` ReLU is used as the activation
+    function; otherwise, PReLU is used. Default: True.
+ + """ + + def __init__(self, in_channels, out_channels, bias=False, relu=True): + super().__init__() + + if relu: + activation = nn.ReLU + else: + activation = nn.PReLU + + # Main branch - As stated above the number of output channels for this + # branch is the total minus 3, since the remaining channels come from + # the extension branch + self.main_branch = nn.Conv2d(in_channels, out_channels - 3, kernel_size=3, stride=2, padding=1, bias=bias) + + # Extension branch + self.ext_branch = nn.MaxPool2d(3, stride=2, padding=1) + + # Initialize batch normalization to be used after concatenation + self.batch_norm = nn.BatchNorm2d(out_channels) + + # PReLU layer to apply after concatenating the branches + self.out_activation = activation() + + def forward(self, x): + main = self.main_branch(x) + ext = self.ext_branch(x) + + # Concatenate branches + out = torch.cat((main, ext), 1) + + # Apply batch normalization + out = self.batch_norm(out) + + return self.out_activation(out) + + +class RegularBottleneck(nn.Module): + """Regular bottlenecks are the main building block of ENet. + Main branch: + 1. Shortcut connection. + + Extension branch: + 1. 1x1 convolution which decreases the number of channels by + ``internal_ratio``, also called a projection; + 2. regular, dilated or asymmetric convolution; + 3. 1x1 convolution which increases the number of channels back to + ``channels``, also called an expansion; + 4. dropout as a regularizer. + + Keyword arguments: + - channels (int): the number of input and output channels. + - internal_ratio (int, optional): a scale factor applied to + ``channels`` used to compute the number of + channels after the projection. eg. given ``channels`` equal to 128 and + internal_ratio equal to 2 the number of channels after the projection + is 64. Default: 4. + - kernel_size (int, optional): the kernel size of the filters used in + the convolution layer described above in item 2 of the extension + branch. Default: 3. + - padding (int, optional): zero-padding added to both sides of the + input. Default: 0. + - dilation (int, optional): spacing between kernel elements for the + convolution described in item 2 of the extension branch. Default: 1. + asymmetric (bool, optional): flags if the convolution described in + item 2 of the extension branch is asymmetric or not. Default: False. + - dropout_prob (float, optional): probability of an element to be + zeroed. Default: 0 (no dropout). + - bias (bool, optional): Adds a learnable bias to the output if + ``True``. Default: False. + - relu (bool, optional): When ``True`` ReLU is used as the activation + function; otherwise, PReLU is used. Default: True. + + """ + + def __init__( + self, + channels, + internal_ratio=4, + kernel_size=3, + padding=0, + dilation=1, + asymmetric=False, + dropout_prob=0, + bias=False, + relu=True, + ): + super().__init__() + + # Check in the internal_scale parameter is within the expected range + # [1, channels] + if internal_ratio <= 1 or internal_ratio > channels: + raise RuntimeError( + "Value out of range. Expected value in the " + "interval [1, {0}], got internal_scale={1}.".format(channels, internal_ratio) + ) + + internal_channels = channels // internal_ratio + + if relu: + activation = nn.ReLU + else: + activation = nn.PReLU + + # Main branch - shortcut connection + + # Extension branch - 1x1 convolution, followed by a regular, dilated or + # asymmetric convolution, followed by another 1x1 convolution, and, + # finally, a regularizer (spatial dropout). Number of channels is constant. 
+
+        # 1x1 projection convolution
+        self.ext_conv1 = nn.Sequential(
+            nn.Conv2d(channels, internal_channels, kernel_size=1, stride=1, bias=bias),
+            nn.BatchNorm2d(internal_channels),
+            activation(),
+        )
+
+        # If the convolution is asymmetric we split the main convolution in
+        # two. E.g. for a 5x5 asymmetric convolution we have two convolutions:
+        # the first is 5x1 and the second is 1x5.
+        if asymmetric:
+            self.ext_conv2 = nn.Sequential(
+                nn.Conv2d(
+                    internal_channels,
+                    internal_channels,
+                    kernel_size=(kernel_size, 1),
+                    stride=1,
+                    padding=(padding, 0),
+                    dilation=dilation,
+                    bias=bias,
+                ),
+                nn.BatchNorm2d(internal_channels),
+                activation(),
+                nn.Conv2d(
+                    internal_channels,
+                    internal_channels,
+                    kernel_size=(1, kernel_size),
+                    stride=1,
+                    padding=(0, padding),
+                    dilation=dilation,
+                    bias=bias,
+                ),
+                nn.BatchNorm2d(internal_channels),
+                activation(),
+            )
+        else:
+            self.ext_conv2 = nn.Sequential(
+                nn.Conv2d(
+                    internal_channels,
+                    internal_channels,
+                    kernel_size=kernel_size,
+                    stride=1,
+                    padding=padding,
+                    dilation=dilation,
+                    bias=bias,
+                ),
+                nn.BatchNorm2d(internal_channels),
+                activation(),
+            )
+
+        # 1x1 expansion convolution
+        self.ext_conv3 = nn.Sequential(
+            nn.Conv2d(internal_channels, channels, kernel_size=1, stride=1, bias=bias),
+            nn.BatchNorm2d(channels),
+            activation(),
+        )
+
+        self.ext_regul = nn.Dropout2d(p=dropout_prob)
+
+        # PReLU layer to apply after adding the branches
+        self.out_activation = activation()
+
+    def forward(self, x):
+        # Main branch shortcut
+        main = x
+
+        # Extension branch
+        ext = self.ext_conv1(x)
+        ext = self.ext_conv2(ext)
+        ext = self.ext_conv3(ext)
+        ext = self.ext_regul(ext)
+
+        # Add main and extension branches
+        out = main + ext
+
+        return self.out_activation(out)
+
+
+class DownsamplingBottleneck(nn.Module):
+    """Downsampling bottlenecks further downsample the feature map size.
+
+    Main branch:
+    1. max pooling with stride 2; indices are saved to be used for
+    unpooling later.
+
+    Extension branch:
+    1. 2x2 convolution with stride 2 that decreases the number of channels
+    by ``internal_ratio``, also called a projection;
+    2. regular convolution (by default, 3x3);
+    3. 1x1 convolution which increases the number of channels to
+    ``out_channels``, also called an expansion;
+    4. dropout as a regularizer.
+
+    Keyword arguments:
+    - in_channels (int): the number of input channels.
+    - out_channels (int): the number of output channels.
+    - internal_ratio (int, optional): a scale factor applied to ``channels``
+    used to compute the number of channels after the projection. e.g. given
+    ``channels`` equal to 128 and internal_ratio equal to 2 the number of
+    channels after the projection is 64. Default: 4.
+    - return_indices (bool, optional): if ``True``, will return the max
+    indices along with the outputs. Useful when unpooling later.
+    - dropout_prob (float, optional): probability of an element to be
+    zeroed. Default: 0 (no dropout).
+    - bias (bool, optional): Adds a learnable bias to the output if
+    ``True``. Default: False.
+    - relu (bool, optional): When ``True`` ReLU is used as the activation
+    function; otherwise, PReLU is used. Default: True.
+ + """ + + def __init__( + self, in_channels, out_channels, internal_ratio=4, return_indices=False, dropout_prob=0, bias=False, relu=True + ): + super().__init__() + + # Store parameters that are needed later + self.return_indices = return_indices + + # Check in the internal_scale parameter is within the expected range + # [1, channels] + if internal_ratio <= 1 or internal_ratio > in_channels: + raise RuntimeError( + "Value out of range. Expected value in the " + "interval [1, {0}], got internal_scale={1}. ".format(in_channels, internal_ratio) + ) + + internal_channels = in_channels // internal_ratio + + if relu: + activation = nn.ReLU + else: + activation = nn.PReLU + + # Main branch - max pooling followed by feature map (channels) padding + self.main_max1 = nn.MaxPool2d(2, stride=2, return_indices=return_indices) + + # Extension branch - 2x2 convolution, followed by a regular, dilated or + # asymmetric convolution, followed by another 1x1 convolution. Number + # of channels is doubled. + + # 2x2 projection convolution with stride 2 + self.ext_conv1 = nn.Sequential( + nn.Conv2d(in_channels, internal_channels, kernel_size=2, stride=2, bias=bias), + nn.BatchNorm2d(internal_channels), + activation(), + ) + + # Convolution + self.ext_conv2 = nn.Sequential( + nn.Conv2d(internal_channels, internal_channels, kernel_size=3, stride=1, padding=1, bias=bias), + nn.BatchNorm2d(internal_channels), + activation(), + ) + + # 1x1 expansion convolution + self.ext_conv3 = nn.Sequential( + nn.Conv2d(internal_channels, out_channels, kernel_size=1, stride=1, bias=bias), + nn.BatchNorm2d(out_channels), + activation(), + ) + + self.ext_regul = nn.Dropout2d(p=dropout_prob) + + # PReLU layer to apply after concatenating the branches + self.out_activation = activation() + + def forward(self, x): + # Main branch shortcut + if self.return_indices: + main, max_indices = self.main_max1(x) + else: + main = self.main_max1(x) + + # Extension branch + ext = self.ext_conv1(x) + ext = self.ext_conv2(ext) + ext = self.ext_conv3(ext) + ext = self.ext_regul(ext) + + # Main branch channel padding + n, ch_ext, h, w = ext.size() + ch_main = main.size()[1] + padding = torch.zeros(n, ch_ext - ch_main, h, w) + + # Before concatenating, check if main is on the CPU or GPU and + # convert padding accordingly + if main.is_cuda: + padding = padding.cuda() + + # Concatenate + main = torch.cat((main, padding), 1) + + # Add main and extension branches + out = main + ext + + return self.out_activation(out), max_indices + + +class UpsamplingBottleneck(nn.Module): + """The upsampling bottlenecks upsample the feature map resolution using max + pooling indices stored from the corresponding downsampling bottleneck. + + Main branch: + 1. 1x1 convolution with stride 1 that decreases the number of channels by + ``internal_ratio``, also called a projection; + 2. max unpool layer using the max pool indices from the corresponding + downsampling max pool layer. + + Extension branch: + 1. 1x1 convolution with stride 1 that decreases the number of channels by + ``internal_ratio``, also called a projection; + 2. transposed convolution (by default, 3x3); + 3. 1x1 convolution which increases the number of channels to + ``out_channels``, also called an expansion; + 4. dropout as a regularizer. + + Keyword arguments: + - in_channels (int): the number of input channels. + - out_channels (int): the number of output channels. 
+    - internal_ratio (int, optional): a scale factor applied to ``in_channels``
+    used to compute the number of channels after the projection. e.g. given
+    ``in_channels`` equal to 128 and ``internal_ratio`` equal to 2 the number
+    of channels after the projection is 64. Default: 4.
+    - dropout_prob (float, optional): probability of an element to be zeroed.
+    Default: 0 (no dropout).
+    - bias (bool, optional): Adds a learnable bias to the output if ``True``.
+    Default: False.
+    - relu (bool, optional): When ``True`` ReLU is used as the activation
+    function; otherwise, PReLU is used. Default: True.
+
+    """
+
+    def __init__(self, in_channels, out_channels, internal_ratio=4, dropout_prob=0, bias=False, relu=True):
+        super().__init__()
+
+        # Check if the internal_ratio parameter is within the expected range
+        # [1, channels]
+        if internal_ratio <= 1 or internal_ratio > in_channels:
+            raise RuntimeError(
+                "Value out of range. Expected value in the "
+                "interval [1, {0}], got internal_ratio={1}. ".format(in_channels, internal_ratio)
+            )
+
+        internal_channels = in_channels // internal_ratio
+
+        if relu:
+            activation = nn.ReLU
+        else:
+            activation = nn.PReLU
+
+        # Main branch - 1x1 projection convolution followed by max unpooling
+        self.main_conv1 = nn.Sequential(
+            nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias), nn.BatchNorm2d(out_channels)
+        )
+
+        # Remember that the stride is the same as the kernel_size, just like
+        # the max pooling layers
+        self.main_unpool1 = nn.MaxUnpool2d(kernel_size=2)
+
+        # Extension branch - 1x1 convolution, followed by a transposed
+        # convolution, followed by another 1x1 convolution. Number of
+        # channels is reduced.
+
+        # 1x1 projection convolution with stride 1
+        self.ext_conv1 = nn.Sequential(
+            nn.Conv2d(in_channels, internal_channels, kernel_size=1, bias=bias),
+            nn.BatchNorm2d(internal_channels),
+            activation(),
+        )
+
+        # Transposed convolution
+        self.ext_tconv1 = nn.ConvTranspose2d(internal_channels, internal_channels, kernel_size=2, stride=2, bias=bias)
+        self.ext_tconv1_bnorm = nn.BatchNorm2d(internal_channels)
+        self.ext_tconv1_activation = activation()
+
+        # 1x1 expansion convolution
+        self.ext_conv2 = nn.Sequential(
+            nn.Conv2d(internal_channels, out_channels, kernel_size=1, bias=bias), nn.BatchNorm2d(out_channels)
+        )
+
+        self.ext_regul = nn.Dropout2d(p=dropout_prob)
+
+        # PReLU layer to apply after adding the branches
+        self.out_activation = activation()
+
+    def forward(self, x, max_indices, output_size):
+        # Main branch shortcut
+        main = self.main_conv1(x)
+        main = self.main_unpool1(main, max_indices, output_size=output_size)
+
+        # Extension branch
+        ext = self.ext_conv1(x)
+        ext = self.ext_tconv1(ext, output_size=output_size)
+        ext = self.ext_tconv1_bnorm(ext)
+        ext = self.ext_tconv1_activation(ext)
+        ext = self.ext_conv2(ext)
+        ext = self.ext_regul(ext)
+
+        # Add main and extension branches
+        out = main + ext
+
+        return self.out_activation(out)
+
+
+class Net(nn.Module):
+    """Generate the ENet model.
+
+    Keyword arguments:
+    - num_classes (int): the number of classes to segment.
+    - encoder_relu (bool, optional): When ``True`` ReLU is used as the
+    activation function in the encoder blocks/layers; otherwise, PReLU
+    is used. Default: False.
+    - decoder_relu (bool, optional): When ``True`` ReLU is used as the
+    activation function in the decoder blocks/layers; otherwise, PReLU
+    is used. Default: True.
+ + """ + + def __init__(self, num_classes, encoder_relu=False, decoder_relu=True): + super().__init__() + + self.initial_block = InitialBlock(3, 16, relu=encoder_relu) + + # Stage 1 - Encoder + self.downsample1_0 = DownsamplingBottleneck(16, 64, return_indices=True, dropout_prob=0.01, relu=encoder_relu) + self.regular1_1 = RegularBottleneck(64, padding=1, dropout_prob=0.01, relu=encoder_relu) + self.regular1_2 = RegularBottleneck(64, padding=1, dropout_prob=0.01, relu=encoder_relu) + self.regular1_3 = RegularBottleneck(64, padding=1, dropout_prob=0.01, relu=encoder_relu) + self.regular1_4 = RegularBottleneck(64, padding=1, dropout_prob=0.01, relu=encoder_relu) + + # Stage 2 - Encoder + self.downsample2_0 = DownsamplingBottleneck(64, 128, return_indices=True, dropout_prob=0.1, relu=encoder_relu) + self.regular2_1 = RegularBottleneck(128, padding=1, dropout_prob=0.1, relu=encoder_relu) + self.dilated2_2 = RegularBottleneck(128, dilation=2, padding=2, dropout_prob=0.1, relu=encoder_relu) + self.asymmetric2_3 = RegularBottleneck( + 128, kernel_size=5, padding=2, asymmetric=True, dropout_prob=0.1, relu=encoder_relu + ) + self.dilated2_4 = RegularBottleneck(128, dilation=4, padding=4, dropout_prob=0.1, relu=encoder_relu) + self.regular2_5 = RegularBottleneck(128, padding=1, dropout_prob=0.1, relu=encoder_relu) + self.dilated2_6 = RegularBottleneck(128, dilation=8, padding=8, dropout_prob=0.1, relu=encoder_relu) + self.asymmetric2_7 = RegularBottleneck( + 128, kernel_size=5, asymmetric=True, padding=2, dropout_prob=0.1, relu=encoder_relu + ) + self.dilated2_8 = RegularBottleneck(128, dilation=16, padding=16, dropout_prob=0.1, relu=encoder_relu) + + # Stage 3 - Encoder + self.regular3_0 = RegularBottleneck(128, padding=1, dropout_prob=0.1, relu=encoder_relu) + self.dilated3_1 = RegularBottleneck(128, dilation=2, padding=2, dropout_prob=0.1, relu=encoder_relu) + self.asymmetric3_2 = RegularBottleneck( + 128, kernel_size=5, padding=2, asymmetric=True, dropout_prob=0.1, relu=encoder_relu + ) + self.dilated3_3 = RegularBottleneck(128, dilation=4, padding=4, dropout_prob=0.1, relu=encoder_relu) + self.regular3_4 = RegularBottleneck(128, padding=1, dropout_prob=0.1, relu=encoder_relu) + self.dilated3_5 = RegularBottleneck(128, dilation=8, padding=8, dropout_prob=0.1, relu=encoder_relu) + self.asymmetric3_6 = RegularBottleneck( + 128, kernel_size=5, asymmetric=True, padding=2, dropout_prob=0.1, relu=encoder_relu + ) + self.dilated3_7 = RegularBottleneck(128, dilation=16, padding=16, dropout_prob=0.1, relu=encoder_relu) + + # Stage 4 - Decoder + self.upsample4_0 = UpsamplingBottleneck(128, 64, dropout_prob=0.1, relu=decoder_relu) + self.regular4_1 = RegularBottleneck(64, padding=1, dropout_prob=0.1, relu=decoder_relu) + self.regular4_2 = RegularBottleneck(64, padding=1, dropout_prob=0.1, relu=decoder_relu) + + # Stage 5 - Decoder + self.upsample5_0 = UpsamplingBottleneck(64, 16, dropout_prob=0.1, relu=decoder_relu) + self.regular5_1 = RegularBottleneck(16, padding=1, dropout_prob=0.1, relu=decoder_relu) + self.transposed_conv = nn.ConvTranspose2d(16, num_classes, kernel_size=3, stride=2, padding=1, bias=False) + + def forward(self, x): + # Initial block + input_size = x.size() + x = self.initial_block(x) + + # Stage 1 - Encoder + stage1_input_size = x.size() + x, max_indices1_0 = self.downsample1_0(x) + x = self.regular1_1(x) + x = self.regular1_2(x) + x = self.regular1_3(x) + x = self.regular1_4(x) + + # Stage 2 - Encoder + stage2_input_size = x.size() + x, max_indices2_0 = self.downsample2_0(x) + x = 
self.regular2_1(x) + x = self.dilated2_2(x) + x = self.asymmetric2_3(x) + x = self.dilated2_4(x) + x = self.regular2_5(x) + x = self.dilated2_6(x) + x = self.asymmetric2_7(x) + x = self.dilated2_8(x) + + # Stage 3 - Encoder + x = self.regular3_0(x) + x = self.dilated3_1(x) + x = self.asymmetric3_2(x) + x = self.dilated3_3(x) + x = self.regular3_4(x) + x = self.dilated3_5(x) + x = self.asymmetric3_6(x) + x = self.dilated3_7(x) + + # Stage 4 - Decoder + x = self.upsample4_0(x, max_indices2_0, output_size=stage2_input_size) + x = self.regular4_1(x) + x = self.regular4_2(x) + + # Stage 5 - Decoder + x = self.upsample5_0(x, max_indices1_0, output_size=stage1_input_size) + x = self.regular5_1(x) + x = self.transposed_conv(x, output_size=input_size) + + return x diff --git a/save/enet_fine_tuning/model.txt b/save/enet_fine_tuning/model.txt new file mode 100644 index 0000000..e3db718 --- /dev/null +++ b/save/enet_fine_tuning/model.txt @@ -0,0 +1,540 @@ +Net( + (initial_block): InitialBlock( + (main_branch): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) + (ext_branch): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (batch_norm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (out_activation): PReLU(num_parameters=1) + ) + (downsample1_0): DownsamplingBottleneck( + (main_max1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (ext_conv1): Sequential( + (0): Conv2d(16, 4, kernel_size=(2, 2), stride=(2, 2), bias=False) + (1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(4, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.01, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (regular1_1): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.01, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (regular1_2): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(16, 
64, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.01, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (regular1_3): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.01, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (regular1_4): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.01, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (downsample2_0): DownsamplingBottleneck( + (main_max1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (ext_conv1): Sequential( + (0): Conv2d(64, 16, kernel_size=(2, 2), stride=(2, 2), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(16, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (regular2_1): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): 
PReLU(num_parameters=1) + ) + (dilated2_2): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (asymmetric2_3): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + (3): Conv2d(32, 32, kernel_size=(1, 5), stride=(1, 1), padding=(0, 2), bias=False) + (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (5): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (dilated2_4): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(4, 4), dilation=(4, 4), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (regular2_5): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (dilated2_6): RegularBottleneck( + (ext_conv1): Sequential( + (0): 
Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(8, 8), dilation=(8, 8), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (asymmetric2_7): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + (3): Conv2d(32, 32, kernel_size=(1, 5), stride=(1, 1), padding=(0, 2), bias=False) + (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (5): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (dilated2_8): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(16, 16), dilation=(16, 16), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (regular3_0): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (dilated3_1): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, 
momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (asymmetric3_2): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + (3): Conv2d(32, 32, kernel_size=(1, 5), stride=(1, 1), padding=(0, 2), bias=False) + (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (5): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (dilated3_3): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(4, 4), dilation=(4, 4), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (regular3_4): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (dilated3_5): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): 
Sequential( + (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(8, 8), dilation=(8, 8), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (asymmetric3_6): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + (3): Conv2d(32, 32, kernel_size=(1, 5), stride=(1, 1), padding=(0, 2), bias=False) + (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (5): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (dilated3_7): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv2): Sequential( + (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(16, 16), dilation=(16, 16), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_conv3): Sequential( + (0): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): PReLU(num_parameters=1) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): PReLU(num_parameters=1) + ) + (upsample4_0): UpsamplingBottleneck( + (main_conv1): Sequential( + (0): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + ) + (main_unpool1): MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0)) + (ext_conv1): Sequential( + (0): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): ReLU() + ) + (ext_tconv1): ConvTranspose2d(32, 32, kernel_size=(2, 2), stride=(2, 2), bias=False) + (ext_tconv1_bnorm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (ext_tconv1_activation): ReLU() + (ext_conv2): Sequential( + (0): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): ReLU() + ) + (regular4_1): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, 
momentum=0.1, affine=True, track_running_stats=True) + (2): ReLU() + ) + (ext_conv2): Sequential( + (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): ReLU() + ) + (ext_conv3): Sequential( + (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): ReLU() + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): ReLU() + ) + (regular4_2): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): ReLU() + ) + (ext_conv2): Sequential( + (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): ReLU() + ) + (ext_conv3): Sequential( + (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): ReLU() + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): ReLU() + ) + (upsample5_0): UpsamplingBottleneck( + (main_conv1): Sequential( + (0): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + ) + (main_unpool1): MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0)) + (ext_conv1): Sequential( + (0): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): ReLU() + ) + (ext_tconv1): ConvTranspose2d(16, 16, kernel_size=(2, 2), stride=(2, 2), bias=False) + (ext_tconv1_bnorm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (ext_tconv1_activation): ReLU() + (ext_conv2): Sequential( + (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): ReLU() + ) + (regular5_1): RegularBottleneck( + (ext_conv1): Sequential( + (0): Conv2d(16, 4, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): ReLU() + ) + (ext_conv2): Sequential( + (0): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): ReLU() + ) + (ext_conv3): Sequential( + (0): Conv2d(4, 16, kernel_size=(1, 1), stride=(1, 1), bias=False) + (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) + (2): ReLU() + ) + (ext_regul): Dropout2d(p=0.1, inplace=False) + (out_activation): ReLU() + ) + (transposed_conv): ConvTranspose2d(16, 20, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) +) \ No newline at end of file
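The asymmetric2_* and asymmetric3_* blocks in the dump above factor a 5x5 convolution into a 5x1 followed by a 1x5. A minimal standalone sketch (not part of the saved files; the channel count 32 is taken from the dump) showing that the pair preserves spatial size while using far fewer weights:

import torch
import torch.nn as nn

# 5x1 then 1x5 covers the same 5x5 receptive field with 2*5*C*C weights
# instead of 25*C*C; for C = 32 that is 10240 vs 25600 parameters.
asym = nn.Sequential(
    nn.Conv2d(32, 32, kernel_size=(5, 1), padding=(2, 0), bias=False),
    nn.Conv2d(32, 32, kernel_size=(1, 5), padding=(0, 2), bias=False),
)
full = nn.Conv2d(32, 32, kernel_size=5, padding=2, bias=False)
x = torch.randn(1, 32, 64, 64)
assert asym(x).shape == full(x).shape  # both keep the 64x64 spatial size
print(sum(p.numel() for p in asym.parameters()))  # 10240
print(sum(p.numel() for p in full.parameters()))  # 25600

diff --git a/save/enet_fine_tuning/opts.txt b/save/enet_fine_tuning/opts.txt new file mode 100644 index 0000000..bb5c55b --- /dev/null +++ b/save/enet_fine_tuning/opts.txt @@ -0,0 +1 @@ +Namespace(cuda=True, model='enet', state=None, port=8097, datadir='../cityscapes/', height=512, width=1024, num_epochs=20, num_workers=4, batch_size=4, steps_loss=50, steps_plot=50, epochs_save=0, 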
savedir='enet_training1', decoder=False, pretrainedEncoder=None, visualize=False, iouTrain=False, iouVal=True, resume=False, colab=False, download_step=10, loss='ce', logit_norm=False, entropic_scale=10.0, loadDir='../trained_models/', loadWeights='enet_pretrained.pth') \ No newline at end of file diff --git a/save/erfnet_fine_tuning/automated_log.txt b/save/erfnet_fine_tuning/automated_log.txt new file mode 100644 index 0000000..035d9d7 --- /dev/null +++ b/save/erfnet_fine_tuning/automated_log.txt @@ -0,0 +1,21 @@ +Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate +1 0.2792 0.3822 0.0000 0.7180 0.00005000 +2 0.2424 0.3708 0.0000 0.7039 0.00004774 +3 0.2239 0.3393 0.0000 0.7140 0.00004548 +4 0.1832 0.3012 0.0000 0.7051 0.00004320 +5 0.1651 0.2956 0.0000 0.7164 0.00004090 +6 0.1598 0.2911 0.0000 0.7265 0.00003859 +7 0.1574 0.2917 0.0000 0.7171 0.00003627 +8 0.1552 0.2973 0.0000 0.7225 0.00003393 +9 0.1533 0.2931 0.0000 0.7058 0.00003157 +10 0.1528 0.2917 0.0000 0.7195 0.00002919 +11 0.1503 0.2998 0.0000 0.7171 0.00002679 +12 0.1490 0.2877 0.0000 0.7131 0.00002437 +13 0.1486 0.2937 0.0000 0.7217 0.00002192 +14 0.1475 0.2950 0.0000 0.7117 0.00001944 +15 0.1468 0.2944 0.0000 0.7180 0.00001692 +16 0.1462 0.2961 0.0000 0.7187 0.00001436 +17 0.1451 0.2920 0.0000 0.7176 0.00001175 +18 0.1441 0.2938 0.0000 0.7226 0.00000907 +19 0.1437 0.2939 0.0000 0.7211 0.00000629 +20 0.1427 0.2920 0.0000 0.7251 0.00000337 \ No newline at end of file diff --git a/save/erfnet_fine_tuning/automated_log_encoder.txt b/save/erfnet_fine_tuning/automated_log_encoder.txt new file mode 100644 index 0000000..7576ea8 --- /dev/null +++ b/save/erfnet_fine_tuning/automated_log_encoder.txt @@ -0,0 +1,7 @@ +Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate +1 1.3049 0.9971 0.0000 0.2120 0.00005000 +1 1.0388 0.5528 0.0000 0.3681 0.00005000 +2 0.4795 0.4236 0.0000 0.4420 0.00004774 +1 1.0087 0.4826 0.0000 0.3771 0.00005000 +1 1.0734 0.6002 0.0000 0.3289 0.00005000 +1 1.2663 0.9611 0.0000 0.2421 0.00050000 \ No newline at end of file diff --git a/save/erfnet_fine_tuning/best.txt b/save/erfnet_fine_tuning/best.txt new file mode 100644 index 0000000..52e46d0 --- /dev/null +++ b/save/erfnet_fine_tuning/best.txt @@ -0,0 +1 @@ +Best epoch is 6, with Val-IoU= 0.7265 \ No newline at end of file diff --git a/save/erfnet_fine_tuning/best_encoder.txt b/save/erfnet_fine_tuning/best_encoder.txt new file mode 100644 index 0000000..7fa4526 --- /dev/null +++ b/save/erfnet_fine_tuning/best_encoder.txt @@ -0,0 +1 @@ +Best epoch is 1, with Val-IoU= 0.2421 \ No newline at end of file
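The automated_log files above are whitespace-separated tables with a single header row, so they load directly into pandas. A minimal sketch (pandas assumed available; Train-IoU is all zeros simply because these runs set iouTrain=False in opts.txt):

import pandas as pd

# One row per epoch: Epoch, Train-loss, Test-loss, Train-IoU, Test-IoU, learningRate.
log = pd.read_csv("save/erfnet_fine_tuning/automated_log.txt", sep=r"\s+")
best = log.loc[log["Test-IoU"].idxmax()]
print(int(best["Epoch"]), float(best["Test-IoU"]))  # 6 0.7265, matching best.txt

diff --git a/save/erfnet_fine_tuning/erfnet.py b/save/erfnet_fine_tuning/erfnet.py new file mode 100644 index 0000000..8d2bc9d --- /dev/null +++ b/save/erfnet_fine_tuning/erfnet.py @@ -0,0 +1,157 @@ +# ERFNet full model definition for Pytorch +# Sept 2017 +# Eduardo Romera +####################### + +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F + + +class DownsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + + self.conv = nn.Conv2d(ninput, noutput - ninput, (3, 3), stride=2, padding=1, bias=True) + self.pool = nn.MaxPool2d(2, stride=2) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = torch.cat([self.conv(input), self.pool(input)], 1) + output = self.bn(output) + return F.relu(output) + + +class non_bottleneck_1d(nn.Module): + def __init__(self, chann, dropprob, dilated): + super().__init__() + + self.conv3x1_1 = 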
nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1, 0), bias=True) + + self.conv1x3_1 = nn.Conv2d(chann, chann, (1, 3), stride=1, padding=(0, 1), bias=True) + + self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) + + self.conv3x1_2 = nn.Conv2d( + chann, chann, (3, 1), stride=1, padding=(1 * dilated, 0), bias=True, dilation=(dilated, 1) + ) + + self.conv1x3_2 = nn.Conv2d( + chann, chann, (1, 3), stride=1, padding=(0, 1 * dilated), bias=True, dilation=(1, dilated) + ) + + self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) + + self.dropout = nn.Dropout2d(dropprob) + + def forward(self, input): + + output = self.conv3x1_1(input) + output = F.relu(output) + output = self.conv1x3_1(output) + output = self.bn1(output) + output = F.relu(output) + + output = self.conv3x1_2(output) + output = F.relu(output) + output = self.conv1x3_2(output) + output = self.bn2(output) + + if self.dropout.p != 0: + output = self.dropout(output) + + return F.relu(output + input) # +input = identity (residual connection) + + +class Encoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + self.initial_block = DownsamplerBlock(3, 16) + + self.layers = nn.ModuleList() + + self.layers.append(DownsamplerBlock(16, 64)) + + for x in range(0, 5): # 5 times + self.layers.append(non_bottleneck_1d(64, 0.03, 1)) + + self.layers.append(DownsamplerBlock(64, 128)) + + for x in range(0, 2): # 2 times + self.layers.append(non_bottleneck_1d(128, 0.3, 2)) + self.layers.append(non_bottleneck_1d(128, 0.3, 4)) + self.layers.append(non_bottleneck_1d(128, 0.3, 8)) + self.layers.append(non_bottleneck_1d(128, 0.3, 16)) + + # Only in encoder mode: + self.output_conv = nn.Conv2d(128, num_classes, 1, stride=1, padding=0, bias=True) + + def forward(self, input, predict=False): + output = self.initial_block(input) + + for layer in self.layers: + output = layer(output) + + if predict: + output = self.output_conv(output) + + return output + + +class UpsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + return F.relu(output) + + +class Decoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + + self.layers = nn.ModuleList() + + self.layers.append(UpsamplerBlock(128, 64)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + + self.layers.append(UpsamplerBlock(64, 16)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + + self.output_conv = nn.ConvTranspose2d(16, num_classes, 2, stride=2, padding=0, output_padding=0, bias=True) + + def forward(self, input): + output = input + + for layer in self.layers: + output = layer(output) + + output = self.output_conv(output) + + return output + + +# ERFNet +class Net(nn.Module): + def __init__(self, num_classes, encoder=None): # use encoder to pass pretrained encoder + super().__init__() + + if encoder is None: + self.encoder = Encoder(num_classes) + else: + self.encoder = encoder + self.decoder = Decoder(num_classes) + + def forward(self, input, only_encode=False): + if only_encode: + return self.encoder.forward(input, predict=True) + else: + output = self.encoder(input) # predict=False by default + return self.decoder.forward(output)
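A minimal usage sketch for the Net class above (not part of the saved file; the 512x1024 input size comes from opts.txt and the 20-class head from the model dumps below, and the import assumes this saved module is on the path):

import torch
from erfnet import Net  # the erfnet.py snapshot saved with this run

model = Net(num_classes=20).eval()   # 19 Cityscapes train classes + void
x = torch.randn(1, 3, 512, 1024)     # height/width from opts.txt
with torch.no_grad():
    logits = model(x)                # full encoder-decoder: (1, 20, 512, 1024)
    enc = model(x, only_encode=True) # encoder head at 1/8 scale: (1, 20, 64, 128)

diff --git a/save/erfnet_fine_tuning/model.txt b/save/erfnet_fine_tuning/model.txt new file 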
mode 100644 index 0000000..df34e27 --- /dev/null +++ b/save/erfnet_fine_tuning/model.txt @@ -0,0 +1,136 @@ +DataParallel( + (module): Net( + (encoder): Encoder( + (initial_block): DownsamplerBlock( + (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (layers): ModuleList( + (0): DownsamplerBlock( + (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-5): 5 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.03, inplace=False) + ) + (6): DownsamplerBlock( + (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (7): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (8): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (9): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (10): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 
1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (11): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (12): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (13): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (14): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + ) + (output_conv): Conv2d(128, 20, kernel_size=(1, 1), stride=(1, 1)) + ) + (decoder): Decoder( + (layers): ModuleList( + (0): UpsamplerBlock( + (conv): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-2): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), 
stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + (3): UpsamplerBlock( + (conv): ConvTranspose2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (4-5): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + ) + (output_conv): ConvTranspose2d(16, 20, kernel_size=(2, 2), stride=(2, 2)) + ) + ) +) \ No newline at end of file diff --git a/save/erfnet_fine_tuning/model_encoder.txt b/save/erfnet_fine_tuning/model_encoder.txt new file mode 100644 index 0000000..df34e27 --- /dev/null +++ b/save/erfnet_fine_tuning/model_encoder.txt @@ -0,0 +1,136 @@ +DataParallel( + (module): Net( + (encoder): Encoder( + (initial_block): DownsamplerBlock( + (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (layers): ModuleList( + (0): DownsamplerBlock( + (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-5): 5 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.03, inplace=False) + ) + (6): DownsamplerBlock( + (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (7): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) 
+ (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (8): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (9): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (10): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (11): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (12): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (13): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): 
Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (14): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + ) + (output_conv): Conv2d(128, 20, kernel_size=(1, 1), stride=(1, 1)) + ) + (decoder): Decoder( + (layers): ModuleList( + (0): UpsamplerBlock( + (conv): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-2): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + (3): UpsamplerBlock( + (conv): ConvTranspose2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (4-5): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + ) + (output_conv): ConvTranspose2d(16, 20, kernel_size=(2, 2), stride=(2, 2)) + ) + ) +) \ No newline at end of file diff --git a/save/erfnet_fine_tuning/opts.txt b/save/erfnet_fine_tuning/opts.txt new file mode 100644 index 0000000..738f448 --- /dev/null +++ b/save/erfnet_fine_tuning/opts.txt @@ -0,0 +1 @@ +Namespace(cuda=True, model='erfnet', port=8097, datadir='../cityscapes/', height=512, width=1024, num_epochs=20, num_workers=4, batch_size=6, steps_loss=50, steps_plot=50, epochs_save=0, savedir='erfnet_fine_tuning1', decoder=False, pretrainedEncoder=None, visualize=False, iouTrain=False, iouVal=True, resume=False, colab=False, download_step=10, loss='ce', logit_norm=False, entropic_scale=10.0, loadDir='../trained_models/', loadWeights='erfnet_pretrained.pth', fine_tuning=True) \ No newline at end of file diff --git 
a/save/erfnet_isomax_plus_ce/automated_log.txt b/save/erfnet_isomax_plus_ce/automated_log.txt new file mode 100644 index 0000000..59f7767 --- /dev/null +++ b/save/erfnet_isomax_plus_ce/automated_log.txt @@ -0,0 +1,51 @@ +Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate +1 1.4116 0.8857 0.0000 0.3092 0.00050000 +2 0.7178 0.7104 0.0000 0.3461 0.00049099 +3 0.6045 0.7188 0.0000 0.3511 0.00048196 +4 0.5432 0.7027 0.0000 0.3856 0.00047292 +5 0.5381 0.5633 0.0000 0.4225 0.00046385 +6 0.4858 0.5437 0.0000 0.4473 0.00045477 +7 0.4585 0.4952 0.0000 0.4749 0.00044566 +8 0.4390 0.4927 0.0000 0.4828 0.00043653 +9 0.4231 0.4630 0.0000 0.4909 0.00042739 +10 0.4071 0.5541 0.0000 0.4373 0.00041822 +11 0.3933 0.4799 0.0000 0.4869 0.00040903 +12 0.3664 0.4432 0.0000 0.5015 0.00039981 +13 0.3590 0.4316 0.0000 0.5094 0.00039057 +14 0.3661 0.4112 0.0000 0.5134 0.00038131 +15 0.3499 0.4778 0.0000 0.4776 0.00037202 +16 0.3323 0.4238 0.0000 0.5180 0.00036271 +17 0.3342 0.4013 0.0000 0.5284 0.00035337 +18 0.3144 0.3915 0.0000 0.5407 0.00034400 +19 0.3148 0.3958 0.0000 0.5249 0.00033460 +20 0.3041 0.3750 0.0000 0.5473 0.00032518 +21 0.2990 0.3708 0.0000 0.5623 0.00031572 +22 0.2955 0.4095 0.0000 0.5426 0.00030624 +23 0.2839 0.3662 0.0000 0.5769 0.00029671 +24 0.2883 0.3667 0.0000 0.5700 0.00028716 +25 0.2691 0.3522 0.0000 0.5600 0.00027757 +26 0.2693 0.3651 0.0000 0.5585 0.00026794 +27 0.2626 0.4633 0.0000 0.5247 0.00025828 +28 0.2671 0.3540 0.0000 0.5735 0.00024857 +29 0.2542 0.3463 0.0000 0.5840 0.00023882 +30 0.2475 0.3565 0.0000 0.5577 0.00022903 +31 0.2582 0.3529 0.0000 0.5711 0.00021919 +32 0.2402 0.3324 0.0000 0.5929 0.00020930 +33 0.2397 0.3495 0.0000 0.5775 0.00019936 +34 0.2257 0.3282 0.0000 0.5980 0.00018937 +35 0.2297 0.3372 0.0000 0.5914 0.00017931 +36 0.2256 0.3285 0.0000 0.6008 0.00016919 +37 0.2129 0.3266 0.0000 0.6142 0.00015901 +38 0.2161 0.3364 0.0000 0.6104 0.00014875 +39 0.2083 0.3286 0.0000 0.6141 0.00013841 +40 0.2025 0.3093 0.0000 0.6216 0.00012798 +41 0.1994 0.3224 0.0000 0.6158 0.00011746 +42 0.2024 0.3445 0.0000 0.5943 0.00010684 +43 0.1984 0.3220 0.0000 0.6114 0.00009609 +44 0.1889 0.3178 0.0000 0.6142 0.00008521 +45 0.1865 0.3149 0.0000 0.6227 0.00007417 +46 0.1838 0.3107 0.0000 0.6287 0.00006295 +47 0.1819 0.3193 0.0000 0.6241 0.00005149 +48 0.1784 0.3031 0.0000 0.6282 0.00003975 +49 0.1757 0.3068 0.0000 0.6322 0.00002759 +50 0.1737 0.3010 0.0000 0.6363 0.00001479 \ No newline at end of file diff --git a/save/erfnet_isomax_plus_ce/automated_log_encoder.txt b/save/erfnet_isomax_plus_ce/automated_log_encoder.txt new file mode 100644 index 0000000..7a107c6 --- /dev/null +++ b/save/erfnet_isomax_plus_ce/automated_log_encoder.txt @@ -0,0 +1,55 @@ +Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate +1 2.0599 2.3816 0.0000 0.1952 0.00050000 +2 0.9879 2.3745 0.0000 0.2353 0.00049099 +3 0.8029 2.3205 0.0000 0.2607 0.00048196 +4 0.7361 2.2786 0.0000 0.2918 0.00047292 +1 2.0826 1.5419 0.0000 0.1893 0.00050000 +2 0.9274 0.8754 0.0000 0.2507 0.00049099 +3 0.7647 0.7608 0.0000 0.2754 0.00048196 +4 0.6799 0.6781 0.0000 0.2975 0.00047292 +5 0.6079 0.6279 0.0000 0.3155 0.00046385 +6 0.5526 0.5952 0.0000 0.3326 0.00045477 +7 0.5320 0.5783 0.0000 0.3349 0.00044566 +8 0.5013 0.4949 0.0000 0.3739 0.00043653 +9 0.4658 0.4788 0.0000 0.3879 0.00042739 +10 0.4938 0.5169 0.0000 0.3774 0.00041822 +11 0.4585 0.4746 0.0000 0.3940 0.00040903 +12 0.4242 0.4775 0.0000 0.4080 0.00039981 +13 0.4021 0.4296 0.0000 0.4253 0.00039057 +14 0.3865 0.4109 0.0000 0.4419 0.00038131 +15 0.3741 0.4131 0.0000 
0.4418 0.00037202 +16 0.3703 0.4394 0.0000 0.4257 0.00036271 +17 0.3526 0.3875 0.0000 0.4626 0.00035337 +18 0.3381 0.3984 0.0000 0.4575 0.00034400 +19 0.3286 0.3790 0.0000 0.4836 0.00033460 +20 0.3157 0.3547 0.0000 0.4987 0.00032518 +21 0.3138 0.3637 0.0000 0.4844 0.00031572 +22 0.2985 0.3807 0.0000 0.4765 0.00030624 +23 0.3009 0.3542 0.0000 0.4993 0.00029671 +24 0.3122 0.3742 0.0000 0.4869 0.00028716 +25 0.2973 0.3277 0.0000 0.5225 0.00027757 +26 0.2874 0.3211 0.0000 0.5317 0.00026794 +27 0.2722 0.3306 0.0000 0.5195 0.00025828 +28 0.2620 0.3260 0.0000 0.5323 0.00024857 +29 0.2583 0.3064 0.0000 0.5581 0.00023882 +30 0.2535 0.3133 0.0000 0.5394 0.00022903 +31 0.2470 0.3127 0.0000 0.5434 0.00021919 +32 0.2475 0.3111 0.0000 0.5352 0.00020930 +33 0.2367 0.2980 0.0000 0.5675 0.00019936 +34 0.2302 0.2877 0.0000 0.5710 0.00018937 +35 0.2318 0.3034 0.0000 0.5480 0.00017931 +36 0.2257 0.2956 0.0000 0.5666 0.00016919 +37 0.2200 0.2880 0.0000 0.5846 0.00015901 +38 0.2137 0.2770 0.0000 0.6001 0.00014875 +39 0.2098 0.2815 0.0000 0.5834 0.00013841 +40 0.2086 0.2754 0.0000 0.5867 0.00012798 +41 0.2009 0.2641 0.0000 0.6154 0.00011746 +42 0.2026 0.2776 0.0000 0.6067 0.00010684 +43 0.1935 0.2665 0.0000 0.6123 0.00009609 +44 0.1905 0.2644 0.0000 0.6080 0.00008521 +45 0.1878 0.2627 0.0000 0.6037 0.00007417 +46 0.1828 0.2657 0.0000 0.6111 0.00006295 +47 0.1791 0.2641 0.0000 0.6178 0.00005149 +48 0.1776 0.2570 0.0000 0.6227 0.00003975 +49 0.1743 0.2576 0.0000 0.6223 0.00002759 +50 0.1724 0.2512 0.0000 0.6280 0.00001479 \ No newline at end of file diff --git a/save/erfnet_isomax_plus_ce/best.txt b/save/erfnet_isomax_plus_ce/best.txt new file mode 100644 index 0000000..733960d --- /dev/null +++ b/save/erfnet_isomax_plus_ce/best.txt @@ -0,0 +1 @@ +Best epoch is 50, with Val-IoU= 0.6363 \ No newline at end of file diff --git a/save/erfnet_isomax_plus_ce/best_encoder.txt b/save/erfnet_isomax_plus_ce/best_encoder.txt new file mode 100644 index 0000000..9f96790 --- /dev/null +++ b/save/erfnet_isomax_plus_ce/best_encoder.txt @@ -0,0 +1 @@ +Best epoch is 50, with Val-IoU= 0.6280 \ No newline at end of file diff --git a/save/erfnet_isomax_plus_ce/erfnet_isomax_plus.py b/save/erfnet_isomax_plus_ce/erfnet_isomax_plus.py new file mode 100644 index 0000000..cb4aec8 --- /dev/null +++ b/save/erfnet_isomax_plus_ce/erfnet_isomax_plus.py @@ -0,0 +1,183 @@ +# ERFNet full model definition for Pytorch +# Sept 2017 +# Eduardo Romera +####################### + +# Reference: https://github.com/dlmacedo/entropic-out-of-distribution-detection/blob/9ad451ca815160e5339dc21319cea2b859e3e101/losses/isomaxplus.py + +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F + + +class IsoMaxPlusLossFirstPart(nn.Module): + """This part replaces the model classifier output layer""" + + def __init__(self, num_classes, encoder, temperature=1.0): + super(IsoMaxPlusLossFirstPart, self).__init__() + self.num_classes = num_classes + self.temperature = temperature + self.prototypes = nn.Parameter(torch.Tensor(1, num_classes, 1, 1)) + self.distance_scale = nn.Parameter(torch.Tensor(1)) + if encoder: + self.output_conv = nn.Conv2d(128, num_classes, 1, stride=1, padding=0, bias=True) + else: + self.output_conv = nn.ConvTranspose2d(16, num_classes, 2, stride=2, padding=0, output_padding=0, bias=True) + nn.init.normal_(self.prototypes, mean=0.0, std=1.0) + nn.init.constant_(self.distance_scale, 1.0) + + def forward(self, features): + features = self.output_conv(features) + distances = torch.abs(self.distance_scale) * 
torch.abs(features - self.prototypes) + logits = -distances + return logits / self.temperature + + +class DownsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + + self.conv = nn.Conv2d(ninput, noutput - ninput, (3, 3), stride=2, padding=1, bias=True) + self.pool = nn.MaxPool2d(2, stride=2) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = torch.cat([self.conv(input), self.pool(input)], 1) + output = self.bn(output) + return F.relu(output) + + +class non_bottleneck_1d(nn.Module): + def __init__(self, chann, dropprob, dilated): + super().__init__() + + self.conv3x1_1 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1, 0), bias=True) + + self.conv1x3_1 = nn.Conv2d(chann, chann, (1, 3), stride=1, padding=(0, 1), bias=True) + + self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) + + self.conv3x1_2 = nn.Conv2d( + chann, chann, (3, 1), stride=1, padding=(1 * dilated, 0), bias=True, dilation=(dilated, 1) + ) + + self.conv1x3_2 = nn.Conv2d( + chann, chann, (1, 3), stride=1, padding=(0, 1 * dilated), bias=True, dilation=(1, dilated) + ) + + self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) + + self.dropout = nn.Dropout2d(dropprob) + + def forward(self, input): + + output = self.conv3x1_1(input) + output = F.relu(output) + output = self.conv1x3_1(output) + output = self.bn1(output) + output = F.relu(output) + + output = self.conv3x1_2(output) + output = F.relu(output) + output = self.conv1x3_2(output) + output = self.bn2(output) + + if self.dropout.p != 0: + output = self.dropout(output) + + return F.relu(output + input) # +input = identity (residual connection) + + +class Encoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + self.initial_block = DownsamplerBlock(3, 16) + + self.layers = nn.ModuleList() + + self.layers.append(DownsamplerBlock(16, 64)) + + for x in range(0, 5): # 5 times + self.layers.append(non_bottleneck_1d(64, 0.03, 1)) + + self.layers.append(DownsamplerBlock(64, 128)) + + for x in range(0, 2): # 2 times + self.layers.append(non_bottleneck_1d(128, 0.3, 2)) + self.layers.append(non_bottleneck_1d(128, 0.3, 4)) + self.layers.append(non_bottleneck_1d(128, 0.3, 8)) + self.layers.append(non_bottleneck_1d(128, 0.3, 16)) + + # Only in encoder mode: + + self.output_conv = IsoMaxPlusLossFirstPart(num_classes, encoder=True) + + def forward(self, input, predict=False): + output = self.initial_block(input) + + for layer in self.layers: + output = layer(output) + + if predict: + output = self.output_conv(output) + + return output + + +class UpsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + return F.relu(output) + + +class Decoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + + self.layers = nn.ModuleList() + + self.layers.append(UpsamplerBlock(128, 64)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + + self.layers.append(UpsamplerBlock(64, 16)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + + self.output_conv = IsoMaxPlusLossFirstPart(num_classes, encoder=False) + + def forward(self, input): + output = input + + for layer in self.layers: + output = layer(output) + + output = self.output_conv(output) + + return 
output + + +# ERFNet +class Net(nn.Module): + def __init__(self, num_classes, encoder=None): # use encoder to pass pretrained encoder + super().__init__() + + if encoder is None: + self.encoder = Encoder(num_classes) + else: + self.encoder = encoder + self.decoder = Decoder(num_classes) + + def forward(self, input, only_encode=False): + if only_encode: + return self.encoder.forward(input, predict=True) + else: + output = self.encoder(input) # predict=False by default + return self.decoder.forward(output)
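A short sketch (not part of the saved files) of how logits produced by IsoMaxPlusLossFirstPart are commonly turned into a per-pixel anomaly score; the minimum-distance (max-logit) rule follows the IsoMax+ repository referenced in the file header, the input size is the 512x1024 crop from opts.txt, and the import assumes the saved module is on the path:

import torch
from erfnet_isomax_plus import Net  # the snapshot saved with this run

model = Net(num_classes=20).eval()
x = torch.randn(1, 3, 512, 1024)
with torch.no_grad():
    # Per class: logits = -|distance_scale| * |features - prototypes| / temperature.
    logits = model(x)  # (1, 20, 512, 1024)
score, pred = logits.max(dim=1)  # pred: per-pixel class; score: negated minimum distance
anomaly = -score                 # larger = farther from every class prototype

diff --git a/save/erfnet_isomax_plus_ce/model.txt b/save/erfnet_isomax_plus_ce/model.txt new file mode 100644 index 0000000..1602ec1 --- /dev/null +++ b/save/erfnet_isomax_plus_ce/model.txt @@ -0,0 +1,140 @@ +DataParallel( + (module): Net( + (encoder): Encoder( + (initial_block): DownsamplerBlock( + (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (layers): ModuleList( + (0): DownsamplerBlock( + (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-5): 5 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.03, inplace=False) + ) + (6): DownsamplerBlock( + (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (7): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (8): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (9): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), 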
stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (10): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (11): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (12): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (13): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (14): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, 
eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + ) + (output_conv): IsoMaxPlusLossFirstPart( + (output_conv): Conv2d(128, 20, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + (decoder): Decoder( + (layers): ModuleList( + (0): UpsamplerBlock( + (conv): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-2): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + (3): UpsamplerBlock( + (conv): ConvTranspose2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (4-5): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + ) + (output_conv): IsoMaxPlusLossFirstPart( + (output_conv): ConvTranspose2d(16, 20, kernel_size=(2, 2), stride=(2, 2)) + ) + ) + ) +) \ No newline at end of file diff --git a/save/erfnet_isomax_plus_ce/model_encoder.txt b/save/erfnet_isomax_plus_ce/model_encoder.txt new file mode 100644 index 0000000..1602ec1 --- /dev/null +++ b/save/erfnet_isomax_plus_ce/model_encoder.txt @@ -0,0 +1,140 @@ +DataParallel( + (module): Net( + (encoder): Encoder( + (initial_block): DownsamplerBlock( + (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (layers): ModuleList( + (0): DownsamplerBlock( + (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-5): 5 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.03, inplace=False) + ) + (6): DownsamplerBlock( + (conv): 
Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (7): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (8): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (9): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (10): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (11): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (12): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + 
(bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (13): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (14): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + ) + (output_conv): IsoMaxPlusLossFirstPart( + (output_conv): Conv2d(128, 20, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + (decoder): Decoder( + (layers): ModuleList( + (0): UpsamplerBlock( + (conv): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-2): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + (3): UpsamplerBlock( + (conv): ConvTranspose2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (4-5): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + ) + (output_conv): IsoMaxPlusLossFirstPart( + (output_conv): ConvTranspose2d(16, 20, kernel_size=(2, 
2), stride=(2, 2)) + ) + ) + ) +) \ No newline at end of file diff --git a/save/erfnet_isomax_plus_ce/opts.txt b/save/erfnet_isomax_plus_ce/opts.txt new file mode 100644 index 0000000..794d859 --- /dev/null +++ b/save/erfnet_isomax_plus_ce/opts.txt @@ -0,0 +1 @@ +Namespace(cuda=True, model='erfnet_isomax_plus', state=None, port=8097, datadir='../cityscapes/', height=512, width=1024, num_epochs=50, num_workers=4, batch_size=6, steps_loss=50, steps_plot=50, epochs_save=0, savedir='erfnet_isomax_plus_ce', decoder=False, pretrainedEncoder=None, visualize=False, iouTrain=False, iouVal=True, resume=False, erfnet=True, colab=False, download_step=10, loss='ce', logit_norm=False, entropic_scale=10.0) \ No newline at end of file diff --git a/save/erfnet_isomax_plus_focal/automated_log.txt b/save/erfnet_isomax_plus_focal/automated_log.txt new file mode 100644 index 0000000..9c3644c --- /dev/null +++ b/save/erfnet_isomax_plus_focal/automated_log.txt @@ -0,0 +1,51 @@ +Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate +1 0.8668 0.6150 0.0000 0.2560 0.00050000 +2 0.3597 0.3443 0.0000 0.3531 0.00049099 +3 0.3040 0.3208 0.0000 0.3564 0.00048196 +4 0.2673 0.2633 0.0000 0.4057 0.00047292 +5 0.2475 0.2713 0.0000 0.4004 0.00046385 +6 0.2372 0.2925 0.0000 0.3773 0.00045477 +7 0.2395 0.2361 0.0000 0.4550 0.00044566 +8 0.2156 0.2397 0.0000 0.4493 0.00043653 +9 0.2079 0.2246 0.0000 0.4642 0.00042739 +10 0.2115 0.2383 0.0000 0.4536 0.00041822 +11 0.2054 0.2275 0.0000 0.4688 0.00040903 +12 0.1961 0.2135 0.0000 0.4869 0.00039981 +13 0.1913 0.2232 0.0000 0.4736 0.00039057 +14 0.1937 0.2175 0.0000 0.4895 0.00038131 +15 0.1843 0.2138 0.0000 0.4893 0.00037202 +16 0.1828 0.1969 0.0000 0.5107 0.00036271 +17 0.1768 0.1934 0.0000 0.5089 0.00035337 +18 0.1717 0.2093 0.0000 0.4765 0.00034400 +19 0.1778 0.1884 0.0000 0.5250 0.00033460 +20 0.1688 0.1992 0.0000 0.4941 0.00032518 +21 0.1632 0.2252 0.0000 0.4642 0.00031572 +22 0.1670 0.1908 0.0000 0.5145 0.00030624 +23 0.1640 0.1950 0.0000 0.5384 0.00029671 +24 0.1543 0.1946 0.0000 0.5326 0.00028716 +25 0.1531 0.1837 0.0000 0.5425 0.00027757 +26 0.1497 0.1823 0.0000 0.5325 0.00026794 +27 0.1502 0.2186 0.0000 0.4881 0.00025828 +28 0.1485 0.1954 0.0000 0.5288 0.00024857 +29 0.1471 0.1769 0.0000 0.5522 0.00023882 +30 0.1421 0.1755 0.0000 0.5278 0.00022903 +31 0.1414 0.1713 0.0000 0.5584 0.00021919 +32 0.1351 0.1793 0.0000 0.5595 0.00020930 +33 0.1364 0.1885 0.0000 0.5232 0.00019936 +34 0.1329 0.1797 0.0000 0.5343 0.00018937 +35 0.1293 0.1659 0.0000 0.5821 0.00017931 +36 0.1280 0.1732 0.0000 0.5571 0.00016919 +37 0.1250 0.1663 0.0000 0.5800 0.00015901 +38 0.1261 0.1725 0.0000 0.5787 0.00014875 +39 0.1202 0.1688 0.0000 0.5840 0.00013841 +40 0.1208 0.1663 0.0000 0.5805 0.00012798 +41 0.1164 0.1659 0.0000 0.5816 0.00011746 +42 0.1143 0.1812 0.0000 0.5686 0.00010684 +43 0.1125 0.1661 0.0000 0.5921 0.00009609 +44 0.1102 0.1664 0.0000 0.5913 0.00008521 +45 0.1084 0.1652 0.0000 0.5907 0.00007417 +46 0.1054 0.1658 0.0000 0.5997 0.00006295 +47 0.1040 0.1650 0.0000 0.6112 0.00005149 +48 0.1021 0.1560 0.0000 0.6053 0.00003975 +49 0.1002 0.1611 0.0000 0.6115 0.00002759 +50 0.0993 0.1615 0.0000 0.6111 0.00001479 \ No newline at end of file diff --git a/save/erfnet_isomax_plus_focal/automated_log_encoder.txt b/save/erfnet_isomax_plus_focal/automated_log_encoder.txt new file mode 100644 index 0000000..1979b43 --- /dev/null +++ b/save/erfnet_isomax_plus_focal/automated_log_encoder.txt @@ -0,0 +1,51 @@ +Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate +1 1.4821 0.6205 0.0000 0.1583 
0.00050000 +2 0.7193 0.5843 0.0000 0.1998 0.00049099 +3 0.6009 0.5193 0.0000 0.2160 0.00048196 +4 0.5229 0.4731 0.0000 0.2398 0.00047292 +5 0.4649 0.4376 0.0000 0.2583 0.00046385 +6 0.4285 0.4082 0.0000 0.2705 0.00045477 +7 0.4066 0.4121 0.0000 0.2652 0.00044566 +8 0.3713 0.3684 0.0000 0.3114 0.00043653 +9 0.3506 0.3454 0.0000 0.3249 0.00042739 +10 0.3302 0.3885 0.0000 0.3090 0.00041822 +11 0.3085 0.3103 0.0000 0.3547 0.00040903 +12 0.2969 0.3949 0.0000 0.3700 0.00039981 +13 0.2922 0.2958 0.0000 0.3682 0.00039057 +14 0.2710 0.2820 0.0000 0.4000 0.00038131 +15 0.2606 0.2862 0.0000 0.3854 0.00037202 +16 0.2601 0.2726 0.0000 0.4095 0.00036271 +17 0.2439 0.2564 0.0000 0.4265 0.00035337 +18 0.2334 0.2390 0.0000 0.4355 0.00034400 +19 0.2315 0.2323 0.0000 0.4455 0.00033460 +20 0.2191 0.2354 0.0000 0.4582 0.00032518 +21 0.2158 0.2320 0.0000 0.4426 0.00031572 +22 0.2097 0.2202 0.0000 0.4778 0.00030624 +23 0.2008 0.2371 0.0000 0.4449 0.00029671 +24 0.1986 0.2314 0.0000 0.4534 0.00028716 +25 0.1926 0.2049 0.0000 0.4961 0.00027757 +26 0.1933 0.2204 0.0000 0.4821 0.00026794 +27 0.1838 0.2470 0.0000 0.4531 0.00025828 +28 0.1835 0.2086 0.0000 0.4755 0.00024857 +29 0.1746 0.2091 0.0000 0.5107 0.00023882 +30 0.1715 0.1919 0.0000 0.5171 0.00022903 +31 0.1677 0.2105 0.0000 0.4961 0.00021919 +32 0.1651 0.1958 0.0000 0.5018 0.00020930 +33 0.1625 0.1931 0.0000 0.5171 0.00019936 +34 0.1576 0.1800 0.0000 0.5412 0.00018937 +35 0.1562 0.1997 0.0000 0.5078 0.00017931 +36 0.1512 0.1759 0.0000 0.5540 0.00016919 +37 0.1473 0.1834 0.0000 0.5586 0.00015901 +38 0.1434 0.1808 0.0000 0.5459 0.00014875 +39 0.1417 0.1737 0.0000 0.5444 0.00013841 +40 0.1399 0.1750 0.0000 0.5524 0.00012798 +41 0.1367 0.1685 0.0000 0.5655 0.00011746 +42 0.1337 0.1680 0.0000 0.5686 0.00010684 +43 0.1302 0.1729 0.0000 0.5574 0.00009609 +44 0.1267 0.1723 0.0000 0.5818 0.00008521 +45 0.1255 0.1665 0.0000 0.5752 0.00007417 +46 0.1224 0.1745 0.0000 0.5681 0.00006295 +47 0.1193 0.1705 0.0000 0.5851 0.00005149 +48 0.1173 0.1620 0.0000 0.5939 0.00003975 +49 0.1154 0.1656 0.0000 0.5924 0.00002759 +50 0.1125 0.1589 0.0000 0.5960 0.00001479 \ No newline at end of file diff --git a/save/erfnet_isomax_plus_focal/best.txt b/save/erfnet_isomax_plus_focal/best.txt new file mode 100644 index 0000000..5ae0f70 --- /dev/null +++ b/save/erfnet_isomax_plus_focal/best.txt @@ -0,0 +1 @@ +Best epoch is 49, with Val-IoU= 0.6115 \ No newline at end of file diff --git a/save/erfnet_isomax_plus_focal/best_encoder.txt b/save/erfnet_isomax_plus_focal/best_encoder.txt new file mode 100644 index 0000000..15b0202 --- /dev/null +++ b/save/erfnet_isomax_plus_focal/best_encoder.txt @@ -0,0 +1 @@ +Best epoch is 50, with Val-IoU= 0.5960 \ No newline at end of file diff --git a/save/erfnet_isomax_plus_focal/erfnet_isomax_plus.py b/save/erfnet_isomax_plus_focal/erfnet_isomax_plus.py new file mode 100644 index 0000000..cb4aec8 --- /dev/null +++ b/save/erfnet_isomax_plus_focal/erfnet_isomax_plus.py @@ -0,0 +1,183 @@ +# ERFNet full model definition for Pytorch +# Sept 2017 +# Eduardo Romera +####################### + +# Reference: https://github.com/dlmacedo/entropic-out-of-distribution-detection/blob/9ad451ca815160e5339dc21319cea2b859e3e101/losses/isomaxplus.py + +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F + + +class IsoMaxPlusLossFirstPart(nn.Module): + """This part replaces the model classifier output layer""" + + def __init__(self, num_classes, encoder, temperature=1.0): + super(IsoMaxPlusLossFirstPart, self).__init__() + 
self.num_classes = num_classes + self.temperature = temperature + self.prototypes = nn.Parameter(torch.Tensor(1, num_classes, 1, 1)) + self.distance_scale = nn.Parameter(torch.Tensor(1)) + if encoder: + self.output_conv = nn.Conv2d(128, num_classes, 1, stride=1, padding=0, bias=True) + else: + self.output_conv = nn.ConvTranspose2d(16, num_classes, 2, stride=2, padding=0, output_padding=0, bias=True) + nn.init.normal_(self.prototypes, mean=0.0, std=1.0) + nn.init.constant_(self.distance_scale, 1.0) + + def forward(self, features): + features = self.output_conv(features) + distances = torch.abs(self.distance_scale) * torch.abs(features - self.prototypes) + logits = -distances + return logits / self.temperature + + +class DownsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + + self.conv = nn.Conv2d(ninput, noutput - ninput, (3, 3), stride=2, padding=1, bias=True) + self.pool = nn.MaxPool2d(2, stride=2) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = torch.cat([self.conv(input), self.pool(input)], 1) + output = self.bn(output) + return F.relu(output) + + +class non_bottleneck_1d(nn.Module): + def __init__(self, chann, dropprob, dilated): + super().__init__() + + self.conv3x1_1 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1, 0), bias=True) + + self.conv1x3_1 = nn.Conv2d(chann, chann, (1, 3), stride=1, padding=(0, 1), bias=True) + + self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) + + self.conv3x1_2 = nn.Conv2d( + chann, chann, (3, 1), stride=1, padding=(1 * dilated, 0), bias=True, dilation=(dilated, 1) + ) + + self.conv1x3_2 = nn.Conv2d( + chann, chann, (1, 3), stride=1, padding=(0, 1 * dilated), bias=True, dilation=(1, dilated) + ) + + self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) + + self.dropout = nn.Dropout2d(dropprob) + + def forward(self, input): + + output = self.conv3x1_1(input) + output = F.relu(output) + output = self.conv1x3_1(output) + output = self.bn1(output) + output = F.relu(output) + + output = self.conv3x1_2(output) + output = F.relu(output) + output = self.conv1x3_2(output) + output = self.bn2(output) + + if self.dropout.p != 0: + output = self.dropout(output) + + return F.relu(output + input) # +input = identity (residual connection) + + +class Encoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + self.initial_block = DownsamplerBlock(3, 16) + + self.layers = nn.ModuleList() + + self.layers.append(DownsamplerBlock(16, 64)) + + for x in range(0, 5): # 5 times + self.layers.append(non_bottleneck_1d(64, 0.03, 1)) + + self.layers.append(DownsamplerBlock(64, 128)) + + for x in range(0, 2): # 2 times + self.layers.append(non_bottleneck_1d(128, 0.3, 2)) + self.layers.append(non_bottleneck_1d(128, 0.3, 4)) + self.layers.append(non_bottleneck_1d(128, 0.3, 8)) + self.layers.append(non_bottleneck_1d(128, 0.3, 16)) + + # Only in encoder mode: + + self.output_conv = IsoMaxPlusLossFirstPart(num_classes, encoder=True) + + def forward(self, input, predict=False): + output = self.initial_block(input) + + for layer in self.layers: + output = layer(output) + + if predict: + output = self.output_conv(output) + + return output + + +class UpsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + return F.relu(output) + + +class 
Decoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + + self.layers = nn.ModuleList() + + self.layers.append(UpsamplerBlock(128, 64)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + + self.layers.append(UpsamplerBlock(64, 16)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + + self.output_conv = IsoMaxPlusLossFirstPart(num_classes, encoder=False) + + def forward(self, input): + output = input + + for layer in self.layers: + output = layer(output) + + output = self.output_conv(output) + + return output + + +# ERFNet +class Net(nn.Module): + def __init__(self, num_classes, encoder=None): # use encoder to pass pretrained encoder + super().__init__() + + if encoder == None: + self.encoder = Encoder(num_classes) + else: + self.encoder = encoder + self.decoder = Decoder(num_classes) + + def forward(self, input, only_encode=False): + if only_encode: + return self.encoder.forward(input, predict=True) + else: + output = self.encoder(input) # predict=False by default + return self.decoder.forward(output) diff --git a/save/erfnet_isomax_plus_focal/model.txt b/save/erfnet_isomax_plus_focal/model.txt new file mode 100644 index 0000000..1602ec1 --- /dev/null +++ b/save/erfnet_isomax_plus_focal/model.txt @@ -0,0 +1,140 @@ +DataParallel( + (module): Net( + (encoder): Encoder( + (initial_block): DownsamplerBlock( + (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (layers): ModuleList( + (0): DownsamplerBlock( + (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-5): 5 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.03, inplace=False) + ) + (6): DownsamplerBlock( + (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (7): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (8): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, 
kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (9): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (10): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (11): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (12): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (13): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): 
BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (14): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + ) + (output_conv): IsoMaxPlusLossFirstPart( + (output_conv): Conv2d(128, 20, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + (decoder): Decoder( + (layers): ModuleList( + (0): UpsamplerBlock( + (conv): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-2): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + (3): UpsamplerBlock( + (conv): ConvTranspose2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (4-5): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + ) + (output_conv): IsoMaxPlusLossFirstPart( + (output_conv): ConvTranspose2d(16, 20, kernel_size=(2, 2), stride=(2, 2)) + ) + ) + ) +) \ No newline at end of file diff --git a/save/erfnet_isomax_plus_focal/model_encoder.txt b/save/erfnet_isomax_plus_focal/model_encoder.txt new file mode 100644 index 0000000..1602ec1 --- /dev/null +++ b/save/erfnet_isomax_plus_focal/model_encoder.txt @@ -0,0 +1,140 @@ +DataParallel( + (module): Net( + (encoder): Encoder( + (initial_block): DownsamplerBlock( + (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (layers): ModuleList( + (0): DownsamplerBlock( + (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, 
track_running_stats=True) + ) + (1-5): 5 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.03, inplace=False) + ) + (6): DownsamplerBlock( + (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (7): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (8): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (9): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (10): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (11): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 
1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (12): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (13): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (14): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + ) + (output_conv): IsoMaxPlusLossFirstPart( + (output_conv): Conv2d(128, 20, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + (decoder): Decoder( + (layers): ModuleList( + (0): UpsamplerBlock( + (conv): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-2): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + (3): UpsamplerBlock( + (conv): ConvTranspose2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (4-5): 2 x non_bottleneck_1d( + 
(conv3x1_1): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + ) + (output_conv): IsoMaxPlusLossFirstPart( + (output_conv): ConvTranspose2d(16, 20, kernel_size=(2, 2), stride=(2, 2)) + ) + ) + ) +) \ No newline at end of file diff --git a/save/erfnet_isomax_plus_focal/opts.txt b/save/erfnet_isomax_plus_focal/opts.txt new file mode 100644 index 0000000..b9d007f --- /dev/null +++ b/save/erfnet_isomax_plus_focal/opts.txt @@ -0,0 +1 @@ +Namespace(cuda=True, model='erfnet_isomax_plus', state=None, port=8097, datadir='../cityscapes/', height=512, width=1024, num_epochs=50, num_workers=4, batch_size=6, steps_loss=50, steps_plot=50, epochs_save=0, savedir='erfnet_isomax_plus_focal', decoder=False, pretrainedEncoder=None, visualize=False, iouTrain=False, iouVal=True, resume=False, erfnet=True, colab=False, download_step=10, loss='focal', logit_norm=False, entropic_scale=10.0) \ No newline at end of file diff --git a/save/erfnet_logit_norm_ce/automated_log.txt b/save/erfnet_logit_norm_ce/automated_log.txt new file mode 100644 index 0000000..2a6ce02 --- /dev/null +++ b/save/erfnet_logit_norm_ce/automated_log.txt @@ -0,0 +1,51 @@ +Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate +1 1.3298 0.6590 0.0000 0.3659 0.00050000 +2 0.5249 0.5353 0.0000 0.4250 0.00049099 +3 0.4232 0.4538 0.0000 0.4779 0.00048196 +4 0.3924 0.4142 0.0000 0.4927 0.00047292 +5 0.3645 0.4392 0.0000 0.4872 0.00046385 +6 0.3402 0.3668 0.0000 0.5361 0.00045477 +7 0.3391 0.4246 0.0000 0.4776 0.00044566 +8 0.3112 0.4817 0.0000 0.4894 0.00043653 +9 0.3268 0.3749 0.0000 0.5381 0.00042739 +10 0.3051 0.4008 0.0000 0.5191 0.00041822 +11 0.3127 0.3822 0.0000 0.5284 0.00040903 +12 0.2945 0.4318 0.0000 0.5218 0.00039981 +13 0.2871 0.3593 0.0000 0.5505 0.00039057 +14 0.2755 0.4070 0.0000 0.5175 0.00038131 +15 0.3048 0.3314 0.0000 0.6066 0.00037202 +16 0.2573 0.3325 0.0000 0.5757 0.00036271 +17 0.2727 0.3644 0.0000 0.5454 0.00035337 +18 0.2665 0.3270 0.0000 0.5732 0.00034400 +19 0.2635 0.3203 0.0000 0.5889 0.00033460 +20 0.2399 0.3111 0.0000 0.6005 0.00032518 +21 0.2508 0.3490 0.0000 0.5819 0.00031572 +22 0.2459 0.3093 0.0000 0.5957 0.00030624 +23 0.2326 0.3060 0.0000 0.6087 0.00029671 +24 0.2335 0.3046 0.0000 0.6190 0.00028716 +25 0.2320 0.3233 0.0000 0.5836 0.00027757 +26 0.2177 0.2969 0.0000 0.6131 0.00026794 +27 0.2191 0.2933 0.0000 0.6138 0.00025828 +28 0.2311 0.3136 0.0000 0.6031 0.00024857 +29 0.2144 0.2835 0.0000 0.6211 0.00023882 +30 0.2087 0.2999 0.0000 0.6165 0.00022903 +31 0.2026 0.2923 0.0000 0.6202 0.00021919 +32 0.1972 0.2878 0.0000 0.6247 0.00020930 +33 0.1952 0.2801 0.0000 0.6435 0.00019936 +34 0.1958 0.2812 0.0000 0.6250 0.00018937 +35 0.1953 0.2775 0.0000 0.6268 0.00017931 +36 0.1822 0.2802 0.0000 0.6395 0.00016919 +37 0.1849 0.3540 0.0000 0.5843 0.00015901 +38 0.1870 0.2862 0.0000 0.6373 0.00014875 +39 0.1761 0.2752 0.0000 0.6588 0.00013841 +40 0.1741 0.2734 0.0000 0.6628 0.00012798 +41 0.1703 0.2761 0.0000 0.6508 0.00011746 +42 0.1658 0.2726 0.0000 0.6566 0.00010684 +43 0.1635 0.2749 0.0000 0.6581 0.00009609 +44 0.1617 0.2699 0.0000 0.6727 
0.00008521 +45 0.1597 0.2642 0.0000 0.6667 0.00007417 +46 0.1548 0.2679 0.0000 0.6714 0.00006295 +47 0.1530 0.2694 0.0000 0.6704 0.00005149 +48 0.1513 0.2676 0.0000 0.6640 0.00003975 +49 0.1495 0.2663 0.0000 0.6720 0.00002759 +50 0.1474 0.2665 0.0000 0.6782 0.00001479 \ No newline at end of file diff --git a/save/erfnet_logit_norm_ce/automated_log_encoder.txt b/save/erfnet_logit_norm_ce/automated_log_encoder.txt new file mode 100644 index 0000000..bce4dee --- /dev/null +++ b/save/erfnet_logit_norm_ce/automated_log_encoder.txt @@ -0,0 +1,109 @@ +Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate +1 2.8388 2.8254 0.0000 0.2673 0.00050000 +2 2.8185 2.8187 0.0000 0.3082 0.00049700 +1 2.3049 2.2755 0.0000 0.1964 0.00050000 +2 2.2377 2.2232 0.0000 0.2542 0.00049700 +3 2.2147 2.2145 0.0000 0.2840 0.00049400 +4 2.2035 2.2023 0.0000 0.2994 0.00049099 +5 2.1948 2.1994 0.0000 0.3051 0.00048798 +6 2.1901 2.1979 0.0000 0.3148 0.00048497 +7 2.1831 2.2006 0.0000 0.3286 0.00048196 +8 2.1807 2.1895 0.0000 0.3540 0.00047895 +9 2.1760 2.1828 0.0000 0.3626 0.00047593 +10 2.1737 2.1819 0.0000 0.3725 0.00047292 +11 2.1717 2.1827 0.0000 0.3802 0.00046990 +12 2.1688 2.1754 0.0000 0.3907 0.00046688 +13 2.1671 2.1764 0.0000 0.3857 0.00046385 +14 2.1650 2.1703 0.0000 0.4075 0.00046083 +15 2.1640 2.1779 0.0000 0.3862 0.00045780 +16 2.1620 2.1767 0.0000 0.4034 0.00045477 +17 2.1606 2.1681 0.0000 0.4154 0.00045173 +18 2.1592 2.1688 0.0000 0.4108 0.00044870 +19 2.1589 2.1666 0.0000 0.4248 0.00044566 +20 2.1572 2.1641 0.0000 0.4317 0.00044262 +21 2.1555 2.1661 0.0000 0.4417 0.00043958 +22 2.1543 2.1672 0.0000 0.4337 0.00043653 +23 2.1534 2.1629 0.0000 0.4429 0.00043349 +24 2.1536 2.1650 0.0000 0.4329 0.00043044 +1 1.0448 0.8514 0.0000 0.2353 0.00050000 +2 0.6750 0.6552 0.0000 0.3027 0.00049700 +3 0.5758 0.7474 0.0000 0.3075 0.00049400 +4 0.5237 0.5323 0.0000 0.3504 0.00049099 +5 0.4850 0.4757 0.0000 0.3924 0.00048798 +6 0.4549 0.4906 0.0000 0.3964 0.00048497 +7 0.4287 0.5810 0.0000 0.3648 0.00048196 +8 0.4119 0.4664 0.0000 0.4037 0.00047895 +9 0.3872 0.4556 0.0000 0.4286 0.00047593 +10 0.3846 0.4245 0.0000 0.4409 0.00047292 +11 0.3622 0.4261 0.0000 0.4320 0.00046990 +12 0.3575 0.3995 0.0000 0.4627 0.00046688 +13 0.3469 0.3556 0.0000 0.4938 0.00046385 +14 0.3391 0.3592 0.0000 0.4954 0.00046083 +15 0.3238 0.3628 0.0000 0.4806 0.00045780 +16 0.3199 0.3525 0.0000 0.5007 0.00045477 +17 0.3185 0.3772 0.0000 0.4734 0.00045173 +18 0.3073 0.3918 0.0000 0.4485 0.00044870 +19 0.3025 0.3439 0.0000 0.4983 0.00044566 +20 0.2928 0.3364 0.0000 0.5082 0.00044262 +21 0.2900 0.3180 0.0000 0.5251 0.00043958 +22 0.2839 0.3474 0.0000 0.4876 0.00043653 +23 0.2822 0.3240 0.0000 0.5172 0.00043349 +24 0.2728 0.3410 0.0000 0.5222 0.00043044 +25 0.2842 0.3283 0.0000 0.5250 0.00042739 +26 0.2695 0.3316 0.0000 0.5360 0.00042433 +27 0.2569 0.2892 0.0000 0.5697 0.00042128 +28 0.2578 0.3185 0.0000 0.5337 0.00041822 +29 0.2647 0.3058 0.0000 0.5602 0.00041516 +30 0.2549 0.3255 0.0000 0.5305 0.00041209 +31 0.2512 0.3149 0.0000 0.5365 0.00040903 +32 0.2450 0.2926 0.0000 0.5692 0.00040596 +1 1.0613 0.7725 0.0000 0.2614 0.00050000 +2 0.6663 0.6726 0.0000 0.2897 0.00049099 +3 0.5693 0.6119 0.0000 0.3215 0.00048196 +4 0.5091 0.5409 0.0000 0.3588 0.00047292 +5 0.4731 0.4943 0.0000 0.3835 0.00046385 +6 0.4412 0.5002 0.0000 0.3841 0.00045477 +7 0.4208 0.4786 0.0000 0.4110 0.00044566 +8 0.4063 0.4633 0.0000 0.4119 0.00043653 +9 0.3815 0.4077 0.0000 0.4474 0.00042739 +10 0.3653 0.3855 0.0000 0.4576 0.00041822 +11 0.3563 0.4492 0.0000 0.4281 0.00040903 +12 
0.3483 0.3915 0.0000 0.4687 0.00039981 +13 0.3399 0.3903 0.0000 0.4754 0.00039057 +14 0.3218 0.3573 0.0000 0.4739 0.00038131 +15 0.3138 0.3607 0.0000 0.4971 0.00037202 +16 0.3074 0.3342 0.0000 0.5027 0.00036271 +17 0.3003 0.4122 0.0000 0.4684 0.00035337 +18 0.2960 0.3526 0.0000 0.5051 0.00034400 +19 0.2923 0.3528 0.0000 0.5197 0.00033460 +20 0.2799 0.3168 0.0000 0.5276 0.00032518 +21 0.2717 0.3205 0.0000 0.5265 0.00031572 +22 0.2663 0.3600 0.0000 0.4972 0.00030624 +23 0.2612 0.3033 0.0000 0.5568 0.00029671 +24 0.2562 0.2978 0.0000 0.5453 0.00028716 +25 0.2561 0.2979 0.0000 0.5484 0.00027757 +26 0.2448 0.3282 0.0000 0.5194 0.00026794 +27 0.2415 0.3006 0.0000 0.5686 0.00025828 +28 0.2381 0.3230 0.0000 0.5513 0.00024857 +29 0.2350 0.2854 0.0000 0.5791 0.00023882 +30 0.2251 0.2840 0.0000 0.5644 0.00022903 +31 0.2266 0.2720 0.0000 0.5912 0.00021919 +32 0.2208 0.3052 0.0000 0.5634 0.00020930 +33 0.2145 0.2617 0.0000 0.6114 0.00019936 +34 0.2140 0.2781 0.0000 0.5735 0.00018937 +35 0.2061 0.2643 0.0000 0.5907 0.00017931 +36 0.1998 0.2682 0.0000 0.6101 0.00016919 +37 0.1993 0.2674 0.0000 0.6033 0.00015901 +38 0.1961 0.2646 0.0000 0.6119 0.00014875 +39 0.1896 0.2805 0.0000 0.6006 0.00013841 +40 0.1903 0.2448 0.0000 0.6144 0.00012798 +41 0.1855 0.2676 0.0000 0.6120 0.00011746 +42 0.1826 0.2523 0.0000 0.6178 0.00010684 +43 0.1759 0.2449 0.0000 0.6346 0.00009609 +44 0.1733 0.2497 0.0000 0.6288 0.00008521 +45 0.1689 0.2444 0.0000 0.6304 0.00007417 +46 0.1673 0.2447 0.0000 0.6366 0.00006295 +47 0.1645 0.2421 0.0000 0.6358 0.00005149 +48 0.1620 0.2512 0.0000 0.6295 0.00003975 +49 0.1597 0.2341 0.0000 0.6537 0.00002759 +50 0.1570 0.2380 0.0000 0.6463 0.00001479 \ No newline at end of file diff --git a/save/erfnet_logit_norm_ce/best.txt b/save/erfnet_logit_norm_ce/best.txt new file mode 100644 index 0000000..efdfc9e --- /dev/null +++ b/save/erfnet_logit_norm_ce/best.txt @@ -0,0 +1 @@ +Best epoch is 50, with Val-IoU= 0.6782 \ No newline at end of file diff --git a/save/erfnet_logit_norm_ce/best_encoder.txt b/save/erfnet_logit_norm_ce/best_encoder.txt new file mode 100644 index 0000000..3200e57 --- /dev/null +++ b/save/erfnet_logit_norm_ce/best_encoder.txt @@ -0,0 +1 @@ +Best epoch is 49, with Val-IoU= 0.6537 \ No newline at end of file diff --git a/save/erfnet_logit_norm_ce/erfnet.py b/save/erfnet_logit_norm_ce/erfnet.py new file mode 100644 index 0000000..8d2bc9d --- /dev/null +++ b/save/erfnet_logit_norm_ce/erfnet.py @@ -0,0 +1,157 @@ +# ERFNet full model definition for Pytorch +# Sept 2017 +# Eduardo Romera +####################### + +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F + + +class DownsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + + self.conv = nn.Conv2d(ninput, noutput - ninput, (3, 3), stride=2, padding=1, bias=True) + self.pool = nn.MaxPool2d(2, stride=2) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = torch.cat([self.conv(input), self.pool(input)], 1) + output = self.bn(output) + return F.relu(output) + + +class non_bottleneck_1d(nn.Module): + def __init__(self, chann, dropprob, dilated): + super().__init__() + + self.conv3x1_1 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1, 0), bias=True) + + self.conv1x3_1 = nn.Conv2d(chann, chann, (1, 3), stride=1, padding=(0, 1), bias=True) + + self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) + + self.conv3x1_2 = nn.Conv2d( + chann, chann, (3, 1), stride=1, padding=(1 * dilated, 0), bias=True, dilation=(dilated, 1) + ) + 
+ self.conv1x3_2 = nn.Conv2d( + chann, chann, (1, 3), stride=1, padding=(0, 1 * dilated), bias=True, dilation=(1, dilated) + ) + + self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) + + self.dropout = nn.Dropout2d(dropprob) + + def forward(self, input): + + output = self.conv3x1_1(input) + output = F.relu(output) + output = self.conv1x3_1(output) + output = self.bn1(output) + output = F.relu(output) + + output = self.conv3x1_2(output) + output = F.relu(output) + output = self.conv1x3_2(output) + output = self.bn2(output) + + if self.dropout.p != 0: + output = self.dropout(output) + + return F.relu(output + input) # +input = identity (residual connection) + + +class Encoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + self.initial_block = DownsamplerBlock(3, 16) + + self.layers = nn.ModuleList() + + self.layers.append(DownsamplerBlock(16, 64)) + + for x in range(0, 5): # 5 times + self.layers.append(non_bottleneck_1d(64, 0.03, 1)) + + self.layers.append(DownsamplerBlock(64, 128)) + + for x in range(0, 2): # 2 times + self.layers.append(non_bottleneck_1d(128, 0.3, 2)) + self.layers.append(non_bottleneck_1d(128, 0.3, 4)) + self.layers.append(non_bottleneck_1d(128, 0.3, 8)) + self.layers.append(non_bottleneck_1d(128, 0.3, 16)) + + # Only in encoder mode: + self.output_conv = nn.Conv2d(128, num_classes, 1, stride=1, padding=0, bias=True) + + def forward(self, input, predict=False): + output = self.initial_block(input) + + for layer in self.layers: + output = layer(output) + + if predict: + output = self.output_conv(output) + + return output + + +class UpsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + return F.relu(output) + + +class Decoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + + self.layers = nn.ModuleList() + + self.layers.append(UpsamplerBlock(128, 64)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + + self.layers.append(UpsamplerBlock(64, 16)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + + self.output_conv = nn.ConvTranspose2d(16, num_classes, 2, stride=2, padding=0, output_padding=0, bias=True) + + def forward(self, input): + output = input + + for layer in self.layers: + output = layer(output) + + output = self.output_conv(output) + + return output + + +# ERFNet +class Net(nn.Module): + def __init__(self, num_classes, encoder=None): # use encoder to pass pretrained encoder + super().__init__() + + if encoder == None: + self.encoder = Encoder(num_classes) + else: + self.encoder = encoder + self.decoder = Decoder(num_classes) + + def forward(self, input, only_encode=False): + if only_encode: + return self.encoder.forward(input, predict=True) + else: + output = self.encoder(input) # predict=False by default + return self.decoder.forward(output) diff --git a/save/erfnet_logit_norm_ce/model.txt b/save/erfnet_logit_norm_ce/model.txt new file mode 100644 index 0000000..df34e27 --- /dev/null +++ b/save/erfnet_logit_norm_ce/model.txt @@ -0,0 +1,136 @@ +DataParallel( + (module): Net( + (encoder): Encoder( + (initial_block): DownsamplerBlock( + (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, 
dilation=1, ceil_mode=False) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (layers): ModuleList( + (0): DownsamplerBlock( + (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-5): 5 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.03, inplace=False) + ) + (6): DownsamplerBlock( + (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (7): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (8): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (9): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (10): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): 
Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (11): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (12): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (13): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (14): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + ) + (output_conv): Conv2d(128, 20, kernel_size=(1, 1), stride=(1, 1)) + ) + (decoder): Decoder( + (layers): ModuleList( + (0): UpsamplerBlock( + (conv): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-2): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, 
momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + (3): UpsamplerBlock( + (conv): ConvTranspose2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (4-5): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + ) + (output_conv): ConvTranspose2d(16, 20, kernel_size=(2, 2), stride=(2, 2)) + ) + ) +) \ No newline at end of file diff --git a/save/erfnet_logit_norm_ce/model_encoder.txt b/save/erfnet_logit_norm_ce/model_encoder.txt new file mode 100644 index 0000000..df34e27 --- /dev/null +++ b/save/erfnet_logit_norm_ce/model_encoder.txt @@ -0,0 +1,136 @@ +DataParallel( + (module): Net( + (encoder): Encoder( + (initial_block): DownsamplerBlock( + (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (layers): ModuleList( + (0): DownsamplerBlock( + (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-5): 5 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.03, inplace=False) + ) + (6): DownsamplerBlock( + (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (7): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (8): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 
1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (9): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (10): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (11): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (12): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (13): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, 
inplace=False) + ) + (14): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + ) + (output_conv): Conv2d(128, 20, kernel_size=(1, 1), stride=(1, 1)) + ) + (decoder): Decoder( + (layers): ModuleList( + (0): UpsamplerBlock( + (conv): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-2): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + (3): UpsamplerBlock( + (conv): ConvTranspose2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (4-5): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + ) + (output_conv): ConvTranspose2d(16, 20, kernel_size=(2, 2), stride=(2, 2)) + ) + ) +) \ No newline at end of file diff --git a/save/erfnet_logit_norm_ce/opts.txt b/save/erfnet_logit_norm_ce/opts.txt new file mode 100644 index 0000000..de54b5a --- /dev/null +++ b/save/erfnet_logit_norm_ce/opts.txt @@ -0,0 +1 @@ +Namespace(cuda=True, model='erfnet', state=None, port=8097, datadir='../cityscapes/', height=512, width=1024, num_epochs=50, num_workers=4, batch_size=6, steps_loss=50, steps_plot=50, epochs_save=0, savedir='erfnet_training1', decoder=False, pretrainedEncoder=None, visualize=False, iouTrain=False, iouVal=True, resume=False, erfnet=True, colab=False, download_step=10, loss='ce', logit_norm=True) \ No newline at end of file diff --git a/save/erfnet_logit_norm_ce_imagenet/automated_log.txt b/save/erfnet_logit_norm_ce_imagenet/automated_log.txt new file mode 100644 index 0000000..9ab3009 --- /dev/null +++ b/save/erfnet_logit_norm_ce_imagenet/automated_log.txt @@ -0,0 +1,51 @@ +Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate +1 1.8011 0.7737 0.0000 0.2935 0.00050000 +2 0.6144 0.5111 0.0000 0.3851 0.00049099 +3 0.4520 0.4794 0.0000 0.4310 
0.00048196 +4 0.3910 0.3852 0.0000 0.4982 0.00047292 +5 0.3555 0.3717 0.0000 0.5005 0.00046385 +6 0.3393 0.3485 0.0000 0.5387 0.00045477 +7 0.3163 0.3418 0.0000 0.5360 0.00044566 +8 0.3044 0.3436 0.0000 0.5558 0.00043653 +9 0.2928 0.3141 0.0000 0.5709 0.00042739 +10 0.2815 0.3147 0.0000 0.5878 0.00041822 +11 0.2740 0.3559 0.0000 0.5732 0.00040903 +12 0.2685 0.3003 0.0000 0.5967 0.00039981 +13 0.2673 0.3059 0.0000 0.6021 0.00039057 +14 0.2508 0.3164 0.0000 0.5817 0.00038131 +15 0.2429 0.3255 0.0000 0.5577 0.00037202 +16 0.2484 0.2923 0.0000 0.6139 0.00036271 +17 0.2351 0.3010 0.0000 0.5982 0.00035337 +18 0.2231 0.2955 0.0000 0.6077 0.00034400 +19 0.2349 0.2843 0.0000 0.6321 0.00033460 +20 0.2194 0.2731 0.0000 0.6320 0.00032518 +21 0.2278 0.3020 0.0000 0.6063 0.00031572 +22 0.2131 0.2726 0.0000 0.6288 0.00030624 +23 0.2013 0.2874 0.0000 0.6101 0.00029671 +24 0.1976 0.2726 0.0000 0.6458 0.00028716 +25 0.2080 0.3276 0.0000 0.5902 0.00027757 +26 0.2002 0.2721 0.0000 0.6389 0.00026794 +27 0.1890 0.2784 0.0000 0.6386 0.00025828 +28 0.1825 0.2653 0.0000 0.6534 0.00024857 +29 0.1887 0.2633 0.0000 0.6452 0.00023882 +30 0.1793 0.2853 0.0000 0.6435 0.00022903 +31 0.1820 0.4602 0.0000 0.5486 0.00021919 +32 0.1957 0.2672 0.0000 0.6340 0.00020930 +33 0.1714 0.2569 0.0000 0.6708 0.00019936 +34 0.1714 0.2744 0.0000 0.6635 0.00018937 +35 0.1641 0.2805 0.0000 0.6464 0.00017931 +36 0.1670 0.2742 0.0000 0.6628 0.00016919 +37 0.1594 0.2529 0.0000 0.6822 0.00015901 +38 0.1547 0.2567 0.0000 0.6829 0.00014875 +39 0.1535 0.2773 0.0000 0.6551 0.00013841 +40 0.1494 0.2647 0.0000 0.6747 0.00012798 +41 0.1479 0.2601 0.0000 0.6832 0.00011746 +42 0.1459 0.2528 0.0000 0.6733 0.00010684 +43 0.1452 0.2745 0.0000 0.6798 0.00009609 +44 0.1404 0.2553 0.0000 0.6904 0.00008521 +45 0.1386 0.2559 0.0000 0.6884 0.00007417 +46 0.1372 0.2595 0.0000 0.6950 0.00006295 +47 0.1343 0.2497 0.0000 0.7015 0.00005149 +48 0.1322 0.2553 0.0000 0.6966 0.00003975 +49 0.1305 0.2546 0.0000 0.6983 0.00002759 +50 0.1288 0.2578 0.0000 0.6997 0.00001479 \ No newline at end of file diff --git a/save/erfnet_logit_norm_ce_imagenet/best.txt b/save/erfnet_logit_norm_ce_imagenet/best.txt new file mode 100644 index 0000000..fdc5180 --- /dev/null +++ b/save/erfnet_logit_norm_ce_imagenet/best.txt @@ -0,0 +1 @@ +Best epoch is 47, with Val-IoU= 0.7015 \ No newline at end of file diff --git a/save/erfnet_logit_norm_ce_imagenet/erfnet.py b/save/erfnet_logit_norm_ce_imagenet/erfnet.py new file mode 100644 index 0000000..8d2bc9d --- /dev/null +++ b/save/erfnet_logit_norm_ce_imagenet/erfnet.py @@ -0,0 +1,157 @@ +# ERFNet full model definition for Pytorch +# Sept 2017 +# Eduardo Romera +####################### + +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F + + +class DownsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + + self.conv = nn.Conv2d(ninput, noutput - ninput, (3, 3), stride=2, padding=1, bias=True) + self.pool = nn.MaxPool2d(2, stride=2) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = torch.cat([self.conv(input), self.pool(input)], 1) + output = self.bn(output) + return F.relu(output) + + +class non_bottleneck_1d(nn.Module): + def __init__(self, chann, dropprob, dilated): + super().__init__() + + self.conv3x1_1 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1, 0), bias=True) + + self.conv1x3_1 = nn.Conv2d(chann, chann, (1, 3), stride=1, padding=(0, 1), bias=True) + + self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) + + 
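# the second conv pair below is dilated; padding (1*dilated, 0) / (0, 1*dilated) keeps the 3x1 and 1x3 outputs at the input resolution +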
self.conv3x1_2 = nn.Conv2d( + chann, chann, (3, 1), stride=1, padding=(1 * dilated, 0), bias=True, dilation=(dilated, 1) + ) + + self.conv1x3_2 = nn.Conv2d( + chann, chann, (1, 3), stride=1, padding=(0, 1 * dilated), bias=True, dilation=(1, dilated) + ) + + self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) + + self.dropout = nn.Dropout2d(dropprob) + + def forward(self, input): + + output = self.conv3x1_1(input) + output = F.relu(output) + output = self.conv1x3_1(output) + output = self.bn1(output) + output = F.relu(output) + + output = self.conv3x1_2(output) + output = F.relu(output) + output = self.conv1x3_2(output) + output = self.bn2(output) + + if self.dropout.p != 0: + output = self.dropout(output) + + return F.relu(output + input) # +input = identity (residual connection) + + +class Encoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + self.initial_block = DownsamplerBlock(3, 16) + + self.layers = nn.ModuleList() + + self.layers.append(DownsamplerBlock(16, 64)) + + for x in range(0, 5): # 5 times + self.layers.append(non_bottleneck_1d(64, 0.03, 1)) + + self.layers.append(DownsamplerBlock(64, 128)) + + for x in range(0, 2): # 2 times + self.layers.append(non_bottleneck_1d(128, 0.3, 2)) + self.layers.append(non_bottleneck_1d(128, 0.3, 4)) + self.layers.append(non_bottleneck_1d(128, 0.3, 8)) + self.layers.append(non_bottleneck_1d(128, 0.3, 16)) + + # Only in encoder mode: + self.output_conv = nn.Conv2d(128, num_classes, 1, stride=1, padding=0, bias=True) + + def forward(self, input, predict=False): + output = self.initial_block(input) + + for layer in self.layers: + output = layer(output) + + if predict: + output = self.output_conv(output) + + return output + + +class UpsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + return F.relu(output) + + +class Decoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + + self.layers = nn.ModuleList() + + self.layers.append(UpsamplerBlock(128, 64)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + + self.layers.append(UpsamplerBlock(64, 16)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + + self.output_conv = nn.ConvTranspose2d(16, num_classes, 2, stride=2, padding=0, output_padding=0, bias=True) + + def forward(self, input): + output = input + + for layer in self.layers: + output = layer(output) + + output = self.output_conv(output) + + return output + + +# ERFNet +class Net(nn.Module): + def __init__(self, num_classes, encoder=None): # use encoder to pass pretrained encoder + super().__init__() + + if encoder == None: + self.encoder = Encoder(num_classes) + else: + self.encoder = encoder + self.decoder = Decoder(num_classes) + + def forward(self, input, only_encode=False): + if only_encode: + return self.encoder.forward(input, predict=True) + else: + output = self.encoder(input) # predict=False by default + return self.decoder.forward(output) diff --git a/save/erfnet_logit_norm_ce_imagenet/model.txt b/save/erfnet_logit_norm_ce_imagenet/model.txt new file mode 100644 index 0000000..5ae9986 --- /dev/null +++ b/save/erfnet_logit_norm_ce_imagenet/model.txt @@ -0,0 +1,135 @@ +DataParallel( + (module): Net( + (encoder): Encoder( + 
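[Aside: this model.txt dump prints every Dropout2d with p=0.1 and lists the conv layers ahead of the batch norms, unlike the 0.03/0.3 dropout defaults and attribute order in the erfnet.py above; presumably the pretrained ImageNet encoder named in opts.txt was built from a slightly different class definition.]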
(initial_block): DownsamplerBlock( + (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (layers): ModuleList( + (0): DownsamplerBlock( + (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-5): 5 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.1, inplace=False) + ) + (6): DownsamplerBlock( + (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (7): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.1, inplace=False) + ) + (8): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.1, inplace=False) + ) + (9): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.1, inplace=False) + ) + (10): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 
1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.1, inplace=False) + ) + (11): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.1, inplace=False) + ) + (12): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.1, inplace=False) + ) + (13): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.1, inplace=False) + ) + (14): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.1, inplace=False) + ) + ) + ) + (decoder): Decoder( + (layers): ModuleList( + (0): UpsamplerBlock( + (conv): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-2): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, 
kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + (3): UpsamplerBlock( + (conv): ConvTranspose2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (4-5): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + ) + (output_conv): ConvTranspose2d(16, 20, kernel_size=(2, 2), stride=(2, 2)) + ) + ) +) \ No newline at end of file diff --git a/save/erfnet_logit_norm_ce_imagenet/opts.txt b/save/erfnet_logit_norm_ce_imagenet/opts.txt new file mode 100644 index 0000000..94242e7 --- /dev/null +++ b/save/erfnet_logit_norm_ce_imagenet/opts.txt @@ -0,0 +1 @@ +Namespace(cuda=True, model='erfnet', port=8097, datadir='../cityscapes/', height=512, width=1024, num_epochs=50, num_workers=4, batch_size=6, steps_loss=50, steps_plot=50, epochs_save=0, savedir='erfnet_logit_norm_ce_imagenet', decoder=True, pretrainedEncoder='../trained_models/erfnet_encoder_pretrained.pth.tar', visualize=False, iouTrain=False, iouVal=True, resume=True, colab=False, download_step=10, loss='ce', logit_norm=True, entropic_scale=10.0, loadDir='../trained_models/', loadWeights=None, fine_tuning=False) \ No newline at end of file diff --git a/save/erfnet_logit_norm_focal/automated_log.txt b/save/erfnet_logit_norm_focal/automated_log.txt new file mode 100644 index 0000000..64d4112 --- /dev/null +++ b/save/erfnet_logit_norm_focal/automated_log.txt @@ -0,0 +1,51 @@ +Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate +1 1.0171 0.4623 0.0000 0.2770 0.00050000 +2 0.3943 0.3617 0.0000 0.3246 0.00049099 +3 0.3297 0.3702 0.0000 0.2973 0.00048196 +4 0.2792 0.2755 0.0000 0.3713 0.00047292 +5 0.2502 0.3224 0.0000 0.3609 0.00046385 +6 0.2269 0.2672 0.0000 0.4056 0.00045477 +7 0.2166 0.2303 0.0000 0.4369 0.00044566 +8 0.2010 0.2410 0.0000 0.4370 0.00043653 +9 0.1991 0.2830 0.0000 0.4145 0.00042739 +10 0.1905 0.2110 0.0000 0.4889 0.00041822 +11 0.1933 0.2312 0.0000 0.4649 0.00040903 +12 0.1786 0.2120 0.0000 0.4579 0.00039981 +13 0.1940 0.2118 0.0000 0.4895 0.00039057 +14 0.1822 0.2095 0.0000 0.4557 0.00038131 +15 0.1685 0.2061 0.0000 0.4911 0.00037202 +16 0.1689 0.1965 0.0000 0.4899 0.00036271 +17 0.1709 0.2183 0.0000 0.4647 0.00035337 +18 0.1625 0.1871 0.0000 0.5153 0.00034400 +19 0.1619 0.2243 0.0000 0.4829 0.00033460 +20 0.1595 0.1861 0.0000 0.5417 0.00032518 +21 0.1541 0.1917 0.0000 0.5016 0.00031572 +22 0.1549 0.1792 0.0000 0.5359 0.00030624 +23 0.1498 0.1930 0.0000 0.5263 0.00029671 +24 0.1490 0.1957 0.0000 0.5151 0.00028716 +25 0.1542 0.1782 0.0000 0.5362 0.00027757 +26 0.1433 0.1604 0.0000 0.5723 0.00026794 +27 0.1374 0.1586 0.0000 0.5677 0.00025828 +28 0.1394 0.1696 0.0000 0.5267 0.00024857 +29 0.1417 0.1657 0.0000 0.5476 0.00023882 +30 0.1398 0.1634 0.0000 0.5570 0.00022903 +31 0.1313 0.1638 0.0000 0.5632 0.00021919 +32 0.1318 0.1602 0.0000 0.5718 0.00020930 +33 
0.1303 0.1578 0.0000 0.5644 0.00019936 +34 0.1269 0.1613 0.0000 0.5625 0.00018937 +35 0.1279 0.1631 0.0000 0.5654 0.00017931 +36 0.1216 0.1537 0.0000 0.5859 0.00016919 +37 0.1184 0.1514 0.0000 0.5821 0.00015901 +38 0.1162 0.1586 0.0000 0.5737 0.00014875 +39 0.1175 0.1596 0.0000 0.5766 0.00013841 +40 0.1140 0.1565 0.0000 0.5908 0.00012798 +41 0.1120 0.1497 0.0000 0.5873 0.00011746 +42 0.1083 0.1512 0.0000 0.6055 0.00010684 +43 0.1069 0.1493 0.0000 0.5954 0.00009609 +44 0.1059 0.1492 0.0000 0.6142 0.00008521 +45 0.1046 0.1490 0.0000 0.6103 0.00007417 +46 0.1006 0.1517 0.0000 0.6050 0.00006295 +47 0.0985 0.1458 0.0000 0.6118 0.00005149 +48 0.0978 0.1473 0.0000 0.6223 0.00003975 +49 0.0955 0.1500 0.0000 0.6138 0.00002759 +50 0.0939 0.1494 0.0000 0.6198 0.00001479 \ No newline at end of file diff --git a/save/erfnet_logit_norm_focal/automated_log_encoder.txt b/save/erfnet_logit_norm_focal/automated_log_encoder.txt new file mode 100644 index 0000000..6562d00 --- /dev/null +++ b/save/erfnet_logit_norm_focal/automated_log_encoder.txt @@ -0,0 +1,51 @@ +Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate +1 0.8284 0.5501 0.0000 0.2394 0.00050000 +2 0.4943 0.4756 0.0000 0.2606 0.00049099 +3 0.4076 0.3682 0.0000 0.3153 0.00048196 +4 0.3541 0.3400 0.0000 0.3261 0.00047292 +5 0.3179 0.3272 0.0000 0.3468 0.00046385 +6 0.3016 0.3743 0.0000 0.3281 0.00045477 +7 0.2868 0.3140 0.0000 0.3562 0.00044566 +8 0.2673 0.2970 0.0000 0.3538 0.00043653 +9 0.2594 0.2662 0.0000 0.4070 0.00042739 +10 0.2452 0.3110 0.0000 0.3826 0.00041822 +11 0.2411 0.2567 0.0000 0.4212 0.00040903 +12 0.2353 0.2442 0.0000 0.4445 0.00039981 +13 0.2229 0.2683 0.0000 0.4137 0.00039057 +14 0.2199 0.2278 0.0000 0.4495 0.00038131 +15 0.2128 0.2393 0.0000 0.4575 0.00037202 +16 0.2056 0.2205 0.0000 0.4607 0.00036271 +17 0.1982 0.2332 0.0000 0.4699 0.00035337 +18 0.1978 0.2608 0.0000 0.4316 0.00034400 +19 0.1909 0.2038 0.0000 0.4924 0.00033460 +20 0.1832 0.2171 0.0000 0.4896 0.00032518 +21 0.1805 0.2440 0.0000 0.4472 0.00031572 +22 0.1791 0.1913 0.0000 0.5270 0.00030624 +23 0.1734 0.1855 0.0000 0.5354 0.00029671 +24 0.1714 0.2038 0.0000 0.4933 0.00028716 +25 0.1721 0.2172 0.0000 0.4835 0.00027757 +26 0.1655 0.2036 0.0000 0.5099 0.00026794 +27 0.1621 0.1790 0.0000 0.5257 0.00025828 +28 0.1548 0.1949 0.0000 0.5139 0.00024857 +29 0.1531 0.1748 0.0000 0.5265 0.00023882 +30 0.1524 0.1826 0.0000 0.5354 0.00022903 +31 0.1508 0.1901 0.0000 0.5309 0.00021919 +32 0.1453 0.1713 0.0000 0.5468 0.00020930 +33 0.1435 0.1728 0.0000 0.5410 0.00019936 +34 0.1488 0.1754 0.0000 0.5453 0.00018937 +35 0.1375 0.1689 0.0000 0.5401 0.00017931 +36 0.1349 0.1641 0.0000 0.5548 0.00016919 +37 0.1334 0.1661 0.0000 0.5601 0.00015901 +38 0.1292 0.1543 0.0000 0.5766 0.00014875 +39 0.1273 0.1566 0.0000 0.5815 0.00013841 +40 0.1258 0.1543 0.0000 0.5751 0.00012798 +41 0.1232 0.1571 0.0000 0.5782 0.00011746 +42 0.1211 0.1589 0.0000 0.5713 0.00010684 +43 0.1186 0.1501 0.0000 0.5850 0.00009609 +44 0.1145 0.1551 0.0000 0.5896 0.00008521 +45 0.1130 0.1510 0.0000 0.5949 0.00007417 +46 0.1106 0.1485 0.0000 0.6037 0.00006295 +47 0.1088 0.1483 0.0000 0.6005 0.00005149 +48 0.1070 0.1475 0.0000 0.6055 0.00003975 +49 0.1038 0.1465 0.0000 0.6097 0.00002759 +50 0.1021 0.1450 0.0000 0.6146 0.00001479 \ No newline at end of file diff --git a/save/erfnet_logit_norm_focal/best.txt b/save/erfnet_logit_norm_focal/best.txt new file mode 100644 index 0000000..9ac4b30 --- /dev/null +++ b/save/erfnet_logit_norm_focal/best.txt @@ -0,0 +1 @@ +Best epoch is 48, with Val-IoU= 0.6223 \ No newline at end 
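The erfnet_logit_norm_* runs above pair logit normalization (logit_norm=True in opts.txt) with either cross-entropy or focal loss. Their learningRate column is consistent with the usual polynomial decay lr = 0.0005 * (1 - (epoch-1)/num_epochs)^0.9 (e.g. 0.00049099 at epoch 2 and 0.00001479 at epoch 50 of 50), and Train-IoU stays 0.0000 only because iouTrain=False. A minimal sketch of such a loss pairing, assuming a typical LogitNorm formulation; tau, gamma, ignore_index=19 and the helper names are illustrative, not taken from this repository:

    import torch
    import torch.nn.functional as F

    def logit_norm(logits, tau=0.04, eps=1e-7):
        # scale each pixel's logit vector to unit L2 norm over the class dim
        return logits / (logits.norm(p=2, dim=1, keepdim=True) + eps) / tau

    def focal_loss(logits, target, gamma=2.0, ignore_index=19):
        # per-pixel CE, then down-weight easy (high-confidence) pixels
        ce = F.cross_entropy(logits, target, reduction='none', ignore_index=ignore_index)
        pt = torch.exp(-ce)  # probability assigned to the true class
        return ((1.0 - pt) ** gamma * ce).mean()

    # loss='focal' with logit_norm=True then corresponds roughly to:
    # loss = focal_loss(logit_norm(model(images)), labels)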
of file diff --git a/save/erfnet_logit_norm_focal/best_encoder.txt b/save/erfnet_logit_norm_focal/best_encoder.txt new file mode 100644 index 0000000..e4f79b6 --- /dev/null +++ b/save/erfnet_logit_norm_focal/best_encoder.txt @@ -0,0 +1 @@ +Best epoch is 50, with Val-IoU= 0.6146 \ No newline at end of file diff --git a/save/erfnet_logit_norm_focal/erfnet.py b/save/erfnet_logit_norm_focal/erfnet.py new file mode 100644 index 0000000..8d2bc9d --- /dev/null +++ b/save/erfnet_logit_norm_focal/erfnet.py @@ -0,0 +1,157 @@ +# ERFNet full model definition for Pytorch +# Sept 2017 +# Eduardo Romera +####################### + +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F + + +class DownsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + + self.conv = nn.Conv2d(ninput, noutput - ninput, (3, 3), stride=2, padding=1, bias=True) + self.pool = nn.MaxPool2d(2, stride=2) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = torch.cat([self.conv(input), self.pool(input)], 1) + output = self.bn(output) + return F.relu(output) + + +class non_bottleneck_1d(nn.Module): + def __init__(self, chann, dropprob, dilated): + super().__init__() + + self.conv3x1_1 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1, 0), bias=True) + + self.conv1x3_1 = nn.Conv2d(chann, chann, (1, 3), stride=1, padding=(0, 1), bias=True) + + self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) + + self.conv3x1_2 = nn.Conv2d( + chann, chann, (3, 1), stride=1, padding=(1 * dilated, 0), bias=True, dilation=(dilated, 1) + ) + + self.conv1x3_2 = nn.Conv2d( + chann, chann, (1, 3), stride=1, padding=(0, 1 * dilated), bias=True, dilation=(1, dilated) + ) + + self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) + + self.dropout = nn.Dropout2d(dropprob) + + def forward(self, input): + + output = self.conv3x1_1(input) + output = F.relu(output) + output = self.conv1x3_1(output) + output = self.bn1(output) + output = F.relu(output) + + output = self.conv3x1_2(output) + output = F.relu(output) + output = self.conv1x3_2(output) + output = self.bn2(output) + + if self.dropout.p != 0: + output = self.dropout(output) + + return F.relu(output + input) # +input = identity (residual connection) + + +class Encoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + self.initial_block = DownsamplerBlock(3, 16) + + self.layers = nn.ModuleList() + + self.layers.append(DownsamplerBlock(16, 64)) + + for x in range(0, 5): # 5 times + self.layers.append(non_bottleneck_1d(64, 0.03, 1)) + + self.layers.append(DownsamplerBlock(64, 128)) + + for x in range(0, 2): # 2 times + self.layers.append(non_bottleneck_1d(128, 0.3, 2)) + self.layers.append(non_bottleneck_1d(128, 0.3, 4)) + self.layers.append(non_bottleneck_1d(128, 0.3, 8)) + self.layers.append(non_bottleneck_1d(128, 0.3, 16)) + + # Only in encoder mode: + self.output_conv = nn.Conv2d(128, num_classes, 1, stride=1, padding=0, bias=True) + + def forward(self, input, predict=False): + output = self.initial_block(input) + + for layer in self.layers: + output = layer(output) + + if predict: + output = self.output_conv(output) + + return output + + +class UpsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + return F.relu(output) + + 
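+# illustrative shape check, assuming the 512x1024 crops and 20 classes recorded
+# in opts.txt / model.txt (not part of the original file):
+#   net = Net(num_classes=20)
+#   out = net(torch.randn(1, 3, 512, 1024))
+#   # the encoder halves resolution three times (512x1024 -> 64x128); the two
+#   # UpsamplerBlocks and the stride-2 output_conv restore it, so
+#   # out.shape == (1, 20, 512, 1024)
+#   # (the DownsamplerBlock concat also explains the Conv2d(3, 13) + BatchNorm2d(16)
+#   # pairing in the dumps: 13 conv channels + 3 pooled input channels = 16)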
+class Decoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + + self.layers = nn.ModuleList() + + self.layers.append(UpsamplerBlock(128, 64)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + + self.layers.append(UpsamplerBlock(64, 16)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + + self.output_conv = nn.ConvTranspose2d(16, num_classes, 2, stride=2, padding=0, output_padding=0, bias=True) + + def forward(self, input): + output = input + + for layer in self.layers: + output = layer(output) + + output = self.output_conv(output) + + return output + + +# ERFNet +class Net(nn.Module): + def __init__(self, num_classes, encoder=None): # use encoder to pass pretrained encoder + super().__init__() + + if encoder == None: + self.encoder = Encoder(num_classes) + else: + self.encoder = encoder + self.decoder = Decoder(num_classes) + + def forward(self, input, only_encode=False): + if only_encode: + return self.encoder.forward(input, predict=True) + else: + output = self.encoder(input) # predict=False by default + return self.decoder.forward(output) diff --git a/save/erfnet_logit_norm_focal/model.txt b/save/erfnet_logit_norm_focal/model.txt new file mode 100644 index 0000000..df34e27 --- /dev/null +++ b/save/erfnet_logit_norm_focal/model.txt @@ -0,0 +1,136 @@ +DataParallel( + (module): Net( + (encoder): Encoder( + (initial_block): DownsamplerBlock( + (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (layers): ModuleList( + (0): DownsamplerBlock( + (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-5): 5 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.03, inplace=False) + ) + (6): DownsamplerBlock( + (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (7): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (8): non_bottleneck_1d( 
+ (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (9): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (10): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (11): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (12): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (13): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), 
dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (14): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + ) + (output_conv): Conv2d(128, 20, kernel_size=(1, 1), stride=(1, 1)) + ) + (decoder): Decoder( + (layers): ModuleList( + (0): UpsamplerBlock( + (conv): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-2): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + (3): UpsamplerBlock( + (conv): ConvTranspose2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (4-5): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + ) + (output_conv): ConvTranspose2d(16, 20, kernel_size=(2, 2), stride=(2, 2)) + ) + ) +) \ No newline at end of file diff --git a/save/erfnet_logit_norm_focal/model_encoder.txt b/save/erfnet_logit_norm_focal/model_encoder.txt new file mode 100644 index 0000000..df34e27 --- /dev/null +++ b/save/erfnet_logit_norm_focal/model_encoder.txt @@ -0,0 +1,136 @@ +DataParallel( + (module): Net( + (encoder): Encoder( + (initial_block): DownsamplerBlock( + (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (layers): ModuleList( + (0): DownsamplerBlock( + (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-5): 5 x non_bottleneck_1d( + 
(conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.03, inplace=False) + ) + (6): DownsamplerBlock( + (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (7): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (8): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (9): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (10): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (11): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, 
momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (12): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (13): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + (14): non_bottleneck_1d( + (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1)) + (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16)) + (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0.3, inplace=False) + ) + ) + (output_conv): Conv2d(128, 20, kernel_size=(1, 1), stride=(1, 1)) + ) + (decoder): Decoder( + (layers): ModuleList( + (0): UpsamplerBlock( + (conv): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (1-2): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + (3): UpsamplerBlock( + (conv): ConvTranspose2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) + (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + ) + (4-5): 2 x non_bottleneck_1d( + (conv3x1_1): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_1): Conv2d(16, 16, 
kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (conv3x1_2): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)) + (conv1x3_2): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1)) + (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) + (dropout): Dropout2d(p=0, inplace=False) + ) + ) + (output_conv): ConvTranspose2d(16, 20, kernel_size=(2, 2), stride=(2, 2)) + ) + ) +) \ No newline at end of file diff --git a/save/erfnet_logit_norm_focal/opts.txt b/save/erfnet_logit_norm_focal/opts.txt new file mode 100644 index 0000000..e175cb1 --- /dev/null +++ b/save/erfnet_logit_norm_focal/opts.txt @@ -0,0 +1 @@ +Namespace(cuda=True, model='erfnet', state=None, port=8097, datadir='../cityscapes/', height=512, width=1024, num_epochs=50, num_workers=4, batch_size=6, steps_loss=50, steps_plot=50, epochs_save=0, savedir='erfnet_logit_norm_focal', decoder=False, pretrainedEncoder=None, visualize=False, iouTrain=False, iouVal=True, resume=False, erfnet=True, colab=False, download_step=10, loss='focal', logit_norm=True) \ No newline at end of file diff --git a/save/erfnet_training_ce/automated_log.txt b/save/erfnet_training_ce/automated_log.txt new file mode 100644 index 0000000..ed54f67 --- /dev/null +++ b/save/erfnet_training_ce/automated_log.txt @@ -0,0 +1,51 @@ +Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate +1 1.1543 0.7562 0.0000 0.2676 0.00050000 +2 0.5915 0.5704 0.0000 0.3364 0.00049099 +3 0.4558 0.5226 0.0000 0.3935 0.00048196 +4 0.4072 0.4583 0.0000 0.4473 0.00047292 +5 0.3755 0.4247 0.0000 0.4651 0.00046385 +6 0.3529 0.3976 0.0000 0.4813 0.00045477 +7 0.3309 0.4189 0.0000 0.4729 0.00044566 +8 0.3312 0.3556 0.0000 0.5303 0.00043653 +9 0.3058 0.3726 0.0000 0.5174 0.00042739 +10 0.2971 0.4199 0.0000 0.5085 0.00041822 +11 0.2830 0.3481 0.0000 0.5467 0.00040903 +12 0.2845 0.3467 0.0000 0.5361 0.00039981 +13 0.2980 0.3655 0.0000 0.5584 0.00039057 +14 0.2837 0.3723 0.0000 0.5583 0.00038131 +15 0.2679 0.3398 0.0000 0.5642 0.00037202 +16 0.2582 0.3363 0.0000 0.5486 0.00036271 +17 0.2629 0.3235 0.0000 0.5705 0.00035337 +18 0.2539 0.3168 0.0000 0.5904 0.00034400 +19 0.2549 0.3350 0.0000 0.5681 0.00033460 +20 0.2433 0.3983 0.0000 0.5523 0.00032518 +21 0.2479 0.3240 0.0000 0.5836 0.00031572 +22 0.2373 0.3165 0.0000 0.5772 0.00030624 +23 0.2292 0.3254 0.0000 0.5822 0.00029671 +24 0.2325 0.3346 0.0000 0.5751 0.00028716 +25 0.2300 0.3364 0.0000 0.5746 0.00027757 +26 0.2167 0.2883 0.0000 0.6021 0.00026794 +27 0.2242 0.3179 0.0000 0.5912 0.00025828 +28 0.2123 0.3222 0.0000 0.5999 0.00024857 +29 0.2205 0.2943 0.0000 0.6197 0.00023882 +30 0.2060 0.2993 0.0000 0.6176 0.00022903 +31 0.2171 0.2802 0.0000 0.6338 0.00021919 +32 0.1980 0.3408 0.0000 0.5965 0.00020930 +33 0.1976 0.2906 0.0000 0.6162 0.00019936 +34 0.1918 0.2964 0.0000 0.6330 0.00018937 +35 0.1943 0.2740 0.0000 0.6397 0.00017931 +36 0.1838 0.2725 0.0000 0.6425 0.00016919 +37 0.1828 0.2751 0.0000 0.6404 0.00015901 +38 0.1787 0.2727 0.0000 0.6395 0.00014875 +39 0.1744 0.3113 0.0000 0.6142 0.00013841 +40 0.1813 0.2761 0.0000 0.6512 0.00012798 +41 0.1731 0.2778 0.0000 0.6333 0.00011746 +42 0.1704 0.2683 0.0000 0.6572 0.00010684 +43 0.1654 0.2686 0.0000 0.6602 0.00009609 +44 0.1615 0.2693 0.0000 0.6601 0.00008521 +45 0.1599 0.2701 0.0000 0.6643 0.00007417 +46 0.1593 0.2639 0.0000 0.6735 0.00006295 +47 0.1573 0.2602 0.0000 0.6749 0.00005149 +48 0.1536 0.2631 0.0000 0.6755 0.00003975 +49 0.1518 
0.2602 0.0000 0.6763 0.00002759 +50 0.1506 0.2573 0.0000 0.6778 0.00001479 \ No newline at end of file diff --git a/save/erfnet_training_ce/automated_log_encoder.txt b/save/erfnet_training_ce/automated_log_encoder.txt new file mode 100644 index 0000000..9f93a7e --- /dev/null +++ b/save/erfnet_training_ce/automated_log_encoder.txt @@ -0,0 +1,51 @@ +Epoch Train-loss Test-loss Train-IoU Test-IoU learningRate +1 0.9151 0.8415 0.0000 0.2250 0.00050000 +2 0.6360 0.5955 0.0000 0.3203 0.00049099 +3 0.5415 0.5558 0.0000 0.3390 0.00048196 +4 0.4897 0.5673 0.0000 0.3460 0.00047292 +5 0.4537 0.5234 0.0000 0.3786 0.00046385 +6 0.4261 0.4899 0.0000 0.3940 0.00045477 +7 0.4070 0.4630 0.0000 0.4126 0.00044566 +8 0.3903 0.4209 0.0000 0.4325 0.00043653 +9 0.3674 0.4160 0.0000 0.4345 0.00042739 +10 0.3565 0.4451 0.0000 0.4403 0.00041822 +11 0.3440 0.3726 0.0000 0.4787 0.00040903 +12 0.3431 0.4004 0.0000 0.4423 0.00039981 +13 0.3271 0.3807 0.0000 0.4699 0.00039057 +14 0.3173 0.3832 0.0000 0.4668 0.00038131 +15 0.3067 0.3509 0.0000 0.4973 0.00037202 +16 0.3039 0.3311 0.0000 0.5117 0.00036271 +17 0.2908 0.3255 0.0000 0.5171 0.00035337 +18 0.2833 0.3247 0.0000 0.5299 0.00034400 +19 0.2785 0.3147 0.0000 0.5348 0.00033460 +20 0.2723 0.3170 0.0000 0.5307 0.00032518 +21 0.2692 0.3257 0.0000 0.5179 0.00031572 +22 0.2629 0.3165 0.0000 0.5407 0.00030624 +23 0.2600 0.3020 0.0000 0.5443 0.00029671 +24 0.2538 0.3071 0.0000 0.5541 0.00028716 +25 0.2432 0.2888 0.0000 0.5598 0.00027757 +26 0.2395 0.3099 0.0000 0.5406 0.00026794 +27 0.2371 0.2762 0.0000 0.5839 0.00025828 +28 0.2301 0.3195 0.0000 0.5335 0.00024857 +29 0.2363 0.2746 0.0000 0.5874 0.00023882 +30 0.2209 0.2733 0.0000 0.5836 0.00022903 +31 0.2202 0.2683 0.0000 0.5857 0.00021919 +32 0.2149 0.2708 0.0000 0.5894 0.00020930 +33 0.2064 0.2664 0.0000 0.5977 0.00019936 +34 0.2085 0.2756 0.0000 0.5854 0.00018937 +35 0.2114 0.2618 0.0000 0.5872 0.00017931 +36 0.1988 0.2575 0.0000 0.6037 0.00016919 +37 0.1946 0.2517 0.0000 0.6224 0.00015901 +38 0.1960 0.2623 0.0000 0.6174 0.00014875 +39 0.1945 0.2474 0.0000 0.6169 0.00013841 +40 0.1847 0.2530 0.0000 0.6114 0.00012798 +41 0.1806 0.2507 0.0000 0.6105 0.00011746 +42 0.1767 0.2452 0.0000 0.6279 0.00010684 +43 0.1767 0.2492 0.0000 0.6183 0.00009609 +44 0.1722 0.2443 0.0000 0.6299 0.00008521 +45 0.1686 0.2437 0.0000 0.6325 0.00007417 +46 0.1659 0.2404 0.0000 0.6337 0.00006295 +47 0.1636 0.2346 0.0000 0.6377 0.00005149 +48 0.1607 0.2381 0.0000 0.6360 0.00003975 +49 0.1586 0.2360 0.0000 0.6384 0.00002759 +50 0.1564 0.2326 0.0000 0.6436 0.00001479 \ No newline at end of file diff --git a/save/erfnet_training_ce/best.txt b/save/erfnet_training_ce/best.txt new file mode 100644 index 0000000..af5aacf --- /dev/null +++ b/save/erfnet_training_ce/best.txt @@ -0,0 +1 @@ +Best epoch is 50, with Val-IoU= 0.6778 \ No newline at end of file diff --git a/save/erfnet_training_ce/best_encoder.txt b/save/erfnet_training_ce/best_encoder.txt new file mode 100644 index 0000000..9dc6870 --- /dev/null +++ b/save/erfnet_training_ce/best_encoder.txt @@ -0,0 +1 @@ +Best epoch is 50, with Val-IoU= 0.6436 \ No newline at end of file diff --git a/save/erfnet_training_ce/erfnet.py b/save/erfnet_training_ce/erfnet.py new file mode 100644 index 0000000..8d2bc9d --- /dev/null +++ b/save/erfnet_training_ce/erfnet.py @@ -0,0 +1,157 @@ +# ERFNet full model definition for Pytorch +# Sept 2017 +# Eduardo Romera +####################### + +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F + + +class 
DownsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + + self.conv = nn.Conv2d(ninput, noutput - ninput, (3, 3), stride=2, padding=1, bias=True) + self.pool = nn.MaxPool2d(2, stride=2) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = torch.cat([self.conv(input), self.pool(input)], 1) + output = self.bn(output) + return F.relu(output) + + +class non_bottleneck_1d(nn.Module): + def __init__(self, chann, dropprob, dilated): + super().__init__() + + self.conv3x1_1 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1, 0), bias=True) + + self.conv1x3_1 = nn.Conv2d(chann, chann, (1, 3), stride=1, padding=(0, 1), bias=True) + + self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) + + self.conv3x1_2 = nn.Conv2d( + chann, chann, (3, 1), stride=1, padding=(1 * dilated, 0), bias=True, dilation=(dilated, 1) + ) + + self.conv1x3_2 = nn.Conv2d( + chann, chann, (1, 3), stride=1, padding=(0, 1 * dilated), bias=True, dilation=(1, dilated) + ) + + self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) + + self.dropout = nn.Dropout2d(dropprob) + + def forward(self, input): + + output = self.conv3x1_1(input) + output = F.relu(output) + output = self.conv1x3_1(output) + output = self.bn1(output) + output = F.relu(output) + + output = self.conv3x1_2(output) + output = F.relu(output) + output = self.conv1x3_2(output) + output = self.bn2(output) + + if self.dropout.p != 0: + output = self.dropout(output) + + return F.relu(output + input) # +input = identity (residual connection) + + +class Encoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + self.initial_block = DownsamplerBlock(3, 16) + + self.layers = nn.ModuleList() + + self.layers.append(DownsamplerBlock(16, 64)) + + for x in range(0, 5): # 5 times + self.layers.append(non_bottleneck_1d(64, 0.03, 1)) + + self.layers.append(DownsamplerBlock(64, 128)) + + for x in range(0, 2): # 2 times + self.layers.append(non_bottleneck_1d(128, 0.3, 2)) + self.layers.append(non_bottleneck_1d(128, 0.3, 4)) + self.layers.append(non_bottleneck_1d(128, 0.3, 8)) + self.layers.append(non_bottleneck_1d(128, 0.3, 16)) + + # Only in encoder mode: + self.output_conv = nn.Conv2d(128, num_classes, 1, stride=1, padding=0, bias=True) + + def forward(self, input, predict=False): + output = self.initial_block(input) + + for layer in self.layers: + output = layer(output) + + if predict: + output = self.output_conv(output) + + return output + + +class UpsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + return F.relu(output) + + +class Decoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + + self.layers = nn.ModuleList() + + self.layers.append(UpsamplerBlock(128, 64)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + self.layers.append(non_bottleneck_1d(64, 0, 1)) + + self.layers.append(UpsamplerBlock(64, 16)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + self.layers.append(non_bottleneck_1d(16, 0, 1)) + + self.output_conv = nn.ConvTranspose2d(16, num_classes, 2, stride=2, padding=0, output_padding=0, bias=True) + + def forward(self, input): + output = input + + for layer in self.layers: + output = layer(output) + + output = self.output_conv(output) + + return output + + +# ERFNet +class Net(nn.Module): + def 
diff --git a/save/erfnet_training_ce/model.txt b/save/erfnet_training_ce/model.txt
new file mode 100644
index 0000000..df34e27
--- /dev/null
+++ b/save/erfnet_training_ce/model.txt
@@ -0,0 +1,136 @@
+DataParallel(
+  (module): Net(
+    (encoder): Encoder(
+      (initial_block): DownsamplerBlock(
+        (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
+        (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
+        (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+      )
+      (layers): ModuleList(
+        (0): DownsamplerBlock(
+          (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
+          (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
+          (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+        )
+        (1-5): 5 x non_bottleneck_1d(
+          (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.03, inplace=False)
+        )
+        (6): DownsamplerBlock(
+          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
+          (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
+          (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+        )
+        (7): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (8): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (9): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (10): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (11): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (12): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (13): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (14): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+      )
+      (output_conv): Conv2d(128, 20, kernel_size=(1, 1), stride=(1, 1))
+    )
+    (decoder): Decoder(
+      (layers): ModuleList(
+        (0): UpsamplerBlock(
+          (conv): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
+          (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+        )
+        (1-2): 2 x non_bottleneck_1d(
+          (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0, inplace=False)
+        )
+        (3): UpsamplerBlock(
+          (conv): ConvTranspose2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
+          (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+        )
+        (4-5): 2 x non_bottleneck_1d(
+          (conv3x1_1): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_2): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0, inplace=False)
+        )
+      )
+      (output_conv): ConvTranspose2d(16, 20, kernel_size=(2, 2), stride=(2, 2))
+    )
+  )
+)
\ No newline at end of file
diff --git a/save/erfnet_training_ce/model_encoder.txt b/save/erfnet_training_ce/model_encoder.txt
new file mode 100644
index 0000000..df34e27
--- /dev/null
+++ b/save/erfnet_training_ce/model_encoder.txt
@@ -0,0 +1,136 @@
+DataParallel(
+  (module): Net(
+    (encoder): Encoder(
+      (initial_block): DownsamplerBlock(
+        (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
+        (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
+        (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+      )
+      (layers): ModuleList(
+        (0): DownsamplerBlock(
+          (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
+          (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
+          (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+        )
+        (1-5): 5 x non_bottleneck_1d(
+          (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.03, inplace=False)
+        )
+        (6): DownsamplerBlock(
+          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
+          (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
+          (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+        )
+        (7): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (8): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (9): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (10): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (11): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(2, 0), dilation=(2, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2), dilation=(1, 2))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (12): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(4, 0), dilation=(4, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4), dilation=(1, 4))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (13): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(8, 0), dilation=(8, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 8), dilation=(1, 8))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+        (14): non_bottleneck_1d(
+          (conv3x1_1): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(128, 128, kernel_size=(3, 1), stride=(1, 1), padding=(16, 0), dilation=(16, 1))
+          (conv1x3_2): Conv2d(128, 128, kernel_size=(1, 3), stride=(1, 1), padding=(0, 16), dilation=(1, 16))
+          (bn2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0.3, inplace=False)
+        )
+      )
+      (output_conv): Conv2d(128, 20, kernel_size=(1, 1), stride=(1, 1))
+    )
+    (decoder): Decoder(
+      (layers): ModuleList(
+        (0): UpsamplerBlock(
+          (conv): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
+          (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+        )
+        (1-2): 2 x non_bottleneck_1d(
+          (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_2): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0, inplace=False)
+        )
+        (3): UpsamplerBlock(
+          (conv): ConvTranspose2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
+          (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+        )
+        (4-5): 2 x non_bottleneck_1d(
+          (conv3x1_1): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_1): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (conv3x1_2): Conv2d(16, 16, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
+          (conv1x3_2): Conv2d(16, 16, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
+          (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
+          (dropout): Dropout2d(p=0, inplace=False)
+        )
+      )
+      (output_conv): ConvTranspose2d(16, 20, kernel_size=(2, 2), stride=(2, 2))
+    )
+  )
+)
\ No newline at end of file
diff --git a/save/erfnet_training_ce/opts.txt b/save/erfnet_training_ce/opts.txt
new file mode 100644
index 0000000..a4c0fb1
--- /dev/null
+++ b/save/erfnet_training_ce/opts.txt
@@ -0,0 +1 @@
+Namespace(cuda=True, model='erfnet', port=8097, datadir='../cityscapes/', height=512, width=1024, num_epochs=50, num_workers=4, batch_size=6, steps_loss=50, steps_plot=50, epochs_save=0, savedir='erfnet_training3_ce', decoder=True, pretrainedEncoder='../save/erfnet_training2_ce/checkpoint_enc.pth.tar', visualize=False, iouTrain=False, iouVal=True, resume=False, colab=False, download_step=10, loss='ce', logit_norm=False, entropic_scale=10.0, no_unlabeled=True, loadDir='../trained_models/', loadWeights=None, fine_tuning=False)
\ No newline at end of file
diff --git a/train/README.md b/train/README.md
index bf8027b..85dcba2 100644
--- a/train/README.md
+++ b/train/README.md
@@ -14,12 +14,12 @@ For all options and defaults please see the bottom of the "main.py" file. Requir
 ## Example commands
 Train encoder with 150 epochs and batch=6 and then train decoder (decoder training starts after encoder training):
 ```
-python main.py --savedir erfnet_training1 --datadir /home/datasets/cityscapes/ --num-epochs 150 --batch-size 6
+python main.py --savedir erfnet_training1 --datadir /content/cityscapes/ --num-epochs 150 --batch-size 6
 ```
 
 Train decoder using encoder's pretrained weights with ImageNet:
 ```
-python main.py --savedir erfnet_training1 --datadir /home/datasets/cityscapes/ --num-epochs 150 --batch-size 6 --decoder --pretrainedEncoder "../trained_models/erfnet_encoder_pretrained.pth.tar"
+python main.py --savedir erfnet_training1 --datadir /content/cityscapes/ --num-epochs 150 --batch-size 6 --decoder --pretrainedEncoder "../trained_models/erfnet_encoder_pretrained.pth.tar"
 ```
 
 ## Output files generated for each training:
diff --git a/trained_models/bisenet_pretrained.pth b/trained_models/bisenet_pretrained.pth
deleted file mode 100644
index 84c9455..0000000
Binary files a/trained_models/bisenet_pretrained.pth and /dev/null differ