-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathconfig.py
197 lines (155 loc) · 6.78 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
"""
Main network configuration and hyperparameters
Copyright (c) Pedro F. Proenza
"""
import math
import numpy as np
# Base Configuration Class
class Config:
"""Base configuration class. For custom configurations, create a
sub-class that inherits from this one and override properties
that need to be changed.
"""
# NUMBER OF GPUs to use. For CPU training, use 1
GPU_COUNT = 1
# Number of images to train with on each GPU.
IMAGES_PER_GPU = 2
# Number of training steps per epoch
# This doesn't need to match the size of the training set. Tensorboard
# updates are saved at the end of each epoch, so setting this to a
# smaller number means getting more frequent TensorBoard updates.
# Validation stats are also calculated at each epoch end and they
# might take a while, so don't set this too small to avoid spending
# a lot of time on validation stats.
STEPS_PER_EPOCH = 1000
# Number of validation steps to run at the end of every training epoch.
# A bigger number improves accuracy of validation stats, but slows
# down the training.
VALIDATION_STEPS = 50
# Backbone network architecture
# Supported values are: resnet50, resnet101
BACKBONE = "resnet101"
# Number of filters used in the last Convolution layer
BOTTLENECK_WIDTH = 128
# Size of branch input
BRANCH_SIZE = 1024
# Input image resizing
# Generally, use the "square" resizing mode for training and inferencing
# and it should work well in most cases. In this mode, images are scaled
# up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
# scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
# padded with zeros to make it a square so multiple images can be put
# in one batch.
# Available resizing modes:
# none: No resizing or padding. Return the image unchanged.
# square: Resize and pad with zeros to get a square image
# of size [max_dim, max_dim].
# pad64: Pads width and height with zeros to make them multiples of 64.
# If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales
# up before padding. IMAGE_MAX_DIM is ignored in this mode.
# The multiple of 64 is needed to ensure smooth scaling of feature
# maps up and down the 6 levels of the FPN pyramid (2**6=64).
# crop: Picks random crops from the image. First, scales the image based
# on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of
# size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only.
# IMAGE_MAX_DIM is not used in this mode.
IMAGE_RESIZE_MODE = "pad64"
IMAGE_MIN_DIM = 480
IMAGE_MAX_DIM = 512
# Minimum scaling ratio. Checked after MIN_IMAGE_DIM and can force further
# up scaling. For example, if set to 2 then images are scaled up to double
# the width and height, or more, even if MIN_IMAGE_DIM doesn't require it.
# Howver, in 'square' mode, it can be overruled by IMAGE_MAX_DIM.
IMAGE_MIN_SCALE = 0
NR_IMAGE_CHANNELS = 3 # DONT TOUCH THIS IF WE ARE USING PRE-TRAINED WEIGHTS
# Image mean (RGB)
MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
# Learning rate and momentum
# The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
# weights to explode. Likely due to differences in optimzer
# implementation.
LEARNING_RATE = 0.001
LEARNING_MOMENTUM = 0.9
# Option to use cyclical learning rate
CLR = False
MAX_LEARNING_RATE = 0.0005#0.0015 # 0.0015
BASE_LEARNING_RATE = 0.0001#0.00005 # 0.00005
CLR_STEP_SIZE = 4000
# Perform regression or classification
REGRESS_ORI = True
REGRESS_LOC = True
REGRESS_KEYPOINTS = False
# Performs camera rotation perturbations
ROT_AUG = True
# Performs blurring, brightness change, RGB2GRAY
SIM2REAL_AUG = False
# In-plane rotation augmentation
ROT_IMAGE_AUG = False
# Option only used for regression
ORIENTATION_PARAM = 'quaternion'
DECOUPLE_ORIENTATION = False
# Only used for classification
# Number of bins used to discretize a dimension
LOC_BINS_PER_DIM = 16
ORI_BINS_PER_DIM = 32
# Soft assignement parameter to scale gaussian stddev
BETA = 6.0
OPTIMIZER = 'SGD'
# Weight decay regularization
WEIGHT_DECAY = 0.0001
# Sets Keras datatype to 16 bit instead of 32 bit (only works with RTX GPU family)
F16 = False
# If true, learns loss weights by minimizing the negative log likelihood
LEARNABLE_LOSS_WEIGHTS = False
# Loss weights for more precise optimization.
# Can be used for R-CNN training setup.
LOSS_WEIGHTS = {
"loc_loss": 1.,
"ori_loss": 1.,
"k2_loss": 1.,
"k3_loss": 1.
}
# Train or freeze batch normalization layers
# None: Train BN layers. This is the normal mode
# False: Freeze BN layers. Good when using a small batch size
# True: (don't use). Set layer in training mode even when inferencing
TRAIN_BN = False # Defaulting to False since batch size is often small
# Gradient norm clipping
GRADIENT_CLIP_NORM = 5.0
def update(self):
'''Set rest of parameters based on configuration values'''
# Effective batch size
self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
# Input image size
if self.IMAGE_RESIZE_MODE == "crop":
self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, self.NR_IMAGE_CHANNELS])
elif self.IMAGE_RESIZE_MODE == "pad64":
# Assumes wide images
self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MAX_DIM, self.NR_IMAGE_CHANNELS])
else:
self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, self.NR_IMAGE_CHANNELS])
# Image meta data length
# See compose_image_meta() for details
self.IMAGE_META_SIZE = 1 + self.NR_IMAGE_CHANNELS + 3 + 4 + 1
def __init__(self):
self.update()
def display(self):
"""Display Configuration values."""
print("\nConfigurations:")
for a in dir(self):
if not a.startswith("__") and not callable(getattr(self, a)):
print("{:30} {}".format(a, getattr(self, a)))
print("\n")
def write_to_file(self, filepath):
import json
import os
config_dict = {}
for a in dir(self):
if not a.startswith("__") and not callable(getattr(self, a)) and \
not isinstance(getattr(self, a),np.ndarray):
config_dict[a] = getattr(self, a)
directory = os.path.dirname(filepath)
if not os.path.isdir(directory):
os.makedirs(directory)
with open(filepath, 'w+') as f:
f.write(json.dumps(config_dict))