Merge pull request #153 from BR-IDL/add_training
add training for classification models
xperzy authored Jan 11, 2022
2 parents f8b4da0 + cc0b342 commit 6a2c063
Showing 229 changed files with 19,223 additions and 3,220 deletions.
4 changes: 2 additions & 2 deletions image_classification/BoTNet/augment.py
@@ -58,7 +58,7 @@ def auto_augment_policy_original():
     return policy
 
 
-def rand_augment_policy_original(magnitude_idx):
+def rand_augment_policy_original(magnitude_idx=9):
     """
     14 types of augment policies in original paper
     Args:
@@ -112,7 +112,7 @@ class RandAugment():
         transformed_image = augment(image)
     """
 
-    def __init__(self, policy, num_layers):
+    def __init__(self, policy, num_layers=2):
        """
         Args:
             policy: list of SubPolicy
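The two new defaults matter because datasets.py (below) now calls rand_augment_policy_original() and RandAugment(policy) with no arguments; magnitude_idx=9 and num_layers=2 mirror the N=2, M=9 setting commonly used for ImageNet training with RandAugment. A minimal usage sketch, assuming the augment-module API shown in this diff ('sample.jpg' is a hypothetical path):

from PIL import Image
from augment import rand_augment_policy_original, RandAugment

policy = rand_augment_policy_original()   # same as magnitude_idx=9
augment = RandAugment(policy)             # same as num_layers=2

image = Image.open('sample.jpg')          # hypothetical input image
transformed_image = augment(image)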
16 changes: 2 additions & 14 deletions image_classification/BoTNet/config.py
@@ -89,20 +89,6 @@
 _C.TRAIN.RANDOM_ERASE_COUNT = 1
 _C.TRAIN.RANDOM_ERASE_SPLIT = False
 
-# augmentation
-_C.AUG = CN()
-_C.AUG.COLOR_JITTER = 0.4 # color jitter factor
-_C.AUG.AUTO_AUGMENT = 'rand-m9-mstd0.5-inc1'
-_C.AUG.RE_PROB = 0.25 # random erase prob
-_C.AUG.RE_MODE = 'pixel' # random erase mode
-_C.AUG.RE_COUNT = 1 # random erase count
-_C.AUG.MIXUP = 0.8 # mixup alpha, enabled if >0
-_C.AUG.CUTMIX = 1.0 # cutmix alpha, enabled if >0
-_C.AUG.CUTMIX_MINMAX = None # cutmix min/max ratio, overrides alpha
-_C.AUG.MIXUP_PROB = 1.0 # prob of mixup or cutmix when either/both is enabled
-_C.AUG.MIXUP_SWITCH_PROB = 0.5 # prob of switching cutmix when both mixup and cutmix enabled
-_C.AUG.MIXUP_MODE = 'batch' # how to apply mixup/cutmix params, per 'batch', 'pair', or 'elem'
-
 # misc
 _C.SAVE = "./output"
 _C.TAG = "default"
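The deleted _C.AUG block appears to have been dead configuration: the training pipeline reads augmentation settings from _C.TRAIN.* (see datasets.py below), so as far as the changes here show, the parallel _C.AUG.* keys had no reader. A minimal sketch of the yacs pattern config.py uses, assuming CN is yacs.config.CfgNode as elsewhere in this repo:

from yacs.config import CfgNode as CN

_C = CN()
_C.TRAIN = CN()
_C.TRAIN.COLOR_JITTER = 0.4    # consumed by get_train_transforms in datasets.py
# the removed _C.AUG.* keys had no consumer in the training code shown here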
@@ -145,6 +131,8 @@ def update_config(config, args):
         config.DATA.BATCH_SIZE = args.batch_size
     if args.image_size:
         config.DATA.IMAGE_SIZE = args.image_size
+    if args.num_classes:
+        config.MODEL.NUM_CLASSES = args.num_classes
     if args.data_path:
         config.DATA.DATA_PATH = args.data_path
     if args.output is not None:
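With -num_classes wired through update_config, the class count can be overridden from the command line without editing the YAML file, which is the usual first step when fine-tuning on a custom dataset. A condensed sketch of the new override path (config stands for the yacs CfgNode built in this file; the value 100 is illustrative):

from yacs.config import CfgNode as CN

config = CN()
config.MODEL = CN()
config.MODEL.NUM_CLASSES = 1000      # default from the YAML/config defaults

args_num_classes = 100               # parsed from the new -num_classes flag
if args_num_classes:                 # same truthiness guard as update_config
    config.MODEL.NUM_CLASSES = args_num_classes

Note the truthiness guard: like the -batch_size and -image_size overrides above it, a zero value would be ignored, which is harmless here since a zero-class model is meaningless.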
18 changes: 15 additions & 3 deletions image_classification/BoTNet/datasets.py
@@ -20,12 +20,19 @@
 import os
 import math
 from PIL import Image
-from paddle.io import Dataset, DataLoader, DistributedBatchSampler
-from paddle.vision import transforms, datasets, image_load
+from paddle.io import Dataset
+from paddle.io import DataLoader
+from paddle.io import DistributedBatchSampler
+from paddle.vision import transforms
+from paddle.vision import datasets
+from paddle.vision import image_load
 from augment import auto_augment_policy_original
 from augment import AutoAugment
+from augment import rand_augment_policy_original
+from augment import RandAugment
 from random_erasing import RandomErasing
 
 
 class ImageNet2012Dataset(Dataset):
     """Build ImageNet2012 dataset
@@ -93,9 +100,13 @@ def get_train_transforms(config):
         policy = auto_augment_policy_original()
         auto_augment = AutoAugment(policy)
         aug_op_list.append(auto_augment)
+    elif config.TRAIN.RAND_AUGMENT:
+        policy = rand_augment_policy_original()
+        rand_augment = RandAugment(policy)
+        aug_op_list.append(rand_augment)
     else:
         jitter = (float(config.TRAIN.COLOR_JITTER),) * 3
-        aug_op_list.append(transforms.ColorJitter(jitter))
+        aug_op_list.append(transforms.ColorJitter(*jitter))
     # other ops
     aug_op_list.append(transforms.ToTensor())
     aug_op_list.append(transforms.Normalize(mean=config.DATA.IMAGENET_MEAN,
@@ -147,6 +158,7 @@ def get_dataset(config, mode='train'):
     Returns:
         dataset: dataset object
     """
+
     assert mode in ['train', 'val']
     if config.DATA.DATASET == "cifar10":
         if mode == 'train':
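The ColorJitter change is a bug fix rather than style: paddle.vision.transforms.ColorJitter takes separate brightness, contrast, and saturation arguments, so passing the 3-tuple unexpanded handed the whole tuple to the first parameter and left the other two at their defaults. A minimal sketch (the 0.4 values are illustrative of config.TRAIN.COLOR_JITTER):

from paddle.vision import transforms

jitter = (0.4, 0.4, 0.4)

# old: transforms.ColorJitter(jitter) put the whole tuple into `brightness`
# new: unpacking maps one value to each parameter
color_jitter = transforms.ColorJitter(*jitter)  # brightness=0.4, contrast=0.4, saturation=0.4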
8 changes: 3 additions & 5 deletions image_classification/BoTNet/main_multi_gpu.py
@@ -47,6 +47,7 @@ def get_arguments():
     parser.add_argument('-data_path', type=str, default=None)
     parser.add_argument('-output', type=str, default=None)
     parser.add_argument('-ngpus', type=int, default=None)
+    parser.add_argument('-num_classes', type=int, default=None)
     parser.add_argument('-pretrained', type=str, default=None)
     parser.add_argument('-resume', type=str, default=None)
     parser.add_argument('-last_epoch', type=int, default=None)
@@ -556,11 +557,8 @@ def main_worker(*args):
                 config.SAVE, f"{config.MODEL.TYPE}-Epoch-{epoch}-Loss-{train_loss}")
             paddle.save(model.state_dict(), model_path + '.pdparams')
             paddle.save(optimizer.state_dict(), model_path + '.pdopt')
-            local_logger.info(f"----- Save model: {model_path}.pdparams")
-            local_logger.info(f"----- Save optim: {model_path}.pdopt")
-            if local_rank == 0:
-                master_logger.info(f"----- Save model: {model_path}.pdparams")
-                master_logger.info(f"----- Save optim: {model_path}.pdopt")
+            master_logger.info(f"----- Save model: {model_path}.pdparams")
+            master_logger.info(f"----- Save optim: {model_path}.pdopt")
 
 
 def main():
14 changes: 9 additions & 5 deletions image_classification/BoTNet/main_single_gpu.py
@@ -46,6 +46,7 @@ def get_arguments():
     parser.add_argument('-data_path', type=str, default=None)
     parser.add_argument('-output', type=str, default=None)
     parser.add_argument('-ngpus', type=int, default=None)
+    parser.add_argument('-num_classes', type=int, default=None)
     parser.add_argument('-pretrained', type=str, default=None)
     parser.add_argument('-resume', type=str, default=None)
     parser.add_argument('-last_epoch', type=int, default=None)
@@ -266,11 +267,14 @@ def main():
     criterion_val = nn.CrossEntropyLoss()
 
     # STEP 5: Define optimizer and lr_scheduler
-    # set lr according to batch size and world size (hacked from official code)
-    if config.TRAIN.LINEAR_SCALED_LR:
-        linear_scaled_lr = (config.TRAIN.BASE_LR * config.DATA.BATCH_SIZE) / 1024.0
-        linear_scaled_warmup_start_lr = (config.TRAIN.WARMUP_START_LR * config.DATA.BATCH_SIZE) / 1024.0
-        linear_scaled_end_lr = (config.TRAIN.END_LR * config.DATA.BATCH_SIZE) / 1024.0
+    # set lr according to batch size and world size (hacked from Swin official code and modified for CSwin)
+    if config.TRAIN.LINEAR_SCALED_LR is not None:
+        linear_scaled_lr = (
+            config.TRAIN.BASE_LR * config.DATA.BATCH_SIZE) / config.TRAIN.LINEAR_SCALED_LR
+        linear_scaled_warmup_start_lr = (
+            config.TRAIN.WARMUP_START_LR * config.DATA.BATCH_SIZE) / config.TRAIN.LINEAR_SCALED_LR
+        linear_scaled_end_lr = (
+            config.TRAIN.END_LR * config.DATA.BATCH_SIZE) / config.TRAIN.LINEAR_SCALED_LR
 
     if config.TRAIN.ACCUM_ITER > 1:
         linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUM_ITER
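The hard-coded 1024 denominator becomes the configurable TRAIN.LINEAR_SCALED_LR, which acts as a reference batch size, and the guard changes from truthiness to an explicit is-not-None check. A worked sketch of the rule with illustrative numbers (not taken from the repo's configs):

base_lr = 5e-4          # config.TRAIN.BASE_LR
batch_size = 16         # config.DATA.BATCH_SIZE
ref_batch = 256         # config.TRAIN.LINEAR_SCALED_LR
accum_iter = 2          # config.TRAIN.ACCUM_ITER

lr = base_lr * batch_size / ref_batch       # 5e-4 * 16 / 256 = 3.125e-05
if accum_iter > 1:
    lr = lr * accum_iter                    # effective batch doubles, so 6.25e-05
print(lr)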
2 changes: 1 addition & 1 deletion image_classification/BoTNet/run_train.sh
@@ -1,6 +1,6 @@
 CUDA_VISIBLE_DEVICES=0 \
 python main_single_gpu.py \
--cfg='./configs/botnet50_224.yaml' \
+-cfg='./configs/botnet50.yaml' \
 -dataset='imagenet2012' \
 -batch_size=16 \
 -data_path='/dataset/imagenet' \
6 changes: 6 additions & 0 deletions image_classification/BoTNet/run_train_multi.sh
@@ -0,0 +1,6 @@
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+python main_multi_gpu.py \
+-cfg='./configs/botnet50.yaml' \
+-dataset='imagenet2012' \
+-batch_size=16 \
+-data_path='/dataset/imagenet' \
4 changes: 2 additions & 2 deletions image_classification/CSwin/augment.py
@@ -58,7 +58,7 @@ def auto_augment_policy_original():
     return policy
 
 
-def rand_augment_policy_original(magnitude_idx):
+def rand_augment_policy_original(magnitude_idx=9):
     """
     14 types of augment policies in original paper
     Args:
@@ -112,7 +112,7 @@ class RandAugment():
         transformed_image = augment(image)
     """
 
-    def __init__(self, policy, num_layers):
+    def __init__(self, policy, num_layers=2):
        """
         Args:
             policy: list of SubPolicy
22 changes: 7 additions & 15 deletions image_classification/CSwin/config.py
@@ -71,9 +71,10 @@
 _C.TRAIN.WARMUP_START_LR = 1e-6
 _C.TRAIN.END_LR = 1e-5
 _C.TRAIN.GRAD_CLIP = None
-_C.TRAIN.ACCUM_ITER = 2
+_C.TRAIN.ACCUM_ITER = 1
 _C.TRAIN.MODEL_EMA = True
 _C.TRAIN.MODEL_EMA_DECAY = 0.99992
+_C.TRAIN.LINEAR_SCALED_LR = None
 
 _C.TRAIN.LR_SCHEDULER = CN()
 _C.TRAIN.LR_SCHEDULER.NAME = 'warmupcosine'
@@ -97,26 +98,14 @@
 
 _C.TRAIN.SMOOTHING = 0.1
 _C.TRAIN.COLOR_JITTER = 0.4
-_C.TRAIN.AUTO_AUGMENT = True #'rand-m9-mstd0.5-inc1'
+_C.TRAIN.AUTO_AUGMENT = False #'rand-m9-mstd0.5-inc1'
+_C.TRAIN.RAND_AUGMENT = True
 
 _C.TRAIN.RANDOM_ERASE_PROB = 0.25
 _C.TRAIN.RANDOM_ERASE_MODE = 'pixel'
 _C.TRAIN.RANDOM_ERASE_COUNT = 1
 _C.TRAIN.RANDOM_ERASE_SPLIT = False
 
-# augmentation
-_C.AUG = CN()
-_C.AUG.COLOR_JITTER = 0.4 # color jitter factor
-_C.AUG.AUTO_AUGMENT = 'rand-m9-mstd0.5-inc1'
-_C.AUG.RE_PROB = 0.25 # random erase prob
-_C.AUG.RE_MODE = 'pixel' # random erase mode
-_C.AUG.RE_COUNT = 1 # random erase count
-_C.AUG.MIXUP = 0.8 # mixup alpha, enabled if >0
-_C.AUG.CUTMIX = 1.0 # cutmix alpha, enabled if >0
-_C.AUG.CUTMIX_MINMAX = None # cutmix min/max ratio, overrides alpha
-_C.AUG.MIXUP_PROB = 1.0 # prob of mixup or cutmix when either/both is enabled
-_C.AUG.MIXUP_SWITCH_PROB = 0.5 # prob of switching cutmix when both mixup and cutmix enabled
-_C.AUG.MIXUP_MODE = 'batch' # how to apply mixup/cutmix params, per 'batch', 'pair', or 'elem'
-
 # misc
 _C.SAVE = "./output"
@@ -144,6 +133,7 @@ def _update_config_from_file(config, cfg_file):
     config.merge_from_file(cfg_file)
     config.freeze()
 
+
 def update_config(config, args):
     """Update config by ArgumentParser
     Args:
@@ -160,6 +150,8 @@ def update_config(config, args):
         config.DATA.BATCH_SIZE = args.batch_size
     if args.image_size:
         config.DATA.IMAGE_SIZE = args.image_size
+    if args.num_classes:
+        config.MODEL.NUM_CLASSES = args.num_classes
     if args.data_path:
         config.DATA.DATA_PATH = args.data_path
     if args.output is not None:
24 changes: 12 additions & 12 deletions image_classification/CSwin/cswin.py
@@ -62,13 +62,13 @@ def __init__(self, patch_stride=4, in_channels=3, embed_dim=96):
                               bias_attr=b_attr)
 
     def _init_weights_layernorm(self):
-        weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(1))
-        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0))
+        weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(1.))
+        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.))
         return weight_attr, bias_attr
 
     def _init_weights(self):
         weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.TruncatedNormal(std=.02))
-        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0))
+        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.))
         return weight_attr, bias_attr
 
     def forward(self, x):
@@ -110,7 +110,7 @@ def __init__(self, in_features, hidden_features, dropout):
 
     def _init_weights(self):
         weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.TruncatedNormal(std=.02))
-        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0))
+        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.))
         return weight_attr, bias_attr
 
     def forward(self, x):
@@ -337,13 +337,13 @@ def __init__(self,
                                   dropout=dropout)
 
     def _init_weights_layernorm(self):
-        weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(1))
-        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0))
+        weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(1.))
+        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.))
         return weight_attr, bias_attr
 
     def _init_weights(self):
         weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.TruncatedNormal(std=.02))
-        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0))
+        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.))
         return weight_attr, bias_attr
 
     def chunk_qkv(self, x, chunks=1, axis=-1):
@@ -393,8 +393,8 @@ def __init__(self, dim_in, dim_out):
                               bias_attr=b_attr_1)
 
     def _init_weights_layernorm(self):
-        weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(1))
-        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0))
+        weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(1.))
+        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.))
         return weight_attr, bias_attr
 
     def forward(self, x):
@@ -543,13 +543,13 @@ def __init__(self,
                              bias_attr=b_attr_2)
 
     def _init_weights_layernorm(self):
-        weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(1))
-        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0))
+        weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(1.))
+        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.))
         return weight_attr, bias_attr
 
     def _init_weights(self):
         weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.TruncatedNormal(std=.02))
-        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0))
+        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.))
         return weight_attr, bias_attr
 
     def forward_features(self, x):
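All five hunks in cswin.py make the same two-character change: integer literals in Constant(1) / Constant(0) become float literals. paddle.nn.initializer.Constant documents a float value parameter, so this reads as a consistency cleanup aligning the call sites with the documented signature rather than a behavior change. A condensed sketch of the shared helper pattern (the feature width 96 is illustrative):

import paddle

def _init_weights_layernorm():
    # LayerNorm starts as the identity map: scale = 1.0, shift = 0.0
    weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(1.))
    bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.))
    return weight_attr, bias_attr

w_attr, b_attr = _init_weights_layernorm()
norm = paddle.nn.LayerNorm(96, weight_attr=w_attr, bias_attr=b_attr)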
8 changes: 7 additions & 1 deletion image_classification/CSwin/datasets.py
@@ -28,6 +28,8 @@
 from paddle.vision import image_load
 from augment import auto_augment_policy_original
 from augment import AutoAugment
+from augment import rand_augment_policy_original
+from augment import RandAugment
 from transforms import RandomHorizontalFlip
 from random_erasing import RandomErasing
 
@@ -100,9 +102,13 @@ def get_train_transforms(config):
         policy = auto_augment_policy_original()
         auto_augment = AutoAugment(policy)
         aug_op_list.append(auto_augment)
+    elif config.TRAIN.RAND_AUGMENT:
+        policy = rand_augment_policy_original()
+        rand_augment = RandAugment(policy)
+        aug_op_list.append(rand_augment)
     else:
         jitter = (float(config.TRAIN.COLOR_JITTER), ) * 3
-        aug_op_list.append(transforms.ColorJitter(jitter))
+        aug_op_list.append(transforms.ColorJitter(*jitter))
     # STEP3: other ops
     aug_op_list.append(transforms.ToTensor())
     aug_op_list.append(transforms.Normalize(mean=config.DATA.IMAGENET_MEAN,
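Note the branch order: AUTO_AUGMENT is tested before RAND_AUGMENT, so with CSwin's new config defaults (AUTO_AUGMENT = False, RAND_AUGMENT = True) training picks up RandAugment, and plain color jitter applies only when both flags are off. A condensed, self-contained sketch of that selection logic (names follow the diff):

def pick_augment(auto_augment, rand_augment):
    if auto_augment:        # checked first, wins if both flags are True
        return 'auto_augment'
    elif rand_augment:      # CSwin's new default path in this PR
        return 'rand_augment'
    return 'color_jitter'   # fallback when both are False

assert pick_augment(False, True) == 'rand_augment'  # new CSwin defaults
assert pick_augment(True, True) == 'auto_augment'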
34 changes: 19 additions & 15 deletions image_classification/CSwin/main_multi_gpu.py
@@ -35,7 +35,6 @@
 from mixup import Mixup
 from losses import LabelSmoothingCrossEntropyLoss
 from losses import SoftTargetCrossEntropyLoss
-from losses import DistillationLoss
 from model_ema import ModelEma
 from cswin import build_cswin as build_model
 
@@ -50,6 +49,7 @@ def get_arguments():
     parser.add_argument('-data_path', type=str, default=None)
     parser.add_argument('-output', type=str, default=None)
     parser.add_argument('-ngpus', type=int, default=None)
+    parser.add_argument('-num_classes', type=int, default=None)
     parser.add_argument('-pretrained', type=str, default=None)
     parser.add_argument('-resume', type=str, default=None)
     parser.add_argument('-last_epoch', type=int, default=None)
@@ -369,18 +369,22 @@ def main_worker(*args):
 
     # STEP 5: Define optimizer and lr_scheduler
     # set lr according to batch size and world size (hacked from Swin official code and modified for CSwin)
-    linear_scaled_lr = (config.TRAIN.BASE_LR * config.DATA.BATCH_SIZE * world_size) / 256.0
-    linear_scaled_warmup_start_lr = (config.TRAIN.WARMUP_START_LR * config.DATA.BATCH_SIZE * world_size) / 256.0
-    linear_scaled_end_lr = (config.TRAIN.END_LR * config.DATA.BATCH_SIZE * world_size) / 256.0
-
-    if config.TRAIN.ACCUM_ITER > 1:
-        linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUM_ITER
-        linear_scaled_warmup_start_lr = linear_scaled_warmup_start_lr * config.TRAIN.ACCUM_ITER
-        linear_scaled_end_lr = linear_scaled_end_lr * config.TRAIN.ACCUM_ITER
+    if config.TRAIN.LINEAR_SCALED_LR is not None:
+        linear_scaled_lr = (
+            config.TRAIN.BASE_LR * config.DATA.BATCH_SIZE * world_size) / config.TRAIN.LINEAR_SCALED_LR
+        linear_scaled_warmup_start_lr = (
+            config.TRAIN.WARMUP_START_LR * config.DATA.BATCH_SIZE * world_size) / config.TRAIN.LINEAR_SCALED_LR
+        linear_scaled_end_lr = (
+            config.TRAIN.END_LR * config.DATA.BATCH_SIZE * world_size) / config.TRAIN.LINEAR_SCALED_LR
 
-    config.TRAIN.BASE_LR = linear_scaled_lr
-    config.TRAIN.WARMUP_START_LR = linear_scaled_warmup_start_lr
-    config.TRAIN.END_LR = linear_scaled_end_lr
+        if config.TRAIN.ACCUM_ITER > 1:
+            linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUM_ITER
+            linear_scaled_warmup_start_lr = linear_scaled_warmup_start_lr * config.TRAIN.ACCUM_ITER
+            linear_scaled_end_lr = linear_scaled_end_lr * config.TRAIN.ACCUM_ITER
+
+        config.TRAIN.BASE_LR = linear_scaled_lr
+        config.TRAIN.WARMUP_START_LR = linear_scaled_warmup_start_lr
+        config.TRAIN.END_LR = linear_scaled_end_lr
 
     scheduler = None
     if config.TRAIN.LR_SCHEDULER.NAME == "warmupcosine":
@@ -454,9 +458,9 @@ def main_worker(*args):
             f"----- Pretrained: Load model state from {config.MODEL.PRETRAINED}")
 
     if config.MODEL.RESUME:
-        assert os.path.isfile(config.MODEL.RESUME+'.pdparams') is True
-        assert os.path.isfile(config.MODEL.RESUME+'.pdopt') is True
-        model_state = paddle.load(config.MODEL.RESUME+'.pdparams')
+        assert os.path.isfile(config.MODEL.RESUME + '.pdparams') is True
+        assert os.path.isfile(config.MODEL.RESUME + '.pdopt') is True
+        model_state = paddle.load(config.MODEL.RESUME + '.pdparams')
         model.set_dict(model_state)
         opt_state = paddle.load(config.MODEL.RESUME+'.pdopt')
         optimizer.set_state_dict(opt_state)
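Relative to the single-GPU script, the multi-GPU scaling also multiplies in world_size, so the numerator is the total batch across all cards, and the whole block (including the ACCUM_ITER adjustment and the config write-back) now runs only when TRAIN.LINEAR_SCALED_LR is set. A worked sketch with illustrative numbers:

base_lr = 1e-3          # config.TRAIN.BASE_LR
batch_size = 16         # per-GPU batch (config.DATA.BATCH_SIZE)
world_size = 4          # e.g. the four GPUs in run_train_multi.sh
ref_batch = 256         # config.TRAIN.LINEAR_SCALED_LR

# total batch = 16 * 4 = 64, scaled by 64 / 256 = 0.25
lr = base_lr * batch_size * world_size / ref_batch   # 2.5e-04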
…
