diff --git a/aoe b/aoe new file mode 100644 index 0000000..caf5aae --- /dev/null +++ b/aoe @@ -0,0 +1 @@ +'python', '-m', 'easy_attributes.do_train', '--input_data', '/disk1/mcs_physics_data_derender/', '--output_dir', 'bayesian_opt/output', '--bo_config_file', 'bayesian_opt/output/bo_cfg.yml', '--distributed' \ No newline at end of file diff --git a/bayesian_opt/do_bayes_opt.py b/bayesian_opt/do_bayes_opt.py new file mode 100644 index 0000000..7e9d601 --- /dev/null +++ b/bayesian_opt/do_bayes_opt.py @@ -0,0 +1,140 @@ +import argparse +import os +import subprocess +import sys +from argparse import Namespace +from collections import defaultdict +from pathlib import Path + +from dragonfly.apis.api_utils import preprocess_multifidelity_arguments +from dragonfly.exd.cp_domain_utils import load_config_file +from dragonfly.exd.experiment_caller import CPFunctionCaller +from dragonfly.opt import gp_bandit + +sys.path.insert(0, './') +from easy_attributes.config import get_config as _get_config +from easy_attributes.utils.io import write_serialized, read_serialized + + +def mf_cost(z): + return z[0] / 100.0 + + +def setup_opt(config): + domain = config.domain + domain_orderings = config.domain_orderings + (ask_tell_fidel_space, ask_tell_domain, _, ask_tell_mf_cost, ask_tell_fidel_to_opt, ask_tell_config, _) = \ + preprocess_multifidelity_arguments(config.fidel_space, domain, [mf_cost], + mf_cost, config.fidel_to_opt, config) + + func_caller = CPFunctionCaller(None, ask_tell_domain, domain_orderings=domain_orderings, + fidel_space=ask_tell_fidel_space, fidel_cost_func=ask_tell_mf_cost, + fidel_to_opt=ask_tell_fidel_to_opt, + fidel_space_orderings=config.fidel_space_orderings, + config=ask_tell_config) + + opt = gp_bandit.CPGPBandit(func_caller, is_mf=True, ask_tell_mode=True) + opt.initialise() + return opt + + +PROC_METHODS = defaultdict(lambda: lambda x: str(x)) +PROC_METHODS.update({"SOLVER.BASE_LR": lambda x: float(10.0 ** x), + "SOLVER.MOMENTUM": lambda x: float(1 - 10.0 ** x), + "SOLVER.ADAM_BETA": lambda x: float(1 - 10.0 ** x), + # "SOLVER.OPT_TYPE": lambda x: str(x), + "SOLVER.IMS_PER_BATCH": lambda x: int(2 ** x), + "SOLVER.MAX_TIME_SECS": lambda x: int(x), + "DATALOADER.NUM_WORKERS": lambda x: int(2 ** x)}) + + +def process_draw(draw, draw_names): + processed_draws = [] + draw_dict = {} + for d, name in zip(draw, draw_names): + proc_d = PROC_METHODS[name](d) + processed_draws += [name, proc_d] + draw_dict[name] = proc_d + return processed_draws, draw_dict + +def write_config(data_path: Path, draw_dict, draw_list,secs): + cfg = _get_config(data_path, + use_mask=draw_dict['MASK_OR_BBOX'] == 'mask', + use_bounding_box=draw_dict['MASK_OR_BBOX'] != 'mask') + + cfg.SOLVER.CHECKPOINT_PERIOD = 100000000 + cfg.TEST.EVAL_PERIOD = 0 + cfg.SOLVER.MAX_ITER = int(18000 * secs *20 / (2400 * draw_dict["SOLVER.IMS_PER_BATCH"])) + + cfg.OUTPUT_DIR = 'bayesian_opt/output' + Path(cfg.OUTPUT_DIR).mkdir(parents=True, exist_ok=True) + + cfg.merge_from_list(draw_list) + with open('bayesian_opt/output/bo_cfg.yml','w') as f: + f.write(cfg.dump()) + # write_serialized(cfg,Path('bayesian_opt/output/cfg.yml')) + +def parse_args(): + parser = argparse.ArgumentParser() + # parser.add_argument("--output_dir", type=str) + # parser.add_argument('--model_weights', type=str) + # parser.add_argument('--resume', action='store_true') + # parser.add_argument("--distributed", action='store_true') + # # parser.add_argument("--rank", type=int) + # parser.add_argument('--debug', action='store_true') + parser.add_argument('--input_data', type=str) + # parser.add_argument('--use_mask_on_input', action='store_true') + # parser.add_argument('--use_bounding_box_on_input', action='store_true') + # parser.add_argument('--remove_cache', action='store_true') + # parser.add_argument('--num_input_channels', type=int) + return parser.parse_args() + +def evaluate_objective(): + command = f'python -m easy_attributes.do_train --input_data /disk1/mcs_physics_data_derender/ --output_dir bayesian_opt/output --bo_config_file bayesian_opt/output/bo_cfg.yml --distributed' + return os.system(command) + +if __name__ == "__main__": + config = load_config_file('bayesian_opt/params_domain.json') + opt = setup_opt(config) + args = parse_args() + + results = [] + while True: + secs, draw = opt.ask() + draw_list, draw_dict = process_draw(draw, + config.domain_orderings.raw_name_ordering) + write_config(Path(args.input_data), + draw_dict, + draw_list, + secs[0]) + # break + # try: + # ret_code = evaluate_objective() + # except Exception: + # ret_code = 1 + try: + subprocess.run(['python', '-m', 'easy_attributes.do_train', '--input_data', '/disk1/mcs_physics_data_derender/', '--output_dir', 'bayesian_opt/output', '--bo_config_file', 'bayesian_opt/output/bo_cfg.yml', '--distributed']) + ret_code = 0 + except: + ret_code = 1 + # ret_code = 0 + if ret_code != 0 or not Path('bayesian_opt/output/last_results.json').exists(): + #failed + results.append({'status': 'failed', + 'draw': draw_dict, + 'secs': int(secs[0]), + }) + opt.tell([(secs, draw, -1.0)]) + else: + #didn't fail + r = read_serialized(Path('bayesian_opt/output/last_results.json'))['results'] + results.append({'status':'successful', + 'result':r, + 'draw': draw_dict, + 'secs': int(secs[0]), + }) + opt.tell([(secs, draw, -r['overall_mean'])]) + + write_serialized(results, Path('bayesian_opt/bo_results.yml')) + # break + diff --git a/bayesian_opt/params_domain.json b/bayesian_opt/params_domain.json new file mode 100755 index 0000000..24ee761 --- /dev/null +++ b/bayesian_opt/params_domain.json @@ -0,0 +1,59 @@ +{ +"name": "derenderer", + +"domain":{ + "optimizer_type":{ + "name":"SOLVER.OPT_TYPE", + "type":"discrete", + "items":"Adam-SGD" + }, + + "pooler_type":{ + "name": "MODEL.POOLER_TYPE", + "type": "discrete", + "items": "average-max" + }, + + "segmenting_way": { + "name": "MASK_OR_BBOX", + "type": "discrete", + "items": "mask-bbox" + }, + + "log10_base_learning_rate":{ + "name":"SOLVER.BASE_LR", + "type":"float", + "min":-6, + "max":-2.5 + }, + "log10_1-momentum":{ + "name":"SOLVER.MOMENTUM", + "type":"float", + "min":-3, + "max":-0.52 + }, + "log10_1-adam_beta":{ + "name":"SOLVER.ADAM_BETA", + "type":"float", + "min":-5, + "max":-1 + }, + "log2_batch_size":{ + "name":"SOLVER.IMS_PER_BATCH", + "type":"float", + "min":2, + "max":7.4 + } + }, +"fidel_space": { + "training_seconds":{ + "name":"training_seconds", + "type":"int", + "min":600, + "max":2400 + } + }, + +"fidel_to_opt":[2400] +} + diff --git a/data_processing/intphys_process.py b/data_processing/intphys_process.py new file mode 100644 index 0000000..77fcfee --- /dev/null +++ b/data_processing/intphys_process.py @@ -0,0 +1,152 @@ +import argparse +import os +import shutil +import sys +from itertools import chain +from pathlib import Path +from typing import Dict + +import numpy as np +import hickle as hkl +from pycocotools import mask as mask_util +from PIL import Image + +sys.path.insert(0, './') +from easy_attributes.utils.io import read_serialized, write_serialized +from easy_attributes.utils.istarmap_tqdm_patch import array_apply +from easy_attributes.utils.meta_data import get_discrete_metadata, get_pixels_mean_and_std + +MIN_AREA = 50 + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('--input_dir', type=str) + parser.add_argument('--output_dir', type=str) + parser.add_argument('--parallel', action='store_true') + parser.add_argument('--debug', action='store_true') + args = parser.parse_args() + args.input_dir = Path(args.input_dir) + args.output_dir = Path(args.output_dir) + return args + + +def build_recursive_case_paths(input_folder, cases): + if "scene" not in os.listdir(input_folder): + to_recurse = sorted(list(os.path.join(input_folder, sub_folder) for sub_folder in os.listdir(input_folder))) + for new_folder in to_recurse: + if os.path.isdir(new_folder): + build_recursive_case_paths(new_folder, cases) + else: + cases.append(Path(input_folder)) + return cases + + +def get_attributes(obj: Dict): + attributes = {} + + attributes['shape'] = obj.get('shape', 'Occluder') + + # [attributes['position']] + + return attributes + + +def process_frame(depth_file, mask_file, rgb_file, frame, out_dir, index): + depth_array = np.asarray(Image.open(depth_file), dtype=np.float32) + # Intphys encoding here: https://www.intphys.com/benchmark/training_set.html + depth_array = (2 ** 16 - 1 - depth_array) / 1000.0 + depth_array = 1 / (1 + depth_array) + + rgb = np.asarray(Image.open(rgb_file), dtype=np.float32) / 255.0 + + input_array = np.concatenate([rgb, depth_array[..., np.newaxis]], axis=2) + input_array = input_array.swapaxes(2, 1).swapaxes(1, 0) + + input_file = out_dir / 'inputs' / (str(index).zfill(9) + '.hkl') + hkl.dump(input_array, input_file, + mode='w', compression='gzip') + + masks = np.asarray(Image.open(mask_file)) + + objects = [] + for oid, mask_val in frame['masks'].items(): + if not ('occluder' in oid or 'object' in oid): + continue + + mask = masks == mask_val + if mask.sum() < MIN_AREA: + continue + + mask_y, mask_x = mask.nonzero() + bbox = list(map(int, [mask_x.min(), mask_y.min(), mask_x.max(), mask_y.max()])) + if bbox[3] <= bbox[1] + 2 and bbox[2] <= bbox[0] + 2: # width and height shouldn't be too small + continue + + mask = mask_util.encode(np.asarray(mask, order="F")) + mask['counts'] = mask['counts'].decode('ascii') + + attributes = get_attributes(frame[oid]) + + objects.append({'mask': mask, + 'bbox': bbox, + **attributes, + 'filename': str(input_file), + }) + + return objects + + +def process_video(video_path, vid_num, out_dir): + depths = [] + rgbs = [] + masks = [] + frames = [] + status = read_serialized(video_path / 'status.json') + for i in range(1, 101): + depths.append(video_path / 'depth' / ('depth_' + str(i).zfill(3) + '.png')) + rgbs.append(video_path / 'scene' / ('scene_' + str(i).zfill(3) + '.png')) + masks.append(video_path / 'masks' / ('masks_' + str(i).zfill(3) + '.png')) + frames.append(status['frames'][i - 1]) + + objects = [process_frame(d, m, r, f, out_dir, vid_num * 1000 + f_num) for f_num, (d, m, r, f) in + enumerate(zip(depths, masks, rgbs, frames))] + + return chain.from_iterable(objects) + + +if __name__ == '__main__': + args = parse_args() + video_folders_val = build_recursive_case_paths(args.input_dir / 'dev_meta', []) + video_folders_train = build_recursive_case_paths(args.input_dir / 'train', []) + + shutil.rmtree(args.output_dir, ignore_errors=True) + (args.output_dir / 'inputs').mkdir(parents=True, exist_ok=True) + + data = {} + for f_set, folders in {'val': video_folders_val, 'train': video_folders_train}.items(): + worker_args = [(v, i, args.output_dir) for i, v in enumerate(folders)] + + objects = list(chain.from_iterable(array_apply(process_video, + worker_args, + args.parallel, + # cpu_frac=2, + chunksize=10, + description='processing intphys scenes'))) + + data[f_set] = objects + + meta_data = {'inputs': {'file_name': {'type': 'input_tensor', + 'num_channels': 4, + 'height': 288, + 'width': 288, + **get_pixels_mean_and_std(data['val'])}, + 'mask': {'type': 'bitmask'}, + 'bbox': {'type': 'bounding_box'}}, + 'outputs': {'shape': get_discrete_metadata(data['val'] + data['train'], + 'shape')} + } + + write_serialized(data['val'], args.output_dir / 'val.json') + write_serialized(meta_data, args.output_dir / 'metadata.yml') + write_serialized(data['train'], args.output_dir / 'train.json') diff --git a/easy_attributes/attribute_evaluator.py b/easy_attributes/attribute_evaluator.py index 59f80ce..99f78a8 100644 --- a/easy_attributes/attribute_evaluator.py +++ b/easy_attributes/attribute_evaluator.py @@ -1,4 +1,5 @@ from itertools import chain +from pathlib import Path import numpy as np from detectron2.config import configurable @@ -7,17 +8,20 @@ from detectron2.utils.comm import get_world_size from easy_attributes.model import OutputHead +from easy_attributes.utils.io import write_serialized class AttributeEvaluator(DatasetEvaluator): @configurable - def __init__(self, output_head: OutputHead): + def __init__(self, output_head: OutputHead, output_path : Path): self.output_head = output_head + self.output_path = output_path @classmethod def from_config(cls, cfg): - return {'output_head': OutputHead(cfg)} + return {'output_head': OutputHead(cfg), + 'output_path': Path(cfg.OUTPUT_DIR) / 'last_results.json'} def reset(self): self._predictions = [] @@ -50,6 +54,8 @@ def evaluate(self): err_dict = self.output_head.pred_error(all_preds, all_labels) + write_serialized(err_dict, self.output_path) + return err_dict diff --git a/easy_attributes/attributes_dataset.py b/easy_attributes/attributes_dataset.py index 880c28e..ef43869 100644 --- a/easy_attributes/attributes_dataset.py +++ b/easy_attributes/attributes_dataset.py @@ -5,12 +5,12 @@ import numpy as np from detectron2.config import CfgNode from detectron2.data import DatasetFromList, MapDataset, DatasetCatalog, MetadataCatalog -from torch.utils.data import Dataset, DataLoader import torch from pycocotools import mask as mask_util from easy_attributes.utils.io import read_serialized, load_input from easy_attributes.utils.istarmap_tqdm_patch import array_apply +from easy_attributes.utils.visualize import visualize_data_dict def data_dict_from_serialized(d, cfg, metadata): @@ -84,6 +84,7 @@ def build_datasets(cfg, data_path: Path): attr_dataset = AttributeDataset(data_path / (d + '.json'), cfg) DatasetCatalog.register(d, lambda ad=attr_dataset: ad.get_data_dicts()) MetadataCatalog.get(d).set(**metadata) + MetadataCatalog.get(d).set(thing_classes=['object']) if __name__ == "__main__": diff --git a/easy_attributes/config.py b/easy_attributes/config.py index c115e75..9aa0f3e 100644 --- a/easy_attributes/config.py +++ b/easy_attributes/config.py @@ -9,21 +9,24 @@ def get_config(data_path: Path, model_weights_path: Path = None, output_path: Path = None, - debug: bool = True, + debug: bool = False, use_mask=True, - use_bounding_box=True): + use_bounding_box=True, + bo_config_file = None): cfg = detectron_get_cfg() cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")) + cfg.MODEL.META_ARCHITECTURE = 'CustomModel' if model_weights_path is None: - cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml") + # cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml") + cfg.MODEL.WEIGHTS = 'https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl' else: cfg.MODEL.WEIGHTS = str(model_weights_path) cfg.OUTPUT_DIR = str(output_path) if output_path is not None else './output' - Path(cfg.OUTPUT_DIR).mkdir(exist_ok=True) + # Path(cfg.OUTPUT_DIR).mkdir(exist_ok=True) cfg.DATALOADER.NUM_WORKERS = 0 if debug else 6 @@ -38,60 +41,63 @@ def get_config(data_path: Path, # 'agent_position_y', # 'agent_position_z', # 'agent_rotation', - 'dimension_0_x', - 'dimension_0_y', - 'dimension_0_z', - 'dimension_1_x', - 'dimension_1_y', - 'dimension_1_z', - 'dimension_2_x', - 'dimension_2_y', - 'dimension_2_z', - 'dimension_3_x', - 'dimension_3_y', - 'dimension_3_z', - 'dimension_4_x', - 'dimension_4_y', - 'dimension_4_z', - 'dimension_5_x', - 'dimension_5_y', - 'dimension_5_z', - 'dimension_6_x', - 'dimension_6_y', - 'dimension_6_z', - 'dimension_7_x', - 'dimension_7_y', - 'dimension_7_z', - 'position_x', - 'position_y', - 'position_z', - 'rotation_x', - 'rotation_y', - 'rotation_z', + # 'dimension_0_x', + # 'dimension_0_y', + # 'dimension_0_z', + # 'dimension_1_x', + # 'dimension_1_y', + # 'dimension_1_z', + # 'dimension_2_x', + # 'dimension_2_y', + # 'dimension_2_z', + # 'dimension_3_x', + # 'dimension_3_y', + # 'dimension_3_z', + # 'dimension_4_x', + # 'dimension_4_y', + # 'dimension_4_z', + # 'dimension_5_x', + # 'dimension_5_y', + # 'dimension_5_z', + # 'dimension_6_x', + # 'dimension_6_y', + # 'dimension_6_z', + # 'dimension_7_x', + # 'dimension_7_y', + # 'dimension_7_z', + # 'position_x', + # 'position_y', + # 'position_z', + # 'rotation_x', + # 'rotation_y', + # 'rotation_z', 'shape',) cfg.DEBUG = debug metadata = read_serialized(data_path / 'metadata.yml') num_input_channels = metadata['inputs']['file_name']['num_channels'] - num_input_channels *= sum([use_mask, use_bounding_box]) + 1 + num_repeats = sum([use_mask, use_bounding_box]) + 1 + num_input_channels *= num_repeats cfg.INPUT.FORMAT = "D" * num_input_channels - cfg.MODEL.PIXEL_MEAN = [0.5] * num_input_channels - cfg.MODEL.PIXEL_STD = [1.0] * num_input_channels + cfg.MODEL.PIXEL_MEAN = metadata['inputs']['file_name']['pixel_mean'] * num_repeats + cfg.MODEL.PIXEL_STD = metadata['inputs']['file_name']['pixel_std'] * num_repeats cfg.MODEL.BACKBONE.FREEZE_AT = 0 - cfg.MODEL.FPN_OUT_FEATS = ('p2', 'p3', 'p4', 'p5', 'p6') + cfg.MODEL.BACKBONE.NAME = 'build_resnet_backbone' + cfg.MODEL.RESNETS.OUT_FEATURES = ['res5'] + # cfg.MODEL.FPN_OUT_FEATS = ('p2', 'p3', 'p4', 'p5', 'p6') cfg.MODEL.LAST_HIDDEN_LAYER_FEATS = 512 + cfg.MODEL.POOLER_TYPE = 'max' - cfg.SOLVER.WARMUP_FACTOR = 1.0 / 1000 - cfg.SOLVER.WARMUP_ITERS = 1000 # a warm up is necessary to avoid diverging training while keeping the goal learning rate as high as possible - cfg.SOLVER.IMS_PER_BATCH = 80 if not debug else 42 - # cfg.SOLVER.BASE_LR = 0.0005 # pick a good LR + cfg.SOLVER.WARMUP_FACTOR = 1.0 / 100 + cfg.SOLVER.WARMUP_ITERS = 100 # a warm up is necessary to avoid diverging training while keeping the goal learning rate as high as possible + cfg.SOLVER.IMS_PER_BATCH = 20 if not debug else 8 # cfg.SOLVER.MAX_ITER = 80000 - # cfg.SOLVER.STEPS = (40000, 60000, 70000) - # cfg.SOLVER.GAMMA = 0.5 # after each milestone in SOLVER.STEPS gets reached, the learning rate gets scaled by Gamma. - cfg.SOLVER.BASE_LR = 6.658777172739463e-5 + cfg.SOLVER.STEPS = (30000, 45000, 60000) + cfg.SOLVER.GAMMA = 0.5 # after each milestone in SOLVER.STEPS gets reached, the learning rate gets scaled by Gamma. + cfg.SOLVER.BASE_LR = 6.658777172739463e-5 / 4 cfg.SOLVER.OPT_TYPE = "Adam" # options "Adam" "SGD" cfg.SOLVER.MOMENTUM = 0.9960477666835778 # found via Bayesian Optimization @@ -99,9 +105,13 @@ def get_config(data_path: Path, # cfg.SOLVER.WEIGHT_DECAY = 0.0005 # cfg.SOLVER.WEIGHT_DECAY_BIAS = 0 - cfg.SOLVER.CHECKPOINT_PERIOD = 50 if debug else 2000 # 5000 + cfg.SOLVER.CHECKPOINT_PERIOD = 50 if debug else 1000 # 5000 + + cfg.TEST.EVAL_PERIOD = 30 if debug else 1000 - cfg.TEST.EVAL_PERIOD = 30 if debug else 4000 + cfg.MASK_OR_BBOX = '' + if bo_config_file: + cfg.merge_from_file(bo_config_file) return cfg diff --git a/easy_attributes/do_train.py b/easy_attributes/do_train.py index c89c5db..de7b7e9 100644 --- a/easy_attributes/do_train.py +++ b/easy_attributes/do_train.py @@ -23,6 +23,7 @@ def parse_args(): parser.add_argument('--input_data', type=str) parser.add_argument('--use_mask_on_input', action='store_true') parser.add_argument('--use_bounding_box_on_input', action='store_true') + parser.add_argument('--bo_config_file', type=str) # parser.add_argument('--remove_cache', action='store_true') # parser.add_argument('--num_input_channels', type=int) return parser.parse_args() @@ -32,15 +33,17 @@ def main(args): if args.debug and not args.distributed: import ipdb ipdb.set_trace() - if not args.resume: - shutil.rmtree(args.output_dir, ignore_errors=True) cfg = get_config(data_path=Path(args.input_data), model_weights_path=Path(args.model_weights) if args.model_weights else None, output_path=Path(args.output_dir) if args.output_dir else None, debug=args.debug, use_mask=args.use_mask_on_input, - use_bounding_box=args.use_bounding_box_on_input) + use_bounding_box=args.use_bounding_box_on_input, + bo_config_file=args.bo_config_file) + if not args.resume: + shutil.rmtree(cfg.OUTPUT_DIR, ignore_errors=True) + Path(cfg.OUTPUT_DIR).mkdir(exist_ok=True) default_setup(cfg, args) build_datasets(cfg, Path(args.input_data)) diff --git a/easy_attributes/model.py b/easy_attributes/model.py index 030b9b9..c71a6bd 100644 --- a/easy_attributes/model.py +++ b/easy_attributes/model.py @@ -5,7 +5,7 @@ import torch from detectron2.config import configurable from detectron2.data import MetadataCatalog -from detectron2.modeling import META_ARCH_REGISTRY, Backbone, build_backbone +from detectron2.modeling import META_ARCH_REGISTRY, Backbone, build_backbone, build_resnet_backbone from detectron2.structures import ImageList from numpy import cumsum import numpy as np @@ -77,8 +77,7 @@ def loss(self, inputs, targets): for term, loss_method in self.loss_methods.items(): # unstandardize to keep training stable normalize = self.out_metadata[term]['type'] == 'continuous' - l_input, l_target = map(lambda z: z / self.out_metadata[term]['std'] if normalize - else z, + l_input, l_target = map(lambda z: z / self.out_metadata[term]['std'] if normalize else z, [inputs[term], targets[term]]) loss = loss_method(l_input, l_target) @@ -121,7 +120,7 @@ def pred_error(self, all_preds, all_labels): out_meta = self.out_metadata[term] err_dict[term] = err_method(i_input, i_target, normalizing_std=out_meta['std'] if out_meta['type'] == 'continuous' - else 1.0) + else 1.0) err_dict['overall_mean'] += err_dict[term] err_dict["overall_mean"] /= len(self.terms) @@ -147,6 +146,7 @@ def __init__(self, # batch_size: int, resnet_features: List[str], last_hid_num_feats: int, + pooler_type: str ): super().__init__() self.backbone = backbone @@ -156,7 +156,10 @@ def __init__(self, feat_heights = {p: ceil(input_height / fs) for p, fs in feat_strides.items()} feat_widths = {p: ceil(input_width / fs) for p, fs in feat_strides.items()} - self.poolers = {p: nn.MaxPool2d((feat_heights[p], feat_widths[p])) for p in resnet_features} + p_funcs = {'average': nn.AvgPool2d, + 'max': nn.MaxPool2d} + self.poolers = {p: p_funcs[pooler_type]((feat_heights[p], feat_widths[p])) + for p in resnet_features} self.sum_feat_channels = sum([backbone.output_shape()[p].channels for p in resnet_features]) @@ -175,6 +178,7 @@ def __init__(self, @classmethod def from_config(cls, cfg): backbone = build_backbone(cfg) + # backbone = build_resnet_backbone(cfg) return {'backbone': backbone, 'output_head': OutputHead(cfg), "pixel_mean": cfg.MODEL.PIXEL_MEAN, @@ -182,8 +186,9 @@ def from_config(cls, cfg): "input_height": MetadataCatalog.get(cfg.DATASETS.TEST[0]).inputs['file_name']['height'], "input_width": MetadataCatalog.get(cfg.DATASETS.TEST[0]).inputs['file_name']['width'], # "batch_size": get_batch_size(cfg.SOLVER.IMS_PER_BATCH), - "resnet_features": cfg.MODEL.FPN_OUT_FEATS, - "last_hid_num_feats": cfg.MODEL.LAST_HIDDEN_LAYER_FEATS + "resnet_features": cfg.MODEL.RESNETS.OUT_FEATURES, + "last_hid_num_feats": cfg.MODEL.LAST_HIDDEN_LAYER_FEATS, + 'pooler_type': cfg.MODEL.POOLER_TYPE } @property diff --git a/easy_attributes/utils/meta_data.py b/easy_attributes/utils/meta_data.py index 3e52502..137f435 100644 --- a/easy_attributes/utils/meta_data.py +++ b/easy_attributes/utils/meta_data.py @@ -1,5 +1,8 @@ import numpy as np +from easy_attributes.utils.io import load_input +from easy_attributes.utils.istarmap_tqdm_patch import array_apply + def get_continuous_metadata(data_dicts, attribute_name): values = np.array([d[attribute_name] for d in data_dicts]) @@ -16,4 +19,17 @@ def get_discrete_metadata(data_dicts, attribute_name): class_to_index_map = {k: i for i, k in enumerate(possible_values)} return {'type': 'discrete', - 'class_to_index_map': class_to_index_map} \ No newline at end of file + 'class_to_index_map': class_to_index_map} + +def _load_input(d): + return load_input(d['filename']).numpy() + +def get_pixels_mean_and_std(data_dicts): + all_inputs = array_apply(_load_input, + data_dicts, + parallel=True, + unpack=False) + + all_inputs = np.stack(all_inputs, axis=0) + return {'pixel_mean': [all_inputs[:,i].mean().item() for i in range(all_inputs.shape[1])], + 'pixel_std': [all_inputs[:,i].std().item() for i in range(all_inputs.shape[1])]} \ No newline at end of file diff --git a/easy_attributes/utils/visualize.py b/easy_attributes/utils/visualize.py new file mode 100644 index 0000000..5975fa0 --- /dev/null +++ b/easy_attributes/utils/visualize.py @@ -0,0 +1,31 @@ +from pathlib import Path + +import numpy as np +import torch +from PIL import Image +from detectron2.data import MetadataCatalog +from detectron2.structures import BoxMode +from detectron2.utils.visualizer import Visualizer + +from easy_attributes.utils.io import load_input + + +def visualize_data_dict(d, save_path: Path = None, channels=(0, 1, 2), input_tensor: torch.FloatTensor = None): + # assumes d['file_name'] is in (C, H, W) format as required by detectron2 + # #the visualizer requires everything in rgb and (H, W, C) version + img = input_tensor if input_tensor is not None else load_input(d['filename']) + img = (img * 255)[list(channels)].numpy() + if img.shape[0] == 1: + img = np.concatenate([img] * 3, axis=0) + assert img.shape[0] == 3 + img = img.swapaxes(0, 1).swapaxes(1, 2) + img = img.astype(np.uint8) + visualizer = Visualizer(img, metadata=MetadataCatalog.get('val')) + out = visualizer.draw_dataset_dict({"annotations": [{"bbox": d['bbox'], + "bbox_mode": int(BoxMode.XYXY_ABS), + "segmentation": d['mask'], + "category_id": 0, + "iscrowd": 0}]}).get_image() + if save_path: + Image.fromarray(out).save(save_path) + return out diff --git a/environment.yml b/environment.yml index 19e9c42..d94567f 100644 --- a/environment.yml +++ b/environment.yml @@ -11,4 +11,5 @@ dependencies: - pycocotools==2.0.2 - tqdm==4.51.0 - git+https://github.com/facebookresearch/detectron2.git@v0.3 - - sklearn==0.23.2 \ No newline at end of file + - sklearn==0.23.2 + - opencv-python==4.4.0.44 \ No newline at end of file