Commit
added bayesian optimization capabilities, built a processor for intphys data, added visualization capabilities
aldo committed Nov 19, 2020
1 parent cb4ae0e commit 39f6e84
Showing 12 changed files with 487 additions and 62 deletions.
1 change: 1 addition & 0 deletions aoe
@@ -0,0 +1 @@
'python', '-m', 'easy_attributes.do_train', '--input_data', '/disk1/mcs_physics_data_derender/', '--output_dir', 'bayesian_opt/output', '--bo_config_file', 'bayesian_opt/output/bo_cfg.yml', '--distributed'
140 changes: 140 additions & 0 deletions bayesian_opt/do_bayes_opt.py
@@ -0,0 +1,140 @@
import argparse
import os
import subprocess
import sys
from argparse import Namespace
from collections import defaultdict
from pathlib import Path

from dragonfly.apis.api_utils import preprocess_multifidelity_arguments
from dragonfly.exd.cp_domain_utils import load_config_file
from dragonfly.exd.experiment_caller import CPFunctionCaller
from dragonfly.opt import gp_bandit

sys.path.insert(0, './')
from easy_attributes.config import get_config as _get_config
from easy_attributes.utils.io import write_serialized, read_serialized


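# Fidelity cost for the multi-fidelity optimizer: z[0] is the training-time
# fidelity in seconds (600-2400 in params_domain.json), scaled down so cost
# values stay small.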
def mf_cost(z):
return z[0] / 100.0


def setup_opt(config):
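    # Build Dragonfly's ask-tell multi-fidelity optimizer: preprocess the
    # fidelity space and domain loaded from params_domain.json, wrap them in a
    # CPFunctionCaller, and hand that to a multi-fidelity GP bandit.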
domain = config.domain
domain_orderings = config.domain_orderings
(ask_tell_fidel_space, ask_tell_domain, _, ask_tell_mf_cost, ask_tell_fidel_to_opt, ask_tell_config, _) = \
preprocess_multifidelity_arguments(config.fidel_space, domain, [mf_cost],
mf_cost, config.fidel_to_opt, config)

func_caller = CPFunctionCaller(None, ask_tell_domain, domain_orderings=domain_orderings,
fidel_space=ask_tell_fidel_space, fidel_cost_func=ask_tell_mf_cost,
fidel_to_opt=ask_tell_fidel_to_opt,
fidel_space_orderings=config.fidel_space_orderings,
config=ask_tell_config)

opt = gp_bandit.CPGPBandit(func_caller, is_mf=True, ask_tell_mode=True)
opt.initialise()
return opt


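# Dragonfly draws several parameters in log space (see params_domain.json:
# log10 for learning rate / momentum / Adam beta, log2 for batch size); these
# callables map each raw draw back to the value the training config expects.
# Parameters not listed here pass through as strings.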
PROC_METHODS = defaultdict(lambda: lambda x: str(x))
PROC_METHODS.update({"SOLVER.BASE_LR": lambda x: float(10.0 ** x),
"SOLVER.MOMENTUM": lambda x: float(1 - 10.0 ** x),
"SOLVER.ADAM_BETA": lambda x: float(1 - 10.0 ** x),
# "SOLVER.OPT_TYPE": lambda x: str(x),
"SOLVER.IMS_PER_BATCH": lambda x: int(2 ** x),
"SOLVER.MAX_TIME_SECS": lambda x: int(x),
"DATALOADER.NUM_WORKERS": lambda x: int(2 ** x)})


def process_draw(draw, draw_names):
processed_draws = []
draw_dict = {}
for d, name in zip(draw, draw_names):
proc_d = PROC_METHODS[name](d)
processed_draws += [name, proc_d]
draw_dict[name] = proc_d
return processed_draws, draw_dict

def write_config(data_path: Path, draw_dict, draw_list, secs):
cfg = _get_config(data_path,
use_mask=draw_dict['MASK_OR_BBOX'] == 'mask',
use_bounding_box=draw_dict['MASK_OR_BBOX'] != 'mask')

cfg.SOLVER.CHECKPOINT_PERIOD = 100000000
cfg.TEST.EVAL_PERIOD = 0
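    # The iteration budget appears to scale linearly with the fidelity's time
    # budget (secs out of the 2400-second maximum) and inversely with batch size.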
    cfg.SOLVER.MAX_ITER = int(18000 * secs * 20 / (2400 * draw_dict["SOLVER.IMS_PER_BATCH"]))

cfg.OUTPUT_DIR = 'bayesian_opt/output'
Path(cfg.OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

cfg.merge_from_list(draw_list)
    with open('bayesian_opt/output/bo_cfg.yml', 'w') as f:
f.write(cfg.dump())
# write_serialized(cfg,Path('bayesian_opt/output/cfg.yml'))

def parse_args():
parser = argparse.ArgumentParser()
# parser.add_argument("--output_dir", type=str)
# parser.add_argument('--model_weights', type=str)
# parser.add_argument('--resume', action='store_true')
# parser.add_argument("--distributed", action='store_true')
# # parser.add_argument("--rank", type=int)
# parser.add_argument('--debug', action='store_true')
parser.add_argument('--input_data', type=str)
# parser.add_argument('--use_mask_on_input', action='store_true')
# parser.add_argument('--use_bounding_box_on_input', action='store_true')
# parser.add_argument('--remove_cache', action='store_true')
# parser.add_argument('--num_input_channels', type=int)
return parser.parse_args()

def evaluate_objective():
    command = 'python -m easy_attributes.do_train --input_data /disk1/mcs_physics_data_derender/ --output_dir bayesian_opt/output --bo_config_file bayesian_opt/output/bo_cfg.yml --distributed'
return os.system(command)

if __name__ == "__main__":
config = load_config_file('bayesian_opt/params_domain.json')
opt = setup_opt(config)
args = parse_args()

results = []
while True:
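        # Ask-tell loop: Dragonfly proposes a fidelity (training seconds) and a
        # hyperparameter draw; after training we tell it the negated overall
        # error, since the GP bandit maximizes its objective.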
secs, draw = opt.ask()
draw_list, draw_dict = process_draw(draw,
config.domain_orderings.raw_name_ordering)
write_config(Path(args.input_data),
draw_dict,
draw_list,
secs[0])
# break
# try:
# ret_code = evaluate_objective()
# except Exception:
# ret_code = 1
        try:
            proc = subprocess.run(['python', '-m', 'easy_attributes.do_train',
                                   '--input_data', '/disk1/mcs_physics_data_derender/',
                                   '--output_dir', 'bayesian_opt/output',
                                   '--bo_config_file', 'bayesian_opt/output/bo_cfg.yml',
                                   '--distributed'])
            # subprocess.run only raises if the command cannot be launched; a
            # non-zero exit status must be read from returncode (or use check=True).
            ret_code = proc.returncode
        except Exception:
            ret_code = 1
if ret_code != 0 or not Path('bayesian_opt/output/last_results.json').exists():
            # failed
results.append({'status': 'failed',
'draw': draw_dict,
'secs': int(secs[0]),
})
opt.tell([(secs, draw, -1.0)])
else:
            # didn't fail
r = read_serialized(Path('bayesian_opt/output/last_results.json'))['results']
            results.append({'status': 'successful',
                            'result': r,
'draw': draw_dict,
'secs': int(secs[0]),
})
opt.tell([(secs, draw, -r['overall_mean'])])

write_serialized(results, Path('bayesian_opt/bo_results.yml'))
# break

59 changes: 59 additions & 0 deletions bayesian_opt/params_domain.json
@@ -0,0 +1,59 @@
{
"name": "derenderer",

"domain":{
"optimizer_type":{
"name":"SOLVER.OPT_TYPE",
"type":"discrete",
"items":"Adam-SGD"
},

"pooler_type":{
"name": "MODEL.POOLER_TYPE",
"type": "discrete",
"items": "average-max"
},

"segmenting_way": {
"name": "MASK_OR_BBOX",
"type": "discrete",
"items": "mask-bbox"
},

"log10_base_learning_rate":{
"name":"SOLVER.BASE_LR",
"type":"float",
"min":-6,
"max":-2.5
},
"log10_1-momentum":{
"name":"SOLVER.MOMENTUM",
"type":"float",
"min":-3,
"max":-0.52
},
"log10_1-adam_beta":{
"name":"SOLVER.ADAM_BETA",
"type":"float",
"min":-5,
"max":-1
},
"log2_batch_size":{
"name":"SOLVER.IMS_PER_BATCH",
"type":"float",
"min":2,
"max":7.4
}
},
"fidel_space": {
"training_seconds":{
"name":"training_seconds",
"type":"int",
"min":600,
"max":2400
}
},

"fidel_to_opt":[2400]
}

152 changes: 152 additions & 0 deletions data_processing/intphys_process.py
@@ -0,0 +1,152 @@
import argparse
import os
import shutil
import sys
from itertools import chain
from pathlib import Path
from typing import Dict

import numpy as np
import hickle as hkl
from pycocotools import mask as mask_util
from PIL import Image

sys.path.insert(0, './')
from easy_attributes.utils.io import read_serialized, write_serialized
from easy_attributes.utils.istarmap_tqdm_patch import array_apply
from easy_attributes.utils.meta_data import get_discrete_metadata, get_pixels_mean_and_std

MIN_AREA = 50


def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--input_dir', type=str)
parser.add_argument('--output_dir', type=str)
parser.add_argument('--parallel', action='store_true')
parser.add_argument('--debug', action='store_true')
args = parser.parse_args()
args.input_dir = Path(args.input_dir)
args.output_dir = Path(args.output_dir)
return args


def build_recursive_case_paths(input_folder, cases):
if "scene" not in os.listdir(input_folder):
to_recurse = sorted(list(os.path.join(input_folder, sub_folder) for sub_folder in os.listdir(input_folder)))
for new_folder in to_recurse:
if os.path.isdir(new_folder):
build_recursive_case_paths(new_folder, cases)
else:
cases.append(Path(input_folder))
return cases


def get_attributes(obj: Dict):
attributes = {}

attributes['shape'] = obj.get('shape', 'Occluder')

# [attributes['position']]

return attributes


def process_frame(depth_file, mask_file, rgb_file, frame, out_dir, index):
depth_array = np.asarray(Image.open(depth_file), dtype=np.float32)
# Intphys encoding here: https://www.intphys.com/benchmark/training_set.html
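    # The 16-bit PNG stores 2**16 - 1 - depth (presumably in millimeters, given
    # the division by 1000); invert it, then squash to (0, 1] so the depth
    # channel matches the RGB value range.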
depth_array = (2 ** 16 - 1 - depth_array) / 1000.0
depth_array = 1 / (1 + depth_array)

rgb = np.asarray(Image.open(rgb_file), dtype=np.float32) / 255.0

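    # Stack RGB + depth into a 4-channel image and convert HWC -> CHW.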
input_array = np.concatenate([rgb, depth_array[..., np.newaxis]], axis=2)
input_array = input_array.swapaxes(2, 1).swapaxes(1, 0)

input_file = out_dir / 'inputs' / (str(index).zfill(9) + '.hkl')
hkl.dump(input_array, input_file,
mode='w', compression='gzip')

masks = np.asarray(Image.open(mask_file))

objects = []
for oid, mask_val in frame['masks'].items():
if not ('occluder' in oid or 'object' in oid):
continue

mask = masks == mask_val
if mask.sum() < MIN_AREA:
continue

mask_y, mask_x = mask.nonzero()
bbox = list(map(int, [mask_x.min(), mask_y.min(), mask_x.max(), mask_y.max()]))
        if bbox[3] <= bbox[1] + 2 and bbox[2] <= bbox[0] + 2:  # skip boxes whose width and height are both tiny
continue

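        # Encode the boolean mask as COCO run-length encoding; the RLE byte
        # counts are decoded to ASCII so the annotation stays JSON-serializable.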
mask = mask_util.encode(np.asarray(mask, order="F"))
mask['counts'] = mask['counts'].decode('ascii')

attributes = get_attributes(frame[oid])

objects.append({'mask': mask,
'bbox': bbox,
**attributes,
'filename': str(input_file),
})

return objects


def process_video(video_path, vid_num, out_dir):
depths = []
rgbs = []
masks = []
frames = []
status = read_serialized(video_path / 'status.json')
for i in range(1, 101):
depths.append(video_path / 'depth' / ('depth_' + str(i).zfill(3) + '.png'))
rgbs.append(video_path / 'scene' / ('scene_' + str(i).zfill(3) + '.png'))
masks.append(video_path / 'masks' / ('masks_' + str(i).zfill(3) + '.png'))
frames.append(status['frames'][i - 1])

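    # Each video has exactly 100 frames, so vid_num * 1000 + f_num yields a
    # collision-free index across videos.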
objects = [process_frame(d, m, r, f, out_dir, vid_num * 1000 + f_num) for f_num, (d, m, r, f) in
enumerate(zip(depths, masks, rgbs, frames))]

return chain.from_iterable(objects)


if __name__ == '__main__':
args = parse_args()
video_folders_val = build_recursive_case_paths(args.input_dir / 'dev_meta', [])
video_folders_train = build_recursive_case_paths(args.input_dir / 'train', [])

shutil.rmtree(args.output_dir, ignore_errors=True)
(args.output_dir / 'inputs').mkdir(parents=True, exist_ok=True)

data = {}
for f_set, folders in {'val': video_folders_val, 'train': video_folders_train}.items():
worker_args = [(v, i, args.output_dir) for i, v in enumerate(folders)]

objects = list(chain.from_iterable(array_apply(process_video,
worker_args,
args.parallel,
# cpu_frac=2,
chunksize=10,
description='processing intphys scenes')))

data[f_set] = objects

meta_data = {'inputs': {'file_name': {'type': 'input_tensor',
'num_channels': 4,
'height': 288,
'width': 288,
**get_pixels_mean_and_std(data['val'])},
'mask': {'type': 'bitmask'},
'bbox': {'type': 'bounding_box'}},
'outputs': {'shape': get_discrete_metadata(data['val'] + data['train'],
'shape')}
}

write_serialized(data['val'], args.output_dir / 'val.json')
write_serialized(meta_data, args.output_dir / 'metadata.yml')
write_serialized(data['train'], args.output_dir / 'train.json')
10 changes: 8 additions & 2 deletions easy_attributes/attribute_evaluator.py
@@ -1,4 +1,5 @@
from itertools import chain
from pathlib import Path

import numpy as np
from detectron2.config import configurable
@@ -7,17 +8,20 @@
from detectron2.utils.comm import get_world_size

from easy_attributes.model import OutputHead
from easy_attributes.utils.io import write_serialized


class AttributeEvaluator(DatasetEvaluator):

@configurable
def __init__(self, output_head: OutputHead):
    def __init__(self, output_head: OutputHead, output_path: Path):
self.output_head = output_head
self.output_path = output_path

@classmethod
def from_config(cls, cfg):
return {'output_head': OutputHead(cfg)}
return {'output_head': OutputHead(cfg),
'output_path': Path(cfg.OUTPUT_DIR) / 'last_results.json'}

def reset(self):
self._predictions = []
@@ -50,6 +54,8 @@ def evaluate(self):

err_dict = self.output_head.pred_error(all_preds, all_labels)

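        # The BO driver (do_bayes_opt.py) reads OUTPUT_DIR/last_results.json
        # after each training run to score the current draw.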
write_serialized(err_dict, self.output_path)

return err_dict

