Commit
added bayesian optimization capabilities, built a processor for intphys data, added visualization capabilities
aldo committed Nov 19, 2020
1 parent cb4ae0e commit 39f6e84
Showing 12 changed files with 487 additions and 62 deletions.
1 change: 1 addition & 0 deletions aoe
@@ -0,0 +1 @@
'python', '-m', 'easy_attributes.do_train', '--input_data', '/disk1/mcs_physics_data_derender/', '--output_dir', 'bayesian_opt/output', '--bo_config_file', 'bayesian_opt/output/bo_cfg.yml', '--distributed'
140 changes: 140 additions & 0 deletions bayesian_opt/do_bayes_opt.py
@@ -0,0 +1,140 @@
import argparse
import os
import subprocess
import sys
from argparse import Namespace
from collections import defaultdict
from pathlib import Path

from dragonfly.apis.api_utils import preprocess_multifidelity_arguments
from dragonfly.exd.cp_domain_utils import load_config_file
from dragonfly.exd.experiment_caller import CPFunctionCaller
from dragonfly.opt import gp_bandit

sys.path.insert(0, './')
from easy_attributes.config import get_config as _get_config
from easy_attributes.utils.io import write_serialized, read_serialized


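# Fidelity cost for the multi-fidelity optimizer: z[0] is the training-time
# fidelity in seconds (600-2400 in params_domain.json), scaled down so cost
# values stay small.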
def mf_cost(z):
return z[0] / 100.0


def setup_opt(config):
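    # Build Dragonfly's ask-tell multi-fidelity optimizer: preprocess the
    # fidelity space and domain loaded from params_domain.json, wrap them in a
    # CPFunctionCaller, and hand that to a multi-fidelity GP bandit.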
domain = config.domain
domain_orderings = config.domain_orderings
(ask_tell_fidel_space, ask_tell_domain, _, ask_tell_mf_cost, ask_tell_fidel_to_opt, ask_tell_config, _) = \
preprocess_multifidelity_arguments(config.fidel_space, domain, [mf_cost],
mf_cost, config.fidel_to_opt, config)

func_caller = CPFunctionCaller(None, ask_tell_domain, domain_orderings=domain_orderings,
fidel_space=ask_tell_fidel_space, fidel_cost_func=ask_tell_mf_cost,
fidel_to_opt=ask_tell_fidel_to_opt,
fidel_space_orderings=config.fidel_space_orderings,
config=ask_tell_config)

opt = gp_bandit.CPGPBandit(func_caller, is_mf=True, ask_tell_mode=True)
opt.initialise()
return opt


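# Dragonfly draws several parameters in log space (see params_domain.json:
# log10 for learning rate / momentum / Adam beta, log2 for batch size); these
# callables map each raw draw back to the value the training config expects.
# Parameters not listed here pass through as strings.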
PROC_METHODS = defaultdict(lambda: lambda x: str(x))
PROC_METHODS.update({"SOLVER.BASE_LR": lambda x: float(10.0 ** x),
"SOLVER.MOMENTUM": lambda x: float(1 - 10.0 ** x),
"SOLVER.ADAM_BETA": lambda x: float(1 - 10.0 ** x),
# "SOLVER.OPT_TYPE": lambda x: str(x),
"SOLVER.IMS_PER_BATCH": lambda x: int(2 ** x),
"SOLVER.MAX_TIME_SECS": lambda x: int(x),
"DATALOADER.NUM_WORKERS": lambda x: int(2 ** x)})


def process_draw(draw, draw_names):
processed_draws = []
draw_dict = {}
for d, name in zip(draw, draw_names):
proc_d = PROC_METHODS[name](d)
processed_draws += [name, proc_d]
draw_dict[name] = proc_d
return processed_draws, draw_dict

def write_config(data_path: Path, draw_dict, draw_list, secs):
cfg = _get_config(data_path,
use_mask=draw_dict['MASK_OR_BBOX'] == 'mask',
use_bounding_box=draw_dict['MASK_OR_BBOX'] != 'mask')

cfg.SOLVER.CHECKPOINT_PERIOD = 100000000
cfg.TEST.EVAL_PERIOD = 0
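    # The iteration budget appears to scale linearly with the fidelity's time
    # budget (secs out of the 2400-second maximum) and inversely with batch size.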
    cfg.SOLVER.MAX_ITER = int(18000 * secs * 20 / (2400 * draw_dict["SOLVER.IMS_PER_BATCH"]))

cfg.OUTPUT_DIR = 'bayesian_opt/output'
Path(cfg.OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

cfg.merge_from_list(draw_list)
    with open('bayesian_opt/output/bo_cfg.yml', 'w') as f:
f.write(cfg.dump())
# write_serialized(cfg,Path('bayesian_opt/output/cfg.yml'))

def parse_args():
parser = argparse.ArgumentParser()
# parser.add_argument("--output_dir", type=str)
# parser.add_argument('--model_weights', type=str)
# parser.add_argument('--resume', action='store_true')
# parser.add_argument("--distributed", action='store_true')
# # parser.add_argument("--rank", type=int)
# parser.add_argument('--debug', action='store_true')
parser.add_argument('--input_data', type=str)
# parser.add_argument('--use_mask_on_input', action='store_true')
# parser.add_argument('--use_bounding_box_on_input', action='store_true')
# parser.add_argument('--remove_cache', action='store_true')
# parser.add_argument('--num_input_channels', type=int)
return parser.parse_args()

def evaluate_objective():
    command = 'python -m easy_attributes.do_train --input_data /disk1/mcs_physics_data_derender/ --output_dir bayesian_opt/output --bo_config_file bayesian_opt/output/bo_cfg.yml --distributed'
return os.system(command)

if __name__ == "__main__":
config = load_config_file('bayesian_opt/params_domain.json')
opt = setup_opt(config)
args = parse_args()

results = []
while True:
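        # Ask-tell loop: Dragonfly proposes a fidelity (training seconds) and a
        # hyperparameter draw; after training we tell it the negated overall
        # error, since the GP bandit maximizes its objective.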
secs, draw = opt.ask()
draw_list, draw_dict = process_draw(draw,
config.domain_orderings.raw_name_ordering)
write_config(Path(args.input_data),
draw_dict,
draw_list,
secs[0])
# break
# try:
# ret_code = evaluate_objective()
# except Exception:
# ret_code = 1
        try:
            proc = subprocess.run(['python', '-m', 'easy_attributes.do_train',
                                   '--input_data', '/disk1/mcs_physics_data_derender/',
                                   '--output_dir', 'bayesian_opt/output',
                                   '--bo_config_file', 'bayesian_opt/output/bo_cfg.yml',
                                   '--distributed'])
            # subprocess.run only raises if the command cannot be launched; a
            # non-zero exit status must be read from returncode (or use check=True).
            ret_code = proc.returncode
        except Exception:
            ret_code = 1
if ret_code != 0 or not Path('bayesian_opt/output/last_results.json').exists():
            # failed
results.append({'status': 'failed',
'draw': draw_dict,
'secs': int(secs[0]),
})
opt.tell([(secs, draw, -1.0)])
else:
            # didn't fail
r = read_serialized(Path('bayesian_opt/output/last_results.json'))['results']
            results.append({'status': 'successful',
                            'result': r,
'draw': draw_dict,
'secs': int(secs[0]),
})
opt.tell([(secs, draw, -r['overall_mean'])])

write_serialized(results, Path('bayesian_opt/bo_results.yml'))
# break

59 changes: 59 additions & 0 deletions bayesian_opt/params_domain.json
@@ -0,0 +1,59 @@
{
"name": "derenderer",

"domain":{
"optimizer_type":{
"name":"SOLVER.OPT_TYPE",
"type":"discrete",
"items":"Adam-SGD"
},

"pooler_type":{
"name": "MODEL.POOLER_TYPE",
"type": "discrete",
"items": "average-max"
},

"segmenting_way": {
"name": "MASK_OR_BBOX",
"type": "discrete",
"items": "mask-bbox"
},

"log10_base_learning_rate":{
"name":"SOLVER.BASE_LR",
"type":"float",
"min":-6,
"max":-2.5
},
"log10_1-momentum":{
"name":"SOLVER.MOMENTUM",
"type":"float",
"min":-3,
"max":-0.52
},
"log10_1-adam_beta":{
"name":"SOLVER.ADAM_BETA",
"type":"float",
"min":-5,
"max":-1
},
"log2_batch_size":{
"name":"SOLVER.IMS_PER_BATCH",
"type":"float",
"min":2,
"max":7.4
}
},
"fidel_space": {
"training_seconds":{
"name":"training_seconds",
"type":"int",
"min":600,
"max":2400
}
},

"fidel_to_opt":[2400]
}

152 changes: 152 additions & 0 deletions data_processing/intphys_process.py
@@ -0,0 +1,152 @@
import argparse
import os
import shutil
import sys
from itertools import chain
from pathlib import Path
from typing import Dict

import numpy as np
import hickle as hkl
from pycocotools import mask as mask_util
from PIL import Image

sys.path.insert(0, './')
from easy_attributes.utils.io import read_serialized, write_serialized
from easy_attributes.utils.istarmap_tqdm_patch import array_apply
from easy_attributes.utils.meta_data import get_discrete_metadata, get_pixels_mean_and_std

MIN_AREA = 50


def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--input_dir', type=str)
parser.add_argument('--output_dir', type=str)
parser.add_argument('--parallel', action='store_true')
parser.add_argument('--debug', action='store_true')
args = parser.parse_args()
args.input_dir = Path(args.input_dir)
args.output_dir = Path(args.output_dir)
return args


def build_recursive_case_paths(input_folder, cases):
if "scene" not in os.listdir(input_folder):
to_recurse = sorted(list(os.path.join(input_folder, sub_folder) for sub_folder in os.listdir(input_folder)))
for new_folder in to_recurse:
if os.path.isdir(new_folder):
build_recursive_case_paths(new_folder, cases)
else:
cases.append(Path(input_folder))
return cases


def get_attributes(obj: Dict):
attributes = {}

attributes['shape'] = obj.get('shape', 'Occluder')

# [attributes['position']]

return attributes


def process_frame(depth_file, mask_file, rgb_file, frame, out_dir, index):
depth_array = np.asarray(Image.open(depth_file), dtype=np.float32)
# Intphys encoding here: https://www.intphys.com/benchmark/training_set.html
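    # The 16-bit PNG stores 2**16 - 1 - depth (presumably in millimeters, given
    # the division by 1000); invert it, then squash to (0, 1] so the depth
    # channel matches the RGB value range.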
depth_array = (2 ** 16 - 1 - depth_array) / 1000.0
depth_array = 1 / (1 + depth_array)

rgb = np.asarray(Image.open(rgb_file), dtype=np.float32) / 255.0

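    # Stack RGB + depth into a 4-channel image and convert HWC -> CHW.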
input_array = np.concatenate([rgb, depth_array[..., np.newaxis]], axis=2)
input_array = input_array.swapaxes(2, 1).swapaxes(1, 0)

input_file = out_dir / 'inputs' / (str(index).zfill(9) + '.hkl')
hkl.dump(input_array, input_file,
mode='w', compression='gzip')

masks = np.asarray(Image.open(mask_file))

objects = []
for oid, mask_val in frame['masks'].items():
if not ('occluder' in oid or 'object' in oid):
continue

mask = masks == mask_val
if mask.sum() < MIN_AREA:
continue

mask_y, mask_x = mask.nonzero()
bbox = list(map(int, [mask_x.min(), mask_y.min(), mask_x.max(), mask_y.max()]))
        if bbox[3] <= bbox[1] + 2 and bbox[2] <= bbox[0] + 2:  # skip boxes whose width and height are both tiny
continue

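        # Encode the boolean mask as COCO run-length encoding; the RLE byte
        # counts are decoded to ASCII so the annotation stays JSON-serializable.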
mask = mask_util.encode(np.asarray(mask, order="F"))
mask['counts'] = mask['counts'].decode('ascii')

attributes = get_attributes(frame[oid])

objects.append({'mask': mask,
'bbox': bbox,
**attributes,
'filename': str(input_file),
})

return objects


def process_video(video_path, vid_num, out_dir):
depths = []
rgbs = []
masks = []
frames = []
status = read_serialized(video_path / 'status.json')
for i in range(1, 101):
depths.append(video_path / 'depth' / ('depth_' + str(i).zfill(3) + '.png'))
rgbs.append(video_path / 'scene' / ('scene_' + str(i).zfill(3) + '.png'))
masks.append(video_path / 'masks' / ('masks_' + str(i).zfill(3) + '.png'))
frames.append(status['frames'][i - 1])

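    # Each video has exactly 100 frames, so vid_num * 1000 + f_num yields a
    # collision-free index across videos.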
objects = [process_frame(d, m, r, f, out_dir, vid_num * 1000 + f_num) for f_num, (d, m, r, f) in
enumerate(zip(depths, masks, rgbs, frames))]

return chain.from_iterable(objects)


if __name__ == '__main__':
args = parse_args()
video_folders_val = build_recursive_case_paths(args.input_dir / 'dev_meta', [])
video_folders_train = build_recursive_case_paths(args.input_dir / 'train', [])

shutil.rmtree(args.output_dir, ignore_errors=True)
(args.output_dir / 'inputs').mkdir(parents=True, exist_ok=True)

data = {}
for f_set, folders in {'val': video_folders_val, 'train': video_folders_train}.items():
worker_args = [(v, i, args.output_dir) for i, v in enumerate(folders)]

objects = list(chain.from_iterable(array_apply(process_video,
worker_args,
args.parallel,
# cpu_frac=2,
chunksize=10,
description='processing intphys scenes')))

data[f_set] = objects

meta_data = {'inputs': {'file_name': {'type': 'input_tensor',
'num_channels': 4,
'height': 288,
'width': 288,
**get_pixels_mean_and_std(data['val'])},
'mask': {'type': 'bitmask'},
'bbox': {'type': 'bounding_box'}},
'outputs': {'shape': get_discrete_metadata(data['val'] + data['train'],
'shape')}
}

write_serialized(data['val'], args.output_dir / 'val.json')
write_serialized(meta_data, args.output_dir / 'metadata.yml')
write_serialized(data['train'], args.output_dir / 'train.json')
10 changes: 8 additions & 2 deletions easy_attributes/attribute_evaluator.py
@@ -1,4 +1,5 @@
from itertools import chain
from pathlib import Path

import numpy as np
from detectron2.config import configurable
@@ -7,17 +8,20 @@
from detectron2.utils.comm import get_world_size

from easy_attributes.model import OutputHead
from easy_attributes.utils.io import write_serialized


class AttributeEvaluator(DatasetEvaluator):

@configurable
def __init__(self, output_head: OutputHead):
    def __init__(self, output_head: OutputHead, output_path: Path):
self.output_head = output_head
self.output_path = output_path

@classmethod
def from_config(cls, cfg):
return {'output_head': OutputHead(cfg)}
return {'output_head': OutputHead(cfg),
'output_path': Path(cfg.OUTPUT_DIR) / 'last_results.json'}

def reset(self):
self._predictions = []
@@ -50,6 +54,8 @@ def evaluate(self):

err_dict = self.output_head.pred_error(all_preds, all_labels)

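        # The BO driver (do_bayes_opt.py) reads OUTPUT_DIR/last_results.json
        # after each training run to score the current draw.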
write_serialized(err_dict, self.output_path)

return err_dict

