From 5b9d9097cc255becef4b5460c4b951c143d7a380 Mon Sep 17 00:00:00 2001 From: Saurabh Gupta Date: Wed, 19 Apr 2017 22:59:48 -0700 Subject: [PATCH] Implementation for Cognitive Mapping and Planning paper. --- cognitive_mapping_and_planning/.gitignore | 4 + cognitive_mapping_and_planning/README.md | 122 ++ cognitive_mapping_and_planning/__init__.py | 0 .../cfgs/__init__.py | 0 .../cfgs/config_cmp.py | 283 ++++ .../cfgs/config_common.py | 261 +++ .../cfgs/config_distill.py | 114 ++ .../cfgs/config_vision_baseline.py | 173 ++ .../data/.gitignore | 3 + cognitive_mapping_and_planning/data/README.md | 33 + .../datasets/__init__.py | 0 .../datasets/factory.py | 113 ++ .../datasets/nav_env.py | 1465 +++++++++++++++++ .../datasets/nav_env_config.py | 127 ++ cognitive_mapping_and_planning/matplotlibrc | 1 + .../output/.gitignore | 1 + .../output/README.md | 16 + .../patches/GLES2_2_0.py.patch | 14 + .../patches/apply_patches.sh | 18 + .../patches/ctypesloader.py.patch | 15 + .../render/__init__.py | 0 .../render/depth_rgb_encoded.fp | 30 + .../render/depth_rgb_encoded.vp | 15 + .../render/rgb_flat_color.fp | 11 + .../render/rgb_flat_color.vp | 18 + .../render/swiftshader_renderer.py | 427 +++++ .../requirements.txt | 9 + .../scripts/__init__.py | 0 .../scripts/script_distill.py | 177 ++ .../scripts/script_download_init_models.sh | 18 + .../scripts/script_env_vis.py | 186 +++ .../scripts/script_nav_agent_release.py | 253 +++ .../scripts/script_plot_trajectory.py | 339 ++++ .../script_preprocess_annoations_S3DIS.py | 197 +++ .../script_preprocess_annoations_S3DIS.sh | 24 + .../scripts/script_preprocess_meshes_S3DIS.sh | 37 + .../scripts/script_test_pretrained_models.sh | 63 + .../src/__init__.py | 0 .../src/depth_utils.py | 95 ++ .../src/file_utils.py | 41 + .../src/graph_utils.py | 550 +++++++ .../src/map_utils.py | 244 +++ .../src/rotation_utils.py | 73 + cognitive_mapping_and_planning/src/utils.py | 168 ++ .../tfcode/__init__.py | 0 cognitive_mapping_and_planning/tfcode/cmp.py | 553 +++++++ .../tfcode/cmp_summary.py | 213 +++ .../tfcode/cmp_utils.py | 164 ++ .../tfcode/nav_utils.py | 435 +++++ .../tfcode/tf_utils.py | 840 ++++++++++ .../tfcode/vision_baseline_lstm.py | 533 ++++++ 51 files changed, 8476 insertions(+) create mode 100644 cognitive_mapping_and_planning/.gitignore create mode 100644 cognitive_mapping_and_planning/README.md create mode 100644 cognitive_mapping_and_planning/__init__.py create mode 100644 cognitive_mapping_and_planning/cfgs/__init__.py create mode 100644 cognitive_mapping_and_planning/cfgs/config_cmp.py create mode 100644 cognitive_mapping_and_planning/cfgs/config_common.py create mode 100644 cognitive_mapping_and_planning/cfgs/config_distill.py create mode 100644 cognitive_mapping_and_planning/cfgs/config_vision_baseline.py create mode 100644 cognitive_mapping_and_planning/data/.gitignore create mode 100644 cognitive_mapping_and_planning/data/README.md create mode 100644 cognitive_mapping_and_planning/datasets/__init__.py create mode 100644 cognitive_mapping_and_planning/datasets/factory.py create mode 100644 cognitive_mapping_and_planning/datasets/nav_env.py create mode 100644 cognitive_mapping_and_planning/datasets/nav_env_config.py create mode 100644 cognitive_mapping_and_planning/matplotlibrc create mode 100644 cognitive_mapping_and_planning/output/.gitignore create mode 100644 cognitive_mapping_and_planning/output/README.md create mode 100644 cognitive_mapping_and_planning/patches/GLES2_2_0.py.patch create mode 100644 
cognitive_mapping_and_planning/patches/apply_patches.sh create mode 100644 cognitive_mapping_and_planning/patches/ctypesloader.py.patch create mode 100644 cognitive_mapping_and_planning/render/__init__.py create mode 100644 cognitive_mapping_and_planning/render/depth_rgb_encoded.fp create mode 100644 cognitive_mapping_and_planning/render/depth_rgb_encoded.vp create mode 100644 cognitive_mapping_and_planning/render/rgb_flat_color.fp create mode 100644 cognitive_mapping_and_planning/render/rgb_flat_color.vp create mode 100644 cognitive_mapping_and_planning/render/swiftshader_renderer.py create mode 100644 cognitive_mapping_and_planning/requirements.txt create mode 100644 cognitive_mapping_and_planning/scripts/__init__.py create mode 100644 cognitive_mapping_and_planning/scripts/script_distill.py create mode 100644 cognitive_mapping_and_planning/scripts/script_download_init_models.sh create mode 100644 cognitive_mapping_and_planning/scripts/script_env_vis.py create mode 100644 cognitive_mapping_and_planning/scripts/script_nav_agent_release.py create mode 100644 cognitive_mapping_and_planning/scripts/script_plot_trajectory.py create mode 100644 cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.py create mode 100644 cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.sh create mode 100644 cognitive_mapping_and_planning/scripts/script_preprocess_meshes_S3DIS.sh create mode 100644 cognitive_mapping_and_planning/scripts/script_test_pretrained_models.sh create mode 100644 cognitive_mapping_and_planning/src/__init__.py create mode 100644 cognitive_mapping_and_planning/src/depth_utils.py create mode 100644 cognitive_mapping_and_planning/src/file_utils.py create mode 100644 cognitive_mapping_and_planning/src/graph_utils.py create mode 100644 cognitive_mapping_and_planning/src/map_utils.py create mode 100644 cognitive_mapping_and_planning/src/rotation_utils.py create mode 100644 cognitive_mapping_and_planning/src/utils.py create mode 100644 cognitive_mapping_and_planning/tfcode/__init__.py create mode 100644 cognitive_mapping_and_planning/tfcode/cmp.py create mode 100644 cognitive_mapping_and_planning/tfcode/cmp_summary.py create mode 100644 cognitive_mapping_and_planning/tfcode/cmp_utils.py create mode 100644 cognitive_mapping_and_planning/tfcode/nav_utils.py create mode 100644 cognitive_mapping_and_planning/tfcode/tf_utils.py create mode 100644 cognitive_mapping_and_planning/tfcode/vision_baseline_lstm.py diff --git a/cognitive_mapping_and_planning/.gitignore b/cognitive_mapping_and_planning/.gitignore new file mode 100644 index 00000000000..cbc6a8f0271 --- /dev/null +++ b/cognitive_mapping_and_planning/.gitignore @@ -0,0 +1,4 @@ +deps +*.pyc +lib*.so +lib*.so* diff --git a/cognitive_mapping_and_planning/README.md b/cognitive_mapping_and_planning/README.md new file mode 100644 index 00000000000..b3f3e5080e2 --- /dev/null +++ b/cognitive_mapping_and_planning/README.md @@ -0,0 +1,122 @@ +# Cognitive Mapping and Planning for Visual Navigation +**Saurabh Gupta, James Davidson, Sergey Levine, Rahul Sukthankar, Jitendra Malik** + +**Computer Vision and Pattern Recognition (CVPR) 2017.** + +**[ArXiv](https://arxiv.org/abs/1702.03920), +[Project Website](https://sites.google.com/corp/view/cognitive-mapping-and-planning/)** + +### Citing +If you find this code base and models useful in your research, please consider +citing the following paper: + ``` + @inproceedings{gupta2017cognitive, + title={Cognitive Mapping and Planning for Visual Navigation}, + 
author={Gupta, Saurabh and Davidson, James and Levine, Sergey and + Sukthankar, Rahul and Malik, Jitendra}, + booktitle={CVPR}, + year={2017} + } + ``` + +### Contents +1. [Requirements: software](#requirements-software) +2. [Requirements: data](#requirements-data) +3. [Test Pre-trained Models](#test-pre-trained-models) +4. [Train Your Own Models](#train-your-own-models) + +### Requirements: software +1. Python Virtual Env Setup: All code is implemented in Python but depends on a + small number of Python packages and a couple of C libraries. We recommend + using a virtual environment for installing these Python packages and the + Python bindings for these C libraries. + ```Shell + VENV_DIR=venv + pip install virtualenv + virtualenv $VENV_DIR + source $VENV_DIR/bin/activate + + # You may need to upgrade pip for installing opencv-python. + pip install --upgrade pip + # Install simple dependencies. + pip install -r requirements.txt + + # Patch bugs in dependencies. + sh patches/apply_patches.sh + ``` + +2. Install [TensorFlow](https://www.tensorflow.org/) inside this virtual + environment. Typically done with `pip install --upgrade tensorflow-gpu`. + +3. Swiftshader: We use + [Swiftshader](https://github.com/google/swiftshader.git), a CPU-based + renderer, to render the meshes. It is possible to use other renderers; + replace `SwiftshaderRenderer` in `render/swiftshader_renderer.py` with + bindings to your renderer. + ```Shell + mkdir -p deps + git clone --recursive https://github.com/google/swiftshader.git deps/swiftshader-src + cd deps/swiftshader-src && git checkout 91da6b00584afd7dcaed66da88e2b617429b3950 + mkdir build && cd build && cmake .. && make -j 16 libEGL libGLESv2 + cd ../../../ + cp deps/swiftshader-src/build/libEGL* libEGL.so.1 + cp deps/swiftshader-src/build/libGLESv2* libGLESv2.so.2 + ``` + +4. PyAssimp: We use [PyAssimp](https://github.com/assimp/assimp.git) to load + meshes. It is possible to use other libraries to load meshes; replace + `Shape` in `render/swiftshader_renderer.py` with bindings to your library + for loading meshes. + ```Shell + mkdir -p deps + git clone https://github.com/assimp/assimp.git deps/assimp-src + cd deps/assimp-src + git checkout 2afeddd5cb63d14bc77b53740b38a54a97d94ee8 + cmake CMakeLists.txt -G 'Unix Makefiles' && make -j 16 + cd port/PyAssimp && python setup.py install + cd ../../../.. + cp deps/assimp-src/lib/libassimp* . + ``` + +5. graph-tool: We use the [graph-tool](https://git.skewed.de/count0/graph-tool) + library for graph processing. + ```Shell + mkdir -p deps + # If the following git clone command fails, you can also download the source + # from https://downloads.skewed.de/graph-tool/graph-tool-2.2.44.tar.bz2 + git clone https://git.skewed.de/count0/graph-tool deps/graph-tool-src + cd deps/graph-tool-src && git checkout 178add3a571feb6666f4f119027705d95d2951ab + bash autogen.sh + ./configure --disable-cairo --disable-sparsehash --prefix=$HOME/.local + make -j 16 + make install + cd ../../ + ``` + +### Requirements: data +1. Download the Stanford 3D Indoor Spaces Dataset (S3DIS Dataset) and ImageNet + Pre-trained models for initializing different models. Follow the + instructions in `data/README.md`. + +### Test Pre-trained Models +1. Download pre-trained models using + `scripts/scripts_download_pretrained_models.sh` + +2. Test models using `scripts/script_test_pretrained_models.sh`. + +### Train Your Own Models +All models were trained asynchronously with 16 workers, each using data +from a single floor.
The default hyper-parameters correspond to this setting. +See [distributed training with +TensorFlow](https://www.tensorflow.org/deploy/distributed) for setting up +distributed training. Training with a single worker is possible with the current +code base but will require some minor changes to allow each worker to load all +training environments. + +### Contact +For questions or issues, open an issue on the tensorflow/models [issues +tracker](https://github.com/tensorflow/models/issues). Please assign issues to +@s-gupta. + +### Credits +This code was written by Saurabh Gupta (@s-gupta). diff --git a/cognitive_mapping_and_planning/__init__.py b/cognitive_mapping_and_planning/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cognitive_mapping_and_planning/cfgs/__init__.py b/cognitive_mapping_and_planning/cfgs/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cognitive_mapping_and_planning/cfgs/config_cmp.py b/cognitive_mapping_and_planning/cfgs/config_cmp.py new file mode 100644 index 00000000000..715eee2b973 --- /dev/null +++ b/cognitive_mapping_and_planning/cfgs/config_cmp.py @@ -0,0 +1,283 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================== + +import os, sys +import numpy as np +from tensorflow.python.platform import app +from tensorflow.python.platform import flags +import logging +import src.utils as utils +import cfgs.config_common as cc + + +import tensorflow as tf + + +rgb_resnet_v2_50_path = 'data/init_models/resnet_v2_50/model.ckpt-5136169' +d_resnet_v2_50_path = 'data/init_models/distill_rgb_to_d_resnet_v2_50/model.ckpt-120002' + +def get_default_args(): + summary_args = utils.Foo(display_interval=1, test_iters=26, + arop_full_summary_iters=14) + + control_args = utils.Foo(train=False, test=False, + force_batchnorm_is_training_at_test=False, + reset_rng_seed=False, only_eval_when_done=False, + test_mode=None) + return summary_args, control_args + +def get_default_cmp_args(): + batch_norm_param = {'center': True, 'scale': True, + 'activation_fn':tf.nn.relu} + + mapper_arch_args = utils.Foo( + dim_reduce_neurons=64, + fc_neurons=[1024, 1024], + fc_out_size=8, + fc_out_neurons=64, + encoder='resnet_v2_50', + deconv_neurons=[64, 32, 16, 8, 4, 2], + deconv_strides=[2, 2, 2, 2, 2, 2], + deconv_layers_per_block=2, + deconv_kernel_size=4, + fc_dropout=0.5, + combine_type='wt_avg_logits', + batch_norm_param=batch_norm_param) + + readout_maps_arch_args = utils.Foo( + num_neurons=[], + strides=[], + kernel_size=None, + layers_per_block=None) + + arch_args = utils.Foo( + vin_val_neurons=8, vin_action_neurons=8, vin_ks=3, vin_share_wts=False, + pred_neurons=[64, 64], pred_batch_norm_param=batch_norm_param, + conv_on_value_map=0, fr_neurons=16, fr_ver='v2', fr_inside_neurons=64, + fr_stride=1, crop_remove_each=30, value_crop_size=4, + action_sample_type='sample', action_sample_combine_type='one_or_other', + sample_gt_prob_type='inverse_sigmoid_decay', dagger_sample_bn_false=True, + vin_num_iters=36, isd_k=750., use_agent_loc=False, multi_scale=True, + readout_maps=False, rom_arch=readout_maps_arch_args) + + return arch_args, mapper_arch_args + +def get_arch_vars(arch_str): + if arch_str == '': vals = [] + else: vals = arch_str.split('_') + ks = ['var1', 'var2', 'var3'] + ks = ks[:len(vals)] + + # Exp Ver. + if len(vals) == 0: ks.append('var1'); vals.append('v0') + # custom arch. + if len(vals) == 1: ks.append('var2'); vals.append('') + # map scale for projection baseline. + if len(vals) == 2: ks.append('var3'); vals.append('fr2') + + assert(len(vals) == 3) + + vars = utils.Foo() + for k, v in zip(ks, vals): + setattr(vars, k, v) + + logging.error('arch_vars: %s', vars) + return vars + +def process_arch_str(args, arch_str): + # This function modifies args. + args.arch, args.mapper_arch = get_default_cmp_args() + + arch_vars = get_arch_vars(arch_str) + + args.navtask.task_params.outputs.ego_maps = True + args.navtask.task_params.outputs.ego_goal_imgs = True + args.navtask.task_params.outputs.egomotion = True + args.navtask.task_params.toy_problem = False + + if arch_vars.var1 == 'lmap': + args = process_arch_learned_map(args, arch_vars) + + elif arch_vars.var1 == 'pmap': + args = process_arch_projected_map(args, arch_vars) + + else: + logging.fatal('arch_vars.var1 should be lmap or pmap, but is %s', arch_vars.var1) + assert(False) + + return args + +def process_arch_learned_map(args, arch_vars): + # Multi-scale vision-based system.
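+ # For reference, arch_str is the first dot-separated component of the + # config name; e.g. a hypothetical arch string 'lmap_Msc_fr2' would select + # this learned-map model ('lmap') with the multi-scale planner ('Msc') + # configured below.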
+ args.navtask.task_params.input_type = 'vision' + args.navtask.task_params.outputs.images = True + + if args.navtask.camera_param.modalities[0] == 'rgb': + args.solver.pretrained_path = rgb_resnet_v2_50_path + elif args.navtask.camera_param.modalities[0] == 'depth': + args.solver.pretrained_path = d_resnet_v2_50_path + + if arch_vars.var2 == 'Ssc': + sc = 1./args.navtask.task_params.step_size + args.arch.vin_num_iters = 40 + args.navtask.task_params.map_scales = [sc] + max_dist = args.navtask.task_params.max_dist * \ + args.navtask.task_params.num_goals + args.navtask.task_params.map_crop_sizes = [2*max_dist] + + args.arch.fr_stride = 1 + args.arch.vin_action_neurons = 8 + args.arch.vin_val_neurons = 3 + args.arch.fr_inside_neurons = 32 + + args.mapper_arch.pad_map_with_zeros_each = [24] + args.mapper_arch.deconv_neurons = [64, 32, 16] + args.mapper_arch.deconv_strides = [1, 2, 1] + + elif (arch_vars.var2 == 'Msc' or arch_vars.var2 == 'MscROMms' or + arch_vars.var2 == 'MscROMss' or arch_vars.var2 == 'MscNoVin'): + # Code for multi-scale planner. + args.arch.vin_num_iters = 8 + args.arch.crop_remove_each = 4 + args.arch.value_crop_size = 8 + + sc = 1./args.navtask.task_params.step_size + max_dist = args.navtask.task_params.max_dist * \ + args.navtask.task_params.num_goals + n_scales = np.log2(float(max_dist) / float(args.arch.vin_num_iters)) + n_scales = int(np.ceil(n_scales)+1) + + args.navtask.task_params.map_scales = \ + list(sc*(0.5**(np.arange(n_scales))[::-1])) + args.navtask.task_params.map_crop_sizes = [16 for x in range(n_scales)] + + args.arch.fr_stride = 1 + args.arch.vin_action_neurons = 8 + args.arch.vin_val_neurons = 3 + args.arch.fr_inside_neurons = 32 + + args.mapper_arch.pad_map_with_zeros_each = [0 for _ in range(n_scales)] + args.mapper_arch.deconv_neurons = [64*n_scales, 32*n_scales, 16*n_scales] + args.mapper_arch.deconv_strides = [1, 2, 1] + + if arch_vars.var2 == 'MscNoVin': + # No planning version. + args.arch.fr_stride = [1, 2, 1, 2] + args.arch.vin_action_neurons = None + args.arch.vin_val_neurons = 16 + args.arch.fr_inside_neurons = 32 + + args.arch.crop_remove_each = 0 + args.arch.value_crop_size = 4 + args.arch.vin_num_iters = 0 + + elif arch_vars.var2 == 'MscROMms' or arch_vars.var2 == 'MscROMss': + # Code with read outs, MscROMms flattens and reads out, + # MscROMss does not flatten and produces output at multiple scales. 
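+ # ROM = read-out maps: extra decoders that predict maps from the belief, + # presumably to probe what the learned mapper encodes.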
+ args.navtask.task_params.outputs.readout_maps = True + args.navtask.task_params.map_resize_method = 'antialiasing' + args.arch.readout_maps = True + + if arch_vars.var2 == 'MscROMms': + args.arch.rom_arch.num_neurons = [64, 1] + args.arch.rom_arch.kernel_size = 4 + args.arch.rom_arch.strides = [2,2] + args.arch.rom_arch.layers_per_block = 2 + + args.navtask.task_params.readout_maps_crop_sizes = [64] + args.navtask.task_params.readout_maps_scales = [sc] + + elif arch_vars.var2 == 'MscROMss': + args.arch.rom_arch.num_neurons = \ + [64, len(args.navtask.task_params.map_scales)] + args.arch.rom_arch.kernel_size = 4 + args.arch.rom_arch.strides = [1,1] + args.arch.rom_arch.layers_per_block = 1 + + args.navtask.task_params.readout_maps_crop_sizes = \ + args.navtask.task_params.map_crop_sizes + args.navtask.task_params.readout_maps_scales = \ + args.navtask.task_params.map_scales + + else: + logging.fatal('arch_vars.var2 not one of Ssc, Msc, MscROMms, MscROMss, MscNoVin.') + assert(False) + + map_channels = args.mapper_arch.deconv_neurons[-1] / \ + (2*len(args.navtask.task_params.map_scales)) + args.navtask.task_params.map_channels = map_channels + + return args + +def process_arch_projected_map(args, arch_vars): + # Single-scale vision-based system which does not use a mapper but instead + # uses an analytically estimated map. + ds = int(arch_vars.var3[2]) + args.navtask.task_params.input_type = 'analytical_counts' + args.navtask.task_params.outputs.analytical_counts = True + + assert(args.navtask.task_params.modalities[0] == 'depth') + args.navtask.camera_param.img_channels = None + + analytical_counts = utils.Foo(map_sizes=[512/ds], + xy_resolution=[5.*ds], + z_bins=[[-10, 10, 150, 200]], + non_linearity=[arch_vars.var2]) + args.navtask.task_params.analytical_counts = analytical_counts + + sc = 1./ds + args.arch.vin_num_iters = 36 + args.navtask.task_params.map_scales = [sc] + args.navtask.task_params.map_crop_sizes = [512/ds] + + args.arch.fr_stride = [1,2] + args.arch.vin_action_neurons = 8 + args.arch.vin_val_neurons = 3 + args.arch.fr_inside_neurons = 32 + + map_channels = len(analytical_counts.z_bins[0]) + 1 + args.navtask.task_params.map_channels = map_channels + args.solver.freeze_conv = False + + return args + +def get_args_for_config(config_name): + args = utils.Foo() + + args.summary, args.control = get_default_args() + + exp_name, mode_str = config_name.split('+') + arch_str, solver_str, navtask_str = exp_name.split('.') + logging.error('config_name: %s', config_name) + logging.error('arch_str: %s', arch_str) + logging.error('navtask_str: %s', navtask_str) + logging.error('solver_str: %s', solver_str) + logging.error('mode_str: %s', mode_str) + + args.solver = cc.process_solver_str(solver_str) + args.navtask = cc.process_navtask_str(navtask_str) + + args = process_arch_str(args, arch_str) + args.arch.isd_k = args.solver.isd_k + + # Train, test, etc. + mode, imset = mode_str.split('_') + args = cc.adjust_args_for_mode(args, mode) + args.navtask.building_names = args.navtask.dataset.get_split(imset) + args.control.test_name = '{:s}_on_{:s}'.format(mode, imset) + + # Log the arguments + logging.error('%s', args) + return args diff --git a/cognitive_mapping_and_planning/cfgs/config_common.py b/cognitive_mapping_and_planning/cfgs/config_common.py new file mode 100644 index 00000000000..440bf5b72f8 --- /dev/null +++ b/cognitive_mapping_and_planning/cfgs/config_common.py @@ -0,0 +1,261 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import os +import numpy as np +import logging +import src.utils as utils +import datasets.nav_env_config as nec +from datasets import factory + +def adjust_args_for_mode(args, mode): + if mode == 'train': + args.control.train = True + + elif mode == 'val1': + # Same settings as for training, to make sure nothing wonky is happening + # there. + args.control.test = True + args.control.test_mode = 'val' + args.navtask.task_params.batch_size = 32 + + elif mode == 'val2': + # No data augmentation, not sampling but taking the argmax action, not + # sampling from the ground truth at all. + args.control.test = True + args.arch.action_sample_type = 'argmax' + args.arch.sample_gt_prob_type = 'zero' + args.navtask.task_params.data_augment = \ + utils.Foo(lr_flip=0, delta_angle=0, delta_xy=0, relight=False, + relight_fast=False, structured=False) + args.control.test_mode = 'val' + args.navtask.task_params.batch_size = 32 + + elif mode == 'bench': + # Actually testing the agent in settings that are kept same between + # different runs. + args.navtask.task_params.batch_size = 16 + args.control.test = True + args.arch.action_sample_type = 'argmax' + args.arch.sample_gt_prob_type = 'zero' + args.navtask.task_params.data_augment = \ + utils.Foo(lr_flip=0, delta_angle=0, delta_xy=0, relight=False, + relight_fast=False, structured=False) + args.summary.test_iters = 250 + args.control.only_eval_when_done = True + args.control.reset_rng_seed = True + args.control.test_mode = 'test' + else: + logging.fatal('Unknown mode: %s.', mode) + assert(False) + return args + +def get_solver_vars(solver_str): + if solver_str == '': vals = []; + else: vals = solver_str.split('_') + ks = ['clip', 'dlw', 'long', 'typ', 'rlw', 'isdk', 'adam_eps', 'init_lr']; + ks = ks[:len(vals)] + + # Gradient clipping or not. + if len(vals) == 0: ks.append('clip'); vals.append('noclip'); + # data loss weight. + if len(vals) == 1: ks.append('dlw'); vals.append('dlw20') + # how long to train for. + if len(vals) == 2: ks.append('long'); vals.append('nolong') + # Adam + if len(vals) == 3: ks.append('typ'); vals.append('adam2') + # reg loss wt + if len(vals) == 4: ks.append('rlw'); vals.append('rlw1') + # isd_k + if len(vals) == 5: ks.append('isdk'); vals.append('isdk415') # 415, inflexion at 2.5k.
+ # adam eps + if len(vals) == 6: ks.append('adam_eps'); vals.append('aeps1en8') + # init lr + if len(vals) == 7: ks.append('init_lr'); vals.append('lr1en3') + + assert(len(vals) == 8) + + vars = utils.Foo() + for k, v in zip(ks, vals): + setattr(vars, k, v) + logging.error('solver_vars: %s', vars) + return vars + +def process_solver_str(solver_str): + solver = utils.Foo( + seed=0, learning_rate_decay=None, clip_gradient_norm=None, max_steps=None, + initial_learning_rate=None, momentum=None, steps_per_decay=None, + logdir=None, sync=False, adjust_lr_sync=True, wt_decay=0.0001, + data_loss_wt=None, reg_loss_wt=None, freeze_conv=True, num_workers=1, + task=0, ps_tasks=0, master='local', typ=None, momentum2=None, + adam_eps=None) + + # Clobber with overrides from solver str. + solver_vars = get_solver_vars(solver_str) + + solver.data_loss_wt = float(solver_vars.dlw[3:].replace('x', '.')) + solver.adam_eps = float(solver_vars.adam_eps[4:].replace('x', '.').replace('n', '-')) + solver.initial_learning_rate = float(solver_vars.init_lr[2:].replace('x', '.').replace('n', '-')) + solver.reg_loss_wt = float(solver_vars.rlw[3:].replace('x', '.')) + solver.isd_k = float(solver_vars.isdk[4:].replace('x', '.')) + + long = solver_vars.long + if long == 'long': + solver.steps_per_decay = 40000 + solver.max_steps = 120000 + elif long == 'long2': + solver.steps_per_decay = 80000 + solver.max_steps = 120000 + elif long == 'nolong' or long == 'nol': + solver.steps_per_decay = 20000 + solver.max_steps = 60000 + else: + logging.fatal('solver_vars.long should be long, long2, nolong or nol.') + assert(False) + + clip = solver_vars.clip + if clip == 'noclip' or clip == 'nocl': + solver.clip_gradient_norm = 0 + elif clip[:4] == 'clip': + solver.clip_gradient_norm = float(clip[4:].replace('x', '.')) + else: + logging.fatal('Unknown solver_vars.clip: %s', clip) + assert(False) + + typ = solver_vars.typ + if typ == 'adam': + solver.typ = 'adam' + solver.momentum = 0.9 + solver.momentum2 = 0.999 + solver.learning_rate_decay = 1.0 + elif typ == 'adam2': + solver.typ = 'adam' + solver.momentum = 0.9 + solver.momentum2 = 0.999 + solver.learning_rate_decay = 0.1 + elif typ == 'sgd': + solver.typ = 'sgd' + solver.momentum = 0.99 + solver.momentum2 = None + solver.learning_rate_decay = 0.1 + else: + logging.fatal('Unknown solver_vars.typ: %s', typ) + assert(False) + + logging.error('solver: %s', solver) + return solver + +def get_navtask_vars(navtask_str): + if navtask_str == '': vals = [] + else: vals = navtask_str.split('_') + + ks_all = ['dataset_name', 'modality', 'task', 'history', 'max_dist', + 'num_steps', 'step_size', 'n_ori', 'aux_views', 'data_aug'] + ks = ks_all[:len(vals)] + + # All data or not. + if len(vals) == 0: ks.append('dataset_name'); vals.append('sbpd') + # modality + if len(vals) == 1: ks.append('modality'); vals.append('rgb') + # semantic task? + if len(vals) == 2: ks.append('task'); vals.append('r2r') + # number of history frames. + if len(vals) == 3: ks.append('history'); vals.append('h0') + # max dist + if len(vals) == 4: ks.append('max_dist'); vals.append('32') + # num steps + if len(vals) == 5: ks.append('num_steps'); vals.append('40') + # step size + if len(vals) == 6: ks.append('step_size'); vals.append('8') + # n_ori + if len(vals) == 7: ks.append('n_ori'); vals.append('4') + # Auxiliary views. + if len(vals) == 8: ks.append('aux_views'); vals.append('nv0') + # Normal data augmentation as opposed to structured data augmentation (if set + # to straug).
+ if len(vals) == 9: ks.append('data_aug'); vals.append('straug') + + assert(len(vals) == 10) + for i in range(len(ks)): + assert(ks[i] == ks_all[i]) + + vars = utils.Foo() + for k, v in zip(ks, vals): + setattr(vars, k, v) + logging.error('navtask_vars: %s', vars) + return vars + +def process_navtask_str(navtask_str): + navtask = nec.nav_env_base_config() + + # Clobber with overrides from strings. + navtask_vars = get_navtask_vars(navtask_str) + + navtask.task_params.n_ori = int(navtask_vars.n_ori) + navtask.task_params.max_dist = int(navtask_vars.max_dist) + navtask.task_params.num_steps = int(navtask_vars.num_steps) + navtask.task_params.step_size = int(navtask_vars.step_size) + navtask.task_params.data_augment.delta_xy = int(navtask_vars.step_size)/2. + n_aux_views_each = int(navtask_vars.aux_views[2]) + aux_delta_thetas = np.concatenate((np.arange(n_aux_views_each) + 1, + -1 -np.arange(n_aux_views_each))) + aux_delta_thetas = aux_delta_thetas*np.deg2rad(navtask.camera_param.fov) + navtask.task_params.aux_delta_thetas = aux_delta_thetas + + if navtask_vars.data_aug == 'aug': + navtask.task_params.data_augment.structured = False + elif navtask_vars.data_aug == 'straug': + navtask.task_params.data_augment.structured = True + else: + logging.fatal('Unknown navtask_vars.data_aug %s.', navtask_vars.data_aug) + assert(False) + + navtask.task_params.num_history_frames = int(navtask_vars.history[1:]) + navtask.task_params.n_views = 1+navtask.task_params.num_history_frames + + navtask.task_params.goal_channels = int(navtask_vars.n_ori) + + if navtask_vars.task == 'hard': + navtask.task_params.type = 'rng_rejection_sampling_many' + navtask.task_params.rejection_sampling_M = 2000 + navtask.task_params.min_dist = 10 + elif navtask_vars.task == 'r2r': + navtask.task_params.type = 'room_to_room_many' + elif navtask_vars.task == 'ST': + # Semantic task at hand. + navtask.task_params.goal_channels = \ + len(navtask.task_params.semantic_task.class_map_names) + navtask.task_params.rel_goal_loc_dim = \ + len(navtask.task_params.semantic_task.class_map_names) + navtask.task_params.type = 'to_nearest_obj_acc' + else: + logging.fatal('navtask_vars.task should be hard, r2r or ST.') + assert(False) + + if navtask_vars.modality == 'rgb': + navtask.camera_param.modalities = ['rgb'] + navtask.camera_param.img_channels = 3 + elif navtask_vars.modality == 'd': + navtask.camera_param.modalities = ['depth'] + navtask.camera_param.img_channels = 2 + + navtask.task_params.img_height = navtask.camera_param.height + navtask.task_params.img_width = navtask.camera_param.width + navtask.task_params.modalities = navtask.camera_param.modalities + navtask.task_params.img_channels = navtask.camera_param.img_channels + navtask.task_params.img_fov = navtask.camera_param.fov + + navtask.dataset = factory.get_dataset(navtask_vars.dataset_name) + return navtask diff --git a/cognitive_mapping_and_planning/cfgs/config_distill.py b/cognitive_mapping_and_planning/cfgs/config_distill.py new file mode 100644 index 00000000000..a6f7985f8f0 --- /dev/null +++ b/cognitive_mapping_and_planning/cfgs/config_distill.py @@ -0,0 +1,114 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import pprint +import copy +import os +from tensorflow.python.platform import app +from tensorflow.python.platform import flags +import logging +import src.utils as utils +import cfgs.config_common as cc + + +import tensorflow as tf + +rgb_resnet_v2_50_path = 'cache/resnet_v2_50_inception_preprocessed/model.ckpt-5136169' + +def get_default_args(): + robot = utils.Foo(radius=15, base=10, height=140, sensor_height=120, + camera_elevation_degree=-15) + + camera_param = utils.Foo(width=225, height=225, z_near=0.05, z_far=20.0, + fov=60., modalities=['rgb', 'depth']) + + env = utils.Foo(padding=10, resolution=5, num_point_threshold=2, + valid_min=-10, valid_max=200, n_samples_per_face=200) + + data_augment = utils.Foo(lr_flip=0, delta_angle=1, delta_xy=4, relight=False, + relight_fast=False, structured=False) + + task_params = utils.Foo(num_actions=4, step_size=4, num_steps=0, + batch_size=32, room_seed=0, base_class='Building', + task='mapping', n_ori=6, data_augment=data_augment, + output_transform_to_global_map=False, + output_canonical_map=False, + output_incremental_transform=False, + output_free_space=False, move_type='shortest_path', + toy_problem=0) + + buildinger_args = utils.Foo(building_names=['area1_gates_wingA_floor1_westpart'], + env_class=None, robot=robot, + task_params=task_params, env=env, + camera_param=camera_param) + + solver_args = utils.Foo(seed=0, learning_rate_decay=0.1, + clip_gradient_norm=0, max_steps=120000, + initial_learning_rate=0.001, momentum=0.99, + steps_per_decay=40000, logdir=None, sync=False, + adjust_lr_sync=True, wt_decay=0.0001, + data_loss_wt=1.0, reg_loss_wt=1.0, + num_workers=1, task=0, ps_tasks=0, master='local') + + summary_args = utils.Foo(display_interval=1, test_iters=100) + + control_args = utils.Foo(train=False, test=False, + force_batchnorm_is_training_at_test=False) + + arch_args = utils.Foo(rgb_encoder='resnet_v2_50', d_encoder='resnet_v2_50') + + return utils.Foo(solver=solver_args, + summary=summary_args, control=control_args, arch=arch_args, + buildinger=buildinger_args) + +def get_vars(config_name): + vars = config_name.split('_') + if len(vars) == 1: # All data or not. 
+ vars.append('noall') + if len(vars) == 2: # n_ori + vars.append('4') + logging.error('vars: %s', vars) + return vars + +def get_args_for_config(config_name): + args = get_default_args() + config_name, mode = config_name.split('+') + vars = get_vars(config_name) + + logging.info('config_name: %s, mode: %s', config_name, mode) + + args.buildinger.task_params.n_ori = int(vars[2]) + args.solver.freeze_conv = True + args.solver.pretrained_path = rgb_resnet_v2_50_path + args.buildinger.task_params.img_channels = 5 + args.solver.data_loss_wt = 0.00001 + + if vars[0] == 'v0': + pass + else: + logging.error('config_name: %s undefined', config_name) + + args.buildinger.task_params.height = args.buildinger.camera_param.height + args.buildinger.task_params.width = args.buildinger.camera_param.width + args.buildinger.task_params.modalities = args.buildinger.camera_param.modalities + + if vars[1] == 'all': + args = cc.get_args_for_mode_building_all(args, mode) + elif vars[1] == 'noall': + args = cc.get_args_for_mode_building(args, mode) + + # Log the arguments + logging.error('%s', args) + return args diff --git a/cognitive_mapping_and_planning/cfgs/config_vision_baseline.py b/cognitive_mapping_and_planning/cfgs/config_vision_baseline.py new file mode 100644 index 00000000000..3cc64fe594a --- /dev/null +++ b/cognitive_mapping_and_planning/cfgs/config_vision_baseline.py @@ -0,0 +1,173 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================== + +import pprint +import os +import numpy as np +from tensorflow.python.platform import app +from tensorflow.python.platform import flags +import logging +import src.utils as utils +import cfgs.config_common as cc +import datasets.nav_env_config as nec + + +import tensorflow as tf + +FLAGS = flags.FLAGS + +get_solver_vars = cc.get_solver_vars +get_navtask_vars = cc.get_navtask_vars + + +rgb_resnet_v2_50_path = 'data/init_models/resnet_v2_50/model.ckpt-5136169' +d_resnet_v2_50_path = 'data/init_models/distill_rgb_to_d_resnet_v2_50/model.ckpt-120002' + +def get_default_args(): + summary_args = utils.Foo(display_interval=1, test_iters=26, + arop_full_summary_iters=14) + + control_args = utils.Foo(train=False, test=False, + force_batchnorm_is_training_at_test=False, + reset_rng_seed=False, only_eval_when_done=False, + test_mode=None) + return summary_args, control_args + +def get_default_baseline_args(): + batch_norm_param = {'center': True, 'scale': True, + 'activation_fn':tf.nn.relu} + arch_args = utils.Foo( + pred_neurons=[], goal_embed_neurons=[], img_embed_neurons=[], + batch_norm_param=batch_norm_param, dim_reduce_neurons=64, combine_type='', + encoder='resnet_v2_50', action_sample_type='sample', + action_sample_combine_type='one_or_other', + sample_gt_prob_type='inverse_sigmoid_decay', dagger_sample_bn_false=True, + isd_k=750., use_visit_count=False, lstm_output=False, lstm_ego=False, + lstm_img=False, fc_dropout=0.0, embed_goal_for_state=False, + lstm_output_init_state_from_goal=False) + return arch_args + +def get_arch_vars(arch_str): + if arch_str == '': vals = [] + else: vals = arch_str.split('_') + + ks = ['ver', 'lstm_dim', 'dropout'] + + # Exp Ver + if len(vals) == 0: vals.append('v0') + # LSTM dimensions + if len(vals) == 1: vals.append('lstm2048') + # Dropout + if len(vals) == 2: vals.append('noDO') + + assert(len(vals) == 3) + + vars = utils.Foo() + for k, v in zip(ks, vals): + setattr(vars, k, v) + + logging.error('arch_vars: %s', vars) + return vars + +def process_arch_str(args, arch_str): + # This function modifies args. + args.arch = get_default_baseline_args() + arch_vars = get_arch_vars(arch_str) + + args.navtask.task_params.outputs.rel_goal_loc = True + args.navtask.task_params.input_type = 'vision' + args.navtask.task_params.outputs.images = True + + if args.navtask.camera_param.modalities[0] == 'rgb': + args.solver.pretrained_path = rgb_resnet_v2_50_path + elif args.navtask.camera_param.modalities[0] == 'depth': + args.solver.pretrained_path = d_resnet_v2_50_path + else: + logging.fatal('Modality should be rgb or depth.') + + if arch_vars.dropout == 'DO': + args.arch.fc_dropout = 0.5 + + args.tfcode = 'B' + + exp_ver = arch_vars.ver + if exp_ver == 'v0': + # Multiplicative interaction between goal loc and image features. + args.arch.combine_type = 'multiply' + args.arch.pred_neurons = [256, 256] + args.arch.goal_embed_neurons = [64, 8] + args.arch.img_embed_neurons = [1024, 512, 256*8] + + elif exp_ver == 'v1': + # Additive interaction between goal and image features. + args.arch.combine_type = 'add' + args.arch.pred_neurons = [256, 256] + args.arch.goal_embed_neurons = [64, 256] + args.arch.img_embed_neurons = [1024, 512, 256] + + elif exp_ver == 'v2': + # LSTM at the output on top of the multiplicative interaction.
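+ # A sketch of the assumed flow: image features through img_embed_neurons + # and the goal through goal_embed_neurons are combined by element-wise + # multiplication, passed through an LSTM of lstm_output_dim units, and + # pred_neurons then produces the action logits.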
+ args.arch.combine_type = 'multiply' + args.arch.goal_embed_neurons = [64, 8] + args.arch.img_embed_neurons = [1024, 512, 256*8] + args.arch.lstm_output = True + args.arch.lstm_output_dim = int(arch_vars.lstm_dim[4:]) + args.arch.pred_neurons = [256] # The other is inside the LSTM. + + elif exp_ver == 'v0blind': + # LSTM only on the goal location. + args.arch.combine_type = 'goalonly' + args.arch.goal_embed_neurons = [64, 256] + args.arch.img_embed_neurons = [2] # I don't know what it will do otherwise. + args.arch.lstm_output = True + args.arch.lstm_output_dim = 256 + args.arch.pred_neurons = [256] # The other is inside the LSTM. + + else: + logging.fatal('exp_ver: %s undefined', exp_ver) + assert(False) + + # Log the arguments + logging.error('%s', args) + return args + +def get_args_for_config(config_name): + args = utils.Foo() + + args.summary, args.control = get_default_args() + + exp_name, mode_str = config_name.split('+') + arch_str, solver_str, navtask_str = exp_name.split('.') + logging.error('config_name: %s', config_name) + logging.error('arch_str: %s', arch_str) + logging.error('navtask_str: %s', navtask_str) + logging.error('solver_str: %s', solver_str) + logging.error('mode_str: %s', mode_str) + + args.solver = cc.process_solver_str(solver_str) + args.navtask = cc.process_navtask_str(navtask_str) + + args = process_arch_str(args, arch_str) + args.arch.isd_k = args.solver.isd_k + + # Train, test, etc. + mode, imset = mode_str.split('_') + args = cc.adjust_args_for_mode(args, mode) + args.navtask.building_names = args.navtask.dataset.get_split(imset) + args.control.test_name = '{:s}_on_{:s}'.format(mode, imset) + + # Log the arguments + logging.error('%s', args) + return args diff --git a/cognitive_mapping_and_planning/data/.gitignore b/cognitive_mapping_and_planning/data/.gitignore new file mode 100644 index 00000000000..2b6d5e46652 --- /dev/null +++ b/cognitive_mapping_and_planning/data/.gitignore @@ -0,0 +1,3 @@ +stanford_building_parser_dataset_raw +stanford_building_parser_dataset +init_models diff --git a/cognitive_mapping_and_planning/data/README.md b/cognitive_mapping_and_planning/data/README.md new file mode 100644 index 00000000000..a8928345351 --- /dev/null +++ b/cognitive_mapping_and_planning/data/README.md @@ -0,0 +1,33 @@ +This directory contains the data needed for training and benchmarking various +navigation models. + +1. Download the data from the + [dataset website](http://buildingparser.stanford.edu/dataset.html). + 1. [Raw meshes](https://goo.gl/forms/2YSPaO2UKmn5Td5m2). We need the meshes + which are in the noXYZ folder. Download the tar files and place them in + the `stanford_building_parser_dataset_raw` folder. You need to download + `area_1_noXYZ.tar`, `area_3_noXYZ.tar`, `area_5a_noXYZ.tar`, + `area_5b_noXYZ.tar`, `area_6_noXYZ.tar` for training and + `area_4_noXYZ.tar` for evaluation. + 2. [Annotations](https://goo.gl/forms/4SoGp4KtH1jfRqEj2) for setting up + tasks. We will need the file called `Stanford3dDataset_v1.2.zip`. Place + the file in the directory `stanford_building_parser_dataset_raw`. + +2. Preprocess the data. + 1. Extract meshes using `scripts/script_preprocess_meshes_S3DIS.sh`. After + this `ls data/stanford_building_parser_dataset/mesh` should have 6 + folders `area1`, `area3`, `area4`, `area5a`, `area5b`, `area6`, with + textures and obj files within each directory. + 2. Extract room information and semantics from the zip file using + `scripts/script_preprocess_annoations_S3DIS.sh`.
After this there should + be `room-dimension` and `class-maps` folders in + `data/stanford_building_parser_dataset`. (If this script crashes with + an exception in np.loadtxt while processing + `Area_5/office_19/Annotations/ceiling_1.txt`, there is a special + character on line 323474 that should be removed manually.) + +3. Download ImageNet Pre-trained models. We used ResNet-v2-50 for representing + images. For RGB images this is pre-trained on ImageNet. For Depth images we + [distill](https://arxiv.org/abs/1507.00448) the RGB model to depth images + using paired RGB-D images. Both these models are available through + `scripts/script_download_init_models.sh`. diff --git a/cognitive_mapping_and_planning/datasets/__init__.py b/cognitive_mapping_and_planning/datasets/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cognitive_mapping_and_planning/datasets/factory.py b/cognitive_mapping_and_planning/datasets/factory.py new file mode 100644 index 00000000000..3f7b5c0a602 --- /dev/null +++ b/cognitive_mapping_and_planning/datasets/factory.py @@ -0,0 +1,113 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +r"""Wrapper for selecting the navigation environment that we want to train and +test on.
+""" +import numpy as np +import os, glob +import platform + +import logging +from tensorflow.python.platform import app +from tensorflow.python.platform import flags + +import render.swiftshader_renderer as renderer +import src.file_utils as fu +import src.utils as utils + +def get_dataset(dataset_name): + if dataset_name == 'sbpd': + dataset = StanfordBuildingParserDataset(dataset_name) + else: + logging.fatal('Not one of sbpd') + return dataset + +class Loader(): + def get_data_dir(): + pass + + def get_meta_data(self, file_name, data_dir=None): + if data_dir is None: + data_dir = self.get_data_dir() + full_file_name = os.path.join(data_dir, 'meta', file_name) + assert(fu.exists(full_file_name)), \ + '{:s} does not exist'.format(full_file_name) + ext = os.path.splitext(full_file_name)[1] + if ext == '.txt': + ls = [] + with fu.fopen(full_file_name, 'r') as f: + for l in f: + ls.append(l.rstrip()) + elif ext == '.pkl': + ls = utils.load_variables(full_file_name) + return ls + + def load_building(self, name, data_dir=None): + if data_dir is None: + data_dir = self.get_data_dir() + out = {} + out['name'] = name + out['data_dir'] = data_dir + out['room_dimension_file'] = os.path.join(data_dir, 'room-dimension', + name+'.pkl') + out['class_map_folder'] = os.path.join(data_dir, 'class-maps') + return out + + def load_building_meshes(self, building): + dir_name = os.path.join(building['data_dir'], 'mesh', building['name']) + mesh_file_name = glob.glob1(dir_name, '*.obj')[0] + mesh_file_name_full = os.path.join(dir_name, mesh_file_name) + logging.error('Loading building from obj file: %s', mesh_file_name_full) + shape = renderer.Shape(mesh_file_name_full, load_materials=True, + name_prefix=building['name']+'_') + return [shape] + +class StanfordBuildingParserDataset(Loader): + def __init__(self, ver): + self.ver = ver + self.data_dir = None + + def get_data_dir(self): + if self.data_dir is None: + self.data_dir = 'data/stanford_building_parser_dataset/' + return self.data_dir + + def get_benchmark_sets(self): + return self._get_benchmark_sets() + + def get_split(self, split_name): + if self.ver == 'sbpd': + return self._get_split(split_name) + else: + logging.fatal('Unknown version.') + + def _get_benchmark_sets(self): + sets = ['train1', 'val', 'test'] + return sets + + def _get_split(self, split_name): + train = ['area1', 'area5a', 'area5b', 'area6'] + train1 = ['area1'] + val = ['area3'] + test = ['area4'] + + sets = {} + sets['train'] = train + sets['train1'] = train1 + sets['val'] = val + sets['test'] = test + sets['all'] = sorted(list(set(train + val + test))) + return sets[split_name] diff --git a/cognitive_mapping_and_planning/datasets/nav_env.py b/cognitive_mapping_and_planning/datasets/nav_env.py new file mode 100644 index 00000000000..5710e26dcb1 --- /dev/null +++ b/cognitive_mapping_and_planning/datasets/nav_env.py @@ -0,0 +1,1465 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +r"""Navigation Environment. Includes the following classes along with some +helper functions. + Building: Loads buildings, computes traversibility, exposes functionality for + rendering images. + + GridWorld: Base class which implements functionality for moving an agent on a + grid world. + + NavigationEnv: Base class which generates navigation problems on a grid world. + + VisualNavigationEnv: Builds upon NavigationEnv and Building to provide the + interface that is used externally to train the agent. + + MeshMapper: Class used for distilling the model, testing the mapper. + + BuildingMultiplexer: Wrapper class that instantiates a VisualNavigationEnv for + each building and multiplexes between them as needed. +""" + +import numpy as np +import os +import re +import matplotlib.pyplot as plt + +import graph_tool as gt +import graph_tool.topology + +from tensorflow.python.platform import gfile +import logging +import src.file_utils as fu +import src.utils as utils +import src.graph_utils as gu +import src.map_utils as mu +import src.depth_utils as du +import render.swiftshader_renderer as sru +from render.swiftshader_renderer import SwiftshaderRenderer +import cv2 + +label_nodes_with_class = gu.label_nodes_with_class +label_nodes_with_class_geodesic = gu.label_nodes_with_class_geodesic +get_distance_node_list = gu.get_distance_node_list +convert_to_graph_tool = gu.convert_to_graph_tool +generate_graph = gu.generate_graph +get_hardness_distribution = gu.get_hardness_distribution +rng_next_goal_rejection_sampling = gu.rng_next_goal_rejection_sampling +rng_next_goal = gu.rng_next_goal +rng_room_to_room = gu.rng_room_to_room +rng_target_dist_field = gu.rng_target_dist_field + +compute_traversibility = mu.compute_traversibility +make_map = mu.make_map +resize_maps = mu.resize_maps +pick_largest_cc = mu.pick_largest_cc +get_graph_origin_loc = mu.get_graph_origin_loc +generate_egocentric_maps = mu.generate_egocentric_maps +generate_goal_images = mu.generate_goal_images +get_map_to_predict = mu.get_map_to_predict + +bin_points = du.bin_points +make_geocentric = du.make_geocentric +get_point_cloud_from_z = du.get_point_cloud_from_z +get_camera_matrix = du.get_camera_matrix + +def _get_semantic_maps(folder_name, building_name, map, flip): + # Load file from the cache. + file_name = '{:s}_{:d}_{:d}_{:d}_{:d}_{:d}_{:d}.pkl' + file_name = file_name.format(building_name, map.size[0], map.size[1], + map.origin[0], map.origin[1], map.resolution, + flip) + file_name = os.path.join(folder_name, file_name) + logging.info('Loading semantic maps from %s.', file_name) + + if fu.exists(file_name): + a = utils.load_variables(file_name) + maps = a['maps'] #HxWx#C + cats = a['cats'] + else: + logging.error('file_name: %s not found.', file_name) + maps = None + cats = None + return maps, cats + +def _select_classes(all_maps, all_cats, cats_to_use): + inds = [] + for c in cats_to_use: + ind = all_cats.index(c) + inds.append(ind) + out_maps = all_maps[:,:,inds] + return out_maps + +def _get_room_dimensions(file_name, resolution, origin, flip=False): + if fu.exists(file_name): + a = utils.load_variables(file_name)['room_dimension'] + names = a.keys() + dims = np.concatenate(a.values(), axis=0).reshape((-1,6)) + ind = np.argsort(names) + dims = dims[ind,:] + names = [names[x] for x in ind] + if flip: + dims_new = dims*1 + dims_new[:,1] = -dims[:,4] + dims_new[:,4] = -dims[:,1] + dims = dims_new*1 + + dims = dims*100.
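+ # Assumption: room dimensions are stored in metres while the map is built + # at sc=100. (centimetres), hence the factor of 100 before shifting by the + # map origin and dividing by the resolution below.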
+ dims[:,0] = dims[:,0] - origin[0] + dims[:,1] = dims[:,1] - origin[1] + dims[:,3] = dims[:,3] - origin[0] + dims[:,4] = dims[:,4] - origin[1] + dims = dims / resolution + out = {'names': names, 'dims': dims} + else: + out = None + return out + +def _filter_rooms(room_dims, room_regex): + pattern = re.compile(room_regex) + ind = [] + for i, name in enumerate(room_dims['names']): + if pattern.match(name): + ind.append(i) + new_room_dims = {} + new_room_dims['names'] = [room_dims['names'][i] for i in ind] + new_room_dims['dims'] = room_dims['dims'][ind,:]*1 + return new_room_dims + +def _label_nodes_with_room_id(xyt, room_dims): + # Label each node with the id of the room that contains it. + node_room_id = -1*np.ones((xyt.shape[0], 1)) + dims = room_dims['dims'] + for x, name in enumerate(room_dims['names']): + all_ = np.concatenate((xyt[:,[0]] >= dims[x,0], + xyt[:,[0]] <= dims[x,3], + xyt[:,[1]] >= dims[x,1], + xyt[:,[1]] <= dims[x,4]), axis=1) + node_room_id[np.all(all_, axis=1), 0] = x + return node_room_id + +def get_path_ids(start_node_id, end_node_id, pred_map): + id = start_node_id + path = [id] + while id != end_node_id: + id = pred_map[id] + path.append(id) + return path + +def image_pre(images, modalities): + # Assumes images are ...xHxWxC. + # We always assume images are RGB followed by Depth. + if 'depth' in modalities: + d = images[...,-1][...,np.newaxis]*1. + d[d < 0.01] = np.NaN; isnan = np.isnan(d); + d = 100./d; d[isnan] = 0.; + images = np.concatenate((images[...,:-1], d, isnan), axis=images.ndim-1) + if 'rgb' in modalities: + images[...,:3] = images[...,:3]*1. - 128 + return images + +def _get_relative_goal_loc(goal_loc, loc, theta): + r = np.sqrt(np.sum(np.square(goal_loc - loc), axis=1)) + t = np.arctan2(goal_loc[:,1] - loc[:,1], goal_loc[:,0] - loc[:,0]) + t = t-theta[:,0] + np.pi/2 + return np.expand_dims(r,axis=1), np.expand_dims(t, axis=1) + +def _gen_perturbs(rng, batch_size, num_steps, lr_flip, delta_angle, delta_xy, + structured): + perturbs = [] + for i in range(batch_size): + # Doing things one by one for each episode in this batch. This way this + # remains reproducible even when we change the batch size. + p = np.zeros((num_steps+1, 4)) + if lr_flip: + # Flip the whole trajectory. + p[:,3] = rng.rand(1)-0.5 + if delta_angle > 0: + if structured: + p[:,2] = (rng.rand(1)-0.5)* delta_angle + else: + p[:,2] = (rng.rand(p.shape[0])-0.5)* delta_angle + if delta_xy > 0: + if structured: + p[:,:2] = (rng.rand(1, 2)-0.5)*delta_xy + else: + p[:,:2] = (rng.rand(p.shape[0], 2)-0.5)*delta_xy + perturbs.append(p) + return perturbs + +def get_multiplexer_class(args, task_number): + assert(args.task_params.base_class == 'Building') + logging.info('Returning BuildingMultiplexer') + R = BuildingMultiplexer(args, task_number) + return R + +class GridWorld(): + def __init__(self): + """Class members that will be assigned by any class that actually uses this + class.""" + self.restrict_to_largest_cc = None + self.robot = None + self.env = None + self.category_list = None + self.traversible = None + + def get_loc_axis(self, node, delta_theta, perturb=None): + """Based on the node orientation, returns the X and Y axes. Used to sample + the map in the egocentric coordinate frame.
+ """ + if type(node) == tuple: + node = np.array([node]) + if perturb is None: + perturb = np.zeros((node.shape[0], 4)) + xyt = self.to_actual_xyt_vec(node) + x = xyt[:,[0]] + perturb[:,[0]] + y = xyt[:,[1]] + perturb[:,[1]] + t = xyt[:,[2]] + perturb[:,[2]] + theta = t*delta_theta + loc = np.concatenate((x,y), axis=1) + x_axis = np.concatenate((np.cos(theta), np.sin(theta)), axis=1) + y_axis = np.concatenate((np.cos(theta+np.pi/2.), np.sin(theta+np.pi/2.)), + axis=1) + # Flip the sampled map where need be. + y_axis[np.where(perturb[:,3] > 0)[0], :] *= -1. + return loc, x_axis, y_axis, theta + + def to_actual_xyt(self, pqr): + """Converts from node to location on the map.""" + (p, q, r) = pqr + if self.task.n_ori == 6: + out = (p - q * 0.5 + self.task.origin_loc[0], + q * np.sqrt(3.) / 2. + self.task.origin_loc[1], r) + elif self.task.n_ori == 4: + out = (p + self.task.origin_loc[0], + q + self.task.origin_loc[1], r) + return out + + def to_actual_xyt_vec(self, pqr): + """Converts from node array to location array on the map.""" + p = pqr[:,0][:, np.newaxis] + q = pqr[:,1][:, np.newaxis] + r = pqr[:,2][:, np.newaxis] + if self.task.n_ori == 6: + out = np.concatenate((p - q * 0.5 + self.task.origin_loc[0], + q * np.sqrt(3.) / 2. + self.task.origin_loc[1], + r), axis=1) + elif self.task.n_ori == 4: + out = np.concatenate((p + self.task.origin_loc[0], + q + self.task.origin_loc[1], + r), axis=1) + return out + + def raw_valid_fn_vec(self, xyt): + """Returns if the given set of nodes is valid or not.""" + height = self.traversible.shape[0] + width = self.traversible.shape[1] + x = np.round(xyt[:,[0]]).astype(np.int32) + y = np.round(xyt[:,[1]]).astype(np.int32) + is_inside = np.all(np.concatenate((x >= 0, y >= 0, + x < width, y < height), axis=1), axis=1) + x = np.minimum(np.maximum(x, 0), width-1) + y = np.minimum(np.maximum(y, 0), height-1) + ind = np.ravel_multi_index((y,x), self.traversible.shape) + is_traversible = self.traversible.ravel()[ind] + + is_valid = np.all(np.concatenate((is_inside[:,np.newaxis], is_traversible), + axis=1), axis=1) + return is_valid + + + def valid_fn_vec(self, pqr): + """Returns if the given set of nodes is valid or not.""" + xyt = self.to_actual_xyt_vec(np.array(pqr)) + height = self.traversible.shape[0] + width = self.traversible.shape[1] + x = np.round(xyt[:,[0]]).astype(np.int32) + y = np.round(xyt[:,[1]]).astype(np.int32) + is_inside = np.all(np.concatenate((x >= 0, y >= 0, + x < width, y < height), axis=1), axis=1) + x = np.minimum(np.maximum(x, 0), width-1) + y = np.minimum(np.maximum(y, 0), height-1) + ind = np.ravel_multi_index((y,x), self.traversible.shape) + is_traversible = self.traversible.ravel()[ind] + + is_valid = np.all(np.concatenate((is_inside[:,np.newaxis], is_traversible), + axis=1), axis=1) + return is_valid + + def get_feasible_actions(self, node_ids): + """Returns the feasible set of actions from the current node.""" + a = np.zeros((len(node_ids), self.task_params.num_actions), dtype=np.int32) + gtG = self.task.gtG + next_node = [] + for i, c in enumerate(node_ids): + neigh = gtG.vertex(c).out_neighbours() + neigh_edge = gtG.vertex(c).out_edges() + nn = {} + for n, e in zip(neigh, neigh_edge): + _ = gtG.ep['action'][e] + a[i,_] = 1 + nn[_] = int(n) + next_node.append(nn) + return a, next_node + + def take_action(self, current_node_ids, action): + """Returns the new node after taking the action action. 
Stays at the current + node if the action is invalid.""" + actions, next_node_ids = self.get_feasible_actions(current_node_ids) + new_node_ids = [] + for i, (c,a) in enumerate(zip(current_node_ids, action)): + if actions[i,a] == 1: + new_node_ids.append(next_node_ids[i][a]) + else: + new_node_ids.append(c) + return new_node_ids + + def set_r_obj(self, r_obj): + """Sets the SwiftshaderRenderer object used for rendering.""" + self.r_obj = r_obj + +class Building(GridWorld): + def __init__(self, building_name, robot, env, + category_list=None, small=False, flip=False, logdir=None, + building_loader=None): + + self.restrict_to_largest_cc = True + self.robot = robot + self.env = env + self.logdir = logdir + + # Load the building meta data. + building = building_loader.load_building(building_name) + if small: + building['mesh_names'] = building['mesh_names'][:5] + + # New code. + shapess = building_loader.load_building_meshes(building) + if flip: + for shapes in shapess: + shapes.flip_shape() + + vs = [] + for shapes in shapess: + vs.append(shapes.get_vertices()[0]) + vs = np.concatenate(vs, axis=0) + map = make_map(env.padding, env.resolution, vertex=vs, sc=100.) + map = compute_traversibility( + map, robot.base, robot.height, robot.radius, env.valid_min, + env.valid_max, env.num_point_threshold, shapess=shapess, sc=100., + n_samples_per_face=env.n_samples_per_face) + + room_dims = _get_room_dimensions(building['room_dimension_file'], + env.resolution, map.origin, flip=flip) + class_maps, class_map_names = _get_semantic_maps( + building['class_map_folder'], building_name, map, flip) + + self.class_maps = class_maps + self.class_map_names = class_map_names + self.building = building + self.shapess = shapess + self.map = map + self.traversible = map.traversible*1 + self.building_name = building_name + self.room_dims = room_dims + self.flipped = flip + self.renderer_entitiy_ids = [] + + if self.restrict_to_largest_cc: + self.traversible = pick_largest_cc(self.traversible) + + def load_building_into_scene(self): + # Loads the scene. + self.renderer_entitiy_ids += self.r_obj.load_shapes(self.shapess) + # Free up memory, we dont need the mesh or the materials anymore. + self.shapess = None + + def add_entity_at_nodes(self, nodes, height, shape): + xyt = self.to_actual_xyt_vec(nodes) + nxy = xyt[:,:2]*1. + nxy = nxy * self.map.resolution + nxy = nxy + self.map.origin + Ts = np.concatenate((nxy, nxy[:,:1]), axis=1) + Ts[:,2] = height; Ts = Ts / 100.; + + # Merge all the shapes into a single shape and add that shape. 
+ shape.replicate_shape(Ts) + entity_ids = self.r_obj.load_shapes([shape]) + self.renderer_entitiy_ids += entity_ids + return entity_ids + + def add_shapes(self, shapes): + scene = self.r_obj.viz.scene() + for shape in shapes: + scene.AddShape(shape) + + def add_materials(self, materials): + scene = self.r_obj.viz.scene() + for material in materials: + scene.AddOrUpdateMaterial(material) + + def set_building_visibility(self, visibility): + self.r_obj.set_entity_visible(self.renderer_entitiy_ids, visibility) + + def render_nodes(self, nodes, perturb=None, aux_delta_theta=0.): + self.set_building_visibility(True) + if perturb is None: + perturb = np.zeros((len(nodes), 4)) + + imgs = [] + r = 2 + elevation_z = r * np.tan(np.deg2rad(self.robot.camera_elevation_degree)) + + for i in range(len(nodes)): + xyt = self.to_actual_xyt(nodes[i]) + lookat_theta = 3.0 * np.pi / 2.0 - (xyt[2]+perturb[i,2]+aux_delta_theta) * (self.task.delta_theta) + nxy = np.array([xyt[0]+perturb[i,0], xyt[1]+perturb[i,1]]).reshape(1, -1) + nxy = nxy * self.map.resolution + nxy = nxy + self.map.origin + camera_xyz = np.zeros((1, 3)) + camera_xyz[...] = [nxy[0, 0], nxy[0, 1], self.robot.sensor_height] + camera_xyz = camera_xyz / 100. + lookat_xyz = np.array([-r * np.sin(lookat_theta), + -r * np.cos(lookat_theta), elevation_z]) + lookat_xyz = lookat_xyz + camera_xyz[0, :] + self.r_obj.position_camera(camera_xyz[0, :].tolist(), + lookat_xyz.tolist(), [0.0, 0.0, 1.0]) + img = self.r_obj.render(take_screenshot=True, output_type=0) + img = [x for x in img if x is not None] + img = np.concatenate(img, axis=2).astype(np.float32) + if perturb[i,3]>0: + img = img[:,::-1,:] + imgs.append(img) + + self.set_building_visibility(False) + return imgs + + +class MeshMapper(Building): + def __init__(self, robot, env, task_params, building_name, category_list, + flip, logdir=None, building_loader=None): + Building.__init__(self, building_name, robot, env, category_list, + small=task_params.toy_problem, flip=flip, logdir=logdir, + building_loader=building_loader) + self.task_params = task_params + self.task = None + self._preprocess_for_task(self.task_params.building_seed) + + def _preprocess_for_task(self, seed): + if self.task is None or self.task.seed != seed: + rng = np.random.RandomState(seed) + origin_loc = get_graph_origin_loc(rng, self.traversible) + self.task = utils.Foo(seed=seed, origin_loc=origin_loc, + n_ori=self.task_params.n_ori) + G = generate_graph(self.valid_fn_vec, + self.task_params.step_size, self.task.n_ori, + (0, 0, 0)) + gtG, nodes, nodes_to_id = convert_to_graph_tool(G) + self.task.gtG = gtG + self.task.nodes = nodes + self.task.delta_theta = 2.0*np.pi/(self.task.n_ori*1.) 
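
The look-at math in `render_nodes` above turns a grid pose into a camera position and gaze target: grid coordinates are scaled by the map resolution, shifted by the map origin, converted out of centimeters, and the gaze point is placed at a fixed distance `r` along the heading, dropped by the camera elevation angle. A standalone sketch of the same geometry (the helper name is illustrative, not part of this patch):

```python
import numpy as np

def camera_from_pose(x, y, theta_idx, delta_theta, resolution, origin,
                     sensor_height, camera_elevation_degree, r=2.0):
  """Sketch of the pose -> camera math in render_nodes. Assumes map units
  are centimeters (hence the division by 100) and that theta_idx already
  includes any perturbation."""
  lookat_theta = 3.0 * np.pi / 2.0 - theta_idx * delta_theta
  nxy = np.array([x, y]) * resolution + origin
  camera_xyz = np.array([nxy[0], nxy[1], sensor_height]) / 100.
  elevation_z = r * np.tan(np.deg2rad(camera_elevation_degree))
  lookat_xyz = camera_xyz + np.array([-r * np.sin(lookat_theta),
                                      -r * np.cos(lookat_theta),
                                      elevation_z])
  return camera_xyz, lookat_xyz
```
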
+ self.task.nodes_to_id = nodes_to_id + logging.info('Building %s, #V=%d, #E=%d', self.building_name, + self.task.nodes.shape[0], self.task.gtG.num_edges()) + + if self.logdir is not None: + write_traversible = cv2.applyColorMap(self.traversible.astype(np.uint8)*255, cv2.COLORMAP_JET) + img_path = os.path.join(self.logdir, + '{:s}_{:d}_graph.png'.format(self.building_name, + seed)) + node_xyt = self.to_actual_xyt_vec(self.task.nodes) + plt.set_cmap('jet'); + fig, ax = utils.subplot(plt, (1,1), (12,12)) + ax.plot(node_xyt[:,0], node_xyt[:,1], 'm.') + ax.imshow(self.traversible, origin='lower'); + ax.set_axis_off(); ax.axis('equal'); + ax.set_title('{:s}, {:d}, {:d}'.format(self.building_name, + self.task.nodes.shape[0], + self.task.gtG.num_edges())) + if self.room_dims is not None: + for i, r in enumerate(self.room_dims['dims']*1): + min_ = r[:3]*1 + max_ = r[3:]*1 + xmin, ymin, zmin = min_ + xmax, ymax, zmax = max_ + + ax.plot([xmin, xmax, xmax, xmin, xmin], + [ymin, ymin, ymax, ymax, ymin], 'g') + with fu.fopen(img_path, 'w') as f: + fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) + plt.close(fig) + + + def _gen_rng(self, rng): + # instances is a list of list of node_ids. + if self.task_params.move_type == 'circle': + _, _, _, _, paths = rng_target_dist_field(self.task_params.batch_size, + self.task.gtG, rng, 0, 1, + compute_path=True) + instances_ = paths + + instances = [] + for instance_ in instances_: + instance = instance_ + for i in range(self.task_params.num_steps): + instance.append(self.take_action([instance[-1]], [1])[0]) + instances.append(instance) + + elif self.task_params.move_type == 'shortest_path': + _, _, _, _, paths = rng_target_dist_field(self.task_params.batch_size, + self.task.gtG, rng, + self.task_params.num_steps, + self.task_params.num_steps+1, + compute_path=True) + instances = paths + + elif self.task_params.move_type == 'circle+forward': + _, _, _, _, paths = rng_target_dist_field(self.task_params.batch_size, + self.task.gtG, rng, 0, 1, + compute_path=True) + instances_ = paths + instances = [] + for instance_ in instances_: + instance = instance_ + for i in range(self.task_params.n_ori-1): + instance.append(self.take_action([instance[-1]], [1])[0]) + while len(instance) <= self.task_params.num_steps: + while self.take_action([instance[-1]], [3])[0] == instance[-1] and len(instance) <= self.task_params.num_steps: + instance.append(self.take_action([instance[-1]], [2])[0]) + if len(instance) <= self.task_params.num_steps: + instance.append(self.take_action([instance[-1]], [3])[0]) + instances.append(instance) + + # Do random perturbation if needed. + perturbs = _gen_perturbs(rng, self.task_params.batch_size, + self.task_params.num_steps, + self.task_params.data_augment.lr_flip, + self.task_params.data_augment.delta_angle, + self.task_params.data_augment.delta_xy, + self.task_params.data_augment.structured) + return instances, perturbs + + def worker(self, instances, perturbs): + # Output the images and the free space. + + # Make the instances be all the same length. 
+ for i in range(len(instances)): + for j in range(self.task_params.num_steps - len(instances[i]) + 1): + instances[i].append(instances[i][-1]) + if perturbs[i].shape[0] < self.task_params.num_steps+1: + p = np.zeros((self.task_params.num_steps+1, 4)) + p[:perturbs[i].shape[0], :] = perturbs[i] + p[perturbs[i].shape[0]:, :] = perturbs[i][-1,:] + perturbs[i] = p + + instances_ = [] + for instance in instances: + instances_ = instances_ + instance + perturbs_ = np.concatenate(perturbs, axis=0) + + instances_nodes = self.task.nodes[instances_,:] + instances_nodes = [tuple(x) for x in instances_nodes] + + imgs_ = self.render_nodes(instances_nodes, perturbs_) + imgs = []; next = 0; + for instance in instances: + img_i = [] + for _ in instance: + img_i.append(imgs_[next]) + next = next+1 + imgs.append(img_i) + imgs = np.array(imgs) + + # Render out the maps in the egocentric view for all nodes and not just the + # last node. + all_nodes = [] + for x in instances: + all_nodes = all_nodes + x + all_perturbs = np.concatenate(perturbs, axis=0) + loc, x_axis, y_axis, theta = self.get_loc_axis( + self.task.nodes[all_nodes, :]*1, delta_theta=self.task.delta_theta, + perturb=all_perturbs) + fss = None + valids = None + loc_on_map = None + theta_on_map = None + cum_fs = None + cum_valid = None + incremental_locs = None + incremental_thetas = None + + if self.task_params.output_free_space: + fss, valids = get_map_to_predict(loc, x_axis, y_axis, + map=self.traversible*1., + map_size=self.task_params.map_size) + fss = np.array(fss) > 0.5 + fss = np.reshape(fss, [self.task_params.batch_size, + self.task_params.num_steps+1, + self.task_params.map_size, + self.task_params.map_size]) + valids = np.reshape(np.array(valids), fss.shape) + + if self.task_params.output_transform_to_global_map: + # Output the transform to the global map. + loc_on_map = np.reshape(loc*1, [self.task_params.batch_size, + self.task_params.num_steps+1, -1]) + # Converting to location wrt to first location so that warping happens + # properly. + theta_on_map = np.reshape(theta*1, [self.task_params.batch_size, + self.task_params.num_steps+1, -1]) + + if self.task_params.output_incremental_transform: + # Output the transform to the global map. 
+ incremental_locs_ = np.reshape(loc*1, [self.task_params.batch_size, + self.task_params.num_steps+1, -1]) + incremental_locs_[:,1:,:] -= incremental_locs_[:,:-1,:] + t0 = -np.pi/2+np.reshape(theta*1, [self.task_params.batch_size, + self.task_params.num_steps+1, -1]) + t = t0*1 + incremental_locs = incremental_locs_*1 + incremental_locs[:,:,0] = np.sum(incremental_locs_ * np.concatenate((np.cos(t), np.sin(t)), axis=-1), axis=-1) + incremental_locs[:,:,1] = np.sum(incremental_locs_ * np.concatenate((np.cos(t+np.pi/2), np.sin(t+np.pi/2)), axis=-1), axis=-1) + incremental_locs[:,0,:] = incremental_locs_[:,0,:] + # print incremental_locs_[0,:,:], incremental_locs[0,:,:], t0[0,:,:] + + incremental_thetas = np.reshape(theta*1, [self.task_params.batch_size, + self.task_params.num_steps+1, + -1]) + incremental_thetas[:,1:,:] += -incremental_thetas[:,:-1,:] + + if self.task_params.output_canonical_map: + loc_ = loc[0::(self.task_params.num_steps+1), :] + x_axis = np.zeros_like(loc_); x_axis[:,1] = 1 + y_axis = np.zeros_like(loc_); y_axis[:,0] = -1 + cum_fs, cum_valid = get_map_to_predict(loc_, x_axis, y_axis, + map=self.traversible*1., + map_size=self.task_params.map_size) + cum_fs = np.array(cum_fs) > 0.5 + cum_fs = np.reshape(cum_fs, [self.task_params.batch_size, 1, + self.task_params.map_size, + self.task_params.map_size]) + cum_valid = np.reshape(np.array(cum_valid), cum_fs.shape) + + + inputs = {'fs_maps': fss, + 'valid_maps': valids, + 'imgs': imgs, + 'loc_on_map': loc_on_map, + 'theta_on_map': theta_on_map, + 'cum_fs_maps': cum_fs, + 'cum_valid_maps': cum_valid, + 'incremental_thetas': incremental_thetas, + 'incremental_locs': incremental_locs} + return inputs + + def pre(self, inputs): + inputs['imgs'] = image_pre(inputs['imgs'], self.task_params.modalities) + if inputs['loc_on_map'] is not None: + inputs['loc_on_map'] = inputs['loc_on_map'] - inputs['loc_on_map'][:,[0],:] + if inputs['theta_on_map'] is not None: + inputs['theta_on_map'] = np.pi/2. 
- inputs['theta_on_map'] + return inputs + +def _nav_env_reset_helper(type, rng, nodes, batch_size, gtG, max_dist, + num_steps, num_goals, data_augment, **kwargs): + """Generates and returns a new episode.""" + max_compute = max_dist + 4*num_steps + if type == 'general': + start_node_ids, end_node_ids, dist, pred_map, paths = \ + rng_target_dist_field(batch_size, gtG, rng, max_dist, max_compute, + nodes=nodes, compute_path=False) + target_class = None + + elif type == 'room_to_room_many': + goal_node_ids = []; dists = []; + node_room_ids = kwargs['node_room_ids'] + # Sample the first one + start_node_ids_, end_node_ids_, dist_, _, _ = rng_room_to_room( + batch_size, gtG, rng, max_dist, max_compute, + node_room_ids=node_room_ids, nodes=nodes) + start_node_ids = start_node_ids_ + goal_node_ids.append(end_node_ids_) + dists.append(dist_) + for n in range(num_goals-1): + start_node_ids_, end_node_ids_, dist_, _, _ = rng_next_goal( + goal_node_ids[n], batch_size, gtG, rng, max_dist, + max_compute, node_room_ids=node_room_ids, nodes=nodes, + dists_from_start_node=dists[n]) + goal_node_ids.append(end_node_ids_) + dists.append(dist_) + target_class = None + + elif type == 'rng_rejection_sampling_many': + num_goals = num_goals + goal_node_ids = []; dists = []; + + n_ori = kwargs['n_ori'] + step_size = kwargs['step_size'] + min_dist = kwargs['min_dist'] + sampling_distribution = kwargs['sampling_distribution'] + target_distribution = kwargs['target_distribution'] + rejection_sampling_M = kwargs['rejection_sampling_M'] + distribution_bins = kwargs['distribution_bins'] + + for n in range(num_goals): + if n == 0: input_nodes = None + else: input_nodes = goal_node_ids[n-1] + start_node_ids_, end_node_ids_, dist_, _, _, _, _ = rng_next_goal_rejection_sampling( + input_nodes, batch_size, gtG, rng, max_dist, min_dist, + max_compute, sampling_distribution, target_distribution, nodes, + n_ori, step_size, distribution_bins, rejection_sampling_M) + if n == 0: start_node_ids = start_node_ids_ + goal_node_ids.append(end_node_ids_) + dists.append(dist_) + target_class = None + + elif type == 'room_to_room_back': + num_goals = num_goals + assert(num_goals == 2), 'num_goals must be 2.' + goal_node_ids = []; dists = []; + node_room_ids = kwargs['node_room_ids'] + # Sample the first one. + start_node_ids_, end_node_ids_, dist_, _, _ = rng_room_to_room( + batch_size, gtG, rng, max_dist, max_compute, + node_room_ids=node_room_ids, nodes=nodes) + start_node_ids = start_node_ids_ + goal_node_ids.append(end_node_ids_) + dists.append(dist_) + + # Set second goal to be starting position, and compute distance to the start node. + goal_node_ids.append(start_node_ids) + dist = [] + for i in range(batch_size): + dist_ = gt.topology.shortest_distance( + gt.GraphView(gtG, reversed=True), + source=gtG.vertex(start_node_ids[i]), target=None) + dist_ = np.array(dist_.get_array()) + dist.append(dist_) + dists.append(dist) + target_class = None + + elif type[:14] == 'to_nearest_obj': + # Generate an episode by sampling one of the target classes (with + # probability proportional to the number of nodes in the world). + # With the sampled class sample a node that is within some distance from + # the sampled class. + class_nodes = kwargs['class_nodes'] + sampling = kwargs['sampling'] + dist_to_class = kwargs['dist_to_class'] + + assert(num_goals == 1), 'Only supports a single goal.' 
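
The `rng_rejection_sampling_many` branch above is built on the classic accept/reject rule: episodes are drawn from an easy-to-sample hardness distribution and kept with probability proportional to `target / (M * sampling)` for their hardness bin, so accepted episodes follow the target distribution whenever `M` upper-bounds that ratio. A minimal sketch of the rule (all names illustrative; the actual sampler, `rng_next_goal_rejection_sampling`, is imported from elsewhere in the codebase):

```python
import numpy as np

def rejection_sample(rng, draw_episode, hardness_bin, sampling_d, target_d, M):
  """Draw until accepted: an episode whose hardness falls in bin b survives
  with probability target_d[b] / (M * sampling_d[b]). If M bounds
  max(target_d / sampling_d), accepted episodes are distributed as target_d."""
  while True:
    ep = draw_episode(rng)
    b = hardness_bin(ep)
    if rng.rand() < target_d[b] / (M * sampling_d[b]):
      return ep
```
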
+ ind = rng.choice(class_nodes.shape[0], size=batch_size) + target_class = class_nodes[ind,1] + start_node_ids = []; dists = []; goal_node_ids = []; + + for t in target_class: + if sampling == 'uniform': + max_dist = max_dist + cnts = np.bincount(dist_to_class[t], minlength=max_dist+1)*1. + cnts[max_dist+1:] = 0 + p_each = 1./ cnts / (max_dist+1.) + p_each[cnts == 0] = 0 + p = p_each[dist_to_class[t]]*1.; p = p/np.sum(p) + start_node_id = rng.choice(p.shape[0], size=1, p=p)[0] + else: + logging.fatal('Sampling not one of uniform.') + start_node_ids.append(start_node_id) + dists.append(dist_to_class[t]) + # Dummy goal node, same as the start node, so that vis is better. + goal_node_ids.append(start_node_id) + dists = [dists] + goal_node_ids = [goal_node_ids] + + return start_node_ids, goal_node_ids, dists, target_class + + +class NavigationEnv(GridWorld, Building): + """Wrapper around GridWorld which sets up navigation tasks. + """ + def _debug_save_hardness(self, seed): + out_path = os.path.join(self.logdir, '{:s}_{:d}_hardness.png'.format(self.building_name, seed)) + batch_size = 4000 + rng = np.random.RandomState(0) + start_node_ids, end_node_ids, dists, pred_maps, paths, hardnesss, gt_dists = \ + rng_next_goal_rejection_sampling( + None, batch_size, self.task.gtG, rng, self.task_params.max_dist, + self.task_params.min_dist, self.task_params.max_dist, + self.task.sampling_distribution, self.task.target_distribution, + self.task.nodes, self.task_params.n_ori, self.task_params.step_size, + self.task.distribution_bins, self.task.rejection_sampling_M) + bins = self.task.distribution_bins + n_bins = self.task.n_bins + with plt.style.context('ggplot'): + fig, axes = utils.subplot(plt, (1,2), (10,10)) + ax = axes[0] + _ = ax.hist(hardnesss, bins=bins, weights=np.ones_like(hardnesss)/len(hardnesss)) + ax.plot(bins[:-1]+0.5/n_bins, self.task.target_distribution, 'g') + ax.plot(bins[:-1]+0.5/n_bins, self.task.sampling_distribution, 'b') + ax.grid('on') + + ax = axes[1] + _ = ax.hist(gt_dists, bins=np.arange(self.task_params.max_dist+1)) + ax.grid('on') + ax.set_title('Mean: {:0.2f}, Median: {:0.2f}'.format(np.mean(gt_dists), + np.median(gt_dists))) + with fu.fopen(out_path, 'w') as f: + fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) + + def _debug_save_map_nodes(self, seed): + """Saves traversible space along with nodes generated on the graph. Takes + the seed as input.""" + img_path = os.path.join(self.logdir, '{:s}_{:d}_graph.png'.format(self.building_name, seed)) + node_xyt = self.to_actual_xyt_vec(self.task.nodes) + plt.set_cmap('jet'); + fig, ax = utils.subplot(plt, (1,1), (12,12)) + ax.plot(node_xyt[:,0], node_xyt[:,1], 'm.') + ax.set_axis_off(); ax.axis('equal'); + + if self.room_dims is not None: + for i, r in enumerate(self.room_dims['dims']*1): + min_ = r[:3]*1 + max_ = r[3:]*1 + xmin, ymin, zmin = min_ + xmax, ymax, zmax = max_ + + ax.plot([xmin, xmax, xmax, xmin, xmin], + [ymin, ymin, ymax, ymax, ymin], 'g') + ax.imshow(self.traversible, origin='lower'); + with fu.fopen(img_path, 'w') as f: + fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) + + def _debug_semantic_maps(self, seed): + """Saves traversible space along with nodes generated on the graph. Takes + the seed as input.""" + for i, cls in enumerate(self.task_params.semantic_task.class_map_names): + img_path = os.path.join(self.logdir, '{:s}_flip{:d}_{:s}_graph.png'.format(self.building_name, seed, cls)) + maps = self.traversible*1. 
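
The `uniform` branch above inverts the empirical distance histogram so that the sampled start node's geodesic distance to the target class is close to uniform on `[0, max_dist]`: each distance bin gets equal total mass, split evenly among the nodes in it. The same computation as a small helper (name illustrative, mirroring the code above):

```python
import numpy as np

def distance_uniform_probs(dist_to_class, max_dist):
  """Per-node start probabilities making distance-to-class ~uniform.
  dist_to_class holds non-negative integer geodesic distances; nodes
  farther than max_dist are never sampled."""
  cnts = np.bincount(dist_to_class, minlength=max_dist+1) * 1.
  cnts[max_dist+1:] = 0                       # exclude too-distant nodes
  with np.errstate(divide='ignore'):
    p_each = 1. / cnts / (max_dist + 1.)      # equal mass per bin, split in-bin
  p_each[cnts == 0] = 0
  p = p_each[dist_to_class] * 1.
  return p / np.sum(p)
```
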
+ maps += 0.5*(self.task.class_maps_dilated[:,:,i]) + write_traversible = (maps*1.+1.)/3.0 + write_traversible = (write_traversible*255.).astype(np.uint8)[:,:,np.newaxis] + write_traversible = write_traversible + np.zeros((1,1,3), dtype=np.uint8) + fu.write_image(img_path, write_traversible[::-1,:,:]) + + def _preprocess_for_task(self, seed): + """Sets up the task field for doing navigation on the grid world.""" + if self.task is None or self.task.seed != seed: + rng = np.random.RandomState(seed) + origin_loc = get_graph_origin_loc(rng, self.traversible) + self.task = utils.Foo(seed=seed, origin_loc=origin_loc, + n_ori=self.task_params.n_ori) + G = generate_graph(self.valid_fn_vec, self.task_params.step_size, + self.task.n_ori, (0, 0, 0)) + gtG, nodes, nodes_to_id = convert_to_graph_tool(G) + self.task.gtG = gtG + self.task.nodes = nodes + self.task.delta_theta = 2.0*np.pi/(self.task.n_ori*1.) + self.task.nodes_to_id = nodes_to_id + + logging.info('Building %s, #V=%d, #E=%d', self.building_name, + self.task.nodes.shape[0], self.task.gtG.num_edges()) + type = self.task_params.type + if type == 'general': + # Do nothing + _ = None + + elif type == 'room_to_room_many' or type == 'room_to_room_back': + if type == 'room_to_room_back': + assert(self.task_params.num_goals == 2), 'num_goals must be 2.' + + self.room_dims = _filter_rooms(self.room_dims, self.task_params.room_regex) + xyt = self.to_actual_xyt_vec(self.task.nodes) + self.task.node_room_ids = _label_nodes_with_room_id(xyt, self.room_dims) + self.task.reset_kwargs = {'node_room_ids': self.task.node_room_ids} + + elif type == 'rng_rejection_sampling_many': + n_bins = 20 + rejection_sampling_M = self.task_params.rejection_sampling_M + min_dist = self.task_params.min_dist + bins = np.arange(n_bins+1)/(n_bins*1.) + target_d = np.zeros(n_bins); target_d[...] = 1./n_bins; + + sampling_d = get_hardness_distribution( + self.task.gtG, self.task_params.max_dist, self.task_params.min_dist, + np.random.RandomState(0), 4000, bins, self.task.nodes, + self.task_params.n_ori, self.task_params.step_size) + + self.task.reset_kwargs = {'distribution_bins': bins, + 'target_distribution': target_d, + 'sampling_distribution': sampling_d, + 'rejection_sampling_M': rejection_sampling_M, + 'n_bins': n_bins, + 'n_ori': self.task_params.n_ori, + 'step_size': self.task_params.step_size, + 'min_dist': self.task_params.min_dist} + self.task.n_bins = n_bins + self.task.distribution_bins = bins + self.task.target_distribution = target_d + self.task.sampling_distribution = sampling_d + self.task.rejection_sampling_M = rejection_sampling_M + + if self.logdir is not None: + self._debug_save_hardness(seed) + + elif type[:14] == 'to_nearest_obj': + self.room_dims = _filter_rooms(self.room_dims, self.task_params.room_regex) + xyt = self.to_actual_xyt_vec(self.task.nodes) + + self.class_maps = _select_classes(self.class_maps, + self.class_map_names, + self.task_params.semantic_task.class_map_names)*1 + self.class_map_names = self.task_params.semantic_task.class_map_names + nodes_xyt = self.to_actual_xyt_vec(np.array(self.task.nodes)) + + tt = utils.Timer(); tt.tic(); + if self.task_params.type == 'to_nearest_obj_acc': + self.task.class_maps_dilated, self.task.node_class_label = label_nodes_with_class_geodesic( + nodes_xyt, self.class_maps, + self.task_params.semantic_task.pix_distance+8, self.map.traversible, + ff_cost=1., fo_cost=1., oo_cost=4., connectivity=8.) 
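
Room labeling in this function (`_label_nodes_with_room_id`) reduces to a point-in-axis-aligned-box test: each row of `room_dims['dims']` stores `(xmin, ymin, zmin, xmax, ymax, zmax)` in map coordinates, and only the x/y extents are consulted. For a single point the test looks like the sketch below (helper name illustrative; note the batched version above lets later rooms overwrite earlier ones when footprints overlap):

```python
import numpy as np

def room_id_for_point(xy, dims):
  """Index of the first room whose x/y footprint contains the point, or -1.
  dims is an R x 6 array of (xmin, ymin, zmin, xmax, ymax, zmax) rows."""
  inside = ((xy[0] >= dims[:, 0]) & (xy[0] <= dims[:, 3]) &
            (xy[1] >= dims[:, 1]) & (xy[1] <= dims[:, 4]))
  hits = np.where(inside)[0]
  return int(hits[0]) if hits.size > 0 else -1
```
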
+ + dists = [] + for i in range(len(self.class_map_names)): + class_nodes_ = np.where(self.task.node_class_label[:,i])[0] + dists.append(get_distance_node_list(gtG, source_nodes=class_nodes_, direction='to')) + self.task.dist_to_class = dists + a_, b_ = np.where(self.task.node_class_label) + self.task.class_nodes = np.concatenate((a_[:,np.newaxis], b_[:,np.newaxis]), axis=1) + + if self.logdir is not None: + self._debug_semantic_maps(seed) + + self.task.reset_kwargs = {'sampling': self.task_params.semantic_task.sampling, + 'class_nodes': self.task.class_nodes, + 'dist_to_class': self.task.dist_to_class} + + if self.logdir is not None: + self._debug_save_map_nodes(seed) + + def reset(self, rngs): + rng = rngs[0]; rng_perturb = rngs[1]; + nodes = self.task.nodes + tp = self.task_params + + start_node_ids, goal_node_ids, dists, target_class = \ + _nav_env_reset_helper(tp.type, rng, self.task.nodes, tp.batch_size, + self.task.gtG, tp.max_dist, tp.num_steps, + tp.num_goals, tp.data_augment, + **(self.task.reset_kwargs)) + + start_nodes = [tuple(nodes[_,:]) for _ in start_node_ids] + goal_nodes = [[tuple(nodes[_,:]) for _ in __] for __ in goal_node_ids] + data_augment = tp.data_augment + perturbs = _gen_perturbs(rng_perturb, tp.batch_size, + (tp.num_steps+1)*tp.num_goals, + data_augment.lr_flip, data_augment.delta_angle, + data_augment.delta_xy, data_augment.structured) + perturbs = np.array(perturbs) # batch x steps x 4 + end_perturbs = perturbs[:,-(tp.num_goals):,:]*1 # fixed perturb for the goal. + perturbs = perturbs[:,:-(tp.num_goals),:]*1 + + history = -np.ones((tp.batch_size, tp.num_steps*tp.num_goals), dtype=np.int32) + self.episode = utils.Foo( + start_nodes=start_nodes, start_node_ids=start_node_ids, + goal_nodes=goal_nodes, goal_node_ids=goal_node_ids, dist_to_goal=dists, + perturbs=perturbs, goal_perturbs=end_perturbs, history=history, + target_class=target_class, history_frames=[]) + return start_node_ids + + def take_action(self, current_node_ids, action, step_number): + """In addition to returning the action, also returns the reward that the + agent receives.""" + goal_number = step_number / self.task_params.num_steps + new_node_ids = GridWorld.take_action(self, current_node_ids, action) + rewards = [] + for i, n in enumerate(new_node_ids): + reward = 0 + if n == self.episode.goal_node_ids[goal_number][i]: + reward = self.task_params.reward_at_goal + reward = reward - self.task_params.reward_time_penalty + rewards.append(reward) + return new_node_ids, rewards + + + def get_optimal_action(self, current_node_ids, step_number): + """Returns the optimal action from the current node.""" + goal_number = step_number / self.task_params.num_steps + gtG = self.task.gtG + a = np.zeros((len(current_node_ids), self.task_params.num_actions), dtype=np.int32) + d_dict = self.episode.dist_to_goal[goal_number] + for i, c in enumerate(current_node_ids): + neigh = gtG.vertex(c).out_neighbours() + neigh_edge = gtG.vertex(c).out_edges() + ds = np.array([d_dict[i][int(x)] for x in neigh]) + ds_min = np.min(ds) + for i_, e in enumerate(neigh_edge): + if ds[i_] == ds_min: + _ = gtG.ep['action'][e] + a[i, _] = 1 + return a + + def get_targets(self, current_node_ids, step_number): + """Returns the target actions from the current node.""" + action = self.get_optimal_action(current_node_ids, step_number) + action = np.expand_dims(action, axis=1) + return vars(utils.Foo(action=action)) + + def get_targets_name(self): + """Returns the list of names of the targets.""" + return ['action'] + + def cleanup(self): + 
self.episode = None + +class VisualNavigationEnv(NavigationEnv): + """Class for doing visual navigation in environments. Functions for computing + features on states, etc. + """ + def __init__(self, robot, env, task_params, category_list=None, + building_name=None, flip=False, logdir=None, + building_loader=None, r_obj=None): + tt = utils.Timer() + tt.tic() + Building.__init__(self, building_name, robot, env, category_list, + small=task_params.toy_problem, flip=flip, logdir=logdir, + building_loader=building_loader) + + self.set_r_obj(r_obj) + self.task_params = task_params + self.task = None + self.episode = None + self._preprocess_for_task(self.task_params.building_seed) + if hasattr(self.task_params, 'map_scales'): + self.task.scaled_maps = resize_maps( + self.traversible.astype(np.float32)*1, self.task_params.map_scales, + self.task_params.map_resize_method) + else: + logging.fatal('VisualNavigationEnv does not support scale_f anymore.') + self.task.readout_maps_scaled = resize_maps( + self.traversible.astype(np.float32)*1, + self.task_params.readout_maps_scales, + self.task_params.map_resize_method) + tt.toc(log_at=1, log_str='VisualNavigationEnv __init__: ') + + def get_weight(self): + return self.task.nodes.shape[0] + + def get_common_data(self): + goal_nodes = self.episode.goal_nodes + start_nodes = self.episode.start_nodes + perturbs = self.episode.perturbs + goal_perturbs = self.episode.goal_perturbs + target_class = self.episode.target_class + + goal_locs = []; rel_goal_locs = []; + for i in range(len(goal_nodes)): + end_nodes = goal_nodes[i] + goal_loc, _, _, goal_theta = self.get_loc_axis( + np.array(end_nodes), delta_theta=self.task.delta_theta, + perturb=goal_perturbs[:,i,:]) + + # Compute the relative location to all goals from the starting location. + loc, _, _, theta = self.get_loc_axis(np.array(start_nodes), + delta_theta=self.task.delta_theta, + perturb=perturbs[:,0,:]) + r_goal, t_goal = _get_relative_goal_loc(goal_loc*1., loc, theta) + rel_goal_loc = np.concatenate((r_goal*np.cos(t_goal), r_goal*np.sin(t_goal), + np.cos(goal_theta-theta), + np.sin(goal_theta-theta)), axis=1) + rel_goal_locs.append(np.expand_dims(rel_goal_loc, axis=1)) + goal_locs.append(np.expand_dims(goal_loc, axis=1)) + + map = self.traversible*1. + maps = np.repeat(np.expand_dims(np.expand_dims(map, axis=0), axis=0), + self.task_params.batch_size, axis=0)*1 + if self.task_params.type[:14] == 'to_nearest_obj': + for i in range(self.task_params.batch_size): + maps[i,0,:,:] += 0.5*(self.task.class_maps_dilated[:,:,target_class[i]]) + + rel_goal_locs = np.concatenate(rel_goal_locs, axis=1) + goal_locs = np.concatenate(goal_locs, axis=1) + maps = np.expand_dims(maps, axis=-1) + + if self.task_params.type[:14] == 'to_nearest_obj': + rel_goal_locs = np.zeros((self.task_params.batch_size, 1, + len(self.task_params.semantic_task.class_map_names)), + dtype=np.float32) + goal_locs = np.zeros((self.task_params.batch_size, 1, 2), + dtype=np.float32) + for i in range(self.task_params.batch_size): + t = target_class[i] + rel_goal_locs[i,0,t] = 1. 
+ goal_locs[i,0,0] = t + goal_locs[i,0,1] = np.NaN + + return vars(utils.Foo(orig_maps=maps, goal_loc=goal_locs, + rel_goal_loc_at_start=rel_goal_locs)) + + def pre_common_data(self, inputs): + return inputs + + + def get_features(self, current_node_ids, step_number): + task_params = self.task_params + goal_number = step_number / self.task_params.num_steps + end_nodes = self.task.nodes[self.episode.goal_node_ids[goal_number],:]*1 + current_nodes = self.task.nodes[current_node_ids,:]*1 + end_perturbs = self.episode.goal_perturbs[:,goal_number,:][:,np.newaxis,:] + perturbs = self.episode.perturbs + target_class = self.episode.target_class + + # Append to history. + self.episode.history[:,step_number] = np.array(current_node_ids) + + # Render out the images from current node. + outs = {} + + if self.task_params.outputs.images: + imgs_all = [] + imgs = self.render_nodes([tuple(x) for x in current_nodes], + perturb=perturbs[:,step_number,:]) + imgs_all.append(imgs) + aux_delta_thetas = self.task_params.aux_delta_thetas + for i in range(len(aux_delta_thetas)): + imgs = self.render_nodes([tuple(x) for x in current_nodes], + perturb=perturbs[:,step_number,:], + aux_delta_theta=aux_delta_thetas[i]) + imgs_all.append(imgs) + imgs_all = np.array(imgs_all) # A x B x H x W x C + imgs_all = np.transpose(imgs_all, axes=[1,0,2,3,4]) + imgs_all = np.expand_dims(imgs_all, axis=1) # B x N x A x H x W x C + if task_params.num_history_frames > 0: + if step_number == 0: + # Append the same frame 4 times + for i in range(task_params.num_history_frames+1): + self.episode.history_frames.insert(0, imgs_all*1.) + self.episode.history_frames.insert(0, imgs_all) + self.episode.history_frames.pop() + imgs_all_with_history = np.concatenate(self.episode.history_frames, axis=2) + else: + imgs_all_with_history = imgs_all + outs['imgs'] = imgs_all_with_history # B x N x A x H x W x C + + if self.task_params.outputs.node_ids: + outs['node_ids'] = np.array(current_node_ids).reshape((-1,1,1)) + outs['perturbs'] = np.expand_dims(perturbs[:,step_number, :]*1., axis=1) + + if self.task_params.outputs.analytical_counts: + assert(self.task_params.modalities == ['depth']) + d = image_pre(outs['imgs']*1., self.task_params.modalities) + cm = get_camera_matrix(self.task_params.img_width, + self.task_params.img_height, + self.task_params.img_fov) + XYZ = get_point_cloud_from_z(100./d[...,0], cm) + XYZ = make_geocentric(XYZ*100., self.robot.sensor_height, + self.robot.camera_elevation_degree) + for i in range(len(self.task_params.analytical_counts.map_sizes)): + non_linearity = self.task_params.analytical_counts.non_linearity[i] + count, isvalid = bin_points(XYZ*1., + map_size=self.task_params.analytical_counts.map_sizes[i], + xy_resolution=self.task_params.analytical_counts.xy_resolution[i], + z_bins=self.task_params.analytical_counts.z_bins[i]) + assert(count.shape[2] == 1), 'only works for n_views equal to 1.' + count = count[:,:,0,:,:,:] + isvalid = isvalid[:,:,0,:,:,:] + if non_linearity == 'none': + None + elif non_linearity == 'min10': + count = np.minimum(count, 10.) + elif non_linearity == 'sqrt': + count = np.sqrt(count) + else: + logging.fatal('Undefined non_linearity.') + outs['analytical_counts_{:d}'.format(i)] = count + + # Compute the goal location in the cordinate frame of the robot. 
+ if self.task_params.outputs.rel_goal_loc: + if self.task_params.type[:14] != 'to_nearest_obj': + loc, _, _, theta = self.get_loc_axis(current_nodes, + delta_theta=self.task.delta_theta, + perturb=perturbs[:,step_number,:]) + goal_loc, _, _, goal_theta = self.get_loc_axis(end_nodes, + delta_theta=self.task.delta_theta, + perturb=end_perturbs[:,0,:]) + r_goal, t_goal = _get_relative_goal_loc(goal_loc, loc, theta) + + rel_goal_loc = np.concatenate((r_goal*np.cos(t_goal), r_goal*np.sin(t_goal), + np.cos(goal_theta-theta), + np.sin(goal_theta-theta)), axis=1) + outs['rel_goal_loc'] = np.expand_dims(rel_goal_loc, axis=1) + elif self.task_params.type[:14] == 'to_nearest_obj': + rel_goal_loc = np.zeros((self.task_params.batch_size, 1, + len(self.task_params.semantic_task.class_map_names)), + dtype=np.float32) + for i in range(self.task_params.batch_size): + t = target_class[i] + rel_goal_loc[i,0,t] = 1. + outs['rel_goal_loc'] = rel_goal_loc + + # Location on map to plot the trajectory during validation. + if self.task_params.outputs.loc_on_map: + loc, x_axis, y_axis, theta = self.get_loc_axis(current_nodes, + delta_theta=self.task.delta_theta, + perturb=perturbs[:,step_number,:]) + outs['loc_on_map'] = np.expand_dims(loc, axis=1) + + # Compute gt_dist to goal + if self.task_params.outputs.gt_dist_to_goal: + gt_dist_to_goal = np.zeros((len(current_node_ids), 1), dtype=np.float32) + for i, n in enumerate(current_node_ids): + gt_dist_to_goal[i,0] = self.episode.dist_to_goal[goal_number][i][n] + outs['gt_dist_to_goal'] = np.expand_dims(gt_dist_to_goal, axis=1) + + # Free space in front of you, map and goal as images. + if self.task_params.outputs.ego_maps: + loc, x_axis, y_axis, theta = self.get_loc_axis(current_nodes, + delta_theta=self.task.delta_theta, + perturb=perturbs[:,step_number,:]) + maps = generate_egocentric_maps(self.task.scaled_maps, + self.task_params.map_scales, + self.task_params.map_crop_sizes, loc, + x_axis, y_axis, theta) + + for i in range(len(self.task_params.map_scales)): + outs['ego_maps_{:d}'.format(i)] = \ + np.expand_dims(np.expand_dims(maps[i], axis=1), axis=-1) + + if self.task_params.outputs.readout_maps: + loc, x_axis, y_axis, theta = self.get_loc_axis(current_nodes, + delta_theta=self.task.delta_theta, + perturb=perturbs[:,step_number,:]) + maps = generate_egocentric_maps(self.task.readout_maps_scaled, + self.task_params.readout_maps_scales, + self.task_params.readout_maps_crop_sizes, + loc, x_axis, y_axis, theta) + for i in range(len(self.task_params.readout_maps_scales)): + outs['readout_maps_{:d}'.format(i)] = \ + np.expand_dims(np.expand_dims(maps[i], axis=1), axis=-1) + + # Images for the goal. 
+ if self.task_params.outputs.ego_goal_imgs: + if self.task_params.type[:14] != 'to_nearest_obj': + loc, x_axis, y_axis, theta = self.get_loc_axis(current_nodes, + delta_theta=self.task.delta_theta, + perturb=perturbs[:,step_number,:]) + goal_loc, _, _, _ = self.get_loc_axis(end_nodes, + delta_theta=self.task.delta_theta, + perturb=end_perturbs[:,0,:]) + rel_goal_orientation = np.mod( + np.int32(current_nodes[:,2:] - end_nodes[:,2:]), self.task_params.n_ori) + goal_dist, goal_theta = _get_relative_goal_loc(goal_loc, loc, theta) + goals = generate_goal_images(self.task_params.map_scales, + self.task_params.map_crop_sizes, + self.task_params.n_ori, goal_dist, + goal_theta, rel_goal_orientation) + for i in range(len(self.task_params.map_scales)): + outs['ego_goal_imgs_{:d}'.format(i)] = np.expand_dims(goals[i], axis=1) + + elif self.task_params.type[:14] == 'to_nearest_obj': + for i in range(len(self.task_params.map_scales)): + num_classes = len(self.task_params.semantic_task.class_map_names) + outs['ego_goal_imgs_{:d}'.format(i)] = np.zeros((self.task_params.batch_size, 1, + self.task_params.map_crop_sizes[i], + self.task_params.map_crop_sizes[i], + self.task_params.goal_channels)) + for i in range(self.task_params.batch_size): + t = target_class[i] + for j in range(len(self.task_params.map_scales)): + outs['ego_goal_imgs_{:d}'.format(j)][i,:,:,:,t] = 1. + + # Incremental locs and theta (for map warping), always in the original scale + # of the map, the subequent steps in the tf code scale appropriately. + # Scaling is done by just multiplying incremental_locs appropriately. + if self.task_params.outputs.egomotion: + if step_number == 0: + # Zero Ego Motion + incremental_locs = np.zeros((self.task_params.batch_size, 1, 2), dtype=np.float32) + incremental_thetas = np.zeros((self.task_params.batch_size, 1, 1), dtype=np.float32) + else: + previous_nodes = self.task.nodes[self.episode.history[:,step_number-1], :]*1 + loc, _, _, theta = self.get_loc_axis(current_nodes, + delta_theta=self.task.delta_theta, + perturb=perturbs[:,step_number,:]) + previous_loc, _, _, previous_theta = self.get_loc_axis( + previous_nodes, delta_theta=self.task.delta_theta, + perturb=perturbs[:,step_number-1,:]) + + incremental_locs_ = np.reshape(loc-previous_loc, [self.task_params.batch_size, 1, -1]) + + t = -np.pi/2+np.reshape(theta*1, [self.task_params.batch_size, 1, -1]) + incremental_locs = incremental_locs_*1 + incremental_locs[:,:,0] = np.sum(incremental_locs_ * + np.concatenate((np.cos(t), np.sin(t)), + axis=-1), axis=-1) + incremental_locs[:,:,1] = np.sum(incremental_locs_ * + np.concatenate((np.cos(t+np.pi/2), + np.sin(t+np.pi/2)), + axis=-1), axis=-1) + incremental_thetas = np.reshape(theta-previous_theta, + [self.task_params.batch_size, 1, -1]) + outs['incremental_locs'] = incremental_locs + outs['incremental_thetas'] = incremental_thetas + + if self.task_params.outputs.visit_count: + # Output the visit count for this state, how many times has the current + # state been visited, and how far in the history was the last visit + # (except this one) + visit_count = np.zeros((self.task_params.batch_size, 1), dtype=np.int32) + last_visit = -np.ones((self.task_params.batch_size, 1), dtype=np.int32) + if step_number >= 1: + h = self.episode.history[:,:(step_number)] + visit_count[:,0] = np.sum(h == np.array(current_node_ids).reshape([-1,1]), + axis=1) + last_visit[:,0] = np.argmax(h[:,::-1] == np.array(current_node_ids).reshape([-1,1]), + axis=1) + 1 + last_visit[visit_count == 0] = -1 # -1 if not visited. 
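
The egomotion block above re-expresses the world-frame step between consecutive poses in the robot's own frame: the displacement is projected onto the heading direction (`theta - pi/2`) and its perpendicular. Isolated from the batching and reshapes, the transform is (names illustrative):

```python
import numpy as np

def egomotion(loc, prev_loc, theta, prev_theta):
  """World-frame displacement expressed in the robot frame, as in
  get_features: x is motion along the heading, y along its perpendicular.
  loc/prev_loc are (B, 2); theta/prev_theta are (B, 1) in radians."""
  d = loc - prev_loc
  t = theta[:, 0] - np.pi / 2.
  dx = d[:, 0] * np.cos(t) + d[:, 1] * np.sin(t)
  dy = d[:, 0] * np.cos(t + np.pi / 2.) + d[:, 1] * np.sin(t + np.pi / 2.)
  return np.stack((dx, dy), axis=1), theta - prev_theta
```
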
+      outs['visit_count'] = np.expand_dims(visit_count, axis=1)
+      outs['last_visit'] = np.expand_dims(last_visit, axis=1)
+    return outs
+
+  def get_features_name(self):
+    f = []
+    if self.task_params.outputs.images:
+      f.append('imgs')
+    if self.task_params.outputs.rel_goal_loc:
+      f.append('rel_goal_loc')
+    if self.task_params.outputs.loc_on_map:
+      f.append('loc_on_map')
+    if self.task_params.outputs.gt_dist_to_goal:
+      f.append('gt_dist_to_goal')
+    if self.task_params.outputs.ego_maps:
+      for i in range(len(self.task_params.map_scales)):
+        f.append('ego_maps_{:d}'.format(i))
+    if self.task_params.outputs.readout_maps:
+      for i in range(len(self.task_params.readout_maps_scales)):
+        f.append('readout_maps_{:d}'.format(i))
+    if self.task_params.outputs.ego_goal_imgs:
+      for i in range(len(self.task_params.map_scales)):
+        f.append('ego_goal_imgs_{:d}'.format(i))
+    if self.task_params.outputs.egomotion:
+      f.append('incremental_locs')
+      f.append('incremental_thetas')
+    if self.task_params.outputs.visit_count:
+      f.append('visit_count')
+      f.append('last_visit')
+    if self.task_params.outputs.analytical_counts:
+      for i in range(len(self.task_params.analytical_counts.map_sizes)):
+        f.append('analytical_counts_{:d}'.format(i))
+    if self.task_params.outputs.node_ids:
+      f.append('node_ids')
+      f.append('perturbs')
+    return f
+
+  def pre_features(self, inputs):
+    if self.task_params.outputs.images:
+      inputs['imgs'] = image_pre(inputs['imgs'], self.task_params.modalities)
+    return inputs
+
+class BuildingMultiplexer():
+  def __init__(self, args, task_number):
+    params = vars(args)
+    for k in params.keys():
+      setattr(self, k, params[k])
+    self.task_number = task_number
+    self._pick_data(task_number)
+    logging.info('Env Class: %s.', self.env_class)
+    if self.task_params.task == 'planning':
+      self._setup_planner()
+    elif self.task_params.task == 'mapping':
+      self._setup_mapper()
+    elif self.task_params.task == 'map+plan':
+      self._setup_mapper()
+    else:
+      logging.error('Undefined task: %s', self.task_params.task)
+
+  def _pick_data(self, task_number):
+    logging.error('Input Building Names: %s', self.building_names)
+    self.flip = [np.mod(task_number / len(self.building_names), 2) == 1]
+    id = np.mod(task_number, len(self.building_names))
+    self.building_names = [self.building_names[id]]
+    self.task_params.building_seed = task_number
+    logging.error('BuildingMultiplexer: Picked Building Name: %s', self.building_names)
+    self.building_names = self.building_names[0].split('+')
+    self.flip = [self.flip[0] for _ in self.building_names]
+    logging.error('BuildingMultiplexer: Picked Building Name: %s', self.building_names)
+    logging.error('BuildingMultiplexer: Flipping Buildings: %s', self.flip)
+    logging.error('BuildingMultiplexer: Set building_seed: %d', self.task_params.building_seed)
+    self.num_buildings = len(self.building_names)
+    logging.error('BuildingMultiplexer: Num buildings: %d', self.num_buildings)
+
+  def _setup_planner(self):
+    # Load building env class.
+    self.buildings = []
+    for i, building_name in enumerate(self.building_names):
+      b = self.env_class(robot=self.robot, env=self.env,
+                         task_params=self.task_params,
+                         building_name=building_name, flip=self.flip[i],
+                         logdir=self.logdir, building_loader=self.dataset)
+      self.buildings.append(b)
+
+  def _setup_mapper(self):
+    # Set up the renderer.
+ cp = self.camera_param + rgb_shader, d_shader = sru.get_shaders(cp.modalities) + r_obj = SwiftshaderRenderer() + r_obj.init_display(width=cp.width, height=cp.height, fov=cp.fov, + z_near=cp.z_near, z_far=cp.z_far, rgb_shader=rgb_shader, + d_shader=d_shader) + self.r_obj = r_obj + r_obj.clear_scene() + + # Load building env class. + self.buildings = [] + wt = [] + for i, building_name in enumerate(self.building_names): + b = self.env_class(robot=self.robot, env=self.env, + task_params=self.task_params, + building_name=building_name, flip=self.flip[i], + logdir=self.logdir, building_loader=self.dataset, + r_obj=r_obj) + wt.append(b.get_weight()) + b.load_building_into_scene() + b.set_building_visibility(False) + self.buildings.append(b) + wt = np.array(wt).astype(np.float32) + wt = wt / np.sum(wt+0.0001) + self.building_sampling_weights = wt + + def sample_building(self, rng): + if self.num_buildings == 1: + building_id = rng.choice(range(len(self.building_names))) + else: + building_id = rng.choice(self.num_buildings, + p=self.building_sampling_weights) + b = self.buildings[building_id] + instances = b._gen_rng(rng) + self._building_id = building_id + return self.buildings[building_id], instances + + def sample_env(self, rngs): + rng = rngs[0]; + if self.num_buildings == 1: + building_id = rng.choice(range(len(self.building_names))) + else: + building_id = rng.choice(self.num_buildings, + p=self.building_sampling_weights) + return self.buildings[building_id] + + def pre(self, inputs): + return self.buildings[self._building_id].pre(inputs) + + def __del__(self): + self.r_obj.clear_scene() + logging.error('Clearing scene.') diff --git a/cognitive_mapping_and_planning/datasets/nav_env_config.py b/cognitive_mapping_and_planning/datasets/nav_env_config.py new file mode 100644 index 00000000000..3d71c5767c4 --- /dev/null +++ b/cognitive_mapping_and_planning/datasets/nav_env_config.py @@ -0,0 +1,127 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Configs for stanford navigation environment. + +Base config for stanford navigation enviornment. +""" +import numpy as np +import src.utils as utils +import datasets.nav_env as nav_env + +def nav_env_base_config(): + """Returns the base config for stanford navigation environment. + + Returns: + Base config for stanford navigation environment. + """ + robot = utils.Foo(radius=15, + base=10, + height=140, + sensor_height=120, + camera_elevation_degree=-15) + + env = utils.Foo(padding=10, + resolution=5, + num_point_threshold=2, + valid_min=-10, + valid_max=200, + n_samples_per_face=200) + + camera_param = utils.Foo(width=225, + height=225, + z_near=0.05, + z_far=20.0, + fov=60., + modalities=['rgb'], + img_channels=3) + + data_augment = utils.Foo(lr_flip=0, + delta_angle=0.5, + delta_xy=4, + relight=True, + relight_fast=False, + structured=False) # if True, uses the same perturb for the whole episode. 
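
These configs lean heavily on `utils.Foo` as a plain attribute bag, so fields read as `robot.radius` rather than `robot['radius']`, and `vars(...)` turns one back into a dict (as `get_targets` does above). A minimal sketch of the pattern (the real class in `src/utils.py` may carry extras such as pretty-printing):

```python
class Foo(object):
  """Attribute bag: keyword arguments become attributes."""
  def __init__(self, **kwargs):
    self.__dict__.update(kwargs)

robot = Foo(radius=15, base=10, height=140)
assert robot.radius == 15 and vars(robot)['height'] == 140
```
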
+ + outputs = utils.Foo(images=True, + rel_goal_loc=False, + loc_on_map=True, + gt_dist_to_goal=True, + ego_maps=False, + ego_goal_imgs=False, + egomotion=False, + visit_count=False, + analytical_counts=False, + node_ids=True, + readout_maps=False) + + # class_map_names=['board', 'chair', 'door', 'sofa', 'table'] + class_map_names = ['chair', 'door', 'table'] + semantic_task = utils.Foo(class_map_names=class_map_names, pix_distance=16, + sampling='uniform') + + # time per iteration for cmp is 0.82 seconds per episode with 3.4s overhead per batch. + task_params = utils.Foo(max_dist=32, + step_size=8, + num_steps=40, + num_actions=4, + batch_size=4, + building_seed=0, + num_goals=1, + img_height=None, + img_width=None, + img_channels=None, + modalities=None, + outputs=outputs, + map_scales=[1.], + map_crop_sizes=[64], + rel_goal_loc_dim=4, + base_class='Building', + task='map+plan', + n_ori=4, + type='room_to_room_many', + data_augment=data_augment, + room_regex='^((?!hallway).)*$', + toy_problem=False, + map_channels=1, + gt_coverage=False, + input_type='maps', + full_information=False, + aux_delta_thetas=[], + semantic_task=semantic_task, + num_history_frames=0, + node_ids_dim=1, + perturbs_dim=4, + map_resize_method='linear_noantialiasing', + readout_maps_channels=1, + readout_maps_scales=[], + readout_maps_crop_sizes=[], + n_views=1, + reward_time_penalty=0.1, + reward_at_goal=1., + discount_factor=0.99, + rejection_sampling_M=100, + min_dist=None) + + navtask_args = utils.Foo( + building_names=['area1_gates_wingA_floor1_westpart'], + env_class=nav_env.VisualNavigationEnv, + robot=robot, + task_params=task_params, + env=env, + camera_param=camera_param, + cache_rooms=True) + return navtask_args + diff --git a/cognitive_mapping_and_planning/matplotlibrc b/cognitive_mapping_and_planning/matplotlibrc new file mode 100644 index 00000000000..ed5097572ae --- /dev/null +++ b/cognitive_mapping_and_planning/matplotlibrc @@ -0,0 +1 @@ +backend : agg diff --git a/cognitive_mapping_and_planning/output/.gitignore b/cognitive_mapping_and_planning/output/.gitignore new file mode 100644 index 00000000000..a767cafbbd8 --- /dev/null +++ b/cognitive_mapping_and_planning/output/.gitignore @@ -0,0 +1 @@ +* diff --git a/cognitive_mapping_and_planning/output/README.md b/cognitive_mapping_and_planning/output/README.md new file mode 100644 index 00000000000..7518c387439 --- /dev/null +++ b/cognitive_mapping_and_planning/output/README.md @@ -0,0 +1,16 @@ +### Pre-Trained Models + +We provide the following pre-trained models: + +Config Name | Checkpoint | Mean Dist. | 50%ile Dist. | 75%ile Dist. 
| Success %age |
+:-: | :-: | :-: | :-: | :-: | :-: |
+cmp.lmap_Msc.clip5.sbpd_d_r2r | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_d_r2r.tar) | 4.79 | 0 | 1 | 78.9 |
+cmp.lmap_Msc.clip5.sbpd_rgb_r2r | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_rgb_r2r.tar) | 7.74 | 0 | 14 | 62.4 |
+cmp.lmap_Msc.clip5.sbpd_d_ST | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_d_ST.tar) | 10.67 | 9 | 19 | 39.7 |
+cmp.lmap_Msc.clip5.sbpd_rgb_ST | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_rgb_ST.tar) | 11.27 | 10 | 19 | 35.6 |
+cmp.lmap_Msc.clip5.sbpd_d_r2r_h0_64_80 | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_d_r2r_h0_64_80.tar) | 11.6 | 0 | 19 | 66.9 |
+bl.v2.noclip.sbpd_d_r2r | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_d_r2r.tar) | 5.90 | 0 | 6 | 71.2 |
+bl.v2.noclip.sbpd_rgb_r2r | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_rgb_r2r.tar) | 10.21 | 1 | 21 | 53.4 |
+bl.v2.noclip.sbpd_d_ST | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_d_ST.tar) | 13.29 | 14 | 23 | 28.0 |
+bl.v2.noclip.sbpd_rgb_ST | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_rgb_ST.tar) | 13.37 | 13 | 20 | 24.2 |
+bl.v2.noclip.sbpd_d_r2r_h0_64_80 | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_d_r2r_h0_64_80.tar) | 15.30 | 0 | 29 | 57.9 |
diff --git a/cognitive_mapping_and_planning/patches/GLES2_2_0.py.patch b/cognitive_mapping_and_planning/patches/GLES2_2_0.py.patch
new file mode 100644
index 00000000000..de1be442d5b
--- /dev/null
+++ b/cognitive_mapping_and_planning/patches/GLES2_2_0.py.patch
@@ -0,0 +1,14 @@
+10c10
+< from OpenGL import platform, constant, arrays
+---
+> from OpenGL import platform, constant, arrays, contextdata
+249a250
+> from OpenGL._bytes import _NULL_8_BYTE
+399c400
+< array = ArrayDatatype.asArray( pointer, type )
+---
+> array = arrays.ArrayDatatype.asArray( pointer, type )
+405c406
+< ArrayDatatype.voidDataPointer( array )
+---
+> arrays.ArrayDatatype.voidDataPointer( array )
diff --git a/cognitive_mapping_and_planning/patches/apply_patches.sh b/cognitive_mapping_and_planning/patches/apply_patches.sh
new file mode 100644
index 00000000000..4a786058258
--- /dev/null
+++ b/cognitive_mapping_and_planning/patches/apply_patches.sh
@@ -0,0 +1,18 @@
+# Copyright 2016 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== + +echo $VIRTUAL_ENV +patch $VIRTUAL_ENV/local/lib/python2.7/site-packages/OpenGL/GLES2/VERSION/GLES2_2_0.py patches/GLES2_2_0.py.patch +patch $VIRTUAL_ENV/local/lib/python2.7/site-packages/OpenGL/platform/ctypesloader.py patches/ctypesloader.py.patch diff --git a/cognitive_mapping_and_planning/patches/ctypesloader.py.patch b/cognitive_mapping_and_planning/patches/ctypesloader.py.patch new file mode 100644 index 00000000000..27dd43b1801 --- /dev/null +++ b/cognitive_mapping_and_planning/patches/ctypesloader.py.patch @@ -0,0 +1,15 @@ +45c45,46 +< return dllType( name, mode ) +--- +> print './' + name +> return dllType( './' + name, mode ) +47,48c48,53 +< err.args += (name,fullName) +< raise +--- +> try: +> print name +> return dllType( name, mode ) +> except: +> err.args += (name,fullName) +> raise diff --git a/cognitive_mapping_and_planning/render/__init__.py b/cognitive_mapping_and_planning/render/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cognitive_mapping_and_planning/render/depth_rgb_encoded.fp b/cognitive_mapping_and_planning/render/depth_rgb_encoded.fp new file mode 100644 index 00000000000..23e93d27f58 --- /dev/null +++ b/cognitive_mapping_and_planning/render/depth_rgb_encoded.fp @@ -0,0 +1,30 @@ +// This shader computes per-pixel depth (-z coordinate in the camera space, or +// orthogonal distance to the camera plane). The result is multiplied by the +// `kFixedPointFraction` constant and is encoded to RGB channels as an integer +// (R being the least significant byte). + +#ifdef GL_ES +#ifdef GL_FRAGMENT_PRECISION_HIGH +precision highp float; +#else +precision mediump float; +#endif +#endif + +const float kFixedPointFraction = 1000.0; + +varying float vDepth; + +void main(void) { + float d = vDepth; + + // Encode the depth to RGB. + d *= (kFixedPointFraction / 255.0); + gl_FragColor.r = mod(d, 1.0); + d = (d - gl_FragColor.r) / 255.0; + gl_FragColor.g = mod(d, 1.0); + d = (d - gl_FragColor.g) / 255.0; + gl_FragColor.b = mod(d, 1.0); + + gl_FragColor.a = 1.0; +} diff --git a/cognitive_mapping_and_planning/render/depth_rgb_encoded.vp b/cognitive_mapping_and_planning/render/depth_rgb_encoded.vp new file mode 100644 index 00000000000..2db74f14aa7 --- /dev/null +++ b/cognitive_mapping_and_planning/render/depth_rgb_encoded.vp @@ -0,0 +1,15 @@ +uniform mat4 uViewMatrix; +uniform mat4 uProjectionMatrix; + +attribute vec3 aPosition; + +varying float vDepth; + +void main(void) { + vec4 worldPosition = vec4(aPosition, 1.0); + vec4 viewPosition = uViewMatrix * worldPosition; + gl_Position = uProjectionMatrix * viewPosition; + + // Orthogonal depth is simply -z in the camera space. 
+ vDepth = -viewPosition.z; +} diff --git a/cognitive_mapping_and_planning/render/rgb_flat_color.fp b/cognitive_mapping_and_planning/render/rgb_flat_color.fp new file mode 100644 index 00000000000..c8c24d76103 --- /dev/null +++ b/cognitive_mapping_and_planning/render/rgb_flat_color.fp @@ -0,0 +1,11 @@ +precision highp float; +varying vec4 vColor; +varying vec2 vTextureCoord; + +uniform sampler2D uTexture; + +void main(void) { + vec4 color = vColor; + color = texture2D(uTexture, vTextureCoord); + gl_FragColor = color; +} diff --git a/cognitive_mapping_and_planning/render/rgb_flat_color.vp b/cognitive_mapping_and_planning/render/rgb_flat_color.vp new file mode 100644 index 00000000000..ebc79173405 --- /dev/null +++ b/cognitive_mapping_and_planning/render/rgb_flat_color.vp @@ -0,0 +1,18 @@ +uniform mat4 uViewMatrix; +uniform mat4 uProjectionMatrix; +uniform vec4 uColor; + +attribute vec4 aColor; +attribute vec3 aPosition; +attribute vec2 aTextureCoord; + +varying vec4 vColor; +varying vec2 vTextureCoord; + +void main(void) { + vec4 worldPosition = vec4(aPosition, 1.0); + gl_Position = uProjectionMatrix * (uViewMatrix * worldPosition); + + vColor = aColor * uColor; + vTextureCoord = aTextureCoord; +} diff --git a/cognitive_mapping_and_planning/render/swiftshader_renderer.py b/cognitive_mapping_and_planning/render/swiftshader_renderer.py new file mode 100644 index 00000000000..74b1be72c11 --- /dev/null +++ b/cognitive_mapping_and_planning/render/swiftshader_renderer.py @@ -0,0 +1,427 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +r"""Implements loading and rendering of meshes. Contains 2 classes: + Shape: Class that exposes high level functions for loading and manipulating + shapes. This currently is bound to assimp + (https://github.com/assimp/assimp). If you want to interface to a different + library, reimplement this class with bindings to your mesh loading library. + + SwiftshaderRenderer: Class that renders Shapes. Currently this uses python + bindings to OpenGL (EGL), bindings to an alternate renderer may be implemented + here. 
+""" + +import numpy as np, os +import cv2, ctypes, logging, os, numpy as np +import pyassimp as assimp +from OpenGL.GLES2 import * +from OpenGL.EGL import * +import src.rotation_utils as ru + +__version__ = 'swiftshader_renderer' + +def get_shaders(modalities): + rgb_shader = 'rgb_flat_color' if 'rgb' in modalities else None + d_shader = 'depth_rgb_encoded' if 'depth' in modalities else None + return rgb_shader, d_shader + +def sample_points_on_faces(vs, fs, rng, n_samples_per_face): + idx = np.repeat(np.arange(fs.shape[0]), n_samples_per_face) + + r = rng.rand(idx.size, 2) + r1 = r[:,:1]; r2 = r[:,1:]; sqrt_r1 = np.sqrt(r1); + + v1 = vs[fs[idx, 0], :]; v2 = vs[fs[idx, 1], :]; v3 = vs[fs[idx, 2], :]; + pts = (1-sqrt_r1)*v1 + sqrt_r1*(1-r2)*v2 + sqrt_r1*r2*v3 + + v1 = vs[fs[:,0], :]; v2 = vs[fs[:, 1], :]; v3 = vs[fs[:, 2], :]; + ar = 0.5*np.sqrt(np.sum(np.cross(v1-v3, v2-v3)**2, 1)) + + return pts, ar, idx + +class Shape(): + def get_pyassimp_load_options(self): + load_flags = assimp.postprocess.aiProcess_Triangulate; + load_flags = load_flags | assimp.postprocess.aiProcess_SortByPType; + load_flags = load_flags | assimp.postprocess.aiProcess_OptimizeMeshes; + load_flags = load_flags | assimp.postprocess.aiProcess_RemoveRedundantMaterials; + load_flags = load_flags | assimp.postprocess.aiProcess_FindDegenerates; + load_flags = load_flags | assimp.postprocess.aiProcess_GenSmoothNormals; + load_flags = load_flags | assimp.postprocess.aiProcess_JoinIdenticalVertices; + load_flags = load_flags | assimp.postprocess.aiProcess_ImproveCacheLocality; + load_flags = load_flags | assimp.postprocess.aiProcess_GenUVCoords; + load_flags = load_flags | assimp.postprocess.aiProcess_FindInvalidData; + return load_flags + + def __init__(self, obj_file, material_file=None, load_materials=True, + name_prefix='', name_suffix=''): + if material_file is not None: + logging.error('Ignoring material file input, reading them off obj file.') + load_flags = self.get_pyassimp_load_options() + scene = assimp.load(obj_file, processing=load_flags) + filter_ind = self._filter_triangles(scene.meshes) + self.meshes = [scene.meshes[i] for i in filter_ind] + for m in self.meshes: + m.name = name_prefix + m.name + name_suffix + + dir_name = os.path.dirname(obj_file) + # Load materials + materials = None + if load_materials: + materials = [] + for m in self.meshes: + file_name = os.path.join(dir_name, m.material.properties[('file', 1)]) + assert(os.path.exists(file_name)), \ + 'Texture file {:s} foes not exist.'.format(file_name) + img_rgb = cv2.imread(file_name)[::-1,:,::-1] + if img_rgb.shape[0] != img_rgb.shape[1]: + logging.warn('Texture image not square.') + sz = np.maximum(img_rgb.shape[0], img_rgb.shape[1]) + sz = int(np.power(2., np.ceil(np.log2(sz)))) + img_rgb = cv2.resize(img_rgb, (sz,sz), interpolation=cv2.INTER_LINEAR) + else: + sz = img_rgb.shape[0] + sz_ = int(np.power(2., np.ceil(np.log2(sz)))) + if sz != sz_: + logging.warn('Texture image not square of power of 2 size. 
' + + 'Changing size from %d to %d.', sz, sz_) + sz = sz_ + img_rgb = cv2.resize(img_rgb, (sz,sz), interpolation=cv2.INTER_LINEAR) + materials.append(img_rgb) + self.scene = scene + self.materials = materials + + def _filter_triangles(self, meshes): + select = [] + for i in range(len(meshes)): + if meshes[i].primitivetypes == 4: + select.append(i) + return select + + def flip_shape(self): + for m in self.meshes: + m.vertices[:,1] = -m.vertices[:,1] + bb = m.faces*1 + bb[:,1] = m.faces[:,2] + bb[:,2] = m.faces[:,1] + m.faces = bb + # m.vertices[:,[0,1]] = m.vertices[:,[1,0]] + + def get_vertices(self): + vs = [] + for m in self.meshes: + vs.append(m.vertices) + vss = np.concatenate(vs, axis=0) + return vss, vs + + def get_faces(self): + vs = [] + for m in self.meshes: + v = m.faces + vs.append(v) + return vs + + def get_number_of_meshes(self): + return len(self.meshes) + + def scale(self, sx=1., sy=1., sz=1.): + pass + + def sample_points_on_face_of_shape(self, i, n_samples_per_face, sc): + v = self.meshes[i].vertices*sc + f = self.meshes[i].faces + p, face_areas, face_idx = sample_points_on_faces( + v, f, np.random.RandomState(0), n_samples_per_face) + return p, face_areas, face_idx + + def __del__(self): + scene = self.scene + assimp.release(scene) + +class SwiftshaderRenderer(): + def __init__(self): + self.entities = {} + + def init_display(self, width, height, fov, z_near, z_far, rgb_shader, + d_shader): + self.init_renderer_egl(width, height) + dir_path = os.path.dirname(os.path.realpath(__file__)) + if d_shader is not None and rgb_shader is not None: + logging.fatal('Does not support setting both rgb_shader and d_shader.') + + if d_shader is not None: + assert rgb_shader is None + shader = d_shader + self.modality = 'depth' + + if rgb_shader is not None: + assert d_shader is None + shader = rgb_shader + self.modality = 'rgb' + + self.create_shaders(os.path.join(dir_path, shader+'.vp'), + os.path.join(dir_path, shader + '.fp')) + aspect = width*1./(height*1.) 
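+    # fov is the vertical field of view in degrees; the horizontal extent
+    # follows from the aspect ratio passed to set_camera below.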
+ self.set_camera(fov, z_near, z_far, aspect) + + def init_renderer_egl(self, width, height): + major,minor = ctypes.c_long(),ctypes.c_long() + logging.info('init_renderer_egl: EGL_DEFAULT_DISPLAY: %s', EGL_DEFAULT_DISPLAY) + + egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY) + logging.info('init_renderer_egl: egl_display: %s', egl_display) + + eglInitialize(egl_display, major, minor) + logging.info('init_renderer_egl: EGL_OPENGL_API, EGL_OPENGL_ES_API: %s, %s', + EGL_OPENGL_API, EGL_OPENGL_ES_API) + eglBindAPI(EGL_OPENGL_ES_API) + + num_configs = ctypes.c_long() + configs = (EGLConfig*1)() + local_attributes = [EGL_RED_SIZE, 8, EGL_GREEN_SIZE, 8, EGL_BLUE_SIZE, 8, + EGL_DEPTH_SIZE, 16, EGL_SURFACE_TYPE, EGL_PBUFFER_BIT, + EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, EGL_NONE,] + logging.error('init_renderer_egl: local attributes: %s', local_attributes) + local_attributes = arrays.GLintArray.asArray(local_attributes) + success = eglChooseConfig(egl_display, local_attributes, configs, 1, num_configs) + logging.error('init_renderer_egl: eglChooseConfig success, num_configs: %d, %d', success, num_configs.value) + egl_config = configs[0] + + + context_attributes = [EGL_CONTEXT_CLIENT_VERSION, 2, EGL_NONE] + context_attributes = arrays.GLintArray.asArray(context_attributes) + egl_context = eglCreateContext(egl_display, egl_config, EGL_NO_CONTEXT, context_attributes) + + buffer_attributes = [EGL_WIDTH, width, EGL_HEIGHT, height, EGL_NONE] + buffer_attributes = arrays.GLintArray.asArray(buffer_attributes) + egl_surface = eglCreatePbufferSurface(egl_display, egl_config, buffer_attributes) + + + eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context) + logging.error("init_renderer_egl: egl_display: %s egl_surface: %s, egl_config: %s", egl_display, egl_surface, egl_context) + + glViewport(0, 0, width, height); + + self.egl_display = egl_display + self.egl_surface = egl_surface + self.egl_config = egl_config + self.egl_mapping = {} + self.render_timer = None + self.load_timer = None + self.height = height + self.width = width + + def create_shaders(self, v_shader_file, f_shader_file): + v_shader = glCreateShader(GL_VERTEX_SHADER) + with open(v_shader_file, 'r') as f: + ls = '' + for l in f: + ls = ls + l + glShaderSource(v_shader, ls) + glCompileShader(v_shader); + assert(glGetShaderiv(v_shader, GL_COMPILE_STATUS) == 1) + + f_shader = glCreateShader(GL_FRAGMENT_SHADER) + with open(f_shader_file, 'r') as f: + ls = '' + for l in f: + ls = ls + l + glShaderSource(f_shader, ls) + glCompileShader(f_shader); + assert(glGetShaderiv(f_shader, GL_COMPILE_STATUS) == 1) + + egl_program = glCreateProgram(); + assert(egl_program) + glAttachShader(egl_program, v_shader) + glAttachShader(egl_program, f_shader) + glLinkProgram(egl_program); + assert(glGetProgramiv(egl_program, GL_LINK_STATUS) == 1) + glUseProgram(egl_program) + + glBindAttribLocation(egl_program, 0, "aPosition") + glBindAttribLocation(egl_program, 1, "aColor") + glBindAttribLocation(egl_program, 2, "aTextureCoord") + + self.egl_program = egl_program + self.egl_mapping['vertexs'] = 0 + self.egl_mapping['vertexs_color'] = 1 + self.egl_mapping['vertexs_tc'] = 2 + + glClearColor(0.0, 0.0, 0.0, 1.0); + # glEnable(GL_CULL_FACE); glCullFace(GL_BACK); + glEnable(GL_DEPTH_TEST); + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT) + + def set_camera(self, fov_vertical, z_near, z_far, aspect): + width = 2*np.tan(np.deg2rad(fov_vertical)/2.0)*z_near*aspect; + height = 2*np.tan(np.deg2rad(fov_vertical)/2.0)*z_near; + egl_program = self.egl_program + c 
= np.eye(4, dtype=np.float32) + c[3,3] = 0 + c[3,2] = -1 + c[2,2] = -(z_near+z_far)/(z_far-z_near) + c[2,3] = -2.0*(z_near*z_far)/(z_far-z_near) + c[0,0] = 2.0*z_near/width + c[1,1] = 2.0*z_near/height + c = c.T + + projection_matrix_o = glGetUniformLocation(egl_program, 'uProjectionMatrix') + projection_matrix = np.eye(4, dtype=np.float32) + projection_matrix[...] = c + projection_matrix = np.reshape(projection_matrix, (-1)) + glUniformMatrix4fv(projection_matrix_o, 1, GL_FALSE, projection_matrix) + + + def load_default_object(self): + v = np.array([[0.0, 0.5, 0.0, 1.0, 1.0, 0.0, 1.0], + [-0.5, -0.5, 0.0, 1.0, 0.0, 1.0, 1.0], + [0.5, -0.5, 0.0, 1.0, 1.0, 1.0, 1.0]], dtype=np.float32) + v = np.concatenate((v,v+0.1), axis=0) + v = np.ascontiguousarray(v, dtype=np.float32) + + vbo = glGenBuffers(1) + glBindBuffer (GL_ARRAY_BUFFER, vbo) + glBufferData (GL_ARRAY_BUFFER, v.dtype.itemsize*v.size, v, GL_STATIC_DRAW) + glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 28, ctypes.c_void_p(0)) + glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 28, ctypes.c_void_p(12)) + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + + self.num_to_render = 6; + + def _actual_render(self): + for entity_id, entity in self.entities.iteritems(): + if entity['visible']: + vbo = entity['vbo'] + tbo = entity['tbo'] + num = entity['num'] + + glBindBuffer(GL_ARRAY_BUFFER, vbo) + glVertexAttribPointer(self.egl_mapping['vertexs'], 3, GL_FLOAT, GL_FALSE, + 20, ctypes.c_void_p(0)) + glVertexAttribPointer(self.egl_mapping['vertexs_tc'], 2, GL_FLOAT, + GL_FALSE, 20, ctypes.c_void_p(12)) + glEnableVertexAttribArray(self.egl_mapping['vertexs']); + glEnableVertexAttribArray(self.egl_mapping['vertexs_tc']); + + glBindTexture(GL_TEXTURE_2D, tbo) + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glDrawArrays(GL_TRIANGLES, 0, num) + + def render(self, take_screenshot=False, output_type=0): + # self.render_timer.tic() + self._actual_render() + # self.render_timer.toc(log_at=1000, log_str='render timer', type='time') + + np_rgb_img = None + np_d_img = None + c = 1000. 
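+    # c matches kFixedPointFraction in depth_rgb_encoded.fp: the shader packs
+    # depth*1000 into the R (least significant), G and B bytes, so the decode
+    # below recovers depth in scene units as (B*255*255 + G*255 + R)/c.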
+ if take_screenshot: + if self.modality == 'rgb': + screenshot_rgba = np.zeros((self.height, self.width, 4), dtype=np.uint8) + glReadPixels(0, 0, self.width, self.height, GL_RGBA, GL_UNSIGNED_BYTE, screenshot_rgba) + np_rgb_img = screenshot_rgba[::-1,:,:3]; + + if self.modality == 'depth': + screenshot_d = np.zeros((self.height, self.width, 4), dtype=np.uint8) + glReadPixels(0, 0, self.width, self.height, GL_RGBA, GL_UNSIGNED_BYTE, screenshot_d) + np_d_img = screenshot_d[::-1,:,:3]; + np_d_img = np_d_img[:,:,2]*(255.*255./c) + np_d_img[:,:,1]*(255./c) + np_d_img[:,:,0]*(1./c) + np_d_img = np_d_img.astype(np.float32) + np_d_img[np_d_img == 0] = np.NaN + np_d_img = np_d_img[:,:,np.newaxis] + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT) + return np_rgb_img, np_d_img + + def _load_mesh_into_gl(self, mesh, material): + vvt = np.concatenate((mesh.vertices, mesh.texturecoords[0,:,:2]), axis=1) + vvt = np.ascontiguousarray(vvt[mesh.faces.reshape((-1)),:], dtype=np.float32) + num = vvt.shape[0] + vvt = np.reshape(vvt, (-1)) + + vbo = glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, vbo) + glBufferData(GL_ARRAY_BUFFER, vvt.dtype.itemsize*vvt.size, vvt, GL_STATIC_DRAW) + + tbo = glGenTextures(1) + glBindTexture(GL_TEXTURE_2D, tbo) + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, material.shape[1], + material.shape[0], 0, GL_RGB, GL_UNSIGNED_BYTE, + np.reshape(material, (-1))) + return num, vbo, tbo + + def load_shapes(self, shapes): + entities = self.entities + entity_ids = [] + for i, shape in enumerate(shapes): + for j in range(len(shape.meshes)): + name = shape.meshes[j].name + assert name not in entities, '{:s} entity already exists.'.format(name) + num, vbo, tbo = self._load_mesh_into_gl(shape.meshes[j], shape.materials[j]) + entities[name] = {'num': num, 'vbo': vbo, 'tbo': tbo, 'visible': False} + entity_ids.append(name) + return entity_ids + + def set_entity_visible(self, entity_ids, visibility): + for entity_id in entity_ids: + self.entities[entity_id]['visible'] = visibility + + def position_camera(self, camera_xyz, lookat_xyz, up): + camera_xyz = np.array(camera_xyz) + lookat_xyz = np.array(lookat_xyz) + up = np.array(up) + lookat_to = lookat_xyz - camera_xyz + lookat_from = np.array([0, 1., 0.]) + up_from = np.array([0, 0., 1.]) + up_to = up * 1. 
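+    # The canonical camera frame looks along +y with +z up (lookat_from and
+    # up_from above); rotate_camera_to_point_at solves for the rotation that
+    # aligns this frame with the requested lookat direction and up vector.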
+ # np.set_printoptions(precision=2, suppress=True) + # print up_from, lookat_from, up_to, lookat_to + r = ru.rotate_camera_to_point_at(up_from, lookat_from, up_to, lookat_to) + R = np.eye(4, dtype=np.float32) + R[:3,:3] = r + + t = np.eye(4, dtype=np.float32) + t[:3,3] = -camera_xyz + + view_matrix = np.dot(R.T, t) + flip_yz = np.eye(4, dtype=np.float32) + flip_yz[1,1] = 0; flip_yz[2,2] = 0; flip_yz[1,2] = 1; flip_yz[2,1] = -1; + view_matrix = np.dot(flip_yz, view_matrix) + view_matrix = view_matrix.T + # print np.concatenate((R, t, view_matrix), axis=1) + view_matrix = np.reshape(view_matrix, (-1)) + view_matrix_o = glGetUniformLocation(self.egl_program, 'uViewMatrix') + glUniformMatrix4fv(view_matrix_o, 1, GL_FALSE, view_matrix) + return None, None #camera_xyz, q + + def clear_scene(self): + keys = self.entities.keys() + for entity_id in keys: + entity = self.entities.pop(entity_id, None) + vbo = entity['vbo'] + tbo = entity['tbo'] + num = entity['num'] + glDeleteBuffers(1, [vbo]) + glDeleteTextures(1, [tbo]) + + def __del__(self): + self.clear_scene() + eglMakeCurrent(self.egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT) + eglDestroySurface(self.egl_display, self.egl_surface) + eglTerminate(self.egl_display) diff --git a/cognitive_mapping_and_planning/requirements.txt b/cognitive_mapping_and_planning/requirements.txt new file mode 100644 index 00000000000..306c807a6c9 --- /dev/null +++ b/cognitive_mapping_and_planning/requirements.txt @@ -0,0 +1,9 @@ +numpy +pillow +PyOpenGL +PyOpenGL-accelerate +six +networkx +scikit-image +scipy +opencv-python diff --git a/cognitive_mapping_and_planning/scripts/__init__.py b/cognitive_mapping_and_planning/scripts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cognitive_mapping_and_planning/scripts/script_distill.py b/cognitive_mapping_and_planning/scripts/script_distill.py new file mode 100644 index 00000000000..010c690412e --- /dev/null +++ b/cognitive_mapping_and_planning/scripts/script_distill.py @@ -0,0 +1,177 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +r""" Script to setup the grid moving agent. 
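+
+This trains the network that distills the ImageNet pre-trained RGB model to
+depth images (Cross-Modal Distillation), used to initialize the D agents.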
+ +blaze build --define=ION_GFX_OGLES20=1 -c opt --copt=-mavx --config=cuda_clang \ + learning/brain/public/tensorflow_std_server{,_gpu} \ + experimental/users/saurabhgupta/navigation/cmp/scripts/script_distill.par \ + experimental/users/saurabhgupta/navigation/cmp/scripts/script_distill + + +./blaze-bin/experimental/users/saurabhgupta/navigation/cmp/scripts/script_distill \ + --logdir=/cns/iq-d/home/saurabhgupta/output/stanford-distill/local/v0/ \ + --config_name 'v0+train' --gfs_user robot-intelligence-gpu + +""" +import sys, os, numpy as np +import copy +import argparse, pprint +import time +import cProfile + + +import tensorflow as tf +from tensorflow.contrib import slim +from tensorflow.python.framework import ops +from tensorflow.contrib.framework.python.ops import variables + +import logging +from tensorflow.python.platform import gfile +from tensorflow.python.platform import app +from tensorflow.python.platform import flags +from cfgs import config_distill +from tfcode import tf_utils +import src.utils as utils +import src.file_utils as fu +import tfcode.distillation as distill +import datasets.nav_env as nav_env + +FLAGS = flags.FLAGS + +flags.DEFINE_string('master', 'local', + 'The name of the TensorFlow master to use.') +flags.DEFINE_integer('ps_tasks', 0, 'The number of parameter servers. If the ' + 'value is 0, then the parameters are handled locally by ' + 'the worker.') +flags.DEFINE_integer('task', 0, 'The Task ID. This value is used when training ' + 'with multiple workers to identify each worker.') + +flags.DEFINE_integer('num_workers', 1, '') + +flags.DEFINE_string('config_name', '', '') + +flags.DEFINE_string('logdir', '', '') + +def main(_): + args = config_distill.get_args_for_config(FLAGS.config_name) + args.logdir = FLAGS.logdir + args.solver.num_workers = FLAGS.num_workers + args.solver.task = FLAGS.task + args.solver.ps_tasks = FLAGS.ps_tasks + args.solver.master = FLAGS.master + + args.buildinger.env_class = nav_env.MeshMapper + fu.makedirs(args.logdir) + args.buildinger.logdir = args.logdir + R = nav_env.get_multiplexor_class(args.buildinger, args.solver.task) + + if False: + pr = cProfile.Profile() + pr.enable() + rng = np.random.RandomState(0) + for i in range(1): + b, instances_perturbs = R.sample_building(rng) + inputs = b.worker(*(instances_perturbs)) + for j in range(inputs['imgs'].shape[0]): + p = os.path.join('tmp', '{:d}.png'.format(j)) + img = inputs['imgs'][j,0,:,:,:3]*1 + img = (img).astype(np.uint8) + fu.write_image(p, img) + print(inputs['imgs'].shape) + inputs = R.pre(inputs) + pr.disable() + pr.print_stats(2) + + if args.control.train: + if not gfile.Exists(args.logdir): + gfile.MakeDirs(args.logdir) + + m = utils.Foo() + m.tf_graph = tf.Graph() + + config = tf.ConfigProto() + config.device_count['GPU'] = 1 + config.gpu_options.allow_growth = True + config.gpu_options.per_process_gpu_memory_fraction = 0.8 + + with m.tf_graph.as_default(): + with tf.device(tf.train.replica_device_setter(args.solver.ps_tasks)): + m = distill.setup_to_run(m, args, is_training=True, + batch_norm_is_training=True) + + train_step_kwargs = distill.setup_train_step_kwargs_mesh( + m, R, os.path.join(args.logdir, 'train'), + rng_seed=args.solver.task, is_chief=args.solver.task==0, iters=1, + train_display_interval=args.summary.display_interval) + + final_loss = slim.learning.train( + train_op=m.train_op, + logdir=args.logdir, + master=args.solver.master, + is_chief=args.solver.task == 0, + number_of_steps=args.solver.max_steps, + train_step_fn=tf_utils.train_step_custom, + 
train_step_kwargs=train_step_kwargs, + global_step=m.global_step_op, + init_op=m.init_op, + init_fn=m.init_fn, + sync_optimizer=m.sync_optimizer, + saver=m.saver_op, + summary_op=None, session_config=config) + + if args.control.test: + m = utils.Foo() + m.tf_graph = tf.Graph() + checkpoint_dir = os.path.join(format(args.logdir)) + with m.tf_graph.as_default(): + m = distill.setup_to_run(m, args, is_training=False, + batch_norm_is_training=args.control.force_batchnorm_is_training_at_test) + + train_step_kwargs = distill.setup_train_step_kwargs_mesh( + m, R, os.path.join(args.logdir, args.control.test_name), + rng_seed=args.solver.task+1, is_chief=args.solver.task==0, + iters=args.summary.test_iters, train_display_interval=None) + + sv = slim.learning.supervisor.Supervisor( + graph=ops.get_default_graph(), logdir=None, init_op=m.init_op, + summary_op=None, summary_writer=None, global_step=None, saver=m.saver_op) + + last_checkpoint = None + while True: + last_checkpoint = slim.evaluation.wait_for_new_checkpoint(checkpoint_dir, last_checkpoint) + checkpoint_iter = int(os.path.basename(last_checkpoint).split('-')[1]) + start = time.time() + logging.info('Starting evaluation at %s using checkpoint %s.', + time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()), + last_checkpoint) + + config = tf.ConfigProto() + config.device_count['GPU'] = 1 + config.gpu_options.allow_growth = True + config.gpu_options.per_process_gpu_memory_fraction = 0.8 + + with sv.managed_session(args.solver.master,config=config, + start_standard_services=False) as sess: + sess.run(m.init_op) + sv.saver.restore(sess, last_checkpoint) + sv.start_queue_runners(sess) + vals, _ = tf_utils.train_step_custom( + sess, None, m.global_step_op, train_step_kwargs, mode='val') + if checkpoint_iter >= args.solver.max_steps: + break + +if __name__ == '__main__': + app.run() diff --git a/cognitive_mapping_and_planning/scripts/script_download_init_models.sh b/cognitive_mapping_and_planning/scripts/script_download_init_models.sh new file mode 100644 index 00000000000..1900bd0b035 --- /dev/null +++ b/cognitive_mapping_and_planning/scripts/script_download_init_models.sh @@ -0,0 +1,18 @@ +# Script to download models to initialize the RGB and D models for training.We +# use ResNet-v2-50 for both modalities. + +mkdir -p data/init_models +cd data/init_models + +# RGB Models are initialized by pre-training on ImageNet. +mkdir -p resnet_v2_50 +RGB_URL="http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz" +wget $RGB_URL +tar -xf resnet_v2_50_2017_04_14.tar.gz -C resnet_v2_50 + +# Depth models are initialized by distilling the RGB model to D images using +# Cross-Modal Distillation (https://arxiv.org/abs/1507.00448). +mkdir -p distill_rgb_to_d_resnet_v2_50 +D_URL="http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/distill_rgb_to_d_resnet_v2_50.tar" +wget $D_URL +tar -xf distill_rgb_to_d_resnet_v2_50.tar -C distill_rgb_to_d_resnet_v2_50 diff --git a/cognitive_mapping_and_planning/scripts/script_env_vis.py b/cognitive_mapping_and_planning/scripts/script_env_vis.py new file mode 100644 index 00000000000..03222dfab3f --- /dev/null +++ b/cognitive_mapping_and_planning/scripts/script_env_vis.py @@ -0,0 +1,186 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A simple Python function to walk in the environments that we have created.
+PYTHONPATH='.' PYOPENGL_PLATFORM=egl python scripts/script_env_vis.py \
+  --dataset_name sbpd --building_name area3
+"""
+import sys
+import numpy as np
+import matplotlib
+matplotlib.use('TkAgg')
+from PIL import ImageTk, Image
+import Tkinter as tk
+import logging
+from tensorflow.python.platform import app
+from tensorflow.python.platform import flags
+
+import datasets.nav_env_config as nec
+import datasets.nav_env as nav_env
+import cv2
+from datasets import factory
+import render.swiftshader_renderer as renderer
+
+SwiftshaderRenderer = renderer.SwiftshaderRenderer
+VisualNavigationEnv = nav_env.VisualNavigationEnv
+
+FLAGS = flags.FLAGS
+flags.DEFINE_string('dataset_name', 'sbpd', 'Name of the dataset.')
+flags.DEFINE_float('fov', 60., 'Field of view.')
+flags.DEFINE_integer('image_size', 512, 'Size of the image.')
+flags.DEFINE_string('building_name', '', 'Name of the building.')
+
+def get_args():
+  navtask = nec.nav_env_base_config()
+  navtask.task_params.type = 'rng_rejection_sampling_many'
+  navtask.task_params.rejection_sampling_M = 2000
+  navtask.task_params.min_dist = 10
+  sz = FLAGS.image_size
+  navtask.camera_param.fov = FLAGS.fov
+  navtask.camera_param.height = sz
+  navtask.camera_param.width = sz
+  navtask.task_params.img_height = sz
+  navtask.task_params.img_width = sz
+
+  # navtask.task_params.semantic_task.class_map_names = ['chair', 'door', 'table']
+  # navtask.task_params.type = 'to_nearest_obj_acc'
+
+  logging.info('navtask: %s', navtask)
+  return navtask
+
+def load_building(dataset_name, building_name):
+  dataset = factory.get_dataset(dataset_name)
+
+  navtask = get_args()
+  cp = navtask.camera_param
+  rgb_shader, d_shader = renderer.get_shaders(cp.modalities)
+  r_obj = SwiftshaderRenderer()
+  r_obj.init_display(width=cp.width, height=cp.height,
+                     fov=cp.fov, z_near=cp.z_near, z_far=cp.z_far,
+                     rgb_shader=rgb_shader, d_shader=d_shader)
+  r_obj.clear_scene()
+  b = VisualNavigationEnv(robot=navtask.robot, env=navtask.env,
+                          task_params=navtask.task_params,
+                          building_name=building_name, flip=False,
+                          logdir=None, building_loader=dataset,
+                          r_obj=r_obj)
+  b.load_building_into_scene()
+  b.set_building_visibility(False)
+  return b
+
+def walk_through(b):
+  # Init agent at a random location in the environment.
+  init_env_state = b.reset([np.random.RandomState(0), np.random.RandomState(0)])
+
+  global current_node
+  rng = np.random.RandomState(0)
+  current_node = rng.choice(b.task.nodes.shape[0])
+
+  root = tk.Tk()
+  image = b.render_nodes(b.task.nodes[[current_node],:])[0]
+  print image.shape
+  image = image.astype(np.uint8)
+  im = Image.fromarray(image)
+  im = ImageTk.PhotoImage(im)
+  panel = tk.Label(root, image=im)
+
+  map_size = b.traversible.shape
+  sc = np.max(map_size)/256.
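+  # Scale factor so that the full traversible map fits into the fixed
+  # 256x256 overhead view rendered below.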
+  loc = np.array([[map_size[1]/2., map_size[0]/2.]])
+  x_axis = np.zeros_like(loc); x_axis[:,1] = sc
+  y_axis = np.zeros_like(loc); y_axis[:,0] = -sc
+  cum_fs, cum_valid = nav_env.get_map_to_predict(loc, x_axis, y_axis,
+                                                 map=b.traversible*1.,
+                                                 map_size=256)
+  cum_fs = cum_fs[0]
+  cum_fs = cv2.applyColorMap((cum_fs*255).astype(np.uint8), cv2.COLORMAP_JET)
+  im = Image.fromarray(cum_fs)
+  im = ImageTk.PhotoImage(im)
+  panel_overhead = tk.Label(root, image=im)
+
+  def refresh():
+    global current_node
+    image = b.render_nodes(b.task.nodes[[current_node],:])[0]
+    image = image.astype(np.uint8)
+    im = Image.fromarray(image)
+    im = ImageTk.PhotoImage(im)
+    panel.configure(image=im)
+    panel.image = im
+
+  def left_key(event):
+    global current_node
+    current_node = b.take_action([current_node], [2], 1)[0][0]
+    refresh()
+
+  def up_key(event):
+    global current_node
+    current_node = b.take_action([current_node], [3], 1)[0][0]
+    refresh()
+
+  def right_key(event):
+    global current_node
+    current_node = b.take_action([current_node], [1], 1)[0][0]
+    refresh()
+
+  def quit(event):
+    root.destroy()
+
+  panel_overhead.grid(row=4, column=5, rowspan=1, columnspan=1,
+                      sticky=tk.W+tk.E+tk.N+tk.S)
+  panel.bind('<Left>', left_key)
+  panel.bind('<Up>', up_key)
+  panel.bind('<Right>', right_key)
+  panel.bind('q', quit)
+  panel.focus_set()
+  panel.grid(row=0, column=0, rowspan=5, columnspan=5,
+             sticky=tk.W+tk.E+tk.N+tk.S)
+  root.mainloop()
+
+def simple_window():
+  root = tk.Tk()
+
+  image = np.zeros((128, 128, 3), dtype=np.uint8)
+  image[32:96, 32:96, 0] = 255
+  im = Image.fromarray(image)
+  im = ImageTk.PhotoImage(im)
+
+  image = np.zeros((128, 128, 3), dtype=np.uint8)
+  image[32:96, 32:96, 1] = 255
+  im2 = Image.fromarray(image)
+  im2 = ImageTk.PhotoImage(im2)
+
+  panel = tk.Label(root, image=im)
+
+  def left_key(event):
+    panel.configure(image=im2)
+    panel.image = im2
+
+  def quit(event):
+    sys.exit()
+
+  panel.bind('<Left>', left_key)
+  panel.bind('<Up>', left_key)
+  panel.bind('<Right>', left_key)
+  panel.bind('q', quit)
+  panel.focus_set()
+  panel.pack(side = "bottom", fill = "both", expand = "yes")
+  root.mainloop()
+
+def main(_):
+  b = load_building(FLAGS.dataset_name, FLAGS.building_name)
+  walk_through(b)
+
+if __name__ == '__main__':
+  app.run()
diff --git a/cognitive_mapping_and_planning/scripts/script_nav_agent_release.py b/cognitive_mapping_and_planning/scripts/script_nav_agent_release.py
new file mode 100644
index 00000000000..dab2819a6fc
--- /dev/null
+++ b/cognitive_mapping_and_planning/scripts/script_nav_agent_release.py
@@ -0,0 +1,253 @@
+# Copyright 2016 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r""" Script to train and test the grid navigation agent.
+Usage:
+  1. Testing a model.
+  CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 \
+  PYTHONPATH='.' 
PYOPENGL_PLATFORM=egl python scripts/script_nav_agent_release.py \ + --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r+bench_test \ + --logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r + + 2. Training a model (locally). + CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 \ + PYTHONPATH='.' PYOPENGL_PLATFORM=egl python scripts/script_nav_agent_release.py \ + --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r+train_train \ + --logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r_ + + 3. Training a model (distributed). + # See https://www.tensorflow.org/deploy/distributed on how to setup distributed + # training. + CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 \ + PYTHONPATH='.' PYOPENGL_PLATFORM=egl python scripts/script_nav_agent_release.py \ + --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r+train_train \ + --logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r_ \ + --ps_tasks $num_ps --master $master_name --task $worker_id +""" + +import sys, os, numpy as np +import copy +import argparse, pprint +import time +import cProfile +import platform + + +import tensorflow as tf +from tensorflow.contrib import slim +from tensorflow.python.framework import ops +from tensorflow.contrib.framework.python.ops import variables + +import logging +from tensorflow.python.platform import gfile +from tensorflow.python.platform import app +from tensorflow.python.platform import flags +from cfgs import config_cmp +from cfgs import config_vision_baseline +import datasets.nav_env as nav_env +import src.file_utils as fu +import src.utils as utils +import tfcode.cmp as cmp +from tfcode import tf_utils +from tfcode import vision_baseline_lstm + +FLAGS = flags.FLAGS + +flags.DEFINE_string('master', '', + 'The address of the tensorflow master') +flags.DEFINE_integer('ps_tasks', 0, 'The number of parameter servers. If the ' + 'value is 0, then the parameters are handled locally by ' + 'the worker.') +flags.DEFINE_integer('task', 0, 'The Task ID. 
This value is used when training ' + 'with multiple workers to identify each worker.') + +flags.DEFINE_integer('num_workers', 1, '') + +flags.DEFINE_string('config_name', '', '') + +flags.DEFINE_string('logdir', '', '') + +flags.DEFINE_integer('solver_seed', 0, '') + +flags.DEFINE_integer('delay_start_iters', 20, '') + +logging.basicConfig(level=logging.INFO) + +def main(_): + _launcher(FLAGS.config_name, FLAGS.logdir) + +def _launcher(config_name, logdir): + args = _setup_args(config_name, logdir) + + fu.makedirs(args.logdir) + + if args.control.train: + _train(args) + + if args.control.test: + _test(args) + +def get_args_for_config(config_name): + configs = config_name.split('.') + type = configs[0] + config_name = '.'.join(configs[1:]) + if type == 'cmp': + args = config_cmp.get_args_for_config(config_name) + args.setup_to_run = cmp.setup_to_run + args.setup_train_step_kwargs = cmp.setup_train_step_kwargs + + elif type == 'bl': + args = config_vision_baseline.get_args_for_config(config_name) + args.setup_to_run = vision_baseline_lstm.setup_to_run + args.setup_train_step_kwargs = vision_baseline_lstm.setup_train_step_kwargs + + else: + logging.fatal('Unknown type: {:s}'.format(type)) + return args + +def _setup_args(config_name, logdir): + args = get_args_for_config(config_name) + args.solver.num_workers = FLAGS.num_workers + args.solver.task = FLAGS.task + args.solver.ps_tasks = FLAGS.ps_tasks + args.solver.master = FLAGS.master + args.solver.seed = FLAGS.solver_seed + args.logdir = logdir + args.navtask.logdir = None + return args + +def _train(args): + container_name = "" + + R = lambda: nav_env.get_multiplexer_class(args.navtask, args.solver.task) + m = utils.Foo() + m.tf_graph = tf.Graph() + + config = tf.ConfigProto() + config.device_count['GPU'] = 1 + + with m.tf_graph.as_default(): + with tf.device(tf.train.replica_device_setter(args.solver.ps_tasks, + merge_devices=True)): + with tf.container(container_name): + m = args.setup_to_run(m, args, is_training=True, + batch_norm_is_training=True, summary_mode='train') + + train_step_kwargs = args.setup_train_step_kwargs( + m, R(), os.path.join(args.logdir, 'train'), rng_seed=args.solver.task, + is_chief=args.solver.task==0, + num_steps=args.navtask.task_params.num_steps*args.navtask.task_params.num_goals, iters=1, + train_display_interval=args.summary.display_interval, + dagger_sample_bn_false=args.arch.dagger_sample_bn_false) + + delay_start = (args.solver.task*(args.solver.task+1))/2 * FLAGS.delay_start_iters + logging.error('delaying start for task %d by %d steps.', + args.solver.task, delay_start) + + additional_args = {} + final_loss = slim.learning.train( + train_op=m.train_op, + logdir=args.logdir, + master=args.solver.master, + is_chief=args.solver.task == 0, + number_of_steps=args.solver.max_steps, + train_step_fn=tf_utils.train_step_custom_online_sampling, + train_step_kwargs=train_step_kwargs, + global_step=m.global_step_op, + init_op=m.init_op, + init_fn=m.init_fn, + sync_optimizer=m.sync_optimizer, + saver=m.saver_op, + startup_delay_steps=delay_start, + summary_op=None, session_config=config, **additional_args) + +def _test(args): + args.solver.master = '' + container_name = "" + checkpoint_dir = os.path.join(format(args.logdir)) + logging.error('Checkpoint_dir: %s', args.logdir) + + config = tf.ConfigProto(); + config.device_count['GPU'] = 1; + + m = utils.Foo() + m.tf_graph = tf.Graph() + + rng_data_seed = 0; rng_action_seed = 0; + R = lambda: nav_env.get_multiplexer_class(args.navtask, rng_data_seed) + with 
m.tf_graph.as_default(): + with tf.container(container_name): + m = args.setup_to_run( + m, args, is_training=False, + batch_norm_is_training=args.control.force_batchnorm_is_training_at_test, + summary_mode=args.control.test_mode) + train_step_kwargs = args.setup_train_step_kwargs( + m, R(), os.path.join(args.logdir, args.control.test_name), + rng_seed=rng_data_seed, is_chief=True, + num_steps=args.navtask.task_params.num_steps*args.navtask.task_params.num_goals, + iters=args.summary.test_iters, train_display_interval=None, + dagger_sample_bn_false=args.arch.dagger_sample_bn_false) + + saver = slim.learning.tf_saver.Saver(variables.get_variables_to_restore()) + + sv = slim.learning.supervisor.Supervisor( + graph=ops.get_default_graph(), logdir=None, init_op=m.init_op, + summary_op=None, summary_writer=None, global_step=None, saver=m.saver_op) + + last_checkpoint = None + reported = False + while True: + last_checkpoint_ = None + while last_checkpoint_ is None: + last_checkpoint_ = slim.evaluation.wait_for_new_checkpoint( + checkpoint_dir, last_checkpoint, seconds_to_sleep=10, timeout=60) + if last_checkpoint_ is None: break + + last_checkpoint = last_checkpoint_ + checkpoint_iter = int(os.path.basename(last_checkpoint).split('-')[1]) + + logging.info('Starting evaluation at %s using checkpoint %s.', + time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()), + last_checkpoint) + + if (args.control.only_eval_when_done == False or + checkpoint_iter >= args.solver.max_steps): + start = time.time() + logging.info('Starting evaluation at %s using checkpoint %s.', + time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()), + last_checkpoint) + + with sv.managed_session(args.solver.master, config=config, + start_standard_services=False) as sess: + sess.run(m.init_op) + sv.saver.restore(sess, last_checkpoint) + sv.start_queue_runners(sess) + if args.control.reset_rng_seed: + train_step_kwargs['rng_data'] = [np.random.RandomState(rng_data_seed), + np.random.RandomState(rng_data_seed)] + train_step_kwargs['rng_action'] = np.random.RandomState(rng_action_seed) + vals, _ = tf_utils.train_step_custom_online_sampling( + sess, None, m.global_step_op, train_step_kwargs, + mode=args.control.test_mode) + should_stop = False + + if checkpoint_iter >= args.solver.max_steps: + should_stop = True + + if should_stop: + break + +if __name__ == '__main__': + app.run() diff --git a/cognitive_mapping_and_planning/scripts/script_plot_trajectory.py b/cognitive_mapping_and_planning/scripts/script_plot_trajectory.py new file mode 100644 index 00000000000..81c4c899052 --- /dev/null +++ b/cognitive_mapping_and_planning/scripts/script_plot_trajectory.py @@ -0,0 +1,339 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +r""" +Code for plotting trajectories in the top view, and also plot first person views +from saved trajectories. 
Does not run the network but only loads the mesh data +to plot the view points. + CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 + PYTHONPATH='.' PYOPENGL_PLATFORM=egl python scripts/script_plot_trajectory.py \ + --first_person --num_steps 40 \ + --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r \ + --imset test --alsologtostderr --base_dir output --out_dir vis + +""" +import os, sys, numpy as np, copy +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt +import matplotlib.animation as animation +from matplotlib.gridspec import GridSpec + +import tensorflow as tf +from tensorflow.contrib import slim +import cv2 +import logging +from tensorflow.python.platform import gfile +from tensorflow.python.platform import app +from tensorflow.python.platform import flags + +from datasets import nav_env +import scripts.script_nav_agent_release as sna +import src.file_utils as fu +from src import graph_utils +from src import utils +FLAGS = flags.FLAGS + +flags.DEFINE_string('out_dir', 'vis', 'Directory where to store the output') +flags.DEFINE_string('type', '', 'Optional type.') +flags.DEFINE_bool('first_person', False, 'Visualize the first person view.') +flags.DEFINE_bool('top_view', False, 'Visualize the trajectory in the top view.') +flags.DEFINE_integer('num_steps', 40, 'Number of steps to run the model for.') +flags.DEFINE_string('imset', 'test', '') +flags.DEFINE_string('base_dir', 'output', 'Cache directory.') + +def _get_suffix_str(): + return '' + + +def _load_trajectory(): + base_dir = FLAGS.base_dir + config_name = FLAGS.config_name+_get_suffix_str() + + dir_name = os.path.join(base_dir, FLAGS.type, config_name) + logging.info('Waiting for snapshot in directory %s.', dir_name) + last_checkpoint = slim.evaluation.wait_for_new_checkpoint(dir_name, None) + checkpoint_iter = int(os.path.basename(last_checkpoint).split('-')[1]) + + # Load the distances. + a = utils.load_variables(os.path.join(dir_name, 'bench_on_'+FLAGS.imset, + 'all_locs_at_t_{:d}.pkl'.format(checkpoint_iter))) + return a + +def _compute_hardness(): + # Load the stanford data to compute the hardness. + if FLAGS.type == '': + args = sna.get_args_for_config(FLAGS.config_name+'+bench_'+FLAGS.imset) + else: + args = sna.get_args_for_config(FLAGS.type+'.'+FLAGS.config_name+'+bench_'+FLAGS.imset) + + args.navtask.logdir = None + R = lambda: nav_env.get_multiplexer_class(args.navtask, 0) + R = R() + + rng_data = [np.random.RandomState(0), np.random.RandomState(0)] + + # Sample a room. + h_dists = [] + gt_dists = [] + for i in range(250): + e = R.sample_env(rng_data) + nodes = e.task.nodes + + # Initialize the agent. 
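+    # (The reset also samples start and goal nodes; their ground-truth
+    # geodesic distances are compared with the heuristic distance below to
+    # measure episode hardness.)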
+ init_env_state = e.reset(rng_data) + + gt_dist_to_goal = [e.episode.dist_to_goal[0][j][s] + for j, s in enumerate(e.episode.start_node_ids)] + + for j in range(args.navtask.task_params.batch_size): + start_node_id = e.episode.start_node_ids[j] + end_node_id =e.episode.goal_node_ids[0][j] + h_dist = graph_utils.heuristic_fn_vec( + nodes[[start_node_id],:], nodes[[end_node_id], :], + n_ori=args.navtask.task_params.n_ori, + step_size=args.navtask.task_params.step_size)[0][0] + gt_dist = e.episode.dist_to_goal[0][j][start_node_id] + h_dists.append(h_dist) + gt_dists.append(gt_dist) + + h_dists = np.array(h_dists) + gt_dists = np.array(gt_dists) + e = R.sample_env([np.random.RandomState(0), np.random.RandomState(0)]) + input = e.get_common_data() + orig_maps = input['orig_maps'][0,0,:,:,0] + return h_dists, gt_dists, orig_maps + +def plot_trajectory_first_person(dt, orig_maps, out_dir): + out_dir = os.path.join(out_dir, FLAGS.config_name+_get_suffix_str(), + FLAGS.imset) + fu.makedirs(out_dir) + + # Load the model so that we can render. + plt.set_cmap('gray') + samples_per_action = 8; wait_at_action = 0; + + Writer = animation.writers['mencoder'] + writer = Writer(fps=3*(samples_per_action+wait_at_action), + metadata=dict(artist='anonymous'), bitrate=1800) + + args = sna.get_args_for_config(FLAGS.config_name + '+bench_'+FLAGS.imset) + args.navtask.logdir = None + navtask_ = copy.deepcopy(args.navtask) + navtask_.camera_param.modalities = ['rgb'] + navtask_.task_params.modalities = ['rgb'] + sz = 512 + navtask_.camera_param.height = sz + navtask_.camera_param.width = sz + navtask_.task_params.img_height = sz + navtask_.task_params.img_width = sz + R = lambda: nav_env.get_multiplexer_class(navtask_, 0) + R = R() + b = R.buildings[0] + + f = [0 for _ in range(wait_at_action)] + \ + [float(_)/samples_per_action for _ in range(samples_per_action)]; + + # Generate things for it to render. + inds_to_do = [] + inds_to_do += [1, 4, 10] #1291, 1268, 1273, 1289, 1302, 1426, 1413, 1449, 1399, 1390] + + for i in inds_to_do: + fig = plt.figure(figsize=(10,8)) + gs = GridSpec(3,4) + gs.update(wspace=0.05, hspace=0.05, left=0.0, top=0.97, right=1.0, bottom=0.) + ax = fig.add_subplot(gs[:,:-1]) + ax1 = fig.add_subplot(gs[0,-1]) + ax2 = fig.add_subplot(gs[1,-1]) + ax3 = fig.add_subplot(gs[2,-1]) + axes = [ax, ax1, ax2, ax3] + # ax = fig.add_subplot(gs[:,:]) + # axes = [ax] + for ax in axes: + ax.set_axis_off() + + node_ids = dt['all_node_ids'][i, :, 0]*1 + # Prune so that last node is not repeated more than 3 times? + if np.all(node_ids[-4:] == node_ids[-1]): + while node_ids[-4] == node_ids[-1]: + node_ids = node_ids[:-1] + num_steps = np.minimum(FLAGS.num_steps, len(node_ids)) + + xyt = b.to_actual_xyt_vec(b.task.nodes[node_ids]) + xyt_diff = xyt[1:,:] - xyt[:-1:,:] + xyt_diff[:,2] = np.mod(xyt_diff[:,2], 4) + ind = np.where(xyt_diff[:,2] == 3)[0] + xyt_diff[ind, 2] = -1 + xyt_diff = np.expand_dims(xyt_diff, axis=1) + to_cat = [xyt_diff*_ for _ in f] + perturbs_all = np.concatenate(to_cat, axis=1) + perturbs_all = np.concatenate([perturbs_all, np.zeros_like(perturbs_all[:,:,:1])], axis=2) + node_ids_all = np.expand_dims(node_ids, axis=1)*1 + node_ids_all = np.concatenate([node_ids_all for _ in f], axis=1) + node_ids_all = np.reshape(node_ids_all[:-1,:], -1) + perturbs_all = np.reshape(perturbs_all, [-1, 4]) + imgs = b.render_nodes(b.task.nodes[node_ids_all,:], perturb=perturbs_all) + + # Get action at each node. 
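+    # (Each action is recovered by inverting the feasible-action map: find
+    # the action whose successor is the recorded next node.)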
+ actions = [] + _, action_to_nodes = b.get_feasible_actions(node_ids) + for j in range(num_steps-1): + action_to_node = action_to_nodes[j] + node_to_action = dict(zip(action_to_node.values(), action_to_node.keys())) + actions.append(node_to_action[node_ids[j+1]]) + + def init_fn(): + return fig, + gt_dist_to_goal = [] + + # Render trajectories. + def worker(j): + # Plot the image. + step_number = j/(samples_per_action + wait_at_action) + img = imgs[j]; ax = axes[0]; ax.clear(); ax.set_axis_off(); + img = img.astype(np.uint8); ax.imshow(img); + tt = ax.set_title( + "First Person View\n" + + "Top corners show diagnostics (distance, agents' action) not input to agent.", + fontsize=12) + plt.setp(tt, color='white') + + # Distance to goal. + t = 'Dist to Goal:\n{:2d} steps'.format(int(dt['all_d_at_t'][i, step_number])) + t = ax.text(0.01, 0.99, t, + horizontalalignment='left', + verticalalignment='top', + fontsize=20, color='red', + transform=ax.transAxes, alpha=1.0) + t.set_bbox(dict(color='white', alpha=0.85, pad=-0.1)) + + # Action to take. + action_latex = ['$\odot$ ', '$\curvearrowright$ ', '$\curvearrowleft$ ', '$\Uparrow$ '] + t = ax.text(0.99, 0.99, action_latex[actions[step_number]], + horizontalalignment='right', + verticalalignment='top', + fontsize=40, color='green', + transform=ax.transAxes, alpha=1.0) + t.set_bbox(dict(color='white', alpha=0.85, pad=-0.1)) + + + # Plot the map top view. + ax = axes[-1] + if j == 0: + # Plot the map + locs = dt['all_locs'][i,:num_steps,:] + goal_loc = dt['all_goal_locs'][i,:,:] + xymin = np.minimum(np.min(goal_loc, axis=0), np.min(locs, axis=0)) + xymax = np.maximum(np.max(goal_loc, axis=0), np.max(locs, axis=0)) + xy1 = (xymax+xymin)/2. - 0.7*np.maximum(np.max(xymax-xymin), 24) + xy2 = (xymax+xymin)/2. + 0.7*np.maximum(np.max(xymax-xymin), 24) + + ax.set_axis_on() + ax.patch.set_facecolor((0.333, 0.333, 0.333)) + ax.set_xticks([]); ax.set_yticks([]); + ax.imshow(orig_maps, origin='lower', vmin=-1.0, vmax=2.0) + ax.plot(goal_loc[:,0], goal_loc[:,1], 'g*', markersize=12) + + locs = dt['all_locs'][i,:1,:] + ax.plot(locs[:,0], locs[:,1], 'b.', markersize=12) + + ax.set_xlim([xy1[0], xy2[0]]) + ax.set_ylim([xy1[1], xy2[1]]) + + locs = dt['all_locs'][i,step_number,:] + locs = np.expand_dims(locs, axis=0) + ax.plot(locs[:,0], locs[:,1], 'r.', alpha=1.0, linewidth=0, markersize=4) + tt = ax.set_title('Trajectory in topview', fontsize=14) + plt.setp(tt, color='white') + return fig, + + line_ani = animation.FuncAnimation(fig, worker, + (num_steps-1)*(wait_at_action+samples_per_action), + interval=500, blit=True, init_func=init_fn) + tmp_file_name = 'tmp.mp4' + line_ani.save(tmp_file_name, writer=writer, savefig_kwargs={'facecolor':'black'}) + out_file_name = os.path.join(out_dir, 'vis_{:04d}.mp4'.format(i)) + print out_file_name + + if fu.exists(out_file_name): + gfile.Remove(out_file_name) + gfile.Copy(tmp_file_name, out_file_name) + gfile.Remove(tmp_file_name) + plt.close(fig) + +def plot_trajectory(dt, hardness, orig_maps, out_dir): + out_dir = os.path.join(out_dir, FLAGS.config_name+_get_suffix_str(), + FLAGS.imset) + fu.makedirs(out_dir) + out_file = os.path.join(out_dir, 'all_locs_at_t.pkl') + dt['hardness'] = hardness + utils.save_variables(out_file, dt.values(), dt.keys(), overwrite=True) + + #Plot trajectories onto the maps + plt.set_cmap('gray') + for i in range(4000): + goal_loc = dt['all_goal_locs'][i, :, :] + locs = np.concatenate((dt['all_locs'][i,:,:], + dt['all_locs'][i,:,:]), axis=0) + xymin = np.minimum(np.min(goal_loc, axis=0), 
np.min(locs, axis=0)) + xymax = np.maximum(np.max(goal_loc, axis=0), np.max(locs, axis=0)) + xy1 = (xymax+xymin)/2. - 1.*np.maximum(np.max(xymax-xymin), 24) + xy2 = (xymax+xymin)/2. + 1.*np.maximum(np.max(xymax-xymin), 24) + + fig, ax = utils.tight_imshow_figure(plt, figsize=(6,6)) + ax.set_axis_on() + ax.patch.set_facecolor((0.333, 0.333, 0.333)) + ax.set_xticks([]) + ax.set_yticks([]) + + all_locs = dt['all_locs'][i,:,:]*1 + uniq = np.where(np.any(all_locs[1:,:] != all_locs[:-1,:], axis=1))[0]+1 + uniq = np.sort(uniq).tolist() + uniq.insert(0,0) + uniq = np.array(uniq) + all_locs = all_locs[uniq, :] + + ax.plot(dt['all_locs'][i, 0, 0], + dt['all_locs'][i, 0, 1], 'b.', markersize=24) + ax.plot(dt['all_goal_locs'][i, 0, 0], + dt['all_goal_locs'][i, 0, 1], 'g*', markersize=19) + ax.plot(all_locs[:,0], all_locs[:,1], 'r', alpha=0.4, linewidth=2) + ax.scatter(all_locs[:,0], all_locs[:,1], + c=5+np.arange(all_locs.shape[0])*1./all_locs.shape[0], + cmap='Reds', s=30, linewidth=0) + ax.imshow(orig_maps, origin='lower', vmin=-1.0, vmax=2.0, aspect='equal') + ax.set_xlim([xy1[0], xy2[0]]) + ax.set_ylim([xy1[1], xy2[1]]) + + file_name = os.path.join(out_dir, 'trajectory_{:04d}.png'.format(i)) + print file_name + with fu.fopen(file_name, 'w') as f: + plt.savefig(f) + plt.close(fig) + + +def main(_): + a = _load_trajectory() + h_dists, gt_dists, orig_maps = _compute_hardness() + hardness = 1.-h_dists*1./ gt_dists + + if FLAGS.top_view: + plot_trajectory(a, hardness, orig_maps, out_dir=FLAGS.out_dir) + + if FLAGS.first_person: + plot_trajectory_first_person(a, orig_maps, out_dir=FLAGS.out_dir) + +if __name__ == '__main__': + app.run() diff --git a/cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.py b/cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.py new file mode 100644 index 00000000000..58f32d121ac --- /dev/null +++ b/cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.py @@ -0,0 +1,197 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import os +import glob +import numpy as np +import logging +import cPickle +from datasets import nav_env +from datasets import factory +from src import utils +from src import map_utils as mu + +logging.basicConfig(level=logging.INFO) +DATA_DIR = 'data/stanford_building_parser_dataset_raw/' + +mkdir_if_missing = utils.mkdir_if_missing +save_variables = utils.save_variables + +def _get_semantic_maps(building_name, transform, map_, flip, cats): + rooms = get_room_in_building(building_name) + maps = [] + for cat in cats: + maps.append(np.zeros((map_.size[1], map_.size[0]))) + + for r in rooms: + room = load_room(building_name, r, category_list=cats) + classes = room['class_id'] + for i, cat in enumerate(cats): + c_ind = cats.index(cat) + ind = [_ for _, c in enumerate(classes) if c == c_ind] + if len(ind) > 0: + vs = [room['vertexs'][x]*1 for x in ind] + vs = np.concatenate(vs, axis=0) + if transform: + vs = np.array([vs[:,1], vs[:,0], vs[:,2]]).T + vs[:,0] = -vs[:,0] + vs[:,1] += 4.20 + vs[:,0] += 6.20 + vs = vs*100. + if flip: + vs[:,1] = -vs[:,1] + maps[i] = maps[i] + \ + mu._project_to_map(map_, vs, ignore_points_outside_map=True) + return maps + +def _map_building_name(building_name): + b = int(building_name.split('_')[0][4]) + out_name = 'Area_{:d}'.format(b) + if b == 5: + if int(building_name.split('_')[0][5]) == 1: + transform = True + else: + transform = False + else: + transform = False + return out_name, transform + +def get_categories(): + cats = ['beam', 'board', 'bookcase', 'ceiling', 'chair', 'clutter', 'column', + 'door', 'floor', 'sofa', 'table', 'wall', 'window'] + return cats + +def _write_map_files(b_in, b_out, transform): + cats = get_categories() + + env = utils.Foo(padding=10, resolution=5, num_point_threshold=2, + valid_min=-10, valid_max=200, n_samples_per_face=200) + robot = utils.Foo(radius=15, base=10, height=140, sensor_height=120, + camera_elevation_degree=-15) + + building_loader = factory.get_dataset('sbpd') + for flip in [False, True]: + b = nav_env.Building(b_out, robot, env, flip=flip, + building_loader=building_loader) + logging.info("building_in: %s, building_out: %s, transform: %d", b_in, + b_out, transform) + maps = _get_semantic_maps(b_in, transform, b.map, flip, cats) + maps = np.transpose(np.array(maps), axes=[1,2,0]) + + # Load file from the cache. + file_name = '{:s}_{:d}_{:d}_{:d}_{:d}_{:d}_{:d}.pkl' + file_name = file_name.format(b.building_name, b.map.size[0], b.map.size[1], + b.map.origin[0], b.map.origin[1], + b.map.resolution, flip) + out_file = os.path.join(DATA_DIR, 'processing', 'class-maps', file_name) + logging.info('Writing semantic maps to %s.', out_file) + save_variables(out_file, [maps, cats], ['maps', 'cats'], overwrite=True) + +def _transform_area5b(room_dimension): + for a in room_dimension.keys(): + r = room_dimension[a]*1 + r[[0,1,3,4]] = r[[1,0,4,3]] + r[[0,3]] = -r[[3,0]] + r[[1,4]] += 4.20 + r[[0,3]] += 6.20 + room_dimension[a] = r + return room_dimension + +def collect_room(building_name, room_name): + room_dir = os.path.join(DATA_DIR, 'Stanford3dDataset_v1.2', building_name, + room_name, 'Annotations') + files = glob.glob1(room_dir, '*.txt') + files = sorted(files, key=lambda s: s.lower()) + vertexs = []; colors = []; + for f in files: + file_name = os.path.join(room_dir, f) + logging.info(' %s', file_name) + a = np.loadtxt(file_name) + vertex = a[:,:3]*1. 
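+    # Each row of the annotation file is [x y z r g b]: columns 0:3 are the
+    # point coordinates and columns 3:6 the per-point color.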
+ color = a[:,3:]*1 + color = color.astype(np.uint8) + vertexs.append(vertex) + colors.append(color) + files = [f.split('.')[0] for f in files] + out = {'vertexs': vertexs, 'colors': colors, 'names': files} + return out + +def load_room(building_name, room_name, category_list=None): + room = collect_room(building_name, room_name) + room['building_name'] = building_name + room['room_name'] = room_name + instance_id = range(len(room['names'])) + room['instance_id'] = instance_id + if category_list is not None: + name = [r.split('_')[0] for r in room['names']] + class_id = [] + for n in name: + if n in category_list: + class_id.append(category_list.index(n)) + else: + class_id.append(len(category_list)) + room['class_id'] = class_id + room['category_list'] = category_list + return room + +def get_room_in_building(building_name): + building_dir = os.path.join(DATA_DIR, 'Stanford3dDataset_v1.2', building_name) + rn = os.listdir(building_dir) + rn = [x for x in rn if os.path.isdir(os.path.join(building_dir, x))] + rn = sorted(rn, key=lambda s: s.lower()) + return rn + +def write_room_dimensions(b_in, b_out, transform): + rooms = get_room_in_building(b_in) + room_dimension = {} + for r in rooms: + room = load_room(b_in, r, category_list=None) + vertex = np.concatenate(room['vertexs'], axis=0) + room_dimension[r] = np.concatenate((np.min(vertex, axis=0), np.max(vertex, axis=0)), axis=0) + if transform == 1: + room_dimension = _transform_area5b(room_dimension) + + out_file = os.path.join(DATA_DIR, 'processing', 'room-dimension', b_out+'.pkl') + save_variables(out_file, [room_dimension], ['room_dimension'], overwrite=True) + +def write_room_dimensions_all(I): + mkdir_if_missing(os.path.join(DATA_DIR, 'processing', 'room-dimension')) + bs_in = ['Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_5', 'Area_6'] + bs_out = ['area1', 'area2', 'area3', 'area4', 'area5a', 'area5b', 'area6'] + transforms = [0, 0, 0, 0, 0, 1, 0] + + for i in I: + b_in = bs_in[i] + b_out = bs_out[i] + t = transforms[i] + write_room_dimensions(b_in, b_out, t) + +def write_class_maps_all(I): + mkdir_if_missing(os.path.join(DATA_DIR, 'processing', 'class-maps')) + bs_in = ['Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_5', 'Area_6'] + bs_out = ['area1', 'area2', 'area3', 'area4', 'area5a', 'area5b', 'area6'] + transforms = [0, 0, 0, 0, 0, 1, 0] + + for i in I: + b_in = bs_in[i] + b_out = bs_out[i] + t = transforms[i] + _write_map_files(b_in, b_out, t) + + +if __name__ == '__main__': + write_room_dimensions_all([0, 2, 3, 4, 5, 6]) + write_class_maps_all([0, 2, 3, 4, 5, 6]) + diff --git a/cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.sh b/cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.sh new file mode 100644 index 00000000000..1384fabe692 --- /dev/null +++ b/cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.sh @@ -0,0 +1,24 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +cd data/stanford_building_parser_dataset_raw +unzip Stanford3dDataset_v1.2.zip +cd ../../ +PYOPENGL_PLATFORM=egl PYTHONPATH='.' python scripts/script_preprocess_annoations_S3DIS.py + +mv data/stanford_building_parser_dataset_raw/processing/room-dimension data/stanford_building_parser_dataset/. +mv data/stanford_building_parser_dataset_raw/processing/class-maps data/stanford_building_parser_dataset/. + +echo "You may now delete data/stanford_building_parser_dataset_raw if needed." diff --git a/cognitive_mapping_and_planning/scripts/script_preprocess_meshes_S3DIS.sh b/cognitive_mapping_and_planning/scripts/script_preprocess_meshes_S3DIS.sh new file mode 100644 index 00000000000..557a4dde611 --- /dev/null +++ b/cognitive_mapping_and_planning/scripts/script_preprocess_meshes_S3DIS.sh @@ -0,0 +1,37 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +mkdir -p data/stanford_building_parser_dataset +mkdir -p data/stanford_building_parser_dataset/mesh +cd data/stanford_building_parser_dataset_raw + +# Untar the files and extract the meshes. +for t in "1" "3" "4" "5a" "5b" "6"; do + tar -xf area_"$t"_noXYZ.tar area_$t/3d/rgb_textures + mv area_$t/3d/rgb_textures ../stanford_building_parser_dataset/mesh/area$t + rmdir area_$t/3d + rmdir area_$t +done + +cd ../../ + +# Preprocess meshes to remove the group and chunk information. +cd data/stanford_building_parser_dataset/ +for t in "1" "3" "4" "5a" "5b" "6"; do + obj_name=`ls mesh/area$t/*.obj` + cp $obj_name "$obj_name".bck + cat $obj_name.bck | grep -v '^g' | grep -v '^o' > $obj_name +done +cd ../../ diff --git a/cognitive_mapping_and_planning/scripts/script_test_pretrained_models.sh b/cognitive_mapping_and_planning/scripts/script_test_pretrained_models.sh new file mode 100644 index 00000000000..a4299fff534 --- /dev/null +++ b/cognitive_mapping_and_planning/scripts/script_test_pretrained_models.sh @@ -0,0 +1,63 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# Test CMP models. +CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' 
PYOPENGL_PLATFORM=egl \ + python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r+bench_test \ + --logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r + +CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ + python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_rgb_r2r+bench_test \ + --logdir output/cmp.lmap_Msc.clip5.sbpd_rgb_r2r + +CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ + python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_d_ST+bench_test \ + --logdir output/cmp.lmap_Msc.clip5.sbpd_d_ST + +CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ + python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_rgb_ST+bench_test \ + --logdir output/cmp.lmap_Msc.clip5.sbpd_rgb_ST + +CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ + python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r_h0_64_80+bench_test \ + --logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r_h0_64_80 + +# Test LSTM baseline models. +CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ + python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_d_r2r+bench_test \ + --logdir output/bl.v2.noclip.sbpd_d_r2r + +CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ + python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_rgb_r2r+bench_test \ + --logdir output/bl.v2.noclip.sbpd_rgb_r2r + +CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ + python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_d_ST+bench_test \ + --logdir output/bl.v2.noclip.sbpd_d_ST + +CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ + python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_rgb_ST+bench_test \ + --logdir output/bl.v2.noclip.sbpd_rgb_ST + +CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ + python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_d_r2r_h0_64_80+bench_test \ + --logdir output/bl.v2.noclip.sbpd_d_r2r_h0_64_80 + +# Visualize test trajectories in top view. +# CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ +# python scripts/script_plot_trajectory.py \ +# --first_person --num_steps 40 \ +# --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r \ +# --imset test --alsologtostderr diff --git a/cognitive_mapping_and_planning/src/__init__.py b/cognitive_mapping_and_planning/src/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cognitive_mapping_and_planning/src/depth_utils.py b/cognitive_mapping_and_planning/src/depth_utils.py new file mode 100644 index 00000000000..b1fb2f51e5c --- /dev/null +++ b/cognitive_mapping_and_planning/src/depth_utils.py @@ -0,0 +1,95 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Utilities for processing depth images.
+"""
+import numpy as np
+import src.rotation_utils as ru
+import src.utils as utils
+
+def get_camera_matrix(width, height, fov):
+  """Returns a camera matrix from image size and fov."""
+  xc = (width-1.) / 2.
+  zc = (height-1.) / 2.
+  f = (width / 2.) / np.tan(np.deg2rad(fov / 2.))
+  camera_matrix = utils.Foo(xc=xc, zc=zc, f=f)
+  return camera_matrix
+
+def get_point_cloud_from_z(Y, camera_matrix):
+  """Projects the depth image Y into a 3D point cloud.
+  Inputs:
+    Y is ...xHxW
+    camera_matrix: camera matrix from get_camera_matrix.
+  Outputs:
+    X is positive going right
+    Y is positive into the image
+    Z is positive up in the image
+    XYZ is ...xHxWx3
+  """
+  x, z = np.meshgrid(np.arange(Y.shape[-1]),
+                     np.arange(Y.shape[-2]-1, -1, -1))
+  for i in range(Y.ndim-2):
+    x = np.expand_dims(x, axis=0)
+    z = np.expand_dims(z, axis=0)
+  X = (x-camera_matrix.xc) * Y / camera_matrix.f
+  Z = (z-camera_matrix.zc) * Y / camera_matrix.f
+  XYZ = np.concatenate((X[...,np.newaxis], Y[...,np.newaxis],
+                        Z[...,np.newaxis]), axis=X.ndim)
+  return XYZ
+
+def make_geocentric(XYZ, sensor_height, camera_elevation_degree):
+  """Transforms the point cloud into the geocentric coordinate frame.
+  Input:
+    XYZ : ...x3
+    sensor_height : height of the sensor
+    camera_elevation_degree : camera elevation to rectify.
+  Output:
+    XYZ : ...x3
+  """
+  R = ru.get_r_matrix([1.,0.,0.], angle=np.deg2rad(camera_elevation_degree))
+  XYZ = np.matmul(XYZ.reshape(-1,3), R.T).reshape(XYZ.shape)
+  XYZ[...,2] = XYZ[...,2] + sensor_height
+  return XYZ
+
+def bin_points(XYZ_cms, map_size, z_bins, xy_resolution):
+  """Bins points into xy-z bins.
+  XYZ_cms is ... x H x W x 3
+  Output is ... x map_size x map_size x (len(z_bins)+1)
+  """
+  sh = XYZ_cms.shape
+  XYZ_cms = XYZ_cms.reshape([-1, sh[-3], sh[-2], sh[-1]])
+  n_z_bins = len(z_bins)+1
+  map_center = (map_size-1.)/2.
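+  # Each valid point scatters into the flat bin index
+  # ind = (Y_bin*map_size + X_bin)*n_z_bins + Z_bin computed below. For
+  # example, with map_size=4 and z_bins=[50, 150] (so n_z_bins=3), a point
+  # that falls in cell X_bin=1, Y_bin=2 with height bin Z_bin=1 gets index
+  # (2*4 + 1)*3 + 1 = 28. Invalid points are redirected to index 0 with zero
+  # weight, so they do not corrupt the counts.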
+  counts = []
+  isvalids = []
+  for XYZ_cm in XYZ_cms:
+    isnotnan = np.logical_not(np.isnan(XYZ_cm[:,:,0]))
+    X_bin = np.round(XYZ_cm[:,:,0] / xy_resolution + map_center).astype(np.int32)
+    Y_bin = np.round(XYZ_cm[:,:,1] / xy_resolution + map_center).astype(np.int32)
+    Z_bin = np.digitize(XYZ_cm[:,:,2], bins=z_bins).astype(np.int32)
+
+    isvalid = np.array([X_bin >= 0, X_bin < map_size, Y_bin >= 0, Y_bin < map_size,
+                        Z_bin >= 0, Z_bin < n_z_bins, isnotnan])
+    isvalid = np.all(isvalid, axis=0)
+
+    ind = (Y_bin * map_size + X_bin) * n_z_bins + Z_bin
+    ind[np.logical_not(isvalid)] = 0
+    count = np.bincount(ind.ravel(), isvalid.ravel().astype(np.int32),
+                        minlength=map_size*map_size*n_z_bins)
+    count = np.reshape(count, [map_size, map_size, n_z_bins])
+    counts.append(count)
+    isvalids.append(isvalid)
+  counts = np.array(counts).reshape(list(sh[:-3]) + [map_size, map_size, n_z_bins])
+  isvalids = np.array(isvalids).reshape(list(sh[:-3]) + [sh[-3], sh[-2], 1])
+  return counts, isvalids
diff --git a/cognitive_mapping_and_planning/src/file_utils.py b/cognitive_mapping_and_planning/src/file_utils.py
new file mode 100644
index 00000000000..5bf0e4a2e0d
--- /dev/null
+++ b/cognitive_mapping_and_planning/src/file_utils.py
@@ -0,0 +1,41 @@
+# Copyright 2016 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Utilities for manipulating files.
+"""
+import os
+import numpy as np
+import PIL
+from tensorflow.python.platform import gfile
+import cv2
+
+exists = lambda path: gfile.Exists(path)
+fopen = lambda path, mode: gfile.Open(path, mode)
+makedirs = lambda path: gfile.MakeDirs(path)
+listdir = lambda path: gfile.ListDir(path)
+copyfile = lambda a, b, o: gfile.Copy(a, b, o)
+
+def write_image(image_path, rgb):
+  ext = os.path.splitext(image_path)[1]
+  with gfile.GFile(image_path, 'w') as f:
+    # OpenCV encodes BGR, so reverse the channel order before encoding.
+    img_str = cv2.imencode(ext, rgb[:,:,::-1])[1].tostring()
+    f.write(img_str)
+
+def read_image(image_path, type='rgb'):
+  with fopen(image_path, 'r') as f:
+    I = PIL.Image.open(f)
+    II = np.array(I)
+  if type == 'rgb':
+    II = II[:,:,:3]
+  return II
diff --git a/cognitive_mapping_and_planning/src/graph_utils.py b/cognitive_mapping_and_planning/src/graph_utils.py
new file mode 100644
index 00000000000..d40eb62ca6e
--- /dev/null
+++ b/cognitive_mapping_and_planning/src/graph_utils.py
@@ -0,0 +1,550 @@
+# Copyright 2016 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Various functions to manipulate graphs for computing distances.
+"""
+import skimage.morphology
+import numpy as np
+import networkx as nx
+import itertools
+import logging
+import graph_tool as gt
+import graph_tool.topology
+import graph_tool.generation
+import src.utils as utils
+
+# Compute shortest path from all nodes to or from all source nodes.
+def get_distance_node_list(gtG, source_nodes, direction, weights=None):
+  gtG_ = gt.Graph(gtG)
+  v = gtG_.add_vertex()
+
+  if weights is not None:
+    weights = gtG_.edge_properties[weights]
+
+  for s in source_nodes:
+    e = gtG_.add_edge(s, int(v))
+    if weights is not None:
+      weights[e] = 0.
+
+  if direction == 'to':
+    dist = gt.topology.shortest_distance(
+        gt.GraphView(gtG_, reversed=True), source=gtG_.vertex(int(v)),
+        target=None, weights=weights)
+  elif direction == 'from':
+    dist = gt.topology.shortest_distance(
+        gt.GraphView(gtG_, reversed=False), source=gtG_.vertex(int(v)),
+        target=None, weights=weights)
+  dist = np.array(dist.get_array())
+  # Drop the distance for the auxiliary vertex, and correct for the extra
+  # unit-length edge when the graph is unweighted.
+  dist = dist[:-1]
+  if weights is None:
+    dist = dist-1
+  return dist
+
+# Functions for semantically labelling nodes in the traversal graph.
+def generate_lattice(sz_x, sz_y):
+  """Generates a lattice with sz_x vertices along x and sz_y vertices along
+  y. Adjacent vertices are unit distance apart and the origin is at (0,0)."""
+  g = gt.generation.lattice([sz_x, sz_y])
+  x, y = np.meshgrid(np.arange(sz_x), np.arange(sz_y))
+  x = np.reshape(x, [-1,1]); y = np.reshape(y, [-1,1]);
+  nodes = np.concatenate((x,y), axis=1)
+  return g, nodes
+
+def add_diagonal_edges(g, nodes, sz_x, sz_y, edge_len):
+  offset = [sz_x+1, sz_x-1]
+  for o in offset:
+    s = np.arange(nodes.shape[0]-o-1)
+    t = s + o
+    ind = np.all(np.abs(nodes[s,:] - nodes[t,:]) == np.array([[1,1]]), axis=1)
+    s = s[ind][:,np.newaxis]
+    t = t[ind][:,np.newaxis]
+    st = np.concatenate((s,t), axis=1)
+    for i in range(st.shape[0]):
+      e = g.add_edge(st[i,0], st[i,1], add_missing=False)
+      g.ep['wts'][e] = edge_len
+
+def convert_traversible_to_graph(traversible, ff_cost=1., fo_cost=1.,
+                                 oo_cost=1., connectivity=4):
+  assert(connectivity == 4 or connectivity == 8)
+
+  sz_x = traversible.shape[1]
+  sz_y = traversible.shape[0]
+  g, nodes = generate_lattice(sz_x, sz_y)
+
+  # Assign costs.
+  edge_wts = g.new_edge_property('float')
+  g.edge_properties['wts'] = edge_wts
+  wts = np.ones(g.num_edges(), dtype=np.float32)
+  edge_wts.get_array()[:] = wts
+
+  if connectivity == 8:
+    add_diagonal_edges(g, nodes, sz_x, sz_y, np.sqrt(2.))
+
+  se = np.array([[int(e.source()), int(e.target())] for e in g.edges()])
+  s_xy = nodes[se[:,0]]
+  t_xy = nodes[se[:,1]]
+  s_t = np.ravel_multi_index((s_xy[:,1], s_xy[:,0]), traversible.shape)
+  t_t = np.ravel_multi_index((t_xy[:,1], t_xy[:,0]), traversible.shape)
+  s_t = traversible.ravel()[s_t]
+  t_t = traversible.ravel()[t_t]
+
+  # Weight each edge by what it connects: free-free, obstacle-obstacle, or
+  # free-obstacle.
+  wts = np.zeros(g.num_edges(), dtype=np.float32)
+  wts[np.logical_and(s_t == True, t_t == True)] = ff_cost
+  wts[np.logical_and(s_t == False, t_t == False)] = oo_cost
+  wts[np.logical_xor(s_t, t_t)] = fo_cost
+
+  edge_wts = g.edge_properties['wts']
+  for i, e in enumerate(g.edges()):
+    edge_wts[e] = edge_wts[e] * wts[i]
+  return g, nodes
+
+def label_nodes_with_class(nodes_xyt, class_maps, pix):
+  """
+  Returns:
+    class_maps__: one-hot class_map for each class.
+ node_class_label: one-hot class_map for each class, nodes_xyt.shape[0] x n_classes + """ + # Assign each pixel to a node. + selem = skimage.morphology.disk(pix) + class_maps_ = class_maps*1. + for i in range(class_maps.shape[2]): + class_maps_[:,:,i] = skimage.morphology.dilation(class_maps[:,:,i]*1, selem) + class_maps__ = np.argmax(class_maps_, axis=2) + class_maps__[np.max(class_maps_, axis=2) == 0] = -1 + + # For each node pick out the label from this class map. + x = np.round(nodes_xyt[:,[0]]).astype(np.int32) + y = np.round(nodes_xyt[:,[1]]).astype(np.int32) + ind = np.ravel_multi_index((y,x), class_maps__.shape) + node_class_label = class_maps__.ravel()[ind][:,0] + + # Convert to one hot versions. + class_maps_one_hot = np.zeros(class_maps.shape, dtype=np.bool) + node_class_label_one_hot = np.zeros((node_class_label.shape[0], class_maps.shape[2]), dtype=np.bool) + for i in range(class_maps.shape[2]): + class_maps_one_hot[:,:,i] = class_maps__ == i + node_class_label_one_hot[:,i] = node_class_label == i + return class_maps_one_hot, node_class_label_one_hot + +def label_nodes_with_class_geodesic(nodes_xyt, class_maps, pix, traversible, + ff_cost=1., fo_cost=1., oo_cost=1., + connectivity=4): + """Labels nodes in nodes_xyt with class labels using geodesic distance as + defined by traversible from class_maps. + Inputs: + nodes_xyt + class_maps: counts for each class. + pix: distance threshold to consider close enough to target. + traversible: binary map of whether traversible or not. + Output: + labels: For each node in nodes_xyt returns a label of the class or -1 is + unlabelled. + """ + g, nodes = convert_traversible_to_graph(traversible, ff_cost=ff_cost, + fo_cost=fo_cost, oo_cost=oo_cost, + connectivity=connectivity) + + class_dist = np.zeros_like(class_maps*1.) + n_classes = class_maps.shape[2] + if False: + # Assign each pixel to a class based on number of points. + selem = skimage.morphology.disk(pix) + class_maps_ = class_maps*1. + class_maps__ = np.argmax(class_maps_, axis=2) + class_maps__[np.max(class_maps_, axis=2) == 0] = -1 + + # Label nodes with classes. + for i in range(n_classes): + # class_node_ids = np.where(class_maps__.ravel() == i)[0] + class_node_ids = np.where(class_maps[:,:,i].ravel() > 0)[0] + dist_i = get_distance_node_list(g, class_node_ids, 'to', weights='wts') + class_dist[:,:,i] = np.reshape(dist_i, class_dist[:,:,i].shape) + class_map_geodesic = (class_dist <= pix) + class_map_geodesic = np.reshape(class_map_geodesic, [-1, n_classes]) + + # For each node pick out the label from this class map. 
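+  # Nodes are rounded to the nearest pixel and the label is read off the
+  # argmax map; e.g. a node at (x=3.6, y=7.2) reads class_maps__[7, 4].
+  # ravel_multi_index takes (row, col), hence the (y, x) ordering.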
+  x = np.round(nodes_xyt[:,[0]]).astype(np.int32)
+  y = np.round(nodes_xyt[:,[1]]).astype(np.int32)
+  ind = np.ravel_multi_index((y,x), class_dist[:,:,0].shape)
+  node_class_label = class_map_geodesic[ind[:,0],:]
+  class_map_geodesic = class_dist <= pix
+  return class_map_geodesic, node_class_label
+
+def _get_next_nodes_undirected(n, sc, n_ori):
+  nodes_to_add = []
+  nodes_to_validate = []
+  (p, q, r) = n
+  nodes_to_add.append((n, (p, q, r), 0))
+  if n_ori == 4:
+    for _ in [1, 2, 3, 4]:
+      if _ == 1:
+        v = (p - sc, q, r)
+      elif _ == 2:
+        v = (p + sc, q, r)
+      elif _ == 3:
+        v = (p, q - sc, r)
+      elif _ == 4:
+        v = (p, q + sc, r)
+      nodes_to_validate.append((n, v, _))
+  return nodes_to_add, nodes_to_validate
+
+def _get_next_nodes(n, sc, n_ori):
+  nodes_to_add = []
+  nodes_to_validate = []
+  (p, q, r) = n
+  # Rotation actions (turn left, stay, turn right) are always valid.
+  for r_, a_ in zip([-1, 0, 1], [1, 0, 2]):
+    nodes_to_add.append((n, (p, q, np.mod(r+r_, n_ori)), a_))
+
+  # The forward action depends on the current orientation and needs to be
+  # validated against the environment.
+  if n_ori == 6:
+    if r == 0:
+      v = (p + sc, q, r)
+    elif r == 1:
+      v = (p + sc, q + sc, r)
+    elif r == 2:
+      v = (p, q + sc, r)
+    elif r == 3:
+      v = (p - sc, q, r)
+    elif r == 4:
+      v = (p - sc, q - sc, r)
+    elif r == 5:
+      v = (p, q - sc, r)
+  elif n_ori == 4:
+    if r == 0:
+      v = (p + sc, q, r)
+    elif r == 1:
+      v = (p, q + sc, r)
+    elif r == 2:
+      v = (p - sc, q, r)
+    elif r == 3:
+      v = (p, q - sc, r)
+  nodes_to_validate.append((n, v, 3))
+
+  return nodes_to_add, nodes_to_validate
+
+def generate_graph(valid_fn_vec=None, sc=1., n_ori=6,
+                   starting_location=(0, 0, 0), vis=False, directed=True):
+  timer = utils.Timer()
+  timer.tic()
+  if directed: G = nx.DiGraph(directed=True)
+  else: G = nx.Graph()
+  G.add_node(starting_location)
+  new_nodes = G.nodes()
+  while len(new_nodes) != 0:
+    nodes_to_add = []
+    nodes_to_validate = []
+    for n in new_nodes:
+      if directed:
+        na, nv = _get_next_nodes(n, sc, n_ori)
+      else:
+        na, nv = _get_next_nodes_undirected(n, sc, n_ori)
+      nodes_to_add = nodes_to_add + na
+      if valid_fn_vec is not None:
+        nodes_to_validate = nodes_to_validate + nv
+      else:
+        # Without a validity function all candidate nodes are added directly.
+        nodes_to_add = nodes_to_add + nv
+
+    # Validate nodes.
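+    # valid_fn_vec is expected to map a list of (x, y, theta) states to one
+    # boolean each; only candidate edges whose target state is valid (e.g.
+    # traversible in the environment) make it into the graph.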
+    vs = [_[1] for _ in nodes_to_validate]
+    valids = valid_fn_vec(vs) if valid_fn_vec is not None else []
+
+    for nva, valid in zip(nodes_to_validate, valids):
+      if valid:
+        nodes_to_add.append(nva)
+
+    new_nodes = []
+    for n,v,a in nodes_to_add:
+      if not G.has_node(v):
+        new_nodes.append(v)
+      G.add_edge(n, v, action=a)
+
+  timer.toc(average=True, log_at=1, log_str='src.graph_utils.generate_graph')
+  return G
+
+def vis_G(G, ax, vertex_color='r', edge_color='b', r=None):
+  if edge_color is not None:
+    for e in G.edges():
+      XYT = zip(*e)
+      x = XYT[-3]
+      y = XYT[-2]
+      t = XYT[-1]
+      if r is None or t[0] == r:
+        ax.plot(x, y, edge_color)
+  if vertex_color is not None:
+    XYT = zip(*G.nodes())
+    x = XYT[-3]
+    y = XYT[-2]
+    t = XYT[-1]
+    ax.plot(x, y, vertex_color + '.')
+
+def convert_to_graph_tool(G):
+  timer = utils.Timer()
+  timer.tic()
+  gtG = gt.Graph(directed=G.is_directed())
+  gtG.ep['action'] = gtG.new_edge_property('int')
+
+  nodes_list = G.nodes()
+  nodes_array = np.array(nodes_list)
+
+  nodes_id = np.zeros((nodes_array.shape[0],), dtype=np.int64)
+
+  for i in range(nodes_array.shape[0]):
+    v = gtG.add_vertex()
+    nodes_id[i] = int(v)
+
+  d = dict(itertools.izip(nodes_list, nodes_id))
+
+  for src, dst, data in G.edges_iter(data=True):
+    e = gtG.add_edge(d[src], d[dst])
+    gtG.ep['action'][e] = data['action']
+  nodes_to_id = d
+  timer.toc(average=True, log_at=1,
+            log_str='src.graph_utils.convert_to_graph_tool')
+  return gtG, nodes_array, nodes_to_id
+
+
+def _rejection_sampling(rng, sampling_d, target_d, bins, hardness, M):
+  bin_ind = np.digitize(hardness, bins)-1
+  i = 0
+  ratio = target_d[bin_ind] / (M*sampling_d[bin_ind])
+  while i < ratio.size and rng.rand() > ratio[i]:
+    i = i+1
+  return i
+
+def heuristic_fn_vec(n1, n2, n_ori, step_size):
+  # n1 is a vector of nodes and n2 is a single node.
+  dx = (n1[:,0] - n2[0,0])/step_size
+  dy = (n1[:,1] - n2[0,1])/step_size
+  dt = n1[:,2] - n2[0,2]
+  dt = np.mod(dt, n_ori)
+  dt = np.minimum(dt, n_ori-dt)
+
+  if n_ori == 6:
+    # Hex-grid distance, evaluated elementwise: dx and dy are vectors, so the
+    # same-sign test has to be done per node rather than with a bare `if`.
+    d = np.where(dx*dy > 0, np.maximum(np.abs(dx), np.abs(dy)),
+                 np.abs(dy-dx))
+  elif n_ori == 4:
+    d = np.abs(dx) + np.abs(dy)
+
+  return (d + dt).reshape((-1,1))
+
+def get_hardness_distribution(gtG, max_dist, min_dist, rng, trials, bins, nodes,
+                              n_ori, step_size):
+  heuristic_fn = lambda node_ids, node_id: \
+    heuristic_fn_vec(nodes[node_ids, :], nodes[[node_id], :], n_ori, step_size)
+  num_nodes = gtG.num_vertices()
+  gt_dists = []; h_dists = [];
+  for i in range(trials):
+    end_node_id = rng.choice(num_nodes)
+    gt_dist = gt.topology.shortest_distance(gt.GraphView(gtG, reversed=True),
+                                            source=gtG.vertex(end_node_id),
+                                            target=None, max_dist=max_dist)
+    gt_dist = np.array(gt_dist.get_array())
+    ind = np.where(np.logical_and(gt_dist <= max_dist, gt_dist >= min_dist))[0]
+    gt_dist = gt_dist[ind]
+    h_dist = heuristic_fn(ind, end_node_id)[:,0]
+    gt_dists.append(gt_dist)
+    h_dists.append(h_dist)
+  gt_dists = np.concatenate(gt_dists)
+  h_dists = np.concatenate(h_dists)
+  hardness = 1.
- h_dists*1./gt_dists + hist, _ = np.histogram(hardness, bins) + hist = hist.astype(np.float64) + hist = hist / np.sum(hist) + return hist + +def rng_next_goal_rejection_sampling(start_node_ids, batch_size, gtG, rng, + max_dist, min_dist, max_dist_to_compute, + sampling_d, target_d, + nodes, n_ori, step_size, bins, M): + sample_start_nodes = start_node_ids is None + dists = []; pred_maps = []; end_node_ids = []; start_node_ids_ = []; + hardnesss = []; gt_dists = []; + num_nodes = gtG.num_vertices() + for i in range(batch_size): + done = False + while not done: + if sample_start_nodes: + start_node_id = rng.choice(num_nodes) + else: + start_node_id = start_node_ids[i] + + gt_dist = gt.topology.shortest_distance( + gt.GraphView(gtG, reversed=False), source=start_node_id, target=None, + max_dist=max_dist) + gt_dist = np.array(gt_dist.get_array()) + ind = np.where(np.logical_and(gt_dist <= max_dist, gt_dist >= min_dist))[0] + ind = rng.permutation(ind) + gt_dist = gt_dist[ind]*1. + h_dist = heuristic_fn_vec(nodes[ind, :], nodes[[start_node_id], :], + n_ori, step_size)[:,0] + hardness = 1. - h_dist / gt_dist + sampled_ind = _rejection_sampling(rng, sampling_d, target_d, bins, + hardness, M) + if sampled_ind < ind.size: + # print sampled_ind + end_node_id = ind[sampled_ind] + hardness = hardness[sampled_ind] + gt_dist = gt_dist[sampled_ind] + done = True + + # Compute distance from end node to all nodes, to return. + dist, pred_map = gt.topology.shortest_distance( + gt.GraphView(gtG, reversed=True), source=end_node_id, target=None, + max_dist=max_dist_to_compute, pred_map=True) + dist = np.array(dist.get_array()) + pred_map = np.array(pred_map.get_array()) + + hardnesss.append(hardness); dists.append(dist); pred_maps.append(pred_map); + start_node_ids_.append(start_node_id); end_node_ids.append(end_node_id); + gt_dists.append(gt_dist); + paths = None + return start_node_ids_, end_node_ids, dists, pred_maps, paths, hardnesss, gt_dists + + +def rng_next_goal(start_node_ids, batch_size, gtG, rng, max_dist, + max_dist_to_compute, node_room_ids, nodes=None, + compute_path=False, dists_from_start_node=None): + # Compute the distance field from the starting location, and then pick a + # destination in another room if possible otherwise anywhere outside this + # room. + dists = []; pred_maps = []; paths = []; end_node_ids = []; + for i in range(batch_size): + room_id = node_room_ids[start_node_ids[i]] + # Compute distances. + if dists_from_start_node == None: + dist, pred_map = gt.topology.shortest_distance( + gt.GraphView(gtG, reversed=False), source=gtG.vertex(start_node_ids[i]), + target=None, max_dist=max_dist_to_compute, pred_map=True) + dist = np.array(dist.get_array()) + else: + dist = dists_from_start_node[i] + + # Randomly sample nodes which are within max_dist. + near_ids = dist <= max_dist + near_ids = near_ids[:, np.newaxis] + # Check to see if there is a non-negative node which is close enough. 
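+      # Goal selection falls back over three tiers: prefer a reachable node
+      # in a different room that is not a hallway, then any reachable
+      # non-hallway node, and finally any reachable node at all.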
+ non_same_room_ids = node_room_ids != room_id + non_hallway_ids = node_room_ids != -1 + good1_ids = np.logical_and(near_ids, np.logical_and(non_same_room_ids, non_hallway_ids)) + good2_ids = np.logical_and(near_ids, non_hallway_ids) + good3_ids = near_ids + if np.any(good1_ids): + end_node_id = rng.choice(np.where(good1_ids)[0]) + elif np.any(good2_ids): + end_node_id = rng.choice(np.where(good2_ids)[0]) + elif np.any(good3_ids): + end_node_id = rng.choice(np.where(good3_ids)[0]) + else: + logging.error('Did not find any good nodes.') + + # Compute distance to this new goal for doing distance queries. + dist, pred_map = gt.topology.shortest_distance( + gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_id), + target=None, max_dist=max_dist_to_compute, pred_map=True) + dist = np.array(dist.get_array()) + pred_map = np.array(pred_map.get_array()) + + dists.append(dist) + pred_maps.append(pred_map) + end_node_ids.append(end_node_id) + + path = None + if compute_path: + path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map) + paths.append(path) + + return start_node_ids, end_node_ids, dists, pred_maps, paths + + +def rng_room_to_room(batch_size, gtG, rng, max_dist, max_dist_to_compute, + node_room_ids, nodes=None, compute_path=False): + # Sample one of the rooms, compute the distance field. Pick a destination in + # another room if possible otherwise anywhere outside this room. + dists = []; pred_maps = []; paths = []; start_node_ids = []; end_node_ids = []; + room_ids = np.unique(node_room_ids[node_room_ids[:,0] >= 0, 0]) + for i in range(batch_size): + room_id = rng.choice(room_ids) + end_node_id = rng.choice(np.where(node_room_ids[:,0] == room_id)[0]) + end_node_ids.append(end_node_id) + + # Compute distances. + dist, pred_map = gt.topology.shortest_distance( + gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_id), + target=None, max_dist=max_dist_to_compute, pred_map=True) + dist = np.array(dist.get_array()) + pred_map = np.array(pred_map.get_array()) + dists.append(dist) + pred_maps.append(pred_map) + + # Randomly sample nodes which are within max_dist. + near_ids = dist <= max_dist + near_ids = near_ids[:, np.newaxis] + + # Check to see if there is a non-negative node which is close enough. + non_same_room_ids = node_room_ids != room_id + non_hallway_ids = node_room_ids != -1 + good1_ids = np.logical_and(near_ids, np.logical_and(non_same_room_ids, non_hallway_ids)) + good2_ids = np.logical_and(near_ids, non_hallway_ids) + good3_ids = near_ids + if np.any(good1_ids): + start_node_id = rng.choice(np.where(good1_ids)[0]) + elif np.any(good2_ids): + start_node_id = rng.choice(np.where(good2_ids)[0]) + elif np.any(good3_ids): + start_node_id = rng.choice(np.where(good3_ids)[0]) + else: + logging.error('Did not find any good nodes.') + + start_node_ids.append(start_node_id) + + path = None + if compute_path: + path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map) + paths.append(path) + + return start_node_ids, end_node_ids, dists, pred_maps, paths + + +def rng_target_dist_field(batch_size, gtG, rng, max_dist, max_dist_to_compute, + nodes=None, compute_path=False): + # Sample a single node, compute distance to all nodes less than max_dist, + # sample nodes which are a particular distance away. 
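+  # Distances are computed on the reversed graph, so dist[i] is the length
+  # of a path from node i to the sampled goal, which is the quantity the
+  # supervision needs.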
+ dists = []; pred_maps = []; paths = []; start_node_ids = [] + end_node_ids = rng.choice(gtG.num_vertices(), size=(batch_size,), + replace=False).tolist() + + for i in range(batch_size): + dist, pred_map = gt.topology.shortest_distance( + gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_ids[i]), + target=None, max_dist=max_dist_to_compute, pred_map=True) + dist = np.array(dist.get_array()) + pred_map = np.array(pred_map.get_array()) + dists.append(dist) + pred_maps.append(pred_map) + + # Randomly sample nodes which are withing max_dist + near_ids = np.where(dist <= max_dist)[0] + start_node_id = rng.choice(near_ids, size=(1,), replace=False)[0] + start_node_ids.append(start_node_id) + + path = None + if compute_path: + path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map) + paths.append(path) + + return start_node_ids, end_node_ids, dists, pred_maps, paths diff --git a/cognitive_mapping_and_planning/src/map_utils.py b/cognitive_mapping_and_planning/src/map_utils.py new file mode 100644 index 00000000000..1298bff24e7 --- /dev/null +++ b/cognitive_mapping_and_planning/src/map_utils.py @@ -0,0 +1,244 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Various function to compute the ground truth map for training etc. +""" +import copy +import skimage.morphology +import numpy as np +import scipy.ndimage +import matplotlib.pyplot as plt +import PIL + +import src.utils as utils +import cv2 + +def _get_xy_bounding_box(vertex, padding): + """Returns the xy bounding box of the environment.""" + min_ = np.floor(np.min(vertex[:, :2], axis=0) - padding).astype(np.int) + max_ = np.ceil(np.max(vertex[:, :2], axis=0) + padding).astype(np.int) + return min_, max_ + +def _project_to_map(map, vertex, wt=None, ignore_points_outside_map=False): + """Projects points to map, returns how many points are present at each + location.""" + num_points = np.zeros((map.size[1], map.size[0])) + vertex_ = vertex[:, :2] - map.origin + vertex_ = np.round(vertex_ / map.resolution).astype(np.int) + if ignore_points_outside_map: + good_ind = np.all(np.array([vertex_[:,1] >= 0, vertex_[:,1] < map.size[1], + vertex_[:,0] >= 0, vertex_[:,0] < map.size[0]]), + axis=0) + vertex_ = vertex_[good_ind, :] + if wt is not None: + wt = wt[good_ind, :] + if wt is None: + np.add.at(num_points, (vertex_[:, 1], vertex_[:, 0]), 1) + else: + assert(wt.shape[0] == vertex.shape[0]), \ + 'number of weights should be same as vertices.' 
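+    # np.add.at does an unbuffered scatter-add: points that fall in the same
+    # cell accumulate, whereas fancy-indexed `num_points[...] += wt` would
+    # count each cell at most once. E.g. np.add.at(a, ([0, 0], [1, 1]), 1)
+    # adds 2 to a[0, 1].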
+ np.add.at(num_points, (vertex_[:, 1], vertex_[:, 0]), wt) + return num_points + +def make_map(padding, resolution, vertex=None, sc=1.): + """Returns a map structure.""" + min_, max_ = _get_xy_bounding_box(vertex*sc, padding=padding) + sz = np.ceil((max_ - min_ + 1) / resolution).astype(np.int32) + max_ = min_ + sz * resolution - 1 + map = utils.Foo(origin=min_, size=sz, max=max_, resolution=resolution, + padding=padding) + return map + +def _fill_holes(img, thresh): + """Fills holes less than thresh area (assumes 4 connectivity when computing + hole area.""" + l, n = scipy.ndimage.label(np.logical_not(img)) + img_ = img == True + cnts = np.bincount(l.reshape(-1)) + for i, cnt in enumerate(cnts): + if cnt < thresh: + l[l == i] = -1 + img_[l == -1] = True + return img_ + +def compute_traversibility(map, robot_base, robot_height, robot_radius, + valid_min, valid_max, num_point_threshold, shapess, + sc=100., n_samples_per_face=200): + """Returns a bit map with pixels that are traversible or not as long as the + robot center is inside this volume we are good colisions can be detected by + doing a line search on things, or walking from current location to final + location in the bitmap, or doing bwlabel on the traversibility map.""" + + tt = utils.Timer() + tt.tic() + num_obstcale_points = np.zeros((map.size[1], map.size[0])) + num_points = np.zeros((map.size[1], map.size[0])) + + for i, shapes in enumerate(shapess): + for j in range(shapes.get_number_of_meshes()): + p, face_areas, face_idx = shapes.sample_points_on_face_of_shape( + j, n_samples_per_face, sc) + wt = face_areas[face_idx]/n_samples_per_face + + ind = np.all(np.concatenate( + (p[:, [2]] > robot_base, + p[:, [2]] < robot_base + robot_height), axis=1),axis=1) + num_obstcale_points += _project_to_map(map, p[ind, :], wt[ind]) + + ind = np.all(np.concatenate( + (p[:, [2]] > valid_min, + p[:, [2]] < valid_max), axis=1),axis=1) + num_points += _project_to_map(map, p[ind, :], wt[ind]) + + selem = skimage.morphology.disk(robot_radius / map.resolution) + obstacle_free = skimage.morphology.binary_dilation( + _fill_holes(num_obstcale_points > num_point_threshold, 20), selem) != True + valid_space = _fill_holes(num_points > num_point_threshold, 20) + traversible = np.all(np.concatenate((obstacle_free[...,np.newaxis], + valid_space[...,np.newaxis]), axis=2), + axis=2) + # plt.imshow(np.concatenate((obstacle_free, valid_space, traversible), axis=1)) + # plt.show() + + map_out = copy.deepcopy(map) + map_out.num_obstcale_points = num_obstcale_points + map_out.num_points = num_points + map_out.traversible = traversible + map_out.obstacle_free = obstacle_free + map_out.valid_space = valid_space + tt.toc(log_at=1, log_str='src.map_utils.compute_traversibility: ') + return map_out + + +def resize_maps(map, map_scales, resize_method): + scaled_maps = [] + for i, sc in enumerate(map_scales): + if resize_method == 'antialiasing': + # Resize using open cv so that we can compute the size. + # Use PIL resize to use anti aliasing feature. + map_ = cv2.resize(map*1, None, None, fx=sc, fy=sc, interpolation=cv2.INTER_LINEAR) + w = map_.shape[1]; h = map_.shape[0] + + map_img = PIL.Image.fromarray((map*255).astype(np.uint8)) + map__img = map_img.resize((w,h), PIL.Image.ANTIALIAS) + map_ = np.asarray(map__img).astype(np.float32) + map_ = map_/255. 
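+      # Defensive clamp to [0, 1] after the uint8 round trip through PIL.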
+ map_ = np.minimum(map_, 1.0) + map_ = np.maximum(map_, 0.0) + elif resize_method == 'linear_noantialiasing': + map_ = cv2.resize(map*1, None, None, fx=sc, fy=sc, interpolation=cv2.INTER_LINEAR) + else: + logging.error('Unknown resizing method') + scaled_maps.append(map_) + return scaled_maps + + +def pick_largest_cc(traversible): + out = scipy.ndimage.label(traversible)[0] + cnt = np.bincount(out.reshape(-1))[1:] + return out == np.argmax(cnt) + 1 + +def get_graph_origin_loc(rng, traversible): + """Erode the traversibility mask so that we get points in the bulk of the + graph, and not end up with a situation where the graph is localized in the + corner of a cramped room. Output Locs is in the coordinate frame of the + map.""" + + aa = pick_largest_cc(skimage.morphology.binary_erosion(traversible == True, + selem=np.ones((15,15)))) + y, x = np.where(aa > 0) + ind = rng.choice(y.size) + locs = np.array([x[ind], y[ind]]) + locs = locs + rng.rand(*(locs.shape)) - 0.5 + return locs + + +def generate_egocentric_maps(scaled_maps, map_scales, map_crop_sizes, loc, + x_axis, y_axis, theta): + maps = [] + for i, (map_, sc, map_crop_size) in enumerate(zip(scaled_maps, map_scales, map_crop_sizes)): + maps_i = np.array(get_map_to_predict(loc*sc, x_axis, y_axis, map_, + map_crop_size, + interpolation=cv2.INTER_LINEAR)[0]) + maps_i[np.isnan(maps_i)] = 0 + maps.append(maps_i) + return maps + +def generate_goal_images(map_scales, map_crop_sizes, n_ori, goal_dist, + goal_theta, rel_goal_orientation): + goal_dist = goal_dist[:,0] + goal_theta = goal_theta[:,0] + rel_goal_orientation = rel_goal_orientation[:,0] + + goals = []; + # Generate the map images. + for i, (sc, map_crop_size) in enumerate(zip(map_scales, map_crop_sizes)): + goal_i = np.zeros((goal_dist.shape[0], map_crop_size, map_crop_size, n_ori), + dtype=np.float32) + x = goal_dist*np.cos(goal_theta)*sc + (map_crop_size-1.)/2. + y = goal_dist*np.sin(goal_theta)*sc + (map_crop_size-1.)/2. 
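+    # (x, y) is the goal location in the egocentric crop, with the agent at
+    # the crop center. The loop below splats a unit of mass bilinearly over
+    # the four integer pixels around (x, y), in the channel selected by the
+    # relative goal orientation, so each in-bounds goal image sums to 1.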
+ + for j in range(goal_dist.shape[0]): + gc = rel_goal_orientation[j] + x0 = np.floor(x[j]).astype(np.int32); x1 = x0 + 1; + y0 = np.floor(y[j]).astype(np.int32); y1 = y0 + 1; + if x0 >= 0 and x0 <= map_crop_size-1: + if y0 >= 0 and y0 <= map_crop_size-1: + goal_i[j, y0, x0, gc] = (x1-x[j])*(y1-y[j]) + if y1 >= 0 and y1 <= map_crop_size-1: + goal_i[j, y1, x0, gc] = (x1-x[j])*(y[j]-y0) + + if x1 >= 0 and x1 <= map_crop_size-1: + if y0 >= 0 and y0 <= map_crop_size-1: + goal_i[j, y0, x1, gc] = (x[j]-x0)*(y1-y[j]) + if y1 >= 0 and y1 <= map_crop_size-1: + goal_i[j, y1, x1, gc] = (x[j]-x0)*(y[j]-y0) + + goals.append(goal_i) + return goals + +def get_map_to_predict(src_locs, src_x_axiss, src_y_axiss, map, map_size, + interpolation=cv2.INTER_LINEAR): + fss = [] + valids = [] + + center = (map_size-1.0)/2.0 + dst_theta = np.pi/2.0 + dst_loc = np.array([center, center]) + dst_x_axis = np.array([np.cos(dst_theta), np.sin(dst_theta)]) + dst_y_axis = np.array([np.cos(dst_theta+np.pi/2), np.sin(dst_theta+np.pi/2)]) + + def compute_points(center, x_axis, y_axis): + points = np.zeros((3,2),dtype=np.float32) + points[0,:] = center + points[1,:] = center + x_axis + points[2,:] = center + y_axis + return points + + dst_points = compute_points(dst_loc, dst_x_axis, dst_y_axis) + for i in range(src_locs.shape[0]): + src_loc = src_locs[i,:] + src_x_axis = src_x_axiss[i,:] + src_y_axis = src_y_axiss[i,:] + src_points = compute_points(src_loc, src_x_axis, src_y_axis) + M = cv2.getAffineTransform(src_points, dst_points) + + fs = cv2.warpAffine(map, M, (map_size, map_size), None, flags=interpolation, + borderValue=np.NaN) + valid = np.invert(np.isnan(fs)) + valids.append(valid) + fss.append(fs) + return fss, valids + diff --git a/cognitive_mapping_and_planning/src/rotation_utils.py b/cognitive_mapping_and_planning/src/rotation_utils.py new file mode 100644 index 00000000000..8d6d4f3cbdb --- /dev/null +++ b/cognitive_mapping_and_planning/src/rotation_utils.py @@ -0,0 +1,73 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Utilities for generating and applying rotation matrices. 
+""" +import numpy as np + +ANGLE_EPS = 0.001 + + +def normalize(v): + return v / np.linalg.norm(v) + + +def get_r_matrix(ax_, angle): + ax = normalize(ax_) + if np.abs(angle) > ANGLE_EPS: + S_hat = np.array( + [[0.0, -ax[2], ax[1]], [ax[2], 0.0, -ax[0]], [-ax[1], ax[0], 0.0]], + dtype=np.float32) + R = np.eye(3) + np.sin(angle)*S_hat + \ + (1-np.cos(angle))*(np.linalg.matrix_power(S_hat, 2)) + else: + R = np.eye(3) + return R + + +def r_between(v_from_, v_to_): + v_from = normalize(v_from_) + v_to = normalize(v_to_) + ax = normalize(np.cross(v_from, v_to)) + angle = np.arccos(np.dot(v_from, v_to)) + return get_r_matrix(ax, angle) + + +def rotate_camera_to_point_at(up_from, lookat_from, up_to, lookat_to): + inputs = [up_from, lookat_from, up_to, lookat_to] + for i in range(4): + inputs[i] = normalize(np.array(inputs[i]).reshape((-1,))) + up_from, lookat_from, up_to, lookat_to = inputs + r1 = r_between(lookat_from, lookat_to) + + new_x = np.dot(r1, np.array([1, 0, 0]).reshape((-1, 1))).reshape((-1)) + to_x = normalize(np.cross(lookat_to, up_to)) + angle = np.arccos(np.dot(new_x, to_x)) + if angle > ANGLE_EPS: + if angle < np.pi - ANGLE_EPS: + ax = normalize(np.cross(new_x, to_x)) + flip = np.dot(lookat_to, ax) + if flip > 0: + r2 = get_r_matrix(lookat_to, angle) + elif flip < 0: + r2 = get_r_matrix(lookat_to, -1. * angle) + else: + # Angle of rotation is too close to 180 degrees, direction of rotation + # does not matter. + r2 = get_r_matrix(lookat_to, angle) + else: + r2 = np.eye(3) + return np.dot(r2, r1) + diff --git a/cognitive_mapping_and_planning/src/utils.py b/cognitive_mapping_and_planning/src/utils.py new file mode 100644 index 00000000000..f58820c1f4c --- /dev/null +++ b/cognitive_mapping_and_planning/src/utils.py @@ -0,0 +1,168 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +r"""Generaly Utilities. +""" + +import numpy as np, cPickle, os, time +import src.file_utils as fu +import logging + +class Timer(): + def __init__(self): + self.calls = 0. + self.start_time = 0. + self.time_per_call = 0. + self.total_time = 0. + self.last_log_time = 0. + + def tic(self): + self.start_time = time.time() + + def toc(self, average=True, log_at=-1, log_str='', type='calls'): + if self.start_time == 0: + logging.error('Timer not started by calling tic().') + t = time.time() + diff = time.time() - self.start_time + self.total_time += diff + self.calls += 1. 
+ self.time_per_call = self.total_time/self.calls + + if type == 'calls' and log_at > 0 and np.mod(self.calls, log_at) == 0: + _ = [] + logging.info('%s: %f seconds.', log_str, self.time_per_call) + elif type == 'time' and log_at > 0 and t - self.last_log_time >= log_at: + _ = [] + logging.info('%s: %f seconds.', log_str, self.time_per_call) + self.last_log_time = t + + if average: + return self.time_per_call + else: + return diff + +class Foo(object): + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + def __str__(self): + str_ = '' + for v in vars(self).keys(): + a = getattr(self, v) + if True: #isinstance(v, object): + str__ = str(a) + str__ = str__.replace('\n', '\n ') + else: + str__ = str(a) + str_ += '{:s}: {:s}'.format(v, str__) + str_ += '\n' + return str_ + + +def dict_equal(dict1, dict2): + assert(set(dict1.keys()) == set(dict2.keys())), "Sets of keys between 2 dictionaries are different." + for k in dict1.keys(): + assert(type(dict1[k]) == type(dict2[k])), "Type of key '{:s}' if different.".format(k) + if type(dict1[k]) == np.ndarray: + assert(dict1[k].dtype == dict2[k].dtype), "Numpy Type of key '{:s}' if different.".format(k) + assert(np.allclose(dict1[k], dict2[k])), "Value for key '{:s}' do not match.".format(k) + else: + assert(dict1[k] == dict2[k]), "Value for key '{:s}' do not match.".format(k) + return True + +def subplot(plt, Y_X, sz_y_sz_x = (10, 10)): + Y,X = Y_X + sz_y, sz_x = sz_y_sz_x + plt.rcParams['figure.figsize'] = (X*sz_x, Y*sz_y) + fig, axes = plt.subplots(Y, X) + plt.subplots_adjust(wspace=0.1, hspace=0.1) + return fig, axes + +def tic_toc_print(interval, string): + global tic_toc_print_time_old + if 'tic_toc_print_time_old' not in globals(): + tic_toc_print_time_old = time.time() + print string + else: + new_time = time.time() + if new_time - tic_toc_print_time_old > interval: + tic_toc_print_time_old = new_time; + print string + +def mkdir_if_missing(output_dir): + if not fu.exists(output_dir): + fu.makedirs(output_dir) + +def save_variables(pickle_file_name, var, info, overwrite = False): + if fu.exists(pickle_file_name) and overwrite == False: + raise Exception('{:s} exists and over write is false.'.format(pickle_file_name)) + # Construct the dictionary + assert(type(var) == list); assert(type(info) == list); + d = {} + for i in xrange(len(var)): + d[info[i]] = var[i] + with fu.fopen(pickle_file_name, 'w') as f: + cPickle.dump(d, f, cPickle.HIGHEST_PROTOCOL) + +def load_variables(pickle_file_name): + if fu.exists(pickle_file_name): + with fu.fopen(pickle_file_name, 'r') as f: + d = cPickle.load(f) + return d + else: + raise Exception('{:s} does not exists.'.format(pickle_file_name)) + +def voc_ap(rec, prec): + rec = rec.reshape((-1,1)) + prec = prec.reshape((-1,1)) + z = np.zeros((1,1)) + o = np.ones((1,1)) + mrec = np.vstack((z, rec, o)) + mpre = np.vstack((z, prec, z)) + for i in range(len(mpre)-2, -1, -1): + mpre[i] = max(mpre[i], mpre[i+1]) + + I = np.where(mrec[1:] != mrec[0:-1])[0]+1; + ap = 0; + for i in I: + ap = ap + (mrec[i] - mrec[i-1])*mpre[i]; + return ap + +def tight_imshow_figure(plt, figsize=None): + fig = plt.figure(figsize=figsize) + ax = plt.Axes(fig, [0,0,1,1]) + ax.set_axis_off() + fig.add_axes(ax) + return fig, ax + +def calc_pr(gt, out, wt=None): + if wt is None: + wt = np.ones((gt.size,1)) + + gt = gt.astype(np.float64).reshape((-1,1)) + wt = wt.astype(np.float64).reshape((-1,1)) + out = out.astype(np.float64).reshape((-1,1)) + + gt = gt*wt + tog = np.concatenate([gt, wt, out], axis=1)*1. 
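+  # Sort by score (third column of tog) in decreasing order; precision at
+  # rank k is then cumulative weighted positives over cumulative weight, and
+  # recall is cumulative weighted positives over all positives.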
+ ind = np.argsort(tog[:,2], axis=0)[::-1] + tog = tog[ind,:] + cumsumsortgt = np.cumsum(tog[:,0]) + cumsumsortwt = np.cumsum(tog[:,1]) + prec = cumsumsortgt / cumsumsortwt + rec = cumsumsortgt / np.sum(tog[:,0]) + + ap = voc_ap(rec, prec) + return ap, rec, prec + diff --git a/cognitive_mapping_and_planning/tfcode/__init__.py b/cognitive_mapping_and_planning/tfcode/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cognitive_mapping_and_planning/tfcode/cmp.py b/cognitive_mapping_and_planning/tfcode/cmp.py new file mode 100644 index 00000000000..228ef90fddc --- /dev/null +++ b/cognitive_mapping_and_planning/tfcode/cmp.py @@ -0,0 +1,553 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Code for setting up the network for CMP. + +Sets up the mapper and the planner. +""" + +import sys, os, numpy as np +import matplotlib.pyplot as plt +import copy +import argparse, pprint +import time + + +import tensorflow as tf + +from tensorflow.contrib import slim +from tensorflow.contrib.slim import arg_scope + +import logging +from tensorflow.python.platform import app +from tensorflow.python.platform import flags +from src import utils +import src.file_utils as fu +import tfcode.nav_utils as nu +import tfcode.cmp_utils as cu +import tfcode.cmp_summary as cmp_s +from tfcode import tf_utils + +value_iteration_network = cu.value_iteration_network +rotate_preds = cu.rotate_preds +deconv = cu.deconv +get_visual_frustum = cu.get_visual_frustum +fr_v2 = cu.fr_v2 + +setup_train_step_kwargs = nu.default_train_step_kwargs +compute_losses_multi_or = nu.compute_losses_multi_or + +get_repr_from_image = nu.get_repr_from_image + +_save_d_at_t = nu.save_d_at_t +_save_all = nu.save_all +_eval_ap = nu.eval_ap +_eval_dist = nu.eval_dist +_plot_trajectories = nu.plot_trajectories + +_vis_readout_maps = cmp_s._vis_readout_maps +_vis = cmp_s._vis +_summary_vis = cmp_s._summary_vis +_summary_readout_maps = cmp_s._summary_readout_maps +_add_summaries = cmp_s._add_summaries + +def _inputs(problem): + # Set up inputs. + with tf.name_scope('inputs'): + inputs = [] + inputs.append(('orig_maps', tf.float32, + (problem.batch_size, 1, None, None, 1))) + inputs.append(('goal_loc', tf.float32, + (problem.batch_size, problem.num_goals, 2))) + common_input_data, _ = tf_utils.setup_inputs(inputs) + + inputs = [] + if problem.input_type == 'vision': + # Multiple images from an array of cameras. 
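+      # Shape is batch x time x views x H x W x C, where views is the number
+      # of camera headings rendered per step (1 + len(aux_delta_thetas)).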
+ inputs.append(('imgs', tf.float32, + (problem.batch_size, None, len(problem.aux_delta_thetas)+1, + problem.img_height, problem.img_width, + problem.img_channels))) + elif problem.input_type == 'analytical_counts': + for i in range(len(problem.map_crop_sizes)): + inputs.append(('analytical_counts_{:d}'.format(i), tf.float32, + (problem.batch_size, None, problem.map_crop_sizes[i], + problem.map_crop_sizes[i], problem.map_channels))) + + if problem.outputs.readout_maps: + for i in range(len(problem.readout_maps_crop_sizes)): + inputs.append(('readout_maps_{:d}'.format(i), tf.float32, + (problem.batch_size, None, + problem.readout_maps_crop_sizes[i], + problem.readout_maps_crop_sizes[i], + problem.readout_maps_channels))) + + for i in range(len(problem.map_crop_sizes)): + inputs.append(('ego_goal_imgs_{:d}'.format(i), tf.float32, + (problem.batch_size, None, problem.map_crop_sizes[i], + problem.map_crop_sizes[i], problem.goal_channels))) + for s in ['sum_num', 'sum_denom', 'max_denom']: + inputs.append(('running_'+s+'_{:d}'.format(i), tf.float32, + (problem.batch_size, 1, problem.map_crop_sizes[i], + problem.map_crop_sizes[i], problem.map_channels))) + + inputs.append(('incremental_locs', tf.float32, + (problem.batch_size, None, 2))) + inputs.append(('incremental_thetas', tf.float32, + (problem.batch_size, None, 1))) + inputs.append(('step_number', tf.int32, (1, None, 1))) + inputs.append(('node_ids', tf.int32, (problem.batch_size, None, + problem.node_ids_dim))) + inputs.append(('perturbs', tf.float32, (problem.batch_size, None, + problem.perturbs_dim))) + + # For plotting result plots + inputs.append(('loc_on_map', tf.float32, (problem.batch_size, None, 2))) + inputs.append(('gt_dist_to_goal', tf.float32, (problem.batch_size, None, 1))) + + step_input_data, _ = tf_utils.setup_inputs(inputs) + + inputs = [] + inputs.append(('action', tf.int32, (problem.batch_size, None, problem.num_actions))) + train_data, _ = tf_utils.setup_inputs(inputs) + train_data.update(step_input_data) + train_data.update(common_input_data) + return common_input_data, step_input_data, train_data + +def readout_general(multi_scale_belief, num_neurons, strides, layers_per_block, + kernel_size, batch_norm_is_training_op, wt_decay): + multi_scale_belief = tf.stop_gradient(multi_scale_belief) + with tf.variable_scope('readout_maps_deconv'): + x, outs = deconv(multi_scale_belief, batch_norm_is_training_op, + wt_decay=wt_decay, neurons=num_neurons, strides=strides, + layers_per_block=layers_per_block, kernel_size=kernel_size, + conv_fn=slim.conv2d_transpose, offset=0, + name='readout_maps_deconv') + probs = tf.sigmoid(x) + return x, probs + + +def running_combine(fss_logits, confs_probs, incremental_locs, + incremental_thetas, previous_sum_num, previous_sum_denom, + previous_max_denom, map_size, num_steps): + # fss_logits is B x N x H x W x C + # confs_logits is B x N x H x W x C + # incremental_locs is B x N x 2 + # incremental_thetas is B x N x 1 + # previous_sum_num etc is B x 1 x H x W x C + + with tf.name_scope('combine_{:d}'.format(num_steps)): + running_sum_nums_ = []; running_sum_denoms_ = []; + running_max_denoms_ = []; + + fss_logits_ = tf.unstack(fss_logits, axis=1, num=num_steps) + confs_probs_ = tf.unstack(confs_probs, axis=1, num=num_steps) + incremental_locs_ = tf.unstack(incremental_locs, axis=1, num=num_steps) + incremental_thetas_ = tf.unstack(incremental_thetas, axis=1, num=num_steps) + running_sum_num = tf.unstack(previous_sum_num, axis=1, num=1)[0] + running_sum_denom = tf.unstack(previous_sum_denom, 
axis=1, num=1)[0] + running_max_denom = tf.unstack(previous_max_denom, axis=1, num=1)[0] + + for i in range(num_steps): + # Rotate the previous running_num and running_denom + running_sum_num, running_sum_denom, running_max_denom = rotate_preds( + incremental_locs_[i], incremental_thetas_[i], map_size, + [running_sum_num, running_sum_denom, running_max_denom], + output_valid_mask=False)[0] + # print i, num_steps, running_sum_num.get_shape().as_list() + running_sum_num = running_sum_num + fss_logits_[i] * confs_probs_[i] + running_sum_denom = running_sum_denom + confs_probs_[i] + running_max_denom = tf.maximum(running_max_denom, confs_probs_[i]) + running_sum_nums_.append(running_sum_num) + running_sum_denoms_.append(running_sum_denom) + running_max_denoms_.append(running_max_denom) + + running_sum_nums = tf.stack(running_sum_nums_, axis=1) + running_sum_denoms = tf.stack(running_sum_denoms_, axis=1) + running_max_denoms = tf.stack(running_max_denoms_, axis=1) + return running_sum_nums, running_sum_denoms, running_max_denoms + +def get_map_from_images(imgs, mapper_arch, task_params, freeze_conv, wt_decay, + is_training, batch_norm_is_training_op, num_maps, + split_maps=True): + # Hit image with a resnet. + n_views = len(task_params.aux_delta_thetas) + 1 + out = utils.Foo() + + images_reshaped = tf.reshape(imgs, + shape=[-1, task_params.img_height, + task_params.img_width, + task_params.img_channels], name='re_image') + + x, out.vars_to_restore = get_repr_from_image( + images_reshaped, task_params.modalities, task_params.data_augment, + mapper_arch.encoder, freeze_conv, wt_decay, is_training) + + # Reshape into nice things so that these can be accumulated over time steps + # for faster backprop. + sh_before = x.get_shape().as_list() + out.encoder_output = tf.reshape(x, shape=[task_params.batch_size, -1, n_views] + sh_before[1:]) + x = tf.reshape(out.encoder_output, shape=[-1] + sh_before[1:]) + + # Add a layer to reduce dimensions for a fc layer. + if mapper_arch.dim_reduce_neurons > 0: + ks = 1; neurons = mapper_arch.dim_reduce_neurons; + init_var = np.sqrt(2.0/(ks**2)/neurons) + batch_norm_param = mapper_arch.batch_norm_param + batch_norm_param['is_training'] = batch_norm_is_training_op + out.conv_feat = slim.conv2d(x, neurons, kernel_size=ks, stride=1, + normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_param, + padding='SAME', scope='dim_reduce', + weights_regularizer=slim.l2_regularizer(wt_decay), + weights_initializer=tf.random_normal_initializer(stddev=init_var)) + reshape_conv_feat = slim.flatten(out.conv_feat) + sh = reshape_conv_feat.get_shape().as_list() + out.reshape_conv_feat = tf.reshape(reshape_conv_feat, shape=[-1, sh[1]*n_views]) + + with tf.variable_scope('fc'): + # Fully connected layers to compute the representation in top-view space. + fc_batch_norm_param = {'center': True, 'scale': True, + 'activation_fn':tf.nn.relu, + 'is_training': batch_norm_is_training_op} + f = out.reshape_conv_feat + out_neurons = (mapper_arch.fc_out_size**2)*mapper_arch.fc_out_neurons + neurons = mapper_arch.fc_neurons + [out_neurons] + f, _ = tf_utils.fc_network(f, neurons=neurons, wt_decay=wt_decay, + name='fc', offset=0, + batch_norm_param=fc_batch_norm_param, + is_training=is_training, + dropout_ratio=mapper_arch.fc_dropout) + f = tf.reshape(f, shape=[-1, mapper_arch.fc_out_size, + mapper_arch.fc_out_size, + mapper_arch.fc_out_neurons], name='re_fc') + + # Use pool5 to predict the free space map via deconv layers. 
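+  # The deconvolution stack upsamples the fc_out_size x fc_out_size grid back
+  # to the egocentric map resolution; its output channels are split below
+  # into 2*num_maps groups, giving a map logit and a confidence logit pair
+  # for each map scale.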
+ with tf.variable_scope('deconv'): + x, outs = deconv(f, batch_norm_is_training_op, wt_decay=wt_decay, + neurons=mapper_arch.deconv_neurons, + strides=mapper_arch.deconv_strides, + layers_per_block=mapper_arch.deconv_layers_per_block, + kernel_size=mapper_arch.deconv_kernel_size, + conv_fn=slim.conv2d_transpose, offset=0, name='deconv') + + # Reshape x the right way. + sh = x.get_shape().as_list() + x = tf.reshape(x, shape=[task_params.batch_size, -1] + sh[1:]) + out.deconv_output = x + + # Separate out the map and the confidence predictions, pass the confidence + # through a sigmoid. + if split_maps: + with tf.name_scope('split'): + out_all = tf.split(value=x, axis=4, num_or_size_splits=2*num_maps) + out.fss_logits = out_all[:num_maps] + out.confs_logits = out_all[num_maps:] + with tf.name_scope('sigmoid'): + out.confs_probs = [tf.nn.sigmoid(x) for x in out.confs_logits] + return out + +def setup_to_run(m, args, is_training, batch_norm_is_training, summary_mode): + assert(args.arch.multi_scale), 'removed support for old single scale code.' + # Set up the model. + tf.set_random_seed(args.solver.seed) + task_params = args.navtask.task_params + + batch_norm_is_training_op = \ + tf.placeholder_with_default(batch_norm_is_training, shape=[], + name='batch_norm_is_training_op') + + # Setup the inputs + m.input_tensors = {} + m.train_ops = {} + m.input_tensors['common'], m.input_tensors['step'], m.input_tensors['train'] = \ + _inputs(task_params) + + m.init_fn = None + + if task_params.input_type == 'vision': + m.vision_ops = get_map_from_images( + m.input_tensors['step']['imgs'], args.mapper_arch, + task_params, args.solver.freeze_conv, + args.solver.wt_decay, is_training, batch_norm_is_training_op, + num_maps=len(task_params.map_crop_sizes)) + + # Load variables from snapshot if needed. + if args.solver.pretrained_path is not None: + m.init_fn = slim.assign_from_checkpoint_fn(args.solver.pretrained_path, + m.vision_ops.vars_to_restore) + + # Set up caching of vision features if needed. + if args.solver.freeze_conv: + m.train_ops['step_data_cache'] = [m.vision_ops.encoder_output] + else: + m.train_ops['step_data_cache'] = [] + + # Set up blobs that are needed for the computation in rest of the graph. + m.ego_map_ops = m.vision_ops.fss_logits + m.coverage_ops = m.vision_ops.confs_probs + + # Zero pad these to make them same size as what the planner expects. 
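The padding loop below builds a 5x2 `paddings` matrix with one (before, after) pair per axis of the `B x N x H x W x C` map tensors; only the spatial rows (axes 2 and 3) are non-zero. The equivalent NumPy operation, with a hypothetical pad of 2 cells per side:

```
import numpy as np

x = np.zeros((4, 8, 32, 32, 1))           # B x N x H x W x C ego map
pad = 2                                   # hypothetical pad_map_with_zeros_each[i]
paddings = np.zeros((5, 2), dtype=np.int32)
paddings[2:4, :] = pad                    # pad only the H and W axes
y = np.pad(x, paddings, mode='constant')  # -> (4, 8, 36, 36, 1)
```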
+ for i in range(len(m.ego_map_ops)): + if args.mapper_arch.pad_map_with_zeros_each[i] > 0: + paddings = np.zeros((5,2), dtype=np.int32) + paddings[2:4,:] = args.mapper_arch.pad_map_with_zeros_each[i] + paddings_op = tf.constant(paddings, dtype=tf.int32) + m.ego_map_ops[i] = tf.pad(m.ego_map_ops[i], paddings=paddings_op) + m.coverage_ops[i] = tf.pad(m.coverage_ops[i], paddings=paddings_op) + + elif task_params.input_type == 'analytical_counts': + m.ego_map_ops = []; m.coverage_ops = [] + for i in range(len(task_params.map_crop_sizes)): + ego_map_op = m.input_tensors['step']['analytical_counts_{:d}'.format(i)] + coverage_op = tf.cast(tf.greater_equal( + tf.reduce_max(ego_map_op, reduction_indices=[4], + keep_dims=True), 1), tf.float32) + coverage_op = tf.ones_like(ego_map_op) * coverage_op + m.ego_map_ops.append(ego_map_op) + m.coverage_ops.append(coverage_op) + m.train_ops['step_data_cache'] = [] + + num_steps = task_params.num_steps + num_goals = task_params.num_goals + + map_crop_size_ops = [] + for map_crop_size in task_params.map_crop_sizes: + map_crop_size_ops.append(tf.constant(map_crop_size, dtype=tf.int32, shape=(2,))) + + with tf.name_scope('check_size'): + is_single_step = tf.equal(tf.unstack(tf.shape(m.ego_map_ops[0]), num=5)[1], 1) + + fr_ops = []; value_ops = []; + fr_intermediate_ops = []; value_intermediate_ops = []; + crop_value_ops = []; + resize_crop_value_ops = []; + confs = []; occupancys = []; + + previous_value_op = None + updated_state = []; state_names = []; + + for i in range(len(task_params.map_crop_sizes)): + map_crop_size = task_params.map_crop_sizes[i] + with tf.variable_scope('scale_{:d}'.format(i)): + # Accumulate the map. + fn = lambda ns: running_combine( + m.ego_map_ops[i], + m.coverage_ops[i], + m.input_tensors['step']['incremental_locs'] * task_params.map_scales[i], + m.input_tensors['step']['incremental_thetas'], + m.input_tensors['step']['running_sum_num_{:d}'.format(i)], + m.input_tensors['step']['running_sum_denom_{:d}'.format(i)], + m.input_tensors['step']['running_max_denom_{:d}'.format(i)], + map_crop_size, ns) + + running_sum_num, running_sum_denom, running_max_denom = \ + tf.cond(is_single_step, lambda: fn(1), lambda: fn(num_steps*num_goals)) + updated_state += [running_sum_num, running_sum_denom, running_max_denom] + state_names += ['running_sum_num_{:d}'.format(i), + 'running_sum_denom_{:d}'.format(i), + 'running_max_denom_{:d}'.format(i)] + + # Concat the accumulated map and goal + occupancy = running_sum_num / tf.maximum(running_sum_denom, 0.001) + conf = running_max_denom + # print occupancy.get_shape().as_list() + + # Concat occupancy, how much occupied and goal. + with tf.name_scope('concat'): + sh = [-1, map_crop_size, map_crop_size, task_params.map_channels] + occupancy = tf.reshape(occupancy, shape=sh) + conf = tf.reshape(conf, shape=sh) + + sh = [-1, map_crop_size, map_crop_size, task_params.goal_channels] + goal = tf.reshape(m.input_tensors['step']['ego_goal_imgs_{:d}'.format(i)], shape=sh) + to_concat = [occupancy, conf, goal] + + if previous_value_op is not None: + to_concat.append(previous_value_op) + + x = tf.concat(to_concat, 3) + + # Pass the map, previous rewards and the goal through a few convolutional + # layers to get fR. 
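The fR computed next is consumed by `value_iteration_network` (defined in `cmp_utils.py` further down): each iteration convolves the current value map concatenated with fR into `action_neurons*val_neurons` channels and max-pools over the action channels, i.e. one Bellman backup. Stripped down to NumPy with a 1x1 "convolution" for brevity (hypothetical sizes, not the repo's kernels):

```
import numpy as np

H, W, V, F, A = 16, 16, 2, 4, 8          # hypothetical sizes
rng = np.random.RandomState(0)
w = rng.randn(V + F, A * V) * 0.1        # a 1x1 "convolution" for brevity

def vin_iteration(val, fr):
  x = np.concatenate([val, fr], axis=-1)       # condition value on rewards
  q = np.tensordot(x, w, axes=([2], [0]))      # H x W x (A*V) Q-values
  q = q.reshape(H, W, A, V)
  return q.max(axis=2)                         # max-pool over actions

val = np.zeros((H, W, V))
fr = rng.randn(H, W, F)
for _ in range(3):                             # a few Bellman backups
  val = vin_iteration(val, fr)
```

Whether the per-iteration convolution weights are shared corresponds to the `share_wts` flag of `value_iteration_network`.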
+ fr_op, fr_intermediate_op = fr_v2( + x, output_neurons=args.arch.fr_neurons, + inside_neurons=args.arch.fr_inside_neurons, + is_training=batch_norm_is_training_op, name='fr', + wt_decay=args.solver.wt_decay, stride=args.arch.fr_stride) + + # Do Value Iteration on the fR + if args.arch.vin_num_iters > 0: + value_op, value_intermediate_op = value_iteration_network( + fr_op, num_iters=args.arch.vin_num_iters, + val_neurons=args.arch.vin_val_neurons, + action_neurons=args.arch.vin_action_neurons, + kernel_size=args.arch.vin_ks, share_wts=args.arch.vin_share_wts, + name='vin', wt_decay=args.solver.wt_decay) + else: + value_op = fr_op + value_intermediate_op = [] + + # Crop out and upsample the previous value map. + remove = args.arch.crop_remove_each + if remove > 0: + crop_value_op = value_op[:, remove:-remove, remove:-remove,:] + else: + crop_value_op = value_op + crop_value_op = tf.reshape(crop_value_op, shape=[-1, args.arch.value_crop_size, + args.arch.value_crop_size, + args.arch.vin_val_neurons]) + if i < len(task_params.map_crop_sizes)-1: + # Reshape it to shape of the next scale. + previous_value_op = tf.image.resize_bilinear(crop_value_op, + map_crop_size_ops[i+1], + align_corners=True) + resize_crop_value_ops.append(previous_value_op) + + occupancys.append(occupancy) + confs.append(conf) + value_ops.append(value_op) + crop_value_ops.append(crop_value_op) + fr_ops.append(fr_op) + fr_intermediate_ops.append(fr_intermediate_op) + + m.value_ops = value_ops + m.value_intermediate_ops = value_intermediate_ops + m.fr_ops = fr_ops + m.fr_intermediate_ops = fr_intermediate_ops + m.final_value_op = crop_value_op + m.crop_value_ops = crop_value_ops + m.resize_crop_value_ops = resize_crop_value_ops + m.confs = confs + m.occupancys = occupancys + + sh = [-1, args.arch.vin_val_neurons*((args.arch.value_crop_size)**2)] + m.value_features_op = tf.reshape(m.final_value_op, sh, name='reshape_value_op') + + # Determine what action to take. 
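The scale loop above implements the paper's hierarchical planner: the value map at a coarse scale is center-cropped (`crop_remove_each`) and bilinearly resized (`tf.image.resize_bilinear`) to seed planning at the next, finer scale, and the finest value map is flattened into `value_features_op` for the action classifier that follows. A sketch of the crop-and-resize handoff for one scale, with hypothetical sizes:

```
import numpy as np
from scipy.ndimage import zoom

# Hypothetical sizes: a 33x33 coarse value map, crop_remove_each=4, and a
# 32x32 crop expected at the next, finer scale.
value = np.random.rand(33, 33, 8)                # H x W x vin_val_neurons
remove = 4
crop = value[remove:-remove, remove:-remove, :]  # keep the central 25x25
next_size = 32                                   # map_crop_sizes[i+1]
factor = float(next_size) / crop.shape[0]
previous_value = zoom(crop, (factor, factor, 1.), order=1)  # bilinear resize
```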
+ with tf.variable_scope('action_pred'): + batch_norm_param = args.arch.pred_batch_norm_param + if batch_norm_param is not None: + batch_norm_param['is_training'] = batch_norm_is_training_op + m.action_logits_op, _ = tf_utils.fc_network( + m.value_features_op, neurons=args.arch.pred_neurons, + wt_decay=args.solver.wt_decay, name='pred', offset=0, + num_pred=task_params.num_actions, + batch_norm_param=batch_norm_param) + m.action_prob_op = tf.nn.softmax(m.action_logits_op) + + init_state = tf.constant(0., dtype=tf.float32, shape=[ + task_params.batch_size, 1, map_crop_size, map_crop_size, + task_params.map_channels]) + + m.train_ops['state_names'] = state_names + m.train_ops['updated_state'] = updated_state + m.train_ops['init_state'] = [init_state for _ in updated_state] + + m.train_ops['step'] = m.action_prob_op + m.train_ops['common'] = [m.input_tensors['common']['orig_maps'], + m.input_tensors['common']['goal_loc']] + m.train_ops['batch_norm_is_training_op'] = batch_norm_is_training_op + m.loss_ops = []; m.loss_ops_names = []; + + if args.arch.readout_maps: + with tf.name_scope('readout_maps'): + all_occupancys = tf.concat(m.occupancys + m.confs, 3) + readout_maps, probs = readout_general( + all_occupancys, num_neurons=args.arch.rom_arch.num_neurons, + strides=args.arch.rom_arch.strides, + layers_per_block=args.arch.rom_arch.layers_per_block, + kernel_size=args.arch.rom_arch.kernel_size, + batch_norm_is_training_op=batch_norm_is_training_op, + wt_decay=args.solver.wt_decay) + + gt_ego_maps = [m.input_tensors['step']['readout_maps_{:d}'.format(i)] + for i in range(len(task_params.readout_maps_crop_sizes))] + m.readout_maps_gt = tf.concat(gt_ego_maps, 4) + gt_shape = tf.shape(m.readout_maps_gt) + m.readout_maps_logits = tf.reshape(readout_maps, gt_shape) + m.readout_maps_probs = tf.reshape(probs, gt_shape) + + # Add a loss op + m.readout_maps_loss_op = tf.losses.sigmoid_cross_entropy( + tf.reshape(m.readout_maps_gt, [-1, len(task_params.readout_maps_crop_sizes)]), + tf.reshape(readout_maps, [-1, len(task_params.readout_maps_crop_sizes)]), + scope='loss') + m.readout_maps_loss_op = 10.*m.readout_maps_loss_op + + ewma_decay = 0.99 if is_training else 0.0 + weight = tf.ones_like(m.input_tensors['train']['action'], dtype=tf.float32, + name='weight') + m.reg_loss_op, m.data_loss_op, m.total_loss_op, m.acc_ops = \ + compute_losses_multi_or(m.action_logits_op, + m.input_tensors['train']['action'], weights=weight, + num_actions=task_params.num_actions, + data_loss_wt=args.solver.data_loss_wt, + reg_loss_wt=args.solver.reg_loss_wt, + ewma_decay=ewma_decay) + + if args.arch.readout_maps: + m.total_loss_op = m.total_loss_op + m.readout_maps_loss_op + m.loss_ops += [m.readout_maps_loss_op] + m.loss_ops_names += ['readout_maps_loss'] + + m.loss_ops += [m.reg_loss_op, m.data_loss_op, m.total_loss_op] + m.loss_ops_names += ['reg_loss', 'data_loss', 'total_loss'] + + if args.solver.freeze_conv: + vars_to_optimize = list(set(tf.trainable_variables()) - + set(m.vision_ops.vars_to_restore)) + else: + vars_to_optimize = None + + m.lr_op, m.global_step_op, m.train_op, m.should_stop_op, m.optimizer, \ + m.sync_optimizer = tf_utils.setup_training( + m.total_loss_op, + args.solver.initial_learning_rate, + args.solver.steps_per_decay, + args.solver.learning_rate_decay, + args.solver.momentum, + args.solver.max_steps, + args.solver.sync, + args.solver.adjust_lr_sync, + args.solver.num_workers, + args.solver.task, + vars_to_optimize=vars_to_optimize, + clip_gradient_norm=args.solver.clip_gradient_norm, + 
typ=args.solver.typ, momentum2=args.solver.momentum2, + adam_eps=args.solver.adam_eps) + + if args.arch.sample_gt_prob_type == 'inverse_sigmoid_decay': + m.sample_gt_prob_op = tf_utils.inverse_sigmoid_decay(args.arch.isd_k, + m.global_step_op) + elif args.arch.sample_gt_prob_type == 'zero': + m.sample_gt_prob_op = tf.constant(-1.0, dtype=tf.float32) + + elif args.arch.sample_gt_prob_type.split('_')[0] == 'step': + step = int(args.arch.sample_gt_prob_type.split('_')[1]) + m.sample_gt_prob_op = tf_utils.step_gt_prob( + step, m.input_tensors['step']['step_number'][0,0,0]) + + m.sample_action_type = args.arch.action_sample_type + m.sample_action_combine_type = args.arch.action_sample_combine_type + + m.summary_ops = { + summary_mode: _add_summaries(m, args, summary_mode, + args.summary.arop_full_summary_iters)} + + m.init_op = tf.group(tf.global_variables_initializer(), + tf.local_variables_initializer()) + m.saver_op = tf.train.Saver(keep_checkpoint_every_n_hours=4, + write_version=tf.train.SaverDef.V2) + return m diff --git a/cognitive_mapping_and_planning/tfcode/cmp_summary.py b/cognitive_mapping_and_planning/tfcode/cmp_summary.py new file mode 100644 index 00000000000..55313bfbd52 --- /dev/null +++ b/cognitive_mapping_and_planning/tfcode/cmp_summary.py @@ -0,0 +1,213 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Code for setting up summaries for CMP. +""" + +import sys, os, numpy as np +import matplotlib.pyplot as plt + + +import tensorflow as tf + +from tensorflow.contrib import slim +from tensorflow.contrib.slim import arg_scope + +import logging +from tensorflow.python.platform import app +from tensorflow.python.platform import flags +from src import utils +import src.file_utils as fu +import tfcode.nav_utils as nu + +def _vis_readout_maps(outputs, global_step, output_dir, metric_summary, N): + # outputs is [gt_map, pred_map]: + if N >= 0: + outputs = outputs[:N] + N = len(outputs) + + plt.set_cmap('jet') + fig, axes = utils.subplot(plt, (N, outputs[0][0].shape[4]*2), (5,5)) + axes = axes.ravel()[::-1].tolist() + for i in range(N): + gt_map, pred_map = outputs[i] + for j in [0]: + for k in range(gt_map.shape[4]): + # Display something like the midpoint of the trajectory. + id = np.int(gt_map.shape[1]/2) + + ax = axes.pop(); + ax.imshow(gt_map[j,id,:,:,k], origin='lower', interpolation='none', + vmin=0., vmax=1.) + ax.set_axis_off(); + if i == 0: ax.set_title('gt_map') + + ax = axes.pop(); + ax.imshow(pred_map[j,id,:,:,k], origin='lower', interpolation='none', + vmin=0., vmax=1.) 
+ ax.set_axis_off(); + if i == 0: ax.set_title('pred_map') + + file_name = os.path.join(output_dir, 'readout_map_{:d}.png'.format(global_step)) + with fu.fopen(file_name, 'w') as f: + fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) + plt.close(fig) + +def _vis(outputs, global_step, output_dir, metric_summary, N): + # Plot the value map, goal for various maps to see what if the model is + # learning anything useful. + # + # outputs is [values, goals, maps, occupancy, conf]. + # + if N >= 0: + outputs = outputs[:N] + N = len(outputs) + + plt.set_cmap('jet') + fig, axes = utils.subplot(plt, (N, outputs[0][0].shape[4]*5), (5,5)) + axes = axes.ravel()[::-1].tolist() + for i in range(N): + values, goals, maps, occupancy, conf = outputs[i] + for j in [0]: + for k in range(values.shape[4]): + # Display something like the midpoint of the trajectory. + id = np.int(values.shape[1]/2) + + ax = axes.pop(); + ax.imshow(goals[j,id,:,:,k], origin='lower', interpolation='none') + ax.set_axis_off(); + if i == 0: ax.set_title('goal') + + ax = axes.pop(); + ax.imshow(occupancy[j,id,:,:,k], origin='lower', interpolation='none') + ax.set_axis_off(); + if i == 0: ax.set_title('occupancy') + + ax = axes.pop(); + ax.imshow(conf[j,id,:,:,k], origin='lower', interpolation='none', + vmin=0., vmax=1.) + ax.set_axis_off(); + if i == 0: ax.set_title('conf') + + ax = axes.pop(); + ax.imshow(values[j,id,:,:,k], origin='lower', interpolation='none') + ax.set_axis_off(); + if i == 0: ax.set_title('value') + + ax = axes.pop(); + ax.imshow(maps[j,id,:,:,k], origin='lower', interpolation='none') + ax.set_axis_off(); + if i == 0: ax.set_title('incr map') + + file_name = os.path.join(output_dir, 'value_vis_{:d}.png'.format(global_step)) + with fu.fopen(file_name, 'w') as f: + fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) + plt.close(fig) + +def _summary_vis(m, batch_size, num_steps, arop_full_summary_iters): + arop = []; arop_summary_iters = []; arop_eval_fns = []; + vis_value_ops = []; vis_goal_ops = []; vis_map_ops = []; + vis_occupancy_ops = []; vis_conf_ops = []; + for i, val_op in enumerate(m.value_ops): + vis_value_op = tf.reduce_mean(tf.abs(val_op), axis=3, keep_dims=True) + vis_value_ops.append(vis_value_op) + + vis_occupancy_op = tf.reduce_mean(tf.abs(m.occupancys[i]), 3, True) + vis_occupancy_ops.append(vis_occupancy_op) + + vis_conf_op = tf.reduce_max(tf.abs(m.confs[i]), axis=3, keep_dims=True) + vis_conf_ops.append(vis_conf_op) + + ego_goal_imgs_i_op = m.input_tensors['step']['ego_goal_imgs_{:d}'.format(i)] + vis_goal_op = tf.reduce_max(ego_goal_imgs_i_op, 4, True) + vis_goal_ops.append(vis_goal_op) + + vis_map_op = tf.reduce_mean(tf.abs(m.ego_map_ops[i]), 4, True) + vis_map_ops.append(vis_map_op) + + vis_goal_ops = tf.concat(vis_goal_ops, 4) + vis_map_ops = tf.concat(vis_map_ops, 4) + vis_value_ops = tf.concat(vis_value_ops, 3) + vis_occupancy_ops = tf.concat(vis_occupancy_ops, 3) + vis_conf_ops = tf.concat(vis_conf_ops, 3) + + sh = tf.unstack(tf.shape(vis_value_ops))[1:] + vis_value_ops = tf.reshape(vis_value_ops, shape=[batch_size, -1] + sh) + + sh = tf.unstack(tf.shape(vis_conf_ops))[1:] + vis_conf_ops = tf.reshape(vis_conf_ops, shape=[batch_size, -1] + sh) + + sh = tf.unstack(tf.shape(vis_occupancy_ops))[1:] + vis_occupancy_ops = tf.reshape(vis_occupancy_ops, shape=[batch_size,-1] + sh) + + # Save memory, only return time steps that need to be visualized, factor of + # 32 CPU memory saving. 
+ id = np.int(num_steps/2) + vis_goal_ops = tf.expand_dims(vis_goal_ops[:,id,:,:,:], axis=1) + vis_map_ops = tf.expand_dims(vis_map_ops[:,id,:,:,:], axis=1) + vis_value_ops = tf.expand_dims(vis_value_ops[:,id,:,:,:], axis=1) + vis_conf_ops = tf.expand_dims(vis_conf_ops[:,id,:,:,:], axis=1) + vis_occupancy_ops = tf.expand_dims(vis_occupancy_ops[:,id,:,:,:], axis=1) + + arop += [[vis_value_ops, vis_goal_ops, vis_map_ops, vis_occupancy_ops, + vis_conf_ops]] + arop_summary_iters += [arop_full_summary_iters] + arop_eval_fns += [_vis] + return arop, arop_summary_iters, arop_eval_fns + +def _summary_readout_maps(m, num_steps, arop_full_summary_iters): + arop = []; arop_summary_iters = []; arop_eval_fns = []; + id = np.int(num_steps-1) + vis_readout_maps_gt = m.readout_maps_gt + vis_readout_maps_prob = tf.reshape(m.readout_maps_probs, + shape=tf.shape(vis_readout_maps_gt)) + vis_readout_maps_gt = tf.expand_dims(vis_readout_maps_gt[:,id,:,:,:], 1) + vis_readout_maps_prob = tf.expand_dims(vis_readout_maps_prob[:,id,:,:,:], 1) + arop += [[vis_readout_maps_gt, vis_readout_maps_prob]] + arop_summary_iters += [arop_full_summary_iters] + arop_eval_fns += [_vis_readout_maps] + return arop, arop_summary_iters, arop_eval_fns + +def _add_summaries(m, args, summary_mode, arop_full_summary_iters): + task_params = args.navtask.task_params + + summarize_ops = [m.lr_op, m.global_step_op, m.sample_gt_prob_op] + \ + m.loss_ops + m.acc_ops + summarize_names = ['lr', 'global_step', 'sample_gt_prob_op'] + \ + m.loss_ops_names + ['acc_{:d}'.format(i) for i in range(len(m.acc_ops))] + to_aggregate = [0, 0, 0] + [1]*len(m.loss_ops_names) + [1]*len(m.acc_ops) + + scope_name = 'summary' + with tf.name_scope(scope_name): + s_ops = nu.add_default_summaries(summary_mode, arop_full_summary_iters, + summarize_ops, summarize_names, + to_aggregate, m.action_prob_op, + m.input_tensors, scope_name=scope_name) + if summary_mode == 'val': + arop, arop_summary_iters, arop_eval_fns = _summary_vis( + m, task_params.batch_size, task_params.num_steps, + arop_full_summary_iters) + s_ops.additional_return_ops += arop + s_ops.arop_summary_iters += arop_summary_iters + s_ops.arop_eval_fns += arop_eval_fns + + if args.arch.readout_maps: + arop, arop_summary_iters, arop_eval_fns = _summary_readout_maps( + m, task_params.num_steps, arop_full_summary_iters) + s_ops.additional_return_ops += arop + s_ops.arop_summary_iters += arop_summary_iters + s_ops.arop_eval_fns += arop_eval_fns + + return s_ops diff --git a/cognitive_mapping_and_planning/tfcode/cmp_utils.py b/cognitive_mapping_and_planning/tfcode/cmp_utils.py new file mode 100644 index 00000000000..6d87c697b4b --- /dev/null +++ b/cognitive_mapping_and_planning/tfcode/cmp_utils.py @@ -0,0 +1,164 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Utility functions for setting up the CMP graph. 
+""" + +import os, numpy as np +import matplotlib.pyplot as plt + + +import tensorflow as tf + +from tensorflow.contrib import slim +from tensorflow.contrib.slim import arg_scope +import logging +from src import utils +import src.file_utils as fu +from tfcode import tf_utils + +resnet_v2 = tf_utils.resnet_v2 +custom_residual_block = tf_utils.custom_residual_block + +def value_iteration_network( + fr, num_iters, val_neurons, action_neurons, kernel_size, share_wts=False, + name='vin', wt_decay=0.0001, activation_fn=None, shape_aware=False): + """ + Constructs a Value Iteration Network, convolutions and max pooling across + channels. + Input: + fr: NxWxHxC + val_neurons: Number of channels for maintaining the value. + action_neurons: Computes action_neurons * val_neurons at each iteration to + max pool over. + Output: + value image: NxHxWx(val_neurons) + """ + init_var = np.sqrt(2.0/(kernel_size**2)/(val_neurons*action_neurons)) + vals = [] + with tf.variable_scope(name) as varscope: + if shape_aware == False: + fr_shape = tf.unstack(tf.shape(fr)) + val_shape = tf.stack(fr_shape[:-1] + [val_neurons]) + val = tf.zeros(val_shape, name='val_init') + else: + val = tf.expand_dims(tf.zeros_like(fr[:,:,:,0]), dim=-1) * \ + tf.constant(0., dtype=tf.float32, shape=[1,1,1,val_neurons]) + val_shape = tf.shape(val) + vals.append(val) + for i in range(num_iters): + if share_wts: + # The first Value Iteration maybe special, so it can have its own + # paramterss. + scope = 'conv' + if i == 0: scope = 'conv_0' + if i > 1: varscope.reuse_variables() + else: + scope = 'conv_{:d}'.format(i) + val = slim.conv2d(tf.concat([val, fr], 3, name='concat_{:d}'.format(i)), + num_outputs=action_neurons*val_neurons, + kernel_size=kernel_size, stride=1, activation_fn=activation_fn, + scope=scope, normalizer_fn=None, + weights_regularizer=slim.l2_regularizer(wt_decay), + weights_initializer=tf.random_normal_initializer(stddev=init_var), + biases_initializer=tf.zeros_initializer()) + val = tf.reshape(val, [-1, action_neurons*val_neurons, 1, 1], + name='re_{:d}'.format(i)) + val = slim.max_pool2d(val, kernel_size=[action_neurons,1], + stride=[action_neurons,1], padding='VALID', + scope='val_{:d}'.format(i)) + val = tf.reshape(val, val_shape, name='unre_{:d}'.format(i)) + vals.append(val) + return val, vals + + +def rotate_preds(loc_on_map, relative_theta, map_size, preds, + output_valid_mask): + with tf.name_scope('rotate'): + flow_op = tf_utils.get_flow(loc_on_map, relative_theta, map_size=map_size) + if type(preds) != list: + rotated_preds, valid_mask_warps = tf_utils.dense_resample(preds, flow_op, + output_valid_mask) + else: + rotated_preds = [] ;valid_mask_warps = [] + for pred in preds: + rotated_pred, valid_mask_warp = tf_utils.dense_resample(pred, flow_op, + output_valid_mask) + rotated_preds.append(rotated_pred) + valid_mask_warps.append(valid_mask_warp) + return rotated_preds, valid_mask_warps + +def get_visual_frustum(map_size, shape_like, expand_dims=[0,0]): + with tf.name_scope('visual_frustum'): + l = np.tril(np.ones(map_size)) ;l = l + l[:,::-1] + l = (l == 2).astype(np.float32) + for e in expand_dims: + l = np.expand_dims(l, axis=e) + confs_probs = tf.constant(l, dtype=tf.float32) + confs_probs = tf.ones_like(shape_like, dtype=tf.float32) * confs_probs + return confs_probs + +def deconv(x, is_training, wt_decay, neurons, strides, layers_per_block, + kernel_size, conv_fn, name, offset=0): + """Generates a up sampling network with residual connections. 
+ """ + batch_norm_param = {'center': True, 'scale': True, + 'activation_fn': tf.nn.relu, + 'is_training': is_training} + outs = [] + for i, (neuron, stride) in enumerate(zip(neurons, strides)): + for s in range(layers_per_block): + scope = '{:s}_{:d}_{:d}'.format(name, i+1+offset,s+1) + x = custom_residual_block(x, neuron, kernel_size, stride, scope, + is_training, wt_decay, use_residual=True, + residual_stride_conv=True, conv_fn=conv_fn, + batch_norm_param=batch_norm_param) + stride = 1 + outs.append((x,True)) + return x, outs + +def fr_v2(x, output_neurons, inside_neurons, is_training, name='fr', + wt_decay=0.0001, stride=1, updates_collections=tf.GraphKeys.UPDATE_OPS): + """Performs fusion of information between the map and the reward map. + Inputs + x: NxHxWxC1 + + Outputs + fr map: NxHxWx(output_neurons) + """ + if type(stride) != list: + stride = [stride] + with slim.arg_scope(resnet_v2.resnet_utils.resnet_arg_scope( + is_training=is_training, weight_decay=wt_decay)): + with slim.arg_scope([slim.batch_norm], updates_collections=updates_collections) as arg_sc: + # Change the updates_collections for the conv normalizer_params to None + for i in range(len(arg_sc.keys())): + if 'convolution' in arg_sc.keys()[i]: + arg_sc.values()[i]['normalizer_params']['updates_collections'] = updates_collections + with slim.arg_scope(arg_sc): + bottleneck = resnet_v2.bottleneck + blocks = [] + for i, s in enumerate(stride): + b = resnet_v2.resnet_utils.Block( + 'block{:d}'.format(i + 1), bottleneck, [{ + 'depth': output_neurons, + 'depth_bottleneck': inside_neurons, + 'stride': stride[i] + }]) + blocks.append(b) + x, outs = resnet_v2.resnet_v2(x, blocks, num_classes=None, global_pool=False, + output_stride=None, include_root_block=False, + reuse=False, scope=name) + return x, outs diff --git a/cognitive_mapping_and_planning/tfcode/nav_utils.py b/cognitive_mapping_and_planning/tfcode/nav_utils.py new file mode 100644 index 00000000000..2f764f33df9 --- /dev/null +++ b/cognitive_mapping_and_planning/tfcode/nav_utils.py @@ -0,0 +1,435 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Various losses for training navigation agents. + +Defines various loss functions for navigation agents, +compute_losses_multi_or. +""" + +import os, numpy as np +import matplotlib.pyplot as plt + + +import tensorflow as tf + +from tensorflow.contrib import slim +from tensorflow.contrib.slim import arg_scope +from tensorflow.contrib.slim.nets import resnet_v2 +from tensorflow.python.training import moving_averages +import logging +from src import utils +import src.file_utils as fu +from tfcode import tf_utils + + +def compute_losses_multi_or(logits, actions_one_hot, weights=None, + num_actions=-1, data_loss_wt=1., reg_loss_wt=1., + ewma_decay=0.99, reg_loss_op=None): + assert(num_actions > 0), 'num_actions must be specified and must be > 0.' 
+
+  with tf.name_scope('loss'):
+    if weights is None:
+      weights = tf.ones_like(actions_one_hot, dtype=tf.float32, name='weight')
+
+    actions_one_hot = tf.cast(tf.reshape(actions_one_hot, [-1, num_actions],
+                                         're_actions_one_hot'), tf.float32)
+    weights = tf.reduce_sum(tf.reshape(weights, [-1, num_actions], 're_weight'),
+                            reduction_indices=1)
+    total = tf.reduce_sum(weights)
+
+    action_prob = tf.nn.softmax(logits)
+    action_prob = tf.reduce_sum(tf.multiply(action_prob, actions_one_hot),
+                                reduction_indices=1)
+    example_loss = -tf.log(tf.maximum(tf.constant(1e-4), action_prob))
+
+    data_loss_op = tf.reduce_sum(example_loss * weights) / total
+    if reg_loss_op is None:
+      if reg_loss_wt > 0:
+        reg_loss_op = tf.add_n(tf.losses.get_regularization_losses())
+      else:
+        reg_loss_op = tf.constant(0.)
+
+    if reg_loss_wt > 0:
+      total_loss_op = data_loss_wt*data_loss_op + reg_loss_wt*reg_loss_op
+    else:
+      total_loss_op = data_loss_wt*data_loss_op
+
+    is_correct = tf.cast(tf.greater(action_prob, 0.5, name='pred_class'), tf.float32)
+    acc_op = tf.reduce_sum(is_correct*weights) / total
+
+    ewma_acc_op = moving_averages.weighted_moving_average(
+        acc_op, ewma_decay, weight=total, name='ewma_acc')
+    acc_ops = [ewma_acc_op]
+
+  return reg_loss_op, data_loss_op, total_loss_op, acc_ops
+
+def get_repr_from_image(images_reshaped, modalities, data_augment, encoder,
+                        freeze_conv, wt_decay, is_training):
+  # Pass the image through a stack of convolutional layers to obtain the
+  # pool5 features.
+  if modalities == ['rgb']:
+    with tf.name_scope('pre_rgb'):
+      x = (images_reshaped + 128.) / 255.  # Convert to brightness in [0, 1].
+      if data_augment.relight and is_training:
+        x = tf_utils.distort_image(x, fast_mode=data_augment.relight_fast)
+      x = (x-0.5)*2.0
+    scope_name = encoder
+  elif modalities == ['depth']:
+    with tf.name_scope('pre_d'):
+      d_image = images_reshaped
+      x = 2*(d_image[...,0] - 80.0)/100.0
+      y = d_image[...,1]
+      d_image = tf.concat([tf.expand_dims(x, -1), tf.expand_dims(y, -1)], 3)
+      x = d_image
+    scope_name = 'd_'+encoder
+
+  resnet_is_training = is_training and (not freeze_conv)
+  with slim.arg_scope(resnet_v2.resnet_utils.resnet_arg_scope(resnet_is_training)):
+    fn = getattr(tf_utils, encoder)
+    x, end_points = fn(x, num_classes=None, global_pool=False,
+                       output_stride=None, reuse=None,
+                       scope=scope_name)
+  vars_ = slim.get_variables_to_restore()
+
+  conv_feat = x
+  return conv_feat, vars_
+
+def default_train_step_kwargs(m, obj, logdir, rng_seed, is_chief, num_steps,
+                              iters, train_display_interval,
+                              dagger_sample_bn_false):
+  train_step_kwargs = {}
+  train_step_kwargs['obj'] = obj
+  train_step_kwargs['m'] = m
+
+  # rng_data has 2 independent rngs, one for sampling episodes and one for
+  # sampling perturbs (so that we can make results reproducible).
+  train_step_kwargs['rng_data'] = [np.random.RandomState(rng_seed),
+                                   np.random.RandomState(rng_seed)]
+  train_step_kwargs['rng_action'] = np.random.RandomState(rng_seed)
+  if is_chief:
+    train_step_kwargs['writer'] = tf.summary.FileWriter(logdir)  #, m.tf_graph)
+  else:
+    train_step_kwargs['writer'] = None
+  train_step_kwargs['iters'] = iters
+  train_step_kwargs['train_display_interval'] = train_display_interval
+  train_step_kwargs['num_steps'] = num_steps
+  train_step_kwargs['logdir'] = logdir
+  train_step_kwargs['dagger_sample_bn_false'] = dagger_sample_bn_false
+  return train_step_kwargs
+
+# Utilities for visualizing and analysing validation output.
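Each helper in this block receives `outputs` as a list with one entry per validation batch, where every entry is itself the list of fetched arrays named in the helper's docstring. A sketch of the calling convention for `save_d_at_t` below (hypothetical batch size 4 and 40 time steps):

```
import numpy as np

# For save_d_at_t, each entry is [gt_dist_to_goal] of shape B x T x 1.
outputs = [[np.random.rand(4, 40, 1)] for _ in range(10)]   # 10 val batches
d_at_t = np.concatenate([o[0][:, :, 0] for o in outputs], axis=0)  # (40, 40)
mean_dist = d_at_t.mean(axis=0)   # mean distance-to-goal at each time step
```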
+def save_d_at_t(outputs, global_step, output_dir, metric_summary, N): + """Save distance to goal at all time steps. + + Args: + outputs : [gt_dist_to_goal]. + global_step : number of iterations. + output_dir : output directory. + metric_summary : to append scalars to summary. + N : number of outputs to process. + + """ + d_at_t = np.concatenate(map(lambda x: x[0][:,:,0]*1, outputs), axis=0) + fig, axes = utils.subplot(plt, (1,1), (5,5)) + axes.plot(np.arange(d_at_t.shape[1]), np.mean(d_at_t, axis=0), 'r.') + axes.set_xlabel('time step') + axes.set_ylabel('dist to next goal') + axes.grid('on') + file_name = os.path.join(output_dir, 'dist_at_t_{:d}.png'.format(global_step)) + with fu.fopen(file_name, 'w') as f: + fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) + file_name = os.path.join(output_dir, 'dist_at_t_{:d}.pkl'.format(global_step)) + utils.save_variables(file_name, [d_at_t], ['d_at_t'], overwrite=True) + plt.close(fig) + return None + +def save_all(outputs, global_step, output_dir, metric_summary, N): + """Save numerous statistics. + + Args: + outputs : [locs, goal_loc, gt_dist_to_goal, node_ids, perturbs] + global_step : number of iterations. + output_dir : output directory. + metric_summary : to append scalars to summary. + N : number of outputs to process. + """ + all_locs = np.concatenate(map(lambda x: x[0], outputs), axis=0) + all_goal_locs = np.concatenate(map(lambda x: x[1], outputs), axis=0) + all_d_at_t = np.concatenate(map(lambda x: x[2][:,:,0]*1, outputs), axis=0) + all_node_ids = np.concatenate(map(lambda x: x[3], outputs), axis=0) + all_perturbs = np.concatenate(map(lambda x: x[4], outputs), axis=0) + + file_name = os.path.join(output_dir, 'all_locs_at_t_{:d}.pkl'.format(global_step)) + vars = [all_locs, all_goal_locs, all_d_at_t, all_node_ids, all_perturbs] + var_names = ['all_locs', 'all_goal_locs', 'all_d_at_t', 'all_node_ids', 'all_perturbs'] + utils.save_variables(file_name, vars, var_names, overwrite=True) + return None + +def eval_ap(outputs, global_step, output_dir, metric_summary, N, num_classes=4): + """Processes the collected outputs to compute AP for action prediction. + + Args: + outputs : [logits, labels] + global_step : global_step. + output_dir : where to store results. + metric_summary : summary object to add summaries to. + N : number of outputs to process. + num_classes : number of classes to compute AP over, and to reshape tensors. + """ + if N >= 0: + outputs = outputs[:N] + logits = np.concatenate(map(lambda x: x[0], outputs), axis=0).reshape((-1, num_classes)) + labels = np.concatenate(map(lambda x: x[1], outputs), axis=0).reshape((-1, num_classes)) + aps = [] + for i in range(logits.shape[1]): + ap, rec, prec = utils.calc_pr(labels[:,i], logits[:,i]) + ap = ap[0] + tf_utils.add_value_to_summary(metric_summary, 'aps/ap_{:d}: '.format(i), ap) + aps.append(ap) + return aps + +def eval_dist(outputs, global_step, output_dir, metric_summary, N): + """Processes the collected outputs during validation to + 1. Plot the distance over time curve. + 2. Compute mean and median distances. + 3. Plots histogram of end distances. + + Args: + outputs : [locs, goal_loc, gt_dist_to_goal]. + global_step : global_step. + output_dir : where to store results. + metric_summary : summary object to add summaries to. + N : number of outputs to process. + """ + SUCCESS_THRESH = 3 + if N >= 0: + outputs = outputs[:N] + + # Plot distance at time t. 
+ d_at_t = [] + for i in range(len(outputs)): + locs, goal_loc, gt_dist_to_goal = outputs[i] + d_at_t.append(gt_dist_to_goal[:,:,0]*1) + + # Plot the distance. + fig, axes = utils.subplot(plt, (1,1), (5,5)) + d_at_t = np.concatenate(d_at_t, axis=0) + axes.plot(np.arange(d_at_t.shape[1]), np.mean(d_at_t, axis=0), 'r.') + axes.set_xlabel('time step') + axes.set_ylabel('dist to next goal') + axes.grid('on') + file_name = os.path.join(output_dir, 'dist_at_t_{:d}.png'.format(global_step)) + with fu.fopen(file_name, 'w') as f: + fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) + file_name = os.path.join(output_dir, 'dist_at_t_{:d}.pkl'.format(global_step)) + utils.save_variables(file_name, [d_at_t], ['d_at_t'], overwrite=True) + plt.close(fig) + + # Plot the trajectories and the init_distance and final distance. + d_inits = [] + d_ends = [] + for i in range(len(outputs)): + locs, goal_loc, gt_dist_to_goal = outputs[i] + d_inits.append(gt_dist_to_goal[:,0,0]*1) + d_ends.append(gt_dist_to_goal[:,-1,0]*1) + + # Plot the distance. + fig, axes = utils.subplot(plt, (1,1), (5,5)) + d_inits = np.concatenate(d_inits, axis=0) + d_ends = np.concatenate(d_ends, axis=0) + axes.plot(d_inits+np.random.rand(*(d_inits.shape))-0.5, + d_ends+np.random.rand(*(d_ends.shape))-0.5, '.', mec='red', mew=1.0) + axes.set_xlabel('init dist'); axes.set_ylabel('final dist'); + axes.grid('on'); axes.axis('equal'); + title_str = 'mean: {:0.1f}, 50: {:0.1f}, 75: {:0.2f}, s: {:0.1f}' + title_str = title_str.format( + np.mean(d_ends), np.median(d_ends), np.percentile(d_ends, q=75), + 100*(np.mean(d_ends <= SUCCESS_THRESH))) + axes.set_title(title_str) + file_name = os.path.join(output_dir, 'dist_{:d}.png'.format(global_step)) + with fu.fopen(file_name, 'w') as f: + fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) + + file_name = os.path.join(output_dir, 'dist_{:d}.pkl'.format(global_step)) + utils.save_variables(file_name, [d_inits, d_ends], ['d_inits', 'd_ends'], + overwrite=True) + plt.close(fig) + + # Plot the histogram of the end_distance. + with plt.style.context('seaborn-white'): + d_ends_ = np.sort(d_ends) + d_inits_ = np.sort(d_inits) + leg = []; + fig, ax = utils.subplot(plt, (1,1), (5,5)) + ax.grid('on') + ax.set_xlabel('Distance from goal'); ax.xaxis.label.set_fontsize(16); + ax.set_ylabel('Fraction of data'); ax.yaxis.label.set_fontsize(16); + ax.plot(d_ends_, np.arange(d_ends_.size)*1./d_ends_.size, 'r') + ax.plot(d_inits_, np.arange(d_inits_.size)*1./d_inits_.size, 'k') + leg.append('Final'); leg.append('Init'); + ax.legend(leg, fontsize='x-large'); + ax.set_axis_on() + title_str = 'mean: {:0.1f}, 50: {:0.1f}, 75: {:0.2f}, s: {:0.1f}' + title_str = title_str.format( + np.mean(d_ends), np.median(d_ends), np.percentile(d_ends, q=75), + 100*(np.mean(d_ends <= SUCCESS_THRESH))) + ax.set_title(title_str) + file_name = os.path.join(output_dir, 'dist_hist_{:d}.png'.format(global_step)) + with fu.fopen(file_name, 'w') as f: + fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) + + # Log distance metrics. 
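The `add_value_to_summary` calls that follow append named scalars to the `tf.summary.Summary` protobuf that evaluation threads through these helpers. The actual helper lives in `tfcode/tf_utils.py` (not shown in this excerpt); a minimal sketch of what such a helper needs to do:

```
import tensorflow as tf

def add_value_to_summary(metric_summary, tag, val):
  # A sketch only: the repo's helper may differ, e.g. it may also log.
  metric_summary.value.add(tag=tag, simple_value=float(val))

ms = tf.summary.Summary()
add_value_to_summary(ms, 'dists/dist_end (mean): ', 1.5)
```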
+  tf_utils.add_value_to_summary(metric_summary, 'dists/success_init: ',
+                                100*(np.mean(d_inits <= SUCCESS_THRESH)))
+  tf_utils.add_value_to_summary(metric_summary, 'dists/success_end: ',
+                                100*(np.mean(d_ends <= SUCCESS_THRESH)))
+  tf_utils.add_value_to_summary(metric_summary, 'dists/dist_init (75): ',
+                                np.percentile(d_inits, q=75))
+  tf_utils.add_value_to_summary(metric_summary, 'dists/dist_end (75): ',
+                                np.percentile(d_ends, q=75))
+  tf_utils.add_value_to_summary(metric_summary, 'dists/dist_init (median): ',
+                                np.median(d_inits))
+  tf_utils.add_value_to_summary(metric_summary, 'dists/dist_end (median): ',
+                                np.median(d_ends))
+  tf_utils.add_value_to_summary(metric_summary, 'dists/dist_init (mean): ',
+                                np.mean(d_inits))
+  tf_utils.add_value_to_summary(metric_summary, 'dists/dist_end (mean): ',
+                                np.mean(d_ends))
+  return np.median(d_inits), np.median(d_ends), np.mean(d_inits), np.mean(d_ends), \
+      np.percentile(d_inits, q=75), np.percentile(d_ends, q=75), \
+      100*(np.mean(d_inits <= SUCCESS_THRESH)), 100*(np.mean(d_ends <= SUCCESS_THRESH))
+
+def plot_trajectories(outputs, global_step, output_dir, metric_summary, N):
+  """Processes the collected outputs during validation to plot the
+  trajectories in the top view.
+
+  Args:
+    outputs : [locs, orig_maps, goal_loc].
+    global_step : number of iterations.
+    output_dir : where to store results.
+    metric_summary : summary object to add summaries to.
+    N : number of outputs to process.
+  """
+  if N >= 0:
+    outputs = outputs[:N]
+  N = len(outputs)
+
+  plt.set_cmap('gray')
+  fig, axes = utils.subplot(plt, (N, outputs[0][1].shape[0]), (5,5))
+  axes = axes.ravel()[::-1].tolist()
+  for i in range(N):
+    locs, orig_maps, goal_loc = outputs[i]
+    is_semantic = np.isnan(goal_loc[0,0,1])
+    for j in range(orig_maps.shape[0]):
+      ax = axes.pop()
+      ax.plot(locs[j,0,0], locs[j,0,1], 'ys')
+      # Plot the goals one by one, so that they come in different colors.
+      for k in range(goal_loc.shape[1]):
+        if not is_semantic:
+          ax.plot(goal_loc[j,k,0], goal_loc[j,k,1], 's')
+      if False:
+        ax.plot(locs[j,:,0], locs[j,:,1], 'r.', ms=3)
+        ax.imshow(orig_maps[j,0,:,:,0], origin='lower')
+        ax.set_axis_off()
+      else:
+        ax.scatter(locs[j,:,0], locs[j,:,1], c=np.arange(locs.shape[1]),
+                   cmap='jet', s=10, lw=0)
+        ax.imshow(orig_maps[j,0,:,:,0], origin='lower', vmin=-1.0, vmax=2.0)
+        if not is_semantic:
+          xymin = np.minimum(np.min(goal_loc[j,:,:], axis=0),
+                             np.min(locs[j,:,:], axis=0))
+          xymax = np.maximum(np.max(goal_loc[j,:,:], axis=0),
+                             np.max(locs[j,:,:], axis=0))
+        else:
+          xymin = np.min(locs[j,:,:], axis=0)
+          xymax = np.max(locs[j,:,:], axis=0)
+        xy1 = (xymax+xymin)/2. - np.maximum(np.max(xymax-xymin), 12)
+        xy2 = (xymax+xymin)/2. + np.maximum(np.max(xymax-xymin), 12)
+        ax.set_xlim([xy1[0], xy2[0]])
+        ax.set_ylim([xy1[1], xy2[1]])
+        ax.set_axis_off()
+  file_name = os.path.join(output_dir, 'trajectory_{:d}.png'.format(global_step))
+  with fu.fopen(file_name, 'w') as f:
+    fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
+  plt.close(fig)
+  return None
+
+def add_default_summaries(mode, arop_full_summary_iters, summarize_ops,
+                          summarize_names, to_aggregate, action_prob_op,
+                          input_tensors, scope_name):
+  assert(mode == 'train' or mode == 'val' or mode == 'test'), \
+      'add_default_summaries mode must be one of train, val or test.'
+ + s_ops = tf_utils.get_default_summary_ops() + + if mode == 'train': + s_ops.summary_ops, s_ops.print_summary_ops, additional_return_ops, \ + arop_summary_iters, arop_eval_fns = tf_utils.simple_summaries( + summarize_ops, summarize_names, mode, to_aggregate=False, + scope_name=scope_name) + s_ops.additional_return_ops += additional_return_ops + s_ops.arop_summary_iters += arop_summary_iters + s_ops.arop_eval_fns += arop_eval_fns + elif mode == 'val': + s_ops.summary_ops, s_ops.print_summary_ops, additional_return_ops, \ + arop_summary_iters, arop_eval_fns = tf_utils.simple_summaries( + summarize_ops, summarize_names, mode, to_aggregate=to_aggregate, + scope_name=scope_name) + s_ops.additional_return_ops += additional_return_ops + s_ops.arop_summary_iters += arop_summary_iters + s_ops.arop_eval_fns += arop_eval_fns + + elif mode == 'test': + s_ops.summary_ops, s_ops.print_summary_ops, additional_return_ops, \ + arop_summary_iters, arop_eval_fns = tf_utils.simple_summaries( + [], [], mode, to_aggregate=[], scope_name=scope_name) + s_ops.additional_return_ops += additional_return_ops + s_ops.arop_summary_iters += arop_summary_iters + s_ops.arop_eval_fns += arop_eval_fns + + + if mode == 'val': + arop = s_ops.additional_return_ops + arop += [[action_prob_op, input_tensors['train']['action']]] + arop += [[input_tensors['step']['loc_on_map'], + input_tensors['common']['goal_loc'], + input_tensors['step']['gt_dist_to_goal']]] + arop += [[input_tensors['step']['loc_on_map'], + input_tensors['common']['orig_maps'], + input_tensors['common']['goal_loc']]] + s_ops.arop_summary_iters += [-1, arop_full_summary_iters, + arop_full_summary_iters] + s_ops.arop_eval_fns += [eval_ap, eval_dist, plot_trajectories] + + elif mode == 'test': + arop = s_ops.additional_return_ops + arop += [[input_tensors['step']['loc_on_map'], + input_tensors['common']['goal_loc'], + input_tensors['step']['gt_dist_to_goal']]] + arop += [[input_tensors['step']['gt_dist_to_goal']]] + arop += [[input_tensors['step']['loc_on_map'], + input_tensors['common']['goal_loc'], + input_tensors['step']['gt_dist_to_goal'], + input_tensors['step']['node_ids'], + input_tensors['step']['perturbs']]] + arop += [[input_tensors['step']['loc_on_map'], + input_tensors['common']['orig_maps'], + input_tensors['common']['goal_loc']]] + s_ops.arop_summary_iters += [-1, -1, -1, arop_full_summary_iters] + s_ops.arop_eval_fns += [eval_dist, save_d_at_t, save_all, + plot_trajectories] + return s_ops + + diff --git a/cognitive_mapping_and_planning/tfcode/tf_utils.py b/cognitive_mapping_and_planning/tfcode/tf_utils.py new file mode 100644 index 00000000000..5f96d8ff5ce --- /dev/null +++ b/cognitive_mapping_and_planning/tfcode/tf_utils.py @@ -0,0 +1,840 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+
+import numpy as np
+import sys
+import tensorflow as tf
+import src.utils as utils
+import logging
+from tensorflow.contrib import slim
+from tensorflow.contrib.metrics.python.ops import confusion_matrix_ops
+from tensorflow.contrib.slim import arg_scope
+from tensorflow.contrib.slim.nets import resnet_v2
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import variable_scope
+sys.path.insert(0, '../slim')
+from preprocessing import inception_preprocessing as ip
+
+resnet_v2_50 = resnet_v2.resnet_v2_50
+
+def custom_residual_block(x, neurons, kernel_size, stride, name, is_training,
+                          wt_decay=0.0001, use_residual=True,
+                          residual_stride_conv=True, conv_fn=slim.conv2d,
+                          batch_norm_param=None):
+  # Pre-activation residual block: batch-norm and ReLU the input, then apply
+  # two convolutions, with an optional 1x1 projection on the shortcut.
+  init_var = np.sqrt(2.0/(kernel_size**2)/neurons)
+  with arg_scope([conv_fn],
+                 weights_regularizer=slim.l2_regularizer(wt_decay),
+                 weights_initializer=tf.random_normal_initializer(stddev=init_var),
+                 biases_initializer=tf.zeros_initializer()):
+    if batch_norm_param is None:
+      batch_norm_param = {'center': True, 'scale': False,
+                          'activation_fn': tf.nn.relu,
+                          'is_training': is_training}
+
+    y = slim.batch_norm(x, scope=name+'_bn', **batch_norm_param)
+    y = conv_fn(y, num_outputs=neurons, kernel_size=kernel_size, stride=stride,
+                activation_fn=None, scope=name+'_1',
+                normalizer_fn=slim.batch_norm,
+                normalizer_params=batch_norm_param)
+    y = conv_fn(y, num_outputs=neurons, kernel_size=kernel_size,
+                stride=1, activation_fn=None, scope=name+'_2')
+
+    if use_residual:
+      if stride != 1 or x.get_shape().as_list()[-1] != neurons:
+        batch_norm_param_ = dict(batch_norm_param)
+        batch_norm_param_['activation_fn'] = None
+        x = conv_fn(x, num_outputs=neurons, kernel_size=1,
+                    stride=stride if residual_stride_conv else 1,
+                    activation_fn=None, scope=name+'_0_1x1',
+                    normalizer_fn=slim.batch_norm,
+                    normalizer_params=batch_norm_param_)
+        if not residual_stride_conv:
+          x = slim.avg_pool2d(x, 1, stride=stride, scope=name+'_0_avg')
+      y = tf.add(x, y, name=name+'_add')
+  return y
+
+def step_gt_prob(step, step_number_op):
+  # Switch the ground-truth sampling probability from 1 to -1 once
+  # step_number_op reaches `step`.
+  with tf.name_scope('step_gt_prob'):
+    out = tf.cond(tf.less(step_number_op, step),
+                  lambda: tf.constant(1.), lambda: tf.constant(-1.))
+  return out
+
+def inverse_sigmoid_decay(k, global_step_op):
+  # Decays from close to 1 towards 0 as global_step grows; used for scheduled
+  # sampling of ground-truth actions.
+  with tf.name_scope('inverse_sigmoid_decay'):
+    k = tf.constant(k, dtype=tf.float32)
+    tmp = k*tf.exp(-tf.cast(global_step_op, tf.float32)/k)
+    tmp = tmp / (1. + tmp)
+  return tmp
+
+def dense_resample(im, flow_im, output_valid_mask, name='dense_resample'):
+  """Resamples im at the locations given by flow_im.
+  Args:
+    im: ...xHxWxC matrix to sample from.
+    flow_im: ...xHxWx2 matrix; samples the image using absolute offsets as
+      given by flow_im.
+  """
+  with tf.name_scope(name):
+    valid_mask = None
+
+    x, y = tf.unstack(flow_im, axis=-1)
+    x = tf.cast(tf.reshape(x, [-1]), tf.float32)
+    y = tf.cast(tf.reshape(y, [-1]), tf.float32)
+
+    # Constants describing the input shape.
+    shape = tf.unstack(tf.shape(im))
+    channels = shape[-1]
+    width = shape[-2]
+    height = shape[-3]
+    num_batch = tf.cast(tf.reduce_prod(tf.stack(shape[:-3])), 'int32')
+    zero = tf.constant(0, dtype=tf.int32)
+
+    # Round the sample locations down and up to get the four neighbours.
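For reference, the gather-based index arithmetic that follows is standard bilinear interpolation. The same computation for one channel and a single sample location in plain NumPy (a sketch):

```
import numpy as np

def bilinear_sample(im, x, y):
  """Sample im (HxW) at a float location (x, y) with bilinear weights."""
  H, W = im.shape
  x0, y0 = int(np.floor(x)), int(np.floor(y))
  x1, y1 = x0 + 1, y0 + 1
  # Interpolation weights, one per corner (named as in dense_resample).
  wa = (x1 - x) * (y1 - y)
  wb = (x1 - x) * (y - y0)
  wc = (x - x0) * (y1 - y)
  wd = (x - x0) * (y - y0)
  # Clamp the corner indices to the image, as the TF code does.
  x0, x1 = np.clip(x0, 0, W - 1), np.clip(x1, 0, W - 1)
  y0, y1 = np.clip(y0, 0, H - 1), np.clip(y1, 0, H - 1)
  return (wa * im[y0, x0] + wb * im[y1, x0] +
          wc * im[y0, x1] + wd * im[y1, x1])
```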
+    x0 = tf.cast(tf.floor(x), 'int32'); x1 = x0 + 1
+    y0 = tf.cast(tf.floor(y), 'int32'); y1 = y0 + 1
+
+    if output_valid_mask:
+      valid_mask = tf.logical_and(
+          tf.logical_and(tf.less_equal(x, tf.cast(width, tf.float32)-1.),
+                         tf.greater_equal(x, 0.)),
+          tf.logical_and(tf.less_equal(y, tf.cast(height, tf.float32)-1.),
+                         tf.greater_equal(y, 0.)))
+      valid_mask = tf.reshape(valid_mask, shape=shape[:-1] + [1])
+
+    x0 = tf.clip_by_value(x0, zero, width-1)
+    x1 = tf.clip_by_value(x1, zero, width-1)
+    y0 = tf.clip_by_value(y0, zero, height-1)
+    y1 = tf.clip_by_value(y1, zero, height-1)
+
+    dim2 = width; dim1 = width * height
+
+    # Create the base index for each image in the (flattened) batch.
+    base = tf.reshape(tf.range(num_batch) * dim1, shape=[-1,1])
+    base = tf.reshape(tf.tile(base, [1, height*width]), shape=[-1])
+
+    base_y0 = base + y0 * dim2
+    base_y1 = base + y1 * dim2
+    idx_a = base_y0 + x0
+    idx_b = base_y1 + x0
+    idx_c = base_y0 + x1
+    idx_d = base_y1 + x1
+
+    # Use the indices to look up pixels in the flattened image and restore the
+    # channels dimension.
+    sh = tf.stack([tf.constant(-1,dtype=tf.int32), channels])
+    im_flat = tf.cast(tf.reshape(im, sh), dtype=tf.float32)
+    pixel_a = tf.gather(im_flat, idx_a)
+    pixel_b = tf.gather(im_flat, idx_b)
+    pixel_c = tf.gather(im_flat, idx_c)
+    pixel_d = tf.gather(im_flat, idx_d)
+
+    # Finally, calculate the bilinearly interpolated values.
+    x1_f = tf.to_float(x1)
+    y1_f = tf.to_float(y1)
+
+    wa = tf.expand_dims(((x1_f - x) * (y1_f - y)), 1)
+    wb = tf.expand_dims((x1_f - x) * (1.0 - (y1_f - y)), 1)
+    wc = tf.expand_dims(((1.0 - (x1_f - x)) * (y1_f - y)), 1)
+    wd = tf.expand_dims(((1.0 - (x1_f - x)) * (1.0 - (y1_f - y))), 1)
+
+    output = tf.add_n([wa * pixel_a, wb * pixel_b, wc * pixel_c, wd * pixel_d])
+    output = tf.reshape(output, shape=tf.shape(im))
+    return output, valid_mask
+
+def get_flow(t, theta, map_size, name_scope='gen_flow'):
+  """Rotates the map by theta and translates the rotated map by t.
+
+  Assume that the robot rotates by an angle theta and then moves forward by
+  translation t. This function returns the flow field. For every pixel in the
+  new image it tells us which pixel in the original image it came from:
+  NewI(x, y) = OldI(flow_x(x,y), flow_y(x,y)).
+
+  Assume there is a point p in the original image. The robot rotates by R and
+  moves forward by t. Then p1 = Rt*p and p2 = p1 - t (the world moves in the
+  opposite direction). So p2 = Rt*p - t, and thus p2 came from R*(p2 + t),
+  which is what this function calculates.
+
+  t:     ... x 2 (translation for B batches of N motions each).
+  theta: ... x 1 (rotation for B batches of N motions each).
+
+  Output: ...
x map_size x map_size x 2 + """ + + with tf.name_scope(name_scope): + tx, ty = tf.unstack(tf.reshape(t, shape=[-1, 1, 1, 1, 2]), axis=4) + theta = tf.reshape(theta, shape=[-1, 1, 1, 1]) + c = tf.constant((map_size-1.)/2., dtype=tf.float32) + + x, y = np.meshgrid(np.arange(map_size), np.arange(map_size)) + x = tf.constant(x[np.newaxis, :, :, np.newaxis], dtype=tf.float32, name='x', + shape=[1, map_size, map_size, 1]) + y = tf.constant(y[np.newaxis, :, :, np.newaxis], dtype=tf.float32, name='y', + shape=[1,map_size, map_size, 1]) + + x = x-(-tx+c) + y = y-(-ty+c) + + sin_theta = tf.sin(theta) + cos_theta = tf.cos(theta) + xr = cos_theta*x - sin_theta*y + yr = sin_theta*x + cos_theta*y + + xr = xr + c + yr = yr + c + + flow = tf.stack([xr, yr], axis=-1) + sh = tf.unstack(tf.shape(t), axis=0) + sh = tf.stack(sh[:-1]+[tf.constant(_, dtype=tf.int32) for _ in [map_size, map_size, 2]]) + flow = tf.reshape(flow, shape=sh) + return flow + +def distort_image(im, fast_mode=False): + # All images in the same batch are transformed the same way, but over + # iterations you see different distortions. + # im should be float with values between 0 and 1. + im_ = tf.reshape(im, shape=(-1,1,3)) + im_ = ip.apply_with_random_selector( + im_, lambda x, ordering: ip.distort_color(x, ordering, fast_mode), + num_cases=4) + im_ = tf.reshape(im_, tf.shape(im)) + return im_ + +def fc_network(x, neurons, wt_decay, name, num_pred=None, offset=0, + batch_norm_param=None, dropout_ratio=0.0, is_training=None): + if dropout_ratio > 0: + assert(is_training is not None), \ + 'is_training needs to be defined when trainnig with dropout.' + + repr = [] + for i, neuron in enumerate(neurons): + init_var = np.sqrt(2.0/neuron) + if batch_norm_param is not None: + x = slim.fully_connected(x, neuron, activation_fn=None, + weights_initializer=tf.random_normal_initializer(stddev=init_var), + weights_regularizer=slim.l2_regularizer(wt_decay), + normalizer_fn=slim.batch_norm, + normalizer_params=batch_norm_param, + biases_initializer=tf.zeros_initializer(), + scope='{:s}_{:d}'.format(name, offset+i)) + else: + x = slim.fully_connected(x, neuron, activation_fn=tf.nn.relu, + weights_initializer=tf.random_normal_initializer(stddev=init_var), + weights_regularizer=slim.l2_regularizer(wt_decay), + biases_initializer=tf.zeros_initializer(), + scope='{:s}_{:d}'.format(name, offset+i)) + if dropout_ratio > 0: + x = slim.dropout(x, keep_prob=1-dropout_ratio, is_training=is_training, + scope='{:s}_{:d}'.format('dropout_'+name, offset+i)) + repr.append(x) + + if num_pred is not None: + init_var = np.sqrt(2.0/num_pred) + x = slim.fully_connected(x, num_pred, + weights_regularizer=slim.l2_regularizer(wt_decay), + weights_initializer=tf.random_normal_initializer(stddev=init_var), + biases_initializer=tf.zeros_initializer(), + activation_fn=None, + scope='{:s}_pred'.format(name)) + return x, repr + +def concat_state_x_list(f, names): + af = {} + for i, k in enumerate(names): + af[k] = np.concatenate([x[i] for x in f], axis=1) + return af + +def concat_state_x(f, names): + af = {} + for k in names: + af[k] = np.concatenate([x[k] for x in f], axis=1) + # af[k] = np.swapaxes(af[k], 0, 1) + return af + +def sample_action(rng, action_probs, optimal_action, sample_gt_prob, + type='sample', combine_type='one_or_other'): + optimal_action_ = optimal_action/np.sum(optimal_action+0., 1, keepdims=True) + action_probs_ = action_probs/np.sum(action_probs+0.001, 1, keepdims=True) + batch_size = action_probs_.shape[0] + + action = np.zeros((batch_size), dtype=np.int32) 
+ action_sample_wt = np.zeros((batch_size), dtype=np.float32) + if combine_type == 'add': + sample_gt_prob_ = np.minimum(np.maximum(sample_gt_prob, 0.), 1.) + + for i in range(batch_size): + if combine_type == 'one_or_other': + sample_gt = rng.rand() < sample_gt_prob + if sample_gt: distr_ = optimal_action_[i,:]*1. + else: distr_ = action_probs_[i,:]*1. + elif combine_type == 'add': + distr_ = optimal_action_[i,:]*sample_gt_prob_ + \ + (1.-sample_gt_prob_)*action_probs_[i,:] + distr_ = distr_ / np.sum(distr_) + + if type == 'sample': + action[i] = np.argmax(rng.multinomial(1, distr_, size=1)) + elif type == 'argmax': + action[i] = np.argmax(distr_) + action_sample_wt[i] = action_probs_[i, action[i]] / distr_[action[i]] + return action, action_sample_wt + +def train_step_custom_online_sampling(sess, train_op, global_step, + train_step_kwargs, mode='train'): + m = train_step_kwargs['m'] + obj = train_step_kwargs['obj'] + rng_data = train_step_kwargs['rng_data'] + rng_action = train_step_kwargs['rng_action'] + writer = train_step_kwargs['writer'] + iters = train_step_kwargs['iters'] + num_steps = train_step_kwargs['num_steps'] + logdir = train_step_kwargs['logdir'] + dagger_sample_bn_false = train_step_kwargs['dagger_sample_bn_false'] + train_display_interval = train_step_kwargs['train_display_interval'] + if 'outputs' not in m.train_ops: + m.train_ops['outputs'] = [] + + s_ops = m.summary_ops[mode] + val_additional_ops = [] + + # Print all variables here. + if False: + v = tf.get_collection(tf.GraphKeys.VARIABLES) + v_op = [_.value() for _ in v] + v_op_value = sess.run(v_op) + + filter = lambda x, y: 'Adam' in x.name + # filter = lambda x, y: np.is_any_nan(y) + ind = [i for i, (_, __) in enumerate(zip(v, v_op_value)) if filter(_, __)] + v = [v[i] for i in ind] + v_op_value = [v_op_value[i] for i in ind] + + for i in range(len(v)): + logging.info('XXXX: variable: %30s, is_any_nan: %5s, norm: %f.', + v[i].name, np.any(np.isnan(v_op_value[i])), + np.linalg.norm(v_op_value[i])) + + tt = utils.Timer() + for i in range(iters): + tt.tic() + # Sample a room. + e = obj.sample_env(rng_data) + + # Initialize the agent. + init_env_state = e.reset(rng_data) + + # Get and process the common data. 
+ input = e.get_common_data() + input = e.pre_common_data(input) + feed_dict = prepare_feed_dict(m.input_tensors['common'], input) + if dagger_sample_bn_false: + feed_dict[m.train_ops['batch_norm_is_training_op']] = False + common_data = sess.run(m.train_ops['common'], feed_dict=feed_dict) + + states = [] + state_features = [] + state_targets = [] + net_state_to_input = [] + step_data_cache = [] + executed_actions = [] + rewards = [] + action_sample_wts = [] + states.append(init_env_state) + + net_state = sess.run(m.train_ops['init_state'], feed_dict=feed_dict) + net_state = dict(zip(m.train_ops['state_names'], net_state)) + net_state_to_input.append(net_state) + for j in range(num_steps): + f = e.get_features(states[j], j) + f = e.pre_features(f) + f.update(net_state) + f['step_number'] = np.ones((1,1,1), dtype=np.int32)*j + state_features.append(f) + + feed_dict = prepare_feed_dict(m.input_tensors['step'], state_features[-1]) + optimal_action = e.get_optimal_action(states[j], j) + for x, v in zip(m.train_ops['common'], common_data): + feed_dict[x] = v + if dagger_sample_bn_false: + feed_dict[m.train_ops['batch_norm_is_training_op']] = False + outs = sess.run([m.train_ops['step'], m.sample_gt_prob_op, + m.train_ops['step_data_cache'], + m.train_ops['updated_state'], + m.train_ops['outputs']], feed_dict=feed_dict) + action_probs = outs[0] + sample_gt_prob = outs[1] + step_data_cache.append(dict(zip(m.train_ops['step_data_cache'], outs[2]))) + net_state = outs[3] + if hasattr(e, 'update_state'): + outputs = outs[4] + outputs = dict(zip(m.train_ops['output_names'], outputs)) + e.update_state(outputs, j) + state_targets.append(e.get_targets(states[j], j)) + + if j < num_steps-1: + # Sample from action_probs and optimal action. + action, action_sample_wt = sample_action( + rng_action, action_probs, optimal_action, sample_gt_prob, + m.sample_action_type, m.sample_action_combine_type) + next_state, reward = e.take_action(states[j], action, j) + executed_actions.append(action) + states.append(next_state) + rewards.append(reward) + action_sample_wts.append(action_sample_wt) + net_state = dict(zip(m.train_ops['state_names'], net_state)) + net_state_to_input.append(net_state) + + # Concatenate things together for training. 
+    rewards = np.array(rewards).T
+    action_sample_wts = np.array(action_sample_wts).T
+    executed_actions = np.array(executed_actions).T
+    all_state_targets = concat_state_x(state_targets, e.get_targets_name())
+    all_state_features = concat_state_x(state_features,
+                                        e.get_features_name()+['step_number'])
+    # all_state_net = concat_state_x(net_state_to_input,
+    #                                m.train_ops['state_names'])
+    all_step_data_cache = concat_state_x(step_data_cache,
+                                         m.train_ops['step_data_cache'])
+
+    dict_train = dict(common_input)
+    dict_train.update(all_state_features)
+    dict_train.update(all_state_targets)
+    # dict_train.update(all_state_net)
+    dict_train.update(net_state_to_input[0])
+    dict_train.update(all_step_data_cache)
+    dict_train.update({'rewards': rewards,
+                       'action_sample_wts': action_sample_wts,
+                       'executed_actions': executed_actions})
+    feed_dict = prepare_feed_dict(m.input_tensors['train'], dict_train)
+    for x in m.train_ops['step_data_cache']:
+      feed_dict[x] = all_step_data_cache[x]
+    if mode == 'train':
+      n_step = sess.run(global_step)
+
+      if np.mod(n_step, train_display_interval) == 0:
+        total_loss, np_global_step, summary, print_summary = sess.run(
+            [train_op, global_step, s_ops.summary_ops,
+             s_ops.print_summary_ops], feed_dict=feed_dict)
+        logging.error('')  # Blank log line after the printed summaries.
+      else:
+        total_loss, np_global_step, summary = sess.run(
+            [train_op, global_step, s_ops.summary_ops], feed_dict=feed_dict)
+
+      if writer is not None and summary is not None:
+        writer.add_summary(summary, np_global_step)
+
+      should_stop = sess.run(m.should_stop_op)
+
+    if mode != 'train':
+      arop = [[] for j in range(len(s_ops.additional_return_ops))]
+      for j in range(len(s_ops.additional_return_ops)):
+        if s_ops.arop_summary_iters[j] < 0 or i < s_ops.arop_summary_iters[j]:
+          arop[j] = s_ops.additional_return_ops[j]
+      val = sess.run(arop, feed_dict=feed_dict)
+      val_additional_ops.append(val)
+      tt.toc(log_at=60, log_str='val timer {:d} / {:d}: '.format(i, iters),
+             type='time')
+
+  if mode != 'train':
+    # Write the default val summaries.
+    summary, print_summary, np_global_step = sess.run(
+        [s_ops.summary_ops, s_ops.print_summary_ops, global_step])
+    if writer is not None and summary is not None:
+      writer.add_summary(summary, np_global_step)
+
+    # Write the custom validation summaries.
+    val_summaries = []
+    val_additional_ops = zip(*val_additional_ops)
+    if len(s_ops.arop_eval_fns) > 0:
+      val_metric_summary = tf.summary.Summary()
+      for i in range(len(s_ops.arop_eval_fns)):
+        val_summary = None
+        if s_ops.arop_eval_fns[i] is not None:
+          val_summary = s_ops.arop_eval_fns[i](val_additional_ops[i],
+                                               np_global_step, logdir,
+                                               val_metric_summary,
+                                               s_ops.arop_summary_iters[i])
+        val_summaries.append(val_summary)
+      if writer is not None:
+        writer.add_summary(val_metric_summary, np_global_step)
+
+    # Return the additional val ops.
+    total_loss = (val_additional_ops, val_summaries)
+    should_stop = None
+
+  return total_loss, should_stop
+
+def train_step_custom_v2(sess, train_op, global_step, train_step_kwargs,
+                         mode='train'):
+  m = train_step_kwargs['m']
+  obj = train_step_kwargs['obj']
+  rng = train_step_kwargs['rng']
+  writer = train_step_kwargs['writer']
+  iters = train_step_kwargs['iters']
+  logdir = train_step_kwargs['logdir']
+  train_display_interval = train_step_kwargs['train_display_interval']
+
+  s_ops = m.summary_ops[mode]
+  val_additional_ops = []
+
+  # Print all variables here.
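+  # (Disabled debugging block: flip the condition to True to log NaN checks
+  # and norms for a filtered subset of the variables.)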
+  if False:
+    v = tf.get_collection(tf.GraphKeys.VARIABLES)
+    v_op = [_.value() for _ in v]
+    v_op_value = sess.run(v_op)
+
+    keep_fn = lambda x, y: 'Adam' in x.name
+    # keep_fn = lambda x, y: np.any(np.isnan(y))
+    ind = [i for i, (_, __) in enumerate(zip(v, v_op_value)) if keep_fn(_, __)]
+    v = [v[i] for i in ind]
+    v_op_value = [v_op_value[i] for i in ind]
+
+    for i in range(len(v)):
+      logging.info('XXXX: variable: %30s, is_any_nan: %5s, norm: %f.',
+                   v[i].name, np.any(np.isnan(v_op_value[i])),
+                   np.linalg.norm(v_op_value[i]))
+
+  tt = utils.Timer()
+  for i in range(iters):
+    tt.tic()
+    e = obj.sample_env(rng)
+    rngs = e.gen_rng(rng)
+    input_data = e.gen_data(*rngs)
+    input_data = e.pre_data(input_data)
+    feed_dict = prepare_feed_dict(m.input_tensors, input_data)
+
+    if mode == 'train':
+      n_step = sess.run(global_step)
+
+      if np.mod(n_step, train_display_interval) == 0:
+        total_loss, np_global_step, summary, print_summary = sess.run(
+            [train_op, global_step, s_ops.summary_ops,
+             s_ops.print_summary_ops], feed_dict=feed_dict)
+      else:
+        total_loss, np_global_step, summary = sess.run(
+            [train_op, global_step, s_ops.summary_ops],
+            feed_dict=feed_dict)
+
+      if writer is not None and summary is not None:
+        writer.add_summary(summary, np_global_step)
+
+      should_stop = sess.run(m.should_stop_op)
+
+    if mode != 'train':
+      arop = [[] for j in range(len(s_ops.additional_return_ops))]
+      for j in range(len(s_ops.additional_return_ops)):
+        if s_ops.arop_summary_iters[j] < 0 or i < s_ops.arop_summary_iters[j]:
+          arop[j] = s_ops.additional_return_ops[j]
+      val = sess.run(arop, feed_dict=feed_dict)
+      val_additional_ops.append(val)
+      tt.toc(log_at=60, log_str='val timer {:d} / {:d}: '.format(i, iters),
+             type='time')
+
+  if mode != 'train':
+    # Write the default val summaries.
+    summary, print_summary, np_global_step = sess.run(
+        [s_ops.summary_ops, s_ops.print_summary_ops, global_step])
+    if writer is not None and summary is not None:
+      writer.add_summary(summary, np_global_step)
+
+    # Write the custom validation summaries.
+    val_summaries = []
+    val_additional_ops = zip(*val_additional_ops)
+    if len(s_ops.arop_eval_fns) > 0:
+      val_metric_summary = tf.summary.Summary()
+      for i in range(len(s_ops.arop_eval_fns)):
+        val_summary = None
+        if s_ops.arop_eval_fns[i] is not None:
+          val_summary = s_ops.arop_eval_fns[i](val_additional_ops[i],
+                                               np_global_step, logdir,
+                                               val_metric_summary,
+                                               s_ops.arop_summary_iters[i])
+        val_summaries.append(val_summary)
+      if writer is not None:
+        writer.add_summary(val_metric_summary, np_global_step)
+
+    # Return the additional val ops.
+    total_loss = (val_additional_ops, val_summaries)
+    should_stop = None
+
+  return total_loss, should_stop
+
+def train_step_custom(sess, train_op, global_step, train_step_kwargs,
+                      mode='train'):
+  m = train_step_kwargs['m']
+  params = train_step_kwargs['params']
+  rng = train_step_kwargs['rng']
+  writer = train_step_kwargs['writer']
+  iters = train_step_kwargs['iters']
+  gen_rng = train_step_kwargs['gen_rng']
+  logdir = train_step_kwargs['logdir']
+  gen_data = train_step_kwargs['gen_data']
+  pre_data = train_step_kwargs['pre_data']
+  train_display_interval = train_step_kwargs['train_display_interval']
+
+  val_additional_ops = []
+  # Print all variables here.
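+  # (Disabled debugging block, as above: flip the condition to True to log
+  # per-variable NaN checks and norms.)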
+  if False:
+    v = tf.get_collection(tf.GraphKeys.VARIABLES)
+    for _ in v:
+      val = sess.run(_.value())
+      logging.info('variable: %30s, is_any_nan: %5s, norm: %f.', _.name,
+                   np.any(np.isnan(val)), np.linalg.norm(val))
+
+  for i in range(iters):
+    rngs = gen_rng(params, rng)
+    input_data = gen_data(params, *rngs)
+    input_data = pre_data(params, input_data)
+    feed_dict = prepare_feed_dict(m.input_tensors, input_data)
+
+    if mode == 'train':
+      n_step = sess.run(global_step)
+
+      if np.mod(n_step, train_display_interval) == 0:
+        total_loss, np_global_step, summary, print_summary = sess.run(
+            [train_op, global_step, m.summary_op[mode],
+             m.print_summary_op[mode]], feed_dict=feed_dict)
+      else:
+        total_loss, np_global_step, summary = sess.run(
+            [train_op, global_step, m.summary_op[mode]],
+            feed_dict=feed_dict)
+
+      if writer is not None:
+        writer.add_summary(summary, np_global_step)
+
+      should_stop = sess.run(m.should_stop_op)
+
+    if mode == 'val':
+      val = sess.run(m.agg_update_op[mode] + m.additional_return_op[mode],
+                     feed_dict=feed_dict)
+      val_additional_ops.append(val[len(m.agg_update_op[mode]):])
+
+  if mode == 'val':
+    summary, print_summary, np_global_step = sess.run(
+        [m.summary_op[mode], m.print_summary_op[mode], global_step])
+    if writer is not None:
+      writer.add_summary(summary, np_global_step)
+    sess.run([m.agg_reset_op[mode]])
+
+    # Write the custom validation summaries.
+    if m.eval_metrics_fn[mode] is not None:
+      val_metric_summary = m.eval_metrics_fn[mode](val_additional_ops,
+                                                   np_global_step, logdir)
+      if writer is not None:
+        writer.add_summary(val_metric_summary, np_global_step)
+
+    total_loss = val_additional_ops
+    should_stop = None
+
+  return total_loss, should_stop
+
+def setup_training(loss_op, initial_learning_rate, steps_per_decay,
+                   learning_rate_decay, momentum, max_steps,
+                   sync=False, adjust_lr_sync=True,
+                   num_workers=1, replica_id=0, vars_to_optimize=None,
+                   clip_gradient_norm=0, typ=None, momentum2=0.999,
+                   adam_eps=1e-8):
+  if sync and adjust_lr_sync:
+    initial_learning_rate = initial_learning_rate * num_workers
+    max_steps = np.int(max_steps / num_workers)
+    steps_per_decay = np.int(steps_per_decay / num_workers)
+
+  global_step_op = slim.get_or_create_global_step()
+  lr_op = tf.train.exponential_decay(initial_learning_rate, global_step_op,
+                                     steps_per_decay, learning_rate_decay,
+                                     staircase=True)
+  if typ == 'sgd':
+    optimizer = tf.train.MomentumOptimizer(lr_op, momentum)
+  elif typ == 'adam':
+    optimizer = tf.train.AdamOptimizer(learning_rate=lr_op, beta1=momentum,
+                                       beta2=momentum2, epsilon=adam_eps)
+
+  if sync:
+    sync_optimizer = tf.train.SyncReplicasOptimizer(
+        optimizer, replicas_to_aggregate=num_workers, replica_id=replica_id,
+        total_num_replicas=num_workers)
+    train_op = slim.learning.create_train_op(
+        loss_op, sync_optimizer, variables_to_train=vars_to_optimize,
+        clip_gradient_norm=clip_gradient_norm)
+  else:
+    sync_optimizer = None
+    train_op = slim.learning.create_train_op(
+        loss_op, optimizer, variables_to_train=vars_to_optimize,
+        clip_gradient_norm=clip_gradient_norm)
+  should_stop_op = tf.greater_equal(global_step_op, max_steps)
+  return (lr_op, global_step_op, train_op, should_stop_op, optimizer,
+          sync_optimizer)
+
+def add_value_to_summary(metric_summary, tag, val, log=True, tag_str=None):
+  """Adds a scalar summary to the summary object. Optionally also logs to
+  logging."""
+  new_value = metric_summary.value.add()
+  new_value.tag = tag
+  new_value.simple_value = val
+  if log:
+    if tag_str is None:
+      tag_str = tag + ': %f'
+    logging.info(tag_str, val)
+
+def add_scalar_summary_op(tensor, name=None, summary_key='summaries',
+                          print_summary_key='print_summaries', prefix=''):
+  collections = []
+  op = tf.summary.scalar(name, tensor, collections=collections)
+  if summary_key != print_summary_key:
+    tf.add_to_collection(summary_key, op)
+
+  op = tf.Print(op, [tensor], ' {:-<25s}: '.format(name) + prefix)
+  tf.add_to_collection(print_summary_key, op)
+  return op
+
+def setup_inputs(inputs):
+  input_tensors = {}
+  input_shapes = {}
+  for (name, typ, sz) in inputs:
+    _ = tf.placeholder(typ, shape=sz, name=name)
+    input_tensors[name] = _
+    input_shapes[name] = sz
+  return input_tensors, input_shapes
+
+def prepare_feed_dict(input_tensors, inputs):
+  feed_dict = {}
+  for n in input_tensors.keys():
+    feed_dict[input_tensors[n]] = \
+        inputs[n].astype(input_tensors[n].dtype.as_numpy_dtype)
+  return feed_dict
+
+def simple_add_summaries(summarize_ops, summarize_names,
+                         summary_key='summaries',
+                         print_summary_key='print_summaries', prefix=''):
+  for op, name in zip(summarize_ops, summarize_names):
+    add_scalar_summary_op(op, name, summary_key, print_summary_key, prefix)
+
+  summary_op = tf.summary.merge_all(summary_key)
+  print_summary_op = tf.summary.merge_all(print_summary_key)
+  return summary_op, print_summary_op
+
+def add_summary_ops(m, summarize_ops, summarize_names, to_aggregate=None,
+                    summary_key='summaries',
+                    print_summary_key='print_summaries', prefix=''):
+  if not isinstance(to_aggregate, list):
+    to_aggregate = [to_aggregate for _ in summarize_ops]
+
+  # Set up aggregating metrics.
+  if np.any(to_aggregate):
+    agg_ops = []
+    for op, name, to_agg in zip(summarize_ops, summarize_names, to_aggregate):
+      if to_agg:
+        # agg_ops.append(slim.metrics.streaming_mean(op, return_reset_op=True))
+        agg_ops.append(tf.contrib.metrics.streaming_mean(op))
+        # agg_ops.append(tf.contrib.metrics.streaming_mean(op, return_reset_op=True))
+      else:
+        agg_ops.append([None, None])
+
+    # agg_values_op, agg_update_op, agg_reset_op = zip(*agg_ops)
+    # agg_update_op = [x for x in agg_update_op if x is not None]
+    # agg_reset_op = [x for x in agg_reset_op if x is not None]
+    agg_values_op, agg_update_op = zip(*agg_ops)
+    agg_update_op = [x for x in agg_update_op if x is not None]
+    agg_reset_op = [tf.no_op()]
+  else:
+    agg_values_op = [None for _ in to_aggregate]
+    agg_update_op = [tf.no_op()]
+    agg_reset_op = [tf.no_op()]
+
+  for op, name, to_agg, agg_op in zip(summarize_ops, summarize_names,
+                                      to_aggregate, agg_values_op):
+    if to_agg:
+      add_scalar_summary_op(agg_op, name, summary_key, print_summary_key,
+                            prefix)
+    else:
+      add_scalar_summary_op(op, name, summary_key, print_summary_key, prefix)
+
+  summary_op = tf.summary.merge_all(summary_key)
+  print_summary_op = tf.summary.merge_all(print_summary_key)
+  return summary_op, print_summary_op, agg_update_op, agg_reset_op
+
+def accum_val_ops(outputs, names, global_step, output_dir, metric_summary, N):
+  """Aggregates collected scalar outputs and adds their means to the summary.
+
+  Args:
+    outputs : List of collected scalar values to summarize.
+    names : Names of the scalars.
+    global_step : global_step.
+    output_dir : where to store results.
+    metric_summary : summary object to add summaries to.
+    N : number of outputs to process.
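+
+  Returns:
+    outs : list with the mean of each scalar across the processed outputs.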
+ """ + outs = [] + if N >= 0: + outputs = outputs[:N] + for i in range(len(outputs[0])): + scalar = np.array(map(lambda x: x[i], outputs)) + assert(scalar.ndim == 1) + add_value_to_summary(metric_summary, names[i], np.mean(scalar), + tag_str='{:>27s}: [{:s}]: %f'.format(names[i], '')) + outs.append(np.mean(scalar)) + return outs + +def get_default_summary_ops(): + return utils.Foo(summary_ops=None, print_summary_ops=None, + additional_return_ops=[], arop_summary_iters=[], + arop_eval_fns=[]) + + +def simple_summaries(summarize_ops, summarize_names, mode, to_aggregate=False, + scope_name='summary'): + + if type(to_aggregate) != list: + to_aggregate = [to_aggregate for _ in summarize_ops] + + summary_key = '{:s}_summaries'.format(mode) + print_summary_key = '{:s}_print_summaries'.format(mode) + prefix=' [{:s}]: '.format(mode) + + # Default ops for things that dont need to be aggregated. + if not np.all(to_aggregate): + for op, name, to_agg in zip(summarize_ops, summarize_names, to_aggregate): + if not to_agg: + add_scalar_summary_op(op, name, summary_key, print_summary_key, prefix) + summary_ops = tf.summary.merge_all(summary_key) + print_summary_ops = tf.summary.merge_all(print_summary_key) + else: + summary_ops = tf.no_op() + print_summary_ops = tf.no_op() + + # Default ops for things that dont need to be aggregated. + if np.any(to_aggregate): + additional_return_ops = [[summarize_ops[i] + for i, x in enumerate(to_aggregate )if x]] + arop_summary_iters = [-1] + s_names = ['{:s}/{:s}'.format(scope_name, summarize_names[i]) + for i, x in enumerate(to_aggregate) if x] + fn = lambda outputs, global_step, output_dir, metric_summary, N: \ + accum_val_ops(outputs, s_names, global_step, output_dir, metric_summary, + N) + arop_eval_fns = [fn] + else: + additional_return_ops = [] + arop_summary_iters = [] + arop_eval_fns = [] + return summary_ops, print_summary_ops, additional_return_ops, \ + arop_summary_iters, arop_eval_fns diff --git a/cognitive_mapping_and_planning/tfcode/vision_baseline_lstm.py b/cognitive_mapping_and_planning/tfcode/vision_baseline_lstm.py new file mode 100644 index 00000000000..1b9d6877241 --- /dev/null +++ b/cognitive_mapping_and_planning/tfcode/vision_baseline_lstm.py @@ -0,0 +1,533 @@ +# Copyright 2016 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+
+import numpy as np
+
+import tensorflow as tf
+
+from tensorflow.contrib import slim
+
+import logging
+from tensorflow.python.platform import app
+from tensorflow.python.platform import flags
+from src import utils
+import src.file_utils as fu
+import tfcode.nav_utils as nu
+from tfcode import tf_utils
+
+setup_train_step_kwargs = nu.default_train_step_kwargs
+compute_losses_multi_or = nu.compute_losses_multi_or
+get_repr_from_image = nu.get_repr_from_image
+
+_save_d_at_t = nu.save_d_at_t
+_save_all = nu.save_all
+_eval_ap = nu.eval_ap
+_eval_dist = nu.eval_dist
+_plot_trajectories = nu.plot_trajectories
+
+def lstm_online(cell_fn, num_steps, inputs, state, varscope):
+  # inputs is B x num_steps x C, C channels.
+  # state is B x 1 x C' (cell state and output concatenated, since the cells
+  # are built with state_is_tuple=False).
+  # Output state is always B x 1 x C'.
+  inputs = tf.unstack(inputs, axis=1, num=num_steps)
+  state = tf.unstack(state, axis=1, num=1)[0]
+  outputs = []
+
+  if num_steps > 1:
+    varscope.reuse_variables()
+
+  for s in range(num_steps):
+    output, state = cell_fn(inputs[s], state)
+    outputs.append(output)
+  outputs = tf.stack(outputs, axis=1)
+  state = tf.stack([state], axis=1)
+  return outputs, state
+
+def _inputs(problem, lstm_states, lstm_state_dims):
+  # Set up inputs.
+  with tf.name_scope('inputs'):
+    n_views = problem.n_views
+
+    inputs = []
+    inputs.append(('orig_maps', tf.float32,
+                   (problem.batch_size, 1, None, None, 1)))
+    inputs.append(('goal_loc', tf.float32,
+                   (problem.batch_size, problem.num_goals, 2)))
+
+    # For initializing the LSTM.
+    inputs.append(('rel_goal_loc_at_start', tf.float32,
+                   (problem.batch_size, problem.num_goals,
+                    problem.rel_goal_loc_dim)))
+    common_input_data, _ = tf_utils.setup_inputs(inputs)
+
+    inputs = []
+    inputs.append(('imgs', tf.float32, (problem.batch_size, None, n_views,
+                                        problem.img_height, problem.img_width,
+                                        problem.img_channels)))
+    # Goal location as a tuple of delta location and delta theta.
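+    # (rel_goal_loc_dim is set by the task config and encodes the goal
+    # relative to the agent's current pose.)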
+    inputs.append(('rel_goal_loc', tf.float32, (problem.batch_size, None,
+                                                problem.rel_goal_loc_dim)))
+    if problem.outputs.visit_count:
+      inputs.append(('visit_count', tf.int32, (problem.batch_size, None, 1)))
+      inputs.append(('last_visit', tf.int32, (problem.batch_size, None, 1)))
+
+    for i, (state, dim) in enumerate(zip(lstm_states, lstm_state_dims)):
+      inputs.append((state, tf.float32, (problem.batch_size, 1, dim)))
+
+    if problem.outputs.egomotion:
+      inputs.append(('incremental_locs', tf.float32,
+                     (problem.batch_size, None, 2)))
+      inputs.append(('incremental_thetas', tf.float32,
+                     (problem.batch_size, None, 1)))
+
+    inputs.append(('step_number', tf.int32, (1, None, 1)))
+    inputs.append(('node_ids', tf.int32, (problem.batch_size, None,
+                                          problem.node_ids_dim)))
+    inputs.append(('perturbs', tf.float32, (problem.batch_size, None,
+                                            problem.perturbs_dim)))
+
+    # For plotting the result trajectories.
+    inputs.append(('loc_on_map', tf.float32, (problem.batch_size, None, 2)))
+    inputs.append(('gt_dist_to_goal', tf.float32,
+                   (problem.batch_size, None, 1)))
+    step_input_data, _ = tf_utils.setup_inputs(inputs)
+
+    inputs = []
+    inputs.append(('executed_actions', tf.int32, (problem.batch_size, None)))
+    inputs.append(('rewards', tf.float32, (problem.batch_size, None)))
+    inputs.append(('action_sample_wts', tf.float32,
+                   (problem.batch_size, None)))
+    inputs.append(('action', tf.int32, (problem.batch_size, None,
+                                        problem.num_actions)))
+    train_data, _ = tf_utils.setup_inputs(inputs)
+    train_data.update(step_input_data)
+    train_data.update(common_input_data)
+  return common_input_data, step_input_data, train_data
+
+def _add_summaries(m, summary_mode, arop_full_summary_iters):
+  summarize_ops = [m.lr_op, m.global_step_op, m.sample_gt_prob_op,
+                   m.total_loss_op, m.data_loss_op, m.reg_loss_op] + m.acc_ops
+  summarize_names = ['lr', 'global_step', 'sample_gt_prob_op', 'total_loss',
+                     'data_loss', 'reg_loss'] + \
+      ['acc_{:d}'.format(i) for i in range(len(m.acc_ops))]
+  to_aggregate = [0, 0, 0, 1, 1, 1] + [1]*len(m.acc_ops)
+
+  scope_name = 'summary'
+  with tf.name_scope(scope_name):
+    s_ops = nu.add_default_summaries(summary_mode, arop_full_summary_iters,
+                                     summarize_ops, summarize_names,
+                                     to_aggregate, m.action_prob_op,
+                                     m.input_tensors, scope_name=scope_name)
+  m.summary_ops = {summary_mode: s_ops}
+
+def visit_count_fc(visit_count, last_visit, embed_neurons, wt_decay,
+                   fc_dropout, is_training=None):
+  with tf.variable_scope('embed_visit_count'):
+    visit_count = tf.reshape(visit_count, shape=[-1])
+    last_visit = tf.reshape(last_visit, shape=[-1])
+
+    visit_count = tf.clip_by_value(visit_count, clip_value_min=-1,
+                                   clip_value_max=15)
+    last_visit = tf.clip_by_value(last_visit, clip_value_min=-1,
+                                  clip_value_max=15)
+    visit_count = tf.one_hot(visit_count, depth=16, axis=1, dtype=tf.float32,
+                             on_value=10., off_value=0.)
+    last_visit = tf.one_hot(last_visit, depth=16, axis=1, dtype=tf.float32,
+                            on_value=10., off_value=0.)
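+    # (Using on_value=10. presumably scales the one-hot features so the
+    # fully connected layer below sees larger-magnitude inputs.)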
+    f = tf.concat([visit_count, last_visit], axis=1)
+    x, _ = tf_utils.fc_network(
+        f, neurons=embed_neurons, wt_decay=wt_decay, name='visit_count_embed',
+        offset=0, batch_norm_param=None, dropout_ratio=fc_dropout,
+        is_training=is_training)
+  return x
+
+def lstm_setup(name, x, batch_size, is_single_step, lstm_dim, lstm_out,
+               num_steps, state_input_op):
+  # Returns state_name, state_init_op, updated_state_op, out_op.
+  with tf.name_scope('reshape_'+name):
+    sh = x.get_shape().as_list()
+    x = tf.reshape(x, shape=[batch_size, -1, sh[-1]])
+
+  with tf.variable_scope(name) as varscope:
+    cell = tf.contrib.rnn.LSTMCell(
+        num_units=lstm_dim, forget_bias=1.0, state_is_tuple=False,
+        num_proj=lstm_out, use_peepholes=True,
+        initializer=tf.random_uniform_initializer(-0.01, 0.01, seed=0),
+        cell_clip=None, proj_clip=None)
+
+    sh = [batch_size, 1, lstm_dim+lstm_out]
+    state_init_op = tf.constant(0., dtype=tf.float32, shape=sh)
+
+    fn = lambda ns: lstm_online(cell, ns, x, state_input_op, varscope)
+    out_op, updated_state_op = tf.cond(is_single_step, lambda: fn(1),
+                                       lambda: fn(num_steps))
+
+  return name, state_init_op, updated_state_op, out_op
+
+def combine_setup(name, combine_type, embed_img, embed_goal,
+                  num_img_neurons=None, num_goal_neurons=None):
+  with tf.name_scope(name + '_' + combine_type):
+    if combine_type == 'add':
+      # Simply add the goal and image features.
+      out = embed_img + embed_goal
+
+    elif combine_type == 'multiply':
+      # Combine by block-multiplying the image and goal features.
+      re_embed_img = tf.reshape(
+          embed_img, shape=[-1, num_img_neurons // num_goal_neurons,
+                            num_goal_neurons])
+      re_embed_goal = tf.reshape(embed_goal, shape=[-1, num_goal_neurons, 1])
+      x = tf.matmul(re_embed_img, re_embed_goal,
+                    transpose_a=False, transpose_b=False)
+      out = slim.flatten(x)
+    elif combine_type == 'none' or combine_type == 'imgonly':
+      out = embed_img
+    elif combine_type == 'goalonly':
+      out = embed_goal
+    else:
+      logging.fatal('Undefined combine_type: %s', combine_type)
+  return out
+
+def preprocess_egomotion(locs, thetas):
+  with tf.name_scope('pre_ego'):
+    pre_ego = tf.concat([locs, tf.sin(thetas), tf.cos(thetas)], axis=2)
+    sh = pre_ego.get_shape().as_list()
+    pre_ego = tf.reshape(pre_ego, [-1, sh[-1]])
+  return pre_ego
+
+def setup_to_run(m, args, is_training, batch_norm_is_training, summary_mode):
+  # Set up the model.
+  tf.set_random_seed(args.solver.seed)
+  task_params = args.navtask.task_params
+  num_steps = task_params.num_steps
+  num_goals = task_params.num_goals
+  num_actions = task_params.num_actions
+  num_actions_ = num_actions
+
+  n_views = task_params.n_views
+
+  batch_norm_is_training_op = \
+      tf.placeholder_with_default(batch_norm_is_training, shape=[],
+                                  name='batch_norm_is_training_op')
+  # Set up the inputs.
+  m.input_tensors = {}
+  lstm_states = []
+  lstm_state_dims = []
+  state_names = []
+  updated_state_ops = []
+  init_state_ops = []
+  if args.arch.lstm_output:
+    lstm_states += ['lstm_output']
+    lstm_state_dims += [args.arch.lstm_output_dim+task_params.num_actions]
+  if args.arch.lstm_ego:
+    lstm_states += ['lstm_ego']
+    lstm_state_dims += [args.arch.lstm_ego_dim + args.arch.lstm_ego_out]
+    lstm_states += ['lstm_img']
+    lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out]
+  elif args.arch.lstm_img:
+    # An LSTM only on the image.
+    lstm_states += ['lstm_img']
+    lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out]
+  else:
+    # No LSTMs involved here.
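+    # The policy is purely feed-forward: actions are predicted from the
+    # current image and goal embeddings alone.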
+    pass
+
+  m.input_tensors['common'], m.input_tensors['step'], m.input_tensors['train'] = \
+      _inputs(task_params, lstm_states, lstm_state_dims)
+
+  with tf.name_scope('check_size'):
+    is_single_step = tf.equal(tf.unstack(
+        tf.shape(m.input_tensors['step']['imgs']), num=6)[1], 1)
+
+  images_reshaped = tf.reshape(
+      m.input_tensors['step']['imgs'],
+      shape=[-1, task_params.img_height, task_params.img_width,
+             task_params.img_channels], name='re_image')
+
+  rel_goal_loc_reshaped = tf.reshape(
+      m.input_tensors['step']['rel_goal_loc'],
+      shape=[-1, task_params.rel_goal_loc_dim], name='re_rel_goal_loc')
+
+  x, vars_ = get_repr_from_image(
+      images_reshaped, task_params.modalities, task_params.data_augment,
+      args.arch.encoder, args.solver.freeze_conv, args.solver.wt_decay,
+      is_training)
+
+  # Reshape into nice things so that these can be accumulated over time steps
+  # for faster backprop.
+  sh_before = x.get_shape().as_list()
+  m.encoder_output = tf.reshape(
+      x, shape=[task_params.batch_size, -1, n_views] + sh_before[1:])
+  x = tf.reshape(m.encoder_output, shape=[-1] + sh_before[1:])
+
+  # Add a layer to reduce dimensions for a fc layer.
+  if args.arch.dim_reduce_neurons > 0:
+    ks = 1
+    neurons = args.arch.dim_reduce_neurons
+    init_var = np.sqrt(2.0/(ks**2)/neurons)
+    batch_norm_param = args.arch.batch_norm_param
+    batch_norm_param['is_training'] = batch_norm_is_training_op
+    m.conv_feat = slim.conv2d(
+        x, neurons, kernel_size=ks, stride=1, normalizer_fn=slim.batch_norm,
+        normalizer_params=batch_norm_param, padding='SAME', scope='dim_reduce',
+        weights_regularizer=slim.l2_regularizer(args.solver.wt_decay),
+        weights_initializer=tf.random_normal_initializer(stddev=init_var))
+    reshape_conv_feat = slim.flatten(m.conv_feat)
+    sh = reshape_conv_feat.get_shape().as_list()
+    m.reshape_conv_feat = tf.reshape(reshape_conv_feat,
+                                     shape=[-1, sh[1]*n_views])
+
+  # Restore these from a checkpoint.
+  if args.solver.pretrained_path is not None:
+    m.init_fn = slim.assign_from_checkpoint_fn(args.solver.pretrained_path,
+                                               vars_)
+  else:
+    m.init_fn = None
+
+  # Hit the goal location with a bunch of fully connected layers, to embed it
+  # into some space.
+  with tf.variable_scope('embed_goal'):
+    batch_norm_param = args.arch.batch_norm_param
+    batch_norm_param['is_training'] = batch_norm_is_training_op
+    m.embed_goal, _ = tf_utils.fc_network(
+        rel_goal_loc_reshaped, neurons=args.arch.goal_embed_neurons,
+        wt_decay=args.solver.wt_decay, name='goal_embed', offset=0,
+        batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout,
+        is_training=is_training)
+
+  if args.arch.embed_goal_for_state:
+    with tf.variable_scope('embed_goal_for_state'):
+      batch_norm_param = args.arch.batch_norm_param
+      batch_norm_param['is_training'] = batch_norm_is_training_op
+      m.embed_goal_for_state, _ = tf_utils.fc_network(
+          m.input_tensors['common']['rel_goal_loc_at_start'][:,0,:],
+          neurons=args.arch.goal_embed_neurons, wt_decay=args.solver.wt_decay,
+          name='goal_embed', offset=0, batch_norm_param=batch_norm_param,
+          dropout_ratio=args.arch.fc_dropout, is_training=is_training)
+
+  # Hit the image features with a bunch of fully connected layers, to embed
+  # them into some space.
+  with tf.variable_scope('embed_img'):
+    batch_norm_param = args.arch.batch_norm_param
+    batch_norm_param['is_training'] = batch_norm_is_training_op
+    m.embed_img, _ = tf_utils.fc_network(
+        m.reshape_conv_feat, neurons=args.arch.img_embed_neurons,
+        wt_decay=args.solver.wt_decay, name='img_embed', offset=0,
+        batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout,
+        is_training=is_training)
+
+  # For lstm_ego and lstm_img, embed the ego motion, accumulate it into an
+  # LSTM, combine with image features and accumulate those in an LSTM. Finally
+  # combine what you get from the image LSTM with the goal to output an action.
+  if args.arch.lstm_ego:
+    ego_reshaped = preprocess_egomotion(
+        m.input_tensors['step']['incremental_locs'],
+        m.input_tensors['step']['incremental_thetas'])
+    with tf.variable_scope('embed_ego'):
+      batch_norm_param = args.arch.batch_norm_param
+      batch_norm_param['is_training'] = batch_norm_is_training_op
+      m.embed_ego, _ = tf_utils.fc_network(
+          ego_reshaped, neurons=args.arch.ego_embed_neurons,
+          wt_decay=args.solver.wt_decay, name='ego_embed', offset=0,
+          batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout,
+          is_training=is_training)
+
+    state_name, state_init_op, updated_state_op, out_op = lstm_setup(
+        'lstm_ego', m.embed_ego, task_params.batch_size, is_single_step,
+        args.arch.lstm_ego_dim, args.arch.lstm_ego_out, num_steps*num_goals,
+        m.input_tensors['step']['lstm_ego'])
+    state_names += [state_name]
+    init_state_ops += [state_init_op]
+    updated_state_ops += [updated_state_op]
+
+    # Combine the output with the vision features.
+    m.img_ego_op = combine_setup('img_ego', args.arch.combine_type_ego,
+                                 m.embed_img, out_op,
+                                 args.arch.img_embed_neurons[-1],
+                                 args.arch.lstm_ego_out)
+
+    # LSTM on these vision features.
+    state_name, state_init_op, updated_state_op, out_op = lstm_setup(
+        'lstm_img', m.img_ego_op, task_params.batch_size, is_single_step,
+        args.arch.lstm_img_dim, args.arch.lstm_img_out, num_steps*num_goals,
+        m.input_tensors['step']['lstm_img'])
+    state_names += [state_name]
+    init_state_ops += [state_init_op]
+    updated_state_ops += [updated_state_op]
+
+    m.img_for_goal = out_op
+    num_img_for_goal_neurons = args.arch.lstm_img_out
+
+  elif args.arch.lstm_img:
+    # LSTM on just the image features.
+    state_name, state_init_op, updated_state_op, out_op = lstm_setup(
+        'lstm_img', m.embed_img, task_params.batch_size, is_single_step,
+        args.arch.lstm_img_dim, args.arch.lstm_img_out, num_steps*num_goals,
+        m.input_tensors['step']['lstm_img'])
+    state_names += [state_name]
+    init_state_ops += [state_init_op]
+    updated_state_ops += [updated_state_op]
+    m.img_for_goal = out_op
+    num_img_for_goal_neurons = args.arch.lstm_img_out
+
+  else:
+    m.img_for_goal = m.embed_img
+    num_img_for_goal_neurons = args.arch.img_embed_neurons[-1]
+
+  if args.arch.use_visit_count:
+    m.embed_visit_count = visit_count_fc(
+        m.input_tensors['step']['visit_count'],
+        m.input_tensors['step']['last_visit'], args.arch.goal_embed_neurons,
+        args.solver.wt_decay, args.arch.fc_dropout, is_training=is_training)
+    m.embed_goal = m.embed_goal + m.embed_visit_count
+
+  m.combined_f = combine_setup('img_goal', args.arch.combine_type,
+                               m.img_for_goal, m.embed_goal,
+                               num_img_for_goal_neurons,
+                               args.arch.goal_embed_neurons[-1])
+
+  # LSTM on the combined representation.
+  if args.arch.lstm_output:
+    name = 'lstm_output'
+    # A few fully connected layers here.
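+    # pred_neurons projects the combined image/goal features before they are
+    # fed into the output LSTM below.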
+    with tf.variable_scope('action_pred'):
+      batch_norm_param = args.arch.batch_norm_param
+      batch_norm_param['is_training'] = batch_norm_is_training_op
+      x, _ = tf_utils.fc_network(
+          m.combined_f, neurons=args.arch.pred_neurons,
+          wt_decay=args.solver.wt_decay, name='pred', offset=0,
+          batch_norm_param=batch_norm_param,
+          dropout_ratio=args.arch.fc_dropout, is_training=is_training)
+
+    if args.arch.lstm_output_init_state_from_goal:
+      # Use the goal embedding to initialize the LSTM state.
+      # NOTE: This is a kludge. If this is doing computation for a single time
+      # step then it does not involve backprop, so we can use the state input
+      # from the feed dict; otherwise we compute the state representation from
+      # the goal and feed that in. This is necessary for using the goal
+      # location to generate the state representation.
+      m.embed_goal_for_state = tf.expand_dims(m.embed_goal_for_state, axis=1)
+      state_op = tf.cond(is_single_step, lambda: m.input_tensors['step'][name],
+                         lambda: m.embed_goal_for_state)
+      state_name, state_init_op, updated_state_op, out_op = lstm_setup(
+          name, x, task_params.batch_size, is_single_step,
+          args.arch.lstm_output_dim, num_actions_, num_steps*num_goals,
+          state_op)
+      init_state_ops += [m.embed_goal_for_state]
+    else:
+      state_op = m.input_tensors['step'][name]
+      state_name, state_init_op, updated_state_op, out_op = lstm_setup(
+          name, x, task_params.batch_size, is_single_step,
+          args.arch.lstm_output_dim, num_actions_, num_steps*num_goals,
+          state_op)
+      init_state_ops += [state_init_op]
+
+    state_names += [state_name]
+    updated_state_ops += [updated_state_op]
+
+    out_op = tf.reshape(out_op, shape=[-1, num_actions_])
+    if num_actions_ > num_actions:
+      m.action_logits_op = out_op[:, :num_actions]
+      m.baseline_op = out_op[:, num_actions:]
+    else:
+      m.action_logits_op = out_op
+      m.baseline_op = None
+    m.action_prob_op = tf.nn.softmax(m.action_logits_op)
+
+  else:
+    # A few fully connected layers here.
+    with tf.variable_scope('action_pred'):
+      batch_norm_param = args.arch.batch_norm_param
+      batch_norm_param['is_training'] = batch_norm_is_training_op
+      out_op, _ = tf_utils.fc_network(
+          m.combined_f, neurons=args.arch.pred_neurons,
+          wt_decay=args.solver.wt_decay, name='pred', offset=0,
+          num_pred=num_actions_, batch_norm_param=batch_norm_param,
+          dropout_ratio=args.arch.fc_dropout, is_training=is_training)
+    if num_actions_ > num_actions:
+      m.action_logits_op = out_op[:, :num_actions]
+      m.baseline_op = out_op[:, num_actions:]
+    else:
+      m.action_logits_op = out_op
+      m.baseline_op = None
+    m.action_prob_op = tf.nn.softmax(m.action_logits_op)
+
+  m.train_ops = {}
+  m.train_ops['step'] = m.action_prob_op
+  m.train_ops['common'] = [m.input_tensors['common']['orig_maps'],
+                           m.input_tensors['common']['goal_loc'],
+                           m.input_tensors['common']['rel_goal_loc_at_start']]
+  m.train_ops['state_names'] = state_names
+  m.train_ops['init_state'] = init_state_ops
+  m.train_ops['updated_state'] = updated_state_ops
+  m.train_ops['batch_norm_is_training_op'] = batch_norm_is_training_op
+
+  # Flat list of ops which cache the step data.
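+  # When the convolutional encoder is frozen its activations do not change
+  # during training, so they are cached during the unroll and fed back in at
+  # train time; otherwise nothing needs to be cached.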
+  if args.solver.freeze_conv:
+    m.train_ops['step_data_cache'] = [m.encoder_output]
+  else:
+    m.train_ops['step_data_cache'] = []
+
+  ewma_decay = 0.99 if is_training else 0.0
+  weight = tf.ones_like(m.input_tensors['train']['action'], dtype=tf.float32,
+                        name='weight')
+
+  m.reg_loss_op, m.data_loss_op, m.total_loss_op, m.acc_ops = \
+      compute_losses_multi_or(
+          m.action_logits_op, m.input_tensors['train']['action'],
+          weights=weight, num_actions=num_actions,
+          data_loss_wt=args.solver.data_loss_wt,
+          reg_loss_wt=args.solver.reg_loss_wt, ewma_decay=ewma_decay)
+
+  if args.solver.freeze_conv:
+    vars_to_optimize = list(set(tf.trainable_variables()) - set(vars_))
+  else:
+    vars_to_optimize = None
+
+  m.lr_op, m.global_step_op, m.train_op, m.should_stop_op, m.optimizer, \
+      m.sync_optimizer = tf_utils.setup_training(
+          m.total_loss_op,
+          args.solver.initial_learning_rate,
+          args.solver.steps_per_decay,
+          args.solver.learning_rate_decay,
+          args.solver.momentum,
+          args.solver.max_steps,
+          args.solver.sync,
+          args.solver.adjust_lr_sync,
+          args.solver.num_workers,
+          args.solver.task,
+          vars_to_optimize=vars_to_optimize,
+          clip_gradient_norm=args.solver.clip_gradient_norm,
+          typ=args.solver.typ, momentum2=args.solver.momentum2,
+          adam_eps=args.solver.adam_eps)
+
+  if args.arch.sample_gt_prob_type == 'inverse_sigmoid_decay':
+    m.sample_gt_prob_op = tf_utils.inverse_sigmoid_decay(args.arch.isd_k,
+                                                         m.global_step_op)
+  elif args.arch.sample_gt_prob_type == 'zero':
+    m.sample_gt_prob_op = tf.constant(-1.0, dtype=tf.float32)
+  elif args.arch.sample_gt_prob_type.split('_')[0] == 'step':
+    step = int(args.arch.sample_gt_prob_type.split('_')[1])
+    m.sample_gt_prob_op = tf_utils.step_gt_prob(
+        step, m.input_tensors['step']['step_number'][0,0,0])
+
+  m.sample_action_type = args.arch.action_sample_type
+  m.sample_action_combine_type = args.arch.action_sample_combine_type
+  _add_summaries(m, summary_mode, args.summary.arop_full_summary_iters)
+
+  m.init_op = tf.group(tf.global_variables_initializer(),
+                       tf.local_variables_initializer())
+  m.saver_op = tf.train.Saver(keep_checkpoint_every_n_hours=4,
+                              write_version=tf.train.SaverDef.V2)
+
+  return m
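+
+# Illustrative sketch (not part of the original code): a minimal static shape
+# check for lstm_online, assuming the TF 1.x contrib APIs used above. The cell
+# mirrors lstm_setup (state_is_tuple=False with num_proj), so the flat state
+# has size lstm_dim + lstm_out. A single-step call creates the variables
+# before the multi-step call reuses them through the shared variable scope.
+def _lstm_online_shape_check():
+  batch_size, num_steps, lstm_dim, lstm_out = 2, 4, 8, 3
+  with tf.Graph().as_default():
+    with tf.variable_scope('shape_check') as vs:
+      cell = tf.contrib.rnn.LSTMCell(num_units=lstm_dim, num_proj=lstm_out,
+                                     state_is_tuple=False)
+      x = tf.zeros([batch_size, num_steps, 5])
+      state = tf.zeros([batch_size, 1, lstm_dim + lstm_out])
+      # First call creates the LSTM variables (num_steps == 1, no reuse).
+      lstm_online(cell, 1, x[:, :1, :], state, vs)
+      # Second call unrolls num_steps steps, reusing the same variables.
+      outputs, out_state = lstm_online(cell, num_steps, x, state, vs)
+      # outputs is B x num_steps x lstm_out; out_state is B x 1 x
+      # (lstm_dim + lstm_out).
+      assert outputs.get_shape().as_list() == [batch_size, num_steps,
+                                               lstm_out]
+      assert (out_state.get_shape().as_list() ==
+              [batch_size, 1, lstm_dim + lstm_out])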