From 9b29db4ff17b07ad6f1083a8a4924c2b4dd7a1f9 Mon Sep 17 00:00:00 2001
From: Ben Stabler <bstabler@users.noreply.github.com>
Date: Fri, 21 Feb 2020 15:32:13 -0800
Subject: [PATCH] various updates (#111)

* Package updates (#109)

* ActivitySim 0.9.2; Pandas 1.0

* Freeze ortools package below 7.5

* Add info about zero-person households (#106)

This addresses the documentation needs arising from #104

* Fixes (#110)

* ActivitySim 0.9.2; Pandas 1.0

* Freeze ortools package below 7.5

* fix issue #103

* fix issue #102

* Update setup.py (#112)

Co-authored-by: Blake <brosenthalpdx@gmail.com>
Co-authored-by: Greg Macfarlane <gregmacfarlane@gmail.com>
---
 docs/application_configuration.rst            |  2 +-
 example_calm/run_populationsim.py             |  2 +-
 example_calm_repop/run_populationsim.py       |  2 +-
 example_survey_weighting/run_populationsim.py |  2 +-
 example_test/run_populationsim.py             |  2 +-
 populationsim/balancer.py                     |  2 +-
 populationsim/integerizer.py                  |  2 +-
 populationsim/lp.py                           |  2 +-
 populationsim/lp_cvx.py                       |  2 +-
 populationsim/multi_integerizer.py            |  2 +-
 populationsim/simul_balancer.py               |  2 +-
 populationsim/steps/__init__.py               | 16 ++-
 populationsim/steps/expand_households.py      |  2 +-
 populationsim/steps/final_seed_balancing.py   |  2 +-
 populationsim/steps/initial_seed_balancing.py |  2 +-
 populationsim/steps/input_pre_processor.py    | 98 +++----------------
 .../steps/integerize_final_seed_weights.py    |  2 +-
 populationsim/steps/repop_balancing.py        |  2 +-
 populationsim/steps/setup_data_structures.py  | 17 ++--
 populationsim/steps/sub_balancing.py          |  2 +-
 populationsim/steps/summarize.py              | 26 ++---
 .../steps/write_synthetic_population.py       |  2 +-
 populationsim/steps/write_tables.py           | 94 ------------------
 populationsim/tests/configs/settings.yaml     |  2 +-
 populationsim/tests/configs2/settings.yaml    |  1 +
 populationsim/tests/test_balancer.py          |  2 +-
 populationsim/tests/test_flex.py              |  7 +-
 populationsim/util.py                         | 38 -------
 setup.py                                      |  6 +-
 29 files changed, 77 insertions(+), 266 deletions(-)
 delete mode 100644 populationsim/steps/write_tables.py
 delete mode 100644 populationsim/util.py

diff --git a/docs/application_configuration.rst b/docs/application_configuration.rst
index db7fb71..450fc1c 100644
--- a/docs/application_configuration.rst
+++ b/docs/application_configuration.rst
@@ -81,7 +81,7 @@ Seed sample
 
 As mentioned in previous section, the seed sample is typically obtained from the ACS PUMS. One of the main requirements for the seed sample is that it should be representative of the modeling region. In case of ACS PUMS, this can be ensured by selecting PUMAs representing the modeling region both demographically and geographically. PUMA boundaries may not perfectly line up against the modeling region boundaries and overlaps are possible. Each sub-seed geography must be assigned to a Seed geography, and each Seed geography must be assigned to a Meta geography.
 
-The seed sample must contain all of the specified control variables, as well as any variables that are needed for the travel model but not specified as controls. For population groups that use completely separate, non-overlapping controls, such as residential population and group-quarter population, separate seed samples are prepared. PopulationSim can be set up and run separately for each population segment using the same geographic system. The outputs from each run can be combined into a unified synthetic population as a post processing step.
+The seed sample must contain all of the specified control variables, as well as any variables that are needed for the travel model but not specified as controls. For population groups that use completely separate, non-overlapping controls, such as residential population and group-quarter population, separate seed samples are prepared. In the ACS PUMS datasets, it is possible to have zero-person households in the raw data table (``NP = 0``); these records must be filtered from the seed data. PopulationSim can be set up and run separately for each population segment using the same geographic system. The outputs from each run can be combined into a unified synthetic population as a post-processing step.
 
 Finally, the seed sample must include an initial weight field. The PopulationSim algorithm is designed to assign weights as close to the initial weight as possible to minimize the changes in distribution of uncontrolled variables. All the fields in the seed sample should be appropriately recoded to specify controls (see more details in next section). Household-level population variables must be computed in advance (for e.g., number of workers in each household) and monetary variables must be inflation adjusted to be consistent with year of control data (e.g., Household Income). The ACS PUMS data contain 3 or 5 years of household records, where  each record's income is reported in the year in which it was collected. The ACS PUMS data includes the rolling reference factor for the year and the inflation adjustment factor, these must be used to code each household's income to a common income year.
 
diff --git a/example_calm/run_populationsim.py b/example_calm/run_populationsim.py
index 2c8ffc7..ddb1d12 100644
--- a/example_calm/run_populationsim.py
+++ b/example_calm/run_populationsim.py
@@ -12,7 +12,7 @@
 from activitysim.core.config import handle_standard_args
 from activitysim.core.tracing import print_elapsed_time
 
-from populationsim.util import setting
+from activitysim.core.config import setting
 from populationsim import lp
 from populationsim import multi_integerizer
 
diff --git a/example_calm_repop/run_populationsim.py b/example_calm_repop/run_populationsim.py
index 2c8ffc7..ddb1d12 100644
--- a/example_calm_repop/run_populationsim.py
+++ b/example_calm_repop/run_populationsim.py
@@ -12,7 +12,7 @@
 from activitysim.core.config import handle_standard_args
 from activitysim.core.tracing import print_elapsed_time
 
-from populationsim.util import setting
+from activitysim.core.config import setting
 from populationsim import lp
 from populationsim import multi_integerizer
 
diff --git a/example_survey_weighting/run_populationsim.py b/example_survey_weighting/run_populationsim.py
index d1bb23e..85e5979 100755
--- a/example_survey_weighting/run_populationsim.py
+++ b/example_survey_weighting/run_populationsim.py
@@ -12,7 +12,7 @@
 from activitysim.core.config import handle_standard_args
 from activitysim.core.tracing import print_elapsed_time
 
-from populationsim.util import setting
+from activitysim.core.config import setting
 from populationsim import lp
 from populationsim import multi_integerizer
 
diff --git a/example_test/run_populationsim.py b/example_test/run_populationsim.py
index 6295582..d305d20 100644
--- a/example_test/run_populationsim.py
+++ b/example_test/run_populationsim.py
@@ -11,7 +11,7 @@
 from activitysim.core.config import handle_standard_args
 
 from populationsim import steps
-from populationsim.util import setting
+from activitysim.core.config import setting
 from populationsim import lp
 from populationsim import multi_integerizer
 
diff --git a/populationsim/balancer.py b/populationsim/balancer.py
index 68e2836..49e0126 100644
--- a/populationsim/balancer.py
+++ b/populationsim/balancer.py
@@ -10,7 +10,7 @@
 
 import pandas as pd
 
-from .util import setting
+from activitysim.core.config import setting
 
 
 logger = logging.getLogger(__name__)
diff --git a/populationsim/integerizer.py b/populationsim/integerizer.py
index 4483e91..0abdd98 100644
--- a/populationsim/integerizer.py
+++ b/populationsim/integerizer.py
@@ -9,7 +9,7 @@
 
 import numpy as np
 import pandas as pd
-from .util import setting
+from activitysim.core.config import setting
 
 from .lp import get_single_integerizer
 from .lp import STATUS_SUCCESS
diff --git a/populationsim/lp.py b/populationsim/lp.py
index be1f0ae..9ec225b 100644
--- a/populationsim/lp.py
+++ b/populationsim/lp.py
@@ -4,7 +4,7 @@
 
 import logging
 
-from .util import setting
+from activitysim.core.config import setting
 from . import lp_cvx
 from . import lp_ortools
 
diff --git a/populationsim/lp_cvx.py b/populationsim/lp_cvx.py
index 9ba119d..578fa32 100644
--- a/populationsim/lp_cvx.py
+++ b/populationsim/lp_cvx.py
@@ -5,7 +5,7 @@
 import logging
 
 import numpy as np
-from .util import setting
+from activitysim.core.config import setting
 
 logger = logging.getLogger(__name__)
 
diff --git a/populationsim/multi_integerizer.py b/populationsim/multi_integerizer.py
index 5ba67de..cf0281c 100644
--- a/populationsim/multi_integerizer.py
+++ b/populationsim/multi_integerizer.py
@@ -12,7 +12,7 @@
 import pandas as pd
 
 
-from .util import setting
+from activitysim.core.config import setting
 
 from .lp import get_simul_integerizer
 from .lp import STATUS_SUCCESS
diff --git a/populationsim/simul_balancer.py b/populationsim/simul_balancer.py
index 916fba4..c745871 100644
--- a/populationsim/simul_balancer.py
+++ b/populationsim/simul_balancer.py
@@ -11,7 +11,7 @@
 
 import pandas as pd
 
-from .util import setting
+from activitysim.core.config import setting
 
 logger = logging.getLogger(__name__)
 
diff --git a/populationsim/steps/__init__.py b/populationsim/steps/__init__.py
index bfe6b2a..a5bfb9b 100644
--- a/populationsim/steps/__init__.py
+++ b/populationsim/steps/__init__.py
@@ -1,7 +1,10 @@
-from __future__ import absolute_import
 # PopulationSim
 # See full license in LICENSE.txt.
 
+from __future__ import absolute_import
+
+from activitysim.core import inject as _inject
+
 from . import input_pre_processor
 from . import setup_data_structures
 from . import initial_seed_balancing
@@ -11,7 +14,14 @@
 from . import sub_balancing
 from . import expand_households
 from . import summarize
-from . import write_tables
 from . import write_synthetic_population
-
 from . import repop_balancing
+
+from activitysim.core.steps.output import write_data_dictionary
+from activitysim.core.steps.output import write_tables
+
+
+@_inject.injectable(cache=True)
+def preload_injectables():
+    _inject.add_step('write_data_dictionary', write_data_dictionary)
+    _inject.add_step('write_tables', write_tables)
diff --git a/populationsim/steps/expand_households.py b/populationsim/steps/expand_households.py
index 11bfff5..a7846b0 100644
--- a/populationsim/steps/expand_households.py
+++ b/populationsim/steps/expand_households.py
@@ -11,7 +11,7 @@
 from activitysim.core import pipeline
 from activitysim.core import inject
 
-from populationsim.util import setting
+from activitysim.core.config import setting
 from .helper import get_control_table
 from .helper import get_weight_table
 
diff --git a/populationsim/steps/final_seed_balancing.py b/populationsim/steps/final_seed_balancing.py
index a2c7ef9..398e33b 100644
--- a/populationsim/steps/final_seed_balancing.py
+++ b/populationsim/steps/final_seed_balancing.py
@@ -8,7 +8,7 @@
 
 from activitysim.core import inject
 
-from populationsim.util import setting
+from activitysim.core.config import setting
 
 from ..balancer import do_balancing
 from .helper import get_control_table
diff --git a/populationsim/steps/initial_seed_balancing.py b/populationsim/steps/initial_seed_balancing.py
index b5267b3..b75cb97 100644
--- a/populationsim/steps/initial_seed_balancing.py
+++ b/populationsim/steps/initial_seed_balancing.py
@@ -8,7 +8,7 @@
 from activitysim.core import inject
 from activitysim.core import pipeline
 
-from populationsim.util import setting
+from activitysim.core.config import setting
 
 from ..balancer import do_balancing
 
diff --git a/populationsim/steps/input_pre_processor.py b/populationsim/steps/input_pre_processor.py
index cdd7675..722f93b 100644
--- a/populationsim/steps/input_pre_processor.py
+++ b/populationsim/steps/input_pre_processor.py
@@ -7,12 +7,11 @@
 import pandas as pd
 import numpy as np
 
-from activitysim.core import inject
-from activitysim.core import pipeline
-
-from populationsim.util import data_dir_from_settings
-from populationsim.util import setting
-
+from activitysim.core import (
+    inject,
+    config,
+    input
+)
 
 logger = logging.getLogger(__name__)
 
@@ -30,8 +29,8 @@ def input_pre_processor():
     unless an alternate table_list name is specified as a model step argument 'table_list'.
     (This allows alternate/additional input files to be read for repop)
 
-    In the case of repop, this step is being run after an initial populationsim run has
-    completed, in which case the input_table_list may specify replacement tables.
+    In the case of repop, this step is being run after an initial run has completed,
+    in which case the input_table_list may specify replacement tables.
     (e.g. lowest geography controls that will replace the previous low controls dataframe.)
 
     See input_table_list in settings.yaml in the example folder for a working example
@@ -39,7 +38,7 @@ def input_pre_processor():
     +--------------+----------------------------------------------------------+
     | key          | description                                              |
     +==============+=========================================+================+
-    | tablename    |  ame of pipeline table in which to store dataframe       |
+    | tablename    | name of pipeline table in which to store dataframe       |
     +--------------+----------------------------------------------------------+
     | filename     | name of csv file to read (in data_dir)                   |
     +--------------+----------------------------------------------------------+
@@ -54,85 +53,18 @@ def input_pre_processor():
 
     # alternate table list name may have been provided as a model argument
     table_list_name = inject.get_step_arg('table_list', default='input_table_list')
-    table_list = setting(table_list_name)
-    assert table_list is not None, "table list '%s' not in settings." % table_list_name
+    table_list = config.setting(table_list_name)
 
-    data_dir = data_dir_from_settings()
+    assert table_list is not None, "no table list '%s' found in settings." % table_list_name
+
+    logger.info('Using table list: %s' % table_list)
 
     for table_info in table_list:
 
-        tablename = table_info['tablename']
-
-        logger.info("input_pre_processor processing %s" % tablename)
-
-        # read the csv file
-        data_filename = table_info.get('filename', None)
-        data_file_path = os.path.join(data_dir, data_filename)
-        if not os.path.exists(data_file_path):
-            raise RuntimeError("input_pre_processor %s - input file not found: %s"
-                               % (tablename, data_file_path, ))
-
-        logger.info("Reading csv file %s" % data_file_path)
-        df = read_csv_with_fallback_encoding(data_file_path)
-
-        logger.info("input file columns: %s" % df.columns.values)
-
-        drop_columns = table_info.get('drop_columns', None)
-        if drop_columns:
-            for c in drop_columns:
-                logger.info("dropping column '%s'" % c)
-                del df[c]
-
-        # rename columns
-        column_map = table_info.get('column_map', None)
-        if column_map:
-            df.rename(columns=column_map, inplace=True)
-
-        # set index
-        index_col = table_info.get('index_col', None)
-        if index_col is not None:
-            if index_col in df.columns:
-                assert not df.duplicated(index_col).any()
-                df.set_index(index_col, inplace=True)
-            else:
-                df.index.names = [index_col]
-
-        # read expression file
-        # expression_filename = table_info.get('expression_filename', None)
-        # if expression_filename:
-        #     assert False
-        #     expression_file_path = os.path.join(configs_dir, expression_filename)
-        #     if not os.path.exists(expression_file_path):
-        #         raise RuntimeError("input_pre_processor %s - expression file not found: %s"
-        #                            % (table, expression_file_path, ))
-        #     spec = assign.read_assignment_spec(expression_file_path)
-        #
-        #     df_alias = table_info.get('df_alias', table)
-        #
-        #     locals_d = {}
-        #
-        #     results, trace_results, trace_assigned_locals \
-        #         = assign.assign_variables(spec, df, locals_d, df_alias=df_alias)
-        #     # for column in results.columns:
-        #     #     orca.add_column(table, column, results[column])
-        #
-        #     df = pd.concat([df, results], axis=1)
-
-        logger.info("adding table %s" % tablename)
+        tablename = table_info.get('tablename')
+        df = input.read_from_table_info(table_info)
+        logger.info('registering table %s' % tablename)
 
         # add (or replace) pipeline table
         repop = inject.get_step_arg('repop', default=False)
         inject.add_table(tablename, df, replace=repop)
-
-
-def read_csv_with_fallback_encoding(filepath):
-    """read a CSV to a pandas DataFrame using default utf-8 encoding,
-    but try alternate Windows-compatible cp1252 if unicode fails
-
-    """
-    try:
-        return pd.read_csv(filepath, comment='#')
-    except UnicodeDecodeError:
-        logger.warning(
-            "Reading %s with default utf-8 encoding failed, trying cp1252 instead", filepath)
-        return pd.read_csv(filepath, comment='#', encoding='cp1252')
diff --git a/populationsim/steps/integerize_final_seed_weights.py b/populationsim/steps/integerize_final_seed_weights.py
index ffea4a6..a93d1fb 100644
--- a/populationsim/steps/integerize_final_seed_weights.py
+++ b/populationsim/steps/integerize_final_seed_weights.py
@@ -13,7 +13,7 @@
 from .helper import get_control_table
 from .helper import weight_table_name
 from .helper import get_weight_table
-from populationsim.util import setting
+from activitysim.core.config import setting
 
 logger = logging.getLogger(__name__)
 
diff --git a/populationsim/steps/repop_balancing.py b/populationsim/steps/repop_balancing.py
index 44c8ee8..cc89a42 100644
--- a/populationsim/steps/repop_balancing.py
+++ b/populationsim/steps/repop_balancing.py
@@ -7,7 +7,7 @@
 
 from activitysim.core import inject
 
-from populationsim.util import setting
+from activitysim.core.config import setting
 
 from .helper import get_control_table
 from .helper import weight_table_name
diff --git a/populationsim/steps/setup_data_structures.py b/populationsim/steps/setup_data_structures.py
index 2bb4fd4..5d0bcd3 100644
--- a/populationsim/steps/setup_data_structures.py
+++ b/populationsim/steps/setup_data_structures.py
@@ -12,21 +12,22 @@
 
 from activitysim.core import inject
 from activitysim.core import pipeline
+from activitysim.core import config
 
 from ..assign import assign_variable
 from .helper import control_table_name
 from .helper import get_control_table
 from .helper import get_control_data_table
 
-from populationsim.util import setting
+from activitysim.core.config import setting
 
 logger = logging.getLogger(__name__)
 
 
-def read_control_spec(data_filename, configs_dir):
+def read_control_spec(data_filename):
 
     # read the csv file
-    data_file_path = os.path.join(configs_dir, data_filename)
+    data_file_path = config.config_file_path(data_filename)
     if not os.path.exists(data_file_path):
         raise RuntimeError(
             "initial_seed_balancing - control file not found: %s" % (data_file_path,))
@@ -269,7 +270,7 @@ def filter_households(households_df, persons_df, crosswalk_df):
 
 
 @inject.step()
-def setup_data_structures(settings, configs_dir, households, persons):
+def setup_data_structures(settings, households, persons):
     """
     Setup geographic correspondence (crosswalk), control sets, and incidence tables.
 
@@ -289,7 +290,6 @@ def setup_data_structures(settings, configs_dir, households, persons):
     ----------
     settings: dict
         contents of settings.yaml as dict
-    configs_dir: str
     households: pipeline table
     persons: pipeline table
 
@@ -314,7 +314,7 @@ def setup_data_structures(settings, configs_dir, households, persons):
     crosswalk_df = build_crosswalk_table()
     inject.add_table('crosswalk', crosswalk_df)
 
-    control_spec = read_control_spec(setting('control_file_name', 'controls.csv'), configs_dir)
+    control_spec = read_control_spec(setting('control_file_name', 'controls.csv'))
     inject.add_table('control_spec', control_spec)
 
     geographies = settings['geographies']
@@ -346,7 +346,7 @@ def setup_data_structures(settings, configs_dir, households, persons):
 
 
 @inject.step()
-def repop_setup_data_structures(configs_dir, households, persons):
+def repop_setup_data_structures(households, persons):
     """
     Setup geographic correspondence (crosswalk), control sets, and incidence tables for repop run.
 
@@ -360,7 +360,6 @@ def repop_setup_data_structures(configs_dir, households, persons):
 
     Parameters
     ----------
-    configs_dir : str
     households: pipeline table
     persons: pipeline table
 
@@ -379,7 +378,7 @@ def repop_setup_data_structures(configs_dir, households, persons):
 
     # replace control_spec
     control_file_name = setting('repop_control_file_name', 'repop_controls.csv')
-    control_spec = read_control_spec(control_file_name, configs_dir)
+    control_spec = read_control_spec(control_file_name)
 
     # repop control spec should only specify controls for lowest level geography
     assert control_spec.geography.unique() == [low_geography]
diff --git a/populationsim/steps/sub_balancing.py b/populationsim/steps/sub_balancing.py
index de5d8dd..cc29c6c 100644
--- a/populationsim/steps/sub_balancing.py
+++ b/populationsim/steps/sub_balancing.py
@@ -12,7 +12,7 @@
 from activitysim.core import inject
 from activitysim.core import pipeline
 
-from populationsim.util import setting
+from activitysim.core.config import setting
 
 from .helper import get_control_table
 from .helper import weight_table_name
diff --git a/populationsim/steps/summarize.py b/populationsim/steps/summarize.py
index eda857e..74e02a5 100644
--- a/populationsim/steps/summarize.py
+++ b/populationsim/steps/summarize.py
@@ -12,7 +12,7 @@
 
 from .helper import get_control_table
 from .helper import get_weight_table
-from populationsim.util import setting
+from activitysim.core.config import setting
 
 logger = logging.getLogger(__name__)
 
@@ -36,7 +36,7 @@ def out_table(table_name, df):
         inject.add_table(table_name, df, replace=repop)
 
 
-def summarize_geography(geography, weight_col,
+def summarize_geography(geography, weight_col, hh_id_col,
                         crosswalk_df, results_df, incidence_df):
 
     # controls_table for current geography level
@@ -58,7 +58,7 @@ def summarize_geography(geography, weight_col,
         zone_row_map = results_df[geography] == zone_id
         zone_weights = results_df[zone_row_map]
 
-        incidence = incidence_df.loc[zone_weights.hh_id]
+        incidence = incidence_df.loc[zone_weights[hh_id_col]]
 
         weights = zone_weights[weight_col].tolist()
         x = [(incidence[c] * weights).sum() for c in control_names]
@@ -94,7 +94,7 @@ def summarize_geography(geography, weight_col,
     return summary_df
 
 
-def meta_summary(incidence_df, control_spec, top_geography, top_id, sub_geographies):
+def meta_summary(incidence_df, control_spec, top_geography, top_id, sub_geographies, hh_id_col):
 
     if setting('NO_INTEGERIZATION_EVER', False):
         seed_weight_cols = ['preliminary_balanced_weight', 'balanced_weight']
@@ -138,7 +138,7 @@ def meta_summary(incidence_df, control_spec, top_geography, top_id, sub_geograph
 
         sub_weights = sub_weights[sub_weights[top_geography] == top_id]
 
-        sub_weights = sub_weights[['hh_id'] + sub_weight_cols].groupby('hh_id').sum()
+        sub_weights = sub_weights[[hh_id_col] + sub_weight_cols].groupby(hh_id_col).sum()
 
         for c in sub_weight_cols:
             summary['%s_%s' % (g, c)] = \
@@ -172,12 +172,13 @@ def summarize(crosswalk, incidence_table, control_spec):
     seed_geography = setting('seed_geography')
     meta_geography = geographies[0]
     sub_geographies = geographies[geographies.index(seed_geography) + 1:]
-    household_id_col = setting('household_id_col')
+    hh_id_col = setting('household_id_col')
 
     meta_ids = crosswalk_df[meta_geography].unique()
     for meta_id in meta_ids:
         meta_summary_df = \
-            meta_summary(incidence_df, control_spec, meta_geography, meta_id, sub_geographies)
+            meta_summary(incidence_df, control_spec, meta_geography,
+                         meta_id, sub_geographies, hh_id_col)
         out_table('%s_%s' % (meta_geography, meta_id), meta_summary_df)
 
     hh_weights_summary = pd.DataFrame(index=incidence_df.index)
@@ -196,17 +197,16 @@ def summarize(crosswalk, incidence_table, control_spec):
             continue
 
         if include_integer_colums:
-            hh_weight_cols = [household_id_col, 'balanced_weight', 'integer_weight']
+            hh_weight_cols = [hh_id_col, 'balanced_weight', 'integer_weight']
         else:
-            hh_weight_cols = [household_id_col, 'balanced_weight']
+            hh_weight_cols = [hh_id_col, 'balanced_weight']
 
-        hh_weights = weights_df[hh_weight_cols].groupby([household_id_col]).sum()
+        hh_weights = weights_df[hh_weight_cols].groupby([hh_id_col]).sum()
         hh_weights_summary['%s_balanced_weight' % geography] = hh_weights['balanced_weight']
         if include_integer_colums:
             hh_weights_summary['%s_integer_weight' % geography] = hh_weights['integer_weight']
 
         # aggregate to seed level
-        hh_id_col = incidence_df.index.name
         aggegrate_weights = weights_df.groupby([seed_geography, hh_id_col], as_index=False).sum()
         aggegrate_weights.set_index(hh_id_col, inplace=True)
 
@@ -228,11 +228,11 @@ def summarize(crosswalk, incidence_table, control_spec):
         out_table('%s_aggregate' % (geography,), aggegrate_weights)
 
         summary_col = 'integer_weight' if include_integer_colums else 'balanced_weight'
-        df = summarize_geography(seed_geography, summary_col,
+        df = summarize_geography(seed_geography, summary_col, hh_id_col,
                                  crosswalk_df, weights_df, incidence_df)
         out_table('%s_%s' % (geography, seed_geography,), df)
 
-        df = summarize_geography(geography, summary_col,
+        df = summarize_geography(geography, summary_col, hh_id_col,
                                  crosswalk_df, weights_df, incidence_df)
         out_table('%s' % (geography,), df)
 
diff --git a/populationsim/steps/write_synthetic_population.py b/populationsim/steps/write_synthetic_population.py
index 2078a80..5045005 100644
--- a/populationsim/steps/write_synthetic_population.py
+++ b/populationsim/steps/write_synthetic_population.py
@@ -8,7 +8,7 @@
 from activitysim.core import pipeline
 from activitysim.core import inject
 
-from populationsim.util import setting
+from activitysim.core.config import setting
 
 logger = logging.getLogger(__name__)
 
diff --git a/populationsim/steps/write_tables.py b/populationsim/steps/write_tables.py
deleted file mode 100644
index 1768d2f..0000000
--- a/populationsim/steps/write_tables.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# PopulationSim
-# See full license in LICENSE.txt.
-
-import logging
-import os
-
-from activitysim.core import pipeline
-from activitysim.core import inject
-
-from populationsim.util import setting
-
-logger = logging.getLogger(__name__)
-
-
-@inject.step()
-def write_tables(output_dir):
-    """
-    Write pipeline tables as csv files (in output directory) as specified by output_tables list
-    in settings file.
-
-    Pipeline tables are intermediate computational tables, not to be confused with the
-    synthetic population tables written by the write_synthetic_population step.
-
-    'output_tables' can specify either a list of output tables to include or to skip
-    if no output_tables list is specified, then no checkpointed tables will be written
-
-    Intermediate tables likely to be of particular interest or utility are the controls and weights
-    tables for the various geographies. For example, if one of your geographies is TRACT, then:
-    TRACT_controls has control totals for every TRACT (and aggregated subzone) controls.
-    TRACT_weights has balanced_weight and integer_weight for every TRACT.
-
-    To write all output tables EXCEPT the households and persons tables:
-
-    ::
-
-      output_tables:
-        action: skip
-        tables:
-          - households
-          - persons
-
-    To write ONLY the expanded_household_ids table:
-
-    ::
-
-      output_tables:
-        action: include
-        tables:
-           - expanded_household_ids
-
-    Parameters
-    ----------
-    output_dir: str
-
-    """
-
-    output_tables_settings_name = 'output_tables'
-
-    output_tables_settings = setting(output_tables_settings_name)
-
-    output_tables_list = pipeline.checkpointed_tables()
-
-    if output_tables_settings is None:
-        logger.info("No output_tables specified in settings file. Nothing to write.")
-        return
-
-    action = output_tables_settings.get('action')
-    tables = output_tables_settings.get('tables')
-
-    if action not in ['include', 'skip']:
-        raise RuntimeError("expected %s action '%s' to be either 'include' or 'skip'" %
-                           (output_tables_settings_name, action))
-
-    if action == 'include':
-        output_tables_list = tables
-    elif action == 'skip':
-        output_tables_list = [t for t in output_tables_list if t not in tables]
-
-    # should provide option to also write checkpoints?
-    # output_tables_list.append("checkpoints.csv")
-
-    for table_name in output_tables_list:
-        table = inject.get_table(table_name, None)
-
-        if table is None:
-            logger.warn("Skipping '%s': Table not found." % table_name)
-            continue
-
-        df = table.to_frame()
-        file_name = "%s.csv" % table_name
-        logger.info("writing output file %s" % file_name)
-        file_path = os.path.join(output_dir, file_name)
-        write_index = df.index.name is not None
-        df.to_csv(file_path, index=write_index)
diff --git a/populationsim/tests/configs/settings.yaml b/populationsim/tests/configs/settings.yaml
index f7273d8..95bb2f1 100644
--- a/populationsim/tests/configs/settings.yaml
+++ b/populationsim/tests/configs/settings.yaml
@@ -37,6 +37,7 @@ seed_geography: PUMA
 
 output_tables:
   action: skip
+  prefix: ''
   tables:
     - households
     - persons
@@ -62,4 +63,3 @@ output_synthetic_population:
       - per_num
       - OSUTAG
       - OCCP
-
diff --git a/populationsim/tests/configs2/settings.yaml b/populationsim/tests/configs2/settings.yaml
index 84a66e3..70498ea 100644
--- a/populationsim/tests/configs2/settings.yaml
+++ b/populationsim/tests/configs2/settings.yaml
@@ -39,6 +39,7 @@ seed_geography: PUMA
 
 output_tables:
   action: include
+  prefix: ''
   tables:
     - expanded_household_ids
     - summary_DISTRICT
diff --git a/populationsim/tests/test_balancer.py b/populationsim/tests/test_balancer.py
index c5f2bdb..ef8aa3b 100644
--- a/populationsim/tests/test_balancer.py
+++ b/populationsim/tests/test_balancer.py
@@ -52,7 +52,7 @@ def test_Konduri():
 
     published_final_weights = [1.36, 25.66, 7.98, 27.79, 18.45, 8.64, 1.47, 8.64]
     published_weighted_sum = [
-        round((incidence_table.ix[:, c] * published_final_weights).sum(), 2)
+        round((incidence_table.loc[:, c] * published_final_weights).sum(), 2)
         for c in controls.index]
     npt.assert_almost_equal(weighted_sum, published_weighted_sum, decimal=1)
 
diff --git a/populationsim/tests/test_flex.py b/populationsim/tests/test_flex.py
index 6c13ea5..6b8927c 100644
--- a/populationsim/tests/test_flex.py
+++ b/populationsim/tests/test_flex.py
@@ -6,6 +6,7 @@
 from activitysim.core import tracing
 from activitysim.core import pipeline
 from activitysim.core import inject
+from activitysim.core.config import setting
 
 from populationsim import steps
 
@@ -53,9 +54,9 @@ def test_full_run2():
     assert isinstance(pipeline.get_table('expanded_household_ids'), pd.DataFrame)
 
     # output tables list action: include
-    assert os.path.exists(os.path.join(output_dir, 'expanded_household_ids.csv'))
-    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT.csv'))
-    assert not os.path.exists(os.path.join(output_dir, 'summary_TAZ.csv'))
+    assert os.path.exists(config.output_file_path('expanded_household_ids.csv'))
+    assert os.path.exists(config.output_file_path('summary_DISTRICT.csv'))
+    assert not os.path.exists(config.output_file_path('summary_TAZ.csv'))
 
     # tables will no longer be available after pipeline is closed
     pipeline.close_pipeline()
diff --git a/populationsim/util.py b/populationsim/util.py
deleted file mode 100644
index 73ef141..0000000
--- a/populationsim/util.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# PopulationSim
-# See full license in LICENSE.txt.
-
-from __future__ import absolute_import
-
-import logging
-
-from activitysim.core import inject
-
-
-logger = logging.getLogger(__name__)
-
-
-def setting(key, default=None):
-
-    settings = inject.get_injectable('settings')
-
-    return settings.get(key, default)
-
-
-def data_dir_from_settings():
-    """
-    legacy strategy foir specifying data_dir is with orca injectable.
-    Calling this function provides an alternative by reading it from settings file
-    """
-
-    # FIXME - not sure this plays well with orca
-    # it may depend on when file with orca decorator is imported
-
-    data_dir = setting('data_dir', None)
-
-    if data_dir:
-        inject.add_injectable('data_dir', data_dir)
-    else:
-        data_dir = inject.get_injectable('data_dir')
-
-    logger.info("data_dir: %s" % data_dir)
-    return data_dir
diff --git a/setup.py b/setup.py
index 8534aaa..1f543a5 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@
 
 setup(
     name='populationsim',
-    version='0.4',
+    version='0.4.1',
     description='Population Synthesis',
     author='contributing authors',
     author_email='ben.stabler@rsginc.com',
@@ -27,10 +27,10 @@
     include_package_data=True,
     python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*',
     install_requires=[
+        'activitysim >= 0.9.2',
         'numpy >= 1.16.1',
         'pandas >= 0.24.1',
-        'activitysim >= 0.9.1',
-        'ortools >= 5.1.4045',
+        'ortools >= 5.1.4045, < 7.5',
         'future >= 0.16.0'
     ]
 )