Post-merge cleanup from @theonaunheim + @TheCleric.

theonaunheim · Mar 21, 2021 · 94e4811 · 94e4811
2 parents f9c00b1 + 8a2c2ec
commit 94e4811
Show file tree

Hide file tree

Showing 15 changed files with 30 additions and 26 deletions.
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
@@ -1,3 +1,7 @@
+v.1.1.1:
+========
+-Addition of tract-level processing
+
 v.1.1.0:
 ========
 -Addition of First Name and BIFSG models

diff --git a/LICENSE.txt b/LICENSE.txt
@@ -1,6 +1,7 @@
 The MIT License (MIT)
 
 Copyright (c) 2021 Adam Weeden
+Copyright (c) 2021 Algorex Health Technologies LLC
 Copyright (c) 2020 Theo Naunheim
 
 Permission is hereby granted, free of charge, to any person obtaining a copy

diff --git a/README.rst b/README.rst
@@ -19,6 +19,7 @@ The documentation for Surgeo may be found here: `<https://surgeo.readthedocs.io/
 Contributors
 ------------
 * `Adam Weeden <https://github.com/TheCleric>`_
+* `Algorex Health <https://github.com/AlgorexHealth>`_
 * `Theo Naunheim <https://github.com/theonaunheim>`_
 
 Overview
@@ -101,16 +102,16 @@ To use the CLI, type in "surgeo" followed by your arguments.
     optional arguments:
     -h, --help            show this help message and exit
     --zcta_column ZCTA_COLUMN
-                            The input column to analyze as ZCTA/ZIP)
+                            The input column to analyze as ZCTA/ZIP
     --surname_column SURNAME_COLUMN
-                            The input column to analyze as surname")
+                            The input column to analyze as surname
     --first_name_column FIRST_NAME_COLUMN
-                            The input column to analyze as first name")
+                            The input column to analyze as first name
 
 As a Module
 ~~~~~~~~~~~
 
-Surgeo is best used as a module. Add Census Tract Input Examples. 
+Surgeo is best used as a module.
 
 .. code-block:: python
 

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -22,8 +22,8 @@
 project = 'surgeo'
 copyright = '2020, Theo Naunheim'
 author = 'Theo Naunheim'
-release = '1.1.0'
-version = '1.1.0'
+release = '1.1.1'
+version = '1.1.1'
 
 
 # -- General configuration ---------------------------------------------------

diff --git a/docs/source/contributors.rst b/docs/source/contributors.rst
@@ -2,4 +2,5 @@ Contributors
 ============
 
 * `Adam Weeden <https://github.com/TheCleric>`_
+* `Algorex Health <https://github.com/AlgorexHealth>`_
 * `Theo Naunheim <https://github.com/theonaunheim>`_
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 
 setup(
     name='surgeo',
-    version='1.1.0',
+    version='1.1.1',
     description='Bayesian Improved Surname Geocoder model',
     long_description="""
         **Surgeo** is an implementation of the Bayesian Improved Surname

diff --git a/setup_exe.py b/setup_exe.py
@@ -28,7 +28,7 @@
     options=OPTIONS,
     # Normal setup.py
     name='surgeo',
-    version='1.1.0',
+    version='1.1.1',
     description='Bayesian Improved Surname Geocoder model',
     long_description="""
         Surgeo is an implementation of the Bayesian Improved Surname

diff --git a/surgeo/__init__.py b/surgeo/__init__.py
@@ -6,4 +6,4 @@
 from surgeo.models.surname_model import SurnameModel
 from surgeo.models.surgeo_model import SurgeoModel
 
-VERSION = '1.1.0'
+VERSION = '1.1.1'
diff --git a/surgeo/app/surgeo_cli.py b/surgeo/app/surgeo_cli.py
@@ -51,14 +51,14 @@ class SurgeoCLI(object):
             -h, --help            show this help message and exit
             -ct                  Process for CENSUS Tract as opposed to ZCTA/ZIP
             --zcta_column ZCTA_COLUMN
-                                The input column to analyze as ZCTA/ZIP)
+                                The input column to analyze as ZCTA/ZIP
             --first_name_column FIRST_NAME_COLUMN
-                                The input column to analyze as first name")
+                                The input column to analyze as first name
             --surname_column SURNAME_COLUMN
-                                The input column to analyze as surname"
+                                The input column to analyze as surname
             --state_column STATE_COLUMN input column containing two digit FIPS state code
             --county_column input column containing three digit FIPS County Code
-            --tract_column input column containing six digit tract code)
+            --tract_column input column containing six digit tract code
 
     """
 
@@ -135,7 +135,7 @@ def _run_geo(self, df):
             model = GeocodeModel("TRACT")
         else:
             model = GeocodeModel("ZCTA")
-        # If an optional name is speicied, select that column and run
+        # If an optional name is specified, select that column and run
         if self._zcta_col is not None and not self._ct:
             model = GeocodeModel()
         # TODO: if they supply a name not found in CSV ... more specific error?
@@ -151,7 +151,7 @@ def _run_geo(self, df):
             try:
                 target = df[['state', 'column', 'tract']]
             except KeyError:
-                raise SurgeoException("No state county or tract column found")
+                raise SurgeoException("Columns for state, county, and tract not found.")
         else:
             try:
                 target = df[self._zcta_col_default]

diff --git a/surgeo/models/base_model.py b/surgeo/models/base_model.py
@@ -68,6 +68,7 @@ def _get_prob_race_given_zcta(self):
                                 .str.zfill(5)
         )
         return prob_race_given_zcta
+
     def _get_prob_race_given_tract(self):
         prob_race_given_tract = pd.read_csv(
             self._package_root / 'data' / 'prob_race_given_tract_2010.csv',
@@ -80,7 +81,6 @@ def _get_prob_race_given_tract(self):
     def _get_prob_tract_given_race(self):
         prob_tract_given_race = pd.read_csv(
             self._package_root / 'data' / 'prob_tract_given_race_2010.csv',
-
             na_values=[''],
             keep_default_na=False,
             dtype={'state':str,'county':str,'tract':str}
@@ -168,4 +168,4 @@ def _normalize_zctas(self, zcta: pd.Series) -> pd.Series:
     def _normalize_tracts(self, geo_target_df: pd.DataFrame) -> pd.DataFrame:
         """Rename the input columns, in order, to the standard names 'state', 'county', and 'tract'."""
         converted = geo_target_df.rename(columns={old_col:new_col for old_col, new_col in zip(geo_target_df.columns, ['state','county','tract'])})
-        return converted
+        return converted
diff --git a/surgeo/models/geocode_model.py b/surgeo/models/geocode_model.py
@@ -32,7 +32,7 @@ class GeocodeModel(BaseModel):
 
     def __init__(self, geo_level='ZCTA'):
         super().__init__()
-        if geo_level == 'TRACT':
+        if geo_level.upper() == 'TRACT':
             self._PROB_RACE_GIVEN_GEO = self._get_prob_race_given_tract()
         else:
             self._PROB_RACE_GIVEN_GEO = self._get_prob_race_given_zcta()
@@ -91,4 +91,4 @@ def get_probabilities_tract(self, geo_df):
             right_index=True,
             how='left',
         )
-        return geocode_probs
+        return geocode_probs
diff --git a/surgeo/models/surgeo_model.py b/surgeo/models/surgeo_model.py
@@ -70,7 +70,7 @@ class SurgeoModel(BaseModel):
     """
     def __init__(self, geo_level="ZCTA"):
         super().__init__()
-        self.geo_level = geo_level
+        self.geo_level = geo_level.upper()
         if geo_level == "TRACT":
             self._PROB_GEO_GIVEN_RACE = self._get_prob_tract_given_race()
         else:
@@ -93,7 +93,7 @@ def get_probabilities(self, names, geo_df):
         ----------
         names : pd.Series
             A series of names to use for the BISG algorithm
-        geo_df : Union[pd.Series , pd.DataFrame]
+        geo_df : Union[pd.Series, pd.DataFrame]
             A series of target ZIP/ZCTA codes or State County Tract for the BISG algorithm
 
         Returns
@@ -137,7 +137,7 @@ def _adjust_frame(self,
         # Build frame from zctas, names, and probabilities
         if self.geo_level == 'TRACT':
             surgeo_data = pd.concat([geo_probs, 
-            sur_probs['name'].to_frame()
+                sur_probs['name'].to_frame()
             ], axis=1)
         else:
             surgeo_data = pd.concat([
@@ -176,7 +176,7 @@ def _get_surname_probs(self,
         )
         return surname_probs
 
-    def _get_geocode_probs(self, geo_df: Union[pd.Series,pd.DataFrame]) -> pd.DataFrame:
+    def _get_geocode_probs(self, geo_df: Union[pd.Series, pd.DataFrame]) -> pd.DataFrame:
         """Normalizes ZCTAs/ZIPs and joins them to their race probs."""
         # Normalize
         if self.geo_level == 'TRACT':

diff --git a/tests/app/test_cli.py b/tests/app/test_cli.py
@@ -171,7 +171,6 @@ def test_malformed(self):
         df_true = pd.read_csv(self._DATA_FOLDER / 'surgeo_output.csv')
         self._is_close_enough(df_generated, df_true)
 
-
     def test_tract_simple(self):
         """Test arguments to specify column names for census tract defaults"""
         # Generate input name based on input file

diff --git a/tests/models/test_geocode_model.py b/tests/models/test_geocode_model.py
@@ -35,7 +35,6 @@ def test_get_probabilities(self):
             result.equals(true_result)
         )
 
-
     def test_get_probabilities_tract(self):
         """Test Geocode model versus known result with Tracts"""
         # Get our data and clean it

diff --git a/tests/models/test_surgeo_model.py b/tests/models/test_surgeo_model.py
@@ -40,7 +40,6 @@ def test_get_probabilities(self):
             result.equals(true_result)
         )
 
-
     def test_get_probabilities_tract(self):
         """Test Surgeo model versus known result"""
         # Load data