Skip to content

Commit

Permalink
Post-merge cleanup from @theonaunheim + @TheCleric.
Browse files Browse the repository at this point in the history
  • Loading branch information
theonaunheim committed Mar 21, 2021
2 parents f9c00b1 + 8a2c2ec commit 94e4811
Show file tree
Hide file tree
Showing 15 changed files with 30 additions and 26 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
v.1.1.1:
========
-Addition of tract-level processing

v.1.1.0:
========
-Addition of First Name and BIFSG models
Expand Down
1 change: 1 addition & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
The MIT License (MIT)

Copyright (c) 2021 Adam Weeden
Copyright (c) 2021 Algorex Health Technologies LLC
Copyright (c) 2020 Theo Naunheim

Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down
9 changes: 5 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The documentation for Surgeo may be found here: `<https://surgeo.readthedocs.io/
Contributors
------------
* `Adam Weeden <https://github.com/TheCleric>`_
* `Algorex Health <https://github.com/AlgorexHealth>`_
* `Theo Naunheim <https://github.com/theonaunheim>`_

Overview
Expand Down Expand Up @@ -101,16 +102,16 @@ To use the CLI, type in "surgeo" followed by your arguments.
optional arguments:
-h, --help show this help message and exit
--zcta_column ZCTA_COLUMN
The input column to analyze as ZCTA/ZIP)
The input column to analyze as ZCTA/ZIP
--surname_column SURNAME_COLUMN
The input column to analyze as surname")
The input column to analyze as surname
--first_name_column FIRST_NAME_COLUMN
The input column to analyze as first name")
The input column to analyze as first name
As a Module
~~~~~~~~~~~

Surgeo is best used as a module. Add Census Tract Input Examples.
Surgeo is best used as a module.

.. code-block:: python
Expand Down
4 changes: 2 additions & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
project = 'surgeo'
copyright = '2020, Theo Naunheim'
author = 'Theo Naunheim'
release = '1.1.0'
version = '1.1.0'
release = '1.1.1'
version = '1.1.1'


# -- General configuration ---------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions docs/source/contributors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ Contributors
============

* `Adam Weeden <https://github.com/TheCleric>`_
* `Algorex Health <https://github.com/AlgorexHealth>`_
* `Theo Naunheim <https://github.com/theonaunheim>`_
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name='surgeo',
version='1.1.0',
version='1.1.1',
description='Bayesian Improved Surname Geocoder model',
long_description="""
**Surgeo** is an implementation of the Bayesian Improved Surname
Expand Down
2 changes: 1 addition & 1 deletion setup_exe.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
options=OPTIONS,
# Normal setup.py
name='surgeo',
version='1.1.0',
version='1.1.1',
description='Bayesian Improved Surname Geocoder model',
long_description="""
Surgeo is an implementation of the Bayesian Improved Surname
Expand Down
2 changes: 1 addition & 1 deletion surgeo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
from surgeo.models.surname_model import SurnameModel
from surgeo.models.surgeo_model import SurgeoModel

VERSION = '1.1.0'
VERSION = '1.1.1'
12 changes: 6 additions & 6 deletions surgeo/app/surgeo_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ class SurgeoCLI(object):
-h, --help show this help message and exit
-ct Process for CENSUS Tract as opposed to ZCTA/ZIP
--zcta_column ZCTA_COLUMN
The input column to analyze as ZCTA/ZIP)
The input column to analyze as ZCTA/ZIP
--first_name_column FIRST_NAME_COLUMN
The input column to analyze as first name")
The input column to analyze as first name
--surname_column SURNAME_COLUMN
The input column to analyze as surname"
The input column to analyze as surname
--state_column STATE_COLUMN input column containing two digit FIPS state code
--county_column input column containing three digit FIPS County Code
--tract_column input column containing six digit tract code)
--tract_column input column containing six digit tract code
"""

Expand Down Expand Up @@ -135,7 +135,7 @@ def _run_geo(self, df):
model = GeocodeModel("TRACT")
else:
model = GeocodeModel("ZCTA")
# If an optional name is speicied, select that column and run
# If an optional name is specified, select that column and run
if self._zcta_col is not None and not self._ct:
model = GeocodeModel()
# TODO: if they supply a name not found in CSV ... more specific error?
Expand All @@ -151,7 +151,7 @@ def _run_geo(self, df):
try:
target = df[['state', 'column', 'tract']]
except KeyError:
raise SurgeoException("No state county or tract column found")
raise SurgeoException("Columns for state, county, and tract not found.")
else:
try:
target = df[self._zcta_col_default]
Expand Down
4 changes: 2 additions & 2 deletions surgeo/models/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def _get_prob_race_given_zcta(self):
.str.zfill(5)
)
return prob_race_given_zcta

def _get_prob_race_given_tract(self):
prob_race_given_tract = pd.read_csv(
self._package_root / 'data' / 'prob_race_given_tract_2010.csv',
Expand All @@ -80,7 +81,6 @@ def _get_prob_race_given_tract(self):
def _get_prob_tract_given_race(self):
prob_tract_given_race = pd.read_csv(
self._package_root / 'data' / 'prob_tract_given_race_2010.csv',

na_values=[''],
keep_default_na=False,
dtype={'state':str,'county':str,'tract':str}
Expand Down Expand Up @@ -168,4 +168,4 @@ def _normalize_zctas(self, zcta: pd.Series) -> pd.Series:
def _normalize_tracts(self, geo_target_df: pd.DataFrame) -> pd.DataFrame:
"""Rename the input columns, in order, to the standard names 'state', 'county', and 'tract'."""
converted = geo_target_df.rename(columns={old_col:new_col for old_col, new_col in zip(geo_target_df.columns, ['state','county','tract'])})
return converted
return converted
4 changes: 2 additions & 2 deletions surgeo/models/geocode_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class GeocodeModel(BaseModel):

def __init__(self, geo_level='ZCTA'):
super().__init__()
if geo_level == 'TRACT':
if geo_level.upper() == 'TRACT':
self._PROB_RACE_GIVEN_GEO = self._get_prob_race_given_tract()
else:
self._PROB_RACE_GIVEN_GEO = self._get_prob_race_given_zcta()
Expand Down Expand Up @@ -91,4 +91,4 @@ def get_probabilities_tract(self, geo_df):
right_index=True,
how='left',
)
return geocode_probs
return geocode_probs
8 changes: 4 additions & 4 deletions surgeo/models/surgeo_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class SurgeoModel(BaseModel):
"""
def __init__(self, geo_level="ZCTA"):
super().__init__()
self.geo_level = geo_level
self.geo_level = geo_level.upper()
if geo_level == "TRACT":
self._PROB_GEO_GIVEN_RACE = self._get_prob_tract_given_race()
else:
Expand All @@ -93,7 +93,7 @@ def get_probabilities(self, names, geo_df):
----------
names : pd.Series
A series of names to use for the BISG algorithm
geo_df : Union[pd.Series , pd.DataFrame]
geo_df : Union[pd.Series, pd.DataFrame]
A series of target ZIP/ZCTA codes or State County Tract for the BISG algorithm
Returns
Expand Down Expand Up @@ -137,7 +137,7 @@ def _adjust_frame(self,
# Build frame from zctas, names, and probabilities
if self.geo_level == 'TRACT':
surgeo_data = pd.concat([geo_probs,
sur_probs['name'].to_frame()
sur_probs['name'].to_frame()
], axis=1)
else:
surgeo_data = pd.concat([
Expand Down Expand Up @@ -176,7 +176,7 @@ def _get_surname_probs(self,
)
return surname_probs

def _get_geocode_probs(self, geo_df: Union[pd.Series,pd.DataFrame]) -> pd.DataFrame:
def _get_geocode_probs(self, geo_df: Union[pd.Series, pd.DataFrame]) -> pd.DataFrame:
"""Normalizes ZCTAs/ZIPs and joins them to their race probs."""
# Normalize
if self.geo_level == 'TRACT':
Expand Down
1 change: 0 additions & 1 deletion tests/app/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,6 @@ def test_malformed(self):
df_true = pd.read_csv(self._DATA_FOLDER / 'surgeo_output.csv')
self._is_close_enough(df_generated, df_true)


def test_tract_simple(self):
"""Test arguments to specify column names for census tract defaults"""
# Generate input name based on input file
Expand Down
1 change: 0 additions & 1 deletion tests/models/test_geocode_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ def test_get_probabilities(self):
result.equals(true_result)
)


def test_get_probabilities_tract(self):
"""Test Geocode model versus known result with Tracts"""
# Get our data and clean it
Expand Down
1 change: 0 additions & 1 deletion tests/models/test_surgeo_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def test_get_probabilities(self):
result.equals(true_result)
)


def test_get_probabilities_tract(self):
"""Test Surgeo model versus known result"""
# Load data
Expand Down

0 comments on commit 94e4811

Please sign in to comment.