diff --git a/.ipynb_checkpoints/env-checkpoint.yaml b/.ipynb_checkpoints/env-checkpoint.yaml
new file mode 100644
index 0000000..133e4f0
--- /dev/null
+++ b/.ipynb_checkpoints/env-checkpoint.yaml
@@ -0,0 +1,130 @@
+name: equity_toolkit
+channels:
+  - defaults
+dependencies:
+  - bzip2=1.0.8=he774522_0
+  - ca-certificates=2023.01.10=haa95532_0
+  - certifi=2022.12.7=py311haa95532_0
+  - libffi=3.4.2=hd77b12b_6
+  - openssl=1.1.1t=h2bbff1b_0
+  - pip=23.0.1=py311haa95532_0
+  - python=3.11.0=h966fe2a_3
+  - setuptools=65.6.3=py311haa95532_0
+  - sqlite=3.41.1=h2bbff1b_0
+  - tk=8.6.12=h2bbff1b_0
+  - vc=14.2=h21ff451_1
+  - vs2015_runtime=14.27.29016=h5e58377_2
+  - wheel=0.38.4=py311haa95532_0
+  - wincertstore=0.2=py311haa95532_0
+  - xz=5.2.10=h8cc25b3_1
+  - zlib=1.2.13=h8cc25b3_0
+  - pip:
+      - aiofiles==22.1.0
+      - aiosqlite==0.18.0
+      - anyio==3.6.2
+      - argon2-cffi==21.3.0
+      - argon2-cffi-bindings==21.2.0
+      - arrow==1.2.3
+      - asttokens==2.2.1
+      - attrs==22.2.0
+      - babel==2.12.1
+      - backcall==0.2.0
+      - beautifulsoup4==4.12.2
+      - bleach==6.0.0
+      - cffi==1.15.1
+      - charset-normalizer==3.1.0
+      - click==8.1.3
+      - colorama==0.4.6
+      - comm==0.1.3
+      - dash==2.9.2
+      - dash-bootstrap-components==1.6.0
+      - dash-core-components==2.0.0
+      - dash-html-components==2.0.0
+      - dash-table==5.0.0
+      - debugpy==1.6.7
+      - decorator==5.1.1
+      - defusedxml==0.7.1
+      - executing==1.2.0
+      - fastjsonschema==2.16.3
+      - flask==2.2.3
+      - fqdn==1.5.1
+      - idna==3.4
+      - ipykernel==6.22.0
+      - ipython==8.12.0
+      - ipython-genutils==0.2.0
+      - isoduration==20.11.0
+      - itsdangerous==2.1.2
+      - jedi==0.18.2
+      - jinja2==3.1.2
+      - json5==0.9.11
+      - jsonpointer==2.3
+      - jsonschema==4.17.3
+      - jupyter-client==8.1.0
+      - jupyter-core==5.3.0
+      - jupyter-events==0.6.3
+      - jupyter-server==2.5.0
+      - jupyter-server-fileid==0.8.0
+      - jupyter-server-terminals==0.4.4
+      - jupyter-server-ydoc==0.8.0
+      - jupyter-ydoc==0.2.3
+      - jupyterlab==3.6.3
+      - jupyterlab-pygments==0.2.2
+      - jupyterlab-server==2.22.0
+      - markupsafe==2.1.2
+      - mistune==2.0.5
+      - mypy==1.11.2
+      - mypy-extensions==1.0.0
+      - nbclassic==0.5.5
+      - nbclient==0.7.3
+      - nbconvert==7.3.0
+      - nbformat==5.8.0
+      - nest-asyncio==1.5.6
+      - notebook==6.5.4
+      - notebook-shim==0.2.2
+      - numpy==1.24.2
+      - packaging==23.0
+      - pandas==2.0.0
+      - pandocfilters==1.5.0
+      - parso==0.8.3
+      - pickleshare==0.7.5
+      - platformdirs==3.2.0
+      - plotly==5.14.1
+      - prometheus-client==0.16.0
+      - prompt-toolkit==3.0.38
+      - psutil==5.9.4
+      - pure-eval==0.2.2
+      - pycparser==2.21
+      - pygments==2.14.0
+      - pyrsistent==0.19.3
+      - python-dateutil==2.8.2
+      - python-json-logger==2.0.7
+      - pytz==2023.3
+      - pywin32==306
+      - pywinpty==2.0.10
+      - pyyaml==6.0
+      - pyzmq==25.0.2
+      - requests==2.28.2
+      - rfc3339-validator==0.1.4
+      - rfc3986-validator==0.1.1
+      - scipy==1.10.1
+      - send2trash==1.8.0
+      - six==1.16.0
+      - sniffio==1.3.0
+      - soupsieve==2.4
+      - stack-data==0.6.2
+      - tenacity==8.2.2
+      - terminado==0.17.1
+      - tinycss2==1.2.1
+      - tornado==6.2
+      - traitlets==5.9.0
+      - typing-extensions==4.12.2
+      - tzdata==2023.3
+      - uri-template==1.2.0
+      - urllib3==1.26.15
+      - wcwidth==0.2.6
+      - webcolors==1.13
+      - webencodings==0.5.1
+      - websocket-client==1.5.1
+      - werkzeug==2.2.3
+      - y-py==0.5.9
+      - ypy-websocket==0.8.2
\ No newline at end of file
diff --git a/.ipynb_checkpoints/model-checkpoint.py b/.ipynb_checkpoints/model-checkpoint.py
index 2302456..3956f79 100644
--- a/.ipynb_checkpoints/model-checkpoint.py
+++ b/.ipynb_checkpoints/model-checkpoint.py
@@ -4,11 +4,15 @@
 from typing import List
 from typing import Tuple
 
+import os
 import yaml
 
 import src.model_classes as mc 
 
-with open('config.yaml') as f:
+package_dir = os.path.dirname(os.path.abspath(__file__))
+config_fp = os.path.join(package_dir, "config.yaml")
+
+with open(config_fp) as f:
     config = yaml.safe_load(f)
     
 class Model:
diff --git a/.ipynb_checkpoints/toolkit-checkpoint.ipynb b/.ipynb_checkpoints/toolkit-checkpoint.ipynb
deleted file mode 100644
index b307bda..0000000
--- a/.ipynb_checkpoints/toolkit-checkpoint.ipynb
+++ /dev/null
@@ -1,1181 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "ab695a7b-3043-4154-a59b-01e57feaf8f0",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "ename": "IndentationError",
-     "evalue": "unexpected indent (3060257492.py, line 620)",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;36m  Cell \u001b[1;32mIn[14], line 620\u001b[1;36m\u001b[0m\n\u001b[1;33m    self.A = A\u001b[0m\n\u001b[1;37m    ^\u001b[0m\n\u001b[1;31mIndentationError\u001b[0m\u001b[1;31m:\u001b[0m unexpected indent\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "from scipy.stats import chi2_contingency\n",
-    "import yaml\n",
-    "\n",
-    "from pandas import DataFrame\n",
-    "from typing import Dict\n",
-    "from typing import List\n",
-    "from typing import Any\n",
-    "from typing import Tuple\n",
-    "\n",
-    "with open('config.yaml') as f:\n",
-    "    config = yaml.safe_load(f)\n",
-    "\n",
-    "class Ingest:\n",
-    "    \n",
-    "    \"\"\"\n",
-    "    Class to ingest dataframe input.\n",
-    "    \"\"\"\n",
-    "    \n",
-    "    def __init__(\n",
-    "        self,\n",
-    "        config: Dict[Any, Any]\n",
-    "    ) -> None:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Inits class with the config file\n",
-    "        and unpacks the config file.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        self.config = config\n",
-    "        \n",
-    "        self.unpack_config()\n",
-    "\n",
-    "        \n",
-    "    def run(\n",
-    "        self\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Run function for the class.\n",
-    "        \n",
-    "        :param None:\n",
-    "        :return df:\n",
-    "            DataFrame, ingested df\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        df = self.run_load()\n",
-    "        \n",
-    "        df = self.run_harmonize(df)\n",
-    "        \n",
-    "        return df\n",
-    "        \n",
-    "        \n",
-    "    def unpack_config(\n",
-    "        self\n",
-    "    ) -> None:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Function to unpack config vars.\n",
-    "        \n",
-    "        :var filepath:\n",
-    "            str, the relative filepath \n",
-    "        :var group_variable:\n",
-    "            str, the column name for the \n",
-    "            group variable of interest e.g.\n",
-    "            gender, which contains the target \n",
-    "            class and non-target class e.g.\n",
-    "            females and males.\n",
-    "        :var group_target_val:\n",
-    "            str, within the group_variable column,\n",
-    "            contains the contains the target \n",
-    "            class value e.g.\n",
-    "            females.\n",
-    "        :var group_other_val:\n",
-    "            str, within the group_variable column,\n",
-    "            contains the contains the non-target \n",
-    "            class value e.g. males.\n",
-    "        :var outcome_variable:\n",
-    "            str, the column name for the \n",
-    "            outcome variable of interest e.g.\n",
-    "            hired, which contains the target \n",
-    "            class and non-target class e.g.\n",
-    "            hired and not-hired.\n",
-    "        :var outcome_target_val:\n",
-    "            str, within the outcome_variable column,\n",
-    "            contains the contains the target \n",
-    "            class value e.g.\n",
-    "            hired.\n",
-    "        :var outcome_other_val:\n",
-    "            str, within the outcome_variable column,\n",
-    "            contains the contains the non-target \n",
-    "            class value e.g. not-hired.\n",
-    "        :var grpers:\n",
-    "            Dict[str,str], can be any set of filterable\n",
-    "            columns to slice into particular groups within\n",
-    "            the broader employee roster. The key is the column,\n",
-    "            the value is the desired class within the column\n",
-    "            e.g. job_title: analyst.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        config = self.config\n",
-    "        \n",
-    "        try:\n",
-    "            self.filepath: str = config[\"Ingest\"][\"filepath\"]\n",
-    "            self.group_variable: str = config[\"Ingest\"][\"group_variable\"]\n",
-    "            self.group_target_val: str = config[\"Ingest\"][\"group_target_val\"]\n",
-    "            self.group_other_val: str = config[\"Ingest\"][\"group_other_val\"]\n",
-    "            self.outcome_variable: str = config[\"Ingest\"][\"outcome_variable\"]\n",
-    "            self.outcome_target_val: str = config[\"Ingest\"][\"outcome_target_val\"]\n",
-    "            self.outcome_other_val: str = config[\"Ingest\"][\"outcome_other_val\"]\n",
-    "            self.grpers: Dict[str, str] = config[\"Ingest\"][\"grpers\"]\n",
-    "\n",
-    "            # Type validation\n",
-    "            if not isinstance(self.filepath, str):\n",
-    "                raise TypeError(\"Expected 'filepath' to be of type 'str'.\")\n",
-    "            if not isinstance(self.group_variable, str):\n",
-    "                raise TypeError(\"Expected 'group_variable' to be of type 'str'.\")\n",
-    "            if not isinstance(self.group_target_val, str):\n",
-    "                raise TypeError(\"Expected 'group_target_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.group_other_val, str):\n",
-    "                raise TypeError(\"Expected 'group_other_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.outcome_variable, str):\n",
-    "                raise TypeError(\"Expected 'outcome_variable' to be of type 'str'.\")\n",
-    "            if not isinstance(self.outcome_target_val, str):\n",
-    "                raise TypeError(\"Expected 'outcome_target_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.outcome_other_val, str):\n",
-    "                raise TypeError(\"Expected 'outcome_other_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.grpers, dict):\n",
-    "                raise TypeError(\"Expected 'grpers' to be of type 'dict'.\")\n",
-    "\n",
-    "        except KeyError as e:\n",
-    "            raise KeyError(f\"Missing key '{e.args[0]}' in the config file. \"\n",
-    "                           \"Please ensure the config file contains all required keys under the 'Ingest' section: \"\n",
-    "                           \"'filepath', 'group_variable', 'group_target_val', 'group_other_val', \"\n",
-    "                           \"'outcome_variable', 'outcome_target_val', 'outcome_other_val', and 'grpers'.\")\n",
-    "\n",
-    "        except TypeError as e:\n",
-    "            raise TypeError(f\"Config file error: {e}\")\n",
-    "        \n",
-    "    def run_load(\n",
-    "        self\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Loads csv file. Assumes headers are row 0.\n",
-    "        \n",
-    "        :param None:\n",
-    "        :return DataFrame:\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        filepath = self.filepath\n",
-    "        \n",
-    "        try:\n",
-    "            return pd.read_csv(filepath, skiprows=0)\n",
-    "\n",
-    "        except FileNotFoundError:\n",
-    "            raise FileNotFoundError(\n",
-    "                f\"The file at {filepath} was not found. Please check the file path.\"\n",
-    "            )\n",
-    "\n",
-    "        except pd.errors.EmptyDataError:\n",
-    "            raise ValueError(\n",
-    "                f\"The file at {filepath} is empty and cannot be loaded.\"\n",
-    "            )\n",
-    "\n",
-    "        except pd.errors.ParserError:\n",
-    "            raise ValueError(\n",
-    "                f\"The file at {filepath} contains malformed data and could not be parsed as a valid CSV.\"\n",
-    "            )\n",
-    "\n",
-    "        except PermissionError:\n",
-    "            raise PermissionError(\n",
-    "                f\"Permission denied when attempting to read the file at {filepath}.\"\n",
-    "                f\"Please check the file permissions.\"\n",
-    "            )\n",
-    "\n",
-    "        except Exception as e:\n",
-    "            raise Exception(\n",
-    "                f\"An unexpected error occurred while loading the file: {str(e)}\"\n",
-    "            )\n",
-    "        \n",
-    "    def run_harmonize(\n",
-    "        self,\n",
-    "        df: DataFrame\n",
-    "    ) -> DataFrame:        \n",
-    "\n",
-    "        \"\"\"\n",
-    "        Function to harmonize the dataset.\n",
-    "        \n",
-    "        :param df: \n",
-    "            DataFrame, loaded df\n",
-    "        :return df:\n",
-    "            DataFrame, filtered down to target and other group and\n",
-    "            harmonize the fields\n",
-    "        \"\"\"\n",
-    "    \n",
-    "        group_variable = self.group_variable\n",
-    "        group_target_val = self.group_target_val\n",
-    "        group_other_val = self.group_other_val\n",
-    "        outcome_variable = self.outcome_variable\n",
-    "        outcome_target_val = self.outcome_target_val\n",
-    "        outcome_other_val = self.outcome_other_val\n",
-    "        grpers = self.grpers\n",
-    "\n",
-    "        df = self._apply_filters(\n",
-    "            df=df,\n",
-    "            group_variable=group_variable,\n",
-    "            group_target_val=group_target_val,\n",
-    "            group_other_val=group_other_val,\n",
-    "            grpers=grpers\n",
-    "        )\n",
-    "        \n",
-    "        df = self._apply_harmonize(\n",
-    "            df=df,\n",
-    "            group_variable=group_variable,\n",
-    "            group_target_val=group_target_val,\n",
-    "            group_other_val=group_other_val,\n",
-    "            outcome_variable=outcome_variable,\n",
-    "            outcome_target_val=outcome_target_val,\n",
-    "            outcome_other_val=outcome_other_val\n",
-    "        )\n",
-    "      \n",
-    "        return df\n",
-    "    \n",
-    "    def _apply_filters(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        group_variable: str,\n",
-    "        group_target_val: str,\n",
-    "        group_other_val: str,\n",
-    "        grpers: Dict[str,str],\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to apply filters\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, target df\n",
-    "        :param group_variable:\n",
-    "            str, column name of the\n",
-    "            target variable\n",
-    "        :param group_target_val:\n",
-    "            str, class target value of the group_variable\n",
-    "            aka the protected class value\n",
-    "        :param group_other_val:\n",
-    "            str, class nontarget value of the group_variable\n",
-    "            aka the nonprotected class value\n",
-    "        :return df:\n",
-    "            DataFrame, filtered df\n",
-    "        \"\"\"\n",
-    "                \n",
-    "        df = df.loc[\n",
-    "            df[group_variable].isin(\n",
-    "                [\n",
-    "                    group_target_val, \n",
-    "                    group_other_val\n",
-    "                ]\n",
-    "            )\n",
-    "        ]\n",
-    "        \n",
-    "        for k, v in grpers.items():\n",
-    "        \n",
-    "            df = df.loc[\n",
-    "                df[k].isin([v])\n",
-    "            ]  \n",
-    "            \n",
-    "        return df\n",
-    "    \n",
-    "    def _apply_harmonize(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        group_variable: str,\n",
-    "        group_target_val: str,\n",
-    "        group_other_val: str,\n",
-    "        outcome_variable: str,\n",
-    "        outcome_target_val: str,\n",
-    "        outcome_other_val: str\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to harmonize targets.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, target df\n",
-    "        :param group_variable:\n",
-    "            str, column name of the\n",
-    "            target variable\n",
-    "        :param group_target_val:\n",
-    "            str, class target value of the group_variable\n",
-    "            aka the protected class value\n",
-    "        :param group_other_val:\n",
-    "            str, class nontarget value of the group_variable\n",
-    "            aka the nonprotected class value     \n",
-    "        :param outcome_variable:\n",
-    "            str, the column name of the outcome \n",
-    "        :param outcome_target_val:\n",
-    "            str, class target value of the outcome_variable\n",
-    "            aka success\n",
-    "        :param outcome_other_val:\n",
-    "            str, class nontarget value of the outcome_variable\n",
-    "        :return df:\n",
-    "            DataFrame, target df\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        # harmonize the group target\n",
-    "        df['group_var_clean'] = np.where(\n",
-    "            df[group_variable]==group_target_val, \n",
-    "            1,\n",
-    "            np.where(\n",
-    "                df[group_variable]==group_other_val, \n",
-    "                0, \n",
-    "                -1\n",
-    "            )\n",
-    "        )\n",
-    "        \n",
-    "        # harmonize the outcome target\n",
-    "        df['outcome_var_clean'] = np.where(\n",
-    "            df[outcome_variable]==outcome_target_val, \n",
-    "            1,  \n",
-    "            np.where(\n",
-    "                df[self.outcome_variable]==outcome_other_val,\n",
-    "                0, \n",
-    "                -1\n",
-    "            )\n",
-    "        )  \n",
-    "        \n",
-    "        return df\n",
-    "            \n",
-    "class Transform:\n",
-    "    \n",
-    "    \"\"\"\n",
-    "    Class to transform dataframe inputs into \n",
-    "    2x2 contingency table.\n",
-    "    \"\"\"\n",
-    "    \n",
-    "    def __init__(\n",
-    "        self, \n",
-    "        df: DataFrame\n",
-    "    ) -> None:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        :param df:\n",
-    "            DataFrame, input df\n",
-    "        \"\"\"\n",
-    "    \n",
-    "        self.df = df\n",
-    "    \n",
-    "    def run_build_cont_table(\n",
-    "        self\n",
-    "    ) -> List[int]:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Function to generate contingency table format.\n",
-    "        \n",
-    "        Places the target group val in the top row and the\n",
-    "        target group other to the bottom row.\n",
-    "        \n",
-    "        Places no-success outcome on the first column and success\n",
-    "        on the second column.\n",
-    "        \n",
-    "        :return tbl:\n",
-    "            List[int], filtered down to target and other group.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        df = self.df\n",
-    "        \n",
-    "        cols = [\n",
-    "            'group_var_clean', \n",
-    "            'outcome_var_clean'\n",
-    "        ]\n",
-    "        \n",
-    "        df = df[cols]\n",
-    "        \n",
-    "        tbl = (\n",
-    "            df.pivot_table(\n",
-    "                index='group_var_clean',\n",
-    "                columns='outcome_var_clean', \n",
-    "                aggfunc=len\n",
-    "            ).\n",
-    "            sort_index(\n",
-    "                axis=1, \n",
-    "                ascending=True\n",
-    "            ).\n",
-    "            sort_index(ascending=False). # ensure always [1,0]\n",
-    "            values.tolist()\n",
-    "        ) \n",
-    "            \n",
-    "        return tbl\n",
-    "        \n",
-    "class StatsTesting2x2Cont:\n",
-    "    \n",
-    "    \"\"\"\n",
-    "    Class to perform 2x2 Contigency Table analysis\n",
-    "    with Chi2 and Phi Correlation Coefficent Testing.\n",
-    "\n",
-    "    Provides context into potential association between\n",
-    "    variables and the strength of the association.\n",
-    "    \"\"\"\n",
-    "    \n",
-    "    def __init__(\n",
-    "        self,\n",
-    "        config: Dict[Any, Any],\n",
-    "        tbl: List[int],\n",
-    "        df: DataFrame\n",
-    "    ) -> None:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Inits the class variables and unpacks the\n",
-    "        config variables.\n",
-    "        \n",
-    "        :param config:\n",
-    "            Dict[str,Any], loaded config file.\n",
-    "        :param tbl:\n",
-    "            List[int], 2x2 cont table.\n",
-    "        :param df:\n",
-    "            DataFrame, original input DataFrame.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        self.config = config\n",
-    "        self.tbl = tbl\n",
-    "        self.df = df\n",
-    "\n",
-    "        self.unpack_config()\n",
-    "\n",
-    "    def run_testing(\n",
-    "        self\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Run function for the class.\n",
-    "        \n",
-    "        Runs hypothesis evaluation and builds\n",
-    "        the output report DataFrame.\n",
-    "        \n",
-    "        :param None:\n",
-    "        :return df_results:\n",
-    "            DataFrame, with testing results.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        alpha = self.alpha\n",
-    "        tbl = self.tbl\n",
-    "        process = self.process\n",
-    "        group_variable = self.group_variable\n",
-    "        group_target_val = self.group_target_val\n",
-    "        group_other_val = self.group_other_val\n",
-    "        bin_edges = self.bin_edges\n",
-    "        bin_labels = self.bin_labels\n",
-    "                \n",
-    "        res = self.gen_hypothesis_eval(tbl)\n",
-    "\n",
-    "        df_results = self.run_report_bld(\n",
-    "            alpha=alpha,\n",
-    "            res=res,\n",
-    "            tbl=tbl,\n",
-    "            process=process,\n",
-    "            group_variable=group_variable,\n",
-    "            group_target_val=group_target_val,\n",
-    "            group_other_val=group_other_val,\n",
-    "            bin_edges=bin_edges,\n",
-    "            bin_labels=bin_labels\n",
-    "        )\n",
-    "        \n",
-    "        return df_results\n",
-    "    \n",
-    "    def unpack_config(\n",
-    "        self\n",
-    "    ) -> None:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Function to unpack config variables.\n",
-    "        \n",
-    "        :param None:\n",
-    "        :return None:\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        config = self.config\n",
-    "\n",
-    "        try:\n",
-    "            self.alpha: float = config[\"StatsTesting2x2Cont\"][\"alpha\"]\n",
-    "            self.group_variable: str = config[\"Ingest\"][\"group_variable\"]\n",
-    "            self.group_target_val: str = config[\"Ingest\"][\"group_target_val\"]\n",
-    "            self.group_other_val: str = config[\"Ingest\"][\"group_other_val\"]\n",
-    "            self.outcome_variable: str = config[\"Ingest\"][\"outcome_variable\"]\n",
-    "            self.outcome_target_val: str = config[\"Ingest\"][\"outcome_target_val\"]\n",
-    "            self.outcome_other_val: str = config[\"Ingest\"][\"outcome_other_val\"]\n",
-    "            self.grpers: Dict[str, str] = config[\"Ingest\"][\"grpers\"]\n",
-    "            self.testing: str = config[\"StatsTesting2x2Cont\"][\"testing\"]\n",
-    "            self.process: str = config[\"StatsTesting2x2Cont\"][\"process\"]\n",
-    "            self.bin_edges: List[float] = config[\"StatsTesting2x2Cont\"][\"phi_bin_edges\"]\n",
-    "            self.bin_labels: List[str] = config[\"StatsTesting2x2Cont\"][\"phi_bin_labels\"]\n",
-    "\n",
-    "            if not isinstance(self.alpha, float):\n",
-    "                raise TypeError(\"Expected 'alpha' to be of type 'float'.\")\n",
-    "            if not isinstance(self.group_variable, str):\n",
-    "                raise TypeError(\"Expected 'group_variable' to be of type 'str'.\")\n",
-    "            if not isinstance(self.group_target_val, str):\n",
-    "                raise TypeError(\"Expected 'group_target_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.group_other_val, str):\n",
-    "                raise TypeError(\"Expected 'group_other_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.outcome_variable, str):\n",
-    "                raise TypeError(\"Expected 'outcome_variable' to be of type 'str'.\")\n",
-    "            if not isinstance(self.outcome_target_val, str):\n",
-    "                raise TypeError(\"Expected 'outcome_target_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.outcome_other_val, str):\n",
-    "                raise TypeError(\"Expected 'outcome_other_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.grpers, dict):\n",
-    "                raise TypeError(\"Expected 'grpers' to be of type 'dict'.\")\n",
-    "            if not isinstance(self.testing, str):\n",
-    "                raise TypeError(\"Expected 'testing' to be of type 'str'.\")\n",
-    "            if not isinstance(self.process, str):\n",
-    "                raise TypeError(\"Expected 'process' to be of type 'str'.\")\n",
-    "            if not isinstance(\n",
-    "                self.bin_edges, list\n",
-    "            ) or not all(\n",
-    "                isinstance(\n",
-    "                    i, (int, float)\n",
-    "                ) for i in self.bin_edges\n",
-    "            ):\n",
-    "                raise TypeError(\"Expected 'bin_edges' to be a list of floats.\")\n",
-    "            if not isinstance(\n",
-    "                self.bin_labels, list\n",
-    "            ) or not all(\n",
-    "                isinstance(i, str) for i in self.bin_labels\n",
-    "            ):\n",
-    "                raise TypeError(\"Expected 'bin_labels' to be a list of strings.\")\n",
-    "        \n",
-    "        except KeyError as e:\n",
-    "            raise KeyError(\n",
-    "                f\"Missing key '{e.args[0]}' in the config file. \"\n",
-    "                f\"Ensure all required keys are present in the 'Ingest' and 'StatsTesting2x2Cont' sections.\"\n",
-    "            )\n",
-    "\n",
-    "        except TypeError as e:\n",
-    "            raise TypeError(f\"Config file error: {e}\")\n",
-    "\n",
-    "        \n",
-    "    def gen_hypothesis_eval(\n",
-    "        self,\n",
-    "        tbl: List[int]\n",
-    "    ) -> chi2_contingency:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Function to generate the chi2_contigency\n",
-    "        statistic and result.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        #size = np.shape(tbl)\n",
-    "        #tbl_len = len(tbl)\n",
-    "        \n",
-    "        res = chi2_contingency(\n",
-    "            tbl\n",
-    "        )\n",
-    "            \n",
-    "        return res\n",
-    "        \n",
-    "    def run_report_bld(\n",
-    "        self,\n",
-    "        alpha: float,\n",
-    "        res: chi2_contingency,\n",
-    "        tbl: List[int],\n",
-    "        process: str,\n",
-    "        group_variable: str,\n",
-    "        group_target_val: str,\n",
-    "        group_other_val: str,\n",
-    "        bin_edges: List[float],\n",
-    "        bin_labels: List[str]\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Runs report for statistical testing\n",
-    "        chi2_contingency results\n",
-    "        \n",
-    "        :param alpha:\n",
-    "            float, alpha value for significance evaluation.\n",
-    "        :param res:\n",
-    "            chi2_contingency, result of the chi2_contingency.\n",
-    "        :param tbl:\n",
-    "            List[int], the contingency table.\n",
-    "        :param process: \n",
-    "            str, the name of the business process\n",
-    "            being tested, e.g. 'hiring'.\n",
-    "        :param group_variable:\n",
-    "            str, column name of the\n",
-    "            target variable.\n",
-    "        :param group_target_val:\n",
-    "            str, class target value of the group_variable\n",
-    "            aka the protected class value.\n",
-    "        :param group_other_val:\n",
-    "            str, class nontarget value of the group_variable\n",
-    "            aka the nonprotected class value.  \n",
-    "        :param bin_edges:\n",
-    "            List[float], edges for phi\n",
-    "            bins.\n",
-    "        :param bin_labels:\n",
-    "            List[str], labels for the phi\n",
-    "            bins.\n",
-    "        :return df:\n",
-    "            DataFrame, target\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        pvalue = res[1]\n",
-    "        \n",
-    "        df = pd.DataFrame()\n",
-    "\n",
-    "        df = self._gen_significance_test(\n",
-    "            df=df,\n",
-    "            pvalue=pvalue,\n",
-    "            alpha=alpha\n",
-    "        )\n",
-    "        \n",
-    "        (\n",
-    "            df,\n",
-    "            A,\n",
-    "            B,\n",
-    "            C,\n",
-    "            D,\n",
-    "            total_target_grp,\n",
-    "            total_non_target_grp,\n",
-    "            diagonals,\n",
-    "            percent_target_succ,\n",
-    "            percent_non_target_succ,\n",
-    "            phi_numerator,\n",
-    "            phi_denominator\n",
-    "        ) = self._gen_table_calcs(\n",
-    "                df=df,\n",
-    "                tbl=tbl,\n",
-    "        )\n",
-    "        \n",
-    "        if res[1] <= alpha:\n",
-    "            df, phi_result = self._gen_phi_coefficient(\n",
-    "                df=df,\n",
-    "                tbl=tbl,\n",
-    "                bin_edges=bin_edges,\n",
-    "                bin_labels=bin_labels,\n",
-    "                process=process,\n",
-    "                group_variable=group_variable,\n",
-    "                group_target_val=group_target_val,\n",
-    "                group_other_val=group_other_val\n",
-    "            )\n",
-    "            \n",
-    "        else:\n",
-    "            df['phi_corr_coeff'] = np.nan\n",
-    "            df['phi_bins'] = np.nan\n",
-    "            \n",
-    "            phi_result = \"\"\n",
-    "        \n",
-    "        df = self._gen_four_fifths_test(df)\n",
-    "                \n",
-    "        df = self._gen_outcome_meta(\n",
-    "            df,\n",
-    "            round(res[1],3),\n",
-    "            phi_result\n",
-    "        )\n",
-    "        \n",
-    "        df = self._gen_unpack_stats(\n",
-    "            df,\n",
-    "            res\n",
-    "        )\n",
-    "        \n",
-    "        return df\n",
-    "        \n",
-    "    def _gen_unpack_stats(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        res: chi2_contingency\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to unpack test stats from\n",
-    "        chi2_contingency results.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, output df.\n",
-    "        :param res:\n",
-    "            chi2_contingency, results array.\n",
-    "        :return df:\n",
-    "            DataFrame, output df.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        group_target_val = self.group_target_val\n",
-    "        group_other_val = self.group_other_val\n",
-    "        rows = [group_target_val] + [group_other_val]\n",
-    "    \n",
-    "        df['statistic'] = res[0]\n",
-    "        df['pvalue'] = res[1]\n",
-    "        df['dof'] = res[2]\n",
-    "        df['tbl_row'] = [rows]\n",
-    "        df['tbl'] = [tbl]\n",
-    "        df['expected_freq'] = [res[3]]\n",
-    "        df['tbl_expected_diff'] = [tbl - res[3]]\n",
-    "        \n",
-    "        return df\n",
-    "    \n",
-    "    def _gen_significance_test(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        pvalue: float,\n",
-    "        alpha: float\n",
-    "    ):\n",
-    "        \"\"\"\n",
-    "        Method to report on test significance.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, results df.\n",
-    "        :param pval:\n",
-    "            int, pvalue.\n",
-    "        :param alpha:\n",
-    "            float, the alpha value for testing eval.\n",
-    "        :return df:\n",
-    "            DataFrame with metadata added.     \n",
-    "        \"\"\"\n",
-    "            \n",
-    "        if pvalue <= alpha:\n",
-    "            val = 'statistically significant result'\n",
-    "            \n",
-    "        else:\n",
-    "            val = 'no statistically significant result'\n",
-    "                    \n",
-    "        df['test_result'] = [val]\n",
-    "        \n",
-    "        return df\n",
-    "    \n",
-    "    def _gen_phi_coefficient(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        tbl: List[int],\n",
-    "        process: str,\n",
-    "        group_variable: str,\n",
-    "        group_target_val: str,\n",
-    "        group_other_val: str,\n",
-    "        bin_edges: List[float],\n",
-    "        bin_labels: List[str]\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to generate the phi coefficient.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, the results df.\n",
-    "        :param tbl:\n",
-    "            List[int], the 2x2 cont table.\n",
-    "        :param process: \n",
-    "            str, the name of the business process\n",
-    "            being tested, e.g. 'hiring'.\n",
-    "        :param group_variable:\n",
-    "            str, column name of the\n",
-    "            target variable.\n",
-    "        :param group_target_val:\n",
-    "            str, class target value of the group_variable\n",
-    "            aka the protected class value.\n",
-    "        :param group_other_val:\n",
-    "            str, class nontarget value of the group_variable\n",
-    "            aka the nonprotected class value.  \n",
-    "        :param bin_edges:\n",
-    "            List[float], edges for phi\n",
-    "            bins.\n",
-    "        :param bin_labels:\n",
-    "            List[str], lab\n",
-    "        :return df:\n",
-    "            DataFrame, output df.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        diagonals = self.diagonals\n",
-    "        numerator = self.phi_numerator\n",
-    "        denominator = self.phi_denominator\n",
-    "\n",
-    "        phi = numerator / denominator if denominator != 0 else 0\n",
-    "\n",
-    "        df['phi_corr_coeff'] = phi\n",
-    "                \n",
-    "        df = self._gen_prep_phi_bins(\n",
-    "            df=df,\n",
-    "            bin_edges=bin_edges,\n",
-    "            bin_labels=bin_labels\n",
-    "        )\n",
-    "\n",
-    "        df, phi_result = self._gen_prep_diagonals(\n",
-    "            df=df,\n",
-    "            diagonals=diagonals,\n",
-    "            process=process,\n",
-    "            group_variable=group_variable,\n",
-    "            group_other_val=group_other_val,\n",
-    "            group_target_val=group_target_val,\n",
-    "            percent_non_target_succ=self.percent_non_target_succ,\n",
-    "            percent_target_succ=self.percent_target_succ,\n",
-    "        )\n",
-    "        \n",
-    "        return df, phi_result\n",
-    "    \n",
-    "    def _gen_table_calcs(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        tbl: List[int]\n",
-    "    ) -> Tuple[\n",
-    "        DataFrame, float, float, float, float,\n",
-    "        float, float, float, float, \n",
-    "        float, float, float\n",
-    "    ]:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to generate phi bins. Provides additional\n",
-    "        explainability on the magnitude of association, when \n",
-    "        an association is found.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, output df.\n",
-    "        :param tbl:\n",
-    "            List[int], 2x2 contingency.\n",
-    "        :return [\n",
-    "            df, A, B, C, D, total_target_grp,\n",
-    "            total_non_target_grp, diagonals,\n",
-    "            percent_target_succ, percent_non_target_succ,\n",
-    "            phi_numerator, phi_denominator\n",
-    "        ]:\n",
-    "            Tuple[DataFrame, float, float, float, float,\n",
-    "        float, float, float, float, \n",
-    "        float, float, float\n",
-    "        ]\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        # females, males; no succ, succ\n",
-    "        A, B = tbl[0] \n",
-    "        C, D = tbl[1]\n",
-    "        \n",
-    "        total_target_grp = A + B\n",
-    "        total_non_target_grp = C + D\n",
-    "        diagonals = (A + D) > (B + C)\n",
-    "        percent_target_succ = (B / total_target_grp) * 100\n",
-    "        percent_non_target_succ = (D / total_non_target_grp) * 100\n",
-    "        phi_numerator = (A * D) - (B * C)\n",
-    "        phi_denominator = np.sqrt((A + B) * (C + D) * (A + C) * (B + D))      \n",
-    "           \n",
-    "        return (\n",
-    "            df,\n",
-    "            A,\n",
-    "            B,\n",
-    "            C,\n",
-    "            D,\n",
-    "            total_target_grp,\n",
-    "            total_non_target_grp,\n",
-    "            diagonals,\n",
-    "            percent_target_succ,\n",
-    "            percent_non_target_succ,\n",
-    "            phi_numerator,\n",
-    "            phi_denominator\n",
-    "        )\n",
-    "    \n",
-    "    def _gen_prep_phi_bins(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        bin_edges: List[float],\n",
-    "        bin_labels: List[str]\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to generate pandas bins for \n",
-    "        phi coeff.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, output df.\n",
-    "        :param bin_edges:\n",
-    "            List[float], edges for phi\n",
-    "            bins.\n",
-    "        :param bin_labels:\n",
-    "            List[str], labels for the phi\n",
-    "            bins.\n",
-    "        :return df:\n",
-    "            DataFrame, output df.\n",
-    "        \"\"\"\n",
-    "    \n",
-    "        df['phi_bins'] = pd.cut(\n",
-    "            df['phi_corr_coeff'], \n",
-    "            bins=bin_edges, \n",
-    "            labels=bin_labels, \n",
-    "            include_lowest=True\n",
-    "        )\n",
-    "        \n",
-    "        return df\n",
-    "    \n",
-    "    def _gen_four_fifths_test(\n",
-    "        self,\n",
-    "        df: DataFrame\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        percent_target_succ = self.percent_target_succ\n",
-    "        percent_non_target_succ = self.percent_non_target_succ         \n",
-    "        \n",
-    "        ratio = percent_target_succ / percent_non_target_succ\n",
-    "        \n",
-    "        if ratio < .8:\n",
-    "            ratio_desc = f'failed with 4/5 test at {round(ratio,3)}'\n",
-    "        elif ratio >= .8:\n",
-    "            ratio_desc = f'passed with 4/5 test at {round(ratio,3)}'\n",
-    "        else:\n",
-    "            ratio_desc = 'error calculating 4/5 test'\n",
-    "        \n",
-    "        df['four_fifths_test'] = ratio_desc\n",
-    "        return df\n",
-    "    \n",
-    "    def _gen_prep_diagonals(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        diagonals: bool,\n",
-    "        process: str,\n",
-    "        group_variable: str,\n",
-    "        group_other_val: str,\n",
-    "        group_target_val: str,\n",
-    "        percent_non_target_succ: float,\n",
-    "        percent_target_succ: float,\n",
-    "    ) -> Tuple[DataFrame, str]:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to generate the magnitude of the\n",
-    "        assocation using phi coefficient analysis.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, output df.\n",
-    "        :param diagonals:\n",
-    "            bool,\n",
-    "        :param process: \n",
-    "            str, the name of the business process\n",
-    "            being tested, e.g. 'hiring'.\n",
-    "        :param group_variable:\n",
-    "            str, column name of the\n",
-    "            target variable.\n",
-    "        :param group_target_val:\n",
-    "            str, class target value of the group_variable\n",
-    "            aka the protected class value.\n",
-    "        :param group_other_val:\n",
-    "            str, class nontarget value of the group_variable\n",
-    "            aka the nonprotected class value.   \n",
-    "        :param percent_non_target_succ:\n",
-    "            float, the success percentage attained\n",
-    "            for the the non-target group.\n",
-    "        :param percent_target_succ:\n",
-    "            float, the success percentage attained for the\n",
-    "            target class.\n",
-    "        :return (df, phi_col):\n",
-    "            Tuple[df, phi_col]\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        phi_bin = df['phi_bins'].values[0]    \n",
-    "        phi_corr_coeff = df['phi_corr_coeff'].values[0]    \n",
-    "\n",
-    "        if diagonals:\n",
-    "            diagonal_msg = (\n",
-    "                f\"The values on the positive diagonal of the 'tbl' indicate the distribution of {process} success across {group_variable} categories.\"\n",
-    "                f\" {group_other_val} had a higher proportion of successful outcomes compared to {group_target_val}.\"\n",
-    "                f\" Specifically, {percent_non_target_succ:.1f}% of {group_other_val} had success while only {percent_target_succ:.1f}%\"\n",
-    "                f\" of {group_target_val} had success.\"\n",
-    "                f\" This significant difference in {process} success rates suggests a potential {group_variable} bias, with {group_other_val} success in {process}\"\n",
-    "                f\" at a higher rate than {group_target_val}.\"\n",
-    "            )\n",
-    "            \n",
-    "        else:\n",
-    "            diagonal_msg = \"the diagonal values are not substantially higher, suggesting the relationship might be more nuanced.\"\n",
-    "        \n",
-    "        phi_col = f\"The phi correlation coefficient is {phi_corr_coeff:.3f}, indicating a {phi_bin} effect size. {diagonal_msg}\"\n",
-    "            \n",
-    "        return df, phi_col\n",
-    "    \n",
-    "    def _gen_outcome_meta(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        pval: float,\n",
-    "        phi_result: str\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to generate meta data for \n",
-    "        reporting dataframe\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, results df\n",
-    "        :param pval:\n",
-    "            int, pvalue\n",
-    "        :param phi_result:\n",
-    "            str, result of phi testing.\n",
-    "        :return df:\n",
-    "            DataFrame with metadata added\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        grpers = self.grpers\n",
-    "        result = df['test_result'].values[0]\n",
-    "        phi_col = df['phi_corr_coeff'].values[0]\n",
-    "        testing = self.testing\n",
-    "        process = self.process\n",
-    "        group_target_val = self.group_target_val\n",
-    "        alpha = self.alpha\n",
-    "        four_fifths = df['four_fifths_test'].values[0]\n",
-    "        \n",
-    "        col = f\"Testing for {grpers}, {four_fifths}. Based on the results of the chi-square test of independence, there is {result} for {testing}-based {process} discrimination against {group_target_val} at the chosen significance level of {alpha}.\"\n",
-    "\n",
-    "        if result == \"statistically significant result\":\n",
-    "            col = f\"{col} {phi_result}\"\n",
-    "                \n",
-    "        df['result_desc'] = col\n",
-    "        \n",
-    "        return df\n",
-    "        \n",
-    "# pipeline\n",
-    "\n",
-    "ingestObj = Ingest(config)\n",
-    "df = ingestObj.run()\n",
-    "\n",
-    "transObj = Transform(\n",
-    "    df.copy()\n",
-    ")\n",
-    "tbl = transObj.run_build_cont_table()\n",
-    "\n",
-    "statsObj = StatsTesting2x2Cont(\n",
-    "    config,\n",
-    "    tbl,\n",
-    "    df.copy() # need to add some more context in plain text\n",
-    ")\n",
-    "df_result = statsObj.run_testing()\n",
-    "\n",
-    "df_result['result_desc'].tolist()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "id": "01672735-2ad5-42ae-9488-1962f3d0e63e",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>test_result</th>\n",
-       "      <th>phi_corr_coeff</th>\n",
-       "      <th>phi_bins</th>\n",
-       "      <th>four_fifths_test</th>\n",
-       "      <th>result_desc</th>\n",
-       "      <th>statistic</th>\n",
-       "      <th>pvalue</th>\n",
-       "      <th>dof</th>\n",
-       "      <th>tbl_row</th>\n",
-       "      <th>tbl</th>\n",
-       "      <th>expected_freq</th>\n",
-       "      <th>tbl_expected_diff</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>statistically significant result</td>\n",
-       "      <td>0.39736</td>\n",
-       "      <td>moderate</td>\n",
-       "      <td>failed 4/5 test at 0.167</td>\n",
-       "      <td>Testing for {'job_title': 'analyst'}, based on...</td>\n",
-       "      <td>5.218246</td>\n",
-       "      <td>0.022351</td>\n",
-       "      <td>1</td>\n",
-       "      <td>[Female, Male]</td>\n",
-       "      <td>[[10, 1], [15, 18]]</td>\n",
-       "      <td>[[6.25, 4.75], [18.75, 14.25]]</td>\n",
-       "      <td>[[3.75, -3.75], [-3.75, 3.75]]</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                        test_result  phi_corr_coeff  phi_bins   \n",
-       "0  statistically significant result         0.39736  moderate  \\\n",
-       "\n",
-       "           four_fifths_test   \n",
-       "0  failed 4/5 test at 0.167  \\\n",
-       "\n",
-       "                                         result_desc  statistic    pvalue   \n",
-       "0  Testing for {'job_title': 'analyst'}, based on...   5.218246  0.022351  \\\n",
-       "\n",
-       "   dof         tbl_row                  tbl                   expected_freq   \n",
-       "0    1  [Female, Male]  [[10, 1], [15, 18]]  [[6.25, 4.75], [18.75, 14.25]]  \\\n",
-       "\n",
-       "                tbl_expected_diff  \n",
-       "0  [[3.75, -3.75], [-3.75, 3.75]]  "
-      ]
-     },
-     "execution_count": 24,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_result"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "94114f77-caa8-41a3-900a-44317c84f4b7",
-   "metadata": {},
-   "source": [
-    "to do:\n",
-    "    \n",
-    "implement these tests\n",
-    "\n",
-    "\n",
-    "https://en.wikipedia.org/wiki/Disparate_impact\n",
-    "\n",
-    "Add handler for filtered size of group must be ...\n",
-    "\n",
-    "# need to check this size\n",
-    "# https://online.stat.psu.edu/stat500/lesson/8/8.2#:~:text=That%20equates%20to%20the%20Chi,count%20of%20at%20least%205.\n",
-    "\n",
-    "# make sure at least 5 in each slice, then at least 50"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1d975b96-c8af-4e46-9521-0c7fbe442ff1",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "hrailabs_dev",
-   "language": "python",
-   "name": "hrailabs"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/__pycache__/__init__.cpython-311.pyc b/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000..27c8d58
Binary files /dev/null and b/__pycache__/__init__.cpython-311.pyc differ
diff --git a/__pycache__/model.cpython-311.pyc b/__pycache__/model.cpython-311.pyc
index 42d4609..ef93758 100644
Binary files a/__pycache__/model.cpython-311.pyc and b/__pycache__/model.cpython-311.pyc differ
diff --git a/env.yaml b/env.yaml
new file mode 100644
index 0000000..133e4f0
--- /dev/null
+++ b/env.yaml
@@ -0,0 +1,130 @@
+name: equity_toolkit
+channels:
+  - defaults
+dependencies:
+  - bzip2=1.0.8=he774522_0
+  - ca-certificates=2023.01.10=haa95532_0
+  - certifi=2022.12.7=py311haa95532_0
+  - libffi=3.4.2=hd77b12b_6
+  - openssl=1.1.1t=h2bbff1b_0
+  - pip=23.0.1=py311haa95532_0
+  - python=3.11.0=h966fe2a_3
+  - setuptools=65.6.3=py311haa95532_0
+  - sqlite=3.41.1=h2bbff1b_0
+  - tk=8.6.12=h2bbff1b_0
+  - vc=14.2=h21ff451_1
+  - vs2015_runtime=14.27.29016=h5e58377_2
+  - wheel=0.38.4=py311haa95532_0
+  - wincertstore=0.2=py311haa95532_0
+  - xz=5.2.10=h8cc25b3_1
+  - zlib=1.2.13=h8cc25b3_0
+  - pip:
+      - aiofiles==22.1.0
+      - aiosqlite==0.18.0
+      - anyio==3.6.2
+      - argon2-cffi==21.3.0
+      - argon2-cffi-bindings==21.2.0
+      - arrow==1.2.3
+      - asttokens==2.2.1
+      - attrs==22.2.0
+      - babel==2.12.1
+      - backcall==0.2.0
+      - beautifulsoup4==4.12.2
+      - bleach==6.0.0
+      - cffi==1.15.1
+      - charset-normalizer==3.1.0
+      - click==8.1.3
+      - colorama==0.4.6
+      - comm==0.1.3
+      - dash==2.9.2
+      - dash-bootstrap-components==1.6.0
+      - dash-core-components==2.0.0
+      - dash-html-components==2.0.0
+      - dash-table==5.0.0
+      - debugpy==1.6.7
+      - decorator==5.1.1
+      - defusedxml==0.7.1
+      - executing==1.2.0
+      - fastjsonschema==2.16.3
+      - flask==2.2.3
+      - fqdn==1.5.1
+      - idna==3.4
+      - ipykernel==6.22.0
+      - ipython==8.12.0
+      - ipython-genutils==0.2.0
+      - isoduration==20.11.0
+      - itsdangerous==2.1.2
+      - jedi==0.18.2
+      - jinja2==3.1.2
+      - json5==0.9.11
+      - jsonpointer==2.3
+      - jsonschema==4.17.3
+      - jupyter-client==8.1.0
+      - jupyter-core==5.3.0
+      - jupyter-events==0.6.3
+      - jupyter-server==2.5.0
+      - jupyter-server-fileid==0.8.0
+      - jupyter-server-terminals==0.4.4
+      - jupyter-server-ydoc==0.8.0
+      - jupyter-ydoc==0.2.3
+      - jupyterlab==3.6.3
+      - jupyterlab-pygments==0.2.2
+      - jupyterlab-server==2.22.0
+      - markupsafe==2.1.2
+      - mistune==2.0.5
+      - mypy==1.11.2
+      - mypy-extensions==1.0.0
+      - nbclassic==0.5.5
+      - nbclient==0.7.3
+      - nbconvert==7.3.0
+      - nbformat==5.8.0
+      - nest-asyncio==1.5.6
+      - notebook==6.5.4
+      - notebook-shim==0.2.2
+      - numpy==1.24.2
+      - packaging==23.0
+      - pandas==2.0.0
+      - pandocfilters==1.5.0
+      - parso==0.8.3
+      - pickleshare==0.7.5
+      - platformdirs==3.2.0
+      - plotly==5.14.1
+      - prometheus-client==0.16.0
+      - prompt-toolkit==3.0.38
+      - psutil==5.9.4
+      - pure-eval==0.2.2
+      - pycparser==2.21
+      - pygments==2.14.0
+      - pyrsistent==0.19.3
+      - python-dateutil==2.8.2
+      - python-json-logger==2.0.7
+      - pytz==2023.3
+      - pywin32==306
+      - pywinpty==2.0.10
+      - pyyaml==6.0
+      - pyzmq==25.0.2
+      - requests==2.28.2
+      - rfc3339-validator==0.1.4
+      - rfc3986-validator==0.1.1
+      - scipy==1.10.1
+      - send2trash==1.8.0
+      - six==1.16.0
+      - sniffio==1.3.0
+      - soupsieve==2.4
+      - stack-data==0.6.2
+      - tenacity==8.2.2
+      - terminado==0.17.1
+      - tinycss2==1.2.1
+      - tornado==6.2
+      - traitlets==5.9.0
+      - typing-extensions==4.12.2
+      - tzdata==2023.3
+      - uri-template==1.2.0
+      - urllib3==1.26.15
+      - wcwidth==0.2.6
+      - webcolors==1.13
+      - webencodings==0.5.1
+      - websocket-client==1.5.1
+      - werkzeug==2.2.3
+      - y-py==0.5.9
+      - ypy-websocket==0.8.2
\ No newline at end of file
diff --git a/model.py b/model.py
index 2302456..3956f79 100644
--- a/model.py
+++ b/model.py
@@ -4,11 +4,15 @@
 from typing import List
 from typing import Tuple
 
+import os
 import yaml
 
 import src.model_classes as mc 
 
-with open('config.yaml') as f:
+package_dir = os.path.dirname(os.path.abspath(__file__))
+config_fp = os.path.join(package_dir, "config.yaml")
+
+with open(config_fp) as f:
     config = yaml.safe_load(f)
     
 class Model:
diff --git a/notebooks/.ipynb_checkpoints/toolkit-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/toolkit-checkpoint.ipynb
new file mode 100644
index 0000000..d6ae82b
--- /dev/null
+++ b/notebooks/.ipynb_checkpoints/toolkit-checkpoint.ipynb
@@ -0,0 +1,146 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "f9c027ac-4352-4da6-a6d7-394eade3031c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import yaml\n",
+    "import sys\n",
+    "import os\n",
+    "\n",
+    "project_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))\n",
+    "sys.path.append(project_dir)\n",
+    "import model as model\n",
+    "\n",
+    "config_fp = os.path.join(project_dir, \"config.yaml\")\n",
+    "with open(config_fp) as f:\n",
+    "    config = yaml.safe_load(f)\n",
+    "    \n",
+    "mod = model.Model(config)\n",
+    "\n",
+    "df_prep, tbl = mod.prep()\n",
+    "\n",
+    "df_result = mod.analysis(df_prep.copy(), tbl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "8c15a313-bfc9-44a9-9891-387ed56564be",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>test_result</th>\n",
+       "      <th>phi_corr_coeff</th>\n",
+       "      <th>phi_bins</th>\n",
+       "      <th>four_fifths_test</th>\n",
+       "      <th>result_desc</th>\n",
+       "      <th>statistic</th>\n",
+       "      <th>pvalue</th>\n",
+       "      <th>alpha</th>\n",
+       "      <th>dof</th>\n",
+       "      <th>tbl_rows</th>\n",
+       "      <th>tbl_cols</th>\n",
+       "      <th>tbl</th>\n",
+       "      <th>expected_freq</th>\n",
+       "      <th>tbl_expected_diff</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Statistically significant result</td>\n",
+       "      <td>0.39736</td>\n",
+       "      <td>moderate</td>\n",
+       "      <td>4/5ths Test passed at a ratio of: 1.0.</td>\n",
+       "      <td>Testing for {'job_title': 'analyst'}, 4/5ths T...</td>\n",
+       "      <td>5.218246</td>\n",
+       "      <td>0.022351</td>\n",
+       "      <td>0.05</td>\n",
+       "      <td>1</td>\n",
+       "      <td>[Female, Male]</td>\n",
+       "      <td>[not_hired, hired]</td>\n",
+       "      <td>[[10, 1], [15, 18]]</td>\n",
+       "      <td>[[6.25, 4.75], [18.75, 14.25]]</td>\n",
+       "      <td>[[3.75, -3.75], [-3.75, 3.75]]</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                        test_result  phi_corr_coeff  phi_bins   \n",
+       "0  Statistically significant result         0.39736  moderate  \\\n",
+       "\n",
+       "                         four_fifths_test   \n",
+       "0  4/5ths Test passed at a ratio of: 1.0.  \\\n",
+       "\n",
+       "                                         result_desc  statistic    pvalue   \n",
+       "0  Testing for {'job_title': 'analyst'}, 4/5ths T...   5.218246  0.022351  \\\n",
+       "\n",
+       "   alpha  dof        tbl_rows            tbl_cols                  tbl   \n",
+       "0   0.05    1  [Female, Male]  [not_hired, hired]  [[10, 1], [15, 18]]  \\\n",
+       "\n",
+       "                    expected_freq               tbl_expected_diff  \n",
+       "0  [[6.25, 4.75], [18.75, 14.25]]  [[3.75, -3.75], [-3.75, 3.75]]  "
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_result"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "hrailabs_dev",
+   "language": "python",
+   "name": "hrailabs"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/toolkit.ipynb b/notebooks/toolkit.ipynb
new file mode 100644
index 0000000..d6ae82b
--- /dev/null
+++ b/notebooks/toolkit.ipynb
@@ -0,0 +1,146 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "f9c027ac-4352-4da6-a6d7-394eade3031c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import yaml\n",
+    "import sys\n",
+    "import os\n",
+    "\n",
+    "project_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))\n",
+    "sys.path.append(project_dir)\n",
+    "import model as model\n",
+    "\n",
+    "config_fp = os.path.join(project_dir, \"config.yaml\")\n",
+    "with open(config_fp) as f:\n",
+    "    config = yaml.safe_load(f)\n",
+    "    \n",
+    "mod = model.Model(config)\n",
+    "\n",
+    "df_prep, tbl = mod.prep()\n",
+    "\n",
+    "df_result = mod.analysis(df_prep.copy(), tbl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "8c15a313-bfc9-44a9-9891-387ed56564be",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>test_result</th>\n",
+       "      <th>phi_corr_coeff</th>\n",
+       "      <th>phi_bins</th>\n",
+       "      <th>four_fifths_test</th>\n",
+       "      <th>result_desc</th>\n",
+       "      <th>statistic</th>\n",
+       "      <th>pvalue</th>\n",
+       "      <th>alpha</th>\n",
+       "      <th>dof</th>\n",
+       "      <th>tbl_rows</th>\n",
+       "      <th>tbl_cols</th>\n",
+       "      <th>tbl</th>\n",
+       "      <th>expected_freq</th>\n",
+       "      <th>tbl_expected_diff</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Statistically significant result</td>\n",
+       "      <td>0.39736</td>\n",
+       "      <td>moderate</td>\n",
+       "      <td>4/5ths Test passed at a ratio of: 1.0.</td>\n",
+       "      <td>Testing for {'job_title': 'analyst'}, 4/5ths T...</td>\n",
+       "      <td>5.218246</td>\n",
+       "      <td>0.022351</td>\n",
+       "      <td>0.05</td>\n",
+       "      <td>1</td>\n",
+       "      <td>[Female, Male]</td>\n",
+       "      <td>[not_hired, hired]</td>\n",
+       "      <td>[[10, 1], [15, 18]]</td>\n",
+       "      <td>[[6.25, 4.75], [18.75, 14.25]]</td>\n",
+       "      <td>[[3.75, -3.75], [-3.75, 3.75]]</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                        test_result  phi_corr_coeff  phi_bins   \n",
+       "0  Statistically significant result         0.39736  moderate  \\\n",
+       "\n",
+       "                         four_fifths_test   \n",
+       "0  4/5ths Test passed at a ratio of: 1.0.  \\\n",
+       "\n",
+       "                                         result_desc  statistic    pvalue   \n",
+       "0  Testing for {'job_title': 'analyst'}, 4/5ths T...   5.218246  0.022351  \\\n",
+       "\n",
+       "   alpha  dof        tbl_rows            tbl_cols                  tbl   \n",
+       "0   0.05    1  [Female, Male]  [not_hired, hired]  [[10, 1], [15, 18]]  \\\n",
+       "\n",
+       "                    expected_freq               tbl_expected_diff  \n",
+       "0  [[6.25, 4.75], [18.75, 14.25]]  [[3.75, -3.75], [-3.75, 3.75]]  "
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_result"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "hrailabs_dev",
+   "language": "python",
+   "name": "hrailabs"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/.ipynb_checkpoints/Pay_Gap_Reg-checkpoint.ipynb b/pay_equity/.ipynb_checkpoints/Pay_Gap_Reg-checkpoint.ipynb
similarity index 100%
rename from .ipynb_checkpoints/Pay_Gap_Reg-checkpoint.ipynb
rename to pay_equity/.ipynb_checkpoints/Pay_Gap_Reg-checkpoint.ipynb
diff --git a/Pay_Gap_Reg.ipynb b/pay_equity/Pay_Gap_Reg.ipynb
similarity index 100%
rename from Pay_Gap_Reg.ipynb
rename to pay_equity/Pay_Gap_Reg.ipynb
diff --git a/src/.ipynb_checkpoints/model_classes-checkpoint.py b/src/.ipynb_checkpoints/model_classes-checkpoint.py
index d44109b..3b601b7 100644
--- a/src/.ipynb_checkpoints/model_classes-checkpoint.py
+++ b/src/.ipynb_checkpoints/model_classes-checkpoint.py
@@ -6,8 +6,12 @@
 
 import pandas as pd
 import numpy as np
+import os
 from scipy.stats import chi2_contingency
 
+package_dir = os.path.dirname(os.path.abspath(__file__))
+main_dir = os.path.abspath(os.path.join(package_dir, ".."))
+
 class Ingest:
     
     """
@@ -146,28 +150,29 @@ def run_load(
         """
         
         filepath = self.filepath
-        
+        csv_fp = os.path.join(main_dir, filepath)
+
         try:
-            return pd.read_csv(filepath, skiprows=0)
+            return pd.read_csv(csv_fp, skiprows=0)
 
         except FileNotFoundError:
             raise FileNotFoundError(
-                f"The file at {filepath} was not found. Please check the file path."
+                f"The file at {csv_fp} was not found. Please check the file path."
             )
 
         except pd.errors.EmptyDataError:
             raise ValueError(
-                f"The file at {filepath} is empty and cannot be loaded."
+                f"The file at {csv_fp} is empty and cannot be loaded."
             )
 
         except pd.errors.ParserError:
             raise ValueError(
-                f"The file at {filepath} contains malformed data and could not be parsed as a valid CSV."
+                f"The file at {csv_fp} contains malformed data and could not be parsed as a valid CSV."
             )
 
         except PermissionError:
             raise PermissionError(
-                f"Permission denied when attempting to read the file at {filepath}."
+                f"Permission denied when attempting to read the file at {csv_fp}."
                 f"Please check the file permissions."
             )
 
diff --git a/src/__pycache__/model_classes.cpython-311.pyc b/src/__pycache__/model_classes.cpython-311.pyc
index 1e700e5..a3fee23 100644
Binary files a/src/__pycache__/model_classes.cpython-311.pyc and b/src/__pycache__/model_classes.cpython-311.pyc differ
diff --git a/src/model_classes.py b/src/model_classes.py
index d44109b..3b601b7 100644
--- a/src/model_classes.py
+++ b/src/model_classes.py
@@ -6,8 +6,12 @@
 
 import pandas as pd
 import numpy as np
+import os
 from scipy.stats import chi2_contingency
 
+package_dir = os.path.dirname(os.path.abspath(__file__))
+main_dir = os.path.abspath(os.path.join(package_dir, ".."))
+
 class Ingest:
     
     """
@@ -146,28 +150,29 @@ def run_load(
         """
         
         filepath = self.filepath
-        
+        csv_fp = os.path.join(main_dir, filepath)
+
         try:
-            return pd.read_csv(filepath, skiprows=0)
+            return pd.read_csv(csv_fp, skiprows=0)
 
         except FileNotFoundError:
             raise FileNotFoundError(
-                f"The file at {filepath} was not found. Please check the file path."
+                f"The file at {csv_fp} was not found. Please check the file path."
             )
 
         except pd.errors.EmptyDataError:
             raise ValueError(
-                f"The file at {filepath} is empty and cannot be loaded."
+                f"The file at {csv_fp} is empty and cannot be loaded."
             )
 
         except pd.errors.ParserError:
             raise ValueError(
-                f"The file at {filepath} contains malformed data and could not be parsed as a valid CSV."
+                f"The file at {csv_fp} contains malformed data and could not be parsed as a valid CSV."
             )
 
         except PermissionError:
             raise PermissionError(
-                f"Permission denied when attempting to read the file at {filepath}."
+                f"Permission denied when attempting to read the file at {csv_fp}."
                 f"Please check the file permissions."
             )
 
diff --git a/toolkit.ipynb b/toolkit.ipynb
deleted file mode 100644
index aa39ad2..0000000
--- a/toolkit.ipynb
+++ /dev/null
@@ -1,1304 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "id": "ab695a7b-3043-4154-a59b-01e57feaf8f0",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[\"Testing for {'job_title': 'analyst'}, passed with 4/5 test at 1.0. Based on the results of the chi-square test of independence, there is statistically significant result for gender-based hiring discrimination against Female at the chosen significance level of 0.05. The phi correlation coefficient is 0.397, indicating a moderate effect size. The values on the positive diagonal of the 'tbl' indicate the distribution of hiring success across gen categories. Male had a higher proportion of successful outcomes compared to Female. Specifically, 54.5% of Male had success while only 54.5% of Female had success. This significant difference in hiring success rates suggests a potential gen bias, with Male success in hiring at a higher rate than Female.\"]"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "from scipy.stats import chi2_contingency\n",
-    "import yaml\n",
-    "\n",
-    "from pandas import DataFrame\n",
-    "from typing import Dict\n",
-    "from typing import List\n",
-    "from typing import Any\n",
-    "from typing import Tuple\n",
-    "\n",
-    "with open('config.yaml') as f:\n",
-    "    config = yaml.safe_load(f)\n",
-    "\n",
-    "class Ingest:\n",
-    "    \n",
-    "    \"\"\"\n",
-    "    Class to ingest dataframe input.\n",
-    "    \"\"\"\n",
-    "    \n",
-    "    def __init__(\n",
-    "        self,\n",
-    "        config: Dict[Any, Any]\n",
-    "    ) -> None:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Inits class with the config file\n",
-    "        and unpacks the config file.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        self.config = config\n",
-    "        \n",
-    "        self.unpack_config()\n",
-    "\n",
-    "        \n",
-    "    def run(\n",
-    "        self\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Run function for the class.\n",
-    "        \n",
-    "        :param None:\n",
-    "        :return df:\n",
-    "            DataFrame, ingested df\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        df = self.run_load()\n",
-    "        \n",
-    "        df = self.run_harmonize(df)\n",
-    "        \n",
-    "        return df\n",
-    "        \n",
-    "        \n",
-    "    def unpack_config(\n",
-    "        self\n",
-    "    ) -> None:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Function to unpack config vars.\n",
-    "        \n",
-    "        :var filepath:\n",
-    "            str, the relative filepath \n",
-    "        :var group_variable:\n",
-    "            str, the column name for the \n",
-    "            group variable of interest e.g.\n",
-    "            gender, which contains the target \n",
-    "            class and non-target class e.g.\n",
-    "            females and males.\n",
-    "        :var group_target_val:\n",
-    "            str, within the group_variable column,\n",
-    "            contains the contains the target \n",
-    "            class value e.g.\n",
-    "            females.\n",
-    "        :var group_other_val:\n",
-    "            str, within the group_variable column,\n",
-    "            contains the contains the non-target \n",
-    "            class value e.g. males.\n",
-    "        :var outcome_variable:\n",
-    "            str, the column name for the \n",
-    "            outcome variable of interest e.g.\n",
-    "            hired, which contains the target \n",
-    "            class and non-target class e.g.\n",
-    "            hired and not-hired.\n",
-    "        :var outcome_target_val:\n",
-    "            str, within the outcome_variable column,\n",
-    "            contains the contains the target \n",
-    "            class value e.g.\n",
-    "            hired.\n",
-    "        :var outcome_other_val:\n",
-    "            str, within the outcome_variable column,\n",
-    "            contains the contains the non-target \n",
-    "            class value e.g. not-hired.\n",
-    "        :var grpers:\n",
-    "            Dict[str,str], can be any set of filterable\n",
-    "            columns to slice into particular groups within\n",
-    "            the broader employee roster. The key is the column,\n",
-    "            the value is the desired class within the column\n",
-    "            e.g. job_title: analyst.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        config = self.config\n",
-    "        \n",
-    "        try:\n",
-    "            self.filepath: str = config[\"Ingest\"][\"filepath\"]\n",
-    "            self.group_variable: str = config[\"Ingest\"][\"group_variable\"]\n",
-    "            self.group_target_val: str = config[\"Ingest\"][\"group_target_val\"]\n",
-    "            self.group_other_val: str = config[\"Ingest\"][\"group_other_val\"]\n",
-    "            self.outcome_variable: str = config[\"Ingest\"][\"outcome_variable\"]\n",
-    "            self.outcome_target_val: str = config[\"Ingest\"][\"outcome_target_val\"]\n",
-    "            self.outcome_other_val: str = config[\"Ingest\"][\"outcome_other_val\"]\n",
-    "            self.grpers: Dict[str, str] = config[\"Ingest\"][\"grpers\"]\n",
-    "\n",
-    "            # Type validation\n",
-    "            if not isinstance(self.filepath, str):\n",
-    "                raise TypeError(\"Expected 'filepath' to be of type 'str'.\")\n",
-    "            if not isinstance(self.group_variable, str):\n",
-    "                raise TypeError(\"Expected 'group_variable' to be of type 'str'.\")\n",
-    "            if not isinstance(self.group_target_val, str):\n",
-    "                raise TypeError(\"Expected 'group_target_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.group_other_val, str):\n",
-    "                raise TypeError(\"Expected 'group_other_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.outcome_variable, str):\n",
-    "                raise TypeError(\"Expected 'outcome_variable' to be of type 'str'.\")\n",
-    "            if not isinstance(self.outcome_target_val, str):\n",
-    "                raise TypeError(\"Expected 'outcome_target_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.outcome_other_val, str):\n",
-    "                raise TypeError(\"Expected 'outcome_other_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.grpers, dict):\n",
-    "                raise TypeError(\"Expected 'grpers' to be of type 'dict'.\")\n",
-    "\n",
-    "        except KeyError as e:\n",
-    "            raise KeyError(f\"Missing key '{e.args[0]}' in the config file. \"\n",
-    "                           \"Please ensure the config file contains all required keys under the 'Ingest' section: \"\n",
-    "                           \"'filepath', 'group_variable', 'group_target_val', 'group_other_val', \"\n",
-    "                           \"'outcome_variable', 'outcome_target_val', 'outcome_other_val', and 'grpers'.\")\n",
-    "\n",
-    "        except TypeError as e:\n",
-    "            raise TypeError(f\"Config file error: {e}\")\n",
-    "        \n",
-    "    def run_load(\n",
-    "        self\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Loads csv file. Assumes headers are row 0.\n",
-    "        \n",
-    "        :param None:\n",
-    "        :return DataFrame:\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        filepath = self.filepath\n",
-    "        \n",
-    "        try:\n",
-    "            return pd.read_csv(filepath, skiprows=0)\n",
-    "\n",
-    "        except FileNotFoundError:\n",
-    "            raise FileNotFoundError(\n",
-    "                f\"The file at {filepath} was not found. Please check the file path.\"\n",
-    "            )\n",
-    "\n",
-    "        except pd.errors.EmptyDataError:\n",
-    "            raise ValueError(\n",
-    "                f\"The file at {filepath} is empty and cannot be loaded.\"\n",
-    "            )\n",
-    "\n",
-    "        except pd.errors.ParserError:\n",
-    "            raise ValueError(\n",
-    "                f\"The file at {filepath} contains malformed data and could not be parsed as a valid CSV.\"\n",
-    "            )\n",
-    "\n",
-    "        except PermissionError:\n",
-    "            raise PermissionError(\n",
-    "                f\"Permission denied when attempting to read the file at {filepath}.\"\n",
-    "                f\"Please check the file permissions.\"\n",
-    "            )\n",
-    "\n",
-    "        except Exception as e:\n",
-    "            raise Exception(\n",
-    "                f\"An unexpected error occurred while loading the file: {str(e)}\"\n",
-    "            )\n",
-    "        \n",
-    "    def run_harmonize(\n",
-    "        self,\n",
-    "        df: DataFrame\n",
-    "    ) -> DataFrame:        \n",
-    "\n",
-    "        \"\"\"\n",
-    "        Function to harmonize the dataset.\n",
-    "        \n",
-    "        :param df: \n",
-    "            DataFrame, loaded df\n",
-    "        :return df:\n",
-    "            DataFrame, filtered down to target and other group and\n",
-    "            harmonize the fields\n",
-    "        \"\"\"\n",
-    "    \n",
-    "        group_variable = self.group_variable\n",
-    "        group_target_val = self.group_target_val\n",
-    "        group_other_val = self.group_other_val\n",
-    "        outcome_variable = self.outcome_variable\n",
-    "        outcome_target_val = self.outcome_target_val\n",
-    "        outcome_other_val = self.outcome_other_val\n",
-    "        grpers = self.grpers\n",
-    "\n",
-    "        df = self._apply_filters(\n",
-    "            df=df,\n",
-    "            group_variable=group_variable,\n",
-    "            group_target_val=group_target_val,\n",
-    "            group_other_val=group_other_val,\n",
-    "            grpers=grpers\n",
-    "        )\n",
-    "        \n",
-    "        df = self._apply_harmonize(\n",
-    "            df=df,\n",
-    "            group_variable=group_variable,\n",
-    "            group_target_val=group_target_val,\n",
-    "            group_other_val=group_other_val,\n",
-    "            outcome_variable=outcome_variable,\n",
-    "            outcome_target_val=outcome_target_val,\n",
-    "            outcome_other_val=outcome_other_val\n",
-    "        )\n",
-    "      \n",
-    "        return df\n",
-    "    \n",
-    "    def _apply_filters(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        group_variable: str,\n",
-    "        group_target_val: str,\n",
-    "        group_other_val: str,\n",
-    "        grpers: Dict[str,str],\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to apply filters\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, target df\n",
-    "        :param group_variable:\n",
-    "            str, column name of the\n",
-    "            target variable\n",
-    "        :param group_target_val:\n",
-    "            str, class target value of the group_variable\n",
-    "            aka the protected class value\n",
-    "        :param group_other_val:\n",
-    "            str, class nontarget value of the group_variable\n",
-    "            aka the nonprotected class value\n",
-    "        :return df:\n",
-    "            DataFrame, filtered df\n",
-    "        \"\"\"\n",
-    "                \n",
-    "        df = df.loc[\n",
-    "            df[group_variable].isin(\n",
-    "                [\n",
-    "                    group_target_val, \n",
-    "                    group_other_val\n",
-    "                ]\n",
-    "            )\n",
-    "        ]\n",
-    "        \n",
-    "        for k, v in grpers.items():\n",
-    "        \n",
-    "            df = df.loc[\n",
-    "                df[k].isin([v])\n",
-    "            ]  \n",
-    "            \n",
-    "        return df\n",
-    "    \n",
-    "    def _apply_harmonize(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        group_variable: str,\n",
-    "        group_target_val: str,\n",
-    "        group_other_val: str,\n",
-    "        outcome_variable: str,\n",
-    "        outcome_target_val: str,\n",
-    "        outcome_other_val: str\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to harmonize targets.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, target df\n",
-    "        :param group_variable:\n",
-    "            str, column name of the\n",
-    "            target variable\n",
-    "        :param group_target_val:\n",
-    "            str, class target value of the group_variable\n",
-    "            aka the protected class value\n",
-    "        :param group_other_val:\n",
-    "            str, class nontarget value of the group_variable\n",
-    "            aka the nonprotected class value     \n",
-    "        :param outcome_variable:\n",
-    "            str, the column name of the outcome \n",
-    "        :param outcome_target_val:\n",
-    "            str, class target value of the outcome_variable\n",
-    "            aka success\n",
-    "        :param outcome_other_val:\n",
-    "            str, class nontarget value of the outcome_variable\n",
-    "        :return df:\n",
-    "            DataFrame, target df\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        # harmonize the group target\n",
-    "        df['group_var_clean'] = np.where(\n",
-    "            df[group_variable]==group_target_val, \n",
-    "            1,\n",
-    "            np.where(\n",
-    "                df[group_variable]==group_other_val, \n",
-    "                0, \n",
-    "                -1\n",
-    "            )\n",
-    "        )\n",
-    "        \n",
-    "        # harmonize the outcome target\n",
-    "        df['outcome_var_clean'] = np.where(\n",
-    "            df[outcome_variable]==outcome_target_val, \n",
-    "            1,  \n",
-    "            np.where(\n",
-    "                df[self.outcome_variable]==outcome_other_val,\n",
-    "                0, \n",
-    "                -1\n",
-    "            )\n",
-    "        )  \n",
-    "        \n",
-    "        return df\n",
-    "            \n",
-    "class Transform:\n",
-    "    \n",
-    "    \"\"\"\n",
-    "    Class to transform dataframe inputs into \n",
-    "    2x2 contingency table.\n",
-    "    \"\"\"\n",
-    "    \n",
-    "    def __init__(\n",
-    "        self, \n",
-    "        df: DataFrame\n",
-    "    ) -> None:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        :param df:\n",
-    "            DataFrame, input df\n",
-    "        \"\"\"\n",
-    "    \n",
-    "        self.df = df\n",
-    "    \n",
-    "    def run_build_cont_table(\n",
-    "        self\n",
-    "    ) -> List[int]:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Function to generate contingency table format.\n",
-    "        \n",
-    "        Places the target group val in the top row and the\n",
-    "        target group other to the bottom row.\n",
-    "        \n",
-    "        Places no-success outcome on the first column and success\n",
-    "        on the second column.\n",
-    "        \n",
-    "        :return tbl:\n",
-    "            List[int], filtered down to target and other group.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        df = self.df\n",
-    "        \n",
-    "        cols = [\n",
-    "            'group_var_clean', \n",
-    "            'outcome_var_clean'\n",
-    "        ]\n",
-    "        \n",
-    "        df = df[cols]\n",
-    "        \n",
-    "        tbl = (\n",
-    "            df.pivot_table(\n",
-    "                index='group_var_clean',\n",
-    "                columns='outcome_var_clean', \n",
-    "                aggfunc=len\n",
-    "            ).\n",
-    "            sort_index(\n",
-    "                axis=1, \n",
-    "                ascending=True\n",
-    "            ).\n",
-    "            sort_index(ascending=False). # ensure always [1,0]\n",
-    "            values.tolist()\n",
-    "        ) \n",
-    "                    \n",
-    "        return tbl\n",
-    "        \n",
-    "class StatsTesting2x2Cont:\n",
-    "    \n",
-    "    \"\"\"\n",
-    "    Class to perform 2x2 Contigency Table analysis\n",
-    "    with Chi2 and Phi Correlation Coefficent Testing.\n",
-    "\n",
-    "    Provides context into potential association between\n",
-    "    variables and the strength of the association.\n",
-    "    \"\"\"\n",
-    "    \n",
-    "    def __init__(\n",
-    "        self,\n",
-    "        config: Dict[Any, Any],\n",
-    "        tbl: List[int],\n",
-    "        df: DataFrame\n",
-    "    ) -> None:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Inits the class variables and unpacks the\n",
-    "        config variables.\n",
-    "        \n",
-    "        :param config:\n",
-    "            Dict[str,Any], loaded config file.\n",
-    "        :param tbl:\n",
-    "            List[int], 2x2 cont table.\n",
-    "        :param df:\n",
-    "            DataFrame, original input DataFrame.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        self.config = config\n",
-    "        self.tbl = tbl\n",
-    "        self.df = df\n",
-    "\n",
-    "        self.unpack_config()\n",
-    "\n",
-    "    def run_testing(\n",
-    "        self\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Run function for the class.\n",
-    "        \n",
-    "        Runs hypothesis evaluation and builds\n",
-    "        the output report DataFrame.\n",
-    "        \n",
-    "        :param None:\n",
-    "        :return df_results:\n",
-    "            DataFrame, with testing results.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        alpha = self.alpha\n",
-    "        tbl = self.tbl\n",
-    "        process = self.process\n",
-    "        group_variable = self.group_variable\n",
-    "        group_target_val = self.group_target_val\n",
-    "        group_other_val = self.group_other_val\n",
-    "        bin_edges = self.bin_edges\n",
-    "        bin_labels = self.bin_labels\n",
-    "                \n",
-    "        res = self.gen_hypothesis_eval(tbl)\n",
-    "\n",
-    "        df_results = self.run_report_bld(\n",
-    "            alpha=alpha,\n",
-    "            res=res,\n",
-    "            tbl=tbl,\n",
-    "            process=process,\n",
-    "            group_variable=group_variable,\n",
-    "            group_target_val=group_target_val,\n",
-    "            group_other_val=group_other_val,\n",
-    "            bin_edges=bin_edges,\n",
-    "            bin_labels=bin_labels\n",
-    "        )\n",
-    "        \n",
-    "        return df_results\n",
-    "    \n",
-    "    def unpack_config(\n",
-    "        self\n",
-    "    ) -> None:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Function to unpack config variables.\n",
-    "        \n",
-    "        :param None:\n",
-    "        :return None:\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        config = self.config\n",
-    "\n",
-    "        try:\n",
-    "            self.alpha: float = config[\"StatsTesting2x2Cont\"][\"alpha\"]\n",
-    "            self.group_variable: str = config[\"Ingest\"][\"group_variable\"]\n",
-    "            self.group_target_val: str = config[\"Ingest\"][\"group_target_val\"]\n",
-    "            self.group_other_val: str = config[\"Ingest\"][\"group_other_val\"]\n",
-    "            self.outcome_variable: str = config[\"Ingest\"][\"outcome_variable\"]\n",
-    "            self.outcome_target_val: str = config[\"Ingest\"][\"outcome_target_val\"]\n",
-    "            self.outcome_other_val: str = config[\"Ingest\"][\"outcome_other_val\"]\n",
-    "            self.grpers: Dict[str, str] = config[\"Ingest\"][\"grpers\"]\n",
-    "            self.testing: str = config[\"StatsTesting2x2Cont\"][\"testing\"]\n",
-    "            self.process: str = config[\"StatsTesting2x2Cont\"][\"process\"]\n",
-    "            self.bin_edges: List[float] = config[\"StatsTesting2x2Cont\"][\"phi_bin_edges\"]\n",
-    "            self.bin_labels: List[str] = config[\"StatsTesting2x2Cont\"][\"phi_bin_labels\"]\n",
-    "\n",
-    "            if not isinstance(self.alpha, float):\n",
-    "                raise TypeError(\"Expected 'alpha' to be of type 'float'.\")\n",
-    "            if not isinstance(self.group_variable, str):\n",
-    "                raise TypeError(\"Expected 'group_variable' to be of type 'str'.\")\n",
-    "            if not isinstance(self.group_target_val, str):\n",
-    "                raise TypeError(\"Expected 'group_target_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.group_other_val, str):\n",
-    "                raise TypeError(\"Expected 'group_other_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.outcome_variable, str):\n",
-    "                raise TypeError(\"Expected 'outcome_variable' to be of type 'str'.\")\n",
-    "            if not isinstance(self.outcome_target_val, str):\n",
-    "                raise TypeError(\"Expected 'outcome_target_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.outcome_other_val, str):\n",
-    "                raise TypeError(\"Expected 'outcome_other_val' to be of type 'str'.\")\n",
-    "            if not isinstance(self.grpers, dict):\n",
-    "                raise TypeError(\"Expected 'grpers' to be of type 'dict'.\")\n",
-    "            if not isinstance(self.testing, str):\n",
-    "                raise TypeError(\"Expected 'testing' to be of type 'str'.\")\n",
-    "            if not isinstance(self.process, str):\n",
-    "                raise TypeError(\"Expected 'process' to be of type 'str'.\")\n",
-    "            if not isinstance(\n",
-    "                self.bin_edges, list\n",
-    "            ) or not all(\n",
-    "                isinstance(\n",
-    "                    i, (int, float)\n",
-    "                ) for i in self.bin_edges\n",
-    "            ):\n",
-    "                raise TypeError(\"Expected 'bin_edges' to be a list of floats.\")\n",
-    "            if not isinstance(\n",
-    "                self.bin_labels, list\n",
-    "            ) or not all(\n",
-    "                isinstance(i, str) for i in self.bin_labels\n",
-    "            ):\n",
-    "                raise TypeError(\"Expected 'bin_labels' to be a list of strings.\")\n",
-    "        \n",
-    "        except KeyError as e:\n",
-    "            raise KeyError(\n",
-    "                f\"Missing key '{e.args[0]}' in the config file. \"\n",
-    "                f\"Ensure all required keys are present in the 'Ingest' and 'StatsTesting2x2Cont' sections.\"\n",
-    "            )\n",
-    "\n",
-    "        except TypeError as e:\n",
-    "            raise TypeError(f\"Config file error: {e}\")\n",
-    "\n",
-    "        \n",
-    "    def gen_hypothesis_eval(\n",
-    "        self,\n",
-    "        tbl: List[int]\n",
-    "    ) -> chi2_contingency:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Function to generate the chi2_contigency\n",
-    "        statistic and result.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        #size = np.shape(tbl)\n",
-    "        #tbl_len = len(tbl)\n",
-    "        \n",
-    "        res = chi2_contingency(\n",
-    "            tbl\n",
-    "        )\n",
-    "            \n",
-    "        return res\n",
-    "        \n",
-    "    def run_report_bld(\n",
-    "        self,\n",
-    "        alpha: float,\n",
-    "        res: chi2_contingency,\n",
-    "        tbl: List[int],\n",
-    "        process: str,\n",
-    "        group_variable: str,\n",
-    "        group_target_val: str,\n",
-    "        group_other_val: str,\n",
-    "        bin_edges: List[float],\n",
-    "        bin_labels: List[str]\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Runs report for statistical testing\n",
-    "        chi2_contingency results\n",
-    "        \n",
-    "        :param alpha:\n",
-    "            float, alpha value for significance evaluation.\n",
-    "        :param res:\n",
-    "            chi2_contingency, result of the chi2_contingency.\n",
-    "        :param tbl:\n",
-    "            List[int], the contingency table.\n",
-    "        :param process: \n",
-    "            str, the name of the business process\n",
-    "            being tested, e.g. 'hiring'.\n",
-    "        :param group_variable:\n",
-    "            str, column name of the\n",
-    "            target variable.\n",
-    "        :param group_target_val:\n",
-    "            str, class target value of the group_variable\n",
-    "            aka the protected class value.\n",
-    "        :param group_other_val:\n",
-    "            str, class nontarget value of the group_variable\n",
-    "            aka the nonprotected class value.  \n",
-    "        :param bin_edges:\n",
-    "            List[float], edges for phi\n",
-    "            bins.\n",
-    "        :param bin_labels:\n",
-    "            List[str], labels for the phi\n",
-    "            bins.\n",
-    "        :return df:\n",
-    "            DataFrame, target\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        pvalue = res[1]\n",
-    "        \n",
-    "        df = pd.DataFrame()\n",
-    "\n",
-    "        df = self._gen_significance_test(\n",
-    "            df=df,\n",
-    "            pvalue=pvalue,\n",
-    "            alpha=alpha\n",
-    "        )\n",
-    "        \n",
-    "        (\n",
-    "            df,\n",
-    "            A,\n",
-    "            B,\n",
-    "            C,\n",
-    "            D,\n",
-    "            total_target_grp,\n",
-    "            total_non_target_grp,\n",
-    "            diagonals,\n",
-    "            percent_target_succ,\n",
-    "            percent_non_target_succ,\n",
-    "            phi_numerator,\n",
-    "            phi_denominator\n",
-    "        ) = self._gen_table_calcs(\n",
-    "                df=df,\n",
-    "                tbl=tbl,\n",
-    "        )\n",
-    "        \n",
-    "        if res[1] <= alpha:\n",
-    "            df, phi_result = self._gen_phi_coefficient(\n",
-    "                df=df,\n",
-    "                tbl=tbl,\n",
-    "                bin_edges=bin_edges,\n",
-    "                bin_labels=bin_labels,\n",
-    "                process=process,\n",
-    "                group_variable=group_variable,\n",
-    "                group_target_val=group_target_val,\n",
-    "                group_other_val=group_other_val,\n",
-    "                diagonals=diagonals,\n",
-    "                numerator=phi_numerator,\n",
-    "                denominator=phi_denominator,\n",
-    "                percent_target_succ=percent_non_target_succ,\n",
-    "                percent_non_target_succ=percent_non_target_succ,\n",
-    "            )\n",
-    "            \n",
-    "        else:\n",
-    "            df['phi_corr_coeff'] = np.nan\n",
-    "            df['phi_bins'] = np.nan\n",
-    "            \n",
-    "            phi_result = \"\"\n",
-    "        \n",
-    "        df = self._gen_four_fifths_test(\n",
-    "            df,\n",
-    "            percent_target_succ=percent_non_target_succ,\n",
-    "            percent_non_target_succ=percent_non_target_succ\n",
-    "        )\n",
-    "                \n",
-    "        df = self._gen_outcome_meta(\n",
-    "            df,\n",
-    "            round(res[1],3),\n",
-    "            phi_result\n",
-    "        )\n",
-    "        \n",
-    "        df = self._gen_unpack_stats(\n",
-    "            df,\n",
-    "            res\n",
-    "        )\n",
-    "        \n",
-    "        return df\n",
-    "        \n",
-    "    def _gen_unpack_stats(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        res: chi2_contingency\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to unpack test stats from\n",
-    "        chi2_contingency results.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, output df.\n",
-    "        :param res:\n",
-    "            chi2_contingency, results array.\n",
-    "        :return df:\n",
-    "            DataFrame, output df.\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        group_target_val = self.group_target_val\n",
-    "        group_other_val = self.group_other_val\n",
-    "        rows = [group_target_val] + [group_other_val]\n",
-    "    \n",
-    "        df['statistic'] = res[0]\n",
-    "        df['pvalue'] = res[1]\n",
-    "        df['dof'] = res[2]\n",
-    "        df['tbl_row'] = [rows]\n",
-    "        df['tbl'] = [tbl]\n",
-    "        df['expected_freq'] = [res[3]]\n",
-    "        df['tbl_expected_diff'] = [tbl - res[3]]\n",
-    "        \n",
-    "        return df\n",
-    "    \n",
-    "    def _gen_significance_test(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        pvalue: float,\n",
-    "        alpha: float\n",
-    "    ):\n",
-    "        \"\"\"\n",
-    "        Method to report on test significance.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, results df.\n",
-    "        :param pval:\n",
-    "            int, pvalue.\n",
-    "        :param alpha:\n",
-    "            float, the alpha value for testing eval.\n",
-    "        :return df:\n",
-    "            DataFrame with metadata added.     \n",
-    "        \"\"\"\n",
-    "            \n",
-    "        if pvalue <= alpha:\n",
-    "            val = 'statistically significant result'\n",
-    "            \n",
-    "        else:\n",
-    "            val = 'no statistically significant result'\n",
-    "                    \n",
-    "        df['test_result'] = [val]\n",
-    "        \n",
-    "        return df\n",
-    "    \n",
-    "    def _gen_phi_coefficient(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        tbl: List[int],\n",
-    "        process: str,\n",
-    "        group_variable: str,\n",
-    "        group_target_val: str,\n",
-    "        group_other_val: str,\n",
-    "        bin_edges: List[float],\n",
-    "        bin_labels: List[str],\n",
-    "        diagonals: List[float],\n",
-    "        numerator: float,\n",
-    "        denominator: float,\n",
-    "        percent_target_succ: float,\n",
-    "        percent_non_target_succ: float,\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to generate the phi coefficient.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, the results df.\n",
-    "        :param tbl:\n",
-    "            List[int], the 2x2 cont table.\n",
-    "        :param process: \n",
-    "            str, the name of the business process\n",
-    "            being tested, e.g. 'hiring'.\n",
-    "        :param group_variable:\n",
-    "            str, column name of the\n",
-    "            target variable.\n",
-    "        :param group_target_val:\n",
-    "            str, class target value of the group_variable\n",
-    "            aka the protected class value.\n",
-    "        :param group_other_val:\n",
-    "            str, class nontarget value of the group_variable\n",
-    "            aka the nonprotected class value.  \n",
-    "        :param bin_edges:\n",
-    "            List[float], edges for phi\n",
-    "            bins.\n",
-    "        :param bin_labels:\n",
-    "            List[str], lab\n",
-    "        :return df:\n",
-    "            DataFrame, output df.\n",
-    "        \"\"\"\n",
-    "        phi = numerator / denominator if denominator != 0 else 0\n",
-    "\n",
-    "        df['phi_corr_coeff'] = phi\n",
-    "                \n",
-    "        df = self._gen_prep_phi_bins(\n",
-    "            df=df,\n",
-    "            bin_edges=bin_edges,\n",
-    "            bin_labels=bin_labels\n",
-    "        )\n",
-    "\n",
-    "        df, phi_result = self._gen_prep_diagonals(\n",
-    "            df=df,\n",
-    "            diagonals=diagonals,\n",
-    "            process=process,\n",
-    "            group_variable=group_variable,\n",
-    "            group_other_val=group_other_val,\n",
-    "            group_target_val=group_target_val,\n",
-    "            percent_non_target_succ=percent_non_target_succ,\n",
-    "            percent_target_succ=percent_target_succ,\n",
-    "        )\n",
-    "        \n",
-    "        return df, phi_result\n",
-    "    \n",
-    "    def _gen_table_calcs(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        tbl: List[int]\n",
-    "    ) -> Tuple[\n",
-    "        DataFrame, float, float, float, float,\n",
-    "        float, float, float, float, \n",
-    "        float, float, float\n",
-    "    ]:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to generate phi bins. Provides additional\n",
-    "        explainability on the magnitude of association, when \n",
-    "        an association is found.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, output df.\n",
-    "        :param tbl:\n",
-    "            List[int], 2x2 contingency.\n",
-    "        :return [\n",
-    "            df, A, B, C, D, total_target_grp,\n",
-    "            total_non_target_grp, diagonals,\n",
-    "            percent_target_succ, percent_non_target_succ,\n",
-    "            phi_numerator, phi_denominator\n",
-    "        ]:\n",
-    "            Tuple[DataFrame, float, float, float, float,\n",
-    "        float, float, float, float, \n",
-    "        float, float, float\n",
-    "        ]\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        # females, males; no succ, succ\n",
-    "        A, B = tbl[0] \n",
-    "        C, D = tbl[1]\n",
-    "        \n",
-    "        total_target_grp = A + B\n",
-    "        total_non_target_grp = C + D\n",
-    "        diagonals = (A + D) > (B + C)\n",
-    "        percent_target_succ = (B / total_target_grp) * 100\n",
-    "        percent_non_target_succ = (D / total_non_target_grp) * 100\n",
-    "        phi_numerator = (A * D) - (B * C)\n",
-    "        phi_denominator = np.sqrt((A + B) * (C + D) * (A + C) * (B + D))      \n",
-    "           \n",
-    "        return (\n",
-    "            df,\n",
-    "            A,\n",
-    "            B,\n",
-    "            C,\n",
-    "            D,\n",
-    "            total_target_grp,\n",
-    "            total_non_target_grp,\n",
-    "            diagonals,\n",
-    "            percent_target_succ,\n",
-    "            percent_non_target_succ,\n",
-    "            phi_numerator,\n",
-    "            phi_denominator\n",
-    "        )\n",
-    "    \n",
-    "    def _gen_prep_phi_bins(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        bin_edges: List[float],\n",
-    "        bin_labels: List[str]\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to generate pandas bins for \n",
-    "        phi coeff.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, output df.\n",
-    "        :param bin_edges:\n",
-    "            List[float], edges for phi\n",
-    "            bins.\n",
-    "        :param bin_labels:\n",
-    "            List[str], labels for the phi\n",
-    "            bins.\n",
-    "        :return df:\n",
-    "            DataFrame, output df.\n",
-    "        \"\"\"\n",
-    "    \n",
-    "        df['phi_bins'] = pd.cut(\n",
-    "            df['phi_corr_coeff'], \n",
-    "            bins=bin_edges, \n",
-    "            labels=bin_labels, \n",
-    "            include_lowest=True\n",
-    "        )\n",
-    "        \n",
-    "        return df\n",
-    "    \n",
-    "    def _gen_four_fifths_test(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        percent_target_succ: float,\n",
-    "        percent_non_target_succ: float\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        ratio = percent_target_succ / percent_non_target_succ\n",
-    "        \n",
-    "        if ratio < .8:\n",
-    "            ratio_desc = f'failed with 4/5 test at {round(ratio,3)}'\n",
-    "        elif ratio >= .8:\n",
-    "            ratio_desc = f'passed with 4/5 test at {round(ratio,3)}'\n",
-    "        else:\n",
-    "            ratio_desc = 'error calculating 4/5 test'\n",
-    "        \n",
-    "        df['four_fifths_test'] = ratio_desc\n",
-    "        return df\n",
-    "    \n",
-    "    def _gen_prep_diagonals(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        diagonals: bool,\n",
-    "        process: str,\n",
-    "        group_variable: str,\n",
-    "        group_other_val: str,\n",
-    "        group_target_val: str,\n",
-    "        percent_non_target_succ: float,\n",
-    "        percent_target_succ: float,\n",
-    "    ) -> Tuple[DataFrame, str]:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to generate the magnitude of the\n",
-    "        assocation using phi coefficient analysis.\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, output df.\n",
-    "        :param diagonals:\n",
-    "            bool,\n",
-    "        :param process: \n",
-    "            str, the name of the business process\n",
-    "            being tested, e.g. 'hiring'.\n",
-    "        :param group_variable:\n",
-    "            str, column name of the\n",
-    "            target variable.\n",
-    "        :param group_target_val:\n",
-    "            str, class target value of the group_variable\n",
-    "            aka the protected class value.\n",
-    "        :param group_other_val:\n",
-    "            str, class nontarget value of the group_variable\n",
-    "            aka the nonprotected class value.   \n",
-    "        :param percent_non_target_succ:\n",
-    "            float, the success percentage attained\n",
-    "            for the the non-target group.\n",
-    "        :param percent_target_succ:\n",
-    "            float, the success percentage attained for the\n",
-    "            target class.\n",
-    "        :return (df, phi_col):\n",
-    "            Tuple[df, phi_col]\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        phi_bin = df['phi_bins'].values[0]    \n",
-    "        phi_corr_coeff = df['phi_corr_coeff'].values[0]    \n",
-    "\n",
-    "        if diagonals:\n",
-    "            diagonal_msg = (\n",
-    "                f\"The values on the positive diagonal of the 'tbl' indicate the distribution of {process} success across {group_variable} categories.\"\n",
-    "                f\" {group_other_val} had a higher proportion of successful outcomes compared to {group_target_val}.\"\n",
-    "                f\" Specifically, {percent_non_target_succ:.1f}% of {group_other_val} had success while only {percent_target_succ:.1f}%\"\n",
-    "                f\" of {group_target_val} had success.\"\n",
-    "                f\" This significant difference in {process} success rates suggests a potential {group_variable} bias, with {group_other_val} success in {process}\"\n",
-    "                f\" at a higher rate than {group_target_val}.\"\n",
-    "            )\n",
-    "            \n",
-    "        else:\n",
-    "            diagonal_msg = \"the diagonal values are not substantially higher, suggesting the relationship might be more nuanced.\"\n",
-    "        \n",
-    "        phi_col = f\"The phi correlation coefficient is {phi_corr_coeff:.3f}, indicating a {phi_bin} effect size. {diagonal_msg}\"\n",
-    "            \n",
-    "        return df, phi_col\n",
-    "    \n",
-    "    def _gen_outcome_meta(\n",
-    "        self,\n",
-    "        df: DataFrame,\n",
-    "        pval: float,\n",
-    "        phi_result: str\n",
-    "    ) -> DataFrame:\n",
-    "        \n",
-    "        \"\"\"\n",
-    "        Method to generate meta data for \n",
-    "        reporting dataframe\n",
-    "        \n",
-    "        :param df:\n",
-    "            DataFrame, results df\n",
-    "        :param pval:\n",
-    "            int, pvalue\n",
-    "        :param phi_result:\n",
-    "            str, result of phi testing.\n",
-    "        :return df:\n",
-    "            DataFrame with metadata added\n",
-    "        \"\"\"\n",
-    "        \n",
-    "        grpers = self.grpers\n",
-    "        result = df['test_result'].values[0]\n",
-    "        phi_col = df['phi_corr_coeff'].values[0]\n",
-    "        testing = self.testing\n",
-    "        process = self.process\n",
-    "        group_target_val = self.group_target_val\n",
-    "        alpha = self.alpha\n",
-    "        four_fifths = df['four_fifths_test'].values[0]\n",
-    "        \n",
-    "        col = f\"Testing for {grpers}, {four_fifths}. Based on the results of the chi-square test of independence, there is {result} for {testing}-based {process} discrimination against {group_target_val} at the chosen significance level of {alpha}.\"\n",
-    "\n",
-    "        if result == \"statistically significant result\":\n",
-    "            col = f\"{col} {phi_result}\"\n",
-    "                \n",
-    "        df['result_desc'] = col\n",
-    "        \n",
-    "        return df\n",
-    "        \n",
-    "# pipeline\n",
-    "\n",
-    "ingestObj = Ingest(config)\n",
-    "df = ingestObj.run()\n",
-    "\n",
-    "transObj = Transform(\n",
-    "    df.copy()\n",
-    ")\n",
-    "tbl = transObj.run_build_cont_table()\n",
-    "\n",
-    "statsObj = StatsTesting2x2Cont(\n",
-    "    config,\n",
-    "    tbl,\n",
-    "    df.copy() # need to add some more context in plain text\n",
-    ")\n",
-    "df_result = statsObj.run_testing()\n",
-    "\n",
-    "df_result['result_desc'].tolist()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "d303e1d1-c69a-4b59-9489-14574000bd55",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "import yaml\n",
-    "import model\n",
-    "\n",
-    "with open('config.yaml') as f:\n",
-    "    config = yaml.safe_load(f)\n",
-    "    \n",
-    "model = model.Model(config)\n",
-    "\n",
-    "df_prep, tbl = model.prep()\n",
-    "\n",
-    "df_result = model.analysis(df_prep.copy(), tbl)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "cebbe841-c185-443d-ac02-7ccd8c50e005",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>test_result</th>\n",
-       "      <th>phi_corr_coeff</th>\n",
-       "      <th>phi_bins</th>\n",
-       "      <th>four_fifths_test</th>\n",
-       "      <th>result_desc</th>\n",
-       "      <th>statistic</th>\n",
-       "      <th>pvalue</th>\n",
-       "      <th>dof</th>\n",
-       "      <th>tbl_rows</th>\n",
-       "      <th>tbl_cols</th>\n",
-       "      <th>tbl</th>\n",
-       "      <th>expected_freq</th>\n",
-       "      <th>tbl_expected_diff</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>statistically significant result</td>\n",
-       "      <td>0.39736</td>\n",
-       "      <td>moderate</td>\n",
-       "      <td>passed with 4/5 test at 1.0</td>\n",
-       "      <td>Testing for {'job_title': 'analyst'}, passed w...</td>\n",
-       "      <td>5.218246</td>\n",
-       "      <td>0.022351</td>\n",
-       "      <td>1</td>\n",
-       "      <td>[Female, Male]</td>\n",
-       "      <td>[hired, not_hired]</td>\n",
-       "      <td>[[10, 1], [15, 18]]</td>\n",
-       "      <td>[[6.25, 4.75], [18.75, 14.25]]</td>\n",
-       "      <td>[[3.75, -3.75], [-3.75, 3.75]]</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                        test_result  phi_corr_coeff  phi_bins   \n",
-       "0  statistically significant result         0.39736  moderate  \\\n",
-       "\n",
-       "              four_fifths_test   \n",
-       "0  passed with 4/5 test at 1.0  \\\n",
-       "\n",
-       "                                         result_desc  statistic    pvalue   \n",
-       "0  Testing for {'job_title': 'analyst'}, passed w...   5.218246  0.022351  \\\n",
-       "\n",
-       "   dof        tbl_rows            tbl_cols                  tbl   \n",
-       "0    1  [Female, Male]  [hired, not_hired]  [[10, 1], [15, 18]]  \\\n",
-       "\n",
-       "                    expected_freq               tbl_expected_diff  \n",
-       "0  [[6.25, 4.75], [18.75, 14.25]]  [[3.75, -3.75], [-3.75, 3.75]]  "
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_result"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "id": "01672735-2ad5-42ae-9488-1962f3d0e63e",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>test_result</th>\n",
-       "      <th>phi_corr_coeff</th>\n",
-       "      <th>phi_bins</th>\n",
-       "      <th>four_fifths_test</th>\n",
-       "      <th>result_desc</th>\n",
-       "      <th>statistic</th>\n",
-       "      <th>pvalue</th>\n",
-       "      <th>dof</th>\n",
-       "      <th>tbl_row</th>\n",
-       "      <th>tbl</th>\n",
-       "      <th>expected_freq</th>\n",
-       "      <th>tbl_expected_diff</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>statistically significant result</td>\n",
-       "      <td>0.39736</td>\n",
-       "      <td>moderate</td>\n",
-       "      <td>failed 4/5 test at 0.167</td>\n",
-       "      <td>Testing for {'job_title': 'analyst'}, based on...</td>\n",
-       "      <td>5.218246</td>\n",
-       "      <td>0.022351</td>\n",
-       "      <td>1</td>\n",
-       "      <td>[Female, Male]</td>\n",
-       "      <td>[[10, 1], [15, 18]]</td>\n",
-       "      <td>[[6.25, 4.75], [18.75, 14.25]]</td>\n",
-       "      <td>[[3.75, -3.75], [-3.75, 3.75]]</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                        test_result  phi_corr_coeff  phi_bins   \n",
-       "0  statistically significant result         0.39736  moderate  \\\n",
-       "\n",
-       "           four_fifths_test   \n",
-       "0  failed 4/5 test at 0.167  \\\n",
-       "\n",
-       "                                         result_desc  statistic    pvalue   \n",
-       "0  Testing for {'job_title': 'analyst'}, based on...   5.218246  0.022351  \\\n",
-       "\n",
-       "   dof         tbl_row                  tbl                   expected_freq   \n",
-       "0    1  [Female, Male]  [[10, 1], [15, 18]]  [[6.25, 4.75], [18.75, 14.25]]  \\\n",
-       "\n",
-       "                tbl_expected_diff  \n",
-       "0  [[3.75, -3.75], [-3.75, 3.75]]  "
-      ]
-     },
-     "execution_count": 24,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_result"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "94114f77-caa8-41a3-900a-44317c84f4b7",
-   "metadata": {},
-   "source": [
-    "to do:\n",
-    "    \n",
-    "implement these tests\n",
-    "\n",
-    "\n",
-    "https://en.wikipedia.org/wiki/Disparate_impact\n",
-    "\n",
-    "Add handler for filtered size of group must be ...\n",
-    "\n",
-    "# need to check this size\n",
-    "# https://online.stat.psu.edu/stat500/lesson/8/8.2#:~:text=That%20equates%20to%20the%20Chi,count%20of%20at%20least%205.\n",
-    "\n",
-    "# make sure at least 5 in each slice, then at least 50"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1d975b96-c8af-4e46-9521-0c7fbe442ff1",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "hrailabs_dev",
-   "language": "python",
-   "name": "hrailabs"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}