From daab72117578ac5221461da7173ba978588c6554 Mon Sep 17 00:00:00 2001 From: Juniper Feld Date: Sun, 3 Nov 2024 19:55:01 -0700 Subject: [PATCH] Delete pay_equity directory needs additional refactoring before pushing up --- .../Pay_Gap_Reg-checkpoint.ipynb | 748 ------------------ pay_equity/Pay_Gap_Reg.ipynb | 748 ------------------ 2 files changed, 1496 deletions(-) delete mode 100644 pay_equity/.ipynb_checkpoints/Pay_Gap_Reg-checkpoint.ipynb delete mode 100644 pay_equity/Pay_Gap_Reg.ipynb diff --git a/pay_equity/.ipynb_checkpoints/Pay_Gap_Reg-checkpoint.ipynb b/pay_equity/.ipynb_checkpoints/Pay_Gap_Reg-checkpoint.ipynb deleted file mode 100644 index 10ba745..0000000 --- a/pay_equity/.ipynb_checkpoints/Pay_Gap_Reg-checkpoint.ipynb +++ /dev/null @@ -1,748 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "ename": "ImportError", - "evalue": "cannot import name 'factorial'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdatasets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlinear_model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear_model\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mLinearRegression\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mstatsmodels\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapi\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mscipy\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mstats\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/statsmodels/api.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mrobust\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mrobust\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrobust_linear_model\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mRLM\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m from .discrete.discrete_model import (Poisson, Logit, Probit,\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0mMNLogit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNegativeBinomial\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mGeneralizedPoisson\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/statsmodels/discrete/discrete_model.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mstatsmodels\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbase\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0ml1_slsqp\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mfit_l1_slsqp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 45\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mstatsmodels\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdistributions\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mgenpoisson_p\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 46\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/statsmodels/distributions/__init__.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mempirical_distribution\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mECDF\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmonotone_fn_inverter\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mStepFunction\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0medgeworth\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mExpandedNormal\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mdiscrete\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mgenpoisson_p\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mzipoisson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mzigenpoisson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mzinegbin\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/statsmodels/distributions/edgeworth.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpolynomial\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhermite_e\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mHermiteE\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mscipy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmisc\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mfactorial\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mscipy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstats\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mrv_continuous\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mscipy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspecial\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mspecial\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mImportError\u001b[0m: cannot import name 'factorial'" - ] - } - ], - "source": [ - "# Import libraries\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn import linear_model\n", - "import matplotlib.pyplot as plt\n", - "from sklearn.metrics import r2_score, mean_squared_error\n", - "from sklearn.preprocessing import PolynomialFeatures\n", - "from sklearn import datasets, linear_model\n", - "from sklearn.linear_model import LinearRegression\n", - "import statsmodels.api as sm\n", - "from scipy import stats" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1000, 9)" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Reading the data\n", - "\n", - "glassdoor_data = pd.read_csv(\"Glassdoor Gender Pay Gap.csv\")\n", - "glassdoor_data.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
JobTitleGenderAgePerfEvalEducationDeptSeniorityBasePayBonus
0Graphic DesignerFemale185CollegeOperations2423639938
1Software EngineerMale215CollegeManagement510847611128
2Warehouse AssociateFemale194PhDAdministration5902089268
3Software EngineerMale205MastersSales410808010154
4Graphic DesignerMale265MastersEngineering5994649319
\n", - "
" - ], - "text/plain": [ - " JobTitle Gender Age PerfEval Education Dept \\\n", - "0 Graphic Designer Female 18 5 College Operations \n", - "1 Software Engineer Male 21 5 College Management \n", - "2 Warehouse Associate Female 19 4 PhD Administration \n", - "3 Software Engineer Male 20 5 Masters Sales \n", - "4 Graphic Designer Male 26 5 Masters Engineering \n", - "\n", - " Seniority BasePay Bonus \n", - "0 2 42363 9938 \n", - "1 5 108476 11128 \n", - "2 5 90208 9268 \n", - "3 4 108080 10154 \n", - "4 5 99464 9319 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Check top 5 rows of the data\n", - "\n", - "glassdoor_data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AgePerfEvalSeniorityBasePayBonus
count1000.0000001000.0000001000.0000001000.0000001000.000000
mean41.3930003.0370002.97100094472.6530006467.161000
std14.2948561.4239591.39502925337.4932722004.377365
min18.0000001.0000001.00000034208.0000001703.000000
25%29.0000002.0000002.00000076850.2500004849.500000
50%41.0000003.0000003.00000093327.5000006507.000000
75%54.2500004.0000004.000000111558.0000008026.000000
max65.0000005.0000005.000000179726.00000011293.000000
\n", - "
" - ], - "text/plain": [ - " Age PerfEval Seniority BasePay Bonus\n", - "count 1000.000000 1000.000000 1000.000000 1000.000000 1000.000000\n", - "mean 41.393000 3.037000 2.971000 94472.653000 6467.161000\n", - "std 14.294856 1.423959 1.395029 25337.493272 2004.377365\n", - "min 18.000000 1.000000 1.000000 34208.000000 1703.000000\n", - "25% 29.000000 2.000000 2.000000 76850.250000 4849.500000\n", - "50% 41.000000 3.000000 3.000000 93327.500000 6507.000000\n", - "75% 54.250000 4.000000 4.000000 111558.000000 8026.000000\n", - "max 65.000000 5.000000 5.000000 179726.000000 11293.000000" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Summary Stats of numerical variables\n", - "\n", - "glassdoor_data.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
JobTitleGenderEducationDept
count1000100010001000
unique10245
topMarketing AssociateMaleHigh SchoolOperations
freq118532265210
\n", - "
" - ], - "text/plain": [ - " JobTitle Gender Education Dept\n", - "count 1000 1000 1000 1000\n", - "unique 10 2 4 5\n", - "top Marketing Associate Male High School Operations\n", - "freq 118 532 265 210" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Summary stats of categorical variables\n", - "\n", - "glassdoor_data.describe(include=np.object)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([2, 5, 4, 3, 1])" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Seniority has 5 discrete levels and can be converted to a categorical variable\n", - "\n", - "glassdoor_data.Seniority.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([5, 4, 3, 2, 1])" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# PerfEval has 5 discrete variables and can be converted to a categorical variable\n", - "\n", - "glassdoor_data.PerfEval.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
JobTitleAgePerfEvalEducationDeptSeniorityBasePayBonusTotalPayFemale
0Graphic Designer1851Operations2423639938523011
1Software Engineer2151Management5108476111281196040
2Warehouse Associate1941Administration5902089268994761
3Software Engineer2051Sales4108080101541182340
4Graphic Designer2651Engineering59946493191087830
\n", - "
" - ], - "text/plain": [ - " JobTitle Age PerfEval Education Dept Seniority \\\n", - "0 Graphic Designer 18 5 1 Operations 2 \n", - "1 Software Engineer 21 5 1 Management 5 \n", - "2 Warehouse Associate 19 4 1 Administration 5 \n", - "3 Software Engineer 20 5 1 Sales 4 \n", - "4 Graphic Designer 26 5 1 Engineering 5 \n", - "\n", - " BasePay Bonus TotalPay Female \n", - "0 42363 9938 52301 1 \n", - "1 108476 11128 119604 0 \n", - "2 90208 9268 99476 1 \n", - "3 108080 10154 118234 0 \n", - "4 99464 9319 108783 0 " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate total pay (add base and bonus)\n", - " \n", - "glassdoor_data['TotalPay'] = glassdoor_data['BasePay'] + glassdoor_data['Bonus']\n", - "\n", - "# Change Gender and Dept to numerical (Dummy Coding)\n", - "\n", - "gender_dummies = pd.get_dummies(glassdoor_data['Gender'])\n", - "dept_dummies = pd.get_dummies(glassdoor_data['Dept'])\n", - "glassdoor_data = glassdoor_data.join(gender_dummies)\n", - "del glassdoor_data['Gender']\n", - "del glassdoor_data['Male'] # We can delete this one because all the information is in Female (1 = Female, 0 = Male)\n", - "\n", - "# Switch from Education(College, High School, ...) to just College (or more) yes/no \n", - "\n", - "glassdoor_data['Education'] = np.where(glassdoor_data['Education'].isin(['College', 'Masters', 'PhD']), 1, 0)\n", - "\n", - "glassdoor_data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Independent variables: ['Age' 'PerfEval' 'Education' 'Seniority' 'Female']\n", - "Train set: (800, 5) (800,)\n", - "Test set: (200, 5) (200,)\n" - ] - } - ], - "source": [ - "# Feature sets\n", - "\n", - "X = glassdoor_data.loc[:, ~glassdoor_data.columns.isin(['BasePay', 'Bonus', 'JobTitle', 'Dept', 'TotalPay'])].values\n", - "y = glassdoor_data['TotalPay'].values\n", - "\n", - "print('Independent variables: {}'.format(glassdoor_data.loc[:, ~glassdoor_data.columns.isin(['BasePay', 'Bonus', 'JobTitle', 'Dept', 'TotalPay'])].columns.values))\n", - "\n", - "# Split in train and test\n", - "\n", - "X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=4)\n", - "print ('Train set:', X_train.shape, y_train.shape)\n", - "print ('Test set:', X_test.shape, y_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Coefficients: [ 960.16959348 734.33619703 4664.8070167 9631.11189259\n", - " -10410.75370526]\n", - "Intercept: 31258.715819032033\n" - ] - } - ], - "source": [ - "# Model\n", - "\n", - "regr = linear_model.LinearRegression()\n", - "regr.fit (X_train, y_train)\n", - "\n", - "# The coefficients\n", - "\n", - "print (f'Coefficients: {regr.coef_}')\n", - "print (f'Intercept: {regr.intercept_}')" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Mean Squared Error (MSE): 277234107.3688191\n", - "R2 Score: 0.5895890743645553\n" - ] - } - ], - "source": [ - "# Evaluation of the predictions with the test data\n", - "\n", - "y_test_pred = regr.predict(X_test)\n", - "\n", - "print(f'Mean Squared Error (MSE): {mean_squared_error(y_test, y_test_pred)}')\n", - "print(f'R2 Score: {r2_score(y_test , y_test_pred)}')" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Feature: 0, Score: 960.16959\n", - "Feature: 1, Score: 734.33620\n", - "Feature: 2, Score: 4664.80702\n", - "Feature: 3, Score: 9631.11189\n", - "Feature: 4, Score: -10410.75371\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAUBUlEQVR4nO3df6zd9X3f8eerdmBRuwwSPOLaZHYTp5JBq1uuCFKWagtJMFDFpEpTI424HYsTBUuJVKk1yx/JaJlo1zRTNkrlNFacLYvDQlKsxBk1FDWaNIIvwQMMYVwICFsO3AIJ2VKRmrz3x/l4OZh7/QH7nnNq3+dDOjrf7/vz+X7P+6sovPz9ce5JVSFJ0rH8zKQbkCT9/WdYSJK6DAtJUpdhIUnqMiwkSV1LJ93AqJx11lm1atWqSbchSSeVu++++2+qatnR9VM2LFatWsX09PSk25Ckk0qSx+eqexlKktS1IGGRZHuSp5LcP1R7bZI9SR5u72e2epJ8OslMknuT/MrQNpva/IeTbBqqn5/kvrbNp5NkIfqWJL08C3Vm8Tlg/VG1rcDtVbUGuL2tA1wCrGmvzcCNMAgX4OPAW4ALgI8fCZg25wND2x39WZKkEVqQsKiqbwLPHFXeAOxoyzuAy4fqn6+BO4EzkiwHLgb2VNUzVfUssAdY38ZeU1V31uBvk3x+aF+SpDEY5T2Ls6vqUFv+HnB2W14BPDE070CrHat+YI66JGlMxnKDu50RjPwvFibZnGQ6yfTs7OyoP06SFo1RhsWT7RIS7f2pVj8InDM0b2WrHau+co76S1TVtqqaqqqpZcte8piwJOk4jTIsdgFHnmjaBNwyVH9/eyrqQuAH7XLVrcC7kpzZbmy/C7i1jT2X5ML2FNT7h/YlSRqDBflSXpIvAv8cOCvJAQZPNV0P3JTkKuBx4H1t+m7gUmAG+BHw2wBV9UyS3wf2tnnXVtWRm+YfZvDE1auBb7SXpAW0auvXJ93Cgnns+ssm3cIpZ0HCoqqumGfoojnmFnD1PPvZDmyfoz4NnHciPUqSjp/f4JYkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldhoUkqWukYZHkF5PsG3o9l+SjST6R5OBQ/dKhba5JMpPkoSQXD9XXt9pMkq2j7FuS9GIL8kt586mqh4B1AEmWAAeBrzL4KdVPVdUfD89PshbYCJwL/DxwW5I3t+EbgHcCB4C9SXZV1QOj7F+SNDDSsDjKRcAjVfV4kvnmbAB2VtXzwHeTzAAXtLGZqnoUIMnONtewkKQxGOc9i43AF4fWtyS5N8n2JGe22grgiaE5B1ptvvqLJNmcZDrJ9Ozs7MJ2L0mL2FjCIslpwLuB/9ZKNwJvZHCJ6hDwyYX4nKraVlVTVTW1bNmyhdilJInxXYa6BPh2VT0JcOQdIMlngK+11YPAOUPbrWw1jlGXJI3YuC5DXcHQJagky4fG3gPc35Z3ARuTnJ5kNbAGuAvYC6xJsrqdpWxscyVJYzDyM4skP8vgKaYPDpX/KMk6oIDHjoxV1f4kNzG4cX0YuLqqXmj72QLcCiwBtlfV/lH3LkkaGHlYVNX/BV53VO3KY8y/DrhujvpuYPeCNyhJ6vIb3JKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdY08LJI8luS+JPuSTLfaa5PsSfJwez+z1ZPk00lmktyb5FeG9rOpzX84yaZR9y1J+qlxnVn8i6paV1VTbX0rcHtVrQFub+sAlzD43e01wGbgRhiEC/Bx4C3ABcDHjwSMJGn0JnUZagOwoy3vAC4fqn++Bu4EzkiyHLgY2FNVz1TVs8AeYP2Ye5akRWscYVHAXya5O8nmVju7qg615e8BZ7flFcATQ9seaLX56i+SZHOS6STTs7OzC3kMkrSoLR3DZ/yzqjqY5B8De5J8Z3iwqipJLcQHVdU2YBvA1NTUguxTkjSGM4uqOtjenwK+yuCew5Pt8hLt/ak2/SBwztDmK1ttvrokaQxGemaR5GeBn6mqH7bldwHXAruATcD17f2WtskuYEuSnQxuZv+gqg4luRX4d0M3td8FXDPK3rU4rdr69Um3sGAeu/6ySbegU8ioL0OdDXw1yZHP+q9V9d+T7AVuSnIV8DjwvjZ/N3ApMAP8CPhtgKp6JsnvA3vbvGur6pkR9y5JakYaFlX1KPBLc9SfBi6ao17A1fPsazuwfaF7lCT1+Q1uSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1DWysEhyTpI7kjyQZH+Sj7T6J5IcTLKvvS4d2uaaJDNJHkpy8VB9favNJNk6qp4lSXMb5Y8fHQZ+p6q+neQfAncn2dPGPlVVfzw8OclaYCNwLvDzwG1J3tyGbwDeCRwA9ibZVVUPjLB3SdKQkYVFVR0CDrXlHyZ5EFhxjE02ADur6nngu0lmgAva2Ez71T3a73NvAAwLSRqTsdyzSLIK+GXgW620Jcm9SbYnObPVVgBPDG12oNXmq0uSxmTkYZHk54CbgY9W1XPAjcAbgXUMzjw+uYCftTnJdJLp2dnZhdqtJC16Iw2LJK9iEBRfqKqvAFTVk1X1QlX9BPgMP73UdBA4Z2jzla02X/0lqmpbVU1V1dSyZcsW9mAkaREb5dNQAT4LPFhVfzJUXz407T3A/W15F7AxyelJVgNrgLuAvcCaJKuTnMbgJviuUfUtSXqpUT4N9VbgSuC+JPta7d8AVyRZBxTwGPBBgKran+QmBjeuDwNXV9ULAEm2ALcCS4DtVbV/hH1Lko4yyqeh/geQOYZ2H2Ob64Dr5qjvPtZ2kqTR8hvckqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktQ1yr8NddJatfXrk25hwTx2/WWTbkHSKcAzC0lSl2cWeolT5czKsypp4XhmIUnqMiwkSV2GhSSp66QJiyTrkzyUZCbJ1kn3I0mLyUkRFkmWADcAlwBrGfw069rJdiVJi8dJERbABcBMVT1aVT8GdgIbJtyTJC0aqapJ99CV5L3A+qr61239SuAtVbXlqHmbgc0Ab3jDG85//PHHx96rpJPTqfLIOJzYY+NJ7q6qqaPrJ8uZxctSVduqaqqqppYtWzbpdiTplHGyhMVB4Jyh9ZWtJkkag5MlLPYCa5KsTnIasBHYNeGeJGnROCn+3EdVHU6yBbgVWAJsr6r9E25LkhaNkyIsAKpqN7B70n1I0mJ0slyGkiRNkGEhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSukYSFkn+fZLvJLk3yVeTnNHqq5L8bZJ97fVnQ9ucn+S+JDNJPp0krf7aJHuSPNzezxxFz5Kk+Y3qzGIPcF5V/VPgfwPXDI09UlXr2utDQ/UbgQ8Aa9prfatvBW6vqjXA7W1dkjRGIwmLqvrLqjrcVu8EVh5rfpLlwGuq6s6qKuDzwOVteAOwoy3vGKpLksZkHPcs/hXwjaH11UnuSfLXSd7WaiuAA0NzDrQawNlVdagtfw84e74PSrI5yXSS6dnZ2QVqX5K09Hg3THIb8Po5hj5WVbe0OR8DDgNfaGOHgDdU1dNJzgf+Ism5L/czq6qS1DHGtwHbAKampuadJ0l6ZY47LKrqHccaT/JbwK8BF7VLS1TV88DzbfnuJI8AbwYO8uJLVStbDeDJJMur6lC7XPXU8fYsSTo+o3oaaj3wu8C7q+pHQ/VlSZa05V9gcCP70XaZ6bkkF7anoN4P3NI22wVsasubhuqSpDE57jOLjv8EnA7saU/A3tmefPpV4Nokfwf8BPhQVT3Ttvkw8Dng1QzucRy5z3E9cFOSq4DHgfeNqGdJ0jxGEhZV9aZ56jcDN88zNg2cN0f9aeCiBW1QkvSK+A1uSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1DWysEjyiSQHk+xrr0uHxq5JMpPkoSQXD9XXt9pMkq1D9dVJvtXqX0py2qj6liS91KjPLD5VVevaazdAkrXARuBcYD3wp0mWtJ9bvQG4BFgLXNHmAvxh29ebgGeBq0bctyRpyCQuQ20AdlbV81X1XWAGuKC9Zqrq0ar6MbAT2NB+k/vtwJfb9juAy8fftiQtXqMOiy1J7k2yPcmZrbYCeGJozoFWm6/+OuD7VXX4qLokaUxOKCyS3Jbk/jleG4AbgTcC64BDwCdPvN1uP5uTTCeZnp2dHfXHSdKisfRENq6qd7yceUk+A3ytrR4EzhkaXtlqzFN/GjgjydJ2djE8/+h+tgHbAKampuplHoYkqWOUT0MtH1p9D3B/W94FbExyepLVwBrgLmAvsKY9+XQag5vgu6qqgDuA97btNwG3jKpvSdJLndCZRccfJVkHFPAY8EGAqtqf5CbgAeAwcHVVvQCQZAtwK7AE2F5V+9u+fg/YmeQPgHuAz46wb0nSUUYWFlV15THGrgOum6O+G9g9R/1RBk9LSZImwG9wS5K6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUNZKwSPKlJPva67Ek+1p9VZK/HRr7s6Ftzk9yX5KZJJ9OklZ/bZI9SR5u72eOomdJ0vxGEhZV9ZtVta6q1gE3A18ZGn7kyFhVfWiofiPwAQa/yb0GWN/qW4Hbq2oNcHtblySN0UgvQ7Wzg/cBX+zMWw68pqrurKoCPg9c3oY3ADva8o6huiRpTEZ9z+JtwJNV9fBQbXWSe5L8dZK3tdoK4MDQnAOtBnB2VR1qy98Dzp7vw5JsTjKdZHp2dnaBDkGStPR4N0xyG/D6OYY+VlW3tOUrePFZxSHgDVX1dJLzgb9Icu7L/cyqqiR1jPFtwDaAqampeedJkl6Z4w6LqnrHscaTLAV+HTh/aJvngefb8t1JHgHeDBwEVg5tvrLVAJ5MsryqDrXLVU8db8+SpOMzystQ7wC+U1X///JSkmVJlrTlX2BwI/vRdpnpuSQXtvsc7weOnJ3sAja15U1DdUnSmBz3mcXLsJGX3tj+VeDaJH8H/AT4UFU908Y+DHwOeDXwjfYCuB64KclVwOMMbphLksZoZGFRVb81R+1mBo/SzjV/GjhvjvrTwEUL3Z8k6eXzG9ySpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHWdUFgk+Y0k+5P8JMnUUWPXJJlJ8lCSi4fq61ttJsnWofrqJN9q9S8lOa3VT2/rM2181Yn0LEl65U70zOJ+4NeBbw4Xk6xl8LOq5wLrgT9NsqT9/vYNwCXAWuCKNhfgD4FPVdWbgGeBq1r9KuDZVv9UmydJGqMTCouqerCqHppjaAOws6qer6rvAjPABe01U1WPVtWPgZ3AhiQB3g58uW2/A7h8aF872vKXgYvafEnSmIzqnsUK4Imh9QOtNl/9dcD3q+rwUfUX7auN/6DNf4kkm5NMJ5menZ1doEORJC3tTUhyG/D6OYY+VlW3LHxLx6+qtgHbAKampmrC7UjSKaMbFlX1juPY70HgnKH1la3GPPWngTOSLG1nD8Pzj+zrQJKlwD9q8yVJYzKqy1C7gI3tSabVwBrgLmAvsKY9+XQag5vgu6qqgDuA97btNwG3DO1rU1t+L/BXbb4kaUy6ZxbHkuQ9wH8ElgFfT7Kvqi6uqv1JbgIeAA4DV1fVC22bLcCtwBJge1Xtb7v7PWBnkj8A7gE+2+qfBf5zkhngGQYBI0kL6rHrL5t0C3+v5VT9R/rU1FRNT09Pug1JOqkkubuqpo6u+w1uSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkrpO2S/lJZkFHp90Hx1nAX8z6SYmxGNfvBbz8Z8Mx/5PqmrZ0cVTNixOBkmm5/qm5GLgsS/OY4fFffwn87F7GUqS1GVYSJK6DIvJ2jbpBibIY1+8FvPxn7TH7j0LSVKXZxaSpC7DQpLUZVhMQJL1SR5KMpNk66T7Gack25M8leT+SfcybknOSXJHkgeS7E/ykUn3NC5J/kGSu5L8r3bs/3bSPY1bkiVJ7knytUn3cjwMizFLsgS4AbgEWAtckWTtZLsaq88B6yfdxIQcBn6nqtYCFwJXL6L/7Z8H3l5VvwSsA9YnuXCyLY3dR4AHJ93E8TIsxu8CYKaqHq2qHwM7gQ0T7mlsquqbDH5LfdGpqkNV9e22/EMG/+FYMdmuxqMG/k9bfVV7LZqna5KsBC4D/nzSvRwvw2L8VgBPDK0fYJH8B0M/lWQV8MvAtybcyti0yzD7gKeAPVW1aI4d+A/A7wI/mXAfx82wkMYsyc8BNwMfrarnJt3PuFTVC1W1DlgJXJDkvAm3NBZJfg14qqrunnQvJ8KwGL+DwDlD6ytbTYtAklcxCIovVNVXJt3PJFTV94E7WDz3rt4KvDvJYwwuO789yX+ZbEuvnGExfnuBNUlWJzkN2AjsmnBPGoMkAT4LPFhVfzLpfsYpybIkZ7TlVwPvBL4z0abGpKquqaqVVbWKwf/f/6qq/uWE23rFDIsxq6rDwBbgVgY3OG+qqv2T7Wp8knwR+J/ALyY5kOSqSfc0Rm8FrmTwL8t97XXppJsak+XAHUnuZfAPpj1VdVI+QrpY+ec+JEldnllIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqSu/wcfTuSq+bwGyAAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# linear regression feature importance\n", - "from sklearn.datasets import make_regression\n", - "from sklearn.linear_model import LinearRegression\n", - "from matplotlib import pyplot\n", - "\n", - "# get importance\n", - "importance = regr.coef_\n", - "# summarize feature importance\n", - "for i,v in enumerate(importance):\n", - "\tprint('Feature: %0d, Score: %.5f' % (i,v))\n", - "# plot feature importance\n", - "pyplot.bar([x for x in range(len(importance))], importance)\n", - "pyplot.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/pay_equity/Pay_Gap_Reg.ipynb b/pay_equity/Pay_Gap_Reg.ipynb deleted file mode 100644 index 53accfc..0000000 --- a/pay_equity/Pay_Gap_Reg.ipynb +++ /dev/null @@ -1,748 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "ename": "ImportError", - "evalue": "cannot import name 'factorial'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdatasets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlinear_model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear_model\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mLinearRegression\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mstatsmodels\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapi\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mscipy\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mstats\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/statsmodels/api.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mrobust\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mrobust\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrobust_linear_model\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mRLM\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m from .discrete.discrete_model import (Poisson, Logit, Probit,\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0mMNLogit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNegativeBinomial\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mGeneralizedPoisson\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/statsmodels/discrete/discrete_model.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mstatsmodels\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbase\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0ml1_slsqp\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mfit_l1_slsqp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 45\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mstatsmodels\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdistributions\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mgenpoisson_p\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 46\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/statsmodels/distributions/__init__.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mempirical_distribution\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mECDF\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmonotone_fn_inverter\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mStepFunction\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0medgeworth\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mExpandedNormal\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mdiscrete\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mgenpoisson_p\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mzipoisson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mzigenpoisson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mzinegbin\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/statsmodels/distributions/edgeworth.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpolynomial\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhermite_e\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mHermiteE\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mscipy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmisc\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mfactorial\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mscipy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstats\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mrv_continuous\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mscipy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspecial\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mspecial\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mImportError\u001b[0m: cannot import name 'factorial'" - ] - } - ], - "source": [ - "# Import libraries\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn import linear_model\n", - "import matplotlib.pyplot as plt\n", - "from sklearn.metrics import r2_score, mean_squared_error\n", - "from sklearn.preprocessing import PolynomialFeatures\n", - "from sklearn import datasets, linear_model\n", - "from sklearn.linear_model import LinearRegression\n", - "import statsmodels.api as sm\n", - "from scipy import stats" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1000, 9)" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Reading the data\n", - "\n", - "glassdoor_data = pd.read_csv(\"Glassdoor Gender Pay Gap.csv\")\n", - "glassdoor_data.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
JobTitleGenderAgePerfEvalEducationDeptSeniorityBasePayBonus
0Graphic DesignerFemale185CollegeOperations2423639938
1Software EngineerMale215CollegeManagement510847611128
2Warehouse AssociateFemale194PhDAdministration5902089268
3Software EngineerMale205MastersSales410808010154
4Graphic DesignerMale265MastersEngineering5994649319
\n", - "
" - ], - "text/plain": [ - " JobTitle Gender Age PerfEval Education Dept \\\n", - "0 Graphic Designer Female 18 5 College Operations \n", - "1 Software Engineer Male 21 5 College Management \n", - "2 Warehouse Associate Female 19 4 PhD Administration \n", - "3 Software Engineer Male 20 5 Masters Sales \n", - "4 Graphic Designer Male 26 5 Masters Engineering \n", - "\n", - " Seniority BasePay Bonus \n", - "0 2 42363 9938 \n", - "1 5 108476 11128 \n", - "2 5 90208 9268 \n", - "3 4 108080 10154 \n", - "4 5 99464 9319 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Check top 5 rows of the data\n", - "\n", - "glassdoor_data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AgePerfEvalSeniorityBasePayBonus
count1000.0000001000.0000001000.0000001000.0000001000.000000
mean41.3930003.0370002.97100094472.6530006467.161000
std14.2948561.4239591.39502925337.4932722004.377365
min18.0000001.0000001.00000034208.0000001703.000000
25%29.0000002.0000002.00000076850.2500004849.500000
50%41.0000003.0000003.00000093327.5000006507.000000
75%54.2500004.0000004.000000111558.0000008026.000000
max65.0000005.0000005.000000179726.00000011293.000000
\n", - "
" - ], - "text/plain": [ - " Age PerfEval Seniority BasePay Bonus\n", - "count 1000.000000 1000.000000 1000.000000 1000.000000 1000.000000\n", - "mean 41.393000 3.037000 2.971000 94472.653000 6467.161000\n", - "std 14.294856 1.423959 1.395029 25337.493272 2004.377365\n", - "min 18.000000 1.000000 1.000000 34208.000000 1703.000000\n", - "25% 29.000000 2.000000 2.000000 76850.250000 4849.500000\n", - "50% 41.000000 3.000000 3.000000 93327.500000 6507.000000\n", - "75% 54.250000 4.000000 4.000000 111558.000000 8026.000000\n", - "max 65.000000 5.000000 5.000000 179726.000000 11293.000000" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Summary Stats of numerical variables\n", - "\n", - "glassdoor_data.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
JobTitleGenderEducationDept
count1000100010001000
unique10245
topMarketing AssociateMaleHigh SchoolOperations
freq118532265210
\n", - "
" - ], - "text/plain": [ - " JobTitle Gender Education Dept\n", - "count 1000 1000 1000 1000\n", - "unique 10 2 4 5\n", - "top Marketing Associate Male High School Operations\n", - "freq 118 532 265 210" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Summary stats of categorical variables\n", - "\n", - "glassdoor_data.describe(include=np.object)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([2, 5, 4, 3, 1])" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Seniority has 5 discrete levels and can be converted to a categorical variable\n", - "\n", - "glassdoor_data.Seniority.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([5, 4, 3, 2, 1])" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# PerfEval has 5 discrete variables and can be converted to a categorical variable\n", - "\n", - "glassdoor_data.PerfEval.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
JobTitleAgePerfEvalEducationDeptSeniorityBasePayBonusTotalPayFemale
0Graphic Designer1851Operations2423639938523011
1Software Engineer2151Management5108476111281196040
2Warehouse Associate1941Administration5902089268994761
3Software Engineer2051Sales4108080101541182340
4Graphic Designer2651Engineering59946493191087830
\n", - "
" - ], - "text/plain": [ - " JobTitle Age PerfEval Education Dept Seniority \\\n", - "0 Graphic Designer 18 5 1 Operations 2 \n", - "1 Software Engineer 21 5 1 Management 5 \n", - "2 Warehouse Associate 19 4 1 Administration 5 \n", - "3 Software Engineer 20 5 1 Sales 4 \n", - "4 Graphic Designer 26 5 1 Engineering 5 \n", - "\n", - " BasePay Bonus TotalPay Female \n", - "0 42363 9938 52301 1 \n", - "1 108476 11128 119604 0 \n", - "2 90208 9268 99476 1 \n", - "3 108080 10154 118234 0 \n", - "4 99464 9319 108783 0 " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate total pay (add base and bonus)\n", - " \n", - "glassdoor_data['TotalPay'] = glassdoor_data['BasePay'] + glassdoor_data['Bonus']\n", - "\n", - "# Change Gender and Dept to numerical (Dummy Coding)\n", - "\n", - "gender_dummies = pd.get_dummies(glassdoor_data['Gender'])\n", - "dept_dummies = pd.get_dummies(glassdoor_data['Dept'])\n", - "glassdoor_data = glassdoor_data.join(gender_dummies)\n", - "del glassdoor_data['Gender']\n", - "del glassdoor_data['Male'] # We can delete this one because all the information is in Female (1 = Female, 0 = Male)\n", - "\n", - "# Switch from Education(College, High School, ...) to just College (or more) yes/no \n", - "\n", - "glassdoor_data['Education'] = np.where(glassdoor_data['Education'].isin(['College', 'Masters', 'PhD']), 1, 0)\n", - "\n", - "glassdoor_data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Independent variables: ['Age' 'PerfEval' 'Education' 'Seniority' 'Female']\n", - "Train set: (800, 5) (800,)\n", - "Test set: (200, 5) (200,)\n" - ] - } - ], - "source": [ - "# Feature sets\n", - "\n", - "X = glassdoor_data.loc[:, ~glassdoor_data.columns.isin(['BasePay', 'Bonus', 'JobTitle', 'Dept', 'TotalPay'])].values\n", - "y = glassdoor_data['TotalPay'].values\n", - "\n", - "print('Independent variables: {}'.format(glassdoor_data.loc[:, ~glassdoor_data.columns.isin(['BasePay', 'Bonus', 'JobTitle', 'Dept', 'TotalPay'])].columns.values))\n", - "\n", - "# Split in train and test\n", - "\n", - "X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=4)\n", - "print ('Train set:', X_train.shape, y_train.shape)\n", - "print ('Test set:', X_test.shape, y_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Coefficients: [ 960.16959348 734.33619703 4664.8070167 9631.11189259\n", - " -10410.75370526]\n", - "Intercept: 31258.715819032033\n" - ] - } - ], - "source": [ - "# Model\n", - "\n", - "regr = linear_model.LinearRegression()\n", - "regr.fit (X_train, y_train)\n", - "\n", - "# The coefficients\n", - "\n", - "print (f'Coefficients: {regr.coef_}')\n", - "print (f'Intercept: {regr.intercept_}')" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Mean Squared Error (MSE): 277234107.3688191\n", - "R2 Score: 0.5895890743645553\n" - ] - } - ], - "source": [ - "# Evaluation of the predictions with the test data\n", - "\n", - "y_test_pred = regr.predict(X_test)\n", - "\n", - "print(f'Mean Squared Error (MSE): {mean_squared_error(y_test, y_test_pred)}')\n", - "print(f'R2 Score: {r2_score(y_test , y_test_pred)}')" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Feature: 0, Score: 960.16959\n", - "Feature: 1, Score: 734.33620\n", - "Feature: 2, Score: 4664.80702\n", - "Feature: 3, Score: 9631.11189\n", - "Feature: 4, Score: -10410.75371\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAUBUlEQVR4nO3df6zd9X3f8eerdmBRuwwSPOLaZHYTp5JBq1uuCFKWagtJMFDFpEpTI424HYsTBUuJVKk1yx/JaJlo1zRTNkrlNFacLYvDQlKsxBk1FDWaNIIvwQMMYVwICFsO3AIJ2VKRmrz3x/l4OZh7/QH7nnNq3+dDOjrf7/vz+X7P+6sovPz9ce5JVSFJ0rH8zKQbkCT9/WdYSJK6DAtJUpdhIUnqMiwkSV1LJ93AqJx11lm1atWqSbchSSeVu++++2+qatnR9VM2LFatWsX09PSk25Ckk0qSx+eqexlKktS1IGGRZHuSp5LcP1R7bZI9SR5u72e2epJ8OslMknuT/MrQNpva/IeTbBqqn5/kvrbNp5NkIfqWJL08C3Vm8Tlg/VG1rcDtVbUGuL2tA1wCrGmvzcCNMAgX4OPAW4ALgI8fCZg25wND2x39WZKkEVqQsKiqbwLPHFXeAOxoyzuAy4fqn6+BO4EzkiwHLgb2VNUzVfUssAdY38ZeU1V31uBvk3x+aF+SpDEY5T2Ls6vqUFv+HnB2W14BPDE070CrHat+YI66JGlMxnKDu50RjPwvFibZnGQ6yfTs7OyoP06SFo1RhsWT7RIS7f2pVj8InDM0b2WrHau+co76S1TVtqqaqqqpZcte8piwJOk4jTIsdgFHnmjaBNwyVH9/eyrqQuAH7XLVrcC7kpzZbmy/C7i1jT2X5ML2FNT7h/YlSRqDBflSXpIvAv8cOCvJAQZPNV0P3JTkKuBx4H1t+m7gUmAG+BHw2wBV9UyS3wf2tnnXVtWRm+YfZvDE1auBb7SXpAW0auvXJ93Cgnns+ssm3cIpZ0HCoqqumGfoojnmFnD1PPvZDmyfoz4NnHciPUqSjp/f4JYkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldhoUkqWukYZHkF5PsG3o9l+SjST6R5OBQ/dKhba5JMpPkoSQXD9XXt9pMkq2j7FuS9GIL8kt586mqh4B1AEmWAAeBrzL4KdVPVdUfD89PshbYCJwL/DxwW5I3t+EbgHcCB4C9SXZV1QOj7F+SNDDSsDjKRcAjVfV4kvnmbAB2VtXzwHeTzAAXtLGZqnoUIMnONtewkKQxGOc9i43AF4fWtyS5N8n2JGe22grgiaE5B1ptvvqLJNmcZDrJ9Ozs7MJ2L0mL2FjCIslpwLuB/9ZKNwJvZHCJ6hDwyYX4nKraVlVTVTW1bNmyhdilJInxXYa6BPh2VT0JcOQdIMlngK+11YPAOUPbrWw1jlGXJI3YuC5DXcHQJagky4fG3gPc35Z3ARuTnJ5kNbAGuAvYC6xJsrqdpWxscyVJYzDyM4skP8vgKaYPDpX/KMk6oIDHjoxV1f4kNzG4cX0YuLqqXmj72QLcCiwBtlfV/lH3LkkaGHlYVNX/BV53VO3KY8y/DrhujvpuYPeCNyhJ6vIb3JKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdY08LJI8luS+JPuSTLfaa5PsSfJwez+z1ZPk00lmktyb5FeG9rOpzX84yaZR9y1J+qlxnVn8i6paV1VTbX0rcHtVrQFub+sAlzD43e01wGbgRhiEC/Bx4C3ABcDHjwSMJGn0JnUZagOwoy3vAC4fqn++Bu4EzkiyHLgY2FNVz1TVs8AeYP2Ye5akRWscYVHAXya5O8nmVju7qg615e8BZ7flFcATQ9seaLX56i+SZHOS6STTs7OzC3kMkrSoLR3DZ/yzqjqY5B8De5J8Z3iwqipJLcQHVdU2YBvA1NTUguxTkjSGM4uqOtjenwK+yuCew5Pt8hLt/ak2/SBwztDmK1ttvrokaQxGemaR5GeBn6mqH7bldwHXAruATcD17f2WtskuYEuSnQxuZv+gqg4luRX4d0M3td8FXDPK3rU4rdr69Um3sGAeu/6ySbegU8ioL0OdDXw1yZHP+q9V9d+T7AVuSnIV8DjwvjZ/N3ApMAP8CPhtgKp6JsnvA3vbvGur6pkR9y5JakYaFlX1KPBLc9SfBi6ao17A1fPsazuwfaF7lCT1+Q1uSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1DWysEhyTpI7kjyQZH+Sj7T6J5IcTLKvvS4d2uaaJDNJHkpy8VB9favNJNk6qp4lSXMb5Y8fHQZ+p6q+neQfAncn2dPGPlVVfzw8OclaYCNwLvDzwG1J3tyGbwDeCRwA9ibZVVUPjLB3SdKQkYVFVR0CDrXlHyZ5EFhxjE02ADur6nngu0lmgAva2Ez71T3a73NvAAwLSRqTsdyzSLIK+GXgW620Jcm9SbYnObPVVgBPDG12oNXmq0uSxmTkYZHk54CbgY9W1XPAjcAbgXUMzjw+uYCftTnJdJLp2dnZhdqtJC16Iw2LJK9iEBRfqKqvAFTVk1X1QlX9BPgMP73UdBA4Z2jzla02X/0lqmpbVU1V1dSyZcsW9mAkaREb5dNQAT4LPFhVfzJUXz407T3A/W15F7AxyelJVgNrgLuAvcCaJKuTnMbgJviuUfUtSXqpUT4N9VbgSuC+JPta7d8AVyRZBxTwGPBBgKran+QmBjeuDwNXV9ULAEm2ALcCS4DtVbV/hH1Lko4yyqeh/geQOYZ2H2Ob64Dr5qjvPtZ2kqTR8hvckqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktQ1yr8NddJatfXrk25hwTx2/WWTbkHSKcAzC0lSl2cWeolT5czKsypp4XhmIUnqMiwkSV2GhSSp66QJiyTrkzyUZCbJ1kn3I0mLyUkRFkmWADcAlwBrGfw069rJdiVJi8dJERbABcBMVT1aVT8GdgIbJtyTJC0aqapJ99CV5L3A+qr61239SuAtVbXlqHmbgc0Ab3jDG85//PHHx96rpJPTqfLIOJzYY+NJ7q6qqaPrJ8uZxctSVduqaqqqppYtWzbpdiTplHGyhMVB4Jyh9ZWtJkkag5MlLPYCa5KsTnIasBHYNeGeJGnROCn+3EdVHU6yBbgVWAJsr6r9E25LkhaNkyIsAKpqN7B70n1I0mJ0slyGkiRNkGEhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSukYSFkn+fZLvJLk3yVeTnNHqq5L8bZJ97fVnQ9ucn+S+JDNJPp0krf7aJHuSPNzezxxFz5Kk+Y3qzGIPcF5V/VPgfwPXDI09UlXr2utDQ/UbgQ8Aa9prfatvBW6vqjXA7W1dkjRGIwmLqvrLqjrcVu8EVh5rfpLlwGuq6s6qKuDzwOVteAOwoy3vGKpLksZkHPcs/hXwjaH11UnuSfLXSd7WaiuAA0NzDrQawNlVdagtfw84e74PSrI5yXSS6dnZ2QVqX5K09Hg3THIb8Po5hj5WVbe0OR8DDgNfaGOHgDdU1dNJzgf+Ism5L/czq6qS1DHGtwHbAKampuadJ0l6ZY47LKrqHccaT/JbwK8BF7VLS1TV88DzbfnuJI8AbwYO8uJLVStbDeDJJMur6lC7XPXU8fYsSTo+o3oaaj3wu8C7q+pHQ/VlSZa05V9gcCP70XaZ6bkkF7anoN4P3NI22wVsasubhuqSpDE57jOLjv8EnA7saU/A3tmefPpV4Nokfwf8BPhQVT3Ttvkw8Dng1QzucRy5z3E9cFOSq4DHgfeNqGdJ0jxGEhZV9aZ56jcDN88zNg2cN0f9aeCiBW1QkvSK+A1uSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1DWysEjyiSQHk+xrr0uHxq5JMpPkoSQXD9XXt9pMkq1D9dVJvtXqX0py2qj6liS91KjPLD5VVevaazdAkrXARuBcYD3wp0mWtJ9bvQG4BFgLXNHmAvxh29ebgGeBq0bctyRpyCQuQ20AdlbV81X1XWAGuKC9Zqrq0ar6MbAT2NB+k/vtwJfb9juAy8fftiQtXqMOiy1J7k2yPcmZrbYCeGJozoFWm6/+OuD7VXX4qLokaUxOKCyS3Jbk/jleG4AbgTcC64BDwCdPvN1uP5uTTCeZnp2dHfXHSdKisfRENq6qd7yceUk+A3ytrR4EzhkaXtlqzFN/GjgjydJ2djE8/+h+tgHbAKampuplHoYkqWOUT0MtH1p9D3B/W94FbExyepLVwBrgLmAvsKY9+XQag5vgu6qqgDuA97btNwG3jKpvSdJLndCZRccfJVkHFPAY8EGAqtqf5CbgAeAwcHVVvQCQZAtwK7AE2F5V+9u+fg/YmeQPgHuAz46wb0nSUUYWFlV15THGrgOum6O+G9g9R/1RBk9LSZImwG9wS5K6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUNZKwSPKlJPva67Ek+1p9VZK/HRr7s6Ftzk9yX5KZJJ9OklZ/bZI9SR5u72eOomdJ0vxGEhZV9ZtVta6q1gE3A18ZGn7kyFhVfWiofiPwAQa/yb0GWN/qW4Hbq2oNcHtblySN0UgvQ7Wzg/cBX+zMWw68pqrurKoCPg9c3oY3ADva8o6huiRpTEZ9z+JtwJNV9fBQbXWSe5L8dZK3tdoK4MDQnAOtBnB2VR1qy98Dzp7vw5JsTjKdZHp2dnaBDkGStPR4N0xyG/D6OYY+VlW3tOUrePFZxSHgDVX1dJLzgb9Icu7L/cyqqiR1jPFtwDaAqampeedJkl6Z4w6LqnrHscaTLAV+HTh/aJvngefb8t1JHgHeDBwEVg5tvrLVAJ5MsryqDrXLVU8db8+SpOMzystQ7wC+U1X///JSkmVJlrTlX2BwI/vRdpnpuSQXtvsc7weOnJ3sAja15U1DdUnSmBz3mcXLsJGX3tj+VeDaJH8H/AT4UFU908Y+DHwOeDXwjfYCuB64KclVwOMMbphLksZoZGFRVb81R+1mBo/SzjV/GjhvjvrTwEUL3Z8k6eXzG9ySpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHWdUFgk+Y0k+5P8JMnUUWPXJJlJ8lCSi4fq61ttJsnWofrqJN9q9S8lOa3VT2/rM2181Yn0LEl65U70zOJ+4NeBbw4Xk6xl8LOq5wLrgT9NsqT9/vYNwCXAWuCKNhfgD4FPVdWbgGeBq1r9KuDZVv9UmydJGqMTCouqerCqHppjaAOws6qer6rvAjPABe01U1WPVtWPgZ3AhiQB3g58uW2/A7h8aF872vKXgYvafEnSmIzqnsUK4Imh9QOtNl/9dcD3q+rwUfUX7auN/6DNf4kkm5NMJ5menZ1doEORJC3tTUhyG/D6OYY+VlW3LHxLx6+qtgHbAKampmrC7UjSKaMbFlX1juPY70HgnKH1la3GPPWngTOSLG1nD8Pzj+zrQJKlwD9q8yVJYzKqy1C7gI3tSabVwBrgLmAvsKY9+XQag5vgu6qqgDuA97btNwG3DO1rU1t+L/BXbb4kaUy6ZxbHkuQ9wH8ElgFfT7Kvqi6uqv1JbgIeAA4DV1fVC22bLcCtwBJge1Xtb7v7PWBnkj8A7gE+2+qfBf5zkhngGQYBI0kL6rHrL5t0C3+v5VT9R/rU1FRNT09Pug1JOqkkubuqpo6u+w1uSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkrpO2S/lJZkFHp90Hx1nAX8z6SYmxGNfvBbz8Z8Mx/5PqmrZ0cVTNixOBkmm5/qm5GLgsS/OY4fFffwn87F7GUqS1GVYSJK6DIvJ2jbpBibIY1+8FvPxn7TH7j0LSVKXZxaSpC7DQpLUZVhMQJL1SR5KMpNk66T7Gack25M8leT+SfcybknOSXJHkgeS7E/ykUn3NC5J/kGSu5L8r3bs/3bSPY1bkiVJ7knytUn3cjwMizFLsgS4AbgEWAtckWTtZLsaq88B6yfdxIQcBn6nqtYCFwJXL6L/7Z8H3l5VvwSsA9YnuXCyLY3dR4AHJ93E8TIsxu8CYKaqHq2qHwM7gQ0T7mlsquqbDH5LfdGpqkNV9e22/EMG/+FYMdmuxqMG/k9bfVV7LZqna5KsBC4D/nzSvRwvw2L8VgBPDK0fYJH8B0M/lWQV8MvAtybcyti0yzD7gKeAPVW1aI4d+A/A7wI/mXAfx82wkMYsyc8BNwMfrarnJt3PuFTVC1W1DlgJXJDkvAm3NBZJfg14qqrunnQvJ8KwGL+DwDlD6ytbTYtAklcxCIovVNVXJt3PJFTV94E7WDz3rt4KvDvJYwwuO789yX+ZbEuvnGExfnuBNUlWJzkN2AjsmnBPGoMkAT4LPFhVfzLpfsYpybIkZ7TlVwPvBL4z0abGpKquqaqVVbWKwf/f/6qq/uWE23rFDIsxq6rDwBbgVgY3OG+qqv2T7Wp8knwR+J/ALyY5kOSqSfc0Rm8FrmTwL8t97XXppJsak+XAHUnuZfAPpj1VdVI+QrpY+ec+JEldnllIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqSu/wcfTuSq+bwGyAAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# linear regression feature importance\n", - "from sklearn.datasets import make_regression\n", - "from sklearn.linear_model import LinearRegression\n", - "from matplotlib import pyplot\n", - "\n", - "# get importance\n", - "importance = regr.coef_\n", - "# summarize feature importance\n", - "for i,v in enumerate(importance):\n", - "\tprint('Feature: %0d, Score: %.5f' % (i,v))\n", - "# plot feature importance\n", - "pyplot.bar([x for x in range(len(importance))], importance)\n", - "pyplot.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}