-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
150 lines (95 loc) · 4.16 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import numpy as np
import pandas as pd
import scipy
from scipy.optimize import linprog
# Read in the data
county_education = pd.read_excel('data/Education.xls', skiprows=(0, 1, 2, 3), usecols=(0, 5, 6, 43, 44, 45, 46))
# Column 0 is the FIPS code, 19 is the most recent population estimate
population_estimation = pd.read_excel('data/PopulationEstimates.xls', skiprows=(0, 1), usecols=(0, 19))
# Rename column so that the merge works correctly
population_estimation = population_estimation.rename({'FIPStxt': 'FIPS Code'}, axis='columns')
# Column 0 is the FIPS code
# 85 is the most recent unemployment rate, 86 is the percent of median income compared to average
unemployment = pd.read_excel('data/Unemployment.xls', skiprows=(0, 1, 2, 3), usecols=(0, 85, 87))
# Rename column so that the merge works correctly
unemployment = unemployment.rename({'fips_txt': 'FIPS Code'}, axis='columns')
# Column 0 is the FIPS code
# 10 is percent of people in poverty
poverty = pd.read_excel('data/PovertyEstimates.xls', skiprows=(0, 1, 2, 3), usecols=(0, 10))
# Rename column so that the merge works correctly
poverty = poverty.rename({'FIPStxt': 'FIPS Code'}, axis='columns')
# Get mask use data
mask_use = pd.read_csv('data/mask-use-by-county.csv')
# Rename column so that the merge works correctly
mask_use = mask_use.rename({'FIPS': 'FIPS Code'}, axis='columns')
# Get cases and death information
case_info = pd.read_csv('data/us-counties-covid-death-July.csv')
# Rename column so that the merge works correctly
case_info = case_info.rename({'fips': 'FIPS Code'}, axis='columns')
# Now sum to get the total number of cases and deaths for each county
case_info_totals = case_info.groupby(['FIPS Code']).sum()
# Merge all data with mask use data by the County FIPS code
data = mask_use.merge(county_education, on='FIPS Code', how='inner')
data = data.merge(population_estimation, on='FIPS Code', how='inner')
data = data.merge(unemployment, on='FIPS Code', how='inner')
data = data.merge(case_info_totals, on='FIPS Code', how='inner')
data = data.drop('FIPS Code', axis=1)
# Turn Data into a numpy array
data_array = data.to_numpy()
#Create an empty list to hold the Data Envelopment Scores.
dea_scores = []
# Class that takes in a data set and has a method
# that returns the efficiency score of a given row.
class DEA:
def __init__(self,data):
self.data = data
def get_coef_matrix(self):
matrix = []
for i in range(self.data.shape[0]):
new_row = []
for j in range(self.data.shape[1]-2):
x = -self.data[i,j]
new_row.append(x)
new_row.append(self.data[i,14])
new_row.append(self.data[i,15])
matrix.append(new_row)
new_matrix = np.array(matrix)
return new_matrix
def compute_score(self,row):
c_vec = []
for i in range(self.data.shape[1]-2):
c_vec.append(self.data[row,i])
c_vec.append(-self.data[row,14])
c_vec.append(-self.data[row,15])
c_vec = np.array(c_vec)
A_matrix = self.get_coef_matrix()
v_matrix = []
for i in range(self.data.shape[1]-2):
v_matrix.append(self.data[row,i])
v_matrix.append(0)
v_matrix.append(0)
v_matrix = np.array(v_matrix)
v_matrix =v_matrix.reshape((1,16))
zero_vec = [0 for i in range(self.data.shape[0])]
zero_vec = np.array(zero_vec)
one_vec = [1]
one_vec = np.array(one_vec)
res = scipy.optimize.linprog(c_vec, A_ub = A_matrix, b_ub = zero_vec, A_eq = v_matrix, b_eq = one_vec )
weights = res.x
a_1 = self.data[row,-1]
a_2 = self.data[row,-2]
u_1 = weights[-1]
u_2 = weights[-2]
score = a_1*u_1 + a_2*u_2
return score
#create an instance of the DEA class using the data set we have above
dea_instance = DEA(data_array)
# Loop through the data to get a score for each row.
for i in range(data.shape[0]):
score = dea_instance.compute_score(i)
dea_scores.append(score)
if i%10 == 0:
print(score)
dea_scores = np.array(dea_scores)
dea_scores = dea_scores.transpose()
np.savetxt('dea_scores.csv',dea_scores)