-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocessInputs.py
129 lines (98 loc) · 6.33 KB
/
processInputs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Dependencies
import pandas as pd
from pull import SQL_Pull
import plotly
def scores (dictionaryOfUserInput):
w_budget = dictionaryOfUserInput["budget"]
w_sales = dictionaryOfUserInput["salesWeight"]
w_crime = dictionaryOfUserInput["crimeWeight"]
w_schools = dictionaryOfUserInput["schoolWeight"]
w_acreage = dictionaryOfUserInput["acreageWeight"]
w_SQ_FT = dictionaryOfUserInput["sqftWeight"]
w_flood = dictionaryOfUserInput["floodWeight"]
w_change = dictionaryOfUserInput["changeValueWeight"]
# call SQL_Pull function to query the database and create a dataframe
df = SQL_Pull(w_budget)
# Normalize data for each parameter
max=df['sales_neighborhood_2019'].max()
min=df['sales_neighborhood_2019'].min()
df["Sales Index"]=(df['sales_neighborhood_2019']-min)/(max-min)*100
max=df['Offense_Count'].max()
min=df['Offense_Count'].min()
df["Crime Index"]=(df['Offense_Count']-min)/(max-min)*100
max=df['school_rating'].max()
min=df['school_rating'].min()
df["School Rating Index"]=(df['school_rating']-min)/(max-min)*100
max=df['acreage'].max()
min=df['acreage'].min()
df["Acreage Index"]=(df['acreage']-min)/(max-min)*100
max=df['sq_ft'].max()
min=df['sq_ft'].min()
df["SQ_FT Index"]=(df['sq_ft']-min)/(max-min)*100
max=df['flood_risk'].max()
min=df['flood_risk'].min()
df["Flood Risk Index"]=(df['flood_risk']-min)/(max-min)*100
max=df['pct_value_change'].max()
df['Valuation Index']=df['pct_value_change']/max*100
# Calculate scores for each address.
total_weights=w_sales+w_crime+w_schools+w_acreage+w_SQ_FT+w_flood+w_change
# Add calculated scores to the dataframe
df["Sales Index_W"]=w_sales*df['Sales Index']
df['Crime Index_W']= w_crime*df['Crime Index']
df["School Rating Index_W"]=w_schools*df['School Rating Index']
df["Acreage Index_W"]= w_acreage*df['Acreage Index']
df["SQ_FT_Index_W"]= w_SQ_FT*df['SQ_FT Index']
df["Flood Risk Index_W"]=w_flood*df['Flood Risk Index']
df['Valuation Index_W']= w_change*df['Valuation Index']
# Calculate total score per row
df["Score"]=round((w_sales*df['Sales Index']-
w_crime*df['Crime Index']+
w_schools*df['School Rating Index']+
w_acreage*df['Acreage Index']+
w_SQ_FT*df['SQ_FT Index']-
w_flood*df['Flood Risk Index']+
w_change*df['Valuation Index'])/total_weights,2)
# look at only the parameters of interest
parameter_and_score = df[['latitude','longitude', 'pct_value_change',
'acreage','sq_ft','Offense_Count','Sales Index','Crime Index','School Rating Index',
'Acreage Index','SQ_FT Index', 'Flood Risk Index', 'Valuation Index','Score',
'total_appraised_value_2019','neighborhood']]
# group parameters by neighborhood name
neighborhood_group = parameter_and_score.groupby(['neighborhood']).mean()
# add count of residences per neighborhood
residence_count = parameter_and_score.groupby(["neighborhood"]).count()
renamed_count = residence_count.rename(columns = {"latitude":"Counts"})
single_column_counts = renamed_count["Counts"]
# merge the counts to the neighborhood group
neighborhood_group_with_counts = neighborhood_group.merge(single_column_counts,how='inner',on='neighborhood')
# To get to the top list, neighborhoods need positive valuation index and non-zero sales index
neighborhood_group_with_counts=neighborhood_group_with_counts.loc[(neighborhood_group_with_counts['Valuation Index']>0)&(neighborhood_group_with_counts['Sales Index']>0)&(neighborhood_group_with_counts['Score']>0),:]
min=neighborhood_group_with_counts['Valuation Index'].min()
max=neighborhood_group_with_counts['Valuation Index'].max()
neighborhood_group_with_counts['Valuation Index']=(neighborhood_group_with_counts['Valuation Index']-min)/(max-min)*100
min=neighborhood_group_with_counts['Score'].min()
max=neighborhood_group_with_counts['Score'].max()
neighborhood_group_with_counts['Score']=(neighborhood_group_with_counts['Score']-min)/(max-min)*100
min=neighborhood_group_with_counts['Sales Index'].min()
max=neighborhood_group_with_counts['Sales Index'].max()
neighborhood_group_with_counts['Sales Index']=(neighborhood_group_with_counts['Sales Index']-min)/(max-min)*100
min=neighborhood_group_with_counts['Crime Index'].min()
max=neighborhood_group_with_counts['Crime Index'].max()
neighborhood_group_with_counts['Crime Index']=(neighborhood_group_with_counts['Crime Index']-min)/(max-min)*100
min=neighborhood_group_with_counts['School Rating Index'].min()
max=neighborhood_group_with_counts['School Rating Index'].max()
neighborhood_group_with_counts['School Rating Index']=(neighborhood_group_with_counts['School Rating Index']-min)/(max-min)*100
min=neighborhood_group_with_counts['Acreage Index'].min()
max=neighborhood_group_with_counts['Acreage Index'].max()
neighborhood_group_with_counts['Acreage Index']=(neighborhood_group_with_counts['Acreage Index']-min)/(max-min)*100
min=neighborhood_group_with_counts['SQ_FT Index'].min()
max=neighborhood_group_with_counts['SQ_FT Index'].max()
neighborhood_group_with_counts['SQ_FT Index']=(neighborhood_group_with_counts['SQ_FT Index']-min)/(max-min)*100
min=neighborhood_group_with_counts['Flood Risk Index'].min()
max=neighborhood_group_with_counts['Flood Risk Index'].max()
neighborhood_group_with_counts['Flood Risk Index']=(neighborhood_group_with_counts['Flood Risk Index']-min)/(max-min)*100
neighborhood_group_with_counts=neighborhood_group_with_counts.rename(columns={"total_appraised_value_2019":"Mean Value 2019"})
# sort scores
ranked_neighborhoods = neighborhood_group_with_counts.sort_values('Score',ascending=False)
top5neighborhoods= ranked_neighborhoods.head()
return top5neighborhoods