-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgraphing.py
166 lines (127 loc) · 5.21 KB
/
graphing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""Input to all functions should be the data from csv"""
import logging
import warnings
import pandas as pd
from pandas import DataFrame
import ggplot
warnings.filterwarnings('ignore')
def graph(data):
""" Takes only most recent input data and then displays graphs """
# get all data because we need to see trend over time
data = convert_to_ints(data)
# display generated graphs
print(graph1(data))
print(graph2(data))
print(graph3(data))
logging.info("Calls and prints graphical data")
def convert_to_ints(data):
""" The numerical answers in the input comes in as strings.
Convert these to integers so they can be graphed. """
output = list()
for row in data:
cur = list()
for item in row:
try:
cur.append(int(item))
except ValueError:
cur.append(item)
output.append(cur)
return output
def find_time_stamp(data):
""" Finds the timestamp from the responses """
columns = data[0]
for i in range(0, len(columns)):
if columns[i] == "Timestamp":
return i
def graph1(score_data):
""" Average score as time goes on;
Creates and returns graph 1, a line graph. """
date_column = score_data[0][find_time_stamp(score_data)]
data = DataFrame(score_data[1:], columns=score_data[0])
# Get all columns that arlabels = date_format("%Y-%m-%d")e numerical
# questions so we know what to graph
num_questions = data.select_dtypes(include=['int64']).columns.values
# Melt data so that each question is in a seperate row
new_data = pd.melt(
data,
id_vars=date_column,
value_vars=num_questions,
var_name="Question",
value_name="Score")
# Convert date string into an actual date type
new_data[date_column] = pd.to_datetime(
new_data[date_column], format="%m/%d/%Y")
# Group all rows with same date and question, and then take the average.
new_data = new_data.groupby([date_column, 'Question']).mean().reset_index()
new_data['All'] = "Indiviual Questions"
new_data2 = new_data.groupby(date_column).mean().reset_index()
new_data2['Question'] = "All Questions"
new_data2['All'] = "Average of All Questions"
new_data = pd.concat([new_data, new_data2])
new_data[date_column] = new_data[date_column].astype('int64')
# Create time graph with seperate lines for each question
ret = ggplot.ggplot(ggplot.aes(x=date_column, y="Score", colour="Question"), new_data) +\
ggplot.geom_point() +\
ggplot.geom_line() +\
ggplot.facet_grid("All") +\
ggplot.scale_x_continuous(labels=[""], breaks=0) +\
ggplot.labs(x="Time", y="Average Question Score") +\
ggplot.ggtitle("Question Scores Over Time")
return ret
def graph2(score_data):
""" Average scores for each question on most recent date;
Creates and returns graph 2, a bar graph. """
date_column = score_data[0][find_time_stamp(score_data)]
columns_data = score_data[0]
for i in range(0, len(columns_data)):
columns_data[i] = columns_data[i].split('.')[0]
data = DataFrame(score_data[1:], columns=columns_data)
# Get all columns that are numerical questions so we know what to graph
num_questions = data.select_dtypes(include=['int64']).columns.values
# Melt data so that each question is in a seperate row
new_data = pd.melt(
data,
id_vars=date_column,
value_vars=num_questions,
var_name="Question",
value_name="Score")
# Convert date string into actual data type
new_data[date_column] = pd.to_datetime(
new_data[date_column], format="%m/%d/%Y")
# Latest Dates
recent_date = new_data[date_column].max()
# Removing all dates that are recent
new_data = new_data[new_data.Timestamp == recent_date]
# Group all rows with question, and then take the average.
new_data = new_data.groupby(['Question']).mean().reset_index()
# Create bar graph with data from past week
ret = ggplot.ggplot(ggplot.aes(x="Question", weight="Score"), new_data) +\
ggplot.geom_bar() +\
ggplot.ggtitle("Most Recent Average Scores")
return ret
def graph3(score_data):
""" Box plot for scores;
Creates and returns graph 3, a box plot. """
date_column = score_data[0][find_time_stamp(score_data)]
data = DataFrame(score_data[1:], columns=score_data[0])
# Get all columns that are numerical questions
num_questions = data.select_dtypes(include=['int64']).columns.values
# Melt data so that each question is in a seperate row
new_data = pd.melt(
data,
id_vars=[
date_column,
"Name"],
value_vars=num_questions,
var_name="Question",
value_name="Score")
# Get rid of unecessary column
new_data = new_data.drop('Name', axis=1)
# Convert date string into an actual date type
new_data[date_column] = pd.to_datetime(
new_data[date_column], format="%m/%d/%Y")
# Create box plot graph
box_plot = ggplot.ggplot(ggplot.aes(x=date_column, y='Score'), new_data) +\
ggplot.geom_boxplot() +\
ggplot.ggtitle("Distribution of Question Scores over Time")
return box_plot