-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysis.py
63 lines (50 loc) · 2.11 KB
/
analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import random
import numpy as np
import pandas as pd
import elo
import matplotlib.pyplot as plt
def ParseFeedbackFile(filename, df, num_variables=3, num_models=20,
                      num_shuffles=100):
    """
    Compute Elo ratings from the community feedback selections.

    Each usable line of the feedback file is a whitespace-separated list
    of integers. After the last three values are dropped (the final one
    being the time, which is padded with -1 when missing), the remainder
    is read as consecutive triples ``(first, second, chosen)``. The entry
    whose index equals ``chosen`` wins the match when it is ``first``;
    in every other case ``second`` is treated as the winner.

    Elo depends on the order in which matches are played, so the whole
    rating pass is repeated over ``num_shuffles`` random orderings of the
    records. The mean, minimum and maximum rating seen for each entry are
    written to the columns 'Elo rating mean', 'Elo rating min' and
    'Elo rating max' of ``df``.

    Parameters
    ----------
    filename : str
        Path to the text file holding the feedback form selections.
    df : pandas.DataFrame
        Table that receives the rating columns. It is modified in place.
        Ratings are assigned positionally, so row ``i`` gets the rating
        of entry index ``i``; the frame is expected to hold
        ``num_variables * num_models`` rows.
    num_variables : int, optional
        Number of variables (default 3).
    num_models : int, optional
        Number of models (default 20).
    num_shuffles : int, optional
        Number of random orderings to rate (default 100).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the three rating columns added.

    Raises
    ------
    ValueError
        If ``num_shuffles`` is less than 1, if a record contains a token
        that is not an integer, or if the number of values left after
        trimming is not a multiple of three.
    """
    if num_shuffles < 1:
        raise ValueError("num_shuffles must be at least 1")

    # Read everything up front so the file handle is closed immediately.
    with open(filename) as feedback:
        lines = feedback.readlines()

    # Parse each line exactly once. Skipped lines are kept as empty match
    # lists so the list being shuffled has the same length as the file,
    # which keeps the permutations identical for a given `random` state.
    records = []
    for line in lines:
        # sometimes people click the button without having selected any entries
        if "NaN" in line:
            records.append([])
            continue
        tokens = line.split()
        # some records may lack a time at the end, append a -1
        if len(tokens) % 3 == 2:
            tokens.append("-1")
        values = np.asarray(tokens, dtype=int)
        records.append(values[:-3].reshape((-1, 3)).tolist())

    num_entries = num_variables * num_models
    ratings_per_shuffle = []
    for _ in range(num_shuffles):
        random.shuffle(records)
        # every entry starts each pass from a fresh default rating
        ratings = [elo.Rating() for _ in range(num_entries)]
        for matches in records:
            for first, second, chosen in matches:
                if chosen == first:
                    winner, loser = first, second
                else:
                    winner, loser = second, first
                ratings[winner], ratings[loser] = elo.rate_1vs1(
                    ratings[winner], ratings[loser])
        ratings_per_shuffle.append([float(r) for r in ratings])

    # rating is the mean, also report out the envelope
    history = np.asarray(ratings_per_shuffle)
    df['Elo rating mean'] = history.mean(axis=0)
    df['Elo rating min'] = history.min(axis=0)
    df['Elo rating max'] = history.max(axis=0)
    return df
if __name__ == "__main__":
    # Load the comparison table and attach the Elo rating columns
    # derived from the feedback form selections.
    ratings_table = ParseFeedbackFile(
        'feedback_form_data.txt',
        pd.read_csv('method_comparison_data.csv'),
    )

    # Draw one scatter plot per value of the 'variable' column, with the
    # mean Elo rating on x and the bias score on y.
    for variable_name, variable_rows in ratings_table.groupby('variable'):
        axes = variable_rows.plot(
            kind='scatter', x='Elo rating mean', y='bias score')
        axes.set_title(variable_name)

    # Display every figure once they have all been created.
    plt.show()