-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstatistics.py
46 lines (37 loc) · 1.21 KB
/
statistics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
def getVariance(dataset):
"""
Computes the variance (see ``numpy.var``).
"""
return np.var(dataset)
def findOutliers(dataset):
"""
Returns all outliers in a given dataset, which are not within a symmetric range of 3 * standard deviation around the mean value.
"""
mean = np.mean(dataset)
std = np.std(dataset)
out = std*3
lowerLimit = mean-out
upperLimit = mean+out
outliers=[]
for datapoint in dataset:
if datapoint > upperLimit or datapoint < lowerLimit:
outliers.append(datapoint)
return outliers
def getCorrelation(x,y):
"""
Computes the pearson correlation coefficient (see ``scipy.stats.pearsonr``).
"""
return stats.pearsonr(x,y)
def mean_confidence_interval(data, confidence=0.95):
"""
Computes the mean and the confidence interval (see ``scipy.stats.t``).
"""
# Modified version of https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
a = 1.0 * np.array(data)
n = len(a)
m, se = np.mean(a), stats.sem(a)
h = se * stats.t.ppf((1 + confidence) / 2., n-1)
return m, m-h, m+h