-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathstats.go
136 lines (115 loc) · 2.78 KB
/
stats.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
package main
import (
"sort"
"github.com/montanaflynn/stats"
)
// statsSummary is the common contract of the column summarizers in this file
// (ContinuousStats and DiscreteStats).
type statsSummary interface {
	// summary computes the statistics for the raw column values in a and
	// stores them on the receiver.
	summary(a []string)

	// getSummaryData returns the rows produced by the most recent summary
	// call; each row is a slice of strings.
	getSummaryData() [][]string

	// getSummaryStr summarizes a and renders the result as text.
	getSummaryStr(a []string) string
}
// DiscreteStats summarizes a column of categorical (string) values by
// counting how often each distinct value occurs.
type DiscreteStats struct {
	// data is the input slice most recently passed to summary.
	data []string
	// summaryData holds {value, count} rows built by summary.
	summaryData [][]string
	// NOTE: a "count int" field used to live here; it is currently disabled.
	// counter maps each distinct value to its number of occurrences.
	counter map[string]int
}
// ContinuousStats summarizes a column of numeric values with the usual
// descriptive statistics.
type ContinuousStats struct {
	// data is the numeric form of the input most recently passed to summary.
	data []float64
	// summaryData holds {label, value} rows built by summary.
	summaryData [][]string
	// count is the number of input strings handed to summary.
	count int

	// Descriptive statistics filled in by summary.
	min, max, mean, median, sd float64
	// Quartiles filled in by summary.
	q1, q2, q3 float64
}
// summary converts a to numeric data with stats.LoadRawData, computes the
// descriptive statistics and quartiles with the stats package, stores them on
// s, and rebuilds s.summaryData as a list of {label, value} rows.
//
// s.count is len(a), i.e. the number of input strings.
// NOTE(review): if LoadRawData drops values it cannot parse, count may be
// larger than the number of values the statistics were computed from.
func (s *ContinuousStats) summary(a []string) {
	s.count = len(a)
	data := stats.LoadRawData(a)
	s.data = data

	// Errors from the stats package are intentionally discarded: this method
	// has no error return (its signature is fixed by statsSummary), so each
	// field simply keeps whatever value the library returned with the error.
	s.min, _ = stats.Min(data)
	s.max, _ = stats.Max(data)
	s.mean, _ = stats.Mean(data)
	s.median, _ = stats.Median(data)
	s.sd, _ = stats.StandardDeviation(data)
	q, _ := stats.Quartile(data)
	s.q1, s.q2, s.q3 = q.Q1, q.Q2, q.Q3

	s.summaryData = [][]string{
		{"#Count", I2S(s.count)},
		{"#Min", F2S(s.min)},
		{"#Max", F2S(s.max)},
		// Fixed: this row previously rendered s.median under the "#Mean" label.
		{"#Mean", F2S(s.mean)},
		{"#Median", F2S(s.median)},
		{"#SD", F2S(s.sd)},
		{"#Q1", F2S(s.q1)},
		{"#Q2", F2S(s.q2)},
		{"#Q3", F2S(s.q3)},
	}
}
// getSummaryData returns the {label, value} rows stored on s. The returned
// slice is the receiver's own slice, not a copy.
func (s *ContinuousStats) getSummaryData() [][]string {
	rows := s.summaryData
	return rows
}
// getSummaryStr runs summary on a and renders every summary row as a line of
// the form "#<label> : <value>\n", returning the concatenation of all lines.
func (s *ContinuousStats) getSummaryStr(a []string) string {
	s.summary(a)

	var out string
	rows := s.getSummaryData()
	for idx := 0; idx < len(rows); idx++ {
		label, value := rows[idx][0], rows[idx][1]
		out += "#" + label + " : " + value + "\n"
	}
	return out
}
// summary counts how often each distinct value occurs in a and rebuilds
// s.summaryData as {value, count} rows ordered by descending count. Values
// with equal counts are ordered by ascending value so the result is
// deterministic.
//
// Every call starts from a clean state: s.counter and s.summaryData are
// replaced, not extended, so calling summary repeatedly does not accumulate
// duplicate rows.
func (s *DiscreteStats) summary(a []string) {
	s.data = a

	// Tally occurrences per distinct value.
	s.counter = make(map[string]int)
	for _, row := range a {
		s.counter[row]++
	}

	type entry struct {
		key   string
		count int
	}
	ranked := make([]entry, 0, len(s.counter))
	for k, n := range s.counter {
		ranked = append(ranked, entry{k, n})
	}
	// Map iteration order is random and sort.Slice is not stable, so ties are
	// broken on the key to keep the output reproducible.
	sort.Slice(ranked, func(i, j int) bool {
		if ranked[i].count != ranked[j].count {
			return ranked[i].count > ranked[j].count
		}
		return ranked[i].key < ranked[j].key
	})

	// Drop rows left over from a previous call before appending the new ones.
	// A fresh slice is used (rather than truncating) so slices previously
	// handed out by getSummaryData are not overwritten.
	s.summaryData = nil
	for _, e := range ranked {
		s.summaryData = append(s.summaryData, []string{e.key, I2S(e.count)})
	}
}
// getSummaryData returns the {value, count} rows stored on s. The returned
// slice is the receiver's own slice, not a copy.
func (s *DiscreteStats) getSummaryData() [][]string {
	rows := s.summaryData
	return rows
}
// getSummaryStr runs summary on a and renders the result as text in two
// sections:
//
//  1. one "#<name> : <value>\n" line per row of the summary data;
//  2. a separator and the "Top 20 variable" header, followed by at most 20
//     "#<value> : <count>\n" lines for the most frequent values.
//
// Values are ranked by descending count; equal counts are ordered by
// ascending value so the output is deterministic.
func (s *DiscreteStats) getSummaryStr(a []string) string {
	// topN matches the number promised by the section header below.
	const topN = 20

	s.summary(a)

	// Lines are accumulated in a byte buffer so building the output stays
	// linear in its size even when there are many distinct values.
	var buf []byte
	writeRow := func(name, value string) {
		buf = append(buf, '#')
		buf = append(buf, name...)
		buf = append(buf, " : "...)
		buf = append(buf, value...)
		buf = append(buf, '\n')
	}

	for _, row := range s.getSummaryData() {
		writeRow(row[0], row[1])
	}
	buf = append(buf, "----------\n"+"Top 20 variable\n\n"...)

	type entry struct {
		key   string
		count int
	}
	ranked := make([]entry, 0, len(s.counter))
	for k, n := range s.counter {
		ranked = append(ranked, entry{k, n})
	}
	// Map iteration order is random and sort.Slice is not stable, so ties are
	// broken on the key to keep the output reproducible.
	sort.Slice(ranked, func(i, j int) bool {
		if ranked[i].count != ranked[j].count {
			return ranked[i].count > ranked[j].count
		}
		return ranked[i].key < ranked[j].key
	})
	// Previously every value was listed here despite the "Top 20" header.
	if len(ranked) > topN {
		ranked = ranked[:topN]
	}
	for _, e := range ranked {
		writeRow(e.key, I2S(e.count))
	}
	return string(buf)
}