-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathpct.go
139 lines (124 loc) · 2.7 KB
/
pct.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"sort"
)
// usage prints the command synopsis, a short description of the tool and the
// registered flag defaults. The help text goes to standard error;
// flag.PrintDefaults writes to the flag package's configured output, which is
// standard error unless it has been changed.
func usage() {
	// The literal ends with a blank line that separates the description from
	// the flag list. It is written with Fprint rather than Fprintln because the
	// text already carries its own newlines; vet's printf check rejects
	// Fprintln on a constant that ends in "\n".
	const help = `usage: ... | pct [-f] [-n] [-c] [-q] [-x]
pct calculates the distribution of lines in text.
It is similar to sort | uniq -c | sort -n -r, except
that it prints percentages as well as counts.

`
	fmt.Fprint(os.Stderr, help)
	flag.PrintDefaults()
}
// recorder is the counting backend used by pct and dump: lines are fed in
// one at a time and the accumulated counts are read back as stringCount
// values.
type recorder interface {
	// Record registers one occurrence of line.
	Record(line []byte)
	// Top reports the counts for at most n lines.
	Top(n int) []stringCount
	// All reports the counts for every line the recorder is tracking.
	All() []stringCount
}
// mcount is the exact, map-backed counter: each distinct line is a key and
// its value is the number of times that line has been recorded.
type mcount map[string]uint64

// Record adds one occurrence of the line held in b. The bytes are copied into
// the string key, so the map does not alias b.
func (m mcount) Record(b []byte) {
	key := string(b)
	m[key]++
}
// Top returns the first n entries of All. When fewer than n distinct lines
// have been recorded, every entry is returned. A negative n yields an empty
// result rather than panicking on the slice expression.
func (m mcount) Top(n int) []stringCount {
	// All already builds and sorts the full list; reuse it instead of
	// repeating the same loop here.
	l := m.All()
	switch {
	case n < 0:
		return l[:0]
	case n > len(l):
		return l
	}
	return l[:n]
}
// All returns one stringCount per distinct recorded line, sorted with
// stringsByCount. The result is nil when nothing has been recorded.
func (m mcount) All() []stringCount {
	var counts []stringCount
	for line, seen := range m {
		entry := stringCount{s: line, n: seen}
		counts = append(counts, entry)
	}
	// Map iteration order is random, so the ordering comes entirely from the
	// sort.
	sort.Sort(stringsByCount(counts))
	return counts
}
// dump writes the distribution currently held by r to w, one entry per line.
//
// tot is the number of lines recorded so far and is the denominator for the
// percentages. The package-level flags select what is printed:
//
//   - limit: when positive only r.Top(*limit) is printed, otherwise r.All().
//   - cum:   percent, cumulative percent, count and line.
//   - quiet: the line text only.
//   - neither: percent, count and line.
//
// cum is checked before quiet, so it wins if both are set.
//
// dump returns the first write error, including one reported by the final
// flush.
func dump(w io.Writer, tot int, r recorder) error {
	// With nothing recorded there is nothing to scale; keep the factor at 0
	// rather than letting 100/0 produce +Inf.
	var scale float64
	if tot > 0 {
		scale = 100 / float64(tot)
	}

	// Any non-positive limit means "no limit". Passing a negative value on to
	// Top would make a slice-based implementation panic.
	var top []stringCount
	if *limit <= 0 {
		top = r.All()
	} else {
		top = r.Top(*limit)
	}

	// Buffer the report so a long distribution does not cost one write on w
	// per line.
	bw := bufio.NewWriter(w)
	var runtot uint64
	for _, line := range top {
		runtot += line.n
		p := scale * float64(line.n)
		var err error
		switch {
		case *cum:
			_, err = fmt.Fprintf(bw, "% 6.2f%% % 6.2f%%% 6d %s\n", p, scale*float64(runtot), line.n, line.s)
		case *quiet:
			_, err = fmt.Fprintln(bw, line.s)
		default:
			_, err = fmt.Fprintf(bw, "% 6.2f%%% 6d %s\n", p, line.n, line.s)
		}
		if err != nil {
			return err
		}
	}
	return bw.Flush()
}
// pct reads r line by line, records every line in rec and writes the
// resulting distribution to w with dump.
//
// When the package-level flag every is positive, a running report followed by
// a blank separator line is written after each multiple of *every lines. A
// final report is always written once the input is exhausted.
//
// If reading fails, pct still writes a report for the lines seen so far,
// followed by a "Stopped at line" notice on w, and returns the read error.
// Otherwise it returns the first error encountered while writing.
func pct(r io.Reader, w io.Writer, rec recorder) error {
	s := bufio.NewScanner(r)
	n := 0
	for s.Scan() {
		rec.Record(s.Bytes())
		n++
		if *every <= 0 || n%*every != 0 {
			continue
		}
		if err := dump(w, n, rec); err != nil {
			return err
		}
		// Blank line between successive running reports.
		if _, err := fmt.Fprintln(w); err != nil {
			return err
		}
	}
	if err := s.Err(); err != nil {
		// Best effort: show what was counted before the read failed. A write
		// failure here is deliberately ignored so that it cannot mask the
		// read error, which is what the caller is told about.
		_ = dump(w, n, rec)
		_, _ = fmt.Fprintf(w, "Stopped at line %d: %v\n", n, err)
		return err
	}
	return dump(w, n, rec)
}
// Command-line flags, listed by flag letter. Each variable is the pointer
// returned by the flag package; its value is set by flag.Parse in main.
var (
	// cum (-c) adds a cumulative-percent column to the report.
	cum = flag.Bool("c", false, "print cumulative percents as well")
	// every (-f) requests a running report after every f input lines.
	every = flag.Int("f", 0, "print running percents every f lines, requires -n")
	// limit (-n) restricts the report to the top n lines; 0 means no limit.
	limit = flag.Int("n", 0, "only print top n lines")
	// quiet (-q) prints only the line text, without percents or counts.
	quiet = flag.Bool("q", false, "print only sorted lines; do not print percents or counts")
	// approx (-x) selects the approximate counter instead of the exact map.
	approx = flag.Bool("x", false, "use a fast approximate counter, only suitable for large input, requires -n")
)
// main parses and validates the command-line flags, selects a recorder and
// runs pct from standard input to standard output.
//
// Exit status: 2 for an invalid flag combination, 1 if pct reports an error,
// 0 otherwise.
func main() {
	flag.Usage = usage
	flag.Parse()

	// Validate flag combinations up front. The first three messages are the
	// existing ones; the last rejects a negative -n, which would otherwise
	// reach the recorder's Top as a negative count.
	var problem string
	switch {
	case *approx && *limit <= 0:
		problem = "-x requires -n > 0"
	case *every != 0 && *limit <= 0:
		problem = "-f requires -n > 0"
	case *cum && *quiet:
		problem = "-c and -q conflict"
	case *limit < 0:
		problem = "-n must not be negative"
	}
	if problem != "" {
		fmt.Fprintln(os.Stderr, problem)
		os.Exit(2)
	}

	var r recorder
	if *approx {
		// NOTE(review): 8 and 16384 are tuning parameters of newTopK, which
		// is defined elsewhere — confirm their meaning there.
		r = newTopK(*limit, 8, 16384)
	} else {
		r = make(mcount)
	}

	// Report failures and exit non-zero so that callers and shell pipelines
	// can tell that the output is incomplete.
	if err := pct(os.Stdin, os.Stdout, r); err != nil {
		fmt.Fprintln(os.Stderr, "pct:", err)
		os.Exit(1)
	}
}