-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbit.go
360 lines (328 loc) · 8.79 KB
/
bit.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
// Package bit implements an encoding scheme in which each byte is represented by eight binary digits, '0' or '1'.
//
// It also provides a Dump function that returns a bit dump of a sequence of bytes (similar to hex.Dump in the standard library's encoding/hex package).
package bit
import (
"encoding/hex"
"errors"
"fmt"
"io"
"strings"
)
// bitTable maps a bit value (0 or 1) to its ASCII digit ('0' or '1').
const bitTable = "01"

// Single-bit masks for the seven low-order bits of a byte: b1 selects the
// least significant bit and b7 the second most significant one.
const (
	b1 = 1 << iota
	b2
	b3
	b4
	b5
	b6
	b7
)
// ErrLength reports an attempt to decode input whose length is not a
// multiple of 8 using Decode or DecodeString.
// The stream-based decoder returned by NewDecoder reports
// io.ErrUnexpectedEOF instead of ErrLength.
var ErrLength = errors.New("bit: bit string length not a multiple of 8")
// InvalidByteError is the error type used for a byte in a bit string that is
// neither '0' nor '1'. Its value is the offending byte.
type InvalidByteError byte

// Error implements the error interface. The message shows the offending byte
// in Unicode notation (fmt's %#U verb).
func (e InvalidByteError) Error() string {
	return "bit: invalid byte: " + fmt.Sprintf("%#U", rune(e))
}
// EncodedLen reports how many bytes the bit encoding of n source bytes
// occupies. Every source byte expands to eight digits, so the result is 8 * n.
func EncodedLen(n int) int {
	return 8 * n
}
// Encode writes the bit encoding of src to dst: each source byte becomes
// eight ASCII digits ('0' or '1'), most significant bit first.
//
// dst must hold at least 8 * len(src) bytes; a shorter dst causes an
// index-out-of-range panic. The return value is the number of bytes written
// to dst, which is always 8 * len(src).
func Encode(dst, src []byte) int {
	for i, b := range src {
		base := i * 8
		// Shift the wanted bit down to position 0, then use it as an index
		// into the digit table.
		dst[base] = bitTable[b>>7]
		dst[base+1] = bitTable[(b>>6)&1]
		dst[base+2] = bitTable[(b>>5)&1]
		dst[base+3] = bitTable[(b>>4)&1]
		dst[base+4] = bitTable[(b>>3)&1]
		dst[base+5] = bitTable[(b>>2)&1]
		dst[base+6] = bitTable[(b>>1)&1]
		dst[base+7] = bitTable[b&1]
	}
	return 8 * len(src)
}
// EncodeToString returns the bit encoding of src as a string of '0' and '1'
// characters, eight per source byte.
func EncodeToString(src []byte) string {
	digits := make([]byte, EncodedLen(len(src)))
	n := Encode(digits, src)
	return string(digits[:n])
}
// bufferSize is the number of bit characters buffered by the streaming
// encoder and decoder. It is a multiple of 8, so a full buffer always holds
// a whole number of encoded bytes.
const bufferSize = 1 << 10
// encoder is the io.Writer returned by NewEncoder. It bit-encodes the data
// written to it and forwards the encoded characters to w.
type encoder struct {
	// w receives the encoded characters.
	w io.Writer
	// err is the error from the most recent write to w; once it is non-nil,
	// Write stops encoding and keeps returning it.
	err error
	out [bufferSize]byte // output buffer
}
// NewEncoder wraps w in an io.Writer that bit-encodes everything written to
// it and passes the resulting '0'/'1' characters on to w.
func NewEncoder(w io.Writer) io.Writer {
	enc := new(encoder)
	enc.w = w
	return enc
}
// Write bit-encodes p and writes the encoded characters to the underlying
// writer, one buffer-sized chunk at a time.
//
// n counts source bytes, not encoded characters: it is the number of encoded
// characters accepted by the underlying writer divided by 8. Once the
// underlying writer has failed, the error is remembered and returned by this
// and every later call without encoding anything further.
func (e *encoder) Write(p []byte) (n int, err error) {
	// maxChunk is the number of source bytes whose encoding fills e.out.
	const maxChunk = bufferSize / 8

	for rest := p; len(rest) > 0; {
		if e.err != nil {
			break
		}
		chunk := rest
		if len(chunk) > maxChunk {
			chunk = chunk[:maxChunk]
		}
		rest = rest[len(chunk):]

		size := Encode(e.out[:], chunk)
		var wrote int
		wrote, e.err = e.w.Write(e.out[:size])
		n += wrote / 8
	}
	return n, e.err
}
// DecodedLen reports how many bytes result from decoding x bit characters.
// Eight characters form one byte, so the result is x / 8 (integer division).
func DecodedLen(x int) int {
	const digitsPerByte = 8
	return x / digitsPerByte
}
// Decode turns the bit string src into bytes stored in dst and returns the
// number of bytes written. Every group of eight characters, most significant
// bit first, produces one byte, so a well-formed src yields
// DecodedLen(len(src)) bytes.
//
// src must consist solely of '0' and '1' characters and its length must be a
// multiple of 8. On malformed input Decode returns the number of bytes
// decoded before the problem together with an error: InvalidByteError for
// the first character that is not a bit character, or ErrLength when all
// characters are valid but the length is not a multiple of 8.
//
// A group's byte is stored only after all eight of its characters have been
// read, so dst and src may share the same backing array.
func Decode(dst, src []byte) (int, error) {
	n := 0
	for ; n*8+8 <= len(src); n++ {
		var v byte
		for _, c := range src[n*8 : n*8+8] {
			bit, ok := fromBitChar(c)
			if !ok {
				return n, InvalidByteError(c)
			}
			v = v<<1 | bit
		}
		dst[n] = v
	}

	// Whatever is left is an incomplete group. An invalid character in it
	// is the earlier problem, so report that in preference to the bad length.
	for _, c := range src[n*8:] {
		if _, ok := fromBitChar(c); !ok {
			return n, InvalidByteError(c)
		}
	}
	if len(src)%8 != 0 {
		return n, ErrLength
	}
	return n, nil
}
// fromBitChar returns the numeric value (0 or 1) of the bit character c and
// true. For any other byte it returns 0 and false.
func fromBitChar(c byte) (byte, bool) {
	if c == '0' || c == '1' {
		return c - '0', true
	}
	return 0, false
}
// DecodeString decodes the bit string s and returns the resulting bytes.
//
// s must consist solely of '0' and '1' characters and its length must be a
// multiple of 8. On malformed input DecodeString returns the bytes decoded
// before the problem together with the error reported by Decode.
func DecodeString(s string) ([]byte, error) {
	// Decode in place: Decode stores byte i only after it has read the
	// eight characters of group i, and never looks at them again.
	buf := []byte(s)
	n, err := Decode(buf, buf)
	if err != nil {
		return buf[:n], err
	}
	return buf[:n], nil
}
// decoder is the io.Reader returned by NewDecoder. It reads bit characters
// from r and hands out the decoded bytes.
type decoder struct {
	// r is the source of bit characters.
	r io.Reader
	// err is the pending error, set from r.Read or from a decode failure.
	// Read reports it only once fewer than 8 buffered characters remain.
	err error
	in  []byte           // input buffer (encoded form)
	arr [bufferSize]byte // backing array for in
}
// NewDecoder wraps r in an io.Reader that decodes the bit characters read
// from r into bytes.
// NewDecoder expects r to contain only bit characters ('0' or '1') and their
// total number to be a multiple of 8.
func NewDecoder(r io.Reader) io.Reader {
	dec := new(decoder)
	dec.r = r
	return dec
}
// Read decodes buffered bit characters into p and returns the number of
// bytes stored in p.
//
// When fewer than 8 characters are buffered and no error is pending, Read
// first performs a single read from the underlying reader. It may therefore
// return (0, nil) if that read does not complete a group of 8 characters.
//
// Errors are reported only once fewer than 8 buffered characters remain:
//   - an InvalidByteError for a character that is neither '0' nor '1';
//   - io.ErrUnexpectedEOF if the input ends inside a group of 8 characters
//     that are all valid (Decode and DecodeString report ErrLength here);
//   - otherwise the error from the underlying reader, including io.EOF.
func (d *decoder) Read(p []byte) (n int, err error) {
	// Fill internal buffer with sufficient bytes to decode
	if len(d.in) < 8 && d.err == nil {
		var numCopy, numRead int
		numCopy = copy(d.arr[:], d.in) // Copies 0 ~ 7 bytes
		numRead, d.err = d.r.Read(d.arr[numCopy:])
		d.in = d.arr[:numCopy+numRead]
		if tail := len(d.in) % 8; d.err == io.EOF && tail != 0 {
			// The input ends inside a group. The incomplete group can be
			// up to 7 characters long, so every one of them is checked:
			// as in Decode, an invalid character takes precedence over
			// the truncation.
			d.err = io.ErrUnexpectedEOF
			for _, c := range d.in[len(d.in)-tail:] {
				if _, ok := fromBitChar(c); !ok {
					d.err = InvalidByteError(c)
					break
				}
			}
		}
	}

	// Decode internal buffer into output buffer
	if numAvail := len(d.in) / 8; len(p) > numAvail {
		p = p[:numAvail]
	}
	numDec, err := Decode(p, d.in[:len(p)*8])
	d.in = d.in[8*numDec:]
	if err != nil {
		d.in, d.err = nil, err // Decode error; discard input remainder
	}

	if len(d.in) < 8 {
		return numDec, d.err // Only expose errors when buffer fully consumed
	}
	return numDec, nil
}
// Dump formats data as a bit dump and returns it as a string. The layout
// follows the output of `xxd -b` on the command line: six bytes per line,
// each line consisting of the offset, the bytes as binary digits and their
// ASCII rendering. Empty input yields the empty string.
func Dump(data []byte) string {
	if len(data) == 0 {
		return ""
	}

	// A complete 6-byte line is 72 bytes long and the final, partial line
	// is at least 67 bytes. Reserving 72 bytes for every line wastes at
	// most 5 bytes in total.
	lines := 1 + (len(data)-1)/6

	var sb strings.Builder
	sb.Grow(lines * 72)

	d := Dumper(&sb)
	d.Write(data)
	d.Close()
	return sb.String()
}
// Dumper returns an io.WriteCloser that formats everything written to it as
// a bit dump and writes the result to w. The layout follows the output of
// `xxd -b` on the command line. Close writes the final, partial line, if any.
func Dumper(w io.Writer) io.WriteCloser {
	d := new(dumper)
	d.w = w
	return d
}
// dumper is the io.WriteCloser returned by Dumper. It formats the bytes
// written to it as bit-dump lines of six bytes each and writes them to w.
type dumper struct {
	// w receives the formatted dump.
	w io.Writer
	// rightChars collects the ASCII rendering of the current line's bytes,
	// with room for the terminating newline.
	rightChars [7]byte
	// buf is scratch space for the offset prefix, each encoded byte and the
	// padding written by Close.
	buf  [14]byte
	used int  // number of bytes in the current line
	n    uint // number of bytes, total
	// closed is set by Close; Write fails once it is true.
	closed bool
}
// toChar returns b unchanged if it is a printable ASCII character (32
// through 126) and '.' otherwise. It produces the right-hand text column of
// a dump line.
func toChar(b byte) byte {
	if b >= 32 && b <= 126 {
		return b
	}
	return '.'
}
// Write formats data as bit-dump output and writes it to the underlying
// writer. It returns the number of bytes of data whose binary digits were
// written. On a failure of the underlying writer it stops and returns that
// error, and after Close it always fails.
//
// A complete output line looks like this:
//
//	00000000: 00110000 00110001 00110010 00110011 00110100 00110101  012345
//	^ offset                                                         ^ ASCII of line.
func (h *dumper) Write(data []byte) (n int, err error) {
	if h.closed {
		return 0, errors.New("bit: dumper closed")
	}

	for i := 0; i < len(data); i++ {
		if h.used == 0 {
			// A new line starts with the current offset in hex. The four
			// big-endian offset bytes are staged in buf[0:4] and hex-encoded
			// into buf[4:12].
			off := h.n
			h.buf[0], h.buf[1], h.buf[2], h.buf[3] = byte(off>>24), byte(off>>16), byte(off>>8), byte(off)
			hex.Encode(h.buf[4:], h.buf[:4])
			h.buf[12], h.buf[13] = ':', ' '
			if _, err = h.w.Write(h.buf[4:]); err != nil {
				return n, err
			}
		}

		// Eight binary digits followed by a separating space.
		Encode(h.buf[:], data[i:i+1])
		h.buf[8] = ' '
		group := h.buf[:9]
		if h.used == 5 {
			// The 6th and last byte of a line is followed by an extra
			// space that sets off the ASCII column.
			h.buf[9] = ' '
			group = h.buf[:10]
		}
		if _, err = h.w.Write(group); err != nil {
			return n, err
		}

		n++
		h.rightChars[h.used] = toChar(data[i])
		h.used++
		h.n++

		if h.used == 6 {
			// The line is complete: emit its ASCII column and start over.
			h.rightChars[6] = '\n'
			if _, err = h.w.Write(h.rightChars[:]); err != nil {
				return n, err
			}
			h.used = 0
		}
	}
	return n, err
}
// Close finishes the dump. If the last line is incomplete, Close pads the
// missing byte columns with spaces so that the ASCII column lines up with
// the previous lines, then writes the ASCII column and a newline.
//
// Only the first call has any effect; later calls return nil. Close returns
// the first error reported by the underlying writer, if any.
func (h *dumper) Close() (err error) {
	if h.closed {
		return nil
	}
	h.closed = true
	if h.used == 0 {
		// No partial line pending.
		return nil
	}

	// Blanks standing in for a missing byte column: eight digits plus the
	// separator, and one more space after the last column of the line.
	for i := 0; i < 10; i++ {
		h.buf[i] = ' '
	}

	present := h.used
	for ; h.used < 6; h.used++ {
		width := 9
		if h.used == 5 {
			width = 10
		}
		if _, err = h.w.Write(h.buf[:width]); err != nil {
			return err
		}
	}

	h.rightChars[present] = '\n'
	_, err = h.w.Write(h.rightChars[:present+1])
	return err
}