-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbinarysearch.go
56 lines (47 loc) · 1.26 KB
/
binarysearch.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
package datarithms
import (
"bufio"
"io"
"os"
"time"
)
// BinarySearchFileByDate returns the offset of the first line which date is after lastUpdated
// this could panic if the parse function isn't guaranteed to work on every line of the file
func BinarySearchFileByDate(file string, lastUpdated time.Time, parse func(line string) (time.Time, error)) (offset int64, err error) {
// Open the log file
f, err := os.Open(file)
if err != nil {
return 0, err
}
// Load all of the line offsets into memory
lines := make([]int64, 0)
// Precalculate the seek offset of each line
scanner := bufio.NewScanner(f)
for scanner.Scan() {
lines = append(lines, offset)
offset += int64(len(scanner.Bytes())) + 1
}
// Run a binary search for the first line which is past the lastUpdated
start := 0
end := len(lines) - 1
for start < end {
mid := (start + end) / 2
// Get the text of the line
f.Seek(lines[mid], io.SeekStart)
scanner := bufio.NewScanner(f)
scanner.Scan()
line := scanner.Text()
tm, err := parse(line)
if err != nil {
// if for some reason we can't parse the line we increment start and try again
start = mid + 1
continue
}
if tm.After(lastUpdated) {
end = mid
} else {
start = mid + 1
}
}
return lines[start], nil
}