From 8788c27159d0de88a57b876dc20e0498faff83d2 Mon Sep 17 00:00:00 2001 From: Yegor Myskin Date: Mon, 3 Jun 2019 18:06:36 +0300 Subject: [PATCH] Buffered line begin seeker --- README.md | 2 +- line_test.go | 4 +++ seeker_test.go | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++ tailor.go | 57 ++++++++++++++++++++++++++++-------------- 4 files changed, 110 insertions(+), 20 deletions(-) create mode 100644 seeker_test.go diff --git a/README.md b/README.md index 901ed43..687dd85 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ go get github.com/un000/tailor ``` ## TODO -- [ ] Better Test Code Coverage +- [x] Better Test Code Coverage - [ ] Benchmarks - [ ] Rate limiter + Leaky Bucket diff --git a/line_test.go b/line_test.go index 1392783..0c4c406 100644 --- a/line_test.go +++ b/line_test.go @@ -1,3 +1,7 @@ +// Copyright 2019 Yegor Myskin. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package tailor import ( diff --git a/seeker_test.go b/seeker_test.go new file mode 100644 index 0000000..aeb31b4 --- /dev/null +++ b/seeker_test.go @@ -0,0 +1,67 @@ +package tailor + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "os" + "strings" + "testing" +) + +func TestNewLineFinder(t *testing.T) { + var tests = []struct { + content string + offsetFromStart int64 + res string + }{ + {"", 0, ""}, + {"\n", 0, "\n"}, + {"\n\n", 1, "\n"}, + {"\n\na\n", 3, "a\n"}, + {"a", 0, "a"}, + {"a\n", 0, "a\n"}, + {"abc", 2, "abc"}, + {"abc\n", 2, "abc\n"}, + {"a\nb", 2, "b"}, + {"a\nb\n", 2, "b\n"}, + {"aaaaa\nbbbbbbbb\n", 4, "aaaaa\n"}, + {"aaaaa\nbbbbbbbb\n", 10, "bbbbbbbb\n"}, + {strings.Repeat("a", 300), 280, strings.Repeat("a", 300)}, + {strings.Repeat("a", 300) + "\n", 280, strings.Repeat("a", 300) + "\n"}, + {strings.Repeat("a", 100) + "\n" + strings.Repeat("a", 200), 280, strings.Repeat("a", 200)}, + } + + const file = "./tst" + defer os.Remove(file) + + for i, data := range tests { + t.Run(fmt.Sprint(i), func(t *testing.T) { + err := ioutil.WriteFile(file, []byte(data.content), os.ModePerm) + if err != nil { + t.Error(err) + return + } + + f := New(file) + err = f.openFile(data.offsetFromStart, io.SeekStart) + if err != nil { + t.Errorf("[%d] error executing: %s, data: %+v", i, err, data) + return + } + + r := bufio.NewReader(f.file) + line, err := r.ReadString('\n') + if err != nil && err != io.EOF { + t.Errorf("[%d] error reading line: %s, data: %+v", i, err, data) + return + } + + if line != data.res { + t.Errorf("[%d] actual: '%s', want: '%s', data: %+v", i, line, data.res, data) + return + } + }) + } +} diff --git a/tailor.go b/tailor.go index 9ebd79e..ee1db5c 100644 --- a/tailor.go +++ b/tailor.go @@ -269,42 +269,61 @@ func (t *Tailor) openFile(offset int64, whence int) (err error) { return nil } -// seekToLineStart seeks the cursor at the beginning of a line at offset. -// If the byte at offset equals \n, so next line will be selected. +// seekToLineStart seeks the cursor at the beginning of a line at offset. Internally this function uses a buffer +// to find the beginning of a line. If the byte at offset equals \n, so the next line will be selected. func (t *Tailor) seekToLineStart(offset int64, whence int) error { - bts := make([]byte, 1) + const ( + bufSize int64 = 256 + ) - offset, err := t.file.Seek(offset, whence) - if err == io.EOF { + initialOffset, err := t.file.Seek(offset, whence) + if initialOffset == 0 { return nil } + if err == io.EOF { + err = nil + } if err != nil { return err } - for offset > 0 { - _, err = t.file.Read(bts) + min := func(a, b int64) int64 { + if a < b { + return a + } + return b + } + + var current int64 = 0 +Loop: + for { + current += min(bufSize, initialOffset-current) + buf := make([]byte, min(current, bufSize)) + + n, err := t.file.ReadAt(buf, initialOffset-current) if err != nil && err != io.EOF { return err } + buf = buf[:n] - b := bts[0] - if b == '\n' { - return nil + current -= int64(n) + for i := int64(len(buf)) - 1; i >= 0; i-- { + if buf[i] == '\n' { + break Loop + } + current++ } - - newOffset := int64(-2) - if offset-2 < 0 { - newOffset = -1 + if initialOffset-current == 0 { + break } + } - offset, err = t.file.Seek(newOffset, io.SeekCurrent) - if err != nil { - return err - } + _, err = t.file.Seek(-current, io.SeekCurrent) + if err == io.EOF { + err = nil } - return nil + return err } // updateFileStatus update a current seek from the file an an actual file size.