Skip to content

Commit

Permalink
Optimize PeekingLexer to provide constant-time Peek
Browse files Browse the repository at this point in the history
The attached Benchmark is over 3x faster after the optimization
  • Loading branch information
Peter Dolak committed Nov 10, 2022
1 parent 9fb16d2 commit 98197fe
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 39 deletions.
78 changes: 39 additions & 39 deletions lexer/peek.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package lexer
// PeekingLexer supports arbitrary lookahead as well as cloning.
type PeekingLexer struct {
	Checkpoint
	tokens []Token            // all tokens, ending with the EOF token as a sentinel
	elide  map[TokenType]bool // token types skipped by Peek/Next
}
Expand All @@ -15,8 +14,9 @@ type RawCursor int
// Checkpoint wraps the mutable cursor state of the PeekingLexer.
//
// Copying and restoring just this state is a bit faster than copying the entire PeekingLexer.
type Checkpoint struct {
	rawCursor  RawCursor // The raw position of the next possibly elided token
	nextCursor RawCursor // The raw position of the next non-elided token
	cursor     int       // Index of the next non-elided token among other non-elided tokens
}

// Upgrade a Lexer to a PeekingLexer with arbitrary lookahead.
Expand All @@ -34,12 +34,12 @@ func Upgrade(lex Lexer, elide ...TokenType) (*PeekingLexer, error) {
if err != nil {
return r, err
}
r.tokens = append(r.tokens, t)
if t.EOF() {
r.eof = t
break
}
r.tokens = append(r.tokens, t)
}
r.advanceToNonElided()
return r, nil
}

Expand All @@ -60,28 +60,37 @@ func (c Checkpoint) RawCursor() RawCursor {

// Next consumes and returns the next token.
func (p *PeekingLexer) Next() Token {
for int(p.rawCursor) < len(p.tokens) {
t := p.tokens[p.rawCursor]
p.rawCursor++
if p.elide[t.Type] {
continue
}
p.cursor++
t := p.tokens[p.nextCursor]
if t.EOF() {
return t
}
return p.eof
p.nextCursor++
p.rawCursor = p.nextCursor
p.cursor++
p.advanceToNonElided()
return t
}

// Peek ahead at the next token.
// Peek ahead at the next non-elided token.
func (p *PeekingLexer) Peek() Token {
for i := int(p.rawCursor); i < len(p.tokens); i++ {
t := p.tokens[i]
if p.elide[t.Type] {
continue
return p.tokens[p.nextCursor]
}

// RawPeek peeks ahead at the next raw token.
//
// Unlike Peek, this will include elided tokens.
func (p *PeekingLexer) RawPeek() Token {
	// rawCursor always points at a valid token because the EOF token is
	// stored in tokens as a sentinel.
	return p.tokens[p.rawCursor]
}

// advanceToNonElided advances nextCursor to the closest non-elided token.
//
// The loop always terminates because the EOF sentinel at the end of tokens is
// never elided.
func (p *PeekingLexer) advanceToNonElided() {
	for ; ; p.nextCursor++ {
		t := p.tokens[p.nextCursor]
		if t.EOF() || !p.elide[t.Type] {
			return
		}
	}
}

// PeekAny peeks forward over elided and non-elided tokens.
Expand All @@ -92,34 +101,25 @@ func (p *PeekingLexer) Peek() Token {
// The returned RawCursor position is the location of the returned token.
// Use FastForward to move the internal cursors forward.
func (p *PeekingLexer) PeekAny(match func(Token) bool) (t Token, rawCursor RawCursor) {
tokenCount := RawCursor(len(p.tokens))
for i := p.rawCursor; i < tokenCount; i++ {
for i := p.rawCursor; ; i++ {
t = p.tokens[i]
if match(t) || !p.elide[t.Type] {
if t.EOF() || match(t) || !p.elide[t.Type] {
return t, i
}
}
return p.eof, tokenCount
}

// FastForward the internal cursors to this RawCursor position.
func (p *PeekingLexer) FastForward(rawCursor RawCursor) {
tokenCount := RawCursor(len(p.tokens))
for ; p.rawCursor <= rawCursor && p.rawCursor < tokenCount; p.rawCursor++ {
for ; p.rawCursor <= rawCursor; p.rawCursor++ {
t := p.tokens[p.rawCursor]
if p.elide[t.Type] {
continue
if t.EOF() {
break
}
if !p.elide[t.Type] {
p.cursor++
}
p.cursor++
}
}

// RawPeek peeks ahead at the next raw token.
//
// Unlike Peek, this will include elided tokens.
func (p *PeekingLexer) RawPeek() Token {
if int(p.rawCursor) < len(p.tokens) {
return p.tokens[p.rawCursor]
}
return p.eof
p.nextCursor = p.rawCursor
p.advanceToNonElided()
}
16 changes: 16 additions & 0 deletions lexer/peek_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,19 @@ func TestPeekingLexer_Peek_Next_Checkpoint(t *testing.T) {
plex.Checkpoint = checkpoint
require.Equal(t, expected[0], plex.Peek(), "should have reverted to pre-Next state")
}

// BenchmarkPeekingLexer_Peek measures repeated Peek calls after one Next,
// with an elided token (type 3) sitting between the cursor and the next
// significant token.
func BenchmarkPeekingLexer_Peek(b *testing.B) {
	// Three tokens: significant "x", elided " " (type 3), significant "y".
	tokens := []lexer.Token{{Type: 1, Value: "x"}, {Type: 3, Value: " "}, {Type: 2, Value: "y"}}
	// Elide token type 3 so Peek has to look past the space token.
	l, err := lexer.Upgrade(&staticLexer{tokens: tokens}, 3)
	require.NoError(b, err)
	l.Next() // consume "x"; the next non-elided token is now "y"
	t := l.Peek()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		t = l.Peek()
		// Guard against the benchmark degenerating if Peek ever returns EOF;
		// also keeps t live so the call isn't optimized away.
		if t.EOF() {
			return
		}
	}
	require.Equal(b, lexer.Token{Type: 2, Value: "y"}, t)
}

0 comments on commit 98197fe

Please sign in to comment.