From 323775c2cf70a45eb4f5a413b2d6c9769b93a001 Mon Sep 17 00:00:00 2001 From: Quint Daenen Date: Fri, 11 Oct 2024 18:25:05 +0200 Subject: [PATCH] Add BNF support. --- abnf/abnf_test.go | 5 +-- abnf/gen/generator.go | 5 +-- abnf/gen/generator_test.go | 1 + bnf.go | 3 ++ bnf/definition.abnf | 20 ++++++++++++ bnf/definition.go | 39 ++++++++++++++++++++++++ {abnf/ir => ir}/ir.go | 62 +++++++++++++++++--------------------- {abnf/ir => ir}/ir_test.go | 50 ++++++++++++++++++++++++++++-- parser/op/and_test.go | 5 ++- parser/reader_test.go | 5 ++- 10 files changed, 149 insertions(+), 46 deletions(-) create mode 100644 bnf.go create mode 100644 bnf/definition.abnf create mode 100644 bnf/definition.go rename {abnf/ir => ir}/ir.go (92%) rename {abnf/ir => ir}/ir_test.go (77%) diff --git a/abnf/abnf_test.go b/abnf/abnf_test.go index be64eb6..98cea4f 100644 --- a/abnf/abnf_test.go +++ b/abnf/abnf_test.go @@ -3,11 +3,12 @@ package abnf_test import ( _ "embed" "fmt" + "testing" + "github.com/0x51-dev/upeg/abnf" - "github.com/0x51-dev/upeg/abnf/ir" + "github.com/0x51-dev/upeg/ir" "github.com/0x51-dev/upeg/parser" "github.com/0x51-dev/upeg/parser/op" - "testing" ) var ( diff --git a/abnf/gen/generator.go b/abnf/gen/generator.go index 0ced9e8..a5c3b5e 100644 --- a/abnf/gen/generator.go +++ b/abnf/gen/generator.go @@ -4,8 +4,6 @@ import ( "bytes" "embed" "fmt" - "github.com/0x51-dev/upeg/abnf" - "github.com/0x51-dev/upeg/abnf/ir" "io" "io/fs" "log" @@ -13,6 +11,9 @@ import ( "strings" "text/template" "unicode" + + "github.com/0x51-dev/upeg/abnf" + "github.com/0x51-dev/upeg/ir" ) const ( diff --git a/abnf/gen/generator_test.go b/abnf/gen/generator_test.go index d41560a..9fc57ec 100644 --- a/abnf/gen/generator_test.go +++ b/abnf/gen/generator_test.go @@ -2,6 +2,7 @@ package gen_test import ( "fmt" + "github.com/0x51-dev/upeg/abnf/gen" ) diff --git a/bnf.go b/bnf.go new file mode 100644 index 0000000..394bec5 --- /dev/null +++ b/bnf.go @@ -0,0 +1,3 @@ +package upeg + +//go:generate go run github.com/0x51-dev/upeg/cmd/abnf --in=bnf/definition.abnf --out=bnf/definition.go --ignore=defined-as,elements,c-wsp,c-nl,element,group,rulename-br,literal-double,literal-single --importCore --package=bnf diff --git a/bnf/definition.abnf b/bnf/definition.abnf new file mode 100644 index 0000000..9de79d3 --- /dev/null +++ b/bnf/definition.abnf @@ -0,0 +1,20 @@ +; BNF +rulelist = 1*( rule / (*WSP c-nl) ) +rule = rulename-br defined-as elements c-nl +rulename-br = "<" rulename ">" +rulename = ALPHA *(ALPHA / DIGIT / "-") +defined-as = *c-wsp ("::=") *c-wsp +elements = alternation *WSP +c-wsp = WSP / (c-nl WSP) +c-nl = comment / CRLF +comment = ";" *(WSP / VCHAR) CRLF +alternation = concatenation *(*c-wsp "|" *c-wsp concatenation) +concatenation = repetition *(1*c-wsp repetition) +repetition = element [repeat] +repeat = "*" / "+" +element = rulename-br / group / option / char-val +group = "(" *c-wsp alternation *c-wsp ")" +option = "[" *c-wsp alternation *c-wsp "]" +char-val = literal-double / literal-single +literal-double = %x22 *(%x20-21 / %x23-7E) %x22 +literal-single = %x27 *(%x20-26 / %x28-7E) %x27 diff --git a/bnf/definition.go b/bnf/definition.go new file mode 100644 index 0000000..4a50bd2 --- /dev/null +++ b/bnf/definition.go @@ -0,0 +1,39 @@ +// Package bnf is autogenerated by https://github.com/0x51-dev/upeg. DO NOT EDIT. +package bnf + +import ( + . "github.com/0x51-dev/upeg/abnf/core" + "github.com/0x51-dev/upeg/parser" + "github.com/0x51-dev/upeg/parser/op" +) + +var ( + Rulelist = op.Capture{Name: "Rulelist", Value: op.OneOrMore{Value: op.Or{Rule, op.And{op.ZeroOrMore{Value: WSP}, CNl}}}} + Rule = op.Capture{Name: "Rule", Value: op.And{RulenameBr, DefinedAs, Elements, CNl}} + RulenameBr = op.And{'<', Rulename, '>'} + Rulename = op.Capture{Name: "Rulename", Value: op.And{ALPHA, op.ZeroOrMore{Value: op.Or{ALPHA, DIGIT, '-'}}}} + DefinedAs = op.And{op.ZeroOrMore{Value: CWsp}, "::=", op.ZeroOrMore{Value: CWsp}} + Elements = op.And{Alternation, op.ZeroOrMore{Value: WSP}} + CWsp = op.Or{WSP, op.And{CNl, WSP}} + CNl = op.Or{Comment, CRLF} + Comment = op.Capture{Name: "Comment", Value: op.And{';', op.ZeroOrMore{Value: op.Or{WSP, VCHAR}}, CRLF}} + Alternation = op.Capture{Name: "Alternation", Value: op.And{Concatenation, op.ZeroOrMore{Value: op.And{op.ZeroOrMore{Value: CWsp}, '|', op.ZeroOrMore{Value: CWsp}, Concatenation}}}} + Concatenation = op.Capture{Name: "Concatenation", Value: op.And{Repetition, op.ZeroOrMore{Value: op.And{op.OneOrMore{Value: CWsp}, Repetition}}}} + Repetition = op.Capture{Name: "Repetition", Value: op.And{Element, op.Optional{Value: Repeat}}} + Repeat = op.Capture{Name: "Repeat", Value: op.Or{'*', '+'}} + Element = op.Or{RulenameBr, Group, Option, CharVal} + Group = op.And{'(', op.ZeroOrMore{Value: CWsp}, op.Reference{Name: "Alternation"}, op.ZeroOrMore{Value: CWsp}, ')'} + Option = op.Capture{Name: "Option", Value: op.And{'[', op.ZeroOrMore{Value: CWsp}, op.Reference{Name: "Alternation"}, op.ZeroOrMore{Value: CWsp}, ']'}} + CharVal = op.Capture{Name: "CharVal", Value: op.Or{LiteralDouble, LiteralSingle}} + LiteralDouble = op.And{rune(0x22), op.ZeroOrMore{Value: op.Or{op.RuneRange{Min: 0x20, Max: 0x21}, op.RuneRange{Min: 0x23, Max: 0x7E}}}, rune(0x22)} + LiteralSingle = op.And{rune(0x27), op.ZeroOrMore{Value: op.Or{op.RuneRange{Min: 0x20, Max: 0x26}, op.RuneRange{Min: 0x28, Max: 0x7E}}}, rune(0x27)} +) + +func NewParser(input []rune) (*parser.Parser, error) { + p, err := parser.New(input) + if err != nil { + return nil, err + } + p.Rules["Alternation"] = Alternation + return p, nil +} diff --git a/abnf/ir/ir.go b/ir/ir.go similarity index 92% rename from abnf/ir/ir.go rename to ir/ir.go index 0428c80..eb7d70c 100644 --- a/abnf/ir/ir.go +++ b/ir/ir.go @@ -2,8 +2,9 @@ package ir import ( "fmt" - "github.com/0x51-dev/upeg/parser" "strings" + + "github.com/0x51-dev/upeg/parser" ) func ParseRulename(n *parser.Node) (string, error) { @@ -244,6 +245,16 @@ func ParseRepeat(n *parser.Node) (*Repeat, error) { return nil, err } v := n.Value() + switch v { + case "+": + one := "1" + return &Repeat{ + Min: &one, + }, nil + case "*": + return &Repeat{}, nil + } + if !strings.ContainsRune(v, '*') { return &Repeat{ Min: &v, @@ -298,65 +309,48 @@ func ParseRepetition(n *parser.Node) (*Repetition, error) { if err := checkParent(n, "Repetition"); err != nil { return nil, err } + var v Element var r *Repeat - for i, n := range n.Children() { + for _, n := range n.Children() { switch n.Name { case "Repeat": - if i != 0 { - return nil, NewInvalidNodeError("Repeat", n.Name) - } repeat, err := ParseRepeat(n) if err != nil { return nil, err } r = repeat case "Rulename": - v := Rulename(n.Value()) - return &Repetition{ - Repeat: r, - Value: &v, - }, nil + rn := Rulename(n.Value()) + v = &rn case "Alternation": a, err := ParseAlternation(n) if err != nil { return nil, err } - return &Repetition{ - Repeat: r, - Value: a, - }, nil + v = a case "Option": o, err := ParseOption(n) if err != nil { return nil, err } - return &Repetition{ - Repeat: r, - Value: o, - }, nil + v = o case "CharVal": - v := CharVal(n.Value()) - return &Repetition{ - Repeat: r, - Value: &v, - }, nil + cv := CharVal(n.Value()) + v = &cv case "NumVal": - v := NumVal(n.Children()[0].Value()) - return &Repetition{ - Repeat: r, - Value: &v, - }, nil + nv := NumVal(n.Children()[0].Value()) + v = &nv case "ProseVal": - v := ProseVal(n.Value()) - return &Repetition{ - Repeat: r, - Value: &v, - }, nil + pv := ProseVal(n.Value()) + v = &pv default: return nil, NewInvalidNodeError("Element / Repeat", n.Name) } } - return nil, NewInvalidNodeError("Element / Repeat", "") + return &Repetition{ + Repeat: r, + Value: v, + }, nil } func (r *Repetition) String() string { diff --git a/abnf/ir/ir_test.go b/ir/ir_test.go similarity index 77% rename from abnf/ir/ir_test.go rename to ir/ir_test.go index dffbfb8..bd18e91 100644 --- a/abnf/ir/ir_test.go +++ b/ir/ir_test.go @@ -1,12 +1,58 @@ package ir_test import ( + "testing" + "github.com/0x51-dev/upeg/abnf" - "github.com/0x51-dev/upeg/abnf/ir" + "github.com/0x51-dev/upeg/bnf" + "github.com/0x51-dev/upeg/ir" "github.com/0x51-dev/upeg/parser" - "testing" ) +func TestBNF_Rulelist(t *testing.T) { + for _, test := range []struct { + raw string + expected string + }{ + { + raw: " ::= ", + expected: "X = Y", + }, + { + raw: " ::= ", + expected: "X = ( Y Z )", + }, + { + raw: " ::= | ", + expected: "X = ( Y / Z )", + }, + { + raw: " ::= +", + expected: "X = 1*Y", + }, + { + raw: " ::= *", + expected: "X = *Y", + }, + } { + p, err := parser.New([]rune(test.raw + "\n")) + if err != nil { + t.Fatal(err) + } + n, err := p.ParseEOF(bnf.Rulelist) + if err != nil { + t.Fatal(err) + } + l, err := ir.ParseRulelist(n) + if err != nil { + t.Fatal(err) + } + if l.String() != test.expected { + t.Errorf("expected %s, got %s", test.expected, l.String()) + } + } +} + func TestParseAlternation(t *testing.T) { for _, test := range []struct { raw string diff --git a/parser/op/and_test.go b/parser/op/and_test.go index d4f34e7..a04a91c 100644 --- a/parser/op/and_test.go +++ b/parser/op/and_test.go @@ -2,10 +2,10 @@ package op_test import ( "errors" - "fmt" + "testing" + "github.com/0x51-dev/upeg/parser" "github.com/0x51-dev/upeg/parser/op" - "testing" ) var AndTestCases = []AndTestCase{ @@ -84,7 +84,6 @@ func TestAnd_error(t *testing.T) { errors.As(err, &stack) var match *parser.NoMatchError errors.As(stack.Errors[1], &match) - fmt.Println(stack) if match.End.Character() != 'b' { t.Fatalf("expected cursor to be at 'b', got %c", match.End.Character()) } diff --git a/parser/reader_test.go b/parser/reader_test.go index f7f4a54..8dc0fc1 100644 --- a/parser/reader_test.go +++ b/parser/reader_test.go @@ -1,9 +1,9 @@ package parser_test import ( - "fmt" - . "github.com/0x51-dev/upeg/parser" "testing" + + . "github.com/0x51-dev/upeg/parser" ) func TestReader(t *testing.T) { @@ -104,7 +104,6 @@ func TestReader_Cursor(t *testing.T) { lastNl: 4, }, } { - fmt.Println(test.input) r, err := NewReader([]rune(test.input)) if err != nil { t.Fatal(err)