From 0c023e3cb59ed993f898cc72ad1782d635f7784d Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Wed, 19 May 2021 09:04:37 -0600 Subject: [PATCH 01/25] fix deep nesting --- fields.go | 29 ++++--- internal/dremel/write_repeated.go | 7 +- parquet_generated_test.go | 122 ++++++++++++++++++++++++++++++ parquet_test.go | 16 +++- 4 files changed, 157 insertions(+), 17 deletions(-) diff --git a/fields.go b/fields.go index 779f018..a78b037 100644 --- a/fields.go +++ b/fields.go @@ -189,21 +189,23 @@ func (f *OptionalField) valsFromDefs(defs []uint8, max uint8) int { func (f *OptionalField) DoWrite(w io.Writer, meta *Metadata, vals []byte, count int, stats Stats) error { buf := bytes.Buffer{} wc := &writeCounter{w: &buf} - err := writeLevels(wc, f.Defs, int32(bits.Len(uint(f.MaxLevels.Def)))) - if err != nil { - return err - } - defLen := wc.n + var repLen int64 if f.repeated { err := writeLevels(wc, f.Reps, int32(bits.Len(uint(f.MaxLevels.Rep)))) if err != nil { return err } + repLen = wc.n + } + + err := writeLevels(wc, f.Defs, int32(bits.Len(uint(f.MaxLevels.Def)))) + if err != nil { + return err } - repLen := wc.n - defLen + defLen := wc.n - repLen wc.Write(vals) l, cl, vals, err := compress(f.compression, buf.Bytes()) @@ -238,21 +240,24 @@ func (f *OptionalField) DoRead(r io.ReadSeeker, pg Page) (io.Reader, []int, erro return nil, nil, err } - defs, l, err := readLevels(bytes.NewBuffer(data), int32(bits.Len(uint(f.MaxLevels.Def)))) - if err != nil { - return nil, nil, err - } + var l int - f.Defs = append(f.Defs, defs[:int(ph.DataPageHeader.NumValues)]...) if f.repeated { reps, l2, err := readLevels(bytes.NewBuffer(data[l:]), int32(bits.Len(uint(f.MaxLevels.Rep)))) if err != nil { return nil, nil, err } - l += l2 f.Reps = append(f.Reps, reps[:int(ph.DataPageHeader.NumValues)]...) + l += l2 } + defs, l2, err := readLevels(bytes.NewBuffer(data[l:]), int32(bits.Len(uint(f.MaxLevels.Def)))) + if err != nil { + return nil, nil, err + } + f.Defs = append(f.Defs, defs[:int(ph.DataPageHeader.NumValues)]...) + l += l2 + n := f.valsFromDefs(defs, uint8(f.MaxLevels.Def)) sizes = append(sizes, n) out = append(out, data[l:]...) diff --git a/internal/dremel/write_repeated.go b/internal/dremel/write_repeated.go index 6ee8a1f..e8d1a1d 100644 --- a/internal/dremel/write_repeated.go +++ b/internal/dremel/write_repeated.go @@ -176,7 +176,12 @@ func writeCases(f fields.Field, seen fields.RepetitionTypes) []int { start = 1 + len(seen) } - for def := start; def <= f.MaxDef(); def++ { + maxDef := f.MaxDef() + if start > maxDef { + start = maxDef //hack! figure out why start is > maxDef + } + + for def := start; def <= maxDef; def++ { dfs = append(dfs, def) } return dfs diff --git a/parquet_generated_test.go b/parquet_generated_test.go index 4519e5f..b34fd84 100644 --- a/parquet_generated_test.go +++ b/parquet_generated_test.go @@ -60,6 +60,8 @@ func Fields(compression compression) []Field { NewBoolField(readHungry, writeHungry, []string{"hungry"}, fieldCompression(compression)), NewStringOptionalField(readHobbyName, writeHobbyName, []string{"hobby", "name"}, []int{1, 0}, optionalFieldCompression(compression)), NewInt32OptionalField(readHobbyDifficulty, writeHobbyDifficulty, []string{"hobby", "difficulty"}, []int{1, 1}, optionalFieldCompression(compression)), + NewStringOptionalField(readHobbySkillsName, writeHobbySkillsName, []string{"hobby", "skills", "name"}, []int{1, 2, 0}, optionalFieldCompression(compression)), + NewStringOptionalField(readHobbySkillsDifficulty, writeHobbySkillsDifficulty, []string{"hobby", "skills", "difficulty"}, []int{1, 2, 0}, optionalFieldCompression(compression)), NewInt32OptionalField(readFriendsID, writeFriendsID, []string{"friends", "id"}, []int{2, 0}, optionalFieldCompression(compression)), NewInt32OptionalField(readFriendsAge, writeFriendsAge, []string{"friends", "age"}, []int{2, 1}, optionalFieldCompression(compression)), NewBoolField(readSleepy, writeSleepy, []string{"Sleepy"}, fieldCompression(compression)), @@ -292,6 +294,126 @@ func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int return 0, 1 } +func readHobbySkillsName(x Person) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if x.Hobby == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Hobby.Skills) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Hobby.Skills { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0.Name) + } + } + } + + return vals, defs, reps +} + +func writeHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + if x.Hobby == nil { + x.Hobby = &Hobby{} + } + case 2: + switch rep { + case 0: + if x.Hobby == nil { + x.Hobby = &Hobby{Skills: []Skill{{Name: vals[nVals]}}} + } else { + x.Hobby.Skills = []Skill{{Name: vals[nVals]}} + } + case 1: + x.Hobby.Skills = append(x.Hobby.Skills, Skill{Name: vals[nVals]}) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if x.Hobby == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Hobby.Skills) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Hobby.Skills { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0.Difficulty) + } + } + } + + return vals, defs, reps +} + +func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + switch rep { + case 0: + x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] + case 1: + x.Hobby.Skills = append(x.Hobby.Skills, Skill{Difficulty: vals[nVals]}) + } + nVals++ + } + } + + return nVals, nLevels +} + func readFriendsID(x Person) ([]int32, []uint8, []uint8) { var vals []int32 var defs, reps []uint8 diff --git a/parquet_test.go b/parquet_test.go index ebc531a..d895566 100644 --- a/parquet_test.go +++ b/parquet_test.go @@ -42,7 +42,9 @@ func TestParquet(t *testing.T) { { name: "single nested person", input: [][]Person{ - {{Hobby: &Hobby{Name: "napping", Difficulty: pint32(10)}}}, + { + {Hobby: &Hobby{Name: "napping", Difficulty: pint32(10), Skills: []Skill{{Name: "meditation", Difficulty: "very"}}}}, + }, }, }, { @@ -514,7 +516,7 @@ func TestPageHeaders(t *testing.T) { return } - assert.Equal(t, 72, len(pageHeaders)) + assert.Equal(t, 80, len(pageHeaders)) } func TestStats(t *testing.T) { @@ -973,9 +975,15 @@ type Being struct { Age *int32 `parquet:"age"` } -type Hobby struct { +type Skill struct { Name string `parquet:"name"` - Difficulty *int32 `parquet:"difficulty"` + Difficulty string `parquet:"difficulty"` +} + +type Hobby struct { + Name string `parquet:"name"` + Difficulty *int32 `parquet:"difficulty"` + Skills []Skill `parquet:"skills"` } type Person struct { From feda1333dd0def239f9a5a5190d4051375ab7563 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Thu, 27 May 2021 06:51:00 -0600 Subject: [PATCH 02/25] wip --- internal/dremel/dremel_test.go | 102 +- internal/dremel/testcases/doc/doc.go | 24 + .../doc/generated.go} | 12 +- internal/dremel/testcases/person/generated.go | 1018 +++++++++++++++++ internal/dremel/testcases/person/person.go | 19 + parquet_test.go | 16 +- 6 files changed, 1154 insertions(+), 37 deletions(-) create mode 100644 internal/dremel/testcases/doc/doc.go rename internal/dremel/{dremel_generated_test.go => testcases/doc/generated.go} (98%) create mode 100644 internal/dremel/testcases/person/generated.go create mode 100644 internal/dremel/testcases/person/person.go diff --git a/internal/dremel/dremel_test.go b/internal/dremel/dremel_test.go index b5a1785..6ea6160 100644 --- a/internal/dremel/dremel_test.go +++ b/internal/dremel/dremel_test.go @@ -5,19 +5,19 @@ import ( "log" "testing" + "github.com/parsyl/parquet/internal/dremel/testcases/doc" + "github.com/parsyl/parquet/internal/dremel/testcases/person" "github.com/stretchr/testify/assert" ) -//go:generate parquetgen -input dremel_test.go -type Document -package dremel_test -output dremel_generated_test.go - var ( - dremelDocs = []Document{ + dremelDocs = []doc.Document{ { DocID: 10, - Link: &Link{Forward: []int64{20, 40, 60}}, - Names: []Name{ + Link: &doc.Link{Forward: []int64{20, 40, 60}}, + Names: []doc.Name{ { - Languages: []Language{ + Languages: []doc.Language{ {Code: "en-us", Country: pstring("us")}, {Code: "en"}, }, @@ -27,7 +27,7 @@ var ( URL: pstring("http://B"), }, { - Languages: []Language{ + Languages: []doc.Language{ {Code: "en-gb", Country: pstring("gb")}, }, }, @@ -35,8 +35,8 @@ var ( }, { DocID: 20, - Link: &Link{Backward: []int64{10, 30}, Forward: []int64{80}}, - Names: []Name{{URL: pstring("http://C")}}, + Link: &doc.Link{Backward: []int64{10, 30}, Forward: []int64{80}}, + Names: []doc.Name{{URL: pstring("http://C")}}, }, } ) @@ -45,7 +45,7 @@ var ( // results in the correct definition and repetition levels. func TestLevels(t *testing.T) { var buf bytes.Buffer - pw, err := NewParquetWriter(&buf) + pw, err := doc.NewParquetWriter(&buf) if err != nil { assert.NoError(t, err) } @@ -60,12 +60,12 @@ func TestLevels(t *testing.T) { pw.Close() - pr, err := NewParquetReader(bytes.NewReader(buf.Bytes())) + pr, err := doc.NewParquetReader(bytes.NewReader(buf.Bytes())) if err != nil { assert.NoError(t, err) } - expected := []Levels{ + expected := []doc.Levels{ {Name: "docid"}, {Name: "link.backward", Defs: []uint8{1, 2, 2}, Reps: []uint8{0, 0, 1}}, {Name: "link.forward", Defs: []uint8{2, 2, 2, 2}, Reps: []uint8{0, 1, 1, 0}}, @@ -77,11 +77,60 @@ func TestLevels(t *testing.T) { assert.Equal(t, expected, pr.Levels()) } +var ( + people = []person.Person{ + { + Name: "peep", + Hobby: &person.Hobby{ + Name: "napping", + Difficulty: pint32(10), + Skills: []person.Skill{ + {Name: "meditation", Difficulty: "very"}, + {Name: "calmness", Difficulty: "so-so"}, + }, + }, + }, + } +) + +func TestPersonLevels(t *testing.T) { + var buf bytes.Buffer + pw, err := person.NewParquetWriter(&buf) + if err != nil { + assert.NoError(t, err) + } + + for _, p := range people { + pw.Add(p) + } + + if err := pw.Write(); err != nil { + assert.NoError(t, err) + } + + pw.Close() + + pr, err := person.NewParquetReader(bytes.NewReader(buf.Bytes())) + if err != nil { + assert.NoError(t, err) + } + + expected := []person.Levels{ + {Name: "name"}, + {Name: "hobby.name", Defs: []uint8{1}}, + {Name: "hobby.difficulty", Defs: []uint8{2}}, + {Name: "hobby.skills.name", Defs: []uint8{2, 2}, Reps: []uint8{0, 1}}, + {Name: "hobby.skills.difficulty", Defs: []uint8{2, 2}, Reps: []uint8{0, 1}}, + } + + assert.Equal(t, expected, pr.Levels()) +} + // TestDremel uses the example from the dremel paper and writes then // reads from a parquet file to make sure nested fields work correctly. func TestDremel(t *testing.T) { var buf bytes.Buffer - pw, err := NewParquetWriter(&buf) + pw, err := doc.NewParquetWriter(&buf) if err != nil { log.Fatal(err) } @@ -96,14 +145,14 @@ func TestDremel(t *testing.T) { pw.Close() - pr, err := NewParquetReader(bytes.NewReader(buf.Bytes())) + pr, err := doc.NewParquetReader(bytes.NewReader(buf.Bytes())) if err != nil { log.Fatal(err) } - var out []Document + var out []doc.Document for pr.Next() { - var d Document + var d doc.Document pr.Scan(&d) out = append(out, d) } @@ -111,23 +160,10 @@ func TestDremel(t *testing.T) { assert.Equal(t, dremelDocs, out) } -type Link struct { - Backward []int64 `parquet:"backward"` - Forward []int64 `parquet:"forward"` -} - -type Language struct { - Code string `parquet:"code"` - Country *string `parquet:"country"` -} - -type Name struct { - Languages []Language `parquet:"languages"` - URL *string `parquet:"url"` +func pstring(s string) *string { + return &s } -type Document struct { - DocID int64 `parquet:"docid"` - Link *Link `parquet:"link"` - Names []Name `parquet:"names"` +func pint32(i int32) *int32 { + return &i } diff --git a/internal/dremel/testcases/doc/doc.go b/internal/dremel/testcases/doc/doc.go new file mode 100644 index 0000000..0807af6 --- /dev/null +++ b/internal/dremel/testcases/doc/doc.go @@ -0,0 +1,24 @@ +package doc + +//go:generate parquetgen -input doc.go -type Document -package doc -output generated.go + +type Link struct { + Backward []int64 `parquet:"backward"` + Forward []int64 `parquet:"forward"` +} + +type Language struct { + Code string `parquet:"code"` + Country *string `parquet:"country"` +} + +type Name struct { + Languages []Language `parquet:"languages"` + URL *string `parquet:"url"` +} + +type Document struct { + DocID int64 `parquet:"docid"` + Link *Link `parquet:"link"` + Names []Name `parquet:"names"` +} diff --git a/internal/dremel/dremel_generated_test.go b/internal/dremel/testcases/doc/generated.go similarity index 98% rename from internal/dremel/dremel_generated_test.go rename to internal/dremel/testcases/doc/generated.go index 9868cc5..ada8f0a 100644 --- a/internal/dremel/dremel_generated_test.go +++ b/internal/dremel/testcases/doc/generated.go @@ -1,4 +1,4 @@ -package dremel_test +package doc // Code generated by github.com/parsyl/parquet. DO NOT EDIT. @@ -21,6 +21,7 @@ type compression int const ( compressionUncompressed compression = 0 compressionSnappy compression = 1 + compressionGzip compression = 2 compressionUnknown compression = -1 ) @@ -362,6 +363,8 @@ func fieldCompression(c compression) func(*parquet.RequiredField) { return parquet.RequiredFieldUncompressed case compressionSnappy: return parquet.RequiredFieldSnappy + case compressionGzip: + return parquet.RequiredFieldGzip default: return parquet.RequiredFieldUncompressed } @@ -373,6 +376,8 @@ func optionalFieldCompression(c compression) func(*parquet.OptionalField) { return parquet.OptionalFieldUncompressed case compressionSnappy: return parquet.OptionalFieldSnappy + case compressionGzip: + return parquet.OptionalFieldGzip default: return parquet.OptionalFieldUncompressed } @@ -438,6 +443,11 @@ func Snappy(p *ParquetWriter) error { return nil } +func Gzip(p *ParquetWriter) error { + p.compression = compressionGzip + return nil +} + func withCompression(c compression) func(*ParquetWriter) error { return func(p *ParquetWriter) error { p.compression = c diff --git a/internal/dremel/testcases/person/generated.go b/internal/dremel/testcases/person/generated.go new file mode 100644 index 0000000..f328dc3 --- /dev/null +++ b/internal/dremel/testcases/person/generated.go @@ -0,0 +1,1018 @@ +package person + +// Code generated by github.com/parsyl/parquet. DO NOT EDIT. + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "strings" + + "github.com/parsyl/parquet" + sch "github.com/parsyl/parquet/schema" + + "math" + "sort" +) + +type compression int + +const ( + compressionUncompressed compression = 0 + compressionSnappy compression = 1 + compressionGzip compression = 2 + compressionUnknown compression = -1 +) + +// ParquetWriter reprents a row group +type ParquetWriter struct { + fields []Field + + len int + + // child points to the next page + child *ParquetWriter + + // max is the number of Record items that can get written before + // a new set of column chunks is written + max int + + meta *parquet.Metadata + w io.Writer + compression compression +} + +func Fields(compression compression) []Field { + return []Field{ + NewStringField(readName, writeName, []string{"name"}, fieldCompression(compression)), + NewStringOptionalField(readHobbyName, writeHobbyName, []string{"hobby", "name"}, []int{1, 0}, optionalFieldCompression(compression)), + NewInt32OptionalField(readHobbyDifficulty, writeHobbyDifficulty, []string{"hobby", "difficulty"}, []int{1, 1}, optionalFieldCompression(compression)), + NewStringOptionalField(readHobbySkillsName, writeHobbySkillsName, []string{"hobby", "skills", "name"}, []int{1, 2, 0}, optionalFieldCompression(compression)), + NewStringOptionalField(readHobbySkillsDifficulty, writeHobbySkillsDifficulty, []string{"hobby", "skills", "difficulty"}, []int{1, 2, 0}, optionalFieldCompression(compression)), + } +} + +func readName(x Person) string { + return x.Name +} + +func writeName(x *Person, vals []string) { + x.Name = vals[0] +} + +func readHobbyName(x Person) ([]string, []uint8, []uint8) { + switch { + case x.Hobby == nil: + return nil, []uint8{0}, nil + default: + return []string{x.Hobby.Name}, []uint8{1}, nil + } +} + +func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.Hobby = &Hobby{Name: vals[0]} + return 1, 1 + } + + return 0, 1 +} + +func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { + switch { + case x.Hobby == nil: + return nil, []uint8{0}, nil + case x.Hobby.Difficulty == nil: + return nil, []uint8{1}, nil + default: + return []int32{*x.Hobby.Difficulty}, []uint8{2}, nil + } +} + +func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + if x.Hobby == nil { + x.Hobby = &Hobby{} + } + case 2: + if x.Hobby == nil { + x.Hobby = &Hobby{Difficulty: pint32(vals[0])} + } else { + x.Hobby.Difficulty = pint32(vals[0]) + } + return 1, 1 + } + + return 0, 1 +} + +func readHobbySkillsName(x Person) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if x.Hobby == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Hobby.Skills) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Hobby.Skills { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0.Name) + } + } + } + + return vals, defs, reps +} + +func writeHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + if x.Hobby == nil { + x.Hobby = &Hobby{} + } + case 2: + switch rep { + case 0: + if x.Hobby == nil { + x.Hobby = &Hobby{Skills: []Skill{{Name: vals[nVals]}}} + } else { + x.Hobby.Skills = []Skill{{Name: vals[nVals]}} + } + case 1: + x.Hobby.Skills = append(x.Hobby.Skills, Skill{Name: vals[nVals]}) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if x.Hobby == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Hobby.Skills) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Hobby.Skills { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0.Difficulty) + } + } + } + + return vals, defs, reps +} + +func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + switch rep { + case 0: + x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] + case 1: + x.Hobby.Skills = append(x.Hobby.Skills, Skill{Difficulty: vals[nVals]}) + } + nVals++ + } + } + + return nVals, nLevels +} + +func fieldCompression(c compression) func(*parquet.RequiredField) { + switch c { + case compressionUncompressed: + return parquet.RequiredFieldUncompressed + case compressionSnappy: + return parquet.RequiredFieldSnappy + case compressionGzip: + return parquet.RequiredFieldGzip + default: + return parquet.RequiredFieldUncompressed + } +} + +func optionalFieldCompression(c compression) func(*parquet.OptionalField) { + switch c { + case compressionUncompressed: + return parquet.OptionalFieldUncompressed + case compressionSnappy: + return parquet.OptionalFieldSnappy + case compressionGzip: + return parquet.OptionalFieldGzip + default: + return parquet.OptionalFieldUncompressed + } +} + +func NewParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { + return newParquetWriter(w, append(opts, begin)...) +} + +func newParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { + p := &ParquetWriter{ + max: 1000, + w: w, + compression: compressionSnappy, + } + + for _, opt := range opts { + if err := opt(p); err != nil { + return nil, err + } + } + + p.fields = Fields(p.compression) + if p.meta == nil { + ff := Fields(p.compression) + schema := make([]parquet.Field, len(ff)) + for i, f := range ff { + schema[i] = f.Schema() + } + p.meta = parquet.New(schema...) + } + + return p, nil +} + +// MaxPageSize is the maximum number of rows in each row groups' page. +func MaxPageSize(m int) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.max = m + return nil + } +} + +func begin(p *ParquetWriter) error { + _, err := p.w.Write([]byte("PAR1")) + return err +} + +func withMeta(m *parquet.Metadata) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.meta = m + return nil + } +} + +func Uncompressed(p *ParquetWriter) error { + p.compression = compressionUncompressed + return nil +} + +func Snappy(p *ParquetWriter) error { + p.compression = compressionSnappy + return nil +} + +func Gzip(p *ParquetWriter) error { + p.compression = compressionGzip + return nil +} + +func withCompression(c compression) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.compression = c + return nil + } +} + +func (p *ParquetWriter) Write() error { + for i, f := range p.fields { + if err := f.Write(p.w, p.meta); err != nil { + return err + } + + for child := p.child; child != nil; child = child.child { + if err := child.fields[i].Write(p.w, p.meta); err != nil { + return err + } + } + } + + p.fields = Fields(p.compression) + p.child = nil + p.len = 0 + + schema := make([]parquet.Field, len(p.fields)) + for i, f := range p.fields { + schema[i] = f.Schema() + } + p.meta.StartRowGroup(schema...) + return nil +} + +func (p *ParquetWriter) Close() error { + if err := p.meta.Footer(p.w); err != nil { + return err + } + + _, err := p.w.Write([]byte("PAR1")) + return err +} + +func (p *ParquetWriter) Add(rec Person) { + if p.len == p.max { + if p.child == nil { + // an error can't happen here + p.child, _ = newParquetWriter(p.w, MaxPageSize(p.max), withMeta(p.meta), withCompression(p.compression)) + } + + p.child.Add(rec) + return + } + + p.meta.NextDoc() + for _, f := range p.fields { + f.Add(rec) + } + + p.len++ +} + +type Field interface { + Add(r Person) + Write(w io.Writer, meta *parquet.Metadata) error + Schema() parquet.Field + Scan(r *Person) + Read(r io.ReadSeeker, pg parquet.Page) error + Name() string + Levels() ([]uint8, []uint8) +} + +func getFields(ff []Field) map[string]Field { + m := make(map[string]Field, len(ff)) + for _, f := range ff { + m[f.Name()] = f + } + return m +} + +func NewParquetReader(r io.ReadSeeker, opts ...func(*ParquetReader)) (*ParquetReader, error) { + ff := Fields(compressionUnknown) + pr := &ParquetReader{ + r: r, + } + + for _, opt := range opts { + opt(pr) + } + + schema := make([]parquet.Field, len(ff)) + for i, f := range ff { + pr.fieldNames = append(pr.fieldNames, f.Name()) + schema[i] = f.Schema() + } + + meta := parquet.New(schema...) + if err := meta.ReadFooter(r); err != nil { + return nil, err + } + pr.rows = meta.Rows() + var err error + pr.pages, err = meta.Pages() + if err != nil { + return nil, err + } + + pr.rowGroups = meta.RowGroups() + _, err = r.Seek(4, io.SeekStart) + if err != nil { + return nil, err + } + pr.meta = meta + + return pr, pr.readRowGroup() +} + +func readerIndex(i int) func(*ParquetReader) { + return func(p *ParquetReader) { + p.index = i + } +} + +// ParquetReader reads one page from a row group. +type ParquetReader struct { + fields map[string]Field + fieldNames []string + index int + cursor int64 + rows int64 + rowGroupCursor int64 + rowGroupCount int64 + pages map[string][]parquet.Page + meta *parquet.Metadata + err error + + r io.ReadSeeker + rowGroups []parquet.RowGroup +} + +type Levels struct { + Name string + Defs []uint8 + Reps []uint8 +} + +func (p *ParquetReader) Levels() []Levels { + var out []Levels + //for { + for _, name := range p.fieldNames { + f := p.fields[name] + d, r := f.Levels() + out = append(out, Levels{Name: f.Name(), Defs: d, Reps: r}) + } + // if err := p.readRowGroup(); err != nil { + // break + // } + //} + return out +} + +func (p *ParquetReader) Error() error { + return p.err +} + +func (p *ParquetReader) readRowGroup() error { + p.rowGroupCursor = 0 + + if len(p.rowGroups) == 0 { + p.rowGroupCount = 0 + return nil + } + + rg := p.rowGroups[0] + p.fields = getFields(Fields(compressionUnknown)) + p.rowGroupCount = rg.Rows + p.rowGroupCursor = 0 + for _, col := range rg.Columns() { + name := strings.Join(col.MetaData.PathInSchema, ".") + f, ok := p.fields[name] + if !ok { + return fmt.Errorf("unknown field: %s", name) + } + pages := p.pages[name] + if len(pages) <= p.index { + break + } + + pg := pages[0] + if err := f.Read(p.r, pg); err != nil { + return fmt.Errorf("unable to read field %s, err: %s", f.Name(), err) + } + p.pages[name] = p.pages[name][1:] + } + p.rowGroups = p.rowGroups[1:] + return nil +} + +func (p *ParquetReader) Rows() int64 { + return p.rows +} + +func (p *ParquetReader) Next() bool { + if p.err == nil && p.cursor >= p.rows { + return false + } + if p.rowGroupCursor >= p.rowGroupCount { + p.err = p.readRowGroup() + if p.err != nil { + return false + } + } + + p.cursor++ + p.rowGroupCursor++ + return true +} + +func (p *ParquetReader) Scan(x *Person) { + if p.err != nil { + return + } + + for _, name := range p.fieldNames { + f := p.fields[name] + f.Scan(x) + } +} + +type StringField struct { + parquet.RequiredField + vals []string + read func(r Person) string + write func(r *Person, vals []string) + stats *stringStats +} + +func NewStringField(read func(r Person) string, write func(r *Person, vals []string), path []string, opts ...func(*parquet.RequiredField)) *StringField { + return &StringField{ + read: read, + write: write, + RequiredField: parquet.NewRequiredField(path, opts...), + stats: newStringStats(), + } +} + +func (f *StringField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} +} + +func (f *StringField) Write(w io.Writer, meta *parquet.Metadata) error { + buf := bytes.Buffer{} + + for _, s := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { + return err + } + buf.Write([]byte(s)) + } + + return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) +} + +func (f *StringField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + for j := 0; j < pg.N; j++ { + var x int32 + if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { + return err + } + s := make([]byte, x) + if _, err := rr.Read(s); err != nil { + return err + } + + f.vals = append(f.vals, string(s)) + } + return nil +} + +func (f *StringField) Scan(r *Person) { + if len(f.vals) == 0 { + return + } + + f.write(r, f.vals) + f.vals = f.vals[1:] +} + +func (f *StringField) Add(r Person) { + v := f.read(r) + f.stats.add(v) + f.vals = append(f.vals, v) +} + +func (f *StringField) Levels() ([]uint8, []uint8) { + return nil, nil +} + +type StringOptionalField struct { + parquet.OptionalField + vals []string + read func(r Person) ([]string, []uint8, []uint8) + write func(r *Person, vals []string, def, rep []uint8) (int, int) + stats *stringOptionalStats +} + +func NewStringOptionalField(read func(r Person) ([]string, []uint8, []uint8), write func(r *Person, vals []string, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *StringOptionalField { + return &StringOptionalField{ + read: read, + write: write, + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newStringOptionalStats(maxDef(types)), + } +} + +func (f *StringOptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: f.RepetitionType, Types: f.Types} +} + +func (f *StringOptionalField) Add(r Person) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) +} + +func (f *StringOptionalField) Scan(r *Person) { + if len(f.Defs) == 0 { + return + } + + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] + } +} + +func (f *StringOptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + buf := bytes.Buffer{} + + for _, s := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { + return err + } + buf.Write([]byte(s)) + } + + return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) +} + +func (f *StringOptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + for j := 0; j < f.Values(); j++ { + var x int32 + if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { + return err + } + s := make([]byte, x) + if _, err := rr.Read(s); err != nil { + return err + } + + f.vals = append(f.vals, string(s)) + } + return nil +} + +func (f *StringOptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps +} + +type Int32OptionalField struct { + parquet.OptionalField + vals []int32 + read func(r Person) ([]int32, []uint8, []uint8) + write func(r *Person, vals []int32, def, rep []uint8) (int, int) + stats *int32optionalStats +} + +func NewInt32OptionalField(read func(r Person) ([]int32, []uint8, []uint8), write func(r *Person, vals []int32, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Int32OptionalField { + return &Int32OptionalField{ + read: read, + write: write, + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newint32optionalStats(maxDef(types)), + } +} + +func (f *Int32OptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int32Type, RepetitionType: f.RepetitionType, Types: f.Types} +} + +func (f *Int32OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + var buf bytes.Buffer + for _, v := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + return err + } + } + return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) +} + +func (f *Int32OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + v := make([]int32, f.Values()-len(f.vals)) + err = binary.Read(rr, binary.LittleEndian, &v) + f.vals = append(f.vals, v...) + return err +} + +func (f *Int32OptionalField) Add(r Person) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) +} + +func (f *Int32OptionalField) Scan(r *Person) { + if len(f.Defs) == 0 { + return + } + + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] + } +} + +func (f *Int32OptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps +} + +type stringStats struct { + vals []string + min []byte + max []byte +} + +func newStringStats() *stringStats { + return &stringStats{} +} + +func (s *stringStats) add(val string) { + s.vals = append(s.vals, val) +} + +func (s *stringStats) NullCount() *int64 { + return nil +} + +func (s *stringStats) DistinctCount() *int64 { + return nil +} + +func (s *stringStats) Min() []byte { + if s.min == nil { + s.minMax() + } + return s.min +} + +func (s *stringStats) Max() []byte { + if s.max == nil { + s.minMax() + } + return s.max +} + +func (s *stringStats) minMax() { + if len(s.vals) == 0 { + return + } + + tmp := make([]string, len(s.vals)) + copy(tmp, s.vals) + sort.Strings(tmp) + s.min = []byte(tmp[0]) + s.max = []byte(tmp[len(tmp)-1]) +} + +type stringOptionalStats struct { + vals []string + min []byte + max []byte + nils int64 + maxDef uint8 +} + +func newStringOptionalStats(d uint8) *stringOptionalStats { + return &stringOptionalStats{maxDef: d} +} + +func (s *stringOptionalStats) add(vals []string, defs []uint8) { + var i int + for _, def := range defs { + if def < s.maxDef { + s.nils++ + } else { + s.vals = append(s.vals, vals[i]) + i++ + } + } +} + +func (s *stringOptionalStats) NullCount() *int64 { + return &s.nils +} + +func (s *stringOptionalStats) DistinctCount() *int64 { + return nil +} + +func (s *stringOptionalStats) Min() []byte { + if s.min == nil { + s.minMax() + } + return s.min +} + +func (s *stringOptionalStats) Max() []byte { + if s.max == nil { + s.minMax() + } + return s.max +} + +func (s *stringOptionalStats) minMax() { + if len(s.vals) == 0 { + return + } + + tmp := make([]string, len(s.vals)) + copy(tmp, s.vals) + sort.Strings(tmp) + s.min = []byte(tmp[0]) + s.max = []byte(tmp[len(tmp)-1]) +} + +type int32optionalStats struct { + min int32 + max int32 + nils int64 + nonNils int64 + maxDef uint8 +} + +func newint32optionalStats(d uint8) *int32optionalStats { + return &int32optionalStats{ + min: int32(math.MaxInt32), + maxDef: d, + } +} + +func (f *int32optionalStats) add(vals []int32, defs []uint8) { + var i int + for _, def := range defs { + if def < f.maxDef { + f.nils++ + } else { + val := vals[i] + i++ + + f.nonNils++ + if val < f.min { + f.min = val + } + if val > f.max { + f.max = val + } + } + } +} + +func (f *int32optionalStats) bytes(val int32) []byte { + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, val) + return buf.Bytes() +} + +func (f *int32optionalStats) NullCount() *int64 { + return &f.nils +} + +func (f *int32optionalStats) DistinctCount() *int64 { + return nil +} + +func (f *int32optionalStats) Min() []byte { + if f.nonNils == 0 { + return nil + } + return f.bytes(f.min) +} + +func (f *int32optionalStats) Max() []byte { + if f.nonNils == 0 { + return nil + } + return f.bytes(f.max) +} + +func pint32(i int32) *int32 { return &i } +func puint32(i uint32) *uint32 { return &i } +func pint64(i int64) *int64 { return &i } +func puint64(i uint64) *uint64 { return &i } +func pbool(b bool) *bool { return &b } +func pstring(s string) *string { return &s } +func pfloat32(f float32) *float32 { return &f } +func pfloat64(f float64) *float64 { return &f } + +// keeps track of the indices of repeated fields +// that have already been handled by a previous field +type indices []int + +func (i indices) rep(rep uint8) { + if rep > 0 { + r := int(rep) - 1 + i[r] = i[r] + 1 + for j := int(rep); j < len(i); j++ { + i[j] = 0 + } + } +} + +func maxDef(types []int) uint8 { + var out uint8 + for _, typ := range types { + if typ > 0 { + out++ + } + } + return out +} + +func Int32Type(se *sch.SchemaElement) { + t := sch.Type_INT32 + se.Type = &t +} + +func Uint32Type(se *sch.SchemaElement) { + t := sch.Type_INT32 + se.Type = &t + ct := sch.ConvertedType_UINT_32 + se.ConvertedType = &ct +} + +func Int64Type(se *sch.SchemaElement) { + t := sch.Type_INT64 + se.Type = &t +} + +func Uint64Type(se *sch.SchemaElement) { + t := sch.Type_INT64 + se.Type = &t + ct := sch.ConvertedType_UINT_64 + se.ConvertedType = &ct +} + +func Float32Type(se *sch.SchemaElement) { + t := sch.Type_FLOAT + se.Type = &t +} + +func Float64Type(se *sch.SchemaElement) { + t := sch.Type_DOUBLE + se.Type = &t +} + +func BoolType(se *sch.SchemaElement) { + t := sch.Type_BOOLEAN + se.Type = &t +} + +func StringType(se *sch.SchemaElement) { + t := sch.Type_BYTE_ARRAY + se.Type = &t +} diff --git a/internal/dremel/testcases/person/person.go b/internal/dremel/testcases/person/person.go new file mode 100644 index 0000000..ce9cef0 --- /dev/null +++ b/internal/dremel/testcases/person/person.go @@ -0,0 +1,19 @@ +package person + +//go:generate parquetgen -input person.go -type Person -package person -output generated.go + +type Skill struct { + Name string `parquet:"name"` + Difficulty string `parquet:"difficulty"` +} + +type Hobby struct { + Name string `parquet:"name"` + Difficulty *int32 `parquet:"difficulty"` + Skills []Skill `parquet:"skills"` +} + +type Person struct { + Name string `parquet:"name"` + Hobby *Hobby `parquet:"hobby"` +} diff --git a/parquet_test.go b/parquet_test.go index d895566..8a26e2a 100644 --- a/parquet_test.go +++ b/parquet_test.go @@ -43,7 +43,16 @@ func TestParquet(t *testing.T) { name: "single nested person", input: [][]Person{ { - {Hobby: &Hobby{Name: "napping", Difficulty: pint32(10), Skills: []Skill{{Name: "meditation", Difficulty: "very"}}}}, + { + Hobby: &Hobby{ + Name: "napping", + Difficulty: pint32(10), + Skills: []Skill{ + {Name: "meditation", Difficulty: "very"}, + {Name: "calmness", Difficulty: "so-so"}, + }, + }, + }, }, }, }, @@ -971,8 +980,9 @@ func writeFloat64(f float64) []byte { } type Being struct { - ID int32 `parquet:"id"` - Age *int32 `parquet:"age"` + ID int32 `parquet:"id"` + Name string `parquet:"name"` + Age *int32 `parquet:"age"` } type Skill struct { From e89fd9ff5c94ca908ff1ba5211de9f522204b1b2 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Tue, 1 Jun 2021 08:51:12 -0600 Subject: [PATCH 03/25] git TestFields passing --- internal/dremel/dremel_test.go | 15 +- internal/dremel/testcases/doc/doc.go | 2 +- internal/dremel/testcases/doc/generated.go | 38 +- internal/dremel/write_repeated.go | 7 +- internal/dremel/write_test.go | 35 ++ internal/fields/fields.go | 427 ++++++++----------- internal/fields/fields_test.go | 463 +++++++++------------ internal/fields/repetition.go | 9 - internal/gen/funcs.go | 8 +- internal/parse/parse.go | 27 +- 10 files changed, 468 insertions(+), 563 deletions(-) diff --git a/internal/dremel/dremel_test.go b/internal/dremel/dremel_test.go index 6ea6160..7048c0e 100644 --- a/internal/dremel/dremel_test.go +++ b/internal/dremel/dremel_test.go @@ -14,7 +14,9 @@ var ( dremelDocs = []doc.Document{ { DocID: 10, - Link: &doc.Link{Forward: []int64{20, 40, 60}}, + Links: &doc.Link{ + Forward: []int64{20, 40, 60}, + }, Names: []doc.Name{ { Languages: []doc.Language{ @@ -35,8 +37,15 @@ var ( }, { DocID: 20, - Link: &doc.Link{Backward: []int64{10, 30}, Forward: []int64{80}}, - Names: []doc.Name{{URL: pstring("http://C")}}, + Links: &doc.Link{ + Backward: []int64{10, 30}, + Forward: []int64{80}, + }, + Names: []doc.Name{ + { + URL: pstring("http://C"), + }, + }, }, } ) diff --git a/internal/dremel/testcases/doc/doc.go b/internal/dremel/testcases/doc/doc.go index 0807af6..684b991 100644 --- a/internal/dremel/testcases/doc/doc.go +++ b/internal/dremel/testcases/doc/doc.go @@ -19,6 +19,6 @@ type Name struct { type Document struct { DocID int64 `parquet:"docid"` - Link *Link `parquet:"link"` + Links *Link `parquet:"link"` Names []Name `parquet:"names"` } diff --git a/internal/dremel/testcases/doc/generated.go b/internal/dremel/testcases/doc/generated.go index ada8f0a..cf07e11 100644 --- a/internal/dremel/testcases/doc/generated.go +++ b/internal/dremel/testcases/doc/generated.go @@ -46,8 +46,8 @@ type ParquetWriter struct { func Fields(compression compression) []Field { return []Field{ NewInt64Field(readDocID, writeDocID, []string{"docid"}, fieldCompression(compression)), - NewInt64OptionalField(readLinkBackward, writeLinkBackward, []string{"link", "backward"}, []int{1, 2}, optionalFieldCompression(compression)), - NewInt64OptionalField(readLinkForward, writeLinkForward, []string{"link", "forward"}, []int{1, 2}, optionalFieldCompression(compression)), + NewInt64OptionalField(readLinksBackward, writeLinksBackward, []string{"link", "backward"}, []int{1, 2}, optionalFieldCompression(compression)), + NewInt64OptionalField(readLinksForward, writeLinksForward, []string{"link", "forward"}, []int{1, 2}, optionalFieldCompression(compression)), NewStringOptionalField(readNamesLanguagesCode, writeNamesLanguagesCode, []string{"names", "languages", "code"}, []int{2, 2, 0}, optionalFieldCompression(compression)), NewStringOptionalField(readNamesLanguagesCountry, writeNamesLanguagesCountry, []string{"names", "languages", "country"}, []int{2, 2, 1}, optionalFieldCompression(compression)), NewStringOptionalField(readNamesURL, writeNamesURL, []string{"names", "url"}, []int{2, 1}, optionalFieldCompression(compression)), @@ -62,20 +62,20 @@ func writeDocID(x *Document, vals []int64) { x.DocID = vals[0] } -func readLinkBackward(x Document) ([]int64, []uint8, []uint8) { +func readLinksBackward(x Document) ([]int64, []uint8, []uint8) { var vals []int64 var defs, reps []uint8 var lastRep uint8 - if x.Link == nil { + if x.Links == nil { defs = append(defs, 0) reps = append(reps, lastRep) } else { - if len(x.Link.Backward) == 0 { + if len(x.Links.Backward) == 0 { defs = append(defs, 1) reps = append(reps, lastRep) } else { - for i0, x0 := range x.Link.Backward { + for i0, x0 := range x.Links.Backward { if i0 == 1 { lastRep = 1 } @@ -89,7 +89,7 @@ func readLinkBackward(x Document) ([]int64, []uint8, []uint8) { return vals, defs, reps } -func writeLinkBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) { +func writeLinksBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -105,13 +105,13 @@ func writeLinkBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) switch def { case 1: - x.Link = &Link{} + x.Links = &Link{} case 2: switch rep { case 0: - x.Link = &Link{Backward: []int64{vals[nVals]}} + x.Links = &Link{Backward: []int64{vals[nVals]}} case 1: - x.Link.Backward = append(x.Link.Backward, vals[nVals]) + x.Links.Backward = append(x.Links.Backward, vals[nVals]) } nVals++ } @@ -120,20 +120,20 @@ func writeLinkBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) return nVals, nLevels } -func readLinkForward(x Document) ([]int64, []uint8, []uint8) { +func readLinksForward(x Document) ([]int64, []uint8, []uint8) { var vals []int64 var defs, reps []uint8 var lastRep uint8 - if x.Link == nil { + if x.Links == nil { defs = append(defs, 0) reps = append(reps, lastRep) } else { - if len(x.Link.Forward) == 0 { + if len(x.Links.Forward) == 0 { defs = append(defs, 1) reps = append(reps, lastRep) } else { - for i0, x0 := range x.Link.Forward { + for i0, x0 := range x.Links.Forward { if i0 == 1 { lastRep = 1 } @@ -147,7 +147,7 @@ func readLinkForward(x Document) ([]int64, []uint8, []uint8) { return vals, defs, reps } -func writeLinkForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { +func writeLinksForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -164,8 +164,14 @@ func writeLinkForward(x *Document, vals []int64, defs, reps []uint8) (int, int) switch def { case 2: switch rep { + case 0: + if x.Links == nil { + x.Links = &Link{Forward: []int64{vals[nVals]}} + } else { + x.Links.Forward = append(x.Links.Forward, vals[nVals]) + } default: - x.Link.Forward = append(x.Link.Forward, vals[nVals]) + x.Links.Forward = append(x.Links.Forward, vals[nVals]) } nVals++ } diff --git a/internal/dremel/write_repeated.go b/internal/dremel/write_repeated.go index e8d1a1d..5136b10 100644 --- a/internal/dremel/write_repeated.go +++ b/internal/dremel/write_repeated.go @@ -113,6 +113,7 @@ func writeRequired(f fields.Field) string { func writeRepeated(i int, flds []fields.Field) string { f := flds[i] f.Seen = fields.Seen(i, flds) + fmt.Println("seen", f.Seen) wi := writeRepeatedInput{ Field: f, @@ -170,7 +171,7 @@ func useIfElse(def, rep int, seen fields.RepetitionTypes, f fields.Field) bool { } func writeCases(f fields.Field, seen fields.RepetitionTypes) []int { - var dfs []int + var defs []int start := 1 if seen.Repeated() { start = 1 + len(seen) @@ -182,9 +183,9 @@ func writeCases(f fields.Field, seen fields.RepetitionTypes) []int { } for def := start; def <= maxDef; def++ { - dfs = append(dfs, def) + defs = append(defs, def) } - return dfs + return defs } func nilField(i int, f fields.Field) string { diff --git a/internal/dremel/write_test.go b/internal/dremel/write_test.go index cc522c9..f2666d5 100644 --- a/internal/dremel/write_test.go +++ b/internal/dremel/write_test.go @@ -621,6 +621,41 @@ func TestWrite(t *testing.T) { } } + return nVals, nLevels +}`, + }, + { + name: "nested 2 deep", + fields: []fields.Field{ + {FieldNames: []string{"Hobby", "Skills", "Name"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required}}, + {Type: "Person", TypeName: "string", FieldNames: []string{"Hobby", "Skills", "Difficulty"}, FieldTypes: []string{"Hobby", "Skill", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required}}, + }, + result: `func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + switch rep { + case 0: + x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] + case 1: + x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] + } + nVals++ + } + } + return nVals, nLevels }`, }, diff --git a/internal/fields/fields.go b/internal/fields/fields.go index efea9cb..3093fbe 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -1,25 +1,24 @@ package fields import ( - "bytes" "fmt" - "log" "strings" ) // Field holds metadata that is required by parquetgen in order // to generate code. type Field struct { - Type string - RepetitionTypes RepetitionTypes - FieldNames []string - ColumnNames []string - FieldTypes []string - Seen RepetitionTypes - TypeName string - FieldType string - ParquetType string - Category string + // Type of the top level struct + Type string + RepetitionType RepetitionType + FieldName string + ColumnName string + TypeName string + FieldType string + ParquetType string + Category string + Parent *Field + Children []Field } type input struct { @@ -28,40 +27,65 @@ type input struct { Append bool } -// Seen counts how many sub-fields have been previously processed -// so that some of the cases and if statements can be skipped when -// re-assembling records -func Seen(i int, flds []Field) []RepetitionType { - f := flds[i] - out := rts([]RepetitionType{}) - - l := len(f.FieldNames) - for _, fld := range flds[:i] { - end := l - if len(fld.FieldNames) <= l { - end = len(fld.FieldNames) - } - for i, n := range fld.FieldNames[:end] { - if n == f.FieldNames[i] { - out = out.add(i, fld.RepetitionTypes) - } - } +func (f Field) chain() []Field { + out := []Field{f} + for fld := f.Parent; fld != nil; fld = fld.Parent { + out = append(out, *fld) + } + return out +} + +func reverse(out []Field) []Field { + for i, j := 0, len(out)-1; i < j; i, j = i+1, j-1 { + out[i], out[j] = out[j], out[i] } + return out +} - return []RepetitionType(out) +func (f Field) FieldNames() []string { + var out []string + for _, fld := range reverse(f.chain()) { + out = append(out, fld.FieldName) + } + return out +} + +func (f Field) FieldTypes() []string { + var out []string + for _, fld := range reverse(f.chain()) { + out = append(out, fld.FieldType) + } + return out +} + +func (f Field) ColumnNames() []string { + var out []string + for _, fld := range reverse(f.chain()) { + out = append(out, fld.ColumnName) + } + return out +} + +func (f Field) RepetitionTypes() RepetitionTypes { + var out []RepetitionType + for _, fld := range reverse(f.chain()) { + out = append(out, fld.RepetitionType) + } + return out } // DefIndex calculates the index of the // nested field with the given definition level. func (f Field) DefIndex(def int) int { - var count int - for j, o := range f.RepetitionTypes { - if o == Optional || o == Repeated { + var count, i int + for _, fld := range reverse(f.chain()) { + if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { count++ } if count == def { - return j + return i } + i++ } return def } @@ -70,8 +94,8 @@ func (f Field) DefIndex(def int) int { // level for the nested field. func (f Field) MaxDef() int { var out int - for _, o := range f.RepetitionTypes { - if o == Optional || o == Repeated { + for _, fld := range reverse(f.chain()) { + if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { out++ } } @@ -82,8 +106,8 @@ func (f Field) MaxDef() int { // level for the nested field. func (f Field) MaxRep() int { var out int - for _, o := range f.RepetitionTypes { - if o == Repeated { + for _, fld := range reverse(f.chain()) { + if fld.RepetitionType == Repeated { out++ } } @@ -121,8 +145,9 @@ func (f Field) NilField(n int) (string, RepetitionType, int, int) { var j, reps int var o RepetitionType - for j, o = range f.RepetitionTypes { - fields = append(fields, f.FieldNames[j]) + fieldNames := f.FieldNames() + for j, o = range f.RepetitionTypes() { + fields = append(fields, fieldNames[j]) if o == Optional { count++ } else if o == Repeated { @@ -138,253 +163,167 @@ func (f Field) NilField(n int) (string, RepetitionType, int, int) { // Child returns a sub-field based on i func (f Field) Child(i int) Field { - return Field{ - RepetitionTypes: f.RepetitionTypes[i:], - FieldNames: f.FieldNames[i:], - FieldTypes: f.FieldTypes[i:], - } -} - -// Parent returns a parent field based on i -func (f Field) Parent(i int) Field { - return Field{ - RepetitionTypes: f.RepetitionTypes[:i], - FieldNames: f.FieldNames[:i], - FieldTypes: f.FieldTypes[:i], - } -} - -// Copy returns a deep copy of the field -func (f Field) Copy() Field { - return Field{ - RepetitionTypes: append(f.RepetitionTypes[:0:0], f.RepetitionTypes...), - FieldNames: append(f.FieldNames[:0:0], f.FieldNames...), - FieldTypes: append(f.FieldTypes[:0:0], f.FieldTypes...), - Seen: append(f.Seen[:0:0], f.Seen...), - } + return reverse(f.chain())[i] } // Repeated wraps RepetitionTypes.Repeated() func (f Field) Repeated() bool { - return f.RepetitionTypes.Repeated() + return f.RepetitionTypes().Repeated() } // Optional wraps RepetitionTypes.Optional() func (f Field) Optional() bool { - return f.RepetitionTypes.Optional() + return f.RepetitionTypes().Optional() } // Required wraps RepetitionTypes.Required() func (f Field) Required() bool { - return f.RepetitionTypes.Required() + return f.RepetitionTypes().Required() } // Init is called by parquetgen's templates to generate the code -// that writes to a struct's field (for example: x.Friend.Hobby = &Item{}) -func (f Field) Init(def, rep int) string { - md := f.MaxDef() - if rep > 0 { - var count int - s := Seen(1, []Field{f, f}) - for i, rt := range f.RepetitionTypes { - if rt == Repeated { - count++ - } - if count == rep { - f.Seen = s[:i] - } - } - } - - start, end := f.start(def, rep), f.end(def, rep) - flds := make([]field, len(f.RepetitionTypes[start:end])) +// that writes to a struct's field +// +// example: x.Friend.Hobby = &Item{} +func (f Field) Init(def, rep, nthChild int) string { + maxDef := f.MaxDef() + maxRep := f.MaxRep() + var defs, reps int + var fld Field - i := start - var j int - var nReps int - for _, rt := range f.RepetitionTypes[start:end] { - if rt == Repeated { - nReps++ - } - flds[j] = field{ - RT: rt, - Name: f.FieldNames[i], - Type: f.FieldTypes[i], - i: i, - start: start, - seen: f.Seen, - rep: rep, - nReps: nReps, - } + left, right := "%s", "%s" - i++ - j++ - } + chain := reverse(f.chain()) - // start with the innermost field - flds = reverse(flds) - - var remainder []field - if len(flds) > 1 { - remainder = flds[1:] - } - - if def == md { - if flds[0].Primitive() && f.RepetitionTypes.Repeated() { - flds[0].Val = "vals[nVals]" - } else if flds[0].Primitive() && !f.RepetitionTypes.Repeated() { - flds[0].Val = "vals[0]" + var i int + for i, fld = range chain { + if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { + defs++ } - } - in := input{ - Parent: f.parent(start), - Val: flds[0].init(remainder), - Append: f.append(rep, flds[0]), - } - - var buf bytes.Buffer - fieldTpl.Execute(&buf, in) - return buf.String() -} - -func (f Field) append(rep int, fld field) bool { - return rep > 0 || - (f.Seen.Repeated() && fld.RT == Repeated) -} - -func (f Field) parent(start int) string { - names := make([]string, start+1) - var r int - l := len(f.FieldNames[:start+1]) - for i, n := range f.FieldNames[:start+1] { - if i < l-1 && f.RepetitionTypes[i] == Repeated { - n = fmt.Sprintf("%s[ind[%d]]", n, r) - r++ + if fld.RepetitionType == Repeated { + reps++ } - names[i] = n - } - return strings.Join(names, ".") -} -// Path creates gocode for initializing a string slice in a go template -func (f Field) Path() string { - out := make([]string, len(f.ColumnNames)) - for i, n := range f.ColumnNames { - out[i] = fmt.Sprintf(`"%s"`, n) - } - return strings.Join(out, ", ") -} - -// start calculates which nested field is -// being written to based on the definition -// level and which parent fields have already -// been written to by previous fields with -// common ancestors. -func (f Field) start(def, rep int) int { - di := f.DefIndex(def) - seen := f.Seen - if len(seen) > di { - seen = seen[:di+1] - } - - if len(f.RepetitionTypes)-1 > di { - for _, rt := range f.RepetitionTypes[di+1:] { - if rt >= Optional { - break + switch fld.RepetitionType { + case Required: + left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.FieldName)) + case Optional: + left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.FieldName)) + case Repeated: + if (rep > 0 && reps < rep) || (nthChild > 0 && !fld.Primitive()) { + left = fmt.Sprintf(left, fmt.Sprintf(".%s[ind[%d]]%%s", fld.FieldName, reps-1)) + } else { + left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.FieldName)) } - di++ } - } - if rep == 0 { - rep = int(seen.MaxRep()) + 1 + if (defs >= def || ((rep == 0 && fld.RepetitionType != Required) || (rep > 0 && reps == rep))) && nthChild == 0 { + break + } } - var i, reps int - var rt RepetitionType - for i, rt = range f.RepetitionTypes[:di+1] { - if rt == Required { - continue + left = fmt.Sprintf(left, "") + defs = 0 + for j, fld := range chain[i:] { + if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { + defs++ } - if rt == Repeated { + if j > 0 && fld.RepetitionType == Repeated { reps++ } - if reps == rep { - break + switch fld.RepetitionType { + case Required: + if fld.Primitive() { + if fld.Parent.RepetitionType == Repeated && rep < maxRep { //need one more case: + right = fmt.Sprintf(right, fmt.Sprintf("{%s: vals[nVals]}%%s", fld.FieldName)) + } else if fld.Parent.RepetitionType == Repeated { + right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[nVals]%%s", fld.FieldName)) + } else if nthChild > 0 { + right = fmt.Sprintf(right, "vals[0]%s") + } else { + right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[0]%%s", fld.FieldName)) + } + } else { + if fld.Parent.RepetitionType == Repeated && rep < maxRep { + right = fmt.Sprintf(right, fmt.Sprintf("{%s: %s{%%s}}", fld.FieldType, fld.FieldName)) + } else { + right = fmt.Sprintf(right, fmt.Sprintf("%s: %s{%%s}", fld.FieldType, fld.FieldName)) + } + } + case Optional: + if fld.Primitive() { + if nthChild == 0 && fld.Parent.Optional() && !fld.Parent.Repeated() { + right = fmt.Sprintf(right, fmt.Sprintf("%s: p%s(vals[0])%%s", fld.FieldName, fld.FieldType)) + } else if fld.Parent.RepetitionType == Repeated { + right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[nVals])%%s", fld.FieldType)) + } else if fld.Parent.Repeated() && nthChild == 0 { + right = fmt.Sprintf(right, fmt.Sprintf("%s: p%s(vals[nVals])%%s", fld.FieldName, fld.FieldType)) + } else if fld.Parent.Repeated() && nthChild > 0 { + right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[nVals])%%s", fld.FieldType)) + } else { + right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[0])%%s", fld.FieldType)) + } + } else { + if j == 0 { + right = fmt.Sprintf(right, fmt.Sprintf("&%s{%%s}", fld.FieldType)) + } else { + right = fmt.Sprintf(right, fmt.Sprintf("%s: &%s{%%s}", fld.FieldName, fld.FieldType)) + } + } + case Repeated: + if fld.Primitive() { + if rep == 0 && fld.Parent != nil && fld.Parent.RepetitionType == Repeated { + right = fmt.Sprintf(right, fmt.Sprintf("{%s: []%s{vals[nVals]}}%%s", fld.FieldName, fld.FieldType)) + } else if fld.Parent == nil && rep == 0 { + right = fmt.Sprintf(right, fmt.Sprintf("[]%s{vals[nVals]}%%s", fld.FieldType)) + } else if rep == 0 { + right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{vals[nVals]}%%s", fld.FieldName, fld.FieldType)) + } else if reps == rep { + right = fmt.Sprintf(right, fmt.Sprintf("append(x%s, vals[nVals])%%s", left)) + } else { + right = fmt.Sprintf(right, fmt.Sprintf("[%s: []%s{vals[nVals]}]%%s", fld.FieldName, fld.FieldType)) + } + } else { + if rep == 0 && j == 0 { + right = fmt.Sprintf(right, fmt.Sprintf("[]%s{%%s}", fld.FieldType)) + } else if rep == 0 && reps == maxRep && fld.Parent != nil && fld.Parent.RepetitionType == Repeated { + right = fmt.Sprintf(right, fmt.Sprintf("{%s: []%s{%%s}}", fld.FieldName, fld.FieldType)) + } else if rep == 0 && reps == maxRep { + right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{%%s}", fld.FieldName, fld.FieldType)) + } else if reps == rep { + right = fmt.Sprintf(right, fmt.Sprintf("append(x%s, %s{%%s})", left, fld.FieldType)) + } else { + right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{%%s}", fld.FieldName, fld.FieldType)) + } + } } - if rt >= Optional && i >= len(seen) { + if defs >= def && fld.RepetitionType != Required && def < maxDef { break } } - return i -} - -func (f Field) end(def, rep int) int { - if def == f.MaxDef() { - return len(f.RepetitionTypes) - } - - s := f.start(def, rep) - - var i int - md := int(f.RepetitionTypes[:s].MaxDef()) - for _, rt := range f.RepetitionTypes[s:] { - if (rt == Optional || rt == Repeated) && i < def-md { - i++ - } - } - return s + i -} - -type field struct { - RT RepetitionType - Name string - Type string - Val string - i int - start int - seen RepetitionTypes - rep int - nReps int + right = fmt.Sprintf(right, "") + fmt.Printf("x%s = %s\n", left, right) + return fmt.Sprintf("x%s = %s", left, right) } -func (f field) init(flds []field) string { - var buf bytes.Buffer - err := initTpl.Execute(&buf, f) - if err != nil { - log.Fatal(err) - } - - if len(flds) == 0 { - return buf.String() - } - - f2 := flds[0] - var flds2 []field - if len(flds) > 1 { - flds2 = flds[1:] +// Path creates gocode for initializing a string slice in a go template +func (f Field) Path() string { + names := f.ColumnNames() + out := make([]string, len(names)) + for i, n := range names { + out[i] = fmt.Sprintf(`"%s"`, n) } - - f2.Val = fmt.Sprintf("%s: %s", f.Name, buf.String()) - return f2.init(flds2) -} - -// Slice is called by parquetgen's go templates to determine -// if the field is repeated or not. -func (f field) Slice() bool { - return (f.RT == Repeated && f.i != f.start) || - (f.RT == Repeated && f.rep == 0 && f.i == f.start && !f.seen.NRepeated(f.i+1) && !f.Primitive()) || - (f.RT == Repeated && f.rep == 0 && f.Primitive() && f.i == 0) + return strings.Join(out, ", ") } // Primitive is called in order to determine if the field is primitive or not. -func (f field) Primitive() bool { - return primitiveTypes[f.Type] +func (f Field) Primitive() bool { + return primitiveTypes[f.FieldType] } var primitiveTypes = map[string]bool{ diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go index 57b5cae..762fbf1 100644 --- a/internal/fields/fields_test.go +++ b/internal/fields/fields_test.go @@ -14,318 +14,233 @@ func TestFields(t *testing.T) { field fields.Field def int rep int - seen []fields.RepetitionType + nthChild int expected string }{ { - field: fields.Field{TypeName: "int64", FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, + field: fields.Field{FieldName: "Backward", FieldType: "int64", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional}}, + rep: 0, def: 1, - expected: "x.Link = &Link{}", + expected: "x.Links = &Link{}", }, { - field: fields.Field{TypeName: "int64", FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - def: 2, + field: fields.Field{FieldName: "Backward", FieldType: "int64", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional}}, rep: 0, - expected: "x.Link = &Link{Backward: []int64{vals[nVals]}}", + def: 2, + expected: "x.Links = &Link{Backward: []int64{vals[nVals]}}", }, + { - field: fields.Field{TypeName: "int64", FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, + field: fields.Field{FieldName: "Backward", FieldType: "int64", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional}}, def: 2, rep: 1, - expected: "x.Link.Backward = append(x.Link.Backward, vals[nVals])", + expected: "x.Links.Backward = append(x.Links.Backward, vals[nVals])", }, { - field: fields.Field{TypeName: "int64", FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, + field: fields.Field{FieldName: "Forward", FieldType: "int64", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional}}, def: 2, - rep: 0, - seen: []fields.RepetitionType{fields.Repeated}, - expected: "x.Link.Forward = append(x.Link.Forward, vals[nVals])", + rep: 1, + expected: "x.Links.Forward = append(x.Links.Forward, vals[nVals])", }, { - field: fields.Field{TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, + field: fields.Field{FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, def: 2, rep: 0, expected: "x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}}", }, { - field: fields.Field{TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, + field: fields.Field{FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, def: 2, rep: 1, expected: "x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}})", }, { - field: fields.Field{TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, + field: fields.Field{FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, def: 2, rep: 2, expected: "x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]})", }, { - field: fields.Field{TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - def: 1, - expected: "x.Hobby = &Hobby{}", - }, - { - field: fields.Field{TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - def: 2, - expected: "x.Hobby = &Hobby{Difficulty: pint32(vals[0])}", - }, - { - field: fields.Field{TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - def: 2, - seen: []fields.RepetitionType{fields.Repeated}, - expected: "x.Hobby.Difficulty = pint32(vals[0])", - }, - { - field: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - def: 1, - expected: "x.Hobby = &Hobby{Name: vals[0]}", - }, - { - field: fields.Field{TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - def: 1, - expected: "x.Hobby.Name = pstring(vals[0])", - }, - { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, + field: fields.Field{FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, def: 1, rep: 1, expected: "x.Names = append(x.Names, Name{})", }, { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - def: 2, - rep: 1, - expected: "x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}})", - }, - { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - def: 2, - rep: 0, - expected: "x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}}", - }, - { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - def: 2, - rep: 2, - expected: "x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]})", - }, - { - field: fields.Field{FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, + field: fields.Field{FieldName: "Backward", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional}}, def: 1, rep: 0, expected: "x.Link = &Link{}", }, { - field: fields.Field{FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, + field: fields.Field{FieldName: "Backward", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional}}, def: 2, rep: 0, - expected: "x.Link = &Link{Backward: []int64{vals[nVals]}}", + expected: "x.Link = &Link{Backward: []string{vals[nVals]}}", }, { - field: fields.Field{FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, + field: fields.Field{FieldName: "Backward", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional}}, def: 2, rep: 1, expected: "x.Link.Backward = append(x.Link.Backward, vals[nVals])", }, { - field: fields.Field{FieldNames: []string{"Names", "Language", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Repeated}}, - def: 1, - rep: 1, - expected: "x.Names = append(x.Names, Name{})", - }, - { - - field: fields.Field{FieldNames: []string{"Names", "Language", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Repeated}}, - def: 2, - rep: 0, - expected: "x.Names = []Name{{Language: Language{Codes: []string{vals[nVals]}}}}", - }, - { - - field: fields.Field{FieldNames: []string{"Names", "Language", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Repeated}}, - def: 2, - rep: 1, - expected: "x.Names = append(x.Names, Name{Language: Language{Codes: []string{vals[nVals]}}})", - }, - { - - field: fields.Field{FieldNames: []string{"Name", "Languages", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated, fields.Repeated}}, - def: 2, - rep: 1, - expected: "x.Name.Languages = append(x.Name.Languages, Language{Codes: []string{vals[nVals]}})", - }, - { - - field: fields.Field{FieldNames: []string{"Names", "Language", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Repeated}}, + field: fields.Field{FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Language", FieldType: "Language", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, def: 2, rep: 0, expected: "x.Names = []Name{{Language: Language{Codes: []string{vals[nVals]}}}}", }, { - - field: fields.Field{FieldNames: []string{"Name", "Languages", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated, fields.Repeated}}, + field: fields.Field{FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Name", FieldType: "Name", RepetitionType: fields.Required}}}, def: 2, rep: 0, expected: "x.Name.Languages = []Language{{Codes: []string{vals[nVals]}}}", }, { - - field: fields.Field{FieldNames: []string{"Names", "Language", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Repeated}}, + field: fields.Field{FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Language", FieldType: "Language", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, def: 2, rep: 2, expected: "x.Names[ind[0]].Language.Codes = append(x.Names[ind[0]].Language.Codes, vals[nVals])", }, { - field: fields.Field{FieldNames: []string{"Name", "Languages", "Codes"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated, fields.Repeated}}, + field: fields.Field{FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Name", FieldType: "Name", RepetitionType: fields.Required}}}, def: 2, rep: 2, expected: "x.Name.Languages[ind[0]].Codes = append(x.Name.Languages[ind[0]].Codes, vals[nVals])", }, { - field: fields.Field{FieldNames: []string{"Thing", "Names", "Languages", "Codes"}, FieldTypes: []string{"Thing", "Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated, fields.Repeated, fields.Repeated}}, + field: fields.Field{FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Thing", FieldType: "Thing", RepetitionType: fields.Required}}}}, def: 3, rep: 3, expected: "x.Thing.Names[ind[0]].Languages[ind[1]].Codes = append(x.Thing.Names[ind[0]].Languages[ind[1]].Codes, vals[nVals])", }, { - field: fields.Field{FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, + field: fields.Field{FieldName: "Difficulty", FieldType: "int32", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional}}, + def: 1, + expected: "x.Hobby = &Hobby{}", + }, + { + field: fields.Field{FieldName: "Difficulty", FieldType: "int32", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional}}, + def: 2, + expected: "x.Hobby = &Hobby{Difficulty: pint32(vals[0])}", + }, + { + field: fields.Field{FieldName: "Difficulty", FieldType: "int32", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional}}, + def: 2, + nthChild: 1, + expected: "x.Hobby.Difficulty = pint32(vals[0])", + }, + { + field: fields.Field{FieldName: "Name", FieldType: "string", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional}}, + def: 1, + expected: "x.Hobby = &Hobby{Name: vals[0]}", + }, + { + field: fields.Field{FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Required}}, + def: 1, + expected: "x.Hobby.Name = pstring(vals[0])", + }, + { + field: fields.Field{FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional}}, def: 1, expected: "x.Hobby = &Item{}", }, { - field: fields.Field{FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, + field: fields.Field{FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional}}, def: 2, expected: "x.Hobby = &Item{Name: pstring(vals[0])}", }, { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, + field: fields.Field{FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional}}}, def: 3, expected: "x.Friend = &Entity{Hobby: &Item{Name: pstring(vals[0])}}", }, { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional}}, + field: fields.Field{FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Required}}}, def: 1, expected: "x.Friend.Hobby = &Item{}", }, { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, + field: fields.Field{FieldName: "Country", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, def: 1, rep: 1, expected: "x.Names = append(x.Names, Name{})", }, { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, + field: fields.Field{FieldName: "Country", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, def: 3, rep: 0, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated}, + nthChild: 1, expected: "x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals])", }, { - field: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, - def: 3, - rep: 0, - seen: []fields.RepetitionType{fields.Repeated}, - expected: "x.Names[ind[0]].Languages = []Language{{Country: pstring(vals[nVals])}}", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, + field: fields.Field{FieldName: "First", FieldType: "string", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional}}}}, def: 1, expected: "x.Friend = &Entity{}", }, { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, + field: fields.Field{FieldName: "First", FieldType: "string", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional}}}}, def: 2, expected: "x.Friend = &Entity{Hobby: &Item{}}", }, { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - def: 2, - seen: []fields.RepetitionType{fields.Repeated}, - expected: "x.Friend.Hobby = &Item{}", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, + field: fields.Field{FieldName: "First", FieldType: "string", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional}}}}, def: 3, expected: "x.Friend = &Entity{Hobby: &Item{Name: &Name{First: vals[0]}}}", }, { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - def: 3, - seen: []fields.RepetitionType{fields.Repeated}, - expected: "x.Friend.Hobby = &Item{Name: &Name{First: vals[0]}}", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - def: 3, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated}, - expected: "x.Friend.Hobby.Name = &Name{First: vals[0]}", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - def: 3, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Repeated}, - expected: "x.Friend.Hobby.Name.First = vals[0]", - }, - { - field: fields.Field{FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}, + field: fields.Field{FieldName: "First", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional}}}}, def: 3, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Repeated}, + nthChild: 1, expected: "x.Friend.Hobby.Name.First = pstring(vals[0])", }, { - field: fields.Field{FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, + field: fields.Field{FieldName: "Forward", FieldType: "int64", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional}}, + rep: 1, def: 2, - seen: []fields.RepetitionType{fields.Repeated}, + nthChild: 1, expected: "x.Link.Forward = append(x.Link.Forward, vals[nVals])", }, { - field: fields.Field{FieldNames: []string{"LuckyNumbers"}, FieldTypes: []string{"int64"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, + field: fields.Field{FieldName: "LuckyNumbers", FieldType: "int64", RepetitionType: fields.Repeated}, def: 1, rep: 0, expected: "x.LuckyNumbers = []int64{vals[nVals]}", }, { - field: fields.Field{FieldNames: []string{"LuckyNumbers"}, FieldTypes: []string{"int64"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, + field: fields.Field{FieldName: "LuckyNumbers", FieldType: "int64", RepetitionType: fields.Repeated}, def: 1, rep: 1, expected: "x.LuckyNumbers = append(x.LuckyNumbers, vals[nVals])", }, { - field: fields.Field{FieldNames: []string{"A", "B", "C", "D", "E", "F"}, FieldTypes: []string{"A", "B", "C", "D", "E", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Required, fields.Repeated, fields.Required, fields.Optional}}, - def: 3, - expected: "x.A.B = &B{C: C{D: []D{{E: E{F: pstring(vals[nVals])}}}}}", - }, - { - field: fields.Field{FieldNames: []string{"A", "B", "C", "D", "E", "F"}, FieldTypes: []string{"A", "B", "C", "D", "E", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Required, fields.Repeated, fields.Required, fields.Optional}}, + field: fields.Field{FieldName: "F", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{ + FieldName: "E", FieldType: "E", RepetitionType: fields.Required, Parent: &fields.Field{ + FieldName: "D", FieldType: "D", RepetitionType: fields.Repeated, Parent: &fields.Field{ + FieldName: "C", FieldType: "C", RepetitionType: fields.Required, Parent: &fields.Field{ + FieldName: "B", FieldType: "B", RepetitionType: fields.Optional, Parent: &fields.Field{ + FieldName: "A", FieldType: "A", RepetitionType: fields.Required}}}}}}, def: 3, - seen: []fields.RepetitionType{fields.Repeated}, + rep: 0, expected: "x.A.B = &B{C: C{D: []D{{E: E{F: pstring(vals[nVals])}}}}}", }, { - field: fields.Field{FieldNames: []string{"A", "B", "C", "D", "E", "F"}, FieldTypes: []string{"A", "B", "C", "D", "E", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Required, fields.Repeated, fields.Required, fields.Optional}}, - def: 3, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated}, - expected: "x.A.B.C.D = []D{{E: E{F: pstring(vals[nVals])}}}", - }, - { - field: fields.Field{FieldNames: []string{"A", "B", "C", "D", "E", "F"}, FieldTypes: []string{"A", "B", "C", "D", "E", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Required, fields.Repeated, fields.Required, fields.Optional}}, + field: fields.Field{FieldName: "F", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{ + FieldName: "E", FieldType: "E", RepetitionType: fields.Required, Parent: &fields.Field{ + FieldName: "D", FieldType: "D", RepetitionType: fields.Repeated, Parent: &fields.Field{ + FieldName: "C", FieldType: "C", RepetitionType: fields.Required, Parent: &fields.Field{ + FieldName: "B", FieldType: "B", RepetitionType: fields.Optional, Parent: &fields.Field{ + FieldName: "A", FieldType: "A", RepetitionType: fields.Required}}}}}}, def: 3, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Repeated}, - expected: "x.A.B.C.D = []D{{E: E{F: pstring(vals[nVals])}}}", - }, - { - field: fields.Field{FieldNames: []string{"A", "B", "C", "D", "E", "F"}, FieldTypes: []string{"A", "B", "C", "D", "E", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Required, fields.Repeated, fields.Required, fields.Optional}}, - def: 3, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Repeated, fields.Repeated}, + nthChild: 1, expected: "x.A.B.C.D[ind[0]].E.F = pstring(vals[nVals])", }, } for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d %v def %d rep %d %v", i, tc.field.FieldNames, tc.def, tc.rep, tc.seen), func(t *testing.T) { + t.Run(fmt.Sprintf("%02d %v def %d rep %d", i, tc.field.FieldNames(), tc.def, tc.rep), func(t *testing.T) { field := tc.field - field.Seen = tc.seen - s := field.Init(tc.def, tc.rep) + s := field.Init(tc.def, tc.rep, tc.nthChild) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.expected, string(gocode)) @@ -333,114 +248,114 @@ func TestFields(t *testing.T) { } } -func TestSeen(t *testing.T) { - testCases := []struct { - flds []fields.Field - expected []fields.RepetitionType - }{ - { - flds: []fields.Field{ - {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - }, - expected: []fields.RepetitionType{fields.Optional}, - }, - { - flds: []fields.Field{ - {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required}}, - {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated}}, - }, - expected: []fields.RepetitionType{fields.Required}, - }, - { - flds: []fields.Field{ - {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {FieldNames: []string{"Link", "Backward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - }, - expected: []fields.RepetitionType{fields.Repeated}, - }, - { - flds: []fields.Field{ - {FieldNames: []string{"Name", "First"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - }, - expected: []fields.RepetitionType{}, - }, - { - flds: []fields.Field{ - {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {FieldNames: []string{"Link", "Name", "First"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Optional}}, - {FieldNames: []string{"Link", "Name", "Last"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required}}, - }, - expected: []fields.RepetitionType{fields.Repeated, fields.Repeated}, - }, - } +// func TestSeen(t *testing.T) { +// testCases := []struct { +// flds []fields.Field +// expected []fields.RepetitionType +// }{ +// { +// flds: []fields.Field{ +// {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, +// {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, +// }, +// expected: []fields.RepetitionType{fields.Optional}, +// }, +// { +// flds: []fields.Field{ +// {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required}}, +// {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated}}, +// }, +// expected: []fields.RepetitionType{fields.Required}, +// }, +// { +// flds: []fields.Field{ +// {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, +// {FieldNames: []string{"Link", "Backward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, +// {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, +// }, +// expected: []fields.RepetitionType{fields.Repeated}, +// }, +// { +// flds: []fields.Field{ +// {FieldNames: []string{"Name", "First"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, +// {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, +// }, +// expected: []fields.RepetitionType{}, +// }, +// { +// flds: []fields.Field{ +// {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, +// {FieldNames: []string{"Link", "Name", "First"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Optional}}, +// {FieldNames: []string{"Link", "Name", "Last"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required}}, +// }, +// expected: []fields.RepetitionType{fields.Repeated, fields.Repeated}, +// }, +// } - for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { - i := len(tc.flds) - 1 - assert.Equal(t, tc.expected, fields.Seen(i, tc.flds)) - }) - } -} +// for i, tc := range testCases { +// t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { +// i := len(tc.flds) - 1 +// assert.Equal(t, tc.expected, fields.Seen(i, tc.flds)) +// }) +// } +// } -func TestChild(t *testing.T) { - f := fields.Field{ - FieldNames: []string{"Friends", "Name", "First"}, - FieldTypes: []string{"Being", "Name", "string"}, - RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Optional}, - } - ch := fields.Field{ - FieldNames: []string{"Name", "First"}, - FieldTypes: []string{"Name", "string"}, - RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}, - } - assert.Equal(t, ch, f.Child(1)) -} +// func TestChild(t *testing.T) { +// f := fields.Field{ +// FieldNames: []string{"Friends", "Name", "First"}, +// FieldTypes: []string{"Being", "Name", "string"}, +// RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Optional}, +// } +// ch := fields.Field{ +// FieldNames: []string{"Name", "First"}, +// FieldTypes: []string{"Name", "string"}, +// RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}, +// } +// assert.Equal(t, ch, f.Child(1)) +// } -func TestRepCases(t *testing.T) { - testCases := []struct { - f fields.Field - seen []fields.RepetitionType - expected []fields.RepCase - }{ - { - f: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, - expected: []fields.RepCase{{Case: "case 0:", Rep: 0}, {Case: "case 1:", Rep: 1}, {Case: "case 2:", Rep: 2}}, - }, - { - f: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, - seen: []fields.RepetitionType{fields.Repeated, fields.Repeated}, - expected: []fields.RepCase{{Case: "default:", Rep: 0}}, - }, - } +// func TestRepCases(t *testing.T) { +// testCases := []struct { +// f fields.Field +// seen []fields.RepetitionType +// expected []fields.RepCase +// }{ +// { +// f: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, +// expected: []fields.RepCase{{Case: "case 0:", Rep: 0}, {Case: "case 1:", Rep: 1}, {Case: "case 2:", Rep: 2}}, +// }, +// { +// f: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, +// seen: []fields.RepetitionType{fields.Repeated, fields.Repeated}, +// expected: []fields.RepCase{{Case: "default:", Rep: 0}}, +// }, +// } - for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { - assert.Equal(t, tc.expected, tc.f.RepCases(tc.seen)) - }) - } -} +// for i, tc := range testCases { +// t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { +// assert.Equal(t, tc.expected, tc.f.RepCases(tc.seen)) +// }) +// } +// } -func TestNilField(t *testing.T) { - f := fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}} - name, rt, i, reps := f.NilField(1) - assert.Equal(t, "Names.Languages", name) - assert.Equal(t, fields.Repeated, rt) - assert.Equal(t, 1, i) - assert.Equal(t, 2, reps) -} +// func TestNilField(t *testing.T) { +// f := fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}} +// name, rt, i, reps := f.NilField(1) +// assert.Equal(t, "Names.Languages", name) +// assert.Equal(t, fields.Repeated, rt) +// assert.Equal(t, 1, i) +// assert.Equal(t, 2, reps) +// } -func TestField(t *testing.T) { - f := fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}} - assert.True(t, f.Repeated()) - assert.True(t, f.Optional()) - assert.False(t, f.Required()) -} +// func TestField(t *testing.T) { +// f := fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}} +// assert.True(t, f.Repeated()) +// assert.True(t, f.Optional()) +// assert.False(t, f.Required()) +// } -func TestRepetitionTypes(t *testing.T) { - rts := fields.RepetitionTypes([]fields.RepetitionType{fields.Repeated, fields.Optional}) - assert.Equal(t, rts.Def(1), fields.Repeated) - assert.Equal(t, rts.Def(2), fields.Optional) -} +// func TestRepetitionTypes(t *testing.T) { +// rts := fields.RepetitionTypes([]fields.RepetitionType{fields.Repeated, fields.Optional}) +// assert.Equal(t, rts.Def(1), fields.Repeated) +// assert.Equal(t, rts.Def(2), fields.Optional) +// } diff --git a/internal/fields/repetition.go b/internal/fields/repetition.go index 1381933..0ef54d7 100644 --- a/internal/fields/repetition.go +++ b/internal/fields/repetition.go @@ -99,15 +99,6 @@ func (r RepetitionTypes) NRepeated(i int) bool { return false } -func reverse(in []field) []field { - flds := append(in[:0:0], in...) - for i := len(flds)/2 - 1; i >= 0; i-- { - opp := len(flds) - 1 - i - flds[i], flds[opp] = flds[opp], flds[i] - } - return flds -} - type rts []RepetitionType func (r rts) add(i int, rts []RepetitionType) rts { diff --git a/internal/gen/funcs.go b/internal/gen/funcs.go index 9ceefb3..79ef561 100644 --- a/internal/gen/funcs.go +++ b/internal/gen/funcs.go @@ -29,7 +29,7 @@ var ( return "fieldCompression" }, "funcName": func(f fields.Field) string { - return strings.Join(f.FieldNames, "") + return strings.Join(f.FieldNames(), "") }, "join": func(names []string) string { return strings.Join(names, ".") @@ -74,11 +74,11 @@ var ( } return out }, - "columnName": func(f fields.Field) string { return strings.Join(f.ColumnNames, ".") }, + "columnName": func(f fields.Field) string { return strings.Join(f.ColumnNames(), ".") }, "writeFunc": dremel.Write, "readFunc": dremel.Read, - "writeFuncName": func(f fields.Field) string { return fmt.Sprintf("write%s", strings.Join(f.FieldNames, "")) }, - "readFuncName": func(f fields.Field) string { return fmt.Sprintf("read%s", strings.Join(f.FieldNames, "")) }, + "writeFuncName": func(f fields.Field) string { return fmt.Sprintf("write%s", strings.Join(f.FieldNames(), "")) }, + "readFuncName": func(f fields.Field) string { return fmt.Sprintf("read%s", strings.Join(f.FieldNames(), "")) }, "parquetType": func(f fields.Field) string { if f.Optional() { return "parquet.OptionalField" diff --git a/internal/parse/parse.go b/internal/parse/parse.go index 560ea0c..6b07045 100644 --- a/internal/parse/parse.go +++ b/internal/parse/parse.go @@ -187,7 +187,7 @@ func doGetFields(n map[string]ast.Node) (map[string][]field, error) { return fields, nil } -func getField(name string, x ast.Node) field { +func getField(name string, x ast.Node, parent *flds.Field) field { var typ, tag string var optional, repeated bool ast.Inspect(x, func(n ast.Node) bool { @@ -220,15 +220,24 @@ func getField(name string, x ast.Node) field { } fn, cat, pt := lookupTypeAndCategory(typ, optional, repeated) + + f := flds.Field{ + FieldName: name, + FieldType: typ, + ColumnName: tag, + TypeName: getTypeName(typ, optional), + Type: fn, + ParquetType: pt, + Category: cat, + Parent: parent, + } + + if parent != nil { + parent.Children = append(parent.Children, f) + } + return field{ - Field: flds.Field{ - FieldNames: []string{name}, - FieldTypes: []string{typ}, - ColumnNames: []string{tag}, - TypeName: getTypeName(typ, optional), - FieldType: fn, - ParquetType: pt, - Category: cat}, + Field: f, fieldName: name, fieldType: typ, omit: tag == "-", From 2ef6fac94003232b01e99366cccd07fc61afe5a4 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Mon, 7 Jun 2021 07:49:26 -0600 Subject: [PATCH 04/25] wip --- internal/fields/fields.go | 1 + internal/parse/fields_test.go | 538 ++++++++++++++++++++-------------- internal/parse/parse.go | 233 ++++++++------- internal/parse/parse_test.go | 264 +++++++++-------- 4 files changed, 573 insertions(+), 463 deletions(-) diff --git a/internal/fields/fields.go b/internal/fields/fields.go index 3093fbe..b882947 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -18,6 +18,7 @@ type Field struct { ParquetType string Category string Parent *Field + Embedded bool Children []Field } diff --git a/internal/parse/fields_test.go b/internal/parse/fields_test.go index 668880d..9ab8360 100644 --- a/internal/parse/fields_test.go +++ b/internal/parse/fields_test.go @@ -24,14 +24,14 @@ func TestField(t *testing.T) { testCases := []testInput{ { - f: fields.Field{FieldNames: []string{"Friends", "Name", "First"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Optional}}, + f: fields.Field{FieldName: "First", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Name", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Friends", RepetitionType: fields.Repeated}}}, expected: []string{ "Friends", "Friends.Name.First", }, }, { - f: fields.Field{FieldNames: []string{"Friend", "Name", "First"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Optional}}, + f: fields.Field{FieldName: "First", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Name", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Friends", RepetitionType: fields.Required}}}, expected: []string{ "Friend.Name.First", }, @@ -57,7 +57,7 @@ func TestFields(t *testing.T) { type testInput struct { name string typ string - expected []fields.Field + expected fields.Field errors []error } @@ -65,233 +65,319 @@ func TestFields(t *testing.T) { { name: "flat", typ: "Being", - expected: []fields.Field{ - {Type: "Being", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Being", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + expected: fields.Field{ + Type: "Being", + FieldName: "Being", + FieldType: "Being", + ColumnName: "Being", + TypeName: "Being", + Children: []fields.Field{ + {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, + {FieldType: "int32", ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + }, }, }, { name: "private fields", typ: "Private", - expected: []fields.Field{ - {Type: "Private", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Private", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + expected: fields.Field{ + Type: "Private", + FieldName: "Private", + FieldType: "Private", + ColumnName: "Private", + TypeName: "Private", + Children: []fields.Field{ + {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + }, }, }, { name: "nested struct", typ: "Nested", - expected: []fields.Field{ - {Type: "Nested", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Being", "ID"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Being", "ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required}}, - {Type: "Nested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Being", "Age"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Being", "Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - {Type: "Nested", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + expected: fields.Field{ + Type: "Nested", + FieldName: "Nested", + FieldType: "Nested", + ColumnName: "Nested", + TypeName: "Nested", + Children: []fields.Field{ + {Type: "Being", TypeName: "Being", FieldName: "Being", FieldType: "Being", ColumnName: "Being", RepetitionType: fields.Required, Children: []fields.Field{ + {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + }}, + {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + }, }, errors: []error{}, }, { name: "nested struct with name that doesn't match the struct type", typ: "Nested2", - expected: []fields.Field{ - {Type: "Nested2", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Info", "ID"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Info", "ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required}}, - {Type: "Nested2", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Info", "Age"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Info", "Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - {Type: "Nested2", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + expected: fields.Field{ + Type: "Nested2", + FieldName: "Nested2", + FieldType: "Nested2", + ColumnName: "Nested2", + TypeName: "Nested2", + Children: []fields.Field{ + {Type: "Being", TypeName: "Being", FieldName: "Info", FieldType: "Being", ColumnName: "Info", RepetitionType: fields.Required, Children: []fields.Field{ + {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + }}, + {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + }, }, errors: []error{}, }, { name: "2 deep nested struct", typ: "DoubleNested", - expected: []fields.Field{ - {Type: "DoubleNested", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Nested", "Being", "ID"}, FieldTypes: []string{"Nested", "Being", "int32"}, ColumnNames: []string{"Nested", "Being", "ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Required}}, - {Type: "DoubleNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Nested", "Being", "Age"}, FieldTypes: []string{"Nested", "Being", "int32"}, ColumnNames: []string{"Nested", "Being", "Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Optional}}, - {Type: "DoubleNested", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Nested", "Anniversary"}, FieldTypes: []string{"Nested", "uint64"}, ColumnNames: []string{"Nested", "Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, + expected: fields.Field{ + Type: "DoubleNested", + FieldName: "DoubleNested", + FieldType: "DoubleNested", + ColumnName: "DoubleNested", + TypeName: "DoubleNested", + Children: []fields.Field{ + { + Type: "Nested", + FieldName: "Nested", + FieldType: "Nested", + ColumnName: "Nested", + TypeName: "Nested", + Children: []fields.Field{ + {Type: "Being", TypeName: "Being", FieldName: "Being", FieldType: "Being", ColumnName: "Being", RepetitionType: fields.Required, Children: []fields.Field{ + {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + }}, + {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + }, + }, + }, }, errors: []error{}, }, { name: "2 deep optional nested struct", typ: "OptionalDoubleNested", - expected: []fields.Field{ - {Type: "OptionalDoubleNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"OptionalNested", "Being", "ID"}, FieldTypes: []string{"OptionalNested", "Being", "int32"}, ColumnNames: []string{"OptionalNested", "Being", "ID"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Required}}, - {Type: "OptionalDoubleNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"OptionalNested", "Being", "Age"}, FieldTypes: []string{"OptionalNested", "Being", "int32"}, ColumnNames: []string{"OptionalNested", "Being", "Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional}}, - {Type: "OptionalDoubleNested", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"OptionalNested", "Anniversary"}, FieldTypes: []string{"OptionalNested", "uint64"}, ColumnNames: []string{"OptionalNested", "Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, + expected: fields.Field{ + Type: "OptionalDoubleNested", + FieldName: "OptionalDoubleNested", + FieldType: "OptionalDoubleNested", + ColumnName: "OptionalDoubleNested", + TypeName: "OptionalDoubleNested", + Children: []fields.Field{ + { + Type: "OptionalNested", + FieldName: "OptionalNested", + FieldType: "OptionalNested", + ColumnName: "OptionalNested", + TypeName: "OptionalNested", + Children: []fields.Field{ + {Type: "Being", TypeName: "*Being", FieldName: "Being", FieldType: "Being", ColumnName: "Being", RepetitionType: fields.Optional, Children: []fields.Field{ + {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + }}, + {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + }, + }, + }, }, errors: []error{}, }, - { - name: "optional nested struct", - typ: "OptionalNested", - expected: []fields.Field{ - {Type: "OptionalNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Being", "ID"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Being", "ID"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {Type: "OptionalNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Being", "Age"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Being", "Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - {Type: "OptionalNested", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - errors: []error{}, - }, - { - name: "optional nested struct v2", - typ: "OptionalNested2", - expected: []fields.Field{ - {Type: "OptionalNested2", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Being", "Name"}, FieldTypes: []string{"Thing", "string"}, ColumnNames: []string{"Being", "Name"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {Type: "OptionalNested2", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - errors: []error{}, - }, - { - name: "unsupported fields", - typ: "Unsupported", - errors: []error{fmt.Errorf("unsupported type: Time")}, - expected: []fields.Field{ - {Type: "Unsupported", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Unsupported", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - }, - { - name: "unsupported fields mixed in with supported and embedded", - typ: "SupportedAndUnsupported", - expected: []fields.Field{ - {Type: "SupportedAndUnsupported", FieldType: "Int64Field", ParquetType: "Int64Type", TypeName: "int64", FieldNames: []string{"Happiness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Happiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "SupportedAndUnsupported", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "SupportedAndUnsupported", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "SupportedAndUnsupported", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - errors: []error{ - fmt.Errorf("unsupported type: T1"), - fmt.Errorf("unsupported type: T2"), - }, - }, - { - name: "embedded", - typ: "Person", - expected: []fields.Field{ - {Type: "Person", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Person", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "Person", FieldType: "Int64Field", ParquetType: "Int64Type", TypeName: "int64", FieldNames: []string{"Happiness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Happiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Person", FieldType: "Int64OptionalField", ParquetType: "Int64Type", TypeName: "*int64", FieldNames: []string{"Sadness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Sadness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "Person", FieldType: "StringField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Code"}, FieldTypes: []string{"string"}, ColumnNames: []string{"Code"}, Category: "string", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Person", FieldType: "Float32Field", ParquetType: "Float32Type", TypeName: "float32", FieldNames: []string{"Funkiness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Funkiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Person", FieldType: "Float32OptionalField", ParquetType: "Float32Type", TypeName: "*float32", FieldNames: []string{"Lameness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Lameness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "Person", FieldType: "BoolOptionalField", ParquetType: "BoolType", TypeName: "*bool", FieldNames: []string{"Keen"}, FieldTypes: []string{"bool"}, ColumnNames: []string{"Keen"}, Category: "boolOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "Person", FieldType: "Uint32Field", ParquetType: "Uint32Type", TypeName: "uint32", FieldNames: []string{"Birthday"}, FieldTypes: []string{"uint32"}, ColumnNames: []string{"Birthday"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Person", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - }, - { - name: "embedded preserve order", - typ: "NewOrderPerson", - expected: []fields.Field{ - {Type: "NewOrderPerson", FieldType: "Int64Field", ParquetType: "Int64Type", TypeName: "int64", FieldNames: []string{"Happiness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Happiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "NewOrderPerson", FieldType: "Int64OptionalField", ParquetType: "Int64Type", TypeName: "*int64", FieldNames: []string{"Sadness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Sadness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "NewOrderPerson", FieldType: "StringField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Code"}, FieldTypes: []string{"string"}, ColumnNames: []string{"Code"}, Category: "string", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "NewOrderPerson", FieldType: "Float32Field", ParquetType: "Float32Type", TypeName: "float32", FieldNames: []string{"Funkiness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Funkiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "NewOrderPerson", FieldType: "Float32OptionalField", ParquetType: "Float32Type", TypeName: "*float32", FieldNames: []string{"Lameness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Lameness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "NewOrderPerson", FieldType: "BoolOptionalField", ParquetType: "BoolType", TypeName: "*bool", FieldNames: []string{"Keen"}, FieldTypes: []string{"bool"}, ColumnNames: []string{"Keen"}, Category: "boolOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "NewOrderPerson", FieldType: "Uint32Field", ParquetType: "Uint32Type", TypeName: "uint32", FieldNames: []string{"Birthday"}, FieldTypes: []string{"uint32"}, ColumnNames: []string{"Birthday"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "NewOrderPerson", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "NewOrderPerson", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "NewOrderPerson", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - }, - { - name: "tags", - typ: "Tagged", - expected: []fields.Field{ - {Type: "Tagged", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Tagged", FieldType: "StringField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Name"}, FieldTypes: []string{"string"}, ColumnNames: []string{"name"}, Category: "string", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - }, - }, - { - name: "omit tag", - typ: "IgnoreMe", - expected: []fields.Field{ - {Type: "IgnoreMe", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - }, - }, + // { + // name: "optional nested struct", + // typ: "OptionalNested", + // expected: []fields.Field{ + // {Type: "OptionalNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Being", "ID"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Being", "ID"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, + // {Type: "OptionalNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Being", "Age"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Being", "Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, + // {Type: "OptionalNested", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // }, + // errors: []error{}, + // }, + // { + // name: "optional nested struct v2", + // typ: "OptionalNested2", + // expected: []fields.Field{ + // {Type: "OptionalNested2", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Being", "Name"}, FieldTypes: []string{"Thing", "string"}, ColumnNames: []string{"Being", "Name"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, + // {Type: "OptionalNested2", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // }, + // errors: []error{}, + // }, + // { + // name: "unsupported fields", + // typ: "Unsupported", + // errors: []error{fmt.Errorf("unsupported type: Time")}, + // expected: []fields.Field{ + // {Type: "Unsupported", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "Unsupported", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // }, + // }, + // { + // name: "unsupported fields mixed in with supported and embedded", + // typ: "SupportedAndUnsupported", + // expected: []fields.Field{ + // {Type: "SupportedAndUnsupported", FieldType: "Int64Field", ParquetType: "Int64Type", TypeName: "int64", FieldNames: []string{"Happiness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Happiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "SupportedAndUnsupported", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "SupportedAndUnsupported", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // {Type: "SupportedAndUnsupported", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // }, + // errors: []error{ + // fmt.Errorf("unsupported type: T1"), + // fmt.Errorf("unsupported type: T2"), + // }, + // }, + // { + // name: "embedded", + // typ: "Person", + // expected: []fields.Field{ + // {Type: "Person", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "Person", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // {Type: "Person", FieldType: "Int64Field", ParquetType: "Int64Type", TypeName: "int64", FieldNames: []string{"Happiness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Happiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "Person", FieldType: "Int64OptionalField", ParquetType: "Int64Type", TypeName: "*int64", FieldNames: []string{"Sadness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Sadness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // {Type: "Person", FieldType: "StringField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Code"}, FieldTypes: []string{"string"}, ColumnNames: []string{"Code"}, Category: "string", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "Person", FieldType: "Float32Field", ParquetType: "Float32Type", TypeName: "float32", FieldNames: []string{"Funkiness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Funkiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "Person", FieldType: "Float32OptionalField", ParquetType: "Float32Type", TypeName: "*float32", FieldNames: []string{"Lameness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Lameness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // {Type: "Person", FieldType: "BoolOptionalField", ParquetType: "BoolType", TypeName: "*bool", FieldNames: []string{"Keen"}, FieldTypes: []string{"bool"}, ColumnNames: []string{"Keen"}, Category: "boolOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // {Type: "Person", FieldType: "Uint32Field", ParquetType: "Uint32Type", TypeName: "uint32", FieldNames: []string{"Birthday"}, FieldTypes: []string{"uint32"}, ColumnNames: []string{"Birthday"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "Person", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // }, + // }, + // { + // name: "embedded preserve order", + // typ: "NewOrderPerson", + // expected: []fields.Field{ + // {Type: "NewOrderPerson", FieldType: "Int64Field", ParquetType: "Int64Type", TypeName: "int64", FieldNames: []string{"Happiness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Happiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "NewOrderPerson", FieldType: "Int64OptionalField", ParquetType: "Int64Type", TypeName: "*int64", FieldNames: []string{"Sadness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Sadness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // {Type: "NewOrderPerson", FieldType: "StringField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Code"}, FieldTypes: []string{"string"}, ColumnNames: []string{"Code"}, Category: "string", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "NewOrderPerson", FieldType: "Float32Field", ParquetType: "Float32Type", TypeName: "float32", FieldNames: []string{"Funkiness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Funkiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "NewOrderPerson", FieldType: "Float32OptionalField", ParquetType: "Float32Type", TypeName: "*float32", FieldNames: []string{"Lameness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Lameness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // {Type: "NewOrderPerson", FieldType: "BoolOptionalField", ParquetType: "BoolType", TypeName: "*bool", FieldNames: []string{"Keen"}, FieldTypes: []string{"bool"}, ColumnNames: []string{"Keen"}, Category: "boolOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // {Type: "NewOrderPerson", FieldType: "Uint32Field", ParquetType: "Uint32Type", TypeName: "uint32", FieldNames: []string{"Birthday"}, FieldTypes: []string{"uint32"}, ColumnNames: []string{"Birthday"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "NewOrderPerson", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "NewOrderPerson", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // {Type: "NewOrderPerson", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // }, + // }, + // { + // name: "tags", + // typ: "Tagged", + // expected: []fields.Field{ + // {Type: "Tagged", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "Tagged", FieldType: "StringField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Name"}, FieldTypes: []string{"string"}, ColumnNames: []string{"name"}, Category: "string", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // }, + // }, + // { + // name: "omit tag", + // typ: "IgnoreMe", + // expected: []fields.Field{ + // {Type: "IgnoreMe", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // }, + // }, { name: "repeated", typ: "Slice", - expected: []fields.Field{ - {Type: "Slice", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "[]int32", FieldNames: []string{"IDs"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ids"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, + expected: fields.Field{ + Type: "Slice", + FieldName: "Slice", + FieldType: "Slice", + ColumnName: "Slice", + TypeName: "Slice", + Children: []fields.Field{ + {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "IDs", ColumnName: "ids", Category: "numericOptional", RepetitionType: fields.Repeated}, + }, }, }, { name: "repeated v2", typ: "Slice2", - expected: []fields.Field{ - {Type: "Slice2", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Slice2", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "[]int32", FieldNames: []string{"IDs"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ids"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, - }, - }, - { - name: "repeated v2", - typ: "Slice3", - expected: []fields.Field{ - {Type: "Slice3", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Slice3", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "[]int32", FieldNames: []string{"IDs"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ids"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, - {Type: "Slice3", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - }, - }, - { - name: "nested and repeated", - typ: "Slice4", - expected: []fields.Field{ - {Type: "Slice4", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Slice4", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Hobbies", "Name"}, FieldTypes: []string{"Hobby", "string"}, ColumnNames: []string{"Hobbies", "Name"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required}}, - }, - }, - { - name: "nested and repeated v2", - typ: "Slice5", - expected: []fields.Field{ - {Type: "Slice5", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Slice5", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "[]string", FieldNames: []string{"Hobby", "Names"}, FieldTypes: []string{"Hobby2", "string"}, ColumnNames: []string{"hobby", "names"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated}}, - }, - }, - { - name: "repeated and repeated", - typ: "Slice6", - expected: []fields.Field{ - {Type: "Slice6", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - {Type: "Slice6", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "[]string", FieldNames: []string{"Hobbies", "Names"}, FieldTypes: []string{"Hobby2", "string"}, ColumnNames: []string{"hobbies", "names"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated}}, - }, - }, - { - name: "nested repeated and repeated", - typ: "Slice7", - expected: []fields.Field{ - {Type: "Slice7", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Thing", "ID"}, FieldTypes: []string{"Slice6", "int32"}, ColumnNames: []string{"thing", "id"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {Type: "Slice7", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "[]string", FieldNames: []string{"Thing", "Hobbies", "Names"}, FieldTypes: []string{"Slice6", "Hobby2", "string"}, ColumnNames: []string{"thing", "hobbies", "names"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Repeated}}, - }, - }, - { - name: "dremel paper example", - typ: "Document", - expected: []fields.Field{ - {Type: "Document", FieldNames: []string{"DocID"}, FieldTypes: []string{"int64"}, TypeName: "int64", FieldType: "Int64Field", ParquetType: "Int64Type", ColumnNames: []string{"DocID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{0}}, - {Type: "Document", FieldNames: []string{"Links", "Backward"}, FieldTypes: []string{"Link", "int64"}, TypeName: "[]int64", FieldType: "Int64OptionalField", ParquetType: "Int64Type", ColumnNames: []string{"Links", "Backward"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 2}}, - {Type: "Document", FieldNames: []string{"Links", "Forward"}, FieldTypes: []string{"Link", "int64"}, TypeName: "[]int64", FieldType: "Int64OptionalField", ParquetType: "Int64Type", ColumnNames: []string{"Links", "Forward"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 2}}, - {Type: "Document", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, TypeName: "string", FieldType: "StringOptionalField", ParquetType: "StringType", ColumnNames: []string{"Names", "Languages", "Code"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{2, 2, 0}}, - {Type: "Document", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, TypeName: "*string", FieldType: "StringOptionalField", ParquetType: "StringType", ColumnNames: []string{"Names", "Languages", "Country"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{2, 2, 1}}, - {Type: "Document", FieldNames: []string{"Names", "URL"}, FieldTypes: []string{"Name", "string"}, TypeName: "*string", FieldType: "StringOptionalField", ParquetType: "StringType", ColumnNames: []string{"Names", "URL"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{2, 1}}, - }, - }, - { - name: "embedded embedded embedded", - typ: "A", - expected: []fields.Field{ - {Type: "A", FieldNames: []string{"D"}, FieldTypes: []string{"int32"}, TypeName: "int32", FieldType: "Int32Field", ParquetType: "Int32Type", ColumnNames: []string{"D"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{0}}, - {Type: "A", FieldNames: []string{"C"}, FieldTypes: []string{"string"}, TypeName: "string", FieldType: "StringField", ParquetType: "StringType", ColumnNames: []string{"C"}, Category: "string", RepetitionTypes: []fields.RepetitionType{0}}, - {Type: "A", FieldNames: []string{"B"}, FieldTypes: []string{"bool"}, TypeName: "bool", FieldType: "BoolField", ParquetType: "BoolType", ColumnNames: []string{"B"}, Category: "bool", RepetitionTypes: []fields.RepetitionType{0}}, - {Type: "A", FieldNames: []string{"Name"}, FieldTypes: []string{"string"}, TypeName: "string", FieldType: "StringField", ParquetType: "StringType", ColumnNames: []string{"Name"}, Category: "string", RepetitionTypes: []fields.RepetitionType{0}}, + expected: fields.Field{ + Type: "Slice2", + FieldName: "Slice2", + FieldType: "Slice2", + ColumnName: "Slice2", + TypeName: "Slice2", + Children: []fields.Field{ + {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, + {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "IDs", ColumnName: "ids", Category: "numericOptional", RepetitionType: fields.Repeated}, + }, }, + // expected: []fields.Field{ + // {Type: "Slice2", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "Slice2", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "[]int32", FieldNames: []string{"IDs"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ids"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, + // }, }, + // { + // name: "repeated v2", + // typ: "Slice3", + // expected: []fields.Field{ + // {Type: "Slice3", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "Slice3", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "[]int32", FieldNames: []string{"IDs"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ids"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, + // {Type: "Slice3", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // }, + // }, + // { + // name: "nested and repeated", + // typ: "Slice4", + // expected: []fields.Field{ + // {Type: "Slice4", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "Slice4", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Hobbies", "Name"}, FieldTypes: []string{"Hobby", "string"}, ColumnNames: []string{"Hobbies", "Name"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required}}, + // }, + // }, + // { + // name: "nested and repeated v2", + // typ: "Slice5", + // expected: []fields.Field{ + // {Type: "Slice5", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "Slice5", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "[]string", FieldNames: []string{"Hobby", "Names"}, FieldTypes: []string{"Hobby2", "string"}, ColumnNames: []string{"hobby", "names"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated}}, + // }, + // }, + // { + // name: "repeated and repeated", + // typ: "Slice6", + // expected: []fields.Field{ + // {Type: "Slice6", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, + // {Type: "Slice6", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "[]string", FieldNames: []string{"Hobbies", "Names"}, FieldTypes: []string{"Hobby2", "string"}, ColumnNames: []string{"hobbies", "names"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated}}, + // }, + // }, + // { + // name: "nested repeated and repeated", + // typ: "Slice7", + // expected: []fields.Field{ + // {Type: "Slice7", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Thing", "ID"}, FieldTypes: []string{"Slice6", "int32"}, ColumnNames: []string{"thing", "id"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, + // {Type: "Slice7", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "[]string", FieldNames: []string{"Thing", "Hobbies", "Names"}, FieldTypes: []string{"Slice6", "Hobby2", "string"}, ColumnNames: []string{"thing", "hobbies", "names"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Repeated}}, + // }, + // }, + // { + // name: "dremel paper example", + // typ: "Document", + // expected: []fields.Field{ + // {Type: "Document", FieldNames: []string{"DocID"}, FieldTypes: []string{"int64"}, TypeName: "int64", FieldType: "Int64Field", ParquetType: "Int64Type", ColumnNames: []string{"DocID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{0}}, + // {Type: "Document", FieldNames: []string{"Links", "Backward"}, FieldTypes: []string{"Link", "int64"}, TypeName: "[]int64", FieldType: "Int64OptionalField", ParquetType: "Int64Type", ColumnNames: []string{"Links", "Backward"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 2}}, + // {Type: "Document", FieldNames: []string{"Links", "Forward"}, FieldTypes: []string{"Link", "int64"}, TypeName: "[]int64", FieldType: "Int64OptionalField", ParquetType: "Int64Type", ColumnNames: []string{"Links", "Forward"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 2}}, + // {Type: "Document", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, TypeName: "string", FieldType: "StringOptionalField", ParquetType: "StringType", ColumnNames: []string{"Names", "Languages", "Code"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{2, 2, 0}}, + // {Type: "Document", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, TypeName: "*string", FieldType: "StringOptionalField", ParquetType: "StringType", ColumnNames: []string{"Names", "Languages", "Country"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{2, 2, 1}}, + // {Type: "Document", FieldNames: []string{"Names", "URL"}, FieldTypes: []string{"Name", "string"}, TypeName: "*string", FieldType: "StringOptionalField", ParquetType: "StringType", ColumnNames: []string{"Names", "URL"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{2, 1}}, + // }, + // }, + // { + // name: "embedded embedded embedded", + // typ: "A", + // expected: []fields.Field{ + // {Type: "A", FieldNames: []string{"D"}, FieldTypes: []string{"int32"}, TypeName: "int32", FieldType: "Int32Field", ParquetType: "Int32Type", ColumnNames: []string{"D"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{0}}, + // {Type: "A", FieldNames: []string{"C"}, FieldTypes: []string{"string"}, TypeName: "string", FieldType: "StringField", ParquetType: "StringType", ColumnNames: []string{"C"}, Category: "string", RepetitionTypes: []fields.RepetitionType{0}}, + // {Type: "A", FieldNames: []string{"B"}, FieldTypes: []string{"bool"}, TypeName: "bool", FieldType: "BoolField", ParquetType: "BoolType", ColumnNames: []string{"B"}, Category: "bool", RepetitionTypes: []fields.RepetitionType{0}}, + // {Type: "A", FieldNames: []string{"Name"}, FieldTypes: []string{"string"}, TypeName: "string", FieldType: "StringField", ParquetType: "StringType", ColumnNames: []string{"Name"}, Category: "string", RepetitionTypes: []fields.RepetitionType{0}}, + // }, + // }, } for i, tc := range testCases { t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { out, err := parse.Fields(tc.typ, "./parse_test.go") assert.Nil(t, err, tc.name) - assert.Equal(t, tc.expected, out.Fields, tc.name) + assert.Equal(t, tc.expected, out.Parent, tc.name) if assert.Equal(t, len(tc.errors), len(out.Errors), tc.name) { for i, err := range out.Errors { assert.EqualError(t, tc.errors[i], err.Error(), tc.name) @@ -317,47 +403,47 @@ func pt(t sch.Type) *sch.Type { return &t } -func TestDefIndex(t *testing.T) { - testCases := []struct { - def int - field fields.Field - expected int - }{ - { - def: 1, - field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Repeated}}, - expected: 1, - }, - { - def: 2, - field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Repeated}}, - expected: 2, - }, - { - def: 1, - field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Repeated}}, - expected: 0, - }, - { - def: 2, - field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Repeated}}, - expected: 2, - }, - { - def: 2, - field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Optional, fields.Required}}, - expected: 1, - }, - { - def: 1, - field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Optional, fields.Required}}, - expected: 0, - }, - } +// func TestDefIndex(t *testing.T) { +// testCases := []struct { +// def int +// field fields.Field +// expected int +// }{ +// { +// def: 1, +// field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Repeated}}, +// expected: 1, +// }, +// { +// def: 2, +// field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Repeated}}, +// expected: 2, +// }, +// { +// def: 1, +// field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Repeated}}, +// expected: 0, +// }, +// { +// def: 2, +// field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Repeated}}, +// expected: 2, +// }, +// { +// def: 2, +// field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Optional, fields.Required}}, +// expected: 1, +// }, +// { +// def: 1, +// field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Optional, fields.Required}}, +// expected: 0, +// }, +// } - for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { - assert.Equal(t, tc.expected, tc.field.DefIndex(tc.def)) - }) - } -} +// for i, tc := range testCases { +// t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { +// assert.Equal(t, tc.expected, tc.field.DefIndex(tc.def)) +// }) +// } +// } diff --git a/internal/parse/parse.go b/internal/parse/parse.go index 6b07045..3300322 100644 --- a/internal/parse/parse.go +++ b/internal/parse/parse.go @@ -31,7 +31,7 @@ type field struct { // by reading a go struct. type Result struct { // Fields are the fields that will be written to and read from a parquet file. - Fields []flds.Field + Parent flds.Field // Errors is a list of errors that occurred while parsing a struct. Errors []error } @@ -50,7 +50,6 @@ func Fields(typ, pth string) (*Result, error) { } f := &finder{n: map[string]ast.Node{}} - ast.Walk(visitorFunc(f.findTypes), file) if f.n == nil { @@ -62,66 +61,132 @@ func Fields(typ, pth string) (*Result, error) { return nil, err } - var out []field var errs []error - var i int - for _, f := range fields[typ] { - i, out, errs = getOut(i, f, fields, errs, out) + parent, ok := fields[typ] + if !ok { + return nil, fmt.Errorf("could not find %s", typ) } + getChildren(&parent, fields, errs) + return &Result{ - Fields: getFields(fullTyp, out, fields), + Parent: parent, Errors: errs, }, nil } -func getOut(i int, f field, fields map[string][]field, errs []error, out []field) (int, []field, []error) { - ff, ok := fields[f.fieldType] - var o flds.RepetitionType = flds.Required - if strings.Contains(f.Field.TypeName, "*") { - o = flds.Optional - } else if f.repeated || strings.Contains(f.Field.TypeName, "[]") { - o = flds.Repeated +func getChildren(parent *flds.Field, fields map[string]flds.Field, errs []error) { + var children []flds.Field + p, ok := fields[parent.FieldType] + fmt.Printf("getChildren %s: %+v\n", parent.FieldType, p) + if !ok { + errs = append(errs, fmt.Errorf("could not find %s", parent.Type)) + return } - if ok { - for _, fld := range ff { - if fld.embedded { - x, more, moreerrs := getOut(0, fld, fields, nil, nil) - i += x - out = append(out, more...) - errs = append(errs, moreerrs...) - } else { - if (!fld.optional && (o == flds.Optional || f.optional)) || (!fld.repeated && (o == flds.Repeated || f.repeated)) { - fld = makeOptional(fld) - } - if !f.embedded { - fld.Field.RepetitionTypes = append(append(f.Field.RepetitionTypes[:0:0], f.Field.RepetitionTypes...), o) //make a copy - fld.Field.FieldNames = append(f.Field.FieldNames, fld.Field.FieldNames...) - fld.Field.FieldTypes = append(f.Field.FieldTypes, fld.Field.FieldTypes...) - fld.Field.ColumnNames = append(f.Field.ColumnNames, fld.Field.ColumnNames...) - } - i, out, errs = getOut(i, fld, fields, errs, out) - } + for _, child := range p.Children { + if child.Primitive() { + children = append(children, child) + continue + } + + f, ok := fields[child.FieldType] + if !ok { + errs = append(errs, fmt.Errorf("could not find %s", child.Type)) + continue } - return i, out, errs - } else if f.err == nil { - _, ok := types[f.fieldType] - if ok { - f.Field.RepetitionTypes = append(f.Field.RepetitionTypes, o) - out = append(out, f) - i++ + + getChildren(&child, fields, errs) + + f.FieldName = child.FieldName + f.TypeName = child.TypeName + f.ColumnName = child.ColumnName + f.Children = child.Children + f.RepetitionType = child.RepetitionType + + fmt.Printf("adding child: %+v\n", child) + + if child.Embedded { + for _, ch := range f.Children { + children = append(children, ch) + } } else { - errs = append(errs, fmt.Errorf("unsupported type: %s", f.fieldName)) + children = append(children, f) } } - return i, out, errs + parent.Children = children +} + +func isPrivate(x *ast.Field) bool { + var s string + if len(x.Names) == 0 { + s = fmt.Sprintf("%s", x.Type) + } else { + s = fmt.Sprintf("%s", x.Names[0]) + } + return strings.Contains(letters, string(s[0])) +} + +func doGetFields(n map[string]ast.Node) (map[string]fields.Field, error) { + fields := map[string]flds.Field{} + for k, n := range n { + x, ok := n.(*ast.TypeSpec) + if !ok { + continue + } + + fmt.Printf("parent? (%s): %+v\n", k, x) + parent := flds.Field{ + Type: x.Name.Name, + TypeName: x.Name.Name, + ColumnName: x.Name.Name, + FieldName: x.Name.Name, + FieldType: x.Name.Name, + } + + ast.Inspect(n, func(n ast.Node) bool { + if n == nil { + return false + } + + switch x := n.(type) { + case *ast.Field: + fmt.Printf("child?: %+v, type: %v\n", n, x.Type) + if len(x.Names) == 1 && !isPrivate(x) { + fmt.Println("a") + f, skip := getField(x.Names[0].Name, x, nil) + if !skip { + parent.Children = append(parent.Children, f) + } + } else if len(x.Names) == 0 && !isPrivate(x) { + fmt.Println("b") + f, skip := getField(fmt.Sprintf("%s", x.Type), x, nil) + f.Embedded = true + if !skip { + parent.Children = append(parent.Children, f) + } + } + case *ast.ArrayType: + fmt.Printf("array child: %+v\n", x) + // s := fields[k] + // f := s[len(s)-1] + // f.repeated = true + // s[len(s)-1] = f + //fields[k] = s + } + return true + }) + + fields[k] = parent + } + + return fields, nil } func makeOptional(f field) field { f.optional = true - fn, cat, pt := lookupTypeAndCategory(strings.Replace(strings.Replace(f.Field.TypeName, "*", "", 1), "[]", "", 1), true, true) + fn, cat, pt, _ := lookupTypeAndCategory(strings.Replace(strings.Replace(f.Field.TypeName, "*", "", 1), "[]", "", 1), true, true) f.Field.FieldType = fn f.Field.ParquetType = pt f.Field.Category = cat @@ -152,42 +217,7 @@ func getFields(fullTyp string, fields []field, m map[string][]field) []flds.Fiel return out } -func isPrivate(x *ast.Field) bool { - var s string - if len(x.Names) == 0 { - s = fmt.Sprintf("%s", x.Type) - } else { - s = fmt.Sprintf("%s", x.Names[0]) - } - return strings.Contains(letters, string(s[0])) -} - -func doGetFields(n map[string]ast.Node) (map[string][]field, error) { - fields := map[string][]field{} - for k, n := range n { - ast.Inspect(n, func(n ast.Node) bool { - switch x := n.(type) { - case *ast.Field: - if len(x.Names) == 1 && !isPrivate(x) { - f := getField(x.Names[0].Name, x) - fields[k] = append(fields[k], f) - } else if len(x.Names) == 0 && !isPrivate(x) { - fields[k] = append(fields[k], field{embedded: true, fieldType: fmt.Sprintf("%s", x.Type), Field: flds.Field{TypeName: fmt.Sprintf("%s", x.Type)}}) - } - case *ast.ArrayType: - s := fields[k] - f := s[len(s)-1] - f.repeated = true - s[len(s)-1] = f - fields[k] = s - } - return true - }) - } - return fields, nil -} - -func getField(name string, x ast.Node, parent *flds.Field) field { +func getField(name string, x ast.Node, parent *flds.Field) (flds.Field, bool) { var typ, tag string var optional, repeated bool ast.Inspect(x, func(n ast.Node) bool { @@ -219,31 +249,25 @@ func getField(name string, x ast.Node, parent *flds.Field) field { tag = name } - fn, cat, pt := lookupTypeAndCategory(typ, optional, repeated) - - f := flds.Field{ - FieldName: name, - FieldType: typ, - ColumnName: tag, - TypeName: getTypeName(typ, optional), - Type: fn, - ParquetType: pt, - Category: cat, - Parent: parent, - } + _, cat, pt, _ := lookupTypeAndCategory(typ, optional, repeated) - if parent != nil { - parent.Children = append(parent.Children, f) + rt := fields.Required + if repeated { + rt = fields.Repeated + } else if optional { + rt = fields.Optional } - return field{ - Field: f, - fieldName: name, - fieldType: typ, - omit: tag == "-", - optional: optional, - repeated: repeated, - } + return flds.Field{ + FieldName: name, + FieldType: typ, + ColumnName: tag, + TypeName: getTypeName(typ, optional), + //Type: fn, + ParquetType: pt, + Category: cat, + RepetitionType: rt, + }, tag == "-" } func parseTag(t string) string { @@ -263,16 +287,16 @@ func getTypeName(s string, optional bool) string { return fmt.Sprintf("%s%s", star, s) } -func lookupTypeAndCategory(name string, optional, repeated bool) (string, string, string) { +func lookupTypeAndCategory(name string, optional, repeated bool) (string, string, string, bool) { var op string if optional || repeated { op = "Optional" } f, ok := types[name] if !ok { - return "", "", "" + return "", "", "", false } - return fmt.Sprintf(f.name, op, "Field"), fmt.Sprintf(f.category, op), fmt.Sprintf(f.name, "", "Type") + return fmt.Sprintf(f.name, op, "Field"), fmt.Sprintf(f.category, op), fmt.Sprintf(f.name, "", "Type"), true } type fieldType struct { @@ -314,6 +338,7 @@ func (f *finder) findTypes(n ast.Node) ast.Visitor { return visitorFunc(f.findTypes) } case *ast.TypeSpec: + //fmt.Printf("node: %+v\n", n) f.n[n.Name.Name] = n return visitorFunc(f.findTypes) } diff --git a/internal/parse/parse_test.go b/internal/parse/parse_test.go index d592529..7ec32b6 100644 --- a/internal/parse/parse_test.go +++ b/internal/parse/parse_test.go @@ -1,58 +1,56 @@ package parse_test -import "time" - type Being struct { ID int32 Age *int32 } -type Person struct { - Being - Happiness int64 - Sadness *int64 - Code string - Funkiness float32 - Lameness *float32 - Keen *bool - Birthday uint32 - Anniversary *uint64 -} - -type NewOrderPerson struct { - Happiness int64 - Sadness *int64 - Code string - Funkiness float32 - Lameness *float32 - Keen *bool - Birthday uint32 - Being - Anniversary *uint64 -} - -type IgnoreMe struct { - ID int32 `parquet:"id"` - Secret string `parquet:"-"` -} - -type Tagged struct { - ID int32 `parquet:"id"` - Name string `parquet:"name"` -} +// type Person struct { +// Being +// Happiness int64 +// Sadness *int64 +// Code string +// Funkiness float32 +// Lameness *float32 +// Keen *bool +// Birthday uint32 +// Anniversary *uint64 +// } + +// type NewOrderPerson struct { +// Happiness int64 +// Sadness *int64 +// Code string +// Funkiness float32 +// Lameness *float32 +// Keen *bool +// Birthday uint32 +// Being +// Anniversary *uint64 +// } + +// type IgnoreMe struct { +// ID int32 `parquet:"id"` +// Secret string `parquet:"-"` +// } + +// type Tagged struct { +// ID int32 `parquet:"id"` +// Name string `parquet:"name"` +// } type Private struct { Being name string } -type Nested struct { - Being Being +type Nested2 struct { + Info Being Anniversary *uint64 } -type Nested2 struct { - Info Being +type Nested struct { + Being Being Anniversary *uint64 } @@ -65,35 +63,35 @@ type OptionalNested struct { Anniversary *uint64 } -type Thing struct { - Name string -} +// type Thing struct { +// Name string +// } -type OptionalNested2 struct { - Being *Thing - Anniversary *uint64 -} +// type OptionalNested2 struct { +// Being *Thing +// Anniversary *uint64 +// } type OptionalDoubleNested struct { OptionalNested OptionalNested } -type Unsupported struct { - Being - // This field will be ignored because it's not one of the - // supported types. - Time time.Time -} +// type Unsupported struct { +// Being +// // This field will be ignored because it's not one of the +// // supported types. +// Time time.Time +// } -type SupportedAndUnsupported struct { - Happiness int64 - x int - T1 time.Time - Being - y int - T2 time.Time - Anniversary *uint64 -} +// type SupportedAndUnsupported struct { +// Happiness int64 +// x int +// T1 time.Time +// Being +// y int +// T2 time.Time +// Anniversary *uint64 +// } type Slice struct { IDs []int32 `parquet:"ids"` @@ -104,75 +102,75 @@ type Slice2 struct { IDs []int32 `parquet:"ids"` } -type Slice3 struct { - ID int32 `parquet:"id"` - IDs []int32 `parquet:"ids"` - Age *int32 -} - -type Hobby struct { - Name string -} - -type Slice4 struct { - ID int32 `parquet:"id"` - Hobbies []Hobby -} - -type Hobby2 struct { - Names []string `parquet:"names"` -} - -type Slice5 struct { - ID int32 `parquet:"id"` - Hobby Hobby2 `parquet:"hobby"` -} - -type Slice6 struct { - ID int32 `parquet:"id"` - Hobbies []Hobby2 `parquet:"hobbies"` -} - -type Slice7 struct { - Thing *Slice6 `parquet:"thing"` -} - -type Link struct { - Backward []int64 - Forward []int64 -} - -type Language struct { - Code string - Country *string -} - -type Name struct { - Languages []Language - URL *string -} - -type Document struct { - DocID int64 - Links []Link - Names []Name -} - -type D struct { - D int32 -} - -type C struct { - D - C string -} - -type B struct { - C - B bool -} - -type A struct { - B - Name string -} +// type Slice3 struct { +// ID int32 `parquet:"id"` +// IDs []int32 `parquet:"ids"` +// Age *int32 +// } + +// type Hobby struct { +// Name string +// } + +// type Slice4 struct { +// ID int32 `parquet:"id"` +// Hobbies []Hobby +// } + +// type Hobby2 struct { +// Names []string `parquet:"names"` +// } + +// type Slice5 struct { +// ID int32 `parquet:"id"` +// Hobby Hobby2 `parquet:"hobby"` +// } + +// type Slice6 struct { +// ID int32 `parquet:"id"` +// Hobbies []Hobby2 `parquet:"hobbies"` +// } + +// type Slice7 struct { +// Thing *Slice6 `parquet:"thing"` +// } + +// type Link struct { +// Backward []int64 +// Forward []int64 +// } + +// type Language struct { +// Code string +// Country *string +// } + +// type Name struct { +// Languages []Language +// URL *string +// } + +// type Document struct { +// DocID int64 +// Links []Link +// Names []Name +// } + +// type D struct { +// D int32 +// } + +// type C struct { +// D +// C string +// } + +// type B struct { +// C +// B bool +// } + +// type A struct { +// B +// Name string +// } From 03d864ea110ab1077c88928d812fa8e5d91d9607 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Tue, 8 Jun 2021 07:58:39 -0600 Subject: [PATCH 05/25] wip (added field.Fields()) --- internal/fields/fields.go | 36 +- internal/fields/fields_test.go | 453 +++++++++++++++---------- internal/fields/repetition.go | 11 + internal/gen/gen.go | 4 +- internal/parse/fields_test.go | 581 +++++++++++++++++---------------- internal/parse/parse.go | 63 +--- internal/parse/parse_test.go | 257 ++++++++------- 7 files changed, 761 insertions(+), 644 deletions(-) diff --git a/internal/fields/fields.go b/internal/fields/fields.go index b882947..6da8cf4 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -20,6 +20,7 @@ type Field struct { Parent *Field Embedded bool Children []Field + NthChild int } type input struct { @@ -28,6 +29,26 @@ type input struct { Append bool } +func (f Field) Fields() []Field { + return f.fields(0) +} + +func (f Field) fields(i int) []Field { + var out []Field + for j, fld := range f.Children { + fld.NthChild = j + if i > 0 { + fld.Parent = &f + } + if fld.Primitive() { + out = append(out, fld) + } else { + out = append(out, fld.fields(i+1)...) + } + } + return out +} + func (f Field) chain() []Field { out := []Field{f} for fld := f.Parent; fld != nil; fld = fld.Parent { @@ -186,7 +207,7 @@ func (f Field) Required() bool { // that writes to a struct's field // // example: x.Friend.Hobby = &Item{} -func (f Field) Init(def, rep, nthChild int) string { +func (f Field) Init(def, rep int) string { maxDef := f.MaxDef() maxRep := f.MaxRep() var defs, reps int @@ -212,14 +233,14 @@ func (f Field) Init(def, rep, nthChild int) string { case Optional: left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.FieldName)) case Repeated: - if (rep > 0 && reps < rep) || (nthChild > 0 && !fld.Primitive()) { + if (rep > 0 && reps < rep) || (f.NthChild > 0 && !fld.Primitive()) { left = fmt.Sprintf(left, fmt.Sprintf(".%s[ind[%d]]%%s", fld.FieldName, reps-1)) } else { left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.FieldName)) } } - if (defs >= def || ((rep == 0 && fld.RepetitionType != Required) || (rep > 0 && reps == rep))) && nthChild == 0 { + if (defs >= def || ((rep == 0 && fld.RepetitionType != Required) || (rep > 0 && reps == rep))) && f.NthChild == 0 { break } } @@ -242,7 +263,7 @@ func (f Field) Init(def, rep, nthChild int) string { right = fmt.Sprintf(right, fmt.Sprintf("{%s: vals[nVals]}%%s", fld.FieldName)) } else if fld.Parent.RepetitionType == Repeated { right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[nVals]%%s", fld.FieldName)) - } else if nthChild > 0 { + } else if f.NthChild > 0 { right = fmt.Sprintf(right, "vals[0]%s") } else { right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[0]%%s", fld.FieldName)) @@ -256,13 +277,13 @@ func (f Field) Init(def, rep, nthChild int) string { } case Optional: if fld.Primitive() { - if nthChild == 0 && fld.Parent.Optional() && !fld.Parent.Repeated() { + if f.NthChild == 0 && fld.Parent.Optional() && !fld.Parent.Repeated() { right = fmt.Sprintf(right, fmt.Sprintf("%s: p%s(vals[0])%%s", fld.FieldName, fld.FieldType)) } else if fld.Parent.RepetitionType == Repeated { right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[nVals])%%s", fld.FieldType)) - } else if fld.Parent.Repeated() && nthChild == 0 { + } else if fld.Parent.Repeated() && f.NthChild == 0 { right = fmt.Sprintf(right, fmt.Sprintf("%s: p%s(vals[nVals])%%s", fld.FieldName, fld.FieldType)) - } else if fld.Parent.Repeated() && nthChild > 0 { + } else if fld.Parent.Repeated() && f.NthChild > 0 { right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[nVals])%%s", fld.FieldType)) } else { right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[0])%%s", fld.FieldType)) @@ -308,7 +329,6 @@ func (f Field) Init(def, rep, nthChild int) string { } right = fmt.Sprintf(right, "") - fmt.Printf("x%s = %s\n", left, right) return fmt.Sprintf("x%s = %s", left, right) } diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go index 762fbf1..7179858 100644 --- a/internal/fields/fields_test.go +++ b/internal/fields/fields_test.go @@ -9,353 +9,468 @@ import ( "github.com/stretchr/testify/assert" ) -func TestFields(t *testing.T) { +func TestNilFields(t *testing.T) { + type testInput struct { + f fields.Field + expected []string + } + + testCases := []testInput{ + { + f: fields.Field{FieldName: "First", RepetitionType: fields.Optional, Parent: &fields.Field{ + FieldName: "Name", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Friends", RepetitionType: fields.Repeated}}}, + expected: []string{ + "Friends", + "Friends.Name.First", + }, + }, + { + f: fields.Field{FieldName: "First", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Name", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Friend", RepetitionType: fields.Required}}}, + expected: []string{ + "Friend.Name.First", + }, + }, + } + + for i, tc := range testCases { + t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { + if !assert.Equal(t, len(tc.expected), tc.f.MaxDef()) { + return + } + + for i := 0; i < tc.f.MaxDef(); i++ { + s, _, _, _ := tc.f.NilField(i) + assert.Equal(t, tc.expected[i], s) + } + }) + } +} + +func TestInit(t *testing.T) { testCases := []struct { - field fields.Field + fields []fields.Field def int rep int - nthChild int expected string }{ { - field: fields.Field{FieldName: "Backward", FieldType: "int64", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Backward", FieldType: "int64", RepetitionType: fields.Repeated}, + }}, + }, rep: 0, def: 1, expected: "x.Links = &Link{}", }, { - field: fields.Field{FieldName: "Backward", FieldType: "int64", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Backward", FieldType: "int64", RepetitionType: fields.Repeated}, + }}, + }, rep: 0, def: 2, expected: "x.Links = &Link{Backward: []int64{vals[nVals]}}", }, { - field: fields.Field{FieldName: "Backward", FieldType: "int64", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Backward", FieldType: "int64", RepetitionType: fields.Repeated}, + }}, + }, def: 2, rep: 1, expected: "x.Links.Backward = append(x.Links.Backward, vals[nVals])", }, { - field: fields.Field{FieldName: "Forward", FieldType: "int64", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Forward", FieldType: "int64", RepetitionType: fields.Repeated}, + }}, + }, def: 2, rep: 1, expected: "x.Links.Forward = append(x.Links.Forward, vals[nVals])", }, { - field: fields.Field{FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, + fields: []fields.Field{ + {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required}, + }}, + }}, + }, def: 2, rep: 0, expected: "x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}}", }, { - field: fields.Field{FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, + fields: []fields.Field{ + {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required}, + }}, + }}, + }, def: 2, rep: 1, expected: "x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}})", }, { - field: fields.Field{FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, + fields: []fields.Field{ + {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required}, + }}, + }}, + }, def: 2, rep: 2, expected: "x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]})", }, { - field: fields.Field{FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, + fields: []fields.Field{ + {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required}, + }}, + }}, + }, def: 1, rep: 1, expected: "x.Names = append(x.Names, Name{})", }, { - field: fields.Field{FieldName: "Backward", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Backward", FieldType: "string", RepetitionType: fields.Repeated}, + }}, + }, def: 1, rep: 0, expected: "x.Link = &Link{}", }, { - field: fields.Field{FieldName: "Backward", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Backward", FieldType: "string", RepetitionType: fields.Repeated}, + }}, + }, def: 2, rep: 0, expected: "x.Link = &Link{Backward: []string{vals[nVals]}}", }, { - field: fields.Field{FieldName: "Backward", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Backward", FieldType: "string", RepetitionType: fields.Repeated}, + }}, + }, def: 2, rep: 1, expected: "x.Link.Backward = append(x.Link.Backward, vals[nVals])", }, { - field: fields.Field{FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Language", FieldType: "Language", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, + fields: []fields.Field{ + {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Language", FieldType: "Language", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, def: 2, rep: 0, expected: "x.Names = []Name{{Language: Language{Codes: []string{vals[nVals]}}}}", }, { - field: fields.Field{FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Name", FieldType: "Name", RepetitionType: fields.Required}}}, + fields: []fields.Field{ + {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, def: 2, rep: 0, expected: "x.Name.Languages = []Language{{Codes: []string{vals[nVals]}}}", }, { - field: fields.Field{FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Language", FieldType: "Language", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, + fields: []fields.Field{ + {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Language", FieldType: "Language", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, def: 2, rep: 2, expected: "x.Names[ind[0]].Language.Codes = append(x.Names[ind[0]].Language.Codes, vals[nVals])", }, { - field: fields.Field{FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Name", FieldType: "Name", RepetitionType: fields.Required}}}, + fields: []fields.Field{ + {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, def: 2, rep: 2, expected: "x.Name.Languages[ind[0]].Codes = append(x.Name.Languages[ind[0]].Codes, vals[nVals])", }, { - field: fields.Field{FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Thing", FieldType: "Thing", RepetitionType: fields.Required}}}}, + fields: []fields.Field{ + {FieldName: "Thing", FieldType: "Thing", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }}, + }, def: 3, rep: 3, expected: "x.Thing.Names[ind[0]].Languages[ind[1]].Codes = append(x.Thing.Names[ind[0]].Languages[ind[1]].Codes, vals[nVals])", }, { - field: fields.Field{FieldName: "Difficulty", FieldType: "int32", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Difficulty", FieldType: "int32", RepetitionType: fields.Optional}, + }}, + }, def: 1, expected: "x.Hobby = &Hobby{}", }, { - field: fields.Field{FieldName: "Difficulty", FieldType: "int32", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Difficulty", FieldType: "int32", RepetitionType: fields.Optional}, + }}, + }, def: 2, expected: "x.Hobby = &Hobby{Difficulty: pint32(vals[0])}", }, { - field: fields.Field{FieldName: "Difficulty", FieldType: "int32", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + {FieldName: "Difficulty", FieldType: "int32", RepetitionType: fields.Optional}, + }}, + }, def: 2, - nthChild: 1, expected: "x.Hobby.Difficulty = pint32(vals[0])", }, { - field: fields.Field{FieldName: "Name", FieldType: "string", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", RepetitionType: fields.Required}, + }}, + }, def: 1, expected: "x.Hobby = &Hobby{Name: vals[0]}", }, { - field: fields.Field{FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Required}}, + fields: []fields.Field{ + {FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + }}, + }, def: 1, expected: "x.Hobby.Name = pstring(vals[0])", }, { - field: fields.Field{FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + }}, + }, def: 1, expected: "x.Hobby = &Item{}", }, { - field: fields.Field{FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + }}, + }, def: 2, expected: "x.Hobby = &Item{Name: pstring(vals[0])}", }, { - field: fields.Field{FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional}}}, + fields: []fields.Field{ + {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + }}, + }}, + }, def: 3, expected: "x.Friend = &Entity{Hobby: &Item{Name: pstring(vals[0])}}", }, { - field: fields.Field{FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Required}}}, + fields: []fields.Field{ + {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + }}, + }}, + }, def: 1, expected: "x.Friend.Hobby = &Item{}", }, { - field: fields.Field{FieldName: "Country", FieldType: "string", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, + fields: []fields.Field{ + {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Country", FieldType: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, def: 1, rep: 1, expected: "x.Names = append(x.Names, Name{})", }, { - field: fields.Field{FieldName: "Country", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated}}}, + fields: []fields.Field{ + {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Zip", FieldType: "string", RepetitionType: fields.Optional}, + {FieldName: "Country", FieldType: "string", RepetitionType: fields.Optional}, + }}, + }}, + }, def: 3, rep: 0, - nthChild: 1, expected: "x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals])", }, { - field: fields.Field{FieldName: "First", FieldType: "string", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional}}}}, + fields: []fields.Field{ + {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "First", FieldType: "string", RepetitionType: fields.Required}, + }}, + }}, + }}, + }, def: 1, expected: "x.Friend = &Entity{}", }, { - field: fields.Field{FieldName: "First", FieldType: "string", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional}}}}, + fields: []fields.Field{ + {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "First", FieldType: "string", RepetitionType: fields.Required}, + }}, + }}, + }}, + }, def: 2, expected: "x.Friend = &Entity{Hobby: &Item{}}", }, { - field: fields.Field{FieldName: "First", FieldType: "string", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional}}}}, + fields: []fields.Field{ + {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "First", FieldType: "string", RepetitionType: fields.Required}, + }}, + }}, + }}, + }, def: 3, expected: "x.Friend = &Entity{Hobby: &Item{Name: &Name{First: vals[0]}}}", }, { - field: fields.Field{FieldName: "First", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional}}}}, + fields: []fields.Field{ + {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Suffix", FieldType: "string", RepetitionType: fields.Optional}, + {FieldName: "First", FieldType: "string", RepetitionType: fields.Optional}, + }}, + }}, + }}, + }, def: 3, - nthChild: 1, expected: "x.Friend.Hobby.Name.First = pstring(vals[0])", }, { - field: fields.Field{FieldName: "Forward", FieldType: "int64", RepetitionType: fields.Repeated, Parent: &fields.Field{FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional}}, + fields: []fields.Field{ + {FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", RepetitionType: fields.Repeated}, + {FieldName: "Forward", FieldType: "int64", RepetitionType: fields.Repeated}, + }}, + }, rep: 1, def: 2, - nthChild: 1, expected: "x.Link.Forward = append(x.Link.Forward, vals[nVals])", }, { - field: fields.Field{FieldName: "LuckyNumbers", FieldType: "int64", RepetitionType: fields.Repeated}, + fields: []fields.Field{ + {FieldName: "LuckyNumbers", FieldType: "int64", RepetitionType: fields.Repeated}, + }, def: 1, rep: 0, expected: "x.LuckyNumbers = []int64{vals[nVals]}", }, { - field: fields.Field{FieldName: "LuckyNumbers", FieldType: "int64", RepetitionType: fields.Repeated}, + fields: []fields.Field{ + {FieldName: "LuckyNumbers", FieldType: "int64", RepetitionType: fields.Repeated}, + }, def: 1, rep: 1, expected: "x.LuckyNumbers = append(x.LuckyNumbers, vals[nVals])", }, { - field: fields.Field{FieldName: "F", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{ - FieldName: "E", FieldType: "E", RepetitionType: fields.Required, Parent: &fields.Field{ - FieldName: "D", FieldType: "D", RepetitionType: fields.Repeated, Parent: &fields.Field{ - FieldName: "C", FieldType: "C", RepetitionType: fields.Required, Parent: &fields.Field{ - FieldName: "B", FieldType: "B", RepetitionType: fields.Optional, Parent: &fields.Field{ - FieldName: "A", FieldType: "A", RepetitionType: fields.Required}}}}}}, + fields: []fields.Field{ + {FieldName: "A", FieldType: "A", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "B", FieldType: "B", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "C", FieldType: "C", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "D", FieldType: "D", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "E", FieldType: "E", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "F", FieldType: "string", RepetitionType: fields.Optional}, + }}, + }}, + }}, + }}, + }}, + }, def: 3, rep: 0, expected: "x.A.B = &B{C: C{D: []D{{E: E{F: pstring(vals[nVals])}}}}}", }, { - field: fields.Field{FieldName: "F", FieldType: "string", RepetitionType: fields.Optional, Parent: &fields.Field{ - FieldName: "E", FieldType: "E", RepetitionType: fields.Required, Parent: &fields.Field{ - FieldName: "D", FieldType: "D", RepetitionType: fields.Repeated, Parent: &fields.Field{ - FieldName: "C", FieldType: "C", RepetitionType: fields.Required, Parent: &fields.Field{ - FieldName: "B", FieldType: "B", RepetitionType: fields.Optional, Parent: &fields.Field{ - FieldName: "A", FieldType: "A", RepetitionType: fields.Required}}}}}}, + fields: []fields.Field{ + {FieldName: "A", FieldType: "A", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "B", FieldType: "B", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "C", FieldType: "C", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "D", FieldType: "D", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "E", FieldType: "E", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "x", FieldType: "string", RepetitionType: fields.Optional}, + {FieldName: "F", FieldType: "string", RepetitionType: fields.Optional}, + }}, + }}, + }}, + }}, + }}, + }, def: 3, - nthChild: 1, expected: "x.A.B.C.D[ind[0]].E.F = pstring(vals[nVals])", }, } for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d %v def %d rep %d", i, tc.field.FieldNames(), tc.def, tc.rep), func(t *testing.T) { - field := tc.field - s := field.Init(tc.def, tc.rep, tc.nthChild) + t.Run(fmt.Sprintf("%02d def %d rep %d", i, tc.def, tc.rep), func(t *testing.T) { + fields := fields.Field{Children: tc.fields}.Fields() + field := fields[len(fields)-1] + s := field.Init(tc.def, tc.rep) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.expected, string(gocode)) }) } } - -// func TestSeen(t *testing.T) { -// testCases := []struct { -// flds []fields.Field -// expected []fields.RepetitionType -// }{ -// { -// flds: []fields.Field{ -// {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, -// {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, -// }, -// expected: []fields.RepetitionType{fields.Optional}, -// }, -// { -// flds: []fields.Field{ -// {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required}}, -// {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated}}, -// }, -// expected: []fields.RepetitionType{fields.Required}, -// }, -// { -// flds: []fields.Field{ -// {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, -// {FieldNames: []string{"Link", "Backward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, -// {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, -// }, -// expected: []fields.RepetitionType{fields.Repeated}, -// }, -// { -// flds: []fields.Field{ -// {FieldNames: []string{"Name", "First"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, -// {FieldNames: []string{"Link", "Forward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, -// }, -// expected: []fields.RepetitionType{}, -// }, -// { -// flds: []fields.Field{ -// {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, -// {FieldNames: []string{"Link", "Name", "First"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Optional}}, -// {FieldNames: []string{"Link", "Name", "Last"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required}}, -// }, -// expected: []fields.RepetitionType{fields.Repeated, fields.Repeated}, -// }, -// } - -// for i, tc := range testCases { -// t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { -// i := len(tc.flds) - 1 -// assert.Equal(t, tc.expected, fields.Seen(i, tc.flds)) -// }) -// } -// } - -// func TestChild(t *testing.T) { -// f := fields.Field{ -// FieldNames: []string{"Friends", "Name", "First"}, -// FieldTypes: []string{"Being", "Name", "string"}, -// RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Optional}, -// } -// ch := fields.Field{ -// FieldNames: []string{"Name", "First"}, -// FieldTypes: []string{"Name", "string"}, -// RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}, -// } -// assert.Equal(t, ch, f.Child(1)) -// } - -// func TestRepCases(t *testing.T) { -// testCases := []struct { -// f fields.Field -// seen []fields.RepetitionType -// expected []fields.RepCase -// }{ -// { -// f: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, -// expected: []fields.RepCase{{Case: "case 0:", Rep: 0}, {Case: "case 1:", Rep: 1}, {Case: "case 2:", Rep: 2}}, -// }, -// { -// f: fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, -// seen: []fields.RepetitionType{fields.Repeated, fields.Repeated}, -// expected: []fields.RepCase{{Case: "default:", Rep: 0}}, -// }, -// } - -// for i, tc := range testCases { -// t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { -// assert.Equal(t, tc.expected, tc.f.RepCases(tc.seen)) -// }) -// } -// } - -// func TestNilField(t *testing.T) { -// f := fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}} -// name, rt, i, reps := f.NilField(1) -// assert.Equal(t, "Names.Languages", name) -// assert.Equal(t, fields.Repeated, rt) -// assert.Equal(t, 1, i) -// assert.Equal(t, 2, reps) -// } - -// func TestField(t *testing.T) { -// f := fields.Field{FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}} -// assert.True(t, f.Repeated()) -// assert.True(t, f.Optional()) -// assert.False(t, f.Required()) -// } - -// func TestRepetitionTypes(t *testing.T) { -// rts := fields.RepetitionTypes([]fields.RepetitionType{fields.Repeated, fields.Optional}) -// assert.Equal(t, rts.Def(1), fields.Repeated) -// assert.Equal(t, rts.Def(2), fields.Optional) -// } diff --git a/internal/fields/repetition.go b/internal/fields/repetition.go index 0ef54d7..2f280dc 100644 --- a/internal/fields/repetition.go +++ b/internal/fields/repetition.go @@ -11,6 +11,17 @@ const ( Repeated RepetitionType = 2 ) +func (r RepetitionType) Prefix() string { + switch r { + case Optional: + return "*" + case Repeated: + return "[]" + default: + return "" + } +} + // RepetitionTypes provides several functions used by parquetgen's // go templates to generate code. type RepetitionTypes []RepetitionType diff --git a/internal/gen/gen.go b/internal/gen/gen.go index 354bf41..f44d772 100644 --- a/internal/gen/gen.go +++ b/internal/gen/gen.go @@ -46,7 +46,7 @@ func FromStruct(pth, outPth, typ, pkg, imp string, ignore bool) { Package: pkg, Type: typ, Import: getImport(imp), - Fields: result.Fields, + Parent: result.Parent, } tmpl := template.New("output").Funcs(funcs) @@ -156,7 +156,7 @@ type input struct { Package string Type string Import string - Fields []fields.Field + Parent fields.Field } func getFieldType(se *sch.SchemaElement) string { diff --git a/internal/parse/fields_test.go b/internal/parse/fields_test.go index 9ab8360..2823a03 100644 --- a/internal/parse/fields_test.go +++ b/internal/parse/fields_test.go @@ -16,42 +16,6 @@ func init() { log.SetOutput(ioutil.Discard) } -func TestField(t *testing.T) { - type testInput struct { - f fields.Field - expected []string - } - - testCases := []testInput{ - { - f: fields.Field{FieldName: "First", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Name", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Friends", RepetitionType: fields.Repeated}}}, - expected: []string{ - "Friends", - "Friends.Name.First", - }, - }, - { - f: fields.Field{FieldName: "First", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Name", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Friends", RepetitionType: fields.Required}}}, - expected: []string{ - "Friend.Name.First", - }, - }, - } - - for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { - if !assert.Equal(t, len(tc.expected), tc.f.MaxDef()) { - return - } - - for i := 0; i < tc.f.MaxDef(); i++ { - s, _, _, _ := tc.f.NilField(i) - assert.Equal(t, tc.expected[i], s) - } - }) - } -} - func TestFields(t *testing.T) { type testInput struct { @@ -66,11 +30,6 @@ func TestFields(t *testing.T) { name: "flat", typ: "Being", expected: fields.Field{ - Type: "Being", - FieldName: "Being", - FieldType: "Being", - ColumnName: "Being", - TypeName: "Being", Children: []fields.Field{ {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, {FieldType: "int32", ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, @@ -81,11 +40,6 @@ func TestFields(t *testing.T) { name: "private fields", typ: "Private", expected: fields.Field{ - Type: "Private", - FieldName: "Private", - FieldType: "Private", - ColumnName: "Private", - TypeName: "Private", Children: []fields.Field{ {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, @@ -96,11 +50,6 @@ func TestFields(t *testing.T) { name: "nested struct", typ: "Nested", expected: fields.Field{ - Type: "Nested", - FieldName: "Nested", - FieldType: "Nested", - ColumnName: "Nested", - TypeName: "Nested", Children: []fields.Field{ {Type: "Being", TypeName: "Being", FieldName: "Being", FieldType: "Being", ColumnName: "Being", RepetitionType: fields.Required, Children: []fields.Field{ {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, @@ -115,11 +64,6 @@ func TestFields(t *testing.T) { name: "nested struct with name that doesn't match the struct type", typ: "Nested2", expected: fields.Field{ - Type: "Nested2", - FieldName: "Nested2", - FieldType: "Nested2", - ColumnName: "Nested2", - TypeName: "Nested2", Children: []fields.Field{ {Type: "Being", TypeName: "Being", FieldName: "Info", FieldType: "Being", ColumnName: "Info", RepetitionType: fields.Required, Children: []fields.Field{ {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, @@ -134,18 +78,9 @@ func TestFields(t *testing.T) { name: "2 deep nested struct", typ: "DoubleNested", expected: fields.Field{ - Type: "DoubleNested", - FieldName: "DoubleNested", - FieldType: "DoubleNested", - ColumnName: "DoubleNested", - TypeName: "DoubleNested", Children: []fields.Field{ { - Type: "Nested", - FieldName: "Nested", - FieldType: "Nested", - ColumnName: "Nested", - TypeName: "Nested", + Type: "Nested", FieldName: "Nested", FieldType: "Nested", ColumnName: "Nested", TypeName: "Nested", Children: []fields.Field{ {Type: "Being", TypeName: "Being", FieldName: "Being", FieldType: "Being", ColumnName: "Being", RepetitionType: fields.Required, Children: []fields.Field{ {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, @@ -162,18 +97,9 @@ func TestFields(t *testing.T) { name: "2 deep optional nested struct", typ: "OptionalDoubleNested", expected: fields.Field{ - Type: "OptionalDoubleNested", - FieldName: "OptionalDoubleNested", - FieldType: "OptionalDoubleNested", - ColumnName: "OptionalDoubleNested", - TypeName: "OptionalDoubleNested", Children: []fields.Field{ { - Type: "OptionalNested", - FieldName: "OptionalNested", - FieldType: "OptionalNested", - ColumnName: "OptionalNested", - TypeName: "OptionalNested", + Type: "OptionalNested", FieldName: "OptionalNested", FieldType: "OptionalNested", ColumnName: "OptionalNested", TypeName: "OptionalNested", Children: []fields.Field{ {Type: "Being", TypeName: "*Being", FieldName: "Being", FieldType: "Being", ColumnName: "Being", RepetitionType: fields.Optional, Children: []fields.Field{ {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, @@ -186,104 +112,119 @@ func TestFields(t *testing.T) { }, errors: []error{}, }, - // { - // name: "optional nested struct", - // typ: "OptionalNested", - // expected: []fields.Field{ - // {Type: "OptionalNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Being", "ID"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Being", "ID"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - // {Type: "OptionalNested", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Being", "Age"}, FieldTypes: []string{"Being", "int32"}, ColumnNames: []string{"Being", "Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - // {Type: "OptionalNested", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // }, - // errors: []error{}, - // }, - // { - // name: "optional nested struct v2", - // typ: "OptionalNested2", - // expected: []fields.Field{ - // {Type: "OptionalNested2", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Being", "Name"}, FieldTypes: []string{"Thing", "string"}, ColumnNames: []string{"Being", "Name"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - // {Type: "OptionalNested2", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // }, - // errors: []error{}, - // }, - // { - // name: "unsupported fields", - // typ: "Unsupported", - // errors: []error{fmt.Errorf("unsupported type: Time")}, - // expected: []fields.Field{ - // {Type: "Unsupported", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "Unsupported", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // }, - // }, - // { - // name: "unsupported fields mixed in with supported and embedded", - // typ: "SupportedAndUnsupported", - // expected: []fields.Field{ - // {Type: "SupportedAndUnsupported", FieldType: "Int64Field", ParquetType: "Int64Type", TypeName: "int64", FieldNames: []string{"Happiness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Happiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "SupportedAndUnsupported", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "SupportedAndUnsupported", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // {Type: "SupportedAndUnsupported", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // }, - // errors: []error{ - // fmt.Errorf("unsupported type: T1"), - // fmt.Errorf("unsupported type: T2"), - // }, - // }, - // { - // name: "embedded", - // typ: "Person", - // expected: []fields.Field{ - // {Type: "Person", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "Person", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // {Type: "Person", FieldType: "Int64Field", ParquetType: "Int64Type", TypeName: "int64", FieldNames: []string{"Happiness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Happiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "Person", FieldType: "Int64OptionalField", ParquetType: "Int64Type", TypeName: "*int64", FieldNames: []string{"Sadness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Sadness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // {Type: "Person", FieldType: "StringField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Code"}, FieldTypes: []string{"string"}, ColumnNames: []string{"Code"}, Category: "string", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "Person", FieldType: "Float32Field", ParquetType: "Float32Type", TypeName: "float32", FieldNames: []string{"Funkiness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Funkiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "Person", FieldType: "Float32OptionalField", ParquetType: "Float32Type", TypeName: "*float32", FieldNames: []string{"Lameness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Lameness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // {Type: "Person", FieldType: "BoolOptionalField", ParquetType: "BoolType", TypeName: "*bool", FieldNames: []string{"Keen"}, FieldTypes: []string{"bool"}, ColumnNames: []string{"Keen"}, Category: "boolOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // {Type: "Person", FieldType: "Uint32Field", ParquetType: "Uint32Type", TypeName: "uint32", FieldNames: []string{"Birthday"}, FieldTypes: []string{"uint32"}, ColumnNames: []string{"Birthday"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "Person", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // }, - // }, - // { - // name: "embedded preserve order", - // typ: "NewOrderPerson", - // expected: []fields.Field{ - // {Type: "NewOrderPerson", FieldType: "Int64Field", ParquetType: "Int64Type", TypeName: "int64", FieldNames: []string{"Happiness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Happiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "NewOrderPerson", FieldType: "Int64OptionalField", ParquetType: "Int64Type", TypeName: "*int64", FieldNames: []string{"Sadness"}, FieldTypes: []string{"int64"}, ColumnNames: []string{"Sadness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // {Type: "NewOrderPerson", FieldType: "StringField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Code"}, FieldTypes: []string{"string"}, ColumnNames: []string{"Code"}, Category: "string", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "NewOrderPerson", FieldType: "Float32Field", ParquetType: "Float32Type", TypeName: "float32", FieldNames: []string{"Funkiness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Funkiness"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "NewOrderPerson", FieldType: "Float32OptionalField", ParquetType: "Float32Type", TypeName: "*float32", FieldNames: []string{"Lameness"}, FieldTypes: []string{"float32"}, ColumnNames: []string{"Lameness"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // {Type: "NewOrderPerson", FieldType: "BoolOptionalField", ParquetType: "BoolType", TypeName: "*bool", FieldNames: []string{"Keen"}, FieldTypes: []string{"bool"}, ColumnNames: []string{"Keen"}, Category: "boolOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // {Type: "NewOrderPerson", FieldType: "Uint32Field", ParquetType: "Uint32Type", TypeName: "uint32", FieldNames: []string{"Birthday"}, FieldTypes: []string{"uint32"}, ColumnNames: []string{"Birthday"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "NewOrderPerson", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "NewOrderPerson", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // {Type: "NewOrderPerson", FieldType: "Uint64OptionalField", ParquetType: "Uint64Type", TypeName: "*uint64", FieldNames: []string{"Anniversary"}, FieldTypes: []string{"uint64"}, ColumnNames: []string{"Anniversary"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // }, - // }, - // { - // name: "tags", - // typ: "Tagged", - // expected: []fields.Field{ - // {Type: "Tagged", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "Tagged", FieldType: "StringField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Name"}, FieldTypes: []string{"string"}, ColumnNames: []string{"name"}, Category: "string", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // }, - // }, - // { - // name: "omit tag", - // typ: "IgnoreMe", - // expected: []fields.Field{ - // {Type: "IgnoreMe", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // }, - // }, + { + name: "optional nested struct", + typ: "OptionalNested", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "Being", TypeName: "*Being", FieldName: "Being", FieldType: "Being", ColumnName: "Being", RepetitionType: fields.Optional, Children: []fields.Field{ + {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + }}, + {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + }, + }, + errors: []error{}, + }, + { + name: "optional nested struct v2", + typ: "OptionalNested2", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "Thing", TypeName: "*Thing", FieldName: "Being", FieldType: "Thing", ColumnName: "Being", RepetitionType: fields.Optional, Children: []fields.Field{ + {ParquetType: "StringType", TypeName: "string", FieldName: "Name", FieldType: "string", ColumnName: "Name", Category: "string", RepetitionType: fields.Required}, + }}, + {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + }, + }, + errors: []error{}, + }, + { + name: "unsupported fields", + typ: "Unsupported", + errors: []error{fmt.Errorf("unsupported type &{time Time}")}, + expected: fields.Field{ + Children: []fields.Field{ + {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + }, + }, + }, + { + name: "unsupported fields mixed in with supported and embedded", + typ: "SupportedAndUnsupported", + expected: fields.Field{ + Children: []fields.Field{ + {ParquetType: "Int64Type", TypeName: "int64", FieldName: "Happiness", FieldType: "int64", ColumnName: "Happiness", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + }, + }, + errors: []error{ + fmt.Errorf("unsupported type &{time Time}"), + fmt.Errorf("unsupported type &{time Time}"), + }, + }, + { + name: "embedded", + typ: "Person", + expected: fields.Field{ + Children: []fields.Field{ + {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + {ParquetType: "Int64Type", TypeName: "int64", FieldName: "Happiness", FieldType: "int64", ColumnName: "Happiness", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int64Type", TypeName: "*int64", FieldName: "Sadness", FieldType: "int64", ColumnName: "Sadness", Category: "numericOptional", RepetitionType: fields.Optional}, + {ParquetType: "StringType", TypeName: "string", FieldName: "Code", FieldType: "string", ColumnName: "Code", Category: "string", RepetitionType: fields.Required}, + {ParquetType: "Float32Type", TypeName: "float32", FieldType: "float32", FieldName: "Funkiness", ColumnName: "Funkiness", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Float32Type", TypeName: "*float32", FieldType: "float32", FieldName: "Lameness", ColumnName: "Lameness", Category: "numericOptional", RepetitionType: fields.Optional}, + {ParquetType: "BoolType", TypeName: "*bool", FieldType: "bool", FieldName: "Keen", ColumnName: "Keen", Category: "boolOptional", RepetitionType: fields.Optional}, + {ParquetType: "Uint32Type", TypeName: "uint32", FieldType: "uint32", FieldName: "Birthday", ColumnName: "Birthday", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + }, + }, + }, + { + name: "embedded preserve order", + typ: "NewOrderPerson", + expected: fields.Field{ + Children: []fields.Field{ + {ParquetType: "Int64Type", TypeName: "int64", FieldName: "Happiness", FieldType: "int64", ColumnName: "Happiness", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int64Type", TypeName: "*int64", FieldName: "Sadness", FieldType: "int64", ColumnName: "Sadness", Category: "numericOptional", RepetitionType: fields.Optional}, + {ParquetType: "StringType", TypeName: "string", FieldName: "Code", FieldType: "string", ColumnName: "Code", Category: "string", RepetitionType: fields.Required}, + {ParquetType: "Float32Type", TypeName: "float32", FieldType: "float32", FieldName: "Funkiness", ColumnName: "Funkiness", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Float32Type", TypeName: "*float32", FieldType: "float32", FieldName: "Lameness", ColumnName: "Lameness", Category: "numericOptional", RepetitionType: fields.Optional}, + {ParquetType: "BoolType", TypeName: "*bool", FieldType: "bool", FieldName: "Keen", ColumnName: "Keen", Category: "boolOptional", RepetitionType: fields.Optional}, + {ParquetType: "Uint32Type", TypeName: "uint32", FieldType: "uint32", FieldName: "Birthday", ColumnName: "Birthday", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + }, + }, + }, + { + name: "tags", + typ: "Tagged", + expected: fields.Field{ + Children: []fields.Field{ + {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, + {ParquetType: "StringType", TypeName: "string", FieldName: "Name", FieldType: "string", ColumnName: "name", Category: "string", RepetitionType: fields.Required}, + }, + }, + }, + { + name: "omit tag", + typ: "IgnoreMe", + expected: fields.Field{ + Children: []fields.Field{ + {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, + }, + }, + }, { name: "repeated", typ: "Slice", expected: fields.Field{ - Type: "Slice", - FieldName: "Slice", - FieldType: "Slice", - ColumnName: "Slice", - TypeName: "Slice", Children: []fields.Field{ {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "IDs", ColumnName: "ids", Category: "numericOptional", RepetitionType: fields.Repeated}, }, @@ -293,84 +234,106 @@ func TestFields(t *testing.T) { name: "repeated v2", typ: "Slice2", expected: fields.Field{ - Type: "Slice2", - FieldName: "Slice2", - FieldType: "Slice2", - ColumnName: "Slice2", - TypeName: "Slice2", Children: []fields.Field{ {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "IDs", ColumnName: "ids", Category: "numericOptional", RepetitionType: fields.Repeated}, }, }, - // expected: []fields.Field{ - // {Type: "Slice2", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "Slice2", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "[]int32", FieldNames: []string{"IDs"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ids"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, - // }, }, - // { - // name: "repeated v2", - // typ: "Slice3", - // expected: []fields.Field{ - // {Type: "Slice3", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "Slice3", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "[]int32", FieldNames: []string{"IDs"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"ids"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, - // {Type: "Slice3", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "*int32", FieldNames: []string{"Age"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"Age"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // }, - // }, - // { - // name: "nested and repeated", - // typ: "Slice4", - // expected: []fields.Field{ - // {Type: "Slice4", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "Slice4", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "string", FieldNames: []string{"Hobbies", "Name"}, FieldTypes: []string{"Hobby", "string"}, ColumnNames: []string{"Hobbies", "Name"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required}}, - // }, - // }, - // { - // name: "nested and repeated v2", - // typ: "Slice5", - // expected: []fields.Field{ - // {Type: "Slice5", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "Slice5", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "[]string", FieldNames: []string{"Hobby", "Names"}, FieldTypes: []string{"Hobby2", "string"}, ColumnNames: []string{"hobby", "names"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Repeated}}, - // }, - // }, - // { - // name: "repeated and repeated", - // typ: "Slice6", - // expected: []fields.Field{ - // {Type: "Slice6", FieldType: "Int32Field", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, ColumnNames: []string{"id"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{fields.Required}}, - // {Type: "Slice6", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "[]string", FieldNames: []string{"Hobbies", "Names"}, FieldTypes: []string{"Hobby2", "string"}, ColumnNames: []string{"hobbies", "names"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated}}, - // }, - // }, - // { - // name: "nested repeated and repeated", - // typ: "Slice7", - // expected: []fields.Field{ - // {Type: "Slice7", FieldType: "Int32OptionalField", ParquetType: "Int32Type", TypeName: "int32", FieldNames: []string{"Thing", "ID"}, FieldTypes: []string{"Slice6", "int32"}, ColumnNames: []string{"thing", "id"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - // {Type: "Slice7", FieldType: "StringOptionalField", ParquetType: "StringType", TypeName: "[]string", FieldNames: []string{"Thing", "Hobbies", "Names"}, FieldTypes: []string{"Slice6", "Hobby2", "string"}, ColumnNames: []string{"thing", "hobbies", "names"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Repeated}}, - // }, - // }, - // { - // name: "dremel paper example", - // typ: "Document", - // expected: []fields.Field{ - // {Type: "Document", FieldNames: []string{"DocID"}, FieldTypes: []string{"int64"}, TypeName: "int64", FieldType: "Int64Field", ParquetType: "Int64Type", ColumnNames: []string{"DocID"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{0}}, - // {Type: "Document", FieldNames: []string{"Links", "Backward"}, FieldTypes: []string{"Link", "int64"}, TypeName: "[]int64", FieldType: "Int64OptionalField", ParquetType: "Int64Type", ColumnNames: []string{"Links", "Backward"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 2}}, - // {Type: "Document", FieldNames: []string{"Links", "Forward"}, FieldTypes: []string{"Link", "int64"}, TypeName: "[]int64", FieldType: "Int64OptionalField", ParquetType: "Int64Type", ColumnNames: []string{"Links", "Forward"}, Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 2}}, - // {Type: "Document", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, TypeName: "string", FieldType: "StringOptionalField", ParquetType: "StringType", ColumnNames: []string{"Names", "Languages", "Code"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{2, 2, 0}}, - // {Type: "Document", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, TypeName: "*string", FieldType: "StringOptionalField", ParquetType: "StringType", ColumnNames: []string{"Names", "Languages", "Country"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{2, 2, 1}}, - // {Type: "Document", FieldNames: []string{"Names", "URL"}, FieldTypes: []string{"Name", "string"}, TypeName: "*string", FieldType: "StringOptionalField", ParquetType: "StringType", ColumnNames: []string{"Names", "URL"}, Category: "stringOptional", RepetitionTypes: []fields.RepetitionType{2, 1}}, - // }, - // }, - // { - // name: "embedded embedded embedded", - // typ: "A", - // expected: []fields.Field{ - // {Type: "A", FieldNames: []string{"D"}, FieldTypes: []string{"int32"}, TypeName: "int32", FieldType: "Int32Field", ParquetType: "Int32Type", ColumnNames: []string{"D"}, Category: "numeric", RepetitionTypes: []fields.RepetitionType{0}}, - // {Type: "A", FieldNames: []string{"C"}, FieldTypes: []string{"string"}, TypeName: "string", FieldType: "StringField", ParquetType: "StringType", ColumnNames: []string{"C"}, Category: "string", RepetitionTypes: []fields.RepetitionType{0}}, - // {Type: "A", FieldNames: []string{"B"}, FieldTypes: []string{"bool"}, TypeName: "bool", FieldType: "BoolField", ParquetType: "BoolType", ColumnNames: []string{"B"}, Category: "bool", RepetitionTypes: []fields.RepetitionType{0}}, - // {Type: "A", FieldNames: []string{"Name"}, FieldTypes: []string{"string"}, TypeName: "string", FieldType: "StringField", ParquetType: "StringType", ColumnNames: []string{"Name"}, Category: "string", RepetitionTypes: []fields.RepetitionType{0}}, - // }, - // }, + { + name: "repeated v2", + typ: "Slice3", + expected: fields.Field{ + Children: []fields.Field{ + {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, + {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "IDs", ColumnName: "ids", Category: "numericOptional", RepetitionType: fields.Repeated}, + {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + }, + }, + }, + { + name: "nested and repeated", + typ: "Slice4", + expected: fields.Field{ + Children: []fields.Field{ + {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, + {Type: "Hobby", TypeName: "Hobby", FieldName: "Hobbies", FieldType: "Hobby", ColumnName: "hobbies", RepetitionType: fields.Repeated, Children: []fields.Field{ + {ParquetType: "StringType", TypeName: "string", FieldName: "Name", FieldType: "string", ColumnName: "Name", Category: "string", RepetitionType: fields.Required}, + {ParquetType: "Int32Type", TypeName: "int32", FieldName: "Difficulty", FieldType: "int32", ColumnName: "Difficulty", Category: "numeric", RepetitionType: fields.Required}, + }}, + }, + }, + }, + { + name: "nested and repeated v2", + typ: "Slice5", + expected: fields.Field{ + Children: []fields.Field{ + {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, + {Type: "Hobby2", TypeName: "Hobby2", FieldName: "Hobby", FieldType: "Hobby2", ColumnName: "hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {ParquetType: "StringType", TypeName: "string", FieldName: "Names", FieldType: "string", ColumnName: "names", Category: "stringOptional", RepetitionType: fields.Repeated}, + }}, + }, + }, + }, + { + name: "repeated and repeated", + typ: "Slice6", + expected: fields.Field{ + Children: []fields.Field{ + {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, + {Type: "Hobby2", TypeName: "Hobby2", FieldName: "Hobbies", FieldType: "Hobby2", ColumnName: "hobbies", RepetitionType: fields.Repeated, Children: []fields.Field{ + {ParquetType: "StringType", TypeName: "string", FieldName: "Names", FieldType: "string", ColumnName: "names", Category: "stringOptional", RepetitionType: fields.Repeated}, + }}, + }, + }, + }, + { + name: "nested repeated and repeated", + typ: "Slice7", + expected: fields.Field{ + Children: []fields.Field{ + {Type: "Slice6", TypeName: "*Slice6", FieldName: "Thing", FieldType: "Slice6", ColumnName: "thing", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, + {Type: "Hobby2", TypeName: "Hobby2", FieldName: "Hobbies", FieldType: "Hobby2", ColumnName: "hobbies", RepetitionType: fields.Repeated, Children: []fields.Field{ + {ParquetType: "StringType", TypeName: "string", FieldName: "Names", FieldType: "string", ColumnName: "names", Category: "stringOptional", RepetitionType: fields.Repeated}, + }}, + }}, + }, + }, + }, + { + name: "dremel paper example", + typ: "Document", + expected: fields.Field{ + Children: []fields.Field{ + {FieldType: "int64", ParquetType: "Int64Type", TypeName: "int64", FieldName: "DocID", ColumnName: "DocID", Category: "numeric", RepetitionType: fields.Required}, + {Type: "Link", TypeName: "Link", FieldName: "Links", FieldType: "Link", ColumnName: "Links", RepetitionType: fields.Repeated, Children: []fields.Field{ + {TypeName: "int64", ParquetType: "Int64Type", FieldName: "Backward", FieldType: "int64", ColumnName: "Backward", Category: "numericOptional", RepetitionType: fields.Repeated}, + {TypeName: "int64", ParquetType: "Int64Type", FieldName: "Forward", FieldType: "int64", ColumnName: "Forward", Category: "numericOptional", RepetitionType: fields.Repeated}, + }}, + {Type: "Name", TypeName: "Name", FieldName: "Names", FieldType: "Name", ColumnName: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "Language", TypeName: "Language", FieldName: "Languages", FieldType: "Language", ColumnName: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {TypeName: "string", ParquetType: "StringType", FieldName: "Code", FieldType: "string", ColumnName: "Code", Category: "string", RepetitionType: fields.Required}, + {TypeName: "*string", ParquetType: "StringType", FieldName: "Country", FieldType: "string", ColumnName: "Country", Category: "stringOptional", RepetitionType: fields.Optional}, + }}, + {TypeName: "*string", ParquetType: "StringType", FieldName: "URL", FieldType: "string", ColumnName: "URL", Category: "stringOptional", RepetitionType: fields.Optional}, + }}, + }, + }, + }, + { + name: "embedded embedded embedded", + typ: "A", + expected: fields.Field{ + Children: []fields.Field{ + {FieldName: "D", FieldType: "int32", TypeName: "int32", ParquetType: "Int32Type", ColumnName: "D", Category: "numeric", RepetitionType: fields.Required}, + {FieldName: "C", FieldType: "string", TypeName: "string", ParquetType: "StringType", ColumnName: "C", Category: "string", RepetitionType: fields.Required}, + {FieldName: "B", FieldType: "bool", TypeName: "bool", ParquetType: "BoolType", ColumnName: "B", Category: "bool", RepetitionType: fields.Required}, + {FieldName: "Name", FieldType: "string", TypeName: "string", ParquetType: "StringType", ColumnName: "Name", Category: "string", RepetitionType: fields.Required}, + }, + }, + }, } for i, tc := range testCases { @@ -403,47 +366,95 @@ func pt(t sch.Type) *sch.Type { return &t } -// func TestDefIndex(t *testing.T) { -// testCases := []struct { -// def int -// field fields.Field -// expected int -// }{ -// { -// def: 1, -// field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Repeated}}, -// expected: 1, -// }, -// { -// def: 2, -// field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Repeated}}, -// expected: 2, -// }, -// { -// def: 1, -// field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Repeated}}, -// expected: 0, -// }, -// { -// def: 2, -// field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Repeated}}, -// expected: 2, -// }, -// { -// def: 2, -// field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Optional, fields.Required}}, -// expected: 1, -// }, -// { -// def: 1, -// field: fields.Field{RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Optional, fields.Required}}, -// expected: 0, -// }, -// } +func TestDefIndex(t *testing.T) { + testCases := []struct { + def int + field fields.Field + expected int + }{ + { + def: 1, + field: fields.Field{ + RepetitionType: fields.Repeated, + Parent: &fields.Field{ + RepetitionType: fields.Optional, + Parent: &fields.Field{ + RepetitionType: fields.Required, + }, + }, + }, + expected: 1, + }, + { + def: 2, + field: fields.Field{ + RepetitionType: fields.Repeated, + Parent: &fields.Field{ + RepetitionType: fields.Optional, + Parent: &fields.Field{ + RepetitionType: fields.Required, + }, + }, + }, + expected: 2, + }, + { + def: 0, + field: fields.Field{ + RepetitionType: fields.Repeated, + Parent: &fields.Field{ + RepetitionType: fields.Required, + Parent: &fields.Field{ + RepetitionType: fields.Optional, + }, + }, + }, + expected: 0, + }, + { + def: 2, + field: fields.Field{ + RepetitionType: fields.Optional, + Parent: &fields.Field{ + RepetitionType: fields.Required, + Parent: &fields.Field{ + RepetitionType: fields.Repeated, + }, + }, + }, + expected: 2, + }, + { + def: 2, + field: fields.Field{ + RepetitionType: fields.Required, + Parent: &fields.Field{ + RepetitionType: fields.Optional, + Parent: &fields.Field{ + RepetitionType: fields.Repeated, + }, + }, + }, + expected: 1, + }, + { + def: 1, + field: fields.Field{ + RepetitionType: fields.Required, + Parent: &fields.Field{ + RepetitionType: fields.Optional, + Parent: &fields.Field{ + RepetitionType: fields.Repeated, + }, + }, + }, + expected: 0, + }, + } -// for i, tc := range testCases { -// t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { -// assert.Equal(t, tc.expected, tc.field.DefIndex(tc.def)) -// }) -// } -// } + for i, tc := range testCases { + t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) { + assert.Equal(t, tc.expected, tc.field.DefIndex(tc.def)) + }) + } +} diff --git a/internal/parse/parse.go b/internal/parse/parse.go index 3300322..44a01a2 100644 --- a/internal/parse/parse.go +++ b/internal/parse/parse.go @@ -56,33 +56,30 @@ func Fields(typ, pth string) (*Result, error) { return nil, fmt.Errorf("could not find %s", typ) } - fields, err := doGetFields(f.n) + fields, err := getFields(f.n) if err != nil { return nil, err } - var errs []error - parent, ok := fields[typ] if !ok { return nil, fmt.Errorf("could not find %s", typ) } - getChildren(&parent, fields, errs) + errs := getChildren(&parent, fields) return &Result{ - Parent: parent, + Parent: flds.Field{Children: parent.Children}, Errors: errs, }, nil } -func getChildren(parent *flds.Field, fields map[string]flds.Field, errs []error) { +func getChildren(parent *flds.Field, fields map[string]flds.Field) []error { var children []flds.Field + var errs []error p, ok := fields[parent.FieldType] - fmt.Printf("getChildren %s: %+v\n", parent.FieldType, p) if !ok { - errs = append(errs, fmt.Errorf("could not find %s", parent.Type)) - return + errs = append(errs, fmt.Errorf("could not find %+v", parent)) } for _, child := range p.Children { @@ -93,11 +90,11 @@ func getChildren(parent *flds.Field, fields map[string]flds.Field, errs []error) f, ok := fields[child.FieldType] if !ok { - errs = append(errs, fmt.Errorf("could not find %s", child.Type)) + errs = append(errs, fmt.Errorf("unsupported type %+v", child.FieldType)) continue } - getChildren(&child, fields, errs) + errs = append(errs, getChildren(&child, fields)...) f.FieldName = child.FieldName f.TypeName = child.TypeName @@ -105,8 +102,6 @@ func getChildren(parent *flds.Field, fields map[string]flds.Field, errs []error) f.Children = child.Children f.RepetitionType = child.RepetitionType - fmt.Printf("adding child: %+v\n", child) - if child.Embedded { for _, ch := range f.Children { children = append(children, ch) @@ -116,6 +111,7 @@ func getChildren(parent *flds.Field, fields map[string]flds.Field, errs []error) } } parent.Children = children + return errs } func isPrivate(x *ast.Field) bool { @@ -128,7 +124,7 @@ func isPrivate(x *ast.Field) bool { return strings.Contains(letters, string(s[0])) } -func doGetFields(n map[string]ast.Node) (map[string]fields.Field, error) { +func getFields(n map[string]ast.Node) (map[string]fields.Field, error) { fields := map[string]flds.Field{} for k, n := range n { x, ok := n.(*ast.TypeSpec) @@ -136,7 +132,6 @@ func doGetFields(n map[string]ast.Node) (map[string]fields.Field, error) { continue } - fmt.Printf("parent? (%s): %+v\n", k, x) parent := flds.Field{ Type: x.Name.Name, TypeName: x.Name.Name, @@ -152,28 +147,18 @@ func doGetFields(n map[string]ast.Node) (map[string]fields.Field, error) { switch x := n.(type) { case *ast.Field: - fmt.Printf("child?: %+v, type: %v\n", n, x.Type) if len(x.Names) == 1 && !isPrivate(x) { - fmt.Println("a") f, skip := getField(x.Names[0].Name, x, nil) if !skip { parent.Children = append(parent.Children, f) } } else if len(x.Names) == 0 && !isPrivate(x) { - fmt.Println("b") f, skip := getField(fmt.Sprintf("%s", x.Type), x, nil) f.Embedded = true if !skip { parent.Children = append(parent.Children, f) } } - case *ast.ArrayType: - fmt.Printf("array child: %+v\n", x) - // s := fields[k] - // f := s[len(s)-1] - // f.repeated = true - // s[len(s)-1] = f - //fields[k] = s } return true }) @@ -184,39 +169,11 @@ func doGetFields(n map[string]ast.Node) (map[string]fields.Field, error) { return fields, nil } -func makeOptional(f field) field { - f.optional = true - fn, cat, pt, _ := lookupTypeAndCategory(strings.Replace(strings.Replace(f.Field.TypeName, "*", "", 1), "[]", "", 1), true, true) - f.Field.FieldType = fn - f.Field.ParquetType = pt - f.Field.Category = cat - return f -} - func getType(typ string) string { parts := strings.Split(typ, ".") return parts[len(parts)-1] } -func getFields(fullTyp string, fields []field, m map[string][]field) []flds.Field { - typ := getType(fullTyp) - out := make([]flds.Field, 0, len(fields)) - for _, f := range fields { - _, ok := m[typ] - if f.omit || !ok { - continue - } - - if f.repeated { - f.Field.TypeName = fmt.Sprintf("[]%s", f.Field.TypeName) - } - - f.Field.Type = fullTyp - out = append(out, f.Field) - } - return out -} - func getField(name string, x ast.Node, parent *flds.Field) (flds.Field, bool) { var typ, tag string var optional, repeated bool diff --git a/internal/parse/parse_test.go b/internal/parse/parse_test.go index 7ec32b6..6be0a3e 100644 --- a/internal/parse/parse_test.go +++ b/internal/parse/parse_test.go @@ -1,43 +1,45 @@ package parse_test +import "time" + type Being struct { ID int32 Age *int32 } -// type Person struct { -// Being -// Happiness int64 -// Sadness *int64 -// Code string -// Funkiness float32 -// Lameness *float32 -// Keen *bool -// Birthday uint32 -// Anniversary *uint64 -// } - -// type NewOrderPerson struct { -// Happiness int64 -// Sadness *int64 -// Code string -// Funkiness float32 -// Lameness *float32 -// Keen *bool -// Birthday uint32 -// Being -// Anniversary *uint64 -// } - -// type IgnoreMe struct { -// ID int32 `parquet:"id"` -// Secret string `parquet:"-"` -// } - -// type Tagged struct { -// ID int32 `parquet:"id"` -// Name string `parquet:"name"` -// } +type Person struct { + Being + Happiness int64 + Sadness *int64 + Code string + Funkiness float32 + Lameness *float32 + Keen *bool + Birthday uint32 + Anniversary *uint64 +} + +type NewOrderPerson struct { + Happiness int64 + Sadness *int64 + Code string + Funkiness float32 + Lameness *float32 + Keen *bool + Birthday uint32 + Being + Anniversary *uint64 +} + +type IgnoreMe struct { + ID int32 `parquet:"id"` + Secret string `parquet:"-"` +} + +type Tagged struct { + ID int32 `parquet:"id"` + Name string `parquet:"name"` +} type Private struct { Being @@ -63,35 +65,35 @@ type OptionalNested struct { Anniversary *uint64 } -// type Thing struct { -// Name string -// } +type Thing struct { + Name string +} -// type OptionalNested2 struct { -// Being *Thing -// Anniversary *uint64 -// } +type OptionalNested2 struct { + Being *Thing + Anniversary *uint64 +} type OptionalDoubleNested struct { OptionalNested OptionalNested } -// type Unsupported struct { -// Being -// // This field will be ignored because it's not one of the -// // supported types. -// Time time.Time -// } +type Unsupported struct { + Being + // This field will be ignored because it's not one of the + // supported types. + Time time.Time +} -// type SupportedAndUnsupported struct { -// Happiness int64 -// x int -// T1 time.Time -// Being -// y int -// T2 time.Time -// Anniversary *uint64 -// } +type SupportedAndUnsupported struct { + Happiness int64 + x int + T1 time.Time + Being + y int + T2 time.Time + Anniversary *uint64 +} type Slice struct { IDs []int32 `parquet:"ids"` @@ -102,75 +104,76 @@ type Slice2 struct { IDs []int32 `parquet:"ids"` } -// type Slice3 struct { -// ID int32 `parquet:"id"` -// IDs []int32 `parquet:"ids"` -// Age *int32 -// } - -// type Hobby struct { -// Name string -// } - -// type Slice4 struct { -// ID int32 `parquet:"id"` -// Hobbies []Hobby -// } - -// type Hobby2 struct { -// Names []string `parquet:"names"` -// } - -// type Slice5 struct { -// ID int32 `parquet:"id"` -// Hobby Hobby2 `parquet:"hobby"` -// } - -// type Slice6 struct { -// ID int32 `parquet:"id"` -// Hobbies []Hobby2 `parquet:"hobbies"` -// } - -// type Slice7 struct { -// Thing *Slice6 `parquet:"thing"` -// } - -// type Link struct { -// Backward []int64 -// Forward []int64 -// } - -// type Language struct { -// Code string -// Country *string -// } - -// type Name struct { -// Languages []Language -// URL *string -// } - -// type Document struct { -// DocID int64 -// Links []Link -// Names []Name -// } - -// type D struct { -// D int32 -// } - -// type C struct { -// D -// C string -// } - -// type B struct { -// C -// B bool -// } - -// type A struct { -// B -// Name string -// } +type Slice3 struct { + ID int32 `parquet:"id"` + IDs []int32 `parquet:"ids"` + Age *int32 +} + +type Hobby struct { + Name string + Difficulty int32 +} + +type Slice4 struct { + ID int32 `parquet:"id"` + Hobbies []Hobby `parquet:"hobbies"` +} + +type Hobby2 struct { + Names []string `parquet:"names"` +} + +type Slice5 struct { + ID int32 `parquet:"id"` + Hobby Hobby2 `parquet:"hobby"` +} + +type Slice6 struct { + ID int32 `parquet:"id"` + Hobbies []Hobby2 `parquet:"hobbies"` +} + +type Slice7 struct { + Thing *Slice6 `parquet:"thing"` +} + +type Link struct { + Backward []int64 + Forward []int64 +} + +type Language struct { + Code string + Country *string +} + +type Name struct { + Languages []Language + URL *string +} + +type Document struct { + DocID int64 + Links []Link + Names []Name +} + +type D struct { + D int32 +} + +type C struct { + D + C string +} + +type B struct { + C + B bool +} + +type A struct { + B + Name string +} From e6a3e93a3a2c55452d20f9ba3944fc4232dc0b76 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Tue, 8 Jun 2021 12:43:47 -0600 Subject: [PATCH 06/25] about to start over in internal/dremel --- internal/dremel/dremel.go | 2 +- internal/dremel/read.go | 9 +- internal/dremel/read_repeated.go | 15 +-- internal/dremel/write_optional.go | 26 +++-- internal/dremel/write_repeated.go | 66 +++--------- internal/dremel/write_test.go | 171 +++++++++++++++--------------- internal/gen/template.go | 2 +- 7 files changed, 127 insertions(+), 164 deletions(-) diff --git a/internal/dremel/dremel.go b/internal/dremel/dremel.go index 3c2d8c2..4c9c0bd 100644 --- a/internal/dremel/dremel.go +++ b/internal/dremel/dremel.go @@ -13,7 +13,7 @@ import ( func Write(i int, fields []fields.Field) string { f := fields[i] if f.Repeated() { - return writeRepeated(i, fields) + return writeRepeated(f) } if f.Optional() { diff --git a/internal/dremel/read.go b/internal/dremel/read.go index e17f39f..c5acdeb 100644 --- a/internal/dremel/read.go +++ b/internal/dremel/read.go @@ -10,12 +10,12 @@ import ( func readRequired(f fields.Field) string { return fmt.Sprintf(`func read%s(x %s) %s { return x.%s -}`, strings.Join(f.FieldNames, ""), f.Type, f.TypeName, strings.Join(f.FieldNames, ".")) +}`, strings.Join(f.FieldNames(), ""), f.Type, f.TypeName, strings.Join(f.FieldNames(), ".")) } func readOptional(f fields.Field) string { var out string - n := defs(f) + n := f.MaxDef() for def := 0; def < n; def++ { out += fmt.Sprintf(`case x.%s == nil: return nil, []uint8{%d}, nil @@ -23,7 +23,8 @@ func readOptional(f fields.Field) string { } var ptr string - if f.RepetitionTypes[len(f.RepetitionTypes)-1] == fields.Optional { + rts := f.RepetitionTypes() + if rts[len(rts)-1] == fields.Optional { ptr = "*" } out += fmt.Sprintf(` default: @@ -33,7 +34,7 @@ func readOptional(f fields.Field) string { switch { %s } -}`, strings.Join(f.FieldNames, ""), f.Type, cleanTypeName(f.TypeName), out) +}`, strings.Join(f.FieldNames(), ""), f.Type, cleanTypeName(f.TypeName), out) } func cleanTypeName(s string) string { diff --git a/internal/dremel/read_repeated.go b/internal/dremel/read_repeated.go index 121e827..79668c5 100644 --- a/internal/dremel/read_repeated.go +++ b/internal/dremel/read_repeated.go @@ -69,7 +69,7 @@ func readRepeated(f fields.Field) string { return vals, defs, reps }`, - strings.Join(f.FieldNames, ""), + strings.Join(f.FieldNames(), ""), f.Type, cleanTypeName(f.TypeName), cleanTypeName(f.TypeName), @@ -79,12 +79,13 @@ func readRepeated(f fields.Field) string { func doReadRepeated(f fields.Field, i int, varName string) string { if i == f.MaxDef() { - if f.RepetitionTypes[len(f.RepetitionTypes)-1] == fields.Optional { + rts := f.RepetitionTypes() + if rts[len(rts)-1] == fields.Optional { varName = fmt.Sprintf("*%s", varName) } - if f.RepetitionTypes[len(f.RepetitionTypes)-1] != fields.Repeated { - n := lastRepeated(f.RepetitionTypes) - varName = strings.Join(append([]string{varName}, f.FieldNames[n+1:]...), ".") + if rts[len(rts)-1] != fields.Repeated { + n := lastRepeated(rts) + varName = strings.Join(append([]string{varName}, f.FieldNames()[n+1:]...), ".") } return fmt.Sprintf(`defs = append(defs, %d) reps = append(reps, lastRep) @@ -103,14 +104,14 @@ vals = append(vals, %s)`, i, varName) if rt == fields.Repeated { if reps > 1 { - rc.Field = f.FieldNames[n] + rc.Field = f.FieldNames()[n] } nextVar = fmt.Sprintf("x%d", reps-1) readRepeatedRepeatedTpl.Execute(&buf, rc) } else { nextVar = varName if reps > 0 { - rc.Field = strings.Join(f.FieldNames[i:], ".") + rc.Field = strings.Join(f.FieldNames()[i:], ".") } readRepeatedOptionalTpl.Execute(&buf, rc) } diff --git a/internal/dremel/write_optional.go b/internal/dremel/write_optional.go index edc6dfd..8501f1e 100644 --- a/internal/dremel/write_optional.go +++ b/internal/dremel/write_optional.go @@ -77,12 +77,10 @@ func (i ifElses) UseIf() bool { func writeOptional(i int, flds []fields.Field) string { f := flds[i] - s := fields.Seen(i, flds) - f.Seen = s wi := writeInput{ Field: f, - FuncName: strings.Join(f.FieldNames, ""), - Cases: writeOptionalCases(f, s), + FuncName: strings.Join(f.FieldNames(), ""), + Cases: writeOptionalCases(f), } var buf bytes.Buffer @@ -93,10 +91,10 @@ func writeOptional(i int, flds []fields.Field) string { return string(buf.Bytes()) } -func writeOptionalCases(f fields.Field, seen fields.RepetitionTypes) []ifElses { +func writeOptionalCases(f fields.Field) []ifElses { var out []ifElses - for def := 1; def <= defs(f); def++ { - if useIfElse(def, 0, seen, f) { + for def := 1; def <= f.MaxDef(); def++ { + if useIfElse(def, 0, f) { out = append(out, ifelses(def, 0, f)) } else { s := f.Init(def, 0) @@ -108,20 +106,20 @@ func writeOptionalCases(f fields.Field, seen fields.RepetitionTypes) []ifElses { type ifElseCase struct { f fields.Field - p fields.Field + p *fields.Field } // ifelses returns an if else block for the given definition and repetition level -func ifelses(def, rep int, orig fields.Field) ifElses { - opts := optionals(def, orig) +func ifelses(def, rep int, f fields.Field) ifElses { + opts := optionals(def, f) var cases ifElseCases for _, o := range opts { - f := orig.Copy() - f.Seen = seens(o) + //f := orig.Copy() + //f.Seen = seens(o) cases = append(cases, ifElseCase{f: f, p: f.Parent(o + 1)}) } - return cases.ifElses(def, rep, int(orig.MaxDef())) + return cases.ifElses(def, rep, int(f.MaxDef())) } func seens(i int) fields.RepetitionTypes { @@ -154,7 +152,7 @@ func (i ifElseCases) ifElses(def, rep, md int) ifElses { for _, iec := range leftovers { out.ElseIf = append(out.ElseIf, ifElse{ - Cond: fmt.Sprintf("x.%s == nil", strings.Join(iec.p.FieldNames, ".")), + Cond: fmt.Sprintf("x.%s == nil", strings.Join(iec.p.FieldName, ".")), Val: iec.f.Init(def, rep), }) } diff --git a/internal/dremel/write_repeated.go b/internal/dremel/write_repeated.go index 5136b10..d77c8cb 100644 --- a/internal/dremel/write_repeated.go +++ b/internal/dremel/write_repeated.go @@ -33,7 +33,7 @@ func init() { "getRep": func(def int, f fields.Field) int { var rep int //defindex indead of def? - for _, rt := range f.RepetitionTypes[:f.DefIndex(def)] { + for _, rt := range f.RepetitionTypes()[:f.DefIndex(def)] { if rt == fields.Repeated { rep++ } @@ -100,26 +100,20 @@ func init() { type writeRepeatedInput struct { Field fields.Field Defs []int - Seen []fields.RepetitionType Func string } func writeRequired(f fields.Field) string { return fmt.Sprintf(`func %s(x *%s, vals []%s) { x.%s = vals[0] -}`, fmt.Sprintf("write%s", strings.Join(f.FieldNames, "")), f.Type, f.TypeName, strings.Join(f.FieldNames, ".")) +}`, fmt.Sprintf("write%s", strings.Join(f.FieldNames(), "")), f.Type, f.TypeName, strings.Join(f.FieldNames(), ".")) } -func writeRepeated(i int, flds []fields.Field) string { - f := flds[i] - f.Seen = fields.Seen(i, flds) - fmt.Println("seen", f.Seen) - +func writeRepeated(f fields.Field) string { wi := writeRepeatedInput{ Field: f, - Func: fmt.Sprintf("write%s", strings.Join(f.FieldNames, "")), - Defs: writeCases(f, f.Seen), - Seen: f.Seen, + Func: fmt.Sprintf("write%s", strings.Join(f.FieldNames(), "")), + Defs: writeCases(f), } var buf bytes.Buffer @@ -129,13 +123,13 @@ func writeRepeated(i int, flds []fields.Field) string { func initRepeated(def, rep int, seen fields.RepetitionTypes, f fields.Field) string { md := int(f.MaxDef()) - rt := f.RepetitionTypes.Def(def) + rt := f.RepetitionTypes().Def(def) if def < md && rep == 0 && rt == fields.Repeated { rep = def } - if useIfElse(def, rep, append(seen[:0:0], seen...), f) { + if useIfElse(def, rep, f) { ie := ifelses(def, rep, f) var buf bytes.Buffer if err := ifTpl.Execute(&buf, ie); err != nil { @@ -144,38 +138,19 @@ func initRepeated(def, rep int, seen fields.RepetitionTypes, f fields.Field) str return string(buf.Bytes()) } - f.Seen = seen return f.Init(def, rep) } -func useIfElse(def, rep int, seen fields.RepetitionTypes, f fields.Field) bool { - if len(seen) == 0 { - return false - } - - i := f.DefIndex(def) - - if i+1 > len(seen) && f.RepetitionTypes[:len(seen)].Required() { - return false - } - - if len(seen) > i+1 { - seen = seen[:i+1] - } - - if seen.Repeated() || (def == f.MaxDef() && rep > 0) { - return false - } - - return true +func useIfElse(def, rep int, f fields.Field) bool { + return f.NthChild == 0 } -func writeCases(f fields.Field, seen fields.RepetitionTypes) []int { +func writeCases(f fields.Field) []int { var defs []int start := 1 - if seen.Repeated() { - start = 1 + len(seen) - } + // if seen.Repeated() { + // start = 1 + len(seen) + // } maxDef := f.MaxDef() if start > maxDef { @@ -191,8 +166,8 @@ func writeCases(f fields.Field, seen fields.RepetitionTypes) []int { func nilField(i int, f fields.Field) string { var flds []string var count int - for j, o := range f.RepetitionTypes { - flds = append(flds, f.FieldNames[j]) + for j, o := range f.RepetitionTypes() { + flds = append(flds, f.FieldNames()[j]) if o == fields.Optional { count++ } @@ -202,14 +177,3 @@ func nilField(i int, f fields.Field) string { } return strings.Join(flds, ".") } - -// count the number of fields in the path that can be optional -func defs(f fields.Field) int { - var out int - for _, o := range f.RepetitionTypes { - if o == fields.Optional || o == fields.Repeated { - out++ - } - } - return out -} diff --git a/internal/dremel/write_test.go b/internal/dremel/write_test.go index f2666d5..212e248 100644 --- a/internal/dremel/write_test.go +++ b/internal/dremel/write_test.go @@ -12,23 +12,22 @@ import ( func TestWrite(t *testing.T) { testCases := []struct { - name string - // fields is a slice so that the parts of the field's path - // that have been seen already can be included in before the - // the field being tested. The last field is the one being tested. + name string fields []fields.Field result string }{ { - name: "required and not nested", - fields: []fields.Field{{Type: "Person", TypeName: "int32", FieldNames: []string{"ID"}, RepetitionTypes: []fields.RepetitionType{fields.Required}}}, + name: "required and not nested", + fields: []fields.Field{ + {Type: "Person", TypeName: "int32", FieldName: "ID", RepetitionType: fields.Required}, + }, result: `func writeID(x *Person, vals []int32) { x.ID = vals[0] }`, }, { - name: "optional and not nested", - fields: []fields.Field{{Type: "Person", TypeName: "*int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}}, + name: "optional and not nested", + //fields: []fields.Field{{Type: "Person", TypeName: "*int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}}, result: `func writeID(x *Person, vals []int32, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -41,17 +40,17 @@ func TestWrite(t *testing.T) { }`, }, { - name: "required and nested", - fields: []fields.Field{{Type: "Person", TypeName: "int32", FieldNames: []string{"Other", "Hobby", "Difficulty"}, FieldTypes: []string{"Other", "Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Required}}}, + name: "required and nested", + //fields: []fields.Field{{Type: "Person", TypeName: "int32", FieldNames: []string{"Other", "Hobby", "Difficulty"}, FieldTypes: []string{"Other", "Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Required}}}, result: `func writeOtherHobbyDifficulty(x *Person, vals []int32) { x.Other.Hobby.Difficulty = vals[0] }`, }, { name: "optional and nested", - fields: []fields.Field{ - {Type: "Person", TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - }, + // fields: []fields.Field{ + // {Type: "Person", TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, + // }, result: `func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -67,10 +66,10 @@ func TestWrite(t *testing.T) { }, { name: "optional and nested and seen by an optional fields", - fields: []fields.Field{ - {FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {Type: "Person", TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - }, + // fields: []fields.Field{ + // {FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, + // {Type: "Person", TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, + // }, result: `func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -92,9 +91,9 @@ func TestWrite(t *testing.T) { }, { name: "mix of optional and required and nested", - fields: []fields.Field{ - {Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - }, + // fields: []fields.Field{ + // {Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, + // }, result: `func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -108,9 +107,9 @@ func TestWrite(t *testing.T) { }, { name: "mix of optional and required and nested v2", - fields: []fields.Field{ - {Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - }, + // fields: []fields.Field{ + // {Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, + // }, result: `func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -124,9 +123,9 @@ func TestWrite(t *testing.T) { }, { name: "mix of optional and require and nested 3 deep", - fields: []fields.Field{ - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Optional}}, - }, + // fields: []fields.Field{ + // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Optional}}, + // }, result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -142,10 +141,10 @@ func TestWrite(t *testing.T) { }, { name: "mix of optional and required and nested 3 deep v2 and seen by optional field", - fields: []fields.Field{ - {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional}}, - }, + // fields: []fields.Field{ + // {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, + // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional}}, + // }, result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -161,9 +160,9 @@ func TestWrite(t *testing.T) { }, { name: "mix of optional and required and nested 3 deep v3", - fields: []fields.Field{ - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Required}}, - }, + // fields: []fields.Field{ + // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Required}}, + // }, result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -179,9 +178,9 @@ func TestWrite(t *testing.T) { }, { name: "nested 3 deep all optional", - fields: []fields.Field{ - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, - }, + // fields: []fields.Field{ + // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, + // }, result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -199,10 +198,10 @@ func TestWrite(t *testing.T) { }, { name: "nested 3 deep all optional and seen by optional field", - fields: []fields.Field{ - {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, - }, + // fields: []fields.Field{ + // {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, + // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, + // }, result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -229,8 +228,8 @@ func TestWrite(t *testing.T) { }`, }, { - name: "four deep", - fields: []fields.Field{{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Optional}}}, + name: "four deep", + //fields: []fields.Field{{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Optional}}}, result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -250,10 +249,10 @@ func TestWrite(t *testing.T) { }, { name: "four deep and seen by optional field", - fields: []fields.Field{ - {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Optional}}, - }, + // fields: []fields.Field{ + // {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, + // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Optional}}, + // }, result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -286,8 +285,8 @@ func TestWrite(t *testing.T) { }`, }, { - name: "four deep mixed", - fields: []fields.Field{{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}}, + name: "four deep mixed", + //fields: []fields.Field{{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}}, result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -305,10 +304,10 @@ func TestWrite(t *testing.T) { }, { name: "four deep mixed and seen by a required sub-field", - fields: []fields.Field{ - {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}, - }, + // fields: []fields.Field{ + // {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, + // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}, + // }, result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -325,8 +324,8 @@ func TestWrite(t *testing.T) { }`, }, { - name: "four deep mixed v2", - fields: []fields.Field{{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}}, + name: "four deep mixed v2", + //fields: []fields.Field{{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}}, result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -344,10 +343,10 @@ func TestWrite(t *testing.T) { }, { name: "four deep mixed v2 and seen by an optional field", - fields: []fields.Field{ - {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - }, + // fields: []fields.Field{ + // {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, + // }, result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -375,9 +374,9 @@ func TestWrite(t *testing.T) { }, { name: "writeLinkBackward", - fields: []fields.Field{ - {Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - }, + // fields: []fields.Field{ + // {Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, + // }, result: `func writeLinkBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -411,10 +410,10 @@ func TestWrite(t *testing.T) { }, { name: "writeLinkFoward", - fields: []fields.Field{ - {FieldNames: []string{"Link", "Backward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - {Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - }, + // fields: []fields.Field{ + // {FieldNames: []string{"Link", "Backward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, + // {Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, + // }, result: `func writeLinkForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -444,9 +443,9 @@ func TestWrite(t *testing.T) { }, { name: "writeNamesLanguagesCode", - fields: []fields.Field{ - {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - }, + // fields: []fields.Field{ + // {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, + // }, result: `func writeNamesLanguagesCode(x *Document, vals []string, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 2) @@ -482,10 +481,10 @@ func TestWrite(t *testing.T) { }, { name: "writeNamesLanguagesCountry", - fields: []fields.Field{ - {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, - }, + // fields: []fields.Field{ + // {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, + // {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, + // }, result: `func writeNamesLanguagesCountry(x *Document, vals []string, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 2) @@ -515,9 +514,9 @@ func TestWrite(t *testing.T) { }, { name: "writeFriendsID", - fields: []fields.Field{ - {Type: "Person", FieldNames: []string{"Friends", "ID"}, FieldTypes: []string{"Being", "int32"}, TypeName: "int32", FieldType: "Int32OptionalField", ParquetType: "Int32Type", Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 0}}, - }, + // fields: []fields.Field{ + // {Type: "Person", FieldNames: []string{"Friends", "ID"}, FieldTypes: []string{"Being", "int32"}, TypeName: "int32", FieldType: "Int32OptionalField", ParquetType: "Int32Type", Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 0}}, + // }, result: `func writeFriendsID(x *Person, vals []int32, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -549,9 +548,9 @@ func TestWrite(t *testing.T) { }, { name: "repeated primitive", - fields: []fields.Field{ - {Type: "Document", TypeName: "int64", FieldNames: []string{"LuckyNumbers"}, FieldTypes: []string{"int64"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, - }, + // fields: []fields.Field{ + // {Type: "Document", TypeName: "int64", FieldNames: []string{"LuckyNumbers"}, FieldTypes: []string{"int64"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, + // }, result: `func writeLuckyNumbers(x *Document, vals []int64, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -583,10 +582,10 @@ func TestWrite(t *testing.T) { }, { name: "repeated field not handled by previous repeated field", - fields: []fields.Field{ - {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - {Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - }, + // fields: []fields.Field{ + // {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, + // {Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, + // }, result: `func writeLinkForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -626,10 +625,10 @@ func TestWrite(t *testing.T) { }, { name: "nested 2 deep", - fields: []fields.Field{ - {FieldNames: []string{"Hobby", "Skills", "Name"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required}}, - {Type: "Person", TypeName: "string", FieldNames: []string{"Hobby", "Skills", "Difficulty"}, FieldTypes: []string{"Hobby", "Skill", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required}}, - }, + // fields: []fields.Field{ + // {FieldNames: []string{"Hobby", "Skills", "Name"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required}}, + // {Type: "Person", TypeName: "string", FieldNames: []string{"Hobby", "Skills", "Difficulty"}, FieldTypes: []string{"Hobby", "Skill", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required}}, + // }, result: `func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) diff --git a/internal/gen/template.go b/internal/gen/template.go index ec870ac..3497366 100644 --- a/internal/gen/template.go +++ b/internal/gen/template.go @@ -53,7 +53,7 @@ func Fields(compression compression) []Field { } } -{{range $i, $field := .Fields}}{{readFunc $field}} +{{range $i, $field := .Parent.Fields}}{{readFunc $field}} {{writeFunc $i $.Fields}} From bf18c2999f9c6d3c3da67448a7ce9b97b6bfef60 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Wed, 9 Jun 2021 10:04:14 -0600 Subject: [PATCH 07/25] wip --- internal/dremel/dremel.go | 5 +- internal/dremel/read_test.go | 834 +++++++++++++++--------------- internal/dremel/write_optional.go | 109 ++-- internal/dremel/write_repeated.go | 22 +- internal/dremel/write_test.go | 135 ++--- internal/fields/fields.go | 85 ++- internal/fields/fields_test.go | 36 +- internal/gen/gen.go | 2 +- internal/gen/template.go | 10 +- 9 files changed, 643 insertions(+), 595 deletions(-) diff --git a/internal/dremel/dremel.go b/internal/dremel/dremel.go index 4c9c0bd..0bcedfc 100644 --- a/internal/dremel/dremel.go +++ b/internal/dremel/dremel.go @@ -10,14 +10,13 @@ import ( // Write generates the code for initializing a struct // with data from a parquet file. -func Write(i int, fields []fields.Field) string { - f := fields[i] +func Write(f fields.Field) string { if f.Repeated() { return writeRepeated(f) } if f.Optional() { - return writeOptional(i, fields) + return writeOptional(f) } return writeRequired(f) diff --git a/internal/dremel/read_test.go b/internal/dremel/read_test.go index 132e698..f7439cd 100644 --- a/internal/dremel/read_test.go +++ b/internal/dremel/read_test.go @@ -1,441 +1,431 @@ package dremel_test -import ( - "fmt" - "go/format" - "testing" +// func TestRead(t *testing.T) { +// testCases := []struct { +// name string +// f fields.Field +// result string +// }{ +// { +// name: "required and not nested", +// f: fields.Field{Type: "Person", TypeName: "int32", FieldNames: []string{"ID"}, RepetitionTypes: []fields.RepetitionType{fields.Required}}, +// result: `func readID(x Person) int32 { +// return x.ID +// }`, +// }, +// { +// name: "optional and not nested", +// f: fields.Field{Type: "Person", TypeName: "*int32", FieldNames: []string{"ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}, +// result: `func readID(x Person) ([]int32, []uint8, []uint8) { +// switch { +// case x.ID == nil: +// return nil, []uint8{0}, nil +// default: +// return []int32{*x.ID}, []uint8{1}, nil +// } +// }`, +// }, +// { +// name: "required and nested", +// f: fields.Field{Type: "Person", TypeName: "int32", FieldNames: []string{"Other", "Hobby", "Difficulty"}, FieldTypes: []string{"Other", "Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Required}}, +// result: `func readOtherHobbyDifficulty(x Person) int32 { +// return x.Other.Hobby.Difficulty +// }`, +// }, +// { +// name: "optional and nested", +// f: fields.Field{Type: "Person", TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, +// result: `func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { +// switch { +// case x.Hobby == nil: +// return nil, []uint8{0}, nil +// case x.Hobby.Difficulty == nil: +// return nil, []uint8{1}, nil +// default: +// return []int32{*x.Hobby.Difficulty}, []uint8{2}, nil +// } +// }`, +// }, +// { +// name: "mix of optional and required and nested", +// f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, +// result: `func readHobbyName(x Person) ([]string, []uint8, []uint8) { +// switch { +// case x.Hobby == nil: +// return nil, []uint8{0}, nil +// default: +// return []string{x.Hobby.Name}, []uint8{1}, nil +// } +// }`, +// }, +// { +// name: "mix of optional and required and nested v2", +// f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, +// result: `func readHobbyName(x Person) ([]string, []uint8, []uint8) { +// switch { +// case x.Hobby.Name == nil: +// return nil, []uint8{0}, nil +// default: +// return []string{*x.Hobby.Name}, []uint8{1}, nil +// } +// }`, +// }, +// { +// name: "mix of optional and require and nested 3 deep", +// f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Optional}}, +// result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { +// switch { +// case x.Friend == nil: +// return nil, []uint8{0}, nil +// case x.Friend.Hobby.Name == nil: +// return nil, []uint8{1}, nil +// default: +// return []string{*x.Friend.Hobby.Name}, []uint8{2}, nil +// } +// }`, +// }, +// { +// name: "mix of optional and require and nested 3 deep v2", +// f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional}}, +// result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { +// switch { +// case x.Friend.Hobby == nil: +// return nil, []uint8{0}, nil +// case x.Friend.Hobby.Name == nil: +// return nil, []uint8{1}, nil +// default: +// return []string{*x.Friend.Hobby.Name}, []uint8{2}, nil +// } +// }`, +// }, +// { +// name: "mix of optional and require and nested 3 deep v3", +// f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Required}}, +// result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { +// switch { +// case x.Friend == nil: +// return nil, []uint8{0}, nil +// case x.Friend.Hobby == nil: +// return nil, []uint8{1}, nil +// default: +// return []string{x.Friend.Hobby.Name}, []uint8{2}, nil +// } +// }`, +// }, +// { +// name: "nested 3 deep all optional", +// f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, +// result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { +// switch { +// case x.Friend == nil: +// return nil, []uint8{0}, nil +// case x.Friend.Hobby == nil: +// return nil, []uint8{1}, nil +// case x.Friend.Hobby.Name == nil: +// return nil, []uint8{2}, nil +// default: +// return []string{*x.Friend.Hobby.Name}, []uint8{3}, nil +// } +// }`, +// }, +// { +// name: "four deep", +// f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Optional}}, +// result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { +// switch { +// case x.Friend == nil: +// return nil, []uint8{0}, nil +// case x.Friend.Hobby == nil: +// return nil, []uint8{1}, nil +// case x.Friend.Hobby.Name == nil: +// return nil, []uint8{2}, nil +// case x.Friend.Hobby.Name.First == nil: +// return nil, []uint8{3}, nil +// default: +// return []string{*x.Friend.Hobby.Name.First}, []uint8{4}, nil +// } +// }`, +// }, +// { +// name: "four deep mixed", +// f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}, +// result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { +// switch { +// case x.Friend.Hobby == nil: +// return nil, []uint8{0}, nil +// case x.Friend.Hobby.Name == nil: +// return nil, []uint8{1}, nil +// case x.Friend.Hobby.Name.First == nil: +// return nil, []uint8{2}, nil +// default: +// return []string{*x.Friend.Hobby.Name.First}, []uint8{3}, nil +// } +// }`, +// }, +// { +// name: "four deep mixed v2", +// f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, +// result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { +// switch { +// case x.Friend == nil: +// return nil, []uint8{0}, nil +// case x.Friend.Hobby == nil: +// return nil, []uint8{1}, nil +// case x.Friend.Hobby.Name == nil: +// return nil, []uint8{2}, nil +// default: +// return []string{x.Friend.Hobby.Name.First}, []uint8{3}, nil +// } +// }`, +// }, +// { +// name: "repeated", +// f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Friends"}, FieldTypes: []string{"string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, +// result: `func readFriends(x Person) ([]string, []uint8, []uint8) { +// var vals []string +// var defs, reps []uint8 +// var lastRep uint8 - "github.com/parsyl/parquet/internal/dremel" - "github.com/parsyl/parquet/internal/fields" - "github.com/stretchr/testify/assert" -) +// if len(x.Friends) == 0 { +// defs = append(defs, 0) +// reps = append(reps, lastRep) +// } else { +// for i0, x0 := range x.Friends { +// if i0 == 1 { +// lastRep = 1 +// } +// defs = append(defs, 1) +// reps = append(reps, lastRep) +// vals = append(vals, x0) +// } +// } -func TestRead(t *testing.T) { - testCases := []struct { - name string - f fields.Field - result string - }{ - { - name: "required and not nested", - f: fields.Field{Type: "Person", TypeName: "int32", FieldNames: []string{"ID"}, RepetitionTypes: []fields.RepetitionType{fields.Required}}, - result: `func readID(x Person) int32 { - return x.ID -}`, - }, - { - name: "optional and not nested", - f: fields.Field{Type: "Person", TypeName: "*int32", FieldNames: []string{"ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - result: `func readID(x Person) ([]int32, []uint8, []uint8) { - switch { - case x.ID == nil: - return nil, []uint8{0}, nil - default: - return []int32{*x.ID}, []uint8{1}, nil - } -}`, - }, - { - name: "required and nested", - f: fields.Field{Type: "Person", TypeName: "int32", FieldNames: []string{"Other", "Hobby", "Difficulty"}, FieldTypes: []string{"Other", "Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Required}}, - result: `func readOtherHobbyDifficulty(x Person) int32 { - return x.Other.Hobby.Difficulty -}`, - }, - { - name: "optional and nested", - f: fields.Field{Type: "Person", TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - result: `func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { - switch { - case x.Hobby == nil: - return nil, []uint8{0}, nil - case x.Hobby.Difficulty == nil: - return nil, []uint8{1}, nil - default: - return []int32{*x.Hobby.Difficulty}, []uint8{2}, nil - } -}`, - }, - { - name: "mix of optional and required and nested", - f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - result: `func readHobbyName(x Person) ([]string, []uint8, []uint8) { - switch { - case x.Hobby == nil: - return nil, []uint8{0}, nil - default: - return []string{x.Hobby.Name}, []uint8{1}, nil - } -}`, - }, - { - name: "mix of optional and required and nested v2", - f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - result: `func readHobbyName(x Person) ([]string, []uint8, []uint8) { - switch { - case x.Hobby.Name == nil: - return nil, []uint8{0}, nil - default: - return []string{*x.Hobby.Name}, []uint8{1}, nil - } -}`, - }, - { - name: "mix of optional and require and nested 3 deep", - f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Optional}}, - result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { - switch { - case x.Friend == nil: - return nil, []uint8{0}, nil - case x.Friend.Hobby.Name == nil: - return nil, []uint8{1}, nil - default: - return []string{*x.Friend.Hobby.Name}, []uint8{2}, nil - } -}`, - }, - { - name: "mix of optional and require and nested 3 deep v2", - f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional}}, - result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { - switch { - case x.Friend.Hobby == nil: - return nil, []uint8{0}, nil - case x.Friend.Hobby.Name == nil: - return nil, []uint8{1}, nil - default: - return []string{*x.Friend.Hobby.Name}, []uint8{2}, nil - } -}`, - }, - { - name: "mix of optional and require and nested 3 deep v3", - f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Required}}, - result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { - switch { - case x.Friend == nil: - return nil, []uint8{0}, nil - case x.Friend.Hobby == nil: - return nil, []uint8{1}, nil - default: - return []string{x.Friend.Hobby.Name}, []uint8{2}, nil - } -}`, - }, - { - name: "nested 3 deep all optional", - f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, - result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { - switch { - case x.Friend == nil: - return nil, []uint8{0}, nil - case x.Friend.Hobby == nil: - return nil, []uint8{1}, nil - case x.Friend.Hobby.Name == nil: - return nil, []uint8{2}, nil - default: - return []string{*x.Friend.Hobby.Name}, []uint8{3}, nil - } -}`, - }, - { - name: "four deep", - f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Optional}}, - result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { - switch { - case x.Friend == nil: - return nil, []uint8{0}, nil - case x.Friend.Hobby == nil: - return nil, []uint8{1}, nil - case x.Friend.Hobby.Name == nil: - return nil, []uint8{2}, nil - case x.Friend.Hobby.Name.First == nil: - return nil, []uint8{3}, nil - default: - return []string{*x.Friend.Hobby.Name.First}, []uint8{4}, nil - } -}`, - }, - { - name: "four deep mixed", - f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}, - result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { - switch { - case x.Friend.Hobby == nil: - return nil, []uint8{0}, nil - case x.Friend.Hobby.Name == nil: - return nil, []uint8{1}, nil - case x.Friend.Hobby.Name.First == nil: - return nil, []uint8{2}, nil - default: - return []string{*x.Friend.Hobby.Name.First}, []uint8{3}, nil - } -}`, - }, - { - name: "four deep mixed v2", - f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, - result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { - switch { - case x.Friend == nil: - return nil, []uint8{0}, nil - case x.Friend.Hobby == nil: - return nil, []uint8{1}, nil - case x.Friend.Hobby.Name == nil: - return nil, []uint8{2}, nil - default: - return []string{x.Friend.Hobby.Name.First}, []uint8{3}, nil - } -}`, - }, - { - name: "repeated", - f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Friends"}, FieldTypes: []string{"string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, - result: `func readFriends(x Person) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 +// return vals, defs, reps +// }`, +// }, +// { +// name: "readLinkFoward", +// f: fields.Field{Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, +// result: `func readLinkForward(x Document) ([]int64, []uint8, []uint8) { +// var vals []int64 +// var defs, reps []uint8 +// var lastRep uint8 - if len(x.Friends) == 0 { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Friends { - if i0 == 1 { - lastRep = 1 - } - defs = append(defs, 1) - reps = append(reps, lastRep) - vals = append(vals, x0) - } - } +// if x.Link == nil { +// defs = append(defs, 0) +// reps = append(reps, lastRep) +// } else { +// if len(x.Link.Forward) == 0 { +// defs = append(defs, 1) +// reps = append(reps, lastRep) +// } else { +// for i0, x0 := range x.Link.Forward { +// if i0 == 1 { +// lastRep = 1 +// } +// defs = append(defs, 2) +// reps = append(reps, lastRep) +// vals = append(vals, x0) +// } +// } +// } - return vals, defs, reps -}`, - }, - { - name: "readLinkFoward", - f: fields.Field{Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - result: `func readLinkForward(x Document) ([]int64, []uint8, []uint8) { - var vals []int64 - var defs, reps []uint8 - var lastRep uint8 +// return vals, defs, reps +// }`, +// }, +// { +// name: "readNamesLanguagesCode", +// f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, +// result: `func readNamesLanguagesCode(x Document) ([]string, []uint8, []uint8) { +// var vals []string +// var defs, reps []uint8 +// var lastRep uint8 - if x.Link == nil { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - if len(x.Link.Forward) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Link.Forward { - if i0 == 1 { - lastRep = 1 - } - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, x0) - } - } - } +// if len(x.Names) == 0 { +// defs = append(defs, 0) +// reps = append(reps, lastRep) +// } else { +// for i0, x0 := range x.Names { +// if i0 == 1 { +// lastRep = 1 +// } +// if len(x0.Languages) == 0 { +// defs = append(defs, 1) +// reps = append(reps, lastRep) +// } else { +// for i1, x1 := range x0.Languages { +// if i1 == 1 { +// lastRep = 2 +// } +// defs = append(defs, 2) +// reps = append(reps, lastRep) +// vals = append(vals, x1.Code) +// } +// } +// } +// } - return vals, defs, reps -}`, - }, - { - name: "readNamesLanguagesCode", - f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - result: `func readNamesLanguagesCode(x Document) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 +// return vals, defs, reps +// }`, +// }, +// { +// name: "readNamesLanguagesCountry", +// f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, +// result: `func readNamesLanguagesCountry(x Document) ([]string, []uint8, []uint8) { +// var vals []string +// var defs, reps []uint8 +// var lastRep uint8 - if len(x.Names) == 0 { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Names { - if i0 == 1 { - lastRep = 1 - } - if len(x0.Languages) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i1, x1 := range x0.Languages { - if i1 == 1 { - lastRep = 2 - } - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, x1.Code) - } - } - } - } +// if len(x.Names) == 0 { +// defs = append(defs, 0) +// reps = append(reps, lastRep) +// } else { +// for i0, x0 := range x.Names { +// if i0 == 1 { +// lastRep = 1 +// } +// if len(x0.Languages) == 0 { +// defs = append(defs, 1) +// reps = append(reps, lastRep) +// } else { +// for i1, x1 := range x0.Languages { +// if i1 == 1 { +// lastRep = 2 +// } +// if x1.Country == nil { +// defs = append(defs, 2) +// reps = append(reps, lastRep) +// } else { +// defs = append(defs, 3) +// reps = append(reps, lastRep) +// vals = append(vals, *x1.Country) +// } +// } +// } +// } +// } - return vals, defs, reps -}`, - }, - { - name: "readNamesLanguagesCountry", - f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, - result: `func readNamesLanguagesCountry(x Document) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 +// return vals, defs, reps +// }`, +// }, +// { +// name: "readNamesURL", +// f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Names", "URL"}, FieldTypes: []string{"Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Optional}}, +// result: `func readNamesURL(x Document) ([]string, []uint8, []uint8) { +// var vals []string +// var defs, reps []uint8 +// var lastRep uint8 - if len(x.Names) == 0 { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Names { - if i0 == 1 { - lastRep = 1 - } - if len(x0.Languages) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i1, x1 := range x0.Languages { - if i1 == 1 { - lastRep = 2 - } - if x1.Country == nil { - defs = append(defs, 2) - reps = append(reps, lastRep) - } else { - defs = append(defs, 3) - reps = append(reps, lastRep) - vals = append(vals, *x1.Country) - } - } - } - } - } +// if len(x.Names) == 0 { +// defs = append(defs, 0) +// reps = append(reps, lastRep) +// } else { +// for i0, x0 := range x.Names { +// if i0 == 1 { +// lastRep = 1 +// } +// if x0.URL == nil { +// defs = append(defs, 1) +// reps = append(reps, lastRep) +// } else { +// defs = append(defs, 2) +// reps = append(reps, lastRep) +// vals = append(vals, *x0.URL) +// } +// } +// } - return vals, defs, reps -}`, - }, - { - name: "readNamesURL", - f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Names", "URL"}, FieldTypes: []string{"Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Optional}}, - result: `func readNamesURL(x Document) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 +// return vals, defs, reps +// }`, +// }, +// { +// name: "run of required", +// f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Friends", "Name", "Last"}, FieldTypes: []string{"Friend", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Required}}, +// result: `func readFriendsNameLast(x Document) ([]string, []uint8, []uint8) { +// var vals []string +// var defs, reps []uint8 +// var lastRep uint8 - if len(x.Names) == 0 { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Names { - if i0 == 1 { - lastRep = 1 - } - if x0.URL == nil { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, *x0.URL) - } - } - } +// if len(x.Friends) == 0 { +// defs = append(defs, 0) +// reps = append(reps, lastRep) +// } else { +// for i0, x0 := range x.Friends { +// if i0 == 1 { +// lastRep = 1 +// } +// defs = append(defs, 1) +// reps = append(reps, lastRep) +// vals = append(vals, x0.Name.Last) +// } +// } - return vals, defs, reps -}`, - }, - { - name: "run of required", - f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Friends", "Name", "Last"}, FieldTypes: []string{"Friend", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Required}}, - result: `func readFriendsNameLast(x Document) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 +// return vals, defs, reps +// }`, +// }, +// { +// name: "run of required v2", +// f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Friend", "Name", "Aliases"}, FieldTypes: []string{"Friend", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Repeated}}, +// result: `func readFriendNameAliases(x Document) ([]string, []uint8, []uint8) { +// var vals []string +// var defs, reps []uint8 +// var lastRep uint8 - if len(x.Friends) == 0 { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Friends { - if i0 == 1 { - lastRep = 1 - } - defs = append(defs, 1) - reps = append(reps, lastRep) - vals = append(vals, x0.Name.Last) - } - } +// if len(x.Friend.Name.Aliases) == 0 { +// defs = append(defs, 0) +// reps = append(reps, lastRep) +// } else { +// for i0, x0 := range x.Friend.Name.Aliases { +// if i0 == 1 { +// lastRep = 1 +// } +// defs = append(defs, 1) +// reps = append(reps, lastRep) +// vals = append(vals, x0) +// } +// } - return vals, defs, reps -}`, - }, - { - name: "run of required v2", - f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Friend", "Name", "Aliases"}, FieldTypes: []string{"Friend", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Repeated}}, - result: `func readFriendNameAliases(x Document) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 +// return vals, defs, reps +// }`, +// }, +// { +// name: "run of required v3", +// f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Other", "Friends", "Name", "Middle"}, FieldTypes: []string{"Other", "Friend", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required, fields.Required}}, +// result: `func readOtherFriendsNameMiddle(x Document) ([]string, []uint8, []uint8) { +// var vals []string +// var defs, reps []uint8 +// var lastRep uint8 - if len(x.Friend.Name.Aliases) == 0 { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Friend.Name.Aliases { - if i0 == 1 { - lastRep = 1 - } - defs = append(defs, 1) - reps = append(reps, lastRep) - vals = append(vals, x0) - } - } +// if x.Other == nil { +// defs = append(defs, 0) +// reps = append(reps, lastRep) +// } else { +// if len(x.Other.Friends) == 0 { +// defs = append(defs, 1) +// reps = append(reps, lastRep) +// } else { +// for i0, x0 := range x.Other.Friends { +// if i0 == 1 { +// lastRep = 1 +// } +// defs = append(defs, 2) +// reps = append(reps, lastRep) +// vals = append(vals, x0.Name.Middle) +// } +// } +// } - return vals, defs, reps -}`, - }, - { - name: "run of required v3", - f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Other", "Friends", "Name", "Middle"}, FieldTypes: []string{"Other", "Friend", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required, fields.Required}}, - result: `func readOtherFriendsNameMiddle(x Document) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 +// return vals, defs, reps +// }`, +// }, +// } - if x.Other == nil { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - if len(x.Other.Friends) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Other.Friends { - if i0 == 1 { - lastRep = 1 - } - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, x0.Name.Middle) - } - } - } - - return vals, defs, reps -}`, - }, - } - - for i, tc := range testCases { - t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { - s := dremel.Read(tc.f) - gocode, err := format.Source([]byte(s)) - assert.NoError(t, err) - assert.Equal(t, tc.result, string(gocode)) - }) - } -} +// for i, tc := range testCases { +// t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { +// s := dremel.Read(tc.f) +// gocode, err := format.Source([]byte(s)) +// assert.NoError(t, err) +// assert.Equal(t, tc.result, string(gocode)) +// }) +// } +// } diff --git a/internal/dremel/write_optional.go b/internal/dremel/write_optional.go index 8501f1e..d0f98b6 100644 --- a/internal/dremel/write_optional.go +++ b/internal/dremel/write_optional.go @@ -20,11 +20,11 @@ func init() { } var err error - writeTpl, err = template.New("output").Funcs(funcs).Parse(`func write{{.FuncName}}(x *{{.Field.Type}}, vals []{{removeStar .Field.TypeName}}, defs, reps []uint8) (int, int) { + writeTpl, err = template.New("output").Funcs(funcs).Parse(`func write{{.FuncName}}(x *{{.Field.StructType}}, vals []{{removeStar .Field.TypeName}}, defs, reps []uint8) (int, int) { def := defs[0] - switch def { {{range $i, $case := .Cases}}{{$def:=plusOne $i}} - case {{$def}}: - {{$defIndex := $.Field.DefIndex $def}}{{if $case.UseIf}}{{template "ifelse" $case}}{{else}}{{$case.Val}}{{end}}{{if eq $def $.MaxDef}} + switch def { {{range $i, $case := .Cases}} + case {{$case.Def}}: + {{$case.Val}}{{if $case.MaxDef}} return 1, 1{{end}}{{end}} } @@ -64,7 +64,10 @@ type ifElse struct { Val string } +// todo: rename to defCase type ifElses struct { + Def int + MaxDef bool If ifElse ElseIf []ifElse Else *ifElse @@ -75,8 +78,7 @@ func (i ifElses) UseIf() bool { return i.Val == nil } -func writeOptional(i int, flds []fields.Field) string { - f := flds[i] +func writeOptional(f fields.Field) string { wi := writeInput{ Field: f, FuncName: strings.Join(f.FieldNames(), ""), @@ -93,68 +95,45 @@ func writeOptional(i int, flds []fields.Field) string { func writeOptionalCases(f fields.Field) []ifElses { var out []ifElses - for def := 1; def <= f.MaxDef(); def++ { - if useIfElse(def, 0, f) { - out = append(out, ifelses(def, 0, f)) - } else { + md := f.MaxDef() + for def := 1; def <= md; def++ { + if f.NthChild == 0 || def == md { s := f.Init(def, 0) - out = append(out, ifElses{Val: &s}) + out = append(out, ifElses{Def: def, Val: &s, MaxDef: def == md}) } } return out } -type ifElseCase struct { - f fields.Field - p *fields.Field -} - // ifelses returns an if else block for the given definition and repetition level -func ifelses(def, rep int, f fields.Field) ifElses { - opts := optionals(def, f) - var cases ifElseCases - for _, o := range opts { - //f := orig.Copy() - //f.Seen = seens(o) - cases = append(cases, ifElseCase{f: f, p: f.Parent(o + 1)}) - } - - return cases.ifElses(def, rep, int(f.MaxDef())) -} - -func seens(i int) fields.RepetitionTypes { - out := make([]fields.RepetitionType, i) - for i := range out { - out[i] = fields.Repeated +func ifelses(def, rep int, fld fields.Field) ifElses { + var flds []fields.Field + for _, f := range fld.Chain() { + if f.Optional() && f.NthChild == 0 { + flds = append(flds, f) + } } - return fields.RepetitionTypes(out) -} - -type ifElseCases []ifElseCase -func (i ifElseCases) ifElses(def, rep, md int) ifElses { out := ifElses{ If: ifElse{ - Cond: fmt.Sprintf("x.%s == nil", strings.Join(i[0].p.FieldNames, ".")), - Val: i[0].f.Init(def, rep), + Cond: fmt.Sprintf("x.%s == nil", strings.Join(flds[0].FieldNames(), ".")), + Val: flds[0].Init(def, rep), }, } - var leftovers []ifElseCase - if len(i) > 1 { + if len(flds) > 1 { out.Else = &ifElse{ - Val: i[len(i)-1].f.Init(def, rep), - } - if len(i) > 2 { - leftovers = i[1 : len(i)-1] + Val: flds[len(flds)-1].Init(def, rep), } } - for _, iec := range leftovers { - out.ElseIf = append(out.ElseIf, ifElse{ - Cond: fmt.Sprintf("x.%s == nil", strings.Join(iec.p.FieldName, ".")), - Val: iec.f.Init(def, rep), - }) + if len(flds) > 2 { + for _, f := range flds[1 : len(flds)-2] { + out.ElseIf = append(out.ElseIf, ifElse{ + Cond: fmt.Sprintf("x.%s == nil", strings.Join(f.FieldNames(), ".")), + Val: f.Init(def, rep), + }) + } } return out @@ -164,21 +143,21 @@ func (i ifElseCases) ifElses(def, rep, md int) ifElses { // each optional field. func optionals(def int, f fields.Field) []int { var out []int - di := f.DefIndex(def) - seen := append(f.Seen[:0:0], f.Seen...) - - if len(seen) > di+1 { - seen = seen[:di+1] - } - - for i, rt := range f.RepetitionTypes[:di+1] { - if rt >= fields.Optional { - out = append(out, i) - } - if i > len(seen)-1 && rt >= fields.Optional { - break - } - } + // di := f.DefIndex(def) + // seen := append(f.Seen[:0:0], f.Seen...) + + // if len(seen) > di+1 { + // seen = seen[:di+1] + // } + + // for i, rt := range f.RepetitionTypes[:di+1] { + // if rt >= fields.Optional { + // out = append(out, i) + // } + // if i > len(seen)-1 && rt >= fields.Optional { + // break + // } + // } return out } diff --git a/internal/dremel/write_repeated.go b/internal/dremel/write_repeated.go index d77c8cb..4a4e1b3 100644 --- a/internal/dremel/write_repeated.go +++ b/internal/dremel/write_repeated.go @@ -53,7 +53,7 @@ func init() { log.Fatalf("unable to create templates: %s", err) } - writeRepeatedTpl, err = template.New("output").Funcs(funcs).Parse(`func {{.Func}}(x *{{.Field.Type}}, vals []{{removeStar .Field.TypeName}}, defs, reps []uint8) (int, int) { + writeRepeatedTpl, err = template.New("output").Funcs(funcs).Parse(`func {{.Func}}(x *{{.Field.StructType}}, vals []{{removeStar .Field.TypeName}}, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, {{.Field.MaxRep}}) @@ -106,7 +106,7 @@ type writeRepeatedInput struct { func writeRequired(f fields.Field) string { return fmt.Sprintf(`func %s(x *%s, vals []%s) { x.%s = vals[0] -}`, fmt.Sprintf("write%s", strings.Join(f.FieldNames(), "")), f.Type, f.TypeName, strings.Join(f.FieldNames(), ".")) +}`, fmt.Sprintf("write%s", strings.Join(f.FieldNames(), "")), f.StructType(), f.TypeName, strings.Join(f.FieldNames(), ".")) } func writeRepeated(f fields.Field) string { @@ -142,25 +142,11 @@ func initRepeated(def, rep int, seen fields.RepetitionTypes, f fields.Field) str } func useIfElse(def, rep int, f fields.Field) bool { - return f.NthChild == 0 + return f.NthChild == 0 && f.Parent.Parent != nil && f.Optional() } func writeCases(f fields.Field) []int { - var defs []int - start := 1 - // if seen.Repeated() { - // start = 1 + len(seen) - // } - - maxDef := f.MaxDef() - if start > maxDef { - start = maxDef //hack! figure out why start is > maxDef - } - - for def := start; def <= maxDef; def++ { - defs = append(defs, def) - } - return defs + return nil } func nilField(i int, f fields.Field) string { diff --git a/internal/dremel/write_test.go b/internal/dremel/write_test.go index 212e248..562440d 100644 --- a/internal/dremel/write_test.go +++ b/internal/dremel/write_test.go @@ -13,13 +13,13 @@ import ( func TestWrite(t *testing.T) { testCases := []struct { name string - fields []fields.Field + field fields.Field result string }{ { name: "required and not nested", - fields: []fields.Field{ - {Type: "Person", TypeName: "int32", FieldName: "ID", RepetitionType: fields.Required}, + field: fields.Field{ + FieldType: "int32", TypeName: "int32", FieldName: "ID", RepetitionType: fields.Required, }, result: `func writeID(x *Person, vals []int32) { x.ID = vals[0] @@ -27,7 +27,9 @@ func TestWrite(t *testing.T) { }, { name: "optional and not nested", - //fields: []fields.Field{{Type: "Person", TypeName: "*int32", FieldNames: []string{"ID"}, FieldTypes: []string{"int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}}, + field: fields.Field{ + FieldType: "int32", TypeName: "*int32", FieldName: "ID", RepetitionType: fields.Optional, + }, result: `func writeID(x *Person, vals []int32, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -41,16 +43,23 @@ func TestWrite(t *testing.T) { }, { name: "required and nested", - //fields: []fields.Field{{Type: "Person", TypeName: "int32", FieldNames: []string{"Other", "Hobby", "Difficulty"}, FieldTypes: []string{"Other", "Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Required}}}, + field: fields.Field{ + FieldName: "Other", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldType: "int32", TypeName: "int32", FieldName: "Difficulty", RepetitionType: fields.Required}, + }}, + }}, result: `func writeOtherHobbyDifficulty(x *Person, vals []int32) { x.Other.Hobby.Difficulty = vals[0] }`, }, { name: "optional and nested", - // fields: []fields.Field{ - // {Type: "Person", TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - // }, + field: fields.Field{ + FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "int32", TypeName: "int32", FieldName: "Difficulty", RepetitionType: fields.Optional}, + }, + }, result: `func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -66,23 +75,17 @@ func TestWrite(t *testing.T) { }, { name: "optional and nested and seen by an optional fields", - // fields: []fields.Field{ - // {FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - // {Type: "Person", TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - // }, + field: fields.Field{ + FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Required}, + {FieldType: "int32", TypeName: "int32", FieldName: "Difficulty", RepetitionType: fields.Optional}, + }, + }, result: `func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { def := defs[0] switch def { - case 1: - if x.Hobby == nil { - x.Hobby = &Hobby{} - } case 2: - if x.Hobby == nil { - x.Hobby = &Hobby{Difficulty: pint32(vals[0])} - } else { - x.Hobby.Difficulty = pint32(vals[0]) - } + x.Hobby.Difficulty = pint32(vals[0]) return 1, 1 } @@ -91,9 +94,11 @@ func TestWrite(t *testing.T) { }, { name: "mix of optional and required and nested", - // fields: []fields.Field{ - // {Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - // }, + field: fields.Field{ + FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Required}, + }, + }, result: `func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -107,9 +112,11 @@ func TestWrite(t *testing.T) { }, { name: "mix of optional and required and nested v2", - // fields: []fields.Field{ - // {Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - // }, + field: fields.Field{ + FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldType: "string", TypeName: "*string", FieldName: "Name", RepetitionType: fields.Optional}, + }, + }, result: `func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -123,9 +130,13 @@ func TestWrite(t *testing.T) { }, { name: "mix of optional and require and nested 3 deep", - // fields: []fields.Field{ - // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Optional}}, - // }, + field: fields.Field{ + FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", TypeName: "*string", RepetitionType: fields.Optional}, + }}, + }, + }, result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -141,10 +152,14 @@ func TestWrite(t *testing.T) { }, { name: "mix of optional and required and nested 3 deep v2 and seen by optional field", - // fields: []fields.Field{ - // {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional}}, - // }, + field: fields.Field{ + FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Optional}, + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "*string", FieldName: "Name", RepetitionType: fields.Optional}, + }}, + }, + }, result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -160,9 +175,13 @@ func TestWrite(t *testing.T) { }, { name: "mix of optional and required and nested 3 deep v3", - // fields: []fields.Field{ - // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Required}}, - // }, + field: fields.Field{ + FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "*string", FieldName: "Name", RepetitionType: fields.Required}, + }}, + }, + }, result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -178,9 +197,13 @@ func TestWrite(t *testing.T) { }, { name: "nested 3 deep all optional", - // fields: []fields.Field{ - // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, - // }, + field: fields.Field{ + FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "*string", FieldName: "Name", RepetitionType: fields.Optional}, + }}, + }, + }, result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -198,29 +221,21 @@ func TestWrite(t *testing.T) { }, { name: "nested 3 deep all optional and seen by optional field", - // fields: []fields.Field{ - // {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, - // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, - // }, + field: fields.Field{ + FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Optional}, + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "*string", FieldName: "Name", RepetitionType: fields.Optional}, + }}, + }, + }, result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { - case 1: - if x.Friend == nil { - x.Friend = &Entity{} - } case 2: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{}} - } else { - x.Friend.Hobby = &Item{} - } + x.Friend.Hobby = &Item{} case 3: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{Name: pstring(vals[0])}} - } else { - x.Friend.Hobby = &Item{Name: pstring(vals[0])} - } + x.Friend.Hobby = &Item{Name: pstring(vals[0])} return 1, 1 } @@ -662,7 +677,9 @@ func TestWrite(t *testing.T) { for i, tc := range testCases { t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { - s := dremel.Write(len(tc.fields)-1, tc.fields) + flds := fields.Field{Type: "Person", Children: []fields.Field{tc.field}}.Fields() + s := dremel.Write(flds[len(flds)-1]) + fmt.Println(s) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.result, string(gocode)) diff --git a/internal/fields/fields.go b/internal/fields/fields.go index 6da8cf4..f6164ee 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -29,6 +29,14 @@ type input struct { Append bool } +func (f Field) StructType() string { + var typ string + for fld := f.Parent; fld != nil; fld = fld.Parent { + typ = fld.Type + } + return typ +} + func (f Field) Fields() []Field { return f.fields(0) } @@ -37,9 +45,7 @@ func (f Field) fields(i int) []Field { var out []Field for j, fld := range f.Children { fld.NthChild = j - if i > 0 { - fld.Parent = &f - } + fld.Parent = &f if fld.Primitive() { out = append(out, fld) } else { @@ -49,7 +55,7 @@ func (f Field) fields(i int) []Field { return out } -func (f Field) chain() []Field { +func (f Field) Chain() []Field { out := []Field{f} for fld := f.Parent; fld != nil; fld = fld.Parent { out = append(out, *fld) @@ -66,31 +72,37 @@ func reverse(out []Field) []Field { func (f Field) FieldNames() []string { var out []string - for _, fld := range reverse(f.chain()) { - out = append(out, fld.FieldName) + for _, fld := range reverse(f.Chain()) { + if fld.FieldName != "" { + out = append(out, fld.FieldName) + } } return out } func (f Field) FieldTypes() []string { var out []string - for _, fld := range reverse(f.chain()) { - out = append(out, fld.FieldType) + for _, fld := range reverse(f.Chain()) { + if fld.FieldType != "" { + out = append(out, fld.FieldType) + } } return out } func (f Field) ColumnNames() []string { var out []string - for _, fld := range reverse(f.chain()) { - out = append(out, fld.ColumnName) + for _, fld := range reverse(f.Chain()) { + if fld.ColumnName != "" { + out = append(out, fld.ColumnName) + } } return out } func (f Field) RepetitionTypes() RepetitionTypes { var out []RepetitionType - for _, fld := range reverse(f.chain()) { + for _, fld := range reverse(f.Chain()) { out = append(out, fld.RepetitionType) } return out @@ -100,7 +112,7 @@ func (f Field) RepetitionTypes() RepetitionTypes { // nested field with the given definition level. func (f Field) DefIndex(def int) int { var count, i int - for _, fld := range reverse(f.chain()) { + for _, fld := range reverse(f.Chain()) { if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { count++ } @@ -116,7 +128,7 @@ func (f Field) DefIndex(def int) int { // level for the nested field. func (f Field) MaxDef() int { var out int - for _, fld := range reverse(f.chain()) { + for _, fld := range reverse(f.Chain()) { if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { out++ } @@ -128,7 +140,7 @@ func (f Field) MaxDef() int { // level for the nested field. func (f Field) MaxRep() int { var out int - for _, fld := range reverse(f.chain()) { + for _, fld := range reverse(f.Chain()) { if fld.RepetitionType == Repeated { out++ } @@ -185,7 +197,7 @@ func (f Field) NilField(n int) (string, RepetitionType, int, int) { // Child returns a sub-field based on i func (f Field) Child(i int) Field { - return reverse(f.chain())[i] + return reverse(f.Chain())[i] } // Repeated wraps RepetitionTypes.Repeated() @@ -203,6 +215,32 @@ func (f Field) Required() bool { return f.RepetitionTypes().Required() } +func (f Field) rightComplete(fld Field, i, def, rep, maxDef, maxRep, defs, reps int) bool { + //(defs >= def || ((rep == 0 && fld.RepetitionType == Repeated) || (rep > 0 && reps == rep))) && f.NthChild == 0 + fmt.Println("right complete", fld, i, def, rep, maxDef, maxRep, defs, reps) + if fld.NthChild > 0 { + return true + } + + if def == defs && fld.RepetitionType != Required && fld.NthChild == 0 { + return true + } + + if def == maxDef && fld.RepetitionType != Required && fld.NthChild == 0 { + return true + } + + if rep > 0 && reps == rep { + return true + } + + if rep == 0 && fld.RepetitionType != Required && (fld.RepetitionType == Repeated || f.RepetitionType == Repeated) { + return true + } + + return false +} + // Init is called by parquetgen's templates to generate the code // that writes to a struct's field // @@ -215,10 +253,14 @@ func (f Field) Init(def, rep int) string { left, right := "%s", "%s" - chain := reverse(f.chain()) + chain := reverse(f.Chain()) var i int for i, fld = range chain { + if fld.Parent == nil { + continue + } + if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { defs++ } @@ -240,9 +282,10 @@ func (f Field) Init(def, rep int) string { } } - if (defs >= def || ((rep == 0 && fld.RepetitionType != Required) || (rep > 0 && reps == rep))) && f.NthChild == 0 { + if f.rightComplete(fld, i, def, rep, maxDef, maxRep, defs, reps) { break } + } left = fmt.Sprintf(left, "") @@ -270,9 +313,9 @@ func (f Field) Init(def, rep int) string { } } else { if fld.Parent.RepetitionType == Repeated && rep < maxRep { - right = fmt.Sprintf(right, fmt.Sprintf("{%s: %s{%%s}}", fld.FieldType, fld.FieldName)) + right = fmt.Sprintf(right, fmt.Sprintf("{%s: %s{%%s}}", fld.FieldName, fld.FieldType)) } else { - right = fmt.Sprintf(right, fmt.Sprintf("%s: %s{%%s}", fld.FieldType, fld.FieldName)) + right = fmt.Sprintf(right, fmt.Sprintf("%s: %s{%%s}", fld.FieldName, fld.FieldType)) } } case Optional: @@ -297,9 +340,9 @@ func (f Field) Init(def, rep int) string { } case Repeated: if fld.Primitive() { - if rep == 0 && fld.Parent != nil && fld.Parent.RepetitionType == Repeated { + if rep == 0 && fld.Parent.RepetitionType == Repeated { right = fmt.Sprintf(right, fmt.Sprintf("{%s: []%s{vals[nVals]}}%%s", fld.FieldName, fld.FieldType)) - } else if fld.Parent == nil && rep == 0 { + } else if fld.Parent.Parent == nil && rep == 0 { right = fmt.Sprintf(right, fmt.Sprintf("[]%s{vals[nVals]}%%s", fld.FieldType)) } else if rep == 0 { right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{vals[nVals]}%%s", fld.FieldName, fld.FieldType)) diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go index 7179858..c6809e4 100644 --- a/internal/fields/fields_test.go +++ b/internal/fields/fields_test.go @@ -73,7 +73,6 @@ func TestInit(t *testing.T) { def: 2, expected: "x.Links = &Link{Backward: []int64{vals[nVals]}}", }, - { fields: []fields.Field{ {FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ @@ -234,6 +233,40 @@ func TestInit(t *testing.T) { rep: 3, expected: "x.Thing.Names[ind[0]].Languages[ind[1]].Codes = append(x.Thing.Names[ind[0]].Languages[ind[1]].Codes, vals[nVals])", }, + { + fields: []fields.Field{ + {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", TypeName: "*string", RepetitionType: fields.Optional}, + }}, + }}, + }, + def: 2, + expected: "x.Friend = &Entity{Hobby: Item{Name: pstring(vals[0])}}", + }, + { + fields: []fields.Field{ + {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", TypeName: "*string", RepetitionType: fields.Optional}, + }}, + }}, + }, + def: 3, + expected: "x.Friend = &Entity{Hobby: &Item{Name: pstring(vals[0])}}", + }, + { + fields: []fields.Field{ + {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Optional}, + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", TypeName: "*string", RepetitionType: fields.Optional}, + }}, + }}, + }, + def: 3, + expected: "x.Friend.Hobby = &Item{Name: pstring(vals[0])}", + }, { fields: []fields.Field{ {FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ @@ -468,6 +501,7 @@ func TestInit(t *testing.T) { fields := fields.Field{Children: tc.fields}.Fields() field := fields[len(fields)-1] s := field.Init(tc.def, tc.rep) + fmt.Println(s) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.expected, string(gocode)) diff --git a/internal/gen/gen.go b/internal/gen/gen.go index f44d772..41588e5 100644 --- a/internal/gen/gen.go +++ b/internal/gen/gen.go @@ -85,7 +85,7 @@ func FromStruct(pth, outPth, typ, pkg, imp string, ignore bool) { gocode, err := format.Source(buf.Bytes()) if err != nil { - log.Fatal(err) + log.Fatal(err, string(buf.Bytes())) } f, err := os.Create(outPth) diff --git a/internal/gen/template.go b/internal/gen/template.go index 3497366..5463894 100644 --- a/internal/gen/template.go +++ b/internal/gen/template.go @@ -16,7 +16,7 @@ import ( "github.com/parsyl/parquet" sch "github.com/parsyl/parquet/schema" {{.Import}} - {{range imports .Fields}}{{.}} + {{range imports .Parent.Fields}}{{.}} {{end}} ) @@ -48,14 +48,14 @@ type ParquetWriter struct { } func Fields(compression compression) []Field { - return []Field{ {{range .Fields}} + return []Field{ {{range .Parent.Fields}} {{template "newField" .}}{{end}} } } {{range $i, $field := .Parent.Fields}}{{readFunc $field}} -{{writeFunc $i $.Fields}} +{{writeFunc $field}} {{end}} @@ -377,7 +377,7 @@ func (p *ParquetReader) Scan(x *{{.Type}}) { } } -{{range dedupe .Fields}} +{{range dedupe .Parent.Fields}} {{if eq .Category "numeric"}} {{ template "numericField" .}} {{end}} @@ -398,7 +398,7 @@ func (p *ParquetReader) Scan(x *{{.Type}}) { {{end}} {{end}} -{{range dedupe .Fields}} +{{range dedupe .Parent.Fields}} {{if eq .Category "numeric"}} {{ template "requiredStats" .}} {{end}} From e5d3792693daec0be0a432a0cd51c8a3805612e3 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Thu, 10 Jun 2021 08:02:46 -0600 Subject: [PATCH 08/25] TestInit passing --- internal/fields/fields.go | 32 ++++++++++++++++++++------------ internal/fields/fields_test.go | 1 - 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/internal/fields/fields.go b/internal/fields/fields.go index f6164ee..60cc166 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -21,6 +21,7 @@ type Field struct { Embedded bool Children []Field NthChild int + defined bool } type input struct { @@ -216,25 +217,20 @@ func (f Field) Required() bool { } func (f Field) rightComplete(fld Field, i, def, rep, maxDef, maxRep, defs, reps int) bool { - //(defs >= def || ((rep == 0 && fld.RepetitionType == Repeated) || (rep > 0 && reps == rep))) && f.NthChild == 0 - fmt.Println("right complete", fld, i, def, rep, maxDef, maxRep, defs, reps) - if fld.NthChild > 0 { + if fld.RepetitionType == Optional && rep == 0 && !fld.defined { return true } - if def == defs && fld.RepetitionType != Required && fld.NthChild == 0 { + if fld.RepetitionType == Repeated && rep > 0 && reps == rep && f.NthChild == 0 { return true } - if def == maxDef && fld.RepetitionType != Required && fld.NthChild == 0 { + if defs == maxDef && fld.RepetitionType != Required && f.NthChild == 0 { return true } - if rep > 0 && reps == rep { - return true - } - - if rep == 0 && fld.RepetitionType != Required && (fld.RepetitionType == Repeated || f.RepetitionType == Repeated) { + //if rep == 0 && fld.RepetitionType != Required && (fld.RepetitionType == Repeated || f.RepetitionType == Repeated) { + if rep == 0 && fld.RepetitionType == Repeated && !fld.defined { return true } @@ -253,10 +249,20 @@ func (f Field) Init(def, rep int) string { left, right := "%s", "%s" - chain := reverse(f.Chain()) + chain := f.Chain() + var defined bool + for i, fld := range chain { + fld.defined = defined + chain[i] = fld + if fld.Parent != nil && fld.NthChild > 0 { + defined = true + } + } + + chain = reverse(chain) var i int - for i, fld = range chain { + for _, fld = range chain { if fld.Parent == nil { continue } @@ -283,9 +289,11 @@ func (f Field) Init(def, rep int) string { } if f.rightComplete(fld, i, def, rep, maxDef, maxRep, defs, reps) { + i++ break } + i++ } left = fmt.Sprintf(left, "") diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go index c6809e4..27f526d 100644 --- a/internal/fields/fields_test.go +++ b/internal/fields/fields_test.go @@ -501,7 +501,6 @@ func TestInit(t *testing.T) { fields := fields.Field{Children: tc.fields}.Fields() field := fields[len(fields)-1] s := field.Init(tc.def, tc.rep) - fmt.Println(s) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.expected, string(gocode)) From f757fe586225287b8e5f911b56091ba8c2e0c2a8 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Sat, 12 Jun 2021 07:06:46 -0600 Subject: [PATCH 09/25] got read tests passing --- internal/dremel/dremel.go | 9 + internal/dremel/read.go | 32 +- internal/dremel/read_repeated.go | 2 +- internal/dremel/read_test.go | 960 +++++++++++++++++------------- internal/dremel/write_optional.go | 90 +-- internal/dremel/write_repeated.go | 76 +-- internal/dremel/write_test.go | 254 ++++---- internal/fields/fields.go | 90 ++- internal/fields/fields_test.go | 38 ++ 9 files changed, 869 insertions(+), 682 deletions(-) diff --git a/internal/dremel/dremel.go b/internal/dremel/dremel.go index 0bcedfc..2d10853 100644 --- a/internal/dremel/dremel.go +++ b/internal/dremel/dremel.go @@ -1,6 +1,9 @@ package dremel import ( + "fmt" + "strings" + "github.com/parsyl/parquet/internal/fields" ) @@ -35,3 +38,9 @@ func Read(f fields.Field) string { return readRequired(f) } + +func writeRequired(f fields.Field) string { + return fmt.Sprintf(`func %s(x *%s, vals []%s) { + x.%s = vals[0] +}`, fmt.Sprintf("write%s", strings.Join(f.FieldNames(), "")), f.StructType(), f.TypeName, strings.Join(f.FieldNames(), ".")) +} diff --git a/internal/dremel/read.go b/internal/dremel/read.go index c5acdeb..d6dff4c 100644 --- a/internal/dremel/read.go +++ b/internal/dremel/read.go @@ -10,7 +10,7 @@ import ( func readRequired(f fields.Field) string { return fmt.Sprintf(`func read%s(x %s) %s { return x.%s -}`, strings.Join(f.FieldNames(), ""), f.Type, f.TypeName, strings.Join(f.FieldNames(), ".")) +}`, strings.Join(f.FieldNames(), ""), f.StructType(), f.TypeName, strings.Join(f.FieldNames(), ".")) } func readOptional(f fields.Field) string { @@ -18,8 +18,8 @@ func readOptional(f fields.Field) string { n := f.MaxDef() for def := 0; def < n; def++ { out += fmt.Sprintf(`case x.%s == nil: - return nil, []uint8{%d}, nil -`, nilField(def, f), def) + return nil, []uint8{%d}, nil + `, nilField(def, f), def) } var ptr string @@ -27,16 +27,32 @@ func readOptional(f fields.Field) string { if rts[len(rts)-1] == fields.Optional { ptr = "*" } + out += fmt.Sprintf(` default: - return []%s{%sx.%s}, []uint8{%d}, nil`, cleanTypeName(f.TypeName), ptr, nilField(n, f), n) + return []%s{%sx.%s}, []uint8{%d}, nil`, cleanTypeName(f.TypeName), ptr, nilField(n, f), n) return fmt.Sprintf(`func read%s(x %s) ([]%s, []uint8, []uint8) { - switch { - %s - } -}`, strings.Join(f.FieldNames(), ""), f.Type, cleanTypeName(f.TypeName), out) + switch { + %s + } + }`, strings.Join(f.FieldNames(), ""), f.StructType(), cleanTypeName(f.TypeName), out) } func cleanTypeName(s string) string { return strings.Replace(strings.Replace(s, "*", "", 1), "[]", "", 1) } + +func nilField(i int, f fields.Field) string { + var flds []string + var count int + for j, o := range f.RepetitionTypes() { + flds = append(flds, f.FieldNames()[j]) + if o == fields.Optional { + count++ + } + if count > i { + break + } + } + return strings.Join(flds, ".") +} diff --git a/internal/dremel/read_repeated.go b/internal/dremel/read_repeated.go index 79668c5..ce19814 100644 --- a/internal/dremel/read_repeated.go +++ b/internal/dremel/read_repeated.go @@ -70,7 +70,7 @@ func readRepeated(f fields.Field) string { return vals, defs, reps }`, strings.Join(f.FieldNames(), ""), - f.Type, + f.StructType(), cleanTypeName(f.TypeName), cleanTypeName(f.TypeName), doReadRepeated(f, 0, "x"), diff --git a/internal/dremel/read_test.go b/internal/dremel/read_test.go index f7439cd..2c867ef 100644 --- a/internal/dremel/read_test.go +++ b/internal/dremel/read_test.go @@ -1,431 +1,567 @@ package dremel_test -// func TestRead(t *testing.T) { -// testCases := []struct { -// name string -// f fields.Field -// result string -// }{ -// { -// name: "required and not nested", -// f: fields.Field{Type: "Person", TypeName: "int32", FieldNames: []string{"ID"}, RepetitionTypes: []fields.RepetitionType{fields.Required}}, -// result: `func readID(x Person) int32 { -// return x.ID -// }`, -// }, -// { -// name: "optional and not nested", -// f: fields.Field{Type: "Person", TypeName: "*int32", FieldNames: []string{"ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}, -// result: `func readID(x Person) ([]int32, []uint8, []uint8) { -// switch { -// case x.ID == nil: -// return nil, []uint8{0}, nil -// default: -// return []int32{*x.ID}, []uint8{1}, nil -// } -// }`, -// }, -// { -// name: "required and nested", -// f: fields.Field{Type: "Person", TypeName: "int32", FieldNames: []string{"Other", "Hobby", "Difficulty"}, FieldTypes: []string{"Other", "Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Required}}, -// result: `func readOtherHobbyDifficulty(x Person) int32 { -// return x.Other.Hobby.Difficulty -// }`, -// }, -// { -// name: "optional and nested", -// f: fields.Field{Type: "Person", TypeName: "*int32", FieldNames: []string{"Hobby", "Difficulty"}, FieldTypes: []string{"Hobby", "int32"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional}}, -// result: `func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { -// switch { -// case x.Hobby == nil: -// return nil, []uint8{0}, nil -// case x.Hobby.Difficulty == nil: -// return nil, []uint8{1}, nil -// default: -// return []int32{*x.Hobby.Difficulty}, []uint8{2}, nil -// } -// }`, -// }, -// { -// name: "mix of optional and required and nested", -// f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, -// result: `func readHobbyName(x Person) ([]string, []uint8, []uint8) { -// switch { -// case x.Hobby == nil: -// return nil, []uint8{0}, nil -// default: -// return []string{x.Hobby.Name}, []uint8{1}, nil -// } -// }`, -// }, -// { -// name: "mix of optional and required and nested v2", -// f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Hobby", "Name"}, FieldTypes: []string{"Hobby", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, -// result: `func readHobbyName(x Person) ([]string, []uint8, []uint8) { -// switch { -// case x.Hobby.Name == nil: -// return nil, []uint8{0}, nil -// default: -// return []string{*x.Hobby.Name}, []uint8{1}, nil -// } -// }`, -// }, -// { -// name: "mix of optional and require and nested 3 deep", -// f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required, fields.Optional}}, -// result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { -// switch { -// case x.Friend == nil: -// return nil, []uint8{0}, nil -// case x.Friend.Hobby.Name == nil: -// return nil, []uint8{1}, nil -// default: -// return []string{*x.Friend.Hobby.Name}, []uint8{2}, nil -// } -// }`, -// }, -// { -// name: "mix of optional and require and nested 3 deep v2", -// f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional}}, -// result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { -// switch { -// case x.Friend.Hobby == nil: -// return nil, []uint8{0}, nil -// case x.Friend.Hobby.Name == nil: -// return nil, []uint8{1}, nil -// default: -// return []string{*x.Friend.Hobby.Name}, []uint8{2}, nil -// } -// }`, -// }, -// { -// name: "mix of optional and require and nested 3 deep v3", -// f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Required}}, -// result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { -// switch { -// case x.Friend == nil: -// return nil, []uint8{0}, nil -// case x.Friend.Hobby == nil: -// return nil, []uint8{1}, nil -// default: -// return []string{x.Friend.Hobby.Name}, []uint8{2}, nil -// } -// }`, -// }, -// { -// name: "nested 3 deep all optional", -// f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name"}, FieldTypes: []string{"Entity", "Item", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional}}, -// result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { -// switch { -// case x.Friend == nil: -// return nil, []uint8{0}, nil -// case x.Friend.Hobby == nil: -// return nil, []uint8{1}, nil -// case x.Friend.Hobby.Name == nil: -// return nil, []uint8{2}, nil -// default: -// return []string{*x.Friend.Hobby.Name}, []uint8{3}, nil -// } -// }`, -// }, -// { -// name: "four deep", -// f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Optional}}, -// result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { -// switch { -// case x.Friend == nil: -// return nil, []uint8{0}, nil -// case x.Friend.Hobby == nil: -// return nil, []uint8{1}, nil -// case x.Friend.Hobby.Name == nil: -// return nil, []uint8{2}, nil -// case x.Friend.Hobby.Name.First == nil: -// return nil, []uint8{3}, nil -// default: -// return []string{*x.Friend.Hobby.Name.First}, []uint8{4}, nil -// } -// }`, -// }, -// { -// name: "four deep mixed", -// f: fields.Field{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}, -// result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { -// switch { -// case x.Friend.Hobby == nil: -// return nil, []uint8{0}, nil -// case x.Friend.Hobby.Name == nil: -// return nil, []uint8{1}, nil -// case x.Friend.Hobby.Name.First == nil: -// return nil, []uint8{2}, nil -// default: -// return []string{*x.Friend.Hobby.Name.First}, []uint8{3}, nil -// } -// }`, -// }, -// { -// name: "four deep mixed v2", -// f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, -// result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { -// switch { -// case x.Friend == nil: -// return nil, []uint8{0}, nil -// case x.Friend.Hobby == nil: -// return nil, []uint8{1}, nil -// case x.Friend.Hobby.Name == nil: -// return nil, []uint8{2}, nil -// default: -// return []string{x.Friend.Hobby.Name.First}, []uint8{3}, nil -// } -// }`, -// }, -// { -// name: "repeated", -// f: fields.Field{Type: "Person", TypeName: "string", FieldNames: []string{"Friends"}, FieldTypes: []string{"string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, -// result: `func readFriends(x Person) ([]string, []uint8, []uint8) { -// var vals []string -// var defs, reps []uint8 -// var lastRep uint8 +import ( + "fmt" + "go/format" + "testing" -// if len(x.Friends) == 0 { -// defs = append(defs, 0) -// reps = append(reps, lastRep) -// } else { -// for i0, x0 := range x.Friends { -// if i0 == 1 { -// lastRep = 1 -// } -// defs = append(defs, 1) -// reps = append(reps, lastRep) -// vals = append(vals, x0) -// } -// } + "github.com/parsyl/parquet/internal/dremel" + "github.com/parsyl/parquet/internal/fields" + "github.com/stretchr/testify/assert" +) -// return vals, defs, reps -// }`, -// }, -// { -// name: "readLinkFoward", -// f: fields.Field{Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, -// result: `func readLinkForward(x Document) ([]int64, []uint8, []uint8) { -// var vals []int64 -// var defs, reps []uint8 -// var lastRep uint8 +func TestRead(t *testing.T) { + testCases := []struct { + name string + structName string + f fields.Field + result string + }{ + { + name: "required and not nested", + f: fields.Field{ + FieldType: "int32", TypeName: "int32", FieldName: "ID", RepetitionType: fields.Required, + }, + result: `func readID(x Person) int32 { + return x.ID +}`, + }, + { + name: "optional and not nested", + ////f: fields.Field{Type: "Person", TypeName: "*int32", FieldNames: []string{"ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}, + f: fields.Field{ + FieldType: "int32", TypeName: "*int32", FieldName: "ID", RepetitionType: fields.Optional, + }, + result: `func readID(x Person) ([]int32, []uint8, []uint8) { + switch { + case x.ID == nil: + return nil, []uint8{0}, nil + default: + return []int32{*x.ID}, []uint8{1}, nil + } +}`, + }, + { + name: "required and nested", + f: fields.Field{ + FieldName: "Other", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldType: "int32", TypeName: "int32", FieldName: "Difficulty", RepetitionType: fields.Required}, + }}, + }, + }, + result: `func readOtherHobbyDifficulty(x Person) int32 { + return x.Other.Hobby.Difficulty +}`, + }, + { + name: "optional and nested", + f: fields.Field{ + FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "int32", TypeName: "*int32", FieldName: "Difficulty", RepetitionType: fields.Optional}, + }, + }, + result: `func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { + switch { + case x.Hobby == nil: + return nil, []uint8{0}, nil + case x.Hobby.Difficulty == nil: + return nil, []uint8{1}, nil + default: + return []int32{*x.Hobby.Difficulty}, []uint8{2}, nil + } +}`, + }, + { + name: "mix of optional and required and nested", + f: fields.Field{ + FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Required}, + }, + }, + result: `func readHobbyName(x Person) ([]string, []uint8, []uint8) { + switch { + case x.Hobby == nil: + return nil, []uint8{0}, nil + default: + return []string{x.Hobby.Name}, []uint8{1}, nil + } +}`, + }, + { + name: "mix of optional and required and nested v2", + f: fields.Field{ + FieldName: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Optional}, + }, + }, + result: `func readHobbyName(x Person) ([]string, []uint8, []uint8) { + switch { + case x.Hobby.Name == nil: + return nil, []uint8{0}, nil + default: + return []string{*x.Hobby.Name}, []uint8{1}, nil + } +}`, + }, + { + name: "mix of optional and require and nested 3 deep", + f: fields.Field{ + FieldName: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Optional}, + }}, + }, + }, + result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { + switch { + case x.Friend == nil: + return nil, []uint8{0}, nil + case x.Friend.Hobby.Name == nil: + return nil, []uint8{1}, nil + default: + return []string{*x.Friend.Hobby.Name}, []uint8{2}, nil + } +}`, + }, + { + name: "mix of optional and require and nested 3 deep v2", + f: fields.Field{ + FieldName: "Friend", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Optional}, + }}, + }, + }, + result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { + switch { + case x.Friend.Hobby == nil: + return nil, []uint8{0}, nil + case x.Friend.Hobby.Name == nil: + return nil, []uint8{1}, nil + default: + return []string{*x.Friend.Hobby.Name}, []uint8{2}, nil + } +}`, + }, + { + name: "mix of optional and require and nested 3 deep v3", + f: fields.Field{ + FieldName: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Required}, + }}, + }, + }, + result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { + switch { + case x.Friend == nil: + return nil, []uint8{0}, nil + case x.Friend.Hobby == nil: + return nil, []uint8{1}, nil + default: + return []string{x.Friend.Hobby.Name}, []uint8{2}, nil + } +}`, + }, + { + name: "nested 3 deep all optional", + f: fields.Field{ + FieldName: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Optional}, + }}, + }, + }, + result: `func readFriendHobbyName(x Person) ([]string, []uint8, []uint8) { + switch { + case x.Friend == nil: + return nil, []uint8{0}, nil + case x.Friend.Hobby == nil: + return nil, []uint8{1}, nil + case x.Friend.Hobby.Name == nil: + return nil, []uint8{2}, nil + default: + return []string{*x.Friend.Hobby.Name}, []uint8{3}, nil + } +}`, + }, + { + name: "four deep", + f: fields.Field{ + FieldName: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "First", RepetitionType: fields.Optional}, + }}, + }}, + }, + }, + result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { + switch { + case x.Friend == nil: + return nil, []uint8{0}, nil + case x.Friend.Hobby == nil: + return nil, []uint8{1}, nil + case x.Friend.Hobby.Name == nil: + return nil, []uint8{2}, nil + case x.Friend.Hobby.Name.First == nil: + return nil, []uint8{3}, nil + default: + return []string{*x.Friend.Hobby.Name.First}, []uint8{4}, nil + } +}`, + }, + { + name: "four deep mixed", + f: fields.Field{ + FieldName: "Friend", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "First", RepetitionType: fields.Optional}, + }}, + }}, + }, + }, + result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { + switch { + case x.Friend.Hobby == nil: + return nil, []uint8{0}, nil + case x.Friend.Hobby.Name == nil: + return nil, []uint8{1}, nil + case x.Friend.Hobby.Name.First == nil: + return nil, []uint8{2}, nil + default: + return []string{*x.Friend.Hobby.Name.First}, []uint8{3}, nil + } +}`, + }, + { + name: "four deep mixed v2", + f: fields.Field{ + FieldName: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "First", RepetitionType: fields.Required}, + }}, + }}, + }, + }, + result: `func readFriendHobbyNameFirst(x Person) ([]string, []uint8, []uint8) { + switch { + case x.Friend == nil: + return nil, []uint8{0}, nil + case x.Friend.Hobby == nil: + return nil, []uint8{1}, nil + case x.Friend.Hobby.Name == nil: + return nil, []uint8{2}, nil + default: + return []string{x.Friend.Hobby.Name.First}, []uint8{3}, nil + } +}`, + }, + { + name: "repeated", + f: fields.Field{ + FieldType: "string", TypeName: "string", FieldName: "Friends", RepetitionType: fields.Repeated, + }, + result: `func readFriends(x Person) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 -// if x.Link == nil { -// defs = append(defs, 0) -// reps = append(reps, lastRep) -// } else { -// if len(x.Link.Forward) == 0 { -// defs = append(defs, 1) -// reps = append(reps, lastRep) -// } else { -// for i0, x0 := range x.Link.Forward { -// if i0 == 1 { -// lastRep = 1 -// } -// defs = append(defs, 2) -// reps = append(reps, lastRep) -// vals = append(vals, x0) -// } -// } -// } + if len(x.Friends) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Friends { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 1) + reps = append(reps, lastRep) + vals = append(vals, x0) + } + } -// return vals, defs, reps -// }`, -// }, -// { -// name: "readNamesLanguagesCode", -// f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, -// result: `func readNamesLanguagesCode(x Document) ([]string, []uint8, []uint8) { -// var vals []string -// var defs, reps []uint8 -// var lastRep uint8 + return vals, defs, reps +}`, + }, + { + name: "readLinkFoward", + structName: "Document", + f: fields.Field{ + FieldName: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "int64", TypeName: "int64", FieldName: "Forward", RepetitionType: fields.Repeated}, + }, + }, + result: `func readLinkForward(x Document) ([]int64, []uint8, []uint8) { + var vals []int64 + var defs, reps []uint8 + var lastRep uint8 -// if len(x.Names) == 0 { -// defs = append(defs, 0) -// reps = append(reps, lastRep) -// } else { -// for i0, x0 := range x.Names { -// if i0 == 1 { -// lastRep = 1 -// } -// if len(x0.Languages) == 0 { -// defs = append(defs, 1) -// reps = append(reps, lastRep) -// } else { -// for i1, x1 := range x0.Languages { -// if i1 == 1 { -// lastRep = 2 -// } -// defs = append(defs, 2) -// reps = append(reps, lastRep) -// vals = append(vals, x1.Code) -// } -// } -// } -// } + if x.Link == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Link.Forward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Link.Forward { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0) + } + } + } -// return vals, defs, reps -// }`, -// }, -// { -// name: "readNamesLanguagesCountry", -// f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, -// result: `func readNamesLanguagesCountry(x Document) ([]string, []uint8, []uint8) { -// var vals []string -// var defs, reps []uint8 -// var lastRep uint8 + return vals, defs, reps +}`, + }, + { + name: "readNamesLanguagesCode", + structName: "Document", + f: fields.Field{ + FieldName: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Code", RepetitionType: fields.Required}, + }}, + }, + }, + result: `func readNamesLanguagesCode(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 -// if len(x.Names) == 0 { -// defs = append(defs, 0) -// reps = append(reps, lastRep) -// } else { -// for i0, x0 := range x.Names { -// if i0 == 1 { -// lastRep = 1 -// } -// if len(x0.Languages) == 0 { -// defs = append(defs, 1) -// reps = append(reps, lastRep) -// } else { -// for i1, x1 := range x0.Languages { -// if i1 == 1 { -// lastRep = 2 -// } -// if x1.Country == nil { -// defs = append(defs, 2) -// reps = append(reps, lastRep) -// } else { -// defs = append(defs, 3) -// reps = append(reps, lastRep) -// vals = append(vals, *x1.Country) -// } -// } -// } -// } -// } + if len(x.Names) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Names { + if i0 == 1 { + lastRep = 1 + } + if len(x0.Languages) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Languages { + if i1 == 1 { + lastRep = 2 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x1.Code) + } + } + } + } -// return vals, defs, reps -// }`, -// }, -// { -// name: "readNamesURL", -// f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Names", "URL"}, FieldTypes: []string{"Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Optional}}, -// result: `func readNamesURL(x Document) ([]string, []uint8, []uint8) { -// var vals []string -// var defs, reps []uint8 -// var lastRep uint8 + return vals, defs, reps +}`, + }, + { + name: "readNamesLanguagesCountry", + structName: "Document", + f: fields.Field{ + FieldName: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Country", RepetitionType: fields.Optional}, + }}, + }, + }, + result: `func readNamesLanguagesCountry(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 -// if len(x.Names) == 0 { -// defs = append(defs, 0) -// reps = append(reps, lastRep) -// } else { -// for i0, x0 := range x.Names { -// if i0 == 1 { -// lastRep = 1 -// } -// if x0.URL == nil { -// defs = append(defs, 1) -// reps = append(reps, lastRep) -// } else { -// defs = append(defs, 2) -// reps = append(reps, lastRep) -// vals = append(vals, *x0.URL) -// } -// } -// } + if len(x.Names) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Names { + if i0 == 1 { + lastRep = 1 + } + if len(x0.Languages) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Languages { + if i1 == 1 { + lastRep = 2 + } + if x1.Country == nil { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, *x1.Country) + } + } + } + } + } -// return vals, defs, reps -// }`, -// }, -// { -// name: "run of required", -// f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Friends", "Name", "Last"}, FieldTypes: []string{"Friend", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Required, fields.Required}}, -// result: `func readFriendsNameLast(x Document) ([]string, []uint8, []uint8) { -// var vals []string -// var defs, reps []uint8 -// var lastRep uint8 + return vals, defs, reps +}`, + }, + { + name: "readNamesURL", + structName: "Document", + f: fields.Field{ + FieldName: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "URL", RepetitionType: fields.Optional}, + }, + }, + result: `func readNamesURL(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 -// if len(x.Friends) == 0 { -// defs = append(defs, 0) -// reps = append(reps, lastRep) -// } else { -// for i0, x0 := range x.Friends { -// if i0 == 1 { -// lastRep = 1 -// } -// defs = append(defs, 1) -// reps = append(reps, lastRep) -// vals = append(vals, x0.Name.Last) -// } -// } + if len(x.Names) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Names { + if i0 == 1 { + lastRep = 1 + } + if x0.URL == nil { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, *x0.URL) + } + } + } -// return vals, defs, reps -// }`, -// }, -// { -// name: "run of required v2", -// f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Friend", "Name", "Aliases"}, FieldTypes: []string{"Friend", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Required, fields.Repeated}}, -// result: `func readFriendNameAliases(x Document) ([]string, []uint8, []uint8) { -// var vals []string -// var defs, reps []uint8 -// var lastRep uint8 + return vals, defs, reps +}`, + }, + { + name: "run of required", + structName: "Document", + f: fields.Field{ + FieldName: "Friends", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Last", RepetitionType: fields.Required}, + }}, + }, + }, + result: `func readFriendsNameLast(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 -// if len(x.Friend.Name.Aliases) == 0 { -// defs = append(defs, 0) -// reps = append(reps, lastRep) -// } else { -// for i0, x0 := range x.Friend.Name.Aliases { -// if i0 == 1 { -// lastRep = 1 -// } -// defs = append(defs, 1) -// reps = append(reps, lastRep) -// vals = append(vals, x0) -// } -// } + if len(x.Friends) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Friends { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 1) + reps = append(reps, lastRep) + vals = append(vals, x0.Name.Last) + } + } -// return vals, defs, reps -// }`, -// }, -// { -// name: "run of required v3", -// f: fields.Field{Type: "Document", TypeName: "string", FieldNames: []string{"Other", "Friends", "Name", "Middle"}, FieldTypes: []string{"Other", "Friend", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required, fields.Required}}, -// result: `func readOtherFriendsNameMiddle(x Document) ([]string, []uint8, []uint8) { -// var vals []string -// var defs, reps []uint8 -// var lastRep uint8 + return vals, defs, reps +}`, + }, + { + name: "run of required v2", + structName: "Document", + f: fields.Field{ + FieldName: "Friend", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Aliases", RepetitionType: fields.Repeated}, + }}, + }, + }, + result: `func readFriendNameAliases(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 -// if x.Other == nil { -// defs = append(defs, 0) -// reps = append(reps, lastRep) -// } else { -// if len(x.Other.Friends) == 0 { -// defs = append(defs, 1) -// reps = append(reps, lastRep) -// } else { -// for i0, x0 := range x.Other.Friends { -// if i0 == 1 { -// lastRep = 1 -// } -// defs = append(defs, 2) -// reps = append(reps, lastRep) -// vals = append(vals, x0.Name.Middle) -// } -// } -// } + if len(x.Friend.Name.Aliases) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Friend.Name.Aliases { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 1) + reps = append(reps, lastRep) + vals = append(vals, x0) + } + } -// return vals, defs, reps -// }`, -// }, -// } + return vals, defs, reps +}`, + }, + { + name: "run of required v3", + structName: "Document", + f: fields.Field{ + FieldName: "Other", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Friends", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldName: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Middle", RepetitionType: fields.Required}, + }}, + }}, + }, + }, + result: `func readOtherFriendsNameMiddle(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 -// for i, tc := range testCases { -// t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { -// s := dremel.Read(tc.f) -// gocode, err := format.Source([]byte(s)) -// assert.NoError(t, err) -// assert.Equal(t, tc.result, string(gocode)) -// }) -// } -// } + if x.Other == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Other.Friends) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Other.Friends { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0.Name.Middle) + } + } + } + + return vals, defs, reps +}`, + }, + } + + for i, tc := range testCases { + t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { + sn := tc.structName + if sn == "" { + sn = "Person" + } + flds := fields.Field{Type: sn, Children: []fields.Field{tc.f}}.Fields() + s := dremel.Read(flds[len(flds)-1]) + gocode, err := format.Source([]byte(s)) + assert.NoError(t, err) + assert.Equal(t, tc.result, string(gocode)) + }) + } +} diff --git a/internal/dremel/write_optional.go b/internal/dremel/write_optional.go index d0f98b6..244629e 100644 --- a/internal/dremel/write_optional.go +++ b/internal/dremel/write_optional.go @@ -2,7 +2,6 @@ package dremel import ( "bytes" - "fmt" "log" "strings" "text/template" @@ -55,7 +54,7 @@ var ( type writeInput struct { fields.Field - Cases []ifElses + Cases []defCases FuncName string } @@ -64,18 +63,11 @@ type ifElse struct { Val string } -// todo: rename to defCase -type ifElses struct { - Def int - MaxDef bool - If ifElse - ElseIf []ifElse - Else *ifElse - Val *string -} - -func (i ifElses) UseIf() bool { - return i.Val == nil +type defCases struct { + Def int + MaxDef bool + Val *string + RepCases []string } func writeOptional(f fields.Field) string { @@ -93,71 +85,13 @@ func writeOptional(f fields.Field) string { return string(buf.Bytes()) } -func writeOptionalCases(f fields.Field) []ifElses { - var out []ifElses +func writeOptionalCases(f fields.Field) []defCases { md := f.MaxDef() - for def := 1; def <= md; def++ { - if f.NthChild == 0 || def == md { - s := f.Init(def, 0) - out = append(out, ifElses{Def: def, Val: &s, MaxDef: def == md}) - } - } - return out -} - -// ifelses returns an if else block for the given definition and repetition level -func ifelses(def, rep int, fld fields.Field) ifElses { - var flds []fields.Field - for _, f := range fld.Chain() { - if f.Optional() && f.NthChild == 0 { - flds = append(flds, f) - } - } - - out := ifElses{ - If: ifElse{ - Cond: fmt.Sprintf("x.%s == nil", strings.Join(flds[0].FieldNames(), ".")), - Val: flds[0].Init(def, rep), - }, - } - - if len(flds) > 1 { - out.Else = &ifElse{ - Val: flds[len(flds)-1].Init(def, rep), - } - } - - if len(flds) > 2 { - for _, f := range flds[1 : len(flds)-2] { - out.ElseIf = append(out.ElseIf, ifElse{ - Cond: fmt.Sprintf("x.%s == nil", strings.Join(f.FieldNames(), ".")), - Val: f.Init(def, rep), - }) - } + cases := writeCases(f) + out := make([]defCases, len(cases)) + for i, def := range cases { + s := f.Init(def, 0) + out[i] = defCases{Def: def, Val: &s, MaxDef: def == md} } - - return out -} - -// optionals returns a slice that contains the index of -// each optional field. -func optionals(def int, f fields.Field) []int { - var out []int - // di := f.DefIndex(def) - // seen := append(f.Seen[:0:0], f.Seen...) - - // if len(seen) > di+1 { - // seen = seen[:di+1] - // } - - // for i, rt := range f.RepetitionTypes[:di+1] { - // if rt >= fields.Optional { - // out = append(out, i) - // } - // if i > len(seen)-1 && rt >= fields.Optional { - // break - // } - // } - return out } diff --git a/internal/dremel/write_repeated.go b/internal/dremel/write_repeated.go index 4a4e1b3..6824bfc 100644 --- a/internal/dremel/write_repeated.go +++ b/internal/dremel/write_repeated.go @@ -21,13 +21,19 @@ type defCase struct { Field fields.Field } +type writeRepeatedInput struct { + Field fields.Field + Defs []int + Func string +} + func init() { funcs := template.FuncMap{ "removeStar": func(s string) string { return strings.Replace(strings.Replace(s, "*", "", 1), "[]", "", 1) }, - "newDefCase": func(def int, seen []fields.RepetitionType, f fields.Field) defCase { - return defCase{Def: def, Seen: seen, Field: f} + "newDefCase": func(def int, f fields.Field) defCase { + return defCase{Def: def, Field: f} }, "init": initRepeated, "getRep": func(def int, f fields.Field) int { @@ -78,15 +84,15 @@ func init() { defSwitchTpl := `{{define "defSwitch"}}switch def { {{range $i, $def := .Defs}} case {{$def}}: - {{ template "defCase" newDefCase $def $.Seen $.Field}}{{if eq $def $.Field.MaxDef}} + {{ template "defCase" newDefCase $def $.Field}}{{if eq $def $.Field.MaxDef}} nVals++{{end}}{{end}} }{{end}}` - defCaseTpl := `{{define "defCase"}}{{if eq .Def .Field.MaxDef}}{{template "repSwitch" .}}{{else}}{{$rep:=getRep .Def .Field}}{{init .Def $rep .Seen .Field}}{{end}}{{end}}` + defCaseTpl := `{{define "defCase"}}{{if eq .Def .Field.MaxDef}}{{template "repSwitch" .}}{{else}}{{$rep:=getRep .Def .Field}}{{init .Def $rep .Field}}{{end}}{{end}}` repSwitchTpl := `{{define "repSwitch"}}switch rep { -{{range $case := .Field.RepCases $.Seen}}{{$case.Case}} -{{init $.Def $case.Rep $.Seen $.Field}} +{{range $case := .Field.RepCases}}{{$case.Case}} +{{init $.Def $case.Rep $.Field}} {{end}} } {{end}}` for _, t := range []string{defCaseTpl, defSwitchTpl, repSwitchTpl} { @@ -97,18 +103,6 @@ func init() { } } -type writeRepeatedInput struct { - Field fields.Field - Defs []int - Func string -} - -func writeRequired(f fields.Field) string { - return fmt.Sprintf(`func %s(x *%s, vals []%s) { - x.%s = vals[0] -}`, fmt.Sprintf("write%s", strings.Join(f.FieldNames(), "")), f.StructType(), f.TypeName, strings.Join(f.FieldNames(), ".")) -} - func writeRepeated(f fields.Field) string { wi := writeRepeatedInput{ Field: f, @@ -117,11 +111,14 @@ func writeRepeated(f fields.Field) string { } var buf bytes.Buffer - writeRepeatedTpl.Execute(&buf, wi) + if err := writeRepeatedTpl.Execute(&buf, wi); err != nil { + fmt.Println(err) + return "" + } return string(buf.Bytes()) } -func initRepeated(def, rep int, seen fields.RepetitionTypes, f fields.Field) string { +func initRepeated(def, rep int, f fields.Field) string { md := int(f.MaxDef()) rt := f.RepetitionTypes().Def(def) @@ -129,37 +126,22 @@ func initRepeated(def, rep int, seen fields.RepetitionTypes, f fields.Field) str rep = def } - if useIfElse(def, rep, f) { - ie := ifelses(def, rep, f) - var buf bytes.Buffer - if err := ifTpl.Execute(&buf, ie); err != nil { - log.Fatalf("unable to execute ifTpl: %s", err) - } - return string(buf.Bytes()) - } - return f.Init(def, rep) } -func useIfElse(def, rep int, f fields.Field) bool { - return f.NthChild == 0 && f.Parent.Parent != nil && f.Optional() -} - func writeCases(f fields.Field) []int { - return nil -} - -func nilField(i int, f fields.Field) string { - var flds []string - var count int - for j, o := range f.RepetitionTypes() { - flds = append(flds, f.FieldNames()[j]) - if o == fields.Optional { - count++ - } - if count > i { - break + var out []int + md := f.MaxDef() + chain := fields.Reverse(f.Chain()) + start := 1 + for _, f := range chain { + if f.RepetitionType != fields.Required && f.Defined && start < md { + start++ } } - return strings.Join(flds, ".") + + for def := start; def <= md; def++ { + out = append(out, def) + } + return out } diff --git a/internal/dremel/write_test.go b/internal/dremel/write_test.go index 562440d..716346e 100644 --- a/internal/dremel/write_test.go +++ b/internal/dremel/write_test.go @@ -12,9 +12,10 @@ import ( func TestWrite(t *testing.T) { testCases := []struct { - name string - field fields.Field - result string + structName string + name string + field fields.Field + result string }{ { name: "required and not nested", @@ -48,7 +49,8 @@ func TestWrite(t *testing.T) { {FieldName: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ {FieldType: "int32", TypeName: "int32", FieldName: "Difficulty", RepetitionType: fields.Required}, }}, - }}, + }, + }, result: `func writeOtherHobbyDifficulty(x *Person, vals []int32) { x.Other.Hobby.Difficulty = vals[0] }`, @@ -154,8 +156,8 @@ func TestWrite(t *testing.T) { name: "mix of optional and required and nested 3 deep v2 and seen by optional field", field: fields.Field{ FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Optional}, {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Optional}, {FieldType: "string", TypeName: "*string", FieldName: "Name", RepetitionType: fields.Optional}, }}, }, @@ -163,10 +165,8 @@ func TestWrite(t *testing.T) { result: `func writeFriendHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { - case 1: - x.Friend.Hobby = &Item{} case 2: - x.Friend.Hobby = &Item{Name: pstring(vals[0])} + x.Friend.Hobby.Name = pstring(vals[0]) return 1, 1 } @@ -244,7 +244,15 @@ func TestWrite(t *testing.T) { }, { name: "four deep", - //fields: []fields.Field{{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Optional}}}, + field: fields.Field{ + FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "*string", FieldName: "First", RepetitionType: fields.Optional}, + }}, + }}, + }, + }, result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -264,35 +272,25 @@ func TestWrite(t *testing.T) { }, { name: "four deep and seen by optional field", - // fields: []fields.Field{ - // {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Optional}}, - // }, + field: fields.Field{ + FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Optional}, + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "*string", FieldName: "First", RepetitionType: fields.Optional}, + }}, + }}, + }, + }, result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { - case 1: - if x.Friend == nil { - x.Friend = &Entity{} - } case 2: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{}} - } else { - x.Friend.Hobby = &Item{} - } + x.Friend.Hobby = &Item{} case 3: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{Name: &Name{}}} - } else { - x.Friend.Hobby = &Item{Name: &Name{}} - } + x.Friend.Hobby = &Item{Name: &Name{}} case 4: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{Name: &Name{First: pstring(vals[0])}}} - } else { - x.Friend.Hobby = &Item{Name: &Name{First: pstring(vals[0])}} - } + x.Friend.Hobby = &Item{Name: &Name{First: pstring(vals[0])}} return 1, 1 } @@ -301,7 +299,15 @@ func TestWrite(t *testing.T) { }, { name: "four deep mixed", - //fields: []fields.Field{{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}}, + field: fields.Field{ + FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "*string", FieldName: "First", RepetitionType: fields.Optional}, + }}, + }}, + }, + }, result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -319,10 +325,16 @@ func TestWrite(t *testing.T) { }, { name: "four deep mixed and seen by a required sub-field", - // fields: []fields.Field{ - // {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional}}, - // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Required, fields.Optional, fields.Optional, fields.Optional}}, - // }, + field: fields.Field{ + FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ + {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Required}, + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "*string", FieldName: "First", RepetitionType: fields.Optional}, + }}, + }}, + }, + }, result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -340,7 +352,15 @@ func TestWrite(t *testing.T) { }, { name: "four deep mixed v2", - //fields: []fields.Field{{Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}}, + field: fields.Field{ + FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "*string", FieldName: "First", RepetitionType: fields.Required}, + }}, + }}, + }, + }, result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { @@ -362,25 +382,23 @@ func TestWrite(t *testing.T) { // {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}, // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, // }, + field: fields.Field{ + FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Optional}, + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "First", RepetitionType: fields.Required}, + }}, + }}, + }, + }, result: `func writeFriendHobbyNameFirst(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { - case 1: - if x.Friend == nil { - x.Friend = &Entity{} - } case 2: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{}} - } else { - x.Friend.Hobby = &Item{} - } + x.Friend.Hobby = &Item{} case 3: - if x.Friend == nil { - x.Friend = &Entity{Hobby: &Item{Name: &Name{First: vals[0]}}} - } else { - x.Friend.Hobby = &Item{Name: &Name{First: vals[0]}} - } + x.Friend.Hobby = &Item{Name: &Name{First: vals[0]}} return 1, 1 } @@ -388,11 +406,14 @@ func TestWrite(t *testing.T) { }`, }, { - name: "writeLinkBackward", - // fields: []fields.Field{ - // {Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Backward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - // }, - result: `func writeLinkBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) { + name: "writeLinkBackward", + structName: "Document", + field: fields.Field{ + FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Backward", RepetitionType: fields.Repeated}, + }, + }, + result: `func writeLinkBackward(x *Document, vals []string, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -412,7 +433,7 @@ func TestWrite(t *testing.T) { case 2: switch rep { case 0: - x.Link = &Link{Backward: []int64{vals[nVals]}} + x.Link = &Link{Backward: []string{vals[nVals]}} case 1: x.Link.Backward = append(x.Link.Backward, vals[nVals]) } @@ -424,12 +445,15 @@ func TestWrite(t *testing.T) { }`, }, { - name: "writeLinkFoward", - // fields: []fields.Field{ - // {FieldNames: []string{"Link", "Backward"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - // {Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - // }, - result: `func writeLinkForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { + name: "writeLinkFoward", + structName: "Document", + field: fields.Field{ + FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Backward", RepetitionType: fields.Repeated}, + {FieldType: "string", TypeName: "string", FieldName: "Forward", RepetitionType: fields.Repeated}, + }, + }, + result: `func writeLinkForward(x *Document, vals []string, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -446,7 +470,9 @@ func TestWrite(t *testing.T) { switch def { case 2: switch rep { - default: + case 0: + x.Link.Forward = []string{vals[nVals]} + case 1: x.Link.Forward = append(x.Link.Forward, vals[nVals]) } nVals++ @@ -457,10 +483,15 @@ func TestWrite(t *testing.T) { }`, }, { - name: "writeNamesLanguagesCode", - // fields: []fields.Field{ - // {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - // }, + name: "writeNamesLanguagesCode", + structName: "Document", + field: fields.Field{ + FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldType: "Language", FieldName: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Code", RepetitionType: fields.Required}, + }}, + }, + }, result: `func writeNamesLanguagesCode(x *Document, vals []string, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 2) @@ -500,6 +531,15 @@ func TestWrite(t *testing.T) { // {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, // {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, // }, + structName: "Document", + field: fields.Field{ + FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldType: "Language", FieldName: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Code", RepetitionType: fields.Required}, + {FieldType: "string", TypeName: "*string", FieldName: "Country", RepetitionType: fields.Optional}, + }}, + }, + }, result: `func writeNamesLanguagesCountry(x *Document, vals []string, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 2) @@ -517,7 +557,7 @@ func TestWrite(t *testing.T) { switch def { case 3: switch rep { - default: + case 0, 2: x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) } nVals++ @@ -528,10 +568,13 @@ func TestWrite(t *testing.T) { }`, }, { - name: "writeFriendsID", - // fields: []fields.Field{ - // {Type: "Person", FieldNames: []string{"Friends", "ID"}, FieldTypes: []string{"Being", "int32"}, TypeName: "int32", FieldType: "Int32OptionalField", ParquetType: "Int32Type", Category: "numericOptional", RepetitionTypes: []fields.RepetitionType{2, 0}}, - // }, + name: "writeFriendsID", + structName: "Person", + field: fields.Field{ + FieldName: "Friends", FieldType: "Being", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldType: "int32", TypeName: "int32", FieldName: "ID", RepetitionType: fields.Required}, + }, + }, result: `func writeFriendsID(x *Person, vals []int32, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -562,10 +605,11 @@ func TestWrite(t *testing.T) { }`, }, { - name: "repeated primitive", - // fields: []fields.Field{ - // {Type: "Document", TypeName: "int64", FieldNames: []string{"LuckyNumbers"}, FieldTypes: []string{"int64"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated}}, - // }, + name: "repeated primitive", + structName: "Document", + field: fields.Field{ + FieldName: "LuckyNumbers", TypeName: "int64", FieldType: "int64", RepetitionType: fields.Repeated, + }, result: `func writeLuckyNumbers(x *Document, vals []int64, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -596,12 +640,15 @@ func TestWrite(t *testing.T) { }`, }, { - name: "repeated field not handled by previous repeated field", - // fields: []fields.Field{ - // {FieldNames: []string{"Link", "ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Required}}, - // {Type: "Document", TypeName: "int64", FieldNames: []string{"Link", "Forward"}, FieldTypes: []string{"Link", "int64"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated}}, - // }, - result: `func writeLinkForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { + name: "repeated field not handled by previous repeated field", + structName: "Document", + field: fields.Field{ + FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "ID", RepetitionType: fields.Required}, + {FieldType: "string", TypeName: "string", FieldName: "Forward", RepetitionType: fields.Repeated}, + }, + }, + result: `func writeLinkForward(x *Document, vals []string, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -616,18 +663,10 @@ func TestWrite(t *testing.T) { ind.rep(rep) switch def { - case 1: - if x.Link == nil { - x.Link = &Link{} - } case 2: switch rep { case 0: - if x.Link == nil { - x.Link = &Link{Forward: []int64{vals[nVals]}} - } else { - x.Link.Forward = append(x.Link.Forward, vals[nVals]) - } + x.Link.Forward = []string{vals[nVals]} case 1: x.Link.Forward = append(x.Link.Forward, vals[nVals]) } @@ -639,11 +678,16 @@ func TestWrite(t *testing.T) { }`, }, { - name: "nested 2 deep", - // fields: []fields.Field{ - // {FieldNames: []string{"Hobby", "Skills", "Name"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required}}, - // {Type: "Person", TypeName: "string", FieldNames: []string{"Hobby", "Skills", "Difficulty"}, FieldTypes: []string{"Hobby", "Skill", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Repeated, fields.Required}}, - // }, + name: "nested 2 deep", + structName: "Person", + field: fields.Field{ + FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Skills", FieldType: "Skill", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Required}, + {FieldType: "string", TypeName: "string", FieldName: "Difficulty", RepetitionType: fields.Required}, + }}, + }, + }, result: `func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -661,9 +705,7 @@ func TestWrite(t *testing.T) { switch def { case 2: switch rep { - case 0: - x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] - case 1: + case 0, 1: x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] } nVals++ @@ -677,9 +719,13 @@ func TestWrite(t *testing.T) { for i, tc := range testCases { t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { - flds := fields.Field{Type: "Person", Children: []fields.Field{tc.field}}.Fields() - s := dremel.Write(flds[len(flds)-1]) - fmt.Println(s) + ty := tc.structName + if ty == "" { + ty = "Person" + } + flds := fields.Field{Type: ty, Children: []fields.Field{tc.field}}.Fields() + f := flds[len(flds)-1] + s := dremel.Write(f) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.result, string(gocode)) diff --git a/internal/fields/fields.go b/internal/fields/fields.go index 60cc166..30847f2 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -21,7 +21,7 @@ type Field struct { Embedded bool Children []Field NthChild int - defined bool + Defined bool } type input struct { @@ -61,10 +61,20 @@ func (f Field) Chain() []Field { for fld := f.Parent; fld != nil; fld = fld.Parent { out = append(out, *fld) } + var defined bool + for i, fld := range out { + fld.Defined = defined + out[i] = fld + if fld.Parent != nil && fld.NthChild > 0 { + fld.Parent.Defined = true + defined = true + } + } + return out } -func reverse(out []Field) []Field { +func Reverse(out []Field) []Field { for i, j := 0, len(out)-1; i < j; i, j = i+1, j-1 { out[i], out[j] = out[j], out[i] } @@ -73,7 +83,7 @@ func reverse(out []Field) []Field { func (f Field) FieldNames() []string { var out []string - for _, fld := range reverse(f.Chain()) { + for _, fld := range Reverse(f.Chain()) { if fld.FieldName != "" { out = append(out, fld.FieldName) } @@ -83,7 +93,7 @@ func (f Field) FieldNames() []string { func (f Field) FieldTypes() []string { var out []string - for _, fld := range reverse(f.Chain()) { + for _, fld := range Reverse(f.Chain()) { if fld.FieldType != "" { out = append(out, fld.FieldType) } @@ -93,7 +103,7 @@ func (f Field) FieldTypes() []string { func (f Field) ColumnNames() []string { var out []string - for _, fld := range reverse(f.Chain()) { + for _, fld := range Reverse(f.Chain()) { if fld.ColumnName != "" { out = append(out, fld.ColumnName) } @@ -103,17 +113,17 @@ func (f Field) ColumnNames() []string { func (f Field) RepetitionTypes() RepetitionTypes { var out []RepetitionType - for _, fld := range reverse(f.Chain()) { + for _, fld := range Reverse(f.Chain()) { out = append(out, fld.RepetitionType) } - return out + return out[1:] } // DefIndex calculates the index of the // nested field with the given definition level. func (f Field) DefIndex(def int) int { var count, i int - for _, fld := range reverse(f.Chain()) { + for _, fld := range Reverse(f.Chain()) { if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { count++ } @@ -129,7 +139,7 @@ func (f Field) DefIndex(def int) int { // level for the nested field. func (f Field) MaxDef() int { var out int - for _, fld := range reverse(f.Chain()) { + for _, fld := range Reverse(f.Chain()) { if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { out++ } @@ -141,7 +151,28 @@ func (f Field) MaxDef() int { // level for the nested field. func (f Field) MaxRep() int { var out int - for _, fld := range reverse(f.Chain()) { + for _, fld := range Reverse(f.Chain()) { + if fld.RepetitionType == Repeated { + out++ + } + } + return out +} + +// MaxRepForDef cacluates the largest possible repetition +// level for the nested field at the given definition level. +func (f Field) MaxRepForDef(def int) int { + var out int + var defs int + for _, fld := range Reverse(f.Chain()) { + if fld.RepetitionType == Repeated || fld.RepetitionType == Optional { + defs++ + } + + if defs == def { + return out + } + if fld.RepetitionType == Repeated { out++ } @@ -159,10 +190,11 @@ type RepCase struct { // RepCases returns a RepCase slice based on the field types and // what sub-fields have already been seen. -func (f Field) RepCases(seen RepetitionTypes) []RepCase { +func (f Field) RepCases() []RepCase { mr := int(f.MaxRep()) - if mr == int(seen.MaxRep()) { - return []RepCase{{Case: "default:"}} + + if f.RepetitionType != Repeated && f.Parent != nil && f.Parent.RepetitionType == Repeated && f.Parent.Defined { + return []RepCase{{Case: fmt.Sprintf("case 0, %d:", mr)}} } var out []RepCase @@ -198,7 +230,7 @@ func (f Field) NilField(n int) (string, RepetitionType, int, int) { // Child returns a sub-field based on i func (f Field) Child(i int) Field { - return reverse(f.Chain())[i] + return Reverse(f.Chain())[i] } // Repeated wraps RepetitionTypes.Repeated() @@ -217,7 +249,7 @@ func (f Field) Required() bool { } func (f Field) rightComplete(fld Field, i, def, rep, maxDef, maxRep, defs, reps int) bool { - if fld.RepetitionType == Optional && rep == 0 && !fld.defined { + if fld.RepetitionType == Optional && rep == 0 && !fld.Defined { return true } @@ -230,7 +262,7 @@ func (f Field) rightComplete(fld Field, i, def, rep, maxDef, maxRep, defs, reps } //if rep == 0 && fld.RepetitionType != Required && (fld.RepetitionType == Repeated || f.RepetitionType == Repeated) { - if rep == 0 && fld.RepetitionType == Repeated && !fld.defined { + if rep == 0 && fld.RepetitionType == Repeated && !fld.Defined { return true } @@ -250,16 +282,8 @@ func (f Field) Init(def, rep int) string { left, right := "%s", "%s" chain := f.Chain() - var defined bool - for i, fld := range chain { - fld.defined = defined - chain[i] = fld - if fld.Parent != nil && fld.NthChild > 0 { - defined = true - } - } - chain = reverse(chain) + chain = Reverse(chain) var i int for _, fld = range chain { @@ -297,9 +321,9 @@ func (f Field) Init(def, rep int) string { } left = fmt.Sprintf(left, "") - defs = 0 + for j, fld := range chain[i:] { - if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { + if j > 0 && (fld.RepetitionType == Optional || fld.RepetitionType == Repeated) { defs++ } @@ -310,12 +334,14 @@ func (f Field) Init(def, rep int) string { switch fld.RepetitionType { case Required: if fld.Primitive() { - if fld.Parent.RepetitionType == Repeated && rep < maxRep { //need one more case: + if (fld.Parent.Parent == nil || fld.Parent.Defined) && fld.Parent.RepetitionType == Repeated && rep == 0 { //Should this be a check for repated anywhere in the full chain? + right = fmt.Sprintf(right, "vals[nVals]%s") + } else if (fld.Parent.Parent == nil || fld.Parent.Defined) && rep == 0 { + right = fmt.Sprintf(right, "vals[0]%s") + } else if fld.Parent.RepetitionType == Repeated && rep < maxRep { //need one more case: right = fmt.Sprintf(right, fmt.Sprintf("{%s: vals[nVals]}%%s", fld.FieldName)) } else if fld.Parent.RepetitionType == Repeated { right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[nVals]%%s", fld.FieldName)) - } else if f.NthChild > 0 { - right = fmt.Sprintf(right, "vals[0]%s") } else { right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[0]%%s", fld.FieldName)) } @@ -350,7 +376,7 @@ func (f Field) Init(def, rep int) string { if fld.Primitive() { if rep == 0 && fld.Parent.RepetitionType == Repeated { right = fmt.Sprintf(right, fmt.Sprintf("{%s: []%s{vals[nVals]}}%%s", fld.FieldName, fld.FieldType)) - } else if fld.Parent.Parent == nil && rep == 0 { + } else if (fld.Parent.Parent == nil || fld.Parent.Defined) && rep == 0 { right = fmt.Sprintf(right, fmt.Sprintf("[]%s{vals[nVals]}%%s", fld.FieldType)) } else if rep == 0 { right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{vals[nVals]}%%s", fld.FieldName, fld.FieldType)) @@ -374,7 +400,7 @@ func (f Field) Init(def, rep int) string { } } - if defs >= def && fld.RepetitionType != Required && def < maxDef { + if def != maxDef && defs >= def { break } } diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go index 27f526d..25b03d8 100644 --- a/internal/fields/fields_test.go +++ b/internal/fields/fields_test.go @@ -353,6 +353,18 @@ func TestInit(t *testing.T) { def: 1, expected: "x.Friend.Hobby = &Item{}", }, + { + fields: []fields.Field{ + {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + }}, + }}, + }, + def: 2, + expected: "x.Friend.Hobby = &Item{}", + }, { fields: []fields.Field{ {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ @@ -431,6 +443,20 @@ func TestInit(t *testing.T) { def: 3, expected: "x.Friend.Hobby.Name.First = pstring(vals[0])", }, + { + fields: []fields.Field{ + { + FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Skills", FieldType: "Skill", RepetitionType: fields.Repeated, Children: []fields.Field{ + {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Required}, + {FieldType: "string", TypeName: "string", FieldName: "Difficulty", RepetitionType: fields.Required}, + }}, + }, + }, + }, + def: 2, + expected: "x.Hobby.Skills[ind[0]].Difficulty = vals[nVals]", + }, { fields: []fields.Field{ {FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ @@ -442,6 +468,17 @@ func TestInit(t *testing.T) { def: 2, expected: "x.Link.Forward = append(x.Link.Forward, vals[nVals])", }, + { + fields: []fields.Field{ + {FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {FieldName: "Name", FieldType: "string", RepetitionType: fields.Repeated}, + {FieldName: "Forward", FieldType: "string", RepetitionType: fields.Repeated}, + }}, + }, + rep: 0, + def: 2, + expected: "x.Link.Forward = []string{vals[nVals]}", + }, { fields: []fields.Field{ {FieldName: "LuckyNumbers", FieldType: "int64", RepetitionType: fields.Repeated}, @@ -501,6 +538,7 @@ func TestInit(t *testing.T) { fields := fields.Field{Children: tc.fields}.Fields() field := fields[len(fields)-1] s := field.Init(tc.def, tc.rep) + fmt.Println(s) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.expected, string(gocode)) From 6d4f3e95af932b1b118b906a2dfca5119164e7b4 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Sat, 12 Jun 2021 09:44:06 -0600 Subject: [PATCH 10/25] wip --- cmd/parquetgen/main.go | 9 +- internal/dremel/testcases/doc/generated.go | 694 +----------------- internal/dremel/testcases/person/generated.go | 240 ++---- internal/fields/fields.go | 4 + internal/gen/gen.go | 53 +- internal/gen/template.go | 8 +- internal/gen/template_bool.go | 4 +- internal/gen/template_bool_optional.go | 4 +- internal/gen/template_optional.go | 10 +- internal/gen/template_required.go | 10 +- internal/gen/template_string.go | 10 +- internal/gen/template_string_optional.go | 10 +- internal/parse/fields_test.go | 13 +- 13 files changed, 110 insertions(+), 959 deletions(-) diff --git a/cmd/parquetgen/main.go b/cmd/parquetgen/main.go index ef37ee7..8865600 100644 --- a/cmd/parquetgen/main.go +++ b/cmd/parquetgen/main.go @@ -32,14 +32,19 @@ func main() { log.Fatal("choose -parquet or -input, but not both") } + var err error if *metadata { readFooter() } else if *pageheaders { readPageHeaders() } else if *parq == "" { - gen.FromStruct(*pth, *outPth, *typ, *pkg, *imp, *ignore) + err = gen.FromStruct(*pth, *outPth, *typ, *pkg, *imp, *ignore) } else { - gen.FromParquet(*parq, *structOutPth, *outPth, *typ, *pkg, *imp, *ignore) + err = gen.FromParquet(*parq, *structOutPth, *outPth, *typ, *pkg, *imp, *ignore) + } + + if err != nil { + log.Fatal(err) } } diff --git a/internal/dremel/testcases/doc/generated.go b/internal/dremel/testcases/doc/generated.go index cf07e11..55fa643 100644 --- a/internal/dremel/testcases/doc/generated.go +++ b/internal/dremel/testcases/doc/generated.go @@ -11,9 +11,6 @@ import ( "github.com/parsyl/parquet" sch "github.com/parsyl/parquet/schema" - - "math" - "sort" ) type compression int @@ -44,323 +41,7 @@ type ParquetWriter struct { } func Fields(compression compression) []Field { - return []Field{ - NewInt64Field(readDocID, writeDocID, []string{"docid"}, fieldCompression(compression)), - NewInt64OptionalField(readLinksBackward, writeLinksBackward, []string{"link", "backward"}, []int{1, 2}, optionalFieldCompression(compression)), - NewInt64OptionalField(readLinksForward, writeLinksForward, []string{"link", "forward"}, []int{1, 2}, optionalFieldCompression(compression)), - NewStringOptionalField(readNamesLanguagesCode, writeNamesLanguagesCode, []string{"names", "languages", "code"}, []int{2, 2, 0}, optionalFieldCompression(compression)), - NewStringOptionalField(readNamesLanguagesCountry, writeNamesLanguagesCountry, []string{"names", "languages", "country"}, []int{2, 2, 1}, optionalFieldCompression(compression)), - NewStringOptionalField(readNamesURL, writeNamesURL, []string{"names", "url"}, []int{2, 1}, optionalFieldCompression(compression)), - } -} - -func readDocID(x Document) int64 { - return x.DocID -} - -func writeDocID(x *Document, vals []int64) { - x.DocID = vals[0] -} - -func readLinksBackward(x Document) ([]int64, []uint8, []uint8) { - var vals []int64 - var defs, reps []uint8 - var lastRep uint8 - - if x.Links == nil { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - if len(x.Links.Backward) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Links.Backward { - if i0 == 1 { - lastRep = 1 - } - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, x0) - } - } - } - - return vals, defs, reps -} - -func writeLinksBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 1) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 1: - x.Links = &Link{} - case 2: - switch rep { - case 0: - x.Links = &Link{Backward: []int64{vals[nVals]}} - case 1: - x.Links.Backward = append(x.Links.Backward, vals[nVals]) - } - nVals++ - } - } - - return nVals, nLevels -} - -func readLinksForward(x Document) ([]int64, []uint8, []uint8) { - var vals []int64 - var defs, reps []uint8 - var lastRep uint8 - - if x.Links == nil { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - if len(x.Links.Forward) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Links.Forward { - if i0 == 1 { - lastRep = 1 - } - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, x0) - } - } - } - - return vals, defs, reps -} - -func writeLinksForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 1) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 2: - switch rep { - case 0: - if x.Links == nil { - x.Links = &Link{Forward: []int64{vals[nVals]}} - } else { - x.Links.Forward = append(x.Links.Forward, vals[nVals]) - } - default: - x.Links.Forward = append(x.Links.Forward, vals[nVals]) - } - nVals++ - } - } - - return nVals, nLevels -} - -func readNamesLanguagesCode(x Document) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 - - if len(x.Names) == 0 { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Names { - if i0 == 1 { - lastRep = 1 - } - if len(x0.Languages) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i1, x1 := range x0.Languages { - if i1 == 1 { - lastRep = 2 - } - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, x1.Code) - } - } - } - } - - return vals, defs, reps -} - -func writeNamesLanguagesCode(x *Document, vals []string, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 2) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 1: - x.Names = append(x.Names, Name{}) - case 2: - switch rep { - case 0: - x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}} - case 1: - x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}}) - case 2: - x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]}) - } - nVals++ - } - } - - return nVals, nLevels -} - -func readNamesLanguagesCountry(x Document) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 - - if len(x.Names) == 0 { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Names { - if i0 == 1 { - lastRep = 1 - } - if len(x0.Languages) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i1, x1 := range x0.Languages { - if i1 == 1 { - lastRep = 2 - } - if x1.Country == nil { - defs = append(defs, 2) - reps = append(reps, lastRep) - } else { - defs = append(defs, 3) - reps = append(reps, lastRep) - vals = append(vals, *x1.Country) - } - } - } - } - } - - return vals, defs, reps -} - -func writeNamesLanguagesCountry(x *Document, vals []string, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 2) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 3: - switch rep { - default: - x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) - } - nVals++ - } - } - - return nVals, nLevels -} - -func readNamesURL(x Document) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 - - if len(x.Names) == 0 { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Names { - if i0 == 1 { - lastRep = 1 - } - if x0.URL == nil { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, *x0.URL) - } - } - } - - return vals, defs, reps -} - -func writeNamesURL(x *Document, vals []string, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 1) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 2: - switch rep { - default: - x.Names[ind[0]].URL = pstring(vals[nVals]) - } - nVals++ - } - } - - return nVals, nLevels + return []Field{} } func fieldCompression(c compression) func(*parquet.RequiredField) { @@ -681,379 +362,6 @@ func (p *ParquetReader) Scan(x *Document) { } } -type Int64Field struct { - vals []int64 - parquet.RequiredField - read func(r Document) int64 - write func(r *Document, vals []int64) - stats *int64stats -} - -func NewInt64Field(read func(r Document) int64, write func(r *Document, vals []int64), path []string, opts ...func(*parquet.RequiredField)) *Int64Field { - return &Int64Field{ - read: read, - write: write, - RequiredField: parquet.NewRequiredField(path, opts...), - stats: newInt64stats(), - } -} - -func (f *Int64Field) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int64Type, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} -} - -func (f *Int64Field) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - v := make([]int64, int(pg.N)) - err = binary.Read(rr, binary.LittleEndian, &v) - f.vals = append(f.vals, v...) - return err -} - -func (f *Int64Field) Write(w io.Writer, meta *parquet.Metadata) error { - var buf bytes.Buffer - for _, v := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { - return err - } - } - return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) -} - -func (f *Int64Field) Scan(r *Document) { - if len(f.vals) == 0 { - return - } - - f.write(r, f.vals) - f.vals = f.vals[1:] -} - -func (f *Int64Field) Add(r Document) { - v := f.read(r) - f.stats.add(v) - f.vals = append(f.vals, v) -} - -func (f *Int64Field) Levels() ([]uint8, []uint8) { - return nil, nil -} - -type Int64OptionalField struct { - parquet.OptionalField - vals []int64 - read func(r Document) ([]int64, []uint8, []uint8) - write func(r *Document, vals []int64, def, rep []uint8) (int, int) - stats *int64optionalStats -} - -func NewInt64OptionalField(read func(r Document) ([]int64, []uint8, []uint8), write func(r *Document, vals []int64, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Int64OptionalField { - return &Int64OptionalField{ - read: read, - write: write, - OptionalField: parquet.NewOptionalField(path, types, opts...), - stats: newint64optionalStats(maxDef(types)), - } -} - -func (f *Int64OptionalField) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int64Type, RepetitionType: f.RepetitionType, Types: f.Types} -} - -func (f *Int64OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { - var buf bytes.Buffer - for _, v := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { - return err - } - } - return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) -} - -func (f *Int64OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - v := make([]int64, f.Values()-len(f.vals)) - err = binary.Read(rr, binary.LittleEndian, &v) - f.vals = append(f.vals, v...) - return err -} - -func (f *Int64OptionalField) Add(r Document) { - vals, defs, reps := f.read(r) - f.stats.add(vals, defs) - f.vals = append(f.vals, vals...) - f.Defs = append(f.Defs, defs...) - f.Reps = append(f.Reps, reps...) -} - -func (f *Int64OptionalField) Scan(r *Document) { - if len(f.Defs) == 0 { - return - } - - v, l := f.write(r, f.vals, f.Defs, f.Reps) - f.vals = f.vals[v:] - f.Defs = f.Defs[l:] - if len(f.Reps) > 0 { - f.Reps = f.Reps[l:] - } -} - -func (f *Int64OptionalField) Levels() ([]uint8, []uint8) { - return f.Defs, f.Reps -} - -type StringOptionalField struct { - parquet.OptionalField - vals []string - read func(r Document) ([]string, []uint8, []uint8) - write func(r *Document, vals []string, def, rep []uint8) (int, int) - stats *stringOptionalStats -} - -func NewStringOptionalField(read func(r Document) ([]string, []uint8, []uint8), write func(r *Document, vals []string, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *StringOptionalField { - return &StringOptionalField{ - read: read, - write: write, - OptionalField: parquet.NewOptionalField(path, types, opts...), - stats: newStringOptionalStats(maxDef(types)), - } -} - -func (f *StringOptionalField) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: f.RepetitionType, Types: f.Types} -} - -func (f *StringOptionalField) Add(r Document) { - vals, defs, reps := f.read(r) - f.stats.add(vals, defs) - f.vals = append(f.vals, vals...) - f.Defs = append(f.Defs, defs...) - f.Reps = append(f.Reps, reps...) -} - -func (f *StringOptionalField) Scan(r *Document) { - if len(f.Defs) == 0 { - return - } - - v, l := f.write(r, f.vals, f.Defs, f.Reps) - f.vals = f.vals[v:] - f.Defs = f.Defs[l:] - if len(f.Reps) > 0 { - f.Reps = f.Reps[l:] - } -} - -func (f *StringOptionalField) Write(w io.Writer, meta *parquet.Metadata) error { - buf := bytes.Buffer{} - - for _, s := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { - return err - } - buf.Write([]byte(s)) - } - - return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) -} - -func (f *StringOptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - for j := 0; j < f.Values(); j++ { - var x int32 - if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { - return err - } - s := make([]byte, x) - if _, err := rr.Read(s); err != nil { - return err - } - - f.vals = append(f.vals, string(s)) - } - return nil -} - -func (f *StringOptionalField) Levels() ([]uint8, []uint8) { - return f.Defs, f.Reps -} - -type int64stats struct { - min int64 - max int64 -} - -func newInt64stats() *int64stats { - return &int64stats{ - min: int64(math.MaxInt64), - } -} - -func (i *int64stats) add(val int64) { - if val < i.min { - i.min = val - } - if val > i.max { - i.max = val - } -} - -func (f *int64stats) bytes(val int64) []byte { - var buf bytes.Buffer - binary.Write(&buf, binary.LittleEndian, val) - return buf.Bytes() -} - -func (f *int64stats) NullCount() *int64 { - return nil -} - -func (f *int64stats) DistinctCount() *int64 { - return nil -} - -func (f *int64stats) Min() []byte { - return f.bytes(f.min) -} - -func (f *int64stats) Max() []byte { - return f.bytes(f.max) -} - -type int64optionalStats struct { - min int64 - max int64 - nils int64 - nonNils int64 - maxDef uint8 -} - -func newint64optionalStats(d uint8) *int64optionalStats { - return &int64optionalStats{ - min: int64(math.MaxInt64), - maxDef: d, - } -} - -func (f *int64optionalStats) add(vals []int64, defs []uint8) { - var i int - for _, def := range defs { - if def < f.maxDef { - f.nils++ - } else { - val := vals[i] - i++ - - f.nonNils++ - if val < f.min { - f.min = val - } - if val > f.max { - f.max = val - } - } - } -} - -func (f *int64optionalStats) bytes(val int64) []byte { - var buf bytes.Buffer - binary.Write(&buf, binary.LittleEndian, val) - return buf.Bytes() -} - -func (f *int64optionalStats) NullCount() *int64 { - return &f.nils -} - -func (f *int64optionalStats) DistinctCount() *int64 { - return nil -} - -func (f *int64optionalStats) Min() []byte { - if f.nonNils == 0 { - return nil - } - return f.bytes(f.min) -} - -func (f *int64optionalStats) Max() []byte { - if f.nonNils == 0 { - return nil - } - return f.bytes(f.max) -} - -type stringOptionalStats struct { - vals []string - min []byte - max []byte - nils int64 - maxDef uint8 -} - -func newStringOptionalStats(d uint8) *stringOptionalStats { - return &stringOptionalStats{maxDef: d} -} - -func (s *stringOptionalStats) add(vals []string, defs []uint8) { - var i int - for _, def := range defs { - if def < s.maxDef { - s.nils++ - } else { - s.vals = append(s.vals, vals[i]) - i++ - } - } -} - -func (s *stringOptionalStats) NullCount() *int64 { - return &s.nils -} - -func (s *stringOptionalStats) DistinctCount() *int64 { - return nil -} - -func (s *stringOptionalStats) Min() []byte { - if s.min == nil { - s.minMax() - } - return s.min -} - -func (s *stringOptionalStats) Max() []byte { - if s.max == nil { - s.minMax() - } - return s.max -} - -func (s *stringOptionalStats) minMax() { - if len(s.vals) == 0 { - return - } - - tmp := make([]string, len(s.vals)) - copy(tmp, s.vals) - sort.Strings(tmp) - s.min = []byte(tmp[0]) - s.max = []byte(tmp[len(tmp)-1]) -} - func pint32(i int32) *int32 { return &i } func puint32(i uint32) *uint32 { return &i } func pint64(i int64) *int64 { return &i } diff --git a/internal/dremel/testcases/person/generated.go b/internal/dremel/testcases/person/generated.go index f328dc3..8181dbe 100644 --- a/internal/dremel/testcases/person/generated.go +++ b/internal/dremel/testcases/person/generated.go @@ -45,32 +45,32 @@ type ParquetWriter struct { func Fields(compression compression) []Field { return []Field{ - NewStringField(readName, writeName, []string{"name"}, fieldCompression(compression)), - NewStringOptionalField(readHobbyName, writeHobbyName, []string{"hobby", "name"}, []int{1, 0}, optionalFieldCompression(compression)), - NewInt32OptionalField(readHobbyDifficulty, writeHobbyDifficulty, []string{"hobby", "difficulty"}, []int{1, 1}, optionalFieldCompression(compression)), - NewStringOptionalField(readHobbySkillsName, writeHobbySkillsName, []string{"hobby", "skills", "name"}, []int{1, 2, 0}, optionalFieldCompression(compression)), - NewStringOptionalField(readHobbySkillsDifficulty, writeHobbySkillsDifficulty, []string{"hobby", "skills", "difficulty"}, []int{1, 2, 0}, optionalFieldCompression(compression)), + Newstring(readPersonName, writePersonName, []string{"Person", "name"}, fieldCompression(compression)), + Newstring(readPersonHobbyName, writePersonHobbyName, []string{"Person", "hobby", "name"}, []int{1, 0}, fieldCompression(compression)), + Newint32(readPersonHobbyDifficulty, writePersonHobbyDifficulty, []string{"Person", "hobby", "difficulty"}, []int{1, 1}, fieldCompression(compression)), + Newstring(readPersonHobbySkillsName, writePersonHobbySkillsName, []string{"Person", "hobby", "skills", "name"}, []int{1, 2, 0}, fieldCompression(compression)), + Newstring(readPersonHobbySkillsDifficulty, writePersonHobbySkillsDifficulty, []string{"Person", "hobby", "skills", "difficulty"}, []int{1, 2, 0}, fieldCompression(compression)), } } -func readName(x Person) string { - return x.Name +func readPersonName(x Person) string { + return x.Person.Name } -func writeName(x *Person, vals []string) { - x.Name = vals[0] +func writePersonName(x *Person, vals []string) { + x.Person.Name = vals[0] } -func readHobbyName(x Person) ([]string, []uint8, []uint8) { +func readPersonHobbyName(x Person) ([]string, []uint8, []uint8) { switch { - case x.Hobby == nil: + case x.Person == nil: return nil, []uint8{0}, nil default: - return []string{x.Hobby.Name}, []uint8{1}, nil + return []string{x.Person.Hobby}, []uint8{1}, nil } } -func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { +func writePersonHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { case 1: @@ -81,56 +81,48 @@ func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { return 0, 1 } -func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { +func readPersonHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { switch { - case x.Hobby == nil: + case x.Person == nil: return nil, []uint8{0}, nil - case x.Hobby.Difficulty == nil: + case x.Person.Hobby == nil: return nil, []uint8{1}, nil default: - return []int32{*x.Hobby.Difficulty}, []uint8{2}, nil + return []int32{*x.Person.Hobby}, []uint8{2}, nil } } -func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { +func writePersonHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { def := defs[0] switch def { - case 1: - if x.Hobby == nil { - x.Hobby = &Hobby{} - } case 2: - if x.Hobby == nil { - x.Hobby = &Hobby{Difficulty: pint32(vals[0])} - } else { - x.Hobby.Difficulty = pint32(vals[0]) - } + x.Hobby.Difficulty = pint32(vals[0]) return 1, 1 } return 0, 1 } -func readHobbySkillsName(x Person) ([]string, []uint8, []uint8) { +func readPersonHobbySkillsName(x Person) ([]string, []uint8, []uint8) { var vals []string var defs, reps []uint8 var lastRep uint8 - if x.Hobby == nil { + if x.Person == nil { defs = append(defs, 0) reps = append(reps, lastRep) } else { - if len(x.Hobby.Skills) == 0 { + if len(x.Person.Hobby) == 0 { defs = append(defs, 1) reps = append(reps, lastRep) } else { - for i0, x0 := range x.Hobby.Skills { + for i0, x0 := range x.Person.Hobby { if i0 == 1 { lastRep = 1 } defs = append(defs, 2) reps = append(reps, lastRep) - vals = append(vals, x0.Name) + vals = append(vals, x0.Skills.Name) } } } @@ -138,7 +130,7 @@ func readHobbySkillsName(x Person) ([]string, []uint8, []uint8) { return vals, defs, reps } -func writeHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, int) { +func writePersonHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -153,18 +145,10 @@ func writeHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, in ind.rep(rep) switch def { - case 1: - if x.Hobby == nil { - x.Hobby = &Hobby{} - } case 2: switch rep { case 0: - if x.Hobby == nil { - x.Hobby = &Hobby{Skills: []Skill{{Name: vals[nVals]}}} - } else { - x.Hobby.Skills = []Skill{{Name: vals[nVals]}} - } + x.Hobby.Skills = []Skill{{Name: vals[nVals]}} case 1: x.Hobby.Skills = append(x.Hobby.Skills, Skill{Name: vals[nVals]}) } @@ -175,26 +159,26 @@ func writeHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, in return nVals, nLevels } -func readHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { +func readPersonHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { var vals []string var defs, reps []uint8 var lastRep uint8 - if x.Hobby == nil { + if x.Person == nil { defs = append(defs, 0) reps = append(reps, lastRep) } else { - if len(x.Hobby.Skills) == 0 { + if len(x.Person.Hobby) == 0 { defs = append(defs, 1) reps = append(reps, lastRep) } else { - for i0, x0 := range x.Hobby.Skills { + for i0, x0 := range x.Person.Hobby { if i0 == 1 { lastRep = 1 } defs = append(defs, 2) reps = append(reps, lastRep) - vals = append(vals, x0.Difficulty) + vals = append(vals, x0.Skills.Difficulty) } } } @@ -202,7 +186,7 @@ func readHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { return vals, defs, reps } -func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (int, int) { +func writePersonHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -219,10 +203,8 @@ func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (i switch def { case 2: switch rep { - case 0: + case 0, 1: x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] - case 1: - x.Hobby.Skills = append(x.Hobby.Skills, Skill{Difficulty: vals[nVals]}) } nVals++ } @@ -623,87 +605,7 @@ func (f *StringField) Levels() ([]uint8, []uint8) { return nil, nil } -type StringOptionalField struct { - parquet.OptionalField - vals []string - read func(r Person) ([]string, []uint8, []uint8) - write func(r *Person, vals []string, def, rep []uint8) (int, int) - stats *stringOptionalStats -} - -func NewStringOptionalField(read func(r Person) ([]string, []uint8, []uint8), write func(r *Person, vals []string, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *StringOptionalField { - return &StringOptionalField{ - read: read, - write: write, - OptionalField: parquet.NewOptionalField(path, types, opts...), - stats: newStringOptionalStats(maxDef(types)), - } -} - -func (f *StringOptionalField) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: f.RepetitionType, Types: f.Types} -} - -func (f *StringOptionalField) Add(r Person) { - vals, defs, reps := f.read(r) - f.stats.add(vals, defs) - f.vals = append(f.vals, vals...) - f.Defs = append(f.Defs, defs...) - f.Reps = append(f.Reps, reps...) -} - -func (f *StringOptionalField) Scan(r *Person) { - if len(f.Defs) == 0 { - return - } - - v, l := f.write(r, f.vals, f.Defs, f.Reps) - f.vals = f.vals[v:] - f.Defs = f.Defs[l:] - if len(f.Reps) > 0 { - f.Reps = f.Reps[l:] - } -} - -func (f *StringOptionalField) Write(w io.Writer, meta *parquet.Metadata) error { - buf := bytes.Buffer{} - - for _, s := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { - return err - } - buf.Write([]byte(s)) - } - - return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) -} - -func (f *StringOptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - for j := 0; j < f.Values(); j++ { - var x int32 - if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { - return err - } - s := make([]byte, x) - if _, err := rr.Read(s); err != nil { - return err - } - - f.vals = append(f.vals, string(s)) - } - return nil -} - -func (f *StringOptionalField) Levels() ([]uint8, []uint8) { - return f.Defs, f.Reps -} - -type Int32OptionalField struct { +type int32 struct { parquet.OptionalField vals []int32 read func(r Person) ([]int32, []uint8, []uint8) @@ -711,8 +613,8 @@ type Int32OptionalField struct { stats *int32optionalStats } -func NewInt32OptionalField(read func(r Person) ([]int32, []uint8, []uint8), write func(r *Person, vals []int32, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Int32OptionalField { - return &Int32OptionalField{ +func Newint32(read func(r Person) ([]int32, []uint8, []uint8), write func(r *Person, vals []int32, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *int32 { + return &int32{ read: read, write: write, OptionalField: parquet.NewOptionalField(path, types, opts...), @@ -720,11 +622,11 @@ func NewInt32OptionalField(read func(r Person) ([]int32, []uint8, []uint8), writ } } -func (f *Int32OptionalField) Schema() parquet.Field { +func (f *int32) Schema() parquet.Field { return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int32Type, RepetitionType: f.RepetitionType, Types: f.Types} } -func (f *Int32OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { +func (f *int32) Write(w io.Writer, meta *parquet.Metadata) error { var buf bytes.Buffer for _, v := range f.vals { if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { @@ -734,7 +636,7 @@ func (f *Int32OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) } -func (f *Int32OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { +func (f *int32) Read(r io.ReadSeeker, pg parquet.Page) error { rr, _, err := f.DoRead(r, pg) if err != nil { return err @@ -746,7 +648,7 @@ func (f *Int32OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { return err } -func (f *Int32OptionalField) Add(r Person) { +func (f *int32) Add(r Person) { vals, defs, reps := f.read(r) f.stats.add(vals, defs) f.vals = append(f.vals, vals...) @@ -754,7 +656,7 @@ func (f *Int32OptionalField) Add(r Person) { f.Reps = append(f.Reps, reps...) } -func (f *Int32OptionalField) Scan(r *Person) { +func (f *int32) Scan(r *Person) { if len(f.Defs) == 0 { return } @@ -767,7 +669,7 @@ func (f *Int32OptionalField) Scan(r *Person) { } } -func (f *Int32OptionalField) Levels() ([]uint8, []uint8) { +func (f *int32) Levels() ([]uint8, []uint8) { return f.Defs, f.Reps } @@ -819,64 +721,6 @@ func (s *stringStats) minMax() { s.max = []byte(tmp[len(tmp)-1]) } -type stringOptionalStats struct { - vals []string - min []byte - max []byte - nils int64 - maxDef uint8 -} - -func newStringOptionalStats(d uint8) *stringOptionalStats { - return &stringOptionalStats{maxDef: d} -} - -func (s *stringOptionalStats) add(vals []string, defs []uint8) { - var i int - for _, def := range defs { - if def < s.maxDef { - s.nils++ - } else { - s.vals = append(s.vals, vals[i]) - i++ - } - } -} - -func (s *stringOptionalStats) NullCount() *int64 { - return &s.nils -} - -func (s *stringOptionalStats) DistinctCount() *int64 { - return nil -} - -func (s *stringOptionalStats) Min() []byte { - if s.min == nil { - s.minMax() - } - return s.min -} - -func (s *stringOptionalStats) Max() []byte { - if s.max == nil { - s.minMax() - } - return s.max -} - -func (s *stringOptionalStats) minMax() { - if len(s.vals) == 0 { - return - } - - tmp := make([]string, len(s.vals)) - copy(tmp, s.vals) - sort.Strings(tmp) - s.min = []byte(tmp[0]) - s.max = []byte(tmp[len(tmp)-1]) -} - type int32optionalStats struct { min int32 max int32 diff --git a/internal/fields/fields.go b/internal/fields/fields.go index 30847f2..1c0f059 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -31,6 +31,10 @@ type input struct { } func (f Field) StructType() string { + if f.Parent == nil { + return f.Type + } + var typ string for fld := f.Parent; fld != nil; fld = fld.Parent { typ = fld.Type diff --git a/internal/gen/gen.go b/internal/gen/gen.go index 41588e5..2eb1c4d 100644 --- a/internal/gen/gen.go +++ b/internal/gen/gen.go @@ -4,7 +4,6 @@ import ( "bytes" "fmt" "go/format" - "log" "os" "text/template" @@ -28,18 +27,14 @@ var ( // FromStruct generates a parquet reader and writer based on the struct // of type 'typ' that is defined in the go file at 'pth'. -func FromStruct(pth, outPth, typ, pkg, imp string, ignore bool) { +func FromStruct(pth, outPth, typ, pkg, imp string, ignore bool) error { result, err := parse.Fields(typ, pth) if err != nil { - log.Fatal(err) - } - - for _, err := range result.Errors { - log.Println(err) + return err } if len(result.Errors) > 0 && !ignore { - log.Fatal("not generating parquet.go (-ignore set to false), err: ", result.Errors) + return fmt.Errorf("not generating parquet.go (-ignore set to false), err: %v", result.Errors) } i := input{ @@ -52,7 +47,7 @@ func FromStruct(pth, outPth, typ, pkg, imp string, ignore bool) { tmpl := template.New("output").Funcs(funcs) tmpl, err = tmpl.Parse(tpl) if err != nil { - log.Fatal(err) + return err } for _, t := range []string{ @@ -73,45 +68,45 @@ func FromStruct(pth, outPth, typ, pkg, imp string, ignore bool) { var err error tmpl, err = tmpl.Parse(t) if err != nil { - log.Fatal(err) + return err } } var buf bytes.Buffer err = tmpl.Execute(&buf, i) if err != nil { - log.Fatal(err) + return err } gocode, err := format.Source(buf.Bytes()) if err != nil { - log.Fatal(err, string(buf.Bytes())) + return fmt.Errorf("err: %s, gocode: %s", err, string(buf.Bytes())) } f, err := os.Create(outPth) if err != nil { - log.Fatal(err) + return err } _, err = f.Write(gocode) if err != nil { - log.Fatal(err) + return err } - f.Close() + return f.Close() } // FromParquet generates a go struct, a reader, and a writer based // on the parquet file at 'parq' -func FromParquet(parq, pth, outPth, typ, pkg, imp string, ignore bool) { +func FromParquet(parq, pth, outPth, typ, pkg, imp string, ignore bool) error { pf, err := os.Open(parq) if err != nil { - log.Fatal(err) + return err } footer, err := parquet.ReadMetaData(pf) if err != nil { - log.Fatal("couldn't read footer: ", err) + return fmt.Errorf("couldn't read footer: %s", err) } pf.Close() @@ -119,7 +114,7 @@ func FromParquet(parq, pth, outPth, typ, pkg, imp string, ignore bool) { tmpl := template.New("output").Funcs(funcs) tmpl, err = tmpl.Parse(structTpl) if err != nil { - log.Fatal(err) + return err } n := newStruct{ @@ -130,26 +125,26 @@ func FromParquet(parq, pth, outPth, typ, pkg, imp string, ignore bool) { var buf bytes.Buffer err = tmpl.Execute(&buf, n) if err != nil { - log.Fatal(err) + return err } gocode, err := format.Source(buf.Bytes()) if err != nil { - log.Fatal(err) + return err } f, err := os.Create(pth) if err != nil { - log.Fatal(err) + return err } _, err = f.Write(gocode) if err != nil { - log.Fatal(err) + return err } f.Close() - FromStruct(pth, outPth, typ, pkg, imp, ignore) + return FromStruct(pth, outPth, typ, pkg, imp, ignore) } type input struct { @@ -159,25 +154,25 @@ type input struct { Parent fields.Field } -func getFieldType(se *sch.SchemaElement) string { +func getFieldType(se *sch.SchemaElement) (string, error) { if se.Type == nil { - log.Fatal("nil parquet schema type") + return "", fmt.Errorf("nil parquet schema type") } s := se.Type.String() out, ok := parquetTypes[s] if !ok { - log.Fatalf("unsupported parquet schema type: %s", s) + return "", fmt.Errorf("unsupported parquet schema type: %s", s) } if se.RepetitionType != nil && *se.RepetitionType == sch.FieldRepetitionType_REPEATED { - log.Fatalf("field %s is FieldRepetitionType_REPEATED, which is currently not supported", se.Name) + return "", fmt.Errorf("field %s is FieldRepetitionType_REPEATED, which is currently not supported", se.Name) } var star string if se.RepetitionType != nil && *se.RepetitionType == sch.FieldRepetitionType_OPTIONAL { star = "*" } - return fmt.Sprintf("%s%s", star, out) + return fmt.Sprintf("%s%s", star, out), nil } func dedupe(flds []fields.Field) []fields.Field { diff --git a/internal/gen/template.go b/internal/gen/template.go index 5463894..0f7bc67 100644 --- a/internal/gen/template.go +++ b/internal/gen/template.go @@ -191,7 +191,7 @@ func (p *ParquetWriter) Close() error { return err } -func (p *ParquetWriter) Add(rec {{.Type}}) { +func (p *ParquetWriter) Add(rec {{.Parent.StructType}}) { if p.len == p.max { if p.child == nil { // an error can't happen here @@ -211,10 +211,10 @@ func (p *ParquetWriter) Add(rec {{.Type}}) { } type Field interface { - Add(r {{.Type}}) + Add(r {{.Parent.StructType}}) Write(w io.Writer, meta *parquet.Metadata) error Schema() parquet.Field - Scan(r *{{.Type}}) + Scan(r *{{.Parent.StructType}}) Read(r io.ReadSeeker, pg parquet.Page) error Name() string Levels() ([]uint8, []uint8) @@ -366,7 +366,7 @@ func (p *ParquetReader) Next() bool { return true } -func (p *ParquetReader) Scan(x *{{.Type}}) { +func (p *ParquetReader) Scan(x *{{.Parent.StructType}}) { if p.err != nil { return } diff --git a/internal/gen/template_bool.go b/internal/gen/template_bool.go index 9dad753..1554bd1 100644 --- a/internal/gen/template_bool.go +++ b/internal/gen/template_bool.go @@ -45,7 +45,7 @@ func (f *BoolField) Read(r io.ReadSeeker, pg parquet.Page) error { return err } -func (f *BoolField) Scan(r *{{.Type}}) { +func (f *BoolField) Scan(r *{{.StructType}}) { if len(f.vals) == 0 { return } @@ -54,7 +54,7 @@ func (f *BoolField) Scan(r *{{.Type}}) { f.vals = f.vals[1:] } -func (f *BoolField) Add(r {{.Type}}) { +func (f *BoolField) Add(r {{.StructType}}) { v := f.read(r) f.vals = append(f.vals, v) } diff --git a/internal/gen/template_bool_optional.go b/internal/gen/template_bool_optional.go index ae57161..a838090 100644 --- a/internal/gen/template_bool_optional.go +++ b/internal/gen/template_bool_optional.go @@ -32,7 +32,7 @@ func (f *BoolOptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { return err } -func (f *BoolOptionalField) Scan(r *{{.Type}}) { +func (f *BoolOptionalField) Scan(r *{{.StructType}}) { if len(f.Defs) == 0 { return } @@ -45,7 +45,7 @@ func (f *BoolOptionalField) Scan(r *{{.Type}}) { } } -func (f *BoolOptionalField) Add(r {{.Type}}) { +func (f *BoolOptionalField) Add(r {{.StructType}}) { vals, defs, reps := f.read(r) f.stats.add(vals, defs) f.vals = append(f.vals, vals...) diff --git a/internal/gen/template_optional.go b/internal/gen/template_optional.go index 1d089dc..691948a 100644 --- a/internal/gen/template_optional.go +++ b/internal/gen/template_optional.go @@ -7,12 +7,12 @@ var optionalNumericTpl = `{{define "optionalField"}} type {{.FieldType}} struct { parquet.OptionalField vals []{{removeStar .TypeName}} - read func(r {{.Type}}) ([]{{removeStar .TypeName}}, []uint8, []uint8) - write func(r *{{.Type}}, vals []{{removeStar .TypeName}}, def, rep []uint8) (int, int) + read func(r {{.StructType}}) ([]{{removeStar .TypeName}}, []uint8, []uint8) + write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}, def, rep []uint8) (int, int) stats *{{removeStar .TypeName}}optionalStats } -func New{{.FieldType}}(read func(r {{.Type}}) ([]{{removeStar .TypeName}}, []uint8, []uint8), write func(r *{{.Type}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *{{.FieldType}} { +func New{{.FieldType}}(read func(r {{.StructType}}) ([]{{removeStar .TypeName}}, []uint8, []uint8), write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *{{.FieldType}} { return &{{.FieldType}}{ read: read, write: write, @@ -47,7 +47,7 @@ func (f *{{.FieldType}}) Read(r io.ReadSeeker, pg parquet.Page) error { return err } -func (f *{{.FieldType}}) Add(r {{.Type}}) { +func (f *{{.FieldType}}) Add(r {{.StructType}}) { vals, defs, reps := f.read(r) f.stats.add(vals, defs) f.vals = append(f.vals, vals...) @@ -55,7 +55,7 @@ func (f *{{.FieldType}}) Add(r {{.Type}}) { f.Reps = append(f.Reps, reps...) } -func (f *{{.FieldType}}) Scan(r *{{.Type}}) { +func (f *{{.FieldType}}) Scan(r *{{.StructType}}) { if len(f.Defs) == 0 { return } diff --git a/internal/gen/template_required.go b/internal/gen/template_required.go index eba5c97..d8ca5ee 100644 --- a/internal/gen/template_required.go +++ b/internal/gen/template_required.go @@ -4,12 +4,12 @@ var requiredNumericTpl = `{{define "numericField"}} type {{.FieldType}} struct { vals []{{.TypeName}} parquet.RequiredField - read func(r {{.Type}}) {{.TypeName}} - write func(r *{{.Type}}, vals []{{removeStar .TypeName}}) + read func(r {{.StructType}}) {{.TypeName}} + write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}) stats *{{.TypeName}}stats } -func New{{.FieldType}}(read func(r {{.Type}}) {{.TypeName}}, write func(r *{{.Type}}, vals []{{removeStar .TypeName}}), path []string, opts ...func(*parquet.RequiredField)) *{{.FieldType}} { +func New{{.FieldType}}(read func(r {{.StructType}}) {{.TypeName}}, write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}), path []string, opts ...func(*parquet.RequiredField)) *{{.FieldType}} { return &{{.FieldType}}{ read: read, write: write, @@ -44,7 +44,7 @@ func (f *{{.FieldType}}) Write(w io.Writer, meta *parquet.Metadata) error { return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) } -func (f *{{.FieldType}}) Scan(r *{{.Type}}) { +func (f *{{.FieldType}}) Scan(r *{{.StructType}}) { if len(f.vals) == 0 { return } @@ -53,7 +53,7 @@ func (f *{{.FieldType}}) Scan(r *{{.Type}}) { f.vals = f.vals[1:] } -func (f *{{.FieldType}}) Add(r {{.Type}}) { +func (f *{{.FieldType}}) Add(r {{.Parent.StructType}}) { v := f.read(r) f.stats.add(v) f.vals = append(f.vals, v) diff --git a/internal/gen/template_string.go b/internal/gen/template_string.go index 11160e4..edd0532 100644 --- a/internal/gen/template_string.go +++ b/internal/gen/template_string.go @@ -4,12 +4,12 @@ var stringTpl = `{{define "stringField"}} type StringField struct { parquet.RequiredField vals []string - read func(r {{.Type}}) {{.TypeName}} - write func(r *{{.Type}}, vals []{{removeStar .TypeName}}) + read func(r {{.StructType}}) {{.TypeName}} + write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}) stats *stringStats } -func NewStringField(read func(r {{.Type}}) {{.TypeName}}, write func(r *{{.Type}}, vals []{{removeStar .TypeName}}), path []string, opts ...func(*parquet.RequiredField)) *StringField { +func NewStringField(read func(r {{.StructType}}) {{.TypeName}}, write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}), path []string, opts ...func(*parquet.RequiredField)) *StringField { return &StringField{ read: read, write: write, @@ -56,7 +56,7 @@ func (f *StringField) Read(r io.ReadSeeker, pg parquet.Page) error { return nil } -func (f *StringField) Scan(r *{{.Type}}) { +func (f *StringField) Scan(r *{{.StructType}}) { if len(f.vals) == 0 { return } @@ -65,7 +65,7 @@ func (f *StringField) Scan(r *{{.Type}}) { f.vals = f.vals[1:] } -func (f *StringField) Add(r {{.Type}}) { +func (f *StringField) Add(r {{.StructType}}) { v := f.read(r) f.stats.add(v) f.vals = append(f.vals, v) diff --git a/internal/gen/template_string_optional.go b/internal/gen/template_string_optional.go index daefb9a..9b0a14d 100644 --- a/internal/gen/template_string_optional.go +++ b/internal/gen/template_string_optional.go @@ -4,12 +4,12 @@ var stringOptionalTpl = `{{define "stringOptionalField"}} type StringOptionalField struct { parquet.OptionalField vals []string - read func(r {{.Type}}) ([]{{removeStar .TypeName}}, []uint8, []uint8) - write func(r *{{.Type}}, vals []{{removeStar .TypeName}}, def, rep []uint8) (int, int) + read func(r {{.StructType}}) ([]{{removeStar .TypeName}}, []uint8, []uint8) + write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}, def, rep []uint8) (int, int) stats *stringOptionalStats } -func NewStringOptionalField(read func(r {{.Type}}) ([]{{removeStar .TypeName}}, []uint8, []uint8), write func(r *{{.Type}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *StringOptionalField { +func NewStringOptionalField(read func(r {{.StructType}}) ([]{{removeStar .TypeName}}, []uint8, []uint8), write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *StringOptionalField { return &StringOptionalField{ read: read, write: write, @@ -22,7 +22,7 @@ func (f *StringOptionalField) Schema() parquet.Field { return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: f.RepetitionType, Types: f.Types} } -func (f *StringOptionalField) Add(r {{.Type}}) { +func (f *StringOptionalField) Add(r {{.StructType}}) { vals, defs, reps := f.read(r) f.stats.add(vals, defs) f.vals = append(f.vals, vals...) @@ -30,7 +30,7 @@ func (f *StringOptionalField) Add(r {{.Type}}) { f.Reps = append(f.Reps, reps...) } -func (f *StringOptionalField) Scan(r *{{.Type}}) { +func (f *StringOptionalField) Scan(r *{{.StructType}}) { if len(f.Defs) == 0 { return } diff --git a/internal/parse/fields_test.go b/internal/parse/fields_test.go index 2823a03..03308f9 100644 --- a/internal/parse/fields_test.go +++ b/internal/parse/fields_test.go @@ -340,16 +340,11 @@ func TestFields(t *testing.T) { t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { out, err := parse.Fields(tc.typ, "./parse_test.go") assert.Nil(t, err, tc.name) - assert.Equal(t, tc.expected, out.Parent, tc.name) - if assert.Equal(t, len(tc.errors), len(out.Errors), tc.name) { - for i, err := range out.Errors { - assert.EqualError(t, tc.errors[i], err.Error(), tc.name) - } - } else { - for _, err := range out.Errors { - fmt.Println(err) - } + if !assert.Equal(t, tc.errors, out.Errors, tc.name) { + return } + + assert.Equal(t, tc.expected.Children, out.Parent.Children, tc.name) }) } } From c6c1a00eff6913a023da59e2909e01a7a44d3578 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Sat, 12 Jun 2021 16:21:02 -0600 Subject: [PATCH 11/25] removing a bunch of fields from fields.Field --- internal/dremel/testcases/doc/generated.go | 543 +++++++++++++++++- internal/dremel/testcases/person/generated.go | 60 +- internal/fields/fields.go | 133 +++-- internal/fields/fields_test.go | 258 ++++----- internal/parse/fields_test.go | 199 +++---- internal/parse/parse.go | 87 +-- 6 files changed, 912 insertions(+), 368 deletions(-) diff --git a/internal/dremel/testcases/doc/generated.go b/internal/dremel/testcases/doc/generated.go index 55fa643..38191d5 100644 --- a/internal/dremel/testcases/doc/generated.go +++ b/internal/dremel/testcases/doc/generated.go @@ -11,6 +11,9 @@ import ( "github.com/parsyl/parquet" sch "github.com/parsyl/parquet/schema" + + "math" + "sort" ) type compression int @@ -41,7 +44,319 @@ type ParquetWriter struct { } func Fields(compression compression) []Field { - return []Field{} + return []Field{ + Newint64(readDocID, writeDocID, []string{"docid"}, fieldCompression(compression)), + Newint64(readLinksBackward, writeLinksBackward, []string{"link", "backward"}, []int{1, 2}, fieldCompression(compression)), + Newint64(readLinksForward, writeLinksForward, []string{"link", "forward"}, []int{1, 2}, fieldCompression(compression)), + Newstring(readNamesLanguagesCode, writeNamesLanguagesCode, []string{"names", "languages", "code"}, []int{2, 2, 0}, fieldCompression(compression)), + Newstring(readNamesLanguagesCountry, writeNamesLanguagesCountry, []string{"names", "languages", "country"}, []int{2, 2, 1}, fieldCompression(compression)), + Newstring(readNamesURL, writeNamesURL, []string{"names", "url"}, []int{2, 1}, fieldCompression(compression)), + } +} + +func readDocID(x Document) int64 { + return x.DocID +} + +func writeDocID(x *Document, vals []int64) { + x.DocID = vals[0] +} + +func readLinksBackward(x Document) ([]int64, []uint8, []uint8) { + var vals []int64 + var defs, reps []uint8 + var lastRep uint8 + + if x.Links == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Links.Backward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links.Backward { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0) + } + } + } + + return vals, defs, reps +} + +func writeLinksBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + x.Links = &Link{} + case 2: + switch rep { + case 0: + x.Links = &Link{Backward: []int64{vals[nVals]}} + case 1: + x.Links.Backward = append(x.Links.Backward, vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readLinksForward(x Document) ([]int64, []uint8, []uint8) { + var vals []int64 + var defs, reps []uint8 + var lastRep uint8 + + if x.Links == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Links.Forward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links.Forward { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0) + } + } + } + + return vals, defs, reps +} + +func writeLinksForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + switch rep { + case 0: + x.Links.Forward = []int64{vals[nVals]} + case 1: + x.Links.Forward = append(x.Links.Forward, vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readNamesLanguagesCode(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Names) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Names { + if i0 == 1 { + lastRep = 1 + } + if len(x0.Languages) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Languages { + if i1 == 1 { + lastRep = 2 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x1.Code) + } + } + } + } + + return vals, defs, reps +} + +func writeNamesLanguagesCode(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 2) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + x.Names = append(x.Names, Name{}) + case 2: + switch rep { + case 0: + x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}} + case 1: + x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}}) + case 2: + x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]}) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readNamesLanguagesCountry(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Names) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Names { + if i0 == 1 { + lastRep = 1 + } + if len(x0.Languages) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Languages { + if i1 == 1 { + lastRep = 2 + } + if x1.Country == nil { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, *x1.Country) + } + } + } + } + } + + return vals, defs, reps +} + +func writeNamesLanguagesCountry(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 2) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 3: + switch rep { + case 0, 2: + x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readNamesURL(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Names) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Names { + if i0 == 1 { + lastRep = 1 + } + if x0.URL == nil { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, *x0.URL) + } + } + } + + return vals, defs, reps +} + +func writeNamesURL(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + switch rep { + case 0, 1: + x.Names[ind[0]].URL = pstring(vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels } func fieldCompression(c compression) func(*parquet.RequiredField) { @@ -362,6 +677,232 @@ func (p *ParquetReader) Scan(x *Document) { } } +type int64 struct { + vals []int64 + parquet.RequiredField + read func(r Document) int64 + write func(r *Document, vals []int64) + stats *int64stats +} + +func Newint64(read func(r Document) int64, write func(r *Document, vals []int64), path []string, opts ...func(*parquet.RequiredField)) *int64 { + return &int64{ + read: read, + write: write, + RequiredField: parquet.NewRequiredField(path, opts...), + stats: newInt64stats(), + } +} + +func (f *int64) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int64Type, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} +} + +func (f *int64) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + v := make([]int64, int(pg.N)) + err = binary.Read(rr, binary.LittleEndian, &v) + f.vals = append(f.vals, v...) + return err +} + +func (f *int64) Write(w io.Writer, meta *parquet.Metadata) error { + var buf bytes.Buffer + for _, v := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + return err + } + } + return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) +} + +func (f *int64) Scan(r *Document) { + if len(f.vals) == 0 { + return + } + + f.write(r, f.vals) + f.vals = f.vals[1:] +} + +func (f *int64) Add(r Document) { + v := f.read(r) + f.stats.add(v) + f.vals = append(f.vals, v) +} + +func (f *int64) Levels() ([]uint8, []uint8) { + return nil, nil +} + +type StringField struct { + parquet.RequiredField + vals []string + read func(r Document) string + write func(r *Document, vals []string) + stats *stringStats +} + +func NewStringField(read func(r Document) string, write func(r *Document, vals []string), path []string, opts ...func(*parquet.RequiredField)) *StringField { + return &StringField{ + read: read, + write: write, + RequiredField: parquet.NewRequiredField(path, opts...), + stats: newStringStats(), + } +} + +func (f *StringField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} +} + +func (f *StringField) Write(w io.Writer, meta *parquet.Metadata) error { + buf := bytes.Buffer{} + + for _, s := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { + return err + } + buf.Write([]byte(s)) + } + + return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) +} + +func (f *StringField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + for j := 0; j < pg.N; j++ { + var x int32 + if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { + return err + } + s := make([]byte, x) + if _, err := rr.Read(s); err != nil { + return err + } + + f.vals = append(f.vals, string(s)) + } + return nil +} + +func (f *StringField) Scan(r *Document) { + if len(f.vals) == 0 { + return + } + + f.write(r, f.vals) + f.vals = f.vals[1:] +} + +func (f *StringField) Add(r Document) { + v := f.read(r) + f.stats.add(v) + f.vals = append(f.vals, v) +} + +func (f *StringField) Levels() ([]uint8, []uint8) { + return nil, nil +} + +type int64stats struct { + min int64 + max int64 +} + +func newInt64stats() *int64stats { + return &int64stats{ + min: int64(math.MaxInt64), + } +} + +func (i *int64stats) add(val int64) { + if val < i.min { + i.min = val + } + if val > i.max { + i.max = val + } +} + +func (f *int64stats) bytes(val int64) []byte { + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, val) + return buf.Bytes() +} + +func (f *int64stats) NullCount() *int64 { + return nil +} + +func (f *int64stats) DistinctCount() *int64 { + return nil +} + +func (f *int64stats) Min() []byte { + return f.bytes(f.min) +} + +func (f *int64stats) Max() []byte { + return f.bytes(f.max) +} + +type stringStats struct { + vals []string + min []byte + max []byte +} + +func newStringStats() *stringStats { + return &stringStats{} +} + +func (s *stringStats) add(val string) { + s.vals = append(s.vals, val) +} + +func (s *stringStats) NullCount() *int64 { + return nil +} + +func (s *stringStats) DistinctCount() *int64 { + return nil +} + +func (s *stringStats) Min() []byte { + if s.min == nil { + s.minMax() + } + return s.min +} + +func (s *stringStats) Max() []byte { + if s.max == nil { + s.minMax() + } + return s.max +} + +func (s *stringStats) minMax() { + if len(s.vals) == 0 { + return + } + + tmp := make([]string, len(s.vals)) + copy(tmp, s.vals) + sort.Strings(tmp) + s.min = []byte(tmp[0]) + s.max = []byte(tmp[len(tmp)-1]) +} + func pint32(i int32) *int32 { return &i } func puint32(i uint32) *uint32 { return &i } func pint64(i int64) *int64 { return &i } diff --git a/internal/dremel/testcases/person/generated.go b/internal/dremel/testcases/person/generated.go index 8181dbe..1e0c15b 100644 --- a/internal/dremel/testcases/person/generated.go +++ b/internal/dremel/testcases/person/generated.go @@ -45,32 +45,32 @@ type ParquetWriter struct { func Fields(compression compression) []Field { return []Field{ - Newstring(readPersonName, writePersonName, []string{"Person", "name"}, fieldCompression(compression)), - Newstring(readPersonHobbyName, writePersonHobbyName, []string{"Person", "hobby", "name"}, []int{1, 0}, fieldCompression(compression)), - Newint32(readPersonHobbyDifficulty, writePersonHobbyDifficulty, []string{"Person", "hobby", "difficulty"}, []int{1, 1}, fieldCompression(compression)), - Newstring(readPersonHobbySkillsName, writePersonHobbySkillsName, []string{"Person", "hobby", "skills", "name"}, []int{1, 2, 0}, fieldCompression(compression)), - Newstring(readPersonHobbySkillsDifficulty, writePersonHobbySkillsDifficulty, []string{"Person", "hobby", "skills", "difficulty"}, []int{1, 2, 0}, fieldCompression(compression)), + Newstring(readName, writeName, []string{"name"}, fieldCompression(compression)), + Newstring(readHobbyName, writeHobbyName, []string{"hobby", "name"}, []int{1, 0}, fieldCompression(compression)), + Newint32(readHobbyDifficulty, writeHobbyDifficulty, []string{"hobby", "difficulty"}, []int{1, 1}, fieldCompression(compression)), + Newstring(readHobbySkillsName, writeHobbySkillsName, []string{"hobby", "skills", "name"}, []int{1, 2, 0}, fieldCompression(compression)), + Newstring(readHobbySkillsDifficulty, writeHobbySkillsDifficulty, []string{"hobby", "skills", "difficulty"}, []int{1, 2, 0}, fieldCompression(compression)), } } -func readPersonName(x Person) string { - return x.Person.Name +func readName(x Person) string { + return x.Name } -func writePersonName(x *Person, vals []string) { - x.Person.Name = vals[0] +func writeName(x *Person, vals []string) { + x.Name = vals[0] } -func readPersonHobbyName(x Person) ([]string, []uint8, []uint8) { +func readHobbyName(x Person) ([]string, []uint8, []uint8) { switch { - case x.Person == nil: + case x.Hobby == nil: return nil, []uint8{0}, nil default: - return []string{x.Person.Hobby}, []uint8{1}, nil + return []string{x.Hobby.Name}, []uint8{1}, nil } } -func writePersonHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { +func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { def := defs[0] switch def { case 1: @@ -81,18 +81,18 @@ func writePersonHobbyName(x *Person, vals []string, defs, reps []uint8) (int, in return 0, 1 } -func readPersonHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { +func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { switch { - case x.Person == nil: + case x.Hobby == nil: return nil, []uint8{0}, nil - case x.Person.Hobby == nil: + case x.Hobby.Difficulty == nil: return nil, []uint8{1}, nil default: - return []int32{*x.Person.Hobby}, []uint8{2}, nil + return []int32{*x.Hobby.Difficulty}, []uint8{2}, nil } } -func writePersonHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { +func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { def := defs[0] switch def { case 2: @@ -103,26 +103,26 @@ func writePersonHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (in return 0, 1 } -func readPersonHobbySkillsName(x Person) ([]string, []uint8, []uint8) { +func readHobbySkillsName(x Person) ([]string, []uint8, []uint8) { var vals []string var defs, reps []uint8 var lastRep uint8 - if x.Person == nil { + if x.Hobby == nil { defs = append(defs, 0) reps = append(reps, lastRep) } else { - if len(x.Person.Hobby) == 0 { + if len(x.Hobby.Skills) == 0 { defs = append(defs, 1) reps = append(reps, lastRep) } else { - for i0, x0 := range x.Person.Hobby { + for i0, x0 := range x.Hobby.Skills { if i0 == 1 { lastRep = 1 } defs = append(defs, 2) reps = append(reps, lastRep) - vals = append(vals, x0.Skills.Name) + vals = append(vals, x0.Name) } } } @@ -130,7 +130,7 @@ func readPersonHobbySkillsName(x Person) ([]string, []uint8, []uint8) { return vals, defs, reps } -func writePersonHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, int) { +func writeHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) @@ -159,26 +159,26 @@ func writePersonHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (i return nVals, nLevels } -func readPersonHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { +func readHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { var vals []string var defs, reps []uint8 var lastRep uint8 - if x.Person == nil { + if x.Hobby == nil { defs = append(defs, 0) reps = append(reps, lastRep) } else { - if len(x.Person.Hobby) == 0 { + if len(x.Hobby.Skills) == 0 { defs = append(defs, 1) reps = append(reps, lastRep) } else { - for i0, x0 := range x.Person.Hobby { + for i0, x0 := range x.Hobby.Skills { if i0 == 1 { lastRep = 1 } defs = append(defs, 2) reps = append(reps, lastRep) - vals = append(vals, x0.Skills.Difficulty) + vals = append(vals, x0.Difficulty) } } } @@ -186,7 +186,7 @@ func readPersonHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { return vals, defs, reps } -func writePersonHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (int, int) { +func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (int, int) { var nVals, nLevels int ind := make(indices, 1) diff --git a/internal/fields/fields.go b/internal/fields/fields.go index 1c0f059..583c557 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -8,18 +8,13 @@ import ( // Field holds metadata that is required by parquetgen in order // to generate code. type Field struct { - // Type of the top level struct Type string - RepetitionType RepetitionType - FieldName string + Name string ColumnName string - TypeName string - FieldType string - ParquetType string - Category string + RepetitionType RepetitionType Parent *Field - Embedded bool Children []Field + Embedded bool NthChild int Defined bool } @@ -88,8 +83,8 @@ func Reverse(out []Field) []Field { func (f Field) FieldNames() []string { var out []string for _, fld := range Reverse(f.Chain()) { - if fld.FieldName != "" { - out = append(out, fld.FieldName) + if fld.Name != "" { + out = append(out, fld.Name) } } return out @@ -98,8 +93,8 @@ func (f Field) FieldNames() []string { func (f Field) FieldTypes() []string { var out []string for _, fld := range Reverse(f.Chain()) { - if fld.FieldType != "" { - out = append(out, fld.FieldType) + if fld.Type != "" { + out = append(out, fld.Type) } } return out @@ -305,14 +300,14 @@ func (f Field) Init(def, rep int) string { switch fld.RepetitionType { case Required: - left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.FieldName)) + left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.Name)) case Optional: - left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.FieldName)) + left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.Name)) case Repeated: if (rep > 0 && reps < rep) || (f.NthChild > 0 && !fld.Primitive()) { - left = fmt.Sprintf(left, fmt.Sprintf(".%s[ind[%d]]%%s", fld.FieldName, reps-1)) + left = fmt.Sprintf(left, fmt.Sprintf(".%s[ind[%d]]%%s", fld.Name, reps-1)) } else { - left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.FieldName)) + left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.Name)) } } @@ -343,63 +338,63 @@ func (f Field) Init(def, rep int) string { } else if (fld.Parent.Parent == nil || fld.Parent.Defined) && rep == 0 { right = fmt.Sprintf(right, "vals[0]%s") } else if fld.Parent.RepetitionType == Repeated && rep < maxRep { //need one more case: - right = fmt.Sprintf(right, fmt.Sprintf("{%s: vals[nVals]}%%s", fld.FieldName)) + right = fmt.Sprintf(right, fmt.Sprintf("{%s: vals[nVals]}%%s", fld.Name)) } else if fld.Parent.RepetitionType == Repeated { - right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[nVals]%%s", fld.FieldName)) + right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[nVals]%%s", fld.Name)) } else { - right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[0]%%s", fld.FieldName)) + right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[0]%%s", fld.Name)) } } else { if fld.Parent.RepetitionType == Repeated && rep < maxRep { - right = fmt.Sprintf(right, fmt.Sprintf("{%s: %s{%%s}}", fld.FieldName, fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("{%s: %s{%%s}}", fld.Name, fld.Type)) } else { - right = fmt.Sprintf(right, fmt.Sprintf("%s: %s{%%s}", fld.FieldName, fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("%s: %s{%%s}", fld.Name, fld.Type)) } } case Optional: if fld.Primitive() { if f.NthChild == 0 && fld.Parent.Optional() && !fld.Parent.Repeated() { - right = fmt.Sprintf(right, fmt.Sprintf("%s: p%s(vals[0])%%s", fld.FieldName, fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("%s: p%s(vals[0])%%s", fld.Name, fld.Type)) } else if fld.Parent.RepetitionType == Repeated { - right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[nVals])%%s", fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[nVals])%%s", fld.Type)) } else if fld.Parent.Repeated() && f.NthChild == 0 { - right = fmt.Sprintf(right, fmt.Sprintf("%s: p%s(vals[nVals])%%s", fld.FieldName, fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("%s: p%s(vals[nVals])%%s", fld.Name, fld.Type)) } else if fld.Parent.Repeated() && f.NthChild > 0 { - right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[nVals])%%s", fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[nVals])%%s", fld.Type)) } else { - right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[0])%%s", fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("p%s(vals[0])%%s", fld.Type)) } } else { if j == 0 { - right = fmt.Sprintf(right, fmt.Sprintf("&%s{%%s}", fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("&%s{%%s}", fld.Type)) } else { - right = fmt.Sprintf(right, fmt.Sprintf("%s: &%s{%%s}", fld.FieldName, fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("%s: &%s{%%s}", fld.Name, fld.Type)) } } case Repeated: if fld.Primitive() { if rep == 0 && fld.Parent.RepetitionType == Repeated { - right = fmt.Sprintf(right, fmt.Sprintf("{%s: []%s{vals[nVals]}}%%s", fld.FieldName, fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("{%s: []%s{vals[nVals]}}%%s", fld.Name, fld.Type)) } else if (fld.Parent.Parent == nil || fld.Parent.Defined) && rep == 0 { - right = fmt.Sprintf(right, fmt.Sprintf("[]%s{vals[nVals]}%%s", fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("[]%s{vals[nVals]}%%s", fld.Type)) } else if rep == 0 { - right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{vals[nVals]}%%s", fld.FieldName, fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{vals[nVals]}%%s", fld.Name, fld.Type)) } else if reps == rep { right = fmt.Sprintf(right, fmt.Sprintf("append(x%s, vals[nVals])%%s", left)) } else { - right = fmt.Sprintf(right, fmt.Sprintf("[%s: []%s{vals[nVals]}]%%s", fld.FieldName, fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("[%s: []%s{vals[nVals]}]%%s", fld.Name, fld.Type)) } } else { if rep == 0 && j == 0 { - right = fmt.Sprintf(right, fmt.Sprintf("[]%s{%%s}", fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("[]%s{%%s}", fld.Type)) } else if rep == 0 && reps == maxRep && fld.Parent != nil && fld.Parent.RepetitionType == Repeated { - right = fmt.Sprintf(right, fmt.Sprintf("{%s: []%s{%%s}}", fld.FieldName, fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("{%s: []%s{%%s}}", fld.Name, fld.Type)) } else if rep == 0 && reps == maxRep { - right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{%%s}", fld.FieldName, fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{%%s}", fld.Name, fld.Type)) } else if reps == rep { - right = fmt.Sprintf(right, fmt.Sprintf("append(x%s, %s{%%s})", left, fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("append(x%s, %s{%%s})", left, fld.Type)) } else { - right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{%%s}", fld.FieldName, fld.FieldType)) + right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{%%s}", fld.Name, fld.Type)) } } } @@ -424,17 +419,57 @@ func (f Field) Path() string { } // Primitive is called in order to determine if the field is primitive or not. + func (f Field) Primitive() bool { - return primitiveTypes[f.FieldType] -} - -var primitiveTypes = map[string]bool{ - "bool": true, - "int32": true, - "uint32": true, - "int64": true, - "uint64": true, - "float32": true, - "float64": true, - "string": true, + _, ok := primitiveTypes[f.Type] + return ok +} + +func (f Field) FieldName() string { + var op string + if f.RepetitionType == Optional || f.RepetitionType == Repeated { + op = "Optional" + } + + ft := primitiveTypes[f.Type] + return fmt.Sprintf(ft.name, op) +} + +func (f Field) ParquetType() string { + ft := primitiveTypes[f.Type] + return fmt.Sprintf(ft.name, "", "Type") +} + +func (f Field) Category() string { + var op string + if f.RepetitionType == Optional || f.RepetitionType == Repeated { + op = "Optional" + } + + ft := primitiveTypes[f.Type] + return fmt.Sprintf(ft.category, op, "Field") +} + +func (f Field) TypeName(s string, optional bool) string { + var star string + if f.RepetitionType == Optional { + star = "*" + } + return fmt.Sprintf("%s%s", star, f.Type) +} + +type fieldType struct { + name string + category string +} + +var primitiveTypes = map[string]fieldType{ + "int32": {"Int32%s%s", "numeric%s"}, + "uint32": {"Uint32%s%s", "numeric%s"}, + "int64": {"Int64%s%s", "numeric%s"}, + "uint64": {"Uint64%s%s", "numeric%s"}, + "float32": {"Float32%s%s", "numeric%s"}, + "float64": {"Float64%s%s", "numeric%s"}, + "bool": {"Bool%s%s", "bool%s"}, + "string": {"String%s%s", "string%s"}, } diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go index 25b03d8..cb9fb88 100644 --- a/internal/fields/fields_test.go +++ b/internal/fields/fields_test.go @@ -17,15 +17,15 @@ func TestNilFields(t *testing.T) { testCases := []testInput{ { - f: fields.Field{FieldName: "First", RepetitionType: fields.Optional, Parent: &fields.Field{ - FieldName: "Name", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Friends", RepetitionType: fields.Repeated}}}, + f: fields.Field{Name: "First", RepetitionType: fields.Optional, Parent: &fields.Field{ + Name: "Name", RepetitionType: fields.Required, Parent: &fields.Field{Name: "Friends", RepetitionType: fields.Repeated}}}, expected: []string{ "Friends", "Friends.Name.First", }, }, { - f: fields.Field{FieldName: "First", RepetitionType: fields.Optional, Parent: &fields.Field{FieldName: "Name", RepetitionType: fields.Required, Parent: &fields.Field{FieldName: "Friend", RepetitionType: fields.Required}}}, + f: fields.Field{Name: "First", RepetitionType: fields.Optional, Parent: &fields.Field{Name: "Name", RepetitionType: fields.Required, Parent: &fields.Field{Name: "Friend", RepetitionType: fields.Required}}}, expected: []string{ "Friend.Name.First", }, @@ -55,8 +55,8 @@ func TestInit(t *testing.T) { }{ { fields: []fields.Field{ - {FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Backward", FieldType: "int64", RepetitionType: fields.Repeated}, + {Name: "Links", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Backward", Type: "int64", RepetitionType: fields.Repeated}, }}, }, rep: 0, @@ -65,8 +65,8 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Backward", FieldType: "int64", RepetitionType: fields.Repeated}, + {Name: "Links", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Backward", Type: "int64", RepetitionType: fields.Repeated}, }}, }, rep: 0, @@ -75,8 +75,8 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Backward", FieldType: "int64", RepetitionType: fields.Repeated}, + {Name: "Links", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Backward", Type: "int64", RepetitionType: fields.Repeated}, }}, }, def: 2, @@ -85,8 +85,8 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Links", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Forward", FieldType: "int64", RepetitionType: fields.Repeated}, + {Name: "Links", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Forward", Type: "int64", RepetitionType: fields.Repeated}, }}, }, def: 2, @@ -95,9 +95,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required}, + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Code", Type: "int64", RepetitionType: fields.Required}, }}, }}, }, @@ -107,9 +107,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required}, + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Code", Type: "int64", RepetitionType: fields.Required}, }}, }}, }, @@ -119,9 +119,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required}, + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Code", Type: "int64", RepetitionType: fields.Required}, }}, }}, }, @@ -131,9 +131,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Code", FieldType: "int64", RepetitionType: fields.Required}, + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Code", Type: "int64", RepetitionType: fields.Required}, }}, }}, }, @@ -143,8 +143,8 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Backward", FieldType: "string", RepetitionType: fields.Repeated}, + {Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Backward", Type: "string", RepetitionType: fields.Repeated}, }}, }, def: 1, @@ -153,8 +153,8 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Backward", FieldType: "string", RepetitionType: fields.Repeated}, + {Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Backward", Type: "string", RepetitionType: fields.Repeated}, }}, }, def: 2, @@ -163,8 +163,8 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Backward", FieldType: "string", RepetitionType: fields.Repeated}, + {Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Backward", Type: "string", RepetitionType: fields.Repeated}, }}, }, def: 2, @@ -173,9 +173,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Language", FieldType: "Language", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated}, + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Language", Type: "Language", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, }}, }}, }, @@ -185,9 +185,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated}, + {Name: "Name", Type: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, }}, }}, }, @@ -197,9 +197,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Language", FieldType: "Language", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated}, + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Language", Type: "Language", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, }}, }}, }, @@ -209,9 +209,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated}, + {Name: "Name", Type: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, }}, }}, }, @@ -221,10 +221,10 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Thing", FieldType: "Thing", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Codes", FieldType: "string", RepetitionType: fields.Repeated}, + {Name: "Thing", Type: "Thing", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, }}, }}, }}, @@ -235,9 +235,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", TypeName: "*string", RepetitionType: fields.Optional}, + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, }}, }}, }, @@ -246,9 +246,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", TypeName: "*string", RepetitionType: fields.Optional}, + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, }}, }}, }, @@ -257,10 +257,10 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Optional}, - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", TypeName: "*string", RepetitionType: fields.Optional}, + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int", Name: "Rank", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, }}, }}, }, @@ -269,8 +269,8 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Difficulty", FieldType: "int32", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Difficulty", Type: "int32", RepetitionType: fields.Optional}, }}, }, def: 1, @@ -278,8 +278,8 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Difficulty", FieldType: "int32", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Difficulty", Type: "int32", RepetitionType: fields.Optional}, }}, }, def: 2, @@ -287,9 +287,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, - {FieldName: "Difficulty", FieldType: "int32", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, + {Name: "Difficulty", Type: "int32", RepetitionType: fields.Optional}, }}, }, def: 2, @@ -297,8 +297,8 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", RepetitionType: fields.Required}, + {Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Required}, }}, }, def: 1, @@ -306,8 +306,8 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, }}, }, def: 1, @@ -315,8 +315,8 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, }}, }, def: 1, @@ -324,8 +324,8 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, }}, }, def: 2, @@ -333,9 +333,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, }}, }}, }, @@ -344,9 +344,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + {Name: "Friend", Type: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, }}, }}, }, @@ -355,10 +355,10 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", RepetitionType: fields.Optional}, + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, }}, }}, }, @@ -367,9 +367,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Country", FieldType: "string", RepetitionType: fields.Repeated}, + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Country", Type: "string", RepetitionType: fields.Repeated}, }}, }}, }, @@ -379,10 +379,10 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Languages", FieldType: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Zip", FieldType: "string", RepetitionType: fields.Optional}, - {FieldName: "Country", FieldType: "string", RepetitionType: fields.Optional}, + {Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Zip", Type: "string", RepetitionType: fields.Optional}, + {Name: "Country", Type: "string", RepetitionType: fields.Optional}, }}, }}, }, @@ -392,10 +392,10 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "First", FieldType: "string", RepetitionType: fields.Required}, + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "First", Type: "string", RepetitionType: fields.Required}, }}, }}, }}, @@ -405,10 +405,10 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "First", FieldType: "string", RepetitionType: fields.Required}, + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "First", Type: "string", RepetitionType: fields.Required}, }}, }}, }}, @@ -418,10 +418,10 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "First", FieldType: "string", RepetitionType: fields.Required}, + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "First", Type: "string", RepetitionType: fields.Required}, }}, }}, }}, @@ -431,11 +431,11 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Suffix", FieldType: "string", RepetitionType: fields.Optional}, - {FieldName: "First", FieldType: "string", RepetitionType: fields.Optional}, + {Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Suffix", Type: "string", RepetitionType: fields.Optional}, + {Name: "First", Type: "string", RepetitionType: fields.Optional}, }}, }}, }}, @@ -446,10 +446,10 @@ func TestInit(t *testing.T) { { fields: []fields.Field{ { - FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Skills", FieldType: "Skill", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Required}, - {FieldType: "string", TypeName: "string", FieldName: "Difficulty", RepetitionType: fields.Required}, + Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Skills", Type: "Skill", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Required}, + {Type: "string", Name: "Difficulty", RepetitionType: fields.Required}, }}, }, }, @@ -459,9 +459,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", RepetitionType: fields.Repeated}, - {FieldName: "Forward", FieldType: "int64", RepetitionType: fields.Repeated}, + {Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Forward", Type: "int64", RepetitionType: fields.Repeated}, }}, }, rep: 1, @@ -470,9 +470,9 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", RepetitionType: fields.Repeated}, - {FieldName: "Forward", FieldType: "string", RepetitionType: fields.Repeated}, + {Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Forward", Type: "string", RepetitionType: fields.Repeated}, }}, }, rep: 0, @@ -481,7 +481,7 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "LuckyNumbers", FieldType: "int64", RepetitionType: fields.Repeated}, + {Name: "LuckyNumbers", Type: "int64", RepetitionType: fields.Repeated}, }, def: 1, rep: 0, @@ -489,7 +489,7 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "LuckyNumbers", FieldType: "int64", RepetitionType: fields.Repeated}, + {Name: "LuckyNumbers", Type: "int64", RepetitionType: fields.Repeated}, }, def: 1, rep: 1, @@ -497,12 +497,12 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "A", FieldType: "A", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "B", FieldType: "B", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "C", FieldType: "C", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "D", FieldType: "D", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "E", FieldType: "E", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "F", FieldType: "string", RepetitionType: fields.Optional}, + {Name: "A", Type: "A", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "B", Type: "B", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "C", Type: "C", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "D", Type: "D", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "E", Type: "E", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "F", Type: "string", RepetitionType: fields.Optional}, }}, }}, }}, @@ -515,13 +515,13 @@ func TestInit(t *testing.T) { }, { fields: []fields.Field{ - {FieldName: "A", FieldType: "A", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "B", FieldType: "B", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "C", FieldType: "C", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "D", FieldType: "D", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "E", FieldType: "E", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "x", FieldType: "string", RepetitionType: fields.Optional}, - {FieldName: "F", FieldType: "string", RepetitionType: fields.Optional}, + {Name: "A", Type: "A", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "B", Type: "B", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "C", Type: "C", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "D", Type: "D", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "E", Type: "E", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "x", Type: "string", RepetitionType: fields.Optional}, + {Name: "F", Type: "string", RepetitionType: fields.Optional}, }}, }}, }}, diff --git a/internal/parse/fields_test.go b/internal/parse/fields_test.go index 03308f9..11dc7f4 100644 --- a/internal/parse/fields_test.go +++ b/internal/parse/fields_test.go @@ -31,8 +31,8 @@ func TestFields(t *testing.T) { typ: "Being", expected: fields.Field{ Children: []fields.Field{ - {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, - {FieldType: "int32", ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, }, }, }, @@ -41,8 +41,8 @@ func TestFields(t *testing.T) { typ: "Private", expected: fields.Field{ Children: []fields.Field{ - {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, }, }, }, @@ -51,11 +51,11 @@ func TestFields(t *testing.T) { typ: "Nested", expected: fields.Field{ Children: []fields.Field{ - {Type: "Being", TypeName: "Being", FieldName: "Being", FieldType: "Being", ColumnName: "Being", RepetitionType: fields.Required, Children: []fields.Field{ - {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + {Name: "Being", Type: "Being", ColumnName: "Being", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, }}, - {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, }, }, errors: []error{}, @@ -65,11 +65,11 @@ func TestFields(t *testing.T) { typ: "Nested2", expected: fields.Field{ Children: []fields.Field{ - {Type: "Being", TypeName: "Being", FieldName: "Info", FieldType: "Being", ColumnName: "Info", RepetitionType: fields.Required, Children: []fields.Field{ - {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + {Type: "Being", Name: "Info", ColumnName: "Info", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, }}, - {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, }, }, errors: []error{}, @@ -79,15 +79,13 @@ func TestFields(t *testing.T) { typ: "DoubleNested", expected: fields.Field{ Children: []fields.Field{ - { - Type: "Nested", FieldName: "Nested", FieldType: "Nested", ColumnName: "Nested", TypeName: "Nested", - Children: []fields.Field{ - {Type: "Being", TypeName: "Being", FieldName: "Being", FieldType: "Being", ColumnName: "Being", RepetitionType: fields.Required, Children: []fields.Field{ - {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, - }}, - {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, - }, + {Type: "Nested", Name: "Nested", ColumnName: "Nested", Children: []fields.Field{ + {Type: "Being", Name: "Being", ColumnName: "Being", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + }}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, + }, }, }, }, @@ -98,15 +96,13 @@ func TestFields(t *testing.T) { typ: "OptionalDoubleNested", expected: fields.Field{ Children: []fields.Field{ - { - Type: "OptionalNested", FieldName: "OptionalNested", FieldType: "OptionalNested", ColumnName: "OptionalNested", TypeName: "OptionalNested", - Children: []fields.Field{ - {Type: "Being", TypeName: "*Being", FieldName: "Being", FieldType: "Being", ColumnName: "Being", RepetitionType: fields.Optional, Children: []fields.Field{ - {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, - }}, - {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, - }, + {Type: "OptionalNested", Name: "OptionalNested", ColumnName: "OptionalNested", Children: []fields.Field{ + {Type: "Being", Name: "Being", ColumnName: "Being", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + }}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, + }, }, }, }, @@ -117,11 +113,11 @@ func TestFields(t *testing.T) { typ: "OptionalNested", expected: fields.Field{ Children: []fields.Field{ - {Type: "Being", TypeName: "*Being", FieldName: "Being", FieldType: "Being", ColumnName: "Being", RepetitionType: fields.Optional, Children: []fields.Field{ - {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + {Type: "Being", Name: "Being", ColumnName: "Being", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, }}, - {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, }, }, errors: []error{}, @@ -131,10 +127,10 @@ func TestFields(t *testing.T) { typ: "OptionalNested2", expected: fields.Field{ Children: []fields.Field{ - {Type: "Thing", TypeName: "*Thing", FieldName: "Being", FieldType: "Thing", ColumnName: "Being", RepetitionType: fields.Optional, Children: []fields.Field{ - {ParquetType: "StringType", TypeName: "string", FieldName: "Name", FieldType: "string", ColumnName: "Name", Category: "string", RepetitionType: fields.Required}, + {Type: "Thing", Name: "Being", ColumnName: "Being", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", ColumnName: "Name", RepetitionType: fields.Required}, }}, - {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, }, }, errors: []error{}, @@ -145,8 +141,8 @@ func TestFields(t *testing.T) { errors: []error{fmt.Errorf("unsupported type &{time Time}")}, expected: fields.Field{ Children: []fields.Field{ - {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, }, }, }, @@ -155,10 +151,10 @@ func TestFields(t *testing.T) { typ: "SupportedAndUnsupported", expected: fields.Field{ Children: []fields.Field{ - {ParquetType: "Int64Type", TypeName: "int64", FieldName: "Happiness", FieldType: "int64", ColumnName: "Happiness", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, - {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + {Type: "int64", Name: "Happiness", ColumnName: "Happiness", RepetitionType: fields.Required}, + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, }, }, errors: []error{ @@ -171,16 +167,16 @@ func TestFields(t *testing.T) { typ: "Person", expected: fields.Field{ Children: []fields.Field{ - {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, - {ParquetType: "Int64Type", TypeName: "int64", FieldName: "Happiness", FieldType: "int64", ColumnName: "Happiness", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int64Type", TypeName: "*int64", FieldName: "Sadness", FieldType: "int64", ColumnName: "Sadness", Category: "numericOptional", RepetitionType: fields.Optional}, - {ParquetType: "StringType", TypeName: "string", FieldName: "Code", FieldType: "string", ColumnName: "Code", Category: "string", RepetitionType: fields.Required}, - {ParquetType: "Float32Type", TypeName: "float32", FieldType: "float32", FieldName: "Funkiness", ColumnName: "Funkiness", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Float32Type", TypeName: "*float32", FieldType: "float32", FieldName: "Lameness", ColumnName: "Lameness", Category: "numericOptional", RepetitionType: fields.Optional}, - {ParquetType: "BoolType", TypeName: "*bool", FieldType: "bool", FieldName: "Keen", ColumnName: "Keen", Category: "boolOptional", RepetitionType: fields.Optional}, - {ParquetType: "Uint32Type", TypeName: "uint32", FieldType: "uint32", FieldName: "Birthday", ColumnName: "Birthday", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + {Type: "int64", Name: "Happiness", ColumnName: "Happiness", RepetitionType: fields.Required}, + {Type: "int64", Name: "Sadness", ColumnName: "Sadness", RepetitionType: fields.Optional}, + {Type: "string", Name: "Code", ColumnName: "Code", RepetitionType: fields.Required}, + {Type: "float32", Name: "Funkiness", ColumnName: "Funkiness", RepetitionType: fields.Required}, + {Type: "float32", Name: "Lameness", ColumnName: "Lameness", RepetitionType: fields.Optional}, + {Type: "bool", Name: "Keen", ColumnName: "Keen", RepetitionType: fields.Optional}, + {Type: "uint32", Name: "Birthday", ColumnName: "Birthday", RepetitionType: fields.Required}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, }, }, }, @@ -189,16 +185,16 @@ func TestFields(t *testing.T) { typ: "NewOrderPerson", expected: fields.Field{ Children: []fields.Field{ - {ParquetType: "Int64Type", TypeName: "int64", FieldName: "Happiness", FieldType: "int64", ColumnName: "Happiness", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int64Type", TypeName: "*int64", FieldName: "Sadness", FieldType: "int64", ColumnName: "Sadness", Category: "numericOptional", RepetitionType: fields.Optional}, - {ParquetType: "StringType", TypeName: "string", FieldName: "Code", FieldType: "string", ColumnName: "Code", Category: "string", RepetitionType: fields.Required}, - {ParquetType: "Float32Type", TypeName: "float32", FieldType: "float32", FieldName: "Funkiness", ColumnName: "Funkiness", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Float32Type", TypeName: "*float32", FieldType: "float32", FieldName: "Lameness", ColumnName: "Lameness", Category: "numericOptional", RepetitionType: fields.Optional}, - {ParquetType: "BoolType", TypeName: "*bool", FieldType: "bool", FieldName: "Keen", ColumnName: "Keen", Category: "boolOptional", RepetitionType: fields.Optional}, - {ParquetType: "Uint32Type", TypeName: "uint32", FieldType: "uint32", FieldName: "Birthday", ColumnName: "Birthday", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "ID", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, - {ParquetType: "Uint64Type", TypeName: "*uint64", FieldName: "Anniversary", FieldType: "uint64", ColumnName: "Anniversary", Category: "numericOptional", RepetitionType: fields.Optional}, + {Type: "int64", Name: "Happiness", ColumnName: "Happiness", RepetitionType: fields.Required}, + {Type: "int64", Name: "Sadness", ColumnName: "Sadness", RepetitionType: fields.Optional}, + {Type: "string", Name: "Code", ColumnName: "Code", RepetitionType: fields.Required}, + {Type: "float32", Name: "Funkiness", ColumnName: "Funkiness", RepetitionType: fields.Required}, + {Type: "float32", Name: "Lameness", ColumnName: "Lameness", RepetitionType: fields.Optional}, + {Type: "bool", Name: "Keen", ColumnName: "Keen", RepetitionType: fields.Optional}, + {Type: "uint32", Name: "Birthday", ColumnName: "Birthday", RepetitionType: fields.Required}, + {Type: "int32", Name: "ID", ColumnName: "ID", RepetitionType: fields.Required}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, + {Type: "uint64", Name: "Anniversary", ColumnName: "Anniversary", RepetitionType: fields.Optional}, }, }, }, @@ -207,8 +203,8 @@ func TestFields(t *testing.T) { typ: "Tagged", expected: fields.Field{ Children: []fields.Field{ - {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, - {ParquetType: "StringType", TypeName: "string", FieldName: "Name", FieldType: "string", ColumnName: "name", Category: "string", RepetitionType: fields.Required}, + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "string", Name: "Name", ColumnName: "name", RepetitionType: fields.Required}, }, }, }, @@ -217,7 +213,7 @@ func TestFields(t *testing.T) { typ: "IgnoreMe", expected: fields.Field{ Children: []fields.Field{ - {ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", FieldType: "int32", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, }, }, }, @@ -226,7 +222,7 @@ func TestFields(t *testing.T) { typ: "Slice", expected: fields.Field{ Children: []fields.Field{ - {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "IDs", ColumnName: "ids", Category: "numericOptional", RepetitionType: fields.Repeated}, + {Type: "int32", Name: "IDs", ColumnName: "ids", RepetitionType: fields.Repeated}, }, }, }, @@ -235,8 +231,8 @@ func TestFields(t *testing.T) { typ: "Slice2", expected: fields.Field{ Children: []fields.Field{ - {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, - {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "IDs", ColumnName: "ids", Category: "numericOptional", RepetitionType: fields.Repeated}, + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "int32", Name: "IDs", ColumnName: "ids", RepetitionType: fields.Repeated}, }, }, }, @@ -245,9 +241,9 @@ func TestFields(t *testing.T) { typ: "Slice3", expected: fields.Field{ Children: []fields.Field{ - {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, - {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "IDs", ColumnName: "ids", Category: "numericOptional", RepetitionType: fields.Repeated}, - {ParquetType: "Int32Type", TypeName: "*int32", FieldName: "Age", FieldType: "int32", ColumnName: "Age", Category: "numericOptional", RepetitionType: fields.Optional}, + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "int32", Name: "IDs", ColumnName: "ids", RepetitionType: fields.Repeated}, + {Type: "int32", Name: "Age", ColumnName: "Age", RepetitionType: fields.Optional}, }, }, }, @@ -256,10 +252,10 @@ func TestFields(t *testing.T) { typ: "Slice4", expected: fields.Field{ Children: []fields.Field{ - {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, - {Type: "Hobby", TypeName: "Hobby", FieldName: "Hobbies", FieldType: "Hobby", ColumnName: "hobbies", RepetitionType: fields.Repeated, Children: []fields.Field{ - {ParquetType: "StringType", TypeName: "string", FieldName: "Name", FieldType: "string", ColumnName: "Name", Category: "string", RepetitionType: fields.Required}, - {ParquetType: "Int32Type", TypeName: "int32", FieldName: "Difficulty", FieldType: "int32", ColumnName: "Difficulty", Category: "numeric", RepetitionType: fields.Required}, + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "Hobby", Name: "Hobbies", ColumnName: "hobbies", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Name", ColumnName: "Name", RepetitionType: fields.Required}, + {Type: "int32", Name: "Difficulty", ColumnName: "Difficulty", RepetitionType: fields.Required}, }}, }, }, @@ -269,9 +265,9 @@ func TestFields(t *testing.T) { typ: "Slice5", expected: fields.Field{ Children: []fields.Field{ - {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, - {Type: "Hobby2", TypeName: "Hobby2", FieldName: "Hobby", FieldType: "Hobby2", ColumnName: "hobby", RepetitionType: fields.Required, Children: []fields.Field{ - {ParquetType: "StringType", TypeName: "string", FieldName: "Names", FieldType: "string", ColumnName: "names", Category: "stringOptional", RepetitionType: fields.Repeated}, + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "Hobby2", Name: "Hobby", ColumnName: "hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Names", ColumnName: "names", RepetitionType: fields.Repeated}, }}, }, }, @@ -281,9 +277,9 @@ func TestFields(t *testing.T) { typ: "Slice6", expected: fields.Field{ Children: []fields.Field{ - {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, - {Type: "Hobby2", TypeName: "Hobby2", FieldName: "Hobbies", FieldType: "Hobby2", ColumnName: "hobbies", RepetitionType: fields.Repeated, Children: []fields.Field{ - {ParquetType: "StringType", TypeName: "string", FieldName: "Names", FieldType: "string", ColumnName: "names", Category: "stringOptional", RepetitionType: fields.Repeated}, + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "Hobby2", Name: "Hobbies", ColumnName: "hobbies", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Names", ColumnName: "names", RepetitionType: fields.Repeated}, }}, }, }, @@ -293,10 +289,10 @@ func TestFields(t *testing.T) { typ: "Slice7", expected: fields.Field{ Children: []fields.Field{ - {Type: "Slice6", TypeName: "*Slice6", FieldName: "Thing", FieldType: "Slice6", ColumnName: "thing", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "int32", ParquetType: "Int32Type", TypeName: "int32", FieldName: "ID", ColumnName: "id", Category: "numeric", RepetitionType: fields.Required}, - {Type: "Hobby2", TypeName: "Hobby2", FieldName: "Hobbies", FieldType: "Hobby2", ColumnName: "hobbies", RepetitionType: fields.Repeated, Children: []fields.Field{ - {ParquetType: "StringType", TypeName: "string", FieldName: "Names", FieldType: "string", ColumnName: "names", Category: "stringOptional", RepetitionType: fields.Repeated}, + {Type: "Slice6", Name: "Thing", ColumnName: "thing", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int32", Name: "ID", ColumnName: "id", RepetitionType: fields.Required}, + {Type: "Hobby2", Name: "Hobbies", ColumnName: "hobbies", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Names", ColumnName: "names", RepetitionType: fields.Repeated}, }}, }}, }, @@ -307,17 +303,17 @@ func TestFields(t *testing.T) { typ: "Document", expected: fields.Field{ Children: []fields.Field{ - {FieldType: "int64", ParquetType: "Int64Type", TypeName: "int64", FieldName: "DocID", ColumnName: "DocID", Category: "numeric", RepetitionType: fields.Required}, - {Type: "Link", TypeName: "Link", FieldName: "Links", FieldType: "Link", ColumnName: "Links", RepetitionType: fields.Repeated, Children: []fields.Field{ - {TypeName: "int64", ParquetType: "Int64Type", FieldName: "Backward", FieldType: "int64", ColumnName: "Backward", Category: "numericOptional", RepetitionType: fields.Repeated}, - {TypeName: "int64", ParquetType: "Int64Type", FieldName: "Forward", FieldType: "int64", ColumnName: "Forward", Category: "numericOptional", RepetitionType: fields.Repeated}, + {Type: "int64", Name: "DocID", ColumnName: "DocID", RepetitionType: fields.Required}, + {Type: "Link", Name: "Links", ColumnName: "Links", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "int64", Name: "Backward", ColumnName: "Backward", RepetitionType: fields.Repeated}, + {Type: "int64", Name: "Forward", ColumnName: "Forward", RepetitionType: fields.Repeated}, }}, - {Type: "Name", TypeName: "Name", FieldName: "Names", FieldType: "Name", ColumnName: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ - {Type: "Language", TypeName: "Language", FieldName: "Languages", FieldType: "Language", ColumnName: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ - {TypeName: "string", ParquetType: "StringType", FieldName: "Code", FieldType: "string", ColumnName: "Code", Category: "string", RepetitionType: fields.Required}, - {TypeName: "*string", ParquetType: "StringType", FieldName: "Country", FieldType: "string", ColumnName: "Country", Category: "stringOptional", RepetitionType: fields.Optional}, + {Type: "Name", Name: "Names", ColumnName: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "Language", Name: "Languages", ColumnName: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Code", ColumnName: "Code", RepetitionType: fields.Required}, + {Type: "string", Name: "Country", ColumnName: "Country", RepetitionType: fields.Optional}, }}, - {TypeName: "*string", ParquetType: "StringType", FieldName: "URL", FieldType: "string", ColumnName: "URL", Category: "stringOptional", RepetitionType: fields.Optional}, + {Type: "string", Name: "URL", ColumnName: "URL", RepetitionType: fields.Optional}, }}, }, }, @@ -327,10 +323,10 @@ func TestFields(t *testing.T) { typ: "A", expected: fields.Field{ Children: []fields.Field{ - {FieldName: "D", FieldType: "int32", TypeName: "int32", ParquetType: "Int32Type", ColumnName: "D", Category: "numeric", RepetitionType: fields.Required}, - {FieldName: "C", FieldType: "string", TypeName: "string", ParquetType: "StringType", ColumnName: "C", Category: "string", RepetitionType: fields.Required}, - {FieldName: "B", FieldType: "bool", TypeName: "bool", ParquetType: "BoolType", ColumnName: "B", Category: "bool", RepetitionType: fields.Required}, - {FieldName: "Name", FieldType: "string", TypeName: "string", ParquetType: "StringType", ColumnName: "Name", Category: "string", RepetitionType: fields.Required}, + {Name: "D", Type: "int32", ColumnName: "D", RepetitionType: fields.Required}, + {Name: "C", Type: "string", ColumnName: "C", RepetitionType: fields.Required}, + {Name: "B", Type: "bool", ColumnName: "B", RepetitionType: fields.Required}, + {Name: "Name", Type: "string", ColumnName: "Name", RepetitionType: fields.Required}, }, }, }, @@ -340,6 +336,11 @@ func TestFields(t *testing.T) { t.Run(fmt.Sprintf("%02d %s", i, tc.name), func(t *testing.T) { out, err := parse.Fields(tc.typ, "./parse_test.go") assert.Nil(t, err, tc.name) + + if len(tc.errors) == 0 { + tc.errors = nil + } + if !assert.Equal(t, tc.errors, out.Errors, tc.name) { return } diff --git a/internal/parse/parse.go b/internal/parse/parse.go index 44a01a2..2243adf 100644 --- a/internal/parse/parse.go +++ b/internal/parse/parse.go @@ -69,7 +69,7 @@ func Fields(typ, pth string) (*Result, error) { errs := getChildren(&parent, fields) return &Result{ - Parent: flds.Field{Children: parent.Children}, + Parent: flds.Field{Type: typ, Children: parent.Children}, Errors: errs, }, nil } @@ -77,9 +77,9 @@ func Fields(typ, pth string) (*Result, error) { func getChildren(parent *flds.Field, fields map[string]flds.Field) []error { var children []flds.Field var errs []error - p, ok := fields[parent.FieldType] + p, ok := fields[parent.Type] if !ok { - errs = append(errs, fmt.Errorf("could not find %+v", parent)) + errs = append(errs, fmt.Errorf("could not find %s", parent.Type)) } for _, child := range p.Children { @@ -88,16 +88,19 @@ func getChildren(parent *flds.Field, fields map[string]flds.Field) []error { continue } - f, ok := fields[child.FieldType] + f, ok := fields[child.Type] if !ok { - errs = append(errs, fmt.Errorf("unsupported type %+v", child.FieldType)) - continue + f, ok = fields[child.Type] + if !ok { + errs = append(errs, fmt.Errorf("unsupported type %+v", child.Type)) + continue + } } errs = append(errs, getChildren(&child, fields)...) - f.FieldName = child.FieldName - f.TypeName = child.TypeName + f.Name = child.Name + f.Type = child.Type f.ColumnName = child.ColumnName f.Children = child.Children f.RepetitionType = child.RepetitionType @@ -127,17 +130,13 @@ func isPrivate(x *ast.Field) bool { func getFields(n map[string]ast.Node) (map[string]fields.Field, error) { fields := map[string]flds.Field{} for k, n := range n { - x, ok := n.(*ast.TypeSpec) + _, ok := n.(*ast.TypeSpec) if !ok { continue } parent := flds.Field{ - Type: x.Name.Name, - TypeName: x.Name.Name, - ColumnName: x.Name.Name, - FieldName: x.Name.Name, - FieldType: x.Name.Name, + Type: k, } ast.Inspect(n, func(n ast.Node) bool { @@ -206,8 +205,6 @@ func getField(name string, x ast.Node, parent *flds.Field) (flds.Field, bool) { tag = name } - _, cat, pt, _ := lookupTypeAndCategory(typ, optional, repeated) - rt := fields.Required if repeated { rt = fields.Repeated @@ -216,13 +213,9 @@ func getField(name string, x ast.Node, parent *flds.Field) (flds.Field, bool) { } return flds.Field{ - FieldName: name, - FieldType: typ, - ColumnName: tag, - TypeName: getTypeName(typ, optional), - //Type: fn, - ParquetType: pt, - Category: cat, + Type: typ, + Name: name, + ColumnName: tag, RepetitionType: rt, }, tag == "-" } @@ -236,42 +229,6 @@ func parseTag(t string) string { return t[:strings.Index(t, `"`)] } -func getTypeName(s string, optional bool) string { - var star string - if optional { - star = "*" - } - return fmt.Sprintf("%s%s", star, s) -} - -func lookupTypeAndCategory(name string, optional, repeated bool) (string, string, string, bool) { - var op string - if optional || repeated { - op = "Optional" - } - f, ok := types[name] - if !ok { - return "", "", "", false - } - return fmt.Sprintf(f.name, op, "Field"), fmt.Sprintf(f.category, op), fmt.Sprintf(f.name, "", "Type"), true -} - -type fieldType struct { - name string - category string -} - -var types = map[string]fieldType{ - "int32": {"Int32%s%s", "numeric%s"}, - "uint32": {"Uint32%s%s", "numeric%s"}, - "int64": {"Int64%s%s", "numeric%s"}, - "uint64": {"Uint64%s%s", "numeric%s"}, - "float32": {"Float32%s%s", "numeric%s"}, - "float64": {"Float64%s%s", "numeric%s"}, - "bool": {"Bool%s%s", "bool%s"}, - "string": {"String%s%s", "string%s"}, -} - type visitorFunc func(n ast.Node) ast.Visitor func (f visitorFunc) Visit(n ast.Node) ast.Visitor { @@ -295,10 +252,20 @@ func (f *finder) findTypes(n ast.Node) ast.Visitor { return visitorFunc(f.findTypes) } case *ast.TypeSpec: - //fmt.Printf("node: %+v\n", n) f.n[n.Name.Name] = n return visitorFunc(f.findTypes) } return nil } + +var types = map[string]bool{ + "int32": true, + "uint32": true, + "int64": true, + "uint64": true, + "float32": true, + "float64": true, + "bool": true, + "string": true, +} From 9ac601c91ee8b99ee1eeb2ef506c216fc14b6bd1 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Sun, 13 Jun 2021 06:03:39 -0600 Subject: [PATCH 12/25] wip --- internal/dremel/dremel.go | 2 +- internal/dremel/dremel_test.go | 336 +++--- internal/dremel/read.go | 6 +- internal/dremel/read_repeated.go | 4 +- internal/dremel/read_test.go | 112 +- internal/dremel/testcases/doc/generated.go | 981 ------------------ internal/dremel/testcases/person/generated.go | 862 --------------- internal/dremel/write_test.go | 166 +-- internal/fields/fields.go | 8 +- internal/fields/fields_test.go | 6 +- internal/gen/funcs.go | 8 +- internal/gen/gen.go | 4 +- 12 files changed, 327 insertions(+), 2168 deletions(-) delete mode 100644 internal/dremel/testcases/doc/generated.go delete mode 100644 internal/dremel/testcases/person/generated.go diff --git a/internal/dremel/dremel.go b/internal/dremel/dremel.go index 2d10853..38de5cb 100644 --- a/internal/dremel/dremel.go +++ b/internal/dremel/dremel.go @@ -42,5 +42,5 @@ func Read(f fields.Field) string { func writeRequired(f fields.Field) string { return fmt.Sprintf(`func %s(x *%s, vals []%s) { x.%s = vals[0] -}`, fmt.Sprintf("write%s", strings.Join(f.FieldNames(), "")), f.StructType(), f.TypeName, strings.Join(f.FieldNames(), ".")) +}`, fmt.Sprintf("write%s", strings.Join(f.FieldNames(), "")), f.StructType(), f.TypeName(), strings.Join(f.FieldNames(), ".")) } diff --git a/internal/dremel/dremel_test.go b/internal/dremel/dremel_test.go index 7048c0e..d1810f4 100644 --- a/internal/dremel/dremel_test.go +++ b/internal/dremel/dremel_test.go @@ -1,173 +1,173 @@ package dremel_test -import ( - "bytes" - "log" - "testing" - - "github.com/parsyl/parquet/internal/dremel/testcases/doc" - "github.com/parsyl/parquet/internal/dremel/testcases/person" - "github.com/stretchr/testify/assert" -) - -var ( - dremelDocs = []doc.Document{ - { - DocID: 10, - Links: &doc.Link{ - Forward: []int64{20, 40, 60}, - }, - Names: []doc.Name{ - { - Languages: []doc.Language{ - {Code: "en-us", Country: pstring("us")}, - {Code: "en"}, - }, - URL: pstring("http://A"), - }, - { - URL: pstring("http://B"), - }, - { - Languages: []doc.Language{ - {Code: "en-gb", Country: pstring("gb")}, - }, - }, - }, - }, - { - DocID: 20, - Links: &doc.Link{ - Backward: []int64{10, 30}, - Forward: []int64{80}, - }, - Names: []doc.Name{ - { - URL: pstring("http://C"), - }, - }, - }, - } -) - -// TestLevels verifies that the example from the dremel paper -// results in the correct definition and repetition levels. -func TestLevels(t *testing.T) { - var buf bytes.Buffer - pw, err := doc.NewParquetWriter(&buf) - if err != nil { - assert.NoError(t, err) - } - - for _, doc := range dremelDocs { - pw.Add(doc) - } - - if err := pw.Write(); err != nil { - assert.NoError(t, err) - } - - pw.Close() - - pr, err := doc.NewParquetReader(bytes.NewReader(buf.Bytes())) - if err != nil { - assert.NoError(t, err) - } - - expected := []doc.Levels{ - {Name: "docid"}, - {Name: "link.backward", Defs: []uint8{1, 2, 2}, Reps: []uint8{0, 0, 1}}, - {Name: "link.forward", Defs: []uint8{2, 2, 2, 2}, Reps: []uint8{0, 1, 1, 0}}, - {Name: "names.languages.code", Defs: []uint8{2, 2, 1, 2, 1}, Reps: []uint8{0, 2, 1, 1, 0}}, - {Name: "names.languages.country", Defs: []uint8{3, 2, 1, 3, 1}, Reps: []uint8{0, 2, 1, 1, 0}}, - {Name: "names.url", Defs: []uint8{2, 2, 1, 2}, Reps: []uint8{0, 1, 1, 0}}, - } - - assert.Equal(t, expected, pr.Levels()) -} - -var ( - people = []person.Person{ - { - Name: "peep", - Hobby: &person.Hobby{ - Name: "napping", - Difficulty: pint32(10), - Skills: []person.Skill{ - {Name: "meditation", Difficulty: "very"}, - {Name: "calmness", Difficulty: "so-so"}, - }, - }, - }, - } -) - -func TestPersonLevels(t *testing.T) { - var buf bytes.Buffer - pw, err := person.NewParquetWriter(&buf) - if err != nil { - assert.NoError(t, err) - } - - for _, p := range people { - pw.Add(p) - } - - if err := pw.Write(); err != nil { - assert.NoError(t, err) - } - - pw.Close() - - pr, err := person.NewParquetReader(bytes.NewReader(buf.Bytes())) - if err != nil { - assert.NoError(t, err) - } - - expected := []person.Levels{ - {Name: "name"}, - {Name: "hobby.name", Defs: []uint8{1}}, - {Name: "hobby.difficulty", Defs: []uint8{2}}, - {Name: "hobby.skills.name", Defs: []uint8{2, 2}, Reps: []uint8{0, 1}}, - {Name: "hobby.skills.difficulty", Defs: []uint8{2, 2}, Reps: []uint8{0, 1}}, - } - - assert.Equal(t, expected, pr.Levels()) -} - -// TestDremel uses the example from the dremel paper and writes then -// reads from a parquet file to make sure nested fields work correctly. -func TestDremel(t *testing.T) { - var buf bytes.Buffer - pw, err := doc.NewParquetWriter(&buf) - if err != nil { - log.Fatal(err) - } - - for _, doc := range dremelDocs { - pw.Add(doc) - } - - if err := pw.Write(); err != nil { - log.Fatal(err) - } - - pw.Close() - - pr, err := doc.NewParquetReader(bytes.NewReader(buf.Bytes())) - if err != nil { - log.Fatal(err) - } - - var out []doc.Document - for pr.Next() { - var d doc.Document - pr.Scan(&d) - out = append(out, d) - } - - assert.Equal(t, dremelDocs, out) -} +// import ( +// "bytes" +// "log" +// "testing" + +// "github.com/parsyl/parquet/internal/dremel/testcases/doc" +// "github.com/parsyl/parquet/internal/dremel/testcases/person" +// "github.com/stretchr/testify/assert" +// ) + +// var ( +// dremelDocs = []doc.Document{ +// { +// DocID: 10, +// Links: &doc.Link{ +// Forward: []int64{20, 40, 60}, +// }, +// Names: []doc.Name{ +// { +// Languages: []doc.Language{ +// {Code: "en-us", Country: pstring("us")}, +// {Code: "en"}, +// }, +// URL: pstring("http://A"), +// }, +// { +// URL: pstring("http://B"), +// }, +// { +// Languages: []doc.Language{ +// {Code: "en-gb", Country: pstring("gb")}, +// }, +// }, +// }, +// }, +// { +// DocID: 20, +// Links: &doc.Link{ +// Backward: []int64{10, 30}, +// Forward: []int64{80}, +// }, +// Names: []doc.Name{ +// { +// URL: pstring("http://C"), +// }, +// }, +// }, +// } +// ) + +// // TestLevels verifies that the example from the dremel paper +// // results in the correct definition and repetition levels. +// func TestLevels(t *testing.T) { +// var buf bytes.Buffer +// pw, err := doc.NewParquetWriter(&buf) +// if err != nil { +// assert.NoError(t, err) +// } + +// for _, doc := range dremelDocs { +// pw.Add(doc) +// } + +// if err := pw.Write(); err != nil { +// assert.NoError(t, err) +// } + +// pw.Close() + +// pr, err := doc.NewParquetReader(bytes.NewReader(buf.Bytes())) +// if err != nil { +// assert.NoError(t, err) +// } + +// expected := []doc.Levels{ +// {Name: "docid"}, +// {Name: "link.backward", Defs: []uint8{1, 2, 2}, Reps: []uint8{0, 0, 1}}, +// {Name: "link.forward", Defs: []uint8{2, 2, 2, 2}, Reps: []uint8{0, 1, 1, 0}}, +// {Name: "names.languages.code", Defs: []uint8{2, 2, 1, 2, 1}, Reps: []uint8{0, 2, 1, 1, 0}}, +// {Name: "names.languages.country", Defs: []uint8{3, 2, 1, 3, 1}, Reps: []uint8{0, 2, 1, 1, 0}}, +// {Name: "names.url", Defs: []uint8{2, 2, 1, 2}, Reps: []uint8{0, 1, 1, 0}}, +// } + +// assert.Equal(t, expected, pr.Levels()) +// } + +// var ( +// people = []person.Person{ +// { +// Name: "peep", +// Hobby: &person.Hobby{ +// Name: "napping", +// Difficulty: pint32(10), +// Skills: []person.Skill{ +// {Name: "meditation", Difficulty: "very"}, +// {Name: "calmness", Difficulty: "so-so"}, +// }, +// }, +// }, +// } +// ) + +// func TestPersonLevels(t *testing.T) { +// var buf bytes.Buffer +// pw, err := person.NewParquetWriter(&buf) +// if err != nil { +// assert.NoError(t, err) +// } + +// for _, p := range people { +// pw.Add(p) +// } + +// if err := pw.Write(); err != nil { +// assert.NoError(t, err) +// } + +// pw.Close() + +// pr, err := person.NewParquetReader(bytes.NewReader(buf.Bytes())) +// if err != nil { +// assert.NoError(t, err) +// } + +// expected := []person.Levels{ +// {Name: "name"}, +// {Name: "hobby.name", Defs: []uint8{1}}, +// {Name: "hobby.difficulty", Defs: []uint8{2}}, +// {Name: "hobby.skills.name", Defs: []uint8{2, 2}, Reps: []uint8{0, 1}}, +// {Name: "hobby.skills.difficulty", Defs: []uint8{2, 2}, Reps: []uint8{0, 1}}, +// } + +// assert.Equal(t, expected, pr.Levels()) +// } + +// // TestDremel uses the example from the dremel paper and writes then +// // reads from a parquet file to make sure nested fields work correctly. +// func TestDremel(t *testing.T) { +// var buf bytes.Buffer +// pw, err := doc.NewParquetWriter(&buf) +// if err != nil { +// log.Fatal(err) +// } + +// for _, doc := range dremelDocs { +// pw.Add(doc) +// } + +// if err := pw.Write(); err != nil { +// log.Fatal(err) +// } + +// pw.Close() + +// pr, err := doc.NewParquetReader(bytes.NewReader(buf.Bytes())) +// if err != nil { +// log.Fatal(err) +// } + +// var out []doc.Document +// for pr.Next() { +// var d doc.Document +// pr.Scan(&d) +// out = append(out, d) +// } + +// assert.Equal(t, dremelDocs, out) +// } func pstring(s string) *string { return &s diff --git a/internal/dremel/read.go b/internal/dremel/read.go index d6dff4c..f75e15e 100644 --- a/internal/dremel/read.go +++ b/internal/dremel/read.go @@ -10,7 +10,7 @@ import ( func readRequired(f fields.Field) string { return fmt.Sprintf(`func read%s(x %s) %s { return x.%s -}`, strings.Join(f.FieldNames(), ""), f.StructType(), f.TypeName, strings.Join(f.FieldNames(), ".")) +}`, strings.Join(f.FieldNames(), ""), f.StructType(), f.TypeName(), strings.Join(f.FieldNames(), ".")) } func readOptional(f fields.Field) string { @@ -29,13 +29,13 @@ func readOptional(f fields.Field) string { } out += fmt.Sprintf(` default: - return []%s{%sx.%s}, []uint8{%d}, nil`, cleanTypeName(f.TypeName), ptr, nilField(n, f), n) + return []%s{%sx.%s}, []uint8{%d}, nil`, cleanTypeName(f.Type), ptr, nilField(n, f), n) return fmt.Sprintf(`func read%s(x %s) ([]%s, []uint8, []uint8) { switch { %s } - }`, strings.Join(f.FieldNames(), ""), f.StructType(), cleanTypeName(f.TypeName), out) + }`, strings.Join(f.FieldNames(), ""), f.StructType(), cleanTypeName(f.Type), out) } func cleanTypeName(s string) string { diff --git a/internal/dremel/read_repeated.go b/internal/dremel/read_repeated.go index ce19814..c7aadcc 100644 --- a/internal/dremel/read_repeated.go +++ b/internal/dremel/read_repeated.go @@ -71,8 +71,8 @@ func readRepeated(f fields.Field) string { }`, strings.Join(f.FieldNames(), ""), f.StructType(), - cleanTypeName(f.TypeName), - cleanTypeName(f.TypeName), + cleanTypeName(f.Type), + cleanTypeName(f.Type), doReadRepeated(f, 0, "x"), ) } diff --git a/internal/dremel/read_test.go b/internal/dremel/read_test.go index 2c867ef..1624ab1 100644 --- a/internal/dremel/read_test.go +++ b/internal/dremel/read_test.go @@ -20,7 +20,7 @@ func TestRead(t *testing.T) { { name: "required and not nested", f: fields.Field{ - FieldType: "int32", TypeName: "int32", FieldName: "ID", RepetitionType: fields.Required, + Type: "int32", Name: "ID", RepetitionType: fields.Required, }, result: `func readID(x Person) int32 { return x.ID @@ -30,7 +30,7 @@ func TestRead(t *testing.T) { name: "optional and not nested", ////f: fields.Field{Type: "Person", TypeName: "*int32", FieldNames: []string{"ID"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}, f: fields.Field{ - FieldType: "int32", TypeName: "*int32", FieldName: "ID", RepetitionType: fields.Optional, + Type: "int32", Name: "ID", RepetitionType: fields.Optional, }, result: `func readID(x Person) ([]int32, []uint8, []uint8) { switch { @@ -44,9 +44,9 @@ func TestRead(t *testing.T) { { name: "required and nested", f: fields.Field{ - FieldName: "Other", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldType: "int32", TypeName: "int32", FieldName: "Difficulty", RepetitionType: fields.Required}, + Name: "Other", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "int32", Name: "Difficulty", RepetitionType: fields.Required}, }}, }, }, @@ -57,8 +57,8 @@ func TestRead(t *testing.T) { { name: "optional and nested", f: fields.Field{ - FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "int32", TypeName: "*int32", FieldName: "Difficulty", RepetitionType: fields.Optional}, + Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int32", Name: "Difficulty", RepetitionType: fields.Optional}, }, }, result: `func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { @@ -75,8 +75,8 @@ func TestRead(t *testing.T) { { name: "mix of optional and required and nested", f: fields.Field{ - FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Required}, + Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Required}, }, }, result: `func readHobbyName(x Person) ([]string, []uint8, []uint8) { @@ -91,8 +91,8 @@ func TestRead(t *testing.T) { { name: "mix of optional and required and nested v2", f: fields.Field{ - FieldName: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Optional}, + Name: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, }, }, result: `func readHobbyName(x Person) ([]string, []uint8, []uint8) { @@ -107,9 +107,9 @@ func TestRead(t *testing.T) { { name: "mix of optional and require and nested 3 deep", f: fields.Field{ - FieldName: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Optional}, + Name: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, }}, }, }, @@ -127,9 +127,9 @@ func TestRead(t *testing.T) { { name: "mix of optional and require and nested 3 deep v2", f: fields.Field{ - FieldName: "Friend", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Optional}, + Name: "Friend", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, }}, }, }, @@ -147,9 +147,9 @@ func TestRead(t *testing.T) { { name: "mix of optional and require and nested 3 deep v3", f: fields.Field{ - FieldName: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Required}, + Name: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Required}, }}, }, }, @@ -167,9 +167,9 @@ func TestRead(t *testing.T) { { name: "nested 3 deep all optional", f: fields.Field{ - FieldName: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Optional}, + Name: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, }}, }, }, @@ -189,10 +189,10 @@ func TestRead(t *testing.T) { { name: "four deep", f: fields.Field{ - FieldName: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "First", RepetitionType: fields.Optional}, + Name: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Optional}, }}, }}, }, @@ -215,10 +215,10 @@ func TestRead(t *testing.T) { { name: "four deep mixed", f: fields.Field{ - FieldName: "Friend", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "First", RepetitionType: fields.Optional}, + Name: "Friend", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Optional}, }}, }}, }, @@ -239,10 +239,10 @@ func TestRead(t *testing.T) { { name: "four deep mixed v2", f: fields.Field{ - FieldName: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "First", RepetitionType: fields.Required}, + Name: "Friend", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Required}, }}, }}, }, @@ -263,7 +263,7 @@ func TestRead(t *testing.T) { { name: "repeated", f: fields.Field{ - FieldType: "string", TypeName: "string", FieldName: "Friends", RepetitionType: fields.Repeated, + Type: "string", Name: "Friends", RepetitionType: fields.Repeated, }, result: `func readFriends(x Person) ([]string, []uint8, []uint8) { var vals []string @@ -291,8 +291,8 @@ func TestRead(t *testing.T) { name: "readLinkFoward", structName: "Document", f: fields.Field{ - FieldName: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "int64", TypeName: "int64", FieldName: "Forward", RepetitionType: fields.Repeated}, + Name: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int64", Name: "Forward", RepetitionType: fields.Repeated}, }, }, result: `func readLinkForward(x Document) ([]int64, []uint8, []uint8) { @@ -326,9 +326,9 @@ func TestRead(t *testing.T) { name: "readNamesLanguagesCode", structName: "Document", f: fields.Field{ - FieldName: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Code", RepetitionType: fields.Required}, + Name: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Code", RepetitionType: fields.Required}, }}, }, }, @@ -368,9 +368,9 @@ func TestRead(t *testing.T) { name: "readNamesLanguagesCountry", structName: "Document", f: fields.Field{ - FieldName: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Country", RepetitionType: fields.Optional}, + Name: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Country", RepetitionType: fields.Optional}, }}, }, }, @@ -415,8 +415,8 @@ func TestRead(t *testing.T) { name: "readNamesURL", structName: "Document", f: fields.Field{ - FieldName: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "URL", RepetitionType: fields.Optional}, + Name: "Names", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "URL", RepetitionType: fields.Optional}, }, }, result: `func readNamesURL(x Document) ([]string, []uint8, []uint8) { @@ -450,9 +450,9 @@ func TestRead(t *testing.T) { name: "run of required", structName: "Document", f: fields.Field{ - FieldName: "Friends", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Name", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Last", RepetitionType: fields.Required}, + Name: "Friends", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Last", RepetitionType: fields.Required}, }}, }, }, @@ -482,9 +482,9 @@ func TestRead(t *testing.T) { name: "run of required v2", structName: "Document", f: fields.Field{ - FieldName: "Friend", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Name", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Aliases", RepetitionType: fields.Repeated}, + Name: "Friend", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Aliases", RepetitionType: fields.Repeated}, }}, }, }, @@ -514,10 +514,10 @@ func TestRead(t *testing.T) { name: "run of required v3", structName: "Document", f: fields.Field{ - FieldName: "Other", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Friends", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldName: "Name", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Middle", RepetitionType: fields.Required}, + Name: "Other", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Friends", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Name", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Middle", RepetitionType: fields.Required}, }}, }}, }, diff --git a/internal/dremel/testcases/doc/generated.go b/internal/dremel/testcases/doc/generated.go deleted file mode 100644 index 38191d5..0000000 --- a/internal/dremel/testcases/doc/generated.go +++ /dev/null @@ -1,981 +0,0 @@ -package doc - -// Code generated by github.com/parsyl/parquet. DO NOT EDIT. - -import ( - "bytes" - "encoding/binary" - "fmt" - "io" - "strings" - - "github.com/parsyl/parquet" - sch "github.com/parsyl/parquet/schema" - - "math" - "sort" -) - -type compression int - -const ( - compressionUncompressed compression = 0 - compressionSnappy compression = 1 - compressionGzip compression = 2 - compressionUnknown compression = -1 -) - -// ParquetWriter reprents a row group -type ParquetWriter struct { - fields []Field - - len int - - // child points to the next page - child *ParquetWriter - - // max is the number of Record items that can get written before - // a new set of column chunks is written - max int - - meta *parquet.Metadata - w io.Writer - compression compression -} - -func Fields(compression compression) []Field { - return []Field{ - Newint64(readDocID, writeDocID, []string{"docid"}, fieldCompression(compression)), - Newint64(readLinksBackward, writeLinksBackward, []string{"link", "backward"}, []int{1, 2}, fieldCompression(compression)), - Newint64(readLinksForward, writeLinksForward, []string{"link", "forward"}, []int{1, 2}, fieldCompression(compression)), - Newstring(readNamesLanguagesCode, writeNamesLanguagesCode, []string{"names", "languages", "code"}, []int{2, 2, 0}, fieldCompression(compression)), - Newstring(readNamesLanguagesCountry, writeNamesLanguagesCountry, []string{"names", "languages", "country"}, []int{2, 2, 1}, fieldCompression(compression)), - Newstring(readNamesURL, writeNamesURL, []string{"names", "url"}, []int{2, 1}, fieldCompression(compression)), - } -} - -func readDocID(x Document) int64 { - return x.DocID -} - -func writeDocID(x *Document, vals []int64) { - x.DocID = vals[0] -} - -func readLinksBackward(x Document) ([]int64, []uint8, []uint8) { - var vals []int64 - var defs, reps []uint8 - var lastRep uint8 - - if x.Links == nil { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - if len(x.Links.Backward) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Links.Backward { - if i0 == 1 { - lastRep = 1 - } - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, x0) - } - } - } - - return vals, defs, reps -} - -func writeLinksBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 1) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 1: - x.Links = &Link{} - case 2: - switch rep { - case 0: - x.Links = &Link{Backward: []int64{vals[nVals]}} - case 1: - x.Links.Backward = append(x.Links.Backward, vals[nVals]) - } - nVals++ - } - } - - return nVals, nLevels -} - -func readLinksForward(x Document) ([]int64, []uint8, []uint8) { - var vals []int64 - var defs, reps []uint8 - var lastRep uint8 - - if x.Links == nil { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - if len(x.Links.Forward) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Links.Forward { - if i0 == 1 { - lastRep = 1 - } - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, x0) - } - } - } - - return vals, defs, reps -} - -func writeLinksForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 1) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 2: - switch rep { - case 0: - x.Links.Forward = []int64{vals[nVals]} - case 1: - x.Links.Forward = append(x.Links.Forward, vals[nVals]) - } - nVals++ - } - } - - return nVals, nLevels -} - -func readNamesLanguagesCode(x Document) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 - - if len(x.Names) == 0 { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Names { - if i0 == 1 { - lastRep = 1 - } - if len(x0.Languages) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i1, x1 := range x0.Languages { - if i1 == 1 { - lastRep = 2 - } - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, x1.Code) - } - } - } - } - - return vals, defs, reps -} - -func writeNamesLanguagesCode(x *Document, vals []string, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 2) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 1: - x.Names = append(x.Names, Name{}) - case 2: - switch rep { - case 0: - x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}} - case 1: - x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}}) - case 2: - x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]}) - } - nVals++ - } - } - - return nVals, nLevels -} - -func readNamesLanguagesCountry(x Document) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 - - if len(x.Names) == 0 { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Names { - if i0 == 1 { - lastRep = 1 - } - if len(x0.Languages) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i1, x1 := range x0.Languages { - if i1 == 1 { - lastRep = 2 - } - if x1.Country == nil { - defs = append(defs, 2) - reps = append(reps, lastRep) - } else { - defs = append(defs, 3) - reps = append(reps, lastRep) - vals = append(vals, *x1.Country) - } - } - } - } - } - - return vals, defs, reps -} - -func writeNamesLanguagesCountry(x *Document, vals []string, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 2) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 3: - switch rep { - case 0, 2: - x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) - } - nVals++ - } - } - - return nVals, nLevels -} - -func readNamesURL(x Document) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 - - if len(x.Names) == 0 { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Names { - if i0 == 1 { - lastRep = 1 - } - if x0.URL == nil { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, *x0.URL) - } - } - } - - return vals, defs, reps -} - -func writeNamesURL(x *Document, vals []string, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 1) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 2: - switch rep { - case 0, 1: - x.Names[ind[0]].URL = pstring(vals[nVals]) - } - nVals++ - } - } - - return nVals, nLevels -} - -func fieldCompression(c compression) func(*parquet.RequiredField) { - switch c { - case compressionUncompressed: - return parquet.RequiredFieldUncompressed - case compressionSnappy: - return parquet.RequiredFieldSnappy - case compressionGzip: - return parquet.RequiredFieldGzip - default: - return parquet.RequiredFieldUncompressed - } -} - -func optionalFieldCompression(c compression) func(*parquet.OptionalField) { - switch c { - case compressionUncompressed: - return parquet.OptionalFieldUncompressed - case compressionSnappy: - return parquet.OptionalFieldSnappy - case compressionGzip: - return parquet.OptionalFieldGzip - default: - return parquet.OptionalFieldUncompressed - } -} - -func NewParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { - return newParquetWriter(w, append(opts, begin)...) -} - -func newParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { - p := &ParquetWriter{ - max: 1000, - w: w, - compression: compressionSnappy, - } - - for _, opt := range opts { - if err := opt(p); err != nil { - return nil, err - } - } - - p.fields = Fields(p.compression) - if p.meta == nil { - ff := Fields(p.compression) - schema := make([]parquet.Field, len(ff)) - for i, f := range ff { - schema[i] = f.Schema() - } - p.meta = parquet.New(schema...) - } - - return p, nil -} - -// MaxPageSize is the maximum number of rows in each row groups' page. -func MaxPageSize(m int) func(*ParquetWriter) error { - return func(p *ParquetWriter) error { - p.max = m - return nil - } -} - -func begin(p *ParquetWriter) error { - _, err := p.w.Write([]byte("PAR1")) - return err -} - -func withMeta(m *parquet.Metadata) func(*ParquetWriter) error { - return func(p *ParquetWriter) error { - p.meta = m - return nil - } -} - -func Uncompressed(p *ParquetWriter) error { - p.compression = compressionUncompressed - return nil -} - -func Snappy(p *ParquetWriter) error { - p.compression = compressionSnappy - return nil -} - -func Gzip(p *ParquetWriter) error { - p.compression = compressionGzip - return nil -} - -func withCompression(c compression) func(*ParquetWriter) error { - return func(p *ParquetWriter) error { - p.compression = c - return nil - } -} - -func (p *ParquetWriter) Write() error { - for i, f := range p.fields { - if err := f.Write(p.w, p.meta); err != nil { - return err - } - - for child := p.child; child != nil; child = child.child { - if err := child.fields[i].Write(p.w, p.meta); err != nil { - return err - } - } - } - - p.fields = Fields(p.compression) - p.child = nil - p.len = 0 - - schema := make([]parquet.Field, len(p.fields)) - for i, f := range p.fields { - schema[i] = f.Schema() - } - p.meta.StartRowGroup(schema...) - return nil -} - -func (p *ParquetWriter) Close() error { - if err := p.meta.Footer(p.w); err != nil { - return err - } - - _, err := p.w.Write([]byte("PAR1")) - return err -} - -func (p *ParquetWriter) Add(rec Document) { - if p.len == p.max { - if p.child == nil { - // an error can't happen here - p.child, _ = newParquetWriter(p.w, MaxPageSize(p.max), withMeta(p.meta), withCompression(p.compression)) - } - - p.child.Add(rec) - return - } - - p.meta.NextDoc() - for _, f := range p.fields { - f.Add(rec) - } - - p.len++ -} - -type Field interface { - Add(r Document) - Write(w io.Writer, meta *parquet.Metadata) error - Schema() parquet.Field - Scan(r *Document) - Read(r io.ReadSeeker, pg parquet.Page) error - Name() string - Levels() ([]uint8, []uint8) -} - -func getFields(ff []Field) map[string]Field { - m := make(map[string]Field, len(ff)) - for _, f := range ff { - m[f.Name()] = f - } - return m -} - -func NewParquetReader(r io.ReadSeeker, opts ...func(*ParquetReader)) (*ParquetReader, error) { - ff := Fields(compressionUnknown) - pr := &ParquetReader{ - r: r, - } - - for _, opt := range opts { - opt(pr) - } - - schema := make([]parquet.Field, len(ff)) - for i, f := range ff { - pr.fieldNames = append(pr.fieldNames, f.Name()) - schema[i] = f.Schema() - } - - meta := parquet.New(schema...) - if err := meta.ReadFooter(r); err != nil { - return nil, err - } - pr.rows = meta.Rows() - var err error - pr.pages, err = meta.Pages() - if err != nil { - return nil, err - } - - pr.rowGroups = meta.RowGroups() - _, err = r.Seek(4, io.SeekStart) - if err != nil { - return nil, err - } - pr.meta = meta - - return pr, pr.readRowGroup() -} - -func readerIndex(i int) func(*ParquetReader) { - return func(p *ParquetReader) { - p.index = i - } -} - -// ParquetReader reads one page from a row group. -type ParquetReader struct { - fields map[string]Field - fieldNames []string - index int - cursor int64 - rows int64 - rowGroupCursor int64 - rowGroupCount int64 - pages map[string][]parquet.Page - meta *parquet.Metadata - err error - - r io.ReadSeeker - rowGroups []parquet.RowGroup -} - -type Levels struct { - Name string - Defs []uint8 - Reps []uint8 -} - -func (p *ParquetReader) Levels() []Levels { - var out []Levels - //for { - for _, name := range p.fieldNames { - f := p.fields[name] - d, r := f.Levels() - out = append(out, Levels{Name: f.Name(), Defs: d, Reps: r}) - } - // if err := p.readRowGroup(); err != nil { - // break - // } - //} - return out -} - -func (p *ParquetReader) Error() error { - return p.err -} - -func (p *ParquetReader) readRowGroup() error { - p.rowGroupCursor = 0 - - if len(p.rowGroups) == 0 { - p.rowGroupCount = 0 - return nil - } - - rg := p.rowGroups[0] - p.fields = getFields(Fields(compressionUnknown)) - p.rowGroupCount = rg.Rows - p.rowGroupCursor = 0 - for _, col := range rg.Columns() { - name := strings.Join(col.MetaData.PathInSchema, ".") - f, ok := p.fields[name] - if !ok { - return fmt.Errorf("unknown field: %s", name) - } - pages := p.pages[name] - if len(pages) <= p.index { - break - } - - pg := pages[0] - if err := f.Read(p.r, pg); err != nil { - return fmt.Errorf("unable to read field %s, err: %s", f.Name(), err) - } - p.pages[name] = p.pages[name][1:] - } - p.rowGroups = p.rowGroups[1:] - return nil -} - -func (p *ParquetReader) Rows() int64 { - return p.rows -} - -func (p *ParquetReader) Next() bool { - if p.err == nil && p.cursor >= p.rows { - return false - } - if p.rowGroupCursor >= p.rowGroupCount { - p.err = p.readRowGroup() - if p.err != nil { - return false - } - } - - p.cursor++ - p.rowGroupCursor++ - return true -} - -func (p *ParquetReader) Scan(x *Document) { - if p.err != nil { - return - } - - for _, name := range p.fieldNames { - f := p.fields[name] - f.Scan(x) - } -} - -type int64 struct { - vals []int64 - parquet.RequiredField - read func(r Document) int64 - write func(r *Document, vals []int64) - stats *int64stats -} - -func Newint64(read func(r Document) int64, write func(r *Document, vals []int64), path []string, opts ...func(*parquet.RequiredField)) *int64 { - return &int64{ - read: read, - write: write, - RequiredField: parquet.NewRequiredField(path, opts...), - stats: newInt64stats(), - } -} - -func (f *int64) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int64Type, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} -} - -func (f *int64) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - v := make([]int64, int(pg.N)) - err = binary.Read(rr, binary.LittleEndian, &v) - f.vals = append(f.vals, v...) - return err -} - -func (f *int64) Write(w io.Writer, meta *parquet.Metadata) error { - var buf bytes.Buffer - for _, v := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { - return err - } - } - return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) -} - -func (f *int64) Scan(r *Document) { - if len(f.vals) == 0 { - return - } - - f.write(r, f.vals) - f.vals = f.vals[1:] -} - -func (f *int64) Add(r Document) { - v := f.read(r) - f.stats.add(v) - f.vals = append(f.vals, v) -} - -func (f *int64) Levels() ([]uint8, []uint8) { - return nil, nil -} - -type StringField struct { - parquet.RequiredField - vals []string - read func(r Document) string - write func(r *Document, vals []string) - stats *stringStats -} - -func NewStringField(read func(r Document) string, write func(r *Document, vals []string), path []string, opts ...func(*parquet.RequiredField)) *StringField { - return &StringField{ - read: read, - write: write, - RequiredField: parquet.NewRequiredField(path, opts...), - stats: newStringStats(), - } -} - -func (f *StringField) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} -} - -func (f *StringField) Write(w io.Writer, meta *parquet.Metadata) error { - buf := bytes.Buffer{} - - for _, s := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { - return err - } - buf.Write([]byte(s)) - } - - return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) -} - -func (f *StringField) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - for j := 0; j < pg.N; j++ { - var x int32 - if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { - return err - } - s := make([]byte, x) - if _, err := rr.Read(s); err != nil { - return err - } - - f.vals = append(f.vals, string(s)) - } - return nil -} - -func (f *StringField) Scan(r *Document) { - if len(f.vals) == 0 { - return - } - - f.write(r, f.vals) - f.vals = f.vals[1:] -} - -func (f *StringField) Add(r Document) { - v := f.read(r) - f.stats.add(v) - f.vals = append(f.vals, v) -} - -func (f *StringField) Levels() ([]uint8, []uint8) { - return nil, nil -} - -type int64stats struct { - min int64 - max int64 -} - -func newInt64stats() *int64stats { - return &int64stats{ - min: int64(math.MaxInt64), - } -} - -func (i *int64stats) add(val int64) { - if val < i.min { - i.min = val - } - if val > i.max { - i.max = val - } -} - -func (f *int64stats) bytes(val int64) []byte { - var buf bytes.Buffer - binary.Write(&buf, binary.LittleEndian, val) - return buf.Bytes() -} - -func (f *int64stats) NullCount() *int64 { - return nil -} - -func (f *int64stats) DistinctCount() *int64 { - return nil -} - -func (f *int64stats) Min() []byte { - return f.bytes(f.min) -} - -func (f *int64stats) Max() []byte { - return f.bytes(f.max) -} - -type stringStats struct { - vals []string - min []byte - max []byte -} - -func newStringStats() *stringStats { - return &stringStats{} -} - -func (s *stringStats) add(val string) { - s.vals = append(s.vals, val) -} - -func (s *stringStats) NullCount() *int64 { - return nil -} - -func (s *stringStats) DistinctCount() *int64 { - return nil -} - -func (s *stringStats) Min() []byte { - if s.min == nil { - s.minMax() - } - return s.min -} - -func (s *stringStats) Max() []byte { - if s.max == nil { - s.minMax() - } - return s.max -} - -func (s *stringStats) minMax() { - if len(s.vals) == 0 { - return - } - - tmp := make([]string, len(s.vals)) - copy(tmp, s.vals) - sort.Strings(tmp) - s.min = []byte(tmp[0]) - s.max = []byte(tmp[len(tmp)-1]) -} - -func pint32(i int32) *int32 { return &i } -func puint32(i uint32) *uint32 { return &i } -func pint64(i int64) *int64 { return &i } -func puint64(i uint64) *uint64 { return &i } -func pbool(b bool) *bool { return &b } -func pstring(s string) *string { return &s } -func pfloat32(f float32) *float32 { return &f } -func pfloat64(f float64) *float64 { return &f } - -// keeps track of the indices of repeated fields -// that have already been handled by a previous field -type indices []int - -func (i indices) rep(rep uint8) { - if rep > 0 { - r := int(rep) - 1 - i[r] = i[r] + 1 - for j := int(rep); j < len(i); j++ { - i[j] = 0 - } - } -} - -func maxDef(types []int) uint8 { - var out uint8 - for _, typ := range types { - if typ > 0 { - out++ - } - } - return out -} - -func Int32Type(se *sch.SchemaElement) { - t := sch.Type_INT32 - se.Type = &t -} - -func Uint32Type(se *sch.SchemaElement) { - t := sch.Type_INT32 - se.Type = &t - ct := sch.ConvertedType_UINT_32 - se.ConvertedType = &ct -} - -func Int64Type(se *sch.SchemaElement) { - t := sch.Type_INT64 - se.Type = &t -} - -func Uint64Type(se *sch.SchemaElement) { - t := sch.Type_INT64 - se.Type = &t - ct := sch.ConvertedType_UINT_64 - se.ConvertedType = &ct -} - -func Float32Type(se *sch.SchemaElement) { - t := sch.Type_FLOAT - se.Type = &t -} - -func Float64Type(se *sch.SchemaElement) { - t := sch.Type_DOUBLE - se.Type = &t -} - -func BoolType(se *sch.SchemaElement) { - t := sch.Type_BOOLEAN - se.Type = &t -} - -func StringType(se *sch.SchemaElement) { - t := sch.Type_BYTE_ARRAY - se.Type = &t -} diff --git a/internal/dremel/testcases/person/generated.go b/internal/dremel/testcases/person/generated.go deleted file mode 100644 index 1e0c15b..0000000 --- a/internal/dremel/testcases/person/generated.go +++ /dev/null @@ -1,862 +0,0 @@ -package person - -// Code generated by github.com/parsyl/parquet. DO NOT EDIT. - -import ( - "bytes" - "encoding/binary" - "fmt" - "io" - "strings" - - "github.com/parsyl/parquet" - sch "github.com/parsyl/parquet/schema" - - "math" - "sort" -) - -type compression int - -const ( - compressionUncompressed compression = 0 - compressionSnappy compression = 1 - compressionGzip compression = 2 - compressionUnknown compression = -1 -) - -// ParquetWriter reprents a row group -type ParquetWriter struct { - fields []Field - - len int - - // child points to the next page - child *ParquetWriter - - // max is the number of Record items that can get written before - // a new set of column chunks is written - max int - - meta *parquet.Metadata - w io.Writer - compression compression -} - -func Fields(compression compression) []Field { - return []Field{ - Newstring(readName, writeName, []string{"name"}, fieldCompression(compression)), - Newstring(readHobbyName, writeHobbyName, []string{"hobby", "name"}, []int{1, 0}, fieldCompression(compression)), - Newint32(readHobbyDifficulty, writeHobbyDifficulty, []string{"hobby", "difficulty"}, []int{1, 1}, fieldCompression(compression)), - Newstring(readHobbySkillsName, writeHobbySkillsName, []string{"hobby", "skills", "name"}, []int{1, 2, 0}, fieldCompression(compression)), - Newstring(readHobbySkillsDifficulty, writeHobbySkillsDifficulty, []string{"hobby", "skills", "difficulty"}, []int{1, 2, 0}, fieldCompression(compression)), - } -} - -func readName(x Person) string { - return x.Name -} - -func writeName(x *Person, vals []string) { - x.Name = vals[0] -} - -func readHobbyName(x Person) ([]string, []uint8, []uint8) { - switch { - case x.Hobby == nil: - return nil, []uint8{0}, nil - default: - return []string{x.Hobby.Name}, []uint8{1}, nil - } -} - -func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 1: - x.Hobby = &Hobby{Name: vals[0]} - return 1, 1 - } - - return 0, 1 -} - -func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { - switch { - case x.Hobby == nil: - return nil, []uint8{0}, nil - case x.Hobby.Difficulty == nil: - return nil, []uint8{1}, nil - default: - return []int32{*x.Hobby.Difficulty}, []uint8{2}, nil - } -} - -func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { - def := defs[0] - switch def { - case 2: - x.Hobby.Difficulty = pint32(vals[0]) - return 1, 1 - } - - return 0, 1 -} - -func readHobbySkillsName(x Person) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 - - if x.Hobby == nil { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - if len(x.Hobby.Skills) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Hobby.Skills { - if i0 == 1 { - lastRep = 1 - } - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, x0.Name) - } - } - } - - return vals, defs, reps -} - -func writeHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 1) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 2: - switch rep { - case 0: - x.Hobby.Skills = []Skill{{Name: vals[nVals]}} - case 1: - x.Hobby.Skills = append(x.Hobby.Skills, Skill{Name: vals[nVals]}) - } - nVals++ - } - } - - return nVals, nLevels -} - -func readHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { - var vals []string - var defs, reps []uint8 - var lastRep uint8 - - if x.Hobby == nil { - defs = append(defs, 0) - reps = append(reps, lastRep) - } else { - if len(x.Hobby.Skills) == 0 { - defs = append(defs, 1) - reps = append(reps, lastRep) - } else { - for i0, x0 := range x.Hobby.Skills { - if i0 == 1 { - lastRep = 1 - } - defs = append(defs, 2) - reps = append(reps, lastRep) - vals = append(vals, x0.Difficulty) - } - } - } - - return vals, defs, reps -} - -func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (int, int) { - var nVals, nLevels int - ind := make(indices, 1) - - for i := range defs { - def := defs[i] - rep := reps[i] - if i > 0 && rep == 0 { - break - } - - nLevels++ - ind.rep(rep) - - switch def { - case 2: - switch rep { - case 0, 1: - x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] - } - nVals++ - } - } - - return nVals, nLevels -} - -func fieldCompression(c compression) func(*parquet.RequiredField) { - switch c { - case compressionUncompressed: - return parquet.RequiredFieldUncompressed - case compressionSnappy: - return parquet.RequiredFieldSnappy - case compressionGzip: - return parquet.RequiredFieldGzip - default: - return parquet.RequiredFieldUncompressed - } -} - -func optionalFieldCompression(c compression) func(*parquet.OptionalField) { - switch c { - case compressionUncompressed: - return parquet.OptionalFieldUncompressed - case compressionSnappy: - return parquet.OptionalFieldSnappy - case compressionGzip: - return parquet.OptionalFieldGzip - default: - return parquet.OptionalFieldUncompressed - } -} - -func NewParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { - return newParquetWriter(w, append(opts, begin)...) -} - -func newParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { - p := &ParquetWriter{ - max: 1000, - w: w, - compression: compressionSnappy, - } - - for _, opt := range opts { - if err := opt(p); err != nil { - return nil, err - } - } - - p.fields = Fields(p.compression) - if p.meta == nil { - ff := Fields(p.compression) - schema := make([]parquet.Field, len(ff)) - for i, f := range ff { - schema[i] = f.Schema() - } - p.meta = parquet.New(schema...) - } - - return p, nil -} - -// MaxPageSize is the maximum number of rows in each row groups' page. -func MaxPageSize(m int) func(*ParquetWriter) error { - return func(p *ParquetWriter) error { - p.max = m - return nil - } -} - -func begin(p *ParquetWriter) error { - _, err := p.w.Write([]byte("PAR1")) - return err -} - -func withMeta(m *parquet.Metadata) func(*ParquetWriter) error { - return func(p *ParquetWriter) error { - p.meta = m - return nil - } -} - -func Uncompressed(p *ParquetWriter) error { - p.compression = compressionUncompressed - return nil -} - -func Snappy(p *ParquetWriter) error { - p.compression = compressionSnappy - return nil -} - -func Gzip(p *ParquetWriter) error { - p.compression = compressionGzip - return nil -} - -func withCompression(c compression) func(*ParquetWriter) error { - return func(p *ParquetWriter) error { - p.compression = c - return nil - } -} - -func (p *ParquetWriter) Write() error { - for i, f := range p.fields { - if err := f.Write(p.w, p.meta); err != nil { - return err - } - - for child := p.child; child != nil; child = child.child { - if err := child.fields[i].Write(p.w, p.meta); err != nil { - return err - } - } - } - - p.fields = Fields(p.compression) - p.child = nil - p.len = 0 - - schema := make([]parquet.Field, len(p.fields)) - for i, f := range p.fields { - schema[i] = f.Schema() - } - p.meta.StartRowGroup(schema...) - return nil -} - -func (p *ParquetWriter) Close() error { - if err := p.meta.Footer(p.w); err != nil { - return err - } - - _, err := p.w.Write([]byte("PAR1")) - return err -} - -func (p *ParquetWriter) Add(rec Person) { - if p.len == p.max { - if p.child == nil { - // an error can't happen here - p.child, _ = newParquetWriter(p.w, MaxPageSize(p.max), withMeta(p.meta), withCompression(p.compression)) - } - - p.child.Add(rec) - return - } - - p.meta.NextDoc() - for _, f := range p.fields { - f.Add(rec) - } - - p.len++ -} - -type Field interface { - Add(r Person) - Write(w io.Writer, meta *parquet.Metadata) error - Schema() parquet.Field - Scan(r *Person) - Read(r io.ReadSeeker, pg parquet.Page) error - Name() string - Levels() ([]uint8, []uint8) -} - -func getFields(ff []Field) map[string]Field { - m := make(map[string]Field, len(ff)) - for _, f := range ff { - m[f.Name()] = f - } - return m -} - -func NewParquetReader(r io.ReadSeeker, opts ...func(*ParquetReader)) (*ParquetReader, error) { - ff := Fields(compressionUnknown) - pr := &ParquetReader{ - r: r, - } - - for _, opt := range opts { - opt(pr) - } - - schema := make([]parquet.Field, len(ff)) - for i, f := range ff { - pr.fieldNames = append(pr.fieldNames, f.Name()) - schema[i] = f.Schema() - } - - meta := parquet.New(schema...) - if err := meta.ReadFooter(r); err != nil { - return nil, err - } - pr.rows = meta.Rows() - var err error - pr.pages, err = meta.Pages() - if err != nil { - return nil, err - } - - pr.rowGroups = meta.RowGroups() - _, err = r.Seek(4, io.SeekStart) - if err != nil { - return nil, err - } - pr.meta = meta - - return pr, pr.readRowGroup() -} - -func readerIndex(i int) func(*ParquetReader) { - return func(p *ParquetReader) { - p.index = i - } -} - -// ParquetReader reads one page from a row group. -type ParquetReader struct { - fields map[string]Field - fieldNames []string - index int - cursor int64 - rows int64 - rowGroupCursor int64 - rowGroupCount int64 - pages map[string][]parquet.Page - meta *parquet.Metadata - err error - - r io.ReadSeeker - rowGroups []parquet.RowGroup -} - -type Levels struct { - Name string - Defs []uint8 - Reps []uint8 -} - -func (p *ParquetReader) Levels() []Levels { - var out []Levels - //for { - for _, name := range p.fieldNames { - f := p.fields[name] - d, r := f.Levels() - out = append(out, Levels{Name: f.Name(), Defs: d, Reps: r}) - } - // if err := p.readRowGroup(); err != nil { - // break - // } - //} - return out -} - -func (p *ParquetReader) Error() error { - return p.err -} - -func (p *ParquetReader) readRowGroup() error { - p.rowGroupCursor = 0 - - if len(p.rowGroups) == 0 { - p.rowGroupCount = 0 - return nil - } - - rg := p.rowGroups[0] - p.fields = getFields(Fields(compressionUnknown)) - p.rowGroupCount = rg.Rows - p.rowGroupCursor = 0 - for _, col := range rg.Columns() { - name := strings.Join(col.MetaData.PathInSchema, ".") - f, ok := p.fields[name] - if !ok { - return fmt.Errorf("unknown field: %s", name) - } - pages := p.pages[name] - if len(pages) <= p.index { - break - } - - pg := pages[0] - if err := f.Read(p.r, pg); err != nil { - return fmt.Errorf("unable to read field %s, err: %s", f.Name(), err) - } - p.pages[name] = p.pages[name][1:] - } - p.rowGroups = p.rowGroups[1:] - return nil -} - -func (p *ParquetReader) Rows() int64 { - return p.rows -} - -func (p *ParquetReader) Next() bool { - if p.err == nil && p.cursor >= p.rows { - return false - } - if p.rowGroupCursor >= p.rowGroupCount { - p.err = p.readRowGroup() - if p.err != nil { - return false - } - } - - p.cursor++ - p.rowGroupCursor++ - return true -} - -func (p *ParquetReader) Scan(x *Person) { - if p.err != nil { - return - } - - for _, name := range p.fieldNames { - f := p.fields[name] - f.Scan(x) - } -} - -type StringField struct { - parquet.RequiredField - vals []string - read func(r Person) string - write func(r *Person, vals []string) - stats *stringStats -} - -func NewStringField(read func(r Person) string, write func(r *Person, vals []string), path []string, opts ...func(*parquet.RequiredField)) *StringField { - return &StringField{ - read: read, - write: write, - RequiredField: parquet.NewRequiredField(path, opts...), - stats: newStringStats(), - } -} - -func (f *StringField) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} -} - -func (f *StringField) Write(w io.Writer, meta *parquet.Metadata) error { - buf := bytes.Buffer{} - - for _, s := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { - return err - } - buf.Write([]byte(s)) - } - - return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) -} - -func (f *StringField) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - for j := 0; j < pg.N; j++ { - var x int32 - if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { - return err - } - s := make([]byte, x) - if _, err := rr.Read(s); err != nil { - return err - } - - f.vals = append(f.vals, string(s)) - } - return nil -} - -func (f *StringField) Scan(r *Person) { - if len(f.vals) == 0 { - return - } - - f.write(r, f.vals) - f.vals = f.vals[1:] -} - -func (f *StringField) Add(r Person) { - v := f.read(r) - f.stats.add(v) - f.vals = append(f.vals, v) -} - -func (f *StringField) Levels() ([]uint8, []uint8) { - return nil, nil -} - -type int32 struct { - parquet.OptionalField - vals []int32 - read func(r Person) ([]int32, []uint8, []uint8) - write func(r *Person, vals []int32, def, rep []uint8) (int, int) - stats *int32optionalStats -} - -func Newint32(read func(r Person) ([]int32, []uint8, []uint8), write func(r *Person, vals []int32, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *int32 { - return &int32{ - read: read, - write: write, - OptionalField: parquet.NewOptionalField(path, types, opts...), - stats: newint32optionalStats(maxDef(types)), - } -} - -func (f *int32) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int32Type, RepetitionType: f.RepetitionType, Types: f.Types} -} - -func (f *int32) Write(w io.Writer, meta *parquet.Metadata) error { - var buf bytes.Buffer - for _, v := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { - return err - } - } - return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) -} - -func (f *int32) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - v := make([]int32, f.Values()-len(f.vals)) - err = binary.Read(rr, binary.LittleEndian, &v) - f.vals = append(f.vals, v...) - return err -} - -func (f *int32) Add(r Person) { - vals, defs, reps := f.read(r) - f.stats.add(vals, defs) - f.vals = append(f.vals, vals...) - f.Defs = append(f.Defs, defs...) - f.Reps = append(f.Reps, reps...) -} - -func (f *int32) Scan(r *Person) { - if len(f.Defs) == 0 { - return - } - - v, l := f.write(r, f.vals, f.Defs, f.Reps) - f.vals = f.vals[v:] - f.Defs = f.Defs[l:] - if len(f.Reps) > 0 { - f.Reps = f.Reps[l:] - } -} - -func (f *int32) Levels() ([]uint8, []uint8) { - return f.Defs, f.Reps -} - -type stringStats struct { - vals []string - min []byte - max []byte -} - -func newStringStats() *stringStats { - return &stringStats{} -} - -func (s *stringStats) add(val string) { - s.vals = append(s.vals, val) -} - -func (s *stringStats) NullCount() *int64 { - return nil -} - -func (s *stringStats) DistinctCount() *int64 { - return nil -} - -func (s *stringStats) Min() []byte { - if s.min == nil { - s.minMax() - } - return s.min -} - -func (s *stringStats) Max() []byte { - if s.max == nil { - s.minMax() - } - return s.max -} - -func (s *stringStats) minMax() { - if len(s.vals) == 0 { - return - } - - tmp := make([]string, len(s.vals)) - copy(tmp, s.vals) - sort.Strings(tmp) - s.min = []byte(tmp[0]) - s.max = []byte(tmp[len(tmp)-1]) -} - -type int32optionalStats struct { - min int32 - max int32 - nils int64 - nonNils int64 - maxDef uint8 -} - -func newint32optionalStats(d uint8) *int32optionalStats { - return &int32optionalStats{ - min: int32(math.MaxInt32), - maxDef: d, - } -} - -func (f *int32optionalStats) add(vals []int32, defs []uint8) { - var i int - for _, def := range defs { - if def < f.maxDef { - f.nils++ - } else { - val := vals[i] - i++ - - f.nonNils++ - if val < f.min { - f.min = val - } - if val > f.max { - f.max = val - } - } - } -} - -func (f *int32optionalStats) bytes(val int32) []byte { - var buf bytes.Buffer - binary.Write(&buf, binary.LittleEndian, val) - return buf.Bytes() -} - -func (f *int32optionalStats) NullCount() *int64 { - return &f.nils -} - -func (f *int32optionalStats) DistinctCount() *int64 { - return nil -} - -func (f *int32optionalStats) Min() []byte { - if f.nonNils == 0 { - return nil - } - return f.bytes(f.min) -} - -func (f *int32optionalStats) Max() []byte { - if f.nonNils == 0 { - return nil - } - return f.bytes(f.max) -} - -func pint32(i int32) *int32 { return &i } -func puint32(i uint32) *uint32 { return &i } -func pint64(i int64) *int64 { return &i } -func puint64(i uint64) *uint64 { return &i } -func pbool(b bool) *bool { return &b } -func pstring(s string) *string { return &s } -func pfloat32(f float32) *float32 { return &f } -func pfloat64(f float64) *float64 { return &f } - -// keeps track of the indices of repeated fields -// that have already been handled by a previous field -type indices []int - -func (i indices) rep(rep uint8) { - if rep > 0 { - r := int(rep) - 1 - i[r] = i[r] + 1 - for j := int(rep); j < len(i); j++ { - i[j] = 0 - } - } -} - -func maxDef(types []int) uint8 { - var out uint8 - for _, typ := range types { - if typ > 0 { - out++ - } - } - return out -} - -func Int32Type(se *sch.SchemaElement) { - t := sch.Type_INT32 - se.Type = &t -} - -func Uint32Type(se *sch.SchemaElement) { - t := sch.Type_INT32 - se.Type = &t - ct := sch.ConvertedType_UINT_32 - se.ConvertedType = &ct -} - -func Int64Type(se *sch.SchemaElement) { - t := sch.Type_INT64 - se.Type = &t -} - -func Uint64Type(se *sch.SchemaElement) { - t := sch.Type_INT64 - se.Type = &t - ct := sch.ConvertedType_UINT_64 - se.ConvertedType = &ct -} - -func Float32Type(se *sch.SchemaElement) { - t := sch.Type_FLOAT - se.Type = &t -} - -func Float64Type(se *sch.SchemaElement) { - t := sch.Type_DOUBLE - se.Type = &t -} - -func BoolType(se *sch.SchemaElement) { - t := sch.Type_BOOLEAN - se.Type = &t -} - -func StringType(se *sch.SchemaElement) { - t := sch.Type_BYTE_ARRAY - se.Type = &t -} diff --git a/internal/dremel/write_test.go b/internal/dremel/write_test.go index 716346e..8b2d6c2 100644 --- a/internal/dremel/write_test.go +++ b/internal/dremel/write_test.go @@ -20,7 +20,7 @@ func TestWrite(t *testing.T) { { name: "required and not nested", field: fields.Field{ - FieldType: "int32", TypeName: "int32", FieldName: "ID", RepetitionType: fields.Required, + Type: "int32", Name: "ID", RepetitionType: fields.Required, }, result: `func writeID(x *Person, vals []int32) { x.ID = vals[0] @@ -29,7 +29,7 @@ func TestWrite(t *testing.T) { { name: "optional and not nested", field: fields.Field{ - FieldType: "int32", TypeName: "*int32", FieldName: "ID", RepetitionType: fields.Optional, + Type: "int32", Name: "ID", RepetitionType: fields.Optional, }, result: `func writeID(x *Person, vals []int32, defs, reps []uint8) (int, int) { def := defs[0] @@ -45,9 +45,9 @@ func TestWrite(t *testing.T) { { name: "required and nested", field: fields.Field{ - FieldName: "Other", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldType: "int32", TypeName: "int32", FieldName: "Difficulty", RepetitionType: fields.Required}, + Name: "Other", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "int32", Name: "Difficulty", RepetitionType: fields.Required}, }}, }, }, @@ -58,8 +58,8 @@ func TestWrite(t *testing.T) { { name: "optional and nested", field: fields.Field{ - FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "int32", TypeName: "int32", FieldName: "Difficulty", RepetitionType: fields.Optional}, + Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int32", Name: "Difficulty", RepetitionType: fields.Optional}, }, }, result: `func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { @@ -78,9 +78,9 @@ func TestWrite(t *testing.T) { { name: "optional and nested and seen by an optional fields", field: fields.Field{ - FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Required}, - {FieldType: "int32", TypeName: "int32", FieldName: "Difficulty", RepetitionType: fields.Optional}, + Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Required}, + {Type: "int32", Name: "Difficulty", RepetitionType: fields.Optional}, }, }, result: `func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { @@ -97,8 +97,8 @@ func TestWrite(t *testing.T) { { name: "mix of optional and required and nested", field: fields.Field{ - FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Required}, + Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Required}, }, }, result: `func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { @@ -115,8 +115,8 @@ func TestWrite(t *testing.T) { { name: "mix of optional and required and nested v2", field: fields.Field{ - FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldType: "string", TypeName: "*string", FieldName: "Name", RepetitionType: fields.Optional}, + Name: "Hobby", Type: "Hobby", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, }, }, result: `func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { @@ -133,9 +133,9 @@ func TestWrite(t *testing.T) { { name: "mix of optional and require and nested 3 deep", field: fields.Field{ - FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Name", FieldType: "string", TypeName: "*string", RepetitionType: fields.Optional}, + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Name", Type: "string", RepetitionType: fields.Optional}, }}, }, }, @@ -155,10 +155,10 @@ func TestWrite(t *testing.T) { { name: "mix of optional and required and nested 3 deep v2 and seen by optional field", field: fields.Field{ - FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Optional}, - {FieldType: "string", TypeName: "*string", FieldName: "Name", RepetitionType: fields.Optional}, + Name: "Friend", Type: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int", Name: "Rank", RepetitionType: fields.Optional}, + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, }}, }, }, @@ -176,9 +176,9 @@ func TestWrite(t *testing.T) { { name: "mix of optional and required and nested 3 deep v3", field: fields.Field{ - FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "*string", FieldName: "Name", RepetitionType: fields.Required}, + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Required}, }}, }, }, @@ -198,9 +198,9 @@ func TestWrite(t *testing.T) { { name: "nested 3 deep all optional", field: fields.Field{ - FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "*string", FieldName: "Name", RepetitionType: fields.Optional}, + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, }}, }, }, @@ -222,10 +222,10 @@ func TestWrite(t *testing.T) { { name: "nested 3 deep all optional and seen by optional field", field: fields.Field{ - FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Optional}, - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "*string", FieldName: "Name", RepetitionType: fields.Optional}, + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int", Name: "Rank", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Optional}, }}, }, }, @@ -245,10 +245,10 @@ func TestWrite(t *testing.T) { { name: "four deep", field: fields.Field{ - FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "*string", FieldName: "First", RepetitionType: fields.Optional}, + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Optional}, }}, }}, }, @@ -273,11 +273,11 @@ func TestWrite(t *testing.T) { { name: "four deep and seen by optional field", field: fields.Field{ - FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Optional}, - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "*string", FieldName: "First", RepetitionType: fields.Optional}, + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int", Name: "Rank", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Optional}, }}, }}, }, @@ -300,10 +300,10 @@ func TestWrite(t *testing.T) { { name: "four deep mixed", field: fields.Field{ - FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "*string", FieldName: "First", RepetitionType: fields.Optional}, + Name: "Friend", Type: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Optional}, }}, }}, }, @@ -326,11 +326,11 @@ func TestWrite(t *testing.T) { { name: "four deep mixed and seen by a required sub-field", field: fields.Field{ - FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ - {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Required}, - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "*string", FieldName: "First", RepetitionType: fields.Optional}, + Name: "Friend", Type: "Entity", RepetitionType: fields.Required, Children: []fields.Field{ + {Type: "int", Name: "Rank", RepetitionType: fields.Required}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Optional}, }}, }}, }, @@ -353,10 +353,10 @@ func TestWrite(t *testing.T) { { name: "four deep mixed v2", field: fields.Field{ - FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "*string", FieldName: "First", RepetitionType: fields.Required}, + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Required}, }}, }}, }, @@ -380,14 +380,14 @@ func TestWrite(t *testing.T) { name: "four deep mixed v2 and seen by an optional field", // fields: []fields.Field{ // {FieldNames: []string{"Friend", "Rank"}, FieldTypes: []string{"Entity", "int"}, RepetitionTypes: []fields.RepetitionType{fields.Optional}}, - // {Type: "Person", TypeName: "*string", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, + // {Type: "Person", FieldNames: []string{"Friend", "Hobby", "Name", "First"}, FieldTypes: []string{"Entity", "Item", "Name", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Optional, fields.Optional, fields.Optional, fields.Required}}, // }, field: fields.Field{ - FieldName: "Friend", FieldType: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "int", TypeName: "*int", FieldName: "Rank", RepetitionType: fields.Optional}, - {FieldName: "Hobby", FieldType: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Name", FieldType: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "First", RepetitionType: fields.Required}, + Name: "Friend", Type: "Entity", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "int", Name: "Rank", RepetitionType: fields.Optional}, + {Name: "Hobby", Type: "Item", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Name", Type: "Name", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "First", RepetitionType: fields.Required}, }}, }}, }, @@ -409,8 +409,8 @@ func TestWrite(t *testing.T) { name: "writeLinkBackward", structName: "Document", field: fields.Field{ - FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Backward", RepetitionType: fields.Repeated}, + Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Backward", RepetitionType: fields.Repeated}, }, }, result: `func writeLinkBackward(x *Document, vals []string, defs, reps []uint8) (int, int) { @@ -448,9 +448,9 @@ func TestWrite(t *testing.T) { name: "writeLinkFoward", structName: "Document", field: fields.Field{ - FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Backward", RepetitionType: fields.Repeated}, - {FieldType: "string", TypeName: "string", FieldName: "Forward", RepetitionType: fields.Repeated}, + Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "Backward", RepetitionType: fields.Repeated}, + {Type: "string", Name: "Forward", RepetitionType: fields.Repeated}, }, }, result: `func writeLinkForward(x *Document, vals []string, defs, reps []uint8) (int, int) { @@ -486,9 +486,9 @@ func TestWrite(t *testing.T) { name: "writeNamesLanguagesCode", structName: "Document", field: fields.Field{ - FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldType: "Language", FieldName: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Code", RepetitionType: fields.Required}, + Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "Language", Name: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Code", RepetitionType: fields.Required}, }}, }, }, @@ -528,15 +528,15 @@ func TestWrite(t *testing.T) { { name: "writeNamesLanguagesCountry", // fields: []fields.Field{ - // {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Code"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - // {Type: "Document", TypeName: "string", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, + // {Type: "Document", FieldNames: []string{"Names", "Languages", "Code"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, + // {Type: "Document", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, // }, structName: "Document", field: fields.Field{ - FieldName: "Names", FieldType: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldType: "Language", FieldName: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Code", RepetitionType: fields.Required}, - {FieldType: "string", TypeName: "*string", FieldName: "Country", RepetitionType: fields.Optional}, + Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "Language", Name: "Languages", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Code", RepetitionType: fields.Required}, + {Type: "string", Name: "Country", RepetitionType: fields.Optional}, }}, }, }, @@ -571,8 +571,8 @@ func TestWrite(t *testing.T) { name: "writeFriendsID", structName: "Person", field: fields.Field{ - FieldName: "Friends", FieldType: "Being", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldType: "int32", TypeName: "int32", FieldName: "ID", RepetitionType: fields.Required}, + Name: "Friends", Type: "Being", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "int32", Name: "ID", RepetitionType: fields.Required}, }, }, result: `func writeFriendsID(x *Person, vals []int32, defs, reps []uint8) (int, int) { @@ -608,7 +608,7 @@ func TestWrite(t *testing.T) { name: "repeated primitive", structName: "Document", field: fields.Field{ - FieldName: "LuckyNumbers", TypeName: "int64", FieldType: "int64", RepetitionType: fields.Repeated, + Name: "LuckyNumbers", Type: "int64", RepetitionType: fields.Repeated, }, result: `func writeLuckyNumbers(x *Document, vals []int64, defs, reps []uint8) (int, int) { var nVals, nLevels int @@ -643,9 +643,9 @@ func TestWrite(t *testing.T) { name: "repeated field not handled by previous repeated field", structName: "Document", field: fields.Field{ - FieldName: "Link", FieldType: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "ID", RepetitionType: fields.Required}, - {FieldType: "string", TypeName: "string", FieldName: "Forward", RepetitionType: fields.Repeated}, + Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ + {Type: "string", Name: "ID", RepetitionType: fields.Required}, + {Type: "string", Name: "Forward", RepetitionType: fields.Repeated}, }, }, result: `func writeLinkForward(x *Document, vals []string, defs, reps []uint8) (int, int) { @@ -681,10 +681,10 @@ func TestWrite(t *testing.T) { name: "nested 2 deep", structName: "Person", field: fields.Field{ - FieldName: "Hobby", FieldType: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ - {FieldName: "Skills", FieldType: "Skill", RepetitionType: fields.Repeated, Children: []fields.Field{ - {FieldType: "string", TypeName: "string", FieldName: "Name", RepetitionType: fields.Required}, - {FieldType: "string", TypeName: "string", FieldName: "Difficulty", RepetitionType: fields.Required}, + Name: "Hobby", Type: "Hobby", RepetitionType: fields.Optional, Children: []fields.Field{ + {Name: "Skills", Type: "Skill", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Type: "string", Name: "Name", RepetitionType: fields.Required}, + {Type: "string", Name: "Difficulty", RepetitionType: fields.Required}, }}, }, }, diff --git a/internal/fields/fields.go b/internal/fields/fields.go index 583c557..4d388d9 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -425,14 +425,14 @@ func (f Field) Primitive() bool { return ok } -func (f Field) FieldName() string { +func (f Field) FieldType() string { var op string if f.RepetitionType == Optional || f.RepetitionType == Repeated { op = "Optional" } ft := primitiveTypes[f.Type] - return fmt.Sprintf(ft.name, op) + return fmt.Sprintf(ft.name, op, "Field") } func (f Field) ParquetType() string { @@ -447,10 +447,10 @@ func (f Field) Category() string { } ft := primitiveTypes[f.Type] - return fmt.Sprintf(ft.category, op, "Field") + return fmt.Sprintf(ft.category, op, "") } -func (f Field) TypeName(s string, optional bool) string { +func (f Field) TypeName() string { var star string if f.RepetitionType == Optional { star = "*" diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go index cb9fb88..446f553 100644 --- a/internal/fields/fields_test.go +++ b/internal/fields/fields_test.go @@ -38,8 +38,10 @@ func TestNilFields(t *testing.T) { return } - for i := 0; i < tc.f.MaxDef(); i++ { - s, _, _, _ := tc.f.NilField(i) + f := fields.Field{Type: "Person", Children: []fields.Field{tc.f}} + + for i := 0; i < f.MaxDef(); i++ { + s, _, _, _ := f.NilField(i) assert.Equal(t, tc.expected[i], s) } }) diff --git a/internal/gen/funcs.go b/internal/gen/funcs.go index 79ef561..9d0660e 100644 --- a/internal/gen/funcs.go +++ b/internal/gen/funcs.go @@ -23,7 +23,7 @@ var ( }, "dedupe": dedupe, "compressionFunc": func(f fields.Field) string { - if strings.Contains(f.FieldType, "Optional") { + if strings.Contains(f.Category(), "Optional") { return "optionalFieldCompression" } return "fieldCompression" @@ -45,11 +45,11 @@ var ( var out []string var intFound, stringFound bool for _, f := range fields { - if !intFound && strings.Contains(f.TypeName, "int") { + if !intFound && strings.Contains(f.Type, "int") { intFound = true out = append(out, `"math"`) } - if !stringFound && strings.Contains(f.TypeName, "string") { + if !stringFound && strings.Contains(f.Type, "string") { stringFound = true out = append(out, `"sort"`) } @@ -58,7 +58,7 @@ var ( }, "maxType": func(f fields.Field) string { var out string - switch f.TypeName { + switch f.Type { case "int32", "*int32": out = "math.MaxInt32" case "int64", "*int64": diff --git a/internal/gen/gen.go b/internal/gen/gen.go index 2eb1c4d..f6e56e2 100644 --- a/internal/gen/gen.go +++ b/internal/gen/gen.go @@ -179,10 +179,10 @@ func dedupe(flds []fields.Field) []fields.Field { seen := map[string]bool{} out := make([]fields.Field, 0, len(flds)) for _, f := range flds { - _, ok := seen[f.FieldType] + _, ok := seen[f.Type] if !ok { out = append(out, f) - seen[f.FieldType] = true + seen[f.Type] = true } } return out From 522bdf1fb027ad1e51d2ab2bb13900cb1a098e90 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Sun, 13 Jun 2021 08:44:05 -0600 Subject: [PATCH 13/25] go generate compiles --- internal/dremel/dremel_test.go | 336 ++--- internal/dremel/testcases/doc/generated.go | 1128 +++++++++++++++++ internal/dremel/testcases/person/generated.go | 1000 +++++++++++++++ internal/fields/fields.go | 6 +- internal/gen/gen.go | 10 +- 5 files changed, 2307 insertions(+), 173 deletions(-) create mode 100644 internal/dremel/testcases/doc/generated.go create mode 100644 internal/dremel/testcases/person/generated.go diff --git a/internal/dremel/dremel_test.go b/internal/dremel/dremel_test.go index d1810f4..7048c0e 100644 --- a/internal/dremel/dremel_test.go +++ b/internal/dremel/dremel_test.go @@ -1,173 +1,173 @@ package dremel_test -// import ( -// "bytes" -// "log" -// "testing" - -// "github.com/parsyl/parquet/internal/dremel/testcases/doc" -// "github.com/parsyl/parquet/internal/dremel/testcases/person" -// "github.com/stretchr/testify/assert" -// ) - -// var ( -// dremelDocs = []doc.Document{ -// { -// DocID: 10, -// Links: &doc.Link{ -// Forward: []int64{20, 40, 60}, -// }, -// Names: []doc.Name{ -// { -// Languages: []doc.Language{ -// {Code: "en-us", Country: pstring("us")}, -// {Code: "en"}, -// }, -// URL: pstring("http://A"), -// }, -// { -// URL: pstring("http://B"), -// }, -// { -// Languages: []doc.Language{ -// {Code: "en-gb", Country: pstring("gb")}, -// }, -// }, -// }, -// }, -// { -// DocID: 20, -// Links: &doc.Link{ -// Backward: []int64{10, 30}, -// Forward: []int64{80}, -// }, -// Names: []doc.Name{ -// { -// URL: pstring("http://C"), -// }, -// }, -// }, -// } -// ) - -// // TestLevels verifies that the example from the dremel paper -// // results in the correct definition and repetition levels. -// func TestLevels(t *testing.T) { -// var buf bytes.Buffer -// pw, err := doc.NewParquetWriter(&buf) -// if err != nil { -// assert.NoError(t, err) -// } - -// for _, doc := range dremelDocs { -// pw.Add(doc) -// } - -// if err := pw.Write(); err != nil { -// assert.NoError(t, err) -// } - -// pw.Close() - -// pr, err := doc.NewParquetReader(bytes.NewReader(buf.Bytes())) -// if err != nil { -// assert.NoError(t, err) -// } - -// expected := []doc.Levels{ -// {Name: "docid"}, -// {Name: "link.backward", Defs: []uint8{1, 2, 2}, Reps: []uint8{0, 0, 1}}, -// {Name: "link.forward", Defs: []uint8{2, 2, 2, 2}, Reps: []uint8{0, 1, 1, 0}}, -// {Name: "names.languages.code", Defs: []uint8{2, 2, 1, 2, 1}, Reps: []uint8{0, 2, 1, 1, 0}}, -// {Name: "names.languages.country", Defs: []uint8{3, 2, 1, 3, 1}, Reps: []uint8{0, 2, 1, 1, 0}}, -// {Name: "names.url", Defs: []uint8{2, 2, 1, 2}, Reps: []uint8{0, 1, 1, 0}}, -// } - -// assert.Equal(t, expected, pr.Levels()) -// } - -// var ( -// people = []person.Person{ -// { -// Name: "peep", -// Hobby: &person.Hobby{ -// Name: "napping", -// Difficulty: pint32(10), -// Skills: []person.Skill{ -// {Name: "meditation", Difficulty: "very"}, -// {Name: "calmness", Difficulty: "so-so"}, -// }, -// }, -// }, -// } -// ) - -// func TestPersonLevels(t *testing.T) { -// var buf bytes.Buffer -// pw, err := person.NewParquetWriter(&buf) -// if err != nil { -// assert.NoError(t, err) -// } - -// for _, p := range people { -// pw.Add(p) -// } - -// if err := pw.Write(); err != nil { -// assert.NoError(t, err) -// } - -// pw.Close() - -// pr, err := person.NewParquetReader(bytes.NewReader(buf.Bytes())) -// if err != nil { -// assert.NoError(t, err) -// } - -// expected := []person.Levels{ -// {Name: "name"}, -// {Name: "hobby.name", Defs: []uint8{1}}, -// {Name: "hobby.difficulty", Defs: []uint8{2}}, -// {Name: "hobby.skills.name", Defs: []uint8{2, 2}, Reps: []uint8{0, 1}}, -// {Name: "hobby.skills.difficulty", Defs: []uint8{2, 2}, Reps: []uint8{0, 1}}, -// } - -// assert.Equal(t, expected, pr.Levels()) -// } - -// // TestDremel uses the example from the dremel paper and writes then -// // reads from a parquet file to make sure nested fields work correctly. -// func TestDremel(t *testing.T) { -// var buf bytes.Buffer -// pw, err := doc.NewParquetWriter(&buf) -// if err != nil { -// log.Fatal(err) -// } - -// for _, doc := range dremelDocs { -// pw.Add(doc) -// } - -// if err := pw.Write(); err != nil { -// log.Fatal(err) -// } - -// pw.Close() - -// pr, err := doc.NewParquetReader(bytes.NewReader(buf.Bytes())) -// if err != nil { -// log.Fatal(err) -// } - -// var out []doc.Document -// for pr.Next() { -// var d doc.Document -// pr.Scan(&d) -// out = append(out, d) -// } - -// assert.Equal(t, dremelDocs, out) -// } +import ( + "bytes" + "log" + "testing" + + "github.com/parsyl/parquet/internal/dremel/testcases/doc" + "github.com/parsyl/parquet/internal/dremel/testcases/person" + "github.com/stretchr/testify/assert" +) + +var ( + dremelDocs = []doc.Document{ + { + DocID: 10, + Links: &doc.Link{ + Forward: []int64{20, 40, 60}, + }, + Names: []doc.Name{ + { + Languages: []doc.Language{ + {Code: "en-us", Country: pstring("us")}, + {Code: "en"}, + }, + URL: pstring("http://A"), + }, + { + URL: pstring("http://B"), + }, + { + Languages: []doc.Language{ + {Code: "en-gb", Country: pstring("gb")}, + }, + }, + }, + }, + { + DocID: 20, + Links: &doc.Link{ + Backward: []int64{10, 30}, + Forward: []int64{80}, + }, + Names: []doc.Name{ + { + URL: pstring("http://C"), + }, + }, + }, + } +) + +// TestLevels verifies that the example from the dremel paper +// results in the correct definition and repetition levels. +func TestLevels(t *testing.T) { + var buf bytes.Buffer + pw, err := doc.NewParquetWriter(&buf) + if err != nil { + assert.NoError(t, err) + } + + for _, doc := range dremelDocs { + pw.Add(doc) + } + + if err := pw.Write(); err != nil { + assert.NoError(t, err) + } + + pw.Close() + + pr, err := doc.NewParquetReader(bytes.NewReader(buf.Bytes())) + if err != nil { + assert.NoError(t, err) + } + + expected := []doc.Levels{ + {Name: "docid"}, + {Name: "link.backward", Defs: []uint8{1, 2, 2}, Reps: []uint8{0, 0, 1}}, + {Name: "link.forward", Defs: []uint8{2, 2, 2, 2}, Reps: []uint8{0, 1, 1, 0}}, + {Name: "names.languages.code", Defs: []uint8{2, 2, 1, 2, 1}, Reps: []uint8{0, 2, 1, 1, 0}}, + {Name: "names.languages.country", Defs: []uint8{3, 2, 1, 3, 1}, Reps: []uint8{0, 2, 1, 1, 0}}, + {Name: "names.url", Defs: []uint8{2, 2, 1, 2}, Reps: []uint8{0, 1, 1, 0}}, + } + + assert.Equal(t, expected, pr.Levels()) +} + +var ( + people = []person.Person{ + { + Name: "peep", + Hobby: &person.Hobby{ + Name: "napping", + Difficulty: pint32(10), + Skills: []person.Skill{ + {Name: "meditation", Difficulty: "very"}, + {Name: "calmness", Difficulty: "so-so"}, + }, + }, + }, + } +) + +func TestPersonLevels(t *testing.T) { + var buf bytes.Buffer + pw, err := person.NewParquetWriter(&buf) + if err != nil { + assert.NoError(t, err) + } + + for _, p := range people { + pw.Add(p) + } + + if err := pw.Write(); err != nil { + assert.NoError(t, err) + } + + pw.Close() + + pr, err := person.NewParquetReader(bytes.NewReader(buf.Bytes())) + if err != nil { + assert.NoError(t, err) + } + + expected := []person.Levels{ + {Name: "name"}, + {Name: "hobby.name", Defs: []uint8{1}}, + {Name: "hobby.difficulty", Defs: []uint8{2}}, + {Name: "hobby.skills.name", Defs: []uint8{2, 2}, Reps: []uint8{0, 1}}, + {Name: "hobby.skills.difficulty", Defs: []uint8{2, 2}, Reps: []uint8{0, 1}}, + } + + assert.Equal(t, expected, pr.Levels()) +} + +// TestDremel uses the example from the dremel paper and writes then +// reads from a parquet file to make sure nested fields work correctly. +func TestDremel(t *testing.T) { + var buf bytes.Buffer + pw, err := doc.NewParquetWriter(&buf) + if err != nil { + log.Fatal(err) + } + + for _, doc := range dremelDocs { + pw.Add(doc) + } + + if err := pw.Write(); err != nil { + log.Fatal(err) + } + + pw.Close() + + pr, err := doc.NewParquetReader(bytes.NewReader(buf.Bytes())) + if err != nil { + log.Fatal(err) + } + + var out []doc.Document + for pr.Next() { + var d doc.Document + pr.Scan(&d) + out = append(out, d) + } + + assert.Equal(t, dremelDocs, out) +} func pstring(s string) *string { return &s diff --git a/internal/dremel/testcases/doc/generated.go b/internal/dremel/testcases/doc/generated.go new file mode 100644 index 0000000..b0b58f7 --- /dev/null +++ b/internal/dremel/testcases/doc/generated.go @@ -0,0 +1,1128 @@ +package doc + +// Code generated by github.com/parsyl/parquet. DO NOT EDIT. + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "strings" + + "github.com/parsyl/parquet" + sch "github.com/parsyl/parquet/schema" + + "math" + "sort" +) + +type compression int + +const ( + compressionUncompressed compression = 0 + compressionSnappy compression = 1 + compressionGzip compression = 2 + compressionUnknown compression = -1 +) + +// ParquetWriter reprents a row group +type ParquetWriter struct { + fields []Field + + len int + + // child points to the next page + child *ParquetWriter + + // max is the number of Record items that can get written before + // a new set of column chunks is written + max int + + meta *parquet.Metadata + w io.Writer + compression compression +} + +func Fields(compression compression) []Field { + return []Field{ + NewInt64Field(readDocID, writeDocID, []string{"docid"}, fieldCompression(compression)), + NewInt64OptionalField(readLinksBackward, writeLinksBackward, []string{"link", "backward"}, []int{1, 2}, optionalFieldCompression(compression)), + NewInt64OptionalField(readLinksForward, writeLinksForward, []string{"link", "forward"}, []int{1, 2}, optionalFieldCompression(compression)), + NewStringOptionalField(readNamesLanguagesCode, writeNamesLanguagesCode, []string{"names", "languages", "code"}, []int{2, 2, 0}, optionalFieldCompression(compression)), + NewStringOptionalField(readNamesLanguagesCountry, writeNamesLanguagesCountry, []string{"names", "languages", "country"}, []int{2, 2, 1}, optionalFieldCompression(compression)), + NewStringOptionalField(readNamesURL, writeNamesURL, []string{"names", "url"}, []int{2, 1}, optionalFieldCompression(compression)), + } +} + +func readDocID(x Document) int64 { + return x.DocID +} + +func writeDocID(x *Document, vals []int64) { + x.DocID = vals[0] +} + +func readLinksBackward(x Document) ([]int64, []uint8, []uint8) { + var vals []int64 + var defs, reps []uint8 + var lastRep uint8 + + if x.Links == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Links.Backward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links.Backward { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0) + } + } + } + + return vals, defs, reps +} + +func writeLinksBackward(x *Document, vals []int64, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + x.Links = &Link{} + case 2: + switch rep { + case 0: + x.Links = &Link{Backward: []int64{vals[nVals]}} + case 1: + x.Links.Backward = append(x.Links.Backward, vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readLinksForward(x Document) ([]int64, []uint8, []uint8) { + var vals []int64 + var defs, reps []uint8 + var lastRep uint8 + + if x.Links == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Links.Forward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links.Forward { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0) + } + } + } + + return vals, defs, reps +} + +func writeLinksForward(x *Document, vals []int64, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + switch rep { + case 0: + x.Links.Forward = []int64{vals[nVals]} + case 1: + x.Links.Forward = append(x.Links.Forward, vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readNamesLanguagesCode(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Names) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Names { + if i0 == 1 { + lastRep = 1 + } + if len(x0.Languages) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Languages { + if i1 == 1 { + lastRep = 2 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x1.Code) + } + } + } + } + + return vals, defs, reps +} + +func writeNamesLanguagesCode(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 2) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + x.Names = append(x.Names, Name{}) + case 2: + switch rep { + case 0: + x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}} + case 1: + x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}}) + case 2: + x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]}) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readNamesLanguagesCountry(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Names) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Names { + if i0 == 1 { + lastRep = 1 + } + if len(x0.Languages) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Languages { + if i1 == 1 { + lastRep = 2 + } + if x1.Country == nil { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, *x1.Country) + } + } + } + } + } + + return vals, defs, reps +} + +func writeNamesLanguagesCountry(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 2) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 3: + switch rep { + case 0, 2: + x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readNamesURL(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Names) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Names { + if i0 == 1 { + lastRep = 1 + } + if x0.URL == nil { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, *x0.URL) + } + } + } + + return vals, defs, reps +} + +func writeNamesURL(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + switch rep { + case 0, 1: + x.Names[ind[0]].URL = pstring(vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +} + +func fieldCompression(c compression) func(*parquet.RequiredField) { + switch c { + case compressionUncompressed: + return parquet.RequiredFieldUncompressed + case compressionSnappy: + return parquet.RequiredFieldSnappy + case compressionGzip: + return parquet.RequiredFieldGzip + default: + return parquet.RequiredFieldUncompressed + } +} + +func optionalFieldCompression(c compression) func(*parquet.OptionalField) { + switch c { + case compressionUncompressed: + return parquet.OptionalFieldUncompressed + case compressionSnappy: + return parquet.OptionalFieldSnappy + case compressionGzip: + return parquet.OptionalFieldGzip + default: + return parquet.OptionalFieldUncompressed + } +} + +func NewParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { + return newParquetWriter(w, append(opts, begin)...) +} + +func newParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { + p := &ParquetWriter{ + max: 1000, + w: w, + compression: compressionSnappy, + } + + for _, opt := range opts { + if err := opt(p); err != nil { + return nil, err + } + } + + p.fields = Fields(p.compression) + if p.meta == nil { + ff := Fields(p.compression) + schema := make([]parquet.Field, len(ff)) + for i, f := range ff { + schema[i] = f.Schema() + } + p.meta = parquet.New(schema...) + } + + return p, nil +} + +// MaxPageSize is the maximum number of rows in each row groups' page. +func MaxPageSize(m int) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.max = m + return nil + } +} + +func begin(p *ParquetWriter) error { + _, err := p.w.Write([]byte("PAR1")) + return err +} + +func withMeta(m *parquet.Metadata) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.meta = m + return nil + } +} + +func Uncompressed(p *ParquetWriter) error { + p.compression = compressionUncompressed + return nil +} + +func Snappy(p *ParquetWriter) error { + p.compression = compressionSnappy + return nil +} + +func Gzip(p *ParquetWriter) error { + p.compression = compressionGzip + return nil +} + +func withCompression(c compression) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.compression = c + return nil + } +} + +func (p *ParquetWriter) Write() error { + for i, f := range p.fields { + if err := f.Write(p.w, p.meta); err != nil { + return err + } + + for child := p.child; child != nil; child = child.child { + if err := child.fields[i].Write(p.w, p.meta); err != nil { + return err + } + } + } + + p.fields = Fields(p.compression) + p.child = nil + p.len = 0 + + schema := make([]parquet.Field, len(p.fields)) + for i, f := range p.fields { + schema[i] = f.Schema() + } + p.meta.StartRowGroup(schema...) + return nil +} + +func (p *ParquetWriter) Close() error { + if err := p.meta.Footer(p.w); err != nil { + return err + } + + _, err := p.w.Write([]byte("PAR1")) + return err +} + +func (p *ParquetWriter) Add(rec Document) { + if p.len == p.max { + if p.child == nil { + // an error can't happen here + p.child, _ = newParquetWriter(p.w, MaxPageSize(p.max), withMeta(p.meta), withCompression(p.compression)) + } + + p.child.Add(rec) + return + } + + p.meta.NextDoc() + for _, f := range p.fields { + f.Add(rec) + } + + p.len++ +} + +type Field interface { + Add(r Document) + Write(w io.Writer, meta *parquet.Metadata) error + Schema() parquet.Field + Scan(r *Document) + Read(r io.ReadSeeker, pg parquet.Page) error + Name() string + Levels() ([]uint8, []uint8) +} + +func getFields(ff []Field) map[string]Field { + m := make(map[string]Field, len(ff)) + for _, f := range ff { + m[f.Name()] = f + } + return m +} + +func NewParquetReader(r io.ReadSeeker, opts ...func(*ParquetReader)) (*ParquetReader, error) { + ff := Fields(compressionUnknown) + pr := &ParquetReader{ + r: r, + } + + for _, opt := range opts { + opt(pr) + } + + schema := make([]parquet.Field, len(ff)) + for i, f := range ff { + pr.fieldNames = append(pr.fieldNames, f.Name()) + schema[i] = f.Schema() + } + + meta := parquet.New(schema...) + if err := meta.ReadFooter(r); err != nil { + return nil, err + } + pr.rows = meta.Rows() + var err error + pr.pages, err = meta.Pages() + if err != nil { + return nil, err + } + + pr.rowGroups = meta.RowGroups() + _, err = r.Seek(4, io.SeekStart) + if err != nil { + return nil, err + } + pr.meta = meta + + return pr, pr.readRowGroup() +} + +func readerIndex(i int) func(*ParquetReader) { + return func(p *ParquetReader) { + p.index = i + } +} + +// ParquetReader reads one page from a row group. +type ParquetReader struct { + fields map[string]Field + fieldNames []string + index int + cursor int64 + rows int64 + rowGroupCursor int64 + rowGroupCount int64 + pages map[string][]parquet.Page + meta *parquet.Metadata + err error + + r io.ReadSeeker + rowGroups []parquet.RowGroup +} + +type Levels struct { + Name string + Defs []uint8 + Reps []uint8 +} + +func (p *ParquetReader) Levels() []Levels { + var out []Levels + //for { + for _, name := range p.fieldNames { + f := p.fields[name] + d, r := f.Levels() + out = append(out, Levels{Name: f.Name(), Defs: d, Reps: r}) + } + // if err := p.readRowGroup(); err != nil { + // break + // } + //} + return out +} + +func (p *ParquetReader) Error() error { + return p.err +} + +func (p *ParquetReader) readRowGroup() error { + p.rowGroupCursor = 0 + + if len(p.rowGroups) == 0 { + p.rowGroupCount = 0 + return nil + } + + rg := p.rowGroups[0] + p.fields = getFields(Fields(compressionUnknown)) + p.rowGroupCount = rg.Rows + p.rowGroupCursor = 0 + for _, col := range rg.Columns() { + name := strings.Join(col.MetaData.PathInSchema, ".") + f, ok := p.fields[name] + if !ok { + return fmt.Errorf("unknown field: %s", name) + } + pages := p.pages[name] + if len(pages) <= p.index { + break + } + + pg := pages[0] + if err := f.Read(p.r, pg); err != nil { + return fmt.Errorf("unable to read field %s, err: %s", f.Name(), err) + } + p.pages[name] = p.pages[name][1:] + } + p.rowGroups = p.rowGroups[1:] + return nil +} + +func (p *ParquetReader) Rows() int64 { + return p.rows +} + +func (p *ParquetReader) Next() bool { + if p.err == nil && p.cursor >= p.rows { + return false + } + if p.rowGroupCursor >= p.rowGroupCount { + p.err = p.readRowGroup() + if p.err != nil { + return false + } + } + + p.cursor++ + p.rowGroupCursor++ + return true +} + +func (p *ParquetReader) Scan(x *Document) { + if p.err != nil { + return + } + + for _, name := range p.fieldNames { + f := p.fields[name] + f.Scan(x) + } +} + +type Int64Field struct { + vals []int64 + parquet.RequiredField + read func(r Document) int64 + write func(r *Document, vals []int64) + stats *int64stats +} + +func NewInt64Field(read func(r Document) int64, write func(r *Document, vals []int64), path []string, opts ...func(*parquet.RequiredField)) *Int64Field { + return &Int64Field{ + read: read, + write: write, + RequiredField: parquet.NewRequiredField(path, opts...), + stats: newInt64stats(), + } +} + +func (f *Int64Field) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int64Type, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} +} + +func (f *Int64Field) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + v := make([]int64, int(pg.N)) + err = binary.Read(rr, binary.LittleEndian, &v) + f.vals = append(f.vals, v...) + return err +} + +func (f *Int64Field) Write(w io.Writer, meta *parquet.Metadata) error { + var buf bytes.Buffer + for _, v := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + return err + } + } + return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) +} + +func (f *Int64Field) Scan(r *Document) { + if len(f.vals) == 0 { + return + } + + f.write(r, f.vals) + f.vals = f.vals[1:] +} + +func (f *Int64Field) Add(r Document) { + v := f.read(r) + f.stats.add(v) + f.vals = append(f.vals, v) +} + +func (f *Int64Field) Levels() ([]uint8, []uint8) { + return nil, nil +} + +type Int64OptionalField struct { + parquet.OptionalField + vals []int64 + read func(r Document) ([]int64, []uint8, []uint8) + write func(r *Document, vals []int64, def, rep []uint8) (int, int) + stats *int64optionalStats +} + +func NewInt64OptionalField(read func(r Document) ([]int64, []uint8, []uint8), write func(r *Document, vals []int64, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Int64OptionalField { + return &Int64OptionalField{ + read: read, + write: write, + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newint64optionalStats(maxDef(types)), + } +} + +func (f *Int64OptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int64Type, RepetitionType: f.RepetitionType, Types: f.Types} +} + +func (f *Int64OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + var buf bytes.Buffer + for _, v := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + return err + } + } + return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) +} + +func (f *Int64OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + v := make([]int64, f.Values()-len(f.vals)) + err = binary.Read(rr, binary.LittleEndian, &v) + f.vals = append(f.vals, v...) + return err +} + +func (f *Int64OptionalField) Add(r Document) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) +} + +func (f *Int64OptionalField) Scan(r *Document) { + if len(f.Defs) == 0 { + return + } + + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] + } +} + +func (f *Int64OptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps +} + +type StringOptionalField struct { + parquet.OptionalField + vals []string + read func(r Document) ([]string, []uint8, []uint8) + write func(r *Document, vals []string, def, rep []uint8) (int, int) + stats *stringOptionalStats +} + +func NewStringOptionalField(read func(r Document) ([]string, []uint8, []uint8), write func(r *Document, vals []string, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *StringOptionalField { + return &StringOptionalField{ + read: read, + write: write, + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newStringOptionalStats(maxDef(types)), + } +} + +func (f *StringOptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: f.RepetitionType, Types: f.Types} +} + +func (f *StringOptionalField) Add(r Document) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) +} + +func (f *StringOptionalField) Scan(r *Document) { + if len(f.Defs) == 0 { + return + } + + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] + } +} + +func (f *StringOptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + buf := bytes.Buffer{} + + for _, s := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { + return err + } + buf.Write([]byte(s)) + } + + return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) +} + +func (f *StringOptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + for j := 0; j < f.Values(); j++ { + var x int32 + if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { + return err + } + s := make([]byte, x) + if _, err := rr.Read(s); err != nil { + return err + } + + f.vals = append(f.vals, string(s)) + } + return nil +} + +func (f *StringOptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps +} + +type int64stats struct { + min int64 + max int64 +} + +func newInt64stats() *int64stats { + return &int64stats{ + min: int64(math.MaxInt64), + } +} + +func (i *int64stats) add(val int64) { + if val < i.min { + i.min = val + } + if val > i.max { + i.max = val + } +} + +func (f *int64stats) bytes(val int64) []byte { + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, val) + return buf.Bytes() +} + +func (f *int64stats) NullCount() *int64 { + return nil +} + +func (f *int64stats) DistinctCount() *int64 { + return nil +} + +func (f *int64stats) Min() []byte { + return f.bytes(f.min) +} + +func (f *int64stats) Max() []byte { + return f.bytes(f.max) +} + +type int64optionalStats struct { + min int64 + max int64 + nils int64 + nonNils int64 + maxDef uint8 +} + +func newint64optionalStats(d uint8) *int64optionalStats { + return &int64optionalStats{ + min: int64(math.MaxInt64), + maxDef: d, + } +} + +func (f *int64optionalStats) add(vals []int64, defs []uint8) { + var i int + for _, def := range defs { + if def < f.maxDef { + f.nils++ + } else { + val := vals[i] + i++ + + f.nonNils++ + if val < f.min { + f.min = val + } + if val > f.max { + f.max = val + } + } + } +} + +func (f *int64optionalStats) bytes(val int64) []byte { + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, val) + return buf.Bytes() +} + +func (f *int64optionalStats) NullCount() *int64 { + return &f.nils +} + +func (f *int64optionalStats) DistinctCount() *int64 { + return nil +} + +func (f *int64optionalStats) Min() []byte { + if f.nonNils == 0 { + return nil + } + return f.bytes(f.min) +} + +func (f *int64optionalStats) Max() []byte { + if f.nonNils == 0 { + return nil + } + return f.bytes(f.max) +} + +type stringOptionalStats struct { + vals []string + min []byte + max []byte + nils int64 + maxDef uint8 +} + +func newStringOptionalStats(d uint8) *stringOptionalStats { + return &stringOptionalStats{maxDef: d} +} + +func (s *stringOptionalStats) add(vals []string, defs []uint8) { + var i int + for _, def := range defs { + if def < s.maxDef { + s.nils++ + } else { + s.vals = append(s.vals, vals[i]) + i++ + } + } +} + +func (s *stringOptionalStats) NullCount() *int64 { + return &s.nils +} + +func (s *stringOptionalStats) DistinctCount() *int64 { + return nil +} + +func (s *stringOptionalStats) Min() []byte { + if s.min == nil { + s.minMax() + } + return s.min +} + +func (s *stringOptionalStats) Max() []byte { + if s.max == nil { + s.minMax() + } + return s.max +} + +func (s *stringOptionalStats) minMax() { + if len(s.vals) == 0 { + return + } + + tmp := make([]string, len(s.vals)) + copy(tmp, s.vals) + sort.Strings(tmp) + s.min = []byte(tmp[0]) + s.max = []byte(tmp[len(tmp)-1]) +} + +func pint32(i int32) *int32 { return &i } +func puint32(i uint32) *uint32 { return &i } +func pint64(i int64) *int64 { return &i } +func puint64(i uint64) *uint64 { return &i } +func pbool(b bool) *bool { return &b } +func pstring(s string) *string { return &s } +func pfloat32(f float32) *float32 { return &f } +func pfloat64(f float64) *float64 { return &f } + +// keeps track of the indices of repeated fields +// that have already been handled by a previous field +type indices []int + +func (i indices) rep(rep uint8) { + if rep > 0 { + r := int(rep) - 1 + i[r] = i[r] + 1 + for j := int(rep); j < len(i); j++ { + i[j] = 0 + } + } +} + +func maxDef(types []int) uint8 { + var out uint8 + for _, typ := range types { + if typ > 0 { + out++ + } + } + return out +} + +func Int32Type(se *sch.SchemaElement) { + t := sch.Type_INT32 + se.Type = &t +} + +func Uint32Type(se *sch.SchemaElement) { + t := sch.Type_INT32 + se.Type = &t + ct := sch.ConvertedType_UINT_32 + se.ConvertedType = &ct +} + +func Int64Type(se *sch.SchemaElement) { + t := sch.Type_INT64 + se.Type = &t +} + +func Uint64Type(se *sch.SchemaElement) { + t := sch.Type_INT64 + se.Type = &t + ct := sch.ConvertedType_UINT_64 + se.ConvertedType = &ct +} + +func Float32Type(se *sch.SchemaElement) { + t := sch.Type_FLOAT + se.Type = &t +} + +func Float64Type(se *sch.SchemaElement) { + t := sch.Type_DOUBLE + se.Type = &t +} + +func BoolType(se *sch.SchemaElement) { + t := sch.Type_BOOLEAN + se.Type = &t +} + +func StringType(se *sch.SchemaElement) { + t := sch.Type_BYTE_ARRAY + se.Type = &t +} diff --git a/internal/dremel/testcases/person/generated.go b/internal/dremel/testcases/person/generated.go new file mode 100644 index 0000000..90eebbe --- /dev/null +++ b/internal/dremel/testcases/person/generated.go @@ -0,0 +1,1000 @@ +package person + +// Code generated by github.com/parsyl/parquet. DO NOT EDIT. + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "strings" + + "github.com/parsyl/parquet" + sch "github.com/parsyl/parquet/schema" + + "math" + "sort" +) + +type compression int + +const ( + compressionUncompressed compression = 0 + compressionSnappy compression = 1 + compressionGzip compression = 2 + compressionUnknown compression = -1 +) + +// ParquetWriter reprents a row group +type ParquetWriter struct { + fields []Field + + len int + + // child points to the next page + child *ParquetWriter + + // max is the number of Record items that can get written before + // a new set of column chunks is written + max int + + meta *parquet.Metadata + w io.Writer + compression compression +} + +func Fields(compression compression) []Field { + return []Field{ + NewStringField(readName, writeName, []string{"name"}, fieldCompression(compression)), + NewStringOptionalField(readHobbyName, writeHobbyName, []string{"hobby", "name"}, []int{1, 0}, optionalFieldCompression(compression)), + NewInt32OptionalField(readHobbyDifficulty, writeHobbyDifficulty, []string{"hobby", "difficulty"}, []int{1, 1}, optionalFieldCompression(compression)), + NewStringOptionalField(readHobbySkillsName, writeHobbySkillsName, []string{"hobby", "skills", "name"}, []int{1, 2, 0}, optionalFieldCompression(compression)), + NewStringOptionalField(readHobbySkillsDifficulty, writeHobbySkillsDifficulty, []string{"hobby", "skills", "difficulty"}, []int{1, 2, 0}, optionalFieldCompression(compression)), + } +} + +func readName(x Person) string { + return x.Name +} + +func writeName(x *Person, vals []string) { + x.Name = vals[0] +} + +func readHobbyName(x Person) ([]string, []uint8, []uint8) { + switch { + case x.Hobby == nil: + return nil, []uint8{0}, nil + default: + return []string{x.Hobby.Name}, []uint8{1}, nil + } +} + +func writeHobbyName(x *Person, vals []string, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 1: + x.Hobby = &Hobby{Name: vals[0]} + return 1, 1 + } + + return 0, 1 +} + +func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { + switch { + case x.Hobby == nil: + return nil, []uint8{0}, nil + case x.Hobby.Difficulty == nil: + return nil, []uint8{1}, nil + default: + return []int32{*x.Hobby.Difficulty}, []uint8{2}, nil + } +} + +func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { + def := defs[0] + switch def { + case 2: + x.Hobby.Difficulty = pint32(vals[0]) + return 1, 1 + } + + return 0, 1 +} + +func readHobbySkillsName(x Person) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if x.Hobby == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Hobby.Skills) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Hobby.Skills { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0.Name) + } + } + } + + return vals, defs, reps +} + +func writeHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + switch rep { + case 0: + x.Hobby.Skills = []Skill{{Name: vals[nVals]}} + case 1: + x.Hobby.Skills = append(x.Hobby.Skills, Skill{Name: vals[nVals]}) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if x.Hobby == nil { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + if len(x.Hobby.Skills) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Hobby.Skills { + if i0 == 1 { + lastRep = 1 + } + defs = append(defs, 2) + reps = append(reps, lastRep) + vals = append(vals, x0.Difficulty) + } + } + } + + return vals, defs, reps +} + +func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + switch rep { + case 0, 1: + x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] + } + nVals++ + } + } + + return nVals, nLevels +} + +func fieldCompression(c compression) func(*parquet.RequiredField) { + switch c { + case compressionUncompressed: + return parquet.RequiredFieldUncompressed + case compressionSnappy: + return parquet.RequiredFieldSnappy + case compressionGzip: + return parquet.RequiredFieldGzip + default: + return parquet.RequiredFieldUncompressed + } +} + +func optionalFieldCompression(c compression) func(*parquet.OptionalField) { + switch c { + case compressionUncompressed: + return parquet.OptionalFieldUncompressed + case compressionSnappy: + return parquet.OptionalFieldSnappy + case compressionGzip: + return parquet.OptionalFieldGzip + default: + return parquet.OptionalFieldUncompressed + } +} + +func NewParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { + return newParquetWriter(w, append(opts, begin)...) +} + +func newParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { + p := &ParquetWriter{ + max: 1000, + w: w, + compression: compressionSnappy, + } + + for _, opt := range opts { + if err := opt(p); err != nil { + return nil, err + } + } + + p.fields = Fields(p.compression) + if p.meta == nil { + ff := Fields(p.compression) + schema := make([]parquet.Field, len(ff)) + for i, f := range ff { + schema[i] = f.Schema() + } + p.meta = parquet.New(schema...) + } + + return p, nil +} + +// MaxPageSize is the maximum number of rows in each row groups' page. +func MaxPageSize(m int) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.max = m + return nil + } +} + +func begin(p *ParquetWriter) error { + _, err := p.w.Write([]byte("PAR1")) + return err +} + +func withMeta(m *parquet.Metadata) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.meta = m + return nil + } +} + +func Uncompressed(p *ParquetWriter) error { + p.compression = compressionUncompressed + return nil +} + +func Snappy(p *ParquetWriter) error { + p.compression = compressionSnappy + return nil +} + +func Gzip(p *ParquetWriter) error { + p.compression = compressionGzip + return nil +} + +func withCompression(c compression) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.compression = c + return nil + } +} + +func (p *ParquetWriter) Write() error { + for i, f := range p.fields { + if err := f.Write(p.w, p.meta); err != nil { + return err + } + + for child := p.child; child != nil; child = child.child { + if err := child.fields[i].Write(p.w, p.meta); err != nil { + return err + } + } + } + + p.fields = Fields(p.compression) + p.child = nil + p.len = 0 + + schema := make([]parquet.Field, len(p.fields)) + for i, f := range p.fields { + schema[i] = f.Schema() + } + p.meta.StartRowGroup(schema...) + return nil +} + +func (p *ParquetWriter) Close() error { + if err := p.meta.Footer(p.w); err != nil { + return err + } + + _, err := p.w.Write([]byte("PAR1")) + return err +} + +func (p *ParquetWriter) Add(rec Person) { + if p.len == p.max { + if p.child == nil { + // an error can't happen here + p.child, _ = newParquetWriter(p.w, MaxPageSize(p.max), withMeta(p.meta), withCompression(p.compression)) + } + + p.child.Add(rec) + return + } + + p.meta.NextDoc() + for _, f := range p.fields { + f.Add(rec) + } + + p.len++ +} + +type Field interface { + Add(r Person) + Write(w io.Writer, meta *parquet.Metadata) error + Schema() parquet.Field + Scan(r *Person) + Read(r io.ReadSeeker, pg parquet.Page) error + Name() string + Levels() ([]uint8, []uint8) +} + +func getFields(ff []Field) map[string]Field { + m := make(map[string]Field, len(ff)) + for _, f := range ff { + m[f.Name()] = f + } + return m +} + +func NewParquetReader(r io.ReadSeeker, opts ...func(*ParquetReader)) (*ParquetReader, error) { + ff := Fields(compressionUnknown) + pr := &ParquetReader{ + r: r, + } + + for _, opt := range opts { + opt(pr) + } + + schema := make([]parquet.Field, len(ff)) + for i, f := range ff { + pr.fieldNames = append(pr.fieldNames, f.Name()) + schema[i] = f.Schema() + } + + meta := parquet.New(schema...) + if err := meta.ReadFooter(r); err != nil { + return nil, err + } + pr.rows = meta.Rows() + var err error + pr.pages, err = meta.Pages() + if err != nil { + return nil, err + } + + pr.rowGroups = meta.RowGroups() + _, err = r.Seek(4, io.SeekStart) + if err != nil { + return nil, err + } + pr.meta = meta + + return pr, pr.readRowGroup() +} + +func readerIndex(i int) func(*ParquetReader) { + return func(p *ParquetReader) { + p.index = i + } +} + +// ParquetReader reads one page from a row group. +type ParquetReader struct { + fields map[string]Field + fieldNames []string + index int + cursor int64 + rows int64 + rowGroupCursor int64 + rowGroupCount int64 + pages map[string][]parquet.Page + meta *parquet.Metadata + err error + + r io.ReadSeeker + rowGroups []parquet.RowGroup +} + +type Levels struct { + Name string + Defs []uint8 + Reps []uint8 +} + +func (p *ParquetReader) Levels() []Levels { + var out []Levels + //for { + for _, name := range p.fieldNames { + f := p.fields[name] + d, r := f.Levels() + out = append(out, Levels{Name: f.Name(), Defs: d, Reps: r}) + } + // if err := p.readRowGroup(); err != nil { + // break + // } + //} + return out +} + +func (p *ParquetReader) Error() error { + return p.err +} + +func (p *ParquetReader) readRowGroup() error { + p.rowGroupCursor = 0 + + if len(p.rowGroups) == 0 { + p.rowGroupCount = 0 + return nil + } + + rg := p.rowGroups[0] + p.fields = getFields(Fields(compressionUnknown)) + p.rowGroupCount = rg.Rows + p.rowGroupCursor = 0 + for _, col := range rg.Columns() { + name := strings.Join(col.MetaData.PathInSchema, ".") + f, ok := p.fields[name] + if !ok { + return fmt.Errorf("unknown field: %s", name) + } + pages := p.pages[name] + if len(pages) <= p.index { + break + } + + pg := pages[0] + if err := f.Read(p.r, pg); err != nil { + return fmt.Errorf("unable to read field %s, err: %s", f.Name(), err) + } + p.pages[name] = p.pages[name][1:] + } + p.rowGroups = p.rowGroups[1:] + return nil +} + +func (p *ParquetReader) Rows() int64 { + return p.rows +} + +func (p *ParquetReader) Next() bool { + if p.err == nil && p.cursor >= p.rows { + return false + } + if p.rowGroupCursor >= p.rowGroupCount { + p.err = p.readRowGroup() + if p.err != nil { + return false + } + } + + p.cursor++ + p.rowGroupCursor++ + return true +} + +func (p *ParquetReader) Scan(x *Person) { + if p.err != nil { + return + } + + for _, name := range p.fieldNames { + f := p.fields[name] + f.Scan(x) + } +} + +type StringField struct { + parquet.RequiredField + vals []string + read func(r Person) string + write func(r *Person, vals []string) + stats *stringStats +} + +func NewStringField(read func(r Person) string, write func(r *Person, vals []string), path []string, opts ...func(*parquet.RequiredField)) *StringField { + return &StringField{ + read: read, + write: write, + RequiredField: parquet.NewRequiredField(path, opts...), + stats: newStringStats(), + } +} + +func (f *StringField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} +} + +func (f *StringField) Write(w io.Writer, meta *parquet.Metadata) error { + buf := bytes.Buffer{} + + for _, s := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { + return err + } + buf.Write([]byte(s)) + } + + return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) +} + +func (f *StringField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + for j := 0; j < pg.N; j++ { + var x int32 + if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { + return err + } + s := make([]byte, x) + if _, err := rr.Read(s); err != nil { + return err + } + + f.vals = append(f.vals, string(s)) + } + return nil +} + +func (f *StringField) Scan(r *Person) { + if len(f.vals) == 0 { + return + } + + f.write(r, f.vals) + f.vals = f.vals[1:] +} + +func (f *StringField) Add(r Person) { + v := f.read(r) + f.stats.add(v) + f.vals = append(f.vals, v) +} + +func (f *StringField) Levels() ([]uint8, []uint8) { + return nil, nil +} + +type StringOptionalField struct { + parquet.OptionalField + vals []string + read func(r Person) ([]string, []uint8, []uint8) + write func(r *Person, vals []string, def, rep []uint8) (int, int) + stats *stringOptionalStats +} + +func NewStringOptionalField(read func(r Person) ([]string, []uint8, []uint8), write func(r *Person, vals []string, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *StringOptionalField { + return &StringOptionalField{ + read: read, + write: write, + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newStringOptionalStats(maxDef(types)), + } +} + +func (f *StringOptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: f.RepetitionType, Types: f.Types} +} + +func (f *StringOptionalField) Add(r Person) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) +} + +func (f *StringOptionalField) Scan(r *Person) { + if len(f.Defs) == 0 { + return + } + + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] + } +} + +func (f *StringOptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + buf := bytes.Buffer{} + + for _, s := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { + return err + } + buf.Write([]byte(s)) + } + + return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) +} + +func (f *StringOptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + for j := 0; j < f.Values(); j++ { + var x int32 + if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { + return err + } + s := make([]byte, x) + if _, err := rr.Read(s); err != nil { + return err + } + + f.vals = append(f.vals, string(s)) + } + return nil +} + +func (f *StringOptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps +} + +type Int32OptionalField struct { + parquet.OptionalField + vals []int32 + read func(r Person) ([]int32, []uint8, []uint8) + write func(r *Person, vals []int32, def, rep []uint8) (int, int) + stats *int32optionalStats +} + +func NewInt32OptionalField(read func(r Person) ([]int32, []uint8, []uint8), write func(r *Person, vals []int32, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Int32OptionalField { + return &Int32OptionalField{ + read: read, + write: write, + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newint32optionalStats(maxDef(types)), + } +} + +func (f *Int32OptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int32Type, RepetitionType: f.RepetitionType, Types: f.Types} +} + +func (f *Int32OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + var buf bytes.Buffer + for _, v := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + return err + } + } + return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) +} + +func (f *Int32OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + v := make([]int32, f.Values()-len(f.vals)) + err = binary.Read(rr, binary.LittleEndian, &v) + f.vals = append(f.vals, v...) + return err +} + +func (f *Int32OptionalField) Add(r Person) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) +} + +func (f *Int32OptionalField) Scan(r *Person) { + if len(f.Defs) == 0 { + return + } + + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] + } +} + +func (f *Int32OptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps +} + +type stringStats struct { + vals []string + min []byte + max []byte +} + +func newStringStats() *stringStats { + return &stringStats{} +} + +func (s *stringStats) add(val string) { + s.vals = append(s.vals, val) +} + +func (s *stringStats) NullCount() *int64 { + return nil +} + +func (s *stringStats) DistinctCount() *int64 { + return nil +} + +func (s *stringStats) Min() []byte { + if s.min == nil { + s.minMax() + } + return s.min +} + +func (s *stringStats) Max() []byte { + if s.max == nil { + s.minMax() + } + return s.max +} + +func (s *stringStats) minMax() { + if len(s.vals) == 0 { + return + } + + tmp := make([]string, len(s.vals)) + copy(tmp, s.vals) + sort.Strings(tmp) + s.min = []byte(tmp[0]) + s.max = []byte(tmp[len(tmp)-1]) +} + +type stringOptionalStats struct { + vals []string + min []byte + max []byte + nils int64 + maxDef uint8 +} + +func newStringOptionalStats(d uint8) *stringOptionalStats { + return &stringOptionalStats{maxDef: d} +} + +func (s *stringOptionalStats) add(vals []string, defs []uint8) { + var i int + for _, def := range defs { + if def < s.maxDef { + s.nils++ + } else { + s.vals = append(s.vals, vals[i]) + i++ + } + } +} + +func (s *stringOptionalStats) NullCount() *int64 { + return &s.nils +} + +func (s *stringOptionalStats) DistinctCount() *int64 { + return nil +} + +func (s *stringOptionalStats) Min() []byte { + if s.min == nil { + s.minMax() + } + return s.min +} + +func (s *stringOptionalStats) Max() []byte { + if s.max == nil { + s.minMax() + } + return s.max +} + +func (s *stringOptionalStats) minMax() { + if len(s.vals) == 0 { + return + } + + tmp := make([]string, len(s.vals)) + copy(tmp, s.vals) + sort.Strings(tmp) + s.min = []byte(tmp[0]) + s.max = []byte(tmp[len(tmp)-1]) +} + +type int32optionalStats struct { + min int32 + max int32 + nils int64 + nonNils int64 + maxDef uint8 +} + +func newint32optionalStats(d uint8) *int32optionalStats { + return &int32optionalStats{ + min: int32(math.MaxInt32), + maxDef: d, + } +} + +func (f *int32optionalStats) add(vals []int32, defs []uint8) { + var i int + for _, def := range defs { + if def < f.maxDef { + f.nils++ + } else { + val := vals[i] + i++ + + f.nonNils++ + if val < f.min { + f.min = val + } + if val > f.max { + f.max = val + } + } + } +} + +func (f *int32optionalStats) bytes(val int32) []byte { + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, val) + return buf.Bytes() +} + +func (f *int32optionalStats) NullCount() *int64 { + return &f.nils +} + +func (f *int32optionalStats) DistinctCount() *int64 { + return nil +} + +func (f *int32optionalStats) Min() []byte { + if f.nonNils == 0 { + return nil + } + return f.bytes(f.min) +} + +func (f *int32optionalStats) Max() []byte { + if f.nonNils == 0 { + return nil + } + return f.bytes(f.max) +} + +func pint32(i int32) *int32 { return &i } +func puint32(i uint32) *uint32 { return &i } +func pint64(i int64) *int64 { return &i } +func puint64(i uint64) *uint64 { return &i } +func pbool(b bool) *bool { return &b } +func pstring(s string) *string { return &s } +func pfloat32(f float32) *float32 { return &f } +func pfloat64(f float64) *float64 { return &f } + +// keeps track of the indices of repeated fields +// that have already been handled by a previous field +type indices []int + +func (i indices) rep(rep uint8) { + if rep > 0 { + r := int(rep) - 1 + i[r] = i[r] + 1 + for j := int(rep); j < len(i); j++ { + i[j] = 0 + } + } +} + +func maxDef(types []int) uint8 { + var out uint8 + for _, typ := range types { + if typ > 0 { + out++ + } + } + return out +} + +func Int32Type(se *sch.SchemaElement) { + t := sch.Type_INT32 + se.Type = &t +} + +func Uint32Type(se *sch.SchemaElement) { + t := sch.Type_INT32 + se.Type = &t + ct := sch.ConvertedType_UINT_32 + se.ConvertedType = &ct +} + +func Int64Type(se *sch.SchemaElement) { + t := sch.Type_INT64 + se.Type = &t +} + +func Uint64Type(se *sch.SchemaElement) { + t := sch.Type_INT64 + se.Type = &t + ct := sch.ConvertedType_UINT_64 + se.ConvertedType = &ct +} + +func Float32Type(se *sch.SchemaElement) { + t := sch.Type_FLOAT + se.Type = &t +} + +func Float64Type(se *sch.SchemaElement) { + t := sch.Type_DOUBLE + se.Type = &t +} + +func BoolType(se *sch.SchemaElement) { + t := sch.Type_BOOLEAN + se.Type = &t +} + +func StringType(se *sch.SchemaElement) { + t := sch.Type_BYTE_ARRAY + se.Type = &t +} diff --git a/internal/fields/fields.go b/internal/fields/fields.go index 4d388d9..b67ce7a 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -427,7 +427,7 @@ func (f Field) Primitive() bool { func (f Field) FieldType() string { var op string - if f.RepetitionType == Optional || f.RepetitionType == Repeated { + if f.Optional() || f.Repeated() { op = "Optional" } @@ -442,12 +442,12 @@ func (f Field) ParquetType() string { func (f Field) Category() string { var op string - if f.RepetitionType == Optional || f.RepetitionType == Repeated { + if f.Optional() || f.Repeated() { op = "Optional" } ft := primitiveTypes[f.Type] - return fmt.Sprintf(ft.category, op, "") + return fmt.Sprintf(ft.category, op) } func (f Field) TypeName() string { diff --git a/internal/gen/gen.go b/internal/gen/gen.go index f6e56e2..907a503 100644 --- a/internal/gen/gen.go +++ b/internal/gen/gen.go @@ -176,15 +176,21 @@ func getFieldType(se *sch.SchemaElement) (string, error) { } func dedupe(flds []fields.Field) []fields.Field { + fmt.Printf("deduping before: %+v\n", flds) seen := map[string]bool{} out := make([]fields.Field, 0, len(flds)) for _, f := range flds { - _, ok := seen[f.Type] + _, ok := seen[f.Category()] if !ok { out = append(out, f) - seen[f.Type] = true + seen[f.Category()] = true } } + fmt.Println("deduping", out) + + for _, f := range out { + fmt.Println("cat", f.Category()) + } return out } From ef1f348d435626de8ef83b75adabb5d40bd98d09 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Sun, 13 Jun 2021 09:53:54 -0600 Subject: [PATCH 14/25] dremel tests passing --- internal/dremel/testcases/doc/generated.go | 8 +++---- internal/dremel/testcases/person/generated.go | 2 +- internal/dremel/write_test.go | 22 ++++++------------- internal/fields/fields.go | 10 ++++----- internal/fields/fields_test.go | 4 ++-- 5 files changed, 17 insertions(+), 29 deletions(-) diff --git a/internal/dremel/testcases/doc/generated.go b/internal/dremel/testcases/doc/generated.go index b0b58f7..7764dd6 100644 --- a/internal/dremel/testcases/doc/generated.go +++ b/internal/dremel/testcases/doc/generated.go @@ -164,9 +164,7 @@ func writeLinksForward(x *Document, vals []int64, defs, reps []uint8) (int, int) switch def { case 2: switch rep { - case 0: - x.Links.Forward = []int64{vals[nVals]} - case 1: + default: x.Links.Forward = append(x.Links.Forward, vals[nVals]) } nVals++ @@ -295,7 +293,7 @@ func writeNamesLanguagesCountry(x *Document, vals []string, defs, reps []uint8) switch def { case 3: switch rep { - case 0, 2: + default: x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) } nVals++ @@ -349,7 +347,7 @@ func writeNamesURL(x *Document, vals []string, defs, reps []uint8) (int, int) { switch def { case 2: switch rep { - case 0, 1: + default: x.Names[ind[0]].URL = pstring(vals[nVals]) } nVals++ diff --git a/internal/dremel/testcases/person/generated.go b/internal/dremel/testcases/person/generated.go index 90eebbe..09a7597 100644 --- a/internal/dremel/testcases/person/generated.go +++ b/internal/dremel/testcases/person/generated.go @@ -203,7 +203,7 @@ func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (i switch def { case 2: switch rep { - case 0, 1: + default: x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] } nVals++ diff --git a/internal/dremel/write_test.go b/internal/dremel/write_test.go index 8b2d6c2..368249a 100644 --- a/internal/dremel/write_test.go +++ b/internal/dremel/write_test.go @@ -470,9 +470,7 @@ func TestWrite(t *testing.T) { switch def { case 2: switch rep { - case 0: - x.Link.Forward = []string{vals[nVals]} - case 1: + default: x.Link.Forward = append(x.Link.Forward, vals[nVals]) } nVals++ @@ -526,11 +524,7 @@ func TestWrite(t *testing.T) { }`, }, { - name: "writeNamesLanguagesCountry", - // fields: []fields.Field{ - // {Type: "Document", FieldNames: []string{"Names", "Languages", "Code"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Required}}, - // {Type: "Document", FieldNames: []string{"Names", "Languages", "Country"}, FieldTypes: []string{"Name", "Language", "string"}, RepetitionTypes: []fields.RepetitionType{fields.Repeated, fields.Repeated, fields.Optional}}, - // }, + name: "writeNamesLanguagesCountry", structName: "Document", field: fields.Field{ Name: "Names", Type: "Name", RepetitionType: fields.Repeated, Children: []fields.Field{ @@ -557,7 +551,7 @@ func TestWrite(t *testing.T) { switch def { case 3: switch rep { - case 0, 2: + default: x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) } nVals++ @@ -628,7 +622,7 @@ func TestWrite(t *testing.T) { case 1: switch rep { case 0: - x.LuckyNumbers = []int64{vals[nVals]} + x.LuckyNumbers = append(x.LuckyNumbers, vals[nVals]) case 1: x.LuckyNumbers = append(x.LuckyNumbers, vals[nVals]) } @@ -640,7 +634,7 @@ func TestWrite(t *testing.T) { }`, }, { - name: "repeated field not handled by previous repeated field", + name: "repeated field handled by previous repeated field", structName: "Document", field: fields.Field{ Name: "Link", Type: "Link", RepetitionType: fields.Optional, Children: []fields.Field{ @@ -665,9 +659,7 @@ func TestWrite(t *testing.T) { switch def { case 2: switch rep { - case 0: - x.Link.Forward = []string{vals[nVals]} - case 1: + default: x.Link.Forward = append(x.Link.Forward, vals[nVals]) } nVals++ @@ -705,7 +697,7 @@ func TestWrite(t *testing.T) { switch def { case 2: switch rep { - case 0, 1: + default: x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] } nVals++ diff --git a/internal/fields/fields.go b/internal/fields/fields.go index b67ce7a..f93c0e5 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -192,8 +192,8 @@ type RepCase struct { func (f Field) RepCases() []RepCase { mr := int(f.MaxRep()) - if f.RepetitionType != Repeated && f.Parent != nil && f.Parent.RepetitionType == Repeated && f.Parent.Defined { - return []RepCase{{Case: fmt.Sprintf("case 0, %d:", mr)}} + if f.Repeated() && f.Parent != nil && f.Parent.Defined { + return []RepCase{{Case: "default:"}} } var out []RepCase @@ -375,12 +375,10 @@ func (f Field) Init(def, rep int) string { if fld.Primitive() { if rep == 0 && fld.Parent.RepetitionType == Repeated { right = fmt.Sprintf(right, fmt.Sprintf("{%s: []%s{vals[nVals]}}%%s", fld.Name, fld.Type)) - } else if (fld.Parent.Parent == nil || fld.Parent.Defined) && rep == 0 { - right = fmt.Sprintf(right, fmt.Sprintf("[]%s{vals[nVals]}%%s", fld.Type)) + } else if reps == rep || (fld.Parent.Parent == nil || fld.Parent.Defined) && rep == 0 { + right = fmt.Sprintf(right, fmt.Sprintf("append(x%s, vals[nVals])%%s", left)) } else if rep == 0 { right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{vals[nVals]}%%s", fld.Name, fld.Type)) - } else if reps == rep { - right = fmt.Sprintf(right, fmt.Sprintf("append(x%s, vals[nVals])%%s", left)) } else { right = fmt.Sprintf(right, fmt.Sprintf("[%s: []%s{vals[nVals]}]%%s", fld.Name, fld.Type)) } diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go index 446f553..fa4ea6c 100644 --- a/internal/fields/fields_test.go +++ b/internal/fields/fields_test.go @@ -479,7 +479,7 @@ func TestInit(t *testing.T) { }, rep: 0, def: 2, - expected: "x.Link.Forward = []string{vals[nVals]}", + expected: "x.Link.Forward = append(x.Link.Forward, vals[nVals])", }, { fields: []fields.Field{ @@ -487,7 +487,7 @@ func TestInit(t *testing.T) { }, def: 1, rep: 0, - expected: "x.LuckyNumbers = []int64{vals[nVals]}", + expected: "x.LuckyNumbers = append(x.LuckyNumbers, vals[nVals])", }, { fields: []fields.Field{ From 39f8b1963cd56b13888a017447576f113bd1f547 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Wed, 16 Jun 2021 08:44:19 -0600 Subject: [PATCH 15/25] got TestRepetition passing the generated code, though, breaks TestDremel --- internal/dremel/dremel_test.go | 79 ++ internal/dremel/read_repeated.go | 2 +- internal/dremel/testcases/person/generated.go | 10 +- .../dremel/testcases/repetition/generated.go | 890 ++++++++++++++++++ .../dremel/testcases/repetition/repetition.go | 19 + internal/dremel/write_repeated.go | 12 +- internal/dremel/write_test.go | 68 +- internal/fields/fields.go | 99 +- internal/fields/fields_test.go | 223 +++++ 9 files changed, 1353 insertions(+), 49 deletions(-) create mode 100644 internal/dremel/testcases/repetition/generated.go create mode 100644 internal/dremel/testcases/repetition/repetition.go diff --git a/internal/dremel/dremel_test.go b/internal/dremel/dremel_test.go index 7048c0e..9fd265b 100644 --- a/internal/dremel/dremel_test.go +++ b/internal/dremel/dremel_test.go @@ -7,6 +7,7 @@ import ( "github.com/parsyl/parquet/internal/dremel/testcases/doc" "github.com/parsyl/parquet/internal/dremel/testcases/person" + "github.com/parsyl/parquet/internal/dremel/testcases/repetition" "github.com/stretchr/testify/assert" ) @@ -176,3 +177,81 @@ func pstring(s string) *string { func pint32(i int32) *int32 { return &i } + +var ( + repetitionDocs = []repetition.Document{ + { + Links: []repetition.Link{ + { + Backward: []repetition.Language{{Codes: []string{"a", "b"}}}, + Forward: []repetition.Language{{Codes: []string{"aa", "bbb"}}}, + }, + { + Backward: nil, + Forward: []repetition.Language{{Codes: []string{"c", "d"}}}, + }, + { + Backward: []repetition.Language{{Countries: []string{"e", "f"}}}, + Forward: nil, + }, + { + Backward: nil, + Forward: []repetition.Language{{Countries: []string{"g", "h"}}}, + }, + { + Backward: []repetition.Language{{Countries: []string{"i", "j"}}}, + Forward: []repetition.Language{{Codes: []string{"k", "l"}}}, + }, + { + Backward: []repetition.Language{ + { + Codes: []string{"m", "n"}, + Countries: []string{"o", "p"}, + }, + { + Codes: []string{"q", "r"}, + Countries: []string{"s", "t"}, + }, + }, + Forward: []repetition.Language{{Countries: []string{"u", "v"}}}, + }, + { + Backward: []repetition.Language{{Codes: []string{"w", "x"}}}, + Forward: []repetition.Language{{Countries: []string{"y", "z"}}}, + }, + }, + }, + } +) + +func TestRepetition(t *testing.T) { + var buf bytes.Buffer + pw, err := repetition.NewParquetWriter(&buf) + if err != nil { + log.Fatal(err) + } + + for _, doc := range repetitionDocs { + pw.Add(doc) + } + + if err := pw.Write(); err != nil { + log.Fatal(err) + } + + pw.Close() + + pr, err := repetition.NewParquetReader(bytes.NewReader(buf.Bytes())) + if err != nil { + log.Fatal(err) + } + + var out []repetition.Document + for pr.Next() { + var d repetition.Document + pr.Scan(&d) + out = append(out, d) + } + + assert.Equal(t, repetitionDocs, out) +} diff --git a/internal/dremel/read_repeated.go b/internal/dremel/read_repeated.go index c7aadcc..33e1ecd 100644 --- a/internal/dremel/read_repeated.go +++ b/internal/dremel/read_repeated.go @@ -36,7 +36,7 @@ var ( reps = append(reps, lastRep) } else { for i{{.Rep}}, x{{.Rep}} := range {{.Var}}.{{.Field}} { - if i{{.Rep}} == 1 { + if i{{.Rep}} >= 1 { lastRep = {{inc .Rep}} } %s diff --git a/internal/dremel/testcases/person/generated.go b/internal/dremel/testcases/person/generated.go index 09a7597..c9a85f3 100644 --- a/internal/dremel/testcases/person/generated.go +++ b/internal/dremel/testcases/person/generated.go @@ -117,7 +117,7 @@ func readHobbySkillsName(x Person) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Hobby.Skills { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 2) @@ -147,10 +147,6 @@ func writeHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, in switch def { case 2: switch rep { - case 0: - x.Hobby.Skills = []Skill{{Name: vals[nVals]}} - case 1: - x.Hobby.Skills = append(x.Hobby.Skills, Skill{Name: vals[nVals]}) } nVals++ } @@ -173,7 +169,7 @@ func readHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Hobby.Skills { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 2) @@ -203,8 +199,6 @@ func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (i switch def { case 2: switch rep { - default: - x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] } nVals++ } diff --git a/internal/dremel/testcases/repetition/generated.go b/internal/dremel/testcases/repetition/generated.go new file mode 100644 index 0000000..a6f9961 --- /dev/null +++ b/internal/dremel/testcases/repetition/generated.go @@ -0,0 +1,890 @@ +package repetition + +// Code generated by github.com/parsyl/parquet. DO NOT EDIT. + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "strings" + + "github.com/parsyl/parquet" + sch "github.com/parsyl/parquet/schema" + + "sort" +) + +type compression int + +const ( + compressionUncompressed compression = 0 + compressionSnappy compression = 1 + compressionGzip compression = 2 + compressionUnknown compression = -1 +) + +// ParquetWriter reprents a row group +type ParquetWriter struct { + fields []Field + + len int + + // child points to the next page + child *ParquetWriter + + // max is the number of Record items that can get written before + // a new set of column chunks is written + max int + + meta *parquet.Metadata + w io.Writer + compression compression +} + +func Fields(compression compression) []Field { + return []Field{ + NewStringOptionalField(readLinksBackwardCodes, writeLinksBackwardCodes, []string{"links", "backward", "code"}, []int{2, 2, 2}, optionalFieldCompression(compression)), + NewStringOptionalField(readLinksBackwardCountries, writeLinksBackwardCountries, []string{"links", "backward", "countries"}, []int{2, 2, 2}, optionalFieldCompression(compression)), + NewStringOptionalField(readLinksForwardCodes, writeLinksForwardCodes, []string{"links", "forward", "code"}, []int{2, 2, 2}, optionalFieldCompression(compression)), + NewStringOptionalField(readLinksForwardCountries, writeLinksForwardCountries, []string{"links", "forward", "countries"}, []int{2, 2, 2}, optionalFieldCompression(compression)), + } +} + +func readLinksBackwardCodes(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Links) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links { + if i0 >= 1 { + lastRep = 1 + } + if len(x0.Backward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Backward { + if i1 >= 1 { + lastRep = 2 + } + if len(x1.Codes) == 0 { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + for i2, x2 := range x1.Codes { + if i2 >= 1 { + lastRep = 3 + } + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, x2) + } + } + } + } + } + } + + return vals, defs, reps +} + +func writeLinksBackwardCodes(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 3) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + switch rep { + case 0: + x.Links = append(x.Links, Link{}) + case 1: + x.Links = append(x.Links, Link{}) + } + case 2: + switch rep { + case 0: + x.Links[ind[0]].Backward = append(x.Links[ind[0]].Backward, Language{}) + case 1: + x.Links = append(x.Links, Link{Backward: []Language{{}}}) + case 2: + x.Links[ind[0]].Backward = append(x.Links[ind[0]].Backward, Language{}) + } + case 3: + switch rep { + case 0: + x.Links = []Link{{Backward: []Language{{Codes: []string{vals[nVals]}}}}} + case 1: + x.Links = append(x.Links, Link{Backward: []Language{{Codes: []string{vals[nVals]}}}}) + case 2: + x.Links[ind[0]].Backward = append(x.Links[ind[0]].Backward, Language{Codes: []string{vals[nVals]}}) + case 3: + x.Links[ind[0]].Backward[ind[1]].Codes = append(x.Links[ind[0]].Backward[ind[1]].Codes, vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readLinksBackwardCountries(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Links) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links { + if i0 >= 1 { + lastRep = 1 + } + if len(x0.Backward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Backward { + if i1 >= 1 { + lastRep = 2 + } + if len(x1.Countries) == 0 { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + for i2, x2 := range x1.Countries { + if i2 >= 1 { + lastRep = 3 + } + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, x2) + } + } + } + } + } + } + + return vals, defs, reps +} + +func writeLinksBackwardCountries(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 3) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 3: + switch rep { + case 0, 1, 2, 3: + x.Links[ind[0]].Backward[ind[1]].Countries = append(x.Links[ind[0]].Backward[ind[1]].Countries, vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readLinksForwardCodes(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Links) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links { + if i0 >= 1 { + lastRep = 1 + } + if len(x0.Forward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Forward { + if i1 >= 1 { + lastRep = 2 + } + if len(x1.Codes) == 0 { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + for i2, x2 := range x1.Codes { + if i2 >= 1 { + lastRep = 3 + } + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, x2) + } + } + } + } + } + } + + return vals, defs, reps +} + +func writeLinksForwardCodes(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 3) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + switch rep { + case 0, 1, 2: + x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{}) + } + case 3: + switch rep { + case 0, 1, 2: + x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{Codes: []string{vals[nVals]}}) + case 3: + x.Links[ind[0]].Forward[ind[1]].Codes = append(x.Links[ind[0]].Forward[ind[1]].Codes, vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +} + +func readLinksForwardCountries(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Links) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links { + if i0 >= 1 { + lastRep = 1 + } + if len(x0.Forward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Forward { + if i1 >= 1 { + lastRep = 2 + } + if len(x1.Countries) == 0 { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + for i2, x2 := range x1.Countries { + if i2 >= 1 { + lastRep = 3 + } + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, x2) + } + } + } + } + } + } + + return vals, defs, reps +} + +func writeLinksForwardCountries(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 3) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 3: + switch rep { + case 0, 1, 2, 3: + x.Links[ind[0]].Forward[ind[1]].Countries = append(x.Links[ind[0]].Forward[ind[1]].Countries, vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +} + +func fieldCompression(c compression) func(*parquet.RequiredField) { + switch c { + case compressionUncompressed: + return parquet.RequiredFieldUncompressed + case compressionSnappy: + return parquet.RequiredFieldSnappy + case compressionGzip: + return parquet.RequiredFieldGzip + default: + return parquet.RequiredFieldUncompressed + } +} + +func optionalFieldCompression(c compression) func(*parquet.OptionalField) { + switch c { + case compressionUncompressed: + return parquet.OptionalFieldUncompressed + case compressionSnappy: + return parquet.OptionalFieldSnappy + case compressionGzip: + return parquet.OptionalFieldGzip + default: + return parquet.OptionalFieldUncompressed + } +} + +func NewParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { + return newParquetWriter(w, append(opts, begin)...) +} + +func newParquetWriter(w io.Writer, opts ...func(*ParquetWriter) error) (*ParquetWriter, error) { + p := &ParquetWriter{ + max: 1000, + w: w, + compression: compressionSnappy, + } + + for _, opt := range opts { + if err := opt(p); err != nil { + return nil, err + } + } + + p.fields = Fields(p.compression) + if p.meta == nil { + ff := Fields(p.compression) + schema := make([]parquet.Field, len(ff)) + for i, f := range ff { + schema[i] = f.Schema() + } + p.meta = parquet.New(schema...) + } + + return p, nil +} + +// MaxPageSize is the maximum number of rows in each row groups' page. +func MaxPageSize(m int) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.max = m + return nil + } +} + +func begin(p *ParquetWriter) error { + _, err := p.w.Write([]byte("PAR1")) + return err +} + +func withMeta(m *parquet.Metadata) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.meta = m + return nil + } +} + +func Uncompressed(p *ParquetWriter) error { + p.compression = compressionUncompressed + return nil +} + +func Snappy(p *ParquetWriter) error { + p.compression = compressionSnappy + return nil +} + +func Gzip(p *ParquetWriter) error { + p.compression = compressionGzip + return nil +} + +func withCompression(c compression) func(*ParquetWriter) error { + return func(p *ParquetWriter) error { + p.compression = c + return nil + } +} + +func (p *ParquetWriter) Write() error { + for i, f := range p.fields { + if err := f.Write(p.w, p.meta); err != nil { + return err + } + + for child := p.child; child != nil; child = child.child { + if err := child.fields[i].Write(p.w, p.meta); err != nil { + return err + } + } + } + + p.fields = Fields(p.compression) + p.child = nil + p.len = 0 + + schema := make([]parquet.Field, len(p.fields)) + for i, f := range p.fields { + schema[i] = f.Schema() + } + p.meta.StartRowGroup(schema...) + return nil +} + +func (p *ParquetWriter) Close() error { + if err := p.meta.Footer(p.w); err != nil { + return err + } + + _, err := p.w.Write([]byte("PAR1")) + return err +} + +func (p *ParquetWriter) Add(rec Document) { + if p.len == p.max { + if p.child == nil { + // an error can't happen here + p.child, _ = newParquetWriter(p.w, MaxPageSize(p.max), withMeta(p.meta), withCompression(p.compression)) + } + + p.child.Add(rec) + return + } + + p.meta.NextDoc() + for _, f := range p.fields { + f.Add(rec) + } + + p.len++ +} + +type Field interface { + Add(r Document) + Write(w io.Writer, meta *parquet.Metadata) error + Schema() parquet.Field + Scan(r *Document) + Read(r io.ReadSeeker, pg parquet.Page) error + Name() string + Levels() ([]uint8, []uint8) +} + +func getFields(ff []Field) map[string]Field { + m := make(map[string]Field, len(ff)) + for _, f := range ff { + m[f.Name()] = f + } + return m +} + +func NewParquetReader(r io.ReadSeeker, opts ...func(*ParquetReader)) (*ParquetReader, error) { + ff := Fields(compressionUnknown) + pr := &ParquetReader{ + r: r, + } + + for _, opt := range opts { + opt(pr) + } + + schema := make([]parquet.Field, len(ff)) + for i, f := range ff { + pr.fieldNames = append(pr.fieldNames, f.Name()) + schema[i] = f.Schema() + } + + meta := parquet.New(schema...) + if err := meta.ReadFooter(r); err != nil { + return nil, err + } + pr.rows = meta.Rows() + var err error + pr.pages, err = meta.Pages() + if err != nil { + return nil, err + } + + pr.rowGroups = meta.RowGroups() + _, err = r.Seek(4, io.SeekStart) + if err != nil { + return nil, err + } + pr.meta = meta + + return pr, pr.readRowGroup() +} + +func readerIndex(i int) func(*ParquetReader) { + return func(p *ParquetReader) { + p.index = i + } +} + +// ParquetReader reads one page from a row group. +type ParquetReader struct { + fields map[string]Field + fieldNames []string + index int + cursor int64 + rows int64 + rowGroupCursor int64 + rowGroupCount int64 + pages map[string][]parquet.Page + meta *parquet.Metadata + err error + + r io.ReadSeeker + rowGroups []parquet.RowGroup +} + +type Levels struct { + Name string + Defs []uint8 + Reps []uint8 +} + +func (p *ParquetReader) Levels() []Levels { + var out []Levels + //for { + for _, name := range p.fieldNames { + f := p.fields[name] + d, r := f.Levels() + out = append(out, Levels{Name: f.Name(), Defs: d, Reps: r}) + } + // if err := p.readRowGroup(); err != nil { + // break + // } + //} + return out +} + +func (p *ParquetReader) Error() error { + return p.err +} + +func (p *ParquetReader) readRowGroup() error { + p.rowGroupCursor = 0 + + if len(p.rowGroups) == 0 { + p.rowGroupCount = 0 + return nil + } + + rg := p.rowGroups[0] + p.fields = getFields(Fields(compressionUnknown)) + p.rowGroupCount = rg.Rows + p.rowGroupCursor = 0 + for _, col := range rg.Columns() { + name := strings.Join(col.MetaData.PathInSchema, ".") + f, ok := p.fields[name] + if !ok { + return fmt.Errorf("unknown field: %s", name) + } + pages := p.pages[name] + if len(pages) <= p.index { + break + } + + pg := pages[0] + if err := f.Read(p.r, pg); err != nil { + return fmt.Errorf("unable to read field %s, err: %s", f.Name(), err) + } + p.pages[name] = p.pages[name][1:] + } + p.rowGroups = p.rowGroups[1:] + return nil +} + +func (p *ParquetReader) Rows() int64 { + return p.rows +} + +func (p *ParquetReader) Next() bool { + if p.err == nil && p.cursor >= p.rows { + return false + } + if p.rowGroupCursor >= p.rowGroupCount { + p.err = p.readRowGroup() + if p.err != nil { + return false + } + } + + p.cursor++ + p.rowGroupCursor++ + return true +} + +func (p *ParquetReader) Scan(x *Document) { + if p.err != nil { + return + } + + for _, name := range p.fieldNames { + f := p.fields[name] + f.Scan(x) + } +} + +type StringOptionalField struct { + parquet.OptionalField + vals []string + read func(r Document) ([]string, []uint8, []uint8) + write func(r *Document, vals []string, def, rep []uint8) (int, int) + stats *stringOptionalStats +} + +func NewStringOptionalField(read func(r Document) ([]string, []uint8, []uint8), write func(r *Document, vals []string, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *StringOptionalField { + return &StringOptionalField{ + read: read, + write: write, + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newStringOptionalStats(maxDef(types)), + } +} + +func (f *StringOptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: f.RepetitionType, Types: f.Types} +} + +func (f *StringOptionalField) Add(r Document) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) +} + +func (f *StringOptionalField) Scan(r *Document) { + if len(f.Defs) == 0 { + return + } + + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] + } +} + +func (f *StringOptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + buf := bytes.Buffer{} + + for _, s := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { + return err + } + buf.Write([]byte(s)) + } + + return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) +} + +func (f *StringOptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + for j := 0; j < f.Values(); j++ { + var x int32 + if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { + return err + } + s := make([]byte, x) + if _, err := rr.Read(s); err != nil { + return err + } + + f.vals = append(f.vals, string(s)) + } + return nil +} + +func (f *StringOptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps +} + +type stringOptionalStats struct { + vals []string + min []byte + max []byte + nils int64 + maxDef uint8 +} + +func newStringOptionalStats(d uint8) *stringOptionalStats { + return &stringOptionalStats{maxDef: d} +} + +func (s *stringOptionalStats) add(vals []string, defs []uint8) { + var i int + for _, def := range defs { + if def < s.maxDef { + s.nils++ + } else { + s.vals = append(s.vals, vals[i]) + i++ + } + } +} + +func (s *stringOptionalStats) NullCount() *int64 { + return &s.nils +} + +func (s *stringOptionalStats) DistinctCount() *int64 { + return nil +} + +func (s *stringOptionalStats) Min() []byte { + if s.min == nil { + s.minMax() + } + return s.min +} + +func (s *stringOptionalStats) Max() []byte { + if s.max == nil { + s.minMax() + } + return s.max +} + +func (s *stringOptionalStats) minMax() { + if len(s.vals) == 0 { + return + } + + tmp := make([]string, len(s.vals)) + copy(tmp, s.vals) + sort.Strings(tmp) + s.min = []byte(tmp[0]) + s.max = []byte(tmp[len(tmp)-1]) +} + +func pint32(i int32) *int32 { return &i } +func puint32(i uint32) *uint32 { return &i } +func pint64(i int64) *int64 { return &i } +func puint64(i uint64) *uint64 { return &i } +func pbool(b bool) *bool { return &b } +func pstring(s string) *string { return &s } +func pfloat32(f float32) *float32 { return &f } +func pfloat64(f float64) *float64 { return &f } + +// keeps track of the indices of repeated fields +// that have already been handled by a previous field +type indices []int + +func (i indices) rep(rep uint8) { + if rep > 0 { + r := int(rep) - 1 + i[r] = i[r] + 1 + for j := int(rep); j < len(i); j++ { + i[j] = 0 + } + } +} + +func maxDef(types []int) uint8 { + var out uint8 + for _, typ := range types { + if typ > 0 { + out++ + } + } + return out +} + +func Int32Type(se *sch.SchemaElement) { + t := sch.Type_INT32 + se.Type = &t +} + +func Uint32Type(se *sch.SchemaElement) { + t := sch.Type_INT32 + se.Type = &t + ct := sch.ConvertedType_UINT_32 + se.ConvertedType = &ct +} + +func Int64Type(se *sch.SchemaElement) { + t := sch.Type_INT64 + se.Type = &t +} + +func Uint64Type(se *sch.SchemaElement) { + t := sch.Type_INT64 + se.Type = &t + ct := sch.ConvertedType_UINT_64 + se.ConvertedType = &ct +} + +func Float32Type(se *sch.SchemaElement) { + t := sch.Type_FLOAT + se.Type = &t +} + +func Float64Type(se *sch.SchemaElement) { + t := sch.Type_DOUBLE + se.Type = &t +} + +func BoolType(se *sch.SchemaElement) { + t := sch.Type_BOOLEAN + se.Type = &t +} + +func StringType(se *sch.SchemaElement) { + t := sch.Type_BYTE_ARRAY + se.Type = &t +} diff --git a/internal/dremel/testcases/repetition/repetition.go b/internal/dremel/testcases/repetition/repetition.go new file mode 100644 index 0000000..f17bdf1 --- /dev/null +++ b/internal/dremel/testcases/repetition/repetition.go @@ -0,0 +1,19 @@ +package repetition + +//go:generate parquetgen -input repetition.go -type Document -package repetition -output generated.go + +type ( + Document struct { + Links []Link `parquet:"links"` + } + + Link struct { + Backward []Language `parquet:"backward"` + Forward []Language `parquet:"forward"` + } + + Language struct { + Codes []string `parquet:"code"` + Countries []string `parquet:"countries"` + } +) diff --git a/internal/dremel/write_repeated.go b/internal/dremel/write_repeated.go index 6824bfc..4e7f166 100644 --- a/internal/dremel/write_repeated.go +++ b/internal/dremel/write_repeated.go @@ -88,14 +88,12 @@ func init() { nVals++{{end}}{{end}} }{{end}}` - defCaseTpl := `{{define "defCase"}}{{if eq .Def .Field.MaxDef}}{{template "repSwitch" .}}{{else}}{{$rep:=getRep .Def .Field}}{{init .Def $rep .Field}}{{end}}{{end}}` + defCaseTpl := `{{define "defCase"}}switch rep { +{{range $case := .Field.RepCases $.Def}}{{$case.Case}} + {{init $.Def $case.Rep $.Field}} +{{end}} }{{end}}` - repSwitchTpl := `{{define "repSwitch"}}switch rep { -{{range $case := .Field.RepCases}}{{$case.Case}} -{{init $.Def $case.Rep $.Field}} -{{end}} } {{end}}` - - for _, t := range []string{defCaseTpl, defSwitchTpl, repSwitchTpl} { + for _, t := range []string{defCaseTpl, defSwitchTpl} { writeRepeatedTpl, err = writeRepeatedTpl.Parse(t) if err != nil { log.Fatal(err) diff --git a/internal/dremel/write_test.go b/internal/dremel/write_test.go index 368249a..602b032 100644 --- a/internal/dremel/write_test.go +++ b/internal/dremel/write_test.go @@ -551,7 +551,7 @@ func TestWrite(t *testing.T) { switch def { case 3: switch rep { - default: + case 0, 1, 2: x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) } nVals++ @@ -704,6 +704,71 @@ func TestWrite(t *testing.T) { } } + return nVals, nLevels +}`, + }, + { + name: "everything is repeated", + structName: "Doc", + field: fields.Field{ + Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + }, + }, + result: ``, + }, + { + name: "everything is repeated seen at rep 1", + structName: "Doc", + field: fields.Field{ + Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }, + }, + result: `func writeLinksForwardCodes(x *Doc, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 3) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 2: + switch rep { + case 0, 1, 2: + x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{}) + } + case 3: + switch rep { + case 0, 1, 2: + x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{Codes: []string{vals[nVals]}}) + case 3: + x.Links[ind[0]].Forward[ind[1]].Codes = append(x.Links[ind[0]].Forward[ind[1]].Codes, vals[nVals]) + } + nVals++ + } + } + return nVals, nLevels }`, }, @@ -718,6 +783,7 @@ func TestWrite(t *testing.T) { flds := fields.Field{Type: ty, Children: []fields.Field{tc.field}}.Fields() f := flds[len(flds)-1] s := dremel.Write(f) + fmt.Println(s) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.result, string(gocode)) diff --git a/internal/fields/fields.go b/internal/fields/fields.go index f93c0e5..9d1c7c3 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -41,6 +41,10 @@ func (f Field) Fields() []Field { return f.fields(0) } +func (f Field) IsRoot() bool { + return f.Parent == nil +} + func (f Field) fields(i int) []Field { var out []Field for j, fld := range f.Children { @@ -189,16 +193,39 @@ type RepCase struct { // RepCases returns a RepCase slice based on the field types and // what sub-fields have already been seen. -func (f Field) RepCases() []RepCase { +func (f Field) RepCases(def int) []RepCase { mr := int(f.MaxRep()) - if f.Repeated() && f.Parent != nil && f.Parent.Defined { - return []RepCase{{Case: "default:"}} - } - var out []RepCase - for i := 0; i <= mr; i++ { - out = append(out, RepCase{Case: fmt.Sprintf("case %d:", i), Rep: i}) + var defs int + var reps int + var rollup []int + var i int + for _, fld := range Reverse(f.Chain()) { + if fld.IsRoot() { + continue + } + rollup = append(rollup, i) + if fld.RepetitionType == Required || fld.RepetitionType == Repeated { + defs++ + } + + if fld.RepetitionType == Repeated && reps < mr && defs < def { + reps++ + } + + fmt.Println(rollup, fld.Defined, fld.Name, reps, defs) + + if !fld.Defined || defs == def { + c := fmt.Sprintf("case %s:", strings.Trim(strings.Replace(fmt.Sprint(rollup), " ", ", ", -1), "[]")) + out = append(out, RepCase{Case: c, Rep: reps}) + rollup = []int{} + } + + if defs == def { + break + } + i++ } return out } @@ -247,7 +274,7 @@ func (f Field) Required() bool { return f.RepetitionTypes().Required() } -func (f Field) rightComplete(fld Field, i, def, rep, maxDef, maxRep, defs, reps int) bool { +func (f Field) leftComplete(fld Field, i, def, rep, maxDef, maxRep, defs, reps int) bool { if fld.RepetitionType == Optional && rep == 0 && !fld.Defined { return true } @@ -268,6 +295,10 @@ func (f Field) rightComplete(fld Field, i, def, rep, maxDef, maxRep, defs, reps return false } +func (f Field) rightComplete(def, defs, maxDef int) bool { + return def != maxDef && defs >= def +} + // Init is called by parquetgen's templates to generate the code // that writes to a struct's field // @@ -304,14 +335,14 @@ func (f Field) Init(def, rep int) string { case Optional: left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.Name)) case Repeated: - if (rep > 0 && reps < rep) || (f.NthChild > 0 && !fld.Primitive()) { - left = fmt.Sprintf(left, fmt.Sprintf(".%s[ind[%d]]%%s", fld.Name, reps-1)) - } else { + if fld.Primitive() || f.leftComplete(fld, i, def, rep, maxDef, maxRep, defs, reps) { left = fmt.Sprintf(left, fmt.Sprintf(".%s%%s", fld.Name)) + } else { + left = fmt.Sprintf(left, fmt.Sprintf(".%s[ind[%d]]%%s", fld.Name, reps-1)) } } - if f.rightComplete(fld, i, def, rep, maxDef, maxRep, defs, reps) { + if f.leftComplete(fld, i, def, rep, maxDef, maxRep, defs, reps) { i++ break } @@ -337,19 +368,13 @@ func (f Field) Init(def, rep int) string { right = fmt.Sprintf(right, "vals[nVals]%s") } else if (fld.Parent.Parent == nil || fld.Parent.Defined) && rep == 0 { right = fmt.Sprintf(right, "vals[0]%s") - } else if fld.Parent.RepetitionType == Repeated && rep < maxRep { //need one more case: - right = fmt.Sprintf(right, fmt.Sprintf("{%s: vals[nVals]}%%s", fld.Name)) } else if fld.Parent.RepetitionType == Repeated { right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[nVals]%%s", fld.Name)) } else { right = fmt.Sprintf(right, fmt.Sprintf("%s: vals[0]%%s", fld.Name)) } } else { - if fld.Parent.RepetitionType == Repeated && rep < maxRep { - right = fmt.Sprintf(right, fmt.Sprintf("{%s: %s{%%s}}", fld.Name, fld.Type)) - } else { - right = fmt.Sprintf(right, fmt.Sprintf("%s: %s{%%s}", fld.Name, fld.Type)) - } + right = fmt.Sprintf(right, fmt.Sprintf("%s: %s{%%s}", fld.Name, fld.Type)) } case Optional: if fld.Primitive() { @@ -373,31 +398,29 @@ func (f Field) Init(def, rep int) string { } case Repeated: if fld.Primitive() { - if rep == 0 && fld.Parent.RepetitionType == Repeated { - right = fmt.Sprintf(right, fmt.Sprintf("{%s: []%s{vals[nVals]}}%%s", fld.Name, fld.Type)) - } else if reps == rep || (fld.Parent.Parent == nil || fld.Parent.Defined) && rep == 0 { + if j == 0 { right = fmt.Sprintf(right, fmt.Sprintf("append(x%s, vals[nVals])%%s", left)) - } else if rep == 0 { + } else if !fld.IsRoot() { right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{vals[nVals]}%%s", fld.Name, fld.Type)) } else { - right = fmt.Sprintf(right, fmt.Sprintf("[%s: []%s{vals[nVals]}]%%s", fld.Name, fld.Type)) + right = fmt.Sprintf(right, fmt.Sprintf("[]%s{vals[nVals]}%%s", fld.Type)) } } else { - if rep == 0 && j == 0 { - right = fmt.Sprintf(right, fmt.Sprintf("[]%s{%%s}", fld.Type)) - } else if rep == 0 && reps == maxRep && fld.Parent != nil && fld.Parent.RepetitionType == Repeated { - right = fmt.Sprintf(right, fmt.Sprintf("{%s: []%s{%%s}}", fld.Name, fld.Type)) - } else if rep == 0 && reps == maxRep { - right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{%%s}", fld.Name, fld.Type)) - } else if reps == rep { + if rep > 0 && reps == rep { right = fmt.Sprintf(right, fmt.Sprintf("append(x%s, %s{%%s})", left, fld.Type)) + } else if rep == 0 && j == 0 && !f.rightComplete(def, defs, maxDef) { + right = fmt.Sprintf(right, fmt.Sprintf("[]%s{{%%s}}", fld.Type)) + } else if rep == 0 && j == 0 { + right = fmt.Sprintf(right, fmt.Sprintf("[]%s{%%s}", fld.Type)) + } else if (!f.rightComplete(def, defs, maxDef) && !chain[j+1].Primitive()) || (f.rightComplete(def, defs, maxDef) && def == defs) { + right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{{%%s}}", fld.Name, fld.Type)) } else { right = fmt.Sprintf(right, fmt.Sprintf("%s: []%s{%%s}", fld.Name, fld.Type)) } } } - if def != maxDef && defs >= def { + if f.rightComplete(def, defs, maxDef) { break } } @@ -406,6 +429,18 @@ func (f Field) Init(def, rep int) string { return fmt.Sprintf("x%s = %s", left, right) } +// IsRep is true if this fields is one being repeated +func (f Field) IsRep(rep int) bool { + var reps int + for _, fld := range Reverse(f.Chain()) { + if fld.RepetitionType == Repeated { + reps++ + } + } + + return reps == rep +} + // Path creates gocode for initializing a string slice in a go template func (f Field) Path() string { names := f.ColumnNames() diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go index fa4ea6c..2fc901d 100644 --- a/internal/fields/fields_test.go +++ b/internal/fields/fields_test.go @@ -533,6 +533,229 @@ func TestInit(t *testing.T) { def: 3, expected: "x.A.B.C.D[ind[0]].E.F = pstring(vals[nVals])", }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 1, + rep: 0, + expected: "x.Links = []Link{}", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 1, + rep: 1, + expected: "x.Links = append(x.Links, Link{})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 2, + expected: "x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 3, + expected: "x.Links[ind[0]].Forward[ind[1]].Codes = append(x.Links[ind[0]].Forward[ind[1]].Codes, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 3, + expected: "x.Links[ind[0]].Backward[ind[1]].Countries = append(x.Links[ind[0]].Backward[ind[1]].Countries, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 0, + expected: "x.Links[ind[0]].Backward = []Language{{Codes: []string{vals[nVals]}}}", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 1, + rep: 0, + expected: "x.Links = []Link{}", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 1, + rep: 1, + expected: "x.Links = append(x.Links, Link{})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 0, + expected: "x.Links = []Link{{Backward: []Language{{}}}}", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 1, + expected: "x.Links = append(x.Links, Link{Backward: []Language{{}}})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 2, + expected: "x.Links[ind[0]].Backward = append(x.Links[ind[0]].Backward, Language{})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 1, + expected: "x.Links = append(x.Links, Link{Backward: []Language{{Codes: []string{vals[nVals]}}}})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 2, + expected: "x.Links[ind[0]].Backward = append(x.Links[ind[0]].Backward, Language{Codes: []string{vals[nVals]}})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 3, + expected: "x.Links[ind[0]].Backward[ind[1]].Codes = append(x.Links[ind[0]].Backward[ind[1]].Codes, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 3, + expected: "x.Links[ind[0]].Forward[ind[1]].Countries = append(x.Links[ind[0]].Forward[ind[1]].Countries, vals[nVals])", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 2, + rep: 2, + expected: "x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 1, + expected: "x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{Codes: []string{vals[nVals]}})", + }, } for i, tc := range testCases { From c6a31e95bb54261dcbb7d58c23c11cea437793f8 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Wed, 16 Jun 2021 12:32:04 -0600 Subject: [PATCH 16/25] wip --- internal/dremel/write_repeated.go | 6 ++--- internal/dremel/write_test.go | 15 +++++-------- internal/fields/fields.go | 37 +++++++++++++++++++++++-------- internal/fields/fields_test.go | 19 ++++++++++++++-- 4 files changed, 53 insertions(+), 24 deletions(-) diff --git a/internal/dremel/write_repeated.go b/internal/dremel/write_repeated.go index 4e7f166..75d1d2e 100644 --- a/internal/dremel/write_repeated.go +++ b/internal/dremel/write_repeated.go @@ -88,10 +88,10 @@ func init() { nVals++{{end}}{{end}} }{{end}}` - defCaseTpl := `{{define "defCase"}}switch rep { -{{range $case := .Field.RepCases $.Def}}{{$case.Case}} + defCaseTpl := `{{define "defCase"}}{{$cases := .Field.RepCases $.Def}}{{if $cases.UseRepCase .Field}}switch rep { +{{range $case := $cases}}{{$case.Case}} {{init $.Def $case.Rep $.Field}} -{{end}} }{{end}}` +{{end}}}{{else}}{{init $.Def 0 $.Field}}{{end}}{{end}}` for _, t := range []string{defCaseTpl, defSwitchTpl} { writeRepeatedTpl, err = writeRepeatedTpl.Parse(t) diff --git a/internal/dremel/write_test.go b/internal/dremel/write_test.go index 602b032..77ccc2c 100644 --- a/internal/dremel/write_test.go +++ b/internal/dremel/write_test.go @@ -470,7 +470,7 @@ func TestWrite(t *testing.T) { switch def { case 2: switch rep { - default: + case 0, 1: x.Link.Forward = append(x.Link.Forward, vals[nVals]) } nVals++ @@ -510,7 +510,7 @@ func TestWrite(t *testing.T) { case 2: switch rep { case 0: - x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}} + x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}}) case 1: x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}}) case 2: @@ -587,7 +587,7 @@ func TestWrite(t *testing.T) { case 1: switch rep { case 0: - x.Friends = []Being{{ID: vals[nVals]}} + x.Friends = append(x.Friends, Being{ID: vals[nVals]}) case 1: x.Friends = append(x.Friends, Being{ID: vals[nVals]}) } @@ -620,12 +620,7 @@ func TestWrite(t *testing.T) { switch def { case 1: - switch rep { - case 0: - x.LuckyNumbers = append(x.LuckyNumbers, vals[nVals]) - case 1: - x.LuckyNumbers = append(x.LuckyNumbers, vals[nVals]) - } + x.LuckyNumbers = append(x.LuckyNumbers, vals[nVals]) nVals++ } } @@ -659,7 +654,7 @@ func TestWrite(t *testing.T) { switch def { case 2: switch rep { - default: + case 0, 1: x.Link.Forward = append(x.Link.Forward, vals[nVals]) } nVals++ diff --git a/internal/fields/fields.go b/internal/fields/fields.go index 9d1c7c3..66f9502 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -189,11 +189,27 @@ type RepCase struct { Case string // Rep is the repetition level that is handled by the switch case. Rep int + + // Repeated is true if any of the fields (including the one at the def level) were repeated + // This allows the def case to not have a rep case for fields that have a repetition somewhere + // in the chain. + Repeated bool +} + +type RepCases []RepCase + +func (r RepCases) UseRepCase(f Field) bool { + if f.Parent.IsRoot() { + return false + } + return len(r) > 1 || + (len(r) == 1 && r[0].Repeated) } // RepCases returns a RepCase slice based on the field types and // what sub-fields have already been seen. -func (f Field) RepCases(def int) []RepCase { +func (f Field) RepCases(def int) RepCases { + fmt.Println("rep cases", def) mr := int(f.MaxRep()) var out []RepCase @@ -205,26 +221,29 @@ func (f Field) RepCases(def int) []RepCase { if fld.IsRoot() { continue } + + if defs == def && fld.RepetitionType != Required { + break + } + rollup = append(rollup, i) - if fld.RepetitionType == Required || fld.RepetitionType == Repeated { + if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { defs++ } - if fld.RepetitionType == Repeated && reps < mr && defs < def { + if fld.RepetitionType == Repeated && reps < mr && defs <= def { reps++ } - fmt.Println(rollup, fld.Defined, fld.Name, reps, defs) + fmt.Println(rollup, fld.Defined, fld.Name, reps, defs, mr, def) - if !fld.Defined || defs == def { + if !fld.Defined || (defs == def && fld.RepetitionType != Required) { c := fmt.Sprintf("case %s:", strings.Trim(strings.Replace(fmt.Sprint(rollup), " ", ", ", -1), "[]")) - out = append(out, RepCase{Case: c, Rep: reps}) + fmt.Printf("%s (def: %d, rep: %d)\n", c, def, reps) + out = append(out, RepCase{Case: c, Rep: reps, Repeated: reps > 0}) rollup = []int{} } - if defs == def { - break - } i++ } return out diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go index 2fc901d..01d690c 100644 --- a/internal/fields/fields_test.go +++ b/internal/fields/fields_test.go @@ -753,9 +753,24 @@ func TestInit(t *testing.T) { }}, }, def: 3, - rep: 1, + rep: 2, //1 isn't a valid rep because it is handled by Links.Backward.Codes expected: "x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{Codes: []string{vals[nVals]}})", }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 3, + expected: "x.Links[ind[0]].Forward[ind[1]].Codes = append(x.Links[ind[0]].Forward[ind[1]].Codes, vals[nVals])", + }, } for i, tc := range testCases { @@ -763,7 +778,7 @@ func TestInit(t *testing.T) { fields := fields.Field{Children: tc.fields}.Fields() field := fields[len(fields)-1] s := field.Init(tc.def, tc.rep) - fmt.Println(s) + //fmt.Println(s) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.expected, string(gocode)) From 206c33bf2d33d934dc9ea4c19efcf28eb9c9da99 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Wed, 16 Jun 2021 17:47:22 -0600 Subject: [PATCH 17/25] wip --- internal/dremel/write_test.go | 2 +- internal/fields/fields.go | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/internal/dremel/write_test.go b/internal/dremel/write_test.go index 77ccc2c..eee1d26 100644 --- a/internal/dremel/write_test.go +++ b/internal/dremel/write_test.go @@ -510,7 +510,7 @@ func TestWrite(t *testing.T) { case 2: switch rep { case 0: - x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}}) + x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}} case 1: x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}}) case 2: diff --git a/internal/fields/fields.go b/internal/fields/fields.go index 66f9502..7fa6233 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -186,7 +186,7 @@ func (f Field) MaxRepForDef(def int) int { // RepCase is used by parquetgen to generate code. type RepCase struct { // Case is the code for a switch case (for example: case 0:) - Case string + Reps []int // Rep is the repetition level that is handled by the switch case. Rep int @@ -196,9 +196,17 @@ type RepCase struct { Repeated bool } +func (r RepCase) Case() string { + return fmt.Sprintf( + "case %s:", + strings.Trim(strings.Replace(fmt.Sprint(r.Reps), " ", ", ", -1), "[]"), + ) +} + type RepCases []RepCase func (r RepCases) UseRepCase(f Field) bool { + fmt.Println("use rep case", r) if f.Parent.IsRoot() { return false } @@ -238,9 +246,7 @@ func (f Field) RepCases(def int) RepCases { fmt.Println(rollup, fld.Defined, fld.Name, reps, defs, mr, def) if !fld.Defined || (defs == def && fld.RepetitionType != Required) { - c := fmt.Sprintf("case %s:", strings.Trim(strings.Replace(fmt.Sprint(rollup), " ", ", ", -1), "[]")) - fmt.Printf("%s (def: %d, rep: %d)\n", c, def, reps) - out = append(out, RepCase{Case: c, Rep: reps, Repeated: reps > 0}) + out = append(out, RepCase{Reps: rollup[:], Rep: max(rollup), Repeated: reps > 0}) rollup = []int{} } @@ -525,3 +531,7 @@ var primitiveTypes = map[string]fieldType{ "bool": {"Bool%s%s", "bool%s"}, "string": {"String%s%s", "string%s"}, } + +func max(i []int) int { + return i[len(i)-1] +} From d6a336257c713ce935f98167647cb1ec6d101e45 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Thu, 17 Jun 2021 06:12:31 -0600 Subject: [PATCH 18/25] wip --- internal/dremel/write_test.go | 31 +++++++++++++++++++++--- internal/fields/fields.go | 9 +++---- internal/fields/fields_test.go | 43 ++++++++++++++++++++++++---------- 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/internal/dremel/write_test.go b/internal/dremel/write_test.go index eee1d26..43c4c87 100644 --- a/internal/dremel/write_test.go +++ b/internal/dremel/write_test.go @@ -692,7 +692,7 @@ func TestWrite(t *testing.T) { switch def { case 2: switch rep { - default: + case 0, 1: x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] } nVals++ @@ -704,7 +704,7 @@ func TestWrite(t *testing.T) { }, { name: "everything is repeated", - structName: "Doc", + structName: "Document", field: fields.Field{ Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ @@ -717,7 +717,32 @@ func TestWrite(t *testing.T) { }}, }, }, - result: ``, + result: `func writeLinksForwardCountries(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 3) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 3: + switch rep { + case 0, 1, 2, 3: + x.Links[ind[0]].Forward[ind[1]].Countries = append(x.Links[ind[0]].Forward[ind[1]].Countries, vals[nVals]) + } + nVals++ + } + } + + return nVals, nLevels +}`, }, { name: "everything is repeated seen at rep 1", diff --git a/internal/fields/fields.go b/internal/fields/fields.go index 7fa6233..020c506 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -223,8 +223,8 @@ func (f Field) RepCases(def int) RepCases { var out []RepCase var defs int var reps int - var rollup []int - var i int + rollup := []int{0} + i := 1 for _, fld := range Reverse(f.Chain()) { if fld.IsRoot() { continue @@ -234,18 +234,19 @@ func (f Field) RepCases(def int) RepCases { break } - rollup = append(rollup, i) if fld.RepetitionType == Optional || fld.RepetitionType == Repeated { defs++ } if fld.RepetitionType == Repeated && reps < mr && defs <= def { reps++ + rollup = append(rollup, reps) } fmt.Println(rollup, fld.Defined, fld.Name, reps, defs, mr, def) if !fld.Defined || (defs == def && fld.RepetitionType != Required) { + fmt.Println("xxxxxxxxxxxxx") out = append(out, RepCase{Reps: rollup[:], Rep: max(rollup), Repeated: reps > 0}) rollup = []int{} } @@ -389,7 +390,7 @@ func (f Field) Init(def, rep int) string { switch fld.RepetitionType { case Required: if fld.Primitive() { - if (fld.Parent.Parent == nil || fld.Parent.Defined) && fld.Parent.RepetitionType == Repeated && rep == 0 { //Should this be a check for repated anywhere in the full chain? + if (fld.Parent.IsRoot() || fld.Parent.Defined) && fld.Parent.RepetitionType == Repeated && (rep == 0 || rep == reps) { //Should this be a check for repeated anywhere in the full chain? right = fmt.Sprintf(right, "vals[nVals]%s") } else if (fld.Parent.Parent == nil || fld.Parent.Defined) && rep == 0 { right = fmt.Sprintf(right, "vals[0]%s") diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go index 01d690c..050b3cb 100644 --- a/internal/fields/fields_test.go +++ b/internal/fields/fields_test.go @@ -457,6 +457,7 @@ func TestInit(t *testing.T) { }, }, def: 2, + rep: 1, expected: "x.Hobby.Skills[ind[0]].Difficulty = vals[nVals]", }, { @@ -536,7 +537,7 @@ func TestInit(t *testing.T) { { fields: []fields.Field{ {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ - {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, }}, }}, @@ -548,7 +549,7 @@ func TestInit(t *testing.T) { { fields: []fields.Field{ {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ - {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, }}, }}, @@ -560,35 +561,35 @@ func TestInit(t *testing.T) { { fields: []fields.Field{ {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ - {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, }}, }}, }, def: 2, rep: 2, - expected: "x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{})", + expected: "x.Links[ind[0]].Backward = append(x.Links[ind[0]].Backward, Language{})", }, { fields: []fields.Field{ {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ - {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, }}, }}, }, def: 3, rep: 3, - expected: "x.Links[ind[0]].Forward[ind[1]].Codes = append(x.Links[ind[0]].Forward[ind[1]].Codes, vals[nVals])", + expected: "x.Links[ind[0]].Backward[ind[1]].Codes = append(x.Links[ind[0]].Backward[ind[1]].Codes, vals[nVals])", }, { fields: []fields.Field{ {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ - {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, }}, - {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, }}, @@ -596,23 +597,39 @@ func TestInit(t *testing.T) { }, def: 3, rep: 3, - expected: "x.Links[ind[0]].Backward[ind[1]].Countries = append(x.Links[ind[0]].Backward[ind[1]].Countries, vals[nVals])", + expected: "x.Links[ind[0]].Forward[ind[1]].Countries = append(x.Links[ind[0]].Forward[ind[1]].Countries, vals[nVals])", }, { fields: []fields.Field{ {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ - {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + }}, + }}, + }, + def: 3, + rep: 2, + expected: "x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{Codes: []string{vals[nVals]}})", + }, + { + fields: []fields.Field{ + {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, + {Name: "Countries", Type: "string", RepetitionType: fields.Repeated}, + }}, + {Name: "Forward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ + {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, }}, }}, }, def: 3, - rep: 0, - expected: "x.Links[ind[0]].Backward = []Language{{Codes: []string{vals[nVals]}}}", + rep: 3, + expected: "x.Links[ind[0]].Forward[ind[1]].Codes = append(x.Links[ind[0]].Forward[ind[1]].Codes, vals[nVals])", }, { fields: []fields.Field{ @@ -778,7 +795,7 @@ func TestInit(t *testing.T) { fields := fields.Field{Children: tc.fields}.Fields() field := fields[len(fields)-1] s := field.Init(tc.def, tc.rep) - //fmt.Println(s) + fmt.Println(s) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.expected, string(gocode)) From 0adae31179298b81af53517097176e60a1bee2c1 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Thu, 17 Jun 2021 08:32:41 -0600 Subject: [PATCH 19/25] Got dremel tests passing --- internal/dremel/read_test.go | 20 ++++----- internal/dremel/testcases/doc/generated.go | 33 +++++--------- internal/dremel/testcases/person/generated.go | 6 +-- .../dremel/testcases/repetition/generated.go | 30 +++---------- internal/dremel/write_repeated.go | 2 +- internal/dremel/write_test.go | 43 ++++--------------- internal/fields/fields.go | 12 +++--- 7 files changed, 45 insertions(+), 101 deletions(-) diff --git a/internal/dremel/read_test.go b/internal/dremel/read_test.go index 1624ab1..9d798b2 100644 --- a/internal/dremel/read_test.go +++ b/internal/dremel/read_test.go @@ -275,7 +275,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Friends { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 1) @@ -309,7 +309,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Link.Forward { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 2) @@ -342,7 +342,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Names { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if len(x0.Languages) == 0 { @@ -350,7 +350,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i1, x1 := range x0.Languages { - if i1 == 1 { + if i1 >= 1 { lastRep = 2 } defs = append(defs, 2) @@ -384,7 +384,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Names { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if len(x0.Languages) == 0 { @@ -392,7 +392,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i1, x1 := range x0.Languages { - if i1 == 1 { + if i1 >= 1 { lastRep = 2 } if x1.Country == nil { @@ -429,7 +429,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Names { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if x0.URL == nil { @@ -466,7 +466,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Friends { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 1) @@ -498,7 +498,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Friend.Name.Aliases { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 1) @@ -536,7 +536,7 @@ func TestRead(t *testing.T) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Other.Friends { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 2) diff --git a/internal/dremel/testcases/doc/generated.go b/internal/dremel/testcases/doc/generated.go index 7764dd6..5493d09 100644 --- a/internal/dremel/testcases/doc/generated.go +++ b/internal/dremel/testcases/doc/generated.go @@ -76,7 +76,7 @@ func readLinksBackward(x Document) ([]int64, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Links.Backward { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 2) @@ -134,7 +134,7 @@ func readLinksForward(x Document) ([]int64, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Links.Forward { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 2) @@ -163,10 +163,7 @@ func writeLinksForward(x *Document, vals []int64, defs, reps []uint8) (int, int) switch def { case 2: - switch rep { - default: - x.Links.Forward = append(x.Links.Forward, vals[nVals]) - } + x.Links.Forward = append(x.Links.Forward, vals[nVals]) nVals++ } } @@ -184,7 +181,7 @@ func readNamesLanguagesCode(x Document) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Names { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if len(x0.Languages) == 0 { @@ -192,7 +189,7 @@ func readNamesLanguagesCode(x Document) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i1, x1 := range x0.Languages { - if i1 == 1 { + if i1 >= 1 { lastRep = 2 } defs = append(defs, 2) @@ -225,9 +222,7 @@ func writeNamesLanguagesCode(x *Document, vals []string, defs, reps []uint8) (in x.Names = append(x.Names, Name{}) case 2: switch rep { - case 0: - x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}} - case 1: + case 0, 1: x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}}) case 2: x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]}) @@ -249,7 +244,7 @@ func readNamesLanguagesCountry(x Document) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Names { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if len(x0.Languages) == 0 { @@ -257,7 +252,7 @@ func readNamesLanguagesCountry(x Document) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i1, x1 := range x0.Languages { - if i1 == 1 { + if i1 >= 1 { lastRep = 2 } if x1.Country == nil { @@ -292,10 +287,7 @@ func writeNamesLanguagesCountry(x *Document, vals []string, defs, reps []uint8) switch def { case 3: - switch rep { - default: - x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) - } + x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) nVals++ } } @@ -313,7 +305,7 @@ func readNamesURL(x Document) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Names { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if x0.URL == nil { @@ -346,10 +338,7 @@ func writeNamesURL(x *Document, vals []string, defs, reps []uint8) (int, int) { switch def { case 2: - switch rep { - default: - x.Names[ind[0]].URL = pstring(vals[nVals]) - } + x.Names[ind[0]].URL = pstring(vals[nVals]) nVals++ } } diff --git a/internal/dremel/testcases/person/generated.go b/internal/dremel/testcases/person/generated.go index c9a85f3..76cd891 100644 --- a/internal/dremel/testcases/person/generated.go +++ b/internal/dremel/testcases/person/generated.go @@ -146,8 +146,7 @@ func writeHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, in switch def { case 2: - switch rep { - } + x.Hobby.Skills = append(x.Hobby.Skills, Skill{Name: vals[nVals]}) nVals++ } } @@ -198,8 +197,7 @@ func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (i switch def { case 2: - switch rep { - } + x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] nVals++ } } diff --git a/internal/dremel/testcases/repetition/generated.go b/internal/dremel/testcases/repetition/generated.go index a6f9961..a2ab36e 100644 --- a/internal/dremel/testcases/repetition/generated.go +++ b/internal/dremel/testcases/repetition/generated.go @@ -109,26 +109,17 @@ func writeLinksBackwardCodes(x *Document, vals []string, defs, reps []uint8) (in switch def { case 1: - switch rep { - case 0: - x.Links = append(x.Links, Link{}) - case 1: - x.Links = append(x.Links, Link{}) - } + x.Links = append(x.Links, Link{}) case 2: switch rep { - case 0: - x.Links[ind[0]].Backward = append(x.Links[ind[0]].Backward, Language{}) - case 1: + case 0, 1: x.Links = append(x.Links, Link{Backward: []Language{{}}}) case 2: x.Links[ind[0]].Backward = append(x.Links[ind[0]].Backward, Language{}) } case 3: switch rep { - case 0: - x.Links = []Link{{Backward: []Language{{Codes: []string{vals[nVals]}}}}} - case 1: + case 0, 1: x.Links = append(x.Links, Link{Backward: []Language{{Codes: []string{vals[nVals]}}}}) case 2: x.Links[ind[0]].Backward = append(x.Links[ind[0]].Backward, Language{Codes: []string{vals[nVals]}}) @@ -200,10 +191,7 @@ func writeLinksBackwardCountries(x *Document, vals []string, defs, reps []uint8) switch def { case 3: - switch rep { - case 0, 1, 2, 3: - x.Links[ind[0]].Backward[ind[1]].Countries = append(x.Links[ind[0]].Backward[ind[1]].Countries, vals[nVals]) - } + x.Links[ind[0]].Backward[ind[1]].Countries = append(x.Links[ind[0]].Backward[ind[1]].Countries, vals[nVals]) nVals++ } } @@ -269,10 +257,7 @@ func writeLinksForwardCodes(x *Document, vals []string, defs, reps []uint8) (int switch def { case 2: - switch rep { - case 0, 1, 2: - x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{}) - } + x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{}) case 3: switch rep { case 0, 1, 2: @@ -345,10 +330,7 @@ func writeLinksForwardCountries(x *Document, vals []string, defs, reps []uint8) switch def { case 3: - switch rep { - case 0, 1, 2, 3: - x.Links[ind[0]].Forward[ind[1]].Countries = append(x.Links[ind[0]].Forward[ind[1]].Countries, vals[nVals]) - } + x.Links[ind[0]].Forward[ind[1]].Countries = append(x.Links[ind[0]].Forward[ind[1]].Countries, vals[nVals]) nVals++ } } diff --git a/internal/dremel/write_repeated.go b/internal/dremel/write_repeated.go index 75d1d2e..096a98c 100644 --- a/internal/dremel/write_repeated.go +++ b/internal/dremel/write_repeated.go @@ -88,7 +88,7 @@ func init() { nVals++{{end}}{{end}} }{{end}}` - defCaseTpl := `{{define "defCase"}}{{$cases := .Field.RepCases $.Def}}{{if $cases.UseRepCase .Field}}switch rep { + defCaseTpl := `{{define "defCase"}}{{$cases := .Field.RepCases $.Def}}{{if $cases.UseRepCase .Field $.Def}}switch rep { {{range $case := $cases}}{{$case.Case}} {{init $.Def $case.Rep $.Field}} {{end}}}{{else}}{{init $.Def 0 $.Field}}{{end}}{{end}}` diff --git a/internal/dremel/write_test.go b/internal/dremel/write_test.go index 43c4c87..6b4b0f6 100644 --- a/internal/dremel/write_test.go +++ b/internal/dremel/write_test.go @@ -469,10 +469,7 @@ func TestWrite(t *testing.T) { switch def { case 2: - switch rep { - case 0, 1: - x.Link.Forward = append(x.Link.Forward, vals[nVals]) - } + x.Link.Forward = append(x.Link.Forward, vals[nVals]) nVals++ } } @@ -509,9 +506,7 @@ func TestWrite(t *testing.T) { x.Names = append(x.Names, Name{}) case 2: switch rep { - case 0: - x.Names = []Name{{Languages: []Language{{Code: vals[nVals]}}}} - case 1: + case 0, 1: x.Names = append(x.Names, Name{Languages: []Language{{Code: vals[nVals]}}}) case 2: x.Names[ind[0]].Languages = append(x.Names[ind[0]].Languages, Language{Code: vals[nVals]}) @@ -550,10 +545,7 @@ func TestWrite(t *testing.T) { switch def { case 3: - switch rep { - case 0, 1, 2: - x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) - } + x.Names[ind[0]].Languages[ind[1]].Country = pstring(vals[nVals]) nVals++ } } @@ -585,12 +577,7 @@ func TestWrite(t *testing.T) { switch def { case 1: - switch rep { - case 0: - x.Friends = append(x.Friends, Being{ID: vals[nVals]}) - case 1: - x.Friends = append(x.Friends, Being{ID: vals[nVals]}) - } + x.Friends = append(x.Friends, Being{ID: vals[nVals]}) nVals++ } } @@ -653,10 +640,7 @@ func TestWrite(t *testing.T) { switch def { case 2: - switch rep { - case 0, 1: - x.Link.Forward = append(x.Link.Forward, vals[nVals]) - } + x.Link.Forward = append(x.Link.Forward, vals[nVals]) nVals++ } } @@ -691,10 +675,7 @@ func TestWrite(t *testing.T) { switch def { case 2: - switch rep { - case 0, 1: - x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] - } + x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] nVals++ } } @@ -733,10 +714,7 @@ func TestWrite(t *testing.T) { switch def { case 3: - switch rep { - case 0, 1, 2, 3: - x.Links[ind[0]].Forward[ind[1]].Countries = append(x.Links[ind[0]].Forward[ind[1]].Countries, vals[nVals]) - } + x.Links[ind[0]].Forward[ind[1]].Countries = append(x.Links[ind[0]].Forward[ind[1]].Countries, vals[nVals]) nVals++ } } @@ -774,10 +752,7 @@ func TestWrite(t *testing.T) { switch def { case 2: - switch rep { - case 0, 1, 2: - x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{}) - } + x.Links[ind[0]].Forward = append(x.Links[ind[0]].Forward, Language{}) case 3: switch rep { case 0, 1, 2: @@ -803,7 +778,7 @@ func TestWrite(t *testing.T) { flds := fields.Field{Type: ty, Children: []fields.Field{tc.field}}.Fields() f := flds[len(flds)-1] s := dremel.Write(f) - fmt.Println(s) + //fmt.Println(s) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.result, string(gocode)) diff --git a/internal/fields/fields.go b/internal/fields/fields.go index 020c506..ddaded6 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -205,13 +205,13 @@ func (r RepCase) Case() string { type RepCases []RepCase -func (r RepCases) UseRepCase(f Field) bool { - fmt.Println("use rep case", r) +func (r RepCases) UseRepCase(f Field, def int) bool { + fmt.Println("use rep case", r, f.MaxRepForDef(def)) if f.Parent.IsRoot() { return false } return len(r) > 1 || - (len(r) == 1 && r[0].Repeated) + (len(r) == 1 && r[0].Repeated && r[0].Rep < f.MaxRepForDef(def)) } // RepCases returns a RepCase slice based on the field types and @@ -245,8 +245,8 @@ func (f Field) RepCases(def int) RepCases { fmt.Println(rollup, fld.Defined, fld.Name, reps, defs, mr, def) - if !fld.Defined || (defs == def && fld.RepetitionType != Required) { - fmt.Println("xxxxxxxxxxxxx") + if len(rollup) > 0 && (!fld.Defined || (defs == def && fld.RepetitionType != Required)) { + fmt.Println("xxxxxxxxxxxxx", rollup) out = append(out, RepCase{Reps: rollup[:], Rep: max(rollup), Repeated: reps > 0}) rollup = []int{} } @@ -432,7 +432,7 @@ func (f Field) Init(def, rep int) string { right = fmt.Sprintf(right, fmt.Sprintf("[]%s{vals[nVals]}%%s", fld.Type)) } } else { - if rep > 0 && reps == rep { + if rep > 0 && reps == rep || (fld.MaxRepForDef(def) == rep && !strings.Contains(right, "append(")) { right = fmt.Sprintf(right, fmt.Sprintf("append(x%s, %s{%%s})", left, fld.Type)) } else if rep == 0 && j == 0 && !f.rightComplete(def, defs, maxDef) { right = fmt.Sprintf(right, fmt.Sprintf("[]%s{{%%s}}", fld.Type)) From ed361a1c4de1984b9fb3d537e379635ca19412b2 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Thu, 17 Jun 2021 08:38:01 -0600 Subject: [PATCH 20/25] fixed fields_test, but now go generate breaks TestParquet --- internal/fields/fields_test.go | 16 +- parquet_generated_test.go | 1389 +++++++------------------------- 2 files changed, 303 insertions(+), 1102 deletions(-) diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go index 050b3cb..11f349a 100644 --- a/internal/fields/fields_test.go +++ b/internal/fields/fields_test.go @@ -534,18 +534,6 @@ func TestInit(t *testing.T) { def: 3, expected: "x.A.B.C.D[ind[0]].E.F = pstring(vals[nVals])", }, - { - fields: []fields.Field{ - {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ - {Name: "Backward", Type: "Language", RepetitionType: fields.Repeated, Children: []fields.Field{ - {Name: "Codes", Type: "string", RepetitionType: fields.Repeated}, - }}, - }}, - }, - def: 1, - rep: 0, - expected: "x.Links = []Link{}", - }, { fields: []fields.Field{ {Name: "Links", Type: "Link", RepetitionType: fields.Repeated, Children: []fields.Field{ @@ -641,7 +629,7 @@ func TestInit(t *testing.T) { }, def: 1, rep: 0, - expected: "x.Links = []Link{}", + expected: "x.Links = append(x.Links, Link{})", }, { fields: []fields.Field{ @@ -795,7 +783,7 @@ func TestInit(t *testing.T) { fields := fields.Field{Children: tc.fields}.Fields() field := fields[len(fields)-1] s := field.Init(tc.def, tc.rep) - fmt.Println(s) + //fmt.Println(s) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.expected, string(gocode)) diff --git a/parquet_generated_test.go b/parquet_generated_test.go index b34fd84..076baba 100644 --- a/parquet_generated_test.go +++ b/parquet_generated_test.go @@ -46,6 +46,7 @@ type ParquetWriter struct { func Fields(compression compression) []Field { return []Field{ NewInt32Field(readID, writeID, []string{"id"}, fieldCompression(compression)), + NewStringField(readName, writeName, []string{"name"}, fieldCompression(compression)), NewInt32OptionalField(readAge, writeAge, []string{"age"}, []int{1}, optionalFieldCompression(compression)), NewInt64Field(readHappiness, writeHappiness, []string{"happiness"}, fieldCompression(compression)), NewInt64OptionalField(readSadness, writeSadness, []string{"sadness"}, []int{1}, optionalFieldCompression(compression)), @@ -63,6 +64,7 @@ func Fields(compression compression) []Field { NewStringOptionalField(readHobbySkillsName, writeHobbySkillsName, []string{"hobby", "skills", "name"}, []int{1, 2, 0}, optionalFieldCompression(compression)), NewStringOptionalField(readHobbySkillsDifficulty, writeHobbySkillsDifficulty, []string{"hobby", "skills", "difficulty"}, []int{1, 2, 0}, optionalFieldCompression(compression)), NewInt32OptionalField(readFriendsID, writeFriendsID, []string{"friends", "id"}, []int{2, 0}, optionalFieldCompression(compression)), + NewStringOptionalField(readFriendsName, writeFriendsName, []string{"friends", "name"}, []int{2, 0}, optionalFieldCompression(compression)), NewInt32OptionalField(readFriendsAge, writeFriendsAge, []string{"friends", "age"}, []int{2, 1}, optionalFieldCompression(compression)), NewBoolField(readSleepy, writeSleepy, []string{"Sleepy"}, fieldCompression(compression)), } @@ -76,6 +78,14 @@ func writeID(x *Person, vals []int32) { x.ID = vals[0] } +func readName(x Person) string { + return x.Name +} + +func writeName(x *Person, vals []string) { + x.Name = vals[0] +} + func readAge(x Person) ([]int32, []uint8, []uint8) { switch { case x.Age == nil: @@ -278,16 +288,8 @@ func readHobbyDifficulty(x Person) ([]int32, []uint8, []uint8) { func writeHobbyDifficulty(x *Person, vals []int32, defs, reps []uint8) (int, int) { def := defs[0] switch def { - case 1: - if x.Hobby == nil { - x.Hobby = &Hobby{} - } case 2: - if x.Hobby == nil { - x.Hobby = &Hobby{Difficulty: pint32(vals[0])} - } else { - x.Hobby.Difficulty = pint32(vals[0]) - } + x.Hobby.Difficulty = pint32(vals[0]) return 1, 1 } @@ -308,7 +310,7 @@ func readHobbySkillsName(x Person) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Hobby.Skills { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 2) @@ -336,21 +338,8 @@ func writeHobbySkillsName(x *Person, vals []string, defs, reps []uint8) (int, in ind.rep(rep) switch def { - case 1: - if x.Hobby == nil { - x.Hobby = &Hobby{} - } case 2: - switch rep { - case 0: - if x.Hobby == nil { - x.Hobby = &Hobby{Skills: []Skill{{Name: vals[nVals]}}} - } else { - x.Hobby.Skills = []Skill{{Name: vals[nVals]}} - } - case 1: - x.Hobby.Skills = append(x.Hobby.Skills, Skill{Name: vals[nVals]}) - } + x.Hobby.Skills = append(x.Hobby.Skills, Skill{Name: vals[nVals]}) nVals++ } } @@ -372,7 +361,7 @@ func readHobbySkillsDifficulty(x Person) ([]string, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Hobby.Skills { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 2) @@ -401,12 +390,7 @@ func writeHobbySkillsDifficulty(x *Person, vals []string, defs, reps []uint8) (i switch def { case 2: - switch rep { - case 0: - x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] - case 1: - x.Hobby.Skills = append(x.Hobby.Skills, Skill{Difficulty: vals[nVals]}) - } + x.Hobby.Skills[ind[0]].Difficulty = vals[nVals] nVals++ } } @@ -424,7 +408,7 @@ func readFriendsID(x Person) ([]int32, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Friends { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } defs = append(defs, 1) @@ -452,12 +436,53 @@ func writeFriendsID(x *Person, vals []int32, defs, reps []uint8) (int, int) { switch def { case 1: - switch rep { - case 0: - x.Friends = []Being{{ID: vals[nVals]}} - case 1: - x.Friends = append(x.Friends, Being{ID: vals[nVals]}) + x.Friends = append(x.Friends, Being{ID: vals[nVals]}) + nVals++ + } + } + + return nVals, nLevels +} + +func readFriendsName(x Person) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Friends) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Friends { + if i0 >= 1 { + lastRep = 1 } + defs = append(defs, 1) + reps = append(reps, lastRep) + vals = append(vals, x0.Name) + } + } + + return vals, defs, reps +} + +func writeFriendsName(x *Person, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 1) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 1: + x.Friends[ind[0]].Name = vals[nVals] nVals++ } } @@ -475,7 +500,7 @@ func readFriendsAge(x Person) ([]int32, []uint8, []uint8) { reps = append(reps, lastRep) } else { for i0, x0 := range x.Friends { - if i0 == 1 { + if i0 >= 1 { lastRep = 1 } if x0.Age == nil { @@ -508,10 +533,7 @@ func writeFriendsAge(x *Person, vals []int32, defs, reps []uint8) (int, int) { switch def { case 2: - switch rep { - default: - x.Friends[ind[0]].Age = pint32(vals[nVals]) - } + x.Friends[ind[0]].Age = pint32(vals[nVals]) nVals++ } } @@ -907,118 +929,62 @@ func (f *Int32Field) Levels() ([]uint8, []uint8) { return nil, nil } -type Int32OptionalField struct { - parquet.OptionalField - vals []int32 - read func(r Person) ([]int32, []uint8, []uint8) - write func(r *Person, vals []int32, def, rep []uint8) (int, int) - stats *int32optionalStats +type StringField struct { + parquet.RequiredField + vals []string + read func(r Person) string + write func(r *Person, vals []string) + stats *stringStats } -func NewInt32OptionalField(read func(r Person) ([]int32, []uint8, []uint8), write func(r *Person, vals []int32, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Int32OptionalField { - return &Int32OptionalField{ +func NewStringField(read func(r Person) string, write func(r *Person, vals []string), path []string, opts ...func(*parquet.RequiredField)) *StringField { + return &StringField{ read: read, write: write, - OptionalField: parquet.NewOptionalField(path, types, opts...), - stats: newint32optionalStats(maxDef(types)), + RequiredField: parquet.NewRequiredField(path, opts...), + stats: newStringStats(), } } -func (f *Int32OptionalField) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int32Type, RepetitionType: f.RepetitionType, Types: f.Types} +func (f *StringField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} } -func (f *Int32OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { - var buf bytes.Buffer - for _, v := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { +func (f *StringField) Write(w io.Writer, meta *parquet.Metadata) error { + buf := bytes.Buffer{} + + for _, s := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { return err } + buf.Write([]byte(s)) } - return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) -} - -func (f *Int32OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - v := make([]int32, f.Values()-len(f.vals)) - err = binary.Read(rr, binary.LittleEndian, &v) - f.vals = append(f.vals, v...) - return err -} - -func (f *Int32OptionalField) Add(r Person) { - vals, defs, reps := f.read(r) - f.stats.add(vals, defs) - f.vals = append(f.vals, vals...) - f.Defs = append(f.Defs, defs...) - f.Reps = append(f.Reps, reps...) -} - -func (f *Int32OptionalField) Scan(r *Person) { - if len(f.Defs) == 0 { - return - } - - v, l := f.write(r, f.vals, f.Defs, f.Reps) - f.vals = f.vals[v:] - f.Defs = f.Defs[l:] - if len(f.Reps) > 0 { - f.Reps = f.Reps[l:] - } -} - -func (f *Int32OptionalField) Levels() ([]uint8, []uint8) { - return f.Defs, f.Reps -} - -type Int64Field struct { - vals []int64 - parquet.RequiredField - read func(r Person) int64 - write func(r *Person, vals []int64) - stats *int64stats -} - -func NewInt64Field(read func(r Person) int64, write func(r *Person, vals []int64), path []string, opts ...func(*parquet.RequiredField)) *Int64Field { - return &Int64Field{ - read: read, - write: write, - RequiredField: parquet.NewRequiredField(path, opts...), - stats: newInt64stats(), - } -} -func (f *Int64Field) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int64Type, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} + return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) } -func (f *Int64Field) Read(r io.ReadSeeker, pg parquet.Page) error { +func (f *StringField) Read(r io.ReadSeeker, pg parquet.Page) error { rr, _, err := f.DoRead(r, pg) if err != nil { return err } - v := make([]int64, int(pg.N)) - err = binary.Read(rr, binary.LittleEndian, &v) - f.vals = append(f.vals, v...) - return err -} - -func (f *Int64Field) Write(w io.Writer, meta *parquet.Metadata) error { - var buf bytes.Buffer - for _, v := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + for j := 0; j < pg.N; j++ { + var x int32 + if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { + return err + } + s := make([]byte, x) + if _, err := rr.Read(s); err != nil { return err } + + f.vals = append(f.vals, string(s)) } - return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) + return nil } -func (f *Int64Field) Scan(r *Person) { +func (f *StringField) Scan(r *Person) { if len(f.vals) == 0 { return } @@ -1027,38 +993,38 @@ func (f *Int64Field) Scan(r *Person) { f.vals = f.vals[1:] } -func (f *Int64Field) Add(r Person) { +func (f *StringField) Add(r Person) { v := f.read(r) f.stats.add(v) f.vals = append(f.vals, v) } -func (f *Int64Field) Levels() ([]uint8, []uint8) { +func (f *StringField) Levels() ([]uint8, []uint8) { return nil, nil } -type Int64OptionalField struct { +type Int32OptionalField struct { parquet.OptionalField - vals []int64 - read func(r Person) ([]int64, []uint8, []uint8) - write func(r *Person, vals []int64, def, rep []uint8) (int, int) - stats *int64optionalStats + vals []int32 + read func(r Person) ([]int32, []uint8, []uint8) + write func(r *Person, vals []int32, def, rep []uint8) (int, int) + stats *int32optionalStats } -func NewInt64OptionalField(read func(r Person) ([]int64, []uint8, []uint8), write func(r *Person, vals []int64, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Int64OptionalField { - return &Int64OptionalField{ +func NewInt32OptionalField(read func(r Person) ([]int32, []uint8, []uint8), write func(r *Person, vals []int32, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Int32OptionalField { + return &Int32OptionalField{ read: read, write: write, OptionalField: parquet.NewOptionalField(path, types, opts...), - stats: newint64optionalStats(maxDef(types)), + stats: newint32optionalStats(maxDef(types)), } } -func (f *Int64OptionalField) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int64Type, RepetitionType: f.RepetitionType, Types: f.Types} +func (f *Int32OptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int32Type, RepetitionType: f.RepetitionType, Types: f.Types} } -func (f *Int64OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { +func (f *Int32OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { var buf bytes.Buffer for _, v := range f.vals { if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { @@ -1068,19 +1034,19 @@ func (f *Int64OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) } -func (f *Int64OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { +func (f *Int32OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { rr, _, err := f.DoRead(r, pg) if err != nil { return err } - v := make([]int64, f.Values()-len(f.vals)) + v := make([]int32, f.Values()-len(f.vals)) err = binary.Read(rr, binary.LittleEndian, &v) f.vals = append(f.vals, v...) return err } -func (f *Int64OptionalField) Add(r Person) { +func (f *Int32OptionalField) Add(r Person) { vals, defs, reps := f.read(r) f.stats.add(vals, defs) f.vals = append(f.vals, vals...) @@ -1088,7 +1054,7 @@ func (f *Int64OptionalField) Add(r Person) { f.Reps = append(f.Reps, reps...) } -func (f *Int64OptionalField) Scan(r *Person) { +func (f *Int32OptionalField) Scan(r *Person) { if len(f.Defs) == 0 { return } @@ -1101,7 +1067,7 @@ func (f *Int64OptionalField) Scan(r *Person) { } } -func (f *Int64OptionalField) Levels() ([]uint8, []uint8) { +func (f *Int32OptionalField) Levels() ([]uint8, []uint8) { return f.Defs, f.Reps } @@ -1185,857 +1151,151 @@ func (f *StringOptionalField) Levels() ([]uint8, []uint8) { return f.Defs, f.Reps } -type Float32Field struct { - vals []float32 - parquet.RequiredField - read func(r Person) float32 - write func(r *Person, vals []float32) - stats *float32stats +type BoolOptionalField struct { + parquet.OptionalField + vals []bool + read func(r bool) ([]bool, []uint8, []uint8) + write func(r *bool, vals []bool, defs, reps []uint8) (int, int) + stats *boolOptionalStats } -func NewFloat32Field(read func(r Person) float32, write func(r *Person, vals []float32), path []string, opts ...func(*parquet.RequiredField)) *Float32Field { - return &Float32Field{ +func NewBoolOptionalField(read func(r bool) ([]bool, []uint8, []uint8), write func(r *bool, vals []bool, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *BoolOptionalField { + return &BoolOptionalField{ read: read, write: write, - RequiredField: parquet.NewRequiredField(path, opts...), - stats: newFloat32stats(), + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newBoolOptionalStats(maxDef(types)), } } -func (f *Float32Field) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Float32Type, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} +func (f *BoolOptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: BoolType, RepetitionType: f.RepetitionType, Types: f.Types} } -func (f *Float32Field) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) +func (f *BoolOptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, sizes, err := f.DoRead(r, pg) if err != nil { return err } - v := make([]float32, int(pg.N)) - err = binary.Read(rr, binary.LittleEndian, &v) + v, err := parquet.GetBools(rr, f.Values()-len(f.vals), sizes) f.vals = append(f.vals, v...) return err } -func (f *Float32Field) Write(w io.Writer, meta *parquet.Metadata) error { - var buf bytes.Buffer - for _, v := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { - return err - } +func (f *BoolOptionalField) Scan(r *Person) { + if len(f.Defs) == 0 { + return } - return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) -} -func (f *Float32Field) Scan(r *Person) { - if len(f.vals) == 0 { - return + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] } +} - f.write(r, f.vals) - f.vals = f.vals[1:] +func (f *BoolOptionalField) Add(r Person) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) } -func (f *Float32Field) Add(r Person) { - v := f.read(r) - f.stats.add(v) - f.vals = append(f.vals, v) +func (f *BoolOptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + ln := len(f.vals) + byteNum := (ln + 7) / 8 + rawBuf := make([]byte, byteNum) + + for i := 0; i < ln; i++ { + if f.vals[i] { + rawBuf[i/8] = rawBuf[i/8] | (1 << uint32(i%8)) + } + } + + return f.DoWrite(w, meta, rawBuf, len(f.Defs), f.stats) } -func (f *Float32Field) Levels() ([]uint8, []uint8) { - return nil, nil +func (f *BoolOptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps } -type Float64Field struct { - vals []float64 +type BoolField struct { parquet.RequiredField - read func(r Person) float64 - write func(r *Person, vals []float64) - stats *float64stats + vals []bool + read func(r bool) bool + write func(r *bool, vals []bool) + stats *boolStats } -func NewFloat64Field(read func(r Person) float64, write func(r *Person, vals []float64), path []string, opts ...func(*parquet.RequiredField)) *Float64Field { - return &Float64Field{ +func NewBoolField(read func(r bool) bool, write func(r *bool, vals []bool), path []string, opts ...func(*parquet.RequiredField)) *BoolField { + return &BoolField{ read: read, write: write, RequiredField: parquet.NewRequiredField(path, opts...), - stats: newFloat64stats(), } } -func (f *Float64Field) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Float64Type, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} +func (f *BoolField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: BoolType, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} } -func (f *Float64Field) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err +func (f *BoolField) Write(w io.Writer, meta *parquet.Metadata) error { + ln := len(f.vals) + n := (ln + 7) / 8 + rawBuf := make([]byte, n) + + for i := 0; i < ln; i++ { + if f.vals[i] { + rawBuf[i/8] = rawBuf[i/8] | (1 << uint32(i%8)) + } } - v := make([]float64, int(pg.N)) - err = binary.Read(rr, binary.LittleEndian, &v) - f.vals = append(f.vals, v...) - return err -} - -func (f *Float64Field) Write(w io.Writer, meta *parquet.Metadata) error { - var buf bytes.Buffer - for _, v := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { - return err - } - } - return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) -} - -func (f *Float64Field) Scan(r *Person) { - if len(f.vals) == 0 { - return - } - - f.write(r, f.vals) - f.vals = f.vals[1:] -} - -func (f *Float64Field) Add(r Person) { - v := f.read(r) - f.stats.add(v) - f.vals = append(f.vals, v) -} - -func (f *Float64Field) Levels() ([]uint8, []uint8) { - return nil, nil -} - -type Float32OptionalField struct { - parquet.OptionalField - vals []float32 - read func(r Person) ([]float32, []uint8, []uint8) - write func(r *Person, vals []float32, def, rep []uint8) (int, int) - stats *float32optionalStats -} - -func NewFloat32OptionalField(read func(r Person) ([]float32, []uint8, []uint8), write func(r *Person, vals []float32, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Float32OptionalField { - return &Float32OptionalField{ - read: read, - write: write, - OptionalField: parquet.NewOptionalField(path, types, opts...), - stats: newfloat32optionalStats(maxDef(types)), - } -} - -func (f *Float32OptionalField) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Float32Type, RepetitionType: f.RepetitionType, Types: f.Types} -} - -func (f *Float32OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { - var buf bytes.Buffer - for _, v := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { - return err - } - } - return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) -} - -func (f *Float32OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - v := make([]float32, f.Values()-len(f.vals)) - err = binary.Read(rr, binary.LittleEndian, &v) - f.vals = append(f.vals, v...) - return err -} - -func (f *Float32OptionalField) Add(r Person) { - vals, defs, reps := f.read(r) - f.stats.add(vals, defs) - f.vals = append(f.vals, vals...) - f.Defs = append(f.Defs, defs...) - f.Reps = append(f.Reps, reps...) -} - -func (f *Float32OptionalField) Scan(r *Person) { - if len(f.Defs) == 0 { - return - } - - v, l := f.write(r, f.vals, f.Defs, f.Reps) - f.vals = f.vals[v:] - f.Defs = f.Defs[l:] - if len(f.Reps) > 0 { - f.Reps = f.Reps[l:] - } -} - -func (f *Float32OptionalField) Levels() ([]uint8, []uint8) { - return f.Defs, f.Reps -} - -type BoolOptionalField struct { - parquet.OptionalField - vals []bool - read func(r Person) ([]bool, []uint8, []uint8) - write func(r *Person, vals []bool, defs, reps []uint8) (int, int) - stats *boolOptionalStats -} - -func NewBoolOptionalField(read func(r Person) ([]bool, []uint8, []uint8), write func(r *Person, vals []bool, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *BoolOptionalField { - return &BoolOptionalField{ - read: read, - write: write, - OptionalField: parquet.NewOptionalField(path, types, opts...), - stats: newBoolOptionalStats(maxDef(types)), - } -} - -func (f *BoolOptionalField) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: BoolType, RepetitionType: f.RepetitionType, Types: f.Types} -} - -func (f *BoolOptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, sizes, err := f.DoRead(r, pg) - if err != nil { - return err - } - - v, err := parquet.GetBools(rr, f.Values()-len(f.vals), sizes) - f.vals = append(f.vals, v...) - return err -} - -func (f *BoolOptionalField) Scan(r *Person) { - if len(f.Defs) == 0 { - return - } - - v, l := f.write(r, f.vals, f.Defs, f.Reps) - f.vals = f.vals[v:] - f.Defs = f.Defs[l:] - if len(f.Reps) > 0 { - f.Reps = f.Reps[l:] - } -} - -func (f *BoolOptionalField) Add(r Person) { - vals, defs, reps := f.read(r) - f.stats.add(vals, defs) - f.vals = append(f.vals, vals...) - f.Defs = append(f.Defs, defs...) - f.Reps = append(f.Reps, reps...) -} - -func (f *BoolOptionalField) Write(w io.Writer, meta *parquet.Metadata) error { - ln := len(f.vals) - byteNum := (ln + 7) / 8 - rawBuf := make([]byte, byteNum) - - for i := 0; i < ln; i++ { - if f.vals[i] { - rawBuf[i/8] = rawBuf[i/8] | (1 << uint32(i%8)) - } - } - - return f.DoWrite(w, meta, rawBuf, len(f.Defs), f.stats) -} - -func (f *BoolOptionalField) Levels() ([]uint8, []uint8) { - return f.Defs, f.Reps -} - -type Uint32Field struct { - vals []uint32 - parquet.RequiredField - read func(r Person) uint32 - write func(r *Person, vals []uint32) - stats *uint32stats -} - -func NewUint32Field(read func(r Person) uint32, write func(r *Person, vals []uint32), path []string, opts ...func(*parquet.RequiredField)) *Uint32Field { - return &Uint32Field{ - read: read, - write: write, - RequiredField: parquet.NewRequiredField(path, opts...), - stats: newUint32stats(), - } -} - -func (f *Uint32Field) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Uint32Type, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} -} - -func (f *Uint32Field) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - v := make([]uint32, int(pg.N)) - err = binary.Read(rr, binary.LittleEndian, &v) - f.vals = append(f.vals, v...) - return err -} - -func (f *Uint32Field) Write(w io.Writer, meta *parquet.Metadata) error { - var buf bytes.Buffer - for _, v := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { - return err - } - } - return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) -} - -func (f *Uint32Field) Scan(r *Person) { - if len(f.vals) == 0 { - return - } - - f.write(r, f.vals) - f.vals = f.vals[1:] -} - -func (f *Uint32Field) Add(r Person) { - v := f.read(r) - f.stats.add(v) - f.vals = append(f.vals, v) -} - -func (f *Uint32Field) Levels() ([]uint8, []uint8) { - return nil, nil -} - -type Uint64OptionalField struct { - parquet.OptionalField - vals []uint64 - read func(r Person) ([]uint64, []uint8, []uint8) - write func(r *Person, vals []uint64, def, rep []uint8) (int, int) - stats *uint64optionalStats -} - -func NewUint64OptionalField(read func(r Person) ([]uint64, []uint8, []uint8), write func(r *Person, vals []uint64, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Uint64OptionalField { - return &Uint64OptionalField{ - read: read, - write: write, - OptionalField: parquet.NewOptionalField(path, types, opts...), - stats: newuint64optionalStats(maxDef(types)), - } -} - -func (f *Uint64OptionalField) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Uint64Type, RepetitionType: f.RepetitionType, Types: f.Types} -} - -func (f *Uint64OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { - var buf bytes.Buffer - for _, v := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { - return err - } - } - return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) -} - -func (f *Uint64OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - v := make([]uint64, f.Values()-len(f.vals)) - err = binary.Read(rr, binary.LittleEndian, &v) - f.vals = append(f.vals, v...) - return err -} - -func (f *Uint64OptionalField) Add(r Person) { - vals, defs, reps := f.read(r) - f.stats.add(vals, defs) - f.vals = append(f.vals, vals...) - f.Defs = append(f.Defs, defs...) - f.Reps = append(f.Reps, reps...) -} - -func (f *Uint64OptionalField) Scan(r *Person) { - if len(f.Defs) == 0 { - return - } - - v, l := f.write(r, f.vals, f.Defs, f.Reps) - f.vals = f.vals[v:] - f.Defs = f.Defs[l:] - if len(f.Reps) > 0 { - f.Reps = f.Reps[l:] - } -} - -func (f *Uint64OptionalField) Levels() ([]uint8, []uint8) { - return f.Defs, f.Reps -} - -type StringField struct { - parquet.RequiredField - vals []string - read func(r Person) string - write func(r *Person, vals []string) - stats *stringStats -} - -func NewStringField(read func(r Person) string, write func(r *Person, vals []string), path []string, opts ...func(*parquet.RequiredField)) *StringField { - return &StringField{ - read: read, - write: write, - RequiredField: parquet.NewRequiredField(path, opts...), - stats: newStringStats(), - } -} - -func (f *StringField) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: StringType, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} -} - -func (f *StringField) Write(w io.Writer, meta *parquet.Metadata) error { - buf := bytes.Buffer{} - - for _, s := range f.vals { - if err := binary.Write(&buf, binary.LittleEndian, int32(len(s))); err != nil { - return err - } - buf.Write([]byte(s)) - } - - return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) -} - -func (f *StringField) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, _, err := f.DoRead(r, pg) - if err != nil { - return err - } - - for j := 0; j < pg.N; j++ { - var x int32 - if err := binary.Read(rr, binary.LittleEndian, &x); err != nil { - return err - } - s := make([]byte, x) - if _, err := rr.Read(s); err != nil { - return err - } - - f.vals = append(f.vals, string(s)) - } - return nil -} - -func (f *StringField) Scan(r *Person) { - if len(f.vals) == 0 { - return - } - - f.write(r, f.vals) - f.vals = f.vals[1:] -} - -func (f *StringField) Add(r Person) { - v := f.read(r) - f.stats.add(v) - f.vals = append(f.vals, v) -} - -func (f *StringField) Levels() ([]uint8, []uint8) { - return nil, nil -} - -type BoolField struct { - parquet.RequiredField - vals []bool - read func(r Person) bool - write func(r *Person, vals []bool) - stats *boolStats -} - -func NewBoolField(read func(r Person) bool, write func(r *Person, vals []bool), path []string, opts ...func(*parquet.RequiredField)) *BoolField { - return &BoolField{ - read: read, - write: write, - RequiredField: parquet.NewRequiredField(path, opts...), - } -} - -func (f *BoolField) Schema() parquet.Field { - return parquet.Field{Name: f.Name(), Path: f.Path(), Type: BoolType, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} -} - -func (f *BoolField) Write(w io.Writer, meta *parquet.Metadata) error { - ln := len(f.vals) - n := (ln + 7) / 8 - rawBuf := make([]byte, n) - - for i := 0; i < ln; i++ { - if f.vals[i] { - rawBuf[i/8] = rawBuf[i/8] | (1 << uint32(i%8)) - } - } - - return f.DoWrite(w, meta, rawBuf, len(f.vals), newBoolStats()) -} - -func (f *BoolField) Read(r io.ReadSeeker, pg parquet.Page) error { - rr, sizes, err := f.DoRead(r, pg) - if err != nil { - return err - } - - f.vals, err = parquet.GetBools(rr, int(pg.N), sizes) - return err -} - -func (f *BoolField) Scan(r *Person) { - if len(f.vals) == 0 { - return - } - - f.write(r, f.vals) - f.vals = f.vals[1:] -} - -func (f *BoolField) Add(r Person) { - v := f.read(r) - f.vals = append(f.vals, v) -} - -func (f *BoolField) Levels() ([]uint8, []uint8) { - return nil, nil -} - -type int32stats struct { - min int32 - max int32 -} - -func newInt32stats() *int32stats { - return &int32stats{ - min: int32(math.MaxInt32), - } -} - -func (i *int32stats) add(val int32) { - if val < i.min { - i.min = val - } - if val > i.max { - i.max = val - } -} - -func (f *int32stats) bytes(val int32) []byte { - var buf bytes.Buffer - binary.Write(&buf, binary.LittleEndian, val) - return buf.Bytes() -} - -func (f *int32stats) NullCount() *int64 { - return nil -} - -func (f *int32stats) DistinctCount() *int64 { - return nil -} - -func (f *int32stats) Min() []byte { - return f.bytes(f.min) -} - -func (f *int32stats) Max() []byte { - return f.bytes(f.max) -} - -type int32optionalStats struct { - min int32 - max int32 - nils int64 - nonNils int64 - maxDef uint8 -} - -func newint32optionalStats(d uint8) *int32optionalStats { - return &int32optionalStats{ - min: int32(math.MaxInt32), - maxDef: d, - } -} - -func (f *int32optionalStats) add(vals []int32, defs []uint8) { - var i int - for _, def := range defs { - if def < f.maxDef { - f.nils++ - } else { - val := vals[i] - i++ - - f.nonNils++ - if val < f.min { - f.min = val - } - if val > f.max { - f.max = val - } - } - } -} - -func (f *int32optionalStats) bytes(val int32) []byte { - var buf bytes.Buffer - binary.Write(&buf, binary.LittleEndian, val) - return buf.Bytes() -} - -func (f *int32optionalStats) NullCount() *int64 { - return &f.nils -} - -func (f *int32optionalStats) DistinctCount() *int64 { - return nil -} - -func (f *int32optionalStats) Min() []byte { - if f.nonNils == 0 { - return nil - } - return f.bytes(f.min) -} - -func (f *int32optionalStats) Max() []byte { - if f.nonNils == 0 { - return nil - } - return f.bytes(f.max) -} - -type int64stats struct { - min int64 - max int64 -} - -func newInt64stats() *int64stats { - return &int64stats{ - min: int64(math.MaxInt64), - } -} - -func (i *int64stats) add(val int64) { - if val < i.min { - i.min = val - } - if val > i.max { - i.max = val - } -} - -func (f *int64stats) bytes(val int64) []byte { - var buf bytes.Buffer - binary.Write(&buf, binary.LittleEndian, val) - return buf.Bytes() -} - -func (f *int64stats) NullCount() *int64 { - return nil -} - -func (f *int64stats) DistinctCount() *int64 { - return nil -} - -func (f *int64stats) Min() []byte { - return f.bytes(f.min) -} - -func (f *int64stats) Max() []byte { - return f.bytes(f.max) -} - -type int64optionalStats struct { - min int64 - max int64 - nils int64 - nonNils int64 - maxDef uint8 -} - -func newint64optionalStats(d uint8) *int64optionalStats { - return &int64optionalStats{ - min: int64(math.MaxInt64), - maxDef: d, - } -} - -func (f *int64optionalStats) add(vals []int64, defs []uint8) { - var i int - for _, def := range defs { - if def < f.maxDef { - f.nils++ - } else { - val := vals[i] - i++ - - f.nonNils++ - if val < f.min { - f.min = val - } - if val > f.max { - f.max = val - } - } - } -} - -func (f *int64optionalStats) bytes(val int64) []byte { - var buf bytes.Buffer - binary.Write(&buf, binary.LittleEndian, val) - return buf.Bytes() -} - -func (f *int64optionalStats) NullCount() *int64 { - return &f.nils -} - -func (f *int64optionalStats) DistinctCount() *int64 { - return nil -} - -func (f *int64optionalStats) Min() []byte { - if f.nonNils == 0 { - return nil - } - return f.bytes(f.min) -} - -func (f *int64optionalStats) Max() []byte { - if f.nonNils == 0 { - return nil - } - return f.bytes(f.max) -} - -type stringOptionalStats struct { - vals []string - min []byte - max []byte - nils int64 - maxDef uint8 -} - -func newStringOptionalStats(d uint8) *stringOptionalStats { - return &stringOptionalStats{maxDef: d} -} - -func (s *stringOptionalStats) add(vals []string, defs []uint8) { - var i int - for _, def := range defs { - if def < s.maxDef { - s.nils++ - } else { - s.vals = append(s.vals, vals[i]) - i++ - } - } -} - -func (s *stringOptionalStats) NullCount() *int64 { - return &s.nils -} - -func (s *stringOptionalStats) DistinctCount() *int64 { - return nil -} - -func (s *stringOptionalStats) Min() []byte { - if s.min == nil { - s.minMax() - } - return s.min -} - -func (s *stringOptionalStats) Max() []byte { - if s.max == nil { - s.minMax() - } - return s.max -} - -func (s *stringOptionalStats) minMax() { - if len(s.vals) == 0 { - return - } - - tmp := make([]string, len(s.vals)) - copy(tmp, s.vals) - sort.Strings(tmp) - s.min = []byte(tmp[0]) - s.max = []byte(tmp[len(tmp)-1]) -} - -type float32stats struct { - min float32 - max float32 -} - -func newFloat32stats() *float32stats { - return &float32stats{ - min: float32(math.MaxFloat32), - } + return f.DoWrite(w, meta, rawBuf, len(f.vals), newBoolStats()) } -func (i *float32stats) add(val float32) { - if val < i.min { - i.min = val - } - if val > i.max { - i.max = val +func (f *BoolField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, sizes, err := f.DoRead(r, pg) + if err != nil { + return err } -} -func (f *float32stats) bytes(val float32) []byte { - var buf bytes.Buffer - binary.Write(&buf, binary.LittleEndian, val) - return buf.Bytes() + f.vals, err = parquet.GetBools(rr, int(pg.N), sizes) + return err } -func (f *float32stats) NullCount() *int64 { - return nil -} +func (f *BoolField) Scan(r *Person) { + if len(f.vals) == 0 { + return + } -func (f *float32stats) DistinctCount() *int64 { - return nil + f.write(r, f.vals) + f.vals = f.vals[1:] } -func (f *float32stats) Min() []byte { - return f.bytes(f.min) +func (f *BoolField) Add(r Person) { + v := f.read(r) + f.vals = append(f.vals, v) } -func (f *float32stats) Max() []byte { - return f.bytes(f.max) +func (f *BoolField) Levels() ([]uint8, []uint8) { + return nil, nil } -type float64stats struct { - min float64 - max float64 +type int32stats struct { + min int32 + max int32 } -func newFloat64stats() *float64stats { - return &float64stats{ - min: float64(math.MaxFloat64), +func newInt32stats() *int32stats { + return &int32stats{ + min: int32(math.MaxInt32), } } -func (i *float64stats) add(val float64) { +func (i *int32stats) add(val int32) { if val < i.min { i.min = val } @@ -2044,182 +1304,92 @@ func (i *float64stats) add(val float64) { } } -func (f *float64stats) bytes(val float64) []byte { +func (f *int32stats) bytes(val int32) []byte { var buf bytes.Buffer binary.Write(&buf, binary.LittleEndian, val) return buf.Bytes() } -func (f *float64stats) NullCount() *int64 { - return nil -} - -func (f *float64stats) DistinctCount() *int64 { +func (f *int32stats) NullCount() *int64 { return nil } -func (f *float64stats) Min() []byte { - return f.bytes(f.min) -} - -func (f *float64stats) Max() []byte { - return f.bytes(f.max) -} - -type float32optionalStats struct { - min float32 - max float32 - nils int64 - nonNils int64 - maxDef uint8 -} - -func newfloat32optionalStats(d uint8) *float32optionalStats { - return &float32optionalStats{ - min: float32(math.MaxFloat32), - maxDef: d, - } -} - -func (f *float32optionalStats) add(vals []float32, defs []uint8) { - var i int - for _, def := range defs { - if def < f.maxDef { - f.nils++ - } else { - val := vals[i] - i++ - - f.nonNils++ - if val < f.min { - f.min = val - } - if val > f.max { - f.max = val - } - } - } -} - -func (f *float32optionalStats) bytes(val float32) []byte { - var buf bytes.Buffer - binary.Write(&buf, binary.LittleEndian, val) - return buf.Bytes() -} - -func (f *float32optionalStats) NullCount() *int64 { - return &f.nils -} - -func (f *float32optionalStats) DistinctCount() *int64 { +func (f *int32stats) DistinctCount() *int64 { return nil } -func (f *float32optionalStats) Min() []byte { - if f.nonNils == 0 { - return nil - } +func (f *int32stats) Min() []byte { return f.bytes(f.min) } -func (f *float32optionalStats) Max() []byte { - if f.nonNils == 0 { - return nil - } +func (f *int32stats) Max() []byte { return f.bytes(f.max) } -type boolOptionalStats struct { - maxDef uint8 - nils int64 -} - -func newBoolOptionalStats(d uint8) *boolOptionalStats { - return &boolOptionalStats{maxDef: d} -} - -func (b *boolOptionalStats) add(vals []bool, defs []uint8) { - for _, def := range defs { - if def < b.maxDef { - b.nils++ - } - } +type stringStats struct { + vals []string + min []byte + max []byte } -func (b *boolOptionalStats) NullCount() *int64 { - return &b.nils +func newStringStats() *stringStats { + return &stringStats{} } -func (b *boolOptionalStats) DistinctCount() *int64 { - return nil +func (s *stringStats) add(val string) { + s.vals = append(s.vals, val) } -func (b *boolOptionalStats) Min() []byte { +func (s *stringStats) NullCount() *int64 { return nil } -func (b *boolOptionalStats) Max() []byte { +func (s *stringStats) DistinctCount() *int64 { return nil } -type uint32stats struct { - min uint32 - max uint32 -} - -func newUint32stats() *uint32stats { - return &uint32stats{ - min: uint32(math.MaxUint32), +func (s *stringStats) Min() []byte { + if s.min == nil { + s.minMax() } + return s.min } -func (i *uint32stats) add(val uint32) { - if val < i.min { - i.min = val - } - if val > i.max { - i.max = val +func (s *stringStats) Max() []byte { + if s.max == nil { + s.minMax() } + return s.max } -func (f *uint32stats) bytes(val uint32) []byte { - var buf bytes.Buffer - binary.Write(&buf, binary.LittleEndian, val) - return buf.Bytes() -} - -func (f *uint32stats) NullCount() *int64 { - return nil -} - -func (f *uint32stats) DistinctCount() *int64 { - return nil -} - -func (f *uint32stats) Min() []byte { - return f.bytes(f.min) -} +func (s *stringStats) minMax() { + if len(s.vals) == 0 { + return + } -func (f *uint32stats) Max() []byte { - return f.bytes(f.max) + tmp := make([]string, len(s.vals)) + copy(tmp, s.vals) + sort.Strings(tmp) + s.min = []byte(tmp[0]) + s.max = []byte(tmp[len(tmp)-1]) } -type uint64optionalStats struct { - min uint64 - max uint64 +type int32optionalStats struct { + min int32 + max int32 nils int64 nonNils int64 maxDef uint8 } -func newuint64optionalStats(d uint8) *uint64optionalStats { - return &uint64optionalStats{ - min: uint64(math.MaxUint64), +func newint32optionalStats(d uint8) *int32optionalStats { + return &int32optionalStats{ + min: int32(math.MaxInt32), maxDef: d, } } -func (f *uint64optionalStats) add(vals []uint64, defs []uint8) { +func (f *int32optionalStats) add(vals []int32, defs []uint8) { var i int for _, def := range defs { if def < f.maxDef { @@ -2239,71 +1409,81 @@ func (f *uint64optionalStats) add(vals []uint64, defs []uint8) { } } -func (f *uint64optionalStats) bytes(val uint64) []byte { +func (f *int32optionalStats) bytes(val int32) []byte { var buf bytes.Buffer binary.Write(&buf, binary.LittleEndian, val) return buf.Bytes() } -func (f *uint64optionalStats) NullCount() *int64 { +func (f *int32optionalStats) NullCount() *int64 { return &f.nils } -func (f *uint64optionalStats) DistinctCount() *int64 { +func (f *int32optionalStats) DistinctCount() *int64 { return nil } -func (f *uint64optionalStats) Min() []byte { +func (f *int32optionalStats) Min() []byte { if f.nonNils == 0 { return nil } return f.bytes(f.min) } -func (f *uint64optionalStats) Max() []byte { +func (f *int32optionalStats) Max() []byte { if f.nonNils == 0 { return nil } return f.bytes(f.max) } -type stringStats struct { - vals []string - min []byte - max []byte +type stringOptionalStats struct { + vals []string + min []byte + max []byte + nils int64 + maxDef uint8 } -func newStringStats() *stringStats { - return &stringStats{} +func newStringOptionalStats(d uint8) *stringOptionalStats { + return &stringOptionalStats{maxDef: d} } -func (s *stringStats) add(val string) { - s.vals = append(s.vals, val) +func (s *stringOptionalStats) add(vals []string, defs []uint8) { + var i int + for _, def := range defs { + if def < s.maxDef { + s.nils++ + } else { + s.vals = append(s.vals, vals[i]) + i++ + } + } } -func (s *stringStats) NullCount() *int64 { - return nil +func (s *stringOptionalStats) NullCount() *int64 { + return &s.nils } -func (s *stringStats) DistinctCount() *int64 { +func (s *stringOptionalStats) DistinctCount() *int64 { return nil } -func (s *stringStats) Min() []byte { +func (s *stringOptionalStats) Min() []byte { if s.min == nil { s.minMax() } return s.min } -func (s *stringStats) Max() []byte { +func (s *stringOptionalStats) Max() []byte { if s.max == nil { s.minMax() } return s.max } -func (s *stringStats) minMax() { +func (s *stringOptionalStats) minMax() { if len(s.vals) == 0 { return } @@ -2315,6 +1495,39 @@ func (s *stringStats) minMax() { s.max = []byte(tmp[len(tmp)-1]) } +type boolOptionalStats struct { + maxDef uint8 + nils int64 +} + +func newBoolOptionalStats(d uint8) *boolOptionalStats { + return &boolOptionalStats{maxDef: d} +} + +func (b *boolOptionalStats) add(vals []bool, defs []uint8) { + for _, def := range defs { + if def < b.maxDef { + b.nils++ + } + } +} + +func (b *boolOptionalStats) NullCount() *int64 { + return &b.nils +} + +func (b *boolOptionalStats) DistinctCount() *int64 { + return nil +} + +func (b *boolOptionalStats) Min() []byte { + return nil +} + +func (b *boolOptionalStats) Max() []byte { + return nil +} + type boolStats struct{} func newBoolStats() *boolStats { return &boolStats{} } From d1e3b408ff948edc2844894e6c26f729d5e0f445 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Thu, 17 Jun 2021 08:54:00 -0600 Subject: [PATCH 21/25] got main tests passing --- internal/gen/gen.go | 6 +- internal/gen/template_bool.go | 6 +- internal/gen/template_bool_optional.go | 6 +- parquet_generated_test.go | 909 +++++++++++++++++++++++-- parquet_test.go | 13 +- 5 files changed, 878 insertions(+), 62 deletions(-) diff --git a/internal/gen/gen.go b/internal/gen/gen.go index 907a503..0bd5b21 100644 --- a/internal/gen/gen.go +++ b/internal/gen/gen.go @@ -180,16 +180,16 @@ func dedupe(flds []fields.Field) []fields.Field { seen := map[string]bool{} out := make([]fields.Field, 0, len(flds)) for _, f := range flds { - _, ok := seen[f.Category()] + _, ok := seen[f.FieldType()] if !ok { out = append(out, f) - seen[f.Category()] = true + seen[f.FieldType()] = true } } fmt.Println("deduping", out) for _, f := range out { - fmt.Println("cat", f.Category()) + fmt.Println("cat", f.FieldType()) } return out } diff --git a/internal/gen/template_bool.go b/internal/gen/template_bool.go index 1554bd1..33bce48 100644 --- a/internal/gen/template_bool.go +++ b/internal/gen/template_bool.go @@ -3,12 +3,12 @@ package gen var boolTpl = `{{define "boolField"}}type BoolField struct { {{parquetType .}} vals []bool - read func(r {{.Type}}) {{.TypeName}} - write func(r *{{.Type}}, vals []{{removeStar .TypeName}}) + read func(r {{.StructType}}) {{.TypeName}} + write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}) stats *boolStats } -func NewBoolField(read func(r {{.Type}}) {{.TypeName}}, write func(r *{{.Type}}, vals []{{removeStar .TypeName}}), path []string, opts ...func(*{{parquetType .}})) *BoolField { +func NewBoolField(read func(r {{.StructType}}) {{.TypeName}}, write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}), path []string, opts ...func(*{{parquetType .}})) *BoolField { return &BoolField{ read: read, write: write, diff --git a/internal/gen/template_bool_optional.go b/internal/gen/template_bool_optional.go index a838090..ed3777e 100644 --- a/internal/gen/template_bool_optional.go +++ b/internal/gen/template_bool_optional.go @@ -3,12 +3,12 @@ package gen var boolOptionalTpl = `{{define "boolOptionalField"}}type BoolOptionalField struct { parquet.OptionalField vals []bool - read func(r {{.Type}}) ([]{{removeStar .TypeName}}, []uint8, []uint8) - write func(r *{{.Type}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int) + read func(r {{.StructType}}) ([]{{removeStar .TypeName}}, []uint8, []uint8) + write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int) stats *boolOptionalStats } -func NewBoolOptionalField(read func(r {{.Type}}) ([]{{removeStar .TypeName}}, []uint8, []uint8), write func(r *{{.Type}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *BoolOptionalField { +func NewBoolOptionalField(read func(r {{.StructType}}) ([]{{removeStar .TypeName}}, []uint8, []uint8), write func(r *{{.StructType}}, vals []{{removeStar .TypeName}}, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *BoolOptionalField { return &BoolOptionalField{ read: read, write: write, diff --git a/parquet_generated_test.go b/parquet_generated_test.go index 076baba..42dd044 100644 --- a/parquet_generated_test.go +++ b/parquet_generated_test.go @@ -1071,6 +1071,136 @@ func (f *Int32OptionalField) Levels() ([]uint8, []uint8) { return f.Defs, f.Reps } +type Int64Field struct { + vals []int64 + parquet.RequiredField + read func(r Person) int64 + write func(r *Person, vals []int64) + stats *int64stats +} + +func NewInt64Field(read func(r Person) int64, write func(r *Person, vals []int64), path []string, opts ...func(*parquet.RequiredField)) *Int64Field { + return &Int64Field{ + read: read, + write: write, + RequiredField: parquet.NewRequiredField(path, opts...), + stats: newInt64stats(), + } +} + +func (f *Int64Field) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int64Type, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} +} + +func (f *Int64Field) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + v := make([]int64, int(pg.N)) + err = binary.Read(rr, binary.LittleEndian, &v) + f.vals = append(f.vals, v...) + return err +} + +func (f *Int64Field) Write(w io.Writer, meta *parquet.Metadata) error { + var buf bytes.Buffer + for _, v := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + return err + } + } + return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) +} + +func (f *Int64Field) Scan(r *Person) { + if len(f.vals) == 0 { + return + } + + f.write(r, f.vals) + f.vals = f.vals[1:] +} + +func (f *Int64Field) Add(r Person) { + v := f.read(r) + f.stats.add(v) + f.vals = append(f.vals, v) +} + +func (f *Int64Field) Levels() ([]uint8, []uint8) { + return nil, nil +} + +type Int64OptionalField struct { + parquet.OptionalField + vals []int64 + read func(r Person) ([]int64, []uint8, []uint8) + write func(r *Person, vals []int64, def, rep []uint8) (int, int) + stats *int64optionalStats +} + +func NewInt64OptionalField(read func(r Person) ([]int64, []uint8, []uint8), write func(r *Person, vals []int64, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Int64OptionalField { + return &Int64OptionalField{ + read: read, + write: write, + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newint64optionalStats(maxDef(types)), + } +} + +func (f *Int64OptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Int64Type, RepetitionType: f.RepetitionType, Types: f.Types} +} + +func (f *Int64OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + var buf bytes.Buffer + for _, v := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + return err + } + } + return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) +} + +func (f *Int64OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + v := make([]int64, f.Values()-len(f.vals)) + err = binary.Read(rr, binary.LittleEndian, &v) + f.vals = append(f.vals, v...) + return err +} + +func (f *Int64OptionalField) Add(r Person) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) +} + +func (f *Int64OptionalField) Scan(r *Person) { + if len(f.Defs) == 0 { + return + } + + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] + } +} + +func (f *Int64OptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps +} + type StringOptionalField struct { parquet.OptionalField vals []string @@ -1151,15 +1281,207 @@ func (f *StringOptionalField) Levels() ([]uint8, []uint8) { return f.Defs, f.Reps } +type Float32Field struct { + vals []float32 + parquet.RequiredField + read func(r Person) float32 + write func(r *Person, vals []float32) + stats *float32stats +} + +func NewFloat32Field(read func(r Person) float32, write func(r *Person, vals []float32), path []string, opts ...func(*parquet.RequiredField)) *Float32Field { + return &Float32Field{ + read: read, + write: write, + RequiredField: parquet.NewRequiredField(path, opts...), + stats: newFloat32stats(), + } +} + +func (f *Float32Field) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Float32Type, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} +} + +func (f *Float32Field) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + v := make([]float32, int(pg.N)) + err = binary.Read(rr, binary.LittleEndian, &v) + f.vals = append(f.vals, v...) + return err +} + +func (f *Float32Field) Write(w io.Writer, meta *parquet.Metadata) error { + var buf bytes.Buffer + for _, v := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + return err + } + } + return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) +} + +func (f *Float32Field) Scan(r *Person) { + if len(f.vals) == 0 { + return + } + + f.write(r, f.vals) + f.vals = f.vals[1:] +} + +func (f *Float32Field) Add(r Person) { + v := f.read(r) + f.stats.add(v) + f.vals = append(f.vals, v) +} + +func (f *Float32Field) Levels() ([]uint8, []uint8) { + return nil, nil +} + +type Float64Field struct { + vals []float64 + parquet.RequiredField + read func(r Person) float64 + write func(r *Person, vals []float64) + stats *float64stats +} + +func NewFloat64Field(read func(r Person) float64, write func(r *Person, vals []float64), path []string, opts ...func(*parquet.RequiredField)) *Float64Field { + return &Float64Field{ + read: read, + write: write, + RequiredField: parquet.NewRequiredField(path, opts...), + stats: newFloat64stats(), + } +} + +func (f *Float64Field) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Float64Type, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} +} + +func (f *Float64Field) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + v := make([]float64, int(pg.N)) + err = binary.Read(rr, binary.LittleEndian, &v) + f.vals = append(f.vals, v...) + return err +} + +func (f *Float64Field) Write(w io.Writer, meta *parquet.Metadata) error { + var buf bytes.Buffer + for _, v := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + return err + } + } + return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) +} + +func (f *Float64Field) Scan(r *Person) { + if len(f.vals) == 0 { + return + } + + f.write(r, f.vals) + f.vals = f.vals[1:] +} + +func (f *Float64Field) Add(r Person) { + v := f.read(r) + f.stats.add(v) + f.vals = append(f.vals, v) +} + +func (f *Float64Field) Levels() ([]uint8, []uint8) { + return nil, nil +} + +type Float32OptionalField struct { + parquet.OptionalField + vals []float32 + read func(r Person) ([]float32, []uint8, []uint8) + write func(r *Person, vals []float32, def, rep []uint8) (int, int) + stats *float32optionalStats +} + +func NewFloat32OptionalField(read func(r Person) ([]float32, []uint8, []uint8), write func(r *Person, vals []float32, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Float32OptionalField { + return &Float32OptionalField{ + read: read, + write: write, + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newfloat32optionalStats(maxDef(types)), + } +} + +func (f *Float32OptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Float32Type, RepetitionType: f.RepetitionType, Types: f.Types} +} + +func (f *Float32OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + var buf bytes.Buffer + for _, v := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + return err + } + } + return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) +} + +func (f *Float32OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + v := make([]float32, f.Values()-len(f.vals)) + err = binary.Read(rr, binary.LittleEndian, &v) + f.vals = append(f.vals, v...) + return err +} + +func (f *Float32OptionalField) Add(r Person) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) +} + +func (f *Float32OptionalField) Scan(r *Person) { + if len(f.Defs) == 0 { + return + } + + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] + } +} + +func (f *Float32OptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps +} + type BoolOptionalField struct { parquet.OptionalField vals []bool - read func(r bool) ([]bool, []uint8, []uint8) - write func(r *bool, vals []bool, defs, reps []uint8) (int, int) + read func(r Person) ([]bool, []uint8, []uint8) + write func(r *Person, vals []bool, defs, reps []uint8) (int, int) stats *boolOptionalStats } -func NewBoolOptionalField(read func(r bool) ([]bool, []uint8, []uint8), write func(r *bool, vals []bool, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *BoolOptionalField { +func NewBoolOptionalField(read func(r Person) ([]bool, []uint8, []uint8), write func(r *Person, vals []bool, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *BoolOptionalField { return &BoolOptionalField{ read: read, write: write, @@ -1222,15 +1544,145 @@ func (f *BoolOptionalField) Levels() ([]uint8, []uint8) { return f.Defs, f.Reps } +type Uint32Field struct { + vals []uint32 + parquet.RequiredField + read func(r Person) uint32 + write func(r *Person, vals []uint32) + stats *uint32stats +} + +func NewUint32Field(read func(r Person) uint32, write func(r *Person, vals []uint32), path []string, opts ...func(*parquet.RequiredField)) *Uint32Field { + return &Uint32Field{ + read: read, + write: write, + RequiredField: parquet.NewRequiredField(path, opts...), + stats: newUint32stats(), + } +} + +func (f *Uint32Field) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Uint32Type, RepetitionType: parquet.RepetitionRequired, Types: []int{0}} +} + +func (f *Uint32Field) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + v := make([]uint32, int(pg.N)) + err = binary.Read(rr, binary.LittleEndian, &v) + f.vals = append(f.vals, v...) + return err +} + +func (f *Uint32Field) Write(w io.Writer, meta *parquet.Metadata) error { + var buf bytes.Buffer + for _, v := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + return err + } + } + return f.DoWrite(w, meta, buf.Bytes(), len(f.vals), f.stats) +} + +func (f *Uint32Field) Scan(r *Person) { + if len(f.vals) == 0 { + return + } + + f.write(r, f.vals) + f.vals = f.vals[1:] +} + +func (f *Uint32Field) Add(r Person) { + v := f.read(r) + f.stats.add(v) + f.vals = append(f.vals, v) +} + +func (f *Uint32Field) Levels() ([]uint8, []uint8) { + return nil, nil +} + +type Uint64OptionalField struct { + parquet.OptionalField + vals []uint64 + read func(r Person) ([]uint64, []uint8, []uint8) + write func(r *Person, vals []uint64, def, rep []uint8) (int, int) + stats *uint64optionalStats +} + +func NewUint64OptionalField(read func(r Person) ([]uint64, []uint8, []uint8), write func(r *Person, vals []uint64, defs, reps []uint8) (int, int), path []string, types []int, opts ...func(*parquet.OptionalField)) *Uint64OptionalField { + return &Uint64OptionalField{ + read: read, + write: write, + OptionalField: parquet.NewOptionalField(path, types, opts...), + stats: newuint64optionalStats(maxDef(types)), + } +} + +func (f *Uint64OptionalField) Schema() parquet.Field { + return parquet.Field{Name: f.Name(), Path: f.Path(), Type: Uint64Type, RepetitionType: f.RepetitionType, Types: f.Types} +} + +func (f *Uint64OptionalField) Write(w io.Writer, meta *parquet.Metadata) error { + var buf bytes.Buffer + for _, v := range f.vals { + if err := binary.Write(&buf, binary.LittleEndian, v); err != nil { + return err + } + } + return f.DoWrite(w, meta, buf.Bytes(), len(f.Defs), f.stats) +} + +func (f *Uint64OptionalField) Read(r io.ReadSeeker, pg parquet.Page) error { + rr, _, err := f.DoRead(r, pg) + if err != nil { + return err + } + + v := make([]uint64, f.Values()-len(f.vals)) + err = binary.Read(rr, binary.LittleEndian, &v) + f.vals = append(f.vals, v...) + return err +} + +func (f *Uint64OptionalField) Add(r Person) { + vals, defs, reps := f.read(r) + f.stats.add(vals, defs) + f.vals = append(f.vals, vals...) + f.Defs = append(f.Defs, defs...) + f.Reps = append(f.Reps, reps...) +} + +func (f *Uint64OptionalField) Scan(r *Person) { + if len(f.Defs) == 0 { + return + } + + v, l := f.write(r, f.vals, f.Defs, f.Reps) + f.vals = f.vals[v:] + f.Defs = f.Defs[l:] + if len(f.Reps) > 0 { + f.Reps = f.Reps[l:] + } +} + +func (f *Uint64OptionalField) Levels() ([]uint8, []uint8) { + return f.Defs, f.Reps +} + type BoolField struct { parquet.RequiredField vals []bool - read func(r bool) bool - write func(r *bool, vals []bool) + read func(r Person) bool + write func(r *Person, vals []bool) stats *boolStats } -func NewBoolField(read func(r bool) bool, write func(r *bool, vals []bool), path []string, opts ...func(*parquet.RequiredField)) *BoolField { +func NewBoolField(read func(r Person) bool, write func(r *Person, vals []bool), path []string, opts ...func(*parquet.RequiredField)) *BoolField { return &BoolField{ read: read, write: write, @@ -1322,74 +1774,179 @@ func (f *int32stats) Min() []byte { return f.bytes(f.min) } -func (f *int32stats) Max() []byte { +func (f *int32stats) Max() []byte { + return f.bytes(f.max) +} + +type stringStats struct { + vals []string + min []byte + max []byte +} + +func newStringStats() *stringStats { + return &stringStats{} +} + +func (s *stringStats) add(val string) { + s.vals = append(s.vals, val) +} + +func (s *stringStats) NullCount() *int64 { + return nil +} + +func (s *stringStats) DistinctCount() *int64 { + return nil +} + +func (s *stringStats) Min() []byte { + if s.min == nil { + s.minMax() + } + return s.min +} + +func (s *stringStats) Max() []byte { + if s.max == nil { + s.minMax() + } + return s.max +} + +func (s *stringStats) minMax() { + if len(s.vals) == 0 { + return + } + + tmp := make([]string, len(s.vals)) + copy(tmp, s.vals) + sort.Strings(tmp) + s.min = []byte(tmp[0]) + s.max = []byte(tmp[len(tmp)-1]) +} + +type int32optionalStats struct { + min int32 + max int32 + nils int64 + nonNils int64 + maxDef uint8 +} + +func newint32optionalStats(d uint8) *int32optionalStats { + return &int32optionalStats{ + min: int32(math.MaxInt32), + maxDef: d, + } +} + +func (f *int32optionalStats) add(vals []int32, defs []uint8) { + var i int + for _, def := range defs { + if def < f.maxDef { + f.nils++ + } else { + val := vals[i] + i++ + + f.nonNils++ + if val < f.min { + f.min = val + } + if val > f.max { + f.max = val + } + } + } +} + +func (f *int32optionalStats) bytes(val int32) []byte { + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, val) + return buf.Bytes() +} + +func (f *int32optionalStats) NullCount() *int64 { + return &f.nils +} + +func (f *int32optionalStats) DistinctCount() *int64 { + return nil +} + +func (f *int32optionalStats) Min() []byte { + if f.nonNils == 0 { + return nil + } + return f.bytes(f.min) +} + +func (f *int32optionalStats) Max() []byte { + if f.nonNils == 0 { + return nil + } return f.bytes(f.max) } -type stringStats struct { - vals []string - min []byte - max []byte +type int64stats struct { + min int64 + max int64 } -func newStringStats() *stringStats { - return &stringStats{} +func newInt64stats() *int64stats { + return &int64stats{ + min: int64(math.MaxInt64), + } } -func (s *stringStats) add(val string) { - s.vals = append(s.vals, val) +func (i *int64stats) add(val int64) { + if val < i.min { + i.min = val + } + if val > i.max { + i.max = val + } } -func (s *stringStats) NullCount() *int64 { - return nil +func (f *int64stats) bytes(val int64) []byte { + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, val) + return buf.Bytes() } -func (s *stringStats) DistinctCount() *int64 { +func (f *int64stats) NullCount() *int64 { return nil } -func (s *stringStats) Min() []byte { - if s.min == nil { - s.minMax() - } - return s.min +func (f *int64stats) DistinctCount() *int64 { + return nil } -func (s *stringStats) Max() []byte { - if s.max == nil { - s.minMax() - } - return s.max +func (f *int64stats) Min() []byte { + return f.bytes(f.min) } -func (s *stringStats) minMax() { - if len(s.vals) == 0 { - return - } - - tmp := make([]string, len(s.vals)) - copy(tmp, s.vals) - sort.Strings(tmp) - s.min = []byte(tmp[0]) - s.max = []byte(tmp[len(tmp)-1]) +func (f *int64stats) Max() []byte { + return f.bytes(f.max) } -type int32optionalStats struct { - min int32 - max int32 +type int64optionalStats struct { + min int64 + max int64 nils int64 nonNils int64 maxDef uint8 } -func newint32optionalStats(d uint8) *int32optionalStats { - return &int32optionalStats{ - min: int32(math.MaxInt32), +func newint64optionalStats(d uint8) *int64optionalStats { + return &int64optionalStats{ + min: int64(math.MaxInt64), maxDef: d, } } -func (f *int32optionalStats) add(vals []int32, defs []uint8) { +func (f *int64optionalStats) add(vals []int64, defs []uint8) { var i int for _, def := range defs { if def < f.maxDef { @@ -1409,28 +1966,28 @@ func (f *int32optionalStats) add(vals []int32, defs []uint8) { } } -func (f *int32optionalStats) bytes(val int32) []byte { +func (f *int64optionalStats) bytes(val int64) []byte { var buf bytes.Buffer binary.Write(&buf, binary.LittleEndian, val) return buf.Bytes() } -func (f *int32optionalStats) NullCount() *int64 { +func (f *int64optionalStats) NullCount() *int64 { return &f.nils } -func (f *int32optionalStats) DistinctCount() *int64 { +func (f *int64optionalStats) DistinctCount() *int64 { return nil } -func (f *int32optionalStats) Min() []byte { +func (f *int64optionalStats) Min() []byte { if f.nonNils == 0 { return nil } return f.bytes(f.min) } -func (f *int32optionalStats) Max() []byte { +func (f *int64optionalStats) Max() []byte { if f.nonNils == 0 { return nil } @@ -1495,6 +2052,153 @@ func (s *stringOptionalStats) minMax() { s.max = []byte(tmp[len(tmp)-1]) } +type float32stats struct { + min float32 + max float32 +} + +func newFloat32stats() *float32stats { + return &float32stats{ + min: float32(math.MaxFloat32), + } +} + +func (i *float32stats) add(val float32) { + if val < i.min { + i.min = val + } + if val > i.max { + i.max = val + } +} + +func (f *float32stats) bytes(val float32) []byte { + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, val) + return buf.Bytes() +} + +func (f *float32stats) NullCount() *int64 { + return nil +} + +func (f *float32stats) DistinctCount() *int64 { + return nil +} + +func (f *float32stats) Min() []byte { + return f.bytes(f.min) +} + +func (f *float32stats) Max() []byte { + return f.bytes(f.max) +} + +type float64stats struct { + min float64 + max float64 +} + +func newFloat64stats() *float64stats { + return &float64stats{ + min: float64(math.MaxFloat64), + } +} + +func (i *float64stats) add(val float64) { + if val < i.min { + i.min = val + } + if val > i.max { + i.max = val + } +} + +func (f *float64stats) bytes(val float64) []byte { + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, val) + return buf.Bytes() +} + +func (f *float64stats) NullCount() *int64 { + return nil +} + +func (f *float64stats) DistinctCount() *int64 { + return nil +} + +func (f *float64stats) Min() []byte { + return f.bytes(f.min) +} + +func (f *float64stats) Max() []byte { + return f.bytes(f.max) +} + +type float32optionalStats struct { + min float32 + max float32 + nils int64 + nonNils int64 + maxDef uint8 +} + +func newfloat32optionalStats(d uint8) *float32optionalStats { + return &float32optionalStats{ + min: float32(math.MaxFloat32), + maxDef: d, + } +} + +func (f *float32optionalStats) add(vals []float32, defs []uint8) { + var i int + for _, def := range defs { + if def < f.maxDef { + f.nils++ + } else { + val := vals[i] + i++ + + f.nonNils++ + if val < f.min { + f.min = val + } + if val > f.max { + f.max = val + } + } + } +} + +func (f *float32optionalStats) bytes(val float32) []byte { + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, val) + return buf.Bytes() +} + +func (f *float32optionalStats) NullCount() *int64 { + return &f.nils +} + +func (f *float32optionalStats) DistinctCount() *int64 { + return nil +} + +func (f *float32optionalStats) Min() []byte { + if f.nonNils == 0 { + return nil + } + return f.bytes(f.min) +} + +func (f *float32optionalStats) Max() []byte { + if f.nonNils == 0 { + return nil + } + return f.bytes(f.max) +} + type boolOptionalStats struct { maxDef uint8 nils int64 @@ -1528,6 +2232,111 @@ func (b *boolOptionalStats) Max() []byte { return nil } +type uint32stats struct { + min uint32 + max uint32 +} + +func newUint32stats() *uint32stats { + return &uint32stats{ + min: uint32(math.MaxUint32), + } +} + +func (i *uint32stats) add(val uint32) { + if val < i.min { + i.min = val + } + if val > i.max { + i.max = val + } +} + +func (f *uint32stats) bytes(val uint32) []byte { + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, val) + return buf.Bytes() +} + +func (f *uint32stats) NullCount() *int64 { + return nil +} + +func (f *uint32stats) DistinctCount() *int64 { + return nil +} + +func (f *uint32stats) Min() []byte { + return f.bytes(f.min) +} + +func (f *uint32stats) Max() []byte { + return f.bytes(f.max) +} + +type uint64optionalStats struct { + min uint64 + max uint64 + nils int64 + nonNils int64 + maxDef uint8 +} + +func newuint64optionalStats(d uint8) *uint64optionalStats { + return &uint64optionalStats{ + min: uint64(math.MaxUint64), + maxDef: d, + } +} + +func (f *uint64optionalStats) add(vals []uint64, defs []uint8) { + var i int + for _, def := range defs { + if def < f.maxDef { + f.nils++ + } else { + val := vals[i] + i++ + + f.nonNils++ + if val < f.min { + f.min = val + } + if val > f.max { + f.max = val + } + } + } +} + +func (f *uint64optionalStats) bytes(val uint64) []byte { + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, val) + return buf.Bytes() +} + +func (f *uint64optionalStats) NullCount() *int64 { + return &f.nils +} + +func (f *uint64optionalStats) DistinctCount() *int64 { + return nil +} + +func (f *uint64optionalStats) Min() []byte { + if f.nonNils == 0 { + return nil + } + return f.bytes(f.min) +} + +func (f *uint64optionalStats) Max() []byte { + if f.nonNils == 0 { + return nil + } + return f.bytes(f.max) +} + type boolStats struct{} func newBoolStats() *boolStats { return &boolStats{} } diff --git a/parquet_test.go b/parquet_test.go index 8a26e2a..18674ee 100644 --- a/parquet_test.go +++ b/parquet_test.go @@ -7,6 +7,7 @@ import ( "io" "math" "math/rand" + "os" "testing" "time" @@ -19,9 +20,15 @@ import ( func init() { rand.Seed(time.Now().UnixNano()) + if os.Getenv("INCLUDE+GZIP") == "true" { + compressionCases = append(compressionCases, "gzip") + } } -var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") +var ( + letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") + compressionCases = []string{"uncompressed", "snappy"} +) func TestParquet(t *testing.T) { type testCase struct { @@ -446,7 +453,7 @@ func TestParquet(t *testing.T) { } for i, tc := range testCases { - for j, comp := range []string{"uncompressed", "snappy", "gzip"} { + for j, comp := range compressionCases { t.Run(fmt.Sprintf("%02d %s %s", 2*i+j, tc.name, comp), func(t *testing.T) { if tc.pageSize == 0 { tc.pageSize = 100 @@ -525,7 +532,7 @@ func TestPageHeaders(t *testing.T) { return } - assert.Equal(t, 80, len(pageHeaders)) + assert.Equal(t, 88, len(pageHeaders)) } func TestStats(t *testing.T) { From 857992ca61568f1ec5dd89a32e6c55711dd96ca3 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Fri, 18 Jun 2021 09:40:24 -0600 Subject: [PATCH 22/25] more compliation in dremel repeated tests --- internal/dremel/dremel_test.go | 36 +++++++++++++++---- .../dremel/testcases/repetition/generated.go | 2 ++ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/internal/dremel/dremel_test.go b/internal/dremel/dremel_test.go index 9fd265b..cbe795e 100644 --- a/internal/dremel/dremel_test.go +++ b/internal/dremel/dremel_test.go @@ -2,7 +2,6 @@ package dremel_test import ( "bytes" - "log" "testing" "github.com/parsyl/parquet/internal/dremel/testcases/doc" @@ -142,7 +141,7 @@ func TestDremel(t *testing.T) { var buf bytes.Buffer pw, err := doc.NewParquetWriter(&buf) if err != nil { - log.Fatal(err) + t.Fatal(err) } for _, doc := range dremelDocs { @@ -150,14 +149,14 @@ func TestDremel(t *testing.T) { } if err := pw.Write(); err != nil { - log.Fatal(err) + t.Fatal(err) } pw.Close() pr, err := doc.NewParquetReader(bytes.NewReader(buf.Bytes())) if err != nil { - log.Fatal(err) + t.Fatal(err) } var out []doc.Document @@ -219,6 +218,29 @@ var ( Backward: []repetition.Language{{Codes: []string{"w", "x"}}}, Forward: []repetition.Language{{Countries: []string{"y", "z"}}}, }, + { + Backward: []repetition.Language{ + { + Codes: []string{"aa"}, + URL: pstring("http://abc.com"), + Countries: []string{"ab"}, + }, + { + URL: pstring("http://abc.com"), + Countries: []string{"ac"}, + }, + { + Codes: []string{"ad"}, + URL: pstring("http://abc.com"), + }, + }, + Forward: []repetition.Language{ + { + Countries: []string{"y", "z"}, + URL: pstring("http://abc.com"), + }, + }, + }, }, }, } @@ -228,7 +250,7 @@ func TestRepetition(t *testing.T) { var buf bytes.Buffer pw, err := repetition.NewParquetWriter(&buf) if err != nil { - log.Fatal(err) + t.Fatal(err) } for _, doc := range repetitionDocs { @@ -236,14 +258,14 @@ func TestRepetition(t *testing.T) { } if err := pw.Write(); err != nil { - log.Fatal(err) + t.Fatal(err) } pw.Close() pr, err := repetition.NewParquetReader(bytes.NewReader(buf.Bytes())) if err != nil { - log.Fatal(err) + t.Fatal(err) } var out []repetition.Document diff --git a/internal/dremel/testcases/repetition/generated.go b/internal/dremel/testcases/repetition/generated.go index a2ab36e..70072bf 100644 --- a/internal/dremel/testcases/repetition/generated.go +++ b/internal/dremel/testcases/repetition/generated.go @@ -45,8 +45,10 @@ type ParquetWriter struct { func Fields(compression compression) []Field { return []Field{ NewStringOptionalField(readLinksBackwardCodes, writeLinksBackwardCodes, []string{"links", "backward", "code"}, []int{2, 2, 2}, optionalFieldCompression(compression)), + NewStringOptionalField(readLinksBackwardURL, writeLinksBackwardURL, []string{"links", "backward", "URL"}, []int{2, 2, 1}, optionalFieldCompression(compression)), NewStringOptionalField(readLinksBackwardCountries, writeLinksBackwardCountries, []string{"links", "backward", "countries"}, []int{2, 2, 2}, optionalFieldCompression(compression)), NewStringOptionalField(readLinksForwardCodes, writeLinksForwardCodes, []string{"links", "forward", "code"}, []int{2, 2, 2}, optionalFieldCompression(compression)), + NewStringOptionalField(readLinksForwardURL, writeLinksForwardURL, []string{"links", "forward", "URL"}, []int{2, 2, 1}, optionalFieldCompression(compression)), NewStringOptionalField(readLinksForwardCountries, writeLinksForwardCountries, []string{"links", "forward", "countries"}, []int{2, 2, 2}, optionalFieldCompression(compression)), } } From 68b3543db0c3ecdecedf986ab9f8de7bd44e18a6 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Fri, 18 Jun 2021 09:58:10 -0600 Subject: [PATCH 23/25] moved a bunch of stuff to cmd/parquetgen --- {internal => cmd/parquetgen}/cases/cases.go | 0 .../parquetgen}/cases/cases_test.go | 2 +- {internal => cmd/parquetgen}/dremel/dremel.go | 0 .../parquetgen}/dremel/dremel_test.go | 6 +- {internal => cmd/parquetgen}/dremel/read.go | 0 .../parquetgen}/dremel/read_repeated.go | 0 .../parquetgen}/dremel/read_test.go | 2 +- .../parquetgen}/dremel/testcases/doc/doc.go | 0 .../dremel/testcases/doc/generated.go | 0 .../dremel/testcases/person/generated.go | 0 .../dremel/testcases/person/person.go | 0 .../dremel/testcases/repetition/generated.go | 126 +++++++++++++++++- .../dremel/testcases/repetition/repetition.go | 1 + .../parquetgen}/dremel/write_optional.go | 0 .../parquetgen}/dremel/write_repeated.go | 3 +- .../parquetgen}/dremel/write_test.go | 3 +- {internal => cmd/parquetgen}/gen/funcs.go | 4 +- {internal => cmd/parquetgen}/gen/gen.go | 9 +- {internal => cmd/parquetgen}/gen/template.go | 0 .../parquetgen}/gen/template_bool.go | 0 .../parquetgen}/gen/template_bool_optional.go | 0 .../parquetgen}/gen/template_optional.go | 0 .../parquetgen}/gen/template_required.go | 0 .../parquetgen}/gen/template_string.go | 0 .../gen/template_string_optional.go | 0 .../parquetgen}/gen/template_struct.go | 0 cmd/parquetgen/main.go | 2 +- .../parquetgen}/parse/fields_test.go | 2 +- {internal => cmd/parquetgen}/parse/parse.go | 0 .../parquetgen}/parse/parse_test.go | 0 .../parquetgen}/structs/structs.go | 0 .../parquetgen}/structs/structs_test.go | 4 +- internal/fields/fields.go | 5 - internal/fields/fields_test.go | 1 - 34 files changed, 140 insertions(+), 30 deletions(-) rename {internal => cmd/parquetgen}/cases/cases.go (100%) rename {internal => cmd/parquetgen}/cases/cases_test.go (93%) rename {internal => cmd/parquetgen}/dremel/dremel.go (100%) rename {internal => cmd/parquetgen}/dremel/dremel_test.go (96%) rename {internal => cmd/parquetgen}/dremel/read.go (100%) rename {internal => cmd/parquetgen}/dremel/read_repeated.go (100%) rename {internal => cmd/parquetgen}/dremel/read_test.go (99%) rename {internal => cmd/parquetgen}/dremel/testcases/doc/doc.go (100%) rename {internal => cmd/parquetgen}/dremel/testcases/doc/generated.go (100%) rename {internal => cmd/parquetgen}/dremel/testcases/person/generated.go (100%) rename {internal => cmd/parquetgen}/dremel/testcases/person/person.go (100%) rename {internal => cmd/parquetgen}/dremel/testcases/repetition/generated.go (88%) rename {internal => cmd/parquetgen}/dremel/testcases/repetition/repetition.go (91%) rename {internal => cmd/parquetgen}/dremel/write_optional.go (100%) rename {internal => cmd/parquetgen}/dremel/write_repeated.go (99%) rename {internal => cmd/parquetgen}/dremel/write_test.go (99%) rename {internal => cmd/parquetgen}/gen/funcs.go (95%) rename {internal => cmd/parquetgen}/gen/gen.go (94%) rename {internal => cmd/parquetgen}/gen/template.go (100%) rename {internal => cmd/parquetgen}/gen/template_bool.go (100%) rename {internal => cmd/parquetgen}/gen/template_bool_optional.go (100%) rename {internal => cmd/parquetgen}/gen/template_optional.go (100%) rename {internal => cmd/parquetgen}/gen/template_required.go (100%) rename {internal => cmd/parquetgen}/gen/template_string.go (100%) rename {internal => cmd/parquetgen}/gen/template_string_optional.go (100%) rename {internal => cmd/parquetgen}/gen/template_struct.go (100%) rename {internal => cmd/parquetgen}/parse/fields_test.go (99%) rename {internal => cmd/parquetgen}/parse/parse.go (100%) rename {internal => cmd/parquetgen}/parse/parse_test.go (100%) rename {internal => cmd/parquetgen}/structs/structs.go (100%) rename {internal => cmd/parquetgen}/structs/structs_test.go (97%) diff --git a/internal/cases/cases.go b/cmd/parquetgen/cases/cases.go similarity index 100% rename from internal/cases/cases.go rename to cmd/parquetgen/cases/cases.go diff --git a/internal/cases/cases_test.go b/cmd/parquetgen/cases/cases_test.go similarity index 93% rename from internal/cases/cases_test.go rename to cmd/parquetgen/cases/cases_test.go index 316d9f7..54e629e 100644 --- a/internal/cases/cases_test.go +++ b/cmd/parquetgen/cases/cases_test.go @@ -4,7 +4,7 @@ import ( "fmt" "testing" - "github.com/parsyl/parquet/internal/cases" + "github.com/parsyl/parquet/cmd/parquetgen/cases" "github.com/stretchr/testify/assert" ) diff --git a/internal/dremel/dremel.go b/cmd/parquetgen/dremel/dremel.go similarity index 100% rename from internal/dremel/dremel.go rename to cmd/parquetgen/dremel/dremel.go diff --git a/internal/dremel/dremel_test.go b/cmd/parquetgen/dremel/dremel_test.go similarity index 96% rename from internal/dremel/dremel_test.go rename to cmd/parquetgen/dremel/dremel_test.go index cbe795e..18b6f2d 100644 --- a/internal/dremel/dremel_test.go +++ b/cmd/parquetgen/dremel/dremel_test.go @@ -4,9 +4,9 @@ import ( "bytes" "testing" - "github.com/parsyl/parquet/internal/dremel/testcases/doc" - "github.com/parsyl/parquet/internal/dremel/testcases/person" - "github.com/parsyl/parquet/internal/dremel/testcases/repetition" + "github.com/parsyl/parquet/cmd/parquetgen/dremel/testcases/doc" + "github.com/parsyl/parquet/cmd/parquetgen/dremel/testcases/person" + "github.com/parsyl/parquet/cmd/parquetgen/dremel/testcases/repetition" "github.com/stretchr/testify/assert" ) diff --git a/internal/dremel/read.go b/cmd/parquetgen/dremel/read.go similarity index 100% rename from internal/dremel/read.go rename to cmd/parquetgen/dremel/read.go diff --git a/internal/dremel/read_repeated.go b/cmd/parquetgen/dremel/read_repeated.go similarity index 100% rename from internal/dremel/read_repeated.go rename to cmd/parquetgen/dremel/read_repeated.go diff --git a/internal/dremel/read_test.go b/cmd/parquetgen/dremel/read_test.go similarity index 99% rename from internal/dremel/read_test.go rename to cmd/parquetgen/dremel/read_test.go index 9d798b2..5b1f528 100644 --- a/internal/dremel/read_test.go +++ b/cmd/parquetgen/dremel/read_test.go @@ -5,7 +5,7 @@ import ( "go/format" "testing" - "github.com/parsyl/parquet/internal/dremel" + "github.com/parsyl/parquet/cmd/parquetgen/dremel" "github.com/parsyl/parquet/internal/fields" "github.com/stretchr/testify/assert" ) diff --git a/internal/dremel/testcases/doc/doc.go b/cmd/parquetgen/dremel/testcases/doc/doc.go similarity index 100% rename from internal/dremel/testcases/doc/doc.go rename to cmd/parquetgen/dremel/testcases/doc/doc.go diff --git a/internal/dremel/testcases/doc/generated.go b/cmd/parquetgen/dremel/testcases/doc/generated.go similarity index 100% rename from internal/dremel/testcases/doc/generated.go rename to cmd/parquetgen/dremel/testcases/doc/generated.go diff --git a/internal/dremel/testcases/person/generated.go b/cmd/parquetgen/dremel/testcases/person/generated.go similarity index 100% rename from internal/dremel/testcases/person/generated.go rename to cmd/parquetgen/dremel/testcases/person/generated.go diff --git a/internal/dremel/testcases/person/person.go b/cmd/parquetgen/dremel/testcases/person/person.go similarity index 100% rename from internal/dremel/testcases/person/person.go rename to cmd/parquetgen/dremel/testcases/person/person.go diff --git a/internal/dremel/testcases/repetition/generated.go b/cmd/parquetgen/dremel/testcases/repetition/generated.go similarity index 88% rename from internal/dremel/testcases/repetition/generated.go rename to cmd/parquetgen/dremel/testcases/repetition/generated.go index 70072bf..9f06de4 100644 --- a/internal/dremel/testcases/repetition/generated.go +++ b/cmd/parquetgen/dremel/testcases/repetition/generated.go @@ -45,10 +45,10 @@ type ParquetWriter struct { func Fields(compression compression) []Field { return []Field{ NewStringOptionalField(readLinksBackwardCodes, writeLinksBackwardCodes, []string{"links", "backward", "code"}, []int{2, 2, 2}, optionalFieldCompression(compression)), - NewStringOptionalField(readLinksBackwardURL, writeLinksBackwardURL, []string{"links", "backward", "URL"}, []int{2, 2, 1}, optionalFieldCompression(compression)), + NewStringOptionalField(readLinksBackwardURL, writeLinksBackwardURL, []string{"links", "backward", "url"}, []int{2, 2, 1}, optionalFieldCompression(compression)), NewStringOptionalField(readLinksBackwardCountries, writeLinksBackwardCountries, []string{"links", "backward", "countries"}, []int{2, 2, 2}, optionalFieldCompression(compression)), NewStringOptionalField(readLinksForwardCodes, writeLinksForwardCodes, []string{"links", "forward", "code"}, []int{2, 2, 2}, optionalFieldCompression(compression)), - NewStringOptionalField(readLinksForwardURL, writeLinksForwardURL, []string{"links", "forward", "URL"}, []int{2, 2, 1}, optionalFieldCompression(compression)), + NewStringOptionalField(readLinksForwardURL, writeLinksForwardURL, []string{"links", "forward", "url"}, []int{2, 2, 1}, optionalFieldCompression(compression)), NewStringOptionalField(readLinksForwardCountries, writeLinksForwardCountries, []string{"links", "forward", "countries"}, []int{2, 2, 2}, optionalFieldCompression(compression)), } } @@ -135,6 +135,67 @@ func writeLinksBackwardCodes(x *Document, vals []string, defs, reps []uint8) (in return nVals, nLevels } +func readLinksBackwardURL(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Links) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links { + if i0 >= 1 { + lastRep = 1 + } + if len(x0.Backward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Backward { + if i1 >= 1 { + lastRep = 2 + } + if x1.URL == nil { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, *x1.URL) + } + } + } + } + } + + return vals, defs, reps +} + +func writeLinksBackwardURL(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 2) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 3: + x.Links[ind[0]].Backward[ind[1]].URL = pstring(vals[nVals]) + nVals++ + } + } + + return nVals, nLevels +} + func readLinksBackwardCountries(x Document) ([]string, []uint8, []uint8) { var vals []string var defs, reps []uint8 @@ -274,6 +335,67 @@ func writeLinksForwardCodes(x *Document, vals []string, defs, reps []uint8) (int return nVals, nLevels } +func readLinksForwardURL(x Document) ([]string, []uint8, []uint8) { + var vals []string + var defs, reps []uint8 + var lastRep uint8 + + if len(x.Links) == 0 { + defs = append(defs, 0) + reps = append(reps, lastRep) + } else { + for i0, x0 := range x.Links { + if i0 >= 1 { + lastRep = 1 + } + if len(x0.Forward) == 0 { + defs = append(defs, 1) + reps = append(reps, lastRep) + } else { + for i1, x1 := range x0.Forward { + if i1 >= 1 { + lastRep = 2 + } + if x1.URL == nil { + defs = append(defs, 2) + reps = append(reps, lastRep) + } else { + defs = append(defs, 3) + reps = append(reps, lastRep) + vals = append(vals, *x1.URL) + } + } + } + } + } + + return vals, defs, reps +} + +func writeLinksForwardURL(x *Document, vals []string, defs, reps []uint8) (int, int) { + var nVals, nLevels int + ind := make(indices, 2) + + for i := range defs { + def := defs[i] + rep := reps[i] + if i > 0 && rep == 0 { + break + } + + nLevels++ + ind.rep(rep) + + switch def { + case 3: + x.Links[ind[0]].Forward[ind[1]].URL = pstring(vals[nVals]) + nVals++ + } + } + + return nVals, nLevels +} + func readLinksForwardCountries(x Document) ([]string, []uint8, []uint8) { var vals []string var defs, reps []uint8 diff --git a/internal/dremel/testcases/repetition/repetition.go b/cmd/parquetgen/dremel/testcases/repetition/repetition.go similarity index 91% rename from internal/dremel/testcases/repetition/repetition.go rename to cmd/parquetgen/dremel/testcases/repetition/repetition.go index f17bdf1..b400aee 100644 --- a/internal/dremel/testcases/repetition/repetition.go +++ b/cmd/parquetgen/dremel/testcases/repetition/repetition.go @@ -14,6 +14,7 @@ type ( Language struct { Codes []string `parquet:"code"` + URL *string `parquet:"url"` Countries []string `parquet:"countries"` } ) diff --git a/internal/dremel/write_optional.go b/cmd/parquetgen/dremel/write_optional.go similarity index 100% rename from internal/dremel/write_optional.go rename to cmd/parquetgen/dremel/write_optional.go diff --git a/internal/dremel/write_repeated.go b/cmd/parquetgen/dremel/write_repeated.go similarity index 99% rename from internal/dremel/write_repeated.go rename to cmd/parquetgen/dremel/write_repeated.go index 096a98c..eba74c4 100644 --- a/internal/dremel/write_repeated.go +++ b/cmd/parquetgen/dremel/write_repeated.go @@ -110,8 +110,7 @@ func writeRepeated(f fields.Field) string { var buf bytes.Buffer if err := writeRepeatedTpl.Execute(&buf, wi); err != nil { - fmt.Println(err) - return "" + log.Fatal(err) } return string(buf.Bytes()) } diff --git a/internal/dremel/write_test.go b/cmd/parquetgen/dremel/write_test.go similarity index 99% rename from internal/dremel/write_test.go rename to cmd/parquetgen/dremel/write_test.go index 6b4b0f6..3704ff9 100644 --- a/internal/dremel/write_test.go +++ b/cmd/parquetgen/dremel/write_test.go @@ -5,7 +5,7 @@ import ( "go/format" "testing" - "github.com/parsyl/parquet/internal/dremel" + "github.com/parsyl/parquet/cmd/parquetgen/dremel" "github.com/parsyl/parquet/internal/fields" "github.com/stretchr/testify/assert" ) @@ -778,7 +778,6 @@ func TestWrite(t *testing.T) { flds := fields.Field{Type: ty, Children: []fields.Field{tc.field}}.Fields() f := flds[len(flds)-1] s := dremel.Write(f) - //fmt.Println(s) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.result, string(gocode)) diff --git a/internal/gen/funcs.go b/cmd/parquetgen/gen/funcs.go similarity index 95% rename from internal/gen/funcs.go rename to cmd/parquetgen/gen/funcs.go index 9d0660e..03b3fbf 100644 --- a/internal/gen/funcs.go +++ b/cmd/parquetgen/gen/funcs.go @@ -5,8 +5,8 @@ import ( "strings" "text/template" - "github.com/parsyl/parquet/internal/cases" - "github.com/parsyl/parquet/internal/dremel" + "github.com/parsyl/parquet/cmd/parquetgen/cases" + "github.com/parsyl/parquet/cmd/parquetgen/dremel" "github.com/parsyl/parquet/internal/fields" ) diff --git a/internal/gen/gen.go b/cmd/parquetgen/gen/gen.go similarity index 94% rename from internal/gen/gen.go rename to cmd/parquetgen/gen/gen.go index 0bd5b21..0cf754a 100644 --- a/internal/gen/gen.go +++ b/cmd/parquetgen/gen/gen.go @@ -8,9 +8,9 @@ import ( "text/template" "github.com/parsyl/parquet" + "github.com/parsyl/parquet/cmd/parquetgen/parse" + "github.com/parsyl/parquet/cmd/parquetgen/structs" "github.com/parsyl/parquet/internal/fields" - "github.com/parsyl/parquet/internal/parse" - "github.com/parsyl/parquet/internal/structs" sch "github.com/parsyl/parquet/schema" ) @@ -176,7 +176,6 @@ func getFieldType(se *sch.SchemaElement) (string, error) { } func dedupe(flds []fields.Field) []fields.Field { - fmt.Printf("deduping before: %+v\n", flds) seen := map[string]bool{} out := make([]fields.Field, 0, len(flds)) for _, f := range flds { @@ -186,11 +185,7 @@ func dedupe(flds []fields.Field) []fields.Field { seen[f.FieldType()] = true } } - fmt.Println("deduping", out) - for _, f := range out { - fmt.Println("cat", f.FieldType()) - } return out } diff --git a/internal/gen/template.go b/cmd/parquetgen/gen/template.go similarity index 100% rename from internal/gen/template.go rename to cmd/parquetgen/gen/template.go diff --git a/internal/gen/template_bool.go b/cmd/parquetgen/gen/template_bool.go similarity index 100% rename from internal/gen/template_bool.go rename to cmd/parquetgen/gen/template_bool.go diff --git a/internal/gen/template_bool_optional.go b/cmd/parquetgen/gen/template_bool_optional.go similarity index 100% rename from internal/gen/template_bool_optional.go rename to cmd/parquetgen/gen/template_bool_optional.go diff --git a/internal/gen/template_optional.go b/cmd/parquetgen/gen/template_optional.go similarity index 100% rename from internal/gen/template_optional.go rename to cmd/parquetgen/gen/template_optional.go diff --git a/internal/gen/template_required.go b/cmd/parquetgen/gen/template_required.go similarity index 100% rename from internal/gen/template_required.go rename to cmd/parquetgen/gen/template_required.go diff --git a/internal/gen/template_string.go b/cmd/parquetgen/gen/template_string.go similarity index 100% rename from internal/gen/template_string.go rename to cmd/parquetgen/gen/template_string.go diff --git a/internal/gen/template_string_optional.go b/cmd/parquetgen/gen/template_string_optional.go similarity index 100% rename from internal/gen/template_string_optional.go rename to cmd/parquetgen/gen/template_string_optional.go diff --git a/internal/gen/template_struct.go b/cmd/parquetgen/gen/template_struct.go similarity index 100% rename from internal/gen/template_struct.go rename to cmd/parquetgen/gen/template_struct.go diff --git a/cmd/parquetgen/main.go b/cmd/parquetgen/main.go index 8865600..87668fe 100644 --- a/cmd/parquetgen/main.go +++ b/cmd/parquetgen/main.go @@ -8,7 +8,7 @@ import ( "os" "github.com/parsyl/parquet" - "github.com/parsyl/parquet/internal/gen" + "github.com/parsyl/parquet/cmd/parquetgen/gen" sch "github.com/parsyl/parquet/schema" ) diff --git a/internal/parse/fields_test.go b/cmd/parquetgen/parse/fields_test.go similarity index 99% rename from internal/parse/fields_test.go rename to cmd/parquetgen/parse/fields_test.go index 11dc7f4..fdb4854 100644 --- a/internal/parse/fields_test.go +++ b/cmd/parquetgen/parse/fields_test.go @@ -6,8 +6,8 @@ import ( "log" "testing" + "github.com/parsyl/parquet/cmd/parquetgen/parse" "github.com/parsyl/parquet/internal/fields" - "github.com/parsyl/parquet/internal/parse" sch "github.com/parsyl/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/internal/parse/parse.go b/cmd/parquetgen/parse/parse.go similarity index 100% rename from internal/parse/parse.go rename to cmd/parquetgen/parse/parse.go diff --git a/internal/parse/parse_test.go b/cmd/parquetgen/parse/parse_test.go similarity index 100% rename from internal/parse/parse_test.go rename to cmd/parquetgen/parse/parse_test.go diff --git a/internal/structs/structs.go b/cmd/parquetgen/structs/structs.go similarity index 100% rename from internal/structs/structs.go rename to cmd/parquetgen/structs/structs.go diff --git a/internal/structs/structs_test.go b/cmd/parquetgen/structs/structs_test.go similarity index 97% rename from internal/structs/structs_test.go rename to cmd/parquetgen/structs/structs_test.go index 5faaa71..277778c 100644 --- a/internal/structs/structs_test.go +++ b/cmd/parquetgen/structs/structs_test.go @@ -5,7 +5,7 @@ import ( "go/format" "testing" - "github.com/parsyl/parquet/internal/structs" + "github.com/parsyl/parquet/cmd/parquetgen/structs" sch "github.com/parsyl/parquet/schema" "github.com/stretchr/testify/assert" ) @@ -69,7 +69,7 @@ func TestStruct(t *testing.T) { gocode, err := format.Source([]byte(s)) assert.NoError(t, err) if !assert.Equal(t, tc.expected, string(gocode)) { - fmt.Println(string(gocode)) + t.Fatal(string(gocode)) } }) } diff --git a/internal/fields/fields.go b/internal/fields/fields.go index ddaded6..51e1cc8 100644 --- a/internal/fields/fields.go +++ b/internal/fields/fields.go @@ -206,7 +206,6 @@ func (r RepCase) Case() string { type RepCases []RepCase func (r RepCases) UseRepCase(f Field, def int) bool { - fmt.Println("use rep case", r, f.MaxRepForDef(def)) if f.Parent.IsRoot() { return false } @@ -217,7 +216,6 @@ func (r RepCases) UseRepCase(f Field, def int) bool { // RepCases returns a RepCase slice based on the field types and // what sub-fields have already been seen. func (f Field) RepCases(def int) RepCases { - fmt.Println("rep cases", def) mr := int(f.MaxRep()) var out []RepCase @@ -243,10 +241,7 @@ func (f Field) RepCases(def int) RepCases { rollup = append(rollup, reps) } - fmt.Println(rollup, fld.Defined, fld.Name, reps, defs, mr, def) - if len(rollup) > 0 && (!fld.Defined || (defs == def && fld.RepetitionType != Required)) { - fmt.Println("xxxxxxxxxxxxx", rollup) out = append(out, RepCase{Reps: rollup[:], Rep: max(rollup), Repeated: reps > 0}) rollup = []int{} } diff --git a/internal/fields/fields_test.go b/internal/fields/fields_test.go index 11f349a..42c0a75 100644 --- a/internal/fields/fields_test.go +++ b/internal/fields/fields_test.go @@ -783,7 +783,6 @@ func TestInit(t *testing.T) { fields := fields.Field{Children: tc.fields}.Fields() field := fields[len(fields)-1] s := field.Init(tc.def, tc.rep) - //fmt.Println(s) gocode, err := format.Source([]byte(s)) assert.NoError(t, err) assert.Equal(t, tc.expected, string(gocode)) From 0d5190f576867a0537ca7f70971681aaa5539135 Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Fri, 18 Jun 2021 10:40:35 -0600 Subject: [PATCH 24/25] moved fields to cmd/parquetgen --- cmd/parquetgen/dremel/dremel.go | 2 +- cmd/parquetgen/dremel/read.go | 2 +- cmd/parquetgen/dremel/read_repeated.go | 2 +- cmd/parquetgen/dremel/read_test.go | 2 +- cmd/parquetgen/dremel/write_optional.go | 2 +- cmd/parquetgen/dremel/write_repeated.go | 2 +- cmd/parquetgen/dremel/write_test.go | 2 +- {internal => cmd/parquetgen}/fields/fields.go | 0 .../parquetgen}/fields/fields_test.go | 2 +- .../parquetgen}/fields/repetition.go | 0 .../parquetgen}/fields/templates.go | 0 cmd/parquetgen/gen/funcs.go | 2 +- cmd/parquetgen/gen/gen.go | 2 +- cmd/parquetgen/parse/fields_test.go | 2 +- cmd/parquetgen/parse/parse.go | 4 +- fields.go | 44 ++++++++++++++++--- 16 files changed, 52 insertions(+), 18 deletions(-) rename {internal => cmd/parquetgen}/fields/fields.go (100%) rename {internal => cmd/parquetgen}/fields/fields_test.go (99%) rename {internal => cmd/parquetgen}/fields/repetition.go (100%) rename {internal => cmd/parquetgen}/fields/templates.go (100%) diff --git a/cmd/parquetgen/dremel/dremel.go b/cmd/parquetgen/dremel/dremel.go index 38de5cb..69f0bf3 100644 --- a/cmd/parquetgen/dremel/dremel.go +++ b/cmd/parquetgen/dremel/dremel.go @@ -4,7 +4,7 @@ import ( "fmt" "strings" - "github.com/parsyl/parquet/internal/fields" + "github.com/parsyl/parquet/cmd/parquetgen/fields" ) // Package dremel generates code that parquetgen diff --git a/cmd/parquetgen/dremel/read.go b/cmd/parquetgen/dremel/read.go index f75e15e..b461b44 100644 --- a/cmd/parquetgen/dremel/read.go +++ b/cmd/parquetgen/dremel/read.go @@ -4,7 +4,7 @@ import ( "fmt" "strings" - "github.com/parsyl/parquet/internal/fields" + "github.com/parsyl/parquet/cmd/parquetgen/fields" ) func readRequired(f fields.Field) string { diff --git a/cmd/parquetgen/dremel/read_repeated.go b/cmd/parquetgen/dremel/read_repeated.go index 33e1ecd..ccd77f3 100644 --- a/cmd/parquetgen/dremel/read_repeated.go +++ b/cmd/parquetgen/dremel/read_repeated.go @@ -7,7 +7,7 @@ import ( "strings" "text/template" - "github.com/parsyl/parquet/internal/fields" + "github.com/parsyl/parquet/cmd/parquetgen/fields" ) func init() { diff --git a/cmd/parquetgen/dremel/read_test.go b/cmd/parquetgen/dremel/read_test.go index 5b1f528..bcfd710 100644 --- a/cmd/parquetgen/dremel/read_test.go +++ b/cmd/parquetgen/dremel/read_test.go @@ -6,7 +6,7 @@ import ( "testing" "github.com/parsyl/parquet/cmd/parquetgen/dremel" - "github.com/parsyl/parquet/internal/fields" + "github.com/parsyl/parquet/cmd/parquetgen/fields" "github.com/stretchr/testify/assert" ) diff --git a/cmd/parquetgen/dremel/write_optional.go b/cmd/parquetgen/dremel/write_optional.go index 244629e..04a9b12 100644 --- a/cmd/parquetgen/dremel/write_optional.go +++ b/cmd/parquetgen/dremel/write_optional.go @@ -6,7 +6,7 @@ import ( "strings" "text/template" - "github.com/parsyl/parquet/internal/fields" + "github.com/parsyl/parquet/cmd/parquetgen/fields" ) func init() { diff --git a/cmd/parquetgen/dremel/write_repeated.go b/cmd/parquetgen/dremel/write_repeated.go index eba74c4..de0935a 100644 --- a/cmd/parquetgen/dremel/write_repeated.go +++ b/cmd/parquetgen/dremel/write_repeated.go @@ -7,7 +7,7 @@ import ( "strings" "text/template" - "github.com/parsyl/parquet/internal/fields" + "github.com/parsyl/parquet/cmd/parquetgen/fields" ) var ( diff --git a/cmd/parquetgen/dremel/write_test.go b/cmd/parquetgen/dremel/write_test.go index 3704ff9..137f168 100644 --- a/cmd/parquetgen/dremel/write_test.go +++ b/cmd/parquetgen/dremel/write_test.go @@ -6,7 +6,7 @@ import ( "testing" "github.com/parsyl/parquet/cmd/parquetgen/dremel" - "github.com/parsyl/parquet/internal/fields" + "github.com/parsyl/parquet/cmd/parquetgen/fields" "github.com/stretchr/testify/assert" ) diff --git a/internal/fields/fields.go b/cmd/parquetgen/fields/fields.go similarity index 100% rename from internal/fields/fields.go rename to cmd/parquetgen/fields/fields.go diff --git a/internal/fields/fields_test.go b/cmd/parquetgen/fields/fields_test.go similarity index 99% rename from internal/fields/fields_test.go rename to cmd/parquetgen/fields/fields_test.go index 42c0a75..b44a617 100644 --- a/internal/fields/fields_test.go +++ b/cmd/parquetgen/fields/fields_test.go @@ -5,7 +5,7 @@ import ( "go/format" "testing" - "github.com/parsyl/parquet/internal/fields" + "github.com/parsyl/parquet/cmd/parquetgen/fields" "github.com/stretchr/testify/assert" ) diff --git a/internal/fields/repetition.go b/cmd/parquetgen/fields/repetition.go similarity index 100% rename from internal/fields/repetition.go rename to cmd/parquetgen/fields/repetition.go diff --git a/internal/fields/templates.go b/cmd/parquetgen/fields/templates.go similarity index 100% rename from internal/fields/templates.go rename to cmd/parquetgen/fields/templates.go diff --git a/cmd/parquetgen/gen/funcs.go b/cmd/parquetgen/gen/funcs.go index 03b3fbf..698ed77 100644 --- a/cmd/parquetgen/gen/funcs.go +++ b/cmd/parquetgen/gen/funcs.go @@ -7,7 +7,7 @@ import ( "github.com/parsyl/parquet/cmd/parquetgen/cases" "github.com/parsyl/parquet/cmd/parquetgen/dremel" - "github.com/parsyl/parquet/internal/fields" + "github.com/parsyl/parquet/cmd/parquetgen/fields" ) var ( diff --git a/cmd/parquetgen/gen/gen.go b/cmd/parquetgen/gen/gen.go index 0cf754a..94def56 100644 --- a/cmd/parquetgen/gen/gen.go +++ b/cmd/parquetgen/gen/gen.go @@ -8,9 +8,9 @@ import ( "text/template" "github.com/parsyl/parquet" + "github.com/parsyl/parquet/cmd/parquetgen/fields" "github.com/parsyl/parquet/cmd/parquetgen/parse" "github.com/parsyl/parquet/cmd/parquetgen/structs" - "github.com/parsyl/parquet/internal/fields" sch "github.com/parsyl/parquet/schema" ) diff --git a/cmd/parquetgen/parse/fields_test.go b/cmd/parquetgen/parse/fields_test.go index fdb4854..04419d8 100644 --- a/cmd/parquetgen/parse/fields_test.go +++ b/cmd/parquetgen/parse/fields_test.go @@ -6,8 +6,8 @@ import ( "log" "testing" + "github.com/parsyl/parquet/cmd/parquetgen/fields" "github.com/parsyl/parquet/cmd/parquetgen/parse" - "github.com/parsyl/parquet/internal/fields" sch "github.com/parsyl/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/cmd/parquetgen/parse/parse.go b/cmd/parquetgen/parse/parse.go index 2243adf..056c366 100644 --- a/cmd/parquetgen/parse/parse.go +++ b/cmd/parquetgen/parse/parse.go @@ -9,8 +9,8 @@ import ( "go/ast" - "github.com/parsyl/parquet/internal/fields" - flds "github.com/parsyl/parquet/internal/fields" + "github.com/parsyl/parquet/cmd/parquetgen/fields" + flds "github.com/parsyl/parquet/cmd/parquetgen/fields" ) const letters = "abcdefghijklmnopqrstuvwxyz" diff --git a/fields.go b/fields.go index a78b037..dfcb498 100644 --- a/fields.go +++ b/fields.go @@ -11,11 +11,45 @@ import ( "io" "github.com/golang/snappy" - "github.com/parsyl/parquet/internal/fields" "github.com/parsyl/parquet/internal/rle" sch "github.com/parsyl/parquet/schema" ) +// RepetitionType is an enum of the possible +// parquet repetition types +type RepetitionType int + +const ( + Unseen RepetitionType = -1 + Required RepetitionType = 0 + Optional RepetitionType = 1 + Repeated RepetitionType = 2 +) + +type RepetitionTypes []RepetitionType + +// MaxDef returns the largest definition level +func (r RepetitionTypes) MaxDef() uint8 { + var out uint8 + for _, rt := range r { + if rt == Optional || rt == Repeated { + out++ + } + } + return out +} + +// MaxRep returns the largest repetition level +func (r RepetitionTypes) MaxRep() uint8 { + var out uint8 + for _, rt := range r { + if rt == Repeated { + out++ + } + } + return out +} + // RequiredField writes the raw data for required columns type RequiredField struct { pth []string @@ -121,12 +155,12 @@ type OptionalField struct { repeated bool } -func getRepetitionTypes(in []int) fields.RepetitionTypes { - out := make([]fields.RepetitionType, len(in)) +func getRepetitionTypes(in []int) RepetitionTypes { + out := make([]RepetitionType, len(in)) for i, x := range in { - out[i] = fields.RepetitionType(x) + out[i] = RepetitionType(x) } - return fields.RepetitionTypes(out) + return RepetitionTypes(out) } // NewOptionalField creates an optional field From 49a41b2e1c7c48d129ea91979f2402514430baee Mon Sep 17 00:00:00 2001 From: Craig Swank Date: Fri, 18 Jun 2021 10:44:36 -0600 Subject: [PATCH 25/25] rename examples to _examples --- .gitignore | 10 +++++----- {examples => _examples}/people/README.md | 0 {examples => _examples}/people/main.go | 0 {examples => _examples}/people/people.go | 0 {examples => _examples}/via_parquet/README.md | 0 {examples => _examples}/via_parquet/main.go | 0 {examples => _examples}/via_parquet/people.parquet | Bin 7 files changed, 5 insertions(+), 5 deletions(-) rename {examples => _examples}/people/README.md (100%) rename {examples => _examples}/people/main.go (100%) rename {examples => _examples}/people/people.go (100%) rename {examples => _examples}/via_parquet/README.md (100%) rename {examples => _examples}/via_parquet/main.go (100%) rename {examples => _examples}/via_parquet/people.parquet (100%) diff --git a/.gitignore b/.gitignore index 4ca7abc..a1f5f0d 100644 --- a/.gitignore +++ b/.gitignore @@ -14,8 +14,8 @@ *.out -examples/people/parquet.go -examples/people/people.parquet -examples/people/read.py -examples/via_parquet/generated_struct.go -examples/via_parquet/parquet.go \ No newline at end of file +_examples/people/parquet.go +_examples/people/people.parquet +_examples/people/read.py +_examples/via_parquet/generated_struct.go +_examples/via_parquet/parquet.go \ No newline at end of file diff --git a/examples/people/README.md b/_examples/people/README.md similarity index 100% rename from examples/people/README.md rename to _examples/people/README.md diff --git a/examples/people/main.go b/_examples/people/main.go similarity index 100% rename from examples/people/main.go rename to _examples/people/main.go diff --git a/examples/people/people.go b/_examples/people/people.go similarity index 100% rename from examples/people/people.go rename to _examples/people/people.go diff --git a/examples/via_parquet/README.md b/_examples/via_parquet/README.md similarity index 100% rename from examples/via_parquet/README.md rename to _examples/via_parquet/README.md diff --git a/examples/via_parquet/main.go b/_examples/via_parquet/main.go similarity index 100% rename from examples/via_parquet/main.go rename to _examples/via_parquet/main.go diff --git a/examples/via_parquet/people.parquet b/_examples/via_parquet/people.parquet similarity index 100% rename from examples/via_parquet/people.parquet rename to _examples/via_parquet/people.parquet