Skip to content

Commit

Permalink
exec: initial commit of execgen tool
Browse files Browse the repository at this point in the history
Execgen will be our tool for generating templated code necessary for
columnarized execution. So far it only generates the
EncDatumRowsToColVec function, which is used by the columnarizer to
convert a RowSource into a columnarized Operator.

Release note: None
  • Loading branch information
solongordon committed Oct 22, 2018
1 parent 2998190 commit c106c32
Show file tree
Hide file tree
Showing 9 changed files with 459 additions and 117 deletions.
16 changes: 12 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,8 @@ PROTOBUF_TARGETS := bin/.go_protobuf_sources bin/.gw_protobuf_sources bin/.cpp_p

DOCGEN_TARGETS := bin/.docgen_bnfs bin/.docgen_functions

EXECGEN_TARGETS = pkg/sql/exec/rowstovec.og.go

OPTGEN_TARGETS = \
pkg/sql/opt/memo/expr.og.go \
pkg/sql/opt/operator.og.go \
Expand Down Expand Up @@ -735,7 +737,7 @@ BUILDINFO = .buildinfo/tag .buildinfo/rev
BUILD_TAGGED_RELEASE =

$(go-targets): bin/.bootstrap $(BUILDINFO) $(CGO_FLAGS_FILES) $(PROTOBUF_TARGETS)
$(go-targets): $(SQLPARSER_TARGETS) $(OPTGEN_TARGETS)
$(go-targets): $(SQLPARSER_TARGETS) $(EXECGEN_TARGETS) $(OPTGEN_TARGETS)
$(go-targets): override LINKFLAGS += \
-X "github.com/cockroachdb/cockroach/pkg/build.tag=$(shell cat .buildinfo/tag)" \
-X "github.com/cockroachdb/cockroach/pkg/build.rev=$(shell cat .buildinfo/rev)" \
Expand Down Expand Up @@ -914,7 +916,7 @@ dupl: bin/.bootstrap

.PHONY: generate
generate: ## Regenerate generated code.
generate: protobuf $(DOCGEN_TARGETS) $(OPTGEN_TARGETS) $(SQLPARSER_TARGETS) $(SETTINGS_DOC_PAGE) bin/langgen
generate: protobuf $(DOCGEN_TARGETS) $(EXECGEN_TARGETS) $(OPTGEN_TARGETS) $(SQLPARSER_TARGETS) $(SETTINGS_DOC_PAGE) bin/langgen
$(GO) generate $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LINKFLAGS)' $(PKG)

.PHONY: lint
Expand Down Expand Up @@ -962,6 +964,7 @@ $(ARCHIVE): $(ARCHIVE).tmp
ARCHIVE_EXTRAS = \
$(BUILDINFO) \
$(SQLPARSER_TARGETS) \
$(EXECGEN_TARGETS) \
$(OPTGEN_TARGETS) \
pkg/ui/distccl/bindata.go pkg/ui/distoss/bindata.go

Expand Down Expand Up @@ -1305,6 +1308,9 @@ settings-doc-gen := $(if $(filter buildshort,$(MAKECMDGOALS)),$(COCKROACHSHORT),
$(SETTINGS_DOC_PAGE): $(settings-doc-gen)
@$(settings-doc-gen) gen settings-list --format=html > $@

pkg/sql/exec/%.og.go: bin/execgen
execgen $@

optgen-defs := pkg/sql/opt/ops/*.opt
optgen-norm-rules := pkg/sql/opt/norm/rules/*.opt
optgen-xform-rules := pkg/sql/opt/xform/rules/*.opt
Expand Down Expand Up @@ -1362,7 +1368,7 @@ clean: clean-c-deps
.PHONY: maintainer-clean
maintainer-clean: ## Like clean, but also remove some auto-generated source code.
maintainer-clean: clean ui-maintainer-clean
rm -f $(SQLPARSER_TARGETS) $(OPTGEN_TARGETS) $(UI_PROTOS_OSS) $(UI_PROTOS_CCL)
rm -f $(SQLPARSER_TARGETS) $(EXECGEN_TARGETS) $(OPTGEN_TARGETS) $(UI_PROTOS_OSS) $(UI_PROTOS_CCL)

.PHONY: unsafe-clean
unsafe-clean: ## Like maintainer-clean, but also remove ALL untracked/ignored files.
Expand All @@ -1380,6 +1386,7 @@ bins = \
bin/cockroach-oss \
bin/cockroach-short \
bin/docgen \
bin/execgen \
bin/generate-binary \
bin/github-post \
bin/github-pull-request-make \
Expand All @@ -1403,6 +1410,7 @@ testbins = \
bin/logictestccl

# Mappings for binaries that don't live in pkg/cmd.
execgen-package = ./pkg/sql/exec/execgen/cmd/execgen
langgen-package = ./pkg/sql/opt/optgen/cmd/langgen
optgen-package = ./pkg/sql/opt/optgen/cmd/optgen
logictest-package = ./pkg/sql/logictest
Expand All @@ -1414,7 +1422,7 @@ logictest-bins := bin/logictest bin/logictestopt bin/logictestccl
# Additional dependencies for binaries that depend on generated code.
#
# TODO(benesch): Derive this automatically. This is getting out of hand.
bin/workload bin/docgen bin/roachtest $(logictest-bins): $(SQLPARSER_TARGETS) $(PROTOBUF_TARGETS)
bin/workload bin/docgen bin/execgen bin/roachtest $(logictest-bins): $(SQLPARSER_TARGETS) $(PROTOBUF_TARGETS)
bin/workload bin/roachtest $(logictest-bins): $(C_LIBS_CCL) $(CGO_FLAGS_FILES)
bin/roachtest bin/logictestopt: $(OPTGEN_TARGETS)

Expand Down
2 changes: 2 additions & 0 deletions build/variables.mk
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ define VALID_VARS
C_LIBS_OSS
DOCGEN_TARGETS
DUPLFLAGS
EXECGEN_TARGETS
EXTRA_XCMAKE_FLAGS
EXTRA_XCONFIGURE_FLAGS
FILES
Expand Down Expand Up @@ -156,6 +157,7 @@ define VALID_VARS
cmake-flags
configure-flags
cyan
execgen-package
go-targets
go-targets-ccl
have-defs
Expand Down
116 changes: 3 additions & 113 deletions pkg/sql/distsqlrun/columnarizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,9 @@
package distsqlrun

import (
"fmt"

"github.com/cockroachdb/cockroach/pkg/sql/exec"
"github.com/cockroachdb/cockroach/pkg/sql/exec/types"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
)

// columnarizer turns a RowSource input into an exec.Operator output, by reading
Expand Down Expand Up @@ -91,115 +87,9 @@ func (c *columnarizer) Next() exec.ColBatch {

// Write each column into the output batch.
for idx, ct := range columnTypes {
vec := c.batch.ColVec(idx)
switch ct.SemanticType {
// TODO(solon): these should be autogenerated from a template.
case sqlbase.ColumnType_BOOL:
col := vec.Bool()
for i := uint16(0); i < nRows; i++ {
ed := c.buffered[i][idx]
if err := ed.EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
if ed.Datum == tree.DNull {
vec.SetNull(i)
}
col.Set(i, bool(*ed.Datum.(*tree.DBool)))
}
case sqlbase.ColumnType_INT:
switch ct.Width {
case 8:
col := vec.Int8()
for i := uint16(0); i < nRows; i++ {
ed := c.buffered[i][idx]
if err := ed.EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
if ed.Datum == tree.DNull {
vec.SetNull(i)
}
col[i] = int8(*ed.Datum.(*tree.DInt))
}
case 16:
col := vec.Int16()
for i := uint16(0); i < nRows; i++ {
ed := c.buffered[i][idx]
if err := ed.EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
if ed.Datum == tree.DNull {
vec.SetNull(i)
}
col[i] = int16(*ed.Datum.(*tree.DInt))
}
case 32:
col := vec.Int32()
for i := uint16(0); i < nRows; i++ {
ed := c.buffered[i][idx]
if err := ed.EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
if ed.Datum == tree.DNull {
vec.SetNull(i)
}
col[i] = int32(*ed.Datum.(*tree.DInt))
}
case 0, 64:
col := vec.Int64()
for i := uint16(0); i < nRows; i++ {
if c.buffered[i][idx].Datum == nil {
if err := c.buffered[i][idx].EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
}
if c.buffered[i][idx].Datum == tree.DNull {
vec.SetNull(i)
}
col[i] = int64(*c.buffered[i][idx].Datum.(*tree.DInt))
}
default:
panic(fmt.Sprintf("integer with unknown width %d", ct.Width))
}
case sqlbase.ColumnType_FLOAT:
col := vec.Float64()
for i := uint16(0); i < nRows; i++ {
ed := c.buffered[i][idx]
if err := ed.EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
if ed.Datum == tree.DNull {
vec.SetNull(i)
}
col[i] = float64(*ed.Datum.(*tree.DFloat))
}
case sqlbase.ColumnType_BYTES:
col := vec.Bytes()
for i := uint16(0); i < nRows; i++ {
if c.buffered[i][idx].Datum == nil {
if err := c.buffered[i][idx].EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
}
if c.buffered[i][idx].Datum == tree.DNull {
vec.SetNull(i)
}
col.Set(i, encoding.UnsafeConvertStringToBytes(string(*c.buffered[i][idx].Datum.(*tree.DBytes))))
}
case sqlbase.ColumnType_STRING:
col := vec.Bytes()
for i := uint16(0); i < nRows; i++ {
if c.buffered[i][idx].Datum == nil {
if err := c.buffered[i][idx].EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
}
if c.buffered[i][idx].Datum == tree.DNull {
vec.SetNull(i)
}
col.Set(i, encoding.UnsafeConvertStringToBytes(string(*c.buffered[i][idx].Datum.(*tree.DString))))
}
default:
panic(fmt.Sprintf("Unsupported column type %s", ct.SQLString()))
err := exec.EncDatumRowsToColVec(c.buffered[:nRows], c.batch.ColVec(idx), idx, &ct, &c.da)
if err != nil {
panic(err)
}
}
return c.batch
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/exec/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.og.go
160 changes: 160 additions & 0 deletions pkg/sql/exec/execgen/cmd/execgen/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
// Copyright 2018 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

package main

import (
	"bytes"
	"flag"
	"fmt"
	"go/format"
	"io"
	"os"
	"path/filepath"
	"sort"

	"github.com/pkg/errors"
)

var (
	// errInvalidArgCount is reported by run when no output paths remain on the
	// command line after flags have been parsed.
	errInvalidArgCount = errors.New("invalid number of arguments")
)

// main runs execgen over the process arguments, sending diagnostics to
// os.Stderr, and exits with status 2 when the run reports failure.
func main() {
	tool := execgen{stdErr: os.Stderr}
	if succeeded := tool.run(os.Args[1:]...); succeeded {
		return
	}
	os.Exit(2)
}

// execgen holds the state of a single invocation of the execgen tool.
type execgen struct {
	// useGoFmt controls whether generated code is passed through
	// go/format.Source before it is written out. It is bound to the -useGoFmt
	// command-line flag in run.
	useGoFmt bool

	// stdErr is the writer to which all standard error output will be redirected
	// (usage text, flag parsing errors, and messages from reportError).
	stdErr io.Writer

	// cmdLine stores the set of flags used to invoke the Execgen tool. It is
	// created by run and read by usage to print the flag defaults.
	cmdLine *flag.FlagSet
}

// generator writes the contents of one generated file to the given writer.
type generator func(io.Writer) error

// generators maps the base name of an output file to the generator that
// produces it.
var generators = map[string]generator{}

// registerGenerator associates g with filename in the generators table. It
// panics if a generator has already been registered under that filename.
func registerGenerator(g generator, filename string) {
	_, exists := generators[filename]
	if exists {
		panic(fmt.Sprintf("%s generator already registered", filename))
	}
	generators[filename] = g
}

// run parses args as the execgen command line and generates every output file
// named by the remaining (non-flag) arguments. The generator for each path is
// looked up by the file component of that path. It returns true only if all
// files were generated; on any failure an error is reported to g.stdErr and
// processing stops.
func (g *execgen) run(args ...string) bool {
	// Build the flag set. g.cmdLine must be populated before Parse because the
	// usage callback reads it.
	flags := flag.NewFlagSet("execgen", flag.ContinueOnError)
	g.cmdLine = flags
	flags.SetOutput(g.stdErr)
	flags.Usage = g.usage
	flags.BoolVar(&g.useGoFmt, "useGoFmt", true, "run go fmt on generated code")
	if parseErr := flags.Parse(args); parseErr != nil {
		return false
	}

	// Everything left over after the flags is an output path.
	outputs := flags.Args()
	if len(outputs) == 0 {
		flags.Usage()
		g.reportError(errInvalidArgCount)
		return false
	}

	for _, path := range outputs {
		_, name := filepath.Split(path)
		genFunc := generators[name]
		if genFunc == nil {
			g.reportError(errors.Errorf("unrecognized filename: %s", name))
			return false
		}
		if genErr := g.generate(genFunc, path); genErr != nil {
			g.reportError(genErr)
			return false
		}
	}

	return true
}

// generate runs genFunc into an in-memory buffer that starts with the
// "Code generated" header and writes the result to the file at path out.
//
// When g.useGoFmt is set the buffer is formatted with go/format first. If
// formatting fails, the unformatted source is still written (best effort) so
// it can be inspected, and the wrapped formatting error is returned.
func (g *execgen) generate(genFunc generator, out string) error {
	var buf bytes.Buffer
	buf.WriteString("// Code generated by execgen; DO NOT EDIT.\n")
	if genErr := genFunc(&buf); genErr != nil {
		return genErr
	}

	raw := buf.Bytes()
	if !g.useGoFmt {
		return g.writeOutputFile(raw, out)
	}

	formatted, fmtErr := format.Source(raw)
	if fmtErr == nil {
		return g.writeOutputFile(formatted, out)
	}

	// Write out incorrect source for easier debugging. Any write error is
	// ignored because the formatting error is the one worth reporting.
	wrapped := errors.Wrap(fmtErr, "Code formatting failed with Go parse error")
	_ = g.writeOutputFile(raw, out)
	return wrapped
}

// writeOutputFile creates (or truncates) the file at path out and writes b to
// it. It returns the first error encountered while creating, writing, or
// closing the file. The Close error is propagated because, for a file that was
// just written, a failed close can mean the data never reached disk.
func (g *execgen) writeOutputFile(b []byte, out string) error {
	file, err := os.Create(out)
	if err != nil {
		return err
	}

	if _, err := file.Write(b); err != nil {
		// The write error is the more informative one; close only to release
		// the descriptor.
		_ = file.Close()
		return err
	}
	return file.Close()
}

// usage is a replacement usage function for the flags package. It writes a
// short description of the tool, the invocation syntax, the registered output
// filenames, and the flag defaults to g.stdErr.
//
// The filenames are printed in sorted order: map iteration order is
// randomized, so ranging over generators directly would make the help text
// differ from run to run.
func (g *execgen) usage() {
	fmt.Fprint(g.stdErr, "Execgen is a tool for generating templated code related to ")
	fmt.Fprint(g.stdErr, "columnarized execution.\n\n")

	fmt.Fprint(g.stdErr, "Usage:\n")
	fmt.Fprint(g.stdErr, "\texecgen [path]...\n\n")

	filenames := make([]string, 0, len(generators))
	for filename := range generators {
		filenames = append(filenames, filename)
	}
	sort.Strings(filenames)

	fmt.Fprint(g.stdErr, "Supported filenames are:\n")
	for _, filename := range filenames {
		fmt.Fprintf(g.stdErr, "\t%s\n", filename)
	}
	fmt.Fprint(g.stdErr, "\n")

	fmt.Fprint(g.stdErr, "Flags:\n")
	g.cmdLine.PrintDefaults()
	fmt.Fprint(g.stdErr, "\n")
}

// reportError writes err to g.stdErr as a single line prefixed with "ERROR: ".
func (g *execgen) reportError(err error) {
	line := fmt.Sprintf("ERROR: %v\n", err)
	fmt.Fprint(g.stdErr, line)
}
Loading

0 comments on commit c106c32

Please sign in to comment.