Skip to content

Commit

Permalink
prep work for joins (aliases, namespaces)
Browse files Browse the repository at this point in the history
There's now a way to alias both columns and datasets, support for
namespaces (foo.bar) and a few other goodies we'll need for joins.

Joins are not in yet, and the namespace information is ignored, but at
least the structures and parsers are in place.
  • Loading branch information
kokes committed Aug 25, 2021
1 parent 9269386 commit 831ad93
Show file tree
Hide file tree
Showing 12 changed files with 277 additions and 138 deletions.
49 changes: 27 additions & 22 deletions src/database/dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,21 +259,6 @@ type Dataset struct {
Stripes []Stripe `json:"stripes"`
}

// DatasetIdentifier contains fields needed for a dataset/version lookup:
// a dataset is addressed either by an exact version or as the latest
// version stored under a given name.
type DatasetIdentifier struct {
// Name is the dataset name that lookups match against.
Name string
// Version identifies one specific version of the dataset; it is only
// consulted when Latest is false.
Version UID
// Latest can be used to avoid using Version (e.g. if it's unknown); when
// set, the lookup picks the most recently created dataset of that name.
Latest bool
}

// String renders the identifier in its textual form: the bare dataset name
// when the latest version is requested, or "name@v<version>" when a specific
// version is pinned.
func (did DatasetIdentifier) String() string {
	if !did.Latest {
		return fmt.Sprintf("%s@v%s", did.Name, did.Version)
	}
	return did.Name
}

// NewDataset creates a new empty dataset
func NewDataset(name string) *Dataset {
// ARCH: we don't give the user a choice - the name will get modified if it doesn't
Expand Down Expand Up @@ -303,25 +288,45 @@ func (db *Database) stripePath(ds *Dataset, stripe Stripe) string {
// OPTIM: not efficient in this implementation, but we don't have a map-like structure
// to store our datasets - we keep them in a slice, so that we have predictable order
// -> we need a sorted map
func (db *Database) GetDataset(did *DatasetIdentifier) (*Dataset, error) {
func (db *Database) GetDatasetByVersion(name, version string) (*Dataset, error) {
var found *Dataset
for _, dataset := range db.Datasets {
if dataset.Name != did.Name {
if dataset.Name != name {
continue
}
if did.Latest && (found == nil || dataset.Created > found.Created) {
found = dataset
}
if !did.Latest && dataset.ID == did.Version {
if dataset.ID.String() == version {
return dataset, nil
}
}
if found == nil {
return nil, fmt.Errorf("dataset %v not found: %w", did.Name, errDatasetNotFound)
return nil, fmt.Errorf("dataset %v@v%v not found: %w", name, version, errDatasetNotFound)
}
return found, nil
}

// GetDatasetLatest returns the dataset stored under the given name that has
// the greatest Created value. If several datasets share that value, the one
// that comes first in db.Datasets is returned. When no dataset carries the
// name, the returned error wraps errDatasetNotFound.
func (db *Database) GetDatasetLatest(name string) (*Dataset, error) {
	var newest *Dataset
	for _, candidate := range db.Datasets {
		// only datasets of the requested name compete; a strictly newer one replaces the current pick
		if candidate.Name == name && (newest == nil || candidate.Created > newest.Created) {
			newest = candidate
		}
	}
	if newest != nil {
		return newest, nil
	}
	return nil, fmt.Errorf("dataset %v not found: %w", name, errDatasetNotFound)
}

// GetDataset looks up a dataset by name. With latest set, the version
// argument is ignored and the lookup is delegated to GetDatasetLatest;
// otherwise it is delegated to GetDatasetByVersion with the given version.
func (db *Database) GetDataset(name, version string, latest bool) (*Dataset, error) {
	if !latest {
		return db.GetDatasetByVersion(name, version)
	}
	return db.GetDatasetLatest(name)
}

// AddDataset adds a Dataset to a Database
// this is a pretty rare event, so we don't expect much contention
// it's just to avoid some issues when marshaling the object around in the API etc.
Expand Down
12 changes: 6 additions & 6 deletions src/database/dataset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func TestAddingDatasets(t *testing.T) {
t.Fatal(err)
}

ds2, err := db.GetDataset(&DatasetIdentifier{Name: ds.Name, Latest: true})
ds2, err := db.GetDatasetLatest(ds.Name)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -162,7 +162,7 @@ func TestAddingDatasetsWithVersions(t *testing.T) {
}
last := dss[len(dss)-1]

ds, err := db.GetDataset(&DatasetIdentifier{Name: last.Name, Latest: true})
ds, err := db.GetDatasetLatest(last.Name)
if err != nil {
t.Fatal(err)
}
Expand All @@ -171,7 +171,7 @@ func TestAddingDatasetsWithVersions(t *testing.T) {
}

for _, ds := range dss {
rds, err := db.GetDataset(&DatasetIdentifier{Name: ds.Name, Version: ds.ID})
rds, err := db.GetDatasetByVersion(ds.Name, ds.ID.String())
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -202,7 +202,7 @@ func TestAddingDatasetsWithRestarts(t *testing.T) {
t.Fatal(err)
}

ds2, err := db2.GetDataset(&DatasetIdentifier{Name: ds.Name, Latest: true})
ds2, err := db2.GetDatasetLatest(ds.Name)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -246,7 +246,7 @@ func TestRemovingDatasets(t *testing.T) {
t.Fatal(err)
}

_, err = db.GetDataset(&DatasetIdentifier{Name: ds.Name, Latest: true})
_, err = db.GetDatasetLatest(ds.Name)
if !errors.Is(err, errDatasetNotFound) {
t.Error("should not be able to retrieve a deleted dataset")
}
Expand Down Expand Up @@ -280,7 +280,7 @@ func TestGettingNewDatasets(t *testing.T) {
if err := db.AddDataset(ds); err != nil {
t.Fatal(err)
}
ds2, err := db.GetDataset(&DatasetIdentifier{Name: ds.Name, Latest: true})
ds2, err := db.GetDatasetLatest(ds.Name)
if err != nil {
t.Fatal(err)
}
Expand Down
4 changes: 2 additions & 2 deletions src/query/expr/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ func Evaluate(expr Expression, chunkLength int, columnData map[string]column.Chu
return nil, fmt.Errorf("unknown prefix token: %v", node.operator)
}
case *Identifier:
lookupValue := node.name
lookupValue := node.Name
if !node.quoted {
lookupValue = strings.ToLower(lookupValue)
}
col, ok := columnData[lookupValue]
if !ok {
// we validated the expression, so this should not happen?
// perhaps to catch bugs in case folding?
return nil, fmt.Errorf("column %v not found", node.name)
return nil, fmt.Errorf("column %v not found", node.Name)
}
if filter != nil {
return col.Prune(filter), nil
Expand Down
10 changes: 7 additions & 3 deletions src/query/expr/expression.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"strings"

"github.com/kokes/smda/src/column"
"github.com/kokes/smda/src/database"
)

var errNoNestedAggregations = errors.New("cannot nest aggregations (e.g. sum(min(a)))")
Expand Down Expand Up @@ -40,7 +39,7 @@ func PruneFunctionCalls(ex Expression) {
// 4) The HTML/JS frontend needs to incorporate this in some way
type Query struct {
Select []Expression
Dataset *database.DatasetIdentifier
Dataset *Dataset
Filter Expression
Aggregate []Expression
Order []Expression
Expand All @@ -65,6 +64,9 @@ func (q Query) String() string {
// ARCH: preparing for queries without FROM clauses
if q.Dataset != nil {
sb.WriteString(fmt.Sprintf(" FROM %s", q.Dataset))
if q.Dataset.alias != nil {
sb.WriteString(fmt.Sprintf(" AS %v", q.Dataset.alias))
}
}
if q.Filter != nil {
sb.WriteString(fmt.Sprintf(" WHERE %s", q.Filter))
Expand Down Expand Up @@ -193,6 +195,8 @@ func HasIdentifiers(expr Expression) bool {
// ARCH: this panics when a given column is not in the schema, but since we already validated
// this schema during the ReturnType call, we should be fine. It's still a bit worrying that
// we might panic though.
// TODO(next)/TODO(joins): all the columnsUsed functions need to support multiple schemas and namespaces
// perhaps we should return []*Identifier, that would solve a few other issues as well
func ColumnsUsed(expr Expression, schema column.TableSchema) (cols []string) {
if idf, ok := expr.(*Identifier); ok {
var lookup func(string) (int, column.Schema, error)
Expand All @@ -201,7 +205,7 @@ func ColumnsUsed(expr Expression, schema column.TableSchema) (cols []string) {
lookup = schema.LocateColumn
}

_, col, err := lookup(idf.name)
_, col, err := lookup(idf.Name)
if err != nil {
panic(err)
}
Expand Down
81 changes: 57 additions & 24 deletions src/query/expr/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const (
ADDITION // +
PRODUCT // *
PREFIX // -X or NOT X
NAMESPACE // foo.bar
CALL // myFunction(X)
)

Expand All @@ -48,6 +49,7 @@ var precedences = map[tokenType]int{
tokenQuo: PRODUCT,
tokenMul: PRODUCT,
tokenLparen: CALL,
tokenDot: NAMESPACE,
}

type (
Expand Down Expand Up @@ -111,6 +113,7 @@ func NewParser(s string) (*Parser, error) {
tokenGt: p.parseInfixExpression,
tokenLte: p.parseInfixExpression,
tokenGte: p.parseInfixExpression,
tokenDot: p.parseInfixExpression,
tokenLparen: p.parseCallExpression,
}

Expand Down Expand Up @@ -148,7 +151,8 @@ func (p *Parser) peekPrecedence() int {
func (p *Parser) parseIdentifer() Expression {
val := p.curToken().value
val = bytes.ToLower(val) // unquoted identifiers are case insensitive, so we can lowercase them
return &Identifier{name: string(val)}
// ARCH: we should perhaps use NewIdentifier as well... for it to be unified (this way we enforce quoted: false, though)
return &Identifier{Name: string(val)}
}
func (p *Parser) parseIdentiferQuoted() Expression {
val := p.curToken().value
Expand Down Expand Up @@ -205,7 +209,7 @@ func (p *Parser) parseCallExpression(left Expression) Expression {
p.errors = append(p.errors, fmt.Errorf("%w: %v", errInvalidFunctionName, left.String()))
return nil
}
funName := id.name
funName := id.Name
var distinct bool

if p.peekToken().ttype == tokenDistinct {
Expand Down Expand Up @@ -285,6 +289,18 @@ func (p *Parser) parseInfixExpression(left Expression) Expression {
}

expr.right = p.parseExpression(precedence)

if expr.operator == tokenDot {
i1, ok1 := expr.left.(*Identifier)
i2, ok2 := expr.right.(*Identifier)
if !(ok1 && ok2) {
p.errors = append(p.errors, fmt.Errorf("namespace selector ('.') requires an identifier on both sides of it, got %v and %v", expr.left, expr.right))
return nil
}
i2.Namespace = i1
return i2
}

return expr
}

Expand Down Expand Up @@ -319,7 +335,7 @@ func (p *Parser) parseExpression(precedence int) Expression {
// `select * from foo` or `select *, foo from bar` etc.
if curToken.ttype == tokenMul && (p.peekToken().ttype == tokenEOF || p.peekToken().ttype == tokenComma || p.peekToken().ttype == tokenFrom) {
// ARCH: consider a custom type for this
return &Identifier{name: "*"}
return &Identifier{Name: "*"}
}

prefix := p.prefixParseFns[curToken.ttype]
Expand Down Expand Up @@ -368,25 +384,39 @@ func (p *Parser) Err() error {
return fmt.Errorf("encountered %v errors, first one being: %w", len(p.errors), p.errors[0])
}

// parseRelabeling parses an optional alias following the current token,
// written either as `AS label` or as a bare (optionally quoted) identifier.
// It returns (nil, nil) without advancing the parser when the next token
// does not start an alias, and an error when the parsed alias is not an
// identifier.
func (p *Parser) parseRelabeling() (*Identifier, error) {
	switch p.peekToken().ttype {
	case tokenAs:
		// step over the AS keyword too, so that the label is the current token
		p.position += 2
	case tokenIdentifier, tokenIdentifierQuoted:
		p.position++
	default:
		// ARCH: perhaps return nil, errNoRelabeling, which we can act upon (just continue)
		return nil, nil
	}
	// relabeling is an exception, we use a different Expression for that
	if label, ok := p.parseExpression(LOWEST).(*Identifier); ok {
		return label, nil
	}
	return nil, errors.New("when relabeling (AS), the right side value has to be an identifier")
}

// parse expressions separated by commas
func (p *Parser) parseExpressions() ([]Expression, error) {
var ret []Expression
for {
expr := p.parseExpression(LOWEST)
pt := p.peekToken().ttype
if pt == tokenAs || pt == tokenIdentifier || pt == tokenIdentifierQuoted {
p.position++
if pt == tokenAs {
p.position++
}
// relabeling is an exception, we use a different Expression for that
target := p.parseExpression(LOWEST)
label, ok := target.(*Identifier)
if !ok {
return nil, errors.New("when relabeling (AS), the right side value has to be an identifier")
}
expr = &Relabel{inner: expr, Label: label.name}
label, err := p.parseRelabeling()
if err != nil {
return nil, err
}
if label != nil {
expr = &Relabel{inner: expr, Label: label.Name}
}
pt := p.peekToken().ttype
// TODO(next): move these equality checks into p.parseOrdering?
if pt == tokenAsc || pt == tokenDesc || pt == tokenNulls {
oexp, err := p.parseOrdering()
if err != nil {
Expand Down Expand Up @@ -497,11 +527,7 @@ func ParseQuerySQL(s string) (Query, error) {
if p.curToken().ttype != tokenIdentifier {
return q, fmt.Errorf("expecting dataset name, got %v", p.curToken())
}
datasetID := database.DatasetIdentifier{
Name: string(p.curToken().value),
Version: database.UID{},
Latest: true,
}
q.Dataset = &Dataset{Name: string(p.curToken().value), Latest: true}
if p.peekToken().ttype == tokenAt {
p.position += 2
if p.curToken().ttype != tokenIdentifier {
Expand All @@ -511,13 +537,20 @@ func ParseQuerySQL(s string) (Query, error) {
if len(dsn) == 0 || dsn[0] != 'v' {
return q, fmt.Errorf("invalid dataset version, got %s", dsn)
}
datasetID.Version, err = database.UIDFromHex(dsn[1:])
version, err := database.UIDFromHex(dsn[1:])
if err != nil {
return q, err
}
datasetID.Latest = false
q.Dataset.Version = version.String()
q.Dataset.Latest = false
}
label, err := p.parseRelabeling()
if err != nil {
return q, err
}
if label != nil {
q.Dataset.alias = label
}
q.Dataset = &datasetID

p.position++

Expand Down
Loading

0 comments on commit 831ad93

Please sign in to comment.