Skip to content

Commit

Permalink
feat(dev-exp): Add support for storage direct queries (#186)
Browse files Browse the repository at this point in the history
* add storage class

* add storage query tests

* add tests

* request test

* update CHANGES.rst

* remove extra files

* fix typing

* update repr
  • Loading branch information
enochtangg authored May 9, 2024
1 parent 6e3dc43 commit 79e090a
Show file tree
Hide file tree
Showing 16 changed files with 350 additions and 26 deletions.
7 changes: 7 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
Changelog and versioning
==========================

2.0.34
------

### Various fixes & improvements

- feat: Add support for storage queries.

2.0.33
------

Expand Down
2 changes: 2 additions & 0 deletions snuba_sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from snuba_sdk.column import Column
from snuba_sdk.conditions import And, BooleanCondition, BooleanOp, Condition, Op, Or
from snuba_sdk.entity import Entity
from snuba_sdk.storage import Storage
from snuba_sdk.expressions import Granularity, Limit, Offset, Totals
from snuba_sdk.formula import ArithmeticOperator, Formula
from snuba_sdk.function import CurriedFunction, Function, Identifier, Lambda
Expand Down Expand Up @@ -49,6 +50,7 @@
"Relationship",
"Request",
"Rollup",
"Storage",
"Timeseries",
"Totals",
]
11 changes: 6 additions & 5 deletions snuba_sdk/column.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import re
from dataclasses import dataclass, field
from typing import Optional
from typing import Optional, Union

from snuba_sdk.entity import Entity
from snuba_sdk.expressions import Expression, InvalidExpressionError
from snuba_sdk.storage import Storage


class InvalidColumnError(InvalidExpressionError):
Expand Down Expand Up @@ -63,12 +64,12 @@ def validate(self) -> None:
super().__setattr__("subscriptable", subscriptable)
super().__setattr__("key", key)

def validate_data_model(self, entity: Entity) -> None:
if entity.data_model is None:
def validate_data_model(self, match: Union[Entity, Storage]) -> None:
if match.data_model is None:
return

to_check = self.subscriptable if self.subscriptable else self.name
if not entity.data_model.contains(to_check):
if not match.data_model.contains(to_check):
raise InvalidColumnError(
f"entity '{entity.name}' does not support the column '{self.name}'"
f"'{match.name}' does not support the column '{self.name}'"
)
10 changes: 4 additions & 6 deletions snuba_sdk/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Optional

from snuba_sdk.expressions import Expression
from snuba_sdk.schema import EntityModel
from snuba_sdk.schema import DataModel

entity_name_re = re.compile(r"^[a-zA-Z_]+$")

Expand All @@ -17,7 +17,7 @@ class Entity(Expression):
name: str
alias: Optional[str] = None
sample: Optional[float] = None
data_model: Optional[EntityModel] = field(hash=False, default=None)
data_model: Optional[DataModel] = field(hash=False, default=None)

def validate(self) -> None:
# TODO: There should be a whitelist of entity names at some point
Expand All @@ -35,10 +35,8 @@ def validate(self) -> None:
raise InvalidEntityError(f"'{self.alias}' is not a valid alias")

if self.data_model is not None:
if not isinstance(self.data_model, EntityModel):
raise InvalidEntityError(
"data_model must be an instance of EntityModel"
)
if not isinstance(self.data_model, DataModel):
raise InvalidEntityError("data_model must be an instance of DataModel")

def __repr__(self) -> str:
alias = f", alias='{self.alias}'" if self.alias is not None else ""
Expand Down
15 changes: 10 additions & 5 deletions snuba_sdk/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from snuba_sdk.orderby import LimitBy, OrderBy
from snuba_sdk.query_visitors import InvalidQueryError, Printer, Validator
from snuba_sdk.relationships import Join
from snuba_sdk.storage import Storage

from snuba_sdk.query_optimizers.or_optimizer import OrOptimizer

Expand Down Expand Up @@ -64,7 +65,7 @@ class Query(BaseQuery):
"""

# These must be listed in the order that they must appear in the SnQL query.
match: Union[Entity, Join, Query]
match: Union[Entity, Storage, Join, Query]
select: Optional[Sequence[SelectableExpression]] = None
groupby: Optional[Sequence[SelectableExpression]] = None
array_join: Optional[Sequence[Column]] = None
Expand All @@ -85,8 +86,10 @@ def __post_init__(self) -> None:
right away since the select columns can be added later.
"""
if not isinstance(self.match, (Query, Join, Entity)):
raise InvalidQueryError("queries must have a valid Entity, Join or Query")
if not isinstance(self.match, (Query, Join, Entity, Storage)):
raise InvalidQueryError(
"queries must have a valid Entity, Storage, Join or Query"
)

if isinstance(self.match, Query):
try:
Expand All @@ -98,9 +101,11 @@ def _replace(self, field: str, value: Any) -> Query:
new = replace(self, **{field: value})
return new

def set_match(self, match: Union[Entity, Join, Query]) -> Query:
def set_match(self, match: Union[Entity, Storage, Join, Query]) -> Query:
if not isinstance(match, (Entity, Storage, Join, Query)):
raise InvalidQueryError(f"{match} must be a valid Entity, Join or Query")
raise InvalidQueryError(
f"{match} must be a valid Entity, Storage, Join or Query"
)
elif isinstance(match, Query):
try:
match.validate()
Expand Down
17 changes: 17 additions & 0 deletions snuba_sdk/query_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from snuba_sdk.function import CurriedFunction
from snuba_sdk.relationships import Join
from snuba_sdk.schema import Column as ColumnModel
from snuba_sdk.storage import Storage


class InvalidMatchError(Exception):
Expand All @@ -39,6 +40,8 @@ def validate_match(
_validate_subquery(query.match, all_columns)
elif isinstance(query.match, Join):
_validate_join(query.match, all_columns)
elif isinstance(query.match, Storage):
_validate_storage(query.match, all_columns)
else:
_validate_entity(query.match, all_columns)

Expand Down Expand Up @@ -92,6 +95,20 @@ def _validate_join(match: Join, all_columns: set[Expression]) -> None:
)


def _validate_storage(match: Storage, all_columns: set[Expression]) -> None:
    """
    Validate the columns of a query whose match is a Storage.

    Each referenced column is handed to ``Column.validate_data_model`` together
    with the storage, so the column itself decides whether the storage's data
    model accepts it.

    :param match: The Storage of the query.
    :param all_columns: All the columns referenced in the query.
    """
    for referenced in all_columns:
        # The set is typed as Expression; narrow to Column before validating.
        assert isinstance(referenced, Column)
        referenced.validate_data_model(match)


def _validate_entity(match: Entity, all_columns: set[Expression]) -> None:
"""
Perform the checks to validate the match entity:
Expand Down
7 changes: 4 additions & 3 deletions snuba_sdk/query_visitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from snuba_sdk.query_validation import validate_match
from snuba_sdk.relationships import Join
from snuba_sdk.snuba import is_aggregation_function
from snuba_sdk.storage import Storage
from snuba_sdk.visitors import ExpressionFinder, Translation, entity_aliases


Expand Down Expand Up @@ -153,8 +154,8 @@ def _combine(self, query: main.Query, returns: Mapping[str, str]) -> str:

return formatted

def _visit_match(self, match: Union[Entity, Join, main.Query]) -> str:
if isinstance(match, (Entity, Join)):
def _visit_match(self, match: Union[Entity, Storage, Join, main.Query]) -> str:
if isinstance(match, (Entity, Storage, Join)):
return f"MATCH {self.translator.visit(match)}"

# We need a separate translator that can recurse through the subqueries
Expand Down Expand Up @@ -301,7 +302,7 @@ def _combine(self, query: main.Query, returns: Mapping[str, None]) -> None:
if query.totals and query.totals.totals and not query.groupby:
raise InvalidQueryError("totals is only valid with a groupby")

def _visit_match(self, match: Union[Entity, Join, main.Query]) -> None:
def _visit_match(self, match: Union[Entity, Storage, Join, main.Query]) -> None:
match.validate()

def __list_validate(self, values: Optional[Sequence[Expression]]) -> None:
Expand Down
2 changes: 1 addition & 1 deletion snuba_sdk/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __iter__(self) -> Iterator[Column]:


@dataclass(frozen=True, init=False)
class EntityModel:
class DataModel:
column_set: ColumnSet
required_time_column: Column
required_columns: set[Column] = field(init=False, default_factory=set)
Expand Down
37 changes: 37 additions & 0 deletions snuba_sdk/storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import re
from dataclasses import dataclass, field
from typing import Optional

from snuba_sdk.expressions import Expression
from snuba_sdk.schema import DataModel

# Pattern a storage name is checked against: one or more ASCII letters or
# underscores (no digits, dots or dashes).
storage_name_re = re.compile(r"^[a-zA-Z_]+$")


class InvalidStorageError(Exception):
    """Raised when a Storage has an invalid name, sample or data model."""


@dataclass(frozen=True, repr=False)
class Storage(Expression):
    """
    A storage, identified by name, with an optional sample and data model.

    :param name: The storage name; must be a string matching ``storage_name_re``.
    :param sample: Optional sample value; when given it must be a float
        greater than 0.0.
    :param data_model: Optional DataModel for the storage. It is excluded from
        the generated hash.
    """

    name: str
    sample: Optional[float] = None
    data_model: Optional[DataModel] = field(hash=False, default=None)

    def validate(self) -> None:
        """
        Check the name, sample and data model of this storage.

        :raises InvalidStorageError: If any of the fields is invalid.
        """
        name_is_valid = isinstance(self.name, str) and storage_name_re.match(
            self.name
        )
        if not name_is_valid:
            raise InvalidStorageError(f"'{self.name}' is not a valid storage name")

        rate = self.sample
        if rate is not None:
            if not isinstance(rate, float):
                raise InvalidStorageError("sample must be a float")
            if rate <= 0.0:
                raise InvalidStorageError("samples must be greater than 0.0")

        model = self.data_model
        if model is not None and not isinstance(model, DataModel):
            raise InvalidStorageError("data_model must be an instance of DataModel")

    def __repr__(self) -> str:
        """Return ``STORAGE('<name>')``, with ``, sample=<sample>`` when set."""
        suffix = "" if self.sample is None else f", sample={self.sample}"
        return f"STORAGE('{self.name}'{suffix})"
21 changes: 21 additions & 0 deletions snuba_sdk/visitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from snuba_sdk.column import Column
from snuba_sdk.conditions import BooleanCondition, Condition, Op, is_unary
from snuba_sdk.entity import Entity
from snuba_sdk.storage import Storage
from snuba_sdk.expressions import (
Expression,
Granularity,
Expand Down Expand Up @@ -41,6 +42,8 @@ def visit(self, node: Expression) -> TVisited:
return self._visit_lambda(node)
elif isinstance(node, Entity):
return self._visit_entity(node)
elif isinstance(node, Storage):
return self._visit_storage(node)
elif isinstance(node, Relationship):
return self._visit_relationship(node)
elif isinstance(node, Join):
Expand Down Expand Up @@ -94,6 +97,10 @@ def _visit_int_literal(self, literal: int) -> TVisited:
def _visit_entity(self, entity: Entity) -> TVisited:
raise NotImplementedError

@abstractmethod
def _visit_storage(self, storage: Storage) -> TVisited:
    """Visit a Storage node; abstract, so concrete visitors must override it."""
    raise NotImplementedError

@abstractmethod
def _visit_relationship(self, relationship: Relationship) -> TVisited:
raise NotImplementedError
Expand Down Expand Up @@ -246,6 +253,15 @@ def _visit_entity(self, entity: Entity) -> str:
sample_clause = f" SAMPLE {entity.sample:f}"
return f"({alias_clause}{entity.name}{sample_clause})"

def _visit_storage(self, storage: Storage) -> str:
    """
    Render a Storage as ``STORAGE(<name>)``, appending `` SAMPLE <rate>`` when
    the storage has a sample.
    """
    rate = storage.sample
    if rate is None:
        sample_clause = ""
    elif rate % 1 == 0:
        # Whole-number samples are printed with a single decimal place.
        sample_clause = f" SAMPLE {rate:.1f}"
    else:
        sample_clause = f" SAMPLE {rate:f}"
    return f"STORAGE({storage.name}{sample_clause})"

def _visit_relationship(self, relationship: Relationship) -> str:
return f"{self.visit(relationship.lhs)} -[{relationship.name}]-> {self.visit(relationship.rhs)}"

Expand Down Expand Up @@ -374,6 +390,11 @@ def _visit_entity(self, entity: Entity) -> set[Expression]:
return set([entity])
return set()

def _visit_storage(self, storage: Storage) -> set[Expression]:
    """Return a set holding the storage if it is an instance of ``self.exp_type``, else an empty set."""
    return {storage} if isinstance(storage, self.exp_type) else set()

def _visit_relationship(self, relationship: Relationship) -> set[Expression]:
if isinstance(relationship, self.exp_type):
return set([relationship])
Expand Down
38 changes: 37 additions & 1 deletion tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
Op,
OrderBy,
Query,
Storage,
)
from snuba_sdk.query_validation import InvalidMatchError
from snuba_sdk.query_visitors import InvalidQueryError
Expand All @@ -37,6 +38,18 @@
),
id="basic query",
),
pytest.param(
Query(
match=Storage("events"),
select=[Column("event_id")],
groupby=None,
where=[Condition(Column("timestamp"), Op.GT, NOW)],
limit=Limit(10),
offset=Offset(1),
granularity=Granularity(3600),
),
id="basic storage query",
),
pytest.param(
Query(
match=Entity("events", "ev", 0.2),
Expand All @@ -60,6 +73,29 @@
),
id="complex query",
),
pytest.param(
Query(
match=Storage("events", 0.2),
select=[
Column("title"),
Column("tags[release:1]"),
Function("uniq", [Column("event_id")], "uniq_events"),
],
groupby=[Column("title"), Column("tags[release:1]")],
where=[
Condition(Column("timestamp"), Op.GT, NOW),
Condition(Function("toHour", [Column("timestamp")]), Op.LTE, NOW),
Condition(Column("project_id"), Op.IN, Function("tuple", [1, 2, 3])),
],
having=[Condition(Function("uniq", [Column("event_id")]), Op.GT, 1)],
orderby=[OrderBy(Column("title"), Direction.ASC)],
limitby=LimitBy([Column("title"), Column("event_id")], 5),
limit=Limit(10),
offset=Offset(1),
granularity=Granularity(3600),
),
id="complex storage query",
),
pytest.param(
Query(Entity("events", None, 0.2))
.set_select([Column("event_id")])
Expand Down Expand Up @@ -247,7 +283,7 @@


@pytest.mark.parametrize("query", tests)
def test_query(query: Query) -> None:
def test_entity_query(query: Query) -> None:
query.validate()


Expand Down
Loading

0 comments on commit 79e090a

Please sign in to comment.