From bade12bbdd5f194dfeda887152daf360268ce5a8 Mon Sep 17 00:00:00 2001 From: YUN SUN Date: Fri, 22 Nov 2024 00:19:18 -0600 Subject: [PATCH 01/14] [5201]Implement expressions in python client --- .../gravitino/api/expressions/__init__.py | 16 ++++ .../gravitino/api/expressions/expression.py | 41 +++++++++ .../api/expressions/function_expression.py | 90 +++++++++++++++++++ .../api/expressions/named_reference.py | 76 ++++++++++++++++ .../api/expressions/unparsed_expression.py | 78 ++++++++++++++++ .../tests/unittests/test_expressions.py | 61 +++++++++++++ .../unittests/test_function_expression.py | 62 +++++++++++++ .../tests/unittests/test_named_reference.py | 39 ++++++++ .../unittests/test_unparsed_expression.py | 34 +++++++ 9 files changed, 497 insertions(+) create mode 100644 clients/client-python/gravitino/api/expressions/__init__.py create mode 100644 clients/client-python/gravitino/api/expressions/expression.py create mode 100644 clients/client-python/gravitino/api/expressions/function_expression.py create mode 100644 clients/client-python/gravitino/api/expressions/named_reference.py create mode 100644 clients/client-python/gravitino/api/expressions/unparsed_expression.py create mode 100644 clients/client-python/tests/unittests/test_expressions.py create mode 100644 clients/client-python/tests/unittests/test_function_expression.py create mode 100644 clients/client-python/tests/unittests/test_named_reference.py create mode 100644 clients/client-python/tests/unittests/test_unparsed_expression.py diff --git a/clients/client-python/gravitino/api/expressions/__init__.py b/clients/client-python/gravitino/api/expressions/__init__.py new file mode 100644 index 00000000000..13a83393a91 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/clients/client-python/gravitino/api/expressions/expression.py b/clients/client-python/gravitino/api/expressions/expression.py new file mode 100644 index 00000000000..0ee215706e8 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/expression.py @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations +from abc import ABC +from typing import List, Set, TYPE_CHECKING + +if TYPE_CHECKING: + from gravitino.api.expressions.named_reference import NamedReference + + +class Expression(ABC): + """Base class of the public logical expression API.""" + + EMPTY_EXPRESSION: List[Expression] = [] + + def children(self) -> List[Expression]: + """Returns a list of the children of this node. Children should not change.""" + return self.EMPTY_EXPRESSION + + def references(self) -> List[NamedReference]: + """Returns a list of fields or columns that are referenced by this expression.""" + + ref_set: Set[NamedReference] = set() + for child in self.children(): + ref_set.update(child.references()) + return list(ref_set) diff --git a/clients/client-python/gravitino/api/expressions/function_expression.py b/clients/client-python/gravitino/api/expressions/function_expression.py new file mode 100644 index 00000000000..992bac10b6f --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/function_expression.py @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +from __future__ import annotations +from abc import abstractmethod +from typing import List, Union +from gravitino.api.expressions.expression import Expression + + +class FunctionExpression(Expression): + """ + The interface of a function expression. A function expression is an expression that takes a + function name and a list of arguments. + """ + + @staticmethod + def of(function_name: str, *arguments: Expression) -> FuncExpressionImpl: + """ + Creates a new FunctionExpression with the given function name. + If no arguments are provided, it uses an empty expression. + + :param function_name: The name of the function. + :param arguments: The arguments to the function (optional). + :return: The created FunctionExpression. + """ + arguments = list(arguments) if arguments else Expression.EMPTY_EXPRESSION + return FuncExpressionImpl(function_name, arguments) + + @abstractmethod + def function_name(self) -> str: + """Returns the function name.""" + pass + + @abstractmethod + def arguments(self) -> List[Expression]: + """Returns the arguments passed to the function.""" + pass + + def children(self) -> List[Expression]: + """Returns the arguments as children.""" + return self.arguments() + + +class FuncExpressionImpl(FunctionExpression): + """ + A concrete implementation of the FunctionExpression interface. + """ + + def __init__(self, function_name: str, arguments: List[Expression]): + super().__init__() + self._function_name = function_name + self._arguments = arguments + + def function_name(self) -> str: + return self._function_name + + def arguments(self) -> List[Expression]: + return self._arguments + + def __str__(self) -> str: + if not self._arguments: + return f"{self._function_name}()" + arguments_str = ", ".join(map(str, self._arguments)) + return f"{self._function_name}({arguments_str})" + + def __eq__(self, other: Union[FuncExpressionImpl, object]) -> bool: + if isinstance(other, FuncExpressionImpl): + return ( + self._function_name == other._function_name + and self._arguments == other._arguments + ) + return False + + def __hash__(self) -> int: + return hash((self._function_name, tuple(self._arguments))) diff --git a/clients/client-python/gravitino/api/expressions/named_reference.py b/clients/client-python/gravitino/api/expressions/named_reference.py new file mode 100644 index 00000000000..80cb2da9719 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/named_reference.py @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations +from typing import List +from gravitino.api.expressions.expression import Expression + + +class NamedReference(Expression): + """ + Represents a field or column reference in the public logical expression API. + """ + + @staticmethod + def field(field_name: List[str]) -> FieldReference: + """Returns a FieldReference for the given field name(s).""" + return FieldReference(field_name) + + @staticmethod + def field_from_column(column_name: str) -> FieldReference: + """Returns a FieldReference for the given column name.""" + return FieldReference([column_name]) + + def field_name(self) -> List[str]: + """ + Returns the referenced field name as a list of string parts. + Must be implemented by subclasses. + """ + raise NotImplementedError("Subclasses must implement this method.") + + def children(self) -> List[Expression]: + """Named references do not have children.""" + return Expression.EMPTY_EXPRESSION + + def references(self) -> List[NamedReference]: + """Named references reference themselves.""" + return [self] + + +class FieldReference(NamedReference): + """ + A NamedReference that references a field or column. + """ + + def __init__(self, field_name: List[str]): + super().__init__() + self._field_name = field_name + + def field_name(self) -> List[str]: + return self._field_name + + def __eq__(self, other: object) -> bool: + if isinstance(other, FieldReference): + return self._field_name == other._field_name + return False + + def __hash__(self) -> int: + return hash(tuple(self._field_name)) + + def __str__(self) -> str: + """Returns the string representation of the field reference.""" + return ".".join(self._field_name) diff --git a/clients/client-python/gravitino/api/expressions/unparsed_expression.py b/clients/client-python/gravitino/api/expressions/unparsed_expression.py new file mode 100644 index 00000000000..2c6b8c3fb38 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/unparsed_expression.py @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +from __future__ import annotations +from typing import List +from gravitino.api.expressions.expression import Expression + + +class UnparsedExpression(Expression): + """ + Represents an expression that is not parsed yet. + The parsed expression is represented by FunctionExpression, Literal, or NamedReference. + """ + + def unparsed_expression(self) -> str: + """ + Returns the unparsed expression as a string. + """ + raise NotImplementedError("Subclasses must implement this method.") + + def children(self) -> List[Expression]: + """ + Unparsed expressions do not have children. + """ + return Expression.EMPTY_EXPRESSION + + @staticmethod + def of(unparsed_expression: str) -> UnparsedExpressionImpl: + """ + Creates a new UnparsedExpression with the given unparsed expression. + + + :param unparsed_expression: The unparsed expression as a string. + :return: The created UnparsedExpression. + """ + return UnparsedExpressionImpl(unparsed_expression) + + +class UnparsedExpressionImpl(UnparsedExpression): + """ + An implementation of the UnparsedExpression interface. + """ + + def __init__(self, unparsed_expression: str): + super().__init__() + self._unparsed_expression = unparsed_expression + + def unparsed_expression(self) -> str: + return self._unparsed_expression + + def __eq__(self, other: object) -> bool: + if isinstance(other, UnparsedExpressionImpl): + return self._unparsed_expression == other._unparsed_expression + return False + + def __hash__(self) -> int: + return hash(self._unparsed_expression) + + def __str__(self) -> str: + """ + Returns the string representation of the unparsed expression. + """ + return f"UnparsedExpressionImpl{{unparsedExpression='{self._unparsed_expression}'}}" diff --git a/clients/client-python/tests/unittests/test_expressions.py b/clients/client-python/tests/unittests/test_expressions.py new file mode 100644 index 00000000000..6054c1fde67 --- /dev/null +++ b/clients/client-python/tests/unittests/test_expressions.py @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import unittest +from typing import List +from gravitino.api.expressions.expression import Expression +from gravitino.api.expressions.named_reference import NamedReference + + +class MockExpression(Expression): + """Mock implementation of the Expression class for testing.""" + + def __init__( + self, children: List[Expression] = None, references: List[NamedReference] = None + ): + self._children = children if children else [] + self._references = references if references else [] + + def children(self) -> List[Expression]: + return self._children + + def references(self) -> List[NamedReference]: + if self._references: + return self._references + return super().references() + + +class TestExpression(unittest.TestCase): + def test_empty_expression(self): + expr = MockExpression() + self.assertEqual(expr.children(), []) + self.assertEqual(expr.references(), []) + + def test_expression_with_references(self): + ref = NamedReference.field(["student", "name"]) + child = MockExpression(references=[ref]) + expr = MockExpression(children=[child]) + self.assertEqual(expr.children(), [child]) + self.assertEqual(expr.references(), [ref]) + + def test_multiple_children(self): + ref1 = NamedReference.field(["student", "name"]) + ref2 = NamedReference.field(["student", "age"]) + child1 = MockExpression(references=[ref1]) + child2 = MockExpression(references=[ref2]) + expr = MockExpression(children=[child1, child2]) + self.assertCountEqual(expr.references(), [ref1, ref2]) diff --git a/clients/client-python/tests/unittests/test_function_expression.py b/clients/client-python/tests/unittests/test_function_expression.py new file mode 100644 index 00000000000..deaa2089e23 --- /dev/null +++ b/clients/client-python/tests/unittests/test_function_expression.py @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import unittest +from gravitino.api.expressions.function_expression import ( + FunctionExpression, + FuncExpressionImpl, +) +from gravitino.api.expressions.expression import Expression + + +class MockExpression(Expression): + """Mock implementation of the Expression class for testing.""" + + def children(self): + return [] + + def references(self): + return [] + + def __str__(self): + return "MockExpression()" + + +class TestFunctionExpression(unittest.TestCase): + def test_function_without_arguments(self): + func = FuncExpressionImpl("SUM", []) + self.assertEqual(func.function_name(), "SUM") + self.assertEqual(func.arguments(), []) + self.assertEqual(str(func), "SUM()") + + def test_function_with_arguments(self): + arg1 = MockExpression() + arg2 = MockExpression() + func = FuncExpressionImpl("SUM", [arg1, arg2]) + self.assertEqual(func.function_name(), "SUM") + self.assertEqual(func.arguments(), [arg1, arg2]) + self.assertEqual(str(func), "SUM(MockExpression(), MockExpression())") + + def test_function_equality(self): + func1 = FuncExpressionImpl("SUM", []) + func2 = FuncExpressionImpl("SUM", []) + self.assertEqual(func1, func2) + self.assertEqual(hash(func1), hash(func2)) + + def test_function_of_static_method(self): + func = FunctionExpression.of("SUM", MockExpression()) + self.assertEqual(func.function_name(), "SUM") diff --git a/clients/client-python/tests/unittests/test_named_reference.py b/clients/client-python/tests/unittests/test_named_reference.py new file mode 100644 index 00000000000..a9942aec7fc --- /dev/null +++ b/clients/client-python/tests/unittests/test_named_reference.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import unittest +from gravitino.api.expressions.named_reference import NamedReference, FieldReference + + +class TestNamedReference(unittest.TestCase): + def test_field_reference_creation(self): + field = FieldReference(["student", "name"]) + self.assertEqual(field.field_name(), ["student", "name"]) + self.assertEqual(str(field), "student.name") + + def test_field_reference_equality(self): + field1 = FieldReference(["student", "name"]) + field2 = FieldReference(["student", "name"]) + self.assertEqual(field1, field2) + self.assertEqual(hash(field1), hash(field2)) + + def test_named_reference_static_methods(self): + ref = NamedReference.field(["student", "name"]) + self.assertEqual(ref.field_name(), ["student", "name"]) + + ref2 = NamedReference.field_from_column("student") + self.assertEqual(ref2.field_name(), ["student"]) diff --git a/clients/client-python/tests/unittests/test_unparsed_expression.py b/clients/client-python/tests/unittests/test_unparsed_expression.py new file mode 100644 index 00000000000..809caf67d48 --- /dev/null +++ b/clients/client-python/tests/unittests/test_unparsed_expression.py @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import unittest +from gravitino.api.expressions.unparsed_expression import UnparsedExpressionImpl + + +class TestUnparsedExpression(unittest.TestCase): + def test_unparsed_expression_creation(self): + expr = UnparsedExpressionImpl("some_expression") + self.assertEqual(expr.unparsed_expression(), "some_expression") + self.assertEqual( + str(expr), "UnparsedExpressionImpl{unparsedExpression='some_expression'}" + ) + + def test_unparsed_expression_equality(self): + expr1 = UnparsedExpressionImpl("some_expression") + expr2 = UnparsedExpressionImpl("some_expression") + self.assertEqual(expr1, expr2) + self.assertEqual(hash(expr1), hash(expr2)) From 3fc740020e5fec7109ba201cc663baa5c5a6af9d Mon Sep 17 00:00:00 2001 From: YUN SUN Date: Sat, 23 Nov 2024 00:03:47 -0600 Subject: [PATCH 02/14] Update the script for named_reference and function_expression --- .../api/expressions/function_expression.py | 1 + .../gravitino/api/expressions/named_reference.py | 14 +++++++------- .../tests/unittests/test_named_reference.py | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/clients/client-python/gravitino/api/expressions/function_expression.py b/clients/client-python/gravitino/api/expressions/function_expression.py index 992bac10b6f..92ced3b5b88 100644 --- a/clients/client-python/gravitino/api/expressions/function_expression.py +++ b/clients/client-python/gravitino/api/expressions/function_expression.py @@ -84,6 +84,7 @@ def __eq__(self, other: Union[FuncExpressionImpl, object]) -> bool: self._function_name == other._function_name and self._arguments == other._arguments ) + # TODO: Consider handling other cases or adding custom equality checks return False def __hash__(self) -> int: diff --git a/clients/client-python/gravitino/api/expressions/named_reference.py b/clients/client-python/gravitino/api/expressions/named_reference.py index 80cb2da9719..b145e6a1691 100644 --- a/clients/client-python/gravitino/api/expressions/named_reference.py +++ b/clients/client-python/gravitino/api/expressions/named_reference.py @@ -56,21 +56,21 @@ class FieldReference(NamedReference): A NamedReference that references a field or column. """ - def __init__(self, field_name: List[str]): + def __init__(self, field_names: List[str]): super().__init__() - self._field_name = field_name + self._field_names = field_names - def field_name(self) -> List[str]: - return self._field_name + def field_names(self) -> List[str]: + return self._field_names def __eq__(self, other: object) -> bool: if isinstance(other, FieldReference): - return self._field_name == other._field_name + return self._field_names == other._field_names return False def __hash__(self) -> int: - return hash(tuple(self._field_name)) + return hash(tuple(self._field_names)) def __str__(self) -> str: """Returns the string representation of the field reference.""" - return ".".join(self._field_name) + return ".".join(self._field_names) diff --git a/clients/client-python/tests/unittests/test_named_reference.py b/clients/client-python/tests/unittests/test_named_reference.py index a9942aec7fc..b76f6cec8c9 100644 --- a/clients/client-python/tests/unittests/test_named_reference.py +++ b/clients/client-python/tests/unittests/test_named_reference.py @@ -22,7 +22,7 @@ class TestNamedReference(unittest.TestCase): def test_field_reference_creation(self): field = FieldReference(["student", "name"]) - self.assertEqual(field.field_name(), ["student", "name"]) + self.assertEqual(field.field_names(), ["student", "name"]) self.assertEqual(str(field), "student.name") def test_field_reference_equality(self): @@ -33,7 +33,7 @@ def test_field_reference_equality(self): def test_named_reference_static_methods(self): ref = NamedReference.field(["student", "name"]) - self.assertEqual(ref.field_name(), ["student", "name"]) + self.assertEqual(ref.field_names(), ["student", "name"]) ref2 = NamedReference.field_from_column("student") - self.assertEqual(ref2.field_name(), ["student"]) + self.assertEqual(ref2.field_names(), ["student"]) From 00d31e86bc79edcf6619cebb46078220004eb2ed Mon Sep 17 00:00:00 2001 From: YUN SUN Date: Sat, 23 Nov 2024 22:01:01 -0600 Subject: [PATCH 03/14] Update the distribution.py --- .../api/expressions/distributions.py | 122 ++++++++++++++++++ .../tests/unittests/test_distributions.py | 50 +++++++ 2 files changed, 172 insertions(+) create mode 100644 clients/client-python/gravitino/api/expressions/distributions.py create mode 100644 clients/client-python/tests/unittests/test_distributions.py diff --git a/clients/client-python/gravitino/api/expressions/distributions.py b/clients/client-python/gravitino/api/expressions/distributions.py new file mode 100644 index 00000000000..420fb16ee0f --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/distributions.py @@ -0,0 +1,122 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Tuple +from gravitino.api.expressions.named_reference import NamedReference + + +# Enum equivalent in Python for Strategy +class Strategy: + NONE = "NONE" + HASH = "HASH" + RANGE = "RANGE" + EVEN = "EVEN" + + @staticmethod + def get_by_name(name: str): + name = name.upper() + if name == "NONE": + return Strategy.NONE + if name == "HASH": + return Strategy.HASH + if name == "RANGE": + return Strategy.RANGE + if name in ("EVEN", "RANDOM"): + return Strategy.EVEN + raise ValueError( + f"Invalid distribution strategy: {name}. Valid values are: {', '.join( + [Strategy.NONE, Strategy.HASH, Strategy.RANGE, Strategy.EVEN])}" + ) + + +# Distribution interface equivalent +class Distribution: + def strategy(self) -> str: + raise NotImplementedError + + def number(self) -> int: + raise NotImplementedError + + def expressions(self) -> List: + raise NotImplementedError + + def children(self) -> List: + return self.expressions() + + def equals(self, distribution) -> bool: + return ( + isinstance(distribution, Distribution) + and self.strategy() == distribution.strategy() + and self.number() == distribution.number() + and self.expressions() == distribution.expressions() + ) + + +# Implementation of Distribution +class DistributionImpl(Distribution): + def __init__(self, strategy: str, number: int, expressions: List): + self._strategy = strategy + self._number = number + self._expressions = expressions + + def strategy(self) -> str: + return self._strategy + + def number(self) -> int: + return self._number + + def expressions(self) -> List: + return self._expressions + + def __str__(self): + return f"DistributionImpl(strategy={self._strategy}, number={self._number}, expressions={self._expressions})" + + def __eq__(self, other): + if not isinstance(other, DistributionImpl): + return False + return ( + self._strategy == other._strategy + and self._number == other._number + and self._expressions == other._expressions + ) + + def __hash__(self): + return hash((self._strategy, self._number, tuple(self._expressions))) + + +# Helper methods to create distributions +class Distributions: + NONE = DistributionImpl(Strategy.NONE, 0, []) + HASH = DistributionImpl(Strategy.HASH, 0, []) + RANGE = DistributionImpl(Strategy.RANGE, 0, []) + + @staticmethod + def even(number: int, *expressions) -> Distribution: + return DistributionImpl(Strategy.EVEN, number, list(expressions)) + + @staticmethod + def hash(number: int, *expressions) -> Distribution: + return DistributionImpl(Strategy.HASH, number, list(expressions)) + + @staticmethod + def of(strategy: str, number: int, *expressions) -> Distribution: + return DistributionImpl(strategy, number, list(expressions)) + + @staticmethod + def fields(strategy: str, number: int, *field_names: Tuple[str]) -> Distribution: + expressions = [NamedReference.field(field_name) for field_name in field_names] + return Distributions.of(strategy, number, *expressions) diff --git a/clients/client-python/tests/unittests/test_distributions.py b/clients/client-python/tests/unittests/test_distributions.py new file mode 100644 index 00000000000..9ddbbbe3883 --- /dev/null +++ b/clients/client-python/tests/unittests/test_distributions.py @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import unittest +from gravitino.api.expressions.distributions import Distributions, Strategy + + +class TestDistribution(unittest.TestCase): + def test_distribution_creation(self): + # Test creating a distribution with EVEN strategy + dist = Distributions.even(10, "a", "b") + self.assertEqual(dist.strategy(), Strategy.EVEN) + self.assertEqual(dist.number(), 10) + self.assertEqual(dist.expressions(), ["a", "b"]) + + # Test creating a distribution with HASH strategy + dist_hash = Distributions.hash(5, "c", "d") + self.assertEqual(dist_hash.strategy(), Strategy.HASH) + self.assertEqual(dist_hash.number(), 5) + self.assertEqual(dist_hash.expressions(), ["c", "d"]) + + def test_distribution_equals(self): + dist1 = Distributions.even(10, "a", "b") + dist2 = Distributions.even(10, "a", "b") + dist3 = Distributions.hash(10, "a", "b") + self.assertTrue(dist1.equals(dist2)) + self.assertFalse(dist1.equals(dist3)) + + def test_strategy_get_by_name(self): + self.assertEqual(Strategy.get_by_name("hash"), Strategy.HASH) + self.assertEqual(Strategy.get_by_name("RANGE"), Strategy.RANGE) + self.assertEqual(Strategy.get_by_name("EVEN"), Strategy.EVEN) + + def test_invalid_strategy(self): + with self.assertRaises(ValueError): + Strategy.get_by_name("INVALID") From c21bba5d46aa0dd2fafb202530d0957433ca3e94 Mon Sep 17 00:00:00 2001 From: YUN SUN Date: Sat, 23 Nov 2024 22:39:14 -0600 Subject: [PATCH 04/14] Update literals.py --- .../gravitino/api/expressions/literals.py | 136 ++++++++++++++++++ .../tests/unittests/test_literals.py | 103 +++++++++++++ 2 files changed, 239 insertions(+) create mode 100644 clients/client-python/gravitino/api/expressions/literals.py create mode 100644 clients/client-python/tests/unittests/test_literals.py diff --git a/clients/client-python/gravitino/api/expressions/literals.py b/clients/client-python/gravitino/api/expressions/literals.py new file mode 100644 index 00000000000..c28466995a4 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/literals.py @@ -0,0 +1,136 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from abc import ABC, abstractmethod +from typing import Union +from datetime import date, time, datetime + + +# Abstract base class for Literal +class Literal(ABC): + """ + Represents a constant literal value in the expression API. + """ + + @abstractmethod + def value(self) -> Union[int, float, str, datetime, time, date, bool]: + """ + Returns the literal value. + """ + pass + + @abstractmethod + def data_type(self) -> str: + """ + Returns the data type of the literal. + """ + pass + + def children(self): + """ + Returns the child expressions. By default, this is an empty list. + """ + return [] + + +# Concrete implementation of Literal +class LiteralImpl(Literal): + def __init__( + self, value: Union[int, float, str, datetime, time, date, bool], data_type: str + ): + self._value = value + self._data_type = data_type + + def value(self) -> Union[int, float, str, datetime, time, date, bool]: + return self._value + + def data_type(self) -> str: + return self._data_type + + def __eq__(self, other: object) -> bool: + if not isinstance(other, LiteralImpl): + return False + return (self._value == other._value) and (self._data_type == other._data_type) + + def __hash__(self): + return hash((self._value, self._data_type)) + + def __str__(self): + return f"LiteralImpl(value={self._value}, data_type={self._data_type})" + + +# Helper class to create literals +class Literals: + @staticmethod + def null_literal() -> Literal: + return LiteralImpl(None, "NullType") + + @staticmethod + def boolean_literal(value: bool) -> Literal: + return LiteralImpl(value, "Boolean") + + @staticmethod + def byte_literal(value: int) -> Literal: + return LiteralImpl(value, "Byte") + + @staticmethod + def short_literal(value: int) -> Literal: + return LiteralImpl(value, "Short") + + @staticmethod + def integer_literal(value: int) -> Literal: + return LiteralImpl(value, "Integer") + + @staticmethod + def long_literal(value: int) -> Literal: + return LiteralImpl(value, "Long") + + @staticmethod + def float_literal(value: float) -> Literal: + return LiteralImpl(value, "Float") + + @staticmethod + def double_literal(value: float) -> Literal: + return LiteralImpl(value, "Double") + + @staticmethod + def decimal_literal(value: float) -> Literal: + return LiteralImpl(value, "Decimal") + + @staticmethod + def string_literal(value: str) -> Literal: + return LiteralImpl(value, "String") + + @staticmethod + def varchar_literal(length: int, value: str) -> Literal: + return LiteralImpl(value, f"Varchar({length})") + + @staticmethod + def date_literal(value: date) -> Literal: + return LiteralImpl(value, "Date") + + @staticmethod + def time_literal(value: time) -> Literal: + return LiteralImpl(value, "Time") + + @staticmethod + def timestamp_literal(value: datetime) -> Literal: + return LiteralImpl(value, "Timestamp") + + @staticmethod + def timestamp_literal_from_string(value: str) -> Literal: + return Literals.timestamp_literal(datetime.fromisoformat(value)) diff --git a/clients/client-python/tests/unittests/test_literals.py b/clients/client-python/tests/unittests/test_literals.py new file mode 100644 index 00000000000..c5baf9cf6da --- /dev/null +++ b/clients/client-python/tests/unittests/test_literals.py @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import unittest +from datetime import date, time, datetime +from gravitino.api.expressions.literals import Literals + + +class TestLiterals(unittest.TestCase): + def test_null_literal(self): + null_val = Literals.null_literal() + self.assertEqual(null_val.value(), None) + self.assertEqual(null_val.data_type(), "NullType") + self.assertEqual(str(null_val), "LiteralImpl(value=None, data_type=NullType)") + + def test_boolean_literal(self): + bool_val = Literals.boolean_literal(True) + self.assertEqual(bool_val.value(), True) + self.assertEqual(bool_val.data_type(), "Boolean") + self.assertEqual(str(bool_val), "LiteralImpl(value=True, data_type=Boolean)") + + def test_integer_literal(self): + int_val = Literals.integer_literal(42) + self.assertEqual(int_val.value(), 42) + self.assertEqual(int_val.data_type(), "Integer") + self.assertEqual(str(int_val), "LiteralImpl(value=42, data_type=Integer)") + + def test_string_literal(self): + str_val = Literals.string_literal("Hello World") + self.assertEqual(str_val.value(), "Hello World") + self.assertEqual(str_val.data_type(), "String") + self.assertEqual( + str(str_val), "LiteralImpl(value=Hello World, data_type=String)" + ) + + def test_date_literal(self): + date_val = Literals.date_literal(date(2023, 1, 1)) + self.assertEqual(date_val.value(), date(2023, 1, 1)) + self.assertEqual(date_val.data_type(), "Date") + self.assertEqual(str(date_val), "LiteralImpl(value=2023-01-01, data_type=Date)") + + def test_time_literal(self): + time_val = Literals.time_literal(time(12, 30, 45)) + self.assertEqual(time_val.value(), time(12, 30, 45)) + self.assertEqual(time_val.data_type(), "Time") + self.assertEqual(str(time_val), "LiteralImpl(value=12:30:45, data_type=Time)") + + def test_timestamp_literal(self): + timestamp_val = Literals.timestamp_literal(datetime(2023, 1, 1, 12, 30, 45)) + self.assertEqual(timestamp_val.value(), datetime(2023, 1, 1, 12, 30, 45)) + self.assertEqual(timestamp_val.data_type(), "Timestamp") + self.assertEqual( + str(timestamp_val), + "LiteralImpl(value=2023-01-01 12:30:45, data_type=Timestamp)", + ) + + def test_timestamp_literal_from_string(self): + timestamp_val = Literals.timestamp_literal_from_string("2023-01-01T12:30:45") + self.assertEqual(timestamp_val.value(), datetime(2023, 1, 1, 12, 30, 45)) + self.assertEqual(timestamp_val.data_type(), "Timestamp") + self.assertEqual( + str(timestamp_val), + "LiteralImpl(value=2023-01-01 12:30:45, data_type=Timestamp)", + ) + + def test_varchar_literal(self): + varchar_val = Literals.varchar_literal(10, "Test String") + self.assertEqual(varchar_val.value(), "Test String") + self.assertEqual(varchar_val.data_type(), "Varchar(10)") + self.assertEqual( + str(varchar_val), "LiteralImpl(value=Test String, data_type=Varchar(10))" + ) + + def test_equality(self): + int_val1 = Literals.integer_literal(42) + int_val2 = Literals.integer_literal(42) + int_val3 = Literals.integer_literal(10) + self.assertTrue(int_val1 == int_val2) + self.assertFalse(int_val1 == int_val3) + + def test_hash(self): + int_val1 = Literals.integer_literal(42) + int_val2 = Literals.integer_literal(42) + self.assertEqual(hash(int_val1), hash(int_val2)) + + def test_unequal_literals(self): + int_val = Literals.integer_literal(42) + str_val = Literals.string_literal("Hello") + self.assertFalse(int_val == str_val) From 361acbc7c04293105c16dc0fe89497e1eb7b78a8 Mon Sep 17 00:00:00 2001 From: YUN SUN Date: Sat, 23 Nov 2024 23:21:11 -0600 Subject: [PATCH 05/14] Update sorts.py --- .../gravitino/api/expressions/sorts.py | 155 ++++++++++++++++++ .../tests/unittests/test_sorts.py | 140 ++++++++++++++++ 2 files changed, 295 insertions(+) create mode 100644 clients/client-python/gravitino/api/expressions/sorts.py create mode 100644 clients/client-python/tests/unittests/test_sorts.py diff --git a/clients/client-python/gravitino/api/expressions/sorts.py b/clients/client-python/gravitino/api/expressions/sorts.py new file mode 100644 index 00000000000..0442310492e --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/sorts.py @@ -0,0 +1,155 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from enum import Enum +from abc import ABC, abstractmethod +from typing import Optional + + +# Enum for NullOrdering +class NullOrdering(Enum): + NULLS_FIRST = "nulls_first" + NULLS_LAST = "nulls_last" + + @staticmethod + def from_string(s: str): + try: + return NullOrdering[s.upper()] + except KeyError as exc: + raise ValueError(f"Invalid null ordering: {s}") from exc + + def __str__(self): + return self.value + + +# Enum for SortDirection +class SortDirection(Enum): + ASCENDING = "asc" + DESCENDING = "desc" + + def __init__(self, value): + self._default_null_ordering = ( + NullOrdering.NULLS_FIRST if value == "asc" else NullOrdering.NULLS_LAST + ) + + def default_null_ordering(self) -> NullOrdering: + return self._default_null_ordering + + @staticmethod + def from_string(s: str) -> "SortDirection": + if s.lower() == "asc": + return SortDirection.ASCENDING + if s.lower() == "desc": + return SortDirection.DESCENDING + raise ValueError(f"Unexpected sort direction: {s}") + + def __str__(self): + return self.value + + +# Abstract base class for SortOrder +class SortOrder(ABC): + @abstractmethod + def expression(self): + pass + + @abstractmethod + def direction(self): + pass + + @abstractmethod + def null_ordering(self): + pass + + def children(self): + """ + Returns the child expressions. By default, this is the expression itself in a list. + """ + return [self.expression()] + + +# Concrete implementation of SortOrder +class SortImpl(SortOrder): + def __init__( + self, expression, direction: SortDirection, null_ordering: NullOrdering + ): + self._expression = expression + self._direction = direction + self._null_ordering = null_ordering + + def expression(self): + return self._expression + + def direction(self): + return self._direction + + def null_ordering(self): + return self._null_ordering + + def __eq__(self, other): + if not isinstance(other, SortImpl): + return False + return ( + self._expression == other._expression + and self._direction == other._direction + and self._null_ordering == other._null_ordering + ) + + def __hash__(self): + return hash((self._expression, self._direction, self._null_ordering)) + + def __str__(self): + return ( + f"SortImpl(expression={self._expression}, " + f"direction={self._direction}, " + f"nullOrdering={self._null_ordering})" + ) + + +# Helper class to create SortOrder instances +class SortOrders: + NONE = [] + + @staticmethod + def ascending(expression) -> SortImpl: + return SortOrders.of(expression, SortDirection.ASCENDING) + + @staticmethod + def descending(expression) -> SortImpl: + return SortOrders.of(expression, SortDirection.DESCENDING) + + @staticmethod + def of( + expression, + direction: SortDirection, + null_ordering: Optional[NullOrdering] = None, + ) -> SortImpl: + if null_ordering is None: + null_ordering = direction.default_null_ordering() + return SortImpl(expression, direction, null_ordering) + + @staticmethod + def from_string( + expression, direction_str: str, null_ordering_str: Optional[str] = None + ) -> SortImpl: + direction = SortDirection.from_string(direction_str) + null_ordering = ( + NullOrdering(null_ordering_str) + if null_ordering_str + else direction.default_null_ordering() + ) + return SortImpl(expression, direction, null_ordering) diff --git a/clients/client-python/tests/unittests/test_sorts.py b/clients/client-python/tests/unittests/test_sorts.py new file mode 100644 index 00000000000..1924b502e27 --- /dev/null +++ b/clients/client-python/tests/unittests/test_sorts.py @@ -0,0 +1,140 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import unittest +from gravitino.api.expressions.sorts import ( + SortDirection, + NullOrdering, + SortImpl, + SortOrders, +) + + +class TestSortImpl(unittest.TestCase): + + def test_sortimpl_initialization(self): + # Testing initialization of SortImpl + expr = "column_name" + direction = SortDirection.ASCENDING + null_ordering = NullOrdering.NULLS_FIRST + sort_order = SortImpl(expr, direction, null_ordering) + + self.assertEqual(sort_order.expression(), expr) + self.assertEqual(sort_order.direction(), direction) + self.assertEqual(sort_order.null_ordering(), null_ordering) + + def test_sortimpl_equality(self): + # Testing equality of two SortImpl instances + expr = "column_name" + direction = SortDirection.ASCENDING + null_ordering = NullOrdering.NULLS_FIRST + sort_order1 = SortImpl(expr, direction, null_ordering) + sort_order2 = SortImpl(expr, direction, null_ordering) + + self.assertEqual(sort_order1, sort_order2) + + def test_sortimpl_inequality(self): + # Testing inequality of two SortImpl instances + expr = "column_name" + direction = SortDirection.ASCENDING + null_ordering = NullOrdering.NULLS_FIRST + sort_order1 = SortImpl(expr, direction, null_ordering) + sort_order2 = SortImpl("another_column", direction, null_ordering) + + self.assertNotEqual(sort_order1, sort_order2) + + def test_sortimpl_hash(self): + # Testing the hash method of SortImpl + expr = "column_name" + direction = SortDirection.ASCENDING + null_ordering = NullOrdering.NULLS_FIRST + sort_order1 = SortImpl(expr, direction, null_ordering) + sort_order2 = SortImpl(expr, direction, null_ordering) + + self.assertEqual(hash(sort_order1), hash(sort_order2)) + + +class TestSortDirection(unittest.TestCase): + + def test_from_string(self): + # Test from_string method for SortDirection + self.assertEqual(SortDirection.from_string("asc"), SortDirection.ASCENDING) + self.assertEqual(SortDirection.from_string("desc"), SortDirection.DESCENDING) + + with self.assertRaises(ValueError): + SortDirection.from_string("invalid") + + def test_default_null_ordering(self): + # Test default_null_ordering method for SortDirection + self.assertEqual( + SortDirection.ASCENDING.default_null_ordering(), NullOrdering.NULLS_FIRST + ) + self.assertEqual( + SortDirection.DESCENDING.default_null_ordering(), NullOrdering.NULLS_LAST + ) + + +class TestNullOrdering(unittest.TestCase): + + def test_from_string(self): + # Test from_string method for NullOrdering + self.assertEqual( + NullOrdering.from_string("nulls_first"), NullOrdering.NULLS_FIRST + ) + self.assertEqual( + NullOrdering.from_string("nulls_last"), NullOrdering.NULLS_LAST + ) + + with self.assertRaises(ValueError): + NullOrdering.from_string("invalid") + + +class TestSortOrders(unittest.TestCase): + + def test_ascending(self): + # Test the ascending method of SortOrders + expr = "column_name" + sort_order = SortOrders.ascending(expr) + self.assertEqual(sort_order.expression(), expr) + self.assertEqual(sort_order.direction(), SortDirection.ASCENDING) + self.assertEqual(sort_order.null_ordering(), NullOrdering.NULLS_FIRST) + + def test_descending(self): + # Test the descending method of SortOrders + expr = "column_name" + sort_order = SortOrders.descending(expr) + self.assertEqual(sort_order.expression(), expr) + self.assertEqual(sort_order.direction(), SortDirection.DESCENDING) + self.assertEqual(sort_order.null_ordering(), NullOrdering.NULLS_LAST) + + def test_of(self): + # Test the of method of SortOrders + expr = "column_name" + sort_order = SortOrders.of( + expr, SortDirection.DESCENDING, NullOrdering.NULLS_FIRST + ) + self.assertEqual(sort_order.expression(), expr) + self.assertEqual(sort_order.direction(), SortDirection.DESCENDING) + self.assertEqual(sort_order.null_ordering(), NullOrdering.NULLS_FIRST) + + def test_from_string(self): + # Test the from_string method of SortOrders + expr = "column_name" + sort_order = SortOrders.from_string(expr, "asc", "nulls_last") + self.assertEqual(sort_order.expression(), expr) + self.assertEqual(sort_order.direction(), SortDirection.ASCENDING) + self.assertEqual(sort_order.null_ordering(), NullOrdering.NULLS_LAST) From 07a8a874ede4ba4de172644df43546dec87daacc Mon Sep 17 00:00:00 2001 From: YUN SUN Date: Sun, 24 Nov 2024 23:01:15 -0600 Subject: [PATCH 06/14] Update the transforms.py --- .../api/expressions/named_reference.py | 2 +- .../gravitino/api/expressions/transforms.py | 526 ++++++++++++++++++ .../tests/unittests/test_named_reference.py | 6 +- .../tests/unittests/test_tranforms.py | 103 ++++ 4 files changed, 633 insertions(+), 4 deletions(-) create mode 100644 clients/client-python/gravitino/api/expressions/transforms.py create mode 100644 clients/client-python/tests/unittests/test_tranforms.py diff --git a/clients/client-python/gravitino/api/expressions/named_reference.py b/clients/client-python/gravitino/api/expressions/named_reference.py index b145e6a1691..a4dddf5c619 100644 --- a/clients/client-python/gravitino/api/expressions/named_reference.py +++ b/clients/client-python/gravitino/api/expressions/named_reference.py @@ -60,7 +60,7 @@ def __init__(self, field_names: List[str]): super().__init__() self._field_names = field_names - def field_names(self) -> List[str]: + def field_name(self) -> List[str]: return self._field_names def __eq__(self, other: object) -> bool: diff --git a/clients/client-python/gravitino/api/expressions/transforms.py b/clients/client-python/gravitino/api/expressions/transforms.py new file mode 100644 index 00000000000..add6476e670 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/transforms.py @@ -0,0 +1,526 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from abc import ABC +from typing import List +from gravitino.api.expressions.expression import Expression +from gravitino.api.expressions.named_reference import NamedReference +from gravitino.api.expressions.literals import Literals + + +class Partition(ABC): + """Base class for partitioning types.""" + + pass + + +class Partitions: + EMPTY_PARTITIONS = [] # Default empty partitions list + + +class ListPartition(Partition): + """Represents list partitioning.""" + + pass + + +class RangePartition(Partition): + """Represents range partitioning.""" + + pass + + +class Transform(Expression, ABC): + """Represents a transform function.""" + + def name(self) -> str: + """Returns the transform function name.""" + pass + + def arguments(self) -> List[Expression]: + """Returns the arguments passed to the transform function.""" + pass + + def assignments(self) -> List[Partition]: + """ + Returns the preassigned partitions for the transform. + By default, it returns an empty list of partitions, + as only some transforms like ListTransform and RangeTransform + need to deal with assignments. + """ + return Partitions.EMPTY_PARTITIONS + + def children(self) -> List[Expression]: + """Returns the children expressions. By default, it is the arguments.""" + return self.arguments() + + +class SingleFieldTransform(Transform): + """Base class for transforms on a single field.""" + + def __init__(self, ref: NamedReference): + self.ref = ref + + def field_name(self) -> List[str]: + """Returns the referenced field name as a list of string parts.""" + return self.ref.field_name() + + def references(self) -> List[NamedReference]: + """Returns a list of references (i.e., the field reference).""" + return [self.ref] + + def arguments(self) -> List[Expression]: + """Returns a list of arguments for the transform, which is just `ref`.""" + return [self.ref] + + def __eq__(self, other: object) -> bool: + """Checks equality based on the `ref`.""" + if not isinstance(other, SingleFieldTransform): + return False + return self.ref == other.ref + + def __hash__(self) -> int: + """Generates a hash based on `ref`.""" + return hash(self.ref) + + +class Transforms(Transform): + """Helper methods to create logical transforms to pass into Apache Gravitino.""" + + # Constants + EMPTY_TRANSFORM = [] + NAME_OF_IDENTITY = "identity" + NAME_OF_YEAR = "year" + NAME_OF_MONTH = "month" + NAME_OF_DAY = "day" + NAME_OF_HOUR = "hour" + NAME_OF_BUCKET = "bucket" + NAME_OF_TRUNCATE = "truncate" + NAME_OF_LIST = "list" + NAME_OF_RANGE = "range" + + @staticmethod + def identity(field_name: List[str]) -> "IdentityTransform": + """ + Create a transform that returns the input value. + + :param field_name: The field name(s) to transform. + Can be a list of field names or a single field name. + :return: The created transform + """ + # If a single column name is passed, convert it to a list. + if isinstance(field_name, str): + field_name = [field_name] + + return IdentityTransform(NamedReference.field(field_name)) + + @staticmethod + def year(field_name: List[str]) -> "YearTransform": + """ + Create a transform that returns the year of the input value. + + :param field_name: The field name(s) to transform. + Can be a list of field names or a single field name. + :return: The created transform + """ + # If a single column name is passed, convert it to a list. + if isinstance(field_name, str): + field_name = [field_name] + + return YearTransform(NamedReference.field(field_name)) + + @staticmethod + def month(field_name: List[str]) -> "MonthTransform": + """ + Create a transform that returns the month of the input value. + + :param field_name: The field name(s) to transform. + Can be a list of field names or a single field name. + :return: The created transform + """ + # If a single column name is passed, convert it to a list. + if isinstance(field_name, str): + field_name = [field_name] + + return MonthTransform(NamedReference.field(field_name)) + + @staticmethod + def day(field_name): + """ + Create a transform that returns the day of the input value. + + :param field_name: The field name(s) to transform. + Can be a list of field names or a single column name. + :return: The created transform + """ + # If a single column name is passed, convert it to a list. + if isinstance(field_name, str): + field_name = [field_name] + + return DayTransform(NamedReference.field(field_name)) + + @staticmethod + def hour(field_name): + """ + Create a transform that returns the hour of the input value. + + :param field_name: The field name(s) to transform. + Can be a list of field names or a single column name. + :return: The created transform + """ + # If a single column name is passed, convert it to a list. + if isinstance(field_name, str): + field_name = [field_name] + + return HourTransform(NamedReference.field(field_name)) + + @staticmethod + def bucket(num_buckets: int, *field_names: List[str]) -> "BucketTransform": + """ + Create a transform that returns the bucket of the input value. + + :param num_buckets: The number of buckets to use + :param field_names: The field names to transform + :return: The created transform + """ + fields = [NamedReference.field(fn) for fn in field_names] + return BucketTransform(Literals.integer_literal(num_buckets), fields) + + @staticmethod + def list( + field_names: List[List[str]], assignments: List[ListPartition] = None + ) -> "ListTransform": + """ + Create a transform that includes multiple fields in a list. + + :param field_names: The field names to include in the list + :param assignments: The preassigned list partitions (default is an empty list) + :return: The created transform + """ + if assignments is None: + assignments = [] + # Convert the list of field names into NamedReference objects + fields = [NamedReference.field(fn) for fn in field_names] + + return ListTransform(fields, assignments) + + @staticmethod + def range( + field_name: List[str], assignments: List[RangePartition] = None + ) -> "RangeTransform": + """ + Create a transform that returns the range of the input value. + + :param field_name: The field name to transform + :param assignments: The preassigned range partitions (default is an empty list) + :return: The created transform + """ + if assignments is None: + assignments = [] + return RangeTransform(NamedReference.field(field_name), assignments) + + @staticmethod + def truncate(width: int, field_name) -> "TruncateTransform": + """ + Create a transform that returns the truncated value of the input value with the given width. + + :param width: The width to truncate to + :param field_name: The field name(s) to transform. Can be a list of field names or a single field name. + :return: The created transform + """ + # If a single column name is passed, convert it to a list. + if isinstance(field_name, str): + field_name = [field_name] + + return TruncateTransform( + Literals.integer_literal(width), NamedReference.field(field_name) + ) + + @staticmethod + def apply(name: str, *arguments: "Expression") -> "ApplyTransform": + """ + Create a transform that applies a function to the input value. + + :param name: The name of the function to apply + :param arguments: The arguments to the function + :return: The created transform + """ + return ApplyTransform(name, arguments) + + +class IdentityTransform(Transforms): + """A transform that returns the input value.""" + + def __init__(self, ref: "NamedReference"): + self.ref = ref + + def name(self) -> str: + return Transforms.NAME_OF_IDENTITY + + def arguments(self) -> List["Expression"]: + return [self.ref] + + def __eq__(self, other): + return isinstance(other, IdentityTransform) and self.ref == other.ref + + def __hash__(self): + return hash(self.ref) + + +class YearTransform(Transforms): + """A transform that returns the year of the input value.""" + + def __init__(self, ref: "NamedReference"): + self.ref = ref + + def name(self) -> str: + return Transforms.NAME_OF_YEAR + + def children(self) -> List[Expression]: + return [self.ref] + + def arguments(self) -> List["Expression"]: + return [self.ref] + + def __eq__(self, other): + return isinstance(other, YearTransform) and self.ref == other.ref + + def __hash__(self): + return hash(self.ref) + + +class MonthTransform(Transforms): + """A transform that returns the month of the input value.""" + + def __init__(self, ref: "NamedReference"): + self.ref = ref + + def name(self) -> str: + return Transforms.NAME_OF_MONTH + + def children(self) -> List[Expression]: + return [self.ref] + + def arguments(self) -> List["Expression"]: + return [self.ref] + + def __eq__(self, other): + return isinstance(other, MonthTransform) and self.ref == other.ref + + def __hash__(self): + return hash(self.ref) + + +class DayTransform(Transforms): + """A transform that returns the day of the input value.""" + + def __init__(self, ref: "NamedReference"): + self.ref = ref + + def name(self) -> str: + return Transforms.NAME_OF_DAY + + def children(self) -> List[Expression]: + return [self.ref] + + def arguments(self) -> List["Expression"]: + return [self.ref] + + def __eq__(self, other): + return isinstance(other, DayTransform) and self.ref == other.ref + + def __hash__(self): + return hash(self.ref) + + +class HourTransform(Transforms): + """A transform that returns the hour of the input value.""" + + def __init__(self, ref: "NamedReference"): + self.ref = ref + + def name(self) -> str: + return Transforms.NAME_OF_HOUR + + def children(self) -> List[Expression]: + return [self.ref] + + def arguments(self) -> List["Expression"]: + return [self.ref] + + def __eq__(self, other): + return isinstance(other, HourTransform) and self.ref == other.ref + + def __hash__(self): + return hash(self.ref) + + +class BucketTransform(Transforms): + """A transform that returns the bucket of the input value.""" + + def __init__(self, num_buckets: int, fields: List["NamedReference"]): + self._num_buckets = num_buckets + self.fields = fields + + @property + def num_buckets(self) -> int: + return self._num_buckets + + @property + def field_names(self) -> List[str]: + return [ + field_name for field in self.fields for field_name in field.field_name() + ] + + def name(self) -> str: + return Transforms.NAME_OF_BUCKET + + def arguments(self) -> List["Expression"]: + return [str(Literals.integer_literal(self.num_buckets))] + [ + field_name for field in self.fields for field_name in field.field_name() + ] + + def __eq__(self, other): + if not isinstance(other, BucketTransform): + return False + return ( + self.num_buckets == other.num_buckets + and self.field_names == other.field_names + ) + + def __hash__(self): + return hash((self.num_buckets, *(str(field) for field in self.fields))) + + +class TruncateTransform(Transforms): + """A transform that returns the truncated value of the input value with the given width.""" + + def __init__(self, width: int, field: "NamedReference"): + self._width = width + self.field = field + + @property + def width(self) -> int: + return self._width + + @property + def field_name(self) -> List[str]: + return self.field.field_name() + + def name(self) -> str: + return Transforms.NAME_OF_TRUNCATE + + def arguments(self) -> List["Expression"]: + return [self.width, self.field] + + def __eq__(self, other): + return ( + isinstance(other, TruncateTransform) + and self.width == other.width + and self.field == other.field + ) + + def __hash__(self): + return hash((self.width, self.field)) + + +class ListTransform(Transforms): + """A transform that includes multiple fields in a list.""" + + def __init__( + self, + fields: List["NamedReference"], + assignments: List["ListPartition"] = None, + ): + if assignments is None: + assignments = [] + self.fields = fields + self.assignments = assignments + + @property + def field_names(self) -> List[List[str]]: + return [field.field_name() for field in self.fields] + + def name(self) -> str: + return Transforms.NAME_OF_LIST + + def arguments(self) -> List["Expression"]: + return self.fields + + def assignments(self) -> List["ListPartition"]: + return self.assignments + + def __eq__(self, other): + return isinstance(other, ListTransform) and self.fields == other.fields + + def __hash__(self): + return hash(tuple(self.fields)) + + +class RangeTransform(Transforms): + """A transform that returns the range of the input value.""" + + def __init__( + self, field: "NamedReference", assignments: List["RangePartition"] = None + ): + if assignments is None: + assignments = [] + self.field = field + self.assignments = assignments + + @property + def field_name(self) -> List[str]: + return self.field.field_name() + + def name(self) -> str: + return Transforms.NAME_OF_RANGE + + def arguments(self) -> List["Expression"]: + return [self.field] + + def assignments(self) -> List["RangePartition"]: + return self.assignments + + def __eq__(self, other): + return isinstance(other, RangeTransform) and self.field == other.field + + def __hash__(self): + return hash(self.field) + + +class ApplyTransform(Transforms): + """A transform that applies a function to the input value.""" + + def __init__(self, name: str, arguments: List["Expression"]): + self._name = name + self._arguments = list(arguments) + + def name(self) -> str: + return self._name + + def arguments(self) -> List["Expression"]: + return self._arguments + + def __eq__(self, other): + return ( + isinstance(other, ApplyTransform) + and self.name() == other.name() + and self.arguments() == other.arguments() + ) + + def __hash__(self): + return hash((self.name(), tuple(self.arguments()))) diff --git a/clients/client-python/tests/unittests/test_named_reference.py b/clients/client-python/tests/unittests/test_named_reference.py index b76f6cec8c9..a9942aec7fc 100644 --- a/clients/client-python/tests/unittests/test_named_reference.py +++ b/clients/client-python/tests/unittests/test_named_reference.py @@ -22,7 +22,7 @@ class TestNamedReference(unittest.TestCase): def test_field_reference_creation(self): field = FieldReference(["student", "name"]) - self.assertEqual(field.field_names(), ["student", "name"]) + self.assertEqual(field.field_name(), ["student", "name"]) self.assertEqual(str(field), "student.name") def test_field_reference_equality(self): @@ -33,7 +33,7 @@ def test_field_reference_equality(self): def test_named_reference_static_methods(self): ref = NamedReference.field(["student", "name"]) - self.assertEqual(ref.field_names(), ["student", "name"]) + self.assertEqual(ref.field_name(), ["student", "name"]) ref2 = NamedReference.field_from_column("student") - self.assertEqual(ref2.field_names(), ["student"]) + self.assertEqual(ref2.field_name(), ["student"]) diff --git a/clients/client-python/tests/unittests/test_tranforms.py b/clients/client-python/tests/unittests/test_tranforms.py new file mode 100644 index 00000000000..52d2072c51b --- /dev/null +++ b/clients/client-python/tests/unittests/test_tranforms.py @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import unittest +from gravitino.api.expressions.transforms import ( + Transforms, + IdentityTransform, + YearTransform, + MonthTransform, + DayTransform, + HourTransform, + BucketTransform, + TruncateTransform, + ListTransform, + RangeTransform, + ApplyTransform, +) +from gravitino.api.expressions.named_reference import NamedReference, FieldReference +from gravitino.api.expressions.literals import Literals + + +class TestTransforms(unittest.TestCase): + + def setUp(self): + self.ref = NamedReference.field(["some_field"]) + + def test_identity_transform(self): + transform = Transforms.identity(["some_field"]) + self.assertIsInstance(transform, IdentityTransform) + self.assertEqual(transform.name(), "identity") + self.assertEqual(transform.arguments(), [self.ref]) + + def test_year_transform(self): + transform = Transforms.year(["some_field"]) + self.assertIsInstance(transform, YearTransform) + self.assertEqual(transform.name(), "year") + self.assertEqual(transform.arguments(), [self.ref]) + + def test_month_transform(self): + transform = Transforms.month(["some_field"]) + self.assertIsInstance(transform, MonthTransform) + self.assertEqual(transform.name(), "month") + self.assertEqual(transform.arguments(), [self.ref]) + + def test_day_transform(self): + transform = Transforms.day(["some_field"]) + self.assertIsInstance(transform, DayTransform) + self.assertEqual(transform.name(), "day") + self.assertEqual(transform.arguments(), [self.ref]) + + def test_hour_transform(self): + transform = Transforms.hour(["some_field"]) + self.assertIsInstance(transform, HourTransform) + self.assertEqual(transform.name(), "hour") + self.assertEqual(transform.arguments(), [self.ref]) + + def test_bucket_transform(self): + field1 = FieldReference(["field1"]) + field2 = FieldReference(["field2"]) + transform = BucketTransform(num_buckets=5, fields=[field1, field2]) + self.assertEqual(transform.num_buckets, 5) + self.assertEqual(transform.field_names, ["field1", "field2"]) + self.assertEqual(transform.name(), "bucket") + expected_arguments = [str(Literals.integer_literal(5))] + ["field1", "field2"] + self.assertEqual(transform.arguments(), expected_arguments) + + def test_truncate_transform(self): + transform = Transforms.truncate(5, "some_field") + self.assertIsInstance(transform, TruncateTransform) + self.assertEqual(transform.name(), "truncate") + self.assertEqual(transform.arguments(), [Literals.integer_literal(5), self.ref]) + + def test_list_transform(self): + transform = Transforms.list([["field1"], ["field2"]]) + self.assertIsInstance(transform, ListTransform) + self.assertEqual(transform.name(), "list") + self.assertEqual(len(transform.arguments()), 2) # Expecting 2 field names + + def test_range_transform(self): + transform = Transforms.range(["some_field"]) + self.assertIsInstance(transform, RangeTransform) + self.assertEqual(transform.name(), "range") + self.assertEqual(transform.arguments(), [self.ref]) + + def test_apply_transform(self): + transform = Transforms.apply("my_function", self.ref) + self.assertIsInstance(transform, ApplyTransform) + self.assertEqual(transform.name(), "my_function") + self.assertEqual(transform.arguments(), [self.ref]) From 2bc898362a71db8c461fa37273c07de69b1ed4bc Mon Sep 17 00:00:00 2001 From: YUN SUN Date: Sun, 24 Nov 2024 23:11:35 -0600 Subject: [PATCH 07/14] Fix a f string bug for distributions.py --- .../client-python/gravitino/api/expressions/distributions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clients/client-python/gravitino/api/expressions/distributions.py b/clients/client-python/gravitino/api/expressions/distributions.py index 420fb16ee0f..bb7b0ee20e3 100644 --- a/clients/client-python/gravitino/api/expressions/distributions.py +++ b/clients/client-python/gravitino/api/expressions/distributions.py @@ -38,8 +38,8 @@ def get_by_name(name: str): if name in ("EVEN", "RANDOM"): return Strategy.EVEN raise ValueError( - f"Invalid distribution strategy: {name}. Valid values are: {', '.join( - [Strategy.NONE, Strategy.HASH, Strategy.RANGE, Strategy.EVEN])}" + f"Invalid distribution strategy: {name}. " + f"Valid values are: {', '.join([Strategy.NONE, Strategy.HASH, Strategy.RANGE, Strategy.EVEN])}" ) From 4bac4564bedbe5af0ea62d8a3490dfc4ff3bc89a Mon Sep 17 00:00:00 2001 From: YUN SUN Date: Sun, 24 Nov 2024 23:26:09 -0600 Subject: [PATCH 08/14] Update the comment for Partition class and so on --- clients/client-python/gravitino/api/expressions/transforms.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clients/client-python/gravitino/api/expressions/transforms.py b/clients/client-python/gravitino/api/expressions/transforms.py index add6476e670..ad111772311 100644 --- a/clients/client-python/gravitino/api/expressions/transforms.py +++ b/clients/client-python/gravitino/api/expressions/transforms.py @@ -22,22 +22,26 @@ from gravitino.api.expressions.literals import Literals +# set up for temporal usage, need to delete when we implement Partition class class Partition(ABC): """Base class for partitioning types.""" pass +# set up for temporal usage, need to delete when we implement Partitions class class Partitions: EMPTY_PARTITIONS = [] # Default empty partitions list +# set up for temporal usage, need to delete when we implement ListPartition class class ListPartition(Partition): """Represents list partitioning.""" pass +# set up for temporal usage, need to delete when we implement RangePartition class class RangePartition(Partition): """Represents range partitioning.""" From 40a14ee3fa4923dcbca29327f86cd63d2b851ffc Mon Sep 17 00:00:00 2001 From: Xun Date: Mon, 25 Nov 2024 18:58:58 +0800 Subject: [PATCH 09/14] improve model structure --- .../api/expressions/Literals/__init__.py | 0 .../api/expressions/Literals/literal.py | 39 ++++ .../expressions/{ => Literals}/literals.py | 80 ++++---- .../api/expressions/distributions.py | 122 ------------ .../api/expressions/distributions/__init__.py | 0 .../expressions/distributions/distribution.py | 64 ++++++ .../distributions/distributions.py | 136 +++++++++++++ .../api/expressions/distributions/strategy.py | 40 ++++ .../gravitino/api/expressions/expression.py | 14 +- .../api/expressions/function_expression.py | 22 ++- .../api/expressions/named_reference.py | 12 +- .../api/expressions/partitions/__init__.py | 0 .../api/expressions/partitions/partition.py | 27 +++ .../api/expressions/partitions/partitions.py | 35 ++++ .../api/expressions/sorts/__init__.py | 0 .../api/expressions/sorts/null_ordering.py | 32 +++ .../api/expressions/sorts/sort_direction.py | 43 ++++ .../api/expressions/sorts/sort_order.py | 41 ++++ .../{sorts.py => sorts/sort_orders.py} | 88 ++------- .../api/expressions/transforms/__init__.py | 0 .../api/expressions/transforms/transform.py | 61 ++++++ .../{ => transforms}/transforms.py | 183 +++++------------- .../api/expressions/unparsed_expression.py | 2 +- .../tests/unittests/test_distributions.py | 5 +- .../tests/unittests/test_literals.py | 5 +- .../tests/unittests/test_sorts.py | 18 +- .../tests/unittests/test_tranforms.py | 7 +- 27 files changed, 671 insertions(+), 405 deletions(-) create mode 100644 clients/client-python/gravitino/api/expressions/Literals/__init__.py create mode 100644 clients/client-python/gravitino/api/expressions/Literals/literal.py rename clients/client-python/gravitino/api/expressions/{ => Literals}/literals.py (73%) delete mode 100644 clients/client-python/gravitino/api/expressions/distributions.py create mode 100644 clients/client-python/gravitino/api/expressions/distributions/__init__.py create mode 100644 clients/client-python/gravitino/api/expressions/distributions/distribution.py create mode 100644 clients/client-python/gravitino/api/expressions/distributions/distributions.py create mode 100644 clients/client-python/gravitino/api/expressions/distributions/strategy.py create mode 100644 clients/client-python/gravitino/api/expressions/partitions/__init__.py create mode 100644 clients/client-python/gravitino/api/expressions/partitions/partition.py create mode 100644 clients/client-python/gravitino/api/expressions/partitions/partitions.py create mode 100644 clients/client-python/gravitino/api/expressions/sorts/__init__.py create mode 100644 clients/client-python/gravitino/api/expressions/sorts/null_ordering.py create mode 100644 clients/client-python/gravitino/api/expressions/sorts/sort_direction.py create mode 100644 clients/client-python/gravitino/api/expressions/sorts/sort_order.py rename clients/client-python/gravitino/api/expressions/{sorts.py => sorts/sort_orders.py} (54%) create mode 100644 clients/client-python/gravitino/api/expressions/transforms/__init__.py create mode 100644 clients/client-python/gravitino/api/expressions/transforms/transform.py rename clients/client-python/gravitino/api/expressions/{ => transforms}/transforms.py (71%) diff --git a/clients/client-python/gravitino/api/expressions/Literals/__init__.py b/clients/client-python/gravitino/api/expressions/Literals/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/clients/client-python/gravitino/api/expressions/Literals/literal.py b/clients/client-python/gravitino/api/expressions/Literals/literal.py new file mode 100644 index 00000000000..3a8c3ecfec1 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/Literals/literal.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from abc import abstractmethod +from typing import List + +from gravitino.api.expressions.expression import Expression + + +class Literal(Expression): + """ + Represents a constant literal value in the public expression API. + """ + + @abstractmethod + def value(self): + """The literal value.""" + pass + + @abstractmethod + def data_type(self): + """The data type of the literal.""" + pass + + def children(self) -> List[Expression]: + return Expression.EMPTY_EXPRESSION diff --git a/clients/client-python/gravitino/api/expressions/literals.py b/clients/client-python/gravitino/api/expressions/Literals/literals.py similarity index 73% rename from clients/client-python/gravitino/api/expressions/literals.py rename to clients/client-python/gravitino/api/expressions/Literals/literals.py index c28466995a4..68e61d4f133 100644 --- a/clients/client-python/gravitino/api/expressions/literals.py +++ b/clients/client-python/gravitino/api/expressions/Literals/literals.py @@ -15,42 +15,25 @@ # specific language governing permissions and limitations # under the License. -from abc import ABC, abstractmethod +from decimal import Decimal from typing import Union from datetime import date, time, datetime +from gravitino.api.expressions.literals.literal import Literal -# Abstract base class for Literal -class Literal(ABC): - """ - Represents a constant literal value in the expression API. - """ - @abstractmethod - def value(self) -> Union[int, float, str, datetime, time, date, bool]: - """ - Returns the literal value. - """ - pass - - @abstractmethod - def data_type(self) -> str: - """ - Returns the data type of the literal. - """ - pass - - def children(self): - """ - Returns the child expressions. By default, this is an empty list. - """ - return [] +class LiteralImpl(Literal): + """Creates a literal with the given type value.""" + _value: Union[int, float, str, datetime, time, date, bool, Decimal, None] + _data_type: ( + str # TODO: Need implement `api/src/main/java/org/apache/gravitino/rel/types` + ) -# Concrete implementation of Literal -class LiteralImpl(Literal): def __init__( - self, value: Union[int, float, str, datetime, time, date, bool], data_type: str + self, + value: Union[int, float, str, datetime, time, date, bool, Decimal, None], + data_type: str, ): self._value = value self._data_type = data_type @@ -73,11 +56,14 @@ def __str__(self): return f"LiteralImpl(value={self._value}, data_type={self._data_type})" -# Helper class to create literals class Literals: + """The helper class to create literals to pass into Apache Gravitino.""" + + NULL = LiteralImpl(None, "NullType") + @staticmethod - def null_literal() -> Literal: - return LiteralImpl(None, "NullType") + def of(value, data_type) -> Literal: + return LiteralImpl(value, data_type) @staticmethod def boolean_literal(value: bool) -> Literal: @@ -87,18 +73,34 @@ def boolean_literal(value: bool) -> Literal: def byte_literal(value: int) -> Literal: return LiteralImpl(value, "Byte") + @staticmethod + def unsigned_byte_literal(value: int) -> Literal: + return LiteralImpl(value, "Unsigned Byte") + @staticmethod def short_literal(value: int) -> Literal: return LiteralImpl(value, "Short") + @staticmethod + def unsigned_short_literal(value: int) -> Literal: + return LiteralImpl(value, "Unsigned Short") + @staticmethod def integer_literal(value: int) -> Literal: return LiteralImpl(value, "Integer") + @staticmethod + def unsigned_integer_literal(value: int) -> Literal: + return LiteralImpl(value, "Unsigned Integer") + @staticmethod def long_literal(value: int) -> Literal: return LiteralImpl(value, "Long") + @staticmethod + def unsigned_long_literal(value: Decimal) -> Literal: + return LiteralImpl(value, "Unsigned Long") + @staticmethod def float_literal(value: float) -> Literal: return LiteralImpl(value, "Float") @@ -111,14 +113,6 @@ def double_literal(value: float) -> Literal: def decimal_literal(value: float) -> Literal: return LiteralImpl(value, "Decimal") - @staticmethod - def string_literal(value: str) -> Literal: - return LiteralImpl(value, "String") - - @staticmethod - def varchar_literal(length: int, value: str) -> Literal: - return LiteralImpl(value, f"Varchar({length})") - @staticmethod def date_literal(value: date) -> Literal: return LiteralImpl(value, "Date") @@ -134,3 +128,11 @@ def timestamp_literal(value: datetime) -> Literal: @staticmethod def timestamp_literal_from_string(value: str) -> Literal: return Literals.timestamp_literal(datetime.fromisoformat(value)) + + @staticmethod + def string_literal(value: str) -> Literal: + return LiteralImpl(value, "String") + + @staticmethod + def varchar_literal(length: int, value: str) -> Literal: + return LiteralImpl(value, f"Varchar({length})") diff --git a/clients/client-python/gravitino/api/expressions/distributions.py b/clients/client-python/gravitino/api/expressions/distributions.py deleted file mode 100644 index bb7b0ee20e3..00000000000 --- a/clients/client-python/gravitino/api/expressions/distributions.py +++ /dev/null @@ -1,122 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List, Tuple -from gravitino.api.expressions.named_reference import NamedReference - - -# Enum equivalent in Python for Strategy -class Strategy: - NONE = "NONE" - HASH = "HASH" - RANGE = "RANGE" - EVEN = "EVEN" - - @staticmethod - def get_by_name(name: str): - name = name.upper() - if name == "NONE": - return Strategy.NONE - if name == "HASH": - return Strategy.HASH - if name == "RANGE": - return Strategy.RANGE - if name in ("EVEN", "RANDOM"): - return Strategy.EVEN - raise ValueError( - f"Invalid distribution strategy: {name}. " - f"Valid values are: {', '.join([Strategy.NONE, Strategy.HASH, Strategy.RANGE, Strategy.EVEN])}" - ) - - -# Distribution interface equivalent -class Distribution: - def strategy(self) -> str: - raise NotImplementedError - - def number(self) -> int: - raise NotImplementedError - - def expressions(self) -> List: - raise NotImplementedError - - def children(self) -> List: - return self.expressions() - - def equals(self, distribution) -> bool: - return ( - isinstance(distribution, Distribution) - and self.strategy() == distribution.strategy() - and self.number() == distribution.number() - and self.expressions() == distribution.expressions() - ) - - -# Implementation of Distribution -class DistributionImpl(Distribution): - def __init__(self, strategy: str, number: int, expressions: List): - self._strategy = strategy - self._number = number - self._expressions = expressions - - def strategy(self) -> str: - return self._strategy - - def number(self) -> int: - return self._number - - def expressions(self) -> List: - return self._expressions - - def __str__(self): - return f"DistributionImpl(strategy={self._strategy}, number={self._number}, expressions={self._expressions})" - - def __eq__(self, other): - if not isinstance(other, DistributionImpl): - return False - return ( - self._strategy == other._strategy - and self._number == other._number - and self._expressions == other._expressions - ) - - def __hash__(self): - return hash((self._strategy, self._number, tuple(self._expressions))) - - -# Helper methods to create distributions -class Distributions: - NONE = DistributionImpl(Strategy.NONE, 0, []) - HASH = DistributionImpl(Strategy.HASH, 0, []) - RANGE = DistributionImpl(Strategy.RANGE, 0, []) - - @staticmethod - def even(number: int, *expressions) -> Distribution: - return DistributionImpl(Strategy.EVEN, number, list(expressions)) - - @staticmethod - def hash(number: int, *expressions) -> Distribution: - return DistributionImpl(Strategy.HASH, number, list(expressions)) - - @staticmethod - def of(strategy: str, number: int, *expressions) -> Distribution: - return DistributionImpl(strategy, number, list(expressions)) - - @staticmethod - def fields(strategy: str, number: int, *field_names: Tuple[str]) -> Distribution: - expressions = [NamedReference.field(field_name) for field_name in field_names] - return Distributions.of(strategy, number, *expressions) diff --git a/clients/client-python/gravitino/api/expressions/distributions/__init__.py b/clients/client-python/gravitino/api/expressions/distributions/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/clients/client-python/gravitino/api/expressions/distributions/distribution.py b/clients/client-python/gravitino/api/expressions/distributions/distribution.py new file mode 100644 index 00000000000..bdf0f10a8b3 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/distributions/distribution.py @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from typing import List + +from gravitino.api.expressions.distributions.strategy import Strategy +from gravitino.api.expressions.expression import Expression + + +class Distribution(Expression): + """An interface that defines how data is distributed across partitions.""" + + def strategy(self) -> Strategy: + """ + Return the distribution strategy name. + """ + raise NotImplementedError + + def number(self) -> int: + """ + Return the number of buckets/distribution. + For example, if the distribution strategy is HASH + * and the number is 10, then the data is distributed across 10 buckets. + """ + raise NotImplementedError + + def expressions(self) -> List[Expression]: + """Return the expressions passed to the distribution function.""" + raise NotImplementedError + + def children(self) -> List[Expression]: + return self.expressions() + + def equals(self, distribution) -> bool: + """ + Indicates whether some other object is "equal to" this one. + + Args: + distribution The reference distribution object with which to compare. + + Returns: + Returns true if this object is the same as the obj argument; false otherwise. + """ + if distribution is None: + return False + + return ( + self.strategy() == distribution.strategy() + and self.number() == distribution.number() + and self.expressions() == distribution.expressions() + ) diff --git a/clients/client-python/gravitino/api/expressions/distributions/distributions.py b/clients/client-python/gravitino/api/expressions/distributions/distributions.py new file mode 100644 index 00000000000..495a580c11d --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/distributions/distributions.py @@ -0,0 +1,136 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Tuple + +from gravitino.api.expressions.distributions.distribution import Distribution +from gravitino.api.expressions.distributions.strategy import Strategy +from gravitino.api.expressions.expression import Expression +from gravitino.api.expressions.named_reference import NamedReference + + +class DistributionImpl(Distribution): + """ + Create a distribution on columns. Like distribute by (a) or (a, b), for complex like + distributing by (func(a), b) or (func(a), func(b)) + """ + + _strategy: Strategy + _number: int + _expressions: List[Expression] + + def __init__(self, strategy: Strategy, number: int, expressions: List[Expression]): + self._strategy = strategy + self._number = number + self._expressions = expressions + + def strategy(self) -> Strategy: + return self._strategy + + def number(self) -> int: + return self._number + + def expressions(self) -> List[Expression]: + return self._expressions + + def __str__(self): + return f"DistributionImpl(strategy={self._strategy}, number={self._number}, expressions={self._expressions})" + + def __eq__(self, other): + if self is other: + return True + if other is None or not isinstance(other, DistributionImpl): + return False + return ( + self._strategy == other.strategy() + and self._number == other.number() + and self._expressions == other.expressions() + ) + + def __hash__(self): + return hash((self._strategy, self._number, tuple(self._expressions))) + + +# Helper methods to create distributions +class Distributions: + NONE = DistributionImpl(Strategy.NONE, 0, []) + """NONE is used to indicate that there is no distribution.""" + HASH = DistributionImpl(Strategy.HASH, 0, []) + """List bucketing strategy hash, TODO: #1505 Separate the bucket number from the Distribution.""" + RANGE = DistributionImpl(Strategy.RANGE, 0, []) + """List bucketing strategy range, TODO: #1505 Separate the bucket number from the Distribution.""" + + @staticmethod + def even(number: int, *expressions) -> Distribution: + """ + Create a distribution by evenly distributing the data across the number of buckets. + + :param number: The number of buckets + :param expressions: The expressions to distribute by + :return: The created even distribution + """ + return DistributionImpl(Strategy.EVEN, number, list(expressions)) + + @staticmethod + def hash(number: int, *expressions) -> Distribution: + """ + Create a distribution by hashing the data across the number of buckets. + + :param number: The number of buckets + :param expressions: The expressions to distribute by + :return: The created hash distribution + """ + return DistributionImpl(Strategy.HASH, number, list(expressions)) + + @staticmethod + def of(strategy: Strategy, number: int, *expressions) -> Distribution: + """ + Create a distribution by the given strategy. + + :param strategy: The strategy to use + :param number: The number of buckets + :param expressions: The expressions to distribute by + :return: The created distribution + """ + return DistributionImpl(strategy, number, list(expressions)) + + @staticmethod + def fields( + strategy: Strategy, number: int, *field_names: Tuple[str] + ) -> Distribution: + """ + Create a distribution on columns. Like distribute by (a) or (a, b), for complex like + distributing by (func(a), b) or (func(a), func(b)), please use DistributionImpl.Builder to create. + + NOTE: a, b, c are column names. + + SQL syntax: distribute by hash(a, b) buckets 5 + fields(Strategy.HASH, 5, "a", "b") + + SQL syntax: distribute by hash(a, b, c) buckets 10 + fields(Strategy.HASH, 10, "a", "b", "c") + + SQL syntax: distribute by EVEN(a) buckets 128 + fields(Strategy.EVEN, 128, "a") + + :param strategy: The strategy to use. + :param number: The number of buckets. + :param field_names: The field names to distribute by. + :return: The created distribution. + """ + expressions = [NamedReference.field(field_name) for field_name in field_names] + return Distributions.of(strategy, number, *expressions) diff --git a/clients/client-python/gravitino/api/expressions/distributions/strategy.py b/clients/client-python/gravitino/api/expressions/distributions/strategy.py new file mode 100644 index 00000000000..e37d6748c8a --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/distributions/strategy.py @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +# Enum equivalent in Python for Strategy +class Strategy: + NONE = "NONE" + HASH = "HASH" + RANGE = "RANGE" + EVEN = "EVEN" + + @staticmethod + def get_by_name(name: str): + name = name.upper() + if name == "NONE": + return Strategy.NONE + if name == "HASH": + return Strategy.HASH + if name == "RANGE": + return Strategy.RANGE + if name in ("EVEN", "RANDOM"): + return Strategy.EVEN + raise ValueError( + f"Invalid distribution strategy: {name}. " + f"Valid values are: {', '.join([Strategy.NONE, Strategy.HASH, Strategy.RANGE, Strategy.EVEN])}" + ) diff --git a/clients/client-python/gravitino/api/expressions/expression.py b/clients/client-python/gravitino/api/expressions/expression.py index 0ee215706e8..aeedcab6302 100644 --- a/clients/client-python/gravitino/api/expressions/expression.py +++ b/clients/client-python/gravitino/api/expressions/expression.py @@ -16,7 +16,7 @@ # under the License. from __future__ import annotations -from abc import ABC +from abc import ABC, abstractmethod from typing import List, Set, TYPE_CHECKING if TYPE_CHECKING: @@ -27,10 +27,20 @@ class Expression(ABC): """Base class of the public logical expression API.""" EMPTY_EXPRESSION: List[Expression] = [] + """ + `EMPTY_EXPRESSION` is only used as an input when the default `children` method builds the result. + """ + EMPTY_NAMED_REFERENCE: List[NamedReference] = [] + """ + `EMPTY_NAMED_REFERENCE` is only used as an input when the default `references` method builds + the result array to avoid repeatedly allocating an empty array. + """ + + @abstractmethod def children(self) -> List[Expression]: """Returns a list of the children of this node. Children should not change.""" - return self.EMPTY_EXPRESSION + pass def references(self) -> List[NamedReference]: """Returns a list of fields or columns that are referenced by this expression.""" diff --git a/clients/client-python/gravitino/api/expressions/function_expression.py b/clients/client-python/gravitino/api/expressions/function_expression.py index 92ced3b5b88..4deb23afaa8 100644 --- a/clients/client-python/gravitino/api/expressions/function_expression.py +++ b/clients/client-python/gravitino/api/expressions/function_expression.py @@ -18,7 +18,7 @@ from __future__ import annotations from abc import abstractmethod -from typing import List, Union +from typing import List from gravitino.api.expressions.expression import Expression @@ -61,6 +61,9 @@ class FuncExpressionImpl(FunctionExpression): A concrete implementation of the FunctionExpression interface. """ + _function_name: str + _arguments: List[Expression] + def __init__(self, function_name: str, arguments: List[Expression]): super().__init__() self._function_name = function_name @@ -78,14 +81,15 @@ def __str__(self) -> str: arguments_str = ", ".join(map(str, self._arguments)) return f"{self._function_name}({arguments_str})" - def __eq__(self, other: Union[FuncExpressionImpl, object]) -> bool: - if isinstance(other, FuncExpressionImpl): - return ( - self._function_name == other._function_name - and self._arguments == other._arguments - ) - # TODO: Consider handling other cases or adding custom equality checks - return False + def __eq__(self, other: FuncExpressionImpl) -> bool: + if self is other: + return True + if other is None or self.__class__ is not other.__class__: + return False + return ( + self._function_name == other.function_name() + and self._arguments == other.arguments() + ) def __hash__(self) -> int: return hash((self._function_name, tuple(self._arguments))) diff --git a/clients/client-python/gravitino/api/expressions/named_reference.py b/clients/client-python/gravitino/api/expressions/named_reference.py index a4dddf5c619..ad936cc8b9f 100644 --- a/clients/client-python/gravitino/api/expressions/named_reference.py +++ b/clients/client-python/gravitino/api/expressions/named_reference.py @@ -27,7 +27,15 @@ class NamedReference(Expression): @staticmethod def field(field_name: List[str]) -> FieldReference: - """Returns a FieldReference for the given field name(s).""" + """ + Returns a FieldReference for the given field name(s). The array of field name(s) is + used to reference nested fields. For example, if we have a struct column named "student" with a + data type of StructType{"name": StringType, "age": IntegerType}, we can reference the field + "name" by calling field("student", "name"). + + @param field_name the field name(s) + @return a FieldReference for the given field name(s) + """ return FieldReference(field_name) @staticmethod @@ -56,6 +64,8 @@ class FieldReference(NamedReference): A NamedReference that references a field or column. """ + _field_names: List[str] + def __init__(self, field_names: List[str]): super().__init__() self._field_names = field_names diff --git a/clients/client-python/gravitino/api/expressions/partitions/__init__.py b/clients/client-python/gravitino/api/expressions/partitions/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/clients/client-python/gravitino/api/expressions/partitions/partition.py b/clients/client-python/gravitino/api/expressions/partitions/partition.py new file mode 100644 index 00000000000..ba0e5def2c5 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/partitions/partition.py @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from typing import Dict + + +class Partition: + def name(self) -> str: + """Return the name of the partition.""" + pass + + def properties(self) -> Dict[str, str]: + """Return the properties of the partition, such as statistics, location, etc.""" + pass diff --git a/clients/client-python/gravitino/api/expressions/partitions/partitions.py b/clients/client-python/gravitino/api/expressions/partitions/partitions.py new file mode 100644 index 00000000000..d6dec1eb26b --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/partitions/partitions.py @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from gravitino.api.expressions.partitions.partition import Partition + + +class Partitions: + EMPTY_PARTITIONS = [] + """Default empty partitions list""" + + +class ListPartition(Partition): + """Represents list partitioning.""" + + pass + + +class RangePartition(Partition): + """Represents range partitioning.""" + + pass diff --git a/clients/client-python/gravitino/api/expressions/sorts/__init__.py b/clients/client-python/gravitino/api/expressions/sorts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/clients/client-python/gravitino/api/expressions/sorts/null_ordering.py b/clients/client-python/gravitino/api/expressions/sorts/null_ordering.py new file mode 100644 index 00000000000..71465457bb7 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/sorts/null_ordering.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from enum import Enum + + +class NullOrdering(Enum): + NULLS_FIRST = "nulls_first" + NULLS_LAST = "nulls_last" + + @staticmethod + def from_string(s: str): + try: + return NullOrdering[s.upper()] + except KeyError as exc: + raise ValueError(f"Invalid null ordering: {s}") from exc + + def __str__(self): + return self.value diff --git a/clients/client-python/gravitino/api/expressions/sorts/sort_direction.py b/clients/client-python/gravitino/api/expressions/sorts/sort_direction.py new file mode 100644 index 00000000000..a694f0f70fb --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/sorts/sort_direction.py @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from enum import Enum + +from gravitino.api.expressions.sorts.null_ordering import NullOrdering + + +class SortDirection(Enum): + ASCENDING = "asc" + DESCENDING = "desc" + + def __init__(self, value): + self._default_null_ordering = ( + NullOrdering.NULLS_FIRST if value == "asc" else NullOrdering.NULLS_LAST + ) + + def default_null_ordering(self) -> NullOrdering: + return self._default_null_ordering + + @staticmethod + def from_string(s: str) -> "SortDirection": + if s.lower() == "asc": + return SortDirection.ASCENDING + if s.lower() == "desc": + return SortDirection.DESCENDING + raise ValueError(f"Unexpected sort direction: {s}") + + def __str__(self): + return self.value diff --git a/clients/client-python/gravitino/api/expressions/sorts/sort_order.py b/clients/client-python/gravitino/api/expressions/sorts/sort_order.py new file mode 100644 index 00000000000..23ae460caef --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/sorts/sort_order.py @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from abc import abstractmethod + +from gravitino.api.expressions.expression import Expression + + +class SortOrder(Expression): + """Represents a sort order in the public expression API.""" + + @abstractmethod + def expression(self): + pass + + @abstractmethod + def direction(self): + pass + + @abstractmethod + def null_ordering(self): + pass + + def children(self): + """ + Returns the child expressions. By default, this is the expression itself in a list. + """ + return [self.expression()] diff --git a/clients/client-python/gravitino/api/expressions/sorts.py b/clients/client-python/gravitino/api/expressions/sorts/sort_orders.py similarity index 54% rename from clients/client-python/gravitino/api/expressions/sorts.py rename to clients/client-python/gravitino/api/expressions/sorts/sort_orders.py index 0442310492e..0d67984d7c9 100644 --- a/clients/client-python/gravitino/api/expressions/sorts.py +++ b/clients/client-python/gravitino/api/expressions/sorts/sort_orders.py @@ -15,75 +15,21 @@ # specific language governing permissions and limitations # under the License. -from enum import Enum -from abc import ABC, abstractmethod -from typing import Optional +from typing import Optional, List +from gravitino.api.expressions.expression import Expression +from gravitino.api.expressions.sorts.null_ordering import NullOrdering +from gravitino.api.expressions.sorts.sort_direction import SortDirection +from gravitino.api.expressions.sorts.sort_order import SortOrder -# Enum for NullOrdering -class NullOrdering(Enum): - NULLS_FIRST = "nulls_first" - NULLS_LAST = "nulls_last" - @staticmethod - def from_string(s: str): - try: - return NullOrdering[s.upper()] - except KeyError as exc: - raise ValueError(f"Invalid null ordering: {s}") from exc - - def __str__(self): - return self.value - - -# Enum for SortDirection -class SortDirection(Enum): - ASCENDING = "asc" - DESCENDING = "desc" - - def __init__(self, value): - self._default_null_ordering = ( - NullOrdering.NULLS_FIRST if value == "asc" else NullOrdering.NULLS_LAST - ) - - def default_null_ordering(self) -> NullOrdering: - return self._default_null_ordering - - @staticmethod - def from_string(s: str) -> "SortDirection": - if s.lower() == "asc": - return SortDirection.ASCENDING - if s.lower() == "desc": - return SortDirection.DESCENDING - raise ValueError(f"Unexpected sort direction: {s}") - - def __str__(self): - return self.value - - -# Abstract base class for SortOrder -class SortOrder(ABC): - @abstractmethod - def expression(self): - pass - - @abstractmethod - def direction(self): - pass - - @abstractmethod - def null_ordering(self): - pass - - def children(self): - """ - Returns the child expressions. By default, this is the expression itself in a list. - """ - return [self.expression()] +class SortImpl(SortOrder): + """Create a sort order by the given expression with the given sort direction and null ordering.""" + _expression: Expression + _direction: SortDirection + _null_ordering: NullOrdering -# Concrete implementation of SortOrder -class SortImpl(SortOrder): def __init__( self, expression, direction: SortDirection, null_ordering: NullOrdering ): @@ -122,7 +68,7 @@ def __str__(self): # Helper class to create SortOrder instances class SortOrders: - NONE = [] + NONE: List[SortOrder] = [] @staticmethod def ascending(expression) -> SortImpl: @@ -141,15 +87,3 @@ def of( if null_ordering is None: null_ordering = direction.default_null_ordering() return SortImpl(expression, direction, null_ordering) - - @staticmethod - def from_string( - expression, direction_str: str, null_ordering_str: Optional[str] = None - ) -> SortImpl: - direction = SortDirection.from_string(direction_str) - null_ordering = ( - NullOrdering(null_ordering_str) - if null_ordering_str - else direction.default_null_ordering() - ) - return SortImpl(expression, direction, null_ordering) diff --git a/clients/client-python/gravitino/api/expressions/transforms/__init__.py b/clients/client-python/gravitino/api/expressions/transforms/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/clients/client-python/gravitino/api/expressions/transforms/transform.py b/clients/client-python/gravitino/api/expressions/transforms/transform.py new file mode 100644 index 00000000000..9d3de491035 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/transforms/transform.py @@ -0,0 +1,61 @@ +from abc import ABC +from typing import List +from gravitino.api.expressions.expression import Expression +from gravitino.api.expressions.named_reference import NamedReference + +from gravitino.api.expressions.partitions.partition import Partition +from gravitino.api.expressions.partitions.partitions import Partitions + + +class Transform(Expression, ABC): + """Represents a transform function.""" + + def name(self) -> str: + """Returns the transform function name.""" + pass + + def arguments(self) -> List[Expression]: + """Returns the arguments passed to the transform function.""" + pass + + def assignments(self) -> List[Partition]: + """ + Returns the preassigned partitions for the transform. + By default, it returns an empty list of partitions, + as only some transforms like ListTransform and RangeTransform + need to deal with assignments. + """ + return Partitions.EMPTY_PARTITIONS + + def children(self) -> List[Expression]: + """Returns the children expressions. By default, it is the arguments.""" + return self.arguments() + + +class SingleFieldTransform(Transform): + """Base class for transforms on a single field.""" + + def __init__(self, ref: NamedReference): + self.ref = ref + + def field_name(self) -> List[str]: + """Returns the referenced field name as a list of string parts.""" + return self.ref.field_name() + + def references(self) -> List[NamedReference]: + """Returns a list of references (i.e., the field reference).""" + return [self.ref] + + def arguments(self) -> List[Expression]: + """Returns a list of arguments for the transform, which is just `ref`.""" + return [self.ref] + + def __eq__(self, other: object) -> bool: + """Checks equality based on the `ref`.""" + if not isinstance(other, SingleFieldTransform): + return False + return self.ref == other.ref + + def __hash__(self) -> int: + """Generates a hash based on `ref`.""" + return hash(self.ref) diff --git a/clients/client-python/gravitino/api/expressions/transforms.py b/clients/client-python/gravitino/api/expressions/transforms/transforms.py similarity index 71% rename from clients/client-python/gravitino/api/expressions/transforms.py rename to clients/client-python/gravitino/api/expressions/transforms/transforms.py index ad111772311..0261f2ab280 100644 --- a/clients/client-python/gravitino/api/expressions/transforms.py +++ b/clients/client-python/gravitino/api/expressions/transforms/transforms.py @@ -1,121 +1,38 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from abc import ABC from typing import List + from gravitino.api.expressions.expression import Expression +from gravitino.api.expressions.literals.literals import Literals from gravitino.api.expressions.named_reference import NamedReference -from gravitino.api.expressions.literals import Literals - - -# set up for temporal usage, need to delete when we implement Partition class -class Partition(ABC): - """Base class for partitioning types.""" - - pass - - -# set up for temporal usage, need to delete when we implement Partitions class -class Partitions: - EMPTY_PARTITIONS = [] # Default empty partitions list - - -# set up for temporal usage, need to delete when we implement ListPartition class -class ListPartition(Partition): - """Represents list partitioning.""" - - pass - - -# set up for temporal usage, need to delete when we implement RangePartition class -class RangePartition(Partition): - """Represents range partitioning.""" - - pass - - -class Transform(Expression, ABC): - """Represents a transform function.""" - - def name(self) -> str: - """Returns the transform function name.""" - pass - - def arguments(self) -> List[Expression]: - """Returns the arguments passed to the transform function.""" - pass - - def assignments(self) -> List[Partition]: - """ - Returns the preassigned partitions for the transform. - By default, it returns an empty list of partitions, - as only some transforms like ListTransform and RangeTransform - need to deal with assignments. - """ - return Partitions.EMPTY_PARTITIONS - - def children(self) -> List[Expression]: - """Returns the children expressions. By default, it is the arguments.""" - return self.arguments() - - -class SingleFieldTransform(Transform): - """Base class for transforms on a single field.""" - - def __init__(self, ref: NamedReference): - self.ref = ref - - def field_name(self) -> List[str]: - """Returns the referenced field name as a list of string parts.""" - return self.ref.field_name() - - def references(self) -> List[NamedReference]: - """Returns a list of references (i.e., the field reference).""" - return [self.ref] - - def arguments(self) -> List[Expression]: - """Returns a list of arguments for the transform, which is just `ref`.""" - return [self.ref] - - def __eq__(self, other: object) -> bool: - """Checks equality based on the `ref`.""" - if not isinstance(other, SingleFieldTransform): - return False - return self.ref == other.ref - - def __hash__(self) -> int: - """Generates a hash based on `ref`.""" - return hash(self.ref) +from gravitino.api.expressions.partitions.partitions import ( + ListPartition, + RangePartition, +) +from gravitino.api.expressions.transforms.transform import Transform class Transforms(Transform): """Helper methods to create logical transforms to pass into Apache Gravitino.""" - # Constants - EMPTY_TRANSFORM = [] - NAME_OF_IDENTITY = "identity" - NAME_OF_YEAR = "year" - NAME_OF_MONTH = "month" - NAME_OF_DAY = "day" - NAME_OF_HOUR = "hour" - NAME_OF_BUCKET = "bucket" - NAME_OF_TRUNCATE = "truncate" - NAME_OF_LIST = "list" - NAME_OF_RANGE = "range" + EMPTY_TRANSFORM: List[Transform] = [] + """An empty array of transforms.""" + NAME_OF_IDENTITY: str = "identity" + """The name of the identity transform.""" + NAME_OF_YEAR: str = "year" + """The name of the year transform. The year transform returns the year of the input value.""" + NAME_OF_MONTH: str = "month" + """The name of the month transform. The month transform returns the month of the input value.""" + NAME_OF_DAY: str = "day" + """The name of the day transform. The day transform returns the day of the input value.""" + NAME_OF_HOUR: str = "hour" + """The name of the hour transform. The hour transform returns the hour of the input value.""" + NAME_OF_BUCKET: str = "bucket" + """The name of the bucket transform. The bucket transform returns the bucket of the input value.""" + NAME_OF_TRUNCATE: str = "truncate" + """The name of the truncate transform. The truncate transform returns the truncated value of the""" + NAME_OF_LIST: str = "list" + """The name of the list transform. The list transform includes multiple fields in a list.""" + NAME_OF_RANGE: str = "range" + """The name of the range transform. The range transform returns the range of the input value.""" @staticmethod def identity(field_name: List[str]) -> "IdentityTransform": @@ -255,7 +172,7 @@ def truncate(width: int, field_name) -> "TruncateTransform": ) @staticmethod - def apply(name: str, *arguments: "Expression") -> "ApplyTransform": + def apply(name: str, *arguments: Expression) -> "ApplyTransform": """ Create a transform that applies a function to the input value. @@ -269,13 +186,13 @@ def apply(name: str, *arguments: "Expression") -> "ApplyTransform": class IdentityTransform(Transforms): """A transform that returns the input value.""" - def __init__(self, ref: "NamedReference"): + def __init__(self, ref: NamedReference): self.ref = ref def name(self) -> str: return Transforms.NAME_OF_IDENTITY - def arguments(self) -> List["Expression"]: + def arguments(self) -> List[Expression]: return [self.ref] def __eq__(self, other): @@ -288,7 +205,7 @@ def __hash__(self): class YearTransform(Transforms): """A transform that returns the year of the input value.""" - def __init__(self, ref: "NamedReference"): + def __init__(self, ref: NamedReference): self.ref = ref def name(self) -> str: @@ -297,7 +214,7 @@ def name(self) -> str: def children(self) -> List[Expression]: return [self.ref] - def arguments(self) -> List["Expression"]: + def arguments(self) -> List[Expression]: return [self.ref] def __eq__(self, other): @@ -310,7 +227,7 @@ def __hash__(self): class MonthTransform(Transforms): """A transform that returns the month of the input value.""" - def __init__(self, ref: "NamedReference"): + def __init__(self, ref: NamedReference): self.ref = ref def name(self) -> str: @@ -319,7 +236,7 @@ def name(self) -> str: def children(self) -> List[Expression]: return [self.ref] - def arguments(self) -> List["Expression"]: + def arguments(self) -> List[Expression]: return [self.ref] def __eq__(self, other): @@ -332,7 +249,7 @@ def __hash__(self): class DayTransform(Transforms): """A transform that returns the day of the input value.""" - def __init__(self, ref: "NamedReference"): + def __init__(self, ref: NamedReference): self.ref = ref def name(self) -> str: @@ -341,7 +258,7 @@ def name(self) -> str: def children(self) -> List[Expression]: return [self.ref] - def arguments(self) -> List["Expression"]: + def arguments(self) -> List[Expression]: return [self.ref] def __eq__(self, other): @@ -354,7 +271,7 @@ def __hash__(self): class HourTransform(Transforms): """A transform that returns the hour of the input value.""" - def __init__(self, ref: "NamedReference"): + def __init__(self, ref: NamedReference): self.ref = ref def name(self) -> str: @@ -363,7 +280,7 @@ def name(self) -> str: def children(self) -> List[Expression]: return [self.ref] - def arguments(self) -> List["Expression"]: + def arguments(self) -> List[Expression]: return [self.ref] def __eq__(self, other): @@ -376,7 +293,7 @@ def __hash__(self): class BucketTransform(Transforms): """A transform that returns the bucket of the input value.""" - def __init__(self, num_buckets: int, fields: List["NamedReference"]): + def __init__(self, num_buckets: int, fields: List[NamedReference]): self._num_buckets = num_buckets self.fields = fields @@ -393,7 +310,7 @@ def field_names(self) -> List[str]: def name(self) -> str: return Transforms.NAME_OF_BUCKET - def arguments(self) -> List["Expression"]: + def arguments(self) -> List[Expression]: return [str(Literals.integer_literal(self.num_buckets))] + [ field_name for field in self.fields for field_name in field.field_name() ] @@ -413,7 +330,7 @@ def __hash__(self): class TruncateTransform(Transforms): """A transform that returns the truncated value of the input value with the given width.""" - def __init__(self, width: int, field: "NamedReference"): + def __init__(self, width: int, field: NamedReference): self._width = width self.field = field @@ -428,7 +345,7 @@ def field_name(self) -> List[str]: def name(self) -> str: return Transforms.NAME_OF_TRUNCATE - def arguments(self) -> List["Expression"]: + def arguments(self) -> List[Expression]: return [self.width, self.field] def __eq__(self, other): @@ -447,8 +364,8 @@ class ListTransform(Transforms): def __init__( self, - fields: List["NamedReference"], - assignments: List["ListPartition"] = None, + fields: List[NamedReference], + assignments: List[ListPartition] = None, ): if assignments is None: assignments = [] @@ -478,9 +395,7 @@ def __hash__(self): class RangeTransform(Transforms): """A transform that returns the range of the input value.""" - def __init__( - self, field: "NamedReference", assignments: List["RangePartition"] = None - ): + def __init__(self, field: NamedReference, assignments: List[RangePartition] = None): if assignments is None: assignments = [] self.field = field @@ -493,10 +408,10 @@ def field_name(self) -> List[str]: def name(self) -> str: return Transforms.NAME_OF_RANGE - def arguments(self) -> List["Expression"]: + def arguments(self) -> List[Expression]: return [self.field] - def assignments(self) -> List["RangePartition"]: + def assignments(self) -> List[RangePartition]: return self.assignments def __eq__(self, other): @@ -509,14 +424,14 @@ def __hash__(self): class ApplyTransform(Transforms): """A transform that applies a function to the input value.""" - def __init__(self, name: str, arguments: List["Expression"]): + def __init__(self, name: str, arguments: List[Expression]): self._name = name self._arguments = list(arguments) def name(self) -> str: return self._name - def arguments(self) -> List["Expression"]: + def arguments(self) -> List[Expression]: return self._arguments def __eq__(self, other): diff --git a/clients/client-python/gravitino/api/expressions/unparsed_expression.py b/clients/client-python/gravitino/api/expressions/unparsed_expression.py index 2c6b8c3fb38..82c5087e58a 100644 --- a/clients/client-python/gravitino/api/expressions/unparsed_expression.py +++ b/clients/client-python/gravitino/api/expressions/unparsed_expression.py @@ -24,7 +24,7 @@ class UnparsedExpression(Expression): """ Represents an expression that is not parsed yet. - The parsed expression is represented by FunctionExpression, Literal, or NamedReference. + The parsed expression is represented by FunctionExpression, literal.py, or NamedReference. """ def unparsed_expression(self) -> str: diff --git a/clients/client-python/tests/unittests/test_distributions.py b/clients/client-python/tests/unittests/test_distributions.py index 9ddbbbe3883..50c49c27922 100644 --- a/clients/client-python/tests/unittests/test_distributions.py +++ b/clients/client-python/tests/unittests/test_distributions.py @@ -16,7 +16,10 @@ # under the License. import unittest -from gravitino.api.expressions.distributions import Distributions, Strategy +from gravitino.api.expressions.distributions.distributions import ( + Distributions, + Strategy, +) class TestDistribution(unittest.TestCase): diff --git a/clients/client-python/tests/unittests/test_literals.py b/clients/client-python/tests/unittests/test_literals.py index c5baf9cf6da..9f7348e74cc 100644 --- a/clients/client-python/tests/unittests/test_literals.py +++ b/clients/client-python/tests/unittests/test_literals.py @@ -17,12 +17,13 @@ import unittest from datetime import date, time, datetime -from gravitino.api.expressions.literals import Literals + +from gravitino.api.expressions.literals.literals import Literals class TestLiterals(unittest.TestCase): def test_null_literal(self): - null_val = Literals.null_literal() + null_val = Literals.NULL self.assertEqual(null_val.value(), None) self.assertEqual(null_val.data_type(), "NullType") self.assertEqual(str(null_val), "LiteralImpl(value=None, data_type=NullType)") diff --git a/clients/client-python/tests/unittests/test_sorts.py b/clients/client-python/tests/unittests/test_sorts.py index 1924b502e27..2c53faac854 100644 --- a/clients/client-python/tests/unittests/test_sorts.py +++ b/clients/client-python/tests/unittests/test_sorts.py @@ -16,12 +16,10 @@ # under the License. import unittest -from gravitino.api.expressions.sorts import ( - SortDirection, - NullOrdering, - SortImpl, - SortOrders, -) + +from gravitino.api.expressions.sorts.null_ordering import NullOrdering +from gravitino.api.expressions.sorts.sort_direction import SortDirection +from gravitino.api.expressions.sorts.sort_orders import SortImpl, SortOrders class TestSortImpl(unittest.TestCase): @@ -130,11 +128,3 @@ def test_of(self): self.assertEqual(sort_order.expression(), expr) self.assertEqual(sort_order.direction(), SortDirection.DESCENDING) self.assertEqual(sort_order.null_ordering(), NullOrdering.NULLS_FIRST) - - def test_from_string(self): - # Test the from_string method of SortOrders - expr = "column_name" - sort_order = SortOrders.from_string(expr, "asc", "nulls_last") - self.assertEqual(sort_order.expression(), expr) - self.assertEqual(sort_order.direction(), SortDirection.ASCENDING) - self.assertEqual(sort_order.null_ordering(), NullOrdering.NULLS_LAST) diff --git a/clients/client-python/tests/unittests/test_tranforms.py b/clients/client-python/tests/unittests/test_tranforms.py index 52d2072c51b..660344f3c7b 100644 --- a/clients/client-python/tests/unittests/test_tranforms.py +++ b/clients/client-python/tests/unittests/test_tranforms.py @@ -16,7 +16,10 @@ # under the License. import unittest -from gravitino.api.expressions.transforms import ( + +from gravitino.api.expressions.literals.literals import Literals +from gravitino.api.expressions.named_reference import NamedReference, FieldReference +from gravitino.api.expressions.transforms.transforms import ( Transforms, IdentityTransform, YearTransform, @@ -29,8 +32,6 @@ RangeTransform, ApplyTransform, ) -from gravitino.api.expressions.named_reference import NamedReference, FieldReference -from gravitino.api.expressions.literals import Literals class TestTransforms(unittest.TestCase): From 2a25eb401719d83fb5c5e8e8b604aa59a26098f0 Mon Sep 17 00:00:00 2001 From: YUN SUN Date: Mon, 25 Nov 2024 20:19:42 -0600 Subject: [PATCH 10/14] Update the script and fix a name bug --- .../api/expressions/Literals/__init__.py | 16 ++++++++++++++++ .../api/expressions/Literals/literal.py | 1 - .../api/expressions/distributions/__init__.py | 16 ++++++++++++++++ .../api/expressions/partitions/__init__.py | 16 ++++++++++++++++ .../gravitino/api/expressions/sorts/__init__.py | 16 ++++++++++++++++ .../api/expressions/transforms/__init__.py | 16 ++++++++++++++++ 6 files changed, 80 insertions(+), 1 deletion(-) diff --git a/clients/client-python/gravitino/api/expressions/Literals/__init__.py b/clients/client-python/gravitino/api/expressions/Literals/__init__.py index e69de29bb2d..d216be4ddc9 100644 --- a/clients/client-python/gravitino/api/expressions/Literals/__init__.py +++ b/clients/client-python/gravitino/api/expressions/Literals/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. \ No newline at end of file diff --git a/clients/client-python/gravitino/api/expressions/Literals/literal.py b/clients/client-python/gravitino/api/expressions/Literals/literal.py index 3a8c3ecfec1..718e2dfa485 100644 --- a/clients/client-python/gravitino/api/expressions/Literals/literal.py +++ b/clients/client-python/gravitino/api/expressions/Literals/literal.py @@ -16,7 +16,6 @@ # under the License. from abc import abstractmethod from typing import List - from gravitino.api.expressions.expression import Expression diff --git a/clients/client-python/gravitino/api/expressions/distributions/__init__.py b/clients/client-python/gravitino/api/expressions/distributions/__init__.py index e69de29bb2d..d216be4ddc9 100644 --- a/clients/client-python/gravitino/api/expressions/distributions/__init__.py +++ b/clients/client-python/gravitino/api/expressions/distributions/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. \ No newline at end of file diff --git a/clients/client-python/gravitino/api/expressions/partitions/__init__.py b/clients/client-python/gravitino/api/expressions/partitions/__init__.py index e69de29bb2d..d216be4ddc9 100644 --- a/clients/client-python/gravitino/api/expressions/partitions/__init__.py +++ b/clients/client-python/gravitino/api/expressions/partitions/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. \ No newline at end of file diff --git a/clients/client-python/gravitino/api/expressions/sorts/__init__.py b/clients/client-python/gravitino/api/expressions/sorts/__init__.py index e69de29bb2d..d216be4ddc9 100644 --- a/clients/client-python/gravitino/api/expressions/sorts/__init__.py +++ b/clients/client-python/gravitino/api/expressions/sorts/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. \ No newline at end of file diff --git a/clients/client-python/gravitino/api/expressions/transforms/__init__.py b/clients/client-python/gravitino/api/expressions/transforms/__init__.py index e69de29bb2d..d216be4ddc9 100644 --- a/clients/client-python/gravitino/api/expressions/transforms/__init__.py +++ b/clients/client-python/gravitino/api/expressions/transforms/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. \ No newline at end of file From 3bc79ca61e33bea5bc7932738c5ccad22b8787b8 Mon Sep 17 00:00:00 2001 From: YUN SUN Date: Mon, 25 Nov 2024 20:28:08 -0600 Subject: [PATCH 11/14] Update the Literal class to raise NotImplementedError --- .../gravitino/api/expressions/Literals/__init__.py | 2 +- .../gravitino/api/expressions/Literals/literal.py | 4 ++-- .../gravitino/api/expressions/distributions/__init__.py | 2 +- .../gravitino/api/expressions/partitions/__init__.py | 2 +- .../client-python/gravitino/api/expressions/sorts/__init__.py | 2 +- .../gravitino/api/expressions/transforms/__init__.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clients/client-python/gravitino/api/expressions/Literals/__init__.py b/clients/client-python/gravitino/api/expressions/Literals/__init__.py index d216be4ddc9..13a83393a91 100644 --- a/clients/client-python/gravitino/api/expressions/Literals/__init__.py +++ b/clients/client-python/gravitino/api/expressions/Literals/__init__.py @@ -13,4 +13,4 @@ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations -# under the License. \ No newline at end of file +# under the License. diff --git a/clients/client-python/gravitino/api/expressions/Literals/literal.py b/clients/client-python/gravitino/api/expressions/Literals/literal.py index 718e2dfa485..16ab477d7c7 100644 --- a/clients/client-python/gravitino/api/expressions/Literals/literal.py +++ b/clients/client-python/gravitino/api/expressions/Literals/literal.py @@ -27,12 +27,12 @@ class Literal(Expression): @abstractmethod def value(self): """The literal value.""" - pass + raise NotImplementedError("Subclasses must implement the `value` method.") @abstractmethod def data_type(self): """The data type of the literal.""" - pass + raise NotImplementedError("Subclasses must implement the `data_type` method.") def children(self) -> List[Expression]: return Expression.EMPTY_EXPRESSION diff --git a/clients/client-python/gravitino/api/expressions/distributions/__init__.py b/clients/client-python/gravitino/api/expressions/distributions/__init__.py index d216be4ddc9..13a83393a91 100644 --- a/clients/client-python/gravitino/api/expressions/distributions/__init__.py +++ b/clients/client-python/gravitino/api/expressions/distributions/__init__.py @@ -13,4 +13,4 @@ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations -# under the License. \ No newline at end of file +# under the License. diff --git a/clients/client-python/gravitino/api/expressions/partitions/__init__.py b/clients/client-python/gravitino/api/expressions/partitions/__init__.py index d216be4ddc9..13a83393a91 100644 --- a/clients/client-python/gravitino/api/expressions/partitions/__init__.py +++ b/clients/client-python/gravitino/api/expressions/partitions/__init__.py @@ -13,4 +13,4 @@ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations -# under the License. \ No newline at end of file +# under the License. diff --git a/clients/client-python/gravitino/api/expressions/sorts/__init__.py b/clients/client-python/gravitino/api/expressions/sorts/__init__.py index d216be4ddc9..13a83393a91 100644 --- a/clients/client-python/gravitino/api/expressions/sorts/__init__.py +++ b/clients/client-python/gravitino/api/expressions/sorts/__init__.py @@ -13,4 +13,4 @@ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations -# under the License. \ No newline at end of file +# under the License. diff --git a/clients/client-python/gravitino/api/expressions/transforms/__init__.py b/clients/client-python/gravitino/api/expressions/transforms/__init__.py index d216be4ddc9..13a83393a91 100644 --- a/clients/client-python/gravitino/api/expressions/transforms/__init__.py +++ b/clients/client-python/gravitino/api/expressions/transforms/__init__.py @@ -13,4 +13,4 @@ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations -# under the License. \ No newline at end of file +# under the License. From 9fe8d6cf0e463569da7088dda961d8bdcbcd5879 Mon Sep 17 00:00:00 2001 From: Xun Date: Tue, 26 Nov 2024 13:51:40 +0800 Subject: [PATCH 12/14] remove Literals folder --- .../api/expressions/Literals/__init__.py | 16 -- .../api/expressions/Literals/literal.py | 38 ----- .../api/expressions/Literals/literals.py | 138 ------------------ 3 files changed, 192 deletions(-) delete mode 100644 clients/client-python/gravitino/api/expressions/Literals/__init__.py delete mode 100644 clients/client-python/gravitino/api/expressions/Literals/literal.py delete mode 100644 clients/client-python/gravitino/api/expressions/Literals/literals.py diff --git a/clients/client-python/gravitino/api/expressions/Literals/__init__.py b/clients/client-python/gravitino/api/expressions/Literals/__init__.py deleted file mode 100644 index 13a83393a91..00000000000 --- a/clients/client-python/gravitino/api/expressions/Literals/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/clients/client-python/gravitino/api/expressions/Literals/literal.py b/clients/client-python/gravitino/api/expressions/Literals/literal.py deleted file mode 100644 index 16ab477d7c7..00000000000 --- a/clients/client-python/gravitino/api/expressions/Literals/literal.py +++ /dev/null @@ -1,38 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from abc import abstractmethod -from typing import List -from gravitino.api.expressions.expression import Expression - - -class Literal(Expression): - """ - Represents a constant literal value in the public expression API. - """ - - @abstractmethod - def value(self): - """The literal value.""" - raise NotImplementedError("Subclasses must implement the `value` method.") - - @abstractmethod - def data_type(self): - """The data type of the literal.""" - raise NotImplementedError("Subclasses must implement the `data_type` method.") - - def children(self) -> List[Expression]: - return Expression.EMPTY_EXPRESSION diff --git a/clients/client-python/gravitino/api/expressions/Literals/literals.py b/clients/client-python/gravitino/api/expressions/Literals/literals.py deleted file mode 100644 index 68e61d4f133..00000000000 --- a/clients/client-python/gravitino/api/expressions/Literals/literals.py +++ /dev/null @@ -1,138 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from decimal import Decimal -from typing import Union -from datetime import date, time, datetime - -from gravitino.api.expressions.literals.literal import Literal - - -class LiteralImpl(Literal): - """Creates a literal with the given type value.""" - - _value: Union[int, float, str, datetime, time, date, bool, Decimal, None] - _data_type: ( - str # TODO: Need implement `api/src/main/java/org/apache/gravitino/rel/types` - ) - - def __init__( - self, - value: Union[int, float, str, datetime, time, date, bool, Decimal, None], - data_type: str, - ): - self._value = value - self._data_type = data_type - - def value(self) -> Union[int, float, str, datetime, time, date, bool]: - return self._value - - def data_type(self) -> str: - return self._data_type - - def __eq__(self, other: object) -> bool: - if not isinstance(other, LiteralImpl): - return False - return (self._value == other._value) and (self._data_type == other._data_type) - - def __hash__(self): - return hash((self._value, self._data_type)) - - def __str__(self): - return f"LiteralImpl(value={self._value}, data_type={self._data_type})" - - -class Literals: - """The helper class to create literals to pass into Apache Gravitino.""" - - NULL = LiteralImpl(None, "NullType") - - @staticmethod - def of(value, data_type) -> Literal: - return LiteralImpl(value, data_type) - - @staticmethod - def boolean_literal(value: bool) -> Literal: - return LiteralImpl(value, "Boolean") - - @staticmethod - def byte_literal(value: int) -> Literal: - return LiteralImpl(value, "Byte") - - @staticmethod - def unsigned_byte_literal(value: int) -> Literal: - return LiteralImpl(value, "Unsigned Byte") - - @staticmethod - def short_literal(value: int) -> Literal: - return LiteralImpl(value, "Short") - - @staticmethod - def unsigned_short_literal(value: int) -> Literal: - return LiteralImpl(value, "Unsigned Short") - - @staticmethod - def integer_literal(value: int) -> Literal: - return LiteralImpl(value, "Integer") - - @staticmethod - def unsigned_integer_literal(value: int) -> Literal: - return LiteralImpl(value, "Unsigned Integer") - - @staticmethod - def long_literal(value: int) -> Literal: - return LiteralImpl(value, "Long") - - @staticmethod - def unsigned_long_literal(value: Decimal) -> Literal: - return LiteralImpl(value, "Unsigned Long") - - @staticmethod - def float_literal(value: float) -> Literal: - return LiteralImpl(value, "Float") - - @staticmethod - def double_literal(value: float) -> Literal: - return LiteralImpl(value, "Double") - - @staticmethod - def decimal_literal(value: float) -> Literal: - return LiteralImpl(value, "Decimal") - - @staticmethod - def date_literal(value: date) -> Literal: - return LiteralImpl(value, "Date") - - @staticmethod - def time_literal(value: time) -> Literal: - return LiteralImpl(value, "Time") - - @staticmethod - def timestamp_literal(value: datetime) -> Literal: - return LiteralImpl(value, "Timestamp") - - @staticmethod - def timestamp_literal_from_string(value: str) -> Literal: - return Literals.timestamp_literal(datetime.fromisoformat(value)) - - @staticmethod - def string_literal(value: str) -> Literal: - return LiteralImpl(value, "String") - - @staticmethod - def varchar_literal(length: int, value: str) -> Literal: - return LiteralImpl(value, f"Varchar({length})") From cbb2a5545bc3a007ccda17a279dfe18b0b521a1e Mon Sep 17 00:00:00 2001 From: Xun Date: Tue, 26 Nov 2024 13:52:09 +0800 Subject: [PATCH 13/14] Add literals folder --- .../api/expressions/literals/__init__.py | 16 ++ .../api/expressions/literals/literal.py | 38 +++++ .../api/expressions/literals/literals.py | 138 ++++++++++++++++++ 3 files changed, 192 insertions(+) create mode 100644 clients/client-python/gravitino/api/expressions/literals/__init__.py create mode 100644 clients/client-python/gravitino/api/expressions/literals/literal.py create mode 100644 clients/client-python/gravitino/api/expressions/literals/literals.py diff --git a/clients/client-python/gravitino/api/expressions/literals/__init__.py b/clients/client-python/gravitino/api/expressions/literals/__init__.py new file mode 100644 index 00000000000..13a83393a91 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/literals/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/clients/client-python/gravitino/api/expressions/literals/literal.py b/clients/client-python/gravitino/api/expressions/literals/literal.py new file mode 100644 index 00000000000..16ab477d7c7 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/literals/literal.py @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from abc import abstractmethod +from typing import List +from gravitino.api.expressions.expression import Expression + + +class Literal(Expression): + """ + Represents a constant literal value in the public expression API. + """ + + @abstractmethod + def value(self): + """The literal value.""" + raise NotImplementedError("Subclasses must implement the `value` method.") + + @abstractmethod + def data_type(self): + """The data type of the literal.""" + raise NotImplementedError("Subclasses must implement the `data_type` method.") + + def children(self) -> List[Expression]: + return Expression.EMPTY_EXPRESSION diff --git a/clients/client-python/gravitino/api/expressions/literals/literals.py b/clients/client-python/gravitino/api/expressions/literals/literals.py new file mode 100644 index 00000000000..68e61d4f133 --- /dev/null +++ b/clients/client-python/gravitino/api/expressions/literals/literals.py @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from decimal import Decimal +from typing import Union +from datetime import date, time, datetime + +from gravitino.api.expressions.literals.literal import Literal + + +class LiteralImpl(Literal): + """Creates a literal with the given type value.""" + + _value: Union[int, float, str, datetime, time, date, bool, Decimal, None] + _data_type: ( + str # TODO: Need implement `api/src/main/java/org/apache/gravitino/rel/types` + ) + + def __init__( + self, + value: Union[int, float, str, datetime, time, date, bool, Decimal, None], + data_type: str, + ): + self._value = value + self._data_type = data_type + + def value(self) -> Union[int, float, str, datetime, time, date, bool]: + return self._value + + def data_type(self) -> str: + return self._data_type + + def __eq__(self, other: object) -> bool: + if not isinstance(other, LiteralImpl): + return False + return (self._value == other._value) and (self._data_type == other._data_type) + + def __hash__(self): + return hash((self._value, self._data_type)) + + def __str__(self): + return f"LiteralImpl(value={self._value}, data_type={self._data_type})" + + +class Literals: + """The helper class to create literals to pass into Apache Gravitino.""" + + NULL = LiteralImpl(None, "NullType") + + @staticmethod + def of(value, data_type) -> Literal: + return LiteralImpl(value, data_type) + + @staticmethod + def boolean_literal(value: bool) -> Literal: + return LiteralImpl(value, "Boolean") + + @staticmethod + def byte_literal(value: int) -> Literal: + return LiteralImpl(value, "Byte") + + @staticmethod + def unsigned_byte_literal(value: int) -> Literal: + return LiteralImpl(value, "Unsigned Byte") + + @staticmethod + def short_literal(value: int) -> Literal: + return LiteralImpl(value, "Short") + + @staticmethod + def unsigned_short_literal(value: int) -> Literal: + return LiteralImpl(value, "Unsigned Short") + + @staticmethod + def integer_literal(value: int) -> Literal: + return LiteralImpl(value, "Integer") + + @staticmethod + def unsigned_integer_literal(value: int) -> Literal: + return LiteralImpl(value, "Unsigned Integer") + + @staticmethod + def long_literal(value: int) -> Literal: + return LiteralImpl(value, "Long") + + @staticmethod + def unsigned_long_literal(value: Decimal) -> Literal: + return LiteralImpl(value, "Unsigned Long") + + @staticmethod + def float_literal(value: float) -> Literal: + return LiteralImpl(value, "Float") + + @staticmethod + def double_literal(value: float) -> Literal: + return LiteralImpl(value, "Double") + + @staticmethod + def decimal_literal(value: float) -> Literal: + return LiteralImpl(value, "Decimal") + + @staticmethod + def date_literal(value: date) -> Literal: + return LiteralImpl(value, "Date") + + @staticmethod + def time_literal(value: time) -> Literal: + return LiteralImpl(value, "Time") + + @staticmethod + def timestamp_literal(value: datetime) -> Literal: + return LiteralImpl(value, "Timestamp") + + @staticmethod + def timestamp_literal_from_string(value: str) -> Literal: + return Literals.timestamp_literal(datetime.fromisoformat(value)) + + @staticmethod + def string_literal(value: str) -> Literal: + return LiteralImpl(value, "String") + + @staticmethod + def varchar_literal(length: int, value: str) -> Literal: + return LiteralImpl(value, f"Varchar({length})") From 624929ef570097f217bf0d8ba36ac656ac10e31b Mon Sep 17 00:00:00 2001 From: YUN SUN Date: Tue, 26 Nov 2024 00:35:44 -0600 Subject: [PATCH 14/14] Update licenses headers and format --- .../expressions/distributions/distribution.py | 1 + .../api/expressions/function_expression.py | 2 +- .../api/expressions/literals/literal.py | 2 ++ .../api/expressions/named_reference.py | 1 + .../api/expressions/partitions/partition.py | 1 + .../api/expressions/sorts/null_ordering.py | 1 + .../api/expressions/sorts/sort_direction.py | 1 + .../api/expressions/sorts/sort_order.py | 1 + .../api/expressions/transforms/transform.py | 17 +++++++++++++++++ .../api/expressions/transforms/transforms.py | 17 +++++++++++++++++ .../api/expressions/unparsed_expression.py | 2 +- 11 files changed, 44 insertions(+), 2 deletions(-) diff --git a/clients/client-python/gravitino/api/expressions/distributions/distribution.py b/clients/client-python/gravitino/api/expressions/distributions/distribution.py index bdf0f10a8b3..72f1b236532 100644 --- a/clients/client-python/gravitino/api/expressions/distributions/distribution.py +++ b/clients/client-python/gravitino/api/expressions/distributions/distribution.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + from typing import List from gravitino.api.expressions.distributions.strategy import Strategy diff --git a/clients/client-python/gravitino/api/expressions/function_expression.py b/clients/client-python/gravitino/api/expressions/function_expression.py index 4deb23afaa8..eb97438e4ba 100644 --- a/clients/client-python/gravitino/api/expressions/function_expression.py +++ b/clients/client-python/gravitino/api/expressions/function_expression.py @@ -15,10 +15,10 @@ # specific language governing permissions and limitations # under the License. - from __future__ import annotations from abc import abstractmethod from typing import List + from gravitino.api.expressions.expression import Expression diff --git a/clients/client-python/gravitino/api/expressions/literals/literal.py b/clients/client-python/gravitino/api/expressions/literals/literal.py index 16ab477d7c7..93be5db6d06 100644 --- a/clients/client-python/gravitino/api/expressions/literals/literal.py +++ b/clients/client-python/gravitino/api/expressions/literals/literal.py @@ -14,8 +14,10 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + from abc import abstractmethod from typing import List + from gravitino.api.expressions.expression import Expression diff --git a/clients/client-python/gravitino/api/expressions/named_reference.py b/clients/client-python/gravitino/api/expressions/named_reference.py index ad936cc8b9f..ccd44b9b172 100644 --- a/clients/client-python/gravitino/api/expressions/named_reference.py +++ b/clients/client-python/gravitino/api/expressions/named_reference.py @@ -17,6 +17,7 @@ from __future__ import annotations from typing import List + from gravitino.api.expressions.expression import Expression diff --git a/clients/client-python/gravitino/api/expressions/partitions/partition.py b/clients/client-python/gravitino/api/expressions/partitions/partition.py index ba0e5def2c5..3267234aa0e 100644 --- a/clients/client-python/gravitino/api/expressions/partitions/partition.py +++ b/clients/client-python/gravitino/api/expressions/partitions/partition.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + from typing import Dict diff --git a/clients/client-python/gravitino/api/expressions/sorts/null_ordering.py b/clients/client-python/gravitino/api/expressions/sorts/null_ordering.py index 71465457bb7..1b078bfe4e5 100644 --- a/clients/client-python/gravitino/api/expressions/sorts/null_ordering.py +++ b/clients/client-python/gravitino/api/expressions/sorts/null_ordering.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + from enum import Enum diff --git a/clients/client-python/gravitino/api/expressions/sorts/sort_direction.py b/clients/client-python/gravitino/api/expressions/sorts/sort_direction.py index a694f0f70fb..706e8938125 100644 --- a/clients/client-python/gravitino/api/expressions/sorts/sort_direction.py +++ b/clients/client-python/gravitino/api/expressions/sorts/sort_direction.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + from enum import Enum from gravitino.api.expressions.sorts.null_ordering import NullOrdering diff --git a/clients/client-python/gravitino/api/expressions/sorts/sort_order.py b/clients/client-python/gravitino/api/expressions/sorts/sort_order.py index 23ae460caef..573c90a3dd2 100644 --- a/clients/client-python/gravitino/api/expressions/sorts/sort_order.py +++ b/clients/client-python/gravitino/api/expressions/sorts/sort_order.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + from abc import abstractmethod from gravitino.api.expressions.expression import Expression diff --git a/clients/client-python/gravitino/api/expressions/transforms/transform.py b/clients/client-python/gravitino/api/expressions/transforms/transform.py index 9d3de491035..cc6424fe762 100644 --- a/clients/client-python/gravitino/api/expressions/transforms/transform.py +++ b/clients/client-python/gravitino/api/expressions/transforms/transform.py @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from abc import ABC from typing import List from gravitino.api.expressions.expression import Expression diff --git a/clients/client-python/gravitino/api/expressions/transforms/transforms.py b/clients/client-python/gravitino/api/expressions/transforms/transforms.py index 0261f2ab280..713a3ff2356 100644 --- a/clients/client-python/gravitino/api/expressions/transforms/transforms.py +++ b/clients/client-python/gravitino/api/expressions/transforms/transforms.py @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import List from gravitino.api.expressions.expression import Expression diff --git a/clients/client-python/gravitino/api/expressions/unparsed_expression.py b/clients/client-python/gravitino/api/expressions/unparsed_expression.py index 82c5087e58a..c15920dee18 100644 --- a/clients/client-python/gravitino/api/expressions/unparsed_expression.py +++ b/clients/client-python/gravitino/api/expressions/unparsed_expression.py @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. - from __future__ import annotations from typing import List + from gravitino.api.expressions.expression import Expression