Skip to content

Commit

Permalink
implemented numerous existing operations (#723)
Browse files Browse the repository at this point in the history
progress on #722 

Added:
* hash
* lower
* round
* sqrt
* upper
* zip_max
* zip_min
  • Loading branch information
epinzur authored Sep 4, 2023
1 parent 5cbbbff commit 4f89615
Show file tree
Hide file tree
Showing 21 changed files with 326 additions and 4 deletions.
10 changes: 7 additions & 3 deletions python/docs/source/reference/timestream/arithmetic.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,15 @@ See the notes on the specific functions for more information.
Timestream.add
Timestream.ceil
Timestream.clamp
Timestream.div
Timestream.exp
Timestream.floor
Timestream.powf
Timestream.sub
Timestream.greatest
Timestream.least
Timestream.mul
Timestream.div
Timestream.neg
Timestream.powf
Timestream.round
Timestream.sqrt
Timestream.sub
```
1 change: 1 addition & 0 deletions python/docs/source/reference/timestream/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@ grouping
logical
misc
records
string
time
```
1 change: 1 addition & 0 deletions python/docs/source/reference/timestream/misc.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
Timestream.coalesce
Timestream.else_
Timestream.filter
Timestream.hash
Timestream.if_
Timestream.lag
Timestream.null_if
Expand Down
11 changes: 11 additions & 0 deletions python/docs/source/reference/timestream/string.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# String

```{eval-rst}
.. currentmodule:: kaskada
.. autosummary::
:toctree: ../apidocs/
Timestream.lower
Timestream.upper
```
2 changes: 1 addition & 1 deletion python/noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def docs_build(session: nox.Session) -> None:
@nox.session(python=python_versions[0])
def docs(session: nox.Session) -> None:
"""Build and serve the documentation with live reloading on file changes."""
args = ["--open-browser", "docs/source", "docs/_build", "-j", "auto", "--ignore", "*/apidocs/*"]
args = ["--open-browser", "docs/source", "docs/_build", "-j", "auto", "--ignore", "*/apidocs/*", "--watch", "pysrc/kaskada"]
install(session, groups=["typecheck", "docs"])

build_dir = Path("docs", "_build")
Expand Down
89 changes: 89 additions & 0 deletions python/pysrc/kaskada/_timestream.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,20 @@ def floor(self) -> Timestream:
"""Return a Timestream of self rounded down to the nearest integer."""
return Timestream._call("floor", self)

def hash(self) -> Timestream:
"""Return a Timestream containing the hash of the input.
Notes:
This will only return `null` when interpolated at points where
the input is not defined. At other points, it will return the
hash of `null`, which is `0` (not `null`).
"""
return Timestream._call("hash", self)

def lower(self) -> Timestream:
"""Return a Timestream with all values converted to lower case."""
return Timestream._call("lower", self)

def mul(self, rhs: Arg) -> Timestream:
"""Return a Timestream multiplying this and `rhs`.
Expand Down Expand Up @@ -803,6 +817,11 @@ def max(self, window: Optional[kd.windows.Window] = None) -> Timestream:
Args:
window: The window to use for the aggregation. Defaults to the entire Timestream.
See Also:
This returns the maximum of values in a column. See
:func:`greatest` to get the maximum value
between Timestreams at each point.
"""
return _aggregation("max", self, window)

Expand All @@ -813,6 +832,11 @@ def min(self, window: Optional[kd.windows.Window] = None) -> Timestream:
Args:
window: The window to use for the aggregation. Defaults to the entire Timestream.
See Also:
This returns the minimum of values in a column. See
:func:`least` to get the minimum value
between Timestreams at each point.
"""
return _aggregation("min", self, window)

Expand Down Expand Up @@ -923,6 +947,71 @@ def record(self, fields: Callable[[Timestream], Mapping[str, Arg]]) -> Timestrea
"""
return record(fields(self))

def round(self) -> Timestream:
"""Return a Timestream with all values rounded to the nearest integer.
Returns:
A Timestream of the same type as `self`. The result contains `null`
if the value was `null` at that point. Otherwise, it contains
the result of rounding the value to the nearest integer.
Notes:
This method may be applied to any numeric type. For anything other
than `float32` and `float64` it has no affect since the values
are already integers.
See Also:
- :func:`ceil`
- :func:`floor`
"""
return Timestream._call("round", self)

def sqrt(self) -> Timestream:
"""Return a Timestream with the square root of all values."""
return Timestream._call("sqrt", self)

def upper(self) -> Timestream:
"""Return a Timestream with all values converted to upper case."""
return Timestream._call("upper", self)

def greatest(self, rhs: Arg) -> Timestream:
"""Return a Timestream with the maximum value of `self` and `rhs` at each point.
Args:
rhs: The Timestream or literal value to compare to this.
Returns:
Each point contains the value from `self` if `self`
is greater than `rhs`, otherwise it contains `rhs`.
If any input is `null` or `NaN`, then that will be
the result.
See Also:
This returns the greatest of two values. See
:func:`max` for the maximum of values in
a column.
"""
return Timestream._call("zip_max", self, rhs)

def least(self, rhs: Arg) -> Timestream:
"""Return a Timestream with the minimum value of `self` and `rhs` at each point.
Args:
rhs: The Timestream or literal value to compare to this.
Returns:
Each point contains the value from `self` if `self`
is less than `rhs`, otherwise it contains `rhs`.
If any input is `null` or `NaN`, then that will be
the result.
See Also:
This returns the least of two values. See
:func:`min` for the minimum of values in
a column.
"""
return Timestream._call("zip_min", self, rhs)

def preview(
self,
limit: int = 10,
Expand Down
5 changes: 5 additions & 0 deletions python/pytests/golden/greatest_test/test_greatest.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"_time":"2021-01-01T00:00:00.000000000","_key":"A","a":5.7,"b":1.2,"a_greatest_b":5.7}
{"_time":"2021-01-02T00:00:00.000000000","_key":"A","a":6.3,"b":0.4,"a_greatest_b":6.3}
{"_time":"2021-01-03T00:00:00.000000000","_key":"B","a":null,"b":3.7,"a_greatest_b":null}
{"_time":"2021-01-04T00:00:00.000000000","_key":"A","a":13.2,"b":null,"a_greatest_b":null}
{"_time":"2021-01-05T00:00:00.000000000","_key":"A","a":2.0,"b":5.4,"a_greatest_b":5.4}
6 changes: 6 additions & 0 deletions python/pytests/golden/hash_test/test_hash_integer.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{"_time":"2021-01-01T00:00:00.000000000","_key":"Ben","m":5.0,"hash_m":16461383214845928621}
{"_time":"2021-01-01T00:00:00.000000000","_key":"Ryan","m":8.0,"hash_m":6794973171266502674}
{"_time":"2021-01-02T00:00:00.000000000","_key":"Ryan","m":9.0,"hash_m":15653042715643359010}
{"_time":"2021-01-03T00:00:00.000000000","_key":"Ben","m":8.0,"hash_m":6794973171266502674}
{"_time":"2021-01-04T00:00:00.000000000","_key":"Ben","m":null,"hash_m":0}
{"_time":"2021-01-04T00:00:00.000000000","_key":"Ryan","m":9.0,"hash_m":15653042715643359010}
6 changes: 6 additions & 0 deletions python/pytests/golden/hash_test/test_hash_string.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{"_time":"2021-01-01T00:00:00.000000000","_key":"Ben","m":"hello","hash_m":1472103086483932002}
{"_time":"2021-01-01T00:00:00.000000000","_key":"Ryan","m":null,"hash_m":0}
{"_time":"2021-01-02T00:00:00.000000000","_key":"Ryan","m":"world","hash_m":8057155968893317866}
{"_time":"2021-01-03T00:00:00.000000000","_key":"Ben","m":"hi","hash_m":2460612554838835252}
{"_time":"2021-01-04T00:00:00.000000000","_key":"Ben","m":" ","hash_m":14894517190786516170}
{"_time":"2021-01-04T00:00:00.000000000","_key":"Ryan","m":"earth","hash_m":14489671231712828724}
5 changes: 5 additions & 0 deletions python/pytests/golden/least_test/test_least.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"_time":"2021-01-01T00:00:00.000000000","_key":"A","a":5.7,"b":1.2,"a_least_b":1.2}
{"_time":"2021-01-02T00:00:00.000000000","_key":"A","a":6.3,"b":0.4,"a_least_b":0.4}
{"_time":"2021-01-03T00:00:00.000000000","_key":"B","a":null,"b":3.7,"a_least_b":null}
{"_time":"2021-01-04T00:00:00.000000000","_key":"A","a":13.2,"b":null,"a_least_b":null}
{"_time":"2021-01-05T00:00:00.000000000","_key":"A","a":2.0,"b":5.4,"a_least_b":2.0}
6 changes: 6 additions & 0 deletions python/pytests/golden/lower_test/test_lower.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{"_time":"2021-01-01T00:00:00.000000000","_key":"Ben","m":"Hello World","lower_m":"hello world"}
{"_time":"2021-01-02T00:00:00.000000000","_key":"Ryan","m":null,"lower_m":null}
{"_time":"2021-01-02T00:00:00.000000000","_key":"Ryan","m":"Hi Earth","lower_m":"hi earth"}
{"_time":"2021-01-03T00:00:00.000000000","_key":"Ben","m":"Hello","lower_m":"hello"}
{"_time":"2021-01-03T00:00:00.000000000","_key":"Ben","m":null,"lower_m":null}
{"_time":"2021-01-04T00:00:00.000000000","_key":"Ryan","m":"hi","lower_m":"hi"}
3 changes: 3 additions & 0 deletions python/pytests/golden/round_test/test_round.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"_time":"2021-01-01T00:00:00.000000000","_key":"A","m":5.7,"round_m":6.0}
{"_time":"2021-01-01T00:00:00.000000000","_key":"A","m":6.3,"round_m":6.0}
{"_time":"2021-01-02T00:00:00.000000000","_key":"B","m":null,"round_m":null}
3 changes: 3 additions & 0 deletions python/pytests/golden/sqrt_test/test_sqrt.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"_time":"2021-01-01T00:00:00.000000000","_key":"A","m":5.7,"sqrt_m":2.3874672773}
{"_time":"2021-01-01T00:00:00.000000000","_key":"A","m":6.3,"sqrt_m":2.5099800796}
{"_time":"2021-01-02T00:00:00.000000000","_key":"B","m":null,"sqrt_m":null}
6 changes: 6 additions & 0 deletions python/pytests/golden/upper_test/test_upper.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{"_time":"2021-01-01T00:00:00.000000000","_key":"Ben","m":"Hello World","upper_m":"HELLO WORLD"}
{"_time":"2021-01-02T00:00:00.000000000","_key":"Ryan","m":null,"upper_m":null}
{"_time":"2021-01-02T00:00:00.000000000","_key":"Ryan","m":"Hi Earth","upper_m":"HI EARTH"}
{"_time":"2021-01-03T00:00:00.000000000","_key":"Ben","m":"Hello","upper_m":"HELLO"}
{"_time":"2021-01-03T00:00:00.000000000","_key":"Ben","m":null,"upper_m":null}
{"_time":"2021-01-04T00:00:00.000000000","_key":"Ryan","m":"hi","upper_m":"HI"}
23 changes: 23 additions & 0 deletions python/pytests/greatest_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import kaskada as kd
import pytest


@pytest.fixture(scope="module")
def source() -> kd.sources.CsvString:
content = "\n".join(
[
"time,key,a,b",
"2021-01-01T00:00:00,A,5.7,1.2",
"2021-01-02T00:00:00,A,6.3,0.4",
"2021-01-03T00:00:00,B,,3.7",
"2021-01-04T00:00:00,A,13.2,",
"2021-01-05T00:00:00,A,2,5.4",
]
)
return kd.sources.CsvString(content, time_column="time", key_column="key")


def test_greatest(source, golden) -> None:
a = source.col("a")
b = source.col("b")
golden.jsonl(kd.record({"a": a, "b": b, "a_greatest_b": a.greatest(b)}))
44 changes: 44 additions & 0 deletions python/pytests/hash_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import kaskada as kd
import pytest


@pytest.fixture(scope="module")
def string_source() -> kd.sources.CsvString:
content = "\n".join(
[
"time,key,m,n",
"2021-01-01T00:00:00,Ben,hello,",
"2021-01-01T00:00:00,Ryan,,",
"2021-01-02T00:00:00,Ryan,world,",
"2021-01-03T00:00:00,Ben,hi,",
"2021-01-04T00:00:00,Ben, ,",
"2021-01-04T00:00:00,Ryan,earth,",
]
)
return kd.sources.CsvString(content, time_column="time", key_column="key")


def test_hash_string(string_source, golden) -> None:
m = string_source.col("m")
golden.jsonl(kd.record({"m": m, "hash_m": m.hash()}))


@pytest.fixture(scope="module")
def integer_source() -> kd.sources.CsvString:
content = "\n".join(
[
"time,key,m,n",
"2021-01-01T00:00:00,Ben,5,",
"2021-01-01T00:00:00,Ryan,8,",
"2021-01-02T00:00:00,Ryan,9,",
"2021-01-03T00:00:00,Ben,8,",
"2021-01-04T00:00:00,Ben,,",
"2021-01-04T00:00:00,Ryan,9,",
]
)
return kd.sources.CsvString(content, time_column="time", key_column="key")


def test_hash_integer(integer_source, golden) -> None:
m = integer_source.col("m")
golden.jsonl(kd.record({"m": m, "hash_m": m.hash()}))
23 changes: 23 additions & 0 deletions python/pytests/least_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import kaskada as kd
import pytest


@pytest.fixture(scope="module")
def source() -> kd.sources.CsvString:
content = "\n".join(
[
"time,key,a,b",
"2021-01-01T00:00:00,A,5.7,1.2",
"2021-01-02T00:00:00,A,6.3,0.4",
"2021-01-03T00:00:00,B,,3.7",
"2021-01-04T00:00:00,A,13.2,",
"2021-01-05T00:00:00,A,2,5.4",
]
)
return kd.sources.CsvString(content, time_column="time", key_column="key")


def test_least(source, golden) -> None:
a = source.col("a")
b = source.col("b")
golden.jsonl(kd.record({"a": a, "b": b, "a_least_b": a.least(b)}))
23 changes: 23 additions & 0 deletions python/pytests/lower_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import kaskada as kd
import pytest


@pytest.fixture(scope="module")
def source() -> kd.sources.CsvString:
content = "\n".join(
[
"time,key,m",
"2021-01-01T00:00:00,Ben,Hello World",
"2021-01-02T00:00:00,Ryan,",
"2021-01-02T00:00:00,Ryan,Hi Earth",
"2021-01-03T00:00:00,Ben,Hello",
"2021-01-03T00:00:00,Ben,",
"2021-01-04T00:00:00,Ryan,hi",
]
)
return kd.sources.CsvString(content, time_column="time", key_column="key")


def test_lower(source, golden) -> None:
m = source.col("m")
golden.jsonl(kd.record({"m": m, "lower_m": m.lower()}))
20 changes: 20 additions & 0 deletions python/pytests/round_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import kaskada as kd
import pytest


@pytest.fixture(scope="module")
def source() -> kd.sources.CsvString:
content = "\n".join(
[
"time,key,m",
"2021-01-01T00:00:00,A,5.7",
"2021-01-01T00:00:00,A,6.3",
"2021-01-02T00:00:00,B,",
]
)
return kd.sources.CsvString(content, time_column="time", key_column="key")


def test_round(source, golden) -> None:
m = source.col("m")
golden.jsonl(kd.record({"m": m, "round_m": m.round()}))
20 changes: 20 additions & 0 deletions python/pytests/sqrt_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import kaskada as kd
import pytest


@pytest.fixture(scope="module")
def source() -> kd.sources.CsvString:
content = "\n".join(
[
"time,key,m",
"2021-01-01T00:00:00,A,5.7",
"2021-01-01T00:00:00,A,6.3",
"2021-01-02T00:00:00,B,",
]
)
return kd.sources.CsvString(content, time_column="time", key_column="key")


def test_sqrt(source, golden) -> None:
m = source.col("m")
golden.jsonl(kd.record({"m": m, "sqrt_m": m.sqrt()}))
Loading

0 comments on commit 4f89615

Please sign in to comment.