fix: make memtable cleanup tests work (#10283)
cpcloud authored Oct 8, 2024
1 parent afebf55 commit 11f8921
Showing 8 changed files with 34 additions and 37 deletions.
4 changes: 2 additions & 2 deletions compose.yaml
@@ -591,7 +591,7 @@ services:
     image: bitnami/spark:3.5.3
     ports:
       - 15002:15002
-    command: /opt/bitnami/spark/sbin/start-connect-server.sh --name ibis_testing --packages org.apache.spark:spark-connect_2.12:3.5.2,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.5.2,io.delta:delta-core_2.12:2.1.0
+    command: /opt/bitnami/spark/sbin/start-connect-server.sh --name ibis_testing --packages org.apache.spark:spark-connect_2.12:3.5.3,org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.1,io.delta:delta-core_2.12:2.1.0
     healthcheck:
       test:
         - CMD-SHELL
@@ -601,7 +601,7 @@ services:
     volumes:
       - spark-connect:/data
       - $PWD/docker/spark-connect/conf.properties:/opt/bitnami/spark/conf/spark-defaults.conf:ro
-      # - $PWD/docker/spark-connect/log4j2.properties:/opt/bitnami/spark/conf/log4j2.properties:ro
+      - $PWD/docker/spark-connect/log4j2.properties:/opt/bitnami/spark/conf/log4j2.properties:ro
     networks:
       - spark-connect

2 changes: 1 addition & 1 deletion docker/spark-connect/conf.properties
@@ -1,6 +1,6 @@
 spark.driver.extraJavaOptions=-Duser.timezone=GMT
 spark.executor.extraJavaOptions=-Duser.timezone=GMT
-spark.jars.packages=org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.5.2
+spark.jars.packages=org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.1
 spark.sql.catalog.local.type=hadoop
 spark.sql.catalog.local.warehouse=warehouse
 spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog
6 changes: 4 additions & 2 deletions ibis/backends/bigquery/__init__.py
@@ -698,12 +698,14 @@ def compile(
     ):
         """Compile an Ibis expression to a SQL string."""
         session_dataset = self._session_dataset
+        session_dataset_id = getattr(session_dataset, "dataset_id", None)
+        session_project = getattr(session_dataset, "project", None)
         query = self.compiler.to_sqlglot(
             expr,
             limit=limit,
             params=params,
-            session_dataset_id=getattr(session_dataset, "dataset_id", None),
-            session_project=getattr(session_dataset, "project", None),
+            session_dataset_id=session_dataset_id,
+            session_project=session_project,
             **kwargs,
         )
         queries = query if isinstance(query, list) else [query]
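Aside: the hoist above is behavior-preserving. The use of `getattr` with a `None` default suggests `self._session_dataset` may be `None` before any session dataset exists, so plain attribute access would raise. A minimal sketch of that pattern (the variables here are illustrative, not part of the commit):

```python
# session_dataset may be None when no BigQuery session dataset has been
# created yet; getattr with a default avoids an AttributeError.
session_dataset = None

session_dataset_id = getattr(session_dataset, "dataset_id", None)  # -> None
session_project = getattr(session_dataset, "project", None)        # -> None

assert session_dataset_id is None and session_project is None
```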
2 changes: 1 addition & 1 deletion ibis/backends/pyspark/__init__.py
@@ -448,7 +448,7 @@ def _register_udfs(self, expr: ir.Expr) -> None:
     def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
         schema = PySparkSchema.from_ibis(op.schema)
         df = self._session.createDataFrame(data=op.data.to_frame(), schema=schema)
-        df.createTempView(op.name)
+        df.createOrReplaceTempView(op.name)

     def _finalize_memtable(self, name: str) -> None:
         """No-op, otherwise a deadlock can occur when using Spark Connect."""
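Aside: the switch to `createOrReplaceTempView` makes memtable registration idempotent: registering the same name twice replaces the view instead of raising. A minimal sketch against a local Spark session (not part of this commit):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([(1,)], schema="x int")

df.createOrReplaceTempView("ibis_memtable_demo")  # first registration
df.createOrReplaceTempView("ibis_memtable_demo")  # replaces silently, no error

df.createTempView("other_view")
# df.createTempView("other_view")  # would raise: temp view already exists
```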
31 changes: 11 additions & 20 deletions ibis/backends/snowflake/tests/test_client.py
@@ -355,32 +355,23 @@ def test_struct_of_json(con):
     assert all(value == raw for value in result.to_pylist())


-def test_list_tables(con):
-    assert {
-        "ASTRONAUTS",
-        "AWARDS_PLAYERS",
-        "BATTING",
-        "DIAMONDS",
-        "FUNCTIONAL_ALLTYPES",
-    }.issubset(con.list_tables())
-
-    like_table = [
+@pytest.mark.parametrize(
+    "database",
+    ["IBIS_TESTING.INFORMATION_SCHEMA", ("IBIS_TESTING", "INFORMATION_SCHEMA")],
+    ids=["dotted-path", "tuple"],
+)
+def test_list_tables_with_database(con, database):
+    like_table = {
         "EVENT_TABLES",
         "EXTERNAL_TABLES",
         "HYBRID_TABLES",
         "TABLES",
         "TABLE_CONSTRAINTS",
         "TABLE_PRIVILEGES",
         "TABLE_STORAGE_METRICS",
-    ]
-
-    assert (
-        con.list_tables(database="IBIS_TESTING.INFORMATION_SCHEMA", like="TABLE")
-        == like_table
-    )
-    assert (
-        con.list_tables(database=("IBIS_TESTING", "INFORMATION_SCHEMA"), like="TABLE")
-        == like_table
-    )
+    }
+    tables = con.list_tables(database=database, like="TABLE")
+    assert like_table.issubset(tables)


 def test_timestamp_memtable(con):
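Aside: the rewrite buys robustness in two ways: `pytest.mark.parametrize` runs one test body against both spellings of the database spec, and `issubset` tolerates any new `INFORMATION_SCHEMA` views Snowflake may add, unlike the old equality check against a list. A self-contained sketch of the pattern with a stubbed backend (all names hypothetical):

```python
import pytest


def fake_list_tables(database):
    """Stand-in for con.list_tables: both spec forms name the same schema."""
    assert database in ("DB.SCHEMA", ("DB", "SCHEMA"))
    return ["EVENT_TABLES", "TABLES", "TABLE_CONSTRAINTS", "TABLE_PRIVILEGES"]


@pytest.mark.parametrize(
    "database",
    ["DB.SCHEMA", ("DB", "SCHEMA")],
    ids=["dotted-path", "tuple"],
)
def test_list_tables_pattern(database):
    # issubset still passes if the warehouse grows new system tables
    assert {"TABLES", "TABLE_CONSTRAINTS"}.issubset(fake_list_tables(database))
```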
18 changes: 11 additions & 7 deletions ibis/backends/sql/compilers/bigquery/__init__.py
@@ -35,16 +35,15 @@
 _NAME_REGEX = re.compile(r'[^!"$()*,./;?@[\\\]^`{}~\n]+')


-_MEMTABLE_PATTERN = re.compile(
-    r"^_?ibis_(?:[A-Za-z_][A-Za-z_0-9]*)_memtable_[a-z0-9]{26}$"
-)
-
-
 def _qualify_memtable(
-    node: sge.Expression, *, dataset: str | None, project: str | None
+    node: sge.Expression,
+    *,
+    dataset: str | None,
+    project: str | None,
+    memtable_names: frozenset[str],
 ) -> sge.Expression:
     """Add a BigQuery dataset and project to memtable references."""
-    if isinstance(node, sge.Table) and _MEMTABLE_PATTERN.match(node.name) is not None:
+    if isinstance(node, sge.Table) and node.name in memtable_names:
         node.args["db"] = dataset
         node.args["catalog"] = project
         # make sure to quote table location
@@ -241,10 +240,15 @@ def to_sqlglot(
         table_expr = expr.as_table()
         geocols = table_expr.schema().geospatial

+        memtable_names = frozenset(
+            op.name for op in table_expr.op().find(ops.InMemoryTable)
+        )
+
         result = sql.transform(
             _qualify_memtable,
             dataset=session_dataset_id,
             project=session_project,
+            memtable_names=memtable_names,
         ).transform(_remove_null_ordering_from_unsupported_window)

         if geocols:
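Aside: matching memtables by the exact names of the `InMemoryTable` ops found in the expression, rather than by regex, means only tables the compiler actually materialized get a dataset and project attached. A minimal sketch of the `sqlglot` mechanics the new code relies on, namely that `Expression.transform` forwards keyword arguments to its callback (table and dataset names below are made up):

```python
import sqlglot
import sqlglot.expressions as sge


def qualify(node, *, dataset, project, memtable_names):
    # Qualify only tables whose names are known memtables.
    if isinstance(node, sge.Table) and node.name in memtable_names:
        node.args["db"] = dataset
        node.args["catalog"] = project
    return node


tree = sqlglot.parse_one("SELECT * FROM ibis_demo_memtable_x", read="bigquery")
qualified = tree.transform(
    qualify,
    dataset="session_dataset",
    project="my_project",
    memtable_names=frozenset({"ibis_demo_memtable_x"}),
)
print(qualified.sql("bigquery"))
# SELECT * FROM my_project.session_dataset.ibis_demo_memtable_x
```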
2 changes: 1 addition & 1 deletion justfile
@@ -136,7 +136,7 @@ download-data owner="ibis-project" repo="testing-data" rev="master":
     fi

 # download the iceberg jar used for testing pyspark and iceberg integration
-download-iceberg-jar pyspark scala="2.12" iceberg="1.5.2":
+download-iceberg-jar pyspark scala="2.12" iceberg="1.6.1":
     #!/usr/bin/env bash
     set -eo pipefail
6 changes: 3 additions & 3 deletions poetry-overrides.nix
@@ -4,10 +4,10 @@ final: prev: {
     inherit (final) pkgs lib;
     pysparkVersion = lib.versions.majorMinor attrs.version;
     jarHashes = {
-      "3.5" = "sha256-KuxLeNgGzIHU5QMls1H2NJyQ3mQVROZExgMvAAk4YYs=";
-      "3.3" = "sha256-W3ij6mwrIDOc4zGqtpCsbg563qHmdMc8eZnLX6bnl2M=";
+      "3.5" = "sha256-h+cYTzHvDKrEFbvfzxvElDNGpYuY10fcg0NPcTnhKss=";
+      "3.3" = "sha256-3D++9VCiLoMP7jPvdCtBn7xnxqHnyQowcqdGUe0M3mk=";
     };
-    icebergVersion = "1.5.2";
+    icebergVersion = "1.6.1";
     scalaVersion = "2.12";
     jarName = "iceberg-spark-runtime-${pysparkVersion}_${scalaVersion}-${icebergVersion}.jar";
     icebergJarUrl = "https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-${pysparkVersion}_${scalaVersion}/${icebergVersion}/${jarName}";
