From f31b35815b4c7a638dbd3987aead8a9528c17488 Mon Sep 17 00:00:00 2001
From: Yun Zou
Date: Wed, 2 Jul 2025 09:26:24 -0700
Subject: [PATCH 01/10] add polaris-bundle-project

---
 .../publishing/PublishingHelperPlugin.kt      |   5 -
 plugins/spark/README.md                       |  63 +++++-----
 plugins/spark/v3.5/getting-started/README.md  |   2 +-
 .../v3.5/getting-started/notebooks/Dockerfile |   2 +-
 .../notebooks/SparkPolaris.ipynb              |   2 +-
 plugins/spark/v3.5/regtests/run.sh            |   5 +-
 .../LICENSE => spark-bundle/CUSTOM-LICENSE}   |   0
 .../NOTICE => spark-bundle/CUSTOM-NOTICE}     |   0
 .../spark/v3.5/spark-bundle/build.gradle.kts  |  69 +++++++++++
 plugins/spark/v3.5/spark/build.gradle.kts     | 108 ------------------
 settings.gradle.kts                           |  13 +--
 11 files changed, 114 insertions(+), 155 deletions(-)
 rename plugins/spark/v3.5/{spark/LICENSE => spark-bundle/CUSTOM-LICENSE} (100%)
 rename plugins/spark/v3.5/{spark/NOTICE => spark-bundle/CUSTOM-NOTICE} (100%)
 create mode 100644 plugins/spark/v3.5/spark-bundle/build.gradle.kts

diff --git a/build-logic/src/main/kotlin/publishing/PublishingHelperPlugin.kt b/build-logic/src/main/kotlin/publishing/PublishingHelperPlugin.kt
index d4d412a30f..04b04225e7 100644
--- a/build-logic/src/main/kotlin/publishing/PublishingHelperPlugin.kt
+++ b/build-logic/src/main/kotlin/publishing/PublishingHelperPlugin.kt
@@ -133,11 +133,6 @@ constructor(private val softwareComponentFactory: SoftwareComponentFactory) : Pl

           suppressPomMetadataWarningsFor("testFixturesApiElements")
           suppressPomMetadataWarningsFor("testFixturesRuntimeElements")
-
-          if (project.tasks.findByName("createPolarisSparkJar") != null) {
-            // if the project contains spark client jar, also publish the jar to maven
-            artifact(project.tasks.named("createPolarisSparkJar").get())
-          }
         }

         if (
diff --git a/plugins/spark/README.md b/plugins/spark/README.md
index c7d6bc876b..0b6447ba01 100644
--- a/plugins/spark/README.md
+++ b/plugins/spark/README.md
@@ -28,32 +28,29 @@ REST endpoints, and provides implementations for Apache Spark's
 Right now, the plugin only provides support for Spark 3.5, Scala version 2.12 and 2.13,
 and depends on iceberg-spark-runtime 1.9.0.

-# Build Plugin Jar
-A task createPolarisSparkJar is added to build a jar for the Polaris Spark plugin, the jar is named as:
-`polaris-spark-<spark_version>_<scala_version>-<polaris_version>-bundle.jar`. For example:
-`polaris-spark-3.5_2.12-0.11.0-beta-incubating-SNAPSHOT-bundle.jar`.
-
-- `./gradlew :polaris-spark-3.5_2.12:createPolarisSparkJar` -- build jar for Spark 3.5 with Scala version 2.12.
-- `./gradlew :polaris-spark-3.5_2.13:createPolarisSparkJar` -- build jar for Spark 3.5 with Scala version 2.13.
-
-The result jar is located at plugins/spark/v3.5/build/<scala_version>/libs after the build.
-
-# Start Spark with Local Polaris Service using built Jar
-Once the jar is built, we can manually test it with Spark and a local Polaris service.
-
+# Start Spark with local Polaris service using the Polaris Spark plugin
 The following command starts a Polaris server for local testing, it runs on localhost:8181 with default
 realm `POLARIS` and root credentials `root:s3cr3t`:
 ```shell
 ./gradlew run
 ```

-Once the local server is running, the following command can be used to start the spark-shell with the built Spark client
-jar, and to use the local Polaris server as a Catalog.
+Once the local server is running, you can start Spark with the Polaris Spark plugin using either the `--packages`
+option with the Polaris Spark package, or the `--jars` option with the Polaris Spark bundle JAR.
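Before wiring up Spark, it can help to confirm that the local server is actually reachable. A minimal sketch, not part of the patch itself, assuming the default local settings above (`localhost:8181`, realm `POLARIS`, `root:s3cr3t`) and the standard Iceberg REST OAuth token path used by Polaris; adjust if your deployment differs:

```shell
# Request a token from the local Polaris server; a JSON payload containing an
# access_token field indicates the server is up and the credentials work.
curl -s -X POST http://localhost:8181/api/catalog/v1/oauth/tokens \
  -d 'grant_type=client_credentials' \
  -d 'client_id=root' \
  -d 'client_secret=s3cr3t' \
  -d 'scope=PRINCIPAL_ROLE:ALL'
```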
+
+The following sections explain how to build and run Spark with both the Polaris package and the bundle JAR.
+
+# Build and run with Polaris spark package locally
+The Polaris Spark client source code is located in plugins/spark/v3.5/spark. To use the Polaris Spark package
+with Spark, you first need to publish the source JAR to your local Maven repository.
+
+Run the following command to build the Polaris Spark project and publish the source JAR to your local Maven repository:
+- `./gradlew assemble` -- build the whole Polaris project without running tests
+- `./gradlew publishToMavenLocal` -- publish Polaris project source JAR to local Maven repository

 ```shell
 bin/spark-shell \
---jars <path-to-spark-client-jar> \
---packages org.apache.iceberg:iceberg-aws-bundle:1.9.0,io.delta:delta-spark_2.12:3.3.1 \
+--packages org.apache.polaris:polaris-spark-<spark_version>_<scala_version>:<polaris_version>,org.apache.iceberg:iceberg-aws-bundle:1.9.0,io.delta:delta-spark_2.12:3.3.1 \
 --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \
 --conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \
 --conf spark.sql.catalog.<catalog-name>.warehouse=<catalog-name> \
 --conf spark.sql.catalog.<catalog-name>.header.X-Iceberg-Access-Delegation=vended-credentials \
 --conf spark.sql.catalog.<catalog-name>=org.apache.polaris.spark.SparkCatalog \
 --conf spark.sql.catalog.<catalog-name>.uri=http://localhost:8181/api/catalog \
 --conf spark.sql.catalog.<catalog-name>.credential="root:secret" \
 --conf spark.sql.catalog.<catalog-name>.scope='PRINCIPAL_ROLE:ALL' \
 --conf spark.sql.catalog.<catalog-name>.token-refresh-enabled=true \
 --conf spark.sql.sources.useV1SourceList=''
 ```

-Assume the path to the built Spark client jar is
-`/polaris/plugins/spark/v3.5/spark/build/2.12/libs/polaris-spark-3.5_2.12-0.11.0-beta-incubating-SNAPSHOT-bundle.jar`
-and the name of the catalog is `polaris`. The cli command will look like following:
+The polaris version can be found in versions.txt in the Polaris root project dir.
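To confirm the package is resolvable before starting Spark, you can look at the local Maven repository directly. A small sketch, assuming the default `~/.m2` location and the Spark 3.5 / Scala 2.12 artifact; the version directory listed should match the version recorded in versions.txt:

```shell
# List what `./gradlew publishToMavenLocal` produced for the 3.5_2.12 client.
ls ~/.m2/repository/org/apache/polaris/polaris-spark-3.5_2.12/
```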
+
+# Build and run with Polaris spark bundle JAR
+The polaris-spark-bundle project is used to build the Polaris Spark bundle JAR. The resulting JAR will follow this naming format:
+polaris-spark-bundle-<spark_version>_<scala_version>-<polaris_version>.jar
+For example:
+polaris-spark-bundle-3.5_2.12-1.1.0-incubating-SNAPSHOT.jar
+
+Run `./gradlew assemble` to build the entire Polaris project without running tests. After the build completes,
+the bundle JAR can be found under: plugins/spark/v3.5/spark-bundle/build/<scala_version>/libs/.
+To start Spark using the bundle JAR, specify it with the `--jars` option as shown below:

 ```shell
 bin/spark-shell \
---jars /polaris/plugins/spark/v3.5/spark/build/2.12/libs/polaris-spark-3.5_2.12-0.11.0-beta-incubating-SNAPSHOT-bundle.jar \
+--jars <path-to-spark-client-jar> \
 --packages org.apache.iceberg:iceberg-aws-bundle:1.9.0,io.delta:delta-spark_2.12:3.3.1 \
 --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \
 --conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \
---conf spark.sql.catalog.polaris.warehouse=<catalog-name> \
---conf spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation=vended-credentials \
---conf spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog \
---conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \
---conf spark.sql.catalog.polaris.credential="root:secret" \
---conf spark.sql.catalog.polaris.scope='PRINCIPAL_ROLE:ALL' \
---conf spark.sql.catalog.polaris.token-refresh-enabled=true \
+--conf spark.sql.catalog.<catalog-name>.warehouse=<catalog-name> \
+--conf spark.sql.catalog.<catalog-name>.header.X-Iceberg-Access-Delegation=vended-credentials \
+--conf spark.sql.catalog.<catalog-name>=org.apache.polaris.spark.SparkCatalog \
+--conf spark.sql.catalog.<catalog-name>.uri=http://localhost:8181/api/catalog \
+--conf spark.sql.catalog.<catalog-name>.credential="root:secret" \
+--conf spark.sql.catalog.<catalog-name>.scope='PRINCIPAL_ROLE:ALL' \
+--conf spark.sql.catalog.<catalog-name>.token-refresh-enabled=true \
 --conf spark.sql.sources.useV1SourceList=''
 ```
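If you do not want to copy the full path into `<path-to-spark-client-jar>` by hand, the newest bundle JAR can be resolved with a glob first. A sketch, assuming a Scala 2.12 build run from the Polaris repository root:

```shell
# Pick up the bundle JAR produced by `./gradlew assemble` (Scala 2.12 build).
BUNDLE_JAR=$(ls plugins/spark/v3.5/spark-bundle/build/2.12/libs/polaris-spark-bundle-3.5_2.12-*.jar | head -n 1)
echo "Using bundle jar: ${BUNDLE_JAR}"
# Then pass it to spark-shell in place of <path-to-spark-client-jar>:
# bin/spark-shell --jars "${BUNDLE_JAR}" ...
```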
diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile index 2af9412c60..3f6cccb7d9 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile +++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile @@ -38,7 +38,7 @@ USER jovyan COPY --chown=jovyan client /home/jovyan/client COPY --chown=jovyan regtests/requirements.txt /tmp -COPY --chown=jovyan plugins/spark/v3.5/spark/build/2.12/libs /home/jovyan/polaris_libs +COPY --chown=jovyan plugins/spark/v3.5/spark-bundle/build/2.12/libs /home/jovyan/polaris_libs RUN pip install -r /tmp/requirements.txt RUN cd client/python && poetry lock && \ python3 -m poetry install && \ diff --git a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb index 1c3803d7b0..02761f7167 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb +++ b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb @@ -265,7 +265,7 @@ "from pyspark.sql import SparkSession\n", "\n", "spark = (SparkSession.builder\n", - " .config(\"spark.jars\", \"../polaris_libs/polaris-spark-3.5_2.12-0.11.0-beta-incubating-SNAPSHOT-bundle.jar\")\n", + " .config(\"spark.jars\", \"../polaris_libs/polaris-spark-bundle-3.5_2.12-1.1.0-incubating-SNAPSHOT.jar\")\n", " .config(\"spark.jars.packages\", \"org.apache.iceberg:iceberg-aws-bundle:1.9.0,io.delta:delta-spark_2.12:3.2.1\")\n", " .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\n", " .config('spark.sql.iceberg.vectorization.enabled', 'false')\n", diff --git a/plugins/spark/v3.5/regtests/run.sh b/plugins/spark/v3.5/regtests/run.sh index 90c2dd7555..1e60f28238 100755 --- a/plugins/spark/v3.5/regtests/run.sh +++ b/plugins/spark/v3.5/regtests/run.sh @@ -73,8 +73,9 @@ SPARK_SHELL_OPTIONS=("PACKAGE" "JAR") for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do echo "RUN REGRESSION TEST FOR SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION}, SPARK_VERSION=${SPARK_VERSION}, SCALA_VERSION=${SCALA_VERSION}" # find the project jar - SPARK_DIR=${SPARK_ROOT_DIR}/spark - JAR_PATH=$(find ${SPARK_DIR} -name "polaris-spark-${SPARK_MAJOR_VERSION}_${SCALA_VERSION}-*.*-bundle.jar" -print -quit) + SPARK_DIR=${SPARK_ROOT_DIR}/spark-bundle + SRC_JAR_PATH=$(find ${SPARK_DIR} -name "polaris-spark-bundle-${SPARK_MAJOR_VERSION}_${SCALA_VERSION}-*.*-sources.jar" -print -quit) + JAR_PATH=${SRC_JAR_PATH/-sources/} echo "find jar ${JAR_PATH}" # extract the polaris diff --git a/plugins/spark/v3.5/spark/LICENSE b/plugins/spark/v3.5/spark-bundle/CUSTOM-LICENSE similarity index 100% rename from plugins/spark/v3.5/spark/LICENSE rename to plugins/spark/v3.5/spark-bundle/CUSTOM-LICENSE diff --git a/plugins/spark/v3.5/spark/NOTICE b/plugins/spark/v3.5/spark-bundle/CUSTOM-NOTICE similarity index 100% rename from plugins/spark/v3.5/spark/NOTICE rename to plugins/spark/v3.5/spark-bundle/CUSTOM-NOTICE diff --git a/plugins/spark/v3.5/spark-bundle/build.gradle.kts b/plugins/spark/v3.5/spark-bundle/build.gradle.kts new file mode 100644 index 0000000000..92f58f4768 --- /dev/null +++ b/plugins/spark/v3.5/spark-bundle/build.gradle.kts @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
+
+plugins {
+  id("polaris-client")
+  id("com.gradleup.shadow")
+}
+
+// get version information
+val sparkMajorVersion = "3.5"
+val scalaVersion = getAndUseScalaVersionForProject()
+val icebergVersion = pluginlibs.versions.iceberg.get()
+val spark35Version = pluginlibs.versions.spark35.get()
+
+val scalaLibraryVersion =
+  if (scalaVersion == "2.12") {
+    pluginlibs.versions.scala212.get()
+  } else {
+    pluginlibs.versions.scala213.get()
+  }
+
+dependencies { implementation(project(":polaris-spark-${sparkMajorVersion}_${scalaVersion}")) }
+
+tasks.named<ShadowJar>("shadowJar") {
+  archiveClassifier = null
+  isZip64 = true
+
+  // pack all the dependencies into an uber jar
+  configurations = listOf(project.configurations.runtimeClasspath.get())
+
+  // recursively remove all LICENSE and NOTICE file under META-INF, includes
+  // directories contains 'license' in the name
+  exclude("META-INF/**/*LICENSE*")
+  exclude("META-INF/**/*NOTICE*")
+  // exclude the top level LICENSE, LICENSE-*.txt and NOTICE
+  exclude("LICENSE*")
+  exclude("NOTICE*")
+
+  // add polaris customized LICENSE and NOTICE at top level. Note that the
+  // customized LICENSE and NOTICE file are called CUSTOM-LICENSE and CUSTOM-NOTICE,
+  // and renamed to LICENSE and NOTICE after include, this is to avoid the file
+  // being excluded due to the exclude pattern matching used above.
+  from("${projectDir}/CUSTOM-LICENSE") { rename { "LICENSE" } }
+  from("${projectDir}/CUSTOM-NOTICE") { rename { "NOTICE" } }
+}
+
+// ensure the shadow jar job (which will automatically run license addition) is run for both
+// `assemble` and `build` task
+tasks.named("assemble") { dependsOn("shadowJar") }
+
+tasks.named("build") { dependsOn("shadowJar") }
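To see the effect of the exclude/rename handling above, the shaded JAR can be inspected directly after a build. A quick check, assuming a Scala 2.12 build output path:

```shell
# Only the renamed top-level LICENSE and NOTICE should remain in the bundle;
# nothing under META-INF should match these patterns.
unzip -l plugins/spark/v3.5/spark-bundle/build/2.12/libs/polaris-spark-bundle-*.jar \
  | grep -iE 'license|notice'
```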
diff --git a/plugins/spark/v3.5/spark/build.gradle.kts b/plugins/spark/v3.5/spark/build.gradle.kts
index 797b27f7d9..42f13728cb 100644
--- a/plugins/spark/v3.5/spark/build.gradle.kts
+++ b/plugins/spark/v3.5/spark/build.gradle.kts
@@ -17,8 +17,6 @@
  * under the License.
  */

-import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
-
 plugins { id("polaris-client") }

 checkstyle {
@@ -80,109 +78,3 @@ dependencies {
     exclude("org.slf4j", "jul-to-slf4j")
   }
 }
-
-tasks.register<ShadowJar>("createPolarisSparkJar") {
-  archiveClassifier = "bundle"
-  isZip64 = true
-
-  // pack both the source code and dependencies
-  from(sourceSets.main.get().output)
-  configurations = listOf(project.configurations.runtimeClasspath.get())
-
-  // Optimization: Minimize the JAR (remove unused classes from dependencies)
-  // The iceberg-spark-runtime plugin is always packaged along with our polaris-spark plugin,
-  // therefore excluded from the optimization.
-  minimize { exclude(dependency("org.apache.iceberg:iceberg-spark-runtime-*.*")) }
-
-  // Always run the license file addition after this task completes
-  finalizedBy("addLicenseFilesToJar")
-}
-
-// Post-processing task to add our project's LICENSE and NOTICE files to the jar and remove any
-// other LICENSE or NOTICE files that were shaded in.
-tasks.register("addLicenseFilesToJar") {
-  dependsOn("createPolarisSparkJar")
-
-  doLast {
-    val shadowTask = tasks.named("createPolarisSparkJar", ShadowJar::class.java).get()
-    val jarFile = shadowTask.archiveFile.get().asFile
-    val tempDir =
-      File(
-        "${project.layout.buildDirectory.get().asFile}/tmp/jar-cleanup-${shadowTask.archiveBaseName.get()}-${shadowTask.archiveClassifier.get()}"
-      )
-    val projectLicenseFile = File(projectDir, "LICENSE")
-    val projectNoticeFile = File(projectDir, "NOTICE")
-
-    // Validate that required license files exist
-    if (!projectLicenseFile.exists()) {
-      throw GradleException("Project LICENSE file not found at: ${projectLicenseFile.absolutePath}")
-    }
-    if (!projectNoticeFile.exists()) {
-      throw GradleException("Project NOTICE file not found at: ${projectNoticeFile.absolutePath}")
-    }
-
-    logger.info("Processing jar: ${jarFile.absolutePath}")
-    logger.info("Using temp directory: ${tempDir.absolutePath}")
-
-    // Clean up temp directory
-    if (tempDir.exists()) {
-      tempDir.deleteRecursively()
-    }
-    tempDir.mkdirs()
-
-    // Extract the jar
-    copy {
-      from(zipTree(jarFile))
-      into(tempDir)
-    }
-
-    fileTree(tempDir)
-      .matching {
-        include("**/*LICENSE*")
-        include("**/*NOTICE*")
-      }
-      .forEach { file ->
-        logger.info("Removing license file: ${file.relativeTo(tempDir)}")
-        file.delete()
-      }
-
-    // Remove META-INF/licenses directory if it exists
-    val licensesDir = File(tempDir, "META-INF/licenses")
-    if (licensesDir.exists()) {
-      licensesDir.deleteRecursively()
-      logger.info("Removed META-INF/licenses directory")
-    }
-
-    // Copy our project's license files to root
-    copy {
-      from(projectLicenseFile)
-      into(tempDir)
-    }
-    logger.info("Added project LICENSE file")
-
-    copy {
-      from(projectNoticeFile)
-      into(tempDir)
-    }
-    logger.info("Added project NOTICE file")
-
-    // Delete the original jar
-    jarFile.delete()
-
-    // Create new jar with only project LICENSE and NOTICE files
-    ant.withGroovyBuilder {
-      "jar"("destfile" to jarFile.absolutePath) { "fileset"("dir" to tempDir.absolutePath) }
-    }
-
-    logger.info("Recreated jar with only project LICENSE and NOTICE files")
-
-    // Clean up temp directory
-    tempDir.deleteRecursively()
-  }
-}
-
-// ensure the shadow jar job (which will automatically run license addition) is run for both
-// `assemble` and `build` task
-tasks.named("assemble") { dependsOn("createPolarisSparkJar") }
-
-tasks.named("build") { dependsOn("createPolarisSparkJar") }
diff --git a/settings.gradle.kts b/settings.gradle.kts
index cbfbc3a269..d212423040 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -73,19 +73,16 @@ for (sparkVersion in sparkVersions) {
   for (scalaVersion in scalaVersions) {
     val sparkArtifactId = "polaris-spark-${sparkVersion}_${scalaVersion}"
     val sparkIntArtifactId = "polaris-spark-integration-${sparkVersion}_${scalaVersion}"
-    polarisProject(
-      "polaris-spark-${sparkVersion}_${scalaVersion}",
-      file("${polarisSparkDir}/v${sparkVersion}/spark"),
-    )
-    polarisProject(
-      "polaris-spark-integration-${sparkVersion}_${scalaVersion}",
-      file("${polarisSparkDir}/v${sparkVersion}/integration"),
-    )
+    val sparkBundleArtifactId = "polaris-spark-bundle-${sparkVersion}_${scalaVersion}"
+    polarisProject(sparkArtifactId, file("${polarisSparkDir}/v${sparkVersion}/spark"))
+    polarisProject(sparkIntArtifactId, file("${polarisSparkDir}/v${sparkVersion}/integration"))
+    polarisProject(sparkBundleArtifactId, file("${polarisSparkDir}/v${sparkVersion}/spark-bundle"))
     if (first) {
       first = false
     } else {
       noSourceChecksProjects.add(":$sparkArtifactId")
       noSourceChecksProjects.add(":$sparkIntArtifactId")
+      noSourceChecksProjects.add(":$sparkBundleArtifactId")
     }
     // Skip all duplicated spark client projects while using Intelij IDE.
     // This is to avoid problems during dependency analysis and sync when

From 3b7f5fb8b93375b47cb87fc50fc9a400386fc1e3 Mon Sep 17 00:00:00 2001
From: Yun Zou
Date: Wed, 2 Jul 2025 13:55:58 -0700
Subject: [PATCH 02/10] address comments

---
 plugins/spark/README.md                        | 23 ++++++++++++++++++-
 .../{CUSTOM-LICENSE => BUNDLE-LICENSE}         |  0
 .../{CUSTOM-NOTICE => BUNDLE-NOTICE}           |  0
 .../spark/v3.5/spark-bundle/build.gradle.kts   |  6 +++--
 4 files changed, 26 insertions(+), 3 deletions(-)
 rename plugins/spark/v3.5/spark-bundle/{CUSTOM-LICENSE => BUNDLE-LICENSE} (100%)
 rename plugins/spark/v3.5/spark-bundle/{CUSTOM-NOTICE => BUNDLE-NOTICE} (100%)

diff --git a/plugins/spark/README.md b/plugins/spark/README.md
index 0b6447ba01..c2056d8040 100644
--- a/plugins/spark/README.md
+++ b/plugins/spark/README.md
@@ -63,7 +63,28 @@ bin/spark-shell \
 --conf spark.sql.sources.useV1SourceList=''
 ```

-The polaris version can be found in versions.txt in the Polaris root project dir.
+The Polaris version is defined in the `versions.txt` file located in the root directory of the Polaris project.
+Assume the following values:
+- `spark_version`: 3.5
+- `scala_version`: 2.12
+- `polaris_version`: 1.1.0-incubating-SNAPSHOT
+- `catalog-name`: `polaris`
+The Spark command would look like following:
+
+```shell
+bin/spark-shell \
+--packages org.apache.polaris:polaris-spark-3.5_2.12:1.1.0-incubating-SNAPSHOT,org.apache.iceberg:iceberg-aws-bundle:1.9.0,io.delta:delta-spark_2.12:3.3.1 \
+--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \
+--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \
+--conf spark.sql.catalog.polaris.warehouse=polaris \
+--conf spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation=vended-credentials \
+--conf spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog \
+--conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \
+--conf spark.sql.catalog.polaris.credential="root:secret" \
+--conf spark.sql.catalog.polaris.scope='PRINCIPAL_ROLE:ALL' \
+--conf spark.sql.catalog.polaris.token-refresh-enabled=true \
+--conf spark.sql.sources.useV1SourceList=''
+```
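As a quick end-to-end check once the package resolves, the same configuration can drive a non-interactive query. A sketch using the concrete values above; this assumes the `polaris` catalog already exists on the server, and relies on `spark-sql` accepting the same `--packages` and `--conf` flags as `spark-shell`:

```shell
# Run a single query against the local Polaris catalog and exit.
bin/spark-sql \
--packages org.apache.polaris:polaris-spark-3.5_2.12:1.1.0-incubating-SNAPSHOT,org.apache.iceberg:iceberg-aws-bundle:1.9.0,io.delta:delta-spark_2.12:3.3.1 \
--conf spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog \
--conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \
--conf spark.sql.catalog.polaris.warehouse=polaris \
--conf spark.sql.catalog.polaris.credential="root:secret" \
--conf spark.sql.catalog.polaris.scope='PRINCIPAL_ROLE:ALL' \
-e "SHOW NAMESPACES IN polaris;"
```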

 # Build and run with Polaris spark bundle JAR
 The polaris-spark-bundle project is used to build the Polaris Spark bundle JAR. The resulting JAR will follow this naming format:
diff --git a/plugins/spark/v3.5/spark-bundle/CUSTOM-LICENSE b/plugins/spark/v3.5/spark-bundle/BUNDLE-LICENSE
similarity index 100%
rename from plugins/spark/v3.5/spark-bundle/CUSTOM-LICENSE
rename to plugins/spark/v3.5/spark-bundle/BUNDLE-LICENSE
diff --git a/plugins/spark/v3.5/spark-bundle/CUSTOM-NOTICE b/plugins/spark/v3.5/spark-bundle/BUNDLE-NOTICE
similarity index 100%
rename from plugins/spark/v3.5/spark-bundle/CUSTOM-NOTICE
rename to plugins/spark/v3.5/spark-bundle/BUNDLE-NOTICE
diff --git a/plugins/spark/v3.5/spark-bundle/build.gradle.kts b/plugins/spark/v3.5/spark-bundle/build.gradle.kts
index 92f58f4768..b144f51a27 100644
--- a/plugins/spark/v3.5/spark-bundle/build.gradle.kts
+++ b/plugins/spark/v3.5/spark-bundle/build.gradle.kts
@@ -58,8 +58,8 @@ tasks.named<ShadowJar>("shadowJar") {
   // customized LICENSE and NOTICE file are called CUSTOM-LICENSE and CUSTOM-NOTICE,
   // and renamed to LICENSE and NOTICE after include, this is to avoid the file
   // being excluded due to the exclude pattern matching used above.
-  from("${projectDir}/CUSTOM-LICENSE") { rename { "LICENSE" } }
-  from("${projectDir}/CUSTOM-NOTICE") { rename { "NOTICE" } }
+  from("${projectDir}/BUNDLE-LICENSE") { rename { "LICENSE" } }
+  from("${projectDir}/BUNDLE-NOTICE") { rename { "NOTICE" } }
 }

 // ensure the shadow jar job (which will automatically run license addition) is run for both
@@ -67,3 +67,5 @@ tasks.named<ShadowJar>("shadowJar") {
 tasks.named("assemble") { dependsOn("shadowJar") }

 tasks.named("build") { dependsOn("shadowJar") }
+
+tasks.named("jar") { enabled = false }

From 8a242923604794fd111d0996cf43a0504160c675 Mon Sep 17 00:00:00 2001
From: Yun Zou
Date: Wed, 2 Jul 2025 14:00:17 -0700
Subject: [PATCH 03/10] update comment

---
 plugins/spark/v3.5/spark-bundle/build.gradle.kts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/plugins/spark/v3.5/spark-bundle/build.gradle.kts b/plugins/spark/v3.5/spark-bundle/build.gradle.kts
index b144f51a27..4320916bb8 100644
--- a/plugins/spark/v3.5/spark-bundle/build.gradle.kts
+++ b/plugins/spark/v3.5/spark-bundle/build.gradle.kts
@@ -54,8 +54,8 @@ tasks.named<ShadowJar>("shadowJar") {
   exclude("LICENSE*")
   exclude("NOTICE*")

-  // add polaris customized LICENSE and NOTICE at top level. Note that the
-  // customized LICENSE and NOTICE file are called CUSTOM-LICENSE and CUSTOM-NOTICE,
+  // add polaris customized LICENSE and NOTICE for the bundle jar at top level. Note that the
+  // customized LICENSE and NOTICE file are called BUNDLE-LICENSE and BUNDLE-NOTICE,
   // and renamed to LICENSE and NOTICE after include, this is to avoid the file
   // being excluded due to the exclude pattern matching used above.
   from("${projectDir}/BUNDLE-LICENSE") { rename { "LICENSE" } }
   from("${projectDir}/BUNDLE-NOTICE") { rename { "NOTICE" } }

From f3de59dc43100b7c46b7b1928187c6515faf7813 Mon Sep 17 00:00:00 2001
From: Yun Zou
Date: Mon, 7 Jul 2025 10:54:08 -0700
Subject: [PATCH 04/10] add changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fd1a5340dd..bc86f93446 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,6 +32,7 @@ request adding CHANGELOG notes for breaking (!) changes and possibly other secti
 ### Upgrade notes

 ### Breaking changes
+1. The Polaris Spark Client bundle jar name is updated from polaris-spark-<spark_version>_<scala_version>-<polaris_version>-bundle.jar to polaris-spark-bundle-<spark_version>_<scala_version>-<polaris_version>.jar.

 ### New Features

From 5cb3827e0aee01d65273f3d2c8b4d64bbb43267b Mon Sep 17 00:00:00 2001
From: Yun Zou
Date: Mon, 7 Jul 2025 17:57:44 -0700
Subject: [PATCH 05/10] fix spark client regtests

---
 plugins/spark/v3.5/regtests/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugins/spark/v3.5/regtests/run.sh b/plugins/spark/v3.5/regtests/run.sh
index 1e60f28238..5b9121a725 100755
--- a/plugins/spark/v3.5/regtests/run.sh
+++ b/plugins/spark/v3.5/regtests/run.sh
@@ -81,7 +81,7 @@ for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do
   # extract the polaris
   JAR_NAME=$(basename "$JAR_PATH")
   echo "JAR_NAME=${JAR_NAME}"
-  POLARIS_VERSION=$(echo "$JAR_NAME" | sed -n 's/.*-\([0-9][^-]*.*\)-bundle\.jar/\1/p')
+  POLARIS_VERSION=$(echo "$JAR_NAME" | sed -E 's/^polaris-spark-bundle-[^_]+_[^-]+-([^\.]+.*)\.jar/\1/')
   echo "$POLARIS_VERSION"

   SPARK_EXISTS="TRUE"
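The updated sed expression can be exercised on its own to confirm it extracts the version from the new bundle name; a standalone check using the example jar name from the README:

```shell
# Should print: 1.1.0-incubating-SNAPSHOT
echo "polaris-spark-bundle-3.5_2.12-1.1.0-incubating-SNAPSHOT.jar" \
  | sed -E 's/^polaris-spark-bundle-[^_]+_[^-]+-([^\.]+.*)\.jar/\1/'
```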
From 28a2abda2cfc6d4563309443e5e71042e998414f Mon Sep 17 00:00:00 2001
From: Yun Zou
Date: Wed, 9 Jul 2025 15:43:38 -0700
Subject: [PATCH 06/10] update PR

---
 CHANGELOG.md                                  |  1 -
 .../publishing/PublishingHelperPlugin.kt      |  5 ++
 plugins/spark/v3.5/getting-started/README.md  |  2 +-
 .../v3.5/getting-started/notebooks/Dockerfile |  2 +-
 .../notebooks/SparkPolaris.ipynb              | 49 +++++++------
 plugins/spark/v3.5/regtests/run.sh            |  7 +-
 .../spark/v3.5/spark-bundle/build.gradle.kts  | 71 -------------------
 .../{spark-bundle => spark}/BUNDLE-LICENSE    |  0
 .../{spark-bundle => spark}/BUNDLE-NOTICE     |  0
 plugins/spark/v3.5/spark/build.gradle.kts     | 32 +++++++++
 settings.gradle.kts                           | 13 ++--
 11 files changed, 78 insertions(+), 104 deletions(-)
 delete mode 100644 plugins/spark/v3.5/spark-bundle/build.gradle.kts
 rename plugins/spark/v3.5/{spark-bundle => spark}/BUNDLE-LICENSE (100%)
 rename plugins/spark/v3.5/{spark-bundle => spark}/BUNDLE-NOTICE (100%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bc86f93446..fd1a5340dd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,7 +32,6 @@ request adding CHANGELOG notes for breaking (!) changes and possibly other secti
 ### Upgrade notes

 ### Breaking changes
-1. The Polaris Spark Client bundle jar name is updated from polaris-spark-<spark_version>_<scala_version>-<polaris_version>-bundle.jar to polaris-spark-bundle-<spark_version>_<scala_version>-<polaris_version>.jar.

 ### New Features

diff --git a/build-logic/src/main/kotlin/publishing/PublishingHelperPlugin.kt b/build-logic/src/main/kotlin/publishing/PublishingHelperPlugin.kt
index 04b04225e7..d4d412a30f 100644
--- a/build-logic/src/main/kotlin/publishing/PublishingHelperPlugin.kt
+++ b/build-logic/src/main/kotlin/publishing/PublishingHelperPlugin.kt
@@ -133,6 +133,11 @@ constructor(private val softwareComponentFactory: SoftwareComponentFactory) : Pl

           suppressPomMetadataWarningsFor("testFixturesApiElements")
           suppressPomMetadataWarningsFor("testFixturesRuntimeElements")
+
+          if (project.tasks.findByName("createPolarisSparkJar") != null) {
+            // if the project contains spark client jar, also publish the jar to maven
+            artifact(project.tasks.named("createPolarisSparkJar").get())
+          }
         }

         if (
diff --git a/plugins/spark/v3.5/getting-started/README.md b/plugins/spark/v3.5/getting-started/README.md
index 2617870a9b..d5aa245baa 100644
--- a/plugins/spark/v3.5/getting-started/README.md
+++ b/plugins/spark/v3.5/getting-started/README.md
@@ -28,7 +28,7 @@ A Jupyter notebook is started to run PySpark, and Polaris Python client is also
 directly through Python Client.

 ## Build the Spark Client Jar and Polaris image
-If Spark Client Jar is not presented locally under plugins/spark-bundle/v3.5/build/<scala_version>/libs, please build the jar
+If Spark Client Jar is not presented locally under plugins/spark/v3.5/build/<scala_version>/libs, please build the jar
 using
 - `./gradlew assemble` -- build the Polaris project and skip the tests.
diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
index 3f6cccb7d9..2af9412c60 100644
--- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
+++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
@@ -38,7 +38,7 @@ USER jovyan

 COPY --chown=jovyan client /home/jovyan/client
 COPY --chown=jovyan regtests/requirements.txt /tmp
-COPY --chown=jovyan plugins/spark/v3.5/spark-bundle/build/2.12/libs /home/jovyan/polaris_libs
+COPY --chown=jovyan plugins/spark/v3.5/spark/build/2.12/libs /home/jovyan/polaris_libs
 RUN pip install -r /tmp/requirements.txt
 RUN cd client/python && poetry lock && \
     python3 -m poetry install && \
diff --git a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
index 02761f7167..1a827fa52a 100644
--- a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
+++ b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
@@ -54,7 +54,7 @@
    {
     "data": {
      "text/plain": [
-      "PolarisCatalog(type='INTERNAL', name='polaris_demo', properties=CatalogProperties(default_base_location='file:///tmp/polaris/', additional_properties={}), create_timestamp=1745882018864, last_update_timestamp=1745882018864, entity_version=1, storage_config_info=FileStorageConfigInfo(storage_type='FILE', allowed_locations=['file:///tmp', 'file:///tmp/polaris/']))"
+      "PolarisCatalog(type='INTERNAL', name='polaris_demo', properties=CatalogProperties(default_base_location='file:///tmp/polaris/', additional_properties={}), create_timestamp=1752100615397, last_update_timestamp=1752100615397, entity_version=1, storage_config_info=FileStorageConfigInfo(storage_type='FILE', allowed_locations=['file:///tmp', 'file:///tmp/polaris/']))"
      ]
     },
     "execution_count": 2,
@@ -255,7 +255,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 12,
    "id": "fd13f24b-9d59-470d-9be1-660c22dde680",
    "metadata": {
     "tags": []
@@ -265,7 +265,7 @@
     "from pyspark.sql import SparkSession\n",
     "\n",
     "spark = (SparkSession.builder\n",
-    "  .config(\"spark.jars\", \"../polaris_libs/polaris-spark-bundle-3.5_2.12-1.1.0-incubating-SNAPSHOT.jar\")\n",
+    "  .config(\"spark.jars\", \"../polaris_libs/polaris-spark-3.5_2.12-1.1.0-incubating-SNAPSHOT-bundle.jar\") # TODO: add a way to automatically find the jar\n",
     "  .config(\"spark.jars.packages\", \"org.apache.iceberg:iceberg-aws-bundle:1.9.0,io.delta:delta-spark_2.12:3.2.1\")\n",
     "  .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\n",
     "  .config('spark.sql.iceberg.vectorization.enabled', 'false')\n",
@@ -308,21 +308,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 13,
    "id": "72e9e5fb-b22e-4d38-bb1e-4ca78c0d0f3e",
    "metadata": {
     "tags": []
    },
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "+---------+\n",
-      "|namespace|\n",
-      "+---------+\n",
-      "+---------+\n",
-      "\n"
+     "ename": "Py4JJavaError",
+     "evalue": "An error occurred while calling o57.sql.\n: org.apache.spark.SparkException: Cannot find catalog plugin class for catalog 'polaris': 
org.apache.polaris.spark.SparkCatalog.\n\tat org.apache.spark.sql.errors.QueryExecutionErrors$.catalogPluginClassNotFoundForCatalogError(QueryExecutionErrors.scala:1926)\n\tat org.apache.spark.sql.connector.catalog.Catalogs$.load(Catalogs.scala:70)\n\tat org.apache.spark.sql.connector.catalog.CatalogManager.$anonfun$catalog$1(CatalogManager.scala:54)\n\tat scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:86)\n\tat org.apache.spark.sql.connector.catalog.CatalogManager.catalog(CatalogManager.scala:54)\n\tat org.apache.spark.sql.connector.catalog.LookupCatalog$CatalogAndNamespace$.unapply(LookupCatalog.scala:86)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs$$anonfun$apply$1.applyOrElse(ResolveCatalogs.scala:51)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs$$anonfun$apply$1.applyOrElse(ResolveCatalogs.scala:30)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$2(AnalysisHelper.scala:170)\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$1(AnalysisHelper.scala:170)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning(AnalysisHelper.scala:168)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning$(AnalysisHelper.scala:164)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$4(AnalysisHelper.scala:175)\n\tat org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1216)\n\tat org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1215)\n\tat org.apache.spark.sql.catalyst.plans.logical.SetCatalogAndNamespace.mapChildren(v2Commands.scala:941)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$1(AnalysisHelper.scala:175)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning(AnalysisHelper.scala:168)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning$(AnalysisHelper.scala:164)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsWithPruning(AnalysisHelper.scala:99)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsWithPruning$(AnalysisHelper.scala:96)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators(AnalysisHelper.scala:76)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators$(AnalysisHelper.scala:75)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs.apply(ResolveCatalogs.scala:30)\n\tat 
org.apache.spark.sql.catalyst.analysis.ResolveCatalogs.apply(ResolveCatalogs.scala:27)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)\n\tat scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)\n\tat scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)\n\tat scala.collection.immutable.List.foldLeft(List.scala:91)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)\n\tat scala.collection.immutable.List.foreach(List.scala:431)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:240)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:236)\n\tat org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:187)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:236)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:202)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)\n\tat org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:223)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:222)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:77)\n\tat org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:219)\n\tat org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:219)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:218)\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:77)\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:74)\n\tat org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:66)\n\tat org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)\n\tat org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:638)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:629)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:659)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat 
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:568)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)\n\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)\n\tat java.base/java.lang.Thread.run(Thread.java:833)\nCaused by: java.lang.ClassNotFoundException: org.apache.polaris.spark.SparkCatalog\n\tat java.base/java.net.URLClassLoader.findClass(URLClassLoader.java:445)\n\tat java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:592)\n\tat java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:525)\n\tat org.apache.spark.sql.connector.catalog.Catalogs$.load(Catalogs.scala:60)\n\t... 78 more\n", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mspark\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mUSE polaris\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m spark\u001b[38;5;241m.\u001b[39msql(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSHOW NAMESPACES\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m/opt/spark/python/pyspark/sql/session.py:1631\u001b[0m, in \u001b[0;36mSparkSession.sql\u001b[0;34m(self, sqlQuery, args, **kwargs)\u001b[0m\n\u001b[1;32m 1627\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jvm \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1628\u001b[0m litArgs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jvm\u001b[38;5;241m.\u001b[39mPythonUtils\u001b[38;5;241m.\u001b[39mtoArray(\n\u001b[1;32m 1629\u001b[0m [_to_java_column(lit(v)) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m (args \u001b[38;5;129;01mor\u001b[39;00m [])]\n\u001b[1;32m 1630\u001b[0m )\n\u001b[0;32m-> 1631\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m DataFrame(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_jsparkSession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[43msqlQuery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlitArgs\u001b[49m\u001b[43m)\u001b[49m, \u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 1632\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 1633\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(kwargs) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", + "File \u001b[0;32m/opt/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1322\u001b[0m, in \u001b[0;36mJavaMember.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1316\u001b[0m command \u001b[38;5;241m=\u001b[39m proto\u001b[38;5;241m.\u001b[39mCALL_COMMAND_NAME 
\u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1317\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcommand_header \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1318\u001b[0m args_command \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1319\u001b[0m proto\u001b[38;5;241m.\u001b[39mEND_COMMAND_PART\n\u001b[1;32m 1321\u001b[0m answer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgateway_client\u001b[38;5;241m.\u001b[39msend_command(command)\n\u001b[0;32m-> 1322\u001b[0m return_value \u001b[38;5;241m=\u001b[39m \u001b[43mget_return_value\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1323\u001b[0m \u001b[43m \u001b[49m\u001b[43manswer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgateway_client\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtarget_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1325\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m temp_arg \u001b[38;5;129;01min\u001b[39;00m temp_args:\n\u001b[1;32m 1326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(temp_arg, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_detach\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n", + "File \u001b[0;32m/opt/spark/python/pyspark/errors/exceptions/captured.py:179\u001b[0m, in \u001b[0;36mcapture_sql_exception..deco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdeco\u001b[39m(\u001b[38;5;241m*\u001b[39ma: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkw: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 179\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m Py4JJavaError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 181\u001b[0m converted \u001b[38;5;241m=\u001b[39m convert_exception(e\u001b[38;5;241m.\u001b[39mjava_exception)\n", + "File \u001b[0;32m/opt/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:326\u001b[0m, in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m 324\u001b[0m value \u001b[38;5;241m=\u001b[39m OUTPUT_CONVERTER[\u001b[38;5;28mtype\u001b[39m](answer[\u001b[38;5;241m2\u001b[39m:], gateway_client)\n\u001b[1;32m 325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m answer[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m REFERENCE_TYPE:\n\u001b[0;32m--> 326\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Py4JJavaError(\n\u001b[1;32m 327\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28mformat\u001b[39m(target_id, 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name), value)\n\u001b[1;32m 329\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 330\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Py4JError(\n\u001b[1;32m 331\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m. Trace:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{3}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m 332\u001b[0m \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name, value))\n", + "\u001b[0;31mPy4JJavaError\u001b[0m: An error occurred while calling o57.sql.\n: org.apache.spark.SparkException: Cannot find catalog plugin class for catalog 'polaris': org.apache.polaris.spark.SparkCatalog.\n\tat org.apache.spark.sql.errors.QueryExecutionErrors$.catalogPluginClassNotFoundForCatalogError(QueryExecutionErrors.scala:1926)\n\tat org.apache.spark.sql.connector.catalog.Catalogs$.load(Catalogs.scala:70)\n\tat org.apache.spark.sql.connector.catalog.CatalogManager.$anonfun$catalog$1(CatalogManager.scala:54)\n\tat scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:86)\n\tat org.apache.spark.sql.connector.catalog.CatalogManager.catalog(CatalogManager.scala:54)\n\tat org.apache.spark.sql.connector.catalog.LookupCatalog$CatalogAndNamespace$.unapply(LookupCatalog.scala:86)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs$$anonfun$apply$1.applyOrElse(ResolveCatalogs.scala:51)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs$$anonfun$apply$1.applyOrElse(ResolveCatalogs.scala:30)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$2(AnalysisHelper.scala:170)\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$1(AnalysisHelper.scala:170)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning(AnalysisHelper.scala:168)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning$(AnalysisHelper.scala:164)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$4(AnalysisHelper.scala:175)\n\tat org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1216)\n\tat org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1215)\n\tat org.apache.spark.sql.catalyst.plans.logical.SetCatalogAndNamespace.mapChildren(v2Commands.scala:941)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$1(AnalysisHelper.scala:175)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning(AnalysisHelper.scala:168)\n\tat 
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning$(AnalysisHelper.scala:164)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsWithPruning(AnalysisHelper.scala:99)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsWithPruning$(AnalysisHelper.scala:96)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators(AnalysisHelper.scala:76)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators$(AnalysisHelper.scala:75)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs.apply(ResolveCatalogs.scala:30)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs.apply(ResolveCatalogs.scala:27)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)\n\tat scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)\n\tat scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)\n\tat scala.collection.immutable.List.foldLeft(List.scala:91)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)\n\tat scala.collection.immutable.List.foreach(List.scala:431)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:240)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:236)\n\tat org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:187)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:236)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:202)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)\n\tat org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:223)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:222)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:77)\n\tat org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:219)\n\tat org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:219)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:218)\n\tat 
org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:77)\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:74)\n\tat org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:66)\n\tat org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)\n\tat org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:638)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:629)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:659)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:568)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)\n\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)\n\tat java.base/java.lang.Thread.run(Thread.java:833)\nCaused by: java.lang.ClassNotFoundException: org.apache.polaris.spark.SparkCatalog\n\tat java.base/java.net.URLClassLoader.findClass(URLClassLoader.java:445)\n\tat java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:592)\n\tat java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:525)\n\tat org.apache.spark.sql.connector.catalog.Catalogs$.load(Catalogs.scala:60)\n\t... 78 more\n" ] } ], @@ -348,15 +352,18 @@ }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---------------+\n", - "| namespace|\n", - "+---------------+\n", - "|DELTA_NS.PUBLIC|\n", - "+---------------+\n", - "\n" + "ename": "Py4JJavaError", + "evalue": "An error occurred while calling o57.sql.\n: org.apache.spark.SparkException: [INTERNAL_ERROR] Undefined error message parameter for error class: '_LEGACY_ERROR_TEMP_1055'. 
Parameters: Map(database -> DELTA_NS.PUBLIC)\n\tat org.apache.spark.SparkException$.internalError(SparkException.scala:92)\n\tat org.apache.spark.SparkException$.internalError(SparkException.scala:96)\n\tat org.apache.spark.ErrorClassesJsonReader.getErrorMessage(ErrorClassesJSONReader.scala:56)\n\tat org.apache.spark.SparkThrowableHelper$.getMessage(SparkThrowableHelper.scala:53)\n\tat org.apache.spark.SparkThrowableHelper$.getMessage(SparkThrowableHelper.scala:40)\n\tat org.apache.spark.sql.AnalysisException.(AnalysisException.scala:47)\n\tat org.apache.spark.sql.AnalysisException.(AnalysisException.scala:70)\n\tat org.apache.spark.sql.errors.QueryCompilationErrors$.invalidDatabaseNameError(QueryCompilationErrors.scala:875)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog$DatabaseNameInSessionCatalog$.unapply(ResolveSessionCatalog.scala:691)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog$$anonfun$apply$1.applyOrElse(ResolveSessionCatalog.scala:235)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog$$anonfun$apply$1.applyOrElse(ResolveSessionCatalog.scala:52)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUpWithPruning$3(AnalysisHelper.scala:138)\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUpWithPruning$1(AnalysisHelper.scala:138)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUpWithPruning(AnalysisHelper.scala:134)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUpWithPruning$(AnalysisHelper.scala:130)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUpWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp(AnalysisHelper.scala:111)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp$(AnalysisHelper.scala:110)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog.apply(ResolveSessionCatalog.scala:52)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog.apply(ResolveSessionCatalog.scala:46)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)\n\tat scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)\n\tat scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)\n\tat scala.collection.immutable.List.foldLeft(List.scala:91)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)\n\tat scala.collection.immutable.List.foreach(List.scala:431)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:240)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:236)\n\tat org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:187)\n\tat 
org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:236)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:202)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)\n\tat org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:223)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:222)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:77)\n\tat org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:219)\n\tat org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:219)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:218)\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:77)\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:74)\n\tat org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:66)\n\tat org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)\n\tat org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:638)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:629)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:659)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:568)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)\n\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)\n\tat java.base/java.lang.Thread.run(Thread.java:833)\n", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m spark\u001b[38;5;241m.\u001b[39msql(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCREATE NAMESPACE IF NOT EXISTS 
DELTA_NS\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 2\u001b[0m \u001b[43mspark\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCREATE NAMESPACE IF NOT EXISTS DELTA_NS.PUBLIC\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m spark\u001b[38;5;241m.\u001b[39msql(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSHOW NAMESPACES IN DELTA_NS\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mshow()\n", + "File \u001b[0;32m/opt/spark/python/pyspark/sql/session.py:1631\u001b[0m, in \u001b[0;36mSparkSession.sql\u001b[0;34m(self, sqlQuery, args, **kwargs)\u001b[0m\n\u001b[1;32m 1627\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jvm \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1628\u001b[0m litArgs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jvm\u001b[38;5;241m.\u001b[39mPythonUtils\u001b[38;5;241m.\u001b[39mtoArray(\n\u001b[1;32m 1629\u001b[0m [_to_java_column(lit(v)) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m (args \u001b[38;5;129;01mor\u001b[39;00m [])]\n\u001b[1;32m 1630\u001b[0m )\n\u001b[0;32m-> 1631\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m DataFrame(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_jsparkSession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[43msqlQuery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlitArgs\u001b[49m\u001b[43m)\u001b[49m, \u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 1632\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 1633\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(kwargs) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", + "File \u001b[0;32m/opt/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1322\u001b[0m, in \u001b[0;36mJavaMember.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1316\u001b[0m command \u001b[38;5;241m=\u001b[39m proto\u001b[38;5;241m.\u001b[39mCALL_COMMAND_NAME \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1317\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcommand_header \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1318\u001b[0m args_command \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1319\u001b[0m proto\u001b[38;5;241m.\u001b[39mEND_COMMAND_PART\n\u001b[1;32m 1321\u001b[0m answer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgateway_client\u001b[38;5;241m.\u001b[39msend_command(command)\n\u001b[0;32m-> 1322\u001b[0m return_value \u001b[38;5;241m=\u001b[39m \u001b[43mget_return_value\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1323\u001b[0m \u001b[43m \u001b[49m\u001b[43manswer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgateway_client\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtarget_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1325\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m temp_arg \u001b[38;5;129;01min\u001b[39;00m temp_args:\n\u001b[1;32m 1326\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(temp_arg, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_detach\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n", + "File \u001b[0;32m/opt/spark/python/pyspark/errors/exceptions/captured.py:179\u001b[0m, in \u001b[0;36mcapture_sql_exception..deco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdeco\u001b[39m(\u001b[38;5;241m*\u001b[39ma: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkw: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 179\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m Py4JJavaError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 181\u001b[0m converted \u001b[38;5;241m=\u001b[39m convert_exception(e\u001b[38;5;241m.\u001b[39mjava_exception)\n", + "File \u001b[0;32m/opt/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:326\u001b[0m, in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m 324\u001b[0m value \u001b[38;5;241m=\u001b[39m OUTPUT_CONVERTER[\u001b[38;5;28mtype\u001b[39m](answer[\u001b[38;5;241m2\u001b[39m:], gateway_client)\n\u001b[1;32m 325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m answer[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m REFERENCE_TYPE:\n\u001b[0;32m--> 326\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Py4JJavaError(\n\u001b[1;32m 327\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name), value)\n\u001b[1;32m 329\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 330\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Py4JError(\n\u001b[1;32m 331\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m. Trace:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{3}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m 332\u001b[0m \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name, value))\n", + "\u001b[0;31mPy4JJavaError\u001b[0m: An error occurred while calling o57.sql.\n: org.apache.spark.SparkException: [INTERNAL_ERROR] Undefined error message parameter for error class: '_LEGACY_ERROR_TEMP_1055'. 
Parameters: Map(database -> DELTA_NS.PUBLIC)\n\tat org.apache.spark.SparkException$.internalError(SparkException.scala:92)\n\tat org.apache.spark.SparkException$.internalError(SparkException.scala:96)\n\tat org.apache.spark.ErrorClassesJsonReader.getErrorMessage(ErrorClassesJSONReader.scala:56)\n\tat org.apache.spark.SparkThrowableHelper$.getMessage(SparkThrowableHelper.scala:53)\n\tat org.apache.spark.SparkThrowableHelper$.getMessage(SparkThrowableHelper.scala:40)\n\tat org.apache.spark.sql.AnalysisException.(AnalysisException.scala:47)\n\tat org.apache.spark.sql.AnalysisException.(AnalysisException.scala:70)\n\tat org.apache.spark.sql.errors.QueryCompilationErrors$.invalidDatabaseNameError(QueryCompilationErrors.scala:875)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog$DatabaseNameInSessionCatalog$.unapply(ResolveSessionCatalog.scala:691)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog$$anonfun$apply$1.applyOrElse(ResolveSessionCatalog.scala:235)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog$$anonfun$apply$1.applyOrElse(ResolveSessionCatalog.scala:52)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUpWithPruning$3(AnalysisHelper.scala:138)\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUpWithPruning$1(AnalysisHelper.scala:138)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUpWithPruning(AnalysisHelper.scala:134)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUpWithPruning$(AnalysisHelper.scala:130)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUpWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp(AnalysisHelper.scala:111)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp$(AnalysisHelper.scala:110)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog.apply(ResolveSessionCatalog.scala:52)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog.apply(ResolveSessionCatalog.scala:46)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)\n\tat scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)\n\tat scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)\n\tat scala.collection.immutable.List.foldLeft(List.scala:91)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)\n\tat scala.collection.immutable.List.foreach(List.scala:431)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:240)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:236)\n\tat org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:187)\n\tat 
org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:236)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:202)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)\n\tat org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:223)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:222)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:77)\n\tat org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:219)\n\tat org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:219)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:218)\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:77)\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:74)\n\tat org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:66)\n\tat org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)\n\tat org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:638)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:629)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:659)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:568)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)\n\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)\n\tat java.base/java.lang.Thread.run(Thread.java:833)\n" ] } ], diff --git a/plugins/spark/v3.5/regtests/run.sh b/plugins/spark/v3.5/regtests/run.sh index 5b9121a725..90c2dd7555 100755 --- a/plugins/spark/v3.5/regtests/run.sh +++ b/plugins/spark/v3.5/regtests/run.sh @@ -73,15 +73,14 @@ SPARK_SHELL_OPTIONS=("PACKAGE" "JAR") for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do echo "RUN REGRESSION TEST FOR SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION}, SPARK_VERSION=${SPARK_VERSION}, SCALA_VERSION=${SCALA_VERSION}" # find 
the project jar
-  SPARK_DIR=${SPARK_ROOT_DIR}/spark-bundle
-  SRC_JAR_PATH=$(find ${SPARK_DIR} -name "polaris-spark-bundle-${SPARK_MAJOR_VERSION}_${SCALA_VERSION}-*.*-sources.jar" -print -quit)
-  JAR_PATH=${SRC_JAR_PATH/-sources/}
+  SPARK_DIR=${SPARK_ROOT_DIR}/spark
+  JAR_PATH=$(find ${SPARK_DIR} -name "polaris-spark-${SPARK_MAJOR_VERSION}_${SCALA_VERSION}-*.*-bundle.jar" -print -quit)
   echo "find jar ${JAR_PATH}"

   # extract the polaris
   JAR_NAME=$(basename "$JAR_PATH")
   echo "JAR_NAME=${JAR_NAME}"
-  POLARIS_VERSION=$(echo "$JAR_NAME" | sed -E 's/^polaris-spark-bundle-[^_]+_[^-]+-([^\.]+.*)\.jar/\1/')
+  POLARIS_VERSION=$(echo "$JAR_NAME" | sed -n 's/.*-\([0-9][^-]*.*\)-bundle\.jar/\1/p')
   echo "$POLARIS_VERSION"

   SPARK_EXISTS="TRUE"
diff --git a/plugins/spark/v3.5/spark-bundle/build.gradle.kts b/plugins/spark/v3.5/spark-bundle/build.gradle.kts
deleted file mode 100644
index 4320916bb8..0000000000
--- a/plugins/spark/v3.5/spark-bundle/build.gradle.kts
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
-
-plugins {
-  id("polaris-client")
-  id("com.gradleup.shadow")
-}
-
-// get version information
-val sparkMajorVersion = "3.5"
-val scalaVersion = getAndUseScalaVersionForProject()
-val icebergVersion = pluginlibs.versions.iceberg.get()
-val spark35Version = pluginlibs.versions.spark35.get()
-
-val scalaLibraryVersion =
-  if (scalaVersion == "2.12") {
-    pluginlibs.versions.scala212.get()
-  } else {
-    pluginlibs.versions.scala213.get()
-  }
-
-dependencies { implementation(project(":polaris-spark-${sparkMajorVersion}_${scalaVersion}")) }
-
-tasks.named<ShadowJar>("shadowJar") {
-  archiveClassifier = null
-  isZip64 = true
-
-  // pack all the dependencies into an uber jar
-  configurations = listOf(project.configurations.runtimeClasspath.get())
-
-  // recursively remove all LICENSE and NOTICE file under META-INF, includes
-  // directories contains 'license' in the name
-  exclude("META-INF/**/*LICENSE*")
-  exclude("META-INF/**/*NOTICE*")
-  // exclude the top level LICENSE, LICENSE-*.txt and NOTICE
-  exclude("LICENSE*")
-  exclude("NOTICE*")
-
-  // add polaris customized LICENSE and NOTICE for the bundle jar at top level. Note that the
-  // customized LICENSE and NOTICE file are called BUNDLE-LICENSE and BUNDLE-NOTICE,
-  // and renamed to LICENSE and NOTICE after include, this is to avoid the file
-  // being excluded due to the exclude pattern matching used above.
- from("${projectDir}/BUNDLE-LICENSE") { rename { "LICENSE" } } - from("${projectDir}/BUNDLE-NOTICE") { rename { "NOTICE" } } -} - -// ensure the shadow jar job (which will automatically run license addition) is run for both -// `assemble` and `build` task -tasks.named("assemble") { dependsOn("shadowJar") } - -tasks.named("build") { dependsOn("shadowJar") } - -tasks.named("jar") { enabled = false } diff --git a/plugins/spark/v3.5/spark-bundle/BUNDLE-LICENSE b/plugins/spark/v3.5/spark/BUNDLE-LICENSE similarity index 100% rename from plugins/spark/v3.5/spark-bundle/BUNDLE-LICENSE rename to plugins/spark/v3.5/spark/BUNDLE-LICENSE diff --git a/plugins/spark/v3.5/spark-bundle/BUNDLE-NOTICE b/plugins/spark/v3.5/spark/BUNDLE-NOTICE similarity index 100% rename from plugins/spark/v3.5/spark-bundle/BUNDLE-NOTICE rename to plugins/spark/v3.5/spark/BUNDLE-NOTICE diff --git a/plugins/spark/v3.5/spark/build.gradle.kts b/plugins/spark/v3.5/spark/build.gradle.kts index 42f13728cb..5a84dad93b 100644 --- a/plugins/spark/v3.5/spark/build.gradle.kts +++ b/plugins/spark/v3.5/spark/build.gradle.kts @@ -1,3 +1,5 @@ +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar + /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -78,3 +80,33 @@ dependencies { exclude("org.slf4j", "jul-to-slf4j") } } + +tasks.register("createPolarisSparkJar") { + archiveClassifier = "bundle" + isZip64 = true + + // pack both the source code and dependencies + from(sourceSets.main.get().output) + configurations = listOf(project.configurations.runtimeClasspath.get()) + + // recursively remove all LICENSE and NOTICE file under META-INF, includes + // directories contains 'license' in the name + exclude("META-INF/**/*LICENSE*") + exclude("META-INF/**/*NOTICE*") + // exclude the top level LICENSE, LICENSE-*.txt and NOTICE + exclude("LICENSE*") + exclude("NOTICE*") + + // add polaris customized LICENSE and NOTICE for the bundle jar at top level. Note that the + // customized LICENSE and NOTICE file are called BUNDLE-LICENSE and BUNDLE-NOTICE, + // and renamed to LICENSE and NOTICE after include, this is to avoid the file + // being excluded due to the exclude pattern matching used above. 
+ from("${projectDir}/BUNDLE-LICENSE") { rename { "LICENSE" } } + from("${projectDir}/BUNDLE-NOTICE") { rename { "NOTICE" } } +} + +// ensure the shadow jar job (which will automatically run license addition) is run for both +// `assemble` and `build` task +tasks.named("assemble") { dependsOn("createPolarisSparkJar") } + +tasks.named("build") { dependsOn("createPolarisSparkJar") } diff --git a/settings.gradle.kts b/settings.gradle.kts index d212423040..cbfbc3a269 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -73,16 +73,19 @@ for (sparkVersion in sparkVersions) { for (scalaVersion in scalaVersions) { val sparkArtifactId = "polaris-spark-${sparkVersion}_${scalaVersion}" val sparkIntArtifactId = "polaris-spark-integration-${sparkVersion}_${scalaVersion}" - val sparkBundleArtifactId = "polaris-spark-bundle-${sparkVersion}_${scalaVersion}" - polarisProject(sparkArtifactId, file("${polarisSparkDir}/v${sparkVersion}/spark")) - polarisProject(sparkIntArtifactId, file("${polarisSparkDir}/v${sparkVersion}/integration")) - polarisProject(sparkBundleArtifactId, file("${polarisSparkDir}/v${sparkVersion}/spark-bundle")) + polarisProject( + "polaris-spark-${sparkVersion}_${scalaVersion}", + file("${polarisSparkDir}/v${sparkVersion}/spark"), + ) + polarisProject( + "polaris-spark-integration-${sparkVersion}_${scalaVersion}", + file("${polarisSparkDir}/v${sparkVersion}/integration"), + ) if (first) { first = false } else { noSourceChecksProjects.add(":$sparkArtifactId") noSourceChecksProjects.add(":$sparkIntArtifactId") - noSourceChecksProjects.add(":$sparkBundleArtifactId") } // Skip all duplicated spark client projects while using Intelij IDE. // This is to avoid problems during dependency analysis and sync when From 5118ddf7c2b10e34383f56bb6286ae930ccace80 Mon Sep 17 00:00:00 2001 From: Yun Zou Date: Wed, 9 Jul 2025 15:49:25 -0700 Subject: [PATCH 07/10] update readme --- plugins/spark/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/spark/README.md b/plugins/spark/README.md index c2056d8040..9764fc8d15 100644 --- a/plugins/spark/README.md +++ b/plugins/spark/README.md @@ -87,13 +87,13 @@ bin/spark-shell \ ``` # Build and run with Polaris spark bundle JAR -The polaris-spark-bundle project is used to build the Polaris Spark bundle JAR. The resulting JAR will follow this naming format: -polaris-spark-bundle-_-.jar +The polaris-spark project also provides a Spark bundle JAR for the `--jars` use case. The resulting JAR will follow this naming format: +polaris-spark-_--bundle.jar For example: -polaris-spark-bundle-3.5_2.12-1.1.0-incubating-SNAPSHOT.jar +polaris-spark-bundle-3.5_2.12-1.1.0-incubating-SNAPSHOT-bundle.jar Run `./gradlew assemble` to build the entire Polaris project without running tests. After the build completes, -the bundle JAR can be found under: plugins/spark/v3.5/spark-bundle/build//libs/. +the bundle JAR can be found under: plugins/spark/v3.5/spark/build//libs/. 
To start Spark using the bundle JAR, specify it with the `--jars` option as shown below:
 
 ```shell

From 79d94ae6440b416ce3e7a1b325c1cb990f653802 Mon Sep 17 00:00:00 2001
From: Yun Zou
Date: Wed, 9 Jul 2025 15:52:25 -0700
Subject: [PATCH 08/10] fix notebook

---
 .../notebooks/SparkPolaris.ipynb              | 83 +++++++++----------
 1 file changed, 41 insertions(+), 42 deletions(-)

diff --git a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
index 1a827fa52a..a01b6d8e94 100644
--- a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
+++ b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
@@ -54,7 +54,7 @@
    {
     "data": {
      "text/plain": [
-      "PolarisCatalog(type='INTERNAL', name='polaris_demo', properties=CatalogProperties(default_base_location='file:///tmp/polaris/', additional_properties={}), create_timestamp=1752100615397, last_update_timestamp=1752100615397, entity_version=1, storage_config_info=FileStorageConfigInfo(storage_type='FILE', allowed_locations=['file:///tmp', 'file:///tmp/polaris/']))"
+      "PolarisCatalog(type='INTERNAL', name='polaris_demo', properties=CatalogProperties(default_base_location='file:///tmp/polaris/', additional_properties={}), create_timestamp=1752101488740, last_update_timestamp=1752101488740, entity_version=1, storage_config_info=FileStorageConfigInfo(storage_type='FILE', allowed_locations=['file:///tmp', 'file:///tmp/polaris/']))"
      ]
     },
     "execution_count": 2,
@@ -255,7 +255,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 6,
    "id": "fd13f24b-9d59-470d-9be1-660c22dde680",
    "metadata": {
     "tags": []
    },
@@ -308,25 +308,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 7,
    "id": "72e9e5fb-b22e-4d38-bb1e-4ca78c0d0f3e",
    "metadata": {
     "tags": []
    },
    "outputs": [
     {
-     "ename": "Py4JJavaError",
-     "evalue": "An error occurred while calling o57.sql.\n: org.apache.spark.SparkException: Cannot find catalog plugin class for catalog 'polaris': org.apache.polaris.spark.SparkCatalog.\n\tat org.apache.spark.sql.errors.QueryExecutionErrors$.catalogPluginClassNotFoundForCatalogError(QueryExecutionErrors.scala:1926)\n\tat org.apache.spark.sql.connector.catalog.Catalogs$.load(Catalogs.scala:70)\n\tat org.apache.spark.sql.connector.catalog.CatalogManager.$anonfun$catalog$1(CatalogManager.scala:54)\n\tat scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:86)\n\tat org.apache.spark.sql.connector.catalog.CatalogManager.catalog(CatalogManager.scala:54)\n\tat org.apache.spark.sql.connector.catalog.LookupCatalog$CatalogAndNamespace$.unapply(LookupCatalog.scala:86)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs$$anonfun$apply$1.applyOrElse(ResolveCatalogs.scala:51)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs$$anonfun$apply$1.applyOrElse(ResolveCatalogs.scala:30)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$2(AnalysisHelper.scala:170)\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$1(AnalysisHelper.scala:170)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning(AnalysisHelper.scala:168)\n\tat 
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning$(AnalysisHelper.scala:164)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$4(AnalysisHelper.scala:175)\n\tat org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1216)\n\tat org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1215)\n\tat org.apache.spark.sql.catalyst.plans.logical.SetCatalogAndNamespace.mapChildren(v2Commands.scala:941)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$1(AnalysisHelper.scala:175)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning(AnalysisHelper.scala:168)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning$(AnalysisHelper.scala:164)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsWithPruning(AnalysisHelper.scala:99)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsWithPruning$(AnalysisHelper.scala:96)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators(AnalysisHelper.scala:76)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators$(AnalysisHelper.scala:75)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs.apply(ResolveCatalogs.scala:30)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs.apply(ResolveCatalogs.scala:27)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)\n\tat scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)\n\tat scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)\n\tat scala.collection.immutable.List.foldLeft(List.scala:91)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)\n\tat scala.collection.immutable.List.foreach(List.scala:431)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:240)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:236)\n\tat org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:187)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:236)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:202)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)\n\tat org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)\n\tat 
org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:223)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:222)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:77)\n\tat org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:219)\n\tat org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:219)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:218)\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:77)\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:74)\n\tat org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:66)\n\tat org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)\n\tat org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:638)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:629)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:659)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:568)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)\n\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)\n\tat java.base/java.lang.Thread.run(Thread.java:833)\nCaused by: java.lang.ClassNotFoundException: org.apache.polaris.spark.SparkCatalog\n\tat java.base/java.net.URLClassLoader.findClass(URLClassLoader.java:445)\n\tat java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:592)\n\tat java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:525)\n\tat org.apache.spark.sql.connector.catalog.Catalogs$.load(Catalogs.scala:60)\n\t... 
78 more\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mspark\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mUSE polaris\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m spark\u001b[38;5;241m.\u001b[39msql(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSHOW NAMESPACES\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mshow()\n", - "File \u001b[0;32m/opt/spark/python/pyspark/sql/session.py:1631\u001b[0m, in \u001b[0;36mSparkSession.sql\u001b[0;34m(self, sqlQuery, args, **kwargs)\u001b[0m\n\u001b[1;32m 1627\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jvm \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1628\u001b[0m litArgs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jvm\u001b[38;5;241m.\u001b[39mPythonUtils\u001b[38;5;241m.\u001b[39mtoArray(\n\u001b[1;32m 1629\u001b[0m [_to_java_column(lit(v)) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m (args \u001b[38;5;129;01mor\u001b[39;00m [])]\n\u001b[1;32m 1630\u001b[0m )\n\u001b[0;32m-> 1631\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m DataFrame(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_jsparkSession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[43msqlQuery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlitArgs\u001b[49m\u001b[43m)\u001b[49m, \u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 1632\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 1633\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(kwargs) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", - "File \u001b[0;32m/opt/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1322\u001b[0m, in \u001b[0;36mJavaMember.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1316\u001b[0m command \u001b[38;5;241m=\u001b[39m proto\u001b[38;5;241m.\u001b[39mCALL_COMMAND_NAME \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1317\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcommand_header \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1318\u001b[0m args_command \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1319\u001b[0m proto\u001b[38;5;241m.\u001b[39mEND_COMMAND_PART\n\u001b[1;32m 1321\u001b[0m answer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgateway_client\u001b[38;5;241m.\u001b[39msend_command(command)\n\u001b[0;32m-> 1322\u001b[0m return_value \u001b[38;5;241m=\u001b[39m \u001b[43mget_return_value\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1323\u001b[0m \u001b[43m \u001b[49m\u001b[43manswer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgateway_client\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtarget_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1325\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m temp_arg \u001b[38;5;129;01min\u001b[39;00m temp_args:\n\u001b[1;32m 1326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(temp_arg, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_detach\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n", - "File \u001b[0;32m/opt/spark/python/pyspark/errors/exceptions/captured.py:179\u001b[0m, in \u001b[0;36mcapture_sql_exception..deco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdeco\u001b[39m(\u001b[38;5;241m*\u001b[39ma: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkw: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 179\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m Py4JJavaError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 181\u001b[0m converted \u001b[38;5;241m=\u001b[39m convert_exception(e\u001b[38;5;241m.\u001b[39mjava_exception)\n", - "File \u001b[0;32m/opt/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:326\u001b[0m, in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m 324\u001b[0m value \u001b[38;5;241m=\u001b[39m OUTPUT_CONVERTER[\u001b[38;5;28mtype\u001b[39m](answer[\u001b[38;5;241m2\u001b[39m:], gateway_client)\n\u001b[1;32m 325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m answer[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m REFERENCE_TYPE:\n\u001b[0;32m--> 326\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Py4JJavaError(\n\u001b[1;32m 327\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name), value)\n\u001b[1;32m 329\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 330\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Py4JError(\n\u001b[1;32m 331\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m. 
Trace:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{3}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m 332\u001b[0m \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name, value))\n", - "\u001b[0;31mPy4JJavaError\u001b[0m: An error occurred while calling o57.sql.\n: org.apache.spark.SparkException: Cannot find catalog plugin class for catalog 'polaris': org.apache.polaris.spark.SparkCatalog.\n\tat org.apache.spark.sql.errors.QueryExecutionErrors$.catalogPluginClassNotFoundForCatalogError(QueryExecutionErrors.scala:1926)\n\tat org.apache.spark.sql.connector.catalog.Catalogs$.load(Catalogs.scala:70)\n\tat org.apache.spark.sql.connector.catalog.CatalogManager.$anonfun$catalog$1(CatalogManager.scala:54)\n\tat scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:86)\n\tat org.apache.spark.sql.connector.catalog.CatalogManager.catalog(CatalogManager.scala:54)\n\tat org.apache.spark.sql.connector.catalog.LookupCatalog$CatalogAndNamespace$.unapply(LookupCatalog.scala:86)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs$$anonfun$apply$1.applyOrElse(ResolveCatalogs.scala:51)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs$$anonfun$apply$1.applyOrElse(ResolveCatalogs.scala:30)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$2(AnalysisHelper.scala:170)\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$1(AnalysisHelper.scala:170)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning(AnalysisHelper.scala:168)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning$(AnalysisHelper.scala:164)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$4(AnalysisHelper.scala:175)\n\tat org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1216)\n\tat org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1215)\n\tat org.apache.spark.sql.catalyst.plans.logical.SetCatalogAndNamespace.mapChildren(v2Commands.scala:941)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$1(AnalysisHelper.scala:175)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning(AnalysisHelper.scala:168)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning$(AnalysisHelper.scala:164)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsWithPruning(AnalysisHelper.scala:99)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsWithPruning$(AnalysisHelper.scala:96)\n\tat 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators(AnalysisHelper.scala:76)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators$(AnalysisHelper.scala:75)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs.apply(ResolveCatalogs.scala:30)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveCatalogs.apply(ResolveCatalogs.scala:27)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)\n\tat scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)\n\tat scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)\n\tat scala.collection.immutable.List.foldLeft(List.scala:91)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)\n\tat scala.collection.immutable.List.foreach(List.scala:431)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:240)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:236)\n\tat org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:187)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:236)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:202)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)\n\tat org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)\n\tat org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:223)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:222)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:77)\n\tat org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:219)\n\tat org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)\n\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:219)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:218)\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:77)\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:74)\n\tat org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:66)\n\tat org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)\n\tat 
org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:638)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:629)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:659)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:568)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)\n\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)\n\tat java.base/java.lang.Thread.run(Thread.java:833)\nCaused by: java.lang.ClassNotFoundException: org.apache.polaris.spark.SparkCatalog\n\tat java.base/java.net.URLClassLoader.findClass(URLClassLoader.java:445)\n\tat java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:592)\n\tat java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:525)\n\tat org.apache.spark.sql.connector.catalog.Catalogs$.load(Catalogs.scala:60)\n\t... 78 more\n" + "name": "stdout", + "output_type": "stream", + "text": [ + "+---------+\n", + "|namespace|\n", + "+---------+\n", + "+---------+\n", + "\n" ] } ], @@ -352,18 +348,15 @@ }, "outputs": [ { - "ename": "Py4JJavaError", - "evalue": "An error occurred while calling o57.sql.\n: org.apache.spark.SparkException: [INTERNAL_ERROR] Undefined error message parameter for error class: '_LEGACY_ERROR_TEMP_1055'. 
Parameters: Map(database -> DELTA_NS.PUBLIC)\n\tat org.apache.spark.SparkException$.internalError(SparkException.scala:92)\n\tat org.apache.spark.SparkException$.internalError(SparkException.scala:96)\n\tat org.apache.spark.ErrorClassesJsonReader.getErrorMessage(ErrorClassesJSONReader.scala:56)\n\tat org.apache.spark.SparkThrowableHelper$.getMessage(SparkThrowableHelper.scala:53)\n\tat org.apache.spark.sql.errors.QueryCompilationErrors$.invalidDatabaseNameError(QueryCompilationErrors.scala:875)\n\tat org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog$DatabaseNameInSessionCatalog$.unapply(ResolveSessionCatalog.scala:691)\n\t[... remaining analyzer, py4j, and Thread.run frames elided ...]\n",
-     "output_type": "error",
-     "traceback": [
-      "---------------------------------------------------------------------------",
-      "Py4JJavaError                             Traceback (most recent call last)",
-      "Cell In[8], line 2\n      1 spark.sql(\"CREATE NAMESPACE IF NOT EXISTS DELTA_NS\")\n----> 2 spark.sql(\"CREATE NAMESPACE IF NOT EXISTS DELTA_NS.PUBLIC\")\n      3 spark.sql(\"SHOW NAMESPACES IN DELTA_NS\").show()\n",
-      "File /opt/spark/python/pyspark/sql/session.py:1631, in SparkSession.sql(self, sqlQuery, args, **kwargs) [... frame body elided ...]",
-      "File /opt/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1322, in JavaMember.__call__(self, *args) [... frame body elided ...]",
-      "File /opt/spark/python/pyspark/errors/exceptions/captured.py:179, in capture_sql_exception.<locals>.deco(*a, **kw) [... frame body elided ...]",
-      "File /opt/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:326, in get_return_value(answer, gateway_client, target_id, name) [... frame body elided ...]",
-      "Py4JJavaError: An error occurred while calling o57.sql.\n: org.apache.spark.SparkException: [INTERNAL_ERROR] Undefined error message parameter for error class: '_LEGACY_ERROR_TEMP_1055'. Parameters: Map(database -> DELTA_NS.PUBLIC)\n\t[... Java stack trace elided; identical to the evalue trace above ...]"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---------------+\n",
+      "|      namespace|\n",
+      "+---------------+\n",
+      "|DELTA_NS.PUBLIC|\n",
+      "+---------------+\n",
+      "\n"
      ]
     }
    ],
@@ -383,7 +376,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 9,
    "id": "4abc8426-7f2a-4f3f-9e26-1f1824f870c6",
    "metadata": {
     "tags": []
@@ -395,7 +388,7 @@
       "DataFrame[]"
      ]
     },
-    "execution_count": 10,
+    "execution_count": 9,
     "metadata": {},
"output_type": "execute_result" } @@ -412,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "13356e64-23ca-4804-a1b9-e9f57f4d14ca", "metadata": {}, "outputs": [ @@ -443,7 +436,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "ff5a466d-6a67-4f42-a6a6-ac54ec258e54", "metadata": { "tags": [] @@ -453,10 +446,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "+---+----+\n", - "| id|name|\n", - "+---+----+\n", - "+---+----+\n", + "+---+------+\n", + "| id| name|\n", + "+---+------+\n", + "| 3|jonath|\n", + "| 1| anna|\n", + "| 2| bob|\n", + "+---+------+\n", "\n" ] } @@ -476,7 +472,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "d7ab2991-6de9-4105-9f95-4c9f1c18f426", "metadata": { "tags": [] @@ -490,8 +486,11 @@ "| id| name|\n", "+---+------+\n", "| 3|jonath|\n", + "| 3|jonath|\n", + "| 1| anna|\n", "| 1| anna|\n", "| 2| bob|\n", + "| 2| bob|\n", "+---+------+\n", "\n" ] @@ -512,7 +511,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "8eb5fd0c-20d4-42ce-a823-b6ae43f58313", "metadata": {}, "outputs": [ @@ -522,7 +521,7 @@ "DataFrame[]" ] }, - "execution_count": 14, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -534,7 +533,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "54904fd6-96b4-4198-b5b7-2b6a9e6eea1f", "metadata": {}, "outputs": [ @@ -566,7 +565,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "b1e639be-5a3a-41c6-a782-dd939bc2eea4", "metadata": {}, "outputs": [ @@ -602,7 +601,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "6f3aac79-bf45-4603-bd64-30eeab4bdfa7", "metadata": {}, "outputs": [ @@ -612,7 +611,7 @@ "DataFrame[]" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } From d47466856abcfff5ae661f329ec8b68d5e8e942c Mon Sep 17 00:00:00 2001 From: Yun Zou Date: Wed, 9 Jul 2025 15:54:50 -0700 Subject: [PATCH 09/10] fix notebook change --- .../notebooks/SparkPolaris.ipynb | 40 ++++++++----------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb index a01b6d8e94..8974a81e28 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb +++ b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb @@ -54,7 +54,7 @@ { "data": { "text/plain": [ - "PolarisCatalog(type='INTERNAL', name='polaris_demo', properties=CatalogProperties(default_base_location='file:///tmp/polaris/', additional_properties={}), create_timestamp=1752101488740, last_update_timestamp=1752101488740, entity_version=1, storage_config_info=FileStorageConfigInfo(storage_type='FILE', allowed_locations=['file:///tmp', 'file:///tmp/polaris/']))" + "PolarisCatalog(type='INTERNAL', name='polaris_demo', properties=CatalogProperties(default_base_location='file:///tmp/polaris/', additional_properties={}), create_timestamp=1745882018864, last_update_timestamp=1745882018864, entity_version=1, storage_config_info=FileStorageConfigInfo(storage_type='FILE', allowed_locations=['file:///tmp', 'file:///tmp/polaris/']))" ] }, "execution_count": 2, @@ -265,7 +265,7 @@ "from pyspark.sql import SparkSession\n", "\n", "spark = (SparkSession.builder\n", - " .config(\"spark.jars\", 
\"../polaris_libs/polaris-spark-3.5_2.12-1.1.0-incubating-SNAPSHOT-bundle.jar\") # TODO: add a way to automatically find the jar\n", + " .config(\"spark.jars\", \"../polaris_libs/polaris-spark-3.5_2.12-1.1.0-incubating-SNAPSHOT-bundle.jar\") # TODO: add a way to automatically discover the Jar\n", " .config(\"spark.jars.packages\", \"org.apache.iceberg:iceberg-aws-bundle:1.9.0,io.delta:delta-spark_2.12:3.2.1\")\n", " .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\n", " .config('spark.sql.iceberg.vectorization.enabled', 'false')\n", @@ -376,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "4abc8426-7f2a-4f3f-9e26-1f1824f870c6", "metadata": { "tags": [] @@ -388,7 +388,7 @@ "DataFrame[]" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -405,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "13356e64-23ca-4804-a1b9-e9f57f4d14ca", "metadata": {}, "outputs": [ @@ -436,7 +436,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "ff5a466d-6a67-4f42-a6a6-ac54ec258e54", "metadata": { "tags": [] @@ -446,13 +446,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "+---+------+\n", - "| id| name|\n", - "+---+------+\n", - "| 3|jonath|\n", - "| 1| anna|\n", - "| 2| bob|\n", - "+---+------+\n", + "+---+----+\n", + "| id|name|\n", + "+---+----+\n", + "+---+----+\n", "\n" ] } @@ -472,7 +469,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "d7ab2991-6de9-4105-9f95-4c9f1c18f426", "metadata": { "tags": [] @@ -486,10 +483,7 @@ "| id| name|\n", "+---+------+\n", "| 3|jonath|\n", - "| 3|jonath|\n", "| 1| anna|\n", - "| 1| anna|\n", - "| 2| bob|\n", "| 2| bob|\n", "+---+------+\n", "\n" @@ -511,7 +505,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "8eb5fd0c-20d4-42ce-a823-b6ae43f58313", "metadata": {}, "outputs": [ @@ -521,7 +515,7 @@ "DataFrame[]" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -533,7 +527,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "54904fd6-96b4-4198-b5b7-2b6a9e6eea1f", "metadata": {}, "outputs": [ @@ -565,7 +559,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "b1e639be-5a3a-41c6-a782-dd939bc2eea4", "metadata": {}, "outputs": [ @@ -601,7 +595,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "6f3aac79-bf45-4603-bd64-30eeab4bdfa7", "metadata": {}, "outputs": [ @@ -611,7 +605,7 @@ "DataFrame[]" ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } From 1239a9e639c254ae5c3677beb2c42a82d3087e4b Mon Sep 17 00:00:00 2001 From: Yun Zou Date: Wed, 9 Jul 2025 16:06:44 -0700 Subject: [PATCH 10/10] fix oder --- plugins/spark/v3.5/spark/build.gradle.kts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/spark/v3.5/spark/build.gradle.kts b/plugins/spark/v3.5/spark/build.gradle.kts index 5a84dad93b..45af3b6f93 100644 --- a/plugins/spark/v3.5/spark/build.gradle.kts +++ b/plugins/spark/v3.5/spark/build.gradle.kts @@ -1,5 +1,3 @@ -import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -19,6 +17,8 @@ import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar * under the License. */ +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar + plugins { id("polaris-client") } checkstyle {