Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 70 additions & 27 deletions regtests/run_spark_sql.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,32 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# -----------------------------------------------------------------------------
# Purpose: Launch the Spark SQL shell to interact with Polaris.
# -----------------------------------------------------------------------------
#
# Usage:
# ./run_spark_sql.sh [S3-location AWS-IAM-role]
#
# Description:
# - Without arguments: Runs against a catalog backed by the local filesystem.
# - With two arguments: Runs against a catalog backed by AWS S3.
# - [S3-location] - The S3 path to use as the default base location for the catalog.
#   [AWS-IAM-role] - The AWS IAM role for the catalog to assume when accessing the S3 location.
#
# Run this to open an interactive spark-sql shell talking to a catalog named "manual_spark"
# Examples:
# - Run against local filesystem:
# ./run_spark_sql.sh
#
# You must run 'use polaris;' as your first query in the spark-sql shell.
# - Run against AWS S3:
# ./run_spark_sql.sh s3://my-bucket/path arn:aws:iam::123456789001:role/my-role

# Accept either zero arguments (local-filesystem catalog) or exactly two
# (S3 location + IAM role). Anything else is a usage error.
# Fix: diagnostics belong on stderr, not stdout.
if [ $# -ne 0 ] && [ $# -ne 2 ]; then
  echo "run_spark_sql.sh only accepts 0 or 2 arguments" >&2
  echo "Usage: ./run_spark_sql.sh [S3-location AWS-IAM-role]" >&2
  exit 1
fi

# Resolve the directory containing this script and run from there, so that
# relative paths used later are stable regardless of the caller's cwd.
# Fix: quote $0 and the command substitutions (paths may contain spaces),
# and abort if the cd fails instead of silently running in the wrong dir.
REGTEST_HOME=$(dirname "$(realpath "$0")")
cd "${REGTEST_HOME}" || exit 1
Expand All @@ -36,45 +57,67 @@ fi

# Bearer token used to authenticate every management-API call below; defaults
# to the root principal in the default realm unless the caller overrides it
# via REGTEST_ROOT_BEARER_TOKEN.
SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN:-principal:root;realm:default-realm}"

# NOTE(review): this unconditional FILE-backed catalog creation appears to be
# the pre-refactor version of the conditional block below (this page is a diff
# view, so removed lines are shown alongside their replacement). If both are
# present in the merged file, this one is redundant and should be deleted —
# confirm against the actual merged script.
# Use local filesystem by default
curl -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
-d '{
"catalog": {
"name": "manual_spark",
"type": "INTERNAL",
"readOnly": false,
"properties": {
"default-base-location": "file:///tmp/polaris/"
},
"storageConfigInfo": {
"storageType": "FILE",
"allowedLocations": [
"file:///tmp"
]
}
}
}'
# Create the "manual_spark" catalog. With no arguments it is backed by the
# local filesystem; with two arguments it is backed by S3 using the given
# base location and IAM role.
#
# Fixes:
#  - The S3 branch previously posted a bare payload (and a stray "id": 100
#    field) while the FILE branch wrapped its payload in "catalog": {...}.
#    Both requests hit the same endpoint, so both now use the wrapped form.
#  - Removed a stale commented-out curl invocation left inside the S3 branch.
#  - Quoted the URL expansions defensively.
if [ $# -eq 0 ]; then
  # Create a catalog backed by the local filesystem.
  curl -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
    -H 'Accept: application/json' \
    -H 'Content-Type: application/json' \
    "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs" \
    -d '{
      "catalog": {
        "name": "manual_spark",
        "type": "INTERNAL",
        "readOnly": false,
        "properties": {
          "default-base-location": "file:///tmp/polaris/"
        },
        "storageConfigInfo": {
          "storageType": "FILE",
          "allowedLocations": [
            "file:///tmp"
          ]
        }
      }
    }'
else
  # Exactly two arguments (guaranteed by the argument check at the top):
  # create a catalog backed by AWS S3.
  S3_LOCATION=$1
  AWS_IAM_ROLE=$2

  curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
    -H 'Accept: application/json' \
    -H 'Content-Type: application/json' \
    "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs" \
    -d "{
      \"catalog\": {
        \"name\": \"manual_spark\",
        \"type\": \"INTERNAL\",
        \"readOnly\": false,
        \"properties\": {
          \"default-base-location\": \"${S3_LOCATION}\"
        },
        \"storageConfigInfo\": {
          \"storageType\": \"S3\",
          \"allowedLocations\": [\"${S3_LOCATION}/\"],
          \"roleArn\": \"${AWS_IAM_ROLE}\"
        }
      }
    }"
fi

# Add TABLE_WRITE_DATA to the catalog's catalog_admin role: by default that
# role can only manage access and metadata, not write table data.
# Fix: use the portable ">&2" redirection instead of "> /dev/stderr", which
# fails in environments where /dev/stderr is missing or not writable.
curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark/catalog-roles/catalog_admin/grants" \
  -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' >&2

# Assign the catalog_admin catalog role to the service_admin principal role so
# the service admin can administer the new catalog.
# Fixes: collapsed the duplicated old/new comment pair left by the diff, and
# replaced "> /dev/stderr" with the portable ">&2".
curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/manual_spark" \
  -d '{"name": "catalog_admin"}' >&2

# Fetch and print the catalog we just created, as a sanity check before
# launching the Spark shell.
# Fix: quote the URL so an unusual POLARIS_HOST value cannot word-split.
curl -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark"

echo ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}"
${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" \
--conf spark.sql.catalog.polaris.warehouse=manual_spark \
--conf spark.sql.defaultCatalog=polaris \
Expand Down