|
16 | 16 | # KIND, either express or implied. See the License for the |
17 | 17 | # specific language governing permissions and limitations |
18 | 18 | # under the License. |
| 19 | + |
| 20 | +# ----------------------------------------------------------------------------- |
| 21 | +# Purpose: Launch the Spark SQL shell to interact with Polaris. |
| 22 | +# ----------------------------------------------------------------------------- |
| 23 | +# |
| 24 | +# Usage: |
| 25 | +# ./run_spark_sql.sh [S3-location AWS-IAM-role] |
19 | 26 | # |
| 27 | +# Description: |
| 28 | +# - Without arguments: Runs against a catalog backed by the local filesystem. |
| 29 | +# - With two arguments: Runs against a catalog backed by AWS S3. |
| 30 | +# - [S3-location] - The S3 path to use as the default base location for the catalog. |
#   - [AWS-IAM-role] - The AWS IAM role for the catalog to assume when accessing the S3 location.
20 | 32 | # |
21 | | -# Run this to open an interactive spark-sql shell talking to a catalog named "manual_spark" |
| 33 | +# Examples: |
| 34 | +# - Run against local filesystem: |
| 35 | +# ./run_spark_sql.sh |
22 | 36 | # |
23 | | -# You must run 'use polaris;' as your first query in the spark-sql shell. |
| 37 | +# - Run against AWS S3: |
| 38 | +# ./run_spark_sql.sh s3://my-bucket/path arn:aws:iam::123456789001:role/my-role |
| 39 | + |
# Validate argument count: either no arguments (local filesystem catalog) or
# exactly two (S3 location + IAM role). Diagnostics go to stderr so they do
# not pollute stdout when the script's output is captured.
if [ $# -ne 0 ] && [ $# -ne 2 ]; then
  echo "run_spark_sql.sh only accepts 0 or 2 arguments" >&2
  echo "Usage: ./run_spark_sql.sh [S3-location AWS-IAM-role]" >&2
  exit 1
fi
24 | 45 |
|
# Resolve the directory this script lives in and run from there, so relative
# paths inside the script work regardless of the caller's cwd. Quote every
# expansion (paths may contain spaces) and abort if the cd fails.
REGTEST_HOME=$(dirname "$(realpath "$0")")
cd "${REGTEST_HOME}" || exit 1
|
36 | 57 |
|
# Bearer token sent to Polaris on every request; override via
# REGTEST_ROOT_BEARER_TOKEN, otherwise fall back to the root principal in the
# default realm.
SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN:-principal:root;realm:default-realm}"
38 | 59 |
|
# Create the "manual_spark" catalog via the Polaris management API.
#   - 0 args: catalog backed by the local filesystem under /tmp/polaris.
#   - 2 args: catalog backed by AWS S3 at $1, accessed via IAM role $2.
if [ $# -eq 0 ]; then
  # Create a catalog backed by the local filesystem.
  curl -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
    -H 'Accept: application/json' \
    -H 'Content-Type: application/json' \
    http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
    -d '{
          "catalog": {
            "name": "manual_spark",
            "type": "INTERNAL",
            "readOnly": false,
            "properties": {
              "default-base-location": "file:///tmp/polaris/"
            },
            "storageConfigInfo": {
              "storageType": "FILE",
              "allowedLocations": [
                "file:///tmp"
              ]
            }
          }
        }'

elif [ $# -eq 2 ]; then
  # Create a catalog backed by S3.
  S3_LOCATION=$1
  AWS_IAM_ROLE=$2

  # NOTE(review): the payload is wrapped in a top-level "catalog" object to
  # match the FILE branch above and the management API's CreateCatalogRequest
  # schema; the previous flat form with a client-chosen "id" did not match
  # that schema. Confirm against the deployed Polaris API version.
  curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
    -H 'Accept: application/json' \
    -H 'Content-Type: application/json' \
    http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
    -d "{
          \"catalog\": {
            \"name\": \"manual_spark\",
            \"type\": \"INTERNAL\",
            \"readOnly\": false,
            \"properties\": {
              \"default-base-location\": \"${S3_LOCATION}\"
            },
            \"storageConfigInfo\": {
              \"storageType\": \"S3\",
              \"allowedLocations\": [\"${S3_LOCATION}/\"],
              \"roleArn\": \"${AWS_IAM_ROLE}\"
            }
          }
        }"
fi
63 | 107 |
|
# Grant TABLE_WRITE_DATA on the whole catalog to its catalog_admin role; out
# of the box that role can only manage access and metadata. The response is
# routed to stderr so stdout stays clean.
curl -i -X PUT \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Accept: application/json' \
  -H 'Content-Type: application/json' \
  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark/catalog-roles/catalog_admin/grants \
  -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' > /dev/stderr
68 | 112 |
|
# Attach the manual_spark catalog_admin catalog role to the service_admin
# principal role, so the principal running this script can administer the
# catalog. The response is routed to stderr so stdout stays clean.
curl -i -X PUT \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Accept: application/json' \
  -H 'Content-Type: application/json' \
  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/manual_spark \
  -d '{"name": "catalog_admin"}' > /dev/stderr
73 | 117 |
|
# Echo the catalog definition back (to stdout) so the user can verify the
# configuration that was just created before the Spark shell starts.
curl -X GET \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Accept: application/json' \
  -H 'Content-Type: application/json' \
  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark
76 | 120 |
|
77 | | -echo ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" |
78 | 121 | ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" \ |
79 | 122 | --conf spark.sql.catalog.polaris.warehouse=manual_spark \ |
80 | 123 | --conf spark.sql.defaultCatalog=polaris \ |
|
0 commit comments