
Add pre-commit hook shellcheck-py to lint shell files
jbampton committed Oct 11, 2024
1 parent f5faea4 commit 3df8b2b
Showing 6 changed files with 38 additions and 33 deletions.
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
@@ -70,6 +70,10 @@ repos:
        exclude: ^\.github/.*$
        types: [markdown]
        files: \.(md|mdown|markdown)$
+  - repo: https://github.com/shellcheck-py/shellcheck-py
+    rev: v0.10.0.1
+    hooks:
+      - id: shellcheck
  - repo: https://github.com/adrienverge/yamllint
    rev: v1.35.1
    hooks:
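With this hook entry in place, ShellCheck runs against shell scripts on every commit once the git hooks are installed. A minimal sketch of exercising just the new hook across the whole repository, assuming pre-commit itself is already installed:

    # Install the git hooks, then run only the shellcheck hook on all tracked files
    pre-commit install
    pre-commit run shellcheck --all-files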
1 change: 1 addition & 0 deletions .shellcheckrc
@@ -0,0 +1 @@
+disable=SC2004,SC2041,SC2155,SC2181
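Rather than rewriting every script, four checks are silenced repo-wide. Two of them are visible in the diffs below: SC2004 covers the $var style inside $(( )) arithmetic that start.sh keeps, and SC2181 covers its "if [ $? -ne 0 ]" exit-status tests. As an illustration of SC2181 only, a sketch of the flagged pattern next to the form ShellCheck prefers (not a change made in this commit):

    # Kept in start.sh; SC2181 flags testing $? after the fact
    DRIVER_MEM_MB=$(convert_to_mb "$DRIVER_MEM")
    if [ $? -ne 0 ]; then
        echo "Error converting DRIVER_MEM to megabytes." >&2
        exit 1
    fi

    # ShellCheck's preferred form checks the command's exit status directly
    if ! DRIVER_MEM_MB=$(convert_to_mb "$DRIVER_MEM"); then
        echo "Error converting DRIVER_MEM to megabytes." >&2
        exit 1
    fi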
14 changes: 7 additions & 7 deletions docker/sedona-spark-jupyterlab/build.sh
@@ -23,7 +23,7 @@ BUILD_MODE=$3
GEOTOOLS_VERSION=${4:-auto}

SEDONA_SPARK_VERSION=${SPARK_VERSION:0:3}
-if [ ${SPARK_VERSION:0:1} -eq "3" ] && [ ${SPARK_VERSION:2:1} -le "3" ]; then
+if [ "${SPARK_VERSION:0:1}" -eq "3" ] && [ "${SPARK_VERSION:2:1}" -le "3" ]; then
# 3.0, 3.1, 3.2, 3.3
SEDONA_SPARK_VERSION=3.0
fi
@@ -42,7 +42,7 @@ get_latest_version_with_suffix() {

# Fetch the maven-metadata.xml file
METADATA_URL="${BASE_URL}maven-metadata.xml"
-METADATA_XML=$(curl -s $METADATA_URL)
+METADATA_XML=$(curl -s "$METADATA_URL")

# Extract versions from the XML
VERSIONS=$(echo "$METADATA_XML" | grep -o '<version>[^<]*</version>' | awk -F'[<>]' '{print $3}')
@@ -52,7 +52,7 @@ get_latest_version_with_suffix() {
# Filter versions that end with the specified suffix and find the largest one
for VERSION in $VERSIONS; do
if [[ $VERSION == *$SUFFIX ]]; then
-if [[ -z $LATEST_VERSION ]] || version_gt $VERSION $LATEST_VERSION; then
+if [[ -z $LATEST_VERSION ]] || version_gt "$VERSION" "$LATEST_VERSION"; then
LATEST_VERSION=$VERSION
fi
fi
@@ -61,7 +61,7 @@ get_latest_version_with_suffix() {
if [[ -z $LATEST_VERSION ]]; then
exit 1
else
-echo $LATEST_VERSION
+echo "$LATEST_VERSION"
fi
}

@@ -80,7 +80,7 @@ if [ "$SEDONA_VERSION" = "latest" ]; then
echo "Using latest geotools-wrapper version: $GEOTOOLS_WRAPPER_VERSION"

# The compilation must take place outside Docker to avoid unnecessary maven packages
-mvn clean install -DskipTests -Dspark=${SEDONA_SPARK_VERSION} -Dscala=2.12
+mvn clean install -DskipTests -Dspark="${SEDONA_SPARK_VERSION}" -Dscala=2.12
fi

# -- Building the image
@@ -92,7 +92,7 @@ if [ -z "$BUILD_MODE" ] || [ "$BUILD_MODE" = "local" ]; then
--build-arg sedona_version="${SEDONA_VERSION}" \
--build-arg geotools_wrapper_version="${GEOTOOLS_WRAPPER_VERSION}" \
-f docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile \
--t apache/sedona:${SEDONA_VERSION} .
+-t apache/sedona:"${SEDONA_VERSION}" .
else
# If release, build the image for cross-platform
docker buildx build --platform linux/amd64,linux/arm64 \
@@ -103,5 +103,5 @@ else
--build-arg sedona_version="${SEDONA_VERSION}" \
--build-arg geotools_wrapper_version="${GEOTOOLS_WRAPPER_VERSION}" \
-f docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile \
--t apache/sedona:${SEDONA_VERSION} .
+-t apache/sedona:"${SEDONA_VERSION}" .
fi
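Most edits in this script and the ones below simply double-quote variable expansions, which is what ShellCheck's SC2086 warning targets: an unquoted expansion is split on whitespace and glob-expanded before the command sees it. A small standalone illustration with a hypothetical value (not taken from the repository):

    # Hypothetical version string containing a space, to make the splitting visible
    SEDONA_VERSION="1.6.1 latest"

    printf '<%s>\n' apache/sedona:$SEDONA_VERSION     # unquoted: two arguments
    # <apache/sedona:1.6.1>
    # <latest>

    printf '<%s>\n' apache/sedona:"$SEDONA_VERSION"   # quoted: a single argument
    # <apache/sedona:1.6.1 latest>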
18 changes: 9 additions & 9 deletions docker/sedona-spark-jupyterlab/start.sh
@@ -29,7 +29,7 @@ convert_to_mb() {
echo $(($mem_value * 1024))
;;
[mM])
-echo $mem_value
+echo "$mem_value"
;;
*)
echo "Invalid memory unit: $mem_str" >&2
@@ -39,13 +39,13 @@ convert_to_mb() {
}

# Convert DRIVER_MEM and EXECUTOR_MEM to megabytes
-DRIVER_MEM_MB=$(convert_to_mb $DRIVER_MEM)
+DRIVER_MEM_MB=$(convert_to_mb "$DRIVER_MEM")
if [ $? -ne 0 ]; then
echo "Error converting DRIVER_MEM to megabytes." >&2
exit 1
fi

-EXECUTOR_MEM_MB=$(convert_to_mb $EXECUTOR_MEM)
+EXECUTOR_MEM_MB=$(convert_to_mb "$EXECUTOR_MEM")
if [ $? -ne 0 ]; then
echo "Error converting EXECUTOR_MEM to megabytes." >&2
exit 1
@@ -58,7 +58,7 @@ TOTAL_PHYSICAL_MEM_MB=$(free -m | awk '/^Mem:/{print $2}')
TOTAL_REQUIRED_MEM_MB=$(($DRIVER_MEM_MB + $EXECUTOR_MEM_MB))

# Compare total required memory with total physical memory
-if [ $TOTAL_REQUIRED_MEM_MB -gt $TOTAL_PHYSICAL_MEM_MB ]; then
+if [ $TOTAL_REQUIRED_MEM_MB -gt "$TOTAL_PHYSICAL_MEM_MB" ]; then
echo "Error: Insufficient memory" >&2
echo " total: $TOTAL_PHYSICAL_MEM_MB MB" >&2
echo " required: $TOTAL_REQUIRED_MEM_MB MB (driver: $DRIVER_MEM_MB MB, executor: $EXECUTOR_MEM_MB MB)" >&2
@@ -68,14 +68,14 @@ if [ $TOTAL_REQUIRED_MEM_MB -gt $TOTAL_PHYSICAL_MEM_MB ]; then
fi

# Configure spark
-cp ${SPARK_HOME}/conf/spark-env.sh.template ${SPARK_HOME}/conf/spark-env.sh
-echo "SPARK_WORKER_MEMORY=${EXECUTOR_MEM}" >> ${SPARK_HOME}/conf/spark-env.sh
-echo "spark.driver.memory $DRIVER_MEM" >> ${SPARK_HOME}/conf/spark-defaults.conf
-echo "spark.executor.memory $EXECUTOR_MEM" >> ${SPARK_HOME}/conf/spark-defaults.conf
+cp "${SPARK_HOME}"/conf/spark-env.sh.template "${SPARK_HOME}"/conf/spark-env.sh
+echo "SPARK_WORKER_MEMORY=${EXECUTOR_MEM}" >> "${SPARK_HOME}"/conf/spark-env.sh
+echo "spark.driver.memory $DRIVER_MEM" >> "${SPARK_HOME}"/conf/spark-defaults.conf
+echo "spark.executor.memory $EXECUTOR_MEM" >> "${SPARK_HOME}"/conf/spark-defaults.conf

# Start spark standalone cluster
service ssh start
-${SPARK_HOME}/sbin/start-all.sh
+"${SPARK_HOME}"/sbin/start-all.sh

# Start jupyter lab
exec jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --NotebookApp.token=
18 changes: 9 additions & 9 deletions docker/sedona.sh
@@ -25,29 +25,29 @@ spark_extension_version=$4

spark_compat_version=${spark_version:0:3}
sedona_spark_version=${spark_compat_version}
-if [ ${spark_version:0:1} -eq "3" ] && [ ${spark_version:2:1} -le "3" ]; then
+if [ "${spark_version:0:1}" -eq "3" ] && [ "${spark_version:2:1}" -le "3" ]; then
# 3.0, 3.1, 3.2, 3.3
sedona_spark_version=3.0
fi

-if [ $sedona_version = "latest" ]; then
+if [ "$sedona_version" = "latest" ]; then
# Code to execute when SEDONA_VERSION is "latest"
-cp ${SEDONA_HOME}/spark-shaded/target/sedona-spark-shaded-*.jar ${SPARK_HOME}/jars/
-cd ${SEDONA_HOME}/python;pip3 install .
+cp "${SEDONA_HOME}"/spark-shaded/target/sedona-spark-shaded-*.jar "${SPARK_HOME}"/jars/
+cd "${SEDONA_HOME}"/python;pip3 install .
else
# Code to execute when SEDONA_VERSION is not "latest"
# Download Sedona
-curl https://repo1.maven.org/maven2/org/apache/sedona/sedona-spark-shaded-${sedona_spark_version}_2.12/${sedona_version}/sedona-spark-shaded-${sedona_spark_version}_2.12-${sedona_version}.jar -o $SPARK_HOME/jars/sedona-spark-shaded-${sedona_spark_version}_2.12-${sedona_version}.jar
+curl https://repo1.maven.org/maven2/org/apache/sedona/sedona-spark-shaded-"${sedona_spark_version}"_2.12/"${sedona_version}"/sedona-spark-shaded-"${sedona_spark_version}"_2.12-"${sedona_version}".jar -o "$SPARK_HOME"/jars/sedona-spark-shaded-"${sedona_spark_version}"_2.12-"${sedona_version}".jar

# Install Sedona Python
-pip3 install apache-sedona==${sedona_version}
+pip3 install apache-sedona=="${sedona_version}"
fi

# Download gresearch spark extension
-curl https://repo1.maven.org/maven2/uk/co/gresearch/spark/spark-extension_2.12/${spark_extension_version}-${spark_compat_version}/spark-extension_2.12-${spark_extension_version}-${spark_compat_version}.jar -o $SPARK_HOME/jars/spark-extension_2.12-${spark_extension_version}-${spark_compat_version}.jar
+curl https://repo1.maven.org/maven2/uk/co/gresearch/spark/spark-extension_2.12/"${spark_extension_version}"-"${spark_compat_version}"/spark-extension_2.12-"${spark_extension_version}"-"${spark_compat_version}".jar -o "$SPARK_HOME"/jars/spark-extension_2.12-"${spark_extension_version}"-"${spark_compat_version}".jar

# Install Spark extension Python
-pip3 install pyspark-extension==${spark_extension_version}.${spark_compat_version}
+pip3 install pyspark-extension=="${spark_extension_version}"."${spark_compat_version}"

# Download GeoTools jar
-curl https://repo1.maven.org/maven2/org/datasyslab/geotools-wrapper/${geotools_wrapper_version}/geotools-wrapper-${geotools_wrapper_version}.jar -o $SPARK_HOME/jars/geotools-wrapper-${geotools_wrapper_version}.jar
+curl https://repo1.maven.org/maven2/org/datasyslab/geotools-wrapper/"${geotools_wrapper_version}"/geotools-wrapper-"${geotools_wrapper_version}".jar -o "$SPARK_HOME"/jars/geotools-wrapper-"${geotools_wrapper_version}".jar
16 changes: 8 additions & 8 deletions docker/spark.sh
@@ -30,20 +30,20 @@ apt-get install -y openjdk-19-jdk-headless curl python3-pip maven
pip3 install --upgrade pip && pip3 install pipenv

# Download Spark jar and set up PySpark
-curl https://archive.apache.org/dist/spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_version}.tgz -o spark.tgz
-tar -xf spark.tgz && mv spark-${spark_version}-bin-hadoop${hadoop_version}/* ${SPARK_HOME}/
-rm spark.tgz && rm -rf spark-${spark_version}-bin-hadoop${hadoop_version}
-pip3 install pyspark==${spark_version}
+curl https://archive.apache.org/dist/spark/spark-"${spark_version}"/spark-"${spark_version}"-bin-hadoop"${hadoop_version}".tgz -o spark.tgz
+tar -xf spark.tgz && mv spark-"${spark_version}"-bin-hadoop"${hadoop_version}"/* "${SPARK_HOME}"/
+rm spark.tgz && rm -rf spark-"${spark_version}"-bin-hadoop"${hadoop_version}"
+pip3 install pyspark=="${spark_version}"

# Add S3 jars
-curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${hadoop_s3_version}/hadoop-aws-${hadoop_s3_version}.jar -o ${SPARK_HOME}/jars/hadoop-aws-${hadoop_s3_version}.jar
-curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${aws_sdk_version}/aws-java-sdk-bundle-${aws_sdk_version}.jar -o ${SPARK_HOME}/jars/aws-java-sdk-bundle-${aws_sdk_version}.jar
+curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/"${hadoop_s3_version}"/hadoop-aws-"${hadoop_s3_version}".jar -o "${SPARK_HOME}"/jars/hadoop-aws-"${hadoop_s3_version}".jar
+curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/"${aws_sdk_version}"/aws-java-sdk-bundle-"${aws_sdk_version}".jar -o "${SPARK_HOME}"/jars/aws-java-sdk-bundle-"${aws_sdk_version}".jar

# Add spark-xml jar
-curl https://repo1.maven.org/maven2/com/databricks/spark-xml_2.12/${spark_xml_version}/spark-xml_2.12-${spark_xml_version}.jar -o ${SPARK_HOME}/jars/spark-xml_2.12-${spark_xml_version}.jar
+curl https://repo1.maven.org/maven2/com/databricks/spark-xml_2.12/"${spark_xml_version}"/spark-xml_2.12-"${spark_xml_version}".jar -o "${SPARK_HOME}"/jars/spark-xml_2.12-"${spark_xml_version}".jar

# Set up master IP address and executor memory
-cp ${SPARK_HOME}/conf/spark-defaults.conf.template ${SPARK_HOME}/conf/spark-defaults.conf
+cp "${SPARK_HOME}"/conf/spark-defaults.conf.template "${SPARK_HOME}"/conf/spark-defaults.conf

# Install required libraries for GeoPandas on Apple chip mac
apt-get install -y gdal-bin libgdal-dev
