diff --git a/images/pyspark-notebook/Dockerfile b/images/pyspark-notebook/Dockerfile index 212e3a5502..f64bb75db6 100644 --- a/images/pyspark-notebook/Dockerfile +++ b/images/pyspark-notebook/Dockerfile @@ -34,12 +34,6 @@ ARG scala_version # But it seems to be slower, that's why we use the recommended site for download ARG spark_download_url="https://dlcdn.apache.org/spark/" -# Configure Spark -ENV SPARK_VERSION="${spark_version}" \ - HADOOP_VERSION="${hadoop_version}" \ - SCALA_VERSION="${scala_version}" \ - SPARK_DOWNLOAD_URL="${spark_download_url}" - ENV SPARK_HOME=/usr/local/spark ENV PATH="${PATH}:${SPARK_HOME}/bin" ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" @@ -47,7 +41,11 @@ ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M COPY setup_spark.py /opt/setup-scripts/ # Setup Spark -RUN /opt/setup-scripts/setup_spark.py +RUN SPARK_VERSION="${spark_version}" \ + HADOOP_VERSION="${hadoop_version}" \ + SCALA_VERSION="${scala_version}" \ + SPARK_DOWNLOAD_URL="${spark_download_url}" \ + /opt/setup-scripts/setup_spark.py # Configure IPython system-wide COPY ipython_kernel_config.py "/etc/ipython/" diff --git a/tagging/images_hierarchy.py b/tagging/images_hierarchy.py index f4145876c7..8c3e3fd384 100644 --- a/tagging/images_hierarchy.py +++ b/tagging/images_hierarchy.py @@ -13,7 +13,6 @@ ) from tagging.taggers import ( DateTagger, - HadoopVersionTagger, JavaVersionTagger, JuliaVersionTagger, JupyterHubVersionTagger, @@ -83,7 +82,7 @@ class ImageDescription: ), "pyspark-notebook": ImageDescription( parent_image="scipy-notebook", - taggers=[SparkVersionTagger(), HadoopVersionTagger(), JavaVersionTagger()], + taggers=[SparkVersionTagger(), JavaVersionTagger()], manifests=[SparkInfoManifest()], ), "all-spark-notebook": ImageDescription( diff --git a/tagging/taggers.py b/tagging/taggers.py index daf987b1e1..1aa6705dae 100644 --- a/tagging/taggers.py +++ b/tagging/taggers.py @@ -12,18 +12,6 @@ def _get_program_version(container: Container, program: str) -> str: return DockerRunner.run_simple_command(container, cmd=f"{program} --version") -def _get_env_variable(container: Container, variable: str) -> str: - env = DockerRunner.run_simple_command( - container, - cmd="env", - print_result=False, - ).split() - for env_entry in env: - if env_entry.startswith(variable): - return env_entry[len(variable) + 1 :] - raise KeyError(variable) - - def _get_pip_package_version(container: Container, package: str) -> str: PIP_VERSION_PREFIX = "Version: " @@ -136,12 +124,6 @@ def tag_value(container: Container) -> str: return "spark-" + version_line.split(" ")[-1] -class HadoopVersionTagger(TaggerInterface): - @staticmethod - def tag_value(container: Container) -> str: - return "hadoop-" + _get_env_variable(container, "HADOOP_VERSION") - - class JavaVersionTagger(TaggerInterface): @staticmethod def tag_value(container: Container) -> str: