forked from databricks/containers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
28 lines (24 loc) · 954 Bytes
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Base image: databricksruntime/minimal at the 9.x tag; every instruction below
# builds on top of it.
# NOTE(review): 9.x looks like a floating tag - consider pinning an image digest
# if reproducible builds matter; confirm against the registry.
FROM databricksruntime/minimal:9.x
# Install the Python 3.8 interpreter and the virtualenv tool (used for Spark
# and notebooks) from the distro package index. The apt cache, the package
# lists, /tmp and /var/tmp are cleared inside this same RUN so they are not
# persisted in the layer.
# NOTE(review): --no-install-recommends would slim this layer further, but
# first confirm that nothing virtualenv needs arrives only as a "Recommends"
# package.
RUN apt-get update \
    && apt-get install --yes \
        python3.8 \
        virtualenv \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Create the default Python environment that Spark and notebooks will use.
# It lives at /databricks/python3, is built on the python3.8 interpreter, and
# can also see the system-wide site-packages.
RUN virtualenv \
    --python=python3.8 \
    --system-site-packages \
    /databricks/python3
# Python libraries used by Databricks notebooks and the Python REPL, installed
# into the /databricks/python3 environment.
# pyspark is deliberately not listed: it is injected when the cluster is launched.
# Version pins are intended to reflect DBR 9.0.
#   - ipython: ensures the minimum version for Python autocomplete with jedi 0.17.x
# --no-cache-dir keeps pip's download/wheel cache out of the image layer, where
# it would only add size.
RUN /databricks/python3/bin/pip install --no-cache-dir \
    six==1.15.0 \
    ipython==7.19.0 \
    numpy==1.19.2 \
    pandas==1.2.4 \
    pyarrow==4.0.0 \
    matplotlib==3.4.2 \
    jinja2==2.11.3
# Tell Spark which Python executable to launch: the interpreter inside the
# /databricks/python3 environment.
ENV PYSPARK_PYTHON="/databricks/python3/bin/python3"