-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathDockerfile
113 lines (93 loc) · 4.5 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# To check running container: docker exec -it tube /bin/bash
# Base image: quay.io/cdis/python at the python3.9-buster-stable tag.
# NOTE(review): the tag name and the apt-get / libssl1.1 usage further down
# suggest a Debian buster userland -- confirm against the image itself.
FROM quay.io/cdis/python:python3.9-buster-stable
# Build-time only: stops apt/debconf from prompting during package installation.
# Declared as ARG (not ENV) so it is visible to RUN steps in this build but is
# not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive
# Pinned tool versions and the Maven coordinates of the Elasticsearch Spark
# connector artifacts.
ENV SQOOP_VERSION="1.4.7" \
    HADOOP_VERSION="3.3.2" \
    ES_HADOOP_VERSION="8.3.3" \
    MAVEN_ES_URL="https://search.maven.org/remotecontent?filepath=org/elasticsearch" \
    ES_SPARK_30_2_12="elasticsearch-spark-30_2.12" \
    ES_SPARK_20_2_11="elasticsearch-spark-20_2.11"
# Kept in its own ENV instruction: substitutions inside a single ENV only see
# values set by earlier instructions, so this must follow the ENV that defines
# MAVEN_ES_URL, ES_SPARK_30_2_12 and ES_HADOOP_VERSION.
# Value is the extension-less URL of the Spark 3.0 / Scala 2.12 connector
# artifact; callers append ".jar", "-javadoc.jar" or "-sources.jar".
ENV MAVEN_ES_SPARK_VERSION="${MAVEN_ES_URL}/${ES_SPARK_30_2_12}/${ES_HADOOP_VERSION}/${ES_SPARK_30_2_12}-${ES_HADOOP_VERSION}"
# Download locations and install prefixes for Sqoop, Hadoop and ES-Hadoop.
# The Apache archive is fetched over HTTPS: the tarballs are not
# checksum-verified, so plain HTTP would allow tampering in transit.
# JAVA_HOME hard-codes the amd64 path of the Java 11 OpenJDK install.
ENV SQOOP_INSTALLATION_URL="https://archive.apache.org/dist/sqoop/${SQOOP_VERSION}/sqoop-${SQOOP_VERSION}.bin__hadoop-2.6.0.tar.gz" \
    HADOOP_INSTALLATION_URL="https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz" \
    ES_HADOOP_INSTALLATION_URL="https://artifacts.elastic.co/downloads/elasticsearch-hadoop/elasticsearch-hadoop-${ES_HADOOP_VERSION}.zip" \
    SQOOP_HOME="/sqoop" \
    HADOOP_HOME="/hadoop" \
    ES_HADOOP_HOME="/es-hadoop" \
    JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64/"
# Separate instruction because it expands ES_HADOOP_HOME, which is only
# available to substitution after the ENV above has been processed.
# NOTE(review): presumably the top-level directory created when the ES-Hadoop
# zip is extracted into ES_HADOOP_HOME -- confirm against the archive layout.
ENV ES_HADOOP_HOME_VERSION="${ES_HADOOP_HOME}/elasticsearch-hadoop-${ES_HADOOP_VERSION}"
# Pre-create the man-page section directories before any packages are installed.
# NOTE(review): presumably a workaround for package post-install scripts
# (e.g. the JDK) failing when these directories are absent -- confirm.
RUN mkdir -p \
        /usr/share/man/man1 \
        /usr/share/man/man7
# OS packages: the Java 11 JDK, a C/C++ toolchain and development headers,
# the PostgreSQL client, and download / debugging utilities.
# The package index is refreshed and the apt lists are removed in the same
# layer so neither a stale index nor the list files end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        g++ \
        git \
        # required to build cryptography
        libffi-dev \
        libgnutls30 \
        # required to build psycopg2, which the SQLAlchemy Postgres engine depends on
        libpq-dev \
        # required to build cryptography
        libssl-dev \
        libssl1.1 \
        openjdk-11-jdk-headless \
        postgresql-client \
        unzip \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Bootstrap the Python tooling used by the later build steps (pip itself,
# Poetry, requests). --no-cache-dir keeps pip's download cache out of the layer.
# NOTE(review): versions are unpinned, so rebuilds may pick up newer releases.
RUN python -m pip install --no-cache-dir --upgrade pip poetry requests
# Install Sqoop: download the release tarball, unpack it into SQOOP_HOME
# (--strip-components 1 drops the archive's top-level directory), then delete
# the tarball and the bundled docs within the same layer.
RUN sqoop_tarball="sqoop-${SQOOP_VERSION}.bin__hadoop-2.6.0.tar.gz" \
    && wget "${SQOOP_INSTALLATION_URL}" \
    && mkdir -p "${SQOOP_HOME}" \
    && tar -xvf "${sqoop_tarball}" -C "${SQOOP_HOME}" --strip-components 1 \
    && rm "${sqoop_tarball}" \
    && rm -rf "${SQOOP_HOME}/docs"
# Place the PostgreSQL JDBC driver (42.2.4) in Sqoop's lib directory.
RUN wget -O "${SQOOP_HOME}/lib/postgresql-42.2.4.jar" \
        https://jdbc.postgresql.org/download/postgresql-42.2.4.jar
# Add commons-lang 2.6 to Sqoop's lib directory.
# Fetched from the permanent Apache archive (the same host as the Sqoop and
# Hadoop downloads) rather than the rotating CDN mirror, so the pinned 2.6
# release stays resolvable. The extracted directory is removed in the same
# layer; only the jar is kept in the image.
RUN wget https://archive.apache.org/dist/commons/lang/binaries/commons-lang-2.6-bin.tar.gz \
    && tar -xvf commons-lang-2.6-bin.tar.gz \
    && rm commons-lang-2.6-bin.tar.gz \
    && mv commons-lang-2.6/commons-lang-2.6.jar "${SQOOP_HOME}/lib/" \
    && rm -rf commons-lang-2.6
# Install Hadoop: download the release tarball, unpack it into HADOOP_HOME
# without the archive's top-level directory, then remove the tarball and the
# bundled documentation in the same layer.
RUN hadoop_tarball="hadoop-${HADOOP_VERSION}.tar.gz" \
    && wget "${HADOOP_INSTALLATION_URL}" \
    && mkdir -p "${HADOOP_HOME}" \
    && tar -xvf "${hadoop_tarball}" -C "${HADOOP_HOME}" --strip-components 1 \
    && rm "${hadoop_tarball}" \
    && rm -rf "${HADOOP_HOME}/share/doc"
# Install ES-Hadoop: download the release zip, extract it under ES_HADOOP_HOME
# and delete the zip in the same layer.
RUN es_hadoop_zip="elasticsearch-hadoop-${ES_HADOOP_VERSION}.zip" \
    && wget "${ES_HADOOP_INSTALLATION_URL}" \
    && mkdir -p "${ES_HADOOP_HOME}" \
    && unzip "${es_hadoop_zip}" -d "${ES_HADOOP_HOME}" \
    && rm "${es_hadoop_zip}"
# Download the Spark 3.0 / Scala 2.12 connector artifacts (binary, javadoc and
# sources jars) and store them in the ES-Hadoop dist directory under the
# Spark 2.0 / Scala 2.11 file names.
# NOTE(review): the name mismatch appears deliberate (consumers presumably
# reference the 2.11 file name) -- confirm before changing it.
# The URLs are quoted because they contain '?', which an unquoted shell word
# would treat as a glob pattern.
RUN wget "${MAVEN_ES_SPARK_VERSION}.jar" \
        -O "${ES_HADOOP_HOME_VERSION}/dist/${ES_SPARK_20_2_11}-${ES_HADOOP_VERSION}.jar" \
    && wget "${MAVEN_ES_SPARK_VERSION}-javadoc.jar" \
        -O "${ES_HADOOP_HOME_VERSION}/dist/${ES_SPARK_20_2_11}-${ES_HADOOP_VERSION}-javadoc.jar" \
    && wget "${MAVEN_ES_SPARK_VERSION}-sources.jar" \
        -O "${ES_HADOOP_HOME_VERSION}/dist/${ES_SPARK_20_2_11}-${ES_HADOOP_VERSION}-sources.jar"
# Runtime environment for Hadoop and Sqoop.
# All HADOOP_* / YARN_HOME entries point into the single HADOOP_HOME install.
# ACCUMULO/HIVE/HBASE/HCAT/ZOOKEEPER homes are set to fixed top-level paths.
# LD_LIBRARY_PATH: the previous value is appended only when one exists, so an
# unset variable does not leave a trailing ':' (an empty entry, which the
# dynamic loader interprets as the current directory).
ENV HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop \
    HADOOP_MAPRED_HOME=$HADOOP_HOME \
    HADOOP_COMMON_HOME=$HADOOP_HOME \
    HADOOP_HDFS_HOME=$HADOOP_HOME \
    YARN_HOME=$HADOOP_HOME \
    ACCUMULO_HOME=/accumulo \
    HIVE_HOME=/hive \
    HBASE_HOME=/hbase \
    HCAT_HOME=/hcatalog \
    ZOOKEEPER_HOME=/zookeeper \
    HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native \
    LD_LIBRARY_PATH=$HADOOP_HOME/lib/native${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
# Create the (empty) home directories named by the *_HOME variables.
# NOTE(review): presumably present so tools that check these paths find them -- confirm.
RUN mkdir -p \
        "${ACCUMULO_HOME}" \
        "${HIVE_HOME}" \
        "${HBASE_HOME}" \
        "${HCAT_HOME}" \
        "${ZOOKEEPER_HOME}"
# Put the Sqoop, Hadoop (sbin and bin) and JDK executables ahead of the
# inherited PATH, in that lookup order.
ENV PATH="${SQOOP_HOME}/bin:${HADOOP_HOME}/sbin:${HADOOP_HOME}/bin:${JAVA_HOME}/bin:${PATH}"
WORKDIR /tube
# Copy ONLY the Poetry manifests and install the third-party dependencies,
# skipping the project itself (--no-root). Because the source tree is not part
# of this layer, the dependency install stays cached until one of the two
# manifest files changes.
COPY pyproject.toml poetry.lock ./
# virtualenvs.create=false makes Poetry install into the image's interpreter
# instead of a separate virtualenv; `poetry show` lists the result in the build log.
RUN python -m poetry config virtualenvs.create false \
    && python -m poetry install -vv --no-root --only main --no-interaction \
    && python -m poetry show -v
# Bring in the full source tree only AFTER the dependency layer, then run the
# install again without --no-root so the project itself is installed too.
COPY . .
RUN python -m poetry config virtualenvs.create false \
    && python -m poetry install -vv --only main --no-interaction \
    && python -m poetry show -v
#ENV TINI_VERSION v0.18.0
#ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
#RUN chmod +x /tini
#ENTRYPOINT ["/tini", "--"]
# Disable Python's stdout/stderr buffering so log output appears immediately.
# Written in key=value form; the space-separated ENV form is deprecated.
ENV PYTHONUNBUFFERED=1