Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(docker): respect pip mirrors with uv #9963

Merged
merged 3 commits into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions docker/datahub-ingestion-base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ ARG BASE_IMAGE=base
ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine
ARG GITHUB_REPO_URL=https://github.com
ARG DEBIAN_REPO_URL=https://deb.debian.org/debian
ARG PIP_MIRROR_URL=null
ARG PIP_MIRROR_URL=https://pypi.python.org/simple

FROM golang:1-alpine3.18 AS dockerize-binary

Expand All @@ -26,15 +26,18 @@ RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION

FROM python:3.10 as base

ARG DEBIAN_REPO_URL
ARG PIP_MIRROR_URL
ARG GITHUB_REPO_URL

ENV DEBIAN_FRONTEND noninteractive

# Optionally set corporate mirror for apk and pip
# Optionally set corporate mirror for deb
ARG DEBIAN_REPO_URL
RUN if [ "${DEBIAN_REPO_URL}" != "http://deb.debian.org/debian" ] ; then sed -i "s#http.*://deb.debian.org/debian#${DEBIAN_REPO_URL}#g" /etc/apt/sources.list.d/debian.sources ; fi
RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi

# Optionally set corporate mirror for pip
ARG PIP_MIRROR_URL
RUN if [ "${PIP_MIRROR_URL}" != "https://pypi.python.org/simple" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi
ENV UV_INDEX_URL=${PIP_MIRROR_URL}

RUN apt-get update && apt-get install -y -qq \
python3-ldap \
Expand Down Expand Up @@ -67,8 +70,7 @@ USER datahub
ENV VIRTUAL_ENV=/datahub-ingestion/.venv
ENV PATH="${VIRTUAL_ENV}/bin:$PATH"
RUN python3 -m venv $VIRTUAL_ENV && \
uv pip install --no-cache -r requirements.txt && \
pip uninstall -y acryl-datahub
uv pip install --no-cache -r requirements.txt

ENTRYPOINT [ "/entrypoint.sh" ]

Expand Down
18 changes: 11 additions & 7 deletions docker/datahub-ingestion/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,22 @@
ARG APP_ENV=full
ARG BASE_IMAGE=acryldata/datahub-ingestion-base
ARG DOCKER_VERSION=head
ARG PIP_MIRROR_URL=null
ARG DEBIAN_REPO_URL=https://deb.debian.org/debian
ARG PIP_MIRROR_URL=https://pypi.python.org/simple

FROM $BASE_IMAGE:$DOCKER_VERSION as base

# Optionally set corporate mirror for deb
USER 0
ARG DEBIAN_REPO_URL
RUN if [ "${DEBIAN_REPO_URL}" != "http://deb.debian.org/debian" ] ; then sed -i "s#http.*://deb.debian.org/debian#${DEBIAN_REPO_URL}#g" /etc/apt/sources.list.d/debian.sources ; fi
USER datahub

# Optionally set corporate mirror for pip
ARG PIP_MIRROR_URL
RUN if [ "${PIP_MIRROR_URL}" != "https://pypi.python.org/simple" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi
ENV UV_INDEX_URL=${PIP_MIRROR_URL}

COPY --chown=datahub ./metadata-ingestion /datahub-ingestion
COPY --chown=datahub ./metadata-ingestion-modules/airflow-plugin /datahub-ingestion/airflow-plugin

Expand All @@ -19,23 +29,17 @@ RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEAS
cat airflow-plugin/src/datahub_airflow_plugin/__init__.py | grep __version__

FROM base as slim-install
ARG PIP_MIRROR_URL

RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi
RUN uv pip install --no-cache "acryl-datahub[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary] @ ."

FROM base as full-install-build
ARG PIP_MIRROR_URL
ARG DEBIAN_REPO_URL

USER 0
RUN if [ "${DEBIAN_REPO_URL}" != "http://deb.debian.org/debian" ] ; then sed -i "s#http.*://deb.debian.org/debian#${DEBIAN_REPO_URL}#g" /etc/apt/sources.list.d/debian.sources ; fi
RUN apt-get update && apt-get install -y -qq maven

USER datahub
COPY ./docker/datahub-ingestion/pyspark_jars.sh .

RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi
RUN uv pip install --no-cache "acryl-datahub[base,all] @ ." "acryl-datahub-airflow-plugin[plugin-v2] @ ./airflow-plugin" && \
datahub --version
RUN ./pyspark_jars.sh
Expand Down
19 changes: 9 additions & 10 deletions docker/datahub-ingestion/Dockerfile-slim-only
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
# Defining environment
ARG BASE_IMAGE=acryldata/datahub-ingestion-base
ARG DOCKER_VERSION=head-slim
ARG PIP_MIRROR_URL=null
ARG PIP_MIRROR_URL=https://pypi.python.org/simple

FROM $BASE_IMAGE:$DOCKER_VERSION as base
USER 0
USER datahub

# Optionally set corporate mirror for pip
ARG PIP_MIRROR_URL
RUN if [ "${PIP_MIRROR_URL}" != "https://pypi.python.org/simple" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi
ENV UV_INDEX_URL=${PIP_MIRROR_URL}

COPY ./metadata-ingestion /datahub-ingestion
COPY --chown=datahub ./metadata-ingestion /datahub-ingestion

ARG RELEASE_VERSION
WORKDIR /datahub-ingestion
RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/datahub/__init__.py && \
cat src/datahub/__init__.py && \
chown -R datahub /datahub-ingestion

USER datahub
cat src/datahub/__init__.py

FROM base as slim-install

ARG PIP_MIRROR_URL

RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi
RUN uv pip install --no-cache "acryl-datahub[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary] @ ." && \
datahub --version

Expand Down
Loading