From eef763caa338b38037cc783305228b1259f56004 Mon Sep 17 00:00:00 2001 From: Matteo Merli Date: Wed, 6 Mar 2024 16:28:18 -0800 Subject: [PATCH] WIP --- README.md | 8 +- build/run_unit_group.sh | 2 +- docker/README.md | 30 ++----- docker/pom.xml | 2 +- docker/publish.sh | 8 +- .../Dockerfile | 38 +++++++-- .../pom.xml | 78 ++----------------- docker/pulsar/Dockerfile | 28 +------ docker/pulsar/pom.xml | 1 - pulsar-bom/pom.xml | 2 +- .../latest-version-image/Dockerfile | 43 ++++++---- .../latest-version-image/pom.xml | 77 ++++++++++++++++++ 12 files changed, 158 insertions(+), 159 deletions(-) rename docker/{pulsar-all => pulsar-functions-python}/Dockerfile (50%) rename docker/{pulsar-all => pulsar-functions-python}/pom.xml (59%) diff --git a/README.md b/README.md index 3eae0ae29c3343..e145f05dd33703 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ ![logo](https://pulsar.apache.org/img/pulsar.svg) -[![docker pull](https://img.shields.io/docker/pulls/apachepulsar/pulsar-all.svg)](https://hub.docker.com/r/apachepulsar/pulsar) +[![docker pull](https://img.shields.io/docker/pulls/apachepulsar/pulsar.svg)](https://hub.docker.com/r/apachepulsar/pulsar) [![contributors](https://img.shields.io/github/contributors-anon/apache/pulsar)](https://github.com/apache/pulsar/graphs/contributors) [![last commit](https://img.shields.io/github/last-commit/apache/pulsar)](https://github.com/apache/pulsar/commits/master) [![release](https://img.shields.io/github/v/release/apache/pulsar?sort=semver)](https://pulsar.apache.org/download/) @@ -200,13 +200,13 @@ Here are some general instructions for building custom docker images: * Java 11 is the recommended JDK version in `branch-2.8`, `branch-2.9` and `branch-2.10`. * Java 17 is the recommended JDK version in `master`. 
-The following command builds the docker images `apachepulsar/pulsar-all:latest` and `apachepulsar/pulsar:latest`: +The following command builds the docker image `apachepulsar/pulsar:latest`: ```bash mvn clean install -DskipTests # setting DOCKER_CLI_EXPERIMENTAL=enabled is required in some environments with older docker versions export DOCKER_CLI_EXPERIMENTAL=enabled -mvn package -Pdocker,-main -am -pl docker/pulsar-all -DskipTests +mvn package -Pdocker,-main -am -pl docker/pulsar -DskipTests ``` After the images are built, they can be tagged and pushed to your custom repository. Here's an example of a bash script that tags the docker images with the current version and git revision and pushes them to `localhost:32000/apachepulsar`. @@ -217,8 +217,6 @@ pulsar_version=$(mvn initialize help:evaluate -Dexpression=project.version -pl . gitrev=$(git rev-parse HEAD | colrm 10) tag="${pulsar_version}-${gitrev}" echo "Using tag $tag" -docker tag apachepulsar/pulsar-all:latest ${image_repo_and_project}/pulsar-all:$tag -docker push ${image_repo_and_project}/pulsar-all:$tag docker tag apachepulsar/pulsar:latest ${image_repo_and_project}/pulsar:$tag docker push ${image_repo_and_project}/pulsar:$tag ``` diff --git a/build/run_unit_group.sh b/build/run_unit_group.sh index 351477aed1c924..dc9ba227b0283e 100755 --- a/build/run_unit_group.sh +++ b/build/run_unit_group.sh @@ -167,7 +167,7 @@ function test_group_proxy() { function test_group_other() { mvn_test --clean --install \ - -pl '!org.apache.pulsar:distribution,!org.apache.pulsar:pulsar-offloader-distribution,!org.apache.pulsar:pulsar-server-distribution,!org.apache.pulsar:pulsar-io-distribution,!org.apache.pulsar:pulsar-all-docker-image' \ + -pl '!org.apache.pulsar:distribution,!org.apache.pulsar:pulsar-offloader-distribution,!org.apache.pulsar:pulsar-server-distribution,!org.apache.pulsar:pulsar-io-distribution,!org.apache.pulsar:pulsar-functions-python-docker-image' \ -PskipTestsForUnitGroupOther 
-DdisableIoMainProfile=true -DskipIntegrationTests \ -Dexclude='**/ManagedLedgerTest.java, **/OffloadersCacheTest.java diff --git a/docker/README.md b/docker/README.md index 5318d331ce9f43..a48f8d7ffe3fbb 100644 --- a/docker/README.md +++ b/docker/README.md @@ -24,7 +24,8 @@ The Apache Pulsar community produces 2 docker images with each official release. * `apachepulsar/pulsar` - contains the necessary components for a working Pulsar cluster -* `apachepulsar/pulsar-all` - extends the `apachepulsar/pulsar` image by adding many Pulsar connectors and offloaders +* `apachepulsar/pulsar-functions-python` - extends the `apachepulsar/pulsar` image by adding the Python + dependencies required to run Pulsar Functions with Python runtime. Since the 2.10.0 release, these docker images run as an unnamed, non-root user that is also part of the root group, by default. This was done to increase container security. The user is part of the root group to ensure that the container @@ -37,44 +38,27 @@ Note that you first must build the project in order to have the right dependenci ## Building Derivative Custom Images -If you find the `apachepulsar/pulsar-all` docker image too large, but you want to use a connector or an offloader, -you can easily build an image with a curated list of connectors or offloaders based on the official Apache Pulsar +You can easily build an image with a curated list of connectors or offloaders based on the official Apache Pulsar images. 
You can use the following sample docker image as a guide: ```Dockerfile ARG VERSION -# Load the pulsar-all image as a builder image -FROM apachepulsar/pulsar-all:${VERSION} as pulsar-all - FROM apachepulsar/pulsar:${VERSION} # Add the cassandra connector -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-cassandra-*.nar /pulsar/connectors - -# Add the jcloud offloader -COPY --from=pulsar-all /pulsar/connectors/tiered-storage-jcloud-*.nar /pulsar/offloaders +RUN mkdir -p connectors && \ + cd connectors && \ + wget https://downloads.apache.org/pulsar/pulsar-${VERSION}/connectors/pulsar-io-cassandra-${VERSION}.nar ``` -NOTE: the above example uses a wildcard in the `COPY` commands because argument expansion does not work for `COPY`. - Assuming that you have the above `Dockerfile` in your local directory and are running docker on your local host, you can -run the following command to build a custom image with the cassandra connector and the jcloud offloader. +run the following command to build a custom image with the cassandra connector. ```shell docker build --build-arg VERSION=2.9.1 -t pulsar-custom:2.9.1 . ``` -For reference, here are the sizes of the official 2.9.1 docker images and the custom image built from the above -`Dockerfile`: - -| REPOSITORY | TAG | SIZE | -| :---------------------- | :---- | :----- | -| apachepulsar/pulsar | 2.9.1 | 1.59GB | -| apachepulsar/pulsar-all | 2.9.1 | 3.44GB | -| pulsar-custom | 2.9.1 | 1.6GB | - - ## Troubleshooting non-root containers Troubleshooting is harder because the docker image runs as a non-root user. 
For example, a non-root user won't be able diff --git a/docker/pom.xml b/docker/pom.xml index 21ed4de9408260..7a0c08c4bd7b7c 100644 --- a/docker/pom.xml +++ b/docker/pom.xml @@ -58,7 +58,7 @@ pulsar - pulsar-all + pulsar-functions-python diff --git a/docker/publish.sh b/docker/publish.sh index 651fefc1498e96..a4fd1c8f0c08c3 100755 --- a/docker/publish.sh +++ b/docker/publish.sh @@ -67,19 +67,19 @@ set -e if [[ "$GIT_BRANCH" == "master" ]]; then docker tag apachepulsar/pulsar:${IMAGE_TAG} ${docker_registry_org}/pulsar:latest - docker tag apachepulsar/pulsar-all:${IMAGE_TAG} ${docker_registry_org}/pulsar-all:latest + docker tag apachepulsar/pulsar-functions-python:${IMAGE_TAG} ${docker_registry_org}/pulsar-functions-python:latest fi docker tag apachepulsar/pulsar:${IMAGE_TAG} ${docker_registry_org}/pulsar:$MVN_VERSION -docker tag apachepulsar/pulsar-all:${IMAGE_TAG} ${docker_registry_org}/pulsar-all:$MVN_VERSION +docker tag apachepulsar/pulsar-functions-python:${IMAGE_TAG} ${docker_registry_org}/pulsar-functions-python:$MVN_VERSION # Push all images and tags if [[ "$GIT_BRANCH" == "master" ]]; then docker push ${docker_registry_org}/pulsar:latest - docker push ${docker_registry_org}/pulsar-all:latest + docker push ${docker_registry_org}/pulsar-functions-python:latest fi docker push ${docker_registry_org}/pulsar:$MVN_VERSION -docker push ${docker_registry_org}/pulsar-all:$MVN_VERSION +docker push ${docker_registry_org}/pulsar-functions-python:$MVN_VERSION echo "Finished pushing images to ${docker_registry_org}" \ No newline at end of file diff --git a/docker/pulsar-all/Dockerfile b/docker/pulsar-functions-python/Dockerfile similarity index 50% rename from docker/pulsar-all/Dockerfile rename to docker/pulsar-functions-python/Dockerfile index 81ad74b65000f8..86027d02a4643b 100644 --- a/docker/pulsar-all/Dockerfile +++ b/docker/pulsar-functions-python/Dockerfile @@ -17,16 +17,38 @@ # under the License. 
# +ARG PULSAR_CLIENT_PYTHON_VERSION ARG PULSAR_IMAGE -FROM busybox as pulsar-all -ARG PULSAR_IO_DIR -ARG PULSAR_OFFLOADER_TARBALL +## Create stage to build the Python dependencies +## Since it needs to have GCC available, we're doing it in a different layer +FROM alpine:3.19 AS python-deps -ADD ${PULSAR_IO_DIR} /connectors -ADD ${PULSAR_OFFLOADER_TARBALL} / -RUN mv /apache-pulsar-offloaders-*/offloaders /offloaders +RUN apk add --no-cache \ + bash \ + python3-dev \ + g++ \ + musl-dev \ + libffi-dev \ + py3-pip \ + py3-grpcio \ + py3-yaml +RUN pip3 install --break-system-packages \ + kazoo + +ARG PULSAR_CLIENT_PYTHON_VERSION +RUN pip3 install --break-system-packages \ + pulsar-client[all]==${PULSAR_CLIENT_PYTHON_VERSION} + +# Main image stage +ARG PULSAR_IMAGE FROM $PULSAR_IMAGE -COPY --from=pulsar-all /connectors /pulsar/connectors -COPY --from=pulsar-all /offloaders /pulsar/offloaders + +# Switch to root user to install PIP package +USER root +RUN apk add --no-cache py3-pip +USER 10000 + +# Copy Python dependencies from the other stage +COPY --from=python-deps /usr/lib/python3.11/site-packages /usr/lib/python3.11/site-packages diff --git a/docker/pulsar-all/pom.xml b/docker/pulsar-functions-python/pom.xml similarity index 59% rename from docker/pulsar-all/pom.xml rename to docker/pulsar-functions-python/pom.xml index 3da14ea84bcb34..2467969cfee5f0 100644 --- a/docker/pulsar-all/pom.xml +++ b/docker/pulsar-functions-python/pom.xml @@ -26,42 +26,16 @@ 3.3.0-SNAPSHOT 4.0.0 - pulsar-all-docker-image - Apache Pulsar :: Docker Images :: Pulsar Latest Version (Include All Components) + pulsar-functions-python-docker-image + Apache Pulsar :: Docker Images :: Pulsar Functions Python pom ${project.groupId} - pulsar-io-distribution - ${project.parent.version} + pulsar-docker-image + ${project.version} pom - provided - - - * - * - - - - - io.perfmark - perfmark-api - runtime - - - ${project.groupId} - pulsar-offloader-distribution - ${project.parent.version} - bin - tar.gz - 
provided - - - * - * - - @@ -97,45 +71,6 @@ - - maven-resources-plugin - - - copy-resources - generate-resources - - copy-resources - - - ${basedir}/target/apache-pulsar-io-connectors-${project.version}-bin - - - ${basedir}/../../distribution/io/target/apache-pulsar-io-connectors-${project.version}-bin - false - - - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - - copy-offloader-tarball - - copy-dependencies - - generate-resources - - ${project.build.directory}/ - pulsar-offloader-distribution - true - - - - io.fabric8 docker-maven-plugin @@ -150,7 +85,7 @@ - ${docker.organization}/pulsar-all + ${docker.organization}/pulsar-functions-python ${project.basedir} @@ -158,9 +93,8 @@ ${project.version}-${git.commit.id.abbrev} - target/apache-pulsar-io-connectors-${project.version}-bin - target/pulsar-offloader-distribution-${project.version}-bin.tar.gz ${docker.organization}/pulsar:${project.version}-${git.commit.id.abbrev} + ${pulsar.client.python.version} diff --git a/docker/pulsar/Dockerfile b/docker/pulsar/Dockerfile index f586a9dd4f9d72..181e73b0771ba4 100644 --- a/docker/pulsar/Dockerfile +++ b/docker/pulsar/Dockerfile @@ -48,27 +48,6 @@ RUN for SUBDIRECTORY in conf data download logs instances/deps; do \ RUN chmod -R g+rx /pulsar/bin RUN chmod -R o+rx /pulsar -## Create 2nd stage to build the Python dependencies -## Since it needs to have GCC available, we're doing it in a different layer -FROM alpine:3.19 AS python-deps - -RUN apk add --no-cache \ - bash \ - python3-dev \ - g++ \ - musl-dev \ - libffi-dev \ - py3-pip \ - py3-grpcio \ - py3-yaml - -RUN pip3 install --break-system-packages \ - kazoo - -ARG PULSAR_CLIENT_PYTHON_VERSION -RUN pip3 install --break-system-packages \ - pulsar-client[all]==${PULSAR_CLIENT_PYTHON_VERSION} - ### Create one stage to include JVM distribution FROM alpine AS jvm @@ -87,7 +66,6 @@ RUN echo networkaddress.cache.negative.ttl=1 >> /opt/jvm/conf/security/java.secu FROM apachepulsar/glibc-base:2.38 as glibc ## 
Create final stage from Alpine image -## and add OpenJDK and Python dependencies (for Pulsar functions) FROM alpine:3.19.1 ENV LANG C.UTF-8 @@ -95,7 +73,6 @@ ENV LANG C.UTF-8 RUN apk add --no-cache \ bash \ python3 \ - py3-pip \ gcompat \ ca-certificates \ procps @@ -107,12 +84,9 @@ RUN apk add --allow-untrusted --force-overwrite /root/packages/glibc-*.apk COPY --from=jvm /opt/jvm /opt/jvm ENV JAVA_HOME=/opt/jvm -# The default is /pulsat/bin and cannot be written. +# The default is /pulsar/bin and cannot be written. ENV PULSAR_PID_DIR=/pulsar/logs -# Copy Python depedencies from the other stage -COPY --from=python-deps /usr/lib/python3.11/site-packages /usr/lib/python3.11/site-packages - ENV PULSAR_ROOT_LOGGER=INFO,CONSOLE COPY --from=pulsar /pulsar /pulsar diff --git a/docker/pulsar/pom.xml b/docker/pulsar/pom.xml index 79ff4bd33b10c1..f4dd4b18ee9afe 100644 --- a/docker/pulsar/pom.xml +++ b/docker/pulsar/pom.xml @@ -81,7 +81,6 @@ target/pulsar-server-distribution-${project.version}-bin.tar.gz - ${pulsar.client.python.version} ${project.basedir} diff --git a/pulsar-bom/pom.xml b/pulsar-bom/pom.xml index 4161643cb9ba22..86bf1211af92da 100644 --- a/pulsar-bom/pom.xml +++ b/pulsar-bom/pom.xml @@ -182,7 +182,7 @@ org.apache.pulsar - pulsar-all-docker-image + pulsar-functions-python-docker-image ${project.version} diff --git a/tests/docker-images/latest-version-image/Dockerfile b/tests/docker-images/latest-version-image/Dockerfile index c23341c0748a29..2add2c496af048 100644 --- a/tests/docker-images/latest-version-image/Dockerfile +++ b/tests/docker-images/latest-version-image/Dockerfile @@ -19,6 +19,9 @@ # build go lang examples first in a separate layer +ARG PULSAR_IO_DIR +ARG PULSAR_OFFLOADER_TARBALL + FROM golang:1.21-alpine as pulsar-function-go COPY target/pulsar-function-go/ /go/src/github.com/apache/pulsar/pulsar-function-go @@ -26,13 +29,31 @@ RUN cd /go/src/github.com/apache/pulsar/pulsar-function-go && go install ./... 
RUN cd /go/src/github.com/apache/pulsar/pulsar-function-go/pf && go install RUN cd /go/src/github.com/apache/pulsar/pulsar-function-go/examples && go install ./... -# Reference pulsar-all to copy connectors from there -FROM apachepulsar/pulsar-all:latest as pulsar-all +## Prepare a stage with few connectors and offloaders +FROM busybox as connectors-offloaders +ARG PULSAR_IO_DIR +ARG PULSAR_OFFLOADER_TARBALL + +ADD ${PULSAR_IO_DIR} /connectors +RUN mkdir -p /pulsar/connectors +RUN cp /connectors/pulsar-io-cassandra-*.nar /pulsar/connectors/ +RUN cp /connectors/pulsar-io-debezium-*.nar /pulsar/connectors/ +RUN cp /connectors/pulsar-io-elastic-*.nar /pulsar/connectors/ +RUN cp /connectors/pulsar-io-hdfs*.nar /pulsar/connectors/ +RUN cp /connectors/pulsar-io-jdbc-postgres-*.nar /pulsar/connectors/ +RUN cp /connectors/pulsar-io-kafka-*.nar /pulsar/connectors/ +RUN cp /connectors/pulsar-io-rabbitmq-*.nar /pulsar/connectors/ +RUN cp /connectors/pulsar-io-batch-data-generator-*.nar /pulsar/connectors/ +RUN cp /connectors/pulsar-io-kinesis-*.nar /pulsar/connectors/ + +ADD ${PULSAR_OFFLOADER_TARBALL} / +RUN mkdir -p /pulsar/offloaders +RUN mv /apache-pulsar-offloaders-*/offloaders /pulsar/offloaders ######################################## ###### Main image build ######################################## -FROM apachepulsar/pulsar:latest +FROM apachepulsar/pulsar-functions-python:latest # Switch to run as the root user to simplify building container and then running # supervisord. 
Each of the pulsar components are spawned by supervisord and their @@ -76,19 +97,9 @@ COPY target/certificate-authority /pulsar/certificate-authority/ # copy broker plugins COPY target/plugins/ /pulsar/examples/ -# Include all offloaders -COPY --from=pulsar-all /pulsar/offloaders /pulsar/offloaders - -# Include only the connectors needed by integration tests -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-cassandra-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-debezium-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-elastic-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-hdfs*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-jdbc-postgres-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-kafka-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-rabbitmq-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-batch-data-generator-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-kinesis-*.nar /pulsar/connectors/ +# Include all offloaders and a few connectors needed by integration tests +COPY --from=connectors-offloaders /pulsar/connectors /pulsar/connectors +COPY --from=connectors-offloaders /pulsar/offloaders /pulsar/offloaders # download Oracle JDBC driver for Oracle Debezium Connector tests RUN mkdir -p META-INF/bundled-dependencies diff --git a/tests/docker-images/latest-version-image/pom.xml b/tests/docker-images/latest-version-image/pom.xml index 3db93441263373..8bf2019674ad3d 100644 --- a/tests/docker-images/latest-version-image/pom.xml +++ b/tests/docker-images/latest-version-image/pom.xml @@ -30,6 +30,41 @@ Apache Pulsar :: Tests :: Docker Images :: Latest Version Testing pom + + + org.apache.pulsar + pulsar-io-distribution + ${project.version} + pom + provided + + + * + * + + + + + io.perfmark + perfmark-api + runtime + + + 
org.apache.pulsar + pulsar-offloader-distribution + ${project.version} + bin + tar.gz + provided + + + * + * + + + + + docker @@ -138,6 +173,48 @@ + + maven-resources-plugin + + + copy-resources + generate-resources + + copy-resources + + + ${basedir}/target/apache-pulsar-io-connectors-${project.version}-bin + + + + + ${basedir}/../../distribution/io/target/apache-pulsar-io-connectors-${project.version}-bin + + false + + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-offloader-tarball + + copy-dependencies + + generate-resources + + ${project.build.directory}/ + pulsar-offloader-distribution + true + + + + io.fabric8 docker-maven-plugin