forked from IBM/data-prep-kit
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
first cut at refactoring fdedup as its own named dpk_ module
Signed-off-by: Maroun Touma <touma@us.ibm.com>
- Loading branch information
Showing
135 changed files
with
546 additions
and
916 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
ARG BASE_IMAGE=docker.io/rayproject/ray:2.36.1-py310 | ||
|
||
FROM ${BASE_IMAGE} | ||
|
||
RUN pip install --upgrade --no-cache-dir pip | ||
|
||
# install pytest | ||
RUN pip install --no-cache-dir pytest | ||
ARG DPK_WHEEL_FILE_NAME | ||
|
||
# Copy and install data processing libraries | ||
# These are expected to be placed in the docker context before this is run (see the make image). | ||
COPY --chown=ray:users data-processing-dist data-processing-dist | ||
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[ray] | ||
|
||
## Copy the python version of the transform | ||
COPY --chown=ray:users dpk_fdedup/ dpk_fdedup/ | ||
COPY --chown=ray:users requirements.txt requirements.txt | ||
RUN pip install -r requirements.txt | ||
|
||
# Grant non-root users the necessary permissions to the ray directory | ||
RUN chmod 755 /home/ray | ||
|
||
# Set environment: make modules copied under /home/ray importable.
# Uses the key=value form of ENV; the space-separated form is legacy syntax.
ENV PYTHONPATH=/home/ray | ||
|
||
# Put these at the end since they seem to upset the docker cache. | ||
ARG BUILD_DATE | ||
ARG GIT_COMMIT | ||
LABEL build-date=$BUILD_DATE | ||
LABEL git-commit=$GIT_COMMIT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
FROM quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest | ||
|
||
USER root | ||
# install pytest | ||
RUN pip install --no-cache-dir pytest | ||
|
||
WORKDIR ${SPARK_HOME}/work-dir | ||
ARG DPK_WHEEL_FILE_NAME | ||
|
||
# Copy and install data processing libraries | ||
# These are expected to be placed in the docker context before this is run (see the make image). | ||
COPY --chown=spark:root data-processing-dist data-processing-dist | ||
RUN pip install data-processing-dist/${DPK_WHEEL_FILE_NAME}[spark] | ||
|
||
|
||
# Install project source | ||
|
||
## Copy the python version of the transform | ||
COPY --chown=spark:root dpk_fdedup/ dpk_fdedup/ | ||
COPY --chown=spark:root requirements.txt requirements.txt | ||
RUN pip install -r requirements.txt | ||
|
||
RUN mkdir -p /opt/spark/work-dir/src/templates && \ | ||
mkdir -p /opt/spark/work-dir/config | ||
COPY --chown=spark:root spark-deployment/kubernetes/spark-executor-pod-template.yml /opt/spark/work-dir/src/templates/ | ||
COPY --chown=spark:root spark-deployment/kubernetes/spark_profile.yml /opt/spark/work-dir/config/ | ||
|
||
|
||
USER spark | ||
|
||
# Set environment | ||
ENV PYTHONPATH=${SPARK_HOME}/work-dir/:${SPARK_HOME}/work-dir/src/:${PYTHONPATH} | ||
ENV PATH=${SPARK_HOME}/work-dir/.local/bin/:${PATH} | ||
|
||
# Put these at the end since they seem to upset the docker cache. | ||
ARG BUILD_DATE | ||
ARG GIT_COMMIT | ||
LABEL build-date=$BUILD_DATE | ||
LABEL git-commit=$GIT_COMMIT | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,79 +1,23 @@ | ||
REPOROOT=../../.. | ||
# Use make help, to see the available rules | ||
include $(REPOROOT)/.make.defaults | ||
include $(REPOROOT)/transforms/.make.cicd.targets | ||
|
||
setup:: | ||
@# Help: Recursively make $@ all subdirs | ||
$(MAKE) RULE=$@ .recurse | ||
# | ||
# This is intended to be included across the Makefiles provided within | ||
# a given transform's directory tree, so must use compatible syntax. | ||
# | ||
################################################################################ | ||
# This defines the name of the transform and is used to match against | ||
# expected files and is used to define the transform's image name. | ||
TRANSFORM_NAME=$(shell basename `pwd`) | ||
|
||
clean:: | ||
@# Help: Recursively make $@ all subdirs | ||
$(MAKE) RULE=$@ .recurse | ||
################################################################################ | ||
|
||
build:: | ||
@# Help: Recursively make $@ in subdirs | ||
$(MAKE) RULE=$@ .recurse | ||
venv:: | ||
@# Help: Recursively make $@ in subdirs | ||
$(MAKE) RULE=$@ .recurse | ||
|
||
image:: | ||
@# Help: Recursively make $@ in all subdirs | ||
@$(MAKE) RULE=$@ .recurse | ||
|
||
set-versions: | ||
@# Help: Recursively $@ in all subdirs | ||
@$(MAKE) RULE=$@ .recurse | ||
|
||
publish:: | ||
@# Help: Recursively make $@ in all subdirs | ||
@$(MAKE) RULE=$@ .recurse | ||
|
||
test-image:: | ||
@# Help: Recursively make $@ in all subdirs | ||
@$(MAKE) RULE=$@ .recurse | ||
|
||
test:: | ||
@# Help: Recursively make $@ in all subdirs | ||
@$(MAKE) RULE=$@ .recurse | ||
|
||
test-src:: | ||
@# Help: Recursively make $@ in all subdirs | ||
$(MAKE) RULE=$@ .recurse | ||
|
||
kind-load-image:: | ||
@# Help: Recursively make $@ in all subdirs | ||
$(MAKE) RULE=$@ .recurse | ||
|
||
docker-load-image:: | ||
@# Help: Recursively make $@ in all subdirs | ||
$(MAKE) RULE=$@ .recurse | ||
|
||
docker-save-image:: | ||
@# Help: Recursively make $@ in all subdirs | ||
$(MAKE) RULE=$@ .recurse | ||
|
||
.PHONY: workflow-venv | ||
workflow-venv: | ||
if [ -e kfp_ray ]; then \ | ||
$(MAKE) -C kfp_ray workflow-venv; \ | ||
fi | ||
|
||
.PHONY: workflow-test | ||
workflow-test: | ||
if [ -e kfp_ray ]; then \ | ||
$(MAKE) -C kfp_ray workflow-test; \ | ||
fi | ||
|
||
.PHONY: workflow-upload | ||
workflow-upload: | ||
if [ -e kfp_ray ]; then \ | ||
$(MAKE) -C kfp_ray workflow-upload; \ | ||
fi | ||
|
||
.PHONY: workflow-build | ||
workflow-build: | ||
if [ -e kfp_ray ]; then \ | ||
$(MAKE) -C kfp_ray workflow-build; \ | ||
fi | ||
|
||
un-cli-sample: | ||
$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_transform_ray.py \ | ||
RUN_ARGS="--run_locally True --data_local_config \"{ 'input_folder' : '../test-data/input', 'output_folder' : '../output'}\" \ | ||
--fdedup_id_column int_id_column" \ | ||
.transforms.run-src-file |
Oops, something went wrong.