diff --git a/Makefile-deps.mk b/Makefile-deps.mk
index 4b2e1d1ecb..13edfe5e1d 100644
--- a/Makefile-deps.mk
+++ b/Makefile-deps.mk
@@ -117,12 +117,6 @@ kf-training-operator-crd: ## Copy the CRDs from the training-operator to the dep
 	find $(KF_TRAINING_ROOT)/manifests/base/crds/ -type f -not -name "kubeflow.org_mpijobs.yaml" -exec cp -f {} $(EXTERNAL_CRDS_DIR)/training-operator/ \;
 	sed -i '/kubeflow.org_mpijobs.yaml/d' $(EXTERNAL_CRDS_DIR)/training-operator/kustomization.yaml
 
-.PHONY: kf-training-operator-manifest
-kf-training-operator-manifest: ## Copy the manifest from the training-operator to the dep-manifests directory.
-	if [ -d $(EXTERNAL_MANIFESTS_DIR)/training-operator ]; then rm -rf $(EXTERNAL_MANIFESTS_DIR)/training-operator ; fi
-	mkdir -p $(EXTERNAL_MANIFESTS_DIR)/training-operator
-	cp -rf $(KF_TRAINING_ROOT)/manifests/ $(EXTERNAL_MANIFESTS_DIR)/training-operator/ ;
-
 RAY_ROOT = $(shell $(GO_CMD) list -m -mod=readonly -f "{{.Dir}}" github.com/ray-project/kuberay/ray-operator)
 .PHONY: ray-operator-crd
 ray-operator-crd: ## Copy the CRDs from the ray-operator to the dep-crds directory.
diff --git a/Makefile-test.mk b/Makefile-test.mk
index b23c3819b6..d13da266f5 100644
--- a/Makefile-test.mk
+++ b/Makefile-test.mk
@@ -81,7 +81,7 @@ CREATE_KIND_CLUSTER ?= true
 test-e2e: kustomize ginkgo yq gomod-download jobset-operator-crd kf-training-operator-crd mpi-operator-crd kueuectl run-test-e2e-$(E2E_KIND_VERSION:kindest/node:v%=%)
 
 .PHONY: test-multikueue-e2e
-test-multikueue-e2e: kustomize ginkgo yq gomod-download jobset-operator-crd kf-training-operator-crd kf-training-operator-manifest mpi-operator-crd run-test-multikueue-e2e-$(E2E_KIND_VERSION:kindest/node:v%=%)
+test-multikueue-e2e: kustomize ginkgo yq gomod-download jobset-operator-crd kf-training-operator-crd mpi-operator-crd run-test-multikueue-e2e-$(E2E_KIND_VERSION:kindest/node:v%=%)
 
 E2E_TARGETS := $(addprefix run-test-e2e-,${E2E_K8S_VERSIONS})
 
@@ -97,9 +97,9 @@ run-test-e2e-%: FORCE
 	E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" JOBSET_VERSION=$(JOBSET_VERSION) KUBEFLOW_VERSION=$(KUBEFLOW_VERSION) KUBEFLOW_MPI_VERSION=$(KUBEFLOW_MPI_VERSION) ./hack/e2e-test.sh
 
 run-test-multikueue-e2e-%: K8S_VERSION = $(@:run-test-multikueue-e2e-%=%)
-run-test-multikueue-e2e-%: FORCE kf-training-operator-manifest # manifest is required to be available locally is it must be modified before usage
+run-test-multikueue-e2e-%: FORCE
 	@echo Running multikueue e2e for k8s ${K8S_VERSION}
-	E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" JOBSET_VERSION=$(JOBSET_VERSION) KUBEFLOW_VERSION=$(KUBEFLOW_VERSION) KUBEFLOW_MPI_VERSION=$(KUBEFLOW_MPI_VERSION) ./hack/multikueue-e2e-test.sh
+	E2E_KIND_VERSION="kindest/node:v$(K8S_VERSION)" KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) CREATE_KIND_CLUSTER=$(CREATE_KIND_CLUSTER) ARTIFACTS="$(ARTIFACTS)/$@" IMAGE_TAG=$(IMAGE_TAG) GINKGO_ARGS="$(GINKGO_ARGS)" JOBSET_VERSION=$(JOBSET_VERSION) KUBEFLOW_VERSION=$(KUBEFLOW_VERSION) KUBEFLOW_MPI_VERSION=$(KUBEFLOW_MPI_VERSION) EXTERNAL_MANIFESTS_DIR=$(EXTERNAL_MANIFESTS_DIR) KF_TRAINING_ROOT=$(KF_TRAINING_ROOT) ./hack/multikueue-e2e-test.sh
 
 SCALABILITY_RUNNER := $(PROJECT_DIR)/bin/performance-scheduler-runner
 .PHONY: performance-scheduler-runner
diff --git a/hack/e2e-common.sh b/hack/e2e-common.sh
index 3961c1bbc6..0cf538ab75 100644
--- a/hack/e2e-common.sh
+++ b/hack/e2e-common.sh
@@ -78,8 +78,12 @@ function install_jobset {
 
 function patch_kubeflow_manifest {
     # In order for MPI-operator and Training-operator to work on the same cluster it is required that:
-    # 1. 'kubeflow.org_mpijobs.yaml' is removed from base/crds/kustomization.yaml - https://github.com/kubeflow/training-operator/issues/1930
+    # 0. Copy the manifest from the training-operator to the dep-manifests directory.
+    mkdir -p "${EXTERNAL_MANIFESTS_DIR}/training-operator"
+    cp -rpf ${KF_TRAINING_ROOT}/manifests/ ${EXTERNAL_MANIFESTS_DIR}/training-operator/ ;
     chmod -R u+w "${KUBEFLOW_MANIFEST_DIR}/"
+
+    # 1. 'kubeflow.org_mpijobs.yaml' is removed from base/crds/kustomization.yaml - https://github.com/kubeflow/training-operator/issues/1930
     sed -i '/kubeflow.org_mpijobs.yaml/d' "${KUBEFLOW_MANIFEST_DIR}/base/crds/kustomization.yaml"
 
     # 2. Training-operator deployment file is patched and manually enabled for all kubeflow jobs except for mpi - https://github.com/kubeflow/training-operator/issues/1777