diff --git a/backend/src/plugins/kube.ts b/backend/src/plugins/kube.ts index ed1c43eaa6..60610f7c61 100644 --- a/backend/src/plugins/kube.ts +++ b/backend/src/plugins/kube.ts @@ -4,7 +4,7 @@ import { FastifyInstance } from 'fastify'; import * as jsYaml from 'js-yaml'; import * as k8s from '@kubernetes/client-node'; import { DEV_MODE } from '../utils/constants'; -import { cleanupDSPSuffix, initializeWatchedResources } from '../utils/resourceUtils'; +import { cleanupGPU, initializeWatchedResources } from '../utils/resourceUtils'; import { User } from '@kubernetes/client-node/dist/config_types'; const CONSOLE_CONFIG_YAML_FIELD = 'console-config.yaml'; @@ -85,12 +85,10 @@ export default fp(async (fastify: FastifyInstance) => { // Initialize the watching of resources initializeWatchedResources(fastify); - // TODO: Delete this code in the future once we have no customers using RHODS 1.19 / ODH 2.4.0 - // Cleanup for display name suffix of [DSP] - cleanupDSPSuffix(fastify).catch((e) => + cleanupGPU(fastify).catch((e) => fastify.log.error( - `Unable to fully cleanup project display name suffixes - Some projects may not appear in the dashboard UI. ${ - e.response?.body?.message || e.message + `Unable to fully convert GPU to use accelerator profiles. 
${ + e.response?.body?.message || e.message || e }`, ), ); diff --git a/backend/src/routes/api/accelerators/acceleratorUtils.ts b/backend/src/routes/api/accelerators/acceleratorUtils.ts new file mode 100644 index 0000000000..d80b12b42a --- /dev/null +++ b/backend/src/routes/api/accelerators/acceleratorUtils.ts @@ -0,0 +1,71 @@ +import { AcceleratorInfo, KubeFastifyInstance } from '../../../types'; + +const RESOURCE_TYPES = [ + 'cpu', + 'memory', + 'pods', + 'ephemeral-storage', + 'hugepages-1Gi', + 'hugepages-2Mi', + 'attachable-volumes-aws-ebs', +]; + +const getIdentifiersFromResources = (resources: { [key: string]: string } = {}) => { + return Object.entries(resources) + .filter(([key]) => !RESOURCE_TYPES.includes(key)) + .reduce<{ [key: string]: number }>((identifiers, [key, value]) => { + identifiers[key] = isNaN(parseInt(value)) ? 0 : parseInt(value); + return identifiers; + }, {}); +}; + +export const getAcceleratorNumbers = async ( + fastify: KubeFastifyInstance, +): Promise => + fastify.kube.coreV1Api + .listNode() + .then((res) => + res.body.items.reduce( + (info, node) => { + // reduce resources down to just the accelerators and their counts + const allocatable = getIdentifiersFromResources(node.status.allocatable); + const capacity = getIdentifiersFromResources(node.status.capacity); + + // update the max count for each accelerator + Object.entries(allocatable).forEach( + ([key, value]) => (info.available[key] = Math.max(info.available[key] ?? 0, value)), + ); + + // update the total count for each accelerator + Object.entries(capacity).forEach( + ([key, value]) => (info.total[key] = (info.total[key] ?? 0) + value), + ); + + // update the allocated count for each accelerator + Object.entries(capacity).forEach( + ([key, value]) => + (info.allocated[key] = (info.allocated[key] ?? 0) + value - (allocatable[key] ?? 
0)), + ); + + // if any accelerators are available, the cluster is configured + const configured = + info.configured || Object.values(info.available).some((value) => value > 0); + + return { + total: info.total, + available: info.available, + allocated: info.allocated, + configured, + }; + }, + { configured: false, available: {}, total: {}, allocated: {} }, + ), + ) + .catch((e) => { + fastify.log.error( + `A ${e.statusCode} error occurred when listing cluster nodes: ${ + e.response?.body?.message || e.statusMessage + }`, + ); + return { configured: false, available: {}, total: {}, allocated: {} }; + }); diff --git a/backend/src/routes/api/accelerators/index.ts b/backend/src/routes/api/accelerators/index.ts new file mode 100644 index 0000000000..16d651ad6d --- /dev/null +++ b/backend/src/routes/api/accelerators/index.ts @@ -0,0 +1,11 @@ +import { KubeFastifyInstance, OauthFastifyRequest } from '../../../types'; +import { getAcceleratorNumbers } from './acceleratorUtils'; +import { logRequestDetails } from '../../../utils/fileUtils'; + +export default async (fastify: KubeFastifyInstance): Promise => { + fastify.get('/', async (request: OauthFastifyRequest) => { + logRequestDetails(fastify, request); + + return getAcceleratorNumbers(fastify); + }); +}; diff --git a/backend/src/routes/api/gpu/gpuUtils.ts b/backend/src/routes/api/gpu/gpuUtils.ts index e70cad3ece..c3726436fd 100644 --- a/backend/src/routes/api/gpu/gpuUtils.ts +++ b/backend/src/routes/api/gpu/gpuUtils.ts @@ -16,6 +16,9 @@ const storage: { lastFetch: number; lastValue: GPUInfo } = { lastFetch: 0, }; +/** + * @deprecated - use getAcceleratorNumbers instead + */ export const getGPUNumber = async (fastify: KubeFastifyInstance): Promise => { if (storage.lastFetch >= Date.now() - 30_000) { fastify.log.info(`Returning cached gpu value (${JSON.stringify(storage)})`); @@ -67,11 +70,15 @@ export const getGPUNumber = async (fastify: KubeFastifyInstance): Promise => { fastify.get('/', async (request: 
OauthFastifyRequest) => { logRequestDetails(fastify, request); diff --git a/backend/src/types.ts b/backend/src/types.ts index f52c522943..1b8e0e6e1e 100644 --- a/backend/src/types.ts +++ b/backend/src/types.ts @@ -254,6 +254,7 @@ export type KubeDecorator = KubeStatus & { customObjectsApi: k8s.CustomObjectsApi; rbac: k8s.RbacAuthorizationV1Api; currentToken: string; + }; export type KubeFastifyInstance = FastifyInstance & { @@ -755,6 +756,14 @@ export type GPUInfo = { available: number; autoscalers: gpuScale[]; }; + +export type AcceleratorInfo = { + configured: boolean; + available: {[key: string]: number}; + total: {[key: string]: number}; + allocated: {[key: string]: number}; +} + export type EnvironmentVariable = EitherNotBoth< { value: string | number }, { valueFrom: Record } @@ -805,12 +814,17 @@ export type NotebookData = { notebookSizeName: string; imageName: string; imageTagName: string; - gpus: number; + accelerator: AcceleratorState; envVars: EnvVarReducedTypeKeyValues; state: NotebookState; username?: string; }; +export type AcceleratorState = { + accelerator?: AcceleratorKind; + count: number; +}; + export const LIMIT_NOTEBOOK_IMAGE_GPU = 'nvidia.com/gpu'; type DisplayNameAnnotations = Partial<{ @@ -868,19 +882,21 @@ export type SupportedModelFormats = { autoSelect?: boolean; }; -export type GPUCount = string | number; + +export enum ContainerResourceAttributes { + CPU = 'cpu', + MEMORY = 'memory', +} export type ContainerResources = { requests?: { cpu?: string | number; memory?: string; - 'nvidia.com/gpu'?: GPUCount; - }; + } & Record; limits?: { cpu?: string | number; memory?: string; - 'nvidia.com/gpu'?: GPUCount; - }; + } & Record; }; export type ServingRuntime = K8sResourceCommon & { @@ -908,3 +924,26 @@ export type ServingRuntime = K8sResourceCommon & { volumes?: Volume[]; }; }; + +export type AcceleratorKind = K8sResourceCommon & { + metadata: { + name: string; + annotations?: Partial<{ + 'opendatahub.io/modified-date': string; + }>; + }; + 
spec: { + displayName: string; + enabled: boolean; + identifier: string; + description?: string; + tolerations?: NotebookToleration[]; + }; +}; + +export enum KnownLabels { + DASHBOARD_RESOURCE = 'opendatahub.io/dashboard', + PROJECT_SHARING = 'opendatahub.io/project-sharing', + MODEL_SERVING_PROJECT = 'modelmesh-enabled', + DATA_CONNECTION_AWS = 'opendatahub.io/managed', +} diff --git a/backend/src/utils/constants.ts b/backend/src/utils/constants.ts index 0aa5d0965f..1699c0eac1 100644 --- a/backend/src/utils/constants.ts +++ b/backend/src/utils/constants.ts @@ -1,6 +1,6 @@ import * as path from 'path'; import './dotenv'; -import { DashboardConfig, NotebookSize } from '../types'; +import { DashboardConfig, KnownLabels, NotebookSize } from '../types'; export const PORT = Number(process.env.PORT) || Number(process.env.BACKEND_PORT) || 8080; export const IP = process.env.IP || '0.0.0.0'; @@ -134,3 +134,5 @@ export const DEFAULT_NOTEBOOK_SIZES: NotebookSize[] = [ export const imageUrlRegex = /^([\w.\-_]+((?::\d+|)(?=\/[a-z0-9._-]+\/[a-z0-9._-]+))|)(?:\/|)([a-z0-9.\-_]+(?:\/[a-z0-9.\-_]+|))(?::([\w.\-_]{1,127})|)/; + +export const LABEL_SELECTOR_DASHBOARD_RESOURCE = `${KnownLabels.DASHBOARD_RESOURCE}=true`; diff --git a/backend/src/utils/notebookUtils.ts b/backend/src/utils/notebookUtils.ts index 769efce71d..af3c9e2703 100644 --- a/backend/src/utils/notebookUtils.ts +++ b/backend/src/utils/notebookUtils.ts @@ -1,10 +1,10 @@ import { getDashboardConfig } from './resourceUtils'; import { + ContainerResourceAttributes, EnvironmentVariable, ImageInfo, ImageTag, KubeFastifyInstance, - LIMIT_NOTEBOOK_IMAGE_GPU, Notebook, NotebookAffinity, NotebookData, @@ -156,7 +156,7 @@ export const assembleNotebook = async ( envName: string, tolerationSettings: NotebookTolerationSettings, ): Promise => { - const { notebookSizeName, imageName, imageTagName, gpus, envVars } = data; + const { notebookSizeName, imageName, imageTagName, accelerator, envVars } = data; const notebookSize = 
getNotebookSize(notebookSizeName); @@ -191,40 +191,35 @@ export const assembleNotebook = async ( const resources: NotebookResources = { ...notebookSize.resources }; const tolerations: NotebookToleration[] = []; - let affinity: NotebookAffinity = {}; - if (gpus > 0) { + const affinity: NotebookAffinity = {}; + if (accelerator.count > 0 && accelerator.accelerator) { if (!resources.limits) { resources.limits = {}; } if (!resources.requests) { resources.requests = {}; } - resources.limits[LIMIT_NOTEBOOK_IMAGE_GPU] = gpus; - resources.requests[LIMIT_NOTEBOOK_IMAGE_GPU] = gpus; - tolerations.push({ - effect: 'NoSchedule', - key: LIMIT_NOTEBOOK_IMAGE_GPU, - operator: 'Exists', - }); + resources.limits[accelerator.accelerator.spec.identifier] = accelerator.count; + resources.requests[accelerator.accelerator.spec.identifier] = accelerator.count; } else { - affinity = { - nodeAffinity: { - preferredDuringSchedulingIgnoredDuringExecution: [ - { - preference: { - matchExpressions: [ - { - key: 'nvidia.com/gpu.present', - operator: 'NotIn', - values: ['true'], - }, - ], - }, - weight: 1, - }, - ], - }, - }; + // step type down to string to avoid type errors + const containerResourceKeys: string[] = Object.values(ContainerResourceAttributes); + + Object.keys(resources.limits || {}).forEach((key) => { + if (!containerResourceKeys.includes(key)) { + delete resources.limits?.[key]; + } + }); + + Object.keys(resources.requests || {}).forEach((key) => { + if (!containerResourceKeys.includes(key)) { + delete resources.requests?.[key]; + } + }); + } + + if (accelerator.accelerator?.spec.tolerations) { + tolerations.push(...accelerator.accelerator.spec.tolerations); } if (tolerationSettings?.enabled) { @@ -272,6 +267,7 @@ export const assembleNotebook = async ( 'notebooks.opendatahub.io/last-image-selection': imageSelection, 'opendatahub.io/username': username, 'kubeflow-resource-stopped': null, + 'opendatahub.io/accelerator-name': accelerator.accelerator?.metadata.name || '', }, name: 
name, namespace: namespace, diff --git a/backend/src/utils/resourceUtils.ts b/backend/src/utils/resourceUtils.ts index 44cbeec139..567b7c9688 100644 --- a/backend/src/utils/resourceUtils.ts +++ b/backend/src/utils/resourceUtils.ts @@ -2,6 +2,7 @@ import * as _ from 'lodash'; import createError from 'http-errors'; import { PatchUtils, V1ConfigMap, V1Namespace, V1NamespaceList } from '@kubernetes/client-node'; import { + AcceleratorKind, BUILD_PHASE, BuildKind, BuildStatus, @@ -33,6 +34,7 @@ import { getRouteForClusterId, } from './componentUtils'; import { createCustomError } from './requestUtils'; +import { getAcceleratorNumbers } from '../routes/api/accelerators/acceleratorUtils'; const dashboardConfigMapName = 'odh-dashboard-config'; const consoleLinksGroup = 'console.openshift.io'; @@ -631,6 +633,126 @@ export const getConsoleLinks = (): ConsoleLinkKind[] => { return consoleLinksWatcher.getResources(); }; +/** + * Converts GPU usage to use accelerator by adding an accelerator profile CRD to the cluster if GPU usage is detected + */ +export const cleanupGPU = async (fastify: KubeFastifyInstance): Promise => { + // When we startup — in kube.ts we can handle a migration (catch ALL promise errors — exit gracefully and use fastify logging) + // Check for migration-gpu-status configmap in dashboard namespace — if found, exit early + const CONFIG_MAP_NAME = 'migration-gpu-status'; + + const continueProcessing = await fastify.kube.coreV1Api + .readNamespacedConfigMap(CONFIG_MAP_NAME, fastify.kube.namespace) + .then(() => { + // Found configmap, not continuing + fastify.log.info(`GPU migration already completed, skipping`); + return false; + }) + .catch((e) => { + if (e.statusCode === 404) { + // No config saying we have already migrated gpus, continue + return true; + } else { + throw `fetching gpu migration configmap had a ${e.statusCode} error: ${ + e.response?.body?.message || e?.response?.statusMessage + }`; + } + }); + + if (continueProcessing) { + // Read existing 
AcceleratorProfiles + const acceleratorProfilesResponse = await fastify.kube.customObjectsApi + .listNamespacedCustomObject( + 'dashboard.opendatahub.io', + 'v1alpha', + fastify.kube.namespace, + 'acceleratorprofiles', + ) + .catch((e) => { + console.log(e); + // If error shows up — CRD may not be installed, exit early + throw `A ${e.statusCode} error occurred when trying to fetch accelerator profiles: ${ + e.response?.body?.message || e?.response?.statusMessage + }`; + }); + + const acceleratorProfiles = ( + acceleratorProfilesResponse?.body as { + items: AcceleratorKind[]; + } + )?.items; + + // If not error and no profiles detected: + if ( + acceleratorProfiles && + Array.isArray(acceleratorProfiles) && + acceleratorProfiles.length === 0 + ) { + // if gpu detected on cluster, create our default migrated-gpu + const acceleratorDetected = await getAcceleratorNumbers(fastify); + + if (acceleratorDetected.configured) { + const payload: AcceleratorKind = { + kind: 'AcceleratorProfile', + apiVersion: 'dashboard.opendatahub.io/v1alpha', + metadata: { + name: 'migrated-gpu', + namespace: fastify.kube.namespace, + }, + spec: { + displayName: 'Nvidia GPU', + identifier: 'nvidia.com/gpu', + enabled: true, + tolerations: [ + { + effect: 'NoSchedule', + key: 'nvidia.com/gpu', + operator: 'Exists', + }, + ], + }, + }; + + try { + await fastify.kube.customObjectsApi.createNamespacedCustomObject( + 'dashboard.opendatahub.io', + 'v1alpha', + fastify.kube.namespace, + 'acceleratorprofiles', + payload, + ); + } catch (e) { + // If bad detection — exit early and dont create config + throw `A ${ + e.statusCode + } error occurred when trying to add migrated-gpu accelerator profile: ${ + e.response?.body?.message || e?.response?.statusMessage + }`; + } + } + } + + // Create configmap to flag operation as successful + const configMap = { + metadata: { + name: CONFIG_MAP_NAME, + namespace: fastify.kube.namespace, + }, + data: { + migratedCompleted: 'true', + }, + }; + + await 
fastify.kube.coreV1Api + .createNamespacedConfigMap(fastify.kube.namespace, configMap) + .then(() => fastify.log.info('Successfully migrated GPUs to accelerator profiles')) + .catch((e) => { + throw `A ${e.statusCode} error occurred when trying to create gpu migration configmap: ${ + e.response?.body?.message || e?.response?.statusMessage + }`; + }); + } +}; /** * @deprecated - Look to remove asap (see comments below) * Converts namespaces that have a display-name annotation suffixed with `[DSP]` over to using a label. diff --git a/docs/dashboard_config.md b/docs/dashboard_config.md index c0cb9891c9..e7ed17eb04 100644 --- a/docs/dashboard_config.md +++ b/docs/dashboard_config.md @@ -87,7 +87,6 @@ The `notebookController` field controls the Notebook Controller options such as ```yaml notebookController: enabled: true - gpuSetting: autodetect pvcSize: 20Gi notebookNamespace: odh-notebooks notebookTolerationSettings: diff --git a/frontend/src/api/index.ts b/frontend/src/api/index.ts index 89261907fa..9d8db270cf 100644 --- a/frontend/src/api/index.ts +++ b/frontend/src/api/index.ts @@ -17,6 +17,7 @@ export * from './k8s/users'; export * from './k8s/groups'; export * from './k8s/templates'; export * from './k8s/dashboardConfig'; +export * from './k8s/accelerators'; // Pipelines uses special redirected API export * from './pipelines/custom'; diff --git a/frontend/src/api/k8s/accelerators.ts b/frontend/src/api/k8s/accelerators.ts new file mode 100644 index 0000000000..de5d47d1e6 --- /dev/null +++ b/frontend/src/api/k8s/accelerators.ts @@ -0,0 +1,11 @@ +import { k8sListResource } from '@openshift/dynamic-plugin-sdk-utils'; +import { AcceleratorKind } from '~/k8sTypes'; +import { AcceleratorModel } from '~/api/models'; + +export const listAccelerators = async (namespace: string): Promise => + k8sListResource({ + model: AcceleratorModel, + queryOptions: { + ns: namespace, + }, + }).then((listResource) => listResource.items); diff --git a/frontend/src/api/k8s/notebooks.ts 
b/frontend/src/api/k8s/notebooks.ts index d7a86ba07e..ffd30d8a24 100644 --- a/frontend/src/api/k8s/notebooks.ts +++ b/frontend/src/api/k8s/notebooks.ts @@ -40,11 +40,13 @@ const assembleNotebook = ( description, notebookSize, envFrom, - gpus, + accelerator, image, volumes: formVolumes, volumeMounts: formVolumeMounts, tolerationSettings, + existingTolerations, + existingResources, } = data; const notebookId = overrideNotebookId || translateDisplayNameForK8s(notebookName); const imageUrl = `${image.imageStream?.status?.dockerImageRepository}:${image.imageVersion?.name}`; @@ -52,8 +54,11 @@ const assembleNotebook = ( const { affinity, tolerations, resources } = assemblePodSpecOptions( notebookSize.resources, - gpus, + accelerator, tolerationSettings, + existingTolerations, + undefined, + existingResources, ); const translatedUsername = usernameTranslate(username); @@ -101,6 +106,7 @@ const assembleNotebook = ( 'notebooks.opendatahub.io/last-image-selection': imageSelection, 'notebooks.opendatahub.io/inject-oauth': 'true', 'opendatahub.io/username': username, + 'opendatahub.io/accelerator-name': accelerator.accelerator?.metadata.name || '', }, name: notebookId, namespace: projectName, @@ -273,7 +279,7 @@ export const updateNotebook = ( // clean the envFrom array in case of merging the old value again container.envFrom = []; - // clean the resources, affinity and tolerations for GPU + // clean the resources, affinity and tolerations for accelerator oldNotebook.spec.template.spec.tolerations = []; oldNotebook.spec.template.spec.affinity = {}; container.resources = {}; diff --git a/frontend/src/api/k8s/servingRuntimes.ts b/frontend/src/api/k8s/servingRuntimes.ts index a8b4699cac..f1e757d932 100644 --- a/frontend/src/api/k8s/servingRuntimes.ts +++ b/frontend/src/api/k8s/servingRuntimes.ts @@ -7,12 +7,13 @@ import { k8sUpdateResource, } from '@openshift/dynamic-plugin-sdk-utils'; import { ServingRuntimeModel } from '~/api/models'; -import { K8sAPIOptions, ServingRuntimeKind 
} from '~/k8sTypes'; +import { K8sAPIOptions, ServingContainer, ServingRuntimeKind } from '~/k8sTypes'; import { CreatingServingRuntimeObject } from '~/pages/modelServing/screens/types'; import { ContainerResources } from '~/types'; import { getModelServingRuntimeName } from '~/pages/modelServing/utils'; import { getDisplayNameFromK8sResource, translateDisplayNameForK8s } from '~/pages/projects/utils'; import { applyK8sAPIOptions } from '~/api/apiMergeUtils'; +import { AcceleratorState } from '~/utilities/useAcceleratorState'; import { getModelServingProjects } from './projects'; import { assemblePodSpecOptions, getshmVolume, getshmVolumeMount } from './utils'; @@ -22,8 +23,9 @@ const assembleServingRuntime = ( servingRuntime: ServingRuntimeKind, isCustomServingRuntimesEnabled: boolean, isEditing?: boolean, + acceleratorState?: AcceleratorState, ): ServingRuntimeKind => { - const { name: displayName, numReplicas, modelSize, externalRoute, tokenAuth, gpus } = data; + const { name: displayName, numReplicas, modelSize, externalRoute, tokenAuth } = data; const createName = isCustomServingRuntimesEnabled ? translateDisplayNameForK8s(displayName) : getModelServingRuntimeName(namespace); @@ -50,6 +52,7 @@ const assembleServingRuntime = ( }), ...(isCustomServingRuntimesEnabled && { 'opendatahub.io/template-display-name': getDisplayNameFromK8sResource(servingRuntime), + 'opendatahub.io/accelerator-name': acceleratorState?.accelerator?.metadata.name || '', }), }, }; @@ -60,6 +63,7 @@ const assembleServingRuntime = ( ...updatedServingRuntime.metadata.annotations, 'enable-route': externalRoute ? 'true' : 'false', 'enable-auth': tokenAuth ? 
'true' : 'false', + 'opendatahub.io/accelerator-name': acceleratorState?.accelerator?.metadata.name || '', ...(isCustomServingRuntimesEnabled && { 'openshift.io/display-name': displayName.trim() }), }, }; @@ -77,7 +81,32 @@ const assembleServingRuntime = ( }, }; - const { affinity, tolerations, resources } = assemblePodSpecOptions(resourceSettings, gpus); + const { affinity, tolerations, resources } = assemblePodSpecOptions( + resourceSettings, + acceleratorState, + undefined, + servingRuntime.spec.tolerations, + undefined, + updatedServingRuntime.spec.containers[0]?.resources, + ); + + updatedServingRuntime.spec.containers = servingRuntime.spec.containers.map( + (container): ServingContainer => { + const volumeMounts = container.volumeMounts || []; + if (!volumeMounts.find((volumeMount) => volumeMount.mountPath === '/dev/shm')) { + volumeMounts.push(getshmVolumeMount()); + } + + return { + ...container, + resources, + affinity, + volumeMounts, + }; + }, + ); + + servingRuntime.spec.tolerations = tolerations; const volumes = updatedServingRuntime.spec.volumes || []; if (!volumes.find((volume) => volume.name === 'shm')) { @@ -86,21 +115,6 @@ const assembleServingRuntime = ( updatedServingRuntime.spec.volumes = volumes; - updatedServingRuntime.spec.containers = servingRuntime.spec.containers.map((container) => { - const volumeMounts = container.volumeMounts || []; - if (!volumeMounts.find((volumeMount) => volumeMount.mountPath === '/dev/shm')) { - volumeMounts.push(getshmVolumeMount()); - } - - return { - ...container, - resources, - affinity, - tolerations, - volumeMounts, - }; - }); - return updatedServingRuntime; }; @@ -143,18 +157,22 @@ export const getServingRuntime = (name: string, namespace: string): Promise => { +export const updateServingRuntime = (options: { + data: CreatingServingRuntimeObject; + existingData: ServingRuntimeKind; + isCustomServingRuntimesEnabled: boolean; + opts?: K8sAPIOptions; + acceleratorState?: AcceleratorState; +}): Promise => { + 
const { data, existingData, isCustomServingRuntimesEnabled, opts, acceleratorState } = options; + const updatedServingRuntime = assembleServingRuntime( data, existingData.metadata.namespace, existingData, isCustomServingRuntimesEnabled, true, + acceleratorState, ); return k8sUpdateResource( @@ -165,18 +183,29 @@ export const updateServingRuntime = ( ); }; -export const createServingRuntime = ( - data: CreatingServingRuntimeObject, - namespace: string, - servingRuntime: ServingRuntimeKind, - isCustomServingRuntimesEnabled: boolean, - opts?: K8sAPIOptions, -): Promise => { +export const createServingRuntime = (options: { + data: CreatingServingRuntimeObject; + namespace: string; + servingRuntime: ServingRuntimeKind; + isCustomServingRuntimesEnabled: boolean; + opts?: K8sAPIOptions; + acceleratorState?: AcceleratorState; +}): Promise => { + const { + data, + namespace, + servingRuntime, + isCustomServingRuntimesEnabled, + opts, + acceleratorState, + } = options; const assembledServingRuntime = assembleServingRuntime( data, namespace, servingRuntime, isCustomServingRuntimesEnabled, + false, + acceleratorState, ); return k8sCreateResource( diff --git a/frontend/src/api/k8s/utils.ts b/frontend/src/api/k8s/utils.ts index ce2867007c..68b0fc9f44 100644 --- a/frontend/src/api/k8s/utils.ts +++ b/frontend/src/api/k8s/utils.ts @@ -3,57 +3,55 @@ import { ContainerResources, PodToleration, TolerationSettings, - ContainerResourceAttributes, VolumeMount, Volume, } from '~/types'; import { determineTolerations } from '~/utilities/tolerations'; +import { AcceleratorState } from '~/utilities/useAcceleratorState'; export const assemblePodSpecOptions = ( resourceSettings: ContainerResources, - gpus: number, + accelerator?: AcceleratorState, tolerationSettings?: TolerationSettings, + existingTolerations?: PodToleration[], affinitySettings?: PodAffinity, + existingResources?: ContainerResources, ): { affinity: PodAffinity; tolerations: PodToleration[]; resources: ContainerResources; } => 
{ - let affinity: PodAffinity = structuredClone(affinitySettings || {}); - const resources = structuredClone(resourceSettings); - if (gpus > 0) { - if (!resources.limits) { - resources.limits = {}; + const affinity: PodAffinity = structuredClone(affinitySettings || {}); + let resources: ContainerResources = { + limits: { ...existingResources?.limits, ...resourceSettings?.limits }, + requests: { ...existingResources?.requests, ...resourceSettings?.requests }, + }; + + if (accelerator?.additionalOptions?.useExisting && !accelerator.useExisting) { + resources = structuredClone(resourceSettings); + } + + // Clear the last accelerator from the resources + if (accelerator?.initialAccelerator) { + if (resources.limits) { + delete resources.limits[accelerator.initialAccelerator.spec.identifier]; + } + if (resources.requests) { + delete resources.requests[accelerator.initialAccelerator.spec.identifier]; + } + } + + // Add back the new accelerator to the resources if count > 0 + if (accelerator?.accelerator && accelerator.count > 0) { + if (resources.limits) { + resources.limits[accelerator.accelerator.spec.identifier] = accelerator.count; } - if (!resources.requests) { - resources.requests = {}; + if (resources.requests) { + resources.requests[accelerator.accelerator.spec.identifier] = accelerator.count; } - resources.limits[ContainerResourceAttributes.NVIDIA_GPU] = gpus; - resources.requests[ContainerResourceAttributes.NVIDIA_GPU] = gpus; - } else { - delete resources.limits?.[ContainerResourceAttributes.NVIDIA_GPU]; - delete resources.requests?.[ContainerResourceAttributes.NVIDIA_GPU]; - affinity = { - nodeAffinity: { - preferredDuringSchedulingIgnoredDuringExecution: [ - { - preference: { - matchExpressions: [ - { - key: 'nvidia.com/gpu.present', - operator: 'NotIn', - values: ['true'], - }, - ], - }, - weight: 1, - }, - ], - }, - }; } - const tolerations = determineTolerations(gpus > 0, tolerationSettings); + const tolerations = determineTolerations(tolerationSettings, 
accelerator, existingTolerations); return { affinity, tolerations, resources }; }; diff --git a/frontend/src/api/models/openShift.ts b/frontend/src/api/models/openShift.ts index 543ff77a38..3d4ecf4735 100644 --- a/frontend/src/api/models/openShift.ts +++ b/frontend/src/api/models/openShift.ts @@ -55,3 +55,10 @@ export const TemplateModel: K8sModelCommon = { kind: 'Template', plural: 'templates', }; + +export const AcceleratorModel: K8sModelCommon = { + apiVersion: 'v1alpha', + apiGroup: 'dashboard.opendatahub.io', + kind: 'AcceleratorProfile', + plural: 'acceleratorprofiles', +}; diff --git a/frontend/src/components/ResourceNameTooltip.tsx b/frontend/src/components/ResourceNameTooltip.tsx index b7554f6ec2..a43f541dcb 100644 --- a/frontend/src/components/ResourceNameTooltip.tsx +++ b/frontend/src/components/ResourceNameTooltip.tsx @@ -1,17 +1,18 @@ import * as React from 'react'; import { + ClipboardCopy, DescriptionList, DescriptionListDescription, DescriptionListGroup, DescriptionListTerm, - Icon, + Popover, Stack, StackItem, - Tooltip, } from '@patternfly/react-core'; import { OutlinedQuestionCircleIcon } from '@patternfly/react-icons'; import { K8sResourceCommon } from '@openshift/dynamic-plugin-sdk-utils'; import '~/pages/notebookController/NotebookController.scss'; +import DashboardPopupIconButton from '~/concepts/dashboard/DashboardPopupIconButton'; type ResourceNameTooltipProps = { resource: K8sResourceCommon; @@ -23,10 +24,10 @@ const ResourceNameTooltip: React.FC = ({ children, res {children}{' '} {resource.metadata?.name && (
- Resource names and types are used to find your resources in OpenShift. @@ -36,7 +37,9 @@ const ResourceNameTooltip: React.FC = ({ children, res Resource name - {resource.metadata.name} + + {resource.metadata?.name} + @@ -48,10 +51,8 @@ const ResourceNameTooltip: React.FC = ({ children, res } > - - - - + } aria-label="More info" /> +
)} diff --git a/frontend/src/components/SimpleDropdownSelect.scss b/frontend/src/components/SimpleDropdownSelect.scss new file mode 100644 index 0000000000..bcb8baf49f --- /dev/null +++ b/frontend/src/components/SimpleDropdownSelect.scss @@ -0,0 +1,3 @@ +.full-width { + width: 100%; +} \ No newline at end of file diff --git a/frontend/src/components/SimpleDropdownSelect.tsx b/frontend/src/components/SimpleDropdownSelect.tsx index c1d0775549..fad00f220d 100644 --- a/frontend/src/components/SimpleDropdownSelect.tsx +++ b/frontend/src/components/SimpleDropdownSelect.tsx @@ -1,11 +1,22 @@ import * as React from 'react'; import { Dropdown, DropdownItem, DropdownToggle } from '@patternfly/react-core'; +import './SimpleDropdownSelect.scss'; + +export type SimpleDropdownOption = { + key: string; + label: React.ReactNode; + description?: React.ReactNode; + selectedLabel?: React.ReactNode; + isPlaceholder?: boolean; +}; type SimpleDropdownProps = { - options: { key: string; label: React.ReactNode }[]; + options: SimpleDropdownOption[]; value: string; placeholder?: string; - onChange: (key: string) => void; + onChange: (key: string, isPlaceholder: boolean) => void; + isFullWidth?: boolean; + isDisabled?: boolean; } & Omit, 'isOpen' | 'toggle' | 'dropdownItems' | 'onChange'>; const SimpleDropdownSelect: React.FC = ({ @@ -13,30 +24,43 @@ const SimpleDropdownSelect: React.FC = ({ options, placeholder = 'Select...', value, + isFullWidth, + isDisabled, ...props }) => { const [open, setOpen] = React.useState(false); + const selectedOption = options.find(({ key }) => key === value); + const selectedLabel = selectedOption?.selectedLabel ?? selectedOption?.label ?? placeholder; + return ( setOpen(!open)}> - <>{options.find(({ key }) => key === value)?.label ?? 
placeholder} + setOpen(!open)} + > + <>{selectedLabel} } - dropdownItems={options.map(({ key, label }) => ( - { - onChange(key); - setOpen(false); - }} - > - {label} - - ))} + dropdownItems={options + .sort((a, b) => (a.isPlaceholder === b.isPlaceholder ? 0 : a.isPlaceholder ? -1 : 1)) + .map(({ key, label, description, isPlaceholder }) => ( + { + onChange(key, !!isPlaceholder); + setOpen(false); + }} + > + {label} + + ))} /> ); }; diff --git a/frontend/src/concepts/dashboard/DashboardPopupIconButton.tsx b/frontend/src/concepts/dashboard/DashboardPopupIconButton.tsx new file mode 100644 index 0000000000..461e2a40cb --- /dev/null +++ b/frontend/src/concepts/dashboard/DashboardPopupIconButton.tsx @@ -0,0 +1,19 @@ +import React from 'react'; +import { Button, ButtonProps, Icon } from '@patternfly/react-core'; + +type DashboardPopupIconButtonProps = Omit & { + icon: React.ReactNode; +}; + +/** + * Overriding PF's button styles to allow for a11y in opening tooltips or popovers on a single item + */ +const DashboardPopupIconButton = ({ icon, ...props }: DashboardPopupIconButtonProps) => ( + +); + +export default DashboardPopupIconButton; diff --git a/frontend/src/k8sTypes.ts b/frontend/src/k8sTypes.ts index 2795ef2eed..d64f49ca57 100644 --- a/frontend/src/k8sTypes.ts +++ b/frontend/src/k8sTypes.ts @@ -44,7 +44,10 @@ type DisplayNameAnnotations = Partial<{ export type K8sDSGResource = K8sResourceCommon & { metadata: { - annotations?: DisplayNameAnnotations; + annotations?: DisplayNameAnnotations & + Partial<{ + 'opendatahub.io/recommended-accelerators': string; + }>; name: string; }; }; @@ -70,6 +73,7 @@ export type NotebookAnnotations = Partial<{ 'opendatahub.io/username': string; // the untranslated username behind the notebook 'notebooks.opendatahub.io/last-image-selection': string; // the last image they selected 'notebooks.opendatahub.io/last-size-selection': string; // the last notebook size they selected + 'opendatahub.io/accelerator-name': string; // the 
accelerator attached to the notebook }>; export type DashboardLabels = { @@ -92,6 +96,8 @@ export type ServingRuntimeAnnotations = Partial<{ 'opendatahub.io/template-name': string; 'opendatahub.io/template-display-name': string; 'opendatahub.io/disable-gpu': string; + 'opendatahub.io/recommended-accelerators': string; + 'opendatahub.io/accelerator-name': string; 'enable-route': string; 'enable-auth': string; }>; @@ -310,6 +316,15 @@ export type ServiceAccountKind = K8sResourceCommon & { }[]; }; +export type ServingContainer = { + args: string[]; + image: string; + name: string; + affinity?: PodAffinity; + resources: ContainerResources; + volumeMounts?: VolumeMount[]; +}; + export type ServingRuntimeKind = K8sResourceCommon & { metadata: { annotations?: DisplayNameAnnotations & ServingRuntimeAnnotations; @@ -323,15 +338,10 @@ export type ServingRuntimeKind = K8sResourceCommon & { memBufferBytes?: number; modelLoadingTimeoutMillis?: number; }; - containers: { - args: string[]; - image: string; - name: string; - resources: ContainerResources; - volumeMounts?: VolumeMount[]; - }[]; + containers: ServingContainer[]; supportedModelFormats: SupportedModelFormats[]; replicas: number; + tolerations?: PodToleration[]; volumes?: Volume[]; }; }; @@ -732,3 +742,19 @@ export type DashboardConfigKind = K8sResourceCommon & { templateDisablement?: string[]; }; }; + +export type AcceleratorKind = K8sResourceCommon & { + metadata: { + name: string; + annotations?: Partial<{ + 'opendatahub.io/modified-date': string; + }>; + }; + spec: { + displayName: string; + enabled: boolean; + identifier: string; + description?: string; + tolerations?: PodToleration[]; + }; +}; diff --git a/frontend/src/pages/BYONImages/BYONImagesTable.tsx b/frontend/src/pages/BYONImages/BYONImagesTable.tsx index fc0f828454..a17928dace 100644 --- a/frontend/src/pages/BYONImages/BYONImagesTable.tsx +++ b/frontend/src/pages/BYONImages/BYONImagesTable.tsx @@ -35,6 +35,8 @@ import { BYONImage } from '~/types'; import 
{ relativeTime } from '~/utilities/time'; import { updateBYONImage } from '~/services/imagesService'; import ImageErrorStatus from '~/pages/BYONImages/ImageErrorStatus'; +import ResourceNameTooltip from '~/components/ResourceNameTooltip'; +import { convertBYONImageToK8sResource } from '~/pages/projects/screens/spawner/spawnerUtils'; import { ImportImageModal } from './ImportImageModal'; import { DeleteImageModal } from './DeleteBYONImageModal'; import { UpdateImageModal } from './UpdateImageModal'; @@ -330,7 +332,11 @@ export const BYONImagesTable: React.FC = ({ images, forceU spaceItems={{ default: 'spaceItemsSm' }} alignItems={{ default: 'alignItemsCenter' }} > - {image.name} + + + {image.name} + + diff --git a/frontend/src/pages/modelServing/screens/projects/ServingRuntimeDetails.tsx b/frontend/src/pages/modelServing/screens/projects/ServingRuntimeDetails.tsx index f50ab5d8ce..53d3099e33 100644 --- a/frontend/src/pages/modelServing/screens/projects/ServingRuntimeDetails.tsx +++ b/frontend/src/pages/modelServing/screens/projects/ServingRuntimeDetails.tsx @@ -8,10 +8,10 @@ import { List, ListItem, } from '@patternfly/react-core'; -import { ServingRuntimeKind } from '~/k8sTypes'; import { AppContext } from '~/app/AppContext'; -import { ContainerResourceAttributes } from '~/types'; +import { ServingRuntimeKind } from '~/k8sTypes'; import { getServingRuntimeSizes } from './utils'; +import useServingAccelerator from './useServingAccelerator'; type ServingRuntimeDetailsProps = { obj: ServingRuntimeKind; @@ -22,6 +22,7 @@ const ServingRuntimeDetails: React.FC = ({ obj }) => const container = obj.spec.containers[0]; // can we assume the first container? 
const sizes = getServingRuntimeSizes(dashboardConfig); const size = sizes.find((size) => _.isEqual(size.resources, container.resources)); + const [accelerator] = useServingAccelerator(obj); return ( @@ -44,11 +45,21 @@ const ServingRuntimeDetails: React.FC = ({ obj }) => - Number of GPUs + Accelerator - {container.resources.limits?.[ContainerResourceAttributes.NVIDIA_GPU] || 0} + {accelerator.accelerator + ? accelerator.accelerator.spec.displayName + : accelerator.useExisting + ? 'Unknown' + : 'None'} + {!accelerator.useExisting && ( + + Number of accelerators + {accelerator.count} + + )} ); }; diff --git a/frontend/src/pages/modelServing/screens/projects/ServingRuntimeModal/ManageServingRuntimeModal.tsx b/frontend/src/pages/modelServing/screens/projects/ServingRuntimeModal/ManageServingRuntimeModal.tsx index a9ae72648b..ec99407ebf 100644 --- a/frontend/src/pages/modelServing/screens/projects/ServingRuntimeModal/ManageServingRuntimeModal.tsx +++ b/frontend/src/pages/modelServing/screens/projects/ServingRuntimeModal/ManageServingRuntimeModal.tsx @@ -36,6 +36,7 @@ import { import useCustomServingRuntimesEnabled from '~/pages/modelServing/customServingRuntimes/useCustomServingRuntimesEnabled'; import { getServingRuntimeFromName } from '~/pages/modelServing/customServingRuntimes/utils'; import { translateDisplayNameForK8s } from '~/pages/projects/utils'; +import useServingAccelerator from '~/pages/modelServing/screens/projects/useServingAccelerator'; import ServingRuntimeReplicaSection from './ServingRuntimeReplicaSection'; import ServingRuntimeSizeSection from './ServingRuntimeSizeSection'; import ServingRuntimeTokenSection from './ServingRuntimeTokenSection'; @@ -69,6 +70,9 @@ const ManageServingRuntimeModal: React.FC = ({ editInfo, }) => { const [createData, setCreateData, resetData, sizes] = useCreateServingRuntimeObject(editInfo); + const [acceleratorState, setAcceleratorState, resetAcceleratorData] = useServingAccelerator( + editInfo?.servingRuntime, + ); const 
[actionInProgress, setActionInProgress] = React.useState(false); const [error, setError] = React.useState(); @@ -106,6 +110,7 @@ const ManageServingRuntimeModal: React.FC = ({ setError(undefined); setActionInProgress(false); resetData(); + resetAcceleratorData(); }; const setErrorModal = (error: Error) => { @@ -127,33 +132,39 @@ const ManageServingRuntimeModal: React.FC = ({ } const servingRuntimeData = { ...createData, - gpus: isGpuDisabled(servingRuntimeSelected) ? 0 : createData.gpus, + existingTolerations: servingRuntimeSelected.spec.tolerations || [], }; const servingRuntimeName = translateDisplayNameForK8s(servingRuntimeData.name); const createRolebinding = servingRuntimeData.tokenAuth && allowCreate; + const accelerator = isGpuDisabled(servingRuntimeSelected) + ? { count: 0, accelerators: [], useExisting: false } + : acceleratorState; + Promise.all([ ...(editInfo?.servingRuntime ? [ - updateServingRuntime( - servingRuntimeData, - editInfo?.servingRuntime, - customServingRuntimesEnabled, - { + updateServingRuntime({ + data: servingRuntimeData, + existingData: editInfo?.servingRuntime, + isCustomServingRuntimesEnabled: customServingRuntimesEnabled, + opts: { dryRun: true, }, - ), + acceleratorState: accelerator, + }), ] : [ - createServingRuntime( - servingRuntimeData, + createServingRuntime({ + data: servingRuntimeData, namespace, - servingRuntimeSelected, - customServingRuntimesEnabled, - { + servingRuntime: servingRuntimeSelected, + isCustomServingRuntimesEnabled: customServingRuntimesEnabled, + opts: { dryRun: true, }, - ), + acceleratorState: accelerator, + }), ]), setUpTokenAuth( servingRuntimeData, @@ -173,19 +184,22 @@ const ManageServingRuntimeModal: React.FC = ({ : []), ...(editInfo?.servingRuntime ? 
[ - updateServingRuntime( - servingRuntimeData, - editInfo?.servingRuntime, - customServingRuntimesEnabled, - ), + updateServingRuntime({ + data: servingRuntimeData, + existingData: editInfo?.servingRuntime, + isCustomServingRuntimesEnabled: customServingRuntimesEnabled, + + acceleratorState: accelerator, + }), ] : [ - createServingRuntime( - servingRuntimeData, + createServingRuntime({ + data: servingRuntimeData, namespace, - servingRuntimeSelected, - customServingRuntimesEnabled, - ), + servingRuntime: servingRuntimeSelected, + isCustomServingRuntimesEnabled: customServingRuntimesEnabled, + acceleratorState: accelerator, + }), ]), setUpTokenAuth( servingRuntimeData, @@ -244,6 +258,7 @@ const ManageServingRuntimeModal: React.FC = ({ setData={setCreateData} templates={servingRuntimeTemplates || []} isEditing={!!editInfo} + acceleratorState={acceleratorState} /> @@ -254,6 +269,8 @@ const ManageServingRuntimeModal: React.FC = ({ setData={setCreateData} sizes={sizes} servingRuntimeSelected={servingRuntimeSelected} + acceleratorState={acceleratorState} + setAcceleratorState={setAcceleratorState} /> diff --git a/frontend/src/pages/modelServing/screens/projects/ServingRuntimeModal/ServingRuntimeSizeSection.tsx b/frontend/src/pages/modelServing/screens/projects/ServingRuntimeModal/ServingRuntimeSizeSection.tsx index bd04dad8d4..54079330e2 100644 --- a/frontend/src/pages/modelServing/screens/projects/ServingRuntimeModal/ServingRuntimeSizeSection.tsx +++ b/frontend/src/pages/modelServing/screens/projects/ServingRuntimeModal/ServingRuntimeSizeSection.tsx @@ -2,7 +2,6 @@ import * as React from 'react'; import { FormGroup, FormSection, - NumberInput, Select, SelectOption, Stack, @@ -13,9 +12,11 @@ import { CreatingServingRuntimeObject, ServingRuntimeSize, } from '~/pages/modelServing/screens/types'; -import useGPUSetting from '~/pages/notebookController/screens/server/useGPUSetting'; import { ServingRuntimeKind } from '~/k8sTypes'; import { isGpuDisabled } from 
'~/pages/modelServing/screens/projects/utils'; +import AcceleratorSelectField from '~/pages/notebookController/screens/server/AcceleratorSelectField'; +import { getCompatibleAcceleratorIdentifiers } from '~/pages/projects/screens/spawner/spawnerUtils'; +import { AcceleratorState } from '~/utilities/useAcceleratorState'; import ServingRuntimeSizeExpandedField from './ServingRuntimeSizeExpandedField'; type ServingRuntimeSizeSectionProps = { @@ -23,6 +24,8 @@ type ServingRuntimeSizeSectionProps = { setData: UpdateObjectAtPropAndValue; sizes: ServingRuntimeSize[]; servingRuntimeSelected?: ServingRuntimeKind; + acceleratorState: AcceleratorState; + setAcceleratorState: UpdateObjectAtPropAndValue; }; const ServingRuntimeSizeSection: React.FC = ({ @@ -30,9 +33,19 @@ const ServingRuntimeSizeSection: React.FC = ({ setData, sizes, servingRuntimeSelected, + acceleratorState, + setAcceleratorState, }) => { const [sizeDropdownOpen, setSizeDropdownOpen] = React.useState(false); - const { available: gpuAvailable, count: gpuCount } = useGPUSetting('autodetect'); + const [supportedAccelerators, setSupportedAccelerators] = React.useState(); + + React.useEffect(() => { + if (servingRuntimeSelected) { + setSupportedAccelerators(getCompatibleAcceleratorIdentifiers(servingRuntimeSelected)); + } else { + setSupportedAccelerators(undefined); + } + }, [servingRuntimeSelected]); const gpuDisabled = servingRuntimeSelected ? 
isGpuDisabled(servingRuntimeSelected) : false; @@ -88,25 +101,13 @@ const ServingRuntimeSizeSection: React.FC = ({ )} - {gpuAvailable && !gpuDisabled && ( - - ) => { - const target = event.currentTarget; - setData('gpus', parseInt(target.value) || 0); - }} - onBlur={(event: React.FormEvent) => { - const target = event.currentTarget; - const gpuInput = parseInt(target.value) || 0; - setData('gpus', Math.max(0, Math.min(gpuCount, gpuInput))); - }} - onMinus={() => setData('gpus', data.gpus - 1)} - onPlus={() => setData('gpus', data.gpus + 1)} + {!gpuDisabled && ( + + )} diff --git a/frontend/src/pages/modelServing/screens/projects/ServingRuntimeModal/ServingRuntimeTemplateSection.tsx b/frontend/src/pages/modelServing/screens/projects/ServingRuntimeModal/ServingRuntimeTemplateSection.tsx index 0b8f9ddd46..2cee7c6af1 100644 --- a/frontend/src/pages/modelServing/screens/projects/ServingRuntimeModal/ServingRuntimeTemplateSection.tsx +++ b/frontend/src/pages/modelServing/screens/projects/ServingRuntimeModal/ServingRuntimeTemplateSection.tsx @@ -1,5 +1,5 @@ import * as React from 'react'; -import { FormGroup, Select, SelectOption, StackItem, TextInput } from '@patternfly/react-core'; +import { FormGroup, Label, Split, SplitItem, StackItem, TextInput } from '@patternfly/react-core'; import { UpdateObjectAtPropAndValue } from '~/pages/projects/types'; import { CreatingServingRuntimeObject } from '~/pages/modelServing/screens/types'; import { TemplateKind } from '~/k8sTypes'; @@ -7,12 +7,16 @@ import { getServingRuntimeDisplayNameFromTemplate, getServingRuntimeNameFromTemplate, } from '~/pages/modelServing/customServingRuntimes/utils'; +import { isCompatibleWithAccelerator } from '~/pages/projects/screens/spawner/spawnerUtils'; +import SimpleDropdownSelect from '~/components/SimpleDropdownSelect'; +import { AcceleratorState } from '~/utilities/useAcceleratorState'; type ServingRuntimeTemplateSectionProps = { data: CreatingServingRuntimeObject; setData: 
UpdateObjectAtPropAndValue; templates: TemplateKind[]; isEditing?: boolean; + acceleratorState: AcceleratorState; }; const ServingRuntimeTemplateSection: React.FC = ({ @@ -20,17 +24,24 @@ const ServingRuntimeTemplateSection: React.FC { - const [isOpen, setOpen] = React.useState(false); - - const options = templates.map((template) => ( - - {getServingRuntimeDisplayNameFromTemplate(template)} - - )); + const options = templates.map((template) => ({ + key: getServingRuntimeNameFromTemplate(template), + selectedLabel: getServingRuntimeDisplayNameFromTemplate(template), + label: ( + + {getServingRuntimeDisplayNameFromTemplate(template)} + + + {isCompatibleWithAccelerator( + acceleratorState.accelerator?.spec.identifier, + template.objects[0], + ) && } + + + ), + })); return ( <> @@ -46,22 +57,20 @@ const ServingRuntimeTemplateSection: React.FC - + id="serving-runtime-template-selection" + aria-label="Select a template" + options={options} + placeholder={ + isEditing || templates.length === 0 ? data.servingRuntimeTemplateName : 'Select one' + } + value={data.servingRuntimeTemplateName ?? 
''} + onChange={(name) => { + setData('servingRuntimeTemplateName', name); + }} + /> diff --git a/frontend/src/pages/modelServing/screens/projects/useServingAccelerator.ts b/frontend/src/pages/modelServing/screens/projects/useServingAccelerator.ts new file mode 100644 index 0000000000..fa20a86e55 --- /dev/null +++ b/frontend/src/pages/modelServing/screens/projects/useServingAccelerator.ts @@ -0,0 +1,15 @@ +import { ServingRuntimeKind } from '~/k8sTypes'; +import useAcceleratorState, { AcceleratorState } from '~/utilities/useAcceleratorState'; +import { GenericObjectState } from '~/utilities/useGenericObjectState'; + +const useServingAccelerator = ( + servingRuntime?: ServingRuntimeKind | null, +): GenericObjectState => { + const acceleratorName = servingRuntime?.metadata.annotations?.['opendatahub.io/accelerator-name']; + const resources = servingRuntime?.spec.containers[0].resources; + const tolerations = servingRuntime?.spec.tolerations; + + return useAcceleratorState(resources, tolerations, acceleratorName); +}; + +export default useServingAccelerator; diff --git a/frontend/src/pages/modelServing/screens/projects/utils.ts b/frontend/src/pages/modelServing/screens/projects/utils.ts index 616789f8c1..a800389c3e 100644 --- a/frontend/src/pages/modelServing/screens/projects/utils.ts +++ b/frontend/src/pages/modelServing/screens/projects/utils.ts @@ -8,7 +8,7 @@ import { InferenceServiceStorageType, ServingRuntimeSize, } from '~/pages/modelServing/screens/types'; -import { ContainerResourceAttributes, DashboardConfig } from '~/types'; +import { DashboardConfig } from '~/types'; import { DEFAULT_MODEL_SERVER_SIZES } from '~/pages/modelServing/screens/const'; import { useAppContext } from '~/app/AppContext'; import { useDeepCompareMemoize } from '~/utilities/useDeepCompareMemoize'; @@ -61,7 +61,6 @@ export const useCreateServingRuntimeObject = (existingData?: { servingRuntimeTemplateName: '', numReplicas: 1, modelSize: sizes[0], - gpus: 0, externalRoute: false, 
tokenAuth: false, tokens: [], @@ -82,11 +81,6 @@ export const useCreateServingRuntimeObject = (existingData?: { const existingResources = existingData?.servingRuntime?.spec?.containers[0]?.resources || sizes[0].resources; - const existingGpus = - existingData?.servingRuntime?.spec?.containers[0]?.resources?.requests?.[ - ContainerResourceAttributes.NVIDIA_GPU - ] || 0; - const existingExternalRoute = existingData?.servingRuntime?.metadata.annotations?.['enable-route'] === 'true'; const existingTokenAuth = @@ -118,10 +112,6 @@ export const useCreateServingRuntimeObject = (existingData?: { resources: existingResources, }, ); - setCreateData( - 'gpus', - typeof existingGpus == 'string' ? parseInt(existingGpus) : existingGpus, - ); setCreateData('externalRoute', existingExternalRoute); setCreateData('tokenAuth', existingTokenAuth); setCreateData('tokens', existingTokens); @@ -131,7 +121,6 @@ export const useCreateServingRuntimeObject = (existingData?: { existingServingRuntimeTemplateName, existingNumReplicas, existingResources, - existingGpus, existingExternalRoute, existingTokenAuth, existingTokens, diff --git a/frontend/src/pages/modelServing/screens/types.ts b/frontend/src/pages/modelServing/screens/types.ts index 7b66c456c5..05b720e68d 100644 --- a/frontend/src/pages/modelServing/screens/types.ts +++ b/frontend/src/pages/modelServing/screens/types.ts @@ -34,7 +34,6 @@ export type CreatingServingRuntimeObject = { servingRuntimeTemplateName: string; numReplicas: number; modelSize: ServingRuntimeSize; - gpus: number; externalRoute: boolean; tokenAuth: boolean; tokens: ServingRuntimeToken[]; diff --git a/frontend/src/pages/notebookController/screens/server/AcceleratorSelectField.tsx b/frontend/src/pages/notebookController/screens/server/AcceleratorSelectField.tsx new file mode 100644 index 0000000000..36096a998b --- /dev/null +++ b/frontend/src/pages/notebookController/screens/server/AcceleratorSelectField.tsx @@ -0,0 +1,224 @@ +import * as React from 'react'; +import 
{ + Alert, + AlertVariant, + FormGroup, + InputGroup, + Label, + NumberInput, + Split, + SplitItem, + Stack, + StackItem, +} from '@patternfly/react-core'; +import { isHTMLInputElement } from '~/utilities/utils'; +import { AcceleratorKind } from '~/k8sTypes'; +import SimpleDropdownSelect, { SimpleDropdownOption } from '~/components/SimpleDropdownSelect'; +import { UpdateObjectAtPropAndValue } from '~/pages/projects/types'; +import { AcceleratorState } from '~/utilities/useAcceleratorState'; +import useAcceleratorCounts from './useAcceleratorCounts'; + +type AcceleratorSelectFieldProps = { + acceleratorState: AcceleratorState; + setAcceleratorState: UpdateObjectAtPropAndValue; + supportedAccelerators?: string[]; + resourceDisplayName?: string; +}; + +const AcceleratorSelectField: React.FC = ({ + acceleratorState, + setAcceleratorState, + supportedAccelerators, + resourceDisplayName = 'image', +}) => { + const [detectedAcceleratorInfo] = useAcceleratorCounts(); + + const { + accelerator, + count: acceleratorCount, + accelerators, + useExisting, + additionalOptions, + } = acceleratorState; + + const generateAcceleratorCountWarning = (newSize: number) => { + if (!accelerator) { + return ''; + } + + const identifier = accelerator?.spec.identifier; + + const detectedAcceleratorCount = Object.entries(detectedAcceleratorInfo.available).find( + ([id]) => identifier === id, + )?.[1]; + + if (detectedAcceleratorCount === undefined) { + return `No accelerator detected with the identifier ${identifier}.`; + } else if (newSize > detectedAcceleratorCount) { + return `Only ${detectedAcceleratorCount} accelerator${ + detectedAcceleratorCount > 1 ? 
's' : '' + } detected.`; + } + + return ''; + }; + + const acceleratorCountWarning = generateAcceleratorCountWarning(acceleratorCount); + + const isAcceleratorSupported = (accelerator: AcceleratorKind) => + supportedAccelerators?.includes(accelerator.spec.identifier); + + const enabledAccelerators = accelerators.filter((ac) => ac.spec.enabled); + + const formatOption = (ac: AcceleratorKind): SimpleDropdownOption => { + const displayName = `${ac.spec.displayName}${!ac.spec.enabled ? ' (disabled)' : ''}`; + + return { + key: ac.metadata.name, + selectedLabel: displayName, + description: ac.spec.description, + label: ( + + {displayName} + + + {isAcceleratorSupported(ac) && ( + + )} + + + ), + }; + }; + + const options: SimpleDropdownOption[] = enabledAccelerators + .sort((a, b) => { + const aSupported = isAcceleratorSupported(a); + const bSupported = isAcceleratorSupported(b); + if (aSupported && !bSupported) { + return -1; + } + if (!aSupported && bSupported) { + return 1; + } + return 0; + }) + .map((ac) => formatOption(ac)); + + let acceleratorAlertMessage: { title: string; variant: AlertVariant } | null = null; + if (accelerator && supportedAccelerators !== undefined) { + if (supportedAccelerators?.length === 0) { + acceleratorAlertMessage = { + title: `The ${resourceDisplayName} you have selected doesn't support the selected accelerator. 
It is recommended to use a compatible ${resourceDisplayName} for optimal performance.`, + variant: AlertVariant.info, + }; + } else if (!isAcceleratorSupported(accelerator)) { + acceleratorAlertMessage = { + title: `The ${resourceDisplayName} you have selected is not compatible with the selected accelerator`, + variant: AlertVariant.warning, + }; + } + } + + // add none option + options.push({ + key: '', + label: 'None', + isPlaceholder: true, + }); + + if (additionalOptions?.useExisting) { + options.push({ + key: 'use-existing', + label: 'Existing settings', + description: 'Use the existing accelerator settings from the notebook server', + }); + } else if (additionalOptions?.useDisabled) { + options.push(formatOption(additionalOptions?.useDisabled)); + } + + const onStep = (step: number) => { + setAcceleratorState('count', Math.max(acceleratorCount + step, 1)); + }; + + // if there is more than a none option, show the dropdown + if (options.length === 1) { + return null; + } + + return ( + + + + { + if (isPlaceholder) { + // none + setAcceleratorState('useExisting', false); + setAcceleratorState('accelerator', undefined); + setAcceleratorState('count', 0); + } else if (key === 'use-existing') { + // use existing settings + setAcceleratorState('useExisting', true); + setAcceleratorState('accelerator', undefined); + setAcceleratorState('count', 0); + } else { + // normal flow + setAcceleratorState('count', 1); + setAcceleratorState('useExisting', false); + setAcceleratorState( + 'accelerator', + accelerators.find((ac) => ac.metadata.name === key), + ); + } + }} + /> + + + {acceleratorAlertMessage && ( + + + + )} + {accelerator && ( + + + + onStep(1)} + onMinus={() => onStep(-1)} + onChange={(event) => { + if (isHTMLInputElement(event.target)) { + const newSize = Number(event.target.value); + setAcceleratorState('count', Math.max(newSize, 1)); + } + }} + /> + + + + )} + {acceleratorCountWarning && ( + + + + )} + + ); +}; + +export default AcceleratorSelectField; diff 
--git a/frontend/src/pages/notebookController/screens/server/NotebookServerDetails.tsx b/frontend/src/pages/notebookController/screens/server/NotebookServerDetails.tsx index e111770c84..edd6ebc690 100644 --- a/frontend/src/pages/notebookController/screens/server/NotebookServerDetails.tsx +++ b/frontend/src/pages/notebookController/screens/server/NotebookServerDetails.tsx @@ -16,11 +16,11 @@ import { getDescriptionForTag, getImageTagByContainer, getNameVersionString, - getNumGpus, } from '~/utilities/imageUtils'; import { useAppContext } from '~/app/AppContext'; import { useWatchImages } from '~/utilities/useWatchImages'; import { NotebookControllerContext } from '~/pages/notebookController/NotebookControllerContext'; +import useNotebookAccelerator from '~/pages/projects/screens/detail/notebooks/useNotebookAccelerator'; import { getNotebookSizes } from './usePreferredNotebookSize'; const NotebookServerDetails: React.FC = () => { @@ -28,6 +28,7 @@ const NotebookServerDetails: React.FC = () => { const { images, loaded } = useWatchImages(); const [isExpanded, setExpanded] = React.useState(false); const { dashboardConfig } = useAppContext(); + const [accelerator] = useNotebookAccelerator(notebook); const container: NotebookContainer | undefined = notebook?.spec.template.spec.containers.find( (container) => container.name === notebook.metadata.name, @@ -45,7 +46,6 @@ const NotebookServerDetails: React.FC = () => { const tagSoftware = getDescriptionForTag(tag); const tagDependencies = tag?.content.dependencies ?? []; - const numGpus = getNumGpus(container); const sizes = getNotebookSizes(dashboardConfig); const size = sizes.find((size) => _.isEqual(size.resources.limits, container.resources?.limits)); @@ -106,9 +106,21 @@ const NotebookServerDetails: React.FC = () => { {`${container.resources?.requests?.cpu} CPU, ${container.resources?.requests?.memory} Memory`} - Number of GPUs - {numGpus} + Accelerator + + {accelerator.accelerator + ? 
accelerator.accelerator.spec.displayName + : accelerator.useExisting + ? 'Unknown' + : 'None'} + + {!accelerator.useExisting && ( + + Number of accelerators + {accelerator.count} + + )} ); diff --git a/frontend/src/pages/notebookController/screens/server/SpawnerPage.tsx b/frontend/src/pages/notebookController/screens/server/SpawnerPage.tsx index 71a8e6ade5..a439c42d49 100644 --- a/frontend/src/pages/notebookController/screens/server/SpawnerPage.tsx +++ b/frontend/src/pages/notebookController/screens/server/SpawnerPage.tsx @@ -39,7 +39,7 @@ import ImpersonateAlert from '~/pages/notebookController/screens/admin/Impersona import useNamespaces from '~/pages/notebookController/useNamespaces'; import { fireTrackingEvent } from '~/utilities/segmentIOUtils'; import { getEnvConfigMap, getEnvSecret } from '~/services/envService'; -import GPUSelectField from './GPUSelectField'; +import useNotebookAccelerator from '~/pages/projects/screens/detail/notebooks/useNotebookAccelerator'; import SizeSelectField from './SizeSelectField'; import useSpawnerNotebookModalState from './useSpawnerNotebookModalState'; import BrowserTabPreferenceCheckbox from './BrowserTabPreferenceCheckbox'; @@ -49,6 +49,7 @@ import { usePreferredNotebookSize } from './usePreferredNotebookSize'; import StartServerModal from './StartServerModal'; import '~/pages/notebookController/NotebookController.scss'; +import AcceleratorSelectField from './AcceleratorSelectField'; const SpawnerPage: React.FC = () => { const navigate = useNavigate(); @@ -68,7 +69,7 @@ const SpawnerPage: React.FC = () => { tag: undefined, }); const { selectedSize, setSelectedSize, sizes } = usePreferredNotebookSize(); - const [selectedGpu, setSelectedGpu] = React.useState('0'); + const [accelerator, setAccelerator] = useNotebookAccelerator(currentUserNotebook); const [variableRows, setVariableRows] = React.useState([]); const [submitError, setSubmitError] = React.useState(null); @@ -231,7 +232,12 @@ const SpawnerPage: React.FC = () => { 
const fireStartServerEvent = () => { fireTrackingEvent('Notebook Server Started', { - GPU: parseInt(selectedGpu), + accelerator: accelerator.accelerator + ? `${accelerator.accelerator.spec.displayName} (${accelerator.accelerator.metadata.name}): ${accelerator.accelerator.spec.identifier}` + : accelerator.useExisting + ? 'Unknown' + : 'None', + acceleratorCount: accelerator.useExisting ? undefined : accelerator.count, lastSelectedSize: selectedSize.name, lastSelectedImage: `${selectedImageTag.image?.name}:${selectedImageTag.tag?.name}`, }); @@ -246,7 +252,7 @@ const SpawnerPage: React.FC = () => { notebookSizeName: selectedSize.name, imageName: selectedImageTag.image?.name || '', imageTagName: selectedImageTag.tag?.name || '', - gpus: parseInt(selectedGpu), + accelerator: accelerator, envVars: envVars, state: NotebookState.Started, username: impersonatedUsername || undefined, @@ -307,7 +313,10 @@ const SpawnerPage: React.FC = () => { setValue={(size) => setSelectedSize(size)} sizes={sizes} /> - setSelectedGpu(size)} /> + {renderEnvironmentVariableRows()} diff --git a/frontend/src/pages/notebookController/screens/server/useAcceleratorCounts.ts b/frontend/src/pages/notebookController/screens/server/useAcceleratorCounts.ts new file mode 100644 index 0000000000..1b5c879327 --- /dev/null +++ b/frontend/src/pages/notebookController/screens/server/useAcceleratorCounts.ts @@ -0,0 +1,13 @@ +import useFetchState, { FetchState } from '~/utilities/useFetchState'; +import { getAcceleratorCounts } from '~/services/acceleratorService'; +import { AcceleratorInfo } from '~/types'; + +const useAcceleratorCounts = (): FetchState => + useFetchState(getAcceleratorCounts, { + available: {}, + total: {}, + allocated: {}, + configured: false, + }); + +export default useAcceleratorCounts; diff --git a/frontend/src/pages/notebookController/screens/server/useAccelerators.ts b/frontend/src/pages/notebookController/screens/server/useAccelerators.ts new file mode 100644 index 
0000000000..d4f9545077 --- /dev/null +++ b/frontend/src/pages/notebookController/screens/server/useAccelerators.ts @@ -0,0 +1,11 @@ +import React from 'react'; +import useFetchState, { FetchState } from '~/utilities/useFetchState'; +import { AcceleratorKind } from '~/k8sTypes'; +import { listAccelerators } from '~/api'; + +const useAccelerators = (namespace: string): FetchState => { + const getAccelerators = React.useCallback(() => listAccelerators(namespace), [namespace]); + return useFetchState(getAccelerators, []); +}; + +export default useAccelerators; diff --git a/frontend/src/pages/projects/notebook/NotebookStatusToggle.tsx b/frontend/src/pages/projects/notebook/NotebookStatusToggle.tsx index cc63f9c3a6..8554d19866 100644 --- a/frontend/src/pages/projects/notebook/NotebookStatusToggle.tsx +++ b/frontend/src/pages/projects/notebook/NotebookStatusToggle.tsx @@ -2,7 +2,7 @@ import * as React from 'react'; import { Flex, FlexItem, Switch } from '@patternfly/react-core'; import { startNotebook, stopNotebook } from '~/api'; import { fireTrackingEvent } from '~/utilities/segmentIOUtils'; -import useNotebookGPUNumber from '~/pages/projects/screens/detail/notebooks/useNotebookGPUNumber'; +import useNotebookAccelerators from '~/pages/projects/screens/detail/notebooks/useNotebookAccelerator'; import useNotebookDeploymentSize from '~/pages/projects/screens/detail/notebooks/useNotebookDeploymentSize'; import { computeNotebooksTolerations } from '~/utilities/tolerations'; import { useAppContext } from '~/app/AppContext'; @@ -25,7 +25,7 @@ const NotebookStatusToggle: React.FC = ({ enablePipelines, }) => { const { notebook, isStarting, isRunning, isStopping, refresh } = notebookState; - const gpuNumber = useNotebookGPUNumber(notebook); + const [acceleratorData] = useNotebookAccelerators(notebook); const { size } = useNotebookDeploymentSize(notebook); const [isOpenConfirm, setOpenConfirm] = React.useState(false); const [inProgress, setInProgress] = React.useState(false); @@ 
-51,7 +51,12 @@ const NotebookStatusToggle: React.FC = ({ const fireNotebookTrackingEvent = React.useCallback( (action: 'started' | 'stopped') => { fireTrackingEvent(`Workbench ${action}`, { - GPU: gpuNumber, + acceleratorCount: acceleratorData.useExisting ? undefined : acceleratorData.count, + accelerator: acceleratorData.accelerator + ? `${acceleratorData.accelerator.spec.displayName} (${acceleratorData.accelerator.metadata.name}): ${acceleratorData.accelerator.spec.identifier}` + : acceleratorData.useExisting + ? 'Unknown' + : 'None', lastSelectedSize: size?.name || notebook.metadata.annotations?.['notebooks.opendatahub.io/last-size-selection'], @@ -64,7 +69,7 @@ const NotebookStatusToggle: React.FC = ({ }), }); }, - [gpuNumber, notebook, size], + [acceleratorData, notebook, size], ); const handleStop = React.useCallback(() => { diff --git a/frontend/src/pages/projects/screens/detail/notebooks/useNotebookAccelerator.ts b/frontend/src/pages/projects/screens/detail/notebooks/useNotebookAccelerator.ts new file mode 100644 index 0000000000..cd01955ad2 --- /dev/null +++ b/frontend/src/pages/projects/screens/detail/notebooks/useNotebookAccelerator.ts @@ -0,0 +1,18 @@ +import { NotebookKind } from '~/k8sTypes'; +import { Notebook } from '~/types'; +import useAcceleratorState, { AcceleratorState } from '~/utilities/useAcceleratorState'; +import { GenericObjectState } from '~/utilities/useGenericObjectState'; + +const useNotebookAccelerator = ( + notebook?: NotebookKind | Notebook | null, +): GenericObjectState => { + const acceleratorName = notebook?.metadata.annotations?.['opendatahub.io/accelerator-name']; + const resources = notebook?.spec.template.spec.containers.find( + (container) => container.name === notebook.metadata.name, + )?.resources; + const tolerations = notebook?.spec.template.spec.tolerations; + + return useAcceleratorState(resources, tolerations, acceleratorName); +}; + +export default useNotebookAccelerator; diff --git 
a/frontend/src/pages/projects/screens/detail/notebooks/useNotebookGPUNumber.ts b/frontend/src/pages/projects/screens/detail/notebooks/useNotebookGPUNumber.ts deleted file mode 100644 index d980f91009..0000000000 --- a/frontend/src/pages/projects/screens/detail/notebooks/useNotebookGPUNumber.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { NotebookKind } from '~/k8sTypes'; -import { ContainerResourceAttributes, GPUCount, NotebookContainer } from '~/types'; - -const useNotebookGPUNumber = (notebook?: NotebookKind): GPUCount => { - const container: NotebookContainer | undefined = notebook?.spec.template.spec.containers.find( - (container) => container.name === notebook.metadata.name, - ); - - const gpuNumbers = container?.resources?.limits?.[ContainerResourceAttributes.NVIDIA_GPU]; - - return gpuNumbers || 0; -}; - -export default useNotebookGPUNumber; diff --git a/frontend/src/pages/projects/screens/spawner/SpawnerFooter.tsx b/frontend/src/pages/projects/screens/spawner/SpawnerFooter.tsx index fbf05a466d..ab93e4d235 100644 --- a/frontend/src/pages/projects/screens/spawner/SpawnerFooter.tsx +++ b/frontend/src/pages/projects/screens/spawner/SpawnerFooter.tsx @@ -78,9 +78,14 @@ const SpawnerFooter: React.FC = ({ ); const afterStart = (name: string, type: 'created' | 'updated') => { - const { gpus, notebookSize, image } = startNotebookData; + const { accelerator, notebookSize, image } = startNotebookData; fireTrackingEvent(`Workbench ${type}`, { - GPU: gpus, + acceleratorCount: accelerator.useExisting ? undefined : accelerator.count, + accelerator: accelerator.accelerator + ? `${accelerator.accelerator.spec.displayName} (${accelerator.accelerator.metadata.name}): ${accelerator.accelerator.spec.identifier}` + : accelerator.useExisting + ? 'Unknown' + : 'None', lastSelectedSize: notebookSize.name, lastSelectedImage: image.imageVersion?.from ? 
`${image.imageVersion.from.name}` diff --git a/frontend/src/pages/projects/screens/spawner/SpawnerPage.tsx b/frontend/src/pages/projects/screens/spawner/SpawnerPage.tsx index cc12d854f3..83730ff768 100644 --- a/frontend/src/pages/projects/screens/spawner/SpawnerPage.tsx +++ b/frontend/src/pages/projects/screens/spawner/SpawnerPage.tsx @@ -21,14 +21,14 @@ import { getNotebookDisplayName, getProjectDisplayName, } from '~/pages/projects/utils'; -import GPUSelectField from '~/pages/notebookController/screens/server/GPUSelectField'; import { NotebookKind } from '~/k8sTypes'; import useNotebookImageData from '~/pages/projects/screens/detail/notebooks/useNotebookImageData'; import useNotebookDeploymentSize from '~/pages/projects/screens/detail/notebooks/useNotebookDeploymentSize'; -import useNotebookGPUNumber from '~/pages/projects/screens/detail/notebooks/useNotebookGPUNumber'; import NotebookRestartAlert from '~/pages/projects/components/NotebookRestartAlert'; import useWillNotebooksRestart from '~/pages/projects/notebook/useWillNotebooksRestart'; import CanEnableElyraPipelinesCheck from '~/concepts/pipelines/elyra/CanEnableElyraPipelinesCheck'; +import AcceleratorSelectField from '~/pages/notebookController/screens/server/AcceleratorSelectField'; +import useNotebookAccelerator from '~/pages/projects/screens/detail/notebooks/useNotebookAccelerator'; import { SpawnerPageSectionID } from './types'; import { ScrollableSelectorID, SpawnerPageSectionTitles } from './const'; import SpawnerFooter from './SpawnerFooter'; @@ -38,7 +38,11 @@ import { useNotebookSize } from './useNotebookSize'; import StorageField from './storage/StorageField'; import EnvironmentVariables from './environmentVariables/EnvironmentVariables'; import { useStorageDataObject } from './storage/utils'; -import { getRootVolumeName, useMergeDefaultPVCName } from './spawnerUtils'; +import { + getCompatibleAcceleratorIdentifiers, + getRootVolumeName, + useMergeDefaultPVCName, +} from './spawnerUtils'; import 
{ useNotebookEnvVariables } from './environmentVariables/useNotebookEnvVariables'; import DataConnectionField from './dataConnection/DataConnectionField'; import { useNotebookDataConnection } from './dataConnection/useNotebookDataConnection'; @@ -61,7 +65,7 @@ const SpawnerPage: React.FC = ({ existingNotebook }) => { imageVersion: undefined, }); const { selectedSize, setSelectedSize, sizes } = useNotebookSize(); - const [selectedGpu, setSelectedGpu] = React.useState('0'); + const [supportedAccelerators, setSupportedAccelerators] = React.useState(); const [storageDataWithoutDefault, setStorageData] = useStorageDataObject(existingNotebook); const storageData = useMergeDefaultPVCName(storageDataWithoutDefault, nameDesc.name); const [envVariables, setEnvVariables] = useNotebookEnvVariables(existingNotebook); @@ -97,10 +101,16 @@ const SpawnerPage: React.FC = ({ existingNotebook }) => { } }, [notebookSize, setSelectedSize]); - const notebookGPU = useNotebookGPUNumber(existingNotebook); + const [notebookAcceleratorState, setNotebookAcceleratorState] = + useNotebookAccelerator(existingNotebook); + React.useEffect(() => { - setSelectedGpu(notebookGPU.toString()); - }, [notebookGPU, setSelectedGpu]); + if (selectedImage.imageStream) { + setSupportedAccelerators(getCompatibleAcceleratorIdentifiers(selectedImage.imageStream)); + } else { + setSupportedAccelerators(undefined); + } + }, [selectedImage.imageStream]); const editNotebookDisplayName = existingNotebook ? 
getNotebookDisplayName(existingNotebook) : ''; @@ -157,6 +167,7 @@ const SpawnerPage: React.FC = ({ existingNotebook }) => { = ({ existingNotebook }) => { setValue={setSelectedSize} value={selectedSize} /> - setSelectedGpu(value)} + = ({ existingNotebook }) => { projectName: currentProject.metadata.name, image: selectedImage, notebookSize: selectedSize, - gpus: parseInt(selectedGpu), + accelerator: notebookAcceleratorState, volumes: [], volumeMounts: [], + existingTolerations: existingNotebook?.spec.template.spec.tolerations || [], + existingResources: existingNotebook?.spec.template.spec.containers[0].resources, }} storageData={storageData} envVariables={envVariables} diff --git a/frontend/src/pages/projects/screens/spawner/imageSelector/ImageSelectorField.tsx b/frontend/src/pages/projects/screens/spawner/imageSelector/ImageSelectorField.tsx index e7f6d6bf35..883908472b 100644 --- a/frontend/src/pages/projects/screens/spawner/imageSelector/ImageSelectorField.tsx +++ b/frontend/src/pages/projects/screens/spawner/imageSelector/ImageSelectorField.tsx @@ -17,11 +17,13 @@ import ImageStreamSelector from './ImageStreamSelector'; type ImageSelectorFieldProps = { selectedImage: ImageStreamAndVersion; setSelectedImage: React.Dispatch>; + compatibleAccelerator?: string; }; const ImageSelectorField: React.FC = ({ selectedImage, setSelectedImage, + compatibleAccelerator, }) => { const { dashboardNamespace } = useDashboardNamespace(); const buildStatuses = useBuildStatuses(dashboardNamespace); @@ -69,6 +71,7 @@ const ImageSelectorField: React.FC = ({ buildStatuses={buildStatuses} onImageStreamSelect={onImageStreamSelect} selectedImageStream={selectedImage.imageStream} + compatibleAccelerator={compatibleAccelerator} /> void; + compatibleAccelerator?: string; }; const ImageStreamSelector: React.FC = ({ @@ -22,49 +24,47 @@ const ImageStreamSelector: React.FC = ({ selectedImageStream, onImageStreamSelect, buildStatuses, + compatibleAccelerator, }) => { - const [imageSelectionOpen, 
setImageSelectionOpen] = React.useState(false); - - const selectOptionObjects = [...imageStreams] - .sort(compareImageStreamOrder) - .map((imageStream) => getImageStreamSelectOptionObject(imageStream)); - - const options = selectOptionObjects.map((optionObject) => { - const imageStream = optionObject.imageStream; + const options = [...imageStreams].sort(compareImageStreamOrder).map((imageStream) => { const description = getRelatedVersionDescription(imageStream); - return ( - - ); + const displayName = getImageStreamDisplayName(imageStream); + + return { + key: imageStream.metadata.name, + selectedLabel: displayName, + description: description, + disabled: !checkImageStreamAvailability(imageStream, buildStatuses), + label: ( + + {displayName} + + + {isCompatibleWithAccelerator(compatibleAccelerator, imageStream) && ( + + )} + + + ), + }; }); return ( - + /> ); }; diff --git a/frontend/src/pages/projects/screens/spawner/spawnerUtils.ts b/frontend/src/pages/projects/screens/spawner/spawnerUtils.ts index a4eecb6126..453a74f9e7 100644 --- a/frontend/src/pages/projects/screens/spawner/spawnerUtils.ts +++ b/frontend/src/pages/projects/screens/spawner/spawnerUtils.ts @@ -1,7 +1,13 @@ import * as React from 'react'; import compareVersions from 'compare-versions'; -import { NotebookSize, Volume, VolumeMount } from '~/types'; -import { BuildKind, ImageStreamKind, ImageStreamSpecTagType, NotebookKind } from '~/k8sTypes'; +import { BYONImage, NotebookSize, Volume, VolumeMount } from '~/types'; +import { + BuildKind, + ImageStreamKind, + ImageStreamSpecTagType, + K8sDSGResource, + NotebookKind, +} from '~/k8sTypes'; import { ConfigMapCategory, DataConnectionData, @@ -16,7 +22,6 @@ import { ROOT_MOUNT_PATH } from '~/pages/projects/pvc/const'; import { AWS_FIELDS } from '~/pages/projects/dataConnections/const'; import { BuildStatus, - ImageStreamSelectOptionObjectType, ImageVersionDependencyType, ImageVersionSelectOptionObjectType, } from './types'; @@ -64,12 +69,6 @@ export const 
getNameVersionString = (software: ImageVersionDependencyType): stri * Create object for PF Select component to use * `toString` decides the text shown for the select option */ -export const getImageStreamSelectOptionObject = ( - imageStream: ImageStreamKind, -): ImageStreamSelectOptionObjectType => ({ - imageStream, - toString: () => getImageStreamDisplayName(imageStream), -}); export const getImageVersionSelectOptionObject = ( imageStream: ImageStreamKind, imageVersion: ImageStreamSpecTagType, @@ -78,15 +77,10 @@ export const getImageVersionSelectOptionObject = ( toString: () => `${imageVersion.name}${checkVersionRecommended(imageVersion) ? ' (Recommended)' : ''}`, }); -export const isImageStreamSelectOptionObject = ( - object: unknown, -): object is ImageStreamSelectOptionObjectType => - (object as ImageStreamSelectOptionObjectType).imageStream !== undefined; export const isImageVersionSelectOptionObject = ( object: unknown, ): object is ImageVersionSelectOptionObjectType => (object as ImageVersionSelectOptionObjectType).imageVersion !== undefined; - /******************* Compare utils for sorting *******************/ const getBuildNumber = (build: BuildKind): number => { const buildNumber = build.metadata.annotations?.['openshift.io/build.number'] || '-1'; @@ -140,6 +134,37 @@ export const getImageStreamDescription = (imageStream: ImageStreamKind): string export const getImageSteamOrder = (imageStream: ImageStreamKind): number => parseInt(imageStream.metadata.annotations?.[IMAGE_ANNOTATIONS.IMAGE_ORDER] || '100'); +export const getCompatibleAcceleratorIdentifiers = ( + object: ImageStreamKind | K8sDSGResource, +): string[] => { + try { + const annotation = object.metadata.annotations?.['opendatahub.io/recommended-accelerators']; + // in the format of ["foo.com/gpu", "bar.com/gpu"] + if (annotation) { + const identifiers = JSON.parse(annotation); + if (Array.isArray(identifiers)) { + return identifiers; + } + } + } catch (error) { + // catch invalid json in 
metadata + } + return []; +}; + +export const isCompatibleWithAccelerator = ( + acceleratorIdentifier?: string, + obj?: ImageStreamKind | K8sDSGResource, +) => { + if (!obj || !acceleratorIdentifier) { + return false; + } + + return getCompatibleAcceleratorIdentifiers(obj).some( + (accelerator) => accelerator === acceleratorIdentifier, + ); +}; + /** * Parse annotation software field or dependencies field from long string to array */ @@ -388,3 +413,14 @@ export const isInvalidBYONImageStream = (imageStream: ImageStreamKind) => { (activeTag === undefined || activeTag.items === null) ); }; + +export const convertBYONImageToK8sResource = (image: BYONImage) => ({ + kind: 'ImageStream', + apiVersion: 'image.openshift.io/v1', + metadata: { + name: image.id, + annotations: { + 'openshift.io/display-name': image.name, + }, + }, +}); diff --git a/frontend/src/pages/projects/screens/spawner/types.ts b/frontend/src/pages/projects/screens/spawner/types.ts index 9f4f8c6bc3..3aec64e612 100644 --- a/frontend/src/pages/projects/screens/spawner/types.ts +++ b/frontend/src/pages/projects/screens/spawner/types.ts @@ -1,4 +1,4 @@ -import { BUILD_PHASE, ImageStreamKind, ImageStreamSpecTagType } from '~/k8sTypes'; +import { AcceleratorKind, BUILD_PHASE, ImageStreamKind, ImageStreamSpecTagType } from '~/k8sTypes'; export enum SpawnerPageSectionID { NAME_DESCRIPTION = 'name-and-description', @@ -40,3 +40,8 @@ export type ImageVersionSelectDataType = { imageStream?: ImageStreamKind; imageVersions: ImageStreamSpecTagType[]; }; + +export type AcceleratorSelectOptionObjectType = { + accelerator: AcceleratorKind; + toString: () => string; +}; diff --git a/frontend/src/pages/projects/types.ts b/frontend/src/pages/projects/types.ts index 89de7934e3..870eee7658 100644 --- a/frontend/src/pages/projects/types.ts +++ b/frontend/src/pages/projects/types.ts @@ -1,12 +1,15 @@ import { + ContainerResources, ImageStreamAndVersion, NotebookSize, + PodToleration, TolerationSettings, Volume, VolumeMount, } 
from '~/types'; import { ValueOf } from '~/typeHelpers'; import { AWSSecretKind } from '~/k8sTypes'; +import { AcceleratorState } from '~/utilities/useAcceleratorState'; import { AWS_KEYS } from './dataConnections/const'; export type UpdateObjectAtPropAndValue = (propKey: keyof T, propValue: ValueOf) => void; @@ -60,11 +63,13 @@ export type StartNotebookData = { projectName: string; notebookName: string; notebookSize: NotebookSize; - gpus: number; + accelerator: AcceleratorState; image: ImageStreamAndVersion; volumes?: Volume[]; volumeMounts?: VolumeMount[]; tolerationSettings?: TolerationSettings; + existingTolerations?: PodToleration[]; + existingResources?: ContainerResources; envFrom?: EnvironmentFromVariable[]; description?: string; /** An override for the assembleNotebook so it doesn't regen an id */ diff --git a/frontend/src/services/acceleratorService.ts b/frontend/src/services/acceleratorService.ts new file mode 100644 index 0000000000..7e7908dbb9 --- /dev/null +++ b/frontend/src/services/acceleratorService.ts @@ -0,0 +1,12 @@ +import axios from 'axios'; +import { AcceleratorInfo } from '~/types'; + +export const getAcceleratorCounts = (): Promise => { + const url = '/api/accelerators'; + return axios + .get(url) + .then((response) => response.data) + .catch((e) => { + throw new Error(e.response?.data?.message || e.message); // no `response` on network-level failures + }); +}; diff --git a/frontend/src/types.ts b/frontend/src/types.ts index d44dd5e0e5..48e8be0f8e 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -6,6 +6,7 @@ import { ServingRuntimeSize } from '~/pages/modelServing/screens/types'; import { EnvironmentFromVariable } from '~/pages/projects/types'; import { ImageStreamKind, ImageStreamSpecTagType } from './k8sTypes'; import { EitherNotBoth } from './typeHelpers'; +import { AcceleratorState } from './utilities/useAcceleratorState'; export type PrometheusQueryResponse = { data: { @@ -104,24 +105,22 @@ export type NotebookControllerUserState = { * OdhDashboardConfig contains gpuSetting 
as a string value override -- proper gpus return as numbers * TODO: Look to make it just number by properly parsing the value */ -export type GPUCount = string | number; export enum ContainerResourceAttributes { CPU = 'cpu', MEMORY = 'memory', - NVIDIA_GPU = 'nvidia.com/gpu', } export type ContainerResources = { requests?: { + [key: string]: number | string | undefined; cpu?: string | number; memory?: string; - 'nvidia.com/gpu'?: GPUCount; }; limits?: { + [key: string]: number | string | undefined; cpu?: string | number; memory?: string; - 'nvidia.com/gpu'?: GPUCount; }; }; @@ -329,7 +328,8 @@ export type TrackingEventProperties = { anonymousID?: string; type?: string; term?: string; - GPU?: GPUCount; + accelerator?: string; + acceleratorCount?: number; lastSelectedSize?: string; lastSelectedImage?: string; projectName?: string; @@ -344,9 +344,11 @@ export type NotebookPort = { }; export type PodToleration = { - effect: string; key: string; - operator: string; + operator?: string; + value?: string; + effect?: string; + tolerationSeconds?: number; }; export type NotebookContainer = { @@ -376,6 +378,7 @@ export type Notebook = K8sResourceCommon & { 'opendatahub.io/username': string; // the untranslated username behind the notebook 'notebooks.opendatahub.io/last-image-selection': string; // the last image they selected 'notebooks.opendatahub.io/last-size-selection': string; // the last notebook size they selected + 'opendatahub.io/accelerator-name': string | undefined; }>; labels: Partial<{ 'opendatahub.io/user': string; // translated username -- see translateUsername @@ -702,7 +705,7 @@ export type NotebookData = { notebookSizeName: string; imageName: string; imageTagName: string; - gpus: number; + accelerator: AcceleratorState; envVars: EnvVarReducedTypeKeyValues; state: NotebookState; // only used for admin calls, regular users cannot use this field @@ -737,3 +740,10 @@ export type ContextResourceData = { export type BreadcrumbItemType = { label: string; } & 
EitherNotBoth<{ link: string }, { isActive: boolean }>; + +export type AcceleratorInfo = { + configured: boolean; + available: { [key: string]: number }; + total: { [key: string]: number }; + allocated: { [key: string]: number }; +}; diff --git a/frontend/src/utilities/imageUtils.ts b/frontend/src/utilities/imageUtils.ts index d2bda5123b..6b1a04d17e 100644 --- a/frontend/src/utilities/imageUtils.ts +++ b/frontend/src/utilities/imageUtils.ts @@ -2,13 +2,11 @@ import compareVersions from 'compare-versions'; import { BuildStatus, BUILD_PHASE, - GPUCount, ImageInfo, ImageSoftwareType, ImageTag, ImageTagInfo, NotebookContainer, - ContainerResourceAttributes, } from '~/types'; const PENDING_PHASES = [ @@ -72,9 +70,6 @@ export const getVersion = (version?: string, prefix?: string): string => { export const getNameVersionString = (software: ImageSoftwareType): string => `${software.name}${getVersion(software.version, ' v')}`; -export const getNumGpus = (container?: NotebookContainer): GPUCount => - container?.resources?.limits?.[ContainerResourceAttributes.NVIDIA_GPU] || 0; - export const getDefaultTag = ( buildStatuses: BuildStatus[], image: ImageInfo, diff --git a/frontend/src/utilities/tolerations.ts b/frontend/src/utilities/tolerations.ts index aa3e17480d..5878656918 100644 --- a/frontend/src/utilities/tolerations.ts +++ b/frontend/src/utilities/tolerations.ts @@ -1,6 +1,8 @@ import { Patch } from '@openshift/dynamic-plugin-sdk-utils'; +import _ from 'lodash'; import { DashboardConfig, PodToleration, TolerationSettings } from '~/types'; import { NotebookKind } from '~/k8sTypes'; +import { AcceleratorState } from './useAcceleratorState'; export type TolerationChanges = { type: 'add' | 'remove' | 'replace' | 'nothing'; @@ -8,19 +10,35 @@ export type TolerationChanges = { }; export const determineTolerations = ( - hasGpu: boolean, tolerationSettings?: TolerationSettings, + acceleratorState?: AcceleratorState, + existingTolerations?: PodToleration[], ): PodToleration[] => 
{ - const tolerations: PodToleration[] = []; + let tolerations = [...(existingTolerations || [])]; // copy: push() below must not mutate the caller's array - if (hasGpu) { - tolerations.push({ - effect: 'NoSchedule', - key: 'nvidia.com/gpu', - operator: 'Exists', - }); + // remove old accelerator tolerations if they exist + if (acceleratorState?.initialAccelerator) { + tolerations = tolerations.filter( + (t) => !acceleratorState.initialAccelerator?.spec.tolerations?.some((t2) => _.isEqual(t2, t)), + ); } - if (tolerationSettings?.enabled) { + + // add new accelerator tolerations if they exist + if (acceleratorState?.accelerator?.spec.tolerations) { + tolerations.push(...acceleratorState.accelerator.spec.tolerations); + } + + // remove duplicated tolerations + tolerations = _.uniqWith(tolerations, _.isEqual); + + // add toleration from settings if they exist + if ( + tolerationSettings?.enabled && + !tolerations.some( + (t) => + t.key === tolerationSettings.key && t.operator === 'Exists' && t.effect === 'NoSchedule', + ) + ) { tolerations.push({ effect: 'NoSchedule', key: tolerationSettings.key, @@ -35,15 +53,9 @@ export const computeNotebooksTolerations = ( dashboardConfig: DashboardConfig, notebook: NotebookKind, ): TolerationChanges => { - const hasGPU = !!notebook.spec.template.spec.containers.find( - (container) => - !!container.resources?.limits?.['nvidia.com/gpu'] || - !!container.resources?.requests?.['nvidia.com/gpu'], - ); const tolerations = notebook.spec.template.spec.tolerations || []; const settings = determineTolerations( - hasGPU, dashboardConfig.spec.notebookController?.notebookTolerationSettings, ); diff --git a/frontend/src/utilities/useAcceleratorState.ts b/frontend/src/utilities/useAcceleratorState.ts new file mode 100644 index 0000000000..83a55cce42 --- /dev/null +++ b/frontend/src/utilities/useAcceleratorState.ts @@ -0,0 +1,130 @@ +import React from 'react'; +import { AcceleratorKind } from '~/k8sTypes'; +import useAccelerators from '~/pages/notebookController/screens/server/useAccelerators'; +import { 
useDashboardNamespace } from '~/redux/selectors'; +import { ContainerResourceAttributes, ContainerResources, PodToleration } from '~/types'; +import useGenericObjectState, { GenericObjectState } from '~/utilities/useGenericObjectState'; + +export type AcceleratorState = { + accelerator?: AcceleratorKind; + accelerators: AcceleratorKind[]; + initialAccelerator?: AcceleratorKind; + useExisting: boolean; + count: number; + additionalOptions?: { + useExisting?: boolean; + useDisabled?: AcceleratorKind; + }; +}; + +const useAcceleratorState = ( + resources?: ContainerResources, + tolerations?: PodToleration[], + existingAcceleratorName?: string, +): GenericObjectState => { + const [acceleratorState, setData, resetData] = useGenericObjectState({ + accelerator: undefined, + accelerators: [], + initialAccelerator: undefined, + count: 0, + useExisting: false, + }); + + const { dashboardNamespace } = useDashboardNamespace(); + const [accelerators, loaded, loadError] = useAccelerators(dashboardNamespace); + + React.useEffect(() => { + if (loaded && !loadError) { + setData('accelerators', accelerators); + + // Exit early if no resources = not in edit mode + if (!resources) { + return; + } + + const accelerator = accelerators.find( + (accelerator) => accelerator.metadata.name === existingAcceleratorName, + ); + + if (accelerator) { + setData('accelerator', accelerator); + setData('initialAccelerator', accelerator); + setData('count', Number(resources.requests?.[accelerator.spec.identifier] ?? 0)); + if (!accelerator.spec.enabled) { + setData('additionalOptions', { useDisabled: accelerator }); + } + } else { + // check if there is accelerator usage in the container + // this is to handle the case where the accelerator is disabled, deleted, or empty + const containerResourceAttributes = Object.values(ContainerResourceAttributes) as string[]; + const possibleAcceleratorRequests = Object.entries(resources.requests ?? 
{}) + .filter(([key]) => !containerResourceAttributes.includes(key)) + .map(([key, value]) => ({ identifier: key, count: value })); + if (possibleAcceleratorRequests.length > 0) { + // check if they are just using the nvidia.com/gpu + // if so, let's migrate them over to using the migrated-gpu accelerator profile if it exists + const nvidiaAcceleratorRequests = possibleAcceleratorRequests.find( + (request) => request.identifier === 'nvidia.com/gpu', + ); + + if ( + nvidiaAcceleratorRequests && + tolerations?.some( + (toleration) => + toleration.key === 'nvidia.com/gpu' && + toleration.operator === 'Exists' && + toleration.effect === 'NoSchedule', + ) + ) { + const migratedAccelerator = accelerators.find( + (accelerator) => accelerator.metadata.name === 'migrated-gpu', + ); + + if (migratedAccelerator) { + setData('accelerator', migratedAccelerator); + setData('initialAccelerator', migratedAccelerator); + setData('count', Number(nvidiaAcceleratorRequests.count ?? 0)); + if (!migratedAccelerator.spec.enabled) { + setData('additionalOptions', { useDisabled: migratedAccelerator }); // outer `accelerator` is undefined in this branch + } + } else { + // create a fake accelerator to use + const fakeAccelerator: AcceleratorKind = { + apiVersion: 'dashboard.opendatahub.io/v1alpha', + kind: 'AcceleratorProfile', + metadata: { + name: 'migrated-gpu', + }, + spec: { + identifier: 'nvidia.com/gpu', + displayName: 'Nvidia GPU', + enabled: true, + tolerations: [ + { + key: 'nvidia.com/gpu', + operator: 'Exists', + effect: 'NoSchedule', + }, + ], + }, + }; + + setData('accelerator', fakeAccelerator); + setData('accelerators', [fakeAccelerator, ...accelerators]); + setData('initialAccelerator', fakeAccelerator); + setData('count', Number(nvidiaAcceleratorRequests.count ?? 
0)); + } + } else { + // fallback to using the existing accelerator + setData('useExisting', true); + setData('additionalOptions', { useExisting: true }); + } + } + } + } + }, [accelerators, loaded, loadError, resources, tolerations, existingAcceleratorName, setData]); + + return [acceleratorState, setData, resetData]; +}; + +export default useAcceleratorState; diff --git a/frontend/src/utilities/useGenericObjectState.ts b/frontend/src/utilities/useGenericObjectState.ts index 0f535c5d15..ae2808256d 100644 --- a/frontend/src/utilities/useGenericObjectState.ts +++ b/frontend/src/utilities/useGenericObjectState.ts @@ -1,7 +1,7 @@ import * as React from 'react'; import { UpdateObjectAtPropAndValue } from '~/pages/projects/types'; -type GenericObjectState = [ +export type GenericObjectState = [ data: T, setData: UpdateObjectAtPropAndValue, resetDefault: () => void, diff --git a/manifests/base/cluster-role.yaml b/manifests/base/cluster-role.yaml index 51b5798502..d21d47cd04 100644 --- a/manifests/base/cluster-role.yaml +++ b/manifests/base/cluster-role.yaml @@ -3,6 +3,13 @@ apiVersion: rbac.authorization.k8s.io/v1 metadata: name: odh-dashboard rules: + - verbs: + - get + - list + apiGroups: + - '' + resources: + - nodes - verbs: - get - list diff --git a/manifests/base/fetch-accelerators.rbac.yaml b/manifests/base/fetch-accelerators.rbac.yaml new file mode 100644 index 0000000000..d11b7dc3ad --- /dev/null +++ b/manifests/base/fetch-accelerators.rbac.yaml @@ -0,0 +1,26 @@ +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: fetch-accelerators-role +rules: + - apiGroups: + - dashboard.opendatahub.io + verbs: + - get + - list + - watch + resources: + - acceleratorprofiles +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: accelerators +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: fetch-accelerators-role +subjects: + - apiGroup: rbac.authorization.k8s.io + kind: Group + name: system:authenticated \ 
No newline at end of file diff --git a/manifests/base/kustomization.yaml b/manifests/base/kustomization.yaml index 4fe93e8917..3595eb216d 100644 --- a/manifests/base/kustomization.yaml +++ b/manifests/base/kustomization.yaml @@ -21,6 +21,7 @@ resources: - image-puller.clusterrolebinding.yaml - model-serving-role.yaml - model-serving-role-binding.yaml + - fetch-accelerators.rbac.yaml images: - name: odh-dashboard newName: quay.io/opendatahub/odh-dashboard diff --git a/manifests/base/role.yaml b/manifests/base/role.yaml index bac744a29e..5a885ab041 100644 --- a/manifests/base/role.yaml +++ b/manifests/base/role.yaml @@ -3,6 +3,14 @@ apiVersion: rbac.authorization.k8s.io/v1 metadata: name: odh-dashboard rules: + - verbs: + - create + - get + - list + apiGroups: + - dashboard.opendatahub.io + resources: + - acceleratorprofiles - apiGroups: - route.openshift.io resources: diff --git a/manifests/crd/acceleratorprofiles.opendatahub.io.crd.yaml b/manifests/crd/acceleratorprofiles.opendatahub.io.crd.yaml new file mode 100644 index 0000000000..0b429de43d --- /dev/null +++ b/manifests/crd/acceleratorprofiles.opendatahub.io.crd.yaml @@ -0,0 +1,54 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: acceleratorprofiles.dashboard.opendatahub.io +spec: + group: dashboard.opendatahub.io + scope: Namespaced + names: + plural: acceleratorprofiles + singular: acceleratorprofile + kind: AcceleratorProfile + versions: + - name: v1alpha + served: true + storage: true + schema: + openAPIV3Schema: + type: object + required: + - spec + properties: + spec: + type: object + required: + - displayName + - enabled + - identifier + properties: + displayName: + type: string + enabled: + type: boolean + identifier: + type: string + description: + type: string + tolerations: + type: array + items: + type: object + required: + - key + properties: + key: + type: string + operator: + type: string + value: + type: string + effect: + type: string + 
tolerationSeconds: + type: integer + \ No newline at end of file diff --git a/manifests/crd/kustomization.yaml b/manifests/crd/kustomization.yaml index 7709378df1..3d8497d804 100644 --- a/manifests/crd/kustomization.yaml +++ b/manifests/crd/kustomization.yaml @@ -8,3 +8,4 @@ resources: - odhquickstarts.console.openshift.io.crd.yaml - odhdocuments.dashboard.opendatahub.io.crd.yaml - odhapplications.dashboard.opendatahub.io.crd.yaml +- acceleratorprofiles.opendatahub.io.crd.yaml \ No newline at end of file