Add inferenceConfig to Playground #245

Merged · 1 commit · Jan 18, 2025
9 changes: 5 additions & 4 deletions README.md
@@ -66,10 +66,11 @@ spec:
  source:
    modelHub:
      modelID: facebook/opt-125m
-  inferenceFlavors:
-  - name: t4 # GPU type
-    requests:
-      nvidia.com/gpu: 1
+  inferenceConfig:
+    flavors:
+    - name: default # Configure GPU type
+      requests:
+        nvidia.com/gpu: 1
```

#### Inference Playground
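For orientation, here is a hedged sketch of a full manifest using the renamed field. The apiVersion, kind, and metadata are assumptions not confirmed by this diff; the spec block mirrors the README hunk above.

```yaml
# Sketch only: apiVersion, kind, and metadata are assumed, not part of this PR.
apiVersion: llmaz.io/v1alpha1
kind: OpenModel
metadata:
  name: opt-125m
spec:
  source:
    modelHub:
      modelID: facebook/opt-125m
  inferenceConfig:
    flavors:
      - name: default # Configure GPU type
        requests:
          nvidia.com/gpu: 1
```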
16 changes: 11 additions & 5 deletions api/core/v1alpha1/model_types.go
@@ -122,6 +122,15 @@ type Flavor struct {
Params map[string]string `json:"params,omitempty"`
}

+// InferenceConfig represents the inference configurations for the model.
+type InferenceConfig struct {
+	// Flavors represents the accelerator requirements to serve the model.
+	// Flavors are fungible following the priority represented by the slice order.
+	// +kubebuilder:validation:MaxItems=8
+	// +optional
+	Flavors []Flavor `json:"flavors,omitempty"`
+}
+
type ModelName string

// ModelClaim represents claiming for one model, it's the standard claimMode
@@ -188,11 +197,8 @@ type ModelSpec struct {
// Source represents the source of the model, there're several ways to load
// the model such as loading from huggingface, OCI registry, s3, host path and so on.
Source ModelSource `json:"source"`
-	// InferenceFlavors represents the accelerator requirements to serve the model.
-	// Flavors are fungible following the priority represented by the slice order.
-	// +kubebuilder:validation:MaxItems=8
-	// +optional
-	InferenceFlavors []Flavor `json:"inferenceFlavors,omitempty"`
+	// InferenceConfig represents the inference configurations for the model.
+	InferenceConfig *InferenceConfig `json:"inferenceConfig,omitempty"`
}

const (
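The doc comment on Flavors states that flavors are fungible, in the priority given by slice order, with at most 8 entries. A hypothetical two-flavor configuration, with illustrative flavor names, could express a preferred accelerator and a fallback:

```yaml
# Illustrative only: the flavor names below are hypothetical.
inferenceConfig:
  flavors:
    - name: a100      # tried first (highest priority by slice order)
      requests:
        nvidia.com/gpu: 1
    - name: t4        # fallback when the first flavor cannot be satisfied
      requests:
        nvidia.com/gpu: 1
```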
32 changes: 26 additions & 6 deletions api/core/v1alpha1/zz_generated.deepcopy.go (generated; diff not rendered)

1 change: 1 addition & 0 deletions api/inference/v1alpha1/backendruntime_types.go
@@ -26,6 +26,7 @@ import (
// do not change the name.
type BackendRuntimeArg struct {
// Name represents the identifier of the backendRuntime argument.
+	// +kubebuilder:default=default
Name string `json:"name"`
// Flags represents all the preset configurations.
// Flag around with {{ .CONFIG }} is a configuration waiting for render.
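With the new `+kubebuilder:default=default` marker, an args entry that omits `name` should be defaulted to `default` on admission. A hedged sketch of a BackendRuntime fragment follows; everything outside the `args` entry, including the apiVersion and kind, is assumed rather than shown in this diff, and the templated flag is hypothetical (it only reuses the `{{ .CONFIG }}` render convention mentioned in the comment above).

```yaml
# Sketch only: apiVersion, kind, metadata, and the flag values are assumptions.
apiVersion: inference.llmaz.io/v1alpha1
kind: BackendRuntime
metadata:
  name: vllm
spec:
  args:
    - # name omitted: now defaulted to "default" by the kubebuilder marker
      flags:
        - "--model"
        - "{{ .ModelPath }}"  # hypothetical render placeholder
```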
14 changes: 3 additions & 11 deletions api/inference/v1alpha1/config_types.go
@@ -33,17 +33,9 @@ type BackendRuntimeConfig struct {
// from the default version.
// +optional
Version *string `json:"version,omitempty"`
-	// ArgName represents the argument name set in the backendRuntimeArg.
-	// If not set, will be derived by the model role, e.g. if one model's role
-	// is <draft>, the argName will be set to <speculative-decoding>. Better to
-	// set the argName explicitly.
-	// By default, the argName will be treated as <default> in runtime.
-	// +optional
-	ArgName *string `json:"argName,omitempty"`
-	// ArgFlags represents the argument flags appended to the backend.
-	// You can add new flags or overwrite the default flags.
-	// +optional
-	ArgFlags []string `json:"argFlags,omitempty"`
+	// Args represents the specified arguments of the backendRuntime,
+	// which will be appended to the backendRuntime.spec.Args.
+	Args *BackendRuntimeArg `json:"args,omitempty"`
// Envs represents the environments set to the container.
// +optional
Envs []corev1.EnvVar `json:"envs,omitempty"`
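On the consumer side, `argName` and `argFlags` collapse into a single `args` object whose shape matches `BackendRuntimeArg`. A hedged before/after sketch of a `backendRuntimeConfig` fragment; its exact placement inside a Playground spec and the flag value are assumptions, while `speculative-decoding` reuses the example from the removed comment.

```yaml
# Before this PR (fields now removed):
backendRuntimeConfig:
  argName: speculative-decoding
  argFlags:
    - "--swap-space=8"          # hypothetical flag
---
# After this PR: one args object matching BackendRuntimeArg:
backendRuntimeConfig:
  args:
    name: speculative-decoding  # may be omitted now that name defaults to "default"
    flags:
      - "--swap-space=8"        # hypothetical flag
```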
28 changes: 19 additions & 9 deletions api/inference/v1alpha1/zz_generated.deepcopy.go (generated; diff not rendered)

12 changes: 6 additions & 6 deletions client-go/applyconfiguration/core/v1alpha1/flavor.go (generated; diff not rendered)

43 changes: 43 additions & 0 deletions client-go/applyconfiguration/core/v1alpha1/inferenceconfig.go (generated; diff not rendered)

10 changes: 5 additions & 5 deletions client-go/applyconfiguration/core/v1alpha1/modelclaim.go (generated; diff not rendered)

10 changes: 5 additions & 5 deletions client-go/applyconfiguration/core/v1alpha1/modelrefer.go (generated; diff not rendered)

25 changes: 10 additions & 15 deletions client-go/applyconfiguration/core/v1alpha1/modelspec.go (generated; diff not rendered)
