diff --git a/hack/violation_exceptions.list b/hack/violation_exceptions.list index dd5d1786b74..68eed39821f 100644 --- a/hack/violation_exceptions.list +++ b/hack/violation_exceptions.list @@ -1,5 +1,6 @@ API rule violation: list_type_missing,./pkg/apis/serving/v1alpha1,BuiltInAdapter,Env API rule violation: list_type_missing,./pkg/apis/serving/v1alpha1,InferenceGraphList,Items +API rule violation: list_type_missing,./pkg/apis/serving/v1alpha1,InferenceRouter,Steps API rule violation: list_type_missing,./pkg/apis/serving/v1alpha1,ServingRuntimePodSpec,Containers API rule violation: list_type_missing,./pkg/apis/serving/v1alpha1,ServingRuntimePodSpec,Tolerations API rule violation: list_type_missing,./pkg/apis/serving/v1alpha1,ServingRuntimePodSpec,Volumes @@ -17,6 +18,8 @@ API rule violation: list_type_missing,./pkg/apis/serving/v1beta1,PodSpec,Readine API rule violation: list_type_missing,./pkg/apis/serving/v1beta1,PodSpec,Tolerations API rule violation: list_type_missing,./pkg/apis/serving/v1beta1,PodSpec,Volumes API rule violation: list_type_missing,./pkg/apis/serving/v1beta1,PredictorConfig,SupportedFrameworks +API rule violation: names_match,./pkg/apis/serving/v1alpha1,InferenceStep,StepName +API rule violation: names_match,./pkg/apis/serving/v1alpha1,InferenceTarget,ServiceURL API rule violation: names_match,./pkg/apis/serving/v1alpha1,ModelSpec,StorageURI API rule violation: names_match,./pkg/apis/serving/v1alpha1,ServingRuntimeSpec,GrpcMultiModelManagementEndpoint API rule violation: names_match,./pkg/apis/serving/v1beta1,ComponentExtensionSpec,TimeoutSeconds diff --git a/pkg/apis/serving/v1alpha1/inference_graph.go b/pkg/apis/serving/v1alpha1/inference_graph.go index dcec2dc7e50..87346c69944 100644 --- a/pkg/apis/serving/v1alpha1/inference_graph.go +++ b/pkg/apis/serving/v1alpha1/inference_graph.go @@ -38,6 +38,7 @@ type InferenceGraph struct { } // InferenceGraphSpec defines the InferenceGraph spec +// +k8s:openapi-gen=true type InferenceGraphSpec struct { // Map of InferenceGraph router nodes // Each node defines the router which can be different routing types @@ -70,7 +71,6 @@ const ( ) // +k8s:openapi-gen=true - // InferenceRouter defines the router for each InferenceGraph node with one or multiple steps // // ```yaml @@ -190,7 +190,6 @@ type InferenceRouter struct { // +k8s:openapi-gen=true // Exactly one InferenceTarget field must be specified - type InferenceTarget struct { // The node name for routing as next step // +optional @@ -206,7 +205,6 @@ type InferenceTarget struct { // InferenceStep defines the inference target of the current step with condition, weights and data. 
// +k8s:openapi-gen=true - type InferenceStep struct { // Unique name for the step within this node // +optional @@ -233,7 +231,6 @@ type InferenceStep struct { // InferenceGraphStatus defines the InferenceGraph conditions and status // +k8s:openapi-gen=true - type InferenceGraphStatus struct { // Conditions for InferenceGraph duckv1.Status `json:",inline"` diff --git a/pkg/apis/serving/v1beta1/openapi_generated.go b/pkg/apis/serving/v1beta1/openapi_generated.go index ece7a073a25..540e9a1a0e0 100644 --- a/pkg/apis/serving/v1beta1/openapi_generated.go +++ b/pkg/apis/serving/v1beta1/openapi_generated.go @@ -35,6 +35,11 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "./pkg/apis/serving/v1alpha1.ClusterServingRuntimeList": schema_pkg_apis_serving_v1alpha1_ClusterServingRuntimeList(ref), "./pkg/apis/serving/v1alpha1.InferenceGraph": schema_pkg_apis_serving_v1alpha1_InferenceGraph(ref), "./pkg/apis/serving/v1alpha1.InferenceGraphList": schema_pkg_apis_serving_v1alpha1_InferenceGraphList(ref), + "./pkg/apis/serving/v1alpha1.InferenceGraphSpec": schema_pkg_apis_serving_v1alpha1_InferenceGraphSpec(ref), + "./pkg/apis/serving/v1alpha1.InferenceGraphStatus": schema_pkg_apis_serving_v1alpha1_InferenceGraphStatus(ref), + "./pkg/apis/serving/v1alpha1.InferenceRouter": schema_pkg_apis_serving_v1alpha1_InferenceRouter(ref), + "./pkg/apis/serving/v1alpha1.InferenceStep": schema_pkg_apis_serving_v1alpha1_InferenceStep(ref), + "./pkg/apis/serving/v1alpha1.InferenceTarget": schema_pkg_apis_serving_v1alpha1_InferenceTarget(ref), "./pkg/apis/serving/v1alpha1.ModelSpec": schema_pkg_apis_serving_v1alpha1_ModelSpec(ref), "./pkg/apis/serving/v1alpha1.ServingRuntime": schema_pkg_apis_serving_v1alpha1_ServingRuntime(ref), "./pkg/apis/serving/v1alpha1.ServingRuntimeList": schema_pkg_apis_serving_v1alpha1_ServingRuntimeList(ref), @@ -348,6 +353,235 @@ func schema_pkg_apis_serving_v1alpha1_InferenceGraphList(ref common.ReferenceCal } } +func schema_pkg_apis_serving_v1alpha1_InferenceGraphSpec(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "InferenceGraphSpec defines the InferenceGraph spec", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "nodes": { + SchemaProps: spec.SchemaProps{ + Description: "Map of InferenceGraph router nodes Each node defines the router which can be different routing types", + Type: []string{"object"}, + AdditionalProperties: &spec.SchemaOrBool{ + Allows: true, + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("./pkg/apis/serving/v1alpha1.InferenceRouter"), + }, + }, + }, + }, + }, + }, + Required: []string{"nodes"}, + }, + }, + Dependencies: []string{ + "./pkg/apis/serving/v1alpha1.InferenceRouter"}, + } +} + +func schema_pkg_apis_serving_v1alpha1_InferenceGraphStatus(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "InferenceGraphStatus defines the InferenceGraph conditions and status", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "observedGeneration": { + SchemaProps: spec.SchemaProps{ + Description: "ObservedGeneration is the 'Generation' of the Service that was last processed by the controller.", + Type: []string{"integer"}, + Format: "int64", + }, + }, + "conditions": { + VendorExtensible: spec.VendorExtensible{ + Extensions: 
spec.Extensions{ + "x-kubernetes-patch-merge-key": "type", + "x-kubernetes-patch-strategy": "merge", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "Conditions the latest available observations of a resource's current state.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("knative.dev/pkg/apis.Condition"), + }, + }, + }, + }, + }, + "annotations": { + SchemaProps: spec.SchemaProps{ + Description: "Annotations is additional Status fields for the Resource to save some additional State as well as convey more information to the user. This is roughly akin to Annotations on any k8s resource, just the reconciler conveying richer information outwards.", + Type: []string{"object"}, + AdditionalProperties: &spec.SchemaOrBool{ + Allows: true, + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + }, + }, + }, + "url": { + SchemaProps: spec.SchemaProps{ + Description: "Url for the InferenceGraph", + Ref: ref("knative.dev/pkg/apis.URL"), + }, + }, + }, + }, + }, + Dependencies: []string{ + "knative.dev/pkg/apis.Condition", "knative.dev/pkg/apis.URL"}, + } +} + +func schema_pkg_apis_serving_v1alpha1_InferenceRouter(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "InferenceRouter defines the router for each InferenceGraph node with one or multiple steps\n\n```yaml kind: InferenceGraph metadata:\n name: canary-route\nspec:\n nodes:\n root:\n routerType: Splitter\n routes:\n - service: mymodel1\n weight: 20\n - service: mymodel2\n weight: 80\n```\n\n```yaml kind: InferenceGraph metadata:\n name: abtest\nspec:\n nodes:\n mymodel:\n routerType: Switch\n routes:\n - service: mymodel1\n condition: \"{ .input.userId == 1 }\"\n - service: mymodel2\n condition: \"{ .input.userId == 2 }\"\n```\n\nScoring a case using a model ensemble consists of scoring it using each model separately, then combining the results into a single scoring result using one of the pre-defined combination methods.\n\nTree Ensemble constitutes a case where simple algorithms for combining results of either classification or regression trees are well known. Multiple classification trees, for example, are commonly combined using a \"majority-vote\" method. Multiple regression trees are often combined using various averaging techniques. e.g tagging models with segment identifiers and weights to be used for their combination in these ways. ```yaml kind: InferenceGraph metadata:\n name: ensemble\nspec:\n nodes:\n root:\n routerType: Sequence\n routes:\n - service: feast\n - nodeName: ensembleModel\n data: $response\n ensembleModel:\n routerType: Ensemble\n routes:\n - service: sklearn-model\n - service: xgboost-model\n```\n\nScoring a case using a sequence, or chain of models allows the output of one model to be passed in as input to the subsequent models. ```yaml kind: InferenceGraph metadata:\n name: model-chainer\nspec:\n nodes:\n root:\n routerType: Sequence\n routes:\n - service: mymodel-s1\n - service: mymodel-s2\n data: $response\n - service: mymodel-s3\n data: $response\n```\n\nIn the flow described below, the pre_processing node base64 encodes the image and passes it to two model nodes in the flow. The encoded data is available to both these nodes for classification. The second node i.e. 
dog-breed-classification takes the original input from the pre_processing node along-with the response from the cat-dog-classification node to do further classification of the dog breed if required. ```yaml kind: InferenceGraph metadata:\n name: dog-breed-classification\nspec:\n nodes:\n root:\n routerType: Sequence\n routes:\n - service: cat-dog-classifier\n - nodeName: breed-classifier\n data: $request\n breed-classifier:\n routerType: Switch\n routes:\n - service: dog-breed-classifier\n condition: { .predictions.class == \"dog\" }\n - service: cat-breed-classifier\n condition: { .predictions.class == \"cat\" }\n```", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "routerType": { + SchemaProps: spec.SchemaProps{ + Description: "RouterType\n\n- `Sequence:` chain multiple inference steps with input/output from previous step\n\n- `Splitter:` randomly routes to the target service according to the weight\n\n- `Ensemble:` routes the request to multiple models and then merge the responses\n\n- `Switch:` routes the request to one of the steps based on condition", + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "steps": { + SchemaProps: spec.SchemaProps{ + Description: "Steps defines destinations for the current router node", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("./pkg/apis/serving/v1alpha1.InferenceStep"), + }, + }, + }, + }, + }, + }, + Required: []string{"routerType"}, + }, + }, + Dependencies: []string{ + "./pkg/apis/serving/v1alpha1.InferenceStep"}, + } +} + +func schema_pkg_apis_serving_v1alpha1_InferenceStep(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "InferenceStep defines the inference target of the current step with condition, weights and data.", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "name": { + SchemaProps: spec.SchemaProps{ + Description: "Unique name for the step within this node", + Type: []string{"string"}, + Format: "", + }, + }, + "nodeName": { + SchemaProps: spec.SchemaProps{ + Description: "The node name for routing as next step", + Type: []string{"string"}, + Format: "", + }, + }, + "serviceName": { + SchemaProps: spec.SchemaProps{ + Description: "named reference for InferenceService", + Type: []string{"string"}, + Format: "", + }, + }, + "serviceUrl": { + SchemaProps: spec.SchemaProps{ + Description: "InferenceService URL, mutually exclusive with ServiceName", + Type: []string{"string"}, + Format: "", + }, + }, + "data": { + SchemaProps: spec.SchemaProps{ + Description: "request data sent to the next route with input/output from the previous step $request $response.predictions", + Type: []string{"string"}, + Format: "", + }, + }, + "weight": { + SchemaProps: spec.SchemaProps{ + Description: "the weight for split of the traffic, only used for Split Router when weight is specified all the routing targets should be sum to 100", + Type: []string{"integer"}, + Format: "int64", + }, + }, + "condition": { + SchemaProps: spec.SchemaProps{ + Description: "routing based on the condition", + Type: []string{"string"}, + Format: "", + }, + }, + }, + }, + }, + } +} + +func schema_pkg_apis_serving_v1alpha1_InferenceTarget(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + 
Description: "Exactly one InferenceTarget field must be specified", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "nodeName": { + SchemaProps: spec.SchemaProps{ + Description: "The node name for routing as next step", + Type: []string{"string"}, + Format: "", + }, + }, + "serviceName": { + SchemaProps: spec.SchemaProps{ + Description: "named reference for InferenceService", + Type: []string{"string"}, + Format: "", + }, + }, + "serviceUrl": { + SchemaProps: spec.SchemaProps{ + Description: "InferenceService URL, mutually exclusive with ServiceName", + Type: []string{"string"}, + Format: "", + }, + }, + }, + }, + }, + } +} + func schema_pkg_apis_serving_v1alpha1_ModelSpec(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ @@ -807,6 +1041,7 @@ func schema_pkg_apis_serving_v1alpha1_SupportedModelFormat(ref common.ReferenceC }, }, }, + }, }, } @@ -3496,6 +3731,7 @@ func schema_pkg_apis_serving_v1beta1_ExplainerExtensionSpec(ref common.Reference }, }, }, + }, }, Dependencies: []string{ @@ -4635,6 +4871,7 @@ func schema_pkg_apis_serving_v1beta1_LightGBMSpec(ref common.ReferenceCallback) }, }, }, + }, }, Dependencies: []string{ @@ -4719,6 +4956,7 @@ func schema_pkg_apis_serving_v1beta1_ModelFormat(ref common.ReferenceCallback) c }, }, }, + }, }, } @@ -5343,6 +5581,7 @@ func schema_pkg_apis_serving_v1beta1_ONNXRuntimeSpec(ref common.ReferenceCallbac }, }, }, + }, }, Dependencies: []string{ @@ -5615,6 +5854,7 @@ func schema_pkg_apis_serving_v1beta1_PMMLSpec(ref common.ReferenceCallback) comm }, }, }, + }, }, Dependencies: []string{ @@ -5886,6 +6126,7 @@ func schema_pkg_apis_serving_v1beta1_PaddleServerSpec(ref common.ReferenceCallba }, }, }, + }, }, Dependencies: []string{ @@ -6608,6 +6849,7 @@ func schema_pkg_apis_serving_v1beta1_PredictorExtensionSpec(ref common.Reference }, }, }, + }, }, Dependencies: []string{ @@ -7477,6 +7719,7 @@ func schema_pkg_apis_serving_v1beta1_SKLearnSpec(ref common.ReferenceCallback) c }, }, }, + }, }, Dependencies: []string{ @@ -7798,6 +8041,7 @@ func schema_pkg_apis_serving_v1beta1_TFServingSpec(ref common.ReferenceCallback) }, }, }, + }, }, Dependencies: []string{ @@ -8070,6 +8314,7 @@ func schema_pkg_apis_serving_v1beta1_TorchServeSpec(ref common.ReferenceCallback }, }, }, + }, }, Dependencies: []string{ @@ -8836,6 +9081,7 @@ func schema_pkg_apis_serving_v1beta1_TritonSpec(ref common.ReferenceCallback) co }, }, }, + }, }, Dependencies: []string{ @@ -9108,6 +9354,7 @@ func schema_pkg_apis_serving_v1beta1_XGBoostSpec(ref common.ReferenceCallback) c }, }, }, + }, }, Dependencies: []string{ diff --git a/pkg/apis/serving/v1beta1/swagger.json b/pkg/apis/serving/v1beta1/swagger.json index b671e62a0aa..8180ce98b00 100644 --- a/pkg/apis/serving/v1beta1/swagger.json +++ b/pkg/apis/serving/v1beta1/swagger.json @@ -148,6 +148,131 @@ } } }, + "v1alpha1.InferenceGraphSpec": { + "description": "InferenceGraphSpec defines the InferenceGraph spec", + "type": "object", + "required": [ + "nodes" + ], + "properties": { + "nodes": { + "description": "Map of InferenceGraph router nodes Each node defines the router which can be different routing types", + "type": "object", + "additionalProperties": { + "default": {}, + "$ref": "#/definitions/v1alpha1.InferenceRouter" + } + } + } + }, + "v1alpha1.InferenceGraphStatus": { + "description": "InferenceGraphStatus defines the InferenceGraph conditions and status", + "type": "object", + "properties": { + "annotations": { + "description": "Annotations is additional 
Status fields for the Resource to save some additional State as well as convey more information to the user. This is roughly akin to Annotations on any k8s resource, just the reconciler conveying richer information outwards.", + "type": "object", + "additionalProperties": { + "type": "string", + "default": "" + } + }, + "conditions": { + "description": "Conditions the latest available observations of a resource's current state.", + "type": "array", + "items": { + "default": {}, + "$ref": "#/definitions/knative.Condition" + }, + "x-kubernetes-patch-merge-key": "type", + "x-kubernetes-patch-strategy": "merge" + }, + "observedGeneration": { + "description": "ObservedGeneration is the 'Generation' of the Service that was last processed by the controller.", + "type": "integer", + "format": "int64" + }, + "url": { + "description": "Url for the InferenceGraph", + "$ref": "#/definitions/knative.URL" + } + } + }, + "v1alpha1.InferenceRouter": { + "description": "InferenceRouter defines the router for each InferenceGraph node with one or multiple steps\n\n```yaml kind: InferenceGraph metadata:\n name: canary-route\nspec:\n nodes:\n root:\n routerType: Splitter\n routes:\n - service: mymodel1\n weight: 20\n - service: mymodel2\n weight: 80\n```\n\n```yaml kind: InferenceGraph metadata:\n name: abtest\nspec:\n nodes:\n mymodel:\n routerType: Switch\n routes:\n - service: mymodel1\n condition: \"{ .input.userId == 1 }\"\n - service: mymodel2\n condition: \"{ .input.userId == 2 }\"\n```\n\nScoring a case using a model ensemble consists of scoring it using each model separately, then combining the results into a single scoring result using one of the pre-defined combination methods.\n\nTree Ensemble constitutes a case where simple algorithms for combining results of either classification or regression trees are well known. Multiple classification trees, for example, are commonly combined using a \"majority-vote\" method. Multiple regression trees are often combined using various averaging techniques. e.g tagging models with segment identifiers and weights to be used for their combination in these ways. ```yaml kind: InferenceGraph metadata:\n name: ensemble\nspec:\n nodes:\n root:\n routerType: Sequence\n routes:\n - service: feast\n - nodeName: ensembleModel\n data: $response\n ensembleModel:\n routerType: Ensemble\n routes:\n - service: sklearn-model\n - service: xgboost-model\n```\n\nScoring a case using a sequence, or chain of models allows the output of one model to be passed in as input to the subsequent models. ```yaml kind: InferenceGraph metadata:\n name: model-chainer\nspec:\n nodes:\n root:\n routerType: Sequence\n routes:\n - service: mymodel-s1\n - service: mymodel-s2\n data: $response\n - service: mymodel-s3\n data: $response\n```\n\nIn the flow described below, the pre_processing node base64 encodes the image and passes it to two model nodes in the flow. The encoded data is available to both these nodes for classification. The second node i.e. dog-breed-classification takes the original input from the pre_processing node along-with the response from the cat-dog-classification node to do further classification of the dog breed if required. 
```yaml kind: InferenceGraph metadata:\n name: dog-breed-classification\nspec:\n nodes:\n root:\n routerType: Sequence\n routes:\n - service: cat-dog-classifier\n - nodeName: breed-classifier\n data: $request\n breed-classifier:\n routerType: Switch\n routes:\n - service: dog-breed-classifier\n condition: { .predictions.class == \"dog\" }\n - service: cat-breed-classifier\n condition: { .predictions.class == \"cat\" }\n```", + "type": "object", + "required": [ + "routerType" + ], + "properties": { + "routerType": { + "description": "RouterType\n\n- `Sequence:` chain multiple inference steps with input/output from previous step\n\n- `Splitter:` randomly routes to the target service according to the weight\n\n- `Ensemble:` routes the request to multiple models and then merge the responses\n\n- `Switch:` routes the request to one of the steps based on condition", + "type": "string", + "default": "" + }, + "steps": { + "description": "Steps defines destinations for the current router node", + "type": "array", + "items": { + "default": {}, + "$ref": "#/definitions/v1alpha1.InferenceStep" + } + } + } + }, + "v1alpha1.InferenceStep": { + "description": "InferenceStep defines the inference target of the current step with condition, weights and data.", + "type": "object", + "properties": { + "condition": { + "description": "routing based on the condition", + "type": "string" + }, + "data": { + "description": "request data sent to the next route with input/output from the previous step $request $response.predictions", + "type": "string" + }, + "name": { + "description": "Unique name for the step within this node", + "type": "string" + }, + "nodeName": { + "description": "The node name for routing as next step", + "type": "string" + }, + "serviceName": { + "description": "named reference for InferenceService", + "type": "string" + }, + "serviceUrl": { + "description": "InferenceService URL, mutually exclusive with ServiceName", + "type": "string" + }, + "weight": { + "description": "the weight for split of the traffic, only used for Split Router when weight is specified all the routing targets should be sum to 100", + "type": "integer", + "format": "int64" + } + } + }, + "v1alpha1.InferenceTarget": { + "description": "Exactly one InferenceTarget field must be specified", + "type": "object", + "properties": { + "nodeName": { + "description": "The node name for routing as next step", + "type": "string" + }, + "serviceName": { + "description": "named reference for InferenceService", + "type": "string" + }, + "serviceUrl": { + "description": "InferenceService URL, mutually exclusive with ServiceName", + "type": "string" + } + } + }, "v1alpha1.ModelSpec": { "description": "ModelSpec describes a TrainedModel", "type": "object", diff --git a/python/kserve/README.md b/python/kserve/README.md index bdc616b8d4d..1ba412c864e 100644 --- a/python/kserve/README.md +++ b/python/kserve/README.md @@ -71,6 +71,13 @@ Please review [KServe Client API](https://github.com/kserve/website/blob/main/do - [KnativeURL](docs/KnativeURL.md) - [KnativeVolatileTime](docs/KnativeVolatileTime.md) - [NetUrlUserinfo](docs/NetUrlUserinfo.md) + - [V1alpha1InferenceGraph](docs/V1alpha1InferenceGraph.md) + - [V1alpha1InferenceGraphList](docs/V1alpha1InferenceGraphList.md) + - [V1alpha1InferenceGraphSpec](docs/V1alpha1InferenceGraphSpec.md) + - [V1alpha1InferenceGraphStatus](docs/V1alpha1InferenceGraphStatus.md) + - [V1alpha1InferenceRouter](docs/V1alpha1InferenceRouter.md) + - [V1alpha1InferenceStep](docs/V1alpha1InferenceStep.md) + - 
[V1alpha1InferenceTarget](docs/V1alpha1InferenceTarget.md) - [V1beta1AIXExplainerSpec](docs/V1beta1AIXExplainerSpec.md) - [V1beta1AlibiExplainerSpec](docs/V1beta1AlibiExplainerSpec.md) - [V1beta1Batcher](docs/V1beta1Batcher.md) diff --git a/python/kserve/docs/V1alpha1InferenceGraph.md b/python/kserve/docs/V1alpha1InferenceGraph.md new file mode 100644 index 00000000000..7073d971529 --- /dev/null +++ b/python/kserve/docs/V1alpha1InferenceGraph.md @@ -0,0 +1,15 @@ +# V1alpha1InferenceGraph + +InferenceGraph is the Schema for the InferenceGraph API for multiple models +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] +**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] +**metadata** | [**V1ObjectMeta**](https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1ObjectMeta.md) | | [optional] +**spec** | [**V1alpha1InferenceGraphSpec**](V1alpha1InferenceGraphSpec.md) | | [optional] +**status** | [**V1alpha1InferenceGraphStatus**](V1alpha1InferenceGraphStatus.md) | | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/python/kserve/docs/V1alpha1InferenceGraphList.md b/python/kserve/docs/V1alpha1InferenceGraphList.md new file mode 100644 index 00000000000..a3d9faeea3f --- /dev/null +++ b/python/kserve/docs/V1alpha1InferenceGraphList.md @@ -0,0 +1,14 @@ +# V1alpha1InferenceGraphList + +InferenceGraphList contains a list of InferenceGraph +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] +**items** | [**list[V1alpha1InferenceGraph]**](V1alpha1InferenceGraph.md) | | +**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] +**metadata** | [**V1ListMeta**](https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1ListMeta.md) | | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/python/kserve/docs/V1alpha1InferenceGraphSpec.md b/python/kserve/docs/V1alpha1InferenceGraphSpec.md new file mode 100644 index 00000000000..20f73ee18d2 --- /dev/null +++ b/python/kserve/docs/V1alpha1InferenceGraphSpec.md @@ -0,0 +1,11 @@ +# V1alpha1InferenceGraphSpec + +InferenceGraphSpec defines the InferenceGraph spec +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**nodes** | [**dict(str, V1alpha1InferenceRouter)**](V1alpha1InferenceRouter.md) | Map of InferenceGraph router nodes Each node defines the router which can be different routing types | + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/python/kserve/docs/V1alpha1InferenceGraphStatus.md b/python/kserve/docs/V1alpha1InferenceGraphStatus.md new file mode 100644 index 00000000000..74f45ac6600 --- /dev/null +++ b/python/kserve/docs/V1alpha1InferenceGraphStatus.md @@ -0,0 +1,14 @@ +# V1alpha1InferenceGraphStatus + +InferenceGraphStatus defines the InferenceGraph conditions and status +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**annotations** | **dict(str, str)** | Annotations is additional Status fields for the Resource to save some additional State as well as convey more information to the user. This is roughly akin to Annotations on any k8s resource, just the reconciler conveying richer information outwards. | [optional] +**conditions** | [**list[KnativeCondition]**](KnativeCondition.md) | Conditions the latest available observations of a resource's current state. | [optional] +**observed_generation** | **int** | ObservedGeneration is the 'Generation' of the Service that was last processed by the controller. | [optional] +**url** | [**KnativeURL**](KnativeURL.md) | | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/python/kserve/docs/V1alpha1InferenceRouter.md b/python/kserve/docs/V1alpha1InferenceRouter.md new file mode 100644 index 00000000000..ef3c75be38f --- /dev/null +++ b/python/kserve/docs/V1alpha1InferenceRouter.md @@ -0,0 +1,12 @@ +# V1alpha1InferenceRouter + +InferenceRouter defines the router for each InferenceGraph node with one or multiple steps ```yaml kind: InferenceGraph metadata: name: canary-route spec: nodes: root: routerType: Splitter routes: - service: mymodel1 weight: 20 - service: mymodel2 weight: 80 ``` ```yaml kind: InferenceGraph metadata: name: abtest spec: nodes: mymodel: routerType: Switch routes: - service: mymodel1 condition: \"{ .input.userId == 1 }\" - service: mymodel2 condition: \"{ .input.userId == 2 }\" ``` Scoring a case using a model ensemble consists of scoring it using each model separately, then combining the results into a single scoring result using one of the pre-defined combination methods. 
Tree Ensemble constitutes a case where simple algorithms for combining results of either classification or regression trees are well known. Multiple classification trees, for example, are commonly combined using a \"majority-vote\" method. Multiple regression trees are often combined using various averaging techniques. e.g tagging models with segment identifiers and weights to be used for their combination in these ways. ```yaml kind: InferenceGraph metadata: name: ensemble spec: nodes: root: routerType: Sequence routes: - service: feast - nodeName: ensembleModel data: $response ensembleModel: routerType: Ensemble routes: - service: sklearn-model - service: xgboost-model ``` Scoring a case using a sequence, or chain of models allows the output of one model to be passed in as input to the subsequent models. ```yaml kind: InferenceGraph metadata: name: model-chainer spec: nodes: root: routerType: Sequence routes: - service: mymodel-s1 - service: mymodel-s2 data: $response - service: mymodel-s3 data: $response ``` In the flow described below, the pre_processing node base64 encodes the image and passes it to two model nodes in the flow. The encoded data is available to both these nodes for classification. The second node i.e. dog-breed-classification takes the original input from the pre_processing node along-with the response from the cat-dog-classification node to do further classification of the dog breed if required. ```yaml kind: InferenceGraph metadata: name: dog-breed-classification spec: nodes: root: routerType: Sequence routes: - service: cat-dog-classifier - nodeName: breed-classifier data: $request breed-classifier: routerType: Switch routes: - service: dog-breed-classifier condition: { .predictions.class == \"dog\" } - service: cat-breed-classifier condition: { .predictions.class == \"cat\" } ``` +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**router_type** | **str** | RouterType - `Sequence:` chain multiple inference steps with input/output from previous step - `Splitter:` randomly routes to the target service according to the weight - `Ensemble:` routes the request to multiple models and then merge the responses - `Switch:` routes the request to one of the steps based on condition | [default to ''] +**steps** | [**list[V1alpha1InferenceStep]**](V1alpha1InferenceStep.md) | Steps defines destinations for the current router node | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/python/kserve/docs/V1alpha1InferenceStep.md b/python/kserve/docs/V1alpha1InferenceStep.md new file mode 100644 index 00000000000..1d9853df264 --- /dev/null +++ b/python/kserve/docs/V1alpha1InferenceStep.md @@ -0,0 +1,17 @@ +# V1alpha1InferenceStep + +InferenceStep defines the inference target of the current step with condition, weights and data. 
+## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**condition** | **str** | routing based on the condition | [optional] +**data** | **str** | request data sent to the next route with input/output from the previous step $request $response.predictions | [optional] +**name** | **str** | Unique name for the step within this node | [optional] +**node_name** | **str** | The node name for routing as next step | [optional] +**service_name** | **str** | named reference for InferenceService | [optional] +**service_url** | **str** | InferenceService URL, mutually exclusive with ServiceName | [optional] +**weight** | **int** | the weight for split of the traffic, only used for Split Router when weight is specified all the routing targets should be sum to 100 | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/python/kserve/docs/V1alpha1InferenceTarget.md b/python/kserve/docs/V1alpha1InferenceTarget.md new file mode 100644 index 00000000000..137ca25b747 --- /dev/null +++ b/python/kserve/docs/V1alpha1InferenceTarget.md @@ -0,0 +1,13 @@ +# V1alpha1InferenceTarget + +Exactly one InferenceTarget field must be specified +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**node_name** | **str** | The node name for routing as next step | [optional] +**service_name** | **str** | named reference for InferenceService | [optional] +**service_url** | **str** | InferenceService URL, mutually exclusive with ServiceName | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/python/kserve/docs/V1beta1ComponentExtensionSpec.md b/python/kserve/docs/V1beta1ComponentExtensionSpec.md index 569eb94d6b9..561f058da56 100644 --- a/python/kserve/docs/V1beta1ComponentExtensionSpec.md +++ b/python/kserve/docs/V1beta1ComponentExtensionSpec.md @@ -10,8 +10,8 @@ Name | Type | Description | Notes **logger** | [**V1beta1LoggerSpec**](V1beta1LoggerSpec.md) | | [optional] **max_replicas** | **int** | Maximum number of replicas for autoscaling. | [optional] **min_replicas** | **int** | Minimum number of replicas, defaults to 1 but can be set to 0 to enable scale-to-zero. | [optional] -**scale_metric** | **str** | ScaleMetric specifies scaling metric of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics/). | [optional] -**scale_target** | **int** | ScaleTarget specifies scaling value of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). | [optional] +**scale_metric** | **str** | ScaleMetric defines the scaling metric type watched by autoscaler possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). | [optional] +**scale_target** | **int** | ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. concurrency and rps targets are supported by Knative Pod Autoscaler (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). | [optional] **timeout** | **int** | TimeoutSeconds specifies the number of seconds to wait before timing out a request to the component. 
| [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/python/kserve/docs/V1beta1ExplainerSpec.md b/python/kserve/docs/V1beta1ExplainerSpec.md index 54c38e1dc94..47f608906c6 100644 --- a/python/kserve/docs/V1beta1ExplainerSpec.md +++ b/python/kserve/docs/V1beta1ExplainerSpec.md @@ -37,8 +37,8 @@ Name | Type | Description | Notes **readiness_gates** | [**list[V1PodReadinessGate]**](https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1PodReadinessGate.md) | If specified, all readiness gates will be evaluated for pod readiness. A pod is ready when all its containers are ready AND all conditions specified in the readiness gates have status equal to \"True\" More info: https://git.k8s.io/enhancements/keps/sig-network/0007-pod-ready%2B%2B.md | [optional] **restart_policy** | **str** | Restart policy for all containers within the pod. One of Always, OnFailure, Never. Default to Always. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy | [optional] **runtime_class_name** | **str** | RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. If unset or empty, the \"legacy\" RuntimeClass will be used, which is an implicit class with an empty definition that uses the default runtime handler. More info: https://git.k8s.io/enhancements/keps/sig-node/runtime-class.md This is a beta feature as of Kubernetes v1.14. | [optional] -**scale_metric** | **str** | ScaleMetric specifies scaling metric of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics/). | [optional] -**scale_target** | **int** | ScaleTarget specifies scaling value of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). | [optional] +**scale_metric** | **str** | ScaleMetric defines the scaling metric type watched by autoscaler possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). | [optional] +**scale_target** | **int** | ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. concurrency and rps targets are supported by Knative Pod Autoscaler (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). | [optional] **scheduler_name** | **str** | If specified, the pod will be dispatched by specified scheduler. If not specified, the pod will be dispatched by default scheduler. | [optional] **security_context** | [**V1PodSecurityContext**](https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1PodSecurityContext.md) | | [optional] **service_account** | **str** | DeprecatedServiceAccount is a depreciated alias for ServiceAccountName. Deprecated: Use serviceAccountName instead. 
| [optional] diff --git a/python/kserve/docs/V1beta1IngressConfig.md b/python/kserve/docs/V1beta1IngressConfig.md index f9510222d83..fd9cb2fb0f9 100644 --- a/python/kserve/docs/V1beta1IngressConfig.md +++ b/python/kserve/docs/V1beta1IngressConfig.md @@ -10,6 +10,7 @@ Name | Type | Description | Notes **ingress_service** | **str** | | [optional] **local_gateway** | **str** | | [optional] **local_gateway_service** | **str** | | [optional] +**url_scheme** | **str** | | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/python/kserve/docs/V1beta1PredictorSpec.md b/python/kserve/docs/V1beta1PredictorSpec.md index 3362362b56f..d7478f1b0e8 100644 --- a/python/kserve/docs/V1beta1PredictorSpec.md +++ b/python/kserve/docs/V1beta1PredictorSpec.md @@ -40,8 +40,8 @@ Name | Type | Description | Notes **readiness_gates** | [**list[V1PodReadinessGate]**](https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1PodReadinessGate.md) | If specified, all readiness gates will be evaluated for pod readiness. A pod is ready when all its containers are ready AND all conditions specified in the readiness gates have status equal to \"True\" More info: https://git.k8s.io/enhancements/keps/sig-network/0007-pod-ready%2B%2B.md | [optional] **restart_policy** | **str** | Restart policy for all containers within the pod. One of Always, OnFailure, Never. Default to Always. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy | [optional] **runtime_class_name** | **str** | RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. If unset or empty, the \"legacy\" RuntimeClass will be used, which is an implicit class with an empty definition that uses the default runtime handler. More info: https://git.k8s.io/enhancements/keps/sig-node/runtime-class.md This is a beta feature as of Kubernetes v1.14. | [optional] -**scale_metric** | **str** | ScaleMetric specifies scaling metric of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics/). | [optional] -**scale_target** | **int** | ScaleTarget specifies scaling value of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). | [optional] +**scale_metric** | **str** | ScaleMetric defines the scaling metric type watched by autoscaler possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). | [optional] +**scale_target** | **int** | ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. concurrency and rps targets are supported by Knative Pod Autoscaler (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). | [optional] **scheduler_name** | **str** | If specified, the pod will be dispatched by specified scheduler. If not specified, the pod will be dispatched by default scheduler. | [optional] **security_context** | [**V1PodSecurityContext**](https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1PodSecurityContext.md) | | [optional] **service_account** | **str** | DeprecatedServiceAccount is a depreciated alias for ServiceAccountName. Deprecated: Use serviceAccountName instead. 
| [optional] diff --git a/python/kserve/docs/V1beta1TransformerSpec.md b/python/kserve/docs/V1beta1TransformerSpec.md index 080d67e026a..7bc67e12d58 100644 --- a/python/kserve/docs/V1beta1TransformerSpec.md +++ b/python/kserve/docs/V1beta1TransformerSpec.md @@ -34,8 +34,8 @@ Name | Type | Description | Notes **readiness_gates** | [**list[V1PodReadinessGate]**](https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1PodReadinessGate.md) | If specified, all readiness gates will be evaluated for pod readiness. A pod is ready when all its containers are ready AND all conditions specified in the readiness gates have status equal to \"True\" More info: https://git.k8s.io/enhancements/keps/sig-network/0007-pod-ready%2B%2B.md | [optional] **restart_policy** | **str** | Restart policy for all containers within the pod. One of Always, OnFailure, Never. Default to Always. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy | [optional] **runtime_class_name** | **str** | RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. If unset or empty, the \"legacy\" RuntimeClass will be used, which is an implicit class with an empty definition that uses the default runtime handler. More info: https://git.k8s.io/enhancements/keps/sig-node/runtime-class.md This is a beta feature as of Kubernetes v1.14. | [optional] -**scale_metric** | **str** | ScaleMetric specifies scaling metric of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics/). | [optional] -**scale_target** | **int** | ScaleTarget specifies scaling value of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). | [optional] +**scale_metric** | **str** | ScaleMetric defines the scaling metric type watched by autoscaler possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). | [optional] +**scale_target** | **int** | ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. concurrency and rps targets are supported by Knative Pod Autoscaler (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). | [optional] **scheduler_name** | **str** | If specified, the pod will be dispatched by specified scheduler. If not specified, the pod will be dispatched by default scheduler. | [optional] **security_context** | [**V1PodSecurityContext**](https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1PodSecurityContext.md) | | [optional] **service_account** | **str** | DeprecatedServiceAccount is a depreciated alias for ServiceAccountName. Deprecated: Use serviceAccountName instead. 
| [optional] diff --git a/python/kserve/kserve/__init__.py b/python/kserve/kserve/__init__.py index 70d556b7403..32775a46d3f 100644 --- a/python/kserve/kserve/__init__.py +++ b/python/kserve/kserve/__init__.py @@ -39,6 +39,13 @@ from kserve.models.v1alpha1_cluster_serving_runtime import V1alpha1ClusterServingRuntime from kserve.models.v1alpha1_cluster_serving_runtime_list import V1alpha1ClusterServingRuntimeList from kserve.models.v1alpha1_container import V1alpha1Container +from kserve.models.v1alpha1_inference_graph import V1alpha1InferenceGraph +from kserve.models.v1alpha1_inference_graph_list import V1alpha1InferenceGraphList +from kserve.models.v1alpha1_inference_graph_spec import V1alpha1InferenceGraphSpec +from kserve.models.v1alpha1_inference_graph_status import V1alpha1InferenceGraphStatus +from kserve.models.v1alpha1_inference_router import V1alpha1InferenceRouter +from kserve.models.v1alpha1_inference_step import V1alpha1InferenceStep +from kserve.models.v1alpha1_inference_target import V1alpha1InferenceTarget from kserve.models.v1alpha1_model_spec import V1alpha1ModelSpec from kserve.models.v1alpha1_serving_runtime import V1alpha1ServingRuntime from kserve.models.v1alpha1_serving_runtime_list import V1alpha1ServingRuntimeList diff --git a/python/kserve/kserve/api/kserve_client.py b/python/kserve/kserve/api/kserve_client.py index e8cd18c61a8..64f6527afaf 100644 --- a/python/kserve/kserve/api/kserve_client.py +++ b/python/kserve/kserve/api/kserve_client.py @@ -13,14 +13,16 @@ # limitations under the License. import time +from urllib.parse import urlparse + import requests from kubernetes import client, config -from urllib.parse import urlparse -from ..constants import constants -from ..utils import utils from .creds_utils import set_gcs_credentials, set_s3_credentials, set_azure_credentials from .watch import isvc_watch +from ..constants import constants +from ..models import V1alpha1InferenceGraph +from ..utils import utils class KServeClient(object): @@ -414,3 +416,125 @@ def wait_model_ready(self, service_name, model_name, isvc_namespace=None, # pyl raise RuntimeError(f"InferenceService ({service_name}) has not loaded the \ model ({model_name}) before the timeout.") + + def create_inference_graph(self, inferencegraph: V1alpha1InferenceGraph, namespace: str = None) -> object: + """ + Create an inference graph + + :param inferencegraph: inference graph object + :param namespace: defaults to current or default namespace + :return: created inference graph + """ + version = inferencegraph.api_version.split("/")[1] + if namespace is None: + namespace = utils.set_ig_namespace(inferencegraph) + + try: + outputs = self.api_instance.create_namespaced_custom_object( + constants.KSERVE_GROUP, + version, + namespace, + constants.KSERVE_PLURAL_INFERENCEGRAPH, + inferencegraph + ) + except client.rest.ApiException as e: + raise RuntimeError( + "Exception when calling CustomObjectsApi->create_namespaced_custom_object:\ + %s\n" + % e + ) + return outputs + + def delete_inference_graph(self, name: str, namespace: str = None, + version: str = constants.KSERVE_V1ALPHA1_VERSION): + """ + Delete the inference graph + + :param name: inference graph name + :param namespace: defaults to current or default namespace + :param version: api group version + """ + if namespace is None: + namespace = utils.get_default_target_namespace() + + try: + self.api_instance.delete_namespaced_custom_object( + constants.KSERVE_GROUP, + version, + namespace, + constants.KSERVE_PLURAL_INFERENCEGRAPH, + name, + ) + except
client.rest.ApiException as e: + raise RuntimeError( + "Exception when calling CustomObjectsApi->delete_namespaced_custom_object:\ + %s\n" + % e + ) + + def get_inference_graph(self, name: str, namespace: str = None, + version: str = constants.KSERVE_V1ALPHA1_VERSION) -> object: + """ + Get the inference graph + + :param name: existing inference graph name + :param namespace: defaults to current or default namespace + :param version: api group version + :return: inference graph + """ + + if namespace is None: + namespace = utils.get_default_target_namespace() + + try: + return self.api_instance.get_namespaced_custom_object( + constants.KSERVE_GROUP, + version, + namespace, + constants.KSERVE_PLURAL_INFERENCEGRAPH, + name) + except client.rest.ApiException as e: + raise RuntimeError( + "Exception when calling CustomObjectsApi->get_namespaced_custom_object:\ + %s\n" % e) + + def is_ig_ready(self, name: str, namespace: str = None, version: str = constants.KSERVE_V1ALPHA1_VERSION) -> bool: + """ + Check if the inference graph is ready. + + :param name: inference graph name + :param namespace: defaults to current or default namespace + :param version: api group version + :return: true if inference graph is ready, else false. + """ + if namespace is None: + namespace = utils.get_default_target_namespace() + + ig: dict = self.get_inference_graph(name, namespace=namespace, version=version) + for condition in ig.get('status', {}).get('conditions', []): + if condition.get('type', '') == 'Ready': + status = condition.get('status', 'Unknown') + return status.lower() == "true" + return False + + def wait_ig_ready(self, name: str, namespace: str = None, version: str = constants.KSERVE_V1ALPHA1_VERSION, + timeout_seconds: int = 600, + polling_interval: int = 10): + """ + Wait for the inference graph to be ready until timeout. Raises a RuntimeError with the current + inference graph state if the timeout is reached. + + :param name: inference graph name + :param namespace: defaults to current or default namespace + :param version: api group version + :param timeout_seconds: timeout in seconds for waiting, defaults to 600s. + :param polling_interval: the time interval in seconds between status polls + :return: + """ + for _ in range(round(timeout_seconds / polling_interval)): + time.sleep(polling_interval) + if self.is_ig_ready(name, namespace, version): + return + + current_ig = self.get_inference_graph(name, namespace=namespace, version=version) + raise RuntimeError("Timed out waiting for the InferenceGraph {} to be ready.
\ + The InferenceGraph is as follows: {}".format(name, current_ig)) diff --git a/python/kserve/kserve/constants/constants.py b/python/kserve/kserve/constants/constants.py index ba818efb33e..8ed4ee25e9d 100644 --- a/python/kserve/kserve/constants/constants.py +++ b/python/kserve/kserve/constants/constants.py @@ -20,6 +20,8 @@ KSERVE_PLURAL = 'inferenceservices' KSERVE_KIND_TRAINEDMODEL = 'TrainedModel' KSERVE_PLURAL_TRAINEDMODEL = 'trainedmodels' +KSERVE_KIND_INFERENCEGRAPH = 'InferenceGraph' +KSERVE_PLURAL_INFERENCEGRAPH = 'inferencegraphs' KSERVE_V1BETA1_VERSION = 'v1beta1' KSERVE_V1ALPHA1_VERSION = "v1alpha1" diff --git a/python/kserve/kserve/models/__init__.py b/python/kserve/kserve/models/__init__.py index e75ed572bf4..4bba6e173c5 100644 --- a/python/kserve/kserve/models/__init__.py +++ b/python/kserve/kserve/models/__init__.py @@ -31,6 +31,13 @@ from kserve.models.v1alpha1_built_in_adapter import V1alpha1BuiltInAdapter from kserve.models.v1alpha1_cluster_serving_runtime import V1alpha1ClusterServingRuntime from kserve.models.v1alpha1_cluster_serving_runtime_list import V1alpha1ClusterServingRuntimeList +from kserve.models.v1alpha1_inference_graph import V1alpha1InferenceGraph +from kserve.models.v1alpha1_inference_graph_list import V1alpha1InferenceGraphList +from kserve.models.v1alpha1_inference_graph_spec import V1alpha1InferenceGraphSpec +from kserve.models.v1alpha1_inference_graph_status import V1alpha1InferenceGraphStatus +from kserve.models.v1alpha1_inference_router import V1alpha1InferenceRouter +from kserve.models.v1alpha1_inference_step import V1alpha1InferenceStep +from kserve.models.v1alpha1_inference_target import V1alpha1InferenceTarget from kserve.models.v1alpha1_model_spec import V1alpha1ModelSpec from kserve.models.v1alpha1_serving_runtime import V1alpha1ServingRuntime from kserve.models.v1alpha1_serving_runtime_list import V1alpha1ServingRuntimeList diff --git a/python/kserve/kserve/models/v1alpha1_inference_graph.py b/python/kserve/kserve/models/v1alpha1_inference_graph.py new file mode 100644 index 00000000000..9e3903dff61 --- /dev/null +++ b/python/kserve/kserve/models/v1alpha1_inference_graph.py @@ -0,0 +1,242 @@ +# Copyright 2022 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + KServe + + Python SDK for KServe # noqa: E501 + + The version of the OpenAPI document: v0.1 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kserve.configuration import Configuration + + +class V1alpha1InferenceGraph(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition.
+ """ + openapi_types = { + 'api_version': 'str', + 'kind': 'str', + 'metadata': 'V1ObjectMeta', + 'spec': 'V1alpha1InferenceGraphSpec', + 'status': 'V1alpha1InferenceGraphStatus' + } + + attribute_map = { + 'api_version': 'apiVersion', + 'kind': 'kind', + 'metadata': 'metadata', + 'spec': 'spec', + 'status': 'status' + } + + def __init__(self, api_version=None, kind=None, metadata=None, spec=None, status=None, local_vars_configuration=None): # noqa: E501 + """V1alpha1InferenceGraph - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._api_version = None + self._kind = None + self._metadata = None + self._spec = None + self._status = None + self.discriminator = None + + if api_version is not None: + self.api_version = api_version + if kind is not None: + self.kind = kind + if metadata is not None: + self.metadata = metadata + if spec is not None: + self.spec = spec + if status is not None: + self.status = status + + @property + def api_version(self): + """Gets the api_version of this V1alpha1InferenceGraph. # noqa: E501 + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :return: The api_version of this V1alpha1InferenceGraph. # noqa: E501 + :rtype: str + """ + return self._api_version + + @api_version.setter + def api_version(self, api_version): + """Sets the api_version of this V1alpha1InferenceGraph. + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :param api_version: The api_version of this V1alpha1InferenceGraph. # noqa: E501 + :type: str + """ + + self._api_version = api_version + + @property + def kind(self): + """Gets the kind of this V1alpha1InferenceGraph. # noqa: E501 + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :return: The kind of this V1alpha1InferenceGraph. # noqa: E501 + :rtype: str + """ + return self._kind + + @kind.setter + def kind(self, kind): + """Sets the kind of this V1alpha1InferenceGraph. + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :param kind: The kind of this V1alpha1InferenceGraph. # noqa: E501 + :type: str + """ + + self._kind = kind + + @property + def metadata(self): + """Gets the metadata of this V1alpha1InferenceGraph. # noqa: E501 + + + :return: The metadata of this V1alpha1InferenceGraph. # noqa: E501 + :rtype: V1ObjectMeta + """ + return self._metadata + + @metadata.setter + def metadata(self, metadata): + """Sets the metadata of this V1alpha1InferenceGraph. 
+ + + :param metadata: The metadata of this V1alpha1InferenceGraph. # noqa: E501 + :type: V1ObjectMeta + """ + + self._metadata = metadata + + @property + def spec(self): + """Gets the spec of this V1alpha1InferenceGraph. # noqa: E501 + + + :return: The spec of this V1alpha1InferenceGraph. # noqa: E501 + :rtype: V1alpha1InferenceGraphSpec + """ + return self._spec + + @spec.setter + def spec(self, spec): + """Sets the spec of this V1alpha1InferenceGraph. + + + :param spec: The spec of this V1alpha1InferenceGraph. # noqa: E501 + :type: V1alpha1InferenceGraphSpec + """ + + self._spec = spec + + @property + def status(self): + """Gets the status of this V1alpha1InferenceGraph. # noqa: E501 + + + :return: The status of this V1alpha1InferenceGraph. # noqa: E501 + :rtype: V1alpha1InferenceGraphStatus + """ + return self._status + + @status.setter + def status(self, status): + """Sets the status of this V1alpha1InferenceGraph. + + + :param status: The status of this V1alpha1InferenceGraph. # noqa: E501 + :type: V1alpha1InferenceGraphStatus + """ + + self._status = status + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1alpha1InferenceGraph): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1alpha1InferenceGraph): + return True + + return self.to_dict() != other.to_dict() diff --git a/python/kserve/kserve/models/v1alpha1_inference_graph_list.py b/python/kserve/kserve/models/v1alpha1_inference_graph_list.py new file mode 100644 index 00000000000..887104ba5b0 --- /dev/null +++ b/python/kserve/kserve/models/v1alpha1_inference_graph_list.py @@ -0,0 +1,217 @@ +# Copyright 2022 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + KServe + + Python SDK for KServe # noqa: E501 + + The version of the OpenAPI document: v0.1 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kserve.configuration import Configuration + + +class V1alpha1InferenceGraphList(object): + """NOTE: This class is auto generated by OpenAPI Generator. 
+ Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'api_version': 'str', + 'items': 'list[V1alpha1InferenceGraph]', + 'kind': 'str', + 'metadata': 'V1ListMeta' + } + + attribute_map = { + 'api_version': 'apiVersion', + 'items': 'items', + 'kind': 'kind', + 'metadata': 'metadata' + } + + def __init__(self, api_version=None, items=None, kind=None, metadata=None, local_vars_configuration=None): # noqa: E501 + """V1alpha1InferenceGraphList - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._api_version = None + self._items = None + self._kind = None + self._metadata = None + self.discriminator = None + + if api_version is not None: + self.api_version = api_version + self.items = items + if kind is not None: + self.kind = kind + if metadata is not None: + self.metadata = metadata + + @property + def api_version(self): + """Gets the api_version of this V1alpha1InferenceGraphList. # noqa: E501 + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :return: The api_version of this V1alpha1InferenceGraphList. # noqa: E501 + :rtype: str + """ + return self._api_version + + @api_version.setter + def api_version(self, api_version): + """Sets the api_version of this V1alpha1InferenceGraphList. + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :param api_version: The api_version of this V1alpha1InferenceGraphList. # noqa: E501 + :type: str + """ + + self._api_version = api_version + + @property + def items(self): + """Gets the items of this V1alpha1InferenceGraphList. # noqa: E501 + + + :return: The items of this V1alpha1InferenceGraphList. # noqa: E501 + :rtype: list[V1alpha1InferenceGraph] + """ + return self._items + + @items.setter + def items(self, items): + """Sets the items of this V1alpha1InferenceGraphList. + + + :param items: The items of this V1alpha1InferenceGraphList. # noqa: E501 + :type: list[V1alpha1InferenceGraph] + """ + if self.local_vars_configuration.client_side_validation and items is None: # noqa: E501 + raise ValueError("Invalid value for `items`, must not be `None`") # noqa: E501 + + self._items = items + + @property + def kind(self): + """Gets the kind of this V1alpha1InferenceGraphList. # noqa: E501 + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :return: The kind of this V1alpha1InferenceGraphList. 
# noqa: E501 + :rtype: str + """ + return self._kind + + @kind.setter + def kind(self, kind): + """Sets the kind of this V1alpha1InferenceGraphList. + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :param kind: The kind of this V1alpha1InferenceGraphList. # noqa: E501 + :type: str + """ + + self._kind = kind + + @property + def metadata(self): + """Gets the metadata of this V1alpha1InferenceGraphList. # noqa: E501 + + + :return: The metadata of this V1alpha1InferenceGraphList. # noqa: E501 + :rtype: V1ListMeta + """ + return self._metadata + + @metadata.setter + def metadata(self, metadata): + """Sets the metadata of this V1alpha1InferenceGraphList. + + + :param metadata: The metadata of this V1alpha1InferenceGraphList. # noqa: E501 + :type: V1ListMeta + """ + + self._metadata = metadata + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1alpha1InferenceGraphList): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1alpha1InferenceGraphList): + return True + + return self.to_dict() != other.to_dict() diff --git a/python/kserve/kserve/models/v1alpha1_inference_graph_spec.py b/python/kserve/kserve/models/v1alpha1_inference_graph_spec.py new file mode 100644 index 00000000000..4555dcc2bbb --- /dev/null +++ b/python/kserve/kserve/models/v1alpha1_inference_graph_spec.py @@ -0,0 +1,137 @@ +# Copyright 2022 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + KServe + + Python SDK for KServe # noqa: E501 + + The version of the OpenAPI document: v0.1 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kserve.configuration import Configuration + + +class V1alpha1InferenceGraphSpec(object): + """NOTE: This class is auto generated by OpenAPI Generator. 
+ Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'nodes': 'dict(str, V1alpha1InferenceRouter)' + } + + attribute_map = { + 'nodes': 'nodes' + } + + def __init__(self, nodes=None, local_vars_configuration=None): # noqa: E501 + """V1alpha1InferenceGraphSpec - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._nodes = None + self.discriminator = None + + self.nodes = nodes + + @property + def nodes(self): + """Gets the nodes of this V1alpha1InferenceGraphSpec. # noqa: E501 + + Map of InferenceGraph router nodes Each node defines the router which can be different routing types # noqa: E501 + + :return: The nodes of this V1alpha1InferenceGraphSpec. # noqa: E501 + :rtype: dict(str, V1alpha1InferenceRouter) + """ + return self._nodes + + @nodes.setter + def nodes(self, nodes): + """Sets the nodes of this V1alpha1InferenceGraphSpec. + + Map of InferenceGraph router nodes Each node defines the router which can be different routing types # noqa: E501 + + :param nodes: The nodes of this V1alpha1InferenceGraphSpec. # noqa: E501 + :type: dict(str, V1alpha1InferenceRouter) + """ + if self.local_vars_configuration.client_side_validation and nodes is None: # noqa: E501 + raise ValueError("Invalid value for `nodes`, must not be `None`") # noqa: E501 + + self._nodes = nodes + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1alpha1InferenceGraphSpec): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1alpha1InferenceGraphSpec): + return True + + return self.to_dict() != other.to_dict() diff --git a/python/kserve/kserve/models/v1alpha1_inference_graph_status.py b/python/kserve/kserve/models/v1alpha1_inference_graph_status.py new file mode 100644 index 00000000000..83ba63d9556 --- /dev/null +++ b/python/kserve/kserve/models/v1alpha1_inference_graph_status.py @@ -0,0 +1,218 @@ +# Copyright 2022 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + KServe + + Python SDK for KServe # noqa: E501 + + The version of the OpenAPI document: v0.1 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kserve.configuration import Configuration + + +class V1alpha1InferenceGraphStatus(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'annotations': 'dict(str, str)', + 'conditions': 'list[KnativeCondition]', + 'observed_generation': 'int', + 'url': 'KnativeURL' + } + + attribute_map = { + 'annotations': 'annotations', + 'conditions': 'conditions', + 'observed_generation': 'observedGeneration', + 'url': 'url' + } + + def __init__(self, annotations=None, conditions=None, observed_generation=None, url=None, local_vars_configuration=None): # noqa: E501 + """V1alpha1InferenceGraphStatus - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._annotations = None + self._conditions = None + self._observed_generation = None + self._url = None + self.discriminator = None + + if annotations is not None: + self.annotations = annotations + if conditions is not None: + self.conditions = conditions + if observed_generation is not None: + self.observed_generation = observed_generation + if url is not None: + self.url = url + + @property + def annotations(self): + """Gets the annotations of this V1alpha1InferenceGraphStatus. # noqa: E501 + + Annotations is additional Status fields for the Resource to save some additional State as well as convey more information to the user. This is roughly akin to Annotations on any k8s resource, just the reconciler conveying richer information outwards. # noqa: E501 + + :return: The annotations of this V1alpha1InferenceGraphStatus. # noqa: E501 + :rtype: dict(str, str) + """ + return self._annotations + + @annotations.setter + def annotations(self, annotations): + """Sets the annotations of this V1alpha1InferenceGraphStatus. + + Annotations is additional Status fields for the Resource to save some additional State as well as convey more information to the user. This is roughly akin to Annotations on any k8s resource, just the reconciler conveying richer information outwards. # noqa: E501 + + :param annotations: The annotations of this V1alpha1InferenceGraphStatus. # noqa: E501 + :type: dict(str, str) + """ + + self._annotations = annotations + + @property + def conditions(self): + """Gets the conditions of this V1alpha1InferenceGraphStatus. # noqa: E501 + + Conditions the latest available observations of a resource's current state. # noqa: E501 + + :return: The conditions of this V1alpha1InferenceGraphStatus. 
# noqa: E501 + :rtype: list[KnativeCondition] + """ + return self._conditions + + @conditions.setter + def conditions(self, conditions): + """Sets the conditions of this V1alpha1InferenceGraphStatus. + + Conditions the latest available observations of a resource's current state. # noqa: E501 + + :param conditions: The conditions of this V1alpha1InferenceGraphStatus. # noqa: E501 + :type: list[KnativeCondition] + """ + + self._conditions = conditions + + @property + def observed_generation(self): + """Gets the observed_generation of this V1alpha1InferenceGraphStatus. # noqa: E501 + + ObservedGeneration is the 'Generation' of the Service that was last processed by the controller. # noqa: E501 + + :return: The observed_generation of this V1alpha1InferenceGraphStatus. # noqa: E501 + :rtype: int + """ + return self._observed_generation + + @observed_generation.setter + def observed_generation(self, observed_generation): + """Sets the observed_generation of this V1alpha1InferenceGraphStatus. + + ObservedGeneration is the 'Generation' of the Service that was last processed by the controller. # noqa: E501 + + :param observed_generation: The observed_generation of this V1alpha1InferenceGraphStatus. # noqa: E501 + :type: int + """ + + self._observed_generation = observed_generation + + @property + def url(self): + """Gets the url of this V1alpha1InferenceGraphStatus. # noqa: E501 + + + :return: The url of this V1alpha1InferenceGraphStatus. # noqa: E501 + :rtype: KnativeURL + """ + return self._url + + @url.setter + def url(self, url): + """Sets the url of this V1alpha1InferenceGraphStatus. + + + :param url: The url of this V1alpha1InferenceGraphStatus. # noqa: E501 + :type: KnativeURL + """ + + self._url = url + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1alpha1InferenceGraphStatus): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1alpha1InferenceGraphStatus): + return True + + return self.to_dict() != other.to_dict() diff --git a/python/kserve/kserve/models/v1alpha1_inference_router.py b/python/kserve/kserve/models/v1alpha1_inference_router.py new file mode 100644 index 00000000000..197603dc963 --- /dev/null +++ b/python/kserve/kserve/models/v1alpha1_inference_router.py @@ -0,0 +1,165 @@ +# Copyright 2022 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + KServe + + Python SDK for KServe # noqa: E501 + + The version of the OpenAPI document: v0.1 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kserve.configuration import Configuration + + +class V1alpha1InferenceRouter(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'router_type': 'str', + 'steps': 'list[V1alpha1InferenceStep]' + } + + attribute_map = { + 'router_type': 'routerType', + 'steps': 'steps' + } + + def __init__(self, router_type='', steps=None, local_vars_configuration=None): # noqa: E501 + """V1alpha1InferenceRouter - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._router_type = None + self._steps = None + self.discriminator = None + + self.router_type = router_type + if steps is not None: + self.steps = steps + + @property + def router_type(self): + """Gets the router_type of this V1alpha1InferenceRouter. # noqa: E501 + + RouterType - `Sequence:` chain multiple inference steps with input/output from previous step - `Splitter:` randomly routes to the target service according to the weight - `Ensemble:` routes the request to multiple models and then merge the responses - `Switch:` routes the request to one of the steps based on condition # noqa: E501 + + :return: The router_type of this V1alpha1InferenceRouter. # noqa: E501 + :rtype: str + """ + return self._router_type + + @router_type.setter + def router_type(self, router_type): + """Sets the router_type of this V1alpha1InferenceRouter. + + RouterType - `Sequence:` chain multiple inference steps with input/output from previous step - `Splitter:` randomly routes to the target service according to the weight - `Ensemble:` routes the request to multiple models and then merge the responses - `Switch:` routes the request to one of the steps based on condition # noqa: E501 + + :param router_type: The router_type of this V1alpha1InferenceRouter. # noqa: E501 + :type: str + """ + if self.local_vars_configuration.client_side_validation and router_type is None: # noqa: E501 + raise ValueError("Invalid value for `router_type`, must not be `None`") # noqa: E501 + + self._router_type = router_type + + @property + def steps(self): + """Gets the steps of this V1alpha1InferenceRouter. # noqa: E501 + + Steps defines destinations for the current router node # noqa: E501 + + :return: The steps of this V1alpha1InferenceRouter. # noqa: E501 + :rtype: list[V1alpha1InferenceStep] + """ + return self._steps + + @steps.setter + def steps(self, steps): + """Sets the steps of this V1alpha1InferenceRouter. 
+ + Steps defines destinations for the current router node # noqa: E501 + + :param steps: The steps of this V1alpha1InferenceRouter. # noqa: E501 + :type: list[V1alpha1InferenceStep] + """ + + self._steps = steps + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1alpha1InferenceRouter): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1alpha1InferenceRouter): + return True + + return self.to_dict() != other.to_dict() diff --git a/python/kserve/kserve/models/v1alpha1_inference_step.py b/python/kserve/kserve/models/v1alpha1_inference_step.py new file mode 100644 index 00000000000..6fe4a9b61ec --- /dev/null +++ b/python/kserve/kserve/models/v1alpha1_inference_step.py @@ -0,0 +1,304 @@ +# Copyright 2022 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + KServe + + Python SDK for KServe # noqa: E501 + + The version of the OpenAPI document: v0.1 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kserve.configuration import Configuration + + +class V1alpha1InferenceStep(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + openapi_types = { + 'condition': 'str', + 'data': 'str', + 'name': 'str', + 'node_name': 'str', + 'service_name': 'str', + 'service_url': 'str', + 'weight': 'int' + } + + attribute_map = { + 'condition': 'condition', + 'data': 'data', + 'name': 'name', + 'node_name': 'nodeName', + 'service_name': 'serviceName', + 'service_url': 'serviceUrl', + 'weight': 'weight' + } + + def __init__(self, condition=None, data=None, name=None, node_name=None, service_name=None, service_url=None, weight=None, local_vars_configuration=None): # noqa: E501 + """V1alpha1InferenceStep - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._condition = None + self._data = None + self._name = None + self._node_name = None + self._service_name = None + self._service_url = None + self._weight = None + self.discriminator = None + + if condition is not None: + self.condition = condition + if data is not None: + self.data = data + if name is not None: + self.name = name + if node_name is not None: + self.node_name = node_name + if service_name is not None: + self.service_name = service_name + if service_url is not None: + self.service_url = service_url + if weight is not None: + self.weight = weight + + @property + def condition(self): + """Gets the condition of this V1alpha1InferenceStep. # noqa: E501 + + routing based on the condition # noqa: E501 + + :return: The condition of this V1alpha1InferenceStep. # noqa: E501 + :rtype: str + """ + return self._condition + + @condition.setter + def condition(self, condition): + """Sets the condition of this V1alpha1InferenceStep. + + routing based on the condition # noqa: E501 + + :param condition: The condition of this V1alpha1InferenceStep. # noqa: E501 + :type: str + """ + + self._condition = condition + + @property + def data(self): + """Gets the data of this V1alpha1InferenceStep. # noqa: E501 + + request data sent to the next route with input/output from the previous step $request $response.predictions # noqa: E501 + + :return: The data of this V1alpha1InferenceStep. # noqa: E501 + :rtype: str + """ + return self._data + + @data.setter + def data(self, data): + """Sets the data of this V1alpha1InferenceStep. + + request data sent to the next route with input/output from the previous step $request $response.predictions # noqa: E501 + + :param data: The data of this V1alpha1InferenceStep. # noqa: E501 + :type: str + """ + + self._data = data + + @property + def name(self): + """Gets the name of this V1alpha1InferenceStep. # noqa: E501 + + Unique name for the step within this node # noqa: E501 + + :return: The name of this V1alpha1InferenceStep. # noqa: E501 + :rtype: str + """ + return self._name + + @name.setter + def name(self, name): + """Sets the name of this V1alpha1InferenceStep. + + Unique name for the step within this node # noqa: E501 + + :param name: The name of this V1alpha1InferenceStep. # noqa: E501 + :type: str + """ + + self._name = name + + @property + def node_name(self): + """Gets the node_name of this V1alpha1InferenceStep. # noqa: E501 + + The node name for routing as next step # noqa: E501 + + :return: The node_name of this V1alpha1InferenceStep. # noqa: E501 + :rtype: str + """ + return self._node_name + + @node_name.setter + def node_name(self, node_name): + """Sets the node_name of this V1alpha1InferenceStep. 
+ + The node name for routing as next step # noqa: E501 + + :param node_name: The node_name of this V1alpha1InferenceStep. # noqa: E501 + :type: str + """ + + self._node_name = node_name + + @property + def service_name(self): + """Gets the service_name of this V1alpha1InferenceStep. # noqa: E501 + + named reference for InferenceService # noqa: E501 + + :return: The service_name of this V1alpha1InferenceStep. # noqa: E501 + :rtype: str + """ + return self._service_name + + @service_name.setter + def service_name(self, service_name): + """Sets the service_name of this V1alpha1InferenceStep. + + named reference for InferenceService # noqa: E501 + + :param service_name: The service_name of this V1alpha1InferenceStep. # noqa: E501 + :type: str + """ + + self._service_name = service_name + + @property + def service_url(self): + """Gets the service_url of this V1alpha1InferenceStep. # noqa: E501 + + InferenceService URL, mutually exclusive with ServiceName # noqa: E501 + + :return: The service_url of this V1alpha1InferenceStep. # noqa: E501 + :rtype: str + """ + return self._service_url + + @service_url.setter + def service_url(self, service_url): + """Sets the service_url of this V1alpha1InferenceStep. + + InferenceService URL, mutually exclusive with ServiceName # noqa: E501 + + :param service_url: The service_url of this V1alpha1InferenceStep. # noqa: E501 + :type: str + """ + + self._service_url = service_url + + @property + def weight(self): + """Gets the weight of this V1alpha1InferenceStep. # noqa: E501 + + the weight for split of the traffic, only used for Split Router when weight is specified all the routing targets should be sum to 100 # noqa: E501 + + :return: The weight of this V1alpha1InferenceStep. # noqa: E501 + :rtype: int + """ + return self._weight + + @weight.setter + def weight(self, weight): + """Sets the weight of this V1alpha1InferenceStep. + + the weight for split of the traffic, only used for Split Router when weight is specified all the routing targets should be sum to 100 # noqa: E501 + + :param weight: The weight of this V1alpha1InferenceStep. 
# noqa: E501 + :type: int + """ + + self._weight = weight + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1alpha1InferenceStep): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1alpha1InferenceStep): + return True + + return self.to_dict() != other.to_dict() diff --git a/python/kserve/kserve/models/v1alpha1_inference_target.py b/python/kserve/kserve/models/v1alpha1_inference_target.py new file mode 100644 index 00000000000..4176ebc8757 --- /dev/null +++ b/python/kserve/kserve/models/v1alpha1_inference_target.py @@ -0,0 +1,192 @@ +# Copyright 2022 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +""" + KServe + + Python SDK for KServe # noqa: E501 + + The version of the OpenAPI document: v0.1 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kserve.configuration import Configuration + + +class V1alpha1InferenceTarget(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + openapi_types = { + 'node_name': 'str', + 'service_name': 'str', + 'service_url': 'str' + } + + attribute_map = { + 'node_name': 'nodeName', + 'service_name': 'serviceName', + 'service_url': 'serviceUrl' + } + + def __init__(self, node_name=None, service_name=None, service_url=None, local_vars_configuration=None): # noqa: E501 + """V1alpha1InferenceTarget - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._node_name = None + self._service_name = None + self._service_url = None + self.discriminator = None + + if node_name is not None: + self.node_name = node_name + if service_name is not None: + self.service_name = service_name + if service_url is not None: + self.service_url = service_url + + @property + def node_name(self): + """Gets the node_name of this V1alpha1InferenceTarget. # noqa: E501 + + The node name for routing as next step # noqa: E501 + + :return: The node_name of this V1alpha1InferenceTarget. # noqa: E501 + :rtype: str + """ + return self._node_name + + @node_name.setter + def node_name(self, node_name): + """Sets the node_name of this V1alpha1InferenceTarget. + + The node name for routing as next step # noqa: E501 + + :param node_name: The node_name of this V1alpha1InferenceTarget. # noqa: E501 + :type: str + """ + + self._node_name = node_name + + @property + def service_name(self): + """Gets the service_name of this V1alpha1InferenceTarget. # noqa: E501 + + named reference for InferenceService # noqa: E501 + + :return: The service_name of this V1alpha1InferenceTarget. # noqa: E501 + :rtype: str + """ + return self._service_name + + @service_name.setter + def service_name(self, service_name): + """Sets the service_name of this V1alpha1InferenceTarget. + + named reference for InferenceService # noqa: E501 + + :param service_name: The service_name of this V1alpha1InferenceTarget. # noqa: E501 + :type: str + """ + + self._service_name = service_name + + @property + def service_url(self): + """Gets the service_url of this V1alpha1InferenceTarget. # noqa: E501 + + InferenceService URL, mutually exclusive with ServiceName # noqa: E501 + + :return: The service_url of this V1alpha1InferenceTarget. # noqa: E501 + :rtype: str + """ + return self._service_url + + @service_url.setter + def service_url(self, service_url): + """Sets the service_url of this V1alpha1InferenceTarget. + + InferenceService URL, mutually exclusive with ServiceName # noqa: E501 + + :param service_url: The service_url of this V1alpha1InferenceTarget. 
# noqa: E501 + :type: str + """ + + self._service_url = service_url + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1alpha1InferenceTarget): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1alpha1InferenceTarget): + return True + + return self.to_dict() != other.to_dict() diff --git a/python/kserve/kserve/models/v1beta1_component_extension_spec.py b/python/kserve/kserve/models/v1beta1_component_extension_spec.py index b2d4cdfe1a1..fbb4bf46f76 100644 --- a/python/kserve/kserve/models/v1beta1_component_extension_spec.py +++ b/python/kserve/kserve/models/v1beta1_component_extension_spec.py @@ -244,7 +244,7 @@ def min_replicas(self, min_replicas): def scale_metric(self): """Gets the scale_metric of this V1beta1ComponentExtensionSpec. # noqa: E501 - ScaleMetric specifies scaling metric of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics/). # noqa: E501 + ScaleMetric defines the scaling metric type watched by autoscaler possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). # noqa: E501 :return: The scale_metric of this V1beta1ComponentExtensionSpec. # noqa: E501 :rtype: str @@ -255,7 +255,7 @@ def scale_metric(self): def scale_metric(self, scale_metric): """Sets the scale_metric of this V1beta1ComponentExtensionSpec. - ScaleMetric specifies scaling metric of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics/). # noqa: E501 + ScaleMetric defines the scaling metric type watched by autoscaler possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). # noqa: E501 :param scale_metric: The scale_metric of this V1beta1ComponentExtensionSpec. # noqa: E501 :type: str @@ -267,7 +267,7 @@ def scale_metric(self, scale_metric): def scale_target(self): """Gets the scale_target of this V1beta1ComponentExtensionSpec. # noqa: E501 - ScaleTarget specifies scaling value of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 + ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. concurrency and rps targets are supported by Knative Pod Autoscaler (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 :return: The scale_target of this V1beta1ComponentExtensionSpec. 
# noqa: E501 :rtype: int @@ -278,7 +278,7 @@ def scale_target(self): def scale_target(self, scale_target): """Sets the scale_target of this V1beta1ComponentExtensionSpec. - ScaleTarget specifies scaling value of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 + ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. concurrency and rps targets are supported by Knative Pod Autoscaler (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 :param scale_target: The scale_target of this V1beta1ComponentExtensionSpec. # noqa: E501 :type: int diff --git a/python/kserve/kserve/models/v1beta1_explainer_spec.py b/python/kserve/kserve/models/v1beta1_explainer_spec.py index cf651caf320..79bba715af3 100644 --- a/python/kserve/kserve/models/v1beta1_explainer_spec.py +++ b/python/kserve/kserve/models/v1beta1_explainer_spec.py @@ -1045,7 +1045,7 @@ def runtime_class_name(self, runtime_class_name): def scale_metric(self): """Gets the scale_metric of this V1beta1ExplainerSpec. # noqa: E501 - ScaleMetric specifies scaling metric of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics/). # noqa: E501 + ScaleMetric defines the scaling metric type watched by autoscaler possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). # noqa: E501 :return: The scale_metric of this V1beta1ExplainerSpec. # noqa: E501 :rtype: str @@ -1056,7 +1056,7 @@ def scale_metric(self): def scale_metric(self, scale_metric): """Sets the scale_metric of this V1beta1ExplainerSpec. - ScaleMetric specifies scaling metric of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics/). # noqa: E501 + ScaleMetric defines the scaling metric type watched by autoscaler possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). # noqa: E501 :param scale_metric: The scale_metric of this V1beta1ExplainerSpec. # noqa: E501 :type: str @@ -1068,7 +1068,7 @@ def scale_metric(self, scale_metric): def scale_target(self): """Gets the scale_target of this V1beta1ExplainerSpec. # noqa: E501 - ScaleTarget specifies scaling value of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 + ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. concurrency and rps targets are supported by Knative Pod Autoscaler (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 :return: The scale_target of this V1beta1ExplainerSpec. # noqa: E501 :rtype: int @@ -1079,7 +1079,7 @@ def scale_target(self): def scale_target(self, scale_target): """Sets the scale_target of this V1beta1ExplainerSpec. - ScaleTarget specifies scaling value of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 + ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. concurrency and rps targets are supported by Knative Pod Autoscaler (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 :param scale_target: The scale_target of this V1beta1ExplainerSpec. 
# noqa: E501 :type: int diff --git a/python/kserve/kserve/models/v1beta1_ingress_config.py b/python/kserve/kserve/models/v1beta1_ingress_config.py index 5c89e5828c2..b75155d893e 100644 --- a/python/kserve/kserve/models/v1beta1_ingress_config.py +++ b/python/kserve/kserve/models/v1beta1_ingress_config.py @@ -53,7 +53,8 @@ class V1beta1IngressConfig(object): 'ingress_gateway': 'str', 'ingress_service': 'str', 'local_gateway': 'str', - 'local_gateway_service': 'str' + 'local_gateway_service': 'str', + 'url_scheme': 'str' } attribute_map = { @@ -63,10 +64,11 @@ class V1beta1IngressConfig(object): 'ingress_gateway': 'ingressGateway', 'ingress_service': 'ingressService', 'local_gateway': 'localGateway', - 'local_gateway_service': 'localGatewayService' + 'local_gateway_service': 'localGatewayService', + 'url_scheme': 'urlScheme' } - def __init__(self, domain_template=None, ingress_class_name=None, ingress_domain=None, ingress_gateway=None, ingress_service=None, local_gateway=None, local_gateway_service=None, local_vars_configuration=None): # noqa: E501 + def __init__(self, domain_template=None, ingress_class_name=None, ingress_domain=None, ingress_gateway=None, ingress_service=None, local_gateway=None, local_gateway_service=None, url_scheme=None, local_vars_configuration=None): # noqa: E501 """V1beta1IngressConfig - a model defined in OpenAPI""" # noqa: E501 if local_vars_configuration is None: local_vars_configuration = Configuration() @@ -79,6 +81,7 @@ def __init__(self, domain_template=None, ingress_class_name=None, ingress_domain self._ingress_service = None self._local_gateway = None self._local_gateway_service = None + self._url_scheme = None self.discriminator = None if domain_template is not None: @@ -95,6 +98,8 @@ def __init__(self, domain_template=None, ingress_class_name=None, ingress_domain self.local_gateway = local_gateway if local_gateway_service is not None: self.local_gateway_service = local_gateway_service + if url_scheme is not None: + self.url_scheme = url_scheme @property def domain_template(self): @@ -243,6 +248,27 @@ def local_gateway_service(self, local_gateway_service): self._local_gateway_service = local_gateway_service + @property + def url_scheme(self): + """Gets the url_scheme of this V1beta1IngressConfig. # noqa: E501 + + + :return: The url_scheme of this V1beta1IngressConfig. # noqa: E501 + :rtype: str + """ + return self._url_scheme + + @url_scheme.setter + def url_scheme(self, url_scheme): + """Sets the url_scheme of this V1beta1IngressConfig. + + + :param url_scheme: The url_scheme of this V1beta1IngressConfig. # noqa: E501 + :type: str + """ + + self._url_scheme = url_scheme + def to_dict(self): """Returns the model properties as a dict""" result = {} diff --git a/python/kserve/kserve/models/v1beta1_predictor_spec.py b/python/kserve/kserve/models/v1beta1_predictor_spec.py index a1ce6ed880f..e7dd1635b2a 100644 --- a/python/kserve/kserve/models/v1beta1_predictor_spec.py +++ b/python/kserve/kserve/models/v1beta1_predictor_spec.py @@ -1143,7 +1143,7 @@ def runtime_class_name(self, runtime_class_name): def scale_metric(self): """Gets the scale_metric of this V1beta1PredictorSpec. # noqa: E501 - ScaleMetric specifies scaling metric of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics/). # noqa: E501 + ScaleMetric defines the scaling metric type watched by autoscaler possible values are concurrency, rps, cpu, memory. 
concurrency, rps are supported via Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). # noqa: E501 :return: The scale_metric of this V1beta1PredictorSpec. # noqa: E501 :rtype: str @@ -1154,7 +1154,7 @@ def scale_metric(self): def scale_metric(self, scale_metric): """Sets the scale_metric of this V1beta1PredictorSpec. - ScaleMetric specifies scaling metric of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics/). # noqa: E501 + ScaleMetric defines the scaling metric type watched by autoscaler possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). # noqa: E501 :param scale_metric: The scale_metric of this V1beta1PredictorSpec. # noqa: E501 :type: str @@ -1166,7 +1166,7 @@ def scale_metric(self, scale_metric): def scale_target(self): """Gets the scale_target of this V1beta1PredictorSpec. # noqa: E501 - ScaleTarget specifies scaling value of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 + ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. concurrency and rps targets are supported by Knative Pod Autoscaler (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 :return: The scale_target of this V1beta1PredictorSpec. # noqa: E501 :rtype: int @@ -1177,7 +1177,7 @@ def scale_target(self): def scale_target(self, scale_target): """Sets the scale_target of this V1beta1PredictorSpec. - ScaleTarget specifies scaling value of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 + ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. concurrency and rps targets are supported by Knative Pod Autoscaler (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 :param scale_target: The scale_target of this V1beta1PredictorSpec. # noqa: E501 :type: int diff --git a/python/kserve/kserve/models/v1beta1_transformer_spec.py b/python/kserve/kserve/models/v1beta1_transformer_spec.py index 7054789f8f8..df6c9582bc0 100644 --- a/python/kserve/kserve/models/v1beta1_transformer_spec.py +++ b/python/kserve/kserve/models/v1beta1_transformer_spec.py @@ -967,7 +967,7 @@ def runtime_class_name(self, runtime_class_name): def scale_metric(self): """Gets the scale_metric of this V1beta1TransformerSpec. # noqa: E501 - ScaleMetric specifies scaling metric of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics/). # noqa: E501 + ScaleMetric defines the scaling metric type watched by autoscaler possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). # noqa: E501 :return: The scale_metric of this V1beta1TransformerSpec. # noqa: E501 :rtype: str @@ -978,7 +978,7 @@ def scale_metric(self): def scale_metric(self, scale_metric): """Sets the scale_metric of this V1beta1TransformerSpec. - ScaleMetric specifies scaling metric of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics/). # noqa: E501 + ScaleMetric defines the scaling metric type watched by autoscaler possible values are concurrency, rps, cpu, memory. 
concurrency, rps are supported via Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics). # noqa: E501 :param scale_metric: The scale_metric of this V1beta1TransformerSpec. # noqa: E501 :type: str @@ -990,7 +990,7 @@ def scale_metric(self, scale_metric): def scale_target(self): """Gets the scale_target of this V1beta1TransformerSpec. # noqa: E501 - ScaleTarget specifies scaling value of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 + ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. concurrency and rps targets are supported by Knative Pod Autoscaler (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 :return: The scale_target of this V1beta1TransformerSpec. # noqa: E501 :rtype: int @@ -1001,7 +1001,7 @@ def scale_target(self): def scale_target(self, scale_target): """Sets the scale_target of this V1beta1TransformerSpec. - ScaleTarget specifies scaling value of the component concurrency(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 + ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for. concurrency and rps targets are supported by Knative Pod Autoscaler (https://knative.dev/docs/serving/autoscaling/autoscaling-targets/). # noqa: E501 :param scale_target: The scale_target of this V1beta1TransformerSpec. # noqa: E501 :type: int diff --git a/python/kserve/kserve/utils/utils.py b/python/kserve/kserve/utils/utils.py index 26b2d6e6915..49e827b84e9 100644 --- a/python/kserve/kserve/utils/utils.py +++ b/python/kserve/kserve/utils/utils.py @@ -14,8 +14,9 @@ import os import sys -import psutil from typing import Dict, Union + +import psutil from cloudevents.http import CloudEvent, to_binary, to_structured @@ -40,6 +41,10 @@ def set_isvc_namespace(inferenceservice): return namespace +def set_ig_namespace(inferencegraph): + return inferencegraph.metadata.namespace or get_default_target_namespace() + + def cpu_count(): """Get the available CPU count for this system. Takes the minimum value from the following locations: diff --git a/python/kserve/test/test_v1alpha1_inference_graph.py b/python/kserve/test/test_v1alpha1_inference_graph.py new file mode 100644 index 00000000000..8740d382cd7 --- /dev/null +++ b/python/kserve/test/test_v1alpha1_inference_graph.py @@ -0,0 +1,70 @@ +# Copyright 2022 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
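Before the generated test stubs below, a brief orientation on how the new models compose may help. This is a hedged, illustrative sketch, not part of the diff: the service names and the 80/20 split are invented, and only the classes added earlier in this diff are assumed.

```python
# Hypothetical Splitter node built from the generated models in this diff.
from kserve.models.v1alpha1_inference_router import V1alpha1InferenceRouter
from kserve.models.v1alpha1_inference_step import V1alpha1InferenceStep

# Per the InferenceStep docstring, Splitter weights should sum to 100.
splitter = V1alpha1InferenceRouter(
    router_type="Splitter",
    steps=[
        V1alpha1InferenceStep(service_name="model-a", weight=80),
        V1alpha1InferenceStep(service_name="model-b", weight=20),
    ],
)
print(splitter.to_dict())  # {'router_type': 'Splitter', 'steps': [...]}
```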
+ +# coding: utf-8 + +""" + KServe + + Python SDK for KServe # noqa: E501 + + The version of the OpenAPI document: v0.1 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +import kserve +from kserve.models.v1alpha1_inference_graph import V1alpha1InferenceGraph # noqa: E501 +from kserve.rest import ApiException + +class TestV1alpha1InferenceGraph(unittest.TestCase): + """V1alpha1InferenceGraph unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1alpha1InferenceGraph + include_optional is a boolean; when False only required + params are included, when True both required and + optional params are included """ + # model = kserve.models.v1alpha1_inference_graph.V1alpha1InferenceGraph() # noqa: E501 + if include_optional: + return V1alpha1InferenceGraph( + api_version = '0', + kind = '0', + metadata = None, + spec = None, + status = None + ) + else: + return V1alpha1InferenceGraph( + ) + + def testV1alpha1InferenceGraph(self): + """Test V1alpha1InferenceGraph""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/kserve/test/test_v1alpha1_inference_graph_list.py b/python/kserve/test/test_v1alpha1_inference_graph_list.py new file mode 100644 index 00000000000..62784e2ec8f --- /dev/null +++ b/python/kserve/test/test_v1alpha1_inference_graph_list.py @@ -0,0 +1,84 @@ +# Copyright 2022 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# coding: utf-8
+
+"""
+    KServe
+
+    Python SDK for KServe  # noqa: E501
+
+    The version of the OpenAPI document: v0.1
+    Generated by: https://openapi-generator.tech
+"""
+
+
+from __future__ import absolute_import
+
+import unittest
+import datetime
+
+import kserve
+from kserve.models.v1alpha1_inference_graph_list import V1alpha1InferenceGraphList  # noqa: E501
+from kserve.rest import ApiException
+
+class TestV1alpha1InferenceGraphList(unittest.TestCase):
+    """V1alpha1InferenceGraphList unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional):
+        """Test V1alpha1InferenceGraphList
+            include_optional is a boolean; when False only required
+            params are included, when True both required and
+            optional params are included"""
+        # model = kserve.models.v1alpha1_inference_graph_list.V1alpha1InferenceGraphList()  # noqa: E501
+        if include_optional:
+            return V1alpha1InferenceGraphList(
+                api_version = '0',
+                items = [
+                    kserve.models.v1alpha1_inference_graph.V1alpha1InferenceGraph(
+                        api_version = '0',
+                        kind = '0',
+                        metadata = None,
+                        spec = None,
+                        status = None, )
+                    ],
+                kind = '0',
+                metadata = None
+            )
+        else:
+            return V1alpha1InferenceGraphList(
+                items = [
+                    kserve.models.v1alpha1_inference_graph.V1alpha1InferenceGraph(
+                        api_version = '0',
+                        kind = '0',
+                        metadata = None,
+                        spec = None,
+                        status = None, )
+                    ],
+        )
+
+    def testV1alpha1InferenceGraphList(self):
+        """Test V1alpha1InferenceGraphList"""
+        inst_req_only = self.make_instance(include_optional=False)
+        inst_req_and_optional = self.make_instance(include_optional=True)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/kserve/test/test_v1alpha1_inference_graph_spec.py b/python/kserve/test/test_v1alpha1_inference_graph_spec.py
new file mode 100644
index 00000000000..2b85bb846e3
--- /dev/null
+++ b/python/kserve/test/test_v1alpha1_inference_graph_spec.py
@@ -0,0 +1,71 @@
+# Copyright 2022 The KServe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# coding: utf-8
+
+"""
+    KServe
+
+    Python SDK for KServe  # noqa: E501
+
+    The version of the OpenAPI document: v0.1
+    Generated by: https://openapi-generator.tech
+"""
+
+
+from __future__ import absolute_import
+
+import unittest
+import datetime
+
+import kserve
+from kserve.models.v1alpha1_inference_graph_spec import V1alpha1InferenceGraphSpec  # noqa: E501
+from kserve.rest import ApiException
+
+class TestV1alpha1InferenceGraphSpec(unittest.TestCase):
+    """V1alpha1InferenceGraphSpec unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional):
+        """Test V1alpha1InferenceGraphSpec
+            include_optional is a boolean; when False only required
+            params are included, when True both required and
+            optional params are included"""
+        # model = kserve.models.v1alpha1_inference_graph_spec.V1alpha1InferenceGraphSpec()  # noqa: E501
+        if include_optional:
+            return V1alpha1InferenceGraphSpec(
+                nodes = {
+                    'key' : None
+                    }
+            )
+        else:
+            return V1alpha1InferenceGraphSpec(
+                nodes = {
+                    'key' : None
+                    },
+        )
+
+    def testV1alpha1InferenceGraphSpec(self):
+        """Test V1alpha1InferenceGraphSpec"""
+        inst_req_only = self.make_instance(include_optional=False)
+        inst_req_and_optional = self.make_instance(include_optional=True)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/kserve/test/test_v1alpha1_inference_graph_status.py b/python/kserve/test/test_v1alpha1_inference_graph_status.py
new file mode 100644
index 00000000000..dc75e13ce68
--- /dev/null
+++ b/python/kserve/test/test_v1alpha1_inference_graph_status.py
@@ -0,0 +1,73 @@
+# Copyright 2022 The KServe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# coding: utf-8
+
+"""
+    KServe
+
+    Python SDK for KServe  # noqa: E501
+
+    The version of the OpenAPI document: v0.1
+    Generated by: https://openapi-generator.tech
+"""
+
+
+from __future__ import absolute_import
+
+import unittest
+import datetime
+
+import kserve
+from kserve.models.v1alpha1_inference_graph_status import V1alpha1InferenceGraphStatus  # noqa: E501
+from kserve.rest import ApiException
+
+class TestV1alpha1InferenceGraphStatus(unittest.TestCase):
+    """V1alpha1InferenceGraphStatus unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional):
+        """Test V1alpha1InferenceGraphStatus
+            include_optional is a boolean; when False only required
+            params are included, when True both required and
+            optional params are included"""
+        # model = kserve.models.v1alpha1_inference_graph_status.V1alpha1InferenceGraphStatus()  # noqa: E501
+        if include_optional:
+            return V1alpha1InferenceGraphStatus(
+                annotations = {
+                    'key' : '0'
+                    },
+                conditions = [
+                    None
+                    ],
+                observed_generation = 56,
+                url = None
+            )
+        else:
+            return V1alpha1InferenceGraphStatus(
+        )
+
+    def testV1alpha1InferenceGraphStatus(self):
+        """Test V1alpha1InferenceGraphStatus"""
+        inst_req_only = self.make_instance(include_optional=False)
+        inst_req_and_optional = self.make_instance(include_optional=True)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/kserve/test/test_v1alpha1_inference_router.py b/python/kserve/test/test_v1alpha1_inference_router.py
new file mode 100644
index 00000000000..2a0bdadeb65
--- /dev/null
+++ b/python/kserve/test/test_v1alpha1_inference_router.py
@@ -0,0 +1,77 @@
+# Copyright 2022 The KServe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# coding: utf-8
+
+"""
+    KServe
+
+    Python SDK for KServe  # noqa: E501
+
+    The version of the OpenAPI document: v0.1
+    Generated by: https://openapi-generator.tech
+"""
+
+
+from __future__ import absolute_import
+
+import unittest
+import datetime
+
+import kserve
+from kserve.models.v1alpha1_inference_router import V1alpha1InferenceRouter  # noqa: E501
+from kserve.rest import ApiException
+
+class TestV1alpha1InferenceRouter(unittest.TestCase):
+    """V1alpha1InferenceRouter unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional):
+        """Test V1alpha1InferenceRouter
+            include_optional is a boolean; when False only required
+            params are included, when True both required and
+            optional params are included"""
+        # model = kserve.models.v1alpha1_inference_router.V1alpha1InferenceRouter()  # noqa: E501
+        if include_optional:
+            return V1alpha1InferenceRouter(
+                router_type = '0',
+                steps = [
+                    kserve.models.v1alpha1_inference_step.V1alpha1InferenceStep(
+                        condition = '0',
+                        data = '0',
+                        name = '0',
+                        node_name = '0',
+                        service_name = '0',
+                        service_url = '0',
+                        weight = 56, )
+                    ]
+            )
+        else:
+            return V1alpha1InferenceRouter(
+                router_type = '0',
+        )
+
+    def testV1alpha1InferenceRouter(self):
+        """Test V1alpha1InferenceRouter"""
+        inst_req_only = self.make_instance(include_optional=False)
+        inst_req_and_optional = self.make_instance(include_optional=True)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/kserve/test/test_v1alpha1_inference_step.py b/python/kserve/test/test_v1alpha1_inference_step.py
new file mode 100644
index 00000000000..0ba811e14d7
--- /dev/null
+++ b/python/kserve/test/test_v1alpha1_inference_step.py
@@ -0,0 +1,72 @@
+# Copyright 2022 The KServe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# coding: utf-8
+
+"""
+    KServe
+
+    Python SDK for KServe  # noqa: E501
+
+    The version of the OpenAPI document: v0.1
+    Generated by: https://openapi-generator.tech
+"""
+
+
+from __future__ import absolute_import
+
+import unittest
+import datetime
+
+import kserve
+from kserve.models.v1alpha1_inference_step import V1alpha1InferenceStep  # noqa: E501
+from kserve.rest import ApiException
+
+class TestV1alpha1InferenceStep(unittest.TestCase):
+    """V1alpha1InferenceStep unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional):
+        """Test V1alpha1InferenceStep
+            include_optional is a boolean; when False only required
+            params are included, when True both required and
+            optional params are included"""
+        # model = kserve.models.v1alpha1_inference_step.V1alpha1InferenceStep()  # noqa: E501
+        if include_optional:
+            return V1alpha1InferenceStep(
+                condition = '0',
+                data = '0',
+                name = '0',
+                node_name = '0',
+                service_name = '0',
+                service_url = '0',
+                weight = 56
+            )
+        else:
+            return V1alpha1InferenceStep(
+        )
+
+    def testV1alpha1InferenceStep(self):
+        """Test V1alpha1InferenceStep"""
+        inst_req_only = self.make_instance(include_optional=False)
+        inst_req_and_optional = self.make_instance(include_optional=True)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/kserve/test/test_v1alpha1_inference_target.py b/python/kserve/test/test_v1alpha1_inference_target.py
new file mode 100644
index 00000000000..89c811044ac
--- /dev/null
+++ b/python/kserve/test/test_v1alpha1_inference_target.py
@@ -0,0 +1,68 @@
+# Copyright 2022 The KServe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# coding: utf-8
+
+"""
+    KServe
+
+    Python SDK for KServe  # noqa: E501
+
+    The version of the OpenAPI document: v0.1
+    Generated by: https://openapi-generator.tech
+"""
+
+
+from __future__ import absolute_import
+
+import unittest
+import datetime
+
+import kserve
+from kserve.models.v1alpha1_inference_target import V1alpha1InferenceTarget  # noqa: E501
+from kserve.rest import ApiException
+
+class TestV1alpha1InferenceTarget(unittest.TestCase):
+    """V1alpha1InferenceTarget unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional):
+        """Test V1alpha1InferenceTarget
+            include_optional is a boolean; when False only required
+            params are included, when True both required and
+            optional params are included"""
+        # model = kserve.models.v1alpha1_inference_target.V1alpha1InferenceTarget()  # noqa: E501
+        if include_optional:
+            return V1alpha1InferenceTarget(
+                node_name = '0',
+                service_name = '0',
+                service_url = '0'
+            )
+        else:
+            return V1alpha1InferenceTarget(
+        )
+
+    def testV1alpha1InferenceTarget(self):
+        """Test V1alpha1InferenceTarget"""
+        inst_req_only = self.make_instance(include_optional=False)
+        inst_req_and_optional = self.make_instance(include_optional=True)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/kserve/test/test_v1beta1_model_status.py b/python/kserve/test/test_v1beta1_model_status.py
new file mode 100644
index 00000000000..fff063a2d18
--- /dev/null
+++ b/python/kserve/test/test_v1beta1_model_status.py
@@ -0,0 +1,79 @@
+# Copyright 2022 The KServe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# coding: utf-8
+
+"""
+    KServe
+
+    Python SDK for KServe  # noqa: E501
+
+    The version of the OpenAPI document: v0.1
+    Generated by: https://openapi-generator.tech
+"""
+
+
+from __future__ import absolute_import
+
+import unittest
+import datetime
+
+import kserve
+from kserve.models.v1beta1_model_status import V1beta1ModelStatus  # noqa: E501
+from kserve.rest import ApiException
+
+class TestV1beta1ModelStatus(unittest.TestCase):
+    """V1beta1ModelStatus unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional):
+        """Test V1beta1ModelStatus
+            include_optional is a boolean; when False only required
+            params are included, when True both required and
+            optional params are included"""
+        # model = kserve.models.v1beta1_model_status.V1beta1ModelStatus()  # noqa: E501
+        if include_optional:
+            return V1beta1ModelStatus(
+                copies = kserve.models.v1beta1_model_copies.V1beta1ModelCopies(
+                    failed_copies = 56,
+                    total_copies = 56, ),
+                last_failure_info = kserve.models.v1beta1_failure_info.V1beta1FailureInfo(
+                    location = '0',
+                    message = '0',
+                    model_revision_name = '0',
+                    reason = '0',
+                    time = None, ),
+                states = kserve.models.v1beta1_model_revision_states.V1beta1ModelRevisionStates(
+                    active_model_state = '0',
+                    target_model_state = '0', ),
+                transition_status = '0'
+            )
+        else:
+            return V1beta1ModelStatus(
+                transition_status = '0',
+        )
+
+    def testV1beta1ModelStatus(self):
+        """Test V1beta1ModelStatus"""
+        inst_req_only = self.make_instance(include_optional=False)
+        inst_req_and_optional = self.make_instance(include_optional=True)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/e2e/common/utils.py b/test/e2e/common/utils.py
index fc1ba45abbd..cb7142273bb 100644
--- a/test/e2e/common/utils.py
+++ b/test/e2e/common/utils.py
@@ -10,17 +10,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import grpc
-import time
-import logging
 import json
-import requests
+import logging
 import os
-from . import inference_pb2_grpc
+import time
 from urllib.parse import urlparse
+
+import grpc
+import requests
 from kubernetes import client
+
 from kserve import KServeClient
 from kserve import constants
+from . import inference_pb2_grpc
 
 
 logging.basicConfig(level=logging.INFO)
@@ -94,6 +96,33 @@ def predict_str(service_name, input_json, protocol_version="v1",
     return preds
 
 
+def predict_ig(ig_name, input_json, protocol_version="v1",
+               version=constants.KSERVE_V1ALPHA1_VERSION):
+    with open(input_json) as json_file:
+        data = json.dumps(json.load(json_file))
+
+    kserve_client = KServeClient(
+        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
+    ig = kserve_client.get_inference_graph(
+        ig_name,
+        namespace=KSERVE_TEST_NAMESPACE,
+        version=version,
+    )
+
+    cluster_ip = get_cluster_ip()
+    host = urlparse(ig["status"]["url"]).netloc
+    headers = {"Host": host}
+    url = f"http://{cluster_ip}"
+
+    logging.info("Sending Header = %s", headers)
+    logging.info("Sending url = %s", url)
+    logging.info("Sending request data: %s", input_json)
+    response = requests.post(url, data, headers=headers)
+    logging.info("Got response code %s, content %s", response.status_code, response.content)
+    preds = json.loads(response.content.decode("utf-8"))
+    return preds
+
+
 def explain(service_name, input_json):
     return explain_response(service_name, input_json)["data"]["precision"]
diff --git a/test/e2e/graph/__init__.py b/test/e2e/graph/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/test/e2e/graph/test_inference_graph.py b/test/e2e/graph/test_inference_graph.py
new file mode 100644
index 00000000000..9d10c5f08eb
--- /dev/null
+++ b/test/e2e/graph/test_inference_graph.py
@@ -0,0 +1,95 @@
+import os
+
+from kubernetes import client
+from kubernetes.client import V1ResourceRequirements
+
+from kserve import V1beta1PredictorSpec, V1beta1SKLearnSpec, V1beta1InferenceServiceSpec, V1beta1InferenceService, \
+    constants, KServeClient, V1alpha1InferenceGraphSpec, V1alpha1InferenceRouter, V1alpha1InferenceGraph, \
+    V1alpha1InferenceStep, V1beta1XGBoostSpec
+from ..common.utils import KSERVE_TEST_NAMESPACE, predict_ig
+
+
+def test_inference_graph():
+    sklearn_name = "isvc-sklearn"
+    xgb_name = "isvc-xgboost"
+    graph_name = "model-chainer"
+
+    sklearn_predictor = V1beta1PredictorSpec(
+        min_replicas=1,
+        sklearn=V1beta1SKLearnSpec(
+            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
+            resources=V1ResourceRequirements(
+                requests={"cpu": "50m", "memory": "128Mi"},
+                limits={"cpu": "100m", "memory": "256Mi"},
+            ),
+        ),
+    )
+    sklearn_isvc = V1beta1InferenceService(
+        api_version=constants.KSERVE_V1BETA1,
+        kind=constants.KSERVE_KIND,
+        metadata=client.V1ObjectMeta(
+            name=sklearn_name,
+            namespace=KSERVE_TEST_NAMESPACE
+        ),
+        spec=V1beta1InferenceServiceSpec(predictor=sklearn_predictor),
+    )
+
+    xgb_predictor = V1beta1PredictorSpec(
+        min_replicas=1,
+        xgboost=V1beta1XGBoostSpec(
+            storage_uri="gs://kfserving-examples/models/xgboost/1.5/model",
+            resources=V1ResourceRequirements(
+                requests={"cpu": "50m", "memory": "128Mi"},
+                limits={"cpu": "100m", "memory": "256Mi"},
+            ),
+        ),
+    )
+    xgb_isvc = V1beta1InferenceService(
+        api_version=constants.KSERVE_V1BETA1,
+        kind=constants.KSERVE_KIND,
+        metadata=client.V1ObjectMeta(
+            name=xgb_name, namespace=KSERVE_TEST_NAMESPACE
+        ),
+        spec=V1beta1InferenceServiceSpec(predictor=xgb_predictor),
+    )
+
+    nodes = {"root": V1alpha1InferenceRouter(
+        router_type="Sequence",
+        steps=[
+            V1alpha1InferenceStep(
+                service_name=sklearn_name,
+            ),
+            V1alpha1InferenceStep(
+                service_name=xgb_name,
+                data="$request",
+            ),
+        ],
+    )}
+    graph_spec = V1alpha1InferenceGraphSpec(
+        nodes=nodes,
+    )
+    ig = V1alpha1InferenceGraph(
+        api_version=constants.KSERVE_V1ALPHA1,
+        kind=constants.KSERVE_KIND_INFERENCEGRAPH,
+        metadata=client.V1ObjectMeta(
+            name=graph_name,
+            namespace=KSERVE_TEST_NAMESPACE
+        ),
+        spec=graph_spec,
+    )
+
+    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
+    kserve_client.create(sklearn_isvc)
+    kserve_client.create(xgb_isvc)
+    kserve_client.create_inference_graph(ig)
+
+    kserve_client.wait_isvc_ready(sklearn_name, namespace=KSERVE_TEST_NAMESPACE)
+    kserve_client.wait_isvc_ready(xgb_name, namespace=KSERVE_TEST_NAMESPACE)
+    kserve_client.wait_ig_ready(graph_name, namespace=KSERVE_TEST_NAMESPACE)
+
+    res = predict_ig(graph_name, "./data/iris_input.json")
+    assert res["predictions"] == [1, 1]
+
+    kserve_client.delete_inference_graph(graph_name, KSERVE_TEST_NAMESPACE)
+    kserve_client.delete(sklearn_name, KSERVE_TEST_NAMESPACE)
+    kserve_client.delete(xgb_name, KSERVE_TEST_NAMESPACE)
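
For reviewers: the graph that test_inference_graph() assembles through the SDK corresponds, as a rough sketch, to the manifest below. The field names follow the v1alpha1 types in this PR; the serving.kserve.io/v1alpha1 apiVersion is an assumption here rather than something shown in this diff. data: $request on the second step forwards the original request body to the xgboost model instead of the sklearn step's output.

```yaml
# Sketch of the InferenceGraph built in test_inference_graph() above.
# apiVersion/group is assumed; field names per the v1alpha1 types in this PR.
apiVersion: serving.kserve.io/v1alpha1
kind: InferenceGraph
metadata:
  name: model-chainer
spec:
  nodes:
    root:
      routerType: Sequence
      steps:
        - serviceName: isvc-sklearn
        - serviceName: isvc-xgboost
          data: $request   # pass the original request, not the previous step's output
```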