Skip to content
This repository was archived by the owner on Jul 24, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions api/v1alpha1/modelservice_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ type ContainerSpec struct {
// For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
// For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
// and the value is mounted to an environment variable called HF_TOKEN
// For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
// For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
// is created and mounted with the mountPath oci-dir
// default:false
// +optional
Expand Down Expand Up @@ -258,7 +258,7 @@ type Routing struct {
// ModelArtifacts describes the source of the model
type ModelArtifacts struct {
// URI is the model URI
// Three types of URIs are support to enable models packaged as images (oci://<image-repo>/<image-name><:image-tag>),
// Three types of URIs are supported to enable models packaged as images (oci+native://<image-repo>/<image-name><:image-tag>),
// models downloaded from HuggingFace (hf://<model-repo>/<model-name>)
// and pre-existing models loaded from a volume-mounted PVC (pvc://model-path)
//
Expand All @@ -273,6 +273,13 @@ type ModelArtifacts struct {
//
// +optional
Size *res.Quantity `json:"size,omitempty"`
// OCI image pull policy.
// One of Always, Never, IfNotPresent.
// Defaults to Always if :latest tag is specified, or IfNotPresent otherwise.
// Cannot be updated.
// More info: https://kubernetes.io/docs/concepts/containers/images#updating-images
// +optional
PullPolicy corev1.PullPolicy `json:"pullPolicy,omitempty" protobuf:"bytes,14,opt,name=pullPolicy,casttype=PullPolicy"`
}

// ModelServicePodSpec defines the specification for pod templates that will be created by ModelService.
Expand Down
22 changes: 15 additions & 7 deletions config/crd/bases/llm-d.ai_modelservices.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ spec:
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
and the value is mounted to an environment variable called HF_TOKEN
For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
is created and mounted with the mountPath oci-dir
default:false
type: boolean
Expand Down Expand Up @@ -656,7 +656,7 @@ spec:
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
and the value is mounted to an environment variable called HF_TOKEN
For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
is created and mounted with the mountPath oci-dir
default:false
type: boolean
Expand Down Expand Up @@ -992,7 +992,7 @@ spec:
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
and the value is mounted to an environment variable called HF_TOKEN
For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
is created and mounted with the mountPath oci-dir
default:false
type: boolean
Expand Down Expand Up @@ -1293,7 +1293,7 @@ spec:
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
and the value is mounted to an environment variable called HF_TOKEN
For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
is created and mounted with the mountPath oci-dir
default:false
type: boolean
Expand Down Expand Up @@ -1387,6 +1387,14 @@ spec:
authSecretName:
description: Name of the authentication secret. Contains HF_TOKEN
type: string
pullPolicy:
description: |-
OCI image pull policy.
One of Always, Never, IfNotPresent.
Defaults to Always if :latest tag is specified, or IfNotPresent otherwise.
Cannot be updated.
More info: https://kubernetes.io/docs/concepts/containers/images#updating-images
type: string
size:
anyOf:
- type: integer
Expand All @@ -1399,7 +1407,7 @@ spec:
uri:
description: |-
URI is the model URI
Three types of URIs are support to enable models packaged as images (oci://<image-repo>/<image-name><:image-tag>),
Three types of URIs are supported to enable models packaged as images (oci+native://<image-repo>/<image-name><:image-tag>),
models downloaded from HuggingFace (hf://<model-repo>/<model-name>)
and pre-existing models loaded from a volume-mounted PVC (pvc://model-path)
type: string
Expand Down Expand Up @@ -1657,7 +1665,7 @@ spec:
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
and the value is mounted to an environment variable called HF_TOKEN
For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
is created and mounted with the mountPath oci-dir
default:false
type: boolean
Expand Down Expand Up @@ -1958,7 +1966,7 @@ spec:
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
and the value is mounted to an environment variable called HF_TOKEN
For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
is created and mounted with the mountPath oci-dir
default:false
type: boolean
Expand Down
96 changes: 93 additions & 3 deletions docs/api_reference/out.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-co
For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache +
For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, +
and the value is mounted to an environment variable called HF_TOKEN +
For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) +
For URIs with oci+native:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) +
is created and mounted with the mountPath oci-dir +
default:false + | |
|===
Expand All @@ -121,12 +121,17 @@ ModelArtifacts describes the source of the model
|===
| Field | Description | Default | Validation
| *`uri`* __string__ | URI is the model URI +
Three types of URIs are support to enable models packaged as images (oci://<image-repo>/<image-name><:image-tag>), +
Three types of URIs are supported to enable models packaged as images (oci+native://<image-repo>/<image-name><:image-tag>), +
models downloaded from HuggingFace (hf://<model-repo>/<model-name>) +
and pre-existing models loaded from a volume-mounted PVC (pvc://model-path) + | |
| *`authSecretName`* __string__ | Name of the authentication secret. Contains HF_TOKEN + | |
| *`size`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#quantity-resource-api[$$Quantity$$]__ | Size of the model artifacts on disk +
| *`size`* __xref:{anchor_prefix}-k8s-io-apimachinery-pkg-api-resource-quantity[$$Quantity$$]__ | Size of the model artifacts on disk +
ensure Size is large enough when providing hf://... URI + | |
| *`pullPolicy`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#pullpolicy-v1-core[$$PullPolicy$$]__ | OCI image pull policy. +
One of Always, Never, IfNotPresent. +
Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. +
Cannot be updated. +
More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + | |
|===


Expand Down Expand Up @@ -278,6 +283,9 @@ this reference will be nil + | |
| *`eppDeploymentRef`* __string__ | EppDeploymentRef identifies the epp deployment +
if epp deployment is yet to be created, +
this reference will be nil + | |
| *`httpRouteRef`* __string__ | HTTPRouteRef identifies the HTTPRoute resource +
if HTTPRoute is yet to be created, +
this reference will be nil + | |
| *`inferenceModelRef`* __string__ | InferenceModelRef identifies the inference model resource +
if inference model is yet to be created, +
this reference will be nil + | |
Expand Down Expand Up @@ -435,6 +443,88 @@ Required: {} +

| *`ports`* __xref:{anchor_prefix}-github-com-llm-d-llm-d-model-service-api-v1alpha1-port[$$Port$$] array__ | Ports is a list of named ports +
These can be referenced by name in configuration of base configuration or model services + | |
| *`gatewayRefs`* __ParentReference array__ | GatewayRefs are merged into the baseconfig based on the Name field. +
Directly from Gateway API: https://gateway-api.sigs.k8s.io/reference/spec/#commonroutespec +
ParentRefs references the resources (usually Gateways) that a Route wants +
to be attached to. Note that the referenced parent resource needs to +
allow this for the attachment to be complete. For Gateways, that means +
the Gateway needs to allow attachment from Routes of this kind and +
namespace. For Services, that means the Service must either be in the same +
namespace for a "producer" route, or the mesh implementation must support +
and allow "consumer" routes for the referenced Service. ReferenceGrant is +
not applicable for governing ParentRefs to Services - it is not possible to +
create a "producer" route for a Service in a different namespace from the +
Route. +


There are two kinds of parent resources with "Core" support: +


* Gateway (Gateway conformance profile) +
* Service (Mesh conformance profile, ClusterIP Services only) +


This API may be extended in the future to support additional kinds of parent +
resources. +


ParentRefs must be _distinct_. This means either that: +


* They select different objects. If this is the case, then parentRef +
entries are distinct. In terms of fields, this means that the +
multi-part key defined by `group`, `kind`, `namespace`, and `name` must +
be unique across all parentRef entries in the Route. +
* They do not select different objects, but for each optional field used, +
each ParentRef that selects the same object must set the same set of +
optional fields to different values. If one ParentRef sets a +
combination of optional fields, all must set the same combination. +


Some examples: +


* If one ParentRef sets `sectionName`, all ParentRefs referencing the +
same object must also set `sectionName`. +
* If one ParentRef sets `port`, all ParentRefs referencing the same +
object must also set `port`. +
* If one ParentRef sets `sectionName` and `port`, all ParentRefs +
referencing the same object must also set `sectionName` and `port`. +


It is possible to separately reference multiple distinct objects that may +
be collapsed by an implementation. For example, some implementations may +
choose to merge compatible Gateway Listeners together. If that is the +
case, the list of routes attached to those resources should also be +
merged. +


Note that for ParentRefs that cross namespace boundaries, there are specific +
rules. Cross-namespace references are only valid if they are explicitly +
allowed by something in the namespace they are referring to. For example, +
Gateway has the AllowedRoutes field, and ReferenceGrant provides a +
generic way to enable other kinds of cross-namespace reference. +


<gateway:experimental:description> +
ParentRefs from a Route to a Service in the same namespace are "producer" +
routes, which apply default routing rules to inbound connections from +
any namespace to the Service. +


ParentRefs from a Route to a Service in a different namespace are +
"consumer" routes, and these routing rules are only applied to outbound +
connections originating from the same namespace as the Route, for which +
the intended destination of the connections are a Service targeted as a +
ParentRef of the Route. +
</gateway:experimental:description> +


<gateway:standard:validation:XValidation:message="sectionName must be specified when parentRefs includes 2 or more references to the same parent",rule="self.all(p1, self.all(p2, p1.group == p2.group && p1.kind == p2.kind && p1.name == p2.name && (((!has(p1.__namespace__) \|\| p1.__namespace__ == '') && (!has(p2.__namespace__) \|\| p2.__namespace__ == '')) \|\| (has(p1.__namespace__) && has(p2.__namespace__) && p1.__namespace__ == p2.__namespace__ )) ? ((!has(p1.sectionName) \|\| p1.sectionName == '') == (!has(p2.sectionName) \|\| p2.sectionName == '')) : true))"> +
<gateway:standard:validation:XValidation:message="sectionName must be unique when parentRefs includes 2 or more references to the same parent",rule="self.all(p1, self.exists_one(p2, p1.group == p2.group && p1.kind == p2.kind && p1.name == p2.name && (((!has(p1.__namespace__) \|\| p1.__namespace__ == '') && (!has(p2.__namespace__) \|\| p2.__namespace__ == '')) \|\| (has(p1.__namespace__) && has(p2.__namespace__) && p1.__namespace__ == p2.__namespace__ )) && (((!has(p1.sectionName) \|\| p1.sectionName == '') && (!has(p2.sectionName) \|\| p2.sectionName == '')) \|\| (has(p1.sectionName) && has(p2.sectionName) && p1.sectionName == p2.sectionName))))"> +
<gateway:experimental:validation:XValidation:message="sectionName or port must be specified when parentRefs includes 2 or more references to the same parent",rule="self.all(p1, self.all(p2, p1.group == p2.group && p1.kind == p2.kind && p1.name == p2.name && (((!has(p1.__namespace__) \|\| p1.__namespace__ == '') && (!has(p2.__namespace__) \|\| p2.__namespace__ == '')) \|\| (has(p1.__namespace__) && has(p2.__namespace__) && p1.__namespace__ == p2.__namespace__)) ? ((!has(p1.sectionName) \|\| p1.sectionName == '') == (!has(p2.sectionName) \|\| p2.sectionName == '') && (!has(p1.port) \|\| p1.port == 0) == (!has(p2.port) \|\| p2.port == 0)): true))"> +
<gateway:experimental:validation:XValidation:message="sectionName or port must be unique when parentRefs includes 2 or more references to the same parent",rule="self.all(p1, self.exists_one(p2, p1.group == p2.group && p1.kind == p2.kind && p1.name == p2.name && (((!has(p1.__namespace__) \|\| p1.__namespace__ == '') && (!has(p2.__namespace__) \|\| p2.__namespace__ == '')) \|\| (has(p1.__namespace__) && has(p2.__namespace__) && p1.__namespace__ == p2.__namespace__ )) && (((!has(p1.sectionName) \|\| p1.sectionName == '') && (!has(p2.sectionName) \|\| p2.sectionName == '')) \|\| ( has(p1.sectionName) && has(p2.sectionName) && p1.sectionName == p2.sectionName)) && (((!has(p1.port) \|\| p1.port == 0) && (!has(p2.port) \|\| p2.port == 0)) \|\| (has(p1.port) && has(p2.port) && p1.port == p2.port))))"> + | | MaxItems: 32 +

|===


Loading
Loading