feat: endpoints rename instances doc #2282

Merged · 1 commit · May 17, 2024
docs/source/en/guides/inference_endpoints.md: 5 additions & 5 deletions
@@ -22,8 +22,8 @@ The first step is to create an Inference Endpoint using [`create_inference_endpoint`]
 ...     vendor="aws",
 ...     region="us-east-1",
 ...     type="protected",
-...     instance_size="medium",
-...     instance_type="c6i"
+...     instance_size="x2",
+...     instance_type="intel-icl"
 ...     )
 ```

@@ -58,8 +58,8 @@ By default the Inference Endpoint is built from a docker image provided by Huggi
 ...     vendor="aws",
 ...     region="us-east-1",
 ...     type="protected",
-...     instance_size="medium",
-...     instance_type="g5.2xlarge",
+...     instance_size="x1",
+...     instance_type="nvidia-a10g",
 ...     custom_image={
 ...         "health_route": "/health",
 ...         "env": {
@@ -203,7 +203,7 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2
 InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None)
 
 # Update to larger instance
->>> endpoint.update(accelerator="cpu", instance_size="large", instance_type="c6i")
+>>> endpoint.update(accelerator="cpu", instance_size="x4", instance_type="intel-icl")
 InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None)
 ```
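
For readers skimming the diff, here is what endpoint creation looks like with the renamed values. A minimal sketch assuming a Hugging Face token is configured locally; the endpoint and model names are illustrative:

```python
from huggingface_hub import create_inference_endpoint

endpoint = create_inference_endpoint(
    "my-endpoint-name",  # illustrative name
    repository="gpt2",
    framework="pytorch",
    task="text-generation",
    accelerator="cpu",
    vendor="aws",
    region="us-east-1",
    type="protected",
    instance_size="x2",         # previously "medium"
    instance_type="intel-icl",  # previously "c6i"
)
endpoint.wait()  # block until the endpoint reports status "running"
```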

docs/source/ko/guides/inference_endpoints.md: 6 additions & 6 deletions
@@ -21,8 +21,8 @@
 ...     vendor="aws",
 ...     region="us-east-1",
 ...     type="protected",
-...     instance_size="medium",
-...     instance_type="c6i"
+...     instance_size="x2",
+...     instance_type="intel-icl"
 ...     )
 ```

@@ -57,8 +57,8 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2
 ...     vendor="aws",
 ...     region="us-east-1",
 ...     type="protected",
-...     instance_size="medium",
-...     instance_type="g5.2xlarge",
+...     instance_size="x1",
+...     instance_type="nvidia-a10g",
 ...     custom_image={
 ...         "health_route": "/health",
 ...         "env": {
@@ -202,7 +202,7 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2
 InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None)
 
 # Update to a larger instance
->>> endpoint.update(accelerator="cpu", instance_size="large", instance_type="c6i")
+>>> endpoint.update(accelerator="cpu", instance_size="x4", instance_type="intel-icl")
 InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None)
 ```

@@ -254,4 +254,4 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2
 
 # Pause the endpoint
 >>> endpoint.pause()
-```
+```
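
The final hunk above only normalizes the end-of-file newline; the surrounding example pauses a running endpoint. As a minimal sketch of that flow (the endpoint name is illustrative):

```python
from huggingface_hub import get_inference_endpoint

endpoint = get_inference_endpoint("my-endpoint-name")  # illustrative name
endpoint.pause()  # a paused endpoint stops serving; restart it with endpoint.resume()
```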
src/huggingface_hub/_inference_endpoints.py: 2 additions & 2 deletions
@@ -256,9 +256,9 @@ def update(
     accelerator (`str`, *optional*):
         The hardware accelerator to be used for inference (e.g. `"cpu"`).
     instance_size (`str`, *optional*):
-        The size or type of the instance to be used for hosting the model (e.g. `"large"`).
+        The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
     instance_type (`str`, *optional*):
-        The cloud instance type where the Inference Endpoint will be deployed (e.g. `"c6i"`).
+        The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
     min_replica (`int`, *optional*):
         The minimum number of replicas (instances) to keep running for the Inference Endpoint.
     max_replica (`int`, *optional*):
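A sketch of `InferenceEndpoint.update` with the renamed values, matching the docstring above (the endpoint name is illustrative and the endpoint is assumed to exist):

```python
from huggingface_hub import get_inference_endpoint

endpoint = get_inference_endpoint("my-endpoint-name")  # illustrative name

# Scale up to a larger CPU instance using the post-rename identifiers
endpoint.update(accelerator="cpu", instance_size="x4", instance_type="intel-icl")
```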
src/huggingface_hub/hf_api.py: 8 additions & 8 deletions
@@ -7147,9 +7147,9 @@ def create_inference_endpoint(
     accelerator (`str`):
         The hardware accelerator to be used for inference (e.g. `"cpu"`).
     instance_size (`str`):
-        The size or type of the instance to be used for hosting the model (e.g. `"large"`).
+        The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
     instance_type (`str`):
-        The cloud instance type where the Inference Endpoint will be deployed (e.g. `"c6i"`).
+        The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
     region (`str`):
         The cloud region in which the Inference Endpoint will be created (e.g. `"us-east-1"`).
     vendor (`str`):
@@ -7193,8 +7193,8 @@ def create_inference_endpoint(
 ...     vendor="aws",
 ...     region="us-east-1",
 ...     type="protected",
-...     instance_size="medium",
-...     instance_type="c6i",
+...     instance_size="x2",
+...     instance_type="intel-icl",
 ... )
 >>> endpoint
 InferenceEndpoint(name='my-endpoint-name', status="pending",...)
@@ -7217,8 +7217,8 @@
 ...     vendor="aws",
 ...     region="us-east-1",
 ...     type="protected",
-...     instance_size="medium",
-...     instance_type="g5.2xlarge",
+...     instance_size="x1",
+...     instance_type="nvidia-a10g",
 ...     custom_image={
 ...         "health_route": "/health",
 ...         "env": {
@@ -7351,9 +7351,9 @@ def update_inference_endpoint(
     accelerator (`str`, *optional*):
         The hardware accelerator to be used for inference (e.g. `"cpu"`).
     instance_size (`str`, *optional*):
-        The size or type of the instance to be used for hosting the model (e.g. `"large"`).
+        The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
     instance_type (`str`, *optional*):
-        The cloud instance type where the Inference Endpoint will be deployed (e.g. `"c6i"`).
+        The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
     min_replica (`int`, *optional*):
         The minimum number of replicas (instances) to keep running for the Inference Endpoint.
     max_replica (`int`, *optional*):
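For the custom-image path touched above, a sketch of `HfApi.create_inference_endpoint` on a GPU instance with the renamed identifiers. The endpoint name, env values, and image URL below are illustrative placeholders, not part of this diff:

```python
from huggingface_hub import HfApi

api = HfApi()
endpoint = api.create_inference_endpoint(
    "my-custom-endpoint",  # illustrative name
    repository="gpt2",
    framework="pytorch",
    task="text-generation",
    accelerator="gpu",
    vendor="aws",
    region="us-east-1",
    type="protected",
    instance_size="x1",           # previously "medium"
    instance_type="nvidia-a10g",  # previously "g5.2xlarge"
    custom_image={
        "health_route": "/health",
        "env": {"MODEL_ID": "/repository"},  # illustrative env
        "url": "ghcr.io/huggingface/text-generation-inference:latest",  # illustrative image
    },
)
```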
tests/test_inference_endpoints.py: 6 additions & 6 deletions
@@ -20,8 +20,8 @@
 "provider": {"vendor": "aws", "region": "us-east-1"},
 "compute": {
     "accelerator": "cpu",
-    "instanceType": "c6i",
-    "instanceSize": "medium",
+    "instanceType": "intel-icl",
+    "instanceSize": "x2",
     "scaling": {"minReplica": 0, "maxReplica": 1},
 },
 "model": {
@@ -51,8 +51,8 @@
 "provider": {"vendor": "aws", "region": "us-east-1"},
 "compute": {
     "accelerator": "cpu",
-    "instanceType": "c6i",
-    "instanceSize": "medium",
+    "instanceType": "intel-icl",
+    "instanceSize": "x2",
     "scaling": {"minReplica": 0, "maxReplica": 1},
 },
 "model": {
@@ -83,8 +83,8 @@
 "provider": {"vendor": "aws", "region": "us-east-1"},
 "compute": {
     "accelerator": "cpu",
-    "instanceType": "c6i",
-    "instanceSize": "medium",
+    "instanceType": "intel-icl",
+    "instanceSize": "x2",
     "scaling": {"minReplica": 0, "maxReplica": 1},
 },
 "model": {
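The fixtures above mirror the `compute` block returned by the Endpoints API. As a reference for what the renamed fields look like in a raw payload (a sketch of the fixtures' shape, not a complete API response):

```python
# Post-rename "compute" block as used in the updated test fixtures
compute = {
    "accelerator": "cpu",
    "instanceType": "intel-icl",  # previously "c6i"
    "instanceSize": "x2",         # previously "medium"
    "scaling": {"minReplica": 0, "maxReplica": 1},
}
```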