diff --git a/docs/source/en/guides/inference_endpoints.md b/docs/source/en/guides/inference_endpoints.md index ff627536c9..c0fe3a4f17 100644 --- a/docs/source/en/guides/inference_endpoints.md +++ b/docs/source/en/guides/inference_endpoints.md @@ -22,8 +22,8 @@ The first step is to create an Inference Endpoint using [`create_inference_endpo ... vendor="aws", ... region="us-east-1", ... type="protected", -... instance_size="medium", -... instance_type="c6i" +... instance_size="x2", +... instance_type="intel-icl" ... ) ``` @@ -58,8 +58,8 @@ By default the Inference Endpoint is built from a docker image provided by Huggi ... vendor="aws", ... region="us-east-1", ... type="protected", -... instance_size="medium", -... instance_type="g5.2xlarge", +... instance_size="x1", +... instance_type="nvidia-a10g", ... custom_image={ ... "health_route": "/health", ... "env": { @@ -203,7 +203,7 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2 InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None) # Update to larger instance ->>> endpoint.update(accelerator="cpu", instance_size="large", instance_type="c6i") +>>> endpoint.update(accelerator="cpu", instance_size="x4", instance_type="intel-icl") InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None) ``` diff --git a/docs/source/ko/guides/inference_endpoints.md b/docs/source/ko/guides/inference_endpoints.md index ecd6dc27f2..5659521ca6 100644 --- a/docs/source/ko/guides/inference_endpoints.md +++ b/docs/source/ko/guides/inference_endpoints.md @@ -21,8 +21,8 @@ ... vendor="aws", ... region="us-east-1", ... type="protected", -... instance_size="medium", -... instance_type="c6i" +... instance_size="x2", +... instance_type="intel-icl" ... ) ``` @@ -57,8 +57,8 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2 ... vendor="aws", ... region="us-east-1", ... type="protected", -... instance_size="medium", -... instance_type="g5.2xlarge", +... instance_size="x1", +... instance_type="nvidia-a10g", ... custom_image={ ... "health_route": "/health", ... "env": { @@ -202,7 +202,7 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2 InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None) # 더 큰 인스턴스로 업데이트 ->>> endpoint.update(accelerator="cpu", instance_size="large", instance_type="c6i") +>>> endpoint.update(accelerator="cpu", instance_size="x4", instance_type="intel-icl") InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None) ``` @@ -254,4 +254,4 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2 # 엔드포인트 중지 >>> endpoint.pause() -``` \ No newline at end of file +``` diff --git a/src/huggingface_hub/_inference_endpoints.py b/src/huggingface_hub/_inference_endpoints.py index 1fd2116d9f..92e407b81a 100644 --- a/src/huggingface_hub/_inference_endpoints.py +++ b/src/huggingface_hub/_inference_endpoints.py @@ -256,9 +256,9 @@ def update( accelerator (`str`, *optional*): The hardware accelerator to be used for inference (e.g. `"cpu"`). instance_size (`str`, *optional*): - The size or type of the instance to be used for hosting the model (e.g. `"large"`). + The size or type of the instance to be used for hosting the model (e.g. `"x4"`). instance_type (`str`, *optional*): - The cloud instance type where the Inference Endpoint will be deployed (e.g. `"c6i"`). + The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`). min_replica (`int`, *optional*): The minimum number of replicas (instances) to keep running for the Inference Endpoint. max_replica (`int`, *optional*): diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index c573c05967..a7ad1004bb 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -7147,9 +7147,9 @@ def create_inference_endpoint( accelerator (`str`): The hardware accelerator to be used for inference (e.g. `"cpu"`). instance_size (`str`): - The size or type of the instance to be used for hosting the model (e.g. `"large"`). + The size or type of the instance to be used for hosting the model (e.g. `"x4"`). instance_type (`str`): - The cloud instance type where the Inference Endpoint will be deployed (e.g. `"c6i"`). + The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`). region (`str`): The cloud region in which the Inference Endpoint will be created (e.g. `"us-east-1"`). vendor (`str`): @@ -7193,8 +7193,8 @@ def create_inference_endpoint( ... vendor="aws", ... region="us-east-1", ... type="protected", - ... instance_size="medium", - ... instance_type="c6i", + ... instance_size="x2", + ... instance_type="intel-icl", ... ) >>> endpoint InferenceEndpoint(name='my-endpoint-name', status="pending",...) @@ -7217,8 +7217,8 @@ def create_inference_endpoint( ... vendor="aws", ... region="us-east-1", ... type="protected", - ... instance_size="medium", - ... instance_type="g5.2xlarge", + ... instance_size="x1", + ... instance_type="nvidia-a10g", ... custom_image={ ... "health_route": "/health", ... "env": { @@ -7351,9 +7351,9 @@ def update_inference_endpoint( accelerator (`str`, *optional*): The hardware accelerator to be used for inference (e.g. `"cpu"`). instance_size (`str`, *optional*): - The size or type of the instance to be used for hosting the model (e.g. `"large"`). + The size or type of the instance to be used for hosting the model (e.g. `"x4"`). instance_type (`str`, *optional*): - The cloud instance type where the Inference Endpoint will be deployed (e.g. `"c6i"`). + The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`). min_replica (`int`, *optional*): The minimum number of replicas (instances) to keep running for the Inference Endpoint. max_replica (`int`, *optional*): diff --git a/tests/test_inference_endpoints.py b/tests/test_inference_endpoints.py index edcf2d4d34..000609240f 100644 --- a/tests/test_inference_endpoints.py +++ b/tests/test_inference_endpoints.py @@ -20,8 +20,8 @@ "provider": {"vendor": "aws", "region": "us-east-1"}, "compute": { "accelerator": "cpu", - "instanceType": "c6i", - "instanceSize": "medium", + "instanceType": "intel-icl", + "instanceSize": "x2", "scaling": {"minReplica": 0, "maxReplica": 1}, }, "model": { @@ -51,8 +51,8 @@ "provider": {"vendor": "aws", "region": "us-east-1"}, "compute": { "accelerator": "cpu", - "instanceType": "c6i", - "instanceSize": "medium", + "instanceType": "intel-icl", + "instanceSize": "x2", "scaling": {"minReplica": 0, "maxReplica": 1}, }, "model": { @@ -83,8 +83,8 @@ "provider": {"vendor": "aws", "region": "us-east-1"}, "compute": { "accelerator": "cpu", - "instanceType": "c6i", - "instanceSize": "medium", + "instanceType": "intel-icl", + "instanceSize": "x2", "scaling": {"minReplica": 0, "maxReplica": 1}, }, "model": {