feat: endpoints rename instances doc #2282

Merged · 1 commit · May 17, 2024
docs/source/en/guides/inference_endpoints.md: 5 additions & 5 deletions
@@ -22,8 +22,8 @@ The first step is to create an Inference Endpoint using [`create_inference_endpoint`]
 ...     vendor="aws",
 ...     region="us-east-1",
 ...     type="protected",
-...     instance_size="medium",
-...     instance_type="c6i"
+...     instance_size="x2",
+...     instance_type="intel-icl"
 ...     )
 ```

@@ -58,8 +58,8 @@ By default the Inference Endpoint is built from a docker image provided by Huggi
 ...     vendor="aws",
 ...     region="us-east-1",
 ...     type="protected",
-...     instance_size="medium",
-...     instance_type="g5.2xlarge",
+...     instance_size="x1",
+...     instance_type="nvidia-a10g",
 ...     custom_image={
 ...         "health_route": "/health",
 ...         "env": {
@@ -203,7 +203,7 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2
 InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None)
 
 # Update to larger instance
->>> endpoint.update(accelerator="cpu", instance_size="large", instance_type="c6i")
+>>> endpoint.update(accelerator="cpu", instance_size="x4", instance_type="intel-icl")
 InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None)
 ```
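
For readers skimming the diff, here is what endpoint creation looks like with the renamed values. A minimal sketch assuming a Hugging Face token is configured locally; the endpoint and model names are illustrative:

```python
from huggingface_hub import create_inference_endpoint

endpoint = create_inference_endpoint(
    "my-endpoint-name",  # illustrative name
    repository="gpt2",
    framework="pytorch",
    task="text-generation",
    accelerator="cpu",
    vendor="aws",
    region="us-east-1",
    type="protected",
    instance_size="x2",         # previously "medium"
    instance_type="intel-icl",  # previously "c6i"
)
endpoint.wait()  # block until the endpoint reports status "running"
```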

docs/source/ko/guides/inference_endpoints.md: 6 additions & 6 deletions
@@ -21,8 +21,8 @@
 ...     vendor="aws",
 ...     region="us-east-1",
 ...     type="protected",
-...     instance_size="medium",
-...     instance_type="c6i"
+...     instance_size="x2",
+...     instance_type="intel-icl"
 ...     )
 ```

@@ -57,8 +57,8 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2
 ...     vendor="aws",
 ...     region="us-east-1",
 ...     type="protected",
-...     instance_size="medium",
-...     instance_type="g5.2xlarge",
+...     instance_size="x1",
+...     instance_type="nvidia-a10g",
 ...     custom_image={
 ...         "health_route": "/health",
 ...         "env": {
@@ -202,7 +202,7 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2
 InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None)
 
 # Update to a larger instance
->>> endpoint.update(accelerator="cpu", instance_size="large", instance_type="c6i")
+>>> endpoint.update(accelerator="cpu", instance_size="x4", instance_type="intel-icl")
 InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None)
 ```

@@ -254,4 +254,4 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2
 
 # Pause the endpoint
 >>> endpoint.pause()
-```
+```
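
The final hunk above only normalizes the end-of-file newline; the surrounding example pauses a running endpoint. As a minimal sketch of that flow (the endpoint name is illustrative):

```python
from huggingface_hub import get_inference_endpoint

endpoint = get_inference_endpoint("my-endpoint-name")  # illustrative name
endpoint.pause()  # a paused endpoint stops serving; restart it with endpoint.resume()
```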
src/huggingface_hub/_inference_endpoints.py: 2 additions & 2 deletions
@@ -256,9 +256,9 @@ def update(
     accelerator (`str`, *optional*):
         The hardware accelerator to be used for inference (e.g. `"cpu"`).
     instance_size (`str`, *optional*):
-        The size or type of the instance to be used for hosting the model (e.g. `"large"`).
+        The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
     instance_type (`str`, *optional*):
-        The cloud instance type where the Inference Endpoint will be deployed (e.g. `"c6i"`).
+        The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
     min_replica (`int`, *optional*):
         The minimum number of replicas (instances) to keep running for the Inference Endpoint.
     max_replica (`int`, *optional*):
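A sketch of `InferenceEndpoint.update` with the renamed values, matching the docstring above (the endpoint name is illustrative and the endpoint is assumed to exist):

```python
from huggingface_hub import get_inference_endpoint

endpoint = get_inference_endpoint("my-endpoint-name")  # illustrative name

# Scale up to a larger CPU instance using the post-rename identifiers
endpoint.update(accelerator="cpu", instance_size="x4", instance_type="intel-icl")
```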
src/huggingface_hub/hf_api.py: 8 additions & 8 deletions
@@ -7147,9 +7147,9 @@ def create_inference_endpoint(
     accelerator (`str`):
         The hardware accelerator to be used for inference (e.g. `"cpu"`).
     instance_size (`str`):
-        The size or type of the instance to be used for hosting the model (e.g. `"large"`).
+        The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
     instance_type (`str`):
-        The cloud instance type where the Inference Endpoint will be deployed (e.g. `"c6i"`).
+        The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
     region (`str`):
         The cloud region in which the Inference Endpoint will be created (e.g. `"us-east-1"`).
     vendor (`str`):
@@ -7193,8 +7193,8 @@ def create_inference_endpoint(
 ...     vendor="aws",
 ...     region="us-east-1",
 ...     type="protected",
-...     instance_size="medium",
-...     instance_type="c6i",
+...     instance_size="x2",
+...     instance_type="intel-icl",
 ... )
 >>> endpoint
 InferenceEndpoint(name='my-endpoint-name', status="pending",...)
@@ -7217,8 +7217,8 @@
 ...     vendor="aws",
 ...     region="us-east-1",
 ...     type="protected",
-...     instance_size="medium",
-...     instance_type="g5.2xlarge",
+...     instance_size="x1",
+...     instance_type="nvidia-a10g",
 ...     custom_image={
 ...         "health_route": "/health",
 ...         "env": {
@@ -7351,9 +7351,9 @@ def update_inference_endpoint(
     accelerator (`str`, *optional*):
         The hardware accelerator to be used for inference (e.g. `"cpu"`).
     instance_size (`str`, *optional*):
-        The size or type of the instance to be used for hosting the model (e.g. `"large"`).
+        The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
     instance_type (`str`, *optional*):
-        The cloud instance type where the Inference Endpoint will be deployed (e.g. `"c6i"`).
+        The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
     min_replica (`int`, *optional*):
         The minimum number of replicas (instances) to keep running for the Inference Endpoint.
     max_replica (`int`, *optional*):
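For the custom-image path touched above, a sketch of `HfApi.create_inference_endpoint` on a GPU instance with the renamed identifiers. The endpoint name, env values, and image URL below are illustrative placeholders, not part of this diff:

```python
from huggingface_hub import HfApi

api = HfApi()
endpoint = api.create_inference_endpoint(
    "my-custom-endpoint",  # illustrative name
    repository="gpt2",
    framework="pytorch",
    task="text-generation",
    accelerator="gpu",
    vendor="aws",
    region="us-east-1",
    type="protected",
    instance_size="x1",           # previously "medium"
    instance_type="nvidia-a10g",  # previously "g5.2xlarge"
    custom_image={
        "health_route": "/health",
        "env": {"MODEL_ID": "/repository"},  # illustrative env
        "url": "ghcr.io/huggingface/text-generation-inference:latest",  # illustrative image
    },
)
```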
tests/test_inference_endpoints.py: 6 additions & 6 deletions
@@ -20,8 +20,8 @@
 "provider": {"vendor": "aws", "region": "us-east-1"},
 "compute": {
     "accelerator": "cpu",
-    "instanceType": "c6i",
-    "instanceSize": "medium",
+    "instanceType": "intel-icl",
+    "instanceSize": "x2",
     "scaling": {"minReplica": 0, "maxReplica": 1},
 },
 "model": {
@@ -51,8 +51,8 @@
 "provider": {"vendor": "aws", "region": "us-east-1"},
 "compute": {
     "accelerator": "cpu",
-    "instanceType": "c6i",
-    "instanceSize": "medium",
+    "instanceType": "intel-icl",
+    "instanceSize": "x2",
     "scaling": {"minReplica": 0, "maxReplica": 1},
 },
 "model": {
@@ -83,8 +83,8 @@
 "provider": {"vendor": "aws", "region": "us-east-1"},
 "compute": {
     "accelerator": "cpu",
-    "instanceType": "c6i",
-    "instanceSize": "medium",
+    "instanceType": "intel-icl",
+    "instanceSize": "x2",
     "scaling": {"minReplica": 0, "maxReplica": 1},
 },
 "model": {
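The fixtures above mirror the `compute` block returned by the Endpoints API. As a reference for what the renamed fields look like in a raw payload (a sketch of the fixtures' shape, not a complete API response):

```python
# Post-rename "compute" block as used in the updated test fixtures
compute = {
    "accelerator": "cpu",
    "instanceType": "intel-icl",  # previously "c6i"
    "instanceSize": "x2",         # previously "medium"
    "scaling": {"minReplica": 0, "maxReplica": 1},
}
```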