Skip to content

Commit

Permalink
return resource pool types in config endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
zubenkoivan committed Apr 27, 2024
1 parent 52e92b1 commit 0519b9b
Show file tree
Hide file tree
Showing 6 changed files with 196 additions and 9 deletions.
33 changes: 32 additions & 1 deletion platform_api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
from .orchestrator.jobs_storage import JobsStorage, PostgresJobsStorage
from .orchestrator.jobs_storage.base import JobStorageTransactionError
from .postgres import make_async_engine
from .resource import Preset
from .resource import Preset, ResourcePoolType
from .user import authorized_user, untrusted_user
from .utils.update_notifier import (
Notifier,
Expand Down Expand Up @@ -164,6 +164,10 @@ def _convert_cluster_config_to_payload(
) -> dict[str, Any]:
cluster_config = user_cluster_config.config
orgs = user_cluster_config.orgs
resource_pool_types = [
self._convert_resource_pool_type_to_payload(r)
for r in cluster_config.orchestrator.resource_pool_types
]
presets = [
self._convert_preset_to_payload(preset)
for preset in cluster_config.orchestrator.presets
Expand All @@ -177,6 +181,7 @@ def _convert_cluster_config_to_payload(
"metrics_url": str(cluster_config.ingress.metrics_url),
"disks_url": str(cluster_config.ingress.disks_url),
"buckets_url": str(cluster_config.ingress.buckets_url),
"resource_pool_types": resource_pool_types,
"resource_presets": presets,
"orgs": orgs,
"timezone": str(cluster_config.timezone),
Expand All @@ -189,6 +194,32 @@ def _convert_cluster_config_to_payload(
result["users_url"] = str(self._config.auth.public_endpoint_url)
return result

def _convert_resource_pool_type_to_payload(
    self, resource_pool_type: ResourcePoolType
) -> dict[str, Any]:
    """Serialize a resource pool type for the cluster config payload.

    Args:
        resource_pool_type: The pool description to serialize.

    Returns:
        A dict that always contains ``name``, ``min_size``, ``max_size``,
        ``cpu``, ``available_cpu``, ``memory``, ``available_memory``,
        ``disk_size`` and ``is_preemptible``. The ``nvidia_gpu``,
        ``amd_gpu`` and ``tpu`` keys are present only when the pool
        defines them (i.e. the attribute is not ``None``).
    """
    payload: dict[str, Any] = {
        "name": resource_pool_type.name,
        "min_size": resource_pool_type.min_size,
        "max_size": resource_pool_type.max_size,
        "cpu": resource_pool_type.cpu,
        "available_cpu": resource_pool_type.available_cpu,
        "memory": resource_pool_type.memory,
        "available_memory": resource_pool_type.available_memory,
        "disk_size": resource_pool_type.disk_size,
        "is_preemptible": resource_pool_type.is_preemptible,
    }

    # Optional accelerators are left out of the payload instead of being
    # sent as null. The checks are explicit ``is not None`` tests so that a
    # defined-but-falsy value (for example a GPU count of 0) is still kept.
    if resource_pool_type.nvidia_gpu is not None:
        payload["nvidia_gpu"] = resource_pool_type.nvidia_gpu
    if resource_pool_type.amd_gpu is not None:
        payload["amd_gpu"] = resource_pool_type.amd_gpu

    # Same rule for the TPU description: gate on None, not on truthiness,
    # to stay consistent with the GPU fields above.
    tpu = resource_pool_type.tpu
    if tpu is not None:
        payload["tpu"] = {
            "types": tpu.types,
            "software_versions": tpu.software_versions,
            "ipv4_cidr_block": tpu.ipv4_cidr_block,
        }
    return payload

def _convert_preset_to_payload(self, preset: Preset) -> dict[str, Any]:
payload: dict[str, Any] = {
"name": preset.name,
Expand Down
2 changes: 1 addition & 1 deletion platform_api/cluster_config_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def _create_resource_pool_type(self, payload: dict[str, Any]) -> ResourcePoolTyp
available_cpu=payload.get("available_cpu") or cpu,
memory=memory,
available_memory=available_memory or memory,
disk_gb=payload.get("disk_size_gb"),
disk_size=payload.get("disk_size"),
min_size=payload.get("min_size"),
max_size=payload.get("max_size"),
tpu=self._create_tpu_resource(payload.get("tpu")),
Expand Down
2 changes: 1 addition & 1 deletion platform_api/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class ResourcePoolType:
available_memory: Optional[int] = None
nvidia_gpu: Optional[int] = None
amd_gpu: Optional[int] = None
disk_gb: Optional[int] = None
disk_size: Optional[int] = None
min_size: Optional[int] = None
max_size: Optional[int] = None
tpu: Optional[TPUResource] = None
Expand Down
6 changes: 6 additions & 0 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,27 +163,31 @@ def _f(**kwargs: Any) -> OrchestratorConfig:
available_cpu=1.0,
memory=2048 * 10**6,
available_memory=2048 * 10**6,
disk_size=150 * 10**9,
),
ResourcePoolType(
name="cpu-p",
cpu=1.0,
available_cpu=1.0,
memory=2048 * 10**6,
available_memory=2048 * 10**6,
disk_size=150 * 10**9,
is_preemptible=True,
),
ResourcePoolType(
cpu=100,
available_cpu=100,
memory=500_000 * 10**6,
available_memory=500_000 * 10**6,
disk_size=150 * 10**9,
),
ResourcePoolType(
name="tpu",
cpu=1.0,
available_cpu=1.0,
memory=2048 * 10**6,
available_memory=2048 * 10**6,
disk_size=150 * 10**9,
tpu=TPUResource(
ipv4_cidr_block="1.1.1.1/32",
types=("v2-8",),
Expand All @@ -196,7 +200,9 @@ def _f(**kwargs: Any) -> OrchestratorConfig:
available_cpu=1.0,
memory=2048 * 10**6,
available_memory=2048 * 10**6,
disk_size=150 * 10**9,
nvidia_gpu=1,
amd_gpu=2,
),
],
"presets": [
Expand Down
150 changes: 150 additions & 0 deletions tests/integration/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,56 @@ async def test_config(
"metrics_url": "https://neu.ro/api/v1/metrics",
"disks_url": "https://neu.ro/api/v1/disk",
"buckets_url": "https://neu.ro/api/v1/buckets",
"resource_pool_types": [
{
"name": "cpu",
"cpu": 1.0,
"available_cpu": 1.0,
"memory": 2048 * 10**6,
"available_memory": 2048 * 10**6,
"disk_size": 150 * 10**9,
},
{
"name": "cpu-p",
"cpu": 1.0,
"available_cpu": 1.0,
"memory": 2048 * 10**6,
"available_memory": 2048 * 10**6,
"disk_size": 150 * 10**9,
"is_preemptible": True,
},
{
"name": mock.ANY,
"cpu": 100,
"available_cpu": 100,
"memory": 500_000 * 10**6,
"available_memory": 500_000 * 10**6,
"disk_size": 150 * 10**9,
},
{
"name": "tpu",
"cpu": 1.0,
"available_cpu": 1.0,
"memory": 2048 * 10**6,
"available_memory": 2048 * 10**6,
"disk_size": 150 * 10**9,
"tpu": {
"ipv4_cidr_block": "1.1.1.1/32",
"types": ("v2-8",),
"software_versions": ("1.14",),
},
},
{
"name": "gpu",
"cpu": 1.0,
"available_cpu": 1.0,
"memory": 2048 * 10**6,
"available_memory": 2048 * 10**6,
"disk_size": 150 * 10**9,
"nvidia_gpu": 1,
"amd_gpu": 1,
},
],
"resource_presets": [
{
"name": "gpu-small",
Expand Down Expand Up @@ -501,6 +551,56 @@ async def test_config__with_orgs_and_projects(
"metrics_url": "https://neu.ro/api/v1/metrics",
"disks_url": "https://neu.ro/api/v1/disk",
"buckets_url": "https://neu.ro/api/v1/buckets",
"resource_pool_types": [
{
"name": "cpu",
"cpu": 1.0,
"available_cpu": 1.0,
"memory": 2048 * 10**6,
"available_memory": 2048 * 10**6,
"disk_size": 150 * 10**9,
},
{
"name": "cpu-p",
"cpu": 1.0,
"available_cpu": 1.0,
"memory": 2048 * 10**6,
"available_memory": 2048 * 10**6,
"disk_size": 150 * 10**9,
"is_preemptible": True,
},
{
"name": mock.ANY,
"cpu": 100,
"available_cpu": 100,
"memory": 500_000 * 10**6,
"available_memory": 500_000 * 10**6,
"disk_size": 150 * 10**9,
},
{
"name": "tpu",
"cpu": 1.0,
"available_cpu": 1.0,
"memory": 2048 * 10**6,
"available_memory": 2048 * 10**6,
"disk_size": 150 * 10**9,
"tpu": {
"ipv4_cidr_block": "1.1.1.1/32",
"types": ("v2-8",),
"software_versions": ("1.14",),
},
},
{
"name": "gpu",
"cpu": 1.0,
"available_cpu": 1.0,
"memory": 2048 * 10**6,
"available_memory": 2048 * 10**6,
"disk_size": 150 * 10**9,
"nvidia_gpu": 1,
"amd_gpu": 1,
},
],
"resource_presets": [
{
"name": "gpu-small",
Expand Down Expand Up @@ -687,6 +787,56 @@ async def test_config_with_oauth(
"metrics_url": "https://neu.ro/api/v1/metrics",
"disks_url": "https://neu.ro/api/v1/disk",
"buckets_url": "https://neu.ro/api/v1/buckets",
"resource_pool_types": [
{
"name": "cpu",
"cpu": 1.0,
"available_cpu": 1.0,
"memory": 2048 * 10**6,
"available_memory": 2048 * 10**6,
"disk_size": 150 * 10**9,
},
{
"name": "cpu-p",
"cpu": 1.0,
"available_cpu": 1.0,
"memory": 2048 * 10**6,
"available_memory": 2048 * 10**6,
"disk_size": 150 * 10**9,
"is_preemptible": True,
},
{
"name": mock.ANY,
"cpu": 100,
"available_cpu": 100,
"memory": 500_000 * 10**6,
"available_memory": 500_000 * 10**6,
"disk_size": 150 * 10**9,
},
{
"name": "tpu",
"cpu": 1.0,
"available_cpu": 1.0,
"memory": 2048 * 10**6,
"available_memory": 2048 * 10**6,
"disk_size": 150 * 10**9,
"tpu": {
"ipv4_cidr_block": "1.1.1.1/32",
"types": ("v2-8",),
"software_versions": ("1.14",),
},
},
{
"name": "gpu",
"cpu": 1.0,
"available_cpu": 1.0,
"memory": 2048 * 10**6,
"available_memory": 2048 * 10**6,
"disk_size": 150 * 10**9,
"nvidia_gpu": 1,
"amd_gpu": 1,
},
],
"resource_presets": [
{
"name": "gpu-small",
Expand Down
12 changes: 6 additions & 6 deletions tests/unit/test_cluster_config_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def clusters_payload(nfs_storage_payload: dict[str, Any]) -> list[dict[str, Any]
"available_cpu": 7.0,
"memory": 53248 * 10**6,
"available_memory": 49152 * 10**6,
"disk_size_gb": 150,
"disk_size": 150 * 10**9,
"tpu": {
"ipv4_cidr_block": "1.1.1.1/32",
"types": ["v2-8", "v3-8"],
Expand All @@ -148,7 +148,7 @@ def clusters_payload(nfs_storage_payload: dict[str, Any]) -> list[dict[str, Any]
"max_size": 16,
"cpu": 31.0,
"memory": 204800 * 10**6,
"disk_size_gb": 150,
"disk_size_gb": 150 * 10**9,
"nvidia_gpu": 4,
"amd_gpu": 4,
},
Expand All @@ -161,7 +161,7 @@ def clusters_payload(nfs_storage_payload: dict[str, Any]) -> list[dict[str, Any]
"available_cpu": 31.0,
"memory": 212992 * 10**6,
"available_memory": 204800 * 10**6,
"disk_size_gb": 150,
"disk_size_gb": 150 * 10**9,
"nvidia_gpu": 4,
"amd_gpu": 4,
},
Expand All @@ -174,7 +174,7 @@ def clusters_payload(nfs_storage_payload: dict[str, Any]) -> list[dict[str, Any]
"available_cpu": 7.0,
"memory": 53248 * 10**6,
"available_memory": 49152 * 10**6,
"disk_size_gb": 150,
"disk_size_gb": 150 * 10**9,
"nvidia_gpu": 1,
"amd_gpu": 1,
},
Expand All @@ -187,7 +187,7 @@ def clusters_payload(nfs_storage_payload: dict[str, Any]) -> list[dict[str, Any]
"available_cpu": 7.0,
"memory": 53248 * 10**6,
"available_memory": 49152 * 10**6,
"disk_size_gb": 150,
"disk_size_gb": 150 * 10**9,
"nvidia_gpu": 1,
"amd_gpu": 1,
},
Expand Down Expand Up @@ -262,7 +262,7 @@ def test_valid_cluster_config(
assert orchestrator.resource_pool_types[0].available_cpu == 7.0
assert orchestrator.resource_pool_types[0].memory == 53248 * 10**6
assert orchestrator.resource_pool_types[0].available_memory == 49152 * 10**6
assert orchestrator.resource_pool_types[0].disk_gb == 150
assert orchestrator.resource_pool_types[0].disk_size == 150 * 10**9
assert orchestrator.resource_pool_types[0].nvidia_gpu is None
assert orchestrator.resource_pool_types[0].tpu == TPUResource(
ipv4_cidr_block="1.1.1.1/32",
Expand Down

0 comments on commit 0519b9b

Please sign in to comment.