Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ piped_mlx.egg-info/
.coverage

.idea/
.vscode/
.DS_Store
.ruff_cache/
.logs/
5 changes: 5 additions & 0 deletions .vscode/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
*

# ignore everything in this directory except this .gitignore and tasks.json
!.gitignore
!tasks.json
101 changes: 101 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
{
  "version": "2.0.0",
  "tasks": [
    {
      "label": "Start API (8080 + 58080)",
      "type": "shell",
      "command": "uv",
      "args": [
        "run",
        "dnet-api",
        "--http-port",
        "8080",
        "--grpc-port",
        "58080"
      ],
      "group": "build",
      "presentation": {
        "echo": true,
        "reveal": "always",
        "focus": false,
        "panel": "new",
        "showReuseMessage": true,
        "clear": false
      }
    },
    {
      "label": "Start Shard (8081 + 58081)",
      "type": "shell",
      "command": "uv",
      "args": [
        "run",
        "dnet-shard",
        "--http-port",
        "8081",
        "--grpc-port",
        "58081"
      ],
      "group": "build",
      "presentation": {
        "echo": true,
        "reveal": "always",
        "focus": false,
        "panel": "new",
        "showReuseMessage": true,
        "clear": false
      }
    },
    {
      "label": "Health (Shard 8081)",
      "type": "shell",
      "command": "curl -s http://localhost:8081/health -H \"Content-Type: application/json\" | bun -p \"Bun.inspect(await Bun.stdin.json(), { colors: true })\""
    },
    {
      "label": "Start Shard (8082 + 58082)",
      "type": "shell",
      "command": "uv",
      "args": [
        "run",
        "dnet-shard",
        "--http-port",
        "8082",
        "--grpc-port",
        "58082"
      ],
      "group": "build",
      "presentation": {
        "echo": true,
        "reveal": "always",
        "focus": false,
        "panel": "new",
        "showReuseMessage": true,
        "clear": false
      }
    },
    {
      "label": "Prepare Topology (Qwen/Qwen3-4B-MLX-4bit)",
      "type": "shell",
      "command": "curl -s -X POST http://localhost:8080/v1/prepare_topology -H \"Content-Type: application/json\" -d '{ \"model\": \"Qwen/Qwen3-4B-MLX-4bit\" }' | bun -p \"Bun.inspect(await Bun.stdin.json(), { colors: true })\""
    },
    {
      "label": "Prepare & Load (Qwen/Qwen3-4B-MLX-4bit)",
      "type": "shell",
      "command": "uv run ./scripts/prepare_model.py Qwen/Qwen3-4B-MLX-4bit"
    },
    {
      "label": "Get API Devices",
      "type": "shell",
      "command": "curl -s http://localhost:8080/v1/devices -H \"Content-Type: application/json\" | bun -p \"Bun.inspect(await Bun.stdin.json(), { colors: true })\""
    },
    {
      "label": "Get API Topology",
      "type": "shell",
      "command": "curl -s http://localhost:8080/v1/topology -H \"Content-Type: application/json\" | bun -p \"Bun.inspect(await Bun.stdin.json(), { colors: true })\""
    },
    {
      "label": "Chat Completions (Qwen/Qwen3-4B-MLX-4bit)",
      "type": "shell",
      "command": "curl -s -X POST http://localhost:8080/v1/chat/completions -H \"Content-Type: application/json\" -d '{\"model\":\"Qwen/Qwen3-4B-MLX-4bit\", \"messages\": [{\"role\": \"user\", \"content\": \"What is the capital of France?\"}], \"max_tokens\": 100}' | bun -p \"Bun.inspect(await Bun.stdin.json(), { colors: true })\""
    }
  ]
}
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,14 @@ format:
protos:
uv run ./scripts/generate_protos.py

.PHONY: update # | Update git submodules
.PHONY: update # | Update git submodules
update:
git submodule update --init --recursive

.PHONY: test # | Run tests
test:
uv run pytest -v

.PHONY: help # | List targets
help:
@grep '^.PHONY: .* #' Makefile | sed 's/\.PHONY: \(.*\) # \(.*\)/\1 \2/' | expand -t20
16 changes: 15 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,12 +158,26 @@ curl http://localhost:8080/v1/devices \

## Testing

You can lint the code using Ruff:
You can run the test suite with pytest:

```sh
uv run pytest -v
```

You can check linting and formatting via Ruff:

```sh
# lint
uvx ruff check

# format
uvx ruff format --diff
```

> [!TIP]
>
> If you are using VS Code, we have prepared [tasks](./.vscode/tasks.json) that you can run from the Command Palette via <kbd>Tasks: Run Task</kbd>.

## License

You can find the license [here](./LICENSE).
2 changes: 1 addition & 1 deletion lib/dperf
Submodule dperf updated from baa3a1 to 8e2b13
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ requires = ["uv_build>=0.8.17,<0.9.0"]
build-backend = "uv_build"

[tool.pytest.ini_options]
python_files = ["dnet/**/*_test.py", "tests/*.py"]
python_files = ["src/dnet/**/*_test.py", "tests/*.py"]
python_functions = ["test_"]
log_cli = true

[tool.ruff]
exclude = [".git", ".venv", "__pycache__", "build", "dist", "lib"]
line-length = 88 # black
line-length = 88 # black
14 changes: 8 additions & 6 deletions src/dnet/ring/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,17 @@ class ChatBaseParams(BaseModel):
class ChatParams(ChatBaseParams):
"""Extended parameters for chat requests."""

stream: Optional[bool] = False
max_tokens: Optional[int] = Field(default=100, ge=0)
logit_bias: Optional[Dict[int, float]] = Field(default_factory=dict)
logprobs: Optional[int] = Field(default=-1)
stop: Optional[Union[str, List[str]]] = []
profile: Optional[bool] = False
stream: bool = Field(default=False)
max_tokens: int = Field(default=100, ge=0)
logit_bias: Dict[int, float] = Field(default_factory=dict)
logprobs: int = Field(default=-1)
stop: Union[str, List[str]] = Field(default_factory=list)
profile: bool = Field(default=False)

def __init__(self, **data: Any):
super().__init__(**data)

# FIXME: why do this?
if isinstance(self.stop, str):
self.stop = [self.stop]

Expand Down
Loading