Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 5ca8257

Browse files
Merge pull request #1625 from janhq/chore/models-api
chore: Running Models API reference
2 parents 5338a78 + 9cc9e5d commit 5ca8257

File tree

1 file changed

+233
-19
lines changed

1 file changed

+233
-19
lines changed

docs/static/openapi/cortex.json

Lines changed: 233 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@
156156
"get": {
157157
"operationId": "HealthController_check",
158158
"summary": "Check health",
159-
"description": "Checks the health of the application.",
159+
"description": "Performs a comprehensive check of the application's health status.",
160160
"parameters": [],
161161
"responses": {
162162
"200": {
@@ -175,7 +175,7 @@
175175
"delete": {
176176
"operationId": "Terminate server process",
177177
"summary": "Terminate server",
178-
"description": "Terminate server process.",
178+
"description": "Initiates the shutdown process for the server, ensuring that all active connections are gracefully closed and any ongoing processes are properly terminated.",
179179
"parameters": [],
180180
"responses": {
181181
"200": {
@@ -329,12 +329,12 @@
329329
}
330330
},
331331
"tags": [
332-
"Models"
332+
"Pulling Models"
333333
]
334334
},
335335
"delete": {
336336
"tags": [
337-
"Models"
337+
"Pulling Models"
338338
],
339339
"summary": "Stop model download",
340340
"description": "Stops the download of a model with the corresponding taskId provided in the request body.",
@@ -449,15 +449,15 @@
449449
}
450450
},
451451
"tags": [
452-
"Models"
452+
"Running Models"
453453
]
454454
}
455455
},
456456
"/v1/models/start": {
457457
"post": {
458458
"operationId": "ModelsController_startModel",
459459
"summary": "Start model",
460-
"description": "Load a model into memory.",
460+
"description": "Load a model into memory. Note: Request body parameters will override those loaded from model.yml",
461461
"requestBody": {
462462
"required": true,
463463
"content": {
@@ -484,7 +484,7 @@
484484
}
485485
},
486486
"tags": [
487-
"Models"
487+
"Running Models"
488488
]
489489
}
490490
},
@@ -498,7 +498,7 @@
498498
"content": {
499499
"application/json": {
500500
"schema": {
501-
"$ref": "#/components/schemas/ModelStartDto"
501+
"$ref": "#/components/schemas/ModelStopDto"
502502
},
503503
"example": {
504504
"model": "llama3:8b-gguf-q6-k"
@@ -519,7 +519,7 @@
519519
}
520520
},
521521
"tags": [
522-
"Models"
522+
"Running Models"
523523
]
524524
}
525525
},
@@ -552,7 +552,7 @@
552552
}
553553
},
554554
"tags": [
555-
"Models"
555+
"Running Models"
556556
]
557557
},
558558
"delete": {
@@ -583,7 +583,7 @@
583583
}
584584
},
585585
"tags": [
586-
"Models"
586+
"Running Models"
587587
]
588588
}
589589
},
@@ -625,7 +625,7 @@
625625
}
626626
},
627627
"tags": [
628-
"Models"
628+
"Running Models"
629629
]
630630
}
631631
},
@@ -668,7 +668,7 @@
668668
}
669669
},
670670
"tags": [
671-
"Models"
671+
"Pulling Models"
672672
]
673673
}
674674
},
@@ -1368,8 +1368,12 @@
13681368
"description": "These endpoints manage the lifecycle of an Assistant within a conversation thread."
13691369
},
13701370
{
1371-
"name": "Models",
1372-
"description": "These endpoints provide a list and descriptions of all available models within the Cortex framework."
1371+
"name": "Pulling Models",
1372+
"description": "These endpoints handle downloading and importing models."
1373+
},
1374+
{
1375+
"name": "Running Models",
1376+
"description": "These endpoints support a range of operations that allow users to effectively control and interact with their models."
13731377
},
13741378
{
13751379
"name": "Server",
@@ -1399,7 +1403,8 @@
13991403
"Chat",
14001404
"Engines",
14011405
"Events",
1402-
"Models",
1406+
"Pulling Models",
1407+
"Running Models",
14031408
"Processes",
14041409
"Status",
14051410
"Server"
@@ -3235,8 +3240,60 @@
32353240
"type": "string",
32363241
"example": "llama3:8b-gguf-q6-k",
32373242
"description": "A downloaded model name."
3243+
},
3244+
"ctx_len": {
3245+
"type": "number",
3246+
"description": "The context length for model operations varies; the maximum depends on the specific model used.",
3247+
"example": 4096
3248+
},
3249+
"ngl": {
3250+
"type": "number",
3251+
"description": "Determines GPU layer usage.",
3252+
"example": 32
3253+
},
3254+
"n_parallel": {
3255+
"type": "number",
3256+
"minimum": 1,
3257+
"description": "Number of parallel processing units to use.",
3258+
"example": 1
3259+
},
3260+
"cache_type": {
3261+
"type": "string",
3262+
"description": "KV cache type: f16, q8_0, or q4_0; defaults to f16.",
3263+
"example": "f16"
3264+
},
3265+
"caching_enabled": {
3266+
"type": "boolean",
3267+
"description": "Whether prompt caching is enabled.",
3268+
"example": true
3269+
},
3270+
"model_path": {
3271+
"type": "string",
3272+
"description": "Local path to LLM model file",
3273+
"example": "/tmp/model.gguf"
3274+
},
3275+
"mmproj": {
3276+
"type": "string",
3277+
"description": "Local path to mmproj model file",
3278+
"example": "/tmp/model.gguf"
32383279
}
3239-
}
3280+
},
3281+
"required": [
3282+
"model"
3283+
]
3284+
},
3285+
"ModelStopDto": {
3286+
"type": "object",
3287+
"properties": {
3288+
"model": {
3289+
"type": "string",
3290+
"example": "llama3:8b-gguf-q6-k",
3291+
"description": "A downloaded model name."
3292+
}
3293+
},
3294+
"required": [
3295+
"model"
3296+
]
32403297
},
32413298
"ImportModelRequest": {
32423299
"type": "object",
@@ -3256,7 +3313,10 @@
32563313
"option": {
32573314
"type": "string",
32583315
"description": "Import options such as symlink or copy.",
3259-
"enum": ["symlink", "copy"]
3316+
"enum": [
3317+
"symlink",
3318+
"copy"
3319+
]
32603320
}
32613321
},
32623322
"required": [
@@ -3551,7 +3611,161 @@
35513611
},
35523612
"UpdateModelDto": {
35533613
"type": "object",
3554-
"properties": {}
3614+
"properties": {
3615+
"files": {
3616+
"type": "array",
3617+
"description": "List of file paths associated with the model. Can be relative or absolute.",
3618+
"items": {
3619+
"type": "string",
3620+
"example": "models\\cortex.so\\tinyllama\\1b-gguf\\model.gguf"
3621+
}
3622+
},
3623+
"stop": {
3624+
"type": "array",
3625+
"description": "Tokens that signal the end of generation.",
3626+
"items": {
3627+
"type": "string"
3628+
},
3629+
"example": [
3630+
"</s>"
3631+
]
3632+
},
3633+
"stream": {
3634+
"type": "boolean",
3635+
"description": "Whether to stream the output as it is generated.",
3636+
"example": true
3637+
},
3638+
"top_p": {
3639+
"type": "number",
3640+
"description": "Controls nucleus sampling; the model considers the results of the tokens with top_p probability mass.",
3641+
"example": 0.95
3642+
},
3643+
"temperature": {
3644+
"type": "number",
3645+
"description": "Controls randomness in token selection; lower values make the output more deterministic.",
3646+
"example": 0.7
3647+
},
3648+
"frequency_penalty": {
3649+
"type": "number",
3650+
"description": "Penalizes repeated tokens based on their frequency.",
3651+
"example": 0
3652+
},
3653+
"presence_penalty": {
3654+
"type": "number",
3655+
"description": "Penalizes tokens that have already appeared in the output.",
3656+
"example": 0
3657+
},
3658+
"max_tokens": {
3659+
"type": "integer",
3660+
"description": "Maximum number of tokens to generate.",
3661+
"example": 4096
3662+
},
3663+
"seed": {
3664+
"type": "integer",
3665+
"description": "Seed for random number generation to ensure reproducibility; -1 for random seed.",
3666+
"example": -1
3667+
},
3668+
"dynatemp_range": {
3669+
"type": "number",
3670+
"description": "Range for dynamic temperature adjustment.",
3671+
"example": 0
3672+
},
3673+
"dynatemp_exponent": {
3674+
"type": "number",
3675+
"description": "Exponent for dynamic temperature adjustment.",
3676+
"example": 1
3677+
},
3678+
"top_k": {
3679+
"type": "integer",
3680+
"description": "Limits the sampling pool to the top_k most probable tokens.",
3681+
"example": 40
3682+
},
3683+
"min_p": {
3684+
"type": "number",
3685+
"description": "Minimum probability threshold for token selection.",
3686+
"example": 0.05
3687+
},
3688+
"tfs_z": {
3689+
"type": "number",
3690+
"description": "Threshold for token frequency sampling.",
3691+
"example": 1
3692+
},
3693+
"typ_p": {
3694+
"type": "number",
3695+
"description": "Controls typical sampling; similar to top_p but focuses on local token distribution.",
3696+
"example": 1
3697+
},
3698+
"repeat_last_n": {
3699+
"type": "integer",
3700+
"description": "Number of recent tokens to consider for repetition penalty.",
3701+
"example": 64
3702+
},
3703+
"repeat_penalty": {
3704+
"type": "number",
3705+
"description": "Penalty applied to repeated tokens.",
3706+
"example": 1
3707+
},
3708+
"mirostat": {
3709+
"type": "boolean",
3710+
"description": "Enables or disables Mirostat sampling.",
3711+
"example": false
3712+
},
3713+
"mirostat_tau": {
3714+
"type": "number",
3715+
"description": "Target entropy for Mirostat sampling.",
3716+
"example": 5
3717+
},
3718+
"mirostat_eta": {
3719+
"type": "number",
3720+
"description": "Learning rate for Mirostat sampling.",
3721+
"example": 0.1
3722+
},
3723+
"penalize_nl": {
3724+
"type": "boolean",
3725+
"description": "Whether to penalize newline tokens.",
3726+
"example": false
3727+
},
3728+
"ignore_eos": {
3729+
"type": "boolean",
3730+
"description": "Whether to ignore end-of-sequence tokens during generation.",
3731+
"example": false
3732+
},
3733+
"n_probs": {
3734+
"type": "integer",
3735+
"description": "Number of probabilities to consider for each token.",
3736+
"example": 0
3737+
},
3738+
"min_keep": {
3739+
"type": "integer",
3740+
"description": "Minimum number of tokens to keep in the buffer.",
3741+
"example": 0
3742+
},
3743+
"engine": {
3744+
"type": "string",
3745+
"description": "The engine used to run the model.",
3746+
"example": "llama-cpp"
3747+
},
3748+
"prompt_template": {
3749+
"type": "string",
3750+
"description": "Template used for formatting prompts.",
3751+
"example": "\n\n<|system|>\n{system_message}</s>\n\n\n\n\n<|user|>\n{prompt}</s>\n\n\n<|assistant|>\n\n"
3752+
},
3753+
"ctx_len": {
3754+
"type": "integer",
3755+
"description": "Context length for the model.",
3756+
"example": 4096
3757+
},
3758+
"n_parallel": {
3759+
"type": "integer",
3760+
"description": "Number of parallel threads for execution.",
3761+
"example": 1
3762+
},
3763+
"ngl": {
3764+
"type": "integer",
3765+
"description": "Number of GPU layers.",
3766+
"example": 33
3767+
}
3768+
}
35553769
},
35563770
"DeleteModelResponseDto": {
35573771
"type": "object",

0 commit comments

Comments
 (0)