diff --git a/src/llama_client_slot.h b/src/llama_client_slot.h
index 877aeee6..129dd7eb 100644
--- a/src/llama_client_slot.h
+++ b/src/llama_client_slot.h
@@ -38,13 +38,12 @@ static bool server_verbose = false;
 
 using json = nlohmann::json;
 
-// TODO: can become bool if we can't find use of more states
-enum class SlotState {
+enum class SlotState: uint8_t {
   IDLE,
   PROCESSING,
 };
 
-enum class SlotCommand {
+enum class SlotCommand: uint8_t {
   NONE,
   LOAD_PROMPT,
   RELEASE,
diff --git a/src/llama_engine.cc b/src/llama_engine.cc
index 9acb4aac..a6e71ec1 100644
--- a/src/llama_engine.cc
+++ b/src/llama_engine.cc
@@ -10,7 +10,7 @@ constexpr const int k400BadRequest = 400;
 constexpr const int k409Conflict = 409;
 constexpr const int k500InternalServerError = 500;
 
-enum class InferenceStatus { PENDING, RUNNING, EOS, FINISHED };
+enum class InferenceStatus: uint8_t { PENDING, RUNNING, EOS, FINISHED };
 struct inferenceState {
   int task_id;
   InferenceStatus inference_status = InferenceStatus::PENDING;
diff --git a/src/llama_server_context.h b/src/llama_server_context.h
index b8caab80..9c0eaed6 100644
--- a/src/llama_server_context.h
+++ b/src/llama_server_context.h
@@ -62,7 +62,7 @@ enum class StopType : uint8_t {
   STOP_PARTIAL,
 };
 
-enum class ModelType { LLM = 0, EMBEDDING };
+enum class ModelType: uint8_t { LLM = 0, EMBEDDING };
 
 // TODO: reuse llama_detokenize template
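
For context, the only semantic effect of adding `: uint8_t` is that each scoped enum now occupies one byte instead of the default `int`-sized representation; the enumerators and their values are unchanged. A minimal standalone sketch (compiled separately, not part of the patch, with the enum names copied from the diff) illustrating that effect:

```cpp
// Standalone sketch: scoped enums with an explicit uint8_t underlying type
// are one byte, whereas an unspecified enum class defaults to int.
#include <cstdint>
#include <type_traits>

enum class SlotState : uint8_t { IDLE, PROCESSING };
enum class SlotCommand : uint8_t { NONE, LOAD_PROMPT, RELEASE };

static_assert(sizeof(SlotState) == 1, "slot state fits in one byte");
static_assert(std::is_same_v<std::underlying_type_t<SlotCommand>, uint8_t>,
              "underlying type is uint8_t as declared");

int main() {
  SlotState state = SlotState::IDLE;
  return static_cast<int>(state);  // 0
}
```

If the one-byte layout is relied on elsewhere (e.g. in packed structs or serialized state), similar `static_assert`s could be placed next to the real declarations to lock it in.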