diff --git a/src/llama_client_slot.h b/src/llama_client_slot.h
index 877aeee6..129dd7eb 100644
--- a/src/llama_client_slot.h
+++ b/src/llama_client_slot.h
@@ -38,13 +38,12 @@ static bool server_verbose = false;
 
 using json = nlohmann::json;
 
-// TODO: can become bool if we can't find use of more states
-enum class SlotState {
+enum class SlotState: uint8_t {
   IDLE,
   PROCESSING,
 };
 
-enum class SlotCommand {
+enum class SlotCommand: uint8_t {
   NONE,
   LOAD_PROMPT,
   RELEASE,
diff --git a/src/llama_engine.cc b/src/llama_engine.cc
index 9acb4aac..a6e71ec1 100644
--- a/src/llama_engine.cc
+++ b/src/llama_engine.cc
@@ -10,7 +10,7 @@ constexpr const int k400BadRequest = 400;
 constexpr const int k409Conflict = 409;
 constexpr const int k500InternalServerError = 500;
 
-enum class InferenceStatus { PENDING, RUNNING, EOS, FINISHED };
+enum class InferenceStatus: uint8_t { PENDING, RUNNING, EOS, FINISHED };
 struct inferenceState {
   int task_id;
   InferenceStatus inference_status = InferenceStatus::PENDING;
diff --git a/src/llama_server_context.h b/src/llama_server_context.h
index b8caab80..9c0eaed6 100644
--- a/src/llama_server_context.h
+++ b/src/llama_server_context.h
@@ -62,7 +62,7 @@ enum class StopType : uint8_t {
   STOP_PARTIAL,
 };
 
-enum class ModelType { LLM = 0, EMBEDDING };
+enum class ModelType: uint8_t { LLM = 0, EMBEDDING };
 
 // TODO: reuse llama_detokenize template
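
For context, the only semantic effect of adding `: uint8_t` is that each scoped enum now occupies one byte instead of the default `int`-sized representation; the enumerators and their values are unchanged. A minimal standalone sketch (compiled separately, not part of the patch, with the enum names copied from the diff) illustrating that effect:

```cpp
// Standalone sketch: scoped enums with an explicit uint8_t underlying type
// are one byte, whereas an unspecified enum class defaults to int.
#include <cstdint>
#include <type_traits>

enum class SlotState : uint8_t { IDLE, PROCESSING };
enum class SlotCommand : uint8_t { NONE, LOAD_PROMPT, RELEASE };

static_assert(sizeof(SlotState) == 1, "slot state fits in one byte");
static_assert(std::is_same_v<std::underlying_type_t<SlotCommand>, uint8_t>,
              "underlying type is uint8_t as declared");

int main() {
  SlotState state = SlotState::IDLE;
  return static_cast<int>(state);  // 0
}
```

If the one-byte layout is relied on elsewhere (e.g. in packed structs or serialized state), similar `static_assert`s could be placed next to the real declarations to lock it in.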