Skip to content

Commit bef194d

Browse files
committed
fix(rpc): Improve input validation and error handling
The `rpc-server` was vulnerable to Denial of Service attacks via several RPC commands (`SET_TENSOR`, `GRAPH_COMPUTE`, etc.). Malformed messages could trigger failed assertions (e.g., invalid `ggml_type`) or out-of-bounds reads/writes leading to `GGML_ABORT` calls, crashing the server process.

This PR introduces robust input validation and replaces `abort()` calls with graceful error handling:

- **Type Validation:** `deserialize_tensor` now checks if the `tensor->type` is within the valid `GGML_TYPE_COUNT` range *before* calling `ggml_new_tensor_4d`. Returns `nullptr` on invalid type.
- **Bounds Checks:** Replaced `GGML_ABORT` in the `set_tensor`, `set_tensor_hash`, and `get_tensor` handlers with error logging and returning `false` when data/offset parameters are out of buffer bounds.
- **Size Checks:** Added safe arithmetic checks (for overflow) in `graph_compute` when calculating required message sizes based on the client-provided `n_nodes` and `n_tensors`. Returns early if the reported sizes conflict with the actual message size or would lead to overflow.
- **Error Propagation:**
  - `create_node` now checks for `nullptr` return values from `deserialize_tensor` and its recursive calls, propagating `nullptr` upwards on failure. Uses `find` instead of `at` for safer map access.
  - `copy_tensor` now checks for `nullptr` from `deserialize_tensor` and sets the response status to failure if deserialization or bounds checks fail.
  - `graph_compute` now checks for a `nullptr` return from `create_node` and returns the failure status correctly. The final return value now reflects the actual computation status.

These changes improve the RPC server's resilience against malformed client requests, preventing crashes and ensuring errors are handled more gracefully.

Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
1 parent ab47dec commit bef194d

File tree

1 file changed

+123
-15
lines changed

1 file changed

+123
-15
lines changed

ggml/src/ggml-rpc/ggml-rpc.cpp

+123-15
Original file line number | Diff line number | Diff line change
@@ -973,8 +973,21 @@ bool rpc_server::buffer_clear(const rpc_msg_buffer_clear_req & request) {
973973
}
974974

975975
ggml_tensor * rpc_server::deserialize_tensor(struct ggml_context * ctx, const rpc_tensor * tensor) {
976+
// Validate tensor type before using it
977+
if (tensor->type < 0 || tensor->type >= GGML_TYPE_COUNT) {
978+
GGML_LOG_ERROR("[%s] invalid tensor type received: %u\n", __func__, tensor->type);
979+
return nullptr;
980+
}
981+
976982
ggml_tensor * result = ggml_new_tensor_4d(ctx, (ggml_type) tensor->type,
977983
tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
984+
985+
// ggml_new_tensor_4d might fail if dimensions are invalid, although less likely to crash than invalid type
986+
if (result == nullptr) {
987+
GGML_LOG_ERROR("[%s] ggml_new_tensor_4d failed for type %u\n", __func__, tensor->type);
988+
return nullptr;
989+
}
990+
978991
for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) {
979992
result->nb[i] = tensor->nb[i];
980993
}
@@ -1034,7 +1047,10 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
10341047
const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
10351048

10361049
if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
1037-
GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
1050+
// Replace GGML_ABORT with logging and error return
1051+
GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu) out of buffer bounds [0x%zx, 0x%zx)\n",
1052+
__func__, in_tensor->data, offset, size, p0, p1);
1053+
return false;
10381054
}
10391055
}
10401056

@@ -1109,7 +1125,11 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
11091125
const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
11101126

11111127
if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
1112-
GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
1128+
// Replace GGML_ABORT with logging and error return
1129+
GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu, hash=0x%" PRIx64 ") out of buffer bounds [0x%zx, 0x%zx)\n",
1130+
__func__, in_tensor->data, offset, size, *hash, p0, p1);
1131+
response.result = 0;
1132+
return false;
11131133
}
11141134
}
11151135
ggml_backend_tensor_set(tensor, cached_file.data(), offset, size);
@@ -1174,7 +1194,10 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
11741194
if (request.tensor.data + request.offset < p0 ||
11751195
request.tensor.data + request.offset >= p1 ||
11761196
request.size > (p1 - request.tensor.data - request.offset)) {
1177-
GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
1197+
// Replace GGML_ABORT with logging and error return
1198+
GGML_LOG_ERROR("[%s] requested tensor region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%" PRIu64 ") out of buffer bounds [0x%zx, 0x%zx)\n",
1199+
__func__, request.tensor.data, request.offset, request.size, p0, p1);
1200+
return false;
11781201
}
11791202
}
11801203

@@ -1197,6 +1220,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
11971220
ggml_tensor * dst = deserialize_tensor(ctx, &request.dst);
11981221
if (src == nullptr || dst == nullptr) {
11991222
GGML_LOG_ERROR("[%s] error deserializing tensors\n", __func__);
1223+
response.result = 0;
12001224
return false;
12011225
}
12021226

@@ -1214,6 +1238,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
12141238
dst_data + src_size,
12151239
dst_base,
12161240
dst_base + dst_buf_sz);
1241+
response.result = 0;
12171242
return false;
12181243
}
12191244

@@ -1234,43 +1259,118 @@ ggml_tensor * rpc_server::create_node(uint64_t id,
12341259
if (tensor_map.find(id) != tensor_map.end()) {
12351260
return tensor_map[id];
12361261
}
1237-
const rpc_tensor * tensor = tensor_ptrs.at(id);
1262+
// Safely find the tensor pointer
1263+
auto it_ptr = tensor_ptrs.find(id);
1264+
if (it_ptr == tensor_ptrs.end()) {
1265+
GGML_LOG_ERROR("[%s] tensor id %" PRIu64 " not found in provided tensors\n", __func__, id);
1266+
return nullptr;
1267+
}
1268+
const rpc_tensor * tensor = it_ptr->second;
1269+
12381270
struct ggml_tensor * result = deserialize_tensor(ctx, tensor);
12391271
if (result == nullptr) {
12401272
return nullptr;
12411273
}
12421274
tensor_map[id] = result;
12431275
for (int i = 0; i < GGML_MAX_SRC; i++) {
12441276
result->src[i] = create_node(tensor->src[i], ctx, tensor_ptrs, tensor_map);
1277+
// Check if recursive call failed
1278+
if (tensor->src[i] != 0 && result->src[i] == nullptr) {
1279+
GGML_LOG_ERROR("[%s] failed to create source node %d (src_id=%" PRIu64 ") for node id %" PRIu64 "\n",
1280+
__func__, i, tensor->src[i], id);
1281+
// Must return nullptr to signal failure up the call stack
1282+
return nullptr;
1283+
}
12451284
}
12461285
result->view_src = create_node(tensor->view_src, ctx, tensor_ptrs, tensor_map);
1286+
// Check if recursive call failed
1287+
if (tensor->view_src != 0 && result->view_src == nullptr) {
1288+
GGML_LOG_ERROR("[%s] failed to create view_src node (view_src_id=%" PRIu64 ") for node id %" PRIu64 "\n",
1289+
__func__, tensor->view_src, id);
1290+
// Must return nullptr to signal failure up the call stack
1291+
return nullptr;
1292+
}
12471293
result->view_offs = tensor->view_offs;
12481294
return result;
12491295
}
12501296

12511297
bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph_compute_rsp & response) {
12521298
// serialization format:
1253-
// | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) |
1254-
if (input.size() < sizeof(uint32_t)) {
1299+
// | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t)) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) |
1300+
1301+
// Perform robust size checks with overflow protection
1302+
const size_t min_header_size = sizeof(uint32_t);
1303+
if (input.size() < min_header_size) {
1304+
GGML_LOG_ERROR("[%s] input message too small for n_nodes header: %zu bytes\n", __func__, input.size());
1305+
response.result = GGML_STATUS_FAILED;
12551306
return false;
12561307
}
12571308
uint32_t n_nodes;
12581309
memcpy(&n_nodes, input.data(), sizeof(n_nodes));
1259-
if (input.size() < sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t)) {
1310+
1311+
// Calculate required size for nodes array
1312+
size_t nodes_array_bytes = n_nodes * sizeof(uint64_t);
1313+
1314+
// Calculate required size up to n_tensors field safely
1315+
size_t required_size_before_tensors = min_header_size;
1316+
1317+
// Check for overflow before adding nodes_array_bytes
1318+
if (SIZE_MAX - required_size_before_tensors < nodes_array_bytes) {
1319+
GGML_LOG_ERROR("[%s] integer overflow calculating size before tensors step 1: n_nodes=%u\n", __func__, n_nodes);
1320+
response.result = GGML_STATUS_FAILED; // Use correct status
1321+
return false;
1322+
}
1323+
required_size_before_tensors += nodes_array_bytes;
1324+
1325+
const size_t n_tensors_field_size = sizeof(uint32_t);
1326+
// Check for overflow before adding n_tensors_field_size
1327+
if (SIZE_MAX - required_size_before_tensors < n_tensors_field_size) {
1328+
GGML_LOG_ERROR("[%s] integer overflow calculating size before tensors step 2: n_nodes=%u\n", __func__, n_nodes);
1329+
response.result = GGML_STATUS_FAILED; // Use correct status
12601330
return false;
12611331
}
1262-
const uint64_t * nodes = (const uint64_t *)(input.data() + sizeof(n_nodes));
1332+
required_size_before_tensors += n_tensors_field_size;
1333+
1334+
if (input.size() < required_size_before_tensors) {
1335+
GGML_LOG_ERROR("[%s] input message too small for nodes array or n_tensors header: %zu bytes, needed %zu\n", __func__, input.size(), required_size_before_tensors);
1336+
response.result = GGML_STATUS_FAILED; // Use correct status
1337+
return false;
1338+
}
1339+
1340+
// Read n_tensors
1341+
const uint64_t * nodes_ptr = (const uint64_t *)(input.data() + sizeof(n_nodes));
12631342
uint32_t n_tensors;
1264-
memcpy(&n_tensors, input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t), sizeof(n_tensors));
1265-
if (input.size() < sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t) + n_tensors*sizeof(rpc_tensor)) {
1343+
memcpy(&n_tensors, input.data() + min_header_size + nodes_array_bytes, sizeof(n_tensors));
1344+
1345+
// Calculate required size for tensors array
1346+
size_t tensors_array_bytes = n_tensors * sizeof(rpc_tensor);
1347+
1348+
// Calculate total required size safely
1349+
size_t required_total_size = required_size_before_tensors;
1350+
1351+
// Check for overflow before adding tensors_array_bytes
1352+
if (SIZE_MAX - required_total_size < tensors_array_bytes) {
1353+
GGML_LOG_ERROR("[%s] integer overflow calculating total required size: n_nodes=%u, n_tensors=%u\n", __func__, n_nodes, n_tensors);
1354+
response.result = GGML_STATUS_FAILED;
1355+
return false;
1356+
}
1357+
required_total_size += tensors_array_bytes;
1358+
1359+
if (input.size() < required_total_size) {
1360+
GGML_LOG_ERROR("[%s] input message too small for tensors array: %zu bytes, needed %zu\n", __func__, input.size(), required_total_size);
1361+
response.result = GGML_STATUS_FAILED;
12661362
return false;
12671363
}
1268-
const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t) + sizeof(n_tensors));
1364+
1365+
// Pointers are now safe to use based on size checks
1366+
const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + required_size_before_tensors);
12691367
GGML_PRINT_DEBUG("[%s] n_nodes: %u, n_tensors: %u\n", __func__, n_nodes, n_tensors);
12701368

1271-
size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
1369+
// Estimate buffer size for context
1370+
size_t ctx_buf_size = ggml_tensor_overhead()*((size_t)n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
1371+
12721372
struct ggml_init_params params = {
1273-
/*.mem_size =*/ buf_size,
1373+
/*.mem_size =*/ ctx_buf_size,
12741374
/*.mem_buffer =*/ NULL,
12751375
/*.no_alloc =*/ true,
12761376
};
@@ -1286,12 +1386,20 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
12861386
std::unordered_map<uint64_t, ggml_tensor*> tensor_map;
12871387
for (uint32_t i = 0; i < n_nodes; i++) {
12881388
int64_t id;
1289-
memcpy(&id, &nodes[i], sizeof(id));
1389+
memcpy(&id, &nodes_ptr[i], sizeof(id));
12901390
graph->nodes[i] = create_node(id, ctx, tensor_ptrs, tensor_map);
1391+
// Check if create_node failed (e.g., due to invalid type or missing ID)
1392+
if (graph->nodes[i] == nullptr) {
1393+
GGML_LOG_ERROR("[%s] failed to create graph node %d (id=%" PRId64 ")\n", __func__, i, id);
1394+
response.result = GGML_STATUS_FAILED;
1395+
// No need to free ctx, ggml_context_ptr handles it.
1396+
return false;
1397+
}
12911398
}
12921399
ggml_status status = ggml_backend_graph_compute(backend, graph);
12931400
response.result = status;
1294-
return true;
1401+
// Return true only if computation succeeded
1402+
return status == GGML_STATUS_SUCCESS;
12951403
}
12961404

12971405
rpc_server::~rpc_server() {

0 commit comments

Comments
 (0)