@@ -4,6 +4,7 @@
 #include "llama.h"
 
 #include <ctime>
+#include <cstdio>
 #include <algorithm>
 
 #if defined(_MSC_VER)
@@ -70,6 +71,29 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
     }
 }
 
+// plain, pipe-friendly output: one embedding per line
+static void print_raw_embeddings(const float * emb,
+                                 int n_embd_count,
+                                 int n_embd,
+                                 const llama_model * model,
+                                 enum llama_pooling_type pooling_type,
+                                 int embd_normalize) {
+    const uint32_t n_cls_out = llama_model_n_cls_out(model);
+    const bool is_rank = (pooling_type == LLAMA_POOLING_TYPE_RANK);
+    const int cols = is_rank ? std::min<int>(n_embd, (int) n_cls_out) : n_embd;
+
+    for (int j = 0; j < n_embd_count; ++j) {
+        for (int i = 0; i < cols; ++i) {
+            if (embd_normalize == 0) {
+                printf("%1.0f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : ""));
+            } else {
+                printf("%1.7f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : ""));
+            }
+        }
+        printf("\n");
+    }
+}
+
 int main(int argc, char ** argv) {
     common_params params;
 
@@ -259,6 +283,10 @@ int main(int argc, char ** argv) {
     float * out = emb + e * n_embd;
     batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize);
 
+    if (params.embd_out == "raw") {
+        print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize);
+    }
+
     if (params.embd_out.empty()) {
         LOG("\n");
 
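For context: the new "raw" format writes one embedding per line as space-separated floats, so the output can be piped directly into other tools. Below is a minimal downstream-consumer sketch (not part of this change) that reads that format back from stdin; it assumes the output was produced via the example's existing --embd-output-format flag, e.g. `llama-embedding -m model.gguf -p "hello" --embd-output-format raw | ./consumer`, where model.gguf is a placeholder path and consumer is the hypothetical program below.

// consumer.cpp: sketch of parsing "raw" output (one embedding per
// line, space-separated floats) from stdin into memory.
#include <cstdio>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
    std::vector<std::vector<float>> embeddings;
    std::string line;
    while (std::getline(std::cin, line)) {
        std::istringstream iss(line);
        std::vector<float> row;
        float v;
        while (iss >> v) {
            row.push_back(v);
        }
        if (!row.empty()) {
            embeddings.push_back(std::move(row));
        }
    }
    std::fprintf(stderr, "read %zu embeddings of dim %zu\n",
                 embeddings.size(),
                 embeddings.empty() ? (size_t) 0 : embeddings[0].size());
    return 0;
}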