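"""Smoke tests for the llama-embedding CLI.

The tests shell out to the llama-embedding binary under build/bin, fetch a
small embeddinggemma GGUF model from Hugging Face on first use (cached via
LLAMA_CACHE), and check basic output sanity: raw vs. JSON consistency,
determinism on empty input, behaviour on very long input, embedding norms,
and error reporting for invalid flags. The test_* functions are written in
pytest style (plain asserts, no fixtures).
"""
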
import json
import os
import subprocess
from pathlib import Path

import numpy as np


# ---------------------------------------------------------------------------
# Model helpers
# ---------------------------------------------------------------------------

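# Shared lookup for the llama-embedding binary; this small helper is an
# addition so that every caller below (including the Windows Release fallback)
# resolves the executable the same way.
def find_embedding_binary():
    """Return the path to the llama-embedding binary, raising if it is missing."""
    repo_root = Path(__file__).resolve().parents[2]
    exe = repo_root / "build/bin/llama-embedding"
    if os.name == "nt" and not exe.exists():
        exe = repo_root / "build/bin/Release/llama-embedding.exe"
    if not exe.exists():
        raise FileNotFoundError(f"llama-embedding not found at {exe}")
    return exe

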
def get_model_hf_params():
    """Default lightweight embedding model."""
    return {
        "hf_repo": "ggml-org/embeddinggemma-300M-qat-q4_0-GGUF",
        "hf_file": "embeddinggemma-300M-qat-Q4_0.gguf",
    }


def ensure_model_downloaded(params=None):
    """Ensure the embedding model is cached locally by doing a tiny probe run."""
    emb_path = find_embedding_binary()

    params = params or get_model_hf_params()
    # Keep downloads in a local tmp cache unless the caller already set one.
    env = {**os.environ, "LLAMA_CACHE": os.environ.get("LLAMA_CACHE", "tmp")}

    # Minimal invocation: short prompt, tiny context, no warmup, single thread,
    # so the call does little more than trigger the Hugging Face download.
    cmd = [
        str(emb_path),
        "-hfr", params["hf_repo"],
        "-hff", params["hf_file"],
        "--ctx-size", "16",
        "--embd-output-format", "json",
        "--no-warmup",
        "--threads", "1",
    ]
    result = subprocess.run(cmd, input="ok", capture_output=True, text=True, env=env)
    if result.returncode:
        raise RuntimeError(f"Model download failed:\n{result.stderr}")
    return params


def run_embedding(text, fmt="raw", params=None):
    """Run llama-embedding on `text` and return its stdout (stripped)."""
    exe = find_embedding_binary()

    params = ensure_model_downloaded(params)
    env = {**os.environ, "LLAMA_CACHE": os.environ.get("LLAMA_CACHE", "tmp")}
    cmd = [
        str(exe),
        "-hfr", params["hf_repo"],
        "-hff", params["hf_file"],
        "--ctx-size", "2048",
        "--embd-output-format", fmt,
    ]
    out = subprocess.run(cmd, input=text, capture_output=True, text=True, env=env)
    if out.returncode:
        raise AssertionError(f"embedding failed ({out.returncode}):\n{out.stderr}")
    return out.stdout.strip()


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------

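# The norm bounds in the tests below assume llama-embedding's default output
# normalization (L2, via --embd-normalize), which keeps embedding norms close
# to 1.0; loosen the bounds if that default ever changes.
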
def test_embedding_raw_and_json_consistency():
    """Compare raw vs. JSON embedding output for the same input."""
    out_raw = run_embedding("hello world", "raw")
    floats_raw = np.array(out_raw.split(), float)
    out_json = json.loads(run_embedding("hello world", "json"))
    floats_json = np.array(out_json["data"][0]["embedding"])

    assert len(floats_raw) == len(floats_json)
    # Compare via cosine similarity rather than exact equality, since the two
    # output formats may round the printed floats differently.
    cos = np.dot(floats_raw, floats_json) / (np.linalg.norm(floats_raw) * np.linalg.norm(floats_json))
    assert cos > 0.999, f"Unexpected divergence between raw and JSON output ({cos:.4f})"


def test_embedding_empty_input():
    """Ensure the embedding of empty input is finite and deterministic."""
    out1 = np.array(run_embedding("", "raw").split(), float)
    out2 = np.array(run_embedding("", "raw").split(), float)

    norm = np.linalg.norm(out1)
    assert len(out1) and np.all(np.isfinite(out1))
    assert 0.1 < norm < 10
    # Two runs on the same (empty) input should produce the same vector.
    cos = np.dot(out1, out2) / (np.linalg.norm(out1) * np.linalg.norm(out2))
    assert cos > 0.9999, f"Empty input not deterministic (cos={cos:.4f})"


def test_embedding_very_long_input():
    """Stress test: very long input within the context window."""
    text = "lorem " * 2000
    floats = np.array(run_embedding(text, "raw").split(), float)
    assert len(floats) > 100 and np.isfinite(np.linalg.norm(floats))


def test_embedding_output_shape():
    """Basic embedding sanity check."""
    floats = np.array(run_embedding("hello world", "raw").split(), float)
    assert len(floats) > 100 and 0.5 < np.linalg.norm(floats) < 2.0


def test_embedding_invalid_flag():
    """An invalid flag should produce a non-zero exit code and an error message."""
    exe = find_embedding_binary()
    result = subprocess.run([str(exe), "--no-such-flag"], capture_output=True, text=True)
    assert result.returncode != 0
    assert any(k in result.stderr.lower() for k in ("error", "invalid", "unknown"))
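

if __name__ == "__main__":
    # Ad-hoc manual check; the test_* functions above are meant to be run with
    # pytest. This just prints the dimension and norm of one example embedding.
    vec = np.array(run_embedding("hello world", "raw").split(), float)
    print(f"dim={len(vec)} norm={np.linalg.norm(vec):.3f}")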