llamastack · yanxi0830 · Nov 15, 2024 · Nov 15, 2024 · Nov 15, 2024 · Nov 15, 2024
diff --git a/src/llama_stack_client/lib/cli/datasets/datasets.py b/src/llama_stack_client/lib/cli/datasets/datasets.py
@@ -3,14 +3,10 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-
-import json
-from typing import Optional
-
 import click
-import yaml
 
 from .list import list_datasets
+from .register import register
 
 
 @click.group()
@@ -19,49 +15,6 @@ def datasets():
     pass
 
 
-@datasets.command()
-@click.option("--dataset-id", required=True, help="Id of the dataset")
-@click.option("--provider-id", help="Provider ID for the dataset", default=None)
-@click.option("--provider-dataset-id", help="Provider's dataset ID", default=None)
-@click.option("--metadata", type=str, help="Metadata of the dataset")
-@click.option("--url", type=str, help="URL of the dataset", required=True)
-@click.option("--schema", type=str, help="JSON schema of the dataset", required=True)
-@click.pass_context
-def register(
-    ctx,
-    dataset_id: str,
-    provider_id: Optional[str],
-    provider_dataset_id: Optional[str],
-    metadata: Optional[str],
-    url: str,
-    schema: str,
-):
-    """Create a new dataset"""
-    client = ctx.obj["client"]
-
-    try:
-        dataset_schema = json.loads(schema)
-    except json.JSONDecodeError as err:
-        raise click.BadParameter("Schema must be valid JSON") from err
-
-    if metadata:
-        try:
-            metadata = json.loads(metadata)
-        except json.JSONDecodeError as err:
-            raise click.BadParameter("Metadata must be valid JSON") from err
-
-    response = client.datasets.register(
-        dataset_id=dataset_id,
-        dataset_schema=dataset_schema,
-        url={"uri": url},
-        provider_id=provider_id,
-        provider_dataset_id=provider_dataset_id,
-        metadata=metadata,
-    )
-    if response:
-        click.echo(yaml.dump(response.dict()))
-
-
 # Register subcommands
 datasets.add_command(list_datasets)
 datasets.add_command(register)
diff --git a/src/llama_stack_client/lib/cli/datasets/register.py b/src/llama_stack_client/lib/cli/datasets/register.py
@@ -0,0 +1,101 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import base64
+import json
+import mimetypes
+import os
+from typing import Optional
+
+import click
+import yaml
+
+
+def data_url_from_file(file_path: str) -> str:
+    """Encode a local file as a base64 ``data:`` URL.
+
+    The MIME type is guessed from the file name. When it cannot be guessed,
+    ``application/octet-stream`` is used so that the URL never embeds the
+    literal string ``None``.
+
+    Raises:
+        FileNotFoundError: if ``file_path`` does not exist.
+    """
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    with open(file_path, "rb") as file:
+        file_content = file.read()
+
+    base64_content = base64.b64encode(file_content).decode("utf-8")
+    mime_type, _ = mimetypes.guess_type(file_path)
+    if mime_type is None:
+        # Unknown extension: fall back to the generic binary type.
+        mime_type = "application/octet-stream"
+
+    return f"data:{mime_type};base64,{base64_content}"
+
+
+@click.command("register")
+@click.option("--dataset-id", required=True, help="Id of the dataset")
+@click.option("--provider-id", help="Provider ID for the dataset", default=None)
+@click.option("--provider-dataset-id", help="Provider's dataset ID", default=None)
+@click.option("--metadata", type=str, help="Metadata of the dataset")
+@click.option("--url", type=str, help="URL of the dataset", required=False, default=None)
+@click.option(
+    "--dataset-path",
+    required=False,
+    type=click.Path(exists=True, dir_okay=False),
+    help="Local file path to the dataset. If specified, the file is uploaded inline as a data URL "
+    "and takes precedence over --url",
+)
+@click.option("--schema", type=str, help="JSON schema of the dataset", required=True)
+@click.pass_context
+def register(
+    ctx,
+    dataset_id: str,
+    provider_id: Optional[str],
+    provider_dataset_id: Optional[str],
+    metadata: Optional[str],
+    url: Optional[str],
+    dataset_path: Optional[str],
+    schema: str,
+):
+    """Create a new dataset
+
+    The dataset content is referenced by --url or read from --dataset-path;
+    at least one of the two must be given.
+    """
+    client = ctx.obj["client"]
+
+    # A dataset needs a source: either a remote URL or a local file to embed.
+    if not url and not dataset_path:
+        raise click.UsageError("Specify either --url or --dataset-path")
+
+    try:
+        dataset_schema = json.loads(schema)
+    except json.JSONDecodeError as err:
+        raise click.BadParameter("Schema must be valid JSON") from err
+
+    if metadata:
+        try:
+            metadata = json.loads(metadata)
+        except json.JSONDecodeError as err:
+            raise click.BadParameter("Metadata must be valid JSON") from err
+
+    if dataset_path:
+        # A local file overrides --url: its content is embedded in the request.
+        url = data_url_from_file(dataset_path)
+
+    response = client.datasets.register(
+        dataset_id=dataset_id,
+        dataset_schema=dataset_schema,
+        url={"uri": url},
+        provider_id=provider_id,
+        provider_dataset_id=provider_dataset_id,
+        metadata=metadata,
+    )
+    if response:
+        click.echo(yaml.dump(response.dict()))
diff --git a/src/llama_stack_client/lib/cli/eval/eval.py b/src/llama_stack_client/lib/cli/eval/eval.py
@@ -8,6 +8,7 @@
 import click
 
 from .run_benchmark import run_benchmark
+from .run_scoring import run_scoring
 
 
 @click.group()
@@ -18,3 +19,4 @@ def eval():
 
 # Register subcommands
 eval.add_command(run_benchmark)
+eval.add_command(run_scoring)
diff --git a/src/llama_stack_client/lib/cli/eval/run_benchmark.py b/src/llama_stack_client/lib/cli/eval/run_benchmark.py
@@ -29,7 +29,11 @@
     help="Path to the dump eval results output directory",
 )
 @click.option(
-    "--num-examples", required=False, help="Number of examples to evaluate on, useful for debugging", default=None
+    "--num-examples",
+    required=False,
+    help="Number of examples to evaluate on, useful for debugging",
+    default=None,
+    type=int,
 )
 @click.option(
     "--visualize",
@@ -46,7 +50,7 @@ def run_benchmark(
     num_examples: Optional[int],
     visualize: bool,
 ):
-    """Run a evaluation benchmark"""
+    """Run an evaluation benchmark task"""
 
     client = ctx.obj["client"]
 

diff --git a/src/llama_stack_client/lib/cli/eval/run_scoring.py b/src/llama_stack_client/lib/cli/eval/run_scoring.py
@@ -0,0 +1,125 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+import os
+from typing import Optional
+
+import click
+import pandas
+from rich import print as rprint
+from tqdm.rich import tqdm
+
+
+@click.command("run_scoring")
+@click.argument("scoring-function-ids", nargs=-1, required=True)
+@click.option(
+    "--dataset-id",
+    required=False,
+    help="Pre-registered dataset_id to score (from llama-stack-client datasets list)",
+)
+@click.option(
+    "--dataset-path",
+    required=False,
+    help="Path to the dataset file to score",
+    type=click.Path(exists=True),
+)
+@click.option(
+    "--scoring-params-config",
+    required=False,
+    help="Path to the scoring params config file in JSON format",
+    type=click.Path(exists=True),
+)
+@click.option(
+    "--num-examples",
+    required=False,
+    help="Number of examples to evaluate on, useful for debugging",
+    default=None,
+    type=int,
+)
+@click.option(
+    "--output-dir",
+    required=True,
+    help="Path to the dump eval results output directory",
+)
+@click.option(
+    "--visualize",
+    is_flag=True,
+    default=False,
+    help="Visualize evaluation results after completion",
+)
+@click.pass_context
+def run_scoring(
+    ctx,
+    scoring_function_ids: tuple[str, ...],
+    dataset_id: Optional[str],
+    dataset_path: Optional[str],
+    scoring_params_config: Optional[str],
+    num_examples: Optional[int],
+    output_dir: str,
+    visualize: bool,
+):
+    """Run scoring from application datasets"""
+    # NOTE(review): `visualize` is accepted but not used anywhere in this command yet.
+
+    # one of dataset_id or dataset_path is required
+    if dataset_id is None and dataset_path is None:
+        raise click.BadParameter("Specify either dataset_id (pre-registered dataset) or dataset_path (local file)")
+
+    client = ctx.obj["client"]
+
+    # Every requested scoring function gets an entry (None = no extra params); the optional
+    # config only overrides/extends it, so each requested function is always sent for scoring.
+    scoring_params = {fn_id: None for fn_id in scoring_function_ids}
+    if scoring_params_config:
+        with open(scoring_params_config, "r") as f:
+            config_params = json.load(f)
+        if not isinstance(config_params, dict):
+            raise click.BadParameter("Scoring params config must be a JSON object keyed by scoring function id")
+        scoring_params.update(config_params)
+
+    if dataset_path is not None:
+        # A local file takes precedence when both sources are given.
+        rows = pandas.read_csv(dataset_path).to_dict(orient="records")
+        if num_examples is not None:
+            rows = rows[:num_examples]
+    else:
+        dataset = client.datasets.retrieve(dataset_id=dataset_id)
+        if not dataset:
+            raise click.BadParameter(
+                f"Dataset {dataset_id} not found. Please register using llama-stack-client datasets register"
+            )
+
+        # TODO: this will eventually be replaced with jobs polling from server via score_batch
+        # For now, get all datasets rows via datasetio API
+        results = client.datasetio.get_rows_paginated(
+            dataset_id=dataset_id, rows_in_page=-1 if num_examples is None else num_examples
+        )
+        rows = results.rows
+
+    # One output record per input row: the original columns plus one column per scoring function.
+    records = []
+    for r in tqdm(rows):
+        score_res = client.scoring.score(
+            input_rows=[r],
+            scoring_functions=scoring_params,
+        )
+        record = dict(r)
+        for fn_id in scoring_function_ids:
+            record[fn_id] = score_res.results[fn_id].score_rows[0]
+        records.append(record)
+
+    # Name the result after the dataset id, or after the local file when no id was given,
+    # so a path-only run is not saved as "None_score_results.csv".
+    result_name = dataset_id if dataset_id is not None else os.path.splitext(os.path.basename(dataset_path))[0]
+
+    # Create output directory if it doesn't exist
+    os.makedirs(output_dir, exist_ok=True)
+    output_file = os.path.join(output_dir, f"{result_name}_score_results.csv")
+    df = pandas.DataFrame(records)
+    df.to_csv(output_file, index=False)
+
+    rprint(f"[green]✓[/green] Results saved to: [blue]{output_file}[/blue]!\n")