From 2a5b7383fe553502c884031e12249dd5fe786356 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 15 Nov 2024 12:17:37 -0500 Subject: [PATCH 1/6] scoring cli --- .../lib/cli/datasets/datasets.py | 49 +--------- .../lib/cli/datasets/register.py | 77 ++++++++++++++++ src/llama_stack_client/lib/cli/eval/eval.py | 2 + .../lib/cli/eval/run_benchmark.py | 2 +- .../lib/cli/eval/run_scoring.py | 91 +++++++++++++++++++ 5 files changed, 172 insertions(+), 49 deletions(-) create mode 100644 src/llama_stack_client/lib/cli/datasets/register.py create mode 100644 src/llama_stack_client/lib/cli/eval/run_scoring.py diff --git a/src/llama_stack_client/lib/cli/datasets/datasets.py b/src/llama_stack_client/lib/cli/datasets/datasets.py index 3f24b911..a8197d2a 100644 --- a/src/llama_stack_client/lib/cli/datasets/datasets.py +++ b/src/llama_stack_client/lib/cli/datasets/datasets.py @@ -3,14 +3,10 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. - -import json -from typing import Optional - import click -import yaml from .list import list_datasets +from .register import register @click.group() @@ -19,49 +15,6 @@ def datasets(): pass -@datasets.command() -@click.option("--dataset-id", required=True, help="Id of the dataset") -@click.option("--provider-id", help="Provider ID for the dataset", default=None) -@click.option("--provider-dataset-id", help="Provider's dataset ID", default=None) -@click.option("--metadata", type=str, help="Metadata of the dataset") -@click.option("--url", type=str, help="URL of the dataset", required=True) -@click.option("--schema", type=str, help="JSON schema of the dataset", required=True) -@click.pass_context -def register( - ctx, - dataset_id: str, - provider_id: Optional[str], - provider_dataset_id: Optional[str], - metadata: Optional[str], - url: str, - schema: str, -): - """Create a new dataset""" - client = ctx.obj["client"] - - try: - dataset_schema = json.loads(schema) - except json.JSONDecodeError as err: - raise click.BadParameter("Schema must be valid JSON") from err - - if metadata: - try: - metadata = json.loads(metadata) - except json.JSONDecodeError as err: - raise click.BadParameter("Metadata must be valid JSON") from err - - response = client.datasets.register( - dataset_id=dataset_id, - dataset_schema=dataset_schema, - url={"uri": url}, - provider_id=provider_id, - provider_dataset_id=provider_dataset_id, - metadata=metadata, - ) - if response: - click.echo(yaml.dump(response.dict())) - - # Register subcommands datasets.add_command(list_datasets) datasets.add_command(register) diff --git a/src/llama_stack_client/lib/cli/datasets/register.py b/src/llama_stack_client/lib/cli/datasets/register.py new file mode 100644 index 00000000..5887cf42 --- /dev/null +++ b/src/llama_stack_client/lib/cli/datasets/register.py @@ -0,0 +1,77 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +import base64 +import json +import mimetypes +import os +from typing import Optional + +import click +import yaml + + +def data_url_from_file(file_path: str) -> str: + if not os.path.exists(file_path): + raise FileNotFoundError(f"File not found: {file_path}") + + with open(file_path, "rb") as file: + file_content = file.read() + + base64_content = base64.b64encode(file_content).decode("utf-8") + mime_type, _ = mimetypes.guess_type(file_path) + + data_url = f"data:{mime_type};base64,{base64_content}" + return data_url + + +@click.command("register") +@click.option("--dataset-id", required=True, help="Id of the dataset") +@click.option("--provider-id", help="Provider ID for the dataset", default=None) +@click.option("--provider-dataset-id", help="Provider's dataset ID", default=None) +@click.option("--metadata", type=str, help="Metadata of the dataset") +@click.option("--url", type=str, help="URL of the dataset", required=True) +@click.option( + "--dataset-path", required=False, help="Local file path to the dataset. If specified, upload dataset via URL" +) +@click.option("--schema", type=str, help="JSON schema of the dataset", required=True) +@click.pass_context +def register( + ctx, + dataset_id: str, + provider_id: Optional[str], + provider_dataset_id: Optional[str], + metadata: Optional[str], + url: str, + dataset_path: Optional[str], + schema: str, +): + """Create a new dataset""" + client = ctx.obj["client"] + + try: + dataset_schema = json.loads(schema) + except json.JSONDecodeError as err: + raise click.BadParameter("Schema must be valid JSON") from err + + if metadata: + try: + metadata = json.loads(metadata) + except json.JSONDecodeError as err: + raise click.BadParameter("Metadata must be valid JSON") from err + + if dataset_path: + url = data_url_from_file(dataset_path) + + response = client.datasets.register( + dataset_id=dataset_id, + dataset_schema=dataset_schema, + url={"uri": url}, + provider_id=provider_id, + provider_dataset_id=provider_dataset_id, + metadata=metadata, + ) + if response: + click.echo(yaml.dump(response.dict())) diff --git a/src/llama_stack_client/lib/cli/eval/eval.py b/src/llama_stack_client/lib/cli/eval/eval.py index 6ff9aa2b..6f2e1c66 100644 --- a/src/llama_stack_client/lib/cli/eval/eval.py +++ b/src/llama_stack_client/lib/cli/eval/eval.py @@ -8,6 +8,7 @@ import click from .run_benchmark import run_benchmark +from .run_scoring import run_scoring @click.group() @@ -18,3 +19,4 @@ def eval(): # Register subcommands eval.add_command(run_benchmark) +eval.add_command(run_scoring) diff --git a/src/llama_stack_client/lib/cli/eval/run_benchmark.py b/src/llama_stack_client/lib/cli/eval/run_benchmark.py index 9cde1fb1..aa8e1cb0 100644 --- a/src/llama_stack_client/lib/cli/eval/run_benchmark.py +++ b/src/llama_stack_client/lib/cli/eval/run_benchmark.py @@ -46,7 +46,7 @@ def run_benchmark( num_examples: Optional[int], visualize: bool, ): - """Run a evaluation benchmark""" + """Run a evaluation benchmark task""" client = ctx.obj["client"] diff --git a/src/llama_stack_client/lib/cli/eval/run_scoring.py b/src/llama_stack_client/lib/cli/eval/run_scoring.py new file mode 100644 index 00000000..f0e0333c --- /dev/null +++ b/src/llama_stack_client/lib/cli/eval/run_scoring.py @@ -0,0 +1,91 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import json +import os +from typing import Optional + +import click +import pandas +from rich import print as rprint +from tqdm.rich import tqdm + + +@click.command("run_scoring") +@click.argument("scoring-function-ids", nargs=-1, required=True) +@click.option( + "--dataset-id", + required=True, + help="Pre-registered dataset_id to score (from llama-stack-client datasets list)", +) +@click.option( + "--scoring-params-config", + required=False, + help="Path to the scoring params config file in JSON format", + type=click.Path(exists=True), +) +@click.option( + "--output-dir", + required=True, + help="Path to the dump eval results output directory", +) +@click.option( + "--visualize", + is_flag=True, + default=False, + help="Visualize evaluation results after completion", +) +@click.pass_context +def run_scoring( + ctx, + scoring_function_ids: tuple[str, ...], + dataset_id: str, + scoring_params_config: Optional[str], + output_dir: str, + visualize: bool, +): + """Run scoring from application datasets""" + client = ctx.obj["client"] + + scoring_params = {fn_id: None for fn_id in scoring_function_ids} + if scoring_params_config: + with open(scoring_params_config, "r") as f: + scoring_params = json.load(f) + print(scoring_params) + + dataset = client.datasets.retrieve(dataset_id=dataset_id) + if not dataset: + click.BadParameter( + f"Dataset {dataset_id} not found. Please register using llama-stack-client datasets register" + ) + + output_res = {} + + rows = client.datasetio.get_rows_paginated(dataset_id=dataset_id, rows_in_page=-1) + for r in tqdm(rows.rows): + score_res = client.scoring.score( + input_rows=[r], + scoring_functions=scoring_params, + ) + for k in r.keys(): + if k not in output_res: + output_res[k] = [] + output_res[k].append(r[k]) + + for fn_id in scoring_function_ids: + if fn_id not in output_res: + output_res[fn_id] = [] + output_res[fn_id].append(score_res.results[fn_id].score_rows[0]) + + break + + # Create output directory if it doesn't exist + os.makedirs(output_dir, exist_ok=True) + output_file = os.path.join(output_dir, f"{dataset_id}_score_results.csv") + df = pandas.DataFrame(output_res) + df.to_csv(output_file, index=False) + + rprint(f"[green]✓[/green] Results saved to: [blue]{output_file}[/blue]!\n") From f13ce6f7c212c9477eb2be7bbad7473766612d87 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 15 Nov 2024 14:05:23 -0500 Subject: [PATCH 2/6] scoring cli --- src/llama_stack_client/lib/cli/eval/run_scoring.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/llama_stack_client/lib/cli/eval/run_scoring.py b/src/llama_stack_client/lib/cli/eval/run_scoring.py index f0e0333c..9c1c936a 100644 --- a/src/llama_stack_client/lib/cli/eval/run_scoring.py +++ b/src/llama_stack_client/lib/cli/eval/run_scoring.py @@ -27,6 +27,9 @@ help="Path to the scoring params config file in JSON format", type=click.Path(exists=True), ) +@click.option( + "--num-examples", required=False, help="Number of examples to evaluate on, useful for debugging", default=None +) @click.option( "--output-dir", required=True, @@ -44,6 +47,7 @@ def run_scoring( scoring_function_ids: tuple[str, ...], dataset_id: str, scoring_params_config: Optional[str], + num_examples: Optional[int], output_dir: str, visualize: bool, ): @@ -54,7 +58,6 @@ def run_scoring( if scoring_params_config: with open(scoring_params_config, "r") as f: scoring_params = json.load(f) - print(scoring_params) dataset = client.datasets.retrieve(dataset_id=dataset_id) if not dataset: @@ -64,7 +67,9 @@ def run_scoring( output_res = {} - rows = client.datasetio.get_rows_paginated(dataset_id=dataset_id, rows_in_page=-1) + rows = client.datasetio.get_rows_paginated( + dataset_id=dataset_id, rows_in_page=-1 if num_examples is None else num_examples + ) for r in tqdm(rows.rows): score_res = client.scoring.score( input_rows=[r], @@ -80,8 +85,6 @@ def run_scoring( output_res[fn_id] = [] output_res[fn_id].append(score_res.results[fn_id].score_rows[0]) - break - # Create output directory if it doesn't exist os.makedirs(output_dir, exist_ok=True) output_file = os.path.join(output_dir, f"{dataset_id}_score_results.csv") From fc8b9825fea97e69f1e4d2b92ba860f7551f8479 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 15 Nov 2024 15:16:19 -0500 Subject: [PATCH 3/6] add option for dataset_path to read rows from local file --- .../lib/cli/eval/run_scoring.py | 42 ++++++++++++++----- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/src/llama_stack_client/lib/cli/eval/run_scoring.py b/src/llama_stack_client/lib/cli/eval/run_scoring.py index 9c1c936a..c97013f9 100644 --- a/src/llama_stack_client/lib/cli/eval/run_scoring.py +++ b/src/llama_stack_client/lib/cli/eval/run_scoring.py @@ -18,9 +18,15 @@ @click.argument("scoring-function-ids", nargs=-1, required=True) @click.option( "--dataset-id", - required=True, + required=False, help="Pre-registered dataset_id to score (from llama-stack-client datasets list)", ) +@click.option( + "--dataset-path", + required=False, + help="Path to the dataset file to score", + type=click.Path(exists=True), +) @click.option( "--scoring-params-config", required=False, @@ -45,13 +51,18 @@ def run_scoring( ctx, scoring_function_ids: tuple[str, ...], - dataset_id: str, + dataset_id: Optional[str], + dataset_path: Optional[str], scoring_params_config: Optional[str], num_examples: Optional[int], output_dir: str, visualize: bool, ): """Run scoring from application datasets""" + # one of dataset_id or dataset_path is required + if dataset_id is None and dataset_path is None: + raise click.BadParameter("Specify either dataset_id (pre-registered dataset) or dataset_path (local file)") + client = ctx.obj["client"] scoring_params = {fn_id: None for fn_id in scoring_function_ids} @@ -59,18 +70,27 @@ def run_scoring( with open(scoring_params_config, "r") as f: scoring_params = json.load(f) - dataset = client.datasets.retrieve(dataset_id=dataset_id) - if not dataset: - click.BadParameter( - f"Dataset {dataset_id} not found. Please register using llama-stack-client datasets register" + output_res = {} + + if dataset_id is not None: + dataset = client.datasets.retrieve(dataset_id=dataset_id) + if not dataset: + click.BadParameter( + f"Dataset {dataset_id} not found. Please register using llama-stack-client datasets register" + ) + + # TODO: this will eventually be replaced with jobs polling from server vis score_bath + # For now, get all datasets rows via datasetio API + rows = client.datasetio.get_rows_paginated( + dataset_id=dataset_id, rows_in_page=-1 if num_examples is None else num_examples ) + rows = rows.rows - output_res = {} + if dataset_path is not None: + df = pandas.read_csv(dataset_path) + rows = df.to_dict(orient="records") - rows = client.datasetio.get_rows_paginated( - dataset_id=dataset_id, rows_in_page=-1 if num_examples is None else num_examples - ) - for r in tqdm(rows.rows): + for r in tqdm(rows): score_res = client.scoring.score( input_rows=[r], scoring_functions=scoring_params, From d96fe97410a16f4664da475d5ca04ec076a64cfb Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 15 Nov 2024 15:22:55 -0500 Subject: [PATCH 4/6] fix num examples --- src/llama_stack_client/lib/cli/eval/run_scoring.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/llama_stack_client/lib/cli/eval/run_scoring.py b/src/llama_stack_client/lib/cli/eval/run_scoring.py index c97013f9..938f29b3 100644 --- a/src/llama_stack_client/lib/cli/eval/run_scoring.py +++ b/src/llama_stack_client/lib/cli/eval/run_scoring.py @@ -34,7 +34,11 @@ type=click.Path(exists=True), ) @click.option( - "--num-examples", required=False, help="Number of examples to evaluate on, useful for debugging", default=None + "--num-examples", + required=False, + help="Number of examples to evaluate on, useful for debugging", + default=None, + type=int, ) @click.option( "--output-dir", @@ -89,6 +93,8 @@ def run_scoring( if dataset_path is not None: df = pandas.read_csv(dataset_path) rows = df.to_dict(orient="records") + if num_examples is not None: + rows = rows[:num_examples] for r in tqdm(rows): score_res = client.scoring.score( From 1cf9e107a2f91d958d2e87a42b3347ee848c5f32 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 15 Nov 2024 15:23:17 -0500 Subject: [PATCH 5/6] fix num examples --- src/llama_stack_client/lib/cli/eval/run_benchmark.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/llama_stack_client/lib/cli/eval/run_benchmark.py b/src/llama_stack_client/lib/cli/eval/run_benchmark.py index aa8e1cb0..e834942b 100644 --- a/src/llama_stack_client/lib/cli/eval/run_benchmark.py +++ b/src/llama_stack_client/lib/cli/eval/run_benchmark.py @@ -29,7 +29,11 @@ help="Path to the dump eval results output directory", ) @click.option( - "--num-examples", required=False, help="Number of examples to evaluate on, useful for debugging", default=None + "--num-examples", + required=False, + help="Number of examples to evaluate on, useful for debugging", + default=None, + type=int, ) @click.option( "--visualize", From b9223eb357786e41172d881e060ff28466dbcb79 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 15 Nov 2024 15:24:04 -0500 Subject: [PATCH 6/6] naming --- src/llama_stack_client/lib/cli/eval/run_scoring.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llama_stack_client/lib/cli/eval/run_scoring.py b/src/llama_stack_client/lib/cli/eval/run_scoring.py index 938f29b3..7fae247c 100644 --- a/src/llama_stack_client/lib/cli/eval/run_scoring.py +++ b/src/llama_stack_client/lib/cli/eval/run_scoring.py @@ -85,10 +85,10 @@ def run_scoring( # TODO: this will eventually be replaced with jobs polling from server vis score_bath # For now, get all datasets rows via datasetio API - rows = client.datasetio.get_rows_paginated( + results = client.datasetio.get_rows_paginated( dataset_id=dataset_id, rows_in_page=-1 if num_examples is None else num_examples ) - rows = rows.rows + rows = results.rows if dataset_path is not None: df = pandas.read_csv(dataset_path)