diff --git a/cli_docs.md b/cli_docs.md index 13351cd7..d8551674 100644 --- a/cli_docs.md +++ b/cli_docs.md @@ -270,8 +270,6 @@ Options: --to [%Y-%m-%d|%Y-%m-%dT%H:%M:%S|%Y-%m-%d %H:%M:%S] Set the end date for fetching completions (inclusive). Use the format: YYYY-MM-DD. - --compact Enable to download only the compact version - of the output. -f, --file TEXT Specify the filename and path for the output file. ``` diff --git a/log10/cli/completions.py b/log10/cli/completions.py index 8747bc16..50a4f675 100644 --- a/log10/cli/completions.py +++ b/log10/cli/completions.py @@ -4,18 +4,17 @@ import click import pandas as pd import rich -import tqdm from rich.console import Console from rich.table import Table from log10._httpx_utils import _get_time_diff, _try_get from log10.cli_utils import generate_markdown_report, generate_results_table from log10.completions.completions import ( + Completions, _check_model_support, _compare, _get_completion, _get_completions_url, - _write_completions, ) from log10.llm import Log10Config from log10.prompt_analyzer import PromptAnalyzer, convert_suggestion_to_markdown, display_prompt_analyzer_suggestions @@ -24,7 +23,7 @@ _log10_config = Log10Config() -def _render_completions_table(completions_data, total_completions): +def _render_completions_table(completions_data): data_for_table = [] for completion in completions_data: prompt, response = "", "" @@ -45,9 +44,12 @@ def _render_completions_table(completions_data, total_completions): message = first_choice["message"] response = ( message.get("content") - or message.get("tool_calls", [])[-1].get("function", {}).get("arguments", "") - if message.get("tool_calls") - else "" + or ( + message.get("tool_calls")[-1].get("function", {}).get("arguments", "") + if message.get("tool_calls") + else "" + ) + or "" ) elif "function_call" in first_choice: response = json.dumps(first_choice.get("function_call", {})) @@ -86,7 +88,6 @@ def _render_completions_table(completions_data, total_completions): console = Console() console.print(table) - console.print(f"{total_completions=}") def _render_comparison_table(model_response_raw_data): @@ -184,10 +185,9 @@ def list_completions(limit, offset, timeout, tags, from_date, to_date): res = _try_get(url, timeout) completions = res.json() - total_completions = completions["total"] completions = completions["data"] - _render_completions_table(completions, total_completions) + _render_completions_table(completions) @click.command() @@ -201,8 +201,8 @@ def get_completion(id): @click.command() -@click.option("--limit", default="", help="Specify the maximum number of completions to retrieve.") -@click.option("--offset", default="", help="Set the starting point (offset) from where to begin fetching completions.") +@click.option("--limit", default=50, help="Specify the maximum number of completions to retrieve.") +@click.option("--offset", default=0, help="Set the starting point (offset) from where to begin fetching completions.") @click.option( "--timeout", default=10, help="Set the maximum time (in seconds) allowed for the HTTP request to complete." ) @@ -219,40 +219,69 @@ def get_completion(id): type=click.DateTime(), help="Set the end date for fetching completions (inclusive). Use the format: YYYY-MM-DD.", ) -@click.option("--compact", is_flag=True, help="Enable to download only the compact version of the output.") -@click.option("--file", "-f", default="completions.jsonl", help="Specify the filename and path for the output file.") -def download_completions(limit, offset, timeout, tags, from_date, to_date, compact, file): +@click.option( + "--file", + "-f", + type=click.Path(dir_okay=False), + default="completions.jsonl", + help="Specify the filename and path for the output file. Only .jsonl extension is supported.", +) +def download_completions(limit, offset, timeout, tags, from_date, to_date, file): """ Download completions to a jsonl file """ - base_url = _log10_config.url - org_id = _log10_config.org_id + input_offset = int(offset) + input_limit = int(limit) + fetched_total = 0 + batch_size = 10 - init_url = _get_completions_url(1, 0, tags, from_date, to_date, base_url, org_id) - res = _try_get(init_url) - if res.status_code != 200: - rich.print(f"Error: {res.json()}") - return + if file: + path = Path(file) + if path.exists(): + rich.print(f'Warning: The file "{file}" already exists and will be overwritten.') - total_completions = res.json()["total"] - offset = int(offset) if offset else 0 - limit = int(limit) if limit else total_completions - rich.print(f"Download total completions: {limit}/{total_completions}") - if not click.confirm("Do you want to continue?"): + ext_name = path.suffix.lower() + if ext_name not in [".jsonl"]: + raise click.UsageError(f"Only .jsonl extension is supported for the output file. Got: {ext_name}") + + console = Console() + track_limit = input_limit if input_limit < batch_size else batch_size + track_offset = input_offset + try: + with console.status("[bold green]Downloading completions...", spinner="bouncingBar") as _status: + with open(file, "w") as output_file: + while True and track_limit > 0: + new_data = Completions()._get_completions( + offset=track_offset, + limit=track_limit, + timeout=timeout, + tag_names=tags, + from_date=from_date, + to_date=to_date, + ) + + new_data_size = len(new_data) + fetched_total += new_data_size + + for completion in new_data: + output_file.write(json.dumps(completion) + "\n") + + console.print(f"Downloaded {fetched_total} completions to {file}.") + + if new_data_size == 0 or new_data_size < track_limit: + break + + track_offset += new_data_size + track_limit = ( + input_limit - fetched_total if input_limit - fetched_total < batch_size else batch_size + ) + except Exception as e: + rich.print(f"Error fetching completions {e}") + if hasattr(e, "response") and hasattr(e.response, "json") and "error" in e.response.json(): + rich.print(e.response.json()["error"]) return - # dowlnoad completions - pbar = tqdm.tqdm(total=limit) - batch_size = 10 - end = offset + limit if offset + limit < total_completions else total_completions - for batch in range(offset, end, batch_size): - current_batch_size = batch_size if batch + batch_size < end else end - batch - download_url = _get_completions_url( - current_batch_size, batch, tags, from_date, to_date, base_url, org_id, printout=False - ) - res = _try_get(download_url, timeout) - _write_completions(res, file, compact) - pbar.update(current_batch_size) + rich.print(f"Download total completions: {fetched_total}. Saved to {file}") @click.command() diff --git a/log10/completions/completions.py b/log10/completions/completions.py index 6d16850d..2f094895 100644 --- a/log10/completions/completions.py +++ b/log10/completions/completions.py @@ -1,5 +1,7 @@ import json +import logging import time +from typing import List, Optional import click import httpx @@ -8,6 +10,14 @@ from log10.llm import Log10Config +logging.basicConfig( + format="[%(asctime)s - %(name)s - %(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) +logger: logging.Logger = logging.getLogger("LOG10") +logger.setLevel(logging.INFO) + + _log10_config = Log10Config() @@ -75,19 +85,6 @@ def _get_valid_date_range(from_date, to_date): return date_range -def _write_completions(res, output_file, compact_mode): - """Processes completions and appends them to the output file.""" - with open(output_file, "a") as file: - data = res.json()["data"] - if compact_mode: - for completion in data: - file.write(json.dumps(completion) + "\n") - else: - for completion_id in (completion["id"] for completion in data): - completion = _get_completion(completion_id).json()["data"] - file.write(json.dumps(completion) + "\n") - - def _get_llm_repsone( model: str, messages: list[dict], @@ -203,3 +200,41 @@ def _compare(models: list[str], messages: dict, temperature: float = 0.2, max_to def _check_model_support(model: str) -> bool: return model in _SUPPORTED_MODELS + + +class Completions: + completions_path = "/api/completions" + + def __init__(self, log10_config: Log10Config = None): + self._log10_config = log10_config or Log10Config() + self._http_client = httpx.Client() + self._http_client.headers = { + "x-log10-token": self._log10_config.token, + "x-log10-organization-id": self._log10_config.org_id, + "Content-Type": "application/json", + } + + self.org_id = self._log10_config.org_id + self.base_url = self._log10_config.url + self.url = f"{self.base_url}{self.completions_path}?organization_id={self.org_id}" + + def _get_completions( + self, + offset: int, + limit: int, + timeout: int, + tag_names: Optional[List[str]] = None, + from_date: click.DateTime = None, + to_date: click.DateTime = None, + printout: bool = True, + ) -> List[dict]: + url = _get_completions_url(limit, offset, tag_names, from_date, to_date, self.base_url, self.org_id) + # Fetch completions + response = _try_get(url, timeout) + + if response.status_code != 200: + logger.error(f"Error: {response.json()}") + return + + completions = response.json() + return completions["data"] diff --git a/poetry.lock b/poetry.lock index fd92d24f..afe6c32b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aiohttp" @@ -2748,7 +2748,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -2756,15 +2755,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -2781,7 +2773,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -2789,7 +2780,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, diff --git a/tests/test_cli.py b/tests/test_cli.py index 3a312120..6859a25c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -18,7 +18,6 @@ def test_list_completions(runner): result = runner.invoke(cli, ["completions", "list"]) print(result.output) assert result.exit_code == 0 - assert "total_completions=" in result.output def test_get_completion(runner): @@ -30,7 +29,7 @@ def test_get_completion(runner): def test_download_completions(runner): result = runner.invoke(cli, ["completions", "download", "--limit", "1", "--tags", "log10/summary-grading"]) assert result.exit_code == 0 - assert "Download total completions: 1/" in result.output + assert "Download total completions: 1. Saved to completions.jsonl" in result.output def test_benchmark_models_with_ids(runner):