Skip to content

Commit

Permalink
Fix listing and download completions cli bug (#240)
Browse files Browse the repository at this point in the history
* Update fetching completions to not get total

* Update download completions logic without using total

* Update poetry.lock file

* Update test_cli assertion to pass with new changes

* Fix test assertion

* Keep file open while writing data

* Remove --compact from cli_docs

* Remove unused code

* Add warning message for overwriting file and check extensions

* Remove .md and .csv extensions

* Update spinner with bouncingbar
  • Loading branch information
kxtran authored Jul 31, 2024
1 parent 7611486 commit 9626930
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 66 deletions.
2 changes: 0 additions & 2 deletions cli_docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,6 @@ Options:
--to [%Y-%m-%d|%Y-%m-%dT%H:%M:%S|%Y-%m-%d %H:%M:%S]
Set the end date for fetching completions
(inclusive). Use the format: YYYY-MM-DD.
--compact Enable to download only the compact version
of the output.
-f, --file TEXT Specify the filename and path for the output
file.
```
Expand Down
105 changes: 67 additions & 38 deletions log10/cli/completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,17 @@
import click
import pandas as pd
import rich
import tqdm
from rich.console import Console
from rich.table import Table

from log10._httpx_utils import _get_time_diff, _try_get
from log10.cli_utils import generate_markdown_report, generate_results_table
from log10.completions.completions import (
Completions,
_check_model_support,
_compare,
_get_completion,
_get_completions_url,
_write_completions,
)
from log10.llm import Log10Config
from log10.prompt_analyzer import PromptAnalyzer, convert_suggestion_to_markdown, display_prompt_analyzer_suggestions
Expand All @@ -24,7 +23,7 @@
_log10_config = Log10Config()


def _render_completions_table(completions_data, total_completions):
def _render_completions_table(completions_data):
data_for_table = []
for completion in completions_data:
prompt, response = "", ""
Expand All @@ -45,9 +44,12 @@ def _render_completions_table(completions_data, total_completions):
message = first_choice["message"]
response = (
message.get("content")
or message.get("tool_calls", [])[-1].get("function", {}).get("arguments", "")
if message.get("tool_calls")
else ""
or (
message.get("tool_calls")[-1].get("function", {}).get("arguments", "")
if message.get("tool_calls")
else ""
)
or ""
)
elif "function_call" in first_choice:
response = json.dumps(first_choice.get("function_call", {}))
Expand Down Expand Up @@ -86,7 +88,6 @@ def _render_completions_table(completions_data, total_completions):

console = Console()
console.print(table)
console.print(f"{total_completions=}")


def _render_comparison_table(model_response_raw_data):
Expand Down Expand Up @@ -184,10 +185,9 @@ def list_completions(limit, offset, timeout, tags, from_date, to_date):
res = _try_get(url, timeout)

completions = res.json()
total_completions = completions["total"]
completions = completions["data"]

_render_completions_table(completions, total_completions)
_render_completions_table(completions)


@click.command()
Expand All @@ -201,8 +201,8 @@ def get_completion(id):


@click.command()
@click.option("--limit", default="", help="Specify the maximum number of completions to retrieve.")
@click.option("--offset", default="", help="Set the starting point (offset) from where to begin fetching completions.")
@click.option("--limit", default=50, help="Specify the maximum number of completions to retrieve.")
@click.option("--offset", default=0, help="Set the starting point (offset) from where to begin fetching completions.")
@click.option(
"--timeout", default=10, help="Set the maximum time (in seconds) allowed for the HTTP request to complete."
)
Expand All @@ -219,40 +219,69 @@ def get_completion(id):
type=click.DateTime(),
help="Set the end date for fetching completions (inclusive). Use the format: YYYY-MM-DD.",
)
@click.option("--compact", is_flag=True, help="Enable to download only the compact version of the output.")
@click.option("--file", "-f", default="completions.jsonl", help="Specify the filename and path for the output file.")
def download_completions(limit, offset, timeout, tags, from_date, to_date, compact, file):
@click.option(
"--file",
"-f",
type=click.Path(dir_okay=False),
default="completions.jsonl",
help="Specify the filename and path for the output file. Only .jsonl extension is supported.",
)
def download_completions(limit, offset, timeout, tags, from_date, to_date, file):
"""
Download completions to a jsonl file
"""
base_url = _log10_config.url
org_id = _log10_config.org_id
input_offset = int(offset)
input_limit = int(limit)
fetched_total = 0
batch_size = 10

init_url = _get_completions_url(1, 0, tags, from_date, to_date, base_url, org_id)
res = _try_get(init_url)
if res.status_code != 200:
rich.print(f"Error: {res.json()}")
return
if file:
path = Path(file)
if path.exists():
rich.print(f'Warning: The file "{file}" already exists and will be overwritten.')

total_completions = res.json()["total"]
offset = int(offset) if offset else 0
limit = int(limit) if limit else total_completions
rich.print(f"Download total completions: {limit}/{total_completions}")
if not click.confirm("Do you want to continue?"):
ext_name = path.suffix.lower()
if ext_name not in [".jsonl"]:
raise click.UsageError(f"Only .jsonl extension is supported for the output file. Got: {ext_name}")

console = Console()
track_limit = input_limit if input_limit < batch_size else batch_size
track_offset = input_offset
try:
with console.status("[bold green]Downloading completions...", spinner="bouncingBar") as _status:
with open(file, "w") as output_file:
while True and track_limit > 0:
new_data = Completions()._get_completions(
offset=track_offset,
limit=track_limit,
timeout=timeout,
tag_names=tags,
from_date=from_date,
to_date=to_date,
)

new_data_size = len(new_data)
fetched_total += new_data_size

for completion in new_data:
output_file.write(json.dumps(completion) + "\n")

console.print(f"Downloaded {fetched_total} completions to {file}.")

if new_data_size == 0 or new_data_size < track_limit:
break

track_offset += new_data_size
track_limit = (
input_limit - fetched_total if input_limit - fetched_total < batch_size else batch_size
)
except Exception as e:
rich.print(f"Error fetching completions {e}")
if hasattr(e, "response") and hasattr(e.response, "json") and "error" in e.response.json():
rich.print(e.response.json()["error"])
return

# download completions
pbar = tqdm.tqdm(total=limit)
batch_size = 10
end = offset + limit if offset + limit < total_completions else total_completions
for batch in range(offset, end, batch_size):
current_batch_size = batch_size if batch + batch_size < end else end - batch
download_url = _get_completions_url(
current_batch_size, batch, tags, from_date, to_date, base_url, org_id, printout=False
)
res = _try_get(download_url, timeout)
_write_completions(res, file, compact)
pbar.update(current_batch_size)
rich.print(f"Download total completions: {fetched_total}. Saved to {file}")


@click.command()
Expand Down
61 changes: 48 additions & 13 deletions log10/completions/completions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
import logging
import time
from typing import List, Optional

import click
import httpx
Expand All @@ -8,6 +10,14 @@
from log10.llm import Log10Config


# NOTE(review): calling logging.basicConfig at import time from a library
# module is unusual — it mutates the root logger and may override the host
# application's logging configuration; consider leaving this to the app.
logging.basicConfig(
    format="[%(asctime)s - %(name)s - %(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# Module-level logger shared by the completions helpers; emits INFO and above.
logger: logging.Logger = logging.getLogger("LOG10")
logger.setLevel(logging.INFO)


_log10_config = Log10Config()


Expand Down Expand Up @@ -75,19 +85,6 @@ def _get_valid_date_range(from_date, to_date):
return date_range


def _write_completions(res, output_file, compact_mode):
"""Processes completions and appends them to the output file."""
with open(output_file, "a") as file:
data = res.json()["data"]
if compact_mode:
for completion in data:
file.write(json.dumps(completion) + "\n")
else:
for completion_id in (completion["id"] for completion in data):
completion = _get_completion(completion_id).json()["data"]
file.write(json.dumps(completion) + "\n")


def _get_llm_repsone(
model: str,
messages: list[dict],
Expand Down Expand Up @@ -203,3 +200,41 @@ def _compare(models: list[str], messages: dict, temperature: float = 0.2, max_to

def _check_model_support(model: str) -> bool:
return model in _SUPPORTED_MODELS


class Completions:
    """Thin client for paging through completions from the Log10 API."""

    completions_path = "/api/completions"

    def __init__(self, log10_config: Log10Config = None):
        # Fall back to the ambient configuration when none is supplied.
        self._log10_config = log10_config or Log10Config()
        self._http_client = httpx.Client()
        self._http_client.headers = {
            "x-log10-token": self._log10_config.token,
            "x-log10-organization-id": self._log10_config.org_id,
            "Content-Type": "application/json",
        }

        self.org_id = self._log10_config.org_id
        self.base_url = self._log10_config.url
        self.url = f"{self.base_url}{self.completions_path}?organization_id={self.org_id}"

    def _get_completions(
        self,
        offset: int,
        limit: int,
        timeout: int,
        tag_names: Optional[List[str]] = None,
        from_date: click.DateTime = None,
        to_date: click.DateTime = None,
        printout: bool = True,
    ) -> List[dict]:
        """Fetch one page of completions.

        Args:
            offset: Index of the first completion to fetch.
            limit: Maximum number of completions to return.
            timeout: HTTP request timeout in seconds.
            tag_names: Optional list of tag filters.
            from_date: Optional inclusive start of the date range filter.
            to_date: Optional inclusive end of the date range filter.
            printout: Forwarded to the URL builder to control its console
                output (previously accepted but silently ignored).

        Returns:
            The list of completion dicts for this page. On a non-200 response
            the error is logged and an empty list is returned so pagination
            loops (which call ``len()`` on the result) terminate cleanly
            instead of crashing on ``None``.
        """
        url = _get_completions_url(
            limit, offset, tag_names, from_date, to_date, self.base_url, self.org_id, printout=printout
        )
        response = _try_get(url, timeout)

        if response.status_code != 200:
            logger.error(f"Error: {response.json()}")
            # Bug fix: returning None here made callers fail with
            # "TypeError: object of type 'NoneType' has no len()".
            return []

        return response.json()["data"]
12 changes: 1 addition & 11 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def test_list_completions(runner):
result = runner.invoke(cli, ["completions", "list"])
print(result.output)
assert result.exit_code == 0
assert "total_completions=" in result.output


def test_get_completion(runner):
Expand All @@ -30,7 +29,7 @@ def test_get_completion(runner):
def test_download_completions(runner):
result = runner.invoke(cli, ["completions", "download", "--limit", "1", "--tags", "log10/summary-grading"])
assert result.exit_code == 0
assert "Download total completions: 1/" in result.output
assert "Download total completions: 1. Saved to completions.jsonl" in result.output


def test_benchmark_models_with_ids(runner):
Expand Down

0 comments on commit 9626930

Please sign in to comment.