Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix listing and download completions cli bug #240

Merged
merged 12 commits into from
Jul 31, 2024
2 changes: 0 additions & 2 deletions cli_docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,6 @@ Options:
--to [%Y-%m-%d|%Y-%m-%dT%H:%M:%S|%Y-%m-%d %H:%M:%S]
Set the end date for fetching completions
(inclusive). Use the format: YYYY-MM-DD.
--compact Enable to download only the compact version
of the output.
-f, --file TEXT Specify the filename and path for the output
file.
```
Expand Down
105 changes: 67 additions & 38 deletions log10/cli/completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,17 @@
import click
import pandas as pd
import rich
import tqdm
from rich.console import Console
from rich.table import Table

from log10._httpx_utils import _get_time_diff, _try_get
from log10.cli_utils import generate_markdown_report, generate_results_table
from log10.completions.completions import (
Completions,
_check_model_support,
_compare,
_get_completion,
_get_completions_url,
_write_completions,
)
from log10.llm import Log10Config
from log10.prompt_analyzer import PromptAnalyzer, convert_suggestion_to_markdown, display_prompt_analyzer_suggestions
Expand All @@ -24,7 +23,7 @@
_log10_config = Log10Config()


def _render_completions_table(completions_data, total_completions):
def _render_completions_table(completions_data):
data_for_table = []
for completion in completions_data:
prompt, response = "", ""
Expand All @@ -45,9 +44,12 @@ def _render_completions_table(completions_data, total_completions):
message = first_choice["message"]
response = (
message.get("content")
or message.get("tool_calls", [])[-1].get("function", {}).get("arguments", "")
if message.get("tool_calls")
else ""
or (
message.get("tool_calls")[-1].get("function", {}).get("arguments", "")
if message.get("tool_calls")
else ""
)
or ""
)
elif "function_call" in first_choice:
response = json.dumps(first_choice.get("function_call", {}))
Expand Down Expand Up @@ -86,7 +88,6 @@ def _render_completions_table(completions_data, total_completions):

console = Console()
console.print(table)
console.print(f"{total_completions=}")


def _render_comparison_table(model_response_raw_data):
Expand Down Expand Up @@ -184,10 +185,9 @@ def list_completions(limit, offset, timeout, tags, from_date, to_date):
res = _try_get(url, timeout)

completions = res.json()
total_completions = completions["total"]
completions = completions["data"]

_render_completions_table(completions, total_completions)
_render_completions_table(completions)


@click.command()
Expand All @@ -201,8 +201,8 @@ def get_completion(id):


@click.command()
@click.option("--limit", default="", help="Specify the maximum number of completions to retrieve.")
@click.option("--offset", default="", help="Set the starting point (offset) from where to begin fetching completions.")
@click.option("--limit", default=50, help="Specify the maximum number of completions to retrieve.")
@click.option("--offset", default=0, help="Set the starting point (offset) from where to begin fetching completions.")
@click.option(
"--timeout", default=10, help="Set the maximum time (in seconds) allowed for the HTTP request to complete."
)
Expand All @@ -219,40 +219,69 @@ def get_completion(id):
type=click.DateTime(),
help="Set the end date for fetching completions (inclusive). Use the format: YYYY-MM-DD.",
)
@click.option("--compact", is_flag=True, help="Enable to download only the compact version of the output.")
@click.option("--file", "-f", default="completions.jsonl", help="Specify the filename and path for the output file.")
def download_completions(limit, offset, timeout, tags, from_date, to_date, compact, file):
@click.option(
"--file",
"-f",
type=click.Path(dir_okay=False),
default="completions.jsonl",
help="Specify the filename and path for the output file. Only .jsonl extension is supported.",
)
def download_completions(limit, offset, timeout, tags, from_date, to_date, file):
    """
    Download completions to a jsonl file.

    Pages through the completions API in batches of at most `batch_size`,
    writing each completion as one JSON line to `file`.  Stops when the
    requested `limit` is reached or the server returns a short/empty page.
    """
    input_offset = int(offset)
    input_limit = int(limit)
    fetched_total = 0
    batch_size = 10

    if file:
        path = Path(file)
        if path.exists():
            rich.print(f'Warning: The file "{file}" already exists and will be overwritten.')

        ext_name = path.suffix.lower()
        if ext_name not in [".jsonl"]:
            raise click.UsageError(f"Only .jsonl extension is supported for the output file. Got: {ext_name}")

    console = Console()
    # First page is at most one batch; later pages shrink as the limit is approached.
    track_limit = min(input_limit, batch_size)
    track_offset = input_offset
    try:
        with console.status("[bold green]Downloading completions...", spinner="bouncingBar") as _status:
            with open(file, "w") as output_file:
                # fixed: was `while True and track_limit > 0` — the `True and` was redundant
                while track_limit > 0:
                    new_data = Completions()._get_completions(
                        offset=track_offset,
                        limit=track_limit,
                        timeout=timeout,
                        tag_names=tags,
                        from_date=from_date,
                        to_date=to_date,
                    )

                    new_data_size = len(new_data)
                    fetched_total += new_data_size

                    for completion in new_data:
                        output_file.write(json.dumps(completion) + "\n")

                    console.print(f"Downloaded {fetched_total} completions to {file}.")

                    # A short or empty page means the server has no more data.
                    if new_data_size == 0 or new_data_size < track_limit:
                        break

                    track_offset += new_data_size
                    track_limit = min(input_limit - fetched_total, batch_size)
    except Exception as e:
        rich.print(f"Error fetching completions {e}")
        # Surface the server-provided error message when the exception carries a response.
        if hasattr(e, "response") and hasattr(e.response, "json") and "error" in e.response.json():
            rich.print(e.response.json()["error"])
        return

    rich.print(f"Download total completions: {fetched_total}. Saved to {file}")


@click.command()
Expand Down
61 changes: 48 additions & 13 deletions log10/completions/completions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
import logging
import time
from typing import List, Optional

import click
import httpx
Expand All @@ -8,6 +10,14 @@
from log10.llm import Log10Config


# Module-level logging setup: timestamped format shared by all CLI output.
# NOTE(review): calling basicConfig at import time configures the root logger
# for any application that imports this module — confirm this is intended for
# library use rather than configuring only the "LOG10" logger.
logging.basicConfig(
    format="[%(asctime)s - %(name)s - %(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger: logging.Logger = logging.getLogger("LOG10")
logger.setLevel(logging.INFO)


_log10_config = Log10Config()


Expand Down Expand Up @@ -75,19 +85,6 @@ def _get_valid_date_range(from_date, to_date):
return date_range


def _write_completions(res, output_file, compact_mode):
"""Processes completions and appends them to the output file."""
with open(output_file, "a") as file:
data = res.json()["data"]
if compact_mode:
for completion in data:
file.write(json.dumps(completion) + "\n")
else:
for completion_id in (completion["id"] for completion in data):
completion = _get_completion(completion_id).json()["data"]
file.write(json.dumps(completion) + "\n")


def _get_llm_repsone(
model: str,
messages: list[dict],
Expand Down Expand Up @@ -203,3 +200,41 @@ def _compare(models: list[str], messages: dict, temperature: float = 0.2, max_to

def _check_model_support(model: str) -> bool:
    """Return True if *model* is one of the models listed in _SUPPORTED_MODELS."""
    return model in _SUPPORTED_MODELS


class Completions:
    """Thin client for the log10 completions API.

    Wraps an ``httpx.Client`` pre-configured with the organization's
    authentication headers and exposes paged retrieval of completions.
    """

    completions_path = "/api/completions"

    def __init__(self, log10_config: Log10Config = None):
        """Initialize the client.

        Args:
            log10_config: Optional configuration; a default ``Log10Config``
                is created when omitted.
        """
        self._log10_config = log10_config or Log10Config()
        self._http_client = httpx.Client()
        self._http_client.headers = {
            "x-log10-token": self._log10_config.token,
            "x-log10-organization-id": self._log10_config.org_id,
            "Content-Type": "application/json",
        }

        self.org_id = self._log10_config.org_id
        self.base_url = self._log10_config.url
        self.url = f"{self.base_url}{self.completions_path}?organization_id={self.org_id}"

    def _get_completions(
        self,
        offset: int,
        limit: int,
        timeout: int,
        tag_names: Optional[List[str]] = None,
        from_date: click.DateTime = None,
        to_date: click.DateTime = None,
        printout: bool = True,  # kept for interface compatibility; currently unused
    ) -> List[dict]:
        """Fetch one page of completions.

        Args:
            offset: Index of the first completion to fetch.
            limit: Maximum number of completions to fetch.
            timeout: HTTP request timeout in seconds.
            tag_names: Optional tag filter.
            from_date: Optional inclusive start date filter.
            to_date: Optional inclusive end date filter.
            printout: Unused; retained so existing callers keep working.

        Returns:
            The list of completion dicts, or an empty list on a non-200
            response.  (Previously a bare ``return`` yielded ``None``,
            which crashed callers that call ``len()`` on the result.)
        """
        url = _get_completions_url(limit, offset, tag_names, from_date, to_date, self.base_url, self.org_id)
        response = _try_get(url, timeout)

        if response.status_code != 200:
            logger.error(f"Error: {response.json()}")
            # Return an empty list instead of None so the declared return
            # type holds and callers can safely iterate / len() the result.
            return []

        return response.json()["data"]
12 changes: 1 addition & 11 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def test_list_completions(runner):
result = runner.invoke(cli, ["completions", "list"])
print(result.output)
assert result.exit_code == 0
assert "total_completions=" in result.output


def test_get_completion(runner):
Expand All @@ -30,7 +29,7 @@ def test_get_completion(runner):
def test_download_completions(runner):
result = runner.invoke(cli, ["completions", "download", "--limit", "1", "--tags", "log10/summary-grading"])
assert result.exit_code == 0
assert "Download total completions: 1/" in result.output
assert "Download total completions: 1. Saved to completions.jsonl" in result.output


def test_benchmark_models_with_ids(runner):
Expand Down
Loading