Skip to content

Commit

Permalink
Update download completions logic without using total
Browse files Browse the repository at this point in the history
  • Loading branch information
kxtran committed Jul 31, 2024
1 parent 3055b45 commit c9bd602
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 41 deletions.
66 changes: 39 additions & 27 deletions log10/cli/completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
import click
import pandas as pd
import rich
import tqdm
from rich.console import Console
from rich.table import Table

from log10._httpx_utils import _get_time_diff, _try_get
from log10.cli_utils import generate_markdown_report, generate_results_table
from log10.completions.completions import (
Completions,
_check_model_support,
_compare,
_get_completion,
Expand Down Expand Up @@ -201,8 +201,8 @@ def get_completion(id):


@click.command()
@click.option("--limit", default="", help="Specify the maximum number of completions to retrieve.")
@click.option("--offset", default="", help="Set the starting point (offset) from where to begin fetching completions.")
@click.option("--limit", default=50, help="Specify the maximum number of completions to retrieve.")
@click.option("--offset", default=0, help="Set the starting point (offset) from where to begin fetching completions.")
@click.option(
"--timeout", default=10, help="Set the maximum time (in seconds) allowed for the HTTP request to complete."
)
Expand All @@ -225,34 +225,46 @@ def download_completions(limit, offset, timeout, tags, from_date, to_date, compa
"""
Download completions to a jsonl file
"""
base_url = _log10_config.url
org_id = _log10_config.org_id
input_offset = int(offset)
input_limit = int(limit)
fetched_total = 0
batch_size = 10

init_url = _get_completions_url(1, 0, tags, from_date, to_date, base_url, org_id)
res = _try_get(init_url)
if res.status_code != 200:
rich.print(f"Error: {res.json()}")
return
console = Console()
track_limit = input_limit if input_limit < batch_size else batch_size
track_offset = input_offset
try:
with console.status("[bold green]Downloading completions...") as _status:
while True and track_limit > 0:
# Fetch up to track_limit completions, starting at offset track_offset
new_data = Completions()._get_completions(
offset=track_offset,
limit=track_limit,
timeout=timeout,
tag_names=tags,
from_date=from_date,
to_date=to_date,
)

new_data_size = len(new_data)
fetched_total += new_data_size

if new_data_size == 0 or new_data_size < track_limit:
break

total_completions = res.json()["total"]
offset = int(offset) if offset else 0
limit = int(limit) if limit else total_completions
rich.print(f"Download total completions: {limit}/{total_completions}")
if not click.confirm("Do you want to continue?"):
track_offset += new_data_size
track_limit = input_limit - fetched_total if input_limit - fetched_total < batch_size else batch_size

# write new completions data to the downloaded file
_write_completions(new_data, file, compact)
console.print(f"Downloaded {fetched_total} completions so far to {file}.")
except Exception as e:
rich.print(f"Error fetching completions {e}")
if hasattr(e, "response") and hasattr(e.response, "json") and "error" in e.response.json():
rich.print(e.response.json()["error"])
return

# download completions
pbar = tqdm.tqdm(total=limit)
batch_size = 10
end = offset + limit if offset + limit < total_completions else total_completions
for batch in range(offset, end, batch_size):
current_batch_size = batch_size if batch + batch_size < end else end - batch
download_url = _get_completions_url(
current_batch_size, batch, tags, from_date, to_date, base_url, org_id, printout=False
)
res = _try_get(download_url, timeout)
_write_completions(res, file, compact)
pbar.update(current_batch_size)
rich.print(f"Total downloaded completions {fetched_total}. Saved to {file}")


@click.command()
Expand Down
51 changes: 49 additions & 2 deletions log10/completions/completions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
import logging
import time
from typing import List, Optional

import click
import httpx
Expand All @@ -8,6 +10,14 @@
from log10.llm import Log10Config


logging.basicConfig(
format="[%(asctime)s - %(name)s - %(levelname)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
logger: logging.Logger = logging.getLogger("LOG10")
logger.setLevel(logging.INFO)


_log10_config = Log10Config()


Expand Down Expand Up @@ -75,10 +85,9 @@ def _get_valid_date_range(from_date, to_date):
return date_range


def _write_completions(res, output_file, compact_mode):
def _write_completions(data, output_file, compact_mode):
"""Processes completions and appends them to the output file."""
with open(output_file, "a") as file:
data = res.json()["data"]
if compact_mode:
for completion in data:
file.write(json.dumps(completion) + "\n")
Expand Down Expand Up @@ -203,3 +212,41 @@ def _compare(models: list[str], messages: dict, temperature: float = 0.2, max_to

def _check_model_support(model: str) -> bool:
    """Tell whether *model* is among the models this CLI knows how to call."""
    supported = _SUPPORTED_MODELS
    return model in supported


class Completions:
    """Thin client for fetching completion records from the log10 API."""

    completions_path = "/api/completions"

    def __init__(self, log10_config: Log10Config = None):
        """Build the client from *log10_config* (falls back to a default Log10Config).

        NOTE(review): `_http_client` is configured here but never used —
        `_get_completions` goes through `_try_get` instead. Kept to avoid
        changing the class's observable attributes.
        """
        self._log10_config = log10_config or Log10Config()
        self._http_client = httpx.Client()
        self._http_client.headers = {
            "x-log10-token": self._log10_config.token,
            "x-log10-organization-id": self._log10_config.org_id,
            "Content-Type": "application/json",
        }

        self.org_id = self._log10_config.org_id
        self.base_url = self._log10_config.url
        self.url = f"{self.base_url}{self.completions_path}?organization_id={self.org_id}"

    def _get_completions(
        self,
        offset: int,
        limit: int,
        timeout: int,
        tag_names: Optional[List[str]] = None,
        from_date: click.DateTime = None,
        to_date: click.DateTime = None,
        printout: bool = True,
    ) -> List[dict]:
        """Fetch one page of completions from the API.

        Args:
            offset: index of the first record to return.
            limit: maximum number of records to return.
            timeout: HTTP request timeout in seconds, forwarded to `_try_get`.
            tag_names: optional list of tag names to filter by.
            from_date: optional start of a date-range filter.
            to_date: optional end of a date-range filter.
            printout: unused; kept for backward interface compatibility.

        Returns:
            The list of completion dicts from the response's "data" field,
            or an empty list if the request did not return HTTP 200.
        """
        url = _get_completions_url(limit, offset, tag_names, from_date, to_date, self.base_url, self.org_id)
        # Fetch completions
        response = _try_get(url, timeout)

        if response.status_code != 200:
            logger.error(f"Error: {response.json()}")
            # Bug fix: return [] instead of falling through with a bare
            # `return` (i.e. None). Callers take len() of this result, so
            # returning None raised TypeError and also violated the declared
            # List[dict] return type.
            return []

        completions = response.json()
        return completions["data"]
14 changes: 2 additions & 12 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c9bd602

Please sign in to comment.