Comparing changes

Repository: log10-io/log10
base: 0.9.1 ... compare: main

Commits on Jun 13, 2024

  1. ENG-851 Update google-generativeai version, example and test (#185)

    * Update google-generativeai to 0.6.0 version
    
    * Update google example and test with history
    kxtran authored Jun 13, 2024 (3f78168)
  2. 16544dc
  3. 2e30886

Commits on Jun 17, 2024

  1. [ENG-856] Remove large file from repo (#187)

    * Remove large file from repo
    
    * Styling
    
    * Remove large image file and update test
    
    ---------
    
    Co-authored-by: Kim Tran <ksprtran@gmail.com>
    nqn and kxtran authored Jun 17, 2024 (feb8001)
  2. [ENG-857] Filter large image messages (#190)

    * Filter out large images from logs
    
    * Filter out single images larger than 1MB
    
    * Formatting
    
    * Bump image size limit to 4MB
    
    * Update magentic test to assert completion id in the platform
    
    ---------
    
    Co-authored-by: Kim Tran <ksprtran@gmail.com>
    nqn and kxtran authored Jun 17, 2024 (d196918)
  3. Bump version to 0.9.2 (#192)

    kxtran authored Jun 17, 2024 (4c19150)
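The ENG-857 change above filters oversized image payloads out of logged messages before they reach the platform. A minimal sketch of that idea, assuming OpenAI-style message parts and the 4MB cap named in the commit messages (the function and field names here are illustrative, not the actual log10 code):

```python
# Hypothetical cap mirroring the "Bump image size limit to 4MB" commit.
MAX_IMAGE_BYTES = 4 * 1024 * 1024


def filter_large_images(messages):
    """Drop data-URL image parts whose decoded payload exceeds MAX_IMAGE_BYTES."""
    filtered = []
    for message in messages:
        content = message.get("content")
        if not isinstance(content, list):
            filtered.append(message)  # plain-text messages pass through untouched
            continue
        kept = []
        for part in content:
            if part.get("type") == "image_url":
                b64 = part["image_url"]["url"].split(",", 1)[-1]
                # Base64 encodes 3 bytes per 4 characters, so estimate the raw size.
                if len(b64) * 3 // 4 > MAX_IMAGE_BYTES:
                    continue  # skip the oversized image instead of logging it
            kept.append(part)
        filtered.append({**message, "content": kept})
    return filtered
```

Only the image part is dropped; the surrounding text parts and message structure are kept so the log remains useful.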

Commits on Jun 21, 2024

  1. ENG-849 ENG-819 Refactor Anthropic parsing code for async and sync with httpx client (#183)
    
    * initial
    
    * Support sync message anthropic sdk  in httpx hook
    
    * Support sync tool_use anthropic in httpx
    
    * Support sync completion anthropic call in httpx
    
    * Support image sync anthropic call in httpx
    
    * Use event hook manager class
    
    * More refactor codes
    
    * Fix anthropic parsing logics
    
    * Update anthropic examples
    
    * Update anthropic e2e tests
    
    * Update patching openai async httpx client
    
    * Fix anthropic legacy completion API call
    
    * Encapsulate patching initializer in a class
    
    * Add continuous tool calls to anthropic test
    
    * Address PR feedback
    
    * Use pop instead of del
    
    * Add AsyncAnthropic client to load.py and update examples
    
    * Add example to AsyncAnthropic client and update tests to use log10 anthropic clients
    
    * Update magentic tests to support running anthropic
    
    * Update magentic version to 0.27.0
    
    * Refactor pytest configs and magentic fixt
    
    * Update llm provider to run magentic tests with anthropic model
    
    * Format files
    
    * Missing func from merge
    
    * Address PR feedback
    
    * Ignore test image files
    
    * Update anthropic version to 0.29.0
    
    * Update test_anthropic to use anthropic model instead of hardcode
    kxtran authored Jun 21, 2024 (e229ea4)
  2. Bump version to 0.10.0 (#197)

    kxtran authored Jun 21, 2024 (5d1b0bb)

Commits on Jun 27, 2024

  1. ENG-879 ENG-882 Move cli logic into its own folder (#198)

    * Print out missing cli modules
    
    * Move cli code into its own folder
    
    * Move completions cli to its own file
    
    * Add e2e tests for cli commands
    
    * Add support cli to display tool_calls
    
    * Update poetry.lock file
    
    * Rename cli extra group dependencies from 'cliModules' to 'cli'
    
    * Add checking pandas, rich, tabulate and simplified if conditions
    
    * Separate llm vs cli tests
    kxtran authored Jun 27, 2024 (cf1ab62)

Commits on Jul 2, 2024

  1. Fix missing logging system message in anthropic (#208)

    * Fix missing logging system message in anthropic
    
    * Fetch completion in _LogAssertion __init__
    kxtran authored Jul 2, 2024 (2df0c73)
  2. Bump version to 0.10.1 (#209)

    * Bump version to 0.10.1

    * Use different image to differentiate the claude tests

    * Increase wait time on litellm anthropic image tests
    kxtran authored Jul 2, 2024 (617b1e7)

Commits on Jul 3, 2024

  1. docs: update README.md (#204)

    fix typo
    eltociear authored Jul 3, 2024 (c4f3f56)
  2. 5b129f7

Commits on Jul 9, 2024

  1. ENG-900 Return early when completion_id is not set (#213)

    * Return early when completion_id is not set
    Combine request functions into one for httpx event hook
    
    * Use test_tag_a instead of test_tag_c
    
    * Add benchmark test with ids and disable with tags
    kxtran authored Jul 9, 2024 (89eafde)

Commits on Jul 10, 2024

  1. Support customize httpx timeout config (#216)

    * Return early when completion_id is not set
    Combine request functions into one for httpx event hook
    
    * Turn on httpx debug logging and expose read timeout
    
    * Add logger error for httpx read timeout exception
    
    * Set back to 5s instead
    
    * Set httpx read timeout to 10s as default
    kxtran authored Jul 10, 2024 (57facfe)

Commits on Jul 11, 2024

  1. Bump version to 0.10.2 (#218)

    kxtran authored Jul 11, 2024 (89558a4)
  2. bump deps: (#220)

    • Updating openai (1.33.0 -> 1.35.13)
    • Updating anthropic (0.29.0 -> 0.31.0)
    wenzhe-log10 authored Jul 11, 2024 (c2ea44e)

Commits on Jul 17, 2024

  1. Update README.md (#225)

    wenzhe-log10 authored Jul 17, 2024 (a92b20d)

Commits on Jul 23, 2024

  1. cf71cf6
  2. c4ccdf3

Commits on Jul 25, 2024

  1. Implement feedback filtering in CLI & SDK (#206)

    * - Re-implement feedback filtering after re-organization
    
    * apply comments and add a test
    
    * minor:
    - test_cli test pass feedback filter value
    
    * add a safe_get for getting value from nested dict; minor change on comments
    
    ---------
    
    Co-authored-by: Wenzhe Xue <wenzhe@log10.io>
    nullfox and wenzhe-log10 authored Jul 25, 2024 (09e7448)
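The feedback-filtering change above mentions a `safe_get` helper for pulling values out of nested dicts. A sketch of what such a helper typically looks like (an illustrative assumption, not the actual log10 implementation):

```python
def safe_get(data, *keys, default=None):
    """Walk nested dicts, returning `default` when any key along the path is missing."""
    current = data
    for key in keys:
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    return current
```

This avoids chained `data["a"]["b"]["c"]` lookups that raise `KeyError` when feedback entries are missing optional fields.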

Commits on Jul 29, 2024

  1. add --save_all_results_to_dataframe to benchmark_models cli (#233)

    * add `--save_all_results_to_dataframe` to benchmark_models cli to save all
    results to a csv file
    
    * check usage of --save_all_results_to_dataframe with --file and update doc
    wenzhe-log10 authored Jul 29, 2024 (5713341)
  2. 00b5817
  3. c49ca5c

Commits on Jul 30, 2024

  1. update benchmark_models cli (#238)

    * update benchmark_models cli:
    * save file with --file only, to md, csv, or jsonl based on file extension
    
    * minor
    wenzhe-log10 authored Jul 30, 2024 (7611486)

Commits on Jul 31, 2024

  1. Fix listing and download completions cli bug (#240)

    * Update fetching completions to not get total
    
    * Update download completions logic without using total
    
    * Update poetry.lock file
    
    * Update test_cli assertion to pass with new changes
    
    * Fix test assertion
    
    * Keep file open while writing data
    
    * Remove --compact from cli_docs
    
    * Remove unused code
    
    * Add warning message for overwriting file and check extensions
    
    * Remove .md and .csv extensions
    
    * Update spinner with bouncingbar
    kxtran authored Jul 31, 2024 (9626930)
  2. 4f0537a

Commits on Aug 1, 2024

  1. 920edec
  2. update console message with status update instead of console print (#242)

    * update console message with status update instead of console print

    * update test_cli to check output of completions download
    wenzhe-log10 authored Aug 1, 2024 (4b2e189)
  3. 92cc4c8

Commits on Aug 2, 2024

  1. 2791d60
  2. release 0.10.6 (#247)

    wenzhe-log10 authored Aug 2, 2024 (ed94392)

Commits on Aug 8, 2024

  1. 3b2ee60
  2. release 0.10.7 (#254)

    wenzhe-log10 authored Aug 8, 2024 (0002a75)

Commits on Aug 12, 2024

  1. update feedback download cli with filter option (#251)

    * update feedback download cli with filter option
    
    * update feedback download cli options value and type
    
    * check file extension to be jsonl
    wenzhe-log10 authored Aug 12, 2024 (57de440)

Commits on Aug 13, 2024

  1. 91a8534
  2. Dependency update (#263)

    * Update openai (1.37.1 -> 1.40.6)
    
    * update anthropic (0.32.0 -> 0.33.1)
    
    * update magentic -> 0.31.0
    wenzhe-log10 authored Aug 13, 2024 (1a9be06)

Commits on Aug 14, 2024

  1. release 0.10.8 (#264)

    wenzhe-log10 authored Aug 14, 2024 (cbe2b22)
  2. add new models to benchmark_models (#265)

    * add gpt-4o-2024-08-06 to benchmark_models
    
    * add claude-3-5-sonnet-20240620
    
    * fix - missing comma
    wenzhe-log10 authored Aug 14, 2024 (0b945a9)

Commits on Aug 16, 2024

  1. 439ae3c
  2. release 0.10.9 (#269)

    wenzhe-log10 authored Aug 16, 2024 (406eb49)

Commits on Aug 19, 2024

  1. b8e6e1f
  2. release 0.10.10 (#273)

    wenzhe-log10 authored Aug 19, 2024 (31d7d72)

Commits on Aug 22, 2024

  1. 86ca72f
  2. release 0.10.11 (#279)

    wenzhe-log10 authored Aug 22, 2024 (974ce9f)

Commits on Aug 23, 2024

  1. Add async streaming support for OpenAI compatible models (#281)

    * Add support models via openai sdk
    
    * Support perplexity model via openai sdk
    
    * Fix quotes around f-string
    
    * Update tests to run against openai perplexity compatibility in openai and magentic

    * Add openai perplexity compatibility logging examples
    
    * Format files
    
    * Add perplexity api key env
    
    * Upgrade litellm for magentic
    
    * Typo
    
    * Rename example logging file
    
    * Add comment on openai path list
    
    * Update llm_provider -> llm_client for consistency
    
    * Feedback
    kxtran authored Aug 23, 2024 (cfb5272)
  2. Release 0.11.0 (#282)

    kxtran authored Aug 23, 2024 (59c40e8)

Commits on Aug 27, 2024

  1. cli benchmark_models - support openai fine-tuned models (#287)

    * Misc: update mistral models
    wenzhe-log10 authored Aug 27, 2024 (ad26c4a)

Commits on Aug 28, 2024

  1. fde3e7d

Commits on Aug 29, 2024

  1. Generalize perplexity logic for streaming end detection across OpenAI compatible models (#286)
    
    * Use perplexity logic to check cerebras streaming end indication
    
    * Check openai data contains [DONE] first
    
    * Check response choices before accessing choices objects
    
    * Ensure reaching end stream in the last object
    
    * Specific conditions for openai and mistral
    
    * Add parameterized for openai compatible tests
    
    * Include the newline characters without using a backslash in the f-string
    
    * Check for usage to determine reaching the end of cerebras stream
    
    * Update using gpt4-o-mini for openai tests
    
    * Add cerebras secret to ci test
    
    * Remove usage condition
    kxtran authored Aug 29, 2024 (98dbb2c)
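The stream-end detection the commit above generalizes can be sketched as a small predicate over SSE `data:` payloads. The branch order mirrors the commit messages (check for the literal `[DONE]` first, then empty choices, then a `finish_reason`); field names follow the OpenAI chunk format, and the exact log10 logic may differ:

```python
import json


def is_stream_end(sse_data: str) -> bool:
    """Return True when an SSE `data:` payload marks the end of a stream."""
    if sse_data.strip() == "[DONE]":
        return True  # OpenAI terminates its stream with a literal sentinel
    chunk = json.loads(sse_data)
    choices = chunk.get("choices")
    if not choices:
        return True  # some compatible providers send a final usage-only chunk
    # Providers that skip the sentinel set finish_reason on the last chunk.
    return choices[0].get("finish_reason") is not None
```

The point of the refactor is that one predicate handles OpenAI, Perplexity, and Cerebras streams instead of per-provider special cases.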
Showing with 6,076 additions and 2,926 deletions.
  1. +8 −4 .github/workflows/ruff.yml
  2. +27 −15 .github/workflows/test.yml
  3. +3 −0 .gitignore
  4. +0 −1 Makefile
  5. +7 −3 README.md
  6. +71 −12 cli_docs.md
  7. +27 −0 examples/logging/anthropic_async_completion.py
  8. +10 −9 examples/logging/anthropic_async_messages.py
  9. +1 −1 examples/logging/anthropic_async_messages_stream.py
  10. +0 −41 examples/logging/anthropic_async_messages_stream_handler.py
  11. +25 −27 examples/logging/anthropic_async_tools_stream.py
  12. +0 −57 examples/logging/anthropic_async_tools_stream_handler.py
  13. +3 −4 examples/logging/anthropic_completion.py
  14. +1 −1 examples/logging/anthropic_messages.py
  15. +0 −21 examples/logging/anthropic_messages_not_given.py
  16. +1 −1 examples/logging/anthropic_messages_stream.py
  17. +58 −0 examples/logging/anthropic_tools.py
  18. +0 −35 examples/logging/anthropic_tools_stream.py
  19. +7 −4 examples/logging/google_genai_gemini_chat_w_history.py
  20. +0 −11 examples/logging/lamini_generate.py
  21. +0 −1 examples/logging/litellm_image.py
  22. +25 −0 examples/logging/magentic_async_chat_perplexity.py
  23. +22 −0 examples/logging/mock_chat_completions.py
  24. +1 −1 examples/logging/openai_chat_image.py
  25. +25 −0 examples/logging/perplexity_async_chat_openai_compatibility.py
  26. +46 −2 examples/logging/session_openai.py
  27. +0 −529 log10/_httpx_utils.py
  28. +0 −590 log10/completions/completions.py
  29. +0 −239 log10/feedback/feedback.py
  30. +1,399 −693 poetry.lock
  31. +33 −15 pyproject.toml
  32. 0 { → src}/log10/__init__.py
  33. +5 −0 src/log10/__main__.py
  34. +874 −0 src/log10/_httpx_utils.py
  35. 0 { → src}/log10/agents/camel.py
  36. 0 { → src}/log10/agents/scrape_summarizer.py
  37. +34 −15 { → src}/log10/anthropic.py
  38. 0 { → src}/log10/bigquery.py
  39. +47 −0 src/log10/cli/autofeedback.py
  40. +87 −0 src/log10/cli/autoprompt.py
  41. +26 −8 log10/__main__.py → src/log10/cli/cli_commands.py
  42. +533 −0 src/log10/cli/completions.py
  43. +158 −0 src/log10/cli/feedback.py
  44. +2 −70 {log10/feedback → src/log10/cli}/feedback_task.py
  45. 0 { → src}/log10/cli_utils.py
  46. +1 −0 src/log10/completions/__init__.py
  47. +371 −0 src/log10/completions/completions.py
  48. 0 { → src}/log10/evals.py
  49. 0 { → src}/log10/feedback/_summary_feedback_utils.py
  50. +3 −46 { → src}/log10/feedback/autofeedback.py
  51. +206 −0 src/log10/feedback/feedback.py
  52. +72 −0 src/log10/feedback/feedback_task.py
  53. +3 −8 { → src}/log10/langchain.py
  54. +1 −2 { → src}/log10/litellm.py
  55. +46 −11 { → src}/log10/llm.py
  56. +216 −267 { → src}/log10/load.py
  57. 0 { → src}/log10/mosaicml.py
  58. 0 { → src}/log10/openai.py
  59. 0 { → src}/log10/prompt_analyzer.py
  60. +94 −0 src/log10/pytest_log10_managed_evaluation/README.md
  61. 0 src/log10/pytest_log10_managed_evaluation/__init__.py
  62. +483 −0 src/log10/pytest_log10_managed_evaluation/plugin.py
  63. +124 −0 src/log10/pytest_log10_managed_evaluation/serialize.py
  64. +31 −0 src/log10/pytest_log10_managed_evaluation/utils.py
  65. 0 { → src}/log10/schemas/bigquery.json
  66. 0 { → src}/log10/together.py
  67. 0 { → src}/log10/tools.py
  68. +22 −0 { → src}/log10/utils.py
  69. 0 tests/__init__.py
  70. +30 −9 tests/conftest.py
  71. BIN tests/large_image.png
  72. +5 −4 tests/pytest.ini
  73. +140 −84 tests/test_anthropic.py
  74. +99 −0 tests/test_cli.py
  75. +28 −0 tests/test_completions.py
  76. +7 −4 tests/test_google.py
  77. +0 −18 tests/test_lamini.py
  78. +4 −3 tests/test_langchain.py
  79. +76 −0 tests/test_large_images.py
  80. +5 −5 tests/test_litellm.py
  81. +106 −0 tests/test_load.py
  82. +86 −32 tests/test_magentic.py
  83. +42 −0 tests/test_magentic_perplexity.py
  84. +3 −7 tests/test_openai.py
  85. +162 −0 tests/test_openai_compatibility.py
  86. +44 −16 tests/utils.py
12 changes: 8 additions & 4 deletions .github/workflows/ruff.yml
@@ -3,15 +3,19 @@ on: [pull_request]
jobs:
  ruff:
    runs-on: ubuntu-latest
+    timeout-minutes: 3

    steps:
      - uses: actions/checkout@v3

-      - uses: chartboost/ruff-action@v1
+      - uses: astral-sh/ruff-action@v3
        with:
-          src: log10/ examples/ tests/
+          version: "latest"
+          src: src/ examples/ tests/
          args: check --no-fix

-      - uses: chartboost/ruff-action@v1
+      - uses: astral-sh/ruff-action@v3
        with:
-          src: log10/ examples/ tests/
+          version: "latest"
+          src: src/ examples/ tests/
          args: format --diff
42 changes: 27 additions & 15 deletions .github/workflows/test.yml
@@ -27,29 +27,33 @@ on:
description: 'Model name for Mistralai tests'
type: string
required: false
lamini_model:
description: 'Model name for Lamini tests'
type: string
required: false
magentic_model:
description: 'Model name for Magentic tests'
type: string
required: false
run_litellm_tests:
description: 'Run Litellm tests without specifying a model'
type: boolean
required: false

env:
PYTHON_VERSION: 3.11.4
jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 15

env:
LOG10_URL: "https://log10.io"
LOG10_ORG_ID: ${{ secrets.LOG10_ORG_ID }}
LOG10_TOKEN: ${{ secrets.LOG10_TOKEN }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
LAMINI_API_KEY: ${{ secrets.LAMINI_API_KEY }}
GOOGLE_API_KEY : ${{ secrets.GOOGLE_API_KEY }}
PERPLEXITYAI_API_KEY: ${{ secrets.PERPLEXITYAI_API_KEY }}
CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
PYTEST_ADDOPTS: "--color=yes"
steps:
- uses: actions/checkout@v4
- name: Install poetry
@@ -61,9 +65,12 @@ jobs:
cache: "poetry"
architecture: 'x64'
- name: Install dependencies
run: poetry install --all-extras
run: poetry install --all-extras

- name: Run dispatch tests
- name: Run cli tests
run: poetry run pytest -vv tests/test_cli.py

- name: Run dispatch llm tests
if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' }}
run: |
echo "This is a dispatch event"
@@ -72,8 +79,8 @@ jobs:
anthropic_model_input=${{ github.event.inputs.anthropic_model }}
google_model_input=${{ github.event.inputs.google_model }}
mistralai_model_input=${{ github.event.inputs.mistralai_model }}
lamini_model_input=${{ github.event.inputs.lamini_model }}
magentic_model_input=${{ github.event.inputs.magentic_model }}
run_litellm_tests=${{ github.event.inputs.run_litellm_tests }}
empty_inputs=true
if [[ -n "$openai_model_input" ]]; then
@@ -101,24 +108,29 @@ jobs:
poetry run pytest --mistralai_model=$mistralai_model_input -vv tests/test_mistralai.py
fi
if [[ -n "$lamini_model_input" ]]; then
if [[ -n "$magentic_model_input" ]]; then
empty_inputs=false
poetry run pytest --lamini_model=$lamini_model_input -vv tests/test_lamini.py
poetry run pytest --llm_provider=$magentic_model_input -vv tests/test_magentic.py
fi
if [[ -n "$magentic_model_input" ]]; then
if [[ "$run_litellm_tests" == "true" ]]; then
empty_inputs=false
poetry run pytest --magentic_model=$magentic_model_input -vv tests/test_magentic.py
poetry run pytest -vv tests/test_litellm.py
fi
if $empty_inputs; then
echo "All variables are empty"
poetry run pytest -vv tests/
poetry run pytest -vv tests/ --ignore=tests/test_cli.py --ignore=tests/test_litellm.py --ignore=tests/test_magentic_perplexity.py
poetry run pytest -vv tests/test_litellm.py
poetry run pytest --llm_provider=anthropic -vv tests/test_magentic.py
poetry run pytest tests/test_magentic_perplexity.py -vv
fi
- name: Run scheduled tests
- name: Run scheduled llm tests
if: ${{ github.event_name == 'schedule' }}
run: |
echo "This is a schedule event"
poetry run pytest -vv tests/
poetry run pytest -vv tests/ --ignore=tests/test_cli.py --ignore=tests/test_litellm.py --ignore=tests/test_magentic_perplexity.py
poetry run pytest --openai_model=gpt-4o -m chat -vv tests/test_openai.py
poetry run pytest tests/test_magentic_perplexity.py -vv
3 changes: 3 additions & 0 deletions .gitignore
@@ -11,3 +11,6 @@ users.db
.env

.vscode/

# Test image files
tests/*.png
1 change: 0 additions & 1 deletion Makefile
@@ -51,7 +51,6 @@ logging-chat:
python examples/logging/mistralai_chat_no_streaming.py
python examples/logging/openai_chat.py
python examples/logging/openai_chat_not_given.py
python examples/logging/lamini_generate.py
# python examples/logging/vertexai_gemini_chat.py
python examples/logging/openai_async_logging.py
python examples/logging/openai_async_stream_logging.py
10 changes: 7 additions & 3 deletions README.md
@@ -11,6 +11,8 @@

## 🤔 What is this?

<img width="800" alt="Log10 stack" src="https://github.com/user-attachments/assets/8a790f82-6d75-4aa0-905b-7d3693815414">

A one-line Python integration to manage your LLM data.

```python
@@ -33,7 +35,7 @@ Access your LLM data at [log10.io](https://log10.io)

### 📝📊 Logging

-Use Log10 to log both closed and open-source LLM calls. It helps you:
+Use Log10 to log both closed and open-source LLM calls, e.g. OpenAI, Anthropic, Google Gemini, Llama, Mistral, etc. It helps you:
- Compare and identify the best models and prompts (try [playground](https://log10.io/docs/observability/playgrounds) and [llmeval](https://log10.io/docs/evaluation/installation))
- Store feedback for fine-tuning
- Collect performance metrics such as latency and usage
@@ -101,7 +103,7 @@ llm = Anthropic({"model": "claude-2"}, log10_config=Log10Config())

#### Asynchronous LLM calls
We support OpenAI and Anthropic Async-client (e.g. AsyncOpenAI and AsyncAnthropic client) in their Python SDK
-You could use the same code `log10(openai)` or `log10(anthropic)` and then call the async-client to start loggin asynchronous mode (including streaming).
+You could use the same code `log10(openai)` or `log10(anthropic)` and then call the async-client to start logging asynchronous mode (including streaming).

Release `0.9.0` includes significant improvements in how we handle concurrency while using LLM in asynchronous streaming mode.
This update is designed to ensure that logging at steady state incurs no overhead (previously up to 1-2 seconds), providing a smoother and more efficient experience in latency critical settings.
@@ -124,10 +126,12 @@ Ensure `finalize()` is called once, at the very end of your event loop to guaran
For more details, check [async logging examples](./examples/logging/).

#### Open-source LLMs
-Log open-source LLM calls, e.g. Llama-2, Mistral, etc from providers.
+Log open-source LLM calls, e.g. Llama, Mistral, etc from providers.
Currently we support inference endpoints on Together.AI and MosaicML (ranked on the top based on our [benchmarking](https://arjunbansal.substack.com/p/which-llama-2-inference-api-should-i-use) on Llama-2 inference providers).
Adding other providers is on the roadmap.

If the providers support OpenAI API (e.g. [Groq](https://console.groq.com/docs/openai), [vLLM](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html), [Together](https://docs.together.ai/docs/openai-api-compatibility)), you can easily starting logging using `log10(openai)`.

**MosaicML** with LLM abstraction. Full script [here](/examples/logging/mosaicml_completion.py).
```python
from log10.mosaicml import MosaicML
83 changes: 71 additions & 12 deletions cli_docs.md
@@ -8,7 +8,7 @@ Here's a [demo video](https://www.loom.com/share/4f5da34df6e94b7083b1e33c707deb5
Install the `log10-io` python package (version >= 0.6.7) and [setup Log10](README.md#⚙️-setup)

```bash
-$ pip install log10-io
+$ pip install 'log10-io[cli]'
```

### Completions
@@ -147,7 +147,9 @@ original_request:
╰─────────────────────────┴───────────────────────────────────────────────────────┴──────────────────────────────────┴───────────────╯
```

-You can also filter the completions by tags and generate a report in markdown file using `--file` or `-f`. And run our prompt analyzer (auto-prompt) using `--analyze_prompt`.
+You can also filter the completions by tags and save the results using `--file` or `-f`.
+Specify the output file using `.md` for a markdown report, `.csv` for comma-separated values, or `.jsonl` for JSON Lines format.
+And run our prompt analyzer (auto-prompt) using `--analyze_prompt`.

### Feedback Tasks and Feedback

@@ -198,6 +200,55 @@ To get auto generated feedback for a completion, use [`log10 feedback autofeedba

## CLI References

```bash
$ log10 --help
Usage: log10 [OPTIONS] COMMAND [ARGS]...

Options:
--help Show this message and exit.

Commands:
auto-prompt Analyze prompts and messages to get suggestions
completions Manage logs from completions i.e.
feedback Manage feedback for completions i.e.
feedback-task Manage tasks for feedback i.e.
```

### Auto Prompt

```bash
$ log10 auto-prompt --help
Usage: log10 auto-prompt [OPTIONS] COMMAND [ARGS]...

Analyze prompts and messages to get suggestions

Options:
--help Show this message and exit.

Commands:
analyze Analyze a prompt or messages and provide suggestions on how to improve it.
```

#### log10 auto-prompt analyze
```bash
$ log10 auto-prompt analyze --help
Usage: log10 auto-prompt analyze [OPTIONS]

Analyze a prompt or messages and provide suggestions on how to improve it.

Options:
-p, --prompt TEXT The prompt to analyze. Provide a string or a file
containing the prompt. We allow three formats: 1) string
prompt, e.g. "Summarize this article in 3 sentences." 2)
messages, e.g. [{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi"}] 3) log10
completion, e.g. {..., "request": {..., "messages":
[{"role": "user", "content": "Hello"}, {"role":
"assistant", "content": "Hi"}], ...}, "response": {...}}
The prompt input could be a string or a file path. If
it's a file path, read the file.
```
### Completions
```bash
@@ -225,7 +276,9 @@ Usage: log10 completions benchmark_models [OPTIONS]
Compare completions using different models and generate report
Options:
--ids TEXT Completion IDs. Separate multiple ids with commas.
--ids TEXT Log10 completion IDs. Provide a comma-separated list
of completion IDs or a path to a JSON file containing
the list of IDs.
--tags TEXT Filter completions by specific tags. Separate multiple
tags with commas.
--limit TEXT Specify the maximum number of completions to retrieve
@@ -237,9 +290,11 @@ Options:
--max_tokens INTEGER Max tokens
--top_p FLOAT Top p
--analyze_prompt Run prompt analyzer on the messages.
-f, --file TEXT Specify the filename for the report in markdown
format.
--help Show this message and exit.
-f, --file FILE Specify the filename to save the results. Specify the
output file using `.md` for a markdown report, `.csv`
for comma-separated values, or `.jsonl` for JSON Lines
format. Only .md, .csv, and .jsonl extensions are
supported.
```
#### log10 completions download
@@ -266,8 +321,6 @@ Options:
--to [%Y-%m-%d|%Y-%m-%dT%H:%M:%S|%Y-%m-%d %H:%M:%S]
Set the end date for fetching completions
(inclusive). Use the format: YYYY-MM-DD.
--compact Enable to download only the compact version
of the output.
-f, --file TEXT Specify the filename and path for the output
file.
```
@@ -354,12 +407,15 @@ Usage: log10 feedback download [OPTIONS]
Options:
--offset INTEGER The starting index from which to begin the feedback fetch.
Leave empty to start from the beginning.
--limit TEXT The maximum number of feedback items to retrieve. Leave
empty to retrieve all.
Defaults to 0.
--limit INTEGER The maximum number of feedback items to retrieve. Defaults
to 25.
--task_id TEXT The specific Task ID to filter feedback. If not provided,
feedback for all tasks will be fetched.
-f, --file TEXT Path to the file where the feedback will be saved. The
--filter TEXT The filter applied to the feedback. If not provided,
feedback will not be filtered. e.g. `log10 feedback list
--filter 'Coverage <= 5'`.
-f, --file FILE Path to the file where the feedback will be saved. The
feedback data is saved in JSON Lines (jsonl) format. If
not specified, feedback will be printed to stdout.
```
@@ -393,6 +449,9 @@ Options:
to 25.
--task_id TEXT The specific Task ID to filter feedback. If not provided,
feedback for all tasks will be fetched.
--filter TEXT The filter applied to the feedback. If not provided,
feedback will not be filtered. e.g. `log10 feedback list
--filter 'Coverage <= 5'`.
```
#### log10 feedback predict
27 changes: 27 additions & 0 deletions examples/logging/anthropic_async_completion.py
@@ -0,0 +1,27 @@
import asyncio
import os

from anthropic import AI_PROMPT

from log10._httpx_utils import finalize
from log10.load import AsyncAnthropic


client = AsyncAnthropic(api_key=os.environ["ANTHROPIC_API_KEY"], tags=["test", "async_anthropic"])


async def main():
response = await client.completions.create(
model="claude-3-haiku-20240307",
prompt=f"\n\nHuman:Write the names of all Star Wars movies and spinoffs along with the time periods in which they were set?{AI_PROMPT}",
temperature=0,
max_tokens_to_sample=1024,
top_p=1,
top_k=0,
)

print(response)
await finalize()


asyncio.run(main())
19 changes: 10 additions & 9 deletions examples/logging/anthropic_async_messages.py
@@ -1,21 +1,22 @@
import asyncio

import anthropic

from log10._httpx_utils import finalize
from log10.load import log10

from log10.load import AsyncAnthropic

log10(anthropic)

client = anthropic.AsyncAnthropic()
client = AsyncAnthropic()


async def main() -> None:
message = await client.beta.tools.messages.create(
model="claude-instant-1.2",
message = await client.messages.create(
model="claude-3-haiku-20240307",
max_tokens=1000,
messages=[{"role": "user", "content": "Say hello!"}],
messages=[
{
"role": "user",
"content": "Generate complex and creative tongue twisters. Aim to create tongue twisters that are not only challenging to say but also engaging, entertaining, and potentially humorous. Consider incorporating wordplay, rhyme, and alliteration to enhance the difficulty and enjoyment of the tongue twisters.",
}
],
)

print(message)
2 changes: 1 addition & 1 deletion examples/logging/anthropic_async_messages_stream.py
@@ -17,7 +17,7 @@ async def main() -> None:
messages=[
{
"role": "user",
-                "content": "Say hello there!",
+                "content": "Help me create some similes to describe a person's laughter that is joyful and contagious?",
}
],
model="claude-3-haiku-20240307",