
fix(py): Rename CLI arg for pytest plugin from --output to --langsmith-output to avoid conflict #1482

Merged: 8 commits, Jan 31, 2025
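
The rename matters because `--output` is a generic option string: if any other pytest plugin or a project's `conftest.py` registers the same name, pytest raises `ValueError` for the duplicate option (the reworked `pytest_addoption` below catches exactly that error). A minimal, hypothetical `conftest.py` sketching the kind of clash the rename avoids; the option's purpose and default here are illustrative only:

```python
# conftest.py (hypothetical): a project or another plugin that already
# claims "--output" for its own purposes, e.g. a report path. With the old
# LangSmith flag name, whichever side registered "--output" second would
# trigger pytest's duplicate-option ValueError.
def pytest_addoption(parser):
    parser.addoption(
        "--output",  # same option string the LangSmith plugin used before this PR
        action="store",
        default="report.html",
        help="Where this project writes its own report (illustrative).",
    )
```

With the LangSmith flag renamed to `--langsmith-output`, both registrations can coexist.
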
10 changes: 9 additions & 1 deletion python/README.md
@@ -1,4 +1,4 @@
# LangSmith Client SDK

[![Release Notes](https://img.shields.io/github/release/langchain-ai/langsmith-sdk?logo=python)](https://github.com/langchain-ai/langsmith-sdk/releases)
[![Python Downloads](https://static.pepy.tech/badge/langsmith/month)](https://pepy.tech/project/langsmith)
@@ -345,7 +345,7 @@
my_function("hello world")
```

# Instructor
## Instructor

We provide a convenient integration with [Instructor](https://jxnl.github.io/instructor/), largely by virtue of it essentially just using the OpenAI SDK.

@@ -417,6 +417,14 @@
my_function("Jason is 25 years old")
```

## Pytest Plugin

The LangSmith pytest plugin lets Python developers define their datasets and evaluations as pytest test cases.
See the [online docs](https://docs.smith.langchain.com/evaluation/how_to_guides/pytest) for more information.

The plugin is installed as part of the LangSmith SDK and is enabled by default.
See also the official pytest docs: [How to install and use plugins](https://docs.pytest.org/en/stable/how-to/plugins.html).
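
A minimal sketch of such a test case, adapted from the test file added in this PR (the specific inputs, outputs, and feedback key are illustrative):

```python
import pytest

from langsmith import testing as t


@pytest.mark.langsmith
def test_addition():
    x, y = 3, 4
    t.log_inputs({"x": x, "y": y})
    t.log_reference_outputs({"sum": 7})
    t.log_outputs({"sum": x + y})
    assert x + y == 7
```

Running `pytest --langsmith-output` then renders the richer LangSmith-formatted results; per the plugin code below, this requires the optional `rich` dependency and is not supported together with `pytest-xdist`.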

## Additional Documentation

To learn more about the LangSmith platform, check out the [docs](https://docs.smith.langchain.com/docs/).
51 changes: 30 additions & 21 deletions python/langsmith/pytest_plugin.py
@@ -2,6 +2,7 @@

import importlib.util
import json
import logging
import os
import time
from collections import defaultdict
@@ -12,26 +13,33 @@
from langsmith import utils as ls_utils
from langsmith.testing._internal import test as ls_test

logger = logging.getLogger(__name__)


def pytest_addoption(parser):
    """Set CLI options for choosing output format."""
    group = parser.getgroup("langsmith", "LangSmith")
    group.addoption(
        "--output",
        action="store",
        default="pytest",
        choices=["langsmith", "ls", "pytest"],
        help=(
            "Choose output format: 'langsmith' | 'ls' "
            "(rich custom LangSmith output) or 'pytest' "
            "(standard pytest). Defaults to 'pytest'."
        ),
    )
    """Set a boolean flag for LangSmith output.

    Skip if --langsmith-output is already defined.
    """
    try:
        # Try to add the option, will raise if it already exists
        group = parser.getgroup("langsmith", "LangSmith")
        group.addoption(
            "--langsmith-output",
            action="store_true",
            default=False,
            help="Use LangSmith output (requires 'rich').",
        )
    except ValueError:
        # Option already exists
        logger.warning(
            "LangSmith output flag cannot be added because it's already defined."
        )


def _handle_output_args(args):
    """Handle output arguments."""
    if any(opt in args for opt in ["--output=langsmith", "--output=ls"]):
    if any(opt in args for opt in ["--langsmith-output"]):
        # Only add --quiet if it's not already there
        if not any(a in args for a in ["-q", "--quiet"]):
            args.insert(0, "--quiet")
@@ -82,7 +90,7 @@ def pytest_report_teststatus(report, config):
"""Remove the short test-status character outputs ("./F")."""
# The hook normally returns a 3-tuple: (short_letter, verbose_word, color)
# By returning empty strings, the progress characters won't show.
if config.getoption("--output") in ("langsmith", "ls"):
if config.getoption("--langsmith-output"):
return "", "", ""


@@ -301,23 +309,24 @@ def pytest_configure(config):
    config.addinivalue_line(
        "markers", "langsmith: mark test to be tracked in LangSmith"
    )
    if config.getoption("--output") in ("langsmith", "ls"):
    if config.getoption("--langsmith-output"):
        if not importlib.util.find_spec("rich"):
            msg = (
                "Must have 'rich' installed to use --output='langsmith' | 'ls'. "
                "Must have 'rich' installed to use --langsmith-output. "
                "Please install with: `pip install -U 'langsmith[pytest]'`"
            )
            raise ValueError(msg)
        if os.environ.get("PYTEST_XDIST_TESTRUNUID"):
            msg = (
                "--output='langsmith' | 'ls' not supported with pytest-xdist. "
                "Please remove the '--output' option or '-n' option."
                "--langsmith-output not supported with pytest-xdist. "
                "Please remove the '--langsmith-output' option or '-n' option."
            )
            raise ValueError(msg)
        if ls_utils.test_tracking_is_disabled():
            msg = (
                "--output='langsmith' | 'ls' not supported when env var"
                "LANGSMITH_TEST_TRACKING='false'. Please remove the '--output' option "
                "--langsmith-output not supported when env var "
                "LANGSMITH_TEST_TRACKING='false'. Please remove the "
                "'--langsmith-output' option "
                "or enable test tracking."
            )
            raise ValueError(msg)
83 changes: 83 additions & 0 deletions python/tests/evaluation/test_decorator.py
@@ -0,0 +1,83 @@
import os

import pytest

from langsmith import testing as t


@pytest.mark.skipif(
    not os.getenv("LANGSMITH_TRACING"),
    reason="LANGSMITH_TRACING environment variable not set",
)
@pytest.mark.langsmith
@pytest.mark.parametrize("c", list(range(10)))
async def test_addition_single(c):
    x = 3
    y = 4
    t.log_inputs({"x": x, "y": y, "c": c})

    expected = 7 + c
    t.log_reference_outputs({"sum": expected})

    actual = x + y + c
    t.log_outputs({"sum": actual})

    t.log_feedback(key="foo", score=1)

    assert actual == expected


async def my_app():
    return "hello"


@pytest.mark.skipif(
    not os.getenv("LANGSMITH_TRACING"),
    reason="LANGSMITH_TRACING environment variable not set",
)
@pytest.mark.langsmith
async def test_openai_says_hello():
    # Traced code will be included in the test case
    text = "Say hello!"
    response = await my_app()
    t.log_inputs({"text": text})
    t.log_outputs({"response": response})
    t.log_reference_outputs({"response": "hello!"})

    # Use this context manager to trace any steps used for generating evaluation
    # feedback separately from the main application logic
    with t.trace_feedback():
        grade = 1 if "hello" in response else 0
        t.log_feedback(key="llm_judge", score=grade)

    assert "hello" in response.lower()


@pytest.mark.skipif(
    not os.getenv("LANGSMITH_TRACING"),
    reason="LANGSMITH_TRACING environment variable not set",
)
@pytest.mark.xfail(reason="Test failure output case")
@pytest.mark.langsmith(output_keys=["expected"])
@pytest.mark.parametrize(
    "a, b, expected",
    [
        (1, 2, 3),
        (3, 4, 7),
    ],
)
async def test_addition_parametrized(a: int, b: int, expected: int):
    t.log_outputs({"sum": a + b})
    assert a + b != expected


@pytest.mark.skipif(
    not os.getenv("LANGSMITH_TRACING"),
    reason="LANGSMITH_TRACING environment variable not set",
)
@pytest.mark.langsmith
@pytest.mark.parametrize("a,b", [[i, i] for i in range(20)])
def test_param(a, b):
    t.log_outputs({"sum": a + b})
    t.log_reference_outputs({"sum": a + b})
    assert a + b == a + b