11 changes: 9 additions & 2 deletions src/llama_stack_client/_base_client.py
@@ -418,10 +418,17 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0
if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers:
headers[idempotency_header] = options.idempotency_key or self._idempotency_key()

# Don't set the retry count header if it was already set or removed by the caller. We check
# Don't set these headers if they were already set or removed by the caller. We check
# `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case.
if "x-stainless-retry-count" not in (header.lower() for header in custom_headers):
lower_custom_headers = [header.lower() for header in custom_headers]
if "x-stainless-retry-count" not in lower_custom_headers:
headers["x-stainless-retry-count"] = str(retries_taken)
if "x-stainless-read-timeout" not in lower_custom_headers:
timeout = self.timeout if isinstance(options.timeout, NotGiven) else options.timeout
if isinstance(timeout, Timeout):
timeout = timeout.read
if timeout is not None:
headers["x-stainless-read-timeout"] = str(timeout)

return headers

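For context on the `x-stainless-read-timeout` hunk above: the read timeout is resolved from the per-request options when given, otherwise from the client default, and the header is only emitted when a concrete value exists. A minimal standalone sketch of that resolution, assuming `httpx` and a simplified `NotGiven` sentinel in place of the SDK's internal types:

```python
from __future__ import annotations

import httpx


class NotGiven:
    """Stand-in for the SDK's sentinel meaning 'no per-request timeout was passed'."""


def resolve_read_timeout(
    client_timeout: float | httpx.Timeout | None,
    request_timeout: float | httpx.Timeout | None | NotGiven,
) -> str | None:
    # Prefer the per-request timeout; otherwise fall back to the client default.
    timeout = client_timeout if isinstance(request_timeout, NotGiven) else request_timeout
    # httpx.Timeout splits connect/read/write/pool budgets; only the read budget is advertised.
    if isinstance(timeout, httpx.Timeout):
        timeout = timeout.read
    # The header is only set when a concrete value could be resolved.
    return None if timeout is None else str(timeout)


print(resolve_read_timeout(httpx.Timeout(timeout=60, connect=5.0), NotGiven()))  # -> "60"
print(resolve_read_timeout(60.0, 10))                                            # -> "10"
```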
60 changes: 40 additions & 20 deletions src/llama_stack_client/_client.py
@@ -98,12 +98,13 @@ class LlamaStackClient(SyncAPIClient):
with_streaming_response: LlamaStackClientWithStreamedResponse

# client options
api_key: str | None

def __init__(
self,
*,
base_url: str | httpx.URL | None = None,
api_key: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -123,19 +124,20 @@ def __init__(
_strict_response_validation: bool = False,
provider_data: Mapping[str, Any] | None = None,
) -> None:
"""Construct a new synchronous llama-stack-client client instance."""
if base_url is None:
base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL")
if base_url is None:
base_url = f"http://any-hosted-llama-stack.com"
"""Construct a new synchronous llama-stack-client client instance.

This automatically infers the `api_key` argument from the `LLAMA_STACK_CLIENT_API_KEY` environment variable if it is not provided.
"""
if api_key is None:
api_key = os.environ.get("LLAMA_STACK_CLIENT_API_KEY")
self.api_key = api_key

if base_url is None:
base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL")
if base_url is None:
base_url = f"http://any-hosted-llama-stack.com"

custom_headers = default_headers or {}
if api_key is not None:
custom_headers["Authorization"] = f"Bearer {api_key}"
custom_headers["X-LlamaStack-Client-Version"] = __version__
if provider_data is not None:
custom_headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data)
@@ -182,6 +184,14 @@ def __init__(
def qs(self) -> Querystring:
return Querystring(array_format="comma")

@property
@override
def auth_headers(self) -> dict[str, str]:
api_key = self.api_key
if api_key is None:
return {}
return {"Authorization": f"Bearer {api_key}"}

@property
@override
def default_headers(self) -> dict[str, str | Omit]:
@@ -194,8 +204,8 @@ def default_headers(self) -> dict[str, str | Omit]:
def copy(
self,
*,
base_url: str | httpx.URL | None = None,
api_key: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
http_client: httpx.Client | None = None,
max_retries: int | NotGiven = NOT_GIVEN,
@@ -228,8 +238,8 @@ def copy(

http_client = http_client or self._client
return self.__class__(
base_url=base_url or self.base_url,
api_key=api_key or self.api_key,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
http_client=http_client,
max_retries=max_retries if is_given(max_retries) else self.max_retries,
@@ -304,12 +314,13 @@ class AsyncLlamaStackClient(AsyncAPIClient):
with_streaming_response: AsyncLlamaStackClientWithStreamedResponse

# client options
api_key: str | None

def __init__(
self,
*,
base_url: str | httpx.URL | None = None,
api_key: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
@@ -329,19 +340,20 @@ def __init__(
_strict_response_validation: bool = False,
provider_data: Mapping[str, Any] | None = None,
) -> None:
"""Construct a new async llama-stack-client client instance."""
if base_url is None:
base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL")
if base_url is None:
base_url = f"http://any-hosted-llama-stack.com"
"""Construct a new async llama-stack-client client instance.

This automatically infers the `api_key` argument from the `LLAMA_STACK_CLIENT_API_KEY` environment variable if it is not provided.
"""
if api_key is None:
api_key = os.environ.get("LLAMA_STACK_CLIENT_API_KEY")
self.api_key = api_key

if base_url is None:
base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL")
if base_url is None:
base_url = f"http://any-hosted-llama-stack.com"

custom_headers = default_headers or {}
if api_key is not None:
custom_headers["Authorization"] = f"Bearer {api_key}"
custom_headers["X-LlamaStack-Client-Version"] = __version__
if provider_data is not None:
custom_headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data)
@@ -388,6 +400,14 @@ def __init__(
def qs(self) -> Querystring:
return Querystring(array_format="comma")

@property
@override
def auth_headers(self) -> dict[str, str]:
api_key = self.api_key
if api_key is None:
return {}
return {"Authorization": f"Bearer {api_key}"}

@property
@override
def default_headers(self) -> dict[str, str | Omit]:
@@ -400,8 +420,8 @@ def default_headers(self) -> dict[str, str | Omit]:
def copy(
self,
*,
base_url: str | httpx.URL | None = None,
api_key: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
http_client: httpx.AsyncClient | None = None,
max_retries: int | NotGiven = NOT_GIVEN,
@@ -434,8 +454,8 @@ def copy(

http_client = http_client or self._client
return self.__class__(
base_url=base_url or self.base_url,
api_key=api_key or self.api_key,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
http_client=http_client,
max_retries=max_retries if is_given(max_retries) else self.max_retries,
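Taken together, the `_client.py` changes let the API key come from the environment instead of the constructor and surface it through the new `auth_headers` override. A brief usage sketch; the base URL and key values are placeholders, not part of this diff:

```python
import os

from llama_stack_client import LlamaStackClient

# With no explicit api_key argument, the constructor falls back to this env var.
os.environ.setdefault("LLAMA_STACK_CLIENT_API_KEY", "sk-placeholder")

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL
assert client.auth_headers == {
    "Authorization": f"Bearer {os.environ['LLAMA_STACK_CLIENT_API_KEY']}"
}

# copy() can swap the key while keeping the rest of the configuration.
scoped = client.copy(api_key="sk-another-placeholder")
```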
2 changes: 1 addition & 1 deletion src/llama_stack_client/_constants.py
@@ -6,7 +6,7 @@
OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to"

# default timeout is 1 minute
DEFAULT_TIMEOUT = httpx.Timeout(timeout=60.0, connect=5.0)
DEFAULT_TIMEOUT = httpx.Timeout(timeout=60, connect=5.0)
DEFAULT_MAX_RETRIES = 2
DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20)

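The default is behaviorally unchanged (`httpx` treats `60` and `60.0` the same); callers who need a different budget still override it when constructing the client. A short sketch; the URL is a placeholder:

```python
import httpx

from llama_stack_client import LlamaStackClient

# Two-minute read budget with a tighter connect phase, overriding DEFAULT_TIMEOUT.
client = LlamaStackClient(
    base_url="http://localhost:8321",  # placeholder URL
    timeout=httpx.Timeout(timeout=120.0, connect=5.0),
)
```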
22 changes: 22 additions & 0 deletions src/llama_stack_client/resources/agents/turn.py
@@ -59,6 +59,7 @@ def create(
messages: Iterable[turn_create_params.Message],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
stream: Literal[False] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -69,6 +70,8 @@
) -> Turn:
"""
Args:
tool_config: Configuration for tool use.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -88,6 +91,7 @@ def create(
messages: Iterable[turn_create_params.Message],
stream: Literal[True],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -98,6 +102,8 @@
) -> Stream[AgentTurnResponseStreamChunk]:
"""
Args:
tool_config: Configuration for tool use.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -117,6 +123,7 @@ def create(
messages: Iterable[turn_create_params.Message],
stream: bool,
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -127,6 +134,8 @@
) -> Turn | Stream[AgentTurnResponseStreamChunk]:
"""
Args:
tool_config: Configuration for tool use.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -146,6 +155,7 @@ def create(
messages: Iterable[turn_create_params.Message],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -165,6 +175,7 @@ def create(
"messages": messages,
"documents": documents,
"stream": stream,
"tool_config": tool_config,
"toolgroups": toolgroups,
},
turn_create_params.TurnCreateParams,
@@ -244,6 +255,7 @@ async def create(
messages: Iterable[turn_create_params.Message],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
stream: Literal[False] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -254,6 +266,8 @@
) -> Turn:
"""
Args:
tool_config: Configuration for tool use.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -273,6 +287,7 @@ def create(
messages: Iterable[turn_create_params.Message],
stream: Literal[True],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -283,6 +298,8 @@
) -> AsyncStream[AgentTurnResponseStreamChunk]:
"""
Args:
tool_config: Configuration for tool use.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -302,6 +319,7 @@ def create(
messages: Iterable[turn_create_params.Message],
stream: bool,
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -312,6 +330,8 @@
) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
"""
Args:
tool_config: Configuration for tool use.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -331,6 +351,7 @@ def create(
messages: Iterable[turn_create_params.Message],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
@@ -350,6 +371,7 @@ async def create(
"messages": messages,
"documents": documents,
"stream": stream,
"tool_config": tool_config,
"toolgroups": toolgroups,
},
turn_create_params.TurnCreateParams,
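The new `tool_config` argument is threaded through each `create` overload and into the request body. A hedged call sketch; the resource is assumed to be exposed as `client.agents.turn` (following the module path), the agent/session IDs are hypothetical, and the `tool_choice` key is an assumed field of `turn_create_params.ToolConfig`, not something this diff spells out:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

turn = client.agents.turn.create(
    agent_id="agent-123",      # hypothetical agent id
    session_id="session-456",  # hypothetical session id
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    tool_config={"tool_choice": "auto"},  # assumed ToolConfig shape
)
print(turn)
```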
20 changes: 20 additions & 0 deletions src/llama_stack_client/resources/batch_inference.py
@@ -72,6 +72,14 @@ def chat_completion(
) -> BatchInferenceChatCompletionResponse:
"""
Args:
response_format: Configuration for JSON schema-guided response generation.

tool_choice: Whether tool use is required or automatic. This is a hint to the model which may
not be followed. It depends on the Instruction Following capabilities of the
model.

tool_prompt_format: Prompt format for calling custom / zero shot tools.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -118,6 +126,8 @@ def completion(
) -> BatchCompletion:
"""
Args:
response_format: Configuration for JSON schema-guided response generation.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -185,6 +195,14 @@ async def chat_completion(
) -> BatchInferenceChatCompletionResponse:
"""
Args:
response_format: Configuration for JSON schema-guided response generation.

tool_choice: Whether tool use is required or automatic. This is a hint to the model which may
not be followed. It depends on the Instruction Following capabilities of the
model.

tool_prompt_format: Prompt format for calling custom / zero shot tools.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
@@ -231,6 +249,8 @@ async def completion(
) -> BatchCompletion:
"""
Args:
response_format: Configuration for JSON schema-guided response generation.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
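These docstring additions describe existing parameters of `chat_completion` and `completion`. A hedged call sketch; the model id, the `messages_batch` parameter name, and the `response_format` payload shape are assumptions for illustration, not confirmed by this diff:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

response = client.batch_inference.chat_completion(
    model="meta-llama/Llama-3.1-8B-Instruct",  # hypothetical model id
    messages_batch=[                           # assumed parameter name
        [{"role": "user", "content": "Summarize this PR in one sentence."}],
    ],
    response_format={                          # JSON schema-guided generation (shape assumed)
        "type": "json_schema",
        "json_schema": {
            "type": "object",
            "properties": {"summary": {"type": "string"}},
        },
    },
    tool_choice="auto",  # hint only; the model may not follow it
)
```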