diff --git a/docs/project.json b/docs/project.json index e2e1b3f2c..9e81a08f8 100644 --- a/docs/project.json +++ b/docs/project.json @@ -1 +1 @@ -{ "name": "nemo-guardrails-toolkit", "version": "0.15.0" } +{ "name": "nemo-guardrails-toolkit", "version": "0.16.0" } diff --git a/docs/release-notes.md b/docs/release-notes.md index 0ec999959..d6a03d0fc 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -12,6 +12,25 @@ The following sections summarize and highlight the changes for each release. For a complete record of changes in a release, refer to the [CHANGELOG.md](https://github.com/NVIDIA/NeMo-Guardrails/blob/develop/CHANGELOG.md) in the GitHub repository. +(v0-16-0)= + +## 0.16.0 + +(v0-16-0-features)= + +### Key Features + +- Enhanced tracing system with [OpenTelemetry semantic conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/). To learn more, refer to [](tracing). For usage examples, refer to the following notebooks: + - [Tracing Guardrails Quickstart](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/docs/getting-started/8-tracing/1_tracing_quickstart.ipynb) + - [Tracing Guardrails with Jaeger](https://github.com/NVIDIA/NeMo-Guardrails/tree/develop/docs/getting-started/8-tracing/2_tracing_with_jaeger.ipynb) +- Community integration with [GuardrailsAI](https://www.guardrailsai.com/) and [Pangea AI Guard](https://pangea.cloud/services/ai-guard). + +(v0-16-0-other-changes)= + +### Other Changes + +- Added documentation about using KV cache reuse for LLM-based NemoGuard NIMs. By using KV cache reuse, you can improve the performance of LLM-based NemoGuard NIMs where the system prompt is the same for all calls up to the point where the user query and LLM response are injected. To learn more, refer to [](kv-cache-reuse). 
+ (v0-15-0)= ## 0.15.0 diff --git a/docs/user-guides/advanced/kv-cache-reuse.md b/docs/user-guides/advanced/kv-cache-reuse.md index 8f52ba969..221ba0618 100644 --- a/docs/user-guides/advanced/kv-cache-reuse.md +++ b/docs/user-guides/advanced/kv-cache-reuse.md @@ -1,3 +1,5 @@ +(kv-cache-reuse)= + # KV Cache Reuse for NemoGuard NIM When you configure NeMo Guardrails to call NemoGuard NIMs in response to a client request, every NIM call interjecting the input and response adds to the inference latency. diff --git a/docs/versions1.json b/docs/versions1.json index ef9054100..64f8c008f 100644 --- a/docs/versions1.json +++ b/docs/versions1.json @@ -1,6 +1,10 @@ [ { "preferred": true, + "version": "0.16.0", + "url": "../0.16.0/" + }, + { "version": "0.15.0", "url": "../0.15.0/" },