From c791446bf7c4313e58cb477989b4163a50f3b63f Mon Sep 17 00:00:00 2001 From: Prasad Mujumdar Date: Tue, 17 Sep 2024 23:08:33 -0700 Subject: [PATCH 01/11] Initial metamodel definition Signed-off-by: Prasad Mujumdar --- src/monocle_apptrace/metamodel/README | 1 + .../metamodel/entity_types.py | 55 +++++++ .../metamodel/span_example.json | 140 ++++++++++++++++++ .../metamodel/span_format.json | 22 +++ 4 files changed, 218 insertions(+) create mode 100644 src/monocle_apptrace/metamodel/README create mode 100644 src/monocle_apptrace/metamodel/entity_types.py create mode 100644 src/monocle_apptrace/metamodel/span_example.json create mode 100644 src/monocle_apptrace/metamodel/span_format.json diff --git a/src/monocle_apptrace/metamodel/README b/src/monocle_apptrace/metamodel/README new file mode 100644 index 0000000..436c7a9 --- /dev/null +++ b/src/monocle_apptrace/metamodel/README @@ -0,0 +1 @@ +Monocle metamodel is the way to manage standardization across all supported GenAI component stack. It includes the list of components that Monocle can identify and extract metadata. It also describes the format of traces and span that Monocle builds on on top of Open Telemetry format (https://opentelemetry.io/docs/concepts/signals/traces/). 
diff --git a/src/monocle_apptrace/metamodel/entity_types.py b/src/monocle_apptrace/metamodel/entity_types.py
new file mode 100644
index 0000000..36e9507
--- /dev/null
+++ b/src/monocle_apptrace/metamodel/entity_types.py
@@ -0,0 +1,55 @@
+# Monocle metamodel: MonocleEntity --> entity type (e.g. VectorDB) --> entity (e.g. Chroma).
+# Span attributes name an entity by its dotted path, e.g. "MonocleEntity.VectorDB.Chroma".
+
+import enum  # derive from enum.Enum, not the module: "class X(enum)" raises TypeError at import
+
+class MonocleEntity:  # plain namespace class; only the nested entity types are enums
+    # Supported workflow/language frameworks
+    class Workflow(enum.Enum):
+        Generic = 0
+        Langchain = 1
+        LlamaIndex = 2
+        Haystack = 3
+
+    # Supported model types
+    class Model(enum.Enum):
+        Generic = 0
+        LLM = 1
+        Embedding = 2
+
+    # Supported vector databases
+    class VectorDB(enum.Enum):
+        Generic = 0
+        Milvus = 1
+        Chroma = 2
+        Vespa = 3
+        LanceDB = 4
+        Pinecone = 5
+        Viviate = 6  # NOTE(review): presumably "Weaviate" -- name kept as-is, confirm before renaming
+        PostgreS = 7
+        Openstore = 8
+        Clickhouse = 9
+        ElasticSearch = 10
+        SingleStore = 11
+        CouchDB = 12
+
+    # Supported application hosting frameworks
+    class AppHosting(enum.Enum):
+        Generic = 0
+        AWS_Lambda = 1
+        AWS_Sagemaker = 2
+        Azure_Function = 3
+        Github_Codespace = 4
+        Azure_ML = 5
+
+    # Supported inference infra/services
+    class Inference(enum.Enum):
+        Generic = 0
+        NVIDIA_Triton = 1
+        OpenAI = 2
+        Azure_OpenAI = 3
+        AWS_Sagemaker = 4
+        AWS_Bedrock = 5
+        HuggingFace = 6
+        Cohere = 7
+        vLLM = 8
diff --git a/src/monocle_apptrace/metamodel/span_example.json b/src/monocle_apptrace/metamodel/span_example.json new file mode 100644 index 0000000..27a37f0 --- /dev/null +++ b/src/monocle_apptrace/metamodel/span_example.json @@ -0,0 +1,140 @@ +{ + "name": "llamaindex.retrieve", + "context": { + "trace_id": "0x93cd0bf865b3ffcc3cf9c075dc3e3797", + "span_id": "0x5d3f839e900bda24", + "trace_state": "[]" + }, + "kind": "SpanKind.INTERNAL", + "parent_id": "0x7a63d63e42ccac60", + "start_time": "2024-09-09T14:38:45.237182Z", + "end_time": "2024-09-09T14:38:45.620112Z", + "status": { + "status_code": "UNSET" + }, + "attributes": { + "Monocle.Entity": [ + { + "name": "ChromaVectorStore", + "type": "MonocleEntity.VectorDB.Chroma", + "attributes": { + "embedding_model": 
"BAAI/bge-small-en-v1.5" + } + } + ] + }, + "events": [ + { + "name": "ChromaVectorStore", + "timestamp": "timestamp", + "attributes": { + "context_input": "question: What is an americano?", + "context_output": "Coffee is a hot drink made from the roasted and ground seeds (coffee beans) of a tropical shrub\nA latte consists of one or more shots of espresso, served in a glass (or sometimes a cup), into which hot steamed milk is added\nAmericano is a type of coffee drink prepared by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way" + } + } + ], + "links": [], + "resource": { + "attributes": { + "service.name": "coffee-bot" + }, + "schema_url": "" + } +} +{ + "name": "llamaindex.openai", + "context": { + "trace_id": "0x93cd0bf865b3ffcc3cf9c075dc3e3797", + "span_id": "0x8b6363e1937a4d7b", + "trace_state": "[]" + }, + "kind": "SpanKind.INTERNAL", + "parent_id": "0x7a63d63e42ccac60", + "start_time": "2024-09-09T14:38:45.622174Z", + "end_time": "2024-09-09T14:38:46.514120Z", + "status": { + "status_code": "UNSET" + }, + "attributes": { + "Monocle.Entity": [ + { + "name": "AzureOpenAI", + "type": "MonocleEntity.Inference.Azure_OpenAI", + "attributes": { + "temperature": 0.1, + "model_name": "gpt-35-turbo", + "provider_name": "api.openai.com", + "az_openai_deployment": "kshitiz-gpt", + "inference_endpoint": "https://okahu-openai-dev.openai.azure.com/" + } + } + ] + }, + "events": [ + { + "name": "AzureOpenAI", + "timestamp": "timestamp", + "attributes": { + "completion_tokens": 52, + "prompt_tokens": 233, + "total_tokens": 285 + }, + { + "name": "AzureOpenAI", + "timestamp": "timestamp", + "attributes": { + "name" : 52, + "question": "What is an americano?" 
+ } + }, + ], + "links": [], + "resource": { + "attributes": { + "service.name": "coffee-bot" + }, + "schema_url": "" + } +} +{ + "name": "llamaindex.query", + "context": { + "trace_id": "0x93cd0bf865b3ffcc3cf9c075dc3e3797", + "span_id": "0x7a63d63e42ccac60", + "trace_state": "[]" + }, + "kind": "SpanKind.INTERNAL", + "parent_id": null, + "start_time": "2024-09-09T14:38:45.236627Z", + "end_time": "2024-09-09T14:38:46.514442Z", + "status": { + "status_code": "UNSET" + }, + "attributes": { + "workflow_name": "coffee-bot", + "workflow_type": "MonocleEntity.Workflow.LlamaIndex" + }, + "events": [ + { + "name": "input", + "timestamp": "2024-09-09T14:38:45.236701Z", + "attributes": { + "question": "What is an americano?" + } + }, + { + "name": "output", + "timestamp": "2024-09-09T14:38:46.514428Z", + "attributes": { + "response": "An americano is a type of coffee drink that is made by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way." 
+ } + } + ], + "links": [], + "resource": { + "attributes": { + "service.name": "coffee-bot" + }, + "schema_url": "" + } +} \ No newline at end of file diff --git a/src/monocle_apptrace/metamodel/span_format.json b/src/monocle_apptrace/metamodel/span_format.json new file mode 100644 index 0000000..a3cb6da --- /dev/null +++ b/src/monocle_apptrace/metamodel/span_format.json @@ -0,0 +1,22 @@ +{ + "attributes": { + "Monocle.Entity": [ + { + "name": "Entity Name 1", + "type": "Monocle-Entity-Type", + "attributes": { + "entity-attribute-1": "value1" + } + } + ] + }, + "events" : [ + { + "name": "Entity Name 1", + "timestamp": "timestamp", + "attributes": { + "attribute-1-name": "attribute-1-value" + } + } + ] +} \ No newline at end of file From 0dbd54b250a24c468a27176220affd2efdd6f88c Mon Sep 17 00:00:00 2001 From: Prasad Mujumdar Date: Wed, 18 Sep 2024 13:33:52 -0700 Subject: [PATCH 02/11] Updated metamode format and README Signed-off-by: Prasad Mujumdar --- src/monocle_apptrace/metamodel/README | 1 - src/monocle_apptrace/metamodel/README.md | 89 +++++++++++ .../metamodel/entity_types.py | 4 + .../metamodel/span_example.json | 79 +++++----- .../metamodel/span_format.json | 147 +++++++++++++++++- 5 files changed, 277 insertions(+), 43 deletions(-) delete mode 100644 src/monocle_apptrace/metamodel/README create mode 100644 src/monocle_apptrace/metamodel/README.md diff --git a/src/monocle_apptrace/metamodel/README b/src/monocle_apptrace/metamodel/README deleted file mode 100644 index 436c7a9..0000000 --- a/src/monocle_apptrace/metamodel/README +++ /dev/null @@ -1 +0,0 @@ -Monocle metamodel is the way to manage standardization across all supported GenAI component stack. It includes the list of components that Monocle can identify and extract metadata. It also describes the format of traces and span that Monocle builds on on top of Open Telemetry format (https://opentelemetry.io/docs/concepts/signals/traces/). 
diff --git a/src/monocle_apptrace/metamodel/README.md b/src/monocle_apptrace/metamodel/README.md new file mode 100644 index 0000000..4bc3df5 --- /dev/null +++ b/src/monocle_apptrace/metamodel/README.md @@ -0,0 +1,89 @@ +# Monocle metamodel + +## Overview +Monocle metamodel is the way to manage standardization across all supported GenAI component stacks. It includes the list of components that Monocle can identify and extract metadata from. This helps in understanding and analyzing the traces from applications that include multiple components and can evolve over time. This is one of the core values that Monocle provides to its user community. + +## Meta model +The Monocle metamodel comprises three things: +- Entity types, definitions of technology types and supported vendor implementations. +- A JSON format that overlays on top of the OpenTelemetry tracing format and includes the common attributes for each entity type. +- Map of component methods to trace with instrumentation methods provided by Monocle. + +### Entity type +The entity type defines the type of GenAI component that Monocle understands. The Monocle instrumentation can extract the relevant information for this entity. There is a fixed set of [entity types](./entity_types.py) that are defined by Monocle out of the box, e.g. workflow, model, etc. As the GenAI landscape evolves, the Monocle community will introduce a new entity type if the current entities can't represent a new technology component. +Each entity type has a number of supported technology components that Monocle handles out of the box, e.g. LlamaIndex is a supported workflow. The Monocle community will continue to expand the breadth of the project by adding more components. + +### Consistent trace format +Monocle generates [traces](../../../Monocle_User_Guide.md#traces) which comprise [spans](../../../Monocle_User_Guide.md#spans). Note that the Monocle trace is compatible with the [OpenTelemetry format](https://opentelemetry.io/docs/concepts/signals/traces/). 
Each span is essentially a step in the execution that interacts with one or more GenAI technology components. Please refer to the [full spec of the json format](./span_format.json) and a detailed [example](./span_example.json). +The ```attributes``` section of the span includes a list of such entities that are used in that span. +```json + "attributes": { + "Monocle.Entity": [ + { + "name": "AzureOpenAI", + "type": "MonocleEntity.Inference.Azure_OpenAI", + "attributes": { + "model_name": "gpt-35-turbo", + "provider_name": "api.openai.com", + "az_openai_deployment": "chatbot-gpt-3.5", + "inference_endpoint": "https://mybot-openai-dev.openai.azure.com/" + } + }, + { + "name": "gpt-35-turbo", + "type": "MonocleEntity.Model.LLM", + "attributes": { + "model_name": "gpt-35-turbo", + "temperature": 0.1 + } + } + ] + } +``` +The runtime data and metadata collected during the execution of that span are included in the ```events``` section of the trace (as per the OpenTelemetry spec). Each entry in the events list corresponds to an entity involved in that trace execution, if it has produced any runtime outputs. The event information is divided into two sections, ``data`` and ``metadata``. The ``data`` portion contains any user data that is part of the input or output of the span phase, e.g. model prompt or inference response. The ``metadata`` section includes other attributes that are runtime input or output, e.g. token count. +```json + "events": [ + { + "name": "gpt-35-turbo", + "timestamp": "timestamp", + "attributes": { + "data": { + "input": { + "question": "What is an americano?" + }, + "output": { + "response": "An americano is a type of coffee drink that is made by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way." 
+ } + }, + "metadata": { + "completion_tokens": 52, + "prompt_tokens": 233, + "total_tokens": 285 + } + } + } + ] +``` +### Instrumentation method map +The map dictates which Monocle tracing method is relevant for a given GenAI tech component method/API. It also specifies the name for that span to set in the trace output. +```python + { + "package": "llama_index.core.base.base_query_engine", + "object": "BaseQueryEngine", + "method": "query", + "span_name": "llamaindex.query", + "wrapper_package": "wrap_common", + "wrapper_method": "task_wrapper" + } +``` + +## Extending the meta model +Monocle is highly extensible. This section describes when one would need to extend the meta model. Please refer to Monocle [User guide](../../../Monocle_User_Guide.md) and [Contributor guide](../../../Monocle_contributor_guide.md) for detailed steps. +### Trace a new method/API +If you have overloaded an existing functionality in one of the supported components by creating a new function, Monocle doesn't know that this function should be traced, say because it's calling an LLM. You could define a new mapping so Monocle instrumentation can trace this function the same way it handles other LLM invocation functions. + +### Adding a new component provider +Let's say there's a new database that supports vector search capability which is not supported by Monocle. In this case, first you'll need to add that database under the ``MonocleEntity.VectorDB`` list. Then you'll need to extend the method map and test if the existing Monocle tracing functions have the logic to effectively trace the new component. If not, then you might need to implement a new method to cover the gap and update the mapping table accordingly. + +### Support new type of entity +If there's a new component that can't be mapped to any of the existing entity types, then it'll require extending the metamodel and implementing new instrumentation to support it. 
We recommend you initiate a discussion with Monocle community to add the support.
\ No newline at end of file
diff --git a/src/monocle_apptrace/metamodel/entity_types.py b/src/monocle_apptrace/metamodel/entity_types.py
index 36e9507..a4fb97d 100644
--- a/src/monocle_apptrace/metamodel/entity_types.py
+++ b/src/monocle_apptrace/metamodel/entity_types.py
@@ -53,3 +53,7 @@ class Inference(enum):
         HuggingFace = 6
         Cohere = 7
         vLLM = 8
+
+    class Classification(enum.Enum):  # enum.Enum, not the enum module; presumably mirrors the data/metadata split of span event attributes -- confirm
+        Metadata = 0
+        Data = 1
\ No newline at end of file
diff --git a/src/monocle_apptrace/metamodel/span_example.json b/src/monocle_apptrace/metamodel/span_example.json index 27a37f0..817ae28 100644 --- a/src/monocle_apptrace/metamodel/span_example.json +++ b/src/monocle_apptrace/metamodel/span_example.json @@ -5,12 +5,12 @@ "span_id": "0x5d3f839e900bda24", "trace_state": "[]" }, - "kind": "SpanKind.INTERNAL", + "kind": "SpanKind.CLIENT", "parent_id": "0x7a63d63e42ccac60", "start_time": "2024-09-09T14:38:45.237182Z", "end_time": "2024-09-09T14:38:45.620112Z", "status": { - "status_code": "UNSET" + "status_code": "OK" }, "attributes": { "Monocle.Entity": [ @@ -20,6 +20,16 @@ "attributes": { "embedding_model": "BAAI/bge-small-en-v1.5" } + }, + { + "name": "BAAI/bge-small-en-v1.5", + "type": "MonocleEntity.Model.Embedding", + "attributes": { + "data": {}, + "metadata": { + "model_name": "BAAI/bge-small-en-v1.5" + } + } } ] }, @@ -28,8 +38,11 @@ "name": "ChromaVectorStore", "timestamp": "timestamp", "attributes": { - "context_input": "question: What is an americano?", - "context_output": "Coffee is a hot drink made from the roasted and ground seeds (coffee beans) of a tropical shrub\nA latte consists of one or more shots of espresso, served in a glass (or sometimes a cup), into which hot steamed milk is added\nAmericano is a type of coffee drink prepared by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way" + 
"data": { + "context_input": "question: What is an americano?", + "context_output": "Coffee is a hot drink made from the roasted and ground seeds (coffee beans) of a tropical shrub\nA latte consists of one or more shots of espresso, served in a glass (or sometimes a cup), into which hot steamed milk is added\nAmericano is a type of coffee drink prepared by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way" + }, + "metadata": {} } } ], @@ -48,12 +61,12 @@ "span_id": "0x8b6363e1937a4d7b", "trace_state": "[]" }, - "kind": "SpanKind.INTERNAL", + "kind": "SpanKind.CLIENT", "parent_id": "0x7a63d63e42ccac60", "start_time": "2024-09-09T14:38:45.622174Z", "end_time": "2024-09-09T14:38:46.514120Z", "status": { - "status_code": "UNSET" + "status_code": "OK" }, "attributes": { "Monocle.Entity": [ @@ -61,32 +74,42 @@ "name": "AzureOpenAI", "type": "MonocleEntity.Inference.Azure_OpenAI", "attributes": { - "temperature": 0.1, "model_name": "gpt-35-turbo", "provider_name": "api.openai.com", "az_openai_deployment": "kshitiz-gpt", "inference_endpoint": "https://okahu-openai-dev.openai.azure.com/" } + }, + { + "name": "gpt-35-turbo", + "type": "MonocleEntity.Model.LLM", + "attributes": { + "model_name": "gpt-35-turbo", + "temperature": 0.1, + } } ] }, "events": [ { - "name": "AzureOpenAI", - "timestamp": "timestamp", - "attributes": { - "completion_tokens": 52, - "prompt_tokens": 233, - "total_tokens": 285 - }, - { - "name": "AzureOpenAI", + "name": "gpt-35-turbo", "timestamp": "timestamp", "attributes": { - "name" : 52, - "question": "What is an americano?" + "data": { + "input": { + "question": "What is an americano?" + }, + "output": { + "response": "An americano is a type of coffee drink that is made by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way." 
+ } + }, + "metadata": { + "completion_tokens": 52, + "prompt_tokens": 233, + "total_tokens": 285 + } } - }, + } ], "links": [], "resource": { @@ -103,32 +126,18 @@ "span_id": "0x7a63d63e42ccac60", "trace_state": "[]" }, - "kind": "SpanKind.INTERNAL", + "kind": "SpanKind.CLIENT", "parent_id": null, "start_time": "2024-09-09T14:38:45.236627Z", "end_time": "2024-09-09T14:38:46.514442Z", "status": { - "status_code": "UNSET" + "status_code": "OK" }, "attributes": { "workflow_name": "coffee-bot", "workflow_type": "MonocleEntity.Workflow.LlamaIndex" }, "events": [ - { - "name": "input", - "timestamp": "2024-09-09T14:38:45.236701Z", - "attributes": { - "question": "What is an americano?" - } - }, - { - "name": "output", - "timestamp": "2024-09-09T14:38:46.514428Z", - "attributes": { - "response": "An americano is a type of coffee drink that is made by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way." - } - } ], "links": [], "resource": { diff --git a/src/monocle_apptrace/metamodel/span_format.json b/src/monocle_apptrace/metamodel/span_format.json index a3cb6da..a0c3568 100644 --- a/src/monocle_apptrace/metamodel/span_format.json +++ b/src/monocle_apptrace/metamodel/span_format.json @@ -1,22 +1,155 @@ { + "name": "span-name", + "context": { + "trace_id": "trace-id", + "span_id": "span-id", + "trace_state": "[]" + }, + "kind": "SpanKind.CLIENT", + "parent_id": "parent-id or None (for root span)", + "start_time": "UTC timestamp", + "end_time": "UTC timestamp", + "status": { + "status_code": "OK or Error" + }, "attributes": { - "Monocle.Entity": [ + "description": "List of AI component entities used in this span, eg Model, Inference hosting service. 
Needs to be one of the supported entity types.", + "Monocle.Entities": [ { - "name": "Entity Name 1", + "name": "Monocle Entity Name", "type": "Monocle-Entity-Type", "attributes": { - "entity-attribute-1": "value1" + "description": "Metadata passed to this span eg. model name", + "optional-attrubute": "value" + } + }, + { + "name": "Workflow Entity Name", + "type": "MonocleEntity.Workflow.", + "attributes": { + "optional-attrubute": "value" + } + }, + { + "name": "Model Entity Name", + "type": "MonocleEntity.Model.", + "attributes": { + "model_name": "Name of model", + "optional-attrubute": "value" + } + }, + { + "name": "VectorDB Entity Name", + "type": "MonocleEntity.VectorDB.", + "attributes": { + "embedding-model-name": "value", + "optional-attrubute": "value" + } + }, + { + "name": "AppHosting Entity Name", + "type": "MonocleEntity.AppHosting.", + "attributes": { + "optional-attrubute": "value" + } + }, + { + "name": "Inference Entity Name", + "type": "MonocleEntity.Inference.", + "attributes": { + "model_name": "value", + "optional-attrubute": "value" } } ] }, "events" : [ { - "name": "Entity Name 1", - "timestamp": "timestamp", + "name": "Monocle Entity Name", + "timestamp": "UTC timestamp", + "attributes": { + "data": { + "description": "Data generated in this span eg inference response", + "optional-attrubute": "value" + }, + "metadata": { + "description": "Metadata generated in this span eg token count", + "optional-attrubute": "value" + } + } + }, + { + "name": "Workflow Entity Name", + "timestamp": "UTC timestamp", + "attributes": { + "data": { + "optional-attrubute": "value" + }, + "metadata": { + "optional-attrubute": "value" + } + } + }, + { + "name": "Model Entity Name", + "timestamp": "UTC timestamp", + "attributes": { + "data": { + "input": { + "optional-inputs-to-model": "values" + }, + "output": { + "optional-outputs-from-model": "values" + }, + "optional-attrubute": "value" + }, + "metadata": { + "optional-attrubute": "value" + } + } + }, + { + 
"name": "VectorDB Entity Name", + "timestamp": "UTC timestamp", "attributes": { - "attribute-1-name": "attribute-1-value" + "data": { + "optional-attrubute": "value" + }, + "metadata": { + "optional-attrubute": "value" + } + } + }, + { + "name": "AppHosting Entity Name", + "timestamp": "UTC timestamp", + "attributes": { + "data": { + "optional-attrubute": "value" + }, + "metadata": { + "optional-attrubute": "value" + } + } + }, + { + "name": "Inference Entity Name", + "timestamp": "UTC timestamp", + "attributes": { + "data": { + "optional-attrubute": "value" + }, + "metadata": { + "optional-attrubute": "value" + } } } - ] + ], + "links": [], + "resource": { + "attributes": { + "service.name": "top-workflow-name" + }, + "schema_url": "" + } } \ No newline at end of file From 987877df107a2610403c755a67f7fc3618173a4a Mon Sep 17 00:00:00 2001 From: Prasad Mujumdar Date: Wed, 18 Sep 2024 13:41:14 -0700 Subject: [PATCH 03/11] Updated README and moved the map files Signed-off-by: Prasad Mujumdar --- src/monocle_apptrace/haystack/__init__.py | 2 +- src/monocle_apptrace/langchain/__init__.py | 2 +- src/monocle_apptrace/llamaindex/__init__.py | 2 +- src/monocle_apptrace/metamodel/{ => entities}/entity_types.py | 0 .../{wrapper_config => metamodel/maps}/haystack_methods.json | 0 .../{wrapper_config => metamodel/maps}/lang_chain_methods.json | 0 .../{wrapper_config => metamodel/maps}/llama_index_methods.json | 0 src/monocle_apptrace/metamodel/{ => spans}/span_example.json | 0 src/monocle_apptrace/metamodel/{ => spans}/span_format.json | 0 9 files changed, 3 insertions(+), 3 deletions(-) rename src/monocle_apptrace/metamodel/{ => entities}/entity_types.py (100%) rename src/monocle_apptrace/{wrapper_config => metamodel/maps}/haystack_methods.json (100%) rename src/monocle_apptrace/{wrapper_config => metamodel/maps}/lang_chain_methods.json (100%) rename src/monocle_apptrace/{wrapper_config => metamodel/maps}/llama_index_methods.json (100%) rename src/monocle_apptrace/metamodel/{ 
=> spans}/span_example.json (100%) rename src/monocle_apptrace/metamodel/{ => spans}/span_format.json (100%) diff --git a/src/monocle_apptrace/haystack/__init__.py b/src/monocle_apptrace/haystack/__init__.py index 521dae0..0def306 100644 --- a/src/monocle_apptrace/haystack/__init__.py +++ b/src/monocle_apptrace/haystack/__init__.py @@ -6,4 +6,4 @@ logger = logging.getLogger(__name__) parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) HAYSTACK_METHODS = load_wrapper_from_config( - os.path.join(parent_dir, 'wrapper_config', 'haystack_methods.json')) + os.path.join(parent_dir, 'metamodel', 'maps', 'haystack_methods.json')) diff --git a/src/monocle_apptrace/langchain/__init__.py b/src/monocle_apptrace/langchain/__init__.py index 61364c0..f4d8f9c 100644 --- a/src/monocle_apptrace/langchain/__init__.py +++ b/src/monocle_apptrace/langchain/__init__.py @@ -3,4 +3,4 @@ parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) LANGCHAIN_METHODS = load_wrapper_from_config( - os.path.join(parent_dir, 'wrapper_config', 'lang_chain_methods.json')) + os.path.join(parent_dir, 'metamodel', 'maps', 'lang_chain_methods.json')) diff --git a/src/monocle_apptrace/llamaindex/__init__.py b/src/monocle_apptrace/llamaindex/__init__.py index 5df612d..5181ecb 100644 --- a/src/monocle_apptrace/llamaindex/__init__.py +++ b/src/monocle_apptrace/llamaindex/__init__.py @@ -12,4 +12,4 @@ def get_llm_span_name_for_openai(instance): parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) LLAMAINDEX_METHODS = load_wrapper_from_config( - os.path.join(parent_dir, 'wrapper_config', 'llama_index_methods.json')) + os.path.join(parent_dir, 'metamodel', 'maps', 'llama_index_methods.json')) diff --git a/src/monocle_apptrace/metamodel/entity_types.py b/src/monocle_apptrace/metamodel/entities/entity_types.py similarity index 100% rename from src/monocle_apptrace/metamodel/entity_types.py rename to 
src/monocle_apptrace/metamodel/entities/entity_types.py diff --git a/src/monocle_apptrace/wrapper_config/haystack_methods.json b/src/monocle_apptrace/metamodel/maps/haystack_methods.json similarity index 100% rename from src/monocle_apptrace/wrapper_config/haystack_methods.json rename to src/monocle_apptrace/metamodel/maps/haystack_methods.json diff --git a/src/monocle_apptrace/wrapper_config/lang_chain_methods.json b/src/monocle_apptrace/metamodel/maps/lang_chain_methods.json similarity index 100% rename from src/monocle_apptrace/wrapper_config/lang_chain_methods.json rename to src/monocle_apptrace/metamodel/maps/lang_chain_methods.json diff --git a/src/monocle_apptrace/wrapper_config/llama_index_methods.json b/src/monocle_apptrace/metamodel/maps/llama_index_methods.json similarity index 100% rename from src/monocle_apptrace/wrapper_config/llama_index_methods.json rename to src/monocle_apptrace/metamodel/maps/llama_index_methods.json diff --git a/src/monocle_apptrace/metamodel/span_example.json b/src/monocle_apptrace/metamodel/spans/span_example.json similarity index 100% rename from src/monocle_apptrace/metamodel/span_example.json rename to src/monocle_apptrace/metamodel/spans/span_example.json diff --git a/src/monocle_apptrace/metamodel/span_format.json b/src/monocle_apptrace/metamodel/spans/span_format.json similarity index 100% rename from src/monocle_apptrace/metamodel/span_format.json rename to src/monocle_apptrace/metamodel/spans/span_format.json From e43381a5f70f40cc59e3c0cdc34ed1ed8f0402a3 Mon Sep 17 00:00:00 2001 From: Prasad Mujumdar Date: Mon, 23 Sep 2024 16:00:21 -0700 Subject: [PATCH 04/11] Update format for OTEL compatibility Signed-off-by: Prasad Mujumdar --- .../metamodel/spans/span_format.json | 123 +++++------------- 1 file changed, 35 insertions(+), 88 deletions(-) diff --git a/src/monocle_apptrace/metamodel/spans/span_format.json b/src/monocle_apptrace/metamodel/spans/span_format.json index a0c3568..594a2c4 100644 --- 
a/src/monocle_apptrace/metamodel/spans/span_format.json +++ b/src/monocle_apptrace/metamodel/spans/span_format.json @@ -14,54 +14,27 @@ }, "attributes": { "description": "List of AI component entities used in this span, eg Model, Inference hosting service. Needs to be one of the supported entity types.", - "Monocle.Entities": [ - { - "name": "Monocle Entity Name", - "type": "Monocle-Entity-Type", - "attributes": { - "description": "Metadata passed to this span eg. model name", - "optional-attrubute": "value" - } - }, - { - "name": "Workflow Entity Name", - "type": "MonocleEntity.Workflow.", - "attributes": { - "optional-attrubute": "value" - } - }, - { - "name": "Model Entity Name", - "type": "MonocleEntity.Model.", - "attributes": { - "model_name": "Name of model", - "optional-attrubute": "value" - } - }, - { - "name": "VectorDB Entity Name", - "type": "MonocleEntity.VectorDB.", - "attributes": { - "embedding-model-name": "value", - "optional-attrubute": "value" - } - }, - { - "name": "AppHosting Entity Name", - "type": "MonocleEntity.AppHosting.", - "attributes": { - "optional-attrubute": "value" - } - }, - { - "name": "Inference Entity Name", - "type": "MonocleEntity.Inference.", - "attributes": { - "model_name": "value", - "optional-attrubute": "value" - } - } - ] + "Monocle.Entities.list": ["Entity-name-1", "Entity-name-2"], + "Monocle.Entity..type": "Monocle-Entity-Type", + "Monocle.Entity..attribute": "Value", + + "Monocle.Entity..type": "MonocleEntity.Workflow.", + "Monocle.Entity..attribute": "Value", + + "Monocle.Entity..type": "MonocleEntity.Model.", + "Monocle.Entity..model_name": "Name of model", + "Monocle.Entity..attribute": "Value", + + "Monocle.Entity..type": "MonocleEntity.VectorDB.", + "Monocle.Entity..embedding-model-name": "Name of model", + "Monocle.Entity..attribute": "Value", + + "Monocle.Entity..type": "MonocleEntity.AppHosting.", + "Monocle.Entity..attribute": "Value", + + "Monocle.Entity..type": "MonocleEntity.Inference.", + 
"Monocle.Entity..model_name": "Name of model", + "Monocle.Entity..attribute": "Value" }, "events" : [ { @@ -69,12 +42,12 @@ "timestamp": "UTC timestamp", "attributes": { "data": { - "description": "Data generated in this span eg inference response", - "optional-attrubute": "value" + "data.description": "Data generated in this span eg inference response", + "data.optional-attrubute": "value" }, "metadata": { - "description": "Metadata generated in this span eg token count", - "optional-attrubute": "value" + "metadata.description": "Metadata generated in this span eg token count", + "metadata.optional-attrubute": "value" } } }, @@ -82,66 +55,40 @@ "name": "Workflow Entity Name", "timestamp": "UTC timestamp", "attributes": { - "data": { - "optional-attrubute": "value" - }, - "metadata": { - "optional-attrubute": "value" - } + "data.optional-attrubute": "value", + "metadata.optional-attrubute": "value" } }, { "name": "Model Entity Name", "timestamp": "UTC timestamp", "attributes": { - "data": { - "input": { - "optional-inputs-to-model": "values" - }, - "output": { - "optional-outputs-from-model": "values" - }, - "optional-attrubute": "value" - }, - "metadata": { - "optional-attrubute": "value" - } + "data.optional-attrubute": "value", + "metadata.optional-attrubute": "value" } }, { "name": "VectorDB Entity Name", "timestamp": "UTC timestamp", "attributes": { - "data": { - "optional-attrubute": "value" - }, - "metadata": { - "optional-attrubute": "value" - } + "data.optional-attrubute": "value", + "metadata.optional-attrubute": "value" } }, { "name": "AppHosting Entity Name", "timestamp": "UTC timestamp", "attributes": { - "data": { - "optional-attrubute": "value" - }, - "metadata": { - "optional-attrubute": "value" - } + "data.optional-attrubute": "value", + "metadata.optional-attrubute": "value" } }, { "name": "Inference Entity Name", "timestamp": "UTC timestamp", "attributes": { - "data": { - "optional-attrubute": "value" - }, - "metadata": { - "optional-attrubute": 
"value" - } + "data.optional-attrubute": "value", + "metadata.optional-attrubute": "value" } } ], From c7e0be04b7978472c903bb4c8f510b0ef1142d16 Mon Sep 17 00:00:00 2001 From: Prasad Mujumdar Date: Mon, 23 Sep 2024 23:27:23 -0700 Subject: [PATCH 05/11] Update format for OTEL compatibility and JSON conventions Signed-off-by: Prasad Mujumdar --- .../metamodel/spans/span_example.json | 87 +++++++------------ .../metamodel/spans/span_format.json | 53 +++++------ 2 files changed, 57 insertions(+), 83 deletions(-) diff --git a/src/monocle_apptrace/metamodel/spans/span_example.json b/src/monocle_apptrace/metamodel/spans/span_example.json index 817ae28..e73869e 100644 --- a/src/monocle_apptrace/metamodel/spans/span_example.json +++ b/src/monocle_apptrace/metamodel/spans/span_example.json @@ -13,35 +13,21 @@ "status_code": "OK" }, "attributes": { - "Monocle.Entity": [ - { - "name": "ChromaVectorStore", - "type": "MonocleEntity.VectorDB.Chroma", - "attributes": { - "embedding_model": "BAAI/bge-small-en-v1.5" - } - }, - { - "name": "BAAI/bge-small-en-v1.5", - "type": "MonocleEntity.Model.Embedding", - "attributes": { - "data": {}, - "metadata": { - "model_name": "BAAI/bge-small-en-v1.5" - } - } - } - ] + "monocle.entity.count": 2, + "monocle.entity.1.name": "ChromaVectorStore", + "monocle.entity.1.type": "MonocleEntity.VectorDB.Chroma", + "monocle.entity.1.embedding-model-name": "BAAI/bge-small-en-v1.5", + "monocle.entity.2.name": "BAAI/bge-small-en-v1.5", + "monocle.entity.2.type": "MonocleEntity.Model.embedding", + "monocle.entity.2.model_name": "BAAI/bge-small-en-v1.5" }, "events": [ { "name": "ChromaVectorStore", "timestamp": "timestamp", "attributes": { - "data": { - "context_input": "question: What is an americano?", - "context_output": "Coffee is a hot drink made from the roasted and ground seeds (coffee beans) of a tropical shrub\nA latte consists of one or more shots of espresso, served in a glass (or sometimes a cup), into which hot steamed milk is added\nAmericano 
is a type of coffee drink prepared by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way" - }, + "data.context_input": "question: What is an americano?", + "data.context_output": "Coffee is a hot drink made from the roasted and ground seeds (coffee beans) of a tropical shrub\nA latte consists of one or more shots of espresso, served in a glass (or sometimes a cup), into which hot steamed milk is added\nAmericano is a type of coffee drink prepared by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way", "metadata": {} } } @@ -69,45 +55,29 @@ "status_code": "OK" }, "attributes": { - "Monocle.Entity": [ - { - "name": "AzureOpenAI", - "type": "MonocleEntity.Inference.Azure_OpenAI", - "attributes": { - "model_name": "gpt-35-turbo", - "provider_name": "api.openai.com", - "az_openai_deployment": "kshitiz-gpt", - "inference_endpoint": "https://okahu-openai-dev.openai.azure.com/" - } - }, - { - "name": "gpt-35-turbo", - "type": "MonocleEntity.Model.LLM", - "attributes": { - "model_name": "gpt-35-turbo", - "temperature": 0.1, - } - } - ] + "monocle.entity.count": 2, + "monocle.entity.1.name": "AzureOpenAI", + "monocle.entity.1.type": "MonocleEntity.Inference.Azure_OpenAI", + "monocle.entity.1.model_name": "gpt-35-turbo", + "monocle.entity.1.provider_name": "api.openai.com", + "monocle.entity.1.az_openai_deployment": "kshitiz-gpt", + "monocle.entity.1.inference_endpoint": "https://okahu-openai-dev.openai.azure.com/", + + "monocle.entity.2.name": "gpt-35-turbo", + "monocle.entity.2.type": "MonocleEntity.Model.LLM", + "monocle.entity.2.model_name": "gpt-35-turbo", + "monocle.entity.2.temperature": 0.1 }, "events": [ { "name": "gpt-35-turbo", "timestamp": "timestamp", "attributes": { - "data": { - "input": { - "question": "What is an americano?" 
- }, - "output": { - "response": "An americano is a type of coffee drink that is made by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way." - } - }, - "metadata": { - "completion_tokens": 52, - "prompt_tokens": 233, - "total_tokens": 285 - } + "data.input.question": "What is an americano?", + "data.output.response": "An americano is a type of coffee drink that is made by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way.", + "metadata.completion_tokens": 52, + "metadata.prompt_tokens": 233, + "metadata.total_tokens": 285 } } ], @@ -134,8 +104,9 @@ "status_code": "OK" }, "attributes": { - "workflow_name": "coffee-bot", - "workflow_type": "MonocleEntity.Workflow.LlamaIndex" + "monocle.entity.count": 1, + "monocle.entity.1.name": "coffee-bot", + "monocle.entity.1.type": "MonocleEntity.Workflow.LlamaIndex" }, "events": [ ], diff --git a/src/monocle_apptrace/metamodel/spans/span_format.json b/src/monocle_apptrace/metamodel/spans/span_format.json index 594a2c4..d3fd4dd 100644 --- a/src/monocle_apptrace/metamodel/spans/span_format.json +++ b/src/monocle_apptrace/metamodel/spans/span_format.json @@ -14,42 +14,45 @@ }, "attributes": { "description": "List of AI component entities used in this span, eg Model, Inference hosting service. 
Needs to be one of the supported entity types.", - "Monocle.Entities.list": ["Entity-name-1", "Entity-name-2"], - "Monocle.Entity..type": "Monocle-Entity-Type", - "Monocle.Entity..attribute": "Value", + "monocle.entity.count": "count-of-entities", + + "monocle.entity..name": "Monocle-Entity-name", + "monocle.entity..type": "Monocle-Entity-Type", + "monocle.entity..attribute": "Value", - "Monocle.Entity..type": "MonocleEntity.Workflow.", - "Monocle.Entity..attribute": "Value", + "monocle.entity..name": "Workflow-Name", + "monocle.entity..type": "MonocleEntity.Workflow.", + "monocle.entity..attribute": "Value", - "Monocle.Entity..type": "MonocleEntity.Model.", - "Monocle.Entity..model_name": "Name of model", - "Monocle.Entity..attribute": "Value", + "monocle.entity..name": "Model-entity-name", + "monocle.entity..type": "MonocleEntity.Model.", + "monocle.entity..model_name": "Name of model", + "monocle.entity..attribute": "Value", - "Monocle.Entity..type": "MonocleEntity.VectorDB.", - "Monocle.Entity..embedding-model-name": "Name of model", - "Monocle.Entity..attribute": "Value", + "monocle.entity..name": "VectorDB-entity-name", + "monocle.entity..type": "MonocleEntity.VectorDB.", + "monocle.entity..embedding-model-name": "Name of model", + "monocle.entity..attribute": "Value", - "Monocle.Entity..type": "MonocleEntity.AppHosting.", - "Monocle.Entity..attribute": "Value", + "monocle.entity..name": "Apphosting-entity-name", + "monocle.entity..type": "MonocleEntity.AppHosting.", + "monocle.entity..attribute": "Value", - "Monocle.Entity..type": "MonocleEntity.Inference.", - "Monocle.Entity..model_name": "Name of model", - "Monocle.Entity..attribute": "Value" + "monocle.entity..name": "Inference-entity-name", + "monocle.entity..type": "MonocleEntity.Inference.", + "monocle.entity..model_name": "Name of model", + "monocle.entity..attribute": "Value" }, "events" : [ { "name": "Monocle Entity Name", "timestamp": "UTC timestamp", "attributes": { - "data": { - 
"data.description": "Data generated in this span eg inference response", - "data.optional-attrubute": "value" - }, - "metadata": { - "metadata.description": "Metadata generated in this span eg token count", - "metadata.optional-attrubute": "value" - } - } + "data.description": "Data generated in this span eg inference response", + "data.optional-attrubute": "value", + "metadata.description": "Metadata generated in this span eg token count", + "metadata.optional-attrubute": "value" + } }, { "name": "Workflow Entity Name", From ccdef678973e77e41d886212cdfed8353053e828 Mon Sep 17 00:00:00 2001 From: Prasad Mujumdar Date: Tue, 24 Sep 2024 08:49:27 -0700 Subject: [PATCH 06/11] Addressed review feedback Signed-off-by: Prasad Mujumdar --- src/monocle_apptrace/metamodel/README.md | 2 +- .../metamodel/spans/span_example.json | 17 ++++++------ .../metamodel/spans/span_format.json | 26 +++++++++---------- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/monocle_apptrace/metamodel/README.md b/src/monocle_apptrace/metamodel/README.md index 4bc3df5..c3bc8c4 100644 --- a/src/monocle_apptrace/metamodel/README.md +++ b/src/monocle_apptrace/metamodel/README.md @@ -13,7 +13,7 @@ The Monocle metamodel comprises of three things, The entity type defines the type of GenAI component that Monocle understand. The monocle instrumentation can extract the relevenat information for this entity. There are a fixed set of [entity types](./entity_types.py) that are defined by Monocle out of the box, eg workflow, model etc. As the GenAI landscape evolves, the Monocle community will introduce a new entity type if the current entities won't represent a new technology component. Each entity types has number of supported technology components that Monocle handles out of the box, eg. LlamaIndex is a supported workflow. Monocle community will continue to expand the breadth of the project by adding more components. 
-### Consisten trace format +### Consistent trace format Monocle generates [traces](../../../Monocle_User_Guide.md#traces) which comprises of [spans](../../../Monocle_User_Guide.md#spans). Note that Monocle trace is [OpenTelemetry format](https://opentelemetry.io/docs/concepts/signals/traces/) compatible. Each span is essentially a step in the execution that interacts with one of more GenAI technology components. The please refer to the [full spec of the json format](./span_format.json) and a detailed [example](./span_example.json). The ```attribute``` section of the span includes a list of such entities that are used in that span. ```json diff --git a/src/monocle_apptrace/metamodel/spans/span_example.json b/src/monocle_apptrace/metamodel/spans/span_example.json index e73869e..99d4848 100644 --- a/src/monocle_apptrace/metamodel/spans/span_example.json +++ b/src/monocle_apptrace/metamodel/spans/span_example.json @@ -28,7 +28,7 @@ "attributes": { "data.context_input": "question: What is an americano?", "data.context_output": "Coffee is a hot drink made from the roasted and ground seeds (coffee beans) of a tropical shrub\nA latte consists of one or more shots of espresso, served in a glass (or sometimes a cup), into which hot steamed milk is added\nAmericano is a type of coffee drink prepared by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way", - "metadata": {} + "metadata": "" } } ], @@ -58,9 +58,8 @@ "monocle.entity.count": 2, "monocle.entity.1.name": "AzureOpenAI", "monocle.entity.1.type": "MonocleEntity.Inference.Azure_OpenAI", - "monocle.entity.1.model_name": "gpt-35-turbo", - "monocle.entity.1.provider_name": "api.openai.com", - "monocle.entity.1.az_openai_deployment": "kshitiz-gpt", + "monocle.entity.1.provider_name": "openai.azure.com", + "monocle.entity.1.deployment": "kshitiz-gpt", "monocle.entity.1.inference_endpoint": 
"https://okahu-openai-dev.openai.azure.com/", "monocle.entity.2.name": "gpt-35-turbo", @@ -73,11 +72,11 @@ "name": "gpt-35-turbo", "timestamp": "timestamp", "attributes": { - "data.input.question": "What is an americano?", - "data.output.response": "An americano is a type of coffee drink that is made by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way.", - "metadata.completion_tokens": 52, - "metadata.prompt_tokens": 233, - "metadata.total_tokens": 285 + "data.input": "question: What is an americano?", + "data.output": "response: An americano is a type of coffee drink that is made by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way.", + "metadata.completion_tokens": 52, + "metadata.prompt_tokens": 233, + "metadata.total_tokens": 285 } } ], diff --git a/src/monocle_apptrace/metamodel/spans/span_format.json b/src/monocle_apptrace/metamodel/spans/span_format.json index d3fd4dd..7a12b78 100644 --- a/src/monocle_apptrace/metamodel/spans/span_format.json +++ b/src/monocle_apptrace/metamodel/spans/span_format.json @@ -31,7 +31,7 @@ "monocle.entity..name": "VectorDB-entity-name", "monocle.entity..type": "MonocleEntity.VectorDB.", - "monocle.entity..embedding-model-name": "Name of model", + "monocle.entity..embedding_model_name": "Name of model", "monocle.entity..attribute": "Value", "monocle.entity..name": "Apphosting-entity-name", @@ -49,49 +49,49 @@ "timestamp": "UTC timestamp", "attributes": { "data.description": "Data generated in this span eg inference response", - "data.optional-attrubute": "value", + "data.optional_attribute.entity": "value", "metadata.description": "Metadata generated in this span eg token count", - "metadata.optional-attrubute": "value" + "metadata.optional_attribute.entity": "value" } }, { "name": "Workflow Entity Name", "timestamp": "UTC 
timestamp", "attributes": { - "data.optional-attrubute": "value", - "metadata.optional-attrubute": "value" + "data.optional_attribute.entity": "value", + "metadata.optional_attribute.entity": "value" } }, { "name": "Model Entity Name", "timestamp": "UTC timestamp", "attributes": { - "data.optional-attrubute": "value", - "metadata.optional-attrubute": "value" + "data.optional_attribute.entity": "value", + "metadata.optional_attribute.entity": "value" } }, { "name": "VectorDB Entity Name", "timestamp": "UTC timestamp", "attributes": { - "data.optional-attrubute": "value", - "metadata.optional-attrubute": "value" + "data.optional_attribute.entity": "value", + "metadata.optional_attribute.entity": "value" } }, { "name": "AppHosting Entity Name", "timestamp": "UTC timestamp", "attributes": { - "data.optional-attrubute": "value", - "metadata.optional-attrubute": "value" + "data.optional_attribute.entity": "value", + "metadata.optional_attribute.entity": "value" } }, { "name": "Inference Entity Name", "timestamp": "UTC timestamp", "attributes": { - "data.optional-attrubute": "value", - "metadata.optional-attrubute": "value" + "data.optional_attribute.entity": "value", + "metadata.optional_attribute.entity": "value" } } ], From f2ea2a94598019c914b81aa82bb47b13762cb9da Mon Sep 17 00:00:00 2001 From: Prasad Mujumdar Date: Wed, 25 Sep 2024 20:55:45 -0700 Subject: [PATCH 07/11] Updated input/output/metadata format. 
Added readme Signed-off-by: Prasad Mujumdar --- src/monocle_apptrace/metamodel/README.md | 5 +- .../metamodel/entities/entity_types.py | 10 +- .../metamodel/spans/README.md | 119 ++++++++++++++++++ .../metamodel/spans/span_example.json | 83 +++++++----- .../metamodel/spans/span_format.json | 74 ++--------- 5 files changed, 195 insertions(+), 96 deletions(-) create mode 100644 src/monocle_apptrace/metamodel/spans/README.md diff --git a/src/monocle_apptrace/metamodel/README.md b/src/monocle_apptrace/metamodel/README.md index c3bc8c4..99e88b1 100644 --- a/src/monocle_apptrace/metamodel/README.md +++ b/src/monocle_apptrace/metamodel/README.md @@ -1,4 +1,4 @@ -# Monocle metamode +# Monocle metamodel ## Overview Monocle metamodel is the way to manage standardization across all supported GenAI component stack. It includes the list of components that Monocle can identify and extract metadata. This help understanding and analyzing the traces from applications that include multiple components and can evolve over time. This is one of core value that Monocle provides to it's user community. @@ -13,6 +13,9 @@ The Monocle metamodel comprises of three things, The entity type defines the type of GenAI component that Monocle understand. The monocle instrumentation can extract the relevenat information for this entity. There are a fixed set of [entity types](./entity_types.py) that are defined by Monocle out of the box, eg workflow, model etc. As the GenAI landscape evolves, the Monocle community will introduce a new entity type if the current entities won't represent a new technology component. Each entity types has number of supported technology components that Monocle handles out of the box, eg. LlamaIndex is a supported workflow. Monocle community will continue to expand the breadth of the project by adding more components. +### Span types +The GenAI application have specific types of spans where diffrent entities integrate. 
Monocle metamodel defines these types and specifies format for tracing data and metadata generated in such spans. + ### Consistent trace format Monocle generates [traces](../../../Monocle_User_Guide.md#traces) which comprises of [spans](../../../Monocle_User_Guide.md#spans). Note that Monocle trace is [OpenTelemetry format](https://opentelemetry.io/docs/concepts/signals/traces/) compatible. Each span is essentially a step in the execution that interacts with one of more GenAI technology components. The please refer to the [full spec of the json format](./span_format.json) and a detailed [example](./span_example.json). The ```attribute``` section of the span includes a list of such entities that are used in that span. diff --git a/src/monocle_apptrace/metamodel/entities/entity_types.py b/src/monocle_apptrace/metamodel/entities/entity_types.py index a4fb97d..a3631a2 100644 --- a/src/monocle_apptrace/metamodel/entities/entity_types.py +++ b/src/monocle_apptrace/metamodel/entities/entity_types.py @@ -18,7 +18,7 @@ class Model(enum): Embedding = 2 # Support Vector databases - class VectorDB(enum): + class Model(enum): Generic = 0 Milvus = 1 Chroma = 2 @@ -56,4 +56,10 @@ class Inference(enum): class Classification(enum): Metadata = 0 - Data = 1 \ No newline at end of file + Data = 1 + +class SpanType(enum): + Internal = 0 + Retrieval = 2 + Inference = 3 + Workflow = 4 \ No newline at end of file diff --git a/src/monocle_apptrace/metamodel/spans/README.md b/src/monocle_apptrace/metamodel/spans/README.md new file mode 100644 index 0000000..4705ee2 --- /dev/null +++ b/src/monocle_apptrace/metamodel/spans/README.md @@ -0,0 +1,119 @@ +# Monocle Span format +Monocle generates [traces](../../../../Monocle_User_Guide.md#traces) which comprises of [spans](../../../../Monocle_User_Guide.md#spans). Note that Monocle trace is [OpenTelemetry format](https://opentelemetry.io/docs/concepts/signals/traces/) compatible. 
Each span is essentially a step in the execution that interacts with one or more GenAI technology components. This document explains the [span format](./span_format.json) that Monocle generates for GenAI application tracing. + +Per the OpenTelemetry convention, each span contains an attribute section and event section. In a Monocle-generated trace, the attribute section includes details of GenAI entities used in the span. The event section includes the input, output and metadata related to the execution of that span. + +## Attributes +The attribute section includes details of GenAI entities used in the span. For each entity used in the span it includes the entity name and entity type. For every type of entity, there are required and optional attributes listed below. +### Json format +```json + attributes: + "span.type": "Monocle-span-type", + "entity.count": "count-of-entities", + + "entity.<index>.name": "Monocle-Entity-name", + "entity.<index>.type": "MonocleEntity.<entity-type>" + ... +``` +The ```entity.count``` indicates the total number of entities used in the given span. For each entity, the details are captured in ```entity.<index>.X```.
For example, +```json + "attributes": { + "span.type": "Inference", + "entity.count": 2, + "entity.1.name": "AzureOpenAI", + "entity.1.type": "Inference.Azure_OpenAI", + "entity.2.name": "gpt-35-turbo", + "entity.2.type": "Model.LLM", + "entity.2.model_name": "gpt-35-turbo", +``` + +### Entity type specific attributes +#### MonocleEntity.Workflow +| Name | Description | Values | Required | +| - | - | - | - | +| name | Entity name generated by Monocle | Name String | Required | +| type | Monocle Entity type | MonocleEntity.Workflow | Required | +| optional-attribute | Additional attribute specific to entity | | Optional | + +### MonocleEntity.Model +| Name | Description | Values | Required | +| - | - | - | - | +| name | Entity name generated by Monocle | Name String | Required | +| type | Monocle Entity type | MonocleEntity.Model | Required | +| model_name | Name of model | String | Required | +| optional-attribute | Additional attribute specific to entity | | Optional | + +### MonocleEntity.AppHosting +| Name | Description | Values | Required | +| - | - | - | - | +| name | Entity name generated by Monocle | Name String | Required | +| type | Monocle Entity type | MonocleEntity.AppHosting | Required | +| optional-attribute | Additional attribute specific to entity | | Optional | + +### MonocleEntity.Inference +| Name | Description | Values | Required | +| - | - | - | - | +| name | Entity name generated by Monocle | Name String | Required | +| type | Monocle Entity type | MonocleEntity.Inference | Required | +| optional-attribute | Additional attribute specific to entity | | Optional | + +### MonocleEntity.VectorDB +| Name | Description | Values | Required | +| - | - | - | - | +| name | Entity name generated by Monocle | Name String | Required | +| type | Monocle Entity type | MonocleEntity.VectorDB | Required | +| optional-attribute | Additional attribute specific to entity | | Optional | + +## Events +The event section includes the input, output and metadata 
generated by that span execution. For each type of span, there are required and optional input, output and metadata items listed below. If there's no data generated in the span, the events will be an empty array. + +### Json format +```json + "events" : [ + { + "name": "data.input", + "timestamp": "UTC timestamp", + "attributes": { + "input_attribute": "value" + } + }, + { + "name": "data.output", + "timestamp": "UTC timestamp", + "attributes": { + "output_attribute": "value" + } + }, + { + "name": "metadata", + "timestamp": "UTC timestamp", + "attributes": { + "metadata_attribute": "value" + } + } + ] +``` +The ```span.type``` captured in ```attributes``` section of the span dictates the format of the ```events``` +### SpanType.Retrieval +| Name | Description | Values | Required | +| - | - | - | - | +| name | event name | data.input or data.output or metadata | Required | +| timestamp | timestamp when the event occurred | UTC timestamp | Required | +| attributes | input/output/metadata attributes generated in span | Dictionary | Required | + +### SpanType.Inference +| Name | Description | Values | Required | +| - | - | - | - | +| name | event name | data.input or data.output or metadata | Required | +| timestamp | timestamp when the event occurred | UTC timestamp | Required | +| attributes | input/output/metadata attributes generated in span | Dictionary | Required | + +### SpanType.Workflow +| Name | Description | Values | Required | +| - | - | - | - | +| name | event name | data.input or data.output or metadata | Required | +| timestamp | timestamp when the event occurred | UTC timestamp | Required | +| attributes | input/output/metadata attributes generated in span | Dictionary | Required | + +### SpanType.Internal +Events are empty \ No newline at end of file diff --git a/src/monocle_apptrace/metamodel/spans/span_example.json b/src/monocle_apptrace/metamodel/spans/span_example.json index 99d4848..15fc5fe 100644 ---
a/src/monocle_apptrace/metamodel/spans/span_example.json +++ b/src/monocle_apptrace/metamodel/spans/span_example.json @@ -13,24 +13,31 @@ "status_code": "OK" }, "attributes": { - "monocle.entity.count": 2, - "monocle.entity.1.name": "ChromaVectorStore", - "monocle.entity.1.type": "MonocleEntity.VectorDB.Chroma", - "monocle.entity.1.embedding-model-name": "BAAI/bge-small-en-v1.5", - "monocle.entity.2.name": "BAAI/bge-small-en-v1.5", - "monocle.entity.2.type": "MonocleEntity.Model.embedding", - "monocle.entity.2.model_name": "BAAI/bge-small-en-v1.5" + "span.type": "Retrieval", + "entity.count": 2, + "entity.1.name": "ChromaVectorStore", + "entity.1.type": "VectorDB.Chroma", + "entity.1.embedding-model-name": "BAAI/bge-small-en-v1.5", + "entity.2.name": "BAAI/bge-small-en-v1.5", + "entity.2.type": "Model.embedding", + "entity.2.model_name": "BAAI/bge-small-en-v1.5" }, "events": [ { - "name": "ChromaVectorStore", + "name": "data.input", "timestamp": "timestamp", "attributes": { - "data.context_input": "question: What is an americano?", - "data.context_output": "Coffee is a hot drink made from the roasted and ground seeds (coffee beans) of a tropical shrub\nA latte consists of one or more shots of espresso, served in a glass (or sometimes a cup), into which hot steamed milk is added\nAmericano is a type of coffee drink prepared by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way", - "metadata": "" + "context_input": "question: What is an americano?" 
+ } + }, + { + "name": "data.output", + "timestamp": "timestamp", + "attributes": { + "context_output": "Coffee is a hot drink made from the roasted and ground seeds (coffee beans) of a tropical shrub\nA latte consists of one or more shots of espresso, served in a glass (or sometimes a cup), into which hot steamed milk is added\nAmericano is a type of coffee drink prepared by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way" } } + ], "links": [], "resource": { @@ -39,7 +46,7 @@ }, "schema_url": "" } -} +}, { "name": "llamaindex.openai", "context": { @@ -55,28 +62,41 @@ "status_code": "OK" }, "attributes": { - "monocle.entity.count": 2, - "monocle.entity.1.name": "AzureOpenAI", - "monocle.entity.1.type": "MonocleEntity.Inference.Azure_OpenAI", - "monocle.entity.1.provider_name": "openai.azure.com", - "monocle.entity.1.deployment": "kshitiz-gpt", - "monocle.entity.1.inference_endpoint": "https://okahu-openai-dev.openai.azure.com/", + "span.type": "Inference", + "entity.count": 2, + "entity.1.name": "AzureOpenAI", + "entity.1.type": "Inference.Azure_OpenAI", + "entity.1.provider_name": "openai.azure.com", + "entity.1.deployment": "kshitiz-gpt", + "entity.1.inference_endpoint": "https://okahu-openai-dev.openai.azure.com/", - "monocle.entity.2.name": "gpt-35-turbo", - "monocle.entity.2.type": "MonocleEntity.Model.LLM", - "monocle.entity.2.model_name": "gpt-35-turbo", - "monocle.entity.2.temperature": 0.1 + "entity.2.name": "gpt-35-turbo", + "entity.2.type": "Model.LLM", + "entity.2.model_name": "gpt-35-turbo", + "entity.2.temperature": 0.1 }, "events": [ { - "name": "gpt-35-turbo", + "name": "data.input", + "timestamp": "timestamp", + "attributes": { + "question": "What is an americano?", + } + }, + { + "name": "data.output", + "timestamp": "timestamp", + "attributes": { + "response": "An americano is a type of coffee drink that is made by diluting an espresso shot 
with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way.", + } + }, + { + "name": "metadata", "timestamp": "timestamp", "attributes": { - "data.input": "question: What is an americano?", - "data.output": "response: An americano is a type of coffee drink that is made by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way.", - "metadata.completion_tokens": 52, - "metadata.prompt_tokens": 233, - "metadata.total_tokens": 285 + "completion_tokens": 52, + "prompt_tokens": 233, + "total_tokens": 285 } } ], @@ -103,9 +123,10 @@ "status_code": "OK" }, "attributes": { - "monocle.entity.count": 1, - "monocle.entity.1.name": "coffee-bot", - "monocle.entity.1.type": "MonocleEntity.Workflow.LlamaIndex" + "span.type": "Workflow", + "entity.count": 1, + "entity.1.name": "coffee-bot", + "entity.1.type": "Workflow.LlamaIndex" }, "events": [ ], diff --git a/src/monocle_apptrace/metamodel/spans/span_format.json b/src/monocle_apptrace/metamodel/spans/span_format.json index 7a12b78..f5edb14 100644 --- a/src/monocle_apptrace/metamodel/spans/span_format.json +++ b/src/monocle_apptrace/metamodel/spans/span_format.json @@ -14,84 +14,34 @@ }, "attributes": { "description": "List of AI component entities used in this span, eg Model, Inference hosting service. 
Needs to be one of the supported entity types.", - "monocle.entity.count": "count-of-entities", - "monocle.entity..name": "Monocle-Entity-name", - "monocle.entity..type": "Monocle-Entity-Type", - "monocle.entity..attribute": "Value", - - "monocle.entity..name": "Workflow-Name", - "monocle.entity..type": "MonocleEntity.Workflow.", - "monocle.entity..attribute": "Value", - - "monocle.entity..name": "Model-entity-name", - "monocle.entity..type": "MonocleEntity.Model.", - "monocle.entity..model_name": "Name of model", - "monocle.entity..attribute": "Value", - - "monocle.entity..name": "VectorDB-entity-name", - "monocle.entity..type": "MonocleEntity.VectorDB.", - "monocle.entity..embedding_model_name": "Name of model", - "monocle.entity..attribute": "Value", - - "monocle.entity..name": "Apphosting-entity-name", - "monocle.entity..type": "MonocleEntity.AppHosting.", - "monocle.entity..attribute": "Value", - - "monocle.entity..name": "Inference-entity-name", - "monocle.entity..type": "MonocleEntity.Inference.", - "monocle.entity..model_name": "Name of model", - "monocle.entity..attribute": "Value" + "span.type": "Monocle-span-type", + "entity.count": "count-of-entities", + + "entity..name": "Monocle-Entity-name", + "entity..type": "Monocle-Entity-Type", + "entity..": "Value" }, "events" : [ { - "name": "Monocle Entity Name", + "name": "data.input", "timestamp": "UTC timestamp", "attributes": { - "data.description": "Data generated in this span eg inference response", - "data.optional_attribute.entity": "value", - "metadata.description": "Metadata generated in this span eg token count", - "metadata.optional_attribute.entity": "value" + "input_attribute": "value" } }, { - "name": "Workflow Entity Name", - "timestamp": "UTC timestamp", - "attributes": { - "data.optional_attribute.entity": "value", - "metadata.optional_attribute.entity": "value" - } - }, - { - "name": "Model Entity Name", - "timestamp": "UTC timestamp", - "attributes": { - "data.optional_attribute.entity": 
"value", - "metadata.optional_attribute.entity": "value" - } - }, - { - "name": "VectorDB Entity Name", - "timestamp": "UTC timestamp", - "attributes": { - "data.optional_attribute.entity": "value", - "metadata.optional_attribute.entity": "value" - } - }, - { - "name": "AppHosting Entity Name", + "name": "data.output", "timestamp": "UTC timestamp", "attributes": { - "data.optional_attribute.entity": "value", - "metadata.optional_attribute.entity": "value" + "output_attribute": "value" } }, { - "name": "Inference Entity Name", + "name": "metadata", "timestamp": "UTC timestamp", "attributes": { - "data.optional_attribute.entity": "value", - "metadata.optional_attribute.entity": "value" + "metadata_attribute": "value" } } ], From 5de277172b5b11f73360bae353c7ec15cd9b45f8 Mon Sep 17 00:00:00 2001 From: Prasad Mujumdar Date: Wed, 25 Sep 2024 22:47:27 -0700 Subject: [PATCH 08/11] Updated input/output/metadata format. Added readme Signed-off-by: Prasad Mujumdar --- src/monocle_apptrace/metamodel/README.md | 53 ++----------------- .../metamodel/entities/README.md | 43 +++++++++++++++ .../metamodel/entities/entity_types.py | 42 +++++---------- .../metamodel/spans/README.md | 6 ++- .../metamodel/spans/span_example.json | 2 +- 5 files changed, 66 insertions(+), 80 deletions(-) create mode 100644 src/monocle_apptrace/metamodel/entities/README.md diff --git a/src/monocle_apptrace/metamodel/README.md b/src/monocle_apptrace/metamodel/README.md index 99e88b1..1987ed5 100644 --- a/src/monocle_apptrace/metamodel/README.md +++ b/src/monocle_apptrace/metamodel/README.md @@ -14,59 +14,14 @@ The entity type defines the type of GenAI component that Monocle understand. The Each entity types has number of supported technology components that Monocle handles out of the box, eg. LlamaIndex is a supported workflow. Monocle community will continue to expand the breadth of the project by adding more components. 
### Span types -The GenAI application have specific types of spans where diffrent entities integrate. Monocle metamodel defines these types and specifies format for tracing data and metadata generated in such spans. +The GenAI applications have specific [types of spans](./spans/README.md#span-types-and-events) where different entities integrate. Monocle metamodel defines these types and specifies format for tracing data and metadata generated in such spans. ### Consistent trace format Monocle generates [traces](../../../Monocle_User_Guide.md#traces) which comprises of [spans](../../../Monocle_User_Guide.md#spans). Note that Monocle trace is [OpenTelemetry format](https://opentelemetry.io/docs/concepts/signals/traces/) compatible. Each span is essentially a step in the execution that interacts with one of more GenAI technology components. The please refer to the [full spec of the json format](./span_format.json) and a detailed [example](./span_example.json). The ```attribute``` section of the span includes a list of such entities that are used in that span. -```json - "attributes": { - "Monocle.Entity": [ - { - "name": "AzureOpenAI", - "type": "MonocleEntity.Inference.Azure_OpenAI", - "attributes": { - "model_name": "gpt-35-turbo", - "provider_name": "api.openai.com", - "az_openai_deployment": "chatbot-gpt-3.5", - "inference_endpoint": "https://mybot-openai-dev.openai.azure.com/" - } - }, - { - "name": "gpt-35-turbo", - "type": "MonocleEntity.Model.LLM", - "attributes": { - "model_name": "gpt-35-turbo", - "temperature": 0.1, - } - } - ] - } -``` -The runtime data and metadata collected during the execution of that span are included in the ```events``` section of the trace (as per the Otel spec). Each entry in the event corrosponds to the entity involved in that trace execution if it has produced any runtime outputs. The event information is divided in two sections, ``data`` and ``metadata``.
The data protion contains any user data that part of input or output of the span phase, eg model prompt or inference response. The ``metadata`` section includes other attributes that are runtime input or output, eg token count. -```json - "events": [ - { - "name": "gpt-35-turbo", - "timestamp": "timestamp", - "attributes": { - "data": { - "input": { - "question": "What is an americano?" - }, - "output": { - "response": "An americano is a type of coffee drink that is made by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way." - } - }, - "metadata": { - "completion_tokens": 52, - "prompt_tokens": 233, - "total_tokens": 285 - } - } - } - ] -``` +The runtime data and metadata collected during the execution of that span are included in the ```events``` section of the trace (as per the Otel spec). Each entry in the event corresponds to the entity involved in that trace execution if it has produced any runtime outputs. +Please see the [span format](./spans/README.md) for details. + ### Instrumentation method map The map dectates what Monocle tracing method is relevant for the a given GenAI tech component method/API. It also specifies the name for that span to set in the trace output. ```python diff --git a/src/monocle_apptrace/metamodel/entities/README.md b/src/monocle_apptrace/metamodel/entities/README.md new file mode 100644 index 0000000..82d73c2 --- /dev/null +++ b/src/monocle_apptrace/metamodel/entities/README.md @@ -0,0 +1,43 @@ +# Monocle Entities +The entity type defines the type of GenAI component that Monocle understand. The monocle instrumentation can extract the relevenat information for this entity. There are a fixed set of [entity types](./entity_types.py) that are defined by Monocle out of the box, eg workflow, model etc.
As the GenAI landscape evolves, the Monocle community will introduce a new entity type if the current entities won't represent a new technology component. + +## Entity Types +### MonocleEntity.Workflow +Workflow ie the core application code. Supported types are - +- Generic +- Langchain +- LlamaIndex +- Haystack + +### MonocleEntity.Model +GenAI models. Supported types are - +- Generic +- LLM +- Embedding + +### MonocleEntity.AppHosting +Application host services where the workflow code is run. Supported types are - +- Generic +- AWS_lambda +- AWS_sagemaker +- Azure_func +- Github_codespace +- Azure_mlw + +### MonocleEntity.Inference +The model hosting infrastructure services. Supported types are - +- Generic +- NVIDIA_triton +- OpenAI +- Azure_oai +- AWS_sagemaker +- AWS_bedrock +- HuggingFace + +### MonocleEntity.VectorDB +Vector search data stores. Supported types are - +- Generic +- Chroma +- AWS_es +- Milvus +- Pinecone diff --git a/src/monocle_apptrace/metamodel/entities/entity_types.py b/src/monocle_apptrace/metamodel/entities/entity_types.py index a3631a2..bbff1df 100644 --- a/src/monocle_apptrace/metamodel/entities/entity_types.py +++ b/src/monocle_apptrace/metamodel/entities/entity_types.py @@ -18,45 +18,31 @@ class Model(enum): Embedding = 2 # Support Vector databases - class Model(enum): + class VectorDB(enum): Generic = 0 - Milvus = 1 - Chroma = 2 - Vespa = 3 - LanceDB = 4 - Pinecone = 5 - Viviate = 6 - PostgreS = 7 - Openstore = 8 - Clickhouse = 9 - ElasticSearch = 10 - SingleStore = 11 - CouchDB = 12 + Chroma = 1 + AWS_es = 2 + Milvus = 3 + Pinecone = 4 # Support application hosting frameworks class AppHosting(enum): Generic = 0 - AWS_Lambda = 1 - AWS_Sagemaker = 2 - Azure_Function = 3 - Github_Codespace = 4 - Azure_ML = 5 + AWS_lambda = 1 + AWS_sagemaker = 2 + Azure_func = 3 + Github_codespace = 4 + Azure_mlw = 5 # Supported inference infra/services class Inference(enum): Generic = 0 - NVIDIA_Triton = 1 + NVIDIA_triton = 1 OpenAI = 2 - Azure_OpenAI 
= 3 - AWS_Sagemaker = 4 - AWS_Bedrock = 5 + Azure_oai = 3 + AWS_sagemaker = 4 + AWS_bedrock = 5 HuggingFace = 6 - Cohere = 7 - vLLM = 8 - - class Classification(enum): - Metadata = 0 - Data = 1 class SpanType(enum): Internal = 0 diff --git a/src/monocle_apptrace/metamodel/spans/README.md b/src/monocle_apptrace/metamodel/spans/README.md index 4705ee2..4fdb073 100644 --- a/src/monocle_apptrace/metamodel/spans/README.md +++ b/src/monocle_apptrace/metamodel/spans/README.md @@ -21,7 +21,7 @@ The ```entity.count``` indicates total number of entities used in the given span "span.type": "Inference", "entity.count": 2, "entity.1.name": "AzureOpenAI", - "entity.1.type": "Inference.Azure_OpenAI", + "entity.1.type": "Inference.Azure_oai", "entity.2.name": "gpt-35-turbo", "entity.2.type": "Model.LLM", "entity.2.model_name": "gpt-35-turbo", @@ -93,6 +93,8 @@ The event section includes the input, output and metadata generated by that span } ] ``` + +### Span types and events The ```span.type``` captured in ```attributes``` section of the span dectates the format of the ```events``` ### SpanType.Retrieval | Name | Description | Values | Required | @@ -116,4 +118,4 @@ The ```span.type``` captured in ```attributes``` section of the span dectates th | attributes | input/output/metadata attributes generated in span | Dictionary | Required | ### SpanType.Internal -Events are empty \ No newline at end of file +Events will be empty \ No newline at end of file diff --git a/src/monocle_apptrace/metamodel/spans/span_example.json b/src/monocle_apptrace/metamodel/spans/span_example.json index 15fc5fe..3563d44 100644 --- a/src/monocle_apptrace/metamodel/spans/span_example.json +++ b/src/monocle_apptrace/metamodel/spans/span_example.json @@ -65,7 +65,7 @@ "span.type": "Inference", "entity.count": 2, "entity.1.name": "AzureOpenAI", - "entity.1.type": "Inference.Azure_OpenAI", + "entity.1.type": "Inference.Azure_oai", "entity.1.provider_name": "openai.azure.com", "entity.1.deployment": 
"kshitiz-gpt", "entity.1.inference_endpoint": "https://okahu-openai-dev.openai.azure.com/", From 93fab47e61208d4c181eafca1c5ffd93db8e2fe2 Mon Sep 17 00:00:00 2001 From: Prasad Mujumdar Date: Wed, 25 Sep 2024 23:35:02 -0700 Subject: [PATCH 09/11] Addressed review feedback Signed-off-by: Prasad Mujumdar --- src/monocle_apptrace/metamodel/entities/README.md | 2 +- src/monocle_apptrace/metamodel/entities/entity_types.py | 2 +- src/monocle_apptrace/metamodel/spans/span_example.json | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/monocle_apptrace/metamodel/entities/README.md b/src/monocle_apptrace/metamodel/entities/README.md index 82d73c2..55c7c22 100644 --- a/src/monocle_apptrace/metamodel/entities/README.md +++ b/src/monocle_apptrace/metamodel/entities/README.md @@ -34,7 +34,7 @@ The model hosting infrastructure services. Supported types are - - AWS_bedrock - HuggingFace -### MonocleEntity.VectorDB +### MonocleEntity.VectorStore Vector search data stores. Supported types are - - Generic - Chroma diff --git a/src/monocle_apptrace/metamodel/entities/entity_types.py b/src/monocle_apptrace/metamodel/entities/entity_types.py index bbff1df..c8abece 100644 --- a/src/monocle_apptrace/metamodel/entities/entity_types.py +++ b/src/monocle_apptrace/metamodel/entities/entity_types.py @@ -18,7 +18,7 @@ class Model(enum): Embedding = 2 # Support Vector databases - class VectorDB(enum): + class VectorStore(enum): Generic = 0 Chroma = 1 AWS_es = 2 diff --git a/src/monocle_apptrace/metamodel/spans/span_example.json b/src/monocle_apptrace/metamodel/spans/span_example.json index 3563d44..244c373 100644 --- a/src/monocle_apptrace/metamodel/spans/span_example.json +++ b/src/monocle_apptrace/metamodel/spans/span_example.json @@ -16,7 +16,7 @@ "span.type": "Retrieval", "entity.count": 2, "entity.1.name": "ChromaVectorStore", - "entity.1.type": "VectorDB.Chroma", + "entity.1.type": "VectorStore.Chroma", "entity.1.embedding-model-name": "BAAI/bge-small-en-v1.5", 
"entity.2.name": "BAAI/bge-small-en-v1.5", "entity.2.type": "Model.embedding", @@ -72,8 +72,7 @@ "entity.2.name": "gpt-35-turbo", "entity.2.type": "Model.LLM", - "entity.2.model_name": "gpt-35-turbo", - "entity.2.temperature": 0.1 + "entity.2.model_name": "gpt-35-turbo" }, "events": [ { @@ -94,6 +93,7 @@ "name": "metadata", "timestamp": "timestamp", "attributes": { + "temperature": 0.1, "completion_tokens": 52, "prompt_tokens": 233, "total_tokens": 285 From b98f14124b9e2bea8db86738d8af62831f2e8866 Mon Sep 17 00:00:00 2001 From: Prasad Mujumdar Date: Wed, 25 Sep 2024 23:51:22 -0700 Subject: [PATCH 10/11] Addressed review feedback Signed-off-by: Prasad Mujumdar --- .../metamodel/entities/README.md | 50 ++++++++--------- .../metamodel/entities/entity_types.py | 54 +++++++++---------- .../metamodel/spans/span_example.json | 14 ++--- 3 files changed, 59 insertions(+), 59 deletions(-) diff --git a/src/monocle_apptrace/metamodel/entities/README.md b/src/monocle_apptrace/metamodel/entities/README.md index 55c7c22..7d228f2 100644 --- a/src/monocle_apptrace/metamodel/entities/README.md +++ b/src/monocle_apptrace/metamodel/entities/README.md @@ -4,40 +4,40 @@ The entity type defines the type of GenAI component that Monocle understand. The ## Entity Types ### MonocleEntity.Workflow Workflow ie the core application code. Supported types are - -- Generic -- Langchain -- LlamaIndex -- Haystack +- generic +- langchain +- llama_i_ndex +- haystack ### MonocleEntity.Model GenAI models. Supported types are - -- Generic -- LLM -- Embedding +- generic +- llm +- embedding ### MonocleEntity.AppHosting Application host services where the workflow code is run. Supported types are - -- Generic -- AWS_lambda -- AWS_sagemaker -- Azure_func -- Github_codespace -- Azure_mlw +- generic +- aws_lambda +- aws_sagemaker +- azure_func +- github_codespace +- azure_mlw ### MonocleEntity.Inference The model hosting infrastructure services. 
Supported types are - -- Generic -- NVIDIA_triton -- OpenAI -- Azure_oai -- AWS_sagemaker -- AWS_bedrock -- HuggingFace +- generic +- nvidia_triton +- openai +- azure_oai +- aws_sagemaker +- aws_bedrock +- hugging_face ### MonocleEntity.VectorStore Vector search data stores. Supported types are - -- Generic -- Chroma -- AWS_es -- Milvus -- Pinecone +- generic +- chroma +- aws_es +- milvus +- pinecone diff --git a/src/monocle_apptrace/metamodel/entities/entity_types.py b/src/monocle_apptrace/metamodel/entities/entity_types.py index c8abece..d0af618 100644 --- a/src/monocle_apptrace/metamodel/entities/entity_types.py +++ b/src/monocle_apptrace/metamodel/entities/entity_types.py @@ -6,46 +6,46 @@ class MonocleEntity(enum): # Supported Workflow/language frameworks class Workflow(enum): - Generic = 0 - Langchain = 1 - LlamaIndex = 2 - Haystack = 3 + generic = 0 + langchain = 1 + llama_index = 2 + haystack = 3 # Supported model types class Model(enum): - Generic = 0 - LLM = 1 - Embedding = 2 + generic = 0 + llm = 1 + embedding = 2 # Support Vector databases class VectorStore(enum): - Generic = 0 - Chroma = 1 - AWS_es = 2 + generic = 0 + chroma = 1 + aws_es = 2 Milvus = 3 Pinecone = 4 # Support application hosting frameworks class AppHosting(enum): - Generic = 0 - AWS_lambda = 1 - AWS_sagemaker = 2 - Azure_func = 3 - Github_codespace = 4 - Azure_mlw = 5 + generic = 0 + aws_lambda = 1 + aws_sagemaker = 2 + azure_func = 3 + github_codespace = 4 + azure_mlw = 5 # Supported inference infra/services class Inference(enum): - Generic = 0 - NVIDIA_triton = 1 - OpenAI = 2 - Azure_oai = 3 - AWS_sagemaker = 4 - AWS_bedrock = 5 - HuggingFace = 6 + generic = 0 + nvidia_triton = 1 + openai = 2 + azure_oai = 3 + aws_sagemaker = 4 + aws_bedrock = 5 + hugging_face = 6 class SpanType(enum): - Internal = 0 - Retrieval = 2 - Inference = 3 - Workflow = 4 \ No newline at end of file + internal = 0 + retrieval = 2 + inference = 3 + workflow = 4 \ No newline at end of file diff --git 
a/src/monocle_apptrace/metamodel/spans/span_example.json b/src/monocle_apptrace/metamodel/spans/span_example.json index 244c373..4a4dced 100644 --- a/src/monocle_apptrace/metamodel/spans/span_example.json +++ b/src/monocle_apptrace/metamodel/spans/span_example.json @@ -16,10 +16,10 @@ "span.type": "Retrieval", "entity.count": 2, "entity.1.name": "ChromaVectorStore", - "entity.1.type": "VectorStore.Chroma", + "entity.1.type": "vectorstore.chroma", "entity.1.embedding-model-name": "BAAI/bge-small-en-v1.5", "entity.2.name": "BAAI/bge-small-en-v1.5", - "entity.2.type": "Model.embedding", + "entity.2.type": "model.embedding", "entity.2.model_name": "BAAI/bge-small-en-v1.5" }, "events": [ @@ -62,16 +62,16 @@ "status_code": "OK" }, "attributes": { - "span.type": "Inference", + "span.type": "inference", "entity.count": 2, "entity.1.name": "AzureOpenAI", - "entity.1.type": "Inference.Azure_oai", + "entity.1.type": "inference.azure_oai", "entity.1.provider_name": "openai.azure.com", "entity.1.deployment": "kshitiz-gpt", "entity.1.inference_endpoint": "https://okahu-openai-dev.openai.azure.com/", "entity.2.name": "gpt-35-turbo", - "entity.2.type": "Model.LLM", + "entity.2.type": "model.llm", "entity.2.model_name": "gpt-35-turbo" }, "events": [ @@ -123,10 +123,10 @@ "status_code": "OK" }, "attributes": { - "span.type": "Workflow", + "span.type": "workflow", "entity.count": 1, "entity.1.name": "coffee-bot", - "entity.1.type": "Workflow.LlamaIndex" + "entity.1.type": "workflow.llama_index" }, "events": [ ], From a824354f366f0261cda109fd19189ef6f5260944 Mon Sep 17 00:00:00 2001 From: Prasad Mujumdar Date: Thu, 26 Sep 2024 12:40:43 -0700 Subject: [PATCH 11/11] Added entity json and updated readmes Signed-off-by: Prasad Mujumdar --- .../metamodel/entities/README.md | 13 +- .../metamodel/entities/entity_types.json | 157 ++++++++++++++++++ .../metamodel/spans/README.md | 2 +- 3 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 
src/monocle_apptrace/metamodel/entities/entity_types.json diff --git a/src/monocle_apptrace/metamodel/entities/README.md b/src/monocle_apptrace/metamodel/entities/README.md index 7d228f2..a2a53fa 100644 --- a/src/monocle_apptrace/metamodel/entities/README.md +++ b/src/monocle_apptrace/metamodel/entities/README.md @@ -2,11 +2,17 @@ The entity type defines the type of GenAI component that Monocle understand. The monocle instrumentation can extract the relevenat information for this entity. There are a fixed set of [entity types](./entity_types.py) that are defined by Monocle out of the box, eg workflow, model etc. As the GenAI landscape evolves, the Monocle community will introduce a new entity type if the current entities won't represent a new technology component. ## Entity Types +The following attributes are supported for all entities +| Name | Description | Required | +| - | - | - | +| name | Entity name generated by Monocle | True | +| type | Monocle Entity type | True | + ### MonocleEntity.Workflow Workflow ie the core application code. Supported types are - - generic - langchain -- llama_i_ndex +- llama_index - haystack ### MonocleEntity.Model @@ -14,6 +20,11 @@ GenAI models. Supported types are - - generic - llm - embedding +The following attributes are supported for all model type entities +| Name | Description | Required | +| - | - | - | +| model_name | Name of model | True | + ### MonocleEntity.AppHosting Application host services where the workflow code is run.
Supported types are - diff --git a/src/monocle_apptrace/metamodel/entities/entity_types.json b/src/monocle_apptrace/metamodel/entities/entity_types.json new file mode 100644 index 0000000..aadab96 --- /dev/null +++ b/src/monocle_apptrace/metamodel/entities/entity_types.json @@ -0,0 +1,157 @@ +{ + "description": "Monocle entities represent the kinds of GenAI technology components and their implementations supported by Monocle", + "monocle_entities": [ + { + "attributes" : [ + { + "attribute_name": "name", + "attribute_description": "Monocle entity name", + "required": true + }, + { + "attribute_name": "type", + "attribute_description": "Monocle entity type", + "required": true + } + ], + "entities": [ + { + "name": "workflow", + "attributes" : [], + "types": [ + { + "type": "llama_index", + "attributes" : [] + }, + { + "type": "langchain", + "attributes" : [] + }, + { + "type": "haystack", + "attributes" : [] + }, + { + "type": "generic", + "attributes" : [] + } + ] + }, + { + "name": "model", + "attributes" : [ + { + "attribute_name": "model_name", + "attribute_description": "Model name", + "required": true + } + ], + "types": [ + { + "type": "llm", + "attributes" : [] + }, + { + "type": "embedding", + "attributes" : [] + }, + { + "type": "generic", + "attributes" : [] + } + ] + }, + { + "name": "vector_store", + "attributes" : [], + "types": [ + { + "type": "chroma", + "attributes" : [] + }, + { + "type": "aws_es", + "attributes" : [] + }, + { + "type": "milvus", + "attributes" : [] + }, + { + "type": "pinecone", + "attributes" : [] + }, + { + "type": "generic", + "attributes" : [] + } + ] + }, + { + "name": "app_hosting", + "attributes" : [], + "types": [ + { + "type": "aws_lambda", + "attributes" : [] + }, + { + "type": "aws_sagemaker", + "attributes" : [] + }, + { + "type": "azure_func", + "attributes" : [] + }, + { + "type": "azure_mlw", + "attributes" : [] + }, + { + "type": "github_codespace", + "attributes" : [] + }, + { + "type": "generic", + "attributes" : [] +
} + ] + }, + { + "name": "inference", + "attributes" : [], + "types": [ + { + "type": "aws_sagemaker", + "attributes" : [] + }, + { + "type": "aws_bedrock", + "attributes" : [] + }, + { + "type": "azure_oai", + "attributes" : [] + }, + { + "type": "openai", + "attributes" : [] + }, + { + "type": "nvidia_triton", + "attributes" : [] + }, + { + "type": "hugging_face", + "attributes" : [] + }, + { + "type": "generic", + "attributes" : [] + } + ] + } + ] + } + ] +} diff --git a/src/monocle_apptrace/metamodel/spans/README.md b/src/monocle_apptrace/metamodel/spans/README.md index 4fdb073..d18db5f 100644 --- a/src/monocle_apptrace/metamodel/spans/README.md +++ b/src/monocle_apptrace/metamodel/spans/README.md @@ -94,7 +94,7 @@ The event section includes the input, output and metadata generated by that span ] ``` -### Span types and events +## Span types and events The ```span.type``` captured in ```attributes``` section of the span dectates the format of the ```events``` ### SpanType.Retrieval | Name | Description | Values | Required |