Add openai sync instrumentation.

k2io · Oct 10, 2023 · dd29433 · dd29433
1 parent 2834663
commit dd29433
Show file tree

Hide file tree

Showing 4 changed files with 318 additions and 4 deletions.
diff --git a/newrelic/config.py b/newrelic/config.py
@@ -2037,6 +2037,16 @@ def _process_trace_cache_import_hooks():
 
 
 def _process_module_builtin_defaults():
+    _process_module_definition(
+        "openai.api_resources.chat_completion",
+        "newrelic.hooks.mlmodel_openai",
+        "instrument_openai_api_resources_chat_completion",
+    )
+    _process_module_definition(
+        "openai.util",
+        "newrelic.hooks.mlmodel_openai",
+        "instrument_openai_util",
+    )
     _process_module_definition(
         "asyncio.base_events",
         "newrelic.hooks.coroutines_asyncio",

diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
@@ -0,0 +1,138 @@
+
+# Copyright 2010 New Relic, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import openai
+import uuid
+from newrelic.api.function_trace import FunctionTrace
+from newrelic.common.object_wrapper import wrap_function_wrapper
+from newrelic.api.transaction import current_transaction
+from newrelic.api.time_trace import get_trace_linking_metadata
+from newrelic.core.config import global_settings
+from newrelic.common.object_names import callable_name
+from newrelic.core.attribute import MAX_LOG_MESSAGE_LENGTH
+
+
+def wrap_chat_completion_create(wrapped, instance, args, kwargs):
+    transaction = current_transaction()
+
+    if not transaction:
+        return
+
+    ft_name = callable_name(wrapped)
+    with FunctionTrace(ft_name) as ft:
+        response = wrapped(*args, **kwargs)
+
+    if not response:
+        return
+
+    custom_attrs_dict = transaction._custom_params
+    conversation_id = custom_attrs_dict["conversation_id"] if "conversation_id" in custom_attrs_dict.keys() else str(uuid.uuid4())
+
+    chat_completion_id = str(uuid.uuid4())
+    available_metadata = get_trace_linking_metadata()
+    span_id = available_metadata.get("span.id", "")
+    trace_id = available_metadata.get("trace.id", "")
+
+    response_headers = getattr(response, "_nr_response_headers")
+    response_model = response.model
+    settings = transaction.settings if transaction.settings is not None else global_settings()
+
+    chat_completion_summary_dict = {
+        "id": chat_completion_id,
+        "appName": settings.app_name,
+        "conversation_id": conversation_id,
+        "span_id": span_id,
+        "trace_id": trace_id,
+        "transaction_id": transaction._transaction_id,
+        "api_key_last_four_digits": f"sk-{response.api_key[-4:]}",
+        "response_time": ft.duration,
+        "request.model": kwargs.get("model") or kwargs.get("engine"),
+        "response.model": response_model,
+        "response.organization":  response.organization,
+        "response.usage.completion_tokens": response.usage.completion_tokens,
+        "response.usage.total_tokens": response.usage.total_tokens,
+        "response.usage.prompt_tokens": response.usage.prompt_tokens,
+        "request.temperature": kwargs.get("temperature"),
+        "request.max_tokens": kwargs.get("max_tokens"),
+        "response.choices.finish_reason": response.choices[0].finish_reason,
+        "response.api_type": response.api_type,
+        "response.headers.llmVersion": response_headers.get("openai-version"),
+        "response.headers.ratelimitLimitRequests": check_rate_limit_header(response_headers, "x-ratelimit-limit-requests", True),
+        "response.headers.ratelimitLimitTokens": check_rate_limit_header(response_headers, "x-ratelimit-limit-tokens", True),
+        "response.headers.ratelimitResetTokens": check_rate_limit_header(response_headers, "x-ratelimit-reset-tokens", False),
+        "response.headers.ratelimitResetRequests": check_rate_limit_header(response_headers, "x-ratelimit-reset-requests", False),
+        "response.headers.ratelimitRemainingTokens": check_rate_limit_header(response_headers, "x-ratelimit-remaining-tokens", True),
+        "response.headers.ratelimitRemainingRequests": check_rate_limit_header(response_headers, "x-ratelimit-remaining-requests", True),
+        "vendor": "openAI",
+        "ingest_source": "Python",
+        "number_of_messages": len(kwargs.get("messages", [])) + len(response.choices),
+        "api_version": response_headers.get("openai-version")
+    }
+
+    transaction.record_ml_event("LlmChatCompletionSummary", chat_completion_summary_dict)
+    message_list = list(kwargs.get("messages", [])) + [response.choices[0].message]
+
+    create_chat_completion_message_event(transaction, message_list, chat_completion_id, span_id, trace_id, response_model)
+
+    return response
+
+
+def check_rate_limit_header(response_headers, header_name, is_int):
+    if header_name in response_headers:
+        header_value = response_headers.get(header_name)
+        if is_int:
+            header_value = int(header_value)
+        return header_value
+    else:
+        return None
+
+
+def create_chat_completion_message_event(transaction, message_list, chat_completion_id, span_id, trace_id, response_model):
+    if not transaction:
+        return
+
+    for index, message in enumerate(message_list):
+        chat_completion_message_dict = {
+            "id": str(uuid.uuid4()),
+            "span_id": span_id,
+            "trace_id": trace_id,
+            "transaction_id": transaction._transaction_id,
+            "content": message.get("content", "")[:MAX_LOG_MESSAGE_LENGTH],
+            "role": message.get("role"),
+            "completion_id": chat_completion_id,
+            "sequence": index,
+            "model": response_model,
+            "vendor": "openAI",
+            "ingest_source": "Python",
+        }
+        transaction.record_ml_event("LlmChatCompletionMessage", chat_completion_message_dict)
+
+
+def wrap_convert_to_openai_object(wrapped, instance, args, kwargs):
+    resp = args[0]
+    returned_response = wrapped(*args, **kwargs)
+
+    if isinstance(resp, openai.openai_response.OpenAIResponse):
+        setattr(returned_response, "_nr_response_headers", getattr(resp, "_headers", {}))
+
+    return returned_response
+
+
+def instrument_openai_api_resources_chat_completion(module):
+    if hasattr(module.ChatCompletion, "create"):
+        wrap_function_wrapper(module, "ChatCompletion.create", wrap_chat_completion_create)
+
+
+def instrument_openai_util(module):
+    wrap_function_wrapper(module, "convert_to_openai_object", wrap_convert_to_openai_object)
diff --git a/tests/mlmodel_openai/conftest.py b/tests/mlmodel_openai/conftest.py
@@ -36,8 +36,11 @@
     "transaction_tracer.stack_trace_threshold": 0.0,
     "debug.log_data_collector_payloads": True,
     "debug.record_transaction_failure": True,
-    "ml_insights_event.enabled": True,
+    "machine_learning.enabled": True,
+    #"machine_learning.inference_events_value.enabled": True,
+    "ml_insights_events.enabled": True
 }
+
 collector_agent_registration = collector_agent_registration_fixture(
     app_name="Python Agent Test (mlmodel_openai)",
     default_settings=_default_settings,

diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py
@@ -13,19 +13,182 @@
 # limitations under the License.
 
 import openai
+import pytest
+from testing_support.fixtures import (  # function_not_called,; override_application_settings,
+    function_not_called,
+    override_application_settings,
+    reset_core_stats_engine,
+)
+
+from newrelic.api.time_trace import current_trace
+from newrelic.api.transaction import current_transaction, add_custom_attribute
+from testing_support.validators.validate_ml_event_count import validate_ml_event_count
+from testing_support.validators.validate_ml_event_payload import (
+    validate_ml_event_payload,
+)
+from testing_support.validators.validate_ml_events import validate_ml_events
+from testing_support.validators.validate_ml_events_outside_transaction import (
+    validate_ml_events_outside_transaction,
+)
+
+import newrelic.core.otlp_utils
+from newrelic.api.application import application_instance as application
+from newrelic.api.background_task import background_task
+from newrelic.api.transaction import record_ml_event
+from newrelic.core.config import global_settings
+from newrelic.packages import six
 
 _test_openai_chat_completion_sync_messages = (
     {"role": "system", "content": "You are a scientist."},
-    {"role": "user", "content": "What is the boiling point of water?"},
-    {"role": "assistant", "content": "The boiling point of water is 212 degrees Fahrenheit."},
+    #{"role": "user", "content": "What is the boiling point of water?"},
+    #{"role": "assistant", "content": "The boiling point of water is 212 degrees Fahrenheit."},
     {"role": "user", "content": "What is 212 degrees Fahrenheit converted to Celsius?"},
 )
 
 
-def test_openai_chat_completion_sync():
+def set_trace_info():
+    txn = current_transaction()
+    if txn:
+        txn._trace_id = "trace-id"
+    trace = current_trace()
+    if trace:
+        trace.guid = "span-id"
+
+
+sync_chat_completion_recorded_events = [
+    (
+        {'type': 'LlmChatCompletionSummary'},
+        {
+            'id': None, # UUID that varies with each run
+            'appName': 'Python Agent Test (mlmodel_openai)',
+            'conversation_id': 'my-awesome-id',
+            'transaction_id': None,
+            'span_id': "span-id",
+            'trace_id': "trace-id",
+            'api_key_last_four_digits': 'sk-CRET',
+            'response_time': None, # Response time varies each test run
+            'request.model': 'gpt-3.5-turbo',
+            'response.model': 'gpt-3.5-turbo-0613',
+            'response.organization': 'new-relic-nkmd8b',
+            'response.usage.completion_tokens': 11,
+            'response.usage.total_tokens': 64,
+            'response.usage.prompt_tokens': 53,
+            'request.temperature': 0.7,
+            'request.max_tokens': 100,
+            'response.choices.finish_reason': 'stop',
+            'response.api_type': 'None',
+            'response.headers.llmVersion': '2020-10-01',
+            'response.headers.ratelimitLimitRequests': 200,
+            'response.headers.ratelimitLimitTokens': 40000,
+            'response.headers.ratelimitResetTokens': "90ms",
+            'response.headers.ratelimitResetRequests': "7m12s",
+            'response.headers.ratelimitRemainingTokens': 39940,
+            'response.headers.ratelimitRemainingRequests': 199,
+            'vendor': 'openAI',
+            'ingest_source': 'Python',
+            'number_of_messages': 3,
+            'api_version': '2020-10-01'
+        },
+    ),
+    (
+        {'type': 'LlmChatCompletionMessage'},
+        {
+            'id': None,
+            'span_id': "span-id",
+            'trace_id': "trace-id",
+            'transaction_id': None,
+            'content': 'You are a scientist.',
+            'role': 'system',
+            'completion_id': None,
+            'sequence': 0,
+            'model': 'gpt-3.5-turbo-0613',
+            'vendor': 'openAI',
+            'ingest_source': 'Python'
+        },
+    ),
+    (
+        {'type': 'LlmChatCompletionMessage'},
+        {
+            'id': None,
+            'span_id': "span-id",
+            'trace_id': "trace-id",
+            'transaction_id': None,
+            'content': 'What is 212 degrees Fahrenheit converted to Celsius?',
+            'role': 'user',
+            'completion_id': None,
+            'sequence': 1,
+            'model': 'gpt-3.5-turbo-0613',
+            'vendor': 'openAI',
+            'ingest_source': 'Python'
+        },
+    ),
+    (
+        {'type': 'LlmChatCompletionMessage'},
+        {
+            'id': None,
+            'span_id': "span-id",
+            'trace_id': "trace-id",
+            'transaction_id': None,
+            'content': '212 degrees Fahrenheit is equal to 100 degrees Celsius.',
+            'role': 'assistant',
+            'completion_id': None,
+            'sequence': 2,
+            'model': 'gpt-3.5-turbo-0613',
+            'vendor': 'openAI',
+            'ingest_source': 'Python'
+        }
+    ),
+]
+
+
+@reset_core_stats_engine()
+@validate_ml_events(sync_chat_completion_recorded_events)
+# One summary event, one system message, one user message, and one response message from the assistant
+@validate_ml_event_count(count=4)
+@background_task()
+def test_openai_chat_completion_sync_in_txn():
+    set_trace_info()
+    add_custom_attribute("conversation_id", "my-awesome-id")
+    openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=_test_openai_chat_completion_sync_messages,
+        temperature=0.7,
+        max_tokens=100
+    )
+
+
+@reset_core_stats_engine()
+# One summary event, one system message, one user message, and one response message from the assistant
+@validate_ml_event_count(count=0)
+def test_openai_chat_completion_sync_outside_txn():
+    set_trace_info()
+    add_custom_attribute("conversation_id", "my-awesome-id")
+    openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=_test_openai_chat_completion_sync_messages,
+        temperature=0.7,
+        max_tokens=100
+    )
+
+
+disabled_ml_settings = {
+    "machine_learning.enabled": False,
+    "machine_learning.inference_events_value.enabled": False,
+    "ml_insights_events.enabled": False
+}
+
+
+@override_application_settings(disabled_ml_settings)
+@reset_core_stats_engine()
+# One summary event, one system message, one user message, and one response message from the assistant
+@validate_ml_event_count(count=0)
+def test_openai_chat_completion_sync_disabled_settings():
+    set_trace_info()
     openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
         messages=_test_openai_chat_completion_sync_messages,
+        temperature=0.7,
+        max_tokens=100
     )