(0.8.2) KM Agent changes; OpenAI assistant service changes; #1809

Open · wants to merge 3 commits into base: release/0.8.2
1 change: 1 addition & 0 deletions docs/release-notes/breaking-changes.md
@@ -12,6 +12,7 @@ The following settings are required:
Name | Default value
--- | ---
`FoundationaLLM:APIEndpoints:CoreAPI:Configuration:AllowedUploadFileExtensions` | `c, cpp, cs, css, csv, doc, docx, git, html, java, jpeg, jpg, js, json, md, pdf, php, png, pptx, py, rb, sh, tar, tex, ts, txt, xlsx, xml, zip`
`FoundationaLLM:APIEndpoints:CoreAPI:Configuration:AzureOpenAIAssistantsCodeInterpreterFileExtensions` | `c, cpp, cs, css, csv, doc, docx, git, html, java, jpeg, jpg, js, json, md, pdf, php, png, pptx, py, rb, sh, tar, tex, ts, txt, xlsx, xml, zip`
`FoundationaLLM:APIEndpoints:CoreAPI:Configuration:AzureOpenAIAssistantsFileSearchFileExtensions` | `c, cpp, cs, css, doc, docx, html, java, js, json, md, pdf, php, pptx, py, rb, sh, tex, ts, txt`

The following settings are optional (they should not be set by default):
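Both extension settings are plain comma-separated strings. As a minimal sketch (assuming a config object that exposes `get_value(key)`, like the `foundationallm.config.Configuration` used later in this PR), they can be parsed into lists as follows:

```python
# Sketch only, not the shipped implementation: parse a comma-separated
# extension setting into a clean list. `config` is assumed to expose
# get_value(key), as in foundationallm.config.Configuration.
FILE_SEARCH_KEY = (
    "FoundationaLLM:APIEndpoints:CoreAPI:Configuration:"
    "AzureOpenAIAssistantsFileSearchFileExtensions"
)

def parse_extensions(config, key: str) -> list[str]:
    # "c, cpp, cs" -> ["c", "cpp", "cs"]; strip() drops the spaces after commas.
    return [ext.strip() for ext in config.get_value(key).split(",")]
```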
@@ -348,11 +348,14 @@ await CreateResources<AssistantUserContext, FileUserContext>(
newOpenAIAssistantThreadId = ((JsonElement)newOpenAIAssistantThreadIdObject!).Deserialize<string>();

result.TryGetValue(OpenAIAgentCapabilityParameterNames.AssistantVectorStoreId, out var newOpenAIAssistantVectorStoreIdObject);
- newOpenAIAssistantVectorStoreId = ((JsonElement)newOpenAIAssistantVectorStoreIdObject!).Deserialize<string>();
+ if (newOpenAIAssistantVectorStoreIdObject != null)
+ {
+     newOpenAIAssistantVectorStoreId = ((JsonElement)newOpenAIAssistantVectorStoreIdObject!).Deserialize<string>();
+     incompleteConversations[0].OpenAIVectorStoreId = newOpenAIAssistantVectorStoreId;
+ }

incompleteConversations[0].OpenAIThreadId = newOpenAIAssistantThreadId;
incompleteConversations[0].OpenAIThreadCreatedOn = DateTimeOffset.UtcNow;
- incompleteConversations[0].OpenAIVectorStoreId = newOpenAIAssistantVectorStoreId;

#region Ensure that only one thread can update the assistant user context at a time.

8 changes: 8 additions & 0 deletions src/dotnet/Common/Constants/Data/AppConfiguration.json
@@ -402,6 +402,14 @@
"content_type": "",
"first_version": "0.8.0"
},
{
"name": "AzureOpenAIAssistantsCodeInterpreterFileExtensions",
"description": "The comma-separated list file extensions that are supported by the Azure OpenAI Assistants code interpreter tool.",
"secret": "",
"value": "c, cpp, cs, css, csv, doc, docx, git, html, java, jpeg, jpg, js, json, md, pdf, php, png, pptx, py, rb, sh, tar, tex, ts, txt, xlsx, xml, zip",
"content_type": "",
"first_version": "0.8.0"
},
{
"name": "AllowedUploadFileExtensions",
"description": "The comma-separated list file extensions that users are allowed to upload to a conversation.",
55 changes: 42 additions & 13 deletions src/dotnet/Gateway/Services/GatewayCore.cs
@@ -19,6 +19,7 @@
using OpenAI.Assistants;
using OpenAI.Files;
using OpenAI.VectorStores;
using System.ClientModel;
using System.Collections.Concurrent;
using System.Text.Json;

@@ -275,21 +276,12 @@ private async Task<Dictionary<string, object>> CreateOpenAIAgentCapability(strin
}
});

- var threadResult = await assistantClient.CreateThreadAsync(new ThreadCreationOptions
- {
-     ToolResources = new ToolResources()
-     {
-         FileSearch = new FileSearchToolResources()
-         {
-             VectorStoreIds = [vectorStoreResult.Value.Id]
-         }
-     }
- });
+ // Create the thread, but don't attach the vector store yet.
+ var threadResult = await assistantClient.CreateThreadAsync(new ThreadCreationOptions{});
var thread = threadResult.Value;
- var vectorStore = vectorStoreResult.Value;

result[OpenAIAgentCapabilityParameterNames.AssistantThreadId] = thread.Id;
- result[OpenAIAgentCapabilityParameterNames.AssistantVectorStoreId] = vectorStore.Id;
+ result[OpenAIAgentCapabilityParameterNames.AssistantVectorStoreId] = vectorStoreResult.Value.Id;
}

var fileId = GetParameterValue<string>(parameters, OpenAIAgentCapabilityParameterNames.AssistantFileId, string.Empty);
@@ -315,6 +307,43 @@ private async Task<Dictionary<string, object>> CreateOpenAIAgentCapability(strin
var vectorStoreClient = GetAzureOpenAIVectorStoreClient(azureOpenAIAccount.Endpoint);
var vectorStoreId = GetRequiredParameterValue<string>(parameters, OpenAIAgentCapabilityParameterNames.AssistantVectorStoreId);

ClientResult<VectorStore> vectorStoreResult = null;

// Retrieve the existing vector store, or create a new one if no identifier was provided.
if (!string.IsNullOrEmpty(vectorStoreId))
    vectorStoreResult = await vectorStoreClient.GetVectorStoreAsync(vectorStoreId);
else
{
vectorStoreResult = await vectorStoreClient.CreateVectorStoreAsync(new VectorStoreCreationOptions
{
ExpirationPolicy = new VectorStoreExpirationPolicy
{
Anchor = VectorStoreExpirationAnchor.LastActiveAt,
Days = 365
}
});

result[OpenAIAgentCapabilityParameterNames.AssistantVectorStoreId] = vectorStoreResult.Value.Id;
}

var assistantClient = GetAzureOpenAIAssistantClient(azureOpenAIAccount.Endpoint);
var threadId = GetRequiredParameterValue<string>(parameters, OpenAIAgentCapabilityParameterNames.AssistantThreadId);
var thread = await assistantClient.GetThreadAsync(threadId);

if (thread.Value.ToolResources?.FileSearch == null || thread.Value.ToolResources.FileSearch.VectorStoreIds.Count == 0)
{
var threadResult = await assistantClient.ModifyThreadAsync(thread, new ThreadModificationOptions
{
ToolResources = new ToolResources()
{
FileSearch = new FileSearchToolResources()
{
VectorStoreIds = [vectorStoreResult.Value.Id]
}
}
});
}

var vectorizationResult = await vectorStoreClient.AddFileToVectorStoreAsync(vectorStoreResult.Value.Id, fileId);

var startTime = DateTimeOffset.UtcNow;
@@ -42,6 +42,7 @@ public class KnowledgeManagementOrchestration(
IHttpClientFactoryService httpClientFactoryService,
Dictionary<string, IResourceProviderService> resourceProviderServices,
bool dataSourceAccessDenied,
string? openAIThreadId,
string? openAIVectorStoreId) : OrchestrationBase(orchestrationService)
{
private readonly string _instanceId = instanceId;
@@ -57,6 +58,7 @@ public class KnowledgeManagementOrchestration(
private readonly IResourceProviderService _azureOpenAIResourceProvider =
resourceProviderServices[ResourceProviderNames.FoundationaLLM_AzureOpenAI];
private readonly string? _openAIVectorStoreId = openAIVectorStoreId;
private readonly string? _openAIThreadId = openAIThreadId;
private GatewayServiceClient _gatewayClient;

/// <inheritdoc/>
@@ -155,6 +157,7 @@ private async Task<List<AttachmentProperties>> GetAttachmentPaths(List<string> a
{ OpenAIAgentCapabilityParameterNames.Endpoint, fileUserContext.Endpoint },
{ OpenAIAgentCapabilityParameterNames.AddAssistantFileToVectorStore, fileMapping.RequiresVectorization },
{ OpenAIAgentCapabilityParameterNames.AssistantVectorStoreId, _openAIVectorStoreId ?? string.Empty },
{ OpenAIAgentCapabilityParameterNames.AssistantThreadId, _openAIThreadId ?? string.Empty },
{ OpenAIAgentCapabilityParameterNames.AssistantFileId, fileMapping.OpenAIFileId! }
});
}
@@ -77,6 +77,8 @@ public class OrchestrationBuilder
callContext.CurrentUserIdentity!,
logger);

var threadId = result.ExplodedObjects?[CompletionRequestObjectsKeys.OpenAIAssistantThreadId] as string;

if (result.Agent.AgentType == typeof(KnowledgeManagementAgent) || result.Agent.AgentType == typeof(AudioClassificationAgent))
{
var orchestrationName = string.IsNullOrWhiteSpace(result.Agent.OrchestrationSettings?.Orchestrator)
@@ -95,6 +97,7 @@
serviceProvider.GetRequiredService<IHttpClientFactoryService>(),
resourceProviderServices,
result.DataSourceAccessDenied,
threadId,
vectorStoreId);

return kmOrchestration;
@@ -56,23 +56,23 @@ def _get_document_retriever(
text_embedding_profile = AzureOpenAIEmbeddingProfile.from_object(
request.objects[agent.vectorization.text_embedding_profile_object_id]
)

# text_embedding_profile has the embedding model name in Settings.
text_embedding_model_name = text_embedding_profile.settings.get(EmbeddingProfileSettingsKeys.MODEL_NAME)

# objects dictionary has the gateway API endpoint configuration.
gateway_endpoint_configuration = APIEndpointConfiguration.from_object(
request.objects[CompletionRequestObjectKeys.GATEWAY_API_ENDPOINT_CONFIGURATION]
)

gateway_embedding_service = GatewayTextEmbeddingService(
instance_id= self.instance_id,
user_identity=self.user_identity,
gateway_api_endpoint_configuration=gateway_endpoint_configuration,
model_name = text_embedding_model_name,
config=self.config
)

# array of objects containing the indexing profile and associated endpoint configuration
index_configurations = []

@@ -82,17 +82,17 @@ def _get_document_retriever(
indexing_profile = AzureAISearchIndexingProfile.from_object(
request.objects[profile_id]
)
# indexing profile has indexing_api_endpoint_configuration_object_id in Settings.
indexing_api_endpoint_configuration = APIEndpointConfiguration.from_object(
request.objects[indexing_profile.settings.api_endpoint_configuration_object_id]
)

index_configurations.append(
KnowledgeManagementIndexConfiguration(
indexing_profile = indexing_profile,
api_endpoint_configuration = indexing_api_endpoint_configuration
)
)

retriever_factory = RetrieverFactory(
index_configurations=index_configurations,
@@ -283,7 +283,7 @@ def invoke(self, request: KnowledgeManagementCompletionRequest) -> CompletionRes
if "OpenAI.Assistants" in agent.capabilities:
operation_type_override = OperationTypes.ASSISTANTS_API
# create the service
assistant_svc = OpenAIAssistantsApiService(azure_openai_client=self._get_language_model(override_operation_type=operation_type_override, is_async=False))
assistant_svc = OpenAIAssistantsApiService(azure_openai_client=self._get_language_model(override_operation_type=operation_type_override, is_async=False), config=self.config)

# populate service request object
assistant_req = OpenAIAssistantsAPIRequest(
@@ -310,6 +310,21 @@ def invoke(self, request: KnowledgeManagementCompletionRequest) -> CompletionRes
attachments = []
)

for attachment in request.attachments:
if attachment.content_type.startswith('image/'):
continue

file_attachment = assistant_svc.create_file_attachment(attachment)

# Add a user message asking the assistant to summarize the attachment
assistant_svc.add_thread_message(
thread_id = assistant_req.thread_id,
role = "user",
content = f"Summarize the attached {attachment.original_file_name} file.",
attachments = [file_attachment],
)
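For reference, a hedged sketch of the call `add_thread_message` is expected to make through the OpenAI Python SDK (the `aadd_thread_message` implementation later in this PR confirms the shape; the client, thread ID, and file ID here are placeholders):

```python
# Illustrative only: `client` is an AzureOpenAI instance and the IDs are
# placeholders; the attachments shape follows the Assistants v2 messages API.
client.beta.threads.messages.create(
    thread_id="thread_abc123",
    role="user",
    content="Summarize the attached sales_report.xlsx file.",
    attachments=[{
        "file_id": "assistant-file-abc123",
        # "xlsx" is in the code-interpreter extension list but not the
        # file-search list, so only code_interpreter is enabled for it.
        "tools": [{"type": "code_interpreter"}],
    }],
)
```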

# Add user and assistant messages related to audio classification to the Assistants API request.
if audio_analysis_results is not None:
audio_analysis_context = ''
@@ -378,7 +393,7 @@ def invoke(self, request: KnowledgeManagementCompletionRequest) -> CompletionRes
label = prediction['label']
external_analysis_context += f'- {label}' + '\n'
external_analysis_context += '\n'

chain_context = { "context": lambda x: external_analysis_context, "question": RunnablePassthrough() }
else:
chain_context = { "context": RunnablePassthrough() }
@@ -458,7 +473,7 @@ async def ainvoke(self, request: KnowledgeManagementCompletionRequest) -> Comple
if "OpenAI.Assistants" in agent.capabilities:
operation_type_override = OperationTypes.ASSISTANTS_API
# create the service
assistant_svc = OpenAIAssistantsApiService(azure_openai_client=self._get_language_model(override_operation_type=operation_type_override, is_async=True))
assistant_svc = OpenAIAssistantsApiService(azure_openai_client=self._get_language_model(override_operation_type=operation_type_override, is_async=True), config=self.config)

# populate service request object
assistant_req = OpenAIAssistantsAPIRequest(
@@ -485,6 +500,20 @@ async def ainvoke(self, request: KnowledgeManagementCompletionRequest) -> Comple
attachments = []
)

for attachment in request.attachments:
if attachment.content_type.startswith('image/'):
continue

file_attachment = assistant_svc.create_file_attachment(attachment)

# Add a user message asking the assistant to summarize the attachment
await assistant_svc.aadd_thread_message(
thread_id = assistant_req.thread_id,
role = "user",
content = f"Summarize the attached {attachment.original_file_name} file.",
attachments = [file_attachment],
)

# Add user and assistant messages related to audio classification to the Assistants API request.
if audio_analysis_results is not None:
audio_analysis_context = ''
@@ -2,6 +2,7 @@
Class: OpenAIAssistantsApiService
Description: Integration with the OpenAI Assistants API.
"""
import os
from typing import List, Union
from openai import AsyncAzureOpenAI, AzureOpenAI
from openai.pagination import AsyncCursorPage, SyncCursorPage
@@ -27,13 +28,18 @@
AnalysisResult
)
from foundationallm.models.services import OpenAIAssistantsAPIRequest, OpenAIAssistantsAPIResponse
from foundationallm.models.attachments import AttachmentProperties
from foundationallm.config import Configuration

CODE_INTERPRETER_FILE_TYPES_CONFIG_NAME = "FoundationaLLM:APIEndpoints:CoreAPI:Configuration:AzureOpenAIAssistantsCodeInterpreterFileExtensions"
FILE_SEARCH_FILE_TYPES_CONFIG_NAME = "FoundationaLLM:APIEndpoints:CoreAPI:Configuration:AzureOpenAIAssistantsFileSearchFileExtensions"

class OpenAIAssistantsApiService:
"""
Integration with the OpenAI Assistants API.
"""

def __init__(self, azure_openai_client: Union[AzureOpenAI, AsyncAzureOpenAI]):
def __init__(self, azure_openai_client: Union[AzureOpenAI, AsyncAzureOpenAI], config: Configuration):
"""
Initializes an OpenAI Assistants API service.

@@ -45,6 +51,37 @@ def __init__(self, azure_openai_client: Union[AzureOpenAI, AsyncAzureOpenAI]):
"""
self.client = azure_openai_client

# Split the comma-separated extension lists and trim whitespace.
self.file_tool_file_types = [x.strip() for x in config.get_value(FILE_SEARCH_FILE_TYPES_CONFIG_NAME).split(",")]
self.code_tool_file_types = [x.strip() for x in config.get_value(CODE_INTERPRETER_FILE_TYPES_CONFIG_NAME).split(",")]

self.tools = [
{
"type": "file_search"
},
{
"type": "code_interpreter"
}
]

def create_file_attachment(self, attachment: AttachmentProperties):
    """
    Builds an Assistants API file attachment for the specified attachment,
    enabling each tool only if it supports the file's extension.
    """
    file_attachment = {
        'file_id': attachment.provider_file_name,
        'tools': []
    }

    # File extension without the leading dot, e.g. "xlsx".
    ext = os.path.splitext(attachment.original_file_name)[1].replace('.', '')

    for tool in self.tools:
        if tool['type'] == 'file_search' and ext in self.file_tool_file_types:
            file_attachment['tools'].append(tool)
        if tool['type'] == 'code_interpreter' and ext in self.code_tool_file_types:
            file_attachment['tools'].append(tool)

    return file_attachment

async def aadd_thread_message(self, thread_id: str, role: str, content: str, attachments: list = None):
return await self.client.beta.threads.messages.create(
thread_id = thread_id,
@@ -254,7 +291,7 @@ def _parse_single_message(self, message: Message):
The content items within the message.
"""
ret_content = []

# for each content item in the message
for ci in message.content:
match ci:
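A hypothetical usage sketch for `create_file_attachment`: the `AttachmentProperties` field names come from the code above, while the constructor call, the `client` and `config` objects, and the sample values are assumptions.

```python
# Hypothetical values for illustration.
attachment = AttachmentProperties(
    original_file_name="sales_report.xlsx",
    provider_file_name="assistant-file-abc123",  # the OpenAI file ID
    content_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
)

svc = OpenAIAssistantsApiService(azure_openai_client=client, config=config)
file_attachment = svc.create_file_attachment(attachment)
# With the default settings above, "xlsx" only appears in the
# code-interpreter extension list, so file_attachment == {
#     "file_id": "assistant-file-abc123",
#     "tools": [{"type": "code_interpreter"}]
# }
```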
@@ -31,6 +31,7 @@ public KnowledgeManagementOrchestrationTests()
null,
null,
false,
string.Empty,
string.Empty);
}
