Skip to content

Commit 77d4276

Browse files
Execute retrieval processes (#286)
1 parent 7c59aa1 commit 77d4276

File tree

9 files changed

+198
-3
lines changed

9 files changed

+198
-3
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Things we are currently working on:
1919
- [ ] App: Implement the process to vectorize one local file using embeddings
2020
- [ ] Runtime: Integration of the vector database [LanceDB](https://github.com/lancedb/lancedb)
2121
- [ ] App: Implement the continuous process of vectorizing data
22-
- [x] ~~App: Define a common retrieval context interface for the integration of RAG processes in chats (PR [#281](https://github.com/MindWorkAI/AI-Studio/pull/281), [#284](https://github.com/MindWorkAI/AI-Studio/pull/284))~~
22+
- [x] ~~App: Define a common retrieval context interface for the integration of RAG processes in chats (PR [#281](https://github.com/MindWorkAI/AI-Studio/pull/281), [#284](https://github.com/MindWorkAI/AI-Studio/pull/284), [#286](https://github.com/MindWorkAI/AI-Studio/pull/286))~~
2323
- [ ] App: Define a common augmentation interface for the integration of RAG processes in chats
2424
- [x] ~~App: Integrate data sources in chats (PR [#282](https://github.com/MindWorkAI/AI-Studio/pull/282))~~
2525

app/MindWork AI Studio/Chat/ChatThread.cs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using AIStudio.Components;
22
using AIStudio.Settings;
33
using AIStudio.Settings.DataModel;
4+
using AIStudio.Tools.ERIClient.DataModel;
45

56
namespace AIStudio.Chat;
67

@@ -150,4 +151,46 @@ public void Remove(IContent content, bool removeForRegenerate = false)
150151
// Remove the block from the chat thread:
151152
this.Blocks.Remove(block);
152153
}
154+
155+
/// <summary>
/// Converts this chat thread into its ERI data model representation.
/// </summary>
/// <remarks>
/// Every block of this thread yields exactly one ERI content block. Text content
/// carries its text, image content carries the result of <c>AsBase64</c>, and any
/// other content becomes an empty string typed as UNKNOWN.
/// </remarks>
/// <param name="token">The cancellation token; forwarded to the image conversion.</param>
/// <returns>The ERI chat thread containing the converted content blocks.</returns>
public async Task<Tools.ERIClient.DataModel.ChatThread> ToERIChatThread(CancellationToken token = default)
{
    var eriBlocks = new List<Tools.ERIClient.DataModel.ContentBlock>(this.Blocks.Count);
    foreach (var chatBlock in this.Blocks)
    {
        // Determine the payload and the matching ERI content type:
        var (payload, payloadType) = chatBlock.Content switch
        {
            ContentImage imageContent => (await imageContent.AsBase64(token), Tools.ERIClient.DataModel.ContentType.IMAGE),
            ContentText textContent => (textContent.Text, Tools.ERIClient.DataModel.ContentType.TEXT),

            _ => (string.Empty, Tools.ERIClient.DataModel.ContentType.UNKNOWN),
        };

        // Translate the chat role into the ERI role:
        var eriRole = chatBlock.Role switch
        {
            ChatRole.AI => Role.AI,
            ChatRole.USER => Role.USER,
            ChatRole.AGENT => Role.AGENT,
            ChatRole.SYSTEM => Role.SYSTEM,
            ChatRole.NONE => Role.NONE,

            _ => Role.UNKNOW,
        };

        var eriBlock = new Tools.ERIClient.DataModel.ContentBlock
        {
            Role = eriRole,
            Content = payload,
            Type = payloadType,
        };

        eriBlocks.Add(eriBlock);
    }

    var eriThread = new Tools.ERIClient.DataModel.ChatThread { ContentBlocks = eriBlocks };
    return eriThread;
}
153196
}

app/MindWork AI Studio/Chat/ContentText.cs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
using AIStudio.Components;
55
using AIStudio.Provider;
66
using AIStudio.Settings;
7+
using AIStudio.Tools.RAG;
78
using AIStudio.Tools.Services;
89

910
namespace AIStudio.Chat;
@@ -199,9 +200,30 @@ public async Task CreateFromProviderAsync(IProvider provider, Model chatModel, I
199200
//
200201
// Trigger the retrieval part of the (R)AG process:
201202
//
203+
var dataContexts = new List<IRetrievalContext>();
202204
if (proceedWithRAG)
203205
{
204-
206+
//
207+
// We kick off the retrieval process for each data source in parallel:
208+
//
209+
var retrievalTasks = new List<Task<IReadOnlyList<IRetrievalContext>>>(selectedDataSources.Count);
210+
foreach (var dataSource in selectedDataSources)
211+
retrievalTasks.Add(dataSource.RetrieveDataAsync(lastPrompt, chatThread, token));
212+
213+
//
214+
// Wait for all retrieval tasks to finish:
215+
//
216+
foreach (var retrievalTask in retrievalTasks)
217+
{
218+
try
219+
{
220+
dataContexts.AddRange(await retrievalTask);
221+
}
222+
catch (Exception e)
223+
{
224+
logger.LogError(e, "An error occurred during the retrieval process.");
225+
}
226+
}
205227
}
206228

207229
//

app/MindWork AI Studio/Chat/IContent.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,15 @@ public interface IContent
4242
/// Uses the provider to create the content.
4343
/// </summary>
4444
public Task CreateFromProviderAsync(IProvider provider, Model chatModel, IContent? lastPrompt, ChatThread? chatChatThread, CancellationToken token = default);
45+
46+
/// <summary>
/// Maps this content to the matching ERI content type.
/// </summary>
/// <remarks>
/// Text content maps to TEXT, image content maps to IMAGE, and every other
/// content implementation maps to UNKNOWN.
/// </remarks>
public Tools.ERIClient.DataModel.ContentType ToERIContentType
{
    get
    {
        if (this is ContentText)
            return Tools.ERIClient.DataModel.ContentType.TEXT;

        if (this is ContentImage)
            return Tools.ERIClient.DataModel.ContentType.IMAGE;

        return Tools.ERIClient.DataModel.ContentType.UNKNOWN;
    }
}
4556
}

app/MindWork AI Studio/Settings/DataModel/DataSourceERI_V1.cs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
11
// ReSharper disable InconsistentNaming
22

33
using AIStudio.Assistants.ERI;
4+
using AIStudio.Chat;
5+
using AIStudio.Tools.ERIClient;
46
using AIStudio.Tools.ERIClient.DataModel;
7+
using AIStudio.Tools.RAG;
8+
using AIStudio.Tools.Services;
9+
10+
using ChatThread = AIStudio.Chat.ChatThread;
11+
using ContentType = AIStudio.Tools.ERIClient.DataModel.ContentType;
512

613
namespace AIStudio.Settings.DataModel;
714

@@ -43,4 +50,85 @@ public DataSourceERI_V1()
4350

4451
/// <inheritdoc />
4552
public ERIVersion Version { get; init; } = ERIVersion.V1;
53+
54+
/// <inheritdoc />
/// <remarks>
/// Authenticates against the ERI server, sends the last prompt together with the
/// converted chat thread, and maps the returned ERI contexts to the generic
/// retrieval contexts. Whenever no client is available, the authentication fails,
/// or the retrieval fails, a warning is logged and an empty list is returned.
/// ERI contexts that are neither TEXT nor IMAGE are logged and skipped.
/// </remarks>
public async Task<IReadOnlyList<IRetrievalContext>> RetrieveDataAsync(IContent lastPrompt, ChatThread thread, CancellationToken token = default)
{
    // Important: Do not dispose the RustService here, as it is a singleton.
    var rustService = Program.SERVICE_PROVIDER.GetRequiredService<RustService>();
    var logger = Program.SERVICE_PROVIDER.GetRequiredService<ILogger<DataSourceERI_V1>>();

    // The factory result is nullable. Handle the missing client explicitly instead
    // of suppressing the warning and failing later with a NullReferenceException:
    using var eriClient = ERIClientFactory.Get(this.Version, this);
    if (eriClient is null)
    {
        logger.LogWarning($"Was not able to create an ERI client for the ERI data source '{this.Name}' (ERI version '{this.Version}').");
        return [];
    }

    var authResponse = await eriClient.AuthenticateAsync(this, rustService, token);
    if (!authResponse.Successful)
    {
        logger.LogWarning($"Was not able to authenticate with the ERI data source '{this.Name}'. Message: {authResponse.Message}");
        return [];
    }

    var retrievalRequest = new RetrievalRequest
    {
        LatestUserPromptType = lastPrompt.ToERIContentType,
        LatestUserPrompt = lastPrompt switch
        {
            ContentText text => text.Text,
            ContentImage image => await image.AsBase64(token),
            _ => string.Empty
        },

        Thread = await thread.ToERIChatThread(token),
        MaxMatches = 10,
        RetrievalProcessId = null, // The ERI server selects the retrieval process when multiple processes are available
        Parameters = null, // The ERI server selects useful default parameters
    };

    var retrievalResponse = await eriClient.ExecuteRetrievalAsync(retrievalRequest, token);
    if (retrievalResponse is not { Successful: true, Data: { } eriContexts })
    {
        logger.LogWarning($"Was not able to retrieve data from the ERI data source '{this.Name}'. Message: {retrievalResponse.Message}");
        return [];
    }

    //
    // Next, we have to transform the ERI context back to our generic retrieval context:
    //
    var genericRetrievalContexts = new List<IRetrievalContext>(eriContexts.Count);
    foreach (var eriContext in eriContexts)
    {
        switch (eriContext.Type)
        {
            case ContentType.TEXT:
                genericRetrievalContexts.Add(new RetrievalTextContext
                {
                    Path = eriContext.Path ?? string.Empty,
                    Type = eriContext.ToRetrievalContentType(),
                    Links = eriContext.Links,
                    Category = RetrievalContentCategory.TEXT,
                    MatchedText = eriContext.MatchedContent,
                    DataSourceName = eriContext.Name,
                    SurroundingContent = eriContext.SurroundingContent,
                });
                break;

            case ContentType.IMAGE:
                genericRetrievalContexts.Add(new RetrievalImageContext
                {
                    Path = eriContext.Path ?? string.Empty,
                    Type = eriContext.ToRetrievalContentType(),
                    Links = eriContext.Links,
                    Source = eriContext.MatchedContent,
                    Category = RetrievalContentCategory.IMAGE,
                    SourceType = ContentImageSource.BASE64,
                    DataSourceName = eriContext.Name,
                });
                break;

            default:
                // Unsupported context types are skipped, not treated as a failure:
                logger.LogWarning($"The ERI context type '{eriContext.Type}' is not supported yet.");
                break;
        }
    }

    return genericRetrievalContexts;
}
46134
}

app/MindWork AI Studio/Settings/DataModel/DataSourceLocalDirectory.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
using AIStudio.Chat;
2+
using AIStudio.Tools.RAG;
3+
14
namespace AIStudio.Settings.DataModel;
25

36
/// <summary>
@@ -27,6 +30,13 @@ public DataSourceLocalDirectory()
2730
/// <inheritdoc />
2831
public DataSourceSecurity SecurityPolicy { get; init; } = DataSourceSecurity.NOT_SPECIFIED;
2932

33+
/// <inheritdoc />
/// <remarks>
/// This implementation ignores its arguments and returns an already completed
/// task holding a new, empty list.
/// </remarks>
public Task<IReadOnlyList<IRetrievalContext>> RetrieveDataAsync(IContent lastPrompt, ChatThread thread, CancellationToken token = default) =>
    Task.FromResult<IReadOnlyList<IRetrievalContext>>(new List<IRetrievalContext>());
39+
3040
/// <summary>
3141
/// The path to the directory.
3242
/// </summary>

app/MindWork AI Studio/Settings/DataModel/DataSourceLocalFile.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
using AIStudio.Chat;
2+
using AIStudio.Tools.RAG;
3+
14
namespace AIStudio.Settings.DataModel;
25

36
/// <summary>
@@ -27,6 +30,13 @@ public DataSourceLocalFile()
2730
/// <inheritdoc />
2831
public DataSourceSecurity SecurityPolicy { get; init; } = DataSourceSecurity.NOT_SPECIFIED;
2932

33+
/// <inheritdoc />
/// <remarks>
/// This implementation ignores its arguments and returns an already completed
/// task holding a new, empty list.
/// </remarks>
public Task<IReadOnlyList<IRetrievalContext>> RetrieveDataAsync(IContent lastPrompt, ChatThread thread, CancellationToken token = default) =>
    Task.FromResult<IReadOnlyList<IRetrievalContext>>(new List<IRetrievalContext>());
39+
3040
/// <summary>
3141
/// The path to the file.
3242
/// </summary>

app/MindWork AI Studio/Settings/IDataSource.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
using System.Text.Json.Serialization;
22

3+
using AIStudio.Chat;
34
using AIStudio.Settings.DataModel;
5+
using AIStudio.Tools.RAG;
46

57
namespace AIStudio.Settings;
68

@@ -37,4 +39,13 @@ public interface IDataSource
3739
/// Which data security policy is applied to this data source?
3840
/// </summary>
3941
public DataSourceSecurity SecurityPolicy { get; init; }
42+
43+
/// <summary>
44+
/// Performs the data retrieval process of this data source for the given prompt.
45+
/// </summary>
46+
/// <param name="lastPrompt">The last prompt from the chat; the content to retrieve data for.</param>
47+
/// <param name="thread">The chat thread the prompt belongs to.</param>
48+
/// <param name="token">The cancellation token.</param>
49+
/// <returns>The retrieved data contexts; the list may be empty.</returns>
50+
public Task<IReadOnlyList<IRetrievalContext>> RetrieveDataAsync(IContent lastPrompt, ChatThread thread, CancellationToken token = default);
4051
}

app/MindWork AI Studio/Tools/RAG/RetrievalContentTypeExtensions.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ namespace AIStudio.Tools.RAG;
44

55
public static class RetrievalContentTypeExtensions
66
{
7-
public static RetrievalContentType ToRetrievalContentType(Context eriContext)
7+
public static RetrievalContentType ToRetrievalContentType(this Context eriContext)
88
{
99
//
1010
// Right now, we have to parse the category string along the type enum to

0 commit comments

Comments
 (0)