Skip to content

Commit

Permalink
Merge pull request #632 from betalgo/629-how-to-use-chunking-strategy…
Browse files Browse the repository at this point in the history
…-in-vector-store

chunking strategy in vector store
  • Loading branch information
kayhantolga authored Sep 21, 2024
2 parents 7fdf98f + f086732 commit 4589f72
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 4 deletions.
28 changes: 28 additions & 0 deletions OpenAI.Playground/TestHelpers/AssistantHelpers/VectorTestHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,34 @@ public static async Task CreateVector(IOpenAIService openAI)
}
}

/// <summary>
///     Playground sample: creates a vector store named "Support FAQ" with an explicit static chunking
///     strategy (800-token chunks, 400-token overlap) and prints the outcome to the console.
/// </summary>
/// <param name="openAI">Service used to send the create-vector-store request.</param>
public static async Task CreateVectorWithChunkingStrategy(IOpenAIService openAI)
{
    ConsoleExtensions.WriteLine("Create Vector Testing is starting:", ConsoleColor.Cyan);

    var response = await openAI.Beta.VectorStores.CreateVectorStore(new()
    {
        Name = "Support FAQ",
        ChunkingStrategy = new()
        {
            Type = StaticValues.VectorStoreStatics.ChunkingStrategyType.Static,
            StaticParameters = new()
            {
                MaxChunkSizeTokens = 800,
                ChunkOverlapTokens = 400
            }
        }
    });

    // Report the failure and stop; nothing is stored when the call did not succeed.
    if (!response.Successful)
    {
        ConsoleExtensions.WriteError(response.Error);
        return;
    }

    // Keep the id of the store that was just created in CreatedVectorId.
    CreatedVectorId = response.Id;
    ConsoleExtensions.WriteLine($"Vector Created Successfully with ID: {response.Id}", ConsoleColor.Green);
}

public static async Task ListVectors(IOpenAIService openAI)
{
ConsoleExtensions.WriteLine("List Vectors Testing is starting:", ConsoleColor.Cyan);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,11 @@ public class CreateVectorStoreFileBatchRequest
/// </summary>
[JsonPropertyName("file_ids")]
public List<string> FileIds { get; set; }

/// <summary>
/// The chunking strategy used to chunk the file(s). If not set, will use the auto strategy.
/// Only applicable if file_ids is non-empty.
/// Mapped to the <c>chunking_strategy</c> JSON field; leave <c>null</c> when no explicit strategy is wanted.
/// </summary>
[JsonPropertyName("chunking_strategy")]
public ChunkingStrategy? ChunkingStrategy { get; set; }
}
39 changes: 39 additions & 0 deletions OpenAI.SDK/ObjectModels/RequestModels/CreateVectorStoreRequest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,43 @@ public class CreateVectorStoreRequest
/// </summary>
[JsonPropertyName("metadata")]
public Dictionary<string, string>? Metadata { get; set; }

/// <summary>
/// The chunking strategy used to chunk the file(s). If not set, will use the auto strategy.
/// Only applicable if file_ids is non-empty.
/// Mapped to the <c>chunking_strategy</c> JSON field; leave <c>null</c> when no explicit strategy is wanted.
/// </summary>
[JsonPropertyName("chunking_strategy")]
public ChunkingStrategy? ChunkingStrategy { get; set; }
}

/// <summary>
/// Describes how files are split into chunks. Mapped to the <c>chunking_strategy</c> JSON object
/// by the request and response models that expose a property of this type.
/// </summary>
public class ChunkingStrategy
{
/// <summary>
/// The type of chunking strategy. Must be either "auto" or "static".
/// The matching string constants are available as
/// <c>StaticValues.VectorStoreStatics.ChunkingStrategyType.Auto</c> and
/// <c>StaticValues.VectorStoreStatics.ChunkingStrategyType.Static</c>.
/// </summary>
[JsonPropertyName("type")]
public string Type { get; set; }

/// <summary>
/// The static chunking parameters. Required if type is "static".
/// Mapped to the <c>static</c> JSON field.
/// </summary>
[JsonPropertyName("static")]
public StaticChunkingParameters? StaticParameters { get; set; }
}

/// <summary>
/// Parameters of the "static" chunking strategy.
/// Both properties are plain <c>int</c> values, so an instance that leaves one unset carries 0 for it
/// rather than the API defaults quoted below; set both explicitly.
/// </summary>
public class StaticChunkingParameters
{
/// <summary>
/// The maximum number of tokens in each chunk. The default value is 800.
/// The minimum value is 100 and the maximum value is 4096.
/// </summary>
[JsonPropertyName("max_chunk_size_tokens")]
public int MaxChunkSizeTokens { get; set; }

/// <summary>
/// The number of tokens that overlap between chunks. The default value is 400.
/// Note that the overlap must not exceed half of max_chunk_size_tokens.
/// </summary>
[JsonPropertyName("chunk_overlap_tokens")]
public int ChunkOverlapTokens { get; set; }
}
59 changes: 55 additions & 4 deletions OpenAI.SDK/ObjectModels/RequestModels/ToolDefinition.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,18 @@ public class ToolDefinition
public string Type { get; set; }

/// <summary>
/// Structured Outputs for function calling can be enabled with a single parameter, just by supplying strict: true.
/// Please note: This field is not mentioned in the API documentation but is referenced in other documents.
/// </summary>
[JsonPropertyName("strict")]
public bool? Strict { get; set; }

/// <summary>
/// Overrides for the file search tool (maximum number of results and ranking options).
/// Mapped to the <c>file_search</c> JSON field; <c>null</c> when no overrides are supplied.
/// </summary>
[JsonPropertyName("file_search")]
public FileSearchTool? FileSearchTool { get; set; }

/// <summary>
/// A list of functions the model may generate JSON inputs for.
/// </summary>
Expand Down Expand Up @@ -73,11 +79,56 @@ public static ToolDefinition DefineRetrieval()
};
}

/// <summary>
///     Creates a tool definition whose type is the file search tool call type.
/// </summary>
/// <param name="fileSearchTool">
///     Optional overrides for the file search tool. When omitted, the resulting definition's
///     <c>FileSearchTool</c> property is <c>null</c>, so existing parameterless callers keep working.
/// </param>
/// <returns>A new <see cref="ToolDefinition" /> with <c>Type</c> and <c>FileSearchTool</c> set.</returns>
public static ToolDefinition DefineFileSearch(FileSearchTool? fileSearchTool = null)
{
    return new()
    {
        Type = StaticValues.AssistantsStatics.ToolCallTypes.FileSearch,
        FileSearchTool = fileSearchTool
    };
}
}

/// <summary>
/// Overrides for the file search tool: an optional cap on the number of results and optional ranking options.
/// </summary>
public class FileSearchTool
{
/// <summary>
/// The maximum number of results the file search tool should output. The default is 20 for gpt-4* models and 5 for
/// gpt-3.5-turbo. This number should be between 1 and 50 inclusive.
/// Note that the file search tool may output fewer than max_num_results results.
/// See the
/// <a href="https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings">
/// file search tool documentation
/// </a>
/// for more information.
/// </summary>
[JsonPropertyName("max_num_results")]
public int? MaxNumberResults { get; set; }

/// <summary>
/// The ranking options for the file search. If not specified, the file search tool will use the auto ranker and a
/// score_threshold of 0.
/// See the
/// <a href="https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings">
/// file search tool documentation
/// </a>
/// for more information.
/// </summary>
[JsonPropertyName("ranking_options")]
public RankingOptions? RankingOptions { get; set; }
}

/// <summary>
///     Ranking options for the file search tool: which ranker to use and the minimum score a result must reach.
/// </summary>
public class RankingOptions
{
    /// <summary>
    ///     The ranker to use for the file search. If not specified will use the auto ranker.
    /// </summary>
    [JsonPropertyName("ranker")]
    public string? Ranker { get; set; }

    /// <summary>
    ///     The score threshold for the file search. All values must be a floating point number between 0 and 1.
    ///     Declared as <see cref="double" /> so fractional thresholds such as 0.5 can be expressed; an integer
    ///     type could only carry 0 or 1. The default remains 0.
    /// </summary>
    [JsonPropertyName("score_threshold")]
    public double ScoreThreshold { get; set; }
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Text.Json.Serialization;
using OpenAI.ObjectModels.RequestModels;

namespace OpenAI.ObjectModels.ResponseModels.VectorStoreResponseModels;

Expand Down Expand Up @@ -46,4 +47,10 @@ public record VectorStoreFileObject : BaseResponse
/// </summary>
[JsonPropertyName("last_error")]
public Error? LastError { get; set; }

/// <summary>
/// The strategy used to chunk the file.
/// Read from the <c>chunking_strategy</c> JSON field.
/// </summary>
[JsonPropertyName("chunking_strategy")]
public ChunkingStrategy? ChunkingStrategy { get; set; }
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Text.Json.Serialization;
using OpenAI.ObjectModels.RequestModels;

namespace OpenAI.ObjectModels.ResponseModels.VectorStoreResponseModels;

Expand Down Expand Up @@ -63,4 +64,11 @@ public record VectorStoreObjectResponse : BaseResponse
/// </summary>
[JsonPropertyName("metadata")]
public Dictionary<string, string>? Metadata { get; set; }

// NOTE(review): the summary below reuses the wording of the create-request property; on this response
// model the value is whatever the chunking_strategy field of the response carries. Confirm against the API reference.
/// <summary>
/// The chunking strategy used to chunk the file(s). If not set, will use the auto strategy.
/// Only applicable if file_ids is non-empty.
/// </summary>
[JsonPropertyName("chunking_strategy")]
public ChunkingStrategy? ChunkingStrategy { get; set; }
}
9 changes: 9 additions & 0 deletions OpenAI.SDK/ObjectModels/StaticValueHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,13 @@ public static class RequiredActionTypes
public static string SubmitToolOutputs => "submit_tool_outputs";
}
}

/// <summary>
/// String constants used with vector store models.
/// </summary>
public static class VectorStoreStatics
{
/// <summary>
/// Values for a chunking strategy's type.
/// </summary>
public static class ChunkingStrategyType
{
/// <summary>The "auto" chunking strategy type.</summary>
public static string Auto => "auto";
/// <summary>The "static" chunking strategy type.</summary>
public static string Static => "static";
}
}
}

0 comments on commit 4589f72

Please sign in to comment.