Skip to content

Commit

Permalink
Merge pull request #1 from ahmednfwela/pr/memishood/398
Browse files Browse the repository at this point in the history
improvements to "implement experimental ai-powered search #398"
  • Loading branch information
memishood authored Jan 11, 2025
2 parents b7f7e2a + 49699f2 commit 5a9144f
Show file tree
Hide file tree
Showing 6 changed files with 232 additions and 139 deletions.
16 changes: 7 additions & 9 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ First of all, thank you for contributing to Meilisearch! The goal of this docume
- [Setup ](#setup-)
- [Tests and Linter ](#tests-and-linter-)
- [Updating code samples](#updating-code-samples)
- [Run unit tests for embedders](#run-unit-tests-for-embedders)
- [Run integration tests for embedders](#run-integration-tests-for-embedders)
- [OpenAI Model Integration](#openai-model-integration)
- [Git Guidelines](#git-guidelines)
- [Git Branches ](#git-branches-)
- [Git Commits ](#git-commits-)
Expand Down Expand Up @@ -105,20 +106,17 @@ The process to define a new code sample is as follows:
dart run ./tool/bin/meili.dart update-samples --fail-on-change
```
### Run unit tests for embedders
### Run integration tests for embedders
Unit tests for embedders are located in `test/search_test.dart`
Integration tests for embedders are located in `test/search_test.dart`.
#### OpenAI Model Integration
The tests use OpenAI models to generate embeddings. Ensure you have a valid OpenAI API key before running these tests.
- Generate an OpenAI API Key
- Provide the API Key
- You can provide the OpenAI API key in one of two ways:
- Pass the key via terminal by setting an environment variable: `export OPEN_AI_API_KEY=your_openai_api_key`
- This will not work for flutter web when running unit tests
- Set the key directly in the `openAiKey` variable located in: `test/utils/client.dart`
- You can uncomment and run the embedders unit tests in `test/search_test.dart`
- Provide the API Key in one of two ways:
- Pass the key via an environment variable: `export OPEN_AI_API_KEY=your_openai_api_key` (does not work on Dart web)
- Pass the key via a Dart define: `dart --define=OPEN_AI_API_KEY=your_openai_api_key test --use-data-isolate-strategy` (works on both web and native)
## Git Guidelines
Expand Down
34 changes: 22 additions & 12 deletions lib/src/settings/distribution.dart
Original file line number Diff line number Diff line change
@@ -1,21 +1,31 @@
class Distribution {
final double mean;
final double sigma;
/// Describes the mean and sigma of the distribution of embedding similarity scores in the embedding space.
///
/// The intended use is to make the similarity score more comparable to the regular ranking score.
/// This makes it possible to correct effects where results are too "packed" around a certain value.
class DistributionShift {
/// Value where the results are "packed".
///
/// Similarity scores are translated so that they are packed around 0.5 instead.
final double currentMean;

Distribution({
required this.mean,
required this.sigma,
/// Standard deviation of a similarity score.
///
/// Set below 0.4 to make the results less packed around the mean, and above 0.4 to make them more packed.
final double currentSigma;

DistributionShift({
required this.currentMean,
required this.currentSigma,
});

factory Distribution.fromMap(Map<String, Object?> map) {
return Distribution(
mean: map['mean'] as double,
sigma: map['sigma'] as double,
factory DistributionShift.fromMap(Map<String, Object?> map) {
return DistributionShift(
currentMean: map['current_mean'] as double,
currentSigma: map['current_sigma'] as double,
);
}

Map<String, Object?> toMap() => {
'mean': mean,
'sigma': sigma,
'current_mean': currentMean,
'current_sigma': currentSigma,
};
}
22 changes: 11 additions & 11 deletions lib/src/settings/embedder.dart
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import './distribution.dart';

sealed class Embedder {
abstract class Embedder {
const Embedder();

Map<String, Object?> toMap();
Expand All @@ -25,7 +25,7 @@ class OpenAiEmbedder extends Embedder {
final String? apiKey;
final String? documentTemplate;
final int? dimensions;
final Distribution? distribution;
final DistributionShift? distribution;
final String? url;
final int? documentTemplateMaxBytes;
final bool? binaryQuantized;
Expand Down Expand Up @@ -63,7 +63,7 @@ class OpenAiEmbedder extends Embedder {
documentTemplate: map['documentTemplate'] as String?,
dimensions: map['dimensions'] as int?,
distribution: distribution is Map<String, Object?>
? Distribution.fromMap(distribution)
? DistributionShift.fromMap(distribution)
: null,
url: map['url'] as String?,
documentTemplateMaxBytes: map['documentTemplateMaxBytes'] as int?,
Expand All @@ -77,7 +77,7 @@ class HuggingFaceEmbedder extends Embedder {
final String? model;
final String? revision;
final String? documentTemplate;
final Distribution? distribution;
final DistributionShift? distribution;
final int? documentTemplateMaxBytes;
final bool? binaryQuantized;

Expand Down Expand Up @@ -107,7 +107,7 @@ class HuggingFaceEmbedder extends Embedder {
model: map['model'] as String?,
documentTemplate: map['documentTemplate'] as String?,
distribution: distribution is Map<String, Object?>
? Distribution.fromMap(distribution)
? DistributionShift.fromMap(distribution)
: null,
documentTemplateMaxBytes: map['documentTemplateMaxBytes'] as int?,
binaryQuantized: map['binaryQuantized'] as bool?,
Expand All @@ -118,7 +118,7 @@ class HuggingFaceEmbedder extends Embedder {
class UserProvidedEmbedder extends Embedder {
static const source = 'userProvided';
final int dimensions;
final Distribution? distribution;
final DistributionShift? distribution;
final bool? binaryQuantized;

const UserProvidedEmbedder({
Expand All @@ -141,7 +141,7 @@ class UserProvidedEmbedder extends Embedder {
return UserProvidedEmbedder(
dimensions: map['dimensions'] as int,
distribution: distribution is Map<String, Object?>
? Distribution.fromMap(distribution)
? DistributionShift.fromMap(distribution)
: null,
binaryQuantized: map['binaryQuantized'] as bool?,
);
Expand All @@ -156,7 +156,7 @@ class RestEmbedder extends Embedder {
final String? apiKey;
final int? dimensions;
final String? documentTemplate;
final Distribution? distribution;
final DistributionShift? distribution;
final Map<String, Object?>? headers;
final int? documentTemplateMaxBytes;
final bool? binaryQuantized;
Expand Down Expand Up @@ -200,7 +200,7 @@ class RestEmbedder extends Embedder {
dimensions: map['dimensions'] as int?,
documentTemplate: map['documentTemplate'] as String?,
distribution: distribution is Map<String, Object?>
? Distribution.fromMap(distribution)
? DistributionShift.fromMap(distribution)
: null,
headers: map['headers'] as Map<String, Object?>?,
documentTemplateMaxBytes: map['documentTemplateMaxBytes'] as int?,
Expand All @@ -215,7 +215,7 @@ class OllamaEmbedder extends Embedder {
final String? apiKey;
final String? model;
final String? documentTemplate;
final Distribution? distribution;
final DistributionShift? distribution;
final int? dimensions;
final int? documentTemplateMaxBytes;
final bool? binaryQuantized;
Expand Down Expand Up @@ -253,7 +253,7 @@ class OllamaEmbedder extends Embedder {
model: map['model'] as String?,
documentTemplate: map['documentTemplate'] as String?,
distribution: distribution is Map<String, Object?>
? Distribution.fromMap(distribution)
? DistributionShift.fromMap(distribution)
: null,
dimensions: map['dimensions'] as int?,
documentTemplateMaxBytes: map['documentTemplateMaxBytes'] as int?,
Expand Down
1 change: 1 addition & 0 deletions pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ dev_dependencies:
lints: ">=2.1.0 <4.0.0"
json_serializable: ^6.7.1
build_runner: ^2.4.6
pub_semver: ^2.1.5

screenshots:
- description: The Meilisearch logo.
Expand Down
Loading

0 comments on commit 5a9144f

Please sign in to comment.