From c8783dbf0fb36a7f08feee9c26d23bac3571a95a Mon Sep 17 00:00:00 2001 From: meili-bot <74670311+meili-bot@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:22:04 +0200 Subject: [PATCH 1/5] Update README.md From 466293340b39eace7d357a1c98d77d43d4dbbe49 Mon Sep 17 00:00:00 2001 From: Morgane Dubus <30866152+mdubus@users.noreply.github.com> Date: Thu, 10 Oct 2024 10:46:25 +0200 Subject: [PATCH 2/5] feat: AI-powered search changes for v1.11 (#1742) --- src/types/types.ts | 11 +++- tests/__snapshots__/settings.test.ts.snap | 10 ++-- tests/embedders.test.ts | 63 +++++++++++++++++++++++ tests/get_search.test.ts | 35 ------------- tests/search.test.ts | 39 -------------- 5 files changed, 79 insertions(+), 79 deletions(-) diff --git a/src/types/types.ts b/src/types/types.ts index b6feccdd6..9d9a5e312 100644 --- a/src/types/types.ts +++ b/src/types/types.ts @@ -98,7 +98,7 @@ export type SearchForFacetValuesResponse = { }; export type HybridSearch = { - embedder?: string; + embedder: string; semanticRatio?: number; }; @@ -389,6 +389,8 @@ export type OpenAiEmbedder = { dimensions?: number; distribution?: Distribution; url?: string; + documentTemplateMaxBytes?: number; + binaryQuantized?: boolean; }; export type HuggingFaceEmbedder = { @@ -397,12 +399,15 @@ export type HuggingFaceEmbedder = { revision?: string; documentTemplate?: string; distribution?: Distribution; + documentTemplateMaxBytes?: number; + binaryQuantized?: boolean; }; export type UserProvidedEmbedder = { source: "userProvided"; dimensions: number; distribution?: Distribution; + binaryQuantized?: boolean; }; export type RestEmbedder = { @@ -415,6 +420,8 @@ export type RestEmbedder = { request: Record; response: Record; headers?: Record; + documentTemplateMaxBytes?: number; + binaryQuantized?: boolean; }; export type OllamaEmbedder = { @@ -425,6 +432,8 @@ export type OllamaEmbedder = { documentTemplate?: string; distribution?: Distribution; dimensions?: number; + documentTemplateMaxBytes?: number; + binaryQuantized?: boolean; }; export type Embedder = diff --git a/tests/__snapshots__/settings.test.ts.snap b/tests/__snapshots__/settings.test.ts.snap index e05695111..069ab0055 100644 --- a/tests/__snapshots__/settings.test.ts.snap +++ b/tests/__snapshots__/settings.test.ts.snap @@ -249,8 +249,9 @@ exports[`Test on settings > Admin key: Update embedders settings 1`] = ` "distinctAttribute": null, "embedders": { "default": { - "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }} -{% endfor %}", + "documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }} +{% endif %}{% endfor %}", + "documentTemplateMaxBytes": 400, "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "source": "huggingFace", }, @@ -804,8 +805,9 @@ exports[`Test on settings > Master key: Update embedders settings 1`] = ` "distinctAttribute": null, "embedders": { "default": { - "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }} -{% endfor %}", + "documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }} +{% endif %}{% endfor %}", + "documentTemplateMaxBytes": 400, "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "source": "huggingFace", }, diff --git a/tests/embedders.test.ts b/tests/embedders.test.ts index 3bbffe265..02c061301 100644 --- a/tests/embedders.test.ts +++ b/tests/embedders.test.ts @@ -90,6 +90,7 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])( mean: 0.7, sigma: 0.3, }, + binaryQuantized: false, }, }; const task: EnqueuedTask = await client @@ -101,6 +102,7 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])( const response: Embedders = await client.index(index.uid).getEmbedders(); expect(response).toEqual(newEmbedder); + expect(response).not.toHaveProperty("documentTemplateMaxBytes"); }); test(`${permission} key: Update embedders with 'openAi' source`, async () => { @@ -118,6 +120,8 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])( sigma: 0.3, }, url: "https://api.openai.com/v1/embeddings", + documentTemplateMaxBytes: 500, + binaryQuantized: false, }, }; const task: EnqueuedTask = await client @@ -147,6 +151,8 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])( mean: 0.7, sigma: 0.3, }, + documentTemplateMaxBytes: 500, + binaryQuantized: false, }, }; const task: EnqueuedTask = await client @@ -188,6 +194,8 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])( headers: { "Custom-Header": "CustomValue", }, + documentTemplateMaxBytes: 500, + binaryQuantized: false, }, }; const task: EnqueuedTask = await client @@ -219,6 +227,8 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])( sigma: 0.3, }, dimensions: 512, + documentTemplateMaxBytes: 500, + binaryQuantized: false, }, }; const task: EnqueuedTask = await client @@ -266,6 +276,58 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])( expect(response).toEqual(null); }); + test(`${permission} key: search (POST) with vectors`, async () => { + const client = await getClient(permission); + + const { taskUid } = await client.index(index.uid).updateEmbedders({ + default: { + source: "userProvided", + dimensions: 1, + }, + }); + await client.waitForTask(taskUid); + + const response = await client.index(index.uid).search("", { + vector: [1], + hybrid: { + embedder: "default", + semanticRatio: 1.0, + }, + }); + + expect(response).toHaveProperty("hits"); + expect(response).toHaveProperty("semanticHitCount"); + // Those fields are no longer returned by the search response + // We want to ensure that they don't appear in it anymore + expect(response).not.toHaveProperty("vector"); + expect(response).not.toHaveProperty("_semanticScore"); + }); + + test(`${permission} key: search (GET) with vectors`, async () => { + const client = await getClient(permission); + + const { taskUid } = await client.index(index.uid).updateEmbedders({ + default: { + source: "userProvided", + dimensions: 1, + }, + }); + await client.waitForTask(taskUid); + + const response = await client.index(index.uid).searchGet("", { + vector: [1], + hybridEmbedder: "default", + hybridSemanticRatio: 1.0, + }); + + expect(response).toHaveProperty("hits"); + expect(response).toHaveProperty("semanticHitCount"); + // Those fields are no longer returned by the search response + // We want to ensure that they don't appear in it anymore + expect(response).not.toHaveProperty("vector"); + expect(response).not.toHaveProperty("_semanticScore"); + }); + test(`${permission} key: search for similar documents`, async () => { const client = await getClient(permission); @@ -288,6 +350,7 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])( await client.waitForTask(documentAdditionTask); const response = await client.index(index.uid).searchSimilarDocuments({ + embedder: "manual", id: "143", }); diff --git a/tests/get_search.test.ts b/tests/get_search.test.ts index f5834c355..241c72539 100644 --- a/tests/get_search.test.ts +++ b/tests/get_search.test.ts @@ -457,41 +457,6 @@ describe.each([ "The filter query parameter should be in string format when using searchGet", ); }); - test(`${permission} key: search with vectors`, async () => { - const client = await getClient(permission); - const adminClient = await getClient("Admin"); - const adminKey = await getKey("Admin"); - - await fetch(`${HOST}/experimental-features`, { - body: JSON.stringify({ vectorStore: true }), - headers: { - Authorization: `Bearer ${adminKey}`, - "Content-Type": "application/json", - }, - method: "PATCH", - }); - - const { taskUid } = await adminClient - .index(emptyIndex.uid) - .updateEmbedders({ - default: { - source: "userProvided", - dimensions: 1, - }, - }); - await adminClient.waitForTask(taskUid); - - const response = await client - .index(emptyIndex.uid) - .searchGet("", { vector: [1], hybridSemanticRatio: 1.0 }); - - expect(response).toHaveProperty("hits"); - expect(response).toHaveProperty("semanticHitCount"); - // Those fields are no longer returned by the search response - // We want to ensure that they don't appear in it anymore - expect(response).not.toHaveProperty("vector"); - expect(response).not.toHaveProperty("_semanticScore"); - }); test(`${permission} key: search without vectors`, async () => { const client = await getClient(permission); diff --git a/tests/search.test.ts b/tests/search.test.ts index f4cf6658b..df007036c 100644 --- a/tests/search.test.ts +++ b/tests/search.test.ts @@ -936,45 +936,6 @@ describe.each([ expect(response.hits.length).toEqual(0); }); - test(`${permission} key: search with vectors`, async () => { - const client = await getClient(permission); - const adminClient = await getClient("Admin"); - const adminKey = await getKey("Admin"); - - await fetch(`${HOST}/experimental-features`, { - body: JSON.stringify({ vectorStore: true }), - headers: { - Authorization: `Bearer ${adminKey}`, - "Content-Type": "application/json", - }, - method: "PATCH", - }); - - const { taskUid } = await adminClient - .index(emptyIndex.uid) - .updateEmbedders({ - default: { - source: "userProvided", - dimensions: 1, - }, - }); - await adminClient.waitForTask(taskUid); - - const response = await client.index(emptyIndex.uid).search("", { - vector: [1], - hybrid: { - semanticRatio: 1.0, - }, - }); - - expect(response).toHaveProperty("hits"); - expect(response).toHaveProperty("semanticHitCount"); - // Those fields are no longer returned by the search response - // We want to ensure that they don't appear in it anymore - expect(response).not.toHaveProperty("vector"); - expect(response).not.toHaveProperty("_semanticScore"); - }); - test(`${permission} key: search without vectors`, async () => { const client = await getClient(permission); const response = await client.index(index.uid).search("prince", {}); From edf3af0fa66d52bc7cae981dff729bbc1f7ca9ea Mon Sep 17 00:00:00 2001 From: Morgane Dubus <30866152+mdubus@users.noreply.github.com> Date: Thu, 10 Oct 2024 12:16:34 +0200 Subject: [PATCH 3/5] feat: support facet distribution for federated search (#1743) --- src/types/types.ts | 20 ++++- tests/search.test.ts | 169 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 187 insertions(+), 2 deletions(-) diff --git a/src/types/types.ts b/src/types/types.ts index 9d9a5e312..0713066e5 100644 --- a/src/types/types.ts +++ b/src/types/types.ts @@ -153,8 +153,17 @@ export type SearchRequestGET = Pagination & locales?: Locale[]; }; +export type MergeFacets = { + maxValuesPerFacet?: number | null; +}; + export type FederationOptions = { weight: number }; -export type MultiSearchFederation = { limit?: number; offset?: number }; +export type MultiSearchFederation = { + limit?: number; + offset?: number; + facetsByIndex?: Record; + mergeFacets?: MergeFacets | null; +}; export type MultiSearchQuery = SearchParams & { indexUid: string }; export type MultiSearchQueryWithFederation = MultiSearchQuery & { @@ -229,6 +238,14 @@ export type Hits> = Array>; export type FacetStat = { min: number; max: number }; export type FacetStats = Record; +export type FacetsByIndex = Record< + string, + { + distribution: FacetDistribution; + stats: FacetStats; + } +>; + export type SearchResponse< T = Record, S extends SearchParams | undefined = undefined, @@ -238,6 +255,7 @@ export type SearchResponse< query: string; facetDistribution?: FacetDistribution; facetStats?: FacetStats; + facetsByIndex?: FacetsByIndex; } & (undefined extends S ? Partial : true extends IsFinitePagination> diff --git a/tests/search.test.ts b/tests/search.test.ts index df007036c..30bfdc1c7 100644 --- a/tests/search.test.ts +++ b/tests/search.test.ts @@ -25,7 +25,7 @@ if (typeof fetch === "undefined") { } const index = { - uid: "movies_test", + uid: "books", }; const emptyIndex = { uid: "empty_test", @@ -80,6 +80,26 @@ const dataset = [ { id: 42, title: "The Hitchhiker's Guide to the Galaxy", genre: "fantasy" }, ]; +type Movies = { + id: number; + title: string; +}; + +const movies = [ + { + id: 1, + title: "Pride and Prejudice", + }, + { + id: 2, + title: "The Hobbit: An Unexpected Journey", + }, + { + id: 3, + title: "Harry Potter and the Half-Blood Prince", + }, +]; + describe.each([ { permission: "Master" }, { permission: "Admin" }, @@ -194,6 +214,153 @@ describe.each([ expect(response2.hits[0].id).toEqual(1344); }); + test(`${permission} key: Multi search with facetsByIndex`, async () => { + const client = await getClient(permission); + const masterClient = await getClient("Master"); + + // Setup to have a new "movies" index + await masterClient.createIndex("movies"); + const newFilterableAttributes = ["title", "id"]; + const { taskUid: task1 }: EnqueuedTask = await masterClient + .index("movies") + .updateSettings({ + filterableAttributes: newFilterableAttributes, + sortableAttributes: ["id"], + }); + await masterClient.waitForTask(task1); + const { taskUid: task2 } = await masterClient + .index("movies") + .addDocuments(movies); + await masterClient.waitForTask(task2); + + // Make a multi search on both indexes with facetsByIndex + const response = await client.multiSearch({ + federation: { + limit: 20, + offset: 0, + facetsByIndex: { + movies: ["title", "id"], + books: ["title"], + }, + }, + queries: [ + { + q: "Hobbit", + indexUid: "movies", + }, + { + q: "Hobbit", + indexUid: "books", + }, + ], + }); + + expect(response).toHaveProperty("hits"); + expect(Array.isArray(response.hits)).toBe(true); + expect(response.hits.length).toEqual(2); + + expect(response).toHaveProperty("facetsByIndex"); + expect(response.facetsByIndex).toHaveProperty("movies"); + expect(response.facetsByIndex).toHaveProperty("books"); + + // Test search response on "movies" index + expect(response.facetsByIndex?.movies).toEqual({ + distribution: { + title: { + "The Hobbit: An Unexpected Journey": 1, + }, + id: { + "2": 1, + }, + }, + stats: { + id: { + min: 2, + max: 2, + }, + }, + }); + + // Test search response on "books" index + expect(response.facetsByIndex?.books).toEqual({ + distribution: { + title: { + "The Hobbit": 1, + }, + }, + stats: {}, + }); + }); + + test(`${permission} key: Multi search with mergeFacets`, async () => { + const client = await getClient(permission); + const masterClient = await getClient("Master"); + + // Setup to have a new "movies" index + await masterClient.createIndex("movies"); + const newFilterableAttributes = ["title", "id"]; + const { taskUid: task1 }: EnqueuedTask = await masterClient + .index("movies") + .updateSettings({ + filterableAttributes: newFilterableAttributes, + sortableAttributes: ["id"], + }); + await masterClient.waitForTask(task1); + const { taskUid: task2 } = await masterClient + .index("movies") + .addDocuments(movies); + await masterClient.waitForTask(task2); + + // Make a multi search on both indexes with mergeFacets + const response = await client.multiSearch({ + federation: { + limit: 20, + offset: 0, + facetsByIndex: { + movies: ["title", "id"], + books: ["title"], + }, + mergeFacets: { + maxValuesPerFacet: 10, + }, + }, + queries: [ + { + q: "Hobbit", + indexUid: "movies", + }, + { + q: "Hobbit", + indexUid: "books", + }, + ], + }); + + expect(response).toHaveProperty("hits"); + expect(Array.isArray(response.hits)).toBe(true); + expect(response.hits.length).toEqual(2); + + expect(response).toHaveProperty("facetDistribution"); + expect(response).toHaveProperty("facetStats"); + + expect(response.facetDistribution).toEqual({ + title: { + "The Hobbit": 1, + "The Hobbit: An Unexpected Journey": 1, + }, + id: { + "2": 1, + }, + }); + + expect(response.facetStats).toEqual({ + id: { + min: 2, + max: 2, + }, + }); + }); + test(`${permission} key: Basic search`, async () => { const client = await getClient(permission); const response = await client.index(index.uid).search("prince", {}); From b90cc52aecac8bf7e740b2caaa5e5243969eb156 Mon Sep 17 00:00:00 2001 From: curquiza Date: Thu, 10 Oct 2024 17:27:02 +0200 Subject: [PATCH 4/5] Add code samples --- .code-samples.meilisearch.yaml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.code-samples.meilisearch.yaml b/.code-samples.meilisearch.yaml index 93791c842..b6cfaf2b5 100644 --- a/.code-samples.meilisearch.yaml +++ b/.code-samples.meilisearch.yaml @@ -759,7 +759,16 @@ search_parameter_reference_ranking_score_threshold_1: |- search_parameter_reference_retrieve_vectors_1: |- client.index('INDEX_NAME').search('kitchen utensils', { retrieveVectors: true, - hybrid: { embedder: 'default'} + hybrid: { + embedder: 'EMBEDDER_NAME' + } + }) +search_parameter_guide_hybrid_1: |- + client.index('INDEX_NAME').search('kitchen utensils', { + hybrid: { + semanticRatio: 0.9, + embedder: 'EMBEDDER_NAME' + } }) get_similar_post_1: |- client.index('INDEX_NAME').searchSimilarDocuments({ id: 'TARGET_DOCUMENT_ID'}) @@ -788,7 +797,7 @@ multi_search_federated_1: |- ] }) search_parameter_reference_locales_1: |- - client.index('INDEX_NAME').search('進撃の巨人', { locales: ['jpn'] }) + client.index('INDEX_NAME').search('QUERY TEXT IN JAPANESE', { locales: ['jpn'] }) get_localized_attribute_settings_1: |- client.index('INDEX_NAME').getLocalizedAttributes() update_localized_attribute_settings_1: |- From e66c4943466672469019331aefe84e881e9670b1 Mon Sep 17 00:00:00 2001 From: curquiza Date: Wed, 23 Oct 2024 21:46:51 +0200 Subject: [PATCH 5/5] Fix code samples --- .code-samples.meilisearch.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.code-samples.meilisearch.yaml b/.code-samples.meilisearch.yaml index b6cfaf2b5..f79be49de 100644 --- a/.code-samples.meilisearch.yaml +++ b/.code-samples.meilisearch.yaml @@ -771,7 +771,7 @@ search_parameter_guide_hybrid_1: |- } }) get_similar_post_1: |- - client.index('INDEX_NAME').searchSimilarDocuments({ id: 'TARGET_DOCUMENT_ID'}) + client.index('INDEX_NAME').searchSimilarDocuments({ id: 'TARGET_DOCUMENT_ID', embedder: 'default' }) search_parameter_guide_matching_strategy_3: |- client.index('movies').search('white shirt', { matchingStrategy: 'frequency'