diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs
index 562936d46c..ca35542926 100644
--- a/apps/desktop/src-tauri/src/lib.rs
+++ b/apps/desktop/src-tauri/src/lib.rs
@@ -120,6 +120,7 @@ pub async fn main() {
         .plugin(tauri_plugin_window_state::Builder::default().build())
         .plugin(tauri_plugin_listener::init())
         .plugin(tauri_plugin_listener2::init())
+        .plugin(tauri_plugin_tantivy::init())
         .plugin(tauri_plugin_audio_priority::init())
         .plugin(tauri_plugin_local_stt::init(
             tauri_plugin_local_stt::InitOptions {
diff --git a/apps/desktop/src/contexts/search/engine/content.ts b/apps/desktop/src/contexts/search/engine/content.ts
index 56288f6b75..141544596a 100644
--- a/apps/desktop/src/contexts/search/engine/content.ts
+++ b/apps/desktop/src/contexts/search/engine/content.ts
@@ -13,5 +13,10 @@ export function createSessionSearchableContent(
 export function createHumanSearchableContent(
   row: Record,
 ): string {
-  return mergeContent([row.email, row.job_title, row.linkedin_username]);
+  return mergeContent([
+    row.email,
+    row.job_title,
+    row.linkedin_username,
+    row.memo,
+  ]);
 }
diff --git a/apps/desktop/src/contexts/search/engine/filters.ts b/apps/desktop/src/contexts/search/engine/filters.ts
index 250250f193..c9c6bc7c09 100644
--- a/apps/desktop/src/contexts/search/engine/filters.ts
+++ b/apps/desktop/src/contexts/search/engine/filters.ts
@@ -1,31 +1,23 @@
+import type { SearchFilters as TantivySearchFilters } from "@hypr/plugin-tantivy";
+
 import type { SearchFilters } from "./types";
-export function buildOramaFilters(
+export function buildTantivyFilters(
   filters: SearchFilters | null,
-): Record | undefined {
+): TantivySearchFilters | undefined {
   if (!filters || !filters.created_at) {
     return undefined;
   }
-  const createdAtConditions: Record = {};
-
-  if (filters.created_at.gte !== undefined) {
-    createdAtConditions.gte = filters.created_at.gte;
-  }
-  if (filters.created_at.lte !== undefined) {
-    createdAtConditions.lte = filters.created_at.lte;
-  }
-  if (filters.created_at.gt !== undefined) {
-    createdAtConditions.gt = filters.created_at.gt;
-  }
-  if (filters.created_at.lt !== undefined) {
-    createdAtConditions.lt = filters.created_at.lt;
-  }
-  if (filters.created_at.eq !== undefined) {
-    createdAtConditions.eq = filters.created_at.eq;
-  }
-
-  return Object.keys(createdAtConditions).length > 0
-    ? { created_at: createdAtConditions }
-    : undefined;
+  return {
+    created_at: {
+      gte: filters.created_at.gte ?? null,
+      lte: filters.created_at.lte ?? null,
+      gt: filters.created_at.gt ?? null,
+      lt: filters.created_at.lt ?? null,
+      eq: filters.created_at.eq ?? null,
+    },
+    doc_type: null,
+    facet: null,
+  };
 }
diff --git a/apps/desktop/src/contexts/search/engine/index.tsx b/apps/desktop/src/contexts/search/engine/index.tsx
index 71bb682b6f..97d7173853 100644
--- a/apps/desktop/src/contexts/search/engine/index.tsx
+++ b/apps/desktop/src/contexts/search/engine/index.tsx
@@ -1,5 +1,3 @@
-import { create, search as oramaSearch } from "@orama/orama";
-import { pluginQPS } from "@orama/plugin-qps";
 import {
   createContext,
   useCallback,
@@ -9,17 +7,18 @@ import {
   useState,
 } from "react";
+import { commands as tantivy } from "@hypr/plugin-tantivy";
+
 import { type Store as MainStore } from "../../../store/tinybase/store/main";
-import { buildOramaFilters } from "./filters";
+import { buildTantivyFilters } from "./filters";
 import { indexHumans, indexOrganizations, indexSessions } from "./indexing";
 import {
   createHumanListener,
   createOrganizationListener,
   createSessionListener,
 } from "./listeners";
-import type { Index, SearchFilters, SearchHit } from "./types";
-import { SEARCH_SCHEMA } from "./types";
-import { normalizeQuery, parseQuery } from "./utils";
+import type { SearchEntityType, SearchFilters, SearchHit } from "./types";
+import { normalizeQuery } from "./utils";
 export type {
   SearchDocument,
@@ -44,7 +43,6 @@ export function SearchEngineProvider({
   store?: MainStore;
 }) {
   const [isIndexing, setIsIndexing] = useState(true);
-  const oramaInstance = useRef(null);
   const listenerIds = useRef([]);
   useEffect(() => {
@@ -56,31 +54,24 @@
     setIsIndexing(true);
     try {
-      const db = create({
-        schema: SEARCH_SCHEMA,
-        plugins: [pluginQPS()],
-      });
-
-      indexSessions(db, store);
-      indexHumans(db, store);
-      indexOrganizations(db, store);
-
-      oramaInstance.current = db;
+      await indexSessions(store);
+      await indexHumans(store);
+      await indexOrganizations(store);
       const listener1 = store.addRowListener(
         "sessions",
         null,
-        createSessionListener(oramaInstance.current),
+        createSessionListener(),
       );
       const listener2 = store.addRowListener(
         "humans",
         null,
-        createHumanListener(oramaInstance.current),
+        createHumanListener(),
       );
       const listener3 = store.addRowListener(
         "organizations",
         null,
-        createOrganizationListener(oramaInstance.current),
+        createOrganizationListener(),
       );
       listenerIds.current = [listener1, listener2, listener3];
@@ -106,43 +97,30 @@
       query: string,
       filters: SearchFilters | null = null,
     ): Promise => {
-      if (!oramaInstance.current) {
-        return [];
-      }
-
       const normalizedQuery = normalizeQuery(query);
+      const tantivyFilters = buildTantivyFilters(filters);
       try {
-        const whereClause = buildOramaFilters(filters);
-
-        if (normalizedQuery.length < 1) {
-          const searchResults = await oramaSearch(oramaInstance.current, {
-            term: "",
-            sortBy: {
-              property: "created_at",
-              order: "DESC",
-            },
-            limit: 10,
-            ...(whereClause && { where: whereClause }),
-          });
-          return searchResults.hits as SearchHit[];
-        }
+        const result = await tantivy.search({
+          query: normalizedQuery,
+          filters: tantivyFilters,
+        });
-        const parsed = parseQuery(query);
+        if (result.status === "error") {
+          console.error("Search failed:", result.error);
+          return [];
+        }
-        const searchResults = await oramaSearch(oramaInstance.current, {
-          term: parsed.term,
-          exact: parsed.exact,
-          boost: {
-            title: 3,
-            content: 1,
+        return result.data.hits.map((hit) => ({
+          score: hit.score,
+          document: {
+            id: hit.document.id,
+            type: hit.document.doc_type as SearchEntityType,
+            title: hit.document.title,
+            content: hit.document.content,
+            created_at: hit.document.created_at,
           },
-          limit: 100,
-          tolerance: parsed.exact ? 0 : 1,
-          ...(whereClause && { where: whereClause }),
-        });
-
-        return searchResults.hits as SearchHit[];
+        }));
       } catch (error) {
         console.error("Search failed:", error);
         return [];
diff --git a/apps/desktop/src/contexts/search/engine/indexing.ts b/apps/desktop/src/contexts/search/engine/indexing.ts
index 35fb8bcf41..849590efb3 100644
--- a/apps/desktop/src/contexts/search/engine/indexing.ts
+++ b/apps/desktop/src/contexts/search/engine/indexing.ts
@@ -1,11 +1,10 @@
-import { insert } from "@orama/orama";
+import { type SearchDocument, commands as tantivy } from "@hypr/plugin-tantivy";
 import { type Store as MainStore } from "../../../store/tinybase/store/main";
 import {
   createHumanSearchableContent,
   createSessionSearchableContent,
 } from "./content";
-import type { Index } from "./types";
 import {
   collectCells,
   collectEnhancedNotesContent,
@@ -13,7 +12,7 @@ import {
   toTrimmedString,
 } from "./utils";
-export function indexSessions(db: Index, store: MainStore): void {
+export async function indexSessions(store: MainStore): Promise {
   const fields = [
     "user_id",
     "created_at",
     "title",
     "raw_md",
     "enhanced_md",
     "words",
     "transcript",
   ];
+  const documents: SearchDocument[] = [];
+
   store.forEachRow("sessions", (rowId: string, _forEachCell) => {
     const row = collectCells(store, "sessions", rowId, fields);
     row.enhanced_notes_content = collectEnhancedNotesContent(store, rowId);
     const title = toTrimmedString(row.title) || "Untitled";
-    void insert(db, {
+    documents.push({
       id: rowId,
-      type: "session",
+      doc_type: "session",
+      language: null,
       title,
       content: createSessionSearchableContent(row),
       created_at: toEpochMs(row.created_at),
+      facets: [],
     });
   });
+
+  if (documents.length > 0) {
+    await tantivy.updateDocuments(documents, null);
+  }
 }
-export function indexHumans(db: Index, store: MainStore): void {
+export async function indexHumans(store: MainStore): Promise {
   const fields = [
     "name",
     "email",
@@ -47,35 +54,52 @@ export function indexHumans(db: Index, store: MainStore): void {
     "job_title",
     "linkedin_username",
     "created_at",
+    "memo",
   ];
+  const documents: SearchDocument[] = [];
+
   store.forEachRow("humans", (rowId: string, _forEachCell) => {
     const row = collectCells(store, "humans", rowId, fields);
     const title = toTrimmedString(row.name) || "Unknown";
-    void insert(db, {
+    documents.push({
       id: rowId,
-      type: "human",
+      doc_type: "human",
+      language: null,
       title,
       content: createHumanSearchableContent(row),
       created_at: toEpochMs(row.created_at),
+      facets: [],
     });
   });
+
+  if (documents.length > 0) {
+    await tantivy.updateDocuments(documents, null);
+  }
 }
-export function indexOrganizations(db: Index, store: MainStore): void {
+export async function indexOrganizations(store: MainStore): Promise {
   const fields = ["name", "created_at"];
+  const documents: SearchDocument[] = [];
+
   store.forEachRow("organizations", (rowId: string, _forEachCell) => {
     const row = collectCells(store, "organizations", rowId, fields);
     const title = toTrimmedString(row.name) || "Unknown Organization";
-    void insert(db, {
+    documents.push({
       id: rowId,
-      type: "organization",
+      doc_type: "organization",
+      language: null,
       title,
       content: "",
       created_at: toEpochMs(row.created_at),
+      facets: [],
     });
   });
+
+  if (documents.length > 0) {
+    await tantivy.updateDocuments(documents, null);
+  }
 }
diff --git a/apps/desktop/src/contexts/search/engine/listeners.ts b/apps/desktop/src/contexts/search/engine/listeners.ts
index 699c771e40..c40b6a75ec 100644
--- a/apps/desktop/src/contexts/search/engine/listeners.ts
+++ b/apps/desktop/src/contexts/search/engine/listeners.ts
@@ -1,13 +1,13 @@
-import { remove, type TypedDocument, update } from "@orama/orama";
 import { RowListener } from "tinybase/with-schemas";
+import { commands as tantivy } from "@hypr/plugin-tantivy";
+
 import { Schemas } from "../../../store/tinybase/store/main";
 import { type Store as MainStore } from "../../../store/tinybase/store/main";
 import {
   createHumanSearchableContent,
   createSessionSearchableContent,
 } from "./content";
-import type { Index } from "./types";
 import {
   collectCells,
   collectEnhancedNotesContent,
@@ -15,15 +18,18 @@ import {
   toTrimmedString,
 } from "./utils";
-export function createSessionListener(
-  index: Index,
-): RowListener {
+export function createSessionListener(): RowListener<
+  Schemas,
+  "sessions",
+  null,
+  MainStore
+> {
   return (store, _, rowId) => {
     try {
       const rowExists = store.getRow("sessions", rowId);
       if (!rowExists) {
-        void remove(index, rowId);
+        void tantivy.removeDocument(rowId, null);
       } else {
         const fields = [
           "user_id",
@@ -36,15 +39,18 @@ export function createSessionListener(
         row.enhanced_notes_content = collectEnhancedNotesContent(store, rowId);
         const title = toTrimmedString(row.title) || "Untitled";
-        const data: TypedDocument = {
-          id: rowId,
-          type: "session",
-          title,
-          content: createSessionSearchableContent(row),
-          created_at: toEpochMs(row.created_at),
-        };
-
-        void update(index, rowId, data);
+        void tantivy.updateDocument(
+          {
+            id: rowId,
+            doc_type: "session",
+            language: null,
+            title,
+            content: createSessionSearchableContent(row),
+            created_at: toEpochMs(row.created_at),
+            facets: [],
+          },
+          null,
+        );
       }
     } catch (error) {
       console.error("Failed to update session in search index:", error);
@@ -52,28 +58,35 @@ export function createSessionListener(
   };
 }
-export function createHumanListener(
-  index: Index,
-): RowListener {
+export function createHumanListener(): RowListener<
+  Schemas,
+  "humans",
+  null,
+  MainStore
+> {
   return (store, _, rowId) => {
     try {
       const rowExists = store.getRow("humans", rowId);
       if (!rowExists) {
-        void remove(index, rowId);
+        void tantivy.removeDocument(rowId, null);
       } else {
         const fields = ["name", "email", "created_at"];
         const row = collectCells(store, "humans", rowId, fields);
         const title = toTrimmedString(row.name) || "Unknown";
-        const data: TypedDocument = {
-          id: rowId,
-          type: "human",
-          title,
-          content: createHumanSearchableContent(row),
-          created_at: toEpochMs(row.created_at),
-        };
-        void update(index, rowId, data);
+        void tantivy.updateDocument(
+          {
+            id: rowId,
+            doc_type: "human",
+            language: null,
+            title,
+            content: createHumanSearchableContent(row),
+            created_at: toEpochMs(row.created_at),
+            facets: [],
+          },
+          null,
+        );
       }
     } catch (error) {
       console.error("Failed to update human in search index:", error);
@@ -81,29 +94,35 @@ export function createHumanListener(
   };
 }
-export function createOrganizationListener(
-  index: Index,
-): RowListener {
+export function createOrganizationListener(): RowListener<
+  Schemas,
+  "organizations",
+  null,
+  MainStore
+> {
   return (store, _, rowId) => {
     try {
       const rowExists = store.getRow("organizations", rowId);
       if (!rowExists) {
-        void remove(index, rowId);
+        void tantivy.removeDocument(rowId, null);
       } else {
         const fields = ["name", "created_at"];
         const row = collectCells(store, "organizations", rowId, fields);
         const title = toTrimmedString(row.name) || "Unknown Organization";
-        const data: TypedDocument = {
-          id: rowId,
-          type: "organization",
-          title,
-          content: "",
-          created_at: toEpochMs(row.created_at),
-        };
-
-        void update(index, rowId, data);
+        void tantivy.updateDocument(
+          {
+            id: rowId,
+            doc_type: "organization",
+            language: null,
+            title,
+            content: "",
+            created_at: toEpochMs(row.created_at),
+            facets: [],
+          },
+          null,
+        );
       }
     } catch (error) {
       console.error("Failed to update organization in search index:", error);
diff --git a/apps/desktop/src/contexts/search/engine/types.ts b/apps/desktop/src/contexts/search/engine/types.ts
index 1aba64af52..71b7b7130b 100644
--- a/apps/desktop/src/contexts/search/engine/types.ts
+++ b/apps/desktop/src/contexts/search/engine/types.ts
@@ -1,4 +1,3 @@
-import { Orama } from "@orama/orama";
 import { z } from "zod";
 const searchEntityTypeSchema = z.enum(["session", "human", "organization"]);
@@ -14,16 +13,6 @@ export const searchDocumentSchema = z.object({
 export type SearchDocument = z.infer;
-export const SEARCH_SCHEMA = {
-  id: "string",
-  type: "enum",
-  title: "string",
-  content: "string",
-  created_at: "number",
-} as const satisfies InferOramaSchema;
-
-export type Index = Orama;
-
 const numberFilterSchema = z
   .object({
     gte: z.number().optional(),
@@ -44,17 +33,3 @@ export type SearchHit = {
   score: number;
   document: SearchDocument;
 };
-
-type InferOramaField = T extends z.ZodString
-  ? "string"
-  : T extends z.ZodNumber
-    ? "number"
-    : T extends z.ZodBoolean
-      ? "boolean"
-      : T extends z.ZodEnum
-        ? "enum"
-        : never;
-
-type InferOramaSchema> = {
-  [K in keyof T["shape"]]: InferOramaField;
-};
diff --git a/apps/desktop/src/contexts/search/ui.tsx b/apps/desktop/src/contexts/search/ui.tsx
index 3d85d7a665..02d47d24ea 100644
--- a/apps/desktop/src/contexts/search/ui.tsx
+++ b/apps/desktop/src/contexts/search/ui.tsx
@@ -58,7 +58,7 @@ interface SearchUIContextValue {
   setSelectedIndex: (index: number) => void;
 }
-const SCORE_PERCENTILE_THRESHOLD = 0.1;
+const SCORE_PERCENTILE_THRESHOLD = 0.2;
 const GROUP_TITLES: Record = {
   session: "Sessions",
@@ -67,7 +67,7 @@ const GROUP_TITLES: Record = {
 };
 function calculateDynamicThreshold(scores: number[]): number {
-  if (scores.length === 0) {
+  if (scores.length < 100) {
     return 0;
   }
diff --git a/plugins/tantivy/build.rs b/plugins/tantivy/build.rs
index 45cafe0ec8..4b8d4f2f48 100644
--- a/plugins/tantivy/build.rs
+++ b/plugins/tantivy/build.rs
@@ -1,4 +1,11 @@
-const COMMANDS: &[&str] = &["search", "reindex"];
+const COMMANDS: &[&str] = &[
+    "search",
+    "reindex",
+    "add_document",
+    "update_document",
+    "update_documents",
+    "remove_document",
+];
 fn main() {
     tauri_plugin::Builder::new(COMMANDS).build();
diff --git a/plugins/tantivy/js/bindings.gen.ts b/plugins/tantivy/js/bindings.gen.ts
index bacaed4400..d268c0444c 100644
--- a/plugins/tantivy/js/bindings.gen.ts
+++ b/plugins/tantivy/js/bindings.gen.ts
@@ -38,6 +38,14 @@ async updateDocument(document: SearchDocument, collection: string | null) : Prom
     else return { status: "error", error: e as any };
 }
 },
+async updateDocuments(documents: SearchDocument[], collection: string | null) : Promise> {
+    try {
+    return { status: "ok", data: await TAURI_INVOKE("plugin:tantivy|update_documents", { documents, collection }) };
+} catch (e) {
+    if(e instanceof Error) throw e;
+    else return { status: "error", error: e as any };
+}
+},
 async removeDocument(id: string, collection: string | null) : Promise> {
     try {
     return { status: "ok", data: await TAURI_INVOKE("plugin:tantivy|remove_document", { id, collection }) };
diff --git a/plugins/tantivy/permissions/autogenerated/commands/add_document.toml b/plugins/tantivy/permissions/autogenerated/commands/add_document.toml
new file mode 100644
index 0000000000..ea8d4a498e
--- /dev/null
+++ b/plugins/tantivy/permissions/autogenerated/commands/add_document.toml
@@ -0,0 +1,13 @@
+# Automatically generated - DO NOT EDIT!
+
+"$schema" = "../../schemas/schema.json"
+
+[[permission]]
+identifier = "allow-add-document"
+description = "Enables the add_document command without any pre-configured scope."
+commands.allow = ["add_document"]
+
+[[permission]]
+identifier = "deny-add-document"
+description = "Denies the add_document command without any pre-configured scope."
+commands.deny = ["add_document"]
diff --git a/plugins/tantivy/permissions/autogenerated/commands/remove_document.toml b/plugins/tantivy/permissions/autogenerated/commands/remove_document.toml
new file mode 100644
index 0000000000..650326d235
--- /dev/null
+++ b/plugins/tantivy/permissions/autogenerated/commands/remove_document.toml
@@ -0,0 +1,13 @@
+# Automatically generated - DO NOT EDIT!
+
+"$schema" = "../../schemas/schema.json"
+
+[[permission]]
+identifier = "allow-remove-document"
+description = "Enables the remove_document command without any pre-configured scope."
+commands.allow = ["remove_document"]
+
+[[permission]]
+identifier = "deny-remove-document"
+description = "Denies the remove_document command without any pre-configured scope."
+commands.deny = ["remove_document"]
diff --git a/plugins/tantivy/permissions/autogenerated/commands/update_document.toml b/plugins/tantivy/permissions/autogenerated/commands/update_document.toml
new file mode 100644
index 0000000000..3f92c20bba
--- /dev/null
+++ b/plugins/tantivy/permissions/autogenerated/commands/update_document.toml
@@ -0,0 +1,13 @@
+# Automatically generated - DO NOT EDIT!
+
+"$schema" = "../../schemas/schema.json"
+
+[[permission]]
+identifier = "allow-update-document"
+description = "Enables the update_document command without any pre-configured scope."
+commands.allow = ["update_document"]
+
+[[permission]]
+identifier = "deny-update-document"
+description = "Denies the update_document command without any pre-configured scope."
+commands.deny = ["update_document"]
diff --git a/plugins/tantivy/permissions/autogenerated/commands/update_documents.toml b/plugins/tantivy/permissions/autogenerated/commands/update_documents.toml
new file mode 100644
index 0000000000..7b10be452e
--- /dev/null
+++ b/plugins/tantivy/permissions/autogenerated/commands/update_documents.toml
@@ -0,0 +1,13 @@
+# Automatically generated - DO NOT EDIT!
+
+"$schema" = "../../schemas/schema.json"
+
+[[permission]]
+identifier = "allow-update-documents"
+description = "Enables the update_documents command without any pre-configured scope."
+commands.allow = ["update_documents"]
+
+[[permission]]
+identifier = "deny-update-documents"
+description = "Denies the update_documents command without any pre-configured scope."
+commands.deny = ["update_documents"]
diff --git a/plugins/tantivy/permissions/autogenerated/reference.md b/plugins/tantivy/permissions/autogenerated/reference.md
index 1779ea8e75..ce887fb059 100644
--- a/plugins/tantivy/permissions/autogenerated/reference.md
+++ b/plugins/tantivy/permissions/autogenerated/reference.md
@@ -6,6 +6,10 @@ Default permissions for the plugin
 - `allow-search`
 - `allow-reindex`
+- `allow-add-document`
+- `allow-update-document`
+- `allow-update-documents`
+- `allow-remove-document`
 ## Permission Table
@@ -16,6 +20,32 @@ Default permissions for the plugin
+
+
+
+`tantivy:allow-add-document`
+
+
+
+
+Enables the add_document command without any pre-configured scope.
+
+
+
+
+
+
+
+`tantivy:deny-add-document`
+
+
+
+
+Denies the add_document command without any pre-configured scope.
+
+
+
+
@@ -45,6 +75,32 @@ Denies the reindex command without any pre-configured scope.
+`tantivy:allow-remove-document`
+
+
+
+
+Enables the remove_document command without any pre-configured scope.
+
+
+
+
+
+
+
+`tantivy:deny-remove-document`
+
+
+
+
+Denies the remove_document command without any pre-configured scope.
+
+
+
+
+
+
+
 `tantivy:allow-search`
@@ -65,6 +121,58 @@ Enables the search command without any pre-configured scope.
 Denies the search command without any pre-configured scope.
+
+
+
+
+
+
+`tantivy:allow-update-document`
+
+
+
+
+Enables the update_document command without any pre-configured scope.
+
+
+
+
+
+
+
+`tantivy:deny-update-document`
+
+
+
+
+Denies the update_document command without any pre-configured scope.
+
+
+
+
+
+
+
+`tantivy:allow-update-documents`
+
+
+
+
+Enables the update_documents command without any pre-configured scope.
+
+
+
+
+
+
+
+`tantivy:deny-update-documents`
+
+
+
+
+Denies the update_documents command without any pre-configured scope.
diff --git a/plugins/tantivy/permissions/default.toml b/plugins/tantivy/permissions/default.toml
index 0695842e21..3e86d539e3 100644
--- a/plugins/tantivy/permissions/default.toml
+++ b/plugins/tantivy/permissions/default.toml
@@ -1,3 +1,3 @@
 [default]
 description = "Default permissions for the plugin"
-permissions = ["allow-search", "allow-reindex"]
+permissions = ["allow-search", "allow-reindex", "allow-add-document", "allow-update-document", "allow-update-documents", "allow-remove-document"]
diff --git a/plugins/tantivy/permissions/schemas/schema.json b/plugins/tantivy/permissions/schemas/schema.json
index 9c2777282f..8bdca20abe 100644
--- a/plugins/tantivy/permissions/schemas/schema.json
+++ b/plugins/tantivy/permissions/schemas/schema.json
@@ -294,6 +294,18 @@
     "PermissionKind": {
       "type": "string",
       "oneOf": [
+        {
+          "description": "Enables the add_document command without any pre-configured scope.",
+          "type": "string",
+          "const": "allow-add-document",
+          "markdownDescription": "Enables the add_document command without any pre-configured scope."
+        },
+        {
+          "description": "Denies the add_document command without any pre-configured scope.",
+          "type": "string",
+          "const": "deny-add-document",
+          "markdownDescription": "Denies the add_document command without any pre-configured scope."
+        },
         {
           "description": "Enables the reindex command without any pre-configured scope.",
           "type": "string",
@@ -306,6 +318,18 @@
           "const": "deny-reindex",
           "markdownDescription": "Denies the reindex command without any pre-configured scope."
         },
+        {
+          "description": "Enables the remove_document command without any pre-configured scope.",
+          "type": "string",
+          "const": "allow-remove-document",
+          "markdownDescription": "Enables the remove_document command without any pre-configured scope."
+        },
+        {
+          "description": "Denies the remove_document command without any pre-configured scope.",
+          "type": "string",
+          "const": "deny-remove-document",
+          "markdownDescription": "Denies the remove_document command without any pre-configured scope."
+        },
         {
           "description": "Enables the search command without any pre-configured scope.",
           "type": "string",
@@ -319,10 +343,34 @@
           "markdownDescription": "Denies the search command without any pre-configured scope."
         },
         {
-          "description": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-search`\n- `allow-reindex`",
+          "description": "Enables the update_document command without any pre-configured scope.",
+          "type": "string",
+          "const": "allow-update-document",
+          "markdownDescription": "Enables the update_document command without any pre-configured scope."
+        },
+        {
+          "description": "Denies the update_document command without any pre-configured scope.",
+          "type": "string",
+          "const": "deny-update-document",
+          "markdownDescription": "Denies the update_document command without any pre-configured scope."
+        },
+        {
+          "description": "Enables the update_documents command without any pre-configured scope.",
+          "type": "string",
+          "const": "allow-update-documents",
+          "markdownDescription": "Enables the update_documents command without any pre-configured scope."
+        },
+        {
+          "description": "Denies the update_documents command without any pre-configured scope.",
+          "type": "string",
+          "const": "deny-update-documents",
+          "markdownDescription": "Denies the update_documents command without any pre-configured scope."
+        },
+        {
+          "description": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-search`\n- `allow-reindex`\n- `allow-add-document`\n- `allow-update-document`\n- `allow-update-documents`\n- `allow-remove-document`",
           "type": "string",
           "const": "default",
-          "markdownDescription": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-search`\n- `allow-reindex`"
+          "markdownDescription": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-search`\n- `allow-reindex`\n- `allow-add-document`\n- `allow-update-document`\n- `allow-update-documents`\n- `allow-remove-document`"
         }
       ]
     }
diff --git a/plugins/tantivy/src/commands.rs b/plugins/tantivy/src/commands.rs
index 6eec9623a1..df983430b6 100644
--- a/plugins/tantivy/src/commands.rs
+++ b/plugins/tantivy/src/commands.rs
@@ -50,6 +50,19 @@ pub(crate) async fn update_document(
         .map_err(|e| e.to_string())
 }
+#[tauri::command]
+#[specta::specta]
+pub(crate) async fn update_documents(
+    app: tauri::AppHandle,
+    documents: Vec,
+    collection: Option,
+) -> Result<(), String> {
+    app.tantivy()
+        .update_documents(collection, documents)
+        .await
+        .map_err(|e| e.to_string())
+}
+
 #[tauri::command]
 #[specta::specta]
 pub(crate) async fn remove_document(
diff --git a/plugins/tantivy/src/ext.rs b/plugins/tantivy/src/ext.rs
index 710d3c7a87..bcd4bc787d 100644
--- a/plugins/tantivy/src/ext.rs
+++ b/plugins/tantivy/src/ext.rs
@@ -1,6 +1,3 @@
-use std::sync::atomic::{AtomicU64, Ordering};
-use std::time::{Duration, Instant};
-
 use tantivy::collector::{Count, TopDocs};
 use tantivy::query::{
     BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, PhraseQuery, Query, QueryParser, TermQuery,
@@ -135,10 +132,6 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager> Tantivy<'a, R, M> {
             index,
             reader,
             writer,
-            auto_commit: config.auto_commit,
-            commit_interval_ms: config.commit_interval_ms,
-            pending_writes: AtomicU64::new(0),
-            last_commit: std::sync::Mutex::new(Instant::now()),
         };
         guard
@@ -401,9 +394,6 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager> Tantivy<'a, R, M> {
         writer.commit()?;
-        collection_index.pending_writes.store(0, Ordering::SeqCst);
-        *collection_index.last_commit.lock().unwrap() = Instant::now();
-
         tracing::info!(
             "Reindex completed for collection '{}'. Index cleared and ready for new documents. Fields: {:?}",
             collection_name,
@@ -446,23 +436,7 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager> Tantivy<'a, R, M> {
         }
         writer.add_document(doc)?;
-
-        collection_index
-            .pending_writes
-            .fetch_add(1, Ordering::SeqCst);
-
-        let should_commit = if collection_index.auto_commit {
-            let last_commit = collection_index.last_commit.lock().unwrap();
-            last_commit.elapsed() >= Duration::from_millis(collection_index.commit_interval_ms)
-        } else {
-            true
-        };
-
-        if should_commit {
-            writer.commit()?;
-            collection_index.pending_writes.store(0, Ordering::SeqCst);
-            *collection_index.last_commit.lock().unwrap() = Instant::now();
-        }
+        writer.commit()?;
         tracing::debug!(
             "Added document '{}' to collection '{}'",
@@ -509,23 +483,7 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager> Tantivy<'a, R, M> {
         }
         writer.add_document(doc)?;
-
-        collection_index
-            .pending_writes
-            .fetch_add(1, Ordering::SeqCst);
-
-        let should_commit = if collection_index.auto_commit {
-            let last_commit = collection_index.last_commit.lock().unwrap();
-            last_commit.elapsed() >= Duration::from_millis(collection_index.commit_interval_ms)
-        } else {
-            true
-        };
-
-        if should_commit {
-            writer.commit()?;
-            collection_index.pending_writes.store(0, Ordering::SeqCst);
-            *collection_index.last_commit.lock().unwrap() = Instant::now();
-        }
+        writer.commit()?;
         tracing::debug!(
             "Updated document '{}' in collection '{}'",
@@ -536,10 +494,10 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager> Tantivy<'a, R, M> {
         Ok(())
     }
-    pub async fn remove_document(
+    pub async fn update_documents(
         &self,
         collection: Option,
-        id: String,
+        documents: Vec,
     ) -> Result<(), crate::Error> {
         let collection_name = Self::get_collection_name(collection);
         let state = self.manager.state::();
@@ -554,36 +512,45 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager> Tantivy<'a, R, M> {
         let writer = &mut collection_index.writer;
         let fields = get_fields(schema);
-        let id_term = Term::from_field_text(fields.id, &id);
-        writer.delete_term(id_term);
+        let count = documents.len();
-        collection_index
-            .pending_writes
-            .fetch_add(1, Ordering::SeqCst);
+        for document in documents {
+            let id_term = Term::from_field_text(fields.id, &document.id);
+            writer.delete_term(id_term);
-        let should_commit = if collection_index.auto_commit {
-            let last_commit = collection_index.last_commit.lock().unwrap();
-            last_commit.elapsed() >= Duration::from_millis(collection_index.commit_interval_ms)
-        } else {
-            true
-        };
+            let mut doc = TantivyDocument::new();
+            doc.add_text(fields.id, &document.id);
+            doc.add_text(fields.doc_type, &document.doc_type);
+            doc.add_text(fields.language, document.language.as_deref().unwrap_or(""));
+            doc.add_text(fields.title, &document.title);
+            doc.add_text(fields.content, &document.content);
+            doc.add_i64(fields.created_at, document.created_at);
+
+            for facet_path in &document.facets {
+                if let Ok(facet) = Facet::from_text(facet_path) {
+                    doc.add_facet(fields.facets, facet);
+                }
+            }
-        if should_commit {
-            writer.commit()?;
-            collection_index.pending_writes.store(0, Ordering::SeqCst);
-            *collection_index.last_commit.lock().unwrap() = Instant::now();
+            writer.add_document(doc)?;
         }
+        writer.commit()?;
+
         tracing::debug!(
-            "Removed document '{}' from collection '{}'",
-            id,
+            "Updated {} documents in collection '{}'",
+            count,
             collection_name
         );
         Ok(())
     }
-    pub async fn flush(&self, collection: Option) -> Result<(), crate::Error> {
+    pub async fn remove_document(
+        &self,
+        collection: Option,
+        id: String,
+    ) -> Result<(), crate::Error> {
         let collection_name = Self::get_collection_name(collection);
         let state = self.manager.state::();
         let mut guard = state.inner.write().await;
@@ -593,17 +560,19 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager> Tantivy<'a, R, M> {
             .get_mut(&collection_name)
             .ok_or_else(|| crate::Error::CollectionNotFound(collection_name.clone()))?;
-        let pending = collection_index.pending_writes.load(Ordering::SeqCst);
-        if pending > 0 {
-            collection_index.writer.commit()?;
-            collection_index.pending_writes.store(0, Ordering::SeqCst);
-            *collection_index.last_commit.lock().unwrap() = Instant::now();
-            tracing::debug!(
-                "Flushed {} pending writes for collection '{}'",
-                pending,
-                collection_name
-            );
-        }
+        let schema = &collection_index.schema;
+        let writer = &mut collection_index.writer;
+        let fields = get_fields(schema);
+
+        let id_term = Term::from_field_text(fields.id, &id);
+        writer.delete_term(id_term);
+        writer.commit()?;
+
+        tracing::debug!(
+            "Removed document '{}' from collection '{}'",
+            id,
+            collection_name
+        );
         Ok(())
     }
diff --git a/plugins/tantivy/src/lib.rs b/plugins/tantivy/src/lib.rs
index 697e22c400..f46bc3451a 100644
--- a/plugins/tantivy/src/lib.rs
+++ b/plugins/tantivy/src/lib.rs
@@ -7,8 +7,6 @@ mod tokenizer;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
-use std::sync::atomic::{AtomicU64, Ordering};
-use std::time::Instant;
 use tantivy::schema::Schema;
 use tantivy::{Index, IndexReader, IndexWriter};
 use tauri::Manager;
@@ -107,8 +105,6 @@ pub struct CollectionConfig {
     pub name: String,
     pub path: String,
     pub schema_builder: fn() -> Schema,
-    pub auto_commit: bool,
-    pub commit_interval_ms: u64,
     pub schema_version: u32,
 }
@@ -117,10 +113,6 @@ pub struct CollectionIndex {
     pub index: Index,
     pub reader: IndexReader,
     pub writer: IndexWriter,
-    pub auto_commit: bool,
-    pub commit_interval_ms: u64,
-    pub pending_writes: AtomicU64,
-    pub last_commit: std::sync::Mutex,
 }
 #[derive(Default)]
@@ -148,6 +140,7 @@ fn make_specta_builder() -> tauri_specta::Builder {
             commands::reindex::,
             commands::add_document::,
             commands::update_document::,
+            commands::update_documents::,
             commands::remove_document::,
         ])
         .error_handling(tauri_specta::ErrorHandlingMode::Result)
@@ -167,8 +160,6 @@ pub fn init() -> tauri::plugin::TauriPlugin {
                 name: "default".to_string(),
                 path: "search_index".to_string(),
                 schema_builder: schema::build_schema,
-                auto_commit: true,
-                commit_interval_ms: 1000,
                 schema_version: SCHEMA_VERSION,
             };
@@ -179,31 +170,6 @@ pub fn init() -> tauri::plugin::TauriPlugin {
             Ok(())
         })
-        .on_event(|app, event| {
-            if let tauri::RunEvent::ExitRequested { .. } = event {
-                let state = app.state::();
-                if let Ok(mut guard) = state.inner.try_write() {
-                    for (name, collection) in guard.collections.iter_mut() {
-                        let pending = collection.pending_writes.load(Ordering::SeqCst);
-                        if pending > 0 {
-                            if let Err(e) = collection.writer.commit() {
-                                tracing::error!(
-                                    "Failed to flush pending writes for collection '{}': {}",
-                                    name,
-                                    e
-                                );
-                            } else {
-                                tracing::info!(
-                                    "Flushed {} pending writes for collection '{}' on exit",
-                                    pending,
-                                    name
-                                );
-                            }
-                        }
-                    }
-                }
-            }
-        })
         .build()
 }
diff --git a/plugins/tantivy/src/tokenizer.rs b/plugins/tantivy/src/tokenizer.rs
index ac9a1eb488..1a335064fc 100644
--- a/plugins/tantivy/src/tokenizer.rs
+++ b/plugins/tantivy/src/tokenizer.rs
@@ -1,7 +1,7 @@
 use hypr_language::ISO639;
 use tantivy::Index;
 use tantivy::tokenizer::{
-    AsciiFoldingFilter, Language, LowerCaser, RemoveLongFilter, SimpleTokenizer, Stemmer,
+    AsciiFoldingFilter, Language, LowerCaser, NgramTokenizer, RemoveLongFilter, Stemmer,
     TextAnalyzer,
 };
@@ -56,7 +56,7 @@ pub fn get_tokenizer_name_for_language(lang: &hypr_language::Language) -> &'stat
 pub fn register_tokenizers(index: &Index) {
     let tokenizer_manager = index.tokenizers();
-    let multilang_tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
+    let multilang_tokenizer = TextAnalyzer::builder(NgramTokenizer::new(1, 3, false).unwrap())
         .filter(RemoveLongFilter::limit(40))
         .filter(LowerCaser)
         .filter(AsciiFoldingFilter)
@@ -85,7 +85,7 @@ pub fn register_tokenizers(index: &Index) {
     ];
     for (name, lang) in languages {
-        let tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
+        let tokenizer = TextAnalyzer::builder(NgramTokenizer::new(1, 3, false).unwrap())
         .filter(RemoveLongFilter::limit(40))
         .filter(LowerCaser)
         .filter(AsciiFoldingFilter)
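
Illustration (not part of the patch): a minimal sketch of how a renderer-side caller might batch rows through the new update_documents binding and handle the generated Result union. It assumes only the SearchDocument shape and the ok/error status pattern shown above in plugins/tantivy/js/bindings.gen.ts and indexing.ts; the helper name reindexAll is hypothetical.

    import { type SearchDocument, commands as tantivy } from "@hypr/plugin-tantivy";

    // Hypothetical helper: push a prepared batch into the plugin's default
    // collection (the `null` second argument, mirroring indexing.ts).
    async function reindexAll(documents: SearchDocument[]): Promise<void> {
      if (documents.length === 0) {
        return;
      }
      const result = await tantivy.updateDocuments(documents, null);
      if (result.status === "error") {
        console.error("Batch index failed:", result.error);
      }
    }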