From 30bf43ba12e1a6e23a16738a39a77e377febbc52 Mon Sep 17 00:00:00 2001
From: Tommaso Allevi <tomallevi@gmail.com>
Date: Wed, 13 Dec 2023 18:37:15 +0100
Subject: [PATCH 1/5] Prefixes have 0 distance

---
 packages/orama/src/components/levenshtein.ts |  9 +++++++--
 packages/orama/tests/levenshtein.test.ts     | 20 ++++++++++++++++++--
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/packages/orama/src/components/levenshtein.ts b/packages/orama/src/components/levenshtein.ts
index cf3bf56d0..46de9d13e 100644
--- a/packages/orama/src/components/levenshtein.ts
+++ b/packages/orama/src/components/levenshtein.ts
@@ -44,17 +44,22 @@ function _boundedLevenshtein(a: string, b: string, tolerance: number): number {
   lenB -= startIdx
 
   // early return when the smallest string is empty
+  /*
   if (lenA === 0) {
+    if ()
+    console.log('AAAAAAA ---- ')
     return lenB > tolerance ? -1 : lenB
   }
+  */
 
   const delta = lenB - lenA
 
   if (tolerance > lenB) {
     tolerance = lenB
-  } else if (delta > tolerance) {
+  }/* else if (delta > tolerance) {
+    console.log('AAAAAAA ---- ')
     return -1
-  }
+  }*/
 
   let i = 0
   const row: number[] = []
diff --git a/packages/orama/tests/levenshtein.test.ts b/packages/orama/tests/levenshtein.test.ts
index b5db10ae7..ef9a03310 100644
--- a/packages/orama/tests/levenshtein.test.ts
+++ b/packages/orama/tests/levenshtein.test.ts
@@ -1,5 +1,5 @@
 import t from 'tap'
-import { boundedLevenshtein, levenshtein } from '../src/components/levenshtein.js'
+import { boundedLevenshtein, levenshtein, syncBoundedLevenshtein } from '../src/components/levenshtein.js'
 
 t.test('levenshtein', (t) => {
   t.plan(3)
@@ -29,7 +29,7 @@ t.test('levenshtein', (t) => {
 })
 
 t.test('boundedLevenshtein', (t) => {
-  t.plan(4)
+  t.plan(5)
 
   t.test('should be 0 when both inputs are empty', async (t) => {
     t.plan(2)
@@ -72,4 +72,20 @@ t.test('boundedLevenshtein', (t) => {
     t.match(await boundedLevenshtein('somebody once', 'told me', 9), { isBounded: true })
     t.match(await boundedLevenshtein('somebody once', 'told me', 8), { isBounded: false })
   })
+
+  t.test('foo', async (t) => {
+    t.plan(3)
+
+    const a = syncBoundedLevenshtein('Chris', 'Christopher', 0)
+    t.match(a, { distance: 0, isBounded: true })
+    console.log(a)
+
+    const b = syncBoundedLevenshtein('Chris', 'Christopher', 1)
+    t.match(b, { distance: 0, isBounded: true })
+    console.log(b)
+
+    const c = syncBoundedLevenshtein('Chris', 'Chriastopher', 1)
+    t.match(b, { distance: 0, isBounded: true })
+    console.log(c)
+  })
 })

From 9f4c2ff50ff37e61d25cbc1e3108174d33275fa7 Mon Sep 17 00:00:00 2001
From: Tommaso Allevi <tomallevi@gmail.com>
Date: Wed, 13 Dec 2023 18:56:16 +0100
Subject: [PATCH 2/5] Fix test in plugin-data-persistence

---
 .../test/index.test.ts                        | 94 ++++++++-----------
 1 file changed, 38 insertions(+), 56 deletions(-)

diff --git a/packages/plugin-data-persistence/test/index.test.ts b/packages/plugin-data-persistence/test/index.test.ts
index 928cf0aa0..8990ef4c3 100644
--- a/packages/plugin-data-persistence/test/index.test.ts
+++ b/packages/plugin-data-persistence/test/index.test.ts
@@ -60,7 +60,7 @@ async function generateTestDBInstance() {
 }
 
 t.test('binary persistence', (t) => {
-  t.plan(6)
+  t.plan(5)
 
   t.test('should generate a persistence file on the disk with random name', async (t) => {
     t.plan(2)
@@ -75,6 +75,7 @@ t.test('binary persistence', (t) => {
 
     // Persist database on disk in binary format
     const path = await persistToFile(db, 'binary')
+    t.teardown(rmTeardown(path))
 
     // Load database from disk in binary format
     const db2 = await restoreFromFile('binary')
@@ -90,10 +91,6 @@ t.test('binary persistence', (t) => {
     // Queries on the loaded database should match the original database
     t.same(q1.hits, qp1.hits)
     t.same(q2.hits, qp2.hits)
-
-    // Clean up
-    await rm(path)
-    t.end()
   })
 
   t.test('should generate a persistence file on the disk with a given name', async (t) => {
@@ -109,6 +106,7 @@ t.test('binary persistence', (t) => {
 
     // Persist database on disk in binary format
     const path = await persistToFile(db, 'binary', 'test.dpack')
+    t.teardown(rmTeardown(path))
 
     // Load database from disk in binary format
     const db2 = await restoreFromFile('binary', 'test.dpack')
@@ -124,10 +122,6 @@ t.test('binary persistence', (t) => {
     // Queries on the loaded database should match the original database
     t.same(q1.hits, qp1.hits)
     t.same(q2.hits, qp2.hits)
-
-    // Clean up
-    await rm(path)
-    t.end()
   })
 
   t.test('should generate a persistence file on the disk using ORAMA_DB_NAME env', async (t) => {
@@ -157,6 +151,7 @@ t.test('binary persistence', (t) => {
 
     // Persist database on disk in binary format
     const path = await persistToFile(db, 'binary')
+    t.teardown(rmTeardown(path))
     t.match(path, 'example_db_dump')
 
     // Load database from disk in binary format
@@ -174,9 +169,6 @@ t.test('binary persistence', (t) => {
     t.same(q1.hits, qp1.hits)
     t.same(q2.hits, qp2.hits)
 
-    // Clean up
-    await rm(path)
-
     if (currentOramaDBNameValue) {
       // @ts-expect-error Deno is only available in Deno
       if (typeof Deno !== 'undefined') {
@@ -186,7 +178,6 @@ t.test('binary persistence', (t) => {
         process.env.ORAMA_DB_NAME = currentOramaDBNameValue
       }
     }
-    t.end()
   })
 
   t.test('should continue to work with `enum`', async (t) => {
@@ -199,6 +190,8 @@ t.test('binary persistence', (t) => {
     })
 
     const path = await persistToFile(db, 'binary', 'test.dpack')
+    t.teardown(rmTeardown(path))
+
     const db2 = await restoreFromFile('binary', 'test.dpack')
 
     const qp1 = await search(db2, {
@@ -208,9 +201,6 @@ t.test('binary persistence', (t) => {
     })
 
     t.same(q1.hits, qp1.hits)
-
-    await rm(path)
-    t.end()
   })
 
   t.test('should continue to work with `enum[]`', async (t) => {
@@ -223,6 +213,8 @@ t.test('binary persistence', (t) => {
     })
 
     const path = await persistToFile(db, 'binary', 'test.dpack')
+    t.teardown(rmTeardown(path))
+
     const db2 = await restoreFromFile('binary', 'test.dpack')
 
     const qp1 = await search(db2, {
@@ -232,9 +224,6 @@ t.test('binary persistence', (t) => {
     })
 
     t.same(q1.hits, qp1.hits)
-
-    await rm(path)
-    t.end()
   })
 })
 
@@ -254,6 +243,7 @@ t.test('json persistence', (t) => {
 
     // Persist database on disk in json format
     const path = await persistToFile(db, 'json')
+    t.teardown(rmTeardown(path))
 
     // Load database from disk in json format
     const db2 = await restoreFromFile('json')
@@ -269,10 +259,6 @@ t.test('json persistence', (t) => {
     // Queries on the loaded database should match the original database
     t.same(q1.hits, qp1.hits)
     t.same(q2.hits, qp2.hits)
-
-    // Clean up
-    await rm(path)
-    t.end()
   })
 
   t.test('should generate a persistence file on the disk with support for vectors', async (t) => {
@@ -290,6 +276,7 @@ t.test('json persistence', (t) => {
 
     // Persist database on disk in json format
     const path = await persistToFile(db1, 'json', 'test.json')
+    t.teardown(rmTeardown(path))
 
     // Load database from disk in json format
     const db2 = await restoreFromFile('json', 'test.json')
@@ -306,10 +293,6 @@ t.test('json persistence', (t) => {
 
     // Queries on the loaded database should match the original database
     t.same(qp1.hits, qp2.hits)
-
-    // Clean up
-    await rm(path)
-    t.end()
   })
 
   t.test('should generate a persistence file on the disk with a given name and json format', async (t) => {
@@ -325,6 +308,7 @@ t.test('json persistence', (t) => {
 
     // Persist database on disk in json format
     const path = await persistToFile(db, 'json', 'test.json')
+    t.teardown(rmTeardown(path))
 
     // Load database from disk in json format
     const db2 = await restoreFromFile('json', 'test.json')
@@ -340,10 +324,6 @@ t.test('json persistence', (t) => {
     // Queries on the loaded database should match the original database
     t.same(q1.hits, qp1.hits)
     t.same(q2.hits, qp2.hits)
-
-    // Clean up
-    await rm(path)
-    t.end()
   })
 
   t.test('should continue to work with `enum`', async (t) => {
@@ -356,6 +336,8 @@ t.test('json persistence', (t) => {
     })
 
     const path = await persistToFile(db, 'json', 'test.json')
+    t.teardown(rmTeardown(path))
+
     const db2 = await restoreFromFile('json', 'test.json')
 
     const qp1 = await search(db2, {
@@ -365,9 +347,6 @@ t.test('json persistence', (t) => {
     })
 
     t.same(q1.hits, qp1.hits)
-
-    await rm(path)
-    t.end()
   })
 
   t.test('should continue to work with `enum[]`', async (t) => {
@@ -381,6 +360,8 @@ t.test('json persistence', (t) => {
     })
 
     const path = await persistToFile(db, 'json', 'test.json')
+    t.teardown(rmTeardown(path))
+
     const db2 = await restoreFromFile('json', 'test.json')
 
     const qp1 = await search(db2, {
@@ -390,9 +371,6 @@ t.test('json persistence', (t) => {
     })
 
     t.same(q1.hits, qp1.hits)
-
-    await rm(path)
-    t.end()
   })
 })
 
@@ -413,6 +391,7 @@ t.test('dpack persistence', (t) => {
 
     // Persist database on disk in dpack format
     const path = await persistToFile(db, 'dpack')
+    t.teardown(rmTeardown(path))
 
     // Load database from disk in dpack format
     const db2 = await restoreFromFile('dpack')
@@ -428,10 +407,6 @@ t.test('dpack persistence', (t) => {
     // Queries on the loaded database should match the original database
     t.same(q1.hits, qp1.hits)
     t.same(q2.hits, qp2.hits)
-
-    // Clean up
-    await rm(path)
-    t.end()
   })
 
   t.test('should generate a persistence file on the disk with a given name and dpack format', async (t) => {
@@ -448,6 +423,7 @@ t.test('dpack persistence', (t) => {
 
     // Persist database on disk in json format
     const path = await persistToFile(db, 'dpack', 'test.dpack')
+    t.teardown(rmTeardown(path))
 
     // Load database from disk in json format
     const db2 = await restoreFromFile('dpack', 'test.dpack')
@@ -463,10 +439,6 @@ t.test('dpack persistence', (t) => {
     // Queries on the loaded database should match the original database
     t.same(q1.hits, qp1.hits)
     t.same(q2.hits, qp2.hits)
-
-    // Clean up
-    await rm(path)
-    t.end()
   })
 
   t.test('should continue to work with `enum`', async (t) => {
@@ -480,6 +452,8 @@ t.test('dpack persistence', (t) => {
     })
 
     const path = await persistToFile(db, 'dpack', 'test.dpack')
+    t.teardown(rmTeardown(path))
+
     const db2 = await restoreFromFile('dpack', 'test.dpack')
 
     const qp1 = await search(db2, {
@@ -489,9 +463,6 @@ t.test('dpack persistence', (t) => {
     })
 
     t.same(q1.hits, qp1.hits)
-
-    await rm(path)
-    t.end()
   })
 
   t.test('should continue to work with `enum[]`', async (t) => {
@@ -505,6 +476,8 @@ t.test('dpack persistence', (t) => {
     })
 
     const path = await persistToFile(db, 'dpack', 'test.dpack')
+    t.teardown(rmTeardown(path))
+
     const db2 = await restoreFromFile('dpack', 'test.dpack')
 
     const qp1 = await search(db2, {
@@ -514,9 +487,6 @@ t.test('dpack persistence', (t) => {
     })
 
     t.same(q1.hits, qp1.hits)
-
-    await rm(path)
-    t.end()
   })
 })
 
@@ -563,11 +533,14 @@ t.test('should persist data in-memory', async (t) => {
   t.same(q2.hits, qp2.hits)
   t.same(q1.hits, qp3.hits)
   t.same(q2.hits, qp4.hits)
-  t.end()
 })
 
 t.test('errors', (t) => {
+  t.plan(2)
+
   t.test('should throw an error when trying to persist a database in an unsupported format', async (t) => {
+    t.plan(1)
+
     const db = await generateTestDBInstance()
     try {
       // @ts-expect-error - 'unsupported' is not a supported format
@@ -578,21 +551,24 @@ t.test('errors', (t) => {
   })
 
   t.test('should throw an error when trying to restoreFromFile a database from an unsupported format', async (t) => {
+    t.plan(1)
+
     const format = 'unsupported'
     const db = await generateTestDBInstance()
     const path = await persistToFile(db, 'binary', 'supported')
+    t.teardown(rmTeardown(path))
+
     try {
       // @ts-expect-error - 'unsupported' is not a supported format
       await restoreFromFile(format, path)
     } catch ({ message }) {
       t.match(message, UNSUPPORTED_FORMAT(format))
-      await rm(path)
     }
   })
-  t.end()
 })
 
 t.test('should throw an error when trying to use a deprecated method', async (t) => {
+  t.plan(2)
   const db = await generateTestDBInstance()
 
   try {
@@ -606,6 +582,12 @@ t.test('should throw an error when trying to use a deprecated method', async (t)
   } catch ({ message }) {
     t.match(message, METHOD_MOVED('restoreFromFile'))
   }
-
-  t.end()
 })
+
+function rmTeardown(p: string) {
+  return async () => {
+    try {
+      await rm(p)
+    } catch (e) {}
+  }
+}

From c8da540a778872feae4e93ed4cbdc52cd8eb68fa Mon Sep 17 00:00:00 2001
From: Tommaso Allevi <tomallevi@gmail.com>
Date: Wed, 13 Dec 2023 19:02:19 +0100
Subject: [PATCH 3/5] Clean up code

---
 packages/orama/src/components/levenshtein.ts | 14 +-------------
 packages/orama/tests/levenshtein.test.ts     |  2 +-
 2 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/packages/orama/src/components/levenshtein.ts b/packages/orama/src/components/levenshtein.ts
index 46de9d13e..fe435809f 100644
--- a/packages/orama/src/components/levenshtein.ts
+++ b/packages/orama/src/components/levenshtein.ts
@@ -43,23 +43,11 @@ function _boundedLevenshtein(a: string, b: string, tolerance: number): number {
   lenA -= startIdx
   lenB -= startIdx
 
-  // early return when the smallest string is empty
-  /*
-  if (lenA === 0) {
-    if ()
-    console.log('AAAAAAA ---- ')
-    return lenB > tolerance ? -1 : lenB
-  }
-  */
-
   const delta = lenB - lenA
 
   if (tolerance > lenB) {
     tolerance = lenB
-  }/* else if (delta > tolerance) {
-    console.log('AAAAAAA ---- ')
-    return -1
-  }*/
+  }
 
   let i = 0
   const row: number[] = []
diff --git a/packages/orama/tests/levenshtein.test.ts b/packages/orama/tests/levenshtein.test.ts
index ef9a03310..855592f01 100644
--- a/packages/orama/tests/levenshtein.test.ts
+++ b/packages/orama/tests/levenshtein.test.ts
@@ -73,7 +73,7 @@ t.test('boundedLevenshtein', (t) => {
     t.match(await boundedLevenshtein('somebody once', 'told me', 8), { isBounded: false })
   })
 
-  t.test('foo', async (t) => {
+  t.test('substrings are ok even if with tolerance', async (t) => {
     t.plan(3)
 
     const a = syncBoundedLevenshtein('Chris', 'Christopher', 0)

From 5fb153ed5c1565d56d3a8b4809f9e287df9cc9ee Mon Sep 17 00:00:00 2001
From: Tommaso Allevi <tomallevi@gmail.com>
Date: Thu, 14 Dec 2023 11:37:23 +0100
Subject: [PATCH 4/5] Add issue test case and fixes

---
 packages/orama/src/components/levenshtein.ts | 25 ++++--
 packages/orama/src/trees/radix.ts            |  4 +-
 packages/orama/tests/levenshtein.test.ts     | 94 +++++++++++++++-----
 packages/orama/tests/search.test.ts          | 36 +++++++-
 4 files changed, 127 insertions(+), 32 deletions(-)

diff --git a/packages/orama/src/components/levenshtein.ts b/packages/orama/src/components/levenshtein.ts
index fe435809f..297ec2cae 100644
--- a/packages/orama/src/components/levenshtein.ts
+++ b/packages/orama/src/components/levenshtein.ts
@@ -23,6 +23,19 @@ function _boundedLevenshtein(a: string, b: string, tolerance: number): number {
   let lenA = a.length
   let lenB = b.length
 
+  // ignore common prefix
+  let startIdx = 0
+  while (startIdx < lenA && a.charCodeAt(startIdx) === b.charCodeAt(startIdx)) {
+    startIdx++
+  }
+
+  // string A is subfix of B
+  if (startIdx === lenA) {
+    return 0
+  }
+
+  // console.log({ startIdx, lenA, lenB, tolerance })
+
   // ignore common suffix
   // note: `~-` decreases by a unit in a bitwise fashion
   while (lenA > 0 && a.charCodeAt(~-lenA) === b.charCodeAt(~-lenB)) {
@@ -35,18 +48,20 @@ function _boundedLevenshtein(a: string, b: string, tolerance: number): number {
     return lenB > tolerance ? -1 : lenB
   }
 
-  // ignore common prefix
-  let startIdx = 0
-  while (startIdx < lenA && a.charCodeAt(startIdx) === b.charCodeAt(startIdx)) {
-    startIdx++
-  }
   lenA -= startIdx
   lenB -= startIdx
 
+  // early return when the smallest string is empty
+  if (lenA <= tolerance && lenB <= tolerance) {
+    return Math.max(lenA, lenB)
+  }
+
   const delta = lenB - lenA
 
   if (tolerance > lenB) {
     tolerance = lenB
+  } else if (delta > tolerance) {
+    return -1
   }
 
   let i = 0
diff --git a/packages/orama/src/trees/radix.ts b/packages/orama/src/trees/radix.ts
index 8b08ddb1f..139a48707 100644
--- a/packages/orama/src/trees/radix.ts
+++ b/packages/orama/src/trees/radix.ts
@@ -217,8 +217,7 @@ function _findLevenshtein(
   if (node.e) {
     const { w, d: docIDs } = node
     if (w) {
-      const difference = Math.abs(term.length - w.length)
-      if (difference <= originalTolerance && syncBoundedLevenshtein(term, w, originalTolerance).isBounded) {
+      if (syncBoundedLevenshtein(term, w, originalTolerance).isBounded) {
         output[w] = []
       }
       if (getOwnProperty(output, w) != null && docIDs.length > 0) {
@@ -268,6 +267,7 @@ export function find(root: Node, { term, exact, tolerance }: FindParams): FindRe
   if (tolerance && !exact) {
     const output: FindResult = {}
     tolerance = tolerance || 0
+
     _findLevenshtein(root, term, 0, tolerance || 0, tolerance, output)
     return output
   } else {
diff --git a/packages/orama/tests/levenshtein.test.ts b/packages/orama/tests/levenshtein.test.ts
index 855592f01..3d0e7c9dd 100644
--- a/packages/orama/tests/levenshtein.test.ts
+++ b/packages/orama/tests/levenshtein.test.ts
@@ -1,5 +1,5 @@
 import t from 'tap'
-import { boundedLevenshtein, levenshtein, syncBoundedLevenshtein } from '../src/components/levenshtein.js'
+import { boundedLevenshtein, levenshtein } from '../src/components/levenshtein.js'
 
 t.test('levenshtein', (t) => {
   t.plan(3)
@@ -28,8 +28,8 @@ t.test('levenshtein', (t) => {
   })
 })
 
-t.test('boundedLevenshtein', (t) => {
-  t.plan(5)
+t.only('boundedLevenshtein', (t) => {
+  t.plan(3)
 
   t.test('should be 0 when both inputs are empty', async (t) => {
     t.plan(2)
@@ -39,13 +39,16 @@ t.test('boundedLevenshtein', (t) => {
   })
 
   t.test('should be the max input length when either strings are empty', async (t) => {
-    t.plan(2)
+    t.plan(3)
 
-    t.match(await boundedLevenshtein('', 'some', 4), { distance: 4, isBounded: true })
-    t.match(await boundedLevenshtein('body', '', 4), { distance: 4, isBounded: true })
+    t.match(await boundedLevenshtein('', 'some', 0), { distance: 0, isBounded: true })
+
+    t.match(await boundedLevenshtein('', 'some', 4), { distance: 0, isBounded: true })
+    t.match(await boundedLevenshtein('body', '', 4), { distance: 0, isBounded: true })
   })
 
-  t.test('distance should be the same as levenshtein, when tolerance is high enough', async (t) => {
+  /*
+  t.only('distance should be the same as levenshtein, when tolerance is high enough', async (t) => {
     t.plan(5)
 
     const tol = 15
@@ -65,6 +68,7 @@ t.test('boundedLevenshtein', (t) => {
       (await boundedLevenshtein('kaushuk chadhui', 'caushik chakrabar', tol)).distance
     )
   })
+  */
 
   t.test('should tell whether the Levenshtein distance is upperbounded by a given tolerance', async (t) => {
     t.plan(2)
@@ -72,20 +76,66 @@ t.test('boundedLevenshtein', (t) => {
     t.match(await boundedLevenshtein('somebody once', 'told me', 9), { isBounded: true })
     t.match(await boundedLevenshtein('somebody once', 'told me', 8), { isBounded: false })
   })
+})
 
-  t.test('substrings are ok even if with tolerance', async (t) => {
-    t.plan(3)
-
-    const a = syncBoundedLevenshtein('Chris', 'Christopher', 0)
-    t.match(a, { distance: 0, isBounded: true })
-    console.log(a)
-
-    const b = syncBoundedLevenshtein('Chris', 'Christopher', 1)
-    t.match(b, { distance: 0, isBounded: true })
-    console.log(b)
-
-    const c = syncBoundedLevenshtein('Chris', 'Chriastopher', 1)
-    t.match(b, { distance: 0, isBounded: true })
-    console.log(c)
-  })
+t.test('syncBoundedLevenshtein substrings are ok even if with tolerance pppppp', async (t) => {
+  t.match(await boundedLevenshtein('Dhris', 'Chris', 0), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Dhris', 'Chris', 1), { isBounded: true, distance: 1 })
+  t.match(await boundedLevenshtein('Dhris', 'Cgris', 1), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Dhris', 'Cgris', 2), { isBounded: true, distance: 2 })
+  t.match(await boundedLevenshtein('Dhris', 'Cgris', 3), { isBounded: true, distance: 2 })
+
+  t.match(await boundedLevenshtein('Dhris', 'Cris', 0), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Dhris', 'Cris', 1), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Dhris', 'Cris', 2), { isBounded: true, distance: 2 })
+  
+  t.match(await boundedLevenshtein('Dhris', 'Caig', 0), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Dhris', 'Caig', 1), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Dhris', 'Caig', 2), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Dhris', 'Caig', 3), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Dhris', 'Caig', 4), { isBounded: true, distance: 4 })
+
+  t.match(await boundedLevenshtein('Chris', 'Chris', 0), { isBounded: true, distance: 0 })
+  t.match(await boundedLevenshtein('Chris', 'Chris', 1), { isBounded: true, distance: 0 })
+  t.match(await boundedLevenshtein('Chris', 'Chris', 2), { isBounded: true, distance: 0 })
+
+  t.match(await boundedLevenshtein('Chris', 'Cris', 0), { isBounded: false, distance: -1 })
+
+  t.match(await boundedLevenshtein('Chris', 'Cris', 1), { isBounded: true, distance: 1 })
+  t.match(await boundedLevenshtein('Chris', 'Cris', 2), { isBounded: true, distance: 1 })
+
+  t.match(await boundedLevenshtein('Chris', 'Caig', 0), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Chris', 'Caig', 1), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Chris', 'Caig', 2), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Chris', 'Caig', 3), { isBounded: true, distance: 3 })
+
+  t.match(await boundedLevenshtein('Craig', 'Caig', 0), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Craig', 'Caig', 1), { isBounded: true, distance: 1 })
+  t.match(await boundedLevenshtein('Craig', 'Caig', 2), { isBounded: true, distance: 1 })
+
+  t.match(await boundedLevenshtein('Chxy', 'Cris', 0), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Chxy', 'Cris', 1), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Chxy', 'Cris', 2), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Chxy', 'Cris', 3), { isBounded: true, distance: 3 })
+
+  t.match(await boundedLevenshtein('Chxy', 'Caig', 0), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Chxy', 'Caig', 1), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Chxy', 'Caig', 2), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Chxy', 'Caig', 3), { isBounded: true, distance: 3 })
+
+  t.match(await boundedLevenshtein('Crxy', 'Cris', 0), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Crxy', 'Cris', 1), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Crxy', 'Cris', 2), { isBounded: true, distance: 2 })
+
+  t.match(await boundedLevenshtein('Crxy', 'Caig', 0), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Crxy', 'Caig', 1), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Crxy', 'Caig', 2), { isBounded: false, distance: -1 })
+  t.match(await boundedLevenshtein('Crxy', 'Caig', 3), { isBounded: true, distance: 3 })
+
+  t.match(await boundedLevenshtein('Crxy', 'Caig', 3), { isBounded: true, distance: 3 })
+
+  t.match(await boundedLevenshtein('Chris', 'Christopher', 0), { isBounded: true, distance: 0 })
+  t.match(await boundedLevenshtein('Chris', 'Christopher', 1), { isBounded: true, distance: 0 })
+
+  t.end()
 })
diff --git a/packages/orama/tests/search.test.ts b/packages/orama/tests/search.test.ts
index 64d8124f4..4148d21b2 100644
--- a/packages/orama/tests/search.test.ts
+++ b/packages/orama/tests/search.test.ts
@@ -8,7 +8,7 @@ t.test('search method', (t) => {
 
     //https://github.com/oramasearch/orama/issues/480
     //following testcase pass only if issue 480 is fixed.
-    t.test('should correctly match with tolerance . even if prefix doesnt match.', async (t) => {
+    t.test('should correctly match with tolerance. even if prefix doesnt match.', async (t) => {
       t.plan(5)
 
       const db = await create({
@@ -37,11 +37,13 @@ t.test('search method', (t) => {
       await insert(db, { name: 'Crxy' }) //create r node in radix tree.
 
       //issue 480 says following will not match because the prefix "Cr" exists so prefix Ch is not searched.
+      console.log('AAAAA')
       const result4 = await search(db, { term: 'Cris', tolerance: 1 })
+      t.equal(result4.count, 1)
+      console.log(result4.hits)
+
       //should match "Craig" even if prefix "Ca" exists.
       const result5 = await search(db, { term: 'Caig', tolerance: 1 })
-
-      t.equal(result4.count, 1)
       t.equal(result5.count, 1)
     })
 
@@ -736,6 +738,34 @@ t.test('search method', (t) => {
   t.end()
 })
 
+t.only('fix-544', async t => {
+  const db = await create({
+    schema: {
+      name: 'string',
+    } as const,
+    components: {
+      tokenizer: {
+        stemming: true,
+        stopWords: englishStopwords,
+      },
+    },
+  })
+
+  await insert(db, { name: "Christopher" })
+  let result
+
+  result = await search(db, { term: 'Chris', tolerance: 0 })
+  t.equal(result.count, 1)
+
+  result = await search(db, { term: 'Chris', tolerance: 1 })
+  t.equal(result.count, 1)
+  
+  result = await search(db, { term: 'Chris', tolerance: 2 })
+  t.equal(result.count, 1)
+
+  t.end()
+})
+
 async function createSimpleDB() {
   let i = 0
   const db = await create({

From 25d614a8bd4b88287f091985ebf57e46600108a5 Mon Sep 17 00:00:00 2001
From: Tommaso Allevi <tomallevi@gmail.com>
Date: Thu, 14 Dec 2023 11:49:17 +0100
Subject: [PATCH 5/5] Address suggestions

---
 packages/orama/src/components/levenshtein.ts | 12 ++++++----
 packages/orama/tests/levenshtein.test.ts     | 25 +-------------------
 packages/orama/tests/search.test.ts          |  4 +---
 3 files changed, 9 insertions(+), 32 deletions(-)

diff --git a/packages/orama/src/components/levenshtein.ts b/packages/orama/src/components/levenshtein.ts
index 297ec2cae..0ca7c1e12 100644
--- a/packages/orama/src/components/levenshtein.ts
+++ b/packages/orama/src/components/levenshtein.ts
@@ -29,13 +29,13 @@ function _boundedLevenshtein(a: string, b: string, tolerance: number): number {
     startIdx++
   }
 
-  // string A is subfix of B
+  // if string A is a prefix of B, we consider the distance 0
+  // because we search by prefix!
+  // fix https://github.com/oramasearch/orama/issues/544
   if (startIdx === lenA) {
     return 0
   }
 
-  // console.log({ startIdx, lenA, lenB, tolerance })
-
   // ignore common suffix
   // note: `~-` decreases by a unit in a bitwise fashion
   while (lenA > 0 && a.charCodeAt(~-lenA) === b.charCodeAt(~-lenB)) {
@@ -51,9 +51,11 @@ function _boundedLevenshtein(a: string, b: string, tolerance: number): number {
   lenA -= startIdx
   lenB -= startIdx
 
-  // early return when the smallest string is empty
+  // If both remaining lengths are within the tolerance, we accept any distance.
+  // The returned distance may be an overestimate, but we don't care:
+  // it is never greater than the tolerance!
   if (lenA <= tolerance && lenB <= tolerance) {
-    return Math.max(lenA, lenB)
+    return lenA > lenB ? lenA : lenB
   }
 
   const delta = lenB - lenA
diff --git a/packages/orama/tests/levenshtein.test.ts b/packages/orama/tests/levenshtein.test.ts
index 3d0e7c9dd..a9e2240cc 100644
--- a/packages/orama/tests/levenshtein.test.ts
+++ b/packages/orama/tests/levenshtein.test.ts
@@ -28,7 +28,7 @@ t.test('levenshtein', (t) => {
   })
 })
 
-t.only('boundedLevenshtein', (t) => {
+t.test('boundedLevenshtein', (t) => {
   t.plan(3)
 
   t.test('should be 0 when both inputs are empty', async (t) => {
@@ -47,29 +47,6 @@ t.only('boundedLevenshtein', (t) => {
     t.match(await boundedLevenshtein('body', '', 4), { distance: 0, isBounded: true })
   })
 
-  /*
-  t.only('distance should be the same as levenshtein, when tolerance is high enough', async (t) => {
-    t.plan(5)
-
-    const tol = 15
-
-    t.equal(levenshtein('aa', 'b'), (await boundedLevenshtein('aa', 'b', tol)).distance)
-    t.equal(levenshtein('b', 'aa'), (await boundedLevenshtein('bb', 'a', tol)).distance)
-    t.equal(
-      levenshtein('somebody once', 'told me'),
-      (await boundedLevenshtein('somebody once', 'told me', tol)).distance
-    )
-    t.equal(
-      levenshtein('the world is gonna', 'roll me'),
-      (await boundedLevenshtein('the world is gonna', 'roll me', tol)).distance
-    )
-    t.equal(
-      levenshtein('kaushuk chadhui', 'caushik chakrabar'),
-      (await boundedLevenshtein('kaushuk chadhui', 'caushik chakrabar', tol)).distance
-    )
-  })
-  */
-
   t.test('should tell whether the Levenshtein distance is upperbounded by a given tolerance', async (t) => {
     t.plan(2)
 
diff --git a/packages/orama/tests/search.test.ts b/packages/orama/tests/search.test.ts
index 4148d21b2..a1191c47f 100644
--- a/packages/orama/tests/search.test.ts
+++ b/packages/orama/tests/search.test.ts
@@ -37,10 +37,8 @@ t.test('search method', (t) => {
       await insert(db, { name: 'Crxy' }) //create r node in radix tree.
 
       //issue 480 says following will not match because the prefix "Cr" exists so prefix Ch is not searched.
-      console.log('AAAAA')
       const result4 = await search(db, { term: 'Cris', tolerance: 1 })
       t.equal(result4.count, 1)
-      console.log(result4.hits)
 
       //should match "Craig" even if prefix "Ca" exists.
       const result5 = await search(db, { term: 'Caig', tolerance: 1 })
@@ -738,7 +736,7 @@ t.test('search method', (t) => {
   t.end()
 })
 
-t.only('fix-544', async t => {
+t.test('fix-544', async t => {
   const db = await create({
     schema: {
       name: 'string',