Skip to content

Commit

Permalink
feat(insert): adds custom ID capabilities (#223)
Browse files Browse the repository at this point in the history
* feat(insert): adds custom ID capabilities

* feat(insert): adds custom IDs to batchInsert function
  • Loading branch information
micheleriva authored Jan 2, 2023
1 parent b8ecb11 commit bf82b60
Show file tree
Hide file tree
Showing 8 changed files with 223 additions and 17 deletions.
8 changes: 8 additions & 0 deletions src/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,11 @@ export function INVALID_STEMMER_FUNCTION_TYPE(): string {
/**
 * Returns the message stating that `tokenizer.tokenizerFn` must be a function.
 *
 * @returns The fixed, human-readable error message.
 */
export function INVALID_TOKENIZER_FUNCTION(): string {
  const message = "tokenizer.tokenizerFn must be a function.";
  return message;
}

/**
 * Builds the message reporting that a document "id" is not a string.
 *
 * @param type - Name of the type that was received instead of "string".
 * @returns The formatted error message, with `type` wrapped in double quotes.
 */
export function TYPE_ERROR_ID_MUST_BE_STRING(type: string): string {
  const expectation = '"id" must be of type "string".';
  return expectation.concat(' Got "', type, '" instead.');
}

/**
 * Builds the message reporting that a document ID is already in use.
 *
 * @param id - The conflicting document ID.
 * @returns The formatted error message, with `id` wrapped in double quotes.
 */
export function ID_ALREADY_EXISTS(id: string): string {
  const prefix = 'Document with ID "';
  return prefix.concat(id, '" already exists.');
}
1 change: 0 additions & 1 deletion src/methods/create.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import type { PropertiesSchema, Configuration, Lyra } from "../types";
import type { Language } from "../tokenizer/languages";
import type { Node } from "../radix-tree/node";
import { defaultTokenizerConfig } from "../tokenizer";
import { intersectTokenScores } from "../utils";
import { assertSupportedLanguage } from "./common";
Expand Down
59 changes: 46 additions & 13 deletions src/methods/insert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@ import type { Lyra, PropertiesSchema } from "../types";
import type { ResolveSchema } from "../types";
import type { Language } from "../tokenizer/languages";
import type { TokenizerConfigExec } from "../tokenizer";
import type { Node } from "../radix-tree/node";
import { assertSupportedLanguage, assertDocSchema } from "./common";
import { trackInsertion } from "../insertion-checker";
import { hookRunner } from "./hooks";
import { uniqueId } from "../utils";
import { insert as radixInsert } from "../radix-tree/radix";
import * as ERRORS from "../errors";

export type InsertConfig = {
language: Language;
export type InsertConfig<S extends PropertiesSchema> = {
language?: Language;
id?: (doc: ResolveSchema<S>) => string | Promise<string>;
};

export type InsertBatchConfig = InsertConfig & {
export type InsertBatchConfig<S extends PropertiesSchema> = InsertConfig<S> & {
batchSize?: number;
};

Expand All @@ -32,12 +33,16 @@ export type InsertBatchConfig = InsertConfig & {
export async function insert<S extends PropertiesSchema>(
lyra: Lyra<S>,
doc: ResolveSchema<S>,
config?: InsertConfig,
config?: InsertConfig<S>,
): Promise<{ id: string }> {
config = { language: lyra.defaultLanguage, ...config };
const id = uniqueId();

assertSupportedLanguage(config.language);
const id = await getDocumentID(doc, config);

// If the ID already exists, we throw an error.
if (lyra.docs[id]) throw new Error(ERRORS.ID_ALREADY_EXISTS(id));

assertSupportedLanguage(config.language!);

assertDocSchema(doc, lyra.schema);

Expand All @@ -63,12 +68,12 @@ export async function insert<S extends PropertiesSchema>(
export async function insertWithHooks<S extends PropertiesSchema>(
lyra: Lyra<S>,
doc: ResolveSchema<S>,
config?: InsertConfig,
config?: InsertConfig<S>,
): Promise<{ id: string }> {
config = { language: lyra.defaultLanguage, ...config };
const id = uniqueId();
const id = await getDocumentID(doc, config);

assertSupportedLanguage(config.language);
assertSupportedLanguage(config.language!);

assertDocSchema(doc, lyra.schema);

Expand Down Expand Up @@ -103,7 +108,7 @@ export async function insertWithHooks<S extends PropertiesSchema>(
export async function insertBatch<S extends PropertiesSchema>(
lyra: Lyra<S>,
docs: ResolveSchema<S>[],
config?: InsertBatchConfig,
config?: InsertBatchConfig<S>,
): Promise<void> {
const batchSize = config?.batchSize ?? 1000;

Expand Down Expand Up @@ -136,11 +141,12 @@ function recursiveradixInsertion<S extends PropertiesSchema>(
lyra: Lyra<S>,
doc: ResolveSchema<S>,
id: string,
config: InsertConfig,
config: InsertConfig<S>,
prefix = "",
tokenizerConfig: TokenizerConfigExec,
schema: PropertiesSchema = lyra.schema,
) {
config = { language: lyra.defaultLanguage, ...config };
const { index, frequencies, tokenOccurrencies } = lyra;

for (const key of Object.keys(doc)) {
Expand All @@ -163,7 +169,7 @@ function recursiveradixInsertion<S extends PropertiesSchema>(
// Use propName (rather than the bare key) here: if doc is a nested object,
// looking up the key alone would give us the wrong index.
const requestedTrie = index[propName];
const tokens = tokenizerConfig.tokenizerFn(doc[key] as string, config.language, false, tokenizerConfig);
const tokens = tokenizerConfig.tokenizerFn(doc[key] as string, config.language!, false, tokenizerConfig);

if (!(propName in frequencies)) {
frequencies[propName] = {};
Expand Down Expand Up @@ -202,3 +208,30 @@ function recursiveradixInsertion<S extends PropertiesSchema>(
}
}
}

/**
 * Resolves the ID to use for a document that is about to be inserted.
 *
 * Resolution order:
 *  1. `config.id(doc)`, when a custom ID function is supplied (highest priority).
 *  2. `doc.id`, when the document carries a non-empty string ID.
 *  3. A freshly generated ID from `uniqueId()` otherwise, i.e. when `doc.id`
 *     is `undefined`, `null` or the empty string.
 *
 * @param doc - The document being inserted.
 * @param config - Insert configuration; may carry a custom `id` function.
 * @returns The resolved document ID.
 * @throws {TypeError} If the custom ID function resolves to a non-string, or
 *   if `doc.id` is present but is not a string (including falsy values such as
 *   `0` or `false`, which must not be silently replaced by a generated ID).
 */
async function getDocumentID<S extends PropertiesSchema>(
  doc: ResolveSchema<S>,
  config: InsertConfig<S>,
): Promise<string> {
  // If the user passes a custom ID function, we use it to generate the ID.
  // This has the maximum priority.
  if (config?.id) {
    const customId = await config.id(doc);

    // The signature promises a string, but nothing enforces that at runtime:
    // guard here so a non-string never ends up as a document key.
    if (typeof customId !== "string") {
      throw new TypeError(ERRORS.TYPE_ERROR_ID_MUST_BE_STRING(typeof customId));
    }

    return customId;
  }

  const docId: unknown = doc.id;

  // If the user doesn't pass an ID, we generate one.
  if (docId === undefined || docId === null) {
    return uniqueId();
  }

  // If the user passes an ID in the document, but it's not a string, we throw
  // a type error. Checking the type (instead of truthiness) also rejects
  // falsy non-string IDs such as `0`, `false` or `NaN`.
  if (typeof docId !== "string") {
    throw new TypeError(ERRORS.TYPE_ERROR_ID_MUST_BE_STRING(typeof docId));
  }

  // An empty string is not usable as an ID: fall back to a generated one.
  return docId === "" ? uniqueId() : docId;
}
1 change: 0 additions & 1 deletion src/methods/remove.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import type { ResolveSchema } from "../types";
import type { PropertiesSchema, Lyra } from "../types";
import type { Node } from "../radix-tree/node";
import { removeDocumentByWord } from "../radix-tree/radix";
import { defaultTokenizerConfig } from "../tokenizer";
import * as ERRORS from "../errors";
Expand Down
1 change: 0 additions & 1 deletion src/methods/search.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import type { Lyra, PropertiesSchema } from "../types";
import type { SearchProperties, ResolveSchema } from "../types";
import type { Language } from "../tokenizer/languages";
import type { Node } from "../radix-tree/node";
import { defaultTokenizerConfig } from "../tokenizer";
import { getIndices } from "./common";
import { find as radixFind } from "../radix-tree/radix";
Expand Down
14 changes: 14 additions & 0 deletions tap-snapshots/tests/insert.test.ts.test.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/* IMPORTANT
* This snapshot file is auto-generated, but designed for humans.
* It should be checked into source control and tracked carefully.
* Re-generate by setting TAP_SNAPSHOT=1 and running tests.
* Make sure to inspect the output below. Do not ignore changes!
*/
'use strict'
exports[`tests/insert.test.ts TAP insert should throw an error if the 'id' field is already taken > must match snapshot 1`] = `
Error: Document with ID "john-01" already exists.
`

exports[`tests/insert.test.ts TAP insert should throw an error if the 'id' field is not a string > must match snapshot 1`] = `
TypeError: "id" must be of type "string". Got "number" instead.
`
155 changes: 155 additions & 0 deletions tests/insert.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import t from "tap";
import { insert, insertBatch } from "../src/methods/insert";
import { create } from "../src/methods/create";

// Test suite for the custom document ID behaviour of `insert` and `insertBatch`.
t.test("insert", async t => {
// One planned assertion per subtest declared below (six subtests).
t.plan(6);

// An "id" string present in the document is returned as the inserted ID.
t.test("should use the 'id' field found in the document", async t => {
t.plan(2);

const db = await create({
schema: {
id: "string",
name: "string",
},
});

const i1 = await insert(db, {
id: "john-01",
name: "John",
});

const i2 = await insert(db, {
id: "doe-02",
name: "Doe",
});

t.equal(i1.id, "john-01");
t.equal(i2.id, "doe-02");
});

// A config-level `id` function takes precedence over the document's own "id".
t.test("should use the custom 'id' function passed in the configuration object", async t => {
t.plan(2);

const db = await create({
schema: {
id: "string",
name: "string",
},
});

// First insert: the document has an "id", but the config function wins.
const i1 = await insert(
db,
{
id: "john-01",
name: "John",
},
{
id: doc => `${doc.name.toLowerCase()}-foo-bar-baz`,
},
);

// Second insert: no config is passed, so the document's "id" is used.
const i2 = await insert(db, {
id: "doe-02",
name: "Doe",
});

t.equal(i1.id, "john-foo-bar-baz");
t.equal(i2.id, "doe-02");
});

// A numeric "id" must make `insert` reject; the error is compared to a snapshot.
t.test("should throw an error if the 'id' field is not a string", async t => {
t.plan(1);

const db = await create({
schema: {
id: "string",
name: "string",
},
});

// The only assertion lives in the catch block, so if `insert` does not throw,
// no assertion runs and the `t.plan(1)` above is left unmet.
try {
await insert(db, {
// @ts-expect-error error case
id: 123,
name: "John",
});
} catch (error) {
t.matchSnapshot(error);
}
});

// Inserting two documents with the same "id" must make the second insert reject.
t.test("should throw an error if the 'id' field is already taken", async t => {
t.plan(1);

const db = await create({
schema: {
id: "string",
name: "string",
},
});

await insert(db, {
id: "john-01",
name: "John",
});

// Same pattern as the previous subtest: the assertion only runs when the
// second insert throws.
try {
await insert(db, {
id: "john-01",
name: "John",
});
} catch (error) {
t.matchSnapshot(error);
}
});

// The schema below declares no "id" property, yet the document's "id" is still used.
t.test("should take the ID field even if not specified in the schema", async t => {
t.plan(1);

const db = await create({
schema: {
name: "string",
},
});

// A compile error is expected here (hence the directive), since the schema
// has no "id" property.
const i1 = await insert(db, {
// @ts-expect-error error case
id: "john-01",
name: "John",
});

t.equal(i1.id, "john-01");
});

// The config-level `id` function is also applied to documents passed to `insertBatch`.
t.test("custom ID should work with insertBatch as well", async t => {
t.plan(1);

const db = await create({
schema: {
id: "string",
name: "string",
},
});

await insertBatch(
db,
[
{
id: "01",
name: "John",
},
{
id: "02",
name: "Doe",
},
],
{
id: doc => `${doc.name.toLowerCase()}-${doc.id}`,
},
);

// The stored keys are the generated IDs, in the order the documents were given.
t.same(Object.keys(db.docs), ["john-01", "doe-02"]);
});
});
1 change: 0 additions & 1 deletion tests/lyra.edge.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import t from "tap";
import type { Node } from "../src/radix-tree/node";
import { create, insert, save, load, search } from "../src/lyra";
import { contains as trieContains } from "../src/radix-tree/radix";

Expand Down

0 comments on commit bf82b60

Please sign in to comment.