Skip to content

Commit

Permalink
feat: support custom @xenova/transformers (run-llama#1232)
Browse files Browse the repository at this point in the history
  • Loading branch information
himself65 authored Sep 19, 2024
1 parent fa01fa2 commit b48bcc3
Show file tree
Hide file tree
Showing 23 changed files with 317 additions and 190 deletions.
9 changes: 9 additions & 0 deletions .changeset/beige-coats-enjoy.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
"@llamaindex/core": patch
"@llamaindex/env": patch
"llamaindex": patch
---

feat: add `load-transformers` event type when loading `@xenova/transformers` module

This benefits users who want to customize the transformers env.
17 changes: 15 additions & 2 deletions packages/core/src/global/settings/callback-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,16 +128,29 @@ export class CallbackManager {
/**
 * Notifies every handler registered for `event`.
 *
 * Each handler is called with a `LlamaIndexCustomEvent` built from a fresh
 * shallow copy of `detail`, so one handler replacing a top-level field does
 * not affect what the next handler receives.
 *
 * @param event - Name of the event to dispatch.
 * @param detail - Payload for the event.
 * @param sync - When `true`, handlers run before this method returns.
 *   Otherwise they are deferred with `queueMicrotask`; if `queueMicrotask`
 *   does not exist in the current runtime, a warning is logged and handlers
 *   run synchronously instead.
 */
dispatchEvent<K extends keyof LlamaIndexEventMaps>(
  event: K,
  detail: LlamaIndexEventMaps[K],
  sync = false,
) {
  const cbs = this.#handlers.get(event);
  if (!cbs) {
    // Nothing registered for this event.
    return;
  }
  // Single definition of the fan-out, shared by the sync and deferred paths.
  const notify = () => {
    cbs.forEach((handler) =>
      handler(LlamaIndexCustomEvent.fromEvent(event, { ...detail })),
    );
  };
  if (sync) {
    // Caller explicitly asked for synchronous delivery; queueMicrotask
    // availability is irrelevant here, so no warning is needed.
    notify();
    return;
  }
  if (typeof queueMicrotask === "undefined") {
    console.warn(
      "queueMicrotask is not available, dispatching synchronously",
    );
    notify();
    return;
  }
  queueMicrotask(notify);
}
}

Expand Down
13 changes: 12 additions & 1 deletion packages/env/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,18 @@
"@aws-crypto/sha256-js": "^5.2.0",
"@swc/cli": "^0.4.0",
"@swc/core": "^1.7.22",
"@xenova/transformers": "^2.17.2",
"concurrently": "^8.2.2",
"pathe": "^1.1.2",
"tiktoken": "^1.0.16",
"vitest": "^2.0.5"
},
"dependencies": {
"@types/lodash": "^4.17.7",
"@types/node": "^22.5.1"
},
"peerDependencies": {
"@aws-crypto/sha256-js": "^5.2.0",
"@xenova/transformers": "^2.17.2",
"js-tiktoken": "^1.0.12",
"pathe": "^1.1.2",
"tiktoken": "^1.0.15"
Expand All @@ -92,8 +94,17 @@
"@aws-crypto/sha256-js": {
"optional": true
},
"@xenova/transformers": {
"optional": true
},
"pathe": {
"optional": true
},
"tiktoken": {
"optional": true
},
"js-tiktoken": {
"optional": true
}
}
}
6 changes: 6 additions & 0 deletions packages/env/src/index.browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
import "./global-check.js";
export * from "./web-polyfill.js";

export {
loadTransformers,
setTransformers,
type LoadTransformerEvent,
type OnLoad,
} from "./multi-model/index.browser.js";
export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/js.js";

// @ts-expect-error
Expand Down
6 changes: 6 additions & 0 deletions packages/env/src/index.edge-light.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,10 @@
import "./global-check.js";
export * from "./node-polyfill.js";

export {
loadTransformers,
setTransformers,
type LoadTransformerEvent,
type OnLoad,
} from "./multi-model/index.non-nodejs.js";
export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/js.js";
6 changes: 6 additions & 0 deletions packages/env/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ export function createSHA256(): SHA256 {
};
}

export {
loadTransformers,
setTransformers,
type LoadTransformerEvent,
type OnLoad,
} from "./multi-model/index.js";
export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/node.js";
export {
AsyncLocalStorage,
Expand Down
6 changes: 6 additions & 0 deletions packages/env/src/index.workerd.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,10 @@ export function getEnv(name: string): string | undefined {
return INTERNAL_ENV[name];
}

export {
loadTransformers,
setTransformers,
type LoadTransformerEvent,
type OnLoad,
} from "./multi-model/index.non-nodejs.js";
export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/js.js";
20 changes: 20 additions & 0 deletions packages/env/src/multi-model/index.browser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { getTransformers, setTransformers, type OnLoad } from "./shared.js";

export {
setTransformers,
type LoadTransformerEvent,
type OnLoad,
} from "./shared.js";
/**
 * Browser entry point for obtaining the `@xenova/transformers` module.
 *
 * If a module is already stored (by an earlier load or by `setTransformers`),
 * it is returned as-is and `onLoad` is NOT called. Otherwise the module is
 * imported from the jsDelivr CDN (pinned to 2.17.2), stored, passed to
 * `onLoad`, and returned.
 *
 * @param onLoad - Callback invoked with the module only when this call
 *   performed the import.
 * @returns The stored transformers module.
 */
export async function loadTransformers(onLoad: OnLoad) {
  const cached = getTransformers();
  if (cached !== null) {
    return cached;
  }
  setTransformers(
    // @ts-expect-error
    await import("https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2"),
  );
  // Read back through the getter so the returned value is whatever was stored.
  const loaded = getTransformers()!;
  onLoad(loaded);
  return loaded;
}
35 changes: 35 additions & 0 deletions packages/env/src/multi-model/index.non-nodejs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { getTransformers, setTransformers, type OnLoad } from "./shared.js";
export {
setTransformers,
type LoadTransformerEvent,
type OnLoad,
} from "./shared.js";

/**
 * Entry point for obtaining the `@xenova/transformers` module in runtimes
 * other than Node.js.
 *
 * If a module is already stored (by an earlier load or by `setTransformers`),
 * it is returned as-is, with no warning and without calling `onLoad`.
 * Otherwise a warning is logged, the prebuilt bundle is imported, stored,
 * passed to `onLoad`, and returned.
 *
 * @param onLoad - Callback invoked with the module only when this call
 *   performed the import.
 * @returns The stored transformers module.
 */
export async function loadTransformers(onLoad: OnLoad) {
  const cached = getTransformers();
  if (cached !== null) {
    return cached;
  }
  /**
   * If you see this warning, the current environment does not support the
   * transformer, because "@xenova/transformers" depends heavily on Node.js APIs.
   *
   * One possible solution is to fix their implementation so it works outside
   * Node.js, but that is not worth the effort: Edge Runtime and Cloudflare
   * Workers are not meant for heavy machine-learning tasks.
   *
   * Alternatively, provide an RPC server that runs the transformer in a
   * Node.js environment, or simply run this code in Node.js.
   *
   * Refs: https://github.com/xenova/transformers.js/issues/309
   */
  console.warn(
    '"@xenova/transformers" is not officially supported in this environment, some features may not work as expected.',
  );
  setTransformers(
    // @ts-expect-error
    await import("@xenova/transformers/dist/transformers"),
  );
  // Read back through the getter so the returned value is whatever was stored.
  const loaded = getTransformers()!;
  onLoad(loaded);
  return loaded;
}
20 changes: 20 additions & 0 deletions packages/env/src/multi-model/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { getTransformers, setTransformers, type OnLoad } from "./shared.js";

export {
setTransformers,
type LoadTransformerEvent,
type OnLoad,
} from "./shared.js";

/**
 * Node.js entry point for obtaining the `@xenova/transformers` module.
 *
 * If a module is already stored (by an earlier load or by `setTransformers`),
 * it is returned as-is and `onLoad` is NOT called. Otherwise the package is
 * dynamically imported, stored, passed to `onLoad`, and returned.
 *
 * @param onLoad - Callback invoked with the module only when this call
 *   performed the import.
 * @returns The stored transformers module.
 */
export async function loadTransformers(onLoad: OnLoad) {
  const cached = getTransformers();
  if (cached !== null) {
    return cached;
  }
  setTransformers(await import("@xenova/transformers"));
  // Read back through the getter so the returned value is whatever was stored.
  const loaded = getTransformers()!;
  onLoad(loaded);
  return loaded;
}
17 changes: 17 additions & 0 deletions packages/env/src/multi-model/shared.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/** Type of the `@xenova/transformers` module namespace object. */
type TransformersModule = typeof import("@xenova/transformers");

/** Module-level slot holding the transformers module; `null` until one is set. */
let currentTransformers: TransformersModule | null = null;

/**
 * Returns the transformers module currently stored, or `null` if none has
 * been stored yet.
 */
export function getTransformers() {
  return currentTransformers;
}

/**
 * Stores `t` as the transformers module, replacing any previous value.
 *
 * @param t - The module object to store.
 */
export function setTransformers(t: TransformersModule) {
  currentTransformers = t;
}

/** Callback that receives the transformers module. */
export type OnLoad = (transformer: TransformersModule) => void;

/** Event payload carrying the transformers module. */
export type LoadTransformerEvent = {
  transformer: TransformersModule;
};
Original file line number Diff line number Diff line change
@@ -1,107 +0,0 @@
/*
 * Default values for the stylesheet's custom properties.
 * The `prefers-color-scheme: dark` block in this file overrides every
 * property below except --max-width, --border-radius and --font-mono.
 */
:root {
/* Layout and typography tokens. */
--max-width: 1100px;
--border-radius: 12px;
--font-mono: ui-monospace, Menlo, Monaco, "Cascadia Mono", "Segoe UI Mono",
"Roboto Mono", "Oxygen Mono", "Ubuntu Monospace", "Source Code Pro",
"Fira Mono", "Droid Sans Mono", "Courier New", monospace;

/*
 * Colours stored as bare "r, g, b" triplets so they can be wrapped at the
 * point of use, e.g. rgb(var(--foreground-rgb)) in the `body` rule.
 */
--foreground-rgb: 0, 0, 0;
--background-start-rgb: 214, 219, 220;
--background-end-rgb: 255, 255, 255;

/* Glow gradients; the 8-digit hex stops include an alpha channel. */
--primary-glow: conic-gradient(
from 180deg at 50% 50%,
#16abff33 0deg,
#0885ff33 55deg,
#54d6ff33 120deg,
#0071ff33 160deg,
transparent 360deg
);
--secondary-glow: radial-gradient(
rgba(255, 255, 255, 1),
rgba(255, 255, 255, 0)
);

/* Tile colours and border gradient (black at varying alpha). */
--tile-start-rgb: 239, 245, 249;
--tile-end-rgb: 228, 232, 233;
--tile-border: conic-gradient(
#00000080,
#00000040,
#00000030,
#00000020,
#00000010,
#00000010,
#00000080
);

/* Callout and card colours; not referenced by any rule in this file. */
--callout-rgb: 238, 240, 241;
--callout-border-rgb: 172, 175, 176;
--card-rgb: 180, 185, 188;
--card-border-rgb: 131, 134, 135;
}

/*
 * Overrides applied when the user agent reports a dark colour-scheme
 * preference. Only colour and gradient properties are redefined here.
 */
@media (prefers-color-scheme: dark) {
:root {
/* White foreground on a solid black background (start and end are equal). */
--foreground-rgb: 255, 255, 255;
--background-start-rgb: 0, 0, 0;
--background-end-rgb: 0, 0, 0;

/* Glow gradients built from the same blue (1, 65, 255) at varying alpha. */
--primary-glow: radial-gradient(rgba(1, 65, 255, 0.4), rgba(1, 65, 255, 0));
--secondary-glow: linear-gradient(
to bottom right,
rgba(1, 65, 255, 0),
rgba(1, 65, 255, 0),
rgba(1, 65, 255, 0.3)
);

/* Tile colours and border gradient (white at varying alpha). */
--tile-start-rgb: 2, 13, 46;
--tile-end-rgb: 2, 5, 19;
--tile-border: conic-gradient(
#ffffff80,
#ffffff40,
#ffffff30,
#ffffff20,
#ffffff10,
#ffffff10,
#ffffff80
);

/* Callout and card colours; not referenced by any rule in this file. */
--callout-rgb: 20, 20, 20;
--callout-border-rgb: 108, 108, 108;
--card-rgb: 100, 100, 100;
--card-border-rgb: 200, 200, 200;
}
}

/* Global reset: border-box sizing and no padding or margin on every element. */
* {
box-sizing: border-box;
padding: 0;
margin: 0;
}

/* Cap the page at the viewport width and hide any horizontal overflow. */
html,
body {
max-width: 100vw;
overflow-x: hidden;
}

/*
 * Page colours, built from the rgb triplet custom properties: text uses
 * --foreground-rgb; the background layers a transparent-to-end-colour
 * vertical gradient over the solid --background-start-rgb colour.
 */
body {
color: rgb(var(--foreground-rgb));
background: linear-gradient(
to bottom,
transparent,
rgb(var(--background-end-rgb))
)
rgb(var(--background-start-rgb));
}

/* Links take the surrounding text colour and are not underlined. */
a {
color: inherit;
text-decoration: none;
}

/* With a dark preference, declare the page's colour scheme as dark. */
@media (prefers-color-scheme: dark) {
html {
color-scheme: dark;
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// test runtime
import "llamaindex";
import { ClipEmbedding } from "llamaindex/embeddings/ClipEmbedding";
import { ClipEmbedding } from "llamaindex";
import "llamaindex/readers/SimpleDirectoryReader";

// @ts-expect-error
Expand Down
Loading

0 comments on commit b48bcc3

Please sign in to comment.