-
Notifications
You must be signed in to change notification settings - Fork 870
/
Copy pathdtypes.js
60 lines (53 loc) · 1.81 KB
/
dtypes.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import { apis } from "../env.js";
import { DEVICE_TYPES } from "./devices.js";
// TODO: Use the adapter from `env.backends.onnx.webgpu.adapter` to check for `shader-f16` support,
// when available in https://github.com/microsoft/onnxruntime/pull/19940.
// For more information, see https://github.com/microsoft/onnxruntime/pull/19857#issuecomment-1999984753
/**
 * Checks if WebGPU fp16 support is available in the current environment.
 *
 * The probe runs at most once: the promise of the first call is cached and
 * handed to every subsequent caller, including callers that arrive while the
 * adapter request is still in flight.
 *
 * @returns {Promise<boolean>} Resolves to `true` only when WebGPU is available
 * and the adapter reports the `shader-f16` feature; resolves to `false` in
 * every other case (including when the adapter request fails). Never rejects
 * because of a failed adapter request.
 */
export const isWebGpuFp16Supported = (function () {
    /** @type {Promise<boolean>|undefined} */
    let cachedResult;

    // Performs the actual feature probe; only ever invoked once.
    async function detect() {
        if (!apis.IS_WEBGPU_AVAILABLE) {
            return false;
        }
        try {
            const adapter = await navigator.gpu.requestAdapter();
            // `requestAdapter()` resolves to `null` when no suitable adapter exists.
            if (!adapter) {
                return false;
            }
            return adapter.features.has('shader-f16');
        } catch (e) {
            // Best-effort detection: any failure is reported as "not supported".
            return false;
        }
    }

    return function () {
        if (cachedResult === undefined) {
            // Cache the in-flight promise (not the resolved value) so that
            // concurrent callers share a single adapter request.
            cachedResult = detect();
        }
        return cachedResult;
    };
})();
/**
 * Frozen enumeration of the data type (dtype) identifiers known to this module.
 * Every key maps to a string value identical to the key itself, so members can
 * be referenced as `DATA_TYPES.fp16` instead of repeating string literals.
 * The object is frozen, so entries cannot be added, removed, or reassigned at runtime.
 */
export const DATA_TYPES = Object.freeze({
fp32: 'fp32',
fp16: 'fp16',
q8: 'q8',
int8: 'int8',
uint8: 'uint8',
q4: 'q4',
bnb4: 'bnb4',
q4f16: 'q4f16', // fp16 model with int4 block weight quantization
});
/** @typedef {keyof typeof DATA_TYPES} DataType */
/**
 * Frozen mapping from a device type to the data type used by default for that device.
 * Only the `wasm` device has an explicit entry here (`q8`).
 */
export const DEFAULT_DEVICE_DTYPE_MAPPING = Object.freeze({
// NOTE: If not specified, will default to fp32
// (that fallback is not implemented in this object; presumably applied by the consumer — confirm against callers)
[DEVICE_TYPES.wasm]: DATA_TYPES.q8,
});
/**
 * Frozen mapping from each data type in `DATA_TYPES` to its suffix string.
 * `fp32` maps to the empty string (no suffix); every other data type maps to
 * an underscore-prefixed suffix.
 * NOTE(review): presumably these suffixes are appended to model file names to
 * select the matching weights variant — confirm against callers.
 *
 * @type {Record<DataType, string>}
 */
export const DEFAULT_DTYPE_SUFFIX_MAPPING = Object.freeze({
[DATA_TYPES.fp32]: '',
[DATA_TYPES.fp16]: '_fp16',
[DATA_TYPES.int8]: '_int8',
[DATA_TYPES.uint8]: '_uint8',
[DATA_TYPES.q8]: '_quantized', // the only suffix that is not simply `_` + the dtype name
[DATA_TYPES.q4]: '_q4',
[DATA_TYPES.q4f16]: '_q4f16',
[DATA_TYPES.bnb4]: '_bnb4',
});