forked from 0hq/WebGPT
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelpers.js
171 lines (145 loc) · 4.71 KB
/
helpers.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
// ---------------- WebGPU Helper Functions ----------------
/**
 * Creates a shader module on `device` from the given source text.
 *
 * @param {object} device - Object exposing `createShaderModule` (a WebGPU device).
 * @param {string} code - Shader source, forwarded as the descriptor's `code` field.
 * @returns {*} Whatever `device.createShaderModule` returns.
 */
function createShader(device, code) {
  const descriptor = { code };
  return device.createShaderModule(descriptor);
}
/**
 * Builds a bind group layout in which every entry is a compute-visible buffer.
 *
 * The binding number of each entry is its position in `string_entries`.
 *
 * @param {object} device - Object exposing `createBindGroupLayout` (a WebGPU device).
 * @param {string[]} string_entries - Buffer binding types, one per binding,
 *   each used verbatim as `buffer.type`.
 * @returns {*} Whatever `device.createBindGroupLayout` returns.
 */
function createBindGroupLayout(device, string_entries) {
  const toLayoutEntry = (type, binding) => ({
    binding,
    visibility: GPUShaderStage.COMPUTE,
    buffer: { type },
  });
  return device.createBindGroupLayout({
    entries: string_entries.map(toLayoutEntry),
  });
}
/**
 * Creates a pipeline layout from a list of bind group layouts.
 *
 * @param {object} device - Object exposing `createPipelineLayout` (a WebGPU device).
 * @param {Array} bindGroupLayouts - Forwarded unchanged as the descriptor's
 *   `bindGroupLayouts` field.
 * @returns {*} Whatever `device.createPipelineLayout` returns.
 */
function createPipelineLayout(device, bindGroupLayouts) {
  const descriptor = { bindGroupLayouts };
  return device.createPipelineLayout(descriptor);
}
/**
 * Creates a compute pipeline whose entry point is the shader function `main`.
 *
 * @param {object} device - Object exposing `createComputePipeline` (a WebGPU device).
 * @param {*} shaderModule - Shader module used for the compute stage.
 * @param {*} pipelineLayout - Layout forwarded as the descriptor's `layout` field.
 * @returns {*} Whatever `device.createComputePipeline` returns.
 */
function createComputePipeline(device, shaderModule, pipelineLayout) {
  const compute = { module: shaderModule, entryPoint: "main" };
  return device.createComputePipeline({ layout: pipelineLayout, compute });
}
/**
 * Convenience wrapper: builds a shader module and a pipeline layout, then
 * combines them into a compute pipeline.
 *
 * The shader module is created before the pipeline layout.
 *
 * @param {object} device - WebGPU device passed to every helper call.
 * @param {string} shaderString - Shader source handed to `createShader`.
 * @param {Array} bindGroupLayouts - Layouts handed to `createPipelineLayout`.
 * @returns {*} The result of `createComputePipeline`.
 */
function createPipeline(device, shaderString, bindGroupLayouts) {
  return createComputePipeline(
    device,
    createShader(device, shaderString),
    createPipelineLayout(device, bindGroupLayouts),
  );
}
/**
 * Creates a bind group that binds each buffer at the index it occupies in
 * `buffers` (first buffer -> binding 0, and so on).
 *
 * @param {object} device - Object exposing `createBindGroup` (a WebGPU device).
 * @param {*} bindGroupLayout - Layout forwarded as the descriptor's `layout` field.
 * @param {Array} buffers - Buffers to bind, in binding order.
 * @returns {*} Whatever `device.createBindGroup` returns.
 */
function createBindGroup(device, bindGroupLayout, buffers) {
  const toEntry = (buffer, binding) => ({ binding, resource: { buffer } });
  return device.createBindGroup({
    layout: bindGroupLayout,
    entries: buffers.map(toEntry),
  });
}
/**
 * Thin wrapper around `device.createBuffer`.
 *
 * @param {object} device - Object exposing `createBuffer` (a WebGPU device).
 * @param {number} size - Forwarded as the descriptor's `size` field.
 * @param {number} usage - Forwarded as the descriptor's `usage` field.
 * @returns {*} Whatever `device.createBuffer` returns.
 */
function createBuffer(device, size, usage) {
  const descriptor = { size, usage };
  return device.createBuffer(descriptor);
}
/**
 * Creates a buffer with `COPY_DST | MAP_READ` usage and encodes a copy of
 * `buffer` into it, starting at offset 0 on both sides.
 *
 * The copy is only encoded on `commandEncoder` here; nothing in this function
 * submits it.
 *
 * @param {object} device - WebGPU device used to allocate the new buffer.
 * @param {object} commandEncoder - Encoder whose `copyBufferToBuffer` is called.
 * @param {*} buffer - Source buffer of the copy.
 * @param {number} rows - First dimension handed to `bufferSizeCalc`.
 * @param {number} cols - Second dimension handed to `bufferSizeCalc`.
 * @returns {*} The newly created destination buffer.
 */
function createOutputBuffer(device, commandEncoder, buffer, rows, cols) {
  // The same size is used for both the allocation and the copy length.
  const copySize = bufferSizeCalc(rows, cols);
  const usage = GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ;
  const destination = createBuffer(device, copySize, usage);
  commandEncoder.copyBufferToBuffer(buffer, 0, destination, 0, copySize);
  return destination;
}
// ---------------- Other Helper Functions ----------------
/**
 * Rounds `size` up to the nearest multiple of `alignment`.
 *
 * @param {number} size - Value to round up.
 * @param {number} alignment - Step the result must be a multiple of.
 * @returns {number} Smallest multiple of `alignment` that is >= `size`.
 */
function alignedSize(size, alignment) {
  const chunkCount = Math.ceil(size / alignment);
  return chunkCount * alignment;
}
/**
 * Number of groups of `size` needed to cover `dim`, capped at 256.
 *
 * @param {number} dim - Extent to cover.
 * @param {number} size - Extent handled by one group.
 * @returns {number} `ceil(dim / size)`, but never more than 256.
 */
const workgroupCalc = (dim, size) => {
  const groupsNeeded = Math.ceil(dim / size);
  return Math.min(groupsNeeded, 256);
};
/**
 * Placeholder buffer-size calculator: every call throws.
 *
 * Declared with `let`, so it can be reassigned; presumably a real
 * implementation is installed elsewhere before use (verify against callers).
 *
 * @param {number} dimA - First dimension (unused by the placeholder).
 * @param {number} [dimB=1] - Second dimension (unused by the placeholder).
 * @throws {Error} Always, with the message "BufferSizeCalc not initialized.".
 */
let bufferSizeCalc = (dimA, dimB = 1) => {
  const message = "BufferSizeCalc not initialized.";
  throw new Error(message);
};
/**
 * Draws one index at random, weighted by `probs`.
 *
 * A single `Math.random()` value is compared against the running sum of
 * `probs`; the first index whose cumulative sum exceeds it is returned.
 *
 * @param {ArrayLike<number>} probs - Weights, indexed from 0.
 * @returns {number} The sampled index. If the cumulative sum never exceeds the
 *   random draw, the last index (`probs.length - 1`) is returned, which is -1
 *   for an empty input.
 */
function sampleFromDistribution(probs) {
  const threshold = Math.random();
  let runningTotal = 0;
  let index = 0;
  while (index < probs.length) {
    runningTotal += probs[index];
    if (threshold < runningTotal) {
      return index;
    }
    index += 1;
  }
  // Fallback when the weights sum to no more than the draw.
  return probs.length - 1;
}
/**
 * Temperature-scaled softmax computed on the CPU.
 *
 * The maximum logit is subtracted before exponentiating so the largest
 * exponent is exactly 0, which keeps `Math.exp` from overflowing.
 *
 * @param {number[]|Float32Array} logits - Raw scores. Must provide `length`,
 *   `map` and `reduce`; the result has the same container type as `map` yields.
 * @param {number} [temperature=1.0] - Divisor applied to the shifted logits.
 * @returns {number[]|Float32Array} One value per logit; for finite inputs and
 *   a positive temperature they sum to 1 up to rounding. An empty input gives
 *   an empty output.
 */
function cpuSoftmax(logits, temperature = 1.0) {
  // Pairwise max instead of Math.max(...logits): spreading passes every
  // element as a call argument and throws a RangeError on large arrays.
  let maxLogit = -Infinity;
  for (let i = 0; i < logits.length; i++) {
    maxLogit = Math.max(maxLogit, logits[i]);
  }
  const expLogits = logits.map((logit) => Math.exp((logit - maxLogit) / temperature));
  const sumExpLogits = expLogits.reduce((a, b) => a + b, 0);
  return expLogits.map((expLogit) => expLogit / sumExpLogits);
}
/**
 * Picks the `top_k` largest entries of `probs`.
 *
 * Entries are ranked in descending order of value, and the first `top_k` of
 * that ranking are returned.
 *
 * @param {ArrayLike<number>} probs - Values to rank.
 * @param {number} top_k - How many of the top entries to keep.
 * @returns {{topKIndices: number[], topKProbs: number[]}} Indices into
 *   `probs`, best first, and the matching values read back from `probs`.
 */
function selectTopK(probs, top_k) {
  const ranked = Array.from(probs, (value, index) => ({ value, index }));
  ranked.sort((left, right) => right.value - left.value);

  const topKIndices = [];
  const topKProbs = [];
  for (const { index } of ranked.slice(0, top_k)) {
    topKIndices.push(index);
    topKProbs.push(probs[index]);
  }
  return { topKIndices, topKProbs };
}
// ----------------------- Matrix Operations -----------------------
/**
 * Transposes a flat row-major matrix.
 *
 * The input is read as `input_rows` x `input_cols` (element (r, c) at index
 * `r * input_cols + c`); the output holds the same values laid out as
 * `input_cols` x `input_rows`.
 *
 * @param {ArrayLike<number>} array - Flat source matrix.
 * @param {number} input_rows - Row count of the source.
 * @param {number} input_cols - Column count of the source.
 * @returns {Float32Array} New flat array containing the transpose.
 * @throws {Error} When `array.length` is not `input_rows * input_cols`
 *   (the failure is also logged via `console.error`).
 */
function transposeArray(array, input_rows, input_cols) {
  const expectedLength = input_rows * input_cols;
  if (array.length !== expectedLength) {
    console.error("Transpose dims failed, not transposing!");
    throw new Error("Transpose dims failed");
  }

  const columnMajor = [];
  for (let c = 0; c < input_cols; c += 1) {
    // Walk down source column c; it becomes row c of the output.
    for (let r = 0; r < input_rows; r += 1) {
      const sourceIndex = r * input_cols + c;
      columnMajor.push(array[sourceIndex]);
    }
  }
  return new Float32Array(columnMajor);
}
/**
 * Computes logits on the CPU by taking the dot product of the final
 * position's embedding with every row of the embedding weight matrix.
 *
 * `embeddings` is viewed as float32 data and everything from offset
 * `(seq_length - 1) * n_embd` onward is used as the prediction vector.
 * `embeddingWeights` is indexed row-major, with row `i` starting at
 * `i * n_embd`.
 *
 * @param {ArrayBuffer|ArrayLike<number>} embeddings - Flat embeddings for the
 *   whole sequence; anything `new Float32Array(...)` accepts.
 * @param {ArrayLike<number>} embeddingWeights - Flat weights with
 *   `vocab_size` rows of `n_embd` values.
 * @param {number} seq_length - Number of positions in `embeddings`.
 * @param {number} n_embd - Width of one embedding vector.
 * @param {number} vocab_size - Number of weight rows, and of logits returned.
 * @returns {number[]} One logit per vocabulary row.
 */
function deEmbedCPU(embeddings, embeddingWeights, seq_length, n_embd, vocab_size) {
  const lastPositionOffset = (seq_length - 1) * n_embd;
  const predictionEmbeddings = new Float32Array(embeddings).slice(lastPositionOffset);

  const logits = [];
  for (let token = 0; token < vocab_size; token += 1) {
    const rowStart = token * n_embd;
    let accumulator = 0;
    for (let k = 0; k < n_embd; k += 1) {
      accumulator += embeddingWeights[rowStart + k] * predictionEmbeddings[k];
    }
    logits.push(accumulator);
  }
  return logits;
}
/**
 * Packs a list of embedding vectors end to end into one Float32Array.
 *
 * Vector `i` is written starting at offset `n_embd * i`; the output has room
 * for `n_embd * seq_length` values and any slot not written stays 0.
 *
 * @param {Array<ArrayLike<number>>} embeddings - Vectors to concatenate, in order.
 * @param {number} n_embd - Stride between consecutive vectors in the output.
 * @param {number} seq_length - Number of vector slots to allocate.
 * @returns {Float32Array} The flattened embeddings.
 */
function flattenEmbeddings(embeddings, n_embd, seq_length) {
  const flattened = new Float32Array(n_embd * seq_length);
  for (const [position, vector] of embeddings.entries()) {
    const offset = n_embd * position;
    flattened.set(vector, offset);
  }
  return flattened;
}
/**
 * Finds the smallest divisor of `n` in the range `[start, sqrt(n)]`.
 *
 * With the default `start` of 2 this is the least prime factor of `n`.
 * With a larger `start` the result is only the first divisor at or above
 * `start`, which need not be prime.
 *
 * @param {number} n - Number to factor.
 * @param {number} [start=2] - First candidate divisor to try.
 * @returns {number} The divisor found, or `n` itself when no candidate in the
 *   range divides it.
 */
function leastPrimeFactor(n, start = 2) {
  const limit = Math.sqrt(n);
  let candidate = start;
  while (candidate <= limit) {
    if (n % candidate === 0) {
      return candidate;
    }
    candidate++;
  }
  return n;
}
/**
 * Splits a flat array into `dimA` consecutive rows of `dimB` elements.
 *
 * Each row is produced with `floatArray.slice`, so rows are copies of the
 * same type as the input (e.g. Float32Array rows for a Float32Array input).
 *
 * @param {Array|Float32Array} floatArray - Flat row-major data.
 * @param {number} dimA - Number of rows to produce.
 * @param {number} dimB - Number of elements per row.
 * @returns {Array} Array of `dimA` row slices.
 */
function formatAsMatrix(floatArray, dimA, dimB) {
  const rows = [];
  let rowIndex = 0;
  while (rowIndex < dimA) {
    const begin = rowIndex * dimB;
    const end = (rowIndex + 1) * dimB;
    rows.push(floatArray.slice(begin, end));
    rowIndex += 1;
  }
  return rows;
}
/**
 * Fetches a binary file and exposes its bytes as 32-bit floats.
 *
 * @param {string} url - Location passed straight to `fetch`.
 * @returns {Promise<Float32Array>} View over the downloaded bytes.
 * @throws {Error} When the server answers with a non-2xx status. Without this
 *   check, the body of an error page would be silently reinterpreted as
 *   float data.
 * @throws {RangeError} From the Float32Array constructor when the body's byte
 *   length is not a multiple of 4.
 */
async function loadBinaryFile(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to load ${url}: ${response.status} ${response.statusText}`);
  }
  const buffer = await response.arrayBuffer();
  return new Float32Array(buffer);
}