
Commit e052bc4

rewrite: no longer consider backward compatibility; plan and make_plan
1 parent 7d2e391 commit e052bc4

File tree: 8 files changed, +408 -169 lines

Diff for: examples/baby-llama/baby-llama.cpp

+36 -5
@@ -1586,7 +1586,6 @@ int main(int argc, char ** argv) {
         int n_past = 0;
 
         ggml_cgraph gf = {};
-        gf.n_threads = 1;
 
         get_example_targets_batch(ctx0, 64*ex+0, tokens_input, targets);
 
@@ -1595,7 +1594,18 @@
         struct ggml_tensor * e = square_error_loss(ctx0, targets, logits);
 
         ggml_build_forward_expand(&gf, e);
-        ggml_graph_compute(ctx0, &gf);
+
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, &gf);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
 
         float error_before_opt = ggml_get_f32_1d(e, 0);
 
@@ -1611,7 +1621,18 @@
         ggml_opt(ctx0, opt_params_lbfgs, e);
         //
         ggml_build_forward_expand(&gf, e);
-        ggml_graph_compute(ctx0, &gf);
+
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, &gf);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
 
         float error_after_opt = ggml_get_f32_1d(e, 0);
 
@@ -1659,13 +1680,23 @@
         struct ggml_context * ctx0 = ggml_init(params);
 
         ggml_cgraph gf = {};
-        gf.n_threads = 1;
 
         int n_past = 0;
         struct ggml_tensor * logits = forward(&model, &kv_self, ctx0, &gf, tokens_input, sample_ctx, n_past);
 
         ggml_build_forward_expand(&gf, logits);
-        ggml_graph_compute(ctx0, &gf);
+
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, &gf);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
 
         struct ggml_tensor * best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, sample_ctx);
         struct ggml_tensor * probs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_vocab, sample_ctx);
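
Every compute site in this commit follows the same four steps: make a plan for the graph, allocate plan.work_data when plan.work_size is non-zero, run ggml_graph_compute with the plan, then free the buffer. A helper that factors this out might look like the sketch below; the function name graph_compute_with_plan is hypothetical and not part of this commit, and it assumes <stdlib.h> is in scope for malloc/free, as the diffs imply.

// Hypothetical helper wrapping the plan/compute/free pattern shown above.
// Only the ggml calls that appear in this commit's diffs are assumed.
static void graph_compute_with_plan(struct ggml_cgraph * graph, int n_threads) {
    struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(graph, n_threads);
    if (plan.work_size > 0) {
        plan.work_data = malloc(plan.work_size); // scratch buffer sized by the plan
        GGML_ASSERT(plan.work_data);
    }
    ggml_graph_compute(&plan, graph);
    if (plan.work_data) {
        free(plan.work_data); // the caller owns the work buffer
    }
}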

Diff for: examples/benchmark/benchmark-matmult.cpp

+37 -9
@@ -159,13 +159,22 @@ int main(int argc, char ** argv) {
     // printf("Creating compute graph\n");
     struct ggml_cgraph gf = ggml_build_forward(m11xm2);
 
-    gf.n_threads=benchmark_params.n_threads;
-    printf("cgraph->n_threads=%i\n",gf.n_threads);
+    printf("n_threads=%i\n", benchmark_params.n_threads);
 
     TENSOR_DUMP(m11);
     TENSOR_DUMP(m2);
 
-    ggml_graph_compute(ctx, &gf);
+    {
+        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, benchmark_params.n_threads);
+        if (plan.work_size > 0) {
+            plan.work_data = malloc(plan.work_size);
+            GGML_ASSERT(plan.work_data);
+        }
+        ggml_graph_compute(&plan, &gf);
+        if (plan.work_data) {
+            free(plan.work_data);
+        }
+    }
 
     TENSOR_DUMP(gf.nodes[0]);
 
@@ -187,7 +196,6 @@
 
     // printf("Creating compute graph\n");
     struct ggml_cgraph gf31 = ggml_build_forward(q31);
-    gf31.n_threads=benchmark_params.n_threads;
 
     // Set up a second graph computation to make sure we override the CPU cache lines
     // printf("Creating new tensor q12 & Running quantize\n");
@@ -199,8 +207,7 @@
 
     //printf("Creating compute graph\n");
     struct ggml_cgraph gf32 = ggml_build_forward(q32);
-    gf32.n_threads=benchmark_params.n_threads;
-    printf("cgraph->n_threads=%i\n",gf31.n_threads);
+    printf("n_threads=%i\n", benchmark_params.n_threads);
 
     const int dimx = sizex;
     const int dimy = sizey;
@@ -221,14 +228,25 @@
 
         long long int start = ggml_time_us();
         //printf("Running ggml_graph_compute\n");
-        ggml_graph_compute(ctx, &gf31);
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf31, benchmark_params.n_threads);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, &gf31);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
+
        long long int stop = ggml_time_us();
         long long int usec = stop-start;
         double gflops = (double)(flops_per_matrix)/usec/1000.0;
         gflops_sum += gflops;
         printf("%9i;%8i;%6i;%6i;%6i;%15lli;%18lli;%10.2f\n",
             i,
-            gf31.n_threads,
+            benchmark_params.n_threads,
             sizex, sizey, sizez, flops_per_matrix,
             usec,gflops);
 
@@ -253,7 +271,17 @@
         }
 
         // Running a different graph computation to make sure we override the CPU cache lines
-        ggml_graph_compute(ctx, &gf32);
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf32, benchmark_params.n_threads);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, &gf32);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
     }
     printf("\n");
     printf("Average%78.2f\n",gflops_sum/((double)benchmark_params.n_iterations));

Diff for: examples/train-text-from-scratch/train-text-from-scratch.cpp

+34 -7
@@ -3215,9 +3215,6 @@ int main(int argc, char ** argv) {
         struct ggml_cgraph * gf = (struct ggml_cgraph *) gfbuf->data;
         struct ggml_cgraph * gb = (struct ggml_cgraph *) gbbuf->data;
 
-        // ggml_cgraph gf = {};
-        gf->n_threads = params.n_threads;
-        gb->n_threads = params.n_threads;
 
         get_example_targets_batch(lctx, train_samples.data(), train_samples.size(), train_tokens.data(), train_tokens.size(), ex, tokens_input, target_logits, target_probs);
 
@@ -3246,7 +3243,17 @@
             *gb = ggml_build_backward(ctx0, gf, true);
         }
 
-        ggml_graph_compute(ctx0, gf);
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(gf, params.n_threads);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, gf);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
 
         size_t used_mem_before_opt = ggml_used_mem(ctx0);
 
@@ -3270,7 +3277,17 @@
         model.train_samples += n_batch;
         model.train_tokens += n_batch * n_tokens;
 
-        ggml_graph_compute(ctx0, gf);
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(gf, params.n_threads);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, gf);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
 
         float error_after_opt = ggml_get_f32_1d(loss, 0);
 
@@ -3352,13 +3369,23 @@
         struct ggml_context * ctx0 = ggml_init(cparams);
 
         ggml_cgraph gf = {};
-        gf.n_threads = params.n_threads;
 
         int n_past = 0;
         struct ggml_tensor * logits = forward(&model, &kv_self, ctx0, &gf, tokens_input, sample_ctx, n_past);
 
         ggml_build_forward_expand(&gf, logits);
-        ggml_graph_compute(ctx0, &gf);
+
+        {
+            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, params.n_threads);
+            if (plan.work_size > 0) {
+                plan.work_data = malloc(plan.work_size);
+                GGML_ASSERT(plan.work_data);
+            }
+            ggml_graph_compute(&plan, &gf);
+            if (plan.work_data) {
+                free(plan.work_data);
+            }
+        }
 
         //struct ggml_tensor * best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, sample_ctx);
         //struct ggml_tensor * probs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_vocab, sample_ctx);
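
Since all three files touched here are C++, the manual malloc/free pair could also be replaced by a std::vector that owns the scratch memory. A sketch, assuming plan.work_data accepts a plain byte pointer (graph_compute_cpp is a hypothetical name, not part of this commit):

#include <cstdint>
#include <vector>

// Sketch of a C++ variant: the vector releases the work buffer automatically
// when it goes out of scope, so there is no free() to forget on early exits.
static void graph_compute_cpp(struct ggml_cgraph * graph, int n_threads) {
    struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(graph, n_threads);
    std::vector<uint8_t> work;
    if (plan.work_size > 0) {
        work.resize(plan.work_size);
        plan.work_data = work.data(); // the plan borrows the vector's storage
    }
    ggml_graph_compute(&plan, graph);
    // 'work' is destroyed here, after the compute call has finished with it
}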
