From 93bab1d2bcd27b67ef0b99cc5cf9732ff0a1d616 Mon Sep 17 00:00:00 2001 From: Josh Davies Date: Wed, 5 Jun 2024 10:38:09 +0100 Subject: [PATCH 1/3] Dynamically allocate space in Horner_tree if WorkSpace is too small. There is no need to use the WorkSpace specifically for the sorting. If it is too small, just allocate a new buffer and free it after. --- sources/optimize.cc | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/sources/optimize.cc b/sources/optimize.cc index 47c9a61e..ca697de6 100644 --- a/sources/optimize.cc +++ b/sources/optimize.cc @@ -870,20 +870,22 @@ vector Horner_tree (const WORD *expr, const vector &order) { } // sort variables in individual terms using bubble sort - WORD *sorted = AT.WorkPointer; + WORD* sorted; + WORD* dynamicAlloc = 0; LONG sumsize = 0; for (const WORD *t=expr; *t!=0; t+=*t) { sumsize += *t; } - if ( sorted + sumsize > AT.WorkTop ) { - MLOCK(ErrorMessageLock); - MesPrint("=== Workspace overflow. %l bytes is not enough.",AM.WorkSize); - MesPrint("=== Change parameter WorkSpace in %s",setupfilename); - sumsize = (AT.WorkPointer-AT.WorkSpace+sumsize)*sizeof(WORD); - MesPrint("=== At least %l bytes are needed.",sumsize); - MUNLOCK(ErrorMessageLock); - Terminate(-1); + + // We actually need sumsize + 1 WORDS available, due to the "*sorted = 0;" + // at the end of the sort loop. + if ( AT.WorkPointer + sumsize + 1 > AT.WorkTop ) { + dynamicAlloc = (WORD*)Malloc1(sizeof(*dynamicAlloc)*(sumsize+1), "Horner_tree alloc"); + sorted = dynamicAlloc; + } + else { + sorted = AT.WorkPointer; } for (const WORD *t=expr; *t!=0; t+=*t) { @@ -919,7 +921,12 @@ vector Horner_tree (const WORD *expr, const vector &order) { } *sorted = 0; - sorted = AT.WorkPointer; + if ( dynamicAlloc ) { + sorted = dynamicAlloc; + } + else { + sorted = AT.WorkPointer; + } // find pointers to all terms and sort them efficiently vector terms; @@ -957,6 +964,10 @@ vector Horner_tree (const WORD *expr, const vector &order) { } res.resize(j); + if ( dynamicAlloc ) { + M_free(dynamicAlloc, "Horner_tree alloc"); + } + #ifdef DEBUG MesPrint ("*** [%s, w=%w] DONE: Horner_tree(%a)", thetime_str().c_str(), order.size(), &order[0]); #endif From 5c6f862fb68090f42793fbe9e5f0aa3d126e656b Mon Sep 17 00:00:00 2001 From: Josh Davies Date: Wed, 5 Jun 2024 13:47:54 +0100 Subject: [PATCH 2/3] Re-use the remaining WorkSpace for each term through Generator Generator consumes WorkSpace, so here we run out very easily if we don't reset the position that we use, each iteration. --- sources/optimize.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sources/optimize.cc b/sources/optimize.cc index ca697de6..29cb9444 100644 --- a/sources/optimize.cc +++ b/sources/optimize.cc @@ -4453,15 +4453,19 @@ WORD generate_expression (WORD exprnr) { // scan for the original expression (marked by *t<0) and give the // terms to Generator WORD *t = AO.OptimizeResult.code; - { + { WORD old = AR.Cnumlhs; AR.Cnumlhs = 0; + // We can use the remaining part of the WorkSpace for every term that + // goes through Generator. We have WorkSpace problems here if we use + // the (modified) AT.WorkPointer every time in the loop. + WORD* currentWorkPointer = AT.WorkPointer; while (*t!=0) { bool is_expr = *t < 0; t++; while (*t!=0) { if (is_expr) { - memcpy(AT.WorkPointer, t, *t*sizeof(WORD)); - Generator(BHEAD AT.WorkPointer, C->numlhs); + memcpy(currentWorkPointer, t, *t*sizeof(WORD)); + Generator(BHEAD currentWorkPointer, C->numlhs); } t+=*t; } From b4925e24e93d5753974a192e2ea2868ca3a8f8e1 Mon Sep 17 00:00:00 2001 From: Josh Davies Date: Tue, 12 Nov 2024 09:59:45 +0000 Subject: [PATCH 3/3] Add a workspace test for optimization --- check/fixes.frm | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/check/fixes.frm b/check/fixes.frm index edf91eb6..ef4112c1 100644 --- a/check/fixes.frm +++ b/check/fixes.frm @@ -2943,3 +2943,18 @@ Local F = rat(f,1); #pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_3f : +*--#[ PullReq535 : +* This test requires more than the specified 50K workspace. +#:maxtermsize 200 +#:workspace 50000 +S x1,...,x19; +L F = (x1+...+x19)^4; +Format O1; +.sort +#optimize F +L G = `optimvalue_'; +P G; +.end +assert succeeded? +assert result("G") =~ expr("389") +*--#] PullReq535 :