diff --git a/programs-orig/back-commafree4.w b/programs-orig/back-commafree4.w index 2504009..5f731ff 100644 --- a/programs-orig/back-commafree4.w +++ b/programs-orig/back-commafree4.w @@ -397,7 +397,7 @@ void sanity(void) { @ Here we check also the inverse property of the inverse lists. @d bluecheck(loc,off) { - for (p=loc+off,q=mem[loc+mmmm];p +#include +#include +#include "gb_graph.h" +#include "gb_save.h" +#include "gb_flip.h" +@; +main (int argc,char*argv[]) { + register int i,j,k,l,m,n,p,q,r,t,bad,vv,ll; + Graph *g; + Vertex *v,*w; + Arc *a; + @; + @; + while (1) { + rounds++; + if ((rounds%interval)==0) fprintf(stderr,"."); + @; + @; + } +} + +@ The command line names a graph in SGB format, followed by a minimum cutoff +time $T_{\rm min}$ (measured in search tree nodes examined before restarting). +Then comes a random seed, so that results of this run can be replicated +if necessary. +Then come zero or more prespecifications, in the form `\.{VERTNAME=label}'. + +@= +if (argc<4 || sscanf(argv[2],"%lld", + &Tmin)!=1 || sscanf(argv[3],"%d", + &seed)!=1) { + fprintf(stderr,"Usage: %s foo.gb Tmin seed [VERTEX=label...]\n", + argv[0]); + exit(-1); +} +g=restore_graph(argv[1]); +if (!g) { + fprintf(stderr,"I couldn't reconstruct graph %s!\n",argv[1]); + exit(-2); +} +m=g->m/2,n=g->n; +if (m>maxm) { + fprintf(stderr,"Sorry, at present I require m<=%d!\n",maxm); + exit(-3); +} +if (n>maxn) { + fprintf(stderr,"Sorry, at present I require n<=%d!\n",maxn); + exit(-4); +} +for (k=4;argv[k];k++) { + for (i=1;argv[k][i];i++) if (argv[k][i]=='=') break; + if (!argv[k][i] || sscanf(&argv[k][i+1],"%d", + &label)!=1 || label<0 || label>m) { + fprintf(stderr,"spec `%s' doesn't have the form `VERTEX=label'!\n", + argv[k]); + exit(-3); + } + argv[k][i]=0; + for (j=0;jvertices+j)->name,argv[k])==0) break; + if (j==n) { + fprintf(stderr,"There's no vertex named `%s'!\n", + argv[k]); + exit(-5); + } + if (verttoprespec[j]) { + fprintf(stderr,"Vertex %s was already specified!\n", + (g->vertices+j)->name); + exit(-6); + } + verttoprespec[j]=1; + if (!xprespec && (label==0 || label==m)) xprespec=1,prespec[0]=(j<<8)+label; + else prespec[prespecptr++]=(j<<8)+label; +} +gb_init_rand(seed); +fprintf(stderr,"OK, I've got a graph with %d vertices, %d edges.\n", + n,m); + +@ @= +for (k=0;kvertices+k; + for (a=v->arcs;a;a=a->next) { + w=a->tip; + edges[k][deg[k]++]=w-g->vertices; + } +} +for (k=0;k= +T=Tmin*reluctant_v; +if ((reluctant_u & -reluctant_u)!=reluctant_v) reluctant_v<<=1; +else reluctant_u++,reluctant_v=1; + +@ @= +int vbose=0; /* set this nonzero to watch me work */ +int rounds; /* how many random trials have we started? */ +long long nodes; /* how many nodes have we started on this round? */ +long long reluctant_u=1, reluctant_v=1; /* restart parameters */ +long long T; /* cutoff time for the current random trial */ +long long Tmin; /* minimum cutoff time (from the command line) */ +int seed; /* seed for |gb_init_rand| */ +int label; /* a label value read from |argv[k]| */ +int prespec[maxn]; /* prespecified labels */ +int verttoprespec[maxn]; /* has this vertex been prespecified? */ +int prespecptr=1; /* how many are prespecified? */ +int xprespec; /* did any of them specify label 0 or label |m|? */ + +@*Data structures. +The vertices are internally numbered from 0 to |n-1|. +Vertex |v| has |deg[v]| neighbors, and they appear in the first +|deg[v]| slots of |edges[v]|. Its label is |verttolab[v]|; but |verttolab[v]=-1| +if |v| hasn't yet been labeled. + +Labels potentially range from 0 to~|m|. +If label |l| hasn't yet been used, |labunlab[l]| is negative, +and |labtovert[l]| is undefined. +Otherwise |labtovert[l]| is the vertex labeled~|l|, and +|labunlab[l]| is the number of unlabeled neighbors of that vertex. + +For each |q| between 1 and~|m|, |edgecount[q]| is the number of +edges labeled~|q|. (This number might momentarily exceed~1, although +it will be exactly equal to~1 when the labeling is graceful.) + +@= +int deg[maxn]; /* how many neighbors of |v|? */ +int edges[maxn][maxm]; /* their identities */ +int verttolab[maxn]; /* what is |v|'s label? */ +int labtovert[maxm+1]; /* what vertex |v[l]| is labeled |l|? */ +int labunlab[maxm+1]; /* how many unlabeled neighbors does |v[l]| have? */ +int edgecount[maxm+1]; /* how many edges are labeled |q|? */ + +@ We begin by converting from Stanford GraphBase format to the data +structures used here. + +@= +for (k=0;k= +w1:@+@; + l=0,nodes=0; + if (!xprespec) { + while (1) { + vv=(n*(unsigned long long)gb_next_rand())>>31; + if (!verttoprespec[vv]) break; + } + move[0][0]=vv<<8; + } +w2:@+if (++nodes>T) goto done; + if (l; + target[l]=q; + @; + @; + moves[l]=r; +w3:@+if (r>0) { + t=move[l][--r]; + vv=t>>8,ll=t&0xff; + if (vbose) @; + @; + if (bad) goto w4a; + @; + x[l++]=r; + goto w2; +} +w4:@+if (--l>=0) { + r=x[l],t=move[l][r],vv=t>>8,ll=t&0xff; + @; +w4a:@+@; + goto w3; + } +done: + +@ @= +if (target[l]) fprintf(stderr,"L%d: %s=%d (%d of %d for edge %d)\n", + l,(g->vertices+vv)->name,ll,moves[l]-r,moves[l],target[l]); +else fprintf(stderr,"L%d: %s=%d (prespecified)\n", + l,(g->vertices+vv)->name,ll); + +@ @= +{ + count++; + fprintf(stderr,"\n"); + for (k=0;k<=m;k++) if (labunlab[k]>=0) { + if (labunlab[k]>0) + fprintf(stderr,"This can't happen!\n"); + vv=labtovert[k]; + printf("%s=%d ", + (g->vertices+vv)->name,k); + } + printf("#%d (round %d, step %lld\n", + count,rounds,nodes); + fflush(stdout); + goto done; +} + +@ By giving an arbitrary permutation to the list of possible moves, +we're providing the maximum randomization over the entire search tree for +all rooted labelings that meet the prespecifications. + +I don't think this will add a significant amount to the running time. +But if it does, we could back off by doing only a partial shuffle +(for example, only on certain levels, or a maximum of 10 swaps, or \dots). + +@= +for (q=r-1;q>0;q--) { + p=((q+1)*((unsigned long long)gb_next_rand()))>>31; + t=move[l][p]; + move[l][p]=move[l][q]; + move[l][q]=t; +} + +@ I think this is the inner loop. + +@= +for (r=j=0,k=q;k<=m;j++,k++) { + if (labunlab[j]>0 && labunlab[k]<0) { + for (vv=labtovert[j],i=deg[vv]-1;i>=0;i--) { + t=verttolab[edges[vv][i]]; + if (t<0) move[l][r++]=(edges[vv][i]<<8)+k; + } + }@+else if (labunlab[j]<0 && labunlab[k]>0) { + for (vv=labtovert[k],i=deg[vv]-1;i>=0;i--) { + t=verttolab[edges[vv][i]]; + if (t<0) move[l][r++]=(edges[vv][i]<<8)+j; + } + } +} + +@ And this loop too is pretty much ``inner.'' + +@= +for (p=deg[vv],i=p-1,bad=0;i>=0;i--) { + t=verttolab[edges[vv][i]]; + if (t>=0) { + p--; + q=abs(t-ll); + t=edgecount[q], bad|=t; + edgecount[q]=t+1; + } +} + +@ Maybe the last line here will go faster if rewritten +`|edgecount[abs(t-ll)]-=(t>=0)|', because of branch prediction? +If so, are modern compilers smart enough to see this? + +@= +for (i=deg[vv]-1;i>=0;i--) { + t=verttolab[edges[vv][i]]; + if (t>=0) edgecount[abs(t-ll)]--; +} + +@ The value of |p| has been set up for us nicely at this point. + +We need to perform another loop, but this one isn't needed quite so often. + +@= +verttolab[vv]=ll,labtovert[ll]=vv,labunlab[ll]=p; +for (i=deg[vv]-1;i>=0;i--) { + t=verttolab[edges[vv][i]]; + if (t>=0) labunlab[t]--; +} + +@ @= +for (i=deg[vv]-1;i>=0;i--) { + t=verttolab[edges[vv][i]]; + if (t>=0) labunlab[t]++; +} +verttolab[vv]=labunlab[ll]=-1; + +@ @= +int count; /* this many solutions found so far */ +int target[maxn]; /* the edge we try to set, on each level */ +int move[maxn][maxm]; /* the things we want to try, on each level */ +int x[maxn]; /* the moves currently being tried, on each level */ +int moves[maxn]; /* used in verbose tracing only */ + +@*Index. diff --git a/programs-orig/back-graceful.w b/programs-orig/back-graceful.w new file mode 100644 index 0000000..d0207fa --- /dev/null +++ b/programs-orig/back-graceful.w @@ -0,0 +1,385 @@ +\datethis +@i gb_types.w + +@*Intro. This is an experimental program to find all of the +graceful labelings of a given graph. Some vertex labels may be +prespecified if desired, by giving assignments of the form \.{VERTEX=label} +on the command line. + +If there are no prespecifications, the solutions are required to be +``canonical,'' in the sense that the vertices of the edge labeled |m-1| +are labeled 1 and~|m|, not 0 and~|m-1|. (This saves of factor of~2, +because every graceful labeling without prespecifications can be +``complemented'' by changing each label |l| to |m-l|.) + +I've tried to make the inner loops run fast, using some ideas of Tom Rokicki, +together with strange ideas of my own called `|labunlab|' and `|vertunlab|'. + +This program is based on {\mc BACK-GRACEFUL-ROOTED}, which finds +only a subset of the graceful labelings but works much faster. + +{\sl Implementation note:\/}\enspace This program uses the function +`|__builtin_popcountll|' that's provided by the \.{gcc} compiler. +You should include `\.{-march=native}' as one of the \.{CFLAGS} in +the \.{Makefile} that you use when compiling this. + +@d maxn 64 /* at most this many vertices */ +@d maxm 63 /* at most this many edges (could go to 127 with double bitmaps) */ +@d sadd64 __builtin_popcountll /* 64-bit sideways addition */ + +@c +#include +#include +#include +#include "gb_graph.h" +#include "gb_save.h" +@; +main (int argc,char*argv[]) { + register int i,j,k,l,m,n,p,q,r,t,bad,vv,ll,carry,forced; + register unsigned long long ebits,rebits,vbits,del; + Graph *g; + Vertex *v,*w; + Arc *a; + @; + @; + @; +} + +@ @= +if (argc<2) { + fprintf(stderr,"Usage: %s foo.gb [VERTEX=label...]\n", + argv[0]); + exit(-1); +} +g=restore_graph(argv[1]); +if (!g) { + fprintf(stderr,"I couldn't reconstruct graph %s!\n",argv[1]); + exit(-2); +} +m=g->m/2,n=g->n; +if (m>maxm) { + fprintf(stderr,"Sorry, at present I require m<=%d!\n",maxm); + exit(-3); +} +if (n>maxn) { + fprintf(stderr,"Sorry, at present I require n<=%d!\n",maxn); + exit(-4); +} +for (k=2;argv[k];k++) { + for (i=1;argv[k][i];i++) if (argv[k][i]=='=') break; + if (!argv[k][i] || sscanf(&argv[k][i+1],"%d", + &label)!=1 || label<0 || label>m) { + fprintf(stderr,"spec `%s' doesn't have the form `VERTEX=label'!\n", + argv[k]); + exit(-3); + } + argv[k][i]=0; + for (j=0;jvertices+j)->name,argv[k])==0) break; + if (j==n) { + fprintf(stderr,"There's no vertex named `%s'!\n", + argv[k]); + exit(-5); + } + if (verttoprespec[j]) { + fprintf(stderr,"Vertex %s was already specified!\n", + (g->vertices+j)->name); + exit(-6); + } + verttoprespec[j]=1; + prespec[prespecptr++]=(j<<8)+label; +} +fprintf(stderr,"OK, I've got a graph with %d vertices, %d edges, %d prespec%s.\n", + n,m,prespecptr,prespecptr==1?"":"s"); + +@ @= +int vbose=0; /* set this nonzero to watch me work */ +int label; /* a label value read from |argv[k]| */ +int prespec[maxn]; /* prespecified labels */ +int verttoprespec[maxn]; /* has this vertex been prespecified? */ +int prespecptr; /* how many are prespecified? */ + +@ @= +fprintf(stderr,"Altogether %lld %sgraceful labeling%s%s", + count,prespecptr?"":"canonical ",count==1?"":"s",prespecptr?" with":""); +for (k=0;kvertices+(prespec[k]>>8))->name,prespec[k]&0xff); +fprintf(stderr,"; %lld nodes.\n", + nodes); + +@*Data structures. +The vertices are internally numbered from 0 to |n-1|. +Vertex |v| has |deg[v]| neighbors, and they appear in the first +|deg[v]| slots of |edges[v]|. + +Labels potentially range from 0 to~|m|. +If label |l| hasn't yet been used, |labunlab[l]| is negative, +and |labtovert[l]| is undefined. +Otherwise |labtovert[l]| is the vertex labeled~|l|, and +|labunlab[l]| is the number of unlabeled neighbors of that vertex. + +The value of |verttolab[v]| is the label of~|v|, if any, otherwise |-1|. +If |v| is unlabeled, the value of |vertunlab[v]| tells how many of |v|'s neighbors +are also unlabeled. Otherwise |vertunlab[v]| is the number of neighbors +that were unlabeled when |v| became labeled. + +At level |l| of the backtracking, the first |l| vertices of |vlist| have been +labeled. The others haven't. (In fact, |vlist| is a permutation, and +|ilist[vlist[k]]=vlist[ilist[i]]=k| for $0\le k= +int deg[maxn]; /* how many neighbors of |v|? */ +int edges[maxn][maxm]; /* their identities */ +int verttolab[maxn]; /* what is |v|'s label? */ +int vertunlab[maxn]; /* how many unlabeled neighbors does it have? */ +int labtovert[maxm+1]; /* what vertex |v[l]| is labeled |l|? */ +int labunlab[maxm+1]; /* how many unlabeled neighbors does |v[l]| have? */ +int vlist[maxn]; /* a permutation of all the variables */ +int ilist[maxn]; /* the inverse of that permutation */ + +@ We begin by converting from Stanford GraphBase format to the data +structures used here. + +@= +for (k=0;kvertices+k; + verttolab[k]=-1,vlist[k]=ilist[k]=k; + for (a=v->arcs;a;a=a->next) { + w=a->tip; + edges[k][deg[k]++]=w-g->vertices; + } +} +for (q=0;q<=m;q++) labunlab[q]=-1; +for (k=0;k= +w1:@+@; + l=carry=forced=0; +w2: nodes++; + if (l>prespecptr) @; + if (carry) @@; + else if (l>=1) ; + if (q==0) @; + target[l]=q; + @; + } + moves[l]=r; +w3:@+if (r>0) { + t=move[l][--r]; + carry=t>>16,vv=(t>>8)&0xff,ll=t&0xff; + if (vbose) @; + forced=0; + @; + x[l++]=r; + goto w2; +} +w4:@+if (--l>=0) { + r=x[l],t=move[l][r],vv=(t>>8)&0xff,ll=t&0xff; + @; + goto w3; + } + +@ @= +if (forced) fprintf(stderr,"L%d: %s=%d (forced)\n", + l,(g->vertices+vv)->name,ll); +else if (lvertices+vv)->name,ll); +else if (carry) fprintf(stderr,"L%d: %s=%d (%d of %d starting edge %d)\n", + l,(g->vertices+vv)->name,ll,moves[l]-r,moves[l],target[l]); +else if (l>0 && move[l-1][x[l-1]]>=(1<<16)) + fprintf(stderr,"L%d: %s=%d (%d of %d completing edge %d)\n", + l,(g->vertices+vv)->name,ll,moves[l]-r,moves[l],target[l]); +else fprintf(stderr,"L%d: %s=%d (%d of %d for edge %d)\n", + l,(g->vertices+vv)->name,ll,moves[l]-r,moves[l],target[l]); + +@ @= +{ + count++; + for (k=0;k<=m;k++) if (labunlab[k]>=0) { + if (labunlab[k]>0) + fprintf(stderr,"This can't happen!\n"); + vv=labtovert[k]; + printf("%s=%d ", + (g->vertices+vv)->name,k); + } + printf("#%lld\n", + count); + fflush(stdout); + goto w4; +} + +@ At this point |vv| and |ll| have been set on the previous level, +when vertex~|vv| was labeled |ll| and we're hoping to give the label +|ll+target[l-1]| to one of |v|'s neighbors. + +@= +{ + q=target[l-1],target[l]=q; + for (r=0,i=deg[vv]-1;i>=0;i--) { + t=verttolab[edges[vv][i]]; + if (t<0) move[l][r++]=(edges[vv][i]<<8)+(ll+q); + } +} + +@ There are essentially two ways to create an edge labeled~|q|, +for each pair of vertex labels $(j,k)$ with $k=j+q$: +Either exactly one of |labunlab[j]| and |labunlab[k]| is positive; +or both of them are negative. The latter case, which doesn't occur +in ``rooted'' solutions, can potentially involve a huge number of +subcases, because {\it any\/} pair of neighboring unlabeled vertices +might qualify. + +I think this is the inner loop. + +@= +for (r=0,j=(l==2 && !prespecptr),k=j+q;k<=m;j++,k++) { + if (labunlab[j]>0 && labunlab[k]<0) { + for (vv=labtovert[j],i=deg[vv]-1;i>=0;i--) { + t=verttolab[edges[vv][i]]; + if (t<0) move[l][r++]=(edges[vv][i]<<8)+k; + } + }@+else if (labunlab[j]<0) { + if (labunlab[k]>0) { + for (vv=labtovert[k],i=deg[vv]-1;i>=0;i--) { + t=verttolab[edges[vv][i]]; + if (t<0) move[l][r++]=(edges[vv][i]<<8)+j; + } + }@+else if (labunlab[k]<0) { + for (i=n-1;i>=l;i--) { + vv=vlist[i]; + if (vertunlab[vv]) move[l][r++]=(1<<16)+(vv<<8)+j; + } + } + } +} + +@ And this loop too is pretty much ``inner.'' + +I apologize for being unable to resist jumping from +this section into the next, when backtracking is +seen to be needed. + +@= +for (p=deg[vv],i=p-1,bad=0;i>=0;i--) { + j=edges[vv][i],t=verttolab[j]; + if (t>=0) { + p--,labunlab[t]--,q=abs(t-ll); + del=1LL<vertices+vv)->name,ll); + goto abort; +} +verttolab[vv]=ll,labtovert[ll]=vv; +t=ilist[vv],p=vlist[l]; +vlist[l]=vv,vlist[t]=p,ilist[vv]=l,ilist[p]=t; +vbits-=1LL<= +vbits+=1LL<=0;i--) { + j=edges[vv][i],t=verttolab[j]; + if (t>=0) { + labunlab[t]++,q=abs(t-ll); + ebits-=1LL<prespecptr|, so that |target[l-1]| is meaningful.) + +@= +{ + register int i,j,k,vv,ll; + register unsigned long long vvbits; + for (forced=0,k=l;k=0;i--) { + j=edges[vv][i],ll=verttolab[j]; + if (ll>=0) /* |j| is a labeled neighbor of |vv|, which is unlabeled */ + vvbits&=~((ebits<>(m-ll))); + } + i=sadd64(vvbits); + if (i>1) continue; + if (i==0) { + if (vbose) fprintf(stderr,"L%d, %s stuck\n", + l,(g->vertices+vv)->name); + goto w4; + } + if (carry) continue; + ll=sadd64(vvbits-1); + move[l][0]=(vv<<8)+ll,forced=1; + } + if (forced) { + r=1,moves[l]=1; /* forced move */ + target[l]=target[l-1]; /* see above */ + goto w3; + } +} + +@ @= +long long count; /* this many solutions found so far */ +long long nodes; /* this many nodes in the search tree so far */ +int target[maxn]; /* the edge we try to set, on each level */ +int move[maxn][maxn*maxm]; /* the things we want to try, on each level */ +int x[maxn]; /* the moves currently being tried, on each level */ +int moves[maxn]; /* used in debugging and verbose tracing only */ + +@*Index. diff --git a/programs-orig/back-kepler-towers.w b/programs-orig/back-kepler-towers.w new file mode 100644 index 0000000..4702f6f --- /dev/null +++ b/programs-orig/back-kepler-towers.w @@ -0,0 +1,101 @@ +@*Intro. This program generates all Kepler towers made from $n$ bricks. +(It supplements the old program {\mc VIENNOT} in my +Mittag-Leffler report ``Three Catalan bijections,'' which was +incomplete: The claim that all towers are generated was never proved, because +I'd blithely assumed that there are no more than $C_n$ of them.) + +@d maxn 40 /* this is plenty big, since $C_{40}>10^{21}$ */ + +@c +#include +#include +int n; /* command line parameter */ +int x[maxn]; /* current brick position */ +int w[maxn]; /* current wall number */ +int p[maxn]; /* beginning of supporting layer */ +int q[maxn]; /* beginning of current layer */ +int t[maxn]; /* type of move: 1 if end of layer, 2 if end of wall */ +char punct[3]={',',';',':'}; /* separators */ +unsigned long long count; /* this many found */ +main (int argc,char*argv[]) { + register i,j,k,l,mask; + @; +b1: @; +b2:@+if (l>n) @; + w[l]=w[l-1]; + switch (t[l-1]) { +case 0: x[l]=x[l-1]+2; /* add brick to the current layer */ + if (x[l]>(1<; +b4:@+@; +b5:@+if (--l) { + if (p[l]==0 && t[l]!=1) goto b5; /* we're backtracking to previous wall */ + goto b4; + } + fprintf(stderr,"Altogether %lld towers generated.\n", + count); +} + +@ @= +if (argc!=2 || sscanf(argv[1],"%d", + &n)!=1) { + fprintf(stderr,"Usage: %s n\n", + argv[0]); + exit(-1); +} +if (n>maxn) { + fprintf(stderr,"You must be kidding; I can't handle n>%d!\n", + maxn); + exit(-2); +} + +@ @= +{ + count++; + if (n<=10) for (j=1;j<=n;j++) printf("%d%c", + x[j], j= +l=0,t[0]=1; +goto b4; + +@ @= +if (t[l-1]) q[l]=l,p[l]=q[l-1]; +else q[l]=q[l-1],p[l]=p[l-1]; +if (x[l]==(1<= +switch (t[l]) { +case 0: t[l++]=1; /* initiate a new layer */ + goto b2; +case 1:@+if (l+(1<; @; -@ @= -for (c=1;c= +for (c=p=1;c= +void print_verts() { + register int c; + for (c=1;c='0' && (c)<='9'? (c)-'0': (c)>='a' && (c)<='z'? (c)-'a'+10: (c)>='A' && (c)<='Z'? (c)-'A'+36: -1) @@ -225,21 +238,25 @@ recover from an improper clobbering of |mate| fields. } u=cl[cc].left,v=cl[cc].right; oo,mu=cl[u].mate,mv=cl[v].mate; - if (!mu) { /* |u| not an endpoint */ - if (!mv) { /* |v| not an endpoint */ + if (mu && cl[u].inner) { + if (vbose&show_choices) fprintf(stderr,"(not allowing "O".8s degree 3)\n", + cl[u].name); + return -1; + } + if (mv && cl[v].inner) { + if (vbose&show_choices) fprintf(stderr,"(not allowing "O".8s degree 3)\n", + cl[v].name); + return -1; + } + if (!mu) { /* |u| not in any fragment */ + if (!mv) { /* |v| not in any fragment */ oo,cl[u].mate=v,cl[v].mate=u,frags++; }@+else { /* |mv| is the endpoint opposite |v| */ ooo,cl[u].mate=mv,cl[mv].mate=u,cl[v].inner=1; } - }@+else if (!mv) { /* |u| is an endpoint but |v| isn't */ + }@+else if (!mv) { /* |mu| is the endpoint opposite |u| */ ooo,cl[v].mate=mu,cl[mu].mate=v,cl[u].inner=1; - }@+else { - if (cl[u].inner || cl[v].inner) { /* no mems for |inner| after |mate| */ - if (vbose&show_choices) - fprintf(stderr," (not causing degree 3 with "O".8s)\n", - cl[cc].name); - return -1; - } + }@+else { /* |u| and |v| are endpoints of fragments */ if (mu==v) { /* also |mv==u|, loop is closing */ if (frags!=1) { if (vbose&show_choices) diff --git a/programs-orig/dlx2.w b/programs-orig/dlx2.w index 5093731..e1694a2 100644 --- a/programs-orig/dlx2.w +++ b/programs-orig/dlx2.w @@ -859,20 +859,17 @@ tends to grow monotonically.) @= void print_progress(void) { - register int l,k,d,c,p; + register int l,k,d,c,p,ds=0; register double f,fd; fprintf(stderr," after "O"lld mems: "O"lld sols,",mems,count); for (f=0.0,fd=1.0,l=0;l=show_levels_max) { - fprintf(stderr,"..."); - break; - } + else if (!ds) ds=1,fprintf(stderr,"..."); } fprintf(stderr," "O".5f\n",f+0.5/fd); } diff --git a/programs-orig/ian-dlx.w b/programs-orig/ian-dlx.w index 0d4e69b..b40eea5 100644 --- a/programs-orig/ian-dlx.w +++ b/programs-orig/ian-dlx.w @@ -1,5 +1,5 @@ @*Intro. This program makes {\mc DLX3} data for an interesting problem -posed by Ian Cullis in 2022: Fill a $10\times10$ array with 1s, 2s, 3s, 4s +posed by Ian Tullis in 2022: Fill a $10\times10$ array with 1s, 2s, 3s, 4s so that there are exactly $k$ occurrences of $k$ in each row and each column. Also the 2s should form nontouching dominoes, the 3s should form nontouching trominoes, and the 4s should form nontouching ell-tetrominoes, where diff --git a/programs-orig/othello.w b/programs-orig/othello.w new file mode 100644 index 0000000..1fd5f54 --- /dev/null +++ b/programs-orig/othello.w @@ -0,0 +1,147 @@ +@*Intro. I'm (hastily) writing this program in order to get some basic +experience with a generalization of the game of Reversi (which is popularly +called ``Othello'' in its principal variant). + +I consider an $m\times n$ board, whose rows are numbered \.1, \.2, \.3, \dots, +and whose columns are named \.a, \.b, \.c, \dots. +A cell is identified by letter and digit. (I don't forbid $m>9$ or +$n>26$; but bugs will show up if those parameters get too big.) + +When prompted, the user should provide (on |stdin|) the name of the cell where +the next move is to be made, followed by a newline (aka `return'). +The newline can optionally be preceded by `\.!', in which case the +contents of the current board will be printed (on |stdout|). + +The first four moves simply place pieces on the board; they are made without +captures. (They are considered to be moves $-3$, $-2$, $-1$, and $0$, +so that move~1 will be the normal first move.) In standard Othello we +have $m=n=9$, and the preliminary moves are \.{d5}, \.{e5}, \.{e4}, \.{d4}; +the first move might then be \.{d3}, in which case the color of \.{d4} +will be reversed. + +This program actually allows more than two players. With $p$ players, +we start with move $1-p^2$, and use the first $p^2$ moves to set up the +initial position. + +I don't attempt to do anything tricky. Cells of the board contain $-1$ +if they're unoccupied, $c$ if they are occupied and the top color is~$c$. +The colors are 0, 1, \dots,~$p-1$. The value of |board[i][j]| is +maintained for $0\le i\le m+1$ and $0\le j\le n+1$, but the +boundary cells remain unoccupied. + +@d m 8 /* this many rows */ +@d n 8 /* this many columns */ +@d p 2 /* this many players */ + +@c +#include +#include +char board[m+2][n+2]; /* cells of the current board position */ +int move; /* the current move number */ +char deli[8]={-1,-1,-1,0,0,1,1,1}, + delj[8]={-1,0,1,-1,1,-1,0,1}; /* the eight directions */ +char buffer[8]; /* used for input */ +int total[p]; /* how many pieces show this color? */ +@; +void main(void) { + register int i,j,k,l,pass,player; + for (i=0;i<=m+1;i++) for (j=0;j<=n+1;j++) board[i][j]=-1; + for (move=1-p*p,player=0;;move++,player=(player+1)% + p) { + for (pass=0;pass); + player=(player+1)%p; + } + break; /* the game is over: |p| passes in a row */ +nextmove: printf("Move %d, player %c: ", + move,'0'+player); + fflush(stdout); /* make sure the user sees the prompt */ + @; + makemove(i,j,player); + if (buffer[2]=='!') print_board(); + } + print_board(); +} + +@ @= +if (!fgets(buffer,8,stdin)) { + fprintf(stderr,"Unexpected end of input!\n"); + exit(-1); +} +j=buffer[0]-'a'+1,i=buffer[1]-'0'; +if (i<1 || i>m || j<1 || j>n) { + fprintf(stderr,"Cell `%c%c' doesn't exist!\n", + buffer[0],buffer[1]); + print_board(); + goto nextmove; +} +if (!islegal(i,j,player)) { + fprintf(stderr,"No! `%c%c' isn't a legal move for %c.\n", + buffer[0],buffer[1],'0'+player); + print_board(); + goto nextmove; +} + +@ @= +void print_board(void) { + register i,j,k; + for (k=0;k=0) total[k]++; + printf("%c", + k<0?'.': '0'+k); + } + if (i==m) for (k=0;k= +int islegal(int i,int j,int c) { + register int ii,jj,k,l; + if (board[i][j]>=0) return 0; /* already occupied */ + if (move<=0) return 1; /* we're just gettin' started */ + for (k=0;k<8;k++) + @; + return 0; +} + +@ @= +{ + for (ii=i+deli[k],jj=j+delj[k],l=0;board[ii][jj]>=0; + ii+=deli[k],jj+=delj[k],l++) + if (board[ii][jj]==c) goto maybe; + continue; /* no occurrences of |c| in direction |k| */ +maybe:@+if (l) return 1; /* yes, that move reverses at least |l| cells */ + continue; /* two adjacent |c|'s */ +} + +@ @= +void makemove(int i,int j,int c) { + register int ii,jj,k; + board[i][j]=c; + if (move<=0) return; /* just gettin' started */ + for (k=0;k<8;k++) @; +} + +@ @= +{ + for (ii=i+deli[k],jj=j+delj[k]; board[ii][jj]>=0; + ii+=deli[k],jj+=delj[k]) + if (board[ii][jj]==c) goto reverse; + continue; /* no occureeces of |c| in direction |k| */ +reverse:@+for (ii-=deli[k],jj-=delj[k];ii!=i || jj!=j; + ii-=deli[k],jj-=delj[k]) board[ii][jj]=c; +} + +@*Index. diff --git a/programs-orig/perfect-partition-square.w b/programs-orig/perfect-partition-square.w new file mode 100644 index 0000000..131957b --- /dev/null +++ b/programs-orig/perfect-partition-square.w @@ -0,0 +1,214 @@ +@*Intro. Michael Keller suggested a problem that I couldn't stop thinking about, +although it is extremely special and unlikely to be mathematically useful or +elegant: ``Place seven 7s, \dots, seven 1s into a $7\times7$ square so that +the 14 rows and columns exhibit all 14 of the integer partitions of~7 +into more than one part.'' + +I doubt if there's a solution. But if there is, I guess I want to know. +So I'm writing this as fast as I can, using brute force for simplicity +wherever possible (and basically throwing efficiency out the door). + +[Footnote added after debugging: To my astonishment, there are 30885 +solutions! And this program needs less than half an hour to find them all, +despite the inefficiencies.] + +I break the problem into ${13\choose6}=1716$ subproblems, where each +subproblem chooses the partitions for the first six columns; the last +column is always assigned to partition 1111111. The rows are, of course, +assigned to the remaining seven partitions. + +Given such an assignment, I proceed to place the 7s, then the 6s, etc. +To place $l$, I choose a ``hard'' row or column where the partition +has a large part, say~$p$. If that row/col has $m$ empty slots, +I loop over the $m\choose p$ ways to put $l$'s into it. And for every +such placement I loop over the ${7l-m\choose 7-p}$ ways to +place the other $l$'s. + +Array $a$ holds the current placements. At level $l$, the row and +column partitions for unoccupied cells are specified by +arrays |rparts[l]| and |cparts[l]|. A partition is a hexadecimal integer +$(p_1\ldots p_7)_{16}$ with $p_1\ge\cdots\ge p_7\ge0$. + +@d modulus 100 /* print only solutions whose number is a multiple of this */ +@d lobits(k) ((1u<<(k))-1) /* this works for $k<32$ */ +@d gosper(b) {@+ register x=b,y; + x=b&-b,y=b+x; + b=y+(((y^b)/x)>>2);@+} + +@c +#include +#include +int parts[14]={ + 0x6100000, + 0x5200000, + 0x5110000, + 0x4300000, + 0x4210000, + 0x4111000, + 0x3310000, + 0x3220000, + 0x3211000, + 0x3111100, + 0x2221000, + 0x2211100, + 0x2111110, + 0x1111111}; +int rparts[8][8],cparts[8][8]; +char a[8][8]; /* the current placements */ +unsigned long long count; +@; +void main(void) { + register int b,i,j,k,bits; + cparts[7][6]=parts[13]; + for (bits=lobits(6);bits<1<<13;) { + @; + fprintf(stderr,"finished subproblem %x; so far %lld solutions.\n", + bits,count); + gosper(bits); + } +} + +@ @= +for (i=j=k=0,b=1<<12;b;b>>=1,k++) { + if (bits&b) cparts[7][j++]=parts[k]; /* partition |k| goes into a column */ + else rparts[7][i++]=parts[k]; /* partition |k| goes into a row */ +} +place(7); + +@ The recursive subroutine |place(l)| decides where to put all occurrences +of the digit~|l|. (If |l>0|, it calls |verify(l)|, which calls |place(l-1)|.) + +@= +void verify(int l); /* defined later */ +void place(int l) { + register int b,i,j,k,m,p,max,abits,bbits,thisrow,thiscol=-1; + if (l==0) @; + for (max=i=0;i<7;i++) + if (rparts[l][i]>max) max=rparts[l][i],thisrow=i; + for (j=0;j<7;j++) if (cparts[l][j]>max) max=cparts[l][j],thiscol=j; + if (thiscol>=0) @@; + else @; +} + +@ @= +{ + p=max>>24; /* this many (the largest element of the partition) in |thisrow| */ + for (m=0;max;m+=max&0xf,max>>=4) ; /* |m| is number of empty cells */ + for (abits=lobits(p);abits<1<= +{ + p=max>>24; /* this many (the largest element of the partition) in |thiscol| */ + for (m=0;max;m+=max&0xf,max>>=4) ; /* |m| is number of empty cells */ + for (abits=lobits(p);abits<1<= +void verify(int l) { + register i,j,k,m,q; + for (i=0;i<7;i++) { /* we will check row |i| for inconsistency */ + for (j=k=0;j<7;j++) if (a[i][j]==l) k++; /* |k| occurrences of |l| */ + m=rparts[l][i]; + if (k>0) { + for (;m;m>>=4) if ((m&0xf)==k) goto rowgotk; + return; /* invalid: |k| isn't one of the parts */ + }@+else { + if (m&(lobits(4*(8-l)))) return; /* |l| parts remain */ + } +rowgotk: continue; + } + for (j=0;j<7;j++) { /* we will check column |j| for inconsistency */ + for (i=k=0;i<7;i++) if (a[i][j]==l) k++; /* |k| occurrences of |l| */ + m=cparts[l][j]; + if (k>0) { + for (;m;m>>=4) if ((m&0xf)==k) goto colgotk; + return; /* invalid: |k| isn't one of the parts */ + }@+else { + if (m&(lobits(4*(8-l)))) return; /* |l| parts remain */ + } +colgotk: continue; + } + @; +} + +@ OK, we've verified the placement of the |l|'s, so we can proceed +to |l-1|. + +@= +for (i=0;i<7;i++) { /* we will update row |i| for the residual partition */ + for (j=k=0;j<7;j++) if (a[i][j]==l) k++; /* |k| occurrences of |l| */ + if (k>0) { /* we must remove part |k|, which exists */ + for (m=rparts[l][i],q=24;((m>>q)&0xf)!=k;q-=4) ; + rparts[l-1][i]=(m&-(1<<(q+4)))+((m&lobits(q))<<4); + }@+else rparts[l-1][i]=rparts[l][i]; +} +for (j=0;j<7;j++) { /* we will update column |j| for the residual partition */ + for (i=k=0;i<7;i++) if (a[i][j]==l) k++; /* |k| occurrences of |l| */ + if (k>0) { /* we must remove part |k|, which exists */ + for (m=cparts[l][j],q=24;((m>>q)&0xf)!=k;q-=4) ; + cparts[l-1][j]=(m&-(1<<(q+4)))+((m&lobits(q))<<4); + }@+else cparts[l-1][j]=cparts[l][j]; +} +place(l-1); + +@ @= +{ + count++; + if ((count% + modulus)==0) { + printf("%lld: ", + count); + for (i=0;i<7;i++) { + for (j=0;j<7;j++) printf("%d", + a[i][j]); + printf("%c", + i<6? ' ': '\n'); + } + } + return; +} + +@*Index. diff --git a/programs-orig/rank-parade1.w b/programs-orig/rank-parade1.w new file mode 100644 index 0000000..a03260c --- /dev/null +++ b/programs-orig/rank-parade1.w @@ -0,0 +1,164 @@ +@*Intro. Given a nonempty parade on the command line, +this quick-and-dirty program +computes its ``rank,'' as explained in my unpublication +{\sl Parades and poly-Bernoulli bijections}. + +The rank might be huge. So I don't actually compute it; I produce +Mathematica code that will do the numerical work. + +(Sorry --- I hacked this up in a huge hurry.) + +@d maxn 100 + +@c +#include +#include +int strg[maxn],strb[maxn]; /* the digit strings */ +int d; /* the order */ +int m,n; /* how many girls and boys? */ +int perm[maxn],rgs[maxn],hit[maxn]; +main (int argc,char*argv[]) { + register int i,j,k,prevj,t,p,l; + @; + @; + @
; + @
; + printf("extra+((gperm brace[%d,%d]+grgs)%d!+bperm)brace[%d,%d]+brgs\n", + m+1,d+1,d,n+1,d+1@q]))@>); +} + +@ An incorrect command line aborts the run. But we do explain what was wrong. + +@= +if (argc<2) { + fprintf(stderr,"Usage: %s \n", + argv[0]); + exit(-1); +} +for (k=1;k='0' && argv[k][i]<='9';i++) + j=10*j+argv[k][i]-'0'; + if (j==0 || argv[k][i]) { + fprintf(stderr,"Bad argument `%s'; should be a positive number!\n", + argv[k]); + exit(-3); + } + if (j>=maxn) { + fprintf(stderr,"Recompile me: maxn=%d!\n", + maxn); + exit(-6); + } + if (argv[k][0]=='g' && j>m) m=j; + else if (argv[k][0]=='b' && j>n) n=j; + if ((argv[k][0]=='g' && strg[j]>=0) || + (argv[k][0]=='b' && strb[j]>=0)) { + fprintf(stderr,"You've already mentioned %s!\n", + argv[k]); + exit(-4); + } + if (argv[k][0]==argv[k-1][0] && prevj>j) { + fprintf(stderr,"Out of order: %s>%s!\n", + argv[k-1],argv[k]); + exit(-5); + } + if (argv[k][0]=='b' && argv[k-1][0]!='b') d++; + if (argv[k][0]=='g') strg[j]=d;@+else strb[j]=d; +} +if (argv[k-1][0]=='b') { /* parade ended with a boy: |d| is too large */ + for (j=1;j<=n;j++) if (strb[j]==d) strb[j]=0; + d--; +} +for (j=1;j<=m;j++) if (strg[j]<0) { + fprintf(stderr,"girl g%d is missing!\n", + j); + exit(-7); +} +for (j=1;j<=n;j++) if (strb[j]<0) { + fprintf(stderr,"boy b%d is missing!\n", + j); + exit(-8); +} +fprintf(stderr, + "OK, that's a valid parade of order %d with %d girls and %d boys!\n", + d,m,n); + +@ @= +printf("(* output from %s", + argv[0]); +for (k=1;argv[k];k++) printf(" %s", + argv[k]); +printf(" *)\n"); +printf("brace=StirlingS2\n"); +printf("prank[inv_]:=Block[{sum=0,n=Length[inv]},\n"); +printf(" For[j=1,jmax,max++;sum+=(max+1)brace[j,max+1],\n"); +printf(" sum+=rgs[[j]]brace[j,max+1]]];\n"); +printf(" sum]\n"); +printf("extra = Sum[k!^2*brace[%d+1,k+1]*brace[%d+1,k+1],{k,0,%d}]\n", + m,n,d-1@q}]@>); + +@ @
= +for (j=1;j<=d;j++) hit[j]=-1; +for (k=0,j=1;j<=m;j++) { + if (hit[strg[j]]<0) hit[strg[j]]=++k,perm[k]=strg[j]; + rgs[j]=hit[strg[j]]; +} +fprintf(stderr,"girls' rgs is"); +for (j=0;j<=m;j++) fprintf(stderr," %d", + rgs[j]); +fprintf(stderr,"\nand their permutation is"); +for (j=1;j<=d;j++) fprintf(stderr," %d", + perm[j]); +fprintf(stderr,"\n"); +printf("gperm=prank[{"); +for (j=1;j<=d;j++) { + if (j>1) printf(","); + for (k=0,i=j+1;i<=d;i++) if (perm[i]1) printf(","); + printf("%d", + rgs[j]); +} +printf("}]\n"); + +@ @
= +for (j=1;j<=d;j++) hit[j]=-1; +for (k=0,j=1;j<=n;j++) { + if (hit[strb[j]]<0) hit[strb[j]]=++k,perm[k]=strb[j]; + rgs[j]=hit[strb[j]]; +} +fprintf(stderr,"boys' rgs is"); +for (j=0;j<=n;j++) fprintf(stderr," %d", + rgs[j]); +fprintf(stderr,"\nand their permutation is"); +for (j=1;j<=d;j++) fprintf(stderr," %d", + perm[j]); +fprintf(stderr,"\n"); +printf("bperm=prank[{"); +for (j=1;j<=d;j++) { + if (j>1) printf(","); + for (k=0,i=j+1;i<=d;i++) if (perm[i]1) printf(","); + printf("%d", + rgs[j]); +} +printf("}]\n"); + +@*Index. diff --git a/programs-orig/rank-parade2.w b/programs-orig/rank-parade2.w new file mode 100644 index 0000000..a74e204 --- /dev/null +++ b/programs-orig/rank-parade2.w @@ -0,0 +1,157 @@ +@*Intro. Given a nonempty parade on the command line, +this quick-and-dirty program +computes its ``recursive rank,'' as explained in my unpublication +{\sl Parades and poly-Bernoulli bijections}. + +The rank might be huge. So I don't actually compute it; I produce +Mathematica code that will do the numerical work. + +(Sorry --- I hacked this up in a huge hurry.) + +@d maxn 100 + +@c +#include +#include +int strg[maxn],strb[maxn]; /* the digit strings */ +int d; /* the order */ +int m,n; /* how many girls and boys? */ +int name[maxn]; /* original names of the current boys */ +int x[maxn],xname[maxn]; /* the type */ +main (int argc,char*argv[]) { + register int i,j,k,prevj,t,p,l,max; + @; + @; + for (j=1;j<=n;j++) name[j]=j; + while (m) @; + printf("0\n"); /* finish the Mathematica code */ +} + +@ An incorrect command line aborts the run. But we do explain what was wrong. + +@= +if (argc<2) { + fprintf(stderr,"Usage: %s \n", + argv[0]); + exit(-1); +} +for (k=1;k='0' && argv[k][i]<='9';i++) + j=10*j+argv[k][i]-'0'; + if (j==0 || argv[k][i]) { + fprintf(stderr,"Bad argument `%s'; should be a positive number!\n", + argv[k]); + exit(-3); + } + if (j>=maxn) { + fprintf(stderr,"Recompile me: maxn=%d!\n", + maxn); + exit(-6); + } + if (argv[k][0]=='g' && j>m) m=j; + else if (argv[k][0]=='b' && j>n) n=j; + if ((argv[k][0]=='g' && strg[j]>=0) || + (argv[k][0]=='b' && strb[j]>=0)) { + fprintf(stderr,"You've already mentioned %s!\n", + argv[k]); + exit(-4); + } + if (argv[k][0]==argv[k-1][0] && prevj>j) { + fprintf(stderr,"Out of order: %s>%s!\n", + argv[k-1],argv[k]); + exit(-5); + } + if (argv[k][0]=='b' && argv[k-1][0]!='b') d++; + if (argv[k][0]=='g') strg[j]=d;@+else strb[j]=d; +} +if (argv[k-1][0]=='b') { /* parade ended with a boy: |d| is too large */ + d--; /* however I still keep the entry |d+1|, not 0, in |strb|! */ +} +for (j=1;j<=m;j++) if (strg[j]<0) { + fprintf(stderr,"girl g%d is missing!\n", + j); + exit(-7); +} +for (j=1;j<=n;j++) if (strb[j]<0) { + fprintf(stderr,"boy b%d is missing!\n", + j); + exit(-8); +} +fprintf(stderr, + "OK, that's a valid parade of order %d with %d girls and %d boys!\n", + d,m,n); + +@ @= +{ + t=strg[m]+1; /* boys in block |t| = current type */ + for (max=n;max;max--) if (strb[max]==t) break; + if (max==0) l=0,p=n+1; + else { + for (l=0,p=j=1;j<=n;j++) { + if (strb[j]==t && j!=max) x[l]=j,xname[l++]=name[j]; + else strb[p]=strb[j],name[p++]=name[j]; + } + x[l]=max,xname[l]=name[max-l],l++; + @; + } + @; + n=p-1,m--; + +} + +@ @= +if (t>1) { + for (j=1;j=t) strg[j]--; + for (j=1;j=t) strb[j]--; + t--,d--; + } +} + +@ @= +printf("(* output from %s", + argv[0]); +for (k=1;argv[k];k++) printf(" %s", + argv[k]); +printf(" *)\n"); +printf("b=Binomial\n"); +printf("brank[typ_]:=Sum[b[typ[[k]]-1,k],{k,Length[typ]}]\n"); +printf("B[m_, n_] := Sum[k!^2*StirlingS2[m+1,k+1]*StirlingS2[n+1,k+1],\n"); +printf(" {k,0,Min[m,n]}];\n"); + +@ @= +fprintf(stderr,"removing g%d: type (", + m); +for (j=0;j); + for (j=1;j); + printf("+brank[{"); + for (j=0;j); +} + +@*Index. diff --git a/programs-orig/ssmcc-frb.ch b/programs-orig/ssmcc-frb.ch new file mode 100644 index 0000000..681a855 --- /dev/null +++ b/programs-orig/ssmcc-frb.ch @@ -0,0 +1,276 @@ +@x an experimental change file for SSMCC, based on SSXCC-FRB +I suggest that you read {\mc SSXCC}, {\mc SSXCC-BINARY} and {\mc DLX3} first. +@y +I suggest that you read {\mc SSXCC}, {\mc SSXCC-BINARY} and {\mc DLX3} first. + +The ``{\mc FRB} heuristic'' implemented here is motivated by the paper +of Li, Yin, and Li in +@^Li, Hongbo@> +@^Yin, Minghao@> +@^Li, Zhanshan@> +{\sl Leibniz International Proceedings in Informatics\/ \bf210} +(2021), 9:1--9:10 +[the proceedings of the 27th International Conference on Principles and +Practice of Constraint Programming, CP~2021]. +When the option chosen for branching on some primary item~$i$ causes +another primary item~$i'$ to be wiped out, we say that a failure has +occurred with respect to~$i$. We branch on an item +that has a small number of options and a relatively high failure rate. +Details are discussed below. +@z +@x +@d show_weight_bumps 32 /* |vbose| code to show new weights */ +@d show_final_weights 64 /* |vbose| code to display weights at the end */ +@y +@d show_final_stats 64 /* |vbose| code to display item stats at the end */ +@z +@x + if (vbose&show_final_weights) { + fprintf(stderr,"Final weights:\n"); + print_weights(); + } +@y + if (vbose&show_final_stats) { + fprintf(stderr,"Final primary item stats:\n"); + print_item_stats(); + } +@z +@x +`\.w$\langle\,$float$\,\rangle$' is the initial increment |dw| added to +an item's weight (default 1.0); +\item{$\bullet$} +`\.W$\langle\,$float$\,\rangle$' is the factor by which |dw| changes +dynamically (default 1.0); +@y +@z +@x +case 'w': k|=(sscanf(argv[j]+1,""O"f",&dw)-1);@+break; +case 'W': k|=(sscanf(argv[j]+1,""O"f",&dwfactor)-1);@+break; +@y +@z +@x + " [c] [C] [l] [t] [T] [w] [W] [S] < foo.dlx\n", +@y + " [c] [C] [l] [t] [T] [S] < foo.dlx\n", +@z +@x +A primary item $x$ also has a |wt| field, |set[x-5]|, initially~1. +The weight is increased by |dw| whenever we backtrack because |x| +cannot be covered. (Weights aren't actually {\it used} in the present +program; that will come in extensions to be written later. But it will +be convenient to have space ready for them in our data structures, +so that those extensions will be easy to write.) +@y +A primary item $x$ also has two special fields called +|assigns| and |failrate|, used in the {\mc FRB} heuristic. +Their significance is described below. +@z +@x +@d wt(x) set[(x)-7].f /* the current floating-point ``weight'' of |x| */ +@d primextra 7 /* this many extra entries of |set| for each primary item */ +@d secondextra 4 /* and this many for each secondary item */ +@d maxextra 7 /* maximum of |primextra| and |secondextra| */ +@y +@d assigns(x) set[(x)-7].f /* number of assignments tried so far for |x| */ +@d failrate(x) set[(x)-8].f /* the current ``failure rate'' of |x| */ +@d primextra 8 /* this many extra entries of |set| for each primary item */ +@d secondextra 4 /* and this many for each secondary item */ +@d maxextra 8 /* maximum of |primextra| and |secondextra| */ +@z +@x + fprintf(stderr," ("O"d of "O"d), length "O"d, weight "O".1f:\n", + pos(c)+1,active,size(c),wt(c)); +@y + fprintf(stderr, + " ("O"d of "O"d), length "O"d, failrate "O".1f of "O"g:\n", + pos(c)+1,active,size(c),failrate(c),assigns(c)); +@z +@x + o,wt(j)=w0; +@y + oo,assigns(j)=1.0,failrate(j)=0.5; +@z +@x + if (!include_option(cur_choice)) goto tryagain; + @;@+@; + goto forward; +@y + if (!include_option(cur_choice)) goto abort; + @; + @;@+@; + goto forward; +abort:@+@; +@z +@x +@ If a weight becomes dangerously large, we rescale all the weights. + +(That will happen only when |dwfactor| isn't 1.0. Adding a constant +eventually ``converges'': For example, if the constant is 1, we have convergence +to $2^{17}$ after $2^{17}-1=16777215$ steps. +If the constant~|dw| is .250001, convergence +to \.{8.38861e+06} occurs after 25165819 steps!) + +(Note: I threw in the parameters |dw| and |dwfactor| only to do experiments. +My preliminary experiments didn't turn up any noteworthy results. +But I didn't have time to do a careful study; hence there might +be some settings that work unexpectedly well. The code for rescaling +might be flaky, since it hasn't been tested very thoroughly at all.) + +@d dangerous 1e32f +@d wmin 1e-30f + +@= +cmems+=2,oo,wt(tough_itm)+=dw; +if (vbose&show_record_weights && wt(tough_itm)>maxwt) { + maxwt=wt(tough_itm); + fprintf(stderr,""O"8.1f ",maxwt); + print_item_name(tough_itm,stderr); + fprintf(stderr," "O"lld\n",nodes); +} +if (vbose&show_weight_bumps) { + print_item_name(tough_itm,stderr); + fprintf(stderr," wt "O".1f\n",wt(tough_itm)); +} +dw*=dwfactor; +if (wt(tough_itm)>=dangerous) { + register int k; + register float t; + tmems=mems; + for (k=0;k= +void print_weights(void) { + register int k; + for (k=0;k +@^Yin, Minghao@> +@^Li, Zhanshan@> +After the first assignment, |assigns(i)| will be~2.0, and +|failrate(i)| will be either 0.75 or 0.25, depending on whether +or not that assignment led to failure. After $k$ assignments, +the possible values of |failrate(i)| are $1/(2k+2)$, +$3/(2k+2)$, \dots,~$(2k+1)/(2k+2)$. + +@= +oo,assigns(best_itm)+=1.0; +oo,failrate(best_itm)-=failrate(best_itm)/assigns(best_itm); + +@ @= +oo,assigns(best_itm)+=1.0; +oo,failrate(best_itm)+=(1.0-failrate(best_itm))/assigns(best_itm); + +@ @= +void print_item_stats(void) { + register int k; + for (k=0;k= +{ + score=inf_size,tmems=mems; +@y +@d inf_size 0x7fffffff +@d dangerous 1e32f +@d infty 2e32f /* twice |dangerous| */ + +@= +{ + register float fscore,tscore,w; + score=inf_size,tmems=mems,fscore=infty; +@z +@x + for (k=0;kbound(item[k])) s=bound(item[k]); + o,t=size(item[k])+s-bound(item[k])+1; + if ((vbose&show_details) && + level=maxl-show_choices_gap) { + print_item_name(item[k],stderr); + if (bound(item[k])!=1 || s!=0) { + fprintf(stderr, "("O"d:"O"d,"O"d)", + bound(item[k])-s,bound(item[k]),t); + }@+else fprintf(stderr,"("O"d)",t); + } + if (t==1) + for (i=bound(item[k])-slack(item[k]);i>0;i--) o,force[forced++]=item[k]; + else if (t<=score && (t=best_l && (size(item[k])>best_l || + (item[k]bound(item[k])) s=bound(item[k]); + o,t=size(item[k])+s-bound(item[k])+1; + if (t==1) + for (i=bound(item[k])-slack(item[k]);i>0;i--) o,force[forced++]=item[k]; + else { + o,w=failrate(item[k]); + tscore=t/w; + if (tscore>=infty) tscore=dangerous; + if (tscore<=fscore && (tscore=best_l && (size(item[k])>best_l || + (item[k]=maxl-show_choices_gap) { + print_item_name(item[k],stderr);@+ + if (t==1) fprintf(stderr,"(1)"); + else { + if (bound(item[k])!=1 || s!=0) { + fprintf(stderr, "*("O"d:"O"d,"O"d,"O".1f)", + bound(item[k])-s,bound(item[k]),t,w); + }@+else fprintf(stderr,"("O"d,"O".1f)",t,w); + } + } + } +@z +@x + fprintf(stderr,"("O"d)\n",score); +@y + fprintf(stderr,"("O"d), score "O".4f\n",score,fscore); +@z diff --git a/programs-orig/ssmcc-wtd.ch b/programs-orig/ssmcc-wtd.ch new file mode 100644 index 0000000..92c1c74 --- /dev/null +++ b/programs-orig/ssmcc-wtd.ch @@ -0,0 +1,97 @@ +@x an experimental change file for SSMCC, based on SSXCC-WTD +I suggest that you read {\mc SSXCC}, {\mc SSXCC-BINARY} and {\mc DLX3} first. +@y +I suggest that you read {\mc SSXCC}, {\mc SSXCC-BINARY} and {\mc DLX3} first. + +The heuristic implemented in this version is motivated by the paper +of Boussemart, Hemery, Lecoutre, and Sais in {\sl Proc.\ 16th +European Conference on Artificial Intelligence} (2004), 146--150: We increase +the weight of a primary item when its current set of options becomes null. +Items are chosen for branching based on the size of their set divided by their +current weight, unless the choice is forced. +@^Boussemart, Fr\'ed\'eric@> +@^H\'emery, Fred@> +@^Lecoutre, Christophe@> +@^Sa{\"\i}s, Lakhdar@> +@z +@x + if (!include_option(cur_choice)) goto tryagain; + @;@+@; + goto forward; +@y + if (!include_option(cur_choice)) goto abort; + @;@+@; + goto forward; +abort:@+@; +@z +@x +@d inf_size 0x7fffffff + +@= +{ + score=inf_size,tmems=mems; +@y +@d inf_size 0x7fffffff +@d infty 2e32f /* twice |dangerous| */ + +@= +{ + register float fscore,tscore,w; + score=inf_size,tmems=mems,fscore=infty; +@z +@x + for (k=0;kbound(item[k])) s=bound(item[k]); + o,t=size(item[k])+s-bound(item[k])+1; + if ((vbose&show_details) && + level=maxl-show_choices_gap) { + print_item_name(item[k],stderr); + if (bound(item[k])!=1 || s!=0) { + fprintf(stderr, "("O"d:"O"d,"O"d)", + bound(item[k])-s,bound(item[k]),t); + }@+else fprintf(stderr,"("O"d)",t); + } + if (t==1) + for (i=bound(item[k])-slack(item[k]);i>0;i--) o,force[forced++]=item[k]; + else if (t<=score && (t=best_l && (size(item[k])>best_l || + (item[k]bound(item[k])) s=bound(item[k]); + o,t=size(item[k])+s-bound(item[k])+1; + if (t==1) + for (i=bound(item[k])-slack(item[k]);i>0;i--) o,force[forced++]=item[k]; + else { + o,w=wt(item[k]); + tscore=t/w; + if (tscore>=infty) tscore=dangerous; + if (tscore<=fscore && (tscore=best_l && (size(item[k])>best_l || + (item[k]=maxl-show_choices_gap) { + print_item_name(item[k],stderr);@+ + if (t==1) fprintf(stderr,"(1)"); + else { + if (bound(item[k])!=1 || s!=0) { + fprintf(stderr, "*("O"d:"O"d,"O"d,"O".1f)", + bound(item[k])-s,bound(item[k]),t,w); + }@+else fprintf(stderr,"("O"d,"O".1f)",t,w); + } + } + } +@z +@x + fprintf(stderr,"("O"d)\n",score); +@y + fprintf(stderr,"("O"d), score "O".4f\n",score,fscore); +@z diff --git a/programs-orig/ssmcc.w b/programs-orig/ssmcc.w new file mode 100644 index 0000000..4c07b61 --- /dev/null +++ b/programs-orig/ssmcc.w @@ -0,0 +1,1333 @@ +@s mod and +\let\Xmod=\bmod % this is CWEB magic for using "mod" instead of "%" +\font\boldital=cmbxti10 + +\datethis +@*Intro. This is a modification of the program {\mc SSXCC-BINARY}, +extending it to take into account multiplicities for the items. For +the multiplicities, the input format is the same as the one defined in the +program {\mc DLX3}. The extensions were introduced by Filip Stappers +in 2023. + +This program is another experiment in the use of so-called sparse-set data +structures instead of the dancing links. It is written as if living on a +planet where the sparse-set ideas are well known, but doubly linked links are +almost unheard-of. + +I suggest that you read {\mc SSXCC}, {\mc SSXCC-BINARY} and {\mc DLX3} first. + +After this program finds all solutions, it normally prints their total +number on |stderr|, together with statistics about how many +nodes were in the search tree, and how many ``updates'' were made. +The running time in ``mems'' is also reported, together with the approximate +number of bytes needed for data storage. +(An ``update'' is the removal of an option from the list of one its items.) +One ``mem'' essentially means a memory access to a 64-bit word. +The reported totals don't include the time or space needed to parse the +input or to format the output.) + +@d o mems++ /* count one mem */ +@d oo mems+=2 /* count two mems */ +@d ooo mems+=3 /* count three mems */ +@d subroutine_overhead mems+=4 +@d O "%" /* used for percent signs in format strings */ +@d mod % /* used for percent signs denoting remainder in \CEE/ */ +@# +@d max_stage 500 /* at most this many options in a solution */ +@d max_level 32000 /* at most this many levels in the search tree */ +@d max_cols 100000 /* at most this many items */ +@d max_nodes 10000000 /* at most this many nonzero elements in the matrix */ +@d savesize 10000000 /* at most this many entries on |savestack| */ +@d bufsize (9*max_cols+3) /* a buffer big enough to hold all item names */ +@# +@d show_basics 1 /* |vbose| code for basic stats; this is the default */ +@d show_choices 2 /* |vbose| code for backtrack logging */ +@d show_details 4 /* |vbose| code for further commentary */ +@d show_record_weights 16 /* |vbose| code for first time a weight appears */ +@d show_weight_bumps 32 /* |vbose| code to show new weights */ +@d show_final_weights 64 /* |vbose| code to display weights at the end */ +@d show_profile 128 /* |vbose| code to show the search tree profile */ +@d show_full_state 256 /* |vbose| code for complete state reports */ +@d show_tots 512 /* |vbose| code for reporting item totals at start */ +@d show_warnings 1024 /* |vbose| code for reporting options without primaries */ +@d show_max_deg 2048 /* |vbose| code for reporting maximum branching degree */ + +@ Here is the overall structure: + +@c +#include +#include +#include +#include +#include "gb_flip.h" +typedef unsigned int uint; /* a convenient abbreviation */ +typedef unsigned long long ullng; /* ditto */ +@; +@; +@; +int main (int argc, char *argv[]) { + register int c,cc,i,j,k,p,pp,q,r,s,t,cur_choice,cur_node,best_itm,istage,score,best_s,best_l; + @; + @; + @; + if (vbose&show_basics) + @; + if (vbose&show_tots) + @; + imems=mems, mems=0; + if (baditem) @@; + else { + if (randomizing) @; + @; + } +done:@+if (vbose&show_profile) @; + if (vbose&show_final_weights) { + fprintf(stderr,"Final weights:\n"); + print_weights(); + } + if (vbose&show_max_deg) + fprintf(stderr,"The maximum best_itm size was "O"d.\n",maxdeg); + if (vbose&show_basics) { + fprintf(stderr,"Altogether "O"llu solution"O"s, "O"llu+"O"llu mems,", + count,count==1?"":"s",imems,mems); + bytes=(itemlength+setlength)*sizeof(int)+last_node*sizeof(node) + +2*maxl*sizeof(int)+maxsaveptr*sizeof(threeints); + fprintf(stderr," "O"llu updates, "O"llu bytes, "O"llu nodes,", + updates,bytes,nodes); + fprintf(stderr," ccost "O"lld%%.\n", + mems? (200*cmems+mems)/(2*mems):0); + } + if (sanity_checking) fprintf(stderr,"sanity_checking was on!\n"); + @; +} + +@ You can control the amount of output, as well as certain properties +of the algorithm, by specifying options on the command line: +\smallskip\item{$\bullet$} +`\.v$\langle\,$integer$\,\rangle$' enables or disables various kinds of verbose + output on |stderr|, given by binary codes such as |show_choices|; +\item{$\bullet$} +`\.m$\langle\,$integer$\,\rangle$' causes every $m$th solution +to be output (the default is \.{m0}, which merely counts them); +\item{$\bullet$} +`\.s$\langle\,$integer$\,\rangle$' causes the algorithm to randomize +the initial list of items (thus providing some variety, although +the solutions are by no means uniformly random); +\item{$\bullet$} +`\.d$\langle\,$integer$\,\rangle$' sets |delta|, which causes periodic +state reports on |stderr| after the algorithm has performed approximately +|delta| mems since the previous report (default 10000000000); +\item{$\bullet$} +`\.c$\langle\,$positive integer$\,\rangle$' limits the levels on which +choices are shown during verbose tracing; +\item{$\bullet$} +`\.C$\langle\,$positive integer$\,\rangle$' limits the levels on which +choices are shown in the periodic state reports (default 10); +\item{$\bullet$} +`\.l$\langle\,$nonnegative integer$\,\rangle$' gives a {\it lower\/} limit, +relative to the maximum level so far achieved, to the levels on which +choices are shown during verbose tracing; +\item{$\bullet$} +`\.t$\langle\,$positive integer$\,\rangle$' causes the program to +stop after this many solutions have been found; +\item{$\bullet$} +`\.T$\langle\,$integer$\,\rangle$' sets |timeout| (which causes abrupt +termination if |mems>timeout| at the beginning of a level); +\item{$\bullet$} +`\.w$\langle\,$float$\,\rangle$' is the initial increment |dw| added to +an item's weight (default 1.0); +\item{$\bullet$} +`\.W$\langle\,$float$\,\rangle$' is the factor by which |dw| changes +dynamically (default 1.0); +\item{$\bullet$} +`\.S$\langle\,$filename$\,\rangle$' to output a ``shape file'' that encodes +the search tree. + +@= +int random_seed=0; /* seed for the random words of |gb_rand| */ +int randomizing; /* has `\.s' been specified? */ +int vbose=show_basics+show_warnings; /* level of verbosity */ +int spacing; /* solution $k$ is output if $k$ is a multiple of |spacing| */ +int show_choices_max=1000000; /* above this level, |show_choices| is ignored */ +int show_choices_gap=1000000; /* below level |maxl-show_choices_gap|, + |show_details| is ignored */ +int show_levels_max=10; /* above this level, state reports stop */ +int maxl; /* maximum level actually reached */ +int maxs; /* maximum stage actually reached */ +int maxsaveptr; /* maximum size of |savestack| */ +char buf[bufsize]; /* input buffer */ +ullng count; /* solutions found so far */ +ullng options; /* options seen so far */ +ullng imems,mems,tmems,cmems; /* mem counts */ +ullng updates; /* update counts */ +ullng bytes; /* memory used by main data structures */ +ullng nodes; /* total number of branch nodes initiated */ +ullng thresh=10000000000; /* report when |mems| exceeds this, if |delta!=0| */ +ullng delta=10000000000; /* report every |delta| or so mems */ +ullng maxcount=0xffffffffffffffff; /* stop after finding this many solutions */ +ullng timeout=0x1fffffffffffffff; /* give up after this many mems */ +float w0=1.0,dw=1.0,dwfactor=1.0; /* initial weight, increment, and growth */ +float maxwt=1.0; /* largest weight seen so far */ +FILE *shape_file; /* file for optional output of search tree shape */ +char *shape_name; /* its name */ +int maxdeg; /* the largest branching degree seen so far */ + +@ If an option appears more than once on the command line, the first +appearance takes precedence. + +@= +for (j=argc-1,k=0;j;j--) switch (argv[j][0]) { +case 'v': k|=(sscanf(argv[j]+1,""O"d",&vbose)-1);@+break; +case 'm': k|=(sscanf(argv[j]+1,""O"d",&spacing)-1);@+break; +case 's': k|=(sscanf(argv[j]+1,""O"d",&random_seed)-1),randomizing=1;@+break; +case 'd': k|=(sscanf(argv[j]+1,""O"lld",&delta)-1),thresh=delta;@+break; +case 'c': k|=(sscanf(argv[j]+1,""O"d",&show_choices_max)-1);@+break; +case 'C': k|=(sscanf(argv[j]+1,""O"d",&show_levels_max)-1);@+break; +case 'l': k|=(sscanf(argv[j]+1,""O"d",&show_choices_gap)-1);@+break; +case 't': k|=(sscanf(argv[j]+1,""O"lld",&maxcount)-1);@+break; +case 'T': k|=(sscanf(argv[j]+1,""O"lld",&timeout)-1);@+break; +case 'w': k|=(sscanf(argv[j]+1,""O"f",&dw)-1);@+break; +case 'W': k|=(sscanf(argv[j]+1,""O"f",&dwfactor)-1);@+break; +case 'S': shape_name=argv[j]+1, shape_file=fopen(shape_name,"w"); + if (!shape_file) + fprintf(stderr,"Sorry, I can't open file `"O"s' for writing!\n", + shape_name); + break; +default: k=1; /* unrecognized command-line option */ +} +if (k) { + fprintf(stderr, "Usage: "O"s [v] [m] [s] [d]" + " [c] [C] [l] [t] [T] [w] [W] [S] < foo.dlx\n", + argv[0]); + exit(-1); +} +if (randomizing) gb_init_rand(random_seed); + +@ @= +if (shape_file) fclose(shape_file); + +@ Here's a subroutine that I hope is never invoked (except maybe +when I'm debugging). + +@= +void confusion(char*m) { + fprintf(stderr,""O"s!\n",m); +} + +@*Data structures. +Sparse-set data structures were introduced by Preston Briggs +and Linda Torczon [{\sl ACM Letters on Programming Languages and Systems\/ +\bf2} (1993), 59--69], who realized that exercise 2.12 in +Aho, Hopcroft, and Ullman's classic text {\sl The Design and Analysis +of Computer Algorithms\/} (Addison--Wesley, 1974) was much more than +just a slick trick to avoid initializing an array. +(Indeed, {\sl TAOCP\/} exercise 2.2.6--24 calls it the ``sparse array trick.'') + +The basic idea is amazingly simple, when specialized to the situations +that we need to deal with: We can represent a subset~$S$ of the universe +$U=\{x_0,x_1,\ldots,x_{n-1}\}$ by maintaining two $n$-element arrays +$p$ and $q$, each of which is a permutation of~$\{0,1,\ldots,n-1\}$, +together with an integer $s$ in the range $0\le s\le n$. In fact, $p$ is +the {\it inverse\/} of~$q$; and $s$ is the number of elements of~$S$. +The current value of the set $S$ is then simply +$\{x_{p_0},\ldots,x_{p_{s-1}}\}$. (Notice that every $s$-element subset can be +represented in $s!\,(n-s)!$ ways.) + +It's easy to test if $x_k\in S$, because that's true if and only if $q_k= +typedef struct node_struct { + int itm; /* the item |x| corresponding to this node */ + int loc; /* where this node resides in |x|'s active set */ + int clr; /* color associated with item |x| in this option, if any */ + int spr; /* a spare field inserted only to maintain 16-byte alignment */ +} node; +typedef union { + int i; /* an integer (32 bits) */ + float f; /* a floating point value (fits in 4 bytes) */ +} tetrabyte; + +@ @= +node nd[max_nodes]; /* the master list of nodes */ +int last_node; /* the first node in |nd| that's not yet used */ +int item[max_cols]; /* the master list of items */ +int second=max_cols; /* boundary between primary and secondary items */ +int last_itm; /* items seen so far during input, plus 1 */ +tetrabyte set[max_nodes+maxextra*max_cols]; /* active options for active items */ +int itemlength; /* number of elements used in |item| */ +int setlength; /* number of elements used in |set| */ +int active; /* current number of active items */ +int oactive; /* value of active before swapping out current-choice items */ +int baditem; /* an item with no options, plus 1 */ +int osecond; /* setting of |second| just after initial input */ +int force[max_cols]; /* stack of items known to have |size=bound-slack| */ +int forced; /* the number of items on that stack */ + +@ We're going to store string data (an item's name) in the midst of +the integer array |set|. So we've got to do some type coercion using +low-level \CEE/-ness. + +@= +typedef struct { + int l,r; +} twoints; +typedef struct { + int l,s,b; +} threeints; +typedef union { + unsigned char str[8]; /* eight one-byte characters */ + twoints lr; /* two four-byte integers */ +} stringbuf; +stringbuf namebuf; + +@ @= +void print_item_name(int k,FILE *stream) { + namebuf.lr.l=lname(k),namebuf.lr.r=rname(k); + fprintf(stream," "O".8s",namebuf.str); +} + +@ An option is identified not by name but by the names of the items it contains. +Here is a routine that prints an option, given a pointer to any of its +nodes. It also prints the position of the option in its item list. + +@= +void print_option(int p,FILE *stream,int showpos) { + register int k,q,x; + x=nd[p].itm; + if (p>=last_node || x<=0) { + fprintf(stderr,"Illegal option "O"d!\n",p); + return; + } + for (q=p;;) { + print_item_name(x,stream); + if (nd[q].clr) + fprintf(stream,":"O"c",nd[q].clr); + q++; + x=nd[q].itm; + if (x<0) q+=x,x=nd[q].itm; + if (q==p) break; + } + k=nd[q].loc; + if (showpos>0) fprintf(stream," ("O"d of "O"d)\n",k-x+1,size(x)); + else if (showpos==0) fprintf(stream,"\n"); +} +@# +void prow(int p) { + print_option(p,stderr,1); +} + +@ When I'm debugging, I might want to look at one of the current item lists. + +@= +void print_itm(int c) { + register int p; + if (c=setlength || + pos(c)<0 || pos(c)>=itemlength || item[pos(c)]!=c) { + fprintf(stderr,"Illegal item "O"d!\n",c); + return; + } + fprintf(stderr,"Item ("O"d)", c); + print_item_name(c,stderr); + if (c=active) + fprintf(stderr," (secondary "O"d, purified), length "O"d:\n", + pos(c)+1,size(c)); + else fprintf(stderr," (secondary "O"d), length "O"d:\n", + pos(c)+1,size(c)); + for (p=c;p= +void sanity() { + register int k,x,i,l,r,q,qq; + int ok = 1; + for (k=0;kr) fprintf(stderr,"itm>loc in node "O"d!\n",i); + else { + if (set[r].i!=i) { + fprintf(stderr,"Bad loc field for option "O"d of item",r-l+1); + print_item_name(l,stderr); + fprintf(stderr," in node "O"d!, set[r].i="O"d\n",i, set[r].i); + ok=0; + } + if (pos(l)= +while (1) { + + if (!fgets(buf,bufsize,stdin)) break; + if (o,buf[p=strlen(buf)-1]!='\n') panic("Input line way too long"); + for (p=0;o,isspace(buf[p]);p++) ; + if (buf[p]=='|' || !buf[p]) continue; /* bypass comment or blank line */ + last_itm=1; + break; +} +if (!last_itm) panic("No items"); +for (;o,buf[p];) { + o,namebuf.lr.l=namebuf.lr.r=0; + @; + oo,lname(last_itm*ipropcount)=namebuf.lr.l, + rname(last_itm*ipropcount)=namebuf.lr.r; + o,slack(last_itm*ipropcount)=q-r,bound(last_itm*ipropcount)=q; + last_itm++; + if (last_itm>max_cols) panic("Too many items"); + for (p+=j+1;o,isspace(buf[p]);p++) ; + if (buf[p]=='|') { + if (second!=max_cols) panic("Item name line contains | twice"); + second=last_itm; + for (p++;o,isspace(buf[p]);p++) ; + } +} + +@ @= +if (second==max_cols) istage=0;@+else istage=2; +start_name:@+for (j=0;j<8 && (o,!isspace(buf[p+j]));j++) { + if (buf[p+j]==':') { + if (istage) panic("Illegal `:' in item name"); + @; + r=q,istage=1; + goto start_name; + }@+else if (buf[p+j]=='|') { + if (istage>1) panic("Illegal `|' in item name"); + @; + if (q==0) panic("Upper bound is zero"); + if (istage==0) r=q; + else if (r>q) panic("Lower bound exceeds upper bound"); + istage=2; + goto start_name; + } + o,namebuf.str[j]=buf[p+j]; + } + switch (istage) { +case 1: panic("Lower bound without upper bound"); +case 0: q=r=1; +case 2: break; + } + if (j==0) panic("Item name empty"); + if (j==8 && !isspace(buf[p+j])) panic("Item name too long"); + @; + +@ @= +for (q=0,pp=p;pp'9') panic("Illegal digit in bound spec"); + q=10*q+buf[pp]-'0'; +} +p=pp+1; +while (j) namebuf.str[--j]=0; + +@ @= +for (k=last_itm-1;k;k--) { + if (o,lname(k*ipropcount)!=namebuf.lr.l) continue; + if (rname(k*ipropcount)==namebuf.lr.r) break; +} +if (k) panic("Duplicate item name"); + +@ I'm putting the option number into the |spr| field of the +spacer that follows it, as a +possible debugging aid. But the program doesn't currently use that information. + +@= +while (1) { + if (!fgets(buf,bufsize,stdin)) break; + if (o,buf[p=strlen(buf)-1]!='\n') panic("Option line too long"); + for (p=0;o,isspace(buf[p]);p++) ; + if (buf[p]=='|' || !buf[p]) continue; /* bypass comment or blank line */ + i=last_node; /* remember the spacer at the left of this option */ + for (pp=0;buf[p];) { + o,namebuf.lr.l=namebuf.lr.r=0; + for (j=0;j<8 && (o,!isspace(buf[p+j])) && buf[p+j]!=':';j++) + o,namebuf.str[j]=buf[p+j]; + if (!j) panic("Empty item name"); + if (j==8 && !isspace(buf[p+j]) && buf[p+j]!=':') + panic("Item name too long"); + @; + if (buf[p+j]!=':') o,nd[last_node].clr=0; + else if (k>=second) { + if ((o,isspace(buf[p+j+1])) || (o,!isspace(buf[p+j+2]))) + panic("Color must be a single character"); + o,nd[last_node].clr=(unsigned char)buf[p+j+1]; + p+=2; + }@+else panic("Primary item must be uncolored"); + for (p+=j+1;o,isspace(buf[p]);p++) ; + } + if (!pp) { + if (vbose&show_warnings) + fprintf(stderr,"Option ignored (no primary items): "O"s",buf); + while (last_node>i) { + @; + last_node--; + } + }@+else { + o,nd[i].loc=last_node-i; /* complete the previous spacer */ + last_node++; /* create the next spacer */ + if (last_node==max_nodes) panic("Too many nodes"); + options++; + o,nd[last_node].itm=i+1-last_node; + nd[last_node].spr=options; /* option number, for debugging only */ + } +} +@; +@; +@; +@; + +@ We temporarily use |pos| to recognize duplicate items in an option. + +@= +for (k=(last_itm-1)*ipropcount;k>=0;k-=ipropcount) { + if (o,lname(k)!=namebuf.lr.l) continue; + if (rname(k)==namebuf.lr.r) break; +} +if (!k) panic("Unknown item name"); +if (o,pos(k)>i) panic("Duplicate item name in this option"); +last_node++; +if (last_node==max_nodes) panic("Too many nodes"); +o,t=size(k); /* how many previous options have used this item? */ +o,nd[last_node].itm=k/ipropcount,nd[last_node].loc=t; +if ((k/ipropcount)= +o,k=nd[last_node].itm*ipropcount; +oo,size(k)--,pos(k)=i-1; + +@ @= +active=itemlength=last_itm-1; +for (k=0,j=primextra;k= +for (;k;k--) { + o,j=item[k-1]; + if (k==second) second=j; /* |second| is now an index into |set| */ + oo,size(j)=size(k*ipropcount); + o,pos(j)=k-1; + oo,rname(j)=rname(k*ipropcount),lname(j)=lname(k*ipropcount); + oo,slack(j)=slack(k*ipropcount),bound(j)=bound(k*ipropcount); + if (k<=osecond) { + o,wt(j)=w0; + if (size(j)= +for (k=1;k= +{ + if (vbose&show_choices) { + k=item[baditem-1]; + fprintf(stderr,"Item"); + print_item_name(k,stderr); + fprintf(stderr," has fewer than "O"d options!\n",bound(k)-slack(k)); + } +} + +@ @= +while (forced) { + o,j=force[--forced]; + if (vbose&show_details) { + fprintf(stderr,"Deactivating optionless item"); + print_item_name(j,stderr); + fprintf(stderr,"\n"); + } + oo,i=item[--active],pp=pos(j); + oo,item[active]=j,item[pp]=i; + oo,pos(j)=active,pos(i)=pp; +} + +@ The ``number of entries'' includes spacers (because {\mc DLX2} +includes spacers in its reports). If you want to know the +sum of the option lengths, just subtract the number of options. + +@= +fprintf(stderr, + "("O"lld options, "O"d+"O"d items, "O"d entries successfully read)\n", + options,osecond,itemlength-osecond,last_node); + +@ The item lengths after input are shown (on request). +But there's little use trying to show them after the process is done, +since they are restored somewhat blindly. +(Failures of the linked-list implementation in {\mc DLX2} could sometimes be +detected by showing the final lengths; but that reasoning no longer applies.) + +@= +{ + fprintf(stderr,"Item totals:"); + for (k=0;k= +for (k=active;k>1;) { + mems+=4,j=gb_unif_rand(k); + k--; + oo,oo,t=item[j],item[j]=item[k],item[k]=t; + oo,pos(t)=k,pos(item[j])=j; +} + +@*Binary branching versus $d$-way branching. +Nodes of the search tree in the previous program {\mc SSXCC}, on which +this one is based, are characterized by the name of a primary item~$i$ +that hasn't yet been covered. If that item currently appears in $d$~options +$\{o_1,\ldots,o_d\}$, node~$i$ has $d$ children, one for each choice of +the option that will cover~$i$. + +The present program, however, makes 2-way branches, and its nodes +are labeled with both an item~$i$ and an option~$o$. The left child of +node $(i,o)$ represents the subproblem in which $i$ is covered by~$o$, +as before. But the right child represents the subproblem for which +option~$o$ is removed but item~$i$ is still uncovered (unless $d=1$, +in which case there's no right child). Thus our search tree is now rather +like the binary tree that represents a general tree. (See {\sl The +Art of Computer Programming}, Section 2.3.2.) + +There usually is no good reason to do binary branching when we choose~$i$ +so as to minimize~$d$. On the right branch, $i$ will have $d-1$ remaining +options; and no item~$i'$ will have fewer than $d-1$. + +But this program is intended to provide the basis for {\it other\/} +programs, which extend the branching heuristic by taking dynamic +characteristics of the solution process into account. +While exploring the left branch in such extensions, we might discover +that a certain item~$i'$ is difficult to cover; hence we might prefer to +branch on an option $o'$ that covers~$i'$, after rejecting $o$ for item~$i$. + +@ We shall say that we're in stage $s$ when we've taken $s$ left branches. +We'll also say, as usual, that we're at level~$l$ when we've taken +$l$ branches altogether. + +Suppose, for instance, that we're at level 5, having +rejected $o_1$ for~$i_1$, +accepted $o_2$ for~$i_2$, +accepted $o_3$ for~$i_3$, +rejected $o_4$ for~$i_4$, and +rejected $o_5$ for~$i_5$. Then we will have |stage=2|, and +$|choice[k]|=o_k$ for $0\le k<5$; here each $o_k$ is a node whose |itm| field +is~$i_k$. Also +$$\eqalign{ +|stagelevel|[0]&=0,\cr +|stagelevel|[1]&=0,\cr +|stagelevel|[2]&=1,\cr +|stagelevel|[3]&=2,\cr +|stagelevel|[4]&=2,\cr +|stagelevel|[5]&=2;\cr} +\qquad\eqalign{ +|levelstage|[0]&=1,\cr +|levelstage|[1]&=2,\cr +|levelstage|[2]&=5.\cr}$$ +The option |choice[k]| has been accepted if and only if +|levelstage[stagelevel[k]]=k|. + +@= +int stage; /* number of choices in current partial solution */ +int level; /* current depth in the search tree (which is binary) */ +int choice[max_level]; /* the option and item chosen on each level */ +int deg[max_level]; /* the number of options the item had at that time */ +int levelstage[max_stage]; /* the most recent level at each stage */ +int stagelevel[max_level]; /* the stage that corresponds to each level */ +ullng profile[max_stage]; /* number of search tree nodes on each stage */ + +@*The dancing. +Our strategy for generating all exact covers will be to repeatedly +choose an active primary item and to branch on the ways to reduce +the possibilities for covering that item. +And we explore all possibilities via depth-first search. + +The neat part of this algorithm is the way the sets are maintained. +Depth-first search means last-in-first-out maintenance of data structures; +and the sparse-set representations make it particularly easy to undo +what we've done at deeper levels. + +The basic operation is ``including an option.'' That means (i)~removing +from the current subproblem all of the other options with which it conflicts, +and (ii)~considering all of its primary items to have their bounds decreased by 1. +If this would make the bound of an item 0, we can make that item inactive. + +@= +{ + level=stage=0; +forward: nodes++; + if (vbose&show_profile) profile[stage]++; + if (sanity_checking) sanity(); + @; + @; + @; + if (forced) { + o,best_itm = force[--forced]; + @; + } + if (score==inf_size) @; + @; +advance: oo,choice[level]=cur_choice=set[best_itm].i; + o,deg[level]=score; + if (!include_option(cur_choice)) goto tryagain; + @;@+@; + goto forward; +tryagain:@+if (score==1) goto prebackup; + if (vbose&show_choices) + fprintf(stderr,"Backtracking in stage "O"d\n",stage); + goto purgeit; +prebackup: o,saveptr=saved[stage]; +backup:@+if (--stage<0) goto done; + if (vbose&show_choices) + fprintf(stderr,"Backtracking to stage "O"d\n",stage); + o,level=levelstage[stage]; +purgeit:@+if (o,deg[level]==1) goto prebackup; + @; + o,cur_choice=choice[level]; + @; + @; + goto forward; +} + +@ We save the sizes of active items on |savestack|, whose entries +have two fields |l| and |r|, for an item and its size. This stack +makes it easy to undo all deletions, by simply restoring the former sizes +and bounds. + +@= +int level; /* number of choices in current partial solution */ +int choice[max_level]; /* the node chosen on each level */ +int saved[max_level+1]; /* size of |savestack| on each level */ +threeints savestack[savesize]; +int saveptr; /* current size of |savestack| */ +int tough_itm; /* item whose set of options has just become empty */ + +@ @= +if (delta && (mems>=thresh)) { + thresh+=delta; + if (vbose&show_full_state) print_state(); + else print_progress(); +} +if (mems>=timeout) { + fprintf(stderr,"TIMEOUT!\n");@+goto done; +} + +@ @= +if (++stage>maxs) { + if (stage>=max_stage) { + fprintf(stderr,"Too many stages!\n"); + exit(-40); + } + maxs=stage; +} + +@ @= +if (++level>maxl) { + if (level>=max_level) { + fprintf(stderr,"Too many levels!\n"); + exit(-4); + } + maxl=level; +} +oo,stagelevel[level]=stage,levelstage[stage]=level; + +@ The |include_option| routine extends the current partial solution, +by hiding option |opt|. In addition, it will cover any primary items in |opt| +if their bound after hiding |opt| becomes 0. The routine returns~0, however, if +that would make some other primary item uncoverable. (In the latter case, +|tough_itm| is set to the item that was problematic.) + +@= +int include_option(int opt) { + register int c,optp,nn,nnp,ss,ii,iii,p,pp,s; + subroutine_overhead; + if (vbose&show_choices) { + fprintf(stderr,"S"O"d:",stage); + print_option(opt,stderr,1); + } + for (;o,nd[opt-1].itm>0;opt--) ; /* move to the beginning of the option */ + for (;o,(ii=nd[opt].itm)>0;opt++) { + pp=nd[opt].loc; /* where |opt| appears in |ii|'s set */ + o,p=pos(ii); /* where |ii| appears in |item| */ + if (p>=active) { + if (ii>=second) continue; /* secondary item has been purified */ + confusion("active"); /* primary item of an active option must be active */ + } + @; + } + return 1; +} + +@ We need to remove the options that conflict with |opt| from the sets of their items. + +@= +{ + if (ii=second || bound(ii) == 0) { + o,ss=size(ii); + if (ii=ii;s--) if (s!=pp) { + o,optp=set[s].i; + if (c==0 || (o,nd[optp].clr!=c)) + @; + } + o,p=pos(ii); /* note that |pos(ii)| might have changed */ + o,iii=item[--active]; + oo,item[active]=ii,item[p]=iii; + oo,pos(ii)=active,pos(iii)=p; + }@+else@+{ + o,ss=size(ii)-1; + if (oo,ss < bound(ii)-slack(ii)) { + if ((vbose&show_details) && + level=maxl-show_choices_gap) { + fprintf(stderr," can't cover"); + print_item_name(item[ii],stderr); + fprintf(stderr,"\n"); + } + tough_itm=ii; + forced=0; + return 0; /* abort the deletion, lest |ii| be wiped out */ + } + if (ss==0) { /* Just deactivate item |ii|, no hiding needed */ + o,iii=item[--active]; + oo,item[active]=ii,item[p]=iii; + oo,pos(ii)=active,pos(iii)=p; + }@+else@+{ + oo,nnp=set[ii+ss].i,size(ii)=ss; + oo,set[ii+ss].i=opt,set[pp].i=nnp; + oo,nd[opt].loc=ii+ss,nd[nnp].loc=pp; + updates++; + } + } +} + +@ At this point |optp| points to a node of an option that we want to +remove from the current subproblem. We swap it out of the sets of +all its items, except for the sets of inactive secondary items. (These have +been purified, and we shouldn't mess with their sets.) + +@= +{ + register int nn,ii,iii,p,pp,ss,nnp; + for (nn=optp;o,nd[nn-1].itm>0;nn--) ; /* move to beginning of the option */ + for (;o,(ii=nd[nn].itm)>0;nn++) { + p=nd[nn].loc; + if (p>=second && (o,pos(ii))>=active) continue; /* |ii| already purified */ + o,ss=size(ii)-1; + if (p=maxl-show_choices_gap) { + fprintf(stderr,"Can't cover"); + print_item_name(ii,stderr); + fprintf(stderr,", size="O"d, bound="O"d, slack="O"d, u="O"d\n", + ss,bound(ii), slack(ii), bound(ii)-slack(ii)); + } + tough_itm=ii; + forced=0; + return 0; /* abort the deletion, lest |ii| be wiped out */ + } + if (ss==0) { + o,iii=item[--active]; + o,pp = pos(ii); + if (vbose&show_details) { + fprintf(stderr, "Empty option list, deactivating"); + print_item_name(ii, stderr); + fprintf(stderr, "\n"); + } + oo,item[active]=ii,item[pp]=iii; + oo,pos(ii)=active,pos(iii)=pp; + } + } + if (ss > 0) { + o,nnp=set[ii+ss].i; + o,size(ii)=ss; + oo,set[ii+ss].i=nn,set[p].i=nnp; + oo,nd[nn].loc=ii+ss,nd[nnp].loc=p; + updates++; + } + } +} + +@ @= +{ + register int ii,iii,ss,p,nnp; + for (;o,nd[cur_choice-1].itm>0;cur_choice--) ; /* move to beginning */ + for (;o,(ii=nd[cur_choice].itm)>0;cur_choice++) { + p=nd[cur_choice].loc; + if (p>=second && (o,pos(ii))>=active) continue; /* |ii| inactive */ + o,ss=size(ii)-1; + if (p=maxl-show_choices_gap) { + fprintf(stderr," can't cover"); + print_item_name(item[ii],stderr); + fprintf(stderr,"\n"); + } + goto prebackup; + } + if (ss==0) { + o,iii=item[--active]; + o,pp=pos(ii); + if (vbose&show_details) { + fprintf(stderr, "Null move, deactivating"); + print_item_name(ii, stderr); + fprintf(stderr, "\n"); + } + oo,item[active]=ii,item[pp]=iii; + oo,pos(ii)=active,pos(iii)=pp; + } + } + if (ss > 0) { + oo,nnp=set[ii+ss].i,size(ii)=ss; + oo,set[ii+ss].i=cur_choice,set[p].i=nnp; + oo,nd[cur_choice].loc=ii+ss,nd[nnp].loc=p; + updates++; + } + } +} + +@ If a weight becomes dangerously large, we rescale all the weights. + +(That will happen only when |dwfactor| isn't 1.0. Adding a constant +eventually ``converges'': For example, if the constant is 1, we have convergence +to $2^{17}$ after $2^{17}-1=16777215$ steps. +If the constant~|dw| is .250001, convergence +to \.{8.38861e+06} occurs after 25165819 steps!) + +(Note: I threw in the parameters |dw| and |dwfactor| only to do experiments. +My preliminary experiments didn't turn up any noteworthy results. +But I didn't have time to do a careful study; hence there might +be some settings that work unexpectedly well. The code for rescaling +might be flaky, since it hasn't been tested very thoroughly at all.) + +@d dangerous 1e32f +@d wmin 1e-30f + +@= +cmems+=2,oo,wt(tough_itm)+=dw; +if (vbose&show_record_weights && wt(tough_itm)>maxwt) { + maxwt=wt(tough_itm); + fprintf(stderr,""O"8.1f ",maxwt); + print_item_name(tough_itm,stderr); + fprintf(stderr," "O"lld\n",nodes); +} +if (vbose&show_weight_bumps) { + print_item_name(tough_itm,stderr); + fprintf(stderr," wt "O".1f\n",wt(tough_itm)); +} +dw*=dwfactor; +if (wt(tough_itm)>=dangerous) { + register int k; + register float t; + tmems=mems; + for (k=0;k= +while (forced) { + o,best_itm=force[--forced]; + if (o,pos(best_itm)>=active) continue; + @; +} + +@ @= +void print_weights(void) { + register int k; + for (k=0;k=3|, +there are five, including the ``null'' choice. + +In general, the branching degree turns out to be $l+s-b+1$, where +$l$~is the length of the item, $b$ is the current bound, and +$s$ is the minimum of $b$ and the slack. This formula gives degree +$\le0$ if and only if |l| is too small to satisfy the item +constraint; in such cases we will backtrack immediately. +(It would have been possible to detect this condition early, +before updating all the data structures and increasing |level|. But that would +make the downdating process much more difficult and error-prone. Therefore +I wait to discover such anomalies until item-choosing time.) + +Let's assign the score |l+s-b+1| to each item. If two items have the +same score, I prefer the one with smaller |s|, because slack items +are less constrained. If two items with the same |s| have the same +score, I (counterintuitively) +prefer the one with larger~|b| (hence larger~|l|), because +that tends to reduce the size of the final search tree. + +Consider, for instance, the following example taken from {\mc MDANCE}: +If we want to choose 2 options from 4 in one item, and 3 options from 5 in another, +where all slacks are zero, and if the items are otherwise independent, +it turns out that the number of nodes per level if we choose the smaller +item first is $(1,3,6,6\cdot3,6\cdot6,6\cdot10)$. But if we choose +the larger item first it is $(1,3,6,10,10\cdot3,10\cdot6)$, which is +smaller in the middle levels. + +Notice that a secondary item is active if and only if it has not +been purified (that is, if and only if it hasn't yet appeared in +a chosen option). + +@d inf_size 0x7fffffff + +@= +{ + score=inf_size,tmems=mems; + if ((vbose&show_details) && + level=maxl-show_choices_gap) + fprintf(stderr,"Level "O"d:",level); + for (k=0;kbound(item[k])) s=bound(item[k]); + o,t=size(item[k])+s-bound(item[k])+1; + if ((vbose&show_details) && + level=maxl-show_choices_gap) { + print_item_name(item[k],stderr); + if (bound(item[k])!=1 || s!=0) { + fprintf(stderr, "("O"d:"O"d,"O"d)", + bound(item[k])-s,bound(item[k]),t); + }@+else fprintf(stderr,"("O"d)",t); + } + if (t==1) + for (i=bound(item[k])-slack(item[k]);i>0;i--) o,force[forced++]=item[k]; + else if (t<=score && (t=best_l && (size(item[k])>best_l || + (item[k]=maxl-show_choices_gap) { + if (score==inf_size) fprintf(stderr," solution\n"); + else if (forced) { + fprintf(stderr, " found "O"d forced:", forced); + for (i=0;imaxdeg && score= +{ + count++; + if (spacing && (count mod spacing==0)) { + printf(""O"lld:\n",count); + for (k=0;k=maxcount) goto done; + goto backup; +} + +@ @= +o,saved[stage]=saveptr; +if (saveptr+active>maxsaveptr) { + if (saveptr+active>=savesize) { + fprintf(stderr,"Stack overflow (savesize="O"d)!\n",savesize); + exit(-5); + } + maxsaveptr=saveptr+active; +} +for (p=0;p= +o,active=saveptr-saved[stage]; +saveptr=saved[stage]; +for (p=0;p= +{ + if ((vbose&show_choices) && level= +void print_savestack(int start,int stop) { + int k; + for (k=start;k= +void print_state(void) { + register int l,s; + fprintf(stderr,"Current state (level "O"d):\n",level); + for (l=0;l=show_levels_max) { + fprintf(stderr," ...\n"); + break; + } + } + fprintf(stderr," "O"lld solutions, "O"lld mems, and max level "O"d so far.\n", + count,mems,maxl); +} + +@ During a long run, it's helpful to have some way to measure progress. +The following routine prints a string that indicates roughly where we +are in the search tree. The string consists of node degrees, +preceded by `\.{\char`\~}' if the node wasn't the current node in +its stage (that is, if the node represents an option that has already +been fully explored --- ``we've been there done that''). + +Following that string, a fractional estimate of total progress is computed, +based on the na{\"\i}ve assumption that the search tree has a uniform +branching structure. If the tree consists +of a single node, this estimate is~.5. Otherwise, if the first choice +is the $k$th choice in stage~0 and has degree~$d$, +the estimate is $(k-1)/(d+k-1)$ plus $1/(d+k-1)$ times the +recursively evaluated estimate for the $k$th subtree. (This estimate +might obviously be very misleading, in some cases, but at least it +tends to grow monotonically.) + +@= +void print_progress(void) { + register int l,ll,k,d,c,p,ds=0; + register double f,fd; + fprintf(stderr," after "O"lld mems: "O"lld sols,",mems,count); + for (f=0.0,fd=1.0,l=0;l=0 && stagelevel[ll]==stagelevel[l];k++,d++,ll--) ; + fd*=d,f+=(k-1)/fd; /* choice |l| is treated like |k| of |d| */ + } + if (l>=show_levels_max && !ds) ds=1,fprintf(stderr,"..."); + } + fprintf(stderr," "O".5f\n",f+0.5/fd); +} + +@ @= +{ + fprintf(stderr,"Profile:\n"); + for (k=0;k<=maxs;k++) + fprintf(stderr,""O"3d: "O"lld\n", + k,profile[k]); +} + +@*Index. diff --git a/programs-orig/ssxcc-binary.w b/programs-orig/ssxcc-binary.w new file mode 100644 index 0000000..36026ac --- /dev/null +++ b/programs-orig/ssxcc-binary.w @@ -0,0 +1,1166 @@ +@s mod and +\let\Xmod=\bmod % this is CWEB magic for using "mod" instead of "%" +\font\boldital=cmbxti10 + +\datethis +@*Intro. This program is an ``{\mc XCC} solver'' that I'm writing +as an experiment in the use of so-called sparse-set data structures +instead of the dancing links structures that I've played with for thirty years. +I plan to write it as if I live on a planet where the sparse-set +ideas are well known, but doubly linked links are almost unheard-of. + +The difference between this program and {\mc SSXCC}, on which it's based, +is that I use binary branching `$i=o$' versus `$i\ne o$' at each step, +where $i$ is an item and $o$ is an option, +while {\mc SSXCC} does $d$-way branching on all $d$ options that +currently cover item~$i$. The reason for binary branching is that +I plan to extend this program in various ways, in order to experiment +with several dynamic branching heuristics that I've seen in the literature; +those heuristics were designed with binary branching in mind. + +I suggest that you read {\mc SSXCC} first. + +After this program finds all solutions, it normally prints their total +number on |stderr|, together with statistics about how many +nodes were in the search tree, and how many ``updates'' were made. +The running time in ``mems'' is also reported, together with the approximate +number of bytes needed for data storage. +(An ``update'' is the removal of an option from its item list, +or the removal of a satisfied color constraint from its option. +One ``mem'' essentially means a memory access to a 64-bit word. +The reported totals don't include the time or space needed to parse the +input or to format the output.) + +@d o mems++ /* count one mem */ +@d oo mems+=2 /* count two mems */ +@d ooo mems+=3 /* count three mems */ +@d subroutine_overhead mems+=4 +@d O "%" /* used for percent signs in format strings */ +@d mod % /* used for percent signs denoting remainder in \CEE/ */ +@# +@d max_stage 500 /* at most this many options in a solution */ +@d max_level 32000 /* at most this many levels in the search tree */ +@d max_cols 100000 /* at most this many items */ +@d max_nodes 10000000 /* at most this many nonzero elements in the matrix */ +@d savesize 10000000 /* at most this many entries on |savestack| */ +@d bufsize (9*max_cols+3) /* a buffer big enough to hold all item names */ +@# +@d show_basics 1 /* |vbose| code for basic stats; this is the default */ +@d show_choices 2 /* |vbose| code for backtrack logging */ +@d show_details 4 /* |vbose| code for further commentary */ +@d show_record_weights 16 /* |vbose| code for first time a weight appears */ +@d show_weight_bumps 32 /* |vbose| code to show new weights */ +@d show_final_weights 64 /* |vbose| code to display weights at the end */ +@d show_profile 128 /* |vbose| code to show the search tree profile */ +@d show_full_state 256 /* |vbose| code for complete state reports */ +@d show_tots 512 /* |vbose| code for reporting item totals at start */ +@d show_warnings 1024 /* |vbose| code for reporting options without primaries */ +@d show_max_deg 2048 /* |vbose| code for reporting maximum branching degree */ + +@ Here is the overall structure: + +@c +#include +#include +#include +#include +#include "gb_flip.h" +typedef unsigned int uint; /* a convenient abbreviation */ +typedef unsigned long long ullng; /* ditto */ +@; +@; +@; +main (int argc, char *argv[]) { + register int c,cc,i,j,k,p,pp,q,r,s,t,cur_choice,cur_node,best_itm; + @; + @; + @; + if (vbose&show_basics) + @; + if (vbose&show_tots) + @; + imems=mems, mems=0; + if (baditem) @@; + else { + if (randomizing) @; + @; + } +done:@+if (vbose&show_profile) @; + if (vbose&show_final_weights) { + fprintf(stderr,"Final weights:\n"); + print_weights(); + } + if (vbose&show_max_deg) + fprintf(stderr,"The maximum best_itm size was "O"d.\n",maxdeg); + if (vbose&show_basics) { + fprintf(stderr,"Altogether "O"llu solution"O"s, "O"llu+"O"llu mems,", + count,count==1?"":"s",imems,mems); + bytes=(itemlength+setlength)*sizeof(int)+last_node*sizeof(node) + +2*maxl*sizeof(int)+maxsaveptr*sizeof(twoints); + fprintf(stderr," "O"llu updates, "O"llu bytes, "O"llu nodes,", + updates,bytes,nodes); + fprintf(stderr," ccost "O"lld%%.\n", + mems? (200*cmems+mems)/(2*mems):0); + } + if (sanity_checking) fprintf(stderr,"sanity_checking was on!\n"); + @; +} + +@ You can control the amount of output, as well as certain properties +of the algorithm, by specifying options on the command line: +\smallskip\item{$\bullet$} +`\.v$\langle\,$integer$\,\rangle$' enables or disables various kinds of verbose + output on |stderr|, given by binary codes such as |show_choices|; +\item{$\bullet$} +`\.m$\langle\,$integer$\,\rangle$' causes every $m$th solution +to be output (the default is \.{m0}, which merely counts them); +\item{$\bullet$} +`\.s$\langle\,$integer$\,\rangle$' causes the algorithm to randomize +the initial list of items (thus providing some variety, although +the solutions are by no means uniformly random); +\item{$\bullet$} +`\.d$\langle\,$integer$\,\rangle$' sets |delta|, which causes periodic +state reports on |stderr| after the algorithm has performed approximately +|delta| mems since the previous report (default 10000000000); +\item{$\bullet$} +`\.c$\langle\,$positive integer$\,\rangle$' limits the levels on which +choices are shown during verbose tracing; +\item{$\bullet$} +`\.C$\langle\,$positive integer$\,\rangle$' limits the levels on which +choices are shown in the periodic state reports (default 10); +\item{$\bullet$} +`\.l$\langle\,$nonnegative integer$\,\rangle$' gives a {\it lower\/} limit, +relative to the maximum level so far achieved, to the levels on which +choices are shown during verbose tracing; +\item{$\bullet$} +`\.t$\langle\,$positive integer$\,\rangle$' causes the program to +stop after this many solutions have been found; +\item{$\bullet$} +`\.T$\langle\,$integer$\,\rangle$' sets |timeout| (which causes abrupt +termination if |mems>timeout| at the beginning of a level); +\item{$\bullet$} +`\.w$\langle\,$float$\,\rangle$' is the initial increment |dw| added to +an item's weight (default 1.0); +\item{$\bullet$} +`\.W$\langle\,$float$\,\rangle$' is the factor by which |dw| changes +dynamically (default 1.0); +\item{$\bullet$} +`\.S$\langle\,$filename$\,\rangle$' to output a ``shape file'' that encodes +the search tree. + +@= +int random_seed=0; /* seed for the random words of |gb_rand| */ +int randomizing; /* has `\.s' been specified? */ +int vbose=show_basics+show_warnings; /* level of verbosity */ +int spacing; /* solution $k$ is output if $k$ is a multiple of |spacing| */ +int show_choices_max=1000000; /* above this level, |show_choices| is ignored */ +int show_choices_gap=1000000; /* below level |maxl-show_choices_gap|, + |show_details| is ignored */ +int show_levels_max=10; /* above this level, state reports stop */ +int maxl; /* maximum level actually reached */ +int maxs; /* maximum stage actually reached */ +int maxsaveptr; /* maximum size of |savestack| */ +char buf[bufsize]; /* input buffer */ +ullng count; /* solutions found so far */ +ullng options; /* options seen so far */ +ullng imems,mems,tmems,cmems; /* mem counts */ +ullng updates; /* update counts */ +ullng bytes; /* memory used by main data structures */ +ullng nodes; /* total number of branch nodes initiated */ +ullng thresh=10000000000; /* report when |mems| exceeds this, if |delta!=0| */ +ullng delta=10000000000; /* report every |delta| or so mems */ +ullng maxcount=0xffffffffffffffff; /* stop after finding this many solutions */ +ullng timeout=0x1fffffffffffffff; /* give up after this many mems */ +float w0=1.0,dw=1.0,dwfactor=1.0; /* initial weight, increment, and growth */ +float maxwt=1.0; /* largest weight seen so far */ +FILE *shape_file; /* file for optional output of search tree shape */ +char *shape_name; /* its name */ +int maxdeg; /* the largest branching degree seen so far */ + +@ If an option appears more than once on the command line, the first +appearance takes precedence. + +@= +for (j=argc-1,k=0;j;j--) switch (argv[j][0]) { +case 'v': k|=(sscanf(argv[j]+1,""O"d",&vbose)-1);@+break; +case 'm': k|=(sscanf(argv[j]+1,""O"d",&spacing)-1);@+break; +case 's': k|=(sscanf(argv[j]+1,""O"d",&random_seed)-1),randomizing=1;@+break; +case 'd': k|=(sscanf(argv[j]+1,""O"lld",&delta)-1),thresh=delta;@+break; +case 'c': k|=(sscanf(argv[j]+1,""O"d",&show_choices_max)-1);@+break; +case 'C': k|=(sscanf(argv[j]+1,""O"d",&show_levels_max)-1);@+break; +case 'l': k|=(sscanf(argv[j]+1,""O"d",&show_choices_gap)-1);@+break; +case 't': k|=(sscanf(argv[j]+1,""O"lld",&maxcount)-1);@+break; +case 'T': k|=(sscanf(argv[j]+1,""O"lld",&timeout)-1);@+break; +case 'w': k|=(sscanf(argv[j]+1,""O"f",&dw)-1);@+break; +case 'W': k|=(sscanf(argv[j]+1,""O"f",&dwfactor)-1);@+break; +case 'S': shape_name=argv[j]+1, shape_file=fopen(shape_name,"w"); + if (!shape_file) + fprintf(stderr,"Sorry, I can't open file `"O"s' for writing!\n", + shape_name); + break; +default: k=1; /* unrecognized command-line option */ +} +if (k) { + fprintf(stderr, "Usage: "O"s [v] [m] [s] [d]" + " [c] [C] [l] [t] [T] [w] [W] [S] < foo.dlx\n", + argv[0]); + exit(-1); +} +if (randomizing) gb_init_rand(random_seed); + +@ @= +if (shape_file) fclose(shape_file); + +@ Here's a subroutine that I hope is never invoked (except maybe +when I'm debugging). + +@= +void confusion(char*m) { + fprintf(stderr,""O"s!\n",m); +} + +@*Data structures. +Sparse-set data structures were introduced by Preston Briggs +and Linda Torczon [{\sl ACM Letters on Programming Languages and Systems\/ +\bf2} (1993), 59--69], who realized that exercise 2.12 in +Aho, Hopcroft, and Ullman's classic text {\sl The Design and Analysis +of Computer Algorithms\/} (Addison--Wesley, 1974) was much more than +just a slick trick to avoid initializing an array. +(Indeed, {\sl TAOCP\/} exercise 2.2.6--24 calls it the ``sparse array trick.'') + +The basic idea is amazingly simple, when specialized to the situations +that we need to deal with: We can represent a subset~$S$ of the universe +$U=\{x_0,x_1,\ldots,x_{n-1}\}$ by maintaining two $n$-element arrays +$p$ and $q$, each of which is a permutation of~$\{0,1,\ldots,n-1\}$, +together with an integer $s$ in the range $0\le s\le n$. In fact, $p$ is +the {\it inverse\/} of~$q$; and $s$ is the number of elements of~$S$. +The current value of the set $S$ is then simply +$\{x_{p_0},\ldots,x_{p_{s-1}}\}$. (Notice that every $s$-element subset can be +represented in $s!\,(n-s)!$ ways.) + +It's easy to test if $x_k\in S$, because that's true if and only if $q_k= +typedef struct node_struct { + int itm; /* the item |x| corresponding to this node */ + int loc; /* where this node resides in |x|'s active set */ + int clr; /* color associated with item |x| in this option, if any */ + int spr; /* a spare field inserted only to maintain 16-byte alignment */ +} node; +typedef union { + int i; /* an integer (32 bits) */ + float f; /* a floating point value (fits in 4 bytes) */ +} tetrabyte; + +@ @= +node nd[max_nodes]; /* the master list of nodes */ +int last_node; /* the first node in |nd| that's not yet used */ +int item[max_cols]; /* the master list of items */ +int second=max_cols; /* boundary between primary and secondary items */ +int last_itm; /* items seen so far during input, plus 1 */ +tetrabyte set[max_nodes+maxextra*max_cols]; /* active options for active items */ +int itemlength; /* number of elements used in |item| */ +int setlength; /* number of elements used in |set| */ +int active; /* current number of active items */ +int baditem; /* an item with no options, plus 1 */ +int osecond; /* setting of |second| just after initial input */ +int force[max_cols]; /* stack of items known to have size 1 */ +int forced; /* the number of items on that stack */ + +@ We're going to store string data (an item's name) in the midst of +the integer array |set|. So we've got to do some type coercion using +low-level \CEE/-ness. + +@= +typedef struct { + int l,r; +} twoints; +typedef union { + unsigned char str[8]; /* eight one-byte characters */ + twoints lr; /* two four-byte integers */ +} stringbuf; +stringbuf namebuf; + +@ @= +void print_item_name(int k,FILE *stream) { + namebuf.lr.l=lname(k),namebuf.lr.r=rname(k); + fprintf(stream," "O".8s",namebuf.str); +} + +@ An option is identified not by name but by the names of the items it contains. +Here is a routine that prints an option, given a pointer to any of its +nodes. It also prints the position of the option in its item list. + +@= +void print_option(int p,FILE *stream,int showpos) { + register int k,q,x; + x=nd[p].itm; + if (p>=last_node || x<=0) { + fprintf(stderr,"Illegal option "O"d!\n",p); + return; + } + for (q=p;;) { + print_item_name(x,stream); + if (nd[q].clr) + fprintf(stream,":"O"c",nd[q].clr); + q++; + x=nd[q].itm; + if (x<0) q+=x,x=nd[q].itm; + if (q==p) break; + } + k=nd[q].loc; + if (showpos>0) fprintf(stream," ("O"d of "O"d)\n",k-x+1,size(x)); + else if (showpos==0) fprintf(stream,"\n"); +} +@# +void prow(int p) { + print_option(p,stderr,1); +} + +@ When I'm debugging, I might want to look at one of the current item lists. + +@= +void print_itm(int c) { + register int p; + if (c=setlength || + pos(c)<0 || pos(c)>=itemlength || item[pos(c)]!=c) { + fprintf(stderr,"Illegal item "O"d!\n",c); + return; + } + fprintf(stderr,"Item"); + print_item_name(c,stderr); + if (c=active) + fprintf(stderr," (secondary "O"d, purified), length "O"d:\n", + pos(c)+1,size(c)); + else fprintf(stderr," (secondary "O"d), length "O"d:\n", + pos(c)+1,size(c)); + for (p=c;p= +void sanity(void) { + register int k,x,i,l,r,q,qq; + for (k=0;kr) fprintf(stderr,"itm>loc in node "O"d!\n",i); + else { + if (set[r].i!=i) { + fprintf(stderr,"Bad loc field for option "O"d of item",r-l+1); + print_item_name(l,stderr); + fprintf(stderr," in node "O"d!\n",i); + } + if (pos(l)= +while (1) { + if (!fgets(buf,bufsize,stdin)) break; + if (o,buf[p=strlen(buf)-1]!='\n') panic("Input line way too long"); + for (p=0;o,isspace(buf[p]);p++) ; + if (buf[p]=='|' || !buf[p]) continue; /* bypass comment or blank line */ + last_itm=1; + break; +} +if (!last_itm) panic("No items"); +for (;o,buf[p];) { + o,namebuf.lr.l=namebuf.lr.r=0; + for (j=0;j<8 && (o,!isspace(buf[p+j]));j++) { + if (buf[p+j]==':' || buf[p+j]=='|') + panic("Illegal character in item name"); + o,namebuf.str[j]=buf[p+j]; + } + if (j==8 && !isspace(buf[p+j])) panic("Item name too long"); + oo,lname(last_itm<<2)=namebuf.lr.l,rname(last_itm<<2)=namebuf.lr.r; + @; + last_itm++; + if (last_itm>max_cols) panic("Too many items"); + for (p+=j+1;o,isspace(buf[p]);p++) ; + if (buf[p]=='|') { + if (second!=max_cols) panic("Item name line contains | twice"); + second=last_itm; + for (p++;o,isspace(buf[p]);p++) ; + } +} + +@ @= +for (k=last_itm-1;k;k--) { + if (o,lname(k<<2)!=namebuf.lr.l) continue; + if (rname(k<<2)==namebuf.lr.r) break; +} +if (k) panic("Duplicate item name"); + +@ I'm putting the option number into the |spr| field of the +spacer that follows it, as a +possible debugging aid. But the program doesn't currently use that information. + +@= +while (1) { + if (!fgets(buf,bufsize,stdin)) break; + if (o,buf[p=strlen(buf)-1]!='\n') panic("Option line too long"); + for (p=0;o,isspace(buf[p]);p++) ; + if (buf[p]=='|' || !buf[p]) continue; /* bypass comment or blank line */ + i=last_node; /* remember the spacer at the left of this option */ + for (pp=0;buf[p];) { + o,namebuf.lr.l=namebuf.lr.r=0; + for (j=0;j<8 && (o,!isspace(buf[p+j])) && buf[p+j]!=':';j++) + o,namebuf.str[j]=buf[p+j]; + if (!j) panic("Empty item name"); + if (j==8 && !isspace(buf[p+j]) && buf[p+j]!=':') + panic("Item name too long"); + @; + if (buf[p+j]!=':') o,nd[last_node].clr=0; + else if (k>=second) { + if ((o,isspace(buf[p+j+1])) || (o,!isspace(buf[p+j+2]))) + panic("Color must be a single character"); + o,nd[last_node].clr=(unsigned char)buf[p+j+1]; + p+=2; + }@+else panic("Primary item must be uncolored"); + for (p+=j+1;o,isspace(buf[p]);p++) ; + } + if (!pp) { + if (vbose&show_warnings) + fprintf(stderr,"Option ignored (no primary items): "O"s",buf); + while (last_node>i) { + @; + last_node--; + } + }@+else { + o,nd[i].loc=last_node-i; /* complete the previous spacer */ + last_node++; /* create the next spacer */ + if (last_node==max_nodes) panic("Too many nodes"); + options++; + o,nd[last_node].itm=i+1-last_node; + nd[last_node].spr=options; /* option number, for debugging only */ + } +} +@; +@; +@; + +@ We temporarily use |pos| to recognize duplicate items in an option. + +@= +for (k=(last_itm-1)<<2;k;k-=4) { + if (o,lname(k)!=namebuf.lr.l) continue; + if (rname(k)==namebuf.lr.r) break; +} +if (!k) panic("Unknown item name"); +if (o,pos(k)>i) panic("Duplicate item name in this option"); +last_node++; +if (last_node==max_nodes) panic("Too many nodes"); +o,t=size(k); /* how many previous options have used this item? */ +o,nd[last_node].itm=k>>2,nd[last_node].loc=t; +if ((k>>2)= +o,k=nd[last_node].itm<<2; +oo,size(k)--,pos(k)=i-1; + +@ @= +active=itemlength=last_itm-1; +for (k=0,j=primextra;k= +for (;k;k--) { + o,j=item[k-1]; + if (k==second) second=j; /* |second| is now an index into |set| */ + oo,size(j)=size(k<<2); + if (size(j)==0 && k<=osecond) baditem=k; + o,pos(j)=k-1; + oo,rname(j)=rname(k<<2),lname(j)=lname(k<<2); + if (k<=osecond) o,wt(j)=w0; +} + +@ @= +for (k=1;k= +{ + if (vbose&show_choices) { + fprintf(stderr,"Item"); + print_item_name(item[baditem-1],stderr); + fprintf(stderr," has no options!\n"); + } +} + +@ The ``number of entries'' includes spacers (because {\mc DLX2} +includes spacers in its reports). If you want to know the +sum of the option lengths, just subtract the number of options. + +@= +fprintf(stderr, + "("O"lld options, "O"d+"O"d items, "O"d entries successfully read)\n", + options,osecond,itemlength-osecond,last_node); + +@ The item lengths after input are shown (on request). +But there's little use trying to show them after the process is done, +since they are restored somewhat blindly. +(Failures of the linked-list implementation in {\mc DLX2} could sometimes be +detected by showing the final lengths; but that reasoning no longer applies.) + +@= +{ + fprintf(stderr,"Item totals:"); + for (k=0;k= +for (k=active;k>1;) { + mems+=4,j=gb_unif_rand(k); + k--; + oo,oo,t=item[j],item[j]=item[k],item[k]=t; + oo,pos(t)=k,pos(item[j])=j; +} + +@*Binary branching versus $d$-way branching. +Nodes of the search tree in the previous program {\mc SSXCC}, on which +this one is based, are characterized by the name of a primary item~$i$ +that hasn't yet been covered. If that item currently appears in $d$~options +$\{o_1,\ldots,o_d\}$, node~$i$ has $d$ children, one for each choice of +the option that will cover~$i$. + +The present program, however, makes 2-way branches, and its nodes +are labeled with both an item~$i$ and an option~$o$. The left child of +node $(i,o)$ represents the subproblem in which $i$ is covered by~$o$, +as before. But the right child represents the subproblem for which +option~$o$ is removed but item~$i$ is still uncovered (unless $d=1$, +in which case there's no right child). Thus our search tree is now rather +like the binary tree that represents a general tree. (See {\sl The +Art of Computer Programming}, Section 2.3.2.) + +There usually is no good reason to do binary branching when we choose~$i$ +so as to minimize~$d$. On the right branch, $i$ will have $d-1$ remaining +options; and no item~$i'$ will have fewer than $d-1$. + +But this program is intended to provide the basis for {\it other\/} +programs, which extend the branching heuristic by taking dynamic +characteristics of the solution process into account. +While exploring the left branch in such extensions, we might discover +that a certain item~$i'$ is difficult to cover; hence we might prefer to +branch on an option $o'$ that covers~$i'$, after rejecting $o$ for item~$i$. + +@ We shall say that we're in stage $s$ when we've taken $s$ left branches. +We'll also say, as usual, that we're at level~$l$ when we've taken +$l$ branches altogether. + +Suppose, for instance, that we're at level 5, having +rejected $o_1$ for~$i_1$, +accepted $o_2$ for~$i_2$, +accepted $o_3$ for~$i_3$, +rejected $o_4$ for~$i_4$, and +rejected $o_5$ for~$i_5$. Then we will have |stage=2|, and +$|choice[k]|=o_k$ for $0\le k<5$; here each $o_k$ is a node whose |itm| field +is~$i_k$. Also +$$\eqalign{ +|stagelevel|[0]&=0,\cr +|stagelevel|[1]&=0,\cr +|stagelevel|[2]&=1,\cr +|stagelevel|[3]&=2,\cr +|stagelevel|[4]&=2,\cr +|stagelevel|[5]&=2;\cr} +\qquad\eqalign{ +|levelstage|[0]&=1,\cr +|levelstage|[1]&=2,\cr +|levelstage|[2]&=5.\cr}$$ +The option |choice[k]| has been accepted if and only if +|levelstage[stagelevel[k]]=k|. + +@= +int stage; /* number of choices in current partial solution */ +int level; /* current depth in the search tree (which is binary) */ +int choice[max_level]; /* the option and item chosen on each level */ +int deg[max_level]; /* the number of options the item had at that time */ +int levelstage[max_stage]; /* the most recent level at each stage */ +int stagelevel[max_level]; /* the stage that corresponds to each level */ +ullng profile[max_stage]; /* number of search tree nodes on each stage */ + +@*The dancing. +Our strategy for generating all exact covers will be to repeatedly +choose an item that appears to be hardest to cover, namely an item whose set +is currently smallest, among all items that still need to be covered. +And we explore all possibilities via depth-first search. + +The neat part of this algorithm is the way the sets are maintained. +Depth-first search means last-in-first-out maintenance of data structures; +and the sparse-set representations make it particularly easy to undo +what we've done at deeper levels. + +The basic operation is ``including an option.'' That means (i)~removing +from the current subproblem all of the other options with which it conflicts, +and (ii)~considering all of its primary items to be covered, by +making them inactive. + +@= +{ + level=stage=0; +forward: nodes++; + if (vbose&show_profile) profile[stage]++; + if (sanity_checking) sanity(); + @; + @; + @; + if (forced) { + o,best_itm=force[--forced]; + @; + } + if (t==inf_size) @; + @; +advance: oo,choice[level]=cur_choice=set[best_itm].i; + o,deg[level]=t; + if (!include_option(cur_choice)) goto tryagain; + @;@+@; + goto forward; +tryagain:@+if (t==1) goto prebackup; + if (vbose&show_choices) + fprintf(stderr,"Backtracking in stage "O"d\n",stage); + goto purgeit; +prebackup: o,saveptr=saved[stage]; +backup:@+if (--stage<0) goto done; + if (vbose&show_choices) + fprintf(stderr,"Backtracking to stage "O"d\n",stage); + o,level=levelstage[stage]; +purgeit:@+if (o,deg[level]==1) goto prebackup; + @; + o,cur_choice=choice[level]; + @; + @; + goto forward; +} + +@ We save the sizes of active items on |savestack|, whose entries +have two fields |l| and |r|, for an item and its size. This stack +makes it easy to undo all deletions, by simply restoring the former sizes. + +@= +int level; /* number of choices in current partial solution */ +int choice[max_level]; /* the node chosen on each level */ +int saved[max_level+1]; /* size of |savestack| on each level */ +twoints savestack[savesize]; +int saveptr; /* current size of |savestack| */ +int tough_itm; /* item whose set of options has just become empty */ + +@ @= +if (delta && (mems>=thresh)) { + thresh+=delta; + if (vbose&show_full_state) print_state(); + else print_progress(); +} +if (mems>=timeout) { + fprintf(stderr,"TIMEOUT!\n");@+goto done; +} + +@ @= +if (++stage>maxs) { + if (stage>=max_stage) { + fprintf(stderr,"Too many stages!\n"); + exit(-40); + } + maxs=stage; +} + +@ @= +if (++level>maxl) { + if (level>=max_level) { + fprintf(stderr,"Too many levels!\n"); + exit(-4); + } + maxl=level; +} +oo,stagelevel[level]=stage,levelstage[stage]=level; + +@ The |include_option| routine extends the current partial solution, +by using option |opt| to cover one or more of the presently uncovered +primary items. It returns~0, however, if that would make some other +primary item uncoverable. (In the latter case, |tough_itm| is set +to the item that was problematic.) + +@= +int include_option(int opt) { + register int c,optp,nn,nnp,ss,ii,iii,p,pp,s; + subroutine_overhead; + if (vbose&show_choices) { + fprintf(stderr,"S"O"d:",stage); + print_option(opt,stderr,1); + } + for (;o,nd[opt-1].itm>0;opt--) ; /* move to the beginning of the option */ + for (;o,(ii=nd[opt].itm)>0;opt++) { + pp=nd[opt].loc; /* where |opt| appears in |ii|'s set */ + o,p=pos(ii); /* where |ii| appears in |item| */ + if (p; + } + return 1; +} + +@ We don't need to remove options from the set of |ii|, because |ii| will +soon be inactive. But of course we do need to remove the options that +conflict with |opt| from the sets of their items. + +@= +{ + o,ss=size(ii); + if (ii=ii;s--) if (s!=pp) { + o,optp=set[s].i; + if (c==0 || (o,nd[optp].clr!=c)) + @; + } + o,iii=item[--active]; + oo,item[active]=ii,item[p]=iii; + oo,pos(ii)=active,pos(iii)=p; +} + +@ At this point |optp| points to a node of an option that we want to +remove from the current subproblem. We swap it out of the sets of +all its items except for |nd[optp].itm| itself, and except for +the sets of inactive secondary items. (The latter have been purified, +and we shouldn't mess with their sets.) + +@= +{ + register int nn,ii,p,ss,nnp; + for (nn=optp;o,nd[nn-1].itm>0;nn--) ; /* move to beginning of the option */ + for (;o,(ii=nd[nn].itm)>0;nn++) if (nn!=optp) { + p=nd[nn].loc; + if (p>=second && (o,pos(ii)>=active)) continue; /* |ii| already purified */ + o,ss=size(ii)-1; + if (ss<=1 && p=maxl-show_choices_gap) { + fprintf(stderr," can't cover"); + print_item_name(ii,stderr); + fprintf(stderr,"\n"); + } + tough_itm=ii; + forced=0; + return 0; /* abort the deletion, lest |ii| be wiped out */ + } + else o,force[forced++]=ii; + } + o,nnp=set[ii+ss].i; + o,size(ii)=ss; + oo,set[ii+ss].i=nn,set[p].i=nnp; + oo,nd[nn].loc=ii+ss,nd[nnp].loc=p; + updates++; + } +} + +@ At this point every active primary item has at least two options in its set. +Therefore, when we delete |cur_choice| from the sets of each of its +active items, every set will still be nonempty. + +@= +{ + register int ii,ss,p,nnp; + for (;o,nd[cur_choice-1].itm>0;cur_choice--) ; /* move to beginning */ + for (;o,(ii=nd[cur_choice].itm)>0;cur_choice++) { + p=nd[cur_choice].loc; + if (p>=second && (o,pos(ii)>=active)) continue; /* |ii| inactive */ + o,ss=size(ii)-1; + oo,nnp=set[ii+ss].i,size(ii)=ss; + oo,set[ii+ss].i=cur_choice,set[p].i=nnp; + oo,nd[cur_choice].loc=ii+ss,nd[nnp].loc=p; + updates++; + } +} + +@ If a weight becomes dangerously large, we rescale all the weights. + +(That will happen only when |dwfactor| isn't 1.0. Adding a constant +eventually ``converges'': For example, if the constant is 1, we have convergence +to $2^{17}$ after $2^{17}-1=16777215$ steps. +If the constant~|dw| is .250001, convergence +to \.{8.38861e+06} occurs after 25165819 steps!) + +(Note: I threw in the parameters |dw| and |dwfactor| only to do experiments. +My preliminary experiments didn't turn up any noteworthy results. +But I didn't have time to do a careful study; hence there might +be some settings that work unexpectedly well. The code for rescaling +might be flaky, since it hasn't been tested very thoroughly at all.) + +@d dangerous 1e32f +@d wmin 1e-30f + +@= +cmems+=2,oo,wt(tough_itm)+=dw; +if (vbose&show_record_weights && wt(tough_itm)>maxwt) { + maxwt=wt(tough_itm); + fprintf(stderr,""O"8.1f ",maxwt); + print_item_name(tough_itm,stderr); + fprintf(stderr," "O"lld\n",nodes); +} +if (vbose&show_weight_bumps) { + print_item_name(tough_itm,stderr); + fprintf(stderr," wt "O".1f\n",wt(tough_itm)); +} +dw*=dwfactor; +if (wt(tough_itm)>=dangerous) { + register int k; + register float t; + tmems=mems; + for (k=0;k= +while (forced) { + o,best_itm=force[--forced]; + if (o,pos(best_itm); + } +} + +@ @= +void print_weights(void) { + register int k; + for (k=0;k= +{ + t=inf_size,tmems=mems; + if ((vbose&show_details) && + level=maxl-show_choices_gap) + fprintf(stderr,"Level "O"d:",level); + for (k=0;k=maxl-show_choices_gap) { + print_item_name(item[k],stderr); + fprintf(stderr,"("O"d)",s); + } + if (s<=1) { + if (s==0) + fprintf(stderr,"I'm confused.\n"); /* |include_option| missed this */ + else o,force[forced++]=item[k]; + }@+else if (s<=t) { + if (s=maxl-show_choices_gap) { + if (forced) fprintf(stderr," found "O"d forced\n",forced); + else if (t==inf_size) fprintf(stderr," solution\n"); + else { + fprintf(stderr," branching on"); + print_item_name(best_itm,stderr); + fprintf(stderr,"("O"d)\n",t); + } + } + if (t>maxdeg && t= +{ + count++; + if (spacing && (count mod spacing==0)) { + printf(""O"lld:\n",count); + for (k=0;k=maxcount) goto done; + goto backup; +} + +@ @= +o,saved[stage]=saveptr; +if (saveptr+active>maxsaveptr) { + if (saveptr+active>=savesize) { + fprintf(stderr,"Stack overflow (savesize="O"d)!\n",savesize); + exit(-5); + } + maxsaveptr=saveptr+active; +} +for (p=0;p= +o,active=saveptr-saved[stage]; +saveptr=saved[stage]; +for (p=0;p= +{ + if ((vbose&show_choices) && level= +void print_savestack(int start,int stop) { + register k; + for (k=start;k= +void print_state(void) { + register int l,s; + fprintf(stderr,"Current state (level "O"d, stage "O"d):\n",level,stage); + for (l=0;l=show_levels_max) { + fprintf(stderr," ...\n"); + break; + } + } + fprintf(stderr," "O"lld solutions, "O"lld mems, and max level "O"d so far.\n", + count,mems,maxl); +} + +@ During a long run, it's helpful to have some way to measure progress. +The following routine prints a string that indicates roughly where we +are in the search tree. The string consists of node degrees, +preceded by `\.{\char`\~}' if the node wasn't the current node in +its stage (that is, if the node represents an option that has already +been fully explored --- ``we've been there done that''). + +Following that string, a fractional estimate of total progress is computed, +based on the na{\"\i}ve assumption that the search tree has a uniform +branching structure. If the tree consists +of a single node, this estimate is~.5. Otherwise, if the first choice +is the $k$th choice in stage~0 and has degree~$d$, +the estimate is $(k-1)/(d+k-1)$ plus $1/(d+k-1)$ times the +recursively evaluated estimate for the $k$th subtree. (This estimate +might obviously be very misleading, in some cases, but at least it +tends to grow monotonically.) + +Fine point: If we've just backtracked within stage |stage|, +the string of node degrees with end with a `\.{\char`\~}' entry, +and we haven't yet made {\it any\/} choice in the current~stage. +The test `|l==level-1|' below uses the fact that |levelstage[stage]=level| +to adjust the fractional estimate appropriately for the partial +progress in the current stage. + +@= +void print_progress(void) { + register int l,ll,k,d,c,p,ds=0; + register double f,fd; + fprintf(stderr," after "O"lld mems: "O"lld sols,",mems,count); + for (f=0.0,fd=1.0,l=0;l=0 && stagelevel[ll]==stagelevel[l];k++,d++,ll--) ; + fd*=d,f+=(k-1)/fd; /* choice |l| is treated like |k| of |d| */ + } + if (l>=show_levels_max && !ds) ds=1,fprintf(stderr,"..."); + } + fprintf(stderr," "O".5f\n",f+0.5/fd); +} + +@ @= +{ + fprintf(stderr,"Profile:\n"); + for (k=0;k<=maxs;k++) + fprintf(stderr,""O"3d: "O"lld\n", + k,profile[k]); +} + +@*Index. diff --git a/programs-orig/ssxcc-frb.ch b/programs-orig/ssxcc-frb.ch new file mode 100644 index 0000000..0068045 --- /dev/null +++ b/programs-orig/ssxcc-frb.ch @@ -0,0 +1,280 @@ +@x an experimental change file for SSXCC-BINARY (the new version of 25 Aug!) +those heuristics were designed with binary branching in mind. +@y +those heuristics were designed with binary branching in mind. + +The ``{\mc FRB} heuristic'' implemented here is motivated by the paper +of Li, Yin, and Li in +@^Li, Hongbo@> +@^Yin, Minghao@> +@^Li, Zhanshan@> +{\sl Leibniz International Proceedings in Informatics\/ \bf210} +(2021), 9:1--9:10 +[the proceedings of the 27th International Conference on Principles and +Practice of Constraint Programming, CP~2021]. +When the option chosen for branching on some primary item~$i$ causes +another primary item~$i'$ to be wiped out, we say that a failure has +occurred with respect to~$i$. We branch on an item +that has a small number of options and a relatively high failure rate. +Details are discussed below. +@z +@x +@d show_weight_bumps 32 /* |vbose| code to show new weights */ +@d show_final_weights 64 /* |vbose| code to display weights at the end */ +@y +@d show_final_stats 64 /* |vbose| code to display item stats at the end */ +@z +@x + if (vbose&show_final_weights) { + fprintf(stderr,"Final weights:\n"); + print_weights(); + } +@y + if (vbose&show_final_stats) { + fprintf(stderr,"Final primary item stats:\n"); + print_item_stats(); + } +@z +@x +`\.w$\langle\,$float$\,\rangle$' is the initial increment |dw| added to +an item's weight (default 1.0); +\item{$\bullet$} +`\.W$\langle\,$float$\,\rangle$' is the factor by which |dw| changes +dynamically (default 1.0); +@y +@z +@x +case 'w': k|=(sscanf(argv[j]+1,""O"f",&dw)-1);@+break; +case 'W': k|=(sscanf(argv[j]+1,""O"f",&dwfactor)-1);@+break; +@y +@z +@x + " [c] [C] [l] [t] [T] [w] [W] [S] < foo.dlx\n", +@y + " [c] [C] [l] [t] [T] [S] < foo.dlx\n", +@z +@x +A primary item $x$ also has a |wt| field, |set[x-5]|, initially~1. +The weight is increased by |dw| whenever we backtrack because |x| +cannot be covered. (Weights aren't actually {\it used} in the present +program; that will come in extensions to be written later. But it will +be convenient to have space ready for them in our data structures, +so that those extensions will be easy to write.) +@y +A primary item $x$ also has two special fields called +|assigns| and |failrate|, used in the {\mc FRB} heuristic. +Their significance is described below. +@z +@x +@d wt(x) set[(x)-5].f /* the current floating-point ``weight'' of |x| */ +@d primextra 5 /* this many extra entries of |set| for each primary item */ +@d secondextra 4 /* and this many for each secondary item */ +@d maxextra 5 /* maximum of |primextra| and |secondextra| */ +@y +@d assigns(x) set[(x)-5].f /* number of assignments tried so far for |x| */ +@d failrate(x) set[(x)-6].f /* the current ``failure rate'' of |x| */ +@d primextra 6 /* this many extra entries of |set| for each primary item */ +@d secondextra 4 /* and this many for each secondary item */ +@d maxextra 6 /* maximum of |primextra| and |secondextra| */ +@z +@x + fprintf(stderr," ("O"d of "O"d), length "O"d, weight "O".1f:\n", + pos(c)+1,active,size(c),wt(c)); +@y + fprintf(stderr, + " ("O"d of "O"d), length "O"d, failrate "O".1f of "O"g:\n", + pos(c)+1,active,size(c),failrate(c),assigns(c)); +@z +@x + if (k<=osecond) o,wt(j)=w0; +@y + if (k<=osecond) oo,assigns(j)=1.0,failrate(j)=0.5; +@z +@x + if (!include_option(cur_choice)) goto tryagain; + @;@+@; + goto forward; +@y + if (!include_option(cur_choice)) goto abort; + @; + @;@+@; + goto forward; +abort:@+@; +@z +@x +@ If a weight becomes dangerously large, we rescale all the weights. + +(That will happen only when |dwfactor| isn't 1.0. Adding a constant +eventually ``converges'': For example, if the constant is 1, we have convergence +to $2^{17}$ after $2^{17}-1=16777215$ steps. +If the constant~|dw| is .250001, convergence +to \.{8.38861e+06} occurs after 25165819 steps!) + +(Note: I threw in the parameters |dw| and |dwfactor| only to do experiments. +My preliminary experiments didn't turn up any noteworthy results. +But I didn't have time to do a careful study; hence there might +be some settings that work unexpectedly well. The code for rescaling +might be flaky, since it hasn't been tested very thoroughly at all.) + +@d dangerous 1e32f +@d wmin 1e-30f + +@= +cmems+=2,oo,wt(tough_itm)+=dw; +if (vbose&show_record_weights && wt(tough_itm)>maxwt) { + maxwt=wt(tough_itm); + fprintf(stderr,""O"8.1f ",maxwt); + print_item_name(tough_itm,stderr); + fprintf(stderr," "O"lld\n",nodes); +} +if (vbose&show_weight_bumps) { + print_item_name(tough_itm,stderr); + fprintf(stderr," wt "O".1f\n",wt(tough_itm)); +} +dw*=dwfactor; +if (wt(tough_itm)>=dangerous) { + register int k; + register float t; + tmems=mems; + for (k=0;k= +while (forced) { + o,best_itm=force[--forced]; + if (o,pos(best_itm); + } +} + +@ @= +void print_weights(void) { + register int k; + for (k=0;k +@^Yin, Minghao@> +@^Li, Zhanshan@> +After the first assignment, |assigns(i)| will be~2.0, and +|failrate(i)| will be either 0.75 or 0.25, depending on whether +or not that assignment led to failure. After $k$ assignments, +the possible values of |failrate(i)| are $1/(2k+2)$, +$3/(2k+2)$, \dots,~$(2k+1)/(2k+2)$. + +@= +oo,assigns(best_itm)+=1.0; +oo,failrate(best_itm)-=failrate(best_itm)/assigns(best_itm); + +@ @= +oo,assigns(best_itm)+=1.0; +oo,failrate(best_itm)+=(1.0-failrate(best_itm))/assigns(best_itm); + +@ @= +while (forced) { + o,best_itm=force[--forced]; + if (o,pos(best_itm); + } +} + +@ @= +void print_item_stats(void) { + register int k; + for (k=0;k= +{ + t=inf_size,tmems=mems; +@y +@d inf_size 0x7fffffff +@d dangerous 1e32f +@d infty 2e32f /* twice |dangerous| */ + +@= +{ + register float score,tscore,w; + t=inf_size,tmems=mems,score=infty; +@z +@x + for (k=0;k=maxl-show_choices_gap) { + print_item_name(item[k],stderr); + fprintf(stderr,"("O"d)",s); + } + if (s<=1) { + if (s==0) + fprintf(stderr,"I'm confused.\n"); /* |include_option| missed this */ + else o,force[forced++]=item[k]; + }@+else if (s<=t) { + if (s=infty) tscore=dangerous; + if (tscore<=score) { + if (tscore=maxl-show_choices_gap) { + print_item_name(item[k],stderr);@+ + if (s==1) fprintf(stderr,"(1)"); + else fprintf(stderr,"("O"d,"O".1f)",s,w); + } + } +@z +@x + fprintf(stderr,"("O"d)\n",t); +@y + fprintf(stderr,"("O"d), score "O".4f\n",t,score); +@z + diff --git a/programs-orig/ssxcc-frb0.ch b/programs-orig/ssxcc-frb0.ch new file mode 100644 index 0000000..e1580ab --- /dev/null +++ b/programs-orig/ssxcc-frb0.ch @@ -0,0 +1,271 @@ +@x a change file for SSXCC [not SSXCC-BINARY] +@ After this program finds all solutions, it normally prints their total +@y +This program differs from {\mc SSXCC} by choosing the item on which +to branch based on a ``weighted'' heuristic motivated by the paper +of Li, Yin, and Li in +@^Li, Hongbo@> +@^Yin, Minghao@> +@^Li, Zhanshan@> +{\sl Leibniz International Proceedings in Informatics\/ \bf210} +(2021), 9:1--9:10 +[the proceedings of the 27th International Conference on Principles and +Practice of Constraint Programming, CP~2021]. +When the option chosen for branching on some primary item~$i$ causes +another primary item~$i'$ to be wiped out, we say that a failure has +occurred with respect to~$i$. We branch on an item +that has a small number of options and a relatively high failure rate. +Details are discussed below. + +It's the same heuristic as in {\mc SSXCC-FRB}. But that version uses +binary branching, while this one (like {\mc SSXCC} itself) +uses $d$-way branching. + +@ After this program finds all solutions, it normally prints their total +@z +@x +done:@+if (vbose&show_profile) @; +@y +if (vbose&show_profile) @; +done:@+if (vbose&show_profile) @; + if (vbose&show_final_stats) { + fprintf(stderr,"Final primary item stats:\n"); + print_item_stats(); +} +@z +@x +@d show_max_deg 2048 /* |vbose| code for reporting maximum branching degree */ +@y +@d show_max_deg 2048 /* |vbose| code for reporting maximum branching degree */ +@d show_final_stats 4096 /* |vbose| code to display item stats at the end */ +@z +@x +The given options are stored sequentially in the |nd| array, with one node +@y +Each primary item in the |set| array also contains two special fields called +|assigns| and |fails|, which are used in the {\mc FRB} heuristic. +Their significance is described below. + +The given options are stored sequentially in the |nd| array, with one node +@z +@x +@d primextra 4 /* this many extra entries of |set| for each primary item */ +@d secondextra 4 /* and this many for each secondary item */ +@d maxextra 4 /* maximum of |primextra| and |secondextra| */ +@y +@d assigns(x) set[(x)-5] /* number of assignments tried so far for |x|, plus 1 */ +@d fails(x) set[(x)-6] /* how many of them failed? */ +@d primextra 6 /* this many extra entries of |set| for each primary item */ +@d secondextra 4 /* and this many for each secondary item */ +@d maxextra 6 /* maximum of |primextra| and |secondextra| */ +@z +@x + if (c; +@y + if (t==infty) @; +@z +@x + @; +@y + @; + @; +@z +@x +abort:@+if (o,cur_choice+1>=best_itm+size(best_itm)) goto backup; +@y + goto try_again; +abort:@+@; +try_again:@+if (o,cur_choice+1>=best_itm+size(best_itm)) goto backup; +@z +@x +@ The ``best item'' is considered to be an item that minimizes the +number of remaining choices. All candidates of size~1, if any, are +put on the |force| stack. If there are several candidates of size $>1$, +we choose the leftmost. + +Notice that a secondary item is active if and only if it has not +been purified (that is, if and only if it hasn't yet appeared in +a chosen option). + +(This program explores the search space in a slightly different order +from {\mc DLX2}, because the ordering of items in the active list +is no longer fixed. But ties are broken in the same way when $s>1$.) + +@= +t=max_nodes,tmems=mems; +if ((vbose&show_details) && + level=maxl-show_choices_gap) + fprintf(stderr,"Level "O"d:",level); +for (k=0;k=maxl-show_choices_gap) { + print_item_name(item[k],stderr); + fprintf(stderr,"("O"d)",s); + } + if (s<=1) { + if (s==0) fprintf(stderr,"I'm confused.\n"); /* |hide| missed this */ + else o,force[forced++]=item[k]; + }@+else if (s<=t) { + if (s=maxl-show_choices_gap) { + if (forced) fprintf(stderr," found "O"d forced\n",forced); + else if (t==max_nodes) fprintf(stderr," solution\n"); + else { + fprintf(stderr," branching on"); + print_item_name(best_itm,stderr); + fprintf(stderr,"("O"d)\n",t); + } +} +if (t>maxdeg && t +@^Yin, Minghao@> +@^Li, Zhanshan@> + +@= +oo,assigns(best_itm)++; +if (assigns(best_itm)<=0) { + fprintf(stderr,"Too many assignments (2^{31})!\n"); + exit(-6); +} +cmems+=2; + +@ @= +oo,assigns(best_itm)++; +if (assigns(best_itm)<=0) { + fprintf(stderr,"Too many assignments (2^{31})!\n"); + exit(-66); +} +oo,fails(best_itm)++; +cmems+=4; + +@ @= +void print_item_stats(void) { + register int k; + for (k=0;k= +{ + register float score,tscore,w; + tmems=mems,score=finfty,t=infty; + if ((vbose&show_details) && + level=maxl-show_choices_gap) + fprintf(stderr,"Level "O"d:",level); + for (k=0;k; + o,force[forced++]=item[k]; + }@+else { + o,w=(float)fails(item[k])+0.5; + o,w/=assigns(item[k]); + tscore=s/w; + if (tscore>=finfty) tscore=dangerous; + if (tscore=maxl-show_choices_gap) { + print_item_name(item[k],stderr);@+ + if (s==1) fprintf(stderr,"(1)"); + else fprintf(stderr,"("O"d,"O"d/"O"d)",s,fails(item[k]),assigns(item[k])); + } + } + if ((vbose&show_details) && + level=maxl-show_choices_gap) { + if (forced) fprintf(stderr," found "O"d forced\n",forced); + else if (t==infty) fprintf(stderr," solution\n"); + else { + fprintf(stderr," branching on"); + print_item_name(best_itm,stderr); + fprintf(stderr,"("O"d), score "O".4f\n",t,score); + } + } + if (t>maxdeg && tmaxdeg && t= +@y +@ Oops --- we've run into a case where the current choice at |level-1| +has wiped out |item[k]|. Thus |item[k]|, which manifestly has the +smallest option list, is a |best_item| doomed to fail. + +@= +{ + if ((vbose&show_details) && + level=maxl-show_choices_gap) { + fprintf(stderr,"\n--- Item "); + print_item_name(item[k],stderr); + fprintf(stderr," has been wiped out!\n"); + } + best_itm=item[k]; + @; + forced=0; + goto backup; +} + +@ @= +@z diff --git a/programs-orig/ssxcc-wtd.ch b/programs-orig/ssxcc-wtd.ch new file mode 100644 index 0000000..a3c0a14 --- /dev/null +++ b/programs-orig/ssxcc-wtd.ch @@ -0,0 +1,87 @@ +@x an experimental change file for SSXCC-BINARY (the new version of 25 Aug!) +those heuristics were designed with binary branching in mind. +@y +those heuristics were designed with binary branching in mind. + +The heuristic implemented in this version is motivated by the paper +of Boussemart, Hemery, Lecoutre, and Sais in {\sl Proc.\ 16th +European Conference on Artificial Intelligence} (2004), 146--150: We increase +the weight of a primary item when its current set of options becomes null. +Items are chosen for branching based on the size of their set divided by their +current weight, unless the choice is forced. +@^Boussemart, Fr\'ed\'eric@> +@^H\'emery, Fred@> +@^Lecoutre, Christophe@> +@^Sa{\"\i}s, Lakhdar@> +@z +@x + if (!include_option(cur_choice)) goto tryagain; + @;@+@; + goto forward; +@y + if (!include_option(cur_choice)) goto abort; + @;@+@; + goto forward; +abort:@+@; +@z +@x +@d inf_size 0x7fffffff + +@= +{ + t=inf_size,tmems=mems; +@y +@d inf_size 0x7fffffff +@d infty 2e32f /* twice |dangerous| */ + +@= +{ + register float score,tscore,w; + t=inf_size,tmems=mems,score=infty; +@z +@x + for (k=0;k=maxl-show_choices_gap) { + print_item_name(item[k],stderr); + fprintf(stderr,"("O"d)",s); + } + if (s<=1) { + if (s==0) + fprintf(stderr,"I'm confused.\n"); /* |include_option| missed this */ + else o,force[forced++]=item[k]; + }@+else if (s<=t) { + if (s=infty) tscore=dangerous; + if (tscore<=score) { + if (tscore=maxl-show_choices_gap) { + print_item_name(item[k],stderr);@+ + if (s==1) fprintf(stderr,"(1)"); + else fprintf(stderr,"("O"d,"O"g)",s,w); + } + } +@z +@x + fprintf(stderr,"("O"d)\n",t); +@y + fprintf(stderr,"("O"d), score "O".4f\n",t,score); +@z diff --git a/programs-orig/ssxcc-wtd0.ch b/programs-orig/ssxcc-wtd0.ch new file mode 100644 index 0000000..34bf534 --- /dev/null +++ b/programs-orig/ssxcc-wtd0.ch @@ -0,0 +1,247 @@ +@x a change file for SSXCC [not SSXCC-BINARY] +@ After this program finds all solutions, it normally prints their total +@y +This program differs from {\mc SSXCC} by choosing the item on which +to branch based on a ``weighted'' heuristic proposed by +Boussemart, Hemery, Lecoutre, and Sais in {\sl Proc.\ 16th +European Conference on Artificial Intelligence} (2004), 146--150: We increase +the weight of a primary item when its current set of options becomes null. +Items are chosen for branching based on the size of their set divided by their +current weight, unless the choice is forced. +@^Boussemart, Fr\'ed\'eric@> +@^H\'emery, Fred@> +@^Lecoutre, Christophe@> +@^Sa{\"\i}s, Lakhdar@> + +It's the same heuristic as in {\mc SSXCC-WTD}. But that version uses +binary branching, while this one (like {\mc SSXCC} itself) +uses $d$-way branching. + +@ After this program finds all solutions, it normally prints their total +@z +@x +done:@+if (vbose&show_profile) @; +@y +done:@+if (vbose&show_profile) @; +if (vbose&show_final_weights) { + fprintf(stderr,"Final weights:\n"); + print_weights(); +} +@z +@x +@d show_max_deg 2048 /* |vbose| code for reporting maximum branching degree */ +@y +@d show_max_deg 2048 /* |vbose| code for reporting maximum branching degree */ +@d show_final_weights 4096 /* |vbose| code to display weights at the end */ +@d show_weight_bumps 8192 /* |vbose| code to show new weights */ +@z +@x +The given options are stored sequentially in the |nd| array, with one node +@y +The |set| array contains a |wt| field for each primary item. +This weight, initially~1, is increased by 1 whenever we run into a +situation where |x| cannot be supported. + +The given options are stored sequentially in the |nd| array, with one node +@z +@x +@d primextra 4 /* this many extra entries of |set| for each primary item */ +@d secondextra 4 /* and this many for each secondary item */ +@d maxextra 4 /* maximum of |primextra| and |secondextra| */ +@y +@d wt(x) set[(x)-5] /* the current weight of item |x| */ +@d primextra 5 /* this many extra entries of |set| for each primary item */ +@d secondextra 4 /* and this many for each secondary item */ +@d maxextra 5 /* maximum of |primextra| and |secondextra| */ +@z +@x +int forced; /* the number of items on that stack */ +@y +int forced; /* the number of items on that stack */ +int tough_itm; /* an item that led to difficulty */ +@z +@x + if (c; +@y + if (t==infty) @; +@z +@x +abort:@+if (o,cur_choice+1>=best_itm+size(best_itm)) goto backup; +@y + goto try_again; +abort:@+@; +try_again:@+if (o,cur_choice+1>=best_itm+size(best_itm)) goto backup; +@z +@x + return 0; +@y + tough_itm=uu; + return 0; +@z +@x +@ The ``best item'' is considered to be an item that minimizes the +number of remaining choices. All candidates of size~1, if any, are +put on the |force| stack. If there are several candidates of size $>1$, +we choose the leftmost. + +Notice that a secondary item is active if and only if it has not +been purified (that is, if and only if it hasn't yet appeared in +a chosen option). + +(This program explores the search space in a slightly different order +from {\mc DLX2}, because the ordering of items in the active list +is no longer fixed. But ties are broken in the same way when $s>1$.) + +@= +t=max_nodes,tmems=mems; +if ((vbose&show_details) && + level=maxl-show_choices_gap) + fprintf(stderr,"Level "O"d:",level); +for (k=0;k=maxl-show_choices_gap) { + print_item_name(item[k],stderr); + fprintf(stderr,"("O"d)",s); + } + if (s<=1) { + if (s==0) fprintf(stderr,"I'm confused.\n"); /* |hide| missed this */ + else o,force[forced++]=item[k]; + }@+else if (s<=t) { + if (s=maxl-show_choices_gap) { + if (forced) fprintf(stderr," found "O"d forced\n",forced); + else if (t==max_nodes) fprintf(stderr," solution\n"); + else { + fprintf(stderr," branching on"); + print_item_name(best_itm,stderr); + fprintf(stderr,"("O"d)\n",t); + } +} +if (t>maxdeg && t= +cmems+=2,oo,wt(tough_itm)++; +if (wt(tough_itm)<=0) { + fprintf(stderr,"Weight overflow (2^31)!\n"); + exit(-6); +} +if (vbose&show_weight_bumps) { + print_item_name(tough_itm,stderr); + fprintf(stderr," wt "O"d\n",wt(tough_itm)); +} + +@ @= +void print_weights(void) { + register int k; + for (k=0;k= +{ + register float score,tscore,w; + score=finfty,t=infty,tmems=mems; + if ((vbose&show_details) && + level=maxl-show_choices_gap) + fprintf(stderr,"Level "O"d:",level); + for (k=0;k; + o,force[forced++]=item[k]; + }@+else { + o,w=wt(item[k]); + tscore=s/w; + if (tscore>=finfty) tscore=dangerous; + if (tscore=maxl-show_choices_gap) { + print_item_name(item[k],stderr);@+ + if (s==1) fprintf(stderr,"(1)"); + else fprintf(stderr,"("O"d,"O"d)",s,wt(item[k])); + } + } + if ((vbose&show_details) && + level=maxl-show_choices_gap) { + if (forced) fprintf(stderr," found "O"d forced\n",forced); + else if (t==infty) fprintf(stderr," solution\n"); + else { + fprintf(stderr," branching on"); + print_item_name(best_itm,stderr); + fprintf(stderr,"("O"d), score "O".4f\n",t,score); + } + } + if (t>maxdeg && tmaxdeg && t= +@y +@ Oops --- we've run into a case where the current choice at |level-1| +has wiped out |item[k]|. Thus |item[k]| is not a ``best item'' for branching, +even though it manifestly has the smallest option list; it's really +a ``tough item.'' (Impossibly good.) + +@= +{ + if ((vbose&show_details) && + level=maxl-show_choices_gap) { + fprintf(stderr,"\n--- Item "); + print_item_name(item[k],stderr); + fprintf(stderr," has been wiped out!\n"); + } + tough_itm=item[k]; + cmems+=mems-tmems; + @; + forced=0; + goto backup; +} + +@ @= +@z diff --git a/programs-orig/ssxcc.w b/programs-orig/ssxcc.w new file mode 100644 index 0000000..f9db178 --- /dev/null +++ b/programs-orig/ssxcc.w @@ -0,0 +1,986 @@ +@s mod and +\let\Xmod=\bmod % this is CWEB magic for using "mod" instead of "%" + +\datethis +@*Intro. This program is an ``{\mc XCC} solver'' that I'm writing +as an experiment in the use of so-called sparse-set data structures +instead of the dancing links structures that I've played with for thirty years. +I plan to write it as if I live on a planet where the sparse-set +ideas are well known, but doubly linked links are almost unheard-of. +As I begin, I know that the similar program {\mc SSXC1} works fine. + +I shall accept the {\mc DLX} input format used in the previous solvers, +without change, so that a fair comparison can be made. +(See the program {\mc DLX2} for definitions. Much of the code from +that program is used to parse the input for this one.) + +My original attempt, {\mc SSXC0}, kept the basic structure of {\mc DLX1} +and changed only the data structure link conventions. The present +version incorporates new ideas from Christine Solnon's +program {\mc XCC-WITH-DANCING-CELLS}, which she wrote in October 2020. +In particular, she proposed saving all the active set sizes on a stack; +program {\mc SSXC0} recomputed them by undoing the forward calculations +in reverse. She also showed how to unify ``purification'' with ``covering.'' + +In August, 2023, Christine told me about two further improvements: +We can easily recognize most cases where an item has only one +option left (a ``forced move''). And in such cases, it isn't +necessary to save the domain sizes of the other active elements, +because we won't need that information when backtracking. + +@ After this program finds all solutions, it normally prints their total +number on |stderr|, together with statistics about how many +nodes were in the search tree, and how many ``updates'' were made. +The running time in ``mems'' is also reported, together with the approximate +number of bytes needed for data storage. +(An ``update'' is the removal of an option from its item list, +or the removal of a satisfied color constraint from its option. +One ``mem'' essentially means a memory access to a 64-bit word. +The reported totals don't include the time or space needed to parse the +input or to format the output.) + +@d o mems++ /* count one mem */ +@d oo mems+=2 /* count two mems */ +@d ooo mems+=3 /* count three mems */ +@d O "%" /* used for percent signs in format strings */ +@d mod % /* used for percent signs denoting remainder in \CEE/ */ + +@d max_level 5000 /* at most this many options in a solution */ +@d max_cols 100000 /* at most this many items */ +@d max_nodes 10000000 /* at most this many nonzero elements in the matrix */ +@d savesize 10000000 /* at most this many entries on |savestack| */ +@d bufsize (9*max_cols+3) /* a buffer big enough to hold all item names */ + +@ Here is the overall structure: + +@c +#include +#include +#include +#include +#include "gb_flip.h" +typedef unsigned int uint; /* a convenient abbreviation */ +typedef unsigned long long ullng; /* ditto */ +@; +@; +@; +main (int argc, char *argv[]) { + register int c,cc,i,j,k,p,pp,q,r,s,t,cur_choice,cur_node,best_itm; + @; + @; + @; + if (vbose&show_basics) + @; + if (vbose&show_tots) + @; + imems=mems, mems=0; + if (baditem) @@; + else { + if (randomizing) @; + @; + } +done:@+if (vbose&show_profile) @; + if (vbose&show_max_deg) + fprintf(stderr,"The maximum branching degree was "O"d.\n",maxdeg); + if (vbose&show_basics) { + fprintf(stderr,"Altogether "O"llu solution"O"s, "O"llu+"O"llu mems,", + count,count==1?"":"s",imems,mems); + bytes=(itemlength+setlength)*sizeof(int)+last_node*sizeof(node) + +2*maxl*sizeof(int)+maxsaveptr*sizeof(twoints); + fprintf(stderr," "O"llu updates, "O"llu bytes, "O"llu nodes,", + updates,bytes,nodes); + fprintf(stderr," ccost "O"lld%%.\n", + mems? (200*cmems+mems)/(2*mems):0); + } + if (sanity_checking) fprintf(stderr,"sanity_checking was on!\n"); + @; +} + +@ You can control the amount of output, as well as certain properties +of the algorithm, by specifying options on the command line: +\smallskip\item{$\bullet$} +`\.v$\langle\,$integer$\,\rangle$' enables or disables various kinds of verbose + output on |stderr|, given by binary codes such as |show_choices|; +\item{$\bullet$} +`\.m$\langle\,$integer$\,\rangle$' causes every $m$th solution +to be output (the default is \.{m0}, which merely counts them); +\item{$\bullet$} +`\.s$\langle\,$integer$\,\rangle$' causes the algorithm to randomize +the initial list of items (thus providing some variety, although +the solutions are by no means uniformly random); +\item{$\bullet$} +`\.d$\langle\,$integer$\,\rangle$' sets |delta|, which causes periodic +state reports on |stderr| after the algorithm has performed approximately +|delta| mems since the previous report (default 10000000000); +\item{$\bullet$} +`\.c$\langle\,$positive integer$\,\rangle$' limits the levels on which +choices are shown during verbose tracing; +\item{$\bullet$} +`\.C$\langle\,$positive integer$\,\rangle$' limits the levels on which +choices are shown in the periodic state reports; +\item{$\bullet$} +`\.l$\langle\,$nonnegative integer$\,\rangle$' gives a {\it lower\/} limit, +relative to the maximum level so far achieved, to the levels on which +choices are shown during verbose tracing; +\item{$\bullet$} +`\.t$\langle\,$positive integer$\,\rangle$' causes the program to +stop after this many solutions have been found; +\item{$\bullet$} +`\.T$\langle\,$integer$\,\rangle$' sets |timeout| (which causes abrupt +termination if |mems>timeout| at the beginning of a level); +\item{$\bullet$} +`\.S$\langle\,$filename$\,\rangle$' to output a ``shape file'' that encodes +the search tree. + +@d show_basics 1 /* |vbose| code for basic stats; this is the default */ +@d show_choices 2 /* |vbose| code for backtrack logging */ +@d show_details 4 /* |vbose| code for further commentary */ +@d show_profile 128 /* |vbose| code to show the search tree profile */ +@d show_full_state 256 /* |vbose| code for complete state reports */ +@d show_tots 512 /* |vbose| code for reporting item totals at start */ +@d show_warnings 1024 /* |vbose| code for reporting options without primaries */ +@d show_max_deg 2048 /* |vbose| code for reporting maximum branching degree */ + +@= +int random_seed=0; /* seed for the random words of |gb_rand| */ +int randomizing; /* has `\.s' been specified? */ +int vbose=show_basics+show_warnings; /* level of verbosity */ +int spacing; /* solution $k$ is output if $k$ is a multiple of |spacing| */ +int show_choices_max=1000000; /* above this level, |show_choices| is ignored */ +int show_choices_gap=1000000; /* below level |maxl-show_choices_gap|, + |show_details| is ignored */ +int show_levels_max=1000000; /* above this level, state reports stop */ +int maxl; /* maximum level actually reached */ +int maxsaveptr; /* maximum size of |savestack| */ +char buf[bufsize]; /* input buffer */ +ullng count; /* solutions found so far */ +ullng options; /* options seen so far */ +ullng imems,mems,tmems,cmems; /* mem counts */ +ullng updates; /* update counts */ +ullng bytes; /* memory used by main data structures */ +ullng nodes; /* total number of branch nodes initiated */ +ullng thresh=10000000000; /* report when |mems| exceeds this, if |delta!=0| */ +ullng delta=10000000000; /* report every |delta| or so mems */ +ullng maxcount=0xffffffffffffffff; /* stop after finding this many solutions */ +ullng timeout=0x1fffffffffffffff; /* give up after this many mems */ +FILE *shape_file; /* file for optional output of search tree shape */ +char *shape_name; /* its name */ +int maxdeg; /* the largest branching degree seen so far */ + +@ If an option appears more than once on the command line, the first +appearance takes precedence. + +@= +for (j=argc-1,k=0;j;j--) switch (argv[j][0]) { +case 'v': k|=(sscanf(argv[j]+1,""O"d",&vbose)-1);@+break; +case 'm': k|=(sscanf(argv[j]+1,""O"d",&spacing)-1);@+break; +case 's': k|=(sscanf(argv[j]+1,""O"d",&random_seed)-1),randomizing=1;@+break; +case 'd': k|=(sscanf(argv[j]+1,""O"lld",&delta)-1),thresh=delta;@+break; +case 'c': k|=(sscanf(argv[j]+1,""O"d",&show_choices_max)-1);@+break; +case 'C': k|=(sscanf(argv[j]+1,""O"d",&show_levels_max)-1);@+break; +case 'l': k|=(sscanf(argv[j]+1,""O"d",&show_choices_gap)-1);@+break; +case 't': k|=(sscanf(argv[j]+1,""O"lld",&maxcount)-1);@+break; +case 'T': k|=(sscanf(argv[j]+1,""O"lld",&timeout)-1);@+break; +case 'S': shape_name=argv[j]+1, shape_file=fopen(shape_name,"w"); + if (!shape_file) + fprintf(stderr,"Sorry, I can't open file `"O"s' for writing!\n", + shape_name); + break; +default: k=1; /* unrecognized command-line option */ +} +if (k) { + fprintf(stderr, "Usage: "O"s [v] [m] [s] [d]" + " [c] [C] [l] [t] [T] [S] < foo.dlx\n", + argv[0]); + exit(-1); +} +if (randomizing) gb_init_rand(random_seed); + +@ @= +if (shape_file) fclose(shape_file); + +@*Data structures. +Sparse-set data structures were introduced by Preston Briggs +and Linda Torczon [{\sl ACM Letters on Programming Languages and Systems\/ +\bf2} (1993), 59--69], who realized that exercise 2.12 in +Aho, Hopcroft, and Ullman's classic text {\sl The Design and Analysis +of Computer Algorithms\/} (Addison--Wesley, 1974) was much more than +just a slick trick to avoid initializing an array. +(Indeed, {\sl TAOCP\/} exercise 2.2.6--24 calls it the ``sparse array trick.'') + +The basic idea is amazingly simple, when specialized to the situations +that we need to deal with: We can represent a subset~$S$ of the universe +$U=\{x_0,x_1,\ldots,x_{n-1}\}$ by maintaining two $n$-element arrays +$p$ and $q$, each of which is a permutation of~$\{0,1,\ldots,n-1\}$, +together with an integer $s$ in the range $0\le s\le n$. In fact, $p$ is +the {\it inverse\/} of~$q$; and $s$ is the number of elements of~$S$. +The current value of the set $S$ is then simply +$\{x_{p_0},\ldots,x_{p_{s-1}}\}$. (Notice that every $s$-element subset can be +represented in $s!\,(n-s)!$ ways.) + +It's easy to test if $x_k\in S$, because that's true if and only if $q_k= +typedef struct node_struct { + int itm; /* the item |x| corresponding to this node */ + int loc; /* where this node resides in |x|'s active set */ + int clr; /* color associated with item |x| in this option, if any */ + int spr; /* a spare field inserted only to maintain 16-byte alignment */ +} node; + +@ @= +node nd[max_nodes]; /* the master list of nodes */ +int last_node; /* the first node in |nd| that's not yet used */ +int item[max_cols]; /* the master list of items */ +int second=max_cols; /* boundary between primary and secondary items */ +int last_itm; /* items seen so far during input, plus 1 */ +int set[max_nodes+maxextra*max_cols]; /* the sets of active options for active items */ +int itemlength; /* number of elements used in |item| */ +int setlength; /* number of elements used in |set| */ +int active; /* current number of active items */ +int oactive; /* value of active before swapping out current-choice items */ +int baditem; /* an item with no options, plus 1 */ +int osecond; /* setting of |second| just after initial input */ +int force[max_cols]; /* stack of items known to have size 1 */ +int forced; /* the number of items on that stack */ + +@ We're going to store string data (an item's name) in the midst of +the integer array |set|. So we've got to do some type coercion using +low-level \CEE/-ness. + +@= +typedef struct { + int l,r; +} twoints; +typedef union { + unsigned char str[8]; /* eight one-byte characters */ + twoints lr; /* two four-byte integers */ +} stringbuf; +stringbuf namebuf; + +@ @= +void print_item_name(int k,FILE *stream) { + namebuf.lr.l=lname(k),namebuf.lr.r=rname(k); + fprintf(stream," "O".8s",namebuf.str); +} + +@ An option is identified not by name but by the names of the items it contains. +Here is a routine that prints an option, given a pointer to any of its +nodes. It also prints the position of the option in its item list. + +@= +void print_option(int p,FILE *stream) { + register int k,q,x; + x=nd[p].itm; + if (p>=last_node || x<=0) { + fprintf(stderr,"Illegal option "O"d!\n",p); + return; + } + for (q=p;;) { + print_item_name(x,stream); + if (nd[q].clr) + fprintf(stream,":"O"c",nd[q].clr); + q++; + x=nd[q].itm; + if (x<0) q+=x,x=nd[q].itm; + if (q==p) break; + } + k=nd[q].loc; + fprintf(stream," ("O"d of "O"d)\n",k-x+1,size(x)); +} +@# +void prow(int p) { + print_option(p,stderr); +} + +@ When I'm debugging, I might want to look at one of the current item lists. + +@= +void print_itm(int c) { + register int p; + if (c=setlength || + pos(c)<0 || pos(c)>=itemlength || item[pos(c)]!=c) { + fprintf(stderr,"Illegal item "O"d!\n",c); + return; + } + fprintf(stderr,"Item"); + print_item_name(c,stderr); + if (c=active) + fprintf(stderr," (secondary "O"d, purified), length "O"d:\n", + pos(c)+1,size(c)); + else fprintf(stderr," (secondary "O"d), length "O"d:\n", + pos(c)+1,size(c)); + for (p=c;p= +void sanity(void) { + register int k,x,i,l,r,q,qq; + for (k=0;kr) fprintf(stderr,"itm>loc in node "O"d!\n",i); + else { + if (set[r]!=i) { + fprintf(stderr,"Bad loc field for option "O"d of item",r-l+1); + print_item_name(l,stderr); + fprintf(stderr," in node "O"d!\n",i); + } + if (pos(l)= +while (1) { + if (!fgets(buf,bufsize,stdin)) break; + if (o,buf[p=strlen(buf)-1]!='\n') panic("Input line way too long"); + for (p=0;o,isspace(buf[p]);p++) ; + if (buf[p]=='|' || !buf[p]) continue; /* bypass comment or blank line */ + last_itm=1; + break; +} +if (!last_itm) panic("No items"); +for (;o,buf[p];) { + o,namebuf.lr.l=namebuf.lr.r=0; + for (j=0;j<8 && (o,!isspace(buf[p+j]));j++) { + if (buf[p+j]==':' || buf[p+j]=='|') + panic("Illegal character in item name"); + o,namebuf.str[j]=buf[p+j]; + } + if (j==8 && !isspace(buf[p+j])) panic("Item name too long"); + oo,lname(last_itm<<2)=namebuf.lr.l,rname(last_itm<<2)=namebuf.lr.r; + @; + last_itm++; + if (last_itm>max_cols) panic("Too many items"); + for (p+=j+1;o,isspace(buf[p]);p++) ; + if (buf[p]=='|') { + if (second!=max_cols) panic("Item name line contains | twice"); + second=last_itm; + for (p++;o,isspace(buf[p]);p++) ; + } +} + +@ @= +for (k=last_itm-1;k;k--) { + if (o,lname(k<<2)!=namebuf.lr.l) continue; + if (rname(k<<2)==namebuf.lr.r) break; +} +if (k) panic("Duplicate item name"); + +@ I'm putting the option number into the |spr| field of the +spacer that follows it, as a +possible debugging aid. But the program doesn't currently use that information. + +@= +while (1) { + if (!fgets(buf,bufsize,stdin)) break; + if (o,buf[p=strlen(buf)-1]!='\n') panic("Option line too long"); + for (p=0;o,isspace(buf[p]);p++) ; + if (buf[p]=='|' || !buf[p]) continue; /* bypass comment or blank line */ + i=last_node; /* remember the spacer at the left of this option */ + for (pp=0;buf[p];) { + o,namebuf.lr.l=namebuf.lr.r=0; + for (j=0;j<8 && (o,!isspace(buf[p+j])) && buf[p+j]!=':';j++) + o,namebuf.str[j]=buf[p+j]; + if (!j) panic("Empty item name"); + if (j==8 && !isspace(buf[p+j]) && buf[p+j]!=':') + panic("Item name too long"); + @; + if (buf[p+j]!=':') o,nd[last_node].clr=0; + else if (k>=second) { + if ((o,isspace(buf[p+j+1])) || (o,!isspace(buf[p+j+2]))) + panic("Color must be a single character"); + o,nd[last_node].clr=(unsigned char)buf[p+j+1]; + p+=2; + }@+else panic("Primary item must be uncolored"); + for (p+=j+1;o,isspace(buf[p]);p++) ; + } + if (!pp) { + if (vbose&show_warnings) + fprintf(stderr,"Option ignored (no primary items): "O"s",buf); + while (last_node>i) { + @; + last_node--; + } + }@+else { + o,nd[i].loc=last_node-i; /* complete the previous spacer */ + last_node++; /* create the next spacer */ + if (last_node==max_nodes) panic("Too many nodes"); + options++; + o,nd[last_node].itm=i+1-last_node; + nd[last_node].spr=options; /* option number, for debugging only */ + } +} +@; +@; +@; + +@ We temporarily use |pos| to recognize duplicate items in an option. + +@= +for (k=(last_itm-1)<<2;k;k-=4) { + if (o,lname(k)!=namebuf.lr.l) continue; + if (rname(k)==namebuf.lr.r) break; +} +if (!k) panic("Unknown item name"); +if (o,pos(k)>i) panic("Duplicate item name in this option"); +last_node++; +if (last_node==max_nodes) panic("Too many nodes"); +o,t=size(k); /* how many previous options have used this item? */ +o,nd[last_node].itm=k>>2,nd[last_node].loc=t; +if ((k>>2)= +o,k=nd[last_node].itm<<2; +oo,size(k)--,pos(k)=i-1; + +@ @= +active=itemlength=last_itm-1; +for (k=0,j=primextra;k= +for (;k;k--) { + o,j=item[k-1]; + if (k==second) second=j; /* |second| is now an index into |set| */ + oo,size(j)=size(k<<2); + if (size(j)==0 && k<=osecond) baditem=k; + o,pos(j)=k-1; + oo,rname(j)=rname(k<<2),lname(j)=lname(k<<2); +} + +@ @= +for (k=1;k= +{ + if (vbose&show_choices) { + fprintf(stderr,"Item"); + print_item_name(item[baditem-1],stderr); + fprintf(stderr," has no options!\n"); + } +} + +@ The ``number of entries'' includes spacers (because {\mc DLX2} +includes spacers in its reports). If you want to know the +sum of the option lengths, just subtract the number of options. + +@= +fprintf(stderr, + "("O"lld options, "O"d+"O"d items, "O"d entries successfully read)\n", + options,osecond,itemlength-osecond,last_node); + +@ The item lengths after input are shown (on request). +But there's little use trying to show them after the process is done, +since they are restored somewhat blindly. +(Failures of the linked-list implementation in {\mc DLX2} could sometimes be +detected by showing the final lengths; but that reasoning no longer applies.) + +@= +{ + fprintf(stderr,"Item totals:"); + for (k=0;k= +for (k=active;k>1;) { + mems+=4,j=gb_unif_rand(k); + k--; + oo,oo,t=item[j],item[j]=item[k],item[k]=t; + oo,pos(t)=k,pos(item[j])=j; +} + +@*The dancing. +Our strategy for generating all exact covers will be to repeatedly +choose an item that appears to be hardest to cover, namely an item whose set +is currently smallest, among all items that still need to be covered. +And we explore all possibilities via depth-first search. + +The neat part of this algorithm is the way the sets are maintained. +Depth-first search means last-in-first-out maintenance of data structures; +and the sparse-set representations make it particularly easy to undo +what we've done at deeper levels. + +The basic operation is ``covering an item.'' That means removing it +from the set of items needing to be covered, and ``hiding'' its +options: removing them from the sets of the other items they contain. + +@= +{ + level=0; +forward: nodes++; + if (vbose&show_profile) profile[level]++; + if (sanity_checking) sanity(); + @; + @; + @; + if (forced) { + o,best_itm=force[--forced]; + @; + } + if (t==max_nodes) @; + @; + oactive=active,hide(best_itm,0,0); /* hide its options */ + cur_choice=best_itm; + @; +advance: oo,cur_node=choice[level]=set[cur_choice]; +tryit:@+if ((vbose&show_choices) && level; + @; + if (++level>maxl) { + if (level>=max_level) { + fprintf(stderr,"Too many levels!\n"); + exit(-4); + } + maxl=level; + } + goto forward; +backup:@+if (level==0) goto done; + level--; + oo,cur_node=choice[level],best_itm=nd[cur_node].itm,cur_choice=nd[cur_node].loc; +abort:@+if (o,cur_choice+1>=best_itm+size(best_itm)) goto backup; + @; + cur_choice++;@+goto advance; +} + +@ We save the sizes of active items on |savestack|, whose entries +have two fields |l| and |r|, for an item and its size. This stack +makes it easy to undo all deletions, by simply restoring the former sizes. + +@= +int level; /* number of choices in current partial solution */ +int choice[max_level]; /* the node chosen on each level */ +int saved[max_level+1]; /* size of |savestack| on each level */ +ullng profile[max_level]; /* number of search tree nodes on each level */ +twoints savestack[savesize]; +int saveptr; /* current size of |savestack| */ + +@ @= +if (delta && (mems>=thresh)) { + thresh+=delta; + if (vbose&show_full_state) print_state(); + else print_progress(); +} +if (mems>=timeout) { + fprintf(stderr,"TIMEOUT!\n");@+goto done; +} + +@ @= +p=active-1,active=p; +o,pp=pos(best_itm); +o,cc=item[p]; +oo,item[p]=best_itm,item[pp]=cc; +oo,pos(cc)=pp,pos(best_itm)=p; +updates++; + +@ Note that a colored secondary item might have already been purified, +in which case it has already been swapped out. We don't want to +tamper with any of the inactive items. + +@= +p=oactive=active; +for (q=cur_node+1;q!=cur_node;) { + o,c=nd[q].itm; + if (c<0) q+=c; + else { + o,pp=pos(c); + if (pp=oactive|. + +@= +for (q=cur_node+1;q!=cur_node;) { + o,cc=nd[q].itm; + if (cc<0) q+=cc; + else { + if (cc= +int hide(int c,int color,int check) { + register int cc,s,rr,ss,nn,tt,uu,vv,nnp; + for (o,rr=c,s=c+size(c);rr; + } + return 1; +} + +@ @= +{ + for (nn=tt+1;nn!=tt;) { + o,uu=nd[nn].itm,vv=nd[nn].loc; + if (uu<0) {@+nn+=uu;@+continue;@+} + if (o,pos(uu)1$, +we choose the leftmost. + +Notice that a secondary item is active if and only if it has not +been purified (that is, if and only if it hasn't yet appeared in +a chosen option). + +(This program explores the search space in a slightly different order +from {\mc DLX2}, because the ordering of items in the active list +is no longer fixed. But ties are broken in the same way when $s>1$.) + +@= +t=max_nodes,tmems=mems; +if ((vbose&show_details) && + level=maxl-show_choices_gap) + fprintf(stderr,"Level "O"d:",level); +for (k=0;k=maxl-show_choices_gap) { + print_item_name(item[k],stderr); + fprintf(stderr,"("O"d)",s); + } + if (s<=1) { + if (s==0) fprintf(stderr,"I'm confused.\n"); /* |hide| missed this */ + else o,force[forced++]=item[k]; + }@+else if (s<=t) { + if (s=maxl-show_choices_gap) { + if (forced) fprintf(stderr," found "O"d forced\n",forced); + else if (t==max_nodes) fprintf(stderr," solution\n"); + else { + fprintf(stderr," branching on"); + print_item_name(best_itm,stderr); + fprintf(stderr,"("O"d)\n",t); + } +} +if (t>maxdeg && t= +while (forced) { + o,best_itm=force[--forced]; + if (o,pos(best_itm); +} + +@ @= +{ + count++; + if (spacing && (count mod spacing==0)) { + printf(""O"lld:\n",count); + for (k=0;k=maxcount) goto done; + goto backup; +} + +@ @= +if (saveptr+active>maxsaveptr) { + if (saveptr+active>=savesize) { + fprintf(stderr,"Stack overflow (savesize="O"d)!\n",savesize); + exit(-5); + } + maxsaveptr=saveptr+active; +} +for (p=0;p= +o,saveptr=saved[level+1]; +o,active=saveptr-saved[level]; +for (p=-active;p<0;p++) + oo,size(savestack[saveptr+p].l)=savestack[saveptr+p].r; + +@ A forced move occurs when |best_itm| has only one remaining option. +In this case we can streamline the computation, because there's no need +to save the current active sizes. (They won't be looked at.) + +@= +{ + if ((vbose&show_choices) && level; + oactive=active,hide(best_itm,0,0); /* hide its options */ + cur_choice=best_itm; + o,saved[level+1]=saveptr; /* nothing placed on |savestack| */ + goto advance; +} + +@ @= +void print_savestack(int start,int stop) { + register k; + for (k=start;k<=stop;k++) { + print_item_name(savestack[k].l,stderr); + fprintf(stderr,"("O"d), "O"d\n",savestack[k].l,savestack[k].r); + } +} + +@ @= +void print_state(void) { + register int l; + fprintf(stderr,"Current state (level "O"d):\n",level); + for (l=0;l=show_levels_max) { + fprintf(stderr," ...\n"); + break; + } + } + fprintf(stderr," "O"lld solutions, "O"lld mems, and max level "O"d so far.\n", + count,mems,maxl); +} + +@ During a long run, it's helpful to have some way to measure progress. +The following routine prints a string that indicates roughly where we +are in the search tree. The string consists of character pairs, separated +by blanks, where each character pair represents a branch of the search +tree. When a node has $d$ descendants and we are working on the $k$th, +the two characters respectively represent $k$ and~$d$ in a simple code; +namely, the values 0, 1, \dots, 61 are denoted by +$$\.0,\ \.1,\ \dots,\ \.9,\ \.a,\ \.b,\ \dots,\ \.z,\ \.A,\ \.B,\ \dots,\.Z.$$ +All values greater than 61 are shown as `\.*'. Notice that as computation +proceeds, this string will increase lexicographically. + +Following that string, a fractional estimate of total progress is computed, +based on the na{\"\i}ve assumption that the search tree has a uniform +branching structure. If the tree consists +of a single node, this estimate is~.5; otherwise, if the first choice +is `$k$ of~$d$', the estimate is $(k-1)/d$ plus $1/d$ times the +recursively evaluated estimate for the $k$th subtree. (This estimate +might obviously be very misleading, in some cases, but at least it +tends to grow monotonically.) + +@= +void print_progress(void) { + register int l,k,d,c,p,ds=0; + register double f,fd; + fprintf(stderr," after "O"lld mems: "O"lld sols,",mems,count); + for (f=0.0,fd=1.0,l=0;l= +{ + fprintf(stderr,"Profile:\n"); + for (level=0;level<=maxl;level++) + fprintf(stderr,""O"3d: "O"lld\n", + level,profile[level]); +} + +@*Index. diff --git a/programs-orig/ssxcc0.w b/programs-orig/ssxcc0.w new file mode 100644 index 0000000..4d7f943 --- /dev/null +++ b/programs-orig/ssxcc0.w @@ -0,0 +1,934 @@ +@s mod and +\let\Xmod=\bmod % this is CWEB magic for using "mod" instead of "%" + +\datethis +@*Intro. This program is an ``{\mc XCC} solver'' that I'm writing +as an experiment in the use of so-called sparse-set data structures +instead of the dancing links structures that I've played with for thirty years. +I plan to write it as if I live on a planet where the sparse-set +ideas are well known, but doubly linked links are almost unheard-of. +As I begin, I know that the similar program {\mc SSXC1} works fine. + +I shall accept the {\mc DLX} input format used in the previous solvers, +without change, so that a fair comparison can be made. +(See the program {\mc DLX2} for definitions. Much of the code from +that program is used to parse the input for this one.) + +My original attempt, {\mc SSXC0}, kept the basic structure of {\mc DLX1} +and changed only the data structure link conventions. The present +version incorporates new ideas from Christine Solnon's +program {\mc XCC-WITH-DANCING-CELLS}, which she wrote in October 2020. +In particular, she proposed saving all the active set sizes on a stack; +program {\mc SSXC0} recomputed them by undoing the forward calculations +in reverse. She also showed how to unify ``purification'' with ``covering.'' + +@ After this program finds all solutions, it normally prints their total +number on |stderr|, together with statistics about how many +nodes were in the search tree, and how many ``updates'' were made. +The running time in ``mems'' is also reported, together with the approximate +number of bytes needed for data storage. +(An ``update'' is the removal of an option from its item list, +or the removal of a satisfied color constraint from its option. +One ``mem'' essentially means a memory access to a 64-bit word. +The reported totals don't include the time or space needed to parse the +input or to format the output.) + +Here is the overall structure: + +@d o mems++ /* count one mem */ +@d oo mems+=2 /* count two mems */ +@d ooo mems+=3 /* count three mems */ +@d O "%" /* used for percent signs in format strings */ +@d mod % /* used for percent signs denoting remainder in \CEE/ */ + +@d max_level 5000 /* at most this many options in a solution */ +@d max_cols 100000 /* at most this many items */ +@d max_nodes 10000000 /* at most this many nonzero elements in the matrix */ +@d savesize 10000000 /* at most this many entries on |savestack| */ +@d bufsize (9*max_cols+3) /* a buffer big enough to hold all item names */ + +@c +#include +#include +#include +#include +#include "gb_flip.h" +typedef unsigned int uint; /* a convenient abbreviation */ +typedef unsigned long long ullng; /* ditto */ +@; +@; +@; +main (int argc, char *argv[]) { + register int c,cc,i,j,k,p,pp,q,r,s,t,cur_choice,cur_node,best_itm; + @; + @; + @; + if (vbose&show_basics) + @; + if (vbose&show_tots) + @; + imems=mems, mems=0; + if (baditem) @@; + else { + if (randomizing) @; + @; + } +done:@+if (vbose&show_profile) @; + if (vbose&show_max_deg) + fprintf(stderr,"The maximum branching degree was "O"d.\n",maxdeg); + if (vbose&show_basics) { + fprintf(stderr,"Altogether "O"llu solution"O"s, "O"llu+"O"llu mems,", + count,count==1?"":"s",imems,mems); + bytes=(itemlength+setlength)*sizeof(int)+last_node*sizeof(node) + +2*maxl*sizeof(int)+maxsaveptr*sizeof(twoints); + fprintf(stderr," "O"llu updates, "O"llu bytes, "O"llu nodes,", + updates,bytes,nodes); + fprintf(stderr," ccost "O"lld%%.\n", + mems? (200*cmems+mems)/(2*mems):0); + } + if (sanity_checking) fprintf(stderr,"sanity_checking was on!\n"); + @; +} + +@ You can control the amount of output, as well as certain properties +of the algorithm, by specifying options on the command line: +\smallskip\item{$\bullet$} +`\.v$\langle\,$integer$\,\rangle$' enables or disables various kinds of verbose + output on |stderr|, given by binary codes such as |show_choices|; +\item{$\bullet$} +`\.m$\langle\,$integer$\,\rangle$' causes every $m$th solution +to be output (the default is \.{m0}, which merely counts them); +\item{$\bullet$} +`\.s$\langle\,$integer$\,\rangle$' causes the algorithm to randomize +the initial list of items (thus providing some variety, although +the solutions are by no means uniformly random); +\item{$\bullet$} +`\.d$\langle\,$integer$\,\rangle$' sets |delta|, which causes periodic +state reports on |stderr| after the algorithm has performed approximately +|delta| mems since the previous report (default 10000000000); +\item{$\bullet$} +`\.c$\langle\,$positive integer$\,\rangle$' limits the levels on which +choices are shown during verbose tracing; +\item{$\bullet$} +`\.C$\langle\,$positive integer$\,\rangle$' limits the levels on which +choices are shown in the periodic state reports; +\item{$\bullet$} +`\.l$\langle\,$nonnegative integer$\,\rangle$' gives a {\it lower\/} limit, +relative to the maximum level so far achieved, to the levels on which +choices are shown during verbose tracing; +\item{$\bullet$} +`\.t$\langle\,$positive integer$\,\rangle$' causes the program to +stop after this many solutions have been found; +\item{$\bullet$} +`\.T$\langle\,$integer$\,\rangle$' sets |timeout| (which causes abrupt +termination if |mems>timeout| at the beginning of a level); +\item{$\bullet$} +`\.S$\langle\,$filename$\,\rangle$' to output a ``shape file'' that encodes +the search tree. + +@d show_basics 1 /* |vbose| code for basic stats; this is the default */ +@d show_choices 2 /* |vbose| code for backtrack logging */ +@d show_details 4 /* |vbose| code for further commentary */ +@d show_profile 128 /* |vbose| code to show the search tree profile */ +@d show_full_state 256 /* |vbose| code for complete state reports */ +@d show_tots 512 /* |vbose| code for reporting item totals at start */ +@d show_warnings 1024 /* |vbose| code for reporting options without primaries */ +@d show_max_deg 2048 /* |vbose| code for reporting maximum branching degree */ + +@= +int random_seed=0; /* seed for the random words of |gb_rand| */ +int randomizing; /* has `\.s' been specified? */ +int vbose=show_basics+show_warnings; /* level of verbosity */ +int spacing; /* solution $k$ is output if $k$ is a multiple of |spacing| */ +int show_choices_max=1000000; /* above this level, |show_choices| is ignored */ +int show_choices_gap=1000000; /* below level |maxl-show_choices_gap|, + |show_details| is ignored */ +int show_levels_max=1000000; /* above this level, state reports stop */ +int maxl; /* maximum level actually reached */ +int maxsaveptr; /* maximum size of |savestack| */ +char buf[bufsize]; /* input buffer */ +ullng count; /* solutions found so far */ +ullng options; /* options seen so far */ +ullng imems,mems,tmems,cmems; /* mem counts */ +ullng updates; /* update counts */ +ullng bytes; /* memory used by main data structures */ +ullng nodes; /* total number of branch nodes initiated */ +ullng thresh=10000000000; /* report when |mems| exceeds this, if |delta!=0| */ +ullng delta=10000000000; /* report every |delta| or so mems */ +ullng maxcount=0xffffffffffffffff; /* stop after finding this many solutions */ +ullng timeout=0x1fffffffffffffff; /* give up after this many mems */ +FILE *shape_file; /* file for optional output of search tree shape */ +char *shape_name; /* its name */ +int maxdeg; /* the largest branching degree seen so far */ + +@ If an option appears more than once on the command line, the first +appearance takes precedence. + +@= +for (j=argc-1,k=0;j;j--) switch (argv[j][0]) { +case 'v': k|=(sscanf(argv[j]+1,""O"d",&vbose)-1);@+break; +case 'm': k|=(sscanf(argv[j]+1,""O"d",&spacing)-1);@+break; +case 's': k|=(sscanf(argv[j]+1,""O"d",&random_seed)-1),randomizing=1;@+break; +case 'd': k|=(sscanf(argv[j]+1,""O"lld",&delta)-1),thresh=delta;@+break; +case 'c': k|=(sscanf(argv[j]+1,""O"d",&show_choices_max)-1);@+break; +case 'C': k|=(sscanf(argv[j]+1,""O"d",&show_levels_max)-1);@+break; +case 'l': k|=(sscanf(argv[j]+1,""O"d",&show_choices_gap)-1);@+break; +case 't': k|=(sscanf(argv[j]+1,""O"lld",&maxcount)-1);@+break; +case 'T': k|=(sscanf(argv[j]+1,""O"lld",&timeout)-1);@+break; +case 'S': shape_name=argv[j]+1, shape_file=fopen(shape_name,"w"); + if (!shape_file) + fprintf(stderr,"Sorry, I can't open file `"O"s' for writing!\n", + shape_name); + break; +default: k=1; /* unrecognized command-line option */ +} +if (k) { + fprintf(stderr, "Usage: "O"s [v] [m] [s] [d]" + " [c] [C] [l] [t] [T] [S] < foo.dlx\n", + argv[0]); + exit(-1); +} +if (randomizing) gb_init_rand(random_seed); + +@ @= +if (shape_file) fclose(shape_file); + +@*Data structures. +Sparse-set data structures were introduced by Preston Briggs +and Linda Torczon [{\sl ACM Letters on Programming Languages and Systems\/ +\bf2} (1993), 59--69], who realized that exercise 2.12 in +Aho, Hopcroft, and Ullman's classic text {\sl The Design and Analysis +of Computer Algorithms\/} (Addison--Wesley, 1974) was much more than +just a slick trick to avoid initializing an array. +(Indeed, {\sl TAOCP\/} exercise 2.2.6--24 calls it the ``sparse array trick.'') + +The basic idea is amazingly simple, when specialized to the situations +that we need to deal with: We can represent a subset~$S$ of the universe +$U=\{x_0,x_1,\ldots,x_{n-1}\}$ by maintaining two $n$-element arrays +$p$ and $q$, each of which is a permutation of~$\{0,1,\ldots,n-1\}$, +together with an integer $s$ in the range $0\le s\le n$. In fact, $p$ is +the {\it inverse\/} of~$q$; and $s$ is the number of elements of~$S$. +The current value of the set $S$ is then simply +$\{x_{p_0},\ldots,x_{p_{s-1}}\}$. (Notice that every $s$-element subset can be +represented in $s!\,(n-s)!$ ways.) + +It's easy to test if $x_k\in S$, because that's true if and only if $q_k= +typedef struct node_struct { + int itm; /* the item |x| corresponding to this node */ + int loc; /* where this node resides in |x|'s active set */ + int clr; /* color associated with item |x| in this option, if any */ + int spr; /* a spare field inserted only to maintain 16-byte alignment */ +} node; + +@ @= +node nd[max_nodes]; /* the master list of nodes */ +int last_node; /* the first node in |nd| that's not yet used */ +int item[max_cols]; /* the master list of items */ +int second=max_cols; /* boundary between primary and secondary items */ +int last_itm; /* items seen so far during input, plus 1 */ +int set[max_nodes+4*max_cols]; /* the sets of active options for active items */ +int itemlength; /* number of elements used in |item| */ +int setlength; /* number of elements used in |set| */ +int active; /* current number of active items */ +int oactive; /* value of active before swapping out current-choice items */ +int baditem; /* an item with no options, plus 1 */ +int osecond; /* setting of |second| just after initial input */ + +@ We're going to store string data (an item's name) in the midst of +the integer array |set|. So we've got to do some type coercion using +low-level \CEE/-ness. + +@= +typedef struct { + int l,r; +} twoints; +typedef union { + unsigned char str[8]; /* eight one-byte characters */ + twoints lr; /* two four-byte integers */ +} stringbuf; +stringbuf namebuf; + +@ @= +void print_item_name(int k,FILE *stream) { + namebuf.lr.l=lname(k),namebuf.lr.r=rname(k); + fprintf(stream," "O".8s",namebuf.str); +} + +@ An option is identified not by name but by the names of the items it contains. +Here is a routine that prints an option, given a pointer to any of its +nodes. It also prints the position of the option in its item list. + +@= +void print_option(int p,FILE *stream) { + register int k,q,x; + x=nd[p].itm; + if (p>=last_node || x<=0) { + fprintf(stderr,"Illegal option "O"d!\n",p); + return; + } + for (q=p;;) { + print_item_name(x,stream); + if (nd[q].clr) + fprintf(stream,":"O"c",nd[q].clr); + q++; + x=nd[q].itm; + if (x<0) q+=x,x=nd[q].itm; + if (q==p) break; + } + k=nd[q].loc; + fprintf(stream," ("O"d of "O"d)\n",k-x+1,size(x)); +} +@# +void prow(int p) { + print_option(p,stderr); +} + +@ When I'm debugging, I might want to look at one of the current item lists. + +@= +void print_itm(int c) { + register int p; + if (c<4 || c>=setlength || + pos(c)<0 || pos(c)>=itemlength || item[pos(c)]!=c) { + fprintf(stderr,"Illegal item "O"d!\n",c); + return; + } + fprintf(stderr,"Item"); + print_item_name(c,stderr); + if (c=active) + fprintf(stderr," (secondary "O"d, purified), length "O"d:\n", + pos(c)+1,size(c)); + else fprintf(stderr," (secondary "O"d), length "O"d:\n", + pos(c)+1,size(c)); + for (p=c;p= +void sanity(void) { + register int k,x,i,l,r,q,qq; + for (k=0;kr) fprintf(stderr,"itm>loc in node "O"d!\n",i); + else { + if (set[r]!=i) { + fprintf(stderr,"Bad loc field for option "O"d of item",r-l+1); + print_item_name(l,stderr); + fprintf(stderr," in node "O"d!\n",i); + } + if (pos(l)= +while (1) { + if (!fgets(buf,bufsize,stdin)) break; + if (o,buf[p=strlen(buf)-1]!='\n') panic("Input line way too long"); + for (p=0;o,isspace(buf[p]);p++) ; + if (buf[p]=='|' || !buf[p]) continue; /* bypass comment or blank line */ + last_itm=1; + break; +} +if (!last_itm) panic("No items"); +for (;o,buf[p];) { + o,namebuf.lr.l=namebuf.lr.r=0; + for (j=0;j<8 && (o,!isspace(buf[p+j]));j++) { + if (buf[p+j]==':' || buf[p+j]=='|') + panic("Illegal character in item name"); + o,namebuf.str[j]=buf[p+j]; + } + if (j==8 && !isspace(buf[p+j])) panic("Item name too long"); + oo,lname(last_itm<<2)=namebuf.lr.l,rname(last_itm<<2)=namebuf.lr.r; + @; + last_itm++; + if (last_itm>max_cols) panic("Too many items"); + for (p+=j+1;o,isspace(buf[p]);p++) ; + if (buf[p]=='|') { + if (second!=max_cols) panic("Item name line contains | twice"); + second=last_itm; + for (p++;o,isspace(buf[p]);p++) ; + } +} + +@ @= +for (k=last_itm-1;k;k--) { + if (o,lname(k<<2)!=namebuf.lr.l) continue; + if (rname(k<<2)==namebuf.lr.r) break; +} +if (k) panic("Duplicate item name"); + +@ I'm putting the option number into the |spr| field of the +spacer that follows it, as a +possible debugging aid. But the program doesn't currently use that information. + +@= +while (1) { + if (!fgets(buf,bufsize,stdin)) break; + if (o,buf[p=strlen(buf)-1]!='\n') panic("Option line too long"); + for (p=0;o,isspace(buf[p]);p++) ; + if (buf[p]=='|' || !buf[p]) continue; /* bypass comment or blank line */ + i=last_node; /* remember the spacer at the left of this option */ + for (pp=0;buf[p];) { + o,namebuf.lr.l=namebuf.lr.r=0; + for (j=0;j<8 && (o,!isspace(buf[p+j])) && buf[p+j]!=':';j++) + o,namebuf.str[j]=buf[p+j]; + if (!j) panic("Empty item name"); + if (j==8 && !isspace(buf[p+j]) && buf[p+j]!=':') + panic("Item name too long"); + @; + if (buf[p+j]!=':') o,nd[last_node].clr=0; + else if (k>=second) { + if ((o,isspace(buf[p+j+1])) || (o,!isspace(buf[p+j+2]))) + panic("Color must be a single character"); + o,nd[last_node].clr=(unsigned char)buf[p+j+1]; + p+=2; + }@+else panic("Primary item must be uncolored"); + for (p+=j+1;o,isspace(buf[p]);p++) ; + } + if (!pp) { + if (vbose&show_warnings) + fprintf(stderr,"Option ignored (no primary items): "O"s",buf); + while (last_node>i) { + @; + last_node--; + } + }@+else { + o,nd[i].loc=last_node-i; /* complete the previous spacer */ + last_node++; /* create the next spacer */ + if (last_node==max_nodes) panic("Too many nodes"); + options++; + o,nd[last_node].itm=i+1-last_node; + nd[last_node].spr=options; /* option number, for debugging only */ + } +} +@; +@; +@; + +@ We temporarily use |pos| to recognize duplicate items in an option. + +@= +for (k=(last_itm-1)<<2;k;k-=4) { + if (o,lname(k)!=namebuf.lr.l) continue; + if (rname(k)==namebuf.lr.r) break; +} +if (!k) panic("Unknown item name"); +if (o,pos(k)>i) panic("Duplicate item name in this option"); +last_node++; +if (last_node==max_nodes) panic("Too many nodes"); +o,t=size(k); /* how many previous options have used this item? */ +o,nd[last_node].itm=k>>2,nd[last_node].loc=t; +if ((k>>2)= +o,k=nd[last_node].itm<<2; +oo,size(k)--,pos(k)=i-1; + +@ @= +active=itemlength=last_itm-1; +for (k=0,j=4;k= +for (;k;k--) { + o,j=item[k-1]; + if (k==second) second=j; /* |second| is now an index into |set| */ + oo,size(j)=size(k<<2); + if (size(j)==0 && k<=osecond) baditem=k; + o,pos(j)=k-1; + oo,rname(j)=rname(k<<2),lname(j)=lname(k<<2); +} + +@ @= +for (k=1;k= +{ + if (vbose&show_choices) { + fprintf(stderr,"Item"); + print_item_name(item[baditem-1],stderr); + fprintf(stderr," has no options!\n"); + } +} + +@ The ``number of entries'' includes spacers (because {\mc DLX2} +includes spacers in its reports). If you want to know the +sum of the option lengths, just subtract the number of options. + +@= +fprintf(stderr, + "("O"lld options, "O"d+"O"d items, "O"d entries successfully read)\n", + options,osecond,itemlength-osecond,last_node); + +@ The item lengths after input are shown (on request). +But there's little use trying to show them after the process is done, +since they are restored somewhat blindly. +(Failures of the linked-list implementation in {\mc DLX2} could sometimes be +detected by showing the final lengths; but that reasoning no longer applies.) + +@= +{ + fprintf(stderr,"Item totals:"); + for (k=0;k= +for (k=active;k>1;) { + mems+=4,j=gb_unif_rand(k); + k--; + oo,oo,t=item[j],item[j]=item[k],item[k]=t; + oo,pos(t)=k,pos(item[j])=j; +} + +@*The dancing. +Our strategy for generating all exact covers will be to repeatedly +choose an item that appears to be hardest to cover, namely an item whose set +is currently smallest, among all items that still need to be covered. +And we explore all possibilities via depth-first search. + +The neat part of this algorithm is the way the sets are maintained. +Depth-first search means last-in-first-out maintenance of data structures; +and the sparse-set representations make it particularly easy to undo +what we've done at deeper levels. + +The basic operation is ``covering an item.'' That means removing it +from the set of items needing to be covered, and ``hiding'' its +options: removing them from the sets of the other items they contain. + +@= +{ + level=0; +forward: nodes++; + if (vbose&show_profile) profile[level]++; + if (sanity_checking) sanity(); + @; + @; + if (t==max_nodes) @; + @; + oactive=active,hide(best_itm,0,0); /* hide its options */ + cur_choice=best_itm; + @; +advance: oo,cur_node=choice[level]=set[cur_choice]; +tryit:@+if ((vbose&show_choices) && level; + @; + if (++level>maxl) { + if (level>=max_level) { + fprintf(stderr,"Too many levels!\n"); + exit(-4); + } + maxl=level; + } + goto forward; +backup:@+if (level==0) goto done; + level--; + oo,cur_node=choice[level],best_itm=nd[cur_node].itm,cur_choice=nd[cur_node].loc; +abort:@+if (o,cur_choice+1>=best_itm+size(best_itm)) goto backup; + @; + cur_choice++;@+goto advance; +} + +@ We save the sizes of active items on |savestack|, whose entries +have two fields |l| and |r|, for an item and its size. This stack +makes it easy to undo all deletions, by simply restoring the former sizes. + +@= +int level; /* number of choices in current partial solution */ +int choice[max_level]; /* the node chosen on each level */ +int saved[max_level+1]; /* size of |savestack| on each level */ +ullng profile[max_level]; /* number of search tree nodes on each level */ +twoints savestack[savesize]; +int saveptr; /* current size of |savestack| */ + +@ @= +if (delta && (mems>=thresh)) { + thresh+=delta; + if (vbose&show_full_state) print_state(); + else print_progress(); +} +if (mems>=timeout) { + fprintf(stderr,"TIMEOUT!\n");@+goto done; +} + +@ @= +p=active-1,active=p; +o,pp=pos(best_itm); +o,cc=item[p]; +oo,item[p]=best_itm,item[pp]=cc; +oo,pos(cc)=pp,pos(best_itm)=p; +updates++; + +@ Note that a colored secondary item might have already been purified, +in which case it has already been swapped out. We don't want to +tamper with any of the inactive items. + +@= +p=oactive=active; +for (q=cur_node+1;q!=cur_node;) { + o,c=nd[q].itm; + if (c<0) q+=c; + else { + o,pp=pos(c); + if (pp=oactive|. + +@= +for (q=cur_node+1;q!=cur_node;) { + o,cc=nd[q].itm; + if (cc<0) q+=cc; + else { + if (cc= +int hide(int c,int color,int check) { + register int cc,s,rr,ss,nn,tt,uu,vv,nnp; + for (o,rr=c,s=c+size(c);rr; + } + return 1; +} + +@ @= +{ + for (nn=tt+1;nn!=tt;) { + o,uu=nd[nn].itm,vv=nd[nn].loc; + if (uu<0) {@+nn+=uu;@+continue;@+} + if (o,pos(uu)= +t=max_nodes,tmems=mems; +if ((vbose&show_details) && + level=maxl-show_choices_gap) + fprintf(stderr,"Level "O"d:",level); +for (k=0;t>1 && k=maxl-show_choices_gap) { + print_item_name(item[k],stderr); + fprintf(stderr,"("O"d)",s); + } + if (s<=t) { + if (s=maxl-show_choices_gap) { + if (t==max_nodes) fprintf(stderr," solution\n"); + else { + fprintf(stderr," branching on"); + print_item_name(best_itm,stderr); + fprintf(stderr,"("O"d)\n",t); + } +} +if (t>maxdeg && t= +{ + count++; + if (spacing && (count mod spacing==0)) { + printf(""O"lld:\n",count); + for (k=0;k=maxcount) goto done; + goto backup; +} + +@ @= +if (saveptr+active>maxsaveptr) { + if (saveptr+active>=savesize) { + fprintf(stderr,"Stack overflow (savesize="O"d)!\n",savesize); + exit(-5); + } + maxsaveptr=saveptr+active; +} +for (p=0;p= +o,saveptr=saved[level+1]; +o,active=saveptr-saved[level]; +for (p=-active;p<0;p++) + oo,size(savestack[saveptr+p].l)=savestack[saveptr+p].r; + +@ @= +void print_savestack(int start,int stop) { + register k; + for (k=start;k<=stop;k++) { + print_item_name(savestack[k].l,stderr); + fprintf(stderr,"("O"d), "O"d\n",savestack[k].l,savestack[k].r); + } +} + +@ @= +void print_state(void) { + register int l; + fprintf(stderr,"Current state (level "O"d):\n",level); + for (l=0;l=show_levels_max) { + fprintf(stderr," ...\n"); + break; + } + } + fprintf(stderr," "O"lld solutions, "O"lld mems, and max level "O"d so far.\n", + count,mems,maxl); +} + +@ During a long run, it's helpful to have some way to measure progress. +The following routine prints a string that indicates roughly where we +are in the search tree. The string consists of character pairs, separated +by blanks, where each character pair represents a branch of the search +tree. When a node has $d$ descendants and we are working on the $k$th, +the two characters respectively represent $k$ and~$d$ in a simple code; +namely, the values 0, 1, \dots, 61 are denoted by +$$\.0,\ \.1,\ \dots,\ \.9,\ \.a,\ \.b,\ \dots,\ \.z,\ \.A,\ \.B,\ \dots,\.Z.$$ +All values greater than 61 are shown as `\.*'. Notice that as computation +proceeds, this string will increase lexicographically. + +Following that string, a fractional estimate of total progress is computed, +based on the na{\"\i}ve assumption that the search tree has a uniform +branching structure. If the tree consists +of a single node, this estimate is~.5; otherwise, if the first choice +is `$k$ of~$d$', the estimate is $(k-1)/d$ plus $1/d$ times the +recursively evaluated estimate for the $k$th subtree. (This estimate +might obviously be very misleading, in some cases, but at least it +tends to grow monotonically.) + +@= +void print_progress(void) { + register int l,k,d,c,p,ds=0; + register double f,fd; + fprintf(stderr," after "O"lld mems: "O"lld sols,",mems,count); + for (f=0.0,fd=1.0,l=0;l= +{ + fprintf(stderr,"Profile:\n"); + for (level=0;level<=maxl;level++) + fprintf(stderr,""O"3d: "O"lld\n", + level,profile[level]); +} + +@*Index. diff --git a/programs-orig/tcb.tex b/programs-orig/tcb.tex index 4659e71..d1deab0 100644 --- a/programs-orig/tcb.tex +++ b/programs-orig/tcb.tex @@ -1,5 +1,23 @@ +\input epsf \font\mc=cmr9 +\null\vfill +\noindent +[This is a reprint, with slight amendments, of +$$\displaylines{ +\hbox{\bf Three Catalan Bijections}\cr +\noalign{\bigskip} +\hbox{D. Knuth}\cr +\noalign{\bigskip} +\hbox{Report No.~04, 2004/2005, spring}\cr +\hbox{{\mc ISSN} 1103-467X}\cr +\hbox{{\mc ISRN} IML-R- -04-04/05- -SE+spring}\cr}$$ +from Institut Mittag-Leffler of The Royal +Swedish Academy of Sciences. It reports on work supported by the +Swedish Research Council while the author was in residence at +IML in Djursholm, Sweden, during January and February of 2005.] + +\pageno=1 \centerline{\bf Three Catalan Bijections} \bigskip \centerline{by Donald E. Knuth} @@ -24,7 +42,8 @@ Given a number $n>1$, each program generates all $C_n$ objects of one type, bijects them into objects of another type, verifies that the parameter~$m$ has not changed, and applies the inverse bijection -to prove constructively that the correspondence is indeed one-to-one. +to prove constructively that the correspondence is indeed one-to-one +(at least for this value of~$n$). Program 1, called {\mc ZEILBERGER}, converts between (1) and (2). Program~2, {\mc FRAN\c{C}ON}, converts between (2) and~(3). And @@ -54,7 +73,7 @@ both carry out their work in the same direction as they translate one object to another. By contrast, the inverse bijections in Programs 1 and~3 essentially cause time to run backward when they undo the -effects of forward-runnning bijections. +effects of forward-running bijections. Program 3 introduces a new bijection that was recently explained pictorially to the author by its creator, Xavier Viennot. The @@ -117,5 +136,3 @@ \input viennot.tex \deadcycles=0 \end - - diff --git a/programs-orig/unrank-parade1.w b/programs-orig/unrank-parade1.w new file mode 100644 index 0000000..0a94ef4 --- /dev/null +++ b/programs-orig/unrank-parade1.w @@ -0,0 +1,136 @@ +@*Intro. This little program finds the parade of rank $r$ from among +the $B_{m,n}$ parades that can be made by $m$ girls and $n$ boys, +given $m$, $n$, and $r$. +(See section 3 of my unpublication ``Poly-Bernoulli Bijections.'') + +@d maxn 25 /* Stirling partition numbers will be less than $2^{61}$ */ + +@c +#include +#include +int m,n; /* command-line parameters */ +int gpart,gperm,bpart,bperm; +long long r,rr; /* command-line parameter */ +long long spart[maxn+1][maxn+1]; /* stirling partition numbers */ +int rgsg[maxn+1],rgsb[maxn+1]; /* restricted growth sequences for girls, boys */ +int permg[maxn],permb[maxn]; /* permutations for girls, boys */ +int inv[maxn]; /* inversions of permutation to be constructed */ +main(int argc,char*argv[]) { + register int i,j,k,kk; + register long long f,s,t; + register double ff,ss,tt; + @; + @; + @; + @; +} + +@ @= +spart[0][0]=1; +for (j=1;j= +if (argc!=4 || sscanf(argv[1],"%d", + &m)!=1 || sscanf(argv[2],"%d", + &n)!=1 || sscanf(argv[3],"%lld", + &r)!=1) { + fprintf(stderr,"Usage: %s m n r\n", + argv[0]); + exit(-1); +} +if (m>=maxn || n>=maxn) { + fprintf(stderr,"Sorry, m and n must be less than %d!\n", + maxn); + exit(-2); +} + +@ @= +rr=r; +if (r==0) kk=0;@+else kk=-1,r--; +for (ss=ff=1.0,f=1,k=1;k<=m && k<=n;k++) { + ff*=k; /* |ff| is a floating-point approximation to $k!$ */ + tt=ff*ff*(double)spart[m+1][k+1]*(double)spart[n+1][k+1]; + ss+=tt; + if (kk<0) { + if (tt>=(double)0x8000000000000000) { + fprintf(stderr,"I don't have enough precision!\n"); + exit(-3); + } + f*=k; /* |f| is exactly $k!$ */ + t=f*f*spart[m+1][k+1]*spart[n+1][k+1]; /* |t| is exactly the |k|th term */ + if (r); +if (kk<0) { + fprintf(stderr,"rank %lld is impossible!\n", + rr); + exit(-4); +} +fprintf(stderr,"We will find the parade for term %d of rank %lld.\n", + kk,r); +bpart=r % spart[n+1][kk+1], r=r/spart[n+1][kk+1]; +bperm=r % f, r=r/f; +fprintf(stderr,"Boys use partition of rank %d and permutation of rank %d.\n", + bpart,bperm); +gpart=r % spart[m+1][kk+1], gperm=r/spart[m+1][kk+1]; +fprintf(stderr,"Girls use partition of rank %d and permutation of rank %d.\n", + gpart,gperm); + +@ @= +@; +@; +permb[0]=kk+1; +for (j=0;j<=kk;) { + for (i=1;i<=m;i++) if (permg[rgsg[i]]==j) printf(" g%d", + i); + j++; + for (i=1;i<=n;i++) if (permb[rgsb[i]]==j) printf(" b%d", + i); +} +printf("\n"); + +@ @= +for (i=kk,j=n; j>=0;j--) { + if (bpart>=(i+1)*spart[j][i+1]) bpart-=(i+1)*spart[j][i+1],rgsb[j]=i--; + else rgsb[j]=bpart/spart[j][i+1], bpart=bpart%spart[j][i+1]; +} +fprintf(stderr,"Boys rgs:"); +for (j=0;j<=n;j++) fprintf(stderr," %d", + rgsb[j]); +fprintf(stderr,".\n"); +for (j=1;j<=kk;j++) inv[kk+1-j]=bperm%j, bperm=bperm/j; +for (j=kk;j;j--) { + permb[j]=1+inv[j]; + for (i=j+1;i<=kk;i++) if (permb[i]>=permb[j]) permb[i]++; +} +fprintf(stderr,"Boys perm:"); +for (j=1;j<=kk;j++) fprintf(stderr," %d", + permb[j]); +fprintf(stderr,".\n"); + +@ @= +for (i=kk,j=m; j>=0;j--) { + if (gpart>=(i+1)*spart[j][i+1]) gpart-=(i+1)*spart[j][i+1],rgsg[j]=i--; + else rgsg[j]=gpart/spart[j][i+1], gpart=gpart%spart[j][i+1]; +} +fprintf(stderr,"Girls rgs:"); +for (j=0;j<=m;j++) fprintf(stderr," %d", + rgsg[j]); +fprintf(stderr,".\n"); +for (j=1;j<=kk;j++) inv[kk+1-j]=gperm%j, gperm=gperm/j; +for (j=kk;j;j--) { + permg[j]=1+inv[j]; + for (i=j+1;i<=kk;i++) if (permg[i]>=permg[j]) permg[i]++; +} +fprintf(stderr,"Girls perm:"); +for (j=1;j<=kk;j++) fprintf(stderr," %d", + permg[j]); +fprintf(stderr,".\n"); + + + +@*Index. diff --git a/programs-orig/unrank-parade2.w b/programs-orig/unrank-parade2.w new file mode 100644 index 0000000..87d4f53 --- /dev/null +++ b/programs-orig/unrank-parade2.w @@ -0,0 +1,221 @@ +@*Intro. This little program finds the parade of rank $r$ from among +the $B_{m,n}$ parades that can be made by $m$ girls and $n$ boys, +given $m$, $n$, and $r$, using the alternative ranking scheme +in section 5 of my unpublication ``Poly-Bernoulli Bijections.'' + +Apology: I hacked this {\it very\/} hastily. It is {\it not\/} robust: It +doesn't check for overflow, if the numbers exceed 63 bits. + +@d maxn 25 + +@c +#include +#include +int m,n; /* command-line parameters */ +long long r; /* command-line parameter */ +long long pB[maxn][maxn]; /* poly-Bernoulli numbers */ +int bico[maxn][maxn]; /* binomial coefficients */ +int gsg[maxn+1],gsb[maxn+1]; /* growth sequences for girls, boys */ +int ord; /* order of the global parade */ +int samp[maxn]; /* subset to be recursively collapsed */ +@; +main(int argc,char*argv[]) { + register int i,j,k,kk; + register long long f,s,t; + register double ff,ss,tt; + @; + @; + @; + unrank(m,n,r,stdout); +} + +@ @= +for (j=0;j= +if (argc!=4 || sscanf(argv[1],"%d", + &m)!=1 || sscanf(argv[2],"%d", + &n)!=1 || sscanf(argv[3],"%lld", + &r)!=1) { + fprintf(stderr,"Usage: %s m n r\n", + argv[0]); + exit(-1); +} +if (m>=maxn || n>=maxn) { + fprintf(stderr,"Sorry, m and n must be less than %d!\n", + maxn); + exit(-2); +} + +@ We compute |pB| numbers only as needed: |pB[m][n]| is +negative if $B_{m,n}$ hasn't yet been computed; otherwise +it's a ``cache memo'' of the true value. + +@= +for (j=0;j= +long long getpB(int mm,int n){ + register int m,k; + register long long s; + if (pB[mm][n]<0) { + m=mm-1; + s=getpB(m,n); + for (k=1;k<=n;k++) s+=bico[n][k]*getpB(m,n+1-k); + pB[mm][n]=pB[n][mm]=s; + } + return pB[mm][n]; +} + +@ And here is that recursive procedure itself. +It returns the desired parade in the global +arrays |gsg| and |gsb|, which will define ordered partitions +of order |ord| for |mm| girls and |n| boys. + +@= +void unrank(int mm,int n,long long r,FILE *outfile) { + register int i,j,m,k,kk,l,nn,p,pp,split,r0,max; + register long long t,rr=r; + if (mm==0) @@; + else { + k=0,m=mm-1,t=getpB(m,n); + if (rn) { + fprintf(stderr,"r is too big!\n"); + exit(-666); + } + r-=t; + t=bico[n][k]*getpB(m,n+1-k); + if (r%pB[m][n+1-k],stderr); + r=r/pB[m][n+1-k]; + @; + } + @; + } + @; +} + +@ @= +{ + ord=0; + for (i=1;i<=n;i++) gsb[i]=0; +} + +@ The heart of this program is the ``lifting'' process, which +inverts the mapping $\Pi\mapsto\Pi'$ described in my unpublication. + +We're given an ordered partition for $m$ girls into |ord| blocks, in |gsg|; +also an ordered partition for $n+1-k$ boys into |ord| blocks, in |gsb|; +also a set of $k>0$ boys, listed in |samp| in increasing order of age. +The basic idea is to extend the parade by putting all of |samp| in +place of its oldest boy, and to make that sample immediately follow +a newly inserted girl |mm=m+1|. + +Suppose the oldest boy in the sample is named Max. He is boy number +|samp[k-1]-(k-1)| before lifting; but he'll be number |samp[k-1]| +afterwards, because we'll renumber {\it all\/} boys to agree with |samp|. + +Assume that Max is in block |p| of the given parade. If he's alone in +that block, we simply place girl |mm| ahead of him. Otherwise, however, +we split him off from the other boys of block~|p| (some of which +might be older, some younger), and put girl~|mm| between him and his +former fellows. That increases |ord|, introduces a new block |p+1|, +and causes later block numbers to be stepped up. + +One further complication is that we use |p=0| to encode the +final block of boys, if a boy comes last. Therefore block `|p+1|' +has to be properly understood. + +@= +if (k==0) @@; +else { + nn=n+1-k,max=samp[k-1]-(k-1),p=gsb[max]; + @; + if (split) { /* at least one other boy is also in block |p| */ + ord++; + if (p==0) { /* actually those boys are in the largest block */ + for (j=1;j<=nn;j++) if (gsb[j]==0) gsb[j]=ord; + } + } + @; +} + +@ Here's the coolest section of this program (but it's tricky, so +I hope I've got it right). We can work in place because |j>=i| in this loop. + +@= +for (i=nn,j=n,l=k-2;i;i--,j--) { + while (l>=0 && j==samp[l]) l--,j--; /* leave holes for the sample */ + pp=gsb[i]; + if (split && p>0 && pp>p) gsb[j]=pp+1; + else gsb[j]=pp; +} +for (l=0;l=p) gsg[j]++; + gsg[mm]=p; + } +}@+else gsg[mm]=(p?p-1:ord); + +@ Max is alone if and only if he's the only guy in block |p|. + +@= +for (split=-1,j=1;j<=nn;j++) if (gsb[j]==p && ++split) break; + +@ @= +{ + fprintf(stderr,"extend with empty set\n"); + for (j=1;j<=n;j++) if (gsb[j]==0) break; + if (j<=n) { /* appending $g_{m+1}$ after a boy */ + ord++; + for (j=1;j<=n;j++) if (gsb[j]==0) gsb[j]=ord; + } + gsg[mm]=ord; +} + +@ @= +fprintf(outfile,"Parade %lld for %d and %d:", + rr,mm,n); +for (j=0;j<=ord;) { + for (i=1;i<=mm;i++) + if (gsg[i]==j) fprintf(outfile," g%d", + i); + j++; + for (i=1;i<=n;i++) + if ((j>ord && gsb[i]==0) || (j<=ord && gsb[i]==j)) + fprintf(outfile," b%d", + i); + } +fprintf(outfile,"\n"); + +@ Of course we use the recurrence +${n\choose k}={n-1\choose k}+{n-1\choose k-1}$ here. + +@= +nn=n,kk=k,r0=r; +while (kk) { + if (r); + +@*Index. diff --git a/programs-orig/viennot.w b/programs-orig/viennot.w index 6dbee20..38b4e7b 100644 --- a/programs-orig/viennot.w +++ b/programs-orig/viennot.w @@ -53,6 +53,11 @@ $p_j-1$, $p_j$, or $p_j+1$ (modulo~$2^k$). The walls also contain little struts, not shown in the diagram, which keep the bricks of each ring from tipping over.) +An auxiliary program, {\mc BACK-KEPLER-TOWERS}, generates all Kepler towers +by ``brute force,'' directly from this definition. One can use it to verify +experimentally that the number of Kepler towers with $n$ bricks is exactly +the Catalan number~$C_n$, if $n$ isn't too large. + @ And what is a nested string? A nested string (aka Dyck word) of order~$n$ is a sequence $d_0$, $d_1$, \dots,~$d_{2n-1}$ of $\pm1$s whose partial sums $y_k=d_0+\cdots+d_k$ are nonnegative, and whose overall sum diff --git a/programs-orig/xccdc-frb.ch b/programs-orig/xccdc-frb.ch new file mode 100644 index 0000000..bdf4794 --- /dev/null +++ b/programs-orig/xccdc-frb.ch @@ -0,0 +1,235 @@ +@x +@ After this program finds all solutions, it normally prints their total +@y +This version of {\mc XCCDC} uses a dynamic weighting scheme, namely +the ``{\mc FRB} heuristic,'' motivated by the paper +of Li, Yin, and Li in +@^Li, Hongbo@> +@^Yin, Minghao@> +@^Li, Zhanshan@> +{\sl Leibniz International Proceedings in Informatics\/ \bf210} +(2021), 9:1--9:10 +[the proceedings of the 27th International Conference on Principles and +Practice of Constraint Programming, CP~2021]. +When the option chosen for branching on some primary item~$i$ causes +another primary item~$i'$ to be wiped out, we say that a failure has +occurred with respect to~$i$. We branch on an item +that has a small number of options and a relatively high failure rate. +Details are discussed below. + +@ After this program finds all solutions, it normally prints their total +@z +@x +@d show_max_deg 2048 /* |vbose| code for reporting maximum branching degree */ +@y +@d show_max_deg 2048 /* |vbose| code for reporting maximum branching degree */ +@d show_final_stats 4096 /* |vbose| code to display item stats at the end */ +@z +@x +if (vbose&show_profile) @; +@y +if (vbose&show_profile) @; +if (vbose&show_final_stats) { + fprintf(stderr,"Final primary item stats:\n"); + print_item_stats(); +} +@z +@x +(The |match| field is present only in secondary items.) +@y +(The |match| field is present only in secondary items.) + +Finally, a primary item $x$ also has two special fields called +|assigns| and |fails|, used in the {\mc FRB} heuristic. +Their significance is described below. +@z +@x +@d match(x) set[(x)-6] /* a required color in compatibility tests */ +@d primextra 5 /* this many extra entries of |set| for each primary item */ +@d secondextra 6 /* and this many for each secondary item */ +@d maxextra 6 /* maximum of |primextra| and |secondextra| */ +@y +@d match(x) set[(x)-6] /* a required color in compatibility tests */ +@d assigns(x) set[(x)-6] /* number of assignments tried so far for |x|, plus 1 */ +@d fails(x) set[(x)-7] /* how many of them failed? */ +@d primextra 7 /* this many extra entries of |set| for each primary item */ +@d secondextra 6 /* and this many for each secondary item */ +@d maxextra 7 /* maximum of |primextra| and |secondextra| */ +@z +@x + if (c; + goto tryagain; + } + if (!empty_the_queue()) { + nmems+=mems-tmems; + @; + goto tryagain; + } + @; +@z +@x +@ The ``best item'' is considered to be an item that minimizes the +number of remaining choices. If there are several candidates, we +choose the first one that we encounter. + +Each primary item should have at least one valid choice, because +of domain consistency. + +@d infty 0x7fffffff + +@= +t=infty; +if ((vbose&show_details) && + level=maxl-show_choices_gap) + fprintf(stderr,"Stage "O"d,",stage); +for (k=0;t>1 && k=maxl-show_choices_gap) { + print_item_name(item[k],stderr); + fprintf(stderr,"("O"d)",s); + } + if (s<=t) { + if (s==0) fprintf(stderr,"I'm confused.\n"); /* |hide| missed this */ + if (s=maxl-show_choices_gap) { + if (t==infty) fprintf(stderr," solution\n"); + else { + fprintf(stderr," branching on"); + print_item_name(best_itm,stderr); + fprintf(stderr,"("O"d)\n",t); + } +} +if (t>maxdeg && t +@^Yin, Minghao@> +@^Li, Zhanshan@> + +@= +oo,assigns(best_itm)++; +if (assigns(best_itm)<=0) { + fprintf(stderr,"Too many assignments (2^{31})!\n"); + exit(-6); +} +bmems+=2; + +@ @= +oo,assigns(best_itm)++; +if (assigns(best_itm)<=0) { + fprintf(stderr,"Too many assignments (2^{31})!\n"); + exit(-66); +} +oo,fails(best_itm)++; +bmems+=4; + +@ @= +void print_item_stats(void) { + register int k; + for (k=0;k= +{ + register float score,tscore,w; + register int force; + score=finfty,t=infty; + if ((vbose&show_details) && + level=maxl-show_choices_gap) + fprintf(stderr,"Level "O"d:",level); + for (k=0;t>1 && k=finfty) tscore=dangerous; + if (tscore=maxl-show_choices_gap) { + print_item_name(item[k],stderr);@+ + if (t==1) fprintf(stderr,"(1)"); + else fprintf(stderr,"("O"d,"O"d/"O"d)",s,fails(item[k]),assigns(item[k])); + } + } + if ((vbose&show_details) && + level=maxl-show_choices_gap) { + if (t==infty) fprintf(stderr," solution\n"); + else { + fprintf(stderr," branching on"); + print_item_name(best_itm,stderr);@+ + if (t==1) fprintf(stderr,"(forced)\n"); + else fprintf(stderr,"("O"d), score "O".4f\n",t,score); + } + } + if (t>maxdeg && t +@^H\'emery, Fred@> +@^Lecoutre, Christophe@> +@^Sa{\"\i}s, Lakhdar@> + +@ After this program finds all solutions, it normally prints their total +@z +@x +@d show_max_deg 2048 /* |vbose| code for reporting maximum branching degree */ +@y +@d show_max_deg 2048 /* |vbose| code for reporting maximum branching degree */ +@d show_final_weights 4096 /* |vbose| code to display weights at the end */ +@d show_weight_bumps 8192 /* |vbose| code to show new weights */ +@z +@x +if (vbose&show_profile) @; +@y +if (vbose&show_profile) @; +if (vbose&show_final_weights) { + fprintf(stderr,"Final weights:\n"); + print_weights(); +} +@z +@x +(The |match| field is present only in secondary items.) +@y +(The |match| field is present only in secondary items.) + +Finally, the |set| array contains a |wt| field for each primary item; +|wt(x)| occupies the position of |match(x)| in a secondary item. +This weight, initially~1, is increased by 1 whenever we run into a +situation where |x| cannot be supported. +@z +@x +@d match(x) set[(x)-6] /* a required color in compatibility tests */ +@d primextra 5 /* this many extra entries of |set| for each primary item */ +@y +@d match(x) set[(x)-6] /* a required color in compatibility tests */ +@d wt(x) set[(x)-6] /* the current weight of item |x| */ +@d primextra 6 /* this many extra entries of |set| for each primary item */ +@z +@x + if (c; +@y + if (!(vbose&show_weight_bumps)) { + fprintf(stderr," can't cover"); + print_item_name(ii,stderr); + fprintf(stderr,"\n"); + } + } + tough_itm=ii; + @; +@z +@x + if (!include_option(cur_choice)) { + nmems+=mems-tmems; + goto tryagain; + } + if (!empty_the_queue()) { + nmems+=mems-tmems; + goto tryagain; + } +@y + if (!include_option(cur_choice)) { + nmems+=mems-tmems; + @; + goto tryagain; + } + if (!empty_the_queue()) { + nmems+=mems-tmems; + @; + goto tryagain; + } +@z +@x + if (!(o,purge_the_option(choice[level],active,"removing"))) { + pmems+=mems-tmems; + goto backup; + } + if (!empty_the_queue()) { + pmems+=mems-tmems; + goto backup; + } +@y + if (!(o,purge_the_option(choice[level],active,"removing"))) { + pmems+=mems-tmems; + @; + goto backup; + } + if (!empty_the_queue()) { + pmems+=mems-tmems; + @; + goto backup; + } +@z +@x +int saveptr; /* current size of |savestack| */ +@y +int saveptr; /* current size of |savestack| */ +int tough_itm; /* an item that led to difficulty */ +@z +@x +@ The ``best item'' is considered to be an item that minimizes the +number of remaining choices. If there are several candidates, we +choose the first one that we encounter. + +Each primary item should have at least one valid choice, because +of domain consistency. + +@d infty 0x7fffffff + +@= +t=infty; +if ((vbose&show_details) && + level=maxl-show_choices_gap) + fprintf(stderr,"Stage "O"d,",stage); +for (k=0;t>1 && k=maxl-show_choices_gap) { + print_item_name(item[k],stderr); + fprintf(stderr,"("O"d)",s); + } + if (s<=t) { + if (s==0) fprintf(stderr,"I'm confused.\n"); /* |hide| missed this */ + if (s=maxl-show_choices_gap) { + if (t==infty) fprintf(stderr," solution\n"); + else { + fprintf(stderr," branching on"); + print_item_name(best_itm,stderr); + fprintf(stderr,"("O"d)\n",t); + } +} +if (t>maxdeg && t= +bmems+=2,oo,wt(tough_itm)++; +if (wt(tough_itm)<=0) { + fprintf(stderr,"Weight overflow (2^31)!\n"); + exit(-6); +} +if (vbose&show_weight_bumps) { + print_item_name(tough_itm,stderr); + fprintf(stderr," wt "O"d\n",wt(tough_itm)); +} + +@ @= +void print_weights(void) { + register int k; + for (k=0;k= +{ + register float score,tscore,w; + register int force; + score=finfty,t=infty; + if ((vbose&show_details) && + level=maxl-show_choices_gap) + fprintf(stderr,"Level "O"d:",level); + for (k=0;t>1 && k=finfty) tscore=dangerous; + if (tscore=maxl-show_choices_gap) { + print_item_name(item[k],stderr);@+ + if (t==1) fprintf(stderr,"(1)"); + else fprintf(stderr,"("O"d,"O"d)",s,wt(item[k])); + } + } + if ((vbose&show_details) && + level=maxl-show_choices_gap) { + if (t==infty) fprintf(stderr," solution\n"); + else { + fprintf(stderr," branching on"); + print_item_name(best_itm,stderr);@+ + if (t==1) fprintf(stderr,"(forced)\n"); + else fprintf(stderr,"("O"d), score "O".4f\n",t,score); + } + } + if (t>maxdeg && t +#include +#include +#include +typedef unsigned int uint; /* a convenient abbreviation */ +typedef unsigned long long ullng; /* ditto */ +@; +@; +@; +int main (int argc, char *argv[]) { + register int c,cc,i,j,k,p,pp,q,r,s,t,cur_choice,best_itm; + @; + @; + @; + if (vbose&show_basics) + @; + if (vbose&show_tots) + @; + imems=mems, mems=0; + if (baditem) @@; + else @; +done:@+@; +} + +@ You can control the amount of output, as well as certain properties +of the algorithm, by specifying options on the command line: +\smallskip\item{$\bullet$} +`\.v$\langle\,$integer$\,\rangle$' enables or disables various kinds of verbose + output on |stderr|, given by binary codes such as |show_choices|; +\item{$\bullet$} +`\.m$\langle\,$integer$\,\rangle$' causes every $m$th solution +to be output (the default is \.{m0}, which merely counts them); +\item{$\bullet$} +`\.d$\langle\,$integer$\,\rangle$' sets |delta|, which causes periodic +state reports on |stderr| after the algorithm has performed approximately +|delta| mems since the previous report (default 10000000000); +\item{$\bullet$} +`\.c$\langle\,$positive integer$\,\rangle$' limits the levels on which +choices are shown during verbose tracing; +\item{$\bullet$} +`\.C$\langle\,$positive integer$\,\rangle$' limits the levels on which +choices are shown in the periodic state reports; +\item{$\bullet$} +`\.l$\langle\,$nonnegative integer$\,\rangle$' gives a {\it lower\/} limit, +relative to the maximum level so far achieved, to the levels on which +choices are shown during verbose tracing; +\item{$\bullet$} +`\.t$\langle\,$positive integer$\,\rangle$' causes the program to +stop after this many solutions have been found; +\item{$\bullet$} +`\.T$\langle\,$integer$\,\rangle$' sets |timeout| (which causes abrupt +termination if |mems>timeout| at the beginning of a level); +\item{$\bullet$} +`\.S$\langle\,$filename$\,\rangle$' to output a ``shape file'' that encodes +the search tree; +\item{$\bullet$} +`\.x$\langle\,$positive integer$\,\rangle$' causes partial solutions of this +many stages to be written to files, not actually explored; +\item{$\bullet$} +`\.X$\langle\,$filename$\,\rangle$' to input and resume a partial solution. + +@d show_basics 1 /* |vbose| code for basic stats; this is the default */ +@d show_choices 2 /* |vbose| code for backtrack logging */ +@d show_details 4 /* |vbose| code for stats about choices */ +@d show_purges 8 /* |vbose| code to show inconsistent options deleted */ +@d show_supports 16 /* |vbose| code to show new supports */ +@d show_option_counts 32 /* |vbose| code to count active options */ +@d show_mstats 64 /* |vbose| code to show memory usage in key arrays */ +@d show_profile 128 /* |vbose| code to show the search tree profile */ +@d show_full_state 256 /* |vbose| code for complete state reports */ +@d show_tots 512 /* |vbose| code for reporting item totals at start */ +@d show_warnings 1024 /* |vbose| code for reporting options without primaries */ +@d show_max_deg 2048 /* |vbose| code for reporting maximum branching degree */ + +@ @= +int vbose=show_basics+show_warnings; /* level of verbosity */ +int spacing; /* solution $k$ is output if $k$ is a multiple of |spacing| */ +int show_choices_max=1000000; /* above this level, |show_choices| is ignored */ +int show_choices_gap=1000000; /* below level |maxl-show_choices_gap|, + |show_details| is ignored */ +int show_levels_max=1000000; /* above this level, state reports stop */ +int maxl,maxs; /* maximum level and stage actually reached */ +int xcutoff=-1,xcount; /* stage when partial solutions output, and their number */ +int maxsaveptr; /* maximum size of |savestack| */ +char buf[bufsize]; /* input buffer */ +ullng count; /* solutions found so far */ +ullng options; /* options seen so far */ +ullng imems,mems,bmems,nmems,pmems,tmems; /* mem counts */ +ullng updates; /* update counts */ +ullng bytes; /* memory used by main data structures */ +ullng nodes; /* total number of branch nodes initiated */ +ullng thresh=10000000000; /* report when |mems| exceeds this, if |delta!=0| */ +ullng delta=10000000000; /* report every |delta| or so mems */ +ullng maxcount=0xffffffffffffffff; /* stop after finding this many solutions */ +ullng timeout=0x1fffffffffffffff; /* give up after this many mems */ +FILE *shape_file; /* file for optional output of search tree shape */ +char *shape_name; /* its name */ +int maxdeg; /* the largest branching degree seen so far */ + +@ If an option appears more than once on the command line, the first +appearance takes precedence. + +@= +for (j=argc-1,k=0;j;j--) switch (argv[j][0]) { +case 'v': k|=(sscanf(argv[j]+1,""O"d",&vbose)-1);@+break; +case 'm': k|=(sscanf(argv[j]+1,""O"d",&spacing)-1);@+break; +case 'd': k|=(sscanf(argv[j]+1,""O"lld",&delta)-1),thresh=delta;@+break; +case 'c': k|=(sscanf(argv[j]+1,""O"d",&show_choices_max)-1);@+break; +case 'C': k|=(sscanf(argv[j]+1,""O"d",&show_levels_max)-1);@+break; +case 'l': k|=(sscanf(argv[j]+1,""O"d",&show_choices_gap)-1);@+break; +case 't': k|=(sscanf(argv[j]+1,""O"lld",&maxcount)-1);@+break; +case 'T': k|=(sscanf(argv[j]+1,""O"lld",&timeout)-1);@+break; +case 'S': shape_name=argv[j]+1, shape_file=fopen(shape_name,"w"); + if (!shape_file) + fprintf(stderr,"Sorry, I can't open file `"O"s' for writing!\n", + shape_name); + break; +case 'x': k|=(sscanf(argv[j]+1,""O"d",&xcutoff)-1);@+break; +case 'X': @; +default: k=1; /* unrecognized command-line option */ +} +if (k) { + fprintf(stderr, "Usage: "O"s [v] [m] [d] [c] [C] "@/ + "[l] [t] [T] [S] [x] [X] < foo.dlx\n", + argv[0]); + exit(-1); +} + +@ I don't report the memory used for |deg|, |stagelevel|, and |profile|, +because they are only for documentation, not part of the search process. + +@= +if (vbose&show_profile) @; +if (vbose&show_max_deg) + fprintf(stderr,"The maximum branching degree was "O"d.\n",maxdeg); +if (vbose&show_basics) { + fprintf(stderr,"Altogether "O"llu solution"O"s, "O"llu+"O"llu mems,", + count,count==1?"":"s",imems,mems); + bytes=(itemlength+setlength)*sizeof(int)+last_node*sizeof(node) + +(4*maxs+maxl)*sizeof(int)+maxsaveptr*sizeof(twoints) + +poolptr*sizeof(twoints); + fprintf(stderr," "O"llu updates, "O"llu bytes, "O"llu nodes,", + updates,bytes,nodes); + fprintf(stderr," acost "O"lld%%, bcost "O"lld%%, ccost "O"lld%%.\n", + mems? (200*nmems+mems)/(2*mems):0, + mems? (200*pmems+mems)/(2*mems):0, + mems? (200*bmems+mems)/(2*mems):0); +} +if (vbose&show_mstats) { + fprintf(stderr," itemlength="O"d, setlength="O"d, last_node="O"d;\n", + itemlength,setlength,last_node); + fprintf(stderr, + " maxsaveptr="O"d, poolptr="O"d, maxstage="O"d, maxlevel="O"d.\n", + maxsaveptr,poolptr,maxs,maxl); +} +if (sanity_checking) fprintf(stderr,"sanity_checking was on!\n"); +if (leak_checking) fprintf(stderr,"leak_checking was on!\n"); +if (shape_file) fclose(shape_file); +if (xcount) @; + +@ Here's a subroutine for use in debugging, but I hope it's never invoked. + +@= +void confusion(char *id) { /* an assertion has failed */ + fprintf(stderr,"trouble after "O"lld mems, "O"lld nodes: %s!\n", + mems,nodes,id); +} + +@*Data structures. +Sparse-set data structures were introduced by Preston Briggs +and Linda Torczon [{\sl ACM Letters on Programming Languages and Systems\/ +\bf2} (1993), 59--69], who realized that exercise 2.12 in +Aho, Hopcroft, and Ullman's classic text {\sl The Design and Analysis +of Computer Algorithms\/} (Addison--Wesley, 1974) was much more than +just a slick trick to avoid initializing an array. +(Indeed, {\sl TAOCP\/} exercise 2.2.6--24 calls it the ``sparse array trick.'') + +The basic idea is amazingly simple, when specialized to the situations +that we need to deal with: We can represent a subset~$S$ of the universe +$U=\{x_0,x_1,\ldots,x_{n-1}\}$ by maintaining two $n$-element arrays +$p$ and $q$, each of which is a permutation of~$\{0,1,\ldots,n-1\}$, +together with an integer $s$ in the range $0\le s\le n$. In fact, $p$ is +the {\it inverse\/} of~$q$; and $s$ is the number of elements of~$S$. +The current value of the set $S$ is then simply +$\{x_{p_0},\ldots,x_{p_{s-1}}\}$. (Notice that every $s$-element subset can be +represented in $s!\,(n-s)!$ ways.) + +It's easy to test if $x_k\in S$, because that's true if and only if $q_k= +typedef struct node_struct { + int itm; /* the item |x| corresponding to this node */ + int loc; /* where this node resides in |x|'s active set */ + int clr; /* color associated with item |x| in this option, if any */ + int xtra; /* used for special purposes (see below) */ +} node; + +@ @= +node nd[max_nodes]; /* the master list of nodes */ +int last_node; /* the first node in |nd| that's not yet used */ +int item[max_cols]; /* the master list of items */ +int second=max_cols; /* boundary between primary and secondary items */ +int last_itm; /* items seen so far during input, plus 1 */ +int set[max_nodes+6*max_cols]; /* sets of active options for active items */ +int itemlength; /* number of elements used in |item| */ +int setlength; /* number of elements used in |set| */ +int active; /* current number of active items */ +int oactive; /* value of active before swapping out current-choice items */ +int totopts; /* current number of active options */ +int baditem; /* an item with no options, plus 1 */ +int osecond; /* setting of |second| just after initial input */ + +@ We're going to store string data (an item's name) in the midst of +the integer array |set|. So we've got to do some type coercion using +low-level \CEE/-ness. + +@= +typedef struct { + int l,r; +} twoints; +typedef union { + unsigned char str[8]; /* eight one-byte characters */ + twoints lr; /* two four-byte integers */ +} stringbuf; +stringbuf namebuf; + +@ @= +void print_item_name(int k,FILE *stream) { + namebuf.lr.l=lname(k),namebuf.lr.r=rname(k); + fprintf(stream," "O".8s",namebuf.str); +} + +@ An option is identified not by name but by the names of the items it contains. +Here is a routine that prints an option, given a pointer to any of its +nodes. If |showid=1|, it also prints the value of |opt-1|, which should be the +location of the spacer just preceding |opt|. +Otherwise it optionally prints the position of the option in its item list. + +@= +void print_option(int opt,FILE *stream,int showpos,int showid) { + register int k,q,x; + x=nd[opt].itm; + if (opt>=last_node || x<=0) { + fprintf(stderr,"Illegal option "O"d!\n",opt); + return; + } + if (showid) fprintf(stream,""O"d `",opt-1); + for (q=opt;;) { + print_item_name(x,stream); + if (nd[q].clr) + fprintf(stream,":"O"c",nd[q].clr); + q++; + x=nd[q].itm; + if (x<0) q+=x,x=nd[q].itm; + if (q==opt) break; + } + k=nd[q].loc; + if (showid) fprintf(stream," '"); + if (showpos>0) fprintf(stream," ("O"d of "O"d)\n",k-x+1,size(x)); + else if (showpos==0) fprintf(stream,"\n"); +} +@# +void prow(int p) { + print_option(p,stderr,1,0); +} +@# +void propt(int opt) { /* |opt| should be the spacer just before an option */ + if (nd[opt].itm>=0) fprintf(stderr,""O"d isn't an option id!\n", + opt); + else print_option(opt+1,stderr,0,1); +} + +@ The |print_option| routine has a sort of inverse, which reads from |buf| +what purports to be the description of an option and verifies it. + +@= +int read_option(void) { + register int k,q,x,j,opt; + for (opt=0,k=1;o,buf[k]>='0' && buf[k]<='9';k++) opt=10*opt+buf[k]-'0'; + if ((o,buf[k]!=' ')||(o,buf[k+1]!='`')||(o,buf[k+2]!=' ')) return -1; + for (k+=3,q=opt+1;o,(x=nd[q].itm)>0;q++) { + oo,namebuf.lr.l=lname(x),namebuf.lr.r=rname(x); + for (j=0;j<8;j++) { + if (!namebuf.str[j]) break; + if (o,namebuf.str[j]!=buf[k+j]) return -1; + } + k+=j; /* we've verified the item name */ + if (o,nd[q].clr) { + if ((o,buf[k]!=':')||(o,(unsigned char)buf[k+1]!=nd[q].clr)) return -1; + k+=2; + } + if (o,buf[k++]!=' ') return -1; + } + if (buf[k]!='\'') return -1; + return opt+1; +} + +@ When I'm debugging, I might want to look at one of the current item lists. + +@= +void print_itm(int c) { + register int p; + if (c=setlength || + pos(c)<0 || pos(c)>=itemlength || item[pos(c)]!=c) { + fprintf(stderr,"Illegal item "O"d!\n",c); + return; + } + fprintf(stderr,"Item"); + print_item_name(c,stderr); + if (c=active) + fprintf(stderr," (secondary "O"d, purified), length "O"d:\n", + pos(c)+1,size(c)); + else fprintf(stderr," (secondary "O"d), length "O"d:\n", + pos(c)+1,size(c)); + for (p=c;p= +void sanity(void) { + register int k,x,i,l,r,q,qq; + for (k=0;kr) fprintf(stderr,"itm>loc in node "O"d!\n",i); + else { + if (set[r]!=i) { + fprintf(stderr,"Bad loc field for option "O"d of item",r-l+1); + print_item_name(l,stderr); + fprintf(stderr," in node "O"d!\n",i); + } + if (pos(l)= +twoints pool[poolsize]; /* where the linked lists live */ +int poolptr=1; /* the first unused cell of |pool| */ +int qfront,qrear; /* the front and rear of $Q$ */ +unsigned int curstamp; /* the current ``time stamp'' */ +unsigned int biggeststamp; /* the largest time stamp used so far */ +unsigned int compatstamp; /* another stamp, used for compatibility tests */ + +@ A few basic primitive routines undergird all of our list processing. + +(When counting mems here, we consider |avail| and |poolptr| to +be in global registers. The compiler could inline this code, +so I don't count any overhead for these subroutine calls.) + +@d avail pool[0].r /* head of the stack of available cells */ + +@= +int getavail(void) { /* return a pointer to an unused cell */ + register int p; + p=avail; + if (p) { + o,avail=link(p); + return p; /* |info(p)| might be anything */ + } + if (poolptr++>=poolsize) { + fprintf(stderr,"Pool overflow (poolsize="O"d)!\n",poolsize); + exit(-7); + } + return poolptr-1; +} +@# +void putavail(int p) { /* free the single cell |p| */ + o,link(p)=avail; + avail=p; +} + +@ Entries of a trigger list are pairs $(o,p)$, with the cell +that mentions option $o$ linking to the cell that mentions primary item~$p$. + +@= +void print_trigger(int opt) { + register int p,q; + fprintf(stderr,"trigger stack for option "); + print_option(opt+1,stderr,0,1); + for (p=trigger(opt);p;p=link(q)) { + q=link(p); + fprintf(stderr," "); + if (info(p)>=0) { + print_option(info(p)+1,stderr,-1,1); + fprintf(stderr,","); + p=link(p); + print_item_name(info(p),stderr); + }@+else @; + fprintf(stderr,"\n"); + } +} + +@ @= +void print_triggers(int all) { + register int opt,jj,optp; + for (opt=0;opt=jj+size(jj)) continue; /* is |opt| in |jj|'s set? */ + } + print_trigger(opt); + } +} + +@ Entries of a fixit list are pairs $(o,p)$, with the cell +that mentions option $o$ linking to the cell that mentions primary item~$p$. + +@= +void print_fixit(int opt) { + register int p,q; + fprintf(stderr,"fixit stack for option "); + print_option(opt+1,stderr,-1,1); + fprintf(stderr,":"); + for (p=fixit(opt);p;p=link(q)) { + q=link(p); + fprintf(stderr," "); + print_item_name(info(q),stderr); + fprintf(stderr,"["O"d]",info(p)); + } + fprintf(stderr,"\n"); +} + +@ @= +void print_queue(void) { + register int p; + for (p=qfront;p!=qrear;p=link(p)) + print_option(info(p)+1,stderr,0,1); +} + +@ Linked lists are wonderful; but a single weak link can cause a +catastrophic error. Therefore, when debugging, I want to be extra sure that this +program doesn't make any silly errors when it uses pool pointers. + +Furthermore, since I'm doing my own garbage collection, I want to avoid +any ``memory leaks'' that would occur when I've forgotten to recycle +a no-longer-used entry of the |pool|. + +The |list_check| routine laboriously goes through everything and makes +sure that every cell less than |poolptr| currently has one and only one use. + +Warning: Do not call |list_check| at a busy time during which lists +are being manipulated. Wait for a quiet time when all of the lists +are supposedly stable and well-formed. + +@d leak_checking 0 /* set this nonzero if you suspect linked-list bugs */ +@d signbit 0x8000000 +@d vet_and_set(l) {@+if ((l)<=0 || (l)>=poolptr) { + fprintf(stderr,"Bad link "O"d!\n",l); + return; + } + if (link(l)&signbit) { + fprintf(stderr,"Double link "O"d, "O"lld!\n",l,mems); + return; + } + link(l)^=signbit; + } + +@= +void list_check(int count) { + register int p,t,opt; + for (t=0,p=avail;p;t++,p=signbit^link(p)) + vet_and_set(p); + if (count) fprintf(stderr,"avail size "O"d\n",t); + for (opt=0;opt= +if (++compatstamp==badstamp) { + for (ii=0;ii= +@; +for (nn=opt+1;o,(ii=nd[nn].itm)>0;nn++) { + o,mark(ii)=compatstamp; + if (ii>=second) { + if (o,nd[nn].clr) o,match(ii)=nd[nn].clr; + else o,match(ii)=-1; /* this won't match any color */ + } +} + +@ At the beginning of this section, |nd[optp].itm| is an item |ii| in the +middle of some option~$O'$. If $O'$ is compatible with |opt|, we want to +reset |optp| so that |nd[optp]| is the spacer preceding~$O'$. +We use the fact that |ii| isn't present in~|opt|. + +@= +for (qq=optp,nn=qq+1;nn!=qq;nn++) { + if (o,(jj=nd[nn].itm)<=0) optp=nn+jj-1,nn=optp; /* |nn| is a spacer */ + else if (o,mark(jj)==compatstamp) { /* watch out, |jj| is in |opt| */ + if (jj= +while (1) { + if (!fgets(buf,bufsize,stdin)) break; + if (o,buf[p=strlen(buf)-1]!='\n') panic("Input line way too long"); + for (p=0;o,isspace(buf[p]);p++) ; + if (buf[p]=='|' || !buf[p]) continue; /* bypass comment or blank line */ + last_itm=1; + break; +} +if (!last_itm) panic("No items"); +for (;o,buf[p];) { + o,namebuf.lr.l=namebuf.lr.r=0; + for (j=0;j<8 && (o,!isspace(buf[p+j]));j++) { + if (buf[p+j]==':' || buf[p+j]=='|') + panic("Illegal character in item name"); + o,namebuf.str[j]=buf[p+j]; + } + if (j==8 && !isspace(buf[p+j])) panic("Item name too long"); + oo,lname(last_itm<<2)=namebuf.lr.l,rname(last_itm<<2)=namebuf.lr.r; + @; + last_itm++; + if (last_itm>max_cols) panic("Too many items"); + for (p+=j+1;o,isspace(buf[p]);p++) ; + if (buf[p]=='|') { + if (second!=max_cols) panic("Item name line contains | twice"); + second=last_itm; + for (p++;o,isspace(buf[p]);p++) ; + } +} +if (second==last_itm) second=max_cols; /* no secondaries actually named */ + +@ @= +for (k=last_itm-1;k;k--) { + if (o,lname(k<<2)!=namebuf.lr.l) continue; + if (rname(k<<2)==namebuf.lr.r) break; +} +if (k) panic("Duplicate item name"); + +@ @= +while (1) { + if (!fgets(buf,bufsize,stdin)) break; + if (o,buf[p=strlen(buf)-1]!='\n') panic("Option line too long"); + for (p=0;o,isspace(buf[p]);p++) ; + if (buf[p]=='|' || !buf[p]) continue; /* bypass comment or blank line */ + i=last_node; /* remember the spacer at the left of this option */ + for (pp=0;buf[p];) { + o,namebuf.lr.l=namebuf.lr.r=0; + for (j=0;j<8 && (o,!isspace(buf[p+j])) && buf[p+j]!=':';j++) + o,namebuf.str[j]=buf[p+j]; + if (!j) panic("Empty item name"); + if (j==8 && !isspace(buf[p+j]) && buf[p+j]!=':') + panic("Item name too long"); + @; + if (buf[p+j]==':') { + if (k>=second) { + if ((o,isspace(buf[p+j+1])) || (o,!isspace(buf[p+j+2]))) + panic("Color must be a single character"); + o,nd[last_node+(pp?0:1)].clr=(unsigned char)buf[p+j+1]; + p+=2; + }@+else panic("Primary item must be uncolored"); + } + for (p+=j+1;o,isspace(buf[p]);p++) ; + } + if (!pp) { + if (vbose&show_warnings) + fprintf(stderr,"Option ignored (no primary items): "O"s",buf); + while (last_node>i) { + @; + last_node--; + } + }@+else { + o,nd[i].loc=last_node-i; /* complete the previous spacer */ + last_node++; /* create the next spacer */ + if (last_node==max_nodes) panic("Too many nodes"); + options++; + o,nd[last_node].itm=i+1-last_node; + } +} +@; +@; +@; + +@ We temporarily use |pos| to recognize duplicate items in an option. + +This program shifts the items of an option, if necessary, so that the +very first item is always primary. In other words, secondary items +that precede the first primary item are actually stored in +|nd[last_node+1]|. + +@= +for (k=(last_itm-1)<<2;k;k-=4) { + if (o,lname(k)!=namebuf.lr.l) continue; + if (rname(k)==namebuf.lr.r) break; +} +if (!k) panic("Unknown item name"); +if (o,pos(k)>i) panic("Duplicate item name in this option"); +last_node++; +if (last_node+1>=max_nodes) panic("Too many nodes"); +o,t=size(k); /* how many previous options have used this item? */ +if (!pp) { /* no primary items seen yet */ + if ((k>>2)>2,nd[i+1].loc=t,nd[i+1].clr=0; + else oo,nd[last_node+1].itm=k>>2,nd[last_node+1].loc=t,nd[last_node+1].clr=0; +}@+else oo,nd[last_node].itm=k>>2,nd[last_node].loc=t,nd[last_node].clr=0; +o,size(k)=t+1, pos(k)=last_node; + +@ @= +o,k=nd[last_node+1].itm<<2; +oo,size(k)--,pos(k)=i-1; + +@ @= +active=itemlength=last_itm-1; +for (k=0,j=primextra;k= +for (;k;k--) { + o,j=item[k-1]; + if (k==second) second=j; /* |second| is now an index into |set| */ + oo,size(j)=size(k<<2); + if (size(j)==0 && k<=osecond) baditem=k; + o,pos(j)=k-1; + oo,rname(j)=rname(k<<2),lname(j)=lname(k<<2); + o,mark(j)=0; +} + +@ @= +for (k=1;k= +{ + if (vbose&show_choices) { + fprintf(stderr,"Item"); + print_item_name(item[baditem-1],stderr); + fprintf(stderr," has no options!\n"); + } +} + +@ The ``number of entries'' includes spacers (because {\mc DLX2} +includes spacers in its reports). If you want to know the +sum of the option lengths, just subtract the number of options. + +@= +fprintf(stderr, + "("O"lld options, "O"d+"O"d items, "O"d entries successfully read)\n", + options,osecond,itemlength-osecond,last_node); + +@ The item lengths after input are shown (on request). +But there's little use trying to show them after the process is done, +since they are restored somewhat blindly. +(Failures of the linked-list implementation in {\mc DLX2} could sometimes be +detected by showing the final lengths; but that reasoning no longer applies.) + +@= +{ + fprintf(stderr,"Item totals:"); + for (k=0;k= +int opt_out(int opt,int act,char*typ) { + register int ii,jj,nn,nnp,p,q,qq,pp,ss,t,optp,cutoff,tmin=infinite_age; + subroutine_overhead; + if (vbose&show_purges) { + fprintf(stderr," "O"d "O"s option ",cur_age,typ); + print_option(opt+1,stderr,0,1); + } + @; + o,age(opt)=cur_age; + for (o,p=trigger(opt),pp=0; p;p=pp) { + o,optp=info(p),q=link(p); + o,ii=info(q),pp=link(q); + if (optp<0) @; + @; + @; + ooo,info(p)=opt,link(q)=fixit(optp); /* change trigger to fixit */ + if (!fixit(optp)) { /* we should enqueue |optp| */ + o,link(qrear)=getavail(),info(qrear)=optp,qrear=link(qrear); + o,age(optp)=infinite_age; + } + o,fixit(optp)=p; + continue; +inactive:@+if (t<0) @; + if (o,trig_head[t]==0) o,trig_tail[t]=q; + oo,link(q)=trig_head[t],trig_head[t]=p; /* move trigger to temp list |t| */ + if (t; + totopts--; + return 1; +} + +@ @= +int purge_the_option(register int opt,int act,char*typ) { /* |opt| isn't at the left spacer */ + for (opt--;o,nd[opt].itm>0;opt--) ; + return opt_out(opt,act,typ); +} + +@ After a secondary item has been purified, we mustn't mess with its set. +Secondary items that lie between |active| and the parameter |act| +are in the process of being purified. + +@= +for (nn=opt+1;o,(ii=nd[nn].itm)>0;nn++) { + p=nd[nn].loc; + if (p>=second && (o,pos(ii)>=act)) continue; /* |ii| already purified */ + o,ss=size(ii)-1; + if (ss==0 && p=maxl-show_choices_gap) { + fprintf(stderr," can't cover"); + print_item_name(ii,stderr); + fprintf(stderr,"\n"); + } + @; + } + o,nnp=set[ii+ss]; + o,size(ii)=ss; + oo,set[ii+ss]=nn,set[p]=nnp; + oo,nd[nn].loc=ii+ss,nd[nnp].loc=p; + updates++; +} + +@ We can't complete the current options to a viable set that's domain +consistent. So all of the fixit lists remaining in the queue must go back +into the trigger lists that triggered them. + +@= +while (qfront!=qrear) { + o,p=qfront,opt=info(p),qfront=link(p),putavail(p); + @; +} +return 0; + +@ @= +{ + for (o,p=fixit(opt); p; p=pp) { + oo,optp=info(p), q=link(p), info(p)=opt; + o,pp=link(q); /* |info(q)| is the same for triggers and fixits */ + oo,link(q)=trigger(optp), trigger(optp)=p; + } + o,fixit(opt)=0; +} + +@ An option with negative age will never be used, so we needn't trigger it. + +(I realized later that, in fact, an inactive option with age~0 will also +remain inactive. So I could also have discarded a few more entries, +and used $-c$ instead of $-c-1$ in hints. I've decided not to +make this optimization, for fear of breaking something.) + +@= +{ + putavail(p),putavail(q); + continue; +} + +@ The queue contains options where we've left holes in the support matrix. +The fixit lists of those options tell us where those holes are. + +@= +int empty_the_queue(void) { + register int p,q,pp,qq,s,ss,ii,jj,nn,opt,optp; + subroutine_overhead; + while (qfront!=qrear) { + o,p=qfront,opt=info(p),qfront=link(p),putavail(p); + if (fixit(opt)==0) confusion("queue"); + if (leak_checking) list_check(0); + @; + @; + for (o,p=fixit(opt); p; p=pp) { + o,q=link(p); /* ignore |info(p)|, which is irrelevant for now */ + o,ii=info(q),pp=link(q); /* |ii| is a primary item, not in |opt| */ + for (o,s=ii,ss=s+size(ii);s; + } + if (s==ss) { /* |opt| is inconsistent */ + if (vbose&show_supports) { + print_option(opt+1,stderr,-1,1); + fprintf(stderr,","); + print_item_name(ii,stderr); + fprintf(stderr," not supported\n"); + } + fixit(opt)=p; + @; + if (!opt_out(opt,active,"purging")) return 0; + break; /* move to another |opt| */ + }@+else @; + } + o,fixit(opt)=0; + } + return 1; +} + +@ Here's how we get the ball rolling by making every domain consistent +in the first place. + +At the beginning, all |mark| fields are zero. + +@= +int establish_dc(void) { + register int k,ii,jj,nn,opt,optp,p,q,qq,s,ss; + cur_age=-1; + qfront=qrear=getavail(); + for (opt=0;opt; + for (k=0;k; + } + if (s==ss) { /* |opt| is inconsistent */ + if (vbose&show_supports) { + print_option(opt+1,stderr,-1,1); + fprintf(stderr,","); + print_item_name(ii,stderr); + fprintf(stderr," not supported\n"); + } + if (!opt_out(opt,active,"purging")) return 0; + break; /* move to the next |opt| */ + }@+else { + p=getavail(),q=getavail(); + o,link(p)=q; + o,info(q)=ii; + @; + } + } + } + } + return empty_the_queue(); +} + +@ @= +{ + totopts=options; + if (!establish_dc()) { + if (vbose&show_choices) fprintf(stderr,"Inconsistent options!\n"); + goto done; + } + @; + if (vbose&show_choices) + fprintf(stderr,"Initial consistency after "O"lld mems.\n",mems); + @; +} + +@ The purged options that appear in trigger lists are useless baggage. + +@= +{ + register int opt,optp,p,q,pp,qq; + for (opt=0;opt=0) { + for (o,p=trigger(opt),qq=-1;p;p=pp) { + oo,optp=info(p),q=link(p),pp=link(q); + if (o,age(optp)<0) { + putavail(p),putavail(q); + if (qq<0) o,trigger(opt)=pp; + else o,link(qq)=pp; + }@+else qq=q; + } + } +} + +@ @= +{ + if (vbose&show_supports) { + print_option(opt+1,stderr,-1,1); + fprintf(stderr,","); + print_item_name(ii,stderr); + fprintf(stderr," supported by "); + print_option(optp+1,stderr,0,1); + } + o,info(p)=opt; + oo,link(q)=trigger(optp); + o,trigger(optp)=p; +} + +@*A view from the top. +Our strategy for generating all exact covers will be to repeatedly +choose an item that appears to be hardest to cover, namely an item whose set +is currently smallest, among all items that still need to be covered. +And we explore all possibilities via depth-first search, in the +following way: First we try using the first option in that item's set; +then we explore the consequences of {\it forbidding\/} that item. + +The neat part of this algorithm is the way the sets are maintained. +Depth-first search means last-in-first-out maintenance of data structures; +and the sparse-set representations make it particularly easy to undo +what we've done at less-deep levels. + +The basic operation is ``covering'' each item of a chosen option. +Covering means to make an item inactive. If it is primary, we remove it +from the set of items needing to be covered, and we block all other +options that contain it. If the item is secondary and still active +(not yet purified), we block all options in which it has the wrong color. + +The branching discipline that we follow is quite different from what we +did in {\mc DLX2} or {\mc SSXCC}, however, because we're now maintaining +domain consistency throughout the search. The old way was to choose a +``best item''~$p$, having say~$d$ options, and then to try option 1 of the +$d$~possibilities for~$p$, then option 2 of those~$d$, \dots, +option $d$ of those~$d$, before backtracking to the previous level. + +The new way, given consistent domains, starts out the same as before. +We choose a best item~$p_1$, having $d_1$ options, and we try its +first option. But after returning from that branch, we remove that +option and restore domain consistency; then we choose a new best item~$p_2$, +having $d_2$ options, and try the first of those. Eventually, after +trying and removing the first remaining options of $p_1$ through~$p_k$, +we'll reach a point where we can't make the remaining domains both +consistent and nonempty. That's when we back up. + +In this scenario, all of the subproblems for $p_1$, \dots, $p_k$ are +trying to extend the same partial solution with $s$ choices to a +partial solution that has $s+1$ choices. We call this ``stage $s$'' +of the search. Stage $s$ actually involves $k$ different nodes of +the (binary) search tree, each of which is on its own ``level.'' +(The level is the distance from the root; the stage is the number +of options that have been chosen in the current partial solution.) + +We might think of the search as a tree that makes a $k$-way branch +at stage~$s$, instead of as a tree that makes binary branches at each level. +Such an interpretation is equivalent to the ``natural correspondence'' between +ordinary trees and binary trees, discussed in {\sl TAOCP\/} Section 2.3.2. + +@ As search proceeds, the current subproblem gets easier and easier +as the number of active items and options gets smaller and smaller. +Let $I_s$ be the set of all items that are active when $s$ options +$c_1$, \dots,~$c_s$ +have been chosen to be in the partial solution-so-far. Thus $I_0$ is +the set of all items initially given; and $I_s$ for $s>0$ is obtained +from $I_{s-1}$ by removing the primary items and the previously +unpurified secondary items of~$c_s$. We denote the primary items of~$I_s$ +by $P_s$; these are the primary items not in $c_1$, \dots,~$c_s$. + +Let $O_{-1}$ be the set of all options actually given. Just before +entering stage~0, we reduce $O_{-1}$ to +\def\init{^{\rm init}}% +$O_0\init$, the largest subset of $O_{-1}$ that is domain consistent, +by purging options that have no support. In general, stage~$s$ +begins with a domain-consistent set of options $O_s\init$, +which is the largest such set that's compatible with $c_1$, \dots,~$c_s$. +Later on in stage~$s$ we usually work with a smaller set of active +options $O_s$, which is the largest domain-consistent set that's +contained in $O_s\init$ after we've removed the options whose +consequences as potential choices were previously examined in this stage. + +If every item in $P_s$ still belongs to at least one option of~$O_s$, +we're ready to make a new~$c_{s+1}$ from among those remaining +options. We get $O_{s+1}\init$ from $O_s$ by choosing $c_{s+1}$ and +blocking every option incompatible with it, +and then by purging options that aren't domain-consistent. + +Thus when we're in stage~$s$, there's a sequence of sets of options +$$O_{-1}\supseteq O_0\init \supseteq O_0 + \supset O_1\init \supseteq O_1\supset\cdots\supset + O_s\init \supseteq O_s,$$ +all of which are domain consistent except possibly $O_{-1}$. +Notice that +$$\hbox{if $o\in O_s\init$ and $p\in o$ then $p\in P_s$.}$$ + +And there's good news: +The support array $S[o,p]$ follows the nested structure of our search in +a useful way. Recall that $S[o,p]=\#$ if $p\in o$; otherwise +$S[o,p]=o'$, where $p\in o'$ and $o'$ is compatible with~$o$. + +This array is defined for all options $o\in O_0\init$, and for +all primary items $p\in P_0$. However, when we enter stage~$s$, +we're interested only in the much smaller subarray that contains +supports when $o\in O_s\init$ and $p\in P_s$. And when we're transitioning +from stage~$s$ to stage~$s+1$, we care only about a +still-smaller subarray, for $o\in O_s$ and $p\in P_s$. In particular, +domain consistency implies that we have +$$\eqalign{ +&\hbox{if $o\in O_s\init$ and $p\notin o$ and $p\in P_s$ then } + S[o,p]\in O_s\init;\cr +&\hbox{if $o\in O_s$ and $p\notin o$ and $p\in P_s$ then } + S[o,p]\in O_s.\cr}$$ + +@ Eventually a choice will fail, of course. Backtracking becomes +necessary in two distinct ways: \ (1)~If we've settled on a new~$c_s$ +among the options of $O_s$, but we're unable to reduce the remaining +compatible options to a domain-consistent $O_{s+1}\init$ without +emptying some domain, we ``backtrack in stage~$s$'' and reject that choice. +(Thus, we stay in stage~$s$ but move to a new level; the active items +remain the same.) \ (2)~If we've finished exploring a choice from $O_s$ +and are unable to reduce the other options to a smaller +domain-consistent~$O_s$, we ``backtrack to stage~$s-1$'' and reject~$c_{s-1}$. +(Thus, we resume where we left off at the previous +stage's deepest level; the active items revert back from $P_s$ +to the larger set~$P_{s-1}$.) + +I wish I could say that it was easy for me to discover the programming +logic just described. I~guess it was my baptism into what researchers +have called ``fine-grained'' versus ``coarse-grained'' algorithms. + +Notice that when we backtrack, we need not change the $S$ array in any way. +A support is always a support. Thus there's no point in trying to undo +any of the changes we've made to the current support structure. + +@*The triggering. +Suppose there are 1000 options and 100 items. Then the $S$ array has +100,000 entries, most of which are supports (that is, not~\#). +Every support is an entry in a trigger list; hence the trigger lists +are necessarily long. The task of maintaining domain consistency +might therefore seem hopelessly inefficient. + +On the other hand, after we've made some choices, there may be only 100 +options left, and perhaps 30 items not yet covered. Then at most 3000 +supports are relevant, and most of the information in trigger lists +is of no interest to us. An efficient scheme might therefore still +be possible, if we can figure out a way to avoid looking at useless +triggers. + +Ideally we'd like options from $O_s$ to appear at the top of each trigger +stack, with options from $O_s\init$ just below them, and with +$O_{s-1}$, $O_{s-1}\init$, \dots, $O_0$, $O_0\init$ furthest down. +The pairs $(o,p)$ of interest would then appear only near the top. + +Unfortunately such an arrangement cannot be guaranteed. Indeed, that's +obvious: The trigger-list entries occur in essentially arbitrary order +when we first form $O_0\init$. If they happen to be supports that +work for every subsequent stage, no changes to the trigger lists will +be needed, and we won't even want to look at those lists. + +We can, however, come sort of close to an ideal arrangement, by +exploiting the fact that every option not in the current $O_s$ has +been deactivated at least once. We look at \\{trigger}$(o)$ only after +$o$~has become inactive; and at that time we can reorder its entries. + +Therefore this program inserts markers into the trigger lists, saying that +``all further entries of this list are young'' (meaning deactivated early, +hence uninteresting until we've backtracked to an early stage). +Every such marker is accompanied by a time stamp, so that +we can recognize later when its message is no longer~true. + +@ When deactivating an option from $O_s\init$ that won't be in~$O_s$, +the ``current age'' |cur_age| is $2s$. And when deactivating an +option from $O_s$ that won't be in~$O_{s+1}\init$ it is $2s+1$. + +Thus an inactive option is in $O_s\init$ if and only if its age is $\ge2s$; +and it's in $O_s$ if and only if its age is $\ge2s+1$. + +Incidentally, I've tried to avoid making bad puns based on |cur_age| +versus courage, or \\{age} versus~\\{stage}. + +@ A negative entry |optp=-c| in a trigger list is a hint that all future +entries will have age less than~$c$. The search tree may have changed +since this hint was put into the list; so we must look at the relevant +stage stamp, to ensure that the hint is still valid. + +Suppose $o$ has age $2s$. Then $o$ is in $O_s\init$ but not in $O_s$. As +computation proceeds, without backtracking to stage~$s-1$, the set +$O_s$ might get smaller and smaller, but $o$ will still not be in~$O_s$. +Therefore a trigger hint saying that $o$ is inactive will be valid +until |stagestamp[s]| changes. (More precisely: If we backtrack to +stage~$s-1$, |stagestamp[s]| won't change until we progress again +to stage~$s$; before that time, we won't be looking at the hint.) + +Suppose $o$ has age $2s+1$. Then $o$ is in $O_s$ but not in $O_{s+1}\init$. As +computation proceeds, without backtracking in or to stage~$s$, the set +$O_{s+1}\init$ won't change. +Therefore a trigger hint saying that $o$ is inactive will be valid +until |stagestamp[s+1]| changes. + +That's why the following code says `|(cutoff+1)>>1|' when selecting the +relevant stage stamp. + +@= +{ + cutoff=-optp-1; + if (cutoff>1])) { + pp=p; + break; + } + putavail(p),putavail(q); /* discard an obsolete hint */ + continue; /* and ignore it */ +} + +@ If |optp| is inactive, it has been purged and its recorded +age is |cur_age| or less. Thus we can conclude that |optp| is active +whenever |age(optp)>cur_age|. + +In general, of course, that age test won't be conclusive and a slightly +more expensive test needs to be made by looking further into the data +structures. Option |optp| is active if and only if +it appears in the current set of its first item. +(This is where we use the fact that the first item of |optp| is primary.) + +@= +o,t=age(optp); +if (t<=cur_age) { + o,jj=nd[optp+1].itm; /* |jj| is |optp|'s first item */ + if (o,nd[optp+1].loc>=jj+size(jj)) goto inactive; +} /* branch if |optp| was removed from |jj|'s set */ + +@ When the trigger list for |opt| refers to an item |ii|, that item +is in |opt|. Suppose |ii| is currently inactive; then we wouldn't be purging +|opt| unless |ii| has just become inactive (and we're calling |opt_out| +from within |include_option|). + +@= +if (o,pos(ii)>=active) { + if (pos(ii)>=act) confusion("active"); + t=cur_age; + goto inactive; +} + +@ When we get here, |pp| is either zero or the cell where we found |cutoff|. +In the latter case, |pp=p| and |link(p)=q|; thus the cutoff hint +is in |p| and~|q|. + +All of the unused trigger entries have been redirected to the |trig_head| +lists, sorted by their age. + +@= +if (pp==0) cutoff=-1; +if (tmin<=cutoff) { + if (tmin>1],link(q)=trig_head[t]; + o,trig_head[t]=0; + pp=p; +} +if (trig_head[cur_age]) { + oo,link(trig_tail[cur_age])=pp; + /* give no hint for inactive options of the current age */ + o,pp=trig_head[cur_age],trig_head[cur_age]=0; +} +o,trigger(opt)=pp; + +@ @= +{ + fprintf(stderr,"cutoff for age "O"d",-info(p)-1); + if (info(q)!=stagestamp[(-info(p))>>1]) fprintf(stderr," (obsolete)"); +} + +@ At this point we want |curstamp| to have a value that's larger +than anything found in a trigger list~hint. Moreover, the values +of |stagestamp[0]|, \dots, |stagestamp[stage-1]| should all be +distinct and less than |curstamp|, because they might be used +in future hints. + +We may not be able to satisfy those conditions when |badstamp| is a +small positive constant! But we will +have checked out the following code at least once before failing. + +@= +if (++biggeststamp==badstamp) { + if (badstamp>0 && stage>=badstamp) { + fprintf(stderr,"Timestamp overflow (badstamp="O"d)!\n",badstamp); + exit(-11); + } + @; + for (k=0;k= +for (k=0;k= +if (o,age(opt)!=infinite_age) { + @; + continue; +} + +@*The dancing. +@= +level=stage=-1; +newstage:@+@; +newlevel: nodes++; + @; + if (vbose&show_profile) profile[stage]++; + if (sanity_checking) sanity(); + if (leak_checking) list_check(0); + @; + if (stage; + tmems=mems; + if (vbose&show_option_counts) + fprintf(stderr,"Level "O"d, stage "O"d, "O"d options, "O"d items\n", + level,stage,totopts,active); + @; + bmems+=mems-tmems; + if (stage==xcutoff) @; + if (t==infty) @; + oo,choice[level]=cur_choice=set[best_itm]; +got_choice:@+o,deg[level]=t; + if (t==1) o,saved[stage]=saveptr; + else @; + cur_age=stage+stage+1; + tmems=mems; + if (!include_option(cur_choice)) { + nmems+=mems-tmems; + goto tryagain; + } + if (!empty_the_queue()) { + nmems+=mems-tmems; + goto tryagain; + } + goto newstage; +tryagain:@+if (t==1) goto backup; + if (vbose&show_choices) fprintf(stderr,"Backtracking in stage "O"d\n",stage); + goto purgeit; +backup:@+if (--stage; +new_age: cur_age=stage+stage; + tmems=mems; + if (!(o,purge_the_option(choice[level],active,"removing"))) { + pmems+=mems-tmems; + goto backup; + } + if (!empty_the_queue()) { + pmems+=mems-tmems; + goto backup; + } + goto newlevel; + +@ We save the sizes of active items on |savestack|, whose entries +have two fields |l| and |r|, for an item and its size. This stack +makes it easy to undo all deletions, by simply restoring the former sizes. + +@= +int stage; /* number of choices in current partial solution */ +int level; /* current depth in the search tree (which is binary) */ +int cur_age; /* current |stage| or |stage+1/2| (times 2) */ +int choice[max_level]; /* the option and item chosen on each level */ +int deg[max_level]; /* the number of options that item had at the time */ +int saved[max_stage]; /* size of |savestack| at each stage */ +int levelstage[max_stage]; /* the most recent level at each stage */ +int stagelevel[max_level]; /* the stage that corresponds to each level */ +int levelopts[max_level]; /* options remaining at each level */ +int stagestamp[max_stage]; /* timestamp that's current at each stage */ +ullng profile[max_stage]={1}; /* number of search tree nodes on each stage */ +twoints savestack[savesize]; +int saveptr; /* current size of |savestack| */ +int trig_head[infinite_age],trig_tail[infinite_age]; /* in |opt_out| */ + +@ @= +if (++stage>maxs) { + if (stage>=max_stage) { + fprintf(stderr,"Too many stages!\n"); + exit(-40); + } + maxs=stage; +} +@; +o,stagestamp[stage]=curstamp; + +@ @= +if (++level>maxl) { + if (level>=max_level) { + fprintf(stderr,"Too many levels!\n"); + exit(-4); + } + maxl=level; +} +oo,stagelevel[level]=stage,levelstage[stage]=level; +if (vbose&show_option_counts) levelopts[level]=totopts; + +@ @= +if (delta && (mems>=thresh)) { + thresh+=delta; + if (vbose&show_full_state) print_state(); + else print_progress(); +} +if (mems>=timeout) { + fprintf(stderr,"TIMEOUT!\n");@+goto done; +} + +@ This is where we extend the partial solution. + +Notice a tricky point: We must go through the sets from right to left, +because the options we block move right as they leave the set. + +@= +int include_option(int opt) { + register int c,cc,k,p,q,pp,s,ss,optp; + subroutine_overhead; + if (vbose&show_choices) { + fprintf(stderr,"S"O"d:",stage); + print_option(opt,stderr,1,0); + } + for (opt--;o,nd[opt].itm>0;opt--) ; /* move back to the spacer */ + @; + for (k=active;k=second && (o,c=match(s))) { /* we must purify |s| */ + for (;ss>=s;ss--) { + o,optp=set[ss]; + if ((o,nd[optp].clr!=c) && !purge_the_option(optp,oactive,"blocking")) return 0; + } + }@+else for (;ss>=s;ss--) { + o,optp=set[ss]-1; + while (o,nd[optp].itm>0) optp--; /* move to the spacer */ + if (optp!=opt && !opt_out(optp,oactive,"blocking")) return 0; + } + } + @; + return 1; +} + +@ An item becomes inactive when it becomes part of the solution-so-far +(hence it leaves the problem-that-remains). Active primary items are those +that haven't yet been covered. Active secondary items are those that +haven't yet been purified. + +The active items are the first |active| entries of the |item| list. At one +time I thought it would be wise to keep primary and secondary items separate, using a +sparse-set discipline independently on each sector. But I found that the +time spent in maintaining and searching the active list was negligible +in comparison with the overall running time; so I've kept the implementation simple. + +At this point in the computation, an item of |opt| will be inactive +if and only if it is secondary +and purified, because we are including |opt| in the partial solution. + +@= +p=oactive=active; +for (q=opt+1;o,(c=nd[q].itm)>0;q++) { + o,pp=pos(c); + if (pp=second) oo,match(c)=nd[q].clr; + oo,pos(cc)=pp,pos(c)=p; + updates++; + } +} +active=p; + +@ This program differs from {\mc SSXCC} in one significant way: It makes +option |opt| inactive. In particular, it makes all of |opt|'s items +have size~0, except for unpurified secondaries. (Thus, we essentially say that +newly assigned variables---the inactivated primary items---should have +empty domains when they leave the current subproblem, while {\mc SSXCC} +left them with domains of size~1.) + +It would be a mistake to call |opt_out(opt,oactive)|, of course, +because that procedure doesn't want any primary items to become optionless. +On the contrary, we have precisely the opposite goal: We {\it celebrate\/} the +fact that all of the primaries in |opt| have become covered. + +We don't have to change |trigger(opt)|, because no active options +involve inactive primary items. + +@= +for (k=active;k=second) continue; + if (size(s)!=1) confusion("include"); + o,size(s)=0; +} +if (vbose&show_purges) { + fprintf(stderr," "O"d choosing option ",cur_age); + print_option(opt+1,stderr,0,1); +} +o,age(opt)=cur_age; +totopts--; + +@ The ``best item'' is considered to be an item that minimizes the +number of remaining choices. If there are several candidates, we +choose the first one that we encounter. + +Each primary item should have at least one valid choice, because +of domain consistency. + +@d infty 0x7fffffff + +@= +t=infty; +if ((vbose&show_details) && + level=maxl-show_choices_gap) + fprintf(stderr,"Stage "O"d,",stage); +for (k=0;t>1 && k=maxl-show_choices_gap) { + print_item_name(item[k],stderr); + fprintf(stderr,"("O"d)",s); + } + if (s<=t) { + if (s==0) fprintf(stderr,"I'm confused.\n"); /* |hide| missed this */ + if (s=maxl-show_choices_gap) { + if (t==infty) fprintf(stderr," solution\n"); + else { + fprintf(stderr," branching on"); + print_item_name(best_itm,stderr); + fprintf(stderr,"("O"d)\n",t); + } +} +if (t>maxdeg && t= +{ + count++; + if (spacing && (count mod spacing==0)) { + printf(""O"lld:\n",count); + for (k=0;k=maxcount) goto done; + goto backup; +} + +@ @= +{ + o,saved[stage]=saveptr; + if (saveptr+active>maxsaveptr) { + if (saveptr+active>=savesize) { + fprintf(stderr,"Stack overflow (savesize="O"d)!\n",savesize); + exit(-5); + } + maxsaveptr=saveptr+active; + } + for (p=0;p= +o,active=saveptr-saved[stage]; +saveptr=saved[stage]; +for (p=0;p= +void print_savestack(int start,int stop) { + register int k; + for (k=start;k= +void print_state(void) { + register int l,s; + fprintf(stderr,"Current state (level "O"d, stage "O"d):\n",level,stage); + for (l=0;l=show_levels_max) { + fprintf(stderr," ...\n"); + break; + } + } + fprintf(stderr," "O"lld solutions, "O"lld mems, and max level "O"d so far.\n", + count,mems,maxl); +} + +@ During a long run, it's helpful to have some way to measure progress. +The following routine prints a string that indicates roughly where we +are in the search tree. The string consists of node degrees, +preceded by `\.{\char`\~}' if the node wasn't the current node in +its stage (that is, if the node represents an option that has already +been fully explored --- ``we've been there done that''). + +Following that string, a fractional estimate of total progress is computed, +based on the na{\"\i}ve assumption that the search tree has a uniform +branching structure. If the tree consists +of a single node, this estimate is~.5. Otherwise, if the first choice +is the $k$th choice in stage~0 and has degree~$d$, +the estimate is $(k-1)/(d+k-1)$ plus $1/(d+k-1)$ times the +recursively evaluated estimate for the $k$th subtree. (This estimate +might obviously be very misleading, in some cases, but at least it +tends to grow monotonically.) + +Fine point: If we've just backtracked within stage |stage|, +the string of node degrees with end with a `\.{\char`\~}' entry, +and we haven't yet made {\it any\/} choice in the current~stage. +The test `|l==level-1|' below uses the fact that |levelstage[stage]=level| +to adjust the fractional estimate appropriately for the partial +progress in the current stage. + +@= +void print_progress(void) { + register int l,ll,k,d,c,p,ds=0; + register double f,fd; + fprintf(stderr," after "O"lld mems: "O"lld sols,",mems,count); + if (stage=0 && stagelevel[ll]==stagelevel[l]; + k++,d++,ll--) ; + fd*=d,f+=(k-1)/fd; /* choice |l| is treated like |k| of |d| */ + } + if (l>=show_levels_max+levelstage[groundstage] && !ds) + ds=1,fprintf(stderr,"..."); + } + fprintf(stderr," "O".5f\n",f+0.5/fd); + } +} + +@ @= +{ + fprintf(stderr,"Profile:\n"); + for (k=0;k<=maxs;k++) + fprintf(stderr,""O"3d: "O"lld\n", + k,profile[k]); +} + +@ I'm experimenting with a mechanism by which partial solutions of a large +problem can be saved to temporary files and computed separately --- for +example, by a cluster of computers working in parallel. Each partial solution +can be completed to full solutions when this program is run with one of the +files output here, using \.X$\langle\,$filename$\,\rangle$ on the command line. + +@= +{ + @; + fprintf(xcutoff_file,"Resume at stage "O"d\n",stage); + for (k=0;k0;j--) ; + putc(levelstage[stagelevel[k]]!=k? '-': '+', xcutoff_file); + print_option(j,xcutoff_file,0,1); + } + fclose(xcutoff_file); + xcount++; + goto backup; +} + +@ @d part_file_prefix "/tmp/part" /* should be at most 10 or so characters */ +@d part_file_name_size 20 + +@= +k=sprintf(xcutoff_name,part_file_prefix O"d",xcount); +xcutoff_file=fopen(xcutoff_name,"w"); +if (!xcutoff_file) { + fprintf(stderr,"Sorry, I can't open file `"O"s' for writing!\n",xcutoff_name); + exit(-1); +} + +@ @= +strncpy(xcutoff_name,argv[j]+1,part_file_name_size-1); +xcutoff_file=fopen(xcutoff_name,"r"); +if (!xcutoff_file) + fprintf(stderr,"Sorry, I can't open file `"O"s' for reading!\n", + xcutoff_name); +if (fgets(buf,bufsize,xcutoff_file)) { + if (strncmp(buf,"Resume at stage ",16)==0) { + for (groundstage=0,i=16;o,buf[i]>='0' && buf[i]<='9';i++) + groundstage=10*groundstage+buf[i]-'0'; + if (vbose&show_basics) fprintf(stderr, + "Resuming at stage "O"d\n",groundstage); + } +} +break; + +@ @= +{ + if (!fgets(buf,bufsize,xcutoff_file)) confusion("resuming"); + o,choice[level]=cur_choice=read_option(); + if (cur_choice<0) { + fprintf(stderr,"Misformatted option in file `"O"s':\n",xcutoff_name); + fprintf(stderr,""O"s",buf); + exit(-1); + } + t=1; + if (o,buf[0]=='+') goto got_choice; + goto new_age; +} + +@ @= +fprintf(stderr, + "Partial solutions saved on "part_file_prefix"0.."O"s.\n",xcutoff_name); + +@ @= +char xcutoff_name[part_file_name_size]; +FILE *xcutoff_file; +int groundstage; /* the stage where calculation begins or resumes */ + +@*Index.