From 41b6325a1b67734554b5303c9c6f439911f0dd9d Mon Sep 17 00:00:00 2001 From: nick black Date: Fri, 4 Feb 2022 14:16:10 -0500 Subject: [PATCH 01/10] [sixel] restructure rebuild to look like wipe #2573 --- src/lib/sixel.c | 59 +++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/src/lib/sixel.c b/src/lib/sixel.c index 1c72db6ef..9d34c237c 100644 --- a/src/lib/sixel.c +++ b/src/lib/sixel.c @@ -1473,47 +1473,51 @@ int sixel_init(int fd){ return sixel_init_core("\e[?8452h", fd); } +static inline int +restore_band(sixelmap* smap, int band, int startx, int endx, + int starty, int endy, int dimx, int cellpixy, int cellpixx, + uint8_t* auxvec){ + // FIXME + return 0; +} + // only called for cells in SPRIXCELL_ANNIHILATED[_TRANS]. just post to // wipes_outstanding, so the Sixel gets regenerated the next render cycle, // just like wiping. this is necessary due to the complex nature of // modifying a Sixel -- we want to do them all in one batch. int sixel_rebuild(sprixel* s, int ycell, int xcell, uint8_t* auxvec){ - s->wipes_outstanding = true; - sixelmap* smap = s->smap; - const int cellpxx = ncplane_pile(s->n)->cellpxx; +//fprintf(stderr, "REBUILDING %d/%d\n", ycell, xcell); + if(auxvec == NULL){ + return -1; + } const int cellpxy = ncplane_pile(s->n)->cellpxy; + const int cellpxx = ncplane_pile(s->n)->cellpxx; + memset(auxvec + cellpxx * cellpxy, 0xff, cellpxx * cellpxy); + sixelmap* smap = s->smap; const int startx = xcell * cellpxx; const int starty = ycell * cellpxy; - int endx = ((xcell + 1) * cellpxx) - 1; - if(endx > s->pixx){ + int endx = ((xcell + 1) * cellpxx); + if(endx >= s->pixx){ endx = s->pixx; } - int endy = ((ycell + 1) * cellpxy) - 1; - if(endy > s->pixy){ + int endy = ((ycell + 1) * cellpxy); + if(endy >= s->pixy){ endy = s->pixy; } - int transparent = 0; + const int startband = starty / 6; + const int endband = endy / 6; //fprintf(stderr, "%d/%d start: %d/%d end: %d/%d bands: %d-%d\n", ycell, xcell, starty, startx, endy, endx, starty / 6, endy / 6); - /* FIXME - for(int x = startx ; x <= endx ; ++x){ - for(int y = starty ; y <= endy ; ++y){ - int auxvecidx = (y - starty) * cellpxx + (x - startx); - int trans = auxvec[cellpxx * cellpxy + auxvecidx]; - if(!trans){ - int color = auxvec[auxvecidx]; - int coff = smap->sixelcount * color; - int band = y / 6; - int boff = coff + band * s->pixx; - int xoff = boff + x; -//fprintf(stderr, "%d/%d band: %d coff: %d boff: %d rebuild %d/%d with color %d from %d %p xoff: %d\n", ycell, xcell, band, coff, boff, y, x, color, auxvecidx, auxvec, xoff); - s->smap->data[xoff] |= (1u << (y % 6)); - }else{ - ++transparent; - } - } + // walk through each color, and wipe the necessary sixels from each band + int w = 0; + for(int b = startband ; b < endband ; ++b){ + w += restore_band(smap, b, startx, endx, starty, endy, s->pixx, + cellpxy, cellpxx, auxvec); } - */ - sprixcell_e newstate; + s->wipes_outstanding = true; + // FIXME need to set this back up...how? return transparent count from + // restore_band(), and sum them up? + sprixcell_e newstate = SPRIXCELL_OPAQUE_SIXEL; // FIXME incorrect! + /* if(transparent == cellpxx * cellpxy){ newstate = SPRIXCELL_TRANSPARENT; }else if(transparent){ @@ -1521,6 +1525,7 @@ int sixel_rebuild(sprixel* s, int ycell, int xcell, uint8_t* auxvec){ }else{ newstate = SPRIXCELL_OPAQUE_SIXEL; } + */ s->n->tam[s->dimx * ycell + xcell].state = newstate; return 1; } From 130730073daaceb88856d14a51234252bfcba317 Mon Sep 17 00:00:00 2001 From: nick black Date: Fri, 4 Feb 2022 14:32:06 -0500 Subject: [PATCH 02/10] [sixel] drop sixeltable type/intermediary --- src/lib/sixel.c | 91 ++++++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/src/lib/sixel.c b/src/lib/sixel.c index 9d34c237c..0fbb90f7b 100644 --- a/src/lib/sixel.c +++ b/src/lib/sixel.c @@ -4,6 +4,10 @@ #define RGBSIZE 3 +// this palette entry is a sentinel for a transparent pixel (and thus caps +// the palette at 65535 other entries). +#define TRANS_PALETTE_ENTRY 65535 + // returns the number of individual sixels necessary to represent the specified // pixel geometry. these might encompass more pixel rows than |dimy| would // suggest, up to the next multiple of 6 (i.e. a single row becomes a 6-row @@ -57,7 +61,7 @@ typedef struct sixelmap { // second pass: construct data for extracted colors over the sixels. the // map will be persisted in the sprixel; the remainder is lost. -// FIXME kill this off; induct directly into qstate +// FIXME kill this off; use sixelmap directly typedef struct sixeltable { sixelmap* map; // copy of palette indices / transparency bits } sixeltable; @@ -188,7 +192,7 @@ typedef struct qstate { const struct blitterargs* bargs; const uint32_t* data; int linesize; - sixeltable* stab; + sixelmap* smap; // these are the leny and lenx passed to sixel_blit(), which are likely // different from those reachable through bargs->len{y,x}! int leny, lenx; @@ -253,7 +257,7 @@ insert_color(qstate* qs, uint32_t pixel){ q->q.comps[1] = g; q->q.comps[2] = b; q->q.pop = 1; - ++qs->stab->map->colors; + ++qs->smap->colors; return 0; } onode* o; @@ -314,7 +318,7 @@ insert_color(qstate* qs, uint32_t pixel){ o->q[skey]->q.comps[2] = b; o->q[skey]->qlink = 0; o->q[skey]->cidx = 0; - ++qs->stab->map->colors; + ++qs->smap->colors; //fprintf(stderr, "INSERTED[%u]: %u %u %u\n", key, q->q.comps[0], q->q.comps[1], q->q.comps[2]); return 0; } @@ -341,13 +345,12 @@ find_color(const qstate* qs, uint32_t pixel){ } // create an auxiliary vector suitable for a Sixel sprixcell, and zero it out. -// there are three bytes per pixel in the cell: a contiguous set of 16-bit -// palette indices, and a contiguous set of two-value transparencies (these -// could be folded down to bits from bytes, saving 7/8 of the space FIXME). +// there are two bytes per pixel in the cell: a palette index of up to 65534, +// or 65535 to indicate transparency. static inline uint8_t* sixel_auxiliary_vector(const sprixel* s){ int pixels = ncplane_pile(s->n)->cellpxy * ncplane_pile(s->n)->cellpxx; - size_t slen = pixels * 3; + size_t slen = pixels * 2; uint8_t* ret = malloc(slen); if(ret){ memset(ret, 0, sizeof(slen)); @@ -760,10 +763,10 @@ choose(qstate* qs, qnode* q, int z, int i, int* hi, int* lo, // to the number of color registers. static inline int merge_color_table(qstate* qs){ - if(qs->stab->map->colors == 0){ + if(qs->smap->colors == 0){ return 0; } - qnode* qactive = get_active_set(qs, qs->stab->map->colors); + qnode* qactive = get_active_set(qs, qs->smap->colors); if(qactive == NULL){ return -1; } @@ -772,8 +775,8 @@ merge_color_table(qstate* qs){ // (this is not necessarily an optimizing huristic, but it'll do for now). int cidx = 0; //fprintf(stderr, "colors: %u cregs: %u\n", qs->colors, colorregs); - for(int z = qs->stab->map->colors - 1 ; z >= 0 ; --z){ - if(qs->stab->map->colors >= qs->bargs->u.pixel.colorregs){ + for(int z = qs->smap->colors - 1 ; z >= 0 ; --z){ + if(qs->smap->colors >= qs->bargs->u.pixel.colorregs){ if(cidx == qs->bargs->u.pixel.colorregs){ break; // we just ran out of color registers } @@ -782,7 +785,7 @@ merge_color_table(qstate* qs){ ++cidx; } free(qactive); - if(qs->stab->map->colors > qs->bargs->u.pixel.colorregs){ + if(qs->smap->colors > qs->bargs->u.pixel.colorregs){ // tend to those which couldn't get a color table entry. we start with two // values, lo and hi, initialized to -1. we iterate over the *static* qnodes, // descending into onodes to check their qnodes. we thus iterate over all @@ -813,7 +816,7 @@ merge_color_table(qstate* qs){ choose(qs, &qs->qnodes[z], z, -1, &hi, &lo, &hq, &lq); } } - qs->stab->map->colors = qs->bargs->u.pixel.colorregs; + qs->smap->colors = qs->bargs->u.pixel.colorregs; } return 0; } @@ -822,7 +825,7 @@ static inline void load_color_table(const qstate* qs){ int loaded = 0; int total = QNODECOUNT + (qs->dynnodes_total - qs->dynnodes_free); - for(int z = 0 ; z < total && loaded < qs->stab->map->colors ; ++z){ + for(int z = 0 ; z < total && loaded < qs->smap->colors ; ++z){ const qnode* q = &qs->qnodes[z]; if(chosen_p(q)){ qs->table[RGBSIZE * qidx(q) + 0] = ss(q->q.comps[0]); @@ -832,16 +835,16 @@ load_color_table(const qstate* qs){ } } //fprintf(stderr, "loaded: %u colors: %u\n", loaded, qs->colors); - assert(loaded == qs->stab->map->colors); + assert(loaded == qs->smap->colors); } // build up a sixel band from (up to) 6 rows of the source RGBA. static inline int build_sixel_band(qstate* qs, int bnum){ - sixelband* b = &qs->stab->map->bands[bnum]; - b->size = qs->stab->map->colors; + sixelband* b = &qs->smap->bands[bnum]; + b->size = qs->smap->colors; size_t bsize = sizeof(*b->vecs) * b->size; - size_t mlen = qs->stab->map->colors * sizeof(struct band_extender); + size_t mlen = qs->smap->colors * sizeof(struct band_extender); struct band_extender* meta = malloc(mlen); if(meta == NULL){ return -1; @@ -854,7 +857,7 @@ build_sixel_band(qstate* qs, int bnum){ memset(b->vecs, 0, bsize); memset(meta, 0, mlen); const int ystart = qs->bargs->begy + bnum * 6; - const int endy = (bnum + 1 == qs->stab->map->sixelbands ? + const int endy = (bnum + 1 == qs->smap->sixelbands ? qs->leny - qs->bargs->begy : ystart + 6); struct { int color; // 0..colormax @@ -913,7 +916,7 @@ build_sixel_band(qstate* qs, int bnum){ } } } - for(int i = 0 ; i < qs->stab->map->colors ; ++i){ + for(int i = 0 ; i < qs->smap->colors ; ++i){ if(meta[i].rle){ // color was wholly unused iff rle == 0 at end b->vecs[i] = sixelband_extend(b->vecs[i], &meta[i], qs->lenx, x); if(b->vecs[i] == NULL){ @@ -931,17 +934,17 @@ build_sixel_band(qstate* qs, int bnum){ // once again, and get the actual (color-indexed) sixels. static inline int build_data_table(qstate* qs){ - sixeltable* stab = qs->stab; - if(stab->map->sixelbands == 0){ + sixelmap* smap = qs->smap; + if(smap->sixelbands == 0){ logerror("no sixels"); return -1; } - for(int i = 0 ; i < qs->stab->map->sixelbands ; ++i){ + for(int i = 0 ; i < smap->sixelbands ; ++i){ if(build_sixel_band(qs, i) < 0){ return -1; } } - size_t tsize = RGBSIZE * qs->stab->map->colors; + size_t tsize = RGBSIZE * smap->colors; qs->table = malloc(tsize); if(qs->table == NULL){ return -1; @@ -979,7 +982,7 @@ extract_cell_color_table(qstate* qs, long cellid){ // transparent to mixed. if(cstarty >= cendy){ // we're entirely transparent sixel overhead tam[cellid].state = SPRIXCELL_TRANSPARENT; - qs->stab->map->p2 = SIXEL_P2_TRANS; // even one forces P2=1 + qs->smap->p2 = SIXEL_P2_TRANS; // even one forces P2=1 // FIXME need we set rmatrix? return 0; } @@ -1041,7 +1044,7 @@ extract_cell_color_table(qstate* qs, long cellid){ if(tam[cellid].state == SPRIXCELL_OPAQUE_SIXEL){ rmatrix[cellid] = 0; }else{ - qs->stab->map->p2 = SIXEL_P2_TRANS; // even one forces P2=1 + qs->smap->p2 = SIXEL_P2_TRANS; // even one forces P2=1 } return 0; } @@ -1073,14 +1076,14 @@ extract_color_table(qstate* qs){ ++cellid; } } - loginfo("octree got %"PRIu32" entries", qs->stab->map->colors); + loginfo("octree got %"PRIu32" entries", qs->smap->colors); if(merge_color_table(qs)){ return -1; } if(build_data_table(qs)){ return -1; } - loginfo("final palette: %u/%u colors", qs->stab->map->colors, qs->bargs->u.pixel.colorregs); + loginfo("final palette: %u/%u colors", qs->smap->colors, qs->bargs->u.pixel.colorregs); return 0; } @@ -1162,11 +1165,11 @@ write_sixel_header(qstate* qs, fbuf* f, int leny){ return -1; } // Set Raster Attributes - pan/pad=1 (pixel aspect ratio), Ph=qs->lenx, Pv=leny - int r = write_sixel_intro(f, qs->stab->map->p2, leny, qs->lenx); + int r = write_sixel_intro(f, qs->smap->p2, leny, qs->lenx); if(r < 0){ return -1; } - for(int i = 0 ; i < qs->stab->map->colors ; ++i){ + for(int i = 0 ; i < qs->smap->colors ; ++i){ const unsigned char* rgb = qs->table + i * RGBSIZE; //fprintf(fp, "#%d;2;%u;%u;%u", i, rgb[0], rgb[1], rgb[2]); int rr = write_sixel_creg(f, i, rgb[0], rgb[1], rgb[2]); @@ -1233,7 +1236,7 @@ sixel_reblit(sprixel* s){ // write out the sixel header after having quantized the palette. static inline int -sixel_blit_inner(qstate* qs, sixeltable* stab, const blitterargs* bargs, tament* tam){ +sixel_blit_inner(qstate* qs, sixelmap* smap, const blitterargs* bargs, tament* tam){ fbuf f; if(fbuf_init(&f)){ return -1; @@ -1244,7 +1247,7 @@ sixel_blit_inner(qstate* qs, sixeltable* stab, const blitterargs* bargs, tament* int outy = qs->leny; if(outy % 6){ outy += 6 - (qs->leny % 6); - stab->map->p2 = SIXEL_P2_TRANS; + smap->p2 = SIXEL_P2_TRANS; } int parse_start = write_sixel_header(qs, &f, outy); if(parse_start < 0){ @@ -1261,7 +1264,7 @@ sixel_blit_inner(qstate* qs, sixeltable* stab, const blitterargs* bargs, tament* fbuf_free(&f); return -1; } - s->smap = stab->map; + s->smap = smap; return 1; } @@ -1269,37 +1272,39 @@ sixel_blit_inner(qstate* qs, sixeltable* stab, const blitterargs* bargs, tament* // nearest multiple of six greater than or equal to |leny|. int sixel_blit(ncplane* n, int linesize, const void* data, int leny, int lenx, const blitterargs* bargs){ - sixeltable stable = { - .map = sixelmap_create(leny - bargs->begy), - }; - if(stable.map == NULL){ + if(bargs->u.pixel.colorregs >= TRANS_PALETTE_ENTRY){ + logerror("palette too large %d", bargs->u.pixel.colorregs); + return -1; + } + sixelmap* smap = sixelmap_create(leny - bargs->begy); + if(smap == NULL){ return -1; } assert(n->tam); qstate qs; if(alloc_qstate(bargs->u.pixel.colorregs, &qs)){ logerror("couldn't allocate qstate"); - sixelmap_free(stable.map); + sixelmap_free(smap); return -1; } qs.bargs = bargs; qs.data = data; qs.linesize = linesize; - qs.stab = &stable; + qs.smap = smap; qs.leny = leny; qs.lenx = lenx; if(extract_color_table(&qs)){ free(bargs->u.pixel.spx->needs_refresh); bargs->u.pixel.spx->needs_refresh = NULL; - sixelmap_free(stable.map); + sixelmap_free(smap); free_qstate(&qs); return -1; } // takes ownership of sixelmap on success - int r = sixel_blit_inner(&qs, &stable, bargs, n->tam); + int r = sixel_blit_inner(&qs, smap, bargs, n->tam); free_qstate(&qs); if(r < 0){ - sixelmap_free(stable.map); + sixelmap_free(smap); // FIXME free refresh table? } scrub_color_table(bargs->u.pixel.spx); From 61e8354f4a5689c5069a93c90cc3f68c0e5d7d5d Mon Sep 17 00:00:00 2001 From: nick black Date: Fri, 4 Feb 2022 14:50:08 -0500 Subject: [PATCH 03/10] [sixel] implement auxvec_idx() --- src/lib/sixel.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/lib/sixel.c b/src/lib/sixel.c index 0fbb90f7b..6f14b885a 100644 --- a/src/lib/sixel.c +++ b/src/lib/sixel.c @@ -8,6 +8,9 @@ // the palette at 65535 other entries). #define TRANS_PALETTE_ENTRY 65535 +// bytes per element in the auxiliary vector +#define AUXVECELEMSIZE 2 + // returns the number of individual sixels necessary to represent the specified // pixel geometry. these might encompass more pixel rows than |dimy| would // suggest, up to the next multiple of 6 (i.e. a single row becomes a 6-row @@ -350,7 +353,7 @@ find_color(const qstate* qs, uint32_t pixel){ static inline uint8_t* sixel_auxiliary_vector(const sprixel* s){ int pixels = ncplane_pile(s->n)->cellpxy * ncplane_pile(s->n)->cellpxx; - size_t slen = pixels * 2; + size_t slen = pixels * AUXVECELEMSIZE; uint8_t* ret = malloc(slen); if(ret){ memset(ret, 0, sizeof(slen)); @@ -420,6 +423,24 @@ sixelband_extend(char* vec, struct band_extender* bes, int dimx, int curx){ return vec; } +// get the index into the auxvec (2 bytes per pixel) given the true y/x pixel +// coordinates, plus the origin+dimensions of the relevant cell. +static inline int +auxvec_idx(int y, int x, int sy, int sx, int cellpxy, int cellpxx){ + if(y >= sy + cellpxy || y < sy){ + logpanic("illegal y for %d cell at %d: %d", cellpxy, sy, y); + return -1; + } + if(x >= sx + cellpxx || x < sx){ + logpanic("illegal x for %d cell at %d: %d", cellpxx, sx, x); + return -1; + } + const int xoff = x - sx; + const int yoff = y - sy; + const int off = yoff * cellpxx + xoff; + return AUXVECELEMSIZE * off; +} + // the sixel |rep| is being wiped. the active pixels need be written to the // |auxvec|, which is (|ey| - |sy| + 1) rows of (|ex| - |sx| + 1) columns. // we are wiping the sixel |rep|, changing it to |mask|. From 76c8fe667d7d63994576f9295859deb38d5fde08 Mon Sep 17 00:00:00 2001 From: nick black Date: Sat, 5 Feb 2022 04:55:57 -0500 Subject: [PATCH 04/10] [sixel] pass down cellpixel parameters to write_auxvec() #2573 --- src/lib/sixel.c | 49 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/src/lib/sixel.c b/src/lib/sixel.c index 6f14b885a..13197468d 100644 --- a/src/lib/sixel.c +++ b/src/lib/sixel.c @@ -444,9 +444,17 @@ auxvec_idx(int y, int x, int sy, int sx, int cellpxy, int cellpxx){ // the sixel |rep| is being wiped. the active pixels need be written to the // |auxvec|, which is (|ey| - |sy| + 1) rows of (|ex| - |sx| + 1) columns. // we are wiping the sixel |rep|, changing it to |mask|. +// precondition: mask is a bitwise proper subset of rep static inline void -write_auxvec(uint8_t* auxvec, int color, int x, int len, int sx, int ex, - int sy, int ey, char rep, char mask){ +write_auxvec(uint8_t* auxvec, int color, int y, int x, int len, int sx, int ex, + int sy, int ey, char rep, char mask, int cellpxy, int cellpxx){ +fprintf(stderr, "AUXVEC UPDATE[%d] y/x: %d/%d:%d s: %d/%d e: %d/%d %d\n", color, y, x, len, sy, sx, ey, ex, rep); + for(int i = x ; i < x + len ; ++i){ + const int idx = auxvec_idx(y, i, sy, sx, cellpxy, cellpxx); +fprintf(stderr, "AUXVEC %d for %d: %d\n", i, color, idx); + (void)auxvec; // FIXME + (void)mask; // FIXME + } } // wipe the color within this band from startx to endx - 1, from starty to @@ -454,8 +462,9 @@ write_auxvec(uint8_t* auxvec, int color, int x, int len, int sx, int ex, // auxvec. mask is the allowable sixel, y-wise. returns a positive number if // pixels were wiped. static inline int -wipe_color(sixelband* b, int color, int startx, int endx, - int starty, int endy, char mask, int dimx, uint8_t* auxvec){ +wipe_color(sixelband* b, int color, int y, int startx, int endx, + int starty, int endy, char mask, int dimx, uint8_t* auxvec, + int cellpxy, int cellpxx){ const char* vec = b->vecs[color]; if(vec == NULL){ return 0; // no work to be done here @@ -503,14 +512,17 @@ wipe_color(sixelband* b, int color, int startx, int endx, x = startx; } if(x + rle >= endx){ - // FIXME this new rep might equal the next rep, in which case we ought combine + // FIXME this might equal the prev/next rep, and we ought combine +fprintf(stderr, "************************* %d %d %d\n", endx - x, x, rle); write_rle(newvec, &voff, endx - x, masked); - write_auxvec(auxvec, color, x, endx - x, startx, endx, starty, endy, rep, mask); + write_auxvec(auxvec, color, y, x, endx - x, startx, endx, starty, + endy, rep, mask, cellpxy, cellpxx); rle -= endx - x; x = endx; }else{ write_rle(newvec, &voff, rle, masked); - write_auxvec(auxvec, color, x, rle, startx, endx, starty, endy, rep, mask); + write_auxvec(auxvec, color, y, x, rle, startx, endx, starty, endy, + rep, mask, cellpxy, cellpxx); x += rle; rle = 0; } @@ -527,7 +539,7 @@ wipe_color(sixelband* b, int color, int startx, int endx, break; } } -//if(strcmp(newvec, b->vecs[color])) fprintf(stderr, "WIPED %d y [%d..%d) x [%d..%d) mask: %d [%s]\n", color, starty, endy, startx, endx, mask, newvec); +if(strcmp(newvec, b->vecs[color])) fprintf(stderr, "WIPED %d y [%d..%d) x [%d..%d) mask: %d [%s]\n", color, starty, endy, startx, endx, mask, newvec); free(b->vecs[color]); if(voff == 0){ // FIXME check for other null vectors; free such, and assign NULL @@ -542,7 +554,7 @@ wipe_color(sixelband* b, int color, int startx, int endx, // number of pixels actually wiped. static inline int wipe_band(sixelmap* smap, int band, int startx, int endx, - int starty, int endy, int dimx, int cellpixy, int cellpixx, + int starty, int endy, int dimx, int cellpxy, int cellpxx, uint8_t* auxvec){ //fprintf(stderr, "******************** BAND %d ********************8\n", band); int wiped = 0; @@ -561,7 +573,8 @@ wipe_band(sixelmap* smap, int band, int startx, int endx, sixelband* b = &smap->bands[band]; // offset into map->data where our color starts for(int i = 0 ; i < b->size ; ++i){ - wiped += wipe_color(b, i, startx, endx, starty, endy, mask, dimx, auxvec); + wiped += wipe_color(b, i, band * 6, startx, endx, starty, endy, mask, + dimx, auxvec, cellpxy, cellpxx); } return wiped; } @@ -1427,8 +1440,8 @@ typedef struct sixel_engine { unsigned workers_wanted; pthread_mutex_t lock; pthread_cond_t cond; - void* chunks; // FIXME bool done; + qstate* qs; } sixel_engine; // FIXME make this part of the context, sheesh @@ -1450,7 +1463,7 @@ sixel_worker(void* v){ } do{ pthread_mutex_lock(&sengine->lock); - while(sengine->chunks == NULL && !sengine->done){ + while(sengine->qs == NULL && !sengine->done){ pthread_cond_wait(&sengine->cond, &sengine->lock); } if(sengine->done){ @@ -1501,8 +1514,18 @@ int sixel_init(int fd){ static inline int restore_band(sixelmap* smap, int band, int startx, int endx, - int starty, int endy, int dimx, int cellpixy, int cellpixx, + int starty, int endy, int dimx, int cellpxy, int cellpxx, uint8_t* auxvec){ + (void)smap; + (void)band; + (void)startx; + (void)endx; + (void)starty; + (void)endy; + (void)dimx; + (void)cellpxy; + (void)cellpxx; + (void)auxvec; // FIXME return 0; } From c49fb19fdb799bd0428e6bbef5d32da15835a61c Mon Sep 17 00:00:00 2001 From: nick black Date: Sat, 5 Feb 2022 06:35:09 -0500 Subject: [PATCH 05/10] [sixel] queue + engine #2537 --- src/lib/sixel.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/lib/sixel.c b/src/lib/sixel.c index 13197468d..e38fb78ba 100644 --- a/src/lib/sixel.c +++ b/src/lib/sixel.c @@ -1,9 +1,13 @@ #include +#include #include "internal.h" #include "fbuf.h" #define RGBSIZE 3 +// number of worker threads FIXME +#define POPULATION 3 + // this palette entry is a sentinel for a transparent pixel (and thus caps // the palette at 65535 other entries). #define TRANS_PALETTE_ENTRY 65535 @@ -181,6 +185,7 @@ qidx(const qnode* q){ } typedef struct qstate { + atomic_int refcount; // initialized to worker count // we always work in terms of quantized colors (quantization is the first // step of rendering), using indexes into the derived palette. the actual // palette need only be stored during the initial render, since the sixel @@ -199,6 +204,7 @@ typedef struct qstate { // these are the leny and lenx passed to sixel_blit(), which are likely // different from those reachable through bargs->len{y,x}! int leny, lenx; + struct qstate* next; // next in the threading engine's queue } qstate; #define QNODECOUNT 1000 @@ -230,6 +236,8 @@ alloc_qstate(unsigned colorregs, qstate* qs){ // when we pull a dynamic one that it needs its popcount initialized. memset(qs->qnodes, 0, sizeof(qnode) * QNODECOUNT); qs->table = NULL; + qs->refcount = 1 + POPULATION; + qs->next = NULL; return 0; } @@ -1435,7 +1443,7 @@ int sixel_draw(const tinfo* ti, const ncpile* p, sprixel* s, fbuf* f, // we keep a few worker threads spun up to assist with quantization. typedef struct sixel_engine { // FIXME we'll want maybe one per core in our cpuset? - pthread_t tids[3]; + pthread_t tids[POPULATION]; unsigned workers; unsigned workers_wanted; pthread_mutex_t lock; @@ -1461,18 +1469,27 @@ sixel_worker(void* v){ }else{ pthread_mutex_unlock(&globsengine.lock); } + qstate* qs = NULL; + pthread_mutex_lock(&sengine->lock); do{ - pthread_mutex_lock(&sengine->lock); - while(sengine->qs == NULL && !sengine->done){ + while((sengine->qs == NULL || sengine->qs == qs) && !sengine->done){ pthread_cond_wait(&sengine->cond, &sengine->lock); } if(sengine->done){ pthread_mutex_unlock(&sengine->lock); return NULL; } - // FIXME take workchunk + qs = sengine->qs; pthread_mutex_unlock(&sengine->lock); - // FIXME handle workchunk + // FIXME handle qs + if(--qs->refcount == 0){ + qstate* qnext = qs->next; + free_qstate(qs); + pthread_mutex_lock(&sengine->lock); + sengine->qs = qnext; + }else{ + pthread_mutex_lock(&sengine->lock); + } }while(1); } From 23b68e035c09f001527034f7538187b2f90f9aa5 Mon Sep 17 00:00:00 2001 From: nick black Date: Sat, 5 Feb 2022 07:36:27 -0500 Subject: [PATCH 06/10] [sixel] thread on bands #2573 --- src/lib/sixel.c | 86 ++++++++++++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 33 deletions(-) diff --git a/src/lib/sixel.c b/src/lib/sixel.c index e38fb78ba..d1da5a2ca 100644 --- a/src/lib/sixel.c +++ b/src/lib/sixel.c @@ -186,6 +186,7 @@ qidx(const qnode* q){ typedef struct qstate { atomic_int refcount; // initialized to worker count + atomic_int bandbuilder; // threads take bands as their work unit // we always work in terms of quantized colors (quantization is the first // step of rendering), using indexes into the derived palette. the actual // palette need only be stored during the initial render, since the sixel @@ -456,10 +457,15 @@ auxvec_idx(int y, int x, int sy, int sx, int cellpxy, int cellpxx){ static inline void write_auxvec(uint8_t* auxvec, int color, int y, int x, int len, int sx, int ex, int sy, int ey, char rep, char mask, int cellpxy, int cellpxx){ -fprintf(stderr, "AUXVEC UPDATE[%d] y/x: %d/%d:%d s: %d/%d e: %d/%d %d\n", color, y, x, len, sy, sx, ey, ex, rep); +//fprintf(stderr, "AUXVEC UPDATE[%d] y/x: %d/%d:%d s: %d/%d e: %d/%d %d\n", color, y, x, len, sy, sx, ey, ex, rep); for(int i = x ; i < x + len ; ++i){ const int idx = auxvec_idx(y, i, sy, sx, cellpxy, cellpxx); -fprintf(stderr, "AUXVEC %d for %d: %d\n", i, color, idx); +//fprintf(stderr, "AUXVEC %d for %d: %d\n", i, color, idx); + (void)ex; + (void)ey; + (void)rep; + (void)color; + (void)idx; (void)auxvec; // FIXME (void)mask; // FIXME } @@ -521,7 +527,7 @@ wipe_color(sixelband* b, int color, int y, int startx, int endx, } if(x + rle >= endx){ // FIXME this might equal the prev/next rep, and we ought combine -fprintf(stderr, "************************* %d %d %d\n", endx - x, x, rle); +//fprintf(stderr, "************************* %d %d %d\n", endx - x, x, rle); write_rle(newvec, &voff, endx - x, masked); write_auxvec(auxvec, color, y, x, endx - x, startx, endx, starty, endy, rep, mask, cellpxy, cellpxx); @@ -547,7 +553,7 @@ fprintf(stderr, "************************* %d %d %d\n", endx - x, x, rle); break; } } -if(strcmp(newvec, b->vecs[color])) fprintf(stderr, "WIPED %d y [%d..%d) x [%d..%d) mask: %d [%s]\n", color, starty, endy, startx, endx, mask, newvec); +//if(strcmp(newvec, b->vecs[color])) fprintf(stderr, "WIPED %d y [%d..%d) x [%d..%d) mask: %d [%s]\n", color, starty, endy, startx, endx, mask, newvec); free(b->vecs[color]); if(voff == 0){ // FIXME check for other null vectors; free such, and assign NULL @@ -883,6 +889,7 @@ load_color_table(const qstate* qs){ // build up a sixel band from (up to) 6 rows of the source RGBA. static inline int build_sixel_band(qstate* qs, int bnum){ +//fprintf(stderr, "building band %d\n", bnum); sixelband* b = &qs->smap->bands[bnum]; b->size = qs->smap->colors; size_t bsize = sizeof(*b->vecs) * b->size; @@ -972,6 +979,32 @@ build_sixel_band(qstate* qs, int bnum){ return 0; } +// we keep a few worker threads spun up to assist with quantization. +typedef struct sixel_engine { + // FIXME we'll want maybe one per core in our cpuset? + pthread_t tids[POPULATION]; + unsigned workers; + unsigned workers_wanted; + pthread_mutex_t lock; + pthread_cond_t cond; + bool done; + qstate* qs; +} sixel_engine; + +// FIXME make this part of the context, sheesh +static sixel_engine globsengine; + +static int +bandworker(qstate* qs){ + int b; + while((b = qs->bandbuilder++) < qs->smap->sixelbands){ + if(build_sixel_band(qs, b) < 0){ + return -1; + } + } + return 0; +} + // we have converged upon some number of colors. we now run over the pixels // once again, and get the actual (color-indexed) sixels. static inline int @@ -981,17 +1014,20 @@ build_data_table(qstate* qs){ logerror("no sixels"); return -1; } - for(int i = 0 ; i < smap->sixelbands ; ++i){ - if(build_sixel_band(qs, i) < 0){ - return -1; - } - } + qs->bandbuilder = 0; + pthread_mutex_lock(&globsengine.lock); + // FIXME need enqueue it + globsengine.qs = qs; + pthread_mutex_unlock(&globsengine.lock); + pthread_cond_signal(&globsengine.cond); size_t tsize = RGBSIZE * smap->colors; qs->table = malloc(tsize); if(qs->table == NULL){ return -1; } load_color_table(qs); + bandworker(qs); + // FIXME need to drop our reference, possibly drop qs return 0; } @@ -1440,21 +1476,6 @@ int sixel_draw(const tinfo* ti, const ncpile* p, sprixel* s, fbuf* f, return s->glyph.used; } -// we keep a few worker threads spun up to assist with quantization. -typedef struct sixel_engine { - // FIXME we'll want maybe one per core in our cpuset? - pthread_t tids[POPULATION]; - unsigned workers; - unsigned workers_wanted; - pthread_mutex_t lock; - pthread_cond_t cond; - bool done; - qstate* qs; -} sixel_engine; - -// FIXME make this part of the context, sheesh -static sixel_engine globsengine; - // a quantization worker. static void * sixel_worker(void* v){ @@ -1463,32 +1484,31 @@ sixel_worker(void* v){ if(++sengine->workers < sengine->workers_wanted){ pthread_mutex_unlock(&globsengine.lock); // don't bail on a failure here - if(pthread_create(&sengine->tids[sengine->workers], NULL, sixel_worker, sengine)){ + if(pthread_create(&globsengine.tids[sengine->workers], NULL, sixel_worker, sengine)){ logerror("couldn't spin up sixel worker %u", sengine->workers); } }else{ pthread_mutex_unlock(&globsengine.lock); } qstate* qs = NULL; - pthread_mutex_lock(&sengine->lock); + pthread_mutex_lock(&globsengine.lock); do{ while((sengine->qs == NULL || sengine->qs == qs) && !sengine->done){ - pthread_cond_wait(&sengine->cond, &sengine->lock); + pthread_cond_wait(&globsengine.cond, &globsengine.lock); } if(sengine->done){ - pthread_mutex_unlock(&sengine->lock); + pthread_mutex_unlock(&globsengine.lock); return NULL; } qs = sengine->qs; - pthread_mutex_unlock(&sengine->lock); - // FIXME handle qs + pthread_mutex_unlock(&globsengine.lock); + bandworker(qs); if(--qs->refcount == 0){ + pthread_mutex_lock(&globsengine.lock); qstate* qnext = qs->next; - free_qstate(qs); - pthread_mutex_lock(&sengine->lock); sengine->qs = qnext; }else{ - pthread_mutex_lock(&sengine->lock); + pthread_mutex_lock(&globsengine.lock); } }while(1); } From c35b1c6f072714270cc1576c65bb1eabdad833ad Mon Sep 17 00:00:00 2001 From: nick black Date: Sat, 5 Feb 2022 07:52:12 -0500 Subject: [PATCH 07/10] [sixel] need a broadcast to bring out all workers #2573 --- src/lib/sixel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/sixel.c b/src/lib/sixel.c index d1da5a2ca..9917328ef 100644 --- a/src/lib/sixel.c +++ b/src/lib/sixel.c @@ -1019,7 +1019,7 @@ build_data_table(qstate* qs){ // FIXME need enqueue it globsengine.qs = qs; pthread_mutex_unlock(&globsengine.lock); - pthread_cond_signal(&globsengine.cond); + pthread_cond_broadcast(&globsengine.cond); size_t tsize = RGBSIZE * smap->colors; qs->table = malloc(tsize); if(qs->table == NULL){ From f38418d896ce3862222b95d8d7bd49735a182d45 Mon Sep 17 00:00:00 2001 From: nick black Date: Sat, 5 Feb 2022 09:22:35 -0500 Subject: [PATCH 08/10] [sixel] widen check in auxvec_idx() --- src/lib/sixel.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/lib/sixel.c b/src/lib/sixel.c index 9917328ef..08756d5cc 100644 --- a/src/lib/sixel.c +++ b/src/lib/sixel.c @@ -436,11 +436,11 @@ sixelband_extend(char* vec, struct band_extender* bes, int dimx, int curx){ // coordinates, plus the origin+dimensions of the relevant cell. static inline int auxvec_idx(int y, int x, int sy, int sx, int cellpxy, int cellpxx){ - if(y >= sy + cellpxy || y < sy){ + if(y >= sy + cellpxy || y < sy - cellpxy){ logpanic("illegal y for %d cell at %d: %d", cellpxy, sy, y); return -1; } - if(x >= sx + cellpxx || x < sx){ + if(x >= sx + cellpxx || x < sx - cellpxx){ logpanic("illegal x for %d cell at %d: %d", cellpxx, sx, x); return -1; } @@ -459,7 +459,11 @@ write_auxvec(uint8_t* auxvec, int color, int y, int x, int len, int sx, int ex, int sy, int ey, char rep, char mask, int cellpxy, int cellpxx){ //fprintf(stderr, "AUXVEC UPDATE[%d] y/x: %d/%d:%d s: %d/%d e: %d/%d %d\n", color, y, x, len, sy, sx, ey, ex, rep); for(int i = x ; i < x + len ; ++i){ + // we get the auxvec const int idx = auxvec_idx(y, i, sy, sx, cellpxy, cellpxx); + if(idx < 0){ + continue; + } //fprintf(stderr, "AUXVEC %d for %d: %d\n", i, color, idx); (void)ex; (void)ey; @@ -1001,6 +1005,7 @@ bandworker(qstate* qs){ if(build_sixel_band(qs, b) < 0){ return -1; } +//fprintf(stderr, "%lu DID BAND %d on %p\n", pthread_self(), b, qs); } return 0; } From 37a3c7650dc358fd835aa7b224cfb16877c44a29 Mon Sep 17 00:00:00 2001 From: nick black Date: Mon, 7 Feb 2022 01:03:25 -0500 Subject: [PATCH 09/10] [sixel] safe, reliable worker engine #2573 --- src/lib/sixel.c | 364 ++++++++++++++++++++++++++---------------------- 1 file changed, 201 insertions(+), 163 deletions(-) diff --git a/src/lib/sixel.c b/src/lib/sixel.c index 08756d5cc..9b6d33712 100644 --- a/src/lib/sixel.c +++ b/src/lib/sixel.c @@ -5,9 +5,12 @@ #define RGBSIZE 3 -// number of worker threads FIXME +// number of worker threads FIXME fit to local machine #define POPULATION 3 +// a worker can have up to three qstates enqueued for work +#define WORKERDEPTH 3 + // this palette entry is a sentinel for a transparent pixel (and thus caps // the palette at 65535 other entries). #define TRANS_PALETTE_ENTRY 65535 @@ -15,21 +18,36 @@ // bytes per element in the auxiliary vector #define AUXVECELEMSIZE 2 -// returns the number of individual sixels necessary to represent the specified -// pixel geometry. these might encompass more pixel rows than |dimy| would -// suggest, up to the next multiple of 6 (i.e. a single row becomes a 6-row -// bitmap; as do two, three, four, five, or six rows). input is scaled geometry. -static inline int -sixelcount(int dimy, int dimx){ - return (dimy + 5) / 6 * dimx; -} +// three scaled sixel [0..100x3] components plus a population count. +typedef struct qsample { + unsigned char comps[RGBSIZE]; + uint32_t pop; +} qsample; -// returns the number of sixel bands (horizontal series of sixels, aka 6 rows) -// for |dimy| source rows. sixels are encoded as a series of sixel bands. -static inline int -sixelbandcount(int dimy){ - return sixelcount(dimy, 1); -} +// lowest samples for each node. first-order nodes track 1000 points in +// sixelspace (10x10x10). there are eight possible second-order nodes from a +// fractured first-order node, covering 125 points each (5x5x5). +typedef struct qnode { + qsample q; + // cidx plays two roles. during merge, we select the active set, and extract + // them (since they'll be sorted, we can't operate directly on the octree). + // here, we use cidx to map back to the initial octree entry, as we need + // update them (from the active set) at the end of merging. afterwards, the + // high bit indicates that it was chosen, and the cidx is a valid index into + // the final color table. it is otherwise a link to the merged qnode. + // during initial filtering, qlink determines whether a node has fractured: + // if qlink is non-zero, it is a one-biased index to an onode. + // FIXME combine these once more, but for now to keep it easy, we have two. + // qlink links back into the octree. + uint16_t qlink; + uint16_t cidx; +} qnode; + +// an octree-style node, used for fractured first-order nodes. the first +// bit is whether we're on the top or bottom of the R, then G, then B. +typedef struct onode { + qnode* q[8]; +} onode; // we set P2 based on whether there is any transparency in the sixel. if not, // use SIXEL_P2_ALLOPAQUE (0), for faster drawing in certain terminals. @@ -66,12 +84,99 @@ typedef struct sixelmap { sixel_p2_e p2; // set to SIXEL_P2_TRANS if we have transparent pixels } sixelmap; -// second pass: construct data for extracted colors over the sixels. the -// map will be persisted in the sprixel; the remainder is lost. -// FIXME kill this off; use sixelmap directly -typedef struct sixeltable { - sixelmap* map; // copy of palette indices / transparency bits -} sixeltable; +typedef struct qstate { + int refcount; // initialized to worker count + atomic_int bandbuilder; // threads take bands as their work unit + // we always work in terms of quantized colors (quantization is the first + // step of rendering), using indexes into the derived palette. the actual + // palette need only be stored during the initial render, since the sixel + // header can be preserved, and the palette is unchanged by wipes/restores. + unsigned char* table; // |colors| x RGBSIZE components + qnode* qnodes; + onode* onodes; + unsigned dynnodes_free; + unsigned dynnodes_total; + unsigned onodes_free; + unsigned onodes_total; + const struct blitterargs* bargs; + const uint32_t* data; + int linesize; + sixelmap* smap; + // these are the leny and lenx passed to sixel_blit(), which are likely + // different from those reachable through bargs->len{y,x}! + int leny, lenx; +} qstate; + +// a work_queue per worker thread. if used == WORKERDEPTH, this thread is +// backed up, and we cannot enqueue to it. writeto wraps around the array. +typedef struct work_queue { + qstate* qstates[WORKERDEPTH]; + unsigned writeto; + unsigned used; +} work_queue; + +// we keep a few worker threads (POPULATION) spun up to assist with +// quantization. each has an array of up to WORKERDEPTH qstates to work on. +typedef struct sixel_engine { + pthread_mutex_t lock; + pthread_cond_t cond; + work_queue queues[POPULATION]; + pthread_t tids[POPULATION]; + bool done; +} sixel_engine; + +// enqueue |qs| to any workers with available space. the number of workers with +// a reference will be stored in |qs|->refcount. +static void +enqueue_to_workers(sixel_engine* eng, qstate* qs){ + int usecount = 0; + pthread_mutex_lock(&eng->lock); + for(int i = 0 ; i < POPULATION ; ++i){ + work_queue* wq = &eng->queues[i]; + if(wq->used < WORKERDEPTH){ + wq->qstates[wq->writeto] = qs; + ++wq->used; + ++usecount; + } + if(++wq->writeto == WORKERDEPTH){ + wq->writeto = 0; + } + } + qs->refcount = usecount; + pthread_mutex_unlock(&eng->lock); + if(usecount){ + pthread_cond_broadcast(&eng->cond); + } +} + +// block until all workers have finished up with |qs| +static void +block_on_workers(sixel_engine* eng, qstate* qs){ + pthread_mutex_lock(&eng->lock); + while(qs->refcount){ + pthread_cond_wait(&eng->cond, &eng->lock); + } + pthread_mutex_unlock(&eng->lock); +} + +// FIXME make this part of the context, sheesh +static sixel_engine globsengine; + +// returns the number of individual sixels necessary to represent the specified +// pixel geometry. these might encompass more pixel rows than |dimy| would +// suggest, up to the next multiple of 6 (i.e. a single row becomes a 6-row +// bitmap; as do two, three, four, five, or six rows). input is scaled geometry. +static inline int +sixelcount(int dimy, int dimx){ + return (dimy + 5) / 6 * dimx; +} + +// returns the number of sixel bands (horizontal series of sixels, aka 6 rows) +// for |dimy| source rows. sixels are encoded as a series of sixel bands. +static inline int +sixelbandcount(int dimy){ + return sixelcount(dimy, 1); +} // whip up a sixelmap sans data for the specified pixel geometry and color // register count. @@ -113,37 +218,6 @@ void sixelmap_free(sixelmap *s){ } } -// three scaled sixel [0..100x3] components plus a population count. -typedef struct qsample { - unsigned char comps[RGBSIZE]; - uint32_t pop; -} qsample; - -// lowest samples for each node. first-order nodes track 1000 points in -// sixelspace (10x10x10). there are eight possible second-order nodes from a -// fractured first-order node, covering 125 points each (5x5x5). -typedef struct qnode { - qsample q; - // cidx plays two roles. during merge, we select the active set, and extract - // them (since they'll be sorted, we can't operate directly on the octree). - // here, we use cidx to map back to the initial octree entry, as we need - // update them (from the active set) at the end of merging. afterwards, the - // high bit indicates that it was chosen, and the cidx is a valid index into - // the final color table. it is otherwise a link to the merged qnode. - // during initial filtering, qlink determines whether a node has fractured: - // if qlink is non-zero, it is a one-biased index to an onode. - // FIXME combine these once more, but for now to keep it easy, we have two. - // qlink links back into the octree. - uint16_t qlink; - uint16_t cidx; -} qnode; - -// an octree-style node, used for fractured first-order nodes. the first -// bit is whether we're on the top or bottom of the R, then G, then B. -typedef struct onode { - qnode* q[8]; -} onode; - // convert rgb [0..255] to sixel [0..99] static inline unsigned ss(unsigned c){ @@ -184,30 +258,6 @@ qidx(const qnode* q){ return q->cidx & ~0x8000u; } -typedef struct qstate { - atomic_int refcount; // initialized to worker count - atomic_int bandbuilder; // threads take bands as their work unit - // we always work in terms of quantized colors (quantization is the first - // step of rendering), using indexes into the derived palette. the actual - // palette need only be stored during the initial render, since the sixel - // header can be preserved, and the palette is unchanged by wipes/restores. - unsigned char* table; // |colors| x RGBSIZE components - qnode* qnodes; - onode* onodes; - unsigned dynnodes_free; - unsigned dynnodes_total; - unsigned onodes_free; - unsigned onodes_total; - const struct blitterargs* bargs; - const uint32_t* data; - int linesize; - sixelmap* smap; - // these are the leny and lenx passed to sixel_blit(), which are likely - // different from those reachable through bargs->len{y,x}! - int leny, lenx; - struct qstate* next; // next in the threading engine's queue -} qstate; - #define QNODECOUNT 1000 // create+zorch an array of QNODECOUNT qnodes. this is 1000 entries covering @@ -218,28 +268,31 @@ typedef struct qstate { // we must have 8 dynnodes available for every onode we create, or we can run // into a situation where we don't have an available dynnode // (see insert_color()). -static int -alloc_qstate(unsigned colorregs, qstate* qs){ - qs->dynnodes_free = colorregs; - qs->dynnodes_total = qs->dynnodes_free; - if((qs->qnodes = malloc((QNODECOUNT + qs->dynnodes_total) * sizeof(qnode))) == NULL){ - return -1; - } - qs->onodes_free = qs->dynnodes_total / 8; - qs->onodes_total = qs->onodes_free; - if((qs->onodes = malloc(qs->onodes_total * sizeof(*qs->onodes))) == NULL){ - free(qs->qnodes); - return -1; - } - // don't technically need to clear the components, as we could - // check the pop, but it's hidden under the compulsory cache misses. - // we only initialize the static nodes, not the dynamic ones--we know - // when we pull a dynamic one that it needs its popcount initialized. - memset(qs->qnodes, 0, sizeof(qnode) * QNODECOUNT); - qs->table = NULL; - qs->refcount = 1 + POPULATION; - qs->next = NULL; - return 0; +static qstate* +alloc_qstate(unsigned colorregs){ + qstate* qs = malloc(sizeof(*qs)); + if(qs){ + qs->dynnodes_free = colorregs; + qs->dynnodes_total = qs->dynnodes_free; + if((qs->qnodes = malloc((QNODECOUNT + qs->dynnodes_total) * sizeof(qnode))) == NULL){ + free(qs); + return NULL; + } + qs->onodes_free = qs->dynnodes_total / 8; + qs->onodes_total = qs->onodes_free; + if((qs->onodes = malloc(qs->onodes_total * sizeof(*qs->onodes))) == NULL){ + free(qs->qnodes); + free(qs); + return NULL; + } + // don't technically need to clear the components, as we could + // check the pop, but it's hidden under the compulsory cache misses. + // we only initialize the static nodes, not the dynamic ones--we know + // when we pull a dynamic one that it needs its popcount initialized. + memset(qs->qnodes, 0, sizeof(qnode) * QNODECOUNT); + qs->table = NULL; + } + return qs; } // free internals of qstate object @@ -250,6 +303,7 @@ free_qstate(qstate *qs){ free(qs->qnodes); free(qs->onodes); free(qs->table); + free(qs); } } @@ -983,21 +1037,6 @@ build_sixel_band(qstate* qs, int bnum){ return 0; } -// we keep a few worker threads spun up to assist with quantization. -typedef struct sixel_engine { - // FIXME we'll want maybe one per core in our cpuset? - pthread_t tids[POPULATION]; - unsigned workers; - unsigned workers_wanted; - pthread_mutex_t lock; - pthread_cond_t cond; - bool done; - qstate* qs; -} sixel_engine; - -// FIXME make this part of the context, sheesh -static sixel_engine globsengine; - static int bandworker(qstate* qs){ int b; @@ -1005,7 +1044,6 @@ bandworker(qstate* qs){ if(build_sixel_band(qs, b) < 0){ return -1; } -//fprintf(stderr, "%lu DID BAND %d on %p\n", pthread_self(), b, qs); } return 0; } @@ -1020,11 +1058,7 @@ build_data_table(qstate* qs){ return -1; } qs->bandbuilder = 0; - pthread_mutex_lock(&globsengine.lock); - // FIXME need enqueue it - globsengine.qs = qs; - pthread_mutex_unlock(&globsengine.lock); - pthread_cond_broadcast(&globsengine.cond); + enqueue_to_workers(&globsengine, qs); size_t tsize = RGBSIZE * smap->colors; qs->table = malloc(tsize); if(qs->table == NULL){ @@ -1032,7 +1066,7 @@ build_data_table(qstate* qs){ } load_color_table(qs); bandworker(qs); - // FIXME need to drop our reference, possibly drop qs + block_on_workers(&globsengine, qs); return 0; } @@ -1364,28 +1398,28 @@ int sixel_blit(ncplane* n, int linesize, const void* data, int leny, int lenx, return -1; } assert(n->tam); - qstate qs; - if(alloc_qstate(bargs->u.pixel.colorregs, &qs)){ + qstate* qs; + if((qs = alloc_qstate(bargs->u.pixel.colorregs)) == NULL){ logerror("couldn't allocate qstate"); sixelmap_free(smap); return -1; } - qs.bargs = bargs; - qs.data = data; - qs.linesize = linesize; - qs.smap = smap; - qs.leny = leny; - qs.lenx = lenx; - if(extract_color_table(&qs)){ + qs->bargs = bargs; + qs->data = data; + qs->linesize = linesize; + qs->smap = smap; + qs->leny = leny; + qs->lenx = lenx; + if(extract_color_table(qs)){ free(bargs->u.pixel.spx->needs_refresh); bargs->u.pixel.spx->needs_refresh = NULL; sixelmap_free(smap); - free_qstate(&qs); + free_qstate(qs); return -1; } // takes ownership of sixelmap on success - int r = sixel_blit_inner(&qs, smap, bargs, n->tam); - free_qstate(&qs); + int r = sixel_blit_inner(qs, smap, bargs, n->tam); + free_qstate(qs); if(r < 0){ sixelmap_free(smap); // FIXME free refresh table? @@ -1484,47 +1518,52 @@ int sixel_draw(const tinfo* ti, const ncpile* p, sprixel* s, fbuf* f, // a quantization worker. static void * sixel_worker(void* v){ - sixel_engine *sengine = v; - pthread_mutex_lock(&globsengine.lock); - if(++sengine->workers < sengine->workers_wanted){ - pthread_mutex_unlock(&globsengine.lock); - // don't bail on a failure here - if(pthread_create(&globsengine.tids[sengine->workers], NULL, sixel_worker, sengine)){ - logerror("couldn't spin up sixel worker %u", sengine->workers); - } - }else{ - pthread_mutex_unlock(&globsengine.lock); - } + sixel_engine *sengine = &globsengine; + work_queue* wq = v; + qstate* qs = NULL; - pthread_mutex_lock(&globsengine.lock); + unsigned bufpos = 0; // index into worker queue do{ - while((sengine->qs == NULL || sengine->qs == qs) && !sengine->done){ - pthread_cond_wait(&globsengine.cond, &globsengine.lock); + pthread_mutex_lock(&sengine->lock); + while(wq->used == 0 && !sengine->done){ + pthread_cond_wait(&sengine->cond, &sengine->lock); } - if(sengine->done){ - pthread_mutex_unlock(&globsengine.lock); - return NULL; + if(!sengine->done){ + qs = wq->qstates[bufpos]; + }else{ + qs = NULL; + } + pthread_mutex_unlock(&sengine->lock); + if(qs == NULL){ + break; } - qs = sengine->qs; - pthread_mutex_unlock(&globsengine.lock); bandworker(qs); + bool sendsignal = false; + pthread_mutex_lock(&sengine->lock); + --wq->used; if(--qs->refcount == 0){ - pthread_mutex_lock(&globsengine.lock); - qstate* qnext = qs->next; - sengine->qs = qnext; - }else{ - pthread_mutex_lock(&globsengine.lock); + sendsignal = true; + } + pthread_mutex_unlock(&sengine->lock); + if(sendsignal){ + pthread_cond_broadcast(&sengine->cond); + } + if(++bufpos == WORKERDEPTH){ + bufpos = 0; } }while(1); + return NULL; } static int sixel_init_core(const char* initstr, int fd){ - globsengine.workers = 0; - globsengine.workers_wanted = sizeof(globsengine.tids) / sizeof(*globsengine.tids); - // don't fail on an error here - if(pthread_create(globsengine.tids, NULL, sixel_worker, &globsengine)){ - logerror("couldn't spin up sixel workers"); + const int workers_wanted = sizeof(globsengine.tids) / sizeof(*globsengine.tids); + for(int w = 0 ; w < workers_wanted ; ++w){ + if(pthread_create(&globsengine.tids[w], NULL, sixel_worker, &globsengine.queues[w])){ + logerror("couldn't spin up sixel worker %d/%d", w, workers_wanted); + // FIXME kill any created workers + return -1; + } } return tty_emit(initstr, fd); } @@ -1623,10 +1662,9 @@ int sixel_rebuild(sprixel* s, int ycell, int xcell, uint8_t* auxvec){ void sixel_cleanup(tinfo* ti){ (void)ti; // FIXME pick up globsengine from ti! - unsigned tids = 0; + const unsigned tids = POPULATION; pthread_mutex_lock(&globsengine.lock); globsengine.done = 1; - tids = globsengine.workers; pthread_mutex_unlock(&globsengine.lock); pthread_cond_broadcast(&globsengine.cond); // FIXME what if we spawned another worker since taking zee lock? From 34be289dda165a30215e01be4d71ea793c7d4842 Mon Sep 17 00:00:00 2001 From: nick black Date: Mon, 7 Feb 2022 01:10:49 -0500 Subject: [PATCH 10/10] [sixel] add comment on POPULATION --- src/lib/sixel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib/sixel.c b/src/lib/sixel.c index 9b6d33712..9764788c4 100644 --- a/src/lib/sixel.c +++ b/src/lib/sixel.c @@ -5,7 +5,8 @@ #define RGBSIZE 3 -// number of worker threads FIXME fit to local machine +// number of worker threads +// FIXME fit to local machine, but more than 3 never seems to help #define POPULATION 3 // a worker can have up to three qstates enqueued for work