From 75bb88d95bc25117d79ff7cc349bce5aa94b9999 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Tue, 29 Aug 2023 16:52:35 -0400
Subject: [PATCH 01/18] state_set: Improve `state_set_search` performance,
 correct result.

The description says "Return where an item would be, if it were
inserted", but it was returning the last element <= rather than
the first element >=, then the call to `state_set_cmpval` later
was shifting i by 1 for that specific case. Handle it correctly
inside the search function instead. Two other all call sites
need to check whether the result refers to the append position
(one past the end of the array) before checking `set->a[i] == state`,
update them.

Add a fast path upfront: It's VERY common to append states in order
to the state array, so before we binary search each first compare
against the last entry (unless empty).
---
 src/adt/stateset.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/adt/stateset.c b/src/adt/stateset.c
index c1cff9933..4d4de9438 100644
--- a/src/adt/stateset.c
+++ b/src/adt/stateset.c
@@ -138,7 +138,8 @@ state_set_cmp(const struct state_set *a, const struct state_set *b)
 }
 
 /*
- * Return where an item would be, if it were inserted
+ * Return where an item would be, if it were inserted.
+ * When insertion would append this returns one past the array.
  */
 static size_t
 state_set_search(const struct state_set *set, fsm_state_t state)
@@ -150,6 +151,11 @@ state_set_search(const struct state_set *set, fsm_state_t state)
 	assert(!IS_SINGLETON(set));
 	assert(set->a != NULL);
 
+	/* fast path: append case */
+	if (set->i > 0 && state > set->a[set->i - 1]) {
+		return set->i;
+	}
+
 	start = mid = 0;
 	end = set->i;
 
@@ -161,6 +167,12 @@ state_set_search(const struct state_set *set, fsm_state_t state)
 			end = mid;
 		} else if (r > 0) {
 			start = mid + 1;
+			/* update mid if we're about to halt, because
+			 * we're looking for the first position >= state,
+			 * not the last position <= */
+			if (start == end) {
+				mid = start;
+			}
 		} else {
 			return mid;
 		}
@@ -242,7 +254,7 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc,
 	 */
 	if (!state_set_empty(set)) {
 		i = state_set_search(set, state);
-		if (set->a[i] == state) {
+		if (i < set->i && set->a[i] == state) {
 			return 1;
 		}
 	}
@@ -261,9 +273,6 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc,
 			set->n *= 2;
 		}
 
-		if (state_set_cmpval(state, set->a[i]) > 0) {
-			i++;
-		}
 
 		if (i <= set->i) {
 			memmove(&set->a[i + 1], &set->a[i], (set->i - i) * (sizeof *set->a));
@@ -470,7 +479,7 @@ state_set_remove(struct state_set **setp, fsm_state_t state)
 	}
 
 	i = state_set_search(set, state);
-	if (set->a[i] == state) {
+	if (i < set->i && set->a[i] == state) {
 		if (i < set->i) {
 			memmove(&set->a[i], &set->a[i + 1], (set->i - i - 1) * (sizeof *set->a));
 		}
@@ -524,7 +533,7 @@ state_set_contains(const struct state_set *set, fsm_state_t state)
 	}
 
 	i = state_set_search(set, state);
-	if (set->a[i] == state) {
+	if (i < set->i && set->a[i] == state) {
 		return 1;
 	}
 

From 709b8cc11c5f425e6962386cd43ad36ae6eb2d68 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Tue, 29 Aug 2023 16:57:13 -0400
Subject: [PATCH 02/18] stateset: Avoid memmove of size 0.

---
 src/adt/stateset.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/adt/stateset.c b/src/adt/stateset.c
index 4d4de9438..645424555 100644
--- a/src/adt/stateset.c
+++ b/src/adt/stateset.c
@@ -273,8 +273,7 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc,
 			set->n *= 2;
 		}
 
-
-		if (i <= set->i) {
+		if (i < set->i) {
 			memmove(&set->a[i + 1], &set->a[i], (set->i - i) * (sizeof *set->a));
 		}
 

From cead0d99915faad077e3e22dd09761a61556c8e0 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Tue, 29 Aug 2023 17:09:01 -0400
Subject: [PATCH 03/18] stateset: Add note about potentially expensive
 assertion.

In -O0 this can become pretty expensive (~25% of overall runtime for
`time ./re -rpcre -C '^[ab]{0,2000}$'`), but when built with -O3 very
little overhead remains. I'm adding this comment because every time I
see this it seems to me like it should have `EXPENSIVE_CHECKS` around
it, but profiling is telling me it really doesn't matter.
---
 src/adt/stateset.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/adt/stateset.c b/src/adt/stateset.c
index 645424555..7839234d2 100644
--- a/src/adt/stateset.c
+++ b/src/adt/stateset.c
@@ -284,6 +284,8 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc,
 		set->i = 1;
 	}
 
+	/* This assert can be pretty expensive in -O0 but in -O3 it has very
+	 * little impact on the overall runtime. */
 	assert(state_set_contains(set, state));
 
 	return 1;

From cbfeddd9c54178fe34aae0606a619e9592c6dd69 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Tue, 29 Aug 2023 17:11:07 -0400
Subject: [PATCH 04/18] stateset: Comment struct fields.

---
 src/adt/stateset.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/adt/stateset.c b/src/adt/stateset.c
index 7839234d2..8d73a9b5f 100644
--- a/src/adt/stateset.c
+++ b/src/adt/stateset.c
@@ -44,8 +44,8 @@
 struct state_set {
 	const struct fsm_alloc *alloc;
 	fsm_state_t *a;
-	size_t i;
-	size_t n;
+	size_t i;		/* used */
+	size_t n;		/* ceil */
 };
 
 int

From c3dab77fc7785e3ec4df9e4fb60c6c760ca66b0c Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Tue, 29 Aug 2023 17:12:04 -0400
Subject: [PATCH 05/18] edgeset: Fix indentation for `#if`'d block.

---
 src/adt/edgeset.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/adt/edgeset.c b/src/adt/edgeset.c
index c718727ca..f1a9e9f31 100644
--- a/src/adt/edgeset.c
+++ b/src/adt/edgeset.c
@@ -223,9 +223,9 @@ edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc,
 	assert(set->count <= set->ceil);
 
 #if LOG_BITSET
-		fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n",
-		    symbols[0], symbols[1], symbols[2], symbols[3],
-		    state, (void *)set);
+	fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n",
+	    symbols[0], symbols[1], symbols[2], symbols[3],
+	    state, (void *)set);
 #endif
 
 	/* Linear search for a group with the same destination

From 1ada07cce13fd2982056963e5b1e2bae59b235e9 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Tue, 29 Aug 2023 17:12:31 -0400
Subject: [PATCH 06/18] edgeset: Switch from linear to binary searching in
 edge_set_add_bulk.

This is a major hotspot when doing epsilon removal over large runs of
potentially skipped states (as might appear from `^[ab]{0,2000}$`).

Add a fast path for appending, which is also very common.

Extract the edge set destination search into its own function,
`find_state_position`, and add a `#define` to switch between linear
search, binary search, or calling both and comparing the result.
I will remove linear search in the next commit, but am checking this
in as an intermediate step for checking & benchmarking.
---
 src/adt/edgeset.c | 182 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 165 insertions(+), 17 deletions(-)

diff --git a/src/adt/edgeset.c b/src/adt/edgeset.c
index f1a9e9f31..818b78493 100644
--- a/src/adt/edgeset.c
+++ b/src/adt/edgeset.c
@@ -11,6 +11,7 @@
 #include <inttypes.h>
 
 #define LOG_BITSET 0
+#define LOG_BSEARCH 0
 
 #include "libfsm/internal.h" /* XXX: for allocating struct fsm_edge, and the edges array */
 
@@ -184,6 +185,157 @@ edge_set_advise_growth(struct edge_set **pset, const struct fsm_alloc *alloc,
 	return 1;
 }
 
+enum fsp_res {
+	FSP_FOUND_INSERT_POSITION,
+	FSP_FOUND_VALUE_PRESENT,
+};
+
+/* Use binary search to find the first position N where set->groups[N].to >= state,
+ * which includes the position immediately following the last entry. Return an enum
+ * which indicates whether state is already present. */
+static enum fsp_res
+find_state_position_bsearch(const struct edge_set *set, fsm_state_t state, size_t *dst)
+{
+	size_t lo = 0, hi = set->count;
+	if (LOG_BSEARCH) {
+		fprintf(stderr, "%s: looking for %d in %p (count %zu)\n",
+		    __func__, state, (void *)set, set->count);
+	}
+
+#if EXPENSIVE_CHECKS
+	/* invariant: input is unique and sorted */
+	for (size_t i = 1; i < set->count; i++) {
+		assert(set->groups[i - 1].to < set->groups[i].to);
+	}
+#endif
+
+	if (set->count == 0) {
+		if (LOG_BSEARCH) {
+			fprintf(stderr, "%s: empty, returning 0\n", __func__);
+		}
+		*dst = 0;
+		return FSP_FOUND_INSERT_POSITION;
+	} else {
+		if (LOG_BSEARCH) {
+			fprintf(stderr, "%s: fast path: looking for %d, set->groups[last].to %d\n",
+			    __func__, state, set->groups[hi - 1].to);
+		}
+
+		/* Check the last entry so we can append in constant time. */
+		const fsm_state_t last = set->groups[hi - 1].to;
+		if (state > last) {
+			*dst = hi;
+			return FSP_FOUND_INSERT_POSITION;
+		} else if (state == last) {
+			*dst = hi - 1;
+			return FSP_FOUND_VALUE_PRESENT;
+		}
+	}
+
+	size_t mid;
+	while (lo < hi) {		/* lo <= mid < hi */
+		mid = lo + (hi - lo)/2; /* avoid overflow */
+		const struct edge_group *eg = &set->groups[mid];
+		const fsm_state_t cur = eg->to;
+		if (LOG_BSEARCH) {
+			fprintf(stderr, "%s: lo %zu, hi %zu, mid %zu, cur %d, looking for %d\n",
+			    __func__, lo, hi, mid, cur, state);
+		}
+
+		if (state == cur) {
+			*dst = mid;
+			return FSP_FOUND_VALUE_PRESENT;
+		} else if (state > cur) {
+			lo = mid + 1;
+			if (LOG_BSEARCH) {
+				fprintf(stderr, "%s: new lo %zd\n", __func__, lo);
+			}
+
+			/* Update mid if we're about to halt, because we're looking
+			 * for the first position >= state, not the last position <=. */
+			if (lo == hi) {
+				mid = lo;
+				if (LOG_BSEARCH) {
+					fprintf(stderr, "%s: special case, updating mid to %zd\n", __func__, mid);
+				}
+			}
+		} else if (state < cur) {
+			hi = mid;
+			if (LOG_BSEARCH) {
+				fprintf(stderr, "%s: new hi %zd\n", __func__, hi);
+			}
+		}
+	}
+
+	if (LOG_BSEARCH) {
+		fprintf(stderr, "%s: halting at %zd (looking for %d, cur %d)\n",
+		    __func__, mid, state, set->groups[mid].to);
+	}
+
+	/* dst is now the first position > state (== case is handled above),
+	 * which may be one past the end of the array. */
+	assert(mid == set->count || set->groups[mid].to > state);
+	*dst = mid;
+	return FSP_FOUND_INSERT_POSITION;
+}
+
+static enum fsp_res
+find_state_position_linear(const struct edge_set *set, fsm_state_t state, size_t *dst)
+{
+	/* Linear search for a group with the same destination
+	 * state, or the position where that group would go. */
+	size_t i;
+	for (i = 0; i < set->count; i++) {
+		const struct edge_group *eg = &set->groups[i];
+		if (eg->to == state) {
+			*dst = i;
+			return FSP_FOUND_VALUE_PRESENT;
+		} else if (eg->to > state) {
+			break;	/* will shift down and insert below */
+		} else {
+			continue;
+		}
+	}
+
+	*dst = i;
+	return FSP_FOUND_INSERT_POSITION;
+}
+
+/* Find the state in the edge set, or where it would be inserted if not present. */
+static enum fsp_res
+find_state_position(const struct edge_set *set, fsm_state_t state, size_t *dst)
+{
+	/* 0: linear, 1: bsearch, -1: call both, to check result */
+#define USE_BSEARCH 1
+
+	switch (USE_BSEARCH) {
+	case 0:
+		return find_state_position_linear(set, state, dst);
+	case 1:
+		return find_state_position_bsearch(set, state, dst);
+	case -1:
+	{
+		size_t dst_linear, dst_bsearch;
+		enum fsp_res res_linear = find_state_position_linear(set, state, &dst_linear);
+		enum fsp_res res_bsearch = find_state_position_bsearch(set, state, &dst_bsearch);
+
+		if (res_linear != res_bsearch || dst_linear != dst_bsearch) {
+			fprintf(stderr, "%s: disagreement for state %d: linear res %d, dst %zu, bsearch res %d, dst %zu\n",
+			    __func__, state,
+			    res_linear, dst_linear,
+			    res_bsearch, dst_bsearch);
+			for (size_t i = 0; i < set->count; i++) {
+				fprintf(stderr, "set->groups[%zu].to: %d\n", i, set->groups[i].to);
+			}
+		}
+		assert(res_linear == res_bsearch);
+		assert(dst_linear == dst_bsearch);
+		*dst = dst_linear;
+		return res_linear;
+	}
+	}
+}
+
 int
 edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc,
 	uint64_t symbols[256/64], fsm_state_t state)
@@ -228,25 +380,21 @@ edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc,
 	    state, (void *)set);
 #endif
 
-	/* Linear search for a group with the same destination
-	 * state, or the position where that group would go. */
-	for (i = 0; i < set->count; i++) {
+	switch (find_state_position(set, state, &i)) {
+	case FSP_FOUND_VALUE_PRESENT:
+		assert(i < set->count);
+		/* This API does not indicate whether that
+		 * symbol -> to edge was already present. */
 		eg = &set->groups[i];
-
-		if (eg->to == state) {
-			/* This API does not indicate whether that
-			 * symbol -> to edge was already present. */
-			size_t i;
-			for (i = 0; i < 256/64; i++) {
-				eg->symbols[i] |= symbols[i];
-			}
-			dump_edge_set(set);
-			return 1;
-		} else if (eg->to > state) {
-			break;	/* will shift down and insert below */
-		} else {
-			continue;
+		for (i = 0; i < 256/64; i++) {
+			eg->symbols[i] |= symbols[i];
 		}
+		dump_edge_set(set);
+		return 1;
+
+		break;
+	case FSP_FOUND_INSERT_POSITION:
+		break;		/* continue below */
 	}
 
 	/* insert/append at i */

From 7122d2fe739e106be8b422206b15e681027acd11 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Tue, 29 Aug 2023 17:19:24 -0400
Subject: [PATCH 07/18] edgeset: Commit to using binary search.

When I run `time ./re -rpcre -C '^[ab]{0,2000}$'` locally for -O3:
- linear search: 2.991s
- binary search: 1.521s
---
 src/adt/edgeset.c | 59 +----------------------------------------------
 1 file changed, 1 insertion(+), 58 deletions(-)

diff --git a/src/adt/edgeset.c b/src/adt/edgeset.c
index 818b78493..dd3b8853c 100644
--- a/src/adt/edgeset.c
+++ b/src/adt/edgeset.c
@@ -194,7 +194,7 @@ enum fsp_res {
  * which includes the position immediately following the last entry. Return an enum
  * which indicates whether state is already present. */
 static enum fsp_res
-find_state_position_bsearch(const struct edge_set *set, fsm_state_t state, size_t *dst)
+find_state_position(const struct edge_set *set, fsm_state_t state, size_t *dst)
 {
 	size_t lo = 0, hi = set->count;
 	if (LOG_BSEARCH) {
@@ -279,63 +279,6 @@ find_state_position_bsearch(const struct edge_set *set, fsm_state_t state, size_
 	return FSP_FOUND_INSERT_POSITION;
 }
 
-static enum fsp_res
-find_state_position_linear(const struct edge_set *set, fsm_state_t state, size_t *dst)
-{
-	/* Linear search for a group with the same destination
-	 * state, or the position where that group would go. */
-	size_t i;
-	for (i = 0; i < set->count; i++) {
-		const struct edge_group *eg = &set->groups[i];
-		if (eg->to == state) {
-			*dst = i;
-			return FSP_FOUND_VALUE_PRESENT;
-		} else if (eg->to > state) {
-			break;	/* will shift down and insert below */
-		} else {
-			continue;
-		}
-	}
-
-	*dst = i;
-	return FSP_FOUND_INSERT_POSITION;
-}
-
-/* Find the state in the edge set, or where it would be inserted if not present. */
-static enum fsp_res
-find_state_position(const struct edge_set *set, fsm_state_t state, size_t *dst)
-{
-	/* 0: linear, 1: bsearch, -1: call both, to check result */
-#define USE_BSEARCH 1
-
-	switch (USE_BSEARCH) {
-	case 0:
-		return find_state_position_linear(set, state, dst);
-	case 1:
-		return find_state_position_bsearch(set, state, dst);
-	case -1:
-	{
-		size_t dst_linear, dst_bsearch;
-		enum fsp_res res_linear = find_state_position_linear(set, state, &dst_linear);
-		enum fsp_res res_bsearch = find_state_position_bsearch(set, state, &dst_bsearch);
-
-		if (res_linear != res_bsearch || dst_linear != dst_bsearch) {
-			fprintf(stderr, "%s: disagreement for state %d: linear res %d, dst %zu, bsearch res %d, dst %zu\n",
-			    __func__, state,
-			    res_linear, dst_linear,
-			    res_bsearch, dst_bsearch);
-			for (size_t i = 0; i < set->count; i++) {
-				fprintf(stderr, "set->groups[%zu].to: %d\n", i, set->groups[i].to);
-			}
-		}
-		assert(res_linear == res_bsearch);
-		assert(dst_linear == dst_bsearch);
-		*dst = dst_linear;
-		return res_linear;
-	}
-	}
-}
-
 int
 edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc,
 	uint64_t symbols[256/64], fsm_state_t state)

From 937585a955d277231d558fe5ae20b0cc40c87eda Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Wed, 30 Aug 2023 12:08:44 -0400
Subject: [PATCH 08/18] determinise: Drastically reduce calls to qsort.

After the other changes in this PR, calls to qsort from
`sort_and_dedup_dst_buf` are one of the largest remaining hotspots in
the profile. We can often avoid calling qsort, though:

- If there is <= 1 entry, just return, it's sorted.

- Otherwise, first do a sweep through the array noting the min and max
  values. Unless there is a huge range between them, it's much faster to
  build a bitset from them in a small (max 10KB) stack-allocated array
  and then unpack the bitset (now sorted and unique). Only the needed
  portion of the array is initialized.

I have not done a lot of experimentation to find a cutoff point where
the bitset becomes slower than qsort (it may be much larger), I picked
10KB because it's likely to be safe to stack-allocate.

I tried changing the bitset unpacking to use an 8 or 16 bit mask and
jump forward faster through large sub-word ranges of 0 bits, but any
improvement was lost among random variation, so I decided it wasn't
worth the extra complexity. We already skip whole words that are 0.
---
 src/libfsm/determinise.c | 95 ++++++++++++++++++++++++++++++++--------
 1 file changed, 77 insertions(+), 18 deletions(-)

diff --git a/src/libfsm/determinise.c b/src/libfsm/determinise.c
index 56e135afd..ac15f05a8 100644
--- a/src/libfsm/determinise.c
+++ b/src/libfsm/determinise.c
@@ -2016,28 +2016,87 @@ static void
 sort_and_dedup_dst_buf(fsm_state_t *buf, size_t *used)
 {
 	const size_t orig_used = *used;
-	qsort(buf, orig_used, sizeof(buf[0]), cmp_fsm_state_t);
-
-	/* squash out duplicates */
-	size_t rd = 1;
-	size_t wr = 1;
-	while (rd < orig_used) {
-		if (buf[rd - 1] == buf[rd]) {
-			rd++;	/* skip */
-		} else {
-			buf[wr] = buf[rd];
-			rd++;
-			wr++;
-		}
+
+	if (orig_used <= 1) {
+		return;		/* no change */
 	}
 
-	*used = wr;
-#if EXPENSIVE_CHECKS
-	assert(wr <= orig_used);
-	for (size_t i = 1; i < *used; i++) {
-		assert(buf[i - 1] < buf[i]);
+	/* Figure out what the min and max values are, because
+	 * when the difference between them is not too large it
+	 * can be significantly faster to avoid qsort here. */
+	fsm_state_t min = (fsm_state_t)-1;
+	fsm_state_t max = 0;
+	for (size_t i = 0; i < orig_used; i++) {
+		const fsm_state_t cur = buf[i];
+		if (cur < min) { min = cur; }
+		if (cur > max) { max = cur; }
 	}
+
+	/* If there's only one unique value, then we're done. */
+	if (min == max) {
+		buf[0] = min;
+		*used = 1;
+		return;
+	}
+
+/* 81920 = 10 KB buffer on the stack. This must be divisible by 64.
+ * Set to 0 to disable. */
+#define QSORT_CUTOFF 81920
+
+	if (QSORT_CUTOFF == 0 || max - min > QSORT_CUTOFF) {
+		/* If the bitset would be very large but sparse due to
+		 * extreme values, then fall back on using qsort and
+		 * then sweeping over the array to squash out
+		 * duplicates. */
+		qsort(buf, orig_used, sizeof(buf[0]), cmp_fsm_state_t);
+
+		/* squash out duplicates */
+		size_t rd = 1;
+		size_t wr = 1;
+		while (rd < orig_used) {
+			if (buf[rd - 1] == buf[rd]) {
+				rd++;	/* skip */
+			} else {
+				buf[wr] = buf[rd];
+				rd++;
+				wr++;
+			}
+		}
+
+		*used = wr;
+#if EXPENSIVE_CHECKS
+		assert(wr <= orig_used);
+		for (size_t i = 1; i < *used; i++) {
+			assert(buf[i - 1] < buf[i]);
+		}
 #endif
+	} else {
+		/* Convert the array into a bitset and back, which sorts
+		 * and deduplicates in the process. Add 1 to avoid a zero-
+		 * zero-length array error if QSORT_CUTOFF is 0. */
+		uint64_t bitset[QSORT_CUTOFF/64 + 1];
+		const size_t words = u64bitset_words(max - min);
+		memset(bitset, 0x00, words * sizeof(bitset[0]));
+
+		for (size_t i = 0; i < orig_used; i++) {
+			u64bitset_set(bitset, buf[i] - min);
+		}
+
+		size_t dst = 0;
+		for (size_t i = 0; i < words; i++) {
+			const uint64_t w = bitset[i];
+			if (w != 0) { /* skip empty words */
+				uint64_t bit = 0x1;
+				for (size_t b_i = 0; b_i < 64; b_i++, bit <<= 1) {
+					if (w & bit) {
+						buf[dst] = 64*i + b_i + min;
+						dst++;
+					}
+				}
+			}
+		}
+		*used = dst;
+	}
 }
 
 static int

From 30e34ef9a739c78f788753919d68d6925171a0d5 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Wed, 30 Aug 2023 12:17:56 -0400
Subject: [PATCH 09/18] edgeset: Remove stale comment.

---
 src/adt/edgeset.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/adt/edgeset.c b/src/adt/edgeset.c
index dd3b8853c..9658213c8 100644
--- a/src/adt/edgeset.c
+++ b/src/adt/edgeset.c
@@ -326,8 +326,6 @@ edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc,
 	switch (find_state_position(set, state, &i)) {
 	case FSP_FOUND_VALUE_PRESENT:
 		assert(i < set->count);
-		/* This API does not indicate whether that
-		 * symbol -> to edge was already present. */
 		eg = &set->groups[i];
 		for (i = 0; i < 256/64; i++) {
 			eg->symbols[i] |= symbols[i];

From cf6051f26135802784795465232369e5dfc66c66 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Wed, 30 Aug 2023 12:46:35 -0400
Subject: [PATCH 10/18] UBSan: Avoid implicit signed/unsigned conversion.

---
 src/libre/ast_compile.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/libre/ast_compile.c b/src/libre/ast_compile.c
index 502faf8b4..aa0902d55 100644
--- a/src/libre/ast_compile.c
+++ b/src/libre/ast_compile.c
@@ -208,11 +208,11 @@ addedge_literal(struct comp_env *env, enum re_flags re_flags,
 	assert(to < env->fsm->statecount);
 
 	if (re_flags & RE_ICASE) {
-		if (!fsm_addedge_literal(fsm, from, to, tolower((unsigned char) c))) {
+		if (!fsm_addedge_literal(fsm, from, to, (char)tolower((unsigned char) c))) {
 			return 0;
 		}
 		
-		if (!fsm_addedge_literal(fsm, from, to, toupper((unsigned char) c))) {
+		if (!fsm_addedge_literal(fsm, from, to, (char)toupper((unsigned char) c))) {
 			return 0;
 		}
 	} else {

From c1e12828b2e79a11b61978d809a0e020f5d8a6e6 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Wed, 30 Aug 2023 12:49:22 -0400
Subject: [PATCH 11/18] UBSan: Avoid implicit signed/unsigned conversion.

---
 src/retest/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/retest/main.c b/src/retest/main.c
index b6b4c52f7..e01c93e7c 100644
--- a/src/retest/main.c
+++ b/src/retest/main.c
@@ -393,7 +393,7 @@ parse_escapes(char *s, char **errpos, int *lenp)
 
 					ndig++;
 				} else {
-					s[j++] = ccode;
+					s[j++] = (char)ccode;
 					st = ST_BARE;
 
 					if (!hexcurly) {

From 6eff0f98ce625e93ba025f0f4c7358153708cc92 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Wed, 30 Aug 2023 13:17:12 -0400
Subject: [PATCH 12/18] bugfix: The range is min..max inclusive, so add 1.

If min and max are exactly 64 states apart the upper value was getting
silently dropped due to an incorrect `words` value here.

One of the patterns in the PCRE suite triggers this:

    ./re -rpcre '(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))' "caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"

This should match, but did not.
---
 src/libfsm/determinise.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/libfsm/determinise.c b/src/libfsm/determinise.c
index ac15f05a8..3dbdc429e 100644
--- a/src/libfsm/determinise.c
+++ b/src/libfsm/determinise.c
@@ -2075,7 +2075,7 @@ sort_and_dedup_dst_buf(fsm_state_t *buf, size_t *used)
 		 * and deduplicates in the process. Add 1 to avoid a zero-
 		 * zero-length array error if QSORT_CUTOFF is 0. */
 		uint64_t bitset[QSORT_CUTOFF/64 + 1];
-		const size_t words = u64bitset_words(max - min);
+		const size_t words = u64bitset_words(max - min + 1);
 		memset(bitset, 0x00, words * sizeof(bitset[0]));
 
 		for (size_t i = 0; i < orig_used; i++) {

From 98ee906d15426150020070db975778bf5f97a565 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Thu, 15 Jun 2023 14:00:25 -0400
Subject: [PATCH 13/18] Address a couple warnings from scan-build.

determinise: It's not possible to find a cached result in the hash
table without allocating a to-set buffer first, so assert that it
will be non-NULL.

fsm_findmode: This should never be used on a state without edges.

vm/v1.c and vm/v2.c: Free allocated return value on error.
---
 src/libfsm/determinise.c | 2 ++
 src/libfsm/mode.c        | 4 ++++
 src/libfsm/vm/v1.c       | 4 +++-
 src/libfsm/vm/v2.c       | 5 ++++-
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/libfsm/determinise.c b/src/libfsm/determinise.c
index 3dbdc429e..9c77f3a42 100644
--- a/src/libfsm/determinise.c
+++ b/src/libfsm/determinise.c
@@ -1339,6 +1339,7 @@ to_set_htab_check(struct analyze_closures_env *env,
 		if (b->count == 0) {
 			return 0; /* empty bucket -> not found */
 		} else if (b->count == count) {
+			assert(env->to_sets.buf != NULL);
 			assert(b->offset + count <= env->to_sets.used);
 			const fsm_state_t *ids = &env->to_sets.buf[b->offset];
 			if (0 == memcmp(ids, dst, count * sizeof(dst[0]))) {
@@ -1465,6 +1466,7 @@ save_to_set(struct analyze_closures_env *env,
 		env->to_sets.ceil = nceil;
 		env->to_sets.buf = nbuf;
 	}
+	assert(env->to_sets.buf != NULL);
 
 #if LOG_TO_SET
 	static size_t to_set_id;
diff --git a/src/libfsm/mode.c b/src/libfsm/mode.c
index 76c60b8ad..87af0bdf9 100644
--- a/src/libfsm/mode.c
+++ b/src/libfsm/mode.c
@@ -28,6 +28,7 @@ fsm_findmode(const struct fsm *fsm, fsm_state_t state, unsigned int *freq)
 	} mode;
 
 	mode.freq = 1;
+	mode.state = (fsm_state_t)-1;
 
 	edge_set_group_iter_reset(fsm->states[state].edges, EDGE_GROUP_ITER_ALL, &iter);
 	while (edge_set_group_iter_next(&iter, &info)) {
@@ -46,6 +47,9 @@ fsm_findmode(const struct fsm *fsm, fsm_state_t state, unsigned int *freq)
 		*freq = mode.freq;
 	}
 
+	/* It's not meaningful to call this on a state without edges. */
+	assert(mode.state != (fsm_state_t)-1);
+
 	assert(mode.freq >= 1);
 	return mode.state;
 }
diff --git a/src/libfsm/vm/v1.c b/src/libfsm/vm/v1.c
index a326b88d8..de1f6ea93 100644
--- a/src/libfsm/vm/v1.c
+++ b/src/libfsm/vm/v1.c
@@ -217,7 +217,9 @@ encode_opasm_v1(const struct dfavm_vm_op *instr, size_t ninstr, size_t total_byt
 	return ret;
 
 error:
-	/* XXX - cleanup */
+	if (ret != NULL) {
+		free(ret);
+	}
 	return NULL;
 }
 
diff --git a/src/libfsm/vm/v2.c b/src/libfsm/vm/v2.c
index c85edff98..07eb12ef4 100644
--- a/src/libfsm/vm/v2.c
+++ b/src/libfsm/vm/v2.c
@@ -155,7 +155,10 @@ encode_opasm_v2(const struct dfavm_vm_op *instr, size_t ninstr)
 	return ret;
 
 error:
-	/* XXX - cleanup */
+	if (ret != NULL) {
+		free(ret);
+	}
+
 	return NULL;
 }
 

From 51892e3745fd642f6bb8a6acd31b69cf065a43b3 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Thu, 16 Feb 2023 13:52:31 -0500
Subject: [PATCH 14/18] Add src/adt/idmap.c, a state -> ID set map.

---
 Makefile                  |   1 +
 include/adt/idmap.h       |  58 ++++++
 src/adt/Makefile          |   1 +
 src/adt/idmap.c           | 392 ++++++++++++++++++++++++++++++++++++++
 tests/idmap/Makefile      |  19 ++
 tests/idmap/idmap_basic.c | 136 +++++++++++++
 6 files changed, 607 insertions(+)
 create mode 100644 include/adt/idmap.h
 create mode 100644 src/adt/idmap.c
 create mode 100644 tests/idmap/Makefile
 create mode 100644 tests/idmap/idmap_basic.c

diff --git a/Makefile b/Makefile
index 8d742883e..f1f4f1396 100644
--- a/Makefile
+++ b/Makefile
@@ -108,6 +108,7 @@ SUBDIR += src
 SUBDIR += tests/capture
 SUBDIR += tests/complement
 SUBDIR += tests/gen
+SUBDIR += tests/idmap
 SUBDIR += tests/intersect
 #SUBDIR += tests/ir # XXX: fragile due to state numbering
 SUBDIR += tests/eclosure
diff --git a/include/adt/idmap.h b/include/adt/idmap.h
new file mode 100644
index 000000000..064fd15d1
--- /dev/null
+++ b/include/adt/idmap.h
@@ -0,0 +1,58 @@
+#ifndef IDMAP_H
+#define IDMAP_H
+
+/* Mapping between one fsm_state_t and a set of
+ * unsigned IDs. The implementation assumes that both
+ * IDs are sequentially assigned and don't need a sparse
+ * mapping -- it will handle 10 -> [1, 3, 47] well, but
+ * not 1000000 -> [14, 524288, 1073741823]. */
+
+#include <stdlib.h>
+
+#include "fsm/fsm.h"
+#include "fsm/alloc.h"
+
+struct idmap;			/* Opaque handle. */
+
+struct idmap *
+idmap_new(const struct fsm_alloc *alloc);
+
+void
+idmap_free(struct idmap *m);
+
+/* Associate a value with a state (if not already present.)
+ * Returns 1 on success, or 0 on allocation failure. */
+int
+idmap_set(struct idmap *m, fsm_state_t state_id, unsigned value);
+
+/* How many values are associated with an ID? */
+size_t
+idmap_get_value_count(const struct idmap *m, fsm_state_t state_id);
+
+/* Get the values associated with an ID.
+ *
+ * Returns 1 on success and writes them into the buffer, in ascending
+ * order, with the count in *written (if non-NULL).
+ *
+ * Returns 0 on error (insufficient buffer space). */
+int
+idmap_get(const struct idmap *m, fsm_state_t state_id,
+	size_t buf_size, unsigned *buf, size_t *written);
+
+/* Iterator callback. */
+typedef void
+idmap_iter_fun(fsm_state_t state_id, unsigned value, void *opaque);
+
+/* Iterate over the ID map. State IDs may be yielded out of order,
+ * values will be in ascending order. */
+void
+idmap_iter(const struct idmap *m,
+	idmap_iter_fun *cb, void *opaque);
+
+/* Iterate over the values associated with a single state
+ * (in ascending order). */
+void
+idmap_iter_for_state(const struct idmap *m, fsm_state_t state_id,
+	idmap_iter_fun *cb, void *opaque);
+
+#endif
diff --git a/src/adt/Makefile b/src/adt/Makefile
index 05199f2dc..64ad7429f 100644
--- a/src/adt/Makefile
+++ b/src/adt/Makefile
@@ -2,6 +2,7 @@
 
 SRC += src/adt/alloc.c
 SRC += src/adt/bitmap.c
+SRC += src/adt/idmap.c
 SRC += src/adt/internedstateset.c
 SRC += src/adt/priq.c
 SRC += src/adt/path.c
diff --git a/src/adt/idmap.c b/src/adt/idmap.c
new file mode 100644
index 000000000..ca169b71e
--- /dev/null
+++ b/src/adt/idmap.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright 2021 Scott Vokes
+ *
+ * See LICENCE for the full copyright terms.
+ */
+
+#include "adt/idmap.h"
+
+#include "adt/alloc.h"
+#include "adt/hash.h"
+#include "adt/u64bitset.h"
+
+#include <stdint.h>
+#include <assert.h>
+#include <stdio.h>
+
+#define NO_STATE ((fsm_state_t)-1)
+
+#define DEF_BUCKET_COUNT 4
+
+struct idmap {
+	const struct fsm_alloc *alloc;
+	unsigned bucket_count;
+	unsigned buckets_used;
+
+	/* All buckets' values are assumed to be large
+	 * enough to store this value, and they will all
+	 * grow as necessary. */
+	unsigned max_value;
+
+	/* Basic linear-probing, add-only hash table. */
+	struct idmap_bucket {
+		fsm_state_t state; /* Key. NO_STATE when empty. */
+
+		/* values[] is always either NULL or has at least
+		 * max_value + 1 bits; all grow on demand. */
+		uint64_t *values;
+	} *buckets;
+};
+
+static unsigned
+value_words(unsigned max_value) {
+	if (max_value == 0) {
+		/* Still allocate one word, for storing 0. */
+		return 1;
+	} else {
+		return u64bitset_words(max_value);
+	}
+}
+
+struct idmap *
+idmap_new(const struct fsm_alloc *alloc)
+{
+	struct idmap *res = NULL;
+	struct idmap_bucket *buckets = NULL;
+
+	res = f_malloc(alloc, sizeof(*res));
+	if (res == NULL) {
+		goto cleanup;
+	}
+
+	buckets = f_calloc(alloc,
+	    DEF_BUCKET_COUNT, sizeof(buckets[0]));
+	if (buckets == NULL) {
+		goto cleanup;
+	}
+
+	for (size_t i = 0; i < DEF_BUCKET_COUNT; i++) {
+		buckets[i].state = NO_STATE;
+	}
+
+	res->alloc = alloc;
+	res->buckets_used = 0;
+        res->bucket_count = DEF_BUCKET_COUNT;
+	res->max_value = 0;
+	res->buckets = buckets;
+
+	return res;
+
+cleanup:
+	f_free(alloc, res);
+	f_free(alloc, buckets);
+	return NULL;
+}
+
+void
+idmap_free(struct idmap *m)
+{
+	if (m == NULL) {
+		return;
+	}
+
+	for (size_t i = 0; i < m->bucket_count; i++) {
+		if (m->buckets[i].state == NO_STATE) {
+			continue;
+		}
+		f_free(m->alloc, m->buckets[i].values);
+	}
+
+	f_free(m->alloc, m->buckets);
+	f_free(m->alloc, m);
+}
+
+static int
+grow_bucket_values(struct idmap *m, unsigned old_words, unsigned new_words)
+{
+	assert(new_words > old_words);
+
+	for (size_t b_i = 0; b_i < m->bucket_count; b_i++) {
+		struct idmap_bucket *b = &m->buckets[b_i];
+		if (b->state == NO_STATE) {
+			assert(b->values == NULL);
+			continue;
+		}
+
+		uint64_t *nv = f_calloc(m->alloc,
+		    new_words, sizeof(nv[0]));
+		if (nv == NULL) {
+			return 0;
+		}
+
+		for (size_t w_i = 0; w_i < old_words; w_i++) {
+			nv[w_i] = b->values[w_i];
+		}
+		f_free(m->alloc, b->values);
+		b->values = nv;
+	}
+	return 1;
+}
+
+static int
+grow_buckets(struct idmap *m)
+{
+	const size_t ocount = m->bucket_count;
+	const size_t ncount = 2*ocount;
+	assert(ncount > m->bucket_count);
+
+	struct idmap_bucket *nbuckets = f_calloc(m->alloc,
+	    ncount, sizeof(nbuckets[0]));
+	if (nbuckets == NULL) {
+		return 0;
+	}
+	for (size_t nb_i = 0; nb_i < ncount; nb_i++) {
+		nbuckets[nb_i].state = NO_STATE;
+	}
+
+	const size_t nmask = ncount - 1;
+
+	for (size_t ob_i = 0; ob_i < ocount; ob_i++) {
+		const struct idmap_bucket *ob = &m->buckets[ob_i];
+		if (ob->state == NO_STATE) {
+			continue;
+		}
+
+		const uint64_t h = hash_id(ob->state);
+		for (size_t nb_i = 0; nb_i < ncount; nb_i++) {
+			struct idmap_bucket *nb = &nbuckets[(h + nb_i) & nmask];
+			if (nb->state == NO_STATE) {
+				nb->state = ob->state;
+				nb->values = ob->values;
+				break;
+			} else {
+				assert(nb->state != ob->state);
+				/* collision */
+				continue;
+			}
+		}
+	}
+
+	f_free(m->alloc, m->buckets);
+
+	m->buckets = nbuckets;
+	m->bucket_count = ncount;
+
+	return 1;
+}
+
+int
+idmap_set(struct idmap *m, fsm_state_t state_id,
+    unsigned value)
+{
+	assert(state_id != NO_STATE);
+
+	const uint64_t h = hash_id(state_id);
+	if (value > m->max_value) {
+		const unsigned ovw = value_words(m->max_value);
+		const unsigned nvw = value_words(value);
+		/* If this value won't fit in the existing value
+		 * arrays, then grow them all. We do not track the
+		 * number of bits in each individual array. */
+		if (nvw > ovw && !grow_bucket_values(m, ovw, nvw)) {
+			return 0;
+		}
+		m->max_value = value;
+	}
+
+	assert(m->max_value >= value);
+
+	if (m->buckets_used >= m->bucket_count/2) {
+		if (!grow_buckets(m)) {
+			return 0;
+		}
+	}
+
+	const uint64_t mask = m->bucket_count - 1;
+	for (size_t b_i = 0; b_i < m->bucket_count; b_i++) {
+		struct idmap_bucket *b = &m->buckets[(h + b_i) & mask];
+		if (b->state == state_id) {
+			assert(b->values != NULL);
+			u64bitset_set(b->values, value);
+			return 1;
+		} else if (b->state == NO_STATE) {
+			b->state = state_id;
+			assert(b->values == NULL);
+
+			const unsigned vw = value_words(m->max_value);
+			b->values = f_calloc(m->alloc,
+			    vw, sizeof(b->values[0]));
+			if (b->values == NULL) {
+				return 0;
+			}
+			m->buckets_used++;
+
+			u64bitset_set(b->values, value);
+			return 1;
+		} else {
+			continue; /* collision */
+		}
+
+	}
+
+	assert(!"unreachable");
+	return 0;
+}
+
+static const struct idmap_bucket *
+get_bucket(const struct idmap *m, fsm_state_t state_id)
+{
+	const uint64_t h = hash_id(state_id);
+	const uint64_t mask = m->bucket_count - 1;
+	for (size_t b_i = 0; b_i < m->bucket_count; b_i++) {
+		const struct idmap_bucket *b = &m->buckets[(h + b_i) & mask];
+		if (b->state == NO_STATE) {
+			return NULL;
+		} else if (b->state == state_id) {
+			return b;
+		}
+	}
+
+	return NULL;
+}
+
+size_t
+idmap_get_value_count(const struct idmap *m, fsm_state_t state_id)
+{
+	const struct idmap_bucket *b = get_bucket(m, state_id);
+	if (b == NULL) {
+		return 0;
+	}
+	assert(b->values != NULL);
+
+	size_t res = 0;
+	const size_t words = value_words(m->max_value);
+	for (size_t w_i = 0; w_i < words; w_i++) {
+		const uint64_t w = b->values[w_i];
+		/* This could use popcount64(w). */
+		if (w == 0) {
+			continue;
+		}
+		for (uint64_t bit = 1; bit; bit <<= 1) {
+			if (w & bit) {
+				res++;
+			}
+		}
+	}
+
+	return res;
+}
+
+int
+idmap_get(const struct idmap *m, fsm_state_t state_id,
+    size_t buf_size, unsigned *buf, size_t *written)
+{
+	const struct idmap_bucket *b = get_bucket(m, state_id);
+	if (b == NULL) {
+		if (written != NULL) {
+			*written = 0;
+		}
+		return 1;
+	}
+
+	size_t buf_offset = 0;
+	const size_t words = value_words(m->max_value);
+	for (size_t w_i = 0; w_i < words; w_i++) {
+		const uint64_t w = b->values[w_i];
+		if (w == 0) {
+			continue;
+		}
+
+		for (uint64_t b_i = 0; b_i < 64; b_i++) {
+			if (w & ((uint64_t)1 << b_i)) {
+				if (buf_offset * sizeof(buf[0]) >= buf_size) {
+					return 0;
+				}
+				buf[buf_offset] = 64*w_i + b_i;
+				buf_offset++;
+			}
+		}
+	}
+
+	if (written != NULL) {
+		*written = buf_offset;
+	}
+	return 1;
+}
+
+void
+idmap_iter(const struct idmap *m,
+    idmap_iter_fun *cb, void *opaque)
+{
+	const size_t words = value_words(m->max_value);
+
+	for (size_t b_i = 0; b_i < m->bucket_count; b_i++) {
+		const struct idmap_bucket *b = &m->buckets[b_i];
+		if (b->state == NO_STATE) {
+			continue;
+		}
+
+		for (size_t w_i = 0; w_i < words; w_i++) {
+			const uint64_t w = b->values[w_i];
+			if (w == 0) {
+				continue;
+			}
+			for (uint64_t b_i = 0; b_i < 64; b_i++) {
+				if (w & ((uint64_t)1 << b_i)) {
+					const unsigned v = 64*w_i + b_i;
+					cb(b->state, v, opaque);
+				}
+			}
+		}
+	}
+}
+
+void
+idmap_iter_for_state(const struct idmap *m, fsm_state_t state_id,
+    idmap_iter_fun *cb, void *opaque)
+{
+	const size_t words = value_words(m->max_value);
+	const struct idmap_bucket *b = get_bucket(m, state_id);
+	if (b == NULL) {
+		return;
+	}
+
+	for (size_t w_i = 0; w_i < words; w_i++) {
+		const uint64_t w = b->values[w_i];
+		if (w == 0) {
+			continue;
+		}
+		/* if N contiguous bits are all zero, skip them all at once */
+#define BLOCK_BITS 16
+		uint64_t block = ((uint64_t)1 << BLOCK_BITS) - 1;
+		size_t block_count = 0;
+
+		uint64_t b_i = 0;
+		while (b_i < 64) {
+			if ((w & block) == 0) {
+				block <<= BLOCK_BITS;
+				b_i += BLOCK_BITS;
+				continue;
+			}
+
+			if (w & ((uint64_t)1 << b_i)) {
+				const unsigned v = 64*w_i + b_i;
+				cb(b->state, v, opaque);
+				block_count++;
+			}
+			b_i++;
+			block <<= 1;
+		}
+
+#define CHECK 0
+#if CHECK
+		size_t check_count = 0;
+		for (uint64_t b_i = 0; b_i < 64; b_i++) {
+			if (w & ((uint64_t)1 << b_i)) {
+				check_count++;
+			}
+		}
+		assert(block_count == check_count);
+#endif
+	}
+}
diff --git a/tests/idmap/Makefile b/tests/idmap/Makefile
new file mode 100644
index 000000000..aee01f565
--- /dev/null
+++ b/tests/idmap/Makefile
@@ -0,0 +1,19 @@
+.include "../../share/mk/top.mk"
+
+TEST.tests/idmap != ls -1 tests/idmap/idmap*.c
+TEST_SRCDIR.tests/idmap = tests/idmap
+TEST_OUTDIR.tests/idmap = ${BUILD}/tests/idmap
+
+.for n in ${TEST.tests/idmap:T:R:C/^idmap//}
+INCDIR.${TEST_SRCDIR.tests/idmap}/idmap${n}.c += src/adt
+.endfor
+
+.for n in ${TEST.tests/idmap:T:R:C/^idmap//}
+test:: ${TEST_OUTDIR.tests/idmap}/res${n}
+SRC += ${TEST_SRCDIR.tests/idmap}/idmap${n}.c
+CFLAGS.${TEST_SRCDIR.tests/idmap}/idmap${n}.c += -UNDEBUG -D_DEFAULT_SOURCE -std=c99
+${TEST_OUTDIR.tests/idmap}/run${n}: ${TEST_OUTDIR.tests/idmap}/idmap${n}.o ${BUILD}/lib/adt.o
+	${CC} ${CFLAGS} ${CFLAGS.${TEST_SRCDIR.tests/idmap}/idmap${n}.c} -o ${TEST_OUTDIR.tests/idmap}/run${n} ${TEST_OUTDIR.tests/idmap}/idmap${n}.o ${BUILD}/lib/adt.o
+${TEST_OUTDIR.tests/idmap}/res${n}: ${TEST_OUTDIR.tests/idmap}/run${n}
+	( ${TEST_OUTDIR.tests/idmap}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/idmap}/res${n}
+.endfor
diff --git a/tests/idmap/idmap_basic.c b/tests/idmap/idmap_basic.c
new file mode 100644
index 000000000..19f44d56e
--- /dev/null
+++ b/tests/idmap/idmap_basic.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2021 Scott Vokes
+ *
+ * See LICENCE for the full copyright terms.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <adt/idmap.h>
+
+#define DEF_LIMIT 10
+#define DEF_SEED 0
+
+/* Thes numbers were chose to get a reasonable variety,
+ * but also some duplicated values as the input grows. */
+#define MAX_GEN_VALUES 23
+#define ID_MASK ((1 << 9) - 1)
+#define VALUE_MASK ((1 << 10) - 1)
+
+static void
+dump_cb(fsm_state_t state_id, unsigned value, void *opaque)
+{
+	/* fprintf(stderr, " -- state %d, value %u\n", state_id, value); */
+	assert(state_id <= ID_MASK);
+	assert(value <= VALUE_MASK);
+	(void)opaque;
+}
+
+static int
+cmp_u(const void *pa, const void *pb)
+{
+	const unsigned a = *(unsigned *)pa;
+	const unsigned b = *(unsigned *)pb;
+	return a < b ? -1 : a > b ? 1 : 0;
+}
+
+int main(int argc, char **argv) {
+	const size_t limit = (argc > 1 ? atoi(argv[1]) : DEF_LIMIT);
+	const unsigned seed = (argc > 2 ? atoi(argv[2]) : DEF_SEED);
+
+	(void)argc;
+	(void)argv;
+	struct idmap *m = idmap_new(NULL);
+
+	srandom(seed);
+
+	/* Fill the table with random data */
+	for (size_t id_i = 0; id_i < limit; id_i++) {
+		const fsm_state_t id = (fsm_state_t)(random() & ID_MASK);
+		const size_t value_count = random() % MAX_GEN_VALUES;
+
+		for (size_t v_i = 0; v_i < value_count; v_i++) {
+			const unsigned v = random() & VALUE_MASK;
+			if (!idmap_set(m, id, v)) {
+				assert(!"failed to set");
+			}
+		}
+	}
+
+	idmap_iter(m, dump_cb, NULL);
+
+	srandom(seed);
+
+	size_t got_buf_ceil = MAX_GEN_VALUES;
+	unsigned *got_buf = malloc(got_buf_ceil * sizeof(got_buf[0]));
+	assert(got_buf != NULL);
+
+	/* Reset the PRNG and read back the same data. */
+	for (size_t id_i = 0; id_i < limit; id_i++) {
+		const fsm_state_t id = (fsm_state_t)(random() & ID_MASK);
+		const size_t generated_value_count = random() % MAX_GEN_VALUES;
+
+		/* Note: This can occasionally differ from
+		 * generated_value_count, because the same id or values
+		 * may have been generated more than once. As long as
+		 * all the values match, it's fine. */
+		const size_t value_count = idmap_get_value_count(m, id);
+
+		if (value_count > got_buf_ceil) {
+			size_t nceil = got_buf_ceil;
+			while (nceil <= value_count) {
+				nceil *= 2;
+			}
+			free(got_buf);
+			got_buf = malloc(nceil * sizeof(got_buf[0]));
+			assert(got_buf != NULL);
+			got_buf_ceil = nceil;
+		}
+
+		size_t written;
+		if (!idmap_get(m, id,
+			got_buf_ceil * sizeof(got_buf[0]), got_buf,
+			&written)) {
+			assert(!"failed to get");
+		}
+		assert(written == value_count);
+
+		unsigned gen_buf[MAX_GEN_VALUES];
+
+		for (size_t v_i = 0; v_i < generated_value_count; v_i++) {
+			const unsigned v = random() & VALUE_MASK;
+			gen_buf[v_i] = v;
+		}
+		qsort(gen_buf, generated_value_count, sizeof(gen_buf[0]), cmp_u);
+
+		/* Every generated value should appear in the buffer.
+		 * There may be more in the buffer; ignore them. */
+		size_t v_i = 0;
+		for (size_t gen_i = 0; gen_i < generated_value_count; gen_i++) {
+			int found = 0;
+			const unsigned gv = gen_buf[gen_i];
+			assert(value_count <= got_buf_ceil);
+			/* got_buf should be sorted, so we can pick up where we left off */
+			while (v_i < value_count) {
+				if (gv == got_buf[v_i]) {
+					/* Intentionally don't increment v_i on match,
+					 * because gen_buf can repeat values. */
+					found = 1;
+					break;
+				}
+				v_i++;
+			}
+			if (!found) {
+				fprintf(stderr, "NOT FOUND: state %d -- value: %u\n",
+				    id, gv);
+				return EXIT_FAILURE;
+			}
+		}
+	}
+
+	free(got_buf);
+	idmap_free(m);
+	return EXIT_SUCCESS;
+}

From 7c6644f53b94c3be7246a67eaa985b03dfb0d526 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Mon, 10 Jul 2023 14:02:31 -0400
Subject: [PATCH 15/18] Remove theft test harness for deleted ADT (ipriq).

---
 theft/Makefile         |   1 -
 theft/fuzz_adt_ipriq.c | 197 -----------------------------------------
 2 files changed, 198 deletions(-)
 delete mode 100644 theft/fuzz_adt_ipriq.c

diff --git a/theft/Makefile b/theft/Makefile
index 0d38d8cfc..921c482a9 100644
--- a/theft/Makefile
+++ b/theft/Makefile
@@ -6,7 +6,6 @@ SRC += theft/util.c
 SRC += theft/wrap.c
 
 SRC += theft/fuzz_adt_edge_set.c
-SRC += theft/fuzz_adt_ipriq.c
 SRC += theft/fuzz_adt_priq.c
 SRC += theft/fuzz_capture_string_set.c
 SRC += theft/fuzz_literals.c
diff --git a/theft/fuzz_adt_ipriq.c b/theft/fuzz_adt_ipriq.c
deleted file mode 100644
index 1847ef6ce..000000000
--- a/theft/fuzz_adt_ipriq.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Copyright 2021 Scott Vokes
- *
- * See LICENCE for the full copyright terms.
- */
-
-#include "type_info_adt_ipriq.h"
-
-#include <adt/ipriq.h>
-#include <adt/xalloc.h>
-
-struct model {
-	size_t used;
-	size_t entries[];
-};
-
-static enum ipriq_cmp_res
-cmp_size_t(size_t a, size_t b, void *opaque)
-{
-	(void)opaque;
-	return a < b ? IPRIQ_CMP_LT :
-	    a > b ? IPRIQ_CMP_GT : IPRIQ_CMP_EQ;
-}
-
-static int exec_add(size_t x, struct model *m, struct ipriq *pq)
-{
-	if (!ipriq_add(pq, x)) {
-		return 0;
-	}
-
-	m->entries[m->used] = x;
-	m->used++;
-	return 1;
-}
-
-static int find_min_pos(const struct model *m, size_t *pos)
-{
-	size_t i;
-	if (m->used == 0) {
-		return 0;
-	}
-
-	size_t res, min;
-	res = 0;
-	min = m->entries[0];
-
-	for (i = 1; i < m->used; i++) {
-		if (m->entries[i] < min) {
-			res = i;
-			min = m->entries[i];
-		}
-	}
-	*pos = res;
-	return 1;
-}
-
-static int exec_peek(struct model *m, struct ipriq *pq)
-{
-	size_t res;
-
-	if (!ipriq_peek(pq, &res)) {
-		return m->used == 0;
-	}
-
-	size_t pos;
-	if (!find_min_pos(m, &pos)) {
-		assert(!"unreachable (peek)");
-	}
-
-	return res == m->entries[pos];
-}
-
-static int exec_pop(struct model *m, struct ipriq *pq)
-{
-	size_t res;
-
-	if (!ipriq_pop(pq, &res)) {
-		return m->used == 0;
-	}
-
-	size_t pos;
-	if (!find_min_pos(m, &pos)) {
-		assert(!"unreachable (pop)");
-	}
-
-	if (res != m->entries[pos]) {
-		return 0;
-	}
-
-	assert(m->used > 0);
-	if (pos < m->used - 1) {
-		m->entries[pos] = m->entries[m->used - 1];
-	}
-	m->used--;
-	return 1;
-}
-
-static enum theft_trial_res
-compare_against_model(const struct ipriq_scenario *scen)
-{
-	enum theft_trial_res res = THEFT_TRIAL_FAIL;
-	size_t i;
-
-	struct model *m = malloc(sizeof(*m)
-	    + scen->count * sizeof(m->entries[0]));
-	if (m == NULL) {
-		return THEFT_TRIAL_ERROR;
-	}
-	m->used = 0;
-
-	struct ipriq *pq = ipriq_new(NULL, cmp_size_t, NULL);
-	if (pq == NULL) {
-		return THEFT_TRIAL_ERROR;
-	}
-
-	for (i = 0; i < scen->count; i++) {
-		const struct ipriq_op *op = &scen->ops[i];
-
-		switch (op->t) {
-		case IPRIQ_OP_ADD:
-			if (!exec_add(op->u.add.x, m, pq)) {
-				goto cleanup;
-			}
-			break;
-
-		case IPRIQ_OP_PEEK:
-			if (!exec_peek(m, pq)) {
-				goto cleanup;
-			}
-			break;
-
-		case IPRIQ_OP_POP:
-			if (!exec_pop(m, pq)) {
-				goto cleanup;
-			}
-			break;
-
-		default:
-			assert(false); break;
-		}
-	}
-
-	res = THEFT_TRIAL_PASS;
-
-cleanup:
-	free(m);
-
-	return res;
-}
-
-static enum theft_trial_res
-prop_ipriq_model(struct theft *t, void *arg1)
-{
-	const struct ipriq_scenario *scen = arg1;
-	(void)t;
-	return compare_against_model(scen);
-}
-
-static bool
-test_ipriq(theft_seed seed, uintptr_t limit)
-{
-	enum theft_run_res res;
-
-	struct ipriq_hook_env env = {
-		.tag = 'I',
-		.limit = limit,
-	};
-
-	struct theft_run_config config = {
-		.name = __func__,
-		.prop1 = prop_ipriq_model,
-		.type_info = { &type_info_adt_ipriq },
-		.trials = 1000,
-		.hooks = {
-			.trial_pre  = theft_hook_first_fail_halt,
-			.env = &env,
-		},
-		.fork = {
-			.enable = true,
-		},
-
-		.seed = seed,
-	};
-
-	(void)limit;
-
-	res = theft_run(&config);
-	printf("%s: %s\n", __func__, theft_run_res_str(res));
-
-	return res == THEFT_RUN_PASS;
-}
-
-void
-register_test_adt_ipriq(void)
-{
-	reg_test1("adt_ipriq", test_ipriq, 10000);
-}

From c646868316ac8281ab38c33e2f9dae5cb7a5f7d8 Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Fri, 2 Jun 2023 11:41:28 -0400
Subject: [PATCH 16/18] Add pcre-anchor test for anchoring edge case.

---
 tests/pcre-anchor/in81.re   | 1 +
 tests/pcre-anchor/out81.fsm | 5 +++++
 2 files changed, 6 insertions(+)
 create mode 100644 tests/pcre-anchor/in81.re
 create mode 100644 tests/pcre-anchor/out81.fsm

diff --git a/tests/pcre-anchor/in81.re b/tests/pcre-anchor/in81.re
new file mode 100644
index 000000000..8b5fad7c3
--- /dev/null
+++ b/tests/pcre-anchor/in81.re
@@ -0,0 +1 @@
+($x)*
\ No newline at end of file
diff --git a/tests/pcre-anchor/out81.fsm b/tests/pcre-anchor/out81.fsm
new file mode 100644
index 000000000..2cdc2f023
--- /dev/null
+++ b/tests/pcre-anchor/out81.fsm
@@ -0,0 +1,5 @@
+0 -> 0 ?;
+0 -> 1 "\n";
+
+start: 0;
+end: 0, 1;
\ No newline at end of file

From 0789d614a22e2302132fb80f1fde4f6a056f793c Mon Sep 17 00:00:00 2001
From: Scott Vokes <svokes@fastly.com>
Date: Wed, 31 May 2023 17:14:35 -0400
Subject: [PATCH 17/18] fuzz/run_fuzzer: Run single seed file when given as
 argument.

---
 fuzz/run_fuzzer | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fuzz/run_fuzzer b/fuzz/run_fuzzer
index be8ba1d95..429ffa961 100755
--- a/fuzz/run_fuzzer
+++ b/fuzz/run_fuzzer
@@ -4,6 +4,8 @@ BUILD=../build
 FUZZER=${BUILD}/fuzz/fuzzer
 SEEDS=${BUILD}/fuzz/fuzzer_seeds
 
+ARG=$1
+
 SECONDS=${SECONDS:-60}
 WORKERS=${WORKERS:-4}
 SEEDS=${SEEDS:-seeds}
@@ -25,5 +27,9 @@ if [ ! -d "${SEEDS}" ]; then
    mkdir -p "${SEEDS}"
 fi
 
-echo "\n==== ${FUZZER}"
-${FUZZER} -jobs=${WORKERS} -workers=${WORKERS} -max_total_time=${SECONDS} ${SEEDS}
+if [ -z "${ARG}" ]; then
+    echo "\n==== ${FUZZER}"
+    exec ${FUZZER} -jobs=${WORKERS} -workers=${WORKERS} -max_total_time=${SECONDS} ${SEEDS}
+else
+    exec ${FUZZER} ${ARG}
+fi
\ No newline at end of file

From 1ca3726322abed8c02f7f16bffe8b32683189605 Mon Sep 17 00:00:00 2001
From: Kate F <kate@elide.org>
Date: Tue, 12 Sep 2023 12:14:20 +0100
Subject: [PATCH 18/18] Don't purge the seed cache for PRs syncing clones.

---
 .github/workflows/ci.yml | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 429ec2523..27fdf78f7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -409,8 +409,15 @@ jobs:
         path: ${{ env.build }}
         key: build-${{ matrix.make }}-${{ matrix.os }}-${{ matrix.cc }}-${{ matrix.debug }}-${{ matrix.san }}-${{ github.sha }}
 
-    # note we do the fuzzing unconditionally; each run adds to the corpus
+    # note we do the fuzzing unconditionally; each run adds to the corpus.
+    #
+    # We only run fuzzing for PRs in the base repo, this prevents attempting
+    # to purge the seed cache from a PR syncing a forked repo, which fails
+    # due to a permissions error (I'm unsure why, I think PRs from clones can't
+    # purge a cache in CI presumably for security/DoS reasons). PRs from clones
+    # still run fuzzing, just from empty, and do not save their seeds.
     - name: Restore seeds (mode ${{ matrix.mode }})
+      if: github.repository == 'katef/libfsm'
       uses: actions/cache/restore@v3
       id: cache-seeds
       with: