Skip to content

Commit

Permalink
Speed up cluster merge by batch copying arrays
Browse files Browse the repository at this point in the history
merge_clusters was using zarray_add_all, which copied elements one by
one — two memcpys and a potential array resize per element. Here we
replace it with a range copy that performs a single resize and a single
memcpy for the whole operation, which is a lot faster.

In my testing it reduces the total runtime for an image that's 2000x3000
by 20%.
  • Loading branch information
bouk committed Jun 16, 2023
1 parent ed85cbc commit e1b143c
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 55 deletions.
73 changes: 32 additions & 41 deletions apriltag_quad_thresh.c
Original file line number Diff line number Diff line change
Expand Up @@ -1062,10 +1062,10 @@ static void do_quad_task(void *p)

for (int cidx = task->cidx0; cidx < task->cidx1; cidx++) {

zarray_t *cluster;
zarray_get(clusters, cidx, &cluster);
zarray_t **cluster;
zarray_get_volatile(clusters, cidx, &cluster);

if (zarray_size(cluster) < td->qtp.min_cluster_pixels)
if (zarray_size(*cluster) < td->qtp.min_cluster_pixels)
continue;

// a cluster should contain only boundary points around the
Expand All @@ -1074,14 +1074,14 @@ static void do_quad_task(void *p)
// fit quads to.) A typical point along an edge is added three
// times (because it has 3 neighbors). The maximum perimeter
// is 2w+2h.
if (zarray_size(cluster) > 3*(2*w+2*h)) {
if (zarray_size(*cluster) > 3*(2*w+2*h)) {
continue;
}

struct quad quad;
memset(&quad, 0, sizeof(struct quad));

if (fit_quad(td, task->im, cluster, &quad, task->tag_width, task->normal_border, task->reversed_border)) {
if (fit_quad(td, task->im, *cluster, &quad, task->tag_width, task->normal_border, task->reversed_border)) {
pthread_mutex_lock(&td->mutex);
zarray_add(quads, &quad);
pthread_mutex_unlock(&td->mutex);
Expand Down Expand Up @@ -1588,14 +1588,14 @@ zarray_t* do_gradient_clusters(image_u8_t* threshim, int ts, int y0, int y1, int
int n = end - start;
for (int j = 0; j < n - 1; j++) {
for (int k = 0; k < n - j - 1; k++) {
struct cluster_hash* hash1;
struct cluster_hash* hash2;
zarray_get(clusters, start + k, &hash1);
zarray_get(clusters, start + k + 1, &hash2);
if (hash1->id > hash2->id) {
struct cluster_hash tmp = *hash2;
*hash2 = *hash1;
*hash1 = tmp;
struct cluster_hash** hash1;
struct cluster_hash** hash2;
zarray_get_volatile(clusters, start + k, &hash1);
zarray_get_volatile(clusters, start + k + 1, &hash2);
if ((*hash1)->id > (*hash2)->id) {
struct cluster_hash tmp = **hash2;
**hash2 = **hash1;
**hash1 = tmp;
}
}
}
Expand Down Expand Up @@ -1626,38 +1626,29 @@ zarray_t* merge_clusters(zarray_t* c1, zarray_t* c2) {
int l2 = zarray_size(c2);

while (i1 < l1 && i2 < l2) {
struct cluster_hash* h1;
struct cluster_hash* h2;
zarray_get(c1, i1, &h1);
zarray_get(c2, i2, &h2);

if (h1->hash == h2->hash && h1->id == h2->id) {
zarray_add_all(h1->data, h2->data);
zarray_add(ret, &h1);
struct cluster_hash** h1;
struct cluster_hash** h2;
zarray_get_volatile(c1, i1, &h1);
zarray_get_volatile(c2, i2, &h2);

if ((*h1)->hash == (*h2)->hash && (*h1)->id == (*h2)->id) {
zarray_add_range((*h1)->data, (*h2)->data, 0, zarray_size((*h2)->data));
zarray_add(ret, h1);
i1++;
i2++;
zarray_destroy(h2->data);
free(h2);
} else if (h2->hash < h1->hash || (h2->hash == h1->hash && h2->id < h1->id)) {
zarray_add(ret, &h2);
zarray_destroy((*h2)->data);
free(*h2);
} else if ((*h2)->hash < (*h1)->hash || ((*h2)->hash == (*h1)->hash && (*h2)->id < (*h1)->id)) {
zarray_add(ret, h2);
i2++;
} else {
zarray_add(ret, &h1);
zarray_add(ret, h1);
i1++;
}
}

for (; i1 < l1; i1++) {
struct cluster_hash* h1;
zarray_get(c1, i1, &h1);
zarray_add(ret, &h1);
}

for (; i2 < l2; i2++) {
struct cluster_hash* h2;
zarray_get(c2, i2, &h2);
zarray_add(ret, &h2);
}
zarray_add_range(ret, c1, i1, l1);
zarray_add_range(ret, c2, i2, l2);

zarray_destroy(c1);
zarray_destroy(c2);
Expand Down Expand Up @@ -1716,10 +1707,10 @@ zarray_t* gradient_clusters(apriltag_detector_t *td, image_u8_t* threshim, int w
clusters = zarray_create(sizeof(zarray_t*));
zarray_ensure_capacity(clusters, zarray_size(clusters_list[0]));
for (int i = 0; i < zarray_size(clusters_list[0]); i++) {
struct cluster_hash* hash;
zarray_get(clusters_list[0], i, &hash);
zarray_add(clusters, &hash->data);
free(hash);
struct cluster_hash** hash;
zarray_get_volatile(clusters_list[0], i, &hash);
zarray_add(clusters, &(*hash)->data);
free(*hash);
}
zarray_destroy(clusters_list[0]);
free(clusters_list);
Expand Down
29 changes: 15 additions & 14 deletions common/zarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -437,26 +437,27 @@ static inline int zarray_index_of(const zarray_t *za, const void *p)
return -1;
}



/**
 * Add elements from 'source' into 'dest', copying the half-open index
 * range [start, end). el_sz must be the same for both arrays.
 *
 * Performs a single capacity grow and a single memcpy for the whole
 * range instead of a per-element copy loop.
 **/
static inline void zarray_add_range(zarray_t *dest, const zarray_t *source, int start, int end)
{
    // NULL checks must come before any field access: dereferencing a NULL
    // pointer (as the el_sz comparison below does) is undefined behavior,
    // so asserting NULL-ness after the dereference would be too late.
    assert(dest != NULL);
    assert(source != NULL);
    assert(dest->el_sz == source->el_sz);
    assert(start >= 0);
    assert(end <= source->size);
    if (start == end) {
        return;
    }
    assert(start < end);

    int count = end - start;
    zarray_ensure_capacity(dest, dest->size + count);

    // Single bulk copy. The source region ends at or before source->size
    // elements, and the destination region starts at dest->size, so even
    // when dest == source the two regions cannot overlap and memcpy is safe.
    memcpy(&dest->data[dest->size*dest->el_sz], &source->data[source->el_sz*start], dest->el_sz*count);
    dest->size += count;
}

#ifdef __cplusplus
Expand Down

0 comments on commit e1b143c

Please sign in to comment.