version 0.93

assign surplus adjacency storage only to clumps that touch edge
document timings and memory usage
This commit is contained in:
Sean Barrett 2016-04-16 14:05:40 -07:00
parent 0b50f1e8d6
commit ad55a9b383
2 changed files with 145 additions and 54 deletions

View File

@ -1,4 +1,4 @@
// stb_connected_components - v0.91 - public domain connected components on grids // stb_connected_components - v0.93 - public domain connected components on grids
// http://github.com/nothings/stb // http://github.com/nothings/stb
// //
// Finds connected components on 2D grids for testing reachability between // Finds connected components on 2D grids for testing reachability between
@ -15,17 +15,25 @@
// #include "stb_connected_components.h" // #include "stb_connected_components.h"
// //
// The above creates an implementation that can run on maps up to 1024x1024. // The above creates an implementation that can run on maps up to 1024x1024.
// Map sizes must be a multiple of 32 on each axis. // Map sizes must be a multiple of (1<<(LOG2/2)) on each axis (e.g. 32 if LOG2=10,
// 16 if LOG2=8, etc.) (You can just pad your map with untraversable space.)
// //
// LICENSE // MEMORY USAGE
//
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
// //
// Uses about 6-7 bytes per grid square (e.g. 7MB for a 1024x1024 grid).
//
// PERFORMANCE
//
// On a core i7-2700K at 3.5 GHz, for a particular 1024x1024 map (map_03.png):
//
// Creating map : 44.85 ms
// Making one square traversable: 0.27 ms (average over 29,448 calls)
// Making one square untraversable: 0.23 ms (average over 30,123 calls)
// Reachability query: 0.00001 ms (average over 4,000,000 calls)
// //
// CHANGELOG // CHANGELOG
// //
// 0.93 (2016-04-16) Reduce memory by 10x for 1Kx1K map; small speedup
// 0.92 (2016-04-16) Compute sqrt(N) cluster size by default // 0.92 (2016-04-16) Compute sqrt(N) cluster size by default
// 0.91 (2016-04-15) Initial release // 0.91 (2016-04-15) Initial release
// //
@ -35,38 +43,37 @@
// - try re-integrating naive algorithm & compare performance // - try re-integrating naive algorithm & compare performance
// - more optimized batching (current approach still recomputes local clumps many times) // - more optimized batching (current approach still recomputes local clumps many times)
// - function for setting a grid of squares at once (just use batching) // - function for setting a grid of squares at once (just use batching)
// - shrink data by storing only, say, 2X max exits
// (instead of max exits per clump), and repack cluster
// if it runs out (possibly by just rebuilding from scratch,
// could even use dirty-cluster data structure)
// should reduce 1Kx1K from ~66MB to ~8MB
// //
// ALGORITHM // LICENSE
//
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
// //
// The NxN grid map is split into sqrt(N) x sqrt(N) blocks called // ALGORITHM
// "clusters". Each cluster independently computes a set of connected
// components within that cluster (ignoring all connectivity out of
// that cluster) using a union-find disjoint set forest. This produces a bunch
// of locally connected components called "clumps". Each clump is (a) connected
// within its cluster, (b) does not directly connect to any other clumps in the
// cluster (though it may connect to them by paths that lead outside the cluster,
// but those are ignored at this step), and (c) maintains an adjacency list of
// all clumps in adjacent clusters that it _is_ connected to. Then a second
// union-find disjoint set forest is used to compute connected clumps
// globally, across the whole map. Reachability is then computed by
// finding which clump each input point belongs to, and checking whether
// those clumps are in the same "global" connected component.
// //
// The above data structure can be updated efficiently; on a change // The NxN grid map is split into sqrt(N) x sqrt(N) blocks called
// of a single grid square on the map, only one cluster changes its // "clusters". Each cluster independently computes a set of connected
// purely-local state, so only one cluster needs its clumps fully // components within that cluster (ignoring all connectivity out of
// recomputed. Clumps in adjacent clusters need their adjacency lists // that cluster) using a union-find disjoint set forest. This produces a bunch
// updated: first to remove all references to the old clumps in the // of locally connected components called "clumps". Each clump is (a) connected
// rebuilt cluster, then to add new references to the new clumps. Both // within its cluster, (b) does not directly connect to any other clumps in the
// of these operations can use the existing "find which clump each input // cluster (though it may connect to them by paths that lead outside the cluster,
// point belongs to" query to compute that adjacency information rapidly. // but those are ignored at this step), and (c) maintains an adjacency list of
// In one 1024x1024 test on a specific machine, a one-tile update was // all clumps in adjacent clusters that it _is_ connected to. Then a second
// about 250 times faster than a full disjoint-set-forest on the full map. // union-find disjoint set forest is used to compute connected clumps
// globally, across the whole map. Reachability is then computed by
// finding which clump each input point belongs to, and checking whether
// those clumps are in the same "global" connected component.
//
// The above data structure can be updated efficiently; on a change
// of a single grid square on the map, only one cluster changes its
// purely-local state, so only one cluster needs its clumps fully
// recomputed. Clumps in adjacent clusters need their adjacency lists
// updated: first to remove all references to the old clumps in the
// rebuilt cluster, then to add new references to the new clumps. Both
// of these operations can use the existing "find which clump each input
// point belongs to" query to compute that adjacency information rapidly.
#ifndef INCLUDE_STB_CONNECTED_COMPONENTS_H #ifndef INCLUDE_STB_CONNECTED_COMPONENTS_H
#define INCLUDE_STB_CONNECTED_COMPONENTS_H #define INCLUDE_STB_CONNECTED_COMPONENTS_H
@ -235,13 +242,14 @@ typedef struct
unsigned char on_edge; // 1 unsigned char on_edge; // 1
} stbcc__clump; // 8 } stbcc__clump; // 8
#define STBCC__CLUSTER_ADJACENCY_COUNT (STBCC__MAX_EXITS_PER_CLUSTER*4) #define STBCC__CLUSTER_ADJACENCY_COUNT (STBCC__MAX_EXITS_PER_CLUSTER*2)
typedef struct typedef struct
{ {
unsigned int num_clumps; short num_clumps;
unsigned char rebuild; unsigned char num_edge_clumps;
unsigned char rebuild_adjacency;
stbcc__clump clump[STBCC__MAX_CLUMPS_PER_CLUSTER]; // 8 * 2^9 = 4KB stbcc__clump clump[STBCC__MAX_CLUMPS_PER_CLUSTER]; // 8 * 2^9 = 4KB
stbcc__relative_clumpid adjacency_storage[STBCC__CLUSTER_ADJACENCY_COUNT]; // 512 bytes stbcc__relative_clumpid adjacency_storage[STBCC__CLUSTER_ADJACENCY_COUNT]; // 256 bytes
} stbcc__cluster; } stbcc__cluster;
struct st_stbcc_grid struct st_stbcc_grid
@ -251,7 +259,7 @@ struct st_stbcc_grid
//unsigned char cluster_dirty[STBCC__CLUSTER_COUNT_Y][STBCC__CLUSTER_COUNT_X]; // could bitpack, but: 1K x 1K => 1KB //unsigned char cluster_dirty[STBCC__CLUSTER_COUNT_Y][STBCC__CLUSTER_COUNT_X]; // could bitpack, but: 1K x 1K => 1KB
unsigned char map[STBCC__GRID_COUNT_Y][STBCC__MAP_STRIDE]; // 1K x 1K => 1K x 128 => 128KB unsigned char map[STBCC__GRID_COUNT_Y][STBCC__MAP_STRIDE]; // 1K x 1K => 1K x 128 => 128KB
stbcc__clumpid clump_for_node[STBCC__GRID_COUNT_Y][STBCC__GRID_COUNT_X]; // 1K x 1K x 2 = 2MB stbcc__clumpid clump_for_node[STBCC__GRID_COUNT_Y][STBCC__GRID_COUNT_X]; // 1K x 1K x 2 = 2MB
stbcc__cluster cluster[STBCC__CLUSTER_COUNT_Y][STBCC__CLUSTER_COUNT_X]; // 1K x 4.5KB = 9MB stbcc__cluster cluster[STBCC__CLUSTER_COUNT_Y][STBCC__CLUSTER_COUNT_X]; // 1K x 4.5KB = 4.5MB
}; };
int stbcc_query_grid_node_connection(stbcc_grid *g, int x1, int y1, int x2, int y2) int stbcc_query_grid_node_connection(stbcc_grid *g, int x1, int y1, int x2, int y2)
@ -402,8 +410,9 @@ static void stbcc__build_all_connections_for_cluster(stbcc_grid *g, int cx, int
int x = cx * STBCC__CLUSTER_SIZE_X; int x = cx * STBCC__CLUSTER_SIZE_X;
int y = cy * STBCC__CLUSTER_SIZE_Y; int y = cy * STBCC__CLUSTER_SIZE_Y;
int step_x, step_y=0, i, j, k, n, m, dx, dy, total; int step_x, step_y=0, i, j, k, n, m, dx, dy, total;
int extra;
g->cluster[cy][cx].rebuild = 0; g->cluster[cy][cx].rebuild_adjacency = 0;
total = 0; total = 0;
for (m=0; m < 4; ++m) { for (m=0; m < 4; ++m) {
@ -459,12 +468,36 @@ static void stbcc__build_all_connections_for_cluster(stbcc_grid *g, int cx, int
} }
} }
// decide how to apportion leftover... would be better if we knew WHICH clumps assert(total <= STBCC__CLUSTER_ADJACENCY_COUNT);
// were along the edge, but we should compute this at initial time, not above
// to minimize recomputation // decide how to apportion unused adjacency slots; only clumps that lie
// on the edges of the cluster need adjacency slots, so divide them up
// evenly between those clumps
// we want:
// extra = (STBCC__CLUSTER_ADJACENCY_COUNT - total) / cluster->num_edge_clumps;
// but we efficiently approximate this without a divide, because
// ignoring edge-vs-non-edge with 'num_adj[i]*2' was faster than
// 'num_adj[i]+extra' with the divide
if (total + (cluster->num_edge_clumps<<2) <= STBCC__CLUSTER_ADJACENCY_COUNT)
extra = 4;
else if (total + (cluster->num_edge_clumps<<1) <= STBCC__CLUSTER_ADJACENCY_COUNT)
extra = 2;
else if (total + (cluster->num_edge_clumps<<0) <= STBCC__CLUSTER_ADJACENCY_COUNT)
extra = 1;
else
extra = 0;
total = 0; total = 0;
for (i=0; i < (int) cluster->num_clumps; ++i) { for (i=0; i < (int) cluster->num_clumps; ++i) {
int alloc = num_adj[i]*2; // every cluster gets room for 2x current adjacency int alloc=0;
if (cluster->clump[i].on_edge) {
alloc = num_adj[i]+extra;
if (alloc > STBCC__MAX_EXITS_PER_CLUSTER)
alloc = STBCC__MAX_EXITS_PER_CLUSTER;
}
else
assert(num_adj[i] == 0);
assert(total < 256); // must fit in byte assert(total < 256); // must fit in byte
cluster->clump[i].adjacent_clump_list_index = (unsigned char) total; cluster->clump[i].adjacent_clump_list_index = (unsigned char) total;
cluster->clump[i].max_adjacent = alloc; cluster->clump[i].max_adjacent = alloc;
@ -478,14 +511,14 @@ static void stbcc__build_all_connections_for_cluster(stbcc_grid *g, int cx, int
stbcc__add_connections_to_adjacent_cluster(g, cx, cy, 0,-1); stbcc__add_connections_to_adjacent_cluster(g, cx, cy, 0,-1);
stbcc__add_connections_to_adjacent_cluster(g, cx, cy, 0, 1); stbcc__add_connections_to_adjacent_cluster(g, cx, cy, 0, 1);
// make sure all of the above succeeded. // make sure all of the above succeeded.
assert(g->cluster[cy][cx].rebuild == 0); assert(g->cluster[cy][cx].rebuild_adjacency == 0);
} }
static void stbcc__add_connections_to_adjacent_cluster_with_rebuild(stbcc_grid *g, int cx, int cy, int dx, int dy) static void stbcc__add_connections_to_adjacent_cluster_with_rebuild(stbcc_grid *g, int cx, int cy, int dx, int dy)
{ {
if (cx >= 0 && cx < g->cw && cy >= 0 && cy < g->ch) { if (cx >= 0 && cx < g->cw && cy >= 0 && cy < g->ch) {
stbcc__add_connections_to_adjacent_cluster(g, cx, cy, dx, dy); stbcc__add_connections_to_adjacent_cluster(g, cx, cy, dx, dy);
if (g->cluster[cy][cx].rebuild) if (g->cluster[cy][cx].rebuild_adjacency)
stbcc__build_all_connections_for_cluster(g, cx, cy); stbcc__build_all_connections_for_cluster(g, cx, cy);
} }
} }
@ -621,7 +654,7 @@ static void stbcc__add_clump_connection(stbcc_grid *g, int x1, int y1, int x2, i
cluster = &g->cluster[cy1][cx1]; cluster = &g->cluster[cy1][cx1];
clump = &cluster->clump[c1]; clump = &cluster->clump[c1];
if (clump->num_adjacent == clump->max_adjacent) if (clump->num_adjacent == clump->max_adjacent)
g->cluster[cy1][cx1].rebuild = 1; g->cluster[cy1][cx1].rebuild_adjacency = 1;
else { else {
stbcc__relative_clumpid *adj = &cluster->adjacency_storage[clump->adjacent_clump_list_index]; stbcc__relative_clumpid *adj = &cluster->adjacency_storage[clump->adjacent_clump_list_index];
assert(clump->num_adjacent < STBCC__MAX_EXITS_PER_CLUMP); assert(clump->num_adjacent < STBCC__MAX_EXITS_PER_CLUMP);
@ -684,7 +717,7 @@ static void stbcc__add_connections_to_adjacent_cluster(stbcc_grid *g, int cx, in
if (cx+dx < 0 || cx+dx >= g->cw || cy+dy < 0 || cy+dy >= g->ch) if (cx+dx < 0 || cx+dx >= g->cw || cy+dy < 0 || cy+dy >= g->ch)
return; return;
if (g->cluster[cy][cx].rebuild) if (g->cluster[cy][cx].rebuild_adjacency)
return; return;
assert(abs(dx) + abs(dy) == 1); assert(abs(dx) + abs(dy) == 1);
@ -723,7 +756,7 @@ static void stbcc__add_connections_to_adjacent_cluster(stbcc_grid *g, int cx, in
if (0 == (connected[c>>3] & (1 << (c & 7)))) { if (0 == (connected[c>>3] & (1 << (c & 7)))) {
connected[c>>3] |= 1 << (c & 7); connected[c>>3] |= 1 << (c & 7);
stbcc__add_clump_connection(g, x+i, y+j, x+i+dx, y+j+dy); stbcc__add_clump_connection(g, x+i, y+j, x+i+dx, y+j+dy);
if (g->cluster[cy][cx].rebuild) if (g->cluster[cy][cx].rebuild_adjacency)
break; break;
} }
} }
@ -815,7 +848,7 @@ static void stbcc__build_clumps_for_cluster(stbcc_grid *g, int cx, int cy)
stbcc__cluster *c; stbcc__cluster *c;
stbcc__cluster_build_info cbi; stbcc__cluster_build_info cbi;
int label=0; int label=0;
int i,j; int i,j, num_on_edge;
int x = cx * STBCC__CLUSTER_SIZE_X; int x = cx * STBCC__CLUSTER_SIZE_X;
int y = cy * STBCC__CLUSTER_SIZE_Y; int y = cy * STBCC__CLUSTER_SIZE_Y;
@ -874,7 +907,30 @@ static void stbcc__build_clumps_for_cluster(stbcc_grid *g, int cx, int cy)
assert(g->clump_for_node[y+j][x+i] <= STBCC__NULL_CLUMPID); assert(g->clump_for_node[y+j][x+i] <= STBCC__NULL_CLUMPID);
} }
c->rebuild = 1; // flag that it has no valid data for (i=0; i < STBCC__CLUSTER_SIZE_X; ++i) {
int d = cbi.label[0][i];
if (d != STBCC__NULL_CLUMPID)
c->clump[d].on_edge = 1;
d = cbi.label[STBCC__CLUSTER_SIZE_Y-1][i];
if (d != STBCC__NULL_CLUMPID)
c->clump[d].on_edge = 1;
}
for (j=0; j < STBCC__CLUSTER_SIZE_Y; ++j) {
int d = cbi.label[j][0];
if (d != STBCC__NULL_CLUMPID)
c->clump[d].on_edge = 1;
d = cbi.label[j][STBCC__CLUSTER_SIZE_X-1];
if (d != STBCC__NULL_CLUMPID)
c->clump[d].on_edge = 1;
}
num_on_edge = 0;
for (i=0; i < (int) c->num_clumps; ++i)
num_on_edge += c->clump[i].on_edge;
c->num_edge_clumps = num_on_edge;
c->rebuild_adjacency = 1; // flag that it has no valid adjacency data
} }
#endif // STB_CONNECTED_COMPONENTS_IMPLEMENTATION #endif // STB_CONNECTED_COMPONENTS_IMPLEMENTATION

View File

@ -133,11 +133,12 @@ void end_timer(void)
printf("%6.4lf ms: %s\n", tm * 1000, message); printf("%6.4lf ms: %s\n", tm * 1000, message);
} }
int loc[5000][2];
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
stbcc_grid *g; stbcc_grid *g;
int w,h, i,j,k=0, count=0; int w,h, i,j,k=0, count=0, r;
uint8 *map = stbi_load("data/map_03.png", &w, &h, 0, 1); uint8 *map = stbi_load("data/map_03.png", &w, &h, 0, 1);
assert(map); assert(map);
@ -160,6 +161,25 @@ int main(int argc, char **argv)
end_timer(); end_timer();
write_map(g, w, h, "tests/output/stbcc/base.png"); write_map(g, w, h, "tests/output/stbcc/base.png");
for (i=0; i < 5000;) {
loc[i][0] = stb_rand() % w;
loc[i][1] = stb_rand() % h;
if (stbcc_query_grid_open(g, loc[i][0], loc[i][1]))
++i;
}
r = 0;
start_timer("reachable");
for (i=0; i < 2000; ++i) {
for (j=0; j < 2000; ++j) {
int x1 = loc[i][0], y1 = loc[i][1];
int x2 = loc[2000+j][0], y2 = loc[2000+j][1];
r += stbcc_query_grid_node_connection(g, x1,y1, x2,y2);
}
}
end_timer();
printf("%d reachable\n", r);
printf("Cluster size: %d,%d\n", STBCC__CLUSTER_SIZE_X, STBCC__CLUSTER_SIZE_Y); printf("Cluster size: %d,%d\n", STBCC__CLUSTER_SIZE_X, STBCC__CLUSTER_SIZE_Y);
#if 1 #if 1
@ -191,6 +211,20 @@ int main(int argc, char **argv)
end_timer(); end_timer();
printf("Removed %d grid spaces\n", count); printf("Removed %d grid spaces\n", count);
write_map(g, w, h, stb_sprintf("tests/output/stbcc/open_random_%d.png", i)); write_map(g, w, h, stb_sprintf("tests/output/stbcc/open_random_%d.png", i));
r = 0;
start_timer("reachable");
for (i=0; i < 1000; ++i) {
for (j=0; j < 1000; ++j) {
int x1 = loc[i][0], y1 = loc[i][1];
int x2 = loc[j][0], y2 = loc[j][1];
r += stbcc_query_grid_node_connection(g, x1,y1, x2,y2);
}
}
end_timer();
printf("%d reachable\n", r);
start_timer("adding"); start_timer("adding");
count = 0; count = 0;
for (i=0; i < 1800; ++i) { for (i=0; i < 1800; ++i) {
@ -221,6 +255,7 @@ int main(int argc, char **argv)
printf("Added %d grid spaces\n", count); printf("Added %d grid spaces\n", count);
#endif #endif
#if 0 // for map_02.png #if 0 // for map_02.png
start_timer("process"); start_timer("process");
for (k=0; k < 20; ++k) { for (k=0; k < 20; ++k) {