Choose the megatile features from those that will be in the next N zo…

…oms (#280) * Choose the megatile features from those that will be in the next N zooms * Take fractional zooms into account in multiplier feature choices * Fix more tests * Add a flag to retain multiplier features by minimum distance * Limit feature expansion from multiplier density to 2x * The multiplier cap was a bad idea * Revert "The multiplier cap was a bad idea" This reverts commit 6f8273a. * Revert "Limit feature expansion from multiplier density to 2x" This reverts commit a26e413. * Revert "Add a flag to retain multiplier features by minimum distance" This reverts commit 01f14a4. * Remove the multiplier sequence, which should no longer matter * Revert "Revert "Add a flag to retain multiplier features by minimum distance"" This reverts commit 776da4a. * Revert "Revert "Limit feature expansion from multiplier density to 2x"" This reverts commit 44a683d. * Revert "Revert "The multiplier cap was a bad idea"" This reverts commit 80f7cb1. * Track two kinds of previous index for next_feature * Fix multiplier density threshold, I think * Oh, I didn't git add the code changes * Update version and changelog * Try to install sqlite3 to fix the automated build * Deleted too much * Only let --preserve-point-density-threshold shift density around * Remove the density debt concept, since it doesn't help * Make the drop states a vector instead of an array * Revert "Make the drop states a vector instead of an array" This reverts commit 66c7abb. * Revert "Remove the density debt concept, since it doesn't help" This reverts commit 707bb0c. * Revert "Only let --preserve-point-density-threshold shift density around" This reverts commit ecf01f2.
felt · Oct 17, 2024 · 28efc40 · 28efc40
1 parent 78661f1
commit 28efc40
Show file tree

Hide file tree

Showing 15 changed files with 55,244 additions and 28,819 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -10,5 +10,6 @@ jobs:
         version: ['Release', 'Debug']
     steps:
       - uses: actions/checkout@v3
+      - run: sudo apt-get install libsqlite3-dev
       - run: uname -a; BUILDTYPE=${{ matrix.version }} make
-      - run: make test
+      - run: make test
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,8 @@
+# 2.65.0
+
+* Improve spatial distribution of --retain-points-multiplier features
+* Add --preserve-multiplier-density-threshold option to maintain minimum density of multiplier features
+
 # 2.64.0
 
 * Add --bin-by-id to overzoom

diff --git a/Makefile b/Makefile
@@ -527,38 +527,38 @@ accumulate-test:
 	# and 99 without it
 	test `grep '"POP1950": null' tests/ne_110m_populated_places_nulls/in.json | wc -l` == 99
 	./tippecanoe -yNAME -yPOP1950 -yclustered:cluster_size -yclustered:unrelated -q -z3 -r1.75 -b0 -f -e tests/pbf/accum.dir --accumulate-numeric-attributes=clustered --set-attribute '{"clustered:cluster_size":1}' --accumulate-attribute '{"clustered:cluster_size":"sum"}' --retain-points-multiplier 3 tests/ne_110m_populated_places_nulls/in.json
-	# at this drop rate, there are 6 points at z0 that have no POP1950s clustered onto them....
-	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep -v 'clustered:count:POP1950' | wc -l` == 78
-	# 35 of which have no POP1950 at all
-	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep -v 'POP1950' | wc -l` == 35
-	# 43 of which do have POP1950
-	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep -v 'clustered:count:POP1950' | grep 'POP1950' | wc -l` == 43
-	# plus 59 that are clustered
-	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep 'clustered:count:POP1950' | wc -l` == 59
-	# the 59 clustered POP1950s have a total count of 101
-	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep 'clustered:count:POP1950' | sed 's/.*"clustered:count:POP1950": //' | awk '{sum += $$1} END {print sum}'` == 101
-	# we have already established that there are 43 bare POP1950s
+	# at this drop rate, there are 61 points at z0 that have no POP1950s clustered onto them....
+	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep -v 'clustered:count:POP1950' | wc -l` == 61
+	# 26 of which have no POP1950 at all
+	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep -v 'POP1950' | wc -l` == 26
+	# 35 of which do have POP1950
+	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep -v 'clustered:count:POP1950' | grep 'POP1950' | wc -l` == 35
+	# plus 60 that are clustered
+	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep 'clustered:count:POP1950' | wc -l` == 60
+	# the 60 clustered POP1950s have a total count of 109
+	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep 'clustered:count:POP1950' | sed 's/.*"clustered:count:POP1950": //' | awk '{sum += $$1} END {print sum}'` == 109
+	# we have already established that there are 35 bare POP1950s
 	# which makes a total of 144, which is the total count expected
 	#
 	# meanwhile, regular attribute accumulation.
-	# there are 137 features in the z0 tile, and they all have clustered:cluster_size
-	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep 'clustered:cluster_size' | wc -l` == 137
+	# there are 121 features in the z0 tile, and they all have clustered:cluster_size
+	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep 'clustered:cluster_size' | wc -l` == 121
 	# there are no features that lack it.
 	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep -v 'clustered:cluster_size' | wc -l` == 0
 	# they add up to the 243 original features
 	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | sed 's/.*clustered:cluster_size": //' | awk '{sum += $$1} END {print sum}'` == 243
 	# Make sure we do *not* accumulate a numeric attribute that already has the magic prefix:
 	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep sum:clustered:unrelated | wc -l` == 0
 	# But that we *do* preserve those attributes into the output features:
-	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep clustered:unrelated | wc -l` == 66
+	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep clustered:unrelated | wc -l` == 61
 	#
 	# on to the sums:
 	# in the original data set, the POP1950s that are present add up to 161590
 	test `grep '"POP1950": [0-9]' tests/ne_110m_populated_places_nulls/in.json | sed 's/.*"POP1950": //' | awk '{sum += $$1} END {print sum}' ` == 161590
-	# in the z0 tile, the clustered POP1950s add up to 113357
-	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep 'clustered:sum:POP1950' | sed 's/.*"clustered:sum:POP1950": //' | awk '{sum += $$1} END {print sum}'` == 113357
-	# and the non-clustered ones add up to 48233
-	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep -v 'clustered:sum:POP1950' | grep POP1950 | sed 's/.*"POP1950": //' | awk '{sum += $$1} END {print sum}'` == 48233
+	# in the z0 tile, the clustered POP1950s add up to 116967
+	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep 'clustered:sum:POP1950' | sed 's/.*"clustered:sum:POP1950": //' | awk '{sum += $$1} END {print sum}'` == 116967
+	# and the non-clustered ones add up to 44623
+	test `./tippecanoe-decode -c tests/pbf/accum.dir/0/0/0.pbf 0 0 0 | grep -v 'clustered:sum:POP1950' | grep POP1950 | sed 's/.*"POP1950": //' | awk '{sum += $$1} END {print sum}'` == 44623
 	# which is the correct 161590
 	#
 	# OK, so do these still hold after megatile filtering?
@@ -599,11 +599,11 @@ accumulate-test:
 	# Now on to binning!
 	./tippecanoe-overzoom --assign-to-bins tests/pbf/h3-0-0-0.geojson --accumulate-numeric-attributes=clustered --accumulate-attribute '{"clustered:cluster_size":"sum"}' -o tests/pbf/bins-0-0-0.pbf tests/pbf/accum.dir/0/0/0.pbf 0/0/0 0/0/0
 	# Now there are 30 bins with POP1950 clusters
-	test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep 'clustered:count:POP1950' | wc -l` == 44
+	test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep 'clustered:count:POP1950' | wc -l` == 41
 	# There are none with bare POP1950 (which is expected; we should only have summary statistics)
 	test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep -v 'clustered:count:POP1950' | grep 'POP1950' | wc -l` == 0
 	# And 4 with no POP1950 at all
-	test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep -v 'POP1950' | wc -l` == 4
+	test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep -v 'POP1950' | wc -l` == 3
 	#
 	# the clustered and megatile-filtered and binned POP1950s add up to 161590
 	test `./tippecanoe-decode -c tests/pbf/bins-0-0-0.pbf 0 0 0 | grep 'clustered:sum:POP1950' | sed 's/.*"clustered:sum:POP1950": //' | awk '{sum += $$1} END {print sum}'` == 161590

diff --git a/main.cpp b/main.cpp
@@ -93,6 +93,7 @@ size_t limit_tile_feature_count_at_maxzoom = 0;
 unsigned int drop_denser = 0;
 std::map<std::string, serial_val> set_attributes;
 unsigned long long preserve_point_density_threshold = 0;
+unsigned long long preserve_multiplier_density_threshold = 0;
 long long extend_zooms_max = 0;
 int retain_points_multiplier = 1;
 std::vector<std::string> unidecode_data;
@@ -3109,6 +3110,7 @@ int main(int argc, char **argv) {
 		{"cluster-distance", required_argument, 0, 'K'},
 		{"cluster-maxzoom", required_argument, 0, 'k'},
 		{"preserve-point-density-threshold", required_argument, 0, '~'},
+		{"preserve-multiplier-density-threshold", required_argument, 0, '~'},
 
 		{"Dropping or merging a fraction of features to keep under tile size limits", 0, 0, 0},
 		{"drop-densest-as-needed", no_argument, &additional[A_DROP_DENSEST_AS_NEEDED], 1},
@@ -3320,6 +3322,8 @@ int main(int argc, char **argv) {
 				}
 			} else if (strcmp(opt, "preserve-point-density-threshold") == 0) {
 				preserve_point_density_threshold = atoll_require(optarg, "Preserve point density threshold");
+			} else if (strcmp(opt, "preserve-multiplier-density-threshold") == 0) {
+				preserve_multiplier_density_threshold = atoll_require(optarg, "Preserve multiplier density threshold");
 			} else if (strcmp(opt, "extend-zooms-if-still-dropping-maximum") == 0) {
 				extend_zooms_max = atoll_require(optarg, "Maximum number by which to extend zooms");
 			} else if (strcmp(opt, "retain-points-multiplier") == 0) {

diff --git a/main.hpp b/main.hpp
@@ -65,6 +65,7 @@ extern long long extend_zooms_max;
 extern int retain_points_multiplier;
 extern size_t maximum_string_attribute_length;
 extern std::string accumulate_numeric;
+extern unsigned long long preserve_multiplier_density_threshold;
 
 struct order_field {
 	std::string name;

diff --git a/serial.cpp b/serial.cpp
@@ -744,6 +744,7 @@ int serialize_feature(struct serialization_state *sst, serial_feature &sf, std::
 	    additional[A_GENERATE_POLYGON_LABEL_POINTS] ||
 	    sst->uses_gamma ||
 	    retain_points_multiplier > 1 ||
+	    preserve_multiplier_density_threshold > 0 ||
 	    cluster_distance != 0) {
 		sf.index = bbox_index;
 	} else {

diff --git a/serial.hpp b/serial.hpp
@@ -94,9 +94,11 @@ struct serial_feature {
 
 #define FEATURE_DROPPED -1
 #define FEATURE_KEPT 0
+#define FEATURE_ADDED_FOR_MULTIPLIER_DENSITY INT_MAX
 	// <0: dropped
 	//  0: kept
 	// >0: sequence number of additional feature kept by retain-points-multiplier
+	// INT_MAX: additional feature kept by preserve-multiplier-density-threshold
 	int dropped = FEATURE_DROPPED;	// was this feature dropped by rate?
 
 	// unsigned long long drop_by;  // dot-dropping priority