From a4a05fbca039aaa7a3e85399a8c5b0126d8e4386 Mon Sep 17 00:00:00 2001 From: aoles Date: Fri, 21 Jun 2024 23:19:14 +0200 Subject: [PATCH 1/2] perf: pre-check geometries using bounding boxes --- src/location_area_service.cpp | 49 ++++++++++++++++++++++++++--------- src/location_area_service.h | 5 ++++ src/osm-transform.cpp | 1 + 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/location_area_service.cpp b/src/location_area_service.cpp index 70d5e25..8b4feeb 100644 --- a/src/location_area_service.cpp +++ b/src/location_area_service.cpp @@ -52,7 +52,9 @@ void LocationAreaService::load(const std::string &path) { std::cout << "WARNING: processed area mapping file is corrupted!" << std::endl; continue; } - mapping_area_.insert({std::stoi(row[0]), AreaIntersect{static_cast(std::stoi(row[1])), poGeom}}); + auto *poBBox = new OGREnvelope(); + poGeom->getEnvelope(poBBox); + mapping_area_.insert({std::stoi(row[0]), AreaIntersect{static_cast(std::stoi(row[1])), poGeom, poBBox}}); } area_file.close(); } @@ -176,12 +178,12 @@ void LocationAreaService::output_mapping() { } void LocationAreaService::add_area_to_mapping_index(area_id_t id, const std::string &geometry) { - OGRGeometry *poGeom; + OGRGeometry *countryGeom; OGRErr eErr = OGRERR_NONE; if (geo_type_ == "wkt") { - eErr = OGRGeometryFactory::createFromWkt(geometry.c_str(), nullptr, &poGeom); + eErr = OGRGeometryFactory::createFromWkt(geometry.c_str(), nullptr, &countryGeom); } else if (geo_type_ == "geojson") { - poGeom = OGRGeometryFactory::createFromGeoJson(geometry.c_str()); + countryGeom = OGRGeometryFactory::createFromGeoJson(geometry.c_str()); } else { } if (eErr != OGRERR_NONE) { @@ -203,30 +205,37 @@ void LocationAreaService::add_area_to_mapping_index(area_id_t id, const std::str return; } if (debug_mode_) { - std::cout << "Processing area " << id << ", valid: " << poGeom->IsValid(); + std::cout << "Processing area " << id << ", valid: " << countryGeom->IsValid(); } std::uint32_t intersecting_grid_tiles = 0; std::uint32_t contained_grid_tiles = 0; + OGREnvelope countryBBox, e; + countryGeom->getEnvelope(&countryBBox); for (grid_id_t i = 0; i < grid_size_; i++) { - OGRPolygon e = grid_[i]; - if (e.Intersects(poGeom)) { + OGRPolygon g = grid_[i]; + g.getEnvelope(&e); + if (e.Intersects(countryBBox) && g.Intersects(countryGeom)) { intersecting_grid_tiles++; - if (poGeom->Contains(&e)) { + if (countryGeom->Contains(&g)) { contained_grid_tiles++; mapping_index_[i] = id; } else { mapping_index_[i] = area_id_multiple_; - mapping_area_.insert({i, AreaIntersect{id, poGeom->Intersection(&e)}}); + OGRGeometry *poGeom = countryGeom->Intersection(&g); + auto *poBBox = new OGREnvelope(); + poGeom->getEnvelope(poBBox); + mapping_area_.insert({i, AreaIntersect{id, poGeom, poBBox}}); } } } if (debug_mode_) { std::cout << " => intersecting grid tiles: " << intersecting_grid_tiles << ", contained grid tiles: " << contained_grid_tiles << std::endl; } - OGRGeometryFactory::destroyGeometry(poGeom); + OGRGeometryFactory::destroyGeometry(countryGeom); } std::vector LocationAreaService::get_area(osmium::Location l) { + areaCheckCounter++; std::vector areas; if (!initialized_) { return areas; @@ -245,8 +254,16 @@ std::vector LocationAreaService::get_area(osmium::Location l) { case area_id_multiple_:// multiple areas auto range = mapping_area_.equal_range(grid_index); for (auto i = range.first; i != range.second; ++i) { - if (i->second.geo->Contains(&point)) { - areas.push_back(mapping_id_[i->second.id]); + bBoxCheckCounter++; + OGREnvelope *env = i->second.env; + if (env->MinX <= point.getX() && + env->MinY <= point.getY() && + env->MaxX >= point.getX() && + env->MaxY >= point.getY()) { + geomCheckCounter++; + if (i->second.geo->Contains(&point)) { + areas.push_back(mapping_id_[i->second.id]); + } } } break; @@ -261,6 +278,14 @@ std::vector LocationAreaService::get_area(osmium::Location l) { return areas; } +void LocationAreaService::printAreaMappingStats() const { + std::cout << "Area mapping stats "; + std::cout << "[ areaChecks: " << areaCheckCounter; + std::cout << ", bBoxChecks: " << bBoxCheckCounter; + std::cout << ", geomChecks: " << geomCheckCounter; + std::cout << "]" << std::endl << std::flush; +} + LocationAreaService::LocationAreaService(bool debug_mode, std::uint16_t id_col, std::uint16_t geo_col, std::string &geo_type, bool file_has_header, std::string &processed_file_prefix) : debug_mode_(debug_mode), id_col_(id_col), geo_col_(geo_col), geo_type_(geo_type), file_has_header_(file_has_header), processed_file_prefix_(processed_file_prefix) { GDALAllRegister(); for (std::uint16_t grid_lat = 0; grid_lat < 180; grid_lat++) { diff --git a/src/location_area_service.h b/src/location_area_service.h index 0d2b53b..0019d41 100644 --- a/src/location_area_service.h +++ b/src/location_area_service.h @@ -12,6 +12,7 @@ typedef std::uint16_t grid_id_t; struct AreaIntersect { area_id_t id; OGRGeometry *geo; + OGREnvelope *env; }; class LocationAreaService { @@ -35,6 +36,8 @@ class LocationAreaService { bool debug_mode_ = false; bool initialized_ = false; + std::uint32_t areaCheckCounter = 0, geomCheckCounter = 0, bBoxCheckCounter = 0; + void add_area_to_mapping_index(area_id_t id, const std::string& geometry); void output_mapping(); @@ -49,6 +52,8 @@ class LocationAreaService { bool is_initialized() { return initialized_; } + + void printAreaMappingStats() const; }; diff --git a/src/osm-transform.cpp b/src/osm-transform.cpp index 7a56be5..6606a01 100644 --- a/src/osm-transform.cpp +++ b/src/osm-transform.cpp @@ -277,6 +277,7 @@ void second_pass(Config &config, boost::regex &remove_tag_regex, std::cout << "About " << mem << " KBytes used for node location index (in main memory or on disk).\n"; } + location_area_service.printAreaMappingStats(); handler.printCountryStats(); const auto end = chrono::steady_clock::now(); From 17fff49c6aa5b068993d8506ad8356f5315c08fd Mon Sep 17 00:00:00 2001 From: aoles Date: Sun, 23 Jun 2024 09:20:06 +0200 Subject: [PATCH 2/2] perf: reduce grid size from 1.0 to 0.5 degrees --- src/location_area_service.cpp | 19 ++++++++++--------- src/location_area_service.h | 6 +++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/location_area_service.cpp b/src/location_area_service.cpp index 8b4feeb..838d5a0 100644 --- a/src/location_area_service.cpp +++ b/src/location_area_service.cpp @@ -240,7 +240,7 @@ std::vector LocationAreaService::get_area(osmium::Location l) { if (!initialized_) { return areas; } - grid_id_t grid_index = ((int) l.lat() + 90) * 360 + ((int) l.lon() + 180); + grid_id_t grid_index = ((int) (l.lat()*2) + 90*2) * 360*2 + ((int) (l.lon()*2) + 180*2); OGRPoint point(l.lon(), l.lat()); if (debug_mode_) { std::cout << "Lookup point: (" << l.lon() << " " << l.lat() << ") grid index " << grid_index << " => " << mapping_index_[grid_index] << std::endl; @@ -288,17 +288,18 @@ void LocationAreaService::printAreaMappingStats() const { LocationAreaService::LocationAreaService(bool debug_mode, std::uint16_t id_col, std::uint16_t geo_col, std::string &geo_type, bool file_has_header, std::string &processed_file_prefix) : debug_mode_(debug_mode), id_col_(id_col), geo_col_(geo_col), geo_type_(geo_type), file_has_header_(file_has_header), processed_file_prefix_(processed_file_prefix) { GDALAllRegister(); - for (std::uint16_t grid_lat = 0; grid_lat < 180; grid_lat++) { - for (std::uint16_t grid_lon = 0; grid_lon < 360; grid_lon++) { - grid_id_t grid_index = grid_lat * 360 + grid_lon; - int box_lon = grid_lon - 180; - int box_lat = grid_lat - 90; + grid_ = new OGRPolygon[grid_size_]; + for (area_id_t grid_lat = 0; grid_lat < 180*2; grid_lat++) { + for (area_id_t grid_lon = 0; grid_lon < 360*2; grid_lon++) { + grid_id_t grid_index = grid_lat * 360*2 + grid_lon; + double box_lon = (double) grid_lon / 2. - 180; + double box_lat = (double) grid_lat / 2. - 90; OGRLinearRing ring; OGRPolygon poly; ring.addPoint(box_lon, box_lat); - ring.addPoint(box_lon + 1, box_lat); - ring.addPoint(box_lon + 1, box_lat + 1); - ring.addPoint(box_lon, box_lat + 1); + ring.addPoint(box_lon + 0.5, box_lat); + ring.addPoint(box_lon + 0.5, box_lat + 0.5); + ring.addPoint(box_lon, box_lat + 0.5); ring.addPoint(box_lon, box_lat); ring.closeRings(); poly.addRing(&ring); diff --git a/src/location_area_service.h b/src/location_area_service.h index 0019d41..2790ab3 100644 --- a/src/location_area_service.h +++ b/src/location_area_service.h @@ -7,7 +7,7 @@ #include typedef std::uint16_t area_id_t; -typedef std::uint16_t grid_id_t; +typedef std::uint32_t grid_id_t; struct AreaIntersect { area_id_t id; @@ -18,11 +18,11 @@ struct AreaIntersect { class LocationAreaService { private: - static const grid_id_t grid_size_ = 64800; + static const grid_id_t grid_size_ = 259200; static const area_id_t area_id_multiple_ = std::numeric_limits::max(); static const std::string delim_str_; - OGRPolygon grid_[grid_size_]; + OGRPolygon *grid_; area_id_t mapping_index_[grid_size_] = {0}; std::multimap mapping_area_; std::unordered_map mapping_id_;