Skip to content
This repository was archived by the owner on Mar 12, 2025. It is now read-only.

perf: improvements to area processing #52

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 47 additions & 21 deletions src/location_area_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ void LocationAreaService::load(const std::string &path) {
std::cout << "WARNING: processed area mapping file is corrupted!" << std::endl;
continue;
}
mapping_area_.insert({std::stoi(row[0]), AreaIntersect{static_cast<area_id_t>(std::stoi(row[1])), poGeom}});
auto *poBBox = new OGREnvelope();
poGeom->getEnvelope(poBBox);
mapping_area_.insert({std::stoi(row[0]), AreaIntersect{static_cast<area_id_t>(std::stoi(row[1])), poGeom, poBBox}});
}
area_file.close();
}
Expand Down Expand Up @@ -176,12 +178,12 @@ void LocationAreaService::output_mapping() {
}

void LocationAreaService::add_area_to_mapping_index(area_id_t id, const std::string &geometry) {
OGRGeometry *poGeom;
OGRGeometry *countryGeom;
OGRErr eErr = OGRERR_NONE;
if (geo_type_ == "wkt") {
eErr = OGRGeometryFactory::createFromWkt(geometry.c_str(), nullptr, &poGeom);
eErr = OGRGeometryFactory::createFromWkt(geometry.c_str(), nullptr, &countryGeom);
} else if (geo_type_ == "geojson") {
poGeom = OGRGeometryFactory::createFromGeoJson(geometry.c_str());
countryGeom = OGRGeometryFactory::createFromGeoJson(geometry.c_str());
} else {
}
if (eErr != OGRERR_NONE) {
Expand All @@ -203,35 +205,42 @@ void LocationAreaService::add_area_to_mapping_index(area_id_t id, const std::str
return;
}
if (debug_mode_) {
std::cout << "Processing area " << id << ", valid: " << poGeom->IsValid();
std::cout << "Processing area " << id << ", valid: " << countryGeom->IsValid();
}
std::uint32_t intersecting_grid_tiles = 0;
std::uint32_t contained_grid_tiles = 0;
OGREnvelope countryBBox, e;
countryGeom->getEnvelope(&countryBBox);
for (grid_id_t i = 0; i < grid_size_; i++) {
OGRPolygon e = grid_[i];
if (e.Intersects(poGeom)) {
OGRPolygon g = grid_[i];
g.getEnvelope(&e);
if (e.Intersects(countryBBox) && g.Intersects(countryGeom)) {
intersecting_grid_tiles++;
if (poGeom->Contains(&e)) {
if (countryGeom->Contains(&g)) {
contained_grid_tiles++;
mapping_index_[i] = id;
} else {
mapping_index_[i] = area_id_multiple_;
mapping_area_.insert({i, AreaIntersect{id, poGeom->Intersection(&e)}});
OGRGeometry *poGeom = countryGeom->Intersection(&g);
auto *poBBox = new OGREnvelope();
poGeom->getEnvelope(poBBox);
mapping_area_.insert({i, AreaIntersect{id, poGeom, poBBox}});
}
}
}
if (debug_mode_) {
std::cout << " => intersecting grid tiles: " << intersecting_grid_tiles << ", contained grid tiles: " << contained_grid_tiles << std::endl;
}
OGRGeometryFactory::destroyGeometry(poGeom);
OGRGeometryFactory::destroyGeometry(countryGeom);
}

std::vector<std::string> LocationAreaService::get_area(osmium::Location l) {
areaCheckCounter++;
std::vector<std::string> areas;
if (!initialized_) {
return areas;
}
grid_id_t grid_index = ((int) l.lat() + 90) * 360 + ((int) l.lon() + 180);
grid_id_t grid_index = ((int) (l.lat()*2) + 90*2) * 360*2 + ((int) (l.lon()*2) + 180*2);
OGRPoint point(l.lon(), l.lat());
if (debug_mode_) {
std::cout << "Lookup point: (" << l.lon() << " " << l.lat() << ") grid index " << grid_index << " => " << mapping_index_[grid_index] << std::endl;
Expand All @@ -245,8 +254,16 @@ std::vector<std::string> LocationAreaService::get_area(osmium::Location l) {
case area_id_multiple_:// multiple areas
auto range = mapping_area_.equal_range(grid_index);
for (auto i = range.first; i != range.second; ++i) {
if (i->second.geo->Contains(&point)) {
areas.push_back(mapping_id_[i->second.id]);
bBoxCheckCounter++;
OGREnvelope *env = i->second.env;
if (env->MinX <= point.getX() &&
env->MinY <= point.getY() &&
env->MaxX >= point.getX() &&
env->MaxY >= point.getY()) {
geomCheckCounter++;
if (i->second.geo->Contains(&point)) {
areas.push_back(mapping_id_[i->second.id]);
}
}
}
break;
Expand All @@ -261,19 +278,28 @@ std::vector<std::string> LocationAreaService::get_area(osmium::Location l) {
return areas;
}

void LocationAreaService::printAreaMappingStats() const {
std::cout << "Area mapping stats ";
std::cout << "[ areaChecks: " << areaCheckCounter;
std::cout << ", bBoxChecks: " << bBoxCheckCounter;
std::cout << ", geomChecks: " << geomCheckCounter;
std::cout << "]" << std::endl << std::flush;
}

LocationAreaService::LocationAreaService(bool debug_mode, std::uint16_t id_col, std::uint16_t geo_col, std::string &geo_type, bool file_has_header, std::string &processed_file_prefix) : debug_mode_(debug_mode), id_col_(id_col), geo_col_(geo_col), geo_type_(geo_type), file_has_header_(file_has_header), processed_file_prefix_(processed_file_prefix) {
GDALAllRegister();
for (std::uint16_t grid_lat = 0; grid_lat < 180; grid_lat++) {
for (std::uint16_t grid_lon = 0; grid_lon < 360; grid_lon++) {
grid_id_t grid_index = grid_lat * 360 + grid_lon;
int box_lon = grid_lon - 180;
int box_lat = grid_lat - 90;
grid_ = new OGRPolygon[grid_size_];
for (area_id_t grid_lat = 0; grid_lat < 180*2; grid_lat++) {
for (area_id_t grid_lon = 0; grid_lon < 360*2; grid_lon++) {
grid_id_t grid_index = grid_lat * 360*2 + grid_lon;
double box_lon = (double) grid_lon / 2. - 180;
double box_lat = (double) grid_lat / 2. - 90;
OGRLinearRing ring;
OGRPolygon poly;
ring.addPoint(box_lon, box_lat);
ring.addPoint(box_lon + 1, box_lat);
ring.addPoint(box_lon + 1, box_lat + 1);
ring.addPoint(box_lon, box_lat + 1);
ring.addPoint(box_lon + 0.5, box_lat);
ring.addPoint(box_lon + 0.5, box_lat + 0.5);
ring.addPoint(box_lon, box_lat + 0.5);
ring.addPoint(box_lon, box_lat);
ring.closeRings();
poly.addRing(&ring);
Expand Down
11 changes: 8 additions & 3 deletions src/location_area_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,22 @@
#include <ogr_geometry.h>

typedef std::uint16_t area_id_t;
typedef std::uint16_t grid_id_t;
typedef std::uint32_t grid_id_t;

struct AreaIntersect {
area_id_t id;
OGRGeometry *geo;
OGREnvelope *env;
};

class LocationAreaService {

private:
static const grid_id_t grid_size_ = 64800;
static const grid_id_t grid_size_ = 259200;
static const area_id_t area_id_multiple_ = std::numeric_limits<area_id_t>::max();
static const std::string delim_str_;

OGRPolygon grid_[grid_size_];
OGRPolygon *grid_;
area_id_t mapping_index_[grid_size_] = {0};
std::multimap<grid_id_t, AreaIntersect> mapping_area_;
std::unordered_map<area_id_t, std::string> mapping_id_;
Expand All @@ -35,6 +36,8 @@ class LocationAreaService {
bool debug_mode_ = false;
bool initialized_ = false;

std::uint32_t areaCheckCounter = 0, geomCheckCounter = 0, bBoxCheckCounter = 0;

void add_area_to_mapping_index(area_id_t id, const std::string& geometry);

void output_mapping();
Expand All @@ -49,6 +52,8 @@ class LocationAreaService {
bool is_initialized() {
return initialized_;
}

void printAreaMappingStats() const;
};


Expand Down
1 change: 1 addition & 0 deletions src/osm-transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ void second_pass(Config &config, boost::regex &remove_tag_regex,
std::cout << "About " << mem << " KBytes used for node location index (in main memory or on disk).\n";
}

location_area_service.printAreaMappingStats();
handler.printCountryStats();

const auto end = chrono::steady_clock::now();
Expand Down
Loading