Skip to content

Commit

Permalink
Simplify code to count the length of strings, add test
Browse files Browse the repository at this point in the history
  • Loading branch information
Nakaner committed Dec 4, 2020
1 parent 9ba7602 commit 4267951
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 18 deletions.
37 changes: 21 additions & 16 deletions src/tagging_view_handler.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* © 2017 Geofabrik GmbH
* © 2017–2020 Geofabrik GmbH
*
* This file is part of osmi_simple_views.
*
Expand Down Expand Up @@ -559,22 +559,19 @@ void TaggingViewHandler::no_main_tags(const osmium::OSMObject& object) {
}
}

/**
 * Check whether the value of a tag is longer than a given number of characters.
 *
 * The length is measured in UTF-8 characters (code points), not in bytes.
 *
 * @param key key of the tag to look up
 * @param tags tag list of the object
 * @param limit maximum number of characters which is not considered suspicious
 * @returns true if the tag exists and its value has more than limit characters
 */
bool TaggingViewHandler::is_strlen_suspicious(const char* key, const osmium::TagList& tags, const size_t limit) {
    // A missing key yields a null pointer, for which char_length_utf8() returns 0.
    return char_length_utf8(tags.get_value_by_key(key)) > limit;
}

/**
 * Get the length of a NUL-terminated string with respect to multi-byte UTF-8 characters.
 *
 * Every UTF-8 encoded character consists of exactly one leading byte which is optionally
 * followed by continuation bytes of the form 0b10xxxxxx. Counting all bytes which are not
 * continuation bytes therefore yields the number of characters.
 *
 * @param value NUL-terminated string, may be a null pointer
 * @returns number of characters, 0 if value is a null pointer or an empty string
 */
size_t TaggingViewHandler::char_length_utf8(const char* value) {
    if (!value) {
        return 0;
    }
    size_t length = 0;
    for (const char* ptr = value; *ptr != '\0'; ++ptr) {
        // Inspect the byte as unsigned so the bit mask does not depend on the signedness of char.
        const bool is_follow_byte = (static_cast<unsigned char>(*ptr) & 0xc0) == 0x80;
        if (!is_follow_byte) {
            ++length;
        }
    }
    return length;
}

void TaggingViewHandler::long_text(const osmium::OSMObject& object) {
Expand All @@ -587,12 +584,20 @@ void TaggingViewHandler::long_text(const osmium::OSMObject& object) {
return;
}

if (is_strlen_suspicious("note", object.tags(), 150)) {
write_feature_to_simple_layer(current_layer, object, "tags", tags_string(object.tags(), "note").c_str(), "text", object.get_value_by_key("note"));
} else if (is_strlen_suspicious("description", object.tags(), 150)) {
write_feature_to_simple_layer(current_layer, object, "tags", tags_string(object.tags(), "description").c_str(), "text", object.get_value_by_key("description"));
} else if (is_strlen_suspicious("name", object.tags(), 80)) {
write_feature_to_simple_layer(current_layer, object, "tags", tags_string(object.tags(), "name").c_str(), "text", object.get_value_by_key("description"));
for (const osmium::Tag& t : object.tags()) {
const auto keys = { "note", "description"};
for (auto&& k : keys) {
if (is_a_x_key_key(t.key(), k)) {
if (char_length_utf8(t.value()) > 150) {
write_feature_to_simple_layer(current_layer, object, "tags", tags_string(object.tags(), t.key()).c_str(), "text", t.value());
}
}
}
if (is_a_x_key_key(t.key(), "name")) {
if (char_length_utf8(t.value()) > 150) {
write_feature_to_simple_layer(current_layer, object, "tags", tags_string(object.tags(), t.key()).c_str(), "text", t.value());
}
}
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/tagging_view_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,9 @@ class TaggingViewHandler : public AbstractViewHandler {
const char* field_name, const char* value);

/**
 * Check whether the value of the provided key is longer than limit characters.
 *
 * Returns false if the tag list has no value for the key. Follow bytes of multi-byte
 * UTF-8 characters are not counted when the length is compared to the limit.
 */
static bool is_strlen_suspicious(const char* key, const osmium::TagList& tags, const size_t limit);

/**
 * Get length of a string with respect to multi-byte UTF-8 characters.
 *
 * The string has to be NUL-terminated. Follow bytes (0b10xxxxxx) are not counted,
 * i.e. each multi-byte character contributes 1 to the result.
 */
static size_t char_length_utf8(const char* value);

void relation(const osmium::Relation&) {};
void area(const osmium::Area&) {};
Expand Down
20 changes: 20 additions & 0 deletions test/t/test_tagging_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,26 @@

#include <tagging_view_handler.hpp>

// Checks that char_length_utf8() counts characters, not bytes, for ASCII input,
// two-byte sequences (umlauts), three-byte sequences (Katakana) and empty input.
TEST_CASE("test detection of long strings") {

    SECTION("ASCII") {
        REQUIRE(TaggingViewHandler::char_length_utf8("abcdefghijkl") == 12);
        REQUIRE(TaggingViewHandler::char_length_utf8("abcdef01ijkl") == 12);
        REQUIRE(TaggingViewHandler::char_length_utf8("abef01ijkl") == 10);
    }

    SECTION("Umlauts") {
        REQUIRE(TaggingViewHandler::char_length_utf8("äbcdefghijkl") == 12);
        REQUIRE(TaggingViewHandler::char_length_utf8("lmnöpqrstvwx") == 12);
        REQUIRE(TaggingViewHandler::char_length_utf8("nöpqrstvwx") == 10);
        // String consisting of two-byte characters only: 4 bytes, 2 characters.
        REQUIRE(TaggingViewHandler::char_length_utf8("äö") == 2);
    }

    SECTION("Encodings with more than two bytes") {
        REQUIRE(TaggingViewHandler::char_length_utf8("カールスルーエ") == 7);
    }

    SECTION("Empty input") {
        REQUIRE(TaggingViewHandler::char_length_utf8("") == 0);
        REQUIRE(TaggingViewHandler::char_length_utf8(nullptr) == 0);
    }
}

TEST_CASE("is_a_x_key_key") {
const char* whitelist_base = "name";

Expand Down

0 comments on commit 4267951

Please sign in to comment.