From 84ac205bfabea45eafb72e0efe01e8840b023aa6 Mon Sep 17 00:00:00 2001 From: max Date: Thu, 5 Oct 2023 21:00:37 -0500 Subject: [PATCH 1/5] [llvm][Support] fix convertToSnakeFromCamelCase --- llvm/lib/Support/StringExtras.cpp | 18 +++++++----------- llvm/unittests/ADT/StringExtrasTest.cpp | 5 +++++ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Support/StringExtras.cpp b/llvm/lib/Support/StringExtras.cpp index 5683d7005584eb..fd5a34fb3d6e82 100644 --- a/llvm/lib/Support/StringExtras.cpp +++ b/llvm/lib/Support/StringExtras.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" #include @@ -96,18 +97,13 @@ std::string llvm::convertToSnakeFromCamelCase(StringRef input) { if (input.empty()) return ""; - std::string snakeCase; - snakeCase.reserve(input.size()); - for (char c : input) { - if (!std::isupper(c)) { - snakeCase.push_back(c); - continue; - } - - if (!snakeCase.empty() && snakeCase.back() != '_') - snakeCase.push_back('_'); - snakeCase.push_back(llvm::toLower(c)); + std::string snakeCase = input.str(); + for (int i = 0; i < 10; ++i) { + snakeCase = llvm::Regex("([A-Z]+)([A-Z][a-z])").sub("\\1_\\2", snakeCase); + snakeCase = llvm::Regex("([a-z0-9])([A-Z])").sub("\\1_\\2", snakeCase); } + std::transform(snakeCase.begin(), snakeCase.end(), snakeCase.begin(), + [](unsigned char c) { return std::tolower(c); }); return snakeCase; } diff --git a/llvm/unittests/ADT/StringExtrasTest.cpp b/llvm/unittests/ADT/StringExtrasTest.cpp index 3f69c91b270a35..fab562f1ed0d59 100644 --- a/llvm/unittests/ADT/StringExtrasTest.cpp +++ b/llvm/unittests/ADT/StringExtrasTest.cpp @@ -184,6 +184,11 @@ TEST(StringExtrasTest, ConvertToSnakeFromCamelCase) { testConvertToSnakeCase("OpName", "op_name"); testConvertToSnakeCase("opName", "op_name"); + testConvertToSnakeCase("OPName", "op_name"); + testConvertToSnakeCase("opNAME", "op_name"); + testConvertToSnakeCase("opNAMe", "op_na_me"); + testConvertToSnakeCase("opnameE", "opname_e"); + testConvertToSnakeCase("OPNameOPName", "op_name_op_name"); testConvertToSnakeCase("_OpName", "_op_name"); testConvertToSnakeCase("Op_Name", "op_name"); testConvertToSnakeCase("", ""); From f005c3b9a92239d1335ab42fec77c005c350be88 Mon Sep 17 00:00:00 2001 From: max Date: Fri, 6 Oct 2023 12:39:35 -0500 Subject: [PATCH 2/5] hoist regexes --- llvm/lib/Support/StringExtras.cpp | 7 +++++-- llvm/unittests/ADT/StringExtrasTest.cpp | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Support/StringExtras.cpp b/llvm/lib/Support/StringExtras.cpp index fd5a34fb3d6e82..dce429a4eeaad6 100644 --- a/llvm/lib/Support/StringExtras.cpp +++ b/llvm/lib/Support/StringExtras.cpp @@ -97,10 +97,13 @@ std::string llvm::convertToSnakeFromCamelCase(StringRef input) { if (input.empty()) return ""; + llvm::Regex trailingCap = llvm::Regex("([A-Z]+)([A-Z][a-z])"); + llvm::Regex leadingCap = llvm::Regex("([a-z0-9])([A-Z])"); + std::string snakeCase = input.str(); for (int i = 0; i < 10; ++i) { - snakeCase = llvm::Regex("([A-Z]+)([A-Z][a-z])").sub("\\1_\\2", snakeCase); - snakeCase = llvm::Regex("([a-z0-9])([A-Z])").sub("\\1_\\2", snakeCase); + snakeCase = trailingCap.sub("\\1_\\2", snakeCase); + snakeCase = leadingCap.sub("\\1_\\2", snakeCase); } std::transform(snakeCase.begin(), snakeCase.end(), snakeCase.begin(), [](unsigned char c) { return std::tolower(c); }); diff --git a/llvm/unittests/ADT/StringExtrasTest.cpp b/llvm/unittests/ADT/StringExtrasTest.cpp index fab562f1ed0d59..522daea5780f95 100644 --- a/llvm/unittests/ADT/StringExtrasTest.cpp +++ b/llvm/unittests/ADT/StringExtrasTest.cpp @@ -185,6 +185,7 @@ TEST(StringExtrasTest, ConvertToSnakeFromCamelCase) { testConvertToSnakeCase("OpName", "op_name"); testConvertToSnakeCase("opName", "op_name"); testConvertToSnakeCase("OPName", "op_name"); + testConvertToSnakeCase("Intel_OCL_BI", "intel_ocl_bi"); testConvertToSnakeCase("opNAME", "op_name"); testConvertToSnakeCase("opNAMe", "op_na_me"); testConvertToSnakeCase("opnameE", "opname_e"); From d131ddbf6841cbd1a1db631b6c3c00b0c9def700 Mon Sep 17 00:00:00 2001 From: max Date: Fri, 6 Oct 2023 13:30:00 -0500 Subject: [PATCH 3/5] make fixed-point --- llvm/lib/Support/StringExtras.cpp | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Support/StringExtras.cpp b/llvm/lib/Support/StringExtras.cpp index dce429a4eeaad6..69bd91506aff88 100644 --- a/llvm/lib/Support/StringExtras.cpp +++ b/llvm/lib/Support/StringExtras.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" #include @@ -100,14 +101,23 @@ std::string llvm::convertToSnakeFromCamelCase(StringRef input) { llvm::Regex trailingCap = llvm::Regex("([A-Z]+)([A-Z][a-z])"); llvm::Regex leadingCap = llvm::Regex("([a-z0-9])([A-Z])"); - std::string snakeCase = input.str(); - for (int i = 0; i < 10; ++i) { - snakeCase = trailingCap.sub("\\1_\\2", snakeCase); - snakeCase = leadingCap.sub("\\1_\\2", snakeCase); - } - std::transform(snakeCase.begin(), snakeCase.end(), snakeCase.begin(), + std::string curr = input.str(); + std::string prev = input.str(); + size_t iters = 0; + do { + prev = curr; + curr = trailingCap.sub("\\1_\\2", prev); + curr = leadingCap.sub("\\1_\\2", curr); + } while (curr != prev && ++iters < input.size()); + + if (iters == input.size()) + llvm::report_fatal_error( + input + Twine(" couldn't be converted to snake case after ") + + Twine(iters) + Twine("iterations.")); + + std::transform(curr.begin(), curr.end(), curr.begin(), [](unsigned char c) { return std::tolower(c); }); - return snakeCase; + return curr; } std::string llvm::convertToCamelFromSnakeCase(StringRef input, From 2d120c9d04e0cf52dc5e1d94c4962f1e93a235c6 Mon Sep 17 00:00:00 2001 From: max Date: Fri, 6 Oct 2023 15:18:14 -0500 Subject: [PATCH 4/5] use loop instead of regex --- llvm/lib/Support/StringExtras.cpp | 35 +++++++++++++------------------ 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Support/StringExtras.cpp b/llvm/lib/Support/StringExtras.cpp index 69bd91506aff88..1eb033a1bdac0e 100644 --- a/llvm/lib/Support/StringExtras.cpp +++ b/llvm/lib/Support/StringExtras.cpp @@ -12,8 +12,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" #include @@ -98,26 +96,21 @@ std::string llvm::convertToSnakeFromCamelCase(StringRef input) { if (input.empty()) return ""; - llvm::Regex trailingCap = llvm::Regex("([A-Z]+)([A-Z][a-z])"); - llvm::Regex leadingCap = llvm::Regex("([a-z0-9])([A-Z])"); - - std::string curr = input.str(); - std::string prev = input.str(); - size_t iters = 0; - do { - prev = curr; - curr = trailingCap.sub("\\1_\\2", prev); - curr = leadingCap.sub("\\1_\\2", curr); - } while (curr != prev && ++iters < input.size()); - - if (iters == input.size()) - llvm::report_fatal_error( - input + Twine(" couldn't be converted to snake case after ") + - Twine(iters) + Twine("iterations.")); - - std::transform(curr.begin(), curr.end(), curr.begin(), + std::string snakeCase; + snakeCase.reserve(input.size()); + auto check = [&input](size_t j, std::function check) { + return j < input.size() ? check(input[j]) : false; + }; + for (size_t i = 0; i < input.size(); ++i) { + snakeCase.push_back(input[i]); + if (check(i, isupper) && check(i + 1, isupper) && check(i + 2, islower)) + snakeCase.push_back('_'); + if ((check(i, islower) || check(i, isdigit)) && check(i + 1, isupper)) + snakeCase.push_back('_'); + } + std::transform(snakeCase.begin(), snakeCase.end(), snakeCase.begin(), [](unsigned char c) { return std::tolower(c); }); - return curr; + return snakeCase; } std::string llvm::convertToCamelFromSnakeCase(StringRef input, From 6bf2dc2b30dfa8ac148148ac8bc77f0de6e6c4d1 Mon Sep 17 00:00:00 2001 From: max Date: Fri, 6 Oct 2023 15:42:41 -0500 Subject: [PATCH 5/5] incorporate comments --- llvm/lib/Support/StringExtras.cpp | 9 ++++----- llvm/unittests/ADT/StringExtrasTest.cpp | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Support/StringExtras.cpp b/llvm/lib/Support/StringExtras.cpp index 1eb033a1bdac0e..6ae26267337b19 100644 --- a/llvm/lib/Support/StringExtras.cpp +++ b/llvm/lib/Support/StringExtras.cpp @@ -98,18 +98,17 @@ std::string llvm::convertToSnakeFromCamelCase(StringRef input) { std::string snakeCase; snakeCase.reserve(input.size()); - auto check = [&input](size_t j, std::function check) { - return j < input.size() ? check(input[j]) : false; + auto check = [&input](size_t j, function_ref predicate) { + return j < input.size() && predicate(input[j]); }; for (size_t i = 0; i < input.size(); ++i) { - snakeCase.push_back(input[i]); + snakeCase.push_back(tolower(input[i])); + // Handles "runs" of capitals, such as in OPName -> op_name. if (check(i, isupper) && check(i + 1, isupper) && check(i + 2, islower)) snakeCase.push_back('_'); if ((check(i, islower) || check(i, isdigit)) && check(i + 1, isupper)) snakeCase.push_back('_'); } - std::transform(snakeCase.begin(), snakeCase.end(), snakeCase.begin(), - [](unsigned char c) { return std::tolower(c); }); return snakeCase; } diff --git a/llvm/unittests/ADT/StringExtrasTest.cpp b/llvm/unittests/ADT/StringExtrasTest.cpp index 522daea5780f95..d5117bcde56d74 100644 --- a/llvm/unittests/ADT/StringExtrasTest.cpp +++ b/llvm/unittests/ADT/StringExtrasTest.cpp @@ -186,6 +186,7 @@ TEST(StringExtrasTest, ConvertToSnakeFromCamelCase) { testConvertToSnakeCase("opName", "op_name"); testConvertToSnakeCase("OPName", "op_name"); testConvertToSnakeCase("Intel_OCL_BI", "intel_ocl_bi"); + testConvertToSnakeCase("I32Attr", "i32_attr"); testConvertToSnakeCase("opNAME", "op_name"); testConvertToSnakeCase("opNAMe", "op_na_me"); testConvertToSnakeCase("opnameE", "opname_e");