From 2aafa70c5346ff3437f9e019ee24ba2d4526efb6 Mon Sep 17 00:00:00 2001 From: SafeteeWoW <30888549+SafeteeWoW@users.noreply.github.com> Date: Mon, 19 Apr 2021 00:05:26 +0800 Subject: [PATCH] feat: support config line_separator Add config "line_separator" which supports the following options: input: Determine line separator by the input content. This is the default. os: Determine line separator by the operating system lf: Use Unix Style ("\n") cr: Use classic Mac Style ("\r") crlf: Use Windows Style ("\r\n") Note that the default behavior is changed. The previous behavior was "os", but I think "input" is a more appropriate default. --- .gitattributes | 1 + CMakeLists.txt | 1 + README.md | 6 +++ src/Config.cpp | 23 +++++++++ src/lua-format.cpp | 91 ++++++++++++++++++++++++++++++++++++ src/lua-format.h | 7 +++ src/main.cpp | 23 ++++++--- test/test_config.cpp | 28 +++++++++++ test/test_line_separator.cpp | 44 +++++++++++++++++ 9 files changed, 218 insertions(+), 6 deletions(-) create mode 100644 test/test_line_separator.cpp diff --git a/.gitattributes b/.gitattributes index 352f149..2e5b3db 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ +* text=auto eol=lf *.a filter=lfs diff=lfs merge=lfs -text diff --git a/CMakeLists.txt b/CMakeLists.txt index 11cba35..aa6c14b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -119,6 +119,7 @@ if(BUILD_TESTS) test/test_parser.cpp test/test_args.cpp test/test_validation.cpp + test/test_line_separator.cpp ) set_target_properties(lua-format-test PROPERTIES LINKER_LANGUAGE CXX) diff --git a/README.md b/README.md index 3bbdef5..0594f14 100644 --- a/README.md +++ b/README.md @@ -133,6 +133,11 @@ luarocks install --server=https://luarocks.org/dev luaformatter in key/value fields --line-breaks-after-function-body Line brakes after function body + --line-separator=[line separator] input(determined by the input content), + os(Use line ending of the current + Operating system), lf(Unix style "\n"), + crlf(Windows style "\r\n"), 
cr(classic + Max style "\r") Lua scripts... Lua scripts to format "--" can be used to terminate flag options and force all following arguments to be treated as positional options @@ -182,6 +187,7 @@ double_quote_to_single_quote: false single_quote_to_double_quote: false spaces_around_equals_in_field: true line_breaks_after_function_body: 1 +line_separator: input ``` ### Disable formatting for a line or block Sometimes it may be useful to disable automatic formatting. This is done be putting the code between `LuaFormatter off` and `LuaFormatter on` tags: diff --git a/src/Config.cpp b/src/Config.cpp index d07618c..1475e48 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -49,6 +49,8 @@ Config::Config() { node["spaces_around_equals_in_field"] = true; node["line_breaks_after_function_body"] = 1; + node["line_separator"] = "input"; + // Validators // validate integer without 0s as configuration value auto validate_integer = [](const std::string& key, std::any elem) { @@ -117,6 +119,15 @@ Config::Config() { } return value; }; + auto validate_line_separator = [&](const std::string& key, std::any elem) { + (void)(key); + const auto value = std::any_cast(elem); + if (value != "os" && value != "input" && value != "lf" && value != "cr" && value != "crlf") { + throw std::domain_error( + "[ERROR] Configuration value of line_separator should be one of os/input/lf/cr/crlf"); + } + return value; + }; // Validators validators["spaces_before_call"] = validate_integer_zero; @@ -150,6 +161,7 @@ Config::Config() { validators["spaces_inside_table_braces"] = validate_boolean; validators["spaces_around_equals_in_field"] = validate_boolean; validators["line_breaks_after_function_body"] = validate_integer; + validators["line_separator"] = validate_line_separator; // DataType of every configuration field datatype["spaces_before_call"] = 'i'; @@ -183,6 +195,7 @@ Config::Config() { datatype["spaces_inside_table_braces"] = 'b'; datatype["spaces_around_equals_in_field"] = 'b'; 
datatype["line_breaks_after_function_body"] = 'i'; + datatype["line_separator"] = 's'; } void Config::readFromFile(const std::string& file) { @@ -224,6 +237,11 @@ void Config::readFromFile(const std::string& file) { node[key] = std::any_cast(validators[key](key, value)); break; } + case 's': { + auto value = kv.second.as(); + node[key] = std::any_cast(validators[key](key, value)); + break; + } } } if (key == CTL) { @@ -258,6 +276,11 @@ void Config::readFromMap(std::map& mp) { node[key] = std::any_cast(validators[key](key, kv.second)); break; } + case 's': { + assert(strcmp(kv.second.type().name(), "s") == 0); + node[key] = std::any_cast(validators[key](key, kv.second)); + break; + } } } if (key == CTL) { diff --git a/src/lua-format.cpp b/src/lua-format.cpp index 3f9d177..377d2de 100644 --- a/src/lua-format.cpp +++ b/src/lua-format.cpp @@ -1,3 +1,5 @@ +#include "lua-format.h" + #include #include "FormatVisitor.h" @@ -51,6 +53,34 @@ std::vector> findBlocksBetweenFormatSwitch(const std:: return blocks; } +static std::string getOSLineSeparator() { +#ifdef _WIN32 + return "\r\n"; +#else + return "\n"; +#endif +} + +std::string handleLineSeparator(const std::string& original, const std::string& formatted, const Config& config) { + const auto line_separator_config = config.get("line_separator"); + auto out = formatted; + if (line_separator_config == "os") { + out = convert_line_separator(out, getOSLineSeparator()); + } else if (line_separator_config == "input") { + auto line_sep = get_line_separator(original); + out = convert_line_separator(out, line_sep); + } else if (line_separator_config == "lf") { + out = convert_line_separator(out, "\n"); + } else if (line_separator_config == "cr") { + out = convert_line_separator(out, "\r"); + } else if (line_separator_config == "crlf") { + out = convert_line_separator(out, "\r\n"); + } else { + throw std::runtime_error("Should not reach here. 
Invalid line_separator config"); + } + return out; +} + std::string resetContentInDisableFormatBlocks(const std::string& original, const std::string& formatted) { std::vector> originalBlocks = findBlocksBetweenFormatSwitch(original); std::vector> formattedBlocks = findBlocksBetweenFormatSwitch(formatted); @@ -84,11 +114,72 @@ std::string lua_format(std::istream& is, const Config& config) { std::string original = os.str(); ANTLRInputStream input(original); std::string formatted = __format(input, config); + formatted = handleLineSeparator(original, formatted, config); return resetContentInDisableFormatBlocks(original, formatted); } std::string lua_format(const std::string& str, const Config& config) { ANTLRInputStream input(str); std::string formatted = __format(input, config); + formatted = handleLineSeparator(str, formatted, config); return resetContentInDisableFormatBlocks(str, formatted); } + +std::string get_line_separator(const std::string& input) { + constexpr char CR = '\r'; + constexpr char LF = '\n'; + size_t lf_count = 0; + size_t cr_count = 0; + size_t crlf_count = 0; + const auto length = input.length(); + for (size_t i = 0; i < length; i++) { + const auto cur = input[i]; + if (cur == LF) { + lf_count++; + } else if (cur == CR) { + const auto next = input[i + 1]; + if (next == LF) { + crlf_count++; + i++; + } else { + cr_count++; + } + } + } + + std::string result; + if (lf_count >= crlf_count && lf_count >= cr_count) { + result += LF; + } else if (crlf_count >= lf_count && crlf_count >= cr_count) { + result += CR; + result += LF; + } else { + result += CR; + } + return result; +} + +std::string convert_line_separator(const std::string& input, const std::string& line_sep) { + constexpr char CR = '\r'; + constexpr char LF = '\n'; + const auto length = input.length(); + + std::string result; + for (size_t i = 0; i < length; i++) { + const auto cur = input[i]; + if (cur == LF) { + result += line_sep; + } else if (cur == CR) { + const auto next = input[i + 
1]; + if (next == LF) { + result += line_sep; + i++; + } else { + result += line_sep; + } + } else { + result += cur; + } + } + return result; +} diff --git a/src/lua-format.h b/src/lua-format.h index b1e6f0c..e9c2040 100644 --- a/src/lua-format.h +++ b/src/lua-format.h @@ -7,3 +7,10 @@ std::string lua_format(const std::string& input, const Config& config); std::string lua_format(std::istream& stream, const Config& config); + +/// Return the line separator (LF, CRLF, CR) that appears the most +/// When ties or the input has no newline, return in the order of LF > CRLF > CR +std::string get_line_separator(const std::string& input); + +/// Return input with all line separator replaced with the specified one +std::string convert_line_separator(const std::string& input, const std::string& line_sep); diff --git a/src/main.cpp b/src/main.cpp index 2addf13..68beade 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -155,7 +155,15 @@ int main(int argc, const char* argv[]) { args::Flag nospacesaroundequalsinfield(optspacesaroundequalsinfield, "spaces around equals sign in key/value fields", "Do not put spaces around the equal sign in key/value fields", {"no-spaces-around-equals-in-field"}); args::ValueFlag linebreaksafterfunctionbody(parser, "line breaks after function body", "Line breaks after function body", {"line-breaks-after-function-body"}); - + + args::ValueFlag lineseparator(parser, "line separator", + "input(determined by the input content), " + "os(Use line ending of the current Operating system), " + "lf(Unix style \"\\n\"), " + "crlf(Windows style \"\\r\\n\"), " + "cr(classic Max style \"\\r\")", + {"line-separator"}); + args::PositionalList files(parser, "Lua scripts", "Lua scripts to format"); Config config; @@ -340,7 +348,11 @@ int main(int argc, const char* argv[]) { if (linebreaksafterfunctionbody) { argmap["line_breaks_after_function_body"] = args::get(linebreaksafterfunctionbody); } - + + if (lineseparator) { + argmap["line_separator"] = 
args::get(lineseparator); + } + std::string configFileName = args::get(cFile); // Automatically look for a .lua-format on the current directory @@ -470,16 +482,15 @@ int main(int argc, const char* argv[]) { continue; } - std::ifstream ifs; - ifs.open(fileName); + std::ifstream ifs(fileName, std::ifstream::binary); try { std::string out = lua_format(ifs, config); if (!inplace) { - std::cout << out; + std::cout.write(out.c_str(), out.length()); } else { - std::ofstream fout(fileName); + std::ofstream fout(fileName, std::ofstream::binary); fout << out; fout.close(); diff --git a/test/test_config.cpp b/test/test_config.cpp index 23092e2..4211a42 100644 --- a/test/test_config.cpp +++ b/test/test_config.cpp @@ -283,3 +283,31 @@ TEST_CASE("read from file", "config") { REQUIRE("," == config.get("table_sep")); REQUIRE(false == config.get("extra_sep_at_table_end")); } + +TEST_CASE("line_separator", "config") { + Config config; + config.set("indent_width", 2); + + config.set("line_separator", "lf"); + REQUIRE("function W()\n print(1)\n print(2)\nend\n" == lua_format("function W() print(1) print(2) end", config)); + config.set("line_separator", "cr"); + REQUIRE("function W()\r print(1)\r print(2)\rend\r" == lua_format("function W() print(1) print(2) end", config)); + config.set("line_separator", "crlf"); + REQUIRE("function W()\r\n print(1)\r\n print(2)\r\nend\r\n" == lua_format("function W() print(1) print(2) end", config)); + + config.set("line_separator", "input"); + REQUIRE("function W()\n print(1)\n print(2)\nend\n" == lua_format("function W()\n print(1)\n print(2)\n end", config)); + REQUIRE("function W()\r print(1)\r print(2)\rend\r" == lua_format("function W()\r print(1)\r print(2)\r end", config)); + REQUIRE("function W()\r\n print(1)\r\n print(2)\r\nend\r\n" == lua_format("function W()\r\n print(1)\r\n print(2)\r\n end", config)); + + config.set("line_separator", "os"); +#ifdef _WIN32 + REQUIRE("function W()\r\n print(1)\r\n print(2)\r\nend\r\n" == 
lua_format("function W()\n print(1)\n print(2)\n end", config)); + REQUIRE("function W()\r\n print(1)\r\n print(2)\r\nend\r\n" == lua_format("function W()\r print(1)\r print(2)\r end", config)); + REQUIRE("function W()\r\n print(1)\r\n print(2)\r\nend\r\n" == lua_format("function W()\r\n print(1)\r\n print(2)\r\n end", config)); +#else + REQUIRE("function W()\n print(1)\n print(2)\nend\n" == lua_format("function W()\n print(1)\n print(2)\n end", config)); + REQUIRE("function W()\n print(1)\n print(2)\nend\n" == lua_format("function W()\r print(1)\r print(2)\r end", config)); + REQUIRE("function W()\n print(1)\n print(2)\nend\n" == lua_format("function W()\r\n print(1)\r\n print(2)\r\n end", config)); +#endif +} diff --git a/test/test_line_separator.cpp b/test/test_line_separator.cpp new file mode 100644 index 0000000..3f3b3cd --- /dev/null +++ b/test/test_line_separator.cpp @@ -0,0 +1,44 @@ +#include + +#include "lua-format.h" + +TEST_CASE("get_line_separator", "line_separator") { + REQUIRE(get_line_separator("\n") == "\n"); + REQUIRE(get_line_separator("\r") == "\r"); + REQUIRE(get_line_separator("\r\n") == "\r\n"); + + REQUIRE(get_line_separator("1\n2\n3\n") == "\n"); + REQUIRE(get_line_separator("1\r\n2\r\n3\r\n") == "\r\n"); + REQUIRE(get_line_separator("1\r2\r3\r") == "\r"); + + // Different separator appears equal times + REQUIRE(get_line_separator("") == "\n"); + REQUIRE(get_line_separator("\r\n\n\r") == "\n"); + REQUIRE(get_line_separator("\n\r") == "\n"); + REQUIRE(get_line_separator("\r\n\r") == "\r\n"); + REQUIRE(get_line_separator("1\r\n2\n3\r") == "\n"); + + // Different separator appears different times + REQUIRE(get_line_separator("1\r\n2\r\n3\n") == "\r\n"); + REQUIRE(get_line_separator("1\n2\r\n3\r\n") == "\r\n"); + REQUIRE(get_line_separator("1\r2\n3\n") == "\n"); + REQUIRE(get_line_separator("1\n2\r3\r") == "\r"); +} + +TEST_CASE("convert_line_separator", "line_separator") { + REQUIRE(convert_line_separator("", "\r\n").empty()); + 
REQUIRE(convert_line_separator("", "\n").empty()); + REQUIRE(convert_line_separator("", "\r").empty()); + + REQUIRE(convert_line_separator("1\r\n2\r\n3\r\n", "\n") == "1\n2\n3\n"); + REQUIRE(convert_line_separator("1\r\n2\r\n3\r\n", "\r") == "1\r2\r3\r"); + REQUIRE(convert_line_separator("1\r\n2\r\n3\r\n", "\r\n") == "1\r\n2\r\n3\r\n"); + + REQUIRE(convert_line_separator("1\n2\n3\n", "\n") == "1\n2\n3\n"); + REQUIRE(convert_line_separator("1\n2\n3\n", "\r") == "1\r2\r3\r"); + REQUIRE(convert_line_separator("1\n2\n3\n", "\r\n") == "1\r\n2\r\n3\r\n"); + + REQUIRE(convert_line_separator("1\r2\r3\r", "\n") == "1\n2\n3\n"); + REQUIRE(convert_line_separator("1\r2\r3\r", "\r") == "1\r2\r3\r"); + REQUIRE(convert_line_separator("1\r2\r3\r", "\r\n") == "1\r\n2\r\n3\r\n"); +}