From ac04c6434af0dfb0c4b2f26afd6b84d5f9ad3e8e Mon Sep 17 00:00:00 2001 From: IlyasShabi Date: Wed, 17 Apr 2024 07:30:05 +0200 Subject: [PATCH] src: remove regex usage for env file parsing PR-URL: https://github.com/nodejs/node/pull/52406 Fixes: https://github.com/nodejs/node/issues/52248 Reviewed-By: Yagiz Nizipli --- src/node_dotenv.cc | 161 ++++++++++++++++----- src/node_dotenv.h | 1 - test/fixtures/dotenv/eof-without-value.env | 2 + test/fixtures/dotenv/multiline.env | 7 + test/fixtures/dotenv/valid.env | 7 +- test/parallel/test-dotenv-edge-cases.js | 34 +++++ test/parallel/test-dotenv.js | 6 +- test/parallel/util-parse-env.js | 2 +- 8 files changed, 173 insertions(+), 47 deletions(-) create mode 100644 test/fixtures/dotenv/eof-without-value.env create mode 100644 test/fixtures/dotenv/multiline.env diff --git a/src/node_dotenv.cc b/src/node_dotenv.cc index 8d92e917082488..27f9c30c55714a 100644 --- a/src/node_dotenv.cc +++ b/src/node_dotenv.cc @@ -1,5 +1,4 @@ #include "node_dotenv.h" -#include // NOLINT(build/c++11) #include #include "env-inl.h" #include "node_file.h" @@ -12,15 +11,6 @@ using v8::NewStringType; using v8::Object; using v8::String; -/** - * The inspiration for this implementation comes from the original dotenv code, - * available at https://github.com/motdotla/dotenv - */ -const std::regex LINE( - "\\s*(?:export\\s+)?([\\w.-]+)(?:\\s*=\\s*?|:\\s+?)(\\s*'(?:\\\\'|[^']" - ")*'|\\s*\"(?:\\\\\"|[^\"])*\"|\\s*`(?:\\\\`|[^`])*`|[^#\r\n]+)?\\s*(?" - ":#.*)?"); // NOLINT(whitespace/line_length) - std::vector Dotenv::GetPathFromArgs( const std::vector& args) { const auto find_match = [](const std::string& arg) { @@ -101,35 +91,137 @@ Local Dotenv::ToObject(Environment* env) { return result; } -void Dotenv::ParseContent(const std::string_view content) { - std::string lines = std::string(content); - lines = std::regex_replace(lines, std::regex("\r\n?"), "\n"); +std::string_view trim_spaces(std::string_view input) { + if (input.empty()) return ""; + if (input.front() == ' ') { + input.remove_prefix(input.find_first_not_of(' ')); + } + if (!input.empty() && input.back() == ' ') { + input = input.substr(0, input.find_last_not_of(' ') + 1); + } + return input; +} + +void Dotenv::ParseContent(const std::string_view input) { + std::string lines(input); + + // Handle windows newlines "\r\n": remove "\r" and keep only "\n" + lines.erase(std::remove(lines.begin(), lines.end(), '\r'), lines.end()); + + std::string_view content = lines; + content = trim_spaces(content); + + std::string_view key; + std::string_view value; + + while (!content.empty()) { + // Skip empty lines and comments + if (content.front() == '\n' || content.front() == '#') { + auto newline = content.find('\n'); + if (newline != std::string_view::npos) { + content.remove_prefix(newline + 1); + continue; + } + } + + // If there is no equal character, then ignore everything + auto equal = content.find('='); + if (equal == std::string_view::npos) { + break; + } - std::smatch match; - while (std::regex_search(lines, match, LINE)) { - const std::string key = match[1].str(); + key = content.substr(0, equal); + content.remove_prefix(equal + 1); + key = trim_spaces(key); - // Default undefined or null to an empty string - std::string value = match[2].str(); + if (key.empty()) { + break; + } - // Remove leading whitespaces - value.erase(0, value.find_first_not_of(" \t")); + // Remove export prefix from key + auto have_export = key.compare(0, 7, "export ") == 0; + if (have_export) { + key.remove_prefix(7); + } - // Remove trailing whitespaces - if (!value.empty()) { - value.erase(value.find_last_not_of(" \t") + 1); + // SAFETY: Content is guaranteed to have at least one character + if (content.empty()) { + // In case the last line is a single key without value + // Example: KEY= (without a newline at the EOF) + store_.insert_or_assign(std::string(key), ""); + break; } - if (!value.empty() && value.front() == '"') { - value = std::regex_replace(value, std::regex("\\\\n"), "\n"); - value = std::regex_replace(value, std::regex("\\\\r"), "\r"); + // Expand new line if \n it's inside double quotes + // Example: EXPAND_NEWLINES = 'expand\nnew\nlines' + if (content.front() == '"') { + auto closing_quote = content.find(content.front(), 1); + if (closing_quote != std::string_view::npos) { + value = content.substr(1, closing_quote - 1); + std::string multi_line_value = std::string(value); + + size_t pos = 0; + while ((pos = multi_line_value.find("\\n", pos)) != + std::string_view::npos) { + multi_line_value.replace(pos, 2, "\n"); + pos += 1; + } + + store_.insert_or_assign(std::string(key), multi_line_value); + content.remove_prefix(content.find('\n', closing_quote + 1)); + continue; + } } - // Remove surrounding quotes - value = trim_quotes(value); + // Check if the value is wrapped in quotes, single quotes or backticks + if ((content.front() == '\'' || content.front() == '"' || + content.front() == '`')) { + auto closing_quote = content.find(content.front(), 1); + + // Check if the closing quote is not found + // Example: KEY="value + if (closing_quote == std::string_view::npos) { + // Check if newline exist. If it does, take the entire line as the value + // Example: KEY="value\nKEY2=value2 + // The value pair should be `"value` + auto newline = content.find('\n'); + if (newline != std::string_view::npos) { + value = content.substr(0, newline); + store_.insert_or_assign(std::string(key), value); + content.remove_prefix(newline); + } + } else { + // Example: KEY="value" + value = content.substr(1, closing_quote - 1); + store_.insert_or_assign(std::string(key), value); + // Select the first newline after the closing quotation mark + // since there could be newline characters inside the value. + content.remove_prefix(content.find('\n', closing_quote + 1)); + } + } else { + // Regular key value pair. + // Example: `KEY=this is value` + auto newline = content.find('\n'); + + if (newline != std::string_view::npos) { + value = content.substr(0, newline); + auto hash_character = value.find('#'); + // Check if there is a comment in the line + // Example: KEY=value # comment + // The value pair should be `value` + if (hash_character != std::string_view::npos) { + value = content.substr(0, hash_character); + } + content.remove_prefix(newline); + } else { + // In case the last line is a single key/value pair + // Example: KEY=VALUE (without a newline at the EOF) + value = content.substr(0); + } - store_.insert_or_assign(std::string(key), value); - lines = match.suffix(); + value = trim_spaces(value); + store_.insert_or_assign(std::string(key), value); + } } } @@ -179,13 +271,4 @@ void Dotenv::AssignNodeOptionsIfAvailable(std::string* node_options) { } } -std::string_view Dotenv::trim_quotes(std::string_view str) { - static const std::unordered_set quotes = {'"', '\'', '`'}; - if (str.size() >= 2 && quotes.count(str.front()) && - quotes.count(str.back())) { - str = str.substr(1, str.size() - 2); - } - return str; -} - } // namespace node diff --git a/src/node_dotenv.h b/src/node_dotenv.h index 5872627f0c25fe..33ed63a9bef33f 100644 --- a/src/node_dotenv.h +++ b/src/node_dotenv.h @@ -32,7 +32,6 @@ class Dotenv { private: std::map store_; - std::string_view trim_quotes(std::string_view str); }; } // namespace node diff --git a/test/fixtures/dotenv/eof-without-value.env b/test/fixtures/dotenv/eof-without-value.env new file mode 100644 index 00000000000000..1806141cfa01ae --- /dev/null +++ b/test/fixtures/dotenv/eof-without-value.env @@ -0,0 +1,2 @@ +BASIC=value +EMPTY= \ No newline at end of file diff --git a/test/fixtures/dotenv/multiline.env b/test/fixtures/dotenv/multiline.env new file mode 100644 index 00000000000000..b138a31e8001b1 --- /dev/null +++ b/test/fixtures/dotenv/multiline.env @@ -0,0 +1,7 @@ +JWT_PUBLIC_KEY="-----BEGIN PUBLIC KEY----- +MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAnNl1tL3QjKp3DZWM0T3u +LgGJQwu9WqyzHKZ6WIA5T+7zPjO1L8l3S8k8YzBrfH4mqWOD1GBI8Yjq2L1ac3Y/ +bTdfHN8CmQr2iDJC0C6zY8YV93oZB3x0zC/LPbRYpF8f6OqX1lZj5vo2zJZy4fI/ +kKcI5jHYc8VJq+KCuRZrvn+3V+KuL9tF9v8ZgjF2PZbU+LsCy5Yqg1M8f5Jp5f6V +u4QuUoobAgMBAAE= +-----END PUBLIC KEY-----" diff --git a/test/fixtures/dotenv/valid.env b/test/fixtures/dotenv/valid.env index ccffa779cb0d86..963c4c848a4225 100644 --- a/test/fixtures/dotenv/valid.env +++ b/test/fixtures/dotenv/valid.env @@ -1,5 +1,9 @@ BASIC=basic +# COMMENTS=work +#BASIC=basic2 +#BASIC=basic3 + # previous line intentionally left blank AFTER_LINE=after_line EMPTY= @@ -55,7 +59,8 @@ IS A "MULTILINE'S" STRING` +export EXPORT_EXAMPLE = ignore export + MULTI_NOT_VALID_QUOTE=" MULTI_NOT_VALID=THIS IS NOT MULTILINE -export EXAMPLE = ignore export diff --git a/test/parallel/test-dotenv-edge-cases.js b/test/parallel/test-dotenv-edge-cases.js index 23e3e037418351..f8cd262c8a8092 100644 --- a/test/parallel/test-dotenv-edge-cases.js +++ b/test/parallel/test-dotenv-edge-cases.js @@ -4,6 +4,7 @@ const common = require('../common'); const assert = require('node:assert'); const path = require('node:path'); const { describe, it } = require('node:test'); +const fixtures = require('../common/fixtures'); const validEnvFilePath = '../fixtures/dotenv/valid.env'; const nodeOptionsEnvFilePath = '../fixtures/dotenv/node-options.env'; @@ -64,4 +65,37 @@ describe('.env supports edge cases', () => { assert.strictEqual(child.stderr, ''); assert.strictEqual(child.code, 0); }); + + it('should handle multiline quoted values', async () => { + // Ref: https://github.com/nodejs/node/issues/52248 + const code = ` + process.loadEnvFile('./multiline.env'); + require('node:assert').ok(process.env.JWT_PUBLIC_KEY); + `.trim(); + const child = await common.spawnPromisified( + process.execPath, + [ '--eval', code ], + { cwd: fixtures.path('dotenv') }, + ); + assert.strictEqual(child.stdout, ''); + assert.strictEqual(child.stderr, ''); + assert.strictEqual(child.code, 0); + }); + + it('should handle empty value without a newline at the EOF', async () => { + // Ref: https://github.com/nodejs/node/issues/52466 + const code = ` + process.loadEnvFile('./eof-without-value.env'); + require('assert').strictEqual(process.env.BASIC, 'value'); + require('assert').strictEqual(process.env.EMPTY, ''); + `.trim(); + const child = await common.spawnPromisified( + process.execPath, + [ '--eval', code ], + { cwd: fixtures.path('dotenv') }, + ); + assert.strictEqual(child.stdout, ''); + assert.strictEqual(child.stderr, ''); + assert.strictEqual(child.code, 0); + }); }); diff --git a/test/parallel/test-dotenv.js b/test/parallel/test-dotenv.js index 45b89db468510d..88afd58b5d766e 100644 --- a/test/parallel/test-dotenv.js +++ b/test/parallel/test-dotenv.js @@ -58,10 +58,6 @@ assert.strictEqual(process.env.COMMENTS, undefined); assert.strictEqual(process.env.EQUAL_SIGNS, 'equals=='); // Retains inner quotes assert.strictEqual(process.env.RETAIN_INNER_QUOTES, '{"foo": "bar"}'); -// Respects equals signs in values -assert.strictEqual(process.env.EQUAL_SIGNS, 'equals=='); -// Retains inner quotes -assert.strictEqual(process.env.RETAIN_INNER_QUOTES, '{"foo": "bar"}'); assert.strictEqual(process.env.RETAIN_INNER_QUOTES_AS_STRING, '{"foo": "bar"}'); assert.strictEqual(process.env.RETAIN_INNER_QUOTES_AS_BACKTICKS, '{"foo": "bar\'s"}'); // Retains spaces in string @@ -83,4 +79,4 @@ assert.strictEqual(process.env.EXPAND_NEWLINES, 'expand\nnew\nlines'); assert.strictEqual(process.env.DONT_EXPAND_UNQUOTED, 'dontexpand\\nnewlines'); assert.strictEqual(process.env.DONT_EXPAND_SQUOTED, 'dontexpand\\nnewlines'); // Ignore export before key -assert.strictEqual(process.env.EXAMPLE, 'ignore export'); +assert.strictEqual(process.env.EXPORT_EXAMPLE, 'ignore export'); diff --git a/test/parallel/util-parse-env.js b/test/parallel/util-parse-env.js index 9e0ab4f6e2970e..755bd98f10317b 100644 --- a/test/parallel/util-parse-env.js +++ b/test/parallel/util-parse-env.js @@ -32,7 +32,7 @@ const fs = require('node:fs'); EMPTY_DOUBLE_QUOTES: '', EMPTY_SINGLE_QUOTES: '', EQUAL_SIGNS: 'equals==', - EXAMPLE: 'ignore export', + EXPORT_EXAMPLE: 'ignore export', EXPAND_NEWLINES: 'expand\nnew\nlines', INLINE_COMMENTS: 'inline comments', INLINE_COMMENTS_BACKTICKS: 'inline comments outside of #backticks',