Skip to content

Commit 9df5358

Browse files
committed
src: remove regex usage for env file parsing
1 parent 756acd0 commit 9df5358

File tree

5 files changed

+140
-37
lines changed

5 files changed

+140
-37
lines changed

src/node_dotenv.cc

Lines changed: 110 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,6 @@ using v8::NewStringType;
1212
using v8::Object;
1313
using v8::String;
1414

15-
/**
16-
* The inspiration for this implementation comes from the original dotenv code,
17-
* available at https://github.com/motdotla/dotenv
18-
*/
19-
const std::regex LINE(
20-
"\\s*(?:export\\s+)?([\\w.-]+)(?:\\s*=\\s*?|:\\s+?)(\\s*'(?:\\\\'|[^']"
21-
")*'|\\s*\"(?:\\\\\"|[^\"])*\"|\\s*`(?:\\\\`|[^`])*`|[^#\r\n]+)?\\s*(?"
22-
":#.*)?"); // NOLINT(whitespace/line_length)
23-
2415
std::vector<std::string> Dotenv::GetPathFromArgs(
2516
const std::vector<std::string>& args) {
2617
const auto find_match = [](const std::string& arg) {
@@ -102,34 +93,93 @@ Local<Object> Dotenv::ToObject(Environment* env) {
10293
}
10394

10495
void Dotenv::ParseContent(const std::string_view content) {
105-
std::string lines = std::string(content);
106-
lines = std::regex_replace(lines, std::regex("\r\n?"), "\n");
107-
108-
std::smatch match;
109-
while (std::regex_search(lines, match, LINE)) {
110-
const std::string key = match[1].str();
96+
std::string lines =
97+
std::regex_replace(std::string(content), std::regex("\r\n?"), "\n");
98+
99+
std::istringstream contentStream(lines);
100+
std::string currentLine;
101+
std::string multiLineKey;
102+
std::string multiLineValue;
103+
bool isMultiLine = false;
104+
char quoteChar = '\0';
105+
106+
while (std::getline(contentStream, currentLine)) {
107+
// Check if we are currently in a multi-line value
108+
if (isMultiLine) {
109+
// Check if the current line ends the multi-line value
110+
if (currentLine.back() == quoteChar) {
111+
// append multi-line value and trim quotes
112+
multiLineValue += "\n" + currentLine.substr(0, currentLine.size() - 1);
113+
multiLineValue = trimQuotes(multiLineValue);
114+
// add multi-line key/value
115+
store_.insert_or_assign(multiLineKey, multiLineValue);
116+
117+
// Reset multi-line trackers
118+
isMultiLine = false;
119+
quoteChar = '\0';
120+
121+
} else {
122+
// If the last char of currentLine is not the same as
123+
// multi-line first quote just append the value
124+
multiLineValue += "\n" + currentLine;
125+
}
111126

112-
// Default undefined or null to an empty string
113-
std::string value = match[2].str();
127+
continue;
114128

115-
// Remove leading whitespaces
116-
value.erase(0, value.find_first_not_of(" \t"));
129+
} else {
130+
bool isInQuotes = false;
131+
for (size_t i = 0; i < currentLine.length(); ++i) {
132+
char c = currentLine[i];
133+
134+
// If we found comment outside quotes ignore it
135+
if (c == '#' && !isInQuotes) {
136+
currentLine = currentLine.substr(0, i);
137+
break;
138+
}
139+
140+
// Handle entering/exiting quotes
141+
if ((c == '"' || c == '\'' || c == '`')) {
142+
isInQuotes = !isInQuotes;
143+
if (isInQuotes) {
144+
quoteChar = c;
145+
}
146+
}
147+
}
117148

118-
// Remove trailing whitespaces
119-
if (!value.empty()) {
120-
value.erase(value.find_last_not_of(" \t") + 1);
149+
// Trim the line from whitespace at both ends.
150+
currentLine = trimWhitespace(currentLine);
121151
}
122152

123-
if (!value.empty() && value.front() == '"') {
124-
value = std::regex_replace(value, std::regex("\\\\n"), "\n");
125-
value = std::regex_replace(value, std::regex("\\\\r"), "\r");
153+
size_t equalPos = currentLine.find('=');
154+
if (equalPos != std::string::npos) {
155+
auto value = currentLine.substr(equalPos + 1);
156+
auto key = currentLine.substr(0, equalPos);
157+
// Remove export prefix if found
158+
key = removeExport(key);
159+
160+
// Check for multi-line value start
161+
if ((value.front() == '"' || value.front() == '\'' ||
162+
value.front() == '`') &&
163+
value.back() != value.front()) {
164+
isMultiLine = true;
165+
multiLineKey = key;
166+
// Remove the opening quote for multiline value
167+
multiLineValue = value.substr(1);
168+
// Track the quote character used
169+
quoteChar = value.front();
170+
171+
} else {
172+
if (!value.empty() && value.front() == '"') {
173+
value = std::regex_replace(value, std::regex("\\\\n"), "\n");
174+
value = std::regex_replace(value, std::regex("\\\\r"), "\r");
175+
}
176+
177+
key = trimWhitespace(key);
178+
value = trimWhitespace(value);
179+
value = trimQuotes(value);
180+
store_.insert_or_assign(key, value);
181+
}
126182
}
127-
128-
// Remove surrounding quotes
129-
value = trim_quotes(value);
130-
131-
store_.insert_or_assign(std::string(key), value);
132-
lines = match.suffix();
133183
}
134184
}
135185

@@ -179,7 +229,7 @@ void Dotenv::AssignNodeOptionsIfAvailable(std::string* node_options) {
179229
}
180230
}
181231

182-
std::string_view Dotenv::trim_quotes(std::string_view str) {
232+
std::string_view Dotenv::trimQuotes(std::string_view str) {
183233
static const std::unordered_set<char> quotes = {'"', '\'', '`'};
184234
if (str.size() >= 2 && quotes.count(str.front()) &&
185235
quotes.count(str.back())) {
@@ -188,4 +238,32 @@ std::string_view Dotenv::trim_quotes(std::string_view str) {
188238
return str;
189239
}
190240

241+
/**
 * Returns an empty view when `line` is blank (only spaces and tabs) or when
 * its first non-blank character is `#`; otherwise returns `line` unchanged.
 */
std::string_view Dotenv::removeComment(std::string_view line) {
  const size_t contentStart = line.find_first_not_of(" \t");
  const bool isBlank = contentStart == std::string_view::npos;

  if (!isBlank && line[contentStart] != '#') {
    return line;
  }
  return std::string_view("");
}
250+
251+
/**
 * Strips leading and trailing spaces and tabs from `value`.
 *
 * Returns an empty view when `value` is empty or consists solely of spaces
 * and tabs; otherwise returns the sub-view from the first to the last
 * character that is neither a space nor a tab.
 */
std::string_view Dotenv::trimWhitespace(std::string_view value) {
  const size_t leading = value.find_first_not_of(" \t");
  if (leading == std::string_view::npos) {
    return std::string_view("");
  }
  value.remove_prefix(leading);

  // A non-blank character is known to exist, so this index is valid.
  const size_t lastKept = value.find_last_not_of(" \t");
  value.remove_suffix(value.size() - lastKept - 1);
  return value;
}
259+
260+
/**
 * Drops a leading "export " (the word followed by a single space) from `str`.
 * Input that does not begin with that exact prefix is returned unchanged.
 */
std::string_view Dotenv::removeExport(std::string_view str) {
  constexpr std::string_view kExportPrefix = "export ";

  const bool hasPrefix =
      str.compare(0, kExportPrefix.size(), kExportPrefix) == 0;
  if (hasPrefix) {
    str.remove_prefix(kExportPrefix.size());
  }
  return str;
}
268+
191269
} // namespace node

src/node_dotenv.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ class Dotenv {
3232

3333
private:
3434
std::map<std::string, std::string> store_;
35-
std::string_view trim_quotes(std::string_view str);
35+
std::string_view trimQuotes(const std::string_view str);
36+
std::string_view removeComment(const std::string_view line);
37+
std::string_view trimWhitespace(const std::string_view str);
38+
std::string_view removeExport(std::string_view str);
3639
};
3740

3841
} // namespace node

test/fixtures/dotenv/valid.env

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,11 @@ MULTI_NOT_VALID_QUOTE="
5959
MULTI_NOT_VALID=THIS
6060
IS NOT MULTILINE
6161
export EXAMPLE = ignore export
62+
63+
MULTI_PEM_DOUBLE_QUOTED="-----BEGIN PUBLIC KEY-----
64+
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAnNl1tL3QjKp3DZWM0T3u
65+
LgGJQwu9WqyzHKZ6WIA5T+7zPjO1L8l3S8k8YzBrfH4mqWOD1GBI8Yjq2L1ac3Y/
66+
bTdfHN8CmQr2iDJC0C6zY8YV93oZB3x0zC/LPbRYpF8f6OqX1lZj5vo2zJZy4fI/
67+
kKcI5jHYc8VJq+KCuRZrvn+3V+KuL9tF9v8ZgjF2PZbU+LsCy5Yqg1M8f5Jp5f6V
68+
u4QuUoobAgMBAAE=
69+
-----END PUBLIC KEY-----"

test/parallel/test-dotenv.js

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,6 @@ assert.strictEqual(process.env.COMMENTS, undefined);
5858
assert.strictEqual(process.env.EQUAL_SIGNS, 'equals==');
5959
// Retains inner quotes
6060
assert.strictEqual(process.env.RETAIN_INNER_QUOTES, '{"foo": "bar"}');
61-
// Respects equals signs in values
62-
assert.strictEqual(process.env.EQUAL_SIGNS, 'equals==');
63-
// Retains inner quotes
64-
assert.strictEqual(process.env.RETAIN_INNER_QUOTES, '{"foo": "bar"}');
6561
assert.strictEqual(process.env.RETAIN_INNER_QUOTES_AS_STRING, '{"foo": "bar"}');
6662
assert.strictEqual(process.env.RETAIN_INNER_QUOTES_AS_BACKTICKS, '{"foo": "bar\'s"}');
6763
// Retains spaces in string
@@ -84,3 +80,12 @@ assert.strictEqual(process.env.DONT_EXPAND_UNQUOTED, 'dontexpand\\nnewlines');
8480
assert.strictEqual(process.env.DONT_EXPAND_SQUOTED, 'dontexpand\\nnewlines');
8581
// Ignore export before key
8682
assert.strictEqual(process.env.EXAMPLE, 'ignore export');
83+
84+
const multiPem = `-----BEGIN PUBLIC KEY-----
85+
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAnNl1tL3QjKp3DZWM0T3u
86+
LgGJQwu9WqyzHKZ6WIA5T+7zPjO1L8l3S8k8YzBrfH4mqWOD1GBI8Yjq2L1ac3Y/
87+
bTdfHN8CmQr2iDJC0C6zY8YV93oZB3x0zC/LPbRYpF8f6OqX1lZj5vo2zJZy4fI/
88+
kKcI5jHYc8VJq+KCuRZrvn+3V+KuL9tF9v8ZgjF2PZbU+LsCy5Yqg1M8f5Jp5f6V
89+
u4QuUoobAgMBAAE=
90+
-----END PUBLIC KEY-----`;
91+
assert.strictEqual(process.env.MULTI_PEM_DOUBLE_QUOTED, multiPem);

test/parallel/util-parse-env.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,14 @@ const assert = require('node:assert');
66
const util = require('node:util');
77
const fs = require('node:fs');
88

9+
const multiPem = `-----BEGIN PUBLIC KEY-----
10+
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAnNl1tL3QjKp3DZWM0T3u
11+
LgGJQwu9WqyzHKZ6WIA5T+7zPjO1L8l3S8k8YzBrfH4mqWOD1GBI8Yjq2L1ac3Y/
12+
bTdfHN8CmQr2iDJC0C6zY8YV93oZB3x0zC/LPbRYpF8f6OqX1lZj5vo2zJZy4fI/
13+
kKcI5jHYc8VJq+KCuRZrvn+3V+KuL9tF9v8ZgjF2PZbU+LsCy5Yqg1M8f5Jp5f6V
14+
u4QuUoobAgMBAAE=
15+
-----END PUBLIC KEY-----`;
16+
917
{
1018
const validEnvFilePath = fixtures.path('dotenv/valid.env');
1119
const validContent = fs.readFileSync(validEnvFilePath, 'utf8');
@@ -53,6 +61,7 @@ const fs = require('node:fs');
5361
SINGLE_QUOTES_SPACED: ' single quotes ',
5462
SPACED_KEY: 'parsed',
5563
TRIM_SPACE_FROM_UNQUOTED: 'some spaced out string',
64+
MULTI_PEM_DOUBLE_QUOTED: multiPem,
5665
});
5766
}
5867

0 commit comments

Comments
 (0)