buffer: add base64url encoding option

PR-URL: https://github.com/nodejs/node/pull/36952
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Antoine du Hamel <duhamelantoine1995@gmail.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
targos · Aug 13, 2021 · 0a9ef60 · 0a9ef60
1 parent 73e6781
commit 0a9ef60
Show file tree

Hide file tree

Showing 17 changed files with 310 additions and 126 deletions.
diff --git a/doc/api/buffer.md b/doc/api/buffer.md
@@ -50,6 +50,9 @@ const buf7 = Buffer.from('tést', 'latin1');
 ## Buffers and character encodings
 <!-- YAML
 changes:
+  - version: REPLACEME
+    pr-url: https://github.com/nodejs/node/pull/36952
+    description: Introduced `base64url` encoding.
   - version: v6.4.0
     pr-url: https://github.com/nodejs/node/pull/7111
     description: Introduced `latin1` as an alias for `binary`.
@@ -106,6 +109,11 @@ string into a `Buffer` as decoding.
   specified in [RFC 4648, Section 5][]. Whitespace characters such as spaces,
   tabs, and new lines contained within the base64-encoded string are ignored.
 
+* `'base64url'`: [base64url][] encoding as specified in
+  [RFC 4648, Section 5][]. When creating a `Buffer` from a string, this
+  encoding will also correctly accept regular base64-encoded strings. When
+  encoding a `Buffer` to a string, this encoding will omit padding.
+
 * `'hex'`: Encode each byte as two hexadecimal characters. Data truncation
   may occur when decoding strings that do not exclusively contain valid
   hexadecimal characters. See below for an example.
@@ -469,9 +477,10 @@ Returns the byte length of a string when encoded using `encoding`.
 This is not the same as [`String.prototype.length`][], which does not account
 for the encoding that is used to convert the string into bytes.
 
-For `'base64'` and `'hex'`, this function assumes valid input. For strings that
-contain non-base64/hex-encoded data (e.g. whitespace), the return value might be
-greater than the length of a `Buffer` created from the string.
+For `'base64'`, `'base64url'`, and `'hex'`, this function assumes valid input.
+For strings that contain non-base64/hex-encoded data (e.g. whitespace), the
+return value might be greater than the length of a `Buffer` created from the
+string.
 
 ```js
 const str = '\u00bd + \u00bc = \u00be';
@@ -3427,6 +3436,7 @@ introducing security vulnerabilities into an application.
 [`buffer.kMaxLength`]: #buffer_buffer_kmaxlength
 [`util.inspect()`]: util.md#util_util_inspect_object_options
 [`v8::TypedArray::kMaxLength`]: https://v8.github.io/api/head/classv8_1_1TypedArray.html#a54a48f4373da0850663c4393d843b9b0
+[base64url]: https://tools.ietf.org/html/rfc4648#section-5
 [binary strings]: https://developer.mozilla.org/en-US/docs/Web/API/DOMString/Binary
 [endianness]: https://en.wikipedia.org/wiki/Endianness
 [iterator]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols
diff --git a/lib/buffer.js b/lib/buffer.js
@@ -659,6 +659,20 @@ const encodingOps = {
                     encodingsMap.base64,
                     dir)
   },
+  base64url: {
+    encoding: 'base64url',
+    encodingVal: encodingsMap.base64url,
+    byteLength: (string) => base64ByteLength(string, string.length),
+    write: (buf, string, offset, len) =>
+      buf.base64urlWrite(string, offset, len),
+    slice: (buf, start, end) => buf.base64urlSlice(start, end),
+    indexOf: (buf, val, byteOffset, dir) =>
+      indexOfBuffer(buf,
+                    fromStringFast(val, encodingOps.base64url),
+                    byteOffset,
+                    encodingsMap.base64url,
+                    dir)
+  },
   hex: {
     encoding: 'hex',
     encodingVal: encodingsMap.hex,
@@ -715,6 +729,11 @@ function getEncodingOps(encoding) {
       if (encoding === 'hex' || StringPrototypeToLowerCase(encoding) === 'hex')
         return encodingOps.hex;
       break;
+    case 9:
+      if (encoding === 'base64url' ||
+          StringPrototypeToLowerCase(encoding) === 'base64url')
+        return encodingOps.base64url;
+      break;
   }
 }
 

diff --git a/lib/internal/buffer.js b/lib/internal/buffer.js
@@ -18,12 +18,14 @@ const { validateNumber } = require('internal/validators');
 const {
   asciiSlice,
   base64Slice,
+  base64urlSlice,
   latin1Slice,
   hexSlice,
   ucs2Slice,
   utf8Slice,
   asciiWrite,
   base64Write,
+  base64urlWrite,
   latin1Write,
   hexWrite,
   ucs2Write,
@@ -1026,12 +1028,14 @@ function addBufferPrototypeMethods(proto) {
 
   proto.asciiSlice = asciiSlice;
   proto.base64Slice = base64Slice;
+  proto.base64urlSlice = base64urlSlice;
   proto.latin1Slice = latin1Slice;
   proto.hexSlice = hexSlice;
   proto.ucs2Slice = ucs2Slice;
   proto.utf8Slice = utf8Slice;
   proto.asciiWrite = asciiWrite;
   proto.base64Write = base64Write;
+  proto.base64urlWrite = base64urlWrite;
   proto.latin1Write = latin1Write;
   proto.hexWrite = hexWrite;
   proto.ucs2Write = ucs2Write;

diff --git a/lib/internal/util.js b/lib/internal/util.js
@@ -177,6 +177,11 @@ function slowCases(enc) {
         `${enc}`.toLowerCase() === 'utf-16le')
         return 'utf16le';
       break;
+    case 9:
+      if (enc === 'base64url' || enc === 'BASE64URL' ||
+          `${enc}`.toLowerCase() === 'base64url')
+        return 'base64url';
+      break;
     default:
       if (enc === '') return 'utf8';
   }

diff --git a/src/api/encoding.cc b/src/api/encoding.cc
@@ -68,13 +68,17 @@ enum encoding ParseEncoding(const char* encoding,
       } else if (encoding[1] == 'a') {
         if (strncmp(encoding + 2, "se64", 5) == 0)
           return BASE64;
+        if (strncmp(encoding + 2, "se64url", 8) == 0)
+          return BASE64URL;
       }
       if (StringEqualNoCase(encoding, "binary"))
         return LATIN1;  // BINARY is a deprecated alias of LATIN1.
       if (StringEqualNoCase(encoding, "buffer"))
         return BUFFER;
       if (StringEqualNoCase(encoding, "base64"))
         return BASE64;
+      if (StringEqualNoCase(encoding, "base64url"))
+        return BASE64URL;
       break;
 
     case 'a':

diff --git a/src/node_buffer.cc b/src/node_buffer.cc
@@ -1163,13 +1163,15 @@ void Initialize(Local<Object> target,
 
   env->SetMethodNoSideEffect(target, "asciiSlice", StringSlice<ASCII>);
   env->SetMethodNoSideEffect(target, "base64Slice", StringSlice<BASE64>);
+  env->SetMethodNoSideEffect(target, "base64urlSlice", StringSlice<BASE64URL>);
   env->SetMethodNoSideEffect(target, "latin1Slice", StringSlice<LATIN1>);
   env->SetMethodNoSideEffect(target, "hexSlice", StringSlice<HEX>);
   env->SetMethodNoSideEffect(target, "ucs2Slice", StringSlice<UCS2>);
   env->SetMethodNoSideEffect(target, "utf8Slice", StringSlice<UTF8>);
 
   env->SetMethod(target, "asciiWrite", StringWrite<ASCII>);
   env->SetMethod(target, "base64Write", StringWrite<BASE64>);
+  env->SetMethod(target, "base64urlWrite", StringWrite<BASE64URL>);
   env->SetMethod(target, "latin1Write", StringWrite<LATIN1>);
   env->SetMethod(target, "hexWrite", StringWrite<HEX>);
   env->SetMethod(target, "ucs2Write", StringWrite<UCS2>);

diff --git a/src/string_decoder.cc b/src/string_decoder.cc
@@ -69,7 +69,10 @@ MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
 
   size_t nread = *nread_ptr;
 
-  if (Encoding() == UTF8 || Encoding() == UCS2 || Encoding() == BASE64) {
+  if (Encoding() == UTF8 ||
+      Encoding() == UCS2 ||
+      Encoding() == BASE64 ||
+      Encoding() == BASE64URL) {
     // See if we want bytes to finish a character from the previous
     // chunk; if so, copy the new bytes to the missing bytes buffer
     // and create a small string from it that is to be prepended to the
@@ -197,7 +200,7 @@ MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
           state_[kBufferedBytes] = 2;
           state_[kMissingBytes] = 2;
         }
-      } else if (Encoding() == BASE64) {
+      } else if (Encoding() == BASE64 || Encoding() == BASE64URL) {
         state_[kBufferedBytes] = nread % 3;
         if (state_[kBufferedBytes] > 0)
           state_[kMissingBytes] = 3 - BufferedBytes();
@@ -310,6 +313,7 @@ void InitializeStringDecoder(Local<Object> target,
   ADD_TO_ENCODINGS_ARRAY(ASCII, "ascii");
   ADD_TO_ENCODINGS_ARRAY(UTF8, "utf8");
   ADD_TO_ENCODINGS_ARRAY(BASE64, "base64");
+  ADD_TO_ENCODINGS_ARRAY(BASE64URL, "base64url");
   ADD_TO_ENCODINGS_ARRAY(UCS2, "utf16le");
   ADD_TO_ENCODINGS_ARRAY(HEX, "hex");
   ADD_TO_ENCODINGS_ARRAY(BUFFER, "buffer");

diff --git a/test/addons/parse-encoding/binding.cc b/test/addons/parse-encoding/binding.cc
@@ -6,6 +6,7 @@ namespace {
 #define ENCODING_MAP(V) \
   V(ASCII)              \
   V(BASE64)             \
+  V(BASE64URL)          \
   V(BUFFER)             \
   V(HEX)                \
   V(LATIN1)             \

diff --git a/test/addons/parse-encoding/test.js b/test/addons/parse-encoding/test.js
@@ -8,6 +8,7 @@ assert.strictEqual(parseEncoding(''), 'UNKNOWN');
 
 assert.strictEqual(parseEncoding('ascii'), 'ASCII');
 assert.strictEqual(parseEncoding('base64'), 'BASE64');
+assert.strictEqual(parseEncoding('base64url'), 'BASE64URL');
 assert.strictEqual(parseEncoding('binary'), 'LATIN1');
 assert.strictEqual(parseEncoding('buffer'), 'BUFFER');
 assert.strictEqual(parseEncoding('hex'), 'HEX');