buffer: add base64url encoding option

Backport parts of dae283d

PR-URL: #36952
Backport-PR-URL: #39702
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Antoine du Hamel <duhamelantoine1995@gmail.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
nodejs · Aug 13, 2021 · a343956 · a343956
1 parent 73e6781
commit a343956
Show file tree

Hide file tree

Showing 22 changed files with 399 additions and 139 deletions.
diff --git a/doc/api/buffer.md b/doc/api/buffer.md
@@ -50,6 +50,9 @@ const buf7 = Buffer.from('tést', 'latin1');
 ## Buffers and character encodings
 <!-- YAML
 changes:
+  - version: REPLACEME
+    pr-url: https://github.com/nodejs/node/pull/36952
+    description: Introduced `base64url` encoding.
   - version: v6.4.0
     pr-url: https://github.com/nodejs/node/pull/7111
     description: Introduced `latin1` as an alias for `binary`.
@@ -106,6 +109,11 @@ string into a `Buffer` as decoding.
   specified in [RFC 4648, Section 5][]. Whitespace characters such as spaces,
   tabs, and new lines contained within the base64-encoded string are ignored.
 
+* `'base64url'`: [base64url][] encoding as specified in
+  [RFC 4648, Section 5][]. When creating a `Buffer` from a string, this
+  encoding will also correctly accept regular base64-encoded strings. When
+  encoding a `Buffer` to a string, this encoding will omit padding.
+
 * `'hex'`: Encode each byte as two hexadecimal characters. Data truncation
   may occur when decoding strings that do not exclusively contain valid
   hexadecimal characters. See below for an example.
@@ -469,9 +477,10 @@ Returns the byte length of a string when encoded using `encoding`.
 This is not the same as [`String.prototype.length`][], which does not account
 for the encoding that is used to convert the string into bytes.
 
-For `'base64'` and `'hex'`, this function assumes valid input. For strings that
-contain non-base64/hex-encoded data (e.g. whitespace), the return value might be
-greater than the length of a `Buffer` created from the string.
+For `'base64'`, `'base64url'`, and `'hex'`, this function assumes valid input.
+For strings that contain non-base64/hex-encoded data (e.g. whitespace), the
+return value might be greater than the length of a `Buffer` created from the
+string.
 
 ```js
 const str = '\u00bd + \u00bc = \u00be';
@@ -3427,6 +3436,7 @@ introducing security vulnerabilities into an application.
 [`buffer.kMaxLength`]: #buffer_buffer_kmaxlength
 [`util.inspect()`]: util.md#util_util_inspect_object_options
 [`v8::TypedArray::kMaxLength`]: https://v8.github.io/api/head/classv8_1_1TypedArray.html#a54a48f4373da0850663c4393d843b9b0
+[base64url]: https://tools.ietf.org/html/rfc4648#section-5
 [binary strings]: https://developer.mozilla.org/en-US/docs/Web/API/DOMString/Binary
 [endianness]: https://en.wikipedia.org/wiki/Endianness
 [iterator]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols
diff --git a/lib/buffer.js b/lib/buffer.js
@@ -659,6 +659,20 @@ const encodingOps = {
                     encodingsMap.base64,
                     dir)
   },
+  base64url: {
+    encoding: 'base64url',
+    encodingVal: encodingsMap.base64url,
+    byteLength: (string) => base64ByteLength(string, string.length),
+    write: (buf, string, offset, len) =>
+      buf.base64urlWrite(string, offset, len),
+    slice: (buf, start, end) => buf.base64urlSlice(start, end),
+    indexOf: (buf, val, byteOffset, dir) =>
+      indexOfBuffer(buf,
+                    fromStringFast(val, encodingOps.base64url),
+                    byteOffset,
+                    encodingsMap.base64url,
+                    dir)
+  },
   hex: {
     encoding: 'hex',
     encodingVal: encodingsMap.hex,
@@ -715,6 +729,11 @@ function getEncodingOps(encoding) {
       if (encoding === 'hex' || StringPrototypeToLowerCase(encoding) === 'hex')
         return encodingOps.hex;
       break;
+    case 9:
+      if (encoding === 'base64url' ||
+          StringPrototypeToLowerCase(encoding) === 'base64url')
+        return encodingOps.base64url;
+      break;
   }
 }
 

diff --git a/lib/internal/buffer.js b/lib/internal/buffer.js
@@ -18,12 +18,14 @@ const { validateNumber } = require('internal/validators');
 const {
   asciiSlice,
   base64Slice,
+  base64urlSlice,
   latin1Slice,
   hexSlice,
   ucs2Slice,
   utf8Slice,
   asciiWrite,
   base64Write,
+  base64urlWrite,
   latin1Write,
   hexWrite,
   ucs2Write,
@@ -1026,12 +1028,14 @@ function addBufferPrototypeMethods(proto) {
 
   proto.asciiSlice = asciiSlice;
   proto.base64Slice = base64Slice;
+  proto.base64urlSlice = base64urlSlice;
   proto.latin1Slice = latin1Slice;
   proto.hexSlice = hexSlice;
   proto.ucs2Slice = ucs2Slice;
   proto.utf8Slice = utf8Slice;
   proto.asciiWrite = asciiWrite;
   proto.base64Write = base64Write;
+  proto.base64urlWrite = base64urlWrite;
   proto.latin1Write = latin1Write;
   proto.hexWrite = hexWrite;
   proto.ucs2Write = ucs2Write;

diff --git a/lib/internal/util.js b/lib/internal/util.js
@@ -177,6 +177,11 @@ function slowCases(enc) {
         `${enc}`.toLowerCase() === 'utf-16le')
         return 'utf16le';
       break;
+    case 9:
+      if (enc === 'base64url' || enc === 'BASE64URL' ||
+          `${enc}`.toLowerCase() === 'base64url')
+        return 'base64url';
+      break;
     default:
       if (enc === '') return 'utf8';
   }

diff --git a/src/api/encoding.cc b/src/api/encoding.cc
@@ -68,13 +68,17 @@ enum encoding ParseEncoding(const char* encoding,
       } else if (encoding[1] == 'a') {
         if (strncmp(encoding + 2, "se64", 5) == 0)
           return BASE64;
+        if (strncmp(encoding + 2, "se64url", 8) == 0)
+          return BASE64URL;
       }
       if (StringEqualNoCase(encoding, "binary"))
         return LATIN1;  // BINARY is a deprecated alias of LATIN1.
       if (StringEqualNoCase(encoding, "buffer"))
         return BUFFER;
       if (StringEqualNoCase(encoding, "base64"))
         return BASE64;
+      if (StringEqualNoCase(encoding, "base64url"))
+        return BASE64URL;
       break;
 
     case 'a':

diff --git a/src/base64-inl.h b/src/base64-inl.h
@@ -123,12 +123,13 @@ size_t base64_decode(char* const dst, const size_t dstlen,
 inline size_t base64_encode(const char* src,
                             size_t slen,
                             char* dst,
-                            size_t dlen) {
+                            size_t dlen,
+                            Base64Mode mode) {
   // We know how much we'll write, just make sure that there's space.
-  CHECK(dlen >= base64_encoded_size(slen) &&
+  CHECK(dlen >= base64_encoded_size(slen, mode) &&
         "not enough space provided for base64 encode");
 
-  dlen = base64_encoded_size(slen);
+  dlen = base64_encoded_size(slen, mode);
 
   unsigned a;
   unsigned b;
@@ -137,9 +138,7 @@ inline size_t base64_encode(const char* src,
   unsigned k;
   unsigned n;
 
-  static const char table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-                              "abcdefghijklmnopqrstuvwxyz"
-                              "0123456789+/";
+  const char* table = base64_select_table(mode);
 
   i = 0;
   k = 0;
@@ -164,16 +163,19 @@ inline size_t base64_encode(const char* src,
       a = src[i + 0] & 0xff;
       dst[k + 0] = table[a >> 2];
       dst[k + 1] = table[(a & 3) << 4];
-      dst[k + 2] = '=';
-      dst[k + 3] = '=';
+      if (mode == Base64Mode::NORMAL) {
+        dst[k + 2] = '=';
+        dst[k + 3] = '=';
+      }
       break;
     case 2:
       a = src[i + 0] & 0xff;
       b = src[i + 1] & 0xff;
       dst[k + 0] = table[a >> 2];
       dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
       dst[k + 2] = table[(b & 0x0f) << 2];
-      dst[k + 3] = '=';
+      if (mode == Base64Mode::NORMAL)
+        dst[k + 3] = '=';
       break;
   }
 

diff --git a/src/base64.h b/src/base64.h
@@ -5,13 +5,40 @@
 
 #include "util.h"
 
+#include <cmath>
 #include <cstddef>
 #include <cstdint>
 
 namespace node {
 //// Base 64 ////
-static inline constexpr size_t base64_encoded_size(size_t size) {
-  return ((size + 2) / 3 * 4);
+
+enum class Base64Mode {
+  NORMAL,
+  URL
+};
+
+static constexpr char base64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                       "abcdefghijklmnopqrstuvwxyz"
+                                       "0123456789+/";
+
+static constexpr char base64_table_url[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                           "abcdefghijklmnopqrstuvwxyz"
+                                           "0123456789-_";
+
+static inline const char* base64_select_table(Base64Mode mode) {
+  switch (mode) {
+    case Base64Mode::NORMAL: return base64_table;
+    case Base64Mode::URL: return base64_table_url;
+    default: UNREACHABLE();
+  }
+}
+
+static inline constexpr size_t base64_encoded_size(
+    size_t size,
+    Base64Mode mode = Base64Mode::NORMAL) {
+  return mode == Base64Mode::NORMAL
+      ? ((size + 2) / 3 * 4)
+      : std::ceil(static_cast<double>(size * 4) / 3);
 }
 
 // Doesn't check for padding at the end.  Can be 1-2 bytes over.
@@ -32,7 +59,8 @@ size_t base64_decode(char* const dst, const size_t dstlen,
 inline size_t base64_encode(const char* src,
                             size_t slen,
                             char* dst,
-                            size_t dlen);
+                            size_t dlen,
+                            Base64Mode mode = Base64Mode::NORMAL);
 }  // namespace node
 
 

diff --git a/src/node.h b/src/node.h
@@ -664,7 +664,18 @@ inline void NODE_SET_PROTOTYPE_METHOD(v8::Local<v8::FunctionTemplate> recv,
 #define NODE_SET_PROTOTYPE_METHOD node::NODE_SET_PROTOTYPE_METHOD
 
 // BINARY is a deprecated alias of LATIN1.
-enum encoding {ASCII, UTF8, BASE64, UCS2, BINARY, HEX, BUFFER, LATIN1 = BINARY};
+// BASE64URL backs the 'base64url' encoding exposed to the JavaScript side.
+enum encoding {
+  ASCII,
+  UTF8,
+  BASE64,
+  UCS2,
+  BINARY,
+  HEX,
+  BUFFER,
+  BASE64URL,
+  LATIN1 = BINARY
+};
 
 NODE_EXTERN enum encoding ParseEncoding(
     v8::Isolate* isolate,

diff --git a/src/node_buffer.cc b/src/node_buffer.cc
@@ -1163,13 +1163,15 @@ void Initialize(Local<Object> target,
 
   env->SetMethodNoSideEffect(target, "asciiSlice", StringSlice<ASCII>);
   env->SetMethodNoSideEffect(target, "base64Slice", StringSlice<BASE64>);
+  env->SetMethodNoSideEffect(target, "base64urlSlice", StringSlice<BASE64URL>);
   env->SetMethodNoSideEffect(target, "latin1Slice", StringSlice<LATIN1>);
   env->SetMethodNoSideEffect(target, "hexSlice", StringSlice<HEX>);
   env->SetMethodNoSideEffect(target, "ucs2Slice", StringSlice<UCS2>);
   env->SetMethodNoSideEffect(target, "utf8Slice", StringSlice<UTF8>);
 
   env->SetMethod(target, "asciiWrite", StringWrite<ASCII>);
   env->SetMethod(target, "base64Write", StringWrite<BASE64>);
+  env->SetMethod(target, "base64urlWrite", StringWrite<BASE64URL>);
   env->SetMethod(target, "latin1Write", StringWrite<LATIN1>);
   env->SetMethod(target, "hexWrite", StringWrite<HEX>);
   env->SetMethod(target, "ucs2Write", StringWrite<UCS2>);

diff --git a/src/string_bytes.cc b/src/string_bytes.cc
@@ -358,6 +358,8 @@ size_t StringBytes::Write(Isolate* isolate,
       break;
     }
 
+    case BASE64URL:
+      // Fall through
     case BASE64:
       if (str->IsExternalOneByte()) {
         auto ext = str->GetExternalOneByteStringResource();
@@ -425,6 +427,8 @@ Maybe<size_t> StringBytes::StorageSize(Isolate* isolate,
       data_size = str->Length() * sizeof(uint16_t);
       break;
 
+    case BASE64URL:
+      // Fall through
     case BASE64:
       data_size = base64_decoded_size_fast(str->Length());
       break;
@@ -466,6 +470,8 @@ Maybe<size_t> StringBytes::Size(Isolate* isolate,
     case UCS2:
       return Just(str->Length() * sizeof(uint16_t));
 
+    case BASE64URL:
+      // Fall through
     case BASE64: {
       String::Value value(isolate, str);
       return Just(base64_decoded_size(*value, value.length()));
@@ -691,6 +697,20 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
       return ExternOneByteString::New(isolate, dst, dlen, error);
     }
 
+    case BASE64URL: {
+      size_t dlen = base64_encoded_size(buflen, Base64Mode::URL);
+      char* dst = node::UncheckedMalloc(dlen);
+      if (dst == nullptr) {
+        *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
+        return MaybeLocal<Value>();
+      }
+
+      size_t written = base64_encode(buf, buflen, dst, dlen, Base64Mode::URL);
+      CHECK_EQ(written, dlen);
+
+      return ExternOneByteString::New(isolate, dst, dlen, error);
+    }
+
     case HEX: {
       size_t dlen = buflen * 2;
       char* dst = node::UncheckedMalloc(dlen);

diff --git a/src/string_decoder.cc b/src/string_decoder.cc
@@ -69,7 +69,10 @@ MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
 
   size_t nread = *nread_ptr;
 
-  if (Encoding() == UTF8 || Encoding() == UCS2 || Encoding() == BASE64) {
+  if (Encoding() == UTF8 ||
+      Encoding() == UCS2 ||
+      Encoding() == BASE64 ||
+      Encoding() == BASE64URL) {
     // See if we want bytes to finish a character from the previous
     // chunk; if so, copy the new bytes to the missing bytes buffer
     // and create a small string from it that is to be prepended to the
@@ -197,7 +200,7 @@ MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
           state_[kBufferedBytes] = 2;
           state_[kMissingBytes] = 2;
         }
-      } else if (Encoding() == BASE64) {
+      } else if (Encoding() == BASE64 || Encoding() == BASE64URL) {
         state_[kBufferedBytes] = nread % 3;
         if (state_[kBufferedBytes] > 0)
           state_[kMissingBytes] = 3 - BufferedBytes();
@@ -310,6 +313,7 @@ void InitializeStringDecoder(Local<Object> target,
   ADD_TO_ENCODINGS_ARRAY(ASCII, "ascii");
   ADD_TO_ENCODINGS_ARRAY(UTF8, "utf8");
   ADD_TO_ENCODINGS_ARRAY(BASE64, "base64");
+  ADD_TO_ENCODINGS_ARRAY(BASE64URL, "base64url");
   ADD_TO_ENCODINGS_ARRAY(UCS2, "utf16le");
   ADD_TO_ENCODINGS_ARRAY(HEX, "hex");
   ADD_TO_ENCODINGS_ARRAY(BUFFER, "buffer");

diff --git a/test/addons/parse-encoding/binding.cc b/test/addons/parse-encoding/binding.cc
@@ -6,6 +6,7 @@ namespace {
 #define ENCODING_MAP(V) \
   V(ASCII)              \
   V(BASE64)             \
+  V(BASE64URL)          \
   V(BUFFER)             \
   V(HEX)                \
   V(LATIN1)             \

diff --git a/test/addons/parse-encoding/test.js b/test/addons/parse-encoding/test.js
@@ -8,6 +8,7 @@ assert.strictEqual(parseEncoding(''), 'UNKNOWN');
 
 assert.strictEqual(parseEncoding('ascii'), 'ASCII');
 assert.strictEqual(parseEncoding('base64'), 'BASE64');
+assert.strictEqual(parseEncoding('base64url'), 'BASE64URL');
 assert.strictEqual(parseEncoding('binary'), 'LATIN1');
 assert.strictEqual(parseEncoding('buffer'), 'BUFFER');
 assert.strictEqual(parseEncoding('hex'), 'HEX');

diff --git a/test/cctest/test_base64.cc b/test/cctest/test_base64.cc
@@ -44,6 +44,20 @@ TEST(Base64Test, Encode) {
        "IGRlc2VydW50IG1vbGxpdCBhbmltIGlkIGVzdCBsYWJvcnVtLg==");
 }
 
+TEST(Base64Test, EncodeURL) {
+  auto test = [](const char* string, const char* base64_string) {
+    const size_t len = strlen(base64_string);
+    char* const buffer = new char[len + 1];
+    buffer[len] = 0;
+    base64_encode(string, strlen(string), buffer, len, node::Base64Mode::URL);
+    EXPECT_STREQ(base64_string, buffer);
+    delete[] buffer;
+  };
+
+  test("\x68\xd9\x16\x25\x5c\x1e\x40\x92\x2d\xfb", "aNkWJVweQJIt-w");
+  test("\xac\xc7\x93\xaa\x83\x6f\xc3\xe3\x3f\x75", "rMeTqoNvw-M_dQ");
+}
+
 TEST(Base64Test, Decode) {
   auto test = [](const char* base64_string, const char* string) {
     const size_t len = strlen(string);
@@ -75,6 +89,7 @@ TEST(Base64Test, Decode) {
   test("YWJj ZGVm", "abcdef");
   test("Y W J j Z G V m", "abcdef");
   test("Y   W\n JjZ \nG Vm", "abcdef");
+  test("rMeTqoNvw-M_dQ", "\xac\xc7\x93\xaa\x83\x6f\xc3\xe3\x3f\x75");
 
   const char* text =
       "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do "