crypto, string_bytes: treat buffer str as utf8

Do not treat crypto inputs as `binary` strings; instead, convert them to Buffers using `new Buffer(..., 'utf8')` or the newly updated StringBytes APIs.

PR-URL: #5522
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: James M Snell <jasnell@gmail.com>
nodejs · Mar 2, 2016 · b010c87 · b010c87 · indutny · Apr 26, 2016
1 parent 0eda5f5
commit b010c87
Show file tree

Hide file tree

Showing 5 changed files with 27 additions and 20 deletions.
diff --git a/doc/api/crypto.markdown b/doc/api/crypto.markdown
@@ -601,7 +601,7 @@ called. Multiple calls will cause an error to be thrown.
 Updates the hash content with the given `data`, the encoding of which
 is given in `input_encoding` and can be `'utf8'`, `'ascii'` or
 `'binary'`. If `encoding` is not provided, and the `data` is a string, an
-encoding of `'binary'` is enforced. If `data` is a [`Buffer`][] then
+encoding of `'utf8'` is enforced. If `data` is a [`Buffer`][] then
 `input_encoding` is ignored.
 
 This can be called many times with new data as it is streamed.
@@ -811,7 +811,7 @@ or [buffers][`Buffer`]. The default value is `'buffer'`, which makes methods
 default to [`Buffer`][] objects.
 
 The `crypto.DEFAULT_ENCODING` mechanism is provided for backwards compatibility
-with legacy programs that expect `'binary'` to be the default encoding.
+with legacy programs that expect `'utf8'` to be the default encoding.
 
 New applications should expect the default to be `'buffer'`. This property may
 become deprecated in a future Node.js release.

diff --git a/lib/crypto.js b/lib/crypto.js
@@ -30,11 +30,10 @@ const DH_GENERATOR = 2;
 // any explicit encoding in older versions of node, and we don't want
 // to break them unnecessarily.
 function toBuf(str, encoding) {
- encoding = encoding || 'binary';
  if (typeof str === 'string') {
- if (encoding === 'buffer')
- encoding = 'binary';
- str = new Buffer(str, encoding);
+ if (encoding === 'buffer' || !encoding)
+ encoding = 'utf8';
+ return new Buffer(str, encoding);
  }
  return str;
 }
@@ -67,8 +66,6 @@ Hash.prototype._flush = function(callback) {
 
 Hash.prototype.update = function(data, encoding) {
  encoding = encoding || exports.DEFAULT_ENCODING;
- if (encoding === 'buffer' && typeof data === 'string')
- encoding = 'binary';
  this._handle.update(data, encoding);
  return this;
 };

diff --git a/src/node_crypto.cc b/src/node_crypto.cc
@@ -3369,7 +3369,7 @@ void CipherBase::Update(const FunctionCallbackInfo<Value>& args) {
  // Only copy the data if we have to, because it's a string
  if (args[0]->IsString()) {
  StringBytes::InlineDecoder decoder;
- if (!decoder.Decode(env, args[0].As<String>(), args[1], BINARY))
+ if (!decoder.Decode(env, args[0].As<String>(), args[1], UTF8))
  return;
  r = cipher->Update(decoder.out(), decoder.size(), &out, &out_len);
  } else {
@@ -3548,7 +3548,7 @@ void Hmac::HmacUpdate(const FunctionCallbackInfo<Value>& args) {
  bool r;
  if (args[0]->IsString()) {
  StringBytes::InlineDecoder decoder;
- if (!decoder.Decode(env, args[0].As<String>(), args[1], BINARY))
+ if (!decoder.Decode(env, args[0].As<String>(), args[1], UTF8))
  return;
  r = hmac->HmacUpdate(decoder.out(), decoder.size());
  } else {
@@ -3666,7 +3666,7 @@ void Hash::HashUpdate(const FunctionCallbackInfo<Value>& args) {
  bool r;
  if (args[0]->IsString()) {
  StringBytes::InlineDecoder decoder;
- if (!decoder.Decode(env, args[0].As<String>(), args[1], BINARY))
+ if (!decoder.Decode(env, args[0].As<String>(), args[1], UTF8))
  return;
  r = hash->HashUpdate(decoder.out(), decoder.size());
  } else {
@@ -3818,7 +3818,7 @@ void Sign::SignUpdate(const FunctionCallbackInfo<Value>& args) {
  Error err;
  if (args[0]->IsString()) {
  StringBytes::InlineDecoder decoder;
- if (!decoder.Decode(env, args[0].As<String>(), args[1], BINARY))
+ if (!decoder.Decode(env, args[0].As<String>(), args[1], UTF8))
  return;
  err = sign->SignUpdate(decoder.out(), decoder.size());
  } else {
@@ -4020,7 +4020,7 @@ void Verify::VerifyUpdate(const FunctionCallbackInfo<Value>& args) {
  Error err;
  if (args[0]->IsString()) {
  StringBytes::InlineDecoder decoder;
- if (!decoder.Decode(env, args[0].As<String>(), args[1], BINARY))
+ if (!decoder.Decode(env, args[0].As<String>(), args[1], UTF8))
  return;
  err = verify->VerifyUpdate(decoder.out(), decoder.size());
  } else {
@@ -4119,12 +4119,11 @@ void Verify::VerifyFinal(const FunctionCallbackInfo<Value>& args) {
 
  THROW_AND_RETURN_IF_NOT_STRING_OR_BUFFER(args[1]);
 
- // BINARY works for both buffers and binary strings.
- enum encoding encoding = BINARY;
+ enum encoding encoding = UTF8;
  if (args.Length() >= 3) {
  encoding = ParseEncoding(env->isolate(),
  args[2]->ToString(env->isolate()),
- BINARY);
+ UTF8);
  }
 
  ssize_t hlen = StringBytes::Size(env->isolate(), args[1], encoding);

diff --git a/src/string_bytes.cc b/src/string_bytes.cc
@@ -368,7 +368,6 @@ size_t StringBytes::Write(Isolate* isolate,
  switch (encoding) {
  case ASCII:
  case BINARY:
- case BUFFER:
  if (is_extern && str->IsOneByte()) {
  memcpy(buf, data, nbytes);
  } else {
@@ -379,6 +378,7 @@ size_t StringBytes::Write(Isolate* isolate,
  *chars_written = nbytes;
  break;
 
+ case BUFFER:
  case UTF8:
  nbytes = str->WriteUtf8(buf, buflen, chars_written, flags);
  break;
@@ -480,11 +480,11 @@ size_t StringBytes::StorageSize(Isolate* isolate,
 
  switch (encoding) {
  case BINARY:
- case BUFFER:
  case ASCII:
  data_size = str->Length();
  break;
 
+ case BUFFER:
  case UTF8:
  // A single UCS2 codepoint never takes up more than 3 utf8 bytes.
  // It is an exercise for the caller to decide when a string is
@@ -532,11 +532,11 @@ size_t StringBytes::Size(Isolate* isolate,
 
  switch (encoding) {
  case BINARY:
- case BUFFER:
  case ASCII:
  data_size = str->Length();
  break;
 
+ case BUFFER:
  case UTF8:
  data_size = str->Utf8Length();
  break;

diff --git a/test/parallel/test-crypto-hash.js b/test/parallel/test-crypto-hash.js
@@ -13,7 +13,7 @@ var crypto = require('crypto');
 // Test hashing
 var a1 = crypto.createHash('sha1').update('Test123').digest('hex');
 var a2 = crypto.createHash('sha256').update('Test123').digest('base64');
-var a3 = crypto.createHash('sha512').update('Test123').digest(); // binary
+var a3 = crypto.createHash('sha512').update('Test123').digest(); // buffer
 var a4 = crypto.createHash('sha1').update('Test123').digest('buffer');
 
 // stream interface
@@ -87,3 +87,14 @@ fileStream.on('close', function() {
 assert.throws(function() {
  crypto.createHash('xyzzy');
 });
+
+// Default UTF-8 encoding
+var hutf8 = crypto.createHash('sha512').update('УТФ-8 text').digest('hex');
+assert.equal(
+ hutf8,
+ '4b21bbd1a68e690a730ddcb5a8bc94ead9879ffe82580767ad7ec6fa8ba2dea6' +
+ '43a821af66afa9a45b6a78c712fecf0e56dc7f43aef4bcfc8eb5b4d8dca6ea5b');
+
+assert.notEqual(
+ hutf8,
+ crypto.createHash('sha512').update('УТФ-8 text', 'binary').digest('hex'));