From 114f2f95f2c0eafc520d1a690b1f24342a1cdd49 Mon Sep 17 00:00:00 2001
From: Joyee Cheung
Date: Fri, 5 Jan 2024 22:17:00 +0100
Subject: [PATCH] crypto: implement crypto.digest()

This patch introduces a helper crypto.digest() that computes
a digest from the input in one shot. This can be 20-50% faster
than the object-based createHash() for smaller inputs that
are hashed in one shot.
---
 benchmark/crypto/node-digest.js     | 42 ++++++++++++++
 lib/crypto.js                       |  2 +
 lib/internal/crypto/hash.js         | 26 +++++++
 src/api/encoding.cc                 | 10 ++++
 src/crypto/crypto_hash.cc           | 87 ++++++++++++++++++++++++++---
 src/crypto/crypto_hash.h            |  1 +
 src/node_internals.h                |  4 ++
 test/parallel/test-crypto-digest.js | 28 ++++++++++
 8 files changed, 192 insertions(+), 8 deletions(-)
 create mode 100644 benchmark/crypto/node-digest.js
 create mode 100644 test/parallel/test-crypto-digest.js

diff --git a/benchmark/crypto/node-digest.js b/benchmark/crypto/node-digest.js
new file mode 100644
index 00000000000000..75fcb490d88db4
--- /dev/null
+++ b/benchmark/crypto/node-digest.js
@@ -0,0 +1,42 @@
+'use strict';
+
+const common = require('../common.js');
+const { createHash, digest } = require('crypto');
+const path = require('path');
+const filepath = path.resolve(__dirname, '../../test/fixtures/snapshot/typescript.js');
+const fs = require('fs');
+const assert = require('assert');
+
+const bench = common.createBenchmark(main, {
+  length: [1000, 100_000],
+  method: ['md5', 'sha1', 'sha256'],
+  type: ['string', 'buffer'],
+  n: [100_000, 1000],
+}, {
+  combinationFilter: ({ length, n }) => {
+    return length * n <= 100_000 * 1000;
+  },
+});
+
+function main({ length, type, method, n }) {
+  let data = fs.readFileSync(filepath);
+  if (type === 'string') {
+    data = data.toString().slice(0, length);
+  } else {
+    data = Uint8Array.prototype.slice.call(data, 0, length);
+  }
+
+  const hash = digest ?
+    (method, input) => digest(method, input, 'hex') :
+    (method, input) => createHash(method).update(input).digest('hex');
+  const array = [];
+  for (let i = 0; i < n; i++) {
+    array.push(null);
+  }
+  bench.start();
+  for (let i = 0; i < n; i++) {
+    array[i] = hash(method, data);
+  }
+  bench.end(n);
+  assert.strictEqual(typeof array[n - 1], 'string');
+}
diff --git a/lib/crypto.js b/lib/crypto.js
index ab9b9d99e11b8b..08e8e254cf44d2 100644
--- a/lib/crypto.js
+++ b/lib/crypto.js
@@ -107,6 +107,7 @@ const {
 const {
   Hash,
   Hmac,
+  digest,
 } = require('internal/crypto/hash');
 const {
   X509Certificate,
@@ -219,6 +220,7 @@ module.exports = {
   getFips,
   setFips,
   verify: verifyOneShot,
+  digest,
 
   // Classes
   Certificate,
diff --git a/lib/internal/crypto/hash.js b/lib/internal/crypto/hash.js
index f3072d61dd0be0..e6c7d4b9295320 100644
--- a/lib/internal/crypto/hash.js
+++ b/lib/internal/crypto/hash.js
@@ -11,6 +11,7 @@ const {
   HashJob,
   Hmac: _Hmac,
   kCryptoJobAsync,
+  oneShotDigest,
 } = internalBinding('crypto');
 
 const {
@@ -29,6 +30,8 @@ const {
 
 const {
   lazyDOMException,
+  normalizeEncoding,
+  encodingsMap,
 } = require('internal/util');
 
 const {
@@ -47,6 +50,7 @@ const {
   validateEncoding,
   validateString,
   validateUint32,
+  validateBuffer,
 } = require('internal/validators');
 
 const {
@@ -188,8 +192,30 @@ async function asyncDigest(algorithm, data) {
   throw lazyDOMException('Unrecognized algorithm name', 'NotSupportedError');
 }
 
+/**
+ * Computes the digest of `input` in one shot, without creating a Hash object.
+ * @param {string} algorithm Name of the digest algorithm.
+ * @param {string|ArrayBufferView} input Data to hash.
+ * @param {string} [outputEncoding] Encoding of the result, 'hex' by default.
+ * @returns {string|Buffer}
+ */
+function digest(algorithm, input, outputEncoding = 'hex') {
+  validateString(algorithm, 'algorithm');
+  if (typeof input !== 'string') {
+    validateBuffer(input, 'input');
+  }
+  // Fast case: if it's 'hex', we don't need to validate it further.
+  if (outputEncoding !== 'hex') {
+    validateString(outputEncoding, 'outputEncoding');
+    outputEncoding = normalizeEncoding(outputEncoding) || outputEncoding;
+  }
+  return oneShotDigest(algorithm, getCachedHashId(algorithm), getHashCache(),
+                       input, outputEncoding, encodingsMap[outputEncoding]);
+}
+
 module.exports = {
   Hash,
   Hmac,
   asyncDigest,
+  digest,
 };
diff --git a/src/api/encoding.cc b/src/api/encoding.cc
index 3ccfd6c84b7865..0bc3b23a344fa8 100644
--- a/src/api/encoding.cc
+++ b/src/api/encoding.cc
@@ -109,6 +109,16 @@ enum encoding ParseEncoding(const char* encoding,
   return default_encoding;
 }
 
+enum encoding ParseEncoding(Isolate* isolate,
+                            Local<Value> encoding_v,
+                            Local<Value> encoding_id,
+                            enum encoding default_encoding) {
+  if (encoding_id->IsUint32()) {
+    return static_cast<enum encoding>(encoding_id.As<v8::Uint32>()->Value());
+  }
+
+  return ParseEncoding(isolate, encoding_v, default_encoding);
+}
 
 enum encoding ParseEncoding(Isolate* isolate,
                             Local<Value> encoding_v,
diff --git a/src/crypto/crypto_hash.cc b/src/crypto/crypto_hash.cc
index 2a709126544833..72b0c0aa3800ae 100644
--- a/src/crypto/crypto_hash.cc
+++ b/src/crypto/crypto_hash.cc
@@ -23,6 +23,7 @@ using v8::MaybeLocal;
 using v8::Name;
 using v8::Nothing;
 using v8::Object;
+using v8::String;
 using v8::Uint32;
 using v8::Value;
 
@@ -202,6 +203,71 @@ const EVP_MD* GetDigestImplementation(Environment* env,
 #endif
 }
 
+// crypto.digest(algorithm, algorithmId, algorithmCache,
+//               input, outputEncoding, outputEncodingId)
+void Hash::OneShotDigest(const FunctionCallbackInfo<Value>& args) {
+  Environment* env = Environment::GetCurrent(args);
+  Isolate* isolate = env->isolate();
+  CHECK_EQ(args.Length(), 6);
+  CHECK(args[0]->IsString());  // algorithm
+  CHECK(args[1]->IsInt32());  // algorithmId
+  CHECK(args[2]->IsObject());  // algorithmCache
+  CHECK(args[3]->IsString() || args[3]->IsArrayBufferView());  // input
+  CHECK(args[4]->IsString());  // outputEncoding
+  CHECK(args[5]->IsUint32() || args[5]->IsUndefined());  // outputEncodingId
+
+  const EVP_MD* md = GetDigestImplementation(env, args[0], args[1], args[2]);
+  if (md == nullptr) {
+    Utf8Value method(isolate, args[0]);
+    std::string message =
+        "Digest method " + method.ToString() + " is not supported";
+    return ThrowCryptoError(env, ERR_get_error(), message.c_str());
+  }
+
+  enum encoding output_enc = ParseEncoding(isolate, args[4], args[5], HEX);
+
+  int md_len = EVP_MD_size(md);
+  unsigned int result_size;
+  ByteSource::Builder output(md_len);
+  int success;
+  // On smaller inputs, EVP_Digest() can be slower than the
+  // deprecated helpers e.g SHA256_XXX. The speedup may not
+  // be worth using deprecated APIs, however, so we use
+  // EVP_Digest(), unless there's a better alternative
+  // in the future.
+  // https://github.com/openssl/openssl/issues/19612
+  if (args[3]->IsString()) {
+    Utf8Value utf8(isolate, args[3]);
+    success = EVP_Digest(utf8.out(),
+                         utf8.length(),
+                         output.data<unsigned char>(),
+                         &result_size,
+                         md,
+                         nullptr);
+  } else {
+    ArrayBufferViewContents<unsigned char> input(args[3]);
+    success = EVP_Digest(input.data(),
+                         input.length(),
+                         output.data<unsigned char>(),
+                         &result_size,
+                         md,
+                         nullptr);
+  }
+  if (!success) {
+    return ThrowCryptoError(env, ERR_get_error());
+  }
+
+  Local<Value> error;
+  MaybeLocal<Value> rc = StringBytes::Encode(
+      env->isolate(), output.data<char>(), md_len, output_enc, &error);
+  if (rc.IsEmpty()) {
+    CHECK(!error.IsEmpty());
+    env->isolate()->ThrowException(error);
+    return;
+  }
+  args.GetReturnValue().Set(rc.FromMaybe(Local<Value>()));
+}
+
 void Hash::Initialize(Environment* env, Local<Object> target) {
   Isolate* isolate = env->isolate();
   Local<Context> context = env->context();
@@ -216,6 +282,7 @@ void Hash::Initialize(Environment* env, Local<Object> target) {
 
   SetMethodNoSideEffect(context, target, "getHashes", GetHashes);
   SetMethodNoSideEffect(context, target, "getCachedAliases", GetCachedAliases);
+  SetMethodNoSideEffect(context, target, "oneShotDigest", OneShotDigest);
 
   HashJob::Initialize(env, target);
 
@@ -229,6 +296,7 @@ void Hash::RegisterExternalReferences(ExternalReferenceRegistry* registry) {
   registry->Register(HashDigest);
   registry->Register(GetHashes);
   registry->Register(GetCachedAliases);
+  registry->Register(OneShotDigest);
 
   HashJob::RegisterExternalReferences(registry);
 
@@ -294,14 +362,17 @@ bool Hash::HashUpdate(const char* data, size_t len) {
 }
 
 void Hash::HashUpdate(const FunctionCallbackInfo<Value>& args) {
-  Decode<Hash>(args, [](Hash* hash, const FunctionCallbackInfo<Value>& args,
-                        const char* data, size_t size) {
-    Environment* env = Environment::GetCurrent(args);
-    if (UNLIKELY(size > INT_MAX))
-      return THROW_ERR_OUT_OF_RANGE(env, "data is too long");
-    bool r = hash->HashUpdate(data, size);
-    args.GetReturnValue().Set(r);
-  });
+  Decode<Hash>(args,
+               [](Hash* hash,
+                  const FunctionCallbackInfo<Value>& args,
+                  const char* data,
+                  size_t size) {
+                 Environment* env = Environment::GetCurrent(args);
+                 if (UNLIKELY(size > INT_MAX))
+                   return THROW_ERR_OUT_OF_RANGE(env, "data is too long");
+                 bool r = hash->HashUpdate(data, size);
+                 args.GetReturnValue().Set(r);
+               });
 }
 
 void Hash::HashDigest(const FunctionCallbackInfo<Value>& args) {
diff --git a/src/crypto/crypto_hash.h b/src/crypto/crypto_hash.h
index a90acc895b97b2..07e3a2ae4635b8 100644
--- a/src/crypto/crypto_hash.h
+++ b/src/crypto/crypto_hash.h
@@ -26,6 +26,7 @@ class Hash final : public BaseObject {
 
   static void GetHashes(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void GetCachedAliases(const v8::FunctionCallbackInfo<v8::Value>& args);
+  static void OneShotDigest(const v8::FunctionCallbackInfo<v8::Value>& args);
 
  protected:
   static void New(const v8::FunctionCallbackInfo<v8::Value>& args);
diff --git a/src/node_internals.h b/src/node_internals.h
index 1fa1f72fba9bdc..84a953f942b5ca 100644
--- a/src/node_internals.h
+++ b/src/node_internals.h
@@ -446,6 +446,10 @@ v8::HeapProfiler::HeapSnapshotOptions GetHeapSnapshotOptions(
     v8::Local<v8::Object> options);
 }  // namespace heap
 
+enum encoding ParseEncoding(v8::Isolate* isolate,
+                            v8::Local<v8::Value> encoding_v,
+                            v8::Local<v8::Value> encoding_id,
+                            enum encoding default_encoding);
 }  // namespace node
 
 #endif  // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
diff --git a/test/parallel/test-crypto-digest.js b/test/parallel/test-crypto-digest.js
new file mode 100644
index 00000000000000..87d9e5d30e6993
--- /dev/null
+++ b/test/parallel/test-crypto-digest.js
@@ -0,0 +1,28 @@
+'use strict';
+const common = require('../common');
+
+if (!common.hasCrypto)
+  common.skip('missing crypto');
+
+const assert = require('assert');
+const crypto = require('crypto');
+const fixtures = require('../common/fixtures');
+const fs = require('fs');
+
+const methods = crypto.getHashes();
+assert(methods.length > 0);
+
+function test(input) {
+  for (const method of methods) {
+    for (const outputEncoding of ['buffer', 'hex', 'base64', undefined]) {
+      const oldDigest = crypto.createHash(method).update(input).digest(outputEncoding || 'hex');
+      const newDigest = crypto.digest(method, input, outputEncoding);
+      assert.deepStrictEqual(newDigest, oldDigest,
+                             `different result from ${method} with encoding ${outputEncoding}`);
+    }
+  }
+}
+
+const input = fs.readFileSync(fixtures.path('utf8_test_text.txt'));
+test(input);
+test(input.toString());