Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 27 additions & 16 deletions cpp/src/parquet/encryption/aes_encryption.cc
Original file line number Diff line number Diff line change
Expand Up @@ -320,36 +320,47 @@ int32_t AesEncryptor::CtrEncrypt(
return length_buffer_length_ + buffer_size;
}

int32_t AesEncryptorFactory::MapKeyLenToEncryptorArrayIndex(int32_t key_len) {
if (key_len == 16)
return 0;
else if (key_len == 24)
return 1;
else if (key_len == 32)
return 2;
throw ParquetException("encryption key must be 16, 24 or 32 bytes in length");
uint64_t AesEncryptorFactory::MakeCacheKey(
ParquetCipher::type alg_id, int32_t key_len, bool metadata) {
uint64_t key = 0;
// Set the algorithm id in the most significant 32 bits.
key |= static_cast<uint64_t>(static_cast<uint32_t>(alg_id)) << 32;
Copy link
Collaborator

@avalerio-tkd avalerio-tkd Aug 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I absolutely love this!

Let's just add a comment for each bit encoded field.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

// Set the key length in the next 8 bits.
key |= static_cast<uint64_t>(static_cast<uint8_t>(key_len));
// Set the metadata flag in the next 8 bits.
key |= static_cast<uint64_t>(metadata ? 1 : 0) << 8;
return key;
}

AesEncryptor* AesEncryptorFactory::GetMetaAesEncryptor(
ParquetCipher::type alg_id, int32_t key_size) {
auto key_len = static_cast<int32_t>(key_size);
int index = MapKeyLenToEncryptorArrayIndex(key_len);
// Create the cache key using the algorithm id, key length, and metadata flag
// to avoid collisions for encryptors with the same key length.
uint64_t cache_key = MakeCacheKey(alg_id, key_len, /*metadata=*/true);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the last param commented out? /*metadata=*/true

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not, it's just the comment as to what the parameter is.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, yes. Misread it.


if (meta_encryptor_cache_[index] == nullptr) {
meta_encryptor_cache_[index] = AesEncryptor::Make(alg_id, key_len, true);
// If no encryptor exists for this cache key, create one.
if (encryptor_cache_.find(cache_key) == encryptor_cache_.end()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is essentially the same as before but in find/end format, right? Or is there more to it?
Let's add a comment either way.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's the same. Added comment.

encryptor_cache_[cache_key] = AesEncryptor::Make(
alg_id, key_len, /*metadata=*/true);
}
return meta_encryptor_cache_[index].get();

return encryptor_cache_[cache_key].get();
}

AesEncryptor* AesEncryptorFactory::GetDataAesEncryptor(
ParquetCipher::type alg_id, int32_t key_size) {
auto key_len = static_cast<int32_t>(key_size);
int index = MapKeyLenToEncryptorArrayIndex(key_len);
// Create the cache key using the algorithm id, key length, and metadata flag
// to avoid collisions for encryptors with the same key length.
uint64_t cache_key = MakeCacheKey(alg_id, key_len, /*metadata=*/false);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah! We need a comment here since it's the critical part that changed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added. Also check the comment in the .h file


if (data_encryptor_cache_[index] == nullptr) {
data_encryptor_cache_[index] = AesEncryptor::Make(alg_id, key_len, false);
// If no encryptor exists for this cache key, create one.
if (encryptor_cache_.find(cache_key) == encryptor_cache_.end()) {
encryptor_cache_[cache_key] = AesEncryptor::Make(
alg_id, key_len, /*metadata=*/false);
}
return data_encryptor_cache_[index].get();
return encryptor_cache_[cache_key].get();
}

AesDecryptor::AesDecryptor(
Expand Down
9 changes: 4 additions & 5 deletions cpp/src/parquet/encryption/aes_encryption.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,11 @@ class AesEncryptorFactory {
AesEncryptor* GetDataAesEncryptor(ParquetCipher::type alg_id, int32_t key_size);

private:
/// Map the key length to the index of the encryptor array. Since only 16, 24, or 32 bytes
/// are allowed for key length, these correspond to the indices 0, 1, and 2.
int32_t MapKeyLenToEncryptorArrayIndex(int32_t key_len);
/// Build a cache key including algorithm id, key length, and metadata flag.
static uint64_t MakeCacheKey(
ParquetCipher::type alg_id, int32_t key_len, bool metadata);

std::unique_ptr<AesEncryptor> meta_encryptor_cache_[3];
std::unique_ptr<AesEncryptor> data_encryptor_cache_[3];
std::unordered_map<uint64_t, std::unique_ptr<AesEncryptor>> encryptor_cache_;
};

/// Performs AES decryption operations with GCM or CTR ciphers.
Expand Down
Loading