Skip to content

[ntuple] support rules with source class != dest class #18357

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 14, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tree/ntuple/v7/inc/ROOT/RNTupleReader.hxx
Original file line number Diff line number Diff line change
@@ -50,7 +50,7 @@ enum class ENTupleInfo {
\ingroup NTuple
\brief Reads RNTuple data from storage

The RNTupleReader provides access to data stored in the RNTuple binary format as C++ objects, using an RNTupleModel.
The RNTupleReader provides access to data stored in the RNTuple binary format as C++ objects, using an RNTupleModel.
It infers this model from the RNTuple's on-disk metadata, or uses a model imposed by the user.
The latter case allows users to read into a specialized RNTuple model that covers
only a subset of the fields in the RNTuple. The RNTuple model is used when reading complete entries through LoadEntry().
34 changes: 23 additions & 11 deletions tree/ntuple/v7/src/RFieldMeta.cxx
Original file line number Diff line number Diff line change
@@ -26,6 +26,7 @@
#include <TBaseClass.h>
#include <TBufferFile.h>
#include <TClass.h>
#include <TClassEdit.h>
#include <TDataMember.h>
#include <TEnum.h>
#include <TObject.h>
@@ -205,13 +206,15 @@ std::vector<const ROOT::TSchemaRule *> ROOT::RClassField::FindRules(const ROOT::
// of the class
rules = ruleset->FindRules(fClass->GetName(), fClass->GetClassVersion(), fClass->GetCheckSum());
} else {
// We need to change (back) the name normalization from RNTuple to ROOT Meta
std::string normalizedName;
TClassEdit::GetNormalizedName(normalizedName, fieldDesc->GetTypeName());
// We do have an on-disk field that correspond to the current RClassField instance. Ask for rules matching the
// on-disk version of the field.
if (fieldDesc->GetTypeChecksum()) {
rules =
ruleset->FindRules(fieldDesc->GetTypeName(), fieldDesc->GetTypeVersion(), *fieldDesc->GetTypeChecksum());
rules = ruleset->FindRules(normalizedName, fieldDesc->GetTypeVersion(), *fieldDesc->GetTypeChecksum());
} else {
rules = ruleset->FindRules(fieldDesc->GetTypeName(), fieldDesc->GetTypeVersion());
rules = ruleset->FindRules(normalizedName, fieldDesc->GetTypeVersion());
}
}

@@ -319,10 +322,15 @@ void ROOT::RClassField::SetStagingClass(const std::string &className, unsigned i
TClass::GetClass(className.c_str())->GetStreamerInfo(classVersion);
if (classVersion != GetTypeVersion()) {
fStagingClass = TClass::GetClass((className + std::string("@@") + std::to_string(classVersion)).c_str());
if (!fStagingClass) {
// For a rename rule, we may simply ask for the old class name
fStagingClass = TClass::GetClass(className.c_str());
}
} else {
fStagingClass = fClass;
}
R__ASSERT(fStagingClass);
R__ASSERT(static_cast<unsigned int>(fStagingClass->GetClassVersion()) == classVersion);
}

void ROOT::RClassField::PrepareStagingArea(const std::vector<const TSchemaRule *> &rules,
@@ -367,7 +375,10 @@ void ROOT::RClassField::PrepareStagingArea(const std::vector<const TSchemaRule *
void ROOT::RClassField::AddReadCallbacksFromIORule(const TSchemaRule *rule)
{
auto func = rule->GetReadFunctionPointer();
R__ASSERT(func != nullptr);
if (func == nullptr) {
// Can happen for rename rules
return;
}
fReadCallbacks.emplace_back([func, stagingClass = fStagingClass, stagingArea = fStagingArea.get()](void *target) {
TVirtualObject onfileObj{nullptr};
onfileObj.fClass = stagingClass;
@@ -394,19 +405,20 @@ void ROOT::RClassField::BeforeConnectPageSource(ROOT::Internal::RPageSource &pag
const ROOT::RNTupleDescriptor &desc = descriptorGuard.GetRef();
const auto &fieldDesc = desc.GetFieldDescriptor(GetOnDiskId());

// Check that we have the same type.
// TODO(jblomer): relax for class rename rule
if (GetTypeName() != fieldDesc.GetTypeName()) {
throw RException(R__FAIL("incompatible type name for field " + GetFieldName() + ": " + GetTypeName() +
" vs. " + fieldDesc.GetTypeName()));
}

for (auto linkId : fieldDesc.GetLinkIds()) {
const auto &subFieldDesc = desc.GetFieldDescriptor(linkId);
regularSubfields.insert(subFieldDesc.GetFieldName());
}

rules = FindRules(&fieldDesc);

// If the field's type name is not the on-disk name but we found a rule, we know it is valid to read
// on-disk data because we found the rule according to the on-disk (source) type name and version/checksum.
if ((GetTypeName() != fieldDesc.GetTypeName()) && rules.empty()) {
throw RException(R__FAIL("incompatible type name for field " + GetFieldName() + ": " + GetTypeName() +
" vs. " + fieldDesc.GetTypeName()));
}

if (!rules.empty()) {
SetStagingClass(fieldDesc.GetTypeName(), fieldDesc.GetTypeVersion());
PrepareStagingArea(rules, desc, fieldDesc);
16 changes: 14 additions & 2 deletions tree/ntuple/v7/src/RNTupleView.cxx
Original file line number Diff line number Diff line change
@@ -19,22 +19,34 @@
#include <ROOT/RNTupleView.hxx>
#include <ROOT/RPageStorage.hxx>

#include <deque>

ROOT::RNTupleGlobalRange
ROOT::Internal::GetFieldRange(const ROOT::RFieldBase &field, const ROOT::Internal::RPageSource &pageSource)
{
const auto &desc = pageSource.GetSharedDescriptorGuard().GetRef();

auto fnGetPrincipalColumnId = [&desc](ROOT::DescriptorId_t fieldId) -> ROOT::DescriptorId_t {
R__ASSERT(fieldId != ROOT::kInvalidDescriptorId);
auto columnIterable = desc.GetColumnIterable(fieldId);
return (columnIterable.size() > 0) ? columnIterable.begin()->GetPhysicalId() : ROOT::kInvalidDescriptorId;
};

auto columnId = fnGetPrincipalColumnId(field.GetOnDiskId());
if (columnId == ROOT::kInvalidDescriptorId) {
for (const auto &f : field) {
columnId = fnGetPrincipalColumnId(f.GetOnDiskId());
// We need to iterate the field descriptor tree, not the sub fields of `field`, because in the presence of
// read rules, the in-memory sub fields may be artificial and not have valid on-disk IDs.
const auto &linkIds = desc.GetFieldDescriptor(field.GetOnDiskId()).GetLinkIds();
std::deque<ROOT::DescriptorId_t> subFields(linkIds.begin(), linkIds.end());
while (!subFields.empty()) {
auto subFieldId = subFields.front();
subFields.pop_front();
columnId = fnGetPrincipalColumnId(subFieldId);
if (columnId != ROOT::kInvalidDescriptorId)
break;

const auto &subLinkIds = desc.GetFieldDescriptor(subFieldId).GetLinkIds();
subFields.insert(subFields.end(), subLinkIds.begin(), subLinkIds.end());
}
}

15 changes: 15 additions & 0 deletions tree/ntuple/v7/test/CustomStruct.hxx
Original file line number Diff line number Diff line change
@@ -259,6 +259,11 @@ struct StructWithIORules : StructWithIORulesBase {
StructWithIORules(float _a, char _c[4]) : StructWithIORulesBase{_a, 0.0f}, s{{_c[0], _c[1], _c[2], _c[3]}, {}} {}
};

struct OldCoordinates {
float fOldX;
float fOldY;
};

struct CoordinatesWithIORules {
float fX;
float fY;
@@ -271,6 +276,16 @@ struct LowPrecisionFloatWithIORules {
float fLast8BitsZero; // the I/O rule will "randomize" the last 8 bits
};

template <typename T>
struct OldName {
T fValue;
};

template <typename T>
struct NewName {
T fValue;
};

struct Cyclic {
std::vector<Cyclic> fMember;
};
14 changes: 14 additions & 0 deletions tree/ntuple/v7/test/CustomStructLinkDef.h
Original file line number Diff line number Diff line change
@@ -100,12 +100,20 @@
#pragma read sourceClass = "StructWithIORules" source = "" checksum = "[1]" targetClass = "StructWithIORules" target = \
"checksumB" code = "{ checksumB = 0.0; }"

#pragma link C++ options = version(3) class OldCoordinates + ;

#pragma link C++ options = version(3) class CoordinatesWithIORules + ;

#pragma read sourceClass = "CoordinatesWithIORules" source = "float fX; float fY" version = "[3]" targetClass = \
"CoordinatesWithIORules" target = "fPhi,fR" include = "cmath" code = \
"{ fR = sqrt(onfile.fX * onfile.fX + onfile.fY * onfile.fY); fPhi = atan2(onfile.fY, onfile.fX); }"

#pragma read sourceClass = "OldCoordinates" source = "float fOldX; float fOldY" version = "[3]" targetClass = \
"CoordinatesWithIORules" target = "fX,fY,fPhi,fR" include = "cmath" code = \
"{ fR = sqrt(onfile.fOldX * onfile.fOldX + onfile.fOldY * onfile.fOldY); \
fPhi = atan2(onfile.fOldY, onfile.fOldX); \
fX = onfile.fOldX; fY = onfile.fOldY; }"

#pragma link C++ options = version(3) class LowPrecisionFloatWithIORules + ;

#pragma read sourceClass = "LowPrecisionFloatWithIORules" source = "float fLast8BitsZero;" version = \
@@ -114,6 +122,12 @@
bits |= 137; /* placeholder for randomizing the 8 LSBs */ \
std::memcpy(&fLast8BitsZero, &bits, sizeof(fLast8BitsZero)); }"

#pragma link C++ options = version(3) class OldName < int> + ;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#pragma link C++ options=version(3) class OldName<int>+;

is more typical white spacing.

#pragma link C++ options = version(3) class OldName < OldName < int>> + ;
#pragma link C++ options = version(3) class NewName < int> + ;
#pragma link C++ options = version(3) class NewName < NewName < int>> + ;
#pragma read sourceClass = "OldName<OldName<int>>" targetClass = "NewName<OldName<int>>" version = "[3]"

#pragma link C++ class Cyclic + ;
#pragma link C++ class CyclicCollectionProxy + ;
#pragma link C++ class Unsupported + ;
15 changes: 13 additions & 2 deletions tree/ntuple/v7/test/rfield_class.cxx
Original file line number Diff line number Diff line change
@@ -250,11 +250,14 @@ TEST(RNTuple, TClassReadRules)
auto model = RNTupleModel::Create();
auto ptrClass = model->MakeField<StructWithIORules>("class");
auto ptrCoord = model->MakeField<CoordinatesWithIORules>("coord");
auto ptrOldCoord = model->MakeField<OldCoordinates>("oldCoord");
auto ptrLowPrecisionFloat = model->MakeField<LowPrecisionFloatWithIORules>("lowPrecisionFloat");
ptrCoord->fX = 1.0;
ptrCoord->fY = 1.0;
auto ptrOldName = model->MakeField<OldName<OldName<int>>>("rename");
ptrCoord->fX = ptrOldCoord->fOldX = 1.0;
ptrCoord->fY = ptrOldCoord->fOldY = 1.0;
ptrLowPrecisionFloat->fFoo = 1.0;
ptrLowPrecisionFloat->fLast8BitsZero = last8BitsZero;
ptrOldName->fValue.fValue = 42;
auto writer = RNTupleWriter::Recreate(std::move(model), "f", fileGuard.GetPath());
for (int i = 0; i < 5; i++) {
*ptrClass = StructWithIORules{/*a=*/static_cast<float>(i), /*chars=*/c};
@@ -294,4 +297,12 @@ TEST(RNTuple, TClassReadRules)
EXPECT_FLOAT_EQ(1.0, viewLowPrecisionFloat(0).fFoo);
EXPECT_NE(last8BitsZero, viewLowPrecisionFloat(0).fLast8BitsZero);
EXPECT_NEAR(2.0, viewLowPrecisionFloat(0).fLast8BitsZero, 0.001);
auto viewOldCoordTransformed = reader->GetView<CoordinatesWithIORules>("oldCoord");
EXPECT_FLOAT_EQ(1.0, viewOldCoordTransformed(0).fX);
EXPECT_FLOAT_EQ(1.0, viewOldCoordTransformed(0).fY);
EXPECT_FLOAT_EQ(sqrt(2), viewOldCoordTransformed(0).fR);
EXPECT_FLOAT_EQ(M_PI / 4., viewOldCoordTransformed(0).fPhi);

auto viewRename = reader->GetView<NewName<OldName<int>>>("rename");
EXPECT_EQ(42, viewRename(0).fValue.fValue);
}

Unchanged files with check annotations Beta

zmemzero((uint8_t *)s->head, (unsigned)(s->hash_size)*sizeof(*s->head));
/* ========================================================================= */
int ZEXPORT deflateInit_(strm, level, version, stream_size)

Check warning on line 1243 in builtins/zlib/deflate_cf.c

GitHub Actions / fedora42 CMAKE_CXX_STANDARD=20

old-style function definition [-Wold-style-definition]
z_streamp strm;
int level;
const char *version;
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
/* ========================================================================= */
local unsigned long crc32_generic(crc, buf, len)

Check warning on line 243 in builtins/zlib/crc32_cf.c

GitHub Actions / fedora42 CMAKE_CXX_STANDARD=20

old-style function definition [-Wold-style-definition]
unsigned long crc;
const unsigned char FAR *buf;
uInt len;
return crc ^ 0xffffffffUL;
}
uLong crc32(crc, buf, len)

Check warning on line 277 in builtins/zlib/crc32_cf.c

GitHub Actions / fedora42 CMAKE_CXX_STANDARD=20

old-style function definition [-Wold-style-definition]
uLong crc;
const Bytef *buf;
uInt len;
#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
/* ========================================================================= */
local unsigned long crc32_little(crc, buf, len)

Check warning on line 294 in builtins/zlib/crc32_cf.c

GitHub Actions / fedora42 CMAKE_CXX_STANDARD=20

old-style function definition [-Wold-style-definition]
unsigned long crc;
const unsigned char FAR *buf;
unsigned len;
#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
/* ========================================================================= */
local unsigned long crc32_big(crc, buf, len)

Check warning on line 334 in builtins/zlib/crc32_cf.c

GitHub Actions / fedora42 CMAKE_CXX_STANDARD=20

old-style function definition [-Wold-style-definition]
unsigned long crc;
const unsigned char FAR *buf;
unsigned len;
#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */
/* ========================================================================= */
local unsigned long gf2_matrix_times(mat, vec)

Check warning on line 374 in builtins/zlib/crc32_cf.c

GitHub Actions / fedora42 CMAKE_CXX_STANDARD=20

old-style function definition [-Wold-style-definition]
unsigned long *mat;
unsigned long vec;
{
}
/* ========================================================================= */
local void gf2_matrix_square(square, mat)

Check warning on line 391 in builtins/zlib/crc32_cf.c

GitHub Actions / fedora42 CMAKE_CXX_STANDARD=20

old-style function definition [-Wold-style-definition]
unsigned long *square;
unsigned long *mat;
{
}
/* ========================================================================= */
local uLong crc32_combine_(crc1, crc2, len2)

Check warning on line 402 in builtins/zlib/crc32_cf.c

GitHub Actions / fedora42 CMAKE_CXX_STANDARD=20

old-style function definition [-Wold-style-definition]
uLong crc1;
uLong crc2;
z_off64_t len2;
}
/* ========================================================================= */
uLong ZEXPORT crc32_combine(crc1, crc2, len2)

Check warning on line 458 in builtins/zlib/crc32_cf.c

GitHub Actions / fedora42 CMAKE_CXX_STANDARD=20

old-style function definition [-Wold-style-definition]
uLong crc1;
uLong crc2;
z_off_t len2;
return crc32_combine_(crc1, crc2, len2);
}
uLong ZEXPORT crc32_combine64(crc1, crc2, len2)

Check warning on line 466 in builtins/zlib/crc32_cf.c

GitHub Actions / fedora42 CMAKE_CXX_STANDARD=20

old-style function definition [-Wold-style-definition]
uLong crc1;
uLong crc2;
z_off64_t len2;