Skip to content

[ntuple] support rules with source class != dest class #18357

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tree/ntuple/v7/inc/ROOT/RNTupleReader.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ enum class ENTupleInfo {
\ingroup NTuple
\brief Reads RNTuple data from storage

The RNTupleReader provides access to data stored in the RNTuple binary format as C++ objects, using an RNTupleModel.
The RNTupleReader provides access to data stored in the RNTuple binary format as C++ objects, using an RNTupleModel.
It infers this model from the RNTuple's on-disk metadata, or uses a model imposed by the user.
The latter case allows users to read into a specialized RNTuple model that covers
only a subset of the fields in the RNTuple. The RNTuple model is used when reading complete entries through LoadEntry().
Expand Down
34 changes: 23 additions & 11 deletions tree/ntuple/v7/src/RFieldMeta.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <TBaseClass.h>
#include <TBufferFile.h>
#include <TClass.h>
#include <TClassEdit.h>
#include <TDataMember.h>
#include <TEnum.h>
#include <TObject.h>
Expand Down Expand Up @@ -205,13 +206,15 @@ std::vector<const ROOT::TSchemaRule *> ROOT::RClassField::FindRules(const ROOT::
// of the class
rules = ruleset->FindRules(fClass->GetName(), fClass->GetClassVersion(), fClass->GetCheckSum());
} else {
// We need to change (back) the name normalization from RNTuple to ROOT Meta
std::string normalizedName;
TClassEdit::GetNormalizedName(normalizedName, fieldDesc->GetTypeName());
// We do have an on-disk field that correspond to the current RClassField instance. Ask for rules matching the
// on-disk version of the field.
if (fieldDesc->GetTypeChecksum()) {
rules =
ruleset->FindRules(fieldDesc->GetTypeName(), fieldDesc->GetTypeVersion(), *fieldDesc->GetTypeChecksum());
rules = ruleset->FindRules(normalizedName, fieldDesc->GetTypeVersion(), *fieldDesc->GetTypeChecksum());
} else {
rules = ruleset->FindRules(fieldDesc->GetTypeName(), fieldDesc->GetTypeVersion());
rules = ruleset->FindRules(normalizedName, fieldDesc->GetTypeVersion());
}
}

Expand Down Expand Up @@ -319,10 +322,15 @@ void ROOT::RClassField::SetStagingClass(const std::string &className, unsigned i
TClass::GetClass(className.c_str())->GetStreamerInfo(classVersion);
if (classVersion != GetTypeVersion()) {
fStagingClass = TClass::GetClass((className + std::string("@@") + std::to_string(classVersion)).c_str());
if (!fStagingClass) {
// For a rename rule, we may simply ask for the old class name
fStagingClass = TClass::GetClass(className.c_str());
}
} else {
fStagingClass = fClass;
}
R__ASSERT(fStagingClass);
R__ASSERT(static_cast<unsigned int>(fStagingClass->GetClassVersion()) == classVersion);
}

void ROOT::RClassField::PrepareStagingArea(const std::vector<const TSchemaRule *> &rules,
Expand Down Expand Up @@ -367,7 +375,10 @@ void ROOT::RClassField::PrepareStagingArea(const std::vector<const TSchemaRule *
void ROOT::RClassField::AddReadCallbacksFromIORule(const TSchemaRule *rule)
{
auto func = rule->GetReadFunctionPointer();
R__ASSERT(func != nullptr);
if (func == nullptr) {
// Can happen for rename rules
return;
}
fReadCallbacks.emplace_back([func, stagingClass = fStagingClass, stagingArea = fStagingArea.get()](void *target) {
TVirtualObject onfileObj{nullptr};
onfileObj.fClass = stagingClass;
Expand All @@ -394,19 +405,20 @@ void ROOT::RClassField::BeforeConnectPageSource(ROOT::Internal::RPageSource &pag
const ROOT::RNTupleDescriptor &desc = descriptorGuard.GetRef();
const auto &fieldDesc = desc.GetFieldDescriptor(GetOnDiskId());

// Check that we have the same type.
// TODO(jblomer): relax for class rename rule
if (GetTypeName() != fieldDesc.GetTypeName()) {
throw RException(R__FAIL("incompatible type name for field " + GetFieldName() + ": " + GetTypeName() +
" vs. " + fieldDesc.GetTypeName()));
}

for (auto linkId : fieldDesc.GetLinkIds()) {
const auto &subFieldDesc = desc.GetFieldDescriptor(linkId);
regularSubfields.insert(subFieldDesc.GetFieldName());
}

rules = FindRules(&fieldDesc);

// If the field's type name is not the on-disk name but we found a rule, we know it is valid to read
// on-disk data because we found the rule according to the on-disk (source) type name and version/checksum.
if ((GetTypeName() != fieldDesc.GetTypeName()) && rules.empty()) {
throw RException(R__FAIL("incompatible type name for field " + GetFieldName() + ": " + GetTypeName() +
" vs. " + fieldDesc.GetTypeName()));
}

if (!rules.empty()) {
SetStagingClass(fieldDesc.GetTypeName(), fieldDesc.GetTypeVersion());
PrepareStagingArea(rules, desc, fieldDesc);
Expand Down
16 changes: 14 additions & 2 deletions tree/ntuple/v7/src/RNTupleView.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,34 @@
#include <ROOT/RNTupleView.hxx>
#include <ROOT/RPageStorage.hxx>

#include <deque>

ROOT::RNTupleGlobalRange
ROOT::Internal::GetFieldRange(const ROOT::RFieldBase &field, const ROOT::Internal::RPageSource &pageSource)
{
const auto &desc = pageSource.GetSharedDescriptorGuard().GetRef();

auto fnGetPrincipalColumnId = [&desc](ROOT::DescriptorId_t fieldId) -> ROOT::DescriptorId_t {
R__ASSERT(fieldId != ROOT::kInvalidDescriptorId);
auto columnIterable = desc.GetColumnIterable(fieldId);
return (columnIterable.size() > 0) ? columnIterable.begin()->GetPhysicalId() : ROOT::kInvalidDescriptorId;
};

auto columnId = fnGetPrincipalColumnId(field.GetOnDiskId());
if (columnId == ROOT::kInvalidDescriptorId) {
for (const auto &f : field) {
columnId = fnGetPrincipalColumnId(f.GetOnDiskId());
// We need to iterate the field descriptor tree, not the sub fields of `field`, because in the presence of
// read rules, the in-memory sub fields may be artificial and not have valid on-disk IDs.
const auto &linkIds = desc.GetFieldDescriptor(field.GetOnDiskId()).GetLinkIds();
std::deque<ROOT::DescriptorId_t> subFields(linkIds.begin(), linkIds.end());
while (!subFields.empty()) {
auto subFieldId = subFields.front();
subFields.pop_front();
columnId = fnGetPrincipalColumnId(subFieldId);
if (columnId != ROOT::kInvalidDescriptorId)
break;

const auto &subLinkIds = desc.GetFieldDescriptor(subFieldId).GetLinkIds();
subFields.insert(subFields.end(), subLinkIds.begin(), subLinkIds.end());
}
}

Expand Down
15 changes: 15 additions & 0 deletions tree/ntuple/v7/test/CustomStruct.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,11 @@ struct StructWithIORules : StructWithIORulesBase {
StructWithIORules(float _a, char _c[4]) : StructWithIORulesBase{_a, 0.0f}, s{{_c[0], _c[1], _c[2], _c[3]}, {}} {}
};

struct OldCoordinates {
float fOldX;
float fOldY;
};

struct CoordinatesWithIORules {
float fX;
float fY;
Expand All @@ -271,6 +276,16 @@ struct LowPrecisionFloatWithIORules {
float fLast8BitsZero; // the I/O rule will "randomize" the last 8 bits
};

template <typename T>
struct OldName {
T fValue;
};

template <typename T>
struct NewName {
T fValue;
};

struct Cyclic {
std::vector<Cyclic> fMember;
};
Expand Down
14 changes: 14 additions & 0 deletions tree/ntuple/v7/test/CustomStructLinkDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,20 @@
#pragma read sourceClass = "StructWithIORules" source = "" checksum = "[1]" targetClass = "StructWithIORules" target = \
"checksumB" code = "{ checksumB = 0.0; }"

#pragma link C++ options = version(3) class OldCoordinates + ;

#pragma link C++ options = version(3) class CoordinatesWithIORules + ;

#pragma read sourceClass = "CoordinatesWithIORules" source = "float fX; float fY" version = "[3]" targetClass = \
"CoordinatesWithIORules" target = "fPhi,fR" include = "cmath" code = \
"{ fR = sqrt(onfile.fX * onfile.fX + onfile.fY * onfile.fY); fPhi = atan2(onfile.fY, onfile.fX); }"

#pragma read sourceClass = "OldCoordinates" source = "float fOldX; float fOldY" version = "[3]" targetClass = \
"CoordinatesWithIORules" target = "fX,fY,fPhi,fR" include = "cmath" code = \
"{ fR = sqrt(onfile.fOldX * onfile.fOldX + onfile.fOldY * onfile.fOldY); \
fPhi = atan2(onfile.fOldY, onfile.fOldX); \
fX = onfile.fOldX; fY = onfile.fOldY; }"

#pragma link C++ options = version(3) class LowPrecisionFloatWithIORules + ;

#pragma read sourceClass = "LowPrecisionFloatWithIORules" source = "float fLast8BitsZero;" version = \
Expand All @@ -114,6 +122,12 @@
bits |= 137; /* placeholder for randomizing the 8 LSBs */ \
std::memcpy(&fLast8BitsZero, &bits, sizeof(fLast8BitsZero)); }"

#pragma link C++ options = version(3) class OldName < int> + ;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#pragma link C++ options=version(3) class OldName<int>+;

is more typical white spacing.

#pragma link C++ options = version(3) class OldName < OldName < int>> + ;
#pragma link C++ options = version(3) class NewName < int> + ;
#pragma link C++ options = version(3) class NewName < NewName < int>> + ;
#pragma read sourceClass = "OldName<OldName<int>>" targetClass = "NewName<OldName<int>>" version = "[3]"

#pragma link C++ class Cyclic + ;
#pragma link C++ class CyclicCollectionProxy + ;
#pragma link C++ class Unsupported + ;
Expand Down
15 changes: 13 additions & 2 deletions tree/ntuple/v7/test/rfield_class.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -250,11 +250,14 @@ TEST(RNTuple, TClassReadRules)
auto model = RNTupleModel::Create();
auto ptrClass = model->MakeField<StructWithIORules>("class");
auto ptrCoord = model->MakeField<CoordinatesWithIORules>("coord");
auto ptrOldCoord = model->MakeField<OldCoordinates>("oldCoord");
auto ptrLowPrecisionFloat = model->MakeField<LowPrecisionFloatWithIORules>("lowPrecisionFloat");
ptrCoord->fX = 1.0;
ptrCoord->fY = 1.0;
auto ptrOldName = model->MakeField<OldName<OldName<int>>>("rename");
ptrCoord->fX = ptrOldCoord->fOldX = 1.0;
ptrCoord->fY = ptrOldCoord->fOldY = 1.0;
ptrLowPrecisionFloat->fFoo = 1.0;
ptrLowPrecisionFloat->fLast8BitsZero = last8BitsZero;
ptrOldName->fValue.fValue = 42;
auto writer = RNTupleWriter::Recreate(std::move(model), "f", fileGuard.GetPath());
for (int i = 0; i < 5; i++) {
*ptrClass = StructWithIORules{/*a=*/static_cast<float>(i), /*chars=*/c};
Expand Down Expand Up @@ -294,4 +297,12 @@ TEST(RNTuple, TClassReadRules)
EXPECT_FLOAT_EQ(1.0, viewLowPrecisionFloat(0).fFoo);
EXPECT_NE(last8BitsZero, viewLowPrecisionFloat(0).fLast8BitsZero);
EXPECT_NEAR(2.0, viewLowPrecisionFloat(0).fLast8BitsZero, 0.001);
auto viewOldCoordTransformed = reader->GetView<CoordinatesWithIORules>("oldCoord");
EXPECT_FLOAT_EQ(1.0, viewOldCoordTransformed(0).fX);
EXPECT_FLOAT_EQ(1.0, viewOldCoordTransformed(0).fY);
EXPECT_FLOAT_EQ(sqrt(2), viewOldCoordTransformed(0).fR);
EXPECT_FLOAT_EQ(M_PI / 4., viewOldCoordTransformed(0).fPhi);

auto viewRename = reader->GetView<NewName<OldName<int>>>("rename");
EXPECT_EQ(42, viewRename(0).fValue.fValue);
}
Loading