Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README/ReleaseNotes/v638/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ The following people have contributed to this new version:

* Behaviour change: the behaviour of `TChain::SetBranchStatus` has been aligned with that of `TTree::SetBranchStatus`. Previously, when `SetBranchStatus` was called to deactivate all branches, a subsequent call to `TChain::SetBranchAddress` would override that instruction and activate the single branch it was called on, whereas `TTree::SetBranchAddress` respects the rule imposed by `SetBranchStatus`. If a user needs only one or a few branches to be active, they should call `SetBranchStatus("brName", true)` on each branch that needs to be active in the TChain, as was already necessary for a TTree. See https://github.com/root-project/root/pull/19221 for more details.

### RNTuple

* The parallel writer is now part of the public, stable API. The `RNTupleParallelWriter` and the closely related `RNTupleFillContext` moved from the `ROOT::Experimental` to the `ROOT` namespace.

## Math

### Minuit2
Expand Down
10 changes: 4 additions & 6 deletions tree/dataframe/inc/ROOT/RDF/SnapshotHelpers.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,10 @@ class TBranch;
class TFile;

namespace ROOT {
namespace Experimental {
class RNTupleFillContext;
class RNTupleParallelWriter;
} // namespace Experimental
class REntry;
class RFieldToken;
class RNTupleFillContext;
class RNTupleParallelWriter;
class TBufferMerger;
class TBufferMergerFile;
} // namespace ROOT
Expand All @@ -54,11 +52,11 @@ class R__CLING_PTRCHECK(off) UntypedSnapshotRNTupleHelper final : public RAction
ROOT::Detail::RDF::RLoopManager *fOutputLoopManager;
ColumnNames_t fInputFieldNames; // This contains the resolved aliases
ColumnNames_t fOutputFieldNames;
std::unique_ptr<ROOT::Experimental::RNTupleParallelWriter> fWriter;
std::unique_ptr<ROOT::RNTupleParallelWriter> fWriter;
std::vector<ROOT::RFieldToken> fFieldTokens;

unsigned int fNSlots;
std::vector<std::shared_ptr<ROOT::Experimental::RNTupleFillContext>> fFillContexts;
std::vector<std::shared_ptr<ROOT::RNTupleFillContext>> fFillContexts;
std::vector<std::unique_ptr<ROOT::REntry>> fEntries;

std::vector<const std::type_info *> fInputColumnTypeIDs; // Types for the input columns
Expand Down
2 changes: 1 addition & 1 deletion tree/dataframe/src/RDFSnapshotHelpers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,7 @@ void ROOT::Internal::RDF::UntypedSnapshotRNTupleHelper::Initialize()

// The RNTupleParallelWriter has exclusive access to the underlying TFile, no further synchronization is needed for
// calls to Fill() (in Exec) and FlushCluster() (in FinalizeTask).
fWriter = ROOT::Experimental::RNTupleParallelWriter::Append(std::move(model), fNTupleName, *outputDir, writeOptions);
fWriter = ROOT::RNTupleParallelWriter::Append(std::move(model), fNTupleName, *outputDir, writeOptions);
}

void ROOT::Internal::RDF::UntypedSnapshotRNTupleHelper::InitTask(TTreeReader *, unsigned int slot)
Expand Down
4 changes: 2 additions & 2 deletions tree/ntuple/inc/ROOT/REntry.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@

namespace ROOT {

class RNTupleFillContext;
class RNTupleReader;

namespace Experimental {
class RNTupleFillContext;
class RNTupleProcessor;
class RNTupleSingleProcessor;
class RNTupleChainProcessor;
Expand All @@ -52,9 +52,9 @@ with values are managed through shared pointers.
*/
// clang-format on
class REntry {
friend class RNTupleFillContext;
friend class RNTupleModel;
friend class RNTupleReader;
friend class Experimental::RNTupleFillContext;
friend class Experimental::RNTupleProcessor;
friend class Experimental::RNTupleSingleProcessor;
friend class Experimental::RNTupleChainProcessor;
Expand Down
18 changes: 7 additions & 11 deletions tree/ntuple/inc/ROOT/RNTupleFillContext.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
/// \ingroup NTuple
/// \author Jakob Blomer <[email protected]>
/// \date 2024-02-22
/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
/// is welcome!

/*************************************************************************
* Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
Expand Down Expand Up @@ -32,11 +30,10 @@
#include <vector>

namespace ROOT {
namespace Experimental {

// clang-format off
/**
\class ROOT::Experimental::RNTupleFillContext
\class ROOT::RNTupleFillContext
\ingroup NTuple
\brief A context for filling entries (data) into clusters of an RNTuple

Expand All @@ -61,7 +58,7 @@ private:
/// Needs to be destructed before fSink
std::unique_ptr<ROOT::RNTupleModel> fModel;

Detail::RNTupleMetrics fMetrics;
Experimental::Detail::RNTupleMetrics fMetrics;

ROOT::NTupleSize_t fLastFlushed = 0;
ROOT::NTupleSize_t fNEntries = 0;
Expand Down Expand Up @@ -132,14 +129,14 @@ public:
///
/// This method will check the entry's model ID to ensure it comes from the context's own model or throw an exception
/// otherwise.
void FillNoFlush(Detail::RRawPtrWriteEntry &entry, ROOT::RNTupleFillStatus &status)
void FillNoFlush(Experimental::Detail::RRawPtrWriteEntry &entry, ROOT::RNTupleFillStatus &status)
{
FillNoFlushImpl(entry, status);
}
/// Fill an RRawPtrWriteEntry into this context. This method will check the entry's model ID to ensure it comes
/// from the context's own model or throw an exception otherwise.
/// \return The number of uncompressed bytes written.
std::size_t Fill(Detail::RRawPtrWriteEntry &entry) { return FillImpl(entry); }
std::size_t Fill(Experimental::Detail::RRawPtrWriteEntry &entry) { return FillImpl(entry); }

/// Flush column data, preparing for CommitCluster or to reduce memory usage. This will trigger compression of pages,
/// but not actually write to storage.
Expand All @@ -151,7 +148,7 @@ public:

const ROOT::RNTupleModel &GetModel() const { return *fModel; }
std::unique_ptr<ROOT::REntry> CreateEntry() const { return fModel->CreateEntry(); }
std::unique_ptr<Detail::RRawPtrWriteEntry> CreateRawPtrWriteEntry() const
std::unique_ptr<Experimental::Detail::RRawPtrWriteEntry> CreateRawPtrWriteEntry() const
{
return fModel->CreateRawPtrWriteEntry();
}
Expand All @@ -171,10 +168,9 @@ public:
bool IsStagedClusterCommittingEnabled() const { return fStagedClusterCommitting; }

void EnableMetrics() { fMetrics.Enable(); }
const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
}; // class RNTupleFillContext
const Experimental::Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
};

} // namespace Experimental
} // namespace ROOT

#endif // ROOT_RNTupleFillContext
10 changes: 4 additions & 6 deletions tree/ntuple/inc/ROOT/RNTupleFillStatus.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,20 @@

namespace ROOT {

namespace Experimental {
class RNTupleFillContext;
}

// clang-format off
/**
\class ROOT::RNTupleFillStatus
\ingroup NTuple
\brief A status object after filling an entry

After passing an instance to RNTupleWriter::FillNoFlush or RNTupleFillContext::FillNoFlush, the caller must check
ShouldFlushCluster and call RNTupleWriter::FlushCluster or RNTupleFillContext::FlushCluster if necessary.
After passing an instance to RNTupleWriter::FillNoFlush() or RNTupleFillContext::FillNoFlush(), the caller must check
ShouldFlushCluster() and call RNTupleWriter::FlushCluster() or RNTupleFillContext::FlushCluster() if necessary.
*/
// clang-format on
class RNTupleFillStatus {
friend class Experimental::RNTupleFillContext;
friend class RNTupleFillContext;

private:
/// Number of entries written into the current cluster
Expand All @@ -55,7 +53,7 @@ public:
std::size_t GetLastEntrySize() const { return fLastEntrySize; }
/// Return true if the caller should call FlushCluster.
bool ShouldFlushCluster() const { return fShouldFlushCluster; }
}; // class RNTupleFillContext
};

} // namespace ROOT

Expand Down
42 changes: 21 additions & 21 deletions tree/ntuple/inc/ROOT/RNTupleParallelWriter.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
/// \ingroup NTuple
/// \author Jonas Hahnfeld <[email protected]>
/// \date 2024-02-01
/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
/// is welcome!

/*************************************************************************
* Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
Expand Down Expand Up @@ -34,30 +32,27 @@ namespace Internal {
class RPageSink;
} // namespace Internal

namespace Experimental {

class RNTupleFillContext;

/**
\class ROOT::Experimental::RNTupleParallelWriter
\class ROOT::RNTupleParallelWriter
\ingroup NTuple
\brief A writer to fill an RNTuple from multiple contexts

Compared to the sequential RNTupleWriter, a parallel writer enables the creation of multiple RNTupleFillContext (see
RNTupleParallelWriter::CreateFillContext). Each fill context prepares independent clusters that are appended to the
common ntuple with internal synchronization. Before destruction, all fill contexts must have flushed their data and
been destroyed (or data could be lost!).
CreateFillContext()). Each fill context prepares independent clusters that are appended to the common RNTuple with
internal synchronization. Before destruction, all fill contexts must have flushed their data and been destroyed (or
data could be lost!).

For user convenience, RNTupleParallelWriter::CreateFillContext is thread-safe and may be called from multiple threads
in parallel at any time, also after some data has already been written. Internally, the original model is cloned and
ownership is passed to a newly created RNTupleFillContext. For that reason, it is recommended to use
RNTupleModel::CreateBare when creating the model for parallel writing and avoid the allocation of a useless default
REntry per context.
For user convenience, CreateFillContext() is thread-safe and may be called from multiple threads in parallel at any
time, also after some data has already been written. Internally, the original model is cloned and ownership is passed
to a newly created RNTupleFillContext. For that reason, it is recommended to use RNTupleModel::CreateBare when creating
the model for parallel writing and avoid the allocation of a useless default REntry per context.

Note that the sequence of independently prepared clusters is indeterminate and therefore entries are only partially
ordered: Entries from one context are totally ordered as they were filled. However, there is no ordering with other
contexts and the entries may be appended to the ntuple either before or after other entries written in parallel into
other contexts. In addition, two consecutive entries in one fill context can end up separated in the final ntuple, if
contexts and the entries may be appended to the RNTuple either before or after other entries written in parallel into
other contexts. In addition, two consecutive entries in one fill context can end up separated in the final RNTuple, if
they happen to fall onto a cluster boundary and other contexts append more entries before the next cluster is full.

At the moment, the parallel writer does not (yet) support incremental updates of the underlying model. Please refer to
Expand All @@ -73,7 +68,7 @@ private:
std::unique_ptr<ROOT::Internal::RPageSink> fSink;
/// The original RNTupleModel connected to fSink; needs to be destructed before it.
std::unique_ptr<ROOT::RNTupleModel> fModel;
Detail::RNTupleMetrics fMetrics;
Experimental::Detail::RNTupleMetrics fMetrics;
/// List of all created helpers. They must be destroyed before this RNTupleParallelWriter is destructed.
std::vector<std::weak_ptr<RNTupleFillContext>> fFillContexts;

Expand All @@ -82,11 +77,17 @@ private:
RNTupleParallelWriter &operator=(const RNTupleParallelWriter &) = delete;

public:
/// Recreate a new file and return a writer to write an ntuple.
/// Recreate a new file and return a writer to write an RNTuple.
static std::unique_ptr<RNTupleParallelWriter>
Recreate(std::unique_ptr<ROOT::RNTupleModel> model, std::string_view ntupleName, std::string_view storage,
const ROOT::RNTupleWriteOptions &options = ROOT::RNTupleWriteOptions());
/// Append an ntuple to the existing file, which must not be accessed while data is filled into any created context.
/// Append an RNTuple to the existing file.
///
/// While the writer synchronizes between multiple fill contexts created from the same writer, there is no
/// synchronization with other writers or other clients that write into the same file. The caller must ensure that
/// the underlying file is not accessed while data is filled into any created context. To improve performance, it
/// is allowed to use special methods that are guaranteed to not interact with the underlying file, such as
/// RNTupleFillContext::FillNoFlush().
static std::unique_ptr<RNTupleParallelWriter>
Append(std::unique_ptr<ROOT::RNTupleModel> model, std::string_view ntupleName, TDirectory &fileOrDirectory,
const ROOT::RNTupleWriteOptions &options = ROOT::RNTupleWriteOptions());
Expand All @@ -97,17 +98,16 @@ public:
/// thread-safe and may be called from multiple threads in parallel at any time, also after some data has already
/// been written.
///
/// Note that all fill contexts must be destroyed before RNTupleParallelWriter::CommitDataset() is called.
/// Note that all fill contexts must be destroyed before CommitDataset() is called.
std::shared_ptr<RNTupleFillContext> CreateFillContext();

/// Automatically called by the destructor
void CommitDataset();

void EnableMetrics() { fMetrics.Enable(); }
const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
const Experimental::Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
};

} // namespace Experimental
} // namespace ROOT

#endif
2 changes: 1 addition & 1 deletion tree/ntuple/inc/ROOT/RNTupleWriter.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class RNTupleWriter {
Internal::CreateRNTupleWriter(std::unique_ptr<ROOT::RNTupleModel>, std::unique_ptr<Internal::RPageSink>);

private:
Experimental::RNTupleFillContext fFillContext;
RNTupleFillContext fFillContext;
Experimental::Detail::RNTupleMetrics fMetrics;

ROOT::NTupleSize_t fLastCommittedClusterGroup = 0;
Expand Down
5 changes: 2 additions & 3 deletions tree/ntuple/inc/ROOT/RRawPtrWriteEntry.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,9 @@ namespace ROOT {

class RNTupleModel;

namespace Experimental {

class RNTupleFillContext;

namespace Experimental {
namespace Detail {

// clang-format off
Expand All @@ -46,7 +45,7 @@ provided by REntry, with safe interfaces, type checks, and shared object ownersh
// clang-format on
class RRawPtrWriteEntry {
friend class ROOT::RNTupleModel;
friend class ROOT::Experimental::RNTupleFillContext;
friend class ROOT::RNTupleFillContext;

private:
/// The entry must be linked to a specific model, identified by a model ID
Expand Down
14 changes: 6 additions & 8 deletions tree/ntuple/src/RNTupleFillContext.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
/// \ingroup NTuple
/// \author Jakob Blomer <[email protected]>
/// \date 2024-02-22
/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
/// is welcome!

/*************************************************************************
* Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
Expand All @@ -27,8 +25,8 @@
#include <algorithm>
#include <utility>

ROOT::Experimental::RNTupleFillContext::RNTupleFillContext(std::unique_ptr<ROOT::RNTupleModel> model,
std::unique_ptr<ROOT::Internal::RPageSink> sink)
ROOT::RNTupleFillContext::RNTupleFillContext(std::unique_ptr<ROOT::RNTupleModel> model,
std::unique_ptr<ROOT::Internal::RPageSink> sink)
: fSink(std::move(sink)), fModel(std::move(model)), fMetrics("RNTupleFillContext")
{
fModel->Freeze();
Expand All @@ -42,7 +40,7 @@ ROOT::Experimental::RNTupleFillContext::RNTupleFillContext(std::unique_ptr<ROOT:
fUnzippedClusterSizeEst = scale * writeOpts.GetApproxZippedClusterSize();
}

ROOT::Experimental::RNTupleFillContext::~RNTupleFillContext()
ROOT::RNTupleFillContext::~RNTupleFillContext()
{
try {
FlushCluster();
Expand All @@ -56,14 +54,14 @@ ROOT::Experimental::RNTupleFillContext::~RNTupleFillContext()
}
}

void ROOT::Experimental::RNTupleFillContext::FlushColumns()
void ROOT::RNTupleFillContext::FlushColumns()
{
for (auto &field : ROOT::Internal::GetFieldZeroOfModel(*fModel)) {
ROOT::Internal::CallFlushColumnsOnField(field);
}
}

void ROOT::Experimental::RNTupleFillContext::FlushCluster()
void ROOT::RNTupleFillContext::FlushCluster()
{
if (fNEntries == fLastFlushed) {
return;
Expand Down Expand Up @@ -91,7 +89,7 @@ void ROOT::Experimental::RNTupleFillContext::FlushCluster()
fUnzippedClusterSize = 0;
}

void ROOT::Experimental::RNTupleFillContext::CommitStagedClusters()
void ROOT::RNTupleFillContext::CommitStagedClusters()
{
if (fStagedClusters.empty()) {
return;
Expand Down
Loading
Loading