diff --git a/.gitignore b/.gitignore index bbc0145..71659f2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,6 @@ *.json *.root + +# Ignore files related to dictionary generation +*.pcm +Nested*.cxx diff --git a/Makefile b/Makefile index c220bce..cadf867 100644 --- a/Makefile +++ b/Makefile @@ -5,17 +5,23 @@ endif .PHONY: all all: + $(MAKE) dict $(MAKE) write $(MAKE) read # This assumes there is no whitespace in any of the paths... +DICT_MAKEFILE_DIR := $(sort $(shell find */ -name Makefile -printf "%h\n")) WRITE_C := $(sort $(shell find . -name write.C)) READ_C := $(sort $(shell find . -name read.C)) +.PHONY: dict +dict: + @$(foreach d,$(DICT_MAKEFILE_DIR),$(MAKE) -C $(d) &&) true + .PHONY: write write: - @$(foreach c,$(WRITE_C),$(ROOT_EXE) -q -l $(c) &&) true + @$(foreach c,$(WRITE_C),LD_LIBRARY_PATH=$(shell dirname $(c))$${LD_LIBRARY_PATH:+:$$LD_LIBRARY_PATH} $(ROOT_EXE) -q -l $(c) &&) true .PHONY: read read: - @$(foreach c,$(READ_C),$(ROOT_EXE) -q -l $(c) &&) true + @$(foreach c,$(READ_C),LD_LIBRARY_PATH=$(shell dirname $(c))$${LD_LIBRARY_PATH:+:$$LD_LIBRARY_PATH} $(ROOT_EXE) -q -l $(c) &&) true diff --git a/types/README.md b/types/README.md index 7a8c70b..2f28d4d 100644 --- a/types/README.md +++ b/types/README.md @@ -1,6 +1,10 @@ # Types * [`fundamental`](fundamental): fundamental column types + * [`multiset`](multiset): `std::multiset` with all `[Split]Index{32,64}` column types + * [`set`](set): `std::set` with all `[Split]Index{32,64}` column types * [`string`](string): `std::string` with all `[Split]Index{32,64}` column types + * [`unordered_multiset`](unordered_multiset): `std::unordered_multiset` with all `[Split]Index{32,64}` column types + * [`unordered_set`](unordered_set): `std::unordered_set` with all `[Split]Index{32,64}` column types * [`variant`](variant): `std::variant` with `Switch` column type * [`vector`](vector): `std::vector` with all `[Split]Index{32,64}` column types diff --git a/types/multiset/README.md b/types/multiset/README.md new file mode 100644 index 0000000..637bdcf --- /dev/null +++ 
b/types/multiset/README.md @@ -0,0 +1,4 @@ +# `std::multiset` + + * [`fundamental`](fundamental): `std::multiset` + * [`nested`](nested): `std::multiset>` diff --git a/types/multiset/fundamental/README.md b/types/multiset/fundamental/README.md new file mode 100644 index 0000000..0111614 --- /dev/null +++ b/types/multiset/fundamental/README.md @@ -0,0 +1,17 @@ +# `std::multiset` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the collection parent field. +All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. Increasing number of elements in the set: + one element in the first field, two elements in the second field, etc. +4. Duplicate elements passed to the set constructor +5. Unordered elements passed to the set constructor diff --git a/types/multiset/fundamental/read.C b/types/multiset/fundamental/read.C new file mode 100644 index 0000000..9e24295 --- /dev/null +++ b/types/multiset/fundamental/read.C @@ -0,0 +1,68 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include +#include +#include +#include +#include +#include + +using Multiset = std::multiset; + +static void PrintMultisetValue(const REntry &entry, std::string_view name, + std::ostream &os, bool last = false) { + Multiset &value = *entry.GetPtr(name); + os << " \"" << name << "\": ["; + bool first = true; + for (auto element : value) { + if (first) { + first = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!value.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.multiset.fundamental.root", + std::string_view output = "types.multiset.fundamental.json") { + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = 
reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintMultisetValue(entry, "Index32", os); + PrintMultisetValue(entry, "Index64", os); + PrintMultisetValue(entry, "SplitIndex32", os); + PrintMultisetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. + } + os << "\n"; + os << "]\n"; +} diff --git a/types/multiset/fundamental/write.C b/types/multiset/fundamental/write.C new file mode 100644 index 0000000..c3ad639 --- /dev/null +++ b/types/multiset/fundamental/write.C @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include +#include +#include +#include + +using Multiset = std::multiset; + +static std::shared_ptr MakeMultisetField(RNTupleModel &model, + std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = "types.multiset.fundamental.root") { + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = MakeMultisetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = MakeMultisetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = + MakeMultisetField(*model, "SplitIndex32", EColumnType::kSplitIndex32); + auto SplitIndex64 = + MakeMultisetField(*model, "SplitIndex64", EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + 
RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {1}; + *Index64 = {2}; + *SplitIndex32 = {3}; + *SplitIndex64 = {4}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the set + *Index32 = {1}; + *Index64 = {2, 3}; + *SplitIndex32 = {4, 5, 6}; + *SplitIndex64 = {7, 8, 9, 10}; + writer->Fill(); + + // Fourth entry: duplicate elements in the set + *Index32 = {1, 1}; + *Index64 = {2, 2}; + *SplitIndex32 = {3, 3}; + *SplitIndex64 = {4, 4}; + writer->Fill(); + + // Fifth entry: unordered elements in the set + *Index32 = {2, 1}; + *Index64 = {4, 3}; + *SplitIndex32 = {6, 5}; + *SplitIndex64 = {8, 7}; + writer->Fill(); +} diff --git a/types/multiset/nested/LinkDef.h b/types/multiset/nested/LinkDef.h new file mode 100644 index 0000000..c5651ce --- /dev/null +++ b/types/multiset/nested/LinkDef.h @@ -0,0 +1,6 @@ +#include +#include + +#ifdef __CLING__ +#pragma link C++ class std::multiset>+; +#endif diff --git a/types/multiset/nested/Makefile b/types/multiset/nested/Makefile new file mode 100644 index 0000000..9cce17d --- /dev/null +++ b/types/multiset/nested/Makefile @@ -0,0 +1,20 @@ +CXX=g++ +CXXFLAGS_ROOT=$(shell root-config --cflags) +ifeq ($(CXXFLAGS_ROOT),) + $(error cannot find root-config: make sure to source thisroot.sh) +endif +CXXFLAGS=-Wall $(CXXFLAGS_ROOT) +LDFLAGS=$(shell root-config --libs) + +.PHONY: all clean + +all: NestedMultiset.cxx libNestedMultiset.so + +NestedMultiset.cxx: NestedMultiset.hxx LinkDef.h + rootcling -f $@ $^ + +libNestedMultiset.so: NestedMultiset.cxx + $(CXX) -shared -fPIC -o $@ $^ $(CXXFLAGS) $(LDFLAGS) + +clean: + rm -f NestedMultiset.cxx NestedMultiset_rdict.pcm libNestedMultiset.so diff --git a/types/multiset/nested/NestedMultiset.hxx b/types/multiset/nested/NestedMultiset.hxx new file mode 
100644 index 0000000..b52b7e0 --- /dev/null +++ b/types/multiset/nested/NestedMultiset.hxx @@ -0,0 +1,4 @@ +#pragma once + +#include +#include diff --git a/types/multiset/nested/README.md b/types/multiset/nested/README.md new file mode 100644 index 0000000..e7a2eb0 --- /dev/null +++ b/types/multiset/nested/README.md @@ -0,0 +1,20 @@ +# `std::multiset<std::multiset<std::int32_t>>` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the two collection parent fields. +All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. Increasing number of elements in the outer set, with arbitrary lengths of the inner sets +4. Duplicate sets passed to the set constructor of the outer set +5. Unordered sets (of arbitrary length) passed to the set constructor of the outer set + +## Dictionaries + +These tests require ROOT dictionaries, which can be generated with the provided `Makefile` in this directory. This will create a `libNestedMultiset` shared object. 
diff --git a/types/multiset/nested/read.C b/types/multiset/nested/read.C new file mode 100644 index 0000000..b19ffb7 --- /dev/null +++ b/types/multiset/nested/read.C @@ -0,0 +1,88 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include + +#include +#include +#include +#include +#include +#include +#include + +using Multiset = std::multiset>; + +static void PrintNestedMultisetValue(const REntry &entry, std::string_view name, + std::ostream &os, bool last = false) { + Multiset &value = *entry.GetPtr(name); + os << " \"" << name << "\": ["; + bool outerFirst = true; + for (const auto &inner : value) { + if (outerFirst) { + outerFirst = false; + } else { + os << ","; + } + os << "\n ["; + bool innerFirst = true; + for (auto element : inner) { + if (innerFirst) { + innerFirst = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!inner.empty()) { + os << "\n "; + } + os << "]"; + } + if (!value.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.multiset.nested.root", + std::string_view output = "types.multiset.nested.json") { + if (gSystem->Load("libNestedMultiset") < 0) + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintNestedMultisetValue(entry, "Index32", os); + PrintNestedMultisetValue(entry, "Index64", os); + PrintNestedMultisetValue(entry, "SplitIndex32", os); + PrintNestedMultisetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma 
before the + // next entry. + } + os << "\n"; + os << "]\n"; +} diff --git a/types/multiset/nested/write.C b/types/multiset/nested/write.C new file mode 100644 index 0000000..6db8bb3 --- /dev/null +++ b/types/multiset/nested/write.C @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include + +#include +#include +#include +#include +#include + +using Multiset = std::multiset>; + +static std::shared_ptr MakeMultisetField(RNTupleModel &model, + std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + field->GetSubFields()[0]->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = "types.multiset.nested.root") { + if (gSystem->Load("libNestedMultiset") < 0) + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = MakeMultisetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = MakeMultisetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = + MakeMultisetField(*model, "SplitIndex32", EColumnType::kSplitIndex32); + auto SplitIndex64 = + MakeMultisetField(*model, "SplitIndex64", EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {{1}}; + *Index64 = {{2}}; + *SplitIndex32 = {{3}}; + *SplitIndex64 = {{4}}; + writer->Fill(); + + // Second 
entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of inner sets (of arbitrary length) in the outer set + *Index32 = {{1}}; + *Index64 = {{}, {2, 3}}; + *SplitIndex32 = {{4}, {}, {5, 6}}; + *SplitIndex64 = {{}, {7, 8, 9}, {}, {10}}; + writer->Fill(); + + // Fourth entry: duplicate inner sets passed to the outer set + *Index32 = {{1}, {1}}; + *Index64 = {{2}, {2}}; + *SplitIndex32 = {{3}, {3}}; + *SplitIndex64 = {{4}, {4}}; + writer->Fill(); + + // Fifth entry: unordered inner sets (of arbitrary length) passed to the outer set + *Index32 = {{3, 2}, {1}}; + *Index64 = {{}, {5, 4}}; + *SplitIndex32 = {{7}, {6}}; + *SplitIndex64 = {{9, 8}, {}}; + writer->Fill(); +} diff --git a/types/set/README.md b/types/set/README.md new file mode 100644 index 0000000..59141d6 --- /dev/null +++ b/types/set/README.md @@ -0,0 +1,4 @@ +# `std::set` + + * [`fundamental`](fundamental): `std::set` + * [`nested`](nested): `std::set>` diff --git a/types/set/fundamental/README.md b/types/set/fundamental/README.md new file mode 100644 index 0000000..75ce80c --- /dev/null +++ b/types/set/fundamental/README.md @@ -0,0 +1,17 @@ +# `std::set` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the collection parent field. +All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. Increasing number of elements in the set: + one element in the first field, two elements in the second field, etc. +4. Duplicate elements passed to the set constructor +5. 
Unordered elements passed to the set constructor diff --git a/types/set/fundamental/read.C b/types/set/fundamental/read.C new file mode 100644 index 0000000..e9da3a7 --- /dev/null +++ b/types/set/fundamental/read.C @@ -0,0 +1,68 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include +#include +#include +#include +#include +#include + +using Set = std::set; + +static void PrintSetValue(const REntry &entry, std::string_view name, + std::ostream &os, bool last = false) { + Set &value = *entry.GetPtr(name); + os << " \"" << name << "\": ["; + bool first = true; + for (auto element : value) { + if (first) { + first = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!value.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.set.fundamental.root", + std::string_view output = "types.set.fundamental.json") { + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintSetValue(entry, "Index32", os); + PrintSetValue(entry, "Index64", os); + PrintSetValue(entry, "SplitIndex32", os); + PrintSetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. 
+ } + os << "\n"; + os << "]\n"; +} diff --git a/types/set/fundamental/write.C b/types/set/fundamental/write.C new file mode 100644 index 0000000..c726cae --- /dev/null +++ b/types/set/fundamental/write.C @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include +#include +#include +#include + +using Set = std::set; + +static std::shared_ptr MakeSetField(RNTupleModel &model, + std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = "types.set.fundamental.root") { + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = MakeSetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = MakeSetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = + MakeSetField(*model, "SplitIndex32", EColumnType::kSplitIndex32); + auto SplitIndex64 = + MakeSetField(*model, "SplitIndex64", EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {1}; + *Index64 = {2}; + *SplitIndex32 = {3}; + *SplitIndex64 = {4}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the set + *Index32 = {1}; + *Index64 = {2, 3}; + *SplitIndex32 = {4, 5, 6}; + *SplitIndex64 = {7, 8, 9, 10}; + writer->Fill(); + + // Fourth entry: duplicate elements in 
the set + *Index32 = {1, 1}; + *Index64 = {2, 2}; + *SplitIndex32 = {3, 3}; + *SplitIndex64 = {4, 4}; + writer->Fill(); + + // Fifth entry: unordered elements in the set + *Index32 = {2, 1}; + *Index64 = {4, 3}; + *SplitIndex32 = {6, 5}; + *SplitIndex64 = {8, 7}; + writer->Fill(); +} diff --git a/types/set/nested/LinkDef.h b/types/set/nested/LinkDef.h new file mode 100644 index 0000000..232c236 --- /dev/null +++ b/types/set/nested/LinkDef.h @@ -0,0 +1,6 @@ +#include +#include + +#ifdef __CLING__ +#pragma link C++ class std::set>+; +#endif diff --git a/types/set/nested/Makefile b/types/set/nested/Makefile new file mode 100644 index 0000000..836e1a2 --- /dev/null +++ b/types/set/nested/Makefile @@ -0,0 +1,20 @@ +CXX=g++ +CXXFLAGS_ROOT=$(shell root-config --cflags) +ifeq ($(CXXFLAGS_ROOT),) + $(error cannot find root-config: make sure to source thisroot.sh) +endif +CXXFLAGS=-Wall $(CXXFLAGS_ROOT) +LDFLAGS=$(shell root-config --libs) + +.PHONY: all clean + +all: NestedSet.cxx libNestedSet.so + +NestedSet.cxx: NestedSet.hxx LinkDef.h + rootcling -f $@ $^ + +libNestedSet.so: NestedSet.cxx + $(CXX) -shared -fPIC -o $@ $^ $(CXXFLAGS) $(LDFLAGS) + +clean: + rm -f NestedSet.cxx NestedSet_rdict.pcm libNestedSet.so diff --git a/types/set/nested/NestedSet.hxx b/types/set/nested/NestedSet.hxx new file mode 100644 index 0000000..b52b7e0 --- /dev/null +++ b/types/set/nested/NestedSet.hxx @@ -0,0 +1,4 @@ +#pragma once + +#include +#include diff --git a/types/set/nested/README.md b/types/set/nested/README.md new file mode 100644 index 0000000..4976abd --- /dev/null +++ b/types/set/nested/README.md @@ -0,0 +1,20 @@ +# `std::set>` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the two collection parent fields. +All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. 
Increasing number of elements in the outer set, with arbitrary lengths of the inner sets +4. Duplicate sets passed to the set constructor of the outer set +5. Unordered sets (of arbitrary length) passed to the set constructor of the outer set + +## Dictionaries + +These tests require ROOT dictionaries, which can be generated with the provided `Makefile` in this directory. This will create a `libNestedSet` shared object. diff --git a/types/set/nested/read.C b/types/set/nested/read.C new file mode 100644 index 0000000..30ba4d2 --- /dev/null +++ b/types/set/nested/read.C @@ -0,0 +1,88 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include + +#include +#include +#include +#include +#include +#include +#include + +using Set = std::set>; + +static void PrintNestedSetValue(const REntry &entry, std::string_view name, + std::ostream &os, bool last = false) { + Set &value = *entry.GetPtr(name); + os << " \"" << name << "\": ["; + bool outerFirst = true; + for (const auto &inner : value) { + if (outerFirst) { + outerFirst = false; + } else { + os << ","; + } + os << "\n ["; + bool innerFirst = true; + for (auto element : inner) { + if (innerFirst) { + innerFirst = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!inner.empty()) { + os << "\n "; + } + os << "]"; + } + if (!value.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.set.nested.root", + std::string_view output = "types.set.nested.json") { + if (gSystem->Load("libNestedSet") < 0) + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + 
first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintNestedSetValue(entry, "Index32", os); + PrintNestedSetValue(entry, "Index64", os); + PrintNestedSetValue(entry, "SplitIndex32", os); + PrintNestedSetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. + } + os << "\n"; + os << "]\n"; +} diff --git a/types/set/nested/write.C b/types/set/nested/write.C new file mode 100644 index 0000000..7c6be4b --- /dev/null +++ b/types/set/nested/write.C @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include + +#include +#include +#include +#include +#include + +using Set = std::set>; + +static std::shared_ptr MakeSetField(RNTupleModel &model, + std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + field->GetSubFields()[0]->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = "types.set.nested.root") { + if (gSystem->Load("libNestedSet") < 0) + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = MakeSetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = MakeSetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = + MakeSetField(*model, "SplitIndex32", EColumnType::kSplitIndex32); + auto SplitIndex64 = + MakeSetField(*model, "SplitIndex64", EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + 
options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {{1}}; + *Index64 = {{2}}; + *SplitIndex32 = {{3}}; + *SplitIndex64 = {{4}}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of inner sets passed to the outer set; NOTE(review): the repeated {} in SplitIndex64 collapses in a std::set, leaving 3 elements + *Index32 = {{1}}; + *Index64 = {{}, {2, 3}}; + *SplitIndex32 = {{4}, {}, {5, 6}}; + *SplitIndex64 = {{}, {7, 8, 9}, {}, {10}}; + writer->Fill(); + + // Fourth entry: duplicate inner sets passed to the outer set + *Index32 = {{1}, {1}}; + *Index64 = {{2}, {2}}; + *SplitIndex32 = {{3}, {3}}; + *SplitIndex64 = {{4}, {4}}; + writer->Fill(); + + // Fifth entry: unordered inner sets (of arbitrary length) passed to the outer set + *Index32 = {{3, 2}, {1}}; + *Index64 = {{}, {5, 4}}; + *SplitIndex32 = {{7}, {6}}; + *SplitIndex64 = {{9, 8}, {}}; + writer->Fill(); +} diff --git a/types/unordered_multiset/README.md b/types/unordered_multiset/README.md new file mode 100644 index 0000000..21eb0a2 --- /dev/null +++ b/types/unordered_multiset/README.md @@ -0,0 +1,4 @@ +# `std::unordered_multiset` + + * [`fundamental`](fundamental): `std::unordered_multiset` + * [`nested`](nested): `std::unordered_multiset>` diff --git a/types/unordered_multiset/fundamental/README.md b/types/unordered_multiset/fundamental/README.md new file mode 100644 index 0000000..4f4bffd --- /dev/null +++ b/types/unordered_multiset/fundamental/README.md @@ -0,0 +1,18 @@ +# `std::unordered_multiset` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the collection parent field. +All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. Increasing number of elements in the set: + one element in the first field, two elements in the second field, etc. +4. 
Duplicate elements passed to the set constructor +5. Unordered elements passed to the set constructor +6. Duplicate and unordered elements passed to the set constructor diff --git a/types/unordered_multiset/fundamental/read.C b/types/unordered_multiset/fundamental/read.C new file mode 100644 index 0000000..807914d --- /dev/null +++ b/types/unordered_multiset/fundamental/read.C @@ -0,0 +1,70 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include +#include +#include +#include +#include +#include + +using UnorderedMultiSet = std::unordered_multiset; + +static void PrintUnorderedMultiSetValue(const REntry &entry, + std::string_view name, std::ostream &os, + bool last = false) { + UnorderedMultiSet &value = *entry.GetPtr(name); + os << " \"" << name << "\": ["; + bool first = true; + for (auto element : value) { + if (first) { + first = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!value.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read( + std::string_view input = "types.unordered_multiset.fundamental.root", + std::string_view output = "types.unordered_multiset.fundamental.json") { + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintUnorderedMultiSetValue(entry, "Index32", os); + PrintUnorderedMultiSetValue(entry, "Index64", os); + PrintUnorderedMultiSetValue(entry, "SplitIndex32", os); + PrintUnorderedMultiSetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. 
+ } + os << "\n"; + os << "]\n"; +} diff --git a/types/unordered_multiset/fundamental/write.C b/types/unordered_multiset/fundamental/write.C new file mode 100644 index 0000000..ae62336 --- /dev/null +++ b/types/unordered_multiset/fundamental/write.C @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include +#include +#include +#include + +using UnorderedMultiSet = std::unordered_multiset; + +static std::shared_ptr +MakeUnorderedMultiSetField(RNTupleModel &model, std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write( + std::string_view filename = "types.unordered_multiset.fundamental.root") { + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = + MakeUnorderedMultiSetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = + MakeUnorderedMultiSetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = MakeUnorderedMultiSetField(*model, "SplitIndex32", + EColumnType::kSplitIndex32); + auto SplitIndex64 = MakeUnorderedMultiSetField(*model, "SplitIndex64", + EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {1}; + *Index64 = {2}; + *SplitIndex32 = {3}; + *SplitIndex64 = {4}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the 
set + *Index32 = {1}; + *Index64 = {2, 3}; + *SplitIndex32 = {4, 5, 6}; + *SplitIndex64 = {7, 8, 9, 10}; + writer->Fill(); + + // Fourth entry: duplicate elements in the set + *Index32 = {1, 1}; + *Index64 = {2, 2}; + *SplitIndex32 = {3, 3}; + *SplitIndex64 = {4, 4}; + writer->Fill(); + + // Fifth entry: unordered elements in the set + *Index32 = {2, 1}; + *Index64 = {4, 3}; + *SplitIndex32 = {6, 5}; + *SplitIndex64 = {8, 7}; + writer->Fill(); + + // Sixth entry: duplicate and unordered elements in the set + *Index32 = {2, 1, 2}; + *Index64 = {4, 4, 3}; + *SplitIndex32 = {6, 6, 5}; + *SplitIndex64 = {7, 8, 8}; + writer->Fill(); +} diff --git a/types/unordered_multiset/nested/LinkDef.h b/types/unordered_multiset/nested/LinkDef.h new file mode 100644 index 0000000..0f5799a --- /dev/null +++ b/types/unordered_multiset/nested/LinkDef.h @@ -0,0 +1,6 @@ +#include +#include + +#ifdef __CLING__ +#pragma link C++ class std::unordered_multiset>+; +#endif diff --git a/types/unordered_multiset/nested/Makefile b/types/unordered_multiset/nested/Makefile new file mode 100644 index 0000000..2eadb02 --- /dev/null +++ b/types/unordered_multiset/nested/Makefile @@ -0,0 +1,20 @@ +CXX=g++ +CXXFLAGS_ROOT=$(shell root-config --cflags) +ifeq ($(CXXFLAGS_ROOT),) + $(error cannot find root-config: make sure to source thisroot.sh) +endif +CXXFLAGS=-Wall $(CXXFLAGS_ROOT) +LDFLAGS=$(shell root-config --libs) + +.PHONY: all clean + +all: NestedUnorderedMultiset.cxx libNestedUnorderedMultiset.so + +NestedUnorderedMultiset.cxx: NestedUnorderedMultiset.hxx LinkDef.h + rootcling -f $@ $^ + +libNestedUnorderedMultiset.so: NestedUnorderedMultiset.cxx + $(CXX) -shared -fPIC -o $@ $^ $(CXXFLAGS) $(LDFLAGS) + +clean: + rm -f NestedUnorderedMultiset.cxx NestedUnorderedMultiset_rdict.pcm libNestedUnorderedMultiset.so diff --git a/types/unordered_multiset/nested/NestedUnorderedMultiset.hxx b/types/unordered_multiset/nested/NestedUnorderedMultiset.hxx new file mode 100644 index 0000000..d4b10d8 --- 
/dev/null +++ b/types/unordered_multiset/nested/NestedUnorderedMultiset.hxx @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +template<> +struct std::hash> { + std::size_t + operator()(const std::unordered_multiset &s) const noexcept { + std::size_t h = 0; + for (const auto &el : s) { + h ^= std::hash{}(el); + } + return h; + } +}; diff --git a/types/unordered_multiset/nested/README.md b/types/unordered_multiset/nested/README.md new file mode 100644 index 0000000..f7cac60 --- /dev/null +++ b/types/unordered_multiset/nested/README.md @@ -0,0 +1,20 @@ +# `std::unordered_multiset>` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the two collection parent fields. +All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. Increasing number of elements in the outer set, with arbitrary lengths of the inner sets +4. Duplicate sets passed to the set constructor of the outer set +5. Unordered sets (of arbitrary length) passed to the set constructor of the outer set + +## Dictionaries + +These tests require ROOT dictionaries, which can be generated with the provided `Makefile` in this directory. This will create a `libNestedUnorderedMultiset` shared object. 
diff --git a/types/unordered_multiset/nested/read.C b/types/unordered_multiset/nested/read.C new file mode 100644 index 0000000..9f3540c --- /dev/null +++ b/types/unordered_multiset/nested/read.C @@ -0,0 +1,91 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include + +#include +#include +#include +#include +#include +#include +#include + +using UnorderedMultiset = + std::unordered_multiset>; + +static void PrintNestedUnorderedMultisetValue(const REntry &entry, + std::string_view name, + std::ostream &os, + bool last = false) { + UnorderedMultiset &value = *entry.GetPtr(name); + os << " \"" << name << "\": ["; + bool outerFirst = true; + for (auto inner : value) { + if (outerFirst) { + outerFirst = false; + } else { + os << ","; + } + os << "\n ["; + bool innerFirst = true; + for (auto element : inner) { + if (innerFirst) { + innerFirst = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!inner.empty()) { + os << "\n "; + } + os << "]"; + } + if (!value.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.unordered_multiset.nested.root", + std::string_view output = "types.unordered_multiset.nested.json") { + if (gSystem->Load("libNestedUnorderedMultiset") == -1) + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintNestedUnorderedMultisetValue(entry, "Index32", os); + PrintNestedUnorderedMultisetValue(entry, "Index64", os); + PrintNestedUnorderedMultisetValue(entry, "SplitIndex32", os); + 
PrintNestedUnorderedMultisetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. + } + os << "\n"; + os << "]\n"; +} diff --git a/types/unordered_multiset/nested/write.C b/types/unordered_multiset/nested/write.C new file mode 100644 index 0000000..12805e0 --- /dev/null +++ b/types/unordered_multiset/nested/write.C @@ -0,0 +1,101 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include + +#include +#include +#include +#include +#include + +#include "NestedUnorderedMultiset.hxx" + +using UnorderedMultiset = + std::unordered_multiset>; + +static std::shared_ptr +MakeUnorderedMultisetField(RNTupleModel &model, std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + field->GetSubFields()[0]->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = "types.unordered_multiset.nested.root") { + if (gSystem->Load("libNestedUnorderedMultiset") == -1) + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = + MakeUnorderedMultisetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = + MakeUnorderedMultisetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = MakeUnorderedMultisetField(*model, "SplitIndex32", + EColumnType::kSplitIndex32); + auto SplitIndex64 = MakeUnorderedMultisetField(*model, "SplitIndex64", + EColumnType::kSplitIndex64); + + 
RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {{1}}; + *Index64 = {{2}}; + *SplitIndex32 = {{3}}; + *SplitIndex64 = {{4}}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the set + *Index32 = {{1}}; + *Index64 = {{}, {2, 3}}; + *SplitIndex32 = {{4}, {}, {5, 6}}; + *SplitIndex64 = {{}, {7, 8, 9}, {}, {10}}; + writer->Fill(); + + // Fourth entry: duplicate elements in the set + *Index32 = {{1}, {1}}; + *Index64 = {{2}, {2}}; + *SplitIndex32 = {{3}, {3}}; + *SplitIndex64 = {{4}, {4}}; + writer->Fill(); + + // Fifth entry: unordered elements in the set + *Index32 = {{3, 2}, {1}}; + *Index64 = {{}, {5, 4}}; + *SplitIndex32 = {{7}, {6}}; + *SplitIndex64 = {{9, 8}, {}}; + writer->Fill(); + + // Sixth entry: duplicate and unordered elements in the set + *Index32 = {{2, 1}, {}, {2, 1}}; + *Index64 = {{4, 4}, {3, 3}, {4, 4}}; + *SplitIndex32 = {{}, {6, 5}, {6, 5}}; + *SplitIndex64 = {{7, 7}, {7, 7}, {8, 8}}; + writer->Fill(); +} diff --git a/types/unordered_set/README.md b/types/unordered_set/README.md new file mode 100644 index 0000000..3a8ff4b --- /dev/null +++ b/types/unordered_set/README.md @@ -0,0 +1,4 @@ +# `std::unordered_set` + + * [`fundamental`](fundamental): `std::unordered_set` + * [`nested`](nested): `std::unordered_set>` diff --git a/types/unordered_set/fundamental/README.md b/types/unordered_set/fundamental/README.md new file mode 100644 index 0000000..16a1558 --- /dev/null +++ b/types/unordered_set/fundamental/README.md @@ -0,0 +1,17 @@ +# `std::unordered_set` + +## Fields + + * `[Split]Index{32,64}` + +with the corresponding column type for the offset column of the collection parent field. 
+All child fields use the default column encoding `Int32`. + +## Entries + +1. Single-element sets, with ascending values +2. Empty sets +3. Increasing number of elements in the set: + one element in the first field, two elements in the second field, etc. +4. Duplicate elements passed to the set constructor +5. Unordered elements passed to the set constructor diff --git a/types/unordered_set/fundamental/read.C b/types/unordered_set/fundamental/read.C new file mode 100644 index 0000000..633c4f3 --- /dev/null +++ b/types/unordered_set/fundamental/read.C @@ -0,0 +1,68 @@ +#include +#include + +using ROOT::Experimental::REntry; +using ROOT::Experimental::RNTupleReader; + +#include +#include +#include +#include +#include +#include + +using UnorderedSet = std::unordered_set; + +static void PrintUnorderedSetValue(const REntry &entry, std::string_view name, + std::ostream &os, bool last = false) { + UnorderedSet &value = *entry.GetPtr(name); + os << " \"" << name << "\": ["; + bool first = true; + for (auto element : value) { + if (first) { + first = false; + } else { + os << ","; + } + os << "\n " << element; + } + if (!value.empty()) { + os << "\n "; + } + os << "]"; + if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.unordered_set.fundamental.root", + std::string_view output = "types.unordered_set.fundamental.json") { + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintUnorderedSetValue(entry, "Index32", os); + PrintUnorderedSetValue(entry, "Index64", os); + PrintUnorderedSetValue(entry, "SplitIndex32", os); + PrintUnorderedSetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a 
comma before the + // next entry. + } + os << "\n"; + os << "]\n"; +} diff --git a/types/unordered_set/fundamental/write.C b/types/unordered_set/fundamental/write.C new file mode 100644 index 0000000..e01828d --- /dev/null +++ b/types/unordered_set/fundamental/write.C @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include +#include +#include +#include + +using UnorderedSet = std::unordered_set; + +static std::shared_ptr +MakeUnorderedSetField(RNTupleModel &model, std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = "types.unordered_set.fundamental.root") { + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = + MakeUnorderedSetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = + MakeUnorderedSetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = + MakeUnorderedSetField(*model, "SplitIndex32", EColumnType::kSplitIndex32); + auto SplitIndex64 = + MakeUnorderedSetField(*model, "SplitIndex64", EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {1}; + *Index64 = {2}; + *SplitIndex32 = {3}; + *SplitIndex64 = {4}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the set + *Index32 = {1}; + 
/// Hash for `std::unordered_set<std::int32_t>`, provided so that such a set
/// can itself be used as the key of an unordered container.
///
/// The element hashes are combined with XOR. XOR is commutative, so the
/// result does not depend on the iteration order of `s`, and two sets that
/// compare equal always produce the same hash. An empty set hashes to 0.
template <> struct std::hash<std::unordered_set<std::int32_t>> {
  std::size_t
  operator()(const std::unordered_set<std::int32_t> &s) const noexcept {
    std::size_t h = 0;
    for (const auto &el : s) {
      h ^= std::hash<std::int32_t>{}(el);
    }
    return h;
  }
};
if (!last) { + os << ","; + } + os << "\n"; +} + +void read(std::string_view input = "types.unordered_set.nested.root", + std::string_view output = "types.unordered_set.nested.json") { + if (gSystem->Load("libNestedUnorderedSet") == -1) + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + + std::ofstream os(std::string{output}); + os << "[\n"; + + auto reader = RNTupleReader::Open("ntpl", input); + auto &entry = reader->GetModel().GetDefaultEntry(); + bool first = true; + for (auto index : *reader) { + reader->LoadEntry(index); + + if (first) { + first = false; + } else { + os << ",\n"; + } + os << " {\n"; + + PrintNestedUnorderedSetValue(entry, "Index32", os); + PrintNestedUnorderedSetValue(entry, "Index64", os); + PrintNestedUnorderedSetValue(entry, "SplitIndex32", os); + PrintNestedUnorderedSetValue(entry, "SplitIndex64", os, /*last=*/true); + + os << " }"; + // Newline is intentionally missing, may need to print a comma before the + // next entry. 
+ } + os << "\n"; + os << "]\n"; +} diff --git a/types/unordered_set/nested/write.C b/types/unordered_set/nested/write.C new file mode 100644 index 0000000..5db6c43 --- /dev/null +++ b/types/unordered_set/nested/write.C @@ -0,0 +1,93 @@ +#include +#include +#include +#include +#include + +using ROOT::Experimental::EColumnType; +using ROOT::Experimental::RField; +using ROOT::Experimental::RNTupleModel; +using ROOT::Experimental::RNTupleWriteOptions; +using ROOT::Experimental::RNTupleWriter; + +#include + +#include +#include +#include +#include +#include + +#include "NestedUnorderedSet.hxx" + +using UnorderedSet = std::unordered_set>; + +static std::shared_ptr +MakeUnorderedSetField(RNTupleModel &model, std::string_view name, + EColumnType indexType) { + auto field = std::make_unique>(name); + field->SetColumnRepresentatives({{indexType}}); + field->GetSubFields()[0]->SetColumnRepresentatives({{indexType}}); + model.AddField(std::move(field)); + return model.GetDefaultEntry().GetPtr(name); +} + +void write(std::string_view filename = "types.unordered_set.nested.root") { + if (gSystem->Load("libNestedUnorderedSet") == -1) + throw std::runtime_error("could not find the required ROOT dictionaries, " + "please make sure to run `make` first"); + + auto model = RNTupleModel::Create(); + + // Non-split index encoding + auto Index32 = + MakeUnorderedSetField(*model, "Index32", EColumnType::kIndex32); + auto Index64 = + MakeUnorderedSetField(*model, "Index64", EColumnType::kIndex64); + + // Split index encoding + auto SplitIndex32 = + MakeUnorderedSetField(*model, "SplitIndex32", EColumnType::kSplitIndex32); + auto SplitIndex64 = + MakeUnorderedSetField(*model, "SplitIndex64", EColumnType::kSplitIndex64); + + RNTupleWriteOptions options; + options.SetCompression(0); + auto writer = + RNTupleWriter::Recreate(std::move(model), "ntpl", filename, options); + + // First entry: single-element sets, with ascending values + *Index32 = {{1}}; + *Index64 = {{2}}; + *SplitIndex32 = 
{{3}}; + *SplitIndex64 = {{4}}; + writer->Fill(); + + // Second entry: empty sets + *Index32 = {}; + *Index64 = {}; + *SplitIndex32 = {}; + *SplitIndex64 = {}; + writer->Fill(); + + // Third entry: increasing number of elements in the set + *Index32 = {{1}}; + *Index64 = {{}, {2, 3}}; + *SplitIndex32 = {{4}, {}, {5, 6}}; + *SplitIndex64 = {{}, {7, 8, 9}, {}, {10}}; + writer->Fill(); + + // Fourth entry: duplicate elements in the set + *Index32 = {{1}, {1}}; + *Index64 = {{2}, {2}}; + *SplitIndex32 = {{3}, {3}}; + *SplitIndex64 = {{4}, {4}}; + writer->Fill(); + + // Fifth entry: unordered elements in the set + *Index32 = {{3, 2}, {1}}; + *Index64 = {{}, {5, 4}}; + *SplitIndex32 = {{7}, {6}}; + *SplitIndex64 = {{9, 8}, {}}; + writer->Fill(); +}