From a9006235875b7a3bfa2236130b7722cc39a3cd66 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Sat, 25 Jan 2025 19:26:56 +0200 Subject: [PATCH 1/6] release query result after materialization & transformation --- src/reltoaltrep.cpp | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/src/reltoaltrep.cpp b/src/reltoaltrep.cpp index d5d9faa3f..d1dee8f99 100644 --- a/src/reltoaltrep.cpp +++ b/src/reltoaltrep.cpp @@ -94,7 +94,7 @@ struct AltrepRelationWrapper { } AltrepRelationWrapper(rel_extptr_t rel_, bool allow_materialization_) - : allow_materialization(allow_materialization_), rel_eptr(rel_), rel(rel_->rel) { + : allow_materialization(allow_materialization_), rel_eptr(rel_) , rel(rel_->rel) , rowcount(0) , rowcount_retrieved(false) , ncols(0) , cols_transformed(0) { } bool HasQueryResult() const { @@ -148,6 +148,9 @@ struct AltrepRelationWrapper { cpp11::stop("Error evaluating duckdb query: %s", res->GetError().c_str()); } D_ASSERT(res->type == QueryResultType::MATERIALIZED_RESULT); + + rowcount = ((MaterializedQueryResult *)res.get())->RowCount(); + rowcount_retrieved = true; } D_ASSERT(res); return (MaterializedQueryResult *)res.get(); @@ -155,6 +158,10 @@ struct AltrepRelationWrapper { bool allow_materialization; + size_t ncols; + size_t cols_transformed; + R_xlen_t rowcount; + bool rowcount_retrieved; rel_extptr_t rel_eptr; duckdb::shared_ptr rel; duckdb::unique_ptr res; @@ -162,7 +169,7 @@ struct AltrepRelationWrapper { struct AltrepRownamesWrapper { - AltrepRownamesWrapper(duckdb::shared_ptr rel_p) : rel(rel_p) { + AltrepRownamesWrapper(duckdb::shared_ptr rel_p) : rel(rel_p) , rowlen_data_retrieved(false) { rowlen_data[0] = NA_INTEGER; } @@ -170,6 +177,7 @@ struct AltrepRownamesWrapper { return GetFromExternalPtr(x); } + bool rowlen_data_retrieved; int32_t rowlen_data[2]; duckdb::shared_ptr rel; }; @@ -194,6 +202,14 @@ struct AltrepVectorWrapper { duckdb_r_transform(chunk.data[column_index], dest, dest_offset, chunk.size(), false); dest_offset += chunk.size(); } + // keep tabs on how many of the columns have been transformed + // to their R-representation + rel->cols_transformed++; + // if all of the columns have been transformed, we can reset + // the query-result pointer and free the memory + if (rel->cols_transformed == rel->ncols) { + rel->res.reset(); + } } return DATAPTR(transformed_vector); } @@ -272,16 +288,27 @@ const void *RelToAltrep::RownamesDataptrOrNull(SEXP x) { void *RelToAltrep::DoRownamesDataptrGet(SEXP x) { auto rownames_wrapper = AltrepRownamesWrapper::Get(x); + + // the query has been materialized, return the rowcount + // (and void recomputing the query if it's been reset) + if (rownames_wrapper->rowlen_data_retrieved) { + return rownames_wrapper->rowlen_data; + } + auto row_count = rownames_wrapper->rel->GetQueryResult()->RowCount(); if (row_count > (idx_t)NumericLimits::Maximum()) { cpp11::stop("Integer overflow for row.names attribute"); } rownames_wrapper->rowlen_data[1] = -row_count; + rownames_wrapper->rowlen_data_retrieved = true; return rownames_wrapper->rowlen_data; } R_xlen_t RelToAltrep::VectorLength(SEXP x) { BEGIN_CPP11 + if (AltrepVectorWrapper::Get(x)->rel->rowcount_retrieved) { + return AltrepVectorWrapper::Get(x)->rel->rowcount; + } return AltrepVectorWrapper::Get(x)->rel->GetQueryResult()->RowCount(); END_CPP11_EX(0) } @@ -378,6 +405,9 @@ static R_altrep_class_t LogicalTypeToAltrepType(const LogicalType &type) { } SET_NAMES(data_frame, StringsToSexp(names)); + // Colcount + relation_wrapper->ncols = ncols; + // Row names cpp11::external_pointer ptr(new AltrepRownamesWrapper(relation_wrapper)); R_SetExternalPtrTag(ptr, RStrings::get().duckdb_row_names_sym); From 091179b8c0b4e1d937537297994dd31eafe965f2 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Sat, 25 Jan 2025 21:51:40 +0200 Subject: [PATCH 2/6] members declared in init order --- src/reltoaltrep.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/reltoaltrep.cpp b/src/reltoaltrep.cpp index fbd0e5145..66d586a04 100644 --- a/src/reltoaltrep.cpp +++ b/src/reltoaltrep.cpp @@ -204,13 +204,16 @@ struct AltrepRelationWrapper { const size_t n_rows; const size_t n_cells; - size_t ncols; - size_t cols_transformed; - R_xlen_t rowcount; - bool rowcount_retrieved; rel_extptr_t rel_eptr; duckdb::shared_ptr rel; duckdb::unique_ptr res; + + R_xlen_t rowcount; + bool rowcount_retrieved; + + size_t ncols; + size_t cols_transformed; + }; struct AltrepRownamesWrapper { From a8e3f8fd891c809bdf25bf13463c11f2c278d5ed Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Sat, 25 Jan 2025 22:44:07 +0200 Subject: [PATCH 3/6] all members declared in init order.. --- src/reltoaltrep.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/reltoaltrep.cpp b/src/reltoaltrep.cpp index 66d586a04..8a1421419 100644 --- a/src/reltoaltrep.cpp +++ b/src/reltoaltrep.cpp @@ -226,9 +226,10 @@ struct AltrepRownamesWrapper { return GetFromExternalPtr(x); } - bool rowlen_data_retrieved; int32_t rowlen_data[2]; duckdb::shared_ptr rel; + bool rowlen_data_retrieved; + }; struct AltrepVectorWrapper { From c8eaf0a2f8f2205c28126d3850411dfd2254abda Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Mon, 3 Feb 2025 21:55:12 +0200 Subject: [PATCH 4/6] Reset query results after transformed to R --- src/include/reltoaltrep.hpp | 7 +++++++ src/reltoaltrep.cpp | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/include/reltoaltrep.hpp b/src/include/reltoaltrep.hpp index 17b2e35ca..d7b63f379 100644 --- a/src/include/reltoaltrep.hpp +++ b/src/include/reltoaltrep.hpp @@ -24,6 +24,13 @@ struct AltrepRelationWrapper { rel_extptr_t rel_eptr; duckdb::shared_ptr rel; duckdb::unique_ptr res; + + R_xlen_t rowcount; + bool rowcount_retrieved; + + size_t ncols; + size_t cols_transformed; + }; } diff --git a/src/reltoaltrep.cpp b/src/reltoaltrep.cpp index bbb5de1c4..3d76a2c88 100644 --- a/src/reltoaltrep.cpp +++ b/src/reltoaltrep.cpp @@ -101,7 +101,7 @@ AltrepRelationWrapper *AltrepRelationWrapper::Get(SEXP x) { } AltrepRelationWrapper::AltrepRelationWrapper(rel_extptr_t rel_, bool allow_materialization_, size_t n_rows_, size_t n_cells_) - : allow_materialization(allow_materialization_), n_rows(n_rows_), n_cells(n_cells_), rel_eptr(rel_), rel(rel_->rel) { + : allow_materialization(allow_materialization_), n_rows(n_rows_), n_cells(n_cells_), rel_eptr(rel_), rel(rel_->rel), rowcount(0), rowcount_retrieved(false), ncols(0), cols_transformed(0) { } bool AltrepRelationWrapper::HasQueryResult() const { @@ -149,6 +149,10 @@ MaterializedQueryResult *AltrepRelationWrapper::GetQueryResult() { cpp11::stop("Query execution was interrupted"); } + + rowcount = ((MaterializedQueryResult *)res.get())->RowCount(); + rowcount_retrieved = true; + signal_handler.Disable(); } D_ASSERT(res); @@ -196,7 +200,7 @@ duckdb::unique_ptr AltrepRelationWrapper::Materialize() { struct AltrepRownamesWrapper { - AltrepRownamesWrapper(duckdb::shared_ptr rel_p) : rel(rel_p) { + AltrepRownamesWrapper(duckdb::shared_ptr rel_p) : rel(rel_p), rowlen_data_retrieved(false) { rowlen_data[0] = NA_INTEGER; } @@ -206,6 +210,7 @@ struct AltrepRownamesWrapper { int32_t rowlen_data[2]; duckdb::shared_ptr rel; + bool rowlen_data_retrieved; }; struct AltrepVectorWrapper { @@ -219,6 +224,7 @@ struct AltrepVectorWrapper { void *Dataptr() { if (transformed_vector.data() == R_NilValue) { + printf("transformed_vector.data() == R_NilValue\n"); auto res = rel->GetQueryResult(); transformed_vector = duckdb_r_allocate(res->types[column_index], res->RowCount()); @@ -228,6 +234,17 @@ struct AltrepVectorWrapper { duckdb_r_transform(chunk.data[column_index], dest, dest_offset, chunk.size(), false); dest_offset += chunk.size(); } + // keep tabs on how many of the columns have been transformed + // to their R-representation + rel->cols_transformed++; + // if all of the columns have been transformed, we can reset + // the query-result pointer and free the memory + if (rel->cols_transformed == rel->ncols) { + printf("Resetting query results\n"); + rel->res.reset(); + } else { + printf("cols_transformed: %ld vs. ncols %ld\n", rel->cols_transformed, rel->ncols); + } } return DATAPTR(transformed_vector); } @@ -306,6 +323,13 @@ const void *RelToAltrep::RownamesDataptrOrNull(SEXP x) { void *RelToAltrep::DoRownamesDataptrGet(SEXP x) { auto rownames_wrapper = AltrepRownamesWrapper::Get(x); + + // the query has been materialized, return the rowcount + // (and void recomputing the query if it's been reset) + if (rownames_wrapper->rowlen_data_retrieved) { + return rownames_wrapper->rowlen_data; + } + auto row_count = rownames_wrapper->rel->GetQueryResult()->RowCount(); if (row_count > (idx_t)NumericLimits::Maximum()) { cpp11::stop("Integer overflow for row.names attribute"); @@ -316,6 +340,9 @@ void *RelToAltrep::DoRownamesDataptrGet(SEXP x) { R_xlen_t RelToAltrep::VectorLength(SEXP x) { BEGIN_CPP11 + if (AltrepVectorWrapper::Get(x)->rel->rowcount_retrieved) { + return AltrepVectorWrapper::Get(x)->rel->rowcount; + } return AltrepVectorWrapper::Get(x)->rel->GetQueryResult()->RowCount(); END_CPP11_EX(0) } @@ -404,6 +431,8 @@ size_t DoubleToSize(double d) { auto relation_wrapper = make_shared_ptr(rel, allow_materialization, DoubleToSize(n_rows), DoubleToSize(n_cells)); + relation_wrapper->ncols = drel->Columns().size(); + cpp11::writable::list data_frame; data_frame.reserve(ncols); @@ -427,6 +456,8 @@ size_t DoubleToSize(double d) { } SET_NAMES(data_frame, StringsToSexp(names)); + relation_wrapper->ncols = drel->Columns().size(); + // Row names cpp11::external_pointer ptr(new AltrepRownamesWrapper(relation_wrapper)); R_SetExternalPtrTag(ptr, RStrings::get().duckdb_row_names_sym); From 1cbea2d47f49222132294464cd7aed2ee169e1fa Mon Sep 17 00:00:00 2001 From: toby <43851547+toppyy@users.noreply.github.com> Date: Sat, 15 Feb 2025 11:31:42 +0200 Subject: [PATCH 5/6] move logic under AltrepRelationWrapper; rm debug printf --- src/include/reltoaltrep.hpp | 2 ++ src/reltoaltrep.cpp | 25 +++++++++++++------------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/include/reltoaltrep.hpp b/src/include/reltoaltrep.hpp index d7b63f379..fd8ac99bc 100644 --- a/src/include/reltoaltrep.hpp +++ b/src/include/reltoaltrep.hpp @@ -17,6 +17,8 @@ struct AltrepRelationWrapper { duckdb::unique_ptr Materialize(); + void MarkColumnAsMaterialized(); + const bool allow_materialization; const size_t n_rows; const size_t n_cells; diff --git a/src/reltoaltrep.cpp b/src/reltoaltrep.cpp index 3d76a2c88..0a7c55c7a 100644 --- a/src/reltoaltrep.cpp +++ b/src/reltoaltrep.cpp @@ -108,6 +108,17 @@ bool AltrepRelationWrapper::HasQueryResult() const { return (bool)res; } +void AltrepRelationWrapper::MarkColumnAsMaterialized() { + // AltrepRelationWrapper keeps tabs on how many of the columns have been transformed + // to their R-representation + cols_transformed++; + // If all of the columns have been transformed, we can reset + // the query-result pointer and free the memory + if (cols_transformed == ncols) { + res.reset(); + } +} + MaterializedQueryResult *AltrepRelationWrapper::GetQueryResult() { if (!res) { if (!allow_materialization || n_cells == 0) { @@ -224,7 +235,6 @@ struct AltrepVectorWrapper { void *Dataptr() { if (transformed_vector.data() == R_NilValue) { - printf("transformed_vector.data() == R_NilValue\n"); auto res = rel->GetQueryResult(); transformed_vector = duckdb_r_allocate(res->types[column_index], res->RowCount()); @@ -234,17 +244,8 @@ struct AltrepVectorWrapper { duckdb_r_transform(chunk.data[column_index], dest, dest_offset, chunk.size(), false); dest_offset += chunk.size(); } - // keep tabs on how many of the columns have been transformed - // to their R-representation - rel->cols_transformed++; - // if all of the columns have been transformed, we can reset - // the query-result pointer and free the memory - if (rel->cols_transformed == rel->ncols) { - printf("Resetting query results\n"); - rel->res.reset(); - } else { - printf("cols_transformed: %ld vs. ncols %ld\n", rel->cols_transformed, rel->ncols); - } + + rel->MarkColumnAsMaterialized(); } return DATAPTR(transformed_vector); } From c8256a707dac9064d44920265dd4d8df112e26a3 Mon Sep 17 00:00:00 2001 From: toby <43851547+toppyy@users.noreply.github.com> Date: Sat, 15 Feb 2025 12:06:46 +0200 Subject: [PATCH 6/6] improve naming: column is transformed, not materialized --- src/include/reltoaltrep.hpp | 2 +- src/reltoaltrep.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/include/reltoaltrep.hpp b/src/include/reltoaltrep.hpp index fd8ac99bc..9270181b3 100644 --- a/src/include/reltoaltrep.hpp +++ b/src/include/reltoaltrep.hpp @@ -17,7 +17,7 @@ struct AltrepRelationWrapper { duckdb::unique_ptr Materialize(); - void MarkColumnAsMaterialized(); + void MarkColumnAsTransformed(); const bool allow_materialization; const size_t n_rows; diff --git a/src/reltoaltrep.cpp b/src/reltoaltrep.cpp index 890abe658..c9b2cc27e 100644 --- a/src/reltoaltrep.cpp +++ b/src/reltoaltrep.cpp @@ -108,7 +108,7 @@ bool AltrepRelationWrapper::HasQueryResult() const { return (bool)res; } -void AltrepRelationWrapper::MarkColumnAsMaterialized() { +void AltrepRelationWrapper::MarkColumnAsTransformed() { // AltrepRelationWrapper keeps tabs on how many of the columns have been transformed // to their R-representation cols_transformed++; @@ -245,7 +245,7 @@ struct AltrepVectorWrapper { dest_offset += chunk.size(); } - rel->MarkColumnAsMaterialized(); + rel->MarkColumnAsTransformed(); } return DATAPTR(transformed_vector); }