From 421170eb1a69da0fa6f745830f5493483ebd48e6 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 28 Dec 2024 14:16:27 +0300 Subject: [PATCH 01/24] Introduce the growable vector API --- src/data.table.h | 8 ++++++++ src/growable.c | 23 +++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 src/growable.c diff --git a/src/data.table.h b/src/data.table.h index 252f5e3b58..f9b8bbe68d 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -298,6 +298,14 @@ void hash_set(hashtab *, SEXP key, R_xlen_t value); // Returns the value corresponding to the key present in the hash, otherwise returns ifnotfound. R_xlen_t hash_lookup(const hashtab *, SEXP key, R_xlen_t ifnotfound); +// growable.c +// Return a new vector of given type. Initially its xlength() is equal to size. Using growable_resize(), it can be increased to up to max_size. +SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size); +// Return the max_size of a growable vector. Behaviour is undefined if x was not allocated by growable_allocate. +R_xlen_t growable_max_size(SEXP x); +// Resize a growable vector to newsize. Will signal an error if newsize exceeds max_size. +void growable_resize(SEXP x, R_xlen_t newsize); + // functions called from R level .Call/.External and registered in init.c // these now live here to pass -Wstrict-prototypes, #5477 // all arguments must be SEXP since they are called from R level diff --git a/src/growable.c b/src/growable.c new file mode 100644 index 0000000000..05069fe804 --- /dev/null +++ b/src/growable.c @@ -0,0 +1,23 @@ +#include "data.table.h" + +SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size) { + SEXP ret = PROTECT(allocVector(type, max_size)); + SET_TRUELENGTH(ret, max_size); + SET_GROWABLE_BIT(ret); + SETLENGTH(ret, size); + UNPROTECT(1); + return ret; +} + +R_xlen_t growable_max_size(SEXP x) { + return TRUELENGTH(x); +} + +void growable_resize(SEXP x, R_xlen_t newsize) { + R_xlen_t max_size; + if (newsize > (max_size = growable_max_size(x))) internal_error( + __func__, "newsize=%g > max_size=%g", + (double)newsize, (double)max_size + ); + SETLENGTH(x, newsize); +} From 3b3185085119cc7262a47fc5680ce00868291192 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 28 Dec 2024 16:21:18 +0300 Subject: [PATCH 02/24] Use growable_* instead of SETLENGTH et al in fread --- src/dogroups.c | 2 +- src/freadR.c | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/dogroups.c b/src/dogroups.c index c2f84e9f74..e5efb3a2ac 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -523,7 +523,7 @@ SEXP growVector(SEXP x, const R_len_t newlen) SEXP newx; R_len_t len = length(x); if (isNull(x)) error(_("growVector passed NULL")); - PROTECT(newx = allocVector(TYPEOF(x), newlen)); // TO DO: R_realloc(?) here? + PROTECT(newx = growable_allocate(TYPEOF(x), newlen, newlen)); // may be shrunk later by fread if (newlen < len) len=newlen; // i.e. shrink switch (TYPEOF(x)) { case RAWSXP: memcpy(RAW(newx), RAW(x), len*SIZEOF(x)); break; diff --git a/src/freadR.c b/src/freadR.c index 4f94239cab..735708537a 100644 --- a/src/freadR.c +++ b/src/freadR.c @@ -257,7 +257,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int if (typeSize[CT_BOOL8_N]!=1) internal_error(__func__, "typeSize[CT_BOOL8_N] != 1"); // # nocov if (typeSize[CT_STRING]!=8) internal_error(__func__, "typeSize[CT_STRING] != 1"); // # nocov colNamesSxp = R_NilValue; - SET_VECTOR_ELT(RCHK, 1, colNamesSxp=allocVector(STRSXP, ncol)); + SET_VECTOR_ELT(RCHK, 1, colNamesSxp=growable_allocate(STRSXP, ncol, ncol)); for (int i=0; i 0) && (newDT || TYPEOF(col) != typeSxp[type[i]] || oldIsInt64 != newIsInt64); int nrowChanged = (allocNrow != dtnrows); if (typeChanged || nrowChanged) { - SEXP thiscol = typeChanged ? allocVector(typeSxp[type[i]], allocNrow) // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1 + SEXP thiscol = typeChanged ? growable_allocate(typeSxp[type[i]], allocNrow, allocNrow) // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1 : growVector(col, allocNrow); SET_VECTOR_ELT(DT,resi,thiscol); if (type[i]==CT_INT64) { @@ -519,7 +519,6 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size setAttrib(thiscol, sym_tzone, ScalarString(char_UTC)); // see news for v1.13.0 } - SET_TRUELENGTH(thiscol, allocNrow); DTbytes += SIZEOF(thiscol)*allocNrow; } resi++; @@ -536,9 +535,7 @@ void setFinalNrow(size_t nrow) { return; const int ncol=LENGTH(DT); for (int i=0; i Date: Sat, 28 Dec 2024 19:31:43 +0300 Subject: [PATCH 03/24] Switch shallow() to use growable_allocate() The resulting data.tables now have GROWABLE_BIT set, therefore: - the finalizer is not needed on R >= 3.4 - duplicates of data.tables (which are not over-allocated) now have TRUELENGTH of 0 instead of whatever it was before, which is detected earlier in selfrefok() As a result, assign.c only uses TRUELENGTH on R < 3.4. --- inst/tests/tests.Rraw | 6 ----- src/assign.c | 52 ++++++++++++++++++++++++------------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 657478c61f..8af4d920ef 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19303,12 +19303,6 @@ test(2290.3, DT[, `:=`(a, c := 3)], error="It looks like you re-used `:=` in arg # partially-named `:=`(...) --> different branch, same error test(2290.4, DT[, `:=`(a = 2, c := 3)], error="It looks like you re-used `:=` in argument 2") -# segfault when selfref is not ok before set #6410 -df = data.frame(a=1:3) -setDT(df) -attr(df, "att") = 1 -test(2291.1, set(df, NULL, "new", "new"), error="attributes .* have been reassigned") - # ns-qualified bysub error, #6493 DT = data.table(a = 1) test(2292.1, DT[, .N, by=base::mget("a")], data.table(a = 1, N = 1L)) diff --git a/src/assign.c b/src/assign.c index a7b083cb81..c36b72d6df 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1,5 +1,6 @@ #include "data.table.h" +#if R_VERSION < R_Version(3,4,0) // not needed with GROWABLE_BIT static void finalizer(SEXP p) { SEXP x; @@ -22,6 +23,7 @@ static void finalizer(SEXP p) UNPROTECT(1); return; } +#endif void setselfref(SEXP x) { if(!INHERITS(x, char_datatable)) return; // #5286 @@ -38,7 +40,9 @@ void setselfref(SEXP x) { R_NilValue )) )); +#if R_VERSION < R_Version(3,4,0) // not needed with GROWABLE_BIT R_RegisterCFinalizerEx(p, finalizer, FALSE); +#endif UNPROTECT(2); /* @@ -126,15 +130,24 @@ static int _selfrefok(SEXP x, Rboolean checkNames, Rboolean verbose) { tag = R_ExternalPtrTag(v); if (!(isNull(tag) || isString(tag))) internal_error(__func__, ".internal.selfref tag is neither NULL nor a character vector"); // # nocov names = getAttrib(x, R_NamesSymbol); - if (names!=tag && isString(names) && !ALTREP(names)) // !ALTREP for #4734 + // On R >= 3.4, either + // (1) we allocate the data.table and/or its names, so it has the GROWABLE_BIT set, so copies will have zero TRUELENGTH, or + // (2) someone else creates them from scratch, so (only using the API) will have zero TRUELENGTH. + // We then return false and either re-create the data.table from scratch or signal an error, so the current object having a zero TRUELENGTH is fine. + // R < 3.4 doesn't have the GROWABLE_BIT, so let's reset the TRUELENGTH just in case. +#if R_VERSION < R_Version(3,4,0) + if (names!=tag && isString(names)) SET_TRUELENGTH(names, LENGTH(names)); // R copied this vector not data.table; it's not actually over-allocated. It looks over-allocated // because R copies the original vector's tl over despite allocating length. +#endif prot = R_ExternalPtrProtected(v); if (TYPEOF(prot) != EXTPTRSXP) // Very rare. Was error(_(".internal.selfref prot is not itself an extptr")). return 0; // # nocov ; see http://stackoverflow.com/questions/15342227/getting-a-random-internal-selfref-error-in-data-table-for-r - if (x!=R_ExternalPtrAddr(prot) && !ALTREP(x)) +#if R_VERSION < R_Version(3,4,0) + if (x!=R_ExternalPtrAddr(prot)) SET_TRUELENGTH(x, LENGTH(x)); // R copied this vector not data.table, it's not actually over-allocated +#endif return checkNames ? names==tag : x==R_ExternalPtrAddr(prot); } @@ -151,7 +164,8 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n) // called from alloccol where n is checked carefully, or from shallow() at R level // where n is set to truelength (i.e. a shallow copy only with no size change) int protecti=0; - SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // to do, use growVector here? + const int l = isNull(cols) ? length(dt) : length(cols); + SEXP newdt = PROTECT(growable_allocate(VECSXP, l, n)); protecti++; // to do, use growVector here? SHALLOW_DUPLICATE_ATTRIB(newdt, dt); // TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It @@ -169,8 +183,7 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n) setAttrib(newdt, sym_sorted, duplicate(sorted)); SEXP names = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; - SEXP newnames = PROTECT(allocVector(STRSXP, n)); protecti++; - const int l = isNull(cols) ? LENGTH(dt) : length(cols); + SEXP newnames = PROTECT(growable_allocate(STRSXP, l, n)); protecti++; if (isNull(cols)) { for (int i=0; il) ? n : l); // e.g. test 848 and 851 in R > 3.0.2 // added (n>l) ? ... for #970, see test 1481. // TO DO: test realloc names if selfrefnamesok (users can setattr(x,"name") themselves for example. - // if (TRUELENGTH(getAttrib(dt,R_NamesSymbol))!=tl) - // internal_error(__func__, "tl of dt passes checks, but tl of names (%d) != tl of dt (%d)", tl, TRUELENGTH(getAttrib(dt,R_NamesSymbol))); // # nocov - tl = TRUELENGTH(dt); + tl = growable_max_size(dt); // R <= 2.13.2 and we didn't catch uninitialized tl somehow if (tl<0) internal_error(__func__, "tl of class is marked but tl<0"); // # nocov if (tl>0 && tl Date: Sat, 28 Dec 2024 20:22:09 +0300 Subject: [PATCH 04/24] Use growable_allocate() in subsetDT() --- src/subset.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/subset.c b/src/subset.c index c7653b893e..bf9e0ac557 100644 --- a/src/subset.c +++ b/src/subset.c @@ -297,7 +297,7 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md } int overAlloc = checkOverAlloc(GetOption(install("datatable.alloccol"), R_NilValue)); - SEXP ans = PROTECT(allocVector(VECSXP, LENGTH(cols)+overAlloc)); nprotect++; // doing alloc.col directly here; eventually alloc.col can be deprecated. + SEXP ans = PROTECT(growable_allocate(VECSXP, LENGTH(cols), LENGTH(cols)+overAlloc)); nprotect++; // doing alloc.col directly here; eventually alloc.col can be deprecated. // user-defined and superclass attributes get copied as from v1.12.0 copyMostAttrib(x, ans); @@ -305,8 +305,6 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md // includes row.names (oddly, given other dims aren't) and "sorted" dealt with below // class is also copied here which retains superclass name in class vector as has been the case for many years; e.g. tests 1228.* for #64 - SET_TRUELENGTH(ans, LENGTH(ans)); - SETLENGTH(ans, LENGTH(cols)); int ansn; if (isNull(rows)) { ansn = nrow; @@ -329,9 +327,7 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md subsetVectorRaw(target, source, rows, anyNA); // parallel within column } } - SEXP tmp = PROTECT(allocVector(STRSXP, LENGTH(cols)+overAlloc)); nprotect++; - SET_TRUELENGTH(tmp, LENGTH(tmp)); - SETLENGTH(tmp, LENGTH(cols)); + SEXP tmp = PROTECT(growable_allocate(STRSXP, LENGTH(cols), LENGTH(cols)+overAlloc)); nprotect++; setAttrib(ans, R_NamesSymbol, tmp); subsetVectorRaw(tmp, getAttrib(x, R_NamesSymbol), cols, /*anyNA=*/false); From 06e7db71c45bd306acde52588b0f8f448a1574ca Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 28 Dec 2024 22:10:55 +0300 Subject: [PATCH 05/24] Use growable_* instead of SETLENGTH in dogroups() Since dogroups() relies on being able to shrink vectors it hasn't allocated, introduce make_growable() and is_growable() to adapt. Since dogroups() relies on SD and SDall having shared columns, use the setgrowable() wrapper on the R side at the time when SD and SDall are being created. (In the ALTREP case, setgrowable() will re-create the columns.) --- R/data.table.R | 2 ++ R/wrappers.R | 2 ++ src/data.table.h | 5 +++++ src/dogroups.c | 20 +++++++++----------- src/growable.c | 17 +++++++++++++++++ src/init.c | 1 + src/wrappers.c | 10 ++++++++++ 7 files changed, 46 insertions(+), 11 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index bac200b8a5..0c0d1dfd32 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1353,6 +1353,7 @@ replace_dot_alias = function(e) { } if (!with || missing(j)) return(ans) if (!is.data.table(ans)) setattr(ans, "class", c("data.table","data.frame")) # DF |> DT(,.SD[...]) .SD should be data.table, test 2212.013 + setgrowable(ans) SDenv$.SDall = ans SDenv$.SD = if (length(non_sdvars)) shallow(SDenv$.SDall, sdvars) else SDenv$.SDall SDenv$.N = nrow(ans) @@ -1591,6 +1592,7 @@ replace_dot_alias = function(e) { SDenv$.SDall = .Call(CsubsetDT, x, if (length(len__)) seq_len(max(len__)) else 0L, xcols) # must be deep copy when largest group is a subset if (!is.data.table(SDenv$.SDall)) setattr(SDenv$.SDall, "class", c("data.table","data.frame")) # DF |> DT(,.SD[...],by=grp) needs .SD to be data.table, test 2022.012 if (xdotcols) setattr(SDenv$.SDall, 'names', ansvars[xcolsAns]) # now that we allow 'x.' prefix in 'j', #2313 bug fix - [xcolsAns] + setgrowable(SDenv$.SDall) SDenv$.SD = if (length(non_sdvars)) shallow(SDenv$.SDall, sdvars) else SDenv$.SDall } if (nrow(SDenv$.SDall)==0L) { diff --git a/R/wrappers.R b/R/wrappers.R index 80b7a64e99..2a48c90173 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -21,3 +21,5 @@ fitsInInt32 = function(x) .Call(CfitsInInt32R, x) fitsInInt64 = function(x) .Call(CfitsInInt64R, x) coerceAs = function(x, as, copy=TRUE) .Call(CcoerceAs, x, as, copy) + +setgrowable = function(x) .Call(Csetgrowable, x) diff --git a/src/data.table.h b/src/data.table.h index f9b8bbe68d..48c2a68f9c 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -305,6 +305,10 @@ SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size); R_xlen_t growable_max_size(SEXP x); // Resize a growable vector to newsize. Will signal an error if newsize exceeds max_size. void growable_resize(SEXP x, R_xlen_t newsize); +// Return TRUE if growable_resize(x) and growable_max_size(x) are valid operations. +Rboolean is_growable(SEXP x); +// Transform x into a growable vector. The return value must be reprotected in place of x. What happens to x is deliberately not specified, but no copying occurs. +SEXP make_growable(SEXP x); // functions called from R level .Call/.External and registered in init.c // these now live here to pass -Wstrict-prototypes, #5477 @@ -379,4 +383,5 @@ SEXP dt_has_zlib(void); SEXP startsWithAny(SEXP, SEXP, SEXP); SEXP convertDate(SEXP, SEXP); SEXP fastmean(SEXP); +SEXP setgrowable(SEXP x); diff --git a/src/dogroups.c b/src/dogroups.c index e5efb3a2ac..db32b9906f 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -124,7 +124,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX for (R_len_t i=0; i maxGrpSize) maxGrpSize = ilens[i]; } - defineVar(install(".I"), I = PROTECT(allocVector(INTSXP, maxGrpSize)), env); nprotect++; + defineVar(install(".I"), I = PROTECT(growable_allocate(INTSXP, maxGrpSize, maxGrpSize)), env); nprotect++; hash_set(specials, I, -maxGrpSize); // marker for anySpecialStatic(); see its comments R_LockBinding(install(".I"), env); @@ -197,7 +197,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX INTEGER(GRP)[0] = i+1; // group counter exposed as .GRP INTEGER(rownames)[1] = -grpn; // the .set_row_names() of .SD. Not .N when nomatch=NA and this is a nomatch for (int j=0; j0; diff --git a/src/growable.c b/src/growable.c index 05069fe804..6bba73faf7 100644 --- a/src/growable.c +++ b/src/growable.c @@ -3,7 +3,9 @@ SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size) { SEXP ret = PROTECT(allocVector(type, max_size)); SET_TRUELENGTH(ret, max_size); +#if R_VERSION >= R_Version(3, 4, 0) SET_GROWABLE_BIT(ret); +#endif // otherwise perceived memory use will end up higher SETLENGTH(ret, size); UNPROTECT(1); return ret; @@ -21,3 +23,18 @@ void growable_resize(SEXP x, R_xlen_t newsize) { ); SETLENGTH(x, newsize); } + +Rboolean is_growable(SEXP x) { + return isVector(x) && TRUELENGTH(x) >= XLENGTH(x) +#if R_VERSION >= R_Version(3, 4, 0) + && IS_GROWABLE(x) +#endif + ; +} + +// Assuming no ALTREP for now +SEXP make_growable(SEXP x) { + if (TRUELENGTH(x) < XLENGTH(x)) SET_TRUELENGTH(x, XLENGTH(x)); + SET_GROWABLE_BIT(x); + return x; +} diff --git a/src/init.c b/src/init.c index 204dc1088d..e87b6ee038 100644 --- a/src/init.c +++ b/src/init.c @@ -150,6 +150,7 @@ R_CallMethodDef callMethods[] = { {"CconvertDate", (DL_FUNC)&convertDate, -1}, {"Cnotchin", (DL_FUNC)¬chin, -1}, {"Cwarn_matrix_column_r", (DL_FUNC)&warn_matrix_column_r, -1}, +{"Csetgrowable", (DL_FUNC)&setgrowable, -1}, {NULL, NULL, 0} }; diff --git a/src/wrappers.c b/src/wrappers.c index 6587caa97a..fb698bce07 100644 --- a/src/wrappers.c +++ b/src/wrappers.c @@ -124,3 +124,13 @@ SEXP warn_matrix_column_r(SEXP i) { warn_matrix_column(INTEGER(i)[0]); return R_NilValue; } + +SEXP setgrowable(SEXP x) { + for (R_xlen_t i = 0; i < xlength(x); ++i) { + SEXP this = VECTOR_ELT(x, i); + // relying on the rest of data.table machinery to avoid the need for resizing + if (!is_growable(this) && !ALTREP(this)) + SET_VECTOR_ELT(x, i, make_growable(this)); + } + return R_NilValue; +} From 7b9b1417208b669f7c92919f4de8ef4179a2f38d Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sun, 29 Dec 2024 01:06:28 +0300 Subject: [PATCH 06/24] Mark internal_error*() as NORET This helps avoid false positive warnings about unreachable places in the code. --- src/data.table.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data.table.h b/src/data.table.h index 48c2a68f9c..613e74f81b 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -143,7 +143,7 @@ uint64_t dtwiddle(double x); SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP sortGroupsArg, SEXP ascArg, SEXP naArg); SEXP forderReuseSorting(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP sortGroupsArg, SEXP ascArg, SEXP naArg, SEXP reuseSortingArg); // reuseSorting wrapper to forder int getNumericRounding_C(void); -void internal_error_with_cleanup(const char *call_name, const char *format, ...); +NORET void internal_error_with_cleanup(const char *call_name, const char *format, ...); // reorder.c SEXP reorder(SEXP x, SEXP order); @@ -259,7 +259,7 @@ SEXP islockedR(SEXP x); bool need2utf8(SEXP x); SEXP coerceUtf8IfNeeded(SEXP x); SEXP coerceAs(SEXP x, SEXP as, SEXP copyArg); -void internal_error(const char *call_name, const char *format, ...); +NORET void internal_error(const char *call_name, const char *format, ...); // types.c char *end(char *start); From 61fdc064c2bda8a4b96074588c423c8d395037c3 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sun, 29 Dec 2024 11:10:52 +0300 Subject: [PATCH 07/24] ALTREP implementation of growable_* Currently broken: - 1 errors out of 11637 - won't work with expression vectors at all --- src/assign.c | 4 +- src/data.table.h | 6 ++ src/dogroups.c | 5 +- src/growable.c | 238 ++++++++++++++++++++++++++++++++++++++++++++++- src/init.c | 4 + src/reorder.c | 6 ++ src/utils.c | 11 ++- src/wrappers.c | 16 +++- 8 files changed, 280 insertions(+), 10 deletions(-) diff --git a/src/assign.c b/src/assign.c index c36b72d6df..4be306251c 100644 --- a/src/assign.c +++ b/src/assign.c @@ -323,7 +323,7 @@ SEXP shallowwrapper(SEXP dt, SEXP cols) { } SEXP truelength(SEXP x) { - return ScalarInteger(isNull(x) ? 0 : growable_max_size(x)); + return ScalarInteger(is_growable(x) ? growable_max_size(x) : 0); } SEXP selfrefokwrapper(SEXP x, SEXP verbose) { @@ -520,7 +520,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) // modify DT by reference. Other than if new columns are being added and the allocVec() fails with // out-of-memory. In that case the user will receive hard halt and know to rerun. if (length(newcolnames)) { - oldtncol = growable_max_size(dt); // TO DO: oldtncol can be just called tl now, as we won't realloc here any more. + oldtncol = is_growable(dt) ? growable_max_size(dt) : 0; // TO DO: oldtncol can be just called tl now, as we won't realloc here any more. if (oldtncol= R_Version(4, 3, 0) +# define USE_GROWABLE_ALTREP +#endif #include #define SEXPPTR_RO(x) ((const SEXP *)DATAPTR_RO(x)) // to avoid overhead of looped STRING_ELT and VECTOR_ELT #include // for uint64_t rather than unsigned long long @@ -309,6 +312,9 @@ void growable_resize(SEXP x, R_xlen_t newsize); Rboolean is_growable(SEXP x); // Transform x into a growable vector. The return value must be reprotected in place of x. What happens to x is deliberately not specified, but no copying occurs. SEXP make_growable(SEXP x); +#if R_VERSION >= R_Version(4, 3, 0) +void register_altrep_classes(DllInfo*); +#endif // functions called from R level .Call/.External and registered in init.c // these now live here to pass -Wstrict-prototypes, #5477 diff --git a/src/dogroups.c b/src/dogroups.c index db32b9906f..9c8c62d968 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -45,11 +45,10 @@ static bool anySpecialStatic(SEXP x, hashtab * specials) { // (see data.table.h), and isNewList() is true for NULL if (n==0) return false; + if (hash_lookup(specials, x, 0)<0) return true; // test 2158 if (isVectorAtomic(x)) - return ALTREP(x) || hash_lookup(specials, x, 0)<0; + return ALTREP(x); // see test 2156: ALTREP is a source of sharing we can't trace reliably if (isNewList(x)) { - if (hash_lookup(specials, x, 0)<0) - return true; // test 2158 for (int i=0; i + +static R_altrep_class_t dta_grow_string, dta_grow_integer, dta_grow_logical, dta_grow_real, dta_grow_complex, dta_grow_raw, dta_grow_list; +static Rcomplex NA_COMPLEX = { 0, }; + +/* +ALTREP class layout: +data1 = underlying vector +data2 = its current length stored as a length-1 REALSXP +Unless we implement an Unserialize method, this can be changed any time. +Classes have been released on CRAN with a Serialized_state/Unserialize pair will have to stay as they have been defined in order to keep *.rds files readable. +*/ + +static R_xlen_t altall_Length(SEXP x) { + return (R_xlen_t)REAL(R_altrep_data2(x))[0]; +} + +#define make_inspect_method(classname) \ + static Rboolean alt##classname##_Inspect( \ + SEXP x, int pre, int deep, int pvec, \ + void (*inspect_subtree)(SEXP x, int pre, int deep, int pvec) \ + ) { \ + (void)pre; (void)deep; (void)pvec; (void)inspect_subtree; \ + Rprintf("data.table::growable" #classname "_v0(truelength=%g) ", (double)XLENGTH(R_altrep_data1(x))); \ + return FALSE; \ + } +make_inspect_method(string) +make_inspect_method(integer) +make_inspect_method(logical) +make_inspect_method(real) +make_inspect_method(complex) +make_inspect_method(raw) +make_inspect_method(list) +#undef make_inspect_method + +#define make_dataptr_method(class, accessor) \ + static void * alt##class##_Dataptr(SEXP x, Rboolean writable) { \ + (void)writable; \ + return (void*)accessor(R_altrep_data1(x)); \ + } +make_dataptr_method(string, STRING_PTR_RO) +make_dataptr_method(integer, INTEGER) +make_dataptr_method(logical, LOGICAL) +make_dataptr_method(real, REAL) +make_dataptr_method(complex, COMPLEX) +make_dataptr_method(raw, RAW) +make_dataptr_method(list, DATAPTR_RO) // VECTOR_PTR_RO to appear in R-4.5 +#undef make_dataptr_method + +static const void * altall_Dataptr_or_null(SEXP x) { return DATAPTR_RO(x); } + +// lots of boilerplate, but R calling *_ELT one by one would be far too slow +#define make_extract_subset_method(class, type, accessor, NA) \ + static SEXP alt##class##_Extract_subset(SEXP x, SEXP indx, SEXP call) { \ + (void)call; \ + indx = PROTECT(coerceVector(indx, REALSXP)); \ + double * ii = REAL(indx); \ + R_xlen_t rlen = XLENGTH(indx), mylen = XLENGTH(x); \ + SEXP ret = PROTECT(allocVector(TYPEOF(x), rlen)); \ + type *rdata = accessor(ret), *mydata = accessor(x); \ + for (R_xlen_t i = 0; i < rlen; ++i) \ + rdata[i] = (ii[i] >= 1 && ii[i] <= mylen) ? mydata[(R_xlen_t)ii[i]-1] : NA; \ + UNPROTECT(2); \ + return ret; \ + } +make_extract_subset_method(integer, int, INTEGER, NA_INTEGER) +make_extract_subset_method(logical, int, LOGICAL, NA_LOGICAL) +make_extract_subset_method(real, double, REAL, NA_REAL) +make_extract_subset_method(complex, Rcomplex, COMPLEX, NA_COMPLEX) +make_extract_subset_method(raw, Rbyte, RAW, 0) +// not implementing the string and list methods because those do require the write barrier and are thus no better than calling *_ELT one by one +#undef make_extract_subset_method + +#define make_elt_method(class, accessor) \ + static SEXP alt##class##_Elt(SEXP x, R_xlen_t i) { \ + return accessor(R_altrep_data1(x), i); \ + } +make_elt_method(string, STRING_ELT) +make_elt_method(list, VECTOR_ELT) +#undef make_elt_method + +#define make_set_elt_method(class, accessor) \ + static void alt##class##_Set_elt(SEXP x, R_xlen_t i, SEXP v) { \ + accessor(R_altrep_data1(x), i, v); \ + } +make_set_elt_method(string, SET_STRING_ELT) +make_set_elt_method(list, SET_VECTOR_ELT) +#undef make_set_elt_method + +// liked the Extract_subset methods? say hello to Get_region +#define make_get_region_method(class, type, accessor) \ + static R_xlen_t alt##class##_Get_region( \ + SEXP x, R_xlen_t i, R_xlen_t n, type * buf \ + ) { \ + R_xlen_t j = 0, mylen = XLENGTH(x); \ + type * data = accessor(x); \ + for (; j < n && i < mylen; ++i, ++j) buf[j] = data[i]; \ + return j; \ + } +make_get_region_method(integer, int, INTEGER) +make_get_region_method(logical, int, LOGICAL) +make_get_region_method(real, double, REAL) +make_get_region_method(complex, Rcomplex, COMPLEX) +make_get_region_method(raw, Rbyte, RAW) +#undef make_get_region_method + +void register_altrep_classes(DllInfo * info) { + // Used by the altcomplex_Extract_subset method + NA_COMPLEX = (Rcomplex){ .r = NA_REAL, .i = NA_REAL }; + + dta_grow_string = R_make_altstring_class("growable_string_v0", "data.table", info); + R_set_altrep_Length_method(dta_grow_string, altall_Length); + R_set_altrep_Inspect_method(dta_grow_string, altstring_Inspect); + R_set_altvec_Dataptr_method(dta_grow_string, altstring_Dataptr); + R_set_altvec_Dataptr_or_null_method(dta_grow_string, altall_Dataptr_or_null); + R_set_altstring_Elt_method(dta_grow_string, altstring_Elt); + R_set_altstring_Set_elt_method(dta_grow_string, altstring_Set_elt); + dta_grow_integer = R_make_altinteger_class("growable_integer_v0", "data.table", info); + R_set_altrep_Length_method(dta_grow_integer, altall_Length); + R_set_altrep_Inspect_method(dta_grow_integer, altinteger_Inspect); + R_set_altvec_Dataptr_method(dta_grow_integer, altinteger_Dataptr); + R_set_altvec_Dataptr_or_null_method(dta_grow_integer, altall_Dataptr_or_null); + R_set_altvec_Extract_subset_method(dta_grow_integer, altinteger_Extract_subset); + R_set_altinteger_Get_region_method(dta_grow_integer, altinteger_Get_region); + dta_grow_logical = R_make_altlogical_class("growable_logical_v0", "data.table", info); + R_set_altrep_Length_method(dta_grow_logical, altall_Length); + R_set_altrep_Inspect_method(dta_grow_logical, altlogical_Inspect); + R_set_altvec_Dataptr_method(dta_grow_logical, altlogical_Dataptr); + R_set_altvec_Dataptr_or_null_method(dta_grow_logical, altall_Dataptr_or_null); + R_set_altvec_Extract_subset_method(dta_grow_logical, altlogical_Extract_subset); + R_set_altlogical_Get_region_method(dta_grow_logical, altlogical_Get_region); + dta_grow_real = R_make_altreal_class("growable_real_v0", "data.table", info); + R_set_altrep_Length_method(dta_grow_real, altall_Length); + R_set_altrep_Inspect_method(dta_grow_real, altreal_Inspect); + R_set_altvec_Dataptr_method(dta_grow_real, altreal_Dataptr); + R_set_altvec_Dataptr_or_null_method(dta_grow_real, altall_Dataptr_or_null); + R_set_altvec_Extract_subset_method(dta_grow_real, altreal_Extract_subset); + R_set_altreal_Get_region_method(dta_grow_real, altreal_Get_region); + dta_grow_complex = R_make_altcomplex_class("growable_complex_v0", "data.table", info); + R_set_altrep_Length_method(dta_grow_complex, altall_Length); + R_set_altrep_Inspect_method(dta_grow_complex, altcomplex_Inspect); + R_set_altvec_Dataptr_method(dta_grow_complex, altcomplex_Dataptr); + R_set_altvec_Dataptr_or_null_method(dta_grow_complex, altall_Dataptr_or_null); + R_set_altvec_Extract_subset_method(dta_grow_complex, altcomplex_Extract_subset); + R_set_altcomplex_Get_region_method(dta_grow_complex, altcomplex_Get_region); + dta_grow_raw = R_make_altraw_class("growable_raw_v0", "data.table", info); + R_set_altrep_Length_method(dta_grow_raw, altall_Length); + R_set_altrep_Inspect_method(dta_grow_raw, altraw_Inspect); + R_set_altvec_Dataptr_method(dta_grow_raw, altraw_Dataptr); + R_set_altvec_Dataptr_or_null_method(dta_grow_raw, altall_Dataptr_or_null); + R_set_altvec_Extract_subset_method(dta_grow_raw, altraw_Extract_subset); + R_set_altraw_Get_region_method(dta_grow_raw, altraw_Get_region); + dta_grow_list = R_make_altlist_class("growable_list_v0", "data.table", info); + R_set_altrep_Length_method(dta_grow_list, altall_Length); + R_set_altrep_Inspect_method(dta_grow_list, altlist_Inspect); + R_set_altvec_Dataptr_method(dta_grow_list, altlist_Dataptr); + R_set_altvec_Dataptr_or_null_method(dta_grow_list, altall_Dataptr_or_null); + R_set_altlist_Elt_method(dta_grow_list, altlist_Elt); + R_set_altlist_Set_elt_method(dta_grow_list, altlist_Set_elt); +} + +static R_altrep_class_t dta_grow_string, dta_grow_integer, dta_grow_logical, dta_grow_real, dta_grow_complex, dta_grow_raw, dta_grow_list; + +static R_altrep_class_t type2class(SEXPTYPE type) { + switch(type) { + case STRSXP: + return dta_grow_string; + case INTSXP: + return dta_grow_integer; + case LGLSXP: + return dta_grow_logical; + case REALSXP: + return dta_grow_real; + case CPLXSXP: + return dta_grow_complex; + case RAWSXP: + return dta_grow_raw; + case VECSXP: + case EXPRSXP: + return dta_grow_list; + default: + internal_error(__func__, "Can't create a growable vector of type '%s'", type2char(type)); + } +} + +SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size) { + SEXP ret = PROTECT(R_new_altrep(type2class(type), R_NilValue, R_NilValue)); + R_set_altrep_data1(ret, allocVector(type, max_size)); + R_set_altrep_data2(ret, ScalarReal(size)); + UNPROTECT(1); + return ret; +} + +R_xlen_t growable_max_size(SEXP x) { + return XLENGTH(R_altrep_data1(x)); +} + +void growable_resize(SEXP x, R_xlen_t newsize) { + R_xlen_t max_size; + if (newsize > (max_size = growable_max_size(x))) internal_error( + __func__, "newsize=%g > max_size=%g", + (double)newsize, (double)max_size + ); + REAL(R_altrep_data2(x))[0] = newsize; +} + +Rboolean is_growable(SEXP x) { + switch(TYPEOF(x)) { + case STRSXP: + case INTSXP: + case LGLSXP: + case REALSXP: + case CPLXSXP: + case RAWSXP: + case VECSXP: + return R_altrep_inherits(x, type2class(TYPEOF(x))); + default: + return FALSE; + } +} + +SEXP make_growable(SEXP x) { + SEXP ret = PROTECT(R_new_altrep(type2class(TYPEOF(x)), R_NilValue, R_NilValue)); + R_set_altrep_data1(ret, x); + R_set_altrep_data2(ret, ScalarReal(XLENGTH(x))); + SHALLOW_DUPLICATE_ATTRIB(ret, x); + UNPROTECT(1); + return ret; +} + +#endif diff --git a/src/init.c b/src/init.c index e87b6ee038..f5c2647c88 100644 --- a/src/init.c +++ b/src/init.c @@ -203,8 +203,12 @@ void attribute_visible R_init_data_table(DllInfo *info) SEXP tmp = PROTECT(allocVector(INTSXP,2)); if (LENGTH(tmp)!=2) error(_("Checking LENGTH(allocVector(INTSXP,2)) [%d] is 2 %s"), LENGTH(tmp), msg); +#if R_VERSION >= R_Version(4, 3, 0) + register_altrep_classes(info); +#else // Use (long long) to cast R_xlen_t to a fixed type to robustly avoid -Wformat compiler warnings, see #5768 if (TRUELENGTH(tmp)!=0) error(_("Checking TRUELENGTH(allocVector(INTSXP,2)) [%lld] is 0 %s"), (long long)TRUELENGTH(tmp), msg); +#endif UNPROTECT(1); // According to IEEE (http://en.wikipedia.org/wiki/IEEE_754-1985#Zero) we can rely on 0.0 being all 0 bits. diff --git a/src/reorder.c b/src/reorder.c index a91b4a2bc4..1f00da681c 100644 --- a/src/reorder.c +++ b/src/reorder.c @@ -24,13 +24,17 @@ SEXP reorder(SEXP x, SEXP order) error(_("Column %d is length %d which differs from length of column 1 (%d). Invalid data.table."), i+1, length(v), nrow); if (SIZEOF(v) > maxSize) maxSize=SIZEOF(v); +#ifndef USE_GROWABLE_ALTREP if (ALTREP(v)) SET_VECTOR_ELT(x, i, copyAsPlain(v)); +#endif } copySharedColumns(x); // otherwise two columns which point to the same vector would be reordered and then re-reordered, issues linked in PR#3768 } else { if (SIZEOF(x)!=4 && SIZEOF(x)!=8 && SIZEOF(x)!=16 && SIZEOF(x)!=1) error(_("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported (SIZEOF=%zu)"), type2char(TYPEOF(x)), SIZEOF(x)); +#ifndef USE_GROWABLE_ALTREP if (ALTREP(x)) internal_error(__func__, "cannot reorder an ALTREP vector. Please see NEWS item 2 in v1.11.4"); // # nocov +#endif maxSize = SIZEOF(x); nrow = length(x); ncol = 1; @@ -40,7 +44,9 @@ SEXP reorder(SEXP x, SEXP order) if (length(order) != nrow) error("nrow(x)[%d]!=length(order)[%d]", nrow, length(order)); // # notranslate int nprotect = 0; +#ifndef USE_GROWABLE_ALTREP if (ALTREP(order)) { order=PROTECT(copyAsPlain(order)); nprotect++; } // TODO: if it's an ALTREP sequence some optimizations are possible rather than expand +#endif const int *restrict idx = INTEGER(order); int i=0; diff --git a/src/utils.c b/src/utils.c index 0c9d23fe6b..58917e7fcf 100644 --- a/src/utils.c +++ b/src/utils.c @@ -202,6 +202,9 @@ inline bool INHERITS(SEXP x, SEXP char_) { return false; } +#ifdef USE_GROWABLE_ALTREP +SEXP copyAsPlain(SEXP x) { return duplicate(x); } +#else SEXP copyAsPlain(SEXP x) { // v1.12.2 and before used standard R duplicate() to do this. But duplicate() is not guaranteed to not return an ALTREP. // e.g. ALTREP 'wrapper' on factor column (with materialized INTSXP) in package VIM under example(hotdeck) @@ -256,6 +259,7 @@ SEXP copyAsPlain(SEXP x) { UNPROTECT(1); return ans; } +#endif void copySharedColumns(SEXP x) { const int ncol = length(x); @@ -266,7 +270,12 @@ void copySharedColumns(SEXP x) { int nShared=0; for (int i=0; i Date: Sat, 22 Mar 2025 22:16:00 +0300 Subject: [PATCH 08/24] Drop redundant variable definitions Co-Authored-By: Benjamin Schwendinger --- src/growable.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/growable.c b/src/growable.c index b7e01b38ed..9b7d4f1914 100644 --- a/src/growable.c +++ b/src/growable.c @@ -204,8 +204,6 @@ void register_altrep_classes(DllInfo * info) { R_set_altlist_Set_elt_method(dta_grow_list, altlist_Set_elt); } -static R_altrep_class_t dta_grow_string, dta_grow_integer, dta_grow_logical, dta_grow_real, dta_grow_complex, dta_grow_raw, dta_grow_list; - static R_altrep_class_t type2class(SEXPTYPE type) { switch(type) { case STRSXP: From cebe420ccd913f302a101c00352720d3972864e3 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 25 Sep 2025 19:24:28 +0200 Subject: [PATCH 09/24] update growable for patch --- src/data.table.h | 14 --- src/growable.c | 274 ----------------------------------------------- src/init.c | 4 - src/reorder.c | 6 -- src/utils.c | 6 -- src/wrappers.c | 7 -- 6 files changed, 311 deletions(-) delete mode 100644 src/growable.c diff --git a/src/data.table.h b/src/data.table.h index 8f135bdc5c..ba0db9435e 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -341,20 +341,6 @@ void hash_set(hashtab *, SEXP key, R_xlen_t value); // Returns the value corresponding to the key present in the hash, otherwise returns ifnotfound. R_xlen_t hash_lookup(const hashtab *, SEXP key, R_xlen_t ifnotfound); -// growable.c -// Return a new vector of given type. Initially its xlength() is equal to size. Using growable_resize(), it can be increased to up to max_size. -SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size); -// Return the max_size of a growable vector. Behaviour is undefined if x was not allocated by growable_allocate. -R_xlen_t growable_max_size(SEXP x); -// Resize a growable vector to newsize. Will signal an error if newsize exceeds max_size. -void growable_resize(SEXP x, R_xlen_t newsize); -// Return TRUE if growable_resize(x) and growable_max_size(x) are valid operations. -Rboolean is_growable(SEXP x); -// Transform x into a growable vector. The return value must be reprotected in place of x. What happens to x is deliberately not specified, but no copying occurs. -SEXP make_growable(SEXP x); -#if R_VERSION >= R_Version(4, 3, 0) -void register_altrep_classes(DllInfo*); -#endif // The dynamically-allocated hash table has a public field for the R protection wrapper. // Keep it PROTECTed while the table is in use. typedef struct dhash_tab { diff --git a/src/growable.c b/src/growable.c deleted file mode 100644 index 9b7d4f1914..0000000000 --- a/src/growable.c +++ /dev/null @@ -1,274 +0,0 @@ -#include "data.table.h" - -#ifndef USE_GROWABLE_ALTREP - -SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size) { - SEXP ret = PROTECT(allocVector(type, max_size)); - SET_TRUELENGTH(ret, max_size); -#if R_VERSION >= R_Version(3, 4, 0) - SET_GROWABLE_BIT(ret); -#endif // otherwise perceived memory use will end up higher - SETLENGTH(ret, size); - UNPROTECT(1); - return ret; -} - -R_xlen_t growable_max_size(SEXP x) { - return TRUELENGTH(x); -} - -void growable_resize(SEXP x, R_xlen_t newsize) { - R_xlen_t max_size; - if (newsize > (max_size = growable_max_size(x))) internal_error( - __func__, "newsize=%g > max_size=%g", - (double)newsize, (double)max_size - ); - SETLENGTH(x, newsize); -} - -Rboolean is_growable(SEXP x) { - return isVector(x) && TRUELENGTH(x) >= XLENGTH(x) -#if R_VERSION >= R_Version(3, 4, 0) - && IS_GROWABLE(x) -#endif - ; -} - -// Assuming no ALTREP columns -SEXP make_growable(SEXP x) { - if (TRUELENGTH(x) < XLENGTH(x)) SET_TRUELENGTH(x, XLENGTH(x)); - SET_GROWABLE_BIT(x); - return x; -} - -#else - -#include - -static R_altrep_class_t dta_grow_string, dta_grow_integer, dta_grow_logical, dta_grow_real, dta_grow_complex, dta_grow_raw, dta_grow_list; -static Rcomplex NA_COMPLEX = { 0, }; - -/* -ALTREP class layout: -data1 = underlying vector -data2 = its current length stored as a length-1 REALSXP -Unless we implement an Unserialize method, this can be changed any time. -Classes have been released on CRAN with a Serialized_state/Unserialize pair will have to stay as they have been defined in order to keep *.rds files readable. -*/ - -static R_xlen_t altall_Length(SEXP x) { - return (R_xlen_t)REAL(R_altrep_data2(x))[0]; -} - -#define make_inspect_method(classname) \ - static Rboolean alt##classname##_Inspect( \ - SEXP x, int pre, int deep, int pvec, \ - void (*inspect_subtree)(SEXP x, int pre, int deep, int pvec) \ - ) { \ - (void)pre; (void)deep; (void)pvec; (void)inspect_subtree; \ - Rprintf("data.table::growable" #classname "_v0(truelength=%g) ", (double)XLENGTH(R_altrep_data1(x))); \ - return FALSE; \ - } -make_inspect_method(string) -make_inspect_method(integer) -make_inspect_method(logical) -make_inspect_method(real) -make_inspect_method(complex) -make_inspect_method(raw) -make_inspect_method(list) -#undef make_inspect_method - -#define make_dataptr_method(class, accessor) \ - static void * alt##class##_Dataptr(SEXP x, Rboolean writable) { \ - (void)writable; \ - return (void*)accessor(R_altrep_data1(x)); \ - } -make_dataptr_method(string, STRING_PTR_RO) -make_dataptr_method(integer, INTEGER) -make_dataptr_method(logical, LOGICAL) -make_dataptr_method(real, REAL) -make_dataptr_method(complex, COMPLEX) -make_dataptr_method(raw, RAW) -make_dataptr_method(list, DATAPTR_RO) // VECTOR_PTR_RO to appear in R-4.5 -#undef make_dataptr_method - -static const void * altall_Dataptr_or_null(SEXP x) { return DATAPTR_RO(x); } - -// lots of boilerplate, but R calling *_ELT one by one would be far too slow -#define make_extract_subset_method(class, type, accessor, NA) \ - static SEXP alt##class##_Extract_subset(SEXP x, SEXP indx, SEXP call) { \ - (void)call; \ - indx = PROTECT(coerceVector(indx, REALSXP)); \ - double * ii = REAL(indx); \ - R_xlen_t rlen = XLENGTH(indx), mylen = XLENGTH(x); \ - SEXP ret = PROTECT(allocVector(TYPEOF(x), rlen)); \ - type *rdata = accessor(ret), *mydata = accessor(x); \ - for (R_xlen_t i = 0; i < rlen; ++i) \ - rdata[i] = (ii[i] >= 1 && ii[i] <= mylen) ? mydata[(R_xlen_t)ii[i]-1] : NA; \ - UNPROTECT(2); \ - return ret; \ - } -make_extract_subset_method(integer, int, INTEGER, NA_INTEGER) -make_extract_subset_method(logical, int, LOGICAL, NA_LOGICAL) -make_extract_subset_method(real, double, REAL, NA_REAL) -make_extract_subset_method(complex, Rcomplex, COMPLEX, NA_COMPLEX) -make_extract_subset_method(raw, Rbyte, RAW, 0) -// not implementing the string and list methods because those do require the write barrier and are thus no better than calling *_ELT one by one -#undef make_extract_subset_method - -#define make_elt_method(class, accessor) \ - static SEXP alt##class##_Elt(SEXP x, R_xlen_t i) { \ - return accessor(R_altrep_data1(x), i); \ - } -make_elt_method(string, STRING_ELT) -make_elt_method(list, VECTOR_ELT) -#undef make_elt_method - -#define make_set_elt_method(class, accessor) \ - static void alt##class##_Set_elt(SEXP x, R_xlen_t i, SEXP v) { \ - accessor(R_altrep_data1(x), i, v); \ - } -make_set_elt_method(string, SET_STRING_ELT) -make_set_elt_method(list, SET_VECTOR_ELT) -#undef make_set_elt_method - -// liked the Extract_subset methods? say hello to Get_region -#define make_get_region_method(class, type, accessor) \ - static R_xlen_t alt##class##_Get_region( \ - SEXP x, R_xlen_t i, R_xlen_t n, type * buf \ - ) { \ - R_xlen_t j = 0, mylen = XLENGTH(x); \ - type * data = accessor(x); \ - for (; j < n && i < mylen; ++i, ++j) buf[j] = data[i]; \ - return j; \ - } -make_get_region_method(integer, int, INTEGER) -make_get_region_method(logical, int, LOGICAL) -make_get_region_method(real, double, REAL) -make_get_region_method(complex, Rcomplex, COMPLEX) -make_get_region_method(raw, Rbyte, RAW) -#undef make_get_region_method - -void register_altrep_classes(DllInfo * info) { - // Used by the altcomplex_Extract_subset method - NA_COMPLEX = (Rcomplex){ .r = NA_REAL, .i = NA_REAL }; - - dta_grow_string = R_make_altstring_class("growable_string_v0", "data.table", info); - R_set_altrep_Length_method(dta_grow_string, altall_Length); - R_set_altrep_Inspect_method(dta_grow_string, altstring_Inspect); - R_set_altvec_Dataptr_method(dta_grow_string, altstring_Dataptr); - R_set_altvec_Dataptr_or_null_method(dta_grow_string, altall_Dataptr_or_null); - R_set_altstring_Elt_method(dta_grow_string, altstring_Elt); - R_set_altstring_Set_elt_method(dta_grow_string, altstring_Set_elt); - dta_grow_integer = R_make_altinteger_class("growable_integer_v0", "data.table", info); - R_set_altrep_Length_method(dta_grow_integer, altall_Length); - R_set_altrep_Inspect_method(dta_grow_integer, altinteger_Inspect); - R_set_altvec_Dataptr_method(dta_grow_integer, altinteger_Dataptr); - R_set_altvec_Dataptr_or_null_method(dta_grow_integer, altall_Dataptr_or_null); - R_set_altvec_Extract_subset_method(dta_grow_integer, altinteger_Extract_subset); - R_set_altinteger_Get_region_method(dta_grow_integer, altinteger_Get_region); - dta_grow_logical = R_make_altlogical_class("growable_logical_v0", "data.table", info); - R_set_altrep_Length_method(dta_grow_logical, altall_Length); - R_set_altrep_Inspect_method(dta_grow_logical, altlogical_Inspect); - R_set_altvec_Dataptr_method(dta_grow_logical, altlogical_Dataptr); - R_set_altvec_Dataptr_or_null_method(dta_grow_logical, altall_Dataptr_or_null); - R_set_altvec_Extract_subset_method(dta_grow_logical, altlogical_Extract_subset); - R_set_altlogical_Get_region_method(dta_grow_logical, altlogical_Get_region); - dta_grow_real = R_make_altreal_class("growable_real_v0", "data.table", info); - R_set_altrep_Length_method(dta_grow_real, altall_Length); - R_set_altrep_Inspect_method(dta_grow_real, altreal_Inspect); - R_set_altvec_Dataptr_method(dta_grow_real, altreal_Dataptr); - R_set_altvec_Dataptr_or_null_method(dta_grow_real, altall_Dataptr_or_null); - R_set_altvec_Extract_subset_method(dta_grow_real, altreal_Extract_subset); - R_set_altreal_Get_region_method(dta_grow_real, altreal_Get_region); - dta_grow_complex = R_make_altcomplex_class("growable_complex_v0", "data.table", info); - R_set_altrep_Length_method(dta_grow_complex, altall_Length); - R_set_altrep_Inspect_method(dta_grow_complex, altcomplex_Inspect); - R_set_altvec_Dataptr_method(dta_grow_complex, altcomplex_Dataptr); - R_set_altvec_Dataptr_or_null_method(dta_grow_complex, altall_Dataptr_or_null); - R_set_altvec_Extract_subset_method(dta_grow_complex, altcomplex_Extract_subset); - R_set_altcomplex_Get_region_method(dta_grow_complex, altcomplex_Get_region); - dta_grow_raw = R_make_altraw_class("growable_raw_v0", "data.table", info); - R_set_altrep_Length_method(dta_grow_raw, altall_Length); - R_set_altrep_Inspect_method(dta_grow_raw, altraw_Inspect); - R_set_altvec_Dataptr_method(dta_grow_raw, altraw_Dataptr); - R_set_altvec_Dataptr_or_null_method(dta_grow_raw, altall_Dataptr_or_null); - R_set_altvec_Extract_subset_method(dta_grow_raw, altraw_Extract_subset); - R_set_altraw_Get_region_method(dta_grow_raw, altraw_Get_region); - dta_grow_list = R_make_altlist_class("growable_list_v0", "data.table", info); - R_set_altrep_Length_method(dta_grow_list, altall_Length); - R_set_altrep_Inspect_method(dta_grow_list, altlist_Inspect); - R_set_altvec_Dataptr_method(dta_grow_list, altlist_Dataptr); - R_set_altvec_Dataptr_or_null_method(dta_grow_list, altall_Dataptr_or_null); - R_set_altlist_Elt_method(dta_grow_list, altlist_Elt); - R_set_altlist_Set_elt_method(dta_grow_list, altlist_Set_elt); -} - -static R_altrep_class_t type2class(SEXPTYPE type) { - switch(type) { - case STRSXP: - return dta_grow_string; - case INTSXP: - return dta_grow_integer; - case LGLSXP: - return dta_grow_logical; - case REALSXP: - return dta_grow_real; - case CPLXSXP: - return dta_grow_complex; - case RAWSXP: - return dta_grow_raw; - case VECSXP: - case EXPRSXP: - return dta_grow_list; - default: - internal_error(__func__, "Can't create a growable vector of type '%s'", type2char(type)); - } -} - -SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size) { - SEXP ret = PROTECT(R_new_altrep(type2class(type), R_NilValue, R_NilValue)); - R_set_altrep_data1(ret, allocVector(type, max_size)); - R_set_altrep_data2(ret, ScalarReal(size)); - UNPROTECT(1); - return ret; -} - -R_xlen_t growable_max_size(SEXP x) { - return XLENGTH(R_altrep_data1(x)); -} - -void growable_resize(SEXP x, R_xlen_t newsize) { - R_xlen_t max_size; - if (newsize > (max_size = growable_max_size(x))) internal_error( - __func__, "newsize=%g > max_size=%g", - (double)newsize, (double)max_size - ); - REAL(R_altrep_data2(x))[0] = newsize; -} - -Rboolean is_growable(SEXP x) { - switch(TYPEOF(x)) { - case STRSXP: - case INTSXP: - case LGLSXP: - case REALSXP: - case CPLXSXP: - case RAWSXP: - case VECSXP: - return R_altrep_inherits(x, type2class(TYPEOF(x))); - default: - return FALSE; - } -} - -SEXP make_growable(SEXP x) { - SEXP ret = PROTECT(R_new_altrep(type2class(TYPEOF(x)), R_NilValue, R_NilValue)); - R_set_altrep_data1(ret, x); - R_set_altrep_data2(ret, ScalarReal(XLENGTH(x))); - SHALLOW_DUPLICATE_ATTRIB(ret, x); - UNPROTECT(1); - return ret; -} - -#endif diff --git a/src/init.c b/src/init.c index 94e86f4d09..ef81a7a0e0 100644 --- a/src/init.c +++ b/src/init.c @@ -212,12 +212,8 @@ void attribute_visible R_init_data_table(DllInfo *info) SEXP tmp = PROTECT(allocVector(INTSXP,2)); if (LENGTH(tmp)!=2) error(_("Checking LENGTH(allocVector(INTSXP,2)) [%d] is 2 %s"), LENGTH(tmp), msg); -#if R_VERSION >= R_Version(4, 3, 0) - register_altrep_classes(info); -#else // Use (long long) to cast R_xlen_t to a fixed type to robustly avoid -Wformat compiler warnings, see #5768 if (TRUELENGTH(tmp)!=0) error(_("Checking TRUELENGTH(allocVector(INTSXP,2)) [%lld] is 0 %s"), (long long)TRUELENGTH(tmp), msg); -#endif UNPROTECT(1); // According to IEEE (http://en.wikipedia.org/wiki/IEEE_754-1985#Zero) we can rely on 0.0 being all 0 bits. diff --git a/src/reorder.c b/src/reorder.c index f07d888e2c..0992b19728 100644 --- a/src/reorder.c +++ b/src/reorder.c @@ -24,17 +24,13 @@ SEXP reorder(SEXP x, SEXP order) error(_("Column %d is length %d which differs from length of column 1 (%d). Invalid data.table."), i+1, length(v), nrow); if (RTYPE_SIZEOF(v) > maxSize) maxSize=RTYPE_SIZEOF(v); -#ifndef USE_GROWABLE_ALTREP if (ALTREP(v)) SET_VECTOR_ELT(x, i, copyAsPlain(v)); -#endif } copySharedColumns(x); // otherwise two columns which point to the same vector would be reordered and then re-reordered, issues linked in PR#3768 } else { if (RTYPE_SIZEOF(x)!=4 && RTYPE_SIZEOF(x)!=8 && RTYPE_SIZEOF(x)!=16 && RTYPE_SIZEOF(x)!=1) error(_("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported (RTYPE_SIZEOF=%zu)"), type2char(TYPEOF(x)), RTYPE_SIZEOF(x)); - #ifndef USE_GROWABLE_ALTREP if (ALTREP(x)) internal_error(__func__, "cannot reorder an ALTREP vector. Please see NEWS item 2 in v1.11.4"); // # nocov - #endif maxSize = RTYPE_SIZEOF(x); nrow = length(x); ncol = 1; @@ -44,9 +40,7 @@ SEXP reorder(SEXP x, SEXP order) if (length(order) != nrow) error("nrow(x)[%d]!=length(order)[%d]", nrow, length(order)); // # notranslate int nprotect = 0; -#ifndef USE_GROWABLE_ALTREP if (ALTREP(order)) { order=PROTECT(copyAsPlain(order)); nprotect++; } // TODO: if it's an ALTREP sequence some optimizations are possible rather than expand -#endif const int *restrict idx = INTEGER(order); int i=0; diff --git a/src/utils.c b/src/utils.c index ff9d443ab8..85554ddfaa 100644 --- a/src/utils.c +++ b/src/utils.c @@ -203,9 +203,6 @@ inline bool INHERITS(SEXP x, SEXP char_) { return false; } -#ifdef USE_GROWABLE_ALTREP -SEXP copyAsPlain(SEXP x) { return duplicate(x); } -#else SEXP copyAsPlain(SEXP x) { // v1.12.2 and before used standard R duplicate() to do this. But duplicate() is not guaranteed to not return an ALTREP. // e.g. ALTREP 'wrapper' on factor column (with materialized INTSXP) in package VIM under example(hotdeck) @@ -265,7 +262,6 @@ SEXP copyAsPlain(SEXP x) { UNPROTECT(1); return ans; } -#endif void copySharedColumns(SEXP x) { const int ncol = length(x); @@ -278,9 +274,7 @@ void copySharedColumns(SEXP x) { SEXP thiscol = xp[i]; if ( hash_lookup(marks, thiscol, 0)<0 -#ifndef USE_GROWABLE_ALTREP || ALTREP(thiscol) -#endif ) { shared[i] = true; // we mark ALTREP as 'shared' too, whereas 'tocopy' would be better word to use for ALTREP nShared++; diff --git a/src/wrappers.c b/src/wrappers.c index 36cc35f8e4..a7d125d87e 100644 --- a/src/wrappers.c +++ b/src/wrappers.c @@ -87,12 +87,6 @@ SEXP address(SEXP x) return(mkString(buffer)); } -#ifdef USE_GROWABLE_ALTREP -SEXP expandAltRep(SEXP x) { - (void)x; - return R_NilValue; -} -#else SEXP expandAltRep(SEXP x) { // used by setDT to ensure altrep vectors in columns are expanded. Such altrep objects typically come from tests or demos, since @@ -111,7 +105,6 @@ SEXP expandAltRep(SEXP x) } return R_NilValue; } -#endif SEXP dim(SEXP x) { From 2b5444bb79b4bef0ec900608485d98aadf264aac Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 25 Sep 2025 22:13:50 +0200 Subject: [PATCH 10/24] tweak patch update --- src/assign.c | 12 ++++++------ src/data.table.h | 1 + src/dogroups.c | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/assign.c b/src/assign.c index 292cf43b78..05899c3790 100644 --- a/src/assign.c +++ b/src/assign.c @@ -266,7 +266,7 @@ SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose) // added (n>l) ? ... for #970, see test 1481. // TO DO: test realloc names if selfrefnamesok (users can setattr(x,"name") themselves for example. - tl = growable_max_size(dt); + tl = growable_capacity(dt); // R <= 2.13.2 and we didn't catch uninitialized tl somehow if (tl<0) internal_error(__func__, "tl of class is marked but tl<0"); // # nocov if (tl>0 && tl +#include #define SEXPPTR_RO(x) ((const SEXP *)DATAPTR_RO(x)) // to avoid overhead of looped STRING_ELT and VECTOR_ELT #include // for uint64_t rather than unsigned long long #include // for va_list, va_start diff --git a/src/dogroups.c b/src/dogroups.c index 89be6cd180..a4b3bb828f 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -317,7 +317,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX RHS = VECTOR_ELT(jval,j%LENGTH(jval)); if (isNull(target)) { // first time adding to new column - if (growable_max_size(dt) < colj+1) internal_error(__func__, "Trying to add new column by reference but table is full; setalloccol should have run first at R level before getting to this point"); // # nocov + if (growable_capacity(dt) < colj+1) internal_error(__func__, "Trying to add new column by reference but table is full; setalloccol should have run first at R level before getting to this point"); // # nocov target = PROTECT(allocNAVectorLike(RHS, n)); // Even if we could know reliably to switch from allocNAVectorLike to allocVector for slight speedup, user code could still // contain a switched halt, and in that case we'd want the groups not yet done to have NA rather than 0 or uninitialized. From 248bab7ca005344915749d8af6b458db19230b4f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 25 Sep 2025 22:31:29 +0200 Subject: [PATCH 11/24] reset truelength definition --- src/assign.c | 2 +- src/freadR.c | 1 - src/wrappers.c | 13 ------------- 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/src/assign.c b/src/assign.c index 05899c3790..e20e6e00b4 100644 --- a/src/assign.c +++ b/src/assign.c @@ -320,7 +320,7 @@ SEXP shallowwrapper(SEXP dt, SEXP cols) { } SEXP truelength(SEXP x) { - return ScalarInteger(is_growable(x) ? growable_capacity(x) : 0); + return ScalarInteger(isNull(x) ? 0 : growable_capacity(x)); } SEXP selfrefokwrapper(SEXP x, SEXP verbose) { diff --git a/src/freadR.c b/src/freadR.c index a858e2f6cc..507c8a492f 100644 --- a/src/freadR.c +++ b/src/freadR.c @@ -265,7 +265,6 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int colNamesSxp = R_NilValue; SET_VECTOR_ELT(RCHK, 1, colNamesSxp=growable_allocate(STRSXP, ncol, ncol)); for (int i=0; i Date: Thu, 25 Sep 2025 22:33:57 +0200 Subject: [PATCH 12/24] remove last TRUELENGTH --- src/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/init.c b/src/init.c index ef81a7a0e0..45a7095d21 100644 --- a/src/init.c +++ b/src/init.c @@ -213,7 +213,7 @@ void attribute_visible R_init_data_table(DllInfo *info) SEXP tmp = PROTECT(allocVector(INTSXP,2)); if (LENGTH(tmp)!=2) error(_("Checking LENGTH(allocVector(INTSXP,2)) [%d] is 2 %s"), LENGTH(tmp), msg); // Use (long long) to cast R_xlen_t to a fixed type to robustly avoid -Wformat compiler warnings, see #5768 - if (TRUELENGTH(tmp)!=0) error(_("Checking TRUELENGTH(allocVector(INTSXP,2)) [%lld] is 0 %s"), (long long)TRUELENGTH(tmp), msg); + if (growable_capacity(tmp)!=0) error(_("Checking TRUELENGTH(allocVector(INTSXP,2)) [%lld] is 0 %s"), (long long)growable_capacity(tmp), msg); UNPROTECT(1); // According to IEEE (http://en.wikipedia.org/wiki/IEEE_754-1985#Zero) we can rely on 0.0 being all 0 bits. From c2c3bfdc0bdf0a58cbd09bb34b0c024ed7146c6b Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 25 Sep 2025 22:45:05 +0200 Subject: [PATCH 13/24] update setgrowable --- src/frollapply.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/frollapply.c b/src/frollapply.c index 1e04059c9b..e2f293982e 100644 --- a/src/frollapply.c +++ b/src/frollapply.c @@ -76,18 +76,12 @@ SEXP memcpyDTadaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) { // needed in adaptive=TRUE SEXP setgrowable(SEXP x) { - if (!isNewList(x)) { - SET_GROWABLE_BIT(x); - SET_TRUELENGTH(x, LENGTH(x)); // important because gc() uses TRUELENGTH to keep counts - } else { - // # nocov start ## does not seem to be reported to codecov most likely due to running in a fork, I manually debugged that it is being called when running froll.Rraw - for (int i = 0; i < LENGTH(x); i++) { - //Rprintf("%d",3); // manual code coverage to confirm it is reached when marking nocov - SEXP col = VECTOR_ELT(x, i); - SET_GROWABLE_BIT(col); - SET_TRUELENGTH(col, LENGTH(col)); - } - // # nocov end + for (R_xlen_t i = 0; i < xlength(x); ++i) { + SEXP this = VECTOR_ELT(x, i); + if ( + !is_growable(this) + && !ALTREP(this) + ) SET_VECTOR_ELT(x, i, make_growable(this)); } - return x; + return R_NilValue; } From db6b6b96ac2cfb59c93914e8337e9a1097ad271a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 25 Sep 2025 22:48:51 +0200 Subject: [PATCH 14/24] remove change --- src/utils.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/utils.c b/src/utils.c index 85554ddfaa..a2a9f38bd0 100644 --- a/src/utils.c +++ b/src/utils.c @@ -272,10 +272,7 @@ void copySharedColumns(SEXP x) { int nShared=0; for (int i=0; i Date: Thu, 25 Sep 2025 22:49:40 +0200 Subject: [PATCH 15/24] delete extra spaces --- src/reorder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reorder.c b/src/reorder.c index 0992b19728..66f1cc6441 100644 --- a/src/reorder.c +++ b/src/reorder.c @@ -30,7 +30,7 @@ SEXP reorder(SEXP x, SEXP order) } else { if (RTYPE_SIZEOF(x)!=4 && RTYPE_SIZEOF(x)!=8 && RTYPE_SIZEOF(x)!=16 && RTYPE_SIZEOF(x)!=1) error(_("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported (RTYPE_SIZEOF=%zu)"), type2char(TYPEOF(x)), RTYPE_SIZEOF(x)); - if (ALTREP(x)) internal_error(__func__, "cannot reorder an ALTREP vector. Please see NEWS item 2 in v1.11.4"); // # nocov + if (ALTREP(x)) internal_error(__func__, "cannot reorder an ALTREP vector. Please see NEWS item 2 in v1.11.4"); // # nocov maxSize = RTYPE_SIZEOF(x); nrow = length(x); ncol = 1; From 78f45f82aa7a8bdb80a9f7c8adfd2d0452991155 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 26 Sep 2025 06:24:37 +0200 Subject: [PATCH 16/24] Update src/data.table.h remove growable_altrep define Co-authored-by: aitap --- src/data.table.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/data.table.h b/src/data.table.h index 7cbdd39ded..26ad5555af 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -16,9 +16,6 @@ #if R_VERSION < R_Version(4, 5, 0) # define isDataFrame(x) isFrame(x) // #6180 #endif -#if R_VERSION >= R_Version(4, 3, 0) -# define USE_GROWABLE_ALTREP -#endif #include #include #define SEXPPTR_RO(x) ((const SEXP *)DATAPTR_RO(x)) // to avoid overhead of looped STRING_ELT and VECTOR_ELT From a5874bd4430f74e07cec2e2a79b7b75b850c572d Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 26 Sep 2025 06:28:46 +0200 Subject: [PATCH 17/24] remove R 3.4.0 backports Co-authored-by: aitap --- src/assign.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/assign.c b/src/assign.c index e20e6e00b4..e3f0ef69e7 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1,6 +1,5 @@ #include "data.table.h" -#if R_VERSION < R_Version(3,4,0) // not needed with GROWABLE_BIT static void finalizer(SEXP p) { SEXP x; @@ -23,7 +22,6 @@ static void finalizer(SEXP p) UNPROTECT(1); return; } -#endif void setselfref(SEXP x) { if(!INHERITS(x, char_datatable)) return; // #5286 @@ -40,9 +38,6 @@ void setselfref(SEXP x) { R_NilValue )) )); -#if R_VERSION < R_Version(3,4,0) // not needed with GROWABLE_BIT - R_RegisterCFinalizerEx(p, finalizer, FALSE); -#endif UNPROTECT(2); /* @@ -134,20 +129,9 @@ static int _selfrefok(SEXP x, Rboolean checkNames, Rboolean verbose) { // (1) we allocate the data.table and/or its names, so it has the GROWABLE_BIT set, so copies will have zero TRUELENGTH, or // (2) someone else creates them from scratch, so (only using the API) will have zero TRUELENGTH. // We then return false and either re-create the data.table from scratch or signal an error, so the current object having a zero TRUELENGTH is fine. - // R < 3.4 doesn't have the GROWABLE_BIT, so let's reset the TRUELENGTH just in case. -#if R_VERSION < R_Version(3,4,0) - if (names!=tag && isString(names)) - SET_TRUELENGTH(names, LENGTH(names)); - // R copied this vector not data.table; it's not actually over-allocated. It looks over-allocated - // because R copies the original vector's tl over despite allocating length. -#endif prot = R_ExternalPtrProtected(v); if (TYPEOF(prot) != EXTPTRSXP) // Very rare. Was error(_(".internal.selfref prot is not itself an extptr")). return 0; // # nocov ; see http://stackoverflow.com/questions/15342227/getting-a-random-internal-selfref-error-in-data-table-for-r -#if R_VERSION < R_Version(3,4,0) - if (x!=R_ExternalPtrAddr(prot)) - SET_TRUELENGTH(x, LENGTH(x)); // R copied this vector not data.table, it's not actually over-allocated -#endif return checkNames ? names==tag : x==R_ExternalPtrAddr(prot); } From 2db2a90cd1e6de6391cf41559377c31c7dc577c6 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 26 Sep 2025 08:16:03 +0200 Subject: [PATCH 18/24] remove whole finalizer --- src/assign.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/assign.c b/src/assign.c index e3f0ef69e7..f8fd952e3f 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1,28 +1,5 @@ #include "data.table.h" -static void finalizer(SEXP p) -{ - SEXP x; - R_len_t n, l, tl; - if(!R_ExternalPtrAddr(p)) internal_error(__func__, "didn't receive an ExternalPtr"); // # nocov - p = R_ExternalPtrTag(p); - if (!isString(p)) internal_error(__func__, "ExternalPtr doesn't see names in tag"); // # nocov - l = LENGTH(p); - tl = TRUELENGTH(p); - if (l<0 || tl Date: Fri, 26 Sep 2025 08:59:02 +0200 Subject: [PATCH 19/24] patch froll --- R/data.table.R | 2 +- src/frollapply.c | 28 +++++++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index af528e4d3f..616b993c15 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1595,7 +1595,7 @@ replace_dot_alias = function(e) { SDenv$.SDall = .Call(CsubsetDT, x, if (length(len__)) seq_len(max(len__)) else 0L, xcols) # must be deep copy when largest group is a subset if (!is.data.table(SDenv$.SDall)) setattr(SDenv$.SDall, "class", c("data.table","data.frame")) # DF |> DT(,.SD[...],by=grp) needs .SD to be data.table, test 2022.012 if (xdotcols) setattr(SDenv$.SDall, 'names', ansvars[xcolsAns]) # now that we allow 'x.' prefix in 'j', #2313 bug fix - [xcolsAns] - setgrowable(SDenv$.SDall) + SDenv$.SDall = setgrowable(SDenv$.SDall) SDenv$.SD = if (length(non_sdvars)) shallow(SDenv$.SDall, sdvars) else SDenv$.SDall } if (nrow(SDenv$.SDall)==0L) { diff --git a/src/frollapply.c b/src/frollapply.c index e2f293982e..c60324fa19 100644 --- a/src/frollapply.c +++ b/src/frollapply.c @@ -50,7 +50,7 @@ SEXP memcpyVectoradaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) { const size_t oi = INTEGER_RO(offset)[0]; const int nrow = INTEGER_RO(size)[oi - 1]; const size_t o = oi - nrow; // oi should always be bigger than nrow because we filter out incomplete window using ansMask - SETLENGTH(dest, nrow); // must be before memcpy_sexp because attempt to set index 1/1 in SET_STRING_ELT test 6010.150 + growable_resize(dest, nrow); // must be before memcpy_sexp because attempt to set index 1/1 in SET_STRING_ELT test 6010.150 if (nrow) { // support k[i]==0 memcpy_sexp(dest, o, src, nrow); } @@ -65,7 +65,7 @@ SEXP memcpyDTadaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) { const int ncol = LENGTH(dest); for (int i = 0; i < ncol; i++) { SEXP d = VECTOR_ELT(dest, i); - SETLENGTH(d, nrow); + growable_resize(d, nrow); if (nrow) { // support k[i]==0 memcpy_sexp(d, o, VECTOR_ELT(src, i), nrow); } @@ -76,12 +76,22 @@ SEXP memcpyDTadaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) { // needed in adaptive=TRUE SEXP setgrowable(SEXP x) { - for (R_xlen_t i = 0; i < xlength(x); ++i) { - SEXP this = VECTOR_ELT(x, i); - if ( - !is_growable(this) - && !ALTREP(this) - ) SET_VECTOR_ELT(x, i, make_growable(this)); + if (!isNewList(x)) { + if (!is_growable(x)) { + return make_growable(x); + } + return x; + } else { + // # nocov start ## does not seem to be reported to codecov most likely due to running in a fork, I manually debugged that it is being called when running froll.Rraw + for (R_xlen_t i = 0; i < xlength(x); ++i) { + //Rprintf("%d",3); // manual code coverage to confirm it is reached when marking nocov + SEXP this = VECTOR_ELT(x, i); + if ( + !is_growable(this) + && !ALTREP(this) + ) SET_VECTOR_ELT(x, i, make_growable(this)); + } + // # nocov end + return x; } - return R_NilValue; } From ce5fcb53e8e5b7f8cd696df34617c8caa554f287 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 26 Sep 2025 09:08:55 +0200 Subject: [PATCH 20/24] assume no altrep at that point --- src/frollapply.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/frollapply.c b/src/frollapply.c index c60324fa19..bd1bfb1fe3 100644 --- a/src/frollapply.c +++ b/src/frollapply.c @@ -77,6 +77,7 @@ SEXP memcpyDTadaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) { // needed in adaptive=TRUE SEXP setgrowable(SEXP x) { if (!isNewList(x)) { + if (ALTREP(x)) internal_error(__func__, "frollapply's adaptive=T setgrowable should have not been called with an ALTREP input"); // # nocov if (!is_growable(x)) { return make_growable(x); } @@ -86,10 +87,9 @@ SEXP setgrowable(SEXP x) { for (R_xlen_t i = 0; i < xlength(x); ++i) { //Rprintf("%d",3); // manual code coverage to confirm it is reached when marking nocov SEXP this = VECTOR_ELT(x, i); - if ( - !is_growable(this) - && !ALTREP(this) - ) SET_VECTOR_ELT(x, i, make_growable(this)); + if (ALTREP(this)) internal_error(__func__, "frollapply's adaptive=T setgrowable should have not been called with an ALTREP input"); // # nocov + if (!is_growable(this)) + SET_VECTOR_ELT(x, i, make_growable(this)); } // # nocov end return x; From 1117f6faba2f3006cb5075704260a8e6912fe50c Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 26 Sep 2025 09:46:46 +0200 Subject: [PATCH 21/24] add patch to devcontainer --- .devcontainer/r-devel-growable/Dockerfile | 55 ++++ .../r-devel-growable/devcontainer.json | 15 ++ .../r-devel-growable/growable-api.patch | 254 ++++++++++++++++++ 3 files changed, 324 insertions(+) create mode 100644 .devcontainer/r-devel-growable/Dockerfile create mode 100644 .devcontainer/r-devel-growable/devcontainer.json create mode 100644 .devcontainer/r-devel-growable/growable-api.patch diff --git a/.devcontainer/r-devel-growable/Dockerfile b/.devcontainer/r-devel-growable/Dockerfile new file mode 100644 index 0000000000..c1c84ab541 --- /dev/null +++ b/.devcontainer/r-devel-growable/Dockerfile @@ -0,0 +1,55 @@ +FROM registry.gitlab.com/rdatatable/dockerfiles/r-devel-gcc + +RUN apt-get -qq update \ + && apt-get install -y --no-install-recommends \ + git \ + patch \ + subversion \ + rsync \ + texinfo \ + texlive-latex-base \ + texlive-latex-recommended \ + texlive-fonts-recommended \ + && rm -rf /var/lib/apt/lists/* + +COPY .devcontainer/r-devel-growable/growable-api.patch /opt/growable-api.patch + +WORKDIR /opt/R-source +RUN svn checkout https://svn.r-project.org/R/trunk R-devel && \ + cd R-devel && \ + patch -p0 < /opt/growable-api.patch + +# Build patched R +WORKDIR /opt/R-source/R-devel +RUN ./configure \ + --prefix=/usr/local \ + --enable-R-shlib \ + --enable-memory-profiling \ + --with-blas \ + --with-lapack \ + --with-readline \ + --with-cairo \ + --with-libpng \ + --with-jpeglib \ + --with-libtiff \ + --disable-nls && \ + make -j$(nproc) && \ + make install + +RUN echo 'Testing growable vectors...' && \ + /usr/local/bin/R --slave -e '.Internal(growvec_alloc(13L, 0, 10))' + +ENV PATH=/usr/local/bin:$PATH +ENV R_HOME=/usr/local/lib/R + +COPY DESCRIPTION . +RUN /usr/local/bin/Rscript -e ' \ +read.dcf("DESCRIPTION", c("Imports", "Suggests")) |> \ + tools:::.split_dependencies() |> \ + names() |> \ + setdiff(tools:::.get_standard_package_names()$base) |> \ + install.packages() \ +' + +WORKDIR /root +COPY .devcontainer/.Rprofile . \ No newline at end of file diff --git a/.devcontainer/r-devel-growable/devcontainer.json b/.devcontainer/r-devel-growable/devcontainer.json new file mode 100644 index 0000000000..c2d45094a2 --- /dev/null +++ b/.devcontainer/r-devel-growable/devcontainer.json @@ -0,0 +1,15 @@ +{ + "name": "R-devel with Growable Vectors", + "build": { + "dockerfile": "Dockerfile", + "context": "../.." + }, + "customizations": { + "vscode": { + "extensions": [ + "REditorSupport.r", + "ms-vscode.cpptools-extension-pack" + ] + } + } +} \ No newline at end of file diff --git a/.devcontainer/r-devel-growable/growable-api.patch b/.devcontainer/r-devel-growable/growable-api.patch new file mode 100644 index 0000000000..6cf6ce1a6b --- /dev/null +++ b/.devcontainer/r-devel-growable/growable-api.patch @@ -0,0 +1,254 @@ +Index: src/include/Internal.h +=================================================================== +--- src/include/Internal.h (revision 88869) ++++ src/include/Internal.h (working copy) +@@ -545,6 +545,13 @@ + SEXP do_retracemem(SEXP, SEXP, SEXP, SEXP); + SEXP do_untracemem(SEXP, SEXP, SEXP, SEXP); + ++SEXP do_growvec_alloc(SEXP, SEXP, SEXP, SEXP); ++SEXP do_growvec_max_size(SEXP, SEXP, SEXP, SEXP); ++SEXP do_growvec_resize(SEXP, SEXP, SEXP, SEXP); ++SEXP do_growvec_is_growable(SEXP, SEXP, SEXP, SEXP); ++SEXP do_growvec_make_growable(SEXP, SEXP, SEXP, SEXP); ++SEXP do_growvec_trim(SEXP, SEXP, SEXP, SEXP); ++ + /* ALTREP-related */ + + SEXP do_sorted_fpass(SEXP, SEXP, SEXP, SEXP); +Index: src/include/Rgrowable.h +=================================================================== +--- src/include/Rgrowable.h (nonexistent) ++++ src/include/Rgrowable.h (working copy) +@@ -0,0 +1,14 @@ ++#ifndef R_GROWABLE_H_ ++#define R_GROWABLE_H_ ++ ++#include ++ ++/* Minimal internal API for growable vectors. */ ++SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size); ++R_xlen_t growable_capacity(SEXP x); ++void growable_resize(SEXP x, R_xlen_t newsize); ++Rboolean is_growable(SEXP x); ++SEXP make_growable(SEXP x); ++SEXP growable_trim(SEXP x); ++ ++#endif + +=================================================================== +--- src/main/Makefile.in (revision 88869) ++++ src/main/Makefile.in (working copy) +@@ -23,7 +23,7 @@ + dotcode.c dounzip.c dstruct.c duplicate.c \ + edit.c engine.c envir.c errors.c eval.c \ + flexiblas.c format.c \ +- gevents.c gram.c gram-ex.c graphics.c grep.c \ ++ gevents.c gram.c gram-ex.c graphics.c grep.c growable.c \ + identical.c inlined.c inspect.c internet.c iosupport.c \ + lapack.c list.c localecharset.c logic.c \ + machine.c main.c mapply.c mask.c match.c memory.c \ +Index: src/main/growable.c +=================================================================== +--- src/main/growable.c (nonexistent) ++++ src/main/growable.c (working copy) +@@ -0,0 +1,131 @@ ++#include ++#include ++#include ++#include ++ ++static SEXP ensure_materialized(SEXP x) { ++ if (!ALTREP(x)) return x; ++ if (!isVector(x)) ++ Rf_error("make_growable: not a vector"); ++ ++ SEXPTYPE t = TYPEOF(x); ++ R_xlen_t len = XLENGTH(x); ++ SEXP y = PROTECT(Rf_allocVector(t, len)); ++ SETLENGTH(y, len); ++ SET_TRUELENGTH(y, len); ++ SET_GROWABLE_BIT(y); ++ /* Copy using safe element accessors */ ++ switch (t) { ++ case LGLSXP: ++ case INTSXP: { ++ int *yp = INTEGER(y); ++ for (R_xlen_t i = 0; i < len; ++i) yp[i] = INTEGER_ELT(x, i); ++ break; ++ } ++ case REALSXP: { ++ double *yp = REAL(y); ++ for (R_xlen_t i = 0; i < len; ++i) yp[i] = REAL_ELT(x, i); ++ break; ++ } ++ case CPLXSXP: { ++ Rcomplex *yp = COMPLEX(y); ++ for (R_xlen_t i = 0; i < len; ++i) yp[i] = COMPLEX_ELT(x, i); ++ break; ++ } ++ case RAWSXP: { ++ Rbyte *yp = RAW(y); ++ for (R_xlen_t i = 0; i < len; ++i) yp[i] = RAW_ELT(x, i); ++ break; ++ } ++ case STRSXP: { ++ for (R_xlen_t i = 0; i < len; ++i) SET_STRING_ELT(y, i, STRING_ELT(x, i)); ++ break; ++ } ++ case VECSXP: ++ case EXPRSXP: { ++ for (R_xlen_t i = 0; i < len; ++i) SET_VECTOR_ELT(y, i, VECTOR_ELT(x, i)); ++ break; ++ } ++ default: ++ Rf_error("make_growable: unsupported type %d", (int)t); ++ } ++ DUPLICATE_ATTRIB(y, x); ++ UNPROTECT(1); ++ return y; ++} ++ ++SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size) { ++ if (!(type == LGLSXP || type == INTSXP || type == REALSXP || type == CPLXSXP || type == STRSXP || type == RAWSXP || type == VECSXP)) ++ Rf_error("growable_allocate: unsupported type %d", (int)type); ++ if (size < 0 || max_size < 0) ++ Rf_error("growable_allocate: negative size/capacity"); ++ if (size > max_size) ++ Rf_error("growable_allocate: size (%lld) > max_size (%lld)", ++ (long long)size, (long long)max_size); ++ ++ SEXP ret = PROTECT(Rf_allocVector(type, max_size)); ++ SET_TRUELENGTH(ret, max_size); ++ SET_GROWABLE_BIT(ret); ++ SETLENGTH(ret, size); ++ UNPROTECT(1); ++ return ret; ++} ++ ++R_xlen_t growable_capacity(SEXP x) { ++ return TRUELENGTH(x); ++} ++ ++void growable_resize(SEXP x, R_xlen_t newsize) { ++ if (!isVector(x)) ++ Rf_error("growable_resize: 'x' must be a vector"); ++ if (newsize < 0) ++ Rf_error("growable_resize: negative size"); ++ if (ALTREP(x)) ++ Rf_error("growable_resize: ALTREP must be materialized first (use make_growable())"); ++ ++ R_xlen_t cap = TRUELENGTH(x); ++ if (newsize > cap) { ++ Rf_error("growable_resize: newsize (%lld) > max_size (%lld)", ++ (long long)newsize, (long long)cap); ++ } ++ SETLENGTH(x, newsize); ++} ++ ++Rboolean is_growable(SEXP x) { ++ if (!isVector(x)) return FALSE; ++ if (ALTREP(x)) return FALSE; // conservative, could be supported ++ if (TRUELENGTH(x) < XLENGTH(x)) return FALSE; ++ if (!IS_GROWABLE(x)) return FALSE; ++ return TRUE; ++} ++ ++SEXP make_growable(SEXP x) { ++ if (!isVector(x)) ++ Rf_error("make_growable: not a vector"); ++ if (ALTREP(x)) { ++ return ensure_materialized(x); ++ } ++ if (TRUELENGTH(x) < XLENGTH(x)) { ++ SET_TRUELENGTH(x, XLENGTH(x)); ++ } ++ SET_GROWABLE_BIT(x); ++ return x; ++} ++ ++SEXP growable_trim(SEXP x) { ++ R_xlen_t len = XLENGTH(x); ++ R_xlen_t max_len = TRUELENGTH(x); ++ if (len == max_len) { ++ return x; ++ } ++ ++ if (!is_growable(x)) { ++ Rf_error("growable_trim: object is not growable"); ++ } ++ ++ growable_resize(x, max_len); // set length to max_len to force reallocation ++ SEXP ans = PROTECT(Rf_xlengthgets(x, len)); ++ SETLENGTH(x, len); ++ UNPROTECT(1); ++ return ans; ++} + +=================================================================== +--- src/main/names.c (revision 88869) ++++ src/main/names.c (working copy) +@@ -230,6 +230,12 @@ + /* .Internals */ + + {"vector", do_makevector, 0, 11, 2, {PP_FUNCALL, PREC_FN, 0}}, ++{"growvec_alloc", do_growvec_alloc, 0, 11, 3, {PP_FUNCALL, PREC_FN, 0}}, ++{"growvec_max_size", do_growvec_max_size, 0, 11, 1, {PP_FUNCALL, PREC_FN, 0}}, ++{"growvec_resize", do_growvec_resize, 0, 11, 2, {PP_FUNCALL, PREC_FN, 0}}, ++{"growvec_is_growable", do_growvec_is_growable, 0, 11, 1, {PP_FUNCALL, PREC_FN, 0}}, ++{"growvec_make_growable", do_growvec_make_growable, 0, 11, 1, {PP_FUNCALL, PREC_FN, 0}}, ++{"growvec_trim", do_growvec_trim, 0, 11, 1, {PP_FUNCALL, PREC_FN, 0}}, + {"complex", do_complex, 0, 11, 3, {PP_FUNCALL, PREC_FN, 0}}, + {"matrix", do_matrix, 0, 11, 7, {PP_FUNCALL, PREC_FN, 0}}, + {"array", do_array, 0, 11, 3, {PP_FUNCALL, PREC_FN, 0}}, +@@ -1451,3 +1457,50 @@ + { + return PRIMNAME(object); + } ++ ++/* Growable vector wrapper functions */ ++extern SEXP growable_allocate(SEXPTYPE, R_xlen_t, R_xlen_t); ++extern R_xlen_t growable_capacity(SEXP); ++extern void growable_resize(SEXP, R_xlen_t); ++extern Rboolean is_growable(SEXP); ++extern SEXP make_growable(SEXP); ++extern SEXP growable_trim(SEXP); ++ ++SEXP do_growvec_alloc(SEXP call, SEXP op, SEXP args, SEXP env) { ++ SEXP typeS = CAR(args); args = CDR(args); ++ SEXP sizeS = CAR(args); args = CDR(args); ++ SEXP capS = CAR(args); ++ return growable_allocate(asInteger(typeS), ++ (R_xlen_t)asReal(sizeS), ++ (R_xlen_t)asReal(capS)); ++} ++ ++SEXP do_growvec_max_size(SEXP call, SEXP op, SEXP args, SEXP env) { ++ SEXP x = CAR(args); ++ R_xlen_t max_size = growable_capacity(x); ++ return ScalarReal((double)max_size); ++} ++ ++SEXP do_growvec_resize(SEXP call, SEXP op, SEXP args, SEXP env) { ++ SEXP x = CAR(args); args = CDR(args); ++ SEXP newsizeS = CAR(args); ++ growable_resize(x, (R_xlen_t)asReal(newsizeS)); ++ return R_NilValue; ++} ++ ++SEXP do_growvec_is_growable(SEXP call, SEXP op, SEXP args, SEXP env) { ++ SEXP x = CAR(args); ++ Rboolean result = is_growable(x); ++ return ScalarLogical(result); ++} ++ ++SEXP do_growvec_make_growable(SEXP call, SEXP op, SEXP args, SEXP env) { ++ SEXP x = CAR(args); ++ return make_growable(x); ++} ++ ++SEXP do_growvec_trim(SEXP call, SEXP op, SEXP args, SEXP env) { ++ SEXP x = CAR(args); ++ SEXP result = growable_trim(x); ++ return result; ++} From 13a8b7d59753e37c1600e765ccf36eb4f5fdaa00 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 26 Sep 2025 10:23:15 +0200 Subject: [PATCH 22/24] update devcontainer to build from dockerfile --- .devcontainer/r-devel-growable/Dockerfile | 44 +-- .../r-devel-growable/growable-api.patch | 254 ------------------ 2 files changed, 1 insertion(+), 297 deletions(-) delete mode 100644 .devcontainer/r-devel-growable/growable-api.patch diff --git a/.devcontainer/r-devel-growable/Dockerfile b/.devcontainer/r-devel-growable/Dockerfile index c1c84ab541..75d4ee64a6 100644 --- a/.devcontainer/r-devel-growable/Dockerfile +++ b/.devcontainer/r-devel-growable/Dockerfile @@ -1,46 +1,4 @@ -FROM registry.gitlab.com/rdatatable/dockerfiles/r-devel-gcc - -RUN apt-get -qq update \ - && apt-get install -y --no-install-recommends \ - git \ - patch \ - subversion \ - rsync \ - texinfo \ - texlive-latex-base \ - texlive-latex-recommended \ - texlive-fonts-recommended \ - && rm -rf /var/lib/apt/lists/* - -COPY .devcontainer/r-devel-growable/growable-api.patch /opt/growable-api.patch - -WORKDIR /opt/R-source -RUN svn checkout https://svn.r-project.org/R/trunk R-devel && \ - cd R-devel && \ - patch -p0 < /opt/growable-api.patch - -# Build patched R -WORKDIR /opt/R-source/R-devel -RUN ./configure \ - --prefix=/usr/local \ - --enable-R-shlib \ - --enable-memory-profiling \ - --with-blas \ - --with-lapack \ - --with-readline \ - --with-cairo \ - --with-libpng \ - --with-jpeglib \ - --with-libtiff \ - --disable-nls && \ - make -j$(nproc) && \ - make install - -RUN echo 'Testing growable vectors...' && \ - /usr/local/bin/R --slave -e '.Internal(growvec_alloc(13L, 0, 10))' - -ENV PATH=/usr/local/bin:$PATH -ENV R_HOME=/usr/local/lib/R +FROM registry.gitlab.com/rdatatable/dockerfiles/r-devel-growable COPY DESCRIPTION . RUN /usr/local/bin/Rscript -e ' \ diff --git a/.devcontainer/r-devel-growable/growable-api.patch b/.devcontainer/r-devel-growable/growable-api.patch deleted file mode 100644 index 6cf6ce1a6b..0000000000 --- a/.devcontainer/r-devel-growable/growable-api.patch +++ /dev/null @@ -1,254 +0,0 @@ -Index: src/include/Internal.h -=================================================================== ---- src/include/Internal.h (revision 88869) -+++ src/include/Internal.h (working copy) -@@ -545,6 +545,13 @@ - SEXP do_retracemem(SEXP, SEXP, SEXP, SEXP); - SEXP do_untracemem(SEXP, SEXP, SEXP, SEXP); - -+SEXP do_growvec_alloc(SEXP, SEXP, SEXP, SEXP); -+SEXP do_growvec_max_size(SEXP, SEXP, SEXP, SEXP); -+SEXP do_growvec_resize(SEXP, SEXP, SEXP, SEXP); -+SEXP do_growvec_is_growable(SEXP, SEXP, SEXP, SEXP); -+SEXP do_growvec_make_growable(SEXP, SEXP, SEXP, SEXP); -+SEXP do_growvec_trim(SEXP, SEXP, SEXP, SEXP); -+ - /* ALTREP-related */ - - SEXP do_sorted_fpass(SEXP, SEXP, SEXP, SEXP); -Index: src/include/Rgrowable.h -=================================================================== ---- src/include/Rgrowable.h (nonexistent) -+++ src/include/Rgrowable.h (working copy) -@@ -0,0 +1,14 @@ -+#ifndef R_GROWABLE_H_ -+#define R_GROWABLE_H_ -+ -+#include -+ -+/* Minimal internal API for growable vectors. */ -+SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size); -+R_xlen_t growable_capacity(SEXP x); -+void growable_resize(SEXP x, R_xlen_t newsize); -+Rboolean is_growable(SEXP x); -+SEXP make_growable(SEXP x); -+SEXP growable_trim(SEXP x); -+ -+#endif - -=================================================================== ---- src/main/Makefile.in (revision 88869) -+++ src/main/Makefile.in (working copy) -@@ -23,7 +23,7 @@ - dotcode.c dounzip.c dstruct.c duplicate.c \ - edit.c engine.c envir.c errors.c eval.c \ - flexiblas.c format.c \ -- gevents.c gram.c gram-ex.c graphics.c grep.c \ -+ gevents.c gram.c gram-ex.c graphics.c grep.c growable.c \ - identical.c inlined.c inspect.c internet.c iosupport.c \ - lapack.c list.c localecharset.c logic.c \ - machine.c main.c mapply.c mask.c match.c memory.c \ -Index: src/main/growable.c -=================================================================== ---- src/main/growable.c (nonexistent) -+++ src/main/growable.c (working copy) -@@ -0,0 +1,131 @@ -+#include -+#include -+#include -+#include -+ -+static SEXP ensure_materialized(SEXP x) { -+ if (!ALTREP(x)) return x; -+ if (!isVector(x)) -+ Rf_error("make_growable: not a vector"); -+ -+ SEXPTYPE t = TYPEOF(x); -+ R_xlen_t len = XLENGTH(x); -+ SEXP y = PROTECT(Rf_allocVector(t, len)); -+ SETLENGTH(y, len); -+ SET_TRUELENGTH(y, len); -+ SET_GROWABLE_BIT(y); -+ /* Copy using safe element accessors */ -+ switch (t) { -+ case LGLSXP: -+ case INTSXP: { -+ int *yp = INTEGER(y); -+ for (R_xlen_t i = 0; i < len; ++i) yp[i] = INTEGER_ELT(x, i); -+ break; -+ } -+ case REALSXP: { -+ double *yp = REAL(y); -+ for (R_xlen_t i = 0; i < len; ++i) yp[i] = REAL_ELT(x, i); -+ break; -+ } -+ case CPLXSXP: { -+ Rcomplex *yp = COMPLEX(y); -+ for (R_xlen_t i = 0; i < len; ++i) yp[i] = COMPLEX_ELT(x, i); -+ break; -+ } -+ case RAWSXP: { -+ Rbyte *yp = RAW(y); -+ for (R_xlen_t i = 0; i < len; ++i) yp[i] = RAW_ELT(x, i); -+ break; -+ } -+ case STRSXP: { -+ for (R_xlen_t i = 0; i < len; ++i) SET_STRING_ELT(y, i, STRING_ELT(x, i)); -+ break; -+ } -+ case VECSXP: -+ case EXPRSXP: { -+ for (R_xlen_t i = 0; i < len; ++i) SET_VECTOR_ELT(y, i, VECTOR_ELT(x, i)); -+ break; -+ } -+ default: -+ Rf_error("make_growable: unsupported type %d", (int)t); -+ } -+ DUPLICATE_ATTRIB(y, x); -+ UNPROTECT(1); -+ return y; -+} -+ -+SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size) { -+ if (!(type == LGLSXP || type == INTSXP || type == REALSXP || type == CPLXSXP || type == STRSXP || type == RAWSXP || type == VECSXP)) -+ Rf_error("growable_allocate: unsupported type %d", (int)type); -+ if (size < 0 || max_size < 0) -+ Rf_error("growable_allocate: negative size/capacity"); -+ if (size > max_size) -+ Rf_error("growable_allocate: size (%lld) > max_size (%lld)", -+ (long long)size, (long long)max_size); -+ -+ SEXP ret = PROTECT(Rf_allocVector(type, max_size)); -+ SET_TRUELENGTH(ret, max_size); -+ SET_GROWABLE_BIT(ret); -+ SETLENGTH(ret, size); -+ UNPROTECT(1); -+ return ret; -+} -+ -+R_xlen_t growable_capacity(SEXP x) { -+ return TRUELENGTH(x); -+} -+ -+void growable_resize(SEXP x, R_xlen_t newsize) { -+ if (!isVector(x)) -+ Rf_error("growable_resize: 'x' must be a vector"); -+ if (newsize < 0) -+ Rf_error("growable_resize: negative size"); -+ if (ALTREP(x)) -+ Rf_error("growable_resize: ALTREP must be materialized first (use make_growable())"); -+ -+ R_xlen_t cap = TRUELENGTH(x); -+ if (newsize > cap) { -+ Rf_error("growable_resize: newsize (%lld) > max_size (%lld)", -+ (long long)newsize, (long long)cap); -+ } -+ SETLENGTH(x, newsize); -+} -+ -+Rboolean is_growable(SEXP x) { -+ if (!isVector(x)) return FALSE; -+ if (ALTREP(x)) return FALSE; // conservative, could be supported -+ if (TRUELENGTH(x) < XLENGTH(x)) return FALSE; -+ if (!IS_GROWABLE(x)) return FALSE; -+ return TRUE; -+} -+ -+SEXP make_growable(SEXP x) { -+ if (!isVector(x)) -+ Rf_error("make_growable: not a vector"); -+ if (ALTREP(x)) { -+ return ensure_materialized(x); -+ } -+ if (TRUELENGTH(x) < XLENGTH(x)) { -+ SET_TRUELENGTH(x, XLENGTH(x)); -+ } -+ SET_GROWABLE_BIT(x); -+ return x; -+} -+ -+SEXP growable_trim(SEXP x) { -+ R_xlen_t len = XLENGTH(x); -+ R_xlen_t max_len = TRUELENGTH(x); -+ if (len == max_len) { -+ return x; -+ } -+ -+ if (!is_growable(x)) { -+ Rf_error("growable_trim: object is not growable"); -+ } -+ -+ growable_resize(x, max_len); // set length to max_len to force reallocation -+ SEXP ans = PROTECT(Rf_xlengthgets(x, len)); -+ SETLENGTH(x, len); -+ UNPROTECT(1); -+ return ans; -+} - -=================================================================== ---- src/main/names.c (revision 88869) -+++ src/main/names.c (working copy) -@@ -230,6 +230,12 @@ - /* .Internals */ - - {"vector", do_makevector, 0, 11, 2, {PP_FUNCALL, PREC_FN, 0}}, -+{"growvec_alloc", do_growvec_alloc, 0, 11, 3, {PP_FUNCALL, PREC_FN, 0}}, -+{"growvec_max_size", do_growvec_max_size, 0, 11, 1, {PP_FUNCALL, PREC_FN, 0}}, -+{"growvec_resize", do_growvec_resize, 0, 11, 2, {PP_FUNCALL, PREC_FN, 0}}, -+{"growvec_is_growable", do_growvec_is_growable, 0, 11, 1, {PP_FUNCALL, PREC_FN, 0}}, -+{"growvec_make_growable", do_growvec_make_growable, 0, 11, 1, {PP_FUNCALL, PREC_FN, 0}}, -+{"growvec_trim", do_growvec_trim, 0, 11, 1, {PP_FUNCALL, PREC_FN, 0}}, - {"complex", do_complex, 0, 11, 3, {PP_FUNCALL, PREC_FN, 0}}, - {"matrix", do_matrix, 0, 11, 7, {PP_FUNCALL, PREC_FN, 0}}, - {"array", do_array, 0, 11, 3, {PP_FUNCALL, PREC_FN, 0}}, -@@ -1451,3 +1457,50 @@ - { - return PRIMNAME(object); - } -+ -+/* Growable vector wrapper functions */ -+extern SEXP growable_allocate(SEXPTYPE, R_xlen_t, R_xlen_t); -+extern R_xlen_t growable_capacity(SEXP); -+extern void growable_resize(SEXP, R_xlen_t); -+extern Rboolean is_growable(SEXP); -+extern SEXP make_growable(SEXP); -+extern SEXP growable_trim(SEXP); -+ -+SEXP do_growvec_alloc(SEXP call, SEXP op, SEXP args, SEXP env) { -+ SEXP typeS = CAR(args); args = CDR(args); -+ SEXP sizeS = CAR(args); args = CDR(args); -+ SEXP capS = CAR(args); -+ return growable_allocate(asInteger(typeS), -+ (R_xlen_t)asReal(sizeS), -+ (R_xlen_t)asReal(capS)); -+} -+ -+SEXP do_growvec_max_size(SEXP call, SEXP op, SEXP args, SEXP env) { -+ SEXP x = CAR(args); -+ R_xlen_t max_size = growable_capacity(x); -+ return ScalarReal((double)max_size); -+} -+ -+SEXP do_growvec_resize(SEXP call, SEXP op, SEXP args, SEXP env) { -+ SEXP x = CAR(args); args = CDR(args); -+ SEXP newsizeS = CAR(args); -+ growable_resize(x, (R_xlen_t)asReal(newsizeS)); -+ return R_NilValue; -+} -+ -+SEXP do_growvec_is_growable(SEXP call, SEXP op, SEXP args, SEXP env) { -+ SEXP x = CAR(args); -+ Rboolean result = is_growable(x); -+ return ScalarLogical(result); -+} -+ -+SEXP do_growvec_make_growable(SEXP call, SEXP op, SEXP args, SEXP env) { -+ SEXP x = CAR(args); -+ return make_growable(x); -+} -+ -+SEXP do_growvec_trim(SEXP call, SEXP op, SEXP args, SEXP env) { -+ SEXP x = CAR(args); -+ SEXP result = growable_trim(x); -+ return result; -+} From 9e635d62c34b68c17a62cc25baff40d7f61758f4 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 26 Sep 2025 14:20:27 +0200 Subject: [PATCH 23/24] apply Jans loop suggestion --- src/frollapply.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frollapply.c b/src/frollapply.c index bd1bfb1fe3..402b10e7ce 100644 --- a/src/frollapply.c +++ b/src/frollapply.c @@ -84,7 +84,7 @@ SEXP setgrowable(SEXP x) { return x; } else { // # nocov start ## does not seem to be reported to codecov most likely due to running in a fork, I manually debugged that it is being called when running froll.Rraw - for (R_xlen_t i = 0; i < xlength(x); ++i) { + for (int i = 0; i < LENGTH(x); i++) { //Rprintf("%d",3); // manual code coverage to confirm it is reached when marking nocov SEXP this = VECTOR_ELT(x, i); if (ALTREP(this)) internal_error(__func__, "frollapply's adaptive=T setgrowable should have not been called with an ALTREP input"); // # nocov From e06aaa7bf5ca6cf952dece521a4f69ca5111cf73 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 26 Sep 2025 14:21:20 +0200 Subject: [PATCH 24/24] remove spaces --- src/frollapply.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frollapply.c b/src/frollapply.c index 402b10e7ce..af3419988e 100644 --- a/src/frollapply.c +++ b/src/frollapply.c @@ -92,6 +92,6 @@ SEXP setgrowable(SEXP x) { SET_VECTOR_ELT(x, i, make_growable(this)); } // # nocov end - return x; + return x; } }