@@ -196,7 +196,10 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
196196# ' @return rows from `x` that either (a) don't have a (0-tolerance) matching
197197# ' ukey in `y`, or (b) have a matching ukey in `y`, but don't have
198198# ' approximately equal value column values
199+ # '
200+ # ' @keywords internal
199201tbl_fast_anti_join <- function (x , y , ukey_names , val_names , abs_tol = 0 ) {
202+ x_orig <- x
200203 x <- x [c(ukey_names , val_names )]
201204 y <- y [c(ukey_names , val_names )]
202205 xy <- vec_rbind(x , y )
@@ -211,7 +214,7 @@ tbl_fast_anti_join <- function(x, y, ukey_names, val_names, abs_tol = 0) {
211214 xy_vals <- xy [val_names ]
212215 x_exclude [xy_dup_inds1 ] <- vec_approx_equal(xy_vals , inds1 = xy_dup_inds2 , xy_vals , inds2 = xy_dup_inds1 , na_equal = TRUE , abs_tol = abs_tol )
213216 }
214- vec_slice(x , ! x_exclude )
217+ vec_slice(x_orig , ! x_exclude )
215218}
216219
217220# ' Calculate compact patch to move from one snapshot/update to another
@@ -244,41 +247,53 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
244247 # Most input validation + handle NULL earlier_snapshot. This is a small function so
245248 # use faster validation variants:
246249 if (! is_tibble(later_tbl )) {
247- cli_abort(" `later_tbl` must be a tibble" ,
248- class = " epiprocess__tbl_diff2__later_tbl_invalid"
249- )
250+ cli_abort(
251+ " `later_tbl` must be a tibble" ,
252+ class = " epiprocess__tbl_diff2__later_tbl_invalid"
253+ )
250254 }
251255 if (is.null(earlier_snapshot )) {
252256 return (later_tbl )
253257 }
254258 if (! is_tibble(earlier_snapshot )) {
255- cli_abort(" `earlier_snapshot` must be a tibble or `NULL`" ,
256- class = " epiprocess__tbl_diff2__earlier_tbl_class_invalid"
257- )
259+ cli_abort(
260+ " `earlier_snapshot` must be a tibble or `NULL`" ,
261+ class = " epiprocess__tbl_diff2__earlier_tbl_class_invalid"
262+ )
258263 }
259264 if (! is.character(ukey_names ) || ! all(ukey_names %in% names(earlier_snapshot ))) {
260- cli_abort(" `ukey_names` must be a subset of column names" ,
261- class = " epiprocess__tbl_diff2__ukey_names_class_invalid"
262- )
265+ cli_abort(
266+ " `ukey_names` must be a subset of column names" ,
267+ class = " epiprocess__tbl_diff2__ukey_names_class_invalid"
268+ )
263269 }
264270 later_format <- arg_match0(later_format , c(" snapshot" , " update" ))
265271 if (! (is.vector(compactify_abs_tol , mode = " numeric" ) &&
266- length(compactify_abs_tol ) == 1L && # nolint:indentation_linter
267- compactify_abs_tol > = 0 )) {
272+ length(compactify_abs_tol ) == 1L && # nolint:indentation_linter
273+ compactify_abs_tol > = 0 )) {
268274 # Give a specific message:
269275 assert_numeric(compactify_abs_tol , lower = 0 , any.missing = FALSE , len = 1L )
270276 # Fallback e.g. for invalid classes not caught by assert_numeric:
271- cli_abort(" `compactify_abs_tol` must be a length-1 double/integer >= 0" ,
272- class = " epiprocess__tbl_diff2__compactify_abs_tol_invalid" )
277+ cli_abort(
278+ " `compactify_abs_tol` must be a length-1 double/integer >= 0" ,
279+ class = " epiprocess__tbl_diff2__compactify_abs_tol_invalid"
280+ )
273281 }
274282
275283 all_names <- names(later_tbl )
276- val_names <- all_names [! (all_names %in% ukey_names )]
284+ val_names <- all_names [! (all_names %in% ukey_names )]
277285 updates <- tbl_fast_anti_join(later_tbl , earlier_snapshot , ukey_names , val_names , compactify_abs_tol )
278286 if (later_format == " snapshot" ) {
287+ # Interpret `later_tbl` as a full snapshot, rather than a diff / sparse
288+ # update. That means that any ukeys in `earlier_snapshot` that don't appear
289+ # in `later_tbl` were deleted in the later snapshot.
279290 deletions <- tbl_fast_anti_join(earlier_snapshot [ukey_names ], later_tbl [ukey_names ], ukey_names , character (), 0 )
280- updates <- vec_rbind(updates , deletions ) # fills vals with NAs
291+ updates <- vec_rbind(updates , deletions ) # fills val cols with NAs
281292 }
293+ # If `later_format == "update"`, we don't need to do anything special about
294+ # the above ukeys. The full snapshot for the later version would include the
295+ # corresponding rows unchanged, and the diff for these unchanged rows would be
296+ # empty.
282297 updates
283298}
284299
@@ -299,20 +314,23 @@ tbl_patch <- function(snapshot, update, ukey_names) {
299314 # Most input validation. This is a small function so use faster validation
300315 # variants:
301316 if (! is_tibble(update )) {
302- cli_abort(" `update` must be a tibble" ,
317+ cli_abort(
318+ " `update` must be a tibble" ,
303319 class = " epiprocess__tbl_patch__update_class_invalid"
304320 )
305321 }
306322 if (is.null(snapshot )) {
307323 return (update )
308324 }
309325 if (! is_tibble(snapshot )) {
310- cli_abort(" `snapshot` must be a tibble" ,
326+ cli_abort(
327+ " `snapshot` must be a tibble" ,
311328 class = " epiprocess__tbl_patch__snapshot_class_invalid"
312329 )
313330 }
314331 if (! is.character(ukey_names ) || ! all(ukey_names %in% names(snapshot ))) {
315- cli_abort(" `ukey_names` must be a subset of column names" ,
332+ cli_abort(
333+ " `ukey_names` must be a subset of column names" ,
316334 class = " epiprocess__tbl_patch__ukey_names_invalid"
317335 )
318336 }
@@ -333,8 +351,8 @@ tbl_patch <- function(snapshot, update, ukey_names) {
333351 # This is like `!duplicated()` but faster, and like `vec_unique_loc()` but guaranteeing
334352 # that we get the first appearance since `vec_duplicate_id()` guarantees that
335353 # it points to the first appearance.
336- not_overwritten <- dup_ids == vec_seq_along(result_tbl )
337- result_tbl <- result_tbl [not_overwritten , ]
354+ is_only_or_favored_appearance <- dup_ids == vec_seq_along(result_tbl )
355+ result_tbl <- result_tbl [is_only_or_favored_appearance , ]
338356
339357 result_tbl
340358}
0 commit comments