@@ -199,20 +199,39 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
199199# '
200200# ' @keywords internal
tbl_fast_anti_join <- function(x, y, ukey_names, val_names, abs_tol = 0) {
  # Return the rows of `x` (with all of their columns) that have no counterpart
  # in `y`. A counterpart is a row of `y` with the same `ukey_names` values and
  # with `val_names` values that are identical (`abs_tol == 0`) or approximately
  # equal according to `vec_approx_equal()` (any other `abs_tol`).
  #
  # NOTE(review): both branches presume that `ukey_names` uniquely identify rows
  # within `x` and within `y`, so that any duplicate found in the row-bound
  # table pairs one row of `x` with one row of `y` -- confirm against callers.

  # Only key and value columns take part in matching. Keep `x` itself untouched
  # so the result can be sliced from the full input.
  x_keyvals <- x[c(ukey_names, val_names)]
  y_keyvals <- y[c(ukey_names, val_names)]
  # Bind the key/value subsets (not the full tables): rows of `x` come first,
  # followed by rows of `y`, and unrelated columns cannot affect the matching.
  xy_keyvals <- vec_rbind(x_keyvals, y_keyvals)
  if (abs_tol == 0) {
    # perf: 0 tolerance is just like a normal `anti_join` by both ukey_names and
    # val_names together. We can do that more quickly than `anti_join` with
    # `vctrs` by checking for keyvals of `x` that are duplicated in `y`.
    # (`vec_duplicate_detect` marks every member of a duplicated group,
    # including its first occurrence, unlike `duplicated`.)
    x_exclude <- vec_duplicate_detect(xy_keyvals)
    # Keep only the flags that belong to the rows that came from `x`.
    x_exclude <- vec_slice(x_exclude, seq_len(nrow(x)))
  } else {
    xy_ukeys <- xy_keyvals[ukey_names]
    # Locate ukeys in `y` that match ukeys in `x` and where in `x` they map back
    # to. It's faster to do this with `vec_duplicate_id` on `xy_ukeys` than to
    # perform an `inner_join`.
    xy_ukey_dup_ids <- vec_duplicate_id(xy_ukeys)
    xy_ukey_dup_inds2 <- which(xy_ukey_dup_ids != seq_along(xy_ukey_dup_ids))
    # ^ these should point to rows from y that had a ukey match in x
    xy_ukey_dup_inds1 <- xy_ukey_dup_ids[xy_ukey_dup_inds2]
    # ^ these should point to the respectively corresponding rows from x

    # Anything in `x` without a ukey match in `y` should be kept; start off with
    # `FALSE` for everything and just fill in `TRUE`/`FALSE` results for the
    # ukeys with matches in `y`:
    x_exclude <- rep(FALSE, nrow(x))
    xy_vals <- xy_keyvals[val_names]
    x_exclude[xy_ukey_dup_inds1] <- vec_approx_equal(
      xy_vals,
      inds1 = xy_ukey_dup_inds2,
      xy_vals,
      inds2 = xy_ukey_dup_inds1,
      na_equal = TRUE, abs_tol = abs_tol
    )
  }
  # Slice the original `x` so columns outside the keys and values are returned
  # as well.
  vec_slice(x, !x_exclude)
}
@@ -269,7 +288,7 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
269288 }
270289 later_format <- arg_match0(later_format , c(" snapshot" , " update" ))
271290 if (! (is.vector(compactify_abs_tol , mode = " numeric" ) &&
272- length(compactify_abs_tol ) == 1L && # nolint:indentation_linter
291+ length(compactify_abs_tol ) == 1L && # nolint: indentation_linter
273292 compactify_abs_tol > = 0 )) {
274293 # Give a specific message:
275294 assert_numeric(compactify_abs_tol , lower = 0 , any.missing = FALSE , len = 1L )
0 commit comments