From 5559177a1791a9fc38723ec74f8b1fb9d37b4694 Mon Sep 17 00:00:00 2001 From: benholmgren <44175897+benholmgren@users.noreply.github.com> Date: Tue, 19 Feb 2019 14:11:41 -0700 Subject: [PATCH 1/3] Update dtm.R --- R/dtm.R | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/R/dtm.R b/R/dtm.R index d91e548..1d269cd 100644 --- a/R/dtm.R +++ b/R/dtm.R @@ -1,6 +1,28 @@ +#' @param X an n by d matrix of coordinates of points used to construct the uniform +#' empirical measure for the distance to measure, where n is the number of points +#' and d is the dimension. +#' +#' @param Grid an m by d matrix of coordinates of points where the distance to measure +#' is computed, where m is the number of points in Grid and d is the dimension. +#' +#' @param m0 a numeric variable for the smoothing parameter of the distance to measure. +#' Roughly, m0 is the percentage of points of X that are considered when the distance +#' to measure is computed for each point of Grid. The value of m0 should be in (0,1). +#' +#' @param r a numeric variable for the tuning parameter of the distance to measure. +#' The value of r should be in [1,∞), and the default value is 2. +#' +#' @param weight either a number, or a vector of length n. If it is a number, then the same +#' weight is applied to each point of X. If it is a vector, weight represents the weights of +#' each point of X. The default value is 1. +#' +#' @return a vector of length m (the number of points stored in Grid) +#' containing the value of the distance to measure function evaluated at each point of Grid. 
+ dtm <- function(X, Grid, m0, r = 2, weight = 1) { - + + # check that parameters X and Grid are both matrices of matching dimension if (!is.numeric(X) && !is.data.frame(X)) { stop("X should be a matrix of coordinates") } @@ -10,12 +32,18 @@ function(X, Grid, m0, r = 2, weight = 1) { if (NCOL(X) != NCOL(Grid)) { stop("dimensions of X and Grid do not match") } + + # ensure that smoothing parameter m0 is a value between 0 and 1 if (!is.numeric(m0) || length(m0) != 1 || m0 < 0 || m0 > 1) { stop("m0 should be a number between 0 and 1") } + + # ensure tuning parameter is a number in [1,∞) if (!is.numeric(r) || length(r) != 1 || r < 1) { stop("r should be a number greater than or equal to 1") } + + # verify that weight is either constant or that it provides a correspondence with every point if (!is.numeric(weight) || (length(weight) != 1 && length(weight) != NROW(X))) { stop("weight should be either a number or a vector of length equals the number of sample") @@ -24,29 +52,35 @@ function(X, Grid, m0, r = 2, weight = 1) { # without weight if (length(weight) == 1) { X <- as.matrix(X) - weightBound <- m0 * NROW(X) + weightBound <- m0 * NROW(X) + # use fast nearest neighbor search algorithm to find distances to k nearest neighbors knnDistance <- FNN::knnx.dist( data = X, query = as.matrix(Grid), k = ceiling(weightBound), algorithm = c("kd_tree")) + # utilize embedded Dtm function to find distance to measure return (Dtm(knnDistance = knnDistance, weightBound = weightBound, r = r)) # with weight } else { + # establish the weightbound and weight parameters to be used in final DtmWeight function X0 <- as.matrix(X[weight != 0, , drop = FALSE]) weight0 <- weight[weight != 0] weight0sort <- sort(weight0) weightBound <- m0 * sum(weight0) weightSumTemp <- 0 + # add sorted weight values to a sum until that sum reaches weight bound for (k0 in seq(along = weight0)) { weightSumTemp <- weightSumTemp + weight0sort[k0] if (weightSumTemp >= weightBound) { break } } + # create a matrix of 
nearest neighbor indeces using the kd tree algorithm knnDistanceIndex <- FNN::get.knnx( data = X0, query = as.matrix(Grid), k = k0, algorithm = c("kd_tree")) + # use embedded DtmWeight function return (DtmWeight( knnDistance = knnDistanceIndex[["nn.dist"]], weightBound = weightBound, r = r, knnIndex = knnDistanceIndex[["nn.index"]], weight = weight0)) } -} \ No newline at end of file +} From 8885d86fa7625fd02f2c3f1ec260733c7d8bea8c Mon Sep 17 00:00:00 2001 From: benholmgren <44175897+benholmgren@users.noreply.github.com> Date: Mon, 25 Feb 2019 10:44:06 -0700 Subject: [PATCH 2/3] Revised dtm comments Streamlined comments again in similar manner to previous PR --- R/dtm.R | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/R/dtm.R b/R/dtm.R index 1d269cd..8c2fdac 100644 --- a/R/dtm.R +++ b/R/dtm.R @@ -22,7 +22,6 @@ dtm <- function(X, Grid, m0, r = 2, weight = 1) { - # check that parameters X and Grid are both matrices of matching dimension if (!is.numeric(X) && !is.data.frame(X)) { stop("X should be a matrix of coordinates") } @@ -33,12 +32,10 @@ function(X, Grid, m0, r = 2, weight = 1) { stop("dimensions of X and Grid do not match") } - # ensure that smoothing parameter m0 is a value between 0 and 1 if (!is.numeric(m0) || length(m0) != 1 || m0 < 0 || m0 > 1) { stop("m0 should be a number between 0 and 1") } - # ensure tuning parameter is a number in [1,∞) if (!is.numeric(r) || length(r) != 1 || r < 1) { stop("r should be a number greater than or equal to 1") } @@ -57,7 +54,7 @@ function(X, Grid, m0, r = 2, weight = 1) { knnDistance <- FNN::knnx.dist( data = X, query = as.matrix(Grid), k = ceiling(weightBound), algorithm = c("kd_tree")) - # utilize embedded Dtm function to find distance to measure + # find dtm without considering a weight return (Dtm(knnDistance = knnDistance, weightBound = weightBound, r = r)) # with weight @@ -75,10 +72,10 @@ function(X, Grid, m0, r = 2, weight = 1) { break } } - # create a matrix of nearest neighbor indeces 
using the kd tree algorithm + # create a matrix of nearest neighbor indices using the fast nearest neighbor kd tree algorithm knnDistanceIndex <- FNN::get.knnx( data = X0, query = as.matrix(Grid), k = k0, algorithm = c("kd_tree")) - # use embedded DtmWeight function + # find dtm with weight established return (DtmWeight( knnDistance = knnDistanceIndex[["nn.dist"]], weightBound = weightBound, r = r, knnIndex = knnDistanceIndex[["nn.index"]], weight = weight0)) From 00ba91c4c3e7c1f759f2b9c0525279336a7f9c17 Mon Sep 17 00:00:00 2001 From: benholmgren <44175897+benholmgren@users.noreply.github.com> Date: Tue, 26 Feb 2019 12:50:45 -0700 Subject: [PATCH 3/3] Added title to dtm roxygen needs a title to make nice documentation reliably --- R/dtm.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/dtm.R b/R/dtm.R index 8c2fdac..efa7ebf 100644 --- a/R/dtm.R +++ b/R/dtm.R @@ -1,6 +1,7 @@ +#' @title dtm #' @param X an n by d matrix of coordinates of points used to construct the uniform -#' empirical measure for the distance to measure, where n is the number of points -#' and d is the dimension. +#' empirical measure for the distance to measure, where n is the number of points +#' and d is the dimension. #' #' @param Grid an m by d matrix of coordinates of points where the distance to measure #' is computed, where m is the number of points in Grid and d is the dimension.