Skip to content

Commit 5e560ac

Browse files
committed
edits for ms
1 parent 1b83c51 commit 5e560ac

File tree

126 files changed

+3222
-73
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

126 files changed

+3222
-73
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Description: Estimate significance of importance metrics
77
metrics for each predictor variable and p-value of
88
observed. Provides summary and visualization functions for 'randomForest'
99
results.
10-
Version: 2.1.2
10+
Version: 2.1.5
1111
Authors@R: c(
1212
EA = person("Eric", "Archer", email = "eric.archer@noaa.gov", role = c("aut", "cre")))
1313
BugReports: https://github.com/EricArcher/rfPermute/issues

NAMESPACE

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@ S3method(plot,rp.importance)
44
S3method(rfPermute,default)
55
S3method(rfPermute,formula)
66
export(classConfInt)
7-
export(clean.rf.data)
7+
export(cleanRFdata)
88
export(confusionMatrix)
9-
export(exptd.err.rate)
9+
export(exptdErrRate)
1010
export(impHeatmap)
1111
export(pctCorrect)
1212
export(plotNull)
1313
export(plotVotes)
14-
export(proximity.plot)
14+
export(proximityPlot)
1515
export(rfPermute)
1616
export(rfPermute.default)
1717
export(rfPermute.formula)
@@ -23,11 +23,15 @@ importFrom(ggplot2,aes_string)
2323
importFrom(ggplot2,coord_flip)
2424
importFrom(ggplot2,element_blank)
2525
importFrom(ggplot2,element_rect)
26+
importFrom(ggplot2,facet_wrap)
2627
importFrom(ggplot2,geom_bar)
28+
importFrom(ggplot2,geom_density)
29+
importFrom(ggplot2,geom_histogram)
2730
importFrom(ggplot2,geom_point)
2831
importFrom(ggplot2,geom_polygon)
2932
importFrom(ggplot2,geom_raster)
3033
importFrom(ggplot2,geom_rect)
34+
importFrom(ggplot2,geom_vline)
3135
importFrom(ggplot2,ggplot)
3236
importFrom(ggplot2,ggtitle)
3337
importFrom(ggplot2,guide_colorbar)
@@ -39,9 +43,6 @@ importFrom(ggplot2,xlab)
3943
importFrom(ggplot2,ylab)
4044
importFrom(grDevices,chull)
4145
importFrom(grDevices,rainbow)
42-
importFrom(graphics,abline)
43-
importFrom(graphics,par)
44-
importFrom(graphics,plot)
4546
importFrom(gridExtra,grid.arrange)
4647
importFrom(parallel,detectCores)
4748
importFrom(parallel,makeForkCluster)
@@ -54,7 +55,6 @@ importFrom(reshape2,melt)
5455
importFrom(stats,binom.test)
5556
importFrom(stats,cmdscale)
5657
importFrom(stats,complete.cases)
57-
importFrom(stats,density)
5858
importFrom(stats,model.frame)
5959
importFrom(stats,model.response)
6060
importFrom(stats,na.fail)

R/clean.rf.data.R renamed to R/cleanRFdata.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#' @importFrom stats complete.cases
1515
#' @export
1616
#'
17-
clean.rf.data <- function(x, y, data, max.levels = 30) {
17+
cleanRFdata <- function(x, y, data, max.levels = 30) {
1818
data <- as.data.frame(data)
1919
if (is.null(colnames(data))) colnames(data) <- 1:ncol(data)
2020
x <- setdiff(x, y)

R/confusionMatrix.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#'
1010
#' @author Eric Archer \email{eric.archer@@noaa.gov}
1111
#'
12-
#' @seealso \code{\link{classConfInt}}
12+
#' @seealso \code{\link{classConfInt}}, \code{\link{exptdErrRate}}
1313
#'
1414
#' @examples
1515
#' data(mtcars)
@@ -26,7 +26,7 @@ confusionMatrix <- function(rf, conf.level = 0.95, threshold = 0.8) {
2626
# Get confidence intervals
2727
ci <- classConfInt(rf, conf.level = conf.level, threshold = threshold)
2828
# Get expected error rate (prior)
29-
prior <- exptd.err.rate(rf)
29+
prior <- exptdErrRate(rf)
3030
prior <- (1 - prior[c(2:length(prior), 1)]) * 100
3131
# Add rows and columns
3232
conf <- rbind(conf, Overall = rep(NA, ncol(conf)))

R/dataSets.R

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#' @docType data
2+
#' @name symb.metab
3+
#' @title Symbiodinium type metabolite profiles
4+
#' @description A data.frame of 155 metabolite relative concentrations for
5+
#' 64 samples of four Symbiodinium clade types.
6+
#' @usage data(symb.metab)
7+
#' @format data.frame
8+
#' @references Klueter, A.; Crandall, J.B.; Archer, F.I.; Teece, M.A.; Coffroth, M.A.
9+
#' Taxonomic and Environmental Variation of Metabolite Profiles in Marine
10+
#' Dinoflagellates of the Genus Symbiodinium. Metabolites 2015, 5, 74-99.
11+
#' @keywords datasets
12+
NULL

R/exptd.err.rate.R renamed to R/exptdErrRate.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
#' @title Expected Error Rate
2-
#' @description Calculate expected OOB error rates for randomForest
3-
#' classification model based on random assignment and class sizes (prior).
2+
#' @description Calculate expected OOB error rates (priors) for randomForest
3+
#' classification model based on random assignment and class sizes.
44
#'
55
#' @param rf an object inheriting from \code{\link{randomForest}}.
66
#'
7-
#' @return a vector of expected error rates for each class.
7+
#' @return a vector of expected error rates (priors) for each class.
88
#'
99
#' @author Eric Archer \email{eric.archer@@noaa.gov}
1010
#'
1111
#' @export
1212
#'
13-
exptd.err.rate <- function(rf) {
13+
exptdErrRate <- function(rf) {
1414
if(!inherits(rf, "randomForest")) {
1515
stop("'rf' is not a randomForest or rfPermute object.")
1616
}

R/plotNull.R

Lines changed: 52 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,19 @@
55
#' call to \code{\link{rfPermute}}.
66
#'
77
#' @param x An object produced by a call to \code{\link{rfPermute}}.
8+
#' @param preds a character vector of predictors to plot. If \code{NULL}, then
9+
#' all predictors are plotted.
810
#' @param imp.type Either a numeric or character vector giving the
911
#' importance metric(s) to plot.
1012
#' @param scale Plot importance measures scaled (divided by) standard errors?
11-
#' @param ... Optional graphical arguments to be sent to \code{\link[graphics]{par}}.
13+
#' @param plot.type type of plot to produce: \code{"density"} for smoothed density
14+
#' plot, or \code{"hist"} for histogram.
1215
#'
13-
#' @details The function will generate an individual plot for
14-
#' each variable and importance metric on the default graphics
15-
#' device.
16+
#' @details The function will generate a plot for each predictor, with faceted
17+
#' importance metrics. The vertical red line shows the observed importance
18+
#' score and the p-value is given in the facet label.
19+
#'
20+
#' @return A named list of the \code{ggplot} figures produced is invisibly returned.
1621
#'
1722
#' @author Eric Archer \email{eric.archer@@noaa.gov}
1823
#'
@@ -22,44 +27,67 @@
2227
#' ozone.rfP <- rfPermute(Ozone ~ ., data = airquality, ntree = 100, na.action = na.omit, nrep = 50)
2328
#'
2429
#' # Plot the null distributions and observed values.
25-
#' layout(matrix(1:6, nrow = 2))
2630
#' plotNull(ozone.rfP)
27-
#' layout(matrix(1))
2831
#'
29-
#' @importFrom graphics abline par plot
30-
#' @importFrom stats density
32+
#' @importFrom reshape2 melt
33+
#' @importFrom ggplot2 ggplot aes_string geom_histogram geom_density xlab
34+
#' ggtitle geom_vline facet_wrap
3135
#' @export
3236
#'
33-
plotNull <- function(x, imp.type = 1, scale = TRUE, ...) {
37+
plotNull <- function(x, preds = NULL, imp.type = NULL, scale = TRUE,
38+
plot.type = c("density", "hist")) {
39+
3440
if(!inherits(x, "rfPermute")) stop("'x' is not of class 'rfPermute'")
35-
imp <- randomForest::importance(x, scale = scale)
36-
imp <- imp[, c(ncol(imp) - 1, ncol(imp))]
41+
imp <- randomForest::importance(x, type = NULL, class = NULL, scale = scale)
3742

43+
if(is.null(imp.type)) imp.type <- colnames(imp)
44+
imp.type <- unique(imp.type)
3845
if(is.character(imp.type)) {
3946
not.found <- imp.type[!(imp.type %in% colnames(imp))]
4047
if(length(not.found) > 0) {
4148
imp <- paste(not.found, collapse = ", ")
4249
stop(paste("imp.type: ", imp, " is not in 'x'", sep = ""))
4350
}
4451
} else if(is.numeric(imp.type)) {
52+
imp <- imp[, c(ncol(imp) -1, ncol(imp))]
4553
if(!all(imp.type <= ncol(imp))) stop("some 'imp.type' out of range")
4654
imp.type <- colnames(imp)[imp.type]
4755
} else stop("'imp.type' is not a character or numeric vector")
4856

4957
sc <- if(scale) "scaled" else "unscaled"
5058

51-
op <- par(..., no.readonly = TRUE)
52-
for(p in rownames(imp)) {
53-
for(i in imp.type) {
54-
n <- x$null.dist[[sc]][p, i, ]
55-
o <- imp[p, i]
56-
xlab <- if(is.character(i)) i else colnames(imp)[i]
57-
pval <- x$pval[p, i, sc]
58-
main <- c(paste("Variable:", p),
59-
paste("P(null >= obs) =", sprintf("%0.3f", pval)))
60-
plot(density(n), xlim = range(c(n, o)), xlab = xlab, main = main)
61-
abline(v = o, lwd = 2)
62-
}
59+
if(is.null(preds)) preds <- rownames(imp)
60+
preds.not.found <- setdiff(preds, rownames(imp))
61+
if(length(preds.not.found) > 0) {
62+
not.found <- paste(preds.not.found, collapse = ", ")
63+
stop(paste("The following predictors could not be found:", not.found))
6364
}
64-
par(op)
65+
66+
plot.type <- match.arg(plot.type)
67+
g <- sapply(preds, function(p) {
68+
df <- melt(
69+
sapply(imp.type, function(i) x$null.dist[[sc]][p, i, ]),
70+
value.name = "importance",
71+
varnames = c("rep", "imp.type")
72+
)
73+
obs <- melt(
74+
imp[p, imp.type, drop = FALSE],
75+
value.name = "importance",
76+
varnames = c("predictor", "imp.type")
77+
)
78+
79+
pval <- x$pval[p, imp.type, sc]
80+
labels <- paste0(names(pval), " (p = ", sprintf("%0.3f", pval), ")")
81+
levels(df$imp.type) <- levels(obs$imp.type) <- labels
82+
83+
pl <- ggplot(df, aes_string("importance"))
84+
pl <- pl + if(plot.type == "hist") geom_histogram() else geom_density()
85+
pl <- pl + xlab("Importance") + ggtitle(p)
86+
pl <- pl + geom_vline(aes_string(xintercept = "importance"), color = "red", data = obs)
87+
pl <- pl + facet_wrap(~imp.type, scales = "free")
88+
89+
print(pl)
90+
}, simplify = FALSE, USE.NAMES = TRUE)
91+
92+
invisible(g)
6593
}

R/proximity.plot.r renamed to R/proximityPlot.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,14 @@
3030
#' data(mtcars)
3131
#' rf <- randomForest(factor(am) ~ ., mtcars, proximity = TRUE)
3232
#' rf
33-
#' proximity.plot(rf)
33+
#' proximityPlot(rf)
3434
#'
3535
#' @importFrom stats cmdscale
3636
#' @importFrom grDevices chull rainbow
3737
#' @importFrom ggplot2 ggplot aes geom_point labs theme element_blank geom_polygon element_rect
3838
#' @export
3939
#'
40-
proximity.plot <- function(rf, dim.x = 1, dim.y = 2,
40+
proximityPlot <- function(rf, dim.x = 1, dim.y = 2,
4141
legend.loc = c("top", "bottom", "left", "right"),
4242
point.size = 2, circle.size = 8, circle.border = 1,
4343
hull.alpha = 0.3, plot = TRUE) {

R/rfPermute.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
#' \code{\link{plotNull}} for plotting null distributions from the \code{rfPermute} objects. \cr
3232
#' \code{\link{rp.importance}} for extracting importance measures. \cr
3333
#' \code{\link{rp.combine}} for combining multiple \code{rfPermute} objects.\cr
34-
#' \code{\link{proximity.plot}} for plotting case proximities.\cr
34+
#' \code{\link{proximityPlot}} for plotting case proximities.\cr
3535
#' \code{\link{impHeatmap}} for plotting a heatmap of importance scores.\cr
3636
#' \code{\link{randomForest}}
3737
#'

README.md

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ To install the latest version from GitHub:
2727
```r
2828
# make sure you have Rtools installed
2929
if (!require('devtools')) install.packages('devtools')
30+
3031
# install from GitHub
3132
devtools::install_github('EricArcher/rfPermute')
3233
```
@@ -40,23 +41,26 @@ devtools::install_github('EricArcher/rfPermute')
4041
## Current Functions
4142

4243
`classConfInt` Classification Confidence Intervals
43-
`clean.rf.data` Clean Random Forest Input Data
44+
`cleanRFdata` Clean Random Forest Input Data
4445
`confusionMatrix` Confusion Matrix
45-
`exptd.err.rate` Expected Error Rate
46+
`exptdErrRate` Expected Error Rate
4647
`impHeatmap` Importance Heatmap
4748
`pctCorrect` Percent Correctly Classified
4849
`plotNull` Plot Random Forest Importance Null Distributions
49-
`plot.rp.importance` Plot Random Forest Importance Distributions
5050
`plotVotes` Vote Distribution
51-
`proximity.plot` Plot Random Forest Proximity Scores
51+
`plot.rp.importance` Plot Random Forest Importance Distributions
52+
`proximityPlot` Plot Random Forest Proximity Scores
5253
`rfPermute` Estimate Permutation p-values for Random Forest Importance Metrics
5354
`rp.combine` Combine rfPermute Objects
5455
`rp.importance` Extract rfPermute Importance Scores and p-values
5556

56-
## version 2.1.2 (current on GitHub)
57+
## current on GitHub (version 2.1.5)
5758

5859
* Added `type` argument to `plotVotes` to choose between area and bar charts.
5960
* Changed plot.rfPermute to plotNull to avoid clashes and maintain functionality of plot.randomForest.
61+
* Renamed `proximity.plot` to `proximityPlot`, `exptd.err.rate` to `exptdErrRate`, and `clean.rf.data` to `cleanRFdata` to make the camelCase naming scheme more consistent across the package.
62+
* Changed `plotNull` from base graphics to ggplot2.
63+
* Added `symb.metab` data set.
6064

6165
## version 2.1.1
6266

data/symb.metab.rda

73.8 KB
Binary file not shown.

0 commit comments

Comments
 (0)