diff --git a/.Rhistory b/.Rhistory new file mode 100644 index 0000000..e69de29 diff --git a/wrapper.xgb.cv.logistic.r b/wrapper.xgb.cv.logistic.r index 80abed4..a1f3f6d 100644 --- a/wrapper.xgb.cv.logistic.r +++ b/wrapper.xgb.cv.logistic.r @@ -42,16 +42,30 @@ PredClass = ifelse(cv$pred >0.5,1,0) ###Test accuracy of predictions Confusion = confusionMatrix(as.factor(PredClass),as.factor(CVtrain_y)) -###Calculate ROC +###Calculate Out of Bag ROC Pred = cv$pred[order(CVtrain_y)] Truth = CVtrain_y[order(CVtrain_y)] -ROC = roc_auc_vec( +CVROC = roc_auc_vec( estimate = Pred, truth = as.factor(Truth),event_level="second") - +###Calculate ROC for mean training preds across fold models +Preds = vector(length = 0) +Truth = vector(length = 0) +for(fold in 1:Nfolds) + { + Model = xgb.Booster.complete(cv$models[[fold]]) + Preds = c(Preds,predict(Model, newdata = CVtrain_x[-(cv$folds[[fold]]),])) + Truth = c(Truth,CVtrain_y[-(cv$folds[[fold]])]) + } +Preds = Preds[order(Truth)] +Truth = Truth[order(Truth)] + +TrainingROC = roc_auc_vec( + estimate = Preds, + truth = as.factor(Truth),event_level="second") ###Print box plots of predicted probabilities against observed occurrences for each class -xgbm.cv.fit.boxplot.logistic(cv$pred,Data[, colnames(Data) == Response],ROC,path) +xgbm.cv.fit.boxplot.logistic(cv$pred,Data[, colnames(Data) == Response],ROC = c(TrainingROC,CVROC),path) ####Use custom function to generate predictor importance bar plots Filename = paste0(path,"PredictorImportance.png") @@ -80,8 +94,10 @@ if(DoInteraction == TRUE) OutList = list() Key = "Model" OutList[[Key]] = cv -Key = "ROC" -OutList[[Key]] = ROC +Key = "OOBROC" +OutList[[Key]] = CVROC +Key = "TrainingROC" +OutList[[Key]] = TrainingROC Key = "ConfusionMatrix" OutList[[Key]] = Confusion Key = "Predictor importance" diff --git a/xgb.cv.fit.boxplot.r b/xgb.cv.fit.boxplot.r index fe514d5..080f9db 100644 --- a/xgb.cv.fit.boxplot.r +++ b/xgb.cv.fit.boxplot.r @@ -1,8 +1,10 @@ ################################################## -###Simple boxplot of predicted probabilities for +###Simple boxplot of predicted probabilities for out of bag observations for ###binary and multiclass responses ###Separate plots fitted for each level of multiclass responses ###Designed for easy inspection of xgb.cv predictions +###Use out of bag predictions as better indication of ability to +###discriminate success or failure in new data ################################################## xgbm.cv.fit.boxplot.multi = function(pred, ###$pred from xgb.cv output @@ -32,7 +34,7 @@ xgbm.cv.fit.boxplot.logistic = function(pred,###$pred from xgb.cv output Y = CVtrain_y Pred = pred[order(Y)] Y=Y[order(Y)] - Title = paste0("ROC = ",round(ROC,digits = 3)) + Title = paste0("Training ROC = ",round(ROC[1],digits = 3),"; OOB ROC = ",round(ROC[2],digits = 3)) Filename = paste0(path,"FitBoxplot.png") png(Filename, height = 1600,width = 1600) par(mar = c(10,12,12,2), cex.main = 4,cex.lab = 3.6,cex.axis = 3.4,mgp = c(7,2,0)) diff --git a/xgb.cv.fit.scatterplot.r b/xgb.cv.fit.scatterplot.r index e63491a..1c60cfd 100644 --- a/xgb.cv.fit.scatterplot.r +++ b/xgb.cv.fit.scatterplot.r @@ -11,7 +11,8 @@ xgbm.cv.fit.scatterplot = function(pred,CVtrain_y,path) png(Filename, height = 1600,width = 1600) par(mar = c(10,12,12,2), cex.main = 4,cex.lab = 3.6,cex.axis = 3.4,mgp = c(7,3.5,0)) plot(pred~CVtrain_y, main = Title, - xlab = paste0("Observed response"),ylab = paste0("Fitted response")) + xlab = paste0("Observed response"),ylab = paste0("Fitted response"), pch = NA) + points(CVtrain_y,pred, cex = 2, col = 1) abline(0,1,col = 2,lwd = 3) dev.off() } diff --git a/xgb.cv.predict.r b/xgb.cv.predict.r index 2295f0c..7d9b9cc 100644 --- a/xgb.cv.predict.r +++ b/xgb.cv.predict.r @@ -12,15 +12,13 @@ xgb.cv.predict = function(cv, ###xgb.cv model object ) { ###Predict function requires data as a matrix -PredData = as.matrix(PredData[,colnames(PredData) %in% Predictors]) -Preds = vector(length = 0) -Fold = vector(length = 0) +PredX = as.matrix(PredData[,colnames(PredData) %in% Predictors]) +Preds = as.data.frame(matrix(nrow = nrow(PredX), ncol = 0)) for(fold in 1:Nfolds) { Model = xgb.Booster.complete(cv$models[[fold]]) - Preds = c(Preds,predict(Model, newdata = PredData)) - Fold = c(Fold, rep(fold, times = nrow(PredData))) + Preds = cbind(Preds,predict(Model, newdata = PredX)) } -return(cbind(Fold,Preds)) +return(Preds) }