Skip to content

Commit

Permalink
Increase point size on fit scatterplot
Browse files Browse the repository at this point in the history
  • Loading branch information
KauriGiant committed Dec 5, 2024
1 parent 197d65d commit c55934a
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 15 deletions.
Empty file added .Rhistory
Empty file.
28 changes: 22 additions & 6 deletions wrapper.xgb.cv.logistic.r
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,30 @@ PredClass = ifelse(cv$pred >0.5,1,0)
###Test accuracy of predictions
Confusion = confusionMatrix(as.factor(PredClass),as.factor(CVtrain_y))

###Calculate ROC
###Calculate Out of Bag ROC
Pred = cv$pred[order(CVtrain_y)]
Truth = CVtrain_y[order(CVtrain_y)]
ROC = roc_auc_vec(
CVROC = roc_auc_vec(
estimate = Pred,
truth = as.factor(Truth),event_level="second")


###Calculate ROC AUC from each fold model's predictions on its own training rows, pooled across folds
Preds = vector(length = 0)
Truth = vector(length = 0)
for(fold in 1:Nfolds)
{
Model = xgb.Booster.complete(cv$models[[fold]])
Preds = c(Preds,predict(Model, newdata = CVtrain_x[-(cv$folds[[fold]]),]))
Truth = c(Truth,CVtrain_y[-(cv$folds[[fold]])])
}
Preds = Preds[order(Truth)]
Truth = Truth[order(Truth)]

TrainingROC = roc_auc_vec(
estimate = Preds,
truth = as.factor(Truth),event_level="second")
###Print box plots of predicted probabilities against observed occurrences for each class
xgbm.cv.fit.boxplot.logistic(cv$pred,Data[, colnames(Data) == Response],ROC,path)
xgbm.cv.fit.boxplot.logistic(cv$pred,Data[, colnames(Data) == Response],ROC = c(TrainingROC,CVROC),path)

####Use custom function to generate predictor importance bar plots
Filename = paste0(path,"PredictorImportance.png")
Expand Down Expand Up @@ -80,8 +94,10 @@ if(DoInteraction == TRUE)
OutList = list()
Key = "Model"
OutList[[Key]] = cv
Key = "ROC"
OutList[[Key]] = ROC
Key = "OOBROC"
OutList[[Key]] = CVROC
Key = "TrainingROC"
OutList[[Key]] = TrainingROC
Key = "ConfusionMatrix"
OutList[[Key]] = Confusion
Key = "Predictor importance"
Expand Down
6 changes: 4 additions & 2 deletions xgb.cv.fit.boxplot.r
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
##################################################
###Simple boxplot of predicted probabilities for
###Simple boxplot of predicted probabilities for out of bag observations for
###binary and multiclass responses
###Separate plots fitted for each level of multiclass responses
###Designed for easy inspection of xgb.cv predictions
###Out of bag predictions are used because they give a better indication of
###the ability to discriminate success from failure in new data
##################################################

xgbm.cv.fit.boxplot.multi = function(pred, ###$pred from xgb.cv output
Expand Down Expand Up @@ -32,7 +34,7 @@ xgbm.cv.fit.boxplot.logistic = function(pred,###$pred from xgb.cv output
Y = CVtrain_y
Pred = pred[order(Y)]
Y=Y[order(Y)]
Title = paste0("ROC = ",round(ROC,digits = 3))
Title = paste0("Training ROC = ",round(ROC[1],digits = 3),"; OOB ROC = ",round(ROC[2],digits = 3))
Filename = paste0(path,"FitBoxplot.png")
png(Filename, height = 1600,width = 1600)
par(mar = c(10,12,12,2), cex.main = 4,cex.lab = 3.6,cex.axis = 3.4,mgp = c(7,2,0))
Expand Down
3 changes: 2 additions & 1 deletion xgb.cv.fit.scatterplot.r
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ xgbm.cv.fit.scatterplot = function(pred,CVtrain_y,path)
png(Filename, height = 1600,width = 1600)
par(mar = c(10,12,12,2), cex.main = 4,cex.lab = 3.6,cex.axis = 3.4,mgp = c(7,3.5,0))
plot(pred~CVtrain_y, main = Title,
xlab = paste0("Observed response"),ylab = paste0("Fitted response"))
xlab = paste0("Observed response"),ylab = paste0("Fitted response"), pch = NA)
points(CVtrain_y,pred, cex = 2, col = 1)
abline(0,1,col = 2,lwd = 3)
dev.off()
}
10 changes: 4 additions & 6 deletions xgb.cv.predict.r
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,13 @@ xgb.cv.predict = function(cv, ###xgb.cv model object
)
{
###Predict function requires data as a matrix
PredData = as.matrix(PredData[,colnames(PredData) %in% Predictors])
Preds = vector(length = 0)
Fold = vector(length = 0)
PredX = as.matrix(PredData[,colnames(PredData) %in% Predictors])
Preds = as.data.frame(matrix(nrow = nrow(PredX), ncol = 0))
for(fold in 1:Nfolds)
{
Model = xgb.Booster.complete(cv$models[[fold]])
Preds = c(Preds,predict(Model, newdata = PredData))
Fold = c(Fold, rep(fold, times = nrow(PredData)))
Preds = cbind(Preds,predict(Model, newdata = PredX))
}
return(cbind(Fold,Preds))
return(Preds)
}

0 comments on commit c55934a

Please sign in to comment.