|
####################################
# Data Professor                   #
# http://youtube.com/dataprofessor #
# http://github.com/dataprofessor  #
####################################

# Purpose: fit a polynomial-kernel SVM classifier to the Iris data with caret,
# then report its performance on the training set, on a held-out test set and
# under 10-fold cross-validation, and finally plot the feature importance.

# Importing libraries
library(datasets) # Contains the Iris data set
library(caret)    # Machine learning toolkit; CARET stands for
                  # Classification And REgression Training

# Importing the Iris data set
data(iris)

# Check to see if there are missing data (prints the total number of NA cells)
sum(is.na(iris))

# To achieve a reproducible model, set the random seed number
set.seed(100)

# Performs a stratified random split of the data set (80% train / 20% test),
# stratified on the outcome column Species
TrainingIndex <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
TrainingSet <- iris[TrainingIndex, ]  # Training Set
TestingSet <- iris[-TrainingIndex, ]  # Test Set

# Compare scatter plot of the 80 and 20 data subsets
# (placeholder: no plotting code is provided for this step in this script)


###############################
# SVM model (polynomial kernel)

# Build Training model
# method = "none" fits a single model on the whole training set without any
# resampling, using the one hyper-parameter combination given in tuneGrid.
Model <- train(
  Species ~ .,
  data = TrainingSet,
  method = "svmPoly",
  na.action = na.omit,
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "none"),
  tuneGrid = data.frame(degree = 1, scale = 1, C = 1)
)

# Build CV model
# savePredictions = "final" keeps the held-out prediction of every training row
# so that a genuine cross-validated confusion matrix can be computed below.
# The fitted object gets its own name so it is not overwritten by predictions.
Model.cv.fit <- train(
  Species ~ .,
  data = TrainingSet,
  method = "svmPoly",
  na.action = na.omit,
  preProcess = c("scale", "center"),
  trControl = trainControl(
    method = "cv",
    number = 10,
    savePredictions = "final"
  ),
  tuneGrid = data.frame(degree = 1, scale = 1, C = 1)
)


# Apply model for prediction
Model.training <- predict(Model, TrainingSet) # Prediction on the Training set
Model.testing <- predict(Model, TestingSet)   # Prediction on the Testing set

# Cross-validated predictions: each training row is predicted by the fold model
# that did NOT see it. Calling predict() on the CV-trained object would instead
# use the final model refit on all training rows and merely reproduce the
# training-set predictions. Rows are put back into training-set order.
CvHeldOut <- Model.cv.fit$pred
CvHeldOut <- CvHeldOut[order(CvHeldOut$rowIndex), ]
Model.cv <- CvHeldOut$pred

# Model performance (Displays confusion matrix and statistics)
Model.training.confusion <- confusionMatrix(Model.training, TrainingSet$Species)
Model.testing.confusion <- confusionMatrix(Model.testing, TestingSet$Species)
Model.cv.confusion <- confusionMatrix(Model.cv, CvHeldOut$obs)

print(Model.training.confusion)
print(Model.testing.confusion)
print(Model.cv.confusion)

# Feature importance
Importance <- varImp(Model)
plot(Importance)              # Default colours
plot(Importance, col = "red") # Same plot drawn in red
0 commit comments