JingweiToo
diff --git a/‎Demo_KNN.py
Lines changed: 43 additions & 0 deletions b/‎Demo_KNN.py
Lines changed: 43 additions & 0 deletions
diff --git a/‎Demo_PSO.py
Lines changed: 61 additions & 0 deletions b/‎Demo_PSO.py
Lines changed: 61 additions & 0 deletions
diff --git a/‎ML/__init__.py
Lines changed: 1 addition & 0 deletions b/‎ML/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎ML/dt.py
Lines changed: 144 additions & 0 deletions b/‎ML/dt.py
Lines changed: 144 additions & 0 deletions
@@ -0,0 +1,43 @@
+import numpy as np
+import pandas as pd
+# change this to switch algorithm & types of validation (jho, jkfold, jloo)
+from ML.knn import jkfold 
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+
+# load data
+data  = pd.read_csv('ionosphere.csv')
+data  = data.values
+feat  = np.asarray(data[:, 0:-1])
+label = np.asarray(data[:, -1])
+
+# parameters
+k     = 5
+kfold = 10
+opts  = {'k':k, 'kfold':kfold}
+# KNN with k-fold
+mdl   = jkfold(feat, label, opts) 
+
+# overall accuracy
+accuracy = mdl['acc']
+
+# confusion matrix
+confmat  = mdl['con']
+print(confmat)
+
+# precision & recall
+result   = mdl['r']
+print(result)
+
+
+# plot confusion matrix
+uni     = np.unique(label)
+# Normalise
+con     = confmat.astype('float') / confmat.sum(axis=1)[:, np.newaxis]
+fig, ax = plt.subplots()
+sns.heatmap(con, annot=True, fmt='.2f', xticklabels=uni, yticklabels=uni, cmap="YlGnBu")
+plt.ylabel('Actual')
+plt.xlabel('Predicted')
+plt.title('KNN')
+plt.show()
@@ -0,0 +1,61 @@
+import numpy as np
+import pandas as pd
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.model_selection import train_test_split
+from FS.pso import jfs   # change this to switch algorithm 
+import matplotlib.pyplot as plt
+
+
+# load data
+data  = pd.read_csv('ionosphere.csv')
+data  = data.values
+feat  = np.asarray(data[:, 0:-1])
+label = np.asarray(data[:, -1])
+
+# split data into train & validation (70 -- 30)
+xtrain, xtest, ytrain, ytest = train_test_split(feat, label, test_size=0.3, stratify=label)
+fold = {'xt':xtrain, 'yt':ytrain, 'xv':xtest, 'yv':ytest}
+
+# parameter
+k    = 5     # k-value in KNN
+N    = 10    # number of particles
+T    = 100   # maximum number of iterations
+opts = {'k':k, 'fold':fold, 'N':N, 'T':T}
+
+# perform feature selection
+fmdl = jfs(feat, label, opts)
+sf   = fmdl['sf']
+
+# model with selected features
+num_train = np.size(xtrain, 0)
+num_valid = np.size(xtest, 0)
+x_train   = xtrain[:, sf]
+y_train   = ytrain.reshape(num_train)  # Solve bug
+x_valid   = xtest[:, sf]
+y_valid   = ytest.reshape(num_valid)  # Solve bug
+
+mdl       = KNeighborsClassifier(n_neighbors = k) 
+mdl.fit(x_train, y_train)
+
+# accuracy
+y_pred    = mdl.predict(x_valid)
+Acc       = np.sum(y_valid == y_pred)  / num_valid
+print("Accuracy:", 100 * Acc)
+
+# number of selected features
+num_feat = fmdl['nf']
+print("Feature Size:", num_feat)
+
+# plot convergence
+curve   = fmdl['c']
+curve   = curve.reshape(np.size(curve,1))
+x       = np.arange(0, opts['T'], 1.0) + 1.0
+
+fig, ax = plt.subplots()
+ax.plot(x, curve, 'o-')
+ax.set_xlabel('Number of Iterations')
+ax.set_ylabel('Fitness')
+ax.set_title('PSO')
+ax.grid()
+plt.show()
+
@@ -0,0 +1 @@
+#
@@ -0,0 +1,144 @@
+import numpy as np
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import StratifiedKFold
+from sklearn.model_selection import LeaveOneOut
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import classification_report
+
+
+def jho(feat, label, opts):
+    ho = 0.3   # ratio of testing set
+    
+    if 'ho' in opts:
+        ho = opts['ho']
+    
+    # number of instances
+    num_data = np.size(feat, 0)
+    label    = label.reshape(num_data)  # Solve bug
+    
+    # prepare data
+    xtrain, xtest, ytrain, ytest = train_test_split(feat, label, test_size=ho, stratify=label) 
+    # train model
+    mdl     = DecisionTreeClassifier(criterion="gini")
+    mdl.fit(xtrain, ytrain)
+    
+    # prediction
+    ypred   = mdl.predict(xtest)
+    # confusion matric
+    uni     = np.unique(ytest)
+    confmat = confusion_matrix(ytest, ypred, labels=uni)
+    # report
+    report  = classification_report(ytest, ypred)
+    # accuracy
+    acc     = np.sum(ytest == ypred) / np.size(xtest,0)
+    
+    print("Accuracy (DT_HO):", 100 * acc)
+    
+    dt = {'acc': acc, 'con': confmat, 'r': report}
+    
+    return dt
+    
+
+def jkfold(feat, label, opts):
+    kfold = 10   # number of k in kfold
+    
+    if 'kfold' in opts:
+        kfold = opts['kfold']
+    
+    # number of instances
+    num_data = np.size(feat, 0)
+    # define selected features
+    x_data   = feat
+    y_data   = label.reshape(num_data)  # Solve bug
+    
+    fold     = StratifiedKFold(n_splits = kfold)
+    fold.get_n_splits(x_data, y_data)
+    
+    ytest2 = []
+    ypred2 = []
+    Afold2 = []
+    for train_idx, test_idx in fold.split(x_data, y_data):
+        xtrain  = x_data[train_idx,:] 
+        ytrain  = y_data[train_idx]
+        xtest   = x_data[test_idx,:]
+        ytest   = y_data[test_idx]
+        # train model
+        mdl     = DecisionTreeClassifier(criterion="gini")
+        mdl.fit(xtrain, ytrain)
+        # prediction
+        ypred   = mdl.predict(xtest)
+        # accuracy
+        Afold   = np.sum(ytest == ypred) / np.size(xtest,0)
+        
+        ytest2  = np.concatenate((ytest2, ytest), axis=0)
+        ypred2  = np.concatenate((ypred2, ypred), axis=0)
+        Afold2.append(Afold) 
+    
+    # average accuracy
+    Afold2  = np.array(Afold2)
+    acc     = np.mean(Afold2)
+    # confusion matric
+    uni     = np.unique(ytest2)
+    confmat = confusion_matrix(ytest2, ypred2, labels=uni)
+    # report
+    report  = classification_report(ytest2, ypred2)
+        
+    print("Accuracy (DT_K-fold):", 100 * acc)
+    
+    dt = {'acc': acc, 'con': confmat, 'r': report}
+    
+    return dt
+
+
+def jloo(feat, label, opts):
+    
+    # number of instances
+    num_data = np.size(feat, 0)
+    # define selected features
+    x_data   = feat
+    y_data   = label.reshape(num_data)  # Solve bug
+ 
+    loo      = LeaveOneOut()
+    loo.get_n_splits(x_data)
+    
+    ytest2 = []
+    ypred2 = []
+    Afold2 = []
+    for train_idx, test_idx in loo.split(x_data):
+        xtrain = x_data[train_idx,:] 
+        ytrain = y_data[train_idx]
+        xtest  = x_data[test_idx,:]
+        ytest  = y_data[test_idx]
+        # train model
+        mdl     = DecisionTreeClassifier(criterion="gini")
+        mdl.fit(xtrain, ytrain)
+        # prediction
+        ypred   = mdl.predict(xtest)
+        # accuracy
+        Afold   = np.sum(ytest == ypred) / np.size(xtest,0)
+        
+        ytest2  = np.concatenate((ytest2, ytest), axis=0)
+        ypred2  = np.concatenate((ypred2, ypred), axis=0)
+        Afold2.append(Afold) 
+    
+    # average accuracy
+    Afold2  = np.array(Afold2)
+    acc     = np.mean(Afold2)
+    # confusion matric
+    uni     = np.unique(ytest2)
+    confmat = confusion_matrix(ytest2, ypred2, labels=uni)
+    # report
+    report  = classification_report(ytest2, ypred2)
+        
+    print("Accuracy (DT_LOO):", 100 * acc)
+    
+    dt = {'acc': acc, 'con': confmat, 'r': report}
+    
+    return dt
+
+    
+    
+    
+    
+