Skip to content

Commit c432db5

Browse files
authored
Add files via upload
1 parent 85ed834 commit c432db5

File tree

10 files changed

+1354
-0
lines changed

10 files changed

+1354
-0
lines changed

Demo_KNN.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import numpy as np
2+
import pandas as pd
3+
# change this to switch algorithm & types of validation (jho, jkfold, jloo)
4+
from ML.knn import jkfold
5+
import matplotlib.pyplot as plt
6+
import seaborn as sns
7+
8+
9+
# load data: the last column holds the class label, the others are features
data = pd.read_csv('ionosphere.csv').values
feat = np.asarray(data[:, :-1])
label = np.asarray(data[:, -1])

# parameters handed to the validation routine
k = 5
kfold = 10
opts = dict(k=k, kfold=kfold)

# KNN with k-fold
mdl = jkfold(feat, label, opts)

# overall accuracy
accuracy = mdl['acc']

# confusion matrix
confmat = mdl['con']
print(confmat)

# precision & recall
result = mdl['r']
print(result)


# plot confusion matrix
uni = np.unique(label)
# Normalise each row by its total so the cells show per-actual-class fractions
row_totals = confmat.sum(axis=1)[:, np.newaxis]
con = confmat.astype('float') / row_totals
fig, ax = plt.subplots()
sns.heatmap(
    con,
    annot=True,
    fmt='.2f',
    xticklabels=uni,
    yticklabels=uni,
    cmap="YlGnBu",
)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('KNN')
plt.show()

Demo_PSO.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import numpy as np
2+
import pandas as pd
3+
from sklearn.neighbors import KNeighborsClassifier
4+
from sklearn.model_selection import train_test_split
5+
from FS.pso import jfs # change this to switch algorithm
6+
import matplotlib.pyplot as plt
7+
8+
9+
# load data: the last column holds the class label, the others are features
data = pd.read_csv('ionosphere.csv').values
feat = np.asarray(data[:, :-1])
label = np.asarray(data[:, -1])

# split data into train & validation (70 -- 30), stratified on the label
xtrain, xtest, ytrain, ytest = train_test_split(
    feat, label, test_size=0.3, stratify=label
)
fold = {'xt': xtrain, 'yt': ytrain, 'xv': xtest, 'yv': ytest}

# parameter
k = 5      # k-value in KNN
N = 10     # number of particles
T = 100    # maximum number of iterations
opts = {'k': k, 'fold': fold, 'N': N, 'T': T}

# perform feature selection
fmdl = jfs(feat, label, opts)
sf = fmdl['sf']

# model with selected features
num_train = np.size(xtrain, 0)
num_valid = np.size(xtest, 0)
# keep only the selected feature columns; flatten labels to 1-D
x_train, y_train = xtrain[:, sf], ytrain.reshape(num_train)
x_valid, y_valid = xtest[:, sf], ytest.reshape(num_valid)

mdl = KNeighborsClassifier(n_neighbors=k)
mdl.fit(x_train, y_train)

# accuracy on the validation split
y_pred = mdl.predict(x_valid)
num_correct = np.sum(y_valid == y_pred)
Acc = num_correct / num_valid
print("Accuracy:", 100 * Acc)

# number of selected features
num_feat = fmdl['nf']
print("Feature Size:", num_feat)

# plot convergence
curve = fmdl['c']
# flatten the curve to 1-D using the length of its second axis
curve = curve.reshape(np.size(curve, 1))
# iteration numbers 1..T for the x-axis
x = np.arange(0, opts['T'], 1.0) + 1.0

fig, ax = plt.subplots()
ax.plot(x, curve, 'o-')
ax.set_xlabel('Number of Iterations')
ax.set_ylabel('Fitness')
ax.set_title('PSO')
ax.grid()
plt.show()
61+

ML/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
#

ML/dt.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
import numpy as np
2+
from sklearn.tree import DecisionTreeClassifier
3+
from sklearn.model_selection import train_test_split
4+
from sklearn.model_selection import StratifiedKFold
5+
from sklearn.model_selection import LeaveOneOut
6+
from sklearn.metrics import confusion_matrix
7+
from sklearn.metrics import classification_report
8+
9+
10+
def jho(feat, label, opts):
    """Evaluate a decision tree on a single stratified hold-out split.

    Parameters
    ----------
    feat : array-like
        Feature matrix with one row per instance.
    label : numpy.ndarray
        Class labels; reshaped to one entry per row of ``feat``.
    opts : dict
        Options. ``opts['ho']`` is the fraction of instances held out for
        testing (0.3 when the key is absent).

    Returns
    -------
    dict
        ``'acc'``: accuracy on the held-out part (a fraction, not percent);
        ``'con'``: confusion matrix over the classes found in the test labels;
        ``'r'``: text report from ``classification_report``.
    """
    # ratio of testing set
    test_ratio = opts.get('ho', 0.3)

    # flatten the labels to one value per instance
    n_samples = np.size(feat, 0)
    label = label.reshape(n_samples)

    # stratified split so both parts keep the class proportions
    xtrain, xtest, ytrain, ytest = train_test_split(
        feat, label, test_size=test_ratio, stratify=label
    )

    # train model
    tree = DecisionTreeClassifier(criterion="gini")
    tree.fit(xtrain, ytrain)

    # prediction
    ypred = tree.predict(xtest)

    # accuracy: share of test instances predicted correctly
    n_correct = np.sum(ytest == ypred)
    acc = n_correct / np.size(xtest, 0)

    # confusion matrix, rows/columns ordered by the sorted test classes
    classes = np.unique(ytest)
    confmat = confusion_matrix(ytest, ypred, labels=classes)

    # report
    report = classification_report(ytest, ypred)

    print("Accuracy (DT_HO):", 100 * acc)

    return {'acc': acc, 'con': confmat, 'r': report}
41+
42+
43+
def jkfold(feat, label, opts):
    """Evaluate a decision tree with stratified k-fold cross-validation.

    Parameters
    ----------
    feat : numpy.ndarray
        Feature matrix with one row per instance.
    label : numpy.ndarray
        Class labels; reshaped to one entry per row of ``feat``.
    opts : dict
        Options. ``opts['kfold']`` is the number of folds (10 when the key
        is absent).

    Returns
    -------
    dict
        ``'acc'``: mean of the per-fold accuracies (a fraction, not percent);
        ``'con'``: confusion matrix over the pooled out-of-fold predictions;
        ``'r'``: text report from ``classification_report`` on the same
        pooled predictions.
    """
    # number of k in kfold
    kfold = opts.get('kfold', 10)

    # number of instances
    num_data = np.size(feat, 0)
    x_data = feat
    y_data = label.reshape(num_data)  # flatten labels to 1-D

    fold = StratifiedKFold(n_splits=kfold)

    # Collect per-fold results in lists and join them once afterwards.
    # Growing an array that starts as [] would coerce the labels to the
    # float dtype of the empty array and re-copy earlier folds every time.
    true_parts = []
    pred_parts = []
    fold_acc = []
    for train_idx, test_idx in fold.split(x_data, y_data):
        xtrain, ytrain = x_data[train_idx, :], y_data[train_idx]
        xtest, ytest = x_data[test_idx, :], y_data[test_idx]

        # train model
        mdl = DecisionTreeClassifier(criterion="gini")
        mdl.fit(xtrain, ytrain)

        # prediction
        ypred = mdl.predict(xtest)

        # accuracy of this fold
        fold_acc.append(np.sum(ytest == ypred) / np.size(xtest, 0))
        true_parts.append(ytest)
        pred_parts.append(ypred)

    ytest2 = np.concatenate(true_parts, axis=0)
    ypred2 = np.concatenate(pred_parts, axis=0)

    # average accuracy
    acc = np.mean(fold_acc)

    # confusion matrix
    uni = np.unique(ytest2)
    confmat = confusion_matrix(ytest2, ypred2, labels=uni)

    # report
    report = classification_report(ytest2, ypred2)

    print("Accuracy (DT_K-fold):", 100 * acc)

    return {'acc': acc, 'con': confmat, 'r': report}
92+
93+
94+
def jloo(feat, label, opts):
    """Evaluate a decision tree with leave-one-out cross-validation.

    Parameters
    ----------
    feat : numpy.ndarray
        Feature matrix with one row per instance.
    label : numpy.ndarray
        Class labels; reshaped to one entry per row of ``feat``.
    opts : dict
        Accepted for a signature parallel to ``jho`` / ``jkfold``; no key
        is read by this function.

    Returns
    -------
    dict
        ``'acc'``: fraction of instances predicted correctly when each is
        held out in turn;
        ``'con'``: confusion matrix over the pooled predictions;
        ``'r'``: text report from ``classification_report`` on the same
        pooled predictions.
    """
    # number of instances
    num_data = np.size(feat, 0)
    x_data = feat
    y_data = label.reshape(num_data)  # flatten labels to 1-D

    loo = LeaveOneOut()

    # One iteration per instance: gather the single-element results in lists
    # and join them once. Concatenating onto a growing array inside the loop
    # is quadratic here and also coerces labels to the float dtype of [].
    true_parts = []
    pred_parts = []
    for train_idx, test_idx in loo.split(x_data):
        xtrain, ytrain = x_data[train_idx, :], y_data[train_idx]
        xtest, ytest = x_data[test_idx, :], y_data[test_idx]

        # train model
        mdl = DecisionTreeClassifier(criterion="gini")
        mdl.fit(xtrain, ytrain)

        # prediction
        true_parts.append(ytest)
        pred_parts.append(mdl.predict(xtest))

    ytest2 = np.concatenate(true_parts, axis=0)
    ypred2 = np.concatenate(pred_parts, axis=0)

    # average accuracy: every fold scores 0 or 1, so the mean of the fold
    # accuracies equals the share of correct pooled predictions
    acc = np.mean(ytest2 == ypred2)

    # confusion matrix
    uni = np.unique(ytest2)
    confmat = confusion_matrix(ytest2, ypred2, labels=uni)

    # report
    report = classification_report(ytest2, ypred2)

    print("Accuracy (DT_LOO):", 100 * acc)

    return {'acc': acc, 'con': confmat, 'r': report}
139+
140+
141+
142+
143+
144+

0 commit comments

Comments
 (0)