-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkNNClassifier.py
More file actions
71 lines (55 loc) · 2.48 KB
/
kNNClassifier.py
File metadata and controls
71 lines (55 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import numpy
# Classifier for Nearest Neighbor Algorithm
class kNNClassifier:
    """Factory for k-nearest-neighbour models.

    The classifier only stores the neighbour count ``k``; the training
    data is supplied later through :meth:`fit`.
    """

    def __init__(self, k):
        """Remember how many neighbours each prediction should consult."""
        self.k = k

    # Fits the data for the model
    def fit(self, data, target):
        """Return a ``kNNModel`` built from ``data``, ``target`` and ``k``.

        No computation happens here: the three values are passed straight
        to the ``kNNModel`` constructor.
        """
        return kNNModel(data, target, self.k)
# Model for the Nearest Neighbor Algorithm
class kNNModel:
    """k-nearest-neighbour model holding the training samples and labels.

    Attributes are stored exactly as given:
        data:   training samples, one row per sample.
        target: class label for each training row.
        k:      number of neighbours consulted for each prediction.
    """

    def __init__(self, data, target, k):
        """Store the training samples, their labels and the neighbour count."""
        self.data = data
        self.target = target
        self.k = k

    # returns the predicted values for the given test data
    def predict(self, test_data):
        """Return the predicted class for every row of ``test_data``.

        Thin wrapper around :meth:`kNearestNeighbors`.
        """
        return self.kNearestNeighbors(test_data)

    # returns an array with the majority class of the nearest neighbors
    def kNearestNeighbors(self, test_data):
        """Classify each row of ``test_data`` by majority vote of its neighbours.

        For every test row the squared Euclidean distance to each training
        row is computed, the ``k`` closest training rows are selected and
        the most frequent label among them is returned.  When several
        labels share the highest count, the smallest label (in sorted
        order) wins.  If ``k`` exceeds the number of training rows, all
        training rows vote.

        Args:
            test_data: 2-D array-like, one row per sample to classify.

        Returns:
            A 1-D ``numpy`` array of dtype ``object`` with one predicted
            label per test row.

        Raises:
            ValueError: if ``k`` is smaller than 1 or the training set is
                empty.
        """
        if self.k < 1:
            raise ValueError("k must be at least 1, got {!r}".format(self.k))

        # Work on ndarray views so plain lists are accepted as well; the
        # stored attributes themselves are left untouched.
        train = numpy.asarray(self.data)
        labels = numpy.asarray(self.target)
        queries = numpy.asarray(test_data)

        if train.shape[0] == 0:
            raise ValueError("cannot predict from an empty training set")

        n_inputs = queries.shape[0]
        # One slot per test row; object dtype so any label type can be stored.
        closest = numpy.empty(n_inputs, dtype=object)

        for n in range(n_inputs):
            # Squared Euclidean distance from this test row to every training
            # row (the square root is skipped: it does not change the order).
            distances = numpy.sum((train - queries[n, :]) ** 2, axis=1)

            # Every thousand rows a message is printed so the user knows the
            # program is still running.
            if (n + 1) % 1000 == 0:
                print("Computing...")

            # Stable sort: equidistant training rows keep their training
            # order, so the neighbours picked at the k boundary are
            # reproducible.
            order = numpy.argsort(distances, kind="stable")

            # Slicing clamps automatically when k exceeds the number of
            # training rows, so only neighbours that exist get a vote.
            neighbour_labels = labels[order[:self.k]]

            # unique() returns the labels sorted; argmax() picks the first
            # maximum, so ties go to the smallest label.
            classes, votes = numpy.unique(neighbour_labels, return_counts=True)
            closest[n] = classes[numpy.argmax(votes)]

        # Returns the predicted values
        return closest