@@ -1,40 +1,36 @@
+import warnings
+
 import matplotlib.pyplot as plt
 import numpy as np
-
-from utils import cmap
+from numpy.random import choice
 
 
 class WeakClassifier:
     """
-    Function that models a WeakClassifier
+    Class that models a WeakClassifier
     """
-
     def __init__(self):
-
-        # initialize a few stuff
         self._dim = None
         self._threshold = None
         self._label_above_split = None
 
     def fit(self, X: np.ndarray, Y: np.ndarray):
 
-        n, d = X.shape
-        possible_labels = np.unique(Y)
-
-        # select random feature (see np.random.choice)
-        self._dim = np.random.choice(a=range(0, d))
+        # Select random feature (see np.random.choice)
+        _, n_feats = X.shape
+        self._dim = choice(a=range(0, n_feats))
 
-        # select random split (see np.random.uniform)
-        M, m = np.max(X[:, self._dim]), np.min(X[:, self._dim])
-        self._threshold = np.random.uniform(low=m, high=M)
+        # Select random split threshold
+        feat_min = np.min(X[:, self._dim])
+        feat_max = np.max(X[:, self._dim])
+        self._threshold = np.random.uniform(low=feat_min, high=feat_max)
 
-        # select random verse (see np.random.choice)
-        self._label_above_split = np.random.choice(a=possible_labels)
+        # Select random verse (i.e. which label is predicted above the split)
+        possible_labels = np.unique(Y)
+        self._label_above_split = choice(a=possible_labels)
 
     def predict(self, X: np.ndarray):
-
-        num_samples = X.shape[0]
-        y_pred = np.zeros(shape=num_samples)
+        y_pred = np.zeros(shape=X.shape[0])
         y_pred[X[:, self._dim] >= self._threshold] = self._label_above_split
         y_pred[X[:, self._dim] < self._threshold] = -1 * self._label_above_split
 
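The WeakClassifier above is a fully random decision stump: fit draws one feature, a uniform threshold between that feature's extremes, and a label polarity, without ever looking at the training error. A minimal smoke test, assuming the class is importable from this module and that labels live in {-1, +1} (predict negates the chosen label below the threshold, so symmetric labels are required):

# Hypothetical smoke test for the random stump (assumes labels in {-1, +1}).
import numpy as np

X_toy = np.random.randn(100, 2)            # 100 examples, 2 features
Y_toy = np.where(X_toy[:, 0] > 0, 1, -1)   # binary labels in {-1, +1}

stump = WeakClassifier()
stump.fit(X_toy, Y_toy)
acc = np.mean(stump.predict(X_toy) == Y_toy)
print('stump accuracy: {:.2f}'.format(acc))  # hovers around 0.5, by construction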
@@ -43,20 +39,17 @@ def predict(self, X: np.ndarray):
 
 class AdaBoostClassifier:
     """
-    Function that models a Adaboost classifier
+    Class encapsulating an AdaBoost classifier
     """
-
     def __init__(self, n_learners: int, n_max_trials: int = 200):
         """
-        Model constructor
+        Initialize an AdaBoost classifier.
 
         Parameters
         ----------
         n_learners: int
-            number of weak classifiers.
+            Number of weak classifiers.
         """
-
-        # initialize a few stuff
         self.n_learners = n_learners
         self.learners = []
         self.alphas = np.zeros(shape=n_learners)
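Here `n_learners` fixes the ensemble size, while `n_max_trials` bounds the rejection loop in fit below: if no random stump reaches weighted error of at most 0.5 within that many draws, the example weights are reset to uniform. A hypothetical construction with a larger trial budget for harder datasets:

# Hypothetical: raise the trial budget when random stumps struggle.
model = AdaBoostClassifier(n_learners=100, n_max_trials=500)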
@@ -69,74 +62,68 @@ def fit(self, X: np.ndarray, Y: np.ndarray, verbose: bool = False):
 
         Parameters
         ----------
-        X: ndarray
-            features having shape (n_samples, dim).
-        Y: ndarray
-            class labels having shape (n_samples,).
+        X: np.ndarray
+            Features having shape (n_samples, dim).
+        Y: np.ndarray
+            Class labels having shape (n_samples,).
         verbose: bool
-            whether or not to visualize the learning process.
-            Default is False
+            Whether or not to visualize the learning process (default=False).
         """
 
-        # some inits
-        n, d = X.shape
-        if d != 2:
-            verbose = False  # only plot learning if 2 dimensional
+        n_examples, n_feats = X.shape
 
-        possible_labels = np.unique(Y)
+        distinct_labels = len(np.unique(Y))
+        if distinct_labels == 1:
+            warnings.warn('Fitting {} on a dataset with only one label.'.format(
+                self.__class__.__name__))
+        elif distinct_labels > 2:
+            raise NotImplementedError('Only binary classification is supported.')
 
-        # only binary problems please
-        assert possible_labels.size == 2, 'Error: data is not binary'
+        # Initialize all examples with equal weights
+        weights = np.ones(shape=n_examples) / n_examples
 
-        # initialize the sample weights as equally probable
-        sample_weights = np.ones(shape=n) / n
-
-        # start training
+        # Train ensemble
         for l in range(self.n_learners):
-
-            # choose the indexes of 'difficult' samples (np.random.choice)
-            cur_idx = np.random.choice(a=range(0, n), size=n, replace=True, p=sample_weights)
-
-            # extract 'difficult' samples
-            cur_X = X[cur_idx]
-            cur_Y = Y[cur_idx]
-
-            # search for a weak classifier
-            error = 1
+            # Perform a weighted re-sampling (with replacement) of the dataset
+            # to create a new dataset on which the current weak learner will
+            # be trained.
+            sampled_idxs = choice(a=range(0, n_examples), size=n_examples,
+                                  replace=True, p=weights)
+            cur_X = X[sampled_idxs]
+            cur_Y = Y[sampled_idxs]
+
+            # Search for a weak classifier
             n_trials = 0
-            cur_wclass = None
-            y_pred = None
-
+            error = 1.
             while error > 0.5:
+                weak_learner = WeakClassifier()
+                weak_learner.fit(cur_X, cur_Y)
+                y_pred = weak_learner.predict(cur_X)
 
-                cur_wclass = WeakClassifier()
-                cur_wclass.fit(cur_X, cur_Y)
-                y_pred = cur_wclass.predict(cur_X)
-
-                # compute error
-                error = np.sum(sample_weights[cur_idx[cur_Y != y_pred]])
+                # Compute current weak learner error
+                error = np.sum(weights[sampled_idxs[cur_Y != y_pred]])
 
+                # Re-initialize sample weights if number of trials is exceeded
                 n_trials += 1
                 if n_trials > self.n_max_trials:
-                    # initialize the sample weights again
-                    sample_weights = np.ones(shape=n) / n
+                    weights = np.ones(shape=n_examples) / n_examples
 
-            # save weak learner parameter
+            # Store weak learner parameter
             self.alphas[l] = alpha = np.log((1 - error) / error) / 2
 
-            # append the weak classifier to the chain
-            self.learners.append(cur_wclass)
+            # Append the weak classifier to the chain
+            self.learners.append(weak_learner)
 
-            # update sample weights
-            sample_weights[cur_idx[cur_Y != y_pred]] *= np.exp(alpha)
-            sample_weights[cur_idx[cur_Y == y_pred]] *= np.exp(-alpha)
-            sample_weights /= np.sum(sample_weights)
+            # Update example weights
+            weights[sampled_idxs[cur_Y != y_pred]] *= np.exp(alpha)
+            weights[sampled_idxs[cur_Y == y_pred]] *= np.exp(-alpha)
+            weights /= np.sum(weights)  # re-normalize
 
-            if verbose:
-                self._plot(cur_X, y_pred, sample_weights[cur_idx],
+            # Possibly plot the predictions (if these are 2D)
+            if verbose and n_feats == 2:
+                self._plot(cur_X, y_pred, weights[sampled_idxs],
                            self.learners[-1], l)
 
-
     def predict(self, X: np.ndarray):
         """
         Function to perform predictions over a set of samples.
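The loop above follows the standard discrete AdaBoost recipe: for weighted error e it sets alpha = 0.5 * ln((1 - e) / e), multiplies the weights of misclassified examples by exp(alpha) and of correctly classified ones by exp(-alpha), then renormalizes. A self-contained numeric sketch of one such round (made-up weights, not data from this repository):

# One AdaBoost weight update with made-up numbers.
import numpy as np

weights = np.ones(5) / 5                    # uniform start, as in fit()
correct = np.array([True, True, False, True, True])

error = np.sum(weights[~correct])           # weighted error = 0.2
alpha = 0.5 * np.log((1 - error) / error)   # = 0.5 * ln(4) ~ 0.693

weights[~correct] *= np.exp(alpha)          # mistake doubled: 0.2 -> 0.4
weights[correct] *= np.exp(-alpha)          # correct halved: 0.2 -> 0.1
weights /= np.sum(weights)                  # renormalize to sum to 1
print(weights)                              # [0.125 0.125 0.5 0.125 0.125]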
@@ -167,10 +154,10 @@ def predict(self, X: np.ndarray):
 
         return pred
 
-    def _plot(self, X: np.ndarray, y_pred: np.ndarray, weights: np.ndarray,
-              learner: WeakClassifier, iteration: int):
+    @staticmethod
+    def _plot(X: np.ndarray, y_pred: np.ndarray, weights: np.ndarray,
+              learner: WeakClassifier, iteration: int, cmap: str = 'jet'):
 
-        # plot
         plt.clf()
         plt.scatter(X[:, 0], X[:, 1], c=y_pred, s=weights * 50000,
                     cmap=cmap, edgecolors='k')
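End to end, a hedged usage sketch under the same {-1, +1} label assumption; it relies only on names visible in this diff (AdaBoostClassifier, fit, predict), with predict assumed to return the ensemble's label votes:

# Hypothetical end-to-end run (assumes labels in {-1, +1}).
import numpy as np

X = np.random.randn(200, 2)
Y = np.where(X[:, 0] + X[:, 1] > 0, 1, -1)

model = AdaBoostClassifier(n_learners=50)
model.fit(X, Y, verbose=False)
print('training accuracy: {:.3f}'.format(np.mean(model.predict(X) == Y)))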