# lab_utils.py
# Helpers for generating and plotting small two-class, two-feature datasets.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
# Public API of this module: only these names are exported by
# ``from lab_utils import *``; the ``_gen_*`` helpers stay private.
__all__ = [
"create_random_data",
"plot_dataset",
]
def plot_dataset(X, y):
    """Scatter-plot a two-class dataset on a freshly created figure.

    Parameters
    ----------
    X
        Array of points; column 0 is used as the x coordinate and column 1
        as the y coordinate.
    y
        Array of labels used to build boolean masks over the rows of ``X``.
        Rows labelled -1 and rows labelled 1 are drawn as two separate
        scatter series.

    Notes
    -----
    Nothing is returned and the figure is not shown explicitly; displaying
    it is left to the caller / plotting backend.
    """
    _, axes = plt.subplots()
    # Negative class first, then positive, so the series order (and hence
    # the default colour cycle) stays the same on every call.
    for label in (-1, 1):
        points = X[y == label]
        axes.scatter(points[:, 0], points[:, 1], alpha=0.5)
def _gen_linear_data(n_samples, noise_level):
fst_half = n_samples // 2
snd_half = n_samples - fst_half
Y = np.ones((n_samples, ))
Y[:fst_half] = -1
X1 = np.random.normal([5, 5], scale=[1*noise_level], size=(fst_half, 2))
X2 = np.random.normal([8, 5], scale=[1*noise_level], size=(snd_half, 2))
return np.concatenate((X1, X2), 0), Y
def _gen_moons(n_samples, noise_level):
    """Generate the 'moons' dataset with labels in {-1, 1}.

    Parameters
    ----------
    n_samples
        Forwarded to ``datasets.make_moons`` as ``n_samples``.
    noise_level
        Forwarded to ``datasets.make_moons`` as ``noise``.

    Returns
    -------
    X
        The feature array produced by ``make_moons`` (unshuffled).
    Y
        The label vector produced by ``make_moons`` with every 0 replaced
        by -1.
    """
    features, raw_labels = datasets.make_moons(
        n_samples=n_samples,
        shuffle=False,
        noise=noise_level,
    )
    # Remap the {0, 1} labels to {-1, 1}.
    labels = np.where(raw_labels == 0, -1, raw_labels)
    return features, labels
def _gen_circles(n_samples, noise_level):
    """Generate the 'circles' dataset with labels in {-1, 1}.

    Parameters
    ----------
    n_samples
        Forwarded to ``datasets.make_circles`` as ``n_samples``.
    noise_level
        Forwarded to ``datasets.make_circles`` as ``noise``.

    Returns
    -------
    X
        The feature array produced by ``make_circles`` (shuffled).
    Y
        The label vector produced by ``make_circles`` with every 0 replaced
        by -1.
    """
    features, raw_labels = datasets.make_circles(
        n_samples=n_samples,
        shuffle=True,
        noise=noise_level,
    )
    # Remap the {0, 1} labels to {-1, 1}.
    labels = np.where(raw_labels == 0, -1, raw_labels)
    return features, labels
def create_random_data(n_samples, noise_level, dataset="linear", seed=0):
    """Build a reproducible two-class toy dataset.

    Parameters
    ----------
    n_samples
        Total number of points to generate, passed through to the selected
        generator.
    noise_level
        Controls how noisy (and therefore how hard) the problem is. For
        'linear' it is the standard deviation of the two Gaussian clusters;
        for 'moons' and 'circles' it is handed to the scikit-learn generator
        as its ``noise`` argument.
    dataset
        Name of the dataset, matched case-insensitively: 'linear', 'moons'
        or 'circles'.
    seed
        Value used to seed NumPy's global random generator before any
        sampling takes place.

    Returns
    -------
    X
        A 2D array of features.
    Y
        A vector of targets (-1 or 1).

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported names.

    Notes
    -----
    Calling this function reseeds the process-wide ``np.random`` state, even
    when the dataset name turns out to be invalid.
    """
    # Seed before dispatching so every generator starts from the same state.
    np.random.seed(seed)

    kind = dataset.lower()
    if kind == "linear":
        return _gen_linear_data(n_samples, noise_level)
    if kind == "moons":
        return _gen_moons(n_samples, noise_level)
    if kind == "circles":
        return _gen_circles(n_samples, noise_level)

    # The message reports the name exactly as the caller spelled it.
    raise ValueError(
        ("Dataset '%s' is not valid. Valid datasets are:"
         " 'linear', 'moons', 'circles'") % (dataset)
    )