-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsoftmax.py
More file actions
96 lines (70 loc) · 2.32 KB
/
softmax.py
File metadata and controls
96 lines (70 loc) · 2.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import numpy as np
from random import shuffle
def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    loss = 0.0
    num_train = X.shape[0]
    num_classes = W.shape[1]
    # Accumulator for the un-averaged, un-regularized gradient dL/dW.
    # (The original also built `np.zeros_like(W.shape)` -- zeros of the shape
    # *tuple*, a shape-(2,) array -- and an unused dLdy; both removed.)
    dLdW = np.zeros(W.shape)
    for i in range(num_train):
        # Per-sample class scores; subtract the row max before exponentiating
        # so np.exp cannot overflow (shifting does not change the softmax).
        scores = X[i].dot(W)
        scores -= np.max(scores)
        exp_scores = np.exp(scores)
        probs = exp_scores / np.sum(exp_scores)
        # Cross-entropy loss contribution of the correct class.
        loss += -np.log(probs[y[i]])
        # Gradient of the softmax loss w.r.t. W for this sample:
        # dL/dW[:, j] = (probs[j] - 1{j == y[i]}) * X[i]
        dLdW[:, y[i]] += -X[i]
        for j in range(num_classes):
            dLdW[:, j] += probs[j] * X[i]
    # Average over the minibatch and add L2 regularization.
    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)
    dW = dLdW / num_train + reg * W
    return loss, dW
def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, vectorized version.

    Inputs and outputs are the same as softmax_loss_naive.
    """
    num_train = X.shape[0]
    # All scores at once: (N, C). Subtract each row's max before exp so the
    # exponentials cannot overflow; the shift leaves softmax unchanged.
    scores = X.dot(W)
    scores -= np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(scores)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    rows = np.arange(num_train)
    # Average cross-entropy over the batch plus L2 regularization.
    loss = -np.sum(np.log(probs[rows, y])) / num_train
    loss += 0.5 * reg * np.sum(W * W)
    # dL/dscores = probs - one_hot(y); chain rule through scores = X @ W
    # gives dL/dW = X.T @ (probs - one_hot(y)). probs is mutated in place,
    # which is safe: it is local and no longer needed for the loss.
    probs[rows, y] -= 1
    dW = X.T.dot(probs) / num_train + reg * W
    return loss, dW