# tiny-nn/tiny_nn_torch.py (Tim2othy/tiny-nn, main branch)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import torch as to

from load_mnist import to_torch, x_test, x_train, y_test, y_train

# Pass the four datasets imported from load_mnist through to_torch and rebind
# the same names to the results, in the same order they were passed in.
# NOTE(review): presumably this converts them to torch tensors — confirm in
# load_mnist.
x_test, x_train, y_test, y_train = to_torch(x_test, x_train, y_test, y_train)


def cross_entropy(y_true, y_pred, eps=1e-12):
    """Return the cross-entropy loss of one prediction against its label.

    Args:
        y_true: Label scores; only the index of the largest entry
            (``argmax`` over all elements) is used.
        y_pred: 2-D tensor of predicted probabilities; only row 0 is read.
        eps: Lower bound applied to the selected probability before the
            logarithm, so a probability of exactly 0 yields a large finite
            loss instead of ``inf``.

    Returns:
        A 0-dim tensor holding ``-log(max(p, eps))`` where ``p`` is the entry
        of row 0 of ``y_pred`` at the true class index.
    """
    true_class = to.argmax(y_true)
    # Clamp before the log: a softmax output can underflow to exactly 0, and a
    # single inf would poison any running sum of losses.
    prob = to.clamp(y_pred[0, true_class], min=eps)
    return -to.log(prob)


def test_error_rate(y_true, y_pred):
    return to.argmax(y_true) != to.argmax(y_pred)

def relu(x):
    """Apply the rectified linear unit element-wise: negatives become 0."""
    return to.clamp(x, min=0)


def relu_prime(x):
    """Return a boolean mask that is true where ``x`` is strictly positive.

    The mask acts as the ReLU derivative: multiplying a float tensor by it
    zeroes every entry whose unit was inactive.
    """
    return x > 0


def softmax(x):
    """Normalise each row of ``x`` so that its entries sum to 1.

    The per-row maximum is subtracted before exponentiating, so the largest
    exponent is 0; each exponential is then divided by its row sum. ``x``
    needs at least two dimensions because the reductions run over ``dim=1``.
    """
    row_peak = x.max(dim=1, keepdim=True).values
    shifted_exp = (x - row_peak).exp()
    return shifted_exp / shifted_exp.sum(dim=1, keepdim=True)


def backprop_fc(
    bias: to.Tensor,
    weights: to.Tensor,
    input: to.Tensor,
    output_error: to.Tensor,
    *,
    learning_rate=None,
):
    """Back-propagate through one fully connected layer, updating it in place.

    Args:
        bias: Layer bias; modified in place.
        weights: Layer weight matrix; modified in place.
        input: Activations that were fed into the layer on the forward pass.
        output_error: Gradient of the loss with respect to the layer output.
        learning_rate: Step size for the update. When omitted, the
            module-level ``lr`` is used.

    Returns:
        The gradient with respect to ``input``, computed from the weights as
        they were before this call's update.
    """
    step = lr if learning_rate is None else learning_rate
    # Must be computed before the weights are modified below.
    input_error = to.matmul(output_error, weights.T)
    weights -= step * to.matmul(input.T, output_error)
    bias -= step * to.sum(output_error, dim=0, keepdim=True)
    return input_error


def train(num_samples=None, report_every=7500):
    """Run one pass of per-sample gradient descent over the training set.

    Each sample is pushed through two ReLU layers and a softmax output layer
    built from the module-level ``w1``/``b1`` .. ``w3``/``b3``; those
    parameters are then updated in place by ``backprop_fc``.

    Args:
        num_samples: How many leading samples of ``x_train`` / ``y_train`` to
            visit. Defaults to ``len(x_train)`` instead of a fixed count.
        report_every: Print the mean cross-entropy after this many samples
            and reset the running sum.

    Raises:
        ValueError: If ``report_every`` is smaller than 1.
    """
    if report_every < 1:
        raise ValueError("report_every must be at least 1")
    if num_samples is None:
        num_samples = len(x_train)

    train_loss = 0
    for i in range(num_samples):
        # Forward propagation.
        pixels = x_train[i]
        label = y_train[i]

        output1 = relu(to.matmul(pixels, w1) + b1)
        output2 = relu(to.matmul(output1, w2) + b2)
        output3 = to.matmul(output2, w3) + b3
        prediction = softmax(output3)

        # Backward propagation. ``prediction - label`` is the gradient of
        # softmax + cross-entropy w.r.t. ``output3`` (assuming ``label`` is
        # one-hot — confirm against load_mnist).
        error_direct = prediction - label
        error3 = backprop_fc(b3, w3, output2, error_direct)
        # relu(x) > 0 exactly where x > 0, so masking with the post-activation
        # values equals applying the ReLU derivative at the pre-activation.
        error3 *= relu_prime(output2)
        error2 = backprop_fc(b2, w2, output1, error3)
        error2 *= relu_prime(output1)
        backprop_fc(b1, w1, pixels, error2)

        train_loss += cross_entropy(label, prediction)
        if (i + 1) % report_every == 0:
            print(f"At {i + 1}/{num_samples} the error is {train_loss / report_every:.3f}")
            train_loss = 0


def test(num_samples=None):
    """Print the misclassification rate of the current weights on the test set.

    Args:
        num_samples: How many leading samples of ``x_test`` / ``y_test`` to
            evaluate. Defaults to ``len(x_test)`` instead of a fixed count.
    """
    if num_samples is None:
        num_samples = len(x_test)

    test_loss = 0
    for i in range(num_samples):
        # Forward propagation.
        pixels = x_test[i]
        label = y_test[i]

        output1 = relu(to.matmul(pixels, w1) + b1)
        output2 = relu(to.matmul(output1, w2) + b2)
        # Softmax is skipped here: it is monotonic, so the argmax taken in
        # test_error_rate is the same on the raw output scores.
        prediction = to.matmul(output2, w3) + b3

        # Each call contributes 1 for a misclassified sample, 0 otherwise.
        test_loss += test_error_rate(label, prediction)
    print(f"Test loss: {test_loss / num_samples:.3f}")

# Step size read by backprop_fc for every parameter update.
lr = 0.02

# Parameters of the three fully connected layers (784 -> 100 -> 50 -> 10).
# to.rand samples uniformly from [0, 1), so subtracting 0.5 centres the
# initial values on zero. The creation order fixes the order of RNG draws.
w1 = to.rand(28 * 28, 100) - 0.5
b1 = to.rand(1, 100) - 0.5
w2 = to.rand(100, 50) - 0.5
b2 = to.rand(1, 50) - 0.5
w3 = to.rand(50, 10) - 0.5
b3 = to.rand(1, 10) - 0.5

# Runs at import time: one training pass, then the test-set evaluation.
train()
test()