Input: a 28 x 28 x 1 image.
Convolution layer: eight 3x3 kernels, producing a 26x26x8 output.
Pooling layer: 2x2 max pooling, producing a 13x13x8 output.
Flatten: the 13x13x8 output above is flattened into a fully connected layer of 13 * 13 * 8 = 1352 neurons.
Softmax layer: outputs a 10-dimensional vector, one probability for each of the 10 handwritten-digit classes.
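Before reading the full implementation below, the shape arithmetic above can be verified in a few lines of Python. This is a minimal sketch; the variable names here are illustrative only and are not part of the code that follows:

H, W = 28, 28                                  # input image size
K, F = 3, 8                                    # kernel size, number of filters
conv_h, conv_w = H - K + 1, W - K + 1          # valid 3x3 convolution -> 26 x 26
pool_h, pool_w = conv_h // 2, conv_w // 2      # 2x2 max pooling -> 13 x 13
flat_len = pool_h * pool_w * F                 # flattened length -> 1352
print(conv_h, conv_w, pool_h, pool_w, flat_len)  # 26 26 13 13 1352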
import mnist
import numpy as np


class Conv3x3:
    # A convolution layer using 3x3 filters.

    def __init__(self, num_filters):
        self.num_filters = num_filters
        # Initialize filters with small random values (divide by 9 to keep them small).
        self.filters = np.random.randn(num_filters, 3, 3) / 9

    def iterate_regions(self, image):
        # Generate all possible 3x3 image regions (valid padding).
        h, w = image.shape
        for i in range(h - 2):
            for j in range(w - 2):
                im_region = image[i:(i + 3), j:(j + 3)]
                yield im_region, i, j

    def forward(self, input):
        # input: the 28x28 image; output: 26x26x8.
        # The output array starts at zero and is filled in below.
        h, w = input.shape
        output = np.zeros((h - 2, w - 2, self.num_filters))
        # Cache the input for use in backprop.
        self.last_input = input
        for im_region, i, j in self.iterate_regions(input):
            # Convolution: element-wise product then sum; output[i, j] is a vector of 8 values.
            output[i, j] = np.sum(im_region * self.filters, axis=(1, 2))
        # Return the output so the next layer can use it as input.
        return output

    def backprop(self, d_L_d_out, learn_rate):
        # Gradient of the loss w.r.t. the filters, initialized to zero (8x3x3).
        d_L_d_filters = np.zeros(self.filters.shape)
        # im_region is one 3x3 patch at a time.
        for im_region, i, j in self.iterate_regions(self.last_input):
            for f in range(self.num_filters):
                # Accumulate the gradient one filter at a time.
                d_L_d_filters[f] += d_L_d_out[i, j, f] * im_region
        # Update the filter weights.
        self.filters -= learn_rate * d_L_d_filters
        # This is the first layer, so nothing upstream needs the input gradient.
        return None


class MaxPool2:
    # A max pooling layer using a pool size of 2.

    def iterate_regions(self, image):
        # Generate non-overlapping 2x2 image regions.
        h, w, _ = image.shape
        new_h = h // 2
        new_w = w // 2
        for i in range(new_h):
            for j in range(new_w):
                im_region = image[(i * 2):(i * 2 + 2), (j * 2):(j * 2 + 2)]
                yield im_region, i, j

    def forward(self, input):
        # input: the conv layer's output (26x26x8); output: 13x13x8.
        h, w, num_filters = input.shape
        output = np.zeros((h // 2, w // 2, num_filters))
        # Cache the pooling layer's input (26x26x8) for backprop.
        self.last_input = input
        for im_region, i, j in self.iterate_regions(input):
            output[i, j] = np.amax(im_region, axis=(0, 1))
        return output

    def backprop(self, d_L_d_out):
        '''
        Performs a backward pass of the maxpool layer.
        Returns the loss gradient for this layer's inputs.
        - d_L_d_out is the loss gradient for this layer's outputs.
        '''
        # Gradient w.r.t. the pooling layer's input (26x26x8), initialized to zero.
        d_L_d_input = np.zeros(self.last_input.shape)
        # Each im_region is a 2x2x8 block; only the max pixel in each block
        # passed a value forward, so only it receives a gradient.
        for im_region, i, j in self.iterate_regions(self.last_input):
            h, w, f = im_region.shape
            # Per-channel maxima of this 2x2 region.
            amax = np.amax(im_region, axis=(0, 1))
            for i2 in range(h):
                for j2 in range(w):
                    for f2 in range(f):
                        # If this pixel was the max value, copy the gradient to it.
                        if im_region[i2, j2, f2] == amax[f2]:
                            d_L_d_input[i * 2 + i2, j * 2 + j2, f2] = d_L_d_out[i, j, f2]
        return d_L_d_input


class Softmax:
    # A fully connected layer with softmax activation.

    def __init__(self, input_len, nodes):
        # input_len: number of input nodes (the flattened pooling output).
        # nodes: number of output nodes, 10 in this example.
        # Initialize the weights with small random values; dividing by input_len
        # keeps the initial totals from being too large.
        self.weights = np.random.randn(input_len, nodes) / input_len
        self.biases = np.zeros(nodes)

    def forward(self, input):
        # Cache the original shape (13x13x8) so backprop can restore it.
        self.last_input_shape = input.shape
        input = input.flatten()
        self.last_input = input
        input_len, nodes = self.weights.shape
        totals = np.dot(input, self.weights) + self.biases
        self.last_totals = totals
        exp = np.exp(totals)
        return exp / np.sum(exp, axis=0)

    def backprop(self, d_L_d_out, learn_rate):
        # Only the element of d_L_d_out at the true label is nonzero,
        # so the loop body runs exactly once.
        for i, gradient in enumerate(d_L_d_out):
            if gradient == 0:
                continue
            # e^totals
            t_exp = np.exp(self.last_totals)
            # Sum of all e^totals
            S = np.sum(t_exp)
            # Gradients of out[i] against totals
            d_out_d_t = -t_exp[i] * t_exp / (S ** 2)
            d_out_d_t[i] = t_exp[i] * (S - t_exp[i]) / (S ** 2)
            # Gradients of totals against weights/biases/input
            d_t_d_w = self.last_input
            d_t_d_b = 1
            d_t_d_inputs = self.weights
            # Gradients of loss against totals
            d_L_d_t = gradient * d_out_d_t
            # Gradients of loss against weights/biases/input
            d_L_d_w = d_t_d_w[np.newaxis].T @ d_L_d_t[np.newaxis]
            d_L_d_b = d_L_d_t * d_t_d_b
            d_L_d_inputs = d_t_d_inputs @ d_L_d_t
            self.weights -= learn_rate * d_L_d_w
            self.biases -= learn_rate * d_L_d_b
            # Reshape the flat gradient (1352,) back to 13x13x8 for the pooling layer.
            return d_L_d_inputs.reshape(self.last_input_shape)


# Load the MNIST handwritten-digit data (only the first 1000 examples of each split).
train_images = mnist.train_images()[:1000]
train_labels = mnist.train_labels()[:1000]
test_images = mnist.test_images()[:1000]
test_labels = mnist.test_labels()[:1000]

conv = Conv3x3(8)                   # 28x28x1 -> 26x26x8
pool = MaxPool2()                   # 26x26x8 -> 13x13x8
softmax = Softmax(13 * 13 * 8, 10)  # 13x13x8 -> 10


def forward(image, label):
    # Normalize pixels from [0, 255] to [-0.5, 0.5], then run all three layers.
    out = conv.forward((image / 255) - 0.5)
    out = pool.forward(out)
    out = softmax.forward(out)
    # Cross-entropy loss and whether the prediction was correct.
    loss = -np.log(out[label])
    acc = 1 if np.argmax(out) == label else 0
    return out, loss, acc


def train(im, label, lr=.005):
    out, loss, acc = forward(im, label)
    # Initial gradient of the loss w.r.t. the softmax output.
    gradient = np.zeros(10)
    gradient[label] = -1 / out[label]
    # Backpropagate through the layers in reverse order.
    gradient = softmax.backprop(gradient, lr)
    gradient = pool.backprop(gradient)
    gradient = conv.backprop(gradient, lr)
    return loss, acc


print('Start training')
# Train the CNN for 5 epochs.
for epoch in range(5):
    print('--- Epoch %d ---' % (epoch + 1))
    # Shuffle the training data.
    permutation = np.random.permutation(len(train_images))
    train_images = train_images[permutation]
    train_labels = train_labels[permutation]
    # Train on each example; i is the index, im the image, label the label.
    loss = 0
    num_correct = 0
    for i, (im, label) in enumerate(zip(train_images, train_labels)):
        if i > 0 and i % 100 == 99:
            print(
                '[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%' %
                (i + 1, loss / 100, num_correct)
            )
            loss = 0
            num_correct = 0
        l, acc = train(im, label)
        loss += l
        num_correct += acc

# Test the CNN.
print('\n--- Testing the CNN ---')
loss = 0
num_correct = 0
for im, label in zip(test_images, test_labels):
    _, l, acc = forward(im, label)
    loss += l
    num_correct += acc

num_tests = len(test_images)
print('Test Loss:', loss / num_tests)
print('Test Accuracy:', num_correct / num_tests)
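Once training and the test loop finish, the same forward function can be reused to classify a single image. A minimal usage sketch, assuming the code above has already been run in the same session:

image, label = test_images[0], test_labels[0]
probs, loss, correct = forward(image, label)   # forward() is defined above
print('Predicted digit:', np.argmax(probs))
print('True label:', label)
print('Class probabilities:', np.round(probs, 3))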
Reference: 【455】Python 徒手实现 卷积神经网络 CNN - McDelfino - 博客园 (cnblogs.com)