Input: a 28 x 28 x 1 image.
Convolution layer: eight 3x3 kernels, producing a 26x26x8 output.
Pooling layer: 2x2 max pooling, producing a 13x13x8 output.
Flatten: the 13x13x8 output above is flattened into a fully connected layer of 13 * 13 * 8 = 1352 neurons.
Softmax layer: outputs a 10-dimensional vector, one probability for each of the 10 handwritten-digit classes.
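Before reading the full implementation below, the shape arithmetic above can be verified in a few lines of Python. This is a minimal sketch; the variable names here are illustrative only and are not part of the code that follows:

H, W = 28, 28                                  # input image size
K, F = 3, 8                                    # kernel size, number of filters
conv_h, conv_w = H - K + 1, W - K + 1          # valid 3x3 convolution -> 26 x 26
pool_h, pool_w = conv_h // 2, conv_w // 2      # 2x2 max pooling -> 13 x 13
flat_len = pool_h * pool_w * F                 # flattened length -> 1352
print(conv_h, conv_w, pool_h, pool_w, flat_len)  # 26 26 13 13 1352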
import mnist
import numpy as np


class Conv3x3:
    # A convolution layer using 3x3 filters.

    def __init__(self, num_filters):
        self.num_filters = num_filters
        # Initialize filters with small random values (divide by 9 to keep them small).
        self.filters = np.random.randn(num_filters, 3, 3) / 9

    def iterate_regions(self, image):
        # Generate all possible 3x3 image regions (valid padding).
        h, w = image.shape
        for i in range(h - 2):
            for j in range(w - 2):
                im_region = image[i:(i + 3), j:(j + 3)]
                yield im_region, i, j

    def forward(self, input):
        # input: the 28x28 image; output: 26x26x8.
        # The output array starts at zero and is filled in below.
        h, w = input.shape
        output = np.zeros((h - 2, w - 2, self.num_filters))
        # Cache the input for use in backprop.
        self.last_input = input
        for im_region, i, j in self.iterate_regions(input):
            # Convolution: element-wise product then sum; output[i, j] is a vector of 8 values.
            output[i, j] = np.sum(im_region * self.filters, axis=(1, 2))
        # Return the output so the next layer can use it as input.
        return output

    def backprop(self, d_L_d_out, learn_rate):
        # Gradient of the loss w.r.t. the filters, initialized to zero (8x3x3).
        d_L_d_filters = np.zeros(self.filters.shape)
        # im_region is one 3x3 patch at a time.
        for im_region, i, j in self.iterate_regions(self.last_input):
            for f in range(self.num_filters):
                # Accumulate the gradient one filter at a time.
                d_L_d_filters[f] += d_L_d_out[i, j, f] * im_region
        # Update the filter weights.
        self.filters -= learn_rate * d_L_d_filters
        # This is the first layer, so nothing upstream needs the input gradient.
        return None


class MaxPool2:
    # A max pooling layer using a pool size of 2.

    def iterate_regions(self, image):
        # Generate non-overlapping 2x2 image regions.
        h, w, _ = image.shape
        new_h = h // 2
        new_w = w // 2
        for i in range(new_h):
            for j in range(new_w):
                im_region = image[(i * 2):(i * 2 + 2), (j * 2):(j * 2 + 2)]
                yield im_region, i, j

    def forward(self, input):
        # input: the conv layer's output (26x26x8); output: 13x13x8.
        h, w, num_filters = input.shape
        output = np.zeros((h // 2, w // 2, num_filters))
        # Cache the pooling layer's input (26x26x8) for backprop.
        self.last_input = input
        for im_region, i, j in self.iterate_regions(input):
            output[i, j] = np.amax(im_region, axis=(0, 1))
        return output

    def backprop(self, d_L_d_out):
        '''
        Performs a backward pass of the maxpool layer.
        Returns the loss gradient for this layer's inputs.
        - d_L_d_out is the loss gradient for this layer's outputs.
        '''
        # Gradient w.r.t. the pooling layer's input (26x26x8), initialized to zero.
        d_L_d_input = np.zeros(self.last_input.shape)
        # Each im_region is a 2x2x8 block; only the max pixel in each block
        # passed a value forward, so only it receives a gradient.
        for im_region, i, j in self.iterate_regions(self.last_input):
            h, w, f = im_region.shape
            # Per-channel maxima of this 2x2 region.
            amax = np.amax(im_region, axis=(0, 1))
            for i2 in range(h):
                for j2 in range(w):
                    for f2 in range(f):
                        # If this pixel was the max value, copy the gradient to it.
                        if im_region[i2, j2, f2] == amax[f2]:
                            d_L_d_input[i * 2 + i2, j * 2 + j2, f2] = d_L_d_out[i, j, f2]
        return d_L_d_input


class Softmax:
    # A fully connected layer with softmax activation.

    def __init__(self, input_len, nodes):
        # input_len: number of input nodes (the flattened pooling output).
        # nodes: number of output nodes, 10 in this example.
        # Initialize the weights with small random values; dividing by input_len
        # keeps the initial totals from being too large.
        self.weights = np.random.randn(input_len, nodes) / input_len
        self.biases = np.zeros(nodes)

    def forward(self, input):
        # Cache the original shape (13x13x8) so backprop can restore it.
        self.last_input_shape = input.shape
        input = input.flatten()
        self.last_input = input
        input_len, nodes = self.weights.shape
        totals = np.dot(input, self.weights) + self.biases
        self.last_totals = totals
        exp = np.exp(totals)
        return exp / np.sum(exp, axis=0)

    def backprop(self, d_L_d_out, learn_rate):
        # Only the element of d_L_d_out at the true label is nonzero,
        # so the loop body runs exactly once.
        for i, gradient in enumerate(d_L_d_out):
            if gradient == 0:
                continue
            # e^totals
            t_exp = np.exp(self.last_totals)
            # Sum of all e^totals
            S = np.sum(t_exp)
            # Gradients of out[i] against totals
            d_out_d_t = -t_exp[i] * t_exp / (S ** 2)
            d_out_d_t[i] = t_exp[i] * (S - t_exp[i]) / (S ** 2)
            # Gradients of totals against weights/biases/input
            d_t_d_w = self.last_input
            d_t_d_b = 1
            d_t_d_inputs = self.weights
            # Gradients of loss against totals
            d_L_d_t = gradient * d_out_d_t
            # Gradients of loss against weights/biases/input
            d_L_d_w = d_t_d_w[np.newaxis].T @ d_L_d_t[np.newaxis]
            d_L_d_b = d_L_d_t * d_t_d_b
            d_L_d_inputs = d_t_d_inputs @ d_L_d_t
            self.weights -= learn_rate * d_L_d_w
            self.biases -= learn_rate * d_L_d_b
            # Reshape the flat gradient (1352,) back to 13x13x8 for the pooling layer.
            return d_L_d_inputs.reshape(self.last_input_shape)


# Load the MNIST handwritten-digit data (only the first 1000 examples of each split).
train_images = mnist.train_images()[:1000]
train_labels = mnist.train_labels()[:1000]
test_images = mnist.test_images()[:1000]
test_labels = mnist.test_labels()[:1000]

conv = Conv3x3(8)                   # 28x28x1 -> 26x26x8
pool = MaxPool2()                   # 26x26x8 -> 13x13x8
softmax = Softmax(13 * 13 * 8, 10)  # 13x13x8 -> 10


def forward(image, label):
    # Normalize pixels from [0, 255] to [-0.5, 0.5], then run all three layers.
    out = conv.forward((image / 255) - 0.5)
    out = pool.forward(out)
    out = softmax.forward(out)
    # Cross-entropy loss and whether the prediction was correct.
    loss = -np.log(out[label])
    acc = 1 if np.argmax(out) == label else 0
    return out, loss, acc


def train(im, label, lr=.005):
    out, loss, acc = forward(im, label)
    # Initial gradient of the loss w.r.t. the softmax output.
    gradient = np.zeros(10)
    gradient[label] = -1 / out[label]
    # Backpropagate through the layers in reverse order.
    gradient = softmax.backprop(gradient, lr)
    gradient = pool.backprop(gradient)
    gradient = conv.backprop(gradient, lr)
    return loss, acc


print('Start training')
# Train the CNN for 5 epochs.
for epoch in range(5):
    print('--- Epoch %d ---' % (epoch + 1))
    # Shuffle the training data.
    permutation = np.random.permutation(len(train_images))
    train_images = train_images[permutation]
    train_labels = train_labels[permutation]
    # Train on each example; i is the index, im the image, label the label.
    loss = 0
    num_correct = 0
    for i, (im, label) in enumerate(zip(train_images, train_labels)):
        if i > 0 and i % 100 == 99:
            print(
                '[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%' %
                (i + 1, loss / 100, num_correct)
            )
            loss = 0
            num_correct = 0
        l, acc = train(im, label)
        loss += l
        num_correct += acc

# Test the CNN.
print('\n--- Testing the CNN ---')
loss = 0
num_correct = 0
for im, label in zip(test_images, test_labels):
    _, l, acc = forward(im, label)
    loss += l
    num_correct += acc

num_tests = len(test_images)
print('Test Loss:', loss / num_tests)
print('Test Accuracy:', num_correct / num_tests)
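Once training and the test loop finish, the same forward function can be reused to classify a single image. A minimal usage sketch, assuming the code above has already been run in the same session:

image, label = test_images[0], test_labels[0]
probs, loss, correct = forward(image, label)   # forward() is defined above
print('Predicted digit:', np.argmax(probs))
print('True label:', label)
print('Class probabilities:', np.round(probs, 3))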
Reference: 【455】Python 徒手实现 卷积神经网络 CNN - McDelfino - 博客园 (cnblogs.com)