example4.py
#############################################################
### BASE LINE + Cross Entropy Loss + L1 Regularization ###
#############################################################
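# Trains a 784-30-10 sigmoid network on MNIST with mini-batch stochastic
# gradient descent, cross-entropy cost, and L1 weight regularization,
# then plots the per-epoch test error against the unregularized baseline.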
import numpy as np
import mnist_loader
### Data Loading
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
### Parameters
n_epoch = 30
learning_rate = 0.5
batch_size = 10
lamda = 5
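# 'lamda' is the L1 regularization strength (spelled without the 'b'
# because 'lambda' is a reserved word in Python).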
### Network Architecture
n_node_input = 784
n_node_hidden = 30
n_node_output = 10
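# 784 inputs correspond to the 28x28 MNIST pixels; the 10 outputs are
# one-hot digit scores.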
### Weight & Bias
W2 = np.random.randn(n_node_hidden, n_node_input)
b2 = np.random.randn(n_node_hidden, 1)
W3 = np.random.randn(n_node_output, n_node_hidden)
b3 = np.random.randn(n_node_output, 1)
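# Weights and biases start from standard-normal draws, so each run
# differs; add np.random.seed(...) above if reproducibility matters.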
### Activation Functions
def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))
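# Derivation: with s = sigmoid(z) = 1/(1+exp(-z)),
# ds/dz = exp(-z)/(1+exp(-z))**2 = s*(1-s).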
### Training
test_errors = []
training_errors = []
n = len(training_data)
file_name_common = 'l1reg_ce'+'_nHidden'+str(n_node_hidden)+'.txt'
try:
    training_errors = np.loadtxt(fname='tr_'+file_name_common)
    test_errors = np.loadtxt(fname='test_'+file_name_common)
except OSError:
    for j in range(n_epoch):
        ## Stochastic Gradient Descent
        np.random.shuffle(training_data)
        # for each batch
        sum_of_training_error = 0
        for k in range(0, n, batch_size):
            batch = training_data[k:k+batch_size]
            # average gradient for samples in a batch
            sum_gradient_b3 = 0
            sum_gradient_b2 = 0
            sum_gradient_W3 = 0
            sum_gradient_W2 = 0
            # for each sample
            for x, y in batch:
                ## Feed forward
                a1 = x
                z2 = np.dot(W2, a1) + b2
                a2 = sigmoid(z2)
                z3 = np.dot(W3, a2) + b3
                a3 = sigmoid(z3)
                ## Backpropagation
                # Step 1: Error at the output layer [Cross-Entropy Cost]
                delta_3 = (a3-y)
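                # With sigmoid outputs and cross-entropy cost
                # C = -sum(y*ln(a3) + (1-y)*ln(1-a3)), the sigmoid'(z3)
                # factor cancels and dC/dz3 reduces to a3 - y, so no
                # sigmoid_prime(z3) term appears here (unlike with MSE).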
                # Step 2: Error relationship between two adjacent layers
                delta_2 = sigmoid_prime(z2)*np.dot(W3.transpose(), delta_3)
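                # Chain rule: the output error is pulled back through W3
                # and scaled by the local slope sigmoid'(z2) of the
                # hidden activation.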
                # Step 3: Gradient of C in terms of bias
                gradient_b3 = delta_3
                gradient_b2 = delta_2
                # Step 4: Gradient of C in terms of weight
                gradient_W3 = np.dot(delta_3, a2.transpose())
                gradient_W2 = np.dot(delta_2, a1.transpose())
                # update gradients
                sum_gradient_b3 += gradient_b3
                sum_gradient_b2 += gradient_b2
                sum_gradient_W3 += gradient_W3
                sum_gradient_W2 += gradient_W2
                ## Training Error
                sum_of_training_error += int(np.argmax(a3) != np.argmax(y))
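                # Note: the error is tallied while weights are still
                # changing, so early batches in an epoch are scored by a
                # less-trained network than later ones.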
            # Update Biases
            b3 -= learning_rate * sum_gradient_b3 / batch_size
            b2 -= learning_rate * sum_gradient_b2 / batch_size
            # Update Weights
            # L1 regularization
            W3 -= (learning_rate * lamda / n)*np.sign(W3)
            W2 -= (learning_rate * lamda / n)*np.sign(W2)
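            # The L1 term shrinks every weight toward zero by a constant
            # step: w <- w - eta*(lamda/n)*sign(w), the (sub)gradient of
            # the penalty (lamda/n)*sum(|w|) added to the cost.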
            # gradient descent step
            W3 -= learning_rate * sum_gradient_W3 / batch_size
            W2 -= learning_rate * sum_gradient_W2 / batch_size
        # Report Training Error
        print("[TRAIN_ERROR] Epoch %02d: %5d / %05d" % (j, sum_of_training_error, n))
        training_errors.append(float(sum_of_training_error) / n)
        ### Test
        n_test = len(test_data)
        sum_of_test_error = 0
        for x, y in test_data:
            ## Feed forward
            a1 = x
            z2 = np.dot(W2, a1) + b2
            a2 = sigmoid(z2)
            z3 = np.dot(W3, a2) + b3
            a3 = sigmoid(z3)
            ## Test Error
            # in the test data, the label is a digit, not a one-hot vector
            # as in the training data
            sum_of_test_error += int(np.argmax(a3) != y)
        # Report Test Error
        print("[ TEST_ERROR] Epoch %02d: %5d / %05d" % (j, sum_of_test_error, n_test))
        test_errors.append(float(sum_of_test_error)/n_test)
    ## Save Results
    np.savetxt('tr_'+file_name_common, np.array(training_errors), fmt='%.5f')
    np.savetxt('test_'+file_name_common, np.array(test_errors), fmt='%.5f')
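# The try/except above caches results: if both .txt files already exist,
# training is skipped and the saved curves are plotted; delete the files
# to force a fresh run.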
### Plot results
import matplotlib.pyplot as plt
idx = np.arange(1,n_epoch+1)
plt.plot(idx, np.array(test_errors)*100,'ro-', label='with L1 regularization')
try:
    # Load baseline (cross-entropy run without L1 regularization)
    file_name_common = 'ce'+'_nHidden'+str(n_node_hidden)+'.txt'
    baseline_errors = np.loadtxt(fname='test_'+file_name_common)
    plt.plot(idx, np.array(baseline_errors)*100, 'bo-', label='without L1 regularization')
except OSError:
    print('No baseline result found')
plt.legend(loc='upper center', shadow=True)
font = {'family': 'sans-serif',
        'weight': 'bold',
        'size': 15}
plt.rc('font', **font)
plt.xlabel('Epoch', fontsize=22)
plt.ylabel('Test error rate [%]', fontsize=22)
plt.grid(True)
plt.show()