import numpy as np
from keras.utils import to_categorical
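# Train a small CNN on MNIST from scratch in NumPy; Keras is used only for
# the one-hot encoding helper. The .npz archive below uses the same key
# layout ('x_train', 'y_train', 'x_test', 'y_test') as the file downloaded
# by keras.datasets.mnist.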
path="F:\mnist.npz"
f = np.load(path)
train_X, train_y = f['x_train'], f['y_train']
test_X, test_y = f['x_test'], f['y_test']
f.close()
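# Reshape to single-channel HxWx1 images, scale pixels to [0, 1], and one-hot
# encode the labels as 10x1 column vectors to match the FC layer shapes.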
train_X = train_X.reshape(-1, 28, 28, 1)
train_X = train_X.astype("float32")
train_X /= 255
train_y = to_categorical(train_y, 10).reshape(-1, 10, 1)
test_X = test_X.reshape(-1, 28, 28, 1)
test_X = test_X.astype("float32")
test_X /= 255
test_y = to_categorical(test_y, 10).reshape(-1, 10, 1)
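# 2x2 mean pooling with stride 2; halves both spatial dimensions.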
def meanpool(Input):
    R = Input.shape[0]
    C = Input.shape[1]
    D = Input.shape[2]
    K = 2
    output = np.zeros((R // 2, C // 2, D), dtype="float32")
    for i in range(output.shape[0]):
        for j in range(output.shape[1]):
            for d in range(output.shape[2]):
                output[i, j, d] = np.sum(Input[2*i:2*i+K, 2*j:2*j+K, d]) / (K * K)
    return output
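# Move the channel axis to the front, equivalent to np.transpose(Input, (2, 0, 1)).
# Defined here but not used anywhere else in the script.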
def map_to_kernel(Input):
    R = Input.shape[0]
    C = Input.shape[1]
    D = Input.shape[2]
    output = np.zeros((D, R, C))
    for n in range(D):
        output[n, :, :] = Input[:, :, n]
    return output
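# Valid cross-correlation of an RxCxD input with N KxKxD kernels plus a
# per-kernel bias; returns an (R-K+1) x (C-K+1) x N feature map.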
def conv(Input, W, b):
    R = Input.shape[0]
    C = Input.shape[1]
    D = Input.shape[2]
    N = W.shape[0]
    K = W.shape[1]
    output = np.zeros((R - K + 1, C - K + 1, N), dtype="float32")
    for i in range(output.shape[0]):
        for j in range(output.shape[1]):
            for n in range(N):
                output[i, j, n] = np.sum(Input[i:i+K, j:j+K, :] * W[n, :, :, :]) + b[n, 0]
    return output
def relu(Input):
    return np.maximum(Input, 0)
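# Softmax over the logits of a single sample.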
def softmax(Input):
    # Shift by the max before exponentiating for numerical stability.
    e = np.exp(Input - np.max(Input))
    return e / np.sum(e)
def drelu(Input):
    # np.int was removed in NumPy 1.24; use a float32 mask instead.
    return (Input > 0).astype("float32")
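# Sum each channel of a delta map; this is the bias gradient of a conv layer.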
def mat_sum(Input):
    D = Input.shape[2]
    output = np.zeros((D, 1))
    for d in range(D):
        output[d, 0] = np.sum(Input[:, :, d])
    return output
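# Backward pass of 2x2 mean pooling: each pooled gradient is spread evenly
# (divided by 4) over the 2x2 block it came from.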
def upsample(Input):
    R = Input.shape[0]
    C = Input.shape[1]
    D = Input.shape[2]
    output = np.zeros((2 * R, 2 * C, D), dtype="float32")
    for i in range(R):
        for j in range(C):
            for d in range(D):
                output[2*i:2*i+2, 2*j:2*j+2, d] = Input[i, j, d] / 4
    return output
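# Rotate every kernel by 180 degrees and swap the input/output channel axes,
# preparing the weights for the full convolution in the backward pass.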
def rot180(Input):
    N = Input.shape[0]
    R = Input.shape[1]
    C = Input.shape[2]
    D = Input.shape[3]
    output = np.zeros((D, R, C, N), dtype="float32")
    for d in range(D):
        for n in range(N):
            for i in range(R):
                for j in range(C):
                    output[d, i, j, n] = Input[n, R-i-1, C-j-1, d]
    return output
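# Zero-pad by K-1 on every side so that a subsequent valid convolution with a
# KxK kernel acts as a full convolution.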
def padding(Input, K):
    R = Input.shape[0]
    C = Input.shape[1]
    D = Input.shape[2]
    output = np.zeros((R + 2*K - 2, C + 2*K - 2, D), dtype="float32")
    for i in range(R):
        for j in range(C):
            output[K-1+i, K-1+j, :] = Input[i, j, :]
    return output
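# Weight gradient of a conv layer: correlate the layer's input with the output
# delta, one (output channel, input channel) pair at a time.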
def back_conv(Input, delta):
    r = Input.shape[0]
    c = Input.shape[1]
    d = Input.shape[2]
    kx = delta.shape[0]
    ky = delta.shape[1]
    n = delta.shape[2]
    output = np.zeros((n, r - kx + 1, c - ky + 1, d))
    for nn in range(n):
        for dd in range(d):
            for i in range(output.shape[1]):
                for j in range(output.shape[2]):
                    output[nn, i, j, dd] = np.sum(Input[i:i+kx, j:j+ky, dd] * delta[:, :, nn])
    return output
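# Architecture: 28x28x1 -> conv 5x5 (5 filters) -> 24x24x5 -> meanpool -> 12x12x5
#            -> conv 3x3 (5 filters) -> 10x10x5 -> meanpool -> 5x5x5 (= 125)
#            -> FC 125->60 -> FC 60->10 -> softmax,
# trained with per-sample SGD and cross-entropy loss.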
def train_test(train_X, train_y, test_X, test_y, epoch, learn_rate):
    # He initialization for the ReLU conv layers, Glorot for the FC layers.
    Wc1 = np.sqrt(2 / (5 * 5 * 1)) * np.random.randn(5, 5, 5, 1)
    bc1 = np.zeros((5, 1))
    Wc2 = np.sqrt(2 / (3 * 3 * 5)) * np.random.randn(5, 3, 3, 5)
    bc2 = np.zeros((5, 1))
    Wf1 = np.sqrt(2 / (125 + 60)) * np.random.randn(60, 125)
    bf1 = np.zeros((60, 1))
    Wf2 = np.sqrt(2 / (60 + 10)) * np.random.randn(10, 60)
    bf2 = np.zeros((10, 1))
print("TRAIN:")
for e in range(epoch):
loss=0
precision=0
print("Epoch {}".format(e+1))
for i in range(train_X.shape[0]):
x=train_X[i]
y=train_y[i]
conv1_z=conv(x,Wc1,bc1)
conv1_a=relu(conv1_z)
pool1_z=meanpool(conv1_a)
pool1_a=pool1_z
conv2_z=conv(pool1_a,Wc2,bc2)
conv2_a=relu(conv2_z)
pool2_z=meanpool(conv2_a)
pool2_a=pool2_z.reshape(-1,1)
fc1_z=np.dot(Wf1,pool2_a)+bf1
fc1_a=relu(fc1_z)
fc2_z=np.dot(Wf2,fc1_a)+bf2
fc2_a=softmax(fc2_z)
dfc2=fc2_a-y
dWf2=np.dot(dfc2,fc1_a.T)
dbf2=dfc2
dfc1=np.dot(Wf2.T,dfc2)*drelu(fc1_z)
dWf1=np.dot(dfc1,pool2_a.T)
dbf1=dfc1
dpool2=np.dot(Wf1.T,dfc1)
dpool2=dpool2.reshape(5,5,5)
dconv2=upsample(dpool2)*drelu(conv2_z)
dWc2=back_conv(pool1_a,dconv2)
dbc2=mat_sum(dconv2)
dpool1=conv(padding(dconv2,3),rot180(Wc2),np.zeros((5,1)))
dconv1=upsample(dpool1)*drelu(conv1_z)
dWc1=back_conv(x,dconv1)
dbc1=mat_sum(dconv1)
Wc1-=learn_rate*dWc1
bc1-=learn_rate*dbc1
Wc2-=learn_rate*dWc2
bc2-=learn_rate*dbc2
Wf1-=learn_rate*dWf1
bf1-=learn_rate*dbf1
Wf2-=learn_rate*dWf2
bf2-=learn_rate*dbf2
loss+=-np.sum(y*np.log(fc2_a))
precision+=np.equal(np.argmax(fc2_a),np.argmax(y))
if i%100==0:
print(loss/100)
print(precision/100)
loss=0
precision=0
loss=0
precision=0
    for i in range(test_X.shape[0]):
        x = test_X[i]
        y = test_y[i]
        # Forward pass only; no parameter updates at test time.
        conv1_z = conv(x, Wc1, bc1)
        conv1_a = relu(conv1_z)
        pool1_z = meanpool(conv1_a)
        pool1_a = pool1_z
        conv2_z = conv(pool1_a, Wc2, bc2)
        conv2_a = relu(conv2_z)
        pool2_z = meanpool(conv2_a)
        pool2_a = pool2_z.reshape(-1, 1)
        fc1_z = np.dot(Wf1, pool2_a) + bf1
        fc1_a = relu(fc1_z)
        fc2_z = np.dot(Wf2, fc1_a) + bf2
        fc2_a = softmax(fc2_z)
        loss += -np.sum(y * np.log(fc2_a + 1e-12))
        precision += np.equal(np.argmax(fc2_a), np.argmax(y))
    print("TEST:")
    print("loss: {:.4f}  accuracy: {:.4f}".format(loss / test_X.shape[0], precision / test_X.shape[0]))
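# 4 epochs of per-sample SGD at learning rate 0.005; expect this to run slowly,
# since every convolution is an explicit Python loop rather than vectorized code.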
train_test(train_X, train_y, test_X, test_y, 4, 0.005)