1 # coding:utf8 2 import numpy as np 3 import cPickle 4 import theano 5 import os 6 import theano.tensor as T 7 8 class SoftMax: 9 def __init__(self,MAXT=50,step=0.15,landa=0): 10 self.MAXT = MAXT 11 self.step = step 12 self.landa = landa #在此权重衰减项未能提升正确率 13 14 def load_theta(self,datapath): 15 self.theta = cPickle.load(open(datapath,'rb')) 16 17 def process_train(self,data,label,typenum,batch_size=500): 18 valuenum=data.shape[1] 19 batches = data.shape[0] / batch_size 20 data = theano.shared(np.asarray(data,dtype=theano.config.floatX)) 21 label = T.cast(theano.shared(np.asarray(label,dtype=theano.config.floatX)), 'int32') 22 x = T.matrix('x') 23 y = T.ivector('y') 24 index = T.lscalar() 25 theta = theano.shared(value=0.001*np.zeros((valuenum,typenum), 26 dtype=theano.config.floatX), 27 name='theta',borrow=True) 28 hx=T.nnet.softmax(T.dot(x,theta)) 29 cost = -T.mean(T.log(hx)[T.arange(y.shape[0]), y]) +0.5*self.landa*T.sum(theta ** 2) #权重衰减项 30 g_theta = T.grad(cost, theta) 31 updates = [(theta, theta - self.step * g_theta)] 32 train_model = theano.function( 33 inputs=[index],outputs=cost,updates=updates,givens={ 34 x: data[index * batch_size: (index + 1) * batch_size], 35 y: label[index * batch_size: (index + 1) * batch_size] 36 },allow_input_downcast=True 37 ) 38 lastcostJ = np.inf 39 stop = False 40 epoch = 0 41 costj=[] 42 while (epoch < self.MAXT) and (not stop): 43 epoch = epoch + 1 44 for minibatch_index in xrange(batches): 45 costj.append(train_model(minibatch_index)) 46 if np.mean(costj)>=lastcostJ: 47 print "costJ is increasing !!!" 48 stop=True 49 else: 50 lastcostJ=np.mean(costj) 51 print(( 'epoch %i, minibatch %i/%i,averange cost is %f') % 52 (epoch,minibatch_index + 1,batches,lastcostJ)) 53 self.theta=theta 54 if not os.path.exists('data/softmax.pkl'): 55 f= open("data/softmax.pkl",'wb') 56 cPickle.dump(self.theta.get_value(),f) 57 f.close() 58 return self.theta.get_value() 59 60 def process_test(self,data,label,batch_size=500): 61 batches = label.shape[0] / batch_size 62 data = theano.shared(np.asarray(data,dtype=theano.config.floatX)) 63 label = T.cast(theano.shared(np.asarray(label,dtype=theano.config.floatX)), 'int32') 64 x = T.matrix('x') 65 y = T.ivector('y') 66 index = T.lscalar() 67 hx=T.nnet.softmax(T.dot(x,self.theta)) 68 predict = T.argmax(hx, axis=1) 69 errors=T.mean(T.neq(predict, y)) 70 test_model = theano.function( 71 inputs=[index],outputs=errors,givens={ 72 x: data[index * batch_size: (index + 1) * batch_size], 73 y: label[index * batch_size: (index + 1) * batch_size] 74 },allow_input_downcast=True 75 ) 76 test_losses=[] 77 for minibatch_index in xrange(batches): 78 test_losses.append(test_model(minibatch_index)) 79 test_score = np.mean(test_losses) 80 print(( 'minibatch %i/%i, test error of model %f %%') % 81 (minibatch_index + 1,batches,test_score * 100.)) 82 83 def h(self,x): 84 m = np.exp(np.dot(x,self.theta)) 85 sump = np.sum(m,axis=1) 86 return m/sump 87 88 def predict(self,x): 89 return np.argmax(self.h(x),axis=1) 90 91 if __name__ == '__main__': 92 f = open('mnist.pkl', 'rb') 93 training_data, validation_data, test_data = cPickle.load(f) 94 training_inputs = [np.reshape(x, 784) for x in training_data[0]] 95 data = np.array(training_inputs) 96 training_inputs = [np.reshape(x, 784) for x in validation_data[0]] 97 vdata = np.array(training_inputs) 98 f.close() 99 softmax = SoftMax()100 softmax.process_train(data,training_data[1],10)101 softmax.process_test(vdata,validation_data[1])102 #minibatch 20/20, test error of model 7.530000 %