    # NNTOOLBOX.PY
    #
# This file contains a collection of functions for training
    # neural networks.
    #
    # date: July 28, 2019
    # author: Benjamin Riggan
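#
# Functions defined below: nnsetup, saesetup, nnff, nnbp, nnapplygrads,
# nneval, nntrain, saetrain.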
    #
    import numpy as np
    import time
    
# nnsetup initializes a feedforward network from a list of layer sizes
def nnsetup(architecture):
    	nn = {} # define empty dictionary
    	nn['size'] = architecture
    	nn['n'] = len(nn['size'])
    	#print(nn['size'])
    	#print(nn['n'])
    
	nn['activation_function'] = 'tanh_opt' # activation function for hidden layers: 'sigm' (sigmoid), 'tanh_opt' (tanh), or 'plin' (linear)
	nn['learning_rate'] = 2 # learning rate. Note: typically needs to be lower when using the 'sigm' activation function and non-normalized inputs.
    	nn['momentum'] = 0.9 # Momentum
    	nn['scaling_learningRate'] = 1 # scaling factor for the learning rate (each epoch)
    	nn['weightPenaltyL2'] = 0.00001 # L2 regularization
    	nn['nonSparsityPenalty'] = 0 # non sparsity penalty
    	nn['sparsityTarget'] = 0.05 # sparsity target
    	nn['inputZeroMaskedFraction'] = 0 # used for denoising autoencoders
    	nn['dropoutFraction'] = 0 # dropout level
	nn['testing'] = 0 # set to 1 during evaluation so dropout rescales activations instead of masking them
	nn['output'] = 'sigm' # output unit: 'sigm' (logistic), 'tanh_opt' (tanh), or 'plin' (linear)
    	nn['W'] = [None] * (nn['n']-1) # empty list of weight matrices
    	nn['vW'] = [None] * (nn['n']-1) # empty list of momentum terms
	nn['verbose'] = True # set to False to suppress printed progress
    
    	for i in range(1,nn['n']):
    		# weights and weight momentum		
    		nn['W'][i-1] = ((np.random.rand(int(nn['size'][i]), int(nn['size'][i-1])+1) - 0.5) * 2.0 * np.sqrt(6.0 / (nn['size'][i] + nn['size'][i-1])))
    		nn['W'][i-1][:,0] = 0 # init bias to 0
    		nn['vW'][i-1] = np.zeros(nn['W'][i-1].shape)
    		
    		# average activations (for use with sparsity)
    		nn['p'] = np.zeros((1, int(nn['size'][i])))
    
    	return nn
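
# Illustrative example (the layer sizes are arbitrary): nnsetup([20, 10, 3]) builds a
# 20-10-3 network where nn['W'][0] has shape (10, 21) and nn['W'][1] has shape (3, 11);
# the extra first column of each weight matrix holds the bias, initialized to zero.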
    
    # stacked autoencoder setup
    def saesetup(architecture):
    	sae = {'n': len(architecture)-1} # number of autoencoders
    	for i in range(1,len(architecture)):
    		key = 'ae{}'.format(i-1)
    		sae[key] = nnsetup([architecture[i-1], architecture[i], architecture[i-1]])
    	return sae
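
# Illustrative example: saesetup([20, 10, 5]) returns sae['n'] = 2 with
# sae['ae0'] = nnsetup([20, 10, 20]) and sae['ae1'] = nnsetup([10, 5, 10]),
# i.e. one symmetric autoencoder per pair of adjacent layer sizes.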
    
    # nnff performs a feedforward pass
    def nnff(nn, x, y=None):
    	n = nn['n']
    	m = x.shape[0]
    
    	x = np.hstack((np.ones((m,1)), x))
    	nn['a'] = [None] * n
    	nn['a'][0] = x
    
    	if nn['dropoutFraction'] > 0:
    		nn['dropoutMask'] = [None] * n
    
	# feedforward pass through the hidden layers
    	for i in range(1,n-1):
    		if nn['activation_function'] == 'plin':
    			nn['a'][i] = np.dot(nn['a'][i-1],nn['W'][i-1].T)
    		elif nn['activation_function'] == 'sigm':
    			nn['a'][i] = 1. / (1. + np.exp(- np.dot(nn['a'][i-1],nn['W'][i-1].T)))
    		elif nn['activation_function'] == 'tanh_opt':
    			nn['a'][i] = np.tanh(np.dot(nn['a'][i-1],nn['W'][i-1].T))
    		
    		# dropout
    		if nn['dropoutFraction'] > 0:
    			if nn['testing']:
    				nn['a'][i] = nn['a'][i] * (1 - nn['dropoutFraction'])
    			else:
    				nn['dropoutMask'][i] = np.random.rand(nn['a'][i].shape[0],nn['a'][i].shape[1])>nn['dropoutFraction']
    				nn['a'][i] = nn['a'][i] * nn['dropoutMask'][i]
    
    		# Add the bias term 
    		nn['a'][i] = np.hstack((np.ones((m,1)), nn['a'][i]))	
    
    	if nn['output'] == 'plin':
    		nn['a'][n-1] = np.dot(nn['a'][n-2],nn['W'][n-2].T)
    	elif nn['output'] == 'sigm':
    		nn['a'][n-1] = 1. / (1. + np.exp(-np.dot(nn['a'][n-2],nn['W'][n-2].T)))
    	elif nn['output'] == 'tanh_opt':
    		nn['a'][n-1] = np.tanh(np.dot(nn['a'][n-2],nn['W'][n-2].T))
    	
	# error and loss (only when targets are provided)
	if y is not None:
		nn['e'] = y - nn['a'][n-1]
		nn['L'] = 0.5 * np.sum(nn['e']**2) / m
    
    	return nn
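
# Shapes after nn = nnff(nn, x, y) (illustrative note): nn['a'][0] is x with a bias
# column of ones prepended, each hidden activation nn['a'][i] also carries a leading
# bias column, and nn['a'][n-1] is the network output with no bias column.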
    
# nnbp backpropagates the output error and stores the weight gradients in nn['dW']
def nnbp(nn):
    	n = nn['n']
    	d = [None] * n
    
	if nn['output'] == 'sigm':
		d[n-1] = -nn['e'] * (nn['a'][n-1] * (1 - nn['a'][n-1]))
	elif nn['output'] == 'tanh_opt':
		d[n-1] = -nn['e'] * (1 - nn['a'][n-1]**2)
	elif nn['output'] == 'plin':
		d[n-1] = -nn['e']
    
    	for i in range(n-2,0,-1):
    		# derivative of activation function
    		if nn['activation_function'] == 'plin':
    			d_act = 1
    		elif nn['activation_function'] == 'sigm':
    			d_act = nn['a'][i] * (1 - nn['a'][i])
    		elif nn['activation_function'] == 'tanh_opt':
    			d_act = (1 - nn['a'][i]**2)
    		
    		# Backpropagate first derivative
    		if i+2 == n:
    			d[i] = np.dot(d[i+1], nn['W'][i]) * d_act
    		else:
    			d[i] = np.dot(d[i+1][:,1:], nn['W'][i]) * d_act 
    		
    		if nn['dropoutFraction'] > 0:
    			d[i] = d[i] * np.hstack((np.ones((d[i].shape[0], 1)), nn['dropoutMask'][i]))
	nn['dW'] = [None] * (n-1) # one gradient matrix per weight matrix
    	for i in range(n-1):
    		if i+2 == n:
    			nn['dW'][i] = np.dot(d[i+1].T, nn['a'][i]) / d[i+1].shape[0]
    		else:
    			nn['dW'][i] = np.dot(d[i+1][:,1:].T, nn['a'][i]) / d[i+1].shape[0]
    
    	return nn
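
# Optional finite-difference gradient check (a sketch added for illustration; it is
# not part of the original toolbox). It compares the analytic gradient computed by
# nnbp against a central difference of the loss computed by nnff for a single weight.
# It assumes nn['dropoutFraction'] == 0 so that the forward pass is deterministic.
def nncheckgrad(nn, x, y, layer=0, row=0, col=1, eps=1e-5):
	nn = nnff(nn, x, y)
	nn = nnbp(nn)
	analytic = nn['dW'][layer][row, col]

	w0 = nn['W'][layer][row, col]
	nn['W'][layer][row, col] = w0 + eps
	loss_plus = nnff(nn, x, y)['L']
	nn['W'][layer][row, col] = w0 - eps
	loss_minus = nnff(nn, x, y)['L']
	nn['W'][layer][row, col] = w0 # restore the original weight

	numeric = (loss_plus - loss_minus) / (2.0 * eps)
	# the two values should agree up to finite-difference error when nnbp is correct
	return analytic, numeric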
    
    # nnapplygrads updates weights and biases with calculated gradients
    def nnapplygrads(nn):
    	for i in range(0,nn['n']-1):
    		if nn['weightPenaltyL2']>0:
    			dW = nn['dW'][i] + nn['weightPenaltyL2'] * np.hstack((np.zeros((nn['W'][i].shape[0],1)), nn['W'][i][:,1:]))
    		else:
    			dW = nn['dW'][i]
    		
    		dW = nn['learning_rate'] * dW
    
    		if nn['momentum'] > 0:
    			nn['vW'][i] = nn['momentum'] * nn['vW'][i] + dW
    			dW = nn['vW'][i]
    		nn['W'][i] = nn['W'][i] - dW
    
    	return nn
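
# Update rule applied above (summary): with learning rate eta, L2 penalty lambda,
# and momentum mu,
#   vW <- mu * vW + eta * (dW + lambda * W)   (bias column excluded from the L2 term)
#   W  <- W - vW
# which reduces to plain gradient descent when mu = 0.

# nneval records the full-batch training (and optional validation) loss with the
# network switched to testing mode, so dropout rescales rather than masks activations.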
    def nneval(nn, loss, train_x, train_y, val_x=None, val_y=None):
    	nn['testing'] = 1
    	nn = nnff(nn, train_x, train_y)
    	loss['train']['e'].append(nn['L'])
    
    	if val_x is not None and val_y is not None:
    		nn = nnff(nn, val_x, val_y)
    		loss['val']['e'].append(nn['L'])
    	nn['testing'] = 0
    
    	return loss
    
    def nntrain(nn, train_x, train_y, opts, val_x=None, val_y=None):
    	# trains a neural net
	loss = {'train': {'e': [], 'e_frac': []}, 'val': {'e': [], 'e_frac': []}}
    	opts['validation'] = 0
    	if val_x is not None and val_y is not None:
    		opts['validation'] = 1
    
    	m = train_x.shape[0]
    	batchsize = opts['batchsize']
    	numepochs = opts['numepochs']
    	eta0 = nn['learning_rate']
	numbatches = int(np.floor(m / batchsize))
    	if nn['verbose'] is True:
    		print('numbatches = {}'.format(numbatches))
    
	L = np.zeros((numepochs*numbatches, 1))
    	n = 0
    
    	for i in range(numepochs):
    		start = time.time()
    		kk = np.random.permutation(m)
    		#kk = np.arange(m)
    		for l in range(numbatches):
    			batch_x = train_x[ kk[l * batchsize : (l+1) * batchsize], :]
    			batch_y = train_y[ kk[l * batchsize : (l+1) * batchsize], :]
    			
    			#print('feed forward...')
    			nn = nnff(nn, batch_x, batch_y)
    			#print('backprop...')
    			nn = nnbp(nn)
    			#print('applygrads...')
			nn = nnapplygrads(nn)
    
    			L[n] = nn['L']
    			n = n + 1
    		elapsed = time.time() - start
    		if opts['validation'] == 1:
    			loss = nneval(nn, loss, train_x, train_y, val_x, val_y)
    			str_perf = '; Full-batch train mse = {}, val mse = {}'.format(loss['train']['e'][-1], loss['val']['e'][-1])
    		else:
    			#print('eval...')
    			loss = nneval(nn, loss, train_x, train_y)
    			str_perf = '; Full-batch train mse = {}'.format(loss['train']['e'][-1])
    		if nn['verbose'] is True:
			print('epoch {} / {}. Took {} seconds. Mini-batch mean squared error on training set is {} {}'.format(i+1, opts['numepochs'], elapsed, np.mean(L[n-numbatches:n]), str_perf))
    		if 'epsilon' in opts:
    			if opts['epsilon'] > 0 and i>0:
    				absdiff = np.abs(loss['train']['e'][-1] - loss['train']['e'][-2])
				if absdiff < opts['epsilon']:
					if nn['verbose'] is True:
						print('Network converged: {}'.format(absdiff))
					break # stop training once the epoch-to-epoch improvement falls below epsilon
    	return nn
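
# Typical call (illustrative; the values below are placeholders, not tuned settings):
#   opts = {'batchsize': 100, 'numepochs': 20, 'epsilon': 1e-6}
#   nn = nntrain(nn, train_x, train_y, opts, val_x, val_y)
# where train_x is (num_samples, num_features) and train_y is (num_samples, num_outputs),
# both scaled to the range of the chosen output unit (e.g. [0, 1] for 'sigm').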
    
# saetrain greedily pretrains each autoencoder, feeding each trained layer's hidden
# activations (with the bias column removed) forward as the input to the next one
def saetrain(sae, x, opts, val_x=None):
    	for i in range(sae['n']):
		print('Training AE {} / {}'.format(i+1, sae['n']))
    		key = 'ae{}'.format(i)
    		if val_x is None:
    			sae[key] = nntrain(sae[key], x, x, opts)
    			t = nnff(sae[key], x, x)
    			x = t['a'][1]
    			# remove bias
    			x = x[:,1:]
    		else:
    			sae[key] = nntrain(sae[key], x, x, opts, val_x, val_x)
    			t = nnff(sae[key], x, x)
    			x = t['a'][1]
    			# remove bias
    			x = x[:,1:]
    			t = nnff(sae[key], val_x, val_x)
    			val_x = t['a'][1]
    			# remove bias
    			val_x = val_x[:,1:]
    	return sae
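
# Minimal end-to-end smoke test (a sketch added for illustration; the data are random
# and the sizes arbitrary, so it only exercises the code paths above).
if __name__ == '__main__':
	np.random.seed(0)
	x_demo = np.random.rand(256, 20) # 256 samples, 20 features in [0, 1]
	y_demo = np.random.rand(256, 3)  # 3 targets in [0, 1] for the 'sigm' output
	opts = {'batchsize': 32, 'numepochs': 5}

	# layer-wise pretraining of a single 20-10-20 autoencoder
	sae = saetrain(saesetup([20, 10]), x_demo, opts)

	# supervised training of a small 20-10-3 network
	nn = nnsetup([20, 10, 3])
	nn['learning_rate'] = 0.5 # smaller step size for this toy problem
	nn = nntrain(nn, x_demo, y_demo, opts)

	# quick gradient sanity check on the trained network
	print('gradcheck (analytic, numeric):', nncheckgrad(nn, x_demo[:8], y_demo[:8]))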