Commit c270ff0a authored by briggan2

initial commit

parent a0cc4638
activations.py 0 → 100755
import numpy as np

def relu(x):
    ''' Rectified Linear Unit (ReLU) '''
    return np.maximum(0., x)

def drelu(x):
    ''' derivative of ReLU '''
    y = np.zeros(x.shape)
    y[np.where(x > 0)] = 1
    return y

def softmax(x):
    ''' softmax (shifted by the row max for numerical stability) '''
    z = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return z / np.sum(z, axis=-1, keepdims=True)
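A quick sanity check of these activations (illustrative only; the names come straight from this file):

import numpy as np
from activations import relu, drelu, softmax

x = np.array([-1., 0., 2.])
relu(x)                          # array([0., 0., 2.])
drelu(x)                         # array([0., 0., 1.])  (derivative taken as 0 at x = 0)
softmax(np.array([[1., 1.]]))    # array([[0.5, 0.5]])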
cnn.py 0 → 100755
import numpy as np
import argparse
import scipy.linalg
from cnn_helper import *
from cnnff import cnnff
from cnnbp import cnnbp
from cifar10 import cifar10
import matplotlib.pyplot as plt
def cross_entropy_loss(labels, pred):
    ''' cross_entropy_loss
        compute the average cross-entropy between the ground truth labels and predictions over a single batch
        inputs:
            labels: ground truth one-hot labels (N x Nc numpy array, where Nc is the number of classes)
            pred: prediction from CNN (N x Nc numpy array)
        outputs:
            loss: average cross-entropy loss over batch (scalar)
    '''
    # small epsilon guards against log(0)
    loss = -np.sum(labels * np.log(pred + 1e-12)) / labels.shape[0]
    return loss
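# Worked example (illustrative): two confident, correct one-hot predictions
# give a small average loss:
#   labels = np.array([[1., 0.], [0., 1.]])
#   pred = np.array([[0.99, 0.01], [0.02, 0.98]])
#   cross_entropy_loss(labels, pred)  # -(log(0.99) + log(0.98)) / 2 ≈ 0.015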
def accuracy(labels, pred):
    ''' accuracy
        compute the classification accuracy over a single batch
        inputs:
            labels: ground truth one-hot labels (N x Nc numpy array)
            pred: prediction from CNN (N x Nc numpy array, where Nc is the number of classes)
        outputs:
            acc: fraction of correctly classified examples (scalar)
    '''
    labels = np.argmax(labels, axis=-1)
    pred = np.argmax(pred, axis=-1)
    return np.mean(pred == labels)
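# Worked example (illustrative): both row-wise argmaxes agree, so accuracy is 1.0:
#   labels = np.array([[1., 0.], [0., 1.]])
#   pred = np.array([[0.9, 0.1], [0.2, 0.8]])
#   accuracy(labels, pred)  # 1.0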
def apply_gradients(net, lr=0.001):
    ''' apply_gradients
        performs gradient descent based updates
        inputs:
            net: cnn network (which stores gradients)
            lr: learning rate (default 0.001)
        output:
            net: updated network
    '''
    for n in range(1, len(net)):
        layer = net[n]
        if layer['type'] == 'Conv':
            layer['W'] = layer['W'] - lr * layer['gradW']
            layer['b'] = layer['b'] - lr * layer['gradb']
    return net
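# Note: this is plain mini-batch SGD; e.g. with lr = 0.001, a gradient entry of
# 2.0 moves the corresponding weight by -0.002 on this update.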
def trainCNN(X, Y, **args):
    ''' trainCNN
        inputs:
            X: images (n x 32 x 32 x 3 array)
            Y: labels (n x 10 one_hot array)
        args:
            nepochs: number of epochs (default 100)
            bsize: batch size (default 32)
            lr: learning rate (default 0.001)
        returns:
            L: loss per epoch
            A: accuracy per epoch
    '''
    # default parameters (set individually so partial keyword overrides still work)
    args.setdefault('nepochs', 100)
    args.setdefault('bsize', 32)
    args.setdefault('lr', 0.001)
    nepochs = args['nepochs']
    bsize = args['bsize']
    lr = args['lr']
    # define CNN
    net = [{'type': 'Input', 'output': None},  # Layer 0
           {'type': 'Conv', 'shape': (16, 5, 5, 3), 'stride': 1, 'activation': 'ReLU', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None},  # Layer 1
           {'type': 'Pool', 'shape': (1, 2, 2, 1), 'stride': 2, 'activation': None, 'd': None, 'output': None},  # Layer 2
           {'type': 'Conv', 'shape': (32, 5, 5, 16), 'stride': 1, 'activation': 'ReLU', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None},  # Layer 3
           {'type': 'Pool', 'shape': (1, 2, 2, 1), 'stride': 2, 'activation': None, 'd': None, 'output': None},  # Layer 4
           {'type': 'Conv', 'shape': (64, 5, 5, 32), 'stride': 1, 'activation': 'ReLU', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None},  # Layer 5
           {'type': 'Conv', 'shape': (10, 1, 1, 64), 'stride': 1, 'activation': 'softmax', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None}]  # Layer 6
    # initialize CNN
    net = initCNN(net)
    # loss and accuracy per epoch
    L = []
    A = []
    for epoch in range(nepochs):
        # shuffle images and labels
        # compute cross_entropy_loss and accuracy over all images
        # *** feed forward
        # *** loss
        # *** accuracy
        # print loss and accuracy every epoch
        print('epoch: ', epoch, 'loss: ', loss, 'acc: ', acc)
        # for each batch of images
        for i in range(X.shape[0] // bsize):
            # batch i
            # feedforward
            # backprop
            # apply / update gradients
            ''' *** put code here *** '''
    return L, A
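# One possible shape for the starred sections above (a sketch, not the graded
# solution; it assumes cnnff/cnnbp follow the conventions in this repo):
#   perm = np.random.permutation(X.shape[0])
#   X, Y = X[perm], Y[perm]
#   ...
#   xb, yb = X[i * bsize:(i + 1) * bsize], Y[i * bsize:(i + 1) * bsize]
#   net = cnnff(xb, net)
#   net = cnnbp(yb, net)
#   net = apply_gradients(net, lr=lr)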
if __name__ == '__main__':
    # command line arg parser
    parser = argparse.ArgumentParser(description='Train a CNN on cifar-10')
    parser.add_argument('-b',
                        '--batch',
                        required=False,
                        default=['data_batch_1'],
                        nargs='+',
                        help="cifar10 file(s) data_batch_1, ..., data_batch_5, or test_batch")
    parser.add_argument('--nepochs',
                        type=int,
                        required=False,
                        default=100,
                        help="maximum number of epochs")
    parser.add_argument('--lr',
                        type=float,
                        required=False,
                        default=0.001,
                        help="learning rate")
    parser.add_argument('--bsize',
                        type=int,
                        required=False,
                        default=32,
                        help="batch size")
    args = parser.parse_args()
    # data batch, nepochs, learning rate, and batch size
    batch = args.batch
    nepochs = args.nepochs
    lr = args.lr
    bsize = args.bsize
    # load cifar10 data
    images, labels = cifar10(batch=batch)
    images = images / 255.  # normalize
    # convert labels to one_hot vectors
    labels = to_one_hot(labels)
    # reshape images and permute dimensions so that images is a N x H x W x C numpy array
    images = images.reshape((-1, 3, 32, 32))
    images = images.transpose((0, 2, 3, 1))
    # train CNN
    # *** complete trainCNN function in cnn.py ***
    L, A = trainCNN(images, labels, nepochs=nepochs, bsize=bsize, lr=lr)
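With the flags above, a typical run might look like this (the batch names follow the loader's help text):

python cnn.py --batch data_batch_1 data_batch_2 --nepochs 100 --lr 0.001 --bsize 32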
cnn_helper.py 0 → 100755
import numpy as np

def init_conv_layer(shape):
    ''' init_conv_layer
        inputs:
            shape: numpy array indicating convolution filter # channels out (Cout), height (H), width (W), and # channels in (Cin)
        output:
            W: convolutional filters (Cout x H x W x Cin numpy array)
            b: bias vector (Cout dim. numpy array, i.e., one bias per H x W x Cin filter)
    '''
    # Xavier Uniform initialization
    cin = np.prod(shape[1:])
    cout = shape[0]
    s = np.sqrt(6. / (cin + cout))
    W = np.random.uniform(low=-s, high=s, size=shape)
    # bias initialization
    b = np.zeros((cout,))
    return W, b
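# Example (illustrative): for the first conv layer in cnn.py, shape = (16, 5, 5, 3),
# so cin = 5 * 5 * 3 = 75, cout = 16, and s = sqrt(6 / 91) ≈ 0.257; weights are
# drawn uniformly from [-0.257, 0.257] and biases start at zero.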
def initCNN(net):
    ''' initCNN
        initializes parameters for each network layer
        inputs:
            net: List structure describing the network architecture.
                 Each element of the list is a dictionary providing the following information by key:
                 - 'type' (layer type; 'Input', 'Conv', or 'Pool')
                 - 'shape' (layer shape)
                 - 'stride' (layer stride)
                 - 'activation' (None, 'ReLU', or 'softmax')
                 - 'W' (layer weights, if applicable)
                 - 'b' (layer bias, if applicable)
                 - 'd' (local gradient)
                 - 'gradW' (gradient w.r.t. layer weights, if applicable)
                 - 'gradb' (gradient w.r.t. layer bias, if applicable)
                 - 'output' (layer output)
        output:
            net: initialized net (see above)
    '''
    for layer in net:
        if layer['type'] == 'Conv':
            layer['W'], layer['b'] = init_conv_layer(layer['shape'])
    return net
def to_one_hot(x):
    ''' to_one_hot
        convert integer labels to one_hot vectors
        inputs:
            x: N dim. numpy array of integers
        outputs:
            y: one_hot matrix containing N one_hot vectors
        example:
            given x = [0, 1, 3]
            y = [[1., 0., 0., 0.],
                 [0., 1., 0., 0.],
                 [0., 0., 0., 1.]]
    '''
    y = np.zeros((len(x), x.max() + 1))
    y[np.arange(len(x)), x] = 1
    return y
cnnbp.py 0 → 100755
from scipy.signal import correlate as conv
from activations import relu, drelu, softmax
import numpy as np

def cnnbp(labels, net):
    ''' cnnbp
        perform backpropagation pass for convolutional network
        inputs:
            labels: ground truth one-hot labels for the batch (N x Nc numpy array)
            net: List structure describing the network architecture (see cnn.py for details)
        outputs:
            net: updated net data structure that stores local gradients ('d') and parameter gradients ('gradW', 'gradb')
    '''
    # batch size
    batch_size = net[0]['output'].shape[0]
    # local gradient, final layer:
    # derivative of softmax loss w.r.t. presynaptic response
    ''' *** put code here *** '''
    # compute local gradients of the other layers
    for n in range(len(net) - 2, 0, -1):
        # current layer
        layer = net[n]
        # next layer
        layer_ = net[n + 1]
        # if next layer type is Conv
        if layer_['type'] == 'Conv':
            ''' *** put code here *** '''
        # if next layer type is Pool
        elif layer_['type'] == 'Pool':
            ''' *** put code here *** '''
    # compute gradW and gradb for each layer
    for n in range(1, len(net)):
        # current layer
        layer = net[n]
        # previous layer
        layer_ = net[n - 1]
        if layer['type'] == 'Conv':
            # compute gradient w.r.t. convolutional filters
            ''' *** put code here *** '''
            # compute gradient w.r.t. biases
            ''' *** put code here *** '''
            # save for gradient update (see cnn.py)
            layer['gradW'] = gradW
            layer['gradb'] = gradb
    return net
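# Hint (standard result, not the required code): with a softmax output layer
# and cross-entropy loss, the derivative of the average loss w.r.t. the final
# presynaptic response reduces to
#   d = (pred - labels) / batch_size
# where pred is the network's softmax output arranged to match labels' shape.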
cnnff.py 0 → 100755
from scipy.signal import correlate as conv
import numpy as np
from activations import relu, softmax

def cnnff(x, net):
    ''' cnnff
        perform feed-forward pass for convolutional network
        inputs:
            x: batch of input images (N x H x W x Cin numpy array)
            net: List structure describing the network architecture (see cnn.py for details)
        outputs:
            net: updated net data structure that stores outputs from each layer
    '''
    # set input layer
    # loop over layers 1...L
    for n in range(1, len(net)):
        # current input
        inp = net[n - 1]['output']
        # current layer
        layer = net[n]
        # if layer type is Conv
        if layer['type'] == 'Conv':
            # conv followed by activation function
            ''' *** put code here *** '''
        # if layer type is Pool
        elif layer['type'] == 'Pool':
            ''' *** put code here *** '''
    return net
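For orientation, here is a minimal sketch of what the Conv branch might compute, assuming stride 1 and 'valid' boundaries (consistent with the shape conventions in cnn.py); conv_forward_sketch is a hypothetical helper, not the required implementation:

import numpy as np
from scipy.signal import correlate

def conv_forward_sketch(inp, W, b):
    # inp: N x H x W x Cin batch; W: Cout x Kh x Kw x Cin filters; b: Cout biases
    N, Hin, Win, Cin = inp.shape
    Cout, Kh, Kw, _ = W.shape
    out = np.zeros((N, Hin - Kh + 1, Win - Kw + 1, Cout))
    for i in range(N):
        for f in range(Cout):
            # 'valid' cross-correlation over H, W, and the full channel depth;
            # the channel axis collapses to size 1, hence the [:, :, 0] slice
            out[i, :, :, f] = correlate(inp[i], W[f], mode='valid')[:, :, 0] + b[f]
    return out

The layer's activation ('ReLU' or 'softmax', per its dictionary entry) would then be applied to out before it is stored in layer['output'].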