From c270ff0a9c831194ab6531bc1688061f5bf60856 Mon Sep 17 00:00:00 2001
From: briggan2 <hulk@hulk-gpu>
Date: Tue, 27 Oct 2020 01:02:37 -0500
Subject: [PATCH] initial commit

---
 activations.py |  17 +++
 cnn.py         | 178 +++++++++++++++++++++++++++++++++++++++++++++++++
 cnn_helper.py  |  76 +++++++++++++++++++++
 cnnbp.py       |  54 +++++++++++++++
 cnnff.py       |  37 ++++
 5 files changed, 362 insertions(+)
 create mode 100755 activations.py
 create mode 100755 cnn.py
 create mode 100755 cnn_helper.py
 create mode 100755 cnnbp.py
 create mode 100755 cnnff.py

diff --git a/activations.py b/activations.py
new file mode 100755
index 0000000..fec8551
--- /dev/null
+++ b/activations.py
@@ -0,0 +1,17 @@
+import numpy as np
+
+def relu(x):
+    ''' Rectified Linear Unit (ReLU) '''
+    return np.maximum(0., x)
+
+def drelu(x):
+    ''' derivative of ReLU '''
+    y = np.zeros(x.shape)
+    y[np.where(x > 0)] = 1
+    return y
+
+def softmax(x):
+    ''' softmax (inputs shifted by the row max so np.exp cannot overflow) '''
+    z = np.exp(x - np.max(x, axis=-1, keepdims=True))
+    return z / np.sum(z, axis=-1, keepdims=True)
+
diff --git a/cnn.py b/cnn.py
new file mode 100755
index 0000000..6592e5b
--- /dev/null
+++ b/cnn.py
@@ -0,0 +1,178 @@
+import numpy as np
+import argparse
+import scipy.linalg
+from cnn_helper import *
+from cnnff import cnnff
+from cnnbp import cnnbp
+from cifar10 import cifar10
+import matplotlib.pyplot as plt
+
+
+def cross_entropy_loss(labels, pred):
+    ''' cross_entropy_loss
+
+    compute the average cross entropy between the ground truth labels and predictions over a single batch
+
+    inputs:
+        labels: ground truth labels (N x Nc one_hot array)
+        pred: prediction from CNN (N x Nc numpy array, where Nc is the number of classes)
+
+    outputs:
+        loss: average cross entropy loss over batch (scalar)
+    '''
+
+    loss = -np.sum(labels * np.log(pred)) / labels.shape[0]
+
+    return loss
+
+
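+# Hedged sketch (an editor's addition, not part of the assignment API): when
+# the softmax saturates, pred can contain exact zeros and np.log returns -inf.
+# A guarded variant clips predictions away from zero first; the function name
+# and the eps value below are illustrative assumptions.
+def safe_cross_entropy_loss(labels, pred, eps=1e-12):
+    ''' cross entropy with predictions clipped to [eps, 1] to avoid log(0) '''
+    return -np.sum(labels * np.log(np.clip(pred, eps, 1.))) / labels.shape[0]
+
+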
+def accuracy(labels, pred, k=1):
+    ''' accuracy
+
+    compute the average classification accuracy between the ground truth labels and predictions over a single batch
+
+    inputs:
+        labels: ground truth labels (N x Nc one_hot array)
+        pred: prediction from CNN (N x Nc numpy array, where Nc is the number of classes)
+        k: top-k parameter (currently unused; top-1 accuracy is computed)
+
+    outputs:
+        acc: average accuracy over batch (scalar)
+    '''
+    labels = np.argmax(labels, axis=-1)
+    pred = np.argmax(pred, axis=-1)
+
+    return np.mean(pred == labels)
+
+def apply_gradients(net, lr=0.001):
+    ''' apply_gradients
+
+    performs gradient descent based updates
+
+    inputs:
+        net: cnn network (which stores gradients)
+        lr: learning rate (default 0.001)
+
+    output:
+        net: updated network
+    '''
+
+    for n in range(1, len(net)):
+        layer = net[n]
+        if layer['type'] == 'Conv':
+            layer['W'] = layer['W'] - lr * layer['gradW']
+            layer['b'] = layer['b'] - lr * layer['gradb']
+
+    return net
+
+
+def trainCNN(X, Y, **args):
+    ''' trainCNN
+
+    inputs:
+        X: images (n x 32 x 32 x 3 array)
+        Y: labels (n x 10 one_hot array)
+        args:
+            nepochs: number of epochs (default 100)
+            bsize: batch size (default 32)
+            lr: learning rate (default 0.001)
+
+    returns:
+        L: loss per epoch
+        A: accuracy per epoch
+
+    '''
+
+    # default parameters (set per key, so a partial **args still works)
+    args.setdefault('nepochs', 100)
+    args.setdefault('bsize', 32)
+    args.setdefault('lr', 0.001)
+
+    nepochs = args['nepochs']
+    bsize = args['bsize']
+    lr = args['lr']
+
+    # define CNN
+    net = [ {'type': 'Input', 'output': None},  # Layer 0
+            {'type': 'Conv', 'shape': (16, 5, 5, 3), 'stride': 1, 'activation': 'ReLU', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None},  # Layer 1
+            {'type': 'Pool', 'shape': (1, 2, 2, 1), 'stride': 2, 'activation': None, 'd': None, 'output': None},  # Layer 2
+            {'type': 'Conv', 'shape': (32, 5, 5, 16), 'stride': 1, 'activation': 'ReLU', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None},  # Layer 3
+            {'type': 'Pool', 'shape': (1, 2, 2, 1), 'stride': 2, 'activation': None, 'd': None, 'output': None},  # Layer 4
+            {'type': 'Conv', 'shape': (64, 5, 5, 32), 'stride': 1, 'activation': 'ReLU', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None},  # Layer 5
+            {'type': 'Conv', 'shape': (10, 1, 1, 64), 'stride': 1, 'activation': 'softmax', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None}]  # Layer 6
+
+    # initialize CNN
+    net = initCNN(net)
+
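+    # Hedged sketch of one way to fill in the skeleton below (shapes and
+    # signatures as used elsewhere in this patch; not the only valid layout):
+    #
+    #   perm = np.random.permutation(X.shape[0])        # shuffle
+    #   X, Y = X[perm], Y[perm]
+    #   net = cnnff(X, net)                             # feed forward
+    #   out = net[-1]['output'].reshape(Y.shape)        # N x 1 x 1 x Nc -> N x Nc
+    #   loss = cross_entropy_loss(Y, out)
+    #   acc = accuracy(Y, out)
+    #   xb, yb = X[i*bsize:(i+1)*bsize], Y[i*bsize:(i+1)*bsize]   # batch i
+    #   net = cnnbp(yb, cnnff(xb, net))                 # feedforward + backprop
+    #   net = apply_gradients(net, lr=lr)               # update
+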
+    for epoch in range(nepochs):
+        # shuffle images and labels
+
+        # compute cross_entropy_loss and accuracy over all images
+        #*** feed forward
+        #*** loss
+        #*** accuracy
+
+        # print loss and accuracy every epoch
+        print('epoch: ', epoch, 'loss: ', loss, 'acc: ', acc)
+
+        # for each batch of images
+        for i in range(X.shape[0] // bsize):
+            # batch i
+
+            # feedforward
+
+            # backprop
+
+            # apply / update gradients
+
+
+    return L, A
+
+
+if __name__ == '__main__':
+    # command line arg parser
+    parser = argparse.ArgumentParser(description='Train a CNN on cifar-10')
+    parser.add_argument('-b',
+                        '--batch',
+                        required=False,
+                        default=['data_batch_1'],
+                        nargs='+',
+                        help="cifar10 file(s) data_batch_1, ..., data_batch_5, or test_batch")
+    parser.add_argument('--nepochs',
+                        type=int,
+                        required=False,
+                        default=100,
+                        help="maximum number of epochs")
+    parser.add_argument('--lr',
+                        type=float,
+                        required=False,
+                        default=0.001,
+                        help="learning rate")
+    parser.add_argument('--bsize',
+                        type=int,
+                        required=False,
+                        default=32,
+                        help="batch size")
+    args = parser.parse_args()
+
+    # data batch, nepochs, learning rate, and batch size
+    batch = args.batch
+    nepochs = args.nepochs
+    lr = args.lr
+    bsize = args.bsize
+
+    # load cifar10 data
+    images, labels = cifar10(batch=batch)
+    images = images / 255.  # normalize
+
+    # convert labels to one_hot vectors
+    labels = to_one_hot(labels)
+
+    # reshape images and permute dimensions so that images is a N x H x W x C numpy array
+    images = images.reshape((-1, 3, 32, 32))
+    images = images.transpose((0, 2, 3, 1))
+
+    # train CNN
+    # *** complete trainCNN function in cnn.py ***
+    L, A = trainCNN(images, labels, nepochs=nepochs, bsize=bsize, lr=lr)
+
diff --git a/cnn_helper.py b/cnn_helper.py
new file mode 100755
index 0000000..30358ea
--- /dev/null
+++ b/cnn_helper.py
@@ -0,0 +1,76 @@
+import numpy as np
+
+def init_conv_layer(shape):
+    ''' init_conv_layer
+
+    inputs:
+        shape: numpy array indicating convolution filter # channels out (Cout), height (H), width (W), and # channels in (Cin)
+
+    output:
+        W: convolutional filters (Cout x H x W x Cin numpy array)
+        b: bias vector (Cout dim. numpy array, i.e., one bias per H x W x Cin filter)
+    '''
+
+    # Xavier Uniform initialization
+    cin = np.prod(shape[1:])
+    cout = shape[0]
+    s = np.sqrt(6. / (cin + cout))
+    W = np.random.uniform(low=-s, high=s, size=shape)
+
+    # bias initialization
+    b = np.zeros((cout,))
+
+    return W, b
+
+def initCNN(net):
+    ''' initCNN
+
+    initializes parameters for each network layer
+
+    inputs:
+        net: List structure describing the network architecture.
+             Each element of the list is a dictionary that provides the following information under these keys:
+                - 'type' (layer type; either 'Conv' or 'Pool')
+                - 'shape' (layer shape)
+                - 'stride' (layer stride)
+                - 'activation' (None, 'ReLU', or 'softmax')
+                - 'W' (layer weights, if applicable)
+                - 'b' (layer bias, if applicable)
+                - 'd' (local gradient)
+                - 'gradW' (gradient w.r.t. layer weights, if applicable)
+                - 'gradb' (gradient w.r.t. layer bias, if applicable)
+                - 'output' (layer output)
+
+
+    output:
+        net: initialized net (see above)
+    '''
+
+    for layer in net:
+        if layer['type'] == 'Conv':
+            layer['W'], layer['b'] = init_conv_layer(layer['shape'])
+
+    return net
+
+def to_one_hot(x):
+    ''' to_one_hot
+
+    convert integer labels to one_hot vectors
+
+    inputs:
+        x: N dim. numpy array of integers
+
+    outputs: one_hot matrix containing N one_hot vectors
+
+    example:
+        given x = [0, 1, 3]
+
+        y = [[1., 0., 0., 0.],
+             [0., 1., 0., 0.],
+             [0., 0., 0., 1.]]
+    '''
+
+    y = np.zeros((len(x), x.max() + 1))
+    y[np.arange(len(x)), x] = 1
+
+    return y
\ No newline at end of file
diff --git a/cnnbp.py b/cnnbp.py
new file mode 100755
index 0000000..a9c2c0e
--- /dev/null
+++ b/cnnbp.py
@@ -0,0 +1,54 @@
+from scipy.signal import correlate as conv  # CNN 'convolution' is cross-correlation
+from activations import relu, drelu, softmax
+import numpy as np
+
+def cnnbp(labels, net):
+    ''' cnnbp
+
+    perform backpropagation pass for convolutional network
+
+    inputs:
+        labels: ground truth labels (one_hot array; see cnn.py)
+        net: List structure describing the network architecture (see cnn.py for details)
+
+    outputs:
+        net: updated net data structure that stores local gradients and parameter gradients
+    '''
+
+    # batch size
+    batch_size = net[0]['output'].shape[0]
+
+    # local gradient final layer:
+    # derivative of softmax loss w.r.t. presynaptic response
+    ''' *** put code here *** '''
+
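+    # Hedged sketch: for a softmax output layer trained with the mean cross
+    # entropy in cnn.py, this local gradient commonly reduces to
+    #
+    #   net[-1]['d'] = (net[-1]['output'] - labels.reshape(net[-1]['output'].shape)) / batch_size
+    #
+    # (the reshape is an assumption: the final 1x1 Conv layer emits
+    # N x 1 x 1 x Nc while the labels arrive as N x Nc).
+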
+    # compute local gradients other layers
+    for n in range(len(net) - 2, 0, -1):
+
+        # current layer
+        layer = net[n]
+        # next layer
+        layer_ = net[n + 1]
+
+        # if next layer type is Conv
+        if layer_['type'] == 'Conv':
+            ''' *** put code here *** '''
+        # if next layer type is Pool
+        elif layer_['type'] == 'Pool':
+            ''' *** put code here *** '''
+
+    # compute gradW and gradb for each layer
+    for n in range(1, len(net)):
+        # current
+        layer = net[n]
+        # prev
+        layer_ = net[n - 1]
+
+        if layer['type'] == 'Conv':
+            # compute gradient wrt convolutional filters
+            ''' *** put code here *** '''
+            # compute gradient wrt biases
+            ''' *** put code here *** '''
+            # save for gradient update (see cnn.py)
+            layer['gradW'] = gradW
+            layer['gradb'] = gradb
+
+    return net
\ No newline at end of file
diff --git a/cnnff.py b/cnnff.py
new file mode 100755
index 0000000..2b04560
--- /dev/null
+++ b/cnnff.py
@@ -0,0 +1,37 @@
+from scipy.signal import correlate as conv  # CNN 'convolution' is cross-correlation
+import numpy as np
+from activations import relu, softmax
+
+
+def cnnff(x, net):
+    ''' cnnff
+
+    perform feed-forward pass for convolutional network
+
+    inputs:
+        x: batch of input images (N x H x W x Cin numpy array)
+        net: List structure describing the network architecture (see cnn.py for details)
+
+    outputs:
+        net: updated net data structure that stores outputs from each layer
+    '''
+
+    # set input layer
+    net[0]['output'] = x
+
+    # loop over layers 1...L
+    for n in range(1, len(net)):
+        # current input
+        inp = net[n - 1]['output']
+        # current layer
+        layer = net[n]
+
+        # if layer type is Conv
+        if layer['type'] == 'Conv':
+            # conv followed by activation function
+            ''' *** put code here *** '''
+        # if layer type is Pool
+        elif layer['type'] == 'Pool':
+            ''' *** put code here *** '''
+
+    return net
-- 
GitLab