diff --git a/activations.py b/activations.py
new file mode 100755
index 0000000000000000000000000000000000000000..fec855114b2c1877d5c664cbaaa55209b0c419d3
--- /dev/null
+++ b/activations.py
@@ -0,0 +1,17 @@
+import numpy as np
+
+def relu(x):
+ ''' Rectified Linear Unit (ReLU) '''
+ return np.maximum(0., x)
+
+def drelu(x):
+ ''' derivative of ReLU '''
+ y=np.zeros(x.shape)
+ y[np.where(x>0)]=1
+ return y
+
+def softmax(x):
+    ''' softmax (stabilized by subtracting the per-row max before exponentiating) '''
+    z = np.exp(x - np.max(x, axis=-1, keepdims=True))
+    return z / np.sum(z, axis=-1, keepdims=True)
+
diff --git a/cnn.py b/cnn.py
new file mode 100755
index 0000000000000000000000000000000000000000..6592e5bb98c2cd37905978fb50b11e31f21279bd
--- /dev/null
+++ b/cnn.py
@@ -0,0 +1,178 @@
+import numpy as np
+import argparse
+import scipy.linalg
+from cnn_helper import *
+from cnnff import cnnff
+from cnnbp import cnnbp
+from cifar10 import cifar10
+import matplotlib.pyplot as plt
+
+
+def cross_entropy_loss(labels, pred):
+ ''' cross_entropy_loss
+
+ compute the average cross_entropy between the ground truth labels and predictions over a single batch
+
+ inputs:
+        labels: one_hot ground truth labels (N x Nc numpy array, where Nc is the number of classes)
+ pred: prediction from CNN (N x Nc numpy array, where Nc is the number of classes)
+
+ outputs:
+ loss: average cross_entropy loss over batch (scalar)
+ '''
+
+    # small epsilon guards against log(0) from underflowed predictions
+    loss = -np.sum(labels * np.log(pred + 1e-12)) / labels.shape[0]
+
+ return loss
+
+
+def accuracy(labels, pred, k=1):
+    ''' accuracy
+
+    compute the average classification accuracy between the ground truth labels and predictions over a single batch
+
+    inputs:
+        labels: one_hot ground truth labels (N x Nc numpy array, where Nc is the number of classes)
+        pred: prediction from CNN (N x Nc numpy array)
+        k: reserved for top-k accuracy (unused; top-1 accuracy is computed)
+
+    outputs:
+        acc: average top-1 accuracy over batch (scalar)
+    '''
+    labels = np.argmax(labels, axis=-1)
+    pred = np.argmax(pred, axis=-1)
+
+    return np.mean(pred == labels)
+
+def apply_gradients(net, lr=0.001):
+ ''' apply gradients
+
+ performs gradient descent based updates
+
+ inputs:
+ net: cnn network (which stores gradients)
+ lr: learning rate (default 0.001)
+
+ output:
+        net: updated network
+ '''
+
+ for n in range(1,len(net)):
+ layer = net[n]
+        if layer['type'] == 'Conv':
+ layer['W'] = layer['W'] - lr * layer['gradW']
+ layer['b'] = layer['b'] - lr * layer['gradb']
+
+ return net
+
+
+def trainCNN(X, Y, **args):
+ ''' trainCNN
+
+ inputs:
+ X: images (n x 32 x 32 x 3 array)
+ Y: labels (n x 10 one_hot array)
+ args:
+            nepochs: number of epochs (default 100)
+            bsize: batch size (default 32)
+ lr: learning rate (default 0.001)
+
+ returns:
+ L: loss per epoch
+ A: accuracy per epoch
+
+ '''
+
+    # default parameters (used for any argument the caller does not supply)
+    nepochs = args.get('nepochs', 100)
+    bsize = args.get('bsize', 32)
+    lr = args.get('lr', 0.001)
+
+ # define CNN
+ net = [ {'type': 'Input', 'output': None}, # Layer 0
+ {'type': 'Conv', 'shape': (16, 5, 5, 3), 'stride': 1, 'activation': 'ReLU', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None}, # Layer 1
+ {'type': 'Pool', 'shape': (1, 2, 2, 1), 'stride': 2, 'activation': None, 'd': None, 'output': None}, # Layer 2
+ {'type': 'Conv', 'shape': (32, 5, 5, 16), 'stride': 1, 'activation': 'ReLU', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None}, # Layer 3
+ {'type': 'Pool', 'shape': (1, 2, 2, 1), 'stride': 2, 'activation': None, 'd': None, 'output': None}, # Layer 4
+ {'type': 'Conv', 'shape': (64, 5, 5, 32), 'stride': 1, 'activation': 'ReLU', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None}, # Layer 5
+ {'type': 'Conv', 'shape': (10, 1, 1, 64), 'stride': 1, 'activation': 'softmax', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None}] # Layer 6
+
+ # initialize CNN
+ net = initCNN(net)
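+
+    # per-epoch loss and accuracy (returned as L and A at the end)
+    L, A = [], []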
+
+ for epoch in range(nepochs):
+ # shuffle images and labels
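+        # a simple sketch: shuffle images and labels together with a random permutation
+        perm = np.random.permutation(X.shape[0])
+        X, Y = X[perm], Y[perm]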
+
+ # compute cross_entropy_loss and accuracy over all images
+        #*** feed forward
+ #*** loss
+ #*** accuracy
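+        # a minimal sketch of the full-dataset evaluation; it assumes cnnff stores
+        # the softmax output of the final layer in net[-1]['output'] (an
+        # N x 1 x 1 x 10 array for the architecture above)
+        net = cnnff(X, net)
+        pred = net[-1]['output'].reshape(X.shape[0], -1)
+        loss = cross_entropy_loss(Y, pred)
+        acc = accuracy(Y, pred)
+        L.append(loss)
+        A.append(acc)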
+
+ # print loss and accuracy every epoch
+ print('epoch: ', epoch, 'loss: ', loss, 'acc: ', acc)
+
+ # for each batch of images
+        for i in range(X.shape[0] // bsize):
+ # batch i
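+            # slice out batch i of images and labels (a straightforward sketch)
+            xb = X[i * bsize:(i + 1) * bsize]
+            yb = Y[i * bsize:(i + 1) * bsize]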
+
+            # feedforward
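+            net = cnnff(xb, net)   # forward pass on the current batch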
+
+ # backprop
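+            net = cnnbp(yb, net)   # backward pass fills in 'd', 'gradW', 'gradb'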
+
+ # apply / update gradients
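+            net = apply_gradients(net, lr=lr)   # gradient-descent parameter update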
+
+
+ return L, A
+
+
+if __name__ == '__main__':
+ # command line arg parser
+    parser = argparse.ArgumentParser(description='Train a CNN on cifar-10')
+ parser.add_argument('-b',
+ '--batch',
+ required=False,
+ default=['data_batch_1'],
+ nargs='+',
+ help="cifar10 file(s) data_batch_1, ..., data_batch_5, or test_batch")
+ parser.add_argument('--nepochs',
+ type=int,
+ required=False,
+ default=100,
+ help="maximum number of epochs")
+ parser.add_argument('--lr',
+ type=float,
+ required=False,
+ default=0.001,
+ help="learning rate")
+ parser.add_argument('--bsize',
+                        type=int,
+ required=False,
+ default=32,
+ help="batch size")
+ args = parser.parse_args()
+
+ # data batch, nepochs, learning rate, and batch size
+ batch = args.batch
+ nepochs = args.nepochs
+ lr = args.lr
+ bsize = args.bsize
+
+ # load cifar10 data
+ images, labels = cifar10(batch=batch)
+ images = images / 255. # normalize
+
+ # convert labels to one_hot vectors
+ labels = to_one_hot(labels)
+
+ # reshape images and permute dimensions so that images is a N x H x W x C numpy array
+ images = images.reshape((-1,3,32,32))
+ images = images.transpose((0,2,3,1))
+
+ # train CNN
+ # *** complete trainCNN function in cnn.py ***
+ L, A = trainCNN(images, labels, nepochs=nepochs, bsize=bsize, lr=lr)
+
diff --git a/cnn_helper.py b/cnn_helper.py
new file mode 100755
index 0000000000000000000000000000000000000000..30358ea1568e36423284a32b09f76cc849c2e3ad
--- /dev/null
+++ b/cnn_helper.py
@@ -0,0 +1,76 @@
+import numpy as np
+
+def init_conv_layer(shape):
+    ''' init_conv_layer
+
+ inputs:
+        shape: tuple giving the convolution filter # channels out (Cout), height (H), width (W), and # channels in (Cin)
+
+ output:
+ W: convolutional filters (Cout x H x W x Cin numpy array)
+ b: bias vector (Cout dim. numpy array, i.e., one bias per H x W x Cin filter)
+ '''
+
+    # Xavier uniform initialization
+ cin = np.prod(shape[1:])
+ cout = shape[0]
+ s = np.sqrt(6. / (cin + cout))
+ W = np.random.uniform(low=-s, high=s, size=shape)
+
+ # bias initialization
+ b = np.zeros((cout,))
+
+ return W, b
+
+def initCNN(net):
+ ''' initCNN
+
+ initializes parameters for each network layer
+
+ inputs:
+ net: List structure describing the network architecture.
+             Each element of the list is a dictionary that provides the following information under these keys:
+             - 'type' (layer type; 'Input', 'Conv', or 'Pool')
+             - 'shape' (layer shape)
+             - 'stride' (layer stride)
+             - 'activation' (None, 'ReLU', or 'softmax')
+             - 'W' (layer weights, if applicable)
+             - 'b' (layer bias, if applicable)
+             - 'd' (local gradient)
+             - 'gradW' (gradient of the loss w.r.t. the layer weights, if applicable)
+             - 'gradb' (gradient of the loss w.r.t. the layer bias, if applicable)
+             - 'output' (layer output)
+
+
+ output:
+ net: initialized net (see above)
+ '''
+
+ for layer in net:
+        if layer['type'] == 'Conv':
+ layer['W'], layer['b'] = init_conv_layer(layer['shape'])
+
+ return net
+
+def to_one_hot(x):
+ ''' to_one_hot
+
+ convert integer labels to one_hot vectors
+
+ inputs:
+ x: N dim. numpy array of integers
+
+ outputs: one_hot matrix containing N one_hot vectors
+
+ example:
+ given x = [0, 1, 3]
+
+ y = [[1., 0., 0., 0.],
+ [0., 1., 0., 0.],
+ [0., 0., 0., 1.]]
+ '''
+
+ y = np.zeros((len(x), x.max()+1))
+ y[np.arange(len(x)),x] = 1
+
+ return y
\ No newline at end of file
diff --git a/cnnbp.py b/cnnbp.py
new file mode 100755
index 0000000000000000000000000000000000000000..a9c2c0e3884bd93e50e9dc2eb5df777c6413c39d
--- /dev/null
+++ b/cnnbp.py
@@ -0,0 +1,54 @@
+from scipy.signal import correlate as conv
+from activations import relu, drelu, softmax
+import numpy as np
+
+def cnnbp(labels, net):
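+    ''' cnnbp
+
+    perform the backward pass for the convolutional network
+
+    inputs:
+        labels: one_hot ground truth labels (N x Nc numpy array)
+        net: List structure describing the network architecture (see cnn.py for details)
+
+    outputs:
+        net: updated net data structure that stores local gradients ('d') and
+             parameter gradients ('gradW', 'gradb') for each layer
+    '''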
+
+ # batch size
+ batch_size = net[0]['output'].shape[0]
+
+ # local gradient final layer:
+ # derivative of softmax loss w.r.t presynaptic response
+ ''' *** put code here *** '''
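+    # a minimal sketch, assuming the softmax outputs are stored in net[-1]['output']
+    # and the batch-averaged cross-entropy loss from cnn.py is used, so that
+    # dLoss/dlogits = (softmax - labels) / batch_size; labels is reshaped to match
+    # the final layer's output shape
+    out = net[-1]['output']
+    net[-1]['d'] = (out - labels.reshape(out.shape)) / batch_size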
+
+ # compute local gradients other layers
+ for n in range(len(net)-2,0,-1):
+
+ # current layer
+ layer = net[n]
+ # next_layer
+ layer_ = net[n+1]
+
+ # if next layer type is Conv
+        if layer_['type'] == 'Conv':
+ ''' *** put code here *** '''
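+            # hint (one possible approach): back-propagate layer_['d'] through
+            # layer_'s filters (a 'full'-mode correlation with the spatially
+            # flipped kernels), then multiply elementwise by the derivative of
+            # this layer's activation (e.g. drelu) where one is present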
+ # if next layer type is Pool
+        elif layer_['type'] == 'Pool':
+ ''' *** put code here *** '''
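+            # hint (one possible approach): undo the pooling layer's spatial
+            # reduction by upsampling layer_['d'] back to this layer's output size
+            # (repeating each value over its pooling window, divided by the window
+            # area for mean pooling, or routed to the argmax location for max pooling)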
+
+ # compute gradW and gradb for each layer
+ for n in range(1,len(net)):
+ # current
+ layer = net[n]
+ # prev
+ layer_ = net[n-1]
+
+        if layer['type'] == 'Conv':
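+            # hint (one possible approach): gradW is the correlation of the previous
+            # layer's output (layer_['output']) with this layer's local gradient
+            # layer['d'], summed over the batch; gradb is layer['d'] summed over the
+            # batch, height, and width dimensions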
+ # compute gradient wrt convolutional filters
+ ''' *** put code here *** '''
+ # compute gradient wrt biases
+ ''' *** put code here *** '''
+ # save for gradient update (see cnn.py)
+ layer['gradW'] = gradW
+ layer['gradb'] = gradb
+
+ return net
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/cnnff.py b/cnnff.py
new file mode 100755
index 0000000000000000000000000000000000000000..2b04560c456a4e2dbac5e9f8e35c56bafeca9921
--- /dev/null
+++ b/cnnff.py
@@ -0,0 +1,37 @@
+from scipy.signal import correlate as conv
+import numpy as np
+from activations import relu, softmax
+
+
+def cnnff(x, net):
+ ''' cnnff
+
+ perform feed-forward pass for convolutional network
+
+ inputs:
+ x: batch of input images (N x H x W x Cin numpy array)
+ net: List structure describing the network architecture (see cnn.py for details)
+
+ outputs:
+ net: updated net data structure that stores outputs from each layer
+ '''
+
+ # set input layer
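+    net[0]['output'] = x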
+
+
+ # loop over layers 1...L
+ for n in range(1,len(net)):
+ # current input
+ inp = net[n-1]['output']
+ # current layer
+ layer = net[n]
+
+ # if layer type is Conv
+        if layer['type'] == 'Conv':
+ # conv followed by activation function
+ ''' *** put code here *** '''
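+            # a minimal sketch, assuming stride 1 and no zero-padding (as in the
+            # architecture defined in cnn.py); the intended implementation may differ.
+            # 'valid'-mode correlation with a (1 x kh x kw x Cin) kernel slides over
+            # height and width and sums over the input channels.
+            W, b = layer['W'], layer['b']
+            z = np.stack([conv(inp, W[f][None], mode='valid')[..., 0]
+                          for f in range(W.shape[0])], axis=-1) + b
+            if layer['activation'] == 'ReLU':
+                layer['output'] = relu(z)
+            elif layer['activation'] == 'softmax':
+                layer['output'] = softmax(z)
+            else:
+                layer['output'] = z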
+ # if layer type is Pool
+        elif layer['type'] == 'Pool':
+ ''' *** put code here *** '''
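+            # a minimal sketch, assuming non-overlapping 2 x 2 mean pooling with
+            # stride 2 (the pool shape/stride used in cnn.py); if max pooling is
+            # intended instead, take the max over each window and record the argmax
+            # locations for the backward pass
+            nb, h, w, c = inp.shape
+            s = layer['stride']
+            layer['output'] = inp.reshape(nb, h // s, s, w // s, s, c).mean(axis=(2, 4))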
+
+ return net