diff --git a/activations.py b/activations.py
new file mode 100755
index 0000000000000000000000000000000000000000..fec855114b2c1877d5c664cbaaa55209b0c419d3
--- /dev/null
+++ b/activations.py
@@ -0,0 +1,17 @@
+import numpy as np
+
+def relu(x):
+ ''' Rectified Linear Unit (ReLU) '''
+ return np.maximum(0., x)
+
+def drelu(x):
+ ''' derivative of ReLU '''
+ y=np.zeros(x.shape)
+ y[np.where(x>0)]=1
+ return y
+
+def softmax(x):
+    ''' softmax (stabilized by subtracting the per-row max before exponentiating) '''
+    z = np.exp(x - np.max(x, axis=-1, keepdims=True))
+    return z / np.sum(z, axis=-1, keepdims=True)
+
diff --git a/cnn.py b/cnn.py
new file mode 100755
index 0000000000000000000000000000000000000000..6592e5bb98c2cd37905978fb50b11e31f21279bd
--- /dev/null
+++ b/cnn.py
@@ -0,0 +1,178 @@
+import numpy as np
+import argparse
+import scipy.linalg
+from cnn_helper import *
+from cnnff import cnnff
+from cnnbp import cnnbp
+from cifar10 import cifar10
+import matplotlib.pyplot as plt
+
+
+def cross_entropy_loss(labels, pred):
+ ''' cross_entropy_loss
+
+ compute the average cross_entropy between the ground truth labels and predictions over a single batch
+
+ inputs:
+        labels: one_hot ground truth labels (N x Nc numpy array, where Nc is the number of classes)
+ pred: prediction from CNN (N x Nc numpy array, where Nc is the number of classes)
+
+ outputs:
+ loss: average cross_entropy loss over batch (scalar)
+ '''
+
+    # small epsilon guards against log(0) from underflowed predictions
+    loss = -np.sum(labels * np.log(pred + 1e-12)) / labels.shape[0]
+
+ return loss
+
+
+def accuracy(labels, pred, k=1):
+    ''' accuracy
+
+    compute the average classification accuracy between the ground truth labels and predictions over a single batch
+
+    inputs:
+        labels: one_hot ground truth labels (N x Nc numpy array, where Nc is the number of classes)
+        pred: prediction from CNN (N x Nc numpy array)
+        k: reserved for top-k accuracy (unused; top-1 accuracy is computed)
+
+    outputs:
+        acc: average top-1 accuracy over batch (scalar)
+    '''
+    labels = np.argmax(labels, axis=-1)
+    pred = np.argmax(pred, axis=-1)
+
+    return np.mean(pred == labels)
+
+def apply_gradients(net, lr=0.001):
+ ''' apply gradients
+
+ performs gradient descent based updates
+
+ inputs:
+ net: cnn network (which stores gradients)
+ lr: learning rate (default 0.001)
+
+ output:
+        net: updated network
+ '''
+
+ for n in range(1,len(net)):
+ layer = net[n]
+        if layer['type'] == 'Conv':
+ layer['W'] = layer['W'] - lr * layer['gradW']
+ layer['b'] = layer['b'] - lr * layer['gradb']
+
+ return net
+
+
+def trainCNN(X, Y, **args):
+ ''' trainCNN
+
+ inputs:
+ X: images (n x 32 x 32 x 3 array)
+ Y: labels (n x 10 one_hot array)
+ args:
+            nepochs: number of epochs (default 100)
+            bsize: batch size (default 32)
+ lr: learning rate (default 0.001)
+
+ returns:
+ L: loss per epoch
+ A: accuracy per epoch
+
+ '''
+
+    # default parameters (used for any argument the caller does not supply)
+    nepochs = args.get('nepochs', 100)
+    bsize = args.get('bsize', 32)
+    lr = args.get('lr', 0.001)
+
+ # define CNN
+ net = [ {'type': 'Input', 'output': None}, # Layer 0
+ {'type': 'Conv', 'shape': (16, 5, 5, 3), 'stride': 1, 'activation': 'ReLU', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None}, # Layer 1
+ {'type': 'Pool', 'shape': (1, 2, 2, 1), 'stride': 2, 'activation': None, 'd': None, 'output': None}, # Layer 2
+ {'type': 'Conv', 'shape': (32, 5, 5, 16), 'stride': 1, 'activation': 'ReLU', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None}, # Layer 3
+ {'type': 'Pool', 'shape': (1, 2, 2, 1), 'stride': 2, 'activation': None, 'd': None, 'output': None}, # Layer 4
+ {'type': 'Conv', 'shape': (64, 5, 5, 32), 'stride': 1, 'activation': 'ReLU', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None}, # Layer 5
+ {'type': 'Conv', 'shape': (10, 1, 1, 64), 'stride': 1, 'activation': 'softmax', 'W': None, 'b': None, 'd': None, 'gradW': None, 'gradb': None, 'output': None}] # Layer 6
+
+ # initialize CNN
+ net = initCNN(net)
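+
+    # per-epoch loss and accuracy (returned as L and A at the end)
+    L, A = [], []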
+
+ for epoch in range(nepochs):
+ # shuffle images and labels
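+        # a simple sketch: shuffle images and labels together with a random permutation
+        perm = np.random.permutation(X.shape[0])
+        X, Y = X[perm], Y[perm]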
+
+ # compute cross_entropy_loss and accuracy over all images
+        #*** feed forward
+ #*** loss
+ #*** accuracy
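+        # a minimal sketch of the full-dataset evaluation; it assumes cnnff stores
+        # the softmax output of the final layer in net[-1]['output'] (an
+        # N x 1 x 1 x 10 array for the architecture above)
+        net = cnnff(X, net)
+        pred = net[-1]['output'].reshape(X.shape[0], -1)
+        loss = cross_entropy_loss(Y, pred)
+        acc = accuracy(Y, pred)
+        L.append(loss)
+        A.append(acc)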
+
+ # print loss and accuracy every epoch
+ print('epoch: ', epoch, 'loss: ', loss, 'acc: ', acc)
+
+ # for each batch of images
+        for i in range(X.shape[0] // bsize):
+ # batch i
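+            # slice out batch i of images and labels (a straightforward sketch)
+            xb = X[i * bsize:(i + 1) * bsize]
+            yb = Y[i * bsize:(i + 1) * bsize]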
+
+            # feedforward
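+            net = cnnff(xb, net)   # forward pass on the current batch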
+
+ # backprop
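+            net = cnnbp(yb, net)   # backward pass fills in 'd', 'gradW', 'gradb'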
+
+ # apply / update gradients
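+            net = apply_gradients(net, lr=lr)   # gradient-descent parameter update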
+
+
+ return L, A
+
+
+if __name__ == '__main__':
+ # command line arg parser
+    parser = argparse.ArgumentParser(description='Train a CNN on cifar-10')
+ parser.add_argument('-b',
+ '--batch',
+ required=False,
+ default=['data_batch_1'],
+ nargs='+',
+ help="cifar10 file(s) data_batch_1, ..., data_batch_5, or test_batch")
+ parser.add_argument('--nepochs',
+ type=int,
+ required=False,
+ default=100,
+ help="maximum number of epochs")
+ parser.add_argument('--lr',
+ type=float,
+ required=False,
+ default=0.001,
+ help="learning rate")
+ parser.add_argument('--bsize',
+                        type=int,
+ required=False,
+ default=32,
+ help="batch size")
+ args = parser.parse_args()
+
+ # data batch, nepochs, learning rate, and batch size
+ batch = args.batch
+ nepochs = args.nepochs
+ lr = args.lr
+ bsize = args.bsize
+
+ # load cifar10 data
+ images, labels = cifar10(batch=batch)
+ images = images / 255. # normalize
+
+ # convert labels to one_hot vectors
+ labels = to_one_hot(labels)
+
+ # reshape images and permute dimensions so that images is a N x H x W x C numpy array
+ images = images.reshape((-1,3,32,32))
+ images = images.transpose((0,2,3,1))
+
+ # train CNN
+ # *** complete trainCNN function in cnn.py ***
+ L, A = trainCNN(images, labels, nepochs=nepochs, bsize=bsize, lr=lr)
+
diff --git a/cnn_helper.py b/cnn_helper.py
new file mode 100755
index 0000000000000000000000000000000000000000..30358ea1568e36423284a32b09f76cc849c2e3ad
--- /dev/null
+++ b/cnn_helper.py
@@ -0,0 +1,76 @@
+import numpy as np
+
+def init_conv_layer(shape):
+    ''' init_conv_layer
+
+ inputs:
+        shape: tuple giving the convolution filter # channels out (Cout), height (H), width (W), and # channels in (Cin)
+
+ output:
+ W: convolutional filters (Cout x H x W x Cin numpy array)
+ b: bias vector (Cout dim. numpy array, i.e., one bias per H x W x Cin filter)
+ '''
+
+    # Xavier uniform initialization
+ cin = np.prod(shape[1:])
+ cout = shape[0]
+ s = np.sqrt(6. / (cin + cout))
+ W = np.random.uniform(low=-s, high=s, size=shape)
+
+ # bias initialization
+ b = np.zeros((cout,))
+
+ return W, b
+
+def initCNN(net):
+ ''' initCNN
+
+ initializes parameters for each network layer
+
+ inputs:
+ net: List structure describing the network architecture.
+             Each element of the list is a dictionary that provides the following information under these keys:
+             - 'type' (layer type; 'Input', 'Conv', or 'Pool')
+             - 'shape' (layer shape)
+             - 'stride' (layer stride)
+             - 'activation' (None, 'ReLU', or 'softmax')
+             - 'W' (layer weights, if applicable)
+             - 'b' (layer bias, if applicable)
+             - 'd' (local gradient)
+             - 'gradW' (gradient of the loss w.r.t. the layer weights, if applicable)
+             - 'gradb' (gradient of the loss w.r.t. the layer bias, if applicable)
+             - 'output' (layer output)
+
+
+ output:
+ net: initialized net (see above)
+ '''
+
+ for layer in net:
+        if layer['type'] == 'Conv':
+ layer['W'], layer['b'] = init_conv_layer(layer['shape'])
+
+ return net
+
+def to_one_hot(x):
+ ''' to_one_hot
+
+ convert integer labels to one_hot vectors
+
+ inputs:
+ x: N dim. numpy array of integers
+
+ outputs: one_hot matrix containing N one_hot vectors
+
+ example:
+ given x = [0, 1, 3]
+
+ y = [[1., 0., 0., 0.],
+ [0., 1., 0., 0.],
+ [0., 0., 0., 1.]]
+ '''
+
+ y = np.zeros((len(x), x.max()+1))
+ y[np.arange(len(x)),x] = 1
+
+ return y
\ No newline at end of file
diff --git a/cnnbp.py b/cnnbp.py
new file mode 100755
index 0000000000000000000000000000000000000000..a9c2c0e3884bd93e50e9dc2eb5df777c6413c39d
--- /dev/null
+++ b/cnnbp.py
@@ -0,0 +1,54 @@
+from scipy.signal import correlate as conv
+from activations import relu, drelu, softmax
+import numpy as np
+
+def cnnbp(labels, net):
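+    ''' cnnbp
+
+    perform the backward pass for the convolutional network
+
+    inputs:
+        labels: one_hot ground truth labels (N x Nc numpy array)
+        net: List structure describing the network architecture (see cnn.py for details)
+
+    outputs:
+        net: updated net data structure that stores local gradients ('d') and
+             parameter gradients ('gradW', 'gradb') for each layer
+    '''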
+
+ # batch size
+ batch_size = net[0]['output'].shape[0]
+
+ # local gradient final layer:
+ # derivative of softmax loss w.r.t presynaptic response
+ ''' *** put code here *** '''
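+    # a minimal sketch, assuming the softmax outputs are stored in net[-1]['output']
+    # and the batch-averaged cross-entropy loss from cnn.py is used, so that
+    # dLoss/dlogits = (softmax - labels) / batch_size; labels is reshaped to match
+    # the final layer's output shape
+    out = net[-1]['output']
+    net[-1]['d'] = (out - labels.reshape(out.shape)) / batch_size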
+
+ # compute local gradients other layers
+ for n in range(len(net)-2,0,-1):
+
+ # current layer
+ layer = net[n]
+ # next_layer
+ layer_ = net[n+1]
+
+ # if next layer type is Conv
+        if layer_['type'] == 'Conv':
+ ''' *** put code here *** '''
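+            # hint (one possible approach): back-propagate layer_['d'] through
+            # layer_'s filters (a 'full'-mode correlation with the spatially
+            # flipped kernels), then multiply elementwise by the derivative of
+            # this layer's activation (e.g. drelu) where one is present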
+ # if next layer type is Pool
+        elif layer_['type'] == 'Pool':
+ ''' *** put code here *** '''
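+            # hint (one possible approach): undo the pooling layer's spatial
+            # reduction by upsampling layer_['d'] back to this layer's output size
+            # (repeating each value over its pooling window, divided by the window
+            # area for mean pooling, or routed to the argmax location for max pooling)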
+
+ # compute gradW and gradb for each layer
+ for n in range(1,len(net)):
+ # current
+ layer = net[n]
+ # prev
+ layer_ = net[n-1]
+
+        if layer['type'] == 'Conv':
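+            # hint (one possible approach): gradW is the correlation of the previous
+            # layer's output (layer_['output']) with this layer's local gradient
+            # layer['d'], summed over the batch; gradb is layer['d'] summed over the
+            # batch, height, and width dimensions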
+ # compute gradient wrt convolutional filters
+ ''' *** put code here *** '''
+ # compute gradient wrt biases
+ ''' *** put code here *** '''
+ # save for gradient update (see cnn.py)
+ layer['gradW'] = gradW
+ layer['gradb'] = gradb
+
+ return net
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/cnnff.py b/cnnff.py
new file mode 100755
index 0000000000000000000000000000000000000000..2b04560c456a4e2dbac5e9f8e35c56bafeca9921
--- /dev/null
+++ b/cnnff.py
@@ -0,0 +1,37 @@
+from scipy.signal import correlate as conv
+import numpy as np
+from activations import relu, softmax
+
+
+def cnnff(x, net):
+ ''' cnnff
+
+ perform feed-forward pass for convolutional network
+
+ inputs:
+ x: batch of input images (N x H x W x Cin numpy array)
+ net: List structure describing the network architecture (see cnn.py for details)
+
+ outputs:
+ net: updated net data structure that stores outputs from each layer
+ '''
+
+ # set input layer
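+    net[0]['output'] = x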
+
+
+ # loop over layers 1...L
+ for n in range(1,len(net)):
+ # current input
+ inp = net[n-1]['output']
+ # current layer
+ layer = net[n]
+
+ # if layer type is Conv
+        if layer['type'] == 'Conv':
+ # conv followed by activation function
+ ''' *** put code here *** '''
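+            # a minimal sketch, assuming stride 1 and no zero-padding (as in the
+            # architecture defined in cnn.py); the intended implementation may differ.
+            # 'valid'-mode correlation with a (1 x kh x kw x Cin) kernel slides over
+            # height and width and sums over the input channels.
+            W, b = layer['W'], layer['b']
+            z = np.stack([conv(inp, W[f][None], mode='valid')[..., 0]
+                          for f in range(W.shape[0])], axis=-1) + b
+            if layer['activation'] == 'ReLU':
+                layer['output'] = relu(z)
+            elif layer['activation'] == 'softmax':
+                layer['output'] = softmax(z)
+            else:
+                layer['output'] = z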
+ # if layer type is Pool
+        elif layer['type'] == 'Pool':
+ ''' *** put code here *** '''
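+            # a minimal sketch, assuming non-overlapping 2 x 2 mean pooling with
+            # stride 2 (the pool shape/stride used in cnn.py); if max pooling is
+            # intended instead, take the max over each window and record the argmax
+            # locations for the backward pass
+            nb, h, w, c = inp.shape
+            s = layer['stride']
+            layer['output'] = inp.reshape(nb, h // s, s, w // s, s, c).mean(axis=(2, 4))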
+
+ return net