Update model.ipynb

7da9b33a · Zeynep Hakguder · 5e2a680a · 7da9b33a
Commit 7da9b33a authored 7 years ago by Zeynep Hakguder
--- a/ProgrammingAssignment_1/model.ipynb
+++ b/ProgrammingAssignment_1/model.ipynb
@@ -58,9 +58,7 @@
    "    '''\n",
    "    \n",
    "    # read in features and labels\n",
-    "    features = np.genfromtxt(feature_file)\n",
-    "    labels = np.genfromtxt(label_file)\n",
-    "    \n",
    "    return features, labels"
   ]
  },
@@ -87,17 +85,6 @@
    "    '''\n",
    "    \n",
    "    # number of test and validation examples\n",
-    "    t_size = np.int(np.ceil(size*t))\n",
-    "    v_size = np.int(np.ceil(size*v))\n",
-    "\n",
-    "    # shuffle the indices\n",
-    "    permuted = np.random.permutation(size)\n",
-    "    \n",
-    "    # spare the first t_size for test\n",
-    "    test_indices =  permuted[:t_size]\n",
-    "    # and the next v_size for validation\n",
-    "    val_indices = permuted[t_size+1:t_size+v_size+1]\n",
-    "    train_indices = np.delete(np.arange(size), np.append(test_indices, val_indices), 0)\n",
    "    \n",
    "    return test_indices, val_indices, train_indices"
   ]

 %% Cell type:markdown id: tags:
 # JUPYTER NOTEBOOK TIPS
 Each rectangular box is called a cell.
 * Ctrl+ENTER evaluates the current cell; if it contains Python code, it runs the code, if it contains Markdown, it returns rendered text.
 * Alt+ENTER evaluates the current cell and adds a new cell below it.
 * If you click to the left of a cell, you'll notice the frame changes color to blue. You can erase a cell by hitting 'dd' (that's two "d"s in a row) when the frame is blue.
 %% Cell type:markdown id: tags:
 # Supervised Learning Model Skeleton
 We'll use this skeleton for implementing different supervised learning algorithms.
 %% Cell type:code id: tags:
 ``` python
 class Model:
     '''
     Skeleton for the supervised learning algorithms.
     Both methods are placeholders that raise NotImplementedError.
     '''
     def fit(self):
         '''
         Placeholder for training; always raises NotImplementedError.
         '''
         raise NotImplementedError()
     def predict(self, test_points):
         '''
         Placeholder for prediction; always raises NotImplementedError.
         Args:
             test_points: not used by this placeholder
         '''
         raise NotImplementedError()
 ```
 %% Cell type:code id: tags:
 ``` python
 def preprocess(feature_file, label_file):
     '''
     Load the features and the labels from two text files.
     Args:
         feature_file: str
             file containing features
         label_file: str
             file containing labels
     Returns:
         features: ndarray
             nxd features
         labels: ndarray
             n labels, one per example (1D when the file has a single column)
     '''
     # read in features and labels; np.genfromtxt with its default settings
     # splits on whitespace and parses every field as a float
     features = np.genfromtxt(feature_file)
     labels = np.genfromtxt(label_file)
     return features, labels
 ```
 %% Cell type:code id: tags:
 ``` python
 def partition(size, t, v=0):
     '''
     Randomly split the indices 0..size-1 into test, validation and
     training sets that do not overlap and together cover every index.
     Args:
         size: int
             number of examples in the whole dataset
         t: float
             proportion kept for test
         v: float
             proportion kept for validation
     Returns:
         test_indices: ndarray
             1D array containing test set indices
         val_indices: ndarray
             1D array containing validation set indices
         train_indices: ndarray
             1D array containing the remaining indices, in ascending order
     '''
     # number of test and validation examples (rounded up); the builtin int
     # is used because the np.int alias no longer exists in current NumPy
     t_size = int(np.ceil(size * t))
     v_size = int(np.ceil(size * v))
     # shuffle the indices
     permuted = np.random.permutation(size)
     # spare the first t_size for test
     test_indices = permuted[:t_size]
     # and the next v_size for validation, starting right after the test
     # slice so that no shuffled index is skipped
     val_indices = permuted[t_size:t_size + v_size]
     # everything left over is used for training
     train_indices = np.sort(permuted[t_size + v_size:])
     return test_indices, val_indices, train_indices
 ```
 %% Cell type:markdown id: tags:
 ## TASK 1: Implement `distance` function
 %% Cell type:markdown id: tags:
 The "distance" function will be used to calculate the cost of *k*-NN. It should take two data points and the name of a metric, and return a scalar value.
 %% Cell type:code id: tags:
 ``` python
 # Programming Assignment 1
 def distance(x, y, metric):
     '''
     Compute the distance between two points under the named metric.
     Args:
         x: ndarray
             1D array containing coordinates for a point
         y: ndarray
             1D array containing coordinates for a point
         metric: str
             Euclidean, Manhattan
     Returns:
         dist: float
     Raises:
         ValueError: if metric is not one of the names above, or if x and y
             do not have the same shape
     '''
     p = np.asarray(x, dtype=float)
     q = np.asarray(y, dtype=float)
     # reject mismatched points instead of letting broadcasting silently
     # produce a meaningless scalar
     if p.shape != q.shape:
         raise ValueError('x and y must have the same shape.')
     diff = p - q
     if metric == 'Euclidean':
         # square root of the sum of squared coordinate differences
         dist = float(np.sqrt(np.sum(diff ** 2)))
     elif metric == 'Manhattan':
         # sum of absolute coordinate differences
         dist = float(np.sum(np.abs(diff)))
     else:
         raise ValueError('{} is not a valid metric.'.format(metric))
     return dist # scalar distance btw x and y
 ```
 %% Cell type:markdown id: tags:
 ## General supervised learning performance related functions
 %% Cell type:markdown id: tags:
 Implement the "conf_matrix" function that takes as input an array of true labels (*true*) and an array of predicted labels (*pred*). It should output a numpy.ndarray.
 %% Cell type:code id: tags:
 ``` python
 # Programming Assignment 1
 def conf_matrix(true, pred, n_classes):
     '''
     Count how often each true class was predicted as each class.
     Labels are treated as integer class indices in 0..n_classes-1
     (values are cast to int, so 1.0 counts as class 1).
     Args:
         true:  ndarray
             nx1 array of true labels for test set
         pred: ndarray
             nx1 array of predicted labels for test set
         n_classes: int
     Returns:
         result: ndarray
             n_classes x n_classes array confusion matrix; result[i, j] is
             the number of examples whose true class is i and whose
             predicted class is j
     Raises:
         ValueError: if true and pred differ in length, or if a label falls
             outside 0..n_classes-1
     '''
     # flatten so that both nx1 columns and plain 1D arrays are accepted
     true_idx = np.asarray(true).ravel().astype(int)
     pred_idx = np.asarray(pred).ravel().astype(int)
     if true_idx.shape != pred_idx.shape:
         raise ValueError('true and pred must contain the same number of labels.')
     for labels in (true_idx, pred_idx):
         # a negative label would otherwise wrap around and be counted
         # in the wrong cell
         if labels.size and (labels.min() < 0 or labels.max() >= n_classes):
             raise ValueError('labels must lie in the range 0..n_classes-1.')
     # start from zeros: np.ndarray(shape) would hand back uninitialised memory
     result = np.zeros((n_classes, n_classes), dtype=int)
     # np.add.at accumulates correctly when the same (true, pred) pair repeats
     np.add.at(result, (true_idx, pred_idx), 1)
     # returns the confusion matrix as numpy.ndarray
     return result
 ```
 %% Cell type:markdown id: tags:
 ROC curves are a good way to visualize sensitivity vs. 1-specificity for varying cutoff points. "ROC" takes a list of *threshold* values to try and returns two arrays: one whose entries are the sensitivities at each threshold, and the other whose entries are the corresponding 1-specificities.
 %% Cell type:code id: tags:
 ``` python
 # Programming Assignment 1
 def ROC(true_labels, preds, value_list):
     '''
     Compute sensitivity and 1-specificity at each threshold.
     An example is a positive when its true label equals 1, and it is
     predicted positive at a threshold when its value in preds is greater
     than or equal to that threshold.
     Args:
         true_labels: ndarray
             1D array containing true labels
         preds: ndarray
             1D array containing thresholded value (e.g. proportion of neighbors in kNN)
         value_list: ndarray
             1D array containing different threshold values
     Returns:
         sens: ndarray
             1D array containing sensitivities, one per threshold
             (all NaN when there are no positive examples)
         spec_: ndarray
             1D array containing 1-specificities, one per threshold
             (all NaN when there are no negative examples)
     Raises:
         ValueError: if true_labels and preds differ in length
     '''
     is_pos = np.asarray(true_labels).ravel() == 1
     scores = np.asarray(preds, dtype=float).ravel()
     thresholds = np.asarray(value_list, dtype=float).ravel()
     if is_pos.shape != scores.shape:
         raise ValueError('true_labels and preds must have the same length.')
     n_pos = int(is_pos.sum())
     n_neg = is_pos.size - n_pos
     # one row per threshold, one column per example
     flagged = scores[np.newaxis, :] >= thresholds[:, np.newaxis]
     true_pos = (flagged & is_pos).sum(axis=1)
     false_pos = (flagged & ~is_pos).sum(axis=1)
     # calculate sensitivity, 1-specificity; a rate is undefined (NaN) when
     # its denominator class is absent from true_labels
     undefined = np.full(thresholds.shape, np.nan)
     sens = true_pos / n_pos if n_pos else undefined
     spec_ = false_pos / n_neg if n_neg else undefined.copy()
     # return two arrays
     return sens, spec_
 ```