diff --git a/ProgrammingAssignment_0/GettingFamiliar_solution.ipynb b/ProgrammingAssignment_0/GettingFamiliar_solution.ipynb index 25d9397e2fea0e0b2630973e837ad802c098f6b6..001ab5a51b5bbe33989c981a809d1ed92488382d 100644 --- a/ProgrammingAssignment_0/GettingFamiliar_solution.ipynb +++ b/ProgrammingAssignment_0/GettingFamiliar_solution.ipynb @@ -132,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ @@ -179,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 61, "metadata": {}, "outputs": [ { @@ -214,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 62, "metadata": {}, "outputs": [], "source": [ @@ -276,7 +276,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 63, "metadata": {}, "outputs": [ { @@ -312,7 +312,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 64, "metadata": {}, "outputs": [], "source": [ @@ -351,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 67, "metadata": {}, "outputs": [ { @@ -367,7 +367,7 @@ "# obtain features and labels from files\n", "features, labels = preprocess('../data/madelon.data', '../data/madelon.labels')\n", "# partition the data set\n", - "val_indices, test_indices, train_indices = partition(size, 0.3, 0.1)\n", + "val_indices, test_indices, train_indices = partition(features.shape[0], 0.3, 0.1)\n", "# pass the training features and labels to the fit method\n", "my_model.fit(features[train_indices], labels[train_indices])" ] diff --git a/ProgrammingAssignment_1/model.ipynb b/ProgrammingAssignment_1/model.ipynb index 62a4e1553f3b72b3fa3db9b91ef742f7e2a9ed8a..1613f4927df4dd15a3af4f181209ba28e70bc618 100644 --- a/ProgrammingAssignment_1/model.ipynb +++ b/ProgrammingAssignment_1/model.ipynb @@ -7,8 +7,8 @@ "# JUPYTER NOTEBOOK TIPS\n", "\n", "Each rectangular box is called a cell. \n", - "* ctrl+ENTER evaluates the current cell; if it contains Python code, it runs the code, if it contains Markdown, it returns rendered text.\n", - "* alt+ENTER evaluates the current cell and adds a new cell below it.\n", + "* Ctrl+ENTER evaluates the current cell; if it contains Python code, it runs the code, if it contains Markdown, it returns rendered text.\n", + "* Alt+ENTER evaluates the current cell and adds a new cell below it.\n", "* If you click to the left of a cell, you'll notice the frame changes color to blue. You can erase a cell by hitting 'dd' (that's two \"d\"s in a row) when the frame is blue." ] }, @@ -28,23 +28,6 @@ "outputs": [], "source": [ "class Model:\n", - " # preprocess_f and partition_f expect functions\n", - " # use kwargs to pass arguments to preprocessor_f and partition_f\n", - " # kwargs is a dictionary and should contain t, v, feature_file, label_file\n", - " # e.g. {'t': 0.3, 'v': 0.1, 'feature_file': 'some_file_name', 'label_file': 'some_file_name'}\n", - " \n", - " def __init__(self, preprocessor_f, partition_f, **kwargs):\n", - " \n", - " self.features, self.labels = preprocessor_f(kwargs['feature_file'], kwargs['label_file'])\n", - " self.size = len(self.labels) # number of examples in dataset \n", - " self.feat_dim = self.features.shape[1] # number of features\n", - " \n", - " self.val_indices, self.test_indices = partition_f(self.size, kwargs['t'], kwargs['v'])\n", - " self.val_size = len(self.val_indices)\n", - " self.test_size = len(self.test_indices)\n", - " \n", - " self.train_indices = np.delete(np.arange(self.size), np.append(self.test_indices, self.val_indices), 0)\n", - " self.train_size = len(self.train_indices)\n", " \n", " def fit(self):\n", " \n", @@ -102,7 +85,7 @@ " val_indices: ndarray\n", " 1D array containing validation set indices\n", " '''\n", - " \n", + " \n", " # number of test and validation examples\n", " t_size = np.int(np.ceil(size*t))\n", " v_size = np.int(np.ceil(size*v))\n", @@ -114,9 +97,9 @@ " test_indices = permuted[:t_size]\n", " # and the next v_size for validation\n", " val_indices = permuted[t_size+1:t_size+v_size+1]\n", + " train_indices = np.delete(np.arange(size), np.append(test_indices, val_indices), 0)\n", " \n", - " \n", - " return test_indices, val_indices" + " return test_indices, val_indices, train_indices" ] }, { @@ -207,7 +190,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "ROC curves are a good way to visualize sensitivity vs. 1-specificity for varying cut off points. Now, implement a \"ROC\" function that predicts the labels of the test set examples using different *threshold* values in \"predict\" and plot the ROC curve. \"ROC\" takes a list containing different *threshold* parameter values to try and returns two arrays; one where each entry is the sensitivity at a given threshold and the other where entries are 1-specificities." + "ROC curves are a good way to visualize sensitivity vs. 1-specificity for varying cut off points. \"ROC\" takes a list containing different *threshold* parameter values to try and returns two arrays; one where each entry is the sensitivity at a given threshold and the other where entries are 1-specificities." ] }, { @@ -218,12 +201,13 @@ "source": [ "# TODO: Programming Assignment 1\n", "\n", - "def ROC(model, indices, value_list):\n", + "def ROC(true_labels, preds, value_list):\n", " '''\n", " Args:\n", - " model: a fitted supervised learning model\n", - " indices: ndarray\n", - " 1D array containing indices\n", + " true_labels: ndarray\n", + " 1D array containing true labels\n", + " preds: ndarray\n", + " 1D array containing thresholded value (e.g. proportion of positive neighbors in kNN)\n", " value_list: ndarray\n", " 1D array containing different threshold values\n", " Returns:\n", @@ -233,7 +217,6 @@ " 1D array containing 1-specifities\n", " '''\n", " \n", - " # use predict method to obtain predicted labels at different threshold values\n", " # use conf_matrix to calculate tp, tn, fp, fn\n", " # calculate sensitivity, 1-specificity\n", " # return two arrays\n", diff --git a/ProgrammingAssignment_1/model_solution.ipynb b/ProgrammingAssignment_1/model_solution.ipynb index a4582f3c30f305943118e6756d059094d013902c..c2183591492e04492c23080d1f99d853075e7f74 100644 --- a/ProgrammingAssignment_1/model_solution.ipynb +++ b/ProgrammingAssignment_1/model_solution.ipynb @@ -7,8 +7,8 @@ "# JUPYTER NOTEBOOK TIPS\n", "\n", "Each rectangular box is called a cell. \n", - "* ctrl+ENTER evaluates the current cell; if it contains Python code, it runs the code, if it contains Markdown, it returns rendered text.\n", - "* alt+ENTER evaluates the current cell and adds a new cell below it.\n", + "* Ctrl+ENTER evaluates the current cell; if it contains Python code, it runs the code, if it contains Markdown, it returns rendered text.\n", + "* Alt+ENTER evaluates the current cell and adds a new cell below it.\n", "* If you click to the left of a cell, you'll notice the frame changes color to blue. You can erase a cell by hitting 'dd' (that's two \"d\"s in a row) when the frame is blue." ] }, @@ -131,7 +131,7 @@ " y: ndarray\n", " 1D array containing coordinates for a point\n", " metric: str\n", - " Euclidean, Hamming \n", + " Euclidean, Manhattan \n", " Returns:\n", " dist: float\n", " '''\n", @@ -206,7 +206,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "ROC curves are a good way to visualize sensitivity vs. 1-specificity for varying cut off points. Now, implement a \"ROC\" function that predicts the labels of the test set examples using different *threshold* values in \"predict\" and plot the ROC curve. \"ROC\" takes a list containing different *threshold* parameter values to try and returns two arrays; one where each entry is the sensitivity at a given threshold and the other where entries are 1-specificities." + "ROC curves are a good way to visualize sensitivity vs. 1-specificity for varying cut off points. \"ROC\" takes a list containing different *threshold* parameter values to try and returns two arrays; one where each entry is the sensitivity at a given threshold and the other where entries are 1-specificities." ] }, { @@ -233,7 +233,6 @@ " 1D array containing 1-specifities\n", " '''\n", " \n", - " # use predict method to obtain predicted labels at different threshold values\n", " # use conf_matrix to calculate tp, tn, fp, fn\n", " # calculate sensitivity, 1-specificity\n", " # return two arrays\n",