diff --git a/ProgrammingAssignment_0/GettingFamiliar_solution.ipynb b/ProgrammingAssignment_0/GettingFamiliar_solution.ipynb index 272d1d763113a77be1c7c5b1d0cec08c78537167..73b654683801b1f268cdb6f6c3578f9f941d107f 100644 --- a/ProgrammingAssignment_0/GettingFamiliar_solution.ipynb +++ b/ProgrammingAssignment_0/GettingFamiliar_solution.ipynb @@ -132,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -152,6 +152,9 @@ " # You might find np.genfromtxt useful for reading in the file. Be careful with the file delimiter, \n", " # e.g. for comma-separated files use delimiter=',' argument.\n", " \n", + " #TODO\n", + " \n", + " # read in features and labels\n", " features = np.genfromtxt(feature_file)\n", " labels = np.genfromtxt(label_file)\n", " \n", @@ -176,21 +179,23 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "/home/zh/miniconda3/lib/python3.6/site-packages/ipykernel_launcher.py:17: UserWarning: genfromtxt: Empty input file: \"../data/madelon.data\"\n", - "/home/zh/miniconda3/lib/python3.6/site-packages/ipykernel_launcher.py:18: UserWarning: genfromtxt: Empty input file: \"../data/madelon.labels\"\n" + "Shape of features: (2000, 500)\n", + "Shape of labels: (2000,)\n" ] } ], "source": [ "features, labels = preprocess(feature_file = '../data/madelon.data', label_file = '../data/madelon.labels')\n", - "# TODO: Output the dimension of both features and labels." 
+ "# TODO: Output the dimension of both features and labels.\n", + "print('Shape of features: {}'.format(features.shape))\n", + "print('Shape of labels: {}'.format(labels.shape))" ] }, { @@ -209,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -237,7 +242,18 @@ " \n", " # TODO\n", " \n", - " raise NotImplementedError\n", + " # number of test and validation examples\n", + " t_size = np.int(np.ceil(size*t))\n", + " v_size = np.int(np.ceil(size*v))\n", + "\n", + " # shuffle the indices\n", + " permuted = np.random.permutation(size)\n", + " \n", + " # spare the first t_size for test\n", + " test_indices = permuted[:t_size]\n", + " # and the next v_size for validation\n", + " val_indices = permuted[t_size+1:t_size+v_size+1]\n", + " \n", " \n", " return test_indices, val_indices" ] @@ -260,15 +276,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test size: 600, validation size: 200\n" + ] + } + ], "source": [ "# TODO\n", "# Pass the correct size argument (number of examples in the whole dataset)\n", - "test_indices, val_indices = partition(size=..., t = 0.3, v = 0.1)\n", + "test_indices, val_indices = partition(size = features.shape[0], t = 0.3, v = 0.1)\n", "\n", - "# Output the size of both features and labels." 
def normalized_preprocess(feature_file, label_file):
    """Load features and labels, scaling every feature column by its maximum.

    Args:
        feature_file: path to a whitespace-delimited text file holding one
            example per row and one feature per column.
        label_file: path to a text file holding the labels.

    Returns:
        features: ndarray with the same layout as the feature file
            (examples along axis 0, features along axis 1), where each
            column has been divided by that column's maximum value.
        labels: ndarray with the labels exactly as read from label_file.

    Note:
        Dividing by the column maximum bounds the values in [0, 1] only when
        the raw features are non-negative.
    """
    raw_features = np.genfromtxt(feature_file)
    labels = np.genfromtxt(label_file)

    # Per-column maxima in a single vectorized pass instead of a Python loop.
    col_max = raw_features.max(axis=0)

    # A column whose maximum is 0 would turn into NaN (0 / 0); dividing such
    # a column by 1 leaves it untouched instead.
    safe_max = np.where(col_max == 0, 1.0, col_max)

    # Broadcasting divides every row by the per-column maxima.
    features = raw_features / safe_max

    # Keep the (examples, features) layout: no transpose, so the result has
    # the same orientation as the un-normalized preprocessing output.
    return features, labels