diff --git a/Final_project/cbrown_btripp.ipynb b/Final_project/cbrown_btripp.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..ca4ecbb333c0850fa80586f4f7ee371943255aff
--- /dev/null
+++ b/Final_project/cbrown_btripp.ipynb
@@ -0,0 +1,164 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.model_selection import GridSearchCV\n",
+    "from sklearn.pipeline import Pipeline\n",
+    "from sklearn.svm import LinearSVC\n",
+    "from sklearn import preprocessing\n",
+    "from sklearn.decomposition import PCA, NMF\n",
+    "from sklearn.metrics import average_precision_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the CSV without a header row and transpose it, then label the\n",
+    "# columns with the values found in row 2 of the transposed table.\n",
+    "soma_data = pd.read_csv('preop_soma_all.csv', header=None).T\n",
+    "np_data = np.array(soma_data)\n",
+    "soma_data.columns = np_data[2]\n",
+    "\n",
+    "# Class labels are hard-coded; the commented-out line took them from the table instead.\n",
+    "#y = soma_data.iloc[13:len(soma_data),1]\n",
+    "y = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]\n",
+    "# Features: rows 13 onward and columns 2 onward of the transposed table.\n",
+    "X = soma_data.iloc[13:, 2:]\n",
+    "# axis=0 normalizes each feature column to unit L2 norm.\n",
+    "# NOTE(review): this happens before the train/test split, so held-out rows\n",
+    "# influence the scaling; consider normalizing after splitting.\n",
+    "X = preprocessing.normalize(X, norm='l2', axis=0)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Partition data into test and train sets.\n",
+    "# train_test_split lives in sklearn.model_selection; the old\n",
+    "# sklearn.cross_validation module was deprecated in 0.18 and removed in 0.20.\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, test_size=0.25, random_state=0)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[1 0 0 1 1 1 0 0 0]\n",
+      "[1, 1, 0, 1, 1, 0, 0, 0, 0]\n",
+      "Score: 0.78\n",
+      "Average precision-recall score: 0.59\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Reduce dimensionality with PCA, then classify with a linear SVM.\n",
+    "pipe = Pipeline([\n",
+    "    ('reduce_dim', PCA()),\n",
+    "    ('classify', LinearSVC())\n",
+    "])\n",
+    "\n",
+    "# NOTE(review): recent scikit-learn releases require n_components to be at most\n",
+    "# min(n_samples, n_features) of each CV training fold; confirm 26 satisfies that.\n",
+    "N_FEATURES_OPTIONS = [26] #[25, 50, 75, 100]\n",
+    "C_OPTIONS = list(range(1, 6))\n",
+    "LOSS_OPTIONS = ['squared_hinge','hinge']\n",
+    "\n",
+    "\n",
+    "param_grid = [\n",
+    "    {\n",
+    "        'reduce_dim': [PCA(iterated_power=\"auto\")],\n",
+    "        'reduce_dim__n_components': N_FEATURES_OPTIONS,\n",
+    "        'classify__C': C_OPTIONS,\n",
+    "        'classify__loss': LOSS_OPTIONS\n",
+    "    },\n",
+    "]\n",
+    "reducer_labels = ['PCA']\n",
+    "\n",
+    "# No `scoring` is passed, so candidates are ranked with the pipeline's default\n",
+    "# score method. A string `refit` only names a metric under multi-metric scoring,\n",
+    "# so plain refit=True is used to refit the best candidate on the training set.\n",
+    "grid = GridSearchCV(pipe, cv=5, n_jobs=1, param_grid=param_grid, refit=True)\n",
+    "grid.fit(X_train, y_train)\n",
+    "\n",
+    "print(grid.predict(X_test))\n",
+    "print(y_test)\n",
+    "print('Score: {0:0.2f}'.format(grid.score(X_test,y_test)))\n",
+    "\n",
+    "# Average precision from the classifier's decision-function values on the test set.\n",
+    "y_score = grid.decision_function(X_test)\n",
+    "average_precision = average_precision_score(y_test, y_score)\n",
+    "print('Average precision-recall score: {0:0.2f}'.format(\n",
+    "    average_precision))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Pipeline(memory=None,\n",
+      " steps=[('reduce_dim', PCA(copy=True, iterated_power='auto', n_components=26, random_state=None,\n",
+      " svd_solver='auto', tol=0.0, whiten=False)), ('classify', LinearSVC(C=1, class_weight=None, dual=True, fit_intercept=True,\n",
+      " intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
+      " multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
+      " verbose=0))])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Show the pipeline refit with the best parameters found by the search.\n",
+    "#grid.get_params().keys()\n",
+    "print(grid.best_estimator_)\n",
+    "#cv_dict = grid.cv_results_\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (myenv)",
+   "language": "python",
+   "name": "myenv"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}