diff --git a/deeplearning.ipynb b/deeplearning.ipynb index fc53f37..7950a30 100644 --- a/deeplearning.ipynb +++ b/deeplearning.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -230,7 +230,7 @@ "[5 rows x 41 columns]" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -245,6 +245,90 @@ "df.head()" ] }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 CONFIRMED\n", + "1 CONFIRMED\n", + "2 False_Positive\n", + "3 False_Positive\n", + "4 CONFIRMED\n", + "5 CONFIRMED\n", + "6 CONFIRMED\n", + "7 CONFIRMED\n", + "8 False_Positive\n", + "9 CONFIRMED\n", + "10 CONFIRMED\n", + "11 CONFIRMED\n", + "12 CONFIRMED\n", + "13 CONFIRMED\n", + "14 False_Positive\n", + "15 False_Positive\n", + "16 False_Positive\n", + "17 False_Positive\n", + "18 CONFIRMED\n", + "19 CONFIRMED\n", + "20 False_Positive\n", + "21 CONFIRMED\n", + "22 CONFIRMED\n", + "23 CONFIRMED\n", + "24 False_Positive\n", + "25 CONFIRMED\n", + "26 CONFIRMED\n", + "27 CONFIRMED\n", + "28 False_Positive\n", + "29 False_Positive\n", + " ... \n", + "9532 False_Positive\n", + "9533 CANDIDATE\n", + "9535 False_Positive\n", + "9536 False_Positive\n", + "9537 False_Positive\n", + "9538 False_Positive\n", + "9539 CANDIDATE\n", + "9540 CONFIRMED\n", + "9541 False_Positive\n", + "9542 CANDIDATE\n", + "9543 False_Positive\n", + "9544 CANDIDATE\n", + "9545 False_Positive\n", + "9546 CANDIDATE\n", + "9548 False_Positive\n", + "9549 False_Positive\n", + "9550 CANDIDATE\n", + "9551 CANDIDATE\n", + "9552 False_Positive\n", + "9553 False_Positive\n", + "9554 False_Positive\n", + "9555 False_Positive\n", + "9556 False_Positive\n", + "9557 CANDIDATE\n", + "9558 CANDIDATE\n", + "9559 False_Positive\n", + "9560 False_Positive\n", + "9561 CANDIDATE\n", + "9562 False_Positive\n", + "9563 False_Positive\n", + "Name: koi_disposition, Length: 8744, dtype: object" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Recode 'FALSE POSITIVE' as 'False_Positive'\n", + "new = df[\"koi_disposition\"] == \"FALSE POSITIVE\"\n", + "df.loc[new, \"koi_disposition\"] = \"False_Positive\"\n", + "df[\"koi_disposition\"]" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ @@ -254,20 +338,31 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(8744, 40) (8744,)\n" + ] + } + ], "source": [ + "# Assign the X and y values\n", "X = df.drop(\"koi_disposition\", axis=1)\n", - "y = df[\"koi_disposition\"]" + "y = df[\"koi_disposition\"]\n", + "print(X.shape, y.shape)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ + "# Split the data into training and testing sets\n", "from sklearn.model_selection import train_test_split\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -487,7 +582,7 @@ "[5 rows x 40 columns]" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], @@ -511,6 +606,7 @@ "metadata": {}, "outputs": [], "source": [ + "# Fit the MinMaxScaler to the training data\n", "from sklearn.preprocessing import 
MinMaxScaler\n", "\n", "X_scaler = MinMaxScaler().fit(X_train)" @@ -526,6 +622,406 @@ "X_test_scaled = X_scaler.transform(X_test)" ] }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", + "from keras.utils import to_categorical\n", + "\n", + "# Step 1: Label-encode data set\n", + "label_encoder = LabelEncoder()\n", + "label_encoder.fit(y_train)\n", + "encoded_y_train = label_encoder.transform(y_train)\n", + "encoded_y_test = label_encoder.transform(y_test)\n", + "\n", + "# Step 2: Convert encoded labels to one-hot-encoding\n", + "y_train_categorical = to_categorical(encoded_y_train)\n", + "y_test_categorical = to_categorical(encoded_y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create a Deep Learning Model" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /Users/angie/anaconda3/envs/PythonData/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Colocations handled automatically by placer.\n" + ] + } + ], + "source": [ + "from keras.models import Sequential\n", + "from keras.layers import Dense\n", + "\n", + "model = Sequential()\n", + "model.add(Dense(units=120, activation='relu', input_dim=40))\n", + "model.add(Dense(units=120, activation='relu'))\n", + "model.add(Dense(units=3, activation='softmax'))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Compile the model\n", + "model.compile(optimizer='adam',\n", + " loss='categorical_crossentropy',\n", + " metrics=['accuracy'])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_1 (Dense) (None, 120) 4920 \n", + "_________________________________________________________________\n", + "dense_2 (Dense) (None, 120) 14520 \n", + "_________________________________________________________________\n", + "dense_3 (Dense) (None, 3) 363 \n", + "=================================================================\n", + "Total params: 19,803\n", + "Trainable params: 19,803\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /Users/angie/anaconda3/envs/PythonData/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.cast instead.\n", + "Epoch 1/100\n", + " - 2s - loss: 0.5112 - acc: 0.7438\n", + "Epoch 2/100\n", + " - 1s - loss: 0.3684 - acc: 0.8134\n", + "Epoch 3/100\n", + " - 1s - 
loss: 0.3509 - acc: 0.8193\n", + "Epoch 4/100\n", + " - 1s - loss: 0.3451 - acc: 0.8256\n", + "Epoch 5/100\n", + " - 1s - loss: 0.3326 - acc: 0.8425\n", + "Epoch 6/100\n", + " - 1s - loss: 0.3319 - acc: 0.8452\n", + "Epoch 7/100\n", + " - 1s - loss: 0.3247 - acc: 0.8464\n", + "Epoch 8/100\n", + " - 1s - loss: 0.3161 - acc: 0.8533\n", + "Epoch 9/100\n", + " - 1s - loss: 0.3127 - acc: 0.8544\n", + "Epoch 10/100\n", + " - 1s - loss: 0.3053 - acc: 0.8608\n", + "Epoch 11/100\n", + " - 1s - loss: 0.3031 - acc: 0.8618\n", + "Epoch 12/100\n", + " - 1s - loss: 0.2949 - acc: 0.8693\n", + "Epoch 13/100\n", + " - 1s - loss: 0.2985 - acc: 0.8634\n", + "Epoch 14/100\n", + " - 1s - loss: 0.2952 - acc: 0.8644\n", + "Epoch 15/100\n", + " - 1s - loss: 0.2913 - acc: 0.8678\n", + "Epoch 16/100\n", + " - 1s - loss: 0.2860 - acc: 0.8759\n", + "Epoch 17/100\n", + " - 1s - loss: 0.2861 - acc: 0.8704\n", + "Epoch 18/100\n", + " - 1s - loss: 0.2865 - acc: 0.8753\n", + "Epoch 19/100\n", + " - 1s - loss: 0.2756 - acc: 0.8794\n", + "Epoch 20/100\n", + " - 1s - loss: 0.2882 - acc: 0.8708\n", + "Epoch 21/100\n", + " - 1s - loss: 0.2747 - acc: 0.8792\n", + "Epoch 22/100\n", + " - 1s - loss: 0.2714 - acc: 0.8850\n", + "Epoch 23/100\n", + " - 1s - loss: 0.2711 - acc: 0.8809\n", + "Epoch 24/100\n", + " - 1s - loss: 0.2657 - acc: 0.8832\n", + "Epoch 25/100\n", + " - 1s - loss: 0.2743 - acc: 0.8782\n", + "Epoch 26/100\n", + " - 1s - loss: 0.2672 - acc: 0.8855\n", + "Epoch 27/100\n", + " - 1s - loss: 0.2691 - acc: 0.8803\n", + "Epoch 28/100\n", + " - 1s - loss: 0.2656 - acc: 0.8840\n", + "Epoch 29/100\n", + " - 1s - loss: 0.2654 - acc: 0.8808\n", + "Epoch 30/100\n", + " - 1s - loss: 0.2575 - acc: 0.8890\n", + "Epoch 31/100\n", + " - 1s - loss: 0.2625 - acc: 0.8850\n", + "Epoch 32/100\n", + " - 1s - loss: 0.2629 - acc: 0.8847\n", + "Epoch 33/100\n", + " - 1s - loss: 0.2600 - acc: 0.8875\n", + "Epoch 34/100\n", + " - 1s - loss: 0.2552 - acc: 0.8873\n", + "Epoch 35/100\n", + " - 1s - loss: 0.2550 - acc: 0.8884\n", + "Epoch 36/100\n", + " - 1s - loss: 0.2530 - acc: 0.8901\n", + "Epoch 37/100\n", + " - 1s - loss: 0.2639 - acc: 0.8801\n", + "Epoch 38/100\n", + " - 1s - loss: 0.2543 - acc: 0.8902\n", + "Epoch 39/100\n", + " - 1s - loss: 0.2520 - acc: 0.8887\n", + "Epoch 40/100\n", + " - 1s - loss: 0.2526 - acc: 0.8933\n", + "Epoch 41/100\n", + " - 1s - loss: 0.2524 - acc: 0.8879\n", + "Epoch 42/100\n", + " - 1s - loss: 0.2471 - acc: 0.8902\n", + "Epoch 43/100\n", + " - 1s - loss: 0.2526 - acc: 0.8919\n", + "Epoch 44/100\n", + " - 1s - loss: 0.2497 - acc: 0.8899\n", + "Epoch 45/100\n", + " - 1s - loss: 0.2541 - acc: 0.8901\n", + "Epoch 46/100\n", + " - 1s - loss: 0.2484 - acc: 0.8867\n", + "Epoch 47/100\n", + " - 1s - loss: 0.2480 - acc: 0.8902\n", + "Epoch 48/100\n", + " - 1s - loss: 0.2461 - acc: 0.8925\n", + "Epoch 49/100\n", + " - 1s - loss: 0.2450 - acc: 0.8928\n", + "Epoch 50/100\n", + " - 1s - loss: 0.2395 - acc: 0.8965\n", + "Epoch 51/100\n", + " - 1s - loss: 0.2421 - acc: 0.8957\n", + "Epoch 52/100\n", + " - 1s - loss: 0.2394 - acc: 0.8948\n", + "Epoch 53/100\n", + " - 1s - loss: 0.2414 - acc: 0.8943\n", + "Epoch 54/100\n", + " - 1s - loss: 0.2428 - acc: 0.8927\n", + "Epoch 55/100\n", + " - 1s - loss: 0.2383 - acc: 0.8951\n", + "Epoch 56/100\n", + " - 1s - loss: 0.2443 - acc: 0.8899\n", + "Epoch 57/100\n", + " - 1s - loss: 0.2392 - acc: 0.8954\n", + "Epoch 58/100\n", + " - 1s - loss: 0.2347 - acc: 0.8963\n", + "Epoch 59/100\n", + " - 1s - loss: 0.2413 - acc: 0.8975\n", + "Epoch 60/100\n", + " - 1s - loss: 0.2403 - acc: 
0.8913\n", + "Epoch 61/100\n", + " - 1s - loss: 0.2350 - acc: 0.8986\n", + "Epoch 62/100\n", + " - 1s - loss: 0.2342 - acc: 0.8983\n", + "Epoch 63/100\n", + " - 1s - loss: 0.2365 - acc: 0.8975\n", + "Epoch 64/100\n", + " - 1s - loss: 0.2334 - acc: 0.9012\n", + "Epoch 65/100\n", + " - 1s - loss: 0.2378 - acc: 0.8955\n", + "Epoch 66/100\n", + " - 1s - loss: 0.2365 - acc: 0.8989\n", + "Epoch 67/100\n", + " - 1s - loss: 0.2342 - acc: 0.8975\n", + "Epoch 68/100\n", + " - 1s - loss: 0.2275 - acc: 0.9012\n", + "Epoch 69/100\n", + " - 1s - loss: 0.2350 - acc: 0.8962\n", + "Epoch 70/100\n", + " - 1s - loss: 0.2325 - acc: 0.8984\n", + "Epoch 71/100\n", + " - 1s - loss: 0.2299 - acc: 0.8998\n", + "Epoch 72/100\n", + " - 1s - loss: 0.2290 - acc: 0.8981\n", + "Epoch 73/100\n", + " - 1s - loss: 0.2306 - acc: 0.9009\n", + "Epoch 74/100\n", + " - 1s - loss: 0.2249 - acc: 0.9039\n", + "Epoch 75/100\n", + " - 1s - loss: 0.2281 - acc: 0.9007\n", + "Epoch 76/100\n", + " - 1s - loss: 0.2259 - acc: 0.8997\n", + "Epoch 77/100\n", + " - 1s - loss: 0.2311 - acc: 0.8963\n", + "Epoch 78/100\n", + " - 1s - loss: 0.2330 - acc: 0.8966\n", + "Epoch 79/100\n", + " - 1s - loss: 0.2281 - acc: 0.8992\n", + "Epoch 80/100\n", + " - 1s - loss: 0.2230 - acc: 0.9045\n", + "Epoch 81/100\n", + " - 1s - loss: 0.2227 - acc: 0.9055\n", + "Epoch 82/100\n", + " - 1s - loss: 0.2285 - acc: 0.8984\n", + "Epoch 83/100\n", + " - 1s - loss: 0.2258 - acc: 0.9003\n", + "Epoch 84/100\n", + " - 1s - loss: 0.2277 - acc: 0.8984\n", + "Epoch 85/100\n", + " - 1s - loss: 0.2237 - acc: 0.9044\n", + "Epoch 86/100\n", + " - 1s - loss: 0.2255 - acc: 0.9023\n", + "Epoch 87/100\n", + " - 1s - loss: 0.2206 - acc: 0.9033\n", + "Epoch 88/100\n", + " - 1s - loss: 0.2283 - acc: 0.9021\n", + "Epoch 89/100\n", + " - 1s - loss: 0.2244 - acc: 0.9052\n", + "Epoch 90/100\n", + " - 1s - loss: 0.2246 - acc: 0.9010\n", + "Epoch 91/100\n", + " - 1s - loss: 0.2211 - acc: 0.9053\n", + "Epoch 92/100\n", + " - 1s - loss: 0.2235 - acc: 0.8975\n", + "Epoch 93/100\n", + " - 1s - loss: 0.2197 - acc: 0.9055\n", + "Epoch 94/100\n", + " - 1s - loss: 0.2235 - acc: 0.9035\n", + "Epoch 95/100\n", + " - 1s - loss: 0.2174 - acc: 0.9052\n", + "Epoch 96/100\n", + " - 1s - loss: 0.2236 - acc: 0.9006\n", + "Epoch 97/100\n", + " - 1s - loss: 0.2188 - acc: 0.9047\n", + "Epoch 98/100\n", + " - 1s - loss: 0.2217 - acc: 0.9000\n", + "Epoch 99/100\n", + " - 1s - loss: 0.2160 - acc: 0.9058\n", + "Epoch 100/100\n", + " - 1s - loss: 0.2160 - acc: 0.9058\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.fit(\n", + " X_train_scaled,\n", + " y_train_categorical,\n", + " epochs=100,\n", + " shuffle=True,\n", + " verbose=2\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Quantify Model" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normal Neural Network - Loss: 0.24744392827848502, Accuracy: 0.890210429900083\n" + ] + } + ], + "source": [ + "model_loss, model_accuracy = model.evaluate(\n", + " X_test_scaled, y_test_categorical, verbose=2)\n", + "print(\n", + " f\"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Make Predictions" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ 
+ "encoded_predictions = model.predict_classes(X_test_scaled[:5])\n", + "prediction_labels = label_encoder.inverse_transform(encoded_predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicted classes: ['FALSE POSITIVE' 'CONFIRMED' 'FALSE POSITIVE' 'FALSE POSITIVE'\n", + " 'CONFIRMED']\n", + "Actual Labels: ['FALSE POSITIVE', 'CONFIRMED', 'FALSE POSITIVE', 'FALSE POSITIVE', 'CONFIRMED']\n" + ] + } + ], + "source": [ + "print(f\"Predicted classes: {prediction_labels}\")\n", + "print(f\"Actual Labels: {list(y_test[:5])}\")" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/logisticregression.ipynb b/logisticregression.ipynb index c3418db..be7e478 100644 --- a/logisticregression.ipynb +++ b/logisticregression.ipynb @@ -535,7 +535,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Train the Support Vector Machine" + "# Create and Train the Logistic Regression Model" ] }, {