From d0ded9e83fc7559f983b95b7fa23a2c04a9d2fae Mon Sep 17 00:00:00 2001 From: Aditi Date: Fri, 11 Oct 2024 11:47:37 +0530 Subject: [PATCH] Added startup profit predition --- .../Startup-profit-prediction/50_Startups.csv | 51 ++ .../Startup-profit-prediction/Readme.md | 19 + .../Startup-profit-prediction/app.py | 36 ++ .../linear_regression_scikit.ipynb | 511 ++++++++++++++++++ .../Startup-profit-prediction/startup.pkl | Bin 0 -> 528 bytes .../templates/home.html | 32 ++ .../templates/output.html | 12 + 7 files changed, 661 insertions(+) create mode 100644 Prediction Models/Startup-profit-prediction/50_Startups.csv create mode 100644 Prediction Models/Startup-profit-prediction/Readme.md create mode 100644 Prediction Models/Startup-profit-prediction/app.py create mode 100644 Prediction Models/Startup-profit-prediction/linear_regression_scikit.ipynb create mode 100644 Prediction Models/Startup-profit-prediction/startup.pkl create mode 100644 Prediction Models/Startup-profit-prediction/templates/home.html create mode 100644 Prediction Models/Startup-profit-prediction/templates/output.html diff --git a/Prediction Models/Startup-profit-prediction/50_Startups.csv b/Prediction Models/Startup-profit-prediction/50_Startups.csv new file mode 100644 index 00000000..b1cc5f20 --- /dev/null +++ b/Prediction Models/Startup-profit-prediction/50_Startups.csv @@ -0,0 +1,51 @@ +R&D Spend,Administration,Marketing Spend,State,Profit +165349.2,136897.8,471784.1,New York,192261.83 +162597.7,151377.59,443898.53,California,191792.06 +153441.51,101145.55,407934.54,Florida,191050.39 +144372.41,118671.85,383199.62,New York,182901.99 +142107.34,91391.77,366168.42,Florida,166187.94 +131876.9,99814.71,362861.36,New York,156991.12 +134615.46,147198.87,127716.82,California,156122.51 +130298.13,145530.06,323876.68,Florida,155752.6 +120542.52,148718.95,311613.29,New York,152211.77 +123334.88,108679.17,304981.62,California,149759.96 +101913.08,110594.11,229160.95,Florida,146121.95 
+100671.96,91790.61,249744.55,California,144259.4 +93863.75,127320.38,249839.44,Florida,141585.52 +91992.39,135495.07,252664.93,California,134307.35 +119943.24,156547.42,256512.92,Florida,132602.65 +114523.61,122616.84,261776.23,New York,129917.04 +78013.11,121597.55,264346.06,California,126992.93 +94657.16,145077.58,282574.31,New York,125370.37 +91749.16,114175.79,294919.57,Florida,124266.9 +86419.7,153514.11,0,New York,122776.86 +76253.86,113867.3,298664.47,California,118474.03 +78389.47,153773.43,299737.29,New York,111313.02 +73994.56,122782.75,303319.26,Florida,110352.25 +67532.53,105751.03,304768.73,Florida,108733.99 +77044.01,99281.34,140574.81,New York,108552.04 +64664.71,139553.16,137962.62,California,107404.34 +75328.87,144135.98,134050.07,Florida,105733.54 +72107.6,127864.55,353183.81,New York,105008.31 +66051.52,182645.56,118148.2,Florida,103282.38 +65605.48,153032.06,107138.38,New York,101004.64 +61994.48,115641.28,91131.24,Florida,99937.59 +61136.38,152701.92,88218.23,New York,97483.56 +63408.86,129219.61,46085.25,California,97427.84 +55493.95,103057.49,214634.81,Florida,96778.92 +46426.07,157693.92,210797.67,California,96712.8 +46014.02,85047.44,205517.64,New York,96479.51 +28663.76,127056.21,201126.82,Florida,90708.19 +44069.95,51283.14,197029.42,California,89949.14 +20229.59,65947.93,185265.1,New York,81229.06 +38558.51,82982.09,174999.3,California,81005.76 +28754.33,118546.05,172795.67,California,78239.91 +27892.92,84710.77,164470.71,Florida,77798.83 +23640.93,96189.63,148001.11,California,71498.49 +15505.73,127382.3,35534.17,New York,69758.98 +22177.74,154806.14,28334.72,California,65200.33 +1000.23,124153.04,1903.93,New York,64926.08 +1315.46,115816.21,297114.46,Florida,49490.75 +0,135426.92,0,California,42559.73 +542.05,51743.15,0,New York,35673.41 +0,116983.8,45173.06,California,14681.4 \ No newline at end of file diff --git a/Prediction Models/Startup-profit-prediction/Readme.md b/Prediction Models/Startup-profit-prediction/Readme.md new file 
mode 100644 index 00000000..cff46ff2 --- /dev/null +++ b/Prediction Models/Startup-profit-prediction/Readme.md @@ -0,0 +1,19 @@ +## **Startup Profit Prediction** +**GOAL** + +The goal of this project is to analyze and predict the profit of a startup using features such as 'R&D Spend', 'Administration', 'Marketing Spend', 'State', etc. By leveraging multiple regression techniques, this project aims to identify the most significant factors influencing startup profitability and build a robust predictive model. + +**DATASET** + +Dataset can be downloaded from [here](https://www.kaggle.com/sonalisingh1411/startup50). + +**LIBRARIES NEEDED** +- pandas +- NumPy +- Matplotlib +- sklearn (For data training, importing models and performance check) + + +**CONCLUSION** + +* The analysis of the startup dataset reveals significant correlations between the features and the profits, providing valuable insights for potential investors and decision-makers. \ No newline at end of file diff --git a/Prediction Models/Startup-profit-prediction/app.py b/Prediction Models/Startup-profit-prediction/app.py new file mode 100644 index 00000000..5b96ea21 --- /dev/null +++ b/Prediction Models/Startup-profit-prediction/app.py @@ -0,0 +1,36 @@ +from flask import Flask, redirect, render_template, url_for, request +import numpy as np +import pickle + +regressor = pickle.load(open('startup.pkl', 'rb')) +app = Flask(__name__) + + +@app.route('/') +def home(): + return render_template("home.html") + + +@app.route('/submit', methods=['POST', 'GET']) +def submit(): + if request.method == "POST": + state = request.form["state"] + rdspend = float(request.form["rdspend"]) + adspend = float(request.form["adspend"]) + mkspend = float(request.form["mkspend"]) + if state == "New York": + state_list = [0.0, 1.0] + elif state == "California": + state_list = [0.0, 0.0] + else: + state_list = [1.0, 0.0] + + input = np.array(state_list+[rdspend, adspend, mkspend]) + input = input.reshape(1, len(input)) + pred = 
regressor.predict(input)[0] + + return render_template("output.html", pred=pred) + + +if __name__ == "__main__": + app.run(debug=True) diff --git a/Prediction Models/Startup-profit-prediction/linear_regression_scikit.ipynb b/Prediction Models/Startup-profit-prediction/linear_regression_scikit.ipynb new file mode 100644 index 00000000..f6e2579b --- /dev/null +++ b/Prediction Models/Startup-profit-prediction/linear_regression_scikit.ipynb @@ -0,0 +1,511 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.9.1 64-bit" + }, + "interpreter": { + "hash": "d9feab5a1f5d805ccfa6afac8eb9a08ce4745df9a38ca9beb1bb90cbf90b919c" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "## Importing Libraries\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "## Importing Dataset\n", + "df = pd.read_csv(\"50_Startups.csv\")\n", + "X = df.iloc[:,:-1].values\n", + "y = df.iloc[:,-1].values" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[165349.2 136897.8 471784.1 'New York']\n [162597.7 151377.59 443898.53 'California']\n [153441.51 101145.55 407934.54 'Florida']\n [144372.41 118671.85 383199.62 'New York']\n [142107.34 91391.77 366168.42 'Florida']\n [131876.9 99814.71 362861.36 'New York']\n [134615.46 147198.87 127716.82 'California']\n [130298.13 145530.06 323876.68 'Florida']\n [120542.52 148718.95 
311613.29 'New York']\n [123334.88 108679.17 304981.62 'California']\n [101913.08 110594.11 229160.95 'Florida']\n [100671.96 91790.61 249744.55 'California']\n [93863.75 127320.38 249839.44 'Florida']\n [91992.39 135495.07 252664.93 'California']\n [119943.24 156547.42 256512.92 'Florida']\n [114523.61 122616.84 261776.23 'New York']\n [78013.11 121597.55 264346.06 'California']\n [94657.16 145077.58 282574.31 'New York']\n [91749.16 114175.79 294919.57 'Florida']\n [86419.7 153514.11 0.0 'New York']\n [76253.86 113867.3 298664.47 'California']\n [78389.47 153773.43 299737.29 'New York']\n [73994.56 122782.75 303319.26 'Florida']\n [67532.53 105751.03 304768.73 'Florida']\n [77044.01 99281.34 140574.81 'New York']\n [64664.71 139553.16 137962.62 'California']\n [75328.87 144135.98 134050.07 'Florida']\n [72107.6 127864.55 353183.81 'New York']\n [66051.52 182645.56 118148.2 'Florida']\n [65605.48 153032.06 107138.38 'New York']\n [61994.48 115641.28 91131.24 'Florida']\n [61136.38 152701.92 88218.23 'New York']\n [63408.86 129219.61 46085.25 'California']\n [55493.95 103057.49 214634.81 'Florida']\n [46426.07 157693.92 210797.67 'California']\n [46014.02 85047.44 205517.64 'New York']\n [28663.76 127056.21 201126.82 'Florida']\n [44069.95 51283.14 197029.42 'California']\n [20229.59 65947.93 185265.1 'New York']\n [38558.51 82982.09 174999.3 'California']\n [28754.33 118546.05 172795.67 'California']\n [27892.92 84710.77 164470.71 'Florida']\n [23640.93 96189.63 148001.11 'California']\n [15505.73 127382.3 35534.17 'New York']\n [22177.74 154806.14 28334.72 'California']\n [1000.23 124153.04 1903.93 'New York']\n [1315.46 115816.21 297114.46 'Florida']\n [0.0 135426.92 0.0 'California']\n [542.05 51743.15 0.0 'New York']\n [0.0 116983.8 45173.06 'California']]\n" + ] + } + ], + "source": [ + "print(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"[192261.83 191792.06 191050.39 182901.99 166187.94 156991.12 156122.51\n 155752.6 152211.77 149759.96 146121.95 144259.4 141585.52 134307.35\n 132602.65 129917.04 126992.93 125370.37 124266.9 122776.86 118474.03\n 111313.02 110352.25 108733.99 108552.04 107404.34 105733.54 105008.31\n 103282.38 101004.64 99937.59 97483.56 97427.84 96778.92 96712.8\n 96479.51 90708.19 89949.14 81229.06 81005.76 78239.91 77798.83\n 71498.49 69758.98 65200.33 64926.08 49490.75 42559.73 35673.41\n 14681.4 ]\n" + ] + } + ], + "source": [ + "print(y)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[165349.2, 136897.8, 471784.1, 'New York'],\n", + " [162597.7, 151377.59, 443898.53, 'California'],\n", + " [153441.51, 101145.55, 407934.54, 'Florida'],\n", + " [144372.41, 118671.85, 383199.62, 'New York'],\n", + " [142107.34, 91391.77, 366168.42, 'Florida'],\n", + " [131876.9, 99814.71, 362861.36, 'New York'],\n", + " [134615.46, 147198.87, 127716.82, 'California'],\n", + " [130298.13, 145530.06, 323876.68, 'Florida'],\n", + " [120542.52, 148718.95, 311613.29, 'New York'],\n", + " [123334.88, 108679.17, 304981.62, 'California'],\n", + " [101913.08, 110594.11, 229160.95, 'Florida'],\n", + " [100671.96, 91790.61, 249744.55, 'California'],\n", + " [93863.75, 127320.38, 249839.44, 'Florida'],\n", + " [91992.39, 135495.07, 252664.93, 'California'],\n", + " [119943.24, 156547.42, 256512.92, 'Florida'],\n", + " [114523.61, 122616.84, 261776.23, 'New York'],\n", + " [78013.11, 121597.55, 264346.06, 'California'],\n", + " [94657.16, 145077.58, 282574.31, 'New York'],\n", + " [91749.16, 114175.79, 294919.57, 'Florida'],\n", + " [86419.7, 153514.11, 0.0, 'New York'],\n", + " [76253.86, 113867.3, 298664.47, 'California'],\n", + " [78389.47, 153773.43, 299737.29, 'New York'],\n", + " [73994.56, 122782.75, 303319.26, 'Florida'],\n", + " [67532.53, 105751.03, 304768.73, 
'Florida'],\n", + " [77044.01, 99281.34, 140574.81, 'New York'],\n", + " [64664.71, 139553.16, 137962.62, 'California'],\n", + " [75328.87, 144135.98, 134050.07, 'Florida'],\n", + " [72107.6, 127864.55, 353183.81, 'New York'],\n", + " [66051.52, 182645.56, 118148.2, 'Florida'],\n", + " [65605.48, 153032.06, 107138.38, 'New York'],\n", + " [61994.48, 115641.28, 91131.24, 'Florida'],\n", + " [61136.38, 152701.92, 88218.23, 'New York'],\n", + " [63408.86, 129219.61, 46085.25, 'California'],\n", + " [55493.95, 103057.49, 214634.81, 'Florida'],\n", + " [46426.07, 157693.92, 210797.67, 'California'],\n", + " [46014.02, 85047.44, 205517.64, 'New York'],\n", + " [28663.76, 127056.21, 201126.82, 'Florida'],\n", + " [44069.95, 51283.14, 197029.42, 'California'],\n", + " [20229.59, 65947.93, 185265.1, 'New York'],\n", + " [38558.51, 82982.09, 174999.3, 'California'],\n", + " [28754.33, 118546.05, 172795.67, 'California'],\n", + " [27892.92, 84710.77, 164470.71, 'Florida'],\n", + " [23640.93, 96189.63, 148001.11, 'California'],\n", + " [15505.73, 127382.3, 35534.17, 'New York'],\n", + " [22177.74, 154806.14, 28334.72, 'California'],\n", + " [1000.23, 124153.04, 1903.93, 'New York'],\n", + " [1315.46, 115816.21, 297114.46, 'Florida'],\n", + " [0.0, 135426.92, 0.0, 'California'],\n", + " [542.05, 51743.15, 0.0, 'New York'],\n", + " [0.0, 116983.8, 45173.06, 'California']], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 73 + } + ], + "source": [ + "from sklearn.impute import SimpleImputer\n", + "si = SimpleImputer(missing_values=np.nan,strategy=\"mean\")\n", + "X[:,:3] = si.fit_transform(X[:,:3])\n", + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[0.0, 1.0, 165349.2, 136897.8, 471784.1],\n", + " [0.0, 0.0, 162597.7, 151377.59, 443898.53],\n", + " [1.0, 0.0, 153441.51, 101145.55, 407934.54],\n", + " [0.0, 1.0, 144372.41, 118671.85, 
383199.62],\n", + " [1.0, 0.0, 142107.34, 91391.77, 366168.42],\n", + " [0.0, 1.0, 131876.9, 99814.71, 362861.36],\n", + " [0.0, 0.0, 134615.46, 147198.87, 127716.82],\n", + " [1.0, 0.0, 130298.13, 145530.06, 323876.68],\n", + " [0.0, 1.0, 120542.52, 148718.95, 311613.29],\n", + " [0.0, 0.0, 123334.88, 108679.17, 304981.62],\n", + " [1.0, 0.0, 101913.08, 110594.11, 229160.95],\n", + " [0.0, 0.0, 100671.96, 91790.61, 249744.55],\n", + " [1.0, 0.0, 93863.75, 127320.38, 249839.44],\n", + " [0.0, 0.0, 91992.39, 135495.07, 252664.93],\n", + " [1.0, 0.0, 119943.24, 156547.42, 256512.92],\n", + " [0.0, 1.0, 114523.61, 122616.84, 261776.23],\n", + " [0.0, 0.0, 78013.11, 121597.55, 264346.06],\n", + " [0.0, 1.0, 94657.16, 145077.58, 282574.31],\n", + " [1.0, 0.0, 91749.16, 114175.79, 294919.57],\n", + " [0.0, 1.0, 86419.7, 153514.11, 0.0],\n", + " [0.0, 0.0, 76253.86, 113867.3, 298664.47],\n", + " [0.0, 1.0, 78389.47, 153773.43, 299737.29],\n", + " [1.0, 0.0, 73994.56, 122782.75, 303319.26],\n", + " [1.0, 0.0, 67532.53, 105751.03, 304768.73],\n", + " [0.0, 1.0, 77044.01, 99281.34, 140574.81],\n", + " [0.0, 0.0, 64664.71, 139553.16, 137962.62],\n", + " [1.0, 0.0, 75328.87, 144135.98, 134050.07],\n", + " [0.0, 1.0, 72107.6, 127864.55, 353183.81],\n", + " [1.0, 0.0, 66051.52, 182645.56, 118148.2],\n", + " [0.0, 1.0, 65605.48, 153032.06, 107138.38],\n", + " [1.0, 0.0, 61994.48, 115641.28, 91131.24],\n", + " [0.0, 1.0, 61136.38, 152701.92, 88218.23],\n", + " [0.0, 0.0, 63408.86, 129219.61, 46085.25],\n", + " [1.0, 0.0, 55493.95, 103057.49, 214634.81],\n", + " [0.0, 0.0, 46426.07, 157693.92, 210797.67],\n", + " [0.0, 1.0, 46014.02, 85047.44, 205517.64],\n", + " [1.0, 0.0, 28663.76, 127056.21, 201126.82],\n", + " [0.0, 0.0, 44069.95, 51283.14, 197029.42],\n", + " [0.0, 1.0, 20229.59, 65947.93, 185265.1],\n", + " [0.0, 0.0, 38558.51, 82982.09, 174999.3],\n", + " [0.0, 0.0, 28754.33, 118546.05, 172795.67],\n", + " [1.0, 0.0, 27892.92, 84710.77, 164470.71],\n", + " [0.0, 0.0, 
23640.93, 96189.63, 148001.11],\n", + " [0.0, 1.0, 15505.73, 127382.3, 35534.17],\n", + " [0.0, 0.0, 22177.74, 154806.14, 28334.72],\n", + " [0.0, 1.0, 1000.23, 124153.04, 1903.93],\n", + " [1.0, 0.0, 1315.46, 115816.21, 297114.46],\n", + " [0.0, 0.0, 0.0, 135426.92, 0.0],\n", + " [0.0, 1.0, 542.05, 51743.15, 0.0],\n", + " [0.0, 0.0, 0.0, 116983.8, 45173.06]], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 74 + } + ], + "source": [ + "##Encoding categorical column\n", + "\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "\n", + "ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(drop='first'), [3])], remainder='passthrough')\n", + "X = ct.fit_transform(X)\n", + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "##Train test split\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2)" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[0.0 0.0 0.0 135426.92 0.0]\n [0.0 0.0 28754.33 118546.05 172795.67]\n [1.0 0.0 142107.34 91391.77 366168.42]\n [1.0 0.0 93863.75 127320.38 249839.44]\n [0.0 0.0 123334.88 108679.17 304981.62]\n [0.0 0.0 64664.71 139553.16 137962.62]\n [0.0 1.0 86419.7 153514.11 0.0]\n [0.0 1.0 46014.02 85047.44 205517.64]\n [1.0 0.0 67532.53 105751.03 304768.73]\n [0.0 0.0 76253.86 113867.3 298664.47]\n [1.0 0.0 91749.16 114175.79 294919.57]\n [0.0 1.0 77044.01 99281.34 140574.81]\n [0.0 0.0 78013.11 121597.55 264346.06]\n [1.0 0.0 119943.24 156547.42 256512.92]\n [0.0 0.0 162597.7 151377.59 443898.53]\n [1.0 0.0 75328.87 144135.98 134050.07]\n [1.0 0.0 61994.48 115641.28 91131.24]\n [0.0 1.0 1000.23 124153.04 1903.93]\n [0.0 0.0 22177.74 154806.14 28334.72]\n [1.0 0.0 130298.13 145530.06 
323876.68]\n [0.0 0.0 0.0 116983.8 45173.06]\n [0.0 0.0 23640.93 96189.63 148001.11]\n [0.0 1.0 94657.16 145077.58 282574.31]\n [1.0 0.0 27892.92 84710.77 164470.71]\n [0.0 1.0 61136.38 152701.92 88218.23]\n [1.0 0.0 1315.46 115816.21 297114.46]\n [0.0 1.0 15505.73 127382.3 35534.17]\n [0.0 1.0 65605.48 153032.06 107138.38]\n [0.0 1.0 20229.59 65947.93 185265.1]\n [0.0 0.0 44069.95 51283.14 197029.42]\n [0.0 0.0 63408.86 129219.61 46085.25]\n [0.0 0.0 38558.51 82982.09 174999.3]\n [1.0 0.0 28663.76 127056.21 201126.82]\n [0.0 1.0 165349.2 136897.8 471784.1]\n [0.0 1.0 120542.52 148718.95 311613.29]\n [1.0 0.0 101913.08 110594.11 229160.95]\n [0.0 1.0 542.05 51743.15 0.0]\n [0.0 1.0 114523.61 122616.84 261776.23]\n [0.0 1.0 144372.41 118671.85 383199.62]\n [1.0 0.0 153441.51 101145.55 407934.54]]\n" + ] + } + ], + "source": [ + "print(train_X)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[0.0 0.0 91992.39 135495.07 252664.93]\n [0.0 1.0 131876.9 99814.71 362861.36]\n [1.0 0.0 66051.52 182645.56 118148.2]\n [1.0 0.0 73994.56 122782.75 303319.26]\n [0.0 0.0 134615.46 147198.87 127716.82]\n [1.0 0.0 55493.95 103057.49 214634.81]\n [0.0 1.0 78389.47 153773.43 299737.29]\n [0.0 0.0 100671.96 91790.61 249744.55]\n [0.0 0.0 46426.07 157693.92 210797.67]\n [0.0 1.0 72107.6 127864.55 353183.81]]\n" + ] + } + ], + "source": [ + "print(test_X)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[ 42559.73 78239.91 166187.94 141585.52 149759.96 107404.34 122776.86\n 96479.51 108733.99 118474.03 124266.9 108552.04 126992.93 132602.65\n 191792.06 105733.54 99937.59 64926.08 65200.33 155752.6 14681.4\n 71498.49 125370.37 77798.83 97483.56 49490.75 69758.98 101004.64\n 81229.06 89949.14 97427.84 81005.76 90708.19 192261.83 152211.77\n 146121.95 35673.41 
129917.04 182901.99 191050.39]\n" + ] + } + ], + "source": [ + "print(train_y)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[134307.35 156991.12 103282.38 110352.25 156122.51 96778.92 111313.02\n 144259.4 96712.8 105008.31]\n" + ] + } + ], + "source": [ + "print(test_y)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "metadata": {}, + "execution_count": 80 + } + ], + "source": [ + "##Training on Train set\n", + "from sklearn.linear_model import LinearRegression\n", + "\n", + "regressor = LinearRegression()\n", + "regressor.fit(train_X, train_y)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[125939.28715005 134307.35 ]\n", + " [165473.46179251 156991.12 ]\n", + " [101597.46482047 103282.38 ]\n", + " [116005.19350898 110352.25 ]\n", + " [154218.16592915 156122.51 ]\n", + " [ 98514.22644332 96778.92 ]\n", + " [120287.61253326 111313.02 ]\n", + " [133407.31178869 144259.4 ]\n", + " [ 88337.98212335 96712.8 ]\n", + " [117907.68618409 105008.31 ]]\n" + ] + } + ], + "source": [ + "#Testing on test set\n", + "\n", + "y_pred = regressor.predict(test_X)\n", + "print(np.concatenate((y_pred.reshape(-1,1),test_y.reshape(-1,1)), axis=-1))" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0.8810554098802109\n" + ] + } + ], + "source": [ + "##Measuring the performance\n", + "from sklearn.metrics import r2_score\n", + "print(r2_score(test_y, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + 
"text/plain": [ + "0.8818828729900751" + ] + }, + "metadata": {}, + "execution_count": 83 + } + ], + "source": [ + "d1 = y_pred - test_y\n", + "d2 = y_pred - test_y.mean()\n", + "r_squared = 1 - (d1.dot(d1)/d2.dot(d2))\n", + "r_squared" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[0.95974976 0.82098981 0.87839569 0.77317085 0.92473977]\n0.8714091782955137\n0.06766077623532518\n" + ] + } + ], + "source": [ + "## Using k-fold cross validation\n", + "from sklearn.model_selection import cross_val_score\n", + "\n", + "scores = cross_val_score(regressor, train_X, train_y, scoring=\"r2\", cv=5)\n", + "print(scores)\n", + "print(scores.mean())\n", + "print(scores.std())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 1.94516581e+03, 3.50082922e+03, 7.81004120e-01, -1.83350759e-02,\n", + " 3.83785272e-02])" + ] + }, + "metadata": {}, + "execution_count": 85 + } + ], + "source": [ + "regressor.coef_\n" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([101597.46482047])" + ] + }, + "metadata": {}, + "execution_count": 89 + } + ], + "source": [ + "regressor.predict([[1.0, 0.0, 66051.52, 182645.56, 118148.2]])" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "101597.46482046762" + ] + }, + "metadata": {}, + "execution_count": 88 + } + ], + "source": [ + "np.dot([1.0,1.0, 0.0, 66051.52, 182645.56, 118148.2],np.array([regressor.intercept_]+list(regressor.coef_)))" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "## Saving model\n", + "import 
pickle\n", + "pickle.dump(regressor, open('startup.pkl', 'wb') )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/Prediction Models/Startup-profit-prediction/startup.pkl b/Prediction Models/Startup-profit-prediction/startup.pkl new file mode 100644 index 0000000000000000000000000000000000000000..174ff11fc9b8f5388cd515bd2ed64b9ee5a35ba2 GIT binary patch literal 528 zcmYk3!D|yi6vj8T8NNZv`*npJ1(sp1oG;WFz7`%)oo!eBW<)_lGy_AuB6=R<6bb z3+BX>nHx2ENMZ;1SQ5~q3t4lCtOz0{q5dMRdlUCFEi92Sf$3+m>}?MZ3mn*5a@B1Hj|bUa6z1A z8Y_y0z%9@vBuK!SR3Z(O;m~#_1bvc)1IY$>);iR3K0JmcsCYTgUWe$kwnNoQNdL)D z;|g@zgXsyKy9)Js{l>AW?C>lFey3L1dvf{XZn))sZC`u3v;5MvcFx^C`u$_x`f@PI z_PZY}YwFeVi|6xT + + + + + + +

Profit prediction for startups

+

Predict the profit of a startup based on its expenditure on R&D, marketing, etc.

+
+
+ +
+ +
+ +

+
+
+
+
+
+

+ + + +
+ + + + + \ No newline at end of file diff --git a/Prediction Models/Startup-profit-prediction/templates/output.html b/Prediction Models/Startup-profit-prediction/templates/output.html new file mode 100644 index 00000000..588bcaa0 --- /dev/null +++ b/Prediction Models/Startup-profit-prediction/templates/output.html @@ -0,0 +1,12 @@ + + + + + + + +

+

The estimated profit is {{"{:.2f}".format(pred)}}

+ + + \ No newline at end of file