From d0ded9e83fc7559f983b95b7fa23a2c04a9d2fae Mon Sep 17 00:00:00 2001
From: Aditi <aditijain132005@gmail.com>
Date: Fri, 11 Oct 2024 11:47:37 +0530
Subject: [PATCH] Added startup profit prediction

---
 .../Startup-profit-prediction/50_Startups.csv |  51 ++
 .../Startup-profit-prediction/Readme.md       |  19 +
 .../Startup-profit-prediction/app.py          |  36 ++
 .../linear_regression_scikit.ipynb            | 511 ++++++++++++++++++
 .../Startup-profit-prediction/startup.pkl     | Bin 0 -> 528 bytes
 .../templates/home.html                       |  32 ++
 .../templates/output.html                     |  12 +
 7 files changed, 661 insertions(+)
 create mode 100644 Prediction Models/Startup-profit-prediction/50_Startups.csv
 create mode 100644 Prediction Models/Startup-profit-prediction/Readme.md
 create mode 100644 Prediction Models/Startup-profit-prediction/app.py
 create mode 100644 Prediction Models/Startup-profit-prediction/linear_regression_scikit.ipynb
 create mode 100644 Prediction Models/Startup-profit-prediction/startup.pkl
 create mode 100644 Prediction Models/Startup-profit-prediction/templates/home.html
 create mode 100644 Prediction Models/Startup-profit-prediction/templates/output.html

diff --git a/Prediction Models/Startup-profit-prediction/50_Startups.csv b/Prediction Models/Startup-profit-prediction/50_Startups.csv
new file mode 100644
index 00000000..b1cc5f20
--- /dev/null
+++ b/Prediction Models/Startup-profit-prediction/50_Startups.csv	
@@ -0,0 +1,51 @@
+R&D Spend,Administration,Marketing Spend,State,Profit
+165349.2,136897.8,471784.1,New York,192261.83
+162597.7,151377.59,443898.53,California,191792.06
+153441.51,101145.55,407934.54,Florida,191050.39
+144372.41,118671.85,383199.62,New York,182901.99
+142107.34,91391.77,366168.42,Florida,166187.94
+131876.9,99814.71,362861.36,New York,156991.12
+134615.46,147198.87,127716.82,California,156122.51
+130298.13,145530.06,323876.68,Florida,155752.6
+120542.52,148718.95,311613.29,New York,152211.77
+123334.88,108679.17,304981.62,California,149759.96
+101913.08,110594.11,229160.95,Florida,146121.95
+100671.96,91790.61,249744.55,California,144259.4
+93863.75,127320.38,249839.44,Florida,141585.52
+91992.39,135495.07,252664.93,California,134307.35
+119943.24,156547.42,256512.92,Florida,132602.65
+114523.61,122616.84,261776.23,New York,129917.04
+78013.11,121597.55,264346.06,California,126992.93
+94657.16,145077.58,282574.31,New York,125370.37
+91749.16,114175.79,294919.57,Florida,124266.9
+86419.7,153514.11,0,New York,122776.86
+76253.86,113867.3,298664.47,California,118474.03
+78389.47,153773.43,299737.29,New York,111313.02
+73994.56,122782.75,303319.26,Florida,110352.25
+67532.53,105751.03,304768.73,Florida,108733.99
+77044.01,99281.34,140574.81,New York,108552.04
+64664.71,139553.16,137962.62,California,107404.34
+75328.87,144135.98,134050.07,Florida,105733.54
+72107.6,127864.55,353183.81,New York,105008.31
+66051.52,182645.56,118148.2,Florida,103282.38
+65605.48,153032.06,107138.38,New York,101004.64
+61994.48,115641.28,91131.24,Florida,99937.59
+61136.38,152701.92,88218.23,New York,97483.56
+63408.86,129219.61,46085.25,California,97427.84
+55493.95,103057.49,214634.81,Florida,96778.92
+46426.07,157693.92,210797.67,California,96712.8
+46014.02,85047.44,205517.64,New York,96479.51
+28663.76,127056.21,201126.82,Florida,90708.19
+44069.95,51283.14,197029.42,California,89949.14
+20229.59,65947.93,185265.1,New York,81229.06
+38558.51,82982.09,174999.3,California,81005.76
+28754.33,118546.05,172795.67,California,78239.91
+27892.92,84710.77,164470.71,Florida,77798.83
+23640.93,96189.63,148001.11,California,71498.49
+15505.73,127382.3,35534.17,New York,69758.98
+22177.74,154806.14,28334.72,California,65200.33
+1000.23,124153.04,1903.93,New York,64926.08
+1315.46,115816.21,297114.46,Florida,49490.75
+0,135426.92,0,California,42559.73
+542.05,51743.15,0,New York,35673.41
+0,116983.8,45173.06,California,14681.4
\ No newline at end of file
diff --git a/Prediction Models/Startup-profit-prediction/Readme.md b/Prediction Models/Startup-profit-prediction/Readme.md
new file mode 100644
index 00000000..cff46ff2
--- /dev/null
+++ b/Prediction Models/Startup-profit-prediction/Readme.md	
@@ -0,0 +1,19 @@
+## **Startup Profit Prediction**
+**GOAL**
+
+The goal of this project is to analyze and predict the profit of a startup using features such as 'R&D Spend', 'Administration', 'Marketing Spend', 'State', etc. By leveraging multiple regression techniques, this project aims to identify the most significant factors influencing startup profitability and build a robust predictive model.
+
+**DATASET**
+
+Dataset can be downloaded from [here](https://www.kaggle.com/sonalisingh1411/startup50).
+
+**LIBRARIES NEEDED**
+- pandas
+- NumPy
+- Matplotlib
+- scikit-learn (for model training, train/test splitting, and performance evaluation)
+- Flask (for serving the prediction form in `app.py`)
+
+**CONCLUSION**
+
+* The analysis of the startup dataset reveals significant correlations between the features and the profits, providing valuable insights for potential investors and decision-makers.
\ No newline at end of file
diff --git a/Prediction Models/Startup-profit-prediction/app.py b/Prediction Models/Startup-profit-prediction/app.py
new file mode 100644
index 00000000..5b96ea21
--- /dev/null
+++ b/Prediction Models/Startup-profit-prediction/app.py	
@@ -0,0 +1,36 @@
+from flask import Flask, redirect, render_template, url_for, request
+import numpy as np
+import pickle
+
+regressor = pickle.load(open('startup.pkl', 'rb'))
+app = Flask(__name__)
+
+
+@app.route('/')
+def home():  # Landing page: renders the prediction input form (home.html).
+    return render_template("home.html")
+
+
+@app.route('/submit', methods=['POST', 'GET'])
+def submit():
+    if request.method == "POST":
+        state = request.form["state"]
+        rdspend = float(request.form["rdspend"])
+        adspend = float(request.form["adspend"])
+        mkspend = float(request.form["mkspend"])
+        if state == "New York":
+            state_list = [0.0, 1.0]
+        elif state == "California":
+            state_list = [0.0, 0.0]
+        else:
+            state_list = [1.0, 0.0]
+
+        input = np.array(state_list+[rdspend, adspend, mkspend])
+        input = input.reshape(1, len(input))
+        pred = regressor.predict(input)[0]
+
+    return render_template("output.html", pred=pred)
+
+
+if __name__ == "__main__":  # start the Flask server only when run as a script
+    app.run(debug=True)  # NOTE(review): debug mode is for local development only
diff --git a/Prediction Models/Startup-profit-prediction/linear_regression_scikit.ipynb b/Prediction Models/Startup-profit-prediction/linear_regression_scikit.ipynb
new file mode 100644
index 00000000..f6e2579b
--- /dev/null
+++ b/Prediction Models/Startup-profit-prediction/linear_regression_scikit.ipynb	
@@ -0,0 +1,511 @@
+{
+ "metadata": {
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.1"
+  },
+  "orig_nbformat": 4,
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3.9.1 64-bit"
+  },
+  "interpreter": {
+   "hash": "d9feab5a1f5d805ccfa6afac8eb9a08ce4745df9a38ca9beb1bb90cbf90b919c"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Importing Libraries\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Importing Dataset\n",
+    "df = pd.read_csv(\"50_Startups.csv\")\n",
+    "X = df.iloc[:,:-1].values\n",
+    "y = df.iloc[:,-1].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "[[165349.2 136897.8 471784.1 'New York']\n [162597.7 151377.59 443898.53 'California']\n [153441.51 101145.55 407934.54 'Florida']\n [144372.41 118671.85 383199.62 'New York']\n [142107.34 91391.77 366168.42 'Florida']\n [131876.9 99814.71 362861.36 'New York']\n [134615.46 147198.87 127716.82 'California']\n [130298.13 145530.06 323876.68 'Florida']\n [120542.52 148718.95 311613.29 'New York']\n [123334.88 108679.17 304981.62 'California']\n [101913.08 110594.11 229160.95 'Florida']\n [100671.96 91790.61 249744.55 'California']\n [93863.75 127320.38 249839.44 'Florida']\n [91992.39 135495.07 252664.93 'California']\n [119943.24 156547.42 256512.92 'Florida']\n [114523.61 122616.84 261776.23 'New York']\n [78013.11 121597.55 264346.06 'California']\n [94657.16 145077.58 282574.31 'New York']\n [91749.16 114175.79 294919.57 'Florida']\n [86419.7 153514.11 0.0 'New York']\n [76253.86 113867.3 298664.47 'California']\n [78389.47 153773.43 299737.29 'New York']\n [73994.56 122782.75 303319.26 'Florida']\n [67532.53 105751.03 304768.73 'Florida']\n [77044.01 99281.34 140574.81 'New York']\n [64664.71 139553.16 137962.62 'California']\n [75328.87 144135.98 134050.07 'Florida']\n [72107.6 127864.55 353183.81 'New York']\n [66051.52 182645.56 118148.2 'Florida']\n [65605.48 153032.06 107138.38 'New York']\n [61994.48 115641.28 91131.24 'Florida']\n [61136.38 152701.92 88218.23 'New York']\n [63408.86 129219.61 46085.25 'California']\n [55493.95 103057.49 214634.81 'Florida']\n [46426.07 157693.92 210797.67 'California']\n [46014.02 85047.44 205517.64 'New York']\n [28663.76 127056.21 201126.82 'Florida']\n [44069.95 51283.14 197029.42 'California']\n [20229.59 65947.93 185265.1 'New York']\n [38558.51 82982.09 174999.3 'California']\n [28754.33 118546.05 172795.67 'California']\n [27892.92 84710.77 164470.71 'Florida']\n [23640.93 96189.63 148001.11 'California']\n [15505.73 127382.3 35534.17 'New York']\n [22177.74 154806.14 28334.72 'California']\n [1000.23 
124153.04 1903.93 'New York']\n [1315.46 115816.21 297114.46 'Florida']\n [0.0 135426.92 0.0 'California']\n [542.05 51743.15 0.0 'New York']\n [0.0 116983.8 45173.06 'California']]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(X)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "[192261.83 191792.06 191050.39 182901.99 166187.94 156991.12 156122.51\n 155752.6  152211.77 149759.96 146121.95 144259.4  141585.52 134307.35\n 132602.65 129917.04 126992.93 125370.37 124266.9  122776.86 118474.03\n 111313.02 110352.25 108733.99 108552.04 107404.34 105733.54 105008.31\n 103282.38 101004.64  99937.59  97483.56  97427.84  96778.92  96712.8\n  96479.51  90708.19  89949.14  81229.06  81005.76  78239.91  77798.83\n  71498.49  69758.98  65200.33  64926.08  49490.75  42559.73  35673.41\n  14681.4 ]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "array([[165349.2, 136897.8, 471784.1, 'New York'],\n",
+       "       [162597.7, 151377.59, 443898.53, 'California'],\n",
+       "       [153441.51, 101145.55, 407934.54, 'Florida'],\n",
+       "       [144372.41, 118671.85, 383199.62, 'New York'],\n",
+       "       [142107.34, 91391.77, 366168.42, 'Florida'],\n",
+       "       [131876.9, 99814.71, 362861.36, 'New York'],\n",
+       "       [134615.46, 147198.87, 127716.82, 'California'],\n",
+       "       [130298.13, 145530.06, 323876.68, 'Florida'],\n",
+       "       [120542.52, 148718.95, 311613.29, 'New York'],\n",
+       "       [123334.88, 108679.17, 304981.62, 'California'],\n",
+       "       [101913.08, 110594.11, 229160.95, 'Florida'],\n",
+       "       [100671.96, 91790.61, 249744.55, 'California'],\n",
+       "       [93863.75, 127320.38, 249839.44, 'Florida'],\n",
+       "       [91992.39, 135495.07, 252664.93, 'California'],\n",
+       "       [119943.24, 156547.42, 256512.92, 'Florida'],\n",
+       "       [114523.61, 122616.84, 261776.23, 'New York'],\n",
+       "       [78013.11, 121597.55, 264346.06, 'California'],\n",
+       "       [94657.16, 145077.58, 282574.31, 'New York'],\n",
+       "       [91749.16, 114175.79, 294919.57, 'Florida'],\n",
+       "       [86419.7, 153514.11, 0.0, 'New York'],\n",
+       "       [76253.86, 113867.3, 298664.47, 'California'],\n",
+       "       [78389.47, 153773.43, 299737.29, 'New York'],\n",
+       "       [73994.56, 122782.75, 303319.26, 'Florida'],\n",
+       "       [67532.53, 105751.03, 304768.73, 'Florida'],\n",
+       "       [77044.01, 99281.34, 140574.81, 'New York'],\n",
+       "       [64664.71, 139553.16, 137962.62, 'California'],\n",
+       "       [75328.87, 144135.98, 134050.07, 'Florida'],\n",
+       "       [72107.6, 127864.55, 353183.81, 'New York'],\n",
+       "       [66051.52, 182645.56, 118148.2, 'Florida'],\n",
+       "       [65605.48, 153032.06, 107138.38, 'New York'],\n",
+       "       [61994.48, 115641.28, 91131.24, 'Florida'],\n",
+       "       [61136.38, 152701.92, 88218.23, 'New York'],\n",
+       "       [63408.86, 129219.61, 46085.25, 'California'],\n",
+       "       [55493.95, 103057.49, 214634.81, 'Florida'],\n",
+       "       [46426.07, 157693.92, 210797.67, 'California'],\n",
+       "       [46014.02, 85047.44, 205517.64, 'New York'],\n",
+       "       [28663.76, 127056.21, 201126.82, 'Florida'],\n",
+       "       [44069.95, 51283.14, 197029.42, 'California'],\n",
+       "       [20229.59, 65947.93, 185265.1, 'New York'],\n",
+       "       [38558.51, 82982.09, 174999.3, 'California'],\n",
+       "       [28754.33, 118546.05, 172795.67, 'California'],\n",
+       "       [27892.92, 84710.77, 164470.71, 'Florida'],\n",
+       "       [23640.93, 96189.63, 148001.11, 'California'],\n",
+       "       [15505.73, 127382.3, 35534.17, 'New York'],\n",
+       "       [22177.74, 154806.14, 28334.72, 'California'],\n",
+       "       [1000.23, 124153.04, 1903.93, 'New York'],\n",
+       "       [1315.46, 115816.21, 297114.46, 'Florida'],\n",
+       "       [0.0, 135426.92, 0.0, 'California'],\n",
+       "       [542.05, 51743.15, 0.0, 'New York'],\n",
+       "       [0.0, 116983.8, 45173.06, 'California']], dtype=object)"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 73
+    }
+   ],
+   "source": [
+    "from sklearn.impute import SimpleImputer\n",
+    "si = SimpleImputer(missing_values=np.nan,strategy=\"mean\")\n",
+    "X[:,:3] = si.fit_transform(X[:,:3])\n",
+    "X"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "array([[0.0, 1.0, 165349.2, 136897.8, 471784.1],\n",
+       "       [0.0, 0.0, 162597.7, 151377.59, 443898.53],\n",
+       "       [1.0, 0.0, 153441.51, 101145.55, 407934.54],\n",
+       "       [0.0, 1.0, 144372.41, 118671.85, 383199.62],\n",
+       "       [1.0, 0.0, 142107.34, 91391.77, 366168.42],\n",
+       "       [0.0, 1.0, 131876.9, 99814.71, 362861.36],\n",
+       "       [0.0, 0.0, 134615.46, 147198.87, 127716.82],\n",
+       "       [1.0, 0.0, 130298.13, 145530.06, 323876.68],\n",
+       "       [0.0, 1.0, 120542.52, 148718.95, 311613.29],\n",
+       "       [0.0, 0.0, 123334.88, 108679.17, 304981.62],\n",
+       "       [1.0, 0.0, 101913.08, 110594.11, 229160.95],\n",
+       "       [0.0, 0.0, 100671.96, 91790.61, 249744.55],\n",
+       "       [1.0, 0.0, 93863.75, 127320.38, 249839.44],\n",
+       "       [0.0, 0.0, 91992.39, 135495.07, 252664.93],\n",
+       "       [1.0, 0.0, 119943.24, 156547.42, 256512.92],\n",
+       "       [0.0, 1.0, 114523.61, 122616.84, 261776.23],\n",
+       "       [0.0, 0.0, 78013.11, 121597.55, 264346.06],\n",
+       "       [0.0, 1.0, 94657.16, 145077.58, 282574.31],\n",
+       "       [1.0, 0.0, 91749.16, 114175.79, 294919.57],\n",
+       "       [0.0, 1.0, 86419.7, 153514.11, 0.0],\n",
+       "       [0.0, 0.0, 76253.86, 113867.3, 298664.47],\n",
+       "       [0.0, 1.0, 78389.47, 153773.43, 299737.29],\n",
+       "       [1.0, 0.0, 73994.56, 122782.75, 303319.26],\n",
+       "       [1.0, 0.0, 67532.53, 105751.03, 304768.73],\n",
+       "       [0.0, 1.0, 77044.01, 99281.34, 140574.81],\n",
+       "       [0.0, 0.0, 64664.71, 139553.16, 137962.62],\n",
+       "       [1.0, 0.0, 75328.87, 144135.98, 134050.07],\n",
+       "       [0.0, 1.0, 72107.6, 127864.55, 353183.81],\n",
+       "       [1.0, 0.0, 66051.52, 182645.56, 118148.2],\n",
+       "       [0.0, 1.0, 65605.48, 153032.06, 107138.38],\n",
+       "       [1.0, 0.0, 61994.48, 115641.28, 91131.24],\n",
+       "       [0.0, 1.0, 61136.38, 152701.92, 88218.23],\n",
+       "       [0.0, 0.0, 63408.86, 129219.61, 46085.25],\n",
+       "       [1.0, 0.0, 55493.95, 103057.49, 214634.81],\n",
+       "       [0.0, 0.0, 46426.07, 157693.92, 210797.67],\n",
+       "       [0.0, 1.0, 46014.02, 85047.44, 205517.64],\n",
+       "       [1.0, 0.0, 28663.76, 127056.21, 201126.82],\n",
+       "       [0.0, 0.0, 44069.95, 51283.14, 197029.42],\n",
+       "       [0.0, 1.0, 20229.59, 65947.93, 185265.1],\n",
+       "       [0.0, 0.0, 38558.51, 82982.09, 174999.3],\n",
+       "       [0.0, 0.0, 28754.33, 118546.05, 172795.67],\n",
+       "       [1.0, 0.0, 27892.92, 84710.77, 164470.71],\n",
+       "       [0.0, 0.0, 23640.93, 96189.63, 148001.11],\n",
+       "       [0.0, 1.0, 15505.73, 127382.3, 35534.17],\n",
+       "       [0.0, 0.0, 22177.74, 154806.14, 28334.72],\n",
+       "       [0.0, 1.0, 1000.23, 124153.04, 1903.93],\n",
+       "       [1.0, 0.0, 1315.46, 115816.21, 297114.46],\n",
+       "       [0.0, 0.0, 0.0, 135426.92, 0.0],\n",
+       "       [0.0, 1.0, 542.05, 51743.15, 0.0],\n",
+       "       [0.0, 0.0, 0.0, 116983.8, 45173.06]], dtype=object)"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 74
+    }
+   ],
+   "source": [
+    "##Encoding categorical column\n",
+    "\n",
+    "from sklearn.compose import ColumnTransformer\n",
+    "from sklearn.preprocessing import OneHotEncoder\n",
+    "\n",
+    "ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(drop='first'), [3])], remainder='passthrough')\n",
+    "X = ct.fit_transform(X)\n",
+    "X"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##Train test split\n",
+    "\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "[[0.0 0.0 0.0 135426.92 0.0]\n [0.0 0.0 28754.33 118546.05 172795.67]\n [1.0 0.0 142107.34 91391.77 366168.42]\n [1.0 0.0 93863.75 127320.38 249839.44]\n [0.0 0.0 123334.88 108679.17 304981.62]\n [0.0 0.0 64664.71 139553.16 137962.62]\n [0.0 1.0 86419.7 153514.11 0.0]\n [0.0 1.0 46014.02 85047.44 205517.64]\n [1.0 0.0 67532.53 105751.03 304768.73]\n [0.0 0.0 76253.86 113867.3 298664.47]\n [1.0 0.0 91749.16 114175.79 294919.57]\n [0.0 1.0 77044.01 99281.34 140574.81]\n [0.0 0.0 78013.11 121597.55 264346.06]\n [1.0 0.0 119943.24 156547.42 256512.92]\n [0.0 0.0 162597.7 151377.59 443898.53]\n [1.0 0.0 75328.87 144135.98 134050.07]\n [1.0 0.0 61994.48 115641.28 91131.24]\n [0.0 1.0 1000.23 124153.04 1903.93]\n [0.0 0.0 22177.74 154806.14 28334.72]\n [1.0 0.0 130298.13 145530.06 323876.68]\n [0.0 0.0 0.0 116983.8 45173.06]\n [0.0 0.0 23640.93 96189.63 148001.11]\n [0.0 1.0 94657.16 145077.58 282574.31]\n [1.0 0.0 27892.92 84710.77 164470.71]\n [0.0 1.0 61136.38 152701.92 88218.23]\n [1.0 0.0 1315.46 115816.21 297114.46]\n [0.0 1.0 15505.73 127382.3 35534.17]\n [0.0 1.0 65605.48 153032.06 107138.38]\n [0.0 1.0 20229.59 65947.93 185265.1]\n [0.0 0.0 44069.95 51283.14 197029.42]\n [0.0 0.0 63408.86 129219.61 46085.25]\n [0.0 0.0 38558.51 82982.09 174999.3]\n [1.0 0.0 28663.76 127056.21 201126.82]\n [0.0 1.0 165349.2 136897.8 471784.1]\n [0.0 1.0 120542.52 148718.95 311613.29]\n [1.0 0.0 101913.08 110594.11 229160.95]\n [0.0 1.0 542.05 51743.15 0.0]\n [0.0 1.0 114523.61 122616.84 261776.23]\n [0.0 1.0 144372.41 118671.85 383199.62]\n [1.0 0.0 153441.51 101145.55 407934.54]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(train_X)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "[[0.0 0.0 91992.39 135495.07 252664.93]\n [0.0 1.0 131876.9 99814.71 362861.36]\n [1.0 0.0 66051.52 182645.56 118148.2]\n [1.0 0.0 73994.56 122782.75 303319.26]\n [0.0 0.0 134615.46 147198.87 127716.82]\n [1.0 0.0 55493.95 103057.49 214634.81]\n [0.0 1.0 78389.47 153773.43 299737.29]\n [0.0 0.0 100671.96 91790.61 249744.55]\n [0.0 0.0 46426.07 157693.92 210797.67]\n [0.0 1.0 72107.6 127864.55 353183.81]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(test_X)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "[ 42559.73  78239.91 166187.94 141585.52 149759.96 107404.34 122776.86\n  96479.51 108733.99 118474.03 124266.9  108552.04 126992.93 132602.65\n 191792.06 105733.54  99937.59  64926.08  65200.33 155752.6   14681.4\n  71498.49 125370.37  77798.83  97483.56  49490.75  69758.98 101004.64\n  81229.06  89949.14  97427.84  81005.76  90708.19 192261.83 152211.77\n 146121.95  35673.41 129917.04 182901.99 191050.39]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(train_y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "[134307.35 156991.12 103282.38 110352.25 156122.51  96778.92 111313.02\n 144259.4   96712.8  105008.31]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(test_y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "LinearRegression()"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 80
+    }
+   ],
+   "source": [
+    "##Training on Train set\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "\n",
+    "regressor = LinearRegression()\n",
+    "regressor.fit(train_X, train_y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "[[125939.28715005 134307.35      ]\n",
+      " [165473.46179251 156991.12      ]\n",
+      " [101597.46482047 103282.38      ]\n",
+      " [116005.19350898 110352.25      ]\n",
+      " [154218.16592915 156122.51      ]\n",
+      " [ 98514.22644332  96778.92      ]\n",
+      " [120287.61253326 111313.02      ]\n",
+      " [133407.31178869 144259.4       ]\n",
+      " [ 88337.98212335  96712.8       ]\n",
+      " [117907.68618409 105008.31      ]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Testing on test set\n",
+    "\n",
+    "y_pred = regressor.predict(test_X)\n",
+    "print(np.concatenate((y_pred.reshape(-1,1),test_y.reshape(-1,1)), axis=-1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "0.8810554098802109\n"
+     ]
+    }
+   ],
+   "source": [
+    "##Measuring the performance\n",
+    "from sklearn.metrics import r2_score\n",
+    "print(r2_score(test_y, y_pred))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 83,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "0.8818828729900751"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 83
+    }
+   ],
+   "source": [
+    "d1 = y_pred - test_y\n",
+    "d2 = y_pred - test_y.mean()\n",
+    "r_squared = 1 - (d1.dot(d1)/d2.dot(d2))\n",
+    "r_squared"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 95,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "[0.95974976 0.82098981 0.87839569 0.77317085 0.92473977]\n0.8714091782955137\n0.06766077623532518\n"
+     ]
+    }
+   ],
+   "source": [
+    "## Using k-fold cross validation\n",
+    "from sklearn.model_selection import cross_val_score\n",
+    "\n",
+    "scores = cross_val_score(regressor, train_X, train_y, scoring=\"r2\", cv=5)\n",
+    "print(scores)\n",
+    "print(scores.mean())\n",
+    "print(scores.std())\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "array([ 1.94516581e+03,  3.50082922e+03,  7.81004120e-01, -1.83350759e-02,\n",
+       "        3.83785272e-02])"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 85
+    }
+   ],
+   "source": [
+    "regressor.coef_\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "array([101597.46482047])"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 89
+    }
+   ],
+   "source": [
+    "regressor.predict([[1.0, 0.0, 66051.52, 182645.56, 118148.2]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 88,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "101597.46482046762"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 88
+    }
+   ],
+   "source": [
+    "np.dot([1.0,1.0, 0.0, 66051.52, 182645.56, 118148.2],np.array([regressor.intercept_]+list(regressor.coef_)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Saving model\n",
+    "import pickle\n",
+    "pickle.dump(regressor, open('startup.pkl', 'wb') )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ]
+}
\ No newline at end of file
diff --git a/Prediction Models/Startup-profit-prediction/startup.pkl b/Prediction Models/Startup-profit-prediction/startup.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..174ff11fc9b8f5388cd515bd2ed64b9ee5a35ba2
GIT binary patch
literal 528
zcmYk3!D|yi6vj8T8<P@ai}Ye?551UP)+$y!DBYNw+vZR&*2^%vJL!(y%q%mrg$RNQ
z)dU8G^!1n)5u`=%pa>NNZv`*npJ1(sp1oG;WFz7`%)oo!eBW<)_lGy_AuB6=R<6bb
z3+BX>nHx2ENMZ;1SQ5~q3t4lCtOz0{<qS4p<^kM;iayn#3Q?wr2uPx!t&5C{CXVS{
z0v(+Xc+x_*Ogqoe9qvo0>q5dMRdlUCFEi92Sf$3+m>}?MZ3mn*5a@B1Hj|bUa6z1A
z8Y_y0z%9@vBuK!SR3Z(O;m~#_1bvc)1IY$>);iR3K0JmcsCYTgUWe$kwnNoQNdL)D
z;|g@zgXsyKy9)Js{l>AW?C>lFey3L1dvf{XZn))sZC`u3v;5MvcFx^C`u$_x`f@PI
z_PZY}YwFeVi|6xT<hp3EBn?v{Au8#-3~-Ev$%-awh0oV}=O4cLS%pW2N819k)$FT9
zN!dzjbO@qyG}fn%&iv0jVJM3G-Ct;LruurWy?e+;s*7*m9e(nU-DhKSUawqo54L;L
v-J6!1QH=cCgt46NXCIBfzfk!0-5q3)qQ7fKYeZy=)cING;w5JmQs4Olp?=t+

literal 0
HcmV?d00001

diff --git a/Prediction Models/Startup-profit-prediction/templates/home.html b/Prediction Models/Startup-profit-prediction/templates/home.html
new file mode 100644
index 00000000..9315e447
--- /dev/null
+++ b/Prediction Models/Startup-profit-prediction/templates/home.html	
@@ -0,0 +1,32 @@
+<html>
+
+<head>
+
+</head>
+
+<body>
+    <h1>Profit prediction for startups</h1>
+    <p>Predict the profit of a startup based on its expenditure on R&amp;D, administration, and marketing.</p>
+    <form action="/submit" method="POST">
+        <label for="state">Startup is in which state?</label><br>
+        <input type="radio" id="newyork" name="state" value="New York">
+        <label for="newyork">New York</label><br>
+        <input type="radio" id="florida" name="state" value="Florida">
+        <label for="florida">Florida</label><br>
+        <input type="radio" id="california" name="state" value="California">
+        <label for="california">California</label><br><br>
+        <label for="rdspend">What is the total expenditure on R&amp;D?</label><br>
+        <input type="text" id="rdspend" name="rdspend" value="0"><br>
+        <label for="adspend">What is the total expenditure on Administration?</label><br>
+        <input type="text" id="adspend" name="adspend" value="0"><br>
+        <label for="mkspend">What is the total expenditure on Marketing?</label><br>
+        <input type="text" id="mkspend" name="mkspend" value="0"><br><br>
+        <input type="submit" value="Submit">
+
+
+    </form>
+
+
+</body>
+
+</html>
\ No newline at end of file
diff --git a/Prediction Models/Startup-profit-prediction/templates/output.html b/Prediction Models/Startup-profit-prediction/templates/output.html
new file mode 100644
index 00000000..588bcaa0
--- /dev/null
+++ b/Prediction Models/Startup-profit-prediction/templates/output.html	
@@ -0,0 +1,12 @@
+<html>
+
+<head>
+
+</head>
+
+<body>
+    <br><br>
+    <h1>The estimated profit is {{"{:.2f}".format(pred)}}</h1>
+</body>
+
+</html>
\ No newline at end of file