diff --git a/Market Trend Classification Model/Market_Trend_Classification_Eda.ipynb b/Market Trend Classification Model/Market_Trend_Classification_Eda.ipynb new file mode 100644 index 0000000..b72f80f --- /dev/null +++ b/Market Trend Classification Model/Market_Trend_Classification_Eda.ipynb @@ -0,0 +1,742 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "141dc98c-e1f3-40fd-98b5-83299444940d", + "metadata": { + "id": "141dc98c-e1f3-40fd-98b5-83299444940d" + }, + "source": [ + "# Market Trend Classification Model\n", + "\n", + "## Overview\n", + "The Market Trend Classification Model aims to identify different market conditions (regimes) in historical stock price data using clustering techniques. By classifying these regimes, the project provides insights into periods of market behavior, such as bull, bear, or neutral phases, helping investors or financial analysts understand market trends and develop effective strategies.\n" + ] + }, + { + "cell_type": "markdown", + "id": "49439176-3972-44de-9788-4a389d912e40", + "metadata": { + "id": "49439176-3972-44de-9788-4a389d912e40" + }, + "source": [ + "# Requirements" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cbbda166-cb81-431e-acc7-f8866f9aae7e", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cbbda166-cb81-431e-acc7-f8866f9aae7e", + "outputId": "ac6b64c0-f70a-455c-d1fb-450350eff06c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (2.2.2)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.26.4)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (3.7.1)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.5.2)\n", + "Requirement already satisfied: yfinance in /usr/local/lib/python3.10/dist-packages (0.2.48)\n", + 
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.2)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.3.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (4.54.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.4.7)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (24.1)\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (10.4.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (3.2.0)\n", + "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.13.1)\n", + "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.5.0)\n", + "Requirement already satisfied: requests>=2.31 in /usr/local/lib/python3.10/dist-packages (from yfinance) (2.32.3)\n", + "Requirement already satisfied: multitasking>=0.0.7 in /usr/local/lib/python3.10/dist-packages (from yfinance) (0.0.11)\n", + "Requirement already satisfied: lxml>=4.9.1 in /usr/local/lib/python3.10/dist-packages (from yfinance) (4.9.4)\n", + "Requirement already satisfied: 
platformdirs>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from yfinance) (4.3.6)\n", + "Requirement already satisfied: frozendict>=2.3.4 in /usr/local/lib/python3.10/dist-packages (from yfinance) (2.4.6)\n", + "Requirement already satisfied: peewee>=3.16.2 in /usr/local/lib/python3.10/dist-packages (from yfinance) (3.17.7)\n", + "Requirement already satisfied: beautifulsoup4>=4.11.1 in /usr/local/lib/python3.10/dist-packages (from yfinance) (4.12.3)\n", + "Requirement already satisfied: html5lib>=1.1 in /usr/local/lib/python3.10/dist-packages (from yfinance) (1.1)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4>=4.11.1->yfinance) (2.6)\n", + "Requirement already satisfied: six>=1.9 in /usr/local/lib/python3.10/dist-packages (from html5lib>=1.1->yfinance) (1.16.0)\n", + "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from html5lib>=1.1->yfinance) (0.5.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31->yfinance) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31->yfinance) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31->yfinance) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31->yfinance) (2024.8.30)\n" + ] + } + ], + "source": [ + "%pip install pandas numpy matplotlib scikit-learn yfinance\n" + ] + }, + { + "cell_type": "markdown", + "id": "76abe973-63cc-4a65-ab3e-b3a1d2e32fe8", + "metadata": { + "id": "76abe973-63cc-4a65-ab3e-b3a1d2e32fe8" + }, + "source": [ + "## Steps Involved\n", + "### 1: **Import Libraries**" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f07cd4fc-d510-4ce2-ad19-78c369a7aad4", + "metadata": { + "id": 
"f07cd4fc-d510-4ce2-ad19-78c369a7aad4" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import yfinance as yf\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.cluster import KMeans\n", + "from sklearn.metrics import silhouette_score\n" + ] + }, + { + "cell_type": "markdown", + "id": "4d2a2991-9ffe-4cdb-88e2-fb74eb3e66c9", + "metadata": { + "id": "4d2a2991-9ffe-4cdb-88e2-fb74eb3e66c9" + }, + "source": [ + "### 2. **Data Collection**\n", + "Historical stock price data is collected using Yahoo Finance (via the `yfinance` library). In this example, the data of the S&P 500 index is fetched to observe trends and detect market regimes over time.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cbf3670a-ca22-41a2-91c0-c9f4a2f92869", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cbf3670a-ca22-41a2-91c0-c9f4a2f92869", + "outputId": "cd1599bb-3806-4e84-c991-a164fddcce69" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r[*********************100%***********************] 1 of 1 completed\n" + ] + } + ], + "source": [ + "# Download daily S&P 500 index history (^GSPC); auto_adjust=False keeps the separate 'Adj Close' column on yfinance versions that adjust prices by default\n", + "stock_data = yf.download('^GSPC', start='2010-01-01', end='2024-01-01', auto_adjust=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8b09ddac-6d12-4adc-857d-86a17e6a0d6d", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8b09ddac-6d12-4adc-857d-86a17e6a0d6d", + "outputId": "67d6fb97-c29a-400b-c6d1-16d8a274be39" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Dataset Information:\n", + "===================\n", + "Number of records: 3323\n", + "Date range: 2010-10-18 00:00:00+00:00 to 2023-12-29 00:00:00+00:00\n", + "\n", + "Dataset Overview:\n", + "Price Adj Close Close High Low \\\n", + "Ticker 
^GSPC ^GSPC ^GSPC ^GSPC \n", + "Date \n", + "2010-10-18 00:00:00+00:00 1184.709961 1184.709961 1185.530029 1174.550049 \n", + "2010-10-19 00:00:00+00:00 1165.900024 1165.900024 1178.640015 1159.709961 \n", + "2010-10-20 00:00:00+00:00 1178.170044 1178.170044 1182.939941 1166.739990 \n", + "2010-10-21 00:00:00+00:00 1180.260010 1180.260010 1189.430054 1171.170044 \n", + "2010-10-22 00:00:00+00:00 1183.079956 1183.079956 1183.930054 1178.989990 \n", + "\n", + "Price Open Volume Return MA_50 \\\n", + "Ticker ^GSPC ^GSPC \n", + "Date \n", + "2010-10-18 00:00:00+00:00 1176.829956 4450050000 0.007244 1116.760398 \n", + "2010-10-19 00:00:00+00:00 1178.640015 5600120000 -0.015877 1117.522598 \n", + "2010-10-20 00:00:00+00:00 1166.739990 5027880000 0.010524 1118.664797 \n", + "2010-10-21 00:00:00+00:00 1179.819946 4625470000 0.001774 1120.480598 \n", + "2010-10-22 00:00:00+00:00 1180.520020 3177890000 0.002389 1122.469998 \n", + "\n", + "Price MA_200 Volatility \n", + "Ticker \n", + "Date \n", + "2010-10-18 00:00:00+00:00 1120.764647 0.010213 \n", + "2010-10-19 00:00:00+00:00 1120.929197 0.010472 \n", + "2010-10-20 00:00:00+00:00 1121.137447 0.010516 \n", + "2010-10-21 00:00:00+00:00 1121.353047 0.009634 \n", + "2010-10-22 00:00:00+00:00 1121.559997 0.009580 \n", + "\n", + "Basic Statistics:\n", + "Price Adj Close Close High Low Open \\\n", + "Ticker ^GSPC ^GSPC ^GSPC ^GSPC ^GSPC \n", + "count 3323.000000 3323.000000 3323.000000 3323.000000 3323.000000 \n", + "mean 2619.809187 2619.809187 2633.475802 2604.081766 2619.245409 \n", + "std 1042.822413 1042.822413 1049.058587 1036.072671 1042.689151 \n", + "min 1099.229980 1099.229980 1125.119995 1074.770020 1097.420044 \n", + "25% 1838.005005 1838.005005 1843.010010 1827.880005 1838.400024 \n", + "50% 2399.290039 2399.290039 2405.580078 2386.919922 2397.040039 \n", + "75% 3379.804932 3379.804932 3392.160034 3361.514893 3372.414917 \n", + "max 4796.560059 4796.560059 4818.620117 4780.979980 4804.509766 \n", + "\n", + "Price 
Volume Return MA_50 MA_200 Volatility \n", + "Ticker ^GSPC \n", + "count 3.323000e+03 3323.000000 3323.000000 3323.000000 3323.000000 \n", + "mean 3.894202e+09 0.000482 2594.151396 2520.301043 0.009567 \n", + "std 9.320337e+08 0.010964 1032.975622 1011.734048 0.005476 \n", + "min 1.025000e+09 -0.119841 1116.760398 1120.764647 0.002859 \n", + "25% 3.340490e+09 -0.003809 1811.823796 1709.873926 0.006515 \n", + "50% 3.740400e+09 0.000636 2371.234575 2260.488489 0.008054 \n", + "75% 4.266845e+09 0.005652 3319.311709 3096.986422 0.010848 \n", + "max 9.976520e+09 0.093828 4681.011611 4496.905588 0.043354 \n", + "\n", + "Missing Values:\n", + "Price Ticker\n", + "Adj Close ^GSPC 0\n", + "Close ^GSPC 0\n", + "High ^GSPC 0\n", + "Low ^GSPC 0\n", + "Open ^GSPC 0\n", + "Volume ^GSPC 0\n", + "Return 0\n", + "MA_50 0\n", + "MA_200 0\n", + "Volatility 0\n", + "dtype: int64\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":18: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-