Skip to content

Commit

Permalink
Added Clustering Models and dataset. Updated the README
Browse files Browse the repository at this point in the history
  • Loading branch information
screwgoth committed Jun 2, 2020
1 parent 80c6f6b commit 9640bb1
Show file tree
Hide file tree
Showing 4 changed files with 620 additions and 0 deletions.
206 changes: 206 additions & 0 deletions Clustering/Hierarchical Clustering/hierarchical_clustering.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "JKkbeQi2Mzug"
},
"source": [
"# Hierarchical Clustering"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "TaQI437hM1Ho"
},
"source": [
"## Importing the libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "2UW48DgcM4YS"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "gFeTEtDxM7K4"
},
"source": [
"## Importing the dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "4fS2J3HGM99q"
},
"outputs": [],
"source": [
"# Load the customer table; columns 3 and 4 form the two-feature matrix X\n",
"# (they are plotted further down as annual income and spending score).\n",
"dataset = pd.read_csv('../../datasets/Mall_Customers.csv')\n",
"X = dataset.iloc[:, [3, 4]].to_numpy()"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "czYMlG7cNBsu"
},
"source": [
"## Using the dendrogram to find the optimal number of clusters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 295
},
"colab_type": "code",
"executionInfo": {
"elapsed": 4948,
"status": "ok",
"timestamp": 1588363683148,
"user": {
"displayName": "Hadelin de Ponteves",
"photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhEuXdT7eQweUmRPW8_laJuPggSK6hfvpl5a6WBaA=s64",
"userId": "15047218817161520419"
},
"user_tz": -240
},
"id": "RDQODpAFNILO",
"outputId": "8743058d-09a8-43f5-892d-6b1c140792a0"
},
"outputs": [],
"source": [
"import scipy.cluster.hierarchy as sch\n",
"# Step 1: compute the Ward linkage matrix for X.\n",
"ward_linkage = sch.linkage(X, method = 'ward')\n",
"# Step 2: draw that linkage as a dendrogram on the current figure.\n",
"dendrogram = sch.dendrogram(ward_linkage)\n",
"plt.title('Dendrogram')\n",
"plt.xlabel('Customers')\n",
"plt.ylabel('Euclidean distances')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "KDbXbo9INLF6"
},
"source": [
"## Training the Hierarchical Clustering model on the dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "IoH3zs2KNSw6"
},
"outputs": [],
"source": [
"from sklearn.cluster import AgglomerativeClustering\n",
"# Ward linkage only supports Euclidean distance, which is already the default,\n",
"# so no distance argument is passed. The old 'affinity' keyword was deprecated\n",
"# in scikit-learn 1.2 and removed in 1.4 (renamed to 'metric'); omitting it\n",
"# keeps this cell working on both older and current releases.\n",
"hc = AgglomerativeClustering(n_clusters = 5, linkage = 'ward')\n",
"# y_hc holds one integer cluster label per row of X.\n",
"y_hc = hc.fit_predict(X)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "X-SYG7l9NVmU"
},
"source": [
"## Visualising the clusters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 295
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1290,
"status": "ok",
"timestamp": 1588363703003,
"user": {
"displayName": "Hadelin de Ponteves",
"photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhEuXdT7eQweUmRPW8_laJuPggSK6hfvpl5a6WBaA=s64",
"userId": "15047218817161520419"
},
"user_tz": -240
},
"id": "-91tDJrnNY2p",
"outputId": "fc9652fa-6e3f-4b68-c4ff-e6fd6b4bce7d"
},
"outputs": [],
"source": [
"# One scatter series per cluster label: colours are paired with labels 0-4 in\n",
"# order, and the legend shows them 1-based ('Cluster 1' .. 'Cluster 5').\n",
"cluster_colors = ['red', 'blue', 'green', 'cyan', 'magenta']\n",
"for cluster_id, color in enumerate(cluster_colors):\n",
"    members = y_hc == cluster_id  # boolean mask selecting this cluster's rows\n",
"    plt.scatter(X[members, 0], X[members, 1], s = 100, c = color, label = f'Cluster {cluster_id + 1}')\n",
"plt.title('Clusters of customers')\n",
"plt.xlabel('Annual Income (k$)')\n",
"plt.ylabel('Spending Score (1-100)')\n",
"plt.legend()\n",
"plt.show()"
]
}
],
"metadata": {
"colab": {
"authorship_tag": "ABX9TyOE/Ghkv22sqrXHjexUJwPA",
"collapsed_sections": [],
"name": "hierarchical_clustering.ipynb",
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Loading

0 comments on commit 9640bb1

Please sign in to comment.