adding the cpp file

borgestassio · Apr 24, 2022 · 2cb7286 · 2cb7286
1 parent 382a046
commit 2cb7286
Show file tree

Hide file tree

Showing 8 changed files with 188 additions and 180 deletions.
diff --git a/KMeans_cpp.cbp b/KMeans_cpp.cbp
@@ -32,6 +32,7 @@
 			<Add option="-Wall" />
 			<Add option="-fexceptions" />
 		</Compiler>
+		<Unit filename="kmeans.h" />
 		<Unit filename="main.cpp" />
 		<Extensions>
 			<lib_finder disable_auto="1" />

diff --git a/KMeans_cpp.depend b/KMeans_cpp.depend
@@ -1,11 +1,11 @@
 # depslib dependency file v1.0
-1650835842 source:c:\users\avell 1513\cpp_projects\kmeans_cpp\main.cpp
+1650837515 source:c:\users\avell 1513\cpp_projects\kmeans_cpp\main.cpp
 	<iostream>
 	<fstream>
 	<vector>
 	"kmeans.h"
 
-1650835976 c:\users\avell 1513\cpp_projects\kmeans_cpp\kmeans.h
+1650837906 c:\users\avell 1513\cpp_projects\kmeans_cpp\kmeans.h
 	<iostream>
 	<fstream>
 	<vector>

diff --git a/README.md b/README.md
@@ -1,2 +1,5 @@
 # KMeans_cpp
-An algorithm to implement K Means clustering method in C++
+
+An algorithm to implement K Means clustering method in C++.
+
+Currently it only accepts a 1D dataset of floats. It will be updated to support 3D datasets soon.
diff --git a/bin/Debug/KMeans_cpp.exe b/bin/Debug/KMeans_cpp.exe
diff --git a/kmeans.cpp b/kmeans.cpp
@@ -0,0 +1,181 @@
+#include "kmeans.h"
+
+
+
+/**
+ * Creates a clusterer for `n_clusters` clusters over `data_size_in` data points.
+ *
+ * Stores both sizes and sets up the membership table `indices` with one row
+ * per cluster, each row holding `data_size_in` slots initialised to -1.
+ *
+ * @param n_clusters    number of clusters
+ * @param data_size_in  number of points in the dataset
+ */
+KMeans::KMeans(int n_clusters, int data_size_in)
+{
+    num_clusters = n_clusters;
+    data_size = data_size_in;
+    // The table is built per instance on purpose: a function-local `static`
+    // would be initialised only by the first constructor call, so every later
+    // object would receive the first object's dimensions.
+    indices = vector<vector<int>>(num_clusters, vector<int>(data_size, -1));
+}
+
+/**
+ * Chooses the initial centroids by drawing distinct "random" points from the dataset.
+ *
+ * The dataset is copied into the `data` member, and `data_size` / `num_clusters`
+ * are overwritten with the values given here. Any centroids chosen by an
+ * earlier call are discarded.
+ *
+ * @param data_in       the dataset
+ * @param data_size_in  number of leading points of `data_in` to draw from
+ * @param n_clusters    number of centroids to choose
+ * @param method        currently unused
+ *
+ * If fewer points than `n_clusters` are available, only as many centroids as
+ * there are points are chosen; with no usable points none are chosen.
+ */
+void KMeans::def_initial_centroids(vector<float> data_in, int data_size_in, int n_clusters, char method)
+{
+    srand(time(0));
+    data = data_in;
+    data_size = data_size_in;
+    num_clusters = n_clusters;
+    centroids.clear();
+
+    // Never draw from beyond the data that was actually supplied.
+    int usable = data_size;
+    if (usable < 0)
+    {
+        usable = 0;
+    }
+    if (static_cast<size_t>(usable) > data_in.size())
+    {
+        usable = static_cast<int>(data_in.size());
+    }
+    if (usable == 0)
+    {
+        return;
+    }
+
+    // One slot per data point (not per cluster): the identity permutation
+    // 0..usable-1 is shuffled and its first entries become the centroid picks.
+    vector<int> index(usable);
+    for (int j = 0; j < usable; ++j)
+    {
+        index[j] = j;
+    }
+    shuffle(index.data(), index.size());
+
+    const int picks = (num_clusters < usable) ? num_clusters : usable;
+    for (int i = 0; i < picks; ++i)
+    {
+        centroids.push_back(data_in[index[i]]);
+    }
+}
+
+/**
+ * Assigns every point of `data_in` to its nearest centroid, updating that
+ * centroid after each assignment.
+ *
+ * Points are processed in order. For each point the closest centroid is found
+ * (ties go to the lowest cluster index), the point's index is appended to that
+ * cluster's row of `indices`, and recalculate_centroid() is called for the
+ * cluster. Membership left over from a previous call is discarded first, so
+ * the function can be called repeatedly.
+ *
+ * Does nothing when no centroids have been chosen yet.
+ *
+ * @param data_in  the points to cluster
+ */
+void KMeans::clustering(vector<float> data_in)
+{
+    if (centroids.empty())
+    {
+        return;
+    }
+    const size_t n_centroids = centroids.size();
+    const size_t n_points = data_in.size();
+
+    // Scratch matrix handed to recalculate_centroid(): row 0 holds the starting
+    // centroids, row n+1 holds point n in the column of the cluster it joined
+    // (every other cell stays 0).
+    vector<vector<float>> R(n_points + 1, vector<float>(n_centroids));
+    R[0] = centroids;
+
+    // Start from empty membership lists; indices are appended in data order,
+    // so no placeholder values have to be stripped afterwards.
+    size_t n_lists = n_centroids;
+    if (num_clusters > 0 && static_cast<size_t>(num_clusters) > n_lists)
+    {
+        n_lists = static_cast<size_t>(num_clusters);
+    }
+    indices.assign(n_lists, vector<int>());
+
+    for (size_t n = 0; n < n_points; ++n)
+    {
+        // Find the closest centroid; strict '<' keeps the first one on ties.
+        size_t nearest = 0;
+        float best = distance(data_in[n], centroids[0]);
+        for (size_t c = 1; c < n_centroids; ++c)
+        {
+            const float d = distance(data_in[n], centroids[c]);
+            if (d < best)
+            {
+                best = d;
+                nearest = c;
+            }
+        }
+
+        R[n + 1][nearest] = data_in[n];
+        indices[nearest].push_back(static_cast<int>(n));
+        recalculate_centroid(static_cast<int>(nearest), static_cast<int>(n), R);
+    }
+}
+
+// Writes the cluster count and then every current centroid value,
+// space-separated on one line, to standard output.
+void KMeans::print_centroids()
+{
+    cout << "There are " << num_clusters << " centroids :" << endl;
+
+    for (size_t k = 0; k < centroids.size(); ++k)
+    {
+        cout << centroids[k] << " ";
+    }
+
+    cout << endl;
+}
+
+// Helper for choosing the initial centroids: permutes the n integers in arr
+// in place. The generator is re-seeded from the current time on every call
+// that actually shuffles (n > 1).
+void KMeans::shuffle(int *arr, size_t n)
+{
+    if (n <= 1)
+    {
+        return;
+    }
+
+    srand(time(NULL));
+    for (size_t pos = 0; pos + 1 < n; ++pos)
+    {
+        // Pick a partner somewhere in [pos, n) and exchange the two entries.
+        const size_t remaining = n - pos;
+        const size_t pick = pos + rand() / (RAND_MAX / remaining + 1);
+        const int held = arr[pick];
+        arr[pick] = arr[pos];
+        arr[pos] = held;
+    }
+}
+
+/**
+ * Distance between two 1D points: the absolute difference |in1 - in2|.
+ *
+ * The difference is taken before the absolute value so that points on
+ * opposite sides of zero are not treated as close (e.g. -3 and 3 are 6 apart).
+ * The sign is flipped by hand rather than through `abs`, so the result never
+ * depends on which `abs` overload happens to be visible.
+ *
+ * @return non-negative distance between in1 and in2
+ */
+float KMeans::distance(float in1, float in2)
+{
+    const float diff = in1 - in2;
+    return (diff < 0) ? -diff : diff;
+}
+
+/**
+ * Updates centroid `c_index` to the mean of the positive entries found in
+ * column `c_index` of `vIn`, looking at rows 0 .. d_index + 1.
+ *
+ * @param c_index  cluster whose centroid is recomputed (column of vIn)
+ * @param d_index  index of the data point just assigned; rows up to
+ *                 d_index + 1 are scanned
+ * @param vIn      matrix of values, one column per cluster
+ *
+ * Entries that are not strictly positive are skipped.
+ * NOTE(review): because of that filter, data values that are zero or negative
+ * never contribute to a centroid -- 0 appears to double as the "empty cell"
+ * marker of the caller's matrix; confirm whether such data must be supported.
+ *
+ * If no entry qualifies the centroid is left unchanged instead of being
+ * overwritten with the result of a division by zero.
+ */
+void KMeans::recalculate_centroid(int c_index, int d_index,vector<vector<float>> vIn)
+{
+    // Rows to scan, clamped so the loop can never run past the matrix.
+    const int wanted = d_index + 2;
+    size_t last_row = (wanted > 0) ? static_cast<size_t>(wanted) : 0;
+    if (last_row > vIn.size())
+    {
+        last_row = vIn.size();
+    }
+
+    int count = 0;
+    float sum = 0;
+    for (size_t i = 0; i < last_row; ++i)
+    {
+        const float value = vIn[i][c_index];
+        if (value > 0)
+        {
+            ++count;
+            sum += value;
+        }
+    }
+
+    if (count == 0)
+    {
+        return;
+    }
+    centroids[c_index] = sum / count;
+}
+
+// Prints a header line naming cluster ind_centroid, then the data value of
+// each of its members (looked up through the stored indices) on one line.
+void KMeans::print_members(int ind_centroid)
+{
+    cout << "These are the members of cluster: " << ind_centroid << endl;
+
+    vector<int> &members = this->indices[ind_centroid];
+    for (size_t k = 0; k < members.size(); ++k)
+    {
+        cout << data[members[k]] << " ";
+    }
+    cout << endl;
+}
+
+// Returns the data values belonging to cluster ind_centroid, in the order in
+// which their indices are stored for that cluster.
+vector<float> KMeans::get_members(int ind_centroid)
+{
+    vector<int> &members = this->indices[ind_centroid];
+
+    vector<float> values;
+    for (size_t k = 0; k < members.size(); ++k)
+    {
+        values.push_back(data[members[k]]);
+    }
+    return values;
+}
+
+// Returns a copy of the member indices of cluster ind_centroid.
+// The indices refer to positions in the original dataset.
+vector<int> KMeans::get_index_members(int ind_centroid)
+{
+    vector<int> &members = this->indices[ind_centroid];
+
+    vector<int> copy;
+    for (size_t k = 0; k < members.size(); ++k)
+    {
+        copy.push_back(members[k]);
+    }
+    return copy;
+}
diff --git a/kmeans.h b/kmeans.h
@@ -42,183 +42,6 @@ class KMeans
     vector<int> get_index_members(int ind_centroid); // return the indices of the cluster members, indices are related to the original dataset
 };
 
-//The constructor initializes some variables
-KMeans::KMeans(int n_clusters, int data_size_in)
-{
-    num_clusters = n_clusters;
-    data_size = data_size_in;
-    vector<vector<float>> R (data_size, vector<float>(num_clusters));
-    static vector<vector<int>> indices (num_clusters, vector<int> (data_size, -1));
-    this->indices = indices;
-};
-
-//To find the initial centroids, "random" points are taken from the dataset
-void KMeans::def_initial_centroids(vector<float> data_in, int data_size_in, int n_clusters, char method)
-{
-    srand(time(0));
-    //TODO: Implement a check for data_size_in and n_clusters
-    vector<float>::iterator it;
-    data = data_in;
-    data_size = data_size_in;
-    num_clusters = n_clusters;
-    int index[num_clusters];
-
-    //First we initialize an array with n_clusters
-    for (int j=0;j<data_size;j++)
-    {
-        index[j] = j;
-    }
-    //now we shuffle it to get "random" centroids
-    shuffle(index,data_size);
-    // and we assign an initial value to the cluster
-    vector<int> temp;
-    for (int i=0;i<num_clusters;i++)
-    {
-        it = data_in.begin() + index[i];
-        temp.push_back(index[i]);
-        centroids.push_back(*it);
-    }
-    // indices.insert(indices.begin(),temp);
-}
-
-//The clustering function will assign everypoint of the dataset to a cluster and recalculate the centroid as the mean of everypoint on it
-
-void KMeans::clustering(vector<float> data_in)
-{
-    vector<vector<float>> R (data_size, vector<float>(num_clusters));
-    vector<float> ctemp;
-    vector<float> sub_vector;
-    vector<float>::iterator it_d;
-    vector<float>::iterator it_c;
-    ctemp = centroids;
-    vector<vector<float>> dists(data_size, vector<float> (num_clusters, 0));
-    int min_ind;
-    float min_dis;
-
-
-    R.insert(R.begin(), ctemp);
-
-    //Calculate all distances between the centroids and the data points
-    int n =0, i=0;
-    for (it_d = data_in.begin();it_d<data_in.end();it_d++)
-    {
-        i=0;
-        for(it_c = centroids.begin();it_c<centroids.end();it_c++)
-        {
-            dists[n][i] = distance(*it_d,*it_c);
-            i++;
-        }
-        //Find the closest centroid and assign the data point to it
-        sub_vector.reserve(num_clusters);
-        sub_vector = dists[n];
-        vector<float>::iterator result = min_element(sub_vector.begin(), sub_vector.end());
-        min_ind = std::distance(sub_vector.begin(), result);
-        min_dis = *result;
-        R[n+1][min_ind]=data_in[n];
-        indices[min_ind][n] = n;
-        recalculate_centroid(min_ind,n,R);
-
-         n++;
-    }
-    //clear the '-1' from the vector
-    vector<int>::iterator ind;
-    for(int i = 0;i<num_clusters;i++)
-    {
-        do{
-        ind = find(indices[i].begin(), indices[i].end(), -1);
-        if(ind != indices[i].end() ) indices[i].erase(ind);
-        }while(ind != indices[i].end() );
-    }
-
-}
-
-//This function will print the centroids
-void KMeans::print_centroids()
-{
-    vector<float>::iterator it;
-    cout<<"There are " << num_clusters << " centroids :"<<endl;
-    for (it = centroids.begin(); it != centroids.end(); ++it)
-    cout<<*it<<" ";
-
-    cout<<endl;
-}
-
-//This function is auxiliary to define the initial centroids
-void KMeans::shuffle(int *arr, size_t n)
-{
-    if (n > 1)
-    {
-        size_t i;
-        srand(time(NULL));
-        for (i = 0; i < n - 1; i++)
-        {
-          size_t j = i + rand() / (RAND_MAX / (n - i) + 1);
-          int t = arr[j];
-          arr[j] = arr[i];
-          arr[i] = t;
-        }
-    }
-}
-
-//function to calculate the distance betweem two points
-float KMeans::distance(float in1, float in2)
-{
-    return abs(abs(in1)-abs(in2));
-
-}
-
-//function to update the centroid based
-void KMeans::recalculate_centroid(int c_index, int d_index,vector<vector<float>> vIn)
-{
-    int count=0;
-    float res=0;
-    for(int i=0;i<(d_index+2);i++)
-    {
-        if(vIn[i][c_index]>0)
-        {
-            count ++;
-            res += vIn[i][c_index];
-        }
-
-    }
-    res = res/count;
-    centroids[c_index] =res;
-}
-
-//function to print the members of a cluster
-void KMeans::print_members(int ind_centroid)
-{
-    // cout<<indices[0][0]<<endl;
-    // cout<<indices[0][1]<<endl;
-    vector<int>::iterator it;
-    cout<<"These are the members of cluster: " << ind_centroid <<endl;
-    for (it = this->indices[ind_centroid].begin(); it != this->indices[ind_centroid].end(); ++it) cout<<data[*it]<<" ";
-    cout<<endl;
-
-}
-
-//function that return the members of a cluster
-vector<float> KMeans::get_members(int ind_centroid)
-{
-    vector<float> sub;
-    vector<int>::iterator it;
-    for (it = this->indices[ind_centroid].begin(); it != this->indices[ind_centroid].end(); ++it)
-     sub.push_back(data[*it]);
-
-    return sub;
-}
-
-//function that returns the indices of the cluster members.
-//Note that the indices are related to the original dataset
-vector<int> KMeans::get_index_members(int ind_centroid)
-{
-    vector<int> sub;
-    vector<int>::iterator it;
-    for (it = this->indices[ind_centroid].begin(); it != this->indices[ind_centroid].end(); ++it)
-     sub.push_back(*it);
-
-    return sub;
-}
 
 
 #endif // KMEANS_H
diff --git a/obj/Debug/kmeans.o b/obj/Debug/kmeans.o
diff --git a/obj/Debug/main.o b/obj/Debug/main.o