Skip to content

Commit

Permalink
adding the cpp file
Browse files Browse the repository at this point in the history
  • Loading branch information
borgestassio committed Apr 24, 2022
1 parent 382a046 commit 2cb7286
Show file tree
Hide file tree
Showing 8 changed files with 188 additions and 180 deletions.
1 change: 1 addition & 0 deletions KMeans_cpp.cbp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
<Add option="-Wall" />
<Add option="-fexceptions" />
</Compiler>
<Unit filename="kmeans.h" />
<Unit filename="main.cpp" />
<Extensions>
<lib_finder disable_auto="1" />
Expand Down
4 changes: 2 additions & 2 deletions KMeans_cpp.depend
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# depslib dependency file v1.0
1650835842 source:c:\users\avell 1513\cpp_projects\kmeans_cpp\main.cpp
1650837515 source:c:\users\avell 1513\cpp_projects\kmeans_cpp\main.cpp
<iostream>
<fstream>
<vector>
"kmeans.h"

1650835976 c:\users\avell 1513\cpp_projects\kmeans_cpp\kmeans.h
1650837906 c:\users\avell 1513\cpp_projects\kmeans_cpp\kmeans.h
<iostream>
<fstream>
<vector>
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
# KMeans_cpp
An algorithm to implement K Means clustering method in C++

An algorithm to implement K Means clustering method in C++.

Currently it only takes 1D datasets of floats. It will be updated to take 3D datasets soon.
Binary file modified bin/Debug/KMeans_cpp.exe
Binary file not shown.
181 changes: 181 additions & 0 deletions kmeans.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#include "kmeans.h"



// Constructs a clusterer for `n_clusters` clusters over `data_size_in` points.
//
// Stores both sizes and builds the membership table `indices`: one row per
// cluster with one slot per data point, every slot set to -1 ("not a member").
//
// The table is built per object. It used to come from a function-local
// `static`, which is initialised only by the first constructor call, so any
// later instance created with different sizes received a wrongly sized table.
KMeans::KMeans(int n_clusters, int data_size_in)
{
    num_clusters = n_clusters;
    data_size = data_size_in;
    indices = vector<vector<int>>(num_clusters, vector<int>(data_size, -1));
}

// Picks the starting centroids by drawing `n_clusters` distinct points at
// random from the first `data_size_in` entries of `data_in`.
//
// Parameters:
//   data_in      - the dataset; a copy is stored in the object.
//   data_size_in - number of leading points of `data_in` to use.
//   n_clusters   - number of centroids to select.
//   method       - currently unused.
//
// The sizes must satisfy 0 < n_clusters <= data_size_in <= data_in.size().
// Otherwise an error is written to stderr and the object is left untouched
// (these cases previously read or wrote out of bounds).
//
// On success the stored dataset, sizes, centroids and membership table are all
// replaced, so calling this again starts from a clean state.
void KMeans::def_initial_centroids(vector<float> data_in, int data_size_in, int n_clusters, char method)
{
    (void)method; // accepted for interface compatibility; no strategy is selected yet

    if (n_clusters <= 0 || data_size_in < n_clusters ||
        static_cast<size_t>(data_size_in) > data_in.size())
    {
        cerr << "KMeans::def_initial_centroids: invalid sizes (data_size=" << data_size_in
             << ", n_clusters=" << n_clusters
             << ", points available=" << data_in.size() << ")" << endl;
        return;
    }

    srand(time(0));
    data = data_in;
    data_size = data_size_in;
    num_clusters = n_clusters;

    // One entry per data point. The original used a variable-length array sized
    // by num_clusters and then wrote data_size entries into it.
    vector<int> index(data_size);
    for (int j = 0; j < data_size; j++)
    {
        index[j] = j;
    }

    // Shuffle the point indices; the first num_clusters of them are the picks.
    shuffle(index.data(), data_size);

    // Drop centroids from any earlier call so exactly num_clusters remain.
    centroids.clear();
    for (int i = 0; i < num_clusters; i++)
    {
        centroids.push_back(data_in[index[i]]);
    }

    // Keep the membership table in step with the (possibly new) sizes.
    indices = vector<vector<int>>(num_clusters, vector<int>(data_size, -1));
}

// Assigns every point of `data_in` to its nearest centroid and updates that
// centroid after each assignment (a single pass over the data).
//
// Parameters:
//   data_in - the points to cluster; at most `data_size` of them are processed.
//
// Effects:
//   * `centroids[c]` is recomputed by recalculate_centroid() every time a
//     point joins cluster c.
//   * `indices[c]` ends up holding, in ascending order, the positions in
//     `data_in` of the points assigned to cluster c.
//
// The membership lists are rebuilt from scratch on every call. Previously the
// -1 placeholders were erased at the end, so a second call indexed into rows
// that had already been shrunk.
//
// If the centroids have not been initialised for `num_clusters` clusters, an
// error is written to stderr and nothing is changed.
void KMeans::clustering(vector<float> data_in)
{
    if (data_size < 0 || centroids.empty() ||
        centroids.size() != static_cast<size_t>(num_clusters))
    {
        cerr << "KMeans::clustering: centroids are not initialised for "
             << num_clusters << " clusters" << endl;
        return;
    }

    // Never walk past the rows allocated below, even if the caller passes more
    // points than data_size.
    const size_t available = data_in.size();
    const int n_points = (available < static_cast<size_t>(data_size))
                             ? static_cast<int>(available)
                             : data_size;

    // Row 0 holds the starting centroids. Row n+1 holds point n in the column
    // of the cluster it was assigned to and 0 in every other column.
    vector<vector<float>> R(data_size + 1, vector<float>(num_clusters));
    R[0] = centroids;

    indices = vector<vector<int>>(num_clusters);

    for (int n = 0; n < n_points; n++)
    {
        // Nearest centroid; on a tie the lowest cluster index wins.
        int min_ind = 0;
        float min_dis = distance(data_in[n], centroids[0]);
        for (int c = 1; c < num_clusters; c++)
        {
            const float d = distance(data_in[n], centroids[c]);
            if (d < min_dis)
            {
                min_dis = d;
                min_ind = c;
            }
        }

        R[n + 1][min_ind] = data_in[n];
        indices[min_ind].push_back(n);
        recalculate_centroid(min_ind, n, R);
    }
}

//This function will print the centroids
void KMeans::print_centroids()
{
vector<float>::iterator it;
cout<<"There are " << num_clusters << " centroids :"<<endl;
for (it = centroids.begin(); it != centroids.end(); ++it)
cout<<*it<<" ";

cout<<endl;
}

// Helper for centroid initialisation: randomly permutes the first `n`
// elements of `arr` in place.
//
// The generator is re-seeded from the current time on every call. Position
// `pos` is swapped with a position chosen at random from [pos, n).
void KMeans::shuffle(int *arr, size_t n)
{
    // Zero or one element: nothing to reorder (and no re-seeding happens).
    if (n <= 1)
    {
        return;
    }

    srand(time(NULL));

    size_t pos = 0;
    while (pos < n - 1)
    {
        const size_t remaining = n - pos;
        // Scale rand() down to an offset in [0, remaining).
        const size_t pick = pos + rand() / (RAND_MAX / remaining + 1);

        const int held = arr[pos];
        arr[pos] = arr[pick];
        arr[pick] = held;

        ++pos;
    }
}

// Returns the distance between two 1-D points, i.e. |in1 - in2|.
//
// The previous formula, abs(abs(in1) - abs(in2)), compared magnitudes only and
// so reported a distance of 0 between values such as -3 and 3. The absolute
// value is computed by hand so the result does not depend on which abs()
// overload (int or float) happens to be visible.
float KMeans::distance(float in1, float in2)
{
    const float diff = in1 - in2;
    return (diff < 0) ? -diff : diff;
}

// Recomputes centroid `c_index` as the mean of the non-zero entries found in
// column `c_index` of rows 0 .. d_index+1 of `vIn`.
//
// Parameters:
//   c_index - cluster (column) whose centroid is updated.
//   d_index - index of the point just assigned; rows up to d_index+1 are read.
//   vIn     - assignment matrix; a zero entry is treated as "no sample here".
//
// Changes from the previous version:
//   * negative samples are now included (the old `> 0` test skipped them and
//     skewed the mean);
//   * the centroid is left unchanged when no sample is found, instead of
//     dividing by zero;
//   * rows and columns outside `vIn`, and an out-of-range `c_index`, are
//     ignored instead of being read out of bounds.
//
// Known limitation: a data point whose value is exactly 0 cannot be told apart
// from an empty cell, so it is not counted.
void KMeans::recalculate_centroid(int c_index, int d_index, vector<vector<float>> vIn)
{
    if (c_index < 0 || static_cast<size_t>(c_index) >= centroids.size())
    {
        return;
    }
    const size_t column = static_cast<size_t>(c_index);

    size_t rows = (d_index + 2 > 0) ? static_cast<size_t>(d_index + 2) : 0;
    if (rows > vIn.size())
    {
        rows = vIn.size();
    }

    int count = 0;
    float sum = 0;
    for (size_t i = 0; i < rows; i++)
    {
        if (column >= vIn[i].size())
        {
            continue;
        }
        const float value = vIn[i][column];
        if (value != 0) // exact comparison is intended: 0 is the empty-cell marker
        {
            count++;
            sum += value;
        }
    }

    if (count > 0)
    {
        centroids[column] = sum / count;
    }
}

//function to print the members of a cluster
void KMeans::print_members(int ind_centroid)
{
// cout<<indices[0][0]<<endl;
// cout<<indices[0][1]<<endl;
vector<int>::iterator it;
cout<<"These are the members of cluster: " << ind_centroid <<endl;
for (it = this->indices[ind_centroid].begin(); it != this->indices[ind_centroid].end(); ++it) cout<<data[*it]<<" ";
cout<<endl;

}

// Returns the data values of the members of cluster `ind_centroid`, in the
// order their indices are stored for that cluster.
vector<float> KMeans::get_members(int ind_centroid)
{
    vector<float> members;
    for (const int point_id : indices[ind_centroid])
    {
        members.push_back(data[point_id]);
    }
    return members;
}

// Returns a copy of the member indices stored for cluster `ind_centroid`.
// Each index is a position in the original dataset.
vector<int> KMeans::get_index_members(int ind_centroid)
{
    const vector<int> &member_ids = indices[ind_centroid];
    return vector<int>(member_ids.begin(), member_ids.end());
}
177 changes: 0 additions & 177 deletions kmeans.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,183 +42,6 @@ class KMeans
vector<int> get_index_members(int ind_centroid); // return the indices of the cluster members, indices are related to the original dataset
};

// Constructs a clusterer for `n_clusters` clusters over `data_size_in` points.
//
// Stores both sizes and builds the membership table `indices`: one row per
// cluster with one slot per data point, every slot set to -1 ("not a member").
//
// The table is built per object. It used to come from a function-local
// `static`, which is initialised only by the first constructor call, so any
// later instance created with different sizes received a wrongly sized table.
KMeans::KMeans(int n_clusters, int data_size_in)
{
    num_clusters = n_clusters;
    data_size = data_size_in;
    indices = vector<vector<int>>(num_clusters, vector<int>(data_size, -1));
}

// Picks the starting centroids by drawing `n_clusters` distinct points at
// random from the first `data_size_in` entries of `data_in`.
//
// Parameters:
//   data_in      - the dataset; a copy is stored in the object.
//   data_size_in - number of leading points of `data_in` to use.
//   n_clusters   - number of centroids to select.
//   method       - currently unused.
//
// The sizes must satisfy 0 < n_clusters <= data_size_in <= data_in.size().
// Otherwise an error is written to stderr and the object is left untouched
// (these cases previously read or wrote out of bounds).
//
// On success the stored dataset, sizes, centroids and membership table are all
// replaced, so calling this again starts from a clean state.
void KMeans::def_initial_centroids(vector<float> data_in, int data_size_in, int n_clusters, char method)
{
    (void)method; // accepted for interface compatibility; no strategy is selected yet

    if (n_clusters <= 0 || data_size_in < n_clusters ||
        static_cast<size_t>(data_size_in) > data_in.size())
    {
        cerr << "KMeans::def_initial_centroids: invalid sizes (data_size=" << data_size_in
             << ", n_clusters=" << n_clusters
             << ", points available=" << data_in.size() << ")" << endl;
        return;
    }

    srand(time(0));
    data = data_in;
    data_size = data_size_in;
    num_clusters = n_clusters;

    // One entry per data point. The original used a variable-length array sized
    // by num_clusters and then wrote data_size entries into it.
    vector<int> index(data_size);
    for (int j = 0; j < data_size; j++)
    {
        index[j] = j;
    }

    // Shuffle the point indices; the first num_clusters of them are the picks.
    shuffle(index.data(), data_size);

    // Drop centroids from any earlier call so exactly num_clusters remain.
    centroids.clear();
    for (int i = 0; i < num_clusters; i++)
    {
        centroids.push_back(data_in[index[i]]);
    }

    // Keep the membership table in step with the (possibly new) sizes.
    indices = vector<vector<int>>(num_clusters, vector<int>(data_size, -1));
}

// Assigns every point of `data_in` to its nearest centroid and updates that
// centroid after each assignment (a single pass over the data).
//
// Parameters:
//   data_in - the points to cluster; at most `data_size` of them are processed.
//
// Effects:
//   * `centroids[c]` is recomputed by recalculate_centroid() every time a
//     point joins cluster c.
//   * `indices[c]` ends up holding, in ascending order, the positions in
//     `data_in` of the points assigned to cluster c.
//
// The membership lists are rebuilt from scratch on every call. Previously the
// -1 placeholders were erased at the end, so a second call indexed into rows
// that had already been shrunk.
//
// If the centroids have not been initialised for `num_clusters` clusters, an
// error is written to stderr and nothing is changed.
void KMeans::clustering(vector<float> data_in)
{
    if (data_size < 0 || centroids.empty() ||
        centroids.size() != static_cast<size_t>(num_clusters))
    {
        cerr << "KMeans::clustering: centroids are not initialised for "
             << num_clusters << " clusters" << endl;
        return;
    }

    // Never walk past the rows allocated below, even if the caller passes more
    // points than data_size.
    const size_t available = data_in.size();
    const int n_points = (available < static_cast<size_t>(data_size))
                             ? static_cast<int>(available)
                             : data_size;

    // Row 0 holds the starting centroids. Row n+1 holds point n in the column
    // of the cluster it was assigned to and 0 in every other column.
    vector<vector<float>> R(data_size + 1, vector<float>(num_clusters));
    R[0] = centroids;

    indices = vector<vector<int>>(num_clusters);

    for (int n = 0; n < n_points; n++)
    {
        // Nearest centroid; on a tie the lowest cluster index wins.
        int min_ind = 0;
        float min_dis = distance(data_in[n], centroids[0]);
        for (int c = 1; c < num_clusters; c++)
        {
            const float d = distance(data_in[n], centroids[c]);
            if (d < min_dis)
            {
                min_dis = d;
                min_ind = c;
            }
        }

        R[n + 1][min_ind] = data_in[n];
        indices[min_ind].push_back(n);
        recalculate_centroid(min_ind, n, R);
    }
}

//This function will print the centroids
void KMeans::print_centroids()
{
vector<float>::iterator it;
cout<<"There are " << num_clusters << " centroids :"<<endl;
for (it = centroids.begin(); it != centroids.end(); ++it)
cout<<*it<<" ";

cout<<endl;
}

// Helper for centroid initialisation: randomly permutes the first `n`
// elements of `arr` in place.
//
// The generator is re-seeded from the current time on every call. Position
// `pos` is swapped with a position chosen at random from [pos, n).
void KMeans::shuffle(int *arr, size_t n)
{
    // Zero or one element: nothing to reorder (and no re-seeding happens).
    if (n <= 1)
    {
        return;
    }

    srand(time(NULL));

    size_t pos = 0;
    while (pos < n - 1)
    {
        const size_t remaining = n - pos;
        // Scale rand() down to an offset in [0, remaining).
        const size_t pick = pos + rand() / (RAND_MAX / remaining + 1);

        const int held = arr[pos];
        arr[pos] = arr[pick];
        arr[pick] = held;

        ++pos;
    }
}

// Returns the distance between two 1-D points, i.e. |in1 - in2|.
//
// The previous formula, abs(abs(in1) - abs(in2)), compared magnitudes only and
// so reported a distance of 0 between values such as -3 and 3. The absolute
// value is computed by hand so the result does not depend on which abs()
// overload (int or float) happens to be visible.
float KMeans::distance(float in1, float in2)
{
    const float diff = in1 - in2;
    return (diff < 0) ? -diff : diff;
}

// Recomputes centroid `c_index` as the mean of the non-zero entries found in
// column `c_index` of rows 0 .. d_index+1 of `vIn`.
//
// Parameters:
//   c_index - cluster (column) whose centroid is updated.
//   d_index - index of the point just assigned; rows up to d_index+1 are read.
//   vIn     - assignment matrix; a zero entry is treated as "no sample here".
//
// Changes from the previous version:
//   * negative samples are now included (the old `> 0` test skipped them and
//     skewed the mean);
//   * the centroid is left unchanged when no sample is found, instead of
//     dividing by zero;
//   * rows and columns outside `vIn`, and an out-of-range `c_index`, are
//     ignored instead of being read out of bounds.
//
// Known limitation: a data point whose value is exactly 0 cannot be told apart
// from an empty cell, so it is not counted.
void KMeans::recalculate_centroid(int c_index, int d_index, vector<vector<float>> vIn)
{
    if (c_index < 0 || static_cast<size_t>(c_index) >= centroids.size())
    {
        return;
    }
    const size_t column = static_cast<size_t>(c_index);

    size_t rows = (d_index + 2 > 0) ? static_cast<size_t>(d_index + 2) : 0;
    if (rows > vIn.size())
    {
        rows = vIn.size();
    }

    int count = 0;
    float sum = 0;
    for (size_t i = 0; i < rows; i++)
    {
        if (column >= vIn[i].size())
        {
            continue;
        }
        const float value = vIn[i][column];
        if (value != 0) // exact comparison is intended: 0 is the empty-cell marker
        {
            count++;
            sum += value;
        }
    }

    if (count > 0)
    {
        centroids[column] = sum / count;
    }
}

//function to print the members of a cluster
void KMeans::print_members(int ind_centroid)
{
// cout<<indices[0][0]<<endl;
// cout<<indices[0][1]<<endl;
vector<int>::iterator it;
cout<<"These are the members of cluster: " << ind_centroid <<endl;
for (it = this->indices[ind_centroid].begin(); it != this->indices[ind_centroid].end(); ++it) cout<<data[*it]<<" ";
cout<<endl;

}

// Returns the data values of the members of cluster `ind_centroid`, in the
// order their indices are stored for that cluster.
vector<float> KMeans::get_members(int ind_centroid)
{
    vector<float> members;
    for (const int point_id : indices[ind_centroid])
    {
        members.push_back(data[point_id]);
    }
    return members;
}

// Returns a copy of the member indices stored for cluster `ind_centroid`.
// Each index is a position in the original dataset.
vector<int> KMeans::get_index_members(int ind_centroid)
{
    const vector<int> &member_ids = indices[ind_centroid];
    return vector<int>(member_ids.begin(), member_ids.end());
}


#endif // KMEANS_H
Binary file added obj/Debug/kmeans.o
Binary file not shown.
Binary file modified obj/Debug/main.o
Binary file not shown.

0 comments on commit 2cb7286

Please sign in to comment.