Skip to content

Commit

Permalink
Merge pull request #4 from Stealeristaken/deneme
Browse files Browse the repository at this point in the history
added optional pca and optional iteration
  • Loading branch information
cobanov authored Mar 13, 2024
2 parents 0d69d97 + 9434fdd commit 1adc725
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 7 deletions.
7 changes: 5 additions & 2 deletions tasnif/calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,13 @@ def calculate_pca(embeddings, pca_dim):
return pca_embeddings


def calculate_kmeans(pca_embeddings, num_classes):
def calculate_kmeans(pca_embeddings, num_classes, iter=10):
"""
The function `calculate_kmeans` performs KMeans clustering on PCA embeddings data to assign
labels and centroids.
:param pca_embeddings: The `pca_embeddings` parameter is a NumPy array containing the data points.
:param num_classes: The `num_classes` parameter is an integer that specifies the number of clusters.
:param iter: The `iter` parameter is an integer that specifies the number of iterations for the KMeans algorithm. Default is 10. Should be a positive integer.
"""

if not isinstance(pca_embeddings, np.ndarray):
Expand All @@ -64,7 +67,7 @@ def calculate_kmeans(pca_embeddings, num_classes):
)

try:
centroid, labels = kmeans2(data=pca_embeddings, k=num_classes, minit="points")
centroid, labels = kmeans2(data=pca_embeddings, k=num_classes, minit="points", iter=iter)
counts = np.bincount(labels)
info("KMeans calculated.")
return centroid, labels, counts
Expand Down
16 changes: 11 additions & 5 deletions tasnif/tasnif.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,26 @@ def read(self, folder_path):
self.image_paths = read_images_from_directory(folder_path)
self.images = read_with_pil(self.image_paths)

def calculate(self):
def calculate(self, pca=True, iter=10):
"""
The function calculates embeddings, performs PCA, and applies K-means clustering to the
embeddings. It will not perform these operations if no images have been read.
:param pca: The `pca` parameter is a boolean that specifies whether to perform PCA or not. Default is True
:param iter: The `iter` parameter is an integer that specifies the number of iterations for the KMeans algorithm. Default is 10.
"""

if not self.images:
raise ValueError("The images list can not be empty. Please call the read method before calculating.")

self.embeddings = get_embeddings(use_gpu=self.use_gpu, images=self.images)
self.pca_embeddings = calculate_pca(self.embeddings, self.pca_dim)
self.centroid, self.labels, self.counts = calculate_kmeans(
self.pca_embeddings, self.num_classes
)
if pca:
self.pca_embeddings = calculate_pca(self.embeddings, self.pca_dim)
self.centroid, self.labels, self.counts = calculate_kmeans(self.pca_embeddings, self.num_classes, iter = iter)
else:
self.centroid, self.labels, self.counts = calculate_kmeans(
self.embeddings, self.num_classes, iter = iter
)

def export(self, output_folder="./"):
"""
Expand Down

0 comments on commit 1adc725

Please sign in to comment.