-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelpers2.py
58 lines (47 loc) · 1.66 KB
/
helpers2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
def simulate_data(n=500, features=10, centroids=3):
    '''
    Generate a synthetic, clustered dataset with make_blobs.

    INPUT (defaults)
        n = number of rows / samples to generate (500)
        features = number of columns per sample (10)
        centroids = number of cluster centers the samples are drawn around (3)
    OUTPUT
        dataset = the generated samples only; the cluster labels that
                  make_blobs also returns are discarded
    '''
    # random_state is fixed at 42, so identical arguments give identical data.
    blobs = make_blobs(
        n_samples=n,
        n_features=features,
        centers=centroids,
        random_state=42,
    )
    # make_blobs returns (samples, labels); keep only the samples.
    return blobs[0]
def plot_data(data, labels):
    '''
    Draw a 3D scatter plot of the first three columns of data, colored by labels.

    INPUT:
        data - 2D array indexable as data[:, i]; only columns 0, 1 and 2 are
               plotted, so it must have at least three columns
        labels - per-point values passed to scatter as the color argument and
                 mapped through the 'tab10' colormap
    OUTPUT:
        None - the points are drawn on a newly created figure
    '''
    fig = plt.figure()
    # Create the 3D axes through the figure so they are registered with it.
    # Constructing Axes3D(fig) directly no longer attaches the axes to the
    # figure in recent matplotlib releases, which leaves the figure blank.
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=labels, cmap='tab10')
# Module-level sample dataset (200 rows, 5 features, 4 centers); fit_mods()
# reads this global when scoring the k-means models.
data = simulate_data(n=200, features=5, centroids=4)
def get_kmeans_score(data, center):
    '''
    Fit a k-means model with the requested number of clusters and report its SSE.

    INPUT:
        data - the dataset to fit k-means to
        center - the number of clusters to fit (the k value)
    OUTPUT:
        score - absolute value of the fitted model's score on data, i.e. the
                sum of squared distances of the points to their centers
    '''
    # Build the estimator and fit it in one step; fit() returns the estimator.
    fitted = KMeans(n_clusters=center).fit(data)

    # KMeans.score is reported as a non-positive number, so take the magnitude.
    return np.abs(fitted.score(data))
def fit_mods():
    '''
    Score k-means fits of the module-level data for k = 1 through 10.

    OUTPUT:
        centers - the list of k values tried, [1, 2, ..., 10]
        scores - the get_kmeans_score result for each k, in the same order
    '''
    centers = list(range(1, 11))
    # One fit per candidate k, always against the global `data`.
    scores = [get_kmeans_score(data, k) for k in centers]
    return centers, scores