-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIris-Dataset_Subplots
47 lines (46 loc) · 1.79 KB
/
Iris-Dataset_Subplots
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# This program aims to find which combination of features from the iris dataset will create the most distinct groups to use for KNN
from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt
# Pull the iris measurements into a NumPy array.
iris = load_iris()
arr = np.array(iris.data)
# Cut the rows at indices 50 and 100 so each species gets its own array.
# This assumes the rows are ordered by species: setosa in the first 50 rows,
# versicolor in the next 50, and virginica in the remainder.
setosa, versicolor, virginica = np.split(arr, [50, 100])
# Column index of each iris feature within a row of measurements.
target = {"sepal_length": 0, "sepal_width": 1, "petal_length": 2, "petal_width": 3}


def points(flower, first_char, second_char):
    """Return scatter-plot coordinates for two features of a group of flowers.

    Args:
        flower: Iterable of rows, each indexable by the column numbers stored
            in ``target`` (e.g. a 2-D NumPy array or a list of lists).
        first_char: Feature name (a key of ``target``) used for the x values.
        second_char: Feature name (a key of ``target``) used for the y values.

    Returns:
        A two-element list ``[x, y]`` where ``x`` and ``y`` are lists holding
        the chosen feature of every row, in row order.

    Raises:
        KeyError: If either feature name is not a key of ``target``.
    """
    # Resolve the column indices once, up front: this avoids two dict lookups
    # per row and makes an unknown feature name fail even for empty input.
    x_col = target[first_char]
    y_col = target[second_char]
    # Materialize the rows so a one-shot iterable can be traversed twice.
    rows = list(flower)
    return [[row[x_col] for row in rows], [row[y_col] for row in rows]]
# Try every unordered pair of the four features (6 pairs in total) to see
# which pairing produces the most distinct groups on a scatterplot.
_FEATURE_ORDER = ("sepal_length", "sepal_width", "petal_length", "petal_width")
combos = [
    [first, second]
    for position, first in enumerate(_FEATURE_ORDER)
    for second in _FEATURE_ORDER[position + 1:]
]
# Draw all six feature-pair scatterplots together on one 2x3 grid of subplots.
name_arrays = {"setosa": setosa, "versicolor": versicolor, "virginica": virginica}
fig, axs = plt.subplots(2, 3)
# axs.flat walks the grid left-to-right, top-to-bottom, so the first combo
# lands in the top-left subplot. Indexing with row-1/column-1 would start at
# [-1][-1] and wrap around, placing the panels out of order.
for position, (graph, combo) in enumerate(zip(axs.flat, combos)):
    # Only the first subplot carries species labels, so a single legend
    # serves the whole figure (the colour order is the same in every panel).
    show_legend = position == 0
    for string, array in name_arrays.items():
        coord = points(array, combo[0], combo[1])
        graph.scatter(coord[0], coord[1], label=string if show_legend else "")
    graph.set_title(combo[0] + " by " + combo[1])
    if show_legend:
        # legend() on an axes without labelled artists only emits a warning
        # and draws nothing, so it is requested for the labelled panel only.
        graph.legend()
fig.suptitle("Comparison of Iris Features")
# Save before show(): once the window opened by show() is closed the figure
# may be discarded, and saving afterwards can write a blank image.
fig.savefig("iris_classification_char_comparison.png")
plt.show()