-
Notifications
You must be signed in to change notification settings - Fork 0
/
myKmeans.py
70 lines (55 loc) · 2.37 KB
/
myKmeans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# -*- coding: utf-8 -*-
#### Generate random data
from sklearn.datasets import make_blobs
# Toy dataset for the functions below: 20 two-feature samples spread over
# 5 blob centres. random_state is fixed so each run draws the same points.
X, y = make_blobs(
    n_samples=20,
    centers=5,
    n_features=2,
    random_state=3,
)
import numpy as np
import matplotlib.pyplot as plt
#### Inertia (as computed below): for each cluster, the mean squared coordinate difference between its points and its centroid, averaged over the clusters.
def inertia(X, centroids, labels):
    """Return the average per-cluster mean squared deviation from the centroid.

    For every cluster ``j`` (one per row of ``centroids``) the squared
    coordinate differences between the samples labelled ``j`` and
    ``centroids[j]`` are averaged over all of that cluster's samples and
    features. The result is the plain mean of those per-cluster values.

    Parameters
    ----------
    X : array-like
        Samples, one per row.
    centroids : array-like
        One centroid per row; the number of rows is the number of clusters.
    labels : array-like of int
        ``labels[i]`` is the index of the centroid that ``X[i]`` belongs to.

    Returns
    -------
    float
        The averaged value. It is ``nan`` (and numpy emits a RuntimeWarning)
        when some cluster has no samples, because the mean of an empty
        selection is undefined.
    """
    X = np.asarray(X)
    centroids = np.asarray(centroids)
    labels = np.asarray(labels)

    # Derive the cluster count from the centroids instead of assuming 3, so
    # the function works for any k and never skips or over-indexes a cluster.
    n_clusters = len(centroids)
    per_cluster = [
        ((X[labels == j] - centroids[j]) ** 2).mean()
        for j in range(n_clusters)
    ]
    return np.mean(per_cluster)
#### My k_means function
def k_means(X, k, n_iter):
    """Cluster ``X`` into ``k`` groups with Lloyd's algorithm and random restarts.

    Each restart picks ``k`` distinct rows of ``X`` as starting centroids and
    then alternates between assigning every sample to its nearest centroid
    and moving each centroid to the mean of its samples, until the centroids
    stop changing. The restart with the smallest total squared distance
    between samples and their assigned centroids is returned.

    Parameters
    ----------
    X : array-like
        Samples, one per row (assumed 2-D: samples x features).
    k : int
        Number of clusters; must satisfy ``1 <= k <= len(X)``.
    n_iter : int
        Number of random restarts. Values below 1 are treated as 1 so that a
        result is always produced.

    Returns
    -------
    centroids : numpy.ndarray
        Array with one row per cluster holding the final centroid.
    labels : numpy.ndarray
        ``labels[i]`` is the row index in ``centroids`` assigned to ``X[i]``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or larger than the number of samples.

    Notes
    -----
    Initialisation uses numpy's global random state, so call
    ``np.random.seed`` beforehand for reproducible results.
    """
    X = np.asarray(X, dtype=float)
    n_samples = len(X)
    if not 1 <= k <= n_samples:
        raise ValueError(
            "k must be between 1 and the number of samples "
            "(got k=%r for %d samples)" % (k, n_samples)
        )

    sample_index = np.arange(n_samples)
    best_score = None
    best_centroids = None
    best_labels = None

    for _ in range(max(1, n_iter)):
        # Draw distinct rows: sampling with replacement can pick the same row
        # twice, which leaves one cluster empty from the very first step.
        seeds = np.random.choice(n_samples, size=k, replace=False)
        centroids = X[seeds]

        while True:
            # Squared distances, shape (k, n_samples). The square root is
            # skipped because it does not change which centroid is nearest.
            sq_dist = ((X - centroids[:, np.newaxis]) ** 2).sum(axis=2)
            labels = np.argmin(sq_dist, axis=0)

            new_centroids = centroids.copy()
            for j in range(k):
                members = X[labels == j]
                # An empty cluster keeps its previous centroid; taking the
                # mean of no samples would yield NaN, and NaN never compares
                # equal, so the convergence test below could never succeed.
                if len(members):
                    new_centroids[j] = members.mean(axis=0)

            if np.array_equal(new_centroids, centroids):
                break
            centroids = new_centroids

        # sq_dist and labels were both computed from the converged centroids.
        score = sq_dist[labels, sample_index].sum()
        if best_score is None or score < best_score:
            best_score = score
            best_centroids = centroids
            best_labels = labels

    return best_centroids, best_labels
#### Function to plot my clusters and centroids
def plot_kmeans(X, centroids, labels):
    """Scatter-plot the samples coloured by cluster, with centroids in black.

    Only the first two columns of ``X`` and ``centroids`` are drawn. The
    points are added through ``plt.scatter``; the function neither creates a
    figure explicitly nor calls ``plt.show()``.

    Parameters
    ----------
    X : numpy.ndarray
        Samples, one per row, with at least two columns.
    centroids : numpy.ndarray
        Cluster centres, one per row, with at least two columns.
    labels : array-like
        Per-sample values used to colour the points via the 'rainbow' colormap.
    """
    sample_x, sample_y = X[:, 0], X[:, 1]
    plt.scatter(sample_x, sample_y, c=labels, cmap='rainbow')

    # Centroids go on top of the samples in a single contrasting colour.
    centre_x, centre_y = centroids[:, 0], centroids[:, 1]
    plt.scatter(centre_x, centre_y, color='black')
##### Elbow method (unfinished, commented out): collect the inertia for each k in range(2, Kmax)
#def elbow_method(X, Kmax):
# inertias = []
# K = range(2,Kmax)
##### Calculate the inertias
# for k in K:
# centroids, labels = k_means(X,k)
# inertias.append(inertia(X,centroids))
#