-
Notifications
You must be signed in to change notification settings - Fork 0
/
k_means.py
64 lines (60 loc) · 2.13 KB
/
k_means.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# %%
# [task] Use K-MEANS to cluster the data
#
from random import uniform
from math import dist
# K-means configuration. `k` (number of clusters) and `p_data` (the samples,
# a DataFrame) are expected to be defined before this cell runs.
n_clusters = k
max_iter = 6
iteration = 0
prev_centroids = None
p_data = p_data.astype(np.float64)

# 1. Initialize the centroids: each coordinate is drawn uniformly between the
#    per-column minimum and maximum of the data.
# The bounds are named feature_min / feature_max so that the builtins min()
# and max() are not shadowed for the rest of the script.
feature_min = np.min(p_data, axis=0)
feature_max = np.max(p_data, axis=0)
centroids = [
    np.random.uniform(low=feature_min, high=feature_max, size=p_data.shape[1])
    for _ in range(n_clusters)
]

# One plain list of coordinates per row; the DataFrame index is not part of
# the point.
datas = [list(row) for row in p_data.itertuples(index=False)]

# Iterate until no centroid moved during the last pass or max_iter passes were
# done. prev_centroids starts as None, which compares unequal to the initial
# centroids, so the first pass always runs.
while np.not_equal(centroids, prev_centroids).any() and iteration < max_iter:
    # 2. Assign every point to its nearest centroid (Euclidean distance).
    prev_centroids = centroids
    sorted_points = [[] for _ in range(n_clusters)]
    for x in datas:
        dists = [dist(x, centroid) for centroid in centroids]
        sorted_points[int(np.argmin(dists))].append(x)

    # 3. Move each centroid to the mean of the points assigned to it.
    centroids = [np.mean(points, axis=0) for points in sorted_points]
    # The mean of an empty cluster is NaN; keep that cluster's previous
    # centroid instead of losing it.
    for i, centroid in enumerate(centroids):
        if np.isnan(centroid).any():
            centroids[i] = prev_centroids[i]

    iteration += 1
    print("K-means status: {}/{}".format(iteration, max_iter))
print("K-mean done")
# %% Prediction after training
# Predict the cluster of every sample: the index of its nearest centroid.
# The indices are gathered first and converted to an array once, because
# growing an array with np.append inside a loop copies the whole array on
# every call. The dtype stays float64 so later arithmetic on y_pred sees the
# same type as before.
y_pred = np.array(
    [int(np.argmin([dist(x, centroid) for centroid in centroids])) for x in datas],
    dtype=np.float64,
)

# Associate the labels to the clusters
labels = {
    'LAYING': 0,
    'SITTING': 1,
    'STANDING': 2,
    'WALKING': 3,
    'WALKING_DOWNSTAIRS': 4,
    'WALKING_UPSTAIRS': 5
}
y_train_id = [labels[label] for label in y_train]

# Cluster indices are arbitrary, so try every cyclic shift of the predicted
# indices and count how many samples then agree with the true label ids.
# fit[i] is the number of matches obtained with a shift of i.
fit = np.zeros(k)
for i in range(k):
    fit[i] = np.count_nonzero(np.equal((y_pred + i) % k, y_train_id))
# %%
from sklearn.metrics import confusion_matrix
# Relabel the clusters with the cyclic shift that matched the most training
# labels (the scores are in `fit`, computed in the previous cell). The shift is
# looked up rather than hard-coded because the centroids are initialised
# randomly, so the best shift can differ from one run to the next.
# NOTE: this rebinds y_pred in place, so running this cell twice shifts twice.
best_shift = int(np.argmax(fit))
y_pred = (y_pred + best_shift) % k

# Names ordered by label id, so a shifted cluster index maps straight to a name.
labels_name = ['LAYING', 'SITTING', 'STANDING', 'WALKING', 'WALKING_DOWNSTAIRS', 'WALKING_UPSTAIRS']
y_pred = y_pred.astype(np.int64)  # integer indices are required to index the list
y_pred_labels = [labels_name[x] for x in y_pred]

p_data = p_data.astype(np.float64)
# Same data drawn twice: once with the true labels, once with the predicted
# ones. plts.scatter_with_labels is a project helper - presumably a labelled
# scatter plot; confirm against its definition.
plts.scatter_with_labels(p_data, y_train)
plts.scatter_with_labels(p_data, y_pred_labels)