-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathk_means clustring problem .py
128 lines (61 loc) · 2.04 KB
/
k_means clustring problem .py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python
# coding: utf-8
# In[130]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# In[131]:
# Read the CSV into a DataFrame and print its column/dtype summary.
dataset = pd.read_csv('data.kmeans_problem.csv')
dataset.info()

# Keep only the two columns that are passed to KMeans further down,
# and print the summary of that two-column frame as well.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
x = dataset[feature_columns]
x.info()
# In[134]:
from sklearn.cluster import KMeans
# In[135]:
# Elbow method: fit KMeans for k = 1..10 and record each model's inertia_
# so the curve can be inspected for a bend.
wcss = []
for i in range(1, 11):
    # The three statements below form the loop body; without indentation the
    # script does not parse (IndentationError after the `for` line).
    kmeans = KMeans(n_clusters = i,init = 'k-means++', max_iter= 300,n_init = 10,random_state = 0)
    kmeans.fit(x)
    wcss.append(kmeans.inertia_)

# Plot once, after the loop, when wcss holds one value per k in range(1, 11);
# plotting inside the loop would pair 10 x-values with fewer y-values.
plt.plot(range(1, 11),wcss)
plt.title('The Elbow Methode')
plt.xlabel('Number of cluster')
plt.ylabel('wcss')
plt.show()
# In[137]:
# Fit the final 5-cluster model; fit_predict returns one cluster index per row of x.
kmeans = KMeans(n_clusters = 5,init = 'k-means++', max_iter= 300,n_init = 10,random_state = 0)
y_kmeans = kmeans.fit_predict(x)

# x is a pandas DataFrame (it was selected with a list of column names), and
# DataFrame[...] does not accept NumPy-style `[row_mask, column]` keys.
# Index a plain array view of the same data instead.
points = np.asarray(x)

# One scatter series per cluster: column 0 on the horizontal axis, column 1 on
# the vertical axis. Colours and labels match the order Cluster_1..Cluster_5.
cluster_colors = ['red', 'blue', 'green', 'cyan', 'magenta']
for cluster_id, color in enumerate(cluster_colors):
    members = y_kmeans == cluster_id
    plt.scatter(points[members, 0], points[members, 1], s=15, c=color,
                label=f'Cluster_{cluster_id + 1}')
# In[ ]:
# Draw the fitted cluster centres as large yellow markers on the current
# figure, then add the title, axis labels and legend and display it.
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], s=300, c='yellow', label='Centroids')
plt.title('Clusters of clients')
plt.xlabel('Annual income')
plt.ylabel('spending score')
plt.legend()
plt.show()
# In[140]:
# The original script contained the same centroid-plot cell twice in a row
# (In[140] and In[141]); render it twice here, each pass ending in its own
# plt.show() call.
for _ in range(2):
    centroids = kmeans.cluster_centers_
    plt.scatter(centroids[:, 0], centroids[:, 1], s=300, c='yellow', label='Centroids')
    plt.title('Clusters of clients')
    plt.xlabel('Annual income')
    plt.ylabel('spending score')
    plt.legend()
    plt.show()
# In[ ]:
# In[ ]:
# In[ ]: