import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
# Toy dataset: the two lists are paired by position, one row per observation.
hours_studied = [1, 2, 3, 4, 6, 7, 8, 9, 8, 10, 12, 11, 13, 14]
hours_slept = [7, 6, 5, 5, 3, 3, 2, 2, 7, 8, 9, 5, 10, 12]

# Column order follows keyword order: Hours_Studied first, then Hours_Slept.
data = dict(Hours_Studied=hours_studied, Hours_Slept=hours_slept)
df = pd.DataFrame(data)
print(df)
Hours_Studied Hours_Slept
0 1 7
1 2 6
2 3 5
3 4 5
4 6 3
5 7 3
6 8 2
7 9 2
8 8 7
9 10 8
10 12 9
11 11 5
12 13 10
13 14 12
# Raw scatter of the two features, drawn before any cluster labels exist.
# Colormap names that can be supplied through cmap= once points are colored:
# "viridis", "plasma", "inferno", "magma", "cividis"
study_hours = df['Hours_Studied']
sleep_hours = df['Hours_Slept']
plt.scatter(study_hours, sleep_hours)
plt.title("Before K-Means Clustering")
plt.xlabel("Hours Studied")
plt.ylabel("Hours Slept")
plt.show()
<Image>
# Fit a 3-cluster K-Means model on the two hour columns; random_state is fixed
# so the centroid initialisation is repeatable between runs.
# NOTE(review): n_init is left at the library default - consider pinning it if
# results must be reproducible across scikit-learn versions.
feature_columns = ['Hours_Studied', 'Hours_Slept']
kmeans = KMeans(n_clusters=3, random_state=0)

# fit_predict trains the model and returns one cluster label per row.
cluster_labels = kmeans.fit_predict(df[feature_columns])
df['Cluster'] = cluster_labels

print("Cluster Centers:", kmeans.cluster_centers_, sep="\n")
Cluster Centers:
[[ 8.16666667 3.66666667]
[12.25 9.75 ]
[ 2.5 5.75 ]]
# Color every observation by the cluster label stored in df['Cluster'].
plt.scatter(
    df['Hours_Studied'],
    df['Hours_Slept'],
    c=df['Cluster'],
    cmap='viridis',
    s=100,
)

# Overlay the fitted centroids as large red X markers
# (column 0 = Hours_Studied, column 1 = Hours_Slept, matching the fit order).
centers = kmeans.cluster_centers_
center_x, center_y = centers[:, 0], centers[:, 1]
plt.scatter(center_x, center_y, c='red', s=200, marker='X')

plt.title("K-Means Clustering Example")
plt.xlabel("Hours Studied")
plt.ylabel("Hours Slept")
plt.show()
<Image>