Online Learning Platform

Data Analysis Using Python > Classifications > KNN Classification

import pandas as pd

from sklearn.neighbors import KNeighborsClassifier

# Toy training set: six observations of hours studied, hours slept and the
# resulting Pass/Fail label.
data = {
    'Hours_Studied': [1, 2, 3, 6, 7, 8],
    'Hours_Slept': [5, 4, 6, 2, 3, 2],
    'Result': ['Fail', 'Fail', 'Fail', 'Pass', 'Pass', 'Pass'],
}

# Each dict key becomes a column; rows get the default 0..5 integer index.
df = pd.DataFrame.from_dict(data)
print(df)

   Hours_Studied  Hours_Slept Result
0              1            5   Fail
1              2            4   Fail
2              3            6   Fail
3              6            2   Pass
4              7            3   Pass
5              8            2   Pass

# Features are the two hour columns; the target is the Pass/Fail label.
feature_cols = ['Hours_Studied', 'Hours_Slept']
X = df[feature_cols]
y = df['Result']

# Fit a 3-nearest-neighbour classifier (fit() returns the estimator itself).
knn = KNeighborsClassifier(n_neighbors=3).fit(X, y)

# Single query point, built with the same column names used for training.
new_data = pd.DataFrame({'Hours_Studied': [4], 'Hours_Slept': [4]})
prediction = knn.predict(new_data)
print("Predicted Result:", prediction[0])

Predicted Result: Fail

# Look up the query point's nearest training rows: their distances and their
# positional indices in the training data.
distances, indices = knn.kneighbors(X=new_data, return_distance=True)

for label, values in (("Distances:", distances), ("Indices:", indices)):
    print(label, values)

Distances: [[2.         2.23606798 2.82842712]]
Indices: [[1 2 3]]

from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# Encode the Pass/Fail strings as integers so they can drive the point colours.
encoder = LabelEncoder()
result_codes = encoder.fit_transform(df['Result'])
df['Result_enc'] = result_codes

# Scatter plot of the training data, coloured by encoded class.
plt.scatter(x=df['Hours_Studied'], y=df['Hours_Slept'], c=df['Result_enc'])
plt.show()

# Encode the predicted label with the same LabelEncoder that produced
# df['Result_enc'], so the new point's colour code always agrees with the
# training points (LabelEncoder sorts classes, giving 0 for 'Fail' and 1 for
# 'Pass' with the data above).
new_pred = int(encoder.transform(prediction)[0])

# Append the query point and its predicted class code to the training data.
X_all = pd.concat([X, new_data], ignore_index=True)
y_all = pd.concat([df['Result_enc'], pd.Series([new_pred])], ignore_index=True)

plt.scatter(X_all['Hours_Studied'], X_all['Hours_Slept'], c=y_all)

# plt.text expects scalar coordinates, so pull the single values out of the
# one-row frame instead of passing whole Series objects.
new_x = new_data['Hours_Studied'].iloc[0]
new_y = new_data['Hours_Slept'].iloc[0]
# The offsets place the annotation below and to the left of the marker.
plt.text(x=new_x - 1.7, y=new_y - 0.7, s=f"new point, class: {new_pred}")
plt.show()

Random Forest

Feedback

ABOUT

Statlearner

Statlearner STUDY

Statlearner