import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
Load Dataset:
# Read the star-classification table from the CSV in the working directory.
star_df = pd.read_csv(filepath_or_buffer='star_classification.csv')
# Notebook-style previews: the first five rows, then the column labels.
star_df.head(n=5)
star_df.columns
Drop Unwanted Columns:
# Keep only the columns used downstream; every other column is discarded.
kept_columns = ['alpha', 'delta', 'u', 'g', 'r', 'i', 'z', 'class', 'redshift']
star_df = star_df[kept_columns]
star_df.head(n=5)
EDA:
# Split the table into one frame per value of the 'class' column.
class_labels = star_df['class']
galaxy = star_df.loc[class_labels.eq('GALAXY')]
star = star_df.loc[class_labels.eq('STAR')]
qso = star_df.loc[class_labels.eq('QSO')]
Alpha Vs Redshift:
# One red scatter plot per class subset: 'alpha' on x, 'redshift' on y.
for class_label, class_rows in (("Galaxy", galaxy), ("Star", star), ("QSO", qso)):
    plt.figure(figsize=(9, 7))
    sns.scatterplot(data=class_rows, x='alpha', y='redshift', color='r')
    plt.title(f"Alpha Vs Redshift for {class_label}", fontdict={'fontsize': 20})
Green filter Vs Red Filter:
# One blue scatter plot per class subset: 'g' column on x, 'r' column on y.
for class_label, class_rows in (("Galaxy", galaxy), ("Star", star), ("QSO", qso)):
    plt.figure(figsize=(9, 7))
    sns.scatterplot(data=class_rows, x='g', y='r', color='b')
    plt.title(f"Green filter Vs Red Filter for {class_label}", fontdict={'fontsize': 20})
Target Feature:
# Bar chart of how many rows fall into each value of the 'class' column.
plt.figure(figsize=(8, 7))
# The column must be passed by keyword: seaborn only accepts `data`
# positionally (the deprecation warning for the old positional form was
# hidden by the warnings filter at the top of the file).
sns.countplot(x='class', data=star_df)
plt.title("Distribution of Target Feature", {'fontsize': 20})
Data Splitting:
# Features are every column except 'class'; 'class' itself is the target.
y = star_df['class']
X = star_df.drop(columns='class')
Convert Categorical Column into Numeric:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Replace the string class labels with integer codes; the fitted encoder is
# kept so the codes can be mapped back to names via `encoder.classes_`.
encoder = LabelEncoder()
y = encoder.fit_transform(y)

# Hold out 20% of the rows for testing. A fixed seed makes the split (and the
# metrics computed from it) reproducible between runs, and stratifying on y
# keeps the class proportions the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
Data Scaling:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so the test data never influences the statistics.
scalar = StandardScaler()
scalar.fit(X_train)
X_train = scalar.transform(X_train)
X_test = scalar.transform(X_test)
Model Training:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
import math

# Candidate classifiers, keyed by estimator instance and mapped to the
# display name used when reporting results.
models = {
    KNeighborsClassifier(n_neighbors=3): 'K-Neighbors Classifier',
    SVC(): "Support Vector Machine",
    RandomForestClassifier(): 'Random Forest Classifier',
}

# Fit every candidate on the training split.
for m in models:
    m.fit(X_train, y_train)

# score() returns accuracy as a fraction in [0, 1]; it has to be scaled to a
# percentage *before* flooring, otherwise every imperfect model prints 0 %.
for model, name in models.items():
    accuracy_pct = math.floor(model.score(X_test, y_test) * 100)
    print(f"Accuracy Score for {name} is : ", accuracy_pct, "%")
Random Forest gives the highest accuracy on the test data, so we choose it for prediction.
Classification Report for Each Model:
Class 0: Galaxy
Class 1: QSO
Class 2: Star
from sklearn.metrics import classification_report

# Print a per-class precision/recall/F1 report for each fitted model on the
# held-out test split.
for model, name in models.items():
    y_pred = model.predict(X_test)
    print(f"Classification Report for : {name}")
    # target_names labels the report rows with the original class names
    # recorded by the label encoder instead of the bare integer codes.
    print(classification_report(y_test, y_pred, target_names=encoder.classes_))
Thank You ):