import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline
# Load the Titanic passenger dataset that ships with seaborn.
df = sns.load_dataset("titanic")
df
# Count the missing values in each column.
df.isna().sum()
# Drop the columns this analysis does not use.
cols_to_drop = ["who", "adult_male", "deck", "embark_town", "alive", "alone"]
df = df.drop(columns=cols_to_drop)
df
# Visualise where the remaining missing values are.
sns.heatmap(df.isna())
# Replace the missing ages with interpolated values.
age_filled = df["age"].interpolate()
df["age"] = age_filled
sns.heatmap(df.isna())
df.info()
# 'class' is dropped as a separate step, after inspecting the frame.
cols_to_drop = ["class"]
df = df.drop(columns=cols_to_drop)
df.info()
# Convert the categorical columns to binary indicator columns: build one
# dummy frame per column, append the dummy frames to the dataframe, then
# drop the original columns.
embarked_dummies = pd.get_dummies(df["embarked"])
sex_dummies = pd.get_dummies(df["sex"])
df = pd.concat([df, embarked_dummies, sex_dummies], axis="columns")
df.head(10)
# The original text columns are redundant now that they are encoded.
df = df.drop(columns=["sex", "embarked"])
df.head(10)
# Separate the dataframe into the feature matrix x and the target vector y.
y = df['survived'].values
# Remove the target from the features by label rather than by deleting
# column 0, so the split stays correct if the column order ever changes.
# The explicit float cast keeps x a numeric array even when the dummy
# columns are stored as booleans.
x = df.drop(columns=['survived']).values.astype(float)
df
# Split the dataset: 30% of the rows are held out for testing, and the
# fixed random_state makes the split repeatable.
from sklearn.model_selection import train_test_split
splits = train_test_split(x, y, test_size=0.3, random_state=0)
x_train, x_test, y_train, y_test = splits
# Build a decision tree classifier with its depth capped at 5.
from sklearn import tree
from sklearn.metrics import confusion_matrix
# Seeded like the train/test split so the fitted tree and its score are
# the same on every run.
df_clf = tree.DecisionTreeClassifier(max_depth=5, random_state=0)  # build
df_clf.fit(x_train, y_train)  # train
y_pred = df_clf.predict(x_test)  # predict labels for the held-out rows
df_clf.score(x_test, y_test)  # accuracy on the held-out rows
# Compare the true labels against the predictions.
confusion_matrix(y_test, y_pred)
# Build a random forest classifier with 100 trees.
from sklearn import ensemble
# Seeded like the train/test split; without a seed the bootstrap samples
# and feature subsets differ per run, so the score is not reproducible.
rf_clf = ensemble.RandomForestClassifier(n_estimators=100, random_state=0)
rf_clf.fit(x_train, y_train)
rf_clf.score(x_test, y_test)  # accuracy on the held-out rows
# Build a gradient boosting classifier with the library defaults.
# Seeded like the train/test split so repeated runs give the same model.
gb_clf = ensemble.GradientBoostingClassifier(random_state=0)
gb_clf.fit(x_train, y_train)
gb_clf.score(x_test, y_test)  # accuracy on the held-out rows
# Gaussian naive Bayes classifier: fit on the training rows, then score
# on the held-out rows.
from sklearn.naive_bayes import GaussianNB
nb_clf = GaussianNB().fit(x_train, y_train)
nb_clf.score(x_test, y_test)
# K-nearest-neighbour classifier using the 3 closest training rows.
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier(n_neighbors=3)
knn_clf.fit(X=x_train, y=y_train)
knn_clf.score(X=x_test, y=y_test)
# Logistic regression classifier
from sklearn.linear_model import LogisticRegression
# The features are not scaled, so the solver may need more than the default
# 100 iterations to converge; a higher limit avoids stopping early.
lr_clf = LogisticRegression(max_iter=1000)
lr_clf.fit(x_train, y_train)
lr_clf.score(x_test, y_test)  # accuracy on the held-out rows
# Support vector classifier, created with probability estimates enabled.
from sklearn.svm import SVC
sv_clf = SVC(probability=True).fit(x_train, y_train)
sv_clf.score(x_test, y_test)
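# NOTE(review): the lines below look like reader-comment residue from the
# web page this notebook was copied from; they are not code.
# 1 Comment
# Thanks
# Reply | Delete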