# 10. Write a program to build a predictive model using classification techniques


import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline

# Load the Titanic dataset that ships with seaborn.
df = sns.load_dataset('titanic')

df

# Count the missing values in every column.
df.isna().sum()

# Remove the columns that are not used for modelling.
cols_to_drop = ['who', 'adult_male', 'deck', 'embark_town', 'alive', 'alone']
df = df.drop(columns=cols_to_drop)

df

# Plot which cells are still missing.
sns.heatmap(df.isna())


# Replace missing ages with interpolated values.
df['age'] = df['age'].interpolate()

sns.heatmap(df.isna())

df.info()

# Remove the 'class' column as well.
cols_to_drop = ['class']
df = df.drop(columns=cols_to_drop)
df.info()

# Convert the categorical columns to binary indicator columns:
# create dummy columns for each column to convert, concatenate them
# onto the dataframe, then drop the original columns.
embarkedcolumndummy = pd.get_dummies(df['embarked'])
sexcolumndummy = pd.get_dummies(df['sex'])

df = pd.concat([df, embarkedcolumndummy, sexcolumndummy], axis='columns')

df.head(10)

# Drop the original columns, which are redundant now that they are encoded.
df = df.drop(columns=['sex', 'embarked'])
df.head(10)

# Separate the dataframe into the feature matrix x and the target vector y.
y = df['survived'].values

# Build x by dropping 'survived' by name instead of deleting column 0 by
# position, so the features stay correct even if the column order changes.
# Requesting float gives a plain numeric array regardless of the dtype of
# the indicator columns.
x = df.drop(columns='survived').to_numpy(dtype=float)
df

# Split the dataset: 30% of the rows are held out for testing, and the
# fixed random_state makes the split repeatable.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)

# Build a decision tree classifier.
from sklearn import tree
# random_state is fixed so repeated runs build the same tree, in line with
# the seeded train/test split.
df_clf = tree.DecisionTreeClassifier(max_depth=5, random_state=0)  # build
df_clf.fit(x_train, y_train)  # train

y_pred = df_clf.predict(x_test)  # make predictions on the test set
df_clf.score(x_test, y_test)  # mean accuracy on the test set

# Tabulate true labels against predicted labels.
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)

# Build a random forest classifier.
from sklearn import ensemble
# random_state is fixed so the forest, and therefore its score, is
# repeatable across runs.
rf_clf = ensemble.RandomForestClassifier(n_estimators=100, random_state=0)
rf_clf.fit(x_train, y_train)
rf_clf.score(x_test, y_test)

# Build a gradient boosting classifier.
# random_state is fixed so repeated runs give the same model and score.
gb_clf = ensemble.GradientBoostingClassifier(random_state=0)
gb_clf.fit(x_train, y_train)
gb_clf.score(x_test, y_test)

# Gaussian naive Bayes classifier: fit on the training set, then report
# the mean accuracy on the test set.
from sklearn.naive_bayes import GaussianNB
nb_clf = GaussianNB().fit(x_train, y_train)
nb_clf.score(x_test, y_test)

# K-nearest neighbours classifier using the 3 closest training samples.
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)
knn_clf.score(x_test, y_test)

# Logistic regression classifier.
from sklearn.linear_model import LogisticRegression
# The features are not scaled, which can keep the solver from converging
# within the default 100 iterations; allow more so the fit can finish.
lr_clf = LogisticRegression(max_iter=1000)
lr_clf.fit(x_train, y_train)
lr_clf.score(x_test, y_test)


# Support vector machine classifier with probability estimates enabled.
from sklearn.svm import SVC
sv_clf = SVC(probability=True).fit(x_train, y_train)
sv_clf.score(x_test, y_test)

Post a Comment

1 Comments