Commit 6c58a1df authored by bjanadi

Adding Classifiers

parent 95b63b05
Pipeline #906 canceled with stages
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
data = pd.read_csv('iris.csv')
print(data)
# features: every column but the last; label: the species column
x = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
print(x)
print(y)
from sklearn.preprocessing import LabelEncoder
ly = LabelEncoder()
y = ly.fit_transform(y)
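# classes_ records the order the encoder assigned, so integer predictions can
# be mapped back to species names with ly.inverse_transform:
print(ly.classes_)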
sns.set()
sns.pairplot(data[['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']],
             hue="species", diag_kind="kde")
plt.show()
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(x_train,y_train)
y_pred_test = gnb.predict(x_test)
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_test,y_pred_test)
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(solver = 'lbfgs',multi_class='auto')
logreg.fit(x_train,y_train)
y_pred = logreg.predict(x_test)
from sklearn.metrics import accuracy_score
acc1 = accuracy_score(y_test,y_pred)
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt.fit(x_train,y_train)
y_pred2 = dt.predict(x_test)
acc2 = accuracy_score(y_test,y_pred2)
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors=3,algorithm='ball_tree')
clf.fit(x_train,y_train)
y_pred3 = clf.predict(x_test)
acc3 = accuracy_score(y_test,y_pred3)
from sklearn.svm import SVC
svc1 = SVC(C=50,kernel='rbf',gamma=1)
svc1.fit(x_train,y_train)
y_pred4 = svc1.predict(x_test)
from sklearn.metrics import accuracy_score
acc4= accuracy_score(y_test,y_pred4)
# print accuracy of each classifier
print("Accuracy scores")
print("Decision Tree Classifier: ", acc2)
print("KNN Classifier: ", acc3)
print("SVM Classifier: ", acc4)
print("Naive Bayes Classifier: ", acc)
print("Logistic Regression: ", acc1)
# importing necessary libraries
from sklearn import datasets
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
# loading the iris dataset
iris = datasets.load_iris()
# X -> features, y -> label
X = iris.data
y = iris.target
print(X , y)
# dividing X, y into train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 0)
# training a DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier
dtree_model = DecisionTreeClassifier(max_depth = 2).fit(X_train, y_train)
dtree_predictions = dtree_model.predict(X_test)
#accuracy
DecisionAccuracy= dtree_model.score(X_test, y_test)
# creating a confusion matrix
dtree_cm = confusion_matrix(y_test, dtree_predictions)
# training a linear SVM classifier
from sklearn.svm import SVC
svm_model_linear = SVC(kernel = 'linear', C = 1).fit(X_train, y_train)
svm_predictions = svm_model_linear.predict(X_test)
# model accuracy for X_test
SVMaccuracy = svm_model_linear.score(X_test, y_test)
# creating a confusion matrix
svm_cm = confusion_matrix(y_test, svm_predictions)
# training a KNN classifier
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors = 7).fit(X_train, y_train)
# accuracy on X_test
KNNaccuracy = knn.score(X_test, y_test)
# creating a confusion matrix
knn_predictions = knn.predict(X_test)
knn_cm = confusion_matrix(y_test, knn_predictions)
# training a Naive Bayes classifier
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB().fit(X_train, y_train)
gnb_predictions = gnb.predict(X_test)
# accuracy on X_test
Bayesaccuracy = gnb.score(X_test, y_test)
# creating a confusion matrix
gnb_cm = confusion_matrix(y_test, gnb_predictions)
print("Accuracy scores")
print("Decision Tree Classifier: " , DecisionAccuracy)
print("KNN Classifier: " , KNNaccuracy)
print("SVM Classifier: " , SVMaccuracy)
print("Naive Bayes Classifier: " , Bayesaccuracy)
sepal_length,sepal_width,petal_length,petal_width,species
5.1,3.5,1.4,0.2,setosa
4.9,3,1.4,0.2,setosa
4.7,3.2,1.3,0.2,setosa
4.6,3.1,1.5,0.2,setosa
5,3.6,1.4,0.2,setosa
5.4,3.9,1.7,0.4,setosa
4.6,3.4,1.4,0.3,setosa
5,3.4,1.5,0.2,setosa
4.4,2.9,1.4,0.2,setosa
4.9,3.1,1.5,0.1,setosa
5.4,3.7,1.5,0.2,setosa
4.8,3.4,1.6,0.2,setosa
4.8,3,1.4,0.1,setosa
4.3,3,1.1,0.1,setosa
5.8,4,1.2,0.2,setosa
5.7,4.4,1.5,0.4,setosa
5.4,3.9,1.3,0.4,setosa
5.1,3.5,1.4,0.3,setosa
5.7,3.8,1.7,0.3,setosa
5.1,3.8,1.5,0.3,setosa
5.4,3.4,1.7,0.2,setosa
5.1,3.7,1.5,0.4,setosa
4.6,3.6,1,0.2,setosa
5.1,3.3,1.7,0.5,setosa
4.8,3.4,1.9,0.2,setosa
5,3,1.6,0.2,setosa
5,3.4,1.6,0.4,setosa
5.2,3.5,1.5,0.2,setosa
5.2,3.4,1.4,0.2,setosa
4.7,3.2,1.6,0.2,setosa
4.8,3.1,1.6,0.2,setosa
5.4,3.4,1.5,0.4,setosa
5.2,4.1,1.5,0.1,setosa
5.5,4.2,1.4,0.2,setosa
4.9,3.1,1.5,0.1,setosa
5,3.2,1.2,0.2,setosa
5.5,3.5,1.3,0.2,setosa
4.9,3.1,1.5,0.1,setosa
4.4,3,1.3,0.2,setosa
5.1,3.4,1.5,0.2,setosa
5,3.5,1.3,0.3,setosa
4.5,2.3,1.3,0.3,setosa
4.4,3.2,1.3,0.2,setosa
5,3.5,1.6,0.6,setosa
5.1,3.8,1.9,0.4,setosa
4.8,3,1.4,0.3,setosa
5.1,3.8,1.6,0.2,setosa
4.6,3.2,1.4,0.2,setosa
5.3,3.7,1.5,0.2,setosa
5,3.3,1.4,0.2,setosa
7,3.2,4.7,1.4,versicolor
6.4,3.2,4.5,1.5,versicolor
6.9,3.1,4.9,1.5,versicolor
5.5,2.3,4,1.3,versicolor
6.5,2.8,4.6,1.5,versicolor
5.7,2.8,4.5,1.3,versicolor
6.3,3.3,4.7,1.6,versicolor
4.9,2.4,3.3,1,versicolor
6.6,2.9,4.6,1.3,versicolor
5.2,2.7,3.9,1.4,versicolor
5,2,3.5,1,versicolor
5.9,3,4.2,1.5,versicolor
6,2.2,4,1,versicolor
6.1,2.9,4.7,1.4,versicolor
5.6,2.9,3.6,1.3,versicolor
6.7,3.1,4.4,1.4,versicolor
5.6,3,4.5,1.5,versicolor
5.8,2.7,4.1,1,versicolor
6.2,2.2,4.5,1.5,versicolor
5.6,2.5,3.9,1.1,versicolor
5.9,3.2,4.8,1.8,versicolor
6.1,2.8,4,1.3,versicolor
6.3,2.5,4.9,1.5,versicolor
6.1,2.8,4.7,1.2,versicolor
6.4,2.9,4.3,1.3,versicolor
6.6,3,4.4,1.4,versicolor
6.8,2.8,4.8,1.4,versicolor
6.7,3,5,1.7,versicolor
6,2.9,4.5,1.5,versicolor
5.7,2.6,3.5,1,versicolor
5.5,2.4,3.8,1.1,versicolor
5.5,2.4,3.7,1,versicolor
5.8,2.7,3.9,1.2,versicolor
6,2.7,5.1,1.6,versicolor
5.4,3,4.5,1.5,versicolor
6,3.4,4.5,1.6,versicolor
6.7,3.1,4.7,1.5,versicolor
6.3,2.3,4.4,1.3,versicolor
5.6,3,4.1,1.3,versicolor
5.5,2.5,4,1.3,versicolor
5.5,2.6,4.4,1.2,versicolor
6.1,3,4.6,1.4,versicolor
5.8,2.6,4,1.2,versicolor
5,2.3,3.3,1,versicolor
5.6,2.7,4.2,1.3,versicolor
5.7,3,4.2,1.2,versicolor
5.7,2.9,4.2,1.3,versicolor
6.2,2.9,4.3,1.3,versicolor
5.1,2.5,3,1.1,versicolor
5.7,2.8,4.1,1.3,versicolor
6.3,3.3,6,2.5,virginica
5.8,2.7,5.1,1.9,virginica
7.1,3,5.9,2.1,virginica
6.3,2.9,5.6,1.8,virginica
6.5,3,5.8,2.2,virginica
7.6,3,6.6,2.1,virginica
4.9,2.5,4.5,1.7,virginica
7.3,2.9,6.3,1.8,virginica
6.7,2.5,5.8,1.8,virginica
7.2,3.6,6.1,2.5,virginica
6.5,3.2,5.1,2,virginica
6.4,2.7,5.3,1.9,virginica
6.8,3,5.5,2.1,virginica
5.7,2.5,5,2,virginica
5.8,2.8,5.1,2.4,virginica
6.4,3.2,5.3,2.3,virginica
6.5,3,5.5,1.8,virginica
7.7,3.8,6.7,2.2,virginica
7.7,2.6,6.9,2.3,virginica
6,2.2,5,1.5,virginica
6.9,3.2,5.7,2.3,virginica
5.6,2.8,4.9,2,virginica
7.7,2.8,6.7,2,virginica
6.3,2.7,4.9,1.8,virginica
6.7,3.3,5.7,2.1,virginica
7.2,3.2,6,1.8,virginica
6.2,2.8,4.8,1.8,virginica
6.1,3,4.9,1.8,virginica
6.4,2.8,5.6,2.1,virginica
7.2,3,5.8,1.6,virginica
7.4,2.8,6.1,1.9,virginica
7.9,3.8,6.4,2,virginica
6.4,2.8,5.6,2.2,virginica
6.3,2.8,5.1,1.5,virginica
6.1,2.6,5.6,1.4,virginica
7.7,3,6.1,2.3,virginica
6.3,3.4,5.6,2.4,virginica
6.4,3.1,5.5,1.8,virginica
6,3,4.8,1.8,virginica
6.9,3.1,5.4,2.1,virginica
6.7,3.1,5.6,2.4,virginica
6.9,3.1,5.1,2.3,virginica
5.8,2.7,5.1,1.9,virginica
6.8,3.2,5.9,2.3,virginica
6.7,3.3,5.7,2.5,virginica
6.7,3,5.2,2.3,virginica
6.3,2.5,5,1.9,virginica
6.5,3,5.2,2,virginica
6.2,3.4,5.4,2.3,virginica
5.9,3,5.1,1.8,virginica
from csv import writer
import pandas as pd
from datetime import datetime
class Data:
    """Thin wrapper around a CSV file: read it, or append rows to another."""

    def __init__(self, data):
        self.data = data    # path to the CSV file
        self.Read = []

    def ReadData(self):
        self.Read = pd.read_csv(self.data)
        return self.Read

    def append_list_as_row(self, file_name, list_of_elem):
        # Open file in append mode and add the list as the last row
        with open(file_name, 'a+', newline='') as write_obj:
            csv_writer = writer(write_obj)
            csv_writer.writerow(list_of_elem)
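# Minimal usage sketch for the helper above (same file names as the pipeline
# scripts later in this commit; the row values are placeholders):
#   d = Data('MergedRunningData.csv')
#   df = d.ReadData()
#   d.append_list_as_row('FeatureVectors.csv', [0.1, 0.2, 0.3])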
import numpy as np
from sympy import fft
import GetData as gd
class FeatureExtraction:
    """Computes summary features over one window of accelerometer samples."""

    def __init__(self, values):
        self.values = values

    def MinimumPeak(self, values):
        # compare against None (not truthiness) so a legitimate 0.0 reading is kept
        min_value = None
        for value in values:
            if min_value is None or value < min_value:
                min_value = value
        return min_value

    def MaximumPeak(self, values):
        max_value = None
        for value in values:
            if max_value is None or value > max_value:
                max_value = value
        return max_value

    def Mean(self, values):
        return sum(values) / len(values)

    def median(self, values):
        n = len(values)
        if not n:
            return None
        s = sorted(values)
        # middle value for odd n, mean of the two middle values for even n
        return s[n // 2] if n % 2 else sum(s[n // 2 - 1:n // 2 + 1]) / 2.0

    def Variance(self, values):
        return np.var(values)

    def StandardDeviation(self, values):
        return np.var(values) ** 0.5

    def Energy(self, values):
        # magnitude of the dominant FFT coefficient (sympy fft, 4 decimal digits)
        transformed = fft(values, 4)
        return max(np.absolute(np.array(transformed)))

    def RMS(self, values):
        # note: this is the Euclidean norm of the window; a true root mean
        # square would divide by len(values) before taking the square root
        total = 0.0
        for i in values:
            total += i ** 2
        return total ** 0.5

    def most_frequent(self, List):
        counter = 0
        num = List[0]
        for i in List:
            curr_frequency = List.count(i)
            if curr_frequency > counter:
                counter = curr_frequency
                num = i
        return num

    def getFeatureSingle(self, window):
        d = gd.GetData(window)
        X, Y, Z = d.getX(window), d.getY(window), d.getZ(window)
        Magnitudes = d.getMagnitude(window)
        Activities = d.getActivities(window)
        feature = []
        # per-axis statistics, in the column order used by FeatureVectors.csv
        for stat in (self.MinimumPeak, self.MaximumPeak, self.Mean,
                     self.median, self.Variance, self.StandardDeviation,
                     self.RMS, self.Energy):
            for axis in (X, Y, Z):
                feature.append(stat(axis))
        feature.append(self.Mean(Magnitudes))
        # most common activity label in the window
        feature.append(np.bincount(Activities).argmax())
        print("Feature Vector \n", feature, "\n\n")
        return feature
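# Minimal usage sketch (assumes a 30-sample DataFrame window with
# accelerometer_X/Y/Z and activity columns, as read by Data.ReadData above):
#   fe = FeatureExtraction(window)
#   vector = fe.getFeatureSingle(window)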
class GetData:
    """Pulls per-axis readings, magnitudes, and activity labels from a DataFrame."""

    def __init__(self, data):
        self.data = data

    def getX(self, data):
        X = []
        for i, row in data.iterrows():
            X.append(float(row['accelerometer_X']))
        return X

    def getY(self, data):
        Y = []
        for i, row in data.iterrows():
            Y.append(float(row['accelerometer_Y']))
        return Y

    def getZ(self, data):
        Z = []
        for i, row in data.iterrows():
            Z.append(float(row['accelerometer_Z']))
        return Z

    def getMagnitude(self, data):
        # Euclidean magnitude of each (x, y, z) sample
        Magnitudes = []
        for i, row in data.iterrows():
            Magnitude = (float(row['accelerometer_X']) ** 2 +
                         float(row['accelerometer_Y']) ** 2 +
                         float(row['accelerometer_Z']) ** 2) ** 0.5
            Magnitudes.append(Magnitude)
        return Magnitudes

    def getActivities(self, data):
        # integer labels, so np.bincount in FeatureExtraction accepts them
        Activities = []
        for i, row in data.iterrows():
            Activities.append(int(row['activity']))
        return Activities
import Data as Data
import GetData as getdata
import FeatureExtraction as fe
d = Data.Data('MergedRunningData.csv')
data = d.ReadData()
# slide a non-overlapping 30-sample window over the recording and append one
# feature vector per window to FeatureVectors.csv
window_size = 30
i = 0
length = len(data)
while i < length:
    dataset = data.iloc[i:i + window_size]
    gd1 = getdata.GetData(dataset)
    f = fe.FeatureExtraction(gd1)
    FeatureVector = f.getFeatureSingle(dataset)
    d.append_list_as_row('FeatureVectors.csv', FeatureVector)
    i = i + window_size
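# The same windowing written with range() and a step, shown as an equivalent
# alternative to the while loop above:
#   for start in range(0, len(data), window_size):
#       window = data.iloc[start:start + window_size]
#       vec = fe.FeatureExtraction(window).getFeatureSingle(window)
#       d.append_list_as_row('FeatureVectors.csv', vec)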
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
def true_fun(X):
    return np.cos(1.5 * np.pi * X)
np.random.seed(0)
n_samples = 30
degrees = [2, 5, 16]
X = np.sort(np.random.rand(n_samples))
y = true_fun(X) + np.random.randn(n_samples) * 0.1
plt.figure(figsize=(14, 5))
for i in range(len(degrees)):
    ax = plt.subplot(1, len(degrees), i + 1)
    plt.setp(ax, xticks=(), yticks=())
    polynomial_features = PolynomialFeatures(degree=degrees[i],
                                             include_bias=False)
    linear_regression = LinearRegression()
    pipeline = Pipeline([("polynomial_features", polynomial_features),
                         ("linear_regression", linear_regression)])
    pipeline.fit(X[:, np.newaxis], y)
    # Evaluate the models using cross-validation
    scores = cross_val_score(pipeline, X[:, np.newaxis], y,
                             scoring="neg_mean_squared_error", cv=10)
    X_test = np.linspace(0, 1, 100)
    plt.plot(X_test, pipeline.predict(X_test[:, np.newaxis]), label="Model")
    plt.plot(X_test, true_fun(X_test), label="True function")
    plt.scatter(X, y, edgecolor='b', s=20, label="Samples")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.xlim((0, 1))
    plt.ylim((-2, 2))
    plt.legend(loc="best")
    plt.title("Degree {}\nMSE = {:.2e}(+/- {:.2e})".format(
        degrees[i], -scores.mean(), scores.std()))
plt.show()
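# A numeric recap of the same comparison (same data, pipeline, and scoring as
# above; the "poly"/"lr" step names are arbitrary): printing the mean CV error
# per degree makes the under/overfitting pattern explicit without the plots.
for degree in degrees:
    pipe = Pipeline([("poly", PolynomialFeatures(degree=degree, include_bias=False)),
                     ("lr", LinearRegression())])
    mse = -cross_val_score(pipe, X[:, np.newaxis], y,
                           scoring="neg_mean_squared_error", cv=10).mean()
    print("degree {:2d}: CV MSE = {:.4f}".format(degree, mse))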
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import numpy as np
def print_shapes(X, y):
    """Split X, y and report the shapes of the train and test partitions."""
    print(X, y)
    # dividing X, y into train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    print('Training Features Shape:', X_train.shape)
    print('Training Labels Shape:', y_train.shape)
    print('Testing Features Shape:', X_test.shape)
    print('Testing Labels Shape:', y_test.shape)
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from accuracies import results
from accuracies import print_results
from OverfittingUnderfitting import true_fun
data = pd.read_csv('ModelData.csv', encoding='utf-8')
print(data.isnull().sum())   # check for missing values
true_fun(data)
# Activity counts
print(data.MeanActivity.value_counts())
rest = data[data.MeanActivity == 0]
walk = data[data.MeanActivity == 1]
run = data[data.MeanActivity == 2]
plt.bar(0, height=[rest.MeanActivity.count()])
plt.bar(1, height=[walk.MeanActivity.count()])
plt.bar(2, height=[run.MeanActivity.count()])
plt.xticks(np.arange(3), ['rest', 'walk','run'])
plt.show()
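# Equivalent one-liner for the same class-balance check, using pandas' own
# bar plot over the value counts (commented out to avoid plotting twice):
#   data.MeanActivity.value_counts().sort_index().plot(kind='bar')
#   plt.show()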
#Feature Vectors
features = ['Xmin', 'Ymin', 'Zmin', 'Xmax', 'Ymax', 'Zmax',
            'Xmean', 'Ymean', 'Zmean', 'Xmedian', 'Ymedian', 'Zmedian',
            'Xvariance', 'Yvariance', 'Zvariance', 'Xstd', 'Ystd', 'Zstd',
            'Xrms', 'Yrms', 'Zrms', 'Xenergy', 'Yenergy', 'Zenergy',
            'MaxMagnitude']
plot_data_size = 20
rest_plot = rest.head(plot_data_size)
walk_plot = walk.head(plot_data_size)
run_plot = run.head(plot_data_size)
#fig, axes = plt.subplots(9, 3, figsize=(16, 16))
#for i, ax in enumerate(axes.flatten()[:25]):
# c = features[i]
# ax.plot(range(0, plot_data_size), rest_plot[c], label="Rest")
# ax.plot(range(0, plot_data_size), run_plot[c], label="Run")
# ax.plot(range(0, plot_data_size), walk_plot[c], label="Walk")
# ax.set_ylabel(c)
# ax.legend()
#Rest,Run And Walk Patterns
#fig, axes = plt.subplots(9, 3, figsize=(16, 16))
#for i, ax in enumerate(axes.flatten()[:25]):
# c = features[i]
# sns.distplot(rest[c], ax=ax, label='Rest')
# sns.distplot(run[c], ax=ax, label='Run')
# sns.distplot(walk[c], ax=ax, label='Walk')
# ax.legend()
# Train a model: pop the label column off a copy of the data
X = data.copy()
y = X.pop("MeanActivity")
#X = data.iloc[:,0:25].values
#y = data.iloc[:,25].values
print_results(X, y)
#results(X,y)
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
from sklearn.impute import SimpleImputer
def results(X, y):
    feature_list = list(X.columns)
    print(feature_list)
    # Split the data into training and testing sets
    train_features, test_features, train_labels, test_labels = train_test_split(
        X, y, test_size=0.25, random_state=0)
    print('Training Features Shape:', train_features.shape)
    print('Training Labels Shape:', train_labels.shape)
    print('Testing Features Shape:', test_features.shape)
    print('Testing Labels Shape:', test_labels.shape)

    # RANDOM FOREST CLASSIFIER
    from sklearn.ensemble import RandomForestClassifier
    # Instantiate model with 1000 decision trees and train on the training data
    rf = RandomForestClassifier(n_estimators=1000, random_state=0)
    rf.fit(train_features, train_labels)
    # Use the forest's predict method on the test data
    predictions = rf.predict(test_features)
    # Calculate the absolute errors
    errors = abs(predictions - test_labels)
    # Calculate mean absolute percentage error (MAPE); note MAPE is undefined
    # whenever a true label is 0 (the "rest" class), so treat it with caution
    mape = 100 * (errors / test_labels)
    # Display the performance metrics
    print('Mean Absolute Error:', round(np.mean(errors), 2))
    # Calculate and display accuracy
    accuracy = 100 - np.mean(mape)
    print('Accuracy:', round(accuracy, 2), '%. \n')

    # Get numerical feature importances
    importances = list(rf.feature_importances_)
    # List of (variable, importance) tuples, sorted most important first
    feature_importances = [(feature, round(importance, 2))
                           for feature, importance in zip(feature_list, importances)]
    feature_importances = sorted(feature_importances, key=lambda x: x[1], reverse=True)
    for pair in feature_importances:
        print('Variable: {:20} Importance: {}'.format(*pair))

    # Retrain on the most important features only
    rf_most_important = RandomForestClassifier(n_estimators=1000, random_state=0)
    important_indices = [feature_list.index(name) for name in
                         ['Ymax', 'Zmin', 'Xrms', 'Ymedian', 'Ymean', 'Xenergy',
                          'Zvariance', 'Zstd', 'Ymin', 'Zmean', 'Zmedian',
                          'Yrms', 'Zenergy']]
    # Impute missing values with the column mean before refitting
    imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
    imputer = imputer.fit(X.iloc[:, important_indices])
    train_important = imputer.transform(train_features.iloc[:, important_indices])
    test_important = imputer.transform(test_features.iloc[:, important_indices])
    # Train the random forest and make predictions
    rf_most_important.fit(train_important, train_labels)
    predictions = rf_most_important.predict(test_important)

    # CLASSIFICATION REPORT (true labels first, then predictions)
    print(metrics.classification_report(test_labels, predictions))
    # CONFUSION MATRIX
    mat = confusion_matrix(test_labels, predictions)
    sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False)
    plt.xlabel('true label')
    plt.ylabel('predicted label')
    errors = abs(predictions - test_labels)
    # Display the performance metrics
    print('Mean Absolute Error:', round(np.mean(errors), 2))
    mape = np.mean(100 * (errors / test_labels))
    accuracy = 100 - mape
    print('Accuracy:', round(accuracy, 2), '%.')
def print_results(X, y):
    print("X : ", X)
    print("Y : ", y)
    # dividing X, y into train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    print("X train: ", X_train)
    print("Y train: ", y_train)
    print("X test: ", X_test)
    print("Y test: ", y_test)
    print('\n \n Training Features Shape:', X_train.shape)
    print('Training Labels Shape:', y_train.shape)
    print('Testing Features Shape:', X_test.shape)
    print('Testing Labels Shape:\n \n', y_test.shape)

    # RANDOM FOREST CLASSIFIER
    from sklearn.ensemble import RandomForestClassifier
    model = RandomForestClassifier(n_estimators=1000)
    model.fit(X_train, y_train)
    ypred = model.predict(X_test)
    # model accuracy for X_test
    RandomAccuracy = model.score(X_test, y_test)
    # Classification report (true labels first, then predictions)
    print(metrics.classification_report(y_test, ypred))
    # creating a confusion matrix
    mat = confusion_matrix(y_test, ypred)
    sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False)
    plt.xlabel('true label')
    plt.ylabel('predicted label')

    # DECISION TREE CLASSIFIER
    from sklearn.tree import DecisionTreeClassifier
    dtree_model = DecisionTreeClassifier(max_depth=2).fit(X_train, y_train)
    dtree_predictions = dtree_model.predict(X_test)
    DecisionAccuracy = dtree_model.score(X_test, y_test)
    print(metrics.classification_report(y_test, dtree_predictions))
    Decisioncm = confusion_matrix(y_test, dtree_predictions)
    sns.heatmap(Decisioncm.T, square=True, annot=True, fmt='d', cbar=False)
    plt.xlabel('true label')
    plt.ylabel('predicted label')

    # SUPPORT VECTOR MACHINE CLASSIFIER (LINEAR)
    from sklearn.svm import SVC
    svm_model_linear = SVC(kernel='linear', C=1).fit(X_train, y_train)
    svm_predictions = svm_model_linear.predict(X_test)
    SVMaccuracy = svm_model_linear.score(X_test, y_test)
    print(metrics.classification_report(y_test, svm_predictions))
    SVMcm = confusion_matrix(y_test, svm_predictions)
    sns.heatmap(SVMcm.T, square=True, annot=True, fmt='d', cbar=False)
    plt.xlabel('true label')
    plt.ylabel('predicted label')

    # KNN CLASSIFIER
    from sklearn.neighbors import KNeighborsClassifier
    knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)
    knn_predictions = knn.predict(X_test)
    KNNaccuracy = knn.score(X_test, y_test)
    print(metrics.classification_report(y_test, knn_predictions))
    KNNcm = confusion_matrix(y_test, knn_predictions)
    sns.heatmap(KNNcm.T, square=True, annot=True, fmt='d', cbar=False)
    plt.xlabel('true label')
    plt.ylabel('predicted label')

    # NAIVE BAYES CLASSIFIER
    from sklearn.naive_bayes import GaussianNB
    gnb = GaussianNB().fit(X_train, y_train)
    gnb_predictions = gnb.predict(X_test)
    Naiveaccuracy = gnb.score(X_test, y_test)
    print(metrics.classification_report(y_test, gnb_predictions))
    Naivecm = confusion_matrix(y_test, gnb_predictions)
    sns.heatmap(Naivecm.T, square=True, annot=True, fmt='d', cbar=False)
    plt.xlabel('true label')
    plt.ylabel('predicted label')

    # print accuracies
    print("Accuracy scores")
    print("Random Forest Classifier: ", RandomAccuracy)
    print("Decision Tree Classifier: ", DecisionAccuracy)
    print("KNN Classifier: ", KNNaccuracy)
    print("SVM Classifier: ", SVMaccuracy)
    print("Naive Bayes Classifier: ", Naiveaccuracy)