Commit 66137f67 authored by Sajana_it20194130

Upload New File

parent 86b0672c
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import joblib
# Importing the dataset
# Read the CSV from the current working directory.
dataset = pd.read_csv('dataset_malwares.csv')

# Features: the columns at positions 1 and 2 (column 0 is skipped).
# NOTE(review): which attributes these positions hold depends on the CSV
# layout -- confirm against the dataset header.
features_frame = dataset.iloc[:, [1, 2]]
X = features_frame.values

# Target: the last column of the file.
labels_series = dataset.iloc[:, -1]
y = labels_series.values
# Data Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Splitting the dataset into the Training set and Test set
# 25% of the rows are held out for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

# Feature Scaling
# The scaler is fitted on the training rows only and then applied to both
# partitions, so no statistics from the test set influence the transform.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
# Model Selection and Hyperparameter Tuning
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Exhaustive grid: 3 * 4 * 3 * 3 = 108 candidate settings, each evaluated
# with 5-fold cross-validation on the training set.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# Fixed seed so every candidate forest (and the final model) is reproducible.
rf_classifier = RandomForestClassifier(random_state=0)

# n_jobs=-1 runs the candidate fits on all available cores.
# refit=True (the library default, spelled out here because the code below
# relies on it) retrains the best parameter combination on the whole
# training set once the search finishes.
grid_search = GridSearchCV(
    estimator=rf_classifier,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    refit=True,
)
grid_search.fit(X_train, y_train)

# Get the best model from the grid search
# best_estimator_ is already trained on (X_train, y_train) thanks to
# refit=True, so it is used as-is; fitting it a second time on the same data
# with the same seed would only repeat the final training step.
best_rf_classifier = grid_search.best_estimator_
# Model Evaluation
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

# Predicting the Test set results
y_pred = best_rf_classifier.predict(X_test)

# Confusion matrix of true labels (rows) against predicted labels (columns).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", cm)

# Precision, recall and F1 use binary averaging, i.e. they are reported for
# the positive class only (label 1 by default).
# NOTE(review): presumably label 1 marks the class of interest -- confirm
# against the dataset's label encoding.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
precision = precision_score(y_true=y_test, y_pred=y_pred, average='binary')
recall = recall_score(y_true=y_test, y_pred=y_pred, average='binary')
f1 = f1_score(y_true=y_test, y_pred=y_pred, average='binary')

# Report each score rounded to three decimal places.
print('Accuracy: %.3f' % accuracy)
print('Precision: %.3f' % precision)
print('Recall: %.3f' % recall)
print('F1 Score: %.3f' % f1)
# Visualizing the confusion matrix as a heatmap
# fmt='d' prints each cell as a plain integer count. Without it seaborn
# formats annotations with '.2g', which renders counts of 100 or more in
# rounded scientific notation (e.g. 1.2e+03) and hides the exact numbers.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
# Display the figure; with an interactive backend this call blocks until the
# window is closed.
plt.show()
# Dumping the trained model and the scaler to files
# Both artifacts are written to the current working directory. The scaler is
# saved alongside the model because the model was trained on scaled features.
for artifact, artifact_path in (
    (best_rf_classifier, 'random_forest_model.joblib'),
    (sc, 'standard_scaler.joblib'),
):
    joblib.dump(artifact, artifact_path)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment