Upload New File

66137f67 · Sajana_it20194130 · 86b0672c · 66137f67
Commit 66137f67 authored Nov 03, 2023 by Sajana_it20194130
Hide whitespace changes
Inline Side-by-side

Showing with 76 additions and 0 deletions

RFClassifier.py RFClassifier.py +76 -0

No files found.
--- a/RFClassifier.py
+++ b/RFClassifier.py
+# Importing the libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+import joblib
+
+# Train, evaluate and persist a RandomForest classifier.
+#
+# Steps: load 'dataset_malwares.csv', split 75/25, standardize the features,
+# grid-search RandomForest hyperparameters with 5-fold CV, report metrics on
+# the held-out split, save the fitted model and scaler with joblib, and
+# finally display the confusion matrix as a heatmap.
+
+# Importing the dataset
+dataset = pd.read_csv('dataset_malwares.csv')
+# Features are the columns at positions 1 and 2; the label is the last column.
+# NOTE(review): only two feature columns are used -- confirm this is intended.
+X = dataset.iloc[:, 1:3].values
+y = dataset.iloc[:, -1].values
+
+# Data Preprocessing
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import train_test_split
+
+# Splitting the dataset into the Training set and Test set
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
+
+# Feature Scaling: fit on the training split only, then reuse the same
+# statistics for the test split so no test information reaches the scaler.
+sc = StandardScaler()
+X_train = sc.fit_transform(X_train)
+X_test = sc.transform(X_test)
+
+# Model Selection and Hyperparameter Tuning
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import GridSearchCV
+
+param_grid = {
+    'n_estimators': [100, 200, 300],
+    'max_depth': [None, 10, 20, 30],
+    'min_samples_split': [2, 5, 10],
+    'min_samples_leaf': [1, 2, 4],
+}
+
+rf_classifier = RandomForestClassifier(random_state=0)
+grid_search = GridSearchCV(estimator=rf_classifier, param_grid=param_grid, cv=5, n_jobs=-1)
+grid_search.fit(X_train, y_train)
+
+# Get the best model from the grid search.
+# GridSearchCV uses refit=True by default, so best_estimator_ has already been
+# re-trained on the full training set; fitting it again would only repeat work.
+best_rf_classifier = grid_search.best_estimator_
+
+# Predicting the Test set results
+y_pred = best_rf_classifier.predict(X_test)
+
+# Model Evaluation
+from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
+
+cm = confusion_matrix(y_test, y_pred)
+print("Confusion Matrix:\n", cm)
+
+# average='binary' is spelled out on all three metrics for consistency.
+# NOTE(review): this assumes the label column is binary -- confirm.
+accuracy = accuracy_score(y_test, y_pred)
+precision = precision_score(y_test, y_pred, average='binary')
+recall = recall_score(y_test, y_pred, average='binary')
+f1 = f1_score(y_test, y_pred, average='binary')
+
+print('Accuracy: %.3f' % accuracy)
+print('Precision: %.3f' % precision)
+print('Recall: %.3f' % recall)
+print('F1 Score: %.3f' % f1)
+
+# Persist the artifacts before plotting: plt.show() can block until the
+# figure window is closed, and the files should not depend on that.
+# Dumping the trained model to a file
+joblib.dump(best_rf_classifier, 'random_forest_model.joblib')
+
+# Dumping the scaler
+joblib.dump(sc, 'standard_scaler.joblib')
+
+# Visualizing the confusion matrix as a heatmap.
+# fmt='d' prints the integer counts as-is; the default format would show
+# larger counts in scientific notation.
+sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
+plt.title('Confusion Matrix')
+plt.xlabel('Predicted Label')
+plt.ylabel('True Label')
+plt.show()
\ No newline at end of file