Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
AAGGY
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
23-153
AAGGY
Commits
66137f67
Commit
66137f67
authored
Nov 03, 2023
by
Sajana_it20194130
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
86b0672c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
76 additions
and
0 deletions
+76
-0
RFClassifier.py
RFClassifier.py
+76
-0
No files found.
RFClassifier.py
0 → 100644
View file @
66137f67
# Importing the libraries
import
numpy
as
np
import
matplotlib.pyplot
as
plt
import
pandas
as
pd
import
seaborn
as
sns
import
joblib
# Importing the dataset
# Features are the columns at positions 1 and 2; the target is the last column.
dataset = pd.read_csv('dataset_malwares.csv')
feature_positions = [1, 2]
feature_frame = dataset.iloc[:, feature_positions]
label_series = dataset.iloc[:, -1]
X = feature_frame.values
y = label_series.values
# Data Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Splitting the dataset into the Training set and Test set
# (25% of the rows are held out; the fixed seed keeps the split reproducible).
split_parts = train_test_split(X, y, test_size=0.25, random_state=0)
X_train, X_test, y_train, y_test = split_parts

# Feature Scaling
# The scaler learns its statistics from the training rows only and then
# applies that same transformation to both partitions.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
# Model Selection and Hyperparameter Tuning
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Candidate values for each random-forest hyperparameter; the search
# evaluates every combination of them.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

rf_classifier = RandomForestClassifier(random_state=0)

# 5-fold cross-validation, using all available CPU cores (n_jobs=-1).
search_options = {'cv': 5, 'n_jobs': -1}
grid_search = GridSearchCV(
    estimator=rf_classifier,
    param_grid=param_grid,
    **search_options,
)
grid_search.fit(X_train, y_train)
# Get the best model from the grid search.
# GridSearchCV refits the winning configuration on the whole training set by
# default (refit=True), so best_estimator_ is already trained. Calling fit()
# on it again would only repeat that work and, with the fixed random_state,
# reproduce the same model.
best_rf_classifier = grid_search.best_estimator_

# Predicting the Test set results
y_pred = best_rf_classifier.predict(X_test)
# Model Evaluation
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
)

# Confusion matrix of the held-out predictions, printed before the scores.
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

# Scalar scores for the test set.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='binary')
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='binary')

# Print each score on its own line, rounded to three decimals.
score_report = (
    ('Accuracy: %.3f', accuracy),
    ('Precision: %.3f', precision),
    ('Recall: %.3f', recall),
    ('F1 Score: %.3f', f1),
)
for line_template, score_value in score_report:
    print(line_template % score_value)
# Visualizing the confusion matrix as a heatmap.
# fmt='d' prints each cell as a plain integer count; seaborn's default
# annotation format ('.2g') would show larger counts in scientific notation
# (for example 1.2e+03), hiding the exact numbers.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
# Dumping the trained model and the fitted scaler to files, model first.
artifacts_to_save = (
    (best_rf_classifier, 'random_forest_model.joblib'),
    (sc, 'standard_scaler.joblib'),
)
for fitted_object, output_name in artifacts_to_save:
    joblib.dump(fitted_object, output_name)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment