23-153 / AAGGY · Commits

Commit 5ad2b0c1 authored Nov 03, 2023 by Sajana_it20194130
parent 86b0672c

Showing 1 changed file with 104 additions and 0 deletions

NLP.py (new file, mode 100644): +104 −0
# Natural Language Processing for Predicting Network Anomalies
# Importing the libraries
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import joblib

# Importing the dataset
dataset = pd.read_csv('dataset.tsv', delimiter='\t', quoting=3)
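# quoting=3 is csv.QUOTE_NONE: quote characters inside the tab-separated log text are
# read literally rather than treated as field quoting.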
# Cleaning the texts
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

corpus = []
for i in range(0, 5550):
    log = re.sub('[^a-zA-Z0-9]', ' ', dataset['Info'][i])
    log = log.lower()
    log = log.split()
    ps = PorterStemmer()
    log = [ps.stem(word) for word in log if not word in set(stopwords.words('english'))]
    log = ' '.join(log)
    corpus.append(log)
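# Sketch (assumption, not part of the original script): the same cleaning steps wrapped in a
# reusable helper, so unseen log lines can be preprocessed exactly like the training corpus
# at inference time. The name clean_log and the cached stopword set are illustrative choices.
english_stopwords = set(stopwords.words('english'))
stemmer = PorterStemmer()

def clean_log(text):
    # Keep letters and digits only, lowercase, drop English stopwords, and stem,
    # mirroring the loop above.
    tokens = re.sub('[^a-zA-Z0-9]', ' ', text).lower().split()
    return ' '.join(stemmer.stem(tok) for tok in tokens if tok not in english_stopwords)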
# Creating the Bag of Words model
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=3000)
X = cv.fit_transform(corpus).toarray()
y = dataset.iloc[:, 1].values
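# Sketch (assumption, not part of the original script): unseen text must be vectorized with
# cv.transform, not fit_transform, so it is mapped onto the 3000-word vocabulary fitted on
# the corpus above. The example string is illustrative only.
example_vector = cv.transform(['tcp retransmission request']).toarray()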
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)
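# Sketch (assumption, not part of the original script): anomaly labels are often imbalanced,
# so a stratified split keeps the class ratio identical in the training and test sets.
# The *_strat names are illustrative; stratify is a standard train_test_split parameter.
X_train_strat, X_test_strat, y_train_strat, y_test_strat = train_test_split(
    X, y, test_size=0.20, random_state=0, stratify=y)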
# Training the Naive Bayes model on the Training set
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)
# Predicting the Test set results
y_pred = classifier.predict(X_test)
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix")
print(cm)
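# Sketch (assumption, not part of the original script): classification_report summarises
# precision, recall, and F1 for every class in one call, complementing the individual
# scores computed below.
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))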
# Accuracy
from sklearn.metrics import accuracy_score
# y_test is the actual values, y_pred is the predicted values
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy: %.3f' % accuracy)
# Precision Score
from sklearn.metrics import precision_score
# y_test is the actual values, y_pred is the predicted values
precision = precision_score(y_test, y_pred, average='binary')
print('Precision: %.3f' % precision)
# Recall Score
from sklearn.metrics import recall_score
# y_test is the actual values, y_pred is the predicted values
recall = recall_score(y_test, y_pred)
print('Recall: %.3f' % recall)
# F1 Score
from sklearn.metrics import f1_score
# y_test is the actual values, y_pred is the predicted values
f1 = f1_score(y_test, y_pred, average='binary')
print('F1 Score: %.3f' % f1)
print("Predicted Outcomes of the Model")
print(y_pred)
print("Actual Outcomes of the Predicted Results")
print(y_test)
# Visualizing the confusion matrix as a heatmap
sns.heatmap(cm, annot=True, cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
# Calculating the number of true positives, true negatives, false positives, and false negatives
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
# Creating a bar chart to show the comparison between the predicted and true outcomes
fig, ax = plt.subplots()
ax.bar(['True Negatives', 'False Positives', 'False Negatives', 'True Positives'],
       [tn, fp, fn, tp])
ax.set_xlabel('Outcome')
ax.set_ylabel('Count')
ax.set_title('Comparison of Predicted and True Outcomes')
plt.show()
########################################
# Dumping the trained model to a file
joblib.dump(classifier, 'naive_bayes_model.joblib')

# Dumping the CountVectorizer
joblib.dump(cv, 'count_vectorizer.joblib')
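# Minimal inference sketch (assumption, not part of the original script): reload the dumped
# artifacts and classify a new log line. The file names match the joblib.dump calls above;
# the new_log text and loaded_* names are illustrative. For best results the raw text should
# first receive the same cleaning (stemming, stopword removal) applied to the training corpus.
loaded_cv = joblib.load('count_vectorizer.joblib')
loaded_classifier = joblib.load('naive_bayes_model.joblib')
new_log = 'standard query response for dns server'
new_features = loaded_cv.transform([new_log]).toarray()
print(loaded_classifier.predict(new_features))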