Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
Anomaly Detection in Microservices Systems
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
sashika sewwandi
Anomaly Detection in Microservices Systems
Commits
e7eccb44
Commit
e7eccb44
authored
Oct 10, 2022
by
sashika sewwandi
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
b90f08ef
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
139 additions
and
0 deletions
+139
-0
machine.py
machine.py
+139
-0
No files found.
machine.py
0 → 100644
View file @
e7eccb44
import
pandas
as
pd
import
numpy
as
np
import
seaborn
as
sb
import
sklearn
as
sk
import
warnings
from
sklearn
import
svm
from
sklearn
import
tree
from
sklearn
import
ensemble
from
sklearn.ensemble
import
RandomForestClassifier
from
sklearn.metrics
import
confusion_matrix
from
sklearn.linear_model
import
LogisticRegression
from
sklearn.model_selection
import
train_test_split
from
sklearn.neural_network
import
MLPClassifier
# Silence every warning for the rest of the run.
# NOTE(review): this is a blanket filter, so deprecation warnings from the
# libraries used below are hidden as well.
warnings.filterwarnings('ignore')

# Read the two source data sets; both paths are relative to the current
# working directory.
dataset_01 = pd.read_csv("valid_avengers_dataset.csv")
dataset_02 = pd.read_csv('invalid_thor_dataset.csv')

# Combine the data sets into a single frame with a fresh 0..n-1 index.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat stacks the rows the same way and works on old and new versions.
df = pd.concat([dataset_01, dataset_02], ignore_index=True)
# Add a synthetic 'views' column: rows are grouped by 'id' and each group is
# filled with a random integer drawn from [5000, 10000000).
# NOTE(review): the random generator is not seeded in this script, so the
# generated values differ from run to run.
rows_by_id = df.groupby('id')['id']
df['views'] = rows_by_id.transform(
    lambda id_group: np.random.randint(5000, 10000000)
)
# Columns that are not used as model features; they are removed from the
# frame before any encoding or training happens.
cols_to_drop = [
    'id', 'conversation_id', 'created_at', 'date', 'time', 'timezone',
    'user_id', 'username', 'name', 'place', 'tweet', 'mentions', 'urls',
    'photos', 'hashtags', 'cashtags', 'link', 'quote_url', 'thumbnail',
    "near", "geo", "source", "user_rt_id", "user_rt", "retweet_id",
    "reply_to", "retweet_date", "translate", "trans_src", "trans_dest",
    "replies_count", "retweets_count",
]
df = df.drop(columns=cols_to_drop)
# Missing 'retweet' flags are treated as True.
df['retweet'] = df['retweet'].fillna(True)

# Discard every row that still has a missing value in any column, then
# discard exact duplicate rows.
df = df.dropna().drop_duplicates()
# One-hot encode the categorical columns 'architecture', 'retweet', 'sex'
# and 'language'. The indicator columns are named after the category values
# themselves (no prefix is added).
architectureColumnDummy = pd.get_dummies(df['architecture'])
retweetColumnDummy = pd.get_dummies(df['retweet'])
sexColumnDummy = pd.get_dummies(df['sex'])
languageColumnDummy = pd.get_dummies(df['language'])

# Append the indicator columns to the right of the existing columns, then
# remove the source columns that have just been encoded.
df = pd.concat(
    [
        df,
        architectureColumnDummy,
        retweetColumnDummy,
        sexColumnDummy,
        languageColumnDummy,
    ],
    axis=1,
).drop(columns=['architecture', 'retweet', 'sex', 'language'])
# Model training step.
# Separate the dataframe into the label vector y ('validation' column) and
# the feature matrix X (every other column).
# The label is removed from X by name. Deleting it by a hard-coded position
# (index 1) leaks the label into the features, and drops a real feature
# instead, whenever 'validation' is not the second column of df.
y = df['validation'].values
X = df.drop(columns=['validation']).values

# Split the dataset into 70% training and 30% test. random_state is fixed so
# the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)
# Decision Tree Classifier: tree depth capped at 5, fitted on the training
# split and scored on the held-out test split.
dt_clf = tree.DecisionTreeClassifier(max_depth=5).fit(X_train, y_train)
print("Decision Tree Classifier score: ", dt_clf.score(X_test, y_test))

# Keep the test-split predictions of the decision tree in y_pred.
y_pred = dt_clf.predict(X_test)
# Confusion matrix for the predictions held in y_pred against the test
# labels. The result used to be computed and thrown away, which does nothing
# when the file runs as a script, so it is printed instead.
print("Decision Tree Classifier confusion matrix: ", confusion_matrix(y_test, y_pred))

# Random Forest Classifier algorithm: 100 trees, all other settings left at
# the library defaults; fitted on the training split and scored on the test
# split.
rf_clf = ensemble.RandomForestClassifier(n_estimators=100)
rf_clf.fit(X_train, y_train)
print("Random Forest Classifier algorrithm score", rf_clf.score(X_test, y_test))
# Random Forest Classifier with adjusted settings: 1000 trees, depth capped
# at 10, fixed seed.
RF = RandomForestClassifier(
    n_estimators=1000,
    max_depth=10,
    random_state=0,
).fit(X_train, y_train)
print("Random Forest Classifier algorrithm after tune up")
# The first figure is measured on the complete X / y (rounded to 4 decimals),
# the second on the test split only.
print("Random Forest Classifier algorrithm round up score", round(RF.score(X, y), 4))
print("Random Forest Classifier algorrithm score", RF.score(X_test, y_test))
# Gradient Boosting Classifier with the library's default settings, fitted
# on the training split and scored on the test split.
gb_clf = ensemble.GradientBoostingClassifier().fit(X_train, y_train)
print("Gradient Boosting Classifier algorrithm score", gb_clf.score(X_test, y_test))
# Gradient Boosting Classifier with 50 boosting stages. This rebinds gb_clf,
# replacing the model fitted with default settings.
gb_clf = ensemble.GradientBoostingClassifier(n_estimators=50).fit(X_train, y_train)
print("Gradient Boosting Classifier algorrithm after tune up")
print("Gradient Boosting Classifier algorrithm score", gb_clf.score(X_test, y_test))
# Logistic Regression model using the lbfgs solver and a fixed seed, fitted
# on the training split.
logreg = LogisticRegression(solver='lbfgs', random_state=0).fit(X_train, y_train)

# The first figure is measured on the complete X / y (rounded to 4 decimals),
# the second on the test split only.
print(
    "Logistic Regression Classifier algorrithm round up score: ",
    round(logreg.score(X, y), 4),
)
print(
    "Logistic Regression Classifier algorrithm score: ",
    logreg.score(X_test, y_test),
)
# Logistic Regression again, this time with multi_class='multinomial'
# requested explicitly; same solver and seed as before.
LR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
LR.fit(X_train, y_train)
print("Logistic Regression Classifier algorrithm after tune up")
# Both figures are the accuracy on the test split: first rounded to 4
# decimals, then unrounded.
print(
    "Logistic Regression Classifier algorrithm round up score: ",
    round(LR.score(X_test, y_test), 4),
)
print(
    "Logistic Regression Classifier algorrithm score: ",
    LR.score(X_test, y_test),
)
# Linear support vector classifier with the library's default settings,
# fitted on the training split.
SVM = svm.LinearSVC().fit(X_train, y_train)

# The first figure is measured on the complete X / y (rounded to 4 decimals),
# the second on the test split only.
print(
    "Suport vector machine classifier algorrithm round up score: ",
    round(SVM.score(X, y), 4),
)
print(
    "Suport vector machine Classifier algorrithm score: ",
    SVM.score(X_test, y_test),
)
# Support vector classifier (SVC) with decision_function_shape="ovo".
# This rebinds SVM, replacing the LinearSVC model.
SVM = svm.SVC(decision_function_shape="ovo")
SVM.fit(X_train, y_train)
print("Suport vector machine Classifier algorrithm after tune up")
# Both figures are the accuracy on the test split: first rounded to 4
# decimals, then unrounded.
print(
    "Suport vector machine classifier algorrithm round up score: ",
    round(SVM.score(X_test, y_test), 4),
)
print(
    "Suport vector machine classifier algorrithm score: ",
    SVM.score(X_test, y_test),
)
# Multi-layer perceptron with two hidden layers of 5 and 2 units, lbfgs
# solver, alpha=1e-5 and a fixed seed; fitted on the training split.
NN = MLPClassifier(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(5, 2),
    random_state=1,
).fit(X_train, y_train)

# The first figure is measured on the complete X / y (rounded to 4 decimals),
# the second on the test split only.
print(
    "Multi layer perceptron Classifier algorrithm round up score: ",
    round(NN.score(X, y), 4),
)
print(
    "Multi layer perceptron classifier algorrithm score: ",
    NN.score(X_test, y_test),
)
# Larger multi-layer perceptron: hidden layers of 150 and 10 units, other
# settings unchanged. This rebinds NN, replacing the smaller network.
NN = MLPClassifier(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(150, 10),
    random_state=1,
)
NN.fit(X_train, y_train)
print("Multi layer perceptron Classifier algorrithm after tune up")
# The first figure is measured on the complete X / y (rounded to 4 decimals),
# the second on the test split only.
print(
    "Multi layer perceptron Classifier algorrithm round up score: ",
    round(NN.score(X, y), 4),
)
print(
    "Multi layer perceptron classifier algorrithm score: ",
    NN.score(X_test, y_test),
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment