Commit 81cdf8d9 authored by Sajana_it20194130

Upload New File

parent 86b0672c
# Natural Language Processing for Predicting IoT Network Anomalies
# Importing the libraries
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import joblib
import numpy as np
# Loading the dataset
# Tab-separated input; quoting=3 is csv.QUOTE_NONE, so quote characters in
# the file are treated as ordinary text rather than field delimiters.
dataset = pd.read_csv('DatasetRF.tsv', sep='\t', quoting=3)
# Cleaning the texts
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Build the stemmer, the stop-word set and the regex once: they do not change
# between rows, so recreating them per word/row is wasted work.
ps = PorterStemmer()
english_stopwords = set(stopwords.words('english'))
non_alphanumeric = re.compile(r'[^a-zA-Z0-9]')

corpus = []
# Iterate over every entry of the 'traffic' column rather than a hard-coded
# range(0, 31): corpus then always holds exactly one cleaned string per
# dataset row, whatever the size of the input file.
for raw_log in dataset['traffic']:
    # Replace every character that is not a letter or digit with a space,
    # lowercase the result and split it into whitespace-separated tokens.
    tokens = non_alphanumeric.sub(' ', raw_log).lower().split()
    # Drop English stop words and reduce the remaining tokens to their stems.
    stems = [ps.stem(word) for word in tokens if word not in english_stopwords]
    corpus.append(' '.join(stems))
# Building the bag-of-words features and the target vector
from sklearn.feature_extraction.text import CountVectorizer
# Targets are read from the second column of the dataset.
y = dataset.iloc[:, 1].values
# The vocabulary is capped at 140 features.
cv = CountVectorizer(max_features=140)
cv.fit(corpus)
# Dense document-term count matrix, one row per corpus entry.
X = cv.transform(corpus).toarray()
# Fitting a random forest regressor on all available samples
from sklearn.ensemble import RandomForestRegressor
# 10 trees; random_state is fixed so repeated runs build the same forest.
# fit() returns the fitted estimator, so construction and training are chained.
regressor = RandomForestRegressor(n_estimators=10, random_state=0).fit(X, y)
# Persisting the fitted regressor and the fitted vectorizer to disk
for artifact, filename in (
    (regressor, 'random_forest_model.joblib'),
    (cv, 'count_vectorizer.joblib'),
):
    joblib.dump(artifact, filename)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment