Project 2021-210 — Commit 4443e165
Authored Jul 04, 2021 by Shalitha Deshan Jayasekara 🏘

Merge branch 'IT18150926_jayasekaraA.P.S.D' into 'master'

Add necessary comments to some parts. See merge request !9

Parents: 0ced088e, 42bb130b

Showing 2 changed files with 26 additions and 6 deletions (+26, -6):
NLTK_model/Ontology_scripts.py  (+3, -0)
NLTK_model/nltk_nlp.py          (+23, -6)
NLTK_model/Ontology_scripts.py

@@ -134,4 +134,7 @@
print(analyzePost("good", "good", ["very", "good", "very good", "brilliant"]))
NLTK_model/nltk_nlp.py

@@ -21,18 +21,21 @@ from flask import request
app = Flask(__name__)

# import the training data sets
positive_tweets = twitter_samples.strings('positive_tweets.json')
negative_tweets = twitter_samples.strings('negative_tweets.json')
text = twitter_samples.strings('tweets.20150430-223406.json')
tweet_tokens = twitter_samples.tokenized('positive_tweets.json')

# print(tweet_tokens[0])
# print(negative_tweets)
# print(positive_tweets)
# print(pos_tag(tweet_tokens[0]))

# reduce the words contained in the sentence to their base forms
def lemmatize_sentence(tokens):
    lemmatizer = WordNetLemmatizer()
    # https://wordnet.princeton.edu/
    lemmatized_sentence = []
    # tag words with the NLTK POS tagger: https://www.nltk.org/book/ch05.html
    for word, tag in pos_tag(tokens):
@@ -42,7 +45,7 @@ def lemmatize_sentence(tokens):
            pos = 'v'
        else:
            pos = 'a'
        # Append object to the end of the list.
        lemmatized_sentence.append(lemmatizer.lemmatize(word, pos))
    return lemmatized_sentence

# print(tweet_tokens[0])
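Pieced together from this hunk and the previous one, a self-contained, runnable version of lemmatize_sentence might look as follows. The diff elides the branch ahead of pos = 'v', so the NN-to-'n' mapping below is an assumption based on the usual NLTK part-of-speech pattern, not something visible in this commit.

# Sketch: consolidated lemmatize_sentence; the NN* branch is assumed.
from nltk import pos_tag
from nltk.stem.wordnet import WordNetLemmatizer

def lemmatize_sentence(tokens):
    lemmatizer = WordNetLemmatizer()
    lemmatized_sentence = []
    for word, tag in pos_tag(tokens):
        if tag.startswith('NN'):
            pos = 'n'   # noun
        elif tag.startswith('VB'):
            pos = 'v'   # verb
        else:
            pos = 'a'   # adjective (default)
        lemmatized_sentence.append(lemmatizer.lemmatize(word, pos))
    return lemmatized_sentence

# Example: prints ['I', 'be', 'run']
print(lemmatize_sentence(['I', 'was', 'running']))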
@@ -78,6 +81,8 @@ stop_words = stopwords.words('english')
# print(remove_noise(tweet_tokens[0], stop_words))

# read the given files as lists of tokenized tweets: words, screen names, hashtags, URLs and punctuation symbols
positive_tweet_tokens = twitter_samples.tokenized('positive_tweets.json')
negative_tweet_tokens = twitter_samples.tokenized('negative_tweets.json')
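The remove_noise function itself is elided from this diff. A plausible sketch, assuming the common NLTK-tutorial implementation that strips URLs, @-mentions, punctuation and stop words while lemmatizing each token; the actual body in nltk_nlp.py is not shown in this commit:

# Assumed shape of remove_noise; not taken from the repository.
import re
import string
from nltk import pos_tag
from nltk.stem.wordnet import WordNetLemmatizer

def remove_noise(tweet_tokens, stop_words=()):
    lemmatizer = WordNetLemmatizer()
    cleaned_tokens = []
    for token, tag in pos_tag(tweet_tokens):
        token = re.sub(r'https?://\S+', '', token)    # strip URLs
        token = re.sub(r'@[A-Za-z0-9_]+', '', token)  # strip @-mentions
        pos = 'n' if tag.startswith('NN') else 'v' if tag.startswith('VB') else 'a'
        token = lemmatizer.lemmatize(token, pos)
        if token and token not in string.punctuation and token.lower() not in stop_words:
            cleaned_tokens.append(token)
    return cleaned_tokens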
@@ -102,16 +107,21 @@ def get_all_words(cleaned_tokens_list):
all_pos_words = get_all_words(positive_cleaned_tokens_list)

# get the frequency distribution of positive words
freq_dist_pos = FreqDist(all_pos_words)
# print(freq_dist_pos.most_common(10))

# convert each cleaned token list into the feature mapping the classifier expects
def get_words_for_model(cleaned_tokens_list):
    for tweet_tokens in cleaned_tokens_list:
        yield dict([token, True] for token in tweet_tokens)

# link the data sets again
positive_tokens_for_model = get_words_for_model(positive_cleaned_tokens_list)
negative_tokens_for_model = get_words_for_model(negative_cleaned_tokens_list)

# label the data sets
positive_dataset = [(tweet_dict, "Positive")
                    for tweet_dict in positive_tokens_for_model]
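One detail worth noting in get_words_for_model: dict([token, True] for token in tweet_tokens) feeds dict() two-element lists, so it is equivalent to the comprehension {token: True for token in tweet_tokens}, which is the feature-dict format NLTK's NaiveBayesClassifier expects. A quick sanity check:

# Each [token, True] list acts as a (key, value) pair when passed to dict().
tokens = ['good', 'supplier']
assert dict([t, True] for t in tokens) == {t: True for t in tokens}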
@@ -120,6 +130,7 @@ negative_dataset = [(tweet_dict, "Negative")
# create a single data set with both negative and positive data sets prepared
dataset = positive_dataset + negative_dataset
# print(dataset)

# shuffle the data set to mix positive and negative examples before splitting into train and test data
random.shuffle(dataset)
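The split and training step between this hunk and the next is elided; the next hunk header does show classifier and test_data in use, so a minimal sketch of what presumably sits there, assuming the standard NaiveBayesClassifier setup (the 7000/3000 split is an assumption based on the 10,000-tweet corpus, not visible in the diff):

# Assumed shape of the elided training code; split sizes are a guess.
from nltk import classify, NaiveBayesClassifier

train_data = dataset[:7000]   # first 70% for training
test_data = dataset[7000:]    # remaining 30% for evaluation

classifier = NaiveBayesClassifier.train(train_data)
print("Accuracy is:", classify.accuracy(classifier, test_data))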
@@ -138,15 +149,21 @@ print("Accuracy is:", classify.accuracy(classifier, test_data))
# custom_text = "This is a bad supplier."
# custom_text = "This is a very good supplier."
# custom_text = "This is a very good supplier. but there was some delay in shipping."
custom_text = "This is a very good supplier. but there was some delay in shipping. but it is okay."
custom_text = "This is a bad post. it gives out wrong idea to the people."

custom_tokens = remove_noise(word_tokenize(custom_text))
# print(custom_tokens)

# test print
print(classifier.classify(dict([token, True] for token in custom_tokens)))

# API endpoint
# Flask API to be used in the backend
@app.route("/NLP")
def hello():
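The body of hello() is cut off at the end of this diff. A hypothetical completion inside nltk_nlp.py, reusing request and word_tokenize already imported in the file; the "text" query parameter and the plain-string response are assumptions, not taken from the repository:

# Hypothetical completion of the truncated hello() handler.
@app.route("/NLP")
def hello():
    custom_text = request.args.get("text", "")  # assumed parameter name
    custom_tokens = remove_noise(word_tokenize(custom_text))
    # reuse the {token: True} feature format the classifier was trained on
    return classifier.classify(dict([token, True] for token in custom_tokens))

# Example call once the app is running:
#   curl "http://localhost:5000/NLP?text=This+is+a+good+supplier"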