Commit 4443e165 authored by Shalitha Deshan Jayasekara

Merge branch 'IT18150926_jayasekaraA.P.S.D' into 'master'

Add necessary comments to some parts

See merge request !9
parents 0ced088e 42bb130b
......@@ -134,4 +134,7 @@ print(analyzePost(
"good",
"good", [
"very",
"good",
"very good",
"brilliant"
]))
......@@ -21,18 +21,21 @@ from flask import request
app = Flask(__name__)
# load the training data sets
positive_tweets = twitter_samples.strings('positive_tweets.json')
negative_tweets = twitter_samples.strings('negative_tweets.json')
text = twitter_samples.strings('tweets.20150430-223406.json')
tweet_tokens = twitter_samples.tokenized('positive_tweets.json')
# print(tweet_tokens[0])
# print(negative_tweets)
# print(positive_tweets)
# print(pos_tag(tweet_tokens[0]))
# reduce the words in the sentence to their base (lemma) forms
def lemmatize_sentence(tokens):
    lemmatizer = WordNetLemmatizer()  # https://wordnet.princeton.edu/
    lemmatized_sentence = []
    # tag words with the NLTK POS tagger: https://www.nltk.org/book/ch05.html
    for word, tag in pos_tag(tokens):
......@@ -42,7 +45,7 @@ def lemmatize_sentence(tokens):
            pos = 'v'
        else:
            pos = 'a'
        lemmatized_sentence.append(lemmatizer.lemmatize(word, pos))  # append object to the end of the list
    return lemmatized_sentence
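# Illustration (not part of the original commit): a hedged example of what
# lemmatize_sentence produces on a hand-made token list.
# sample_tokens = ['the', 'suppliers', 'are', 'shipping', 'orders', 'quickly']
# print(lemmatize_sentence(sample_tokens))
# Expected output (approximately): ['the', 'supplier', 'be', 'ship', 'order', 'quickly']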
# print(tweet_tokens[0])
......@@ -78,6 +81,8 @@ stop_words = stopwords.words('english')
# print(remove_noise(tweet_tokens[0], stop_words))
# tokenized() returns the text content of the given file's tweets as lists of words, screen names, hashtags, URLs and punctuation symbols
positive_tweet_tokens = twitter_samples.tokenized('positive_tweets.json')
negative_tweet_tokens = twitter_samples.tokenized('negative_tweets.json')
......@@ -102,16 +107,21 @@ def get_all_words(cleaned_tokens_list):
all_pos_words = get_all_words(positive_cleaned_tokens_list)
# get the frequency distribution of the positive words
freq_dist_pos = FreqDist(all_pos_words)
# print(freq_dist_pos.most_common(10))
# convert the token lists into the dict format the classifier expects; empty token lists are carried through as well
def get_words_for_model(cleaned_tokens_list):
    for tweet_tokens in cleaned_tokens_list:
        yield dict([token, True] for token in tweet_tokens)
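# Illustration (not part of the original commit): each yielded item maps every
# token to True, which is the feature format NLTK classifiers accept, e.g.
# next(get_words_for_model([['good', 'supplier']]))  ->  {'good': True, 'supplier': True}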
# prepare the positive and negative data sets for the model
positive_tokens_for_model = get_words_for_model(positive_cleaned_tokens_list)
negative_tokens_for_model = get_words_for_model(negative_cleaned_tokens_list)
# label data set
positive_dataset = [(tweet_dict, "Positive")
                    for tweet_dict in positive_tokens_for_model]
......@@ -120,6 +130,7 @@ negative_dataset = [(tweet_dict, "Negative")
# Create a single data set with both negative and positive data sets prepared
dataset = positive_dataset + negative_dataset
# print(dataset)
# Shuffle the data set so that positive and negative examples are mixed before the split into train and test data
random.shuffle(dataset)
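# The diff elides the split/training step that must exist before the accuracy
# print below (classifier and test_data are used there); a hedged sketch of
# what it plausibly contains, assuming the standard NLTK tutorial's 7000/3000
# split of the 10,000 tweets:
# train_data = dataset[:7000]
# test_data = dataset[7000:]
# classifier = NaiveBayesClassifier.train(train_data)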
......@@ -138,15 +149,21 @@ print("Accuracy is:", classify.accuracy(classifier, test_data))
# custom_text = "This is a bad supplier."
# custom_text = "This is a very good supplier."
# custom_text = "This is a very good supplier. but there was some delay in shipping."
custom_text = "This is a very good supplier. but there was some delay in shipping. but it is okay."
custom_text = "This is a bad post. it gives out wrong idea to the people."
custom_tokens = remove_noise(word_tokenize(custom_text))
# print(custom_tokens)
# Test print
print(classifier.classify(dict([token, True] for token in custom_tokens)))
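# A hedged refactoring sketch (not part of the original commit): the two steps
# above could be wrapped in a helper so the Flask endpoint below can reuse them.
# def classify_text(text):
#     tokens = remove_noise(word_tokenize(text))
#     return classifier.classify(dict([token, True] for token in tokens))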
# API endpoint
# Flask API to be used in backend
@app.route("/NLP")
def hello():
......
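# The diff is truncated above; a hedged sketch of how the endpoint body might
# continue, assuming a hypothetical "text" query parameter (the real parameter
# name is not shown in this commit):
#     text = request.args.get("text", "")
#     custom_tokens = remove_noise(word_tokenize(text))
#     return classifier.classify(dict([token, True] for token in custom_tokens))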