Commit cd245b26 authored by Dhananjaya Jayashanka's avatar Dhananjaya Jayashanka

emotion analysis file changes done

parent 118a1e7b
import string
from collections import Counter
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
def textAnalyze(speech):
text = speech
# converting to lowercase
lower_case = text.lower()
# Removing punctuations of the full text
cleaned_text = lower_case.translate(str.maketrans('', '', string.punctuation))
# splitting text into words
tokenized_words = cleaned_text.split()
stop_words = ["i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself",
"yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself",
"they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that",
"these",
"those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having",
"do",
"does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while",
"of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before",
"after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under",
"again",
"further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both",
"each",
"few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so",
"than",
"too", "very", "s", "t", "can", "will", "just", "don", "should", "now"]
# Removing stop words from the tokenized words list
# Using word_tokenize because it's faster than split()
tokenized_words = word_tokenize(cleaned_text, "english")
# Removing Stop Words
final_words = []
for word in tokenized_words:
if word not in stop_words:
if word not in stopwords.words('english'):
final_words.append(word)
# Lemmatization
lemma_words = []
for word in final_words:
word = WordNetLemmatizer().lemmatize(word)
lemma_words.append(word)
emotion_list = []
for i in final_words:
......@@ -44,8 +34,16 @@ def textAnalyze(speech):
clear_line = line.replace("\n", '').replace(",", '').replace("'", '').strip()
word, emotion = clear_line.split(':')
if i in word:
final_emotions = emotion.replace("", '').strip()
emotion_list.append(final_emotions)
final_emotion = emotion.replace("", '').strip()
def sentiment_analyse(sentiment_text):
score = SentimentIntensityAnalyzer().polarity_scores(sentiment_text)
if score['neg'] > score['pos']:
emotion_list.append(final_emotion)
elif score['neg'] < score['pos']:
emotion_list.append(final_emotion)
else:
emotion_list.append(final_emotion)
sentiment_analyse(word)
print(emotion_list)
w = emotion_list
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment