Commit 4fcc4668 authored by Dhananjaya Jayashanka's avatar Dhananjaya Jayashanka

content analyzing changes done

parent e56fb7de
'I would like to conclude': 'summary',
'According to an analysis': 'summary',
'conclude by saying': 'summary',
'final note': 'summary',
'I close by saying': 'summary',
'i should like to conclude by saying': 'summary',
'I should like to finish by saying': 'summary',
'I shall conclude by saying': 'summary',
'I want to conclude by saying': 'summary',
'I will close by saying': 'summary',
'i will conclude': 'summary',
'i will conclude by saying': 'summary',
'i will end by saying': 'summary',
'i will finish by saying': 'summary',
'i would end by saying': 'summary',
'i would like to conclude by stating': 'summary',
'i would like to end by saying': 'summary',
'in closing': 'summary',
'in conclusion': 'summary',
'let me close by saying': 'summary',
'let me conclude by saying': 'summary',
'let me finish by saying': 'summary',
'may i conclude by saying': 'summary',
'i would like briefly': 'summary',
'i would like to refer briefly': 'summary',
'i would like to return briefly': 'summary',
'let me briefly present': 'summary',
'let me briefly say': 'summary',
'let me briefly touch': 'summary',
'let me comment briefly': 'summary',
'let me quickly': 'summary',
'let me turn briefly': 'summary',
'allow me to say a few': 'summary',
'allow me to touch briefly': 'summary',
'i shall refer briefly': 'summary',
'i should like to refer briefly': 'summary',
'i want to say a couple': 'summary',
'i want to say a few': 'summary',
'i will briefly mention': 'summary',
'i will briefly summarize': 'summary',
'i will comment briefly': 'summary',
'i will refer briefly': 'summary',
'i will touch briefly': 'summary',
'i wish to say a few': 'summary',
'i would like to say a few': 'summary',
'i would like to say a word': 'summary',
'i would like to speak briefly': 'summary',
'let me briefly turn': 'summary',
'let me say a few': 'summary',
'let me touch briefly': 'summary',
'Let me briefly': 'summary',
'tell you a little story': 'story',
'told a story': 'story',
'let me tell you a story': 'story',
'thanks': 'thanking',
'ta muchly': 'thanking',
'thanks a bunch': 'thanking',
'thank you very much': 'thanking',
'many thanks': 'thanking',
'i thank you': 'thanking',
'thanks a million': 'thanking',
'it is hard to find words to express my gratitude': 'thanking',
'merci': 'thanking',
'with gratitude': 'thanking',
'i thank you from the bottom of my heart': 'thanking',
'accept my endless gratitude': 'thanking',
'thank you so much': 'thanking',
'i am all gratitude': 'thanking',
'i am grateful': 'thanking',
'i will never forget what you have done': 'thanking',
'thank you kindly': 'thanking',
'cheers': 'thanking',
'i appreciate that': 'thanking',
'respectfully yours with sincere gratitude': 'thanking',
'accord a thank': 'thanking',
'gracias': 'thanking',
'i thank you most warmly': 'thanking',
'i wish to thank everyone who pitched in': 'thanking',
'give thanks': 'thanking',
'much thanks': 'thanking',
'show appreciation': 'thanking',
'thanks very much': 'thanking',
'with kindest personal regards': 'thanking',
'appreciate it': 'thanking',
'thank you for helping me': 'thanking',
'you are a lifesaver': 'thanking',
'you have my gratitude': 'thanking',
'your generosity overwhelms me': 'thanking',
'i humbly thank you': 'thanking',
'it was so awesome of you': 'thanking',
'thanks so much': 'thanking',
'by all means': 'thanking',
'i wanted to thank you as soon as possible': 'thanking',
'nice one': 'thanking',
'of course': 'thanking',
'what would i do without you': 'thanking',
'i will forever be beholden to you': 'thanking',
'much appreciated': 'thanking',
'my gratitude to you for all you have done': 'thanking',
'thank you for never letting me down': 'thanking',
'thanks for everything': 'thanking',
'please accept my best thanks': 'thanking',
'bless you': 'thanking',
'delighted': 'thanking',
'how can i show you how grateful i am': 'thanking',
'i appreciate your time': 'thanking',
'thanks for taking the time to think of me': 'thanking',
'accept my deepest thanks': 'thanking',
'certainly': 'thanking',
'charmed': 'thanking',
'consider yourself heartily thanked': 'thanking',
'excellent': 'thanking',
'how can i ever possibly thank you': 'thanking',
'i cannot express my appreciation': 'thanking',
'splendid': 'thanking',
'thanks a ton': 'thanking',
'you are the best': 'thanking',
'all i can say is thanks': 'thanking',
'all my love and thanks to you': 'thanking',
'appreciate your feedback': 'thanking',
'appreciate your help': 'thanking',
'appreciate your input': 'thanking',
'blessings': 'thanking',
'expressing thanks': 'thanking',
'how can i repay you': 'thanking',
'i cannot thank you enough': 'thanking',
'i owe you one': 'thanking',
'i really appreciate it': 'thanking',
'i really appreciate that': 'thanking',
'i'm really grateful': 'thanking',
'if anyone deserves thanks it is you': 'thanking',
'sincerely': 'thanking',
'thank you for your thoughtfulness': 'thanking',
'thanks for your consideration': 'thanking',
'thanks heaps': 'thanking',
'that's so kind of you': 'thanking',
'warmest greetings to all': 'thanking',
'warmly': 'thanking',
'with appreciation': 'thanking',
'with sincere appreciation': 'thanking',
'with sincere thanks': 'thanking',
'don't know what to say': 'thanking',
'words are powerless to express my gratitude': 'thanking',
'words cannot describe how thankful i am': 'thanking',
'can't describe how thankful i am': 'thanking',
'how thoughtful of you': 'thanking',
'i can't thank you enough': 'thanking',
'i owe you big time': 'thanking',
'i really appreciate': 'thanking',
'i really appreciate your help': 'thanking',
'i would like to thank you': 'thanking',
'i'll forever be grateful': 'thanking',
'i'm grateful for your assistance': 'thanking',
'i'm really grateful for your help': 'thanking',
'i'm so grateful': 'thanking',
'if anyone deserve thanks it's you': 'thanking',
'it would be greatly appreciated': 'thanking',
'it's very kind of you': 'thanking',
'mercy bucket': 'thanking',
'mercy buckets': 'thanking',
'million thanks to you': 'thanking',
'my gratitude knows no bounds': 'thanking',
'oh you shouldn't have': 'thanking',
'please accept my deepest thanks': 'thanking',
'thank you for everything': 'thanking',
'thank you for your assistance': 'thanking',
'thanks a heap': 'thanking',
'thanks for that': 'thanking',
'you made my day': 'thanking',
'you saved my day': 'thanking',
'you're a dear': 'thanking',
'you're a life saver': 'thanking',
'you're awesome': 'thanking',
'you're great': 'thanking',
'you've saved my life': 'thanking',
'by dint': 'thanking',
'i thank you in advance': 'thanking',
'i want to thank': 'thanking',
'i wanted to thank': 'thanking',
'i would like to thank': 'thanking',
'letter of thanks': 'thanking',
'say thank': 'thanking',
'thanks ever so much': 'thanking',
'thanks just the same': 'thanking',
'very grateful': 'thanking',
'word of thanks': 'thanking',
\ No newline at end of file
'i am honored': 'summary',
'it is an honour': 'summary',
'feel honoured': 'summary',
'great honor': 'summary',
'great honour': 'summary',
'honor me': 'summary',
'honors me': 'summary',
'i am deeply honoured': 'summary',
'i am flattered': 'summary',
'i am humbled': 'summary',
'i am pleased': 'summary',
'i am privileged': 'summary',
'i am very honoured': 'summary',
'i have had the honour': 'summary',
'i have had the privilege': 'summary',
'i now have the honour': 'summary',
'it has been an honor': 'summary',
'it is a great honor': 'summary',
'it is a privilege': 'summary',
'it is indeed an honour': 'summary',
'it was an honour': 'summary',
'it would be an honor': 'summary',
'so honored': 'summary',
'such an honor': 'summary',
'we are honoured': 'summary',
'what a pleasure': 'summary',
'what an honor': 'summary',
'what honor': 'summary',
'what honour': 'summary',
'you honor': 'summary',
'you honor me': 'summary',
'you honour me': 'summary',
'once upon a time': 'summary',
'long ago': 'summary',
'once': 'summary',
'formerly': 'summary',
'in the past': 'summary',
'at one time': 'summary',
'long time ago': 'summary',
'away back': 'summary',
'in former times': 'summary',
'in times gone by': 'summary',
'many years ago': 'summary',
'very long time ago': 'summary',
'in times past': 'summary',
'back in the day': 'summary',
'in the olden days': 'summary',
'some time ago': 'summary',
'many moons ago': 'summary',
'ages ago': 'summary',
'in days gone by': 'summary',
'in earlier times': 'summary',
'in olden times': 'summary',
'good while ago': 'summary',
'in days of old': 'summary',
'in days of yore': 'summary',
'one time previously': 'summary',
'at an earlier time': 'summary',
'back in the old days': 'summary',
'in bygone days': 'summary',
'way back in the past': 'summary',
'down memory lane': 'summary',
'in the good old days': 'summary',
'in years gone by': 'summary',
'long time before': 'summary',
'long while ago': 'summary',
'much further back': 'summary',
'one of these days': 'summary',
'there once was': 'summary',
'there was a time': 'summary',
'there was a time when': 'summary',
'there was once': 'summary',
'time was when': 'summary',
'well before': 'summary',
'ahead of time': 'summary',
'all those years ago': 'summary',
'as long ago': 'summary',
'at the past': 'summary',
'awhile ago': 'summary',
'awhile back': 'summary',
'been a very long time': 'summary',
'donkey's years ago': 'summary',
'during the past few': 'summary',
'during the previous': 'summary',
'earlier on': 'summary',
'far earlier': 'summary',
'far sooner': 'summary',
'have for a long time been': 'summary',
'in an era': 'summary',
'in ancient times': 'summary',
'in days past': 'summary',
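# A minimal sketch of how the phrase lists above can be loaded into a lookup dictionary,
# assuming every line has the form 'phrase': 'category', as in the files shown here.
def loadPhraseCategories(filePath):
    phraseCategories = {}
    with open(filePath, 'r') as file:
        for line in file:
            clear_line = line.replace("\n", '').replace(",", '').replace("'", '').strip()
            if not clear_line:
                continue
            phrase, category = clear_line.split(':', 1)
            phraseCategories[phrase.strip()] = category.strip()
    return phraseCategories

# e.g. loadPhraseCategories('content analyzing/bestPracticesForConclusion.txt')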
import spacy
from scipy.io.wavfile import read

scoreForClearness = 50/100

nlp = spacy.load("en_core_web_sm")


def clearnerssWords(text):
    doc = nlp(text)
    returnVal = ""
    for token in doc:
        count = len(token)
        if count > 12:
            returnVal += f"{token.text} - {count} letters: This word is too complicated. It is better to use a simpler word."
    return {
        "message": returnVal,
        "score": scoreForClearness
    }


def clearnerssSentence(text):
    doc = nlp(text)
    returnVal = ""
    for sent in doc.sents:
        word_count = 0
        for words in sent:
            word_count = word_count + 1
        if word_count > 10:
            returnVal += f'"{sent}" is an overcomplicated sentence. There are {word_count} words in it.'
    return {
        "message": returnVal,
        "score": scoreForClearness
    }


def clearnessReadAudioFile(filePath):
    # Read the audio file
    samplerate, data = read(filePath)
    # Frame (sample) rate of the audio
    print(samplerate)
    # Duration of the audio in seconds
    duration = len(data)/samplerate
    print("Duration of Audio in Seconds", duration)
    print("Duration of Audio in Minutes", duration/60)
    print(len(data))
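# Example usage (a minimal sketch; the sample text below is made up for illustration):
if __name__ == "__main__":
    sample = "Internationalization considerations notwithstanding, this sentence is intentionally long so that the sentence check fires."
    print(clearnerssWords(sample)["message"])
    print(clearnerssSentence(sample)["message"])
    # clearnessReadAudioFile("audio.wav")  # assumes a WAV file named audio.wav exists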
import spacy
import textacy

scoreForConclusion = 60/100

nlp = spacy.load("en_core_web_sm")


# Treat the last 15% of the speech (by character count) as the conclusion
def conclisions(speech):
    totalCharacterCount = len(speech)
    conclusionCharacterCount = (85/100)*totalCharacterCount
    conclusion = speech[int(conclusionCharacterCount):int(totalCharacterCount)]
    return {
        "message": conclusion,
        "score": scoreForConclusion
    }


def comments(speech):
    conclusion = conclisions(speech)["message"]
    final_words = []
    with open('content analyzing/bestPracticesForConclusion.txt', 'r') as file:
        for line in file:
            clear_line = line.replace("\n", '').replace(",", '').replace("'", '').strip()
            word, emotion = clear_line.split(':')
            if word in conclusion:
                final_words.append(word)
    return {
        "message": final_words,
        "score": scoreForConclusion
    }


def questions(speech):
    retVal = []
    doc = nlp(conclisions(speech)["message"])
    # patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
    # verb_phrases = textacy.extract.token_matches(doc, patterns)
    # for verb_phrases in verb_phrases:
    #     print("You used question forms in your conclusion. It is a good practice for a conclusion.")
    #     print(f"Identified questions : {verb_phrases}")
    #     retVal.append(f"Identified questions : {verb_phrases}")
    # Look for an ADV + AUX + PRON sequence (e.g. "why should we"), which usually signals a question
    tokens = [token for token in doc]
    for i in range(len(tokens) - 2):
        if tokens[i].pos_ == 'ADV' and tokens[i + 1].pos_ == 'AUX' and tokens[i + 2].pos_ == 'PRON':
            retVal.append(f"{tokens[i]} {tokens[i + 1]} {tokens[i + 2]}")
    return {
        "message": retVal,
        "score": scoreForConclusion
    }
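# Example usage (a minimal sketch; the speech text is made up for illustration, and
# comments() assumes content analyzing/bestPracticesForConclusion.txt is available):
if __name__ == "__main__":
    sampleSpeech = ("Transportation shapes how our cities grow and how people live. " * 10
                    + "In conclusion, why should we wait any longer to invest in better public transport?")
    print(conclisions(sampleSpeech)["message"])
    print(comments(sampleSpeech)["message"])
    print(questions(sampleSpeech)["message"])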
import spacy
import textacy

scoreForIntroduction = 40/100

nlp = spacy.load("en_core_web_sm")


# Treat the first 15% of the speech (by character count) as the introduction
def introductionFunc(speech):
    totalCharacterCount = len(speech)
    introductionCharacterCount = (15 / 100) * totalCharacterCount
    introduction = speech[0:int(introductionCharacterCount)]
    return {
        "message": introduction,
        "score": scoreForIntroduction
    }


def introductionBestUsesFunc(speech):
    introduction = introductionFunc(speech)["message"]
    final_words = []
    with open('content analyzing/bestPracticesForIntroduction.txt', 'r') as file:
        for line in file:
            clear_line = line.replace("\n", '').replace(",", '').replace("'", '').strip()
            word, emotion = clear_line.split(':')
            if word in introduction:
                final_words.append(word)
    return {
        "message": final_words,
        "score": scoreForIntroduction
    }


def introductionQuestions(speech):
    doc = nlp(introductionFunc(speech)["message"])
    # patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
    # verb_phrases = textacy.extract.token_matches(doc, patterns)
    # for verb_phrases in verb_phrases:
    #     print("You used question forms in your introduction. It is a good practice for an introduction.")
    #     print(f"Identified questions : {verb_phrases}")
    #     retVal.append(f"Identified questions : {verb_phrases}")
    # Look for an ADV + AUX + PRON sequence (e.g. "how do we"), which usually signals a question
    tokens = [token for token in doc]
    retVal = []
    for i in range(len(tokens) - 2):
        if tokens[i].pos_ == 'ADV' and tokens[i + 1].pos_ == 'AUX' and tokens[i + 2].pos_ == 'PRON':
            retVal.append(f"{tokens[i]} {tokens[i + 1]} {tokens[i + 2]}")
    return {
        "message": retVal,
        "score": scoreForIntroduction
    }
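# Example usage (a minimal sketch; the speech text is made up for illustration, and
# introductionBestUsesFunc() assumes content analyzing/bestPracticesForIntroduction.txt is available):
if __name__ == "__main__":
    sampleSpeech = ("How often do we think about road safety? Let me tell you a story. "
                    + "Transportation shapes how our cities grow and how people live. " * 10)
    print(introductionFunc(sampleSpeech)["message"])
    print(introductionBestUsesFunc(sampleSpeech)["message"])
    print(introductionQuestions(sampleSpeech)["message"])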
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
import string

nlp = spacy.load('en_core_web_sm')


def keywordExrtraction(topic, speech):
    Topic = nlp(topic)
    Content = nlp(speech)
    stopwords = list(STOP_WORDS)
    punctuation = string.punctuation + '\n'
    word_frequencies = {}
    for word in Content:
        if word.text.lower() not in stopwords:
            if word.text.lower() not in punctuation:
                if word.text not in word_frequencies.keys():
                    word_frequencies[word.text] = 1
                else:
                    word_frequencies[word.text] += 1
    topicWords = []
    for words in Topic:
        topicWords.append(words.text)
    # print(topicWords)
    keyWords = []
    print("Extracted Key Words:")
    for word in word_frequencies.keys():
        if word_frequencies[word] >= 3:
            keyWords.append(word)
            print(word)
    return {
        "message": keyWords,
        "score": 50/100
    }
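# Example usage (a minimal sketch; the topic and speech below are made up for illustration):
if __name__ == "__main__":
    result = keywordExrtraction(
        "public transport",
        "Buses and trains move millions of people every day. "
        "Buses reduce traffic, and trains connect cities. "
        "Buses and trains keep transport affordable.")
    print(result["message"])  # the words that occur at least three times in the speech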
import spacy

# Testing the model
test_text = "I had such high. hopes for this dress and really crappy worst product hate. it wporst bad."


def identifyQuotes(text):
    # Load the custom text-classification pipeline trained for quote identification
    nlp = spacy.load("content analyzing/quotesIdentify")
    doc = nlp(text)
    # doc.cats maps each text-category label to its predicted score
    return doc.cats


print(identifyQuotes(test_text))
\ No newline at end of file
# importing libraries
import speech_recognition as sr
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence

# create a speech recognition object
r = sr.Recognizer()


# a function that splits the audio file into chunks
# and applies speech recognition
def get_large_audio_transcription(path):
    """
    Split the large audio file into chunks
    and apply speech recognition on each of these chunks.
    """
    # open the audio file using pydub
    sound = AudioSegment.from_wav(path)
    # split the audio where the silence is 500 milliseconds or longer and get the chunks
    chunks = split_on_silence(sound,
                              # experiment with this value for your target audio file
                              min_silence_len=500,
                              # adjust this per requirement
                              silence_thresh=sound.dBFS - 14,
                              # keep 500 ms of silence at each chunk boundary, adjustable as well
                              keep_silence=500,
                              )
    folder_name = "audio-chunks"
    # create a directory to store the audio chunks
    if not os.path.isdir(folder_name):
        os.mkdir(folder_name)
    whole_text = ""
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        # export the audio chunk and save it in the `folder_name` directory
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # recognize the chunk
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
            # try converting it to text (recognize_google needs an internet connection)
            try:
                text = r.recognize_google(audio_listened)
            except sr.UnknownValueError as e:
                print("Error:", str(e))
            else:
                text = f"{text.capitalize()}. "
                print(chunk_filename, ":", text)
                whole_text += text
    # return the text for all chunks detected
    print(whole_text)
    return whole_text


# path = "../audio.wav"
# print("\nFull text:", get_large_audio_transcription(path))
from selenium import webdriver


def suggestContent():
    driver = webdriver.Chrome("chromedriver.exe")
    driver.get('https://wikipedia.com')
    searchbox = driver.find_element_by_xpath('//*[@id="searchInput"]')
    searchbox.send_keys('cricket')
    searchButton = driver.find_element_by_xpath('//*[@id="search-form"]/fieldset/button')
    searchButton.click()
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

nlp = spacy.load("en_core_web_sm")
stopwords = list(STOP_WORDS)
punctuation = punctuation + '\n'


def synonymsFunc(topic, speech):
    Topic = nlp(topic)
    Content = nlp(speech)
    Total_similarity = 0
    # Sum the similarity of every content token (minus stopwords and punctuation) against every topic token.
    # Note: en_core_web_sm ships without static word vectors, so these similarity scores are only approximate.
    for token1 in Content:
        if token1.text.lower() not in stopwords:
            if token1.text.lower() not in punctuation:
                for token2 in Topic:
                    print((token1.text, token2.text), "similarity", token1.similarity(token2))
                    Total_similarity = Total_similarity + token1.similarity(token2)
    print(len(Content))
    print(f'Total score for the similarity: {Total_similarity}')
    average_similarity = (Total_similarity/len(Content))*100
    print(f'Average score for the similarity between topic and content: {average_similarity}%')
    return {
        "message": f'Average score for the similarity between topic and content: {average_similarity}%',
        "score": 50/100
    }
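# Example usage (a minimal sketch; the topic and speech are made up for illustration,
# and the scores are rough because en_core_web_sm has no static word vectors):
if __name__ == "__main__":
    result = synonymsFunc("transportation",
                          "Buses, trains and bicycles move people around the city every day.")
    print(result["message"])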
from selenium import webdriver


def webScrap():
    driver = webdriver.Chrome("chromedriver.exe")
    driver.get('https://youtube.com')
    searchbox = driver.find_element_by_xpath('//*[@id="search"]')
    searchbox.send_keys('speeches Transportation')
    searchButton = driver.find_element_by_xpath('//*[@id="search-icon-legacy"]')
    searchButton.click()
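# Note: find_element_by_xpath was deprecated and later removed in Selenium 4, so webScrap()
# and suggestContent() above assume Selenium 3. A minimal sketch of the equivalent call with
# the current API (same XPath, chromedriver assumed to be on the PATH):
#     from selenium.webdriver.common.by import By
#     searchbox = driver.find_element(By.XPATH, '//*[@id="search"]')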