Commit d27758fa authored by Dhananjaya Jayashanka's avatar Dhananjaya Jayashanka

updated branch

parent f149437a
read me
\ No newline at end of file
import spacy
from scipy.io.wavfile import read
# Identify over-complex words in a hard-coded sample sentence.
nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying U.K. startup for $1 billion accumulativeness. This ia a dog. name is Chalika Mihiran")
# Flag every token longer than 12 characters as too complicated.
for token in doc:
    count = len(token)
    if count > 12:
        print(f"{token.text} - {count} letters: This is a too complicated word. It is better to use more simpler word.")
# Identify over-complex sentences: any sentence of more than 10 tokens.
for sent in doc.sents:
    # Token count of the sentence (spaCy tokens, including punctuation).
    word_count = sum(1 for _ in sent)
    if word_count > 10:
        print(f'"{sent}" is a overcomplicated sentence. There are {word_count} words in it.')
# Materialise the sentence spans for later inspection.
sentence_tokens = list(doc.sents)
# Calculate the rate of the speed
# Read the audio file: sample rate (frames/second) and the raw sample data.
samplerate, data = read('C:/Users/CHALIKA PC/Desktop/Year 04/Research/Audio/videoplayback_1_.wav')
# Frame rate for the audio
print(samplerate)
# Duration in seconds = number of frames / frames-per-second.
duration = len(data) / samplerate
print("Duration of Audio in Seconds", duration)
print("Duration of Audio in Minutes", duration / 60)
print(len(data))
# Placeholder clearness score, expressed as a fraction of 100.
scoreForClearness = 50/100
import spacy
from scipy.io.wavfile import read
# Fixed score (fraction of 100) attached to every clearness result below.
scoreForClearness = 50/100
# Identify over-complex words.
# Load the small English spaCy pipeline shared by the helpers in this module.
nlp = spacy.load("en_core_web_sm")
def clearnerssWords(text):
    """Flag over-complex words (more than 12 characters) in *text*.

    Returns a dict with a ``message`` string (concatenated warnings, empty
    when no long word was found) and the fixed ``score`` constant.
    """
    doc = nlp(text)
    returnVal = ""
    for token in doc:
        count = len(token)
        if count <= 12:
            continue
        returnVal += f"{token.text} - {count} letters: This is a too complicated word. It is better to use more simpler word."
    return {
        "message": returnVal,
        "score": scoreForClearness,
    }
# Identify over-complex sentences.
def clearnerssSentence(text):
    """Flag sentences in *text* that contain more than 10 tokens.

    Returns a dict with a ``message`` string (concatenated warnings, empty
    when every sentence is short enough) and the fixed ``score`` constant.
    """
    doc = nlp(text)
    returnVal = ""
    for sent in doc.sents:
        # Token count of the sentence (spaCy tokens, including punctuation).
        word_count = sum(1 for _ in sent)
        if word_count > 10:
            returnVal += f'"{sent}" is a overcomplicated sentence. There are {word_count} words in it.'
    return {
        "message": returnVal,
        "score": scoreForClearness,
    }
# Demo run of both clearness checks on the same sample text.
_sample_text = "Apple is looking at buying U.K. startup for $1 billion accumulativeness. This ia a dog. name is Chalika Mihiran"
print(clearnerssWords(_sample_text))
print(clearnerssSentence(_sample_text))
#Calculate the rate of the speed
def clearnessReadAudioFile(filePath):
    """Read a WAV file and report its duration.

    Parameters
    ----------
    filePath : str
        Path to the ``.wav`` file to analyse.

    Returns
    -------
    float
        Duration of the audio in seconds. (The original version printed the
        values but returned ``None``; returning the duration is
        backward-compatible and lets callers use the result.)
    """
    # Read the audio file: sample rate (frames/second) and raw sample data.
    samplerate, data = read(filePath)
    # Frame rate for the audio
    print(samplerate)
    # Duration in seconds = number of frames / frames-per-second.
    duration = len(data) / samplerate
    print("Duration of Audio in Seconds", duration)
    print("Duration of Audio in Minutes", duration / 60)
    print(len(data))
    return duration
import spacy
import textacy
scoreForConclusion = 60/100
nlp = spacy.load("en_core_web_sm")
speech = """According to a research Global warming is an international phenomenon where the earth’s mean surface temperature is increasing rapidly due to the accumulation of greenhouse gases in the atmosphere. The source of these greenhouse gases is various, both natural and manmade, such as forest fires, industrialisation, burning of agricultural crops, burning of fossil fuels etc. Global warming has proved to be a huge force created by man that can have the potential to destroy the natural cycle of our planet.
......@@ -13,38 +15,68 @@ Certain diseases like Malaria are caused due to global warming since there is th
I would like to conclude my global warming speech by saying that the international community, where does they governments and citizens has to come forward to reduce your carbon footprint to save the planet for our future generation. All of us living here are the stakeholders of nature and we are responsible for how we take care of our environment. As the saying goes, charity begins at home, we have to start using biodegradable materials at our houses, reduce plastic waste and other such changes in our lifestyle so that we can make an impact to reduce global warming globally."""
# Count the total number of characters in the speech.
totalCharacterCount = len(speech)
# The conclusion is taken to be the final 15% of the characters.
conclusionCharacterCount = 0.85 * totalCharacterCount
conclusion = speech[int(conclusionCharacterCount):]
print(">>>Conclusion<<<")
print(conclusion)
def conclisions(speech):
    """Extract the conclusion of *speech*: its final 15% of characters.

    Returns a dict with the conclusion text under ``message`` and the fixed
    ``scoreForConclusion`` constant under ``score``.
    """
    total = len(speech)
    start = int(0.85 * total)
    return {
        "message": speech[start:total],
        "score": scoreForConclusion,
    }
# ....Analyze the conclusion.....
# Identify recognised ending phrases inside the extracted conclusion.
endingPhrases = [
    "I would like to conclude", "conclude by saying", "final note", "I close by saying",
    "According to an analysis", "I shall conclude by saying", "i should like to conclude by saying",
    "I should like to finish by saying", "I want to conclude by saying", "I will close by saying",
    "i will conclude", "i will conclude by saying", "i will end by saying", "i will finish by saying",
    "i would end by saying", "i would like to conclude", "i would end by saying",
    "i would like to conclude by stating", "i would like to end by saying", "in closing",
    "in conclusion", "let me close by saying", "let me conclude by saying", "let me finish by saying",
    "may i conclude by saying", "i would like briefly", "i would like to briefly",
    "i would like to refer briefly", "i would like to return briefly", "let me briefly present",
    "let me briefly say", "let me briefly touch", "let me comment briefly", "let me quickly",
    "let me turn briefly", "allow me to say a few", "allow me to touch briefly",
    "i shall refer briefly", "i should like to refer briefly", "i should like to refer briefly",
    "i want to say a couple", "i want to say a few", "i will briefly mention",
    "i will briefly summarize", "i will comment briefly", "i will refer briefly",
    "i will touch briefly", "i wish to say a few", "i would like to say a few",
    "i would like to say a word", "i would like to speak briefly", "let me briefly turn",
    "let me say a few", "let me touch briefly", "Let me briefly",
]
final_words = [wordPharse for wordPharse in endingPhrases if wordPharse in conclusion]
print(">>>Comments<<<")
print("You used some ending phrases in your conclusion. It is a good practice to use ending phrases in your introduction")
print(final_words)
def comments(speech):
    """List the recognised ending phrases found in the conclusion of *speech*.

    Returns a dict with the matched phrases (in list order) under ``message``
    and the fixed ``scoreForConclusion`` constant under ``score``.
    """
    conclusion = conclisions(speech)["message"]
    endingPhrases = [
        "I would like to conclude", "conclude by saying", "final note", "I close by saying",
        "According to an analysis", "I shall conclude by saying", "i should like to conclude by saying",
        "I should like to finish by saying", "I want to conclude by saying", "I will close by saying",
        "i will conclude", "i will conclude by saying", "i will end by saying", "i will finish by saying",
        "i would end by saying", "i would like to conclude", "i would end by saying",
        "i would like to conclude by stating", "i would like to end by saying", "in closing",
        "in conclusion", "let me close by saying", "let me conclude by saying", "let me finish by saying",
        "may i conclude by saying", "i would like briefly", "i would like to briefly",
        "i would like to refer briefly", "i would like to return briefly", "let me briefly present",
        "let me briefly say", "let me briefly touch", "let me comment briefly", "let me quickly",
        "let me turn briefly", "allow me to say a few", "allow me to touch briefly",
        "i shall refer briefly", "i should like to refer briefly", "i should like to refer briefly",
        "i want to say a couple", "i want to say a few", "i will briefly mention",
        "i will briefly summarize", "i will comment briefly", "i will refer briefly",
        "i will touch briefly", "i wish to say a few", "i would like to say a few",
        "i would like to say a word", "i would like to speak briefly", "let me briefly turn",
        "let me say a few", "let me touch briefly", "Let me briefly",
    ]
    final_words = [phrase for phrase in endingPhrases if phrase in conclusion]
    return {
        "message": final_words,
        "score": scoreForConclusion,
    }
# Identify questions that the user used in the conclusion.
# NOTE(review): script-style remnant — `conclusion` is the module-level text
# extracted above; questions() below re-derives it per call and shadows `doc`.
doc = nlp(conclusion)
def questions(speech):
    """Find question-shaped phrases in the conclusion of *speech*.

    The original body contained diff-merge duplicates (``patterns`` and the
    match loop each appeared twice, and the loop variable shadowed the
    iterable it walked); this version keeps the logic exactly once.

    Returns a dict with the matched-question messages under ``message`` and
    the fixed ``scoreForConclusion`` constant under ``score``.
    """
    retVal = []
    doc = nlp(conclisions(speech)["message"])
    # Question-shaped POS patterns: ADV AUX PRON (e.g. "why are you")
    # and ADV AUX NOUN (e.g. "how do people").
    patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
    matches = textacy.extract.token_matches(doc, patterns)
    for match in matches:
        print("You used questions forms in your conclusion. It is a good practice for a conclusion.")
        print(f"Identified questions : {match}")
        retVal.append(f"Identified questions : {match}")
    return {
        "message": retVal,
        "score": scoreForConclusion
    }
# Fixed score (fraction of 100) attached to every conclusion-analysis result.
scoreForConclusion = 60 / 100
\ No newline at end of file
import spacy
import textacy
# Fixed score (fraction of 100) attached to every introduction-analysis result.
scoreForIntroduction = 40/100
# Identify words that are in an introduction.
# Load the small English spaCy pipeline shared by the helpers below.
nlp = spacy.load("en_core_web_sm")
......@@ -15,37 +17,51 @@ Certain diseases like Malaria are caused due to global warming since there is th
I would like to conclude my global warming speech by saying that the international community, governments and citizens has to come forward to reduce your carbon footprint to save the planet for our future generation. where does they All of us living here are the stakeholders of nature and we are responsible for how we take care of our environment. As the saying goes, charity begins at home, we have to start using biodegradable materials at our houses, reduce plastic waste and other such changes in our lifestyle so that we can make an impact to reduce global warming globally."""
#Count the total number of characters in the speech
# NOTE(review): `speech` is the module-level sample text defined above.
totalCharacterCount = len(speech)
def introductionFunc(speech):
    """Extract the introduction of *speech*: its first 15% of characters.

    The original body contained interleaved old/new diff lines that duplicated
    the extraction logic; this version keeps it exactly once.

    Returns a dict with the introduction text under ``message`` and the fixed
    ``scoreForIntroduction`` constant under ``score``.
    """
    totalCharacterCount = len(speech)
    # Extract the introduction of the speech (first 15% of characters).
    introductionCharacterCount = (15 / 100) * totalCharacterCount
    introduction = speech[0:int(introductionCharacterCount)]
    return {
        "message": introduction,
        "score": scoreForIntroduction
    }
#....Analyze the introduction.....
#identify best uses for introduction
# NOTE(review): module-level accumulator used by the script-style version;
# introductionBestUsesFunc() below shadows it with its own local list.
final_words = []
def introductionBestUsesFunc(speech):
    """List the study-referencing phrases found in the introduction of *speech*.

    Bug fixed: the original tested ``wordPharse in introductionFunc(speech)``,
    i.e. membership against the *keys* of the returned dict ("message"/"score"),
    so no phrase could ever match. We now test against the introduction text
    itself, and call introductionFunc() once instead of once per phrase.

    Returns a dict with the matched phrases under ``message`` and the fixed
    ``scoreForIntroduction`` constant under ``score``.
    """
    final_words = []
    referStudies = [
        "According to a study", "According to a research", "According to a review",
        "According to a survey", "According to an analysis", "according to one study",
        "According to research", "According to an investigation",
        "According to research conducted", "According to the study",
        "according to the survey", "according to this study", "after an investigation",
        "for a study", "in a studio", "in a survey", "in one study",
        "results of a study", "study finds", "study says", "survey conducted", "survey found",
    ]
    # Hoisted out of the loop: extract the introduction text once.
    introduction = introductionFunc(speech)["message"]
    for wordPharse in referStudies:
        if wordPharse in introduction:
            final_words.append(wordPharse)
    print(">>>Comments<<<")
    print(f"You refer some other's works in your introduction. It is a good practice to refer some one's work in your introduction")
    print(final_words)
    return {
        "message": final_words,
        "score": scoreForIntroduction
    }
#Identify questions that user used in introduction
# NOTE(review): script-style remnant — `introduction` is the module-level
# introduction text; introductionQuestions() below supersedes this code.
doc = nlp(introduction)
# Question-shaped POS patterns: ADV AUX PRON and ADV AUX NOUN.
patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
# Lazy iterator of token spans that match the question patterns.
verb_phrases = textacy.extract.token_matches(doc, patterns)
def introductionQuestions(speech):
    """Find question-shaped phrases in the introduction of *speech*.

    The original body contained diff-merge duplicates (the match loop appeared
    twice and the loop variable shadowed the iterable it walked); this version
    keeps the logic exactly once.

    Returns a dict with the matched-question messages under ``message`` and
    the fixed ``scoreForIntroduction`` constant under ``score``.
    """
    retVal = []
    doc = nlp(introductionFunc(speech)["message"])
    # Question-shaped POS patterns: ADV AUX PRON and ADV AUX NOUN.
    patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
    matches = textacy.extract.token_matches(doc, patterns)
    for match in matches:
        print("You used questions forms in your introduction. It is a good practice for a introduction.")
        print(f"Identified questions : {match}")
        retVal.append(f"Identified questions : {match}")
    return {
        "message": retVal,
        "score": scoreForIntroduction
    }
# Fixed score (fraction of 100) for introduction analysis; stray semicolon removed.
scoreForIntroduction = 40 / 100
\ No newline at end of file
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
import string
# Load the small English spaCy pipeline used by keywordExrtraction below.
nlp = spacy.load('en_core_web_sm')
def keywordExrtraction(topic, speech):
    """Extract keywords from *speech*: words (excluding stop words and
    punctuation) that occur at least three times.

    Returns a dict with the keyword list under ``message`` and a fixed
    ``score`` of 0.5.
    """
    Topic = nlp(topic)
    Content = nlp(speech)
    stop_list = list(STOP_WORDS)
    punct = string.punctuation + '\n'
    # Frequency of each content word, skipping stop words and punctuation.
    word_frequencies = {}
    for token in Content:
        lowered = token.text.lower()
        if lowered not in stop_list and lowered not in punct:
            word_frequencies[token.text] = word_frequencies.get(token.text, 0) + 1
    # Tokens of the topic (kept for parity with the original implementation).
    topicWords = [words.text for words in Topic]
    keyWords = []
    print("Extracted Key Words:")
    for word, freq in word_frequencies.items():
        if freq >= 3:
            keyWords.append(word)
            print(word)
    return {
        "message": keyWords,
        "score": 50/100
    }
# keywordExrtraction("dog", "The dog is a pet animal. A dog has sharp teeth so that it can eat flesh very easily, it has four legs, two ears, two eyes, a tail, a mouth, and a nose. It is a very clever animal and is very useful in catching thieves. It runs very fast, barks loudly and attacks the strangers. A dog saves the life of the master from danger. One can find dogs everywhere in the world. Dogs are a very faithful animal. It has a sharp mind and a strong sense of hearing smelling the things. It also has many qualities like swimming in the water, jumping from anywhere, good smelling sense.")
......@@ -52,5 +52,5 @@ def get_large_audio_transcription(path):
print(whole_text)
return whole_text
path = "videoplayback_1_.wav"
print("\nFull text:", get_large_audio_transcription(path))
\ No newline at end of file
# path = "../audio.wav"
# print("\nFull text:", get_large_audio_transcription(path))
......@@ -6,4 +6,4 @@ searchbox = driver.find_element_by_xpath('//*[@id="searchInput"]')
searchbox.send_keys(['cricket'])
searchButton = driver.find_element_by_xpath('//*[@id="search-form"]/fieldset/button')
searchButton.click()
\ No newline at end of file
searchButton.click()
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
# Load the small English spaCy pipeline used by synonymsFunc below.
nlp = spacy.load("en_core_web_sm")
# spaCy's built-in English stop-word list, as a plain list.
stopwords = list(STOP_WORDS)
# All ASCII punctuation plus newline; used to skip punctuation tokens.
punctuation = punctuation + '\n'
def synonymsFunc(topic, speech):
    """Score how similar *speech* is to *topic* using spaCy token similarity.

    Sums the pairwise similarity of every non-stop-word, non-punctuation
    content token against every topic token, then averages over all content
    tokens. Returns a dict with a summary string under ``message`` and a
    fixed ``score`` of 0.5.
    """
    Topic = nlp(topic)
    Content = nlp(speech)
    Total_similarity = 0
    for token1 in Content:
        lowered = token1.text.lower()
        if lowered not in stopwords and lowered not in punctuation:
            for token2 in Topic:
                # Compute once and reuse for both the log line and the sum.
                sim = token1.similarity(token2)
                print((token1.text, token2.text), "similarity", sim)
                Total_similarity = Total_similarity + sim
    print(len(Content))
    print(f'Total score for the similarity: {Total_similarity}')
    average_similarity = (Total_similarity / len(Content)) * 100
    print(f'Average score for the similarity between topic and content: {average_similarity}%')
    return {
        "message": str(f'Average score for the similarity between topic and content: {average_similarity}%'),
        "score": 50/100
    }
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
# Script-style keyword extraction over a fixed topic/content pair.
nlp = spacy.load('en_core_web_sm')
Topic = nlp("dog")
Content = nlp("The dog is a pet animal. A dog has sharp teeth so that it can eat flesh very easily, it has four legs, two ears, two eyes, a tail, a mouth, and a nose. It is a very clever animal and is very useful in catching thieves. It runs very fast, barks loudly and attacks the strangers. A dog saves the life of the master from danger. One can find dogs everywhere in the world. Dogs are a very faithful animal. It has a sharp mind and a strong sense of hearing smelling the things. It also has many qualities like swimming in the water, jumping from anywhere, good smelling sense.")
stopwords = list(STOP_WORDS)
punctuation = punctuation + '\n'
# Count content-word frequencies, skipping stop words and punctuation.
word_frequencies = {}
for word in Content:
    lowered = word.text.lower()
    if lowered not in stopwords and lowered not in punctuation:
        word_frequencies[word.text] = word_frequencies.get(word.text, 0) + 1
# Tokens of the topic (kept for parity with the original script).
topicWords = [words.text for words in Topic]
# A keyword is any counted word that occurs at least three times.
keyWords = []
print("Extracted Key Words:")
for word, freq in word_frequencies.items():
    if freq >= 3:
        keyWords.append(word)
        print(word)
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
# Script-style similarity check over a fixed topic/content pair.
nlp = spacy.load("en_core_web_sm")
# spaCy's built-in English stop-word list, as a plain list.
stopwords = list(STOP_WORDS)
# All ASCII punctuation plus newline; used to skip punctuation tokens.
punctuation = punctuation + '\n'
Topic = nlp("transportation")
# NOTE(review): in the source this literal was broken across two physical
# lines (invalid syntax); the line break is restored here as an explicit \n.
Content = nlp("Transportation is movement of people and goods from one location to another. Throughout history, the economic wealth and military power of a people or a nation have been closely tied to efficient methods of transportation. Transportation provides access to natural resources and promotes trade, allowing a nation to accumulate wealth and power. Transportation also allows the movement of soldiers, equipment, and supplies so that a nation can wage war.Transportation is movement of people and goods from one location to another. Throughout history, the economic wealth and military power of a people or a nation have been closely tied to efficient methods of transportation. Transportation provides access to natural resources and promotes trade, allowing a nation to accumulate wealth and power. Transportation also allows the movement of soldiers, equipment, and supplies so that a nation can wage war.Transportation is vital to a nation's economy. Reducing the costs of transporting natural resources to production sites and moving finished goods to markets is one of the key factors in economic competition. The transportation industry is the largest industry in the world. It includes the manufacture and distribution of vehicles, the production and distribution of fuel, and the provision of transportation services. In the 1990s, approximately 11 percent of the U.S. gross domestic product and an estimated 10 percent of all jobs in the United States were related to the transportation industry.The same transportation systems that link a nation can also be used in the nation's war efforts. The rapid movement of troops, equipment, and supplies can be a deciding factor in winning a battle or a war. Transportation is usually classified by the medium in which the movement occurs, such as by land, air, water, or pipeline. Within each of the first three media, many different methods are used to move people and goods from place to place. \nPipelines are used mainly to transport liquids or gases over long distances.")
# Sum pairwise token similarity between every kept content token and every
# topic token, then average over all content tokens.
Total_similarity = 0
for token1 in Content:
    lowered = token1.text.lower()
    if lowered not in stopwords and lowered not in punctuation:
        for token2 in Topic:
            # Compute once and reuse for both the log line and the sum.
            sim = token1.similarity(token2)
            print((token1.text, token2.text), "similarity", sim)
            Total_similarity = Total_similarity + sim
print(len(Content))
print(f'Total score for the similarity: {Total_similarity}')
average_similarity = (Total_similarity / len(Content)) * 100
print(f'Average score for the similarity between topic and content: {average_similarity}%')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment