Merge remote-tracking branch 'origin/master'

ca860e38 · Dhananjaya Jayashanka · 435585e2 · 99aaf74d · ca860e38 · ca860e38
Commit ca860e38 authored Sep 18, 2021 by Dhananjaya Jayashanka
8 changed files
--- a/content analyzing/clearness.py
+++ b/content analyzing/clearness.py
+import spacy
+from scipy.io.wavfile import read
+
+# Fixed score (0.5) returned with every clearness result produced in this file.
+scoreForClearness = 50/100
+#Identify overly complex words
+# spaCy pipeline shared by the word-level and sentence-level checks.
+nlp = spacy.load("en_core_web_sm")
+
+def clearnerssWords(text):
+    doc = nlp(text)
+    returnVal = ""
+    for token in doc:
+        count = len(token)
+        if count > 12:
+            # print(f"{token.text} - {count} letters: This is a too complicated word. It is better to use more simpler word.")
+            returnVal += f"{token.text} - {count} letters: This is a too complicated word. It is better to use more simpler word."
+    return {
+        "message": returnVal,
+        "score": scoreForClearness
+    }
+
+#Identify over complex sentences
+def clearnerssSentence(text):
+    doc = nlp(text)
+    returnVal = ""
+    for sent in doc.sents:
+        word_count = 0
+        # print(sent.text)
+        for words in sent:
+            # print(words.text)
+            word_count = word_count + 1
+        if word_count > 10:
+            # print(f'"{sent}" is a overcomplicated sentence. There are {word_count}  words in it.')
+            returnVal += f'"{sent}" is a overcomplicated sentence. There are {word_count}  words in it.'
+        # print(word_count)
+    return {
+        "message": returnVal,
+        "score": scoreForClearness
+    }
+
+print(clearnerssWords("Apple is looking at buying U.K. startup for $1 billion accumulativeness. This ia a dog. name is Chalika Mihiran"))
+print(clearnerssSentence("Apple is looking at buying U.K. startup for $1 billion accumulativeness. This ia a dog. name is Chalika Mihiran"))
+
+#Calculate the rate of the speed
+def clearnessReadAudioFile(filePath):
+    # Read the Audiofile
+    samplerate, data = read(filePath)
+    # Frame rate for the Audio
+    print(samplerate)
+
+    # Duration of the audio in seconds
+    duration = len(data)/samplerate
+    print("Duration of Audio in Seconds", duration)
+    print("Duration of Audio in Minutes", duration/60)
+    print(len(data))
+
--- a/content analyzing/conclusion.py
+++ b/content analyzing/conclusion.py
+import spacy
+import textacy
+
+# Fixed score (0.6) returned with every conclusion-analysis result in this file.
+scoreForConclusion = 60/100
+
+# spaCy pipeline; questions() uses it to obtain part-of-speech tags.
+nlp = spacy.load("en_core_web_sm")
+
+# Sample speech text. The functions in this file each take their own `speech`
+# parameter, so this module-level value is only used if a caller passes it in.
+speech = """According to a research Global warming is an international phenomenon where the earth’s mean surface temperature is increasing rapidly due to the accumulation of greenhouse gases in the atmosphere. The source of these greenhouse gases is various, both natural and manmade, such as forest fires, industrialisation, burning of agricultural crops, burning of fossil fuels etc. Global warming has proved to be a huge force created by man that can have the potential to destroy the natural cycle of our planet.
+Global warming is not a recent phenomenon but it has surely improved and increased because of rapid industrialisation, population explosion, agricultural explosion and the ever-increasing greed for economic growth for countries at the cos of of exploiting our environment. Global warming did exist hundreds of years before when civilizations begin to occur. There are examples of burning fossil fuels and causing pollution even during early civilizations like the Indus valley civilisation or Harappan civilization, but the magnitude of global warming today is thousand times more than what it was a few centuries back.
+Some of the main greenhouse gases that are causing global warming are carbon dioxide, methane, nitrous oxide, sulphur hexafluoride, hydrofluorocarbons and perfluorocarbons. Most of the greenhouse gases are produced due to the consumption of fossil fuels.
+The effects of global warming are plenty and one of the most catastrophic effects of global warming is the deterioration of the ozone layer. Due to the accumulation of chlorofluorocarbons in the stratosphere, scientists have predicted that an area above the continent of Antarctica has a huge ozone hole. The ozone layer is a layer in the atmosphere which protects the Earth’s surface from the harmful ultraviolet radiations coming from the sun. When this ozone layer gets depleted, the living beings on the planet earth is easily exposed to UV rays. This can cause catastrophic effects on human health, agricultural cycle, climatic cycle and on the very existence of our planet.
+Let me mention a few of the impacts of global warming that we are already seeing in the year 2020. It is due to global warming that there is a rise in sea levels which is causing flooding off patches of lands, the low lying setters and islands countries of Indonesia and Japan. Global warming is causing changes in the rainfall patterns. When the change in rainfall patterns and climatic changes occurs abruptly, it becomes difficult for the Meteorological Departments to predict natural calamities and this increases the damage caused to life and property.
+Draughts, forest fires floods and earthquakes are some of the indications for changing in climatic patterns caused due to global warming. Melting of glaciers and polar ice caps has caused almost extinction of habitat near the north pole and the south pole. The polar bears, which are considered to be greatly endangered species, are decreasing in their population by the day because of the melting of glaciers. Melting of glaciers can also lead to an increase in floods and famine in the nearby regions.
+Certain diseases like Malaria are caused due to global warming since there is the migration of species from one place to another. Many scientists also predict that the COVID-19 pandemic that we are witnessing in the year 2020 can also be traced back to the impact of global warming when seen from the bird’s point of you
+I would like to conclude my global warming speech by saying that the international community, where does they governments and citizens has to come forward to reduce your carbon footprint to save the planet for our future generation. All of us living here are the stakeholders of nature and we are responsible for how we take care of our environment. As the saying goes, charity begins at home, we have to start using biodegradable materials at our houses, reduce plastic waste and other such changes in our lifestyle so that we can make an impact to reduce global warming globally."""
+
+#Count the total number of characters in the speech
+def conclisions(speech):
+    totalCharacterCount = len(speech)
+    #Extract the conclusion of the speech
+    conclusionCharacterCount = (85/100)*totalCharacterCount
+    conclusion = (speech[int(conclusionCharacterCount):int(totalCharacterCount)])
+    # print(">>>Conclusion<<<")
+    # print(conclusion)
+    return {
+        "message": conclusion,
+        "score": scoreForConclusion
+    }
+
+#....Analyze the introduction.....
+
+#identify best uses for introduction
+def comments(speech):
+    conclusion = conclisions(speech)["message"]
+    final_words = []
+    endingPhrases = ["I would like to conclude", "conclude by saying", "final note", "I close by saying",
+                     "According to an analysis", "I shall conclude by saying", "i should like to conclude by saying",
+                     "I should like to finish by saying", "I want to conclude by saying", "I will close by saying",
+                     "i will conclude", "i will conclude by saying", "i will end by saying", "i will finish by saying",
+                     "i would end by saying", "i would like to conclude", "i would end by saying",
+                     "i would like to conclude by stating", "i would like to end by saying", "in closing",
+                     "in conclusion", "let me close by saying", "let me conclude by saying", "let me finish by saying",
+                     "may i conclude by saying"
+        , "i would like briefly", "i would like to briefly", "i would like to refer briefly",
+                     "i would like to return briefly", "let me briefly present", "let me briefly say",
+                     "let me briefly touch", "let me comment briefly", "let me quickly", "let me turn briefly",
+                     "allow me to say a few", "allow me to touch briefly", "i shall refer briefly",
+                     "i should like to refer briefly", "i should like to refer briefly", "i want to say a couple",
+                     "i want to say a few", "i will briefly mention", "i will briefly summarize",
+                     "i will comment briefly", "i will refer briefly", "i will touch briefly", "i wish to say a few",
+                     "i would like to say a few", "i would like to say a word", "i would like to speak briefly",
+                     "let me briefly turn", "let me say a few", "let me touch briefly", "Let me briefly"]
+    for wordPharse in endingPhrases:
+        if wordPharse in conclusion:
+            final_words.append(wordPharse)
+            # print(">>>Comments<<<")
+            # print("You used some ending phrases in your conclusion. It is a good practice to use ending phrases in your introduction")
+    # print(final_words)
+    return {
+        "message": final_words,
+        "score": scoreForConclusion
+    }
+
+#Identify questions that user used in  introduction
+def questions(speech):
+    retVal = []
+    doc = nlp(conclisions(speech)["message"])
+
+    #Declare the patterns of questions
+    patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
+
+    verb_phrases = textacy.extract.token_matches(doc, patterns)
+
+    for verb_phrases in verb_phrases:
+        print("You used questions forms in your conclusion. It is a good practice for a conclusion.")
+        print(f"Identified questions : {verb_phrases}")
+        retVal.append(f"Identified questions : {verb_phrases}")
+    return {
+        "message": retVal,
+        "score": scoreForConclusion
+    }
+
--- a/content analyzing/introduction.py
+++ b/content analyzing/introduction.py
+import spacy
+import textacy
+
+# Fixed score (0.4) returned with every introduction-analysis result in this file.
+scoreForIntroduction = 40/100
+
+#Identify words that are in an introduction
+
+# spaCy pipeline; introductionQuestions() uses it to obtain part-of-speech tags.
+nlp = spacy.load("en_core_web_sm")
+
+# Sample speech text. The functions in this file each take their own `speech`
+# parameter, so this module-level value is only used if a caller passes it in.
+speech = """According to a research Global warming is an  where does he international phenomenon where the earth’s mean surface temperature is increasing rapidly due to the accumulation of greenhouse gases in the atmosphere. where does they The source of these greenhouse gases is various, both natural and manmade, such as forest fires, industrialisation, burning of agricultural crops, burning of fossil fuels etc. Global warming has proved to be a huge force created by man that can have the potential to destroy the natural cycle of our planet.
+Global warming is not a recent phenomenon but it has surely improved and increased because of rapid industrialisation, population explosion, agricultural explosion and the ever-increasing greed for economic growth for countries at the cos of of exploiting our environment. Global warming did exist hundreds of years before when civilizations begin to occur. There are examples of burning fossil fuels and causing pollution even during early civilizations like the Indus valley civilisation or Harappan civilization, but the magnitude of global warming today is thousand times more than what it was a few centuries back.
+Some of the main greenhouse gases that are causing global warming are carbon dioxide, methane, nitrous oxide, sulphur hexafluoride, hydrofluorocarbons and perfluorocarbons. Most of the greenhouse gases are produced due to the consumption of fossil fuels.
+The effects of global warming are plenty and one of the most catastrophic effects of global warming is the deterioration of the ozone layer. Due to the accumulation of chlorofluorocarbons in the stratosphere, scientists have predicted that an area above the continent of Antarctica has a huge ozone hole. The ozone layer is a layer in the atmosphere which protects the Earth’s surface from the harmful ultraviolet radiations coming from the sun. When this ozone layer gets depleted, the living beings on the planet earth is easily exposed to UV rays. This can cause catastrophic effects on human health, agricultural cycle, climatic cycle and on the very existence of our planet.
+Let me mention a few of the impacts of global warming that we are already seeing in the year 2020. It is due to global warming that there is a rise in sea levels which is causing flooding off patches of lands, the low lying setters and islands countries of Indonesia and Japan. Global warming is causing changes in the rainfall patterns. When the change in rainfall patterns and climatic changes occurs abruptly, it becomes difficult for the Meteorological Departments to predict natural calamities and this increases the damage caused to life and property.
+Draughts, forest fires floods and earthquakes are some of the indications for changing in climatic patterns caused due to global warming. Melting of glaciers and polar ice caps has caused almost extinction of habitat near the north pole and the south pole. The polar bears, which are considered to be greatly endangered species, are decreasing in their population by the day because of the melting of glaciers. Melting of glaciers can also lead to an increase in floods and famine in the nearby regions.
+Certain diseases like Malaria are caused due to global warming since there is the migration of species from one place to another. Many scientists also predict that the COVID-19 pandemic that we are witnessing in the year 2020 can also be traced back to the impact of global warming when seen from the bird’s point of you
+I would like to conclude my global warming speech by saying that the international community, governments and citizens has to come forward to reduce your carbon footprint to save the planet for our future generation. where does they All of us living here are the stakeholders of nature and we are responsible for how we take care of our environment. As the saying goes, charity begins at home, we have to start using biodegradable materials at our houses, reduce plastic waste and other such changes in our lifestyle so that we can make an impact to reduce global warming globally."""
+
+#Count the total number of characters in the speech
+def introductionFunc(speech):
+    totalCharacterCount = len(speech)
+
+    #Extract the introduction of the speech
+    introductionCharacterCount = (15 / 100) * totalCharacterCount
+    introduction = (speech[0:int(introductionCharacterCount)])
+    # print(">>>Introduction<<<")
+    # print(introduction)
+    return {
+        "message": introduction,
+        "score": scoreForIntroduction
+    }
+
+#....Analyze the introduction.....
+
+#identify best uses for introduction
+def introductionBestUsesFunc(speech):
+    final_words = []
+
+    referStudies = ["According to a study", "According to a research","According to a review","According to a survey","According to an analysis","according to one study","According to research","According to an investigation","According to research conducted","According to the study"
+    ,"according to the survey","according to this study","after an investigation","for a study","in a studio","in a survey","in one study","results of a study","study finds","study says","survey conducted","survey found"]
+    for wordPharse in referStudies:
+        if wordPharse in introductionFunc(speech):
+            final_words.append(wordPharse)
+            print(">>>Comments<<<")
+            print(f"You refer some other's works in your introduction. It is a good practice to refer some one's work in your introduction")
+    print(final_words)
+    return {
+        "message": final_words,
+        "score": scoreForIntroduction
+    }
+
+#Identify questions that user used in  introduction
+def introductionQuestions(speech):
+    doc = nlp(introductionFunc(speech)["message"])
+
+    patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
+    retVal = []
+    verb_phrases = textacy.extract.token_matches(doc, patterns)
+    for verb_phrases in verb_phrases:
+        print("You used questions forms in your introduction. It is a good practice for a introduction.")
+        print(f"Identified questions : {verb_phrases}")
+        retVal.append(f"Identified questions : {verb_phrases}")
+    return {
+        "message": retVal,
+        "score": scoreForIntroduction
+    }
+
--- a/content analyzing/keyWordExtraction.py
+++ b/content analyzing/keyWordExtraction.py
+import spacy
+from spacy.lang.en.stop_words import STOP_WORDS
+import string
+
+nlp = spacy.load('en_core_web_sm')
+
+def keywordExrtraction(topic, speech):
+    Topic = nlp(topic)
+    Content = nlp(speech)
+    stopwords = list(STOP_WORDS)
+
+    punctuation = string.punctuation + '\n'
+
+
+    # This is a loop for count words apart from stop words and frequencies.
+    word_frequencies = {}
+    for word in Content:
+        if word.text.lower() not in stopwords:
+            if word.text.lower() not in punctuation:
+                if word.text not in word_frequencies.keys():
+                    word_frequencies[word.text] = 1
+                else:
+                    word_frequencies[word.text] += 1
+
+    # print(word_frequencies)
+
+    topicWords = []
+
+    for words in Topic:
+        topicWords.append(words.text)
+    # print(topicWords)
+
+
+    keyWords = []
+
+    print("Extracted Key Words:")
+    for word in word_frequencies.keys():
+        if word_frequencies[word] >= 3:
+            keyWords.append(word)
+            print(word)
+
+    return {
+        "message": keyWords,
+        "score": 50/100
+    }
+
+# keywordExrtraction("dog", "The dog is a pet animal. A dog has sharp teeth so that it can eat flesh very easily, it has four legs, two ears, two eyes, a tail, a mouth, and a nose. It is a very clever animal and is very useful in catching thieves. It runs very fast, barks loudly and attacks the strangers. A dog saves the life of the master from danger. One can find dogs everywhere in the world. Dogs are a very faithful animal. It has a sharp mind and a strong sense of hearing smelling the things. It also has many qualities like swimming in the water, jumping from anywhere, good smelling sense.")
--- a/content analyzing/speechToText.py
+++ b/content analyzing/speechToText.py
+# importing libraries
+import speech_recognition as sr
+import os
+from pydub import AudioSegment
+from pydub.silence import split_on_silence
+
+# create a speech recognition object; get_large_audio_transcription() uses it
+# both to record each chunk file and to send it to recognize_google
+r = sr.Recognizer()
+
+# a function that splits the audio file into chunks
+# and applies speech recognition
+def get_large_audio_transcription(path):
+    """
+    Splitting the large audio file into chunks
+    and apply speech recognition on each of these chunks
+    """
+    # open the audio file using pydub
+    sound = AudioSegment.from_wav(path)
+    # split audio sound where silence is 700 miliseconds or more and get chunks
+    chunks = split_on_silence(sound,
+        # experiment with this value for your target audio file
+        min_silence_len = 500,
+        # adjust this per requirement
+        silence_thresh = sound.dBFS-14,
+        # keep the silence for 1 second, adjustable as well
+        keep_silence=500,
+    )
+    folder_name = "audio-chunks"
+    # create a directory to store the audio chunks
+    if not os.path.isdir(folder_name):
+        os.mkdir(folder_name)
+    whole_text = ""
+    # process each chunk
+    for i, audio_chunk in enumerate(chunks, start=1):
+        # export audio chunk and save it in
+        # the `folder_name` directory.
+        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
+        audio_chunk.export(chunk_filename, format="wav")
+        # recognize the chunk
+        with sr.AudioFile(chunk_filename) as source:
+            audio_listened = r.record(source)
+            # try converting it to text
+            try:
+                text = r.recognize_google(audio_listened)
+            except sr.UnknownValueError as e:
+                print("Error:", str(e))
+            else:
+                text = f"{text.capitalize()}. "
+                print(chunk_filename, ":", text)
+                whole_text += text
+    # return the text for all chunks detected
+    print(whole_text)
+    return whole_text
+
+# path = "../audio.wav"
+# print("\nFull text:", get_large_audio_transcription(path))
--- a/content analyzing/suggestContent.py
+++ b/content analyzing/suggestContent.py
+from selenium import webdriver
+
+driver = webdriver.Chrome()
+driver.get('https://wikipedia.com')
+searchbox = driver.find_element_by_xpath('//*[@id="searchInput"]')
+searchbox.send_keys(['cricket'])
+
+searchButton = driver.find_element_by_xpath('//*[@id="search-form"]/fieldset/button')
+searchButton.click()
--- a/content analyzing/synonyms.py
+++ b/content analyzing/synonyms.py
+import spacy
+from spacy.lang.en.stop_words import STOP_WORDS
+from string import punctuation
+
+# spaCy pipeline used by synonymsFunc() to tokenise the topic and the speech.
+nlp = spacy.load("en_core_web_sm")
+
+# Stop words as a list; synonymsFunc() skips any token found in it.
+stopwords = list(STOP_WORDS)
+# Rebinds the imported `punctuation` name to the same characters plus a newline.
+punctuation = punctuation + '\n'
+
+def synonymsFunc(topic, speech):
+    Topic = nlp(topic)
+    Content = nlp(speech)
+
+    Total_similarity = 0
+
+    for token1 in Content:
+        if token1.text.lower() not in stopwords:
+            if token1.text.lower() not in punctuation:
+                for token2 in Topic:
+                    print((token1.text, token2.text), "similarity", token1.similarity(token2))
+                    Total_similarity = Total_similarity + token1.similarity(token2)
+
+
+    print(len(Content))
+    print(f'Total score for the similarity: {Total_similarity}')
+    average_similarity = (Total_similarity/len(Content))*100
+    print(f'Average score for the similarity between topic and content: {average_similarity}%')
+    return {
+        "message": str(f'Average score for the similarity between topic and content: {average_similarity}%'),
+        "score": 50/100
+    }
--- a/content analyzing/webScraping.py
+++ b/content analyzing/webScraping.py
+from selenium import webdriver
+
+driver = webdriver.Chrome()
+driver.get('https://youtube.com')
+searchbox = driver.find_element_by_xpath('//*[@id="search"]')
+searchbox.send_keys(['speeches', 'Transportation'])
+
+searchButton = driver.find_element_by_xpath('//*[@id="search-icon-legacy"]')
+searchButton.click()
\ No newline at end of file