Commit 6cdca649 authored by chalikaM

update

parent 74a8ac0b
import spacy
from scipy.io.wavfile import read
import numpy as np
import matplotlib.pyplot as plt
# from pydub import AudioSegment
import os
import glob
#Identify overly complex words (tokens longer than 12 characters)
nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying U.K. startup for $1 billion accumulativeness. This ia a dog. <y name is Chalika Mihiran")
doc = nlp("Apple is looking at buying U.K. startup for $1 billion accumulativeness. This ia a dog. name is Chalika Mihiran")
for token in doc:
    count = len(token)
    if count > 12:
@@ -22,29 +16,24 @@ for sent in doc.sents:
    for words in sent:
        # print(words.text)
        word_count = word_count + 1
    if word_count > 10:
        print(f'"{sent}" is an overcomplicated sentence. There are {word_count} words in it.')
    # print(word_count)
sentence_tokens = [sent for sent in doc.sents]
# print(sentence_tokens)
# sentence_words = {}
# word_count = 0
# for sent in sentence_tokens:
# for token in sent:
# sentence_words[sent] = word_count + 1
# print(sentence_words)
#Calculate the rate of speech
# Read the audio file
samplerate, data = read('C:/Users/CHALIKA PC/Desktop/Year 04/Research/Audio/videoplayback_1_.wav')
# Sample rate of the audio
# print(samplerate)
print(samplerate)
# Duration of the audio in Seconds
# duration = len(data)/samplerate
# print("Duration of Audio in Seconds", duration)
# print("Duration of Audio in Minutes", duration/60)
duration = len(data)/samplerate
print("Duration of Audio in Seconds", duration)
print("Duration of Audio in Minutes", duration/60)
# print(len(data))
print(len(data))
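# --- Hedged sketch (assumption, not part of the original commit) ---
# The duration computed above could be combined with the word count of the
# spaCy Doc to approximate the speaking rate in words per minute, assuming the
# parsed text and the recording cover the same speech. The names word_total and
# words_per_minute are illustrative only.
word_total = sum(1 for token in doc if token.is_alpha)
words_per_minute = word_total / (duration / 60)
print("Approximate speaking rate (words per minute):", words_per_minute)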
import sys
from operator import index
import spacy
import textacy
#Identify words that are in a conclusion
nlp = spacy.load("en_core_web_sm")
speech = """According to a research Global warming is an international phenomenon where the earth’s mean surface temperature is increasing rapidly due to the accumulation of greenhouse gases in the atmosphere. The source of these greenhouse gases is various, both natural and manmade, such as forest fires, industrialisation, burning of agricultural crops, burning of fossil fuels etc. Global warming has proved to be a huge force created by man that can have the potential to destroy the natural cycle of our planet.
@@ -17,18 +12,18 @@ Draughts, forest fires floods and earthquakes are some of the indications for ch
Certain diseases like Malaria are caused due to global warming since there is the migration of species from one place to another. Many scientists also predict that the COVID-19 pandemic that we are witnessing in the year 2020 can also be traced back to the impact of global warming when seen from the bird’s point of you
I would like to conclude my global warming speech by saying that the international community, where does they governments and citizens has to come forward to reduce your carbon footprint to save the planet for our future generation. All of us living here are the stakeholders of nature and we are responsible for how we take care of our environment. As the saying goes, charity begins at home, we have to start using biodegradable materials at our houses, reduce plastic waste and other such changes in our lifestyle so that we can make an impact to reduce global warming globally."""
#Count the total number of characters in the speech
totalCharacterCount = len(speech)
print(">>>Total character count<<<")
print(totalCharacterCount)
#Extract the conclusion of the speech
conclusionCharacterCount = (85/100)*totalCharacterCount
print(">>>Character count for the conclusion<<<")
print(conclusionCharacterCount)
conclusion = (speech[int(conclusionCharacterCount):int(totalCharacterCount)])
print(">>>Conclusion<<<")
print(conclusion)
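# Worked example (illustration only): for a 2,000-character speech,
# conclusionCharacterCount = (85/100) * 2000 = 1700, so speech[1700:2000]
# keeps the final 15% of the text as the conclusion.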
#....Analyze the conclusion.....
#Identify best practices for a conclusion
final_words = []
endingPhrases = ["I would like to conclude", "conclude by saying","final note","I close by saying","According to an analysis","I shall conclude by saying","i should like to conclude by saying","I should like to finish by saying","I want to conclude by saying","I will close by saying","i will conclude","i will conclude by saying","i will end by saying","i will finish by saying","i would end by saying","i would like to conclude","i would end by saying","i would like to conclude by stating","i would like to end by saying","in closing","in conclusion","let me close by saying","let me conclude by saying","let me finish by saying","may i conclude by saying"
@@ -40,14 +35,14 @@ for wordPharse in endingPhrases:
print("You used some ending phrases in your conclusion. It is a good practice to use ending phrases in your introduction")
print(final_words)
#Identify questions that the user used in the conclusion
doc = nlp(conclusion)
#Declare the patterns of questions
patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
verb_phrases = textacy.extract.token_matches(doc, patterns)
# print(s[0:introductionCharacterCount])
for verb_phrase in verb_phrases:
    print("You used question forms in your conclusion. It is a good practice for a conclusion.")
    print(f"Identified questions : {verb_phrase}")
import sys
from operator import index
import spacy
import textacy
#Identify words that are in an introduction
nlp = spacy.load("en_core_web_sm")
speech = """According to a research Global warming is an international phenomenon where the earth’s mean surface temperature is increasing rapidly due to the accumulation of greenhouse gases in the atmosphere. where does they The source of these greenhouse gases is various, both natural and manmade, such as forest fires, industrialisation, burning of agricultural crops, burning of fossil fuels etc. Global warming has proved to be a huge force created by man that can have the potential to destroy the natural cycle of our planet.
speech = """According to a research Global warming is an where does he international phenomenon where the earth’s mean surface temperature is increasing rapidly due to the accumulation of greenhouse gases in the atmosphere. where does they The source of these greenhouse gases is various, both natural and manmade, such as forest fires, industrialisation, burning of agricultural crops, burning of fossil fuels etc. Global warming has proved to be a huge force created by man that can have the potential to destroy the natural cycle of our planet.
Global warming is not a recent phenomenon but it has surely improved and increased because of rapid industrialisation, population explosion, agricultural explosion and the ever-increasing greed for economic growth for countries at the cos of of exploiting our environment. Global warming did exist hundreds of years before when civilizations begin to occur. There are examples of burning fossil fuels and causing pollution even during early civilizations like the Indus valley civilisation or Harappan civilization, but the magnitude of global warming today is thousand times more than what it was a few centuries back.
Some of the main greenhouse gases that are causing global warming are carbon dioxide, methane, nitrous oxide, sulphur hexafluoride, hydrofluorocarbons and perfluorocarbons. Most of the greenhouse gases are produced due to the consumption of fossil fuels.
The effects of global warming are plenty and one of the most catastrophic effects of global warming is the deterioration of the ozone layer. Due to the accumulation of chlorofluorocarbons in the stratosphere, scientists have predicted that an area above the continent of Antarctica has a huge ozone hole. The ozone layer is a layer in the atmosphere which protects the Earth’s surface from the harmful ultraviolet radiations coming from the sun. When this ozone layer gets depleted, the living beings on the planet earth is easily exposed to UV rays. This can cause catastrophic effects on human health, agricultural cycle, climatic cycle and on the very existence of our planet.
@@ -17,19 +14,18 @@ Draughts, forest fires floods and earthquakes are some of the indications for ch
Certain diseases like Malaria are caused due to global warming since there is the migration of species from one place to another. Many scientists also predict that the COVID-19 pandemic that we are witnessing in the year 2020 can also be traced back to the impact of global warming when seen from the bird’s point of you
I would like to conclude my global warming speech by saying that the international community, governments and citizens has to come forward to reduce your carbon footprint to save the planet for our future generation. where does they All of us living here are the stakeholders of nature and we are responsible for how we take care of our environment. As the saying goes, charity begins at home, we have to start using biodegradable materials at our houses, reduce plastic waste and other such changes in our lifestyle so that we can make an impact to reduce global warming globally."""
#Count the total number of characters in the speech
totalCharacterCount = len(speech)
print(">>>Total character count<<<")
print(totalCharacterCount)
introductionCharacterCount = (15/100)*totalCharacterCount
print(">>>Character count for the introduction<<<")
print(introductionCharacterCount)
#Extract the introduction of the speech
introductionCharacterCount = (15 / 100) * totalCharacterCount
introduction = (speech[0:int(introductionCharacterCount)])
print(">>>Introduction<<<")
print(introduction)
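# Worked example (illustration only): for a 2,000-character speech,
# introductionCharacterCount = (15/100) * 2000 = 300, so speech[0:300]
# keeps the first 15% of the text as the introduction.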
#....Analyze the introduction.....
#Identify best practices for an introduction
final_words = []
referStudies = ["According to a study", "According to a research","According to a review","According to a survey","According to an analysis","according to one study","According to research","According to an investigation","According to research conducted","According to the study"
@@ -41,14 +37,13 @@ for wordPharse in referStudies:
print(f"You refer some other's works in your introduction. It is a good practice to refer some one's work in your introduction")
print(final_words)
#Identify questions that the user used in the introduction
doc = nlp(introduction)
patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
verb_phrases = textacy.extract.token_matches(doc, patterns)
# print(s[0:introductionCharacterCount])
# for verb_phrases in verb_phrases:
# print("You used questions forms in your introduction. It is a good practice for a introduction.")
# print(f"Identified questions : {verb_phrases}")
for verb_phrase in verb_phrases:
    print("You used question forms in your introduction. It is a good practice for an introduction.")
    print(f"Identified questions : {verb_phrase}")
import spacy
# Import stop words from spaCy; a predefined stop word list is already provided by spaCy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from difflib import SequenceMatcher
from heapq import nlargest
nlp = spacy.load('en_core_web_sm')
# content = """
# There are broadly two types of extractive summarization tasks depending on what the summarization program focuses on. The first is generic summarization, which focuses on obtaining a generic summary or abstract of the collection (whether documents, or sets of images, or videos, news stories etc.). The second is query relevant summarization, sometimes called query-based summarization, which summarizes objects specific to a query. Summarization systems are able to create both query relevant text summaries and generic machine-generated summaries depending on what the user needs.
# An example of a summarization problem is document summarization, which attempts to automatically produce an abstract from a given document. Sometimes one might be interested in generating a summary from a single source document, while others can use multiple source documents (for example, a cluster of articles on the same topic). This problem is called multi-document summarization. A related application is summarizing news articles. Imagine a system, which automatically pulls together news articles on a given topic (from the web), and concisely represents the latest news as a summary.
# Image collection summarization is another application example of automatic summarization. It consists in selecting a representative set of images from a larger set of images.[3] A summary in this context is useful to show the most representative images of results in an image collection exploration system. Video summarization is a related domain, where the system automatically creates a trailer of a long video. This also has applications in consumer or personal videos, where one might want to skip the boring or repetitive actions. Similarly, in surveillance videos, one would want to extract important and suspicious activity, while ignoring all the boring and redundant frames captured.
# """
Topic = nlp("dog")
Content = nlp("The dog is a pet animal. A dog has sharp teeth so that it can eat flesh very easily, it has four legs, two ears, two eyes, a tail, a mouth, and a nose. It is a very clever animal and is very useful in catching thieves. It runs very fast, barks loudly and attacks the strangers. A dog saves the life of the master from danger. One can find dogs everywhere in the world. Dogs are a very faithful animal. It has a sharp mind and a strong sense of hearing smelling the things. It also has many qualities like swimming in the water, jumping from anywhere, good smelling sense.")
stopwords = list(STOP_WORDS)
# Topic = "dog"
# content = "The dog is a pet animal. A dog has sharp teeth so that it can eat flesh very easily, it has four legs, two ears, two eyes, a tail, a mouth, and a nose. It is a very clever animal and is very useful in catching thieves. It runs very fast, barks loudly and attacks the strangers. A dog saves the life of the master from danger. One can find dogs everywhere in the world. Dogs are a very faithful animal. It has a sharp mind and a strong sense of hearing smelling the things. It also has many qualities like swimming in the water, jumping from anywhere, good smelling sense."
#
#
# doc = nlp(content)
# Tokenization
# tokens = [token.text for token in Content]
# print(tokens)
# The punctuation list does not include a newline character, so append it here
punctuation = punctuation + '\n'
# punctuation
# Loop to count word frequencies for words that are not stop words
word_frequencies = {}
@@ -39,29 +21,25 @@ for word in Content:
            else:
                word_frequencies[word.text] += 1
print(word_frequencies)
# print(word_frequencies)
topicWords = []
for words in Topic:
    topicWords.append(words.text)
print(topicWords)
# print(topicWords)
keyWords = []
print("Extracted Key Words:")
for word in word_frequencies.keys():
    if word_frequencies[word] >= 3:
        # print(word)
        keyWords.append(word)
        print(word)
print(keyWords)
#
# for token1 in keyWords:
# for token2 in topicWords:
# print("similarity", token1.similarity(token2))
# Total_similarity = Total_similarity + token1.similarity(token2)
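# --- Hedged sketch (assumption, not part of the original code) ---
# One way the commented-out idea above could be made runnable: the extracted
# keyword strings are re-parsed with spaCy so that Token.similarity() can be
# used against the topic tokens. The names keyword_doc, total_keyword_similarity
# and average_keyword_similarity are illustrative only. Note that
# en_core_web_sm ships without word vectors, so these scores are only rough
# approximations; a model such as en_core_web_md would give more meaningful values.
keyword_doc = nlp(" ".join(keyWords))
total_keyword_similarity = 0.0
for token1 in keyword_doc:
    for token2 in Topic:
        total_keyword_similarity += token1.similarity(token2)
if len(keyword_doc) > 0:
    average_keyword_similarity = total_keyword_similarity / (len(keyword_doc) * len(Topic))
    print("Average keyword/topic similarity:", average_keyword_similarity)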
@@ -2,33 +2,14 @@ import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
import spacy
# Import stop words from spaCy; a predefined stop word list is already provided by spaCy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
nlp = spacy.load("en_core_web_sm")
text = "elephant tiger sometimes re. how?"
doc1 = nlp("wolf")
doc2 = nlp("dog")
# print(doc1.similarity(doc2))
stopwords = list(STOP_WORDS)
punctuation = punctuation + '\n'
# Topic = nlp("technology")
# Content = nlp("The dog is a pet animal. A dog has sharp teeth so that it can eat flesh very easily, it has four legs, two ears, two eyes, a tail, a mouth, and a nose. It is a very clever animal and is very useful in catching thieves. It runs very fast, barks loudly and attacks the strangers. A dog saves the life of the master from danger. One can find dogs everywhere in the world. Dogs are a very faithful animal. It has a sharp mind and a strong sense of hearing smelling the things. It also has many qualities like swimming in the water, jumping from anywhere, good smelling sense.")
Topic = nlp("transportation")
Content = nlp("Transportation is movement of people and goods from one location to another. Throughout history, the economic wealth and military power of a people or a nation have been closely tied to efficient methods of transportation. Transportation provides access to natural resources and promotes trade, allowing a nation to accumulate wealth and power. Transportation also allows the movement of soldiers, equipment, and supplies so that a nation can wage war.Transportation is movement of people and goods from one location to another. Throughout history, the economic wealth and military power of a people or a nation have been closely tied to efficient methods of transportation. Transportation provides access to natural resources and promotes trade, allowing a nation to accumulate wealth and power. Transportation also allows the movement of soldiers, equipment, and supplies so that a nation can wage war.Transportation is vital to a nation's economy. Reducing the costs of transporting natural resources to production sites and moving finished goods to markets is one of the key factors in economic competition. The transportation industry is the largest industry in the world. It includes the manufacture and distribution of vehicles, the production and distribution of fuel, and the provision of transportation services. In the 1990s, approximately 11 percent of the U.S. gross domestic product and an estimated 10 percent of all jobs in the United States were related to the transportation industry.The same transportation systems that link a nation can also be used in the nation's war efforts. The rapid movement of troops, equipment, and supplies can be a deciding factor in winning a battle or a war. Transportation is usually classified by the medium in which the movement occurs, such as by land, air, water, or pipeline. Within each of the first three media, many different methods are used to move people and goods from place to place. Pipelines are used mainly to transport liquids or gases over long distances.")
# for word in ex2:
# if word.text.lower() not in stopwords:
# if word.text.lower() not in punctuation:
Total_similarity = 0
for token1 in Content:
@@ -44,5 +25,3 @@ print(f'Total score for the similarity: {Total_similarity}')
average_similarity = (Total_similarity/len(Content))*100
print(f'Average score for the similarity between topic and content: {average_similarity}%')
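# Worked example (illustration only): if Total_similarity summed to 120 over a
# Content doc of 400 tokens, the average similarity would be
# (120 / 400) * 100 = 30%.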
# mylist = [(token1.text,token2.text,token1.similarity(token2)) for token2 in ex1 for token1 in ex1]
# print(mylist)