Commit 4fcc4668 authored by Dhananjaya Jayashanka's avatar Dhananjaya Jayashanka

content analyzing changes done

parent e56fb7de
'I would like to conclude': 'summary',
'According to an analysis': 'summary',
'conclude by saying': 'summary',
'final note': 'summary',
'I close by saying': 'summary',
'i should like to conclude by saying': 'summary',
'I should like to finish by saying': 'summary',
'I shall conclude by saying': 'summary',
'I want to conclude by saying': 'summary',
'I will close by saying': 'summary',
'i will conclude': 'summary',
'i will conclude by saying': 'summary',
'i will end by saying': 'summary',
'i will finish by saying': 'summary',
'i would end by saying': 'summary',
'i would like to conclude by stating': 'summary',
'i would like to end by saying': 'summary',
'in closing': 'summary',
'in conclusion': 'summary',
'let me close by saying': 'summary',
'let me conclude by saying': 'summary',
'let me finish by saying': 'summary',
'may i conclude by saying': 'summary',
'i would like briefly': 'summary',
'i would like to refer briefly': 'summary',
'i would like to return briefly': 'summary',
'let me briefly present': 'summary',
'let me briefly say': 'summary',
'let me briefly touch': 'summary',
'let me comment briefly': 'summary',
'let me quickly': 'summary',
'let me turn briefly': 'summary',
'allow me to say a few': 'summary',
'allow me to touch briefly': 'summary',
'i shall refer briefly': 'summary',
'i should like to refer briefly': 'summary',
'i want to say a couple': 'summary',
'i want to say a few': 'summary',
'i will briefly mention': 'summary',
'i will briefly summarize': 'summary',
'i will comment briefly': 'summary',
'i will refer briefly': 'summary',
'i will touch briefly': 'summary',
'i wish to say a few': 'summary',
'i would like to say a few': 'summary',
'i would like to say a word': 'summary',
'i would like to speak briefly': 'summary',
'let me briefly turn': 'summary',
'let me say a few': 'summary',
'let me touch briefly': 'summary',
'Let me briefly': 'summary',
'tell you a little story': 'story',
'told a story': 'story',
'let me tell you a story': 'story',
'thanks': 'thanking',
'ta muchly': 'thanking',
'thanks a bunch': 'thanking',
'thank you very much': 'thanking',
'many thanks': 'thanking',
'i thank you': 'thanking',
'thanks a million': 'thanking',
'it is hard to find words to express my gratitude': 'thanking',
'merci': 'thanking',
'with gratitude': 'thanking',
'i thank you from the bottom of my heart': 'thanking',
'accept my endless gratitude': 'thanking',
'thank you so much': 'thanking',
'i am all gratitude': 'thanking',
'i am grateful': 'thanking',
'i will never forget what you have done': 'thanking',
'thank you kindly': 'thanking',
'cheers': 'thanking',
'i appreciate that': 'thanking',
'respectfully yours with sincere gratitude': 'thanking',
'accord a thank': 'thanking',
'gracias': 'thanking',
'i thank you most warmly': 'thanking',
'i wish to thank everyone who pitched in': 'thanking',
'give thanks': 'thanking',
'much thanks': 'thanking',
'show appreciation': 'thanking',
'thanks very much': 'thanking',
'with kindest personal regards': 'thanking',
'appreciate it': 'thanking',
'thank you for helping me': 'thanking',
'you are a lifesaver': 'thanking',
'you have my gratitude': 'thanking',
'your generosity overwhelms me': 'thanking',
'i humbly thank you': 'thanking',
'it was so awesome of you': 'thanking',
'thanks so much': 'thanking',
'by all means': 'thanking',
'i wanted to thank you as soon as possible': 'thanking',
'nice one': 'thanking',
'of course': 'thanking',
'what would i do without you': 'thanking',
'i will forever be beholden to you': 'thanking',
'much appreciated': 'thanking',
'my gratitude to you for all you have done': 'thanking',
'thank you for never letting me down': 'thanking',
'thanks for everything': 'thanking',
'please accept my best thanks': 'thanking',
'bless you': 'thanking',
'delighted': 'thanking',
'how can i show you how grateful i am': 'thanking',
'i appreciate your time': 'thanking',
'thanks for taking the time to think of me': 'thanking',
'accept my deepest thanks': 'thanking',
'certainly': 'thanking',
'charmed': 'thanking',
'consider yourself heartily thanked': 'thanking',
'excellent': 'thanking',
'how can i ever possibly thank you': 'thanking',
'i cannot express my appreciation': 'thanking',
'splendid': 'thanking',
'thanks a ton': 'thanking',
'you are the best': 'thanking',
'all i can say is thanks': 'thanking',
'all my love and thanks to you': 'thanking',
'appreciate your feedback': 'thanking',
'appreciate your help': 'thanking',
'appreciate your input': 'thanking',
'blessings': 'thanking',
'expressing thanks': 'thanking',
'how can i repay you': 'thanking',
'i cannot thank you enough': 'thanking',
'i owe you one': 'thanking',
'i really appreciate it': 'thanking',
'i really appreciate that': 'thanking',
'i'm really grateful': 'thanking',
'if anyone deserves thanks it is you': 'thanking',
'sincerely': 'thanking',
'thank you for your thoughtfulness': 'thanking',
'thanks for your consideration': 'thanking',
'thanks heaps': 'thanking',
'that's so kind of you': 'thanking',
'warmest greetings to all': 'thanking',
'warmly': 'thanking',
'with appreciation': 'thanking',
'with sincere appreciation': 'thanking',
'with sincere thanks': 'thanking',
'don't know what to say': 'thanking',
'words are powerless to express my gratitude': 'thanking',
'words cannot describe how thankful i am': 'thanking',
'can't describe how thankful i am': 'thanking',
'how thoughtful of you': 'thanking',
'i can't thank you enough': 'thanking',
'i owe you big time': 'thanking',
'i really appreciate': 'thanking',
'i really appreciate your help': 'thanking',
'i would like to thank you': 'thanking',
'i'll forever be grateful': 'thanking',
'i'm grateful for your assistance': 'thanking',
'i'm really grateful for your help': 'thanking',
'i'm so grateful': 'thanking',
'if anyone deserve thanks it's you': 'thanking',
'it would be greatly appreciated': 'thanking',
'it's very kind of you': 'thanking',
'mercy bucket': 'thanking',
'mercy buckets': 'thanking',
'million thanks to you': 'thanking',
'my gratitude knows no bounds': 'thanking',
'oh you shouldn't have': 'thanking',
'please accept my deepest thanks': 'thanking',
'thank you for everything': 'thanking',
'thank you for your assistance': 'thanking',
'thanks a heap': 'thanking',
'thanks for that': 'thanking',
'you made my day': 'thanking',
'you saved my day': 'thanking',
'you're a dear': 'thanking',
'you're a life saver': 'thanking',
'you're awesome': 'thanking',
'you're great': 'thanking',
'you've saved my life': 'thanking',
'by dint': 'thanking',
'i thank you in advance': 'thanking',
'i want to thank': 'thanking',
'i wanted to thank': 'thanking',
'i would like to thank': 'thanking',
'letter of thanks': 'thanking',
'say thank': 'thanking',
'thanks ever so much': 'thanking',
'thanks just the same': 'thanking',
'very grateful': 'thanking',
'word of thanks': 'thanking',
\ No newline at end of file
'i am honored': 'summary',
'it is an honour': 'summary',
'feel honoured': 'summary',
'great honor': 'summary',
'great honour': 'summary',
'honor me': 'summary',
'honors me': 'summary',
'i am deeply honoured': 'summary',
'i am flattered': 'summary',
'i am humbled': 'summary',
'i am pleased': 'summary',
'i am privileged': 'summary',
'i am very honoured': 'summary',
'i have had the honour': 'summary',
'i have had the privilege': 'summary',
'i now have the honour': 'summary',
'it has been an honor': 'summary',
'it is a great honor': 'summary',
'it is a privilege': 'summary',
'it is indeed an honour': 'summary',
'it was an honour': 'summary',
'it would be an honor': 'summary',
'so honored': 'summary',
'such an honor': 'summary',
'we are honoured': 'summary',
'what a pleasure': 'summary',
'what an honor': 'summary',
'what honor': 'summary',
'what honour': 'summary',
'you honor': 'summary',
'you honor me': 'summary',
'you honour me': 'summary',
'once upon a time': 'summary',
'long ago': 'summary',
'once': 'summary',
'formerly': 'summary',
'in the past': 'summary',
'at one time': 'summary',
'long time ago': 'summary',
'away back': 'summary',
'in former times': 'summary',
'in times gone by': 'summary',
'many years ago': 'summary',
'very long time ago': 'summary',
'in times past': 'summary',
'back in the day': 'summary',
'in the olden days': 'summary',
'some time ago': 'summary',
'many moons ago': 'summary',
'ages ago': 'summary',
'in days gone by': 'summary',
'in earlier times': 'summary',
'in olden times': 'summary',
'good while ago': 'summary',
'in days of old': 'summary',
'in days of yore': 'summary',
'one time previously': 'summary',
'at an earlier time': 'summary',
'back in the old days': 'summary',
'in bygone days': 'summary',
'way back in the past': 'summary',
'down memory lane': 'summary',
'in the good old days': 'summary',
'in years gone by': 'summary',
'long time before': 'summary',
'long while ago': 'summary',
'much further back': 'summary',
'one of these days': 'summary',
'there once was': 'summary',
'there was a time': 'summary',
'there was a time when': 'summary',
'there was once': 'summary',
'time was when': 'summary',
'well before': 'summary',
'ahead of time': 'summary',
'all those years ago': 'summary',
'as long ago': 'summary',
'at the past': 'summary',
'awhile ago': 'summary',
'awhile back': 'summary',
'been a very long time': 'summary',
'donkey's years ago': 'summary',
'during the past few': 'summary',
'during the previous': 'summary',
'earlier on': 'summary',
'far earlier': 'summary',
'far sooner': 'summary',
'have for a long time been': 'summary',
'in an era': 'summary',
'in ancient times': 'summary',
'in days past': 'summary',
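# A minimal sketch of how the phrase lists above can be loaded into a lookup dictionary,
# assuming every line has the form 'phrase': 'category', as in the files shown here.
def loadPhraseCategories(filePath):
    phraseCategories = {}
    with open(filePath, 'r') as file:
        for line in file:
            clear_line = line.replace("\n", '').replace(",", '').replace("'", '').strip()
            if not clear_line:
                continue
            phrase, category = clear_line.split(':', 1)
            phraseCategories[phrase.strip()] = category.strip()
    return phraseCategories

# e.g. loadPhraseCategories('content analyzing/bestPracticesForConclusion.txt')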
import spacy
from scipy.io.wavfile import read

scoreForClearness = 50/100

nlp = spacy.load("en_core_web_sm")


def clearnerssWords(text):
    doc = nlp(text)
    returnVal = ""
    for token in doc:
        count = len(token)
        if count > 12:
            returnVal += f"{token.text} - {count} letters: This word is too complicated. It is better to use a simpler word."
    return {
        "message": returnVal,
        "score": scoreForClearness
    }


def clearnerssSentence(text):
    doc = nlp(text)
    returnVal = ""
    for sent in doc.sents:
        word_count = 0
        for words in sent:
            word_count = word_count + 1
        if word_count > 10:
            returnVal += f'"{sent}" is an overcomplicated sentence. There are {word_count} words in it.'
    return {
        "message": returnVal,
        "score": scoreForClearness
    }


def clearnessReadAudioFile(filePath):
    # Read the audio file
    samplerate, data = read(filePath)
    # Frame (sample) rate of the audio
    print(samplerate)
    # Duration of the audio in seconds
    duration = len(data)/samplerate
    print("Duration of Audio in Seconds", duration)
    print("Duration of Audio in Minutes", duration/60)
    print(len(data))
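# Example usage (a minimal sketch; the sample text below is made up for illustration):
if __name__ == "__main__":
    sample = "Internationalization considerations notwithstanding, this sentence is intentionally long so that the sentence check fires."
    print(clearnerssWords(sample)["message"])
    print(clearnerssSentence(sample)["message"])
    # clearnessReadAudioFile("audio.wav")  # assumes a WAV file named audio.wav exists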
import spacy
import textacy

scoreForConclusion = 60/100

nlp = spacy.load("en_core_web_sm")


# Treat the last 15% of the speech (by character count) as the conclusion
def conclisions(speech):
    totalCharacterCount = len(speech)
    conclusionCharacterCount = (85/100)*totalCharacterCount
    conclusion = speech[int(conclusionCharacterCount):int(totalCharacterCount)]
    return {
        "message": conclusion,
        "score": scoreForConclusion
    }


def comments(speech):
    conclusion = conclisions(speech)["message"]
    final_words = []
    with open('content analyzing/bestPracticesForConclusion.txt', 'r') as file:
        for line in file:
            clear_line = line.replace("\n", '').replace(",", '').replace("'", '').strip()
            word, emotion = clear_line.split(':')
            if word in conclusion:
                final_words.append(word)
    return {
        "message": final_words,
        "score": scoreForConclusion
    }


def questions(speech):
    retVal = []
    doc = nlp(conclisions(speech)["message"])
    # patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
    # verb_phrases = textacy.extract.token_matches(doc, patterns)
    # for verb_phrases in verb_phrases:
    #     print("You used question forms in your conclusion. It is a good practice for a conclusion.")
    #     print(f"Identified questions : {verb_phrases}")
    #     retVal.append(f"Identified questions : {verb_phrases}")
    # Look for an ADV + AUX + PRON sequence (e.g. "why should we"), which usually signals a question
    tokens = [token for token in doc]
    for i in range(len(tokens) - 2):
        if tokens[i].pos_ == 'ADV' and tokens[i + 1].pos_ == 'AUX' and tokens[i + 2].pos_ == 'PRON':
            retVal.append(f"{tokens[i]} {tokens[i + 1]} {tokens[i + 2]}")
    return {
        "message": retVal,
        "score": scoreForConclusion
    }
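# Example usage (a minimal sketch; the speech text is made up for illustration, and
# comments() assumes content analyzing/bestPracticesForConclusion.txt is available):
if __name__ == "__main__":
    sampleSpeech = ("Transportation shapes how our cities grow and how people live. " * 10
                    + "In conclusion, why should we wait any longer to invest in better public transport?")
    print(conclisions(sampleSpeech)["message"])
    print(comments(sampleSpeech)["message"])
    print(questions(sampleSpeech)["message"])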
import spacy
import textacy

scoreForIntroduction = 40/100

nlp = spacy.load("en_core_web_sm")


# Treat the first 15% of the speech (by character count) as the introduction
def introductionFunc(speech):
    totalCharacterCount = len(speech)
    introductionCharacterCount = (15 / 100) * totalCharacterCount
    introduction = speech[0:int(introductionCharacterCount)]
    return {
        "message": introduction,
        "score": scoreForIntroduction
    }


def introductionBestUsesFunc(speech):
    introduction = introductionFunc(speech)["message"]
    final_words = []
    with open('content analyzing/bestPracticesForIntroduction.txt', 'r') as file:
        for line in file:
            clear_line = line.replace("\n", '').replace(",", '').replace("'", '').strip()
            word, emotion = clear_line.split(':')
            if word in introduction:
                final_words.append(word)
    return {
        "message": final_words,
        "score": scoreForIntroduction
    }


def introductionQuestions(speech):
    doc = nlp(introductionFunc(speech)["message"])
    # patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
    # verb_phrases = textacy.extract.token_matches(doc, patterns)
    # for verb_phrases in verb_phrases:
    #     print("You used question forms in your introduction. It is a good practice for an introduction.")
    #     print(f"Identified questions : {verb_phrases}")
    #     retVal.append(f"Identified questions : {verb_phrases}")
    # Look for an ADV + AUX + PRON sequence (e.g. "how do we"), which usually signals a question
    tokens = [token for token in doc]
    retVal = []
    for i in range(len(tokens) - 2):
        if tokens[i].pos_ == 'ADV' and tokens[i + 1].pos_ == 'AUX' and tokens[i + 2].pos_ == 'PRON':
            retVal.append(f"{tokens[i]} {tokens[i + 1]} {tokens[i + 2]}")
    return {
        "message": retVal,
        "score": scoreForIntroduction
    }
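# Example usage (a minimal sketch; the speech text is made up for illustration, and
# introductionBestUsesFunc() assumes content analyzing/bestPracticesForIntroduction.txt is available):
if __name__ == "__main__":
    sampleSpeech = ("How often do we think about road safety? Let me tell you a story. "
                    + "Transportation shapes how our cities grow and how people live. " * 10)
    print(introductionFunc(sampleSpeech)["message"])
    print(introductionBestUsesFunc(sampleSpeech)["message"])
    print(introductionQuestions(sampleSpeech)["message"])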
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
import string

nlp = spacy.load('en_core_web_sm')


def keywordExrtraction(topic, speech):
    Topic = nlp(topic)
    Content = nlp(speech)
    stopwords = list(STOP_WORDS)
    punctuation = string.punctuation + '\n'
    word_frequencies = {}
    for word in Content:
        if word.text.lower() not in stopwords:
            if word.text.lower() not in punctuation:
                if word.text not in word_frequencies.keys():
                    word_frequencies[word.text] = 1
                else:
                    word_frequencies[word.text] += 1
    topicWords = []
    for words in Topic:
        topicWords.append(words.text)
    # print(topicWords)
    keyWords = []
    print("Extracted Key Words:")
    for word in word_frequencies.keys():
        if word_frequencies[word] >= 3:
            keyWords.append(word)
            print(word)
    return {
        "message": keyWords,
        "score": 50/100
    }
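# Example usage (a minimal sketch; the topic and speech below are made up for illustration):
if __name__ == "__main__":
    result = keywordExrtraction(
        "public transport",
        "Buses and trains move millions of people every day. "
        "Buses reduce traffic, and trains connect cities. "
        "Buses and trains keep transport affordable.")
    print(result["message"])  # the words that occur at least three times in the speech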
import spacy

# Testing the model
test_text = "I had such high. hopes for this dress and really crappy worst product hate. it wporst bad."


def identifyQuotes(text):
    # Load the custom text-classification pipeline trained for quote identification
    nlp = spacy.load("content analyzing/quotesIdentify")
    doc = nlp(text)
    # doc.cats maps each text-category label to its predicted score
    return doc.cats


print(identifyQuotes(test_text))
\ No newline at end of file
# importing libraries
import speech_recognition as sr
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence

# create a speech recognition object
r = sr.Recognizer()


# a function that splits the audio file into chunks
# and applies speech recognition
def get_large_audio_transcription(path):
    """
    Split the large audio file into chunks
    and apply speech recognition on each of these chunks.
    """
    # open the audio file using pydub
    sound = AudioSegment.from_wav(path)
    # split the audio where the silence is 500 milliseconds or longer and get the chunks
    chunks = split_on_silence(sound,
                              # experiment with this value for your target audio file
                              min_silence_len=500,
                              # adjust this per requirement
                              silence_thresh=sound.dBFS - 14,
                              # keep 500 ms of silence at each chunk boundary, adjustable as well
                              keep_silence=500,
                              )
    folder_name = "audio-chunks"
    # create a directory to store the audio chunks
    if not os.path.isdir(folder_name):
        os.mkdir(folder_name)
    whole_text = ""
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        # export the audio chunk and save it in the `folder_name` directory
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # recognize the chunk
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
            # try converting it to text (recognize_google needs an internet connection)
            try:
                text = r.recognize_google(audio_listened)
            except sr.UnknownValueError as e:
                print("Error:", str(e))
            else:
                text = f"{text.capitalize()}. "
                print(chunk_filename, ":", text)
                whole_text += text
    # return the text for all chunks detected
    print(whole_text)
    return whole_text


# path = "../audio.wav"
# print("\nFull text:", get_large_audio_transcription(path))
from selenium import webdriver


def suggestContent():
    driver = webdriver.Chrome("chromedriver.exe")
    driver.get('https://wikipedia.com')
    searchbox = driver.find_element_by_xpath('//*[@id="searchInput"]')
    searchbox.send_keys('cricket')
    searchButton = driver.find_element_by_xpath('//*[@id="search-form"]/fieldset/button')
    searchButton.click()
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

nlp = spacy.load("en_core_web_sm")
stopwords = list(STOP_WORDS)
punctuation = punctuation + '\n'


def synonymsFunc(topic, speech):
    Topic = nlp(topic)
    Content = nlp(speech)
    Total_similarity = 0
    # Sum the similarity of every content token (minus stopwords and punctuation) against every topic token.
    # Note: en_core_web_sm ships without static word vectors, so these similarity scores are only approximate.
    for token1 in Content:
        if token1.text.lower() not in stopwords:
            if token1.text.lower() not in punctuation:
                for token2 in Topic:
                    print((token1.text, token2.text), "similarity", token1.similarity(token2))
                    Total_similarity = Total_similarity + token1.similarity(token2)
    print(len(Content))
    print(f'Total score for the similarity: {Total_similarity}')
    average_similarity = (Total_similarity/len(Content))*100
    print(f'Average score for the similarity between topic and content: {average_similarity}%')
    return {
        "message": f'Average score for the similarity between topic and content: {average_similarity}%',
        "score": 50/100
    }
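# Example usage (a minimal sketch; the topic and speech are made up for illustration,
# and the scores are rough because en_core_web_sm has no static word vectors):
if __name__ == "__main__":
    result = synonymsFunc("transportation",
                          "Buses, trains and bicycles move people around the city every day.")
    print(result["message"])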
from selenium import webdriver


def webScrap():
    driver = webdriver.Chrome("chromedriver.exe")
    driver.get('https://youtube.com')
    searchbox = driver.find_element_by_xpath('//*[@id="search"]')
    searchbox.send_keys('speeches Transportation')
    searchButton = driver.find_element_by_xpath('//*[@id="search-icon-legacy"]')
    searchButton.click()
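# Note: find_element_by_xpath was deprecated and later removed in Selenium 4, so webScrap()
# and suggestContent() above assume Selenium 3. A minimal sketch of the equivalent call with
# the current API (same XPath, chromedriver assumed to be on the PATH):
#     from selenium.webdriver.common.by import By
#     searchbox = driver.find_element(By.XPATH, '//*[@id="search"]')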