......@@ -37,16 +37,16 @@ app.config["DEBUG"] = True
@app.route('/clearness/word', methods=['GET'])
def clearnerssWords():
if clearness.clearnerssWords(request.args['text']):
return clearness.clearnerssWords(request.args['text'])
if clearness.identify_complicated_words(request.args['text']):
return clearness.identify_complicated_words(request.args['text'])
return "No results"
@app.route('/clearness/sentense', methods=['GET'])
def clearnerssSentence():
if clearness.clearnerssSentence(request.args['text']):
return clearness.clearnerssSentence(request.args['text'])
if clearness.identify_complicated_sentences(request.args['text']):
return clearness.identify_complicated_sentences(request.args['text'])
return "No results"
......@@ -61,81 +61,81 @@ def quotesSentence():
@app.route('/emotion/sentense', methods=['GET'])
def emotionSentence():
if textAnalyze.textAnalyze(request.args['text']):
return json.dumps(textAnalyze.textAnalyze(request.args['text']))
if textAnalyze.text_analyze(request.args['text']):
return json.dumps(textAnalyze.text_analyze(request.args['text']))
return "No results"
@app.route('/conclusion', methods=['GET'])
def conclusions():
if conclusion.conclisions(request.args['text']):
return conclusion.conclisions(request.args['text'])
if conclusion.identify_conclusion(request.args['text']):
return conclusion.identify_conclusion(request.args['text'])
return "No results"
@app.route('/conclusion/comments', methods=['GET'])
def comments():
if len(conclusion.comments(request.args['text'])) > 0:
return json.dumps(conclusion.comments(request.args['text']))
if len(conclusion.conclusion_best_practices(request.args['text'])) > 0:
return json.dumps(conclusion.conclusion_best_practices(request.args['text']))
return "No results"
@app.route('/conclusion/questions', methods=['GET'])
def questions():
if conclusion.questions(request.args['text']):
return json.dumps(conclusion.questions(request.args['text']))
if conclusion.conclusion_questions(request.args['text']):
return json.dumps(conclusion.conclusion_questions(request.args['text']))
return "No results"
@app.route('/introduction', methods=['GET'])
def introductionFunc():
if introduction.introductionFunc(request.args['text']):
return introduction.introductionFunc(request.args['text'])
if introduction.identify_introduction(request.args['text']):
return introduction.identify_introduction(request.args['text'])
return "No results"
@app.route('/introduction/bestUses', methods=['GET'])
def introductionBestUsesFunc():
if introduction.introductionBestUsesFunc(request.args['text']):
return introduction.introductionBestUsesFunc(request.args['text'])
if introduction.introduction_best_practices(request.args['text']):
return introduction.introduction_best_practices(request.args['text'])
return "No results"
@app.route('/introduction/questions', methods=['GET'])
def introductionQuestions():
if len(introduction.introductionQuestions(request.args['text'])) > 0:
return json.dumps(introduction.introductionQuestions(request.args['text']))
if len(introduction.introduction_questions(request.args['text'])) > 0:
return json.dumps(introduction.introduction_questions(request.args['text']))
return "No results"
@app.route('/keywordExtraction', methods=['GET'])
def keywordExtraction():
print(keyWordExtraction.keywordExrtraction(request.args['topic'], request.args['speech']))
if len(keyWordExtraction.keywordExrtraction(request.args['topic'], request.args['speech'])) > 0:
return json.dumps(keyWordExtraction.keywordExrtraction(request.args['topic'], request.args['speech']))
print(keyWordExtraction.key_word_extraction(request.args['topic'], request.args['speech']))
if len(keyWordExtraction.key_word_extraction(request.args['topic'], request.args['speech'])) > 0:
return json.dumps(keyWordExtraction.key_word_extraction(request.args['topic'], request.args['speech']))
return "No results"
@app.route('/synonyms', methods=['GET'])
def synonymsFunction():
if synonyms.synonymsFunc(request.args['topic'], request.args['speech']):
return synonyms.synonymsFunc(request.args['topic'], request.args['speech'])
if synonyms.content_relativity(request.args['topic'], request.args['speech']):
return synonyms.content_relativity(request.args['topic'], request.args['speech'])
return "No results"
@app.route('/doubleWords', methods=['GET'])
def doubleWordsFunc():
if len(DoubleWords.processDoubleWords(request.args['text'])) > 0:
return json.dumps(DoubleWords.processDoubleWords(request.args['text']))
if len(DoubleWords.identify_repeated_words(request.args['text'])) > 0:
return json.dumps(DoubleWords.identify_repeated_words(request.args['text']))
return "No results"
......@@ -150,8 +150,8 @@ def fillerWordsFunc():
@app.route('/countPauses', methods=['GET'])
def countPauses():
if Silence.countPauses("temp.wav"):
return Silence.countPauses("temp.wav")
if Silence.count_silences("temp.wav"):
return Silence.count_silences("temp.wav")
return "No results"
......@@ -174,7 +174,7 @@ def gingerItParse():
@app.route('/webScrapping', methods=['GET'])
def webScrapping():
return "Success"
@app.route('/suggestContent', methods=['GET'])
......@@ -218,7 +218,7 @@ def videoUploader():
audioResult = speechToText.get_large_audio_transcription("temp.wav")
videoResult = json.dumps(videoAnalyzing.getEmotions("temp.mp4"))
videoResult = json.dumps(videoAnalyzing.get_emotions("temp.mp4"))
return {
"videoResult": videoResult,
"audioResult": audioResult
......@@ -5,8 +5,10 @@ from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
def textAnalyze(speech):
def text_analyze(speech):
text = speech
#convert text to lower case
lower_case = text.lower()
cleaned_text = lower_case.translate(str.maketrans('', '', string.punctuation))
......@@ -20,7 +22,7 @@ def textAnalyze(speech):
if word not in stopwords.words('english'):
# Lemmatization (convert base or dictionary form of a word)
#Lemmatization (convert base or dictionary form of a word)
lemma_words = []
for word in final_words:
word = WordNetLemmatizer().lemmatize(word)
......@@ -31,7 +33,7 @@ def textAnalyze(speech):
#Looping final words and identify emotional words
for i in final_words:
with open('Emotion/emotions.txt', 'r') as file: #Emotion dectionary
with open('Emotion/emotions.txt', 'r') as file:#Emotions dictionary
for line in file:
#Remove dictionary punctuations
clear_line = line.replace("\n", '').replace(",", '').replace("'", '').strip()
......@@ -5,29 +5,13 @@ import numpy as np
import tensorflow as tf
from keras.preprocessing import image
Savedmodel = tf.keras.models.load_model('emotion_lts.h5')
Saved_model = tf.keras.models.load_model('emotion_lts.h5')
objects = ('Angry', 'Happy', 'Sad', 'Neutral')
vid = cv2.VideoCapture(0)
# def run():
# while True:
# _, frame =
# frame = imutils.resize(frame, width=500)
# # result = api(frame)
# cv2.imshow("frame",frame)
# # getPrediction(frame)
# # cv.waitKey(0)
# if cv2.waitKey(20) & 0XFF == ord('q'):
# break
# vid.release()
# cv2.destroyAllWindows()
def emotion_analysis(emotions):
objects = ['Angry', 'Happy', 'Sad', 'Neutral']
y_pos = np.arange(len(objects))
......@@ -37,7 +21,8 @@ def emotion_analysis(emotions):
def getEmotions(filePath):
def get_emotions(filePath):
cap = cv2.VideoCapture(filePath)
emotions = []
......@@ -53,7 +38,7 @@ def getEmotions(filePath):
x /= 255
custom = Savedmodel.predict(x)
custom = Saved_model.predict(x)
# print(custom[0])
......@@ -77,5 +62,5 @@ def getEmotions(filePath):
return emotions
......@@ -10,18 +10,19 @@ stopwords = list(STOP_WORDS)
# print(stopwords)
nlp = spacy.load('en_core_web_sm')
def processDoubleWords(speech):
retVal = []
def identify_repeated_words(speech):
repeated_words = []
doc = nlp(speech)
# Tokenization
tokens = [token.text for token in doc]
print("***** Analyze Repeted Words in you're Speech *****")
print("***** Analyze Repeated Words in you're Speech *****")
for i in range(len(tokens)-1):
if tokens[i] == tokens[i + 1]:
print(f" You stuck in this word :{tokens[i]}")
retVal.append(f" You stuck in this word :{tokens[i]}")
repeated_words.append(f" You stuck in this word :{tokens[i]}")
return {
"message": retVal,
"message": repeated_words,
"score": ScoreforRepetedwords
......@@ -26,5 +26,3 @@ def wordcount(filename, listwords):
print("Have not filler word")
return "Have not filler word"
# print("********Analyze Filler Word in your Speech********")
# wordcount("momo.txt", ["Like","okay" ,"so", "actually" ,"basically","right"])
......@@ -6,7 +6,8 @@ from pydub.silence import split_on_silence
# countPauses("../content analyzing/temp.wav")
ScoreforUserSilence = 70/100
def countPauses(filePath):
def count_silences(filePath):
sound = AudioSegment.from_wav(filePath)
chunks = split_on_silence(sound, min_silence_len=200, silence_thresh=sound.dBFS - 16, keep_silence=150)
......@@ -5,39 +5,39 @@ scoreForClearness = 50/100
nlp = spacy.load("en_core_web_sm")
def clearnerssWords(text):
def identify_complicated_words(text):
doc = nlp(text)
returnVal = ""
complicated_words = ""
for token in doc:
count = len(token)
if count > 12:
returnVal += f"{token.text} - {count} letters: This is a too complicated word. It is better to use more simpler word."
if count > 18:
complicated_words += f"{token.text} - {count} letters: This is a too complicated word. It is better to use more simpler word."
return {
"message": returnVal,
"message": complicated_words,
"score": scoreForClearness
def clearnerssSentence(text):
def identify_complicated_sentences(text):
doc = nlp(text)
returnVal = ""
complicated_sentences = ""
for sent in doc.sents:
word_count = 0
# print(sent.text)
for words in sent:
# print(words.text)
word_count = word_count + 1
if word_count > 10:
if word_count > 43:
# print(f'"{sent}" is a overcomplicated sentence. There are {word_count} words in it.')
returnVal += f'"{sent}" is a overcomplicated sentence. There are {word_count} words in it.'
complicated_sentences += f'"{sent}" is a overcomplicated sentence. There are {word_count} words in it.'
# print(word_count)
return {
"message": returnVal,
"message": complicated_sentences,
"score": scoreForClearness
def clearnessReadAudioFile(filePath):
def analyze_speed(filePath):
# Read the Audiofile
samplerate, data = read(filePath)
# Frame rate for the Audio
......@@ -5,20 +5,18 @@ nlp = spacy.load("en_core_web_sm")
#Count the total number of characters in the speech
def conclisions(speech):
def identify_conclusion(speech):
totalCharacterCount = len(speech)
conclusionCharacterCount = (85/100)*totalCharacterCount
conclusion = (speech[int(conclusionCharacterCount):int(totalCharacterCount)])
# print(">>>Conclusion<<<")
# print(conclusion)
return {
"message": conclusion,
"score": scoreForConclusion
def comments(speech):
conclusion = conclisions(speech)["message"]
def conclusion_best_practices(speech):
conclusion = identify_conclusion(speech)["message"]
final_words = []
with open('content analyzing/bestPracticesForConclusion.txt', 'r') as file:
......@@ -35,23 +33,13 @@ def comments(speech):
def questions(speech):
def conclusion_questions(speech):
retVal = []
doc = nlp(conclisions(speech)["message"])
# patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
# verb_phrases = textacy.extract.token_matches(doc, patterns)
# for verb_phrases in verb_phrases:
# print("You used questions forms in your conclusion. It is a good practice for a conclusion.")
# print(f"Identified questions : {verb_phrases}")
# retVal.append(f"Identified questions : {verb_phrases}")
doc = nlp(identify_conclusion(speech)["message"])
tokens = [token for token in doc]
for i in range(len(tokens)):
# print(tokens[i].pos_)
if (tokens[i].pos_ == 'ADV' and tokens[i + 1].pos_ == 'AUX' and tokens[i + 2].pos_ == 'PRON'):
retVal.append(f"{tokens[i]} {tokens[i + 1]} {tokens[i + 2]}")
......@@ -4,22 +4,21 @@ scoreForIntroduction = 40/100
nlp = spacy.load("en_core_web_sm")
def introductionFunc(speech):
totalCharacterCount = len(speech)
def identify_introduction(speech):
total_character_count = len(speech)
introduction_character_count = (15 / 100) * total_character_count
introduction = (speech[0:int(introduction_character_count)])
introductionCharacterCount = (15 / 100) * totalCharacterCount
introduction = (speech[0:int(introductionCharacterCount)])
# print(">>>Introduction<<<")
# print(introduction)
return {
"message": introduction,
"score": scoreForIntroduction
def introductionBestUsesFunc(speech):
def introduction_best_practices(speech):
introduction = nlp(introductionFunc(speech)["message"])
introduction = nlp(identify_introduction(speech)["message"])
final_words = []
with open('content analyzing/bestPracticesForIntroduction.txt', 'r') as file:
......@@ -35,28 +34,18 @@ def introductionBestUsesFunc(speech):
def introductionQuestions(speech):
doc = nlp(introductionFunc(speech)["message"])
# patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
# retVal = []
# verb_phrases = textacy.extract.token_matches(doc, patterns)
# for verb_phrases in verb_phrases:
# print("You used questions forms in your introduction. It is a good practice for a introduction.")
# print(f"Identified questions : {verb_phrases}")
# retVal.append(f"Identified questions : {verb_phrases}")
def introduction_questions(speech):
doc = nlp(identify_introduction(speech)["message"])
tokens = [token for token in doc]
retVal = []
identified_questions = []
for i in range(len(tokens)):
# print(tokens[i].pos_)
if (tokens[i].pos_ == 'ADV' and tokens[i + 1].pos_ == 'AUX' and tokens[i + 2].pos_ == 'PRON'):
retVal.append(f"{tokens[i]} {tokens[i + 1]} {tokens[i + 2]}")
identified_questions.append(f"{tokens[i]} {tokens[i + 1]} {tokens[i + 2]}")
return {
"message": retVal,
"message": identified_questions,
"score": scoreForIntroduction
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
import string
nlp = spacy.load('en_core_web_sm')
def keywordExrtraction(topic, speech):
Topic = nlp(topic)
Content = nlp(speech)
def key_word_extraction(topic, speech):
topic = nlp(topic)
content = nlp(speech)
stopwords = list(STOP_WORDS)
topic_words = []
key_words = []
punctuation = string.punctuation + '\n'
word_frequencies = {}
for word in Content:
for word in content:
if word.text.lower() not in stopwords:
if word.text.lower() not in punctuation:
if word.text not in word_frequencies.keys():
......@@ -20,23 +22,16 @@ def keywordExrtraction(topic, speech):
word_frequencies[word.text] += 1
topicWords = []
for words in Topic:
# print(topicWords)
keyWords = []
for words in topic:
print("Extracted Key Words:")
for word in word_frequencies.keys():
if word_frequencies[word] >= 3:
return {
"message": keyWords,
"message": key_words,
"score": 50/100
import spacy
# Testing the model
test_text = "I had such high. hopes for this dress and really crappy worst product hate. it wporst bad."
def identifyQuotes(text):
nlp = spacy.load("content analyzing/quotesIdentify")
output = []
doc = nlp(text)
return doc.cats
\ No newline at end of file
for sent in doc.sents:
sentence = nlp(sent.text)
return sentence.cats
......@@ -9,6 +9,8 @@ r = sr.Recognizer()
# a function that splits the audio file into chunks
# and applies speech recognition
def get_large_audio_transcription(path):
Splitting the large audio file into chunks
from selenium import webdriver
def suggestContent():
driver = webdriver.Chrome("chromedriver.exe")
searchbox = driver.find_element_by_xpath('//*[@id="searchInput"]')
search_box = driver.find_element_by_xpath('//*[@id="searchInput"]')
searchButton = driver.find_element_by_xpath('//*[@id="search-form"]/fieldset/button')
search_button = driver.find_element_by_xpath('//*[@id="search-form"]/fieldset/button')
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
nlp = spacy.load("en_core_web_sm")
stopwords = list(STOP_WORDS)
punctuation = punctuation + '\n'
def synonymsFunc(topic, speech):
Topic = nlp(topic)
Content = nlp(speech)
def content_relativity(topic, speech):
topic = nlp(topic)
content = nlp(speech)
Total_similarity = 0
total_similarity = 0
for token1 in Content:
for token1 in content:
if token1.text.lower() not in stopwords:
if token1.text.lower() not in punctuation:
for token2 in Topic:
for token2 in topic:
print((token1.text, token2.text), "similarity", token1.similarity(token2))
Total_similarity = Total_similarity + token1.similarity(token2)
total_similarity = total_similarity + token1.similarity(token2)
print(f'Total score for the similarity: {Total_similarity}')
average_similarity = (Total_similarity/len(Content))*100
print(f'Total score for the similarity: {total_similarity}')
average_similarity = (total_similarity/len(content))*100
print(f'Average score for the similarity between topic and content: {average_similarity}%')
return {
"message": str(f'Average score for the similarity between topic and content: {average_similarity}%'),
from selenium import webdriver
def webScrap():
def suggest_youtube_content():
driver = webdriver.Chrome("chromedriver.exe")
searchbox = driver.find_element_by_xpath('//*[@id="search"]')
searchbox.send_keys(['speeches', 'Transportation'])
search_box = driver.find_element_by_xpath('//*[@id="search"]')
search_box.send_keys(['speeches', 'Transportation'])
searchButton = driver.find_element_by_xpath('//*[@id="search-icon-legacy"]')
search_button = driver.find_element_by_xpath('//*[@id="search-icon-legacy"]')
......@@ -12,7 +12,7 @@ app.config["DEBUG"] = True
@app.route('/countFillerWords', methods=['GET'])
def countFillerWords():
fillterWordCount = getFillterWordCount.countFillerWords("../temp.wav")
fillterWordCount = getFillterWordCount.count_filler_words("../temp.wav")
return fillterWordCount
......@@ -18,6 +18,7 @@ def get_numpy_array(features_df):
def get_train_test(X, y):
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
return X_train, X_test, y_train, y_test
......@@ -11,6 +11,7 @@ import neural_network
# countPauses("../content analyzing/temp.wav")
ScoreforUserSilence = 70/100
def get_numpy_array(features_df):
X = np.array(features_df.feature.tolist())
y = np.array(features_df.class_label.tolist())
......@@ -23,8 +24,9 @@ def get_numpy_array(features_df):
features_df = get_features.extract_features()
X, y, le = get_numpy_array(features_df)
def countFillerWords(filePath):
fillerWordCount = 0
def count_filler_words(filePath):
filler_word_count = 0
sound = AudioSegment.from_wav(filePath)
chunks = split_on_silence(sound, min_silence_len=200, silence_thresh=sound.dBFS - 16, keep_silence=150)
......@@ -41,14 +43,14 @@ def countFillerWords(filePath):
prediction = neural_network.predict(chunk_file, le, "trained_cnn.h5")
if float(prediction["probability"]) > 0.99:
fillerWordCount += 1
filler_word_count += 1
print("****** How many times Filler words in their Speech *****")
# print count of silence
print("Filler words: ", fillerWordCount)
print("Filler words: ", filler_word_count)
return {
"message": str(fillerWordCount) + " : filler word/s found",
"message": str(filler_word_count) + " : filler word/s found",
"score": ScoreforUserSilence
......@@ -5,6 +5,7 @@ import numpy as np
import glob
import pandas as pd
def get_features(file_name):
......@@ -26,8 +27,8 @@ def get_features(file_name):
mfccs_scaled = np.mean(mfccs.T,axis=0)
return mfccs_scaled
def extract_features():
def extract_features():
# path to dataset containing 10 subdirectories of .ogg files
sub_dirs = os.listdir('data')
......@@ -9,6 +9,7 @@ import numpy as np
import os
from sklearn.metrics import classification_report
def create_mlp(num_labels):
model = Sequential()
......@@ -24,6 +25,7 @@ def create_mlp(num_labels):
return model
def create_cnn(num_labels):
model = Sequential()
......@@ -38,8 +40,8 @@ def create_cnn(num_labels):
return model
def train(model,X_train, X_test, y_train, y_test,model_file):
def train(model,X_train, X_test, y_train, y_test,model_file):
# compile the model
model.compile(loss = 'categorical_crossentropy',metrics=['accuracy'],optimizer='adam')
......@@ -62,15 +64,15 @@ def train(model,X_train, X_test, y_train, y_test,model_file):
print(classification_report(y_test, y_pred))
def compute(X_test,y_test,model_file):
def compute(X_test,y_test,model_file):
# load model from disk
loaded_model = load_model(model_file)
score = loaded_model.evaluate(X_test,y_test)
return score[0],score[1]*100
def predict(filename,le,model_file):
def predict(filename,le,model_file):
model = load_model(model_file)
prediction_feature = get_features.get_features(filename)
if len(prediction_feature) == 0:
