Commit 05965344 authored by Dhananjaya Jayashanka

changes done

parent 9d35589f
@@ -37,16 +37,16 @@ app.config["DEBUG"] = True
 @app.route('/clearness/word', methods=['GET'])
 @cross_origin()
 def clearnerssWords():
-    if clearness.clearnerssWords(request.args['text']):
-        return clearness.clearnerssWords(request.args['text'])
+    if clearness.identify_complicated_words(request.args['text']):
+        return clearness.identify_complicated_words(request.args['text'])
     else:
         return "No results"

 @app.route('/clearness/sentense', methods=['GET'])
 @cross_origin()
 def clearnerssSentence():
-    if clearness.clearnerssSentence(request.args['text']):
-        return clearness.clearnerssSentence(request.args['text'])
+    if clearness.identify_complicated_sentences(request.args['text']):
+        return clearness.identify_complicated_sentences(request.args['text'])
     else:
         return "No results"
@@ -61,81 +61,81 @@ def quotesSentence():
 @app.route('/emotion/sentense', methods=['GET'])
 @cross_origin()
 def emotionSentence():
-    if textAnalyze.textAnalyze(request.args['text']):
-        return json.dumps(textAnalyze.textAnalyze(request.args['text']))
+    if textAnalyze.text_analyze(request.args['text']):
+        return json.dumps(textAnalyze.text_analyze(request.args['text']))
     else:
         return "No results"

 @app.route('/conclusion', methods=['GET'])
 @cross_origin()
 def conclusions():
-    if conclusion.conclisions(request.args['text']):
-        return conclusion.conclisions(request.args['text'])
+    if conclusion.identify_conclusion(request.args['text']):
+        return conclusion.identify_conclusion(request.args['text'])
     else:
         return "No results"

 @app.route('/conclusion/comments', methods=['GET'])
 @cross_origin()
 def comments():
-    if len(conclusion.comments(request.args['text'])) > 0:
-        return json.dumps(conclusion.comments(request.args['text']))
+    if len(conclusion.conclusion_best_practices(request.args['text'])) > 0:
+        return json.dumps(conclusion.conclusion_best_practices(request.args['text']))
     else:
         return "No results"

 @app.route('/conclusion/questions', methods=['GET'])
 @cross_origin()
 def questions():
-    if conclusion.questions(request.args['text']):
-        return json.dumps(conclusion.questions(request.args['text']))
+    if conclusion.conclusion_questions(request.args['text']):
+        return json.dumps(conclusion.conclusion_questions(request.args['text']))
     else:
         return "No results"

 @app.route('/introduction', methods=['GET'])
 @cross_origin()
 def introductionFunc():
-    if introduction.introductionFunc(request.args['text']):
-        return introduction.introductionFunc(request.args['text'])
+    if introduction.identify_introduction(request.args['text']):
+        return introduction.identify_introduction(request.args['text'])
     else:
         return "No results"

 @app.route('/introduction/bestUses', methods=['GET'])
 @cross_origin()
 def introductionBestUsesFunc():
-    if introduction.introductionBestUsesFunc(request.args['text']):
-        return introduction.introductionBestUsesFunc(request.args['text'])
+    if introduction.introduction_best_practices(request.args['text']):
+        return introduction.introduction_best_practices(request.args['text'])
     else:
         return "No results"

 @app.route('/introduction/questions', methods=['GET'])
 @cross_origin()
 def introductionQuestions():
-    if len(introduction.introductionQuestions(request.args['text'])) > 0:
-        return json.dumps(introduction.introductionQuestions(request.args['text']))
+    if len(introduction.introduction_questions(request.args['text'])) > 0:
+        return json.dumps(introduction.introduction_questions(request.args['text']))
     else:
         return "No results"

 @app.route('/keywordExtraction', methods=['GET'])
 @cross_origin()
 def keywordExtraction():
-    print(keyWordExtraction.keywordExrtraction(request.args['topic'], request.args['speech']))
-    if len(keyWordExtraction.keywordExrtraction(request.args['topic'], request.args['speech'])) > 0:
-        return json.dumps(keyWordExtraction.keywordExrtraction(request.args['topic'], request.args['speech']))
+    print(keyWordExtraction.key_word_extraction(request.args['topic'], request.args['speech']))
+    if len(keyWordExtraction.key_word_extraction(request.args['topic'], request.args['speech'])) > 0:
+        return json.dumps(keyWordExtraction.key_word_extraction(request.args['topic'], request.args['speech']))
     else:
         return "No results"

 @app.route('/synonyms', methods=['GET'])
 @cross_origin()
 def synonymsFunction():
-    if synonyms.synonymsFunc(request.args['topic'], request.args['speech']):
-        return synonyms.synonymsFunc(request.args['topic'], request.args['speech'])
+    if synonyms.content_relativity(request.args['topic'], request.args['speech']):
+        return synonyms.content_relativity(request.args['topic'], request.args['speech'])
     else:
         return "No results"

 @app.route('/doubleWords', methods=['GET'])
 @cross_origin()
 def doubleWordsFunc():
-    if len(DoubleWords.processDoubleWords(request.args['text'])) > 0:
-        return json.dumps(DoubleWords.processDoubleWords(request.args['text']))
+    if len(DoubleWords.identify_repeated_words(request.args['text'])) > 0:
+        return json.dumps(DoubleWords.identify_repeated_words(request.args['text']))
     else:
         return "No results"
@@ -150,8 +150,8 @@ def fillerWordsFunc():
 @app.route('/countPauses', methods=['GET'])
 @cross_origin()
 def countPauses():
-    if Silence.countPauses("temp.wav"):
-        return Silence.countPauses("temp.wav")
+    if Silence.count_silences("temp.wav"):
+        return Silence.count_silences("temp.wav")
     else:
         return "No results"
@@ -174,7 +174,7 @@ def gingerItParse():
 @app.route('/webScrapping', methods=['GET'])
 @cross_origin()
 def webScrapping():
-    webScraping.webScrap()
+    webScraping.suggest_youtube_content()
     return "Success"

 @app.route('/suggestContent', methods=['GET'])
@@ -218,7 +218,7 @@ def videoUploader():
     audioResult = speechToText.get_large_audio_transcription("temp.wav")
-    videoResult = json.dumps(videoAnalyzing.getEmotions("temp.mp4"))
+    videoResult = json.dumps(videoAnalyzing.get_emotions("temp.mp4"))
     return {
         "videoResult": videoResult,
         "audioResult": audioResult
...
@@ -5,8 +5,10 @@ from nltk.sentiment.vader import SentimentIntensityAnalyzer
 from nltk.stem import WordNetLemmatizer
 from nltk.tokenize import word_tokenize

-def textAnalyze(speech):
+
+def text_analyze(speech):
     text = speech
     #convert text to lower case
     lower_case = text.lower()
     cleaned_text = lower_case.translate(str.maketrans('', '', string.punctuation))
@@ -20,7 +22,7 @@ def textAnalyze(speech):
         if word not in stopwords.words('english'):
             final_words.append(word)

-    # Lemmatization (convert base or dictionary form of a word)
+    #Lemmatization (convert base or dictionary form of a word)
     lemma_words = []
     for word in final_words:
         word = WordNetLemmatizer().lemmatize(word)
@@ -31,7 +33,7 @@ def textAnalyze(speech):
     #Looping final words and identify emotional words
     for i in final_words:
-        with open('Emotion/emotions.txt', 'r') as file: #Emotion dectionary
+        with open('Emotion/emotions.txt', 'r') as file:#Emotions dictionary
             for line in file:
                 #Remove dictionary punctuations
                 clear_line = line.replace("\n", '').replace(",", '').replace("'", '').strip()
...
@@ -5,29 +5,13 @@ import numpy as np
 import tensorflow as tf
 from keras.preprocessing import image

-Savedmodel = tf.keras.models.load_model('emotion_lts.h5')
-Savedmodel.summary()
+Saved_model = tf.keras.models.load_model('emotion_lts.h5')
+Saved_model.summary()

 objects = ('Angry', 'Happy', 'Sad', 'Neutral')
 vid = cv2.VideoCapture(0)

-#
-# def run():
-# while True:
-#
-# _, frame = vid.read()
-# frame = imutils.resize(frame, width=500)
-#
-# # result = api(frame)
-#
-# cv2.imshow("frame",frame)
-# # getPrediction(frame)
-#
-# # cv.waitKey(0)
-# if cv2.waitKey(20) & 0XFF == ord('q'):
-# break
-#
-# vid.release()
-# cv2.destroyAllWindows()

 def emotion_analysis(emotions):
     objects = ['Angry', 'Happy', 'Sad', 'Neutral']
     y_pos = np.arange(len(objects))
@@ -37,7 +21,8 @@ def emotion_analysis(emotions):
     plt.ylabel('percentage')
     plt.title('emotion')

-def getEmotions(filePath):
+
+def get_emotions(filePath):
     cap = cv2.VideoCapture(filePath)
     emotions = []
@@ -53,7 +38,7 @@ def getEmotions(filePath):
         x /= 255

-        custom = Savedmodel.predict(x)
+        custom = Saved_model.predict(x)
         # print(custom[0])
         emotion_analysis(custom[0])
@@ -77,5 +62,5 @@ def getEmotions(filePath):
             break
     return emotions

-getEmotions("speech.mp4")
+get_emotions("speech.mp4")
 cv2.destroyAllWindows()
@@ -10,18 +10,19 @@ stopwords = list(STOP_WORDS)
 # print(stopwords)
 nlp = spacy.load('en_core_web_sm')

-def processDoubleWords(speech):
-    retVal = []
+
+def identify_repeated_words(speech):
+    repeated_words = []
     doc = nlp(speech)

     # Tokenization
     tokens = [token.text for token in doc]

-    print("***** Analyze Repeted Words in you're Speech *****")
+    print("***** Analyze Repeated Words in you're Speech *****")
     for i in range(len(tokens)-1):
         if tokens[i] == tokens[i + 1]:
             print(f" You stuck in this word :{tokens[i]}")
-            retVal.append(f" You stuck in this word :{tokens[i]}")
+            repeated_words.append(f" You stuck in this word :{tokens[i]}")
     return {
-        "message": retVal,
+        "message": repeated_words,
         "score": ScoreforRepetedwords
     }
@@ -26,5 +26,3 @@ def wordcount(filename, listwords):
         print("Have not filler word")
         return "Have not filler word"

-# print("********Analyze Filler Word in your Speech********")
-# wordcount("momo.txt", ["Like","okay" ,"so", "actually" ,"basically","right"])
@@ -6,7 +6,8 @@ from pydub.silence import split_on_silence
 # countPauses("../content analyzing/temp.wav")
 ScoreforUserSilence = 70/100

-def countPauses(filePath):
+
+def count_silences(filePath):
     sound = AudioSegment.from_wav(filePath)
     chunks = split_on_silence(sound, min_silence_len=200, silence_thresh=sound.dBFS - 16, keep_silence=150)
...
@@ -5,39 +5,39 @@ scoreForClearness = 50/100
 nlp = spacy.load("en_core_web_sm")

-def clearnerssWords(text):
+def identify_complicated_words(text):
     doc = nlp(text)
-    returnVal = ""
+    complicated_words = ""
     for token in doc:
         count = len(token)
-        if count > 12:
-            returnVal += f"{token.text} - {count} letters: This is a too complicated word. It is better to use more simpler word."
+        if count > 18:
+            complicated_words += f"{token.text} - {count} letters: This is a too complicated word. It is better to use more simpler word."
     return {
-        "message": returnVal,
+        "message": complicated_words,
         "score": scoreForClearness
     }

-def clearnerssSentence(text):
+def identify_complicated_sentences(text):
     doc = nlp(text)
-    returnVal = ""
+    complicated_sentences = ""
     for sent in doc.sents:
         word_count = 0
         # print(sent.text)
         for words in sent:
             # print(words.text)
             word_count = word_count + 1
-        if word_count > 10:
+        if word_count > 43:
             # print(f'"{sent}" is a overcomplicated sentence. There are {word_count} words in it.')
-            returnVal += f'"{sent}" is a overcomplicated sentence. There are {word_count} words in it.'
+            complicated_sentences += f'"{sent}" is a overcomplicated sentence. There are {word_count} words in it.'
         # print(word_count)
     return {
-        "message": returnVal,
+        "message": complicated_sentences,
         "score": scoreForClearness
     }

-def clearnessReadAudioFile(filePath):
+def analyze_speed(filePath):
     # Read the Audiofile
     samplerate, data = read(filePath)
     # Frame rate for the Audio
...
@@ -5,20 +5,18 @@ nlp = spacy.load("en_core_web_sm")
 #Count the total number of characters in the speech
-def conclisions(speech):
+def identify_conclusion(speech):
     totalCharacterCount = len(speech)
     conclusionCharacterCount = (85/100)*totalCharacterCount
     conclusion = (speech[int(conclusionCharacterCount):int(totalCharacterCount)])
-    # print(">>>Conclusion<<<")
-    # print(conclusion)
     return {
         "message": conclusion,
         "score": scoreForConclusion
     }

-def comments(speech):
-    conclusion = conclisions(speech)["message"]
+def conclusion_best_practices(speech):
+    conclusion = identify_conclusion(speech)["message"]
     final_words = []

     with open('content analyzing/bestPracticesForConclusion.txt', 'r') as file:
@@ -35,23 +33,13 @@ def comments(speech):
     }

-def questions(speech):
+def conclusion_questions(speech):
     retVal = []
-    doc = nlp(conclisions(speech)["message"])
-    # patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
-    #
-    # verb_phrases = textacy.extract.token_matches(doc, patterns)
-    #
-    # for verb_phrases in verb_phrases:
-    #     print("You used questions forms in your conclusion. It is a good practice for a conclusion.")
-    #     print(f"Identified questions : {verb_phrases}")
-    #     retVal.append(f"Identified questions : {verb_phrases}")
+    doc = nlp(identify_conclusion(speech)["message"])
     tokens = [token for token in doc]
     for i in range(len(tokens)):
-        # print(tokens[i].pos_)
         if (tokens[i].pos_ == 'ADV' and tokens[i + 1].pos_ == 'AUX' and tokens[i + 2].pos_ == 'PRON'):
             retVal.append(f"{tokens[i]} {tokens[i + 1]} {tokens[i + 2]}")
...
@@ -4,22 +4,21 @@ scoreForIntroduction = 40/100
 nlp = spacy.load("en_core_web_sm")

-def introductionFunc(speech):
-    totalCharacterCount = len(speech)
-    introductionCharacterCount = (15 / 100) * totalCharacterCount
-    introduction = (speech[0:int(introductionCharacterCount)])
-    # print(">>>Introduction<<<")
-    # print(introduction)
+def identify_introduction(speech):
+    total_character_count = len(speech)
+    introduction_character_count = (15 / 100) * total_character_count
+    introduction = (speech[0:int(introduction_character_count)])
     return {
         "message": introduction,
         "score": scoreForIntroduction
     }

-def introductionBestUsesFunc(speech):
-    introduction = nlp(introductionFunc(speech)["message"])
+def introduction_best_practices(speech):
+    introduction = nlp(identify_introduction(speech)["message"])
     final_words = []

     with open('content analyzing/bestPracticesForIntroduction.txt', 'r') as file:
@@ -35,28 +34,18 @@ def introductionBestUsesFunc(speech):
     }

-def introductionQuestions(speech):
-    doc = nlp(introductionFunc(speech)["message"])
-    # patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
-    # retVal = []
-    # verb_phrases = textacy.extract.token_matches(doc, patterns)
-    # for verb_phrases in verb_phrases:
-    #     print("You used questions forms in your introduction. It is a good practice for a introduction.")
-    #     print(f"Identified questions : {verb_phrases}")
-    #     retVal.append(f"Identified questions : {verb_phrases}")
+def introduction_questions(speech):
+    doc = nlp(identify_introduction(speech)["message"])
     tokens = [token for token in doc]
-    retVal = []
+    identified_questions = []
     for i in range(len(tokens)):
-        # print(tokens[i].pos_)
         if (tokens[i].pos_ == 'ADV' and tokens[i + 1].pos_ == 'AUX' and tokens[i + 2].pos_ == 'PRON'):
-            retVal.append(f"{tokens[i]} {tokens[i + 1]} {tokens[i + 2]}")
+            identified_questions.append(f"{tokens[i]} {tokens[i + 1]} {tokens[i + 2]}")
     return {
-        "message": retVal,
+        "message": identified_questions,
         "score": scoreForIntroduction
     }
 import spacy
 from spacy.lang.en.stop_words import STOP_WORDS
 import string

 nlp = spacy.load('en_core_web_sm')

-def keywordExrtraction(topic, speech):
-    Topic = nlp(topic)
-    Content = nlp(speech)
+
+def key_word_extraction(topic, speech):
+    topic = nlp(topic)
+    content = nlp(speech)
     stopwords = list(STOP_WORDS)
+    topic_words = []
+    key_words = []
     punctuation = string.punctuation + '\n'

     word_frequencies = {}
-    for word in Content:
+    for word in content:
         if word.text.lower() not in stopwords:
             if word.text.lower() not in punctuation:
                 if word.text not in word_frequencies.keys():
@@ -20,23 +22,16 @@ def keywordExrtraction(topic, speech):
                 else:
                     word_frequencies[word.text] += 1

-    topicWords = []
-    for words in Topic:
-        topicWords.append(words.text)
-    # print(topicWords)
-    keyWords = []
-    print("Extracted Key Words:")
+    for words in topic:
+        topic_words.append(words.text)

     for word in word_frequencies.keys():
         if word_frequencies[word] >= 3:
-            keyWords.append(word)
+            key_words.append(word)
             print(word)
     return {
-        "message": keyWords,
+        "message": key_words,
         "score": 50/100
     }
 import spacy

-# Testing the model
-test_text = "I had such high. hopes for this dress and really crappy worst product hate. it wporst bad."

 def identifyQuotes(text):
     nlp = spacy.load("content analyzing/quotesIdentify")
+    output = []
     doc = nlp(text)
-    return doc.cats
-print(identifyQuotes(test_text))
\ No newline at end of file
+    for sent in doc.sents:
+        sentence = nlp(sent.text)
+        return sentence.cats
@@ -9,6 +9,8 @@ r = sr.Recognizer()
 # a function that splits the audio file into chunks
 # and applies speech recognition

 def get_large_audio_transcription(path):
     """
     Splitting the large audio file into chunks
...
 from selenium import webdriver

 def suggestContent():
     driver = webdriver.Chrome("chromedriver.exe")
     driver.get('https://wikipedia.com')
-    searchbox = driver.find_element_by_xpath('//*[@id="searchInput"]')
-    searchbox.send_keys(['cricket'])
-    searchButton = driver.find_element_by_xpath('//*[@id="search-form"]/fieldset/button')
-    searchButton.click()
+    search_box = driver.find_element_by_xpath('//*[@id="searchInput"]')
+    search_box.send_keys(['cricket'])
+    search_button = driver.find_element_by_xpath('//*[@id="search-form"]/fieldset/button')
+    search_button.click()
 import spacy
 from spacy.lang.en.stop_words import STOP_WORDS
 from string import punctuation

 nlp = spacy.load("en_core_web_sm")

 stopwords = list(STOP_WORDS)
 punctuation = punctuation + '\n'

-def synonymsFunc(topic, speech):
-    Topic = nlp(topic)
-    Content = nlp(speech)
-    Total_similarity = 0
-    for token1 in Content:
+def content_relativity(topic, speech):
+    topic = nlp(topic)
+    content = nlp(speech)
+    total_similarity = 0
+    for token1 in content:
         if token1.text.lower() not in stopwords:
             if token1.text.lower() not in punctuation:
-                for token2 in Topic:
+                for token2 in topic:
                     print((token1.text, token2.text), "similarity", token1.similarity(token2))
-                    Total_similarity = Total_similarity + token1.similarity(token2)
-    print(len(Content))
-    print(f'Total score for the similarity: {Total_similarity}')
-    average_similarity = (Total_similarity/len(Content))*100
+                    total_similarity = total_similarity + token1.similarity(token2)
+    print(f'Total score for the similarity: {total_similarity}')
+    average_similarity = (total_similarity/len(content))*100
     print(f'Average score for the similarity between topic and content: {average_similarity}%')
     return {
         "message": str(f'Average score for the similarity between topic and content: {average_similarity}%'),
...
 from selenium import webdriver

-def webScrap():
+
+def suggest_youtube_content():
     driver = webdriver.Chrome("chromedriver.exe")
     driver.get('https://youtube.com')
-    searchbox = driver.find_element_by_xpath('//*[@id="search"]')
-    searchbox.send_keys(['speeches', 'Transportation'])
-    searchButton = driver.find_element_by_xpath('//*[@id="search-icon-legacy"]')
-    searchButton.click()
+    search_box = driver.find_element_by_xpath('//*[@id="search"]')
+    search_box.send_keys(['speeches', 'Transportation'])
+    search_button = driver.find_element_by_xpath('//*[@id="search-icon-legacy"]')
+    search_button.click()
@@ -12,7 +12,7 @@ app.config["DEBUG"] = True
 @app.route('/countFillerWords', methods=['GET'])
 @cross_origin()
 def countFillerWords():
-    fillterWordCount = getFillterWordCount.countFillerWords("../temp.wav")
+    fillterWordCount = getFillterWordCount.count_filler_words("../temp.wav")
     return fillterWordCount

 app.run(port=5001)
@@ -18,6 +18,7 @@ def get_numpy_array(features_df):

 def get_train_test(X, y):
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
     return X_train, X_test, y_train, y_test
...
@@ -11,6 +11,7 @@ import neural_network
 # countPauses("../content analyzing/temp.wav")
 ScoreforUserSilence = 70/100

 def get_numpy_array(features_df):
     X = np.array(features_df.feature.tolist())
     y = np.array(features_df.class_label.tolist())
@@ -23,8 +24,9 @@ def get_numpy_array(features_df):
 features_df = get_features.extract_features()
 X, y, le = get_numpy_array(features_df)

-def countFillerWords(filePath):
-    fillerWordCount = 0
+
+def count_filler_words(filePath):
+    filler_word_count = 0
     sound = AudioSegment.from_wav(filePath)
     chunks = split_on_silence(sound, min_silence_len=200, silence_thresh=sound.dBFS - 16, keep_silence=150)
@@ -41,14 +43,14 @@ def countFillerWords(filePath):
         prediction = neural_network.predict(chunk_file, le, "trained_cnn.h5")
         print(prediction)
         if float(prediction["probability"]) > 0.99:
-            fillerWordCount += 1
+            filler_word_count += 1

     print("****** How many times Filler words in their Speech *****")
     # print count of silence
-    print("Filler words: ", fillerWordCount)
+    print("Filler words: ", filler_word_count)
     return {
-        "message": str(fillerWordCount) + " : filler word/s found",
+        "message": str(filler_word_count) + " : filler word/s found",
         "score": ScoreforUserSilence
     }
...
@@ -5,6 +5,7 @@ import numpy as np
 import glob
 import pandas as pd

 def get_features(file_name):
     print(sf.available_formats())
@@ -26,8 +27,8 @@ def get_features(file_name):
     mfccs_scaled = np.mean(mfccs.T,axis=0)
     return mfccs_scaled

 def extract_features():
     # path to dataset containing 10 subdirectories of .ogg files
     sub_dirs = os.listdir('data')
     sub_dirs.sort()
...
@@ -9,6 +9,7 @@ import numpy as np
 import os
 from sklearn.metrics import classification_report

 def create_mlp(num_labels):
     model = Sequential()
@@ -24,6 +25,7 @@ def create_mlp(num_labels):
     model.add(Activation('softmax'))
     return model

 def create_cnn(num_labels):
     model = Sequential()
@@ -38,8 +40,8 @@ def create_cnn(num_labels):
     model.add(Activation('softmax'))
     return model

 def train(model,X_train, X_test, y_train, y_test,model_file):
     # compile the model
     model.compile(loss = 'categorical_crossentropy',metrics=['accuracy'],optimizer='adam')
@@ -62,15 +64,15 @@ def train(model,X_train, X_test, y_train, y_test,model_file):
     print(classification_report(y_test, y_pred))

 def compute(X_test,y_test,model_file):
     # load model from disk
     loaded_model = load_model(model_file)
     score = loaded_model.evaluate(X_test,y_test)
     return score[0],score[1]*100

 def predict(filename,le,model_file):
     model = load_model(model_file)
     prediction_feature = get_features.get_features(filename)
     if len(prediction_feature) == 0:
...