Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
2021-060
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
2021-060
2021-060
Commits
05965344
Commit
05965344
authored
Nov 23, 2021
by
Dhananjaya Jayashanka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
changers done
parent
9d35589f
Changes
20
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
133 additions
and
169 deletions
+133
-169
API.py
API.py
+29
-29
Emotion/textAnalyze.py
Emotion/textAnalyze.py
+5
-3
Emotion/videoAnalyzing.py
Emotion/videoAnalyzing.py
+8
-23
Flow of the sppech/DoubleWords.py
Flow of the sppech/DoubleWords.py
+6
-5
Flow of the sppech/FillerWords.py
Flow of the sppech/FillerWords.py
+0
-2
Flow of the sppech/Silence.py
Flow of the sppech/Silence.py
+2
-1
content analyzing/clearness.py
content analyzing/clearness.py
+11
-11
content analyzing/conclusion.py
content analyzing/conclusion.py
+5
-17
content analyzing/introduction.py
content analyzing/introduction.py
+12
-23
content analyzing/keyWordExtraction.py
content analyzing/keyWordExtraction.py
+11
-16
content analyzing/quotesIdentify.py
content analyzing/quotesIdentify.py
+5
-6
content analyzing/speechToText.py
content analyzing/speechToText.py
+2
-0
content analyzing/suggestContent.py
content analyzing/suggestContent.py
+5
-4
content analyzing/synonyms.py
content analyzing/synonyms.py
+9
-13
content analyzing/webScraping.py
content analyzing/webScraping.py
+6
-5
filler_words/API.py
filler_words/API.py
+1
-1
filler_words/Train_Neural_Network.py
filler_words/Train_Neural_Network.py
+1
-0
filler_words/getFillterWordCount.py
filler_words/getFillterWordCount.py
+7
-5
filler_words/get_features.py
filler_words/get_features.py
+2
-1
filler_words/neural_network.py
filler_words/neural_network.py
+6
-4
No files found.
API.py
View file @
05965344
...
...
@@ -37,16 +37,16 @@ app.config["DEBUG"] = True
@
app
.
route
(
'/clearness/word'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
clearnerssWords
():
if
clearness
.
clearnerssW
ords
(
request
.
args
[
'text'
]):
return
clearness
.
clearnerssW
ords
(
request
.
args
[
'text'
])
if
clearness
.
identify_complicated_w
ords
(
request
.
args
[
'text'
]):
return
clearness
.
identify_complicated_w
ords
(
request
.
args
[
'text'
])
else
:
return
"No results"
@
app
.
route
(
'/clearness/sentense'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
clearnerssSentence
():
if
clearness
.
clearnerssSentence
(
request
.
args
[
'text'
]):
return
clearness
.
clearnerssSentence
(
request
.
args
[
'text'
])
if
clearness
.
identify_complicated_sentences
(
request
.
args
[
'text'
]):
return
clearness
.
identify_complicated_sentences
(
request
.
args
[
'text'
])
else
:
return
"No results"
...
...
@@ -61,81 +61,81 @@ def quotesSentence():
@
app
.
route
(
'/emotion/sentense'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
emotionSentence
():
if
textAnalyze
.
text
A
nalyze
(
request
.
args
[
'text'
]):
return
json
.
dumps
(
textAnalyze
.
text
A
nalyze
(
request
.
args
[
'text'
]))
if
textAnalyze
.
text
_a
nalyze
(
request
.
args
[
'text'
]):
return
json
.
dumps
(
textAnalyze
.
text
_a
nalyze
(
request
.
args
[
'text'
]))
else
:
return
"No results"
@
app
.
route
(
'/conclusion'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
conclusions
():
if
conclusion
.
conclisions
(
request
.
args
[
'text'
]):
return
conclusion
.
conclisions
(
request
.
args
[
'text'
])
if
conclusion
.
identify_conclusion
(
request
.
args
[
'text'
]):
return
conclusion
.
identify_conclusion
(
request
.
args
[
'text'
])
else
:
return
"No results"
@
app
.
route
(
'/conclusion/comments'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
comments
():
if
len
(
conclusion
.
co
mment
s
(
request
.
args
[
'text'
]))
>
0
:
return
json
.
dumps
(
conclusion
.
co
mment
s
(
request
.
args
[
'text'
]))
if
len
(
conclusion
.
co
nclusion_best_practice
s
(
request
.
args
[
'text'
]))
>
0
:
return
json
.
dumps
(
conclusion
.
co
nclusion_best_practice
s
(
request
.
args
[
'text'
]))
else
:
return
"No results"
@
app
.
route
(
'/conclusion/questions'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
questions
():
if
conclusion
.
questions
(
request
.
args
[
'text'
]):
return
json
.
dumps
(
conclusion
.
questions
(
request
.
args
[
'text'
]))
if
conclusion
.
conclusion_
questions
(
request
.
args
[
'text'
]):
return
json
.
dumps
(
conclusion
.
conclusion_
questions
(
request
.
args
[
'text'
]))
else
:
return
"No results"
@
app
.
route
(
'/introduction'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
introductionFunc
():
if
introduction
.
i
ntroductionFunc
(
request
.
args
[
'text'
]):
return
introduction
.
i
ntroductionFunc
(
request
.
args
[
'text'
])
if
introduction
.
i
dentify_introduction
(
request
.
args
[
'text'
]):
return
introduction
.
i
dentify_introduction
(
request
.
args
[
'text'
])
else
:
return
"No results"
@
app
.
route
(
'/introduction/bestUses'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
introductionBestUsesFunc
():
if
introduction
.
introduction
BestUsesFunc
(
request
.
args
[
'text'
]):
return
introduction
.
introduction
BestUsesFunc
(
request
.
args
[
'text'
])
if
introduction
.
introduction
_best_practices
(
request
.
args
[
'text'
]):
return
introduction
.
introduction
_best_practices
(
request
.
args
[
'text'
])
else
:
return
"No results"
@
app
.
route
(
'/introduction/questions'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
introductionQuestions
():
if
len
(
introduction
.
introduction
Q
uestions
(
request
.
args
[
'text'
]))
>
0
:
return
json
.
dumps
(
introduction
.
introduction
Q
uestions
(
request
.
args
[
'text'
]))
if
len
(
introduction
.
introduction
_q
uestions
(
request
.
args
[
'text'
]))
>
0
:
return
json
.
dumps
(
introduction
.
introduction
_q
uestions
(
request
.
args
[
'text'
]))
else
:
return
"No results"
@
app
.
route
(
'/keywordExtraction'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
keywordExtraction
():
print
(
keyWordExtraction
.
key
wordExr
traction
(
request
.
args
[
'topic'
],
request
.
args
[
'speech'
]))
if
len
(
keyWordExtraction
.
key
wordExr
traction
(
request
.
args
[
'topic'
],
request
.
args
[
'speech'
]))
>
0
:
return
json
.
dumps
(
keyWordExtraction
.
key
wordExr
traction
(
request
.
args
[
'topic'
],
request
.
args
[
'speech'
]))
print
(
keyWordExtraction
.
key
_word_ex
traction
(
request
.
args
[
'topic'
],
request
.
args
[
'speech'
]))
if
len
(
keyWordExtraction
.
key
_word_ex
traction
(
request
.
args
[
'topic'
],
request
.
args
[
'speech'
]))
>
0
:
return
json
.
dumps
(
keyWordExtraction
.
key
_word_ex
traction
(
request
.
args
[
'topic'
],
request
.
args
[
'speech'
]))
else
:
return
"No results"
@
app
.
route
(
'/synonyms'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
synonymsFunction
():
if
synonyms
.
synonymsFunc
(
request
.
args
[
'topic'
],
request
.
args
[
'speech'
]):
return
synonyms
.
synonymsFunc
(
request
.
args
[
'topic'
],
request
.
args
[
'speech'
])
if
synonyms
.
content_relativity
(
request
.
args
[
'topic'
],
request
.
args
[
'speech'
]):
return
synonyms
.
content_relativity
(
request
.
args
[
'topic'
],
request
.
args
[
'speech'
])
else
:
return
"No results"
@
app
.
route
(
'/doubleWords'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
doubleWordsFunc
():
if
len
(
DoubleWords
.
processDoubleW
ords
(
request
.
args
[
'text'
]))
>
0
:
return
json
.
dumps
(
DoubleWords
.
processDoubleW
ords
(
request
.
args
[
'text'
]))
if
len
(
DoubleWords
.
identify_repeated_w
ords
(
request
.
args
[
'text'
]))
>
0
:
return
json
.
dumps
(
DoubleWords
.
identify_repeated_w
ords
(
request
.
args
[
'text'
]))
else
:
return
"No results"
...
...
@@ -150,8 +150,8 @@ def fillerWordsFunc():
@
app
.
route
(
'/countPauses'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
countPauses
():
if
Silence
.
count
Paus
es
(
"temp.wav"
):
return
Silence
.
count
Paus
es
(
"temp.wav"
)
if
Silence
.
count
_silenc
es
(
"temp.wav"
):
return
Silence
.
count
_silenc
es
(
"temp.wav"
)
else
:
return
"No results"
...
...
@@ -174,7 +174,7 @@ def gingerItParse():
@
app
.
route
(
'/webScrapping'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
webScrapping
():
webScraping
.
webScrap
()
webScraping
.
suggest_youtube_content
()
return
"Success"
@
app
.
route
(
'/suggestContent'
,
methods
=
[
'GET'
])
...
...
@@ -218,7 +218,7 @@ def videoUploader():
audioResult
=
speechToText
.
get_large_audio_transcription
(
"temp.wav"
)
videoResult
=
json
.
dumps
(
videoAnalyzing
.
get
E
motions
(
"temp.mp4"
))
videoResult
=
json
.
dumps
(
videoAnalyzing
.
get
_e
motions
(
"temp.mp4"
))
return
{
"videoResult"
:
videoResult
,
"audioResult"
:
audioResult
...
...
Emotion/textAnalyze.py
View file @
05965344
...
...
@@ -5,8 +5,10 @@ from nltk.sentiment.vader import SentimentIntensityAnalyzer
from
nltk.stem
import
WordNetLemmatizer
from
nltk.tokenize
import
word_tokenize
def
textAnalyze
(
speech
):
def
text_analyze
(
speech
):
text
=
speech
#convert text to lower case
lower_case
=
text
.
lower
()
cleaned_text
=
lower_case
.
translate
(
str
.
maketrans
(
''
,
''
,
string
.
punctuation
))
...
...
@@ -20,7 +22,7 @@ def textAnalyze(speech):
if
word
not
in
stopwords
.
words
(
'english'
):
final_words
.
append
(
word
)
#
Lemmatization (convert base or dictionary form of a word)
#Lemmatization (convert base or dictionary form of a word)
lemma_words
=
[]
for
word
in
final_words
:
word
=
WordNetLemmatizer
()
.
lemmatize
(
word
)
...
...
@@ -31,7 +33,7 @@ def textAnalyze(speech):
#Looping final words and identify emotional words
for
i
in
final_words
:
with
open
(
'Emotion/emotions.txt'
,
'r'
)
as
file
:
#Emotion de
ctionary
with
open
(
'Emotion/emotions.txt'
,
'r'
)
as
file
:
#Emotions di
ctionary
for
line
in
file
:
#Remove dictionary punctuations
clear_line
=
line
.
replace
(
"
\n
"
,
''
)
.
replace
(
","
,
''
)
.
replace
(
"'"
,
''
)
.
strip
()
...
...
Emotion/videoAnalyzing.py
View file @
05965344
...
...
@@ -5,29 +5,13 @@ import numpy as np
import
tensorflow
as
tf
from
keras.preprocessing
import
image
Savedmodel
=
tf
.
keras
.
models
.
load_model
(
'emotion_lts.h5'
)
Savedmodel
.
summary
()
Saved
_
model
=
tf
.
keras
.
models
.
load_model
(
'emotion_lts.h5'
)
Saved
_
model
.
summary
()
objects
=
(
'Angry'
,
'Happy'
,
'Sad'
,
'Neutral'
)
vid
=
cv2
.
VideoCapture
(
0
)
#
# def run():
# while True:
#
# _, frame = vid.read()
# frame = imutils.resize(frame, width=500)
#
# # result = api(frame)
#
# cv2.imshow("frame",frame)
# # getPrediction(frame)
#
# # cv.waitKey(0)
# if cv2.waitKey(20) & 0XFF == ord('q'):
# break
#
# vid.release()
# cv2.destroyAllWindows()
def
emotion_analysis
(
emotions
):
objects
=
[
'Angry'
,
'Happy'
,
'Sad'
,
'Neutral'
]
y_pos
=
np
.
arange
(
len
(
objects
))
...
...
@@ -37,7 +21,8 @@ def emotion_analysis(emotions):
plt
.
ylabel
(
'percentage'
)
plt
.
title
(
'emotion'
)
def
getEmotions
(
filePath
):
def
get_emotions
(
filePath
):
cap
=
cv2
.
VideoCapture
(
filePath
)
emotions
=
[]
...
...
@@ -53,7 +38,7 @@ def getEmotions(filePath):
x
/=
255
custom
=
Savedmodel
.
predict
(
x
)
custom
=
Saved
_
model
.
predict
(
x
)
# print(custom[0])
emotion_analysis
(
custom
[
0
])
...
...
@@ -77,5 +62,5 @@ def getEmotions(filePath):
break
return
emotions
get
E
motions
(
"speech.mp4"
)
get
_e
motions
(
"speech.mp4"
)
cv2
.
destroyAllWindows
()
Flow of the sppech/DoubleWords.py
View file @
05965344
...
...
@@ -10,18 +10,19 @@ stopwords = list(STOP_WORDS)
# print(stopwords)
nlp
=
spacy
.
load
(
'en_core_web_sm'
)
def
processDoubleWords
(
speech
):
retVal
=
[]
def
identify_repeated_words
(
speech
):
repeated_words
=
[]
doc
=
nlp
(
speech
)
# Tokenization
tokens
=
[
token
.
text
for
token
in
doc
]
print
(
"***** Analyze Repeted Words in you're Speech *****"
)
print
(
"***** Analyze Repe
a
ted Words in you're Speech *****"
)
for
i
in
range
(
len
(
tokens
)
-
1
):
if
tokens
[
i
]
==
tokens
[
i
+
1
]:
print
(
f
" You stuck in this word :{tokens[i]}"
)
re
tVal
.
append
(
f
" You stuck in this word :{tokens[i]}"
)
re
peated_words
.
append
(
f
" You stuck in this word :{tokens[i]}"
)
return
{
"message"
:
re
tVal
,
"message"
:
re
peated_words
,
"score"
:
ScoreforRepetedwords
}
Flow of the sppech/FillerWords.py
View file @
05965344
...
...
@@ -26,5 +26,3 @@ def wordcount(filename, listwords):
print
(
"Have not filler word"
)
return
"Have not filler word"
# print("********Analyze Filler Word in your Speech********")
# wordcount("momo.txt", ["Like","okay" ,"so", "actually" ,"basically","right"])
Flow of the sppech/Silence.py
View file @
05965344
...
...
@@ -6,7 +6,8 @@ from pydub.silence import split_on_silence
# countPauses("../content analyzing/temp.wav")
ScoreforUserSilence
=
70
/
100
def
countPauses
(
filePath
):
def
count_silences
(
filePath
):
sound
=
AudioSegment
.
from_wav
(
filePath
)
chunks
=
split_on_silence
(
sound
,
min_silence_len
=
200
,
silence_thresh
=
sound
.
dBFS
-
16
,
keep_silence
=
150
)
...
...
content analyzing/clearness.py
View file @
05965344
...
...
@@ -5,39 +5,39 @@ scoreForClearness = 50/100
nlp
=
spacy
.
load
(
"en_core_web_sm"
)
def
clearnerssW
ords
(
text
):
def
identify_complicated_w
ords
(
text
):
doc
=
nlp
(
text
)
returnVal
=
""
complicated_words
=
""
for
token
in
doc
:
count
=
len
(
token
)
if
count
>
1
2
:
returnVal
+=
f
"{token.text} - {count} letters: This is a too complicated word. It is better to use more simpler word."
if
count
>
1
8
:
complicated_words
+=
f
"{token.text} - {count} letters: This is a too complicated word. It is better to use more simpler word."
return
{
"message"
:
returnVal
,
"message"
:
complicated_words
,
"score"
:
scoreForClearness
}
def
clearnerssSentence
(
text
):
def
identify_complicated_sentences
(
text
):
doc
=
nlp
(
text
)
returnVal
=
""
complicated_sentences
=
""
for
sent
in
doc
.
sents
:
word_count
=
0
# print(sent.text)
for
words
in
sent
:
# print(words.text)
word_count
=
word_count
+
1
if
word_count
>
10
:
if
word_count
>
43
:
# print(f'"{sent}" is a overcomplicated sentence. There are {word_count} words in it.')
returnVal
+=
f
'"{sent}" is a overcomplicated sentence. There are {word_count} words in it.'
complicated_sentences
+=
f
'"{sent}" is a overcomplicated sentence. There are {word_count} words in it.'
# print(word_count)
return
{
"message"
:
returnVal
,
"message"
:
complicated_sentences
,
"score"
:
scoreForClearness
}
def
clearnessReadAudioFile
(
filePath
):
def
analyze_speed
(
filePath
):
# Read the Audiofile
samplerate
,
data
=
read
(
filePath
)
# Frame rate for the Audio
...
...
content analyzing/conclusion.py
View file @
05965344
...
...
@@ -5,20 +5,18 @@ nlp = spacy.load("en_core_web_sm")
#Count the total number of characters in the speech
def
conclisions
(
speech
):
def
identify_conclusion
(
speech
):
totalCharacterCount
=
len
(
speech
)
conclusionCharacterCount
=
(
85
/
100
)
*
totalCharacterCount
conclusion
=
(
speech
[
int
(
conclusionCharacterCount
):
int
(
totalCharacterCount
)])
# print(">>>Conclusion<<<")
# print(conclusion)
return
{
"message"
:
conclusion
,
"score"
:
scoreForConclusion
}
def
co
mment
s
(
speech
):
conclusion
=
conclisions
(
speech
)[
"message"
]
def
co
nclusion_best_practice
s
(
speech
):
conclusion
=
identify_conclusion
(
speech
)[
"message"
]
final_words
=
[]
with
open
(
'content analyzing/bestPracticesForConclusion.txt'
,
'r'
)
as
file
:
...
...
@@ -35,23 +33,13 @@ def comments(speech):
}
def
questions
(
speech
):
def
conclusion_
questions
(
speech
):
retVal
=
[]
doc
=
nlp
(
conclisions
(
speech
)[
"message"
])
# patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
#
# verb_phrases = textacy.extract.token_matches(doc, patterns)
#
# for verb_phrases in verb_phrases:
# print("You used questions forms in your conclusion. It is a good practice for a conclusion.")
# print(f"Identified questions : {verb_phrases}")
# retVal.append(f"Identified questions : {verb_phrases}")
doc
=
nlp
(
identify_conclusion
(
speech
)[
"message"
])
tokens
=
[
token
for
token
in
doc
]
for
i
in
range
(
len
(
tokens
)):
# print(tokens[i].pos_)
if
(
tokens
[
i
]
.
pos_
==
'ADV'
and
tokens
[
i
+
1
]
.
pos_
==
'AUX'
and
tokens
[
i
+
2
]
.
pos_
==
'PRON'
):
retVal
.
append
(
f
"{tokens[i]} {tokens[i + 1]} {tokens[i + 2]}"
)
...
...
content analyzing/introduction.py
View file @
05965344
...
...
@@ -4,22 +4,21 @@ scoreForIntroduction = 40/100
nlp
=
spacy
.
load
(
"en_core_web_sm"
)
def
introductionFunc
(
speech
):
totalCharacterCount
=
len
(
speech
)
def
identify_introduction
(
speech
):
total_character_count
=
len
(
speech
)
introduction_character_count
=
(
15
/
100
)
*
total_character_count
introduction
=
(
speech
[
0
:
int
(
introduction_character_count
)])
introductionCharacterCount
=
(
15
/
100
)
*
totalCharacterCount
introduction
=
(
speech
[
0
:
int
(
introductionCharacterCount
)])
# print(">>>Introduction<<<")
# print(introduction)
return
{
"message"
:
introduction
,
"score"
:
scoreForIntroduction
}
def
introduction
BestUsesFunc
(
speech
):
def
introduction
_best_practices
(
speech
):
introduction
=
nlp
(
i
ntroductionFunc
(
speech
)[
"message"
])
introduction
=
nlp
(
i
dentify_introduction
(
speech
)[
"message"
])
final_words
=
[]
with
open
(
'content analyzing/bestPracticesForIntroduction.txt'
,
'r'
)
as
file
:
...
...
@@ -35,28 +34,18 @@ def introductionBestUsesFunc(speech):
}
def
introductionQuestions
(
speech
):
doc
=
nlp
(
introductionFunc
(
speech
)[
"message"
])
# patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
# retVal = []
# verb_phrases = textacy.extract.token_matches(doc, patterns)
# for verb_phrases in verb_phrases:
# print("You used questions forms in your introduction. It is a good practice for a introduction.")
# print(f"Identified questions : {verb_phrases}")
# retVal.append(f"Identified questions : {verb_phrases}")
def
introduction_questions
(
speech
):
doc
=
nlp
(
identify_introduction
(
speech
)[
"message"
])
tokens
=
[
token
for
token
in
doc
]
retVal
=
[]
identified_questions
=
[]
for
i
in
range
(
len
(
tokens
)):
# print(tokens[i].pos_)
if
(
tokens
[
i
]
.
pos_
==
'ADV'
and
tokens
[
i
+
1
]
.
pos_
==
'AUX'
and
tokens
[
i
+
2
]
.
pos_
==
'PRON'
):
retVal
.
append
(
f
"{tokens[i]} {tokens[i + 1]} {tokens[i + 2]}"
)
identified_questions
.
append
(
f
"{tokens[i]} {tokens[i + 1]} {tokens[i + 2]}"
)
return
{
"message"
:
retVal
,
"message"
:
identified_questions
,
"score"
:
scoreForIntroduction
}
content analyzing/keyWordExtraction.py
View file @
05965344
import
spacy
from
spacy.lang.en.stop_words
import
STOP_WORDS
import
string
nlp
=
spacy
.
load
(
'en_core_web_sm'
)
def
keywordExrtraction
(
topic
,
speech
):
Topic
=
nlp
(
topic
)
Content
=
nlp
(
speech
)
def
key_word_extraction
(
topic
,
speech
):
topic
=
nlp
(
topic
)
content
=
nlp
(
speech
)
stopwords
=
list
(
STOP_WORDS
)
topic_words
=
[]
key_words
=
[]
punctuation
=
string
.
punctuation
+
'
\n
'
word_frequencies
=
{}
for
word
in
C
ontent
:
for
word
in
c
ontent
:
if
word
.
text
.
lower
()
not
in
stopwords
:
if
word
.
text
.
lower
()
not
in
punctuation
:
if
word
.
text
not
in
word_frequencies
.
keys
():
...
...
@@ -20,23 +22,16 @@ def keywordExrtraction(topic, speech):
else
:
word_frequencies
[
word
.
text
]
+=
1
topicWords
=
[]
for
words
in
Topic
:
topicWords
.
append
(
words
.
text
)
# print(topicWords)
keyWords
=
[]
for
words
in
topic
:
topic_words
.
append
(
words
.
text
)
print
(
"Extracted Key Words:"
)
for
word
in
word_frequencies
.
keys
():
if
word_frequencies
[
word
]
>=
3
:
key
W
ords
.
append
(
word
)
key
_w
ords
.
append
(
word
)
print
(
word
)
return
{
"message"
:
key
W
ords
,
"message"
:
key
_w
ords
,
"score"
:
50
/
100
}
content analyzing/quotesIdentify.py
View file @
05965344
import
spacy
# Testing the model
test_text
=
"I had such high. hopes for this dress and really crappy worst product hate. it wporst bad."
def
identifyQuotes
(
text
):
nlp
=
spacy
.
load
(
"content analyzing/quotesIdentify"
)
output
=
[]
doc
=
nlp
(
text
)
return
doc
.
cats
print
(
identifyQuotes
(
test_text
))
\ No newline at end of file
for
sent
in
doc
.
sents
:
sentence
=
nlp
(
sent
.
text
)
return
sentence
.
cats
content analyzing/speechToText.py
View file @
05965344
...
...
@@ -9,6 +9,8 @@ r = sr.Recognizer()
# a function that splits the audio file into chunks
# and applies speech recognition
def
get_large_audio_transcription
(
path
):
"""
Splitting the large audio file into chunks
...
...
content analyzing/suggestContent.py
View file @
05965344
from
selenium
import
webdriver
def
suggestContent
():
driver
=
webdriver
.
Chrome
(
"chromedriver.exe"
)
driver
.
get
(
'https://wikipedia.com'
)
searchbox
=
driver
.
find_element_by_xpath
(
'//*[@id="searchInput"]'
)
searchbox
.
send_keys
([
'cricket'
])
search
_
box
=
driver
.
find_element_by_xpath
(
'//*[@id="searchInput"]'
)
search
_
box
.
send_keys
([
'cricket'
])
search
B
utton
=
driver
.
find_element_by_xpath
(
'//*[@id="search-form"]/fieldset/button'
)
search
B
utton
.
click
()
search
_b
utton
=
driver
.
find_element_by_xpath
(
'//*[@id="search-form"]/fieldset/button'
)
search
_b
utton
.
click
()
content analyzing/synonyms.py
View file @
05965344
import
spacy
from
spacy.lang.en.stop_words
import
STOP_WORDS
from
string
import
punctuation
nlp
=
spacy
.
load
(
"en_core_web_sm"
)
stopwords
=
list
(
STOP_WORDS
)
punctuation
=
punctuation
+
'
\n
'
def
synonymsFunc
(
topic
,
speech
):
T
opic
=
nlp
(
topic
)
C
ontent
=
nlp
(
speech
)
def
content_relativity
(
topic
,
speech
):
t
opic
=
nlp
(
topic
)
c
ontent
=
nlp
(
speech
)
T
otal_similarity
=
0
t
otal_similarity
=
0
for
token1
in
C
ontent
:
for
token1
in
c
ontent
:
if
token1
.
text
.
lower
()
not
in
stopwords
:
if
token1
.
text
.
lower
()
not
in
punctuation
:
for
token2
in
T
opic
:
for
token2
in
t
opic
:
print
((
token1
.
text
,
token2
.
text
),
"similarity"
,
token1
.
similarity
(
token2
))
Total_similarity
=
Total_similarity
+
token1
.
similarity
(
token2
)
total_similarity
=
total_similarity
+
token1
.
similarity
(
token2
)
print
(
len
(
Content
))
print
(
f
'Total score for the similarity: {Total_similarity}'
)
average_similarity
=
(
Total_similarity
/
len
(
Content
))
*
100
print
(
f
'Total score for the similarity: {total_similarity}'
)
average_similarity
=
(
total_similarity
/
len
(
content
))
*
100
print
(
f
'Average score for the similarity between topic and content: {average_similarity}
%
'
)
return
{
"message"
:
str
(
f
'Average score for the similarity between topic and content: {average_similarity}
%
'
),
...
...
content analyzing/webScraping.py
View file @
05965344
from
selenium
import
webdriver
def
webScrap
():
def
suggest_youtube_content
():
driver
=
webdriver
.
Chrome
(
"chromedriver.exe"
)
driver
.
get
(
'https://youtube.com'
)
searchbox
=
driver
.
find_element_by_xpath
(
'//*[@id="search"]'
)
searchbox
.
send_keys
([
'speeches'
,
'Transportation'
])
search
_
box
=
driver
.
find_element_by_xpath
(
'//*[@id="search"]'
)
search
_
box
.
send_keys
([
'speeches'
,
'Transportation'
])
search
B
utton
=
driver
.
find_element_by_xpath
(
'//*[@id="search-icon-legacy"]'
)
search
B
utton
.
click
()
search
_b
utton
=
driver
.
find_element_by_xpath
(
'//*[@id="search-icon-legacy"]'
)
search
_b
utton
.
click
()
filler_words/API.py
View file @
05965344
...
...
@@ -12,7 +12,7 @@ app.config["DEBUG"] = True
@
app
.
route
(
'/countFillerWords'
,
methods
=
[
'GET'
])
@
cross_origin
()
def
countFillerWords
():
fillterWordCount
=
getFillterWordCount
.
count
FillerW
ords
(
"../temp.wav"
)
fillterWordCount
=
getFillterWordCount
.
count
_filler_w
ords
(
"../temp.wav"
)
return
fillterWordCount
app
.
run
(
port
=
5001
)
filler_words/Train_Neural_Network.py
View file @
05965344
...
...
@@ -18,6 +18,7 @@ def get_numpy_array(features_df):
def
get_train_test
(
X
,
y
):
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
X
,
y
,
test_size
=
0.2
,
random_state
=
42
)
return
X_train
,
X_test
,
y_train
,
y_test
...
...
filler_words/getFillterWordCount.py
View file @
05965344
...
...
@@ -11,6 +11,7 @@ import neural_network
# countPauses("../content analyzing/temp.wav")
ScoreforUserSilence
=
70
/
100
def
get_numpy_array
(
features_df
):
X
=
np
.
array
(
features_df
.
feature
.
tolist
())
y
=
np
.
array
(
features_df
.
class_label
.
tolist
())
...
...
@@ -23,8 +24,9 @@ def get_numpy_array(features_df):
features_df
=
get_features
.
extract_features
()
X
,
y
,
le
=
get_numpy_array
(
features_df
)
def
countFillerWords
(
filePath
):
fillerWordCount
=
0
def
count_filler_words
(
filePath
):
filler_word_count
=
0
sound
=
AudioSegment
.
from_wav
(
filePath
)
chunks
=
split_on_silence
(
sound
,
min_silence_len
=
200
,
silence_thresh
=
sound
.
dBFS
-
16
,
keep_silence
=
150
)
...
...
@@ -41,14 +43,14 @@ def countFillerWords(filePath):
prediction
=
neural_network
.
predict
(
chunk_file
,
le
,
"trained_cnn.h5"
)
print
(
prediction
)
if
float
(
prediction
[
"probability"
])
>
0.99
:
filler
WordC
ount
+=
1
filler
_word_c
ount
+=
1
print
(
"****** How many times Filler words in their Speech *****"
)
# print count of silence
print
(
"Filler words: "
,
filler
WordC
ount
)
print
(
"Filler words: "
,
filler
_word_c
ount
)
return
{
"message"
:
str
(
filler
WordC
ount
)
+
" : filler word/s found"
,
"message"
:
str
(
filler
_word_c
ount
)
+
" : filler word/s found"
,
"score"
:
ScoreforUserSilence
}
...
...
filler_words/get_features.py
View file @
05965344
...
...
@@ -5,6 +5,7 @@ import numpy as np
import
glob
import
pandas
as
pd
def
get_features
(
file_name
):
print
(
sf
.
available_formats
())
...
...
@@ -26,8 +27,8 @@ def get_features(file_name):
mfccs_scaled
=
np
.
mean
(
mfccs
.
T
,
axis
=
0
)
return
mfccs_scaled
def
extract_features
():
def
extract_features
():
# path to dataset containing 10 subdirectories of .ogg files
sub_dirs
=
os
.
listdir
(
'data'
)
sub_dirs
.
sort
()
...
...
filler_words/neural_network.py
View file @
05965344
...
...
@@ -9,6 +9,7 @@ import numpy as np
import
os
from
sklearn.metrics
import
classification_report
def
create_mlp
(
num_labels
):
model
=
Sequential
()
...
...
@@ -24,6 +25,7 @@ def create_mlp(num_labels):
model
.
add
(
Activation
(
'softmax'
))
return
model
def
create_cnn
(
num_labels
):
model
=
Sequential
()
...
...
@@ -38,8 +40,8 @@ def create_cnn(num_labels):
model
.
add
(
Activation
(
'softmax'
))
return
model
def
train
(
model
,
X_train
,
X_test
,
y_train
,
y_test
,
model_file
):
def
train
(
model
,
X_train
,
X_test
,
y_train
,
y_test
,
model_file
):
# compile the model
model
.
compile
(
loss
=
'categorical_crossentropy'
,
metrics
=
[
'accuracy'
],
optimizer
=
'adam'
)
...
...
@@ -62,15 +64,15 @@ def train(model,X_train, X_test, y_train, y_test,model_file):
print
(
classification_report
(
y_test
,
y_pred
))
def
compute
(
X_test
,
y_test
,
model_file
):
def
compute
(
X_test
,
y_test
,
model_file
):
# load model from disk
loaded_model
=
load_model
(
model_file
)
score
=
loaded_model
.
evaluate
(
X_test
,
y_test
)
return
score
[
0
],
score
[
1
]
*
100
def
predict
(
filename
,
le
,
model_file
):
def
predict
(
filename
,
le
,
model_file
):
model
=
load_model
(
model_file
)
prediction_feature
=
get_features
.
get_features
(
filename
)
if
len
(
prediction_feature
)
==
0
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment