22_23 - J 36 / Easy Quest - Smart Recruitment Tool with AI - Backend

Commit 6da59127, authored Jan 20, 2023 by Emika Chamodi
Voice analyzer
parent 9f96db15
Showing 10 changed files with 1838 additions and 0 deletions.
voice_analyzer/Voice_Emotion/ReadMe.md (+1 -0)
voice_analyzer/Voice_Emotion/convert_wavs.py (+72 -0)
voice_analyzer/Voice_Emotion/lib.py (+96 -0)
voice_analyzer/Voice_Emotion/main (+117 -0)
voice_analyzer/Voice_Emotion/train.py (+26 -0)
voice_analyzer/Voice_recognizer/Pipfile (+19 -0)
voice_analyzer/Voice_recognizer/Pipfile.lock (+1435 -0)
voice_analyzer/Voice_recognizer/ReadMe.md (+4 -0)
voice_analyzer/Voice_recognizer/main.py (+68 -0)
voice_analyzer/Voice_recognizer/requirements.txt (+0 -0)
voice_analyzer/Voice_Emotion/ReadMe.md  0 → 100644
DataSet: https://drive.google.com/file/d/1wWsrN2Ep7x6lWqOXfr4rpKGYrJhWc8z7/view
voice_analyzer/Voice_Emotion/convert_wavs.py  0 → 100644
"""
A utility script used for converting audio samples to be
suitable for feature extraction
"""
import
os
def
convert_audio
(
audio_path
,
target_path
,
remove
=
False
):
"""This function sets the audio `audio_path` to:
- 16000Hz Sampling rate
- one audio channel ( mono )
Params:
audio_path (str): the path of audio wav file you want to convert
target_path (str): target path to save your new converted wav file
remove (bool): whether to remove the old file after converting
Note that this function requires ffmpeg installed in your system."""
os
.
system
(
f
"ffmpeg -i {audio_path} -ac 1 -ar 16000 {target_path}"
)
# os.system(f"ffmpeg -i {audio_path} -ac 1 {target_path}")
if
remove
:
os
.
remove
(
audio_path
)
def
convert_audios
(
path
,
target_path
,
remove
=
False
):
"""Converts a path of wav files to:
- 16000Hz Sampling rate
- one audio channel ( mono )
and then put them into a new folder called `target_path`
Params:
audio_path (str): the path of audio wav file you want to convert
target_path (str): target path to save your new converted wav file
remove (bool): whether to remove the old file after converting
Note that this function requires ffmpeg installed in your system."""
for
dirpath
,
dirnames
,
filenames
in
os
.
walk
(
path
):
for
dirname
in
dirnames
:
dirname
=
os
.
path
.
join
(
dirpath
,
dirname
)
target_dir
=
dirname
.
replace
(
path
,
target_path
)
if
not
os
.
path
.
isdir
(
target_dir
):
os
.
mkdir
(
target_dir
)
for
dirpath
,
_
,
filenames
in
os
.
walk
(
path
):
for
filename
in
filenames
:
file
=
os
.
path
.
join
(
dirpath
,
filename
)
if
file
.
endswith
(
".wav"
):
# it is a wav file
target_file
=
file
.
replace
(
path
,
target_path
)
convert_audio
(
file
,
target_file
,
remove
=
remove
)
if
__name__
==
"__main__"
:
import
argparse
parser
=
argparse
.
ArgumentParser
(
description
=
"""Convert ( compress ) wav files to 16MHz and mono audio channel ( 1 channel )
This utility helps for compressing wav files for training and testing"""
)
parser
.
add_argument
(
"audio_path"
,
help
=
"Folder that contains wav files you want to convert"
)
parser
.
add_argument
(
"target_path"
,
help
=
"Folder to save new wav files"
)
parser
.
add_argument
(
"-r"
,
"--remove"
,
type
=
bool
,
help
=
"Whether to remove the old wav file after converting"
,
default
=
False
)
args
=
parser
.
parse_args
()
audio_path
=
args
.
audio_path
target_path
=
args
.
target_path
if
os
.
path
.
isdir
(
audio_path
):
if
not
os
.
path
.
isdir
(
target_path
):
os
.
makedirs
(
target_path
)
convert_audios
(
audio_path
,
target_path
,
remove
=
args
.
remove
)
elif
os
.
path
.
isfile
(
audio_path
)
and
audio_path
.
endswith
(
".wav"
):
if
not
target_path
.
endswith
(
".wav"
):
target_path
+=
".wav"
convert_audio
(
audio_path
,
target_path
,
remove
=
args
.
remove
)
else
:
raise
TypeError
(
"The audio_path file you specified isn't appropriate for this operation"
)
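For reference, a minimal usage sketch of the converter above (the file and folder names are hypothetical; ffmpeg must be on the PATH):

from convert_wavs import convert_audio, convert_audios

# single file: write a 16 kHz mono copy
convert_audio("interview.wav", "interview_16k.wav")

# whole folder: mirror the tree under a new root
convert_audios("data/raw", "data/converted")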
voice_analyzer/Voice_Emotion/lib.py  0 → 100644
import soundfile
import numpy as np
import librosa
import glob
import os
from sklearn.model_selection import train_test_split

# Emotion codes embedded in the dataset file names.
EMOTIONS = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised",
}

# Only these four classes are used for training.
AVAILABLE_EMOTIONS = {"angry", "sad", "neutral", "happy"}


def extract_feature(file_name, **kwargs):
    """Extract the requested features (mfcc, chroma, mel, contrast, tonnetz)
    from `file_name` and stack their per-frame means into one 1-D vector."""
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma or contrast:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            # keyword argument y= is required by recent librosa releases
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
        if contrast:
            contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, contrast))
        if tonnetz:
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
            result = np.hstack((result, tonnetz))
    return result


# update random_state=9
def load_data(test_size=0.2, random_state=7):
    """Load the dataset, extract features and split into train/test sets."""
    X, y = [], []
    for file in glob.glob("data/Actor_*/*.wav"):
        basename = os.path.basename(file)
        # the third dash-separated field of the file name encodes the emotion
        emotion = EMOTIONS[basename.split("-")[2]]
        if emotion not in AVAILABLE_EMOTIONS:
            continue
        features = extract_feature(file, mfcc=True, chroma=True, mel=True)
        X.append(features)
        y.append(emotion)
    return train_test_split(np.array(X), y, test_size=test_size, random_state=random_state)


import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score


def extract_feature_2(file_name, mfcc, chroma, mel):
    """Variant of extract_feature with explicit boolean parameters."""
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
    return result
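load_data above keys each clip's label off its file name: the third dash-separated field is looked up in EMOTIONS. The codes follow the RAVDESS naming scheme, so a hypothetical file name parses like this:

basename = "03-01-05-01-02-01-12.wav"  # hypothetical RAVDESS-style name
code = basename.split("-")[2]          # -> "05"
print(EMOTIONS[code])                  # -> "angry", which is in AVAILABLE_EMOTIONS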
voice_analyzer/Voice_Emotion/main  0 → 100644
import pyaudio
import os
import wave
import pickle
from sys import byteorder
from array import array
from struct import pack

from sklearn.neural_network import MLPClassifier

from lib import extract_feature

THRESHOLD = 500
CHUNK_SIZE = 1024
FORMAT = pyaudio.paInt16
RATE = 16000
SILENCE = 30


def is_silent(snd_data):
    "Returns True if the chunk is below the silence threshold."
    return max(snd_data) < THRESHOLD


def normalize(snd_data):
    "Scale the samples so the loudest one sits at MAXIMUM."
    MAXIMUM = 16384
    times = float(MAXIMUM) / max(abs(i) for i in snd_data)
    r = array('h')
    for i in snd_data:
        r.append(int(i * times))
    return r


def trim(snd_data):
    "Trim the silent stretches at the start and end of the recording."
    def _trim(snd_data):
        snd_started = False
        r = array('h')
        for i in snd_data:
            if not snd_started and abs(i) > THRESHOLD:
                snd_started = True
                r.append(i)
            elif snd_started:
                r.append(i)
        return r

    # trim the left side, then reverse and trim the right side
    snd_data = _trim(snd_data)
    snd_data.reverse()
    snd_data = _trim(snd_data)
    snd_data.reverse()
    return snd_data


def add_silence(snd_data, seconds):
    "Pad `seconds` of silence onto the start and end of `snd_data`."
    r = array('h', [0 for i in range(int(seconds * RATE))])
    r.extend(snd_data)
    r.extend([0 for i in range(int(seconds * RATE))])
    return r


def record():
    """Record from the microphone until a stretch of silence follows speech,
    then normalize, trim and pad the captured samples."""
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT, channels=1, rate=RATE,
                    input=True, output=True,
                    frames_per_buffer=CHUNK_SIZE)

    num_silent = 0
    snd_started = False
    r = array('h')

    while 1:
        # little endian, signed short
        snd_data = array('h', stream.read(CHUNK_SIZE))
        if byteorder == 'big':
            snd_data.byteswap()
        r.extend(snd_data)

        silent = is_silent(snd_data)
        if silent and snd_started:
            num_silent += 1
        elif not silent and not snd_started:
            snd_started = True
        if snd_started and num_silent > SILENCE:
            break

    sample_width = p.get_sample_size(FORMAT)
    stream.stop_stream()
    stream.close()
    p.terminate()

    r = normalize(r)
    r = trim(r)
    r = add_silence(r, 0.5)
    return sample_width, r


def record_to_file(path):
    "Record from the microphone and write the result to a wav file at `path`."
    sample_width, data = record()
    data = pack('<' + ('h' * len(data)), *data)

    wf = wave.open(path, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()


if __name__ == "__main__":
    model = pickle.load(open("result/mlp_classifier.model", "rb"))
    print("Please talk")
    filename = "test.wav"
    record_to_file(filename)
    features = extract_feature(filename, mfcc=True, chroma=True, mel=True).reshape(1, -1)
    result = model.predict(features)[0]
    print("result:", result)
\ No newline at end of file
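The __main__ block above records live from the microphone, but the same classifier can score an existing recording. A minimal sketch, assuming train.py has already produced result/mlp_classifier.model and that clip.wav (a hypothetical file) is 16 kHz mono as emitted by convert_wavs.py:

import pickle
from lib import extract_feature

model = pickle.load(open("result/mlp_classifier.model", "rb"))
features = extract_feature("clip.wav", mfcc=True, chroma=True, mel=True).reshape(1, -1)
print(model.predict(features)[0])  # one of: angry, sad, neutral, happy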
voice_analyzer/Voice_Emotion/train.py  0 → 100644
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from lib import load_data
import os
import pickle

# Load the dataset and split it: 75% training, 25% testing.
X_train, X_test, y_train, y_test = load_data(test_size=0.25)

# A single-hidden-layer MLP (300 units) with an adaptive learning rate.
model = MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08,
                      hidden_layer_sizes=(300,), learning_rate='adaptive',
                      max_iter=500)

print("Training the model...")
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: {:.2f}%".format(accuracy * 100))

# Persist the trained classifier for `main` to load at prediction time.
if not os.path.isdir("result"):
    os.mkdir("result")
pickle.dump(model, open("result/mlp_classifier.model", "wb"))
\ No newline at end of file
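train.py takes no paths itself; it relies on lib.load_data, whose glob pattern is "data/Actor_*/*.wav". The dataset linked in the Voice_Emotion ReadMe therefore needs to be unpacked into a layout like this (file names illustrative):

data/
    Actor_01/
        03-01-05-01-02-01-01.wav
        ...
    Actor_02/
        ...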
voice_analyzer/Voice_recognizer/Pipfile  0 → 100644
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
vosk = "*"
pydub = "*"
transformers = "*"
torch = "*"
pyaudio = "*"
regex = "*"
ipywidgets = "*"
spacy = "*"

[dev-packages]

[requires]
python_version = "3.9"
voice_analyzer/Voice_recognizer/Pipfile.lock  0 → 100644
This diff is collapsed (generated Pipfile.lock, 1,435 lines not shown).
voice_analyzer/Voice_recognizer/ReadMe.md  0 → 100644
Pretrained models:

- English: https://alphacephei.com/vosk/models/vosk-model-en-us-0.22.zip or https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
- Punctuation: https://alphacephei.com/vosk/models/vosk-recasepunc-en-0.22.zip
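main.py below constructs its recognizer with Model(model_name="vosk-model-en-us-0.22"). Recent vosk releases can resolve and cache that name themselves; alternatively, unpack the zip linked above next to the script and pass the folder path. A minimal sketch:

from vosk import Model

model = Model(model_name="vosk-model-en-us-0.22")   # resolved/cached by vosk
# model = Model("vosk-model-en-us-0.22")            # or a locally unpacked folder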
voice_analyzer/Voice_recognizer/main.py  0 → 100644
from vosk import Model, KaldiRecognizer
from pydub import AudioSegment
from transformers import pipeline
import json
import subprocess
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest

FRAME_RATE = 16000
CHANNELS = 1


def voice_recognition(filename):
    """Transcribe an mp3 file with vosk, then restore casing and
    punctuation with the recasepunc model."""
    model = Model(model_name="vosk-model-en-us-0.22")
    rec = KaldiRecognizer(model, FRAME_RATE)
    rec.SetWords(True)

    mp3 = AudioSegment.from_mp3(filename)
    mp3 = mp3.set_channels(CHANNELS)
    mp3 = mp3.set_frame_rate(FRAME_RATE)

    # Feed the audio to the recognizer in 45-second chunks.
    step = 45000
    transcript = ""
    for i in range(0, len(mp3), step):
        print(f"Progress: {i/len(mp3)}")
        segment = mp3[i:i + step]
        rec.AcceptWaveform(segment.raw_data)
        result = rec.Result()
        text = json.loads(result)["text"]
        # separate chunks with a space so words don't merge at boundaries
        transcript += text + " "

    # Restore casing/punctuation with the recasepunc checkpoint.
    cased = subprocess.check_output(
        'python recasepunc/recasepunc.py predict recasepunc/checkpoint',
        shell=True, text=True, input=transcript)
    return cased


def summarize(text, per):
    """Extractive summary: score sentences by normalized word frequency
    and keep the top `per` fraction of them."""
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(text)
    tokens = [token.text for token in doc]
    word_frequencies = {}
    for word in doc:
        if word.text.lower() not in list(STOP_WORDS):
            if word.text.lower() not in punctuation:
                # key by the lowercased token so the scoring lookups
                # below find every occurrence regardless of case
                if word.text.lower() not in word_frequencies:
                    word_frequencies[word.text.lower()] = 1
                else:
                    word_frequencies[word.text.lower()] += 1
    max_frequency = max(word_frequencies.values())
    for word in word_frequencies.keys():
        word_frequencies[word] = word_frequencies[word] / max_frequency

    sentence_tokens = [sent for sent in doc.sents]
    sentence_scores = {}
    for sent in sentence_tokens:
        for word in sent:
            if word.text.lower() in word_frequencies.keys():
                if sent not in sentence_scores.keys():
                    sentence_scores[sent] = word_frequencies[word.text.lower()]
                else:
                    sentence_scores[sent] += word_frequencies[word.text.lower()]

    select_length = int(len(sentence_tokens) * per)
    summary = nlargest(select_length, sentence_scores, key=sentence_scores.get)
    final_summary = [word.text for word in summary]
    # join with spaces so sentences don't run together
    summary = ' '.join(final_summary)
    return summary


transcript = voice_recognition("sample_voice.mp3")
summary = summarize(transcript, 0.05)
print(summary)
\ No newline at end of file
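Two notes on the script above: spacy.load('en_core_web_sm') needs the small English pipeline installed first (python -m spacy download en_core_web_sm), and the subprocess call assumes the recasepunc checkpoint from the ReadMe is unpacked under recasepunc/. The `per` argument is the fraction of sentences kept, so this hypothetical call returns a longer summary than the 5% used above:

summary = summarize(transcript, 0.2)  # keep roughly the top 20% of sentences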
voice_analyzer/Voice_recognizer/requirements.txt  0 → 100644 (empty file)