Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
I
Intelligent English Tutor
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
2023-24-027
Intelligent English Tutor
Commits
ff03cfbb
Commit
ff03cfbb
authored
Nov 07, 2023
by
Piumi Navoda
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
data keywords creation
parent
8d7065ec
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
128 additions
and
0 deletions
+128
-0
voicerecognizion/keyword_spotting_service.py
voicerecognizion/keyword_spotting_service.py
+128
-0
No files found.
voicerecognizion/keyword_spotting_service.py
0 → 100644
View file @
ff03cfbb
import
librosa
import
tensorflow
as
tf
import
numpy
as
np
# Path of the trained Keras model file, handed to tf.keras.models.load_model().
SAVED_MODEL_PATH = "model.h5"

# Number of audio samples kept from each clip before MFCC extraction.
# NOTE(review): 22050 matches librosa.load's default sampling rate, i.e. this
# presumably corresponds to 1 second of audio - confirm against the training setup.
SAMPLES_TO_CONSIDER = 22050
class _Keyword_Spotting_Service:
    """Singleton class for keyword spotting inference with trained models.

    Obtain the shared instance through the ``Keyword_Spotting_Service()``
    factory function rather than instantiating this class directly.
    """

    # Trained model used by ``predict``; ``None`` until one is assigned.
    model = None

    # Output labels: ``predict`` indexes this list with the argmax of the
    # model output, so the order must match the model's output units.
    # NOTE(review): every label carries a "dataset\" prefix, presumably left
    # over from the training directory layout - confirm whether callers expect
    # the bare keyword before changing these strings.
    _mapping = [
        "dataset\\backward",
        "dataset\\bed",
        "dataset\\bird",
        "dataset\\cat",
        "dataset\\dog",
        "dataset\\down",
        "dataset\\eight",
        "dataset\\five",
        "dataset\\follow",
        "dataset\\forward",
        "dataset\\four",
        "dataset\\go",
        "dataset\\happy",
        "dataset\\house",
        "dataset\\learn",
        "dataset\\left",
        "dataset\\nine",
        "dataset\\no",
        "dataset\\off",
        "dataset\\on",
        "dataset\\one",
        "dataset\\right",
        "dataset\\seven",
        "dataset\\six",
        "dataset\\stop",
        "dataset\\three",
        "dataset\\tree",
        "dataset\\two",
        "dataset\\up",
        "dataset\\visual",
        "dataset\\wow",
        "dataset\\yes",
        "dataset\\zero",
    ]

    # The single shared instance, or ``None`` while none has been created.
    _instance = None

    def predict(self, file_path):
        """Predict the keyword spoken in an audio file.

        :param file_path (str): Path to audio file to predict
        :return predicted_keyword (str): Entry of ``_mapping`` selected by the model
        """
        # extract MFCCs
        MFCCs = self.preprocess(file_path)

        # we need a 4-dim array to feed to the model for prediction:
        # (# samples, # time steps, # coefficients, 1)
        MFCCs = MFCCs[np.newaxis, ..., np.newaxis]

        # pick the label with the highest score
        predictions = self.model.predict(MFCCs)
        predicted_index = np.argmax(predictions)
        return self._mapping[predicted_index]

    def preprocess(self, file_path, num_mfcc=13, n_fft=2048, hop_length=512):
        """Extract MFCCs from audio file.

        The loaded signal is first brought to exactly ``SAMPLES_TO_CONSIDER``
        samples: longer clips are truncated and shorter clips are zero-padded
        at the end, so every clip yields the same number of time steps.

        :param file_path (str): Path of audio file
        :param num_mfcc (int): # of coefficients to extract
        :param n_fft (int): Interval we consider to apply STFT. Measured in # of samples
        :param hop_length (int): Sliding window for STFT. Measured in # of samples
        :return MFCCs (ndarray): 2-dim array with MFCC data of shape (# time steps, # coefficients)
        """
        # load audio file
        signal, sample_rate = librosa.load(file_path)

        # ensure consistency of the length of the signal; previously clips
        # shorter than SAMPLES_TO_CONSIDER were not handled at all
        signal = self._fit_to_length(signal, SAMPLES_TO_CONSIDER)

        # extract MFCCs
        MFCCs = librosa.feature.mfcc(
            y=signal,
            sr=sample_rate,
            n_mfcc=num_mfcc,
            n_fft=n_fft,
            hop_length=hop_length,
        )

        # librosa returns (# coefficients, # time steps); callers want the transpose
        return MFCCs.T

    @staticmethod
    def _fit_to_length(signal, num_samples):
        """Return ``signal`` truncated or zero-padded at the end to ``num_samples`` samples."""
        if len(signal) >= num_samples:
            return signal[:num_samples]
        return np.pad(signal, (0, num_samples - len(signal)))
def Keyword_Spotting_Service():
    """Factory function for Keyword_Spotting_Service class.

    The first successful call loads the model from ``SAVED_MODEL_PATH`` and
    creates the shared instance; later calls return that same instance.

    :return _Keyword_Spotting_Service._instance (_Keyword_Spotting_Service):
    """
    # ensure an instance is created only the first time the factory function is called
    if _Keyword_Spotting_Service._instance is None:
        # Load the model BEFORE registering the instance: if loading raises,
        # no half-initialised singleton (instance without a model) is left
        # behind, and the next call simply retries.
        loaded_model = tf.keras.models.load_model(SAVED_MODEL_PATH)
        _Keyword_Spotting_Service.model = loaded_model
        _Keyword_Spotting_Service._instance = _Keyword_Spotting_Service()
    return _Keyword_Spotting_Service._instance
if __name__ == "__main__":
    # create 2 instances of the keyword spotting service
    kss = Keyword_Spotting_Service()
    kss1 = Keyword_Spotting_Service()

    # check that different instances of the keyword spotting service point
    # back to the same object (singleton)
    assert kss is kss1

    # make all predictions first, then print them one per line
    sample_paths = ("Sample1.wav", "Sample2.wav", "Sample3.wav", "Sample4.wav")
    keywords = [kss.predict(sample_path) for sample_path in sample_paths]
    for keyword in keywords:
        print(keyword)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment